From dd58ef019b700900793a1eb48b52123db01b654e Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Wed, 30 Dec 2015 11:46:15 +0000
Subject: [PATCH 01/10] Vendor import of llvm trunk r256633:
 https://llvm.org/svn/llvm-project/llvm/trunk@256633

---
 .clang-tidy | 14 +-
 CMakeLists.txt | 110 +-
 CODE_OWNERS.TXT | 32 +-
 CREDITS.TXT | 51 -
 Makefile.config.in | 4 +-
 Makefile.rules | 7 +-
 README.txt | 2 +-
 autoconf/configure.ac | 81 +-
 autoconf/m4/rand48.m4 | 12 -
 bindings/go/llvm/DIBuilderBindings.cpp | 32 +-
 bindings/go/llvm/DIBuilderBindings.h | 14 +-
 bindings/go/llvm/IRBindings.cpp | 4 +
 bindings/go/llvm/IRBindings.h | 2 +
 bindings/go/llvm/analysis.go | 1 +
 bindings/go/llvm/bitreader.go | 3 +-
 bindings/go/llvm/dibuilder.go | 45 +-
 bindings/go/llvm/executionengine.go | 1 +
 bindings/go/llvm/ir.go | 10 +-
 bindings/go/llvm/linker.go | 7 +-
 bindings/go/llvm/target.go | 1 +
 bindings/ocaml/Makefile.ocaml | 2 +
 bindings/ocaml/bitreader/bitreader_ocaml.c | 10 +-
 bindings/ocaml/linker/linker_ocaml.c | 6 +-
 bindings/ocaml/linker/llvm_linker.ml | 4 +-
 bindings/ocaml/linker/llvm_linker.mli | 6 +-
 bindings/ocaml/llvm/llvm.ml | 2 +
 bindings/ocaml/llvm/llvm.mli | 10 +
 bindings/ocaml/llvm/llvm_ocaml.c | 11 +
 bindings/python/llvm/bit_reader.py | 9 +-
 bindings/python/llvm/core.py | 4 -
 cmake/config-ix.cmake | 13 +-
 cmake/dummy.cpp | 1 +
 cmake/modules/AddLLVM.cmake | 432 +-
 cmake/modules/AddLLVMDefinitions.cmake | 6 +-
 cmake/modules/CrossCompile.cmake | 2 +
 cmake/modules/DetermineGCCCompatible.cmake | 11 +
 cmake/modules/HandleLLVMOptions.cmake | 125 +-
 cmake/modules/HandleLLVMStdlib.cmake | 10 +-
 cmake/modules/LLVM-Config.cmake | 18 +-
 cmake/modules/LLVMConfig.cmake.in | 5 +-
 cmake/modules/LLVMExternalProjectUtils.cmake | 195 +
 cmake/modules/LLVMInstallSymlink.cmake | 21 +
 cmake/modules/Makefile | 19 +-
 cmake/modules/TableGen.cmake | 24 +-
 configure | 524 +-
 docs/AliasAnalysis.rst | 9 +-
 docs/Atomics.rst | 2 +-
 docs/BitCodeFormat.rst | 5 +-
 docs/BitSets.rst | 65 +-
 docs/BranchWeightMetadata.rst | 6 +-
 docs/BuildingLLVMWithAutotools.rst | 6 +
 docs/CMake.rst | 214 +-
 docs/CMakeLists.txt | 4 +-
 docs/CodeGenerator.rst | 2 +-
 docs/CodingStandards.rst | 4 +-
 docs/CommandGuide/index.rst | 1 +
 docs/CommandGuide/lit.rst | 5 +
 docs/CommandGuide/llc.rst | 6 +
 docs/CommandGuide/lli.rst | 168 +-
 docs/CommandGuide/llvm-lib.rst | 31 +
 docs/CommandGuide/llvm-profdata.rst | 62 +-
 docs/CommandGuide/llvm-symbolizer.rst | 16 +
 docs/CommandLine.rst | 1 +
 docs/CompileCudaWithLLVM.rst | 169 +
 docs/CompilerWriterInfo.rst | 6 +-
 docs/CoverageMappingFormat.rst | 2 +-
 docs/DeveloperPolicy.rst | 40 +-
 docs/ExceptionHandling.rst | 436 +-
 docs/ExtendingLLVM.rst | 6 +-
 docs/Frontend/PerformanceTips.rst | 227 +-
 docs/GettingStarted.rst | 88 +-
 docs/HowToBuildOnARM.rst | 64 +-
 docs/HowToReleaseLLVM.rst | 57 +-
 docs/LangRef.rst | 1202 +-
 docs/LibFuzzer.rst | 144 +-
 docs/MIRLangRef.rst | 495 +
 docs/Phabricator.rst | 26 +-
 docs/ProgrammersManual.rst | 28 +-
 docs/README.txt | 2 +-
 docs/ReleaseNotes.rst | 491 +-
 docs/ReleaseProcess.rst | 2 +-
 docs/SourceLevelDebugging.rst | 34 +-
 docs/StackMaps.rst | 10 +
 docs/Statepoints.rst | 115 +-
 docs/TestingGuide.rst | 4 +
 docs/WritingAnLLVMPass.rst | 30 +-
 docs/_ocamldoc/style.css | 97 +
 docs/conf.py | 4 +-
 docs/doxygen.cfg.in | 2 +-
 docs/index.rst | 14 +
 docs/tutorial/LangImpl1.rst | 25 +-
 docs/tutorial/LangImpl2.rst | 185 +-
 docs/tutorial/LangImpl3.rst | 277 +-
 docs/tutorial/LangImpl4.rst | 356 +-
 docs/tutorial/LangImpl5.rst | 201 +-
 docs/tutorial/LangImpl6.rst | 120 +-
 docs/tutorial/LangImpl7.rst | 119 +-
 docs/tutorial/LangImpl8.rst | 43 +-
 docs/tutorial/LangImpl9.rst | 2 +-
 docs/tutorial/OCamlLangImpl1.rst | 4 +-
 docs/tutorial/OCamlLangImpl2.rst | 6 +-
 docs/tutorial/OCamlLangImpl3.rst | 24 +-
 docs/tutorial/OCamlLangImpl4.rst | 2 +-
 docs/tutorial/OCamlLangImpl5.rst | 2 +-
 docs/tutorial/OCamlLangImpl6.rst | 4 +-
 docs/tutorial/OCamlLangImpl7.rst | 8 +-
 docs/tutorial/OCamlLangImpl8.rst | 2 +-
 docs/yaml2obj.rst | 1 +
 examples/BrainF/BrainF.cpp | 10 +-
 examples/BrainF/BrainFDriver.cpp | 4 +-
 examples/CMakeLists.txt | 2 +-
 examples/ExceptionDemo/CMakeLists.txt | 8 +-
 examples/ExceptionDemo/ExceptionDemo.cpp | 21 +-
 examples/Fibonacci/fibonacci.cpp | 7 +-
 examples/HowToUseJIT/HowToUseJIT.cpp | 7 +-
 examples/Kaleidoscope/Chapter2/CMakeLists.txt | 6 +
 examples/Kaleidoscope/Chapter2/Makefile | 2 +
 examples/Kaleidoscope/Chapter2/toy.cpp | 282 +-
 examples/Kaleidoscope/Chapter3/toy.cpp | 460 +-
 examples/Kaleidoscope/Chapter4/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter4/toy.cpp | 620 +-
 examples/Kaleidoscope/Chapter5/CMakeLists.txt | 4 +-
 examples/Kaleidoscope/Chapter5/toy.cpp | 561 +-
 examples/Kaleidoscope/Chapter6/CMakeLists.txt | 4 +-
 examples/Kaleidoscope/Chapter6/toy.cpp | 608 +-
 examples/Kaleidoscope/Chapter7/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter7/toy.cpp | 704 +-
 examples/Kaleidoscope/Chapter8/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter8/toy.cpp | 776 +-
 examples/Kaleidoscope/Orc/fully_lazy/toy.cpp | 248 +-
 examples/Kaleidoscope/Orc/initial/toy.cpp | 239 +-
 .../Kaleidoscope/Orc/lazy_codegen/toy.cpp | 239 +-
 examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp | 239 +-
 .../Kaleidoscope/include/KaleidoscopeJIT.h | 114 +
 examples/ParallelJIT/ParallelJIT.cpp | 19 +-
 include/llvm-c/Analysis.h | 2 +-
 include/llvm-c/BitReader.h | 45 +-
 include/llvm-c/BitWriter.h | 2 +-
 include/llvm-c/Core.h | 135 +-
 include/llvm-c/ErrorHandling.h | 51 +
 include/llvm-c/ExecutionEngine.h | 26 +-
 include/llvm-c/IRReader.h | 2 +-
 include/llvm-c/Initialization.h | 2 +-
 include/llvm-c/Linker.h | 24 +-
 include/llvm-c/Object.h | 2 +-
 include/llvm-c/OrcBindings.h | 134 +
 include/llvm-c/Support.h | 20 +-
 include/llvm-c/Target.h | 2 +-
 include/llvm-c/TargetMachine.h | 4 +-
 include/llvm-c/Transforms/IPO.h | 2 +-
 .../llvm-c/Transforms/PassManagerBuilder.h | 2 +-
 include/llvm-c/Transforms/Scalar.h | 2 +-
 include/llvm-c/Transforms/Vectorize.h | 3 +-
 include/llvm-c/Types.h | 124 +
 include/llvm-c/lto.h | 4 +-
 include/llvm/ADT/APFloat.h | 8 +-
 include/llvm/ADT/APInt.h | 7 +-
 include/llvm/ADT/APSInt.h | 6 +-
 include/llvm/ADT/ArrayRef.h | 21 +-
 include/llvm/ADT/BitVector.h | 10 +-
 include/llvm/ADT/DeltaAlgorithm.h | 2 +-
 include/llvm/ADT/DenseMap.h | 26 +-
 include/llvm/ADT/DenseMapInfo.h | 28 +-
 include/llvm/ADT/DenseSet.h | 5 +-
 include/llvm/ADT/DepthFirstIterator.h | 22 +-
 include/llvm/ADT/FoldingSet.h | 37 +-
 include/llvm/ADT/ImmutableList.h | 12 +-
 include/llvm/ADT/ImmutableMap.h | 101 +-
 include/llvm/ADT/IntrusiveRefCntPtr.h | 4 +-
 include/llvm/ADT/Optional.h | 19 +
 include/llvm/ADT/PackedVector.h | 28 +-
 include/llvm/ADT/PointerIntPair.h | 178 +-
 include/llvm/ADT/PointerUnion.h | 870 +-
 include/llvm/ADT/PostOrderIterator.h | 8 +-
 include/llvm/ADT/STLExtras.h | 52 +-
 include/llvm/ADT/ScopedHashTable.h | 40 +-
 include/llvm/ADT/SetOperations.h | 2 +-
 include/llvm/ADT/SetVector.h | 30 +-
 include/llvm/ADT/SmallBitVector.h | 23 +-
 include/llvm/ADT/SmallPtrSet.h | 13 +-
 include/llvm/ADT/SmallSet.h | 4 +-
 include/llvm/ADT/SmallVector.h | 7 +
 include/llvm/ADT/SparseBitVector.h | 48 +-
 include/llvm/ADT/Statistic.h | 5 +
 include/llvm/ADT/StringMap.h | 22 +-
 include/llvm/ADT/StringRef.h | 37 +-
 include/llvm/ADT/StringSet.h | 5 +
 include/llvm/ADT/StringSwitch.h | 50 +-
 include/llvm/ADT/TinyPtrVector.h | 3 +-
 include/llvm/ADT/Triple.h | 97 +-
 include/llvm/ADT/UniqueVector.h | 1 +
 include/llvm/ADT/ilist.h | 152 +-
 include/llvm/ADT/ilist_node.h | 89 +-
 include/llvm/ADT/iterator_range.h | 12 +
 include/llvm/Analysis/AliasAnalysis.h | 1147 +-
 include/llvm/Analysis/AliasSetTracker.h | 15 +-
 include/llvm/Analysis/AssumptionCache.h | 6 +-
 include/llvm/Analysis/BasicAliasAnalysis.h | 223 +
 include/llvm/Analysis/BlockFrequencyInfo.h | 42 +-
 .../llvm/Analysis/BlockFrequencyInfoImpl.h | 88 +-
 include/llvm/Analysis/BranchProbabilityInfo.h | 91 +-
 include/llvm/Analysis/CFG.h | 2 +-
 include/llvm/Analysis/CFLAliasAnalysis.h | 158 +
 include/llvm/Analysis/CGSCCPassManager.h | 2 +-
 include/llvm/Analysis/CallGraph.h | 33 +-
 include/llvm/Analysis/CallGraphSCCPass.h | 17 +-
 include/llvm/Analysis/CaptureTracking.h | 7 +-
 include/llvm/Analysis/DOTGraphTraitsPass.h | 34 +-
 include/llvm/Analysis/DemandedBits.h | 75 +
 include/llvm/Analysis/DependenceAnalysis.h | 61 +-
 include/llvm/Analysis/DivergenceAnalysis.h | 48 +
 include/llvm/Analysis/EHPersonalities.h | 94 +
 include/llvm/Analysis/GlobalsModRef.h | 160 +
 include/llvm/Analysis/IVUsers.h | 2 +-
 include/llvm/Analysis/InlineCost.h | 63 +-
 include/llvm/Analysis/InstructionSimplify.h | 2 +-
 .../llvm/Analysis/IteratedDominanceFrontier.h | 6 +-
 include/llvm/Analysis/LazyCallGraph.h | 113 +-
 include/llvm/Analysis/LazyValueInfo.h | 17 +-
 include/llvm/Analysis/LibCallAliasAnalysis.h | 71 -
 include/llvm/Analysis/LibCallSemantics.h | 225 -
 include/llvm/Analysis/Loads.h | 13 +-
 include/llvm/Analysis/LoopAccessAnalysis.h | 226 +-
 include/llvm/Analysis/LoopInfo.h | 111 +-
 include/llvm/Analysis/LoopInfoImpl.h | 8 +-
 include/llvm/Analysis/LoopPass.h | 19 +-
 include/llvm/Analysis/MemoryBuiltins.h | 5 -
 .../llvm/Analysis/MemoryDependenceAnalysis.h | 39 +-
 include/llvm/Analysis/ObjCARCAliasAnalysis.h | 102 +
 include/llvm/Analysis/ObjCARCAnalysisUtils.h | 287 +
 .../llvm/Analysis/ObjCARCInstKind.h | 8 +-
 include/llvm/Analysis/OrderedBasicBlock.h | 66 +
 include/llvm/Analysis/PHITransAddr.h | 23 +-
 include/llvm/Analysis/Passes.h | 74 +-
 include/llvm/Analysis/RegionInfo.h | 57 +-
 include/llvm/Analysis/RegionInfoImpl.h | 68 +-
 include/llvm/Analysis/RegionPrinter.h | 45 +
 include/llvm/Analysis/ScalarEvolution.h | 1005 +-
 .../Analysis/ScalarEvolutionAliasAnalysis.h | 79 +
 .../llvm/Analysis/ScalarEvolutionExpander.h | 36 +-
 .../Analysis/ScalarEvolutionExpressions.h | 227 +-
 include/llvm/Analysis/ScopedNoAliasAA.h | 92 +
 include/llvm/Analysis/SparsePropagation.h | 93 +-
 include/llvm/Analysis/TargetLibraryInfo.def | 90 +
 include/llvm/Analysis/TargetLibraryInfo.h | 8 +-
 include/llvm/Analysis/TargetTransformInfo.h | 376 +-
 .../llvm/Analysis/TargetTransformInfoImpl.h | 101 +-
 .../llvm/Analysis/TypeBasedAliasAnalysis.h | 93 +
 include/llvm/Analysis/ValueTracking.h | 180 +-
 include/llvm/Analysis/VectorUtils.h | 52 +-
 include/llvm/AsmParser/Parser.h | 12 +
 include/llvm/AsmParser/SlotMapping.h | 12 +-
 include/llvm/Bitcode/BitcodeWriterPass.h | 16 +-
 include/llvm/Bitcode/BitstreamReader.h | 2 +
 include/llvm/Bitcode/BitstreamWriter.h | 125 +-
 include/llvm/Bitcode/LLVMBitCodes.h | 122 +-
 include/llvm/Bitcode/ReaderWriter.h | 64 +-
 include/llvm/CodeGen/Analysis.h | 8 +-
 include/llvm/CodeGen/AsmPrinter.h | 33 +-
 include/llvm/CodeGen/AtomicExpandUtils.h | 57 +
 include/llvm/CodeGen/BasicTTIImpl.h | 77 +-
 include/llvm/CodeGen/CalcSpillWeights.h | 7 +-
 include/llvm/CodeGen/CallingConvLower.h | 19 +-
 include/llvm/CodeGen/CommandFlags.h | 36 +
 include/llvm/CodeGen/DFAPacketizer.h | 70 +-
 include/llvm/CodeGen/DIE.h | 126 +-
 include/llvm/CodeGen/FastISel.h | 28 +-
 include/llvm/CodeGen/FunctionLoweringInfo.h | 25 +-
 include/llvm/CodeGen/GCMetadata.h | 4 +-
 include/llvm/CodeGen/GCStrategy.h | 4 +-
 include/llvm/CodeGen/ISDOpcodes.h | 49 +-
 include/llvm/CodeGen/IntrinsicLowering.h | 59 +-
 include/llvm/CodeGen/LiveInterval.h | 30 +-
 include/llvm/CodeGen/LiveIntervalAnalysis.h | 28 +-
 include/llvm/CodeGen/LivePhysRegs.h | 8 +-
 include/llvm/CodeGen/LiveRangeEdit.h | 2 +-
 include/llvm/CodeGen/LiveRegMatrix.h | 2 -
 include/llvm/CodeGen/LiveStackAnalysis.h | 110 +-
 include/llvm/CodeGen/MIRParser/MIRParser.h | 6 +-
 include/llvm/CodeGen/MIRYamlMapping.h | 202 +-
 include/llvm/CodeGen/MachineBasicBlock.h | 460 +-
 .../CodeGen/MachineBranchProbabilityInfo.h | 31 +-
 include/llvm/CodeGen/MachineCombinerPattern.h | 31 +-
 include/llvm/CodeGen/MachineConstantPool.h | 47 +-
 include/llvm/CodeGen/MachineDominators.h | 32 +-
 include/llvm/CodeGen/MachineFrameInfo.h | 46 +-
 include/llvm/CodeGen/MachineFunction.h | 61 +-
 include/llvm/CodeGen/MachineInstr.h | 92 +-
 include/llvm/CodeGen/MachineInstrBuilder.h | 85 +-
 include/llvm/CodeGen/MachineInstrBundle.h | 53 +-
 include/llvm/CodeGen/MachineMemOperand.h | 12 +-
 include/llvm/CodeGen/MachineModuleInfo.h | 42 +-
 include/llvm/CodeGen/MachineModuleInfoImpls.h | 120 +-
 include/llvm/CodeGen/MachineRegisterInfo.h | 118 +-
 include/llvm/CodeGen/MachineScheduler.h | 30 +-
 include/llvm/CodeGen/MachineValueType.h | 248 +-
 include/llvm/CodeGen/ParallelCG.h | 43 +
 include/llvm/CodeGen/Passes.h | 14 +-
 include/llvm/CodeGen/PseudoSourceValue.h | 229 +-
 include/llvm/CodeGen/RegAllocPBQP.h | 2 +-
 include/llvm/CodeGen/RegAllocRegistry.h | 7 +-
 include/llvm/CodeGen/RegisterPressure.h | 89 +-
 include/llvm/CodeGen/RegisterScavenging.h | 19 +-
 include/llvm/CodeGen/RuntimeLibcalls.h | 8 -
 include/llvm/CodeGen/ScheduleDAG.h | 53 +-
 include/llvm/CodeGen/ScheduleDAGInstrs.h | 56 +-
 include/llvm/CodeGen/SchedulerRegistry.h | 6 -
 include/llvm/CodeGen/SelectionDAG.h | 39 +-
 include/llvm/CodeGen/SelectionDAGNodes.h | 210 +-
 include/llvm/CodeGen/SlotIndexes.h | 93 +-
 include/llvm/CodeGen/StackMaps.h | 1 +
 .../CodeGen/TargetLoweringObjectFileImpl.h | 12 +-
 include/llvm/CodeGen/TargetSchedule.h | 6 +
 include/llvm/CodeGen/ValueTypes.h | 20 +
 include/llvm/CodeGen/ValueTypes.td | 107 +-
 include/llvm/CodeGen/WinEHFuncInfo.h | 177 +-
 include/llvm/Config/config.h.cmake | 9 +-
 include/llvm/Config/config.h.in | 18 +-
 include/llvm/DebugInfo/CodeView/CodeView.h | 367 +
 .../llvm/DebugInfo/CodeView/CodeViewOStream.h | 39 +
 .../CodeView/FieldListRecordBuilder.h | 78 +
 include/llvm/DebugInfo/CodeView/FunctionId.h | 56 +
 include/llvm/DebugInfo/CodeView/Line.h | 124 +
 .../DebugInfo/CodeView/ListRecordBuilder.h | 43 +
 .../CodeView/MemoryTypeTableBuilder.h | 68 +
 .../CodeView/MethodListRecordBuilder.h | 35 +
 include/llvm/DebugInfo/CodeView/TypeIndex.h | 176 +
 include/llvm/DebugInfo/CodeView/TypeRecord.h | 270 +
 .../DebugInfo/CodeView/TypeRecordBuilder.h | 57 +
 .../DebugInfo/CodeView/TypeSymbolEmitter.h | 37 +
 .../DebugInfo/CodeView/TypeTableBuilder.h | 60 +
 include/llvm/DebugInfo/DIContext.h | 33 +-
 .../llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 9 +-
 include/llvm/DebugInfo/DWARF/DWARFContext.h | 21 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 3 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 59 +
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 3 +
 include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 10 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h | 49 +-
 include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 81 +
 include/llvm/DebugInfo/PDB/PDBContext.h | 3 +-
 include/llvm/DebugInfo/PDB/PDBTypes.h | 31 +
 include/llvm/DebugInfo/Symbolize/DIPrinter.h | 47 +
 .../DebugInfo/Symbolize/SymbolizableModule.h | 53 +
 include/llvm/DebugInfo/Symbolize/Symbolize.h | 105 +
 .../llvm/ExecutionEngine/ExecutionEngine.h | 17 +-
 include/llvm/ExecutionEngine/Interpreter.h | 12 +-
 .../Orc/CompileOnDemandLayer.h | 375 +-
 .../llvm/ExecutionEngine/Orc/CompileUtils.h | 1 -
 .../ExecutionEngine/Orc/GlobalMappingLayer.h | 108 +
 .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 2 -
 .../ExecutionEngine/Orc/IndirectionUtils.h | 326 +-
 .../ExecutionEngine/Orc/LazyEmittingLayer.h | 8 +-
 .../llvm/ExecutionEngine/Orc/LogicalDylib.h | 40 +-
 .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 57 +-
 .../Orc/ObjectTransformLayer.h | 8 -
 .../ExecutionEngine/Orc/OrcTargetSupport.h | 90 +-
 include/llvm/ExecutionEngine/RuntimeDyld.h | 31 +-
 .../ExecutionEngine/SectionMemoryManager.h | 25 +-
 include/llvm/IR/Argument.h | 9 +-
 include/llvm/IR/AssemblyAnnotationWriter.h | 3 +-
 include/llvm/IR/Attributes.h | 89 +-
 include/llvm/IR/Attributes.td | 192 +
 include/llvm/IR/BasicBlock.h | 44 +-
 include/llvm/IR/CFG.h | 150 +-
 include/llvm/IR/CMakeLists.txt | 5 +-
 include/llvm/IR/CallSite.h | 197 +-
 include/llvm/IR/CallingConv.h | 27 +-
 include/llvm/IR/Comdat.h | 2 +-
 include/llvm/IR/Constant.h | 51 +-
 include/llvm/IR/ConstantRange.h | 15 +-
 include/llvm/IR/Constants.h | 30 +-
 include/llvm/IR/DIBuilder.h | 107 +-
 include/llvm/IR/DataLayout.h | 7 +-
 include/llvm/IR/DebugInfo.h | 15 +-
 include/llvm/IR/DebugInfoFlags.def | 1 +
 include/llvm/IR/DebugInfoMetadata.h | 564 +-
 include/llvm/IR/DerivedTypes.h | 100 +-
 include/llvm/IR/DiagnosticInfo.h | 133 +-
 include/llvm/IR/DiagnosticPrinter.h | 2 +-
 include/llvm/IR/Dominators.h | 49 +-
 include/llvm/IR/Function.h | 156 +-
 include/llvm/IR/FunctionInfo.h | 241 +
 include/llvm/IR/GVMaterializer.h | 23 +-
 include/llvm/IR/GetElementPtrTypeIterator.h | 2 +-
 include/llvm/IR/GlobalAlias.h | 25 +-
 include/llvm/IR/GlobalObject.h | 8 +-
 include/llvm/IR/GlobalValue.h | 26 +-
 include/llvm/IR/GlobalVariable.h | 24 +-
 include/llvm/IR/IRBuilder.h | 159 +-
 include/llvm/IR/IRPrintingPasses.h | 6 +
 include/llvm/IR/InlineAsm.h | 65 +-
 include/llvm/IR/InstIterator.h | 29 +-
 include/llvm/IR/InstVisitor.h | 6 +
 include/llvm/IR/InstrTypes.h | 838 +-
 include/llvm/IR/Instruction.def | 173 +-
 include/llvm/IR/Instruction.h | 88 +-
 include/llvm/IR/Instructions.h | 948 +-
 include/llvm/IR/IntrinsicInst.h | 35 +-
 include/llvm/IR/Intrinsics.h | 2 +-
 include/llvm/IR/Intrinsics.td | 74 +-
 include/llvm/IR/IntrinsicsAArch64.td | 3 +
 include/llvm/IR/IntrinsicsAMDGPU.td | 72 +
 include/llvm/IR/IntrinsicsARM.td | 44 +-
 include/llvm/IR/IntrinsicsHexagon.td | 4411 ++++-
 include/llvm/IR/IntrinsicsPowerPC.td | 18 +
 include/llvm/IR/IntrinsicsWebAssembly.td | 6 +
 include/llvm/IR/IntrinsicsX86.td | 2508 ++-
 include/llvm/IR/LLVMContext.h | 25 +-
 include/llvm/IR/LegacyPassManagers.h | 75 +-
 include/llvm/IR/MDBuilder.h | 3 +
 include/llvm/IR/Mangler.h | 2 +-
 include/llvm/IR/Metadata.def | 72 +-
 include/llvm/IR/Metadata.h | 115 +-
 include/llvm/IR/MetadataTracking.h | 99 -
 include/llvm/IR/Module.h | 117 +-
 include/llvm/IR/ModuleSlotTracker.h | 8 +
 include/llvm/IR/PassManager.h | 3 +-
 include/llvm/IR/PatternMatch.h | 40 +
 include/llvm/IR/Statepoint.h | 29 +-
 include/llvm/IR/SymbolTableListTraits.h | 64 +-
 include/llvm/IR/TrackingMDRef.h | 6 +-
 include/llvm/IR/Type.h | 115 +-
 include/llvm/IR/TypeFinder.h | 2 +-
 include/llvm/IR/Use.h | 1 -
 include/llvm/IR/UseListOrder.h | 4 +-
 include/llvm/IR/User.h | 37 +-
 include/llvm/IR/Value.def | 3 +-
 include/llvm/IR/Value.h | 142 +-
 include/llvm/IR/ValueHandle.h | 33 +-
 include/llvm/IR/ValueMap.h | 4 +-
 include/llvm/IR/ValueSymbolTable.h | 44 +-
 include/llvm/IRReader/IRReader.h | 9 +-
 include/llvm/InitializePasses.h | 47 +-
 include/llvm/LTO/LTOCodeGenerator.h | 141 +-
 include/llvm/LTO/LTOModule.h | 49 +-
 include/llvm/LibDriver/LibDriver.h | 2 +-
 include/llvm/LinkAllPasses.h | 29 +-
 include/llvm/Linker/IRMover.h | 76 +
 include/llvm/Linker/Linker.h | 96 +-
 include/llvm/MC/ConstantPools.h | 10 +-
 include/llvm/MC/MCAsmBackend.h | 5 +
 include/llvm/MC/MCAsmInfo.h | 9 +
 include/llvm/MC/MCAssembler.h | 512 +-
 include/llvm/MC/MCContext.h | 36 +-
 include/llvm/MC/MCDirectives.h | 4 +-
 include/llvm/MC/MCDwarf.h | 44 +-
 include/llvm/MC/MCELFObjectWriter.h | 2 -
 include/llvm/MC/MCELFStreamer.h | 13 +-
 include/llvm/MC/MCExpr.h | 6 +-
 include/llvm/MC/MCFixedLenDisassembler.h | 2 +
 include/llvm/MC/MCFragment.h | 506 +
 include/llvm/MC/MCInstrDesc.h | 32 +-
 include/llvm/MC/MCInstrItineraries.h | 2 +-
 include/llvm/MC/MCLinkerOptimizationHint.h | 2 +-
 include/llvm/MC/MCMachObjectWriter.h | 22 +-
 include/llvm/MC/MCObjectFileInfo.h | 36 +-
 include/llvm/MC/MCObjectStreamer.h | 7 +-
 include/llvm/MC/MCObjectWriter.h | 38 +-
 include/llvm/MC/MCParser/AsmLexer.h | 3 +-
 include/llvm/MC/MCParser/MCAsmLexer.h | 40 +-
 .../llvm/MC/MCParser/MCAsmParserExtension.h | 3 +
 include/llvm/MC/MCParser/MCParsedAsmOperand.h | 10 +-
 include/llvm/MC/MCRegisterInfo.h | 8 +-
 include/llvm/MC/MCSchedule.h | 5 +-
 include/llvm/MC/MCSection.h | 16 +-
 include/llvm/MC/MCSectionCOFF.h | 89 +-
 include/llvm/MC/MCSectionELF.h | 27 +-
 include/llvm/MC/MCSectionMachO.h | 27 +-
 include/llvm/MC/MCStreamer.h | 31 +-
 include/llvm/MC/MCSubtargetInfo.h | 12 +-
 include/llvm/MC/MCSymbol.h | 101 +-
 include/llvm/MC/MCTargetAsmParser.h | 28 +-
 include/llvm/MC/MCTargetOptions.h | 4 +
 include/llvm/MC/MCTargetOptionsCommandFlags.h | 15 +
 include/llvm/MC/MCValue.h | 5 -
 include/llvm/MC/MCWinCOFFStreamer.h | 2 +-
 include/llvm/MC/MachineLocation.h | 4 -
 include/llvm/MC/SectionKind.h | 59 +-
 include/llvm/MC/StringTableBuilder.h | 40 +-
 include/llvm/MC/SubtargetFeature.h | 2 +-
 include/llvm/Object/Archive.h | 67 +-
 include/llvm/Object/ArchiveWriter.h | 13 +-
 include/llvm/Object/Binary.h | 10 +-
 include/llvm/Object/COFF.h | 4 +-
 include/llvm/Object/COFFImportFile.h | 74 +
 include/llvm/Object/ELF.h | 879 +-
 include/llvm/Object/ELFObjectFile.h | 171 +-
 include/llvm/Object/ELFTypes.h | 37 +-
 include/llvm/Object/Error.h | 1 +
 include/llvm/Object/FunctionIndexObjectFile.h | 110 +
 include/llvm/Object/MachO.h | 26 +-
 include/llvm/Object/ObjectFile.h | 13 +-
 include/llvm/Object/SymbolicFile.h | 8 +
 include/llvm/Option/Arg.h | 1 +
 include/llvm/Option/ArgList.h | 6 +
 include/llvm/Option/OptTable.h | 8 +-
 include/llvm/Option/Option.h | 1 +
 include/llvm/PassAnalysisSupport.h | 33 +-
 include/llvm/PassInfo.h | 36 +-
 include/llvm/PassRegistry.h | 1 -
 include/llvm/PassSupport.h | 2 +-
 include/llvm/ProfileData/CoverageMapping.h | 6 +-
 include/llvm/ProfileData/InstrProf.h | 552 +-
 include/llvm/ProfileData/InstrProfData.inc | 735 +
 include/llvm/ProfileData/InstrProfReader.h | 175 +-
 include/llvm/ProfileData/InstrProfWriter.h | 23 +-
 include/llvm/ProfileData/SampleProf.h | 303 +-
 include/llvm/ProfileData/SampleProfReader.h | 245 +-
 include/llvm/ProfileData/SampleProfWriter.h | 114 +-
 include/llvm/Support/ARMTargetParser.def | 223 +
 include/llvm/Support/AlignOf.h | 37 +-
 include/llvm/Support/Allocator.h | 15 +-
 include/llvm/Support/BlockFrequency.h | 26 +-
 include/llvm/Support/BranchProbability.h | 174 +-
 include/llvm/Support/CBindingWrapping.h | 1 +
 include/llvm/Support/COFF.h | 2 +
 include/llvm/Support/CommandLine.h | 32 +-
 include/llvm/Support/Compiler.h | 74 +-
 include/llvm/Support/CrashRecoveryContext.h | 35 +-
 include/llvm/Support/DOTGraphTraits.h | 11 +-
 include/llvm/Support/Debug.h | 2 +-
 include/llvm/Support/Dwarf.def | 10 +-
 include/llvm/Support/Dwarf.h | 54 +-
 include/llvm/Support/ELF.h | 69 +-
 include/llvm/Support/ELFRelocs/AVR.def | 40 +
 include/llvm/Support/ELFRelocs/PowerPC.def | 62 +
 include/llvm/Support/ELFRelocs/PowerPC64.def | 93 +
 include/llvm/Support/Endian.h | 141 +-
 include/llvm/Support/ErrorHandling.h | 32 +-
 include/llvm/Support/ErrorOr.h | 11 +-
 include/llvm/Support/FileOutputBuffer.h | 6 +-
 include/llvm/Support/FileSystem.h | 51 +-
 include/llvm/Support/Format.h | 5 +-
 include/llvm/Support/GCOV.h | 19 +-
 include/llvm/Support/GenericDomTree.h | 14 +-
 .../llvm/Support/GenericDomTreeConstruction.h | 18 +-
 include/llvm/Support/GraphWriter.h | 10 +-
 include/llvm/Support/JamCRC.h | 48 +
 include/llvm/Support/MachO.h | 13 +-
 include/llvm/Support/ManagedStatic.h | 2 +-
 include/llvm/Support/MathExtras.h | 96 +-
 include/llvm/Support/Memory.h | 33 +-
 include/llvm/Support/MemoryBuffer.h | 6 +-
 include/llvm/Support/OnDiskHashTable.h | 216 +-
 include/llvm/Support/Options.h | 4 +-
 include/llvm/Support/OutputBuffer.h | 166 -
 include/llvm/Support/Path.h | 35 +-
 include/llvm/Support/PointerLikeTypeTraits.h | 67 +-
 include/llvm/Support/PrettyStackTrace.h | 12 +
 include/llvm/Support/Printable.h | 52 +
 include/llvm/Support/Program.h | 3 +-
 include/llvm/Support/Recycler.h | 80 +-
 include/llvm/Support/Registry.h | 14 +-
 include/llvm/Support/SMLoc.h | 6 +-
 include/llvm/Support/ScaledNumber.h | 4 +-
 include/llvm/Support/Signals.h | 3 +
 include/llvm/Support/StreamingMemoryObject.h | 4 +-
 include/llvm/Support/StringSaver.h | 16 +-
 include/llvm/Support/TargetParser.h | 278 +-
 include/llvm/Support/TargetRegistry.h | 15 +-
 include/llvm/Support/TargetSelect.h | 27 +-
 include/llvm/Support/ThreadPool.h | 136 +
 include/llvm/Support/Threading.h | 2 +-
 include/llvm/Support/Timer.h | 63 +-
 include/llvm/Support/TrailingObjects.h | 349 +
 include/llvm/Support/UnicodeCharRanges.h | 5 +
 include/llvm/Support/Valgrind.h | 39 -
 include/llvm/Support/YAMLParser.h | 7 +-
 include/llvm/Support/YAMLTraits.h | 49 +-
 include/llvm/Support/circular_raw_ostream.h | 4 +-
 include/llvm/Support/raw_ostream.h | 60 +-
 include/llvm/Support/thread.h | 66 +
 include/llvm/Support/type_traits.h | 9 +
 include/llvm/TableGen/Record.h | 10 +-
 include/llvm/Target/CostTable.h | 60 +-
 include/llvm/Target/Target.td | 38 +-
 include/llvm/Target/TargetCallingConv.h | 5 +
 include/llvm/Target/TargetFrameLowering.h | 39 +-
 include/llvm/Target/TargetInstrInfo.h | 225 +-
 include/llvm/Target/TargetItinerary.td | 16 +
 include/llvm/Target/TargetLowering.h | 304 +-
 .../llvm/Target/TargetLoweringObjectFile.h | 16 +-
 include/llvm/Target/TargetMachine.h | 42 +-
 include/llvm/Target/TargetOpcodes.h | 6 +-
 include/llvm/Target/TargetOptions.h | 64 +-
 include/llvm/Target/TargetRecip.h | 14 +-
 include/llvm/Target/TargetRegisterInfo.h | 439 +-
 include/llvm/Target/TargetSelectionDAG.td | 61 +-
 include/llvm/Target/TargetSelectionDAGInfo.h | 45 +-
 include/llvm/Target/TargetSubtargetInfo.h | 6 +
 include/llvm/Transforms/IPO.h | 15 +
 .../llvm/Transforms/IPO/ForceFunctionAttrs.h | 35 +
 include/llvm/Transforms/IPO/FunctionImport.h | 43 +
 .../llvm/Transforms/IPO/InferFunctionAttrs.h | 38 +
 include/llvm/Transforms/IPO/InlinerPass.h | 13 +-
 include/llvm/Transforms/IPO/LowerBitSets.h | 7 +-
 .../llvm/Transforms/IPO/PassManagerBuilder.h | 10 +
 .../llvm/Transforms/IPO/StripDeadPrototypes.h | 34 +
 .../InstCombine/InstCombineWorklist.h | 12 +-
 include/llvm/Transforms/Instrumentation.h | 42 +-
 include/llvm/Transforms/Scalar.h | 18 +-
 include/llvm/Transforms/Scalar/ADCE.h | 38 +
 include/llvm/Transforms/Scalar/SROA.h | 129 +
 .../llvm/Transforms/Utils/BasicBlockUtils.h | 40 +-
 include/llvm/Transforms/Utils/Cloning.h | 32 +-
 include/llvm/Transforms/Utils/Local.h | 49 +-
 include/llvm/Transforms/Utils/LoopUtils.h | 161 +-
 .../llvm/Transforms/Utils/LoopVersioning.h | 56 +-
 include/llvm/Transforms/Utils/ModuleUtils.h | 4 +-
 .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 2 +-
 .../llvm/Transforms/Utils/SimplifyIndVar.h | 11 +-
 .../llvm/Transforms/Utils/SimplifyLibCalls.h | 3 +
 include/llvm/Transforms/Utils/SplitModule.h | 43 +
 include/llvm/Transforms/Utils/UnrollLoop.h | 9 +-
 include/llvm/Transforms/Utils/ValueMapper.h | 48 +-
 include/llvm/module.modulemap | 12 +-
 lib/Analysis/AliasAnalysis.cpp | 601 +-
 lib/Analysis/AliasAnalysisCounter.cpp | 173 -
 lib/Analysis/AliasAnalysisEvaluator.cpp | 54 +-
 lib/Analysis/AliasDebugger.cpp | 136 -
 lib/Analysis/AliasSetTracker.cpp | 44 +-
 lib/Analysis/Analysis.cpp | 32 +-
 lib/Analysis/BasicAliasAnalysis.cpp | 1094 +-
 lib/Analysis/BlockFrequencyInfo.cpp | 93 +-
 lib/Analysis/BlockFrequencyInfoImpl.cpp | 12 +-
 lib/Analysis/BranchProbabilityInfo.cpp | 397 +-
 lib/Analysis/CFG.cpp | 8 +-
 lib/Analysis/CFLAliasAnalysis.cpp | 253 +-
 lib/Analysis/CMakeLists.txt | 18 +-
 lib/Analysis/{IPA => }/CallGraph.cpp | 42 +-
 lib/Analysis/{IPA => }/CallGraphSCCPass.cpp | 0
 lib/Analysis/{IPA => }/CallPrinter.cpp | 0
 lib/Analysis/CaptureTracking.cpp | 98 +-
 lib/Analysis/CodeMetrics.cpp | 15 +-
 lib/Analysis/ConstantFolding.cpp | 136 +-
 lib/Analysis/CostModel.cpp | 19 +-
 lib/Analysis/Delinearization.cpp | 14 +-
 lib/Analysis/DemandedBits.cpp | 392 +
 lib/Analysis/DependenceAnalysis.cpp | 182 +-
 lib/Analysis/DivergenceAnalysis.cpp | 115 +-
 lib/Analysis/EHPersonalities.cpp | 106 +
 lib/Analysis/GlobalsModRef.cpp | 1002 +
 lib/Analysis/IPA/CMakeLists.txt | 10 -
 lib/Analysis/IPA/GlobalsModRef.cpp | 609 -
 lib/Analysis/IPA/IPA.cpp | 30 -
 lib/Analysis/IVUsers.cpp | 8 +-
 lib/Analysis/{IPA => }/InlineCost.cpp | 102 +-
 lib/Analysis/InstructionSimplify.cpp | 81 +-
 lib/Analysis/LLVMBuild.txt | 3 -
 lib/Analysis/LazyCallGraph.cpp | 8 +-
 lib/Analysis/LazyValueInfo.cpp | 341 +-
 lib/Analysis/LibCallAliasAnalysis.cpp | 141 -
 lib/Analysis/LibCallSemantics.cpp | 89 -
 lib/Analysis/Lint.cpp | 303 +-
 lib/Analysis/Loads.cpp | 24 +-
 lib/Analysis/LoopAccessAnalysis.cpp | 541 +-
 lib/Analysis/LoopInfo.cpp | 69 +-
 lib/Analysis/LoopPass.cpp | 141 +-
 lib/Analysis/Makefile | 1 -
 lib/Analysis/MemDepPrinter.cpp | 6 +-
 lib/Analysis/MemDerefPrinter.cpp | 18 +-
 lib/Analysis/MemoryBuiltins.cpp | 55 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp | 174 +-
 lib/Analysis/NoAliasAnalysis.cpp | 95 -
 .../ObjCARCAliasAnalysis.cpp | 130 +-
 lib/Analysis/ObjCARCAnalysisUtils.cpp | 28 +
 .../ObjCARCInstKind.cpp} | 8 +-
 lib/Analysis/OrderedBasicBlock.cpp | 85 +
 lib/Analysis/RegionInfo.cpp | 9 +
 lib/Analysis/RegionPrinter.cpp | 153 +-
 lib/Analysis/ScalarEvolution.cpp | 3267 +++-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 178 +-
 lib/Analysis/ScalarEvolutionExpander.cpp | 361 +-
 lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +-
 lib/Analysis/ScopedNoAliasAA.cpp | 204 +-
 lib/Analysis/SparsePropagation.cpp | 14 +-
 lib/Analysis/TargetLibraryInfo.cpp | 42 +-
 lib/Analysis/TargetTransformInfo.cpp | 236 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp | 638 +-
 lib/Analysis/ValueTracking.cpp | 1162 +-
 lib/Analysis/VectorUtils.cpp | 199 +-
 lib/AsmParser/LLLexer.cpp | 21 +-
 lib/AsmParser/LLParser.cpp | 732 +-
 lib/AsmParser/LLParser.h | 57 +-
 lib/AsmParser/LLToken.h | 16 +-
 lib/AsmParser/Parser.cpp | 12 +
 lib/Bitcode/Reader/BitReader.cpp | 105 +-
 lib/Bitcode/Reader/BitcodeReader.cpp | 1927 +-
 lib/Bitcode/Writer/BitcodeWriter.cpp | 741 +-
 lib/Bitcode/Writer/BitcodeWriterPass.cpp | 18 +-
 lib/Bitcode/Writer/ValueEnumerator.cpp | 96 +-
 lib/Bitcode/Writer/ValueEnumerator.h | 7 +-
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 61 +-
 lib/CodeGen/AllocationOrder.cpp | 5 +-
 lib/CodeGen/AllocationOrder.h | 4 +-
 lib/CodeGen/Analysis.cpp | 98 +-
 lib/CodeGen/AsmPrinter/ARMException.cpp | 5 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 459 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 42 +-
 lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 6 +
 .../AsmPrinter/AsmPrinterInlineAsm.cpp | 14 +-
 lib/CodeGen/AsmPrinter/ByteStreamer.h | 13 +-
 lib/CodeGen/AsmPrinter/DIE.cpp | 124 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp | 32 -
 lib/CodeGen/AsmPrinter/DIEHash.h | 3 -
 lib/CodeGen/AsmPrinter/DebugLocEntry.h | 3 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2 +-
 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 10 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 59 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 10 +
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 109 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 84 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 15 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 98 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.h | 30 +-
 lib/CodeGen/AsmPrinter/EHStreamer.cpp | 5 +-
 lib/CodeGen/AsmPrinter/EHStreamer.h | 8 +-
 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2 +-
 .../AsmPrinter/WinCodeViewLineTables.cpp | 13 +-
 .../AsmPrinter/WinCodeViewLineTables.h | 10 +-
 lib/CodeGen/AsmPrinter/WinException.cpp | 1212 +-
 lib/CodeGen/AsmPrinter/WinException.h | 31 +-
 lib/CodeGen/AtomicExpandPass.cpp | 348 +-
 lib/CodeGen/BasicTargetTransformInfo.cpp | 2 +-
 lib/CodeGen/BranchFolding.cpp | 284 +-
 lib/CodeGen/BranchFolding.h | 1 +
 lib/CodeGen/CMakeLists.txt | 10 +
 lib/CodeGen/CalcSpillWeights.cpp | 39 +-
 lib/CodeGen/CallingConvLower.cpp | 3 +
 lib/CodeGen/CodeGen.cpp | 2 +
 lib/CodeGen/CodeGenPrepare.cpp | 1325 +-
 lib/CodeGen/CoreCLRGC.cpp | 4 +-
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 7 +-
 lib/CodeGen/DFAPacketizer.cpp | 87 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp | 20 +-
 lib/CodeGen/DwarfEHPrepare.cpp | 6 +-
 lib/CodeGen/EarlyIfConversion.cpp | 6 +-
 lib/CodeGen/ExecutionDepsFix.cpp | 36 +-
 lib/CodeGen/ExpandISelPseudos.cpp | 4 +-
 lib/CodeGen/FuncletLayout.cpp | 55 +
 lib/CodeGen/GCRootLowering.cpp | 6 +-
 lib/CodeGen/GlobalMerge.cpp | 138 +-
 lib/CodeGen/IfConversion.cpp | 189 +-
 lib/CodeGen/ImplicitNullChecks.cpp | 201 +-
 lib/CodeGen/InlineSpiller.cpp | 76 +-
 lib/CodeGen/InterferenceCache.cpp | 3 +-
 lib/CodeGen/InterleavedAccessPass.cpp | 4 +-
 lib/CodeGen/IntrinsicLowering.cpp | 65 +-
 lib/CodeGen/LLVMBuild.txt | 2 +-
 lib/CodeGen/LLVMTargetMachine.cpp | 7 +-
 lib/CodeGen/LiveDebugValues.cpp | 405 +
 lib/CodeGen/LiveDebugVariables.cpp | 105 +-
 lib/CodeGen/LiveDebugVariables.h | 1 -
 lib/CodeGen/LiveInterval.cpp | 117 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp | 129 +-
 lib/CodeGen/LivePhysRegs.cpp | 18 +-
 lib/CodeGen/LiveRangeCalc.cpp | 15 +-
 lib/CodeGen/LiveRangeCalc.h | 2 +-
 lib/CodeGen/LiveRangeEdit.cpp | 37 +-
 lib/CodeGen/LiveRegMatrix.cpp | 7 +-
 lib/CodeGen/LiveVariables.cpp | 27 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp | 2 +-
 lib/CodeGen/MIRParser/LLVMBuild.txt | 2 +-
 lib/CodeGen/MIRParser/MILexer.cpp | 451 +-
 lib/CodeGen/MIRParser/MILexer.h | 102 +-
 lib/CodeGen/MIRParser/MIParser.cpp | 1599 +-
 lib/CodeGen/MIRParser/MIParser.h | 54 +-
 lib/CodeGen/MIRParser/MIRParser.cpp | 432 +-
 lib/CodeGen/MIRPrinter.cpp | 771 +-
 lib/CodeGen/MIRPrintingPass.cpp | 4 +-
 lib/CodeGen/MachineBasicBlock.cpp | 507 +-
 lib/CodeGen/MachineBlockFrequencyInfo.cpp | 4 +-
 lib/CodeGen/MachineBlockPlacement.cpp | 364 +-
 lib/CodeGen/MachineBranchProbabilityInfo.cpp | 75 +-
 lib/CodeGen/MachineCSE.cpp | 6 +-
 lib/CodeGen/MachineCombiner.cpp | 202 +-
 lib/CodeGen/MachineFunction.cpp | 88 +-
 lib/CodeGen/MachineFunctionPass.cpp | 10 +-
 lib/CodeGen/MachineInstr.cpp | 89 +-
 lib/CodeGen/MachineInstrBundle.cpp | 43 +-
 lib/CodeGen/MachineLICM.cpp | 259 +-
 lib/CodeGen/MachineLoopInfo.cpp | 13 +-
 lib/CodeGen/MachineModuleInfo.cpp | 86 +-
 lib/CodeGen/MachineRegisterInfo.cpp | 32 +-
 lib/CodeGen/MachineScheduler.cpp | 232 +-
 lib/CodeGen/MachineSink.cpp | 11 +-
 lib/CodeGen/MachineTraceMetrics.cpp | 10 +-
 lib/CodeGen/MachineVerifier.cpp | 326 +-
 lib/CodeGen/PHIElimination.cpp | 2 +-
 lib/CodeGen/PHIEliminationUtils.cpp | 2 +-
 lib/CodeGen/ParallelCG.cpp | 96 +
 lib/CodeGen/Passes.cpp | 144 +-
 lib/CodeGen/PeepholeOptimizer.cpp | 1011 +-
 lib/CodeGen/PostRASchedulerList.cpp | 37 +-
 lib/CodeGen/ProcessImplicitDefs.cpp | 6 +-
 lib/CodeGen/PrologEpilogInserter.cpp | 149 +-
 lib/CodeGen/PseudoSourceValue.cpp | 140 +-
 lib/CodeGen/RegAllocBasic.cpp | 8 +-
 lib/CodeGen/RegAllocFast.cpp | 21 +-
 lib/CodeGen/RegAllocGreedy.cpp | 53 +-
 lib/CodeGen/RegAllocPBQP.cpp | 37 +-
 lib/CodeGen/RegisterCoalescer.cpp | 256 +-
 lib/CodeGen/RegisterPressure.cpp | 373 +-
 lib/CodeGen/RegisterScavenging.cpp | 14 +-
 lib/CodeGen/ScheduleDAG.cpp | 1 -
 lib/CodeGen/ScheduleDAGInstrs.cpp | 313 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp | 9 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2674 ++-
 lib/CodeGen/SelectionDAG/FastISel.cpp | 142 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp | 192 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1283 +-
 .../SelectionDAG/LegalizeFloatTypes.cpp | 274 +-
 .../SelectionDAG/LegalizeIntegerTypes.cpp | 304 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 124 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 110 +-
 .../SelectionDAG/LegalizeTypesGeneric.cpp | 14 +-
 .../SelectionDAG/LegalizeVectorOps.cpp | 58 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp | 277 +-
 .../SelectionDAG/ResourcePriorityQueue.cpp | 6 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4 +-
 .../SelectionDAG/ScheduleDAGRRList.cpp | 34 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 -
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 530 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp | 1052 +-
 .../SelectionDAG/SelectionDAGBuilder.h | 109 +-
 .../SelectionDAG/SelectionDAGDumper.cpp | 133 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 263 +-
 .../SelectionDAG/SelectionDAGPrinter.cpp | 13 +-
 .../SelectionDAG/StatepointLowering.cpp | 137 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 267 +-
 lib/CodeGen/ShadowStackGCLowering.cpp | 10 +-
 lib/CodeGen/ShrinkWrap.cpp | 175 +-
 lib/CodeGen/SjLjEHPrepare.cpp | 44 +-
 lib/CodeGen/SlotIndexes.cpp | 4 +-
 lib/CodeGen/SpillPlacement.cpp | 7 +-
 lib/CodeGen/SplitKit.cpp | 49 +-
 lib/CodeGen/StackMaps.cpp | 4 +-
 lib/CodeGen/StackProtector.cpp | 20 +-
 lib/CodeGen/StackSlotColoring.cpp | 2 +-
 lib/CodeGen/StatepointExampleGC.cpp | 4 +-
 lib/CodeGen/TailDuplication.cpp | 115 +-
 lib/CodeGen/TargetFrameLoweringImpl.cpp | 30 +-
 lib/CodeGen/TargetInstrInfo.cpp | 310 +-
 lib/CodeGen/TargetLoweringBase.cpp | 148 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 80 +-
 lib/CodeGen/TargetRegisterInfo.cpp | 185 +-
 lib/CodeGen/TargetSchedule.cpp | 8 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp | 302 +-
 lib/CodeGen/UnreachableBlockElim.cpp | 8 +-
 lib/CodeGen/VirtRegMap.cpp | 166 +-
 lib/CodeGen/WinEHPrepare.cpp | 3748 +---
 lib/DebugInfo/CMakeLists.txt | 4 +-
 lib/DebugInfo/CodeView/CMakeLists.txt | 12 +
 .../CodeView/FieldListRecordBuilder.cpp | 165 +
 .../IPA => DebugInfo/CodeView}/LLVMBuild.txt | 9 +-
 lib/DebugInfo/CodeView/Line.cpp | 22 +
 lib/DebugInfo/CodeView/ListRecordBuilder.cpp | 31 +
 lib/DebugInfo/CodeView/Makefile | 14 +
 .../CodeView/MemoryTypeTableBuilder.cpp | 35 +
 .../CodeView/MethodListRecordBuilder.cpp | 49 +
 lib/DebugInfo/CodeView/TypeRecordBuilder.cpp | 113 +
 lib/DebugInfo/CodeView/TypeTableBuilder.cpp | 217 +
 lib/DebugInfo/DWARF/CMakeLists.txt | 2 +
 lib/DebugInfo/DWARF/DWARFContext.cpp | 93 +-
 lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 2 +-
 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 103 +
 lib/DebugInfo/DWARF/DWARFFormValue.cpp | 18 +-
 lib/DebugInfo/DWARF/DWARFUnit.cpp | 44 +-
 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 168 +
 lib/DebugInfo/DWARF/SyntaxHighlighting.cpp | 1 +
 lib/DebugInfo/DWARF/SyntaxHighlighting.h | 2 +-
 lib/DebugInfo/LLVMBuild.txt | 2 +-
 lib/DebugInfo/Makefile | 4 +-
 lib/DebugInfo/PDB/PDB.cpp | 2 +-
 lib/DebugInfo/PDB/PDBContext.cpp | 21 +-
 lib/DebugInfo/Symbolize/CMakeLists.txt | 8 +
 lib/DebugInfo/Symbolize/DIPrinter.cpp | 69 +
 lib/DebugInfo/Symbolize/LLVMBuild.txt | 22 +
 .../IPA => DebugInfo/Symbolize}/Makefile | 6 +-
 .../Symbolize/SymbolizableObjectFile.cpp | 254 +
 .../Symbolize/SymbolizableObjectFile.h | 82 +
 lib/DebugInfo/Symbolize/Symbolize.cpp | 456 +
 lib/ExecutionEngine/ExecutionEngine.cpp | 69 +-
 .../ExecutionEngineBindings.cpp | 46 +-
 lib/ExecutionEngine/Interpreter/Execution.cpp | 76 +-
 .../Interpreter/ExternalFunctions.cpp | 10 +-
 .../Interpreter/Interpreter.cpp | 7 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h | 2 -
 lib/ExecutionEngine/MCJIT/MCJIT.cpp | 23 +-
 lib/ExecutionEngine/MCJIT/MCJIT.h | 17 +-
 lib/ExecutionEngine/Orc/CMakeLists.txt | 2 +
 lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 25 +-
 lib/ExecutionEngine/Orc/OrcCBindings.cpp | 97 +
 lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 43 +
 lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 282 +
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 44 +-
 lib/ExecutionEngine/Orc/OrcTargetSupport.cpp | 271 +-
 .../RuntimeDyld/RuntimeDyld.cpp | 163 +-
 .../RuntimeDyld/RuntimeDyldCOFF.cpp | 15 +-
 .../RuntimeDyld/RuntimeDyldChecker.cpp | 25 +-
 .../RuntimeDyld/RuntimeDyldELF.cpp | 460 +-
 .../RuntimeDyld/RuntimeDyldELF.h | 13 +
 .../RuntimeDyld/RuntimeDyldImpl.h | 69 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp | 80 +-
 .../RuntimeDyld/RuntimeDyldMachO.h | 8 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h | 201 +
 .../Targets/RuntimeDyldCOFFX86_64.h | 32 +-
 .../Targets/RuntimeDyldMachOAArch64.h | 23 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 34 +-
 .../Targets/RuntimeDyldMachOI386.h | 48 +-
 .../Targets/RuntimeDyldMachOX86_64.h | 69 +-
 lib/ExecutionEngine/SectionMemoryManager.cpp | 103 +-
 lib/Fuzzer/CMakeLists.txt | 6 +
 lib/Fuzzer/FuzzerCrossOver.cpp | 10 +-
 lib/Fuzzer/FuzzerDFSan.h | 61 +
 lib/Fuzzer/FuzzerDriver.cpp | 168 +-
 lib/Fuzzer/FuzzerFlags.def | 46 +-
 lib/Fuzzer/FuzzerIO.cpp | 27 +-
 lib/Fuzzer/FuzzerInterface.cpp | 19 +-
 lib/Fuzzer/FuzzerInterface.h | 117 +-
 lib/Fuzzer/FuzzerInternal.h | 93 +-
 lib/Fuzzer/FuzzerLoop.cpp | 464 +-
 lib/Fuzzer/FuzzerMain.cpp | 2 +-
 lib/Fuzzer/FuzzerMutate.cpp | 208 +-
 lib/Fuzzer/FuzzerTraceState.cpp | 220 +-
 lib/Fuzzer/FuzzerUtil.cpp | 129 +-
 lib/Fuzzer/cxx.dict | 122 +
 lib/Fuzzer/cxx_fuzzer_tokens.txt | 218 -
 lib/Fuzzer/test/CMakeLists.txt | 36 +-
 lib/Fuzzer/test/CallerCalleeTest.cpp | 56 +
 lib/Fuzzer/test/CounterTest.cpp | 3 +-
 lib/Fuzzer/test/CxxTokensTest.cpp | 24 -
 lib/Fuzzer/test/DFSanMemcmpTest.cpp | 12 -
 .../test/FourIndependentBranchesTest.cpp | 3 +-
 lib/Fuzzer/test/FullCoverageSetTest.cpp | 3 +-
 lib/Fuzzer/test/FuzzerUnittest.cpp | 308 +-
 lib/Fuzzer/test/InfiniteTest.cpp | 24 -
 lib/Fuzzer/test/MemcmpTest.cpp | 20 +
 lib/Fuzzer/test/NullDerefTest.cpp | 3 +-
 ...SanSimpleCmpTest.cpp => SimpleCmpTest.cpp} | 5 +-
 lib/Fuzzer/test/SimpleDictionaryTest.cpp | 26 +
 lib/Fuzzer/test/SimpleHashTest.cpp | 37 +
 lib/Fuzzer/test/SimpleTest.cpp | 5 +-
 lib/Fuzzer/test/StrcmpTest.cpp | 29 +
 lib/Fuzzer/test/StrncmpTest.cpp | 25 +
 lib/Fuzzer/test/SwitchTest.cpp | 55 +
 lib/Fuzzer/test/TimeoutTest.cpp | 3 +-
 lib/Fuzzer/test/UninstrumentedTest.cpp | 8 +
 lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp | 16 +-
 lib/Fuzzer/test/dict1.txt | 4 +
 lib/Fuzzer/test/fuzzer-dfsan.test | 22 +
 lib/Fuzzer/test/fuzzer-drill.test | 8 +
 lib/Fuzzer/test/fuzzer-timeout.test | 13 +
 lib/Fuzzer/test/fuzzer-traces.test | 19 +
 lib/Fuzzer/test/fuzzer.test | 40 +-
 lib/Fuzzer/test/hi.txt | 1 +
 lib/Fuzzer/test/lit.cfg | 5 +-
 lib/Fuzzer/test/merge.test | 29 +
 lib/Fuzzer/test/trace-bb/CMakeLists.txt | 14 +
 lib/Fuzzer/test/uninstrumented/CMakeLists.txt | 14 +
 lib/IR/AsmWriter.cpp | 407 +-
 lib/IR/AttributeImpl.h | 37 +-
 lib/IR/Attributes.cpp | 213 +-
 lib/IR/AttributesCompatFunc.td | 1 +
 lib/IR/AutoUpgrade.cpp | 103 +-
 lib/IR/BasicBlock.cpp | 42 +-
 lib/IR/CMakeLists.txt | 14 +-
 lib/IR/ConstantFold.cpp | 35 +-
 lib/IR/ConstantRange.cpp | 53 +
 lib/IR/Constants.cpp | 326 +-
 lib/IR/ConstantsContext.h | 47 +-
 lib/IR/Core.cpp | 113 +-
 lib/IR/DIBuilder.cpp | 120 +-
 lib/IR/DataLayout.cpp | 13 +-
 lib/IR/DebugInfo.cpp | 55 +-
 lib/IR/DebugInfoMetadata.cpp | 90 +-
 lib/IR/DiagnosticInfo.cpp | 41 +-
 lib/IR/Dominators.cpp | 32 +-
 lib/IR/Function.cpp | 211 +-
 lib/IR/FunctionInfo.cpp | 67 +
 lib/IR/GCOV.cpp | 4 +-
 lib/IR/Globals.cpp | 78 +-
 lib/IR/IRBuilder.cpp | 115 +-
 lib/IR/InlineAsm.cpp | 27 +-
 lib/IR/Instruction.cpp | 40 +-
 lib/IR/Instructions.cpp | 418 +-
 lib/IR/LLVMContext.cpp | 48 +-
 lib/IR/LLVMContextImpl.cpp | 24 +-
 lib/IR/LLVMContextImpl.h | 170 +-
 lib/IR/LegacyPassManager.cpp | 159 +-
 lib/IR/MDBuilder.cpp | 48 +-
 lib/IR/Makefile | 30 +-
 lib/IR/Metadata.cpp | 89 +-
 lib/IR/MetadataImpl.h | 13 +
 lib/IR/MetadataTracking.cpp | 55 -
 lib/IR/Module.cpp | 60 +-
 lib/IR/Statepoint.cpp | 5 +-
 lib/IR/SymbolTableListTraitsImpl.h | 50 +-
 lib/IR/Type.cpp | 154 +-
 lib/IR/TypeFinder.cpp | 14 +-
 lib/IR/User.cpp | 64 +-
 lib/IR/Value.cpp | 13 +-
 lib/IR/ValueSymbolTable.cpp | 50 +-
 lib/IR/ValueTypes.cpp | 27 +
 lib/IR/Verifier.cpp | 682 +-
 lib/IRReader/IRReader.cpp | 12 +-
 lib/LTO/LLVMBuild.txt | 1 -
 lib/LTO/LTOCodeGenerator.cpp | 505 +-
 lib/LTO/LTOModule.cpp | 155 +-
 lib/LibDriver/LibDriver.cpp | 11 +-
 lib/LibDriver/Options.td | 2 +
 lib/Linker/CMakeLists.txt | 1 +
 lib/Linker/IRMover.cpp | 1657 ++
 lib/Linker/LinkDiagnosticInfo.h | 25 +
 lib/Linker/LinkModules.cpp | 1906 +-
 lib/MC/CMakeLists.txt | 1 +
 lib/MC/ConstantPools.cpp | 10 +-
 lib/MC/ELFObjectWriter.cpp | 144 +-
 lib/MC/MCAsmBackend.cpp | 4 +
 lib/MC/MCAsmInfo.cpp | 6 +
 lib/MC/MCAsmInfoCOFF.cpp | 3 +-
 lib/MC/MCAsmInfoDarwin.cpp | 5 -
 lib/MC/MCAsmStreamer.cpp | 95 +-
 lib/MC/MCAssembler.cpp | 527 +-
 lib/MC/MCContext.cpp | 56 +-
 lib/MC/MCDisassembler/Disassembler.cpp | 4 -
 lib/MC/MCDwarf.cpp | 393 +-
 lib/MC/MCELFObjectTargetWriter.cpp | 16 -
 lib/MC/MCELFStreamer.cpp | 60 +-
 lib/MC/MCExpr.cpp | 74 +-
 lib/MC/MCFragment.cpp | 458 +
 lib/MC/MCInst.cpp | 2 +
 lib/MC/MCInstrDesc.cpp | 2 +-
 lib/MC/MCMachOStreamer.cpp | 47 +-
 lib/MC/MCObjectFileInfo.cpp | 230 +-
 lib/MC/MCObjectStreamer.cpp | 96 +-
 lib/MC/MCObjectWriter.cpp | 10 +-
 lib/MC/MCParser/AsmLexer.cpp | 16 +-
 lib/MC/MCParser/AsmParser.cpp | 270 +-
 lib/MC/MCParser/COFFAsmParser.cpp | 11 +-
 lib/MC/MCParser/DarwinAsmParser.cpp | 78 +-
 lib/MC/MCParser/ELFAsmParser.cpp | 22 +-
 lib/MC/MCParser/MCAsmLexer.cpp | 4 +-
 lib/MC/MCParser/MCTargetAsmParser.cpp | 17 +-
 lib/MC/MCSection.cpp | 4 +-
 lib/MC/MCSectionCOFF.cpp | 1 +
 lib/MC/MCSectionELF.cpp | 9 +-
 lib/MC/MCSectionMachO.cpp | 4 +-
 lib/MC/MCStreamer.cpp | 31 +-
 lib/MC/MCSubtargetInfo.cpp | 17 +-
 lib/MC/MCSymbol.cpp | 7 +-
 lib/MC/MCTargetOptions.cpp | 7 +-
 lib/MC/MCWinEH.cpp | 8 +-
 lib/MC/MachObjectWriter.cpp | 135 +-
 lib/MC/StringTableBuilder.cpp | 118 +-
 lib/MC/SubtargetFeature.cpp | 2 +-
 lib/MC/WinCOFFObjectWriter.cpp | 193 +-
 lib/MC/WinCOFFStreamer.cpp | 41 +-
 lib/Object/Archive.cpp | 210 +-
 lib/Object/ArchiveWriter.cpp | 117 +-
 lib/Object/CMakeLists.txt | 1 +
 lib/Object/COFFObjectFile.cpp | 56 +-
 lib/Object/COFFYAML.cpp | 2 +
 lib/Object/ELF.cpp | 1 +
 lib/Object/ELFYAML.cpp | 33 +
 lib/Object/Error.cpp | 2 +
 lib/Object/FunctionIndexObjectFile.cpp | 143 +
 lib/Object/IRObjectFile.cpp | 8 +-
 lib/Object/MachOObjectFile.cpp | 175 +-
 lib/Object/MachOUniversal.cpp | 16 +-
 lib/Object/Object.cpp | 4 +-
 lib/Object/ObjectFile.cpp | 6 +-
 lib/Object/SymbolicFile.cpp | 5 +-
 lib/Option/Arg.cpp | 21 +-
 lib/Option/ArgList.cpp | 25 +
 lib/Option/OptTable.cpp | 12 +-
 lib/Option/Option.cpp | 31 +-
 lib/Passes/LLVMBuild.txt | 2 +-
 lib/Passes/PassBuilder.cpp | 6 +
 lib/Passes/PassRegistry.def | 7 +
 lib/ProfileData/CoverageMapping.cpp | 18 +-
 lib/ProfileData/CoverageMappingReader.cpp | 101 +-
 lib/ProfileData/InstrProf.cpp | 434 +-
 lib/ProfileData/InstrProfIndexed.h | 56 -
 lib/ProfileData/InstrProfReader.cpp | 454 +-
 lib/ProfileData/InstrProfWriter.cpp | 175 +-
 lib/ProfileData/SampleProf.cpp | 106 +-
 lib/ProfileData/SampleProfReader.cpp | 762 +-
 lib/ProfileData/SampleProfWriter.cpp | 180 +-
 lib/Support/APFloat.cpp | 46 +-
 lib/Support/BlockFrequency.cpp | 36 +-
 lib/Support/BranchProbability.cpp | 49 +-
 lib/Support/CMakeLists.txt | 5 +-
 lib/Support/CommandLine.cpp | 60 +-
 lib/Support/CrashRecoveryContext.cpp | 26 +-
 lib/Support/Dwarf.cpp | 37 +
 lib/Support/ErrorHandling.cpp | 2 +-
 lib/Support/FileOutputBuffer.cpp | 18 +-
 lib/Support/FoldingSet.cpp | 20 +
 lib/Support/GraphWriter.cpp | 74 +-
 lib/Support/Host.cpp | 43 +-
 lib/Support/JamCRC.cpp | 96 +
 lib/Support/Locale.cpp | 1 +
 lib/Support/ManagedStatic.cpp | 1 +
 lib/Support/MemoryBuffer.cpp | 5 +-
 lib/Support/Path.cpp | 110 +-
 lib/Support/PrettyStackTrace.cpp | 16 +-
 lib/Support/Signals.cpp | 140 +-
 lib/Support/Statistic.cpp | 18 +-
 lib/Support/StringRef.cpp | 101 +-
 lib/Support/StringSaver.cpp | 2 +-
 lib/Support/TargetParser.cpp | 506 +-
 lib/Support/ThreadPool.cpp | 155 +
 lib/Support/TimeValue.cpp | 6 +-
 lib/Support/Timer.cpp | 74 +-
 lib/Support/Triple.cpp | 217 +-
 lib/Support/Unix/Memory.inc | 15 +-
 lib/Support/Unix/Path.inc | 120 +-
 lib/Support/Unix/Process.inc | 13 +-
 lib/Support/Unix/Program.inc | 7 -
 lib/Support/Unix/Signals.inc | 151 +-
 lib/Support/Unix/Unix.h | 13 +-
 lib/Support/Valgrind.cpp | 21 +-
 lib/Support/Windows/COM.inc | 2 +-
 lib/Support/Windows/DynamicLibrary.inc | 4 +-
 lib/Support/Windows/Memory.inc | 4 +-
 lib/Support/Windows/Path.inc | 109 +-
 lib/Support/Windows/Process.inc | 11 +-
 lib/Support/Windows/Program.inc | 25 +-
 lib/Support/Windows/Signals.inc | 131 +-
 lib/Support/Windows/WindowsSupport.h | 18 +-
 lib/Support/YAMLParser.cpp | 18 +-
 lib/Support/YAMLTraits.cpp | 16 +-
 lib/Support/raw_ostream.cpp | 83 +-
 lib/TableGen/Record.cpp | 10 +-
 lib/TableGen/SetTheory.cpp | 2 +-
 lib/TableGen/TGParser.cpp | 30 +-
 lib/TableGen/TGParser.h | 16 +-
 lib/Target/AArch64/AArch64.td | 43 +-
 lib/Target/AArch64/AArch64A53Fix835769.cpp | 15 +-
 .../AArch64/AArch64A57FPLoadBalancing.cpp | 1 -
 .../AArch64/AArch64AddressTypePromotion.cpp | 8 +-
 .../AArch64/AArch64AdvSIMDScalarPass.cpp | 17 +-
 .../AArch64/AArch64BranchRelaxation.cpp | 27 +-
 lib/Target/AArch64/AArch64CallingConvention.h | 38 +-
 .../AArch64/AArch64CallingConvention.td | 19 +-
 .../AArch64CleanupLocalDynamicTLSPass.cpp | 8 +-
 lib/Target/AArch64/AArch64CollectLOH.cpp | 32 +-
 .../AArch64/AArch64ConditionOptimizer.cpp | 22 +-
 .../AArch64/AArch64ConditionalCompares.cpp | 20 +-
 .../AArch64DeadRegisterDefinitionsPass.cpp | 16 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp | 15 +-
 lib/Target/AArch64/AArch64FastISel.cpp | 221 +-
 lib/Target/AArch64/AArch64FrameLowering.cpp | 93 +-
 lib/Target/AArch64/AArch64FrameLowering.h | 6 +-
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 246 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp | 1446 +-
 lib/Target/AArch64/AArch64ISelLowering.h | 110 +-
 lib/Target/AArch64/AArch64InstrFormats.td | 1076 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp | 225 +-
 lib/Target/AArch64/AArch64InstrInfo.h | 12 +-
 lib/Target/AArch64/AArch64InstrInfo.td | 483 +-
 .../AArch64/AArch64LoadStoreOptimizer.cpp | 1310 +-
 lib/Target/AArch64/AArch64MCInstLower.cpp | 4 +-
 .../AArch64/AArch64MachineCombinerPattern.h | 42 -
 .../AArch64/AArch64MachineFunctionInfo.h | 17 +-
 lib/Target/AArch64/AArch64PromoteConstant.cpp | 4 +-
 lib/Target/AArch64/AArch64RegisterInfo.cpp | 56 +-
 lib/Target/AArch64/AArch64RegisterInfo.h | 5 +-
 lib/Target/AArch64/AArch64RegisterInfo.td | 2 +-
 lib/Target/AArch64/AArch64Subtarget.cpp | 31 +-
 lib/Target/AArch64/AArch64Subtarget.h | 27 +-
 lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +-
 .../AArch64/AArch64TargetTransformInfo.cpp | 150 +-
 .../AArch64/AArch64TargetTransformInfo.h | 39 +-
 .../AArch64/AsmParser/AArch64AsmParser.cpp | 201 +-
 .../Disassembler/AArch64Disassembler.cpp | 4 +
 .../InstPrinter/AArch64InstPrinter.cpp | 46 +-
 .../AArch64/InstPrinter/AArch64InstPrinter.h | 10 +-
 .../MCTargetDesc/AArch64AddressingModes.h | 26 +
 .../MCTargetDesc/AArch64ELFStreamer.cpp | 5 +-
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4 -
 .../AArch64/MCTargetDesc/AArch64MCExpr.cpp | 6 +-
 .../AArch64/MCTargetDesc/AArch64MCExpr.h | 8 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp | 88 +-
 .../MCTargetDesc/AArch64TargetStreamer.cpp | 5 +-
 .../MCTargetDesc/AArch64TargetStreamer.h | 2 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 33 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.h | 41 +-
 lib/Target/AMDGPU/AMDGPU.h | 16 +-
 lib/Target/AMDGPU/AMDGPU.td | 10 +
 .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 126 +
 .../AMDGPU/AMDGPUAnnotateUniformValues.cpp | 84 +
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 200 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4 +-
 .../AMDGPUDiagnosticInfoUnsupported.cpp | 26 +
 .../AMDGPU/AMDGPUDiagnosticInfoUnsupported.h | 48 +
 lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 10 +-
 lib/Target/AMDGPU/AMDGPUFrameLowering.h | 11 +-
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 477 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 195 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.h | 15 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 20 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.h | 6 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 -
 lib/Target/AMDGPU/AMDGPUInstructions.td | 4 +-
 lib/Target/AMDGPU/AMDGPUIntrinsics.td | 4 +-
 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 18 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 11 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.h | 5 +
 .../AMDGPUOpenCLImageTypeLoweringPass.cpp | 373 +
 lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 11 +-
 lib/Target/AMDGPU/AMDGPURegisterInfo.h | 4 -
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 25 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.h | 26 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 77 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.h | 4 +-
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp | 87 +
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.h | 51 +
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 102 +-
 lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 5 +
 lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 43 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 266 +-
 lib/Target/AMDGPU/CIInstructions.td | 336 +-
 lib/Target/AMDGPU/CMakeLists.txt | 7 +-
 lib/Target/AMDGPU/CaymanInstructions.td | 4 +
 lib/Target/AMDGPU/EvergreenInstructions.td | 11 +
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 16 +-
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 2 -
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 27 +-
 .../AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp | 26 +
 .../AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h | 40 +
 .../AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h | 3 -
 .../AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 14 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h | 3 +-
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp | 11 +
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 51 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.h | 21 +
 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 1 +
 lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt | 2 +-
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 6 +-
 .../AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 15 +-
 lib/Target/AMDGPU/Processors.td | 4 +
 .../AMDGPU/R600ControlFlowFinalizer.cpp | 4 +-
 lib/Target/AMDGPU/R600ISelLowering.cpp | 21 +-
 lib/Target/AMDGPU/R600InstrInfo.cpp | 6 +-
 lib/Target/AMDGPU/R600InstrInfo.h | 6 +-
 lib/Target/AMDGPU/R600Instructions.td | 2 +-
 .../AMDGPU/R600OptimizeVectorRegisters.cpp | 2 +-
 lib/Target/AMDGPU/R600Packetizer.cpp | 18 +-
 lib/Target/AMDGPU/R600RegisterInfo.h | 2 +-
 lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 5 +-
 lib/Target/AMDGPU/SIDefines.h | 3 +-
 .../AMDGPU/SIFixControlFlowLiveIntervals.cpp | 6 -
 lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 247 +-
 lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp | 129 +-
 lib/Target/AMDGPU/SIFoldOperands.cpp | 204 +-
 lib/Target/AMDGPU/SIFrameLowering.cpp | 243 +
 lib/Target/AMDGPU/SIFrameLowering.h | 34 +
 lib/Target/AMDGPU/SIISelLowering.cpp | 666 +-
 lib/Target/AMDGPU/SIISelLowering.h | 12 +-
 lib/Target/AMDGPU/SIInsertWaits.cpp | 84 +-
 lib/Target/AMDGPU/SIInstrFormats.td | 44 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp | 1330 +-
 lib/Target/AMDGPU/SIInstrInfo.h | 154 +-
 lib/Target/AMDGPU/SIInstrInfo.td | 839 +-
 lib/Target/AMDGPU/SIInstructions.td | 487 +-
 lib/Target/AMDGPU/SILowerControlFlow.cpp | 36 +-
 lib/Target/AMDGPU/SILowerI1Copies.cpp | 1 +
 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 109 +-
 lib/Target/AMDGPU/SIMachineFunctionInfo.h | 232 +-
 lib/Target/AMDGPU/SIPrepareScratchRegs.cpp | 193 -
 lib/Target/AMDGPU/SIRegisterInfo.cpp | 271 +-
 lib/Target/AMDGPU/SIRegisterInfo.h | 64 +-
 lib/Target/AMDGPU/SIRegisterInfo.td | 135 +-
 lib/Target/AMDGPU/SISchedule.td | 18 +-
 lib/Target/AMDGPU/SIShrinkInstructions.cpp | 61 +-
 lib/Target/AMDGPU/SITypeRewriter.cpp | 10 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 97 +
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 26 +
 lib/Target/AMDGPU/Utils/LLVMBuild.txt | 2 +-
 lib/Target/AMDGPU/VIInstructions.td | 69 +-
 lib/Target/ARM/ARM.h | 1 -
 lib/Target/ARM/ARM.td | 694 +-
 lib/Target/ARM/ARMAsmPrinter.cpp | 119 +-
 lib/Target/ARM/ARMAsmPrinter.h | 9 +-
 lib/Target/ARM/ARMBaseInstrInfo.cpp | 172 +-
 lib/Target/ARM/ARMBaseInstrInfo.h | 23 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp | 17 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h | 8 +-
 lib/Target/ARM/ARMCallingConv.h | 22 +-
 lib/Target/ARM/ARMCallingConv.td | 2 +
 lib/Target/ARM/ARMConstantIslandPass.cpp | 44 +-
 lib/Target/ARM/ARMConstantPoolValue.cpp | 3 +-
 lib/Target/ARM/ARMConstantPoolValue.h | 5 +-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp | 64 +-
 lib/Target/ARM/ARMFastISel.cpp | 108 +-
 lib/Target/ARM/ARMFrameLowering.cpp | 202 +-
 lib/Target/ARM/ARMFrameLowering.h | 8 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 275 +-
 lib/Target/ARM/ARMISelLowering.cpp | 2091 ++-
 lib/Target/ARM/ARMISelLowering.h | 49 +-
 lib/Target/ARM/ARMInstrInfo.cpp | 77 +-
 lib/Target/ARM/ARMInstrInfo.td | 73 +-
 lib/Target/ARM/ARMInstrNEON.td | 440 +-
 lib/Target/ARM/ARMInstrThumb.td | 185 +-
 lib/Target/ARM/ARMInstrThumb2.td | 95 +-
 lib/Target/ARM/ARMInstrVFP.td | 55 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 514 +-
 lib/Target/ARM/ARMMachineFunctionInfo.cpp | 5 +-
 lib/Target/ARM/ARMMachineFunctionInfo.h | 14 +-
 lib/Target/ARM/ARMRegisterInfo.td | 37 +-
 lib/Target/ARM/ARMScheduleSwift.td | 1046 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp | 56 +-
 lib/Target/ARM/ARMSubtarget.cpp | 169 +-
 lib/Target/ARM/ARMSubtarget.h | 72 +-
 lib/Target/ARM/ARMTargetMachine.cpp | 50 +-
 lib/Target/ARM/ARMTargetMachine.h | 3 +-
 lib/Target/ARM/ARMTargetTransformInfo.cpp | 170 +-
 lib/Target/ARM/ARMTargetTransformInfo.h | 33 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 204 +-
 .../ARM/Disassembler/ARMDisassembler.cpp | 113 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3 -
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 389 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 7 +
 .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 8 +-
 .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4 +-
 .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 54 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 4 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 47 +-
 lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 4 +-
 .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 99 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 3 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 32 +-
 .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 4 +-
 .../ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 12 +-
 lib/Target/ARM/README.txt | 1 -
 lib/Target/ARM/Thumb1FrameLowering.cpp | 293 +-
 lib/Target/ARM/Thumb1FrameLowering.h | 36 +
 lib/Target/ARM/Thumb1InstrInfo.cpp | 16 +-
 lib/Target/ARM/Thumb2ITBlockPass.cpp | 4 +-
 lib/Target/ARM/Thumb2InstrInfo.cpp | 16 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp | 47 +-
 lib/Target/AVR/AVR.td | 563 +
 lib/Target/AVR/AVRCallingConv.td | 65 +
 lib/Target/AVR/AVRConfig.h | 15 +
 lib/Target/AVR/AVRMachineFunctionInfo.h | 73 +
 lib/Target/AVR/AVRRegisterInfo.td | 216 +
 lib/Target/AVR/AVRTargetMachine.cpp | 4 +
 lib/Target/AVR/CMakeLists.txt | 14 +
 lib/Target/AVR/LLVMBuild.txt | 33 +
 lib/Target/AVR/Makefile | 19 +
 lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp | 25 +
 lib/Target/AVR/TargetInfo/CMakeLists.txt | 7 +
 lib/Target/AVR/TargetInfo/LLVMBuild.txt | 23 +
 lib/Target/AVR/TargetInfo/Makefile | 16 +
 lib/Target/BPF/BPF.td | 7 +
 lib/Target/BPF/BPFISelLowering.cpp | 3 +-
 lib/Target/BPF/InstPrinter/BPFInstPrinter.h | 2 -
 lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 25 +-
 .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 4 +
 lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 2 +
 lib/Target/CppBackend/CPPBackend.cpp | 91 +-
 lib/Target/Hexagon/AsmParser/CMakeLists.txt | 7 +
 .../Hexagon/AsmParser/HexagonAsmParser.cpp | 2152 +++
 lib/Target/Hexagon/AsmParser/LLVMBuild.txt | 23 +
 lib/Target/Hexagon/AsmParser/Makefile | 15 +
 lib/Target/Hexagon/BitTracker.cpp | 12 +-
 lib/Target/Hexagon/BitTracker.h | 16 +-
 lib/Target/Hexagon/CMakeLists.txt | 10 +-
 .../Disassembler/HexagonDisassembler.cpp | 1010 +-
 lib/Target/Hexagon/Disassembler/LLVMBuild.txt | 2 +-
 lib/Target/Hexagon/Hexagon.h | 7 -
 lib/Target/Hexagon/Hexagon.td | 82 +-
 lib/Target/Hexagon/HexagonAsmPrinter.cpp | 435 +-
 lib/Target/Hexagon/HexagonAsmPrinter.h | 4 +
 lib/Target/Hexagon/HexagonBitSimplify.cpp | 2778 +++
 lib/Target/Hexagon/HexagonBitTracker.cpp | 33 +-
 lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 27 +-
 lib/Target/Hexagon/HexagonCommonGEP.cpp | 43 +-
 lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 1063 ++
 .../Hexagon/HexagonExpandPredSpillCode.cpp | 2 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp | 435 +-
 lib/Target/Hexagon/HexagonFrameLowering.h | 11 +-
 lib/Target/Hexagon/HexagonGenExtract.cpp | 2 +-
 lib/Target/Hexagon/HexagonGenInsert.cpp | 15 +-
 lib/Target/Hexagon/HexagonGenMux.cpp | 319 +
 lib/Target/Hexagon/HexagonGenPredicate.cpp | 2 +-
 lib/Target/Hexagon/HexagonHardwareLoops.cpp | 16 +-
 lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 160 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp | 774 +-
 lib/Target/Hexagon/HexagonISelLowering.h | 48 +-
 lib/Target/Hexagon/HexagonInstrAlias.td | 462 +
 lib/Target/Hexagon/HexagonInstrEnc.td | 1019 +
 lib/Target/Hexagon/HexagonInstrFormats.td | 46 +-
 lib/Target/Hexagon/HexagonInstrFormatsV4.td | 2 -
 lib/Target/Hexagon/HexagonInstrFormatsV60.td | 238 +
 lib/Target/Hexagon/HexagonInstrInfo.cpp | 4472 +++--
 lib/Target/Hexagon/HexagonInstrInfo.h | 400 +-
 lib/Target/Hexagon/HexagonInstrInfo.td | 65 +-
 lib/Target/Hexagon/HexagonInstrInfoV4.td | 130 +-
 lib/Target/Hexagon/HexagonInstrInfoV5.td | 10 +-
 lib/Target/Hexagon/HexagonInstrInfoV60.td | 2241 +++
 lib/Target/Hexagon/HexagonInstrInfoVector.td | 43 +
 lib/Target/Hexagon/HexagonIntrinsics.td | 24 +-
 lib/Target/Hexagon/HexagonIntrinsicsV60.td | 836 +
 lib/Target/Hexagon/HexagonMCInstLower.cpp | 60 +-
 .../Hexagon/HexagonMachineScheduler.cpp | 6 +-
 lib/Target/Hexagon/HexagonNewValueJump.cpp | 53 +-
 lib/Target/Hexagon/HexagonOperands.td | 378 +-
 .../Hexagon/HexagonOptimizeSZextends.cpp | 150 +
 lib/Target/Hexagon/HexagonPeephole.cpp | 4 +-
 lib/Target/Hexagon/HexagonRegisterInfo.cpp | 94 +-
 lib/Target/Hexagon/HexagonRegisterInfo.h | 2 -
 lib/Target/Hexagon/HexagonRegisterInfo.td | 81 +-
 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 91 -
 lib/Target/Hexagon/HexagonSchedule.td | 8 +-
 lib/Target/Hexagon/HexagonScheduleV4.td | 5 +-
 lib/Target/Hexagon/HexagonScheduleV55.td | 170 +
lib/Target/Hexagon/HexagonScheduleV60.td | 310 +
.../Hexagon/HexagonSelectionDAGInfo.cpp | 48 +-
.../Hexagon/HexagonSplitConst32AndConst64.cpp | 2 +-
lib/Target/Hexagon/HexagonSplitDouble.cpp | 1209 ++
lib/Target/Hexagon/HexagonStoreWidening.cpp | 616 +
lib/Target/Hexagon/HexagonSubtarget.cpp | 84 +-
lib/Target/Hexagon/HexagonSubtarget.h | 20 +-
lib/Target/Hexagon/HexagonTargetMachine.cpp | 97 +-
lib/Target/Hexagon/HexagonTargetMachine.h | 15 +-
.../Hexagon/HexagonTargetObjectFile.cpp | 7 +-
.../Hexagon/HexagonTargetTransformInfo.cpp | 38 +
.../Hexagon/HexagonTargetTransformInfo.h | 70 +
lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 2021 +-
lib/Target/Hexagon/HexagonVLIWPacketizer.h | 114 +
lib/Target/Hexagon/LLVMBuild.txt | 3 +-
.../Hexagon/MCTargetDesc/CMakeLists.txt | 2 +
.../MCTargetDesc/HexagonAsmBackend.cpp | 52 +-
.../Hexagon/MCTargetDesc/HexagonBaseInfo.h | 65 +-
.../MCTargetDesc/HexagonInstPrinter.cpp | 282 +-
.../Hexagon/MCTargetDesc/HexagonInstPrinter.h | 119 +-
.../Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 13 +-
.../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 581 +
.../Hexagon/MCTargetDesc/HexagonMCChecker.h | 218 +
.../MCTargetDesc/HexagonMCCodeEmitter.cpp | 24 +-
.../MCTargetDesc/HexagonMCCompound.cpp | 29 +-
.../MCTargetDesc/HexagonMCDuplexInfo.cpp | 126 +-
.../MCTargetDesc/HexagonMCELFStreamer.cpp | 12 +-
.../Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 49 +
.../Hexagon/MCTargetDesc/HexagonMCExpr.h | 35 +
.../MCTargetDesc/HexagonMCInstrInfo.cpp | 228 +-
.../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 58 +-
.../MCTargetDesc/HexagonMCTargetDesc.cpp | 52 +-
.../MCTargetDesc/HexagonMCTargetDesc.h | 17 +-
.../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 90 +-
.../Hexagon/MCTargetDesc/HexagonShuffler.h | 52 +-
lib/Target/Hexagon/Makefile | 3 +-
lib/Target/LLVMBuild.txt | 1 +
.../MSP430/InstPrinter/MSP430InstPrinter.h | 2 -
.../MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 13 +-
lib/Target/MSP430/MSP430BranchSelector.cpp | 2 +-
lib/Target/MSP430/MSP430ISelLowering.cpp | 17 +-
lib/Target/MSP430/MSP430InstrInfo.cpp | 18 +-
lib/Target/MSP430/MSP430MCInstLower.cpp | 8 +-
.../MSP430/MSP430MachineFunctionInfo.cpp | 2 +-
lib/Target/MSP430/MSP430MachineFunctionInfo.h | 2 +-
lib/Target/MSP430/README.txt | 2 +-
lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2242 ++-
.../Mips/Disassembler/MipsDisassembler.cpp | 318 +-
.../Mips/InstPrinter/MipsInstPrinter.cpp | 2 +
lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 2 -
lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 13 +-
lib/Target/Mips/MCTargetDesc/MipsABIInfo.h | 7 +-
.../Mips/MCTargetDesc/MipsAsmBackend.cpp | 62 +-
lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 1 +
.../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 17 +-
.../Mips/MCTargetDesc/MipsELFStreamer.cpp | 2 +-
.../Mips/MCTargetDesc/MipsELFStreamer.h | 3 +-
lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 5 +-
lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 13 +-
.../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 73 +-
.../Mips/MCTargetDesc/MipsMCCodeEmitter.h | 25 +-
lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 4 +-
.../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 82 +-
lib/Target/Mips/MicroMips32r6InstrFormats.td | 579 +-
lib/Target/Mips/MicroMips32r6InstrInfo.td | 887 +-
lib/Target/Mips/MicroMips64r6InstrFormats.td | 86 +
lib/Target/Mips/MicroMips64r6InstrInfo.td | 119 +
lib/Target/Mips/MicroMipsDSPInstrFormats.td | 244 +
lib/Target/Mips/MicroMipsDSPInstrInfo.td | 528 +
lib/Target/Mips/MicroMipsInstrFPU.td | 28 +-
lib/Target/Mips/MicroMipsInstrFormats.td | 81 +-
lib/Target/Mips/MicroMipsInstrInfo.td | 174 +-
lib/Target/Mips/Mips.td | 17 +-
lib/Target/Mips/Mips16FrameLowering.cpp | 8 +-
lib/Target/Mips/Mips16HardFloat.cpp | 200 +-
lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +-
lib/Target/Mips/Mips16ISelLowering.cpp | 9 +-
lib/Target/Mips/Mips16InstrInfo.cpp | 6 +-
lib/Target/Mips/Mips16InstrInfo.td | 120 +-
lib/Target/Mips/Mips32r6InstrInfo.td | 298 +-
lib/Target/Mips/Mips64InstrInfo.td | 91 +-
lib/Target/Mips/Mips64r6InstrInfo.td | 12 +-
lib/Target/Mips/MipsAsmPrinter.cpp | 13 +-
lib/Target/Mips/MipsCCState.cpp | 16 +-
lib/Target/Mips/MipsCallingConv.td | 25 +
lib/Target/Mips/MipsConstantIslandPass.cpp | 31 +-
lib/Target/Mips/MipsDSPInstrFormats.td | 38 +-
lib/Target/Mips/MipsDSPInstrInfo.td | 378 +-
lib/Target/Mips/MipsDelaySlotFiller.cpp | 34 +-
lib/Target/Mips/MipsEVAInstrFormats.td | 84 +
lib/Target/Mips/MipsEVAInstrInfo.td | 192 +
lib/Target/Mips/MipsFastISel.cpp | 59 +-
lib/Target/Mips/MipsISelLowering.cpp | 125 +-
lib/Target/Mips/MipsISelLowering.h | 37 +-
lib/Target/Mips/MipsInstrFPU.td | 42 +-
lib/Target/Mips/MipsInstrFormats.td | 8 +-
lib/Target/Mips/MipsInstrInfo.cpp | 4 +-
lib/Target/Mips/MipsInstrInfo.td | 508 +-
lib/Target/Mips/MipsLongBranch.cpp | 9 +-
lib/Target/Mips/MipsMSAInstrFormats.td | 24 +-
lib/Target/Mips/MipsMSAInstrInfo.td | 208 +-
lib/Target/Mips/MipsMachineFunction.cpp | 69 +-
lib/Target/Mips/MipsMachineFunction.h | 51 +-
lib/Target/Mips/MipsRegisterInfo.cpp | 59 +-
lib/Target/Mips/MipsRegisterInfo.h | 4 +-
lib/Target/Mips/MipsSEFrameLowering.cpp | 241 +-
lib/Target/Mips/MipsSEFrameLowering.h | 10 +-
lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +-
lib/Target/Mips/MipsSEISelLowering.cpp | 22 +-
lib/Target/Mips/MipsSEInstrInfo.cpp | 85 +-
lib/Target/Mips/MipsSEInstrInfo.h | 2 +
lib/Target/Mips/MipsSERegisterInfo.cpp | 6 +-
lib/Target/Mips/MipsSchedule.td | 68 +-
lib/Target/Mips/MipsScheduleP5600.td | 392 +
lib/Target/Mips/MipsSubtarget.cpp | 5 +-
lib/Target/Mips/MipsSubtarget.h | 21 +-
lib/Target/Mips/MipsTargetMachine.cpp | 2 +-
lib/Target/Mips/MipsTargetObjectFile.cpp | 24 +-
lib/Target/Mips/MipsTargetObjectFile.h | 4 +-
lib/Target/Mips/MipsTargetStreamer.h | 19 +-
.../NVPTX/InstPrinter/NVPTXInstPrinter.h | 2 -
.../NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 1 +
lib/Target/NVPTX/NVPTX.h | 18 -
lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 132 +-
lib/Target/NVPTX/NVPTXAsmPrinter.h | 15 +-
.../NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 10 +-
lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 18 +-
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 59 +-
lib/Target/NVPTX/NVPTXISelLowering.cpp | 181 +-
lib/Target/NVPTX/NVPTXISelLowering.h | 17 +-
lib/Target/NVPTX/NVPTXInstrInfo.cpp | 64 +-
lib/Target/NVPTX/NVPTXInstrInfo.h | 1 -
lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 343 +-
lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 2 +-
lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp | 104 +-
lib/Target/NVPTX/NVPTXMCExpr.h | 4 +-
lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +-
lib/Target/NVPTX/NVPTXSection.h | 9 +-
lib/Target/NVPTX/NVPTXTargetMachine.cpp | 60 +-
lib/Target/NVPTX/NVPTXTargetObjectFile.h | 5 +-
lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 +-
lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 2 +-
lib/Target/NVPTX/NVPTXUtilities.cpp | 105 +-
lib/Target/NVPTX/NVPTXUtilities.h | 21 -
lib/Target/NVPTX/NVPTXVector.td | 58 +-
lib/Target/NVPTX/NVVMReflect.cpp | 4 +-
lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 24 +-
lib/Target/PowerPC/CMakeLists.txt | 2 +
.../PowerPC/Disassembler/PPCDisassembler.cpp | 2 -
.../PowerPC/InstPrinter/PPCInstPrinter.h | 2 -
.../MCTargetDesc/PPCELFObjectWriter.cpp | 16 +-
.../PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 21 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 4 +- .../MCTargetDesc/PPCMachObjectWriter.cpp | 4 +- .../PowerPC/MCTargetDesc/PPCPredicates.h | 8 + lib/Target/PowerPC/PPC.h | 3 + lib/Target/PowerPC/PPC.td | 14 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 265 +- lib/Target/PowerPC/PPCBoolRetToInt.cpp | 253 + lib/Target/PowerPC/PPCBranchSelector.cpp | 2 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 26 +- lib/Target/PowerPC/PPCEarlyReturn.cpp | 35 +- lib/Target/PowerPC/PPCFastISel.cpp | 87 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 238 +- lib/Target/PowerPC/PPCFrameLowering.h | 31 + lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 140 +- lib/Target/PowerPC/PPCISelLowering.cpp | 504 +- lib/Target/PowerPC/PPCISelLowering.h | 22 +- lib/Target/PowerPC/PPCInstr64Bit.td | 2 + lib/Target/PowerPC/PPCInstrInfo.cpp | 131 +- lib/Target/PowerPC/PPCInstrInfo.h | 53 +- lib/Target/PowerPC/PPCInstrInfo.td | 11 +- lib/Target/PowerPC/PPCInstrQPX.td | 20 +- lib/Target/PowerPC/PPCInstrVSX.td | 612 +- lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 9 +- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 131 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 6 +- lib/Target/PowerPC/PPCMIPeephole.cpp | 230 + lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 4 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 114 +- lib/Target/PowerPC/PPCRegisterInfo.h | 15 +- lib/Target/PowerPC/PPCSubtarget.cpp | 31 + lib/Target/PowerPC/PPCSubtarget.h | 11 + lib/Target/PowerPC/PPCTargetMachine.cpp | 52 +- lib/Target/PowerPC/PPCTargetObjectFile.cpp | 4 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 137 +- lib/Target/PowerPC/PPCTargetTransformInfo.h | 32 +- lib/Target/PowerPC/PPCVSXCopy.cpp | 14 +- lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 53 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 89 +- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 248 +- lib/Target/Sparc/DelaySlotFiller.cpp | 2 + .../Sparc/Disassembler/SparcDisassembler.cpp | 52 + .../Sparc/InstPrinter/SparcInstPrinter.h | 3 - .../Sparc/MCTargetDesc/SparcMCAsmInfo.h | 1 + lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 4 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 8 +- lib/Target/Sparc/SparcCallingConv.td | 9 +- lib/Target/Sparc/SparcFrameLowering.cpp | 154 +- lib/Target/Sparc/SparcFrameLowering.h | 8 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 183 + lib/Target/Sparc/SparcISelLowering.cpp | 295 +- lib/Target/Sparc/SparcISelLowering.h | 18 +- lib/Target/Sparc/SparcInstrAliases.td | 9 + lib/Target/Sparc/SparcInstrInfo.cpp | 39 +- lib/Target/Sparc/SparcInstrInfo.td | 146 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 64 +- lib/Target/Sparc/SparcRegisterInfo.h | 4 +- lib/Target/Sparc/SparcRegisterInfo.td | 54 + lib/Target/Sparc/SparcSubtarget.cpp | 6 +- lib/Target/Sparc/SparcSubtarget.h | 3 +- .../SystemZ/AsmParser/SystemZAsmParser.cpp | 19 +- .../InstPrinter/SystemZInstPrinter.cpp | 8 +- .../SystemZ/InstPrinter/SystemZInstPrinter.h | 1 - .../MCTargetDesc/SystemZMCTargetDesc.cpp | 2 +- lib/Target/SystemZ/README.txt | 6 - lib/Target/SystemZ/SystemZAsmPrinter.cpp | 2 +- .../SystemZ/SystemZConstantPoolValue.cpp | 15 - lib/Target/SystemZ/SystemZConstantPoolValue.h | 1 - lib/Target/SystemZ/SystemZElimCompare.cpp | 86 +- lib/Target/SystemZ/SystemZFrameLowering.cpp | 24 +- lib/Target/SystemZ/SystemZFrameLowering.h | 3 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 24 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 159 +- lib/Target/SystemZ/SystemZISelLowering.h | 20 +- lib/Target/SystemZ/SystemZInstrBuilder.h | 8 +- lib/Target/SystemZ/SystemZInstrFP.td | 55 +- 
lib/Target/SystemZ/SystemZInstrFormats.td | 1 + lib/Target/SystemZ/SystemZInstrInfo.cpp | 77 +- lib/Target/SystemZ/SystemZInstrInfo.h | 4 +- lib/Target/SystemZ/SystemZInstrInfo.td | 34 +- .../SystemZ/SystemZMachineFunctionInfo.cpp | 2 +- .../SystemZ/SystemZMachineFunctionInfo.h | 2 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 4 +- lib/Target/SystemZ/SystemZRegisterInfo.td | 3 +- lib/Target/SystemZ/SystemZShortenInst.cpp | 128 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 24 +- lib/Target/SystemZ/SystemZTargetMachine.h | 3 + .../SystemZ/SystemZTargetTransformInfo.cpp | 10 +- .../SystemZ/SystemZTargetTransformInfo.h | 11 +- lib/Target/TargetLoweringObjectFile.cpp | 55 +- lib/Target/TargetMachine.cpp | 20 +- lib/Target/TargetMachineC.cpp | 46 +- lib/Target/TargetRecip.cpp | 12 +- lib/Target/WebAssembly/CMakeLists.txt | 21 +- .../InstPrinter/WebAssemblyInstPrinter.cpp | 94 +- .../InstPrinter/WebAssemblyInstPrinter.h | 18 +- lib/Target/WebAssembly/LLVMBuild.txt | 2 +- .../WebAssembly/MCTargetDesc/CMakeLists.txt | 3 + .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 103 + .../WebAssemblyELFObjectWriter.cpp | 54 + .../MCTargetDesc/WebAssemblyMCAsmInfo.cpp | 5 +- .../MCTargetDesc/WebAssemblyMCAsmInfo.h | 4 +- .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 100 + .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 41 +- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 16 +- lib/Target/WebAssembly/Makefile | 10 +- lib/Target/WebAssembly/README.txt | 68 +- lib/Target/WebAssembly/Relooper.cpp | 984 + lib/Target/WebAssembly/Relooper.h | 186 + lib/Target/WebAssembly/WebAssembly.h | 14 + lib/Target/WebAssembly/WebAssembly.td | 12 +- .../WebAssembly/WebAssemblyArgumentMove.cpp | 110 + .../WebAssembly/WebAssemblyAsmPrinter.cpp | 285 + .../WebAssembly/WebAssemblyCFGStackify.cpp | 468 + .../WebAssembly/WebAssemblyFastISel.cpp | 81 + .../WebAssembly/WebAssemblyFrameLowering.cpp | 117 +- .../WebAssembly/WebAssemblyFrameLowering.h | 3 - lib/Target/WebAssembly/WebAssemblyISD.def | 25 + .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 57 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 602 +- .../WebAssembly/WebAssemblyISelLowering.h | 50 +- .../WebAssembly/WebAssemblyInstrCall.td | 67 +- .../WebAssembly/WebAssemblyInstrControl.td | 82 + .../WebAssembly/WebAssemblyInstrConv.td | 125 +- .../WebAssembly/WebAssemblyInstrFloat.td | 111 +- .../WebAssembly/WebAssemblyInstrFormats.td | 78 +- .../WebAssembly/WebAssemblyInstrInfo.cpp | 133 +- lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 20 +- .../WebAssembly/WebAssemblyInstrInfo.td | 115 +- .../WebAssembly/WebAssemblyInstrInteger.td | 101 +- .../WebAssembly/WebAssemblyInstrMemory.td | 529 +- .../WebAssembly/WebAssemblyLowerBrUnless.cpp | 133 + .../WebAssembly/WebAssemblyMCInstLower.cpp | 106 + .../WebAssembly/WebAssemblyMCInstLower.h | 45 + .../WebAssemblyMachineFunctionInfo.cpp | 6 + .../WebAssemblyMachineFunctionInfo.h | 68 +- .../WebAssemblyOptimizeReturned.cpp | 76 + lib/Target/WebAssembly/WebAssemblyPEI.cpp | 1066 ++ .../WebAssembly/WebAssemblyPeephole.cpp | 86 + .../WebAssembly/WebAssemblyRegColoring.cpp | 175 + .../WebAssembly/WebAssemblyRegNumbering.cpp | 109 + .../WebAssembly/WebAssemblyRegStackify.cpp | 265 + .../WebAssembly/WebAssemblyRegisterInfo.cpp | 60 +- .../WebAssembly/WebAssemblyRegisterInfo.h | 6 +- .../WebAssembly/WebAssemblyRegisterInfo.td | 32 +- .../WebAssembly/WebAssemblyStoreResults.cpp | 124 + .../WebAssembly/WebAssemblySubtarget.cpp | 1 + lib/Target/WebAssembly/WebAssemblySubtarget.h | 8 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 96 +- 
.../WebAssemblyTargetObjectFile.cpp | 24 + .../WebAssembly/WebAssemblyTargetObjectFile.h | 43 +- .../WebAssemblyTargetTransformInfo.cpp | 5 +- .../WebAssemblyTargetTransformInfo.h | 4 +- .../WebAssembly/known_gcc_test_failures.txt | 311 + lib/Target/X86/AsmParser/CMakeLists.txt | 3 - lib/Target/X86/AsmParser/LLVMBuild.txt | 2 +- lib/Target/X86/AsmParser/Makefile | 2 +- .../X86/AsmParser/X86AsmInstrumentation.cpp | 274 +- .../X86/AsmParser/X86AsmInstrumentation.h | 10 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 319 +- lib/Target/X86/AsmParser/X86AsmParserCommon.h | 19 +- lib/Target/X86/CMakeLists.txt | 11 +- .../X86/Disassembler/X86Disassembler.cpp | 7 + .../Disassembler/X86DisassemblerDecoder.cpp | 47 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 2 - .../X86/InstPrinter/X86ATTInstPrinter.cpp | 1 + .../X86/InstPrinter/X86ATTInstPrinter.h | 3 - .../X86/InstPrinter/X86InstComments.cpp | 735 +- .../X86/InstPrinter/X86IntelInstPrinter.h | 2 - lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 83 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 48 +- .../X86/MCTargetDesc/X86ELFObjectWriter.cpp | 11 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 57 +- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 11 +- .../X86/MCTargetDesc/X86MCTargetDesc.cpp | 184 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 13 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 99 +- .../X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 4 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 197 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 27 +- lib/Target/X86/X86.h | 59 +- lib/Target/X86/X86.td | 657 +- lib/Target/X86/X86AsmPrinter.cpp | 25 +- lib/Target/X86/X86AsmPrinter.h | 2 - lib/Target/X86/X86CallFrameOptimization.cpp | 109 +- lib/Target/X86/X86CallingConv.h | 59 + lib/Target/X86/X86CallingConv.td | 79 +- .../X86/X86CompilationCallback_Win64.asm | 68 - lib/Target/X86/X86ExpandPseudo.cpp | 21 +- lib/Target/X86/X86FastISel.cpp | 168 +- lib/Target/X86/X86FixupLEAs.cpp | 89 +- lib/Target/X86/X86FloatingPoint.cpp | 76 +- lib/Target/X86/X86FrameLowering.cpp | 1162 +- lib/Target/X86/X86FrameLowering.h | 65 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 503 +- lib/Target/X86/X86ISelLowering.cpp | 6418 +++++-- lib/Target/X86/X86ISelLowering.h | 124 +- lib/Target/X86/X86InstrAVX512.td | 3690 ++-- lib/Target/X86/X86InstrArithmetic.td | 69 +- lib/Target/X86/X86InstrBuilder.h | 7 +- lib/Target/X86/X86InstrCMovSetCC.td | 2 +- lib/Target/X86/X86InstrCompiler.td | 216 +- lib/Target/X86/X86InstrControl.td | 13 + lib/Target/X86/X86InstrFMA.td | 252 +- lib/Target/X86/X86InstrFPStack.td | 136 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 336 +- lib/Target/X86/X86InstrInfo.cpp | 1516 +- lib/Target/X86/X86InstrInfo.h | 142 +- lib/Target/X86/X86InstrInfo.td | 257 +- lib/Target/X86/X86InstrMMX.td | 24 +- lib/Target/X86/X86InstrSSE.td | 771 +- lib/Target/X86/X86InstrShiftRotate.td | 140 +- lib/Target/X86/X86InstrSystem.td | 101 +- lib/Target/X86/X86InstrXOP.td | 119 +- lib/Target/X86/X86IntrinsicsInfo.h | 980 +- lib/Target/X86/X86MCInstLower.cpp | 216 +- lib/Target/X86/X86MachineFunctionInfo.cpp | 2 +- lib/Target/X86/X86MachineFunctionInfo.h | 8 +- lib/Target/X86/X86OptimizeLEAs.cpp | 326 + lib/Target/X86/X86PadShortFunction.cpp | 5 +- lib/Target/X86/X86RegisterInfo.cpp | 318 +- lib/Target/X86/X86RegisterInfo.h | 25 +- lib/Target/X86/X86RegisterInfo.td | 41 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 43 +- lib/Target/X86/X86Subtarget.cpp | 46 +- lib/Target/X86/X86Subtarget.h | 57 +- lib/Target/X86/X86TargetMachine.cpp | 17 +- 
lib/Target/X86/X86TargetObjectFile.cpp | 8 +-
lib/Target/X86/X86TargetObjectFile.h | 2 +-
lib/Target/X86/X86TargetTransformInfo.cpp | 875 +-
lib/Target/X86/X86TargetTransformInfo.h | 56 +-
lib/Target/X86/X86WinEHState.cpp | 232 +-
.../XCore/Disassembler/XCoreDisassembler.cpp | 2 +-
.../XCore/InstPrinter/XCoreInstPrinter.h | 2 -
lib/Target/XCore/XCoreAsmPrinter.cpp | 18 +-
lib/Target/XCore/XCoreFrameLowering.cpp | 38 +-
lib/Target/XCore/XCoreISelDAGToDAG.cpp | 5 +-
lib/Target/XCore/XCoreISelLowering.cpp | 26 +-
lib/Target/XCore/XCoreISelLowering.h | 14 +
lib/Target/XCore/XCoreInstrInfo.cpp | 18 +-
lib/Target/XCore/XCoreLowerThreadLocal.cpp | 9 +-
lib/Target/XCore/XCoreMachineFunctionInfo.cpp | 2 +-
lib/Target/XCore/XCoreMachineFunctionInfo.h | 2 +-
lib/Target/XCore/XCoreTargetMachine.cpp | 2 +-
lib/Target/XCore/XCoreTargetObjectFile.cpp | 14 +-
lib/Target/XCore/XCoreTargetObjectFile.h | 2 +-
lib/Target/XCore/XCoreTargetTransformInfo.h | 2 +-
lib/Transforms/IPO/ArgumentPromotion.cpp | 128 +-
lib/Transforms/IPO/CMakeLists.txt | 5 +
lib/Transforms/IPO/ConstantMerge.cpp | 4 +-
lib/Transforms/IPO/CrossDSOCFI.cpp | 166 +
.../IPO/DeadArgumentElimination.cpp | 83 +-
lib/Transforms/IPO/ElimAvailExtern.cpp | 54 +-
lib/Transforms/IPO/ExtractGV.cpp | 8 +-
lib/Transforms/IPO/ForceFunctionAttrs.cpp | 121 +
lib/Transforms/IPO/FunctionAttrs.cpp | 1889 +-
lib/Transforms/IPO/FunctionImport.cpp | 433 +
lib/Transforms/IPO/GlobalDCE.cpp | 114 +-
lib/Transforms/IPO/GlobalOpt.cpp | 633 +-
lib/Transforms/IPO/IPO.cpp | 11 +-
lib/Transforms/IPO/InferFunctionAttrs.cpp | 937 +
lib/Transforms/IPO/InlineAlways.cpp | 27 +-
lib/Transforms/IPO/InlineSimple.cpp | 25 +-
lib/Transforms/IPO/Inliner.cpp | 150 +-
lib/Transforms/IPO/Internalize.cpp | 103 +-
lib/Transforms/IPO/LLVMBuild.txt | 2 +-
lib/Transforms/IPO/LoopExtractor.cpp | 20 +-
lib/Transforms/IPO/LowerBitSets.cpp | 593 +-
lib/Transforms/IPO/MergeFunctions.cpp | 560 +-
lib/Transforms/IPO/PartialInlining.cpp | 18 +-
lib/Transforms/IPO/PassManagerBuilder.cpp | 125 +-
lib/Transforms/IPO/PruneEH.cpp | 31 +-
.../{Scalar => IPO}/SampleProfile.cpp | 736 +-
lib/Transforms/IPO/StripDeadPrototypes.cpp | 64 +-
lib/Transforms/IPO/StripSymbols.cpp | 12 +-
.../InstCombine/InstCombineAddSub.cpp | 58 +-
.../InstCombine/InstCombineAndOrXor.cpp | 443 +-
.../InstCombine/InstCombineCalls.cpp | 934 +-
.../InstCombine/InstCombineCasts.cpp | 353 +-
.../InstCombine/InstCombineCompares.cpp | 242 +-
.../InstCombine/InstCombineInternal.h | 12 +-
.../InstCombineLoadStoreAlloca.cpp | 136 +-
.../InstCombine/InstCombineMulDivRem.cpp | 26 +-
lib/Transforms/InstCombine/InstCombinePHI.cpp | 113 +-
.../InstCombine/InstCombineSelect.cpp | 78 +-
.../InstCombine/InstCombineShifts.cpp | 4 +-
.../InstCombineSimplifyDemanded.cpp | 90 +-
.../InstCombine/InstCombineVectorOps.cpp | 121 +-
.../InstCombine/InstructionCombining.cpp | 346 +-
.../Instrumentation/AddressSanitizer.cpp | 253 +-
.../Instrumentation/BoundsChecking.cpp | 2 +-
lib/Transforms/Instrumentation/CFGMST.h | 217 +
lib/Transforms/Instrumentation/CMakeLists.txt | 1 +
.../Instrumentation/DataFlowSanitizer.cpp | 84 +-
.../Instrumentation/GCOVProfiling.cpp | 49 +-
.../Instrumentation/InstrProfiling.cpp | 287 +-
.../Instrumentation/Instrumentation.cpp | 39 +-
lib/Transforms/Instrumentation/LLVMBuild.txt | 2 +-
.../Instrumentation/MemorySanitizer.cpp | 434 +-
.../Instrumentation/PGOInstrumentation.cpp | 718 +
lib/Transforms/Instrumentation/SafeStack.cpp | 497 +-
.../Instrumentation/SanitizerCoverage.cpp | 117 +-
.../Instrumentation/ThreadSanitizer.cpp | 40 +-
lib/Transforms/ObjCARC/CMakeLists.txt | 2 -
lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 6 +-
lib/Transforms/ObjCARC/ObjCARC.cpp | 10 +-
lib/Transforms/ObjCARC/ObjCARC.h | 242 +-
lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 13 +-
lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 74 -
lib/Transforms/ObjCARC/ObjCARCContract.cpp | 28 +-
lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 77 +-
lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2 +-
.../ObjCARC/ProvenanceAnalysisEvaluator.cpp | 6 +-
lib/Transforms/ObjCARC/PtrState.cpp | 8 +-
lib/Transforms/ObjCARC/PtrState.h | 8 +-
lib/Transforms/Scalar/ADCE.cpp | 69 +-
.../Scalar/AlignmentFromAssumptions.cpp | 17 +-
lib/Transforms/Scalar/BDCE.cpp | 350 +-
lib/Transforms/Scalar/CMakeLists.txt | 2 +-
lib/Transforms/Scalar/ConstantHoisting.cpp | 10 +-
.../Scalar/CorrelatedValuePropagation.cpp | 120 +-
lib/Transforms/Scalar/DCE.cpp | 78 +-
.../Scalar/DeadStoreElimination.cpp | 278 +-
lib/Transforms/Scalar/EarlyCSE.cpp | 269 +-
lib/Transforms/Scalar/FlattenCFGPass.cpp | 8 +-
lib/Transforms/Scalar/Float2Int.cpp | 49 +-
lib/Transforms/Scalar/GVN.cpp | 319 +-
lib/Transforms/Scalar/IndVarSimplify.cpp | 712 +-
.../Scalar/InductiveRangeCheckElimination.cpp | 24 +-
lib/Transforms/Scalar/JumpThreading.cpp | 263 +-
lib/Transforms/Scalar/LICM.cpp | 65 +-
lib/Transforms/Scalar/LLVMBuild.txt | 2 +-
lib/Transforms/Scalar/LoadCombine.cpp | 12 +-
lib/Transforms/Scalar/LoopDeletion.cpp | 19 +-
lib/Transforms/Scalar/LoopDistribute.cpp | 113 +-
lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 1391 +-
lib/Transforms/Scalar/LoopInstSimplify.cpp | 4 +-
lib/Transforms/Scalar/LoopInterchange.cpp | 79 +-
lib/Transforms/Scalar/LoopLoadElimination.cpp | 566 +
lib/Transforms/Scalar/LoopRerollPass.cpp | 141 +-
lib/Transforms/Scalar/LoopRotation.cpp | 447 +-
lib/Transforms/Scalar/LoopStrengthReduce.cpp | 828 +-
lib/Transforms/Scalar/LoopUnrollPass.cpp | 278 +-
lib/Transforms/Scalar/LoopUnswitch.cpp | 455 +-
lib/Transforms/Scalar/LowerAtomic.cpp | 6 +-
.../Scalar/LowerExpectIntrinsic.cpp | 2 +-
lib/Transforms/Scalar/MemCpyOptimizer.cpp | 174 +-
.../Scalar/MergedLoadStoreMotion.cpp | 51 +-
lib/Transforms/Scalar/NaryReassociate.cpp | 199 +-
.../Scalar/PartiallyInlineLibCalls.cpp | 2 +-
lib/Transforms/Scalar/PlaceSafepoints.cpp | 104 +-
lib/Transforms/Scalar/Reassociate.cpp | 109 +-
lib/Transforms/Scalar/Reg2Mem.cpp | 9 +-
.../Scalar/RewriteStatepointsForGC.cpp | 1814 +-
lib/Transforms/Scalar/SCCP.cpp | 166 +-
lib/Transforms/Scalar/SROA.cpp | 991 +-
lib/Transforms/Scalar/Scalar.cpp | 15 +-
.../Scalar/ScalarReplAggregates.cpp | 7 +-
lib/Transforms/Scalar/Scalarizer.cpp | 32 +-
.../Scalar/SeparateConstOffsetFromGEP.cpp | 251 +-
lib/Transforms/Scalar/SimplifyCFGPass.cpp | 11 +-
lib/Transforms/Scalar/Sink.cpp | 25 +-
.../Scalar/SpeculativeExecution.cpp | 2 +-
.../Scalar/StraightLineStrengthReduce.cpp | 9 +-
lib/Transforms/Scalar/StructurizeCFG.cpp | 12 +-
.../Scalar/TailRecursionElimination.cpp | 42 +-
lib/Transforms/Utils/ASanStackFrameLayout.cpp | 2 +-
lib/Transforms/Utils/AddDiscriminators.cpp | 147 +-
lib/Transforms/Utils/BasicBlockUtils.cpp | 54 +-
lib/Transforms/Utils/BreakCriticalEdges.cpp | 19 +-
lib/Transforms/Utils/BuildLibCalls.cpp | 28 +-
lib/Transforms/Utils/BypassSlowDivision.cpp | 6 +-
lib/Transforms/Utils/CMakeLists.txt | 1 +
lib/Transforms/Utils/CloneFunction.cpp | 122 +-
lib/Transforms/Utils/CloneModule.cpp | 89 +-
lib/Transforms/Utils/CodeExtractor.cpp | 36 +-
lib/Transforms/Utils/CtorUtils.cpp | 2 +-
lib/Transforms/Utils/DemoteRegToStack.cpp | 21 +-
lib/Transforms/Utils/FlattenCFG.cpp | 34 +- lib/Transforms/Utils/GlobalStatus.cpp | 4 + lib/Transforms/Utils/InlineFunction.cpp | 431 +- lib/Transforms/Utils/IntegerDivision.cpp | 58 +- lib/Transforms/Utils/LCSSA.cpp | 76 +- lib/Transforms/Utils/LLVMBuild.txt | 2 +- lib/Transforms/Utils/Local.cpp | 270 +- lib/Transforms/Utils/LoopSimplify.cpp | 118 +- lib/Transforms/Utils/LoopUnroll.cpp | 99 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 36 +- lib/Transforms/Utils/LoopUtils.cpp | 276 +- lib/Transforms/Utils/LoopVersioning.cpp | 79 +- lib/Transforms/Utils/LowerInvoke.cpp | 2 +- lib/Transforms/Utils/LowerSwitch.cpp | 93 +- lib/Transforms/Utils/Mem2Reg.cpp | 3 + lib/Transforms/Utils/MetaRenamer.cpp | 69 +- lib/Transforms/Utils/ModuleUtils.cpp | 14 +- .../Utils/PromoteMemoryToRegister.cpp | 57 +- lib/Transforms/Utils/SimplifyCFG.cpp | 706 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 130 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 4 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 445 +- lib/Transforms/Utils/SplitModule.cpp | 85 + lib/Transforms/Utils/SymbolRewriter.cpp | 1 - .../Utils/UnifyFunctionExitNodes.cpp | 10 +- lib/Transforms/Utils/ValueMapper.cpp | 254 +- lib/Transforms/Vectorize/BBVectorize.cpp | 191 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 2595 +-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 489 +- llvm.spec.in | 1 + projects/CMakeLists.txt | 3 +- test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 2 +- .../BasicAA/2007-12-08-OutOfBoundsCrash.ll | 2 +- test/Analysis/BasicAA/bug.23540.ll | 17 + test/Analysis/BasicAA/bug.23626.ll | 31 + test/Analysis/BasicAA/cs-cs.ll | 41 +- .../BasicAA/full-store-partial-alias.ll | 4 +- test/Analysis/BasicAA/intrinsics.ll | 27 +- test/Analysis/BasicAA/modref.ll | 37 + test/Analysis/BasicAA/noalias-bugs.ll | 2 +- test/Analysis/BasicAA/phi-aa.ll | 1 + test/Analysis/BasicAA/phi-loop.ll | 75 + test/Analysis/BasicAA/q.bad.ll | 180 + test/Analysis/BasicAA/sequential-gep.ll | 54 + test/Analysis/BasicAA/zext.ll | 231 + test/Analysis/BlockFrequencyInfo/bad_input.ll | 4 +- test/Analysis/BlockFrequencyInfo/basic.ll | 6 +- .../irreducible_loop_crash.ll | 155 + .../loops_with_profile_info.ll | 6 +- test/Analysis/BranchProbabilityInfo/basic.ll | 68 +- test/Analysis/BranchProbabilityInfo/loop.ll | 152 +- .../BranchProbabilityInfo/noreturn.ll | 67 +- .../Analysis/BranchProbabilityInfo/pr18705.ll | 4 +- .../Analysis/BranchProbabilityInfo/pr22718.ll | 4 +- .../CFLAliasAnalysis/arguments-globals.ll | 2 +- .../CFLAliasAnalysis/basic-interproc.ll | 2 +- .../Analysis/CFLAliasAnalysis/branch-alias.ll | 2 +- .../CFLAliasAnalysis/const-expr-gep.ll | 2 +- .../full-store-partial-alias.ll | 4 +- .../CFLAliasAnalysis/gep-signed-arithmetic.ll | 2 +- .../CFLAliasAnalysis/multilevel-combine.ll | 2 +- .../CFLAliasAnalysis/must-and-partial.ll | 2 +- .../CFLAliasAnalysis/opaque-call-alias.ll | 20 + test/Analysis/CFLAliasAnalysis/va.ll | 2 +- .../Analysis/CallGraph/non-leaf-intrinsics.ll | 4 +- test/Analysis/CostModel/AArch64/select.ll | 12 +- test/Analysis/CostModel/AMDGPU/br.ll | 45 + .../CostModel/AMDGPU/extractelement.ll | 110 + test/Analysis/CostModel/AMDGPU/lit.local.cfg | 2 + test/Analysis/CostModel/ARM/cast.ll | 507 +- test/Analysis/CostModel/ARM/gep.ll | 88 +- test/Analysis/CostModel/ARM/select.ll | 6 +- test/Analysis/CostModel/PowerPC/load_store.ll | 4 +- .../CostModel/PowerPC/unal-vec-ldst.ll | 404 + test/Analysis/CostModel/X86/arith.ll | 4 +- test/Analysis/CostModel/X86/cast.ll | 38 +- .../CostModel/X86/masked-intrinsic-cost.ll | 215 +- 
test/Analysis/CostModel/X86/reduction.ll | 2 +- test/Analysis/CostModel/X86/sitofp.ll | 346 +- test/Analysis/CostModel/X86/sse-itoi.ll | 353 + test/Analysis/CostModel/X86/testshiftashr.ll | 32 +- test/Analysis/CostModel/X86/testshiftlshr.ll | 16 +- test/Analysis/CostModel/X86/testshiftshl.ll | 16 +- test/Analysis/CostModel/X86/uitofp.ll | 418 +- test/Analysis/CostModel/X86/vector_gep.ll | 17 + .../CostModel/X86/vshift-ashr-cost.ll | 392 + test/Analysis/CostModel/X86/vshift-cost.ll | 167 - .../CostModel/X86/vshift-lshr-cost.ll | 400 + .../Analysis/CostModel/X86/vshift-shl-cost.ll | 580 + test/Analysis/CostModel/no_info.ll | 22 +- test/Analysis/Delinearization/a.ll | 2 +- .../multidim_ivs_and_integer_offsets_3d.ll | 2 +- ...multidim_ivs_and_integer_offsets_nts_3d.ll | 2 +- ...multidim_ivs_and_parameteric_offsets_3d.ll | 2 +- .../parameter_addrec_product.ll | 56 + test/Analysis/DemandedBits/basic.ll | 34 + test/Analysis/DependenceAnalysis/GCD.ll | 14 +- .../DependenceAnalysis/NonAffineExpr.ll | 36 + test/Analysis/DependenceAnalysis/PR21585.ll | 2 +- .../DivergenceAnalysis/AMDGPU/kernel-args.ll | 16 + .../DivergenceAnalysis/AMDGPU/lit.local.cfg | 2 + .../DivergenceAnalysis/NVPTX/diverge.ll | 23 +- .../GlobalsModRef/2008-09-03-ReadGlobals.ll | 2 +- test/Analysis/GlobalsModRef/aliastest.ll | 5 +- .../GlobalsModRef/argmemonly-escape.ll | 47 + test/Analysis/GlobalsModRef/atomic-instrs.ll | 37 + .../GlobalsModRef/chaining-analysis.ll | 2 +- .../Analysis/GlobalsModRef/indirect-global.ll | 5 +- test/Analysis/GlobalsModRef/memset-escape.ll | 65 + test/Analysis/GlobalsModRef/modreftest.ll | 22 +- test/Analysis/GlobalsModRef/nocapture.ll | 57 + .../GlobalsModRef/nonescaping-noalias.ll | 116 + test/Analysis/GlobalsModRef/pr12351.ll | 2 +- test/Analysis/GlobalsModRef/pr25309.ll | 27 + test/Analysis/GlobalsModRef/purecse.ll | 2 +- .../GlobalsModRef/weak-interposition.ll | 24 + .../LazyCallGraph/non-leaf-intrinsics.ll | 4 +- test/Analysis/Lint/cppeh-catch-intrinsics.ll | 278 - .../forward-loop-carried.ll | 44 + .../forward-loop-independent.ll | 64 + test/Analysis/LoopAccessAnalysis/nullptr.ll | 38 + .../LoopAccessAnalysis/number-of-memchecks.ll | 51 +- .../pointer-with-unknown-bounds.ll | 4 +- .../resort-to-memchecks-only.ll | 4 +- .../reverse-memcheck-bounds.ll | 89 + .../LoopAccessAnalysis/safe-no-checks.ll | 8 + .../stride-access-dependence.ll | 28 +- .../underlying-objects-2.ll | 2 +- .../unsafe-and-rt-checks.ll | 2 +- .../memdep-block-scan-limit.ll | 15 + .../ScalarEvolution/avoid-assume-hang.ll | 139 + .../ScalarEvolution/constant_condition.ll | 51 + .../ScalarEvolution/flags-from-poison.ll | 592 + .../ScalarEvolution/infer-prestart-no-wrap.ll | 10 +- .../Analysis/ScalarEvolution/min-max-exprs.ll | 2 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 122 + test/Analysis/ScalarEvolution/non-IV-phi.ll | 59 + test/Analysis/ScalarEvolution/pr24757.ll | 35 + test/Analysis/ScalarEvolution/pr25369.ll | 78 + test/Analysis/ScalarEvolution/scev-aa.ll | 2 +- test/Analysis/ScalarEvolution/shift-op.ll | 164 + .../ScalarEvolution/smax-br-phi-idioms.ll | 128 + test/Analysis/ScalarEvolution/trip-count.ll | 5 +- test/Analysis/ScalarEvolution/zext-wrap.ll | 2 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 8 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 17 +- test/Analysis/TypeBasedAliasAnalysis/licm.ll | 2 +- .../TypeBasedAliasAnalysis/precedence.ll | 8 +- .../ValueTracking/known-bits-from-range-md.ll | 34 + .../Analysis/ValueTracking/known-non-equal.ll | 21 + .../ValueTracking/knownnonzero-shift.ll | 13 + 
.../Analysis/ValueTracking/knownzero-shift.ll | 14 + .../ValueTracking/memory-dereferenceable.ll | 117 +- test/Analysis/ValueTracking/monotonic-phi.ll | 49 + test/Analysis/ValueTracking/pr24866.ll | 44 + test/Assembler/2007-09-10-AliasFwdRef.ll | 2 +- ...-02-05-FunctionLocalMetadataBecomesNull.ll | 8 +- test/Assembler/ConstantExprFoldCast.ll | 4 + test/Assembler/ConstantExprNoFold.ll | 8 +- test/Assembler/addrspacecast-alias.ll | 4 +- test/Assembler/alias-redefinition.ll | 6 +- test/Assembler/alias-use-list-order.ll | 6 +- test/Assembler/anon-functions.ll | 4 +- test/Assembler/debug-info.ll | 19 +- test/Assembler/dicompileunit.ll | 31 +- test/Assembler/diimportedentity.ll | 4 +- test/Assembler/dilexicalblock.ll | 2 +- test/Assembler/dilocalvariable-arg-large.ll | 4 +- test/Assembler/dilocalvariable.ll | 16 +- test/Assembler/dilocation.ll | 4 +- test/Assembler/disubprogram.ll | 31 +- test/Assembler/drop-debug-info.ll | 6 +- test/Assembler/global-addrspace-forwardref.ll | 9 + .../Assembler/incorrect-tdep-attrs-parsing.ll | 6 + test/Assembler/internal-hidden-alias.ll | 2 +- test/Assembler/internal-protected-alias.ll | 2 +- .../invalid-alias-mismatched-explicit-type.ll | 4 + .../invalid-dicompileunit-language-bad.ll | 6 +- ...invalid-dicompileunit-language-overflow.ll | 10 +- .../invalid-dicompileunit-missing-language.ll | 4 +- .../invalid-dicompileunit-null-file.ll | 4 +- .../invalid-dicompileunit-uniqued.ll | 4 + .../invalid-dilocalvariable-arg-large.ll | 6 +- .../invalid-dilocalvariable-arg-negative.ll | 7 +- .../invalid-dilocalvariable-missing-scope.ll | 4 +- .../invalid-dilocalvariable-missing-tag.ll | 4 - ...invalid-disubprogram-uniqued-definition.ll | 4 + test/Assembler/invalid-fp80hex.ll | 6 + test/Assembler/invalid-fwdref2.ll | 2 +- test/Assembler/invalid-inline-constraint.ll | 7 + test/Assembler/invalid-untyped-metadata.ll | 6 + ...invalid-uselistorder-indexes-duplicated.ll | 6 +- .../invalid-uselistorder-indexes-one.ll | 2 +- .../invalid-uselistorder-indexes-ordered.ll | 6 +- .../invalid-uselistorder-indexes-range.ll | 6 +- .../invalid-uselistorder-indexes-toofew.ll | 6 +- .../invalid-uselistorder-indexes-toomany.ll | 4 +- test/Assembler/metadata.ll | 2 +- test/Assembler/private-hidden-alias.ll | 2 +- test/Assembler/private-protected-alias.ll | 2 +- test/Assembler/token.ll | 11 + test/Assembler/unnamed-alias.ll | 8 +- test/Assembler/uselistorder.ll | 2 +- test/Bindings/Go/go.test | 2 +- test/Bindings/Go/lit.local.cfg | 3 + test/Bindings/OCaml/analysis.ml | 4 +- test/Bindings/OCaml/bitreader.ml | 4 +- test/Bindings/OCaml/bitwriter.ml | 4 +- test/Bindings/OCaml/core.ml | 4 +- test/Bindings/OCaml/executionengine.ml | 4 +- test/Bindings/OCaml/ext_exc.ml | 4 +- test/Bindings/OCaml/ipo.ml | 4 +- test/Bindings/OCaml/irreader.ml | 4 +- test/Bindings/OCaml/linker.ml | 14 +- test/Bindings/OCaml/passmgr_builder.ml | 4 +- test/Bindings/OCaml/scalar_opts.ml | 4 +- test/Bindings/OCaml/target.ml | 4 +- test/Bindings/OCaml/transform_utils.ml | 4 +- test/Bindings/OCaml/vectorize.ml | 4 +- test/Bindings/llvm-c/Inputs/invalid.ll.bc | Bin 332 -> 688 bytes test/Bindings/llvm-c/functions.ll | 11 +- test/Bindings/llvm-c/invalid-bitcode.test | 9 +- test/Bitcode/DICompileUnit-no-DWOId.ll | 2 +- test/Bitcode/DILocalVariable-explicit-tags.ll | 16 + .../DILocalVariable-explicit-tags.ll.bc | Bin 0 -> 500 bytes .../DISubprogram-distinct-definitions.ll | 11 + .../DISubprogram-distinct-definitions.ll.bc | Bin 0 -> 512 bytes test/Bitcode/Inputs/invalid-abbrev.bc | Bin 129 -> 132 bytes 
test/Bitcode/Inputs/invalid-cast.bc | Bin 0 -> 1236 bytes .../Inputs/invalid-name-with-0-byte.bc | Bin 0 -> 1265 bytes .../Inputs/invalid-no-function-block.bc | Bin 0 -> 548 bytes test/Bitcode/anon-functions.ll | 18 + test/Bitcode/attributes.ll | 32 +- test/Bitcode/compatibility-3.6.ll | 1207 ++ test/Bitcode/compatibility-3.6.ll.bc | Bin 0 -> 10192 bytes test/Bitcode/compatibility-3.7.ll | 1280 ++ test/Bitcode/compatibility-3.7.ll.bc | Bin 0 -> 11584 bytes test/Bitcode/compatibility.ll | 1560 ++ test/Bitcode/debug-loc-again.ll | 6 +- test/Bitcode/highLevelStructure.3.2.ll | 20 +- test/Bitcode/identification.ll | 6 + test/Bitcode/invalid.ll | 2 +- test/Bitcode/invalid.ll.bc | Bin 332 -> 688 bytes test/Bitcode/invalid.test | 15 + .../local-linkage-default-visibility.3.4.ll | 24 +- test/Bitcode/old-aliases.ll | 16 +- test/Bitcode/operand-bundles.ll | 152 + test/Bitcode/select.ll | 8 + test/Bitcode/tailcall.ll | 16 +- test/Bitcode/thinlto-function-summary.ll | 45 + test/Bitcode/upgrade-subprogram.ll | 17 + test/Bitcode/upgrade-subprogram.ll.bc | Bin 0 -> 784 bytes test/Bitcode/use-list-order.ll | 14 +- test/Bitcode/use-list-order2.ll | 57 + test/Bitcode/vst-forward-declaration.ll | 29 + test/BugPoint/metadata.ll | 6 +- test/BugPoint/named-md.ll | 39 + test/BugPoint/remove_arguments_test.ll | 6 +- test/BugPoint/replace-funcs-with-null.ll | 2 +- test/CMakeLists.txt | 33 +- ...aarch64-2014-08-11-MachineCombinerCrash.ll | 46 +- test/CodeGen/AArch64/aarch64-addv.ll | 98 + .../AArch64/aarch64-deferred-spilling.ll | 514 + .../AArch64/aarch64-dynamic-stack-layout.ll | 24 +- .../AArch64/aarch64-interleaved-accesses.ll | 147 +- test/CodeGen/AArch64/aarch64-loop-gep-opt.ll | 50 + test/CodeGen/AArch64/aarch64-minmaxv.ll | 511 + .../AArch64/aarch64-smax-constantfold.ll | 12 + test/CodeGen/AArch64/addsub_ext.ll | 146 + test/CodeGen/AArch64/alloca.ll | 4 +- .../arm64-2011-03-17-AsmPrinterCrash.ll | 14 +- test/CodeGen/AArch64/arm64-aapcs-be.ll | 2 +- test/CodeGen/AArch64/arm64-aapcs.ll | 21 +- test/CodeGen/AArch64/arm64-abi_align.ll | 2 +- .../AArch64/arm64-addr-type-promotion.ll | 9 +- .../arm64-alloca-frame-pointer-offset.ll | 6 +- test/CodeGen/AArch64/arm64-arith.ll | 3 +- test/CodeGen/AArch64/arm64-atomic-128.ll | 7 +- test/CodeGen/AArch64/arm64-atomic.ll | 70 +- test/CodeGen/AArch64/arm64-builtins-linux.ll | 11 + test/CodeGen/AArch64/arm64-ccmp-heuristics.ll | 4 +- test/CodeGen/AArch64/arm64-ccmp.ll | 166 +- .../AArch64/arm64-coalescing-MOVi32imm.ll | 17 + test/CodeGen/AArch64/arm64-collect-loh.ll | 604 + test/CodeGen/AArch64/arm64-fast-isel-br.ll | 15 +- test/CodeGen/AArch64/arm64-fmax-safe.ll | 53 + test/CodeGen/AArch64/arm64-fmax.ll | 46 +- test/CodeGen/AArch64/arm64-fp128.ll | 31 +- test/CodeGen/AArch64/arm64-hello.ll | 4 +- test/CodeGen/AArch64/arm64-indexed-memory.ll | 33 + .../AArch64/arm64-indexed-vector-ldst.ll | 26 +- test/CodeGen/AArch64/arm64-inline-asm.ll | 2 +- test/CodeGen/AArch64/arm64-join-reserved.ll | 2 +- test/CodeGen/AArch64/arm64-large-frame.ll | 2 +- test/CodeGen/AArch64/arm64-ld-from-st.ll | 666 + test/CodeGen/AArch64/arm64-ldp.ll | 188 +- test/CodeGen/AArch64/arm64-long-shift.ll | 80 +- .../AArch64/arm64-misaligned-memcpy-inline.ll | 2 +- .../AArch64/arm64-narrow-ldst-merge.ll | 406 + test/CodeGen/AArch64/arm64-neon-2velem.ll | 55 + test/CodeGen/AArch64/arm64-neon-copy.ll | 17 +- .../AArch64/arm64-patchpoint-webkit_jscc.ll | 8 +- test/CodeGen/AArch64/arm64-platform-reg.ll | 4 +- test/CodeGen/AArch64/arm64-popcnt.ll | 8 +- test/CodeGen/AArch64/arm64-rounding.ll | 62 +- 
test/CodeGen/AArch64/arm64-shrink-wrapping.ll | 95 +- test/CodeGen/AArch64/arm64-spill-lr.ll | 6 +- test/CodeGen/AArch64/arm64-stackmap.ll | 4 +- test/CodeGen/AArch64/arm64-stp.ll | 34 +- test/CodeGen/AArch64/arm64-strict-align.ll | 5 +- .../AArch64/arm64-tls-dynamic-together.ll | 43 +- test/CodeGen/AArch64/arm64-trunc-store.ll | 2 +- test/CodeGen/AArch64/arm64-vabs.ll | 66 + test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 2 +- test/CodeGen/AArch64/arm64-vector-ext.ll | 54 +- test/CodeGen/AArch64/arm64-vminmaxnm.ll | 17 +- test/CodeGen/AArch64/arm64-xaluo.ll | 4 +- test/CodeGen/AArch64/atomic-ops.ll | 20 +- test/CodeGen/AArch64/bitcast-v2i8.ll | 2 +- test/CodeGen/AArch64/bitfield-insert.ll | 41 + test/CodeGen/AArch64/bitfield.ll | 46 +- test/CodeGen/AArch64/bitreverse.ll | 87 + .../AArch64/combine-comparisons-by-cse.ll | 26 + test/CodeGen/AArch64/cpus.ll | 1 + test/CodeGen/AArch64/cxx-tlscc.ll | 76 + test/CodeGen/AArch64/dag-combine-select.ll | 47 + test/CodeGen/AArch64/divrem.ll | 22 + test/CodeGen/AArch64/emutls.ll | 116 + test/CodeGen/AArch64/emutls_generic.ll | 59 + test/CodeGen/AArch64/eon.ll | 29 + test/CodeGen/AArch64/f16-instructions.ll | 111 +- .../AArch64/fast-isel-branch-cond-mask.ll | 19 + .../AArch64/fast-isel-branch-cond-split.ll | 52 +- test/CodeGen/AArch64/fast-isel-cmp-vec.ll | 100 + .../CodeGen/AArch64/fast-isel-folded-shift.ll | 125 + test/CodeGen/AArch64/fast-isel-logic-op.ll | 2 +- test/CodeGen/AArch64/fastcc-reserved.ll | 4 +- test/CodeGen/AArch64/fastcc.ll | 8 +- test/CodeGen/AArch64/fcvt_combine.ll | 154 + test/CodeGen/AArch64/fdiv_combine.ll | 115 + test/CodeGen/AArch64/fold-constants.ll | 19 +- test/CodeGen/AArch64/fp16-v4-instructions.ll | 51 +- test/CodeGen/AArch64/fp16-v8-instructions.ll | 63 + test/CodeGen/AArch64/free-zext.ll | 59 +- test/CodeGen/AArch64/func-argpassing.ll | 4 +- test/CodeGen/AArch64/func-calls.ll | 4 +- test/CodeGen/AArch64/global-alignment.ll | 2 +- test/CodeGen/AArch64/global-merge-1.ll | 16 +- test/CodeGen/AArch64/global-merge-2.ll | 34 +- test/CodeGen/AArch64/global-merge-3.ll | 46 +- test/CodeGen/AArch64/global-merge-4.ll | 6 +- .../AArch64/global-merge-group-by-use.ll | 6 +- .../global-merge-ignore-single-use-minsize.ll | 2 +- .../AArch64/global-merge-ignore-single-use.ll | 2 +- test/CodeGen/AArch64/ldst-opt.ll | 477 +- test/CodeGen/AArch64/merge-store.ll | 30 + test/CodeGen/AArch64/misched-fusion.ll | 34 + test/CodeGen/AArch64/mul-lohi.ll | 29 + test/CodeGen/AArch64/nest-register.ll | 2 +- test/CodeGen/AArch64/nontemporal.ll | 339 + test/CodeGen/AArch64/pic-eh-stubs.ll | 2 +- test/CodeGen/AArch64/readcyclecounter.ll | 15 + test/CodeGen/AArch64/regress-tblgen-chains.ll | 4 +- test/CodeGen/AArch64/remat.ll | 1 + test/CodeGen/AArch64/rotate.ll | 14 + test/CodeGen/AArch64/round-conv.ll | 330 + test/CodeGen/AArch64/shrink-wrap.ll | 184 + test/CodeGen/AArch64/stackmap-frame-setup.ll | 20 + test/CodeGen/AArch64/tail-call.ll | 6 +- .../CodeGen/AArch64/tailcall-explicit-sret.ll | 2 +- test/CodeGen/AArch64/tbi.ll | 102 + test/CodeGen/AArch64/vector-fcopysign.ll | 178 + test/CodeGen/AArch64/xbfiz.ll | 30 + test/CodeGen/AMDGPU/add.ll | 14 +- test/CodeGen/AMDGPU/address-space.ll | 6 +- test/CodeGen/AMDGPU/addrspacecast.ll | 66 + test/CodeGen/AMDGPU/and.ll | 101 +- .../AMDGPU/annotate-kernel-features.ll | 193 + test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 8 +- test/CodeGen/AMDGPU/bitreverse.ll | 115 + test/CodeGen/AMDGPU/calling-conventions.ll | 20 + .../AMDGPU/cgp-addressing-modes-flat.ll | 98 + test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 254 +- 
test/CodeGen/AMDGPU/ci-use-flat-for-global.ll | 15 + test/CodeGen/AMDGPU/ctpop64.ll | 22 +- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 12 +- .../AMDGPU/drop-mem-operand-move-smrd.ll | 52 + ...ds-negative-offset-addressing-mode-loop.ll | 10 +- test/CodeGen/AMDGPU/ds-sub-offset.ll | 125 + test/CodeGen/AMDGPU/ds_read2.ll | 10 +- test/CodeGen/AMDGPU/ds_read2_superreg.ll | 89 +- test/CodeGen/AMDGPU/ds_read2st64.ll | 8 +- test/CodeGen/AMDGPU/ds_write2.ll | 9 +- test/CodeGen/AMDGPU/ds_write2st64.ll | 4 +- test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 11 + test/CodeGen/AMDGPU/extract-vector-elt-i64.ll | 43 + test/CodeGen/AMDGPU/fadd64.ll | 52 +- test/CodeGen/AMDGPU/fceil64.ll | 12 +- test/CodeGen/AMDGPU/fcmp.ll | 2 +- test/CodeGen/AMDGPU/flat-address-space.ll | 77 +- test/CodeGen/AMDGPU/flat-scratch-reg.ll | 36 + test/CodeGen/AMDGPU/fma-combine.ll | 200 + test/CodeGen/AMDGPU/fmax_legacy.ll | 40 + test/CodeGen/AMDGPU/fmin_legacy.ll | 63 + .../AMDGPU/fmul-2-combine-multi-use.ll | 102 + test/CodeGen/AMDGPU/fneg-fabs.ll | 27 +- test/CodeGen/AMDGPU/ftrunc.f64.ll | 12 +- test/CodeGen/AMDGPU/gep-address-space.ll | 34 +- test/CodeGen/AMDGPU/global-constant.ll | 27 + test/CodeGen/AMDGPU/global-extload-i32.ll | 321 +- test/CodeGen/AMDGPU/global_atomics.ll | 20 +- test/CodeGen/AMDGPU/half.ll | 256 +- test/CodeGen/AMDGPU/hsa-globals.ll | 132 + test/CodeGen/AMDGPU/hsa-group-segment.ll | 14 + test/CodeGen/AMDGPU/hsa.ll | 36 +- test/CodeGen/AMDGPU/image-attributes.ll | 206 + test/CodeGen/AMDGPU/image-resource-id.ll | 409 + test/CodeGen/AMDGPU/imm.ll | 24 +- test/CodeGen/AMDGPU/indirect-addressing-si.ll | 67 +- test/CodeGen/AMDGPU/indirect-private-64.ll | 34 +- test/CodeGen/AMDGPU/inline-constraints.ll | 23 + test/CodeGen/AMDGPU/insert_vector_elt.ll | 103 +- test/CodeGen/AMDGPU/kernel-args.ll | 26 +- test/CodeGen/AMDGPU/large-alloca-compute.ll | 57 + test/CodeGen/AMDGPU/large-alloca-graphics.ll | 47 + test/CodeGen/AMDGPU/large-alloca.ll | 15 - test/CodeGen/AMDGPU/literals.ll | 8 +- test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll | 4 +- test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll | 2 +- test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll | 28 - test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll | 6 +- test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll | 1 - .../AMDGPU/llvm.AMDGPU.read.workdim.ll | 37 + test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll | 2 +- test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll | 16 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll | 14 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll | 16 + .../AMDGPU/llvm.amdgcn.dispatch.ptr.ll | 16 + test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll | 30 + test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll | 24 + .../AMDGPU/llvm.amdgcn.s.dcache.inv.ll | 29 + .../AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 29 + .../CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 27 + .../AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 27 + test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll | 2 +- test/CodeGen/AMDGPU/llvm.dbg.value.ll | 12 +- test/CodeGen/AMDGPU/llvm.memcpy.ll | 66 +- .../AMDGPU/llvm.r600.read.local.size.ll | 184 + test/CodeGen/AMDGPU/llvm.round.f64.ll | 5 +- test/CodeGen/AMDGPU/load.ll | 34 +- .../AMDGPU/local-memory-two-objects.ll | 4 +- test/CodeGen/AMDGPU/local-memory.ll | 4 +- test/CodeGen/AMDGPU/max.ll | 116 +- test/CodeGen/AMDGPU/merge-stores.ll | 196 +- test/CodeGen/AMDGPU/min.ll | 171 +- .../move-addr64-rsrc-dead-subreg-writes.ll | 36 + test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll | 52 + .../CodeGen/AMDGPU/no-hsa-graphics-shaders.ll | 18 + test/CodeGen/AMDGPU/no-shrink-extloads.ll | 12 + 
test/CodeGen/AMDGPU/opencl-image-metadata.ll | 24 + test/CodeGen/AMDGPU/operand-folding.ll | 2 +- test/CodeGen/AMDGPU/or.ll | 2 +- ...partially-dead-super-register-immediate.ll | 28 + test/CodeGen/AMDGPU/private-memory.ll | 12 + .../CodeGen/AMDGPU/register-count-comments.ll | 3 +- test/CodeGen/AMDGPU/reorder-stores.ll | 58 +- test/CodeGen/AMDGPU/s_movk_i32.ll | 18 +- test/CodeGen/AMDGPU/salu-to-valu.ll | 416 +- test/CodeGen/AMDGPU/sampler-resource-id.ll | 65 + .../schedule-vs-if-nested-loop-failure.ll | 2 +- test/CodeGen/AMDGPU/scratch-buffer.ll | 2 +- test/CodeGen/AMDGPU/select64.ll | 8 +- test/CodeGen/AMDGPU/set-dx10.ll | 48 +- test/CodeGen/AMDGPU/setcc-opt.ll | 22 +- test/CodeGen/AMDGPU/sext-in-reg.ll | 54 +- test/CodeGen/AMDGPU/shl.ll | 15 +- test/CodeGen/AMDGPU/shl_add_constant.ll | 6 +- test/CodeGen/AMDGPU/shl_add_ptr.ll | 2 +- ...si-instr-info-correct-implicit-operands.ll | 16 + test/CodeGen/AMDGPU/si-literal-folding.ll | 17 + test/CodeGen/AMDGPU/si-sgpr-spill.ll | 10 + .../AMDGPU/si-triv-disjoint-mem-access.ll | 7 +- test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 6 +- test/CodeGen/AMDGPU/sminmax.ll | 130 + test/CodeGen/AMDGPU/smrd.ll | 73 +- test/CodeGen/AMDGPU/split-scalar-i64-add.ll | 42 +- .../AMDGPU/split-vector-memoperand-offsets.ll | 104 + test/CodeGen/AMDGPU/sra.ll | 8 +- test/CodeGen/AMDGPU/srl.ll | 13 +- test/CodeGen/AMDGPU/store-barrier.ll | 4 +- test/CodeGen/AMDGPU/store.ll | 25 +- test/CodeGen/AMDGPU/store_typed.ll | 24 + test/CodeGen/AMDGPU/sub.ll | 14 +- test/CodeGen/AMDGPU/trunc.ll | 8 +- test/CodeGen/AMDGPU/udivrem.ll | 130 +- test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 6 +- test/CodeGen/AMDGPU/unsupported-cc.ll | 32 +- .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 167 + test/CodeGen/AMDGPU/valu-i1.ll | 16 +- ...vgpr-spill-emergency-stack-slot-compute.ll | 585 + .../AMDGPU/vgpr-spill-emergency-stack-slot.ll | 494 + test/CodeGen/AMDGPU/vop-shrink.ll | 4 +- test/CodeGen/AMDGPU/wait.ll | 59 +- test/CodeGen/AMDGPU/work-item-intrinsics.ll | 263 +- test/CodeGen/AMDGPU/xor.ll | 2 +- test/CodeGen/AMDGPU/zero_extend.ll | 3 +- test/CodeGen/ARM/2007-03-13-InstrSched.ll | 2 +- test/CodeGen/ARM/2009-10-16-Scope.ll | 6 +- .../ARM/2010-04-15-ScavengerDebugValue.ll | 6 +- test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 24 +- test/CodeGen/ARM/2010-05-21-BuildVector.ll | 4 +- .../CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll | 4 +- test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll | 2 +- .../ARM/2010-06-25-Thumb2ITInvalidIterator.ll | 12 +- .../ARM/2010-06-29-PartialRedefFastAlloc.ll | 4 +- test/CodeGen/ARM/2010-08-04-StackVariable.ll | 24 +- .../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 42 +- .../ARM/2011-06-29-MergeGlobalsAlign.ll | 2 +- .../CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll | 42 +- .../CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll | 4 +- test/CodeGen/ARM/2011-10-26-memset-inline.ll | 2 +- .../ARM/2012-01-24-RegSequenceLiveRange.ll | 10 +- .../ARM/2012-05-10-PreferVMOVtoVDUP32.ll | 4 +- .../ARM/2012-08-27-CopyPhysRegCrash.ll | 14 +- test/CodeGen/ARM/2012-11-14-subs_carry.ll | 10 +- test/CodeGen/ARM/2013-10-11-select-stalls.ll | 13 +- .../2014-01-09-pseudo_expand_implicit_reg.ll | 4 +- test/CodeGen/ARM/MachO-subtypes.ll | 68 + test/CodeGen/ARM/Windows/division.ll | 38 + ...oating-point-conversion.ll => libcalls.ll} | 3 +- test/CodeGen/ARM/Windows/no-eabi.ll | 10 + test/CodeGen/ARM/Windows/no-frame-register.ll | 22 + test/CodeGen/ARM/Windows/overflow.ll | 77 + test/CodeGen/ARM/adv-copy-opt.ll | 14 +- test/CodeGen/ARM/aliases.ll | 30 +- test/CodeGen/ARM/align-sp-adjustment.ll | 47 + 
test/CodeGen/ARM/apcs-vfp.ll | 153 + test/CodeGen/ARM/arm-eabi.ll | 63 + test/CodeGen/ARM/arm-interleaved-accesses.ll | 190 +- test/CodeGen/ARM/arm-shrink-wrapping-linux.ll | 142 + test/CodeGen/ARM/arm-shrink-wrapping.ll | 683 + test/CodeGen/ARM/atomic-64bit.ll | 6 + test/CodeGen/ARM/atomic-cmp.ll | 4 +- test/CodeGen/ARM/atomic-cmpxchg.ll | 96 +- test/CodeGen/ARM/atomic-op.ll | 46 +- test/CodeGen/ARM/atomic-ops-v8.ll | 38 +- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 16 +- test/CodeGen/ARM/bfi.ll | 95 + .../build-attributes-optimization-minsize.ll | 18 + .../build-attributes-optimization-mixed.ll | 23 + .../build-attributes-optimization-optnone.ll | 18 + .../build-attributes-optimization-optsize.ll | 18 + .../ARM/build-attributes-optimization.ll | 23 + test/CodeGen/ARM/build-attributes.ll | 142 +- test/CodeGen/ARM/call-tc.ll | 8 +- test/CodeGen/ARM/cfi-alignment.ll | 48 + test/CodeGen/ARM/cmpxchg-idioms.ll | 6 +- test/CodeGen/ARM/cmpxchg-weak.ll | 56 +- test/CodeGen/ARM/coalesce-dbgvalue.ll | 10 +- test/CodeGen/ARM/coalesce-subregs.ll | 38 +- test/CodeGen/ARM/combine-vmovdrr.ll | 72 + test/CodeGen/ARM/constants.ll | 6 +- test/CodeGen/ARM/dagcombine-concatvector.ll | 4 +- test/CodeGen/ARM/debug-frame-vararg.ll | 14 +- test/CodeGen/ARM/debug-frame.ll | 28 +- test/CodeGen/ARM/debug-info-arg.ll | 20 +- test/CodeGen/ARM/debug-info-blocks.ll | 40 +- test/CodeGen/ARM/debug-info-branch-folding.ll | 32 +- test/CodeGen/ARM/debug-info-d16-reg.ll | 38 +- test/CodeGen/ARM/debug-info-no-frame.ll | 8 +- test/CodeGen/ARM/debug-info-qreg.ll | 28 +- test/CodeGen/ARM/debug-info-s16-reg.ll | 38 +- test/CodeGen/ARM/debug-info-sreg2.ll | 10 +- test/CodeGen/ARM/debug-segmented-stacks.ll | 14 +- test/CodeGen/ARM/debugtrap.ll | 17 + test/CodeGen/ARM/div.ll | 71 +- test/CodeGen/ARM/divmod-eabi.ll | 4 +- test/CodeGen/ARM/eh-resume-darwin.ll | 8 +- test/CodeGen/ARM/emutls.ll | 258 + test/CodeGen/ARM/emutls1.ll | 31 + test/CodeGen/ARM/emutls_generic.ll | 61 + test/CodeGen/ARM/fast-isel-align.ll | 22 +- test/CodeGen/ARM/fast-isel-ext.ll | 35 - test/CodeGen/ARM/fast-isel-mvn.ll | 10 +- test/CodeGen/ARM/fast-isel-pic.ll | 23 +- test/CodeGen/ARM/fold-stack-adjust.ll | 18 +- test/CodeGen/ARM/fp16-args.ll | 40 + test/CodeGen/ARM/fp16-promote.ll | 471 +- test/CodeGen/ARM/fp16.ll | 62 +- test/CodeGen/ARM/fparith.ll | 4 +- test/CodeGen/ARM/gep-optimization.ll | 77 + test/CodeGen/ARM/global-merge-1.ll | 6 +- test/CodeGen/ARM/global-merge-external.ll | 46 + test/CodeGen/ARM/globals.ll | 9 +- test/CodeGen/ARM/ifcvt-branch-weight-bug.ll | 14 +- test/CodeGen/ARM/ifcvt-branch-weight.ll | 2 +- test/CodeGen/ARM/ifcvt-iter-indbr.ll | 6 + test/CodeGen/ARM/ifcvt4.ll | 6 +- test/CodeGen/ARM/ifcvt5.ll | 4 +- test/CodeGen/ARM/ifcvt6.ll | 2 +- test/CodeGen/ARM/ifcvt8.ll | 4 +- test/CodeGen/ARM/inlineasm-switch-mode.ll | 4 +- .../ARM/ldm-stm-base-materialization.ll | 93 + test/CodeGen/ARM/ldrd.ll | 58 +- test/CodeGen/ARM/legalize-unaligned-load.ll | 35 + test/CodeGen/ARM/load-global.ll | 12 +- test/CodeGen/ARM/load-store-flags.ll | 4 +- test/CodeGen/ARM/load.ll | 571 +- test/CodeGen/ARM/machine-cse-cmp.ll | 2 +- test/CodeGen/ARM/memcpy-inline.ll | 2 +- test/CodeGen/ARM/memcpy-ldm-stm.ll | 94 + test/CodeGen/ARM/memfunc.ll | 255 +- test/CodeGen/ARM/minmax.ll | 193 + test/CodeGen/ARM/neon_minmax.ll | 1 + test/CodeGen/ARM/neon_spill.ll | 6 +- test/CodeGen/ARM/neon_vabs.ll | 38 + test/CodeGen/ARM/neon_vshl_minint.ll | 13 + test/CodeGen/ARM/out-of-registers.ll | 8 +- test/CodeGen/ARM/pr25317.ll | 11 + test/CodeGen/ARM/pr25838.ll | 34 + 
test/CodeGen/ARM/rbit.ll | 11 + test/CodeGen/ARM/reg_sequence.ll | 64 +- test/CodeGen/ARM/rotate.ll | 14 + test/CodeGen/ARM/sat-arith.ll | 63 + test/CodeGen/ARM/sched-it-debug-nodes.ll | 88 - test/CodeGen/ARM/setjmp_longjmp.ll | 113 + test/CodeGen/ARM/shifter_operand.ll | 228 +- .../CodeGen/ARM/sjlj-prepare-critical-edge.ll | 2 +- .../ARM/sjljehprepare-lower-empty-struct.ll | 1 + test/CodeGen/ARM/softfp-fabs-fneg.ll | 41 + test/CodeGen/ARM/special-reg-mcore.ll | 2 +- test/CodeGen/ARM/spill-q.ll | 28 +- test/CodeGen/ARM/ssat-lower.ll | 11 + test/CodeGen/ARM/ssat-upper.ll | 11 + test/CodeGen/ARM/subtarget-no-movt.ll | 45 + test/CodeGen/ARM/tail-merge-branch-weight.ll | 2 +- test/CodeGen/ARM/taildup-branch-weight.ll | 4 +- test/CodeGen/ARM/test-sharedidx.ll | 15 +- test/CodeGen/ARM/thumb-alignment.ll | 2 +- test/CodeGen/ARM/thumb1-ldst-opt.ll | 27 + test/CodeGen/ARM/thumb1_return_sequence.ll | 70 +- test/CodeGen/ARM/thumb2-it-block.ll | 24 +- test/CodeGen/ARM/thumb_indirect_calls.ll | 5 +- test/CodeGen/ARM/tls-models.ll | 74 +- test/CodeGen/ARM/tls3.ll | 29 +- test/CodeGen/ARM/unaligned_load_store.ll | 4 +- test/CodeGen/ARM/unaligned_load_store_vfp.ll | 98 + test/CodeGen/ARM/usat-lower.ll | 11 + test/CodeGen/ARM/usat-upper.ll | 11 + test/CodeGen/ARM/v7k-abi-align.ll | 152 + test/CodeGen/ARM/v7k-libcalls.ll | 154 + test/CodeGen/ARM/v7k-sincos.ll | 16 + test/CodeGen/ARM/vcge.ll | 4 +- test/CodeGen/ARM/vcombine.ll | 64 +- test/CodeGen/ARM/vcvt_combine.ll | 103 +- test/CodeGen/ARM/vdiv_combine.ll | 17 + test/CodeGen/ARM/vdup.ll | 16 + test/CodeGen/ARM/vector-DAGCombine.ll | 4 +- test/CodeGen/ARM/vector-load.ll | 4 +- test/CodeGen/ARM/vector-store.ll | 6 +- test/CodeGen/ARM/vext.ll | 34 +- test/CodeGen/ARM/vfp-reg-stride.ll | 42 + test/CodeGen/ARM/vfp-regs-dwarf.ll | 6 +- test/CodeGen/ARM/vld-vst-upgrade.ll | 139 + test/CodeGen/ARM/vld1.ll | 52 +- test/CodeGen/ARM/vld2.ll | 40 +- test/CodeGen/ARM/vld3.ll | 42 +- test/CodeGen/ARM/vld4.ll | 42 +- test/CodeGen/ARM/vlddup.ll | 30 +- test/CodeGen/ARM/vldlane.ll | 92 +- test/CodeGen/ARM/vminmaxnm-safe.ll | 396 + test/CodeGen/ARM/vminmaxnm.ll | 358 +- test/CodeGen/ARM/vmov.ll | 4 +- test/CodeGen/ARM/vmul.ll | 14 +- test/CodeGen/ARM/vpadd.ll | 2 +- test/CodeGen/ARM/vselect_imax.ll | 26 +- test/CodeGen/ARM/vst1.ll | 48 +- test/CodeGen/ARM/vst2.ll | 44 +- test/CodeGen/ARM/vst3.ll | 42 +- test/CodeGen/ARM/vst4.ll | 42 +- test/CodeGen/ARM/vstlane.ll | 90 +- test/CodeGen/ARM/vtrn.ll | 124 +- test/CodeGen/ARM/vuzp.ll | 136 +- test/CodeGen/ARM/vzip.ll | 82 +- test/CodeGen/BPF/sockex2.ll | 2 +- test/CodeGen/CPP/gep.ll | 10 + test/CodeGen/Generic/2009-03-17-LSR-APInt.ll | 28 +- test/CodeGen/Generic/ForceStackAlign.ll | 27 + test/CodeGen/Generic/MachineBranchProb.ll | 8 +- test/CodeGen/Generic/dbg_value.ll | 5 +- test/CodeGen/Generic/lit.local.cfg | 3 + .../Generic/overloaded-intrinsic-name.ll | 32 +- test/CodeGen/Generic/vector.ll | 6 + test/CodeGen/Hexagon/NVJumpCmp.ll | 89 + test/CodeGen/Hexagon/absaddr-store.ll | 1 + test/CodeGen/Hexagon/adde.ll | 6 +- test/CodeGen/Hexagon/alu64.ll | 134 +- test/CodeGen/Hexagon/bit-eval.ll | 53 + test/CodeGen/Hexagon/bit-loop.ll | 80 + test/CodeGen/Hexagon/cfi-late.ll | 65 + test/CodeGen/Hexagon/clr_set_toggle.ll | 2 +- test/CodeGen/Hexagon/combine.ll | 2 +- test/CodeGen/Hexagon/combine_ir.ll | 16 +- .../Hexagon/early-if-conversion-bug1.ll | 412 + test/CodeGen/Hexagon/early-if-phi-i1.ll | 17 + test/CodeGen/Hexagon/early-if-spare.ll | 57 + test/CodeGen/Hexagon/early-if.ll | 75 + test/CodeGen/Hexagon/extload-combine.ll | 2 +- 
test/CodeGen/Hexagon/hwloop-dbg.ll | 12 +- test/CodeGen/Hexagon/i16_VarArg.ll | 2 +- test/CodeGen/Hexagon/i1_VarArg.ll | 2 +- test/CodeGen/Hexagon/i8_VarArg.ll | 2 +- test/CodeGen/Hexagon/ifcvt-edge-weight.ll | 64 + test/CodeGen/Hexagon/memcpy-likely-aligned.ll | 32 + test/CodeGen/Hexagon/mux-basic.ll | 28 + test/CodeGen/Hexagon/opt-fabs.ll | 2 +- test/CodeGen/Hexagon/pic-jumptables.ll | 48 + test/CodeGen/Hexagon/pic-simple.ll | 22 + test/CodeGen/Hexagon/pic-static.ll | 21 + test/CodeGen/Hexagon/relax.ll | 9 +- test/CodeGen/Hexagon/sdr-basic.ll | 15 + test/CodeGen/Hexagon/sdr-shr32.ll | 22 + test/CodeGen/Hexagon/simple_addend.ll | 2 +- .../Hexagon/store-widen-aliased-load.ll | 21 + test/CodeGen/Hexagon/store-widen-negv.ll | 11 + test/CodeGen/Hexagon/store-widen-negv2.ll | 19 + test/CodeGen/Hexagon/store-widen.ll | 18 + test/CodeGen/Hexagon/struct_args.ll | 2 +- test/CodeGen/Hexagon/sube.ll | 10 +- test/CodeGen/Hexagon/tail-dup-subreg-abort.ll | 28 + test/CodeGen/Hexagon/tfr-to-combine.ll | 2 +- test/CodeGen/Hexagon/union-1.ll | 2 - test/CodeGen/Hexagon/v60Intrins.ll | 2559 +++ test/CodeGen/Hexagon/v60Vasr.ll | 247 + test/CodeGen/Hexagon/v60small.ll | 51 + test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll | 2 +- test/CodeGen/Hexagon/vect/vect-loadv4i16.ll | 2 +- test/CodeGen/Hexagon/vect/vect-shuffle.ll | 2 +- test/CodeGen/Hexagon/vect/vect-splat.ll | 2 +- test/CodeGen/Hexagon/vect/vect-xor.ll | 2 +- test/CodeGen/Inputs/DbgValueOtherTargets.ll | 8 +- test/CodeGen/MIR/AArch64/cfi-def-cfa.mir | 31 + .../MIR/AArch64/expected-target-flag-name.mir | 23 + .../MIR/AArch64/invalid-target-flag-name.mir | 23 + test/CodeGen/MIR/AArch64/lit.local.cfg | 8 + .../MIR/AArch64/multiple-lhs-operands.mir | 28 + .../MIR/AArch64/stack-object-local-offset.mir | 41 + test/CodeGen/MIR/AArch64/target-flags.mir | 39 + .../MIR/AMDGPU/expected-target-index-name.mir | 64 + .../AMDGPU/invalid-target-index-operand.mir | 64 + test/CodeGen/MIR/AMDGPU/lit.local.cfg | 2 + .../MIR/AMDGPU/target-index-operands.mir | 104 + test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir | 165 + test/CodeGen/MIR/ARM/bundled-instructions.mir | 75 + test/CodeGen/MIR/ARM/cfi-same-value.mir | 80 + .../MIR/ARM/expected-closing-brace.mir | 50 + .../ARM/extraneous-closing-brace-error.mir | 20 + test/CodeGen/MIR/ARM/lit.local.cfg | 2 + .../ARM/nested-instruction-bundle-error.mir | 30 + test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir | 160 + test/CodeGen/MIR/Generic/basic-blocks.mir | 49 + .../expected-colon-after-basic-block.mir} | 7 +- ...pected-mbb-reference-for-successor-mbb.mir | 17 +- test/CodeGen/MIR/{ => Generic}/frame-info.mir | 10 +- .../function-missing-machine-function.mir | 0 .../MIR/Generic/invalid-jump-table-kind.mir | 53 + test/CodeGen/MIR/Generic/lit.local.cfg | 3 + .../{ => Generic}/llvm-ir-error-reported.mir | 0 test/CodeGen/MIR/{ => Generic}/llvmIR.mir | 4 +- .../MIR/{ => Generic}/llvmIRMissing.mir | 4 +- ...machine-basic-block-ir-block-reference.mir | 17 + ...machine-basic-block-redefinition-error.mir | 18 + ...machine-basic-block-undefined-ir-block.mir | 15 + .../machine-basic-block-unknown-name.mir | 7 +- .../machine-function-missing-body-error.mir | 0 .../machine-function-missing-function.mir | 8 +- .../machine-function-missing-name.mir | 8 +- .../machine-function-redefinition-error.mir | 0 .../MIR/{ => Generic}/machine-function.mir | 16 +- .../MIR/{ => Generic}/register-info.mir | 8 +- ...lobal-value-or-symbol-after-call-entry.mir | 41 + test/CodeGen/MIR/Mips/lit.local.cfg | 2 + test/CodeGen/MIR/Mips/memory-operands.mir | 102 + 
.../NVPTX/expected-floating-point-literal.mir | 24 + .../floating-point-immediate-operands.mir | 81 + .../floating-point-invalid-type-error.mir | 24 + test/CodeGen/MIR/NVPTX/lit.local.cfg | 2 + test/CodeGen/MIR/PowerPC/lit.local.cfg | 2 + .../PowerPC/unordered-implicit-registers.mir | 45 + test/CodeGen/MIR/X86/basic-block-liveins.mir | 57 +- ...basic-block-not-at-start-of-line-error.mir | 41 + .../MIR/X86/block-address-operands.mir | 121 + test/CodeGen/MIR/X86/callee-saved-info.mir | 95 + test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir | 29 + test/CodeGen/MIR/X86/cfi-def-cfa-register.mir | 32 + test/CodeGen/MIR/X86/cfi-offset.mir | 47 + .../constant-pool-item-redefinition-error.mir | 25 + test/CodeGen/MIR/X86/constant-pool.mir | 139 + test/CodeGen/MIR/X86/constant-value-error.mir | 25 + test/CodeGen/MIR/X86/dead-register-flag.mir | 14 +- .../X86/def-register-already-tied-error.mir | 25 + .../MIR/X86/duplicate-memory-operand-flag.mir | 27 + .../MIR/X86/duplicate-register-flag-error.mir | 35 + .../MIR/X86/early-clobber-register-flag.mir | 45 + .../X86/expected-align-in-memory-operand.mir | 30 + ...lignment-after-align-in-memory-operand.mir | 30 + .../expected-basic-block-at-start-of-body.mir | 40 + ...pected-block-reference-in-blockaddress.mir | 30 + .../X86/expected-comma-after-cfi-register.mir | 42 + .../expected-comma-after-memory-operand.mir | 25 + .../expected-different-implicit-operand.mir | 28 +- ...ected-different-implicit-register-flag.mir | 28 +- .../X86/expected-from-in-memory-operand.mir | 24 + ...-function-reference-after-blockaddress.mir | 30 + ...pected-global-value-after-blockaddress.mir | 30 + .../expected-integer-after-offset-sign.mir | 24 + .../X86/expected-integer-after-tied-def.mir | 25 + .../expected-integer-in-successor-weight.mir | 38 + ...pected-load-or-store-in-memory-operand.mir | 23 + .../MIR/X86/expected-machine-operand.mir | 12 +- ...ted-metadata-node-after-debug-location.mir | 59 + .../expected-metadata-node-after-exclaim.mir | 59 + ...expected-metadata-node-in-stack-object.mir | 25 + ...cted-named-register-in-allocation-hint.mir | 29 + ...amed-register-in-callee-saved-register.mir | 88 + ...ted-named-register-in-functions-livein.mir | 27 + .../X86/expected-named-register-livein.mir | 15 +- .../X86/expected-newline-at-end-of-list.mir | 41 + .../MIR/X86/expected-number-after-bb.mir | 28 +- .../X86/expected-offset-after-cfi-operand.mir | 27 + ...pected-pointer-value-in-memory-operand.mir | 24 + ...xpected-positive-alignment-after-align.mir | 30 + .../expected-register-after-cfi-operand.mir | 42 + .../MIR/X86/expected-register-after-flags.mir | 12 +- ...ed-size-integer-after-memory-operation.mir | 24 + .../CodeGen/MIR/X86/expected-stack-object.mir | 67 + .../X86/expected-subregister-after-colon.mir | 18 +- .../MIR/X86/expected-target-flag-name.mir | 24 + .../X86/expected-tied-def-after-lparen.mir | 25 + .../X86/expected-value-in-memory-operand.mir | 24 + ...d-virtual-register-in-functions-livein.mir | 27 + .../MIR/X86/external-symbol-operands.mir | 64 + .../MIR/X86/fixed-stack-memory-operands.mir | 39 + .../fixed-stack-object-redefinition-error.mir | 28 + test/CodeGen/MIR/X86/fixed-stack-objects.mir | 12 +- .../X86/frame-info-save-restore-points.mir | 73 + .../MIR/X86/frame-info-stack-references.mir | 79 + .../MIR/X86/frame-setup-instruction-flag.mir | 35 + test/CodeGen/MIR/X86/function-liveins.mir | 37 + .../CodeGen/MIR/X86/global-value-operands.mir | 127 +- test/CodeGen/MIR/X86/immediate-operands.mir | 28 +- .../MIR/X86/implicit-register-flag.mir | 65 +- 
test/CodeGen/MIR/X86/inline-asm-registers.mir | 54 + .../MIR/X86/instructions-debug-location.mir | 98 + .../MIR/X86/invalid-constant-pool-item.mir | 25 + .../MIR/X86/invalid-metadata-node-type.mir | 53 + .../MIR/X86/invalid-target-flag-name.mir | 24 + .../MIR/X86/invalid-tied-def-index-error.mir | 25 + test/CodeGen/MIR/X86/jump-table-info.mir | 150 + .../MIR/X86/jump-table-redefinition-error.mir | 76 + test/CodeGen/MIR/X86/killed-register-flag.mir | 38 +- .../MIR/X86/large-cfi-offset-number-error.mir | 27 + .../MIR/X86/large-immediate-operand-error.mir | 18 + .../MIR/X86/large-index-number-error.mir | 26 +- .../MIR/X86/large-offset-number-error.mir | 24 + .../large-size-in-memory-operand-error.mir | 24 + .../CodeGen/MIR/X86/liveout-register-mask.mir | 42 + .../MIR/X86/machine-basic-block-operands.mir | 68 +- test/CodeGen/MIR/X86/machine-instructions.mir | 14 +- test/CodeGen/MIR/X86/machine-verifier.mir | 22 + test/CodeGen/MIR/X86/memory-operands.mir | 508 + test/CodeGen/MIR/X86/metadata-operands.mir | 63 + .../CodeGen/MIR/X86/missing-closing-quote.mir | 22 + test/CodeGen/MIR/X86/missing-comma.mir | 12 +- .../MIR/X86/missing-implicit-operand.mir | 30 +- test/CodeGen/MIR/X86/missing-instruction.mir | 19 - test/CodeGen/MIR/X86/named-registers.mir | 14 +- test/CodeGen/MIR/X86/newline-handling.mir | 109 + .../MIR/X86/null-register-operands.mir | 14 +- .../MIR/X86/register-mask-operands.mir | 28 +- .../register-operands-target-flag-error.mir | 24 + .../X86/simple-register-allocation-hints.mir | 34 + .../spill-slot-fixed-stack-object-aliased.mir | 12 +- ...pill-slot-fixed-stack-object-immutable.mir | 12 +- .../X86/spill-slot-fixed-stack-objects.mir | 12 +- .../MIR/X86/stack-object-debug-info.mir | 65 + .../MIR/X86/stack-object-invalid-name.mir | 28 + ...ack-object-operand-name-mismatch-error.mir | 33 + .../CodeGen/MIR/X86/stack-object-operands.mir | 45 + .../X86/stack-object-redefinition-error.mir | 37 + test/CodeGen/MIR/X86/stack-objects.mir | 22 +- .../MIR/X86/standalone-register-error.mir | 24 + test/CodeGen/MIR/X86/subregister-operands.mir | 21 +- .../X86/successor-basic-blocks-weights.mir | 42 + .../MIR/X86/successor-basic-blocks.mir | 83 + .../MIR/X86/tied-def-operand-invalid.mir | 25 + test/CodeGen/MIR/X86/undef-register-flag.mir | 26 +- .../MIR/X86/undefined-fixed-stack-object.mir | 38 + .../MIR/X86/undefined-global-value.mir | 16 +- .../undefined-ir-block-in-blockaddress.mir | 30 + ...ndefined-ir-block-slot-in-blockaddress.mir | 29 + .../MIR/X86/undefined-jump-table-id.mir | 73 + .../MIR/X86/undefined-named-global-value.mir | 16 +- .../MIR/X86/undefined-register-class.mir | 8 +- .../MIR/X86/undefined-stack-object.mir | 30 + .../X86/undefined-value-in-memory-operand.mir | 24 + .../MIR/X86/undefined-virtual-register.mir | 14 +- test/CodeGen/MIR/X86/unknown-instruction.mir | 10 +- .../MIR/X86/unknown-machine-basic-block.mir | 26 +- .../MIR/X86/unknown-metadata-keyword.mir | 25 + .../CodeGen/MIR/X86/unknown-metadata-node.mir | 59 + .../X86/unknown-named-machine-basic-block.mir | 28 +- test/CodeGen/MIR/X86/unknown-register.mir | 12 +- .../MIR/X86/unknown-subregister-index.mir | 18 +- .../MIR/X86/unrecognized-character.mir | 10 +- .../MIR/X86/used-physical-register-info.mir | 109 + ...variable-sized-stack-object-size-error.mir | 14 +- .../MIR/X86/variable-sized-stack-objects.mir | 18 +- .../virtual-register-redefinition-error.mir | 27 + test/CodeGen/MIR/X86/virtual-registers.mir | 90 +- test/CodeGen/MIR/basic-blocks.mir | 49 - .../MIR/expected-eof-after-successor-mbb.mir | 29 - 
test/CodeGen/MIR/successor-basic-blocks.mir | 58 - .../Mips/Fast-ISel/check-disabled-mcpus.ll | 27 + test/CodeGen/Mips/addi.ll | 2 +- test/CodeGen/Mips/adjust-callstack-sp.ll | 2 +- test/CodeGen/Mips/align16.ll | 2 +- test/CodeGen/Mips/alloca16.ll | 2 +- test/CodeGen/Mips/and1.ll | 2 +- test/CodeGen/Mips/asm-large-immediate.ll | 3 +- test/CodeGen/Mips/atomicops.ll | 2 +- test/CodeGen/Mips/beqzc.ll | 2 +- test/CodeGen/Mips/beqzc1.ll | 2 +- test/CodeGen/Mips/br-jmp.ll | 4 +- test/CodeGen/Mips/brconeq.ll | 2 +- test/CodeGen/Mips/brconeqk.ll | 2 +- test/CodeGen/Mips/brconeqz.ll | 2 +- test/CodeGen/Mips/brconge.ll | 2 +- test/CodeGen/Mips/brcongt.ll | 2 +- test/CodeGen/Mips/brconle.ll | 2 +- test/CodeGen/Mips/brconlt.ll | 2 +- test/CodeGen/Mips/brconne.ll | 2 +- test/CodeGen/Mips/brconnek.ll | 2 +- test/CodeGen/Mips/brconnez.ll | 2 +- test/CodeGen/Mips/brind.ll | 2 +- test/CodeGen/Mips/brsize3.ll | 4 +- test/CodeGen/Mips/brsize3a.ll | 2 +- test/CodeGen/Mips/cconv/arguments-varargs.ll | 72 +- test/CodeGen/Mips/ci2.ll | 2 +- test/CodeGen/Mips/cmplarge.ll | 2 +- test/CodeGen/Mips/const1.ll | 2 +- test/CodeGen/Mips/const4a.ll | 2 +- test/CodeGen/Mips/const6.ll | 4 +- test/CodeGen/Mips/const6a.ll | 4 +- test/CodeGen/Mips/div.ll | 2 +- test/CodeGen/Mips/div_rem.ll | 2 +- test/CodeGen/Mips/divu.ll | 2 +- test/CodeGen/Mips/divu_remu.ll | 2 +- test/CodeGen/Mips/eh.ll | 2 +- .../Mips/emergency-spill-slot-near-fp.ll | 4 +- test/CodeGen/Mips/emutls_generic.ll | 70 + test/CodeGen/Mips/ex2.ll | 2 +- test/CodeGen/Mips/extins.ll | 2 +- test/CodeGen/Mips/f16abs.ll | 2 +- test/CodeGen/Mips/fixdfsf.ll | 4 +- test/CodeGen/Mips/fp16instrinsmc.ll | 4 +- test/CodeGen/Mips/fp16mix.ll | 6 +- test/CodeGen/Mips/fp16static.ll | 2 +- test/CodeGen/Mips/helloworld.ll | 12 +- test/CodeGen/Mips/hf16_1.ll | 4 +- test/CodeGen/Mips/hf16call32.ll | 408 +- test/CodeGen/Mips/hf16call32_body.ll | 206 +- test/CodeGen/Mips/hf1_body.ll | 18 +- test/CodeGen/Mips/hfptrcall.ll | 2 +- test/CodeGen/Mips/i32k.ll | 2 +- .../Mips/inlineasm-assembler-directives.ll | 4 +- test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll | 36 +- test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll | 4 +- test/CodeGen/Mips/inlineasm-operand-code.ll | 185 +- test/CodeGen/Mips/inlineasm_constraint.ll | 118 +- test/CodeGen/Mips/inlineasmmemop.ll | 8 +- test/CodeGen/Mips/insn-zero-size-bb.ll | 4 +- test/CodeGen/Mips/interrupt-attr-64-error.ll | 9 + .../CodeGen/Mips/interrupt-attr-args-error.ll | 9 + test/CodeGen/Mips/interrupt-attr-error.ll | 9 + test/CodeGen/Mips/interrupt-attr.ll | 244 + test/CodeGen/Mips/jtstat.ll | 2 +- test/CodeGen/Mips/l3mc.ll | 20 +- test/CodeGen/Mips/lb1.ll | 2 +- test/CodeGen/Mips/lbu1.ll | 2 +- test/CodeGen/Mips/lcb2.ll | 4 +- test/CodeGen/Mips/lcb3c.ll | 2 +- test/CodeGen/Mips/lcb4a.ll | 2 +- test/CodeGen/Mips/lcb5.ll | 2 +- test/CodeGen/Mips/lh1.ll | 2 +- test/CodeGen/Mips/lhu1.ll | 2 +- test/CodeGen/Mips/llcarry.ll | 2 +- test/CodeGen/Mips/llvm-ir/atomicrmx.ll | 26 + test/CodeGen/Mips/llvm-ir/call.ll | 14 + test/CodeGen/Mips/llvm-ir/load-atomic.ll | 42 + test/CodeGen/Mips/llvm-ir/sqrt.ll | 13 + test/CodeGen/Mips/llvm-ir/store-atomic.ll | 42 + test/CodeGen/Mips/madd-msub.ll | 2 +- test/CodeGen/Mips/mbrsize4a.ll | 2 +- test/CodeGen/Mips/mips16-hf-attr-2.ll | 2 +- test/CodeGen/Mips/mips16-hf-attr.ll | 2 +- test/CodeGen/Mips/mips16_32_1.ll | 2 +- test/CodeGen/Mips/mips16_32_10.ll | 2 +- test/CodeGen/Mips/mips16_32_3.ll | 2 +- test/CodeGen/Mips/mips16_32_4.ll | 2 +- test/CodeGen/Mips/mips16_32_5.ll | 2 +- test/CodeGen/Mips/mips16_32_6.ll | 2 +- 
test/CodeGen/Mips/mips16_32_7.ll | 2 +- test/CodeGen/Mips/mips16_fpret.ll | 8 +- test/CodeGen/Mips/mips16ex.ll | 2 +- test/CodeGen/Mips/mips16fpe.ll | 6 +- test/CodeGen/Mips/misha.ll | 2 +- test/CodeGen/Mips/msa/elm_copy.ll | 5 +- test/CodeGen/Mips/mul.ll | 2 +- test/CodeGen/Mips/mulll.ll | 2 +- test/CodeGen/Mips/mulull.ll | 2 +- test/CodeGen/Mips/nacl-align.ll | 7 +- test/CodeGen/Mips/neg1.ll | 2 +- test/CodeGen/Mips/no-odd-spreg-msa.ll | 24 +- test/CodeGen/Mips/nomips16.ll | 2 +- test/CodeGen/Mips/not1.ll | 2 +- test/CodeGen/Mips/null.ll | 2 +- test/CodeGen/Mips/or1.ll | 2 +- test/CodeGen/Mips/powif64_16.ll | 2 +- test/CodeGen/Mips/rem.ll | 2 +- test/CodeGen/Mips/remu.ll | 2 +- test/CodeGen/Mips/s2rem.ll | 4 +- test/CodeGen/Mips/sb1.ll | 2 +- test/CodeGen/Mips/sel1c.ll | 2 +- test/CodeGen/Mips/sel2c.ll | 2 +- test/CodeGen/Mips/selTBteqzCmpi.ll | 2 +- test/CodeGen/Mips/selTBtnezCmpi.ll | 2 +- test/CodeGen/Mips/selTBtnezSlti.ll | 2 +- test/CodeGen/Mips/seleq.ll | 2 +- test/CodeGen/Mips/seleqk.ll | 2 +- test/CodeGen/Mips/selgek.ll | 2 +- test/CodeGen/Mips/selgt.ll | 2 +- test/CodeGen/Mips/selle.ll | 2 +- test/CodeGen/Mips/selltk.ll | 2 +- test/CodeGen/Mips/selne.ll | 2 +- test/CodeGen/Mips/selnek.ll | 2 +- test/CodeGen/Mips/selpat.ll | 2 +- test/CodeGen/Mips/seteq.ll | 2 +- test/CodeGen/Mips/seteqz.ll | 2 +- test/CodeGen/Mips/setge.ll | 2 +- test/CodeGen/Mips/setgek.ll | 2 +- test/CodeGen/Mips/setle.ll | 2 +- test/CodeGen/Mips/setlt.ll | 2 +- test/CodeGen/Mips/setltk.ll | 2 +- test/CodeGen/Mips/setne.ll | 2 +- test/CodeGen/Mips/setuge.ll | 2 +- test/CodeGen/Mips/setugt.ll | 2 +- test/CodeGen/Mips/setule.ll | 2 +- test/CodeGen/Mips/setult.ll | 2 +- test/CodeGen/Mips/setultk.ll | 2 +- test/CodeGen/Mips/sh1.ll | 2 +- test/CodeGen/Mips/simplebr.ll | 2 +- test/CodeGen/Mips/sitofp-selectcc-opt.ll | 3 +- test/CodeGen/Mips/sll1.ll | 2 +- test/CodeGen/Mips/sll2.ll | 2 +- test/CodeGen/Mips/sr1.ll | 4 +- test/CodeGen/Mips/sra1.ll | 2 +- test/CodeGen/Mips/sra2.ll | 2 +- test/CodeGen/Mips/srl1.ll | 2 +- test/CodeGen/Mips/srl2.ll | 2 +- test/CodeGen/Mips/stchar.ll | 4 +- test/CodeGen/Mips/stldst.ll | 2 +- test/CodeGen/Mips/sub1.ll | 2 +- test/CodeGen/Mips/sub2.ll | 2 +- test/CodeGen/Mips/tail16.ll | 2 +- test/CodeGen/Mips/tailcall.ll | 2 +- test/CodeGen/Mips/tls-alias.ll | 2 +- test/CodeGen/Mips/tls16.ll | 2 +- test/CodeGen/Mips/tls16_2.ll | 2 +- test/CodeGen/Mips/trap1.ll | 2 +- test/CodeGen/Mips/ul1.ll | 2 +- test/CodeGen/Mips/xor1.ll | 2 +- test/CodeGen/NVPTX/branch-fold.ll | 40 + test/CodeGen/NVPTX/bypass-div.ll | 80 + test/CodeGen/NVPTX/combine-min-max.ll | 307 + test/CodeGen/NVPTX/fma-assoc.ll | 13 + test/CodeGen/NVPTX/global-addrspace.ll | 12 + .../NVPTX/load-with-non-coherent-cache.ll | 264 + test/CodeGen/NVPTX/lower-aggr-copies.ll | 118 +- test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 20 +- test/CodeGen/NVPTX/reg-copy.ll | 224 + test/CodeGen/NVPTX/symbol-naming.ll | 4 +- test/CodeGen/NVPTX/vector-call.ll | 2 +- .../PowerPC/2006-01-20-ShiftPartsCrash.ll | 1 + .../PowerPC/2006-08-15-SelectionCrash.ll | 1 + .../CodeGen/PowerPC/2006-12-07-LargeAlloca.ll | 1 + .../CodeGen/PowerPC/2006-12-07-SelectCrash.ll | 1 + .../PowerPC/2007-11-19-VectorSplitting.ll | 1 + test/CodeGen/PowerPC/BoolRetToIntTest.ll | 203 + .../CodeGen/PowerPC/BreakableToken-reduced.ll | 335 + test/CodeGen/PowerPC/aantidep-def-ec.mir | 117 + .../PowerPC/aantidep-inline-asm-use.ll | 305 + test/CodeGen/PowerPC/addisdtprelha-nonr3.mir | 80 + test/CodeGen/PowerPC/alias.ll | 4 +- test/CodeGen/PowerPC/bitcasts-direct-move.ll | 83 + 
test/CodeGen/PowerPC/bitreverse.ll | 23 + test/CodeGen/PowerPC/branch-hint.ll | 135 + test/CodeGen/PowerPC/coal-sections.ll | 24 + test/CodeGen/PowerPC/crbit-asm-disabled.ll | 16 + test/CodeGen/PowerPC/crbit-asm.ll | 3 +- test/CodeGen/PowerPC/cttz.ll | 2 +- test/CodeGen/PowerPC/dbg.ll | 10 +- test/CodeGen/PowerPC/dyn-alloca-offset.ll | 21 + test/CodeGen/PowerPC/e500-1.ll | 30 + test/CodeGen/PowerPC/emutls_generic.ll | 41 + test/CodeGen/PowerPC/fast-isel-binary.ll | 26 +- test/CodeGen/PowerPC/fast-isel-br-const.ll | 2 +- test/CodeGen/PowerPC/fast-isel-call.ll | 14 +- test/CodeGen/PowerPC/fast-isel-cmp-imm.ll | 34 +- test/CodeGen/PowerPC/fast-isel-const.ll | 2 +- .../PowerPC/fast-isel-conversion-p5.ll | 20 +- test/CodeGen/PowerPC/fast-isel-conversion.ll | 48 +- test/CodeGen/PowerPC/fast-isel-crash.ll | 4 +- test/CodeGen/PowerPC/fast-isel-ext.ll | 20 +- test/CodeGen/PowerPC/fast-isel-fold.ll | 26 +- test/CodeGen/PowerPC/fast-isel-indirectbr.ll | 2 +- test/CodeGen/PowerPC/fast-isel-load-store.ll | 34 +- .../CodeGen/PowerPC/fast-isel-redefinition.ll | 2 +- test/CodeGen/PowerPC/fast-isel-ret.ll | 52 +- test/CodeGen/PowerPC/fast-isel-shifter.ll | 12 +- .../fastisel-gep-promote-before-add.ll | 2 +- .../PowerPC/fma-mutate-register-constraint.ll | 89 + .../fp-int-conversions-direct-moves.ll | 24 +- .../PowerPC/fp128-bitcast-after-operation.ll | 137 + test/CodeGen/PowerPC/load-shift-combine.ll | 1 + test/CodeGen/PowerPC/long-compare.ll | 2 +- test/CodeGen/PowerPC/machine-combiner.ll | 188 + test/CodeGen/PowerPC/mc-instrlat.ll | 25 + test/CodeGen/PowerPC/mcm-13.ll | 27 + test/CodeGen/PowerPC/memcpy-vec.ll | 7 +- test/CodeGen/PowerPC/merge-st-chain-op.ll | 41 + .../PowerPC/p8-scalar_vector_conversions.ll | 1476 ++ test/CodeGen/PowerPC/peephole-align.ll | 335 + test/CodeGen/PowerPC/ppc-shrink-wrapping.ll | 784 + test/CodeGen/PowerPC/ppc32-i1-vaarg.ll | 2 +- test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll | 8 +- test/CodeGen/PowerPC/ppcsoftops.ll | 50 + test/CodeGen/PowerPC/pr17168.ll | 366 +- test/CodeGen/PowerPC/pr24546.ll | 22 +- test/CodeGen/PowerPC/pr24636.ll | 41 + test/CodeGen/PowerPC/pr25157-peephole.ll | 61 + .../PowerPC/preincprep-nontrans-crash.ll | 94 + test/CodeGen/PowerPC/qpx-unal-cons-lds.ll | 217 + test/CodeGen/PowerPC/retaddr2.ll | 6 +- test/CodeGen/PowerPC/rm-zext.ll | 6 +- test/CodeGen/PowerPC/rotl-rotr-crash.ll | 12 + test/CodeGen/PowerPC/sdiv-pow2.ll | 8 +- .../selectiondag-extload-computeknownbits.ll | 12 + test/CodeGen/PowerPC/seteq-0.ll | 2 +- test/CodeGen/PowerPC/sjlj.ll | 20 +- test/CodeGen/PowerPC/stack-realign.ll | 26 +- test/CodeGen/PowerPC/stackmap-frame-setup.ll | 20 + test/CodeGen/PowerPC/swaps-le-5.ll | 4 +- test/CodeGen/PowerPC/swaps-le-6.ll | 42 + test/CodeGen/PowerPC/unal-vec-ldst.ll | 580 + test/CodeGen/PowerPC/unal-vec-negarith.ll | 17 + test/CodeGen/PowerPC/unwind-dw2-g.ll | 6 +- .../PowerPC/variable_elem_vec_extracts.ll | 114 + test/CodeGen/PowerPC/vec-asm-disabled.ll | 14 + test/CodeGen/PowerPC/vec_add_sub_quadword.ll | 6 +- .../vector-merge-store-fp-constants.ll | 28 + test/CodeGen/PowerPC/vsx.ll | 5 +- test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 6 +- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll | 6 +- test/CodeGen/PowerPC/vsx_shuffle_le.ll | 20 +- test/CodeGen/SPARC/2011-01-22-SRet.ll | 2 +- test/CodeGen/SPARC/32abi.ll | 191 + test/CodeGen/SPARC/64abi.ll | 84 +- test/CodeGen/SPARC/basictest.ll | 21 +- test/CodeGen/SPARC/float-constants.ll | 41 + test/CodeGen/SPARC/float.ll | 10 +- test/CodeGen/SPARC/fp128.ll | 4 +- test/CodeGen/SPARC/inlineasm.ll | 53 +- 
test/CodeGen/SPARC/missing-sret.ll | 9 + test/CodeGen/SPARC/reserved-regs.ll | 135 + test/CodeGen/SPARC/select-mask.ll | 17 + test/CodeGen/SPARC/spill.ll | 64 + test/CodeGen/SPARC/stack-align.ll | 22 + test/CodeGen/SPARC/tls.ll | 2 +- test/CodeGen/SPARC/varargs.ll | 2 +- test/CodeGen/SystemZ/alloca-03.ll | 84 + test/CodeGen/SystemZ/alloca-04.ll | 14 + test/CodeGen/SystemZ/args-01.ll | 4 +- test/CodeGen/SystemZ/args-02.ll | 4 +- test/CodeGen/SystemZ/args-03.ll | 4 +- test/CodeGen/SystemZ/args-04.ll | 2 +- test/CodeGen/SystemZ/args-07.ll | 2 +- test/CodeGen/SystemZ/asm-17.ll | 3 +- test/CodeGen/SystemZ/asm-18.ll | 3 +- test/CodeGen/SystemZ/dag-combine-01.ll | 97 + test/CodeGen/SystemZ/fp-abs-01.ll | 4 +- test/CodeGen/SystemZ/fp-abs-02.ll | 4 +- test/CodeGen/SystemZ/fp-add-02.ll | 2 +- test/CodeGen/SystemZ/fp-cmp-02.ll | 5 +- test/CodeGen/SystemZ/fp-cmp-05.ll | 80 + test/CodeGen/SystemZ/fp-const-02.ll | 4 +- test/CodeGen/SystemZ/fp-libcall.ll | 273 + test/CodeGen/SystemZ/fp-move-05.ll | 2 +- test/CodeGen/SystemZ/fp-neg-01.ll | 4 +- test/CodeGen/SystemZ/fp-sincos-01.ll | 56 + test/CodeGen/SystemZ/insert-05.ll | 4 +- test/CodeGen/SystemZ/int-cmp-44.ll | 3 +- test/CodeGen/SystemZ/int-cmp-51.ll | 34 + test/CodeGen/SystemZ/int-cmp-52.ll | 24 + test/CodeGen/SystemZ/memchr-01.ll | 2 +- test/CodeGen/SystemZ/spill-01.ll | 2 +- test/CodeGen/SystemZ/vec-args-04.ll | 26 +- test/CodeGen/SystemZ/vec-args-05.ll | 10 +- test/CodeGen/SystemZ/vec-perm-12.ll | 43 + test/CodeGen/SystemZ/vec-perm-13.ll | 38 + test/CodeGen/SystemZ/xor-01.ll | 2 +- .../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 14 +- .../Thumb/cortex-m0-unaligned-access.ll | 2 +- test/CodeGen/Thumb/large-stack.ll | 20 +- .../ldm-stm-base-materialization-thumb2.ll | 93 + .../Thumb/ldm-stm-base-materialization.ll | 77 +- test/CodeGen/Thumb/pop.ll | 4 +- test/CodeGen/Thumb/segmented-stacks.ll | 24 +- test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll | 36 - test/CodeGen/Thumb/thumb-shrink-wrapping.ll | 691 + test/CodeGen/Thumb/vargs.ll | 6 +- test/CodeGen/Thumb2/crash.ll | 14 +- test/CodeGen/Thumb2/emit-unwinding.ll | 11 + test/CodeGen/Thumb2/float-cmp.ll | 44 +- .../CodeGen/Thumb2/float-intrinsics-double.ll | 11 +- test/CodeGen/Thumb2/float-intrinsics-float.ll | 4 +- test/CodeGen/Thumb2/ifcvt-compare.ll | 6 +- test/CodeGen/Thumb2/machine-licm.ll | 8 +- test/CodeGen/Thumb2/pic-load.ll | 12 +- test/CodeGen/Thumb2/setjmp_longjmp.ll | 89 + test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 14 +- test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 4 +- test/CodeGen/Thumb2/thumb2-mulhi.ll | 2 +- test/CodeGen/Thumb2/thumb2-smla.ll | 4 +- test/CodeGen/Thumb2/thumb2-smul.ll | 2 +- test/CodeGen/Thumb2/thumb2-spill-q.ll | 28 +- test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 8 +- test/CodeGen/Thumb2/v8_IT_1.ll | 4 +- test/CodeGen/Thumb2/v8_IT_3.ll | 5 +- test/CodeGen/Thumb2/v8_IT_5.ll | 4 +- test/CodeGen/WebAssembly/call.ll | 127 + test/CodeGen/WebAssembly/cfg-stackify.ll | 1102 ++ test/CodeGen/WebAssembly/comparisons_f32.ll | 181 + test/CodeGen/WebAssembly/comparisons_f64.ll | 181 + test/CodeGen/WebAssembly/comparisons_i32.ll | 98 + test/CodeGen/WebAssembly/comparisons_i64.ll | 98 + test/CodeGen/WebAssembly/conv.ll | 255 + test/CodeGen/WebAssembly/copysign-casts.ll | 28 + test/CodeGen/WebAssembly/cpus.ll | 17 + test/CodeGen/WebAssembly/dead-vreg.ll | 51 + test/CodeGen/WebAssembly/f32.ll | 154 + test/CodeGen/WebAssembly/f64.ll | 154 + test/CodeGen/WebAssembly/fast-isel.ll | 20 + test/CodeGen/WebAssembly/frem.ll | 26 + test/CodeGen/WebAssembly/func.ll | 62 + test/CodeGen/WebAssembly/global.ll | 177 + 
test/CodeGen/WebAssembly/globl.ll | 10 + test/CodeGen/WebAssembly/i32.ll | 190 + test/CodeGen/WebAssembly/i64.ll | 190 + test/CodeGen/WebAssembly/ident.ll | 12 + test/CodeGen/WebAssembly/immediates.ll | 198 + test/CodeGen/WebAssembly/inline-asm.ll | 94 + test/CodeGen/WebAssembly/legalize.ll | 62 + test/CodeGen/WebAssembly/load-ext.ll | 96 + test/CodeGen/WebAssembly/load-store-i1.ll | 68 + test/CodeGen/WebAssembly/load.ll | 46 + test/CodeGen/WebAssembly/loop-idiom.ll | 53 + test/CodeGen/WebAssembly/memory-addr32.ll | 27 + test/CodeGen/WebAssembly/memory-addr64.ll | 27 + test/CodeGen/WebAssembly/offset-folding.ll | 48 + test/CodeGen/WebAssembly/offset.ll | 185 + test/CodeGen/WebAssembly/phi.ll | 47 + test/CodeGen/WebAssembly/reg-stackify.ll | 126 + test/CodeGen/WebAssembly/return-int32.ll | 10 + test/CodeGen/WebAssembly/return-void.ll | 10 + test/CodeGen/WebAssembly/returned.ll | 49 + test/CodeGen/WebAssembly/select.ll | 135 + test/CodeGen/WebAssembly/signext-zeroext.ll | 60 + test/CodeGen/WebAssembly/store-results.ll | 61 + test/CodeGen/WebAssembly/store-trunc.ll | 46 + test/CodeGen/WebAssembly/store.ll | 42 + test/CodeGen/WebAssembly/switch.ll | 174 + test/CodeGen/WebAssembly/unreachable.ll | 34 + test/CodeGen/WebAssembly/unused-argument.ll | 31 + test/CodeGen/WebAssembly/userstack.ll | 81 + test/CodeGen/WebAssembly/varargs.ll | 123 + test/CodeGen/WebAssembly/vtable.ll | 171 + test/CodeGen/WinEH/cppeh-alloca-sink.ll | 180 - test/CodeGen/WinEH/cppeh-catch-all-win32.ll | 86 - test/CodeGen/WinEH/cppeh-catch-all.ll | 97 - test/CodeGen/WinEH/cppeh-catch-and-throw.ll | 143 - test/CodeGen/WinEH/cppeh-catch-scalar.ll | 126 - test/CodeGen/WinEH/cppeh-catch-unwind.ll | 240 - test/CodeGen/WinEH/cppeh-cleanup-invoke.ll | 91 - test/CodeGen/WinEH/cppeh-demote-liveout.ll | 72 - test/CodeGen/WinEH/cppeh-frame-vars.ll | 272 - test/CodeGen/WinEH/cppeh-inalloca.ll | 194 - test/CodeGen/WinEH/cppeh-min-unwind.ll | 99 - .../WinEH/cppeh-mixed-catch-and-cleanup.ll | 106 - test/CodeGen/WinEH/cppeh-multi-catch.ll | 226 - test/CodeGen/WinEH/cppeh-nested-1.ll | 194 - test/CodeGen/WinEH/cppeh-nested-2.ll | 324 - test/CodeGen/WinEH/cppeh-nested-3.ll | 260 - test/CodeGen/WinEH/cppeh-nested-rethrow.ll | 212 - .../WinEH/cppeh-nonalloca-frame-values.ll | 278 - .../CodeGen/WinEH/cppeh-prepared-catch-all.ll | 47 - .../WinEH/cppeh-prepared-catch-reordered.ll | 165 - test/CodeGen/WinEH/cppeh-prepared-catch.ll | 232 - test/CodeGen/WinEH/cppeh-prepared-cleanups.ll | 245 - .../CodeGen/WinEH/cppeh-shared-empty-catch.ll | 110 - .../WinEH/cppeh-similar-catch-blocks.ll | 394 - test/CodeGen/WinEH/cppeh-state-calc-1.ll | 289 - test/CodeGen/WinEH/seh-catch-all.ll | 59 - test/CodeGen/WinEH/seh-exception-code.ll | 66 - test/CodeGen/WinEH/seh-exception-code2.ll | 91 - test/CodeGen/WinEH/seh-inlined-finally.ll | 83 - .../WinEH/seh-outlined-finally-win32.ll | 172 - test/CodeGen/WinEH/seh-outlined-finally.ll | 155 - test/CodeGen/WinEH/seh-prepared-basic.ll | 83 - test/CodeGen/WinEH/seh-resume-phi.ll | 66 - test/CodeGen/WinEH/seh-simple.ll | 233 - test/CodeGen/WinEH/wineh-cloning.ll | 391 + test/CodeGen/WinEH/wineh-demotion.ll | 356 + .../CodeGen/WinEH/wineh-intrinsics-invalid.ll | 26 + test/CodeGen/WinEH/wineh-intrinsics.ll | 44 + test/CodeGen/WinEH/wineh-no-demotion.ll | 130 + .../WinEH/wineh-statenumbering-cleanups.ll | 62 + test/CodeGen/WinEH/wineh-statenumbering.ll | 148 + test/CodeGen/X86/2006-10-02-BoolRetCrash.ll | 1 + .../2006-10-19-SwitchUnnecessaryBranching.ll | 4 +- test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll | 2 +- 
.../X86/2008-03-12-ThreadLocalAlias.ll | 2 +- test/CodeGen/X86/2008-03-14-SpillerCrash.ll | 2 +- .../X86/2008-06-13-NotVolatileLoadStore.ll | 8 +- test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 15 +- test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll | 8 +- .../X86/2009-05-23-dagcombine-shifts.ll | 14 +- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 8 +- .../X86/2009-06-05-VariableIndexInsert.ll | 1 + test/CodeGen/X86/2009-06-06-ConcatVectors.ll | 1 + test/CodeGen/X86/2009-10-16-Scope.ll | 6 +- test/CodeGen/X86/2010-01-18-DbgValue.ll | 8 +- test/CodeGen/X86/2010-02-01-DbgValueCrash.ll | 8 +- test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 22 +- test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 20 +- test/CodeGen/X86/2010-05-28-Crash.ll | 18 +- .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 28 +- test/CodeGen/X86/2010-07-06-DbgCrash.ll | 7 +- test/CodeGen/X86/2010-08-04-StackVariable.ll | 24 +- test/CodeGen/X86/2010-09-16-EmptyFilename.ll | 10 +- test/CodeGen/X86/2010-11-02-DbgParameter.ll | 8 +- .../X86/2011-01-24-DbgValue-Before-Use.ll | 20 +- test/CodeGen/X86/2011-10-21-widen-cmp.ll | 42 +- .../X86/2011-12-06-AVXVectorExtractCombine.ll | 13 +- test/CodeGen/X86/2011-20-21-zext-ui2fp.ll | 14 +- test/CodeGen/X86/2012-01-12-extract-sv.ll | 28 +- .../CodeGen/X86/2012-08-17-legalizer-crash.ll | 3 +- test/CodeGen/X86/2012-1-10-buildvector.ll | 1 + test/CodeGen/X86/2012-11-30-handlemove-dbg.ll | 8 +- test/CodeGen/X86/2012-11-30-misched-dbg.ll | 16 +- test/CodeGen/X86/2012-11-30-regpres-dbg.ll | 8 +- test/CodeGen/X86/3dnow-intrinsics.ll | 4 +- test/CodeGen/X86/GC/alloc_loop.ll | 1 + test/CodeGen/X86/GC/cg-O0.ll | 1 + test/CodeGen/X86/GC/dynamic-frame-size.ll | 10 +- test/CodeGen/X86/GC/lower_gcroot.ll | 1 + test/CodeGen/X86/MachineBranchProb.ll | 4 +- test/CodeGen/X86/MachineSink-DbgValue.ll | 12 +- test/CodeGen/X86/MergeConsecutiveStores.ll | 37 +- test/CodeGen/X86/StackColoring-dbg.ll | 6 +- test/CodeGen/X86/add-nsw-sext.ll | 168 + test/CodeGen/X86/aliases.ll | 26 +- test/CodeGen/X86/and-encoding.ll | 41 + test/CodeGen/X86/atomic-flags.ll | 61 + test/CodeGen/X86/atomic-minmax-i6432.ll | 8 +- test/CodeGen/X86/atomic-non-integer.ll | 108 + test/CodeGen/X86/atomic128.ll | 52 +- test/CodeGen/X86/atomic_mi.ll | 662 +- test/CodeGen/X86/avg.ll | 724 + test/CodeGen/X86/avx-cvt-2.ll | 1 + test/CodeGen/X86/avx-cvt.ll | 6 +- .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 66 + test/CodeGen/X86/avx-intrinsics-x86.ll | 685 +- test/CodeGen/X86/avx-isa-check.ll | 570 + test/CodeGen/X86/avx-load-store.ll | 4 +- test/CodeGen/X86/avx-logic.ll | 2 + test/CodeGen/X86/avx-shift.ll | 1 + test/CodeGen/X86/avx-shuffle-x86_32.ll | 26 +- test/CodeGen/X86/avx-splat.ll | 114 +- test/CodeGen/X86/avx-vbroadcast.ll | 261 +- test/CodeGen/X86/avx-vperm2x128.ll | 44 +- test/CodeGen/X86/avx-win64.ll | 2 - test/CodeGen/X86/avx.ll | 6 +- test/CodeGen/X86/avx2-conversions.ll | 131 +- .../X86/avx2-intrinsics-x86-upgrade.ll | 120 + test/CodeGen/X86/avx2-intrinsics-x86.ll | 94 +- test/CodeGen/X86/avx2-nontemporal.ll | 17 +- test/CodeGen/X86/avx2-vbroadcast.ll | 441 +- test/CodeGen/X86/avx512-arith.ll | 358 +- test/CodeGen/X86/avx512-bugfix-25270.ll | 35 + test/CodeGen/X86/avx512-build-vector.ll | 1 + test/CodeGen/X86/avx512-calling-conv.ll | 481 +- test/CodeGen/X86/avx512-cvt.ll | 119 +- test/CodeGen/X86/avx512-ext.ll | 1835 ++ test/CodeGen/X86/avx512-extract-subvector.ll | 56 + test/CodeGen/X86/avx512-fma.ll | 155 +- .../X86/avx512-gather-scatter-intrin.ll | 185 +- test/CodeGen/X86/avx512-insert-extract.ll | 519 +- 
test/CodeGen/X86/avx512-intrinsics.ll | 4955 ++++- test/CodeGen/X86/avx512-logic.ll | 164 +- test/CodeGen/X86/avx512-mask-op.ll | 1474 +- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 135 + test/CodeGen/X86/avx512-trunc-ext.ll | 961 - test/CodeGen/X86/avx512-trunc.ll | 488 + test/CodeGen/X86/avx512-vbroadcast.ll | 262 +- test/CodeGen/X86/avx512-vec-cmp.ll | 27 +- test/CodeGen/X86/avx512bw-intrinsics.ll | 2658 ++- test/CodeGen/X86/avx512bwvl-intrinsics.ll | 748 + test/CodeGen/X86/avx512cd-intrinsics.ll | 18 + test/CodeGen/X86/avx512cdvl-intrinsics.ll | 179 + test/CodeGen/X86/avx512dq-intrinsics.ll | 667 + test/CodeGen/X86/avx512dqvl-intrinsics.ll | 818 +- test/CodeGen/X86/avx512vl-intrinsics.ll | 2977 ++- test/CodeGen/X86/bit-piece-comment.ll | 64 + test/CodeGen/X86/bitreverse.ll | 22 + test/CodeGen/X86/branchfolding-catchpads.ll | 95 + test/CodeGen/X86/buildvec-insertvec.ll | 1 + test/CodeGen/X86/catchpad-realign-savexmm.ll | 53 + test/CodeGen/X86/catchpad-regmask.ll | 144 + test/CodeGen/X86/catchpad-weight.ll | 82 + .../CodeGen/X86/catchret-empty-fallthrough.ll | 53 + test/CodeGen/X86/catchret-fallthrough.ll | 42 + test/CodeGen/X86/cleanuppad-inalloca.ll | 68 + .../CodeGen/X86/cleanuppad-large-codemodel.ll | 27 + test/CodeGen/X86/cleanuppad-realign.ll | 78 + test/CodeGen/X86/clz.ll | 148 +- test/CodeGen/X86/cmp.ll | 44 + test/CodeGen/X86/cmpxchg-clobber-flags.ll | 144 +- test/CodeGen/X86/coal-sections.ll | 23 + test/CodeGen/X86/coalescer-win64.ll | 16 + .../X86/code_placement_cold_loop_blocks.ll | 122 + ...ode_placement_ignore_succ_in_inner_loop.ll | 123 + .../X86/code_placement_loop_rotation.ll | 80 + .../X86/code_placement_loop_rotation2.ll | 122 + test/CodeGen/X86/codegen-prepare-cast.ll | 2 +- test/CodeGen/X86/coff-comdat.ll | 2 +- test/CodeGen/X86/combine-and.ll | 1 + test/CodeGen/X86/combine-avx-intrinsics.ll | 59 - test/CodeGen/X86/combine-avx2-intrinsics.ll | 74 - test/CodeGen/X86/combine-multiplies.ll | 163 + test/CodeGen/X86/combine-or.ll | 1 + test/CodeGen/X86/combine-sse2-intrinsics.ll | 53 - test/CodeGen/X86/combine-sse41-intrinsics.ll | 91 - test/CodeGen/X86/commute-two-addr.ll | 2 +- test/CodeGen/X86/constant-hoisting-and.ll | 19 + test/CodeGen/X86/constant-hoisting-cmp.ll | 25 + .../X86/copysign-constant-magnitude.ll | 24 +- test/CodeGen/X86/cppeh-nounwind.ll | 35 - test/CodeGen/X86/cxx_tlscc64.ll | 71 + test/CodeGen/X86/dag-fmf-cse.ll | 22 + test/CodeGen/X86/dag-merge-fast-accesses.ll | 90 + test/CodeGen/X86/darwin-tls.ll | 28 + .../X86/dbg-changes-codegen-branch-folding.ll | 48 +- test/CodeGen/X86/dbg-changes-codegen.ll | 9 +- test/CodeGen/X86/dbg-combine.ll | 12 +- test/CodeGen/X86/debugloc-argsize.ll | 58 + test/CodeGen/X86/divide-by-constant.ll | 32 + test/CodeGen/X86/dllexport-x86_64.ll | 10 +- test/CodeGen/X86/dllexport.ll | 8 +- test/CodeGen/X86/dwarf-comp-dir.ll | 2 +- test/CodeGen/X86/dynamic-allocas-VLAs.ll | 2 +- test/CodeGen/X86/eh-null-personality.ll | 25 + test/CodeGen/X86/eh_frame.ll | 4 +- test/CodeGen/X86/emutls-pic.ll | 168 + test/CodeGen/X86/emutls-pie.ll | 131 + test/CodeGen/X86/emutls.ll | 347 + test/CodeGen/X86/emutls_generic.ll | 107 + test/CodeGen/X86/exedeps-movq.ll | 19 + test/CodeGen/X86/expand-vr64-gr64-copy.mir | 36 + .../X86/extractelement-legalization-cycle.ll | 21 + test/CodeGen/X86/extractelement-shuffle.ll | 1 + test/CodeGen/X86/fadd-combines.ll | 224 + test/CodeGen/X86/fast-isel-bitcasts-avx.ll | 244 + test/CodeGen/X86/fast-isel-bitcasts.ll | 245 + test/CodeGen/X86/fast-isel-cmp-branch.ll | 17 +- test/CodeGen/X86/fast-isel-deadcode.ll | 
147 + test/CodeGen/X86/fast-isel-emutls.ll | 48 + test/CodeGen/X86/fast-isel-nontemporal.ll | 111 + test/CodeGen/X86/fast-isel-stackcheck.ll | 44 + test/CodeGen/X86/fast-isel-tls.ll | 2 +- test/CodeGen/X86/fdiv-combine.ll | 69 +- test/CodeGen/X86/fdiv.ll | 52 +- test/CodeGen/X86/fixup-lea.ll | 34 + test/CodeGen/X86/float-asmprint.ll | 15 + test/CodeGen/X86/floor-soft-float.ll | 2 +- test/CodeGen/X86/fma-commute-x86.ll | 761 + test/CodeGen/X86/fma-do-not-commute.ll | 2 +- .../X86/fma-intrinsics-phi-213-to-231.ll | 499 +- test/CodeGen/X86/fma-intrinsics-x86.ll | 752 +- test/CodeGen/X86/fma-scalar-memfold.ll | 383 + test/CodeGen/X86/fma_patterns.ll | 1317 +- test/CodeGen/X86/fma_patterns_wide.ll | 849 +- test/CodeGen/X86/fmaxnum.ll | 203 +- test/CodeGen/X86/fminnum.ll | 181 +- test/CodeGen/X86/fmul-combines.ll | 44 +- test/CodeGen/X86/fold-load-binops.ll | 1 + test/CodeGen/X86/fold-load-unops.ll | 1 + test/CodeGen/X86/fold-push.ll | 40 + test/CodeGen/X86/force-align-stack-alloca.ll | 2 +- test/CodeGen/X86/force-align-stack.ll | 2 +- test/CodeGen/X86/fp-fast.ll | 1 + test/CodeGen/X86/fp-logic.ll | 264 + test/CodeGen/X86/fp128-calling-conv.ll | 47 + test/CodeGen/X86/fp128-cast.ll | 279 + test/CodeGen/X86/fp128-compare.ll | 96 + test/CodeGen/X86/fp128-i128.ll | 320 + test/CodeGen/X86/fp128-libcalls.ll | 107 + test/CodeGen/X86/fp128-load.ll | 35 + test/CodeGen/X86/fp128-store.ll | 14 + test/CodeGen/X86/fpcmp-soft-fp.ll | 127 + test/CodeGen/X86/fpstack-debuginstr-kill.ll | 16 +- test/CodeGen/X86/frem-msvc32.ll | 12 + test/CodeGen/X86/funclet-layout.ll | 158 + test/CodeGen/X86/function-alias.ll | 12 + test/CodeGen/X86/gcc_except_table.ll | 2 +- test/CodeGen/X86/global-sections.ll | 7 +- test/CodeGen/X86/h-register-store.ll | 25 +- test/CodeGen/X86/h-registers-0.ll | 1 + test/CodeGen/X86/h-registers-1.ll | 1 + test/CodeGen/X86/h-registers-3.ll | 1 + test/CodeGen/X86/half.ll | 4 +- test/CodeGen/X86/hhvm-cc.ll | 241 + test/CodeGen/X86/i386-shrink-wrapping.ll | 113 + test/CodeGen/X86/immediate_merging.ll | 82 + test/CodeGen/X86/implicit-null-check.ll | 51 +- test/CodeGen/X86/imul.ll | 63 + test/CodeGen/X86/inalloca-stdcall.ll | 5 +- test/CodeGen/X86/inalloca.ll | 15 +- test/CodeGen/X86/inconsistent_landingpad.ll | 30 + test/CodeGen/X86/inline-asm-2addr.ll | 11 +- .../X86/inline-asm-sp-clobber-memcpy.ll | 2 +- test/CodeGen/X86/inline-sse.ll | 34 + .../CodeGen/X86/insertps-from-constantpool.ll | 20 + test/CodeGen/X86/insertps-unfold-load-bug.ll | 33 + test/CodeGen/X86/int-intrinsic.ll | 2 +- test/CodeGen/X86/late-address-taken.ll | 68 + test/CodeGen/X86/lea-opt.ll | 131 + test/CodeGen/X86/lit.local.cfg | 2 +- .../X86/{frameescape.ll => localescape.ll} | 51 +- test/CodeGen/X86/lower-vec-shift-2.ll | 1 + test/CodeGen/X86/lsr-static-addr.ll | 2 +- test/CodeGen/X86/machine-combiner-int-vec.ll | 112 + test/CodeGen/X86/machine-combiner-int.ll | 194 + test/CodeGen/X86/machine-combiner.ll | 467 +- test/CodeGen/X86/machine-cp.ll | 38 +- .../X86/machine-trace-metrics-crash.ll | 4 +- test/CodeGen/X86/masked_gather_scatter.ll | 2012 +- test/CodeGen/X86/masked_memop.ll | 524 +- test/CodeGen/X86/materialize.ll | 184 + test/CodeGen/X86/mcu-abi.ll | 112 + test/CodeGen/X86/memcpy-2.ll | 26 +- test/CodeGen/X86/memcpy.ll | 33 + .../X86/merge-store-partially-alias-loads.ll | 52 + .../X86/misched-code-difference-with-debug.ll | 12 +- test/CodeGen/X86/mmx-arg-passing-x86-64.ll | 1 + test/CodeGen/X86/mmx-arg-passing.ll | 1 + test/CodeGen/X86/mmx-coalescing.ll | 84 + test/CodeGen/X86/mmx-intrinsics.ll | 291 +- 
test/CodeGen/X86/mmx-only.ll | 21 + test/CodeGen/X86/movntdq-no-avx.ll | 2 +- test/CodeGen/X86/movpc32-check.ll | 42 + test/CodeGen/X86/movtopush.ll | 25 +- test/CodeGen/X86/mult-alt-x86.ll | 2 +- test/CodeGen/X86/musttail-varargs.ll | 43 + test/CodeGen/X86/nontemporal-2.ll | 21 +- test/CodeGen/X86/nontemporal.ll | 11 +- test/CodeGen/X86/null-streamer.ll | 4 +- test/CodeGen/X86/opt-ext-uses.ll | 8 +- test/CodeGen/X86/or-branch.ll | 30 +- test/CodeGen/X86/or-lea.ll | 120 + test/CodeGen/X86/palignr.ll | 1 + test/CodeGen/X86/patchpoint-verifiable.mir | 42 + .../X86/peephole-na-phys-copy-folding.ll | 190 + test/CodeGen/X86/pmul.ll | 297 +- test/CodeGen/X86/pop-stack-cleanup.ll | 76 + test/CodeGen/X86/powi.ll | 38 +- test/CodeGen/X86/pr11415.ll | 8 +- test/CodeGen/X86/pr11468.ll | 2 +- test/CodeGen/X86/pr11985.ll | 30 +- test/CodeGen/X86/pr13577.ll | 5 +- test/CodeGen/X86/pr15267.ll | 240 +- test/CodeGen/X86/pr17631.ll | 2 +- test/CodeGen/X86/pr21529.ll | 15 - test/CodeGen/X86/pr22019.ll | 2 +- test/CodeGen/X86/pr23900.ll | 29 - test/CodeGen/X86/pr24139.ll | 148 + test/CodeGen/X86/pr24602.ll | 17 + test/CodeGen/X86/pr25828.ll | 30 + test/CodeGen/X86/prolog-push-seq.ll | 19 + test/CodeGen/X86/pseudo_cmov_lower.ll | 267 + test/CodeGen/X86/pseudo_cmov_lower1.ll | 39 + test/CodeGen/X86/pseudo_cmov_lower2.ll | 100 + test/CodeGen/X86/psubus.ll | 580 +- test/CodeGen/X86/push-cfi-debug.ll | 53 + test/CodeGen/X86/push-cfi-obj.ll | 44 + test/CodeGen/X86/push-cfi.ll | 304 + test/CodeGen/X86/ragreedy-hoist-spill.ll | 2 +- test/CodeGen/X86/rem_crash.ll | 257 + test/CodeGen/X86/remat-invalid-liveness.ll | 85 - test/CodeGen/X86/rodata-relocs.ll | 8 +- test/CodeGen/X86/rounding-ops.ll | 24 +- test/CodeGen/X86/safestack.ll | 32 + test/CodeGen/X86/sar_fold.ll | 37 + test/CodeGen/X86/sar_fold64.ll | 43 + test/CodeGen/X86/scalar-fp-to-i64.ll | 151 + test/CodeGen/X86/scalar-int-to-fp.ll | 132 + test/CodeGen/X86/sdiv-pow2.ll | 33 + test/CodeGen/X86/seh-catch-all-win32.ll | 33 +- test/CodeGen/X86/seh-catch-all.ll | 29 +- test/CodeGen/X86/seh-catchpad.ll | 198 + test/CodeGen/X86/seh-except-finally.ll | 71 +- test/CodeGen/X86/seh-exception-code.ll | 38 + test/CodeGen/X86/seh-filter.ll | 21 - test/CodeGen/X86/seh-finally.ll | 48 +- test/CodeGen/X86/seh-safe-div-win32.ll | 40 +- test/CodeGen/X86/seh-safe-div.ll | 52 +- test/CodeGen/X86/seh-stack-realign-win32.ll | 99 - test/CodeGen/X86/seh-stack-realign.ll | 34 +- test/CodeGen/X86/setcc-lowering.ll | 1 + test/CodeGen/X86/setcc.ll | 20 + test/CodeGen/X86/shift-bmi2.ll | 20 +- test/CodeGen/X86/shrink-wrap-chkstk.ll | 37 + test/CodeGen/X86/slow-div.ll | 15 + test/CodeGen/X86/slow-unaligned-mem.ll | 95 + test/CodeGen/X86/soft-fp.ll | 34 +- test/CodeGen/X86/soft-sitofp.ll | 169 + test/CodeGen/X86/splat-for-size.ll | 197 +- test/CodeGen/X86/sqrt-fastmath.ll | 9 +- test/CodeGen/X86/sse-align-12.ll | 1 + test/CodeGen/X86/sse-minmax.ll | 2 +- test/CodeGen/X86/sse-only.ll | 20 + test/CodeGen/X86/sse-scalar-fp-arith-unary.ll | 1 + test/CodeGen/X86/sse2-vector-shifts.ll | 282 +- test/CodeGen/X86/sse2.ll | 1 + test/CodeGen/X86/sse3-avx-addsub-2.ll | 312 +- test/CodeGen/X86/sse3-avx-addsub.ll | 197 +- test/CodeGen/X86/sse3-intrinsics-fast-isel.ll | 171 + test/CodeGen/X86/sse3.ll | 7 +- .../X86/sse41-intrinsics-x86-upgrade.ll | 47 +- test/CodeGen/X86/sse41-intrinsics-x86.ll | 48 - test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll | 185 +- test/CodeGen/X86/sse41.ll | 65 +- .../CodeGen/X86/sse4a-intrinsics-fast-isel.ll | 98 + test/CodeGen/X86/sse_partial_update.ll | 33 + 
.../CodeGen/X86/ssse3-intrinsics-fast-isel.ll | 290 + test/CodeGen/X86/stack-align-memcpy.ll | 2 +- test/CodeGen/X86/stack-folding-adx-x86_64.ll | 45 + test/CodeGen/X86/stack-folding-fp-avx1.ll | 18 +- test/CodeGen/X86/stack-folding-fp-sse42.ll | 28 +- test/CodeGen/X86/stack-folding-int-avx1.ll | 40 +- test/CodeGen/X86/stack-folding-int-avx2.ll | 55 +- test/CodeGen/X86/stack-folding-int-sse42.ll | 38 +- test/CodeGen/X86/stack-folding-mmx.ll | 148 +- test/CodeGen/X86/stack-folding-x86_64.ll | 2 +- test/CodeGen/X86/stack-folding-xop.ll | 2 +- test/CodeGen/X86/stack-probe-size.ll | 3 +- test/CodeGen/X86/stack-protector-dbginfo.ll | 36 +- test/CodeGen/X86/stack-protector-weight.ll | 4 +- test/CodeGen/X86/stackmap-frame-setup.ll | 20 + test/CodeGen/X86/statepoint-allocas.ll | 10 +- test/CodeGen/X86/statepoint-call-lowering.ll | 103 +- test/CodeGen/X86/statepoint-far-call.ll | 4 +- test/CodeGen/X86/statepoint-forward.ll | 16 +- .../statepoint-gctransition-call-lowering.ll | 66 +- test/CodeGen/X86/statepoint-invoke.ll | 78 +- test/CodeGen/X86/statepoint-stack-usage.ll | 54 +- .../CodeGen/X86/statepoint-stackmap-format.ll | 96 +- test/CodeGen/X86/stdarg.ll | 10 +- test/CodeGen/X86/stores-merging.ll | 46 +- test/CodeGen/X86/switch-bt.ll | 8 +- test/CodeGen/X86/switch-edge-weight.ll | 281 + test/CodeGen/X86/switch-jump-table.ll | 54 +- test/CodeGen/X86/switch-order-weight.ll | 2 +- test/CodeGen/X86/switch.ll | 85 +- test/CodeGen/X86/swizzle-2.ll | 1 + .../CodeGen/X86/system-intrinsics-64-xsave.ll | 41 + .../X86/system-intrinsics-64-xsavec.ll | 21 + .../X86/system-intrinsics-64-xsaveopt.ll | 21 + .../X86/system-intrinsics-64-xsaves.ll | 41 + test/CodeGen/X86/system-intrinsics-64.ll | 2 +- test/CodeGen/X86/system-intrinsics-xsave.ll | 23 + test/CodeGen/X86/system-intrinsics-xsavec.ll | 12 + .../CodeGen/X86/system-intrinsics-xsaveopt.ll | 12 + test/CodeGen/X86/system-intrinsics-xsaves.ll | 23 + test/CodeGen/X86/system-intrinsics.ll | 2 +- test/CodeGen/X86/tail-dup-catchret.ll | 31 + test/CodeGen/X86/tail-merge-wineh.ll | 107 + test/CodeGen/X86/tail-opts.ll | 40 +- test/CodeGen/X86/tailcall-mem-intrinsics.ll | 4 +- test/CodeGen/X86/tailcall-msvc-conventions.ll | 189 + test/CodeGen/X86/tailcall-readnone.ll | 15 + test/CodeGen/X86/tls-android-negative.ll | 65 + test/CodeGen/X86/tls-android.ll | 89 + test/CodeGen/X86/tls-models.ll | 2 + test/CodeGen/X86/tls-pie.ll | 8 + test/CodeGen/X86/token_landingpad.ll | 21 + test/CodeGen/X86/trunc-store.ll | 49 + test/CodeGen/X86/unaligned-32-byte-memops.ll | 7 +- test/CodeGen/X86/unaligned-spill-folding.ll | 2 +- test/CodeGen/X86/unknown-location.ll | 8 +- test/CodeGen/X86/v2f32.ll | 1 + test/CodeGen/X86/vec_cast2.ll | 31 +- test/CodeGen/X86/vec_cmp_sint-128.ll | 722 + test/CodeGen/X86/vec_cmp_uint-128.ll | 860 + test/CodeGen/X86/vec_ctbits.ll | 129 +- test/CodeGen/X86/vec_extract-avx.ll | 116 +- test/CodeGen/X86/vec_fabs.ll | 2 +- test/CodeGen/X86/vec_fp_to_int.ll | 1245 +- test/CodeGen/X86/vec_insert-5.ll | 1 + test/CodeGen/X86/vec_int_to_fp.ll | 1872 +- test/CodeGen/X86/vec_minmax_sint.ll | 2090 +++ test/CodeGen/X86/vec_minmax_uint.ll | 2229 +++ test/CodeGen/X86/vec_sdiv_to_shift.ll | 13 + test/CodeGen/X86/vec_trunc_sext.ll | 31 +- test/CodeGen/X86/vec_uint_to_fp-fastmath.ll | 130 + test/CodeGen/X86/vec_uint_to_fp.ll | 8 +- test/CodeGen/X86/vector-blend.ll | 72 +- test/CodeGen/X86/vector-idiv.ll | 1 + test/CodeGen/X86/vector-lzcnt-128.ll | 472 +- test/CodeGen/X86/vector-lzcnt-256.ll | 257 +- test/CodeGen/X86/vector-lzcnt-512.ll | 219 + 
.../X86/vector-merge-store-fp-constants.ll | 35 + test/CodeGen/X86/vector-popcnt-128.ll | 37 +- test/CodeGen/X86/vector-popcnt-256.ll | 73 +- test/CodeGen/X86/vector-popcnt-512.ll | 161 + test/CodeGen/X86/vector-rotate-128.ll | 1595 ++ test/CodeGen/X86/vector-rotate-256.ll | 1089 ++ test/CodeGen/X86/vector-sext.ll | 4090 +++- test/CodeGen/X86/vector-shift-ashr-128.ll | 917 +- test/CodeGen/X86/vector-shift-ashr-256.ll | 691 +- test/CodeGen/X86/vector-shift-ashr-512.ll | 378 + test/CodeGen/X86/vector-shift-lshr-128.ll | 619 +- test/CodeGen/X86/vector-shift-lshr-256.ll | 444 +- test/CodeGen/X86/vector-shift-lshr-512.ll | 317 + test/CodeGen/X86/vector-shift-shl-128.ll | 501 +- test/CodeGen/X86/vector-shift-shl-256.ll | 403 +- test/CodeGen/X86/vector-shift-shl-512.ll | 293 + test/CodeGen/X86/vector-shuffle-128-v16.ll | 276 + test/CodeGen/X86/vector-shuffle-128-v2.ll | 318 +- test/CodeGen/X86/vector-shuffle-128-v4.ll | 92 + test/CodeGen/X86/vector-shuffle-128-v8.ll | 252 + test/CodeGen/X86/vector-shuffle-256-v16.ll | 249 +- test/CodeGen/X86/vector-shuffle-256-v32.ll | 210 +- test/CodeGen/X86/vector-shuffle-256-v4.ll | 703 +- test/CodeGen/X86/vector-shuffle-256-v8.ll | 221 +- test/CodeGen/X86/vector-shuffle-512-v16.ll | 134 + test/CodeGen/X86/vector-shuffle-512-v32.ll | 44 + test/CodeGen/X86/vector-shuffle-512-v8.ll | 2487 ++- test/CodeGen/X86/vector-shuffle-combining.ll | 1 + test/CodeGen/X86/vector-shuffle-mmx.ll | 1 + test/CodeGen/X86/vector-shuffle-sse1.ll | 1 + test/CodeGen/X86/vector-shuffle-sse4a.ll | 140 + test/CodeGen/X86/vector-shuffle-v1.ll | 439 + test/CodeGen/X86/vector-trunc.ll | 685 +- test/CodeGen/X86/vector-tzcnt-128.ll | 2035 +- test/CodeGen/X86/vector-tzcnt-256.ll | 1455 +- test/CodeGen/X86/vector-tzcnt-512.ll | 271 + test/CodeGen/X86/vector-zext.ll | 1239 +- test/CodeGen/X86/vector-zmov.ll | 1 + ...rs-cleared-in-machine-functions-liveins.ll | 19 + test/CodeGen/X86/vmovq.ll | 28 + test/CodeGen/X86/vselect-2.ll | 1 + test/CodeGen/X86/vselect-avx.ll | 12 +- test/CodeGen/X86/vselect-minmax.ll | 15574 +++++++++++----- test/CodeGen/X86/vselect.ll | 1 + test/CodeGen/X86/vshift_scalar.ll | 1 + test/CodeGen/X86/wide-integer-cmp.ll | 130 + test/CodeGen/X86/widen_load-2.ll | 4 +- test/CodeGen/X86/widen_shuffle-1.ll | 1 + test/CodeGen/X86/win-catchpad-csrs.ll | 268 + test/CodeGen/X86/win-catchpad-nested-cxx.ll | 105 + test/CodeGen/X86/win-catchpad-nested.ll | 42 + test/CodeGen/X86/win-catchpad-varargs.ll | 101 + test/CodeGen/X86/win-catchpad.ll | 353 + test/CodeGen/X86/win-cleanuppad.ll | 199 + test/CodeGen/X86/win-funclet-cfi.ll | 95 + ..._prepare.ll => win-mixed-ehpersonality.ll} | 51 +- test/CodeGen/X86/win32-eh-states.ll | 213 +- test/CodeGen/X86/win32-eh.ll | 49 +- test/CodeGen/X86/win32-pic-jumptable.ll | 8 +- .../CodeGen/X86/win32-seh-catchpad-realign.ll | 77 + test/CodeGen/X86/win32-seh-catchpad.ll | 231 + test/CodeGen/X86/win32-seh-nested-finally.ll | 80 + test/CodeGen/X86/win32-spill-xmm.ll | 40 + test/CodeGen/X86/win64_frame.ll | 70 +- test/CodeGen/X86/win64_sibcall.ll | 38 + test/CodeGen/X86/win_coreclr_chkstk.ll | 143 + test/CodeGen/X86/win_ftol2.ll | 166 - test/CodeGen/X86/wineh-coreclr.ll | 267 + test/CodeGen/X86/wineh-exceptionpointer.ll | 26 + test/CodeGen/X86/wineh-no-ehpads.ll | 20 + test/CodeGen/X86/x32-function_pointer-3.ll | 2 +- test/CodeGen/X86/x32-indirectbr.ll | 26 + test/CodeGen/X86/x32-landingpad.ll | 27 + test/CodeGen/X86/x32-va_start.ll | 99 + test/CodeGen/X86/x86-32-intrcc.ll | 79 + test/CodeGen/X86/x86-64-baseptr.ll | 4 +- 
.../X86/x86-64-double-precision-shift-left.ll | 17 +- .../x86-64-double-precision-shift-right.ll | 9 +- .../X86/x86-64-double-shifts-Oz-Os-O2.ll | 4 +- test/CodeGen/X86/x86-64-intrcc.ll | 86 + test/CodeGen/X86/x86-64-ms_abi-vararg.ll | 108 + test/CodeGen/X86/x86-64-pic-10.ll | 2 +- test/CodeGen/X86/x86-fold-pshufb.ll | 20 +- .../X86/x86-sanitizer-shrink-wrapping.ll | 40 + .../X86/x86-setcc-int-to-fp-combine.ll | 16 +- test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 153 + test/CodeGen/X86/x86-shrink-wrapping.ll | 254 +- test/CodeGen/X86/x86-win64-shrink-wrapping.ll | 126 + test/CodeGen/X86/xop-intrinsics-x86_64.ll | 33 +- test/CodeGen/X86/xop-pcmov.ll | 163 + test/CodeGen/XCore/aliases.ll | 6 +- test/CodeGen/XCore/dwarf_debug.ll | 8 +- test/DebugInfo/AArch64/big-endian.ll | 2 +- test/DebugInfo/AArch64/bitfields.ll | 2 +- test/DebugInfo/AArch64/cfi-eof-prologue.ll | 16 +- test/DebugInfo/AArch64/coalescing.ll | 10 +- test/DebugInfo/AArch64/constant-dbgloc.ll | 6 +- test/DebugInfo/AArch64/dwarfdump.ll | 6 +- test/DebugInfo/AArch64/frameindices.ll | 24 +- test/DebugInfo/AArch64/prologue_end.ll | 43 + test/DebugInfo/AArch64/struct_by_value.ll | 8 +- test/DebugInfo/ARM/PR16736.ll | 18 +- test/DebugInfo/ARM/bitfield.ll | 2 +- test/DebugInfo/ARM/cfi-eof-prologue.ll | 16 +- test/DebugInfo/ARM/constant-dbgloc.ll | 6 +- test/DebugInfo/ARM/float-args.ll | 45 + test/DebugInfo/ARM/header.ll | 6 +- test/DebugInfo/ARM/lowerbdgdeclare_vla.ll | 14 +- .../multiple-constant-uses-drops-dbgloc.ll | 6 +- test/DebugInfo/ARM/prologue_end.ll | 46 + test/DebugInfo/ARM/s-super-register.ll | 10 +- test/DebugInfo/ARM/selectiondag-deadcode.ll | 6 +- .../single-constant-use-preserves-dbgloc.ll | 8 +- test/DebugInfo/ARM/tls.ll | 12 +- test/DebugInfo/COFF/asan-module-ctor.ll | 8 +- .../COFF/asan-module-without-functions.ll | 4 +- test/DebugInfo/COFF/asm.ll | 34 +- test/DebugInfo/COFF/cpp-mangling.ll | 6 +- test/DebugInfo/COFF/multifile.ll | 12 +- test/DebugInfo/COFF/multifunction.ll | 24 +- test/DebugInfo/COFF/simple.ll | 12 +- .../COFF/tail-call-without-lexical-scopes.ll | 8 +- .../DebugInfo/{ => Generic}/2009-10-16-Phi.ll | 0 .../2009-11-03-InsertExtractValue.ll | 2 +- .../2009-11-05-DeadGlobalVariable.ll | 6 +- .../2009-11-06-NamelessGlobalVariable.ll | 2 +- .../{ => Generic}/2009-11-10-CurrentFn.ll | 8 +- .../{ => Generic}/2010-01-05-DbgScope.ll | 4 +- .../{ => Generic}/2010-03-12-llc-crash.ll | 8 +- .../{ => Generic}/2010-03-19-DbgDeclare.ll | 6 +- .../{ => Generic}/2010-03-24-MemberFn.ll | 14 +- .../2010-04-06-NestedFnDbgInfo.ll | 26 +- .../{ => Generic}/2010-04-19-FramePtr.ll | 6 +- .../2010-05-03-DisableFramePtr.ll | 6 +- .../{ => Generic}/2010-05-03-OriginDIE.ll | 18 +- .../{ => Generic}/2010-05-10-MultipleCU.ll | 12 +- .../2010-06-29-InlinedFnLocalVar.ll | 16 +- .../{ => Generic}/2010-07-19-Crash.ll | 10 +- .../{ => Generic}/2010-10-01-crash.ll | 8 +- test/DebugInfo/Generic/Inputs/gmlt.ll | 153 + test/DebugInfo/{ => Generic}/PR20038.ll | 29 +- .../accel-table-hash-collisions.ll | 2 +- test/DebugInfo/{ => Generic}/array.ll | 8 +- test/DebugInfo/{ => Generic}/block-asan.ll | 8 +- .../{ => Generic}/bug_null_debuginfo.ll | 2 +- .../{ => Generic}/constant-pointers.ll | 6 +- .../constant-sdnodes-have-dbg-location.ll | 10 +- .../constantfp-sdnodes-have-dbg-location.ll | 10 +- .../{ => Generic}/cross-cu-inlining.ll | 20 +- .../cross-cu-linkonce-distinct.ll | 12 +- .../{ => Generic}/cross-cu-linkonce.ll | 10 +- test/DebugInfo/{ => Generic}/cu-range-hole.ll | 14 +- test/DebugInfo/{ => Generic}/cu-ranges.ll | 14 +- .../{X86 => 
Generic}/dbg-at-specficiation.ll | 2 +- .../{ => Generic}/dead-argument-order.ll | 10 +- .../{ => Generic}/debug-info-always-inline.ll | 0 .../{ => Generic}/debug-info-qualifiers.ll | 12 +- .../debuginfofinder-forward-declaration.ll | 2 +- .../debuginfofinder-multiple-cu.ll | 12 +- test/DebugInfo/Generic/def-line.ll | 93 + test/DebugInfo/Generic/discriminator.ll | 52 + .../{ => Generic}/dwarf-public-names.ll | 20 +- test/DebugInfo/{ => Generic}/empty.ll | 2 +- test/DebugInfo/{ => Generic}/enum-types.ll | 18 +- test/DebugInfo/{ => Generic}/enum.ll | 8 +- test/DebugInfo/{ => Generic}/global.ll | 6 +- test/DebugInfo/{ => Generic}/gmlt.test | 0 test/DebugInfo/Generic/gvn.ll | 114 + .../incorrect-variable-debugloc.ll | 24 +- .../incorrect-variable-debugloc1.ll | 8 +- test/DebugInfo/{ => Generic}/inheritance.ll | 20 +- .../inline-debug-info-multiret.ll | 16 +- .../{ => Generic}/inline-debug-info.ll | 16 +- .../{ => Generic}/inline-no-debug-info.ll | 8 +- test/DebugInfo/{ => Generic}/inline-scopes.ll | 14 +- .../{ => Generic}/inlined-arguments.ll | 18 +- test/DebugInfo/{ => Generic}/inlined-vars.ll | 16 +- test/DebugInfo/Generic/lit.local.cfg | 3 + .../{ => Generic}/location-verifier.ll | 6 +- test/DebugInfo/{ => Generic}/lto-comp-dir.ll | 12 +- test/DebugInfo/{ => Generic}/member-order.ll | 8 +- .../{ => Generic}/member-pointers.ll | 2 +- .../missing-abstract-variable.ll | 32 +- test/DebugInfo/{ => Generic}/multiline.ll | 6 +- test/DebugInfo/{ => Generic}/namespace.ll | 35 +- .../namespace_function_definition.ll | 8 +- .../namespace_inline_function_definition.ll | 16 +- test/DebugInfo/{ => Generic}/nodebug.ll | 4 +- .../DebugInfo/{ => Generic}/piece-verifier.ll | 12 +- test/DebugInfo/Generic/ptrsize.ll | 47 + .../{X86 => Generic}/recursive_inlining.ll | 42 +- test/DebugInfo/{ => Generic}/restrict.ll | 8 +- test/DebugInfo/Generic/skeletoncu.ll | 16 + .../{ => Generic}/sugared-constants.ll | 12 +- .../{ => Generic}/template-recursive-void.ll | 2 +- test/DebugInfo/{ => Generic}/tu-composite.ll | 24 +- .../{ => Generic}/tu-member-pointer.ll | 2 +- .../{ => Generic}/two-cus-from-same-file.ll | 16 +- test/DebugInfo/{ => Generic}/typedef.ll | 2 +- .../{ => Generic}/unconditional-branch.ll | 8 +- test/DebugInfo/{ => Generic}/varargs.ll | 12 +- test/DebugInfo/{ => Generic}/version.ll | 6 +- test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o | Bin 0 -> 2000 bytes .../dwarfdump-macho-relocs.macho.x86_64.o | Bin 0 -> 2364 bytes test/DebugInfo/Inputs/dwarfdump-macro-cmd.h | 1 + test/DebugInfo/Inputs/dwarfdump-macro.cc | 11 + test/DebugInfo/Inputs/dwarfdump-macro.h | 5 + test/DebugInfo/Inputs/dwarfdump-macro.o | Bin 0 -> 5616 bytes test/DebugInfo/Inputs/dwarfdump-test.cc | 2 + .../Inputs/dwarfdump-test.macho-i386.o | Bin 0 -> 3620 bytes test/DebugInfo/Inputs/fat-test.o | Bin 0 -> 5000 bytes test/DebugInfo/Inputs/gmlt.ll | 18 +- test/DebugInfo/Inputs/line.ll | 6 +- test/DebugInfo/MIR/X86/lit.local.cfg | 2 + .../MIR/X86/live-debug-values-3preds.mir | 299 + test/DebugInfo/MIR/X86/live-debug-values.mir | 260 + test/DebugInfo/MIR/lit.local.cfg | 2 + test/DebugInfo/Mips/InlinedFnLocalVar.ll | 16 +- test/DebugInfo/Mips/delay-slot.ll | 12 +- test/DebugInfo/Mips/dsr-fixed-objects.ll | 156 + test/DebugInfo/Mips/dsr-non-fixed-objects.ll | 125 + test/DebugInfo/Mips/fn-call-line.ll | 6 +- test/DebugInfo/Mips/prologue_end.ll | 70 + test/DebugInfo/PDB/{ => DIA}/lit.local.cfg | 0 .../PDB/{ => DIA}/pdbdump-flags.test | 8 +- .../PDB/{ => DIA}/pdbdump-symbol-format.test | 6 +- test/DebugInfo/PDB/pdbdump-headers.test | 12 + 
test/DebugInfo/PowerPC/tls-fission.ll | 2 +- test/DebugInfo/PowerPC/tls.ll | 2 +- test/DebugInfo/Sparc/gnu-window-save.ll | 6 +- test/DebugInfo/Sparc/prologue_end.ll | 41 + test/DebugInfo/SystemZ/prologue_end.ll | 42 + test/DebugInfo/SystemZ/variable-loc.ll | 14 +- test/DebugInfo/X86/2010-04-13-PubType.ll | 10 +- .../X86/2011-09-26-GlobalVarContext.ll | 8 +- test/DebugInfo/X86/2011-12-16-BadStructRef.ll | 44 +- test/DebugInfo/X86/DIModuleContext.ll | 30 + test/DebugInfo/X86/DW_AT_byte_size.ll | 8 +- test/DebugInfo/X86/DW_AT_linkage_name.ll | 20 +- .../DebugInfo/X86/DW_AT_location-reference.ll | 8 +- test/DebugInfo/X86/DW_AT_object_pointer.ll | 22 +- test/DebugInfo/X86/DW_AT_specification.ll | 6 +- .../X86/DW_AT_stmt_list_sec_offset.ll | 6 +- test/DebugInfo/X86/DW_TAG_friend.ll | 2 +- test/DebugInfo/X86/InlinedFnLocalVar.ll | 16 +- test/DebugInfo/X86/aligned_stack_var.ll | 8 +- test/DebugInfo/X86/arange-and-stub.ll | 14 +- test/DebugInfo/X86/arange.ll | 2 +- test/DebugInfo/X86/arguments.ll | 10 +- test/DebugInfo/X86/array.ll | 30 +- test/DebugInfo/X86/array2.ll | 20 +- test/DebugInfo/X86/bbjoin.ll | 101 + test/DebugInfo/X86/bitfields.ll | 2 +- test/DebugInfo/X86/block-capture.ll | 10 +- test/DebugInfo/X86/byvalstruct.ll | 16 +- test/DebugInfo/X86/c-type-units.ll | 2 +- test/DebugInfo/X86/coff_debug_info_type.ll | 10 +- test/DebugInfo/X86/coff_relative_names.ll | 6 +- test/DebugInfo/X86/concrete_out_of_line.ll | 26 +- test/DebugInfo/X86/constant-aggregate.ll | 20 +- test/DebugInfo/X86/cu-ranges-odr.ll | 18 +- test/DebugInfo/X86/cu-ranges.ll | 14 +- test/DebugInfo/X86/data_member_location.ll | 2 +- test/DebugInfo/X86/dbg-byval-parameter.ll | 8 +- test/DebugInfo/X86/dbg-const-int.ll | 8 +- test/DebugInfo/X86/dbg-const.ll | 8 +- test/DebugInfo/X86/dbg-declare-arg.ll | 24 +- test/DebugInfo/X86/dbg-declare.ll | 10 +- test/DebugInfo/X86/dbg-file-name.ll | 6 +- test/DebugInfo/X86/dbg-i128-const.ll | 8 +- test/DebugInfo/X86/dbg-merge-loc-entry.ll | 12 +- test/DebugInfo/X86/dbg-prolog-end.ll | 14 +- test/DebugInfo/X86/dbg-subrange.ll | 6 +- test/DebugInfo/X86/dbg-value-const-byref.ll | 10 +- test/DebugInfo/X86/dbg-value-dag-combine.ll | 14 +- .../X86/dbg-value-inlined-parameter.ll | 14 +- test/DebugInfo/X86/dbg-value-isel.ll | 14 +- test/DebugInfo/X86/dbg-value-location.ll | 16 +- test/DebugInfo/X86/dbg-value-range.ll | 10 +- test/DebugInfo/X86/dbg-value-terminator.ll | 12 +- test/DebugInfo/X86/dbg_value_direct.ll | 10 +- test/DebugInfo/X86/debug-dead-local-var.ll | 10 +- test/DebugInfo/X86/debug-info-access.ll | 6 +- .../X86/debug-info-block-captured-self.ll | 14 +- test/DebugInfo/X86/debug-info-blocks.ll | 46 +- .../DebugInfo/X86/debug-info-packed-struct.ll | 2 +- .../DebugInfo/X86/debug-info-static-member.ll | 8 +- test/DebugInfo/X86/debug-loc-asan.ll | 19 +- test/DebugInfo/X86/debug-loc-empty-entries.ll | 8 +- test/DebugInfo/X86/debug-loc-offset.ll | 18 +- test/DebugInfo/X86/debug-ranges-offset.ll | 14 +- test/DebugInfo/X86/debug_frame.ll | 6 +- test/DebugInfo/X86/debugger-tune.ll | 44 + test/DebugInfo/X86/decl-derived-member.ll | 26 +- test/DebugInfo/X86/deleted-bit-piece.ll | 8 +- test/DebugInfo/X86/discriminator.ll | 6 +- test/DebugInfo/X86/dw_op_minus.ll | 84 + .../X86/dwarf-aranges-no-dwarf-labels.ll | 18 +- test/DebugInfo/X86/dwarf-aranges.ll | 6 +- test/DebugInfo/X86/dwarf-linkage-names.ll | 71 + test/DebugInfo/X86/dwarf-public-names.ll | 22 +- test/DebugInfo/X86/dwarf-pubnames-split.ll | 6 +- test/DebugInfo/X86/earlydup-crash.ll | 8 +- test/DebugInfo/X86/elf-names.ll | 20 +- 
.../DebugInfo/X86/empty-and-one-elem-array.ll | 10 +- test/DebugInfo/X86/empty-array.ll | 2 +- test/DebugInfo/X86/empty.ll | 2 +- test/DebugInfo/X86/ending-run.ll | 10 +- test/DebugInfo/X86/enum-class.ll | 2 +- test/DebugInfo/X86/enum-fwd-decl.ll | 2 +- test/DebugInfo/X86/externaltyperef.ll | 51 + test/DebugInfo/X86/fission-cu.ll | 2 +- test/DebugInfo/X86/fission-hash.ll | 2 +- test/DebugInfo/X86/fission-inline.ll | 8 +- test/DebugInfo/X86/fission-ranges.ll | 30 +- test/DebugInfo/X86/float_const.ll | 8 +- test/DebugInfo/X86/formal_parameter.ll | 8 +- test/DebugInfo/X86/frame-register.ll | 12 +- test/DebugInfo/X86/generate-odr-hash.ll | 22 +- test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll | 18 +- test/DebugInfo/X86/gnu-public-names-empty.ll | 2 +- test/DebugInfo/X86/gnu-public-names.ll | 28 +- test/DebugInfo/X86/header.ll | 6 +- test/DebugInfo/X86/inline-member-function.ll | 12 +- test/DebugInfo/X86/inline-seldag-test.ll | 12 +- .../DebugInfo/X86/inlined-formal-parameter.ll | 10 +- test/DebugInfo/X86/inlined-indirect-value.ll | 8 +- test/DebugInfo/X86/instcombine-instrinsics.ll | 8 +- test/DebugInfo/X86/lexical_block.ll | 8 +- test/DebugInfo/X86/line-info.ll | 12 +- test/DebugInfo/X86/linkage-name.ll | 10 +- test/DebugInfo/X86/live-debug-values.ll | 152 + test/DebugInfo/X86/low-pc-cu.ll | 6 +- test/DebugInfo/X86/memberfnptr.ll | 2 +- test/DebugInfo/X86/mi-print.ll | 14 +- test/DebugInfo/X86/misched-dbg-value.ll | 18 +- test/DebugInfo/X86/missing-file-line.ll | 8 +- test/DebugInfo/X86/multiple-aranges.ll | 4 +- test/DebugInfo/X86/multiple-at-const-val.ll | 6 +- test/DebugInfo/X86/nodebug_with_debug_loc.ll | 12 +- .../X86/nondefault-subrange-array.ll | 2 +- test/DebugInfo/X86/nophysreg.ll | 24 +- test/DebugInfo/X86/objc-fwd-decl.ll | 2 +- test/DebugInfo/X86/objc-property-void.ll | 10 +- test/DebugInfo/X86/op_deref.ll | 18 +- test/DebugInfo/X86/parameters.ll | 16 +- test/DebugInfo/X86/pieces-1.ll | 12 +- test/DebugInfo/X86/pieces-2.ll | 10 +- test/DebugInfo/X86/pieces-3.ll | 18 +- test/DebugInfo/X86/pointer-type-size.ll | 2 +- test/DebugInfo/X86/pr11300.ll | 14 +- test/DebugInfo/X86/pr12831.ll | 28 +- test/DebugInfo/X86/pr13303.ll | 6 +- test/DebugInfo/X86/pr19307.ll | 12 +- test/DebugInfo/X86/processes-relocations.ll | 2 +- test/DebugInfo/X86/prologue-stack.ll | 6 +- test/DebugInfo/X86/ref_addr_relocation.ll | 4 +- test/DebugInfo/X86/reference-argument.ll | 47 +- test/DebugInfo/X86/rvalue-ref.ll | 8 +- test/DebugInfo/X86/safestack-byval.ll | 91 + test/DebugInfo/X86/sret.ll | 76 +- test/DebugInfo/X86/sroasplit-1.ll | 12 +- test/DebugInfo/X86/sroasplit-2.ll | 14 +- test/DebugInfo/X86/sroasplit-3.ll | 10 +- test/DebugInfo/X86/sroasplit-4.ll | 10 +- test/DebugInfo/X86/sroasplit-5.ll | 10 +- .../X86/stmt-list-multiple-compile-units.ll | 16 +- test/DebugInfo/X86/stmt-list.ll | 6 +- test/DebugInfo/X86/stringpool.ll | 2 +- test/DebugInfo/X86/struct-loc.ll | 2 +- test/DebugInfo/X86/subrange-type.ll | 8 +- test/DebugInfo/X86/subreg.ll | 8 +- test/DebugInfo/X86/subregisters.ll | 16 +- test/DebugInfo/X86/template.ll | 12 +- test/DebugInfo/X86/tls.ll | 28 +- .../X86/type_units_with_addresses.ll | 2 +- test/DebugInfo/X86/union-const.ll | 8 +- test/DebugInfo/X86/union-template.ll | 10 +- test/DebugInfo/X86/vector.ll | 2 +- test/DebugInfo/X86/vla.ll | 20 +- test/DebugInfo/debugmacinfo.test | 27 + test/DebugInfo/dwarfdump-accel.test | 2 +- test/DebugInfo/dwarfdump-dump-flags.test | 3 + test/DebugInfo/dwarfdump-dwp.test | 53 + test/DebugInfo/dwarfdump-macho-relocs.test | 27 + 
test/DebugInfo/dwarfdump-macho-universal.test | 17 +
test/DebugInfo/dwo.ll | 15 +
test/DebugInfo/gvn.ll | 135 -
test/DebugInfo/llvm-symbolizer.test | 6 +-
test/DebugInfo/skeletoncu.ll | 17 +
test/Examples/Kaleidoscope/Chapter3.test | 17 +
test/Examples/Kaleidoscope/Chapter4.test | 17 +
test/Examples/Kaleidoscope/Chapter5.test | 19 +
test/Examples/Kaleidoscope/Chapter6.test | 15 +
test/Examples/Kaleidoscope/Chapter7.test | 15 +
test/Examples/lit.local.cfg | 1 +
test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +-
test/ExecutionEngine/MCJIT/eh-sm-pic.ll | 2 +-
test/ExecutionEngine/MCJIT/eh.ll | 2 +-
.../MCJIT/multi-module-eh-a.ll | 2 +-
.../ExecutionEngine/OrcLazy/global_aliases.ll | 21 +
test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll | 2 +-
test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll | 2 +-
test/ExecutionEngine/OrcMCJIT/eh.ll | 2 +-
.../OrcMCJIT/multi-module-eh-a.ll | 2 +-
.../AArch64/MachO_ARM64_relocations.s | 12 +
.../ARM/MachO_ARM_PIC_relocations.s | 7 +
.../Mips/ELF_Mips64r2N64_PIC_relocations.s | 10 +-
.../RuntimeDyld/Mips/ELF_N64R6_relocations.s | 54 +
.../Mips/ELF_O32_PIC_relocations.s | 13 +-
.../RuntimeDyld/PowerPC/lit.local.cfg | 3 +
.../PowerPC/ppc32_elf_rel_addr16.s | 47 +
.../RuntimeDyld/X86/COFF_i386.s | 66 +
.../X86/{COFF_x86_64 => COFF_x86_64.s} | 9 +
.../RuntimeDyld/X86/ELF_STT_FILE.s | 14 +
.../X86/ELF_x64-64_PC8_relocations.s | 26 +
.../RuntimeDyld/X86/ELF_x86_64_StubBuf.s | 26 +
.../X86/Inputs/ELF_STT_FILE_FILE.s | 3 +
.../X86/Inputs/ELF_STT_FILE_GLOBAL.s | 2 +
.../X86/Inputs/ELF_x86_64_StubBuf.ll | 12 +
.../X86/MachO_x86-64_PIC_relocations.s | 18 +
test/ExecutionEngine/lit.local.cfg | 5 +-
test/Feature/OperandBundles/adce.ll | 49 +
.../OperandBundles/basic-aa-argmemonly.ll | 51 +
test/Feature/OperandBundles/dse.ll | 62 +
test/Feature/OperandBundles/early-cse.ll | 89 +
test/Feature/OperandBundles/function-attrs.ll | 33 +
.../OperandBundles/inliner-conservative.ll | 17 +
test/Feature/OperandBundles/merge-func.ll | 64 +
test/Feature/OperandBundles/special-state.ll | 21 +
test/Feature/alias2.ll | 24 +-
test/Feature/aliases.ll | 18 +-
test/Feature/callingconventions.ll | 7 +
test/Feature/comdat.ll | 4 +-
test/Feature/exception.ll | 109 +
test/Feature/optnone-llc.ll | 10 +-
.../Instrumentation/AddressSanitizer/basic.ll | 2 +-
.../AddressSanitizer/debug_info.ll | 14 +-
.../debug_info_noninstrumented_alloca.ll | 40 +
.../do-not-instrument-cstring.ll | 8 -
...ll => do-not-instrument-globals-darwin.ll} | 8 +-
.../do-not-instrument-globals-linux.ll | 35 +
.../do-not-instrument-llvm-metadata.ll | 15 -
.../do-not-touch-comdat-global.ll | 4 +-
.../instrument-dynamic-allocas.ll | 16 +
.../AddressSanitizer/keep_going.ll | 14 +
.../AddressSanitizer/localescape.ll | 86 +
.../AddressSanitizer/stack_dynamic_alloca.ll | 31 +
.../Instrumentation/AddressSanitizer/twice.ll | 8 +
.../DataFlowSanitizer/abilist.ll | 4 +-
.../DataFlowSanitizer/debug.ll | 9 +-
.../DataFlowSanitizer/external_mask.ll | 14 +
.../DataFlowSanitizer/prefix-rename.ll | 4 +-
.../Instrumentation/InstrProfiling/PR23499.ll | 21 +-
.../Instrumentation/InstrProfiling/linkage.ll | 49 +-
.../InstrProfiling/no-counters.ll | 4 +-
.../InstrProfiling/noruntime.ll | 4 +-
.../InstrProfiling/platform.ll | 31 +-
.../InstrProfiling/profiling.ll | 36 +-
.../MemorySanitizer/AArch64/vararg.ll | 75 +
.../MemorySanitizer/atomics.ll | 24 +-
.../MemorySanitizer/check_access_address.ll | 4 +-
.../MemorySanitizer/msan_basic.ll | 201 +-
.../MemorySanitizer/mul_by_constant.ll | 23 +
.../MemorySanitizer/origin-alignment.ll | 9 +-
.../MemorySanitizer/return_from_main.ll | 2 +-
.../MemorySanitizer/store-origin.ll | 12 +-
.../MemorySanitizer/unreachable.ll | 4 +-
.../MemorySanitizer/vector_cvt.ll | 6 +-
.../MemorySanitizer/vector_shift.ll | 10 +-
.../SanitizerCoverage/coverage-dbg.ll | 8 +-
.../SanitizerCoverage/coverage.ll | 4 +-
.../SanitizerCoverage/coverage2-dbg.ll | 8 +-
test/Instrumentation/SanitizerCoverage/seh.ll | 86 +
.../SanitizerCoverage/switch-tracing.ll | 56 +
.../Instrumentation/ThreadSanitizer/atomic.ll | 2 +-
test/JitListener/multiple.ll | 20 +-
test/JitListener/simple.ll | 8 +-
test/LTO/X86/Inputs/invalid.ll.bc | Bin 332 -> 688 bytes
test/LTO/X86/bcsection.ll | 2 +
test/LTO/X86/current-section.ll | 1 +
test/LTO/X86/diagnostic-handler-noexit.ll | 4 +-
test/LTO/X86/diagnostic-handler-remarks.ll | 9 +-
test/LTO/X86/disable-verify.ll | 18 +
test/LTO/X86/invalid.ll | 2 +-
test/LTO/X86/list-symbols.ll | 1 +
test/LTO/X86/llvm-lto-output.ll | 21 +
test/LTO/X86/parallel.ll | 25 +
test/LibDriver/thin.test | 9 +
test/Linker/2003-01-30-LinkerRename.ll | 4 +-
test/Linker/2003-04-23-LinkOnceLost.ll | 13 +-
test/Linker/2003-05-31-LinkerRename.ll | 12 +-
test/Linker/2008-03-05-AliasReference.ll | 2 +-
test/Linker/2008-07-06-AliasFnDecl.ll | 2 +-
test/Linker/2008-07-06-AliasWeakDest.ll | 4 +-
test/Linker/2009-09-03-mdnode.ll | 4 +-
test/Linker/2009-09-03-mdnode2.ll | 4 +-
test/Linker/2011-08-04-DebugLoc.ll | 6 +-
test/Linker/2011-08-04-DebugLoc2.ll | 6 +-
test/Linker/2011-08-04-Metadata.ll | 6 +-
test/Linker/2011-08-04-Metadata2.ll | 6 +-
test/Linker/2011-08-18-unique-class-type.ll | 8 +-
test/Linker/2011-08-18-unique-class-type2.ll | 8 +-
test/Linker/2011-08-18-unique-debug-type.ll | 6 +-
test/Linker/2011-08-18-unique-debug-type2.ll | 6 +-
test/Linker/ConstantGlobals.ll | 4 +
test/Linker/DbgDeclare.ll | 10 +-
test/Linker/DbgDeclare2.ll | 12 +-
test/Linker/Inputs/PR8300.b.ll | 2 +-
test/Linker/Inputs/alias.ll | 4 +-
.../Inputs/available_externally_over_decl.ll | 10 +
test/Linker/Inputs/comdat11.ll | 9 +
test/Linker/Inputs/comdat13.ll | 9 +
test/Linker/Inputs/comdat14.ll | 12 +
test/Linker/Inputs/comdat15.ll | 6 +
test/Linker/Inputs/comdat5.ll | 2 +-
test/Linker/Inputs/comdat8.ll | 2 +-
test/Linker/Inputs/ctors2.ll | 6 +
test/Linker/Inputs/ctors3.ll | 7 +
test/Linker/Inputs/funcimport.ll | 28 +
.../Inputs/funcimport_appending_global.ll | 6 +
test/Linker/Inputs/internalize-lazy.ll | 8 +
test/Linker/Inputs/linkage.c.ll | 4 +
test/Linker/Inputs/mdlocation.ll | 12 +-
.../Inputs/only-needed-debug-metadata.ll | 27 +
.../Inputs/only-needed-named-metadata.ll | 9 +
test/Linker/Inputs/opaque.ll | 8 +
...laced-function-matches-first-subprogram.ll | 6 +-
.../Inputs/subprogram-linkonce-weak-odr.ll | 15 -
.../Linker/Inputs/subprogram-linkonce-weak.ll | 6 +-
test/Linker/Inputs/testlink.ll | 4 +-
.../Linker/Inputs/thinlto_funcimport_debug.ll | 38 +
test/Linker/Inputs/type-unique-alias.ll | 2 +-
test/Linker/Inputs/type-unique-dst-types2.ll | 4 +
test/Linker/Inputs/type-unique-dst-types3.ll | 4 +
.../Inputs/type-unique-inheritance-a.ll | 10 +-
.../Inputs/type-unique-inheritance-b.ll | 16 +-
test/Linker/Inputs/type-unique-simple2-a.ll | 10 +-
test/Linker/Inputs/type-unique-simple2-b.ll | 14 +-
test/Linker/Inputs/visibility.ll | 6 +-
test/Linker/alias.ll | 39 +-
test/Linker/available_externally_over_decl.ll | 15 +
test/Linker/comdat11.ll | 13 +
test/Linker/comdat12.ll | 8 +
test/Linker/comdat13.ll | 30 +
test/Linker/comdat14.ll | 9 +
test/Linker/comdat15.ll | 9 +
test/Linker/comdat6.ll | 2 +-
test/Linker/comdat8.ll | 2 +-
test/Linker/comdat9.ll | 7 +-
test/Linker/comdat_group.ll | 18 +
test/Linker/constructor-comdat.ll | 4 +-
test/Linker/ctors.ll | 3 +
test/Linker/ctors2.ll | 7 +
test/Linker/ctors3.ll | 8 +
test/Linker/ctors4.ll | 14 +
test/Linker/ctors5.ll | 8 +
test/Linker/debug-info-version-a.ll | 2 +-
test/Linker/debug-info-version-b.ll | 2 +-
test/Linker/distinct.ll | 2 +
test/Linker/drop-debug.ll | 2 +-
test/Linker/funcimport.ll | 195 +
test/Linker/funcimport_appending_global.ll | 20 +
test/Linker/global_ctors.ll | 5 +-
test/Linker/internalize-lazy.ll | 4 +
test/Linker/link-flags.ll | 19 +
test/Linker/mdlocation.ll | 36 +-
test/Linker/only-needed-debug-metadata.ll | 49 +
test/Linker/only-needed-named-metadata.ll | 65 +
test/Linker/opaque.ll | 4 +
test/Linker/override-with-internal-linkage.ll | 4 +-
test/Linker/pr21494.ll | 4 +-
test/Linker/prologuedata.ll | 10 +-
...laced-function-matches-first-subprogram.ll | 27 +-
test/Linker/subprogram-linkonce-weak-odr.ll | 177 -
test/Linker/subprogram-linkonce-weak.ll | 53 +-
test/Linker/testlink.ll | 11 +-
test/Linker/thinlto_funcimport_debug.ll | 80 +
test/Linker/type-unique-alias.ll | 4 +-
test/Linker/type-unique-dst-types.ll | 4 +
test/Linker/type-unique-odr-a.ll | 16 +-
test/Linker/type-unique-odr-b.ll | 16 +-
test/Linker/type-unique-simple-a.ll | 12 +-
test/Linker/type-unique-simple-b.ll | 14 +-
test/Linker/type-unique-simple2-a.ll | 23 +-
test/Linker/type-unique-simple2-b.ll | 14 +-
test/Linker/type-unique-simple2.ll | 1 +
test/Linker/type-unique-src-type.ll | 4 +-
test/Linker/type-unique-type-array-a.ll | 24 +-
test/Linker/type-unique-type-array-b.ll | 18 +-
test/Linker/uniqued-distinct-cycles.ll | 14 +
test/Linker/unnamed-addr1-a.ll | 18 +-
test/Linker/unnamed-addr1-b.ll | 8 +-
test/Linker/visibility.ll | 12 +-
test/Linker/weakextern.ll | 14 +-
test/MC/AArch64/arm64-advsimd.s | 153 +-
test/MC/AArch64/arm64-diags.s | 63 +
test/MC/AArch64/arm64-fp-encoding.s | 424 +-
test/MC/AArch64/arm64-leaf-compact-unwind.s | 1 +
test/MC/AArch64/arm64-small-data-fixups.s | 27 +-
test/MC/AArch64/armv8.1a-pan.s | 10 +-
test/MC/AArch64/armv8.1a-rdma.s | 18 -
test/MC/AArch64/armv8.2a-at.s | 9 +
test/MC/AArch64/armv8.2a-mmfr2.s | 6 +
test/MC/AArch64/armv8.2a-persistent-memory.s | 6 +
.../AArch64/armv8.2a-statistical-profiling.s | 87 +
test/MC/AArch64/armv8.2a-uao.s | 17 +
test/MC/AArch64/basic-a64-diagnostics.s | 4 +-
test/MC/AArch64/elf_osabi_flags.s | 9 +-
test/MC/AArch64/error-location-ldr-pseudo.s | 5 +
test/MC/AArch64/error-location.s | 49 +
test/MC/AArch64/fullfp16-diagnostics.s | 82 +
test/MC/AArch64/fullfp16-neon-neg.s | 382 +
test/MC/AArch64/ldr-pseudo-diagnostics.s | 18 +
test/MC/AArch64/neon-2velem.s | 18 +-
test/MC/AArch64/neon-aba-abd.s | 4 +-
test/MC/AArch64/neon-across.s | 18 +-
test/MC/AArch64/neon-add-pairwise.s | 6 +-
test/MC/AArch64/neon-add-sub-instructions.s | 10 +-
test/MC/AArch64/neon-compare-instructions.s | 62 +-
test/MC/AArch64/neon-diagnostics.s | 900 +-
test/MC/AArch64/neon-facge-facgt.s | 18 +-
test/MC/AArch64/neon-frsqrt-frecp.s | 10 +-
test/MC/AArch64/neon-max-min-pairwise.s | 18 +-
test/MC/AArch64/neon-max-min.s | 18 +-
test/MC/AArch64/neon-mla-mls-instructions.s | 10 +-
test/MC/AArch64/neon-scalar-abs.s | 4 +-
test/MC/AArch64/neon-scalar-by-elem-mla.s | 6 +-
test/MC/AArch64/neon-scalar-by-elem-mul.s | 6 +-
test/MC/AArch64/neon-scalar-cvt.s | 34 +-
test/MC/AArch64/neon-scalar-fp-compare.s | 32 +-
test/MC/AArch64/neon-scalar-mul.s | 4 +-
test/MC/AArch64/neon-scalar-recip.s | 12 +-
test/MC/AArch64/neon-scalar-reduce-pairwise.s | 7 +-
test/MC/AArch64/neon-simd-misc.s | 98 +-
test/MC/AArch64/neon-simd-shift.s | 18 +-
test/MC/AArch64/noneon-diagnostics.s | 15 +
test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s | 7 +
test/MC/AMDGPU/flat-scratch.s | 33 +
test/MC/AMDGPU/flat.s | 516 +-
test/MC/AMDGPU/hsa-text.s | 34 +
test/MC/AMDGPU/hsa.s | 31 +-
test/MC/AMDGPU/mubuf.s | 224 +-
test/MC/AMDGPU/out-of-range-registers.s | 62 +
test/MC/AMDGPU/smem.s | 11 +
test/MC/AMDGPU/smrd-err.s | 15 +
test/MC/AMDGPU/smrd.s | 61 +-
test/MC/AMDGPU/sop1-err.s | 50 +-
test/MC/AMDGPU/sop1.s | 17 +
test/MC/AMDGPU/sop2.s | 3 +
test/MC/AMDGPU/vop1.s | 19 +
test/MC/AMDGPU/vop2-err.s | 27 +
test/MC/AMDGPU/vop2.s | 123 +-
test/MC/AMDGPU/vop3-vop1-nosrc.s | 14 +
test/MC/AMDGPU/vop3.s | 44 +-
test/MC/AMDGPU/vopc-errs.s | 8 +
test/MC/AMDGPU/vopc.s | 21 +-
test/MC/ARM/Windows/invalid-relocation.s | 2 +-
test/MC/ARM/arm-elf-relocation-diagnostics.s | 29 +-
test/MC/ARM/arm-thumb-trustzone.s | 1 +
test/MC/ARM/arm-trustzone.s | 3 +-
test/MC/ARM/arm11-hint-instr.s | 23 +-
test/MC/ARM/basic-arm-instructions-v8.1a.s | 2 +-
test/MC/ARM/basic-arm-instructions.s | 11 +
test/MC/ARM/basic-thumb2-instructions-v8.s | 23 +-
test/MC/ARM/big-endian-thumb2-fixup.s | 4 +-
test/MC/ARM/coff-debugging-secrel.ll | 9 +-
test/MC/ARM/data-in-code.ll | 16 +-
test/MC/ARM/diagnostics.s | 64 +-
test/MC/ARM/directive-arch-armv6j.s | 34 -
test/MC/ARM/directive-arch-armv6z.s | 4 +-
...h-armv6zk.s => directive-arch-armv8.2-a.s} | 24 +-
test/MC/ARM/directive-arch-semantic-action.s | 4 +-
test/MC/ARM/directive-arch_extension-sec.s | 13 +-
.../ARM/dwarf-asm-multiple-sections-dwarf-2.s | 2 +-
test/MC/ARM/dwarf-asm-multiple-sections.s | 23 +-
test/MC/ARM/dwarf-asm-nonstandard-section.s | 2 +-
test/MC/ARM/dwarf-asm-single-section.s | 4 +-
test/MC/ARM/eh-compact-pr0.s | 4 +-
test/MC/ARM/eh-compact-pr1.s | 2 +-
test/MC/ARM/eh-directive-handlerdata.s | 4 +-
test/MC/ARM/eh-directive-personalityindex.s | 12 +-
test/MC/ARM/eh-directive-section-comdat.s | 16 +-
.../ARM/eh-directive-section-multiple-func.s | 4 +-
test/MC/ARM/eh-directive-section.s | 8 +-
test/MC/ARM/eh-directive-text-section.s | 2 +-
test/MC/ARM/eh-link.s | 12 +-
test/MC/ARM/error-location-ldr-pseudo.s | 5 +
test/MC/ARM/error-location.s | 49 +
test/MC/ARM/fullfp16-neon-neg.s | 289 +
test/MC/ARM/fullfp16-neon.s | 404 +
test/MC/ARM/neon-vcvt-fp16.s | 18 +
test/MC/ARM/thumb-branches.s | 25 +
test/MC/ARM/thumb-shift-encoding.s | 16 +-
test/MC/ARM/thumb1-relax.s | 35 +
test/MC/ARM/thumb2-diagnostics.s | 19 +-
test/MC/ARM/v7k-dsp.s | 4 +
test/MC/AsmParser/dot-symbol-non-absolute.s | 2 +-
test/MC/AsmParser/expr-shr.s | 5 +-
test/MC/AsmParser/exprs-invalid.s | 3 +
test/MC/AsmParser/exprs.s | 2 +-
test/MC/AsmParser/macros-darwin-vararg.s | 2 +-
test/MC/AsmParser/reassign.s | 12 +
test/MC/AsmParser/undefined-local-symbol.s | 8 +
test/MC/AsmParser/vararg.s | 2 +-
test/MC/COFF/ARM/directive-type-diagnostics.s | 10 -
test/MC/COFF/alias.s | 4 +-
test/MC/COFF/bad-expr.s | 2 +
test/MC/COFF/basic-coff-64.s | 4 +-
test/MC/COFF/basic-coff.s | 4 +-
test/MC/COFF/invalid-def.s | 5 +-
test/MC/COFF/invalid-endef.s | 5 +-
test/MC/COFF/invalid-scl-range.s | 3 +
test/MC/COFF/invalid-scl.s | 5 +-
test/MC/COFF/invalid-type.s | 5 +-
test/MC/COFF/label-undefined.s | 6 +
test/MC/COFF/secidx-diagnostic.s | 2 +
test/MC/COFF/simple-fixups.s | 4 +-
test/MC/COFF/stdin.s | 3 +
test/MC/COFF/symbol-fragment-offset-64.s | 4 +-
test/MC/COFF/symbol-fragment-offset.s | 4 +-
test/MC/COFF/temporary-alias.s | 21 +
test/MC/COFF/timestamp.s | 6 +-
.../Disassembler/AArch64/arm64-scalar-fp.txt | 69 +
test/MC/Disassembler/AArch64/armv8.1a-pan.txt | 2 +
test/MC/Disassembler/AArch64/armv8.2a-at.txt | 9 +
.../Disassembler/AArch64/armv8.2a-mmfr2.txt | 4 +
.../AArch64/armv8.2a-persistent-memory.txt | 6 +
.../armv8.2a-statistical-profiling.txt | 87 +
test/MC/Disassembler/AArch64/armv8.2a-uao.txt | 19 +
.../AArch64/basic-a64-instructions.txt | 116 +
test/MC/Disassembler/AArch64/fullfp16-neg.txt | 145 +
.../AArch64/fullfp16-neon-neg.txt | 382 +
.../ARM/fullfp16-neon-arm-neg.txt | 274 +
.../MC/Disassembler/ARM/fullfp16-neon-arm.txt | 309 +
.../ARM/fullfp16-neon-thumb-neg.txt | 274 +
.../Disassembler/ARM/fullfp16-neon-thumb.txt | 309 +
test/MC/Disassembler/ARM/invalid-thumbv7.txt | 23 +-
test/MC/Disassembler/ARM/thumb-v8.txt | 10 +-
.../Disassembler/Hexagon/invalid_packet.txt | 4 +
test/MC/Disassembler/Hexagon/j.txt | 148 +-
test/MC/Disassembler/Hexagon/ld.txt | 90 +-
test/MC/Disassembler/Hexagon/lit.local.cfg | 6 +-
test/MC/Disassembler/Hexagon/nv_j.txt | 88 +-
test/MC/Disassembler/Hexagon/nv_st.txt | 127 +-
test/MC/Disassembler/Hexagon/st.txt | 84 +-
.../Hexagon/too_many_instructions.txt | 4 +
.../Hexagon/too_many_loop_ends.txt | 4 +
test/MC/Disassembler/Hexagon/unextendable.txt | 9 +
test/MC/Disassembler/Mips/dsp/valid-el.txt | 12 +
test/MC/Disassembler/Mips/dsp/valid.txt | 125 +
test/MC/Disassembler/Mips/dspr2/valid.txt | 173 +
.../MC/Disassembler/Mips/eva/valid_R6-eva.txt | 38 +
.../Disassembler/Mips/eva/valid_preR6-eva.txt | 54 +
.../Disassembler/Mips/micromips-dsp/valid.txt | 103 +
.../Mips/micromips-dspr2/valid.txt | 125 +
.../Mips/micromips32r3/invalid.txt | 4 +
.../valid-el.txt} | 427 +-
.../valid.txt} | 427 +-
test/MC/Disassembler/Mips/micromips32r6.txt | 114 -
.../Disassembler/Mips/micromips32r6/valid.txt | 258 +
.../Disassembler/Mips/micromips64r6/valid.txt | 171 +
test/MC/Disassembler/Mips/mips-dsp.txt | 22 -
.../Disassembler/Mips/mips1/invalid-xfail.txt | 11 +
test/MC/Disassembler/Mips/mips1/invalid.txt | 45 +
.../Mips/mips1/valid-mips1-el.txt | 2 +
.../Disassembler/Mips/mips1/valid-mips1.txt | 5 +
.../Disassembler/Mips/mips1/valid-xfail.txt | 8 +
.../Disassembler/Mips/mips2/invalid-xfail.txt | 13 +
.../Mips/mips2/valid-mips2-el.txt | 2 +
.../Disassembler/Mips/mips2/valid-mips2.txt | 20 +
.../Disassembler/Mips/mips2/valid-xfail.txt | 13 +
.../Disassembler/Mips/mips3/invalid-xfail.txt | 14 +
.../Mips/mips3/valid-mips3-el.txt | 2 +
.../Disassembler/Mips/mips3/valid-mips3.txt | 25 +
.../Disassembler/Mips/mips3/valid-xfail.txt | 13 +
.../Mips/mips32/invalid-xfail.txt | 13 +
.../Mips/mips32/valid-mips32-el.txt | 11 +
.../Disassembler/Mips/mips32/valid-mips32.txt | 180 +
.../Disassembler/Mips/mips32/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips32_le.txt | 450 -
.../Mips/mips32r2/invalid-xfail.txt | 13 +
.../Mips/mips32r2/valid-mips32r2-el.txt | 2 +
.../Mips/mips32r2/valid-mips32r2.txt | 195 +
.../Mips/mips32r2/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips32r2_le.txt | 459 -
.../Mips/mips32r3/invalid-xfail.txt | 13 +
.../Mips/mips32r3/valid-mips32r3.txt | 197 +
.../Mips/mips32r3/valid-xfail.txt | 13 +
.../Mips/mips32r5/invalid-xfail.txt | 13 +
.../Mips/mips32r5/valid-mips32r5.txt | 198 +
.../Mips/mips32r5/valid-xfail.txt | 13 +
.../Mips/mips32r6/valid-mips32r6-el.txt | 2 +-
.../Mips/mips32r6/valid-mips32r6.txt | 3 +-
.../Disassembler/Mips/mips4/invalid-xfail.txt | 13 +
.../Mips/mips4/valid-mips4-el.txt | 2 +
.../Disassembler/Mips/mips4/valid-mips4.txt | 29 +
.../Disassembler/Mips/mips4/valid-xfail.txt | 16 +
.../Mips/mips64/invalid-xfail.txt | 13 +
.../Mips/mips64/valid-mips64-el.txt | 30 +
.../Disassembler/Mips/mips64/valid-mips64.txt | 193 +
.../Disassembler/Mips/mips64/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips64_le.txt | 84 -
.../Mips/mips64r2/invalid-xfail.txt | 13 +
.../Mips/mips64r2/valid-mips64r2-el.txt | 32 +
.../Mips/mips64r2/valid-mips64r2.txt | 212 +
.../Mips/mips64r2/valid-xfail.txt | 14 +
test/MC/Disassembler/Mips/mips64r2_le.txt | 90 -
.../Mips/mips64r3/invalid-xfail.txt | 13 +
.../Mips/mips64r3/valid-mips64r3-el.txt | 2 +
.../Mips/mips64r3/valid-mips64r3.txt | 243 +
.../Mips/mips64r3/valid-xfail.txt | 14 +
.../Mips/mips64r5/invalid-xfail.txt | 13 +
.../Mips/mips64r5/valid-mips64r5-el.txt | 2 +
.../Mips/mips64r5/valid-mips64r5.txt | 244 +
.../Mips/mips64r5/valid-xfail.txt | 14 +
.../Mips/mips64r6/valid-mips64r6-el.txt | 6 +-
.../Mips/mips64r6/valid-mips64r6.txt | 7 +-
test/MC/Disassembler/Mips/msa/test_elm.txt | 1 -
.../Disassembler/Mips/msa/test_elm_msa64.txt | 5 +-
.../Disassembler/PowerPC/ppc64-encoding.txt | 4 +-
.../Disassembler/PowerPC/ppc64le-encoding.txt | 4 +-
test/MC/Disassembler/PowerPC/vsx.txt | 12 +
test/MC/Disassembler/Sparc/sparc-mem.txt | 24 +
test/MC/Disassembler/Sparc/sparc-v9.txt | 4 +
test/MC/Disassembler/SystemZ/insns.txt | 74 +-
test/MC/Disassembler/X86/x86-64.txt | 111 +
test/MC/ELF/ARM/directive-type-diagnostics.s | 10 +
test/MC/ELF/align-zero.s | 4 +
test/MC/ELF/align.s | 14 +-
test/MC/ELF/cfi-adjust-cfa-offset.s | 2 +-
test/MC/ELF/cfi-advance-loc2.s | 2 +-
test/MC/ELF/cfi-def-cfa-offset.s | 2 +-
test/MC/ELF/cfi-def-cfa-register.s | 2 +-
test/MC/ELF/cfi-def-cfa.s | 2 +-
test/MC/ELF/cfi-escape.s | 2 +-
test/MC/ELF/cfi-large-model.s | 2 +-
test/MC/ELF/cfi-offset.s | 2 +-
test/MC/ELF/cfi-register.s | 2 +-
test/MC/ELF/cfi-rel-offset.s | 2 +-
test/MC/ELF/cfi-rel-offset2.s | 2 +-
test/MC/ELF/cfi-remember.s | 2 +-
test/MC/ELF/cfi-restore.s | 2 +-
test/MC/ELF/cfi-same-value.s | 2 +-
test/MC/ELF/cfi-signal-frame.s | 2 +-
test/MC/ELF/cfi-undefined.s | 2 +-
test/MC/ELF/cfi-version.ll | 8 +-
test/MC/ELF/cfi-window-save.s | 2 +-
test/MC/ELF/cfi-zero-addr-delta.s | 2 +-
test/MC/ELF/cfi.s | 2 +-
test/MC/ELF/comdat-dup-group-name.s | 16 +-
test/MC/ELF/comdat-reloc.s | 6 +-
test/MC/ELF/comdat.s | 14 +-
test/MC/ELF/common-error1.s | 2 +-
test/MC/ELF/common-error2.s | 2 +-
test/MC/ELF/common2.s | 7 +-
test/MC/ELF/debug-loc.s | 2 +-
test/MC/ELF/div-by-zero.s | 6 +
test/MC/ELF/dot-symbol-assignment.s | 5 +-
test/MC/ELF/empty-twice.ll | 6 +
test/MC/ELF/empty.s | 36 +-
test/MC/ELF/many-sections-2.s | 3 +
test/MC/ELF/many-sections-3.s | 2 +
test/MC/ELF/many-sections.s | 3 +-
test/MC/ELF/popsection.s | 4 +-
test/MC/ELF/relax-arith.s | 32 +
test/MC/ELF/relocation-386.s | 5 +-
test/MC/ELF/relocation-pc.s | 8 +-
test/MC/ELF/relocation.s | 8 +-
test/MC/ELF/section-sym.s | 18 +-
test/MC/ELF/section-unique.s | 4 +-
test/MC/ELF/section.s | 2 +-
test/MC/ELF/sleb.s | 16 +-
test/MC/ELF/strtab-suffix-opt.s | 6 +-
test/MC/ELF/uleb.s | 16 +-
test/MC/Hexagon/asmMap.s | 608 +
test/MC/Hexagon/capitalizedEndloop.s | 29 +
test/MC/Hexagon/dcfetch.s | 15 +
test/MC/Hexagon/empty_asm.s | 15 +
test/MC/Hexagon/endloop.s | 19 +
test/MC/Hexagon/got.s | 11 +
test/MC/Hexagon/inst_and64.ll | 2 +-
test/MC/Hexagon/inst_or64.ll | 2 +-
test/MC/Hexagon/inst_xor64.ll | 2 +-
test/MC/Hexagon/instructions/alu32_alu.s | 84 +
test/MC/Hexagon/instructions/alu32_perm.s | 40 +
test/MC/Hexagon/instructions/alu32_pred.s | 222 +
test/MC/Hexagon/instructions/cr.s | 78 +
test/MC/Hexagon/instructions/j.s | 206 +
test/MC/Hexagon/instructions/jr.s | 38 +
test/MC/Hexagon/instructions/ld.s | 493 +
test/MC/Hexagon/instructions/memop.s | 56 +
test/MC/Hexagon/instructions/nv_j.s | 180 +
test/MC/Hexagon/instructions/nv_st.s | 290 +
test/MC/Hexagon/instructions/st.s | 434 +
test/MC/Hexagon/instructions/system_user.s | 26 +
test/MC/Hexagon/instructions/xtype_alu.s | 395 +
test/MC/Hexagon/instructions/xtype_bit.s | 118 +
test/MC/Hexagon/instructions/xtype_complex.s | 128 +
test/MC/Hexagon/instructions/xtype_fp.s | 146 +
test/MC/Hexagon/instructions/xtype_mpy.s | 400 +
test/MC/Hexagon/instructions/xtype_perm.s | 104 +
test/MC/Hexagon/instructions/xtype_pred.s | 136 +
test/MC/Hexagon/instructions/xtype_shift.s | 260 +
test/MC/Hexagon/jumpdoublepound.s | 13 +
test/MC/Hexagon/labels.s | 26 +
test/MC/Hexagon/new-value-check.s | 72 +
test/MC/Hexagon/out_of_range.s | 10 +
test/MC/Hexagon/pcrel.s | 11 +
test/MC/Hexagon/relaxed_newvalue.s | 10 +
test/MC/Hexagon/test.s | 4 +
test/MC/Hexagon/two_ext.s | 12 +
test/MC/Hexagon/v60-alu.s | 312 +
test/MC/Hexagon/v60-permute.s | 51 +
test/MC/Hexagon/v60-shift.s | 39 +
test/MC/Hexagon/v60-vcmp.s | 84 +
test/MC/Hexagon/v60-vmem.s | 424 +
test/MC/Hexagon/v60-vmpy-acc.s | 123 +
test/MC/Hexagon/v60-vmpy1.s | 138 +
test/MC/Hexagon/v60lookup.s | 14 +
.../AArch64/darwin-ARM64-local-label-diff.s | 11 +-
test/MC/MachO/AArch64/reloc-errors.s | 10 +
test/MC/MachO/ARM/bad-darwin-ARM-reloc.s | 6 +
test/MC/MachO/ARM/compact-unwind-armv7k.s | 124 +
test/MC/MachO/ARM/darwin-ARM-reloc.s | 315 +-
test/MC/MachO/ARM/darwin-Thumb-reloc.s | 241 +-
test/MC/MachO/ARM/data-in-code.s | 53 +-
test/MC/MachO/ARM/empty-function-nop.ll | 24 +-
.../MachO/ARM/ios-version-min-load-command.s | 18 +-
.../ARM/long-call-branch-island-relocation.s | 22 +-
test/MC/MachO/ARM/no-subsections-reloc.s | 8 +-
test/MC/MachO/ARM/nop-armv4-padding.s | 7 +-
test/MC/MachO/ARM/nop-armv6t2-padding.s | 7 +-
test/MC/MachO/ARM/nop-thumb-padding.s | 7 +-
test/MC/MachO/ARM/nop-thumb2-padding.s | 7 +-
test/MC/MachO/ARM/relax-thumb-ldr-literal.s | 15 +-
test/MC/MachO/ARM/relax-thumb2-branches.s | 39 +-
test/MC/MachO/ARM/thumb-bl-jbits.s | 12 +-
.../MachO/ARM/thumb2-function-relative-load.s | 6 +-
test/MC/MachO/ARM/thumb2-movt-fixup.s | 32 +-
test/MC/MachO/ARM/thumb2-movw-fixup.s | 90 +-
.../MachO/ARM/tvos-version-min-load-command.s | 13 +
test/MC/MachO/ARM/version-min-diagnostics.s | 40 +
test/MC/MachO/ARM/version-min-diagnostics2.s | 34 +
test/MC/MachO/ARM/version-min.s | 16 +
.../ARM/watchos-version-min-load-command.s | 13 +
test/MC/MachO/PowerPC/coal-sections-powerpc.s | 46 +
test/MC/MachO/PowerPC/lit.local.cfg | 2 +
test/MC/MachO/absolute.s | 297 +-
test/MC/MachO/absolutize.s | 290 +-
test/MC/MachO/bad-darwin-x86_64-diff-relocs.s | 10 +-
test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s | 6 -
test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s | 6 -
test/MC/MachO/coal-sections-x86_64.s | 48 +
test/MC/MachO/comm-1.s | 223 +-
test/MC/MachO/cstexpr-gotpcrel-64.ll | 12 +-
test/MC/MachO/darwin-complex-difference.s | 228 +-
.../MachO/darwin-version-min-load-command.s | 28 +
.../MachO/darwin-x86_64-diff-reloc-assign.s | 16 +-
test/MC/MachO/darwin-x86_64-diff-relocs.s | 400 +-
test/MC/MachO/darwin-x86_64-nobase-relocs.s | 98 +-
test/MC/MachO/darwin-x86_64-reloc-offsets.s | 404 +-
test/MC/MachO/data.s | 115 +-
test/MC/MachO/debug_frame.s | 52 +-
test/MC/MachO/diff-with-two-sections.s | 135 +-
test/MC/MachO/direction_labels.s | 169 +-
test/MC/MachO/empty-twice.ll | 12 +
test/MC/MachO/file.s | 2 +-
test/MC/MachO/gen-dwarf.s | 4 +-
test/MC/MachO/i386-large-relocations.s | 24 +-
test/MC/MachO/indirect-symbols.s | 365 +-
test/MC/MachO/jcc.s | 98 +-
test/MC/MachO/lcomm-attributes.s | 253 +-
test/MC/MachO/linker-option-2.s | 37 +-
test/MC/MachO/linker-options.ll | 47 +-
test/MC/MachO/loc.s | 50 +-
test/MC/MachO/osx-version-min-load-command.s | 18 +-
test/MC/MachO/pcrel-to-other-section.s | 216 +-
test/MC/MachO/relax-jumps.s | 15 +-
test/MC/MachO/relax-recompute-align.s | 35 +-
test/MC/MachO/reloc-diff.s | 41 +-
test/MC/MachO/reloc-pcrel-offset.s | 33 +-
test/MC/MachO/reloc-pcrel.s | 53 +-
test/MC/MachO/section-align-1.s | 164 +-
test/MC/MachO/section-align-2.s | 257 +-
test/MC/MachO/section-attributes.s | 9 +-
test/MC/MachO/section-flags.s | 53 +-
test/MC/MachO/string-table.s | 201 +-
test/MC/MachO/symbol-diff.s | 241 +-
test/MC/MachO/symbol-flags.s | 628 +-
test/MC/MachO/symbol-indirect.s | 444 +-
test/MC/MachO/symbols-1.s | 620 +-
test/MC/MachO/tbss.s | 228 +-
test/MC/MachO/tdata.s | 211 +-
test/MC/MachO/temp-labels.s | 50 +-
test/MC/MachO/thread_init_func.s | 132 +-
test/MC/MachO/tls.s | 468 +-
test/MC/MachO/tlv-bss.ll | 15 +-
test/MC/MachO/tlv-reloc.s | 321 +-
test/MC/MachO/tlv.s | 213 +-
test/MC/MachO/values.s | 247 +-
test/MC/MachO/variable-exprs.s | 814 +-
test/MC/MachO/weakdef.s | 266 +-
test/MC/MachO/x86-data-in-code.ll | 5 +-
test/MC/MachO/x86_32-optimal_nop.s | 229 +-
.../MachO/x86_32-scattered-reloc-fallback.s | 8 +-
test/MC/MachO/x86_32-sections.s | 1190 +-
test/MC/MachO/x86_32-symbols.s | 2004 +-
test/MC/MachO/x86_64-reloc-arithmetic.s | 42 +-
test/MC/MachO/x86_64-sections.s | 1160 +-
test/MC/MachO/zerofill-1.s | 234 +-
test/MC/MachO/zerofill-2.s | 201 +-
test/MC/MachO/zerofill-3.s | 253 +-
test/MC/MachO/zerofill-4.s | 104 +-
test/MC/MachO/zerofill-5.s | 209 +-
test/MC/MachO/zerofill-sect-align.s | 32 +-
test/MC/Mips/branch-pseudos-bad.s | 17 +
test/MC/Mips/branch-pseudos.s | 180 +
test/MC/Mips/cnmips/invalid.s | 15 +
test/MC/Mips/cprestore-bad.s | 23 +
test/MC/Mips/cprestore-noreorder.s | 97 +
test/MC/Mips/cprestore-reorder.s | 98 +
test/MC/Mips/cprestore-warning-unused.s | 10 +
test/MC/Mips/cpsetup.s | 158 +-
test/MC/Mips/directive-ent.s | 50 +
test/MC/Mips/dsp/invalid.s | 25 +
test/MC/Mips/dsp/valid.s | 131 +
test/MC/Mips/dspr2/invalid.s | 20 +
test/MC/Mips/dspr2/valid.s | 179 +
test/MC/Mips/elf_basic.s | 2 +-
test/MC/Mips/eva/invalid-noeva-wrong-error.s | 69 +
test/MC/Mips/eva/invalid-noeva.s | 22 +
test/MC/Mips/eva/invalid.s | 11 +
test/MC/Mips/eva/invalid_R6.s | 20 +
test/MC/Mips/eva/valid_R6.s | 47 +
test/MC/Mips/eva/valid_preR6.s | 62 +
test/MC/Mips/expansion-jal-sym-pic.s | 183 +
test/MC/Mips/instalias-imm-expanding.s | 273 +
test/MC/Mips/macro-bcc-imm-bad.s | 12 +
test/MC/Mips/macro-bcc-imm.s | 69 +
test/MC/Mips/macro-ddiv-bad.s | 18 +
test/MC/Mips/macro-ddiv.s | 85 +
test/MC/Mips/macro-ddivu-bad.s | 18 +
test/MC/Mips/macro-ddivu.s | 59 +
test/MC/Mips/macro-div-bad.s | 18 +
test/MC/Mips/macro-div.s | 64 +
test/MC/Mips/macro-divu-bad.s | 18 +
test/MC/Mips/macro-divu.s | 49 +
test/MC/Mips/macro-dla.s | 707 +
test/MC/Mips/macro-dli.s | 534 +
test/MC/Mips/macro-la-bad.s | 24 +-
test/MC/Mips/macro-la.s | 40 +-
test/MC/Mips/micromips-control-instructions.s | 43 +-
test/MC/Mips/micromips-diagnostic-fixup.s | 7 +-
.../Mips/micromips-dsp/invalid-wrong-error.s | 7 +
test/MC/Mips/micromips-dsp/invalid.s | 23 +
test/MC/Mips/micromips-dsp/valid.s | 105 +
test/MC/Mips/micromips-dspr2/invalid.s | 9 +
test/MC/Mips/micromips-dspr2/valid.s | 127 +
test/MC/Mips/micromips-invalid.s | 27 +-
.../Mips/micromips-loadstore-instructions.s | 24 +
test/MC/Mips/micromips-pc16-fixup.s | 2 +-
test/MC/Mips/micromips/invalid.s | 35 +
test/MC/Mips/micromips32r6/invalid.s | 119 +-
test/MC/Mips/micromips32r6/valid.s | 199 +-
test/MC/Mips/micromips64r6/invalid.s | 145 +
test/MC/Mips/micromips64r6/valid.s | 154 +
test/MC/Mips/mips-alu-instructions.s | 2 +-
test/MC/Mips/mips-diagnostic-fixup.s | 7 +-
test/MC/Mips/mips-dsp-instructions.s | 97 -
test/MC/Mips/mips-expansions-bad.s | 9 +
test/MC/Mips/mips-expansions.s | 215 +-
test/MC/Mips/mips-pc16-fixup.s | 2 +-
test/MC/Mips/mips-pdr.s | 2 +-
test/MC/Mips/mips1/valid.s | 4 +-
test/MC/Mips/mips2/valid.s | 4 +-
test/MC/Mips/mips3/valid.s | 8 +-
test/MC/Mips/mips32/valid.s | 6 +-
test/MC/Mips/mips32r2/invalid-dsp.s | 97 +
test/MC/Mips/mips32r2/invalid-dspr2.s | 134 +
test/MC/Mips/mips32r2/invalid-msa.s | 62 +
test/MC/Mips/mips32r2/invalid.s | 26 +-
test/MC/Mips/mips32r2/valid-xfail.s | 178 -
test/MC/Mips/mips32r2/valid.s | 6 +-
test/MC/Mips/mips32r3/invalid.s | 10 +-
test/MC/Mips/mips32r3/valid-xfail.s | 178 -
test/MC/Mips/mips32r3/valid.s | 6 +-
test/MC/Mips/mips32r5/invalid-mips32.s | 8 +
test/MC/Mips/mips32r5/invalid-mips32r2.s | 8 +
test/MC/Mips/mips32r5/invalid-mips32r3.s | 8 +
test/MC/Mips/mips32r5/invalid.s | 10 +-
test/MC/Mips/mips32r5/valid-xfail.s | 178 -
test/MC/Mips/mips32r5/valid.s | 7 +-
.../Mips/mips32r6/invalid-mips1-wrong-error.s | 9 +-
.../Mips/mips32r6/invalid-mips4-wrong-error.s | 1 -
test/MC/Mips/mips32r6/invalid-mips4.s | 1 +
test/MC/Mips/mips32r6/invalid.s | 43 +-
test/MC/Mips/mips32r6/valid.s | 10 +-
test/MC/Mips/mips4/valid.s | 8 +-
test/MC/Mips/mips5/valid.s | 8 +-
test/MC/Mips/mips64-alu-instructions.s | 6 +-
test/MC/Mips/mips64-expansions.s | 221 +-
test/MC/Mips/mips64/valid.s | 10 +-
test/MC/Mips/mips64r2/invalid.s | 64 +-
test/MC/Mips/mips64r2/valid-xfail.s | 197 +-
test/MC/Mips/mips64r2/valid.s | 10 +-
test/MC/Mips/mips64r3/invalid.s | 12 +-
test/MC/Mips/mips64r3/valid-xfail.s | 194 +-
test/MC/Mips/mips64r3/valid.s | 10 +-
test/MC/Mips/mips64r5/invalid-mips64.s | 8 +
test/MC/Mips/mips64r5/invalid-mips64r2.s | 8 +
test/MC/Mips/mips64r5/invalid-mips64r3.s | 8 +
test/MC/Mips/mips64r5/invalid.s | 12 +-
test/MC/Mips/mips64r5/valid-xfail.s | 194 +-
test/MC/Mips/mips64r5/valid.s | 11 +-
.../Mips/mips64r6/invalid-mips1-wrong-error.s | 8 +-
.../Mips/mips64r6/invalid-mips3-wrong-error.s | 8 +-
.../Mips/mips64r6/invalid-mips4-wrong-error.s | 1 -
test/MC/Mips/mips64r6/invalid-mips4.s | 1 +
test/MC/Mips/mips64r6/invalid.s | 45 +-
test/MC/Mips/mips64r6/valid.s | 11 +-
test/MC/Mips/msa/invalid-64.s | 66 +
test/MC/Mips/msa/invalid.s | 67 +
test/MC/Mips/msa/test_elm.s | 45 +-
test/MC/Mips/msa/test_elm_msa64.s | 6 +-
test/MC/Mips/reloc-directive-bad.s | 6 +
test/MC/Mips/reloc-directive.s | 58 +
test/MC/Mips/rotations32-bad.s | 31 +
test/MC/Mips/rotations32.s | 87 +
test/MC/Mips/rotations64.s | 238 +
test/MC/Mips/set-nomacro.s | 15 +
test/MC/PowerPC/ppc-llong.s | 2 +-
test/MC/PowerPC/ppc-word.s | 2 +-
test/MC/PowerPC/ppc64-encoding.s | 22 +-
test/MC/PowerPC/ppc64-fixup-apply.s | 2 +-
test/MC/PowerPC/pr24686.s | 7 +
test/MC/PowerPC/st-other-crash.s | 2 +-
test/MC/PowerPC/vsx.s | 12 +
test/MC/Sparc/sparc-alu-instructions.s | 6 +
test/MC/Sparc/sparc-asm-errors.s | 8 +
test/MC/Sparc/sparc-assembly-exprs.s | 9 +-
test/MC/Sparc/sparc-atomic-instructions.s | 9 +
test/MC/Sparc/sparc-ctrl-instructions.s | 4 +
test/MC/Sparc/sparc-fp-instructions.s | 9 +
test/MC/Sparc/sparc-mem-instructions.s | 18 +
test/MC/Sparc/sparc-pic.s | 44 +-
test/MC/Sparc/sparc-relocations.s | 10 +-
test/MC/Sparc/sparc-special-registers.s | 18 +
test/MC/Sparc/sparc-synthetic-instructions.s | 72 +-
test/MC/Sparc/sparcv9-instructions.s | 272 +
test/MC/SystemZ/fixups.s | 8 +-
test/MC/SystemZ/insn-good-z13.s | 66 +-
test/MC/SystemZ/insn-good.s | 56 +
test/MC/SystemZ/lit.local.cfg | 4 -
test/MC/X86/X86_64-pku.s | 8 +
test/MC/X86/avx512-encodings.s | 5181 +++++
test/MC/X86/avx512vl-encoding.s | 209 +
test/MC/X86/cfi_def_cfa-crash.s | 24 +-
test/MC/X86/encoder-fail.s | 3 +
test/MC/X86/expand-var.s | 7 +-
test/MC/X86/i386-darwin-frame-register.ll | 2 +-
test/MC/X86/intel-syntax-2.s | 14 +
test/MC/X86/intel-syntax-ambiguous.s | 12 +
test/MC/X86/intel-syntax-avx512.s | 96 +
test/MC/X86/intel-syntax-print.ll | 10 +
test/MC/X86/intel-syntax.s | 72 +
test/MC/X86/large-bss.s | 14 +
test/MC/X86/macho-reloc-errors-x86.s | 15 +
test/MC/X86/macho-reloc-errors-x86_64.s | 19 +
test/MC/X86/validate-inst-att.s | 17 +-
test/MC/X86/validate-inst-intel.s | 8 +-
test/MC/X86/x86-32-coverage.s | 20 +
test/MC/X86/x86-64-avx512bw.s | 1059 ++
test/MC/X86/x86-64-avx512bw_vl.s | 3264 +++-
test/MC/X86/x86-64-avx512cd.s | 450 +
test/MC/X86/x86-64-avx512cd_vl.s | 913 +
test/MC/X86/x86-64-avx512dq.s | 2229 +++
test/MC/X86/x86-64-avx512dq_vl.s | 1720 ++
test/MC/X86/x86-64-avx512f_vl.s | 5560 ++++++
test/MC/X86/x86-64.s | 18 +-
test/MC/X86/x86-evenDirective.s | 47 +
test/MC/X86/x86_nop.s | 8 +-
test/Makefile | 3 +
test/Object/AMDGPU/elf-definitios.yaml | 27 +
test/Object/Inputs/coff-short-import-code | Bin 0 -> 31 bytes
test/Object/Inputs/coff-short-import-data | Bin 0 -> 31 bytes
...pt-invalid-dynamic-table-offset.elf.x86-64 | Bin 0 -> 1688 bytes
...rupt-invalid-dynamic-table-size.elf.x86-64 | Bin 0 -> 1736 bytes
...invalid-dynamic-table-too-large.elf.x86-64 | Bin 0 -> 1688 bytes
.../corrupt-invalid-phentsize.elf.x86-64 | Bin 0 -> 1720 bytes
...corrupt-invalid-relocation-size.elf.x86-64 | Bin 0 -> 2160 bytes
.../Inputs/corrupt-invalid-strtab.elf.x86-64 | Bin 0 -> 1712 bytes
.../corrupt-invalid-virtual-addr.elf.x86-64 | Bin 0 -> 1720 bytes
.../Inputs/invalid-symbol-table-size.elf | Bin 0 -> 536 bytes
test/Object/Inputs/invalid-xindex-size.elf | Bin 0 -> 624 bytes
test/Object/Inputs/main-ret-zero-pe-i386.dll | Bin 0 -> 5120 bytes
test/Object/Inputs/main-ret-zero-pe-i386.exe | Bin 0 -> 5120 bytes
.../no-section-header-string-table.elf-x86-64 | Bin 0 -> 1024 bytes
test/Object/Inputs/pr25877.lib | Bin 0 -> 774 bytes
.../Object/Inputs/rel-no-sec-table.elf-x86-64 | Bin 0 -> 2152 bytes
test/Object/Inputs/shndx.elf | Bin 0 -> 824 bytes
.../Object/Inputs/trivial-object-test.elf-avr | Bin 0 -> 840 bytes
test/Object/X86/nm-ir.ll | 4 +-
test/Object/archive-format.test | 41 +-
test/Object/archive-symtab.test | 23 +-
test/Object/archive-update.test | 17 +-
test/Object/corrupt.test | 58 +-
test/Object/invalid.test | 8 +
test/Object/nm-archive.test | 9 +
test/Object/nm-pe-image.test | 31 +
.../no-section-header-string-table.test | 10 +
test/Object/obj2yaml.test | 65 +
test/Object/objdump-shndx.test | 8 +
test/Object/pr25877.test | 9 +
test/Object/readobj-absent.test | 2 +
test/Object/readobj-shared-object.test | 38 +-
test/Object/relocation-executable.test | 12 +
test/Other/2010-05-06-Printer.ll | 1 +
test/Other/extract-alias.ll | 22 +-
test/Other/llvm-nm-without-aliases.ll | 4 +-
test/Other/opt-twice.ll | 14 +
test/SymbolRewriter/rewrite.ll | 2 +-
test/TableGen/cast-list-initializer.td | 10 +
test/TableGen/intrinsic-varargs.td | 4 +-
test/TableGen/trydecode-emission.td | 43 +
test/TableGen/trydecode-emission2.td | 44 +
test/TableGen/trydecode-emission3.td | 44 +
test/Transforms/ADCE/basictest.ll | 3 +-
test/Transforms/AddDiscriminators/basic.ll | 14 +-
test/Transforms/AddDiscriminators/call.ll | 52 +
.../dbg-declare-discriminator.ll | 30 +
test/Transforms/AddDiscriminators/diamond.ll | 72 +
.../AddDiscriminators/first-only.ll | 14 +-
test/Transforms/AddDiscriminators/multiple.ll | 12 +-
.../AddDiscriminators/no-discriminators.ll | 14 +-
test/Transforms/AddDiscriminators/oneline.ll | 102 +
test/Transforms/ArgumentPromotion/dbg.ll | 12 +-
.../AtomicExpand/ARM/atomic-expansion-v7.ll | 24 +-
.../AtomicExpand/ARM/atomic-expansion-v8.ll | 24 +-
.../AtomicExpand/ARM/cmpxchg-weak.ll | 20 +-
.../X86/expand-atomic-non-integer.ll | 82 +
.../X86/expand-atomic-rmw-initial-load.ll | 11 +
.../Transforms/AtomicExpand/X86/lit.local.cfg | 2 +
test/Transforms/BBVectorize/X86/wr-aliases.ll | 2 +-
test/Transforms/BBVectorize/simple3.ll | 16 +-
.../2007-10-19-InlineAsmDirectives.ll | 9 +-
.../CodeGenPrepare/AArch64/free-zext.ll | 82 +
.../CodeGenPrepare/AArch64/widen_switch.ll | 95 +
.../CodeGenPrepare/X86/catchpad-phi-cast.ll | 118 +
.../CodeGenPrepare/X86/cttz-ctlz.ll | 56 +
test/Transforms/CodeGenPrepare/X86/select.ll | 141 +
.../CodeGenPrepare/X86/widen_switch.ll | 95 +
.../CodeGenPrepare/invariant.group.ll | 23 +
.../CodeGenPrepare/statepoint-relocate.ll | 87 +-
test/Transforms/ConstProp/calls.ll | 240 +-
test/Transforms/ConstProp/insertvalue.ll | 10 +
test/Transforms/ConstProp/loads.ll | 7 +-
test/Transforms/ConstantMerge/merge-both.ll | 2 +-
.../CorrelatedValuePropagation/non-null.ll | 60 +
.../CorrelatedValuePropagation/range.ll | 24 +
.../CorrelatedValuePropagation/select.ll | 2 +-
test/Transforms/CrossDSOCFI/basic.ll | 88 +
.../DeadArgElim/2010-04-30-DbgInfo.ll | 24 +-
test/Transforms/DeadArgElim/aggregates.ll | 26 +-
test/Transforms/DeadArgElim/dbginfo.ll | 15 +-
.../Transforms/DeadArgElim/naked_functions.ll | 31 +
test/Transforms/DeadArgElim/operandbundle.ll | 12 +
.../DeadStoreElimination/calloc-store.ll | 65 +
.../DeadStoreElimination/inst-limits.ll | 8 +-
.../Transforms/DeadStoreElimination/simple.ll | 147 +
test/Transforms/EarlyCSE/AArch64/ldstN.ll | 18 +
test/Transforms/EarlyCSE/atomics.ll | 259 +
test/Transforms/EarlyCSE/basic.ll | 74 +
test/Transforms/EarlyCSE/fence.ll | 86 +
test/Transforms/Float2Int/basic.ll | 10 +
test/Transforms/ForcedFunctionAttrs/forced.ll | 12 +
.../FunctionAttrs/2008-09-03-ReadNone.ll | 5 +-
.../FunctionAttrs/2009-01-04-Annotate.ll | 21 -
.../FunctionAttrs/2010-10-30-volatile.ll | 4 +-
test/Transforms/FunctionAttrs/atomic.ll | 4 +-
test/Transforms/FunctionAttrs/nonnull.ll | 74 +
test/Transforms/FunctionAttrs/norecurse.ll | 57 +
test/Transforms/FunctionAttrs/optnone.ll | 6 +-
.../out-of-bounds-iterator-bug.ll | 30 +
test/Transforms/FunctionAttrs/readattrs.ll | 38 +
.../FunctionImport/Inputs/funcimport.ll | 87 +
.../FunctionImport/Inputs/funcimport_debug.ll | 27 +
test/Transforms/FunctionImport/funcimport.ll | 75 +
.../FunctionImport/funcimport_debug.ll | 45 +
.../GCOVProfiling/function-numbering.ll | 14 +-
test/Transforms/GCOVProfiling/global-ctor.ll | 8 +-
test/Transforms/GCOVProfiling/linezero.ll | 18 +-
test/Transforms/GCOVProfiling/linkagename.ll | 6 +-
test/Transforms/GCOVProfiling/return-block.ll | 6 +-
test/Transforms/GCOVProfiling/version.ll | 6 +-
test/Transforms/GVN/2009-03-10-PREOnVoid.ll | 28 +-
test/Transforms/GVN/assume-equal.ll | 235 +
test/Transforms/GVN/crash-no-aa.ll | 2 +-
test/Transforms/GVN/funclet.ll | 44 +
test/Transforms/GVN/invariant-load.ll | 17 +
test/Transforms/GVN/invariant.group.ll | 337 +
test/Transforms/GVN/load-pre-nonlocal.ll | 4 +-
.../GVN/no_speculative_loads_with_asan.ll | 57 +
test/Transforms/GVN/phi-translate.ll | 4 +-
test/Transforms/GVN/pr14166.ll | 2 +-
test/Transforms/GVN/pr24426.ll | 18 +
test/Transforms/GVN/pr25440.ll | 108 +
test/Transforms/GVN/pre-gep-load.ll | 31 +
test/Transforms/GVN/pre-load.ll | 41 +
test/Transforms/GVN/range.ll | 24 +-
.../GlobalDCE/2009-01-05-DeadAliases.ll | 14 +-
.../GlobalDCE/2009-02-17-AliasUsesAliasee.ll | 2 +-
test/Transforms/GlobalDCE/pr20981.ll | 4 +-
.../GlobalOpt/2009-02-15-BitcastAlias.ll | 2 +-
.../GlobalOpt/2009-02-15-ResolveAlias.ll | 4 +-
test/Transforms/GlobalOpt/2009-03-05-dbg.ll | 8 +-
test/Transforms/GlobalOpt/alias-resolve.ll | 22 +-
.../GlobalOpt/alias-used-address-space.ll | 6 +-
.../GlobalOpt/alias-used-section.ll | 2 +-
test/Transforms/GlobalOpt/alias-used.ll | 16 +-
test/Transforms/GlobalOpt/assume.ll | 21 +
.../available_externally_global_ctors.ll | 22 +
test/Transforms/GlobalOpt/deadglobal.ll | 3 +
.../externally-initialized-aggregate.ll | 50 +
.../GlobalOpt/externally-initialized.ll | 37 +
test/Transforms/GlobalOpt/global-demotion.ll | 80 +
.../GlobalOpt/invariant.group.barrier.ll | 79 +
.../GlobalOpt/localize-constexpr.ll | 28 +
test/Transforms/GlobalOpt/metadata.ll | 2 +-
test/Transforms/GlobalOpt/tls.ll | 1 +
test/Transforms/GlobalOpt/unnamed-addr.ll | 6 +
test/Transforms/IndVarSimplify/bec-cmp.ll | 47 +
test/Transforms/IndVarSimplify/const_phi.ll | 33 +
.../IndVarSimplify/eliminate-comparison.ll | 348 +
test/Transforms/IndVarSimplify/iv-widen.ll | 30 +-
.../loop-invariant-conditions.ll | 279 +
test/Transforms/IndVarSimplify/pr24356.ll | 63 +
test/Transforms/IndVarSimplify/pr24783.ll | 30 +
test/Transforms/IndVarSimplify/pr24804.ll | 25 +
test/Transforms/IndVarSimplify/pr24952.ll | 27 +
test/Transforms/IndVarSimplify/pr24956.ll | 37 +
test/Transforms/IndVarSimplify/pr25047.ll | 49 +
test/Transforms/IndVarSimplify/pr25051.ll | 44 +
test/Transforms/IndVarSimplify/pr25060.ll | 37 +
test/Transforms/IndVarSimplify/pr25360.ll | 33 +
test/Transforms/IndVarSimplify/pr25421.ll | 30 +
test/Transforms/IndVarSimplify/pr25578.ll | 45 +
.../IndVarSimplify/tripcount_infinite.ll | 15 +-
.../IndVarSimplify/widen-loop-comp.ll | 160 +
test/Transforms/IndVarSimplify/zext-nuw.ll | 49 +
.../annotate.ll} | 5 +-
.../Inline/alloca-dbgdeclare-merge.ll | 102 +
test/Transforms/Inline/alloca-dbgdeclare.ll | 16 +-
.../Inline/debug-info-duplicate-calls.ll | 26 +-
test/Transforms/Inline/debug-invoke.ll | 4 +-
test/Transforms/Inline/deopt-bundles.ll | 203 +
test/Transforms/Inline/ignore-debug-info.ll | 16 +-
test/Transforms/Inline/inline-assume.ll | 31 +
test/Transforms/Inline/inline-cold-callee.ll | 39 +
...inline-constexpr-addrspacecast-argument.ll | 30 +
test/Transforms/Inline/inline-hot-callee.ll | 39 +
test/Transforms/Inline/inline-optsize.ll | 2 +-
test/Transforms/Inline/inline_dbg_declare.ll | 26 +-
test/Transforms/Inline/inline_invoke.ll | 3 +-
test/Transforms/Inline/noalias-calls.ll | 19 +-
test/Transforms/Inline/noalias-cs.ll | 12 +-
test/Transforms/Inline/noalias2.ll | 4 +-
test/Transforms/Inline/zero-cost.ll | 17 +
.../InstCombine/2007-09-10-AliasConstFold.ll | 2 +-
.../InstCombine/2007-09-17-AliasConstFold2.ll | 2 +-
.../InstCombine/LandingPadClauses.ll | 11 +-
test/Transforms/InstCombine/add2.ll | 10 +
.../Transforms/InstCombine/alias-recursion.ll | 2 +-
test/Transforms/InstCombine/all-bits-shift.ll | 46 +
test/Transforms/InstCombine/alloca.ll | 11 +
test/Transforms/InstCombine/and-compare.ll | 23 +
test/Transforms/InstCombine/and2.ll | 68 +
test/Transforms/InstCombine/apint-or.ll | 79 +
test/Transforms/InstCombine/apint-or1.ll | 36 -
test/Transforms/InstCombine/apint-or2.ll | 35 -
.../InstCombine/assume-redundant.ll | 26 +
.../InstCombine/bitcast-alias-function.ll | 24 +-
.../Transforms/InstCombine/bitcast-bitcast.ll | 84 +
.../InstCombine/bitcast-vec-canon.ll | 25 +-
test/Transforms/InstCombine/bitcast.ll | 55 +
.../Transforms/InstCombine/bitreverse-fold.ll | 11 +
.../InstCombine/bitreverse-recognize.ll | 114 +
test/Transforms/InstCombine/blend_x86.ll | 102 +-
test/Transforms/InstCombine/bswap-fold.ll | 6 +-
.../InstCombine/bswap-known-bits.ll | 47 +
test/Transforms/InstCombine/bswap.ll | 14 +-
.../InstCombine/call_nonnull_arg.ll | 20 +
.../InstCombine/cast-callee-deopt-bundles.ll | 11 +
.../InstCombine/cast-int-fcmp-eq-0.ll | 108 +-
test/Transforms/InstCombine/cast-set.ll | 4 +-
test/Transforms/InstCombine/cast.ll | 54 +-
test/Transforms/InstCombine/compare-alloca.ll | 97 +
test/Transforms/InstCombine/compare-signs.ll | 40 +
.../InstCombine/constant-fold-alias.ll | 4 +-
test/Transforms/InstCombine/ctpop.ll | 45 +
test/Transforms/InstCombine/debug-line.ll | 6 +-
test/Transforms/InstCombine/debuginfo.ll | 12 +-
test/Transforms/InstCombine/demorgan-zext.ll | 34 +
test/Transforms/InstCombine/div.ll | 24 +-
test/Transforms/InstCombine/exp2-1.ll | 19 +-
test/Transforms/InstCombine/extractvalue.ll | 22 +-
test/Transforms/InstCombine/fabs.ll | 25 +
test/Transforms/InstCombine/fast-math.ll | 141 +-
test/Transforms/InstCombine/ffs-1.ll | 69 +-
.../InstCombine/fold-phi-load-metadata.ll | 69 +
test/Transforms/InstCombine/gc.relocate.ll | 39 +-
test/Transforms/InstCombine/gepphigep.ll | 50 +
test/Transforms/InstCombine/icmp-range.ll | 89 +
test/Transforms/InstCombine/icmp-shr.ll | 9 +
test/Transforms/InstCombine/icmp.ll | 73 +-
.../InstCombine/inline-intrinsic-assert.ll | 2 +-
.../InstCombine/insert-extract-shuffle.ll | 47 +-
test/Transforms/InstCombine/intrinsics.ll | 65 +
test/Transforms/InstCombine/lifetime.ll | 93 +
test/Transforms/InstCombine/load-cmp.ll | 7 +-
.../InstCombine/load-combine-metadata-2.ll | 20 +
.../InstCombine/load-combine-metadata-3.ll | 20 +
.../InstCombine/load-combine-metadata-4.ll | 20 +
.../InstCombine/load-combine-metadata.ll | 6 +-
.../InstCombine/loadstore-metadata.ll | 38 +-
.../InstCombine/log-pow-nofastmath.ll | 30 +
test/Transforms/InstCombine/log-pow.ll | 41 +
.../InstCombine/malloc-free-delete.ll | 11 +
test/Transforms/InstCombine/memcmp-1.ll | 53 +-
test/Transforms/InstCombine/memset_chk-1.ll | 26 +
test/Transforms/InstCombine/minmax-fp.ll | 156 +
.../Transforms/InstCombine/neon-intrinsics.ll | 12 +-
.../Transforms/InstCombine/no_cgscc_assert.ll | 2 +-
.../InstCombine/nonnull-attribute.ll | 19 +
test/Transforms/InstCombine/not.ll | 47 +-
.../InstCombine/objsize-address-space.ll | 2 +-
test/Transforms/InstCombine/objsize.ll | 4 +-
test/Transforms/InstCombine/or.ll | 2 +-
.../InstCombine/phi-load-metadata-2.ll | 30 +
.../InstCombine/phi-load-metadata-3.ll | 30 +
.../InstCombine/phi-load-metadata.ll | 30 +
test/Transforms/InstCombine/phi.ll | 130 +
test/Transforms/InstCombine/pow-1.ll | 2 +
test/Transforms/InstCombine/pow-4.ll | 120 +
.../InstCombine/pow-exp-nofastmath.ll | 17 +
test/Transforms/InstCombine/pow-exp.ll | 28 +
test/Transforms/InstCombine/pow-exp2.ll | 19 +
test/Transforms/InstCombine/pow-sqrt.ll | 15 +
test/Transforms/InstCombine/pr20059.ll | 16 -
test/Transforms/InstCombine/pr24605.ll | 15 +
test/Transforms/InstCombine/pr25745.ll | 20 +
test/Transforms/InstCombine/shift.ll | 4 +-
test/Transforms/InstCombine/sincospi.ll | 9 +
test/Transforms/InstCombine/sqrt-nofast.ll | 25 +
test/Transforms/InstCombine/statepoint.ll | 20 +-
test/Transforms/InstCombine/store.ll | 113 +
test/Transforms/InstCombine/strto-1.ll | 2 +-
test/Transforms/InstCombine/tan-nofastmath.ll | 17 +
test/Transforms/InstCombine/tan.ll | 24 +
test/Transforms/InstCombine/token.ll | 89 +
test/Transforms/InstCombine/trunc.ll | 42 +
test/Transforms/InstCombine/unpack-fca.ll | 168 +-
.../InstCombine/vec_demanded_elts.ll | 359 +-
test/Transforms/InstCombine/vec_shuffle.ll | 27 +-
test/Transforms/InstCombine/vector_gep2.ll | 23 +
test/Transforms/InstCombine/x86-f16c.ll | 61 +
test/Transforms/InstCombine/x86-pmovsx.ll | 136 +
test/Transforms/InstCombine/x86-pmovzx.ll | 136 +
test/Transforms/InstCombine/x86-pshufb.ll | 267 +
test/Transforms/InstCombine/x86-sse4a.ll | 336 +
.../InstCombine/x86-vector-shifts.ll | 1318 ++
test/Transforms/InstCombine/x86-xop.ll | 209 +
test/Transforms/InstCombine/xor.ll | 8 +-
test/Transforms/InstSimplify/add-mask.ll | 65 +
test/Transforms/InstSimplify/apint-or.ll | 36 +-
test/Transforms/InstSimplify/bswap.ll | 41 +
test/Transforms/InstSimplify/compare.ll | 8 +
test/Transforms/InstSimplify/implies.ll | 217 +
test/Transforms/InstSimplify/shift-128-kb.ll | 22 +
test/Transforms/InstSimplify/shr-nop.ll | 12 +-
.../2009-01-05-InternalizeAliases.ll | 8 +-
test/Transforms/Internalize/comdat.ll | 52 +
.../Internalize/local-visibility.ll | 8 +-
test/Transforms/JumpThreading/basic.ll | 34 +-
test/Transforms/JumpThreading/implied-cond.ll | 98 +
test/Transforms/JumpThreading/phi-known.ll | 66 +
test/Transforms/JumpThreading/select.ll | 30 +
.../JumpThreading/update-edge-weight.ll | 43 +
test/Transforms/LCSSA/mixed-catch.ll | 95 +
.../2004-09-14-AliasAnalysisInvalidate.ll | 2 +-
test/Transforms/LICM/argmemonly-call.ll | 69 +
test/Transforms/LICM/debug-value.ll | 12 +-
test/Transforms/LICM/hoist-deref-load.ll | 44 +
test/Transforms/LICM/hoist-invariant-load.ll | 2 +-
test/Transforms/LICM/pr23608.ll | 2 +-
.../LoopDistribute/basic-with-memchecks.ll | 2 +-
.../LoopDistribute/bounds-expansion-bug.ll | 106 +
.../unknown-bounds-for-memchecks.ll | 57 +
test/Transforms/LoopIdiom/basic.ll | 107 +
test/Transforms/LoopIdiom/debug-line.ll | 10 +-
test/Transforms/LoopLoadElim/backward.ll | 32 +
.../LoopLoadElim/def-store-before-load.ll | 35 +
test/Transforms/LoopLoadElim/forward.ll | 47 +
test/Transforms/LoopLoadElim/memcheck.ll | 52 +
.../multiple-stores-same-block.ll | 48 +
test/Transforms/LoopLoadElim/unknown-dep.ll | 54 +
test/Transforms/LoopReroll/negative.ll | 48 +
test/Transforms/LoopReroll/reroll_with_dbg.ll | 139 +
test/Transforms/LoopRotate/dbgvalue.ll | 14 +-
test/Transforms/LoopSimplify/dbg-loc.ll | 4 +-
.../LoopSimplify/single-backedge.ll | 2 +-
...fferent-addrspace-addressing-mode-loops.ll | 156 +
.../LoopStrengthReduce/AMDGPU/lit.local.cfg | 3 +
.../AMDGPU/lsr-postinc-pos-addrspace.ll | 113 +
.../LoopStrengthReduce/ARM/ivchain-ARM.ll | 50 +-
.../LoopStrengthReduce/NVPTX/lit.local.cfg | 2 +
.../LoopStrengthReduce/NVPTX/trunc.ll | 45 +
.../X86/ivchain-stress-X86.ll | 2 +-
test/Transforms/LoopStrengthReduce/funclet.ll | 216 +
test/Transforms/LoopStrengthReduce/pr12018.ll | 5 +-
test/Transforms/LoopStrengthReduce/pr25541.ll | 48 +
.../quadradic-exit-value.ll | 2 +-
.../LoopStrengthReduce/sext-ind-var.ll | 140 +
.../LoopUnroll/AMDGPU/lit.local.cfg | 3 +
.../LoopUnroll/AMDGPU/unroll-barrier.ll | 33 +
test/Transforms/LoopUnroll/X86/partial.ll | 9 +-
.../LoopUnroll/full-unroll-bad-geps.ll | 34 -
.../LoopUnroll/full-unroll-crashers.ll | 102 +
.../LoopUnroll/full-unroll-heuristics-2.ll | 57 +
.../LoopUnroll/full-unroll-heuristics-cast.ll | 97 +
.../LoopUnroll/full-unroll-heuristics-cmp.ll | 207 +
.../full-unroll-heuristics-phi-prop.ll | 23 +
test/Transforms/LoopUnroll/pr18861.ll | 93 +-
test/Transforms/LoopUnroll/rebuild_lcssa.ll | 119 +
test/Transforms/LoopUnroll/runtime-loop1.ll | 4 +-
test/Transforms/LoopUnroll/unroll-pragmas.ll | 66 +
.../LoopUnswitch/2011-11-18-SimpleSwitch.ll | 2 +-
.../LoopUnswitch/2011-11-18-TwoSwitches.ll | 2 +-
.../LoopUnswitch/2015-09-18-Addrspace.ll | 28 +
.../LoopUnswitch/LIV-loop-condtion.ll | 28 +
test/Transforms/LoopUnswitch/basictest.ll | 39 +
test/Transforms/LoopUnswitch/cleanuppad.ll | 44 +
test/Transforms/LoopUnswitch/cold-loop.ll | 52 +
test/Transforms/LoopUnswitch/copy-metadata.ll | 23 +
test/Transforms/LoopUnswitch/infinite-loop.ll | 10 +-
.../LoopUnswitch/trivial-unswitch.ll | 47 +
.../AArch64/arbitrary-induction-step.ll | 4 +-
.../AArch64/deterministic-type-shrinkage.ll | 54 +
.../LoopVectorize/AArch64/interleaved_cost.ll | 39 +
.../AArch64/loop-vectorization-factors.ll | 243 +
.../AArch64/reduction-small-size.ll | 191 +
.../LoopVectorize/ARM/interleaved_cost.ll | 39 +
.../LoopVectorize/ARM/vector_cast.ll | 37 +
.../PowerPC/agg-interleave-a2.ll | 40 +
.../PowerPC/stride-vectorization.ll | 30 +
.../LoopVectorize/X86/masked_load_store.ll | 142 +
.../LoopVectorize/X86/metadata-enable.ll | 6 +-
.../Transforms/LoopVectorize/X86/no_fpmath.ll | 104 +
.../Transforms/LoopVectorize/X86/powof2div.ll | 8 +-
.../LoopVectorize/X86/reduction-crash.ll | 2 +-
.../Transforms/LoopVectorize/X86/reg-usage.ll | 71 +
.../LoopVectorize/X86/vector_max_bandwidth.ll | 46 +
.../X86/vector_ptr_load_store.ll | 8 +-
.../X86/vectorization-remarks-missed.ll | 16 +-
.../X86/vectorization-remarks-profitable.ll | 113 +
.../X86/vectorization-remarks.ll | 8 +-
.../LoopVectorize/conditional-assignment.ll | 6 +-
test/Transforms/LoopVectorize/control-flow.ll | 8 +-
test/Transforms/LoopVectorize/dbg.value.ll | 8 +-
test/Transforms/LoopVectorize/debugloc.ll | 18 +-
.../LoopVectorize/gep_with_bitcast.ll | 40 +
.../LoopVectorize/if-pred-stores.ll | 43 +-
test/Transforms/LoopVectorize/induction.ll | 13 +-
test/Transforms/LoopVectorize/miniters.ll | 45 +
.../LoopVectorize/minmax_reduction.ll | 104 +-
.../LoopVectorize/no_array_bounds.ll | 6 +-
.../LoopVectorize/no_outside_user.ll | 2 +-
test/Transforms/LoopVectorize/no_switch.ll | 18 +-
test/Transforms/LoopVectorize/nontemporal.ll | 47 +
test/Transforms/LoopVectorize/optsize.ll | 43 +-
.../Transforms/LoopVectorize/ptr-induction.ll | 34 +
test/Transforms/LoopVectorize/reduction.ll | 2 +-
.../LoopVectorize/reverse_induction.ll | 9 +-
.../Transforms/LoopVectorize/runtime-check.ll | 6 +-
.../Transforms/LoopVectorize/runtime-limit.ll | 21 +-
test/Transforms/LowerBitSets/function-ext.ll | 22 +
test/Transforms/LowerBitSets/function.ll | 35 +
test/Transforms/LowerBitSets/nonstring.ll | 34 +
test/Transforms/LowerBitSets/pr25902.ll | 21 +
test/Transforms/LowerBitSets/simple.ll | 36 +-
test/Transforms/LowerExpectIntrinsic/basic.ll | 2 +-
.../LowerSwitch/delete-default-block-crash.ll | 27 +
test/Transforms/LowerSwitch/feature.ll | 60 +-
test/Transforms/Mem2Reg/ConvertDebugInfo.ll | 14 +-
test/Transforms/Mem2Reg/ConvertDebugInfo2.ll | 28 +-
test/Transforms/Mem2Reg/optnone.ll | 21 +
test/Transforms/Mem2Reg/pr24179.ll | 44 +
test/Transforms/MemCpyOpt/memcpy.ll | 5 +-
test/Transforms/MemCpyOpt/nontemporal.ll | 49 +
.../MergeFunc/apply_function_attributes.ll | 47 +
.../MergeFunc/call-and-invoke-with-ranges.ll | 18 +-
.../MergeFunc/constant-entire-value.ll | 42 +
test/Transforms/MergeFunc/crash2.ll | 54 +
test/Transforms/MergeFunc/gep-base-type.ll | 46 +
.../MergeFunc/inttoptr-address-space.ll | 2 +-
test/Transforms/MergeFunc/inttoptr.ll | 2 +-
.../merge-block-address-other-function.ll | 49 +
.../MergeFunc/merge-block-address.ll | 91 +
.../MergeFunc/merge-const-ptr-and-int.ll | 20 +
.../MergeFunc/merge-different-vector-types.ll | 18 +
...no-merge-block-address-different-labels.ll | 96 +
.../no-merge-block-address-other-function.ll | 61 +
.../MergeFunc/no-merge-ptr-different-sizes.ll | 24 +
.../no-merge-ptr-int-different-values.ll | 23 +
test/Transforms/MergeFunc/ranges-multiple.ll | 44 +
test/Transforms/MergeFunc/ranges.ll | 8 +-
.../MergeFunc/self-referential-global.ll | 40 +
.../MergeFunc/undef-different-types.ll | 21 +
test/Transforms/MetaRenamer/metarenamer.ll | 2 +-
.../NaryReassociate/NVPTX/nary-gep.ll | 17 +
test/Transforms/NaryReassociate/nary-add.ll | 6 +-
test/Transforms/NaryReassociate/nary-mul.ll | 19 +
test/Transforms/NaryReassociate/pr24301.ll | 14 +
test/Transforms/ObjCARC/basic.ll | 6 +-
...e-that-exception-unwind-path-is-visited.ll | 18 +-
test/Transforms/ObjCARC/nested.ll | 4 +-
test/Transforms/ObjCARC/provenance.ll | 2 +-
.../PGOProfile/Inputs/branch1.proftext | 6 +
.../PGOProfile/Inputs/branch2.proftext | 6 +
.../PGOProfile/Inputs/criticaledge.proftext | 17 +
.../PGOProfile/Inputs/diag.proftext | 5 +
.../PGOProfile/Inputs/landingpad.proftext | 14 +
.../PGOProfile/Inputs/loop1.proftext | 6 +
.../PGOProfile/Inputs/loop2.proftext | 7 +
.../PGOProfile/Inputs/switch.proftext | 8 +
test/Transforms/PGOProfile/branch1.ll | 30 +
test/Transforms/PGOProfile/branch2.ll | 37 +
test/Transforms/PGOProfile/criticaledge.ll | 108 +
test/Transforms/PGOProfile/diag_mismatch.ll | 12 +
.../PGOProfile/diag_no_funcprofdata.ll | 12 +
test/Transforms/PGOProfile/diag_no_profile.ll | 9 +
test/Transforms/PGOProfile/landingpad.ll | 124 +
test/Transforms/PGOProfile/loop1.ll | 42 +
test/Transforms/PGOProfile/loop2.ll | 70 +
test/Transforms/PGOProfile/single_bb.ll | 12 +
test/Transforms/PGOProfile/switch.ll | 47 +
test/Transforms/PlaceSafepoints/basic.ll | 2 +-
.../PlaceSafepoints/call_gc_result.ll | 4 +-
.../PlaceSafepoints/finite-loops.ll | 65 +-
.../PlaceSafepoints/patchable-statepoints.ll | 4 +-
.../statepoint-calling-conventions.ll | 4 +-
.../PlaceSafepoints/statepoint-format.ll | 4 +-
test/Transforms/PruneEH/operand-bundles.ll | 26 +
.../Reassociate/fast-ReassociateVector.ll | 10 +-
test/Transforms/Reassociate/fast-basictest.ll | 2 +-
.../Transforms/Reassociate/fast-fp-commute.ll | 4 +-
test/Transforms/Reassociate/fast-multistep.ll | 6 +-
test/Transforms/Reassociate/fp-expr.ll | 33 +
test/Transforms/Reassociate/multistep.ll | 6 +-
.../Reassociate/reassoc-intermediate-fnegs.ll | 31 +
test/Transforms/Reassociate/secondary.ll | 2 +-
test/Transforms/Reassociate/vaarg_movable.ll | 28 +
test/Transforms/Reassociate/xor_reassoc.ll | 4 +-
.../base-pointers-1.ll | 8 +-
.../base-pointers-10.ll | 11 +-
.../base-pointers-11.ll | 4 +-
.../base-pointers-12.ll | 20 +
.../base-pointers-13.ll | 19 +
.../base-pointers-2.ll | 4 +-
.../base-pointers-3.ll | 4 +-
.../base-pointers-4.ll | 21 +-
.../base-pointers-5.ll | 8 +-
.../base-pointers-6.ll | 8 +-
.../base-pointers-7.ll | 10 +-
.../base-pointers-8.ll | 4 +-
.../base-pointers-9.ll | 4 +-
.../RewriteStatepointsForGC/base-pointers.ll | 73 +-
.../RewriteStatepointsForGC/base-vector.ll | 167 +
.../RewriteStatepointsForGC/basics.ll | 16 +-
.../RewriteStatepointsForGC/codegen-cond.ll | 74 +
.../RewriteStatepointsForGC/constants.ll | 43 +-
.../deopt-bundles/base-pointers-1.ll | 25 +
.../deopt-bundles/base-pointers-10.ll | 35 +
.../deopt-bundles/base-pointers-11.ll | 24 +
.../deopt-bundles/base-pointers-2.ll | 19 +
.../deopt-bundles/base-pointers-3.ll | 19 +
.../deopt-bundles/base-pointers-4.ll | 44 +
.../deopt-bundles/base-pointers-5.ll | 28 +
.../deopt-bundles/base-pointers-6.ll | 37 +
.../deopt-bundles/base-pointers-7.ll | 45 +
.../deopt-bundles/base-pointers-8.ll | 37 +
.../deopt-bundles/base-pointers-9.ll | 20 +
.../deopt-bundles/base-pointers.ll | 151 +
.../deopt-bundles/base-vector.ll | 167 +
.../deopt-bundles/basic.ll | 65 +
.../deopt-bundles/basics.ll | 88 +
.../deopt-bundles/codegen-cond.ll | 81 +
.../deopt-bundles/constants.ll | 51 +
.../deopt-bundles/deref-pointers.ll | 104 +
.../deopt-bundles/gc-relocate-creation.ll | 22 +
.../deopt-bundles/live-vector.ll | 149 +
.../deopt-bundles/liveness-basics.ll | 165 +
.../deopt-bundles/patchable-statepoints.ll | 44 +
.../deopt-bundles/preprocess.ll | 62 +
.../deopt-bundles/relocate-invoke-result.ll | 32 +
.../deopt-bundles/relocation.ll | 279 +
.../rematerialize-derived-pointers.ll | 150 +
.../deopt-bundles/rewrite-invoke.ll | 32 +
.../RewriteStatepointsForGC/deref-pointers.ll | 53 +-
.../gc_relocate_creation.ll | 6 +-
.../RewriteStatepointsForGC/live-vector.ll | 25 +-
.../liveness-basics.ll | 22 +-
.../RewriteStatepointsForGC/preprocess.ll | 8 +-
.../relocate_invoke_result.ll | 4 +-
.../RewriteStatepointsForGC/relocation.ll | 40 +-
.../rematerialize-derived-pointers.ll | 66 +-
.../Transforms/SCCP/global-alias-constprop.ll | 11 +
.../SLPVectorizer/AArch64/commute.ll | 2 +-
.../SLPVectorizer/AArch64/horizontal.ll | 270 +
.../SLPVectorizer/AArch64/nontemporal.ll | 76 +
.../Transforms/SLPVectorizer/X86/bad_types.ll | 26 +
.../SLPVectorizer/X86/commutativity.ll | 78 +
.../SLPVectorizer/X86/debug_info.ll | 16 +-
.../SLPVectorizer/X86/horizontal.ll | 2 +-
test/Transforms/SLPVectorizer/X86/pr23510.ll | 38 +
.../SLPVectorizer/X86/schedule_budget.ll | 93 +
test/Transforms/SROA/basictest.ll | 25 +-
test/Transforms/SROA/big-endian.ll | 1 -
test/Transforms/SROA/fca.ll | 1 -
test/Transforms/SafeStack/AArch64/abi.ll | 20 +
.../SafeStack/AArch64/lit.local.cfg | 3 +
test/Transforms/SafeStack/ARM/abi.ll | 18 +
test/Transforms/SafeStack/ARM/lit.local.cfg | 3 +
test/Transforms/SafeStack/ARM/setjmp.ll | 34 +
test/Transforms/SafeStack/X86/abi.ll | 30 +
test/Transforms/SafeStack/X86/lit.local.cfg | 3 +
test/Transforms/SafeStack/array.ll | 53 +
test/Transforms/SafeStack/byval.ll | 51 +
test/Transforms/SafeStack/call.ll | 160 +-
test/Transforms/SafeStack/cast.ll | 28 +-
test/Transforms/SafeStack/debug-loc.ll | 83 +
test/Transforms/SafeStack/ret.ll | 17 +
test/Transforms/SafeStack/setjmp2.ll | 2 +-
test/Transforms/SafeStack/store.ll | 63 +
.../Inputs/bad_discriminator_value.prof | 2 +-
.../SampleProfile/Inputs/bad_fn_header.prof | 4 +-
.../SampleProfile/Inputs/bad_mangle.prof | 4 +-
.../SampleProfile/Inputs/bad_sample_line.prof | 4 +-
.../SampleProfile/Inputs/bad_samples.prof | 2 +-
.../SampleProfile/Inputs/branch.prof | 16 +-
.../SampleProfile/Inputs/calls.prof | 16 +-
.../Inputs/cov-zero-samples.prof | 10 +
.../Inputs/coverage-warning.prof | 5 +
.../SampleProfile/Inputs/discriminator.prof | 14 +-
.../SampleProfile/Inputs/entry_counts.prof | 4 +-
.../SampleProfile/Inputs/fnptr.binprof | Bin 112 -> 105 bytes .../SampleProfile/Inputs/fnptr.prof | 18 +- .../SampleProfile/Inputs/gcc-simple.afdo | Bin 0 -> 1972 bytes .../SampleProfile/Inputs/inline-coverage.prof | 7 + .../SampleProfile/Inputs/inline-hint.prof | 3 + .../SampleProfile/Inputs/inline.prof | 7 + .../SampleProfile/Inputs/nolocinfo.prof | 3 + .../SampleProfile/Inputs/offset.prof | 4 + .../SampleProfile/Inputs/propagate.prof | 32 +- .../SampleProfile/Inputs/remarks.prof | 7 + .../SampleProfile/Inputs/syntax.prof | 4 +- test/Transforms/SampleProfile/branch.ll | 294 +- test/Transforms/SampleProfile/calls.ll | 18 +- .../SampleProfile/cov-zero-samples.ll | 142 + .../SampleProfile/coverage-warning.ll | 46 + .../Transforms/SampleProfile/discriminator.ll | 14 +- test/Transforms/SampleProfile/entry_counts.ll | 8 +- test/Transforms/SampleProfile/fnptr.ll | 24 +- test/Transforms/SampleProfile/gcc-simple.ll | 218 + .../SampleProfile/inline-coverage.ll | 135 + test/Transforms/SampleProfile/inline-hint.ll | 38 + test/Transforms/SampleProfile/inline.ll | 108 + test/Transforms/SampleProfile/nolocinfo.ll | 38 + test/Transforms/SampleProfile/offset.ll | 82 + test/Transforms/SampleProfile/propagate.ll | 26 +- test/Transforms/SampleProfile/remarks.ll | 185 + test/Transforms/SampleProfile/syntax.ll | 2 +- .../ScalarRepl/debuginfo-preserved.ll | 12 +- test/Transforms/Scalarizer/dbginfo.ll | 12 +- test/Transforms/Scalarizer/store-bug.ll | 25 + ...-gep-and-gvn-addrspace-addressing-modes.ll | 14 +- .../NVPTX/split-gep-and-gvn.ll | 62 +- .../NVPTX/split-gep.ll | 16 +- .../NVPTX/value-tracking-domtree.ll | 2 +- .../SimplifyCFG/AArch64/cttz-ctlz.ll | 43 + test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll | 43 + test/Transforms/SimplifyCFG/ARM/lit.local.cfg | 5 + test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll | 43 + .../Transforms/SimplifyCFG/Mips/lit.local.cfg | 5 + test/Transforms/SimplifyCFG/PR25267.ll | 24 + .../Transforms/SimplifyCFG/SpeculativeExec.ll | 26 + .../SimplifyCFG/X86/speculate-cttz-ctlz.ll | 24 +- test/Transforms/SimplifyCFG/basictest.ll | 14 +- .../Transforms/SimplifyCFG/branch-fold-dbg.ll | 8 +- .../SimplifyCFG/empty-cleanuppad.ll | 415 + test/Transforms/SimplifyCFG/hoist-dbgvalue.ll | 10 +- test/Transforms/SimplifyCFG/implied-cond.ll | 81 + test/Transforms/SimplifyCFG/invoke_unwind.ll | 13 + .../SimplifyCFG/merge-cond-stores-2.ll | 215 + .../SimplifyCFG/merge-cond-stores.ll | 241 + .../no_speculative_loads_with_asan.ll | 40 + .../SimplifyCFG/preserve-load-metadata-2.ll | 32 + .../SimplifyCFG/preserve-load-metadata-3.ll | 32 + .../SimplifyCFG/preserve-load-metadata.ll | 32 + .../preserve-make-implicit-on-switch-to-br.ll | 30 + test/Transforms/SimplifyCFG/speculate-math.ll | 45 +- .../SimplifyCFG/statepoint-invoke-unwind.ll | 6 +- .../SimplifyCFG/switch-dead-default.ll | 179 + test/Transforms/SimplifyCFG/trap-debugloc.ll | 6 +- .../SimplifyCFG/wineh-unreachable.ll | 83 + test/Transforms/Sink/catchswitch.ll | 37 + test/Transforms/Sink/landingpad.ll | 33 + .../reassociate-geps-and-slsr-addrspace.ll | 4 +- .../NVPTX/speculative-slsr.ll | 71 + test/Transforms/StripDeadPrototypes/basic.ll | 12 + .../StripSymbols/2010-06-30-StripDebug.ll | 8 +- .../StripSymbols/2010-08-25-crash.ll | 6 +- .../StripSymbols/strip-dead-debug-info.ll | 16 +- .../StructurizeCFG/nested-loop-order.ll | 2 +- test/Transforms/TailCallElim/basic.ll | 12 +- test/Transforms/TailCallElim/notail.ll | 24 + test/Transforms/Util/lowerswitch.ll | 6 +- .../Util/simplify-dbg-declare-load.ll | 52 + test/Verifier/alias.ll | 18 +- 
test/Verifier/align-md.ll | 59 + test/Verifier/atomics.ll | 14 + test/Verifier/bitcast-alias-address-space.ll | 2 +- test/Verifier/dbg-null-retained-type.ll | 10 + test/Verifier/dbg-typerefs.ll | 2 +- test/Verifier/dbg.ll | 3 +- test/Verifier/dereferenceable-md.ll | 86 + test/Verifier/func-dbg.ll | 25 + test/Verifier/gc_relocate_addrspace.ll | 10 +- test/Verifier/gc_relocate_operand.ll | 8 +- test/Verifier/gc_relocate_return.ll | 8 +- test/Verifier/invalid-eh.ll | 38 + test/Verifier/invalid-patchable-statepoint.ll | 14 - test/Verifier/invalid-statepoint.ll | 8 +- test/Verifier/invalid-statepoint2.ll | 10 +- test/Verifier/invoke.ll | 4 +- test/Verifier/llvm.dbg.declare-address.ll | 4 +- test/Verifier/llvm.dbg.declare-expression.ll | 4 +- test/Verifier/llvm.dbg.declare-variable.ll | 2 +- .../llvm.dbg.intrinsic-dbg-attachment.ll | 20 +- test/Verifier/llvm.dbg.value-expression.ll | 4 +- test/Verifier/llvm.dbg.value-value.ll | 4 +- test/Verifier/llvm.dbg.value-variable.ll | 2 +- test/Verifier/metadata-function-dbg.ll | 23 + test/Verifier/operand-bundles.ll | 49 + test/Verifier/statepoint.ll | 31 +- test/Verifier/token1.ll | 11 + test/Verifier/token2.ll | 11 + test/Verifier/token3.ll | 8 + test/Verifier/token4.ll | 4 + test/Verifier/token5.ll | 7 + test/Verifier/token6.ll | 7 + test/Verifier/token7.ll | 8 + test/lit.cfg | 92 +- test/lit.site.cfg.in | 4 + .../dsymutil/ARM/dummy-debug-map-amr64.map | 15 + test/tools/dsymutil/ARM/empty-map.test | 8 + test/tools/dsymutil/ARM/fat-arch-name.test | 21 + .../dsymutil/ARM/fat-arch-not-found.test | 13 + test/tools/dsymutil/ARM/inlined-low_pc.c | 15 + test/tools/dsymutil/ARM/lit.local.cfg | 7 + .../dsymutil/Inputs/absolute_sym.macho.i386 | Bin 0 -> 8592 bytes .../dsymutil/Inputs/absolute_sym.macho.i386.o | Bin 0 -> 2472 bytes test/tools/dsymutil/Inputs/basic.macho.i386 | Bin 0 -> 9080 bytes .../basic2-custom-linetable.macho.x86_64.o | Bin 0 -> 3144 bytes test/tools/dsymutil/Inputs/basic2.c | 6 + test/tools/dsymutil/Inputs/dead-stripped/1.o | Bin 0 -> 3200 bytes test/tools/dsymutil/Inputs/empty_range/1.o | Bin 0 -> 636 bytes test/tools/dsymutil/Inputs/fat-test.arm.dylib | Bin 0 -> 25180 bytes test/tools/dsymutil/Inputs/fat-test.arm.o | Bin 0 -> 50736 bytes test/tools/dsymutil/Inputs/fat-test.c | 28 + test/tools/dsymutil/Inputs/fat-test.dylib | Bin 0 -> 13012 bytes test/tools/dsymutil/Inputs/fat-test.o | Bin 0 -> 5000 bytes test/tools/dsymutil/Inputs/frame-dw2.ll | 16 +- test/tools/dsymutil/Inputs/frame-dw4.ll | 16 +- test/tools/dsymutil/Inputs/inlined-low_pc/1.o | Bin 0 -> 1960 bytes test/tools/dsymutil/Inputs/libfat-test.a | Bin 0 -> 5136 bytes test/tools/dsymutil/Inputs/mismatch/1.o | Bin 0 -> 1972 bytes .../dsymutil/Inputs/mismatch/mismatch.pcm | Bin 0 -> 24940 bytes test/tools/dsymutil/Inputs/modules/1.o | Bin 0 -> 2444 bytes test/tools/dsymutil/Inputs/modules/Bar.pcm | Bin 0 -> 25636 bytes test/tools/dsymutil/Inputs/modules/Foo.pcm | Bin 0 -> 26060 bytes .../dsymutil/Inputs/odr-anon-namespace/1.o | Bin 0 -> 2084 bytes .../dsymutil/Inputs/odr-anon-namespace/2.o | Bin 0 -> 2084 bytes .../dsymutil/Inputs/odr-member-functions/1.o | Bin 0 -> 2236 bytes .../dsymutil/Inputs/odr-member-functions/2.o | Bin 0 -> 2660 bytes .../dsymutil/Inputs/odr-member-functions/3.o | Bin 0 -> 2832 bytes test/tools/dsymutil/Inputs/odr-uniquing/1.o | Bin 0 -> 2544 bytes test/tools/dsymutil/Inputs/odr-uniquing/2.o | Bin 0 -> 2544 bytes test/tools/dsymutil/Inputs/submodules/1.o | Bin 0 -> 2232 bytes .../dsymutil/Inputs/submodules/Parent.pcm | Bin 0 -> 25260 bytes 
.../dsymutil/X86/basic-linking-bundle.test | 38 + .../tools/dsymutil/X86/basic-linking-x86.test | 13 +- .../X86/basic-lto-dw4-linking-x86.test | 3 +- .../dsymutil/X86/basic-lto-linking-x86.test | 7 +- .../tools/dsymutil/X86/custom-line-table.test | 40 + test/tools/dsymutil/X86/dead-stripped.cpp | 48 + test/tools/dsymutil/X86/dsym-companion.test | 339 + test/tools/dsymutil/X86/dummy-debug-map.map | 22 + test/tools/dsymutil/X86/empty_range.s | 61 + .../dsymutil/X86/fat-archive-input-i386.test | 16 + .../dsymutil/X86/fat-object-input-x86_64.test | 16 + .../X86/fat-object-input-x86_64h.test | 16 + test/tools/dsymutil/X86/frame-1.test | 4 +- test/tools/dsymutil/X86/frame-2.test | 4 +- test/tools/dsymutil/X86/lit.local.cfg | 2 + test/tools/dsymutil/X86/mismatch.m | 23 + test/tools/dsymutil/X86/modules.m | 117 + test/tools/dsymutil/X86/multiple-inputs.test | 31 + .../tools/dsymutil/X86/odr-anon-namespace.cpp | 65 + .../dsymutil/X86/odr-member-functions.cpp | 109 + test/tools/dsymutil/X86/odr-uniquing.cpp | 187 + test/tools/dsymutil/X86/submodules.m | 52 + test/tools/dsymutil/absolute_symbol.test | 16 + test/tools/dsymutil/arch-option.test | 39 + test/tools/dsymutil/archive-timestamp.test | 24 + test/tools/dsymutil/basic-linking.test | 7 +- test/tools/dsymutil/debug-map-parsing.test | 22 +- test/tools/dsymutil/dump-symtab.test | 44 + test/tools/dsymutil/fat-binary-output.test | 32 + .../dsymutil/yaml-object-address-rewrite.test | 10 +- test/tools/gold/Inputs/linkonce-weak.ll | 3 - test/tools/gold/{ => PowerPC}/lit.local.cfg | 1 - test/tools/gold/{ => PowerPC}/mtriple.ll | 0 test/tools/gold/{ => X86}/Inputs/alias-1.ll | 0 .../gold/X86/Inputs/available-externally.ll | 3 + test/tools/gold/{ => X86}/Inputs/bcsection.s | 0 test/tools/gold/{ => X86}/Inputs/comdat.ll | 10 +- test/tools/gold/X86/Inputs/comdat2.ll | 9 + test/tools/gold/{ => X86}/Inputs/common.ll | 0 test/tools/gold/X86/Inputs/ctors2.ll | 5 + .../tools/gold/{ => X86}/Inputs/drop-debug.bc | Bin test/tools/gold/X86/Inputs/drop-linkage.ll | 9 + test/tools/gold/{ => X86}/Inputs/invalid.bc | Bin .../{ => X86}/Inputs/linker-script.export | 0 test/tools/gold/X86/Inputs/linkonce-weak.ll | 19 + test/tools/gold/{ => X86}/Inputs/pr19901-1.ll | 0 .../tools/gold/X86/Inputs/resolve-to-alias.ll | 4 + test/tools/gold/X86/Inputs/thinlto.ll | 4 + test/tools/gold/X86/Inputs/type-merge.ll | 5 + test/tools/gold/X86/Inputs/type-merge2.ll | 5 + test/tools/gold/{ => X86}/Inputs/weak.ll | 0 test/tools/gold/{ => X86}/alias.ll | 2 +- test/tools/gold/X86/alias2.ll | 23 + .../available-externally.ll} | 16 +- test/tools/gold/{ => X86}/bad-alias.ll | 4 +- test/tools/gold/X86/bcsection.ll | 13 + test/tools/gold/{ => X86}/coff.ll | 0 test/tools/gold/X86/comdat.ll | 65 + test/tools/gold/X86/comdat2.ll | 19 + test/tools/gold/{ => X86}/common.ll | 0 test/tools/gold/X86/ctors.ll | 13 + test/tools/gold/X86/ctors2.ll | 14 + test/tools/gold/X86/disable-verify.ll | 25 + test/tools/gold/{ => X86}/drop-debug.ll | 0 test/tools/gold/X86/drop-linkage.ll | 14 + test/tools/gold/{ => X86}/emit-llvm.ll | 25 +- test/tools/gold/{ => X86}/invalid.ll | 0 test/tools/gold/{ => X86}/linker-script.ll | 0 test/tools/gold/X86/linkonce-weak.ll | 39 + test/tools/gold/X86/lit.local.cfg | 3 + .../tools/gold/{ => X86}/no-map-whole-file.ll | 0 test/tools/gold/{ => X86}/opt-level.ll | 0 test/tools/gold/X86/parallel.ll | 22 + test/tools/gold/{ => X86}/pr19901.ll | 0 test/tools/gold/X86/pr25907.ll | 28 + test/tools/gold/X86/pr25915.ll | 17 + test/tools/gold/{ => X86}/remarks.ll | 9 +- 
test/tools/gold/X86/resolve-to-alias.ll | 33 + test/tools/gold/{ => X86}/slp-vectorize.ll | 0 test/tools/gold/{ => X86}/stats.ll | 0 test/tools/gold/X86/thinlto.ll | 34 + test/tools/gold/X86/type-merge.ll | 24 + test/tools/gold/X86/type-merge2.ll | 26 + test/tools/gold/X86/unnamed-addr.ll | 14 + test/tools/gold/{ => X86}/vectorize.ll | 0 test/tools/gold/{ => X86}/weak.ll | 0 test/tools/gold/bcsection.ll | 11 - test/tools/gold/comdat.ll | 65 - test/tools/llvm-cxxdump/trivial.test | 3 + .../llvm-dwp/Inputs/simple/notypes/a.dwo | Bin 0 -> 1193 bytes .../llvm-dwp/Inputs/simple/notypes/b.dwo | Bin 0 -> 1241 bytes test/tools/llvm-dwp/Inputs/simple/types/a.dwo | Bin 0 -> 1369 bytes test/tools/llvm-dwp/Inputs/simple/types/b.dwo | Bin 0 -> 1409 bytes test/tools/llvm-dwp/Inputs/type_dedup/a.dwo | Bin 0 -> 1449 bytes test/tools/llvm-dwp/Inputs/type_dedup/b.dwo | Bin 0 -> 1449 bytes test/tools/llvm-dwp/X86/lit.local.cfg | 4 + test/tools/llvm-dwp/X86/simple.test | 98 + test/tools/llvm-dwp/X86/type_dedup.test | 35 + test/tools/llvm-lto/Inputs/thinlto.ll | 4 + test/tools/llvm-lto/thinlto.ll | 24 + test/tools/llvm-mc/basic.test | 3 + test/tools/llvm-mc/fatal_warnings.test | 4 + test/tools/llvm-mc/line_end_with_space.test | 1 - test/tools/llvm-mc/lit.local.cfg | 4 + test/tools/llvm-mc/no_warnings.test | 4 + test/tools/llvm-nm/X86/IRobj.test | 11 + .../llvm-nm/X86/Inputs/hello.obj.macho-x86_64 | Bin 0 -> 844 bytes .../llvm-nm/X86/Inputs/test.IRobj-x86_64 | Bin 0 -> 1168 bytes test/tools/llvm-nm/X86/externalonly.test | 4 + test/tools/llvm-nm/X86/groupingflags.test | 5 + test/tools/llvm-nm/X86/posixMachO.test | 7 + test/tools/llvm-nm/lit.local.cfg | 2 + .../AArch64/elf-aarch64-mapping-symbols.test | 30 + .../llvm-objdump/Inputs/eh_frame.macho-arm64 | Bin 0 -> 888 bytes test/tools/llvm-objdump/Inputs/libbogus1.a | 13 + test/tools/llvm-objdump/Inputs/libbogus2.a | 13 + test/tools/llvm-objdump/Inputs/libbogus3.a | 16 + .../llvm-objdump/Inputs/section-filter.obj | Bin 0 -> 441 bytes .../X86/Inputs/disassemble-data.obj | Bin 0 -> 254 bytes .../X86/Inputs/disassemble.dll.coff-i386 | Bin 0 -> 1536 bytes .../X86/Inputs/internal.exe.coff-x86_64 | Bin 0 -> 6144 bytes .../X86/Inputs/malformed-machos/00000031.a | Bin 0 -> 2768 bytes .../malformed-machos/mem-crup-0001.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0006.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0010.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0040.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0080.macho | Bin 0 -> 9166 bytes .../malformed-machos/mem-crup-0261.macho | Bin 0 -> 8752 bytes .../malformed-machos/mem-crup-0337.macho | Bin 0 -> 9248 bytes .../llvm-objdump/X86/coff-dis-internal.test | 3 + .../X86/coff-disassemble-export.test | 8 + .../llvm-objdump/X86/disassemble-data.test | 4 + .../llvm-objdump/X86/macho-symbol-table.test | 6 +- .../llvm-objdump/X86/malformed-machos.test | 41 + test/tools/llvm-objdump/eh_frame-arm64.test | 23 + .../llvm-objdump/malformed-archives.test | 20 + test/tools/llvm-objdump/section-filter.test | 7 + test/tools/llvm-pdbdump/regex-filter.test | 20 + .../tools/llvm-profdata/Inputs/basic.proftext | 19 + .../llvm-profdata/Inputs/c-general.profraw | Bin 1384 -> 1776 bytes .../llvm-profdata/Inputs/compat.profdata.v2 | Bin 0 -> 712 bytes .../Inputs/gcc-sample-profile.gcov | Bin 0 -> 1960 bytes .../llvm-profdata/Inputs/inline-samples.afdo | 20 + .../Inputs/overflow-instr.proftext | 6 + .../Inputs/overflow-sample.proftext | 7 + .../Inputs/sample-profile.proftext | 18 +- 
.../Inputs/text-format-errors.text.bin | 1 + .../llvm-profdata/Inputs/vp-malform.proftext | 42 + .../llvm-profdata/Inputs/vp-malform2.proftext | 32 + .../llvm-profdata/Inputs/vp-truncate.proftext | 36 + .../Inputs/weight-instr-bar.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-instr-foo.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-sample-bar.proftext | 8 + .../Inputs/weight-sample-foo.proftext | 8 + test/tools/llvm-profdata/c-general.test | 6 +- test/tools/llvm-profdata/compat.proftext | 20 + .../llvm-profdata/count-mismatch.proftext | 3 +- .../gcc-gcov-sample-profile.test | 29 + test/tools/llvm-profdata/inline-samples.test | 30 + test/tools/llvm-profdata/overflow-instr.test | 17 + test/tools/llvm-profdata/overflow-sample.test | 43 + test/tools/llvm-profdata/overflow.proftext | 12 - test/tools/llvm-profdata/raw-32-bits-be.test | 13 +- test/tools/llvm-profdata/raw-32-bits-le.test | 13 +- test/tools/llvm-profdata/raw-64-bits-be.test | 13 +- test/tools/llvm-profdata/raw-64-bits-le.test | 13 +- .../raw-magic-but-no-header.test | 2 +- .../tools/llvm-profdata/raw-two-profiles.test | 31 +- .../llvm-profdata/sample-profile-basic.test | 10 +- test/tools/llvm-profdata/text-dump.test | 21 + .../llvm-profdata/text-format-errors.test | 29 +- test/tools/llvm-profdata/value-prof.proftext | 57 + test/tools/llvm-profdata/weight-instr.test | 69 + test/tools/llvm-profdata/weight-sample.test | 56 + test/tools/llvm-readobj/ARM/attribute-4.s | 7 + .../llvm-readobj/Inputs/gnuhash.so.elf-i386 | Bin 0 -> 1080 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-ppc | Bin 0 -> 1204 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-ppc64 | Bin 0 -> 1784 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-x86_64 | Bin 0 -> 1616 bytes .../Inputs/mips-rld-map-rel.elf-mipsel | Bin 0 -> 2484 bytes .../Inputs/trivial.elf-amdhsa-kaveri | Bin 0 -> 13208 bytes .../llvm-readobj/Inputs/verdef.elf-x86-64 | Bin 0 -> 2256 bytes test/tools/llvm-readobj/amdgpu-elf-defs.test | 28 + test/tools/llvm-readobj/basic.test | 2 + .../llvm-readobj/codeview-linetables.test | 36 +- test/tools/llvm-readobj/elf-gnuhash.test | 63 + test/tools/llvm-readobj/elf-versioninfo.test | 81 + test/tools/llvm-readobj/file-headers.test | 18 +- test/tools/llvm-readobj/mips-rld-map-rel.test | 24 + test/tools/llvm-readobj/sections-ext.test | 7 + test/tools/llvm-readobj/sections.test | 7 + test/tools/llvm-size/basic.test | 2 + test/tools/llvm-split/alias.ll | 19 + test/tools/llvm-split/comdat.ll | 19 + test/tools/llvm-split/function.ll | 17 + test/tools/llvm-split/global.ll | 11 + test/tools/llvm-split/internal.ll | 17 + test/tools/llvm-split/unnamed.ll | 31 + test/tools/llvm-symbolizer/Inputs/addr.exe | Bin 0 -> 10109 bytes test/tools/llvm-symbolizer/Inputs/addr.inp | 1 + .../llvm-symbolizer/Inputs/coff-dwarf.cpp | 19 + .../llvm-symbolizer/Inputs/coff-dwarf.exe | Bin 0 -> 18944 bytes .../llvm-symbolizer/Inputs/coff-exports.cpp | 20 + .../llvm-symbolizer/Inputs/coff-exports.exe | Bin 0 -> 8192 bytes test/tools/llvm-symbolizer/coff-dwarf.test | 16 + test/tools/llvm-symbolizer/coff-exports.test | 20 + .../tools/llvm-symbolizer/pdb/Inputs/test.cpp | 7 + .../tools/llvm-symbolizer/pdb/Inputs/test.exe | Bin 165888 -> 126464 bytes .../llvm-symbolizer/pdb/Inputs/test.exe.input | 4 - .../tools/llvm-symbolizer/pdb/Inputs/test.pdb | Bin 1552384 -> 1626112 bytes test/tools/llvm-symbolizer/pdb/pdb.test | 63 +- test/tools/llvm-symbolizer/sym.test | 30 + test/tools/lto/opt-level.ll | 5 +- test/tools/sancov/Inputs/blacklist.txt | 1 + test/tools/sancov/Inputs/foo.cpp | 5 + 
test/tools/sancov/Inputs/test-linux_x86_64 | Bin 0 -> 2355767 bytes .../sancov/Inputs/test-linux_x86_64-1.sancov | Bin 0 -> 80 bytes .../sancov/Inputs/test-linux_x86_64.sancov | Bin 0 -> 64 bytes test/tools/sancov/Inputs/test.cpp | 19 + test/tools/sancov/blacklist.test | 5 + test/tools/sancov/covered_functions.test | 13 + test/tools/sancov/not_covered_functions.test | 8 + test/tools/sancov/print.test | 11 + tools/CMakeLists.txt | 95 +- tools/LLVMBuild.txt | 3 +- tools/Makefile | 5 +- tools/bugpoint-passes/TestPasses.cpp | 53 +- tools/bugpoint/BugDriver.cpp | 6 +- tools/bugpoint/BugDriver.h | 19 +- tools/bugpoint/CMakeLists.txt | 1 - tools/bugpoint/CrashDebugger.cpp | 265 +- tools/bugpoint/ExecutionDriver.cpp | 41 +- tools/bugpoint/ExtractFunction.cpp | 71 +- tools/bugpoint/ListReducer.h | 12 +- tools/bugpoint/Miscompilation.cpp | 277 +- tools/bugpoint/ToolRunner.cpp | 217 +- tools/bugpoint/ToolRunner.h | 50 +- tools/bugpoint/bugpoint.cpp | 10 +- tools/dsymutil/BinaryHolder.cpp | 172 +- tools/dsymutil/BinaryHolder.h | 98 +- tools/dsymutil/CMakeLists.txt | 1 + tools/dsymutil/DebugMap.cpp | 38 +- tools/dsymutil/DebugMap.h | 19 +- tools/dsymutil/DwarfLinker.cpp | 1700 +- tools/dsymutil/MachODebugMapParser.cpp | 323 +- tools/dsymutil/MachOUtils.cpp | 521 + tools/dsymutil/MachOUtils.h | 39 + tools/dsymutil/NonRelocatableStringpool.h | 68 + tools/dsymutil/dsymutil.cpp | 297 +- tools/dsymutil/dsymutil.h | 25 +- tools/gold/gold-plugin.cpp | 493 +- tools/llc/CMakeLists.txt | 1 + tools/llc/LLVMBuild.txt | 2 +- tools/llc/Makefile | 2 +- tools/llc/llc.cpp | 55 +- tools/lli/OrcLazyJIT.cpp | 62 +- tools/lli/OrcLazyJIT.h | 54 +- tools/lli/RemoteTarget.cpp | 4 +- tools/lli/lli.cpp | 5 +- tools/llvm-ar/CMakeLists.txt | 29 +- tools/llvm-ar/install_symlink.cmake | 31 - tools/llvm-ar/llvm-ar.cpp | 104 +- tools/llvm-as-fuzzer/CMakeLists.txt | 13 + tools/llvm-as-fuzzer/llvm-as-fuzzer.cpp | 76 + tools/llvm-as/llvm-as.cpp | 7 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 67 +- tools/llvm-c-test/llvm-c-test.h | 4 +- tools/llvm-c-test/main.c | 18 +- tools/llvm-c-test/module.c | 38 +- tools/llvm-c-test/object.c | 1 + tools/llvm-config/BuildVariables.inc.in | 5 + tools/llvm-config/CMakeLists.txt | 8 + tools/llvm-config/Makefile | 18 + tools/llvm-config/llvm-config.cpp | 285 +- tools/llvm-cov/CoverageReport.cpp | 31 +- tools/llvm-cov/CoverageViewOptions.h | 1 + tools/llvm-cov/gcov.cpp | 6 +- tools/llvm-cxxdump/llvm-cxxdump.cpp | 104 +- tools/llvm-diff/DiffLog.cpp | 3 +- tools/llvm-diff/DiffLog.h | 10 +- tools/llvm-diff/DifferenceEngine.cpp | 4 +- tools/llvm-dis/llvm-dis.cpp | 2 +- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 107 +- tools/llvm-dwp/CMakeLists.txt | 13 + tools/llvm-dwp/LLVMBuild.txt | 23 + tools/llvm-dwp/Makefile | 18 + tools/llvm-dwp/llvm-dwp.cpp | 420 + tools/llvm-extract/llvm-extract.cpp | 53 +- tools/llvm-go/llvm-go.go | 91 +- tools/llvm-link/CMakeLists.txt | 1 + tools/llvm-link/LLVMBuild.txt | 2 +- tools/llvm-link/Makefile | 2 +- tools/llvm-link/llvm-link.cpp | 194 +- tools/llvm-lto/CMakeLists.txt | 3 + tools/llvm-lto/LLVMBuild.txt | 2 +- tools/llvm-lto/Makefile | 2 - tools/llvm-lto/llvm-lto.cpp | 269 +- tools/llvm-mc-fuzzer/CMakeLists.txt | 18 + tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp | 138 + tools/llvm-mc/llvm-mc.cpp | 14 +- tools/llvm-nm/CMakeLists.txt | 4 +- tools/llvm-nm/llvm-nm.cpp | 258 +- tools/llvm-objdump/CMakeLists.txt | 5 +- tools/llvm-objdump/COFFDump.cpp | 85 +- tools/llvm-objdump/ELFDump.cpp | 32 +- tools/llvm-objdump/MachODump.cpp | 802 +- tools/llvm-objdump/llvm-objdump.cpp | 564 +- 
tools/llvm-objdump/llvm-objdump.h | 8 +- tools/llvm-pdbdump/BuiltinDumper.cpp | 66 +- tools/llvm-pdbdump/BuiltinDumper.h | 2 + tools/llvm-pdbdump/LinePrinter.cpp | 104 +- tools/llvm-pdbdump/LinePrinter.h | 13 +- tools/llvm-pdbdump/llvm-pdbdump.cpp | 299 +- tools/llvm-pdbdump/llvm-pdbdump.h | 3 + tools/llvm-profdata/llvm-profdata.cpp | 269 +- tools/llvm-readobj/ARMAttributeParser.cpp | 72 +- tools/llvm-readobj/ARMEHABIPrinter.h | 60 +- tools/llvm-readobj/ARMWinEHPrinter.cpp | 5 +- tools/llvm-readobj/CMakeLists.txt | 1 + tools/llvm-readobj/COFFDumper.cpp | 120 +- tools/llvm-readobj/COFFImportDumper.cpp | 52 + tools/llvm-readobj/ELFDumper.cpp | 903 +- tools/llvm-readobj/MachODumper.cpp | 240 +- tools/llvm-readobj/ObjDumper.h | 19 +- tools/llvm-readobj/StreamWriter.h | 32 +- tools/llvm-readobj/Win64EHDumper.cpp | 21 +- tools/llvm-readobj/llvm-readobj.cpp | 147 +- tools/llvm-readobj/llvm-readobj.h | 4 +- tools/llvm-rtdyld/llvm-rtdyld.cpp | 235 +- tools/llvm-shlib/CMakeLists.txt | 144 +- tools/llvm-shlib/Makefile | 2 +- tools/llvm-size/llvm-size.cpp | 44 +- tools/llvm-split/CMakeLists.txt | 11 + .../{macho-dump => llvm-split}/LLVMBuild.txt | 6 +- tools/{macho-dump => llvm-split}/Makefile | 10 +- tools/llvm-split/llvm-split.cpp | 67 + tools/llvm-stress/CMakeLists.txt | 2 +- tools/llvm-stress/llvm-stress.cpp | 5 +- tools/llvm-symbolizer/CMakeLists.txt | 2 +- tools/llvm-symbolizer/LLVMSymbolize.cpp | 532 - tools/llvm-symbolizer/LLVMSymbolize.h | 144 - tools/llvm-symbolizer/Makefile | 2 +- tools/llvm-symbolizer/llvm-symbolizer.cpp | 50 +- tools/lto/CMakeLists.txt | 10 + tools/lto/lto.cpp | 133 +- tools/macho-dump/CMakeLists.txt | 8 - tools/macho-dump/macho-dump.cpp | 434 - tools/obj2yaml/elf2yaml.cpp | 79 +- tools/obj2yaml/obj2yaml.cpp | 3 - tools/opt/CMakeLists.txt | 1 - tools/opt/opt.cpp | 56 +- tools/sancov/CMakeLists.txt | 17 + tools/sancov/Makefile | 18 + tools/sancov/sancov.cc | 533 + .../verify-uselistorder.cpp | 1 + tools/xcode-toolchain/CMakeLists.txt | 72 + tools/yaml2obj/yaml2elf.cpp | 16 +- unittests/ADT/APFloatTest.cpp | 16 +- unittests/ADT/APIntTest.cpp | 7 + unittests/ADT/ArrayRefTest.cpp | 31 +- unittests/ADT/BitVectorTest.cpp | 4 +- unittests/ADT/CMakeLists.txt | 1 + unittests/ADT/DenseMapTest.cpp | 16 + unittests/ADT/FoldingSet.cpp | 4 +- unittests/ADT/OptionalTest.cpp | 13 + unittests/ADT/PackedVectorTest.cpp | 12 - unittests/ADT/PointerIntPairTest.cpp | 25 +- unittests/ADT/RangeAdapterTest.cpp | 83 + unittests/ADT/SmallStringTest.cpp | 11 + unittests/ADT/SparseBitVectorTest.cpp | 94 + unittests/ADT/StringRefTest.cpp | 53 + unittests/ADT/TripleTest.cpp | 115 +- unittests/ADT/ilistTest.cpp | 14 +- unittests/Analysis/AliasAnalysisTest.cpp | 252 +- unittests/Analysis/CFGTest.cpp | 2 +- unittests/Analysis/CMakeLists.txt | 2 +- unittests/Analysis/Makefile | 2 +- unittests/Analysis/MixedTBAATest.cpp | 3 +- unittests/Analysis/ScalarEvolutionTest.cpp | 35 +- unittests/Analysis/ValueTrackingTest.cpp | 189 + unittests/AsmParser/AsmParserTest.cpp | 88 + unittests/Bitcode/BitReaderTest.cpp | 75 +- .../ExecutionEngine/MCJIT/MCJITCAPITest.cpp | 6 +- .../MCJIT/MCJITMultipleModuleTest.cpp | 10 +- .../MCJIT/MCJITObjectCacheTest.cpp | 7 +- unittests/ExecutionEngine/MCJIT/MCJITTest.cpp | 12 +- .../MCJIT/MCJITTestAPICommon.h | 6 + .../ExecutionEngine/MCJIT/MCJITTestBase.h | 26 +- unittests/ExecutionEngine/Orc/CMakeLists.txt | 9 + .../Orc/CompileOnDemandLayerTest.cpp | 75 + .../Orc/GlobalMappingLayerTest.cpp | 55 + .../Orc/IndirectionUtilsTest.cpp | 2 +- .../Orc/ObjectLinkingLayerTest.cpp | 94 + 
.../Orc/ObjectTransformLayerTest.cpp | 27 - unittests/ExecutionEngine/Orc/OrcCAPITest.cpp | 160 + .../ExecutionEngine/Orc/OrcTestCommon.cpp | 2 + unittests/ExecutionEngine/Orc/OrcTestCommon.h | 171 +- unittests/IR/CMakeLists.txt | 1 - unittests/IR/ConstantRangeTest.cpp | 52 + unittests/IR/ConstantsTest.cpp | 73 +- unittests/IR/DominatorTreeTest.cpp | 28 +- unittests/IR/IRBuilderTest.cpp | 38 +- unittests/IR/LegacyPassManagerTest.cpp | 2 +- unittests/IR/Makefile | 2 +- unittests/IR/MetadataTest.cpp | 323 +- unittests/IR/TypesTest.cpp | 8 + unittests/IR/UserTest.cpp | 24 + unittests/IR/ValueHandleTest.cpp | 12 +- unittests/IR/ValueTest.cpp | 70 +- unittests/IR/VerifierTest.cpp | 47 + unittests/Linker/LinkModulesTest.cpp | 127 +- unittests/MC/StringTableBuilderTest.cpp | 8 +- unittests/Option/OptionParsingTest.cpp | 2 +- unittests/ProfileData/CMakeLists.txt | 1 + unittests/ProfileData/CoverageMappingTest.cpp | 12 +- unittests/ProfileData/InstrProfTest.cpp | 496 +- unittests/ProfileData/SampleProfTest.cpp | 132 + unittests/Support/AlignOfTest.cpp | 20 + unittests/Support/BlockFrequencyTest.cpp | 18 +- unittests/Support/BranchProbabilityTest.cpp | 236 +- unittests/Support/CMakeLists.txt | 4 + unittests/Support/CommandLineTest.cpp | 14 +- unittests/Support/EndianTest.cpp | 132 + unittests/Support/FileOutputBufferTest.cpp | 25 +- unittests/Support/MathExtrasTest.cpp | 119 + unittests/Support/MemoryBufferTest.cpp | 11 +- unittests/Support/Path.cpp | 206 +- unittests/Support/ProgramTest.cpp | 108 +- unittests/Support/ReplaceFileTest.cpp | 113 + unittests/Support/ThreadPool.cpp | 168 + unittests/Support/TimerTest.cpp | 65 + unittests/Support/TrailingObjectsTest.cpp | 195 + unittests/Transforms/Utils/Cloning.cpp | 43 +- .../Transforms/Utils/IntegerDivision.cpp | 32 +- .../Transforms/Utils/ValueMapperTest.cpp | 31 + utils/FileCheck/FileCheck.cpp | 44 +- utils/PerfectShuffle/PerfectShuffle.cpp | 6 +- utils/TableGen/AsmMatcherEmitter.cpp | 372 +- utils/TableGen/AsmWriterEmitter.cpp | 109 +- utils/TableGen/Attributes.cpp | 156 + utils/TableGen/CMakeLists.txt | 1 + utils/TableGen/CallingConvEmitter.cpp | 8 +- utils/TableGen/CodeGenDAGPatterns.cpp | 613 +- utils/TableGen/CodeGenDAGPatterns.h | 16 +- utils/TableGen/CodeGenInstruction.cpp | 1 + utils/TableGen/CodeGenIntrinsics.h | 5 +- utils/TableGen/CodeGenMapTable.cpp | 2 +- utils/TableGen/CodeGenRegisters.cpp | 31 +- utils/TableGen/CodeGenSchedule.cpp | 91 +- utils/TableGen/CodeGenSchedule.h | 49 +- utils/TableGen/CodeGenTarget.cpp | 12 +- utils/TableGen/CodeGenTarget.h | 2 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 36 +- utils/TableGen/DFAPacketizerEmitter.cpp | 710 +- utils/TableGen/DisassemblerEmitter.cpp | 5 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 189 +- utils/TableGen/InstrInfoEmitter.cpp | 50 +- utils/TableGen/IntrinsicEmitter.cpp | 136 +- utils/TableGen/OptParserEmitter.cpp | 20 +- utils/TableGen/RegisterInfoEmitter.cpp | 41 +- utils/TableGen/SubtargetEmitter.cpp | 17 +- utils/TableGen/TableGen.cpp | 8 +- utils/TableGen/TableGenBackends.h | 1 + utils/TableGen/X86RecognizableInstr.cpp | 17 +- utils/emacs/llvm-mode.el | 97 +- utils/lit/TODO | 12 +- utils/lit/lit/LitConfig.py | 36 +- utils/lit/lit/Test.py | 4 +- utils/lit/lit/TestRunner.py | 308 +- utils/lit/lit/TestingConfig.py | 2 +- utils/lit/lit/formats/googletest.py | 11 +- utils/lit/lit/main.py | 72 +- utils/lit/lit/util.py | 100 +- .../googletest-timeout/DummySubDir/OneTest | 35 + .../tests/Inputs/googletest-timeout/lit.cfg | 9 + .../Inputs/shtest-timeout/infinite_loop.py | 10 + 
utils/lit/tests/Inputs/shtest-timeout/lit.cfg | 32 + .../Inputs/shtest-timeout/quick_then_slow.py | 24 + .../lit/tests/Inputs/shtest-timeout/short.py | 6 + utils/lit/tests/Inputs/shtest-timeout/slow.py | 9 + utils/lit/tests/discovery.py | 8 +- utils/lit/tests/googletest-timeout.py | 29 + utils/lit/tests/lit.cfg | 9 + utils/lit/tests/shtest-timeout.py | 116 + utils/llvm-build/llvmbuild/main.py | 98 +- utils/release/build_llvm_package.bat | 93 + utils/release/merge.sh | 23 +- utils/release/test-release.sh | 77 +- utils/schedcover.py | 77 + utils/shuffle_fuzz.py | 4 +- .../googletest/src/gtest-internal-inl.h | 58 +- utils/update_llc_test_checks.py | 8 +- utils/vim/ftplugin/llvm.vim | 1 + utils/vim/syntax/llvm.vim | 21 +- 6088 files changed, 431649 insertions(+), 132043 deletions(-) delete mode 100644 autoconf/m4/rand48.m4 create mode 100644 cmake/dummy.cpp create mode 100644 cmake/modules/DetermineGCCCompatible.cmake create mode 100644 cmake/modules/LLVMExternalProjectUtils.cmake create mode 100644 cmake/modules/LLVMInstallSymlink.cmake create mode 100644 docs/CommandGuide/llvm-lib.rst create mode 100644 docs/CompileCudaWithLLVM.rst create mode 100644 docs/MIRLangRef.rst create mode 100644 docs/_ocamldoc/style.css create mode 100644 examples/Kaleidoscope/include/KaleidoscopeJIT.h create mode 100644 include/llvm-c/ErrorHandling.h create mode 100644 include/llvm-c/OrcBindings.h create mode 100644 include/llvm-c/Types.h create mode 100644 include/llvm/Analysis/BasicAliasAnalysis.h create mode 100644 include/llvm/Analysis/CFLAliasAnalysis.h create mode 100644 include/llvm/Analysis/DemandedBits.h create mode 100644 include/llvm/Analysis/DivergenceAnalysis.h create mode 100644 include/llvm/Analysis/EHPersonalities.h create mode 100644 include/llvm/Analysis/GlobalsModRef.h delete mode 100644 include/llvm/Analysis/LibCallAliasAnalysis.h delete mode 100644 include/llvm/Analysis/LibCallSemantics.h create mode 100644 include/llvm/Analysis/ObjCARCAliasAnalysis.h create mode 100644 include/llvm/Analysis/ObjCARCAnalysisUtils.h rename lib/Transforms/ObjCARC/ARCInstKind.h => include/llvm/Analysis/ObjCARCInstKind.h (94%) create mode 100644 include/llvm/Analysis/OrderedBasicBlock.h create mode 100644 include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h create mode 100644 include/llvm/Analysis/ScopedNoAliasAA.h create mode 100644 include/llvm/Analysis/TypeBasedAliasAnalysis.h create mode 100644 include/llvm/CodeGen/AtomicExpandUtils.h create mode 100644 include/llvm/CodeGen/ParallelCG.h create mode 100644 include/llvm/DebugInfo/CodeView/CodeView.h create mode 100644 include/llvm/DebugInfo/CodeView/CodeViewOStream.h create mode 100644 include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/FunctionId.h create mode 100644 include/llvm/DebugInfo/CodeView/Line.h create mode 100644 include/llvm/DebugInfo/CodeView/ListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeIndex.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeRecord.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeTableBuilder.h create mode 100644 include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h create mode 100644 include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h create mode 100644 
include/llvm/DebugInfo/Symbolize/DIPrinter.h create mode 100644 include/llvm/DebugInfo/Symbolize/SymbolizableModule.h create mode 100644 include/llvm/DebugInfo/Symbolize/Symbolize.h create mode 100644 include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h create mode 100644 include/llvm/IR/Attributes.td create mode 100644 include/llvm/IR/FunctionInfo.h delete mode 100644 include/llvm/IR/MetadataTracking.h create mode 100644 include/llvm/Linker/IRMover.h create mode 100644 include/llvm/MC/MCFragment.h create mode 100644 include/llvm/Object/COFFImportFile.h create mode 100644 include/llvm/Object/FunctionIndexObjectFile.h create mode 100644 include/llvm/ProfileData/InstrProfData.inc create mode 100644 include/llvm/Support/ARMTargetParser.def create mode 100644 include/llvm/Support/ELFRelocs/AVR.def create mode 100644 include/llvm/Support/JamCRC.h delete mode 100644 include/llvm/Support/OutputBuffer.h create mode 100644 include/llvm/Support/Printable.h create mode 100644 include/llvm/Support/ThreadPool.h create mode 100644 include/llvm/Support/TrailingObjects.h create mode 100644 include/llvm/Support/thread.h create mode 100644 include/llvm/Transforms/IPO/ForceFunctionAttrs.h create mode 100644 include/llvm/Transforms/IPO/FunctionImport.h create mode 100644 include/llvm/Transforms/IPO/InferFunctionAttrs.h create mode 100644 include/llvm/Transforms/IPO/StripDeadPrototypes.h create mode 100644 include/llvm/Transforms/Scalar/ADCE.h create mode 100644 include/llvm/Transforms/Scalar/SROA.h create mode 100644 include/llvm/Transforms/Utils/SplitModule.h delete mode 100644 lib/Analysis/AliasAnalysisCounter.cpp delete mode 100644 lib/Analysis/AliasDebugger.cpp rename lib/Analysis/{IPA => }/CallGraph.cpp (91%) rename lib/Analysis/{IPA => }/CallGraphSCCPass.cpp (100%) rename lib/Analysis/{IPA => }/CallPrinter.cpp (100%) create mode 100644 lib/Analysis/DemandedBits.cpp create mode 100644 lib/Analysis/EHPersonalities.cpp create mode 100644 lib/Analysis/GlobalsModRef.cpp delete mode 100644 lib/Analysis/IPA/CMakeLists.txt delete mode 100644 lib/Analysis/IPA/GlobalsModRef.cpp delete mode 100644 lib/Analysis/IPA/IPA.cpp rename lib/Analysis/{IPA => }/InlineCost.cpp (95%) delete mode 100644 lib/Analysis/LibCallAliasAnalysis.cpp delete mode 100644 lib/Analysis/LibCallSemantics.cpp delete mode 100644 lib/Analysis/NoAliasAnalysis.cpp rename lib/{Transforms/ObjCARC => Analysis}/ObjCARCAliasAnalysis.cpp (53%) create mode 100644 lib/Analysis/ObjCARCAnalysisUtils.cpp rename lib/{Transforms/ObjCARC/ARCInstKind.cpp => Analysis/ObjCARCInstKind.cpp} (99%) create mode 100644 lib/Analysis/OrderedBasicBlock.cpp create mode 100644 lib/CodeGen/FuncletLayout.cpp create mode 100644 lib/CodeGen/LiveDebugValues.cpp create mode 100644 lib/CodeGen/ParallelCG.cpp create mode 100644 lib/DebugInfo/CodeView/CMakeLists.txt create mode 100644 lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp rename lib/{Analysis/IPA => DebugInfo/CodeView}/LLVMBuild.txt (78%) create mode 100644 lib/DebugInfo/CodeView/Line.cpp create mode 100644 lib/DebugInfo/CodeView/ListRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/Makefile create mode 100644 lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/TypeRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/TypeTableBuilder.cpp create mode 100644 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp create mode 100644 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp create mode 100644 
lib/DebugInfo/Symbolize/CMakeLists.txt create mode 100644 lib/DebugInfo/Symbolize/DIPrinter.cpp create mode 100644 lib/DebugInfo/Symbolize/LLVMBuild.txt rename lib/{Analysis/IPA => DebugInfo/Symbolize}/Makefile (74%) create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.h create mode 100644 lib/DebugInfo/Symbolize/Symbolize.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindings.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindingsStack.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h create mode 100644 lib/Fuzzer/FuzzerDFSan.h create mode 100644 lib/Fuzzer/cxx.dict delete mode 100644 lib/Fuzzer/cxx_fuzzer_tokens.txt create mode 100644 lib/Fuzzer/test/CallerCalleeTest.cpp delete mode 100644 lib/Fuzzer/test/CxxTokensTest.cpp delete mode 100644 lib/Fuzzer/test/DFSanMemcmpTest.cpp delete mode 100644 lib/Fuzzer/test/InfiniteTest.cpp create mode 100644 lib/Fuzzer/test/MemcmpTest.cpp rename lib/Fuzzer/test/{DFSanSimpleCmpTest.cpp => SimpleCmpTest.cpp} (85%) create mode 100644 lib/Fuzzer/test/SimpleDictionaryTest.cpp create mode 100644 lib/Fuzzer/test/SimpleHashTest.cpp create mode 100644 lib/Fuzzer/test/StrcmpTest.cpp create mode 100644 lib/Fuzzer/test/StrncmpTest.cpp create mode 100644 lib/Fuzzer/test/SwitchTest.cpp create mode 100644 lib/Fuzzer/test/UninstrumentedTest.cpp create mode 100644 lib/Fuzzer/test/dict1.txt create mode 100644 lib/Fuzzer/test/fuzzer-dfsan.test create mode 100644 lib/Fuzzer/test/fuzzer-drill.test create mode 100644 lib/Fuzzer/test/fuzzer-timeout.test create mode 100644 lib/Fuzzer/test/fuzzer-traces.test create mode 100644 lib/Fuzzer/test/hi.txt create mode 100644 lib/Fuzzer/test/merge.test create mode 100644 lib/Fuzzer/test/trace-bb/CMakeLists.txt create mode 100644 lib/Fuzzer/test/uninstrumented/CMakeLists.txt create mode 100644 lib/IR/AttributesCompatFunc.td create mode 100644 lib/IR/FunctionInfo.cpp delete mode 100644 lib/IR/MetadataTracking.cpp create mode 100644 lib/Linker/IRMover.cpp create mode 100644 lib/Linker/LinkDiagnosticInfo.h create mode 100644 lib/MC/MCFragment.cpp create mode 100644 lib/Object/FunctionIndexObjectFile.cpp delete mode 100644 lib/ProfileData/InstrProfIndexed.h create mode 100644 lib/Support/JamCRC.cpp create mode 100644 lib/Support/ThreadPool.cpp delete mode 100644 lib/Target/AArch64/AArch64MachineCombinerPattern.h create mode 100644 lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h create mode 100644 lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUTargetObjectFile.h create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h create mode 100644 lib/Target/AMDGPU/SIFrameLowering.cpp create mode 100644 lib/Target/AMDGPU/SIFrameLowering.h delete mode 100644 lib/Target/AMDGPU/SIPrepareScratchRegs.cpp create mode 100644 lib/Target/AVR/AVR.td create mode 100644 lib/Target/AVR/AVRCallingConv.td create mode 100644 lib/Target/AVR/AVRConfig.h create mode 100644 lib/Target/AVR/AVRMachineFunctionInfo.h create mode 100644 lib/Target/AVR/AVRRegisterInfo.td create 
mode 100644 lib/Target/AVR/AVRTargetMachine.cpp create mode 100644 lib/Target/AVR/CMakeLists.txt create mode 100644 lib/Target/AVR/LLVMBuild.txt create mode 100644 lib/Target/AVR/Makefile create mode 100644 lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp create mode 100644 lib/Target/AVR/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/AVR/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/AVR/TargetInfo/Makefile create mode 100644 lib/Target/Hexagon/AsmParser/CMakeLists.txt create mode 100644 lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp create mode 100644 lib/Target/Hexagon/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/Hexagon/AsmParser/Makefile create mode 100644 lib/Target/Hexagon/HexagonBitSimplify.cpp create mode 100644 lib/Target/Hexagon/HexagonEarlyIfConv.cpp create mode 100644 lib/Target/Hexagon/HexagonGenMux.cpp create mode 100644 lib/Target/Hexagon/HexagonInstrAlias.td create mode 100644 lib/Target/Hexagon/HexagonInstrEnc.td create mode 100644 lib/Target/Hexagon/HexagonInstrFormatsV60.td create mode 100644 lib/Target/Hexagon/HexagonInstrInfoV60.td create mode 100644 lib/Target/Hexagon/HexagonIntrinsicsV60.td create mode 100644 lib/Target/Hexagon/HexagonOptimizeSZextends.cpp delete mode 100644 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp create mode 100644 lib/Target/Hexagon/HexagonScheduleV55.td create mode 100644 lib/Target/Hexagon/HexagonScheduleV60.td create mode 100644 lib/Target/Hexagon/HexagonSplitDouble.cpp create mode 100644 lib/Target/Hexagon/HexagonStoreWidening.cpp create mode 100644 lib/Target/Hexagon/HexagonTargetTransformInfo.cpp create mode 100644 lib/Target/Hexagon/HexagonTargetTransformInfo.h create mode 100644 lib/Target/Hexagon/HexagonVLIWPacketizer.h create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h create mode 100644 lib/Target/Mips/MicroMips64r6InstrFormats.td create mode 100644 lib/Target/Mips/MicroMips64r6InstrInfo.td create mode 100644 lib/Target/Mips/MicroMipsDSPInstrFormats.td create mode 100644 lib/Target/Mips/MicroMipsDSPInstrInfo.td create mode 100644 lib/Target/Mips/MipsEVAInstrFormats.td create mode 100644 lib/Target/Mips/MipsEVAInstrInfo.td create mode 100644 lib/Target/Mips/MipsScheduleP5600.td create mode 100644 lib/Target/PowerPC/PPCBoolRetToInt.cpp create mode 100644 lib/Target/PowerPC/PPCMIPeephole.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp create mode 100644 lib/Target/WebAssembly/Relooper.cpp create mode 100644 lib/Target/WebAssembly/Relooper.h create mode 100644 lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyFastISel.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyISD.def create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrControl.td create mode 100644 lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyMCInstLower.h create mode 100644 
lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyPEI.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyPeephole.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegColoring.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegStackify.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyStoreResults.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp create mode 100644 lib/Target/WebAssembly/known_gcc_test_failures.txt delete mode 100644 lib/Target/X86/X86CompilationCallback_Win64.asm create mode 100644 lib/Target/X86/X86OptimizeLEAs.cpp create mode 100644 lib/Transforms/IPO/CrossDSOCFI.cpp create mode 100644 lib/Transforms/IPO/ForceFunctionAttrs.cpp create mode 100644 lib/Transforms/IPO/FunctionImport.cpp create mode 100644 lib/Transforms/IPO/InferFunctionAttrs.cpp rename lib/Transforms/{Scalar => IPO}/SampleProfile.cpp (50%) create mode 100644 lib/Transforms/Instrumentation/CFGMST.h create mode 100644 lib/Transforms/Instrumentation/PGOInstrumentation.cpp delete mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h create mode 100644 lib/Transforms/Scalar/LoopLoadElimination.cpp create mode 100644 lib/Transforms/Utils/SplitModule.cpp create mode 100644 test/Analysis/BasicAA/bug.23540.ll create mode 100644 test/Analysis/BasicAA/bug.23626.ll create mode 100644 test/Analysis/BasicAA/phi-loop.ll create mode 100644 test/Analysis/BasicAA/q.bad.ll create mode 100644 test/Analysis/BasicAA/sequential-gep.ll create mode 100644 test/Analysis/BasicAA/zext.ll create mode 100644 test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll create mode 100644 test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll create mode 100644 test/Analysis/CostModel/AMDGPU/br.ll create mode 100644 test/Analysis/CostModel/AMDGPU/extractelement.ll create mode 100644 test/Analysis/CostModel/AMDGPU/lit.local.cfg create mode 100644 test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll create mode 100644 test/Analysis/CostModel/X86/sse-itoi.ll create mode 100644 test/Analysis/CostModel/X86/vector_gep.ll create mode 100644 test/Analysis/CostModel/X86/vshift-ashr-cost.ll delete mode 100644 test/Analysis/CostModel/X86/vshift-cost.ll create mode 100644 test/Analysis/CostModel/X86/vshift-lshr-cost.ll create mode 100644 test/Analysis/CostModel/X86/vshift-shl-cost.ll create mode 100644 test/Analysis/Delinearization/parameter_addrec_product.ll create mode 100644 test/Analysis/DemandedBits/basic.ll create mode 100644 test/Analysis/DependenceAnalysis/NonAffineExpr.ll create mode 100644 test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll create mode 100644 test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg create mode 100644 test/Analysis/GlobalsModRef/argmemonly-escape.ll create mode 100644 test/Analysis/GlobalsModRef/atomic-instrs.ll create mode 100644 test/Analysis/GlobalsModRef/memset-escape.ll create mode 100644 test/Analysis/GlobalsModRef/nocapture.ll create mode 100644 test/Analysis/GlobalsModRef/nonescaping-noalias.ll create mode 100644 test/Analysis/GlobalsModRef/pr25309.ll create mode 100644 test/Analysis/GlobalsModRef/weak-interposition.ll delete mode 100644 test/Analysis/Lint/cppeh-catch-intrinsics.ll create mode 100644 test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll create mode 100644 test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll create mode 100644 test/Analysis/LoopAccessAnalysis/nullptr.ll create mode 100644 
test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll create mode 100644 test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll create mode 100644 test/Analysis/ScalarEvolution/avoid-assume-hang.ll create mode 100644 test/Analysis/ScalarEvolution/constant_condition.ll create mode 100644 test/Analysis/ScalarEvolution/flags-from-poison.ll create mode 100644 test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll create mode 100644 test/Analysis/ScalarEvolution/non-IV-phi.ll create mode 100644 test/Analysis/ScalarEvolution/pr24757.ll create mode 100644 test/Analysis/ScalarEvolution/pr25369.ll create mode 100644 test/Analysis/ScalarEvolution/shift-op.ll create mode 100644 test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll create mode 100644 test/Analysis/ValueTracking/known-bits-from-range-md.ll create mode 100644 test/Analysis/ValueTracking/known-non-equal.ll create mode 100644 test/Analysis/ValueTracking/knownnonzero-shift.ll create mode 100644 test/Analysis/ValueTracking/knownzero-shift.ll create mode 100644 test/Analysis/ValueTracking/monotonic-phi.ll create mode 100644 test/Analysis/ValueTracking/pr24866.ll create mode 100644 test/Assembler/incorrect-tdep-attrs-parsing.ll create mode 100644 test/Assembler/invalid-alias-mismatched-explicit-type.ll create mode 100644 test/Assembler/invalid-dicompileunit-uniqued.ll delete mode 100644 test/Assembler/invalid-dilocalvariable-missing-tag.ll create mode 100644 test/Assembler/invalid-disubprogram-uniqued-definition.ll create mode 100644 test/Assembler/invalid-fp80hex.ll create mode 100644 test/Assembler/invalid-inline-constraint.ll create mode 100644 test/Assembler/invalid-untyped-metadata.ll create mode 100644 test/Assembler/token.ll create mode 100644 test/Bitcode/DILocalVariable-explicit-tags.ll create mode 100644 test/Bitcode/DILocalVariable-explicit-tags.ll.bc create mode 100644 test/Bitcode/DISubprogram-distinct-definitions.ll create mode 100644 test/Bitcode/DISubprogram-distinct-definitions.ll.bc create mode 100644 test/Bitcode/Inputs/invalid-cast.bc create mode 100644 test/Bitcode/Inputs/invalid-name-with-0-byte.bc create mode 100644 test/Bitcode/Inputs/invalid-no-function-block.bc create mode 100644 test/Bitcode/anon-functions.ll create mode 100644 test/Bitcode/compatibility-3.6.ll create mode 100644 test/Bitcode/compatibility-3.6.ll.bc create mode 100644 test/Bitcode/compatibility-3.7.ll create mode 100644 test/Bitcode/compatibility-3.7.ll.bc create mode 100644 test/Bitcode/compatibility.ll create mode 100644 test/Bitcode/identification.ll create mode 100644 test/Bitcode/operand-bundles.ll create mode 100644 test/Bitcode/thinlto-function-summary.ll create mode 100644 test/Bitcode/upgrade-subprogram.ll create mode 100644 test/Bitcode/upgrade-subprogram.ll.bc create mode 100644 test/Bitcode/use-list-order2.ll create mode 100644 test/Bitcode/vst-forward-declaration.ll create mode 100644 test/BugPoint/named-md.ll create mode 100644 test/CodeGen/AArch64/aarch64-addv.ll create mode 100644 test/CodeGen/AArch64/aarch64-deferred-spilling.ll create mode 100644 test/CodeGen/AArch64/aarch64-loop-gep-opt.ll create mode 100644 test/CodeGen/AArch64/aarch64-minmaxv.ll create mode 100644 test/CodeGen/AArch64/aarch64-smax-constantfold.ll create mode 100644 test/CodeGen/AArch64/arm64-builtins-linux.ll create mode 100644 test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll create mode 100644 test/CodeGen/AArch64/arm64-fmax-safe.ll create mode 100644 test/CodeGen/AArch64/arm64-ld-from-st.ll create mode 100644 
test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll create mode 100644 test/CodeGen/AArch64/bitreverse.ll create mode 100644 test/CodeGen/AArch64/cxx-tlscc.ll create mode 100644 test/CodeGen/AArch64/dag-combine-select.ll create mode 100644 test/CodeGen/AArch64/divrem.ll create mode 100644 test/CodeGen/AArch64/emutls.ll create mode 100644 test/CodeGen/AArch64/emutls_generic.ll create mode 100644 test/CodeGen/AArch64/eon.ll create mode 100644 test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll create mode 100644 test/CodeGen/AArch64/fast-isel-cmp-vec.ll create mode 100644 test/CodeGen/AArch64/fast-isel-folded-shift.ll create mode 100644 test/CodeGen/AArch64/fcvt_combine.ll create mode 100644 test/CodeGen/AArch64/fdiv_combine.ll create mode 100644 test/CodeGen/AArch64/misched-fusion.ll create mode 100644 test/CodeGen/AArch64/nontemporal.ll create mode 100644 test/CodeGen/AArch64/readcyclecounter.ll create mode 100644 test/CodeGen/AArch64/rotate.ll create mode 100644 test/CodeGen/AArch64/round-conv.ll create mode 100755 test/CodeGen/AArch64/shrink-wrap.ll create mode 100644 test/CodeGen/AArch64/stackmap-frame-setup.ll create mode 100644 test/CodeGen/AArch64/tbi.ll create mode 100644 test/CodeGen/AArch64/vector-fcopysign.ll create mode 100644 test/CodeGen/AMDGPU/addrspacecast.ll create mode 100644 test/CodeGen/AMDGPU/annotate-kernel-features.ll create mode 100644 test/CodeGen/AMDGPU/bitreverse.ll create mode 100644 test/CodeGen/AMDGPU/calling-conventions.ll create mode 100644 test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll create mode 100644 test/CodeGen/AMDGPU/ci-use-flat-for-global.ll create mode 100644 test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll create mode 100644 test/CodeGen/AMDGPU/ds-sub-offset.ll create mode 100644 test/CodeGen/AMDGPU/dynamic_stackalloc.ll create mode 100644 test/CodeGen/AMDGPU/extract-vector-elt-i64.ll create mode 100644 test/CodeGen/AMDGPU/flat-scratch-reg.ll create mode 100644 test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll create mode 100644 test/CodeGen/AMDGPU/global-constant.ll create mode 100644 test/CodeGen/AMDGPU/hsa-globals.ll create mode 100644 test/CodeGen/AMDGPU/hsa-group-segment.ll create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll create mode 100644 test/CodeGen/AMDGPU/image-resource-id.ll create mode 100644 test/CodeGen/AMDGPU/inline-constraints.ll create mode 100644 test/CodeGen/AMDGPU/large-alloca-compute.ll create mode 100644 test/CodeGen/AMDGPU/large-alloca-graphics.ll delete mode 100644 test/CodeGen/AMDGPU/large-alloca.ll delete mode 100644 test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll create mode 100644 test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll create mode 100644 test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll create mode 100644 
test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll create mode 100644 test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll create mode 100644 test/CodeGen/AMDGPU/opencl-image-metadata.ll create mode 100644 test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll create mode 100644 test/CodeGen/AMDGPU/sampler-resource-id.ll create mode 100644 test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll create mode 100644 test/CodeGen/AMDGPU/si-literal-folding.ll create mode 100644 test/CodeGen/AMDGPU/sminmax.ll create mode 100644 test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll create mode 100644 test/CodeGen/AMDGPU/store_typed.ll create mode 100644 test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll create mode 100644 test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll create mode 100644 test/CodeGen/ARM/MachO-subtypes.ll create mode 100644 test/CodeGen/ARM/Windows/division.ll rename test/CodeGen/ARM/Windows/{integer-floating-point-conversion.ll => libcalls.ll} (88%) create mode 100644 test/CodeGen/ARM/Windows/no-eabi.ll create mode 100644 test/CodeGen/ARM/Windows/no-frame-register.ll create mode 100644 test/CodeGen/ARM/Windows/overflow.ll create mode 100644 test/CodeGen/ARM/align-sp-adjustment.ll create mode 100644 test/CodeGen/ARM/apcs-vfp.ll create mode 100644 test/CodeGen/ARM/arm-eabi.ll create mode 100644 test/CodeGen/ARM/arm-shrink-wrapping-linux.ll create mode 100644 test/CodeGen/ARM/arm-shrink-wrapping.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-minsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-mixed.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optnone.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization.ll create mode 100644 test/CodeGen/ARM/cfi-alignment.ll create mode 100644 test/CodeGen/ARM/combine-vmovdrr.ll create mode 100644 test/CodeGen/ARM/debugtrap.ll create mode 100644 test/CodeGen/ARM/emutls.ll create mode 100644 test/CodeGen/ARM/emutls1.ll create mode 100644 test/CodeGen/ARM/emutls_generic.ll create mode 100644 test/CodeGen/ARM/fp16-args.ll create mode 100644 test/CodeGen/ARM/gep-optimization.ll create mode 100644 test/CodeGen/ARM/global-merge-external.ll create mode 100644 test/CodeGen/ARM/ldm-stm-base-materialization.ll create mode 100644 test/CodeGen/ARM/legalize-unaligned-load.ll create mode 100644 test/CodeGen/ARM/memcpy-ldm-stm.ll create mode 100644 test/CodeGen/ARM/minmax.ll create mode 100644 test/CodeGen/ARM/neon_vshl_minint.ll create mode 100644 test/CodeGen/ARM/pr25317.ll create mode 100644 test/CodeGen/ARM/pr25838.ll create mode 100644 test/CodeGen/ARM/rotate.ll create mode 100644 test/CodeGen/ARM/sat-arith.ll delete mode 100644 test/CodeGen/ARM/sched-it-debug-nodes.ll create mode 100644 test/CodeGen/ARM/setjmp_longjmp.ll create mode 100644 test/CodeGen/ARM/softfp-fabs-fneg.ll create mode 100644 test/CodeGen/ARM/ssat-lower.ll create mode 100644 test/CodeGen/ARM/ssat-upper.ll create mode 100644 test/CodeGen/ARM/subtarget-no-movt.ll create mode 100644 test/CodeGen/ARM/thumb1-ldst-opt.ll create mode 100644 test/CodeGen/ARM/unaligned_load_store_vfp.ll create mode 100644 test/CodeGen/ARM/usat-lower.ll create mode 100644 test/CodeGen/ARM/usat-upper.ll create mode 100644 test/CodeGen/ARM/v7k-abi-align.ll create mode 100644 test/CodeGen/ARM/v7k-libcalls.ll create mode 100644 test/CodeGen/ARM/v7k-sincos.ll create mode 100644 test/CodeGen/ARM/vfp-reg-stride.ll create mode 100644 
test/CodeGen/ARM/vld-vst-upgrade.ll
 create mode 100644 test/CodeGen/ARM/vminmaxnm-safe.ll
 create mode 100644 test/CodeGen/CPP/gep.ll
 create mode 100644 test/CodeGen/Generic/ForceStackAlign.ll
 create mode 100644 test/CodeGen/Generic/lit.local.cfg
 create mode 100644 test/CodeGen/Hexagon/NVJumpCmp.ll
 create mode 100644 test/CodeGen/Hexagon/bit-eval.ll
 create mode 100644 test/CodeGen/Hexagon/bit-loop.ll
 create mode 100644 test/CodeGen/Hexagon/cfi-late.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-conversion-bug1.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-phi-i1.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-spare.ll
 create mode 100644 test/CodeGen/Hexagon/early-if.ll
 create mode 100644 test/CodeGen/Hexagon/ifcvt-edge-weight.ll
 create mode 100644 test/CodeGen/Hexagon/memcpy-likely-aligned.ll
 create mode 100644 test/CodeGen/Hexagon/mux-basic.ll
 create mode 100644 test/CodeGen/Hexagon/pic-jumptables.ll
 create mode 100644 test/CodeGen/Hexagon/pic-simple.ll
 create mode 100644 test/CodeGen/Hexagon/pic-static.ll
 create mode 100644 test/CodeGen/Hexagon/sdr-basic.ll
 create mode 100644 test/CodeGen/Hexagon/sdr-shr32.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-aliased-load.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-negv.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-negv2.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen.ll
 create mode 100644 test/CodeGen/Hexagon/tail-dup-subreg-abort.ll
 create mode 100644 test/CodeGen/Hexagon/v60Intrins.ll
 create mode 100644 test/CodeGen/Hexagon/v60Vasr.ll
 create mode 100644 test/CodeGen/Hexagon/v60small.ll
 create mode 100644 test/CodeGen/MIR/AArch64/cfi-def-cfa.mir
 create mode 100644 test/CodeGen/MIR/AArch64/expected-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/AArch64/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
 create mode 100644 test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
 create mode 100644 test/CodeGen/MIR/AArch64/target-flags.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/AMDGPU/target-index-operands.mir
 create mode 100644 test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir
 create mode 100644 test/CodeGen/MIR/ARM/bundled-instructions.mir
 create mode 100644 test/CodeGen/MIR/ARM/cfi-same-value.mir
 create mode 100644 test/CodeGen/MIR/ARM/expected-closing-brace.mir
 create mode 100644 test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir
 create mode 100644 test/CodeGen/MIR/ARM/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir
 create mode 100644 test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir
 create mode 100644 test/CodeGen/MIR/Generic/basic-blocks.mir
 rename test/CodeGen/MIR/{machine-basic-block-redefinition-error.mir => Generic/expected-colon-after-basic-block.mir} (67%)
 rename test/CodeGen/MIR/{ => Generic}/expected-mbb-reference-for-successor-mbb.mir (56%)
 rename test/CodeGen/MIR/{ => Generic}/frame-info.mir (95%)
 rename test/CodeGen/MIR/{ => Generic}/function-missing-machine-function.mir (100%)
 create mode 100644 test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir
 create mode 100644 test/CodeGen/MIR/Generic/lit.local.cfg
 rename test/CodeGen/MIR/{ => Generic}/llvm-ir-error-reported.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/llvmIR.mir (98%)
 rename test/CodeGen/MIR/{ => Generic}/llvmIRMissing.mir (92%)
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir
 rename test/CodeGen/MIR/{ => Generic}/machine-basic-block-unknown-name.mir (71%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-body-error.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-function.mir (93%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-name.mir (92%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-redefinition-error.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function.mir (94%)
 rename test/CodeGen/MIR/{ => Generic}/register-info.mir (95%)
 create mode 100644 test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir
 create mode 100644 test/CodeGen/MIR/Mips/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/Mips/memory-operands.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/PowerPC/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir
 create mode 100644 test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir
 create mode 100644 test/CodeGen/MIR/X86/block-address-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/callee-saved-info.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-def-cfa-register.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-offset.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-pool.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-value-error.mir
 create mode 100644 test/CodeGen/MIR/X86/def-register-already-tied-error.mir
 create mode 100644 test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/duplicate-register-flag-error.mir
 create mode 100644 test/CodeGen/MIR/X86/early-clobber-register-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir
 create mode 100644 test/CodeGen/MIR/X86/external-symbol-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-info-save-restore-points.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-info-stack-references.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/function-liveins.mir
 create mode 100644 test/CodeGen/MIR/X86/inline-asm-registers.mir
 create mode 100644 test/CodeGen/MIR/X86/instructions-debug-location.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-constant-pool-item.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-metadata-node-type.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir
 create mode 100644 test/CodeGen/MIR/X86/jump-table-info.mir
 create mode 100644 test/CodeGen/MIR/X86/jump-table-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-immediate-operand-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-offset-number-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir
 create mode 100644 test/CodeGen/MIR/X86/liveout-register-mask.mir
 create mode 100644 test/CodeGen/MIR/X86/machine-verifier.mir
 create mode 100644 test/CodeGen/MIR/X86/memory-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/metadata-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/missing-closing-quote.mir
 delete mode 100644 test/CodeGen/MIR/X86/missing-instruction.mir
 create mode 100644 test/CodeGen/MIR/X86/newline-handling.mir
 create mode 100644 test/CodeGen/MIR/X86/register-operands-target-flag-error.mir
 create mode 100644 test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-debug-info.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-invalid-name.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/standalone-register-error.mir
 create mode 100644 test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir
 create mode 100644 test/CodeGen/MIR/X86/successor-basic-blocks.mir
 create mode 100644 test/CodeGen/MIR/X86/tied-def-operand-invalid.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-jump-table-id.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-metadata-keyword.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-metadata-node.mir
 create mode 100644 test/CodeGen/MIR/X86/used-physical-register-info.mir
 create mode 100644 test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir
 delete mode 100644 test/CodeGen/MIR/basic-blocks.mir
 delete mode 100644 test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
 delete mode 100644 test/CodeGen/MIR/successor-basic-blocks.mir
 create mode 100644 test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll
 create mode 100644 test/CodeGen/Mips/emutls_generic.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-64-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-args-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/atomicrmx.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/load-atomic.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/sqrt.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/store-atomic.ll
 create mode 100644 test/CodeGen/NVPTX/branch-fold.ll
 create mode 100644 test/CodeGen/NVPTX/bypass-div.ll
 create mode 100644 test/CodeGen/NVPTX/combine-min-max.ll
 create mode 100644 test/CodeGen/NVPTX/global-addrspace.ll
 create mode 100644 test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
 create mode 100644 test/CodeGen/NVPTX/reg-copy.ll
 create mode 100644 test/CodeGen/PowerPC/BoolRetToIntTest.ll
 create mode 100644 test/CodeGen/PowerPC/BreakableToken-reduced.ll
 create mode 100644 test/CodeGen/PowerPC/aantidep-def-ec.mir
 create mode 100644 test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
 create mode 100644 test/CodeGen/PowerPC/addisdtprelha-nonr3.mir
 create mode 100644 test/CodeGen/PowerPC/bitcasts-direct-move.ll
 create mode 100644 test/CodeGen/PowerPC/bitreverse.ll
 create mode 100644 test/CodeGen/PowerPC/branch-hint.ll
 create mode 100644 test/CodeGen/PowerPC/coal-sections.ll
 create mode 100644 test/CodeGen/PowerPC/crbit-asm-disabled.ll
 create mode 100644 test/CodeGen/PowerPC/dyn-alloca-offset.ll
 create mode 100644 test/CodeGen/PowerPC/e500-1.ll
 create mode 100644 test/CodeGen/PowerPC/emutls_generic.ll
 create mode 100644 test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
 create mode 100644 test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
 create mode 100644 test/CodeGen/PowerPC/machine-combiner.ll
 create mode 100644 test/CodeGen/PowerPC/mc-instrlat.ll
 create mode 100644 test/CodeGen/PowerPC/mcm-13.ll
 create mode 100644 test/CodeGen/PowerPC/merge-st-chain-op.ll
 create mode 100644 test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
 create mode 100644 test/CodeGen/PowerPC/peephole-align.ll
 create mode 100644 test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
 create mode 100644 test/CodeGen/PowerPC/ppcsoftops.ll
 create mode 100644 test/CodeGen/PowerPC/pr24636.ll
 create mode 100644 test/CodeGen/PowerPC/pr25157-peephole.ll
 create mode 100644 test/CodeGen/PowerPC/preincprep-nontrans-crash.ll
 create mode 100644 test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
 create mode 100644 test/CodeGen/PowerPC/rotl-rotr-crash.ll
 create mode 100644 test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
 create mode 100644 test/CodeGen/PowerPC/stackmap-frame-setup.ll
 create mode 100644 test/CodeGen/PowerPC/swaps-le-6.ll
 create mode 100644 test/CodeGen/PowerPC/unal-vec-ldst.ll
 create mode 100644 test/CodeGen/PowerPC/unal-vec-negarith.ll
 create mode 100644 test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
 create mode 100644 test/CodeGen/PowerPC/vec-asm-disabled.ll
 create mode 100644 test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
 create mode 100644 test/CodeGen/SPARC/32abi.ll
 create mode 100644 test/CodeGen/SPARC/float-constants.ll
 create mode 100644 test/CodeGen/SPARC/missing-sret.ll
 create mode 100644 test/CodeGen/SPARC/reserved-regs.ll
 create mode 100644 test/CodeGen/SPARC/select-mask.ll
 create mode 100644 test/CodeGen/SPARC/spill.ll
 create mode 100644 test/CodeGen/SPARC/stack-align.ll
 create mode 100644 test/CodeGen/SystemZ/alloca-03.ll
 create mode 100644 test/CodeGen/SystemZ/alloca-04.ll
 create mode 100644 test/CodeGen/SystemZ/dag-combine-01.ll
 create mode 100644 test/CodeGen/SystemZ/fp-cmp-05.ll
 create mode 100644 test/CodeGen/SystemZ/fp-libcall.ll
 create mode 100644 test/CodeGen/SystemZ/fp-sincos-01.ll
 create mode 100644 test/CodeGen/SystemZ/int-cmp-51.ll
 create mode 100644 test/CodeGen/SystemZ/int-cmp-52.ll
 create mode 100644 test/CodeGen/SystemZ/vec-perm-12.ll
 create mode 100644 test/CodeGen/SystemZ/vec-perm-13.ll
 create mode 100644 test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll
 delete mode 100644 test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
 create mode 100644 test/CodeGen/Thumb/thumb-shrink-wrapping.ll
 create mode 100644 test/CodeGen/Thumb2/emit-unwinding.ll
 create mode 100644 test/CodeGen/Thumb2/setjmp_longjmp.ll
 create mode 100644 test/CodeGen/WebAssembly/call.ll
 create mode 100644 test/CodeGen/WebAssembly/cfg-stackify.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_f32.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_f64.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_i32.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_i64.ll
 create mode 100644 test/CodeGen/WebAssembly/conv.ll
 create mode 100644 test/CodeGen/WebAssembly/copysign-casts.ll
 create mode 100644 test/CodeGen/WebAssembly/cpus.ll
 create mode 100644 test/CodeGen/WebAssembly/dead-vreg.ll
 create mode 100644 test/CodeGen/WebAssembly/f32.ll
 create mode 100644 test/CodeGen/WebAssembly/f64.ll
 create mode 100644 test/CodeGen/WebAssembly/fast-isel.ll
 create mode 100644 test/CodeGen/WebAssembly/frem.ll
 create mode 100644 test/CodeGen/WebAssembly/func.ll
 create mode 100644 test/CodeGen/WebAssembly/global.ll
 create mode 100644 test/CodeGen/WebAssembly/globl.ll
 create mode 100644 test/CodeGen/WebAssembly/i32.ll
 create mode 100644 test/CodeGen/WebAssembly/i64.ll
 create mode 100644 test/CodeGen/WebAssembly/ident.ll
 create mode 100644 test/CodeGen/WebAssembly/immediates.ll
 create mode 100644 test/CodeGen/WebAssembly/inline-asm.ll
 create mode 100644 test/CodeGen/WebAssembly/legalize.ll
 create mode 100644 test/CodeGen/WebAssembly/load-ext.ll
 create mode 100644 test/CodeGen/WebAssembly/load-store-i1.ll
 create mode 100644 test/CodeGen/WebAssembly/load.ll
 create mode 100644 test/CodeGen/WebAssembly/loop-idiom.ll
 create mode 100644 test/CodeGen/WebAssembly/memory-addr32.ll
 create mode 100644 test/CodeGen/WebAssembly/memory-addr64.ll
 create mode 100644 test/CodeGen/WebAssembly/offset-folding.ll
 create mode 100644 test/CodeGen/WebAssembly/offset.ll
 create mode 100644 test/CodeGen/WebAssembly/phi.ll
 create mode 100644 test/CodeGen/WebAssembly/reg-stackify.ll
 create mode 100644 test/CodeGen/WebAssembly/return-int32.ll
 create mode 100644 test/CodeGen/WebAssembly/return-void.ll
 create mode 100644 test/CodeGen/WebAssembly/returned.ll
 create mode 100644 test/CodeGen/WebAssembly/select.ll
 create mode 100644 test/CodeGen/WebAssembly/signext-zeroext.ll
 create mode 100644 test/CodeGen/WebAssembly/store-results.ll
 create mode 100644 test/CodeGen/WebAssembly/store-trunc.ll
 create mode 100644 test/CodeGen/WebAssembly/store.ll
 create mode 100644 test/CodeGen/WebAssembly/switch.ll
 create mode 100644 test/CodeGen/WebAssembly/unreachable.ll
 create mode 100644 test/CodeGen/WebAssembly/unused-argument.ll
 create mode 100644 test/CodeGen/WebAssembly/userstack.ll
 create mode 100644 test/CodeGen/WebAssembly/varargs.ll
 create mode 100644 test/CodeGen/WebAssembly/vtable.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-alloca-sink.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-all-win32.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-and-throw.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-scalar.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-unwind.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-demote-liveout.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-frame-vars.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-inalloca.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-min-unwind.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-multi-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-1.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-2.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-3.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-rethrow.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-state-calc-1.ll
 delete mode 100644 test/CodeGen/WinEH/seh-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/seh-exception-code.ll
 delete mode 100644 test/CodeGen/WinEH/seh-exception-code2.ll
 delete mode 100644 test/CodeGen/WinEH/seh-inlined-finally.ll
 delete mode 100644 test/CodeGen/WinEH/seh-outlined-finally-win32.ll
 delete mode 100644 test/CodeGen/WinEH/seh-outlined-finally.ll
 delete mode 100644 test/CodeGen/WinEH/seh-prepared-basic.ll
 delete mode 100644 test/CodeGen/WinEH/seh-resume-phi.ll
 delete mode 100644 test/CodeGen/WinEH/seh-simple.ll
 create mode 100644 test/CodeGen/WinEH/wineh-cloning.ll
 create mode 100644 test/CodeGen/WinEH/wineh-demotion.ll
 create mode 100644 test/CodeGen/WinEH/wineh-intrinsics-invalid.ll
 create mode 100644 test/CodeGen/WinEH/wineh-intrinsics.ll
 create mode 100644 test/CodeGen/WinEH/wineh-no-demotion.ll
 create mode 100644 test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll
 create mode 100644 test/CodeGen/WinEH/wineh-statenumbering.ll
 create mode 100644 test/CodeGen/X86/add-nsw-sext.ll
 create mode 100644 test/CodeGen/X86/and-encoding.ll
 create mode 100644 test/CodeGen/X86/atomic-flags.ll
 create mode 100644 test/CodeGen/X86/atomic-non-integer.ll
 create mode 100644 test/CodeGen/X86/avg.ll
 create mode 100644 test/CodeGen/X86/avx-isa-check.ll
 create mode 100644 test/CodeGen/X86/avx512-bugfix-25270.ll
 create mode 100644 test/CodeGen/X86/avx512-ext.ll
 create mode 100644 test/CodeGen/X86/avx512-extract-subvector.ll
 create mode 100644 test/CodeGen/X86/avx512-skx-insert-subvec.ll
 delete mode 100644 test/CodeGen/X86/avx512-trunc-ext.ll
 create mode 100644 test/CodeGen/X86/avx512-trunc.ll
 create mode 100644 test/CodeGen/X86/avx512cd-intrinsics.ll
 create mode 100644 test/CodeGen/X86/avx512cdvl-intrinsics.ll
 create mode 100644 test/CodeGen/X86/avx512dq-intrinsics.ll
 create mode 100644 test/CodeGen/X86/bit-piece-comment.ll
 create mode 100644 test/CodeGen/X86/bitreverse.ll
 create mode 100644 test/CodeGen/X86/branchfolding-catchpads.ll
 create mode 100644 test/CodeGen/X86/catchpad-realign-savexmm.ll
 create mode 100644 test/CodeGen/X86/catchpad-regmask.ll
 create mode 100644 test/CodeGen/X86/catchpad-weight.ll
 create mode 100644 test/CodeGen/X86/catchret-empty-fallthrough.ll
 create mode 100644 test/CodeGen/X86/catchret-fallthrough.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-inalloca.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-large-codemodel.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-realign.ll
 create mode 100644 test/CodeGen/X86/coal-sections.ll
 create mode 100644 test/CodeGen/X86/coalescer-win64.ll
 create mode 100644 test/CodeGen/X86/code_placement_cold_loop_blocks.ll
 create mode 100644 test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
 create mode 100644 test/CodeGen/X86/code_placement_loop_rotation.ll
 create mode 100644 test/CodeGen/X86/code_placement_loop_rotation2.ll
 create mode 100644 test/CodeGen/X86/combine-multiplies.ll
 delete mode 100644 test/CodeGen/X86/combine-sse2-intrinsics.ll
 create mode 100644 test/CodeGen/X86/constant-hoisting-and.ll
 create mode 100644 test/CodeGen/X86/constant-hoisting-cmp.ll
 delete mode 100644 test/CodeGen/X86/cppeh-nounwind.ll
 create mode 100644 test/CodeGen/X86/cxx_tlscc64.ll
 create mode 100644 test/CodeGen/X86/dag-fmf-cse.ll
 create mode 100644 test/CodeGen/X86/dag-merge-fast-accesses.ll
 create mode 100644 test/CodeGen/X86/darwin-tls.ll
 create mode 100644 test/CodeGen/X86/debugloc-argsize.ll
 create mode 100644 test/CodeGen/X86/eh-null-personality.ll
 create mode 100644 test/CodeGen/X86/emutls-pic.ll
 create mode 100644 test/CodeGen/X86/emutls-pie.ll
 create mode 100644 test/CodeGen/X86/emutls.ll
 create mode 100644 test/CodeGen/X86/emutls_generic.ll
 create mode 100644 test/CodeGen/X86/expand-vr64-gr64-copy.mir
 create mode 100644 test/CodeGen/X86/extractelement-legalization-cycle.ll
 create mode 100644 test/CodeGen/X86/fadd-combines.ll
 create mode 100644 test/CodeGen/X86/fast-isel-bitcasts-avx.ll
 create mode 100644 test/CodeGen/X86/fast-isel-bitcasts.ll
 create mode 100644 test/CodeGen/X86/fast-isel-deadcode.ll
 create mode 100644 test/CodeGen/X86/fast-isel-emutls.ll
 create mode 100644 test/CodeGen/X86/fast-isel-nontemporal.ll
 create mode 100644 test/CodeGen/X86/fast-isel-stackcheck.ll
 create mode 100644 test/CodeGen/X86/fixup-lea.ll
 create mode 100644 test/CodeGen/X86/fma-commute-x86.ll
 create mode 100644 test/CodeGen/X86/fma-scalar-memfold.ll
 create mode 100644 test/CodeGen/X86/fold-push.ll
 create mode 100644 test/CodeGen/X86/fp-logic.ll
 create mode 100644 test/CodeGen/X86/fp128-calling-conv.ll
 create mode 100644 test/CodeGen/X86/fp128-cast.ll
 create mode 100644 test/CodeGen/X86/fp128-compare.ll
 create mode 100644 test/CodeGen/X86/fp128-i128.ll
 create mode 100644 test/CodeGen/X86/fp128-libcalls.ll
 create mode 100644 test/CodeGen/X86/fp128-load.ll
 create mode 100644 test/CodeGen/X86/fp128-store.ll
 create mode 100644 test/CodeGen/X86/fpcmp-soft-fp.ll
 create mode 100644 test/CodeGen/X86/frem-msvc32.ll
 create mode 100644 test/CodeGen/X86/funclet-layout.ll
 create mode 100644 test/CodeGen/X86/function-alias.ll
 create mode 100644 test/CodeGen/X86/hhvm-cc.ll
 create mode 100644 test/CodeGen/X86/i386-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/immediate_merging.ll
 create mode 100644 test/CodeGen/X86/inconsistent_landingpad.ll
 create mode 100644 test/CodeGen/X86/inline-sse.ll
 create mode 100644 test/CodeGen/X86/insertps-from-constantpool.ll
 create mode 100644 test/CodeGen/X86/insertps-unfold-load-bug.ll
 create mode 100644 test/CodeGen/X86/late-address-taken.ll
 create mode 100644 test/CodeGen/X86/lea-opt.ll
 rename test/CodeGen/X86/{frameescape.ll => localescape.ll} (76%)
 create mode 100644 test/CodeGen/X86/machine-combiner-int-vec.ll
 create mode 100644 test/CodeGen/X86/machine-combiner-int.ll
 create mode 100644 test/CodeGen/X86/materialize.ll
 create mode 100644 test/CodeGen/X86/mcu-abi.ll
 create mode 100644 test/CodeGen/X86/merge-store-partially-alias-loads.ll
 create mode 100644 test/CodeGen/X86/mmx-coalescing.ll
 create mode 100644 test/CodeGen/X86/mmx-only.ll
 create mode 100644 test/CodeGen/X86/movpc32-check.ll
 create mode 100644 test/CodeGen/X86/or-lea.ll
 create mode 100644 test/CodeGen/X86/patchpoint-verifiable.mir
 create mode 100644 test/CodeGen/X86/peephole-na-phys-copy-folding.ll
 create mode 100644 test/CodeGen/X86/pop-stack-cleanup.ll
 delete mode 100644 test/CodeGen/X86/pr21529.ll
 delete mode 100644 test/CodeGen/X86/pr23900.ll
 create mode 100644 test/CodeGen/X86/pr24139.ll
 create mode 100644 test/CodeGen/X86/pr24602.ll
 create mode 100644 test/CodeGen/X86/pr25828.ll
 create mode 100644 test/CodeGen/X86/prolog-push-seq.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower1.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower2.ll
 create mode 100644 test/CodeGen/X86/push-cfi-debug.ll
 create mode 100644 test/CodeGen/X86/push-cfi-obj.ll
 create mode 100644 test/CodeGen/X86/push-cfi.ll
 create mode 100644 test/CodeGen/X86/rem_crash.ll
 delete mode 100644 test/CodeGen/X86/remat-invalid-liveness.ll
 create mode 100644 test/CodeGen/X86/safestack.ll
 create mode 100644 test/CodeGen/X86/sar_fold.ll
 create mode 100644 test/CodeGen/X86/sar_fold64.ll
 create mode 100644 test/CodeGen/X86/scalar-fp-to-i64.ll
 create mode 100644 test/CodeGen/X86/scalar-int-to-fp.ll
 create mode 100644 test/CodeGen/X86/sdiv-pow2.ll
 create mode 100644 test/CodeGen/X86/seh-catchpad.ll
 create mode 100644 test/CodeGen/X86/seh-exception-code.ll
 delete mode 100644 test/CodeGen/X86/seh-filter.ll
 delete mode 100644 test/CodeGen/X86/seh-stack-realign-win32.ll
 create mode 100644 test/CodeGen/X86/shrink-wrap-chkstk.ll
 create mode 100644 test/CodeGen/X86/slow-unaligned-mem.ll
 create mode 100644 test/CodeGen/X86/soft-sitofp.ll
 create mode 100644 test/CodeGen/X86/sse-only.ll
 create mode 100644 test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/stack-folding-adx-x86_64.ll
 create mode 100644 test/CodeGen/X86/stackmap-frame-setup.ll
 create mode 100644 test/CodeGen/X86/switch-edge-weight.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsave.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsavec.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsaves.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsave.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsavec.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsaveopt.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsaves.ll
 create mode 100644 test/CodeGen/X86/tail-dup-catchret.ll
 create mode 100644 test/CodeGen/X86/tail-merge-wineh.ll
 create mode 100644 test/CodeGen/X86/tailcall-msvc-conventions.ll
 create mode 100644 test/CodeGen/X86/tailcall-readnone.ll
 create mode 100644 test/CodeGen/X86/tls-android-negative.ll
 create mode 100644 test/CodeGen/X86/tls-android.ll
 create mode 100644 test/CodeGen/X86/token_landingpad.ll
 create mode 100644 test/CodeGen/X86/trunc-store.ll
 create mode 100644 test/CodeGen/X86/vec_cmp_sint-128.ll
 create mode 100644 test/CodeGen/X86/vec_cmp_uint-128.ll
 create mode 100644 test/CodeGen/X86/vec_minmax_sint.ll
 create mode 100644 test/CodeGen/X86/vec_minmax_uint.ll
 create mode 100644 test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
 create mode 100644 test/CodeGen/X86/vector-lzcnt-512.ll
 create mode 100644 test/CodeGen/X86/vector-merge-store-fp-constants.ll
 create mode 100644 test/CodeGen/X86/vector-popcnt-512.ll
 create mode 100644 test/CodeGen/X86/vector-rotate-128.ll
 create mode 100644 test/CodeGen/X86/vector-rotate-256.ll
 create mode 100644 test/CodeGen/X86/vector-shift-ashr-512.ll
 create mode 100644 test/CodeGen/X86/vector-shift-lshr-512.ll
 create mode 100644 test/CodeGen/X86/vector-shift-shl-512.ll
 create mode 100644 test/CodeGen/X86/vector-shuffle-512-v32.ll
 create mode 100644 test/CodeGen/X86/vector-shuffle-v1.ll
 create mode 100644 test/CodeGen/X86/vector-tzcnt-512.ll
 create mode 100644 test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
 create mode 100644 test/CodeGen/X86/vmovq.ll
 create mode 100644 test/CodeGen/X86/wide-integer-cmp.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-csrs.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-nested-cxx.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-nested.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-varargs.ll
 create mode 100644 test/CodeGen/X86/win-catchpad.ll
 create mode 100644 test/CodeGen/X86/win-cleanuppad.ll
 create mode 100644 test/CodeGen/X86/win-funclet-cfi.ll
 rename test/CodeGen/X86/{win_eh_prepare.ll => win-mixed-ehpersonality.ll} (55%)
 create mode 100644 test/CodeGen/X86/win32-seh-catchpad-realign.ll
 create mode 100644 test/CodeGen/X86/win32-seh-catchpad.ll
 create mode 100644 test/CodeGen/X86/win32-seh-nested-finally.ll
 create mode 100644 test/CodeGen/X86/win32-spill-xmm.ll
 create mode 100644 test/CodeGen/X86/win64_sibcall.ll
 create mode 100644 test/CodeGen/X86/win_coreclr_chkstk.ll
 delete mode 100644 test/CodeGen/X86/win_ftol2.ll
 create mode 100644 test/CodeGen/X86/wineh-coreclr.ll
 create mode 100644 test/CodeGen/X86/wineh-exceptionpointer.ll
 create mode 100644 test/CodeGen/X86/wineh-no-ehpads.ll
 create mode 100644 test/CodeGen/X86/x32-indirectbr.ll
 create mode 100644 test/CodeGen/X86/x32-landingpad.ll
 create mode 100644 test/CodeGen/X86/x32-va_start.ll
 create mode 100644 test/CodeGen/X86/x86-32-intrcc.ll
 create mode 100644 test/CodeGen/X86/x86-64-intrcc.ll
 create mode 100644 test/CodeGen/X86/x86-64-ms_abi-vararg.ll
 create mode 100644 test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/x86-shrink-wrap-unwind.ll
 create mode 100644 test/CodeGen/X86/x86-win64-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/xop-pcmov.ll
 create mode 100644 test/DebugInfo/AArch64/prologue_end.ll
 create mode 100644 test/DebugInfo/ARM/float-args.ll
 create mode 100644 test/DebugInfo/ARM/prologue_end.ll
 rename test/DebugInfo/{ => Generic}/2009-10-16-Phi.ll (100%)
 rename test/DebugInfo/{ => Generic}/2009-11-03-InsertExtractValue.ll (86%)
 rename test/DebugInfo/{ => Generic}/2009-11-05-DeadGlobalVariable.ll (65%)
 rename test/DebugInfo/{ => Generic}/2009-11-06-NamelessGlobalVariable.ll (71%)
 rename test/DebugInfo/{ => Generic}/2009-11-10-CurrentFn.ll (59%)
 rename test/DebugInfo/{ => Generic}/2010-01-05-DbgScope.ll (70%)
 rename test/DebugInfo/{ => Generic}/2010-03-12-llc-crash.ll (58%)
 rename test/DebugInfo/{ => Generic}/2010-03-19-DbgDeclare.ll (65%)
 rename test/DebugInfo/{ => Generic}/2010-03-24-MemberFn.ll (77%)
 rename test/DebugInfo/{ => Generic}/2010-04-06-NestedFnDbgInfo.ll (76%)
 rename test/DebugInfo/{ => Generic}/2010-04-19-FramePtr.ll (73%)
 rename test/DebugInfo/{ => Generic}/2010-05-03-DisableFramePtr.ll (77%)
 rename test/DebugInfo/{ => Generic}/2010-05-03-OriginDIE.ll (80%)
 rename test/DebugInfo/{ => Generic}/2010-05-10-MultipleCU.ll (56%)
 rename test/DebugInfo/{ => Generic}/2010-06-29-InlinedFnLocalVar.ll (67%)
 rename test/DebugInfo/{ => Generic}/2010-07-19-Crash.ll (58%)
 rename test/DebugInfo/{ => Generic}/2010-10-01-crash.ll (59%)
 create mode 100644 test/DebugInfo/Generic/Inputs/gmlt.ll
 rename test/DebugInfo/{ => Generic}/PR20038.ll (77%)
 rename test/DebugInfo/{ => Generic}/accel-table-hash-collisions.ll (96%)
 rename test/DebugInfo/{ => Generic}/array.ll (71%)
 rename test/DebugInfo/{ => Generic}/block-asan.ll (89%)
 rename test/DebugInfo/{ => Generic}/bug_null_debuginfo.ll (57%)
 rename test/DebugInfo/{ => Generic}/constant-pointers.ll (77%)
 rename test/DebugInfo/{ => Generic}/constant-sdnodes-have-dbg-location.ll (53%)
 rename test/DebugInfo/{ => Generic}/constantfp-sdnodes-have-dbg-location.ll (50%)
 rename test/DebugInfo/{ => Generic}/cross-cu-inlining.ll (77%)
 rename test/DebugInfo/{ => Generic}/cross-cu-linkonce-distinct.ll (76%)
 rename test/DebugInfo/{ => Generic}/cross-cu-linkonce.ll (75%)
 rename test/DebugInfo/{ => Generic}/cu-range-hole.ll (72%)
 rename test/DebugInfo/{ => Generic}/cu-ranges.ll (70%)
 rename test/DebugInfo/{X86 => Generic}/dbg-at-specficiation.ll (79%)
 rename test/DebugInfo/{ => Generic}/dead-argument-order.ll (79%)
 rename test/DebugInfo/{ => Generic}/debug-info-always-inline.ll (100%)
 rename test/DebugInfo/{ => Generic}/debug-info-qualifiers.ll (83%)
 rename test/DebugInfo/{ => Generic}/debuginfofinder-forward-declaration.ll (80%)
 rename test/DebugInfo/{ => Generic}/debuginfofinder-multiple-cu.ll (54%)
 create mode 100644 test/DebugInfo/Generic/def-line.ll
 create mode 100644 test/DebugInfo/Generic/discriminator.ll
 rename test/DebugInfo/{ => Generic}/dwarf-public-names.ll (72%)
 rename test/DebugInfo/{ => Generic}/empty.ll (82%)
 rename test/DebugInfo/{ => Generic}/enum-types.ll (65%)
 rename test/DebugInfo/{ => Generic}/enum.ll (83%)
 rename test/DebugInfo/{ => Generic}/global.ll (74%)
 rename test/DebugInfo/{ => Generic}/gmlt.test (100%)
 create mode 100644 test/DebugInfo/Generic/gvn.ll
 rename test/DebugInfo/{ => Generic}/incorrect-variable-debugloc.ll (89%)
 rename test/DebugInfo/{ => Generic}/incorrect-variable-debugloc1.ll (85%)
 rename test/DebugInfo/{ => Generic}/inheritance.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-debug-info-multiret.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-debug-info.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-no-debug-info.ll (77%)
 rename test/DebugInfo/{ => Generic}/inline-scopes.ll (81%)
 rename test/DebugInfo/{ => Generic}/inlined-arguments.ll (67%)
 rename test/DebugInfo/{ => Generic}/inlined-vars.ll (61%)
 create mode 100644 test/DebugInfo/Generic/lit.local.cfg
 rename test/DebugInfo/{ => Generic}/location-verifier.ll (69%)
 rename test/DebugInfo/{ => Generic}/lto-comp-dir.ll (74%)
 rename test/DebugInfo/{ => Generic}/member-order.ll (77%)
 rename test/DebugInfo/{ => Generic}/member-pointers.ll (89%)
 rename test/DebugInfo/{ => Generic}/missing-abstract-variable.ll (76%)
 rename test/DebugInfo/{ => Generic}/multiline.ll (86%)
 rename test/DebugInfo/{ => Generic}/namespace.ll (84%)
 rename test/DebugInfo/{ => Generic}/namespace_function_definition.ll (64%)
 rename test/DebugInfo/{ => Generic}/namespace_inline_function_definition.ll (74%)
 rename test/DebugInfo/{ => Generic}/nodebug.ll (77%)
 rename test/DebugInfo/{ => Generic}/piece-verifier.ll (73%)
 create mode 100755 test/DebugInfo/Generic/ptrsize.ll
 rename test/DebugInfo/{X86 => Generic}/recursive_inlining.ll (79%)
 rename test/DebugInfo/{ => Generic}/restrict.ll (75%)
 create mode 100644 test/DebugInfo/Generic/skeletoncu.ll
 rename test/DebugInfo/{ => Generic}/sugared-constants.ll (79%)
 rename test/DebugInfo/{ => Generic}/template-recursive-void.ll (92%)
 rename test/DebugInfo/{ => Generic}/tu-composite.ll (85%)
 rename test/DebugInfo/{ => Generic}/tu-member-pointer.ll (84%)
 rename test/DebugInfo/{ => Generic}/two-cus-from-same-file.ll (67%)
 rename test/DebugInfo/{ => Generic}/typedef.ll (82%)
 rename test/DebugInfo/{ => Generic}/unconditional-branch.ll (78%)
 rename test/DebugInfo/{ => Generic}/varargs.ll (80%)
 rename test/DebugInfo/{ => Generic}/version.ll (69%)
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro-cmd.h
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.cc
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.h
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-test.macho-i386.o
 create mode 100644 test/DebugInfo/Inputs/fat-test.o
 create mode 100644 test/DebugInfo/MIR/X86/lit.local.cfg
 create mode 100644 test/DebugInfo/MIR/X86/live-debug-values-3preds.mir
 create mode 100644 test/DebugInfo/MIR/X86/live-debug-values.mir
 create mode 100644 test/DebugInfo/MIR/lit.local.cfg
 create mode 100644 test/DebugInfo/Mips/dsr-fixed-objects.ll
 create mode 100644 test/DebugInfo/Mips/dsr-non-fixed-objects.ll
 create mode 100644 test/DebugInfo/Mips/prologue_end.ll
 rename test/DebugInfo/PDB/{ => DIA}/lit.local.cfg (100%)
 rename test/DebugInfo/PDB/{ => DIA}/pdbdump-flags.test (77%)
 rename test/DebugInfo/PDB/{ => DIA}/pdbdump-symbol-format.test (87%)
 create mode 100644 test/DebugInfo/PDB/pdbdump-headers.test
 create mode 100644 test/DebugInfo/Sparc/prologue_end.ll
 create mode 100644 test/DebugInfo/SystemZ/prologue_end.ll
 create mode 100644 test/DebugInfo/X86/DIModuleContext.ll
 create mode 100644 test/DebugInfo/X86/bbjoin.ll
 create mode 100644 test/DebugInfo/X86/debugger-tune.ll
 create mode 100644 test/DebugInfo/X86/dw_op_minus.ll
 create mode 100644 test/DebugInfo/X86/dwarf-linkage-names.ll
 create mode 100644 test/DebugInfo/X86/externaltyperef.ll
 create mode 100644 test/DebugInfo/X86/live-debug-values.ll
 create mode 100644 test/DebugInfo/X86/safestack-byval.ll
 create mode 100644 test/DebugInfo/debugmacinfo.test
 create mode 100644 test/DebugInfo/dwarfdump-dwp.test
 create mode 100644 test/DebugInfo/dwarfdump-macho-relocs.test
 create mode 100644 test/DebugInfo/dwarfdump-macho-universal.test
 create mode 100644 test/DebugInfo/dwo.ll
 delete mode 100644 test/DebugInfo/gvn.ll
 create mode 100644 test/DebugInfo/skeletoncu.ll
 create mode 100644 test/Examples/Kaleidoscope/Chapter3.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter4.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter5.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter6.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter7.test
 create mode 100644 test/Examples/lit.local.cfg
 create mode 100644 test/ExecutionEngine/OrcLazy/global_aliases.ll
 create mode 100644 test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg
 create mode 100644 test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s
 rename test/ExecutionEngine/RuntimeDyld/X86/{COFF_x86_64 => COFF_x86_64.s} (80%)
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll
 create mode 100644 test/Feature/OperandBundles/adce.ll
 create mode 100644 test/Feature/OperandBundles/basic-aa-argmemonly.ll
 create mode 100644 test/Feature/OperandBundles/dse.ll
 create mode 100644 test/Feature/OperandBundles/early-cse.ll
 create mode 100644 test/Feature/OperandBundles/function-attrs.ll
 create mode 100644 test/Feature/OperandBundles/inliner-conservative.ll
 create mode 100644 test/Feature/OperandBundles/merge-func.ll
 create mode 100644 test/Feature/OperandBundles/special-state.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
 delete mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
 rename test/Instrumentation/AddressSanitizer/{do-not-instrument-llvm-metadata-darwin.ll => do-not-instrument-globals-darwin.ll} (60%)
 create mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll
 delete mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/keep_going.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/localescape.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/twice.ll
 create mode 100644 test/Instrumentation/DataFlowSanitizer/external_mask.ll
 create mode 100644 test/Instrumentation/MemorySanitizer/AArch64/vararg.ll
 create mode 100644 test/Instrumentation/SanitizerCoverage/seh.ll
 create mode 100644 test/Instrumentation/SanitizerCoverage/switch-tracing.ll
 create mode 100644 test/LTO/X86/disable-verify.ll
 create mode 100644 test/LTO/X86/llvm-lto-output.ll
 create mode 100644 test/LTO/X86/parallel.ll
 create mode 100644 test/LibDriver/thin.test
 create mode 100644 test/Linker/Inputs/available_externally_over_decl.ll
 create mode 100644 test/Linker/Inputs/comdat11.ll
 create mode 100644 test/Linker/Inputs/comdat13.ll
 create mode 100644 test/Linker/Inputs/comdat14.ll
 create mode 100644 test/Linker/Inputs/comdat15.ll
 create mode 100644 test/Linker/Inputs/ctors2.ll
 create mode 100644 test/Linker/Inputs/ctors3.ll
 create mode 100644 test/Linker/Inputs/funcimport.ll
 create mode 100644 test/Linker/Inputs/funcimport_appending_global.ll
 create mode 100644 test/Linker/Inputs/internalize-lazy.ll
 create mode 100644 test/Linker/Inputs/linkage.c.ll
 create mode 100644 test/Linker/Inputs/only-needed-debug-metadata.ll
 create mode 100644 test/Linker/Inputs/only-needed-named-metadata.ll
 delete mode 100644 test/Linker/Inputs/subprogram-linkonce-weak-odr.ll
 create mode 100644 test/Linker/Inputs/thinlto_funcimport_debug.ll
 create mode 100644 test/Linker/available_externally_over_decl.ll
 create mode 100644 test/Linker/comdat11.ll
 create mode 100644 test/Linker/comdat12.ll
 create mode 100644 test/Linker/comdat13.ll
 create mode 100644 test/Linker/comdat14.ll
 create mode 100644 test/Linker/comdat15.ll
 create mode 100644 test/Linker/comdat_group.ll
 create mode 100644 test/Linker/ctors2.ll
 create mode 100644 test/Linker/ctors3.ll
 create mode 100644 test/Linker/ctors4.ll
 create mode 100644 test/Linker/ctors5.ll
 create mode 100644 test/Linker/funcimport.ll
 create mode 100644 test/Linker/funcimport_appending_global.ll
 create mode 100644 test/Linker/internalize-lazy.ll
 create mode 100644 test/Linker/link-flags.ll
 create mode 100644 test/Linker/only-needed-debug-metadata.ll
 create mode 100644 test/Linker/only-needed-named-metadata.ll
 delete mode 100644 test/Linker/subprogram-linkonce-weak-odr.ll
 create mode 100644 test/Linker/thinlto_funcimport_debug.ll
 create mode 100644 test/Linker/uniqued-distinct-cycles.ll
 create mode 100644 test/MC/AArch64/armv8.2a-at.s
 create mode 100644 test/MC/AArch64/armv8.2a-mmfr2.s
 create mode 100644 test/MC/AArch64/armv8.2a-persistent-memory.s
 create mode 100644 test/MC/AArch64/armv8.2a-statistical-profiling.s
 create mode 100644 test/MC/AArch64/armv8.2a-uao.s
 create mode 100644 test/MC/AArch64/error-location-ldr-pseudo.s
 create mode 100644 test/MC/AArch64/error-location.s
 create mode 100644 test/MC/AArch64/fullfp16-diagnostics.s
 create mode 100644 test/MC/AArch64/fullfp16-neon-neg.s
 create mode 100644 test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s
 create mode 100644 test/MC/AMDGPU/flat-scratch.s
 create mode 100644 test/MC/AMDGPU/hsa-text.s
 create mode 100644 test/MC/AMDGPU/out-of-range-registers.s
 create mode 100644 test/MC/AMDGPU/smem.s
 create mode 100644 test/MC/AMDGPU/smrd-err.s
 create mode 100644 test/MC/AMDGPU/vop3-vop1-nosrc.s
 create mode 100644 test/MC/AMDGPU/vopc-errs.s
 delete mode 100644 test/MC/ARM/directive-arch-armv6j.s
 rename test/MC/ARM/{directive-arch-armv6zk.s => directive-arch-armv8.2-a.s} (60%)
 create mode 100644 test/MC/ARM/error-location-ldr-pseudo.s
 create mode 100644 test/MC/ARM/error-location.s
 create mode 100644 test/MC/ARM/fullfp16-neon-neg.s
 create mode 100644 test/MC/ARM/fullfp16-neon.s
 create mode 100644 test/MC/ARM/neon-vcvt-fp16.s
 create mode 100644 test/MC/ARM/thumb-branches.s
 create mode 100644 test/MC/ARM/thumb1-relax.s
 create mode 100644 test/MC/ARM/v7k-dsp.s
 create mode 100644 test/MC/AsmParser/reassign.s
 create mode 100644 test/MC/AsmParser/undefined-local-symbol.s
 delete mode 100644 test/MC/COFF/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/COFF/label-undefined.s
 create mode 100644 test/MC/COFF/stdin.s
 create mode 100644 test/MC/COFF/temporary-alias.s
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-at.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-uao.txt
 create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neg.txt
 create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-arm.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt
 create mode 100644 test/MC/Disassembler/Hexagon/invalid_packet.txt
 create mode 100644 test/MC/Disassembler/Hexagon/too_many_instructions.txt
 create mode 100644 test/MC/Disassembler/Hexagon/too_many_loop_ends.txt
 create mode 100644 test/MC/Disassembler/Hexagon/unextendable.txt
 create mode 100644 test/MC/Disassembler/Mips/dsp/valid-el.txt
 create mode 100644 test/MC/Disassembler/Mips/dsp/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/dspr2/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/eva/valid_R6-eva.txt
 create mode 100644 test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips-dsp/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips-dspr2/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips32r3/invalid.txt
 rename test/MC/Disassembler/Mips/{micromips_le.txt => micromips32r3/valid-el.txt} (86%)
 rename test/MC/Disassembler/Mips/{micromips.txt => micromips32r3/valid.txt} (84%)
 delete mode 100644 test/MC/Disassembler/Mips/micromips32r6.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips32r6/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips64r6/valid.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips-dsp.txt
 create mode 100644 test/MC/Disassembler/Mips/mips1/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips1/invalid.txt
 create mode 100644 test/MC/Disassembler/Mips/mips2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips2/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r2_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips4/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips4/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64r2_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Sparc/sparc-v9.txt
 create mode 100644 test/MC/ELF/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/ELF/align-zero.s
 create mode 100644 test/MC/ELF/div-by-zero.s
 create mode 100644 test/MC/ELF/empty-twice.ll
 create mode 100644 test/MC/Hexagon/asmMap.s
 create mode 100644 test/MC/Hexagon/capitalizedEndloop.s
 create mode 100644 test/MC/Hexagon/dcfetch.s
 create mode 100644 test/MC/Hexagon/empty_asm.s
 create mode 100644 test/MC/Hexagon/endloop.s
 create mode 100644 test/MC/Hexagon/got.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_alu.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_perm.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_pred.s
 create mode 100644 test/MC/Hexagon/instructions/cr.s
 create mode 100644 test/MC/Hexagon/instructions/j.s
 create mode 100644 test/MC/Hexagon/instructions/jr.s
 create mode 100644 test/MC/Hexagon/instructions/ld.s
 create mode 100644 test/MC/Hexagon/instructions/memop.s
 create mode 100644 test/MC/Hexagon/instructions/nv_j.s
 create mode 100644 test/MC/Hexagon/instructions/nv_st.s
 create mode 100644 test/MC/Hexagon/instructions/st.s
 create mode 100644 test/MC/Hexagon/instructions/system_user.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_alu.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_bit.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_complex.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_fp.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_mpy.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_perm.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_pred.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_shift.s
 create mode 100644 test/MC/Hexagon/jumpdoublepound.s
 create mode 100644 test/MC/Hexagon/labels.s
 create mode 100644 test/MC/Hexagon/new-value-check.s
 create mode 100644 test/MC/Hexagon/out_of_range.s
 create mode 100644 test/MC/Hexagon/pcrel.s
 create mode 100644 test/MC/Hexagon/relaxed_newvalue.s
 create mode 100644 test/MC/Hexagon/test.s
 create mode 100644 test/MC/Hexagon/two_ext.s
 create mode 100644 test/MC/Hexagon/v60-alu.s
 create mode 100644 test/MC/Hexagon/v60-permute.s
 create mode 100644 test/MC/Hexagon/v60-shift.s
 create mode 100644 test/MC/Hexagon/v60-vcmp.s
 create mode 100644 test/MC/Hexagon/v60-vmem.s
 create mode 100644 test/MC/Hexagon/v60-vmpy-acc.s
 create mode 100644 test/MC/Hexagon/v60-vmpy1.s
 create mode 100644 test/MC/Hexagon/v60lookup.s
 create mode 100644 test/MC/MachO/AArch64/reloc-errors.s
 create mode 100644 test/MC/MachO/ARM/compact-unwind-armv7k.s
 create mode 100644 test/MC/MachO/ARM/tvos-version-min-load-command.s
 create mode 100644 test/MC/MachO/ARM/version-min-diagnostics2.s
 create mode 100644 test/MC/MachO/ARM/watchos-version-min-load-command.s
 create mode 100644 test/MC/MachO/PowerPC/coal-sections-powerpc.s
 create mode 100644 test/MC/MachO/PowerPC/lit.local.cfg
 delete mode 100644 test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
 delete mode 100644 test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
 create mode 100644 test/MC/MachO/coal-sections-x86_64.s
 create mode 100644 test/MC/MachO/darwin-version-min-load-command.s
 create mode 100644 test/MC/MachO/empty-twice.ll
 create mode 100644 test/MC/Mips/cnmips/invalid.s
 create mode 100644 test/MC/Mips/cprestore-bad.s
 create mode 100644 test/MC/Mips/cprestore-noreorder.s
 create mode 100644 test/MC/Mips/cprestore-reorder.s
 create mode 100644 test/MC/Mips/cprestore-warning-unused.s
 create mode 100644 test/MC/Mips/directive-ent.s
 create mode 100644 test/MC/Mips/dsp/invalid.s
 create mode 100644 test/MC/Mips/dsp/valid.s
 create mode 100644 test/MC/Mips/dspr2/invalid.s
 create mode 100644 test/MC/Mips/dspr2/valid.s
 create mode 100644 test/MC/Mips/eva/invalid-noeva-wrong-error.s
 create mode 100644 test/MC/Mips/eva/invalid-noeva.s
 create mode 100644 test/MC/Mips/eva/invalid.s
 create mode 100644 test/MC/Mips/eva/invalid_R6.s
 create mode 100644 test/MC/Mips/eva/valid_R6.s
 create mode 100644 test/MC/Mips/eva/valid_preR6.s
 create mode 100644 test/MC/Mips/expansion-jal-sym-pic.s
 create mode 100644 test/MC/Mips/instalias-imm-expanding.s
 create mode 100644 test/MC/Mips/macro-bcc-imm-bad.s
 create mode 100644 test/MC/Mips/macro-bcc-imm.s
 create mode 100644 test/MC/Mips/macro-ddiv-bad.s
 create mode 100644 test/MC/Mips/macro-ddiv.s
 create mode 100644 test/MC/Mips/macro-ddivu-bad.s
 create mode 100644 test/MC/Mips/macro-ddivu.s
 create mode 100644 test/MC/Mips/macro-div-bad.s
 create mode 100644 test/MC/Mips/macro-div.s
 create mode 100644 test/MC/Mips/macro-divu-bad.s
 create mode 100644 test/MC/Mips/macro-divu.s
 create mode 100644 test/MC/Mips/macro-dla.s
 create mode 100644 test/MC/Mips/macro-dli.s
 create mode 100644 test/MC/Mips/micromips-dsp/invalid-wrong-error.s
 create mode 100644 test/MC/Mips/micromips-dsp/invalid.s
 create mode 100644 test/MC/Mips/micromips-dsp/valid.s
 create mode 100644 test/MC/Mips/micromips-dspr2/invalid.s
 create mode 100644 test/MC/Mips/micromips-dspr2/valid.s
 create mode 100644 test/MC/Mips/micromips/invalid.s
 create mode 100644 test/MC/Mips/micromips64r6/invalid.s
 create mode 100644 test/MC/Mips/micromips64r6/valid.s
 delete mode 100644 test/MC/Mips/mips-dsp-instructions.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-dsp.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-dspr2.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-msa.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32r2.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32r3.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64r2.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64r3.s
 create mode 100644 test/MC/Mips/msa/invalid-64.s
 create mode 100644 test/MC/Mips/msa/invalid.s
 create mode 100644 test/MC/Mips/reloc-directive-bad.s
 create mode 100644 test/MC/Mips/reloc-directive.s
 create mode 100644 test/MC/Mips/rotations32-bad.s
 create mode 100644 test/MC/Mips/rotations32.s
 create mode 100644 test/MC/Mips/rotations64.s
 create mode 100644 test/MC/PowerPC/pr24686.s
 create mode 100644 test/MC/Sparc/sparc-asm-errors.s
 create mode 100644 test/MC/X86/X86_64-pku.s
 create mode 100644 test/MC/X86/encoder-fail.s
 create mode 100644 test/MC/X86/intel-syntax-print.ll
 create mode 100644 test/MC/X86/large-bss.s
 create mode 100644 test/MC/X86/macho-reloc-errors-x86.s
 create mode 100644 test/MC/X86/macho-reloc-errors-x86_64.s
 create mode 100644 test/MC/X86/x86-64-avx512cd.s
 create mode 100644 test/MC/X86/x86-64-avx512cd_vl.s
 create mode 100644 test/MC/X86/x86-evenDirective.s
 create mode 100644 test/Object/AMDGPU/elf-definitios.yaml
 create mode 100644 test/Object/Inputs/coff-short-import-code
 create mode 100644 test/Object/Inputs/coff-short-import-data
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-strtab.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64
 create mode 100755 test/Object/Inputs/invalid-symbol-table-size.elf
 create mode 100644 test/Object/Inputs/invalid-xindex-size.elf
 create mode 100755 test/Object/Inputs/main-ret-zero-pe-i386.dll
 create mode 100755 test/Object/Inputs/main-ret-zero-pe-i386.exe
 create mode 100644 test/Object/Inputs/no-section-header-string-table.elf-x86-64
 create mode 100644 test/Object/Inputs/pr25877.lib
 create mode 100755 test/Object/Inputs/rel-no-sec-table.elf-x86-64
 create mode 100644 test/Object/Inputs/shndx.elf
 create mode 100755 test/Object/Inputs/trivial-object-test.elf-avr
 create mode 100644 test/Object/nm-pe-image.test
 create mode 100644 test/Object/no-section-header-string-table.test
 create mode 100644 test/Object/objdump-shndx.test
 create mode 100644 test/Object/pr25877.test
 create mode 100644 test/Object/readobj-absent.test
 create mode 100644 test/Other/opt-twice.ll
 create mode 100644 test/TableGen/cast-list-initializer.td
 create mode 100644 test/TableGen/trydecode-emission.td
 create mode 100644 test/TableGen/trydecode-emission2.td
 create mode 100644 test/TableGen/trydecode-emission3.td
 create mode 100644 test/Transforms/AddDiscriminators/call.ll
 create mode 100644 test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
 create mode 100644 test/Transforms/AddDiscriminators/diamond.ll
 create mode 100644 test/Transforms/AddDiscriminators/oneline.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/lit.local.cfg
 create mode 100644 test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
 create mode 100644 test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/select.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/widen_switch.ll
 create mode 100644 test/Transforms/CodeGenPrepare/invariant.group.ll
 create mode 100644 test/Transforms/CrossDSOCFI/basic.ll
 create mode 100644 test/Transforms/DeadArgElim/naked_functions.ll
 create mode 100644 test/Transforms/DeadArgElim/operandbundle.ll
 create mode 100644 test/Transforms/DeadStoreElimination/calloc-store.ll
 create mode 100644 test/Transforms/EarlyCSE/AArch64/ldstN.ll
 create mode 100644 test/Transforms/EarlyCSE/atomics.ll
 create mode 100644 test/Transforms/EarlyCSE/fence.ll
 create mode 100644 test/Transforms/ForcedFunctionAttrs/forced.ll
 delete mode 100644 test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
 create mode 100644 test/Transforms/FunctionAttrs/nonnull.ll
 create mode 100644 test/Transforms/FunctionAttrs/norecurse.ll
 create mode 100644 test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
 create mode 100644 test/Transforms/FunctionImport/Inputs/funcimport.ll
 create mode 100644 test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
 create mode 100644 test/Transforms/FunctionImport/funcimport.ll
 create mode 100644 test/Transforms/FunctionImport/funcimport_debug.ll
 create mode 100644 test/Transforms/GVN/assume-equal.ll
 create mode 100644 test/Transforms/GVN/funclet.ll
 create mode 100644 test/Transforms/GVN/invariant.group.ll
 create mode 100644 test/Transforms/GVN/no_speculative_loads_with_asan.ll
 create mode 100644 test/Transforms/GVN/pr24426.ll
 create mode 100644 test/Transforms/GVN/pr25440.ll
 create mode 100644 test/Transforms/GlobalOpt/assume.ll
 create mode 100644 test/Transforms/GlobalOpt/available_externally_global_ctors.ll
 create mode 100644 test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
 create mode 100644 test/Transforms/GlobalOpt/externally-initialized.ll
 create mode 100644 test/Transforms/GlobalOpt/global-demotion.ll
 create mode 100644 test/Transforms/GlobalOpt/invariant.group.barrier.ll
 create mode 100644 test/Transforms/GlobalOpt/localize-constexpr.ll
 create mode 100644 test/Transforms/IndVarSimplify/bec-cmp.ll
 create mode 100644 test/Transforms/IndVarSimplify/const_phi.ll
 create mode 100644 test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24356.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24783.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24804.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24952.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24956.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25047.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25051.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25060.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25360.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25421.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25578.ll
 create mode 100644 test/Transforms/IndVarSimplify/zext-nuw.ll
 rename test/Transforms/{FunctionAttrs/annotate-1.ll => InferFunctionAttrs/annotate.ll} (76%)
 create mode 100644 test/Transforms/Inline/alloca-dbgdeclare-merge.ll
 create mode 100644 test/Transforms/Inline/deopt-bundles.ll
 create mode 100644 test/Transforms/Inline/inline-assume.ll
 create mode 100644 test/Transforms/Inline/inline-cold-callee.ll
 create mode 100644 test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
 create mode 100644 test/Transforms/Inline/inline-hot-callee.ll
 create mode 100644 test/Transforms/Inline/zero-cost.ll
 create mode 100644 test/Transforms/InstCombine/all-bits-shift.ll
 create mode 100644 test/Transforms/InstCombine/apint-or.ll
 delete mode 100644 test/Transforms/InstCombine/apint-or1.ll
 delete mode 100644 test/Transforms/InstCombine/apint-or2.ll
 create mode 100644 test/Transforms/InstCombine/bitcast-bitcast.ll
 create mode 100644 test/Transforms/InstCombine/bitreverse-fold.ll
 create mode 100644 test/Transforms/InstCombine/bitreverse-recognize.ll
 create mode 100644 test/Transforms/InstCombine/bswap-known-bits.ll
 create mode 100644 test/Transforms/InstCombine/call_nonnull_arg.ll
 create mode 100644 test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
 create mode 100644 test/Transforms/InstCombine/compare-alloca.ll
 create mode 100644 test/Transforms/InstCombine/ctpop.ll
 create mode 100644 test/Transforms/InstCombine/demorgan-zext.ll
 create mode 100644 test/Transforms/InstCombine/fold-phi-load-metadata.ll
 create mode 100644 test/Transforms/InstCombine/lifetime.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-2.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-3.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-4.ll
 create mode 100644 test/Transforms/InstCombine/log-pow-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/log-pow.ll
 create mode 100644 test/Transforms/InstCombine/minmax-fp.ll
 create mode 100644 test/Transforms/InstCombine/nonnull-attribute.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata-2.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata-3.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata.ll
 create mode 100644 test/Transforms/InstCombine/pow-4.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp2.ll
 create mode 100644 test/Transforms/InstCombine/pow-sqrt.ll
 delete mode 100644 test/Transforms/InstCombine/pr20059.ll
 create mode 100644 test/Transforms/InstCombine/pr24605.ll
 create mode 100644 test/Transforms/InstCombine/pr25745.ll
 create mode 100644 test/Transforms/InstCombine/sqrt-nofast.ll
 create mode 100644 test/Transforms/InstCombine/tan-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/tan.ll
 create mode 100644 test/Transforms/InstCombine/token.ll
 create mode 100644 test/Transforms/InstCombine/x86-f16c.ll
 create mode 100644 test/Transforms/InstCombine/x86-pmovsx.ll
 create mode 100644 test/Transforms/InstCombine/x86-pmovzx.ll
 create mode 100644 test/Transforms/InstCombine/x86-pshufb.ll
 create mode 100644 test/Transforms/InstCombine/x86-sse4a.ll
 create mode 100644 test/Transforms/InstCombine/x86-vector-shifts.ll
 create mode 100644 test/Transforms/InstCombine/x86-xop.ll
 create mode 100644 test/Transforms/InstSimplify/add-mask.ll
 create mode 100644 test/Transforms/InstSimplify/bswap.ll
 create mode 100644 test/Transforms/InstSimplify/implies.ll
 create mode 100644 test/Transforms/InstSimplify/shift-128-kb.ll
 create mode 100644 test/Transforms/Internalize/comdat.ll
 create mode 100644 test/Transforms/JumpThreading/implied-cond.ll
 create mode 100644 test/Transforms/JumpThreading/phi-known.ll
 create mode 100644 test/Transforms/JumpThreading/update-edge-weight.ll
 create mode 100644 test/Transforms/LCSSA/mixed-catch.ll
 create mode 100644 test/Transforms/LICM/argmemonly-call.ll
 create mode 100644 test/Transforms/LoopDistribute/bounds-expansion-bug.ll
 create mode 100644 test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
 create mode 100644 test/Transforms/LoopLoadElim/backward.ll
 create mode 100644 test/Transforms/LoopLoadElim/def-store-before-load.ll
 create mode 100644 test/Transforms/LoopLoadElim/forward.ll
 create mode 100644 test/Transforms/LoopLoadElim/memcheck.ll
 create mode 100644 test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
 create mode 100644 test/Transforms/LoopLoadElim/unknown-dep.ll
 create mode 100644 test/Transforms/LoopReroll/negative.ll
 create mode 100644 test/Transforms/LoopReroll/reroll_with_dbg.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
 create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/funclet.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/pr25541.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/sext-ind-var.ll
 create mode 100644 test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
 delete mode 100644 test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-crashers.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
 create mode 100644 test/Transforms/LoopUnroll/rebuild_lcssa.ll
 create mode 100644 test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
 create mode 100644 test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
 create mode 100644 test/Transforms/LoopUnswitch/cleanuppad.ll
 create mode 100644 test/Transforms/LoopUnswitch/cold-loop.ll
 create mode 100644 test/Transforms/LoopUnswitch/copy-metadata.ll
 create mode 100644 test/Transforms/LoopUnswitch/trivial-unswitch.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
 create mode 100644 test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
 create mode 100644 test/Transforms/LoopVectorize/ARM/vector_cast.ll
 create mode 100644 test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
 create mode 100644 test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/no_fpmath.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/reg-usage.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
 create mode 100644 test/Transforms/LoopVectorize/gep_with_bitcast.ll
 create mode 100644 test/Transforms/LoopVectorize/miniters.ll
 create mode 100644 test/Transforms/LoopVectorize/nontemporal.ll
 create mode 100644 test/Transforms/LoopVectorize/ptr-induction.ll
 create mode 100644 test/Transforms/LowerBitSets/function-ext.ll
 create mode 100644 test/Transforms/LowerBitSets/function.ll
 create mode 100644 test/Transforms/LowerBitSets/nonstring.ll
 create mode 100644 test/Transforms/LowerBitSets/pr25902.ll
 create mode 100644 test/Transforms/LowerSwitch/delete-default-block-crash.ll
 create mode 100644 test/Transforms/Mem2Reg/optnone.ll
 create mode 100644 test/Transforms/Mem2Reg/pr24179.ll
 create mode 100644 test/Transforms/MemCpyOpt/nontemporal.ll
 create mode 100644 test/Transforms/MergeFunc/apply_function_attributes.ll
 create mode 100644 test/Transforms/MergeFunc/constant-entire-value.ll
 create mode 100644 test/Transforms/MergeFunc/crash2.ll
 create mode 100644 test/Transforms/MergeFunc/gep-base-type.ll
 create mode 100644
test/Transforms/MergeFunc/merge-block-address-other-function.ll create mode 100644 test/Transforms/MergeFunc/merge-block-address.ll create mode 100644 test/Transforms/MergeFunc/merge-const-ptr-and-int.ll create mode 100644 test/Transforms/MergeFunc/merge-different-vector-types.ll create mode 100644 test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll create mode 100644 test/Transforms/MergeFunc/no-merge-block-address-other-function.ll create mode 100644 test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll create mode 100644 test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll create mode 100644 test/Transforms/MergeFunc/ranges-multiple.ll create mode 100644 test/Transforms/MergeFunc/self-referential-global.ll create mode 100644 test/Transforms/MergeFunc/undef-different-types.ll create mode 100644 test/Transforms/NaryReassociate/nary-mul.ll create mode 100644 test/Transforms/NaryReassociate/pr24301.ll create mode 100644 test/Transforms/PGOProfile/Inputs/branch1.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/branch2.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/criticaledge.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/diag.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/landingpad.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/loop1.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/loop2.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/switch.proftext create mode 100644 test/Transforms/PGOProfile/branch1.ll create mode 100644 test/Transforms/PGOProfile/branch2.ll create mode 100644 test/Transforms/PGOProfile/criticaledge.ll create mode 100644 test/Transforms/PGOProfile/diag_mismatch.ll create mode 100644 test/Transforms/PGOProfile/diag_no_funcprofdata.ll create mode 100644 test/Transforms/PGOProfile/diag_no_profile.ll create mode 100644 test/Transforms/PGOProfile/landingpad.ll create mode 100644 test/Transforms/PGOProfile/loop1.ll create mode 100644 test/Transforms/PGOProfile/loop2.ll create mode 100644 test/Transforms/PGOProfile/single_bb.ll create mode 100644 test/Transforms/PGOProfile/switch.ll create mode 100644 test/Transforms/PruneEH/operand-bundles.ll create mode 100644 test/Transforms/Reassociate/fp-expr.ll create mode 100644 test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll create mode 100644 test/Transforms/Reassociate/vaarg_movable.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/codegen-cond.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll create mode 100644 
test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll create mode 100644 test/Transforms/SCCP/global-alias-constprop.ll create mode 100644 test/Transforms/SLPVectorizer/AArch64/horizontal.ll create mode 100644 test/Transforms/SLPVectorizer/AArch64/nontemporal.ll create mode 100644 test/Transforms/SLPVectorizer/X86/commutativity.ll create mode 100644 test/Transforms/SLPVectorizer/X86/pr23510.ll create mode 100644 test/Transforms/SLPVectorizer/X86/schedule_budget.ll create mode 100644 test/Transforms/SafeStack/AArch64/abi.ll create mode 100644 test/Transforms/SafeStack/AArch64/lit.local.cfg create mode 100644 test/Transforms/SafeStack/ARM/abi.ll create mode 100644 test/Transforms/SafeStack/ARM/lit.local.cfg create mode 100644 test/Transforms/SafeStack/ARM/setjmp.ll create mode 100644 test/Transforms/SafeStack/X86/abi.ll create mode 100644 test/Transforms/SafeStack/X86/lit.local.cfg create mode 100644 test/Transforms/SafeStack/byval.ll create mode 100644 test/Transforms/SafeStack/debug-loc.ll create mode 100644 test/Transforms/SafeStack/ret.ll create mode 100644 test/Transforms/SafeStack/store.ll create mode 100644 test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof create mode 100644 test/Transforms/SampleProfile/Inputs/coverage-warning.prof create mode 100644 test/Transforms/SampleProfile/Inputs/gcc-simple.afdo create mode 100644 test/Transforms/SampleProfile/Inputs/inline-coverage.prof create mode 100644 test/Transforms/SampleProfile/Inputs/inline-hint.prof create mode 100644 test/Transforms/SampleProfile/Inputs/inline.prof create mode 100644 test/Transforms/SampleProfile/Inputs/nolocinfo.prof create mode 100644 test/Transforms/SampleProfile/Inputs/offset.prof create mode 100644 test/Transforms/SampleProfile/Inputs/remarks.prof create mode 100644 test/Transforms/SampleProfile/cov-zero-samples.ll create mode 100644 
test/Transforms/SampleProfile/coverage-warning.ll create mode 100644 test/Transforms/SampleProfile/gcc-simple.ll create mode 100644 test/Transforms/SampleProfile/inline-coverage.ll create mode 100644 test/Transforms/SampleProfile/inline-hint.ll create mode 100644 test/Transforms/SampleProfile/inline.ll create mode 100644 test/Transforms/SampleProfile/nolocinfo.ll create mode 100644 test/Transforms/SampleProfile/offset.ll create mode 100644 test/Transforms/SampleProfile/remarks.ll create mode 100644 test/Transforms/Scalarizer/store-bug.ll create mode 100644 test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/ARM/lit.local.cfg create mode 100644 test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/Mips/lit.local.cfg create mode 100644 test/Transforms/SimplifyCFG/PR25267.ll create mode 100644 test/Transforms/SimplifyCFG/empty-cleanuppad.ll create mode 100644 test/Transforms/SimplifyCFG/implied-cond.ll create mode 100644 test/Transforms/SimplifyCFG/merge-cond-stores-2.ll create mode 100644 test/Transforms/SimplifyCFG/merge-cond-stores.ll create mode 100644 test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll create mode 100644 test/Transforms/SimplifyCFG/switch-dead-default.ll create mode 100644 test/Transforms/SimplifyCFG/wineh-unreachable.ll create mode 100644 test/Transforms/Sink/catchswitch.ll create mode 100644 test/Transforms/Sink/landingpad.ll create mode 100644 test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll create mode 100644 test/Transforms/StripDeadPrototypes/basic.ll create mode 100644 test/Transforms/TailCallElim/notail.ll create mode 100644 test/Transforms/Util/simplify-dbg-declare-load.ll create mode 100644 test/Verifier/align-md.ll create mode 100644 test/Verifier/atomics.ll create mode 100644 test/Verifier/dbg-null-retained-type.ll create mode 100644 test/Verifier/dereferenceable-md.ll create mode 100644 test/Verifier/func-dbg.ll create mode 100644 test/Verifier/invalid-eh.ll delete mode 100644 test/Verifier/invalid-patchable-statepoint.ll create mode 100644 test/Verifier/metadata-function-dbg.ll create mode 100644 test/Verifier/operand-bundles.ll create mode 100644 test/Verifier/token1.ll create mode 100644 test/Verifier/token2.ll create mode 100644 test/Verifier/token3.ll create mode 100644 test/Verifier/token4.ll create mode 100644 test/Verifier/token5.ll create mode 100644 test/Verifier/token6.ll create mode 100644 test/Verifier/token7.ll create mode 100644 test/tools/dsymutil/ARM/dummy-debug-map-amr64.map create mode 100644 test/tools/dsymutil/ARM/empty-map.test create mode 100644 test/tools/dsymutil/ARM/fat-arch-name.test create mode 100644 test/tools/dsymutil/ARM/fat-arch-not-found.test create mode 100644 test/tools/dsymutil/ARM/inlined-low_pc.c create mode 100644 test/tools/dsymutil/ARM/lit.local.cfg create mode 100755 test/tools/dsymutil/Inputs/absolute_sym.macho.i386 create mode 100644 test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o create mode 100755 test/tools/dsymutil/Inputs/basic.macho.i386 create mode 100644 test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o 
create mode 100644 test/tools/dsymutil/Inputs/dead-stripped/1.o create mode 100644 test/tools/dsymutil/Inputs/empty_range/1.o create mode 100755 test/tools/dsymutil/Inputs/fat-test.arm.dylib create mode 100644 test/tools/dsymutil/Inputs/fat-test.arm.o create mode 100644 test/tools/dsymutil/Inputs/fat-test.c create mode 100755 test/tools/dsymutil/Inputs/fat-test.dylib create mode 100644 test/tools/dsymutil/Inputs/fat-test.o create mode 100644 test/tools/dsymutil/Inputs/inlined-low_pc/1.o create mode 100644 test/tools/dsymutil/Inputs/libfat-test.a create mode 100644 test/tools/dsymutil/Inputs/mismatch/1.o create mode 100644 test/tools/dsymutil/Inputs/mismatch/mismatch.pcm create mode 100644 test/tools/dsymutil/Inputs/modules/1.o create mode 100644 test/tools/dsymutil/Inputs/modules/Bar.pcm create mode 100644 test/tools/dsymutil/Inputs/modules/Foo.pcm create mode 100644 test/tools/dsymutil/Inputs/odr-anon-namespace/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-anon-namespace/2.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/2.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/3.o create mode 100644 test/tools/dsymutil/Inputs/odr-uniquing/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-uniquing/2.o create mode 100644 test/tools/dsymutil/Inputs/submodules/1.o create mode 100644 test/tools/dsymutil/Inputs/submodules/Parent.pcm create mode 100644 test/tools/dsymutil/X86/basic-linking-bundle.test create mode 100644 test/tools/dsymutil/X86/custom-line-table.test create mode 100644 test/tools/dsymutil/X86/dead-stripped.cpp create mode 100644 test/tools/dsymutil/X86/dsym-companion.test create mode 100644 test/tools/dsymutil/X86/dummy-debug-map.map create mode 100644 test/tools/dsymutil/X86/empty_range.s create mode 100644 test/tools/dsymutil/X86/fat-archive-input-i386.test create mode 100644 test/tools/dsymutil/X86/fat-object-input-x86_64.test create mode 100644 test/tools/dsymutil/X86/fat-object-input-x86_64h.test create mode 100644 test/tools/dsymutil/X86/mismatch.m create mode 100644 test/tools/dsymutil/X86/modules.m create mode 100644 test/tools/dsymutil/X86/multiple-inputs.test create mode 100644 test/tools/dsymutil/X86/odr-anon-namespace.cpp create mode 100644 test/tools/dsymutil/X86/odr-member-functions.cpp create mode 100644 test/tools/dsymutil/X86/odr-uniquing.cpp create mode 100644 test/tools/dsymutil/X86/submodules.m create mode 100644 test/tools/dsymutil/absolute_symbol.test create mode 100644 test/tools/dsymutil/arch-option.test create mode 100644 test/tools/dsymutil/archive-timestamp.test create mode 100644 test/tools/dsymutil/dump-symtab.test create mode 100644 test/tools/dsymutil/fat-binary-output.test delete mode 100644 test/tools/gold/Inputs/linkonce-weak.ll rename test/tools/gold/{ => PowerPC}/lit.local.cfg (75%) rename test/tools/gold/{ => PowerPC}/mtriple.ll (100%) rename test/tools/gold/{ => X86}/Inputs/alias-1.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/available-externally.ll rename test/tools/gold/{ => X86}/Inputs/bcsection.s (100%) rename test/tools/gold/{ => X86}/Inputs/comdat.ll (69%) create mode 100644 test/tools/gold/X86/Inputs/comdat2.ll rename test/tools/gold/{ => X86}/Inputs/common.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/ctors2.ll rename test/tools/gold/{ => X86}/Inputs/drop-debug.bc (100%) create mode 100644 test/tools/gold/X86/Inputs/drop-linkage.ll rename test/tools/gold/{ => X86}/Inputs/invalid.bc (100%) rename 
test/tools/gold/{ => X86}/Inputs/linker-script.export (100%) create mode 100644 test/tools/gold/X86/Inputs/linkonce-weak.ll rename test/tools/gold/{ => X86}/Inputs/pr19901-1.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/resolve-to-alias.ll create mode 100644 test/tools/gold/X86/Inputs/thinlto.ll create mode 100644 test/tools/gold/X86/Inputs/type-merge.ll create mode 100644 test/tools/gold/X86/Inputs/type-merge2.ll rename test/tools/gold/{ => X86}/Inputs/weak.ll (100%) rename test/tools/gold/{ => X86}/alias.ll (92%) create mode 100644 test/tools/gold/X86/alias2.ll rename test/tools/gold/{linkonce-weak.ll => X86/available-externally.ll} (54%) rename test/tools/gold/{ => X86}/bad-alias.ll (64%) create mode 100644 test/tools/gold/X86/bcsection.ll rename test/tools/gold/{ => X86}/coff.ll (100%) create mode 100644 test/tools/gold/X86/comdat.ll create mode 100644 test/tools/gold/X86/comdat2.ll rename test/tools/gold/{ => X86}/common.ll (100%) create mode 100644 test/tools/gold/X86/ctors.ll create mode 100644 test/tools/gold/X86/ctors2.ll create mode 100644 test/tools/gold/X86/disable-verify.ll rename test/tools/gold/{ => X86}/drop-debug.ll (100%) create mode 100644 test/tools/gold/X86/drop-linkage.ll rename test/tools/gold/{ => X86}/emit-llvm.ll (78%) rename test/tools/gold/{ => X86}/invalid.ll (100%) rename test/tools/gold/{ => X86}/linker-script.ll (100%) create mode 100644 test/tools/gold/X86/linkonce-weak.ll create mode 100644 test/tools/gold/X86/lit.local.cfg rename test/tools/gold/{ => X86}/no-map-whole-file.ll (100%) rename test/tools/gold/{ => X86}/opt-level.ll (100%) create mode 100644 test/tools/gold/X86/parallel.ll rename test/tools/gold/{ => X86}/pr19901.ll (100%) create mode 100644 test/tools/gold/X86/pr25907.ll create mode 100644 test/tools/gold/X86/pr25915.ll rename test/tools/gold/{ => X86}/remarks.ll (69%) create mode 100644 test/tools/gold/X86/resolve-to-alias.ll rename test/tools/gold/{ => X86}/slp-vectorize.ll (100%) rename test/tools/gold/{ => X86}/stats.ll (100%) create mode 100644 test/tools/gold/X86/thinlto.ll create mode 100644 test/tools/gold/X86/type-merge.ll create mode 100644 test/tools/gold/X86/type-merge2.ll create mode 100644 test/tools/gold/X86/unnamed-addr.ll rename test/tools/gold/{ => X86}/vectorize.ll (100%) rename test/tools/gold/{ => X86}/weak.ll (100%) delete mode 100644 test/tools/gold/bcsection.ll delete mode 100644 test/tools/gold/comdat.ll create mode 100644 test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/types/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/types/b.dwo create mode 100644 test/tools/llvm-dwp/Inputs/type_dedup/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/type_dedup/b.dwo create mode 100644 test/tools/llvm-dwp/X86/lit.local.cfg create mode 100644 test/tools/llvm-dwp/X86/simple.test create mode 100644 test/tools/llvm-dwp/X86/type_dedup.test create mode 100644 test/tools/llvm-lto/Inputs/thinlto.ll create mode 100644 test/tools/llvm-lto/thinlto.ll create mode 100644 test/tools/llvm-mc/basic.test create mode 100644 test/tools/llvm-mc/fatal_warnings.test create mode 100644 test/tools/llvm-mc/lit.local.cfg create mode 100644 test/tools/llvm-mc/no_warnings.test create mode 100644 test/tools/llvm-nm/X86/IRobj.test create mode 100644 test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 create mode 100644 test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 create mode 100644 
test/tools/llvm-nm/X86/externalonly.test create mode 100644 test/tools/llvm-nm/X86/groupingflags.test create mode 100644 test/tools/llvm-nm/X86/posixMachO.test create mode 100644 test/tools/llvm-nm/lit.local.cfg create mode 100644 test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test create mode 100644 test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 create mode 100644 test/tools/llvm-objdump/Inputs/libbogus1.a create mode 100644 test/tools/llvm-objdump/Inputs/libbogus2.a create mode 100644 test/tools/llvm-objdump/Inputs/libbogus3.a create mode 100644 test/tools/llvm-objdump/Inputs/section-filter.obj create mode 100644 test/tools/llvm-objdump/X86/Inputs/disassemble-data.obj create mode 100755 test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 create mode 100755 test/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64 create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho create mode 100644 test/tools/llvm-objdump/X86/coff-dis-internal.test create mode 100644 test/tools/llvm-objdump/X86/coff-disassemble-export.test create mode 100644 test/tools/llvm-objdump/X86/disassemble-data.test create mode 100644 test/tools/llvm-objdump/X86/malformed-machos.test create mode 100644 test/tools/llvm-objdump/eh_frame-arm64.test create mode 100644 test/tools/llvm-objdump/malformed-archives.test create mode 100644 test/tools/llvm-objdump/section-filter.test create mode 100644 test/tools/llvm-profdata/Inputs/basic.proftext create mode 100644 test/tools/llvm-profdata/Inputs/compat.profdata.v2 create mode 100644 test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov create mode 100644 test/tools/llvm-profdata/Inputs/inline-samples.afdo create mode 100644 test/tools/llvm-profdata/Inputs/overflow-instr.proftext create mode 100644 test/tools/llvm-profdata/Inputs/overflow-sample.proftext create mode 100644 test/tools/llvm-profdata/Inputs/text-format-errors.text.bin create mode 100644 test/tools/llvm-profdata/Inputs/vp-malform.proftext create mode 100644 test/tools/llvm-profdata/Inputs/vp-malform2.proftext create mode 100644 test/tools/llvm-profdata/Inputs/vp-truncate.proftext create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext create mode 100644 test/tools/llvm-profdata/gcc-gcov-sample-profile.test create mode 100644 test/tools/llvm-profdata/inline-samples.test create mode 100644 test/tools/llvm-profdata/overflow-instr.test create mode 100644 test/tools/llvm-profdata/overflow-sample.test delete mode 100644 test/tools/llvm-profdata/overflow.proftext create mode 100644 test/tools/llvm-profdata/text-dump.test create mode 100644 test/tools/llvm-profdata/value-prof.proftext create mode 
100644 test/tools/llvm-profdata/weight-instr.test create mode 100644 test/tools/llvm-profdata/weight-sample.test create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 create mode 100755 test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel create mode 100755 test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri create mode 100755 test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 create mode 100644 test/tools/llvm-readobj/amdgpu-elf-defs.test create mode 100644 test/tools/llvm-readobj/basic.test create mode 100644 test/tools/llvm-readobj/elf-gnuhash.test create mode 100644 test/tools/llvm-readobj/elf-versioninfo.test create mode 100644 test/tools/llvm-readobj/mips-rld-map-rel.test create mode 100644 test/tools/llvm-size/basic.test create mode 100644 test/tools/llvm-split/alias.ll create mode 100644 test/tools/llvm-split/comdat.ll create mode 100644 test/tools/llvm-split/function.ll create mode 100644 test/tools/llvm-split/global.ll create mode 100644 test/tools/llvm-split/internal.ll create mode 100644 test/tools/llvm-split/unnamed.ll create mode 100755 test/tools/llvm-symbolizer/Inputs/addr.exe create mode 100644 test/tools/llvm-symbolizer/Inputs/addr.inp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-exports.cpp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-exports.exe create mode 100644 test/tools/llvm-symbolizer/coff-dwarf.test create mode 100644 test/tools/llvm-symbolizer/coff-exports.test delete mode 100644 test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input create mode 100644 test/tools/llvm-symbolizer/sym.test create mode 100644 test/tools/sancov/Inputs/blacklist.txt create mode 100644 test/tools/sancov/Inputs/foo.cpp create mode 100755 test/tools/sancov/Inputs/test-linux_x86_64 create mode 100644 test/tools/sancov/Inputs/test-linux_x86_64-1.sancov create mode 100644 test/tools/sancov/Inputs/test-linux_x86_64.sancov create mode 100644 test/tools/sancov/Inputs/test.cpp create mode 100644 test/tools/sancov/blacklist.test create mode 100644 test/tools/sancov/covered_functions.test create mode 100644 test/tools/sancov/not_covered_functions.test create mode 100644 test/tools/sancov/print.test create mode 100644 tools/dsymutil/MachOUtils.cpp create mode 100644 tools/dsymutil/MachOUtils.h create mode 100644 tools/dsymutil/NonRelocatableStringpool.h delete mode 100644 tools/llvm-ar/install_symlink.cmake create mode 100644 tools/llvm-as-fuzzer/CMakeLists.txt create mode 100644 tools/llvm-as-fuzzer/llvm-as-fuzzer.cpp create mode 100644 tools/llvm-dwp/CMakeLists.txt create mode 100644 tools/llvm-dwp/LLVMBuild.txt create mode 100644 tools/llvm-dwp/Makefile create mode 100644 tools/llvm-dwp/llvm-dwp.cpp create mode 100644 tools/llvm-mc-fuzzer/CMakeLists.txt create mode 100644 tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp create mode 100644 tools/llvm-readobj/COFFImportDumper.cpp create mode 100644 tools/llvm-split/CMakeLists.txt rename tools/{macho-dump => llvm-split}/LLVMBuild.txt (79%) rename tools/{macho-dump => llvm-split}/Makefile (71%) create mode 100644 tools/llvm-split/llvm-split.cpp delete mode 100644 tools/llvm-symbolizer/LLVMSymbolize.cpp delete mode 100644 tools/llvm-symbolizer/LLVMSymbolize.h delete 
mode 100644 tools/macho-dump/CMakeLists.txt
 delete mode 100644 tools/macho-dump/macho-dump.cpp
 create mode 100644 tools/sancov/CMakeLists.txt
 create mode 100644 tools/sancov/Makefile
 create mode 100644 tools/sancov/sancov.cc
 create mode 100644 tools/xcode-toolchain/CMakeLists.txt
 create mode 100644 unittests/ADT/RangeAdapterTest.cpp
 create mode 100644 unittests/Analysis/ValueTrackingTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/GlobalMappingLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/OrcCAPITest.cpp
 create mode 100644 unittests/ProfileData/SampleProfTest.cpp
 create mode 100644 unittests/Support/ReplaceFileTest.cpp
 create mode 100644 unittests/Support/ThreadPool.cpp
 create mode 100644 unittests/Support/TimerTest.cpp
 create mode 100644 unittests/Support/TrailingObjectsTest.cpp
 create mode 100644 utils/TableGen/Attributes.cpp
 create mode 100755 utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest
 create mode 100644 utils/lit/tests/Inputs/googletest-timeout/lit.cfg
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/infinite_loop.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/lit.cfg
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/quick_then_slow.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/short.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/slow.py
 create mode 100644 utils/lit/tests/googletest-timeout.py
 create mode 100644 utils/lit/tests/shtest-timeout.py
 create mode 100755 utils/release/build_llvm_package.bat
 create mode 100644 utils/schedcover.py

diff --git a/.clang-tidy b/.clang-tidy
index 3186da43d43d..97fbe23333bd 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1 +1,13 @@
-Checks: '-*,clang-diagnostic-*,llvm-*,misc-*'
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming'
+CheckOptions:
+  - key: readability-identifier-naming.ClassCase
+    value: CamelCase
+  - key: readability-identifier-naming.EnumCase
+    value: CamelCase
+  - key: readability-identifier-naming.FunctionCase
+    value: lowerCase
+  - key: readability-identifier-naming.UnionCase
+    value: CamelCase
+  - key: readability-identifier-naming.VariableCase
+    value: CamelCase
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 78fc78b11781..3d2093fde634 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,13 +26,41 @@ else()
   set(cmake_3_2_USES_TERMINAL USES_TERMINAL)
 endif()
 
-project(LLVM)
+if(NOT DEFINED LLVM_VERSION_MAJOR)
+  set(LLVM_VERSION_MAJOR 3)
+endif()
+if(NOT DEFINED LLVM_VERSION_MINOR)
+  set(LLVM_VERSION_MINOR 8)
+endif()
+if(NOT DEFINED LLVM_VERSION_PATCH)
+  set(LLVM_VERSION_PATCH 0)
+endif()
+if(NOT DEFINED LLVM_VERSION_SUFFIX)
+  set(LLVM_VERSION_SUFFIX svn)
+endif()
+
+if (POLICY CMP0048)
+  cmake_policy(SET CMP0048 NEW)
+  set(cmake_3_0_PROJ_VERSION
+    VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
+  set(cmake_3_0_LANGUAGES LANGUAGES)
+endif()
+
+if (NOT PACKAGE_VERSION)
+  set(PACKAGE_VERSION
+    "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
+endif()
+
+project(LLVM
+  ${cmake_3_0_PROJ_VERSION}
+  ${cmake_3_0_LANGUAGES}
+  C CXX ASM)
 
 # The following only works with the Ninja generator in CMake >= 3.0.
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
   "Define the maximum number of concurrent compilation jobs.")
 if(LLVM_PARALLEL_COMPILE_JOBS)
-  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja$")
+  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
     message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
@@ -43,7 +71,7 @@ endif()
 set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
   "Define the maximum number of concurrent link jobs.")
 if(LLVM_PARALLEL_LINK_JOBS)
-  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja$")
+  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
     message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
@@ -58,15 +86,9 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
   )
 
-set(LLVM_VERSION_MAJOR 3)
-set(LLVM_VERSION_MINOR 7)
-set(LLVM_VERSION_PATCH 1)
-set(LLVM_VERSION_SUFFIX "")
-
-if (NOT PACKAGE_VERSION)
-  set(PACKAGE_VERSION
-    "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
-endif()
+# Generate a CompilationDatabase (compile_commands.json file) for our build,
+# for use by clang_complete, YouCompleteMe, etc.
+set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 
 option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF)
@@ -152,6 +174,11 @@ endif()
 
 string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
 
+if (CMAKE_BUILD_TYPE AND
+    NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$")
+  message(FATAL_ERROR "Invalid value for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+endif()
+
 set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
 
 # They are used as destination of target generators.
@@ -278,6 +305,9 @@ endif( LLVM_USE_INTEL_JITEVENTS )
 option(LLVM_USE_OPROFILE
   "Use opagent JIT interface to inform OProfile about JIT code" OFF)
 
+option(LLVM_EXTERNALIZE_DEBUGINFO
+  "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF)
+
 # If enabled, verify we are on a platform that supports oprofile.
 if( LLVM_USE_OPROFILE )
   if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
@@ -304,7 +334,7 @@ endif()
 # Define the default arguments to use with 'lit', and an option for the user to
 # override.
 set(LIT_ARGS_DEFAULT "-sv")
-if (MSVC OR XCODE)
+if (MSVC_IDE OR XCODE)
   set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
 endif()
 set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
@@ -333,6 +363,7 @@ option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
 option(LLVM_BUILD_TESTS
   "Build LLVM unit tests. If OFF, just generate build targets." OFF)
 option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
+option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." ON)
 
 option (LLVM_BUILD_DOCS "Build the llvm documentation." OFF)
 option (LLVM_INCLUDE_DOCS "Generate build targets for llvm documentation." ON)
@@ -342,9 +373,25 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
 
 option (LLVM_BUILD_EXTERNAL_COMPILER_RT
   "Build compiler-rt as an external project."
   OFF)
 
-option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" OFF)
-option(LLVM_DYLIB_EXPORT_ALL "Export all symbols from libLLVM.dylib (default is C API only" OFF)
-option(LLVM_DISABLE_LLVM_DYLIB_ATEXIT "Disable llvm-shlib's atexit destructors." ON)
+# You can configure which libraries from LLVM you want to include in the
+# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
+# list of LLVM components. All component names handled by llvm-config are valid.
+if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
+  set(LLVM_DYLIB_COMPONENTS "all" CACHE STRING
+    "Semicolon-separated list of components to include in libLLVM, or \"all\".")
+endif()
+option(LLVM_LINK_LLVM_DYLIB "Link tools against the libllvm dynamic library" OFF)
+option(LLVM_BUILD_LLVM_C_DYLIB "Build libllvm-c re-export library (Darwin Only)" OFF)
+set(LLVM_BUILD_LLVM_DYLIB_default OFF)
+if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB)
+  set(LLVM_BUILD_LLVM_DYLIB_default ON)
+endif()
+option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default})
+set(LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT ON)
+if (LLVM_LINK_LLVM_DYLIB)
+  set(LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT OFF)
+endif()
+option(LLVM_DISABLE_LLVM_DYLIB_ATEXIT "Disable llvm-shlib's atexit destructors." ${LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT})
 if(LLVM_DISABLE_LLVM_DYLIB_ATEXIT)
   set(DISABLE_LLVM_DYLIB_ATEXIT 1)
 endif()
@@ -525,6 +572,15 @@ else(UNIX)
   endif(NOT DEFINED CMAKE_INSTALL_RPATH)
 endif()
 
+if(APPLE AND DARWIN_LTO_LIBRARY)
+  set(CMAKE_EXE_LINKER_FLAGS
+    "${CMAKE_EXE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+  set(CMAKE_SHARED_LINKER_FLAGS
+    "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+  set(CMAKE_MODULE_LINKER_FLAGS
+    "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+endif()
+
 # Work around a broken bfd ld behavior. When linking a binary with a
 # foo.so library, it will try to find any library that foo.so uses and
 # check its symbols. This is wasteful (the check was done when foo.so
@@ -543,6 +599,10 @@ include_directories( ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR})
 if(LLVM_USE_HOST_TOOLS)
   include(CrossCompile)
 endif(LLVM_USE_HOST_TOOLS)
+if(LLVM_TARGET_IS_CROSSCOMPILE_HOST)
+# Dummy use to avoid CMake Warning: Manually-specified variables were not used
+# (this is a variable that CrossCompile sets on recursive invocations)
+endif()
 
 if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
   # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM
@@ -559,6 +619,17 @@ endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
 # use export_executable_symbols(target).
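# A minimal usage sketch for the LLVM_DYLIB_COMPONENTS cache variable added
# above; the component names here are illustrative assumptions, not taken
# from this patch. Any component name that llvm-config understands is valid,
# as is the default "all":
#
#   cmake -G Ninja -DLLVM_BUILD_LLVM_DYLIB=ON \
#         -DLLVM_DYLIB_COMPONENTS="core;support;irreader" path/to/llvm
#
# Tools are linked against the resulting libLLVM only when
# LLVM_LINK_LLVM_DYLIB is also turned ON.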
 set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
 
+set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
+  "Profiling data file to use when compiling in order to improve runtime performance.")
+
+if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+    add_definitions("-fprofile-instr-use=${LLVM_PROFDATA_FILE}")
+  else()
+    message(FATAL_ERROR "LLVM_PROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 include(AddLLVM)
 include(TableGen)
@@ -619,6 +690,13 @@ if( LLVM_INCLUDE_EXAMPLES )
 endif()
 
 if( LLVM_INCLUDE_TESTS )
+  if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang)
+    include(LLVMExternalProjectUtils)
+    llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite
+      USE_TOOLCHAIN
+      EXCLUDE_FROM_ALL
+      NO_INSTALL)
+  endif()
   add_subdirectory(test)
   add_subdirectory(unittests)
   if (MSVC)
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index a15f2919cc78..da0e7a471738 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -31,7 +31,7 @@ D: Config, ADT, Support, inlining & related passes, SROA/mem2reg & related passe
 
 N: Evan Cheng
 E: evan.cheng@apple.com
-D: ARM target, parts of code generator not covered by someone else
+D: parts of code generator not covered by someone else
 
 N: Eric Christopher
 E: echristo@gmail.com
@@ -53,10 +53,6 @@ N: Quentin Colombet
 E: qcolombet@apple.com
 D: Register allocators
 
-N: Anshuman Dasgupta
-E: adasgupt@codeaurora.org
-D: Hexagon Backend
-
 N: Duncan P. N. Exon Smith
 E: dexonsmith@apple.com
 D: Branch weights and BlockFrequencyInfo
@@ -93,10 +89,6 @@ N: Lang Hames
 E: lhames@gmail.com
 D: MCJIT, RuntimeDyld and JIT event listeners
 
-N: David Majnemer
-E: david.majnemer@gmail.com
-D: IR Constant Folder
-
 N: Galina Kistanova
 E: gkistanova@gmail.com
 D: LLVM Buildbot
@@ -118,9 +110,17 @@ E: sabre@nondot.org
 W: http://nondot.org/~sabre/
 D: Everything not covered by someone else
 
+N: David Majnemer
+E: david.majnemer@gmail.com
+D: IR Constant Folder, InstCombine
+
+N: Dylan McKay
+E: dylanmckay34@gmail.com
+D: AVR Backend
+
 N: Tim Northover
 E: t.p.northover@gmail.com
-D: AArch64 backend
+D: AArch64 backend, misc ARM backend
 
 N: Diego Novillo
 E: dnovillo@google.com
@@ -134,14 +134,18 @@ N: Richard Osborne
 E: richard@xmos.com
 D: XCore Backend
 
+N: Krzysztof Parzyszek
+E: kparzysz@codeaurora.org
+D: Hexagon Backend
+
+N: Paul Robinson
+E: paul_robinson@playstation.sony.com
+D: Sony PlayStation®4 support
+
 N: Chad Rosier
 E: mcrosier@codeaurora.org
 D: Fast-Isel
 
-N: Alex Rosenberg
-E: alexr@leftfield.org
-D: Sony PlayStation®4 support
-
 N: Nadav Rotem
 E: nrotem@apple.com
 D: X86 Backend, Loop Vectorizer
diff --git a/CREDITS.TXT b/CREDITS.TXT
index fd5119f01111..da1fb010e35b 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -465,54 +465,3 @@ N: Bob Wilson
 E: bob.wilson@acm.org
 D: Advanced SIMD (NEON) support in the ARM backend.
 
-N: Alexey Bataev
-E: a.bataev@hotmail.com
-D: Clang OpenMP implementation
-
-N: Andrey Bokhanko
-E: andreybokhanko@gmail.com
-D: Clang OpenMP implementation
-
-N: Carlo Bertolli
-E: cbertol@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Eric Stotzer
-E: estotzer@ti.com
-D: Clang OpenMP implementation
-
-N: Kelvin Li
-E: kkwli0@gmail.com
-D: Clang OpenMP implementation
-
-N: Samuel Antao
-E: sfantao@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Sergey Ostanevich
-E: sergos.gnu@gmail.com
-D: Clang OpenMP implementation
-
-N: Alexandre Eichenberger
-E: alexe@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Guansong Zhang
-E: guansong.zhang@amd.com
-D: Clang OpenMP implementation
-
-N: Sunita Chandrasekaran
-E: sunisg123@gmail.com
-D: Clang OpenMP implementation
-
-N: Michael Wong
-E: fraggamuffin@gmail.com
-D: Clang OpenMP implementation
-
-N: Alexander Mussman
-E: alexander.musman@intel.com
-D: Clang OpenMP implementation
-
-N: Kevin O'Brien
-E: caomhin@us.ibm.com
-D: Clang OpenMP implementation
\ No newline at end of file
diff --git a/Makefile.config.in b/Makefile.config.in
index 9df9834f4dbe..5ca264320c51 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -396,8 +396,8 @@ endif
 BINUTILS_INCDIR := @BINUTILS_INCDIR@
 
 # Optional flags supported by the compiler
-# -Wno-missing-field-initializers
-NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
+# -Wmissing-field-initializers
+MISSING_FIELD_INITIALIZERS = @MISSING_FIELD_INITIALIZERS@
 # -Wno-variadic-macros
 NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
 # -Wcovered-switch-default
diff --git a/Makefile.rules b/Makefile.rules
index 24cac3b37659..a67aef7c97aa 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -486,6 +486,8 @@ endif
 ObjRootDir := $(PROJ_OBJ_DIR)/$(BuildMode)
 ObjDir     := $(ObjRootDir)
 LibDir     := $(PROJ_OBJ_ROOT)/$(BuildMode)/lib
+LibexecDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/libexec
+ShareDir   := $(PROJ_OBJ_ROOT)/$(BuildMode)/share
 ToolDir    := $(PROJ_OBJ_ROOT)/$(BuildMode)/bin
 ExmplDir   := $(PROJ_OBJ_ROOT)/$(BuildMode)/examples
 LLVMLibDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/lib
@@ -686,7 +688,7 @@ endif
 CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
                      $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) \
                      $(NO_UNINITIALIZED) $(NO_MAYBE_UNINITIALIZED) \
-                     $(NO_MISSING_FIELD_INITIALIZERS) $(NO_COMMENT)
+                     $(MISSING_FIELD_INITIALIZERS) $(NO_COMMENT)
 # Enable cast-qual for C++; the workaround is to use const_cast.
 CXX.Flags += -Wcast-qual
@@ -857,6 +859,7 @@ $(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $
 .PRECIOUS: $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir
 .PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir
+.PRECIOUS: $(LibexecDir)/.dir $(ShareDir)/.dir
 
 #---------------------------------------------------------
 # Collect the object directories (as there may be more
@@ -2144,6 +2147,8 @@ printvars::
 	$(Echo) "SrcMakefiles : " '$(SrcMakefiles)'
 	$(Echo) "ObjDir       : " '$(ObjDir)'
 	$(Echo) "LibDir       : " '$(LibDir)'
+	$(Echo) "LibexecDir   : " '$(LibexecDir)'
+	$(Echo) "ShareDir     : " '$(ShareDir)'
 	$(Echo) "ToolDir      : " '$(ToolDir)'
 	$(Echo) "ExmplDir     : " '$(ExmplDir)'
 	$(Echo) "Sources      : " '$(Sources)'
diff --git a/README.txt b/README.txt
index 6358a0684211..7cfb1640995f 100644
--- a/README.txt
+++ b/README.txt
@@ -13,5 +13,5 @@ assistance with LLVM, and in particular docs/GettingStarted.rst for getting
 started with LLVM and docs/README.txt for an overview of LLVM's
 documentation setup.
 
-If you're writing a package for LLVM, see docs/Packaging.rst for our
+If you are writing a package for LLVM, see docs/Packaging.rst for our
 suggestions.
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index af57712b57c5..02ab161e3b03 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,12 +32,12 @@ dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.
-AC_INIT([LLVM],[3.7.1],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.8.0svn],[http://llvm.org/bugs/])
 
 LLVM_VERSION_MAJOR=3
-LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=1
-LLVM_VERSION_SUFFIX=
+LLVM_VERSION_MINOR=8
+LLVM_VERSION_PATCH=0
+LLVM_VERSION_SUFFIX=svn
 
 AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
 AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])
@@ -74,7 +74,7 @@ if test ${srcdir} != "." ; then
 fi
 
 dnl Quit if it is an in-source build
-if test ${srcdir} == "." ; then
+if test ${srcdir} = "." ; then
   AC_MSG_ERROR([In-source builds are not allowed. Please configure from a separate build directory!])
 fi
 
@@ -133,6 +133,7 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#if ! __clang__
 llvm_cv_cxx_compiler=gcc, [])])
 AC_LANG_POP([C++])
 AC_MSG_RESULT([${llvm_cv_cxx_compiler}])
+AC_SUBST(CXX_COMPILER,$llvm_cv_cxx_compiler)
 
 dnl Configure all of the projects present in our source tree. While we could
 dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
@@ -430,6 +431,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   arm64*-*)               llvm_cv_target_arch="AArch64" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
   aarch64*-*)             llvm_cv_target_arch="AArch64" ;;
+  avr-*)                  llvm_cv_target_arch="AVR" ;;
   mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
   mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
@@ -467,6 +469,7 @@ case $host in
   arm64*-*)               host_arch="AArch64" ;;
   arm*-*)                 host_arch="ARM" ;;
   aarch64*-*)             host_arch="AArch64" ;;
+  avr-*)                  host_arch="AVR" ;;
   mips-* | mips64-*)      host_arch="Mips" ;;
   mipsel-* | mips64el-*)  host_arch="Mips" ;;
   xcore-*)                host_arch="XCore" ;;
@@ -800,6 +803,7 @@ else
     PowerPC)     AC_SUBST(TARGET_HAS_JIT,1) ;;
     x86_64)      AC_SUBST(TARGET_HAS_JIT,1) ;;
     ARM)         AC_SUBST(TARGET_HAS_JIT,1) ;;
+    AVR)         AC_SUBST(TARGET_HAS_JIT,0) ;;
     Mips)        AC_SUBST(TARGET_HAS_JIT,1) ;;
     XCore)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     MSP430)      AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -1339,7 +1343,7 @@ AC_DEFINE_UNQUOTED(DEFAULT_SYSROOT,"$withval",
 AC_ARG_WITH(clang-default-openmp-runtime,
   AS_HELP_STRING([--with-clang-default-openmp-runtime],
     [The default OpenMP runtime for Clang.]),,
-    withval="libgomp")
+    withval="libomp")
 AC_DEFINE_UNQUOTED(CLANG_DEFAULT_OPENMP_RUNTIME,"$withval",
                    [Default OpenMP runtime used by -fopenmp.])
@@ -1548,25 +1552,31 @@ AC_MSG_RESULT([ok])
 
 dnl Check optional compiler flags.
 AC_MSG_CHECKING([optional compiler flags])
-CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
-CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
-CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
-
-dnl GCC's potential uninitialized use analysis is weak and presents lots of
-dnl false positives, so disable it.
-NO_UNINITIALIZED=
-NO_MAYBE_UNINITIALIZED=
-if test "$GXX" = "yes"
-then
-  CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized])
-  dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are
-  dnl known to be uninitialized from cases which might be uninitialized.  We
-  dnl still want to catch the first kind of errors.
-  if test -z "$NO_MAYBE_UNINITIALIZED"
-  then
-    CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized])
-  fi
-fi
+case "$llvm_cv_cxx_compiler" in
+  clang)
+    CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+    CXX_FLAG_CHECK(MISSING_FIELD_INITIALIZERS, [-Wmissing-field-initializers])
+    CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+    ;;
+  gcc)
+    dnl If we're using gcc check for -Wno-missing-field-initializers as gcc will warn
+    dnl on plain open brace initializations. clang won't so use -Wmissing-field-initializers
+    dnl there.
+    CXX_FLAG_CHECK(MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
+    CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+    CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+    CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized])
+    dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are
+    dnl known to be uninitialized from cases which might be uninitialized.  We
+    dnl still want to catch the first kind of errors.
+    if test -z "$NO_MAYBE_UNINITIALIZED"
+    then
+      CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized])
+    fi
+    ;;
+  unknown)
+    ;;
+esac
 
 dnl Check for misbehaving -Wcomment (gcc-4.7 has this) and maybe add
 dnl -Wno-comment to the flags.
@@ -1587,7 +1597,7 @@ int main() { return 0; }
 AC_SUBST(NO_COMMENT, [$no_comment])
 CXXFLAGS="$llvm_cv_old_cxxflags"
 
-AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT])
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT])
 
 AC_ARG_WITH([python],
             [AS_HELP_STRING([--with-python], [path to python])],
@@ -1628,9 +1638,12 @@ dnl===-----------------------------------------------------------------------===
 AC_CHECK_LIB(m,sin)
 if test "$llvm_cv_os_type" = "MingW" ; then
+  dnl mingw-gcc's driver doesn't imply -lole32 by default so we may need this
+  dnl when being built with gcc for bootstrapping purposes.
   AC_CHECK_LIB(ole32, main)
   AC_CHECK_LIB(psapi, main)
   AC_CHECK_LIB(shell32, main)
+  AC_CHECK_LIB(uuid,main)
 fi
 
 dnl dlopen() is required for plugin support.
@@ -1786,7 +1799,6 @@ dnl Generally we're looking for POSIX headers.
 AC_HEADER_DIRENT
 AC_HEADER_MMAP_ANONYMOUS
 AC_HEADER_STAT
-AC_HEADER_SYS_WAIT
 AC_HEADER_TIME
 
 AC_LANG_PUSH([C++])
@@ -1798,7 +1810,6 @@ AC_LANG_POP([C++])
 
 AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h link.h])
 AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h])
-AC_CHECK_HEADERS([utime.h])
 AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
 AC_CHECK_HEADERS([sys/ioctl.h malloc/malloc.h mach/mach.h])
 AC_CHECK_HEADERS([valgrind/valgrind.h])
@@ -1876,10 +1887,9 @@ AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
 AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit ])
 AC_CHECK_FUNCS([strerror strerror_r setenv ])
 AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
-AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev])
+AC_CHECK_FUNCS([setjmp longjmp writev])
 AC_CHECK_FUNCS([futimes futimens])
 AC_C_PRINTF_A
-AC_FUNC_RAND48
 
 dnl Check for arc4random accessible via AC_INCLUDES_DEFAULT.
 AC_CHECK_DECLS([arc4random])
@@ -2232,3 +2242,14 @@ AC_CONFIG_MAKEFILE(bindings/ocaml/Makefile.ocaml)
 
 dnl Finally, crank out the output
 AC_OUTPUT
+echo ""
+echo ""
+echo "################################################################################"
+echo "################################################################################"
+echo "The LLVM project has deprecated building with configure & make."
+echo "The autoconf-based makefile build system will be removed in the 3.9 release."
+echo ""
+echo "Please migrate to the CMake-based build system."
+echo "For more information see: http://llvm.org/docs/CMake.html"
+echo "################################################################################"
+echo "################################################################################"
diff --git a/autoconf/m4/rand48.m4 b/autoconf/m4/rand48.m4
deleted file mode 100644
index 76f08faad284..000000000000
--- a/autoconf/m4/rand48.m4
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# This function determins if the srand48,drand48,lrand48 functions are
-# available on this platform.
-#
-AC_DEFUN([AC_FUNC_RAND48],[
-AC_SINGLE_CXX_CHECK([ac_cv_func_rand48],
-  [srand48/lrand48/drand48], [],
-  [srand48(0);lrand48();drand48();])
-if test "$ac_cv_func_rand48" = "yes" ; then
-AC_DEFINE([HAVE_RAND48],1,[Define to 1 if srand48/lrand48/drand48 exist in ])
-fi
-])
diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp
index df5885de25c4..e767144bb322 100644
--- a/bindings/go/llvm/DIBuilderBindings.cpp
+++ b/bindings/go/llvm/DIBuilderBindings.cpp
@@ -74,23 +74,34 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
     LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
     const char *LinkageName, LLVMMetadataRef File, unsigned Line,
     LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
-    unsigned ScopeLine, unsigned Flags, int IsOptimized, LLVMValueRef Func) {
+    unsigned ScopeLine, unsigned Flags, int IsOptimized) {
   DIBuilder *D = unwrap(Dref);
   return wrap(D->createFunction(unwrap<DIScope>(Scope), Name, LinkageName, File ?
unwrap<DIFile>(File) : nullptr, Line,
                                 unwrap<DISubroutineType>(CompositeType),
                                 IsLocalToUnit, IsDefinition, ScopeLine, Flags,
-                                IsOptimized, unwrap<Function>(Func)));
+                                IsOptimized));
 }
 
-LLVMMetadataRef LLVMDIBuilderCreateLocalVariable(
-    LLVMDIBuilderRef Dref, unsigned Tag, LLVMMetadataRef Scope,
-    const char *Name, LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty,
-    int AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
+LLVMMetadataRef
+LLVMDIBuilderCreateAutoVariable(LLVMDIBuilderRef Dref, LLVMMetadataRef Scope,
+                                const char *Name, LLVMMetadataRef File,
+                                unsigned Line, LLVMMetadataRef Ty,
+                                int AlwaysPreserve, unsigned Flags) {
   DIBuilder *D = unwrap(Dref);
-  return wrap(D->createLocalVariable(
-      Tag, unwrap<DIScope>(Scope), Name, unwrap<DIFile>(File), Line,
-      unwrap<DIType>(Ty), AlwaysPreserve, Flags, ArgNo));
+  return wrap(D->createAutoVariable(unwrap<DIScope>(Scope), Name,
+                                    unwrap<DIFile>(File), Line,
+                                    unwrap<DIType>(Ty), AlwaysPreserve, Flags));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
+    LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
+    unsigned ArgNo, LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty,
+    int AlwaysPreserve, unsigned Flags) {
+  DIBuilder *D = unwrap(Dref);
+  return wrap(D->createParameterVariable(
+      unwrap<DIScope>(Scope), Name, ArgNo, unwrap<DIFile>(File), Line,
+      unwrap<DIType>(Ty), AlwaysPreserve, Flags));
 }
 
 LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Dref,
@@ -117,8 +128,7 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Dref,
                                   LLVMMetadataRef File,
                                   LLVMMetadataRef ParameterTypes) {
   DIBuilder *D = unwrap(Dref);
   return wrap(
-      D->createSubroutineType(File ? unwrap<DIFile>(File) : nullptr,
-                              DITypeRefArray(unwrap<MDTuple>(ParameterTypes))));
+      D->createSubroutineType(DITypeRefArray(unwrap<MDTuple>(ParameterTypes))));
 }
 
 LLVMMetadataRef LLVMDIBuilderCreateStructType(
diff --git a/bindings/go/llvm/DIBuilderBindings.h b/bindings/go/llvm/DIBuilderBindings.h
index a4fba2784185..f14fd0f7b5f6 100644
--- a/bindings/go/llvm/DIBuilderBindings.h
+++ b/bindings/go/llvm/DIBuilderBindings.h
@@ -55,12 +55,18 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
     LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name,
     const char *LinkageName, LLVMMetadataRef File, unsigned Line,
     LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
-    unsigned ScopeLine, unsigned Flags, int IsOptimized, LLVMValueRef Function);
+    unsigned ScopeLine, unsigned Flags, int IsOptimized);
 
-LLVMMetadataRef LLVMDIBuilderCreateLocalVariable(
-    LLVMDIBuilderRef D, unsigned Tag, LLVMMetadataRef Scope, const char *Name,
+LLVMMetadataRef
+LLVMDIBuilderCreateAutoVariable(LLVMDIBuilderRef D, LLVMMetadataRef Scope,
+                                const char *Name, LLVMMetadataRef File,
+                                unsigned Line, LLVMMetadataRef Ty,
+                                int AlwaysPreserve, unsigned Flags);
+
+LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
+    LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name, unsigned ArgNo,
     LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty, int AlwaysPreserve,
-    unsigned Flags, unsigned ArgNo);
+    unsigned Flags);
 
 LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef D,
                                              const char *Name,
diff --git a/bindings/go/llvm/IRBindings.cpp b/bindings/go/llvm/IRBindings.cpp
index fd0cb8006a4f..4308f84cc1be 100644
--- a/bindings/go/llvm/IRBindings.cpp
+++ b/bindings/go/llvm/IRBindings.cpp
@@ -98,3 +98,7 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
       DebugLoc::get(Line, Col, Scope ? unwrap<MDNode>(Scope) : nullptr,
                     InlinedAt ?
unwrap(InlinedAt) : nullptr)); } + +void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) { + unwrap(Func)->setSubprogram(unwrap(SP)); +} diff --git a/bindings/go/llvm/IRBindings.h b/bindings/go/llvm/IRBindings.h index a53e178f1e0c..dcdb26eda795 100644 --- a/bindings/go/llvm/IRBindings.h +++ b/bindings/go/llvm/IRBindings.h @@ -55,6 +55,8 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line, unsigned Col, LLVMMetadataRef Scope, LLVMMetadataRef InlinedAt); +void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP); + #ifdef __cplusplus } diff --git a/bindings/go/llvm/analysis.go b/bindings/go/llvm/analysis.go index 7b0d8e3e8b8a..3ae4b71def7c 100644 --- a/bindings/go/llvm/analysis.go +++ b/bindings/go/llvm/analysis.go @@ -15,6 +15,7 @@ package llvm /* #include "llvm-c/Analysis.h" // If you are getting an error here read bindings/go/README.txt +#include "llvm-c/Core.h" #include */ import "C" diff --git a/bindings/go/llvm/bitreader.go b/bindings/go/llvm/bitreader.go index 98112a99dd3b..c3bf07a19044 100644 --- a/bindings/go/llvm/bitreader.go +++ b/bindings/go/llvm/bitreader.go @@ -15,6 +15,7 @@ package llvm /* #include "llvm-c/BitReader.h" +#include "llvm-c/Core.h" #include */ import "C" @@ -40,7 +41,7 @@ func ParseBitcodeFile(name string) (Module, error) { defer C.LLVMDisposeMemoryBuffer(buf) var m Module - if C.LLVMParseBitcode(buf, &m.C, &errmsg) == 0 { + if C.LLVMParseBitcode2(buf, &m.C) == 0 { return m, nil } diff --git a/bindings/go/llvm/dibuilder.go b/bindings/go/llvm/dibuilder.go index f03f740b7770..778c31785836 100644 --- a/bindings/go/llvm/dibuilder.go +++ b/bindings/go/llvm/dibuilder.go @@ -189,7 +189,6 @@ type DIFunction struct { ScopeLine int Flags int Optimized bool - Function Value } // CreateCompileUnit creates function debug metadata. @@ -211,14 +210,39 @@ func (d *DIBuilder) CreateFunction(diScope Metadata, f DIFunction) Metadata { C.unsigned(f.ScopeLine), C.unsigned(f.Flags), boolToCInt(f.Optimized), - f.Function.C, ) return Metadata{C: result} } -// DILocalVariable holds the values for creating local variable debug metadata. -type DILocalVariable struct { - Tag dwarf.Tag +// DIAutoVariable holds the values for creating auto variable debug metadata. +type DIAutoVariable struct { + Name string + File Metadata + Line int + Type Metadata + AlwaysPreserve bool + Flags int +} + +// CreateAutoVariable creates local variable debug metadata. +func (d *DIBuilder) CreateAutoVariable(scope Metadata, v DIAutoVariable) Metadata { + name := C.CString(v.Name) + defer C.free(unsafe.Pointer(name)) + result := C.LLVMDIBuilderCreateAutoVariable( + d.ref, + scope.C, + name, + v.File.C, + C.unsigned(v.Line), + v.Type.C, + boolToCInt(v.AlwaysPreserve), + C.unsigned(v.Flags), + ) + return Metadata{C: result} +} + +// DIParameterVariable holds the values for creating parameter variable debug metadata. +type DIParameterVariable struct { Name string File Metadata Line int @@ -227,25 +251,24 @@ type DILocalVariable struct { Flags int // ArgNo is the 1-based index of the argument in the function's - // parameter list if it is an argument, or 0 otherwise. + // parameter list. ArgNo int } -// CreateLocalVariable creates local variable debug metadata. -func (d *DIBuilder) CreateLocalVariable(scope Metadata, v DILocalVariable) Metadata { +// CreateParameterVariable creates parameter variable debug metadata. 
+func (d *DIBuilder) CreateParameterVariable(scope Metadata, v DIParameterVariable) Metadata { name := C.CString(v.Name) defer C.free(unsafe.Pointer(name)) - result := C.LLVMDIBuilderCreateLocalVariable( + result := C.LLVMDIBuilderCreateParameterVariable( d.ref, - C.unsigned(v.Tag), scope.C, name, + C.unsigned(v.ArgNo), v.File.C, C.unsigned(v.Line), v.Type.C, boolToCInt(v.AlwaysPreserve), C.unsigned(v.Flags), - C.unsigned(v.ArgNo), ) return Metadata{C: result} } diff --git a/bindings/go/llvm/executionengine.go b/bindings/go/llvm/executionengine.go index 94d4e83b4cf3..91f8366ca75d 100644 --- a/bindings/go/llvm/executionengine.go +++ b/bindings/go/llvm/executionengine.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/ExecutionEngine.h" #include */ diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index 76f5f06017c8..b8ea28d3a0db 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -1054,6 +1054,12 @@ func (v Value) AddTargetDependentFunctionAttr(attr, value string) { defer C.free(unsafe.Pointer(cvalue)) C.LLVMAddTargetDependentFunctionAttr(v.C, cattr, cvalue) } +func (v Value) SetPersonality(p Value) { + C.LLVMSetPersonalityFn(v.C, p.C) +} +func (v Value) SetSubprogram(sp Metadata) { + C.LLVMSetSubprogram(v.C, sp.C) +} // Operations on parameters func (v Value) ParamsCount() int { return int(C.LLVMCountParams(v.C)) } @@ -1206,7 +1212,7 @@ func (b Builder) Dispose() { C.LLVMDisposeBuilder(b.C) } func (b Builder) SetCurrentDebugLocation(line, col uint, scope, inlinedAt Metadata) { C.LLVMSetCurrentDebugLocation2(b.C, C.unsigned(line), C.unsigned(col), scope.C, inlinedAt.C) } -func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) } +func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) } func (b Builder) InsertDeclare(module Module, storage Value, md Value) Value { f := module.NamedFunction("llvm.dbg.declare") if f.IsNil() { @@ -1725,7 +1731,7 @@ func (b Builder) CreatePtrDiff(lhs, rhs Value, name string) (v Value) { return } -func (b Builder) CreateLandingPad(t Type, personality Value, nclauses int, name string) (l Value) { +func (b Builder) CreateLandingPad(t Type, nclauses int, name string) (l Value) { cname := C.CString(name) defer C.free(unsafe.Pointer(cname)) l.C = C.LLVMBuildLandingPad(b.C, t.C, nil, C.unsigned(nclauses), cname) diff --git a/bindings/go/llvm/linker.go b/bindings/go/llvm/linker.go index f64f66c858e8..ca16f7637b2b 100644 --- a/bindings/go/llvm/linker.go +++ b/bindings/go/llvm/linker.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/Linker.h" #include */ @@ -21,11 +22,9 @@ import "C" import "errors" func LinkModules(Dest, Src Module) error { - var cmsg *C.char - failed := C.LLVMLinkModules(Dest.C, Src.C, C.LLVMLinkerDestroySource, &cmsg) + failed := C.LLVMLinkModules2(Dest.C, Src.C) if failed != 0 { - err := errors.New(C.GoString(cmsg)) - C.LLVMDisposeMessage(cmsg) + err := errors.New("Linking failed") return err } return nil diff --git a/bindings/go/llvm/target.go b/bindings/go/llvm/target.go index bd1d0f3a440d..6b1895b9ac60 100644 --- a/bindings/go/llvm/target.go +++ b/bindings/go/llvm/target.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" #include diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml index 1f65a7b8f905..22b96a298eff 100644 --- a/bindings/ocaml/Makefile.ocaml +++ b/bindings/ocaml/Makefile.ocaml @@ -277,6 +277,8 @@ 
uninstall-local:: uninstall-deplibs build-deplibs: $(OutputLibs) +$(OcamlDir)/%.so: $(LibDir)/%.so + $(Verb) ln -sf $< $@ $(OcamlDir)/%.a: $(LibDir)/%.a $(Verb) ln -sf $< $@ diff --git a/bindings/ocaml/bitreader/bitreader_ocaml.c b/bindings/ocaml/bitreader/bitreader_ocaml.c index 15ebd5f635fd..f91b092d9176 100644 --- a/bindings/ocaml/bitreader/bitreader_ocaml.c +++ b/bindings/ocaml/bitreader/bitreader_ocaml.c @@ -23,10 +23,9 @@ void llvm_raise(value Prototype, char *Message); /* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ CAMLprim LLVMModuleRef llvm_get_module(LLVMContextRef C, LLVMMemoryBufferRef MemBuf) { LLVMModuleRef M; - char *Message; - if (LLVMGetBitcodeModuleInContext(C, MemBuf, &M, &Message)) - llvm_raise(*caml_named_value("Llvm_bitreader.Error"), Message); + if (LLVMGetBitcodeModuleInContext2(C, MemBuf, &M)) + llvm_raise(*caml_named_value("Llvm_bitreader.Error"), ""); return M; } @@ -34,10 +33,9 @@ CAMLprim LLVMModuleRef llvm_get_module(LLVMContextRef C, LLVMMemoryBufferRef Mem /* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ CAMLprim LLVMModuleRef llvm_parse_bitcode(LLVMContextRef C, LLVMMemoryBufferRef MemBuf) { LLVMModuleRef M; - char *Message; - if (LLVMParseBitcodeInContext(C, MemBuf, &M, &Message)) - llvm_raise(*caml_named_value("Llvm_bitreader.Error"), Message); + if (LLVMParseBitcodeInContext2(C, MemBuf, &M)) + llvm_raise(*caml_named_value("Llvm_bitreader.Error"), ""); return M; } diff --git a/bindings/ocaml/linker/linker_ocaml.c b/bindings/ocaml/linker/linker_ocaml.c index 3b8512aa5953..498a5f0c8453 100644 --- a/bindings/ocaml/linker/linker_ocaml.c +++ b/bindings/ocaml/linker/linker_ocaml.c @@ -25,10 +25,8 @@ void llvm_raise(value Prototype, char *Message); /* llmodule -> llmodule -> unit */ CAMLprim value llvm_link_modules(LLVMModuleRef Dst, LLVMModuleRef Src) { - char* Message; - - if (LLVMLinkModules(Dst, Src, 0, &Message)) - llvm_raise(*caml_named_value("Llvm_linker.Error"), Message); + if (LLVMLinkModules2(Dst, Src)) + llvm_raise(*caml_named_value("Llvm_linker.Error"), "Linking failed"); return Val_unit; } diff --git a/bindings/ocaml/linker/llvm_linker.ml b/bindings/ocaml/linker/llvm_linker.ml index 3044abd8b6cf..f2b64eeee918 100644 --- a/bindings/ocaml/linker/llvm_linker.ml +++ b/bindings/ocaml/linker/llvm_linker.ml @@ -11,5 +11,5 @@ exception Error of string let () = Callback.register_exception "Llvm_linker.Error" (Error "") -external link_modules : Llvm.llmodule -> Llvm.llmodule -> unit - = "llvm_link_modules" +external link_modules' : Llvm.llmodule -> Llvm.llmodule -> unit + = "llvm_link_modules" diff --git a/bindings/ocaml/linker/llvm_linker.mli b/bindings/ocaml/linker/llvm_linker.mli index 06c3b92a577e..5f558ffb1162 100644 --- a/bindings/ocaml/linker/llvm_linker.mli +++ b/bindings/ocaml/linker/llvm_linker.mli @@ -14,6 +14,6 @@ exception Error of string -(** [link_modules dst src mode] links [src] into [dst], raising [Error] - if the linking fails. *) -val link_modules : Llvm.llmodule -> Llvm.llmodule -> unit \ No newline at end of file +(** [link_modules' dst src] links [src] into [dst], raising [Error] + if the linking fails. The src module is destroyed. 
*) +val link_modules' : Llvm.llmodule -> Llvm.llmodule -> unit \ No newline at end of file diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml index 9a3cb1f0de09..259d57bc0680 100644 --- a/bindings/ocaml/llvm/llvm.ml +++ b/bindings/ocaml/llvm/llvm.ml @@ -579,6 +579,8 @@ external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent" external is_declaration : llvalue -> bool = "llvm_is_declaration" external linkage : llvalue -> Linkage.t = "llvm_linkage" external set_linkage : Linkage.t -> llvalue -> unit = "llvm_set_linkage" +external unnamed_addr : llvalue -> bool = "llvm_unnamed_addr" +external set_unnamed_addr : bool -> llvalue -> unit = "llvm_set_unnamed_addr" external section : llvalue -> string = "llvm_section" external set_section : string -> llvalue -> unit = "llvm_set_section" external visibility : llvalue -> Visibility.t = "llvm_visibility" diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index dcda02764f54..541c35a2a229 100644 --- a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -1255,6 +1255,16 @@ val linkage : llvalue -> Linkage.t See the method [llvm::GlobalValue::setLinkage]. *) val set_linkage : Linkage.t -> llvalue -> unit +(** [unnamed_addr g] returns [true] if the global value [g] has the unnamed_addr + attribute. Returns [false] otherwise. + See the method [llvm::GlobalValue::getUnnamedAddr]. *) +val unnamed_addr : llvalue -> bool + +(** [set_unnamed_addr b g] sets the unnamed_addr attribute of the global value + [g] if [b] is [true], and unsets it otherwise. + See the method [llvm::GlobalValue::setUnnamedAddr]. *) +val set_unnamed_addr : bool -> llvalue -> unit + (** [section g] returns the linker section of the global value [g]. See the method [llvm::GlobalValue::getSection].
*) val section : llvalue -> string diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index 3889f9276ccd..b4c47e7475e6 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -940,6 +940,17 @@ CAMLprim value llvm_set_linkage(value Linkage, LLVMValueRef Global) { return Val_unit; } +/* llvalue -> bool */ +CAMLprim value llvm_unnamed_addr(LLVMValueRef Global) { + return Val_bool(LLVMHasUnnamedAddr(Global)); +} + +/* bool -> llvalue -> unit */ +CAMLprim value llvm_set_unnamed_addr(value UseUnnamedAddr, LLVMValueRef Global) { + LLVMSetUnnamedAddr(Global, Bool_val(UseUnnamedAddr)); + return Val_unit; +} + /* llvalue -> string */ CAMLprim value llvm_section(LLVMValueRef Global) { return caml_copy_string(LLVMGetSection(Global)); diff --git a/bindings/python/llvm/bit_reader.py b/bindings/python/llvm/bit_reader.py index 5bf5e22025a3..33b8211076b8 100644 --- a/bindings/python/llvm/bit_reader.py +++ b/bindings/python/llvm/bit_reader.py @@ -16,16 +16,15 @@ lib = get_library() def parse_bitcode(mem_buffer): """Input is .core.MemoryBuffer""" module = c_object_p() - out = c_char_p(None) - result = lib.LLVMParseBitcode(mem_buffer, byref(module), byref(out)) + result = lib.LLVMParseBitcode2(mem_buffer, byref(module)) if result: - raise RuntimeError('LLVM Error: %s' % out.value) + raise RuntimeError('LLVM Error') m = Module(module) m.take_ownership(mem_buffer) return m def register_library(library): - library.LLVMParseBitcode.argtypes = [MemoryBuffer, POINTER(c_object_p), POINTER(c_char_p)] - library.LLVMParseBitcode.restype = bool + library.LLVMParseBitcode2.argtypes = [MemoryBuffer, POINTER(c_object_p)] + library.LLVMParseBitcode2.restype = bool register_library(lib) diff --git a/bindings/python/llvm/core.py b/bindings/python/llvm/core.py index c95952db6fc5..47e81dd1a4f9 100644 --- a/bindings/python/llvm/core.py +++ b/bindings/python/llvm/core.py @@ -465,9 +465,6 @@ def register_library(library): library.LLVMInitializeAnalysis.argtypes = [PassRegistry] library.LLVMInitializeAnalysis.restype = None - library.LLVMInitializeIPA.argtypes = [PassRegistry] - library.LLVMInitializeIPA.restype = None - library.LLVMInitializeCodeGen.argtypes = [PassRegistry] library.LLVMInitializeCodeGen.restype = None @@ -621,7 +618,6 @@ def initialize_llvm(): lib.LLVMInitializeIPO(p) lib.LLVMInitializeInstrumentation(p) lib.LLVMInitializeAnalysis(p) - lib.LLVMInitializeIPA(p) lib.LLVMInitializeCodeGen(p) lib.LLVMInitializeTarget(p) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 3203d1ea708b..f699211bd5b4 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -67,7 +67,6 @@ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H) check_include_file(sys/stat.h HAVE_SYS_STAT_H) check_include_file(sys/time.h HAVE_SYS_TIME_H) check_include_file(sys/uio.h HAVE_SYS_UIO_H) -check_include_file(sys/wait.h HAVE_SYS_WAIT_H) check_include_file(termios.h HAVE_TERMIOS_H) check_include_file(unistd.h HAVE_UNISTD_H) check_include_file(utime.h HAVE_UTIME_H) @@ -106,6 +105,12 @@ if( NOT PURE_WINDOWS ) endif() check_library_exists(dl dlopen "" HAVE_LIBDL) check_library_exists(rt clock_gettime "" HAVE_LIBRT) +endif() + +# Don't look for these libraries on Windows. Also don't look for them if we're +# using MSan, since uninstrumented third party code may call MSan interceptors +# like strlen, leading to false positives.
+if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") if (LLVM_ENABLE_ZLIB) check_library_exists(z compress2 "" HAVE_LIBZ) else() @@ -294,6 +299,10 @@ if( LLVM_ENABLE_PIC ) set(ENABLE_PIC 1) else() set(ENABLE_PIC 0) + check_cxx_compiler_flag("-fno-pie" SUPPORTS_NO_PIE_FLAG) + if(SUPPORTS_NO_PIE_FLAG) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-pie") + endif() endif() check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) @@ -491,7 +500,7 @@ if (LLVM_ENABLE_DOXYGEN) option(LLVM_DOXYGEN_EXTERNAL_SEARCH "Enable doxygen external search." OFF) if (LLVM_DOXYGEN_EXTERNAL_SEARCH) - set(LLVM_DOXYGEN_SEARCHENGINE_URL "" CACHE STRING "URL to use for external searhc.") + set(LLVM_DOXYGEN_SEARCHENGINE_URL "" CACHE STRING "URL to use for external search.") set(LLVM_DOXYGEN_SEARCH_MAPPINGS "" CACHE STRING "Doxygen Search Mappings") endif() endif() diff --git a/cmake/dummy.cpp b/cmake/dummy.cpp new file mode 100644 index 000000000000..c4e7e9536871 --- /dev/null +++ b/cmake/dummy.cpp @@ -0,0 +1 @@ +typedef int dummy; diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 45f6746948d2..bed81b28426e 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1,5 +1,6 @@ include(LLVMProcessSources) include(LLVM-Config) +include(DetermineGCCCompatible) function(llvm_update_compile_flags name) get_property(sources TARGET ${name} PROPERTY SOURCES) @@ -21,15 +22,13 @@ function(llvm_update_compile_flags name) list(APPEND LLVM_COMPILE_DEFINITIONS _HAS_EXCEPTIONS=0) list(APPEND LLVM_COMPILE_FLAGS "/EHs-c-") endif() - if (CLANG_CL) - # FIXME: Remove this once clang-cl supports SEH - list(APPEND LLVM_COMPILE_DEFINITIONS "GTEST_HAS_SEH=0") - endif() endif() # LLVM_REQUIRES_RTTI is an internal flag that individual # targets can use to force RTTI + set(LLVM_CONFIG_HAS_RTTI YES CACHE INTERNAL "") if(NOT (LLVM_REQUIRES_RTTI OR LLVM_ENABLE_RTTI)) + set(LLVM_CONFIG_HAS_RTTI NO CACHE INTERNAL "") list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_RTTI=0) if (LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND LLVM_COMPILE_FLAGS "-fno-rtti") @@ -41,7 +40,7 @@ function(llvm_update_compile_flags name) # Assume that; # - LLVM_COMPILE_FLAGS is list. # - PROPERTY COMPILE_FLAGS is string. - string(REPLACE ";" " " target_compile_flags "${LLVM_COMPILE_FLAGS}") + string(REPLACE ";" " " target_compile_flags " ${LLVM_COMPILE_FLAGS}") if(update_src_props) foreach(fn ${sources}) @@ -193,34 +192,43 @@ endfunction(add_link_opts) # Set each output directory according to ${CMAKE_CONFIGURATION_TYPES}. # Note: Don't set variables CMAKE_*_OUTPUT_DIRECTORY any more, # or a certain builder, for example, msbuild.exe, would be confused. -function(set_output_directory target bindir libdir) - # Do nothing if *_OUTPUT_INTDIR is empty. - if("${bindir}" STREQUAL "") - return() - endif() +function(set_output_directory target) + cmake_parse_arguments(ARG "" "BINARY_DIR;LIBRARY_DIR" "" ${ARGN}) - # moddir -- corresponding to LIBRARY_OUTPUT_DIRECTORY. + # module_dir -- corresponding to LIBRARY_OUTPUT_DIRECTORY. # It affects output of add_library(MODULE).
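# A hedged usage sketch of the new keyword-argument style (the target name mytool is hypothetical; the two directory variables are the ones LLVM already passes at the call sites below): # set_output_directory(mytool BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # Either keyword may be omitted, and only the directories actually passed are applied to the target's output properties.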
if(WIN32 OR CYGWIN) # DLL platform - set(moddir ${bindir}) + set(module_dir ${ARG_BINARY_DIR}) else() - set(moddir ${libdir}) + set(module_dir ${ARG_LIBRARY_DIR}) endif() if(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") foreach(build_mode ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER "${build_mode}" CONFIG_SUFFIX) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} bi ${bindir}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} li ${libdir}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} mi ${moddir}) - set_target_properties(${target} PROPERTIES "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${bi}) - set_target_properties(${target} PROPERTIES "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${li}) - set_target_properties(${target} PROPERTIES "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${mi}) + if(ARG_BINARY_DIR) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} bi ${ARG_BINARY_DIR}) + set_target_properties(${target} PROPERTIES "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${bi}) + endif() + if(ARG_LIBRARY_DIR) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} li ${ARG_LIBRARY_DIR}) + set_target_properties(${target} PROPERTIES "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${li}) + endif() + if(module_dir) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} mi ${module_dir}) + set_target_properties(${target} PROPERTIES "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${mi}) + endif() endforeach() else() - set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${bindir}) - set_target_properties(${target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${libdir}) - set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${moddir}) + if(ARG_BINARY_DIR) + set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${ARG_BINARY_DIR}) + endif() + if(ARG_LIBRARY_DIR) + set_target_properties(${target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${ARG_LIBRARY_DIR}) + endif() + if(module_dir) + set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${module_dir}) + endif() endif() endfunction() @@ -303,6 +311,9 @@ endfunction(set_windows_version_resource_properties) # MODULE # Target ${name} might not be created on unsupported platforms. # Check with "if(TARGET ${name})". +# DISABLE_LLVM_LINK_LLVM_DYLIB +# Do not link this library to libLLVM, even if +# LLVM_LINK_LLVM_DYLIB is enabled. # OUTPUT_NAME name # Corresponds to OUTPUT_NAME in target properties. # DEPENDS targets... @@ -313,10 +324,12 @@ endfunction(set_windows_version_resource_properties) # Same semantics as target_link_libraries(). # ADDITIONAL_HEADERS # May specify header files for IDE generators. +# SONAME +# Should set SONAME link flags and create symlinks # ) function(llvm_add_library name) cmake_parse_arguments(ARG - "MODULE;SHARED;STATIC" + "MODULE;SHARED;STATIC;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME" "OUTPUT_NAME" "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS" ${ARGN}) @@ -395,8 +408,11 @@ function(llvm_add_library name) set(windows_resource_file ${windows_resource_file} PARENT_SCOPE) endif() - set_output_directory(${name} ${LLVM_RUNTIME_OUTPUT_INTDIR} ${LLVM_LIBRARY_OUTPUT_INTDIR}) - llvm_update_compile_flags(${name}) + set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + # $<TARGET_OBJECTS> doesn't require compile flags.
+ if(NOT obj_name) + llvm_update_compile_flags(${name}) + endif() add_link_opts( ${name} ) if(ARG_OUTPUT_NAME) set_target_properties(${name} @@ -418,11 +434,6 @@ function(llvm_add_library name) PREFIX "" ) endif() - - set_target_properties(${name} - PROPERTIES - SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} - VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) endif() if(ARG_MODULE OR ARG_SHARED) @@ -437,6 +448,24 @@ function(llvm_add_library name) endif() endif() + if(ARG_SHARED AND UNIX) + if(NOT APPLE AND ARG_SONAME) + get_target_property(output_name ${name} OUTPUT_NAME) + if(${output_name} STREQUAL "output_name-NOTFOUND") + set(output_name ${name}) + endif() + set(library_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) + set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) + set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name}) + llvm_install_library_symlink(${api_name} ${library_name} SHARED + COMPONENT ${name} + ALWAYS_GENERATE) + llvm_install_library_symlink(${output_name} ${library_name} SHARED + COMPONENT ${name} + ALWAYS_GENERATE) + endif() + endif() + # Add the explicit dependency information for this library. # # It would be nice to verify that we have the dependencies for this library @@ -444,10 +473,14 @@ function(llvm_add_library name) # property has been set to an empty value. get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name}) - llvm_map_components_to_libnames(llvm_libs - ${ARG_LINK_COMPONENTS} - ${LLVM_LINK_COMPONENTS} - ) + if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_STATIC AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) + set(llvm_libs LLVM) + else() + llvm_map_components_to_libnames(llvm_libs + ${ARG_LINK_COMPONENTS} + ${LLVM_LINK_COMPONENTS} + ) + endif() if(CMAKE_VERSION VERSION_LESS 2.8.12) # Link libs w/o keywords, assuming PUBLIC. @@ -479,6 +512,10 @@ function(llvm_add_library name) add_dependencies(${objlib} ${LLVM_COMMON_DEPENDS}) endforeach() endif() + + if(ARG_SHARED OR ARG_MODULE) + llvm_externalize_debuginfo(${name}) + endif() endfunction() macro(add_llvm_library name) @@ -504,9 +541,11 @@ macro(add_llvm_library name) set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON) elseif(NOT _is_gtest) if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO") + set(install_dir lib${LLVM_LIBDIR_SUFFIX}) if(ARG_SHARED OR BUILD_SHARED_LIBS) - if(WIN32 OR CYGWIN) + if(WIN32 OR CYGWIN OR MINGW) set(install_type RUNTIME) + set(install_dir bin) else() set(install_type LIBRARY) endif() @@ -516,7 +555,7 @@ macro(add_llvm_library name) install(TARGETS ${name} EXPORT LLVMExports - ${install_type} DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ${install_type} DESTINATION ${install_dir} COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) @@ -562,9 +601,30 @@ endmacro(add_llvm_loadable_module name) macro(add_llvm_executable name) - llvm_process_sources( ALL_FILES ${ARGN} ) + cmake_parse_arguments(ARG "DISABLE_LLVM_LINK_LLVM_DYLIB;IGNORE_EXTERNALIZE_DEBUGINFO" "" "" ${ARGN}) + llvm_process_sources( ALL_FILES ${ARG_UNPARSED_ARGUMENTS} ) + + # Generate objlib + if(LLVM_ENABLE_OBJLIB) + # Generate an obj library for both targets. 
+ set(obj_name "obj.${name}") + add_library(${obj_name} OBJECT EXCLUDE_FROM_ALL + ${ALL_FILES} + ) + llvm_update_compile_flags(${obj_name}) + set(ALL_FILES "$<TARGET_OBJECTS:${obj_name}>") + + set_target_properties(${obj_name} PROPERTIES FOLDER "Object Libraries") + endif() + add_windows_version_resource_file(ALL_FILES ${ALL_FILES}) + if(XCODE) + # Note: the dummy.cpp source file provides no definitions. However, + # it forces Xcode to properly link the static library. + list(APPEND ALL_FILES "${LLVM_MAIN_SRC_DIR}/cmake/dummy.cpp") + endif() + if( EXCLUDE_FROM_ALL ) add_executable(${name} EXCLUDE_FROM_ALL ${ALL_FILES}) else() @@ -575,7 +635,10 @@ macro(add_llvm_executable name) set_windows_version_resource_properties(${name} ${windows_resource_file}) endif() - llvm_update_compile_flags(${name}) + # $<TARGET_OBJECTS> doesn't require compile flags. + if(NOT LLVM_ENABLE_OBJLIB) + llvm_update_compile_flags(${name}) + endif() add_link_opts( ${name} ) # Do not add -Dname_EXPORTS to the command-line when building files in this @@ -588,25 +651,40 @@ macro(add_llvm_executable name) add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} ) endif(LLVM_EXPORTED_SYMBOL_FILE) + if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) + set(USE_SHARED USE_SHARED) + endif() + set(EXCLUDE_FROM_ALL OFF) - set_output_directory(${name} ${LLVM_RUNTIME_OUTPUT_INTDIR} ${LLVM_LIBRARY_OUTPUT_INTDIR}) - llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) + set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + llvm_config( ${name} ${USE_SHARED} ${LLVM_LINK_COMPONENTS} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) + + if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) + llvm_externalize_debuginfo(${name}) + endif() endmacro(add_llvm_executable name) function(export_executable_symbols target) if (NOT MSVC) # MSVC's linker doesn't support exporting all symbols. set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) + if (APPLE) + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS " -rdynamic") + endif() endif() endfunction() - -set (LLVM_TOOLCHAIN_TOOLS - llvm-ar - llvm-objdump - ) +if(NOT LLVM_TOOLCHAIN_TOOLS) + set (LLVM_TOOLCHAIN_TOOLS + llvm-ar + llvm-ranlib + llvm-lib + llvm-objdump + ) +endif() macro(add_llvm_tool name) if( NOT LLVM_BUILD_TOOLS ) @@ -651,7 +729,7 @@ endmacro(add_llvm_example name) macro(add_llvm_utility name) - add_llvm_executable(${name} ${ARGN}) + add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) set_target_properties(${name} PROPERTIES FOLDER "Utils") if( LLVM_INSTALL_UTILS ) install (TARGETS ${name} @@ -676,56 +754,104 @@ macro(add_llvm_target target_name) set( CURRENT_LLVM_TARGET LLVM${target_name} ) endmacro(add_llvm_target) +function(canonicalize_tool_name name output) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" nameStrip ${name}) + string(REPLACE "-" "_" nameUNDERSCORE ${nameStrip}) + string(TOUPPER ${nameUNDERSCORE} nameUPPER) + set(${output} "${nameUPPER}" PARENT_SCOPE) +endfunction(canonicalize_tool_name) + +# Custom add_subdirectory wrapper +# Takes in a project name (e.g. LLVM), the subdirectory name, and an +# optional path if it differs from the name.
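# A hedged usage sketch of the wrapper defined below (the subproject name foo is hypothetical; the option and variable names follow the pattern the macro constructs): # add_llvm_subdirectory(LLVM TOOL foo) # This defines the option LLVM_TOOL_FOO_BUILD and descends into ${CMAKE_CURRENT_SOURCE_DIR}/foo when that directory has a CMakeLists.txt, otherwise it honors the LLVM_EXTERNAL_FOO_SOURCE_DIR cache variable for out-of-tree sources.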
+macro(add_llvm_subdirectory project type name) + set(add_llvm_external_dir "${ARGN}") + if("${add_llvm_external_dir}" STREQUAL "") + set(add_llvm_external_dir ${name}) + endif() + canonicalize_tool_name(${name} nameUPPER) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}/CMakeLists.txt) + # Treat it as in-tree subproject. + option(${project}_${type}_${nameUPPER}_BUILD + "Whether to build ${name} as part of ${project}" On) + mark_as_advanced(${project}_${type}_${name}_BUILD) + if(${project}_${type}_${nameUPPER}_BUILD) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir} ${add_llvm_external_dir}) + # Don't process it in add_llvm_implicit_projects(). + set(${project}_${type}_${nameUPPER}_BUILD OFF) + endif() + else() + set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR + "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" + CACHE PATH "Path to ${name} source directory") + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT ON) + if(NOT LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR OR NOT EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT OFF) + endif() + if("${LLVM_EXTERNAL_${nameUPPER}_BUILD}" STREQUAL "OFF") + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT OFF) + endif() + option(${project}_${type}_${nameUPPER}_BUILD + "Whether to build ${name} as part of LLVM" + ${${project}_${type}_${nameUPPER}_BUILD_DEFAULT}) + if (${project}_${type}_${nameUPPER}_BUILD) + if(EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) + add_subdirectory(${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR} ${add_llvm_external_dir}) + elseif(NOT "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" STREQUAL "") + message(WARNING "Nonexistent directory for ${name}: ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}") + endif() + # FIXME: It'd be redundant. + set(${project}_${type}_${nameUPPER}_BUILD Off) + endif() + endif() +endmacro() + # Add external project that may want to be built as part of llvm such as Clang, # lld, and Polly. This adds two options. One for the source directory of the # project, which defaults to ${CMAKE_CURRENT_SOURCE_DIR}/${name}. Another to # enable or disable building it with everything else. # Additional parameter can be specified as the name of directory. 
macro(add_llvm_external_project name) - set(add_llvm_external_dir "${ARGN}") - if("${add_llvm_external_dir}" STREQUAL "") - set(add_llvm_external_dir ${name}) - endif() - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - string(REPLACE "-" "_" nameUNDERSCORE ${name}) - string(TOUPPER ${nameUNDERSCORE} nameUPPER) - #TODO: Remove this check in a few days once it has circulated through - # buildbots and people's checkouts (cbieneman - July 14, 2015) - if("${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - unset(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR CACHE) - endif() - if(NOT LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR) - set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - else() - set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR - CACHE PATH "Path to ${name} source directory") - endif() - if (EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}/CMakeLists.txt) - option(LLVM_EXTERNAL_${nameUPPER}_BUILD - "Whether to build ${name} as part of LLVM" ON) - if (LLVM_EXTERNAL_${nameUPPER}_BUILD) - add_subdirectory(${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR} ${add_llvm_external_dir}) - endif() - endif() -endmacro(add_llvm_external_project) + add_llvm_subdirectory(LLVM TOOL ${name} ${ARGN}) +endmacro() macro(add_llvm_tool_subdirectory name) - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${name}") - add_subdirectory(${name}) + add_llvm_external_project(${name}) endmacro(add_llvm_tool_subdirectory) -macro(ignore_llvm_tool_subdirectory name) - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${name}") -endmacro(ignore_llvm_tool_subdirectory) +function(get_project_name_from_src_var var output) + string(REGEX MATCH "LLVM_EXTERNAL_(.*)_SOURCE_DIR" + MACHED_TOOL "${var}") + if(MACHED_TOOL) + set(${output} ${CMAKE_MATCH_1} PARENT_SCOPE) + else() + set(${output} PARENT_SCOPE) + endif() +endfunction() -function(add_llvm_implicit_external_projects) +function(create_subdirectory_options project type) + file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") + foreach(dir ${sub-dirs}) + if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") + canonicalize_tool_name(${dir} name) + option(${project}_${type}_${name}_BUILD + "Whether to build ${name} as part of ${project}" On) + mark_as_advanced(${project}_${type}_${name}_BUILD) + endif() + endforeach() +endfunction(create_subdirectory_options) + +function(create_llvm_tool_options) + create_subdirectory_options(LLVM TOOL) +endfunction(create_llvm_tool_options) + +function(add_llvm_implicit_projects) set(list_of_implicit_subdirs "") file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") foreach(dir ${sub-dirs}) - if(IS_DIRECTORY "${dir}") - list(FIND LLVM_IMPLICIT_PROJECT_IGNORE "${dir}" tool_subdir_ignore) - if( tool_subdir_ignore EQUAL -1 - AND EXISTS "${dir}/CMakeLists.txt") + if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") + canonicalize_tool_name(${dir} name) + if (LLVM_TOOL_${name}_BUILD) get_filename_component(fn "${dir}" NAME) list(APPEND list_of_implicit_subdirs "${fn}") endif() @@ -735,7 +861,7 @@ function(add_llvm_implicit_external_projects) foreach(external_proj ${list_of_implicit_subdirs}) add_llvm_external_project("${external_proj}") endforeach() -endfunction(add_llvm_implicit_external_projects) +endfunction(add_llvm_implicit_projects) # Generic support for adding a unittest. 
function(add_unittest test_suite test_name) @@ -754,9 +880,9 @@ function(add_unittest test_suite test_name) set(LLVM_REQUIRES_RTTI OFF) - add_llvm_executable(${test_name} ${ARGN}) + add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) - set_output_directory(${test_name} ${outdir} ${outdir}) + set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) target_link_libraries(${test_name} gtest gtest_main @@ -785,8 +911,13 @@ function(llvm_add_go_executable binary pkgpath) set(cppflags "${cppflags} -I${d}") endforeach(d) set(ldflags "${CMAKE_EXE_LINKER_FLAGS}") + if (LLVM_LINK_LLVM_DYLIB) + set(linkmode "dylib") + else() + set(linkmode "component-libs") + endif() add_custom_command(OUTPUT ${binpath} - COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-go "cc=${cc}" "cxx=${cxx}" "cppflags=${cppflags}" "ldflags=${ldflags}" + COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-go "go=${GO_EXECUTABLE}" "cc=${cc}" "cxx=${cxx}" "cppflags=${cppflags}" "ldflags=${ldflags}" "linkmode=${linkmode}" ${ARG_GOFLAGS} build -o ${binpath} ${pkgpath} DEPENDS llvm-config ${CMAKE_BINARY_DIR}/bin/llvm-go${CMAKE_EXECUTABLE_SUFFIX} ${llvmlibs} ${ARG_DEPENDS} @@ -939,3 +1070,132 @@ function(add_lit_testsuites project directory) endforeach() endif() endfunction() + +function(llvm_install_library_symlink name dest type) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "COMPONENT" "" ${ARGN}) + foreach(path ${CMAKE_MODULE_PATH}) + if(EXISTS ${path}/LLVMInstallSymlink.cmake) + set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) + break() + endif() + endforeach() + + set(component ${ARG_COMPONENT}) + if(NOT component) + set(component ${name}) + endif() + + set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX}) + set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}) + + set(output_dir lib${LLVM_LIBDIR_SUFFIX}) + if(WIN32 AND "${type}" STREQUAL "SHARED") + set(output_dir bin) + endif() + + install(SCRIPT ${INSTALL_SYMLINK} + CODE "install_symlink(${full_name} ${full_dest} ${output_dir})" + COMPONENT ${component}) + + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + add_custom_target(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=${name} + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + endif() +endfunction() + +function(llvm_install_symlink name dest) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "" "" ${ARGN}) + foreach(path ${CMAKE_MODULE_PATH}) + if(EXISTS ${path}/LLVMInstallSymlink.cmake) + set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) + break() + endif() + endforeach() + + if(ARG_ALWAYS_GENERATE) + set(component ${dest}) + else() + set(component ${name}) + endif() + + set(full_name ${name}${CMAKE_EXECUTABLE_SUFFIX}) + set(full_dest ${dest}${CMAKE_EXECUTABLE_SUFFIX}) + + install(SCRIPT ${INSTALL_SYMLINK} + CODE "install_symlink(${full_name} ${full_dest} bin)" + COMPONENT ${component}) + + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + add_custom_target(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=${name} + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + endif() +endfunction() + +function(add_llvm_tool_symlink name dest) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "" "" ${ARGN}) + if(UNIX) + set(LLVM_LINK_OR_COPY create_symlink) + set(dest_binary "${dest}${CMAKE_EXECUTABLE_SUFFIX}") + else() + set(LLVM_LINK_OR_COPY 
copy) + set(dest_binary "${LLVM_RUNTIME_OUTPUT_INTDIR}/${dest}${CMAKE_EXECUTABLE_SUFFIX}") + endif() + + set(output_path "${LLVM_RUNTIME_OUTPUT_INTDIR}/${name}${CMAKE_EXECUTABLE_SUFFIX}") + + if(ARG_ALWAYS_GENERATE) + set_property(DIRECTORY APPEND PROPERTY + ADDITIONAL_MAKE_CLEAN_FILES ${dest_binary}) + add_custom_command(TARGET ${dest} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}") + else() + add_custom_command(OUTPUT ${output_path} + COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}" + DEPENDS ${dest}) + add_custom_target(${name} ALL DEPENDS ${output_path}) + set_target_properties(${name} PROPERTIES FOLDER Tools) + + # Make sure the parent tool is a toolchain tool, otherwise exclude this tool + list(FIND LLVM_TOOLCHAIN_TOOLS ${dest} LLVM_IS_${dest}_TOOLCHAIN_TOOL) + if (NOT LLVM_IS_${dest}_TOOLCHAIN_TOOL GREATER -1) + set(LLVM_IS_${name}_TOOLCHAIN_TOOL ${LLVM_IS_${dest}_TOOLCHAIN_TOOL}) + else() + list(FIND LLVM_TOOLCHAIN_TOOLS ${name} LLVM_IS_${name}_TOOLCHAIN_TOOL) + endif() + + # LLVM_IS_${name}_TOOLCHAIN_TOOL will only be greater than -1 if both this + # tool and its parent tool are in LLVM_TOOLCHAIN_TOOLS + if (LLVM_IS_${name}_TOOLCHAIN_TOOL GREATER -1 OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + if( LLVM_BUILD_TOOLS ) + llvm_install_symlink(${name} ${dest}) + endif() + endif() + endif() +endfunction() + +function(llvm_externalize_debuginfo name) + if(NOT LLVM_EXTERNALIZE_DEBUGINFO) + return() + endif() + + if(APPLE) + if(CMAKE_CXX_FLAGS MATCHES "-flto" + OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") + + set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) + set_property(TARGET ${name} APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,-object_path_lto,${lto_object}") + endif() + add_custom_command(TARGET ${name} POST_BUILD + COMMAND xcrun dsymutil $<TARGET_FILE:${name}> + COMMAND xcrun strip -Sl $<TARGET_FILE:${name}>) + else() + message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") + endif() +endfunction() diff --git a/cmake/modules/AddLLVMDefinitions.cmake b/cmake/modules/AddLLVMDefinitions.cmake index 33ac9731db5d..dab16236d3e9 100644 --- a/cmake/modules/AddLLVMDefinitions.cmake +++ b/cmake/modules/AddLLVMDefinitions.cmake @@ -7,7 +7,11 @@ macro(add_llvm_definitions) # We don't want no semicolons on LLVM_DEFINITIONS: foreach(arg ${ARGN}) - set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}") + if(DEFINED LLVM_DEFINITIONS) + set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}") + else() + set(LLVM_DEFINITIONS ${arg}) + endif() endforeach(arg) add_definitions( ${ARGN} ) endmacro(add_llvm_definitions) diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake index 76a3078a5440..c136dfaa612e 100644 --- a/cmake/modules/CrossCompile.cmake +++ b/cmake/modules/CrossCompile.cmake @@ -19,6 +19,7 @@ function(llvm_create_cross_target_internal target_name toochain buildtype) add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} + -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} DEPENDS ${LLVM_${target_name}_BUILD} COMMENT "Configuring ${target_name} LLVM...") @@ -43,6 +44,7 @@ function(llvm_create_cross_target_internal target_name toochain buildtype) execute_process(COMMAND ${CMAKE_COMMAND} ${build_type_flags} -G "${CMAKE_GENERATOR}" -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} + -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} ) endif(NOT IS_DIRECTORY ${LLVM_${target_name}_BUILD}) diff --git a/cmake/modules/DetermineGCCCompatible.cmake b/cmake/modules/DetermineGCCCompatible.cmake new file mode 100644 index 000000000000..1bf15fcba72f --- /dev/null +++ b/cmake/modules/DetermineGCCCompatible.cmake @@ -0,0 +1,11 @@ +# Determine if the compiler has GCC-compatible command-line syntax. + +if(NOT DEFINED LLVM_COMPILER_IS_GCC_COMPATIBLE) + if(CMAKE_COMPILER_IS_GNUCXX) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) + elseif( MSVC ) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF) + elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) + endif() +endif() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 4db27033b203..4c5ffe2f7b28 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -132,7 +132,8 @@ endif() # Pass -Wl,-z,defs. This makes sure all symbols are defined. Otherwise a DSO # build might work on ELF but fail on MachO/COFF. if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR CYGWIN OR - ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") AND + ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR + ${CMAKE_SYSTEM_NAME} MATCHES "OpenBSD") AND NOT LLVM_USE_SANITIZER) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,defs") endif() @@ -166,6 +167,7 @@ function(add_flag_or_print_warning flag name) message(STATUS "Building with ${flag}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${flag}" PARENT_SCOPE) else() message(WARNING "${flag} is not supported.") endif() @@ -180,16 +182,15 @@ if( LLVM_ENABLE_PIC ) # On Windows all code is PIC. MinGW warns if -fPIC is used. else() add_flag_or_print_warning("-fPIC" FPIC) - - if( WIN32 OR CYGWIN) - # MinGW warns if -fvisibility-inlines-hidden is used. - else() - check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) - append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) - endif() endif() endif() +if(NOT WIN32 AND NOT CYGWIN) + # MinGW warns if -fvisibility-inlines-hidden is used. + check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) + append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) +endif() + if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) # TODO: support other platforms and toolchains. if( LLVM_BUILD_32_BITS ) @@ -246,6 +247,12 @@ if( MSVC_IDE ) endif() if( MSVC ) + if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0 ) + # For MSVC 2013, disable iterator null pointer checking in debug mode, + # especially so std::equal(nullptr, nullptr, nullptr) will not assert. + add_llvm_definitions("-D_DEBUG_POINTER_IMPL=") + endif() + include(ChooseMSVCCRT) if( NOT (${CMAKE_VERSION} VERSION_LESS 2.8.11) ) @@ -274,6 +281,7 @@ if( MSVC ) set(msvc_warning_flags # Disabled warnings. 
+ -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' @@ -307,6 +315,11 @@ if( MSVC ) -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' -wd4805 # Suppress 'unsafe mix of type and type in operation' -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' + -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' + -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. + -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't # support the 'aligned' attribute in the way that clang sources requires (for @@ -325,7 +338,10 @@ if( MSVC ) # Enable warnings if (LLVM_ENABLE_WARNINGS) - append("/W4" msvc_warning_flags) + # Put /W4 in front of all the -we flags. cl.exe doesn't care, but for + # clang-cl having /W4 after the -we flags will re-enable the warnings + # disabled by -we. + set(msvc_warning_flags "/W4 ${msvc_warning_flags}") # CMake appends /W3 by default, and having /W3 followed by /W4 will result in # cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is # a command line warning and not a compiler warning, it cannot be suppressed except @@ -345,6 +361,8 @@ if( MSVC ) append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endforeach(flag) + append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Disable sized deallocation if the flag is supported. MSVC fails to compile # the operator new overload in User otherwise. check_c_compiler_flag("/WX /Zc:sizedDealloc-" SUPPORTS_SIZED_DEALLOC) @@ -367,7 +385,8 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif() endif() - append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append_if(LLVM_ENABLE_PEDANTIC "-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append_if(LLVM_ENABLE_PEDANTIC "-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) @@ -375,16 +394,23 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) # Check if -Wnon-virtual-dtor warns even though the class is marked final. # If it does, don't add it. So it won't be added on clang 3.4 and older. # This also catches cases when -Wnon-virtual-dtor isn't supported by - # the compiler at all. - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") - CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; - class derived final : public base { public: ~derived();}; - int main() { return 0; }" - CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR - "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) + # the compiler at all. 
This flag is not activated for gcc since it will + # incorrectly identify a protected non-virtual base when there is a friend + # declaration. + if (NOT CMAKE_COMPILER_IS_GNUCXX) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") + CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; + class derived final : public base { public: ~derived();}; + int main() { return 0; }" + CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR + "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) + endif() + + # Enable -Wdelete-non-virtual-dtor if available. + add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG) # Check if -Wcomment is OK with an // comment ending with '\' if the next # line is also a // comment. @@ -420,7 +446,7 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif() if (LLVM_ENABLE_MODULES) set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fmodules -fcxx-modules") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fmodules") # Check that we can build code with modules enabled, and that repeatedly # including still manages to respect NDEBUG properly. CHECK_CXX_SOURCE_COMPILES("#undef NDEBUG @@ -440,16 +466,34 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif( MSVC ) macro(append_common_sanitizer_flags) - # Append -fno-omit-frame-pointer and turn on debug info to get better - # stack traces. - add_flag_if_supported("-fno-omit-frame-pointer" FNO_OMIT_FRAME_POINTER) - if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND - NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") - add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY) - endif() - # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large. - if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - add_flag_if_supported("-O1" O1) + if (NOT MSVC) + # Append -fno-omit-frame-pointer and turn on debug info to get better + # stack traces. + add_flag_if_supported("-fno-omit-frame-pointer" FNO_OMIT_FRAME_POINTER) + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND + NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") + add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY) + endif() + # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large. + if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + add_flag_if_supported("-O1" O1) + endif() + elseif (CLANG_CL) + # Keep frame pointers around. + append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + if (CMAKE_LINKER MATCHES "lld-link.exe") + # Use DWARF debug info with LLD. + append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. + # Adding manifests with mt.exe breaks LLD's symbol tables. See PR24476. + append("/MANIFEST:NO" + CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + else() + # Enable codeview otherwise. + append("/Z7" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + # Always ask the linker to produce symbols with asan. 
+ append("-debug" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() endmacro() @@ -478,10 +522,17 @@ if(LLVM_USE_SANITIZER) append("-fsanitize=address,undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) else() - message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + message(FATAL_ERROR "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + endif() + elseif(MSVC) + if (LLVM_USE_SANITIZER STREQUAL "Address") + append_common_sanitizer_flags() + append("-fsanitize=address" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + else() + message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${LLVM_USE_SANITIZER}") endif() else() - message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.") + message(FATAL_ERROR "LLVM_USE_SANITIZER is not supported on this platform.") endif() if (LLVM_USE_SANITIZE_COVERAGE) append("-fsanitize-coverage=edge,indirect-calls,8bit-counters,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) @@ -546,6 +597,14 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI) message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON") endif() +option(LLVM_BUILD_INSTRUMENTED "Build LLVM and tools with PGO instrumentation (experimental)" Off) +mark_as_advanced(LLVM_BUILD_INSTRUMENTED) +append_if(LLVM_BUILD_INSTRUMENTED "-fprofile-instr-generate" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) + # Plugin support # FIXME: Make this configurable. if(WIN32 OR CYGWIN) diff --git a/cmake/modules/HandleLLVMStdlib.cmake b/cmake/modules/HandleLLVMStdlib.cmake index 66ad078fb66e..b07781c3f290 100644 --- a/cmake/modules/HandleLLVMStdlib.cmake +++ b/cmake/modules/HandleLLVMStdlib.cmake @@ -1,17 +1,11 @@ # This CMake module is responsible for setting the standard library to libc++ # if the user has requested it. +include(DetermineGCCCompatible) + if(NOT DEFINED LLVM_STDLIB_HANDLED) set(LLVM_STDLIB_HANDLED ON) - if(CMAKE_COMPILER_IS_GNUCXX) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) - elseif( MSVC ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF) - elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) - endif() - function(append value) foreach(variable ${ARGN}) set(${variable} "${${variable}} ${value}" PARENT_SCOPE) diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 22ac71451917..aa68b4007602 100755 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -31,7 +31,23 @@ endfunction(is_llvm_target_library) macro(llvm_config executable) - explicit_llvm_config(${executable} ${ARGN}) + cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN}) + set(link_components ${ARG_UNPARSED_ARGUMENTS}) + + if(USE_SHARED) + # If USE_SHARED is specified, then we link against libLLVM, + # but also against the component libraries below. This is + # done in case libLLVM does not contain all of the components + # the target requires. + # + # TODO strip LLVM_DYLIB_COMPONENTS out of link_components. + # To do this, we need special handling for "all", since that + # may imply linking to libraries that are not included in + # libLLVM. 
+ target_link_libraries(${executable} LLVM) + endif() + + explicit_llvm_config(${executable} ${link_components}) endmacro(llvm_config) diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in index 5b7789dbc9a1..6855c4422543 100644 --- a/cmake/modules/LLVMConfig.cmake.in +++ b/cmake/modules/LLVMConfig.cmake.in @@ -39,6 +39,9 @@ set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@) +set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@) +set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@) + set(LLVM_ON_UNIX @LLVM_ON_UNIX@) set(LLVM_ON_WIN32 @LLVM_ON_WIN32@) @@ -46,7 +49,7 @@ set(LLVM_LIBDIR_SUFFIX @LLVM_LIBDIR_SUFFIX@) set(LLVM_INCLUDE_DIRS "@LLVM_CONFIG_INCLUDE_DIRS@") set(LLVM_LIBRARY_DIRS "@LLVM_CONFIG_LIBRARY_DIRS@") -set(LLVM_DEFINITIONS "-D__STDC_LIMIT_MACROS" "-D__STDC_CONSTANT_MACROS") +set(LLVM_DEFINITIONS "@LLVM_DEFINITIONS@") set(LLVM_CMAKE_DIR "@LLVM_CONFIG_CMAKE_DIR@") set(LLVM_TOOLS_BINARY_DIR "@LLVM_CONFIG_TOOLS_BINARY_DIR@") diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake new file mode 100644 index 000000000000..c2d9f530c200 --- /dev/null +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -0,0 +1,195 @@ +include(ExternalProject) + +# llvm_ExternalProject_BuildCmd(out_var target) +# Utility function for constructing command lines for external project targets +function(llvm_ExternalProject_BuildCmd out_var target) + if (CMAKE_GENERATOR MATCHES "Make") + # Use special command for Makefiles to support parallelism. + set(${out_var} "$(MAKE)" "${target}" PARENT_SCOPE) + else() + set(${out_var} ${CMAKE_COMMAND} --build . --target ${target} + --config $<CONFIG> PARENT_SCOPE) + endif() +endfunction() + +# llvm_ExternalProject_Add(name source_dir ... +# USE_TOOLCHAIN +# Use just-built tools (see TOOLCHAIN_TOOLS) +# EXCLUDE_FROM_ALL +# Exclude this project from the all target +# NO_INSTALL +# Don't generate install targets for this project +# CMAKE_ARGS arguments... +# Optional cmake arguments to pass when configuring the project +# TOOLCHAIN_TOOLS targets... +# Targets for toolchain tools (defaults to clang;lld) +# DEPENDS targets... +# Targets that this project depends on +# EXTRA_TARGETS targets...
+# Extra targets in the subproject to generate targets for +# ) +function(llvm_ExternalProject_Add name source_dir) + cmake_parse_arguments(ARG "USE_TOOLCHAIN;EXCLUDE_FROM_ALL;NO_INSTALL" + "SOURCE_DIR" + "CMAKE_ARGS;TOOLCHAIN_TOOLS;RUNTIME_LIBRARIES;DEPENDS;EXTRA_TARGETS" ${ARGN}) + canonicalize_tool_name(${name} nameCanon) + if(NOT ARG_TOOLCHAIN_TOOLS) + set(ARG_TOOLCHAIN_TOOLS clang lld) + endif() + foreach(tool ${ARG_TOOLCHAIN_TOOLS}) + if(TARGET ${tool}) + list(APPEND TOOLCHAIN_TOOLS ${tool}) + list(APPEND TOOLCHAIN_BINS $<TARGET_FILE:${tool}>) + endif() + endforeach() + + if(NOT ARG_RUNTIME_LIBRARIES) + set(ARG_RUNTIME_LIBRARIES compiler-rt libcxx) + endif() + foreach(lib ${ARG_RUNTIME_LIBRARIES}) + if(TARGET ${lib}) + list(APPEND RUNTIME_LIBRARIES ${lib}) + endif() + endforeach() + + list(FIND TOOLCHAIN_TOOLS clang FOUND_CLANG) + if(FOUND_CLANG GREATER -1) + set(CLANG_IN_TOOLCHAIN On) + endif() + + if(RUNTIME_LIBRARIES AND CLANG_IN_TOOLCHAIN) + list(APPEND TOOLCHAIN_BINS ${RUNTIME_LIBRARIES}) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.1.0) + set(cmake_3_1_EXCLUDE_FROM_ALL EXCLUDE_FROM_ALL 1) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.3.20150708) + set(cmake_3_4_USES_TERMINAL_OPTIONS + USES_TERMINAL_CONFIGURE 1 + USES_TERMINAL_BUILD 1 + USES_TERMINAL_INSTALL 1 + ) + set(cmake_3_4_USES_TERMINAL USES_TERMINAL 1) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.1.20141116) + set(cmake_3_2_USES_TERMINAL USES_TERMINAL) + endif() + + set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/) + set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/) + + add_custom_target(${name}-clear + COMMAND ${CMAKE_COMMAND} -E remove_directory ${BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E remove_directory ${STAMP_DIR} + COMMENT "Clobbering ${name} build and stamp directories" + ${cmake_3_2_USES_TERMINAL} + ) + + # Find all variables that start with the canonicalized project name (e.g. + # COMPILER_RT) and populate a variable with them.
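# A hedged illustration of the pass-through below (the cache variable is hypothetical): for name "compiler-rt", nameCanon is COMPILER_RT, so configuring the host build with # -DCOMPILER_RT_EXAMPLE_OPTION=ON # would re-emit that same -D definition on the external project's CMake command line.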
+  get_cmake_property(variableNames VARIABLES)
+  foreach(variableName ${variableNames})
+    if(variableName MATCHES "^${nameCanon}")
+      string(REPLACE ";" "\;" value "${${variableName}}")
+      list(APPEND PASSTHROUGH_VARIABLES
+        -D${variableName}=${value})
+    endif()
+  endforeach()
+
+  if(ARG_USE_TOOLCHAIN)
+    if(CLANG_IN_TOOLCHAIN)
+      set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
+                        -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
+    endif()
+    list(APPEND ARG_DEPENDS ${TOOLCHAIN_TOOLS})
+  endif()
+
+  add_custom_command(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
+    DEPENDS ${ARG_DEPENDS}
+    COMMAND ${CMAKE_COMMAND} -E touch ${BINARY_DIR}/CMakeCache.txt
+    COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_DIR}/${name}-mkdir
+    COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
+    COMMENT "Clobbering bootstrap build and stamp directories"
+    )
+
+  add_custom_target(${name}-clobber
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
+
+  if(ARG_EXCLUDE_FROM_ALL)
+    set(exclude ${cmake_3_1_EXCLUDE_FROM_ALL})
+  endif()
+
+  ExternalProject_Add(${name}
+    DEPENDS ${ARG_DEPENDS}
+            ${name}-clobber
+    PREFIX ${CMAKE_BINARY_DIR}/projects/${name}
+    SOURCE_DIR ${source_dir}
+    STAMP_DIR ${STAMP_DIR}
+    BINARY_DIR ${BINARY_DIR}
+    ${exclude}
+    CMAKE_ARGS ${${nameCanon}_CMAKE_ARGS}
+               ${compiler_args}
+               -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+               ${ARG_CMAKE_ARGS}
+               ${PASSTHROUGH_VARIABLES}
+    INSTALL_COMMAND ""
+    STEP_TARGETS configure build
+    ${cmake_3_4_USES_TERMINAL_OPTIONS}
+    )
+
+  if(ARG_USE_TOOLCHAIN)
+    ExternalProject_Add_Step(${name} force-rebuild
+      COMMENT "Forcing rebuild because tools have changed"
+      DEPENDERS configure
+      DEPENDS ${TOOLCHAIN_BINS}
+      ${cmake_3_4_USES_TERMINAL} )
+  endif()
+
+  if(ARG_USE_TOOLCHAIN)
+    set(force_deps DEPENDS ${TOOLCHAIN_BINS})
+  endif()
+
+  llvm_ExternalProject_BuildCmd(run_clean clean)
+  ExternalProject_Add_Step(${name} clean
+    COMMAND ${run_clean}
+    COMMENT "Cleaning ${name}..."
+    DEPENDEES configure
+    ${force_deps}
+    WORKING_DIRECTORY ${BINARY_DIR}
+    ${cmake_3_4_USES_TERMINAL}
+    )
+  ExternalProject_Add_StepTargets(${name} clean)
+
+  if(ARG_USE_TOOLCHAIN)
+    add_dependencies(${name}-clean ${name}-clobber)
+    set_target_properties(${name}-clean PROPERTIES
+      SOURCES ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
+  endif()
+
+  if(NOT ARG_NO_INSTALL)
+    install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${BINARY_DIR}/cmake_install.cmake \)"
+      COMPONENT ${name})
+
+    add_custom_target(install-${name}
+      DEPENDS ${name}
+      COMMAND "${CMAKE_COMMAND}"
+              -DCMAKE_INSTALL_COMPONENT=${name}
+              -P "${CMAKE_BINARY_DIR}/cmake_install.cmake"
+      ${cmake_3_2_USES_TERMINAL})
+  endif()
+
+  # Add top-level targets
+  foreach(target ${ARG_EXTRA_TARGETS})
+    llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target})
+    add_custom_target(${target}
+      COMMAND ${build_runtime_cmd}
+      DEPENDS ${name}-configure
+      WORKING_DIRECTORY ${BINARY_DIR}
+      VERBATIM
+      ${cmake_3_2_USES_TERMINAL})
+  endforeach()
+endfunction()
diff --git a/cmake/modules/LLVMInstallSymlink.cmake b/cmake/modules/LLVMInstallSymlink.cmake
new file mode 100644
index 000000000000..482697b06baf
--- /dev/null
+++ b/cmake/modules/LLVMInstallSymlink.cmake
@@ -0,0 +1,21 @@
+# We need to execute this script at installation time because the
+# DESTDIR environment variable may be unset at configuration time.
+# See PR8397.
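Before the install-symlink helper continues below, here is a minimal sketch of how a caller might invoke the ``llvm_ExternalProject_Add`` function added above. The project name, source path, and the ``COMPILER_RT_BUILD_BUILTINS`` cache variable are illustrative assumptions rather than code from this patch; only the option names come from the function's own documentation.

.. code-block:: cmake

   include(LLVMExternalProjectUtils)

   # Hypothetical passthrough variable: it reaches the subproject's CMake
   # cache because its name begins with the canonicalized project name
   # (compiler-rt -> COMPILER_RT), per the variable-matching loop above.
   set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")

   llvm_ExternalProject_Add(compiler-rt ${CMAKE_SOURCE_DIR}/projects/compiler-rt
     USE_TOOLCHAIN                     # configure with the just-built clang/clang++
     CMAKE_ARGS -DCOMPILER_RT_INCLUDE_TESTS=ON
     EXTRA_TARGETS check-compiler-rt   # re-exported as a top-level target
     )

With ``USE_TOOLCHAIN``, the generated ``force-rebuild`` step reconfigures the external project whenever one of the toolchain binaries changes, which is the behavior the stamp and clobber machinery above exists to support.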
+ +function(install_symlink name target outdir) + if(UNIX) + set(LINK_OR_COPY create_symlink) + set(DESTDIR $ENV{DESTDIR}) + else() + set(LINK_OR_COPY copy) + endif() + + set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}/") + + message("Creating ${name}") + + execute_process( + COMMAND "${CMAKE_COMMAND}" -E ${LINK_OR_COPY} "${target}" "${name}" + WORKING_DIRECTORY "${bindir}") + +endfunction() diff --git a/cmake/modules/Makefile b/cmake/modules/Makefile index f644c45dcd0d..abfda93b210f 100644 --- a/cmake/modules/Makefile +++ b/cmake/modules/Makefile @@ -9,8 +9,6 @@ LEVEL = ../.. -LINK_COMPONENTS := all - include $(LEVEL)/Makefile.common PROJ_cmake := $(DESTDIR)$(PROJ_prefix)/share/llvm/cmake @@ -39,24 +37,9 @@ else LLVM_ENABLE_RTTI := 0 endif -# Don't try to run llvm-config during clean because it won't be available -ifneq ($(MAKECMDGOALS),clean) -LLVM_LIBS_TO_EXPORT := $(subst -l,,$(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error)) - -ifeq ($(LLVM_LIBS_TO_EXPORT),Error) -$(error llvm-config --libs failed) -endif - -# Strip out gtest and gtest_main from LLVM_LIBS_TO_EXPORT, these are not -# installed and won't be available from the install tree. -# FIXME: If we used llvm-config from the install tree this wouldn't be -# necessary. -LLVM_LIBS_TO_EXPORT := $(filter-out gtest gtest_main,$(LLVM_LIBS_TO_EXPORT)) - ifndef LLVM_LIBS_TO_EXPORT $(error LLVM_LIBS_TO_EXPORT cannot be empty) endif -endif OBJMODS := LLVMConfig.cmake LLVMConfigVersion.cmake LLVMExports.cmake @@ -135,7 +118,7 @@ $(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag) Makefile done && \ cat "$(LLVMBuildCMakeExportsFrag)" && \ echo 'set_property(TARGET LLVMSupport APPEND PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES '"$(subst -l,,$(LIBS))"')' \ - ) | grep -v gtest > $@ + ) > $@ all-local:: $(addprefix $(PROJ_OBJ_DIR)/, $(OBJMODS)) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 85d720e91fd7..fca7d1bda4b3 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -70,9 +70,31 @@ function(add_public_tablegen_target target) set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${target} PARENT_SCOPE) endfunction() +if(LLVM_USE_HOST_TOOLS) + add_custom_command(OUTPUT LIB_LLVMSUPPORT + COMMAND ${CMAKE_COMMAND} --build . --target LLVMSupport --config Release + DEPENDS CONFIGURE_LLVM_NATIVE + WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} + COMMENT "Building libLLVMSupport for native TableGen...") + add_custom_target(NATIVE_LIB_LLVMSUPPORT DEPENDS LIB_LLVMSUPPORT) + + add_custom_command(OUTPUT LIB_LLVMTABLEGEN + COMMAND ${CMAKE_COMMAND} --build . --target LLVMTableGen --config Release + DEPENDS CONFIGURE_LLVM_NATIVE + WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} + COMMENT "Building libLLVMTableGen for native TableGen...") + add_custom_target(NATIVE_LIB_LLVMTABLEGEN DEPENDS LIB_LLVMTABLEGEN) +endif(LLVM_USE_HOST_TOOLS) + macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) + + if(NOT XCODE) + # FIXME: It leaks to user, callee of add_tablegen. + set(LLVM_ENABLE_OBJLIB ON) + endif() + add_llvm_utility(${target} ${ARGN}) set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS}) @@ -103,7 +125,7 @@ macro(add_tablegen target project) add_custom_command(OUTPUT ${${project}_TABLEGEN_EXE} COMMAND ${CMAKE_COMMAND} --build . 
                       --target ${target} --config Release
-    DEPENDS CONFIGURE_LLVM_NATIVE ${target}
+    DEPENDS ${target} NATIVE_LIB_LLVMSUPPORT NATIVE_LIB_LLVMTABLEGEN
     WORKING_DIRECTORY ${LLVM_NATIVE_BUILD}
     COMMENT "Building native TableGen...")
   add_custom_target(${project}-tablegen-host DEPENDS ${${project}_TABLEGEN_EXE})
diff --git a/configure b/configure
index c192415c24a8..33438c60365e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.7.1.
+# Generated by GNU Autoconf 2.60 for LLVM 3.8.0svn.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.7.1'
-PACKAGE_STRING='LLVM 3.7.1'
+PACKAGE_VERSION='3.8.0svn'
+PACKAGE_STRING='LLVM 3.8.0svn'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'
 
 ac_unique_file="lib/IR/Module.cpp"
@@ -655,6 +655,7 @@ CXX
 CXXFLAGS
 ac_ct_CXX
 CPP
+CXX_COMPILER
 subdirs
 ENABLE_POLLY
 LLVM_HAS_POLLY
@@ -777,7 +778,7 @@ GAS
 HAVE_LINK_VERSION_SCRIPT
 EGREP
 NO_VARIADIC_MACROS
-NO_MISSING_FIELD_INITIALIZERS
+MISSING_FIELD_INITIALIZERS
 COVERED_SWITCH_DEFAULT
 NO_MAYBE_UNINITIALIZED
 NO_UNINITIALIZED
@@ -1333,7 +1334,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures LLVM 3.7.1 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.8.0svn to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1399,7 +1400,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.7.1:";;
+     short | recursive ) echo "Configuration of LLVM 3.8.0svn:";;
   esac
   cat <<\_ACEOF
 
@@ -1583,7 +1584,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-LLVM configure 3.7.1
+LLVM configure 3.8.0svn
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1599,7 +1600,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by LLVM $as_me 3.7.1, which was
+It was created by LLVM $as_me 3.8.0svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -1954,9 +1955,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 LLVM_VERSION_MAJOR=3
-LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=1
-LLVM_VERSION_SUFFIX=
+LLVM_VERSION_MINOR=8
+LLVM_VERSION_PATCH=0
+LLVM_VERSION_SUFFIX=svn
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2032,7 +2033,7 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;}
   fi
 fi
 
-if test ${srcdir} == "." ; then
+if test ${srcdir} = "." ; then
   { { echo "$as_me:$LINENO: error: In-source builds are not allowed. Please configure from a separate build directory!" >&5
echo "$as_me: error: In-source builds are not allowed. Please configure from a separate build directory!"
>&2;} { (exit 1); exit 1; }; } @@ -3752,6 +3753,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: result: ${llvm_cv_cxx_compiler}" >&5 echo "${ECHO_T}${llvm_cv_cxx_compiler}" >&6; } +CXX_COMPILER=$llvm_cv_cxx_compiler + @@ -4185,6 +4188,7 @@ else arm64*-*) llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; aarch64*-*) llvm_cv_target_arch="AArch64" ;; + avr-*) llvm_cv_target_arch="AVR" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -4223,6 +4227,7 @@ case $host in arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; aarch64*-*) host_arch="AArch64" ;; + avr-*) host_arch="AVR" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5144,6 +5149,8 @@ else x86_64) TARGET_HAS_JIT=1 ;; ARM) TARGET_HAS_JIT=1 + ;; + AVR) TARGET_HAS_JIT=0 ;; Mips) TARGET_HAS_JIT=1 ;; @@ -5947,7 +5954,7 @@ _ACEOF if test "${with_clang_default_openmp_runtime+set}" = set; then withval=$with_clang_default_openmp_runtime; else - withval="libgomp" + withval="libomp" fi @@ -8379,25 +8386,33 @@ echo "${ECHO_T}ok" >&6; } { echo "$as_me:$LINENO: checking optional compiler flags" >&5 echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; } -NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` +case "$llvm_cv_cxx_compiler" in + clang) + NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` -NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` + MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wmissing-field-initializers` -COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` + COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` + ;; + gcc) + MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` -NO_UNINITIALIZED= -NO_MAYBE_UNINITIALIZED= -if test "$GXX" = "yes" -then - NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized` + NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` - if test -z "$NO_MAYBE_UNINITIALIZED" - then - NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized` + COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` - fi -fi + NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized` + + if test -z "$NO_MAYBE_UNINITIALIZED" + then + NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized` + + fi + ;; + unknown) + ;; +esac no_comment= llvm_cv_old_cxxflags="$CXXFLAGS" @@ -8464,8 +8479,8 @@ NO_COMMENT=$no_comment CXXFLAGS="$llvm_cv_old_cxxflags" -{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS 
$COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&5
-echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&6; }
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&6; }
 
 
 # Check whether --with-python was given.
@@ -8885,6 +8900,87 @@ _ACEOF
 
 fi
 
+
+{ echo "$as_me:$LINENO: checking for main in -luuid" >&5
+echo $ECHO_N "checking for main in -luuid... $ECHO_C" >&6; }
+if test "${ac_cv_lib_uuid_main+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-luuid $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_lib_uuid_main=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_uuid_main=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_uuid_main" >&5
+echo "${ECHO_T}$ac_cv_lib_uuid_main" >&6; }
+if test $ac_cv_lib_uuid_main = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBUUID 1
+_ACEOF
+
+  LIBS="-luuid $LIBS"
+
+fi
+
 fi
 
 { echo "$as_me:$LINENO: checking for library containing dlopen" >&5
@@ -11043,90 +11139,6 @@ _ACEOF
 
 fi
 
-{ echo "$as_me:$LINENO: checking for sys/wait.h that is POSIX.1 compatible" >&5
-echo $ECHO_N "checking for sys/wait.h that is POSIX.1 compatible... $ECHO_C" >&6; }
-if test "${ac_cv_header_sys_wait_h+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <sys/types.h>
-#include <sys/wait.h>
-#ifndef WEXITSTATUS
-# define WEXITSTATUS(stat_val) ((unsigned int) (stat_val) >> 8)
-#endif
-#ifndef WIFEXITED
-# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
-#endif
-
-int
-main ()
-{
-  int s;
-  wait (&s);
-  s = WIFEXITED (s) ?
WEXITSTATUS (s) : 1; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_header_sys_wait_h=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_header_sys_wait_h=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_wait_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_wait_h" >&6; } -if test $ac_cv_header_sys_wait_h = yes; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SYS_WAIT_H 1 -_ACEOF - -fi - { echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5 echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6; } if test "${ac_cv_header_time+set}" = set; then @@ -11643,175 +11655,6 @@ fi done -for ac_header in utime.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <$ac_header> -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null; then - if test -s conftest.err; then - ac_cpp_err=$ac_c_preproc_warn_flag - ac_cpp_err=$ac_cpp_err$ac_c_werror_flag - else - ac_cpp_err= - fi -else - ac_cpp_err=yes -fi -if test -z "$ac_cpp_err"; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header:     see the Autoconf documentation" >&5
-echo "$as_me: WARNING: $ac_header:     see the Autoconf documentation" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
-    ( cat <<\_ASBOX
-## ------------------------------------ ##
-## Report this to http://llvm.org/bugs/ ##
-## ------------------------------------ ##
-_ASBOX
-     ) | sed "s/^/$as_me: WARNING:     /" >&2
-    ;;
-esac
-{ echo "$as_me:$LINENO: checking for $ac_header" >&5
-echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
-if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  eval "$as_ac_Header=\$ac_header_preproc"
-fi
-ac_res=`eval echo '${'$as_ac_Header'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-
-fi
-if test `eval echo '${'$as_ac_Header'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
@@ -14694,9 +14537,7 @@ done
 
 
 
-
-
-for ac_func in setjmp longjmp sigsetjmp siglongjmp writev
+for ac_func in setjmp longjmp writev
 do
 as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -15011,97 +14852,6 @@ _ACEOF
 
 fi
 
-
-
-  { echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
-echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
-if test "${ac_cv_func_rand48+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-
-  ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-    cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-int
-main ()
-{
-srand48(0);lrand48();drand48();
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } &&
-	 { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$?
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_func_rand48=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_func_rand48=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5 -echo "${ECHO_T}$ac_cv_func_rand48" >&6; } - -if test "$ac_cv_func_rand48" = "yes" ; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_RAND48 1 -_ACEOF - -fi - - { echo "$as_me:$LINENO: checking whether arc4random is declared" >&5 echo $ECHO_N "checking whether arc4random is declared... $ECHO_C" >&6; } if test "${ac_cv_have_decl_arc4random+set}" = set; then @@ -18529,7 +18279,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.7.1, which was +This file was extended by LLVM $as_me 3.8.0svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -18582,7 +18332,7 @@ Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.7.1 +LLVM config.status 3.8.0svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" @@ -18836,6 +18586,7 @@ CXX!$CXX$ac_delim CXXFLAGS!$CXXFLAGS$ac_delim ac_ct_CXX!$ac_ct_CXX$ac_delim CPP!$CPP$ac_delim +CXX_COMPILER!$CXX_COMPILER$ac_delim subdirs!$subdirs$ac_delim ENABLE_POLLY!$ENABLE_POLLY$ac_delim LLVM_HAS_POLLY!$LLVM_HAS_POLLY$ac_delim @@ -18879,7 +18630,6 @@ ENABLE_WERROR!$ENABLE_WERROR$ac_delim ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim ENABLE_ABI_BREAKING_CHECKS!$ENABLE_ABI_BREAKING_CHECKS$ac_delim -DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -18921,6 +18671,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim JIT!$JIT$ac_delim @@ -18999,7 +18750,7 @@ GAS!$GAS$ac_delim HAVE_LINK_VERSION_SCRIPT!$HAVE_LINK_VERSION_SCRIPT$ac_delim EGREP!$EGREP$ac_delim NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim -NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim +MISSING_FIELD_INITIALIZERS!$MISSING_FIELD_INITIALIZERS$ac_delim COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim NO_MAYBE_UNINITIALIZED!$NO_MAYBE_UNINITIALIZED$ac_delim NO_UNINITIALIZED!$NO_UNINITIALIZED$ac_delim @@ -19017,7 +18768,6 @@ HAVE_LIBZ!$HAVE_LIBZ$ac_delim HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim MMAP_FILE!$MMAP_FILE$ac_delim SHLIBEXT!$SHLIBEXT$ac_delim -LLVM_PREFIX!$LLVM_PREFIX$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -19059,6 +18809,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +LLVM_PREFIX!$LLVM_PREFIX$ac_delim LLVM_BINDIR!$LLVM_BINDIR$ac_delim LLVM_DATADIR!$LLVM_DATADIR$ac_delim LLVM_DOCSDIR!$LLVM_DOCSDIR$ac_delim @@ -19079,7 +18830,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" 
conf$$subs.sed | grep -c X` = 18; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 19; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -19713,3 +19464,14 @@ echo "$as_me: error: $ac_sub_configure failed for $ac_dir" >&2;}
   done
 fi
 
+echo ""
+echo ""
+echo "################################################################################"
+echo "################################################################################"
+echo "The LLVM project has deprecated building with configure & make."
+echo "The autoconf-based makefile build system will be removed in the 3.9 release."
+echo ""
+echo "Please migrate to the CMake-based build system."
+echo "For more information see: http://llvm.org/docs/CMake.html"
+echo "################################################################################"
+echo "################################################################################"
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index f62cc3fe4d31..fe7fcbd4bc50 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -389,11 +389,10 @@ in its ``getAnalysisUsage`` that it does so. Some passes attempt to use
 ``AU.addPreserved``, however this doesn't actually have any effect.
 
-``AliasAnalysisCounter`` (``-count-aa``) and ``AliasDebugger`` (``-debug-aa``)
-are implemented as ``ModulePass`` classes, so if your alias analysis uses
-``FunctionPass``, it won't be able to use these utilities.  If you try to use
-them, the pass manager will silently route alias analysis queries directly to
-``BasicAliasAnalysis`` instead.
+``AliasAnalysisCounter`` (``-count-aa``) is implemented as a ``ModulePass``,
+so if your alias analysis uses ``FunctionPass``, it won't be able to use this
+utility.  If you try to use it, the pass manager will silently route alias
+analysis queries directly to ``BasicAliasAnalysis`` instead.
 
 Similarly, the ``opt -p`` option introduces ``ModulePass`` passes between each
 pass, which prevents the use of ``FunctionPass`` alias analysis passes.
diff --git a/docs/Atomics.rst b/docs/Atomics.rst
index 9068df46b023..79ab74792dd4 100644
--- a/docs/Atomics.rst
+++ b/docs/Atomics.rst
@@ -446,7 +446,7 @@ It is often easiest for backends to use AtomicExpandPass to lower some of the
 atomic constructs. Here are some lowerings it can do:
 
 * cmpxchg -> loop with load-linked/store-conditional
-  by overriding ``hasLoadLinkedStoreConditional()``, ``emitLoadLinked()``,
+  by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``,
   ``emitStoreConditional()``
 * large loads/stores -> ll-sc/cmpxchg
   by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()``
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index 25ea421ed083..d6e3099bdb63 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -756,6 +756,7 @@ function. The operand fields are:
   * ``anyregcc``: code 13
   * ``preserve_mostcc``: code 14
   * ``preserve_allcc``: code 15
+  * ``cxx_fast_tlscc``: code 17
   * ``x86_stdcallcc``: code 64
   * ``x86_fastcallcc``: code 65
   * ``arm_apcscc``: code 66
@@ -851,7 +852,7 @@ in the *paramattr* field of module block `FUNCTION`_ records, or within the
 *attr* field of function block ``INST_INVOKE`` and ``INST_CALL`` records.
 
 Entries within ``PARAMATTR_BLOCK`` are constructed to ensure that each is unique
-(i.e., no two indicies represent equivalent attribute lists).
+(i.e., no two indices represent equivalent attribute lists).
 
 ..
_PARAMATTR_CODE_ENTRY:
@@ -904,7 +905,7 @@ table entry, which may be referenced by 0-based index from instructions,
 constants, metadata, type symbol table entries, or other type operator records.
 
 Entries within ``TYPE_BLOCK`` are constructed to ensure that each entry is
-unique (i.e., no two indicies represent structurally equivalent types).
+unique (i.e., no two indices represent structurally equivalent types).
 
 .. _TYPE_CODE_NUMENTRY:
 .. _NUMENTRY:
diff --git a/docs/BitSets.rst b/docs/BitSets.rst
index c6ffdbdb8a11..18dbf6df563f 100644
--- a/docs/BitSets.rst
+++ b/docs/BitSets.rst
@@ -10,17 +10,41 @@ for the type of the class or its derived classes.
 
 To use the mechanism, a client creates a global metadata node named
 ``llvm.bitsets``.  Each element is a metadata node with three elements:
-the first is a metadata string containing an identifier for the bitset,
-the second is a global variable and the third is a byte offset into the
-global variable.
+
+1. a metadata object representing an identifier for the bitset
+2. either a global variable or a function
+3. a byte offset into the global (generally zero for functions)
+
+Each bitset must exclusively contain either global variables or functions.
+
+.. admonition:: Limitation
+
+  The current implementation only supports functions as members of bitsets on
+  the x86-32 and x86-64 architectures.
 
 This will cause a link-time optimization pass to generate bitsets from the
-memory addresses referenced from the elements of the bitset metadata. The pass
-will lay out the referenced globals consecutively, so their definitions must
-be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for
-laying out the globals efficiently to minimize the sizes of the underlying
-bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code
-to test whether a given pointer is a member of a bitset.
+memory addresses referenced from the elements of the bitset metadata. The
+pass will lay out referenced global variables consecutively, so their
+definitions must be available at LTO time.
+
+A bit set containing functions is transformed into a jump table: a block of
+code with one branch instruction per function in the bit set, each branching
+to its target function. The pass redirects any taken function addresses to
+the corresponding jump table entry. In the object file's symbol table, the
+jump table entries take the identities of the original functions, so that
+addresses taken outside the module will pass any verification done inside
+the module.
+
+Jump tables may call external functions, so their definitions need not
+be available at LTO time. Note that if an externally defined function is a
+member of a bitset, there is no guarantee that its identity within the module
+will be the same as its identity outside of the module, as the former will
+be the jump table entry if a jump table is necessary.
+
+The `GlobalLayoutBuilder`_ class is responsible for laying out the globals
+efficiently to minimize the sizes of the underlying bitsets. An intrinsic,
+:ref:`llvm.bitset.test <bitset.test>`, generates code to test whether a
+given pointer is a member of a bitset.
 
 :Example:
 
@@ -33,13 +57,25 @@ to test whether a given pointer is a member of a bitset.
   @c = internal global i32 0
   @d = internal global [2 x i32] [i32 0, i32 0]
 
-  !llvm.bitsets = !{!0, !1, !2, !3, !4}
+  define void @e() {
+    ret void
+  }
+
+  define void @f() {
+    ret void
+  }
+
+  declare void @g()
+
+  !llvm.bitsets = !{!0, !1, !2, !3, !4, !5, !6}
 
   !0 = !{!"bitset1", i32* @a, i32 0}
   !1 = !{!"bitset1", i32* @b, i32 0}
   !2 = !{!"bitset2", i32* @b, i32 0}
   !3 = !{!"bitset2", i32* @c, i32 0}
   !4 = !{!"bitset2", i32* @d, i32 4}
+  !5 = !{!"bitset3", void ()* @e, i32 0}
+  !6 = !{!"bitset3", void ()* @g, i32 0}
 
   declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
 
@@ -55,6 +91,12 @@ to test whether a given pointer is a member of a bitset.
     ret i1 %x
   }
 
+  define i1 @baz(void ()* %p) {
+    %pi8 = bitcast void ()* %p to i8*
+    %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
+    ret i1 %x
+  }
+
   define void @main() {
     %a1 = call i1 @foo(i32* @a) ; returns 1
     %b1 = call i1 @foo(i32* @b) ; returns 1
@@ -64,6 +106,9 @@ to test whether a given pointer is a member of a bitset.
     %c2 = call i1 @bar(i32* @c) ; returns 1
     %d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0
     %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
+    %e = call i1 @baz(void ()* @e) ; returns 1
+    %f = call i1 @baz(void ()* @f) ; returns 0
+    %g = call i1 @baz(void ()* @g) ; returns 1
 
     ret void
   }
diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst
index 2ebc6c32416e..6cbcb0f0fb24 100644
--- a/docs/BranchWeightMetadata.rst
+++ b/docs/BranchWeightMetadata.rst
@@ -27,7 +27,7 @@ Supported Instructions
 ^^^^^^^^^^^^^^
 
 Metadata is only assigned to the conditional branches. There are two extra
-operarands for the true and the false branch.
+operands for the true and the false branch.
 
 .. code-block:: llvm
 
@@ -114,12 +114,12 @@ CFG Modifications
 
 Branch Weight Metatada is not proof against CFG changes. If terminator operands'
 are changed some action should be taken. In other case some misoptimizations may
-occur due to incorrent branch prediction information.
+occur due to incorrect branch prediction information.
 
 Function Entry Counts
 =====================
 
-To allow comparing different functions durint inter-procedural analysis and
+To allow comparing different functions during inter-procedural analysis and
 optimization, ``MD_prof`` nodes can also be assigned to a function definition.
 The first operand is a string indicating the name of the associated counter.
diff --git a/docs/BuildingLLVMWithAutotools.rst b/docs/BuildingLLVMWithAutotools.rst
index 6f9a13410555..083ead67ebb6 100644
--- a/docs/BuildingLLVMWithAutotools.rst
+++ b/docs/BuildingLLVMWithAutotools.rst
@@ -5,6 +5,12 @@ Building LLVM With Autotools
 .. contents::
    :local:
 
+.. warning::
+
+  Building LLVM with autoconf is deprecated as of 3.8. The autoconf build
+  system will be removed in 3.9. Please migrate to using CMake. For more
+  information see: `Building LLVM with CMake <CMake.html>`_
+
 Overview
 ========
 
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 909fc04248c7..38199e5cc587 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -10,11 +10,11 @@ Introduction
 
 `CMake <http://www.cmake.org/>`_ is a cross-platform build-generator tool. CMake
 does not build the project, it generates the files needed by your build tool
-(GNU make, Visual Studio, etc) for building LLVM.
+(GNU make, Visual Studio, etc.) for building LLVM.
 
 If you are really anxious about getting a functional LLVM build, go to the
-`Quick start`_ section.
If you are a CMake novice, start on `Basic CMake usage`_ -and then go back to the `Quick start`_ once you know what you are doing. The +`Quick start`_ section. If you are a CMake novice, start with `Basic CMake usage`_ +and then go back to the `Quick start`_ section once you know what you are doing. The `Options and variables`_ section is a reference for customizing your build. If you already have experience with CMake, this is the recommended starting point. @@ -31,35 +31,35 @@ We use here the command-line, non-interactive CMake interface. #. Open a shell. Your development tools must be reachable from this shell through the PATH environment variable. -#. Create a directory for containing the build. It is not supported to build - LLVM on the source directory. cd to this directory: +#. Create a build directory. Building LLVM in the source + directory is not supported. cd to this directory: .. code-block:: console $ mkdir mybuilddir $ cd mybuilddir -#. Execute this command on the shell replacing `path/to/llvm/source/root` with +#. Execute this command in the shell replacing `path/to/llvm/source/root` with the path to the root of your LLVM source tree: .. code-block:: console $ cmake path/to/llvm/source/root - CMake will detect your development environment, perform a series of test and + CMake will detect your development environment, perform a series of tests, and generate the files required for building LLVM. CMake will use default values for all build parameters. See the `Options and variables`_ section for - fine-tuning your build + a list of build parameters that you can modify. This can fail if CMake can't detect your toolset, or if it thinks that the - environment is not sane enough. On this case make sure that the toolset that - you intend to use is the only one reachable from the shell and that the shell - itself is the correct one for you development environment. CMake will refuse + environment is not sane enough. In this case, make sure that the toolset that + you intend to use is the only one reachable from the shell, and that the shell + itself is the correct one for your development environment. CMake will refuse to build MinGW makefiles if you have a POSIX shell reachable through the PATH environment variable, for instance. You can force CMake to use a given build - tool, see the `Usage`_ section. + tool; for instructions, see the `Usage`_ section, below. -#. After CMake has finished running, proceed to use IDE project files or start +#. After CMake has finished running, proceed to use IDE project files, or start the build from the build directory: .. code-block:: console @@ -67,9 +67,9 @@ We use here the command-line, non-interactive CMake interface. $ cmake --build . The ``--build`` option tells ``cmake`` to invoke the underlying build - tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc). + tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc.) - The underlying build tool can be invoked directly either of course, but + The underlying build tool can be invoked directly, of course, but the ``--build`` option is portable. #. After LLVM has finished building, install it from the build directory: @@ -95,33 +95,39 @@ We use here the command-line, non-interactive CMake interface. Basic CMake usage ================= -This section explains basic aspects of CMake, mostly for explaining those -options which you may need on your day-to-day usage. +This section explains basic aspects of CMake +which you may need in your day-to-day usage. 
-CMake comes with extensive documentation in the form of html files and on the -cmake executable itself. Execute ``cmake --help`` for further help options. +CMake comes with extensive documentation, in the form of html files, and as +online help accessible via the ``cmake`` executable itself. Execute ``cmake +--help`` for further help options. -CMake requires to know for which build tool it shall generate files (GNU make, -Visual Studio, Xcode, etc). If not specified on the command line, it tries to -guess it based on you environment. Once identified the build tool, CMake uses -the corresponding *Generator* for creating files for your build tool. You can +CMake allows you to specify a build tool (e.g., GNU make, Visual Studio, +or Xcode). If not specified on the command line, CMake tries to guess which +build tool to use, based on your environment. Once it has identified your +build tool, CMake uses the corresponding *Generator* to create files for your +build tool (e.g., Makefiles or Visual Studio or Xcode project files). You can explicitly specify the generator with the command line option ``-G "Name of the -generator"``. For knowing the available generators on your platform, execute +generator"``. To see a list of the available generators on your system, execute .. code-block:: console $ cmake --help -This will list the generator's names at the end of the help text. Generator's -names are case-sensitive. Example: +This will list the generator names at the end of the help text. + +Generators' names are case-sensitive, and may contain spaces. For this reason, +you should enter them exactly as they are listed in the ``cmake --help`` +output, in quotes. For example, to generate project files specifically for +Visual Studio 12, you can execute: .. code-block:: console - $ cmake -G "Visual Studio 11" path/to/llvm/source/root + $ cmake -G "Visual Studio 12" path/to/llvm/source/root For a given development platform there can be more than one adequate -generator. If you use Visual Studio "NMake Makefiles" is a generator you can use -for building with NMake. By default, CMake chooses the more specific generator +generator. If you use Visual Studio, "NMake Makefiles" is a generator you can use +for building with NMake. By default, CMake chooses the most specific generator supported by your development environment. If you want an alternative generator, you must tell this to CMake with the ``-G`` option. @@ -142,18 +148,20 @@ CMake command line like this: $ cmake -DVARIABLE=value path/to/llvm/source -You can set a variable after the initial CMake invocation for changing its +You can set a variable after the initial CMake invocation to change its value. You can also undefine a variable: .. code-block:: console $ cmake -UVARIABLE path/to/llvm/source -Variables are stored on the CMake cache. This is a file named ``CMakeCache.txt`` -on the root of the build directory. Do not hand-edit it. +Variables are stored in the CMake cache. This is a file named ``CMakeCache.txt`` +stored at the root of your build directory that is generated by ``cmake``. +Editing it yourself is not recommended. -Variables are listed here appending its type after a colon. It is correct to -write the variable and the type on the CMake command line: +Variables are listed in the CMake cache and later in this document with +the variable name and type separated by a colon. You can also specify the +variable and type on the CMake command line: .. 
code-block:: console @@ -163,17 +171,17 @@ Frequently-used CMake variables ------------------------------- Here are some of the CMake variables that are used often, along with a -brief explanation and LLVM-specific notes. For full documentation, check the -CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. +brief explanation and LLVM-specific notes. For full documentation, consult the +CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``. **CMAKE_BUILD_TYPE**:STRING - Sets the build type for ``make`` based generators. Possible values are - Release, Debug, RelWithDebInfo and MinSizeRel. On systems like Visual Studio - the user sets the build type with the IDE settings. + Sets the build type for ``make``-based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as + Visual Studio, you should use the IDE settings to set the build type. **CMAKE_INSTALL_PREFIX**:PATH Path where LLVM will be installed if "make install" is invoked or the - "INSTALL" target is built. + "install" target is built. **LLVM_LIBDIR_SUFFIX**:STRING Extra suffix to append to the directory where libraries are to be @@ -188,8 +196,9 @@ CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. **BUILD_SHARED_LIBS**:BOOL Flag indicating if shared libraries will be built. Its default value is - OFF. Shared libraries are not supported on Windows and not recommended on the - other OSes. + OFF. This option is only recommended for use by LLVM developers. + On Windows, shared libraries may be used when building with MinGW, including + mingw-w64, but not when building with the Microsoft toolchain. .. _LLVM-specific variables: @@ -203,13 +212,13 @@ LLVM-specific variables **LLVM_BUILD_TOOLS**:BOOL Build LLVM tools. Defaults to ON. Targets for building each tool are generated - in any case. You can build an tool separately by invoking its target. For - example, you can build *llvm-as* with a makefile-based system executing *make - llvm-as* on the root of your build directory. + in any case. You can build a tool separately by invoking its target. For + example, you can build *llvm-as* with a Makefile-based system by executing *make + llvm-as* at the root of your build directory. **LLVM_INCLUDE_TOOLS**:BOOL - Generate build targets for the LLVM tools. Defaults to ON. You can use that - option for disabling the generation of build targets for the LLVM tools. + Generate build targets for the LLVM tools. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM tools. **LLVM_BUILD_EXAMPLES**:BOOL Build LLVM examples. Defaults to OFF. Targets for building each example are @@ -217,20 +226,20 @@ LLVM-specific variables details. **LLVM_INCLUDE_EXAMPLES**:BOOL - Generate build targets for the LLVM examples. Defaults to ON. You can use that - option for disabling the generation of build targets for the LLVM examples. + Generate build targets for the LLVM examples. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM examples. **LLVM_BUILD_TESTS**:BOOL Build LLVM unit tests. Defaults to OFF. Targets for building each unit test - are generated in any case. You can build a specific unit test with the target - *UnitTestNameTests* (where at this time *UnitTestName* can be ADT, Analysis, - ExecutionEngine, JIT, Support, Transform, VMCore; see the subdirectories of - *unittests* for an updated list.) It is possible to build all unit tests with - the target *UnitTests*. 
+ are generated in any case. You can build a specific unit test using the + targets defined under *unittests*, such as ADTTests, IRTests, SupportTests, + etc. (Search for ``add_llvm_unittest`` in the subdirectories of *unittests* + for a complete list of unit tests.) It is possible to build all unit tests + with the target *UnitTests*. **LLVM_INCLUDE_TESTS**:BOOL Generate build targets for the LLVM unit tests. Defaults to ON. You can use - that option for disabling the generation of build targets for the LLVM unit + this option to disable the generation of build targets for the LLVM unit tests. **LLVM_APPEND_VC_REV**:BOOL @@ -249,39 +258,39 @@ LLVM-specific variables is *Debug*. **LLVM_ENABLE_EH**:BOOL - Build LLVM with exception handling support. This is necessary if you wish to + Build LLVM with exception-handling support. This is necessary if you wish to link against LLVM libraries and make use of C++ exceptions in your own code that need to propagate through LLVM code. Defaults to OFF. **LLVM_ENABLE_PIC**:BOOL - Add the ``-fPIC`` flag for the compiler command-line, if the compiler supports + Add the ``-fPIC`` flag to the compiler command-line, if the compiler supports this flag. Some systems, like Windows, do not need this flag. Defaults to ON. **LLVM_ENABLE_RTTI**:BOOL - Build LLVM with run time type information. Defaults to OFF. + Build LLVM with run-time type information. Defaults to OFF. **LLVM_ENABLE_WARNINGS**:BOOL Enable all compiler warnings. Defaults to ON. **LLVM_ENABLE_PEDANTIC**:BOOL - Enable pedantic mode. This disables compiler specific extensions, if + Enable pedantic mode. This disables compiler-specific extensions, if possible. Defaults to ON. **LLVM_ENABLE_WERROR**:BOOL - Stop and fail build, if a compiler warning is triggered. Defaults to OFF. + Stop and fail the build, if a compiler warning is triggered. Defaults to OFF. **LLVM_ABI_BREAKING_CHECKS**:STRING Used to decide if LLVM should be built with ABI breaking checks or not. Allowed values are `WITH_ASSERTS` (default), `FORCE_ON` and `FORCE_OFF`. `WITH_ASSERTS` turns on ABI breaking checks in an assertion enabled build. `FORCE_ON` (`FORCE_OFF`) turns them on - (off) irrespective of whether normal (`NDEBUG` based) assertions are + (off) irrespective of whether normal (`NDEBUG`-based) assertions are enabled or not. A version of LLVM built with ABI breaking checks is not ABI compatible with a version built without it. **LLVM_BUILD_32_BITS**:BOOL - Build 32-bits executables and libraries on 64-bits systems. This option is - available only on some 64-bits unix systems. Defaults to OFF. + Build 32-bit executables and libraries on 64-bit systems. This option is + available only on some 64-bit Unix systems. Defaults to OFF. **LLVM_TARGET_ARCH**:STRING LLVM target to use for native code generation. This is required for JIT @@ -290,7 +299,7 @@ LLVM-specific variables to the target architecture name. **LLVM_TABLEGEN**:STRING - Full path to a native TableGen executable (usually named ``tblgen``). This is + Full path to a native TableGen executable (usually named ``llvm-tblgen``). This is intended for cross-compiling: if the user sets this variable, no native TableGen will be created. @@ -300,29 +309,40 @@ LLVM-specific variables others. **LLVM_LIT_TOOLS_DIR**:PATH - The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to "", - then Lit seeks tools according to %PATH%. Lit can find tools(eg. grep, sort, - &c) on LLVM_LIT_TOOLS_DIR at first, without specifying GnuWin32 to %PATH%. 
+ The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to + the empty string, in which case lit will look for tools needed for tests + (e.g. ``grep``, ``sort``, etc.) in your %PATH%. If GnuWin32 is not in your + %PATH%, then you can set this variable to the GnuWin32 directory so that + lit can find tools needed for tests in that directory. **LLVM_ENABLE_FFI**:BOOL - Indicates whether LLVM Interpreter will be linked with Foreign Function - Interface library. If the library or its headers are installed on a custom - location, you can set the variables FFI_INCLUDE_DIR and - FFI_LIBRARY_DIR. Defaults to OFF. + Indicates whether the LLVM Interpreter will be linked with the Foreign Function + Interface library (libffi) in order to enable calling external functions. + If the library or its headers are installed in a custom + location, you can also set the variables FFI_INCLUDE_DIR and + FFI_LIBRARY_DIR to the directories where ffi.h and libffi.so can be found, + respectively. Defaults to OFF. **LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH - Path to ``{Clang,lld,Polly}``\'s source directory. Defaults to - ``tools/{clang,lld,polly}``. ``{Clang,lld,Polly}`` will not be built when it - is empty or it does not point to a valid path. + These variables specify the path to the source directory for the external + LLVM projects Clang, lld, and Polly, respectively, relative to the top-level + source directory. If the in-tree subdirectory for an external project + exists (e.g., llvm/tools/clang for Clang), then the corresponding variable + will not be used. If the variable for an external project does not point + to a valid path, then that project will not be built. **LLVM_USE_OPROFILE**:BOOL - Enable building OProfile JIT support. Defaults to OFF + Enable building OProfile JIT support. Defaults to OFF. + +**LLVM_PROFDATA_FILE**:PATH + Path to a profdata file to pass into clang's -fprofile-instr-use flag. This + can only be specified if you're building with clang. **LLVM_USE_INTEL_JITEVENTS**:BOOL - Enable building support for Intel JIT Events API. Defaults to OFF + Enable building support for Intel JIT Events API. Defaults to OFF. **LLVM_ENABLE_ZLIB**:BOOL - Build with zlib to support compression/uncompression in LLVM tools. + Enable building with zlib to support compression/uncompression in LLVM tools. Defaults to ON. **LLVM_USE_SANITIZER**:STRING @@ -361,14 +381,14 @@ LLVM-specific variables ``org.llvm.qch``. This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise this has no effect. + otherwise it has no effect. **LLVM_DOXYGEN_QHP_NAMESPACE**:STRING Namespace under which the intermediate Qt Help Project file lives. See `Qt Help Project`_ for more information. Defaults to "org.llvm". This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise - this has no effect. + it has no effect. **LLVM_DOXYGEN_QHP_CUST_FILTER_NAME**:STRING See `Qt Help Project`_ for @@ -377,14 +397,14 @@ LLVM-specific variables be used in Qt Creator to select only documentation from LLVM when browsing through all the help files that you might have loaded. This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise this has no effect. + otherwise it has no effect. .. _Qt Help Project: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters **LLVM_DOXYGEN_QHELPGENERATOR_PATH**:STRING The path to the ``qhelpgenerator`` executable. Defaults to whatever CMake's ``find_program()`` can find. 
This option is only useful in combination with
-  ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise this has no
+  ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise it has no
   effect.
 
 **LLVM_DOXYGEN_SVG**:BOOL
@@ -416,18 +436,24 @@ LLVM-specific variables
   If enabled then sphinx documentation warnings will be treated as
   errors. Defaults to ON.
 
+**LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL
+  OS X Only: If enabled, CMake will generate a target named
+  'install-xcode-toolchain'. This target will create a directory at
+  $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
+  be used to override the default system tools.
+
 Executing the test suite
 ========================
 
-Testing is performed when the *check* target is built. For instance, if you are
-using makefiles, execute this command while on the top level of your build
-directory:
+Testing is performed when the *check-all* target is built. For instance, if you are
+using Makefiles, execute this command in the root of your build directory:
 
 .. code-block:: console
 
-  $ make check
+  $ make check-all
 
-On Visual Studio, you may run tests to build the project "check".
+On Visual Studio, you may run tests by building the project "check-all".
+For more information about testing, see the :doc:`TestingGuide`.
 
 Cross compiling
 ===============
@@ -447,10 +473,10 @@ Embedding LLVM in your project
 
 From LLVM 3.5 onwards both the CMake and autoconf/Makefile build systems export
 LLVM libraries as importable CMake targets. This means that clients of LLVM can
-now reliably use CMake to develop their own LLVM based projects against an
+now reliably use CMake to develop their own LLVM-based projects against an
 installed version of LLVM regardless of how it was built.
 
-Here is a simple example of CMakeLists.txt file that imports the LLVM libraries
+Here is a simple example of a CMakeLists.txt file that imports the LLVM libraries
 and uses them to build a simple application ``simple-tool``.
 
 .. code-block:: cmake
 
@@ -495,8 +521,8 @@ This file is available in two different locations.
   On Linux typically this is ``/usr/share/llvm/cmake/LLVMConfig.cmake``.
 
 * ``<LLVM_BUILD_ROOT>/share/llvm/cmake/LLVMConfig.cmake`` where
-  ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note this only
-  available when building LLVM with CMake**
+  ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note: this is only
+  available when building LLVM with CMake.**
 
 If LLVM is installed in your operating system's normal installation prefix (e.g.
 on Linux this is usually ``/usr/``) ``find_package(LLVM ...)`` will
@@ -529,7 +555,7 @@ include
 A list of include paths to directories containing LLVM header files.
 
 ``LLVM_PACKAGE_VERSION``
-  The LLVM version. This string can be used with CMake conditionals. E.g. ``if
+  The LLVM version. This string can be used with CMake conditionals, e.g., ``if
   (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5")``.
 
 ``LLVM_TOOLS_BINARY_DIR``
@@ -582,7 +608,7 @@ Contents of ``//CMakeLists.txt``:
 
 Note that if you intend for this pass to be merged into the LLVM source tree at
 some point in the future, it might make more sense to use LLVM's internal
-add_llvm_loadable_module function instead by...
+``add_llvm_loadable_module`` function instead by...
 
 
 Adding the following to ``/CMakeLists.txt`` (after
@@ -602,7 +628,7 @@ And then changing ``//CMakeLists.txt`` to
   )
 
 When you are done developing your pass, you may wish to integrate it
-into LLVM source tree. You can achieve it in two easy steps:
+into the LLVM source tree. You can achieve it in two easy steps:
 
 #. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transform`` directory.
@@ -618,6 +644,6 @@ Microsoft Visual C++
 --------------------
 
 **LLVM_COMPILER_JOBS**:STRING
-  Specifies the maximum number of parallell compiler jobs to use per project
+  Specifies the maximum number of parallel compiler jobs to use per project
   when building with msbuild or Visual Studio. Only supported for the Visual
   Studio 2010 CMake generator. 0 means use all processors. Default is 0.
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 2388a92d39ef..eaa175062b61 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -147,7 +147,9 @@ if( NOT uses_ocaml LESS 0 )
     COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
     COMMAND ${OCAMLFIND} ocamldoc -d ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
-                                  -sort -colorize-code -html ${odoc_files})
+                                  -sort -colorize-code -html ${odoc_files}
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/_ocamldoc/style.css
+                                     ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html)
 
   add_dependencies(ocaml_doc ${doc_targets})
 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 03f5cbd726d8..f3b949c7ad15 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -640,7 +640,7 @@ For target specific directives, the MCStreamer has a MCTargetStreamer instance.
 Each target that needs it defines a class that inherits from it and is a lot
 like MCStreamer itself: It has one method per directive and two classes that
 inherit from it, a target object streamer and a target asm streamer. The target
-asm streamer just prints it (``emitFnStart -> .fnstrart``), and the object
+asm streamer just prints it (``emitFnStart -> .fnstart``), and the object
 streamer implements the assembler logic for it.
 
 To make llvm use these classes, the target initialization must call
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index de4f73c546b5..91faadffea62 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -39,7 +39,7 @@ hand, it is reasonable to rename the methods of a class if you're about to
 change it in some other way. Just do the reformatting as a separate commit from
 the functionality change.
 
-The ultimate goal of these guidelines is the increase readability and
+The ultimate goal of these guidelines is to increase the readability and
 maintainability of our common source base. If you have suggestions for topics to
 be included, please mail them to `Chris `_.
 
@@ -178,8 +178,6 @@ being aware of:
 * While most of the atomics library is well implemented, the fences are
   missing. Fortunately, they are rarely needed.
 * The locale support is incomplete.
-* ``std::equal()`` (and other algorithms) incorrectly assert in MSVC when given
-  ``nullptr`` as an iterator.
 
 Other than these areas you should assume the standard library is available and
 working as expected until some build bot tells you otherwise.
If you're in an
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index ed18cd048aa5..46db57f1c845 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -21,6 +21,7 @@ Basic Commands
    lli
    llvm-link
    llvm-ar
+   llvm-lib
    llvm-nm
    llvm-config
    llvm-diff
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index e820eef2faff..0ec14bb2236e 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -80,6 +80,11 @@ OUTPUT OPTIONS
  Show more information on test failures, for example the entire test output
  instead of just the test result.
 
+.. option:: -a, --show-all
+
+ Show more information about all tests, for example the entire test
+ command line and output.
+
 .. option:: --no-progress-bar
 
  Do not use a curses-based progress bar.
 
diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst
index 8d5c9ce8f8a1..5094259f9f95 100644
--- a/docs/CommandGuide/llc.rst
+++ b/docs/CommandGuide/llc.rst
@@ -127,6 +127,12 @@ End-user Options
  implements an LLVM target. This will permit the target name to be used with the
  :option:`-march` option so that code can be generated for that target.
 
+.. option:: -meabi=[default|gnu|4|5]
+
+ Specify which EABI version the generated code should conform to. Valid EABI
+ versions are *gnu*, *4* and *5*. The default value (*default*) depends on the
+ triple.
+
+
 Tuning/Configuration Options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst
index 502fbd609353..9da13ee47e0e 100644
--- a/docs/CommandGuide/lli.rst
+++ b/docs/CommandGuide/lli.rst
@@ -1,172 +1,127 @@
 lli - directly execute programs from LLVM bitcode
 =================================================
 
-
 SYNOPSIS
 --------
 
-
-**lli** [*options*] [*filename*] [*program args*]
-
+:program:`lli` [*options*] [*filename*] [*program args*]
 
 DESCRIPTION
 -----------
 
+:program:`lli` directly executes programs in LLVM bitcode format. It takes a program
+in LLVM bitcode format and executes it using a just-in-time compiler or an
+interpreter.
 
-**lli** directly executes programs in LLVM bitcode format. It takes a program
-in LLVM bitcode format and executes it using a just-in-time compiler, if one is
-available for the current architecture, or an interpreter. **lli** takes all of
-the same code generator options as llc|llc, but they are only effective when
-**lli** is using the just-in-time compiler.
+:program:`lli` is *not* an emulator. It will not execute IR of different architectures
+and it can only interpret (or JIT-compile) for the host architecture.
 
-If *filename* is not specified, then **lli** reads the LLVM bitcode for the
+The JIT compiler takes the same arguments as other tools, like :program:`llc`,
+but they don't necessarily work for the interpreter.
+
+If `filename` is not specified, then :program:`lli` reads the LLVM bitcode for the
 program from standard input.
 
 The optional *args* specified on the command line are passed to the program as
 arguments.
 
-
 GENERAL OPTIONS
 ---------------
 
-
-
-**-fake-argv0**\ =\ *executable*
+.. option:: -fake-argv0=executable
 
  Override the ``argv[0]`` value passed into the executing program.
 
-
-
-**-force-interpreter**\ =\ *{false,true}*
+.. option:: -force-interpreter={false,true}
 
  If set to true, use the interpreter even if a just-in-time compiler is available
  for this architecture. Defaults to false.
 
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
+..
option:: -load=pluginfilename - -**-load**\ =\ *pluginfilename* - - Causes **lli** to load the plugin (shared object) named *pluginfilename* and use + Causes :program:`lli` to load the plugin (shared object) named *pluginfilename* and use it for optimization. - - -**-stats** +.. option:: -stats Print statistics from the code-generation passes. This is only meaningful for the just-in-time compiler, at present. - - -**-time-passes** +.. option:: -time-passes Record the amount of time needed for each code-generation pass and print it to standard error. +.. option:: -version - -**-version** - - Print out the version of **lli** and exit without doing anything else. - - - + Print out the version of :program:`lli` and exit without doing anything else. TARGET OPTIONS -------------- - - -**-mtriple**\ =\ *target triple* +.. option:: -mtriple=target triple Override the target triple specified in the input bitcode file with the specified string. This may result in a crash if you pick an architecture which is not compatible with the current system. - - -**-march**\ =\ *arch* +.. option:: -march=arch Specify the architecture for which to generate assembly, overriding the target encoded in the bitcode file. See the output of **llc -help** for a list of valid architectures. By default this is inferred from the target triple or autodetected to the current architecture. - - -**-mcpu**\ =\ *cpuname* +.. option:: -mcpu=cpuname Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture. For a list of available CPUs, use: **llvm-as < /dev/null | llc -march=xyz -mcpu=help** - - -**-mattr**\ =\ *a1,+a2,-a3,...* +.. option:: -mattr=a1,+a2,-a3,... Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU. For a list of available attributes, use: **llvm-as < /dev/null | llc -march=xyz -mattr=help** - - - FLOATING POINT OPTIONS ---------------------- - - -**-disable-excess-fp-precision** +.. option:: -disable-excess-fp-precision Disable optimizations that may increase floating point precision. - - -**-enable-no-infs-fp-math** +.. option:: -enable-no-infs-fp-math Enable optimizations that assume no Inf values. - - -**-enable-no-nans-fp-math** +.. option:: -enable-no-nans-fp-math Enable optimizations that assume no NAN values. +.. option:: -enable-unsafe-fp-math - -**-enable-unsafe-fp-math** - - Causes **lli** to enable optimizations that may decrease floating point + Causes :program:`lli` to enable optimizations that may decrease floating point precision. +.. option:: -soft-float - -**-soft-float** - - Causes **lli** to generate software floating point library calls instead of + Causes :program:`lli` to generate software floating point library calls instead of equivalent hardware instructions. - - - CODE GENERATION OPTIONS ----------------------- - - -**-code-model**\ =\ *model* +.. option:: -code-model=model Choose the code model from: - .. code-block:: perl default: Target default code model @@ -175,42 +130,30 @@ CODE GENERATION OPTIONS medium: Medium code model large: Large code model - - - -**-disable-post-RA-scheduler** +.. option:: -disable-post-RA-scheduler Disable scheduling after register allocation. - - -**-disable-spill-fusing** +.. option:: -disable-spill-fusing Disable fusing of spill code into instructions. - - -**-jit-enable-eh** +.. 
option:: -jit-enable-eh
 
  Exception handling should be enabled in the just-in-time compiler.
 
-
-
-**-join-liveintervals**
+.. option:: -join-liveintervals
 
  Coalesce copies (default=true).
 
+.. option:: -nozero-initialized-in-bss
+
+ Don't place zero-initialized symbols into the BSS section.
 
-**-nozero-initialized-in-bss**
 
 Don't place zero-initialized symbols into the BSS section.
 
-
-
-**-pre-RA-sched**\ =\ *scheduler*
+.. option:: -pre-RA-sched=scheduler
 
  Instruction schedulers available (before register allocation):
 
-
 .. code-block:: perl
 
       =default: Best scheduler for the target
@@ -221,74 +164,51 @@ CODE GENERATION OPTIONS
       =list-tdrr: Top-down register reduction list scheduling
       =list-td: Top-down list scheduler
 
-print-machineinstrs
-
- Print generated machine code
-
-
-
-**-regalloc**\ =\ *allocator*
+.. option:: -regalloc=allocator
 
  Register allocator to use (default=linearscan)
 
-
 .. code-block:: perl
 
       =bigblock: Big-block register allocator
       =linearscan: linear scan register allocator
       =local -   local register allocator
       =simple: simple register allocator
 
-
-
-**-relocation-model**\ =\ *model*
+.. option:: -relocation-model=model
 
  Choose relocation model from:
 
-
 .. code-block:: perl
 
       =default: Target default relocation model
       =static: Non-relocatable code
       =pic -   Fully relocatable, position independent code
       =dynamic-no-pic: Relocatable external references, non-relocatable code
 
-
-
-**-spiller**
+.. option:: -spiller
 
  Spiller to use (default=local)
 
-
 .. code-block:: perl
 
       =simple: simple spiller
       =local: local spiller
 
-
-
-**-x86-asm-syntax**\ =\ *syntax*
+.. option:: -x86-asm-syntax=syntax
 
  Choose style of code to emit from X86 backend:
 
-
 .. code-block:: perl
 
       =att: Emit AT&T-style assembly
       =intel: Emit Intel-style assembly
 
-
-
-
 EXIT STATUS
 -----------
 
-
-If **lli** fails to load the program, it will exit with an exit code of 1.
+If :program:`lli` fails to load the program, it will exit with an exit code of 1.
 Otherwise, it will return the exit code of the program it executes.
 
-
 SEE ALSO
 --------
 
-
-llc|llc
+:program:`llc`
diff --git a/docs/CommandGuide/llvm-lib.rst b/docs/CommandGuide/llvm-lib.rst
new file mode 100644
index 000000000000..ecd0a7db7e37
--- /dev/null
+++ b/docs/CommandGuide/llvm-lib.rst
@@ -0,0 +1,31 @@
+llvm-lib - LLVM lib.exe compatible library tool
+===============================================
+
+
+SYNOPSIS
+--------
+
+
+**llvm-lib** [/libpath:<path>] [/out:<output>] [/llvmlibthin]
+[/ignore] [/machine] [/nologo] [files...]
+
+
+DESCRIPTION
+-----------
+
+
+The **llvm-lib** command is intended to be a ``lib.exe``-compatible
+tool. See https://msdn.microsoft.com/en-us/library/7ykb2k5f for the
+general description.
+
+**llvm-lib** has the following extensions:
+
+* Bitcode files in symbol tables.
+  **llvm-lib** includes symbols from both bitcode files and regular
+  object files in the symbol table.
+
+* Creating thin archives.
+  The /llvmlibthin option causes **llvm-lib** to create a thin archive
+  that contains only the symbol table and the headers for the various
+  members. These files are much smaller, but are not compatible with
+  link.exe (lld can handle them).
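+
+For illustration, here is a plausible pair of invocations in the two modes
+(``a.obj``, ``b.obj`` and ``foo.lib`` are placeholder file names, not files
+shipped with LLVM):
+
+::
+
+  llvm-lib /out:foo.lib a.obj b.obj
+  llvm-lib /llvmlibthin /out:foo.lib a.obj b.obj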
diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst
index 7053b7fa710e..74fe4ee9d219 100644
--- a/docs/CommandGuide/llvm-profdata.rst
+++ b/docs/CommandGuide/llvm-profdata.rst
@@ -28,7 +28,7 @@ MERGE
 SYNOPSIS
 ^^^^^^^^
 
-:program:`llvm-profdata merge` [*options*] [*filenames...*]
+:program:`llvm-profdata merge` [*options*] [*filename...*]
 
 DESCRIPTION
 ^^^^^^^^^^^
@@ -37,6 +37,14 @@ DESCRIPTION
 generated by PGO instrumentation and merges them together into a single indexed
 profile data file.
 
+By default, profile data is merged without modification. This means that the
+relative importance of each input file is proportional to the number of samples
+or counts it contains. In general, the input from a longer training run will be
+interpreted as relatively more important than a shorter run. Depending on the
+nature of the training runs it may be useful to adjust the weight given to each
+input file by using the ``-weighted-input`` option.
+
+
 OPTIONS
 ^^^^^^^
 
@@ -49,28 +57,63 @@ OPTIONS
 Specify the output file name. *Output* cannot be ``-`` as the resulting
 indexed profile data can't be written to standard output.
 
+.. option:: -weighted-input=weight,filename
+
+ Specify an input file name along with a weight. The profile counts of the input
+ file will be scaled (multiplied) by the supplied ``weight``, where ``weight``
+ is a decimal integer >= 1. Input files specified without using this option are
+ assigned a default weight of 1. Examples are shown below.
+
 .. option:: -instr (default)
 
  Specify that the input profile is an instrumentation-based profile.
 
 .. option:: -sample
 
- Specify that the input profile is a sample-based profile. When using
- sample-based profiles, the format of the generated file can be generated
- in one of three ways:
+ Specify that the input profile is a sample-based profile.
+
+ The format of the generated file can be generated in one of three ways:
 
 .. option:: -binary (default)
 
- Emit the profile using a binary encoding.
+ Emit the profile using a binary encoding. For instrumentation-based profiles,
+ the output format is the indexed binary format.
 
 .. option:: -text
 
- Emit the profile in text mode.
+ Emit the profile in text mode. This option can be used with both
+ sample-based and instrumentation-based profiles. When this option is used,
+ the profile will be dumped in the text format that is parsable by the profile
+ reader.
 
 .. option:: -gcc
 
  Emit the profile using GCC's gcov format (Not yet supported).
 
+EXAMPLES
+^^^^^^^^
+Basic Usage
++++++++++++
+Merge three profiles:
+
+::
+
+ llvm-profdata merge foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+Weighted Input
+++++++++++++++
+The input file `foo.profdata` is especially important; multiply its counts by 10:
+
+::
+
+ llvm-profdata merge -weighted-input=10,foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+Exactly equivalent to the previous invocation (explicit form; useful for programmatic invocation):
+
+::
+
+ llvm-profdata merge -weighted-input=10,foo.profdata -weighted-input=1,bar.profdata -weighted-input=1,baz.profdata -output merged.profdata
+
 .. program:: llvm-profdata show
 
 .. _profdata-show:
 
@@ -121,6 +164,13 @@ OPTIONS
 
  Specify that the input profile is an instrumentation-based profile.
 
+.. option:: -text
+
+ Instruct the profile dumper to show profile counts in the text format of the
+ instrumentation-based profile data representation.
By default, the profile + information is dumped in a more human readable form (also in text) with + annotations. + .. option:: -sample Specify that the input profile is a sample-based profile. diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst index 96720e633f2f..ec4178e4e7ab 100644 --- a/docs/CommandGuide/llvm-symbolizer.rst +++ b/docs/CommandGuide/llvm-symbolizer.rst @@ -56,6 +56,14 @@ EXAMPLE foo(int) /tmp/a.cc:12 + $cat addr.txt + 0x40054d + $llvm-symbolizer -inlining -print-address -pretty-print -obj=addr.exe < addr.txt + 0x40054d: inc at /tmp/x.c:3:3 + (inlined by) main at /tmp/x.c:9:0 + $llvm-symbolizer -inlining -pretty-print -obj=addr.exe < addr.txt + inc at /tmp/x.c:3:3 + (inlined by) main at /tmp/x.c:9:0 OPTIONS ------- @@ -98,6 +106,14 @@ OPTIONS location, look for the debug info at the .dSYM path provided via the ``-dsym-hint`` flag. This flag can be used multiple times. +.. option:: -print-address + + Print address before the source code location. Defaults to false. + +.. option:: -pretty-print + + Print human readable output. If ``-inlining`` is specified, enclosing scope is + prefixed by (inlined by). Refer to listed examples. EXIT STATUS ----------- diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst index 1d85215f2af3..556c302501e2 100644 --- a/docs/CommandLine.rst +++ b/docs/CommandLine.rst @@ -1737,6 +1737,7 @@ exported by the ``lib/VMCore/PassManager.cpp`` file. .. _dynamically loaded options: Dynamically adding command line options +--------------------------------------- .. todo:: diff --git a/docs/CompileCudaWithLLVM.rst b/docs/CompileCudaWithLLVM.rst new file mode 100644 index 000000000000..a981ffe1e8f5 --- /dev/null +++ b/docs/CompileCudaWithLLVM.rst @@ -0,0 +1,169 @@ +=================================== +Compiling CUDA C/C++ with LLVM +=================================== + +.. contents:: + :local: + +Introduction +============ + +This document contains the user guides and the internals of compiling CUDA +C/C++ with LLVM. It is aimed at both users who want to compile CUDA with LLVM +and developers who want to improve LLVM for GPUs. This document assumes a basic +familiarity with CUDA. Information about CUDA programming can be found in the +`CUDA programming guide +`_. + +How to Build LLVM with CUDA Support +=================================== + +Below is a quick summary of downloading and building LLVM. Consult the `Getting +Started `_ page for more details on +setting up LLVM. + +#. Checkout LLVM + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +#. Checkout Clang + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/cfe/trunk clang + +#. Configure and build LLVM and Clang + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ mkdir build + $ cd build + $ cmake [options] .. + $ make + +How to Compile CUDA C/C++ with LLVM +=================================== + +We assume you have installed the CUDA driver and runtime. Consult the `NVIDIA +CUDA installation Guide +`_ if +you have not. + +Suppose you want to compile and run the following CUDA program (``axpy.cu``) +which multiplies a ``float`` array by a ``float`` scalar (AXPY). + +.. 
code-block:: c++
+
+  #include <helper_cuda.h> // for checkCudaErrors
+
+  #include <iostream>
+
+  __global__ void axpy(float a, float* x, float* y) {
+    y[threadIdx.x] = a * x[threadIdx.x];
+  }
+
+  int main(int argc, char* argv[]) {
+    const int kDataLen = 4;
+
+    float a = 2.0f;
+    float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f};
+    float host_y[kDataLen];
+
+    // Copy input data to device.
+    float* device_x;
+    float* device_y;
+    checkCudaErrors(cudaMalloc(&device_x, kDataLen * sizeof(float)));
+    checkCudaErrors(cudaMalloc(&device_y, kDataLen * sizeof(float)));
+    checkCudaErrors(cudaMemcpy(device_x, host_x, kDataLen * sizeof(float),
+                               cudaMemcpyHostToDevice));
+
+    // Launch the kernel.
+    axpy<<<1, kDataLen>>>(a, device_x, device_y);
+
+    // Copy output data to host.
+    checkCudaErrors(cudaDeviceSynchronize());
+    checkCudaErrors(cudaMemcpy(host_y, device_y, kDataLen * sizeof(float),
+                               cudaMemcpyDeviceToHost));
+
+    // Print the results.
+    for (int i = 0; i < kDataLen; ++i) {
+      std::cout << "y[" << i << "] = " << host_y[i] << "\n";
+    }
+
+    checkCudaErrors(cudaDeviceReset());
+    return 0;
+  }
+
+The command line for compilation is similar to what you would use for C++.
+
+.. code-block:: console
+
+  $ clang++ -o axpy -I<CUDA install path>/samples/common/inc -L<CUDA install path>/<lib64 or lib> axpy.cu -lcudart_static -lcuda -ldl -lrt -pthread
+  $ ./axpy
+  y[0] = 2
+  y[1] = 4
+  y[2] = 6
+  y[3] = 8
+
+Note that ``helper_cuda.h`` comes from the CUDA samples, so you need the
+samples installed for this example. ``<CUDA install path>`` is the root
+directory where you installed the CUDA SDK, typically ``/usr/local/cuda``.
+
+Optimizations
+=============
+
+CPUs and GPUs have different design philosophies and architectures. For example, a
+typical CPU has branch prediction, out-of-order execution, and is superscalar,
+whereas a typical GPU has none of these. Due to such differences, an
+optimization pipeline well-tuned for CPUs may not be suitable for GPUs.
+
+LLVM performs several general and CUDA-specific optimizations for GPUs. The
+list below shows some of the more important optimizations for GPUs. Most of
+them have been upstreamed to ``lib/Transforms/Scalar`` and
+``lib/Target/NVPTX``. A few of them have not been upstreamed due to lack of a
+customizable target-independent optimization pipeline.
+
+* **Straight-line scalar optimizations**. These optimizations reduce redundancy
+  in straight-line code. Details can be found in the `design document for
+  straight-line scalar optimizations `_.
+
+* **Inferring memory spaces**. `This optimization
+  `_
+  infers the memory space of an address so that the backend can emit faster
+  special loads and stores from it. Details can be found in the `design
+  document for memory space inference `_.
+
+* **Aggressive loop unrolling and function inlining**. Loop unrolling and
+  function inlining need to be more aggressive for GPUs than for CPUs because
+  control flow transfer on GPUs is more expensive. They also promote other
+  optimizations such as constant propagation and SROA, which sometimes speed up
+  code by over 10x. An empirical inline threshold for GPUs is 1100. This
+  configuration has yet to be upstreamed with a target-specific optimization
+  pipeline. LLVM also provides `loop unrolling pragmas
+  `_
+  and ``__attribute__((always_inline))`` for programmers to force unrolling and
+  inlining.
+
+* **Aggressive speculative execution**. `This transformation
+  `_ is
+  mainly for promoting straight-line scalar optimizations, which are most
+  effective on code along dominator paths.
+
+* **Memory-space alias analysis**.
`This alias analysis + `_ infers that two pointers in different + special memory spaces do not alias. It has yet to be integrated to the new + alias analysis infrastructure; the new infrastructure does not run + target-specific alias analysis. + +* **Bypassing 64-bit divides**. `An existing optimization + `_ + enabled in the NVPTX backend. 64-bit integer divides are much slower than + 32-bit ones on NVIDIA GPUs due to lack of a divide unit. Many of the 64-bit + divides in our benchmarks have a divisor and dividend which fit in 32-bits at + runtime. This optimization provides a fast path for this common case. diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 900ba24e230f..6c3ff4b10f1e 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -22,14 +22,16 @@ ARM * `ABI Addenda and Errata `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ AArch64 ------- +* `ARMv8 Architecture Reference Manual `_ + * `ARMv8 Instruction Set Overview `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ Itanium (ia64) -------------- diff --git a/docs/CoverageMappingFormat.rst b/docs/CoverageMappingFormat.rst index 8fcffb838a3f..9ac476c88b34 100644 --- a/docs/CoverageMappingFormat.rst +++ b/docs/CoverageMappingFormat.rst @@ -405,7 +405,7 @@ LEB128 is an unsigned interger value that is encoded using DWARF's LEB128 encoding, optimizing for the case where values are small (1 byte for values less than 128). -.. _strings: +.. _Strings: Strings ^^^^^^^ diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index 9e458559fbcd..17baf2d27b13 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -505,8 +505,15 @@ for llvm users and not imposing a big burden on llvm developers: * The textual format is not backwards compatible. We don't change it too often, but there are no specific promises. -* The bitcode format produced by a X.Y release will be readable by all following - X.Z releases and the (X+1).0 release. +* Additions and changes to the IR should be reflected in + ``test/Bitcode/compatibility.ll``. + +* The bitcode format produced by a X.Y release will be readable by all + following X.Z releases and the (X+1).0 release. + +* After each X.Y release, ``compatibility.ll`` must be copied to + ``compatibility-X.Y.ll``. The corresponding bitcode file should be assembled + using the X.Y build and committed as ``compatibility-X.Y.ll.bc``. * Newer releases can ignore features from older releases, but they cannot miscompile them. For example, if nsw is ever replaced with something else, @@ -518,6 +525,33 @@ for llvm users and not imposing a big burden on llvm developers: it is to drop it. That is not very user friendly and a bit more effort is expected, but no promises are made. +C API Changes +---------------- + +* Stability Guarantees: The C API is, in general, a "best effort" for stability. + This means that we make every attempt to keep the C API stable, but that + stability will be limited by the abstractness of the interface and the + stability of the C++ API that it wraps. In practice, this means that things + like "create debug info" or "create this type of instruction" are likely to be + less stable than "take this IR file and JIT it for my current machine". + +* Release stability: We won't break the C API on the release branch with patches + that go on that branch, with the exception that we will fix an unintentional + C API break that will keep the release consistent with both the previous and + next release. 
+ +* Testing: Patches to the C API are expected to come with tests just like any + other patch. + +* Including new things into the API: If an LLVM subcomponent has a C API already + included, then expanding that C API is acceptable. Adding C API for + subcomponents that don't currently have one needs to be discussed on the + mailing list for design and maintainability feedback prior to implementation. + +* Documentation: Any changes to the C API are required to be documented in the + release notes so that it's clear to external users who do not follow the + project how the C API is changing and evolving. + .. _copyright-license-patents: Copyright, License, and Patents @@ -624,5 +658,5 @@ patent-related trouble with their changes (including from third parties). If you or your employer own the rights to a patent and would like to contribute code to LLVM that relies on it, we require that the copyright owner sign an agreement that allows any other user of LLVM to freely use your patent. Please -contact the `oversight group `_ for more +contact the `LLVM Foundation Board of Directors `_ for more details. diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst index 55ffdb45efe9..74827c02a272 100644 --- a/docs/ExceptionHandling.rst +++ b/docs/ExceptionHandling.rst @@ -67,17 +67,10 @@ exception handling is generally preferred to SJLJ. Windows Runtime Exception Handling ----------------------------------- -Windows runtime based exception handling uses the same basic IR structure as -Itanium ABI based exception handling, but it relies on the personality -functions provided by the native Windows runtime library, ``__CxxFrameHandler3`` -for C++ exceptions: ``__C_specific_handler`` for 64-bit SEH or -``_frame_handler3/4`` for 32-bit SEH. This results in a very different -execution model and requires some minor modifications to the initial IR -representation and a significant restructuring just before code generation. - -General information about the Windows x64 exception handling mechanism can be -found at `MSDN Exception Handling (x64) -`_. +LLVM supports handling exceptions produced by the Windows runtime, but it +requires a very different intermediate representation. It is not based on the +":ref:`landingpad `" instruction like the other two models, and is +described later in this document under :ref:`wineh`. Overview -------- @@ -169,11 +162,11 @@ pad to the back end. For C++, the ``landingpad`` instruction returns a pointer and integer pair corresponding to the pointer to the *exception structure* and the *selector value* respectively. -The ``landingpad`` instruction takes a reference to the personality function to -be used for this ``try``/``catch`` sequence. The remainder of the instruction is -a list of *cleanup*, *catch*, and *filter* clauses. The exception is tested -against the clauses sequentially from first to last. The clauses have the -following meanings: +The ``landingpad`` instruction looks for a reference to the personality +function to be used for this ``try``/``catch`` sequence in the parent +function's attribute list. The instruction contains a list of *cleanup*, +*catch*, and *filter* clauses. The exception is tested against the clauses +sequentially from first to last. The clauses have the following meanings: - ``catch @ExcType`` @@ -321,97 +314,6 @@ the selector results they understand and then resume exception propagation with the `resume instruction `_ if none of the conditions match. 
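+
+As a minimal sketch of the clause-matching model just described (the names
+``@demo`` and ``@may_throw``, and the use of ``int``'s RTTI object ``@_ZTIi``,
+are illustrative choices, not required by LLVM):
+
+.. code-block:: llvm
+
+    @_ZTIi = external constant i8*
+    declare void @may_throw()
+    declare i32 @__gxx_personality_v0(...)
+
+    define void @demo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+    entry:
+      invoke void @may_throw()
+              to label %cont unwind label %lpad
+
+    cont:
+      ret void
+
+    lpad:
+      ; The pointer/selector pair provided by the runtime.
+      %vals = landingpad { i8*, i32 }
+                cleanup
+                catch i8* bitcast (i8** @_ZTIi to i8*)
+      ; A real handler would run its cleanup code and compare the selector
+      ; value against its clauses here, resuming if nothing matched.
+      resume { i8*, i32 } %vals
+    }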
-C++ Exception Handling using the Windows Runtime -================================================= - -(Note: Windows C++ exception handling support is a work in progress and is - not yet fully implemented. The text below describes how it will work - when completed.) - -The Windows runtime function for C++ exception handling uses a multi-phase -approach. When an exception occurs it searches the current callstack for a -frame that has a handler for the exception. If a handler is found, it then -calls the cleanup handler for each frame above the handler which has a -cleanup handler before calling the catch handler. These calls are all made -from a stack context different from the original frame in which the handler -is defined. Therefore, it is necessary to outline these handlers from their -original context before code generation. - -Catch handlers are called with a pointer to the handler itself as the first -argument and a pointer to the parent function's stack frame as the second -argument. The catch handler uses the `llvm.localrecover -`_ to get a -pointer to a frame allocation block that is created in the parent frame using -the `llvm.localescape -`_ intrinsic. -The ``WinEHPrepare`` pass will have created a structure definition for the -contents of this block. The first two members of the structure will always be -(1) a 32-bit integer that the runtime uses to track the exception state of the -parent frame for the purposes of handling chained exceptions and (2) a pointer -to the object associated with the exception (roughly, the parameter of the -catch clause). These two members will be followed by any frame variables from -the parent function which must be accessed in any of the functions unwind or -catch handlers. The catch handler returns the address at which execution -should continue. - -Cleanup handlers perform any cleanup necessary as the frame goes out of scope, -such as calling object destructors. The runtime handles the actual unwinding -of the stack. If an exception occurs in a cleanup handler the runtime manages -termination of the process. Cleanup handlers are called with the same arguments -as catch handlers (a pointer to the handler and a pointer to the parent stack -frame) and use the same mechanism described above to access frame variables -in the parent function. Cleanup handlers do not return a value. - -The IR generated for Windows runtime based C++ exception handling is initially -very similar to the ``landingpad`` mechanism described above. Calls to -libc++abi functions (such as ``__cxa_begin_catch``/``__cxa_end_catch`` and -``__cxa_throw_exception`` are replaced with calls to intrinsics or Windows -runtime functions (such as ``llvm.eh.begincatch``/``llvm.eh.endcatch`` and -``__CxxThrowException``). - -During the WinEHPrepare pass, the handler functions are outlined into handler -functions and the original landing pad code is replaced with a call to the -``llvm.eh.actions`` intrinsic that describes the order in which handlers will -be processed from the logical location of the landing pad and an indirect -branch to the return value of the ``llvm.eh.actions`` intrinsic. The -``llvm.eh.actions`` intrinsic is defined as returning the address at which -execution will continue. This is a temporary construct which will be removed -before code generation, but it allows for the accurate tracking of control -flow until then. - -A typical landing pad will look like this after outlining: - -.. 
code-block:: llvm - - lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) - cleanup - catch i8* bitcast (i8** @_ZTIi to i8*) - catch i8* bitcast (i8** @_ZTIf to i8*) - %recover = call i8* (...)* @llvm.eh.actions( - i32 3, i8* bitcast (i8** @_ZTIi to i8*), i8* (i8*, i8*)* @_Z4testb.catch.1) - i32 2, i8* null, void (i8*, i8*)* @_Z4testb.cleanup.1) - i32 1, i8* bitcast (i8** @_ZTIf to i8*), i8* (i8*, i8*)* @_Z4testb.catch.0) - i32 0, i8* null, void (i8*, i8*)* @_Z4testb.cleanup.0) - indirectbr i8* %recover, [label %try.cont1, label %try.cont2] - -In this example, the landing pad represents an exception handling context with -two catch handlers and a cleanup handler that have been outlined. If an -exception is thrown with a type that matches ``_ZTIi``, the ``_Z4testb.catch.1`` -handler will be called an no clean-up is needed. If an exception is thrown -with a type that matches ``_ZTIf``, first the ``_Z4testb.cleanup.1`` handler -will be called to perform unwind-related cleanup, then the ``_Z4testb.catch.1`` -handler will be called. If an exception is throw which does not match either -of these types and the exception is handled by another frame further up the -call stack, first the ``_Z4testb.cleanup.1`` handler will be called, then the -``_Z4testb.cleanup.0`` handler (which corresponds to a different scope) will be -called, and exception handling will continue at the next frame in the call -stack will be called. One of the catch handlers will return the address of -``%try.cont1`` in the parent function and the other will return the address of -``%try.cont2``, meaning that execution continues at one of those blocks after -an exception is caught. - - Exception Handling Intrinsics ============================= @@ -498,50 +400,19 @@ When used in the native Windows C++ exception handling implementation, this intrinsic serves as a placeholder to delimit code before a catch handler is outlined. After the handler is outlined, this intrinsic is simply removed. -.. _llvm.eh.actions: -``llvm.eh.actions`` ----------------------- +.. _llvm.eh.exceptionpointer: + +``llvm.eh.exceptionpointer`` +---------------------------- .. code-block:: llvm - void @llvm.eh.actions() + i8 addrspace(N)* @llvm.eh.padparam.pNi8(token %catchpad) -This intrinsic represents the list of actions to take when an exception is -thrown. It is typically used by Windows exception handling schemes where cleanup -outlining is required by the runtime. The arguments are a sequence of ``i32`` -sentinels indicating the action type followed by some pre-determined number of -arguments required to implement that action. -A code of ``i32 0`` indicates a cleanup action, which expects one additional -argument. The argument is a pointer to a function that implements the cleanup -action. - -A code of ``i32 1`` indicates a catch action, which expects three additional -arguments. Different EH schemes give different meanings to the three arguments, -but the first argument indicates whether the catch should fire, the second is -the localescape index of the exception object, and the third is the code to run -to catch the exception. - -For Windows C++ exception handling, the first argument for a catch handler is a -pointer to the RTTI type descriptor for the object to catch. The second -argument is an index into the argument list of the ``llvm.localescape`` call in -the main function. The exception object will be copied into the provided stack -object. 
If the exception object is not required, this argument should be -1.
-The third argument is a pointer to a function implementing the catch. This
-function returns the address of the basic block where execution should resume
-after handling the exception.
-
-For Windows SEH, the first argument is a pointer to the filter function, which
-indicates if the exception should be caught or not. The second argument is
-typically negative one. The third argument is the address of a basic block
-where the exception will be handled. In other words, catch handlers are not
-outlined in SEH. After running cleanups, execution immediately resumes at this
-PC.
-
-In order to preserve the structure of the CFG, a call to '``llvm.eh.actions``'
-must be followed by an ':ref:`indirectbr `' instruction that
-jumps to the result of the intrinsic call.
+This intrinsic retrieves a pointer to the exception caught by the given
+``catchpad``.
 
 SJLJ Intrinsics
@@ -628,10 +499,279 @@
 an exception handling frame for each function in a compile unit, plus a common
 exception handling frame that defines information common to all functions in
 the unit.
 
+The format of this call frame information (CFI) is often platform-dependent,
+however: ARM, for example, defines its own format, and Apple has its own compact
+unwind info format. On Windows, another format is used for all architectures
+since 32-bit x86. LLVM will emit whatever information is required by the
+target.
+
 Exception Tables
 ----------------
 
 An exception table contains information about what actions to take when an
-exception is thrown in a particular part of a function's code. There is one
-exception table per function, except leaf functions and functions that have
-calls only to non-throwing functions. They do not need an exception table.
+exception is thrown in a particular part of a function's code. This is typically
+referred to as the language-specific data area (LSDA). The format of the LSDA
+table is specific to the personality function, but the majority of personalities
+out there use a variation of the tables consumed by ``__gxx_personality_v0``.
+There is one exception table per function, except leaf functions and functions
+that have calls only to non-throwing functions. They do not need an exception
+table.
+
+.. _wineh:
+
+Exception Handling using the Windows Runtime
+=================================================
+
+Background on Windows exceptions
+---------------------------------
+
+Interacting with exceptions on Windows is significantly more complicated than
+on Itanium C++ ABI platforms. The fundamental difference between the two models
+is that Itanium EH is designed around the idea of "successive unwinding," while
+Windows EH is not.
+
+Under Itanium, throwing an exception typically involves allocating thread-local
+memory to hold the exception, and calling into the EH runtime. The runtime
+identifies frames with appropriate exception handling actions, and successively
+resets the register context of the current thread to the most recently active
+frame with actions to run. In LLVM, execution resumes at a ``landingpad``
+instruction, which produces register values provided by the runtime. If a
+function is only cleaning up allocated resources, the function is responsible
+for calling ``_Unwind_Resume`` to transition to the next most recently active
+frame after it is finished cleaning up.
Eventually, the frame responsible for
+handling the exception calls ``__cxa_end_catch`` to destroy the exception,
+release its memory, and resume normal control flow.
+
+The Windows EH model does not use these successive register context resets.
+Instead, the active exception is typically described by a frame on the stack.
+In the case of C++ exceptions, the exception object is allocated in stack memory
+and its address is passed to ``__CxxThrowException``. General-purpose structured
+exceptions (SEH) are more analogous to Linux signals, and they are dispatched by
+userspace DLLs provided with Windows. Each frame on the stack has an assigned EH
+personality routine, which decides what actions to take to handle the exception.
+There are a few major personalities for C and C++ code: the C++ personality
+(``__CxxFrameHandler3``) and the SEH personalities (``_except_handler3``,
+``_except_handler4``, and ``__C_specific_handler``). All of them implement
+cleanups by calling back into a "funclet" contained in the parent function.
+
+Funclets, in this context, are regions of the parent function that can be called
+as though through a function pointer, with a very special calling convention.
+The frame pointer of the parent frame is passed into the funclet either using
+the standard EBP register or in the first parameter register, depending on the
+architecture. The funclet implements the EH action by accessing local variables
+in memory through the frame pointer, and returning some appropriate value,
+continuing the EH process. No variables that are live into or out of the funclet
+can be allocated in registers.
+
+The C++ personality also uses funclets to contain the code for catch blocks
+(i.e. all user code between the braces in ``catch (Type obj) { ... }``). The
+runtime must use funclets for catch bodies because the C++ exception object is
+allocated in a child stack frame of the function handling the exception. If the
+runtime rewound the stack back to the frame of the catch, the memory holding the
+exception would be overwritten quickly by subsequent function calls. The use of
+funclets also allows ``__CxxFrameHandler3`` to implement rethrow without
+resorting to TLS. Instead, the runtime throws a special exception, and then uses
+SEH (``__try / __except``) to resume execution with new information in the child
+frame.
+
+In other words, the successive unwinding approach is incompatible with Visual
+C++ exceptions and general-purpose Windows exception handling. Because the C++
+exception object lives in stack memory, LLVM cannot provide a custom personality
+function that uses landingpads. Similarly, SEH does not provide any mechanism
+to rethrow an exception or continue unwinding. Therefore, LLVM must use the IR
+constructs described later in this document to implement compatible exception
+handling.
+
+SEH filter expressions
+-----------------------
+
+The SEH personality functions also use funclets to implement filter expressions,
+which allow executing arbitrary user code to decide which exceptions to catch.
+Filter expressions should not be confused with the ``filter`` clause of the LLVM
+``landingpad`` instruction. Typically, filter expressions are used to determine
+if the exception came from a particular DLL or code region, or if code faulted
+while accessing a particular memory address range. LLVM does not currently have
+IR to represent filter expressions because it is difficult to represent their
+control dependencies.
Filter expressions run during the first phase of EH, +before cleanups run, making it very difficult to build a faithful control flow +graph. For now, the new EH instructions cannot represent SEH filter +expressions, and frontends must outline them ahead of time. Local variables of +the parent function can be escaped and accessed using the ``llvm.localescape`` +and ``llvm.localrecover`` intrinsics. + +New exception handling instructions +------------------------------------ + +The primary design goal of the new EH instructions is to support funclet +generation while preserving information about the CFG so that SSA formation +still works. As a secondary goal, they are designed to be generic across MSVC +and Itanium C++ exceptions. They make very few assumptions about the data +required by the personality, so long as it uses the familiar core EH actions: +catch, cleanup, and terminate. However, the new instructions are hard to modify +without knowing details of the EH personality. While they can be used to +represent Itanium EH, the landingpad model is strictly better for optimization +purposes. + +The following new instructions are considered "exception handling pads", in that +they must be the first non-phi instruction of a basic block that may be the +unwind destination of an EH flow edge: +``catchswitch``, ``catchpad``, and ``cleanuppad``. +As with landingpads, when entering a try scope, if the +frontend encounters a call site that may throw an exception, it should emit an +invoke that unwinds to a ``catchswitch`` block. Similarly, inside the scope of a +C++ object with a destructor, invokes should unwind to a ``cleanuppad``. + +New instructions are also used to mark the points where control is transferred +out of a catch/cleanup handler (which will correspond to exits from the +generated funclet). A catch handler which reaches its end by normal execution +executes a ``catchret`` instruction, which is a terminator indicating where in +the function control is returned to. A cleanup handler which reaches its end +by normal execution executes a ``cleanupret`` instruction, which is a terminator +indicating where the active exception will unwind to next. + +Each of these new EH pad instructions has a way to identify which action should +be considered after this action. The ``catchswitch`` instruction is a terminator +and has an unwind destination operand analogous to the unwind destination of an +invoke. The ``cleanuppad`` instruction is not +a terminator, so the unwind destination is stored on the ``cleanupret`` +instruction instead. Successfully executing a catch handler should resume +normal control flow, so neither ``catchpad`` nor ``catchret`` instructions can +unwind. All of these "unwind edges" may refer to a basic block that contains an +EH pad instruction, or they may unwind to the caller. Unwinding to the caller +has roughly the same semantics as the ``resume`` instruction in the landingpad +model. When inlining through an invoke, instructions that unwind to the caller +are hooked up to unwind to the unwind destination of the call site. + +Putting things together, here is a hypothetical lowering of some C++ that uses +all of the new IR instructions: + +.. code-block:: c + + struct Cleanup { + Cleanup(); + ~Cleanup(); + int m; + }; + void may_throw(); + int f() noexcept { + try { + Cleanup obj; + may_throw(); + } catch (int e) { + may_throw(); + return e; + } + return 0; + } + +.. 
code-block:: llvm + + define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 { + entry: + %obj = alloca %struct.Cleanup, align 4 + %e = alloca i32, align 4 + %call = invoke %struct.Cleanup* @"\01??0Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) + to label %invoke.cont unwind label %lpad.catch + + invoke.cont: ; preds = %entry + invoke void @"\01?may_throw@@YAXXZ"() + to label %invoke.cont.2 unwind label %lpad.cleanup + + invoke.cont.2: ; preds = %invoke.cont + call void @"\01??_DCleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + br label %return + + return: ; preds = %invoke.cont.3, %invoke.cont.2 + %retval.0 = phi i32 [ 0, %invoke.cont.2 ], [ %3, %invoke.cont.3 ] + ret i32 %retval.0 + + lpad.cleanup: ; preds = %invoke.cont.2 + %0 = cleanuppad within none [] + call void @"\01??1Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + cleanupret %0 unwind label %lpad.catch + + lpad.catch: ; preds = %lpad.cleanup, %entry + %1 = catchswitch within none [label %catch.body] unwind label %lpad.terminate + + catch.body: ; preds = %lpad.catch + %catch = catchpad within %1 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %e] + invoke void @"\01?may_throw@@YAXXZ"() + to label %invoke.cont.3 unwind label %lpad.terminate + + invoke.cont.3: ; preds = %catch.body + %3 = load i32, i32* %e, align 4 + catchret from %catch to label %return + + lpad.terminate: ; preds = %catch.body, %lpad.catch + cleanuppad within none [] + call void @"\01?terminate@@YAXXZ" + unreachable + } + +Funclet parent tokens +----------------------- + +In order to produce tables for EH personalities that use funclets, it is +necessary to recover the nesting that was present in the source. This funclet +parent relationship is encoded in the IR using tokens produced by the new "pad" +instructions. The token operand of a "pad" or "ret" instruction indicates which +funclet it is in, or "none" if it is not nested within another funclet. + +The ``catchpad`` and ``cleanuppad`` instructions establish new funclets, and +their tokens are consumed by other "pad" instructions to establish membership. +The ``catchswitch`` instruction does not create a funclet, but it produces a +token that is always consumed by its immediate successor ``catchpad`` +instructions. This ensures that every catch handler modelled by a ``catchpad`` +belongs to exactly one ``catchswitch``, which models the dispatch point after a +C++ try. + +Here is an example of what this nesting looks like using some hypothetical +C++ code: + +.. code-block:: c + + void f() { + try { + throw; + } catch (...) { + try { + throw; + } catch (...) { + } + } + } + +.. 
code-block:: llvm
+
+  define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+  entry:
+    invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+            to label %unreachable unwind label %catch.dispatch
+
+  catch.dispatch:                                   ; preds = %entry
+    %0 = catchswitch within none [label %catch] unwind to caller
+
+  catch:                                            ; preds = %catch.dispatch
+    %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+    invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+            to label %unreachable unwind label %catch.dispatch2
+
+  catch.dispatch2:                                  ; preds = %catch
+    %2 = catchswitch within %1 [label %catch3] unwind to caller
+
+  catch3:                                           ; preds = %catch.dispatch2
+    %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+    catchret from %3 to label %try.cont
+
+  try.cont:                                         ; preds = %catch3
+    catchret from %1 to label %try.cont6
+
+  try.cont6:                                        ; preds = %try.cont
+    ret void
+
+  unreachable:                                      ; preds = %catch, %entry
+    unreachable
+  }
+
+The "inner" ``catchswitch`` consumes ``%1``, the token produced by the
+``catchpad`` of the outer catchswitch.
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 3fd54c8360e5..87f48c993425 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -49,9 +49,9 @@ function and then be turned into an instruction if warranted.
 
   Add an entry for your intrinsic. Describe its memory access characteristics
   for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note
-  that any intrinsic using the ``llvm_int_ty`` type for an argument will
-  be deemed by ``tblgen`` as overloaded and the corresponding suffix will
-  be required on the intrinsic's name.
+  that any intrinsic using one of the ``llvm_any*_ty`` types for an argument or
+  return type will be deemed by ``tblgen`` as overloaded and the corresponding
+  suffix will be required on the intrinsic's name.
 
 #. ``llvm/lib/Analysis/ConstantFolding.cpp``:
 
diff --git a/docs/Frontend/PerformanceTips.rst b/docs/Frontend/PerformanceTips.rst
index 8d0abcd1c172..142d262eb657 100644
--- a/docs/Frontend/PerformanceTips.rst
+++ b/docs/Frontend/PerformanceTips.rst
@@ -11,12 +11,60 @@ Abstract
 
 The intended audience of this document is developers of language frontends
 targeting LLVM IR. This document is home to a collection of tips on how to
-generate IR that optimizes well. As with any optimizer, LLVM has its strengths
-and weaknesses. In some cases, surprisingly small changes in the source IR
-can have a large effect on the generated code.
+generate IR that optimizes well.
+
+IR Best Practices
+=================
+
+As with any optimizer, LLVM has its strengths and weaknesses. In some cases,
+surprisingly small changes in the source IR can have a large effect on the
+generated code.
+
+Beyond the specific items on the list below, it's worth noting that the most
+mature frontend for LLVM is Clang. As a result, the further your IR gets from
+what Clang might emit, the less likely it is to be effectively optimized. It
+can often be useful to write a quick C program with the semantics you're trying
+to model and see what decisions Clang's IRGen makes about what IR to emit.
+Studying Clang's CodeGen directory can also be a good source of ideas. Note
+that Clang and LLVM are explicitly version-locked, so you'll need to make sure
+you're using a Clang built from the same svn revision or release as the LLVM
+library you're using. As always, it's *strongly* recommended that you track
+tip of tree development, particularly during bring-up of a new project.
+
+The Basics
+^^^^^^^^^^^
+
+#.
+#. Make sure that your Modules contain both a data layout specification and
+   target triple. Without these pieces, none of the target specific
+   optimizations will be enabled. This can have a major effect on the
+   generated code quality.
+
+#. For each function or global emitted, use the most private linkage type
+   possible (private, internal or linkonce_odr preferably). Doing so will
+   make LLVM's inter-procedural optimizations much more effective.
+
+#. Avoid high in-degree basic blocks (e.g. basic blocks with dozens or hundreds
+   of predecessors). Among other issues, the register allocator is known to
+   perform badly when confronted with such structures. The only exception to
+   this guidance is that a unified return block with high in-degree is fine.
+
+Use of allocas
+^^^^^^^^^^^^^^
+
+An alloca instruction can be used to represent a function scoped stack slot,
+but can also represent dynamic frame expansion. When representing function
+scoped variables or locations, placing alloca instructions at the beginning of
+the entry block should be preferred. In particular, place them before any
+call instructions. Call instructions might get inlined and replaced with
+multiple basic blocks. The end result is that a following alloca instruction
+would no longer be in the entry basic block afterward.
+
+The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt
+to eliminate alloca instructions that are in the entry basic block. Since
+SSA is the canonical form expected by much of the optimizer, if allocas cannot
+be eliminated by Mem2Reg or SROA, the optimizer is likely to be less effective
+than it could be.
 
 Avoid loads and stores of large aggregate type
-================================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 LLVM currently does not optimize well loads and stores of large
 :ref:`aggregate types ` (i.e. structs and arrays). As an alternative, consider
@@ -27,7 +75,7 @@ instruction supported by the targeted hardware are well supported. These
 can be an effective way to represent collections of small packed fields.
 
 Prefer zext over sext when legal
-==================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 On some architectures (X86_64 is one), sign extension can involve an extra
 instruction whereas zero extension can be folded into a load. LLVM will try to
@@ -39,7 +87,7 @@ Alternatively, you can :ref:`specify the range of the value using metadata
 ` and LLVM can do the sext to zext conversion for you.
 
 Zext GEP indices to machine register width
-============================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Internally, LLVM often promotes the width of GEP indices to machine
 register width. When it does so, it will default to using sign extension (sext)
@@ -47,47 +95,37 @@ operations for safety. If your source language provides information
 about the range of the index, you may wish to manually extend indices to
 machine register width using a zext instruction.
 
-Other things to consider
-=========================
+When to specify alignment
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+LLVM will always generate correct code if you don’t specify alignment, but may
+generate inefficient code. For example, if you are targeting MIPS (or older
+ARM ISAs) then the hardware does not handle unaligned loads and stores, and
+so you will enter a trap-and-emulate path if you do a load or store with
+lower-than-natural alignment. To avoid this, LLVM will emit a slower
+sequence of loads, shifts and masks (or load-right + load-left on MIPS) for
+all cases where the load / store does not have a sufficiently high alignment
+in the IR.
 
-#. Make sure that a DataLayout is provided (this will likely become required in
-   the near future, but is certainly important for optimization).
+The alignment is used to guarantee the alignment on allocas and globals,
+though in most cases this is unnecessary (most targets have a sufficiently
+high default alignment that they’ll be fine). It is also used to provide a
+contract to the back end saying ‘either this load/store has this alignment, or
+it is undefined behavior’. This means that the back end is free to emit
+instructions that rely on that alignment (and mid-level optimizers are free to
+perform transforms that require that alignment). For x86, it doesn’t make
+much difference, as almost all instructions are alignment-independent. For
+MIPS, it can make a big difference.
 
-#. Add nsw/nuw flags as appropriate. Reasoning about overflow is
-   generally hard for an optimizer so providing these facts from the frontend
-   can be very impactful.
+Note that if your loads and stores are atomic, the backend will be unable to
+lower an under-aligned access into a sequence of natively aligned accesses.
+As a result, alignment is mandatory for atomic loads and stores.
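+
+As a rough sketch of what this looks like at the IR level (the pointers
+``%p`` and ``%q`` here are hypothetical):
+
+.. code-block:: llvm
+
+    ; Only 1-byte alignment is guaranteed, so a strict-alignment target
+    ; (e.g. MIPS) must lower this to several narrower loads plus shifts
+    ; and masks rather than a single word load.
+    %v = load i32, i32* %p, align 1
+
+    ; Atomic accesses must be naturally aligned; the backend cannot split
+    ; an under-aligned atomic access into smaller aligned operations.
+    %w = load atomic i32, i32* %q seq_cst, align 4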
-#. Use fast-math flags on floating point operations if legal. If you don't
-   need strict IEEE floating point semantics, there are a number of additional
-   optimizations that can be performed. This can be highly impactful for
-   floating point intensive computations.
-
-#. Use inbounds on geps. This can help to disambiguate some aliasing queries.
-
-#. Add noalias/align/dereferenceable/nonnull to function arguments and return
-   values as appropriate
-
-#. Mark functions as readnone/readonly or noreturn/nounwind when known. The
-   optimizer will try to infer these flags, but may not always be able to.
-   Manual annotations are particularly important for external functions that
-   the optimizer can not analyze.
+Other Things to Consider
+^^^^^^^^^^^^^^^^^^^^^^^^
 
 #. Use ptrtoint/inttoptr sparingly (they interfere with pointer aliasing
    analysis), prefer GEPs
 
-#. Use the lifetime.start/lifetime.end and invariant.start/invariant.end
-   intrinsics where possible. Common profitable uses are for stack like data
-   structures (thus allowing dead store elimination) and for describing
-   life times of allocas (thus allowing smaller stack sizes).
-
-#. Use pointer aliasing metadata, especially tbaa metadata, to communicate
-   otherwise-non-deducible pointer aliasing facts
-
-#. Use the "most-private" possible linkage types for the functions being defined
-   (private, internal or linkonce_odr preferably)
-
-#. Mark invariant locations using !invariant.load and TBAA's constant flags
-
 #. Prefer globals over inttoptr of a constant address - this gives you
    dereferenceability information. In MCJIT, use getSymbolAddress to provide
    actual address.
@@ -104,15 +142,6 @@ Other things to consider
    desired. This is generally not required because the optimizer will
    convert an invoke with an unreachable unwind destination to a call
    instruction.
 
-#. If you language uses range checks, consider using the IRCE pass. It is not
-   currently part of the standard pass order.
-
-#. For languages with numerous rarely executed guard conditions (e.g. null
-   checks, type checks, range checks) consider adding an extra execution or
-   two of LoopUnswith and LICM to your pass order. The standard pass order,
-   which is tuned for C and C++ applications, may not be sufficient to remove
-   all dischargeable checks from loops.
-
 #. Use profile metadata to indicate statically known cold paths, even if
    dynamic profiling information is not available. This can make a large
    difference in code placement and thus the performance of tight loops.
@@ -136,11 +165,6 @@ Other things to consider
    improvement. Note that this is not always profitable and does involve a
    potentially large increase in code size.
 
-#. Avoid high in-degree basic blocks (e.g. basic blocks with dozens or hundreds
-   of predecessors). Among other issues, the register allocator is known to
-   perform badly with confronted with such structures. The only exception to
-   this guidance is that a unified return block with high in-degree is fine.
-
 #. When checking a value against a constant, emit the check using a consistent
    comparison type. The GVN pass *will* optimize redundant equalities even if
    the type of comparison is inverted, but GVN only runs late in the pipeline.
@@ -164,10 +188,99 @@ Other things to consider
    time and optimization effectiveness. The former is fixable with enough
    effort, but the latter is fairly fundamental to their designed purpose.
 
-p.s. If you want to help improve this document, patches expanding any of the
-above items into standalone sections of their own with a more complete
-discussion would be very welcome.
+Describing Language Specific Properties
+=======================================
+
+When translating a source language to LLVM, finding ways to express concepts
+and guarantees available in your source language which are not natively
+provided by LLVM IR will greatly improve LLVM's ability to optimize your code.
+As an example, C/C++'s ability to mark every add as "no signed wrap (nsw)" goes
+a long way to assisting the optimizer in reasoning about loop induction
+variables and thus generating more optimal code for loops.
+
+The LLVM LangRef includes a number of mechanisms for annotating the IR with
+additional semantic information. It is *strongly* recommended that you become
+highly familiar with this document. The list below is intended to highlight a
+couple of items of particular interest, but is by no means exhaustive.
+
+Restricted Operation Semantics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Add nsw/nuw flags as appropriate. Reasoning about overflow is
+   generally hard for an optimizer so providing these facts from the frontend
+   can be very impactful.
+
+#. Use fast-math flags on floating point operations if legal. If you don't
+   need strict IEEE floating point semantics, there are a number of additional
+   optimizations that can be performed. This can be highly impactful for
+   floating point intensive computations.
+
+Describing Aliasing Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Add noalias/align/dereferenceable/nonnull to function arguments and return
+   values as appropriate.
+
+#. Use pointer aliasing metadata, especially tbaa metadata, to communicate
+   otherwise-non-deducible pointer aliasing facts.
+
+#. Use inbounds on geps. This can help to disambiguate some aliasing queries.
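+
+As a quick illustration, here is a hypothetical function carrying several of
+these annotations (the annotations, not the arithmetic, are the point):
+
+.. code-block:: llvm
+
+    define i32 @sum2(i32* noalias nonnull dereferenceable(8) %a) {
+      ; an inbounds GEP plus the noalias/dereferenceable arguments give
+      ; alias analysis much more to work with
+      %a1 = getelementptr inbounds i32, i32* %a, i64 1
+      %v0 = load i32, i32* %a, align 4
+      %v1 = load i32, i32* %a1, align 4
+      %s = add nsw i32 %v0, %v1
+      ret i32 %s
+    }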
+
+Modeling Memory Effects
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Mark functions as readnone/readonly/argmemonly or noreturn/nounwind when
+   known. The optimizer will try to infer these flags, but may not always be
+   able to. Manual annotations are particularly important for external
+   functions that the optimizer cannot analyze.
+
+#. Use the lifetime.start/lifetime.end and invariant.start/invariant.end
+   intrinsics where possible. Common profitable uses are for stack-like data
+   structures (thus allowing dead store elimination) and for describing
+   lifetimes of allocas (thus allowing smaller stack sizes).
+
+#. Mark invariant locations using !invariant.load and TBAA's constant flags
+
+Pass Ordering
+^^^^^^^^^^^^^
+
+One of the most common mistakes made by new language frontend projects is to
+use the existing -O2 or -O3 pass pipelines as is. These pass pipelines make a
+good starting point for an optimizing compiler for any language, but they have
+been carefully tuned for C and C++, not your target language. You will almost
+certainly need to use a custom pass order to achieve optimal performance. A
+couple of specific suggestions:
+
+#. For languages with numerous rarely executed guard conditions (e.g. null
+   checks, type checks, range checks) consider adding an extra execution or
+   two of LoopUnswitch and LICM to your pass order. The standard pass order,
+   which is tuned for C and C++ applications, may not be sufficient to remove
+   all dischargeable checks from loops.
+
+#. If your language uses range checks, consider using the IRCE pass. It is
+   not currently part of the standard pass order.
+
+#. A useful sanity check is to run your optimized IR back through the -O2
+   pipeline again. If you see noticeable improvement in the resulting IR,
+   you likely need to adjust your pass order.
+
+
+I Still Can't Find What I'm Looking For
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you didn't find what you were looking for above, consider proposing a piece
+of metadata which provides the optimization hint you need. Such extensions are
+relatively common and are generally well received by the community. You will
+need to ensure that your proposal is sufficiently general so that it benefits
+others if you wish to contribute it upstream.
+
+You should also consider describing the problem you're facing on `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ and asking for advice.
+It's entirely possible someone has encountered your problem before and can
+give good advice. If there are multiple interested parties, that also
+increases the chances that a metadata extension would be well received by the
+community as a whole.
 
 Adding to this document
 =======================
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index df6bd7bc6ba8..2585ce135ba6 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -1,5 +1,5 @@
 ====================================
-Getting Started with the LLVM System 
+Getting Started with the LLVM System
 ====================================
 
 .. contents::
@@ -49,12 +49,25 @@ Here's the short story for getting up and running quickly with LLVM:
 
   * ``cd llvm/tools``
   * ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang``
 
-#. Checkout Compiler-RT:
+#. Checkout Compiler-RT (required to build the sanitizers):
 
   * ``cd where-you-want-llvm-to-live``
   * ``cd llvm/projects``
  * ``svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt``
 
+#. Checkout Libomp (required for OpenMP support):
+
+  * ``cd where-you-want-llvm-to-live``
+  * ``cd llvm/projects``
+  * ``svn co http://llvm.org/svn/llvm-project/openmp/trunk openmp``
+
+#. Checkout libcxx and libcxxabi **[Optional]**:
+
+  * ``cd where-you-want-llvm-to-live``
+  * ``cd llvm/projects``
+  * ``svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx``
+  * ``svn co http://llvm.org/svn/llvm-project/libcxxabi/trunk libcxxabi``
+
 #. Get the Test Suite Source Code **[Optional]**
 
   * ``cd where-you-want-llvm-to-live``
@@ -62,7 +75,7 @@ Here's the short story for getting up and running quickly with LLVM:
   * ``svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite``
 
 #. Configure and build LLVM and Clang:
-
+
   The usual build uses `CMake <http://www.cmake.org/>`_. If you would rather
   use autotools, see `Building LLVM with autotools <BuildingLLVMWithAutotools.html>`_.
@@ -70,16 +83,16 @@ Here's the short story for getting up and running quickly with LLVM:
 
   * ``mkdir build``
   * ``cd build``
   * ``cmake -G <generator> [options] <path to llvm sources>``
-
+
   Some common generators are:
 
   * ``Unix Makefiles`` --- for generating make-compatible parallel makefiles.
   * ``Ninja`` --- for generating `Ninja `
-    build files.
+    build files. Most llvm developers use Ninja.
   * ``Visual Studio`` --- for generating Visual Studio projects and
     solutions.
   * ``Xcode`` --- for generating Xcode projects.
-
+
   Some Common options:
 
   * ``-DCMAKE_INSTALL_PREFIX=directory`` --- Specify for *directory* the full
@@ -125,20 +138,20 @@ Hardware
 
 LLVM is known to work on the following host platforms:
 
 ================== ===================== =============
-OS                 Arch                  Compilers 
+OS                 Arch                  Compilers
 ================== ===================== =============
-Linux              x86\ :sup:`1`         GCC, Clang 
-Linux              amd64                 GCC, Clang 
-Linux              ARM\ :sup:`4`         GCC, Clang 
-Linux              PowerPC               GCC, Clang 
-Solaris            V9 (Ultrasparc)       GCC 
-FreeBSD            x86\ :sup:`1`         GCC, Clang 
-FreeBSD            amd64                 GCC, Clang 
-MacOS X\ :sup:`2`  PowerPC               GCC 
-MacOS X            x86                   GCC, Clang 
-Cygwin/Win32       x86\ :sup:`1, 3`      GCC 
-Windows            x86\ :sup:`1`         Visual Studio 
-Windows x64        x86-64                Visual Studio 
+Linux              x86\ :sup:`1`         GCC, Clang
+Linux              amd64                 GCC, Clang
+Linux              ARM\ :sup:`4`         GCC, Clang
+Linux              PowerPC               GCC, Clang
+Solaris            V9 (Ultrasparc)       GCC
+FreeBSD            x86\ :sup:`1`         GCC, Clang
+FreeBSD            amd64                 GCC, Clang
+MacOS X\ :sup:`2`  PowerPC               GCC
+MacOS X            x86                   GCC, Clang
+Cygwin/Win32       x86\ :sup:`1, 3`      GCC
+Windows            x86\ :sup:`1`         Visual Studio
+Windows x64        x86-64                Visual Studio
 ================== ===================== =============
 
 .. note::
@@ -207,14 +220,14 @@ Unix utilities. Specifically:
 
 * **chmod** --- change permissions on a file
 * **cat** --- output concatenation utility
 * **cp** --- copy files
-* **date** --- print the current date/time 
+* **date** --- print the current date/time
 * **echo** --- print to standard output
 * **egrep** --- extended regular expression search utility
 * **find** --- find files/dirs in a file system
 * **grep** --- regular expression search utility
 * **gzip** --- gzip command for distribution generation
 * **gunzip** --- gunzip command for distribution checking
-* **install** --- install directories/files 
+* **install** --- install directories/files
 * **mkdir** --- create a directory
 * **mv** --- move (rename) files
 * **ranlib** --- symbol table builder for archive libraries
@@ -521,13 +534,28 @@ If you want to check out clang too, run:
 
   % cd llvm/tools
   % git clone http://llvm.org/git/clang.git
 
-If you want to check out compiler-rt too, run:
+If you want to check out compiler-rt (required to build the sanitizers), run:
 
 .. code-block:: console
 
   % cd llvm/projects
   % git clone http://llvm.org/git/compiler-rt.git
 
+If you want to check out libomp (required for OpenMP support), run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/openmp.git
+
+If you want to check out libcxx and libcxxabi (optional), run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/libcxx.git
+  % git clone http://llvm.org/git/libcxxabi.git
+
 If you want to check out the Test Suite Source Code (optional), run:
 
 .. code-block:: console
@@ -619,7 +647,7 @@ To set up clone from which you can submit code using ``git-svn``, run:
 
   % git config svn-remote.svn.fetch :refs/remotes/origin/master
   % git svn rebase -l
 
-Likewise for compiler-rt and test-suite.
+Likewise for compiler-rt, libomp and test-suite.
 
 To update this clone without generating git-svn tags that conflict with the
 upstream Git repo, run:
@@ -633,7 +661,7 @@ upstream Git repo, run:
       git checkout master &&
       git svn rebase -l)
 
-Likewise for compiler-rt and test-suite.
+Likewise for compiler-rt, libomp and test-suite.
 
 This leaves your working directories on their master branches, so you'll need
 to ``checkout`` each working branch individually and ``rebase`` it on top of
 its
@@ -838,7 +866,7 @@ with the latest Xcode:
 
 .. code-block:: console
 
-  % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES=“armv7;armv7s;arm64"
+  % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64"
     -DCMAKE_TOOLCHAIN_FILE=<PATH_TO_LLVM>/cmake/platforms/iOS.cmake
     -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off
     -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options]
@@ -881,7 +909,7 @@ Underneath that directory there is another directory with a name ending in
 For example:
 
 .. code-block:: console
-
+
   % cd llvm_build_dir
   % find lib/Support/ -name APFloat*
   lib/Support/CMakeFiles/LLVMSupport.dir/APFloat.cpp.o
@@ -990,7 +1018,7 @@ different `tools`_.
   code generation. For example, the ``llvm/lib/Target/X86`` directory holds the
   X86 machine description while ``llvm/lib/Target/ARM`` implements the ARM
   backend.
-
+
 ``llvm/lib/CodeGen/``
 
   This directory contains the major parts of the code generator: Instruction
@@ -1075,7 +1103,7 @@ the `Command Guide <CommandGuide/index.html>`_.
 
   The archiver produces an archive containing the given LLVM bitcode files,
   optionally with an index for faster lookup.
-
+
 ``llvm-as``
 
   The assembler transforms the human readable LLVM assembly to LLVM bitcode.
@@ -1088,7 +1116,7 @@ the `Command Guide <CommandGuide/index.html>`_.
 
   ``llvm-link``, not surprisingly, links multiple LLVM modules into a single
   program.
-
+
 ``lli``
 
   ``lli`` is the LLVM interpreter, which can directly execute LLVM bitcode
@@ -1219,7 +1247,7 @@ Example with clang
 .. code-block:: console
 
   % ./hello
-
+
 and
 
 .. code-block:: console
diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst
index 6579d36a72a6..356c846d82bc 100644
--- a/docs/HowToBuildOnARM.rst
+++ b/docs/HowToBuildOnARM.rst
@@ -18,33 +18,44 @@ Here are some notes on building/testing LLVM/Clang on ARM. Note that ARM
 encompasses a wide variety of CPUs; this advice is primarily based
 on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
 
-#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
-   please use ``gold`` rather then GNU ``ld``.
-   Building LLVM/Clang with ``--enable-optimized``
-   is preferred since it consumes less memory. Otherwise, the building
-   process will very likely fail due to insufficient memory. In any
-   case it is probably a good idea to set up a swap partition.
-
-#. If you want to run ``make check-all`` after building LLVM/Clang, to avoid
-   false alarms (e.g., ARCMT failure) please use at least the following
-   configuration:
-
-   .. code-block:: bash
-
-     $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs-vfp
-
 #. The most popular Linaro/Ubuntu OS's for ARM boards, e.g., the
-   Pandaboard, have become hard-float platforms. The following set
-   of configuration options appears to be a good choice for this
-   platform:
+   Pandaboard, have become hard-float platforms. There are a number of
+   choices when using CMake. Autoconf usage is deprecated as of 3.8.
+
+   Building LLVM/Clang in ``Release`` mode is preferred since it consumes
+   a lot less memory. Otherwise, the building process will very likely
+   fail due to insufficient memory. It's also a lot quicker to only build
+   the relevant back-ends (ARM and AArch64), since it's very unlikely that
+   you'll use an ARM board to cross-compile to other arches. If you're
+   running Compiler-RT tests, also include the x86 back-end, or some tests
+   will fail.
 
    .. code-block:: bash
 
-     ../$LLVM_SRC_DIR/configure --build=armv7l-unknown-linux-gnueabihf \
-     --host=armv7l-unknown-linux-gnueabihf \
-     --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9 \
-     --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon \
-     --enable-targets=arm --enable-optimized --enable-assertions
+     cmake $LLVM_SRC_DIR -DCMAKE_BUILD_TYPE=Release \
+           -DLLVM_TARGETS_TO_BUILD="ARM;X86;AArch64"
+
+   Other options you can use are:
+
+   .. code-block:: bash
+
+     Use Ninja instead of Make: "-G Ninja"
+     Build with assertions on: "-DLLVM_ENABLE_ASSERTIONS=True"
+     Force Python2: "-DPYTHON_EXECUTABLE=/usr/bin/python2"
+     Local (non-sudo) install path: "-DCMAKE_INSTALL_PREFIX=$HOME/llvm/install"
+     CPU flags: "-DCMAKE_C_FLAGS=-mcpu=cortex-a15" (same for CXX_FLAGS)
+
+   After that, just typing ``make -jN`` or ``ninja`` will build everything.
+   ``make -jN check-all`` or ``ninja check-all`` will run all compiler tests. For
+   running the test suite, please refer to :doc:`TestingGuide`.
+
+#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
+   please use ``gold`` rather than GNU ``ld``. In any case it is probably a
+   good idea to set up a swap partition, too.
+
+   .. code-block:: bash
+
+     $ sudo ln -sf /usr/bin/ld.gold /usr/bin/ld
 
 #. ARM development boards can be unstable and you may experience that cores
    are disappearing, caches being flushed on every big.LITTLE switch, and
@@ -58,6 +69,10 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
       sudo cpufreq-set -c $cpu -g performance
    done
 
+   Remember to turn that off after the build, or you may risk burning your
+   CPU. Most modern kernels don't need that, so only use it if you have
+   problems.
+
 #. Running the build on SD cards is ok, but they are more prone to failures
    than good quality USB sticks, and those are more prone to failures than
    external hard-drives (those are also a lot faster). So, at least, you
@@ -66,4 +81,5 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
 
 #. Make sure you have a decent power supply (dozens of dollars worth) that can
    provide *at least* 4 amperes, this is especially important if you use USB
-   devices with your board.
+   devices with your board. Externally powered USB/SATA hard drives are even
+   better than having a good power supply.
diff --git a/docs/HowToReleaseLLVM.rst b/docs/HowToReleaseLLVM.rst
index 26e9f3b2ee87..33c547e97a88 100644
--- a/docs/HowToReleaseLLVM.rst
+++ b/docs/HowToReleaseLLVM.rst
@@ -136,51 +136,24 @@ Regenerate the configure scripts for both ``llvm`` and the ``test-suite``.
 
 In addition, the version numbers of all the Bugzilla components must be updated
 for the next release.
-Build the LLVM Release Candidates -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Tagging the LLVM Release Candidates +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Create release candidates for ``llvm``, ``clang``, ``dragonegg``, and the LLVM -``test-suite`` by tagging the branch with the respective release candidate -number. For instance, to create **Release Candidate 1** you would issue the -following commands: +Tag release candidates using the tag.sh script in utils/release. :: - $ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ - https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ - https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ - https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ - https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/rc1 - -Similarly, **Release Candidate 2** would be named ``RC2`` and so on. This keeps -a permanent copy of the release candidate around for people to export and build -as they wish. The final released sources will be tagged in the ``RELEASE_XYZ`` -directory as ``Final`` (c.f. :ref:`tag`). + $ ./tag.sh -release X.Y.Z -rc $RC The Release Manager may supply pre-packaged source tarballs for users. This can -be done with the following commands: +be done with the export.sh script in utils/release. :: - $ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/rc1 llvm-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/rc1 clang-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/rc1 dragonegg-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/rc1 llvm-test-X.Yrc1 + $ ./export.sh -release X.Y.Z -rc $RC - $ tar -cvf - llvm-X.Yrc1 | gzip > llvm-X.Yrc1.src.tar.gz - $ tar -cvf - clang-X.Yrc1 | gzip > clang-X.Yrc1.src.tar.gz - $ tar -cvf - dragonegg-X.Yrc1 | gzip > dragonegg-X.Yrc1.src.tar.gz - $ tar -cvf - llvm-test-X.Yrc1 | gzip > llvm-test-X.Yrc1.src.tar.gz +This will generate source tarballs for each LLVM project being validated, which +can be uploaded to the website for further testing. Building the Release -------------------- @@ -384,21 +357,11 @@ mainline into the release branch. Tag the LLVM Final Release ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Tag the final release sources using the following procedure: +Tag the final release sources using the tag.sh script in utils/release. 
:: - $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ - https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ - https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ - https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ - https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/Final + $ ./tag.sh -release X.Y.Z -final Update the LLVM Demo Page ------------------------- diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 0039d014275a..103d876b3cef 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -83,7 +83,7 @@ identifiers, for different purposes: can be used on global variables to suppress mangling. #. Unnamed values are represented as an unsigned numeric value with their prefix. For example, ``%12``, ``@2``, ``%44``. -#. Constants, which are described in the section Constants_ below. +#. Constants, which are described in the section Constants_ below. LLVM requires that values start with a prefix for two reasons: Compilers don't need to worry about name clashes with reserved words, and the set @@ -204,14 +204,15 @@ linkage: (``STB_LOCAL`` in the case of ELF) in the object file. This corresponds to the notion of the '``static``' keyword in C. ``available_externally`` - Globals with "``available_externally``" linkage are never emitted - into the object file corresponding to the LLVM module. They exist to - allow inlining and other optimizations to take place given knowledge - of the definition of the global, which is known to be somewhere - outside the module. Globals with ``available_externally`` linkage - are allowed to be discarded at will, and are otherwise the same as - ``linkonce_odr``. This linkage type is only allowed on definitions, - not declarations. + Globals with "``available_externally``" linkage are never emitted into + the object file corresponding to the LLVM module. From the linker's + perspective, an ``available_externally`` global is equivalent to + an external declaration. They exist to allow inlining and other + optimizations to take place given knowledge of the definition of the + global, which is known to be somewhere outside the module. Globals + with ``available_externally`` linkage are allowed to be discarded at + will, and allow inlining and other optimizations. This linkage type is + only allowed on definitions, not declarations. ``linkonce`` Globals with "``linkonce``" linkage are merged with other globals of the same name when linkage occurs. This can be used to implement @@ -257,7 +258,7 @@ linkage: Some languages allow differing globals to be merged, such as two functions with different semantics. Other languages, such as ``C++``, ensure that only equivalent globals are ever merged (the - "one definition rule" --- "ODR"). Such languages can use the + "one definition rule" --- "ODR"). Such languages can use the ``linkonce_odr`` and ``weak_odr`` linkage types to indicate that the global will only be merged with equivalent globals. These linkage types are otherwise the same as their non-``odr`` versions. @@ -406,6 +407,26 @@ added in the future: This calling convention, like the `PreserveMost` calling convention, will be used by a future version of the ObjectiveC runtime and should be considered experimental at this time. 
+"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + Clang generates an access function to access C++-style TLS. The access + function generally has an entry block, an exit block and an initialization + block that is run at the first time. The entry and exit blocks can access + a few TLS IR variables, each access will be lowered to a platform-specific + sequence. + + This calling convention aims to minimize overhead in the caller by + preserving as many registers as possible (all the registers that are + perserved on the fast path, composed of the entry and exit blocks). + + This calling convention behaves identical to the `C` calling convention on + how arguments and return values are passed, but it uses a different set of + caller/callee-saved registers. + + Given that each platform has its own lowering sequence, hence its own set + of preserved registers, we can't use the existing `PreserveMost`. + + - On X86-64 the callee preserves all general purpose registers, except for + RDI and RAX. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific @@ -491,26 +512,29 @@ more information on under which circumstances the different models may be used. The target may choose a different TLS model if the specified model is not supported, or if a better choice of model can be made. -A model can also be specified in a alias, but then it only governs how +A model can also be specified in an alias, but then it only governs how the alias is accessed. It will not have any effect in the aliasee. +For platforms without linker support of ELF TLS model, the -femulated-tls +flag can be used to generate GCC compatible emulated TLS code. + .. _namedtypes: Structure Types --------------- LLVM IR allows you to specify both "identified" and "literal" :ref:`structure -types `. Literal types are uniqued structurally, but identified types -are never uniqued. An :ref:`opaque structural type ` can also be used +types `. Literal types are uniqued structurally, but identified types +are never uniqued. An :ref:`opaque structural type ` can also be used to forward declare a type that is not yet available. -An example of a identified structure specification is: +An example of an identified structure specification is: .. code-block:: llvm %mytype = type { %mytype*, i32 } -Prior to the LLVM 3.0 release, identified types were structurally uniqued. Only +Prior to the LLVM 3.0 release, identified types were structurally uniqued. Only literal types are uniqued in recent versions of LLVM. .. _globalvars: @@ -569,7 +593,7 @@ support. By default, global initializers are optimized by assuming that global variables defined within the module are not modified from their -initial values before the start of the global initializer. This is +initial values before the start of the global initializer. This is true even for variables potentially accessible from outside the module, including those with external linkage or appearing in ``@llvm.used`` or dllexported variables. This assumption may be suppressed @@ -637,6 +661,7 @@ an optional :ref:`comdat `, an optional :ref:`garbage collector name `, an optional :ref:`prefix `, an optional :ref:`prologue `, an optional :ref:`personality `, +an optional list of attached :ref:`metadata `, an opening curly brace, a list of basic blocks, and a closing curly brace. 
 LLVM function declarations consist of the "``declare``" keyword, an
@@ -685,10 +710,10 @@ Syntax::
 
     @<FunctionName> ([argument list]) [unnamed_addr] [fn Attrs]
     [section "name"] [comdat [($name)]] [align N] [gc]
     [prefix Constant] [prologue Constant]
-    [personality Constant] { ... }
+    [personality Constant] (!name !N)* { ... }
 
-The argument list is a comma seperated sequence of arguments where each
-argument is of the following form
+The argument list is a comma separated sequence of arguments where each
+argument is of the following form:
 
 Syntax::
 
@@ -712,7 +737,7 @@ Aliases may have an optional :ref:`linkage type `, an optional
 
 Syntax::
 
-    @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [unnamed_addr] alias @<Aliasee>
+    @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [unnamed_addr] alias <AliaseeTy>, <AliaseeTy>* @<Aliasee>
 
 The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``,
 ``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers
@@ -742,9 +767,9 @@ Comdats
 
 Comdat IR provides access to COFF and ELF object file COMDAT functionality.
 
-Comdats have a name which represents the COMDAT key. All global objects that
+Comdats have a name which represents the COMDAT key. All global objects that
 specify this key will only end up in the final object file if the linker chooses
-that key over some other key. Aliases are placed in the same COMDAT that their
+that key over some other key. Aliases are placed in the same COMDAT that their
 aliasee computes to, if any.
 
 Comdats have a selection kind to provide input on how the linker should
@@ -819,13 +844,13 @@ For example:
 
     @g2 = global i32 42, section "sec", comdat($bar)
 
 From the object file perspective, this requires the creation of two sections
-with the same name. This is necessary because both globals belong to different
+with the same name. This is necessary because both globals belong to different
 COMDAT groups and COMDATs, at the object file level, are represented by
 sections.
 
 Note that certain IR constructs like global variables and functions may
 create COMDATs in the object file in addition to any which are specified using
-COMDAT IR. This arises when the code generator is configured to emit globals
+COMDAT IR. This arises when the code generator is configured to emit globals
 in individual sections (e.g. when `-data-sections` or `-function-sections`
 is supplied to `llc`).
@@ -891,7 +916,7 @@ Currently, only the following parameter attributes are defined:
     the callee (for a return value).
 ``inreg``
     This indicates that this parameter or return value should be treated
-    in a special target-dependent fashion during while emitting code for
+    in a special target-dependent fashion while emitting code for
     a function call or return (usually, by putting it in a register as
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
@@ -919,23 +944,23 @@ Currently, only the following parameter attributes are defined:
 
 ``inalloca``
     The ``inalloca`` argument attribute allows the caller to take the
-    address of outgoing stack arguments. An ``inalloca`` argument must
+    address of outgoing stack arguments. An ``inalloca`` argument must
     be a pointer to stack memory produced by an ``alloca`` instruction.
     The alloca, or argument allocation, must also be tagged with the
-    inalloca keyword. Only the last argument may have the ``inalloca``
+    inalloca keyword. Only the last argument may have the ``inalloca``
     attribute, and that argument is guaranteed to be passed in memory.
 
     An argument allocation may be used by a call at most once because
-    the call may deallocate it. The ``inalloca`` attribute cannot be
+    the call may deallocate it. The ``inalloca`` attribute cannot be
    used in conjunction with other attributes that affect argument
-    storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
+    storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
    ``inalloca`` attribute also disables LLVM's implicit lowering of
    large aggregate return values, which means that frontend authors
    must lower them with ``sret`` pointers.
 
    When the call site is reached, the argument allocation must have
    been the most recent stack allocation that is still live, or the
-    results are undefined. It is possible to allocate additional stack
+    results are undefined. It is possible to allocate additional stack
    space after an argument allocation and before its call site, but it
    must be cleared off with :ref:`llvm.stackrestore `.
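+
+    As a minimal sketch, assuming a hypothetical callee ``@f`` that takes a
+    single ``i32`` through an argument allocation:
+
+    .. code-block:: llvm
+
+        %argmem = alloca inalloca <{ i32 }>
+        %p = getelementptr <{ i32 }>, <{ i32 }>* %argmem, i32 0, i32 0
+        store i32 42, i32* %p
+        call void @f(<{ i32 }>* inalloca %argmem)
+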
@@ -1024,14 +1049,14 @@ Currently, only the following parameter attributes are defined:
 
 ``dereferenceable_or_null(<n>)``
     This indicates that the parameter or return value isn't both
     non-null and non-dereferenceable (up to ``<n>`` bytes) at the same
-    time. All non-null pointers tagged with
+    time. All non-null pointers tagged with
     ``dereferenceable_or_null(<n>)`` are ``dereferenceable(<n>)``. For address
     space 0 ``dereferenceable_or_null(<n>)`` implies that a pointer is exactly
     one of ``dereferenceable(<n>)`` or ``null``, and in other address spaces
     ``dereferenceable_or_null(<n>)`` implies that a pointer is at least one of
     ``dereferenceable(<n>)`` or ``null`` (i.e. it may be both ``null`` and
-    ``dereferenceable(<n>)``). This attribute may only be applied to
+    ``dereferenceable(<n>)``). This attribute may only be applied to
     pointer typed parameters.
 
 .. _gc:
@@ -1047,9 +1072,9 @@ string:
 
     define void @f() gc "name" { ... }
 
 The supported values of *name* include those :ref:`built in to LLVM
-` and any provided by loaded plugins. Specifying a GC
+` and any provided by loaded plugins. Specifying a GC
 strategy will cause the compiler to alter its output in order to support the
-named garbage collection algorithm. Note that LLVM itself does not contain a
+named garbage collection algorithm. Note that LLVM itself does not contain a
 garbage collector; this functionality is restricted to generating machine code
 which can interoperate with a collector provided externally.
@@ -1067,7 +1092,7 @@ function pointer to be called.
 
 To access the data for a given function, a program may bitcast the
 function pointer to a pointer to the constant's type and dereference
-index -1. This implies that the IR symbol points just past the end of
+index -1. This implies that the IR symbol points just past the end of
 the prefix data.
 
 For instance, take the example of a function annotated with a single
 ``i32``,
 
 .. code-block:: llvm
 
     define void @f() prefix i32 123 { ... }
 
 The prefix data can be referenced as,
@@ -1084,14 +1109,14 @@
 
     %b = load i32, i32* %a
 
 Prefix data is laid out as if it were an initializer for a global variable
-of the prefix data's type. The function will be placed such that the
+of the prefix data's type. The function will be placed such that the
 beginning of the prefix data is aligned. This means that if the size of the
 prefix data is not a multiple of the alignment size, the function's
 entrypoint will not be aligned. If alignment of the function's entrypoint
 is desired, padding must be added to the prefix data.
 
-A function may have prefix data but no body. This has similar semantics
+A function may have prefix data but no body. This has similar semantics
 to the ``available_externally`` linkage in that the data may be used by the
 optimizers but will not be emitted in the object file.
@@ -1105,12 +1130,12 @@
 be inserted prior to the function body. This can be used for enabling
 function hot-patching and instrumentation.
 
 To maintain the semantics of ordinary function calls, the prologue data must
-have a particular format. Specifically, it must begin with a sequence of
+have a particular format. Specifically, it must begin with a sequence of
 bytes which decode to a sequence of machine instructions, valid for the
 module's target, which transfer control to the point immediately succeeding
-the prologue data, without performing any other visible action. This allows
+the prologue data, without performing any other visible action. This allows
 the inliner and other passes to reason about the semantics of the function
-definition without needing to reason about the prologue data. Obviously this
+definition without needing to reason about the prologue data. Obviously this
 makes the format of the prologue data highly target dependent.
 
 A trivial example of valid prologue data for the x86 architecture is ``i8 144``,
@@ -1130,7 +1155,7 @@ x86_64 architecture, where the first two bytes encode ``jmp .+10``:
 
     define void @f() prologue %0 <{ i8 235, i8 8, i8* @md}> { ... }
 
-A function may have prologue data but no body. This has similar semantics
+A function may have prologue data but no body. This has similar semantics
 to the ``available_externally`` linkage in that the data may be used by the
 optimizers but will not be emitted in the object file.
@@ -1216,10 +1241,16 @@ example:
 ``convergent``
     This attribute indicates that the callee is dependent on a convergent
     thread execution pattern under certain parallel execution models.
-    Transformations that are execution model agnostic may only move or
-    tranform this call if the final location is control equivalent to its
-    original position in the program, where control equivalence is defined as
-    A dominates B and B post-dominates A, or vice versa.
+    Transformations that are execution model agnostic may not make the execution
+    of a convergent operation control dependent on any additional values.
+``inaccessiblememonly``
+    This attribute indicates that the function may only access memory that
+    is not accessible by the module being compiled. This is a weaker form
+    of ``readnone``.
+``inaccessiblemem_or_argmemonly``
+    This attribute indicates that the function may only access memory that is
+    either not accessible by the module being compiled, or is pointed to
+    by its pointer arguments. This is a weaker form of ``argmemonly``.
 ``inlinehint``
     This attribute indicates that the source code contained a hint that
     inlining this function is desirable (such as the "inline" keyword in
@@ -1275,6 +1306,10 @@ example:
     This function attribute indicates that the function never returns
     normally. This produces undefined behavior at runtime if the
     function ever does dynamically return.
+``norecurse``
+    This function attribute indicates that the function does not call itself
+    either directly or indirectly down any possible call path. This produces
+    undefined behavior at runtime if the function ever does recurse.
 ``nounwind``
     This function attribute indicates that the function never raises an
     exception.
 If the function does raise an exception, its runtime
@@ -1283,9 +1318,9 @@ example:
     that are recognized by LLVM to handle asynchronous exceptions, such
     as SEH, will still provide their implementation defined semantics.
 ``optnone``
-    This function attribute indicates that the function is not optimized
-    by any optimization or code generator passes with the
-    exception of interprocedural optimization passes.
+    This function attribute indicates that most optimization passes will skip
+    this function, with the exception of interprocedural optimization passes.
+    Code generation defaults to the "fast" instruction selector.
     This attribute cannot be used together with the ``alwaysinline``
     attribute; this attribute is also incompatible
     with the ``minsize`` attribute and the ``optsize`` attribute.
@@ -1399,7 +1434,7 @@ example:
 ``sspstrong``
     This attribute indicates that the function should emit a stack smashing
     protector. This attribute causes a strong heuristic to be used when
-    determining if a function needs stack protectors. The strong heuristic
+    determining if a function needs stack protectors. The strong heuristic
     will enable protectors for functions with:
 
     - Arrays of any size and type
@@ -1430,11 +1465,129 @@ example:
     match the thunk target prototype.
 ``uwtable``
     This attribute indicates that the ABI being targeted requires that
-    an unwind table entry be produce for this function even if we can
+    an unwind table entry be produced for this function even if we can
     show that no exceptions pass by it. This is normally the case for
     the ELF x86-64 abi, but it can be disabled for some compilation
     units.
+
+.. _opbundles:
+
+Operand Bundles
+---------------
+
+Note: operand bundles are a work in progress, and they should be
+considered experimental at this time.
+
+Operand bundles are tagged sets of SSA values that can be associated
+with certain LLVM instructions (currently only ``call`` s and
+``invoke`` s). In a way they are like metadata, but dropping them is
+incorrect and will change program semantics.
+
+Syntax::
+
+    operand bundle set ::= '[' operand bundle (, operand bundle )* ']'
+    operand bundle ::= tag '(' [ bundle operand ] (, bundle operand )* ')'
+    bundle operand ::= SSA value
+    tag ::= string constant
+
+Operand bundles are **not** part of a function's signature, and a
+given function may be called from multiple places with different kinds
+of operand bundles. This reflects the fact that the operand bundles
+are conceptually a part of the ``call`` (or ``invoke``), not the
+callee being dispatched to.
+
+Operand bundles are a generic mechanism intended to support
+runtime-introspection-like functionality for managed languages. While
+the exact semantics of an operand bundle depend on the bundle tag,
+there are certain limitations to how much the presence of an operand
+bundle can influence the semantics of a program. These restrictions
+are described as the semantics of an "unknown" operand bundle. As
+long as the behavior of an operand bundle is describable within these
+restrictions, LLVM does not need to have special knowledge of the
+operand bundle to not miscompile programs containing it.
+
+- The bundle operands for an unknown operand bundle escape in unknown
+  ways before control is transferred to the callee or invokee.
+- Calls and invokes with operand bundles have unknown read / write
+  effect on the heap on entry and exit (even if the call target is
+  ``readnone`` or ``readonly``), unless they're overridden with
+  callsite specific attributes.
+- An operand bundle at a call site cannot change the implementation
+  of the called function. Inter-procedural optimizations work as
+  usual as long as they take into account the first two properties.
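+
+To make the syntax concrete, here is a call site carrying a hypothetical
+unknown operand bundle (the tag and operands are purely illustrative):
+
+.. code-block:: llvm
+
+    call void @g(i32 %x) [ "my-tag"(i32 %x, i64 10) ]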
+
+More specific types of operand bundles are described below.
+
+Deoptimization Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Deoptimization operand bundles are characterized by the ``"deopt"``
+operand bundle tag. These operand bundles represent an alternate
+"safe" continuation for the call site they're attached to, and can be
+used by a suitable runtime to deoptimize the compiled frame at the
+specified call site. There can be at most one ``"deopt"`` operand
+bundle attached to a call site. Exact details of deoptimization are
+out of scope for the language reference, but it usually involves
+rewriting a compiled frame into a set of interpreted frames.
+
+From the compiler's perspective, deoptimization operand bundles make
+the call sites they're attached to at least ``readonly``. They read
+through all of their pointer typed operands (even if they're not
+otherwise escaped) and the entire visible heap. Deoptimization
+operand bundles do not capture their operands except during
+deoptimization, in which case control will not be returned to the
+compiled frame.
+
+The inliner knows how to inline through calls that have deoptimization
+operand bundles. Just like inlining through a normal call site
+involves composing the normal and exceptional continuations, inlining
+through a call site with a deoptimization operand bundle needs to
+appropriately compose the "safe" deoptimization continuation. The
+inliner does this by prepending the parent's deoptimization
+continuation to every deoptimization continuation in the inlined body.
+E.g. inlining ``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+    define void @f() {
+      call void @x()  ;; no deopt state
+      call void @y() [ "deopt"(i32 10) ]
+      call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+    define void @g() {
+      call void @f() [ "deopt"(i32 20) ]
+      ret void
+    }
+
+will result in
+
+.. code-block:: llvm
+
+    define void @g() {
+      call void @x()  ;; still no deopt state
+      call void @y() [ "deopt"(i32 20, i32 10) ]
+      call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+It is the frontend's responsibility to structure or encode the
+deoptimization state in a way that syntactically prepending the
+caller's deoptimization state to the callee's deoptimization state is
+semantically equivalent to composing the caller's deoptimization
+continuation after the callee's deoptimization continuation.
+
+Funclet Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Funclet operand bundles are characterized by the ``"funclet"``
+operand bundle tag. These operand bundles indicate that a call site
+is within a particular funclet. There can be at most one
+``"funclet"`` operand bundle attached to a call site and it must have
+exactly one bundle operand.
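+
+For example, reusing the ``cleanuppad`` syntax shown earlier, a call to a
+hypothetical destructor from inside a cleanup funclet would be annotated as:
+
+.. code-block:: llvm
+
+    %cp = cleanuppad within none []
+    call void @dtor(i8* %ptr) [ "funclet"(token %cp) ]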
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
@@ -1494,8 +1647,8 @@ as follows:
 ``p[n]:<size>:<abi>:<pref>``
     This specifies the *size* of a pointer and its ``<abi>`` and
     ``<pref>``\erred alignments for address space ``n``. All sizes are in
-    bits. The address space, ``n`` is optional, and if not specified,
-    denotes the default address space 0. The value of ``n`` must be
+    bits. The address space, ``n``, is optional, and if not specified,
+    denotes the default address space 0. The value of ``n`` must be
     in the range [1,2^23).
 ``i<size>:<abi>:<pref>``
     This specifies the alignment for an integer type of a given bit
@@ -1521,6 +1674,8 @@ as follows:
     symbols get a ``_`` prefix.
   * ``w``: Windows COFF prefix: Similar to Mach-O, but stdcall and fastcall
     functions also get a suffix based on the frame size.
+  * ``x``: Windows x86 COFF prefix: Similar to Windows COFF, but use a ``_``
+    prefix for ``__cdecl`` functions.
 ``n<size1>:<size2>:<size3>...``
     This specifies a set of native integer widths for the target CPU in
     bits. For example, it might contain ``n32`` for 32-bit PowerPC,
@@ -1687,7 +1842,7 @@ target-legal volatile load/store instructions.
 this holds for an l-value of volatile primitive type with native hardware
 support, but not necessarily for aggregate types. The frontend upholds these
 expectations, which are intentionally
-unspecified in the IR. The rules above ensure that IR transformation
+unspecified in the IR. The rules above ensure that IR transformations
 do not violate the frontend's contract with the language.
 
 .. _memmodel:
@@ -1877,12 +2032,12 @@ Use-list Order Directives
 -------------------------
 
 Use-list directives encode the in-memory order of each use-list, allowing the
-order to be recreated. ``<order-indexes>`` is a comma-separated list of
-indexes that are assigned to the referenced value's uses. The referenced
+order to be recreated. ``<order-indexes>`` is a comma-separated list of
+indexes that are assigned to the referenced value's uses. The referenced
 value's use-list is immediately sorted by these indexes.
 
-Use-list directives may appear at function scope or global scope. They are not
-instructions, and have no effect on the semantics of the IR. When they're at
+Use-list directives may appear at function scope or global scope. They are not
+instructions, and have no effect on the semantics of the IR. When they're at
 function scope, they must appear after the terminator of the final basic block.
 
 If basic blocks have their address taken via ``blockaddress()`` expressions,
@@ -1969,9 +2124,9 @@ and :ref:`metadata ` types.
 
 ...where '``<parameter list>``' is a comma-separated list of type
 specifiers. Optionally, the parameter list may include a type ``...``, which
-indicates that the function takes a variable number of arguments. Variable
-argument functions can access their arguments with the :ref:`variable argument
-handling intrinsic ` functions. '``<returntype>``' is any type
+indicates that the function takes a variable number of arguments. Variable
+argument functions can access their arguments with the :ref:`variable argument
+handling intrinsic ` functions. '``<returntype>``' is any type
 except :ref:`label ` and :ref:`metadata `.
 
 :Examples:
 
@@ -2165,6 +2320,26 @@ The label type represents code labels.
 
     label
 
+.. _t_token:
+
+Token Type
+^^^^^^^^^^
+
+:Overview:
+
+The token type is used when a value is associated with an instruction
+but all uses of the value must not attempt to introspect or obscure it.
+As such, it is not appropriate to have a :ref:`phi ` or
+:ref:`select ` of type token.
+
+:Syntax:
+
+::
+
+    token
+
+
+
 .. _t_metadata:
 
 Metadata Type
@@ -2338,6 +2513,9 @@ Simple Constants
 
 **Null pointer constants**
     The identifier '``null``' is recognized as a null pointer constant
     and must be of :ref:`pointer type `.
+**Token constants**
+    The identifier '``none``' is recognized as an empty token constant
+    and must be of :ref:`token type `.
 
 The one non-intuitive notation for constants is the hexadecimal form of
 floating point constants. For example, the form
@@ -2406,8 +2584,8 @@ constants and smaller complex constants.
    having to print large zero initializers (e.g. for large arrays) and is
    always exactly equivalent to using explicit zero initializers.
**Metadata node** - A metadata node is a constant tuple without types. For example: - "``!{!0, !{!2, !0}, !"test"}``". Metadata can reference constant values, + A metadata node is a constant tuple without types. For example: + "``!{!0, !{!2, !0}, !"test"}``". Metadata can reference constant values, for example: "``!{!0, i32 0, i8* @global, i64 (i64)* @function, !"str"}``". Unlike other typed constants that are meant to be interpreted as part of the instruction stream, metadata is a place to attach additional @@ -3325,7 +3503,7 @@ and GCC likely indicates a bug in LLVM. Target-independent: -- ``c``: Print an immediate integer constant unadorned, without +- ``c``: Print an immediate integer constant unadorned, without the target-specific immediate punctuation (e.g. no ``$`` prefix). - ``n``: Negate and print immediate integer constant unadorned, without the target-specific immediate punctuation (e.g. no ``$`` prefix). @@ -3505,7 +3683,7 @@ that can convey extra information about the code to the optimizers and code generator. One example application of metadata is source-level debug information. There are two metadata primitives: strings and nodes. -Metadata does not have a type, and is not a value. If referenced from a +Metadata does not have a type, and is not a value. If referenced from a ``call`` instruction, it uses the ``metadata`` type. All metadata are identified in syntax by a exclamation point ('``!``'). @@ -3536,7 +3714,7 @@ Metadata nodes that aren't uniqued use the ``distinct`` keyword. For example: !0 = distinct !{!"test\00", i32 10} ``distinct`` nodes are useful when nodes shouldn't be merged based on their -content. They can also occur when transformations cause uniquing collisions +content. They can also occur when transformations cause uniquing collisions when metadata operands change. A :ref:`named metadata ` is a collection of @@ -3554,13 +3732,22 @@ function is using two metadata arguments: call void @llvm.dbg.value(metadata !24, i64 0, metadata !25) -Metadata can be attached with an instruction. Here metadata ``!21`` is -attached to the ``add`` instruction using the ``!dbg`` identifier: +Metadata can be attached to an instruction. Here metadata ``!21`` is attached +to the ``add`` instruction using the ``!dbg`` identifier: .. code-block:: llvm %indvar.next = add i64 %indvar, 1, !dbg !21 +Metadata can also be attached to a function definition. Here metadata ``!22`` +is attached to the ``foo`` function using the ``!dbg`` identifier: + +.. code-block:: llvm + + define void @foo() !dbg !22 { + ret void + } + More information about specific metadata nodes recognized by the optimizers and code generator is found below. @@ -3570,7 +3757,7 @@ Specialized Metadata Nodes ^^^^^^^^^^^^^^^^^^^^^^^^^^ Specialized metadata nodes are custom data structures in metadata (as opposed -to generic tuples). Their fields are labelled, and can be specified in any +to generic tuples). Their fields are labelled, and can be specified in any order. These aren't inherently debug info centric, but currently all the specialized @@ -3581,10 +3768,10 @@ metadata nodes are related to debug info. DICompileUnit """"""""""""" -``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:`` and ``imports:`` fields are -tuples containing the debug info to be emitted along with the compile unit, -regardless of code optimizations (some nodes are only emitted if there are +``DICompileUnit`` nodes represent a compile unit. 
The ``enums:``, +``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` +fields are tuples containing the debug info to be emitted along with the compile +unit, regardless of code optimizations (some nodes are only emitted if there are references to them from instructions). .. code-block:: llvm @@ -3593,11 +3780,11 @@ references to them from instructions). isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6) + globals: !5, imports: !6, macros: !7, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a -specific compilation unit. File descriptors are defined using this scope. -These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They +specific compilation unit. File descriptors are defined using this scope. +These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They keep track of subprograms, global variables, type information, and imported entities (declarations and namespaces). @@ -3606,7 +3793,7 @@ entities (declarations and namespaces). DIFile """""" -``DIFile`` nodes represent files. The ``filename:`` can include slashes. +``DIFile`` nodes represent files. The ``filename:`` can include slashes. .. code-block:: llvm @@ -3621,7 +3808,7 @@ DIBasicType """"""""""" ``DIBasicType`` nodes represent primitive types, such as ``int``, ``bool`` and -``float``. ``tag:`` defaults to ``DW_TAG_base_type``. +``float``. ``tag:`` defaults to ``DW_TAG_base_type``. .. code-block:: llvm @@ -3629,7 +3816,7 @@ DIBasicType encoding: DW_ATE_unsigned_char) !1 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "decltype(nullptr)") -The ``encoding:`` describes the details of the type. Usually it's one of the +The ``encoding:`` describes the details of the type. Usually it's one of the following: .. code-block:: llvm @@ -3647,9 +3834,9 @@ following: DISubroutineType """""""""""""""" -``DISubroutineType`` nodes represent subroutine types. Their ``types:`` field +``DISubroutineType`` nodes represent subroutine types. Their ``types:`` field refers to a tuple; the first operand is the return type, while the rest are the -types of the formal arguments in order. If the first operand is ``null``, that +types of the formal arguments in order. If the first operand is ``null``, that represents a function with no return value (such as ``void foo() {}`` in C++). .. code-block:: llvm @@ -3688,8 +3875,8 @@ The following ``tag:`` values are valid: DW_TAG_restrict_type = 55 ``DW_TAG_member`` is used to define a member of a :ref:`composite type -` or :ref:`subprogram `. The type of the member -is the ``baseType:``. The ``offset:`` is the member's bit offset. +` or :ref:`subprogram `. The type of the member +is the ``baseType:``. The ``offset:`` is the member's bit offset. ``DW_TAG_formal_parameter`` is used to define a member which is a formal argument of a subprogram. @@ -3707,10 +3894,10 @@ DICompositeType """"""""""""""" ``DICompositeType`` nodes represent types composed of other types, like -structures and unions. ``elements:`` points to a tuple of the composed types. +structures and unions. ``elements:`` points to a tuple of the composed types. If the source language supports ODR, the ``identifier:`` field gives the unique -identifier used for type merging between modules. When specified, other types +identifier used for type merging between modules. 
When specified, other types can refer to composite types indirectly via a :ref:`metadata string ` that matches their identifier. @@ -3738,12 +3925,12 @@ The following ``tag:`` values are valid: For ``DW_TAG_array_type``, the ``elements:`` should be :ref:`subrange descriptors `, each representing the range of subscripts at that -level of indexing. The ``DIFlagVector`` flag to ``flags:`` indicates that an +level of indexing. The ``DIFlagVector`` flag to ``flags:`` indicates that an array type is a native packed vector. For ``DW_TAG_enumeration_type``, the ``elements:`` should be :ref:`enumerator descriptors `, each representing the definition of an enumeration -value for the set. All enumeration type descriptors are collected in the +value for the set. All enumeration type descriptors are collected in the ``enums:`` field of the :ref:`compile unit `. For ``DW_TAG_structure_type``, ``DW_TAG_class_type``, and @@ -3756,7 +3943,7 @@ DISubrange """""""""" ``DISubrange`` nodes are the elements for ``DW_TAG_array_type`` variants of -:ref:`DICompositeType`. ``count: -1`` indicates an empty array. +:ref:`DICompositeType`. ``count: -1`` indicates an empty array. .. code-block:: llvm @@ -3782,7 +3969,7 @@ DITemplateTypeParameter """"""""""""""""""""""" ``DITemplateTypeParameter`` nodes represent type parameters to generic source -language constructs. They are used (optionally) in :ref:`DICompositeType` and +language constructs. They are used (optionally) in :ref:`DICompositeType` and :ref:`DISubprogram` ``templateParams:`` fields. .. code-block:: llvm @@ -3793,9 +3980,9 @@ DITemplateValueParameter """""""""""""""""""""""" ``DITemplateValueParameter`` nodes represent value parameters to generic source -language constructs. ``tag:`` defaults to ``DW_TAG_template_value_parameter``, +language constructs. ``tag:`` defaults to ``DW_TAG_template_value_parameter``, but if specified can also be set to ``DW_TAG_GNU_template_template_param`` or -``DW_TAG_GNU_template_param_pack``. They are used (optionally) in +``DW_TAG_GNU_template_param_pack``. They are used (optionally) in :ref:`DICompositeType` and :ref:`DISubprogram` ``templateParams:`` fields. .. code-block:: llvm @@ -3831,20 +4018,26 @@ All global variables should be referenced by the `globals:` field of a DISubprogram """""""""""" -``DISubprogram`` nodes represent functions from the source language. The -``variables:`` field points at :ref:`variables ` that must be -retained, even if their IR counterparts are optimized out of the IR. The -``type:`` field must point at an :ref:`DISubroutineType`. +``DISubprogram`` nodes represent functions from the source language. A +``DISubprogram`` may be attached to a function definition using ``!dbg`` +metadata. The ``variables:`` field points at :ref:`variables ` +that must be retained, even if their IR counterparts are optimized out of +the IR. The ``type:`` field must point at an :ref:`DISubroutineType`. .. code-block:: llvm - !0 = !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, - file: !2, line: 7, type: !3, isLocal: true, - isDefinition: false, scopeLine: 8, containingType: !4, - virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, - flags: DIFlagPrototyped, isOptimized: true, - function: void ()* @_Z3foov, - templateParams: !5, declaration: !6, variables: !7) + define void @_Z3foov() !dbg !0 { + ... 
+ } + + !0 = distinct !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, + file: !2, line: 7, type: !3, isLocal: true, + isDefinition: false, scopeLine: 8, + containingType: !4, + virtuality: DW_VIRTUALITY_pure_virtual, + virtualIndex: 10, flags: DIFlagPrototyped, + isOptimized: true, templateParams: !5, + declaration: !6, variables: !7) .. _DILexicalBlock: @@ -3852,8 +4045,8 @@ DILexicalBlock """""""""""""" ``DILexicalBlock`` nodes describe nested blocks within a :ref:`subprogram -`. The line number and column numbers are used to dinstinguish -two lexical blocks at same depth. They are valid targets for ``scope:`` +`. The line number and column numbers are used to distinguish +two lexical blocks at same depth. They are valid targets for ``scope:`` fields. .. code-block:: llvm @@ -3869,7 +4062,7 @@ DILexicalBlockFile """""""""""""""""" ``DILexicalBlockFile`` nodes are used to discriminate between sections of a -:ref:`lexical block `. The ``file:`` field can be changed to +:ref:`lexical block `. The ``file:`` field can be changed to indicate textual inclusion, or the ``discriminator:`` field can be used to discriminate between control flow within a single block in the source language. @@ -3884,7 +4077,7 @@ discriminate between control flow within a single block in the source language. DILocation """""""""" -``DILocation`` nodes represent source debug locations. The ``scope:`` field is +``DILocation`` nodes represent source debug locations. The ``scope:`` field is mandatory, and points at an :ref:`DILexicalBlockFile`, an :ref:`DILexicalBlock`, or an :ref:`DISubprogram`. @@ -3897,27 +4090,23 @@ mandatory, and points at an :ref:`DILexicalBlockFile`, an DILocalVariable """"""""""""""" -``DILocalVariable`` nodes represent local variables in the source language. -Instead of ``DW_TAG_variable``, they use LLVM-specific fake tags to -discriminate between local variables (``DW_TAG_auto_variable``) and subprogram -arguments (``DW_TAG_arg_variable``). In the latter case, the ``arg:`` field -specifies the argument position, and this variable will be included in the -``variables:`` field of its :ref:`DISubprogram`. +``DILocalVariable`` nodes represent local variables in the source language. If +the ``arg:`` field is set to non-zero, then this variable is a subprogram +parameter, and it will be included in the ``variables:`` field of its +:ref:`DISubprogram`. .. code-block:: llvm - !0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 0, - scope: !3, file: !2, line: 7, type: !3, - flags: DIFlagArtificial) - !1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, - scope: !4, file: !2, line: 7, type: !3) - !1 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", - scope: !5, file: !2, line: 7, type: !3) + !0 = !DILocalVariable(name: "this", arg: 1, scope: !3, file: !2, line: 7, + type: !3, flags: DIFlagArtificial) + !1 = !DILocalVariable(name: "x", arg: 2, scope: !4, file: !2, line: 7, + type: !3) + !2 = !DILocalVariable(name: "y", scope: !5, file: !2, line: 7, type: !3) DIExpression """""""""""" -``DIExpression`` nodes represent DWARF expression sequences. They are used in +``DIExpression`` nodes represent DWARF expression sequences. They are used in :ref:`debug intrinsics` (such as ``llvm.dbg.declare``) to describe how the referenced LLVM variable relates to the source language variable. @@ -3957,6 +4146,32 @@ compile unit. 
    !2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
                           entity: !1, line: 7)
 
+DIMacro
+"""""""
+
+``DIMacro`` nodes represent the definition or undefinition of a macro
+identifier. The ``name:`` field is the macro identifier, followed by macro
+parameters when defining a function-like macro, and the ``value:`` field is
+the token-string used to expand the macro identifier.
+
+.. code-block:: llvm
+
+    !2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)",
+                  value: "((x) + 1)")
+    !3 = !DIMacro(macinfo: DW_MACINFO_undef, line: 30, name: "foo")
+
+DIMacroFile
+"""""""""""
+
+``DIMacroFile`` nodes represent the inclusion of source files.
+The ``nodes:`` field is a list of ``DIMacro`` and ``DIMacroFile`` nodes that
+appear in the included source file.
+
+.. code-block:: llvm
+
+    !2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2,
+                      nodes: !3)
+
 '``tbaa``' Metadata
 ^^^^^^^^^^^^^^^^^^^
@@ -4041,13 +4256,13 @@ alias.
 The metadata identifying each domain is itself a list containing one or two
 entries. The first entry is the name of the domain. Note that if the name is a
-string then it can be combined accross functions and translation units. A
+string then it can be combined across functions and translation units. A
 self-reference can be used to create globally unique domain names. A
 descriptive string may optionally be provided as a second list entry.
 
 The metadata identifying each scope is also itself a list containing two or
 three entries. The first entry is the name of the scope. Note that if the name
-is a string then it can be combined accross functions and translation units. A
+is a string then it can be combined across functions and translation units. A
 self-reference can be used to create globally unique scope names. A metadata
 reference to the scope's domain is the second entry. A descriptive string may
 optionally be provided as a third list entry.
@@ -4144,6 +4359,16 @@ Examples:
     !2 = !{ i8 0, i8 2, i8 3, i8 6 }
     !3 = !{ i8 -2, i8 0, i8 3, i8 6 }
 
+'``unpredictable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``unpredictable`` metadata may be attached to any branch or switch
+instruction. It can be used to express the unpredictability of control
+flow. Similar to the ``llvm.expect`` intrinsic, it may be used to alter
+optimizations related to compare and branch instructions. The metadata
+is treated as a boolean value; if it exists, it signals that the branch
+or switch that it is attached to is completely unpredictable.
+
 '``llvm.loop``'
 ^^^^^^^^^^^^^^^
@@ -4182,11 +4407,11 @@ suggests an unroll factor to the loop unroller:
 
 Metadata prefixed with ``llvm.loop.vectorize`` or ``llvm.loop.interleave`` are
 used to control per-loop vectorization and interleaving parameters such as
-vectorization width and interleave count.  These metadata should be used in
-conjunction with ``llvm.loop`` loop identification metadata.  The
+vectorization width and interleave count. These metadata should be used in
+conjunction with ``llvm.loop`` loop identification metadata. The
 ``llvm.loop.vectorize`` and ``llvm.loop.interleave`` metadata are only
 optimization hints and the optimizer will only interleave and vectorize loops if
-it believes it is safe to do so.  The ``llvm.mem.parallel_loop_access`` metadata
+it believes it is safe to do so. The ``llvm.mem.parallel_loop_access`` metadata
 which contains information about loop-carried memory dependencies can be
 helpful in determining the safety of these transformations.
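To make these hints concrete, the following minimal sketch (block and value
names are hypothetical) attaches a loop identification node carrying both a
vectorization width and an interleave count to a loop's latch branch:

.. code-block:: llvm

    loop:
      ...
      br i1 %exitcond, label %exit, label %loop, !llvm.loop !0

    !0 = distinct !{!0, !1, !2}
    !1 = !{!"llvm.loop.vectorize.width", i32 4}
    !2 = !{!"llvm.loop.interleave.count", i32 2}

The first operand of ``!0`` refers back to ``!0`` itself, the usual way of
keeping a loop identification node unique.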
@@ -4203,7 +4428,7 @@ example: !0 = !{!"llvm.loop.interleave.count", i32 4} Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving -multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 +multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 then the interleave count will be determined automatically. '``llvm.loop.vectorize.enable``' Metadata @@ -4211,7 +4436,7 @@ then the interleave count will be determined automatically. This metadata selectively enables or disables vectorization for the loop. The first operand is the string ``llvm.loop.vectorize.enable`` and the second operand -is a bit. If the bit operand value is 1 vectorization is enabled. A value of +is a bit. If the bit operand value is 1 vectorization is enabled. A value of 0 disables vectorization: .. code-block:: llvm @@ -4231,7 +4456,7 @@ operand is an integer specifying the width. For example: !0 = !{!"llvm.loop.vectorize.width", i32 4} Note that setting ``llvm.loop.vectorize.width`` to 1 disables -vectorization of the loop. If ``llvm.loop.vectorize.width`` is set to +vectorization of the loop. If ``llvm.loop.vectorize.width`` is set to 0 or if the loop does not have this metadata the width will be determined automatically. @@ -4264,7 +4489,7 @@ will be partially unrolled. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata disables loop unrolling. The metadata has a single operand -which is the string ``llvm.loop.unroll.disable``. For example: +which is the string ``llvm.loop.unroll.disable``. For example: .. code-block:: llvm @@ -4274,12 +4499,24 @@ which is the string ``llvm.loop.unroll.disable``. For example: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata disables runtime loop unrolling. The metadata has a single -operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: +operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: .. code-block:: llvm !0 = !{!"llvm.loop.unroll.runtime.disable"} +'``llvm.loop.unroll.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata suggests that the loop should be fully unrolled if the trip count +is known at compile time and partially unrolled if the trip count is not known +at compile time. The metadata has a single operand which is the string +``llvm.loop.unroll.enable``. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.unroll.enable"} + '``llvm.loop.unroll.full``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -4319,7 +4556,7 @@ loop. Note that if not all memory access instructions have such metadata referring to the loop, then the loop is considered not being trivially parallel. Additional -memory dependence analysis is required to make that determination. As a fail +memory dependence analysis is required to make that determination. As a fail safe mechanism, this causes loops that were originally parallel to be considered sequential (if optimization passes that are unaware of the parallel semantics insert new memory instructions into the loop body). @@ -4380,6 +4617,50 @@ the loop identifier metadata node directly: The ``llvm.bitsets`` global metadata is used to implement :doc:`bitsets `. +'``invariant.group``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions. 
+The existence of the ``invariant.group`` metadata on the instruction tells +the optimizer that every ``load`` and ``store`` to the same pointer operand +within the same invariant group can be assumed to load or store the same +value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects +when two pointers are considered the same). + +Examples: + +.. code-block:: llvm + + @unknownPtr = external global i8 + ... + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr + + %unknownValue = load i8, i8* @unknownPtr + store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42 + + call void @foo(i8* %ptr) + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr + + ... + declare void @foo(i8*) + declare i8* @getPointer(i8*) + declare i8* @llvm.invariant.group.barrier(i8*) + + !0 = !{!"magic ptr"} + !1 = !{!"other ptr"} + + + Module Flags Metadata ===================== @@ -4738,7 +5019,10 @@ control flow, not values (the one exception being the The terminator instructions are: ':ref:`ret `', ':ref:`br `', ':ref:`switch `', ':ref:`indirectbr `', ':ref:`invoke `', -':ref:`resume `', and ':ref:`unreachable `'. +':ref:`resume `', ':ref:`catchswitch `', +':ref:`catchret `', +':ref:`cleanupret `', +and ':ref:`unreachable `'. .. _i_ret: @@ -4970,7 +5254,7 @@ Syntax: :: = invoke [cconv] [ret attrs] () [fn attrs] - to label unwind label + [operand bundles] to label unwind label Overview: """"""""" @@ -5024,6 +5308,7 @@ This instruction requires several arguments: #. The optional :ref:`function attributes ` list. Only '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' attributes are valid here. +#. The optional :ref:`operand bundles ` list. Semantics: """""""""" @@ -5092,6 +5377,235 @@ Example: resume { i8*, i32 } %exn +.. _i_catchswitch: + +'``catchswitch``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = catchswitch within [ label , label , ... ] unwind to caller + = catchswitch within [ label , label , ... ] unwind label + +Overview: +""""""""" + +The '``catchswitch``' instruction is used by `LLVM's exception handling system +`_ to describe the set of possible catch handlers +that may be executed by the :ref:`EH personality routine `. + +Arguments: +"""""""""" + +The ``parent`` argument is the token of the funclet that contains the +``catchswitch`` instruction. If the ``catchswitch`` is not inside a funclet, +this operand may be the token ``none``. + +The ``default`` argument is the label of another basic block beginning with a +"pad" instruction, one of ``cleanuppad`` or ``catchswitch``. + +The ``handlers`` are a list of successor blocks that each begin with a +:ref:`catchpad ` instruction. + +Semantics: +"""""""""" + +Executing this instruction transfers control to one of the successors in +``handlers``, if appropriate, or continues to unwind via the unwind label if +present. 
+ +The ``catchswitch`` is both a terminator and a "pad" instruction, meaning that +it must be both the first non-phi instruction and last instruction in the basic +block. Therefore, it must be the only non-phi instruction in the block. + +Example: +"""""""" + +.. code-block:: llvm + + dispatch1: + %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller + dispatch2: + %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup + +.. _i_catchpad: + +'``catchpad``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = catchpad within [*] + +Overview: +""""""""" + +The '``catchpad``' instruction is used by `LLVM's exception handling +system `_ to specify that a basic block +begins a catch handler --- one where a personality routine attempts to transfer +control to catch an exception. + +Arguments: +"""""""""" + +The ``catchswitch`` operand must always be a token produced by a +:ref:`catchswitch ` instruction in a predecessor block. This +ensures that each ``catchpad`` has exactly one predecessor block, and it always +terminates in a ``catchswitch``. + +The ``args`` correspond to whatever information the personality routine +requires to know if this is an appropriate handler for the exception. Control +will transfer to the ``catchpad`` if this is the first appropriate handler for +the exception. + +The ``resultval`` has the type :ref:`token ` and is used to match the +``catchpad`` to corresponding :ref:`catchrets ` and other nested EH +pads. + +Semantics: +"""""""""" + +When the call stack is being unwound due to an exception being thrown, the +exception is compared against the ``args``. If it doesn't match, control will +not reach the ``catchpad`` instruction. The representation of ``args`` is +entirely target and personality function-specific. + +Like the :ref:`landingpad ` instruction, the ``catchpad`` +instruction must be the first non-phi of its parent basic block. + +The meaning of the tokens produced and consumed by ``catchpad`` and other "pad" +instructions is described in the +`Windows exception handling documentation `. + +Executing a ``catchpad`` instruction constitutes "entering" that pad. +The pad may then be "exited" in one of three ways: + +1) explicitly via a ``catchret`` that consumes it. Executing such a ``catchret`` + is undefined behavior if any descendant pads have been entered but not yet + exited. +2) implicitly via a call (which unwinds all the way to the current function's caller), + or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller. +3) implicitly via an unwind edge whose destination EH pad isn't a descendant of + the ``catchpad``. When the ``catchpad`` is exited in this manner, it is + undefined behavior if the destination EH pad has a parent which is not an + ancestor of the ``catchpad`` being exited. + +Example: +"""""""" + +.. code-block:: llvm + + dispatch: + %cs = catchswitch within none [label %handler0] unwind to caller + ;; A catch block which can catch an integer. + handler0: + %tok = catchpad within %cs [i8** @_ZTIi] + +.. _i_catchret: + +'``catchret``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + catchret from to label + +Overview: +""""""""" + +The '``catchret``' instruction is a terminator instruction that has a +single successor. + + +Arguments: +"""""""""" + +The first argument to a '``catchret``' indicates which ``catchpad`` it +exits. It must be a :ref:`catchpad `. 
+The second argument to a '``catchret``' specifies where control will +transfer to next. + +Semantics: +"""""""""" + +The '``catchret``' instruction ends an existing (in-flight) exception whose +unwinding was interrupted with a :ref:`catchpad ` instruction. The +:ref:`personality function ` gets a chance to execute arbitrary +code to, for example, destroy the active exception. Control then transfers to +``normal``. + +The ``token`` argument must be a token produced by a dominating ``catchpad`` +instruction. The ``catchret`` destroys the physical frame established by +``catchpad``, so executing multiple returns on the same token without +re-executing the ``catchpad`` will result in undefined behavior. +See :ref:`catchpad ` for more details. + +Example: +"""""""" + +.. code-block:: llvm + + catchret from %catch label %continue + +.. _i_cleanupret: + +'``cleanupret``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + cleanupret from unwind label + cleanupret from unwind to caller + +Overview: +""""""""" + +The '``cleanupret``' instruction is a terminator instruction that has +an optional successor. + + +Arguments: +"""""""""" + +The '``cleanupret``' instruction requires one argument, which indicates +which ``cleanuppad`` it exits, and must be a :ref:`cleanuppad `. +It also has an optional successor, ``continue``. + +Semantics: +"""""""""" + +The '``cleanupret``' instruction indicates to the +:ref:`personality function ` that one +:ref:`cleanuppad ` it transferred control to has ended. +It transfers control to ``continue`` or unwinds out of the function. + +The unwind destination ``continue``, if present, must be an EH pad +whose parent is either ``none`` or an ancestor of the ``cleanuppad`` +being returned from. This constitutes an exceptional exit from all +ancestors of the completed ``cleanuppad``, up to but not including +the parent of ``continue``. +See :ref:`cleanuppad ` for more details. + +Example: +"""""""" + +.. code-block:: llvm + + cleanupret from %cleanup unwind to caller + cleanupret from %cleanup unwind label %continue + .. _i_unreachable: '``unreachable``' Instruction @@ -6165,7 +6679,7 @@ Arguments: """""""""" The first operand of an '``extractvalue``' instruction is a value of -:ref:`struct ` or :ref:`array ` type. The operands are +:ref:`struct ` or :ref:`array ` type. The other operands are constant indices to specify which value to extract in a similar manner as indices in a '``getelementptr``' instruction. @@ -6312,9 +6826,11 @@ Syntax: :: - = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !] - = load atomic [volatile] * [singlethread] , align + = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] + = load atomic [volatile] * [singlethread] , align [, !invariant.group !] ! = !{ i32 1 } + ! = !{i64 } + ! = !{ i64 } Overview: """"""""" @@ -6331,17 +6847,16 @@ then the optimizer is not allowed to modify the number or order of execution of this ``load`` with other :ref:`volatile operations `. -If the ``load`` is marked as ``atomic``, it takes an extra -:ref:`ordering ` and optional ``singlethread`` argument. The -``release`` and ``acq_rel`` orderings are not valid on ``load`` -instructions. Atomic loads produce :ref:`defined ` results -when they may see multiple atomic stores. 
The type of the pointee must -be an integer type whose bit width is a power of two greater than or -equal to eight and less than or equal to a target-specific size limit. -``align`` must be explicitly specified on atomic loads, and the load has -undefined behavior if the alignment is not set to a value which is at -least the size in bytes of the pointee. ``!nontemporal`` does not have -any defined semantics for atomic loads. +If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering +` and optional ``singlethread`` argument. The ``release`` and +``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads +produce :ref:`defined ` results when they may see multiple atomic +stores. The type of the pointee must be an integer, pointer, or floating-point +type whose bit width is a power of two greater than or equal to eight and less +than or equal to a target-specific size limit. ``align`` must be explicitly +specified on atomic loads, and the load has undefined behavior if the alignment +is not set to a value which is at least the size in bytes of the +pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 @@ -6369,33 +6884,44 @@ Being invariant does not imply that a location is dereferenceable, but it does imply that once the location is known dereferenceable its value is henceforth unchanging. +The optional ``!invariant.group`` metadata must reference a single metadata name + ```` corresponding to a metadata node. See ``invariant.group`` metadata. + The optional ``!nonnull`` metadata must reference a single metadata name ```` corresponding to a metadata node with no entries. The existence of the ``!nonnull`` metadata on the instruction tells the optimizer that the value loaded is known to -never be null. This is analogous to the ''nonnull'' attribute -on parameters and return values. This metadata can only be applied +never be null. This is analogous to the ``nonnull`` attribute +on parameters and return values. This metadata can only be applied to loads of a pointer type. -The optional ``!dereferenceable`` metadata must reference a single -metadata name ```` corresponding to a metadata node with one ``i64`` -entry. The existence of the ``!dereferenceable`` metadata on the instruction +The optional ``!dereferenceable`` metadata must reference a single metadata +name ```` corresponding to a metadata node with one ``i64`` +entry. The existence of the ``!dereferenceable`` metadata on the instruction tells the optimizer that the value loaded is known to be dereferenceable. -The number of bytes known to be dereferenceable is specified by the integer -value in the metadata node. This is analogous to the ''dereferenceable'' -attribute on parameters and return values. This metadata can only be applied +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ''dereferenceable'' +attribute on parameters and return values. This metadata can only be applied to loads of a pointer type. The optional ``!dereferenceable_or_null`` metadata must reference a single -metadata name ```` corresponding to a metadata node with one ``i64`` -entry. The existence of the ``!dereferenceable_or_null`` metadata on the +metadata name ```` corresponding to a metadata node with one +``i64`` entry. 
The existence of the ``!dereferenceable_or_null`` metadata on the instruction tells the optimizer that the value loaded is known to be either dereferenceable or null. -The number of bytes known to be dereferenceable is specified by the integer -value in the metadata node. This is analogous to the ''dereferenceable_or_null'' -attribute on parameters and return values. This metadata can only be applied +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ''dereferenceable_or_null'' +attribute on parameters and return values. This metadata can only be applied to loads of a pointer type. +The optional ``!align`` metadata must reference a single metadata name +```` corresponding to a metadata node with one ``i64`` entry. +The existence of the ``!align`` metadata on the instruction tells the +optimizer that the value loaded is known to be aligned to a boundary specified +by the integer value in the metadata node. The alignment must be a power of 2. +This is analogous to the ''align'' attribute on parameters and return values. +This metadata can only be applied to loads of a pointer type. + Semantics: """""""""" @@ -6426,8 +6952,8 @@ Syntax: :: - store [volatile] , * [, align ][, !nontemporal !] ; yields void - store atomic [volatile] , * [singlethread] , align ; yields void + store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void + store atomic [volatile] , * [singlethread] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -6445,17 +6971,16 @@ then the optimizer is not allowed to modify the number or order of execution of this ``store`` with other :ref:`volatile operations `. -If the ``store`` is marked as ``atomic``, it takes an extra -:ref:`ordering ` and optional ``singlethread`` argument. The -``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` -instructions. Atomic loads produce :ref:`defined ` results -when they may see multiple atomic stores. The type of the pointee must -be an integer type whose bit width is a power of two greater than or -equal to eight and less than or equal to a target-specific size limit. -``align`` must be explicitly specified on atomic stores, and the store -has undefined behavior if the alignment is not set to a value which is -at least the size in bytes of the pointee. ``!nontemporal`` does not -have any defined semantics for atomic stores. +If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering +` and optional ``singlethread`` argument. The ``acquire`` and +``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads +produce :ref:`defined ` results when they may see multiple atomic +stores. The type of the pointee must be an integer, pointer, or floating-point +type whose bit width is a power of two greater than or equal to eight and less +than or equal to a target-specific size limit. ``align`` must be explicitly +specified on atomic stores, and the store has undefined behavior if the +alignment is not set to a value which is at least the size in bytes of the +pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 @@ -6474,6 +6999,9 @@ be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86. 
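For example, a frontend could mark a streaming store that should not displace
cache contents as follows (a minimal sketch; ``%val`` and ``%dst`` are
hypothetical):

.. code-block:: llvm

    store <4 x float> %val, <4 x float>* %dst, align 16, !nontemporal !0
    ...
    !0 = !{i32 1}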
+The optional ``!invariant.group`` metadata must reference a +single metadata name ````. See ``invariant.group`` metadata. + Semantics: """""""""" @@ -6869,15 +7397,15 @@ will be effectively broadcast into a vector during address calculation. ; All arguments are vectors: ; A[i] = ptrs[i] + offsets[i]*sizeof(i8) %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets - + ; Add the same scalar offset to each pointer of a vector: ; A[i] = ptrs[i] + offset*sizeof(i8) %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset - + ; Add distinct offsets to the same pointer: ; A[i] = ptr + offsets[i]*sizeof(i8) %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets - + ; In all cases described above the type of the result is <4 x i8*> The two following instructions are equivalent: @@ -6889,7 +7417,7 @@ The two following instructions are equivalent: <4 x i32> , <4 x i32> %ind4, <4 x i64> - + getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1, i32 2, i32 1, <4 x i32> %ind4, i64 13 @@ -7068,10 +7596,12 @@ implies that ``fptrunc`` cannot be used to make a *no-op cast*. Semantics: """""""""" -The '``fptrunc``' instruction truncates a ``value`` from a larger +The '``fptrunc``' instruction casts a ``value`` from a larger :ref:`floating point ` type to a smaller :ref:`floating -point ` type. If the value cannot fit within the -destination type, ``ty2``, then the results are undefined. +point ` type. If the value cannot fit (i.e. overflows) within the +destination type, ``ty2``, then the results are undefined. If the cast produces +an inexact result, how rounding is performed (e.g. truncation, also known as +round to zero) is undefined. Example: """""""" @@ -7403,7 +7933,7 @@ The '``bitcast``' instruction takes a value to cast, which must be a non-aggregate first class value, and a type to cast it to, which must also be a non-aggregate :ref:`first class ` type. The bit sizes of ``value`` and the destination type, ``ty2``, must be -identical. If the source type is a pointer, the destination type must +identical. If the source type is a pointer, the destination type must also be a pointer of the same size. This instruction supports bitwise conversion of vectors to integers and to vectors of other types (as long as they have the same size). @@ -7800,7 +8330,8 @@ Syntax: :: - = [tail | musttail] call [cconv] [ret attrs] [*] () [fn attrs] + = [tail | musttail | notail ] call [fast-math flags] [cconv] [ret attrs] [*] () [fn attrs] + [ operand bundles ] Overview: """"""""" @@ -7813,10 +8344,10 @@ Arguments: This instruction requires several arguments: #. The optional ``tail`` and ``musttail`` markers indicate that the optimizers - should perform tail call optimization. The ``tail`` marker is a hint that - `can be ignored `_. The ``musttail`` marker + should perform tail call optimization. The ``tail`` marker is a hint that + `can be ignored `_. The ``musttail`` marker means that the call must be tail call optimized in order for the program to - be correct. The ``musttail`` marker provides these guarantees: + be correct. The ``musttail`` marker provides these guarantees: #. The call will not cause unbounded stack growth if it is part of a recursive cycle in the call graph. @@ -7824,14 +8355,14 @@ This instruction requires several arguments: forwarded in place. Both markers imply that the callee does not access allocas or varargs from - the caller. Calls marked ``musttail`` must obey the following additional + the caller. 
Calls marked ``musttail`` must obey the following additional rules: - The call must immediately precede a :ref:`ret ` instruction, or a pointer bitcast followed by a ret instruction. - The ret instruction must return the (possibly bitcasted) value produced by the call or void. - - The caller and callee prototypes must match. Pointer types of + - The caller and callee prototypes must match. Pointer types of parameters or return types may differ in pointee type, but not in address space. - The calling conventions of the caller and callee must match. @@ -7852,6 +8383,15 @@ This instruction requires several arguments: - `Platform-specific constraints are met. `_ +#. The optional ``notail`` marker indicates that the optimizers should not add + ``tail`` or ``musttail`` markers to the call. It is used to prevent tail + call optimization from being performed on the call. + +#. The optional ``fast-math flags`` marker indicates that the call has one or more + :ref:`fast-math flags `, which are optimization hints to enable + otherwise unsafe floating-point optimizations. Fast-math flags are only valid + for calls that return a floating-point scalar or vector type. + #. The optional "cconv" marker indicates which :ref:`calling convention ` the call should use. If none is specified, the call defaults to using C calling conventions. The @@ -7880,6 +8420,7 @@ This instruction requires several arguments: #. The optional :ref:`function attributes ` list. Only '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' attributes are valid here. +#. The optional :ref:`operand bundles ` list. Semantics: """""""""" @@ -8049,6 +8590,84 @@ Example: catch i8** @_ZTIi filter [1 x i8**] [@_ZTId] +.. _i_cleanuppad: + +'``cleanuppad``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = cleanuppad within [*] + +Overview: +""""""""" + +The '``cleanuppad``' instruction is used by `LLVM's exception handling +system `_ to specify that a basic block +is a cleanup block --- one where a personality routine attempts to +transfer control to run cleanup actions. +The ``args`` correspond to whatever additional +information the :ref:`personality function ` requires to +execute the cleanup. +The ``resultval`` has the type :ref:`token ` and is used to +match the ``cleanuppad`` to corresponding :ref:`cleanuprets `. +The ``parent`` argument is the token of the funclet that contains the +``cleanuppad`` instruction. If the ``cleanuppad`` is not inside a funclet, +this operand may be the token ``none``. + +Arguments: +"""""""""" + +The instruction takes a list of arbitrary values which are interpreted +by the :ref:`personality function `. + +Semantics: +"""""""""" + +When the call stack is being unwound due to an exception being thrown, +the :ref:`personality function ` transfers control to the +``cleanuppad`` with the aid of the personality-specific arguments. +As with calling conventions, how the personality function results are +represented in LLVM IR is target specific. + +The ``cleanuppad`` instruction has several restrictions: + +- A cleanup block is a basic block which is the unwind destination of + an exceptional instruction. +- A cleanup block must have a '``cleanuppad``' instruction as its + first non-PHI instruction. +- There can be only one '``cleanuppad``' instruction within the + cleanup block. +- A basic block that is not a cleanup block may not include a + '``cleanuppad``' instruction. + +Executing a ``cleanuppad`` instruction constitutes "entering" that pad. 
+The pad may then be "exited" in one of three ways:
+
+1) explicitly via a ``cleanupret`` that consumes it. Executing such a ``cleanupret``
+   is undefined behavior if any descendant pads have been entered but not yet
+   exited.
+2) implicitly via a call (which unwinds all the way to the current function's caller),
+   or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller.
+3) implicitly via an unwind edge whose destination EH pad isn't a descendant of
+   the ``cleanuppad``. When the ``cleanuppad`` is exited in this manner, it is
+   undefined behavior if the destination EH pad has a parent which is not an
+   ancestor of the ``cleanuppad`` being exited.
+
+It is undefined behavior for the ``cleanuppad`` to exit via an unwind edge which
+does not transitively unwind to the same destination as a constituent
+``cleanupret``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %tok = cleanuppad within %cs []
+
 .. _intrinsics:
 
 Intrinsic Functions
@@ -8265,11 +8884,11 @@ Experimental Statepoint Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 LLVM provides a second experimental set of intrinsics for describing garbage
-collection safepoints in compiled code.  These intrinsics are an alternative
+collection safepoints in compiled code. These intrinsics are an alternative
 to the ``llvm.gcroot`` intrinsics, but are compatible with the ones for
-:ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers.  The
+:ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers. The
 differences in approach are covered in the `Garbage Collection with LLVM
-<GarbageCollection.html>`_ documentation.  The intrinsics themselves are
+<GarbageCollection.html>`_ documentation. The intrinsics themselves are
 described in :doc:`Statepoints`.
 
 .. _int_gcroot:
@@ -8613,6 +9232,48 @@ Semantics:
 
 See the description for :ref:`llvm.stacksave <int_stacksave>`.
 
+.. _int_get_dynamic_area_offset:
+
+'``llvm.get.dynamic.area.offset``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.get.dynamic.area.offset.i32()
+      declare i64 @llvm.get.dynamic.area.offset.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.get.dynamic.area.offset.*``' intrinsic family is used to
+get the offset from the native stack pointer to the address of the most
+recent dynamic alloca on the caller's stack. These intrinsics are
+intended for use in combination with
+:ref:`llvm.stacksave <int_stacksave>` to get a
+pointer to the most recent dynamic alloca. This is useful, for example,
+for AddressSanitizer's stack unpoisoning routines.
+
+Semantics:
+""""""""""
+
+These intrinsics return a non-negative integer value that can be used to
+get the address of the most recent dynamic alloca, allocated by :ref:`alloca <i_alloca>`
+on the caller's stack. In particular, for targets where the stack grows
+downwards, adding this offset to the native stack pointer gives the address of
+the most recent dynamic alloca. For targets where the stack grows upwards, the
+situation is a bit more complicated, because subtracting this value from the
+stack pointer gives the address one past the end of the most recent dynamic
+alloca.
+
+Although for most targets :ref:`llvm.get.dynamic.area.offset <int_get_dynamic_area_offset>`
+returns just zero, for others, such as PowerPC and PowerPC64, it returns a
+compile-time-known constant value.
+
+The return value type of :ref:`llvm.get.dynamic.area.offset <int_get_dynamic_area_offset>`
+must match the target's generic address space's (address space 0) pointer type.
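As a sketch of the intended pairing with ``llvm.stacksave`` (assuming a target
whose stack grows downwards; the value names are hypothetical):

.. code-block:: llvm

    %sp = call i8* @llvm.stacksave()
    %offset = call i64 @llvm.get.dynamic.area.offset.i64()
    ; address of the most recent dynamic alloca
    %dyn = getelementptr i8, i8* %sp, i64 %offset

    declare i8* @llvm.stacksave()
    declare i64 @llvm.get.dynamic.area.offset.i64()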
+
 '``llvm.prefetch``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -8791,6 +9452,55 @@ structures and the code to increment the appropriate value, in a format that
 can be written out by a compiler runtime and consumed via the ``llvm-profdata``
 tool.
 
+'``llvm.instrprof_value_profile``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof_value_profile(i8* <name>, i64 <hash>,
+                                                 i64 <value>, i32 <value_kind>,
+                                                 i32 <index>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof_value_profile``' intrinsic can be emitted by a
+frontend for use with instrumentation-based profiling. It will be
+lowered by the ``-instrprof`` pass to record the target values that
+instrumented expressions take on in a program at runtime.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a global variable containing the
+name of the entity being instrumented. ``name`` should generally be the
+(mangled) function name for a set of counters.
+
+The second argument is a hash value that can be used by the consumer
+of the profile data to detect changes to the instrumented source. It
+is an error if ``hash`` differs between two instances of
+``llvm.instrprof_*`` that refer to the same name.
+
+The third argument is the value of the expression being profiled. The profiled
+expression's value should be representable as an unsigned 64-bit value. The
+fourth argument represents the kind of value profiling that is being done. The
+supported value profiling kinds are enumerated through the
+``InstrProfValueKind`` type declared in the
+``<include/llvm/ProfileData/InstrProf.h>`` header file. The last argument is the
+index of the instrumented expression within ``name``. It should be >= 0.
+
+Semantics:
+""""""""""
+
+This intrinsic represents the point where a call to a runtime routine
+should be inserted for value profiling of target expressions. The ``-instrprof``
+pass will generate the appropriate data structures and replace the
+``llvm.instrprof_value_profile`` intrinsic with a call to the profile
+runtime library with the proper arguments.
+
 Standard C Library Intrinsics
 -----------------------------
@@ -9734,6 +10444,34 @@ Bit Manipulation Intrinsics
 
 LLVM provides intrinsics for a few important bit manipulation
 operations. These allow efficient code generation for some algorithms.
 
+'``llvm.bitreverse.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic function. You can use bitreverse on any
+integer type.
+
+::
+
+      declare i16 @llvm.bitreverse.i16(i16 <id>)
+      declare i32 @llvm.bitreverse.i32(i32 <id>)
+      declare i64 @llvm.bitreverse.i64(i64 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.bitreverse``' family of intrinsics is used to reverse the
+bitpattern of an integer value; for example ``0b10110110`` becomes
+``0b01101101``.
+
+Semantics:
+""""""""""
+
+The ``llvm.bitreverse.iN`` intrinsic returns an ``iN`` value that has bit
+``M`` in the input moved to bit ``N-M-1`` in the output.
+
 '``llvm.bswap.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10225,23 +10963,23 @@ Overview:
 """""""""
 
 The '``llvm.canonicalize.*``' intrinsic returns the platform specific canonical
-encoding of a floating point number.  This canonicalization is useful for
+encoding of a floating point number. This canonicalization is useful for
 implementing certain numeric primitives such as frexp. The canonical encoding is
 defined by IEEE-754-2008 to be:
 
 ::
 
       2.1.8 canonical encoding: The preferred encoding of a floating-point
-      representation in a format.  Applied to declets, significands of finite
+      representation in a format.
Applied to declets, significands of finite numbers, infinities, and NaNs, especially in decimal formats. This operation can also be considered equivalent to the IEEE-754-2008 -conversion of a floating-point value to the same format. NaNs are handled +conversion of a floating-point value to the same format. NaNs are handled according to section 6.2. Examples of non-canonical encodings: -- x87 pseudo denormals, pseudo NaNs, pseudo Infinity, Unnormals. These are +- x87 pseudo denormals, pseudo NaNs, pseudo Infinity, Unnormals. These are converted to a canonical representation per hardware-specific protocol. - Many normal decimal floating point numbers have non-canonical alternative encodings. @@ -10254,11 +10992,11 @@ default exception handling must signal an invalid exception, and produce a quiet NaN result. This function should always be implementable as multiplication by 1.0, provided -that the compiler does not constant fold the operation. Likewise, division by -1.0 and ``llvm.minnum(x, x)`` are possible implementations. Addition with +that the compiler does not constant fold the operation. Likewise, division by +1.0 and ``llvm.minnum(x, x)`` are possible implementations. Addition with -0.0 is also sufficient provided that the rounding mode is not -Infinity. -``@llvm.canonicalize`` must preserve the equality relation. That is: +``@llvm.canonicalize`` must preserve the equality relation. That is: - ``(@llvm.canonicalize(x) == x)`` is equivalent to ``(x == x)`` - ``(@llvm.canonicalize(x) == @llvm.canonicalize(y))`` is equivalent to @@ -10269,15 +11007,15 @@ Additionally, the sign of zero must be conserved: The payload bits of a NaN must be conserved, with two exceptions. First, environments which use only a single canonical representation of NaN -must perform said canonicalization. Second, SNaNs must be quieted per the +must perform said canonicalization. Second, SNaNs must be quieted per the usual methods. The canonicalization operation may be optimized away if: -- The input is known to be canonical. For example, it was produced by a +- The input is known to be canonical. For example, it was produced by a floating-point operation that is required by the standard to be canonical. - The result is consumed only by (or fused with) other floating-point - operations. That is, the bits of the floating point value are not examined. + operations. That is, the bits of the floating point value are not examined. '``llvm.fmuladd.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10566,12 +11304,16 @@ LLVM provides intrinsics for predicated vector load and store operations. The pr Syntax: """"""" -This is an overloaded intrinsic. The loaded data is a vector of any integer or floating point data type. +This is an overloaded intrinsic. The loaded data is a vector of any integer, floating point or pointer data type. 
:: - declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) + declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) + ;; The data is a vector of pointers to double + declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* , i32 , <8 x i1> , <8 x double*> ) + ;; The data is a vector of function pointers + declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* , i32 , <8 x i1> , <8 x i32 ()*> ) Overview: """"""""" @@ -10607,12 +11349,16 @@ The result of this operation is equivalent to a regular vector load instruction Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. :: - declare void @llvm.masked.store.v8i32 (<8 x i32> , <8 x i32> * , i32 , <8 x i1> ) - declare void @llvm.masked.store.v16f32(<16 x i32> , <16 x i32>* , i32 , <16 x i1> ) + declare void @llvm.masked.store.v8i32 (<8 x i32> , <8 x i32>* , i32 , <8 x i1> ) + declare void @llvm.masked.store.v16f32 (<16 x float> , <16 x float>* , i32 , <16 x i1> ) + ;; The data is a vector of pointers to double + declare void @llvm.masked.store.v8p0f64 (<8 x double*> , <8 x double*>* , i32 , <8 x i1> ) + ;; The data is a vector of function pointers + declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> , <4 x i32 ()*>* , i32 , <4 x i1> ) Overview: """"""""" @@ -10653,12 +11399,13 @@ LLVM provides intrinsics for vector gather and scatter operations. They are simi Syntax: """"""" -This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer or floating point data type gathered together into one vector. +This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector. :: - declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> ) + declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> ) + declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> ) Overview: """"""""" @@ -10706,12 +11453,13 @@ The semantics of this operation are equivalent to a sequence of conditional scal Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. Each vector element is stored in an arbitrary memory addresses. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. 
 ::
 
-       declare void @llvm.masked.scatter.v8i32  (<8 x i32>  <value>, <8 x i32*>  <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
-       declare void @llvm.masked.scatter.v16f32 (<16 x i32> <value>, <16 x i32*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v8i32   (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v16f32  (<16 x float>  <value>, <16 x float*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
 
 Overview:
 """""""""
@@ -10727,7 +11475,7 @@ The first operand is a vector value to be written to memory. The second operand
 
 Semantics:
 """"""""""
 
-The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergency. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations.
+The '``llvm.masked.scatter``' intrinsic is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations.
 
 ::
@@ -10881,6 +11629,36 @@ Semantics:
 
 This intrinsic indicates that the memory is mutable again.
 
+'``llvm.invariant.group.barrier``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.invariant.group.barrier(i8* <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
+established by ``invariant.group`` metadata no longer holds, to obtain a new
+pointer value that does not carry the invariant information.
+
+
+Arguments:
+""""""""""
+
+The ``llvm.invariant.group.barrier`` intrinsic takes only one argument, which is
+the pointer to the memory for which the ``invariant.group`` invariant no longer
+holds.
+
+Semantics:
+""""""""""
+
+Returns another pointer that aliases its argument but which is considered different
+for the purposes of ``load``/``store`` ``invariant.group`` metadata.
+
 General Intrinsics
 ------------------
@@ -11253,7 +12031,7 @@ Arguments:
 """"""""""
 
 The first argument is a pointer to be tested. The second argument is a
-metadata string containing the name of a :doc:`bitset <BitSets>`.
+metadata object representing an identifier for a :doc:`bitset <BitSets>`.
 
 Overview:
 """""""""
diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst
index 1ac75a406985..84adff3616f7 100644
--- a/docs/LibFuzzer.rst
+++ b/docs/LibFuzzer.rst
@@ -21,7 +21,8 @@ This library is intended primarily for in-process coverage-guided fuzz testing
   optimizations options (e.g. -O0, -O1, -O2) to diversify testing.
 * Build a test driver using the same options as the library.
   The test driver is a C/C++ file containing interesting calls to the library
-  inside a single function ``extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);``
+  inside a single function ``extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);``.
+  Currently, the only expected return value is 0; others are reserved for future use.
* Link the Fuzzer, the library and the driver together into an executable using the same sanitizer options as for the library. * Collect the initial corpus of inputs for the @@ -60,14 +61,18 @@ The most important flags are:: cross_over 1 If 1, cross over inputs. mutate_depth 5 Apply this number of consecutive mutations to each input. timeout 1200 Timeout in seconds (if positive). If one unit runs more than this number of seconds the process will abort. + max_total_time 0 If positive, indicates the maximal total time in seconds to run the fuzzer. help 0 Print help. - save_minimized_corpus 0 If 1, the minimized corpus is saved into the first input directory + merge 0 If 1, the 2nd, 3rd, etc. corpora will be merged into the 1st corpus. Only interesting units will be taken. jobs 0 Number of jobs to run. If jobs >= 1 we spawn this number of jobs in separate worker processes with stdout/stderr redirected to fuzz-JOB.log. workers 0 Number of simultaneous worker processes to run the jobs. If zero, "min(jobs,NumberOfCpuCores()/2)" is used. - tokens 0 Use the file with tokens (one token per line) to fuzz a token based input language. - apply_tokens 0 Read the given input file, substitute bytes with tokens and write the result to stdout. sync_command 0 Execute an external command "<sync_command> <test_corpus>" to synchronize the test corpus. sync_timeout 600 Minimum timeout between syncs. + use_traces 0 Experimental: use instruction traces + only_ascii 0 If 1, generate only ASCII (isprint+isspace) inputs. + test_single_input "" Use specified file content as test input. Test will be run only once. Useful for debugging a particular case. + artifact_prefix "" Write fuzzing artifacts (crash, timeout, or slow inputs) as $(artifact_prefix)file + exact_artifact_path "" Write the single artifact on failure (crash, timeout) as $(exact_artifact_path). This overrides -artifact_prefix and will not use checksum in the file name. Do not use the same path for several parallel processes. For the full list of flags run the fuzzer binary with ``-help=1``. @@ -80,11 +85,14 @@ Toy example A simple function that does something interesting if it receives the input "HI!":: cat << EOF >> test_fuzzer.cc - extern "C" void LLVMFuzzerTestOneInput(const unsigned char *data, unsigned long size) { + #include <stdint.h> + #include <stddef.h> + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size > 0 && data[0] == 'H') if (size > 1 && data[1] == 'I') if (size > 2 && data[2] == '!') __builtin_trap(); + return 0; } EOF # Get lib/Fuzzer. Assuming that you already have fresh clang in PATH. @@ -115,9 +123,10 @@ Here we show how to use lib/Fuzzer on something real, yet simple: pcre2_:: # Build the actual function that does something interesting with PCRE2.
cat << EOF > pcre_fuzzer.cc #include <string.h> + #include <stdint.h> #include "pcre2posix.h" - extern "C" void LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) { - if (size < 1) return; + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 1) return 0; char *str = new char[size+1]; memcpy(str, data, size); str[size] = 0; @@ -127,6 +136,7 @@ Here we show how to use lib/Fuzzer on something real, yet simple: pcre2_:: regfree(&preg); } delete [] str; + return 0; } EOF clang++ -g -fsanitize=address $COV_FLAGS -c -std=c++11 -I inst/include/ pcre_fuzzer.cc @@ -213,6 +223,9 @@ to find Heartbleed with LibFuzzer:: #include <openssl/ssl.h> #include <openssl/err.h> #include <assert.h> + #include <stdint.h> + #include <stddef.h> + SSL_CTX *sctx; int Init() { SSL_library_init(); @@ -224,7 +237,7 @@ to find Heartbleed with LibFuzzer:: assert (SSL_CTX_use_PrivateKey_file(sctx, "server.key", SSL_FILETYPE_PEM)); return 0; } - extern "C" void LLVMFuzzerTestOneInput(unsigned char *Data, size_t Size) { + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { static int unused = Init(); SSL *server = SSL_new(sctx); BIO *sinbio = BIO_new(BIO_s_mem()); @@ -234,9 +247,10 @@ to find Heartbleed with LibFuzzer:: BIO_write(sinbio, Data, Size); SSL_do_handshake(server); SSL_free(server); + return 0; } EOF - # Build the fuzzer. + # Build the fuzzer. clang++ -g handshake-fuzz.cc -fsanitize=address \ openssl-1.0.1f/libssl.a openssl-1.0.1f/libcrypto.a Fuzzer*.o # Run 20 independent fuzzer jobs. @@ -252,26 +266,43 @@ Voila:: #1 0x4db504 in tls1_process_heartbeat openssl-1.0.1f/ssl/t1_lib.c:2586:3 #2 0x580be3 in ssl3_read_bytes openssl-1.0.1f/ssl/s3_pkt.c:1092:4 +Note: a `similar fuzzer `_ +is now a part of the boringssl source tree. + Advanced features ================= -Tokens ------- +Dictionaries +------------ +*EXPERIMENTAL*. +LibFuzzer supports user-supplied dictionaries with input language keywords +or other interesting byte sequences (e.g. multi-byte magic values). +Use ``-dict=DICTIONARY_FILE``. For some input languages using a dictionary +may significantly improve the search speed. +The dictionary syntax is similar to that used by AFL_ for its ``-x`` option:: -By default, the fuzzer is not aware of complexities of the input language -and when fuzzing e.g. a C++ parser it will mostly stress the lexer. -It is very hard for the fuzzer to come up with something like ``reinterpret_cast`` -from a test corpus that doesn't have it. -See a detailed discussion of this topic at -http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html. + # Lines starting with '#' and empty lines are ignored. -lib/Fuzzer implements a simple technique that allows to fuzz input languages with -long tokens. All you need is to prepare a text file containing up to 253 tokens, one token per line, -and pass it to the fuzzer as ``-tokens=TOKENS_FILE.txt``. -Three implicit tokens are added: ``" "``, ``"\t"``, and ``"\n"``. -The fuzzer itself will still be mutating a string of bytes -but before passing this input to the target library it will replace every byte ``b`` with the ``b``-th token. -If there are less than ``b`` tokens, a space will be added instead. + # Adds "blah" (w/o quotes) to the dictionary. + kw1="blah" + # Use \\ for backslash and \" for quotes. + kw2="\"ac\\dc\"" + # Use \xAB for hex values + kw3="\xF7\xF8" + # the name of the keyword followed by '=' may be omitted: + "foo\x0Abar" + +Data-flow-guided fuzzing +------------------------ + +*EXPERIMENTAL*.
+With an additional compiler flag ``-fsanitize-coverage=trace-cmp`` (see SanitizerCoverageTraceDataFlow_) +and extra run-time flag ``-use_traces=1`` the fuzzer will try to apply *data-flow-guided fuzzing*. +That is, the fuzzer will record the inputs to comparison instructions, switch statements, +and several libc functions (``memcmp``, ``strcmp``, ``strncmp``, etc.). +It will later use those recorded inputs during mutations. + +This mode can be combined with DataFlowSanitizer_ to achieve better sensitivity. AFL compatibility ----------------- @@ -321,18 +352,38 @@ Build (make sure to use fresh clang as the host compiler):: Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc). -TODO: commit the pre-fuzzed corpus to svn (?). - Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23052 clang-fuzzer ------------ -The default behavior is very similar to ``clang-format-fuzzer``. -Clang can also be fuzzed with Tokens_ using ``-tokens=$LLVM/lib/Fuzzer/cxx_fuzzer_tokens.txt`` option. +The behavior is very similar to ``clang-format-fuzzer``. Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23057 +llvm-as-fuzzer +-------------- + +Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=24639 + +llvm-mc-fuzzer +-------------- + +This tool fuzzes the MC layer. Currently it is only able to fuzz the +disassembler but it is hoped that assembly and round-trip verification will be +added in the future. + +When run in disassembly mode, the inputs are opcodes to be disassembled. The +fuzzer will consume as many instructions as possible and will stop when it +finds an invalid instruction or runs out of data. + +Please note that the command line interface differs slightly from that of other +fuzzers. The fuzzer arguments should follow ``--fuzzer-args`` and should have +a single dash, while other arguments control the operation mode and target in a +similar manner to ``llvm-mc`` and should have two dashes. For example:: + + llvm-mc-fuzzer --triple=aarch64-linux-gnu --disassemble --fuzzer-args -max_len=4 -jobs=10 + Buildbot -------- @@ -348,7 +399,7 @@ The corpuses are stored in git on github and can be used like this:: git clone https://github.com/kcc/fuzzing-with-sanitizers.git bin/clang-format-fuzzer fuzzing-with-sanitizers/llvm/clang-format/C1 bin/clang-fuzzer fuzzing-with-sanitizers/llvm/clang/C1/ - bin/clang-fuzzer fuzzing-with-sanitizers/llvm/clang/TOK1 -tokens=$LLVM/llvm/lib/Fuzzer/cxx_fuzzer_tokens.txt + bin/llvm-as-fuzzer fuzzing-with-sanitizers/llvm/llvm-as/C1 -only_ascii=1 FAQ @@ -407,11 +458,46 @@ small inputs, each input takes < 1ms to run, and the library code is not expecte to crash on invalid inputs. Examples: regular expression matchers, text or binary format parsers.
+Trophies +======== +* GLIBC: https://sourceware.org/glibc/wiki/FuzzingLibc + +* MUSL LIBC: + + * http://git.musl-libc.org/cgit/musl/commit/?id=39dfd58417ef642307d90306e1c7e50aaec5a35c + * http://www.openwall.com/lists/oss-security/2015/03/30/3 + +* `pugixml `_ + +* PCRE: Search for "LLVM fuzzer" in http://vcs.pcre.org/pcre2/code/trunk/ChangeLog?view=markup; + also in `bugzilla `_ + +* `ICU `_ + +* `Freetype `_ + +* `Harfbuzz `_ + +* `SQLite `_ + +* `Python `_ + +* OpenSSL/BoringSSL: `[1] `_ + +* `Libxml2 + `_ + +* `Linux Kernel's BPF verifier `_ + +* LLVM: `Clang `_, `Clang-format `_, `libc++ `_, `llvm-as `_, Disassembler: http://reviews.llvm.org/rL247405, http://reviews.llvm.org/rL247414, http://reviews.llvm.org/rL247416, http://reviews.llvm.org/rL247417, http://reviews.llvm.org/rL247420, http://reviews.llvm.org/rL247422. + .. _pcre2: http://www.pcre.org/ .. _AFL: http://lcamtuf.coredump.cx/afl/ .. _SanitizerCoverage: http://clang.llvm.org/docs/SanitizerCoverage.html +.. _SanitizerCoverageTraceDataFlow: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow +.. _DataFlowSanitizer: http://clang.llvm.org/docs/DataFlowSanitizer.html .. _Heartbleed: http://en.wikipedia.org/wiki/Heartbleed diff --git a/docs/MIRLangRef.rst b/docs/MIRLangRef.rst new file mode 100644 index 000000000000..a5f8c8c743ab --- /dev/null +++ b/docs/MIRLangRef.rst @@ -0,0 +1,495 @@ +======================================== +Machine IR (MIR) Format Reference Manual +======================================== + +.. contents:: + :local: + +.. warning:: + This is a work in progress. + +Introduction +============ + +This document is a reference manual for the Machine IR (MIR) serialization +format. MIR is a human readable serialization format that is used to represent +LLVM's :ref:`machine specific intermediate representation +`. + +The MIR serialization format is designed to be used for testing the code +generation passes in LLVM. + +Overview +======== + +The MIR serialization format uses a YAML container. YAML is a standard +data serialization language, and the full YAML language spec can be read at +`yaml.org +`_. + +A MIR file is split up into a series of `YAML documents`_. The first document +can contain an optional embedded LLVM IR module, and the rest of the documents +contain the serialized machine functions. + +.. _YAML documents: http://www.yaml.org/spec/1.2/spec.html#id2800132 + +MIR Testing Guide +================= + +You can use the MIR format for testing in two different ways: + +- You can write MIR tests that invoke a single code generation pass using the + ``run-pass`` option in llc. + +- You can use llc's ``stop-after`` option with existing or new LLVM assembly + tests and check the MIR output of a specific code generation pass. + +Testing Individual Code Generation Passes +----------------------------------------- + +The ``run-pass`` option in llc allows you to create MIR tests that invoke +just a single code generation pass. When this option is used, llc will parse +an input MIR file, run the specified code generation pass, and print the +resulting MIR to the standard output stream. + +You can generate an input MIR file for the test by using the ``stop-after`` +option in llc. 
For example, if you would like to write a test for the +post register allocation pseudo instruction expansion pass, you can specify +the machine copy propagation pass in the ``stop-after`` option, as it runs +just before the pass that we are trying to test: + + ``llc -stop-after machine-cp bug-trigger.ll > test.mir`` + +After generating the input MIR file, you'll have to add a run line that uses +the ``-run-pass`` option to it. In order to test the post register allocation +pseudo instruction expansion pass on X86-64, a run line like the one shown +below can be used: + + ``# RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s`` + +The MIR files are target dependent, so they have to be placed in the target +specific test directories. They also need to specify a target triple or a +target architecture either in the run line or in the embedded LLVM IR module. + +Limitations +----------- + +Currently the MIR format has several limitations in terms of which state it +can serialize: + +- The target-specific state in the target-specific ``MachineFunctionInfo`` + subclasses isn't serialized at the moment. + +- The target-specific ``MachineConstantPoolValue`` subclasses (in the ARM and + SystemZ backends) aren't serialized at the moment. + +- The ``MCSymbol`` machine operands are only printed; they can't be parsed. + +- A lot of the state in ``MachineModuleInfo`` isn't serialized - only the CFI + instructions and the variable debug information from MMI are serialized right + now. + +These limitations impose restrictions on what you can test with the MIR format. +For now, tests that would like to test some behaviour that depends on the state +of certain ``MCSymbol`` operands or the exception handling state in MMI can't +use the MIR format. Similarly, tests that depend on the state of the target +specific ``MachineFunctionInfo`` or ``MachineConstantPoolValue`` subclasses +can't use the MIR format at the moment. + +High Level Structure +==================== + +.. _embedded-module: + +Embedded Module +--------------- + +When the first YAML document contains a `YAML block literal string`_, the MIR +parser will treat this string as an LLVM assembly language string that +represents an embedded LLVM IR module. +Here is an example of a YAML document that contains an LLVM module: + +.. code-block:: llvm + + --- | + define i32 @inc(i32* %x) { + entry: + %0 = load i32, i32* %x + %1 = add i32 %0, 1 + store i32 %1, i32* %x + ret i32 %1 + } + ... + +.. _YAML block literal string: http://www.yaml.org/spec/1.2/spec.html#id2795688 + +Machine Functions +----------------- + +The remaining YAML documents contain the machine functions. This is an example +of such a YAML document: + +.. code-block:: llvm + + --- + name: inc + tracksRegLiveness: true + liveins: + - { reg: '%rdi' } + body: | + bb.0.entry: + liveins: %rdi + + %eax = MOV32rm %rdi, 1, _, 0, _ + %eax = INC32r killed %eax, implicit-def dead %eflags + MOV32mr killed %rdi, 1, _, 0, _, %eax + RETQ %eax + ... + +The document above consists of attributes that represent the various +properties and data structures in a machine function. + +The attribute ``name`` is required, and its value should be identical to the +name of a function that this machine function is based on. + +The attribute ``body`` is a `YAML block literal string`_. Its value represents +the function's machine basic blocks and their machine instructions.
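+
+Putting these pieces together, a complete MIR test file usually consists of a
+run line, an optional embedded LLVM IR module and a machine function document.
+The sketch below is illustrative only - it reuses the ``inc`` example from
+above, and the ``postrapseudos`` pass and the CHECK line are placeholder
+assumptions rather than parts of a real test:
+
+.. code-block:: llvm
+
+  # RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s
+  --- |
+    define i32 @inc(i32* %x) {
+    entry:
+      %0 = load i32, i32* %x
+      %1 = add i32 %0, 1
+      store i32 %1, i32* %x
+      ret i32 %1
+    }
+  ...
+  ---
+  # CHECK: name: inc
+  name: inc
+  tracksRegLiveness: true
+  liveins:
+    - { reg: '%rdi' }
+  body: |
+    bb.0.entry:
+      liveins: %rdi
+
+      %eax = MOV32rm %rdi, 1, _, 0, _
+      %eax = INC32r killed %eax, implicit-def dead %eflags
+      MOV32mr killed %rdi, 1, _, 0, _, %eax
+      RETQ %eax
+  ...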
+ +Machine Instructions Format Reference +===================================== + +The machine basic blocks and their instructions are represented using a custom, +human readable serialization language. This language is used in the +`YAML block literal string`_ that corresponds to the machine function's body. + +A source string that uses this language contains a list of machine basic +blocks, which are described in the section below. + +Machine Basic Blocks +-------------------- + +A machine basic block is defined in a single block definition source construct +that contains the block's ID. +The example below defines two blocks that have IDs of zero and one: + +.. code-block:: llvm + + bb.0: + + bb.1: + + +A machine basic block can also have a name. It should be specified after the ID +in the block's definition: + +.. code-block:: llvm + + bb.0.entry: ; This block's name is "entry" + + +The block's name should be identical to the name of the IR block that this +machine block is based on. + +Block References +^^^^^^^^^^^^^^^^ + +The machine basic blocks are identified by their ID numbers. Individual +blocks are referenced using the following syntax: + +.. code-block:: llvm + + %bb.<id>[.<name>] + +Examples: + +.. code-block:: llvm + + %bb.0 + %bb.1.then + +Successors +^^^^^^^^^^ + +The machine basic block's successors have to be specified before any of the +instructions: + +.. code-block:: llvm + + bb.0.entry: + successors: %bb.1.then, %bb.2.else + + bb.1.then: + + bb.2.else: + + +The branch weights can be specified in brackets after the successor blocks. +The example below defines a block that has two successors with branch weights +of 32 and 16: + +.. code-block:: llvm + + bb.0.entry: + successors: %bb.1.then(32), %bb.2.else(16) + +.. _bb-liveins: + +Live In Registers +^^^^^^^^^^^^^^^^^ + +The machine basic block's live in registers have to be specified before any of +the instructions: + +.. code-block:: llvm + + bb.0.entry: + liveins: %edi, %esi + +The list of live in registers and successors can be empty. The language also +allows multiple live in register and successor lists - they are combined into +one list by the parser. + +Miscellaneous Attributes +^^^^^^^^^^^^^^^^^^^^^^^^ + +The attributes ``IsAddressTaken``, ``IsLandingPad`` and ``Alignment`` can be +specified in brackets after the block's definition: + +.. code-block:: llvm + + bb.0.entry (address-taken): + + bb.2.else (align 4): + + bb.3(landing-pad, align 4): + + +.. TODO: Describe the way the reference to an unnamed LLVM IR block can be + preserved. + +Machine Instructions +-------------------- + +A machine instruction is composed of a name, +:ref:`machine operands <machine-operands>`, +:ref:`instruction flags <instruction-flags>`, and machine memory operands. + +The instruction's name is usually specified before the operands. The example +below shows an instance of the X86 ``RETQ`` instruction with a single machine +operand: + +.. code-block:: llvm + + RETQ %eax + +However, if the machine instruction has one or more explicitly defined register +operands, the instruction's name has to be specified after them. The example +below shows an instance of the AArch64 ``LDPXpost`` instruction with three +defined register operands: + +.. code-block:: llvm + + %sp, %fp, %lr = LDPXpost %sp, 2 + +The instruction names are serialized using the exact definitions from the +target's ``*InstrInfo.td`` files, and they are case sensitive. This means that +similar instruction names like ``TSTri`` and ``tSTRi`` represent different +machine instructions. + ..
_instruction-flags: + +Instruction Flags +^^^^^^^^^^^^^^^^^ + +The flag ``frame-setup`` can be specified before the instruction's name: + +.. code-block:: llvm + + %fp = frame-setup ADDXri %sp, 0, 0 + +.. _registers: + +Registers +--------- + +Registers are one of the key primitives in the machine instructions +serialization language. They are primarily used in the +:ref:`register machine operands <register-operands>`, +but they can also be used in a number of other places, like the +:ref:`basic block's live in list <bb-liveins>`. + +The physical registers are identified by their name. They use the following +syntax: + +.. code-block:: llvm + + %<name> + +The example below shows three X86 physical registers: + +.. code-block:: llvm + + %eax + %r15 + %eflags + +The virtual registers are identified by their ID number. They use the following +syntax: + +.. code-block:: llvm + + %<id> + +Example: + +.. code-block:: llvm + + %0 + +The null registers are represented using an underscore ('``_``'). They can also be +represented using a '``%noreg``' named register, although the former syntax +is preferred. + +.. _machine-operands: + +Machine Operands +---------------- + +There are seventeen different kinds of machine operands, and all of them, except +the ``MCSymbol`` operand, can be serialized. The ``MCSymbol`` operands are +just printed out - they can't be parsed back yet. + +Immediate Operands +^^^^^^^^^^^^^^^^^^ + +The immediate machine operands are untyped, 64-bit signed integers. The +example below shows an instance of the X86 ``MOV32ri`` instruction that has an +immediate machine operand ``-42``: + +.. code-block:: llvm + + %eax = MOV32ri -42 + +.. TODO: Describe the CIMM (Rare) and FPIMM immediate operands. + +.. _register-operands: + +Register Operands +^^^^^^^^^^^^^^^^^ + +The :ref:`register <registers>` primitive is used to represent the register +machine operands. The register operands can also have optional +:ref:`register flags <register-flags>`, +:ref:`a subregister index <subregister-indices>`, +and a reference to the tied register operand. +The full syntax of a register operand is shown below: + +.. code-block:: llvm + + [<flags>] <register> [ :<subregister-idx-name> ] [ (tied-def <tied-def-operand-index>) ] + +This example shows an instance of the X86 ``XOR32rr`` instruction that has +5 register operands with different register flags: + +.. code-block:: llvm + + dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al + +.. _register-flags: + +Register Flags +~~~~~~~~~~~~~~ + +The table below shows all of the possible register flags along with the +corresponding internal ``llvm::RegState`` representation: + +.. list-table:: + :header-rows: 1 + + * - Flag + - Internal Value + + * - ``implicit`` + - ``RegState::Implicit`` + + * - ``implicit-def`` + - ``RegState::ImplicitDefine`` + + * - ``def`` + - ``RegState::Define`` + + * - ``dead`` + - ``RegState::Dead`` + + * - ``killed`` + - ``RegState::Kill`` + + * - ``undef`` + - ``RegState::Undef`` + + * - ``internal`` + - ``RegState::InternalRead`` + + * - ``early-clobber`` + - ``RegState::EarlyClobber`` + + * - ``debug-use`` + - ``RegState::Debug`` + +.. _subregister-indices: + +Subregister Indices +~~~~~~~~~~~~~~~~~~~ + +The register machine operands can reference a portion of a register by using +the subregister indices. The example below shows an instance of the ``COPY`` +pseudo instruction that uses the X86 ``sub_8bit`` subregister index to copy 8 +lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1: + ..
code-block:: llvm + + %1 = COPY %0:sub_8bit + +The names of the subregister indices are target specific, and are typically +defined in the target's ``*RegisterInfo.td`` file. + +Global Value Operands +^^^^^^^^^^^^^^^^^^^^^ + +The global value machine operands reference the global values from the +:ref:`embedded LLVM IR module <embedded-module>`. +The example below shows an instance of the X86 ``MOV64rm`` instruction that has +a global value operand named ``G``: + +.. code-block:: llvm + + %rax = MOV64rm %rip, 1, _, @G, _ + +The named global values are represented using an identifier with the '@' prefix. +If the identifier doesn't match the regular expression +`[-a-zA-Z$._][-a-zA-Z$._0-9]*`, then this identifier must be quoted. + +The unnamed global values are represented using an unsigned numeric value with +the '@' prefix, like in the following examples: ``@0``, ``@989``. + +.. TODO: Describe the parser's default behaviour when optional YAML attributes + are missing. +.. TODO: Describe the syntax for the bundled instructions. +.. TODO: Describe the syntax for virtual register YAML definitions. +.. TODO: Describe the machine function's YAML flag attributes. +.. TODO: Describe the syntax for the external symbol and register + mask machine operands. +.. TODO: Describe the frame information YAML mapping. +.. TODO: Describe the syntax of the stack object machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the constant pool machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the jump table machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the block address machine operands. +.. TODO: Describe the syntax of the CFI index machine operands. +.. TODO: Describe the syntax of the metadata machine operands, and the + instruction's debug location attribute. +.. TODO: Describe the syntax of the target index machine operands. +.. TODO: Describe the syntax of the register live out machine operands. +.. TODO: Describe the syntax of the machine memory operands. diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index 3426bfff164f..73704d9b17d7 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -21,7 +21,7 @@ click the power icon in the top right. You can register with a GitHub account, a Google account, or you can create your own profile. Make *sure* that the email address registered with Phabricator is subscribed -to the relevant -commits mailing list. If your are not subscribed to the commit +to the relevant -commits mailing list. If you are not subscribed to the commit list, all mail sent by Phabricator on your behalf will be held for moderation. Note that if you use your Subversion user name as Phabricator user name, @@ -66,7 +66,7 @@ To upload a new patch: * Leave the drop down on *Create a new Revision...* and click *Continue*. * Enter a descriptive title and summary. The title and summary are usually in the form of a :ref:`commit message `. -* Add reviewers and mailing +* Add reviewers (see below for advice) and subscribe mailing lists that you want to be included in the review. If your patch is for LLVM, add llvm-commits as a Subscriber; if your patch is for Clang, add cfe-commits. @@ -83,6 +83,24 @@ To submit an updated patch: * Leave the Repository and Project fields blank. * Add comments about the changes in the new diff. Click *Save*. +Choosing reviewers: You typically pick one or two people as initial reviewers.
+This choice is not crucial, because you are merely suggesting and not requiring +them to participate. Many people will see the email notification on cfe-commits +or llvm-commits, and if the subject line suggests the patch is something they +should look at, they will. + +Here are a couple of ways to pick the initial reviewer(s): + +* Use ``svn blame`` and the commit log to find names of people who have + recently modified the same area of code that you are modifying. +* Look in CODE_OWNERS.TXT to see who might be responsible for that area. +* If you've discussed the change on a dev list, the people who participated + might be appropriate reviewers. + +Even if you think the code owner is the busiest person in the world, it's still +okay to put them as a reviewer. Being the code owner means they have accepted +responsibility for making sure the review happens. + Reviewing code with Phabricator ------------------------------- @@ -162,6 +180,6 @@ trivially a good fit for an official LLVM project. .. _LLVM's Phabricator: http://reviews.llvm.org .. _`http://reviews.llvm.org`: http://reviews.llvm.org .. _Code Repository Browser: http://reviews.llvm.org/diffusion/ -.. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html -.. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html +.. _Arcanist Quick Start: https://secure.phabricator.com/book/phabricator/article/arcanist_quick_start/ +.. _Arcanist User Guide: https://secure.phabricator.com/book/phabricator/article/arcanist/ .. _llvm-reviews GitHub project: https://github.com/r4nt/llvm-reviews/ diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 08cc61a187b5..44f76fef8f1f 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -366,7 +366,7 @@ Then you can run your pass like this: Using the ``DEBUG()`` macro instead of a home-brewed solution allows you to not have to create "yet another" command line option for the debug output for your -pass. Note that ``DEBUG()`` macros are disabled for optimized builds, so they +pass. Note that ``DEBUG()`` macros are disabled for non-asserts builds, so they do not cause a performance impact at all (for the same reason, they should also not contain side-effects!). @@ -383,21 +383,17 @@ Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option Sometimes you may find yourself in a situation where enabling ``-debug`` just turns on **too much** information (such as when working on the code generator). If you want to enable debug information with more fine-grained control, you -can define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as +should define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as follows: .. 
code-block:: c++ - #undef DEBUG_TYPE - DEBUG(errs() << "No debug type\n"); #define DEBUG_TYPE "foo" DEBUG(errs() << "'foo' debug type\n"); #undef DEBUG_TYPE #define DEBUG_TYPE "bar" DEBUG(errs() << "'bar' debug type\n"); #undef DEBUG_TYPE - #define DEBUG_TYPE "" - DEBUG(errs() << "No debug type (2)\n"); Then you can run your pass like this: @@ -406,24 +402,22 @@ Then you can run your pass like this: $ opt < a.bc > /dev/null -mypass $ opt < a.bc > /dev/null -mypass -debug - No debug type 'foo' debug type 'bar' debug type - No debug type (2) $ opt < a.bc > /dev/null -mypass -debug-only=foo 'foo' debug type $ opt < a.bc > /dev/null -mypass -debug-only=bar 'bar' debug type Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file, -to specify the debug type for the entire module (if you do this before you -``#include "llvm/Support/Debug.h"``, you don't have to insert the ugly -``#undef``'s). Also, you should use names more meaningful than "foo" and "bar", -because there is no system in place to ensure that names do not conflict. If -two different modules use the same string, they will all be turned on when the -name is specified. This allows, for example, all debug information for -instruction scheduling to be enabled with ``-debug-only=InstrSched``, even if -the source lives in multiple files. +to specify the debug type for the entire module. Be careful that you only do +this after including Debug.h and not around any #include of headers. Also, you +should use names more meaningful than "foo" and "bar", because there is no +system in place to ensure that names do not conflict. If two different modules +use the same string, they will all be turned on when the name is specified. +This allows, for example, all debug information for instruction scheduling to be +enabled with ``-debug-only=InstrSched``, even if the source lives in multiple +files. For performance reasons, -debug-only is not available in optimized builds (``--enable-optimized``) of LLVM. @@ -435,10 +429,8 @@ preceding example could be written as: .. code-block:: c++ - DEBUG_WITH_TYPE("", errs() << "No debug type\n"); DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n"); DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"); - DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n"); .. _Statistic: diff --git a/docs/README.txt b/docs/README.txt index 3d6342929808..31764b2951b2 100644 --- a/docs/README.txt +++ b/docs/README.txt @@ -44,7 +44,7 @@ viewable online (as noted above) at e.g. Checking links ============== -The reachibility of external links in the documentation can be checked by +The reachability of external links in the documentation can be checked by running: cd docs/ diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index b68f5ecd493e..b3f7c005ed19 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -1,15 +1,21 @@ ====================== -LLVM 3.7 Release Notes +LLVM 3.8 Release Notes ====================== .. contents:: :local: +.. warning:: + These are in-progress notes for the upcoming LLVM 3.8 release. You may + prefer the `LLVM 3.7 Release Notes `_. + + Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 3.7. Here we describe the status of LLVM, including major improvements +release 3.8. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code.
All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -25,36 +31,42 @@ LLVM web page, this document applies to the *next* release, not the current one. To see the release notes for a specific release, please see the `releases page `_. -Major changes in 3.7.1 -====================== - -* 3.7.0 was released with an inadvertent change to the signature of the C - API function: LLVMBuildLandingPad, which made the C API incompatible with - prior releases. This has been corrected in LLVM 3.7.1. - - As a result of this change, 3.7.0 is not ABI compatible with 3.7.1. - - +----------------------------------------------------------------------------+ - | History of the LLVMBuildLandingPad() function | - +===========================+================================================+ - | 3.6.2 and prior releases | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | LLVMValueRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - | 3.7.0 | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - | 3.7.1 and future releases | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | LLVMValueRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - - -Non-comprehensive list of changes in 3.7.0 +Non-comprehensive list of changes in this release ================================================= +* With this release, the minimum Windows version required for running LLVM is + Windows 7. Earlier versions, including Windows Vista and XP, are no longer + supported. + +* With this release, the autoconf build system is deprecated. It will be removed + in the 3.9 release. Please migrate to using CMake. For more information see: + `Building LLVM with CMake `_ + +* The C API function LLVMLinkModules is deprecated. It will be removed in the + 3.9 release. Please migrate to LLVMLinkModules2. Unlike the old function, the + new one + + * Doesn't take an unused parameter. + * Destroys the source instead of only damaging it. + * Does not record a message. Use the diagnostic handler instead. + +* The C API functions LLVMParseBitcode, LLVMParseBitcodeInContext, + LLVMGetBitcodeModuleInContext and LLVMGetBitcodeModule have been deprecated. + They will be removed in 3.9. Please migrate to the versions with a 2 suffix. + Unlike the old ones the new ones do not record a diagnostic message. Use + the diagnostic handler instead. + +* The deprecated C APIs LLVMGetBitcodeModuleProviderInContext and + LLVMGetBitcodeModuleProvider have been removed. + +* The deprecated C APIs LLVMCreateExecutionEngine, LLVMCreateInterpreter, + LLVMCreateJITCompiler, LLVMAddModuleProvider and LLVMRemoveModuleProvider + have been removed. + +* With this release, the C API headers have been reorganized to improve build + time. Type specific declarations have been moved to Types.h, and error + handling routines have been moved to ErrorHandling.h. Both are included in + Core.h so nothing should change for projects directly including the headers, + but transitive dependencies may be affected. .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of @@ -63,429 +75,61 @@ Non-comprehensive list of changes in 3.7.0 functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection.
-* The minimum required Visual Studio version for building LLVM is now 2013 - Update 4. +* ... next change ... -* A new documentation page, :doc:`Frontend/PerformanceTips`, contains a - collection of tips for frontend authors on how to generate IR which LLVM is - able to effectively optimize. +.. NOTE + If you would like to document a larger change, then you can add a + subsection about it right here. You can copy the following boilerplate + and un-indent it (the indentation causes it to be inside this comment). -* The ``DataLayout`` is no longer optional. All the IR level optimizations expects - it to be present and the API has been changed to use a reference instead of - a pointer to make it explicit. The Module owns the datalayout and it has to - match the one attached to the TargetMachine for generating code. + Special New Feature + ------------------- - In 3.6, a pass was inserted in the pipeline to make the ``DataLayout`` accessible: - ``MyPassManager->add(new DataLayoutPass(MyTargetMachine->getDataLayout()));`` - In 3.7, you don't need a pass, you set the ``DataLayout`` on the ``Module``: - ``MyModule->setDataLayout(MyTargetMachine->createDataLayout());`` + Makes programs 10x faster by doing Special New Thing. - The LLVM C API ``LLVMGetTargetMachineData`` is deprecated to reflect the fact - that it won't be available anymore from ``TargetMachine`` in 3.8. +Changes to the ARM Backend +-------------------------- -* Comdats are now orthogonal to the linkage. LLVM will not create - comdats for weak linkage globals and the frontends are responsible - for explicitly adding them. + During this release ... -* On ELF we now support multiple sections with the same name and - comdat. This allows for smaller object files since multiple - sections can have a simple name (`.text`, `.rodata`, etc). - -* LLVM now lazily loads metadata in some cases. Creating archives - with IR files with debug info is now 25X faster. - -* llvm-ar can create archives in the BSD format used by OS X. - -* LLVM received a backend for the extended Berkely Packet Filter - instruction set that can be dynamically loaded into the Linux kernel via the - `bpf(2) `_ syscall. - - Support for BPF has been present in the kernel for some time, but starting - from 3.18 has been extended with such features as: 64-bit registers, 8 - additional registers registers, conditional backwards jumps, call - instruction, shift instructions, map (hash table, array, etc.), 1-8 byte - load/store from stack, and more. - - Up until now, users of BPF had to write bytecode by hand, or use - custom generators. This release adds a proper LLVM backend target for the BPF - bytecode architecture. - - The BPF target is now available by default, and options exist in both Clang - (-target bpf) or llc (-march=bpf) to pick eBPF as a backend. - -* Switch-case lowering was rewritten to avoid generating unbalanced search trees - (`PR22262 `_) and to exploit profile information - when available. Some lowering strategies are now disabled when optimizations - are turned off, to save compile time. - -* The debug info IR class hierarchy now inherits from ``Metadata`` and has its - own bitcode records and assembly syntax - (`documented in LangRef `_). The debug - info verifier has been merged with the main verifier. - -* LLVM IR and APIs are in a period of transition to aid in the removal of - pointer types (the end goal being that pointers are typeless/opaque - void*, - if you will). 
Some APIs and IR constructs have been modified to take - explicit types that are currently checked to match the target type of their - pre-existing pointer type operands. Further changes are still needed, but the - more you can avoid using ``PointerType::getPointeeType``, the easier the - migration will be. - -* Argument-less ``TargetMachine::getSubtarget`` and - ``TargetMachine::getSubtargetImpl`` have been removed from the tree. Updating - out of tree ports is as simple as implementing a non-virtual version in the - target, but implementing full ``Function`` based ``TargetSubtargetInfo`` - support is recommended. - -* This is expected to be the last major release of LLVM that supports being - run on Windows XP and Windows Vista. For the next major release the minimum - Windows version requirement will be Windows 7. Changes to the MIPS Target -------------------------- -During this release the MIPS target has: + During this release ... -* Added support for MIPS32R3, MIPS32R5, MIPS32R3, MIPS32R5, and microMIPS32. - -* Added support for dynamic stack realignment. This is of particular importance - to MSA on 32-bit subtargets since vectors always exceed the stack alignment on - the O32 ABI. - -* Added support for compiler-rt including: - - * Support for the Address, and Undefined Behaviour Sanitizers for all MIPS - subtargets. - - * Support for the Data Flow, and Memory Sanitizer for 64-bit subtargets. - - * Support for the Profiler for all MIPS subtargets. - -* Added support for libcxx, and libcxxabi. - -* Improved inline assembly support such that memory constraints may now make use - of the appropriate address offsets available to the instructions. Also, added - support for the ``ZC`` constraint. - -* Added support for 128-bit integers on 64-bit subtargets and 16-bit floating - point conversions on all subtargets. - -* Added support for read-only ``.eh_frame`` sections by storing type information - indirectly. - -* Added support for MCJIT on all 64-bit subtargets as well as MIPS32R6. - -* Added support for fast instruction selection on MIPS32 and MIPS32R2 with PIC. - -* Various bug fixes. Including the following notable fixes: - - * Fixed 'jumpy' debug line info around calls where calculation of the address - of the function would inappropriately change the line number. - - * Fixed missing ``__mips_isa_rev`` macro on the MIPS32R6 and MIPS32R6 - subtargets. - - * Fixed representation of NaN when targeting systems using traditional - encodings. Traditionally, MIPS has used NaN encodings that were compatible - with IEEE754-1985 but would later be found incompatible with IEEE754-2008. - - * Fixed multiple segfaults and assertions in the disassembler when - disassembling instructions that have memory operands. - - * Fixed multiple cases of suboptimal code generation involving $zero. - - * Fixed code generation of 128-bit shifts on 64-bit subtargets. - - * Prevented the delay slot filler from filling call delay slots with - instructions that modify or use $ra. - - * Fixed some remaining N32/N64 calling convention bugs when using small - structures on big-endian subtargets. - - * Fixed missing sign-extensions that are required by the N32/N64 calling - convention when generating calls to library functions with 32-bit - parameters. - - * Corrected the ``int64_t`` typedef to be ``long`` for N64. - - * ``-mno-odd-spreg`` is now honoured for vector insertion/extraction - operations when using -mmsa. - - * Fixed vector insertion and extraction for MSA on 64-bit subtargets. 
- - * Corrected the representation of member function pointers. This makes them - usable on microMIPS subtargets. Changes to the PowerPC Target ----------------------------- -There are numerous improvements to the PowerPC target in this release: + During this release ... -* LLVM now supports the ISA 2.07B (POWER8) instruction set, including - direct moves between general registers and vector registers, and - built-in support for hardware transactional memory (HTM). Some missing - instructions from ISA 2.06 (POWER7) were also added. -* Code generation for the local-dynamic and global-dynamic thread-local - storage models has been improved. - -* Loops may be restructured to leverage pre-increment loads and stores. - -* QPX - The vector instruction set used by the IBM Blue Gene/Q supercomputers - is now supported. - -* Loads from the TOC area are now correctly treated as invariant. - -* PowerPC now has support for i128 and v1i128 types. The types differ - in how they are passed in registers for the ELFv2 ABI. - -* Disassembly will now print shorter mnemonic aliases when available. - -* Optional register name prefixes for VSX and QPX registers are now - supported in the assembly parser. - -* The back end now contains a pass to remove unnecessary vector swaps - from POWER8 little-endian code generation. Additional improvements - are planned for release 3.8. - -* The undefined-behavior sanitizer (UBSan) is now supported for PowerPC. - -* Many new vector programming APIs have been added to altivec.h. - Additional ones are planned for release 3.8. - -* PowerPC now supports __builtin_call_with_static_chain. - -* PowerPC now supports the revised -mrecip option that permits finer - control over reciprocal estimates. - -* Many bugs have been identified and fixed. - -Changes to the SystemZ Target +Changes to the X86 Target ----------------------------- -* LLVM no longer attempts to automatically detect the current host CPU when - invoked natively. + During this release ... -* Support for all thread-local storage models. (Previous releases would support - only the local-exec TLS model.) - -* The POPCNT instruction is now used on z196 and above. - -* The RISBGN instruction is now used on zEC12 and above. - -* Support for the transactional-execution facility on zEC12 and above. - -* Support for the z13 processor and its vector facility. +* TLS is enabled for Cygwin as emutls. -Changes to the JIT APIs ------------------------ +Changes to the OCaml bindings +----------------------------- -* Added a new C++ JIT API called On Request Compilation, or ORC. + During this release ... - ORC is a new JIT API inspired by MCJIT but designed to be more testable, and - easier to extend with new features. A key new feature already in tree is lazy, - function-at-a-time compilation for X86. Also included is a reimplementation of - MCJIT's API and behavior (OrcMCJITReplacement). MCJIT itself remains in tree, - and continues to be the default JIT ExecutionEngine, though new users are - encouraged to try ORC out for their projects. (A good place to start is the - new ORC tutorials under llvm/examples/kaleidoscope/orc). +* The ocaml function link_modules has been replaced with link_modules' which + uses LLVMLinkModules2. -Sub-project Status Update -========================= -In addition to the core LLVM 3.7 distribution of production-quality compiler -infrastructure, the LLVM project includes sub-projects that use the LLVM core -and share the same distribution license. This section provides updates on these -sub-projects. 
- -Polly - The Polyhedral Loop Optimizer in LLVM ---------------------------------------------- - -`Polly `_ is a polyhedral loop optimization -infrastructure that provides data-locality optimizations to LLVM-based -compilers. When compiled as part of clang or loaded as a module into clang, -it can perform loop optimizations such as tiling, loop fusion or outer-loop -vectorization. As a generic loop optimization infrastructure it allows -developers to get a per-loop-iteration model of a loop nest on which detailed -analysis and transformations can be performed. - -Changes since the last release: - -* isl imported into Polly distribution - - `isl `_, the math library Polly uses, has been - imported into the source code repository of Polly and is now distributed as part - of Polly. As this was the last external library dependency of Polly, Polly can - now be compiled right after checking out the Polly source code without the need - for any additional libraries to be pre-installed. - -* Small integer optimization of isl - - The MIT licensed imath backend using in `isl `_ for - arbitrary width integer computations has been optimized to use native integer - operations for the common case where the operands of a computation fit into 32 - bit and to only fall back to large arbitrary precision integers for the - remaining cases. This optimization has greatly improved the compile-time - performance of Polly, both due to faster native operations also due to a - reduction in malloc traffic and pointer indirections. As a result, computations - that use arbitrary precision integers heavily have been speed up by almost 6x. - As a result, the compile-time of Polly on the Polybench test kernels in the LNT - suite has been reduced by 20% on average with compile time reductions between - 9-43%. - -* Schedule Trees - - Polly now uses internally so-called > Schedule Trees < to model the loop - structure it optimizes. Schedule trees are an easy to understand tree structure - that describes a loop nest using integer constraint sets to keep track of - execution constraints. It allows the developer to use per-tree-node operations - to modify the loop tree. Programatic analysis that work on the schedule tree - (e.g., as dependence analysis) also show a visible speedup as they can exploit - the tree structure of the schedule and need to fall back to ILP based - optimization problems less often. Section 6 of `Polyhedral AST generation is - more than scanning polyhedra - `_ gives a detailed - explanation of this schedule trees. - -* Scalar and PHI node modeling - Polly as an analysis - - Polly now requires almost no preprocessing to analyse LLVM-IR, which makes it - easier to use Polly as a pure analysis pass e.g. to provide more precise - dependence information to non-polyhedral transformation passes. Originally, - Polly required the input LLVM-IR to be preprocessed such that all scalar and - PHI-node dependences are translated to in-memory operations. Since this release, - Polly has full support for scalar and PHI node dependences and requires no - scalar-to-memory translation for such kind of dependences. - -* Modeling of modulo and non-affine conditions - - Polly can now supports modulo operations such as A[t%2][i][j] as they appear - often in stencil computations and also allows data-dependent conditional - branches as they result e.g. from ternary conditions ala A[i] > 255 ? 255 : - A[i]. 
- -* Delinearization - - Polly now support the analysis of manually linearized multi-dimensional arrays - as they result form macros such as - "#define 2DARRAY(A,i,j) (A.data[(i) * A.size + (j)]". Similar constructs appear - in old C code written before C99, C++ code such as boost::ublas, LLVM exported - from Julia, Matlab generated code and many others. Our work titled - `Optimistic Delinearization of Parametrically Sized Arrays - `_ gives details. - -* Compile time improvements - - Pratik Bahtu worked on compile-time performance tuning of Polly. His work - together with the support for schedule trees and the small integer optimization - in isl notably reduced the compile time. - -* Increased compute timeouts - - As Polly's compile time has been notabily improved, we were able to increase - the compile time saveguards in Polly. As a result, the default configuration - of Polly can now analyze larger loop nests without running into compile time - restrictions. - -* Export Debug Locations via JSCoP file - - Polly's JSCoP import/export format gained support for debug locations that show - to the user the source code location of detected scops. - -* Improved windows support - - The compilation of Polly on windows using cmake has been improved and several - visual studio build issues have been addressed. - -* Many bug fixes - -libunwind ---------- - -The unwind implementation which use to reside in `libc++abi` has been moved into -a separate repository. This implementation can still be used for `libc++abi` by -specifying `-DLIBCXXABI_USE_LLVM_UNWINDER=YES` and -`-DLIBCXXABI_LIBUNWIND_PATH=` when configuring -`libc++abi`, which defaults to `true` when building on ARM. - -The new repository can also be built standalone if just `libunwind` is desired. - -External Open Source Projects Using LLVM 3.7 +External Open Source Projects Using LLVM 3.8 ============================================ An exciting aspect of LLVM is that it is used as an enabling technology for a lot of other language and tools projects. This section lists some of the -projects that have already been updated to work with LLVM 3.7. +projects that have already been updated to work with LLVM 3.8. - -LDC - the LLVM-based D compiler -------------------------------- - -`D `_ is a language with C-like syntax and static typing. It -pragmatically combines efficiency, control, and modeling power, with safety and -programmer productivity. D supports powerful concepts like Compile-Time Function -Execution (CTFE) and Template Meta-Programming, provides an innovative approach -to concurrency and offers many classical paradigms. - -`LDC `_ uses the frontend from the reference compiler -combined with LLVM as backend to produce efficient native code. LDC targets -x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on -PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64 -are underway. - -Portable Computing Language (pocl) ----------------------------------- - -In addition to producing an easily portable open source OpenCL -implementation, another major goal of `pocl `_ -is improving performance portability of OpenCL programs with -compiler optimizations, reducing the need for target-dependent manual -optimizations. An important part of pocl is a set of LLVM passes used to -statically parallelize multiple work-items with the kernel compiler, even in -the presence of work-group barriers. 
- - -TTA-based Co-design Environment (TCE) -------------------------------------- - -`TCE `_ is a toolset for designing customized -exposed datapath processors based on the Transport triggered -architecture (TTA). - -The toolset provides a complete co-design flow from C/C++ -programs down to synthesizable VHDL/Verilog and parallel program binaries. -Processor customization points include the register files, function units, -supported operations, and the interconnection network. - -TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent -optimizations and also for parts of code generation. It generates -new LLVM-based code generators "on the fly" for the designed processors and -loads them in to the compiler backend as runtime libraries to avoid -per-target recompilation of larger parts of the compiler chain. - -BPF Compiler Collection (BCC) ------------------------------ -`BCC `_ is a Python + C framework for tracing and -networking that is using Clang rewriter + 2nd pass of Clang + BPF backend to -generate eBPF and push it into the kernel. - -LLVMSharp & ClangSharp ----------------------- - -`LLVMSharp `_ and -`ClangSharp `_ are type-safe C# bindings for -Microsoft.NET and Mono that Platform Invoke into the native libraries. -ClangSharp is self-hosted and is used to generated LLVMSharp using the -LLVM-C API. - -`LLVMSharp Kaleidoscope Tutorials `_ -are instructive examples of writing a compiler in C#, with certain improvements -like using the visitor pattern to generate LLVM IR. - -`ClangSharp PInvoke Generator `_ is the -self-hosting mechanism for LLVM/ClangSharp and is demonstrative of using -LibClang to generate Platform Invoke (PInvoke) signatures for C APIs. +* A project Additional Information @@ -500,3 +144,4 @@ going into the ``llvm/docs/`` directory in the LLVM tree. If you have any questions or comments about LLVM, please feel free to contact us via the `mailing lists `_. + diff --git a/docs/ReleaseProcess.rst b/docs/ReleaseProcess.rst index c4bbc91c63ce..d7f703126019 100644 --- a/docs/ReleaseProcess.rst +++ b/docs/ReleaseProcess.rst @@ -53,7 +53,7 @@ test-release.sh --------------- This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``, -``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage. +``libcxx``, ``libomp`` and ``clang-extra-tools``) in three stages, and will test the final stage. It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and that's the one you should use for the test-suite and other external tests. diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst index 99186f581881..270c44eb50ba 100644 --- a/docs/SourceLevelDebugging.rst +++ b/docs/SourceLevelDebugging.rst @@ -231,7 +231,7 @@ Compiled to LLVM, this function would be represented like this: .. 
code-block:: llvm ; Function Attrs: nounwind ssp uwtable - define void @foo() #0 { + define void @foo() #0 !dbg !4 { entry: %X = alloca i32, align 4 %Y = alloca i32, align 4 @@ -263,20 +263,20 @@ Compiled to LLVM, this function would be represented like this: !1 = !DIFile(filename: "/dev/stdin", directory: "/Users/dexonsmith/data/llvm/debug-info") !2 = !{} !3 = !{!4} - !4 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, function: void ()* @foo, variables: !2) + !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2) !5 = !DISubroutineType(types: !6) !6 = !{null} !7 = !{i32 2, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"PIC Level", i32 2} !10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} - !11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "X", scope: !4, file: !1, line: 2, type: !12) + !11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) !12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !13 = !DIExpression() !14 = !DILocation(line: 2, column: 9, scope: !4) - !15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Y", scope: !4, file: !1, line: 3, type: !12) + !15 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) !16 = !DILocation(line: 3, column: 9, scope: !4) - !17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Z", scope: !18, file: !1, line: 5, type: !12) + !17 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) !19 = !DILocation(line: 5, column: 11, scope: !18) !20 = !DILocation(line: 6, column: 11, scope: !18) @@ -304,10 +304,9 @@ scope information for the variable ``X``. .. code-block:: llvm !14 = !DILocation(line: 2, column: 9, scope: !4) - !4 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, - isLocal: false, isDefinition: true, scopeLine: 1, - isOptimized: false, function: void ()* @foo, - variables: !2) + !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, + isLocal: false, isDefinition: true, scopeLine: 1, + isOptimized: false, variables: !2) Here ``!14`` is metadata providing `location information `_. In this example, scope is encoded by ``!4``, a @@ -368,15 +367,14 @@ C/C++ source file information ``llvm::Instruction`` provides easy access to metadata attached with an instruction. One can extract line number information encoded in LLVM IR using -``Instruction::getMetadata()`` and ``DILocation::getLineNumber()``. +``Instruction::getDebugLoc()`` and ``DILocation::getLine()``. .. 
code-block:: c++ - if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction - DILocation Loc(N); // DILocation is in DebugInfo.h - unsigned Line = Loc.getLineNumber(); - StringRef File = Loc.getFilename(); - StringRef Dir = Loc.getDirectory(); + if (DILocation *Loc = I->getDebugLoc()) { // Here I is an LLVM instruction + unsigned Line = Loc->getLine(); + StringRef File = Loc->getFilename(); + StringRef Dir = Loc->getDirectory(); } C/C++ global variable information @@ -464,12 +462,12 @@ a C/C++ front-end would generate the following descriptors: !4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, - function: i32 (i32, i8**)* @main, variables: !2) + variables: !2) ;; ;; Define the subprogram itself. ;; - define i32 @main(i32 %argc, i8** %argv) { + define i32 @main(i32 %argc, i8** %argv) !dbg !4 { ... } @@ -709,7 +707,7 @@ qualified name. Debugger users tend not to enter their search strings as "``a::b::c``". So the name entered in the name table must be demangled in order to chop it up appropriately and additional names must be manually entered into the table to make it effective as a name lookup table for debuggers to -se. +use. All debuggers currently ignore the "``.debug_pubnames``" table as a result of its inconsistent and useless public-only name content making it a waste of diff --git a/docs/StackMaps.rst b/docs/StackMaps.rst index dbdf78f992ca..5bdae38b699d 100644 --- a/docs/StackMaps.rst +++ b/docs/StackMaps.rst @@ -499,3 +499,13 @@ the same requirement imposed by the llvm.gcroot intrinsic.) LLVM transformations must not substitute the alloca with any intervening value. This can be verified by the runtime simply by checking that the stack map's location is a Direct location type. + + +Supported Architectures +======================= + +Support for StackMap generation and the related intrinsics requires +some code for each backend. Today, only a subset of LLVM's backends +are supported. The currently supported architectures are X86_64, +PowerPC, and AArch64. + diff --git a/docs/Statepoints.rst b/docs/Statepoints.rst index eb5866eb552f..442b1c269c47 100644 --- a/docs/Statepoints.rst +++ b/docs/Statepoints.rst @@ -53,7 +53,7 @@ load barriers, store barriers, and safepoints. loads, merely loads of a particular type (in the original source language), or none at all. -#. Analogously, a store barrier is a code fragement that runs +#. Analogously, a store barrier is a code fragment that runs immediately before the machine store instruction, but after the computation of the value stored.
The most common use of a store barrier is to update a 'card table' in a generational garbage @@ -142,8 +142,8 @@ resulting relocation sequence is: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 7, i32 7) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) + %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7) ret i8 addrspace(1)* %obj.relocated } @@ -160,7 +160,7 @@ of the call, we use the ``gc.result`` intrinsic. To get the relocation of each pointer in turn, we use the ``gc.relocate`` intrinsic with the appropriate index. Note that both the ``gc.relocate`` and ``gc.result`` are tied to the statepoint. The combination forms a "statepoint relocation -sequence" and represents the entitety of a parseable call or 'statepoint'. +sequence" and represents the entirety of a parseable call or 'statepoint'. When lowered, this example would generate the following x86 assembly: @@ -206,6 +206,52 @@ This example was taken from the tests for the :ref:`RewriteStatepointsForGC` uti opt -rewrite-statepoints-for-gc test/Transforms/RewriteStatepointsForGC/basics.ll -S | llc -debug-only=stackmaps +Base & Derived Pointers +^^^^^^^^^^^^^^^^^^^^^^^ + +A "base pointer" is one which points to the starting address of an allocation +(object). A "derived pointer" is one which is offset from a base pointer by +some amount. When relocating objects, a garbage collector needs to be able +to relocate each derived pointer associated with an allocation to the same +offset from the new address. + +"Interior derived pointers" remain within the bounds of the allocation +they're associated with. As a result, the base object can be found at +runtime provided the bounds of allocations are known to the runtime system. + +"Exterior derived pointers" are outside the bounds of the associated object; +they may even fall within *another* allocation's address range. As a result, +there is no way for a garbage collector to determine which allocation they +are associated with at runtime and compiler support is needed. + +The ``gc.relocate`` intrinsic supports an explicit operand for describing the +allocation associated with a derived pointer. This operand is frequently +referred to as the base operand; it does not, strictly speaking, have to be +a base pointer, but it does need to lie within the bounds of the associated +allocation. Some collectors may require that the operand be an actual base +pointer rather than merely an internal derived pointer. Note that during +lowering both the base and derived pointer operands are required to be live +over the associated call safepoint even if the base is otherwise unused +afterwards. + +If we extend our previous example to include a pointless derived pointer, +we get: + +..
code-block:: llvm + + define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) + gc "statepoint-example" { + %gep = getelementptr i8, i8 addrspace(1)* %obj, i64 20000 + %token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %gep) + %obj.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 7) + %gep.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 8) + %p = getelementptr i8, i8 addrspace(1)* %gep, i64 -20000 + ret i8 addrspace(1)* %p + } + +Note that in this example %p and %obj.relocated are the same address and we +could replace one with the other, potentially removing the derived pointer +from the live set at the safepoint entirely. GC Transitions ^^^^^^^^^^^^^^^^^^ @@ -225,7 +271,7 @@ statepoint. transitions based on the function symbols involved (e.g. a call from a function with GC strategy "foo" to a function with GC strategy "bar"), indirect calls that are also GC transitions must also be supported. This - requirement is the driving force behing the decision to require that GC + requirement is the driving force behind the decision to require that GC transitions are explicitly marked. Let's revisit the sample given above, this time treating the call to ``@foo`` @@ -242,8 +288,8 @@ to unmanaged code. The resulting relocation sequence is: define i8 addrspace(1)* @test1(i8 addrspace(1) *%obj) gc "hypothetical-gc" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 1, i32* @Flag, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 7, i32 7) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 1, i32* @Flag, i32 0, i8 addrspace(1)* %obj) + %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7) ret i8 addrspace(1)* %obj.relocated } @@ -296,7 +342,7 @@ Syntax: :: - declare i32 + declare token @llvm.experimental.gc.statepoint(i64 <id>, i32 <num patch bytes>, func_type <target>, i64 <#call args>, i64 <flags>, @@ -331,14 +377,16 @@ the user will patch over the 'num patch bytes' bytes of nops with a calling sequence specific to their runtime before executing the generated machine code. There are no guarantees with respect to the alignment of the nop sequence. Unlike :doc:`StackMaps` statepoints do -not have a concept of shadow bytes. +not have a concept of shadow bytes. Note that semantically the +statepoint still represents a call or invoke to 'target', and the nop +sequence after patching is expected to represent an operation +equivalent to a call or invoke to 'target'. The 'target' operand is the function actually being called. The target can be specified as either a symbolic LLVM function, or as an arbitrary Value of appropriate function type. Note that the function type must match the signature of the callee and the types of the 'call -parameters' arguments. If 'num patch bytes' is non-zero then 'target' -has to be the constant pointer null of the appropriate function type. The '#call args' operand is the number of arguments to the actual call.
It must exactly match the number of arguments passed in the @@ -408,7 +456,7 @@ Syntax: :: declare type* - @llvm.experimental.gc.result(i32 %statepoint_token) + @llvm.experimental.gc.result(token %statepoint_token) Overview: """"""""" @@ -424,7 +472,7 @@ Operands: The first and only argument is the ``gc.statepoint`` which starts the safepoint sequence of which this ``gc.result`` is a part. -Despite the typing of this as a generic i32, *only* the value defined +Despite the typing of this as a generic token, *only* the value defined by a ``gc.statepoint`` is legal here. Semantics: @@ -448,7 +496,7 @@ Syntax: :: declare <pointer type> - @llvm.experimental.gc.relocate(i32 %statepoint_token, + @llvm.experimental.gc.relocate(token %statepoint_token, i32 %base_offset, i32 %pointer_offset) @@ -463,13 +511,18 @@ Operands: The first argument is the ``gc.statepoint`` which starts the safepoint sequence of which this ``gc.relocate`` is a part. -Despite the typing of this as a generic i32, *only* the value defined +Despite the typing of this as a generic token, *only* the value defined by a ``gc.statepoint`` is legal here. The second argument is an index into the statepoint's list of arguments -which specifies the base pointer for the pointer being relocated. +which specifies the allocation for the pointer being relocated. This index must land within the 'gc parameter' section of the -statepoint's argument list. +statepoint's argument list. The associated value must be within the +object with which the pointer being relocated is associated. The optimizer +is free to change *which* interior derived pointer is reported, provided that +it does not replace an actual base pointer with another interior derived +pointer. Collectors are allowed to rely on the base pointer operand +remaining an actual base pointer if so constructed. The third argument is an index into the statepoint's list of arguments which specifies the (potentially) derived pointer being relocated. It @@ -590,7 +643,7 @@ As an example, given this code: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) ret i8 addrspace(1)* %obj } @@ -600,8 +653,8 @@ The pass would produce this IR: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 12, i32 12) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12) ret i8 addrspace(1)* %obj.relocated } @@ -612,8 +665,18 @@ non references. Address space 1 is not globally reserved for this purpose.
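For reference, the rewrite can also be driven from a frontend's own code rather than through ``opt``. The snippet below is only a sketch: it assumes the legacy pass manager and the ``createPlaceSafepointsPass()`` and ``createRewriteStatepointsForGCPass()`` factory functions declared in ``llvm/Transforms/Scalar.h``, and it presumes the module's functions already carry an appropriate ``gc`` attribute.

.. code-block:: c++

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Insert safepoint polls, then rewrite the calls they contain into
    // statepoints with explicit relocation sequences.
    void makeRelocationsExplicit(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createPlaceSafepointsPass());
      PM.add(llvm::createRewriteStatepointsForGCPass());
      PM.run(M);
    }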
This pass can be used as a utility function by a language frontend that doesn't want to manually reason about liveness, base pointers, or relocation when constructing IR. As currently implemented, RewriteStatepointsForGC must be -run after SSA construction (i.e. mem2ref). +run after SSA construction (i.e. mem2reg). +RewriteStatepointsForGC will ensure that appropriate base pointers are listed +for every relocation created. It will do so by duplicating code as needed to +propagate the base pointer associated with each pointer being relocated to +the appropriate safepoints. The implementation assumes that the following +IR constructs produce base pointers: loads from the heap, addresses of global +variables, function arguments, function return values. Constant pointers (such +as null) are also assumed to be base pointers. In practice, this constraint +can be relaxed to producing interior derived pointers provided the target +collector can find the associated allocation from an arbitrary interior +derived pointer. In practice, RewriteStatepointsForGC can be run much later in the pass pipeline, after most optimization is already done. This helps to improve @@ -654,8 +717,8 @@ This pass would produce the following IR: .. code-block:: llvm define void @test() gc "statepoint-example" { - %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) - %safepoint_token1 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) ret void } @@ -699,6 +762,12 @@ deoptimization or introspection) at safepoints. In that case, ask on the llvm-dev mailing list for suggestions. +Supported Architectures +======================= + +Support for statepoint generation requires some code for each backend. +Today, only X86_64 is supported. + Bugs and Enhancements ===================== diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst index adf5f3d4cfbd..134ddd88c87d 100644 --- a/docs/TestingGuide.rst +++ b/docs/TestingGuide.rst @@ -240,6 +240,10 @@ The recommended way to examine output to figure out if the test passes is using the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in RUN lines is deprecated - please do not send or commit patches that use it.]* +Put related tests into a single file rather than having a separate file per +test. Check if there are files already covering your feature and consider +adding your code there instead of creating a new file. + Extra files ----------- diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst index 1d5a52f21b3f..241066842b7b 100644 --- a/docs/WritingAnLLVMPass.rst +++ b/docs/WritingAnLLVMPass.rst @@ -47,14 +47,11 @@ source tree in the ``lib/Transforms/Hello`` directory. Setting up the build environment -------------------------------- -.. FIXME: Why does this recommend to build in-tree? - -First, configure and build LLVM. This needs to be done directly inside the -LLVM source tree rather than in a separate objects directory.
Next, you need -to create a new directory somewhere in the LLVM source base. For this example, -we'll assume that you made ``lib/Transforms/Hello``. Finally, you must set up -a build script (``Makefile``) that will compile the source code for the new -pass. To do this, copy the following into ``Makefile``: +First, configure and build LLVM. Next, you need to create a new directory +somewhere in the LLVM source base. For this example, we'll assume that you +made ``lib/Transforms/Hello``. Finally, you must set up a build script +(``Makefile``) that will compile the source code for the new pass. To do this, +copy the following into ``Makefile``: .. code-block:: make @@ -206,9 +203,8 @@ As a whole, the ``.cpp`` file looks like: static RegisterPass X("hello", "Hello World Pass", false, false); Now that it's all together, compile the file with a simple "``gmake``" command -in the local directory and you should get a new file -"``Debug+Asserts/lib/Hello.so``" under the top level directory of the LLVM -source tree (not in the local directory). Note that everything in this file is +from the top level of your build directory and you should get a new file +"``Debug+Asserts/lib/Hello.so``". Note that everything in this file is contained in an anonymous namespace --- this reflects the fact that passes are self contained units that do not need external interfaces (although they can have them) to be useful. @@ -228,7 +224,7 @@ will work): .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null Hello: __main Hello: puts Hello: main @@ -245,7 +241,7 @@ To see what happened to the other string you registered, try running .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -help + $ opt -load ../../Debug+Asserts/lib/Hello.so -help OVERVIEW: llvm .bc -> .bc modular optimizer USAGE: opt [options] @@ -272,7 +268,7 @@ you queue up. For example: .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null Hello: __main Hello: puts Hello: main @@ -1092,7 +1088,7 @@ passes. Lets try it out with the gcse and licm passes: .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null Module Pass Manager Function Pass Manager Dominator Set Construction @@ -1129,7 +1125,7 @@ Lets see how this changes when we run the :ref:`Hello World .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null Module Pass Manager Function Pass Manager Dominator Set Construction @@ -1170,7 +1166,7 @@ Now when we run our pass, we get this output: .. 
code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null Pass Arguments: -gcse -hello -licm Module Pass Manager Function Pass Manager diff --git a/docs/_ocamldoc/style.css b/docs/_ocamldoc/style.css new file mode 100644 index 000000000000..00595d7f2f29 --- /dev/null +++ b/docs/_ocamldoc/style.css @@ -0,0 +1,97 @@ +/* A style for ocamldoc. Daniel C. Buenzli */ + +/* Reset a few things. */ +html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre, +a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp, +small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset, +form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td +{ margin: 0; padding: 0; border: 0 none; outline: 0; font-size: 100%; + font-weight: inherit; font-style:inherit; font-family:inherit; + line-height: inherit; vertical-align: baseline; text-align:inherit; + color:inherit; background: transparent; } + +table { border-collapse: collapse; border-spacing: 0; } + +/* Basic page layout */ + +body { font: normal 10pt/1.375em helvetica, arial, sans-serif; text-align:left; + margin: 1.375em 10%; min-width: 40ex; max-width: 72ex; + color: black; background: transparent /* url(line-height-22.gif) */; } + +b { font-weight: bold } +em { font-style: italic } + +tt, code, pre { font-family: WorkAroundWebKitAndMozilla, monospace; + font-size: 1em; } +pre code { font-size : inherit; } +.codepre { margin-bottom:1.375em /* after code example we introduce space. */ } + +.superscript,.subscript +{ font-size : 0.813em; line-height:0; margin-left:0.4ex;} +.superscript { vertical-align: super; } +.subscript { vertical-align: sub; } + +/* ocamldoc markup workaround hacks */ + + + +hr, hr + br, div + br, center + br, span + br, ul + br, ol + br, pre + br +{ display: none } /* annoying */ + +div.info + br { display:block} + +.codepre br + br { display: none } +h1 + pre { margin-bottom:1.375em} /* Toplevel module description */ + +/* Sections and document divisions */ + +/* .navbar { margin-bottom: -1.375em } */ +h1 { font-weight: bold; font-size: 1.5em; /* margin-top:1.833em; */ + margin-top:0.917em; padding-top:0.875em; + border-top-style:solid; border-width:1px; border-color:#AAA; } +h2 { font-weight: bold; font-size: 1.313em; margin-top: 1.048em } +h3 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } +h3 { font-weight: bold; font-size: 1em; margin-top: 1.375em} +h4 { font-style: italic; } + +/* Used by OCaml's own library documentation. */ + h6 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } + .h7 { font-weight: bold; font-size: 1em; margin-top: 1.375em } + +p { margin-top: 1.375em } +pre { margin-top: 1.375em } +.info { margin: 0.458em 0em -0.458em 2em;}/* Description of types values etc. 
*/ +td .info { margin:0; padding:0; margin-left: 2em;} /* Description in indexes */ + +ul, ol { margin-top:0.688em; padding-bottom:0.687em; + list-style-position:outside} +ul + p, ol + p { margin-top: 0em } +ul { list-style-type: square } + + +/* h2 + ul, h3 + ul, p + ul { } */ +ul > li { margin-left: 1.375em; } +ol > li { margin-left: 1.7em; } +/* Links */ + +a, a:link, a:visited, a:active, a:hover { color : #00B; text-decoration: none } +a:hover { text-decoration : underline } +*:target {background-color: #FFFF99;} /* anchor highlight */ + +/* Code */ + +.keyword { font-weight: bold; } +.comment { color : red } +.constructor { color : green } +.string { color : brown } +.warning { color : red ; font-weight : bold } + +/* Functors */ + +.paramstable { border-style : hidden ; padding-bottom:1.375em} +.paramstable code { margin-left: 1ex; margin-right: 1ex } +.sig_block {margin-left: 1em} + +/* Images */ + +img { margin-top: 1.375em } diff --git a/docs/conf.py b/docs/conf.py index 27919c20a7a5..6e3f16ceef1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,9 +48,9 @@ copyright = u'2003-%d, LLVM Project' % date.today().year # built documents. # # The short X.Y version. -version = '3.7' +version = '3.8' # The full version, including alpha/beta/rc tags. -release = '3.7' +release = '3.8' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in index 5c70db0332d5..5a74cecc8aac 100644 --- a/docs/doxygen.cfg.in +++ b/docs/doxygen.cfg.in @@ -1409,7 +1409,7 @@ FORMULA_TRANSPARENT = YES # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: diff --git a/docs/index.rst b/docs/index.rst index 66c55758c4db..a69ecfedc580 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,6 +1,11 @@ Overview ======== +.. warning:: + + If you are using a released version of LLVM, see `the download page + `_ to find your documentation. + The LLVM compiler infrastructure supports a wide range of projects, from industrial strength compilers to specialized JIT applications to small research projects. @@ -81,6 +86,7 @@ representation. GetElementPtr Frontend/PerformanceTips MCJITDesignAndImplementation + CompileCudaWithLLVM :doc:`GettingStarted` Discusses how to get up and running quickly with the LLVM infrastructure. @@ -256,6 +262,7 @@ For API clients and LLVM developers. MergeFunctions BitSets FaultMaps + MIRLangRef :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. @@ -268,6 +275,10 @@ For API clients and LLVM developers. working on retargetting LLVM to a new architecture, designing a new codegen pass, or enhancing existing components. +:doc:`Machine IR (MIR) Format Reference Manual ` + A reference manual for the MIR serialization format, which is used to test + LLVM's code generation passes. + :doc:`TableGen ` Describes the TableGen tool, which is used heavily by the LLVM code generator. @@ -361,6 +372,9 @@ For API clients and LLVM developers. :doc:`FaultMaps` LLVM support for folding control flow into faulting machine instructions. +:doc:`CompileCudaWithLLVM` + LLVM support for CUDA. 
+ Development Process Documentation ================================= diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst index f4b019166af3..b04cde10274e 100644 --- a/docs/tutorial/LangImpl1.rst +++ b/docs/tutorial/LangImpl1.rst @@ -25,7 +25,7 @@ It is useful to point out ahead of time that this tutorial is really about teaching compiler techniques and LLVM specifically, *not* about teaching modern and sane software engineering principles. In practice, this means that we'll take a number of shortcuts to simplify the -exposition. For example, the code leaks memory, uses global variables +exposition. For example, the code uses global variables all over the place, doesn't use nice design patterns like `visitors `_, etc... but it is very simple. If you dig in and use the code as a basis for future @@ -146,7 +146,7 @@ useful for mutually recursive functions). For example: A more interesting example is included in Chapter 6 where we write a little Kaleidoscope application that `displays a Mandelbrot -Set `_ at various levels of magnification. +Set `_ at various levels of magnification. Lets dive into the implementation of this language! @@ -169,14 +169,16 @@ numeric value of a number). First, we define the possibilities: tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5, + tok_identifier = -4, + tok_number = -5, }; - static std::string IdentifierStr; // Filled in if tok_identifier - static double NumVal; // Filled in if tok_number + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number Each token returned by our lexer will either be one of the Token enum values or it will be an 'unknown' character like '+', which is returned @@ -217,8 +219,10 @@ loop: while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } @@ -250,7 +254,8 @@ extend it :). Next we handle comments: if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); if (LastChar != EOF) @@ -275,7 +280,7 @@ file. These are handled with this code: } With this, we have the complete lexer for the basic Kaleidoscope -language (the `full code listing `_ for the Lexer +language (the `full code listing `_ for the Lexer is available in the `next chapter `_ of the tutorial). Next we'll `build a simple parser that uses this to build an Abstract Syntax Tree `_. When we have that, we'll include a diff --git a/docs/tutorial/LangImpl2.rst b/docs/tutorial/LangImpl2.rst index 06b18ff6c239..dab60172b988 100644 --- a/docs/tutorial/LangImpl2.rst +++ b/docs/tutorial/LangImpl2.rst @@ -44,8 +44,9 @@ We'll start with expressions first: /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} + NumberExprAST(double Val) : Val(Val) {} }; The code above shows the definition of the base ExprAST class and one @@ -65,26 +66,31 @@ language: /// VariableExprAST - Expression class for referencing a variable, like "a". 
class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &Name) : Name(Name) {} }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} + BinaryExprAST(char op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} }; This is all (intentionally) rather straight-forward: variables capture @@ -109,18 +115,21 @@ way to talk about functions themselves: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} + PrototypeAST(const std::string &name, std::vector Args) + : Name(name), Args(std::move(Args)) {} }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) - : Proto(proto), Body(body) {} + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} }; In Kaleidoscope, functions are typed with just a count of their @@ -142,9 +151,10 @@ be generated with calls like this: .. code-block:: c++ - ExprAST *X = new VariableExprAST("x"); - ExprAST *Y = new VariableExprAST("y"); - ExprAST *Result = new BinaryExprAST('+', X, Y); + auto LHS = llvm::make_unique("x"); + auto RHS = llvm::make_unique("y"); + auto Result = std::make_unique('+', std::move(LHS), + std::move(RHS)); In order to do this, we'll start by defining some basic helper routines: @@ -167,9 +177,14 @@ be parsed. /// Error* - These are little helper functions for error handling. - ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} - PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } - FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; + } + std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; + } The ``Error`` routines are simple helper routines that our parser will use to handle errors. The error recovery in our parser will not be the @@ -190,10 +205,10 @@ which parses that production. For numeric literals, we have: .. code-block:: c++ /// numberexpr ::= number - static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); + static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); getNextToken(); // consume the number - return Result; + return std::move(Result); } This routine is very simple: it expects to be called when the current @@ -211,14 +226,15 @@ the parenthesis operator is defined like this: .. code-block:: c++ /// parenexpr ::= '(' expression ')' - static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. 
- ExprAST *V = ParseExpression(); - if (!V) return 0; + static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; if (CurTok != ')') return Error("expected ')'"); - getNextToken(); // eat ). + getNextToken(); // eat ). return V; } @@ -250,24 +266,26 @@ function calls: /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' - static ExprAST *ParseIdentifierExpr() { + static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -278,7 +296,7 @@ function calls: // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); + return llvm::make_unique(IdName, std::move(Args)); } This routine follows the same style as the other routines. (It expects @@ -294,7 +312,7 @@ Now that we have all of our simple expression-parsing logic in place, we can define a helper function to wrap it together into one entry point. We call this class of expressions "primary" expressions, for reasons that will become more clear `later in the -tutorial `_. In order to parse an arbitrary +tutorial `_. In order to parse an arbitrary primary expression, we need to determine what sort of expression it is: .. code-block:: c++ @@ -303,12 +321,16 @@ primary expression, we need to determine what sort of expression it is: /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr - static ExprAST *ParsePrimary() { + static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } @@ -374,7 +396,7 @@ would be easy enough to eliminate the map and do the comparisons in the With the helper above defined, we can now start parsing binary expressions. The basic idea of operator precedence parsing is to break down an expression with potentially ambiguous binary operators into -pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g". +pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g". Operator precedence parsing considers this as a stream of primary expressions separated by binary operators. 
As such, it will first parse the leading primary expression "a", then it will see the pairs [+, b] @@ -390,11 +412,12 @@ a sequence of [binop,primaryexpr] pairs: /// expression /// ::= primary binoprhs /// - static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; + static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } ``ParseBinOpRHS`` is the function that parses the sequence of pairs for @@ -416,7 +439,8 @@ starts with: /// binoprhs /// ::= ('+' primary)* - static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -440,8 +464,9 @@ expression: getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; As such, this code eats (and remembers) the binary operator and then parses the primary expression that follows. This builds up the whole @@ -474,7 +499,8 @@ then continue parsing: } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = llvm::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } // loop around to the top of the while loop. } @@ -498,11 +524,13 @@ above two blocks duplicated for context): // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec+1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = llvm::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } // loop around to the top of the while loop. } @@ -541,7 +569,7 @@ expressions): /// prototype /// ::= id '(' id* ')' - static PrototypeAST *ParsePrototype() { + static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -561,7 +589,7 @@ expressions): // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } Given this, a function definition is very simple, just a prototype plus @@ -570,14 +598,14 @@ an expression to implement the body: .. code-block:: c++ /// definition ::= 'def' prototype expression - static FunctionAST *ParseDefinition() { + static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; + auto Proto = ParsePrototype(); + if (!Proto) return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } In addition, we support 'extern' to declare functions like 'sin' and @@ -587,7 +615,7 @@ In addition, we support 'extern' to declare functions like 'sin' and .. code-block:: c++ /// external ::= 'extern' prototype - static PrototypeAST *ParseExtern() { + static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. return ParsePrototype(); } @@ -599,13 +627,13 @@ nullary (zero argument) functions for them: .. 
code-block:: c++ /// toplevelexpr ::= expression - static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { + static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("", std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } Now that we have all the pieces, let's build a little driver that will @@ -616,7 +644,7 @@ The Driver The driver for this simply invokes all of the parsing pieces with a top-level dispatch loop. There isn't much interesting here, so I'll just -include the top-level loop. See `below <#code>`_ for full code in the +include the top-level loop. See `below <#full-code-listing>`_ for full code in the "Top-Level Parsing" section. .. code-block:: c++ @@ -626,11 +654,20 @@ include the top-level loop. See `below <#code>`_ for full code in the while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } diff --git a/docs/tutorial/LangImpl3.rst b/docs/tutorial/LangImpl3.rst index 26ba4aae956c..83ad35f14aee 100644 --- a/docs/tutorial/LangImpl3.rst +++ b/docs/tutorial/LangImpl3.rst @@ -15,8 +15,8 @@ LLVM IR. This will teach you a little bit about how LLVM does things, as well as demonstrate how easy it is to use. It's much more work to build a lexer and parser than it is to generate LLVM IR code. :) -**Please note**: the code in this chapter and later require LLVM 2.2 or -later. LLVM 2.1 and before will not work with it. Also note that you +**Please note**: the code in this chapter and later require LLVM 3.7 or +later. LLVM 3.6 and before will not work with it. Also note that you need to use a version of this tutorial that matches your LLVM release: If you are using an official LLVM release, use the version of the documentation included with your release or on the `llvm.org releases @@ -35,19 +35,20 @@ class: class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} - virtual Value *Codegen(); + NumberExprAST(double Val) : Val(Val) {} + virtual Value *codegen(); }; ... -The Codegen() method says to emit IR for that AST node along with all +The codegen() method says to emit IR for that AST node along with all the things it depends on, and they all return an LLVM Value object. "Value" is the class used to represent a "`Static Single Assignment (SSA) `_ @@ -72,16 +73,20 @@ parser, which will be used to report errors found during code generation .. 
code-block:: c++ - Value *ErrorV(const char *Str) { Error(Str); return 0; } - - static Module *TheModule; + static std::unique_ptr<Module> TheModule; static IRBuilder<> Builder(getGlobalContext()); static std::map<std::string, Value*> NamedValues; + Value *ErrorV(const char *Str) { + Error(Str); + return nullptr; + } + The static variables will be used during code generation. ``TheModule`` -is the LLVM construct that contains all of the functions and global -variables in a chunk of code. In many ways, it is the top-level -structure that the LLVM IR uses to contain code. +is an LLVM construct that contains functions and global variables. In many +ways, it is the top-level structure that the LLVM IR uses to contain code. +It will own the memory for all of the IR that we generate, which is why +the codegen() method returns a raw Value\*, rather than a unique_ptr. The ``Builder`` object is a helper object that makes it easy to generate LLVM instructions. Instances of the @@ -110,7 +115,7 @@ First we'll do numeric literals: .. code-block:: c++ - Value *NumberExprAST::Codegen() { + Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } @@ -124,10 +129,12 @@ are all uniqued together and shared. For this reason, the API uses the .. code-block:: c++ - Value *VariableExprAST::Codegen() { + Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + ErrorV("Unknown variable name"); + return V; } References to variables are also quite simple using LLVM. In the simple @@ -137,26 +144,31 @@ values that can be in the ``NamedValues`` map are function arguments. This code simply checks to see that the specified name is in the map (if not, an unknown variable is being referenced) and returns the value for it. In future chapters, we'll add support for `loop induction -variables `_ in the symbol table, and for `local -variables `_. +variables `_ in the symbol table, and for `local +variables `_. .. code-block:: c++ - Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) return 0; + Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { - case '+': return Builder.CreateFAdd(L, R, "addtmp"); - case '-': return Builder.CreateFSub(L, R, "subtmp"); - case '*': return Builder.CreateFMul(L, R, "multmp"); + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp"); - default: return ErrorV("invalid binary operator"); + default: + return ErrorV("invalid binary operator"); } } @@ -178,55 +190,55 @@ automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but they make it much easier to read the IR dumps.
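To see the uniquing in action, consider emitting two instructions with the same name hint. This is an illustrative fragment only; ``emitTwoAdds`` is a hypothetical helper, not part of the tutorial:

.. code-block:: c++

    #include "llvm/IR/IRBuilder.h"

    // Both instructions below ask for the name "addtmp". LLVM keeps the
    // first and renames the second to "addtmp1", so every value name stays
    // unique within the enclosing function.
    llvm::Value *emitTwoAdds(llvm::IRBuilder<> &Builder, llvm::Value *L,
                             llvm::Value *R) {
      llvm::Value *Sum = Builder.CreateFAdd(L, R, "addtmp");
      return Builder.CreateFAdd(Sum, R, "addtmp"); // printed as "addtmp1"
    }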
-`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +`LLVM instructions <../LangRef.html#instruction-reference>`_ are constrained by strict rules: for example, the Left and Right operators of an `add -instruction <../LangRef.html#i_add>`_ must have the same type, and the +instruction <../LangRef.html#add-instruction>`_ must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul. On the other hand, LLVM specifies that the `fcmp -instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +instruction <../LangRef.html#fcmp-instruction>`_ always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with a `uitofp -instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +instruction <../LangRef.html#uitofp-to-instruction>`_. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the `sitofp -instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +instruction <../LangRef.html#sitofp-to-instruction>`_, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value. .. code-block:: c++ - Value *CallExprAST::Codegen() { + Value *CallExprAST::codegen() { // Look up the name in the global module table. Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. if (CalleeF->arg_size() != Args.size()) return ErrorV("Incorrect # arguments passed"); - std::vector ArgsV; + std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Code generation for function calls is quite straightforward with LLVM. -The code above initially does a function name lookup in the LLVM -Module's symbol table. Recall that the LLVM Module is the container that -holds all of the functions we are JIT'ing. By giving each function the -same name as what the user specifies, we can use the LLVM symbol table -to resolve function names for us. +Code generation for function calls is quite straightforward with LLVM. The code +above initially does a function name lookup in the LLVM Module's symbol table. +Recall that the LLVM Module is the container that holds the functions we are +JIT'ing. By giving each function the same name as what the user specifies, we +can use the LLVM symbol table to resolve function names for us. Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM `call -instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +instruction <../LangRef.html#call-instruction>`_. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort. @@ -249,14 +261,15 @@ with: .. code-block:: c++ - Function *PrototypeAST::Codegen() { + Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. 
std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); - FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), - Doubles, false); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); - Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule); This code packs a lot of power into a few lines. Note first that this function returns a "Function\*" instead of a "Value\*". Because a @@ -273,118 +286,67 @@ double as a result, and that is not vararg (the false parameter indicates this). Note that Types in LLVM are uniqued just like Constants are, so you don't "new" a type, you "get" it. -The final line above actually creates the function that the prototype -will correspond to. This indicates the type, linkage and name to use, as +The final line above actually creates the IR Function corresponding to +the Prototype. This indicates the type, linkage and name to use, as well as which module to insert into. "`external linkage <../LangRef.html#linkage>`_" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. The Name passed in is the name the user specified: since "``TheModule``" is specified, this name is registered -in "``TheModule``"s symbol table, which is used by the function call -code above. +in "``TheModule``"s symbol table. .. code-block:: c++ - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); -The Module symbol table works just like the Function symbol table when -it comes to name conflicts: if a new function is created with a name -that was previously added to the symbol table, the new function will get -implicitly renamed when added to the Module. The code above exploits -this fact to determine if there was a previous definition of this -function. + return F; -In Kaleidoscope, I choose to allow redefinitions of functions in two -cases: first, we want to allow 'extern'ing a function more than once, as -long as the prototypes for the externs match (since all arguments have -the same type, we just have to check that the number of arguments -match). Second, we want to allow 'extern'ing a function and then -defining a body for it. This is useful when defining mutually recursive -functions. +Finally, we set the name of each of the function's arguments according to the +names given in the Prototype. This step isn't strictly necessary, but keeping +the names consistent makes the IR more readable, and allows subsequent code to +refer directly to the arguments for their names, rather than having to look up +them up in the Prototype AST. -In order to implement this, the code above first checks to see if there -is a collision on the name of the function. If so, it deletes the -function we just created (by calling ``eraseFromParent``) and then -calling ``getFunction`` to get the existing function with the specified -name. Note that many APIs in LLVM have "erase" forms and "remove" forms. -The "remove" form unlinks the object from its parent (e.g. a Function -from a Module) and returns it. 
The "erase" form unlinks the object and -then deletes it. +At this point we have a function prototype with no body. This is how LLVM IR +represents function declarations. For extern statements in Kaleidoscope, this +is as far as we need to go. For function definitions however, we need to +codegen and attach a function body. .. code-block:: c++ - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } + Function *FunctionAST::codegen() { + // First, check for an existing function from a previous 'extern' declaration. + Function *TheFunction = TheModule->getFunction(Proto->getName()); - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + if (!TheFunction) + TheFunction = Proto->codegen(); -In order to verify the logic above, we first check to see if the -pre-existing function is "empty". In this case, empty means that it has -no basic blocks in it, which means it has no body. If it has no body, it -is a forward declaration. Since we don't allow anything after a full -definition of the function, the code rejects this case. If the previous -reference to a function was an 'extern', we simply verify that the -number of arguments for that definition and this one match up. If not, -we emit an error. + if (!TheFunction) + return nullptr; + + if (!TheFunction->empty()) + return (Function*)ErrorV("Function cannot be redefined."); + + +For function definitions, we start by searching TheModule's symbol table for an +existing version of this function, in case one has already been created using an +'extern' statement. If Module::getFunction returns null then no previous version +exists, so we'll codegen one from the Prototype. In either case, we want to +assert that the function is empty (i.e. has no body yet) before we start. .. code-block:: c++ - // Set names for all arguments. - unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } - return F; - } - -The last bit of code for prototypes loops over all of the arguments in -the function, setting the name of the LLVM Argument objects to match, -and registering the arguments in the ``NamedValues`` map for future use -by the ``VariableExprAST`` AST node. Once this is set up, it returns the -Function object to the caller. Note that we don't check for conflicting -argument names here (e.g. "extern foo(a b a)"). Doing so would be very -straight-forward with the mechanics we have already used above. - -.. code-block:: c++ - - Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; - -Code generation for function definitions starts out simply enough: we -just codegen the prototype (Proto) and verify that it is ok. We then -clear out the ``NamedValues`` map to make sure that there isn't anything -in it from the last function we compiled. Code generation of the -prototype ensures that there is an LLVM Function object that is ready to -go for us. - -.. code-block:: c++ - - // Create a new basic block to start insertion into. 
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); - Builder.SetInsertPoint(BB); - - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; Now we get to the point where the ``Builder`` is set up. The first line creates a new `basic block `_ @@ -396,9 +358,12 @@ Graph `_. Since we don't have any control flow, our functions will only contain one block at this point. We'll fix this in `Chapter 5 `_ :). +Next we add the function arguments to the NamedValues map (after first clearing +it out) so that they're accessible to ``VariableExprAST`` nodes. + .. code-block:: c++ - if (Value *RetVal = Body->Codegen()) { + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -408,11 +373,11 @@ at this point. We'll fix this in `Chapter 5 `_ :). return TheFunction; } -Once the insertion point is set up, we call the ``CodeGen()`` method for -the root expression of the function. If no error happens, this emits -code to compute the expression into the entry block and returns the -value that was computed. Assuming no error, we then create an LLVM `ret -instruction <../LangRef.html#i_ret>`_, which completes the function. +Once the insertion point has been set up and the NamedValues map populated, +we call the ``codegen()`` method for the root expression of the function. If no +error happens, this emits code to compute the expression into the entry block +and returns the value that was computed. Assuming no error, we then create an +LLVM `ret instruction <../LangRef.html#ret-instruction>`_, which completes the function. Once the function is built, we call ``verifyFunction``, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to determine if our compiler is doing everything @@ -423,7 +388,7 @@ function is finished and validated, we return it. // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } The only piece left here is handling of the error case. For simplicity, @@ -432,23 +397,25 @@ we handle this by merely deleting the function we produced with the that they incorrectly typed in before: if we didn't delete it, it would live in the symbol table, with a body, preventing future redefinition. -This code does have a bug, though. Since the ``PrototypeAST::Codegen`` -can return a previously defined forward declaration, our code can -actually delete a forward declaration. There are a number of ways to fix -this bug, see what you can come up with! Here is a testcase: +This code does have a bug, though: If the ``FunctionAST::codegen()`` method +finds an existing IR Function, it does not validate its signature against the +definition's own prototype. This means that an earlier 'extern' declaration will +take precedence over the function definition's signature, which can cause +codegen to fail, for instance if the function arguments are named differently. +There are a number of ways to fix this bug, see what you can come up with! Here +is a testcase: :: - extern foo(a b); # ok, defines foo. - def foo(a b) c; # error, 'c' is invalid. - def bar() foo(1, 2); # error, unknown function "foo" + extern foo(a); # ok, defines foo. + def foo(b) b; # Error: Unknown variable name. (decl using 'a' takes precedence). 
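If you want a hint, one possible direction is sketched below. This is an illustration only, not the tutorial's code: it assumes a hypothetical ``getArgNames()`` accessor on ``PrototypeAST`` (the tutorial keeps the argument list private), and it would run in ``FunctionAST::codegen()`` right after the existing ``Function`` is found:

.. code-block:: c++

    // Sketch of one possible fix (assumes a hypothetical getArgNames()
    // accessor on PrototypeAST). First reject declarations whose arity
    // doesn't match this definition, then overwrite the argument names so
    // NamedValues is populated with the names the body actually uses.
    if (TheFunction->arg_size() != Proto->getArgNames().size())
      return (Function *)ErrorV("redefinition of function with different # args");
    unsigned Idx = 0;
    for (auto &Arg : TheFunction->args())
      Arg.setName(Proto->getArgNames()[Idx++]);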
Driver Changes and Closing Thoughts
===================================

For now, code generation to LLVM doesn't really get us much, except
that we can look at the pretty IR calls. The sample code inserts calls to
-Codegen into the "``HandleDefinition``", "``HandleExtern``" etc
+codegen into the "``HandleDefinition``", "``HandleExtern``" etc.
functions, and then dumps out the LLVM IR. This gives a nice way to look
at the LLVM IR for simple functions. For example:

@@ -463,10 +430,10 @@ at the LLVM IR for simple functions. For example:

 Note how the parser turns the top-level expression into anonymous functions
 for us. This will be handy when we add `JIT
-support `_ in the next chapter. Also note that the
+support `_ in the next chapter. Also note that the
 code is very literally transcribed, no optimizations are being performed
 except simple constant folding done by IRBuilder. We will `add
-optimizations `_ explicitly in the next
+optimizations `_ explicitly in the next
 chapter.

 ::

diff --git a/docs/tutorial/LangImpl4.rst b/docs/tutorial/LangImpl4.rst
index cdaac634dd76..a671d0c37f9d 100644
--- a/docs/tutorial/LangImpl4.rst
+++ b/docs/tutorial/LangImpl4.rst
@@ -120,57 +120,53 @@ exactly the code we have now, except that we would defer running the
 optimizer until the entire file has been parsed.

 In order to get per-function optimizations going, we need to set up a
-`FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold
+`FunctionPassManager <../WritingAnLLVMPass.html#what-passmanager-does>`_ to hold
 and organize the LLVM optimizations that we want to run. Once we have
-that, we can add a set of optimizations to run. The code looks like
-this:
+that, we can add a set of optimizations to run. We'll need a new
+FunctionPassManager for each module that we want to optimize, so we'll
+write a function to create and initialize both the module and pass manager
+for us:

 .. code-block:: c++

-    FunctionPassManager OurFPM(TheModule);
+  void InitializeModuleAndPassManager(void) {
+    // Open a new module.
+    TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+    TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+    // Create a new pass manager attached to it.
+    TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());

-    // Set up the optimizer pipeline.  Start with registering info about how the
-    // target lays out data structures.
-    OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
     // Provide basic AliasAnalysis support for GVN.
-    OurFPM.add(createBasicAliasAnalysisPass());
+    TheFPM->add(createBasicAliasAnalysisPass());
     // Do simple "peephole" optimizations and bit-twiddling optzns.
-    OurFPM.add(createInstructionCombiningPass());
+    TheFPM->add(createInstructionCombiningPass());
     // Reassociate expressions.
-    OurFPM.add(createReassociatePass());
+    TheFPM->add(createReassociatePass());
     // Eliminate Common SubExpressions.
-    OurFPM.add(createGVNPass());
+    TheFPM->add(createGVNPass());
     // Simplify the control flow graph (deleting unreachable blocks, etc).
-    OurFPM.add(createCFGSimplificationPass());
+    TheFPM->add(createCFGSimplificationPass());

-    OurFPM.doInitialization();
+    TheFPM->doInitialization();
+  }

-    // Set the global so the code gen can use this.
-    TheFPM = &OurFPM;
+This code initializes the global module ``TheModule``, and the function pass
+manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
+set up, we use a series of "add" calls to add a bunch of LLVM passes.

-    // Run the main "interpreter loop" now.
-    MainLoop();
-
-This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a
-pointer to the ``Module`` to construct itself. Once it is set up, we use
-a series of "add" calls to add a bunch of LLVM passes. The first pass is
-basically boilerplate, it adds a pass so that later optimizations know
-how the data structures in the program are laid out. The
-"``TheExecutionEngine``" variable is related to the JIT, which we will
-get to in the next section.
-
-In this case, we choose to add 4 optimization passes. The passes we
-chose here are a pretty standard set of "cleanup" optimizations that are
-useful for a wide variety of code. I won't delve into what they do but,
-believe me, they are a good starting place :).
+In this case, we choose to add five passes: one analysis pass (alias analysis),
+and four optimization passes. The passes we choose here are a pretty standard set
+of "cleanup" optimizations that are useful for a wide variety of code. I won't
+delve into what they do but, believe me, they are a good starting place :).

 Once the PassManager is set up, we need to make use of it. We do this by
 running it after our newly created function is constructed (in
-``FunctionAST::Codegen``), but before it is returned to the client:
+``FunctionAST::codegen()``), but before it is returned to the client:

 .. code-block:: c++

-    if (Value *RetVal = Body->Codegen()) {
+    if (Value *RetVal = Body->codegen()) {
       // Finish off the function.
       Builder.CreateRet(RetVal);

@@ -231,55 +227,85 @@ should evaluate and print out 3. If they define a function, they should
 be able to call it from the command line.

 In order to do this, we first declare and initialize the JIT. This is
-done by adding a global variable and a call in ``main``:
+done by adding a global variable ``TheJIT``, and initializing it in
+``main``:

 .. code-block:: c++

-    static ExecutionEngine *TheExecutionEngine;
+    static std::unique_ptr<KaleidoscopeJIT> TheJIT;
     ...
     int main() {
       ..
-      // Create the JIT.  This takes ownership of the module.
-      TheExecutionEngine = EngineBuilder(TheModule).create();
-      ..
+      TheJIT = llvm::make_unique<KaleidoscopeJIT>();
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      return 0;
     }

-This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter. LLVM will automatically pick a JIT
-compiler for you if one is available for your platform, otherwise it
-will fall back to the interpreter.
+The KaleidoscopeJIT class is a simple JIT built specifically for these
+tutorials. In later chapters we will look at how it works and extend it with
+new features, but for now we will take it as given. Its API is very simple:
+``addModule`` adds an LLVM IR module to the JIT, making its functions
+available for execution; ``removeModule`` removes a module, freeing any
+memory associated with the code in that module; and ``findSymbol`` allows us
+to look up pointers to the compiled code.

-Once the ``ExecutionEngine`` is created, the JIT is ready to be used.
-There are a variety of APIs that are useful, but the simplest one is the
-"``getPointerToFunction(F)``" method. This method JIT compiles the
-specified LLVM Function and returns a function pointer to the generated
-machine code. In our case, this means that we can change the code that
-parses a top-level expression to look like this:
+We can take this simple API and change our code that parses top-level expressions
+to look like this:

 .. code-block:: c++

     static void HandleTopLevelExpression() {
      // Evaluate a top-level expression into an anonymous function.
-      if (FunctionAST *F = ParseTopLevelExpr()) {
-        if (Function *LF = F->Codegen()) {
-          LF->dump();  // Dump the function for exposition purposes.
+      if (auto FnAST = ParseTopLevelExpr()) {
+        if (FnAST->codegen()) {

-          // JIT the function, returning a function pointer.
-          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+          // JIT the module containing the anonymous expression, keeping a handle so
+          // we can free it later.
+          auto H = TheJIT->addModule(std::move(TheModule));
+          InitializeModuleAndPassManager();

-          // Cast it to the right type (takes no arguments, returns a double) so we
-          // can call it as a native function.
-          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          // Search the JIT for the __anon_expr symbol.
+          auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+          assert(ExprSymbol && "Function not found");
+
+          // Get the symbol's address and cast it to the right type (takes no
+          // arguments, returns a double) so we can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
           fprintf(stderr, "Evaluated to %f\n", FP());
+
+          // Delete the anonymous expression module from the JIT.
+          TheJIT->removeModule(H);
         }

-Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double.
-Because the LLVM JIT compiler matches the native platform ABI, this
-means that you can just cast the result pointer to a function pointer of
-that type and call it directly. This means, there is no difference
-between JIT compiled code and native machine code that is statically
-linked into your application.
+If parsing and codegen succeed, the next step is to add the module containing
+the top-level expression to the JIT. We do this by calling addModule, which
+triggers code generation for all the functions in the module, and returns a
+handle that can be used to remove the module from the JIT later. Once the module
+has been added to the JIT it can no longer be modified, so we also open a new
+module to hold subsequent code by calling ``InitializeModuleAndPassManager()``.
+
+Once we've added the module to the JIT we need to get a pointer to the final
+generated code. We do this by calling the JIT's findSymbol method, and passing
+the name of the top-level expression function: ``__anon_expr``. Since we just
+added this function, we assert that findSymbol returned a result.
+
+Next, we get the in-memory address of the ``__anon_expr`` function by calling
+``getAddress()`` on the symbol. Recall that we compile top-level expressions
+into a self-contained LLVM function that takes no arguments and returns the
+computed double. Because the LLVM JIT compiler matches the native platform ABI,
+this means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means there is no difference between JIT
+compiled code and native machine code that is statically linked into your
+application.
+
+Finally, since we don't support re-evaluation of top-level expressions, we
+remove the module from the JIT when we're done to free the associated memory.
+Recall, however, that the module we created a few lines earlier (via
+``InitializeModuleAndPassManager``) is still open and waiting for new code to be
+added.

-With just these two changes, lets see how Kaleidoscope works now!
+With just these two changes, let's see how Kaleidoscope works now!

@@ -320,19 +346,161 @@ demonstrates very basic functionality, but can we do more?
 Evaluated to 24.000000

-This illustrates that we can now call user code, but there is something
-a bit subtle going on here. Note that we only invoke the JIT on the
-anonymous functions that *call testfunc*, but we never invoked it on
-*testfunc* itself. What actually happened here is that the JIT scanned
-for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning from
-``getPointerToFunction()``.
+  ready> testfunc(5, 10);
+  ready> LLVM ERROR: Program used external function 'testfunc' which could not be resolved!

-The JIT provides a number of other more advanced interfaces for things
-like freeing allocated machine code, rejit'ing functions to update them,
-etc. However, even with this simple code, we get some surprisingly
-powerful capabilities - check this out (I removed the dump of the
-anonymous functions, you should get the idea by now :) :
+
+Function definitions and calls also work, but something went very wrong on that
+last line. The call looks valid, so what happened? As you may have guessed from
+the API, a Module is a unit of allocation for the JIT, and testfunc was part
+of the same module that contained the anonymous expression. When we removed that
+module from the JIT to free the memory for the anonymous expression, we deleted
+the definition of ``testfunc`` along with it. Then, when we tried to call
+testfunc a second time, the JIT could no longer find it.
+
+The easiest way to fix this is to put the anonymous expression in a separate
+module from the rest of the function definitions. The JIT will happily resolve
+function calls across module boundaries, as long as each of the functions called
+has a prototype, and is added to the JIT before it is called. By putting the
+anonymous expression in a different module we can delete it without affecting
+the rest of the functions.
+
+In fact, we're going to go a step further and put every function in its own
+module. Doing so allows us to exploit a useful property of the KaleidoscopeJIT
+that will make our environment more REPL-like: Functions can be added to the
+JIT more than once (unlike a module where every function must have a unique
+definition). When you look up a symbol in KaleidoscopeJIT it will always return
+the most recent definition:
+
+::
+
+    ready> def foo(x) x + 1;
+    Read function definition:
+    define double @foo(double %x) {
+    entry:
+      %addtmp = fadd double %x, 1.000000e+00
+      ret double %addtmp
+    }
+
+    ready> foo(2);
+    Evaluated to 3.000000
+
+    ready> def foo(x) x + 2;
+    define double @foo(double %x) {
+    entry:
+      %addtmp = fadd double %x, 2.000000e+00
+      ret double %addtmp
+    }
+
+    ready> foo(2);
+    Evaluated to 4.000000
+
+
+To allow each function to live in its own module we'll need a way to
+re-generate previous function declarations into each new module we open:
+
+.. code-block:: c++
+
+    static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+
+    ...
+
+    Function *getFunction(std::string Name) {
+      // First, see if the function has already been added to the current module.
+      if (auto *F = TheModule->getFunction(Name))
+        return F;
+
+      // If not, check whether we can codegen the declaration from some existing
+      // prototype.
+      auto FI = FunctionProtos.find(Name);
+      if (FI != FunctionProtos.end())
+        return FI->second->codegen();
+
+      // If no existing prototype exists, return null.
+      return nullptr;
+    }
+
+    ...
+
+    Value *CallExprAST::codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = getFunction(Callee);
+
+    ...
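+
+    // Editorial sketch (not part of the original patch): getFunction() above
+    // relies on a global FunctionProtos map that the surrounding text names
+    // but never declares. Mirroring the chapter's code, it is assumed to be:
+    static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;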
+
+    Function *FunctionAST::codegen() {
+      // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+      // reference to it for use below.
+      auto &P = *Proto;
+      FunctionProtos[Proto->getName()] = std::move(Proto);
+      Function *TheFunction = getFunction(P.getName());
+      if (!TheFunction)
+        return nullptr;
+
+
+To enable this, we'll start by adding a new global, ``FunctionProtos``, that
+holds the most recent prototype for each function. We'll also add a convenience
+method, ``getFunction()``, to replace calls to ``TheModule->getFunction()``.
+Our convenience method searches ``TheModule`` for an existing function
+declaration, falling back to generating a new declaration from FunctionProtos if
+it doesn't find one. In ``CallExprAST::codegen()`` we just need to replace the
+call to ``TheModule->getFunction()``. In ``FunctionAST::codegen()`` we need to
+update the FunctionProtos map first, then call ``getFunction()``. With this
+done, we can always obtain a function declaration in the current module for any
+previously declared function.
+
+We also need to update HandleDefinition and HandleExtern:
+
+.. code-block:: c++
+
+    static void HandleDefinition() {
+      if (auto FnAST = ParseDefinition()) {
+        if (auto *FnIR = FnAST->codegen()) {
+          fprintf(stderr, "Read function definition:");
+          FnIR->dump();
+          TheJIT->addModule(std::move(TheModule));
+          InitializeModuleAndPassManager();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (auto ProtoAST = ParseExtern()) {
+        if (auto *FnIR = ProtoAST->codegen()) {
+          fprintf(stderr, "Read extern: ");
+          FnIR->dump();
+          FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+In HandleDefinition, we add two lines to transfer the newly defined function to
+the JIT and open a new module. In HandleExtern, we just need to add one line to
+add the prototype to FunctionProtos.
+
+With these changes made, let's try our REPL again (I removed the dump of the
+anonymous functions this time, you should get the idea by now :) :
+
+::
+
+    ready> def foo(x) x + 1;
+    ready> foo(2);
+    Evaluated to 3.000000
+
+    ready> def foo(x) x + 2;
+    ready> foo(2);
+    Evaluated to 4.000000
+
+It works!
+
+Even with this simple code, we get some surprisingly powerful capabilities -
+check this out:

 ::

@@ -375,34 +543,30 @@ anonymous functions, you should get the idea by now :) :

    Evaluated to 1.000000

-Whoa, how does the JIT know about sin and cos? The answer is
-surprisingly simple: in this example, the JIT started execution of a
-function and got to a function call. It realized that the function was
-not yet JIT compiled and invoked the standard set of routines to resolve
-the function. In this case, there is no body defined for the function,
-so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
-process itself. Since "``sin``" is defined within the JIT's address
-space, it simply patches up calls in the module to call the libm version
-of ``sin`` directly.
+Whoa, how does the JIT know about sin and cos? The answer is surprisingly
+simple: The KaleidoscopeJIT has a straightforward symbol resolution rule that
+it uses to find symbols that aren't available in any given module: First
+it searches all the modules that have already been added to the JIT, from the
+most recent to the oldest, to find the newest definition.
If no definition is
+found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
+Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
+address space, it simply patches up calls in the module to call the libm
+version of ``sin`` directly.

-The LLVM JIT provides a number of interfaces (look in the
-``ExecutionEngine.h`` file) for controlling how unknown functions get
-resolved. It allows you to establish explicit mappings between IR
-objects and addresses (useful for LLVM global variables that you want to
-map to static tables, for example), allows you to dynamically decide on
-the fly based on the function name, and even allows you to have the JIT
-compile functions lazily the first time they're called.
+In the future we'll see how tweaking this symbol resolution rule can be used to
+enable all sorts of useful features, from security (restricting the set of
+symbols available to JIT'd code), to dynamic code generation based on symbol
+names, and even lazy compilation.

-One interesting application of this is that we can now extend the
-language by writing arbitrary C++ code to implement operations. For
-example, if we add:
+One immediate benefit of the symbol resolution rule is that we can now extend
+the language by writing arbitrary C++ code to implement operations. For example,
+if we add:

 .. code-block:: c++

     /// putchard - putchar that takes a double and returns 0.
-    extern "C"
-    double putchard(double X) {
-      putchar((char)X);
+    extern "C" double putchard(double X) {
+      fputc((char)X, stderr);
       return 0;
     }

diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst
index ca2ffebc19a2..d916f92bf99e 100644
--- a/docs/tutorial/LangImpl5.rst
+++ b/docs/tutorial/LangImpl5.rst
@@ -66,7 +66,9 @@ for the relevant tokens:

 .. code-block:: c++

   // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_if = -6,
+  tok_then = -7,
+  tok_else = -8,

 Once we have that, we recognize the new keywords in the lexer. This is
 pretty simple stuff:

@@ -74,11 +76,16 @@ pretty simple stuff:

 .. code-block:: c++

     ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
     return tok_identifier;

 AST Extensions for If/Then/Else
@@ -90,11 +97,13 @@ To represent the new expression we add a new AST node for it:

     /// IfExprAST - Expression class for if/then/else.
     class IfExprAST : public ExprAST {
-      ExprAST *Cond, *Then, *Else;
+      std::unique_ptr<ExprAST> Cond, Then, Else;
+
     public:
-      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-        : Cond(cond), Then(then), Else(_else) {}
-      virtual Value *Codegen();
+      IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+                std::unique_ptr<ExprAST> Else)
+          : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+      virtual Value *codegen();
     };

 The AST node just has pointers to the various subexpressions.
@@ -109,42 +118,51 @@ First we define a new parsing function:

 .. code-block:: c++

     /// ifexpr ::= 'if' expression 'then' expression 'else' expression
-    static ExprAST *ParseIfExpr() {
+    static std::unique_ptr<ExprAST> ParseIfExpr() {
       getNextToken();  // eat the if.

       // condition.
-      ExprAST *Cond = ParseExpression();
-      if (!Cond) return 0;
+      auto Cond = ParseExpression();
+      if (!Cond)
+        return nullptr;

       if (CurTok != tok_then)
         return Error("expected then");
       getNextToken();  // eat the then

-      ExprAST *Then = ParseExpression();
-      if (Then == 0) return 0;
+      auto Then = ParseExpression();
+      if (!Then)
+        return nullptr;

       if (CurTok != tok_else)
         return Error("expected else");

       getNextToken();

-      ExprAST *Else = ParseExpression();
-      if (!Else) return 0;
+      auto Else = ParseExpression();
+      if (!Else)
+        return nullptr;

-      return new IfExprAST(Cond, Then, Else);
+      return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
+                                          std::move(Else));
     }

 Next we hook it up as a primary expression:

 .. code-block:: c++

-    static ExprAST *ParsePrimary() {
+    static std::unique_ptr<ExprAST> ParsePrimary() {
       switch (CurTok) {
-      default: return Error("unknown token when expecting an expression");
-      case tok_identifier: return ParseIdentifierExpr();
-      case tok_number:     return ParseNumberExpr();
-      case '(':            return ParseParenExpr();
-      case tok_if:         return ParseIfExpr();
+      default:
+        return Error("unknown token when expecting an expression");
+      case tok_identifier:
+        return ParseIdentifierExpr();
+      case tok_number:
+        return ParseNumberExpr();
+      case '(':
+        return ParseParenExpr();
+      case tok_if:
+        return ParseIfExpr();
       }
     }

@@ -196,7 +214,7 @@ Kaleidoscope looks like this:

 To visualize the control flow graph, you can use a nifty feature of the
 LLVM '`opt `_' tool. If you put this LLVM IR into "t.ll" and
 run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
-window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+window will pop up <../ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ and you'll
 see this graph:

 .. figure:: LangImpl5-cfg.png

@@ -262,19 +280,19 @@ Okay, enough of the motivation and overview, let's generate code!

 Code Generation for If/Then/Else
 --------------------------------

-In order to generate code for this, we implement the ``Codegen`` method
+In order to generate code for this, we implement the ``codegen`` method
 for ``IfExprAST``:

 .. code-block:: c++

-    Value *IfExprAST::Codegen() {
-      Value *CondV = Cond->Codegen();
-      if (CondV == 0) return 0;
+    Value *IfExprAST::codegen() {
+      Value *CondV = Cond->codegen();
+      if (!CondV)
+        return nullptr;

       // Convert condition to a bool by comparing equal to 0.0.
-      CondV = Builder.CreateFCmpONE(CondV,
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                    "ifcond");
+      CondV = Builder.CreateFCmpONE(
+          CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");

 This code is straightforward and similar to what we saw before. We emit
 the expression for the condition, then compare that value to zero to get
@@ -286,7 +304,8 @@ a truth value as a 1-bit (bool) value.

       // Create blocks for the then and else cases.  Insert the 'then' block at the
       // end of the function.
-      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ThenBB =
+          BasicBlock::Create(getGlobalContext(), "then", TheFunction);
       BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
       BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");

@@ -318,8 +337,9 @@ that LLVM supports forward references.

       // Emit then value.
       Builder.SetInsertPoint(ThenBB);

-      Value *ThenV = Then->Codegen();
-      if (ThenV == 0) return 0;
+      Value *ThenV = Then->codegen();
+      if (!ThenV)
+        return nullptr;

       Builder.CreateBr(MergeBB);
       // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
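To see why that final re-query matters, here is a compact sketch (illustrative
only, not part of the patch; it reuses ``Builder``, ``ThenBB``, ``ThenV``,
``PN``, and ``MergeBB`` from the listing above):

.. code-block:: c++

    // If Then is itself an if/then/else, Then->codegen() leaves the builder
    // positioned in the nested expression's merge block, not in ThenBB. The
    // unconditional branch we just emitted therefore lives in that *new*
    // block, which is the real predecessor of MergeBB:
    ThenBB = Builder.GetInsertBlock();
    // Using the original ThenBB in PN->addIncoming(ThenV, ThenBB) would name
    // a block that never branches to MergeBB, producing invalid IR.
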
@@ -349,7 +369,7 @@ of the block in the CFG. Why then, are we getting the current block when
 we just set it to ThenBB 5 lines above? The problem is that the "Then"
 expression may actually itself change the block that the Builder is
 emitting into if, for example, it contains a nested "if/then/else"
-expression. Because calling Codegen recursively could arbitrarily change
+expression. Because calling ``codegen()`` recursively could arbitrarily change
 the notion of the current block, we are required to get an up-to-date
 value for code that will set up the Phi node.

@@ -359,11 +379,12 @@ value for code that will set up the Phi node.
       TheFunction->getBasicBlockList().push_back(ElseBB);
       Builder.SetInsertPoint(ElseBB);

-      Value *ElseV = Else->Codegen();
-      if (ElseV == 0) return 0;
+      Value *ElseV = Else->codegen();
+      if (!ElseV)
+        return nullptr;

       Builder.CreateBr(MergeBB);
-      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      // codegen of 'Else' can change the current block, update ElseBB for the PHI.
       ElseBB = Builder.GetInsertBlock();

 Code generation for the 'else' block is basically identical to codegen
@@ -378,8 +399,8 @@ code:

       // Emit merge block.
       TheFunction->getBasicBlockList().push_back(MergeBB);
       Builder.SetInsertPoint(MergeBB);
-      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                      "iftmp");
+      PHINode *PN =
+          Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");

       PN->addIncoming(ThenV, ThenBB);
       PN->addIncoming(ElseV, ElseBB);

@@ -444,13 +465,20 @@ The lexer extensions are the same sort of thing as for if/then/else:

     tok_for = -9, tok_in = -10

     ... in gettok ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
     return tok_identifier;

 AST Extensions for the 'for' Loop
@@ -464,12 +492,15 @@ variable name and the constituent expressions in the node.

     /// ForExprAST - Expression class for for/in.
     class ForExprAST : public ExprAST {
       std::string VarName;
-      ExprAST *Start, *End, *Step, *Body;
+      std::unique_ptr<ExprAST> Start, End, Step, Body;
+
     public:
-      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-                 ExprAST *step, ExprAST *body)
-        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-      virtual Value *Codegen();
+      ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+                 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+                 std::unique_ptr<ExprAST> Body)
+          : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+            Step(std::move(Step)), Body(std::move(Body)) {}
+      virtual Value *codegen();
     };

 Parser Extensions for the 'for' Loop
@@ -483,7 +514,7 @@ value to null in the AST node:

 .. code-block:: c++

     /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-    static ExprAST *ParseForExpr() {
+    static std::unique_ptr<ExprAST> ParseForExpr() {
       getNextToken();  // eat the for.

       if (CurTok != tok_identifier)
@@ -497,31 +528,37 @@ value to null in the AST node:

       getNextToken();  // eat '='.
-      ExprAST *Start = ParseExpression();
-      if (Start == 0) return 0;
+      auto Start = ParseExpression();
+      if (!Start)
+        return nullptr;
       if (CurTok != ',')
         return Error("expected ',' after for start value");
       getNextToken();

-      ExprAST *End = ParseExpression();
-      if (End == 0) return 0;
+      auto End = ParseExpression();
+      if (!End)
+        return nullptr;

       // The step value is optional.
-      ExprAST *Step = 0;
+      std::unique_ptr<ExprAST> Step;
       if (CurTok == ',') {
         getNextToken();
         Step = ParseExpression();
-        if (Step == 0) return 0;
+        if (!Step)
+          return nullptr;
       }

       if (CurTok != tok_in)
         return Error("expected 'in' after for");
       getNextToken();  // eat 'in'.

-      ExprAST *Body = ParseExpression();
-      if (Body == 0) return 0;
+      auto Body = ParseExpression();
+      if (!Body)
+        return nullptr;

-      return new ForExprAST(IdName, Start, End, Step, Body);
+      return llvm::make_unique<ForExprAST>(IdName, std::move(Start),
+                                           std::move(End), std::move(Step),
+                                           std::move(Body));
     }

 LLVM IR for the 'for' Loop
@@ -565,14 +602,14 @@ together.

 Code Generation for the 'for' Loop
 ----------------------------------

-The first part of Codegen is very simple: we just output the start
+The first part of codegen is very simple: we just output the start
 expression for the loop value:

 .. code-block:: c++

-    Value *ForExprAST::Codegen() {
+    Value *ForExprAST::codegen() {
       // Emit the start code first, without 'variable' in scope.
-      Value *StartVal = Start->Codegen();
+      Value *StartVal = Start->codegen();
       if (StartVal == 0) return 0;

 With this out of the way, the next step is to set up the LLVM basic
@@ -587,7 +624,8 @@ expression).

       // block.
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+      BasicBlock *LoopBB =
+          BasicBlock::Create(getGlobalContext(), "loop", TheFunction);

       // Insert an explicit fall through from the current block to the LoopBB.
       Builder.CreateBr(LoopBB);
@@ -604,7 +642,8 @@ the two blocks.

       Builder.SetInsertPoint(LoopBB);

       // Start the PHI node with an entry for Start.
-      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                            2, VarName.c_str());
       Variable->addIncoming(StartVal, PreheaderBB);

 Now that the "preheader" for the loop is set up, we switch to emitting
@@ -624,8 +663,8 @@ backedge, but we can't set it up yet (because it doesn't exist!).

       // Emit the body of the loop.  This, like any other expr, can change the
       // current BB.  Note that we ignore the value computed by the body, but don't
       // allow an error.
-      if (Body->Codegen() == 0)
-        return 0;
+      if (!Body->codegen())
+        return nullptr;

 Now the code starts to get more interesting. Our 'for' loop introduces a
 new variable to the symbol table. This means that our symbol table can
@@ -647,10 +686,11 @@ table.

 .. code-block:: c++

       // Emit the step value.
-      Value *StepVal;
+      Value *StepVal = nullptr;
       if (Step) {
-        StepVal = Step->Codegen();
-        if (StepVal == 0) return 0;
+        StepVal = Step->codegen();
+        if (!StepVal)
+          return nullptr;
       } else {
         // If not specified, use 1.0.
         StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
@@ -666,13 +706,13 @@ iteration of the loop.

 .. code-block:: c++

       // Compute the end condition.
-      Value *EndCond = End->Codegen();
-      if (EndCond == 0) return EndCond;
+      Value *EndCond = End->codegen();
+      if (!EndCond)
+        return nullptr;

       // Convert condition to a bool by comparing equal to 0.0.
-      EndCond = Builder.CreateFCmpONE(EndCond,
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                      "loopcond");
+      EndCond = Builder.CreateFCmpONE(
+          EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");

 Finally, we evaluate the exit value of the loop, to determine whether
 the loop should exit. This mirrors the condition evaluation for the
@@ -682,7 +722,8 @@ if/then/else statement.

       // Create the "after loop" block and insert it.
       BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+      BasicBlock *AfterBB =
+          BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);

       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);

@@ -718,7 +759,7 @@ value, we can add the incoming value to the loop PHI node. After that,
 we remove the loop variable from the symbol table, so that it isn't in
 scope after the for loop. Finally, code generation of the for loop
 always returns 0.0, so that is what we return from
-``ForExprAST::Codegen``.
+``ForExprAST::codegen()``.

 With this, we conclude the "adding control flow to Kaleidoscope" chapter
 of the tutorial. In this chapter we added two control flow constructs,
diff --git a/docs/tutorial/LangImpl6.rst b/docs/tutorial/LangImpl6.rst
index bf78bdea74d6..827cd392effb 100644
--- a/docs/tutorial/LangImpl6.rst
+++ b/docs/tutorial/LangImpl6.rst
@@ -24,7 +24,7 @@ is good or bad. In this tutorial we'll assume that it is okay to use
 this as a way to show some interesting parsing techniques.

 At the end of this tutorial, we'll run through an example Kaleidoscope
-application that `renders the Mandelbrot set <#example>`_. This gives an
+application that `renders the Mandelbrot set <#kicking-the-tires>`_. This gives an
 example of what you can build with Kaleidoscope and its feature set.

 User-defined Operators: the Idea
@@ -96,19 +96,24 @@ keywords:

     enum Token {
       ...
       // operators
-      tok_binary = -11, tok_unary = -12
+      tok_binary = -11,
+      tok_unary = -12
     };
     ...
     static int gettok() {
     ...
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
     return tok_identifier;

 This just adds lexer support for the unary and binary keywords, like we
-did in `previous chapters `_. One nice thing
+did in `previous chapters `_. One nice thing
 about our current AST is that we represent binary operators with full
 generalization by using their ASCII code as the opcode. For our extended
 operators, we'll use this same representation, so we don't need any new

@@ -129,15 +134,17 @@ this:

     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
-      bool isOperator;
+      bool IsOperator;
       unsigned Precedence;  // Precedence if a binary op.
-    public:
-      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-                   bool isoperator = false, unsigned prec = 0)
-      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+    public:
+      PrototypeAST(const std::string &name, std::vector<std::string> Args,
+                   bool IsOperator = false, unsigned Prec = 0)
+          : Name(name), Args(std::move(Args)), IsOperator(IsOperator),
+            Precedence(Prec) {}
+
+      bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return IsOperator && Args.size() == 2; }

       char getOperatorName() const {
         assert(isUnaryOp() || isBinaryOp());
@@ -146,7 +153,7 @@ this:

       unsigned getBinaryPrecedence() const { return Precedence; }

-      Function *Codegen();
+      Function *codegen();
     };

 Basically, in addition to knowing a name for the prototype, we now keep
@@ -161,7 +168,7 @@ user-defined operator, we need to parse it:

     /// prototype
     ///   ::= id '(' id* ')'
     ///   ::= binary LETTER number? (id, id)
-    static PrototypeAST *ParsePrototype() {
+    static std::unique_ptr<PrototypeAST> ParsePrototype() {
       std::string FnName;

       unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
@@ -210,7 +217,8 @@ user-defined operator, we need to parse it:

       if (Kind && ArgNames.size() != Kind)
         return ErrorP("Invalid number of operands for operator");

-      return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+      return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames),
+                                             Kind != 0, BinaryPrecedence);
     }

 This is all fairly straightforward parsing code, and we have already
@@ -227,26 +235,31 @@ default case for our existing binary operator node:

 .. code-block:: c++

-    Value *BinaryExprAST::Codegen() {
-      Value *L = LHS->Codegen();
-      Value *R = RHS->Codegen();
-      if (L == 0 || R == 0) return 0;
+    Value *BinaryExprAST::codegen() {
+      Value *L = LHS->codegen();
+      Value *R = RHS->codegen();
+      if (!L || !R)
+        return nullptr;

       switch (Op) {
-      case '+': return Builder.CreateFAdd(L, R, "addtmp");
-      case '-': return Builder.CreateFSub(L, R, "subtmp");
-      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '+':
+        return Builder.CreateFAdd(L, R, "addtmp");
+      case '-':
+        return Builder.CreateFSub(L, R, "subtmp");
+      case '*':
+        return Builder.CreateFMul(L, R, "multmp");
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
         return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
                                     "booltmp");
-      default: break;
+      default:
+        break;
       }

       // If it wasn't a builtin binary operator, it must be a user defined one. Emit
       // a call to it.
-      Function *F = TheModule->getFunction(std::string("binary")+Op);
+      Function *F = TheModule->getFunction(std::string("binary") + Op);
       assert(F && "binary operator not found!");

       Value *Ops[2] = { L, R };

@@ -263,12 +276,12 @@ The final piece of code we are missing is a bit of top-level magic:

 .. code-block:: c++

-    Function *FunctionAST::Codegen() {
+    Function *FunctionAST::codegen() {
       NamedValues.clear();

-      Function *TheFunction = Proto->Codegen();
-      if (TheFunction == 0)
-        return 0;
+      Function *TheFunction = Proto->codegen();
+      if (!TheFunction)
+        return nullptr;

       // If this is an operator, install it.
       if (Proto->isBinaryOp())
@@ -278,7 +291,7 @@ The final piece of code we are missing is a bit of top-level magic:

       BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);

-      if (Value *RetVal = Body->Codegen()) {
+      if (Value *RetVal = Body->codegen()) {
     ...

 Basically, before codegening a function, if it is a user-defined
@@ -305,11 +318,12 @@ that, we need an AST node:

     /// UnaryExprAST - Expression class for a unary operator.
     class UnaryExprAST : public ExprAST {
       char Opcode;
-      ExprAST *Operand;
+      std::unique_ptr<ExprAST> Operand;
+
     public:
-      UnaryExprAST(char opcode, ExprAST *operand)
-        : Opcode(opcode), Operand(operand) {}
-      virtual Value *Codegen();
+      UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+          : Opcode(Opcode), Operand(std::move(Operand)) {}
+      virtual Value *codegen();
     };

 This AST node is very simple and obvious by now. It directly mirrors the
@@ -322,7 +336,7 @@ simple: we'll add a new function to do it:

     /// unary
     ///   ::= primary
     ///   ::= '!' unary
-    static ExprAST *ParseUnary() {
+    static std::unique_ptr<ExprAST> ParseUnary() {
       // If the current token is not an operator, it must be a primary expr.
       if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
         return ParsePrimary();
@@ -330,9 +344,9 @@ simple: we'll add a new function to do it:

       // If this is a unary operator, read it.
       int Opc = CurTok;
       getNextToken();
-      if (ExprAST *Operand = ParseUnary())
-        return new UnaryExprAST(Opc, Operand);
-      return 0;
+      if (auto Operand = ParseUnary())
+        return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+      return nullptr;
     }

 The grammar we add is pretty straightforward here. If we see a unary
@@ -350,21 +364,24 @@ call ParseUnary instead:

     /// binoprhs
     ///   ::= ('+' unary)*
-    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+    static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                                  std::unique_ptr<ExprAST> LHS) {
       ...
         // Parse the unary expression after the binary operator.
-        ExprAST *RHS = ParseUnary();
-        if (!RHS) return 0;
+        auto RHS = ParseUnary();
+        if (!RHS)
+          return nullptr;
       ...
     }

     /// expression
     ///   ::= unary binoprhs
     ///
-    static ExprAST *ParseExpression() {
-      ExprAST *LHS = ParseUnary();
-      if (!LHS) return 0;
+    static std::unique_ptr<ExprAST> ParseExpression() {
+      auto LHS = ParseUnary();
+      if (!LHS)
+        return nullptr;

-      return ParseBinOpRHS(0, LHS);
+      return ParseBinOpRHS(0, std::move(LHS));
     }

 With these two simple changes, we are now able to parse unary operators
@@ -378,7 +395,7 @@ operator code above with:

     /// prototype
     ///   ::= id '(' id* ')'
     ///   ::= binary LETTER number? (id, id)
     ///   ::= unary LETTER (id)
-    static PrototypeAST *ParsePrototype() {
+    static std::unique_ptr<PrototypeAST> ParsePrototype() {
       std::string FnName;

       unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
@@ -411,12 +428,13 @@ unary operators. It looks like this:

 .. code-block:: c++

-    Value *UnaryExprAST::Codegen() {
-      Value *OperandV = Operand->Codegen();
-      if (OperandV == 0) return 0;
+    Value *UnaryExprAST::codegen() {
+      Value *OperandV = Operand->codegen();
+      if (!OperandV)
+        return nullptr;

       Function *F = TheModule->getFunction(std::string("unary")+Opcode);
-      if (F == 0)
+      if (!F)
         return ErrorV("Unknown unary operator");

       return Builder.CreateCall(F, OperandV, "unop");
diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst
index 648940785b09..1cd7d56fddb4 100644
--- a/docs/tutorial/LangImpl7.rst
+++ b/docs/tutorial/LangImpl7.rst
@@ -118,7 +118,7 @@ that @G defines *space* for an i32 in the global data area, but its
 *name* actually refers to the address for that space.
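To make this concrete in terms of the C++ API used throughout these tutorials,
here is a small sketch (illustrative only, not part of the patch; ``TheModule``
and ``Builder`` are the tutorial's globals, and ``NewVal`` is an assumed value
of @G's type):

.. code-block:: c++

    // @G names an address, so reading its current value takes an explicit
    // load, and updating it takes an explicit store.
    GlobalVariable *G = TheModule->getGlobalVariable("G");
    Value *CurVal = Builder.CreateLoad(G, "g.val"); // read @G
    Builder.CreateStore(NewVal, G);                 // write @G
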
 Stack variables work the same way, except that instead of being declared
 with global variable definitions, they are declared with the `LLVM alloca
-instruction <../LangRef.html#i_alloca>`_:
+instruction <../LangRef.html#alloca-instruction>`_:

 .. code-block:: llvm

@@ -221,7 +221,7 @@ variables in certain circumstances:

    funny pointer arithmetic is involved, the alloca will not be
    promoted.

 #. mem2reg only works on allocas of `first
-   class <../LangRef.html#t_classifications>`_ values (such as pointers,
+   class <../LangRef.html#first-class-types>`_ values (such as pointers,
    scalars and vectors), and only if the array size of the allocation is
    1 (or missing in the .ll file). mem2reg is not capable of promoting
    structs or arrays to registers. Note that the "scalarrepl" pass is
@@ -355,10 +355,11 @@ from the stack slot:

 .. code-block:: c++

-    Value *VariableExprAST::Codegen() {
+    Value *VariableExprAST::codegen() {
       // Look this variable up in the function.
       Value *V = NamedValues[Name];
-      if (V == 0) return ErrorV("Unknown variable name");
+      if (!V)
+        return ErrorV("Unknown variable name");

       // Load the value.
       return Builder.CreateLoad(V, Name.c_str());
@@ -366,7 +367,7 @@ from the stack slot:

 As you can see, this is pretty straightforward. Now we need to update
 the things that define the variables to set up the alloca. We'll start
-with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for
+with ``ForExprAST::codegen()`` (see the `full code listing <#id1>`_ for
 the unabridged code):

 .. code-block:: c++

@@ -377,16 +378,18 @@ the unabridged code):

       AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);

       // Emit the start code first, without 'variable' in scope.
-      Value *StartVal = Start->Codegen();
-      if (StartVal == 0) return 0;
+      Value *StartVal = Start->codegen();
+      if (!StartVal)
+        return nullptr;

       // Store the value into the alloca.
       Builder.CreateStore(StartVal, Alloca);
       ...

       // Compute the end condition.
-      Value *EndCond = End->Codegen();
-      if (EndCond == 0) return EndCond;
+      Value *EndCond = End->codegen();
+      if (!EndCond)
+        return nullptr;

       // Reload, increment, and restore the alloca.  This handles the case where
       // the body of the loop mutates the variable.
@@ -396,7 +399,7 @@ the unabridged code):
       ...

 This code is virtually identical to the code `before we allowed mutable
-variables `_. The big difference is that we
+variables `_. The big difference is that we
 no longer have to construct a PHI node, and we use load/store to access
 the variable as needed.

@@ -423,7 +426,7 @@ them.

 The code for this is also pretty simple: For each argument, we make an
 alloca, store the input value to the function into the alloca, and
 register the alloca as the memory location
-for the argument. This method gets invoked by ``FunctionAST::Codegen``
+for the argument. This method gets invoked by ``FunctionAST::codegen()``
 right after it sets up the entry block for the function.

 The final missing piece is adding the mem2reg pass, which allows us to
@@ -569,11 +572,11 @@ implement codegen for the assignment operator. This looks like:

 .. code-block:: c++

-    Value *BinaryExprAST::Codegen() {
+    Value *BinaryExprAST::codegen() {
       // Special case '=' because we don't want to emit the LHS as an expression.
       if (Op == '=') {
         // Assignment requires the LHS to be an identifier.
-        VariableExprAST *LHSE = dynamic_cast<VariableExprAST *>(LHS);
+        VariableExprAST *LHSE = dynamic_cast<VariableExprAST *>(LHS.get());
         if (!LHSE)
           return ErrorV("destination of '=' must be a variable");

@@ -587,12 +590,14 @@ allowed.

 .. code-block:: c++

       // Codegen the RHS.
-        Value *Val = RHS->Codegen();
-        if (Val == 0) return 0;
+        Value *Val = RHS->codegen();
+        if (!Val)
+          return nullptr;

         // Look up the name.
         Value *Variable = NamedValues[LHSE->getName()];
-        if (Variable == 0) return ErrorV("Unknown variable name");
+        if (!Variable)
+          return ErrorV("Unknown variable name");

         Builder.CreateStore(Val, Variable);
         return Val;
@@ -649,10 +654,14 @@ this:

     ...
     static int gettok() {
     ...
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    if (IdentifierStr == "var") return tok_var;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    if (IdentifierStr == "var")
+      return tok_var;
     return tok_identifier;
     ...

@@ -663,14 +672,15 @@ var/in, it looks like this:

     /// VarExprAST - Expression class for var/in
     class VarExprAST : public ExprAST {
-      std::vector<std::pair<std::string, ExprAST*> > VarNames;
-      ExprAST *Body;
-    public:
-      VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
-                 ExprAST *body)
-      : VarNames(varnames), Body(body) {}
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+      std::unique_ptr<ExprAST> Body;

-      virtual Value *Codegen();
+    public:
+      VarExprAST(
+          std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+          std::unique_ptr<ExprAST> Body)
+          : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+
+      virtual Value *codegen();
     };

 var/in allows a list of names to be defined all at once, and each name
@@ -690,15 +700,22 @@ do is add it as a primary expression:

     /// primary
     ///   ::= identifierexpr
     ///   ::= numberexpr
     ///   ::= parenexpr
     ///   ::= ifexpr
     ///   ::= forexpr
     ///   ::= varexpr
-    static ExprAST *ParsePrimary() {
+    static std::unique_ptr<ExprAST> ParsePrimary() {
       switch (CurTok) {
-      default: return Error("unknown token when expecting an expression");
-      case tok_identifier: return ParseIdentifierExpr();
-      case tok_number:     return ParseNumberExpr();
-      case '(':            return ParseParenExpr();
-      case tok_if:         return ParseIfExpr();
-      case tok_for:        return ParseForExpr();
-      case tok_var:        return ParseVarExpr();
+      default:
+        return Error("unknown token when expecting an expression");
+      case tok_identifier:
+        return ParseIdentifierExpr();
+      case tok_number:
+        return ParseNumberExpr();
+      case '(':
+        return ParseParenExpr();
+      case tok_if:
+        return ParseIfExpr();
+      case tok_for:
+        return ParseForExpr();
+      case tok_var:
+        return ParseVarExpr();
       }
     }

@@ -708,10 +725,10 @@ Next we define ParseVarExpr:

     /// varexpr ::= 'var' identifier ('=' expression)?
     //                    (',' identifier ('=' expression)?)* 'in' expression
-    static ExprAST *ParseVarExpr() {
+    static std::unique_ptr<ExprAST> ParseVarExpr() {
       getNextToken();  // eat the var.

-      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;

       // At least one variable name is required.
       if (CurTok != tok_identifier)
@@ -727,15 +744,15 @@ into the local ``VarNames`` vector.

       getNextToken();  // eat identifier.

       // Read the optional initializer.
-      ExprAST *Init = 0;
+      std::unique_ptr<ExprAST> Init;
       if (CurTok == '=') {
         getNextToken(); // eat the '='.

         Init = ParseExpression();
-        if (Init == 0) return 0;
+        if (!Init) return nullptr;
       }

-      VarNames.push_back(std::make_pair(Name, Init));
+      VarNames.push_back(std::make_pair(Name, std::move(Init)));

       // End of var list, exit loop.
       if (CurTok != ',') break;
@@ -755,10 +772,12 @@ AST node:

         return Error("expected 'in' keyword after 'var'");
       getNextToken();  // eat 'in'.
-      ExprAST *Body = ParseExpression();
-      if (Body == 0) return 0;
+      auto Body = ParseExpression();
+      if (!Body)
+        return nullptr;

-      return new VarExprAST(VarNames, Body);
+      return llvm::make_unique<VarExprAST>(std::move(VarNames),
+                                           std::move(Body));
     }

 Now that we can parse and represent the code, we need to support
@@ -766,7 +785,7 @@ emission of LLVM IR for it. This code starts out with:

 .. code-block:: c++

-    Value *VarExprAST::Codegen() {
+    Value *VarExprAST::codegen() {
       std::vector<AllocaInst *> OldBindings;

       Function *TheFunction = Builder.GetInsertBlock()->getParent();
@@ -774,7 +793,7 @@ emission of LLVM IR for it. This code starts out with:

       // Register all variables and emit their initializer.
       for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
         const std::string &VarName = VarNames[i].first;
-        ExprAST *Init = VarNames[i].second;
+        ExprAST *Init = VarNames[i].second.get();

 Basically it loops over all the variables, installing them one at a
 time. For each variable we put into the symbol table, we remember the
@@ -789,8 +808,9 @@ previous value that we replace in OldBindings.

       //    var a = a in ...   # refers to outer 'a'.
       Value *InitVal;
       if (Init) {
-        InitVal = Init->Codegen();
-        if (InitVal == 0) return 0;
+        InitVal = Init->codegen();
+        if (!InitVal)
+          return nullptr;
       } else { // If not specified, use 0.0.
         InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
       }
@@ -814,8 +834,9 @@ we evaluate the body of the var/in expression:

 .. code-block:: c++

       // Codegen the body, now that all vars are in scope.
-      Value *BodyVal = Body->Codegen();
-      if (BodyVal == 0) return 0;
+      Value *BodyVal = Body->codegen();
+      if (!BodyVal)
+        return nullptr;

 Finally, before returning, we restore the previous variable bindings:
diff --git a/docs/tutorial/LangImpl8.rst b/docs/tutorial/LangImpl8.rst
index 0b9b39c84b75..3b0f443f08d5 100644
--- a/docs/tutorial/LangImpl8.rst
+++ b/docs/tutorial/LangImpl8.rst
@@ -75,8 +75,8 @@ statement be our "main":

 .. code-block:: udiff

-  -    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-  +    PrototypeAST *Proto = new PrototypeAST("main", std::vector<std::string>());
+  -    auto Proto = llvm::make_unique<PrototypeAST>("", std::vector<std::string>());
+  +    auto Proto = llvm::make_unique<PrototypeAST>("main", std::vector<std::string>());

 just with the simple change of giving it a name.

@@ -108,19 +108,19 @@ code is that the llvm IR goes to standard error:

 @@ -1108,17 +1108,8 @@ static void HandleExtern() {
  static void HandleTopLevelExpression() {
    // Evaluate a top-level expression into an anonymous function.
 -  if (FunctionAST *F = ParseTopLevelExpr()) {
 -
 -    if (Function *LF = F->Codegen()) {
 +  if (auto FnAST = ParseTopLevelExpr()) {
 +
 -     if (auto *FnIR = FnAST->codegen()) {
 -       // We're just doing this to make sure it executes.
 -       TheExecutionEngine->finalizeObject();
 -       // JIT the function, returning a function pointer.
 -
 -       void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
 +-       void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
 -
 -       // Cast it to the right type (takes no arguments, returns a double) so we
 -       // can call it as a native function.
 -       double (*FP)() = (double (*)())(intptr_t)FPtr;
 -       // Ignore the return value for this.
 -       (void)FP;
 -
 +     if (!F->Codegen()) {
 ++     if (!F->codegen()) {
 +       fprintf(stderr, "Error generating code for top level expr");
      }
    } else {
@@ -165,13 +165,13 @@ DWARF Emission Setup
 ====================

 Similar to the ``IRBuilder`` class we have a
-```DIBuilder`` `_ class
+`DIBuilder `_ class
 that helps in constructing debug metadata for an llvm IR file.
It corresponds 1:1 with ``IRBuilder`` and llvm IR, but with nicer names.
Using it does require that you be more familiar with DWARF terminology than
you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you
read through the general documentation on the
-```Metadata Format`` `_ it
+`Metadata Format `_ it
should be a little more clear. We'll be using this class to construct all
of our IR level descriptions. Construction for it takes a module so we
need to construct it shortly after we construct our module. We've left it
@@ -237,7 +237,7 @@ Functions
 =========

 Now that we have our ``Compile Unit`` and our source locations, we can add
-function definitions to the debug info. So in ``PrototypeAST::Codegen`` we
+function definitions to the debug info. So in ``PrototypeAST::codegen()`` we
 add a few lines of code to describe a context for our subprogram, in this
 case the "File", and the actual definition of the function itself.

@@ -261,7 +261,8 @@ information) and construct our function definition:

     DISubprogram *SP = DBuilder->createFunction(
         FContext, Name, StringRef(), Unit, LineNo,
         CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
-        true /* definition */, ScopeLine, DINode::FlagPrototyped, false, F);
+        true /* definition */, ScopeLine, DINode::FlagPrototyped, false);
+    F->setSubprogram(SP);

 and we now have a DISubprogram that contains a reference to all of our
 metadata for the function.

@@ -307,10 +308,12 @@ and then we have added to all of our AST classes a source location:

       SourceLocation Loc;

     public:
+      ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+      virtual ~ExprAST() {}
+      virtual Value* codegen() = 0;
       int getLine() const { return Loc.Line; }
       int getCol() const { return Loc.Col; }
-      ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
-      virtual std::ostream &dump(std::ostream &out, int ind) {
+      virtual raw_ostream &dump(raw_ostream &out, int ind) {
        return out << ':' << getLine() << ':' << getCol() << '\n';
      }

@@ -318,7 +321,8 @@ that we pass down through when we create a new expression:

 .. code-block:: c++

-    LHS = new BinaryExprAST(BinLoc, BinOp, LHS, RHS);
+    LHS = llvm::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
+                                           std::move(RHS));

 giving us locations for each of our expressions and variables.

@@ -395,13 +399,12 @@ argument allocas in ``PrototypeAST::CreateArgumentAllocas``.

     DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
     DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
                                         KSDbgInfo.TheCU.getDirectory());
-    DILocalVariable D = DBuilder->createLocalVariable(
-        dwarf::DW_TAG_arg_variable, Scope, Args[Idx], Unit, Line,
-        KSDbgInfo.getDoubleTy(), Idx);
+    DILocalVariable *D = DBuilder->createParameterVariable(
+        Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true);

-    Instruction *Call = DBuilder->insertDeclare(
-        Alloca, D, DBuilder->createExpression(), Builder.GetInsertBlock());
-    Call->setDebugLoc(DebugLoc::get(Line, 0, Scope));
+    DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
+                            DebugLoc::get(Line, 0, Scope),
+                            Builder.GetInsertBlock());

 Here we're doing a few things. First, we're grabbing our current scope
 for the variable so we can say what range of code our variable is valid
@@ -409,7 +412,7 @@ through. Second, we're creating the variable, giving it the scope,
 the name, source location, type, and since it's an argument, the argument
 index.
Third, we create an ``llvm.dbg.declare`` call to indicate at the IR level
that we've got a variable in an alloca (and it gives a starting
-location for the variable). Lastly, we set a source location for the
+location for the variable), and we set a source location for the
 beginning of the scope on the declare.

 One interesting thing to note at this point is that various debuggers have
diff --git a/docs/tutorial/LangImpl9.rst b/docs/tutorial/LangImpl9.rst
index 6c43d53f90f9..f02bba857c14 100644
--- a/docs/tutorial/LangImpl9.rst
+++ b/docs/tutorial/LangImpl9.rst
@@ -49,7 +49,7 @@ For example, try adding:

    extending the type system in all sorts of interesting ways. Simple
    arrays are very easy and are quite useful for many different
    applications. Adding them is mostly an exercise in learning how the
-   LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+   LLVM `getelementptr <../LangRef.html#getelementptr-instruction>`_ instruction
    works: it is so nifty/unconventional, it `has its own
    FAQ <../GetElementPtr.html>`_! If you add support
    for recursive types (e.g. linked lists), make sure to read the
    `section in the LLVM
diff --git a/docs/tutorial/OCamlLangImpl1.rst b/docs/tutorial/OCamlLangImpl1.rst
index 94ca3a5aa4d3..cf968b5ae89c 100644
--- a/docs/tutorial/OCamlLangImpl1.rst
+++ b/docs/tutorial/OCamlLangImpl1.rst
@@ -139,7 +139,7 @@ useful for mutually recursive functions). For example:

 A more interesting example is included in Chapter 6 where we write a
 little Kaleidoscope application that `displays a Mandelbrot
-Set `_ at various levels of magnification.
+Set `_ at various levels of magnification.

 Let's dive into the implementation of this language!

@@ -275,7 +275,7 @@ file. These are handled with this code:

         | [< >] -> [< >]

 With this, we have the complete lexer for the basic Kaleidoscope
-language (the `full code listing `_ for the
+language (the `full code listing `_ for the
 Lexer is available in the `next chapter `_ of the
 tutorial). Next we'll `build a simple parser that uses this to build
 an Abstract Syntax Tree `_. When we have that, we'll
diff --git a/docs/tutorial/OCamlLangImpl2.rst b/docs/tutorial/OCamlLangImpl2.rst
index 905b306746f1..f5d6cd6822c9 100644
--- a/docs/tutorial/OCamlLangImpl2.rst
+++ b/docs/tutorial/OCamlLangImpl2.rst
@@ -130,7 +130,7 @@ We start with numeric literals, because they are the simplest to process.

 For each production in our grammar, we'll define a function which parses
 that production. We call this class of expressions "primary" expressions,
 for reasons that will become more clear `later in
-the tutorial `_. In order to parse an
+the tutorial `_. In order to parse an
 arbitrary primary expression, we need to determine what sort of
 expression it is. For numeric literals, we have:

@@ -280,7 +280,7 @@ fixed-size array). With the helper above defined, we can now start
 parsing binary expressions. The basic idea of operator precedence
 parsing is to break down an expression with potentially ambiguous binary
 operators into
-pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g".
+pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g".
 Operator precedence parsing considers this as a stream of primary
 expressions separated by binary operators. As such, it will first parse
 the leading primary expression "a", then it will see the pairs [+, b]
@@ -505,7 +505,7 @@ The Driver

 The driver for this simply invokes all of the parsing pieces with a
 top-level dispatch loop.
There isn't much interesting here, so I'll just -include the top-level loop. See `below <#code>`_ for full code in the +include the top-level loop. See `below <#full-code-listing>`_ for full code in the "Top-Level Parsing" section. .. code-block:: ocaml diff --git a/docs/tutorial/OCamlLangImpl3.rst b/docs/tutorial/OCamlLangImpl3.rst index 10d463b93ac3..a76b46d1bf6b 100644 --- a/docs/tutorial/OCamlLangImpl3.rst +++ b/docs/tutorial/OCamlLangImpl3.rst @@ -114,8 +114,8 @@ values that can be in the ``Codegen.named_values`` map are function arguments. This code simply checks to see that the specified name is in the map (if not, an unknown variable is being referenced) and returns the value for it. In future chapters, we'll add support for `loop -induction variables `_ in the symbol table, and for -`local variables `_. +induction variables `_ in the symbol table, and for +`local variables `_. .. code-block:: ocaml @@ -152,22 +152,22 @@ automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but it makes it much easier to read the IR dumps. -`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +`LLVM instructions <../LangRef.html#instruction-reference>`_ are constrained by strict rules: for example, the Left and Right operators of an `add -instruction <../LangRef.html#i_add>`_ must have the same type, and the +instruction <../LangRef.html#add-instruction>`_ must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul. On the other hand, LLVM specifies that the `fcmp -instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +instruction <../LangRef.html#fcmp-instruction>`_ always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with a `uitofp -instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +instruction <../LangRef.html#uitofp-to-instruction>`_. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the `sitofp -instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +instruction <../LangRef.html#sitofp-to-instruction>`_, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value. .. code-block:: ocaml @@ -196,7 +196,7 @@ to resolve function names for us. Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM `call -instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +instruction <../LangRef.html#call-instruction>`_. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort. @@ -253,7 +253,7 @@ The final line above checks if the function has already been defined in This indicates the type and name to use, as well as which module to insert into. By default we assume a function has ``Llvm.Linkage.ExternalLinkage``. "`external -linkage `_" means that the function may be defined +linkage <../LangRef.html#linkage>`_" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. 
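For comparison, the C++ chapters create exactly this kind of prototype with ``Function::Create``; the following sketch is the shape the Chapter 3 C++ diff later in this patch converges on, with ``Name``, ``Args`` and ``TheModule`` coming from that chapter:

.. code-block:: c++

    // Build the type double(double, ..., double): one double per argument.
    std::vector<Type *> Doubles(Args.size(),
                                Type::getDoubleTy(getGlobalContext()));
    FunctionType *FT =
        FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);

    // ExternalLinkage: visible to, and callable from, code outside this module.
    Function *F =
        Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());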
The "``name``" passed in is the name the user specified: this name is registered in "``Codegen.the_module``"s symbol @@ -360,7 +360,7 @@ Once the insertion point is set up, we call the ``Codegen.codegen_func`` method for the root expression of the function. If no error happens, this emits code to compute the expression into the entry block and returns the value that was computed. Assuming no error, we then create -an LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the +an LLVM `ret instruction <../LangRef.html#ret-instruction>`_, which completes the function. Once the function is built, we call ``Llvm_analysis.assert_valid_function``, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to @@ -413,10 +413,10 @@ For example: Note how the parser turns the top-level expression into anonymous functions for us. This will be handy when we add `JIT -support `_ in the next chapter. Also note that +support `_ in the next chapter. Also note that the code is very literally transcribed, no optimizations are being performed. We will `add -optimizations `_ explicitly in the +optimizations `_ explicitly in the next chapter. :: diff --git a/docs/tutorial/OCamlLangImpl4.rst b/docs/tutorial/OCamlLangImpl4.rst index b13b2afa8883..feeba01be24b 100644 --- a/docs/tutorial/OCamlLangImpl4.rst +++ b/docs/tutorial/OCamlLangImpl4.rst @@ -130,7 +130,7 @@ exactly the code we have now, except that we would defer running the optimizer until the entire file has been parsed. In order to get per-function optimizations going, we need to set up a -`Llvm.PassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold and +`Llvm.PassManager <../WritingAnLLVMPass.html#what-passmanager-does>`_ to hold and organize the LLVM optimizations that we want to run. Once we have that, we can add a set of optimizations to run. The code looks like this: diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst index 0faecfb9222e..675b9bc1978b 100644 --- a/docs/tutorial/OCamlLangImpl5.rst +++ b/docs/tutorial/OCamlLangImpl5.rst @@ -175,7 +175,7 @@ Kaleidoscope looks like this: To visualize the control flow graph, you can use a nifty feature of the LLVM '`opt `_' tool. If you put this LLVM IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a -window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll +window will pop up <../ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ and you'll see this graph: .. figure:: LangImpl5-cfg.png diff --git a/docs/tutorial/OCamlLangImpl6.rst b/docs/tutorial/OCamlLangImpl6.rst index 36bffa8e9696..a3ae11fd7e54 100644 --- a/docs/tutorial/OCamlLangImpl6.rst +++ b/docs/tutorial/OCamlLangImpl6.rst @@ -24,7 +24,7 @@ is good or bad. In this tutorial we'll assume that it is okay to use this as a way to show some interesting parsing techniques. At the end of this tutorial, we'll run through an example Kaleidoscope -application that `renders the Mandelbrot set <#example>`_. This gives an +application that `renders the Mandelbrot set <#kicking-the-tires>`_. This gives an example of what you can build with Kaleidoscope and its feature set. User-defined Operators: the Idea @@ -108,7 +108,7 @@ keywords: | "unary" -> [< 'Token.Unary; stream >] This just adds lexer support for the unary and binary keywords, like we -did in `previous chapters `_. One nice +did in `previous chapters `_. 
One nice thing about our current AST, is that we represent binary operators with full generalisation by using their ASCII code as the opcode. For our extended operators, we'll use this same representation, so we don't need diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst index 98ea93f42f3f..c8c701b91012 100644 --- a/docs/tutorial/OCamlLangImpl7.rst +++ b/docs/tutorial/OCamlLangImpl7.rst @@ -118,7 +118,7 @@ that @G defines *space* for an i32 in the global data area, but its *name* actually refers to the address for that space. Stack variables work the same way, except that instead of being declared with global variable definitions, they are declared with the `LLVM alloca -instruction <../LangRef.html#i_alloca>`_: +instruction <../LangRef.html#alloca-instruction>`_: .. code-block:: llvm @@ -221,7 +221,7 @@ variables in certain circumstances: funny pointer arithmetic is involved, the alloca will not be promoted. #. mem2reg only works on allocas of `first - class <../LangRef.html#t_classifications>`_ values (such as pointers, + class <../LangRef.html#first-class-types>`_ values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting structs or arrays to registers. Note that the "scalarrepl" pass is @@ -367,7 +367,7 @@ from the stack slot: As you can see, this is pretty straightforward. Now we need to update the things that define the variables to set up the alloca. We'll start -with ``codegen_expr Ast.For ...`` (see the `full code listing <#code>`_ +with ``codegen_expr Ast.For ...`` (see the `full code listing <#id1>`_ for the unabridged code): .. code-block:: ocaml @@ -407,7 +407,7 @@ for the unabridged code): ... This code is virtually identical to the code `before we allowed mutable -variables `_. The big difference is that +variables `_. The big difference is that we no longer have to construct a PHI node, and we use load/store to access the variable as needed. diff --git a/docs/tutorial/OCamlLangImpl8.rst b/docs/tutorial/OCamlLangImpl8.rst index 0346fa9fed14..3ab6db35dfb0 100644 --- a/docs/tutorial/OCamlLangImpl8.rst +++ b/docs/tutorial/OCamlLangImpl8.rst @@ -48,7 +48,7 @@ For example, try adding: extending the type system in all sorts of interesting ways. Simple arrays are very easy and are quite useful for many different applications. Adding them is mostly an exercise in learning how the - LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction + LLVM `getelementptr <../LangRef.html#getelementptr-instruction>`_ instruction works: it is so nifty/unconventional, it `has its own FAQ <../GetElementPtr.html>`_! If you add support for recursive types (e.g. linked lists), make sure to read the `section in the LLVM diff --git a/docs/yaml2obj.rst b/docs/yaml2obj.rst index 1812e58914ae..d18ce02a336c 100644 --- a/docs/yaml2obj.rst +++ b/docs/yaml2obj.rst @@ -65,6 +65,7 @@ Here's a simplified Kwalify_ schema with an extension to allow alternate types. 
, IMAGE_FILE_MACHINE_AMD64 , IMAGE_FILE_MACHINE_ARM , IMAGE_FILE_MACHINE_ARMNT + , IMAGE_FILE_MACHINE_ARM64 , IMAGE_FILE_MACHINE_EBC , IMAGE_FILE_MACHINE_I386 , IMAGE_FILE_MACHINE_IA64 diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index 8026adc8d075..d8c54b50b854 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include + using namespace llvm; //Set the constants for naming @@ -44,7 +45,7 @@ Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf, comflag = cf; header(Context); - readloop(0, 0, 0, Context); + readloop(nullptr, nullptr, nullptr, Context); delete builder; return module; } @@ -68,7 +69,6 @@ void BrainF::header(LLVMContext& C) { getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), IntegerType::getInt32Ty(C), NULL)); - //Function header //define void @brainf() @@ -85,7 +85,7 @@ void BrainF::header(LLVMContext& C) { Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, - NULL, "arr"); + nullptr, "arr"); BB->getInstList().push_back(cast(ptr_arr)); //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0) @@ -114,8 +114,6 @@ void BrainF::header(LLVMContext& C) { ConstantInt::get(C, APInt(32, memtotal/2)), headreg); - - //Function footer //brainf.end: @@ -127,8 +125,6 @@ void BrainF::header(LLVMContext& C) { //ret void ReturnInst::Create(C, endbb); - - //Error block for array out of bounds if (comflag & flag_arraybounds) { diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index 99c8ff36dc61..1a38c67b0d4a 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -64,9 +64,9 @@ void addMainFunction(Module *mod) { IntegerType::getInt8Ty(mod->getContext()))), NULL)); { Function::arg_iterator args = main_func->arg_begin(); - Value *arg_0 = args++; + Value *arg_0 = &*args++; arg_0->setName("argc"); - Value *arg_1 = args++; + Value *arg_1 = &*args++; arg_1->setName("argv"); } diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f98c403deb0b..5727066d6227 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ add_subdirectory(HowToUseJIT) add_subdirectory(Kaleidoscope) add_subdirectory(ModuleMaker) -if( ( NOT WIN32 ) AND ( NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM" ) ) +if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM")) add_subdirectory(ExceptionDemo) endif() diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt index b4354f66f0d6..793cf291ca6f 100644 --- a/examples/ExceptionDemo/CMakeLists.txt +++ b/examples/ExceptionDemo/CMakeLists.txt @@ -5,14 +5,14 @@ set(LLVM_LINK_COMPONENTS MCJIT RuntimeDyld Support + Target nativecodegen ) # Enable EH and RTTI for this demo -set(LLVM_REQUIRES_EH 1) -set(LLVM_REQUIRES_RTTI 1) - -set(LLVM_BUILD_EXAMPLES OFF) +if(NOT LLVM_ENABLE_EH) + message(FATAL_ERROR "ExceptionDemo must require EH.") +endif() add_llvm_example(ExceptionDemo ExceptionDemo.cpp diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 81337c4823b0..444ee2649fa7 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -77,6 +77,7 @@ #include #include +#include #ifndef USE_GLOBAL_STR_CONSTS #define USE_GLOBAL_STR_CONSTS true @@ 
-319,7 +320,7 @@ void printStr(char *toPrint) { } -/// Deletes the true previosly allocated exception whose address +/// Deletes the true previously allocated exception whose address /// is calculated from the supplied OurBaseException_t::unwindException /// member address. Handles (ignores), NULL pointers. /// @param expToDelete exception to delete @@ -569,8 +570,8 @@ static bool handleActionValue(int64_t *resultAction, fprintf(stderr, "handleActionValue(...): exceptionObject = <%p>, " "excp = <%p>.\n", - exceptionObject, - excp); + (void*)exceptionObject, + (void*)excp); #endif const uint8_t *actionPos = (uint8_t*) actionEntry, @@ -588,8 +589,8 @@ static bool handleActionValue(int64_t *resultAction, #ifdef DEBUG fprintf(stderr, - "handleActionValue(...):typeOffset: <%lld>, " - "actionOffset: <%lld>.\n", + "handleActionValue(...):typeOffset: <%" PRIi64 ">, " + "actionOffset: <%" PRIi64 ">.\n", typeOffset, actionOffset); #endif @@ -848,7 +849,7 @@ _Unwind_Reason_Code ourPersonality(int version, #ifdef DEBUG fprintf(stderr, "ourPersonality(...):lsda = <%p>.\n", - lsda); + (void*)lsda); #endif // The real work of the personality function is captured here @@ -971,7 +972,7 @@ void generateIntegerPrint(llvm::LLVMContext &context, llvm::Value *cast = builder.CreateBitCast(stringVar, builder.getInt8PtrTy()); - builder.CreateCall2(&printFunct, &toPrint, cast); + builder.CreateCall(&printFunct, {&toPrint, cast}); } @@ -1264,10 +1265,10 @@ static llvm::Function *createCatchWrappedInvokeFunction( builder.SetInsertPoint(exceptionBlock); llvm::Function *personality = module.getFunction("ourPersonality"); + ret->setPersonalityFn(personality); llvm::LandingPadInst *caughtResult = builder.CreateLandingPad(ourCaughtResultType, - personality, numExceptionsToCatch, "landingPad"); @@ -1694,7 +1695,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, #ifdef DEBUG fprintf(stderr, "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " - "= %lld, sizeof(struct OurBaseException_t) - " + "= %" PRIi64 ", sizeof(struct OurBaseException_t) - " "sizeof(struct _Unwind_Exception) = %lu.\n", ourBaseFromUnwindOffset, sizeof(struct OurBaseException_t) - @@ -1973,7 +1974,7 @@ int main(int argc, char *argv[]) { // Set up the optimizer pipeline. // Start with registering info about how the // target lays out data structures. - module->setDataLayout(*executionEngine->getDataLayout()); + module->setDataLayout(executionEngine->getDataLayout()); // Optimizations turned on #ifdef ADD_OPT_PASSES diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 8092e19380dd..ecb49eb92e1a 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; static Function *CreateFibFunction(Module *M, LLVMContext &Context) { @@ -41,7 +42,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { Function *FibF = cast(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); @@ -51,7 +52,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); // Get pointer to the integer argument of the add1 function... - Argument *ArgX = FibF->arg_begin(); // Get the arg. 
+ Argument *ArgX = &*FibF->arg_begin(); // Get the arg. ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the true_block. @@ -87,7 +88,6 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { return FibF; } - int main(int argc, char **argv) { int n = argc > 1 ? atol(argv[1]) : 24; @@ -106,7 +106,6 @@ int main(int argc, char **argv) { ExecutionEngine *EE = EngineBuilder(std::move(Owner)) .setErrorStr(&errStr) - .setEngineKind(EngineKind::JIT) .create(); if (!EE) { diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 91ea17dd22bf..e0bf6a00bf01 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -65,7 +65,7 @@ int main() { Function *Add1F = cast(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. @@ -80,7 +80,7 @@ int main() { // Get pointers to the integer argument of the add1 function... assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg - Argument *ArgX = Add1F->arg_begin(); // Get the arg + Argument *ArgX = &*Add1F->arg_begin(); // Get the arg ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the add instruction, inserting it into the end of BB. @@ -91,12 +91,11 @@ int main() { // Now, function add1 is ready. - // Now we're going to create function `foo', which returns an int and takes no // arguments. Function *FooF = cast(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the FooF function. BB = BasicBlock::Create(Context, "EntryBlock", FooF); diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt index fed3f4b78c77..6224d9ac8640 100644 --- a/examples/Kaleidoscope/Chapter2/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt @@ -1,3 +1,9 @@ add_kaleidoscope_chapter(Kaleidoscope-Ch2 toy.cpp ) + +if(LLVM_COMPILER_IS_GCC_COMPATIBLE) + target_compile_options(Kaleidoscope-Ch2 PRIVATE + -Wno-unused-private-field + ) +endif() diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile index 1a9b94ce541e..fa27e6e06687 100644 --- a/examples/Kaleidoscope/Chapter2/Makefile +++ b/examples/Kaleidoscope/Chapter2/Makefile @@ -10,4 +10,6 @@ LEVEL = ../../.. TOOLNAME = Kaleidoscope-Ch2 EXAMPLE_TOOL = 1 +LLVM_CXXFLAGS := -Wno-unused-private-field + include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp index cd901394a524..69f359961293 100644 --- a/examples/Kaleidoscope/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -1,10 +1,22 @@ #include #include -#include #include +#include #include #include +namespace helper { +// Cloning make_unique here until it's standard in C++14. +// Using a namespace to avoid conflicting with MSVC's std::make_unique (which +// ADL can sometimes find in unqualified calls). +template +static + typename std::enable_if::value, std::unique_ptr>::type + make_unique(Args &&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +} + //===----------------------------------------------------------------------===// // Lexer //===----------------------------------------------------------------------===// @@ -15,14 +27,16 @@ enum Token { tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5 + tok_identifier = -4, + tok_number = -5 }; -static std::string IdentifierStr; // Filled in if tok_identifier -static double NumVal; // Filled in if tok_number +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number /// gettok - Return the next token from standard input. static int gettok() { @@ -37,31 +51,34 @@ static int gettok() { while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } - if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ std::string NumStr; do { NumStr += LastChar; LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -84,30 +101,40 @@ public: /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { + double Val; + public: - NumberExprAST(double val) {} + NumberExprAST(double Val) : Val(Val) {} }; /// VariableExprAST - Expression class for referencing a variable, like "a". class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &Name) : Name(Name) {} }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {} + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -116,16 +143,21 @@ public: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} }; /// FunctionAST - This class represents a function definition itself. 
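// A note on the ownership model above (illustration, not part of the patch):
// because every AST node now holds its children through std::unique_ptr, a
// definition such as
//
//   def foo(a b) a+b
//
// parses into a FunctionAST that uniquely owns its PrototypeAST and its body
// ExprAST tree. Destroying the FunctionAST frees every node, so the
// early-return error paths in the parser no longer leak memory.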
class FunctionAST { + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) {} + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} }; } // end anonymous namespace @@ -137,9 +169,7 @@ public: /// token the parser is looking at. getNextToken reads another token from the /// lexer and updates CurTok with its results. static int CurTok; -static int getNextToken() { - return CurTok = gettok(); -} +static int getNextToken() { return CurTok = gettok(); } /// BinopPrecedence - This holds the precedence for each binary operator that is /// defined. @@ -149,40 +179,69 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; - if (TokPrec <= 0) return -1; + if (TokPrec <= 0) + return -1; return TokPrec; } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} -PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} -static ExprAST *ParseExpression(); +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = helper::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - - getNextToken(); // eat identifier. - + + getNextToken(); // eat identifier. + if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); - + return helper::make_unique(IdName); + // Call. - getNextToken(); // eat ( - std::vector Args; + getNextToken(); // eat ( + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -192,133 +251,125 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - - return new CallExprAST(IdName, Args); -} -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return helper::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; - getNextToken(); // eat binop - + getNextToken(); // eat binop + // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; - + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } - + // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = helper::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; - - return ParseBinOpRHS(0, LHS); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); std::string FnName = IdentifierStr; getNextToken(); - + if (CurTok != '(') return ErrorP("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorP("Expected ')' in prototype"); - + // success. - getNextToken(); // eat ')'. - - return new PrototypeAST(FnName, ArgNames); + getNextToken(); // eat ')'. + + return helper::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { - getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; +static std::unique_ptr ParseDefinition() { + getNextToken(); // eat def. 
+ auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return helper::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = helper::make_unique("__anon_expr", + std::vector()); + return helper::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { - getNextToken(); // eat extern. +static std::unique_ptr ParseExtern() { + getNextToken(); // eat extern. return ParsePrototype(); } @@ -359,11 +410,20 @@ static void MainLoop() { while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } @@ -378,7 +438,7 @@ int main() { BinopPrecedence['<'] = 10; BinopPrecedence['+'] = 20; BinopPrecedence['-'] = 20; - BinopPrecedence['*'] = 40; // highest. + BinopPrecedence['*'] = 40; // highest. // Prime the first token. fprintf(stderr, "ready> "); diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp index c60f76725fdb..05697ea70a49 100644 --- a/examples/Kaleidoscope/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -1,13 +1,14 @@ -#include "llvm/IR/Verifier.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include #include #include #include #include + using namespace llvm; //===----------------------------------------------------------------------===// @@ -20,14 +21,16 @@ enum Token { tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5 + tok_identifier = -4, + tok_number = -5 }; -static std::string IdentifierStr; // Filled in if tok_identifier -static double NumVal; // Filled in if tok_number +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number /// gettok - Return the next token from standard input. 
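/// For example, given the input "def fib(x)", successive gettok() calls
/// return tok_def, tok_identifier (IdentifierStr == "fib"), '(',
/// tok_identifier (IdentifierStr == "x") and ')'; characters the lexer does
/// not recognize are returned as their own ASCII value.
/// (Illustrative walk-through, not part of the patch.)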
static int gettok() { @@ -42,31 +45,34 @@ static int gettok() { while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } - if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ std::string NumStr; do { NumStr += LastChar; LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -85,43 +91,49 @@ namespace { class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} - Value *Codegen() override; + NumberExprAST(double Val) : Val(Val) {} + Value *codegen() override; }; /// VariableExprAST - Expression class for referencing a variable, like "a". class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} - Value *Codegen() override; + VariableExprAST(const std::string &Name) : Name(Name) {} + Value *codegen() override; }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} - Value *Codegen() override; + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -130,22 +142,24 @@ public: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. 
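// (Illustration, not part of the patch: each concrete AST class overrides
// "Value *codegen()", so calling codegen() on the root of an expression tree
// recursively emits IR for the whole expression. For instance,
// BinaryExprAST::codegen() later in this file first emits its LHS and RHS
// subtrees and then the instruction that joins them.)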
class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) - : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -157,9 +171,7 @@ public: /// token the parser is looking at. getNextToken reads another token from the /// lexer and updates CurTok with its results. static int CurTok; -static int getNextToken() { - return CurTok = gettok(); -} +static int getNextToken() { return CurTok = gettok(); } /// BinopPrecedence - This holds the precedence for each binary operator that is /// defined. @@ -169,41 +181,70 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; - if (TokPrec <= 0) return -1; + if (TokPrec <= 0) + return -1; return TokPrec; } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} -PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } -FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } +std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - - getNextToken(); // eat identifier. - + + getNextToken(); // eat identifier. + if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); - + return llvm::make_unique(IdName); + // Call. - getNextToken(); // eat ( - std::vector Args; + getNextToken(); // eat ( + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -213,133 +254,125 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - - return new CallExprAST(IdName, Args); -} -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return llvm::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; - getNextToken(); // eat binop - + getNextToken(); // eat binop + // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; - + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } - + // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; - - return ParseBinOpRHS(0, LHS); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); std::string FnName = IdentifierStr; getNextToken(); - + if (CurTok != '(') return ErrorP("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorP("Expected ')' in prototype"); - + // success. - getNextToken(); // eat ')'. - - return new PrototypeAST(FnName, ArgNames); + getNextToken(); // eat ')'. + + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { - getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; +static std::unique_ptr ParseDefinition() { + getNextToken(); // eat def. 
+ auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { - getNextToken(); // eat extern. +static std::unique_ptr ParseExtern() { + getNextToken(); // eat extern. return ParsePrototype(); } @@ -347,113 +380,108 @@ static PrototypeAST *ParseExtern() { // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr TheModule; static IRBuilder<> Builder(getGlobalContext()); -static std::map NamedValues; +static std::map NamedValues; -Value *ErrorV(const char *Str) { Error(Str); return 0; } +Value *ErrorV(const char *Str) { + Error(Str); + return nullptr; +} -Value *NumberExprAST::Codegen() { +Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } -Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + return ErrorV("Unknown variable name"); + return V; } -Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) return 0; - +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + switch (Op) { - case '+': return Builder.CreateFAdd(L, R, "addtmp"); - case '-': return Builder.CreateFSub(L, R, "subtmp"); - case '*': return Builder.CreateFMul(L, R, "multmp"); + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp"); - default: return ErrorV("invalid binary operator"); + default: + return ErrorV("invalid binary operator"); } } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { // Look up the name in the global module table. Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + if (!CalleeF) return ErrorV("Unknown function referenced"); - + // If argument mismatch error. if (CalleeF->arg_size() != Args.size()) return ErrorV("Incorrect # arguments passed"); - std::vector ArgsV; + std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } - + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. 
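// (Concretely, for a two-argument prototype the code below builds
// FunctionType::get(DoubleTy, {DoubleTy, DoubleTy}, /*isVarArg=*/false),
// i.e. the IR type "double (double, double)". Illustration only.)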
- std::vector Doubles(Args.size(), - Type::getDoubleTy(getGlobalContext())); - FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), - Doubles, false); - - Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } - + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } - + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; - +Function *FunctionAST::codegen() { + // First, check for an existing function from a previous 'extern' declaration. + Function *TheFunction = TheModule->getFunction(Proto->getName()); + + if (!TheFunction) + TheFunction = Proto->codegen(); + + if (!TheFunction) + return nullptr; + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - - if (Value *RetVal = Body->Codegen()) { + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -462,10 +490,10 @@ Function *FunctionAST::Codegen() { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -473,10 +501,10 @@ Function *FunctionAST::Codegen() { //===----------------------------------------------------------------------===// static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read function definition:"); - LF->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -485,10 +513,10 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (Function *F = P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { fprintf(stderr, "Read extern: "); - F->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -498,10 +526,10 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. 
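// (For example, entering "1+2;" at the prompt parses into an anonymous
// FunctionAST whose prototype is named "__anon_expr" and takes no arguments;
// codegen() then emits a nullary function computing the expression, which
// the JIT chapters look up by that name and call. Illustration only.)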
- if (FunctionAST *F = ParseTopLevelExpr()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseTopLevelExpr()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read top-level expression:"); - LF->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -514,46 +542,42 @@ static void MainLoop() { while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } -//===----------------------------------------------------------------------===// -// "Library" functions that can be "extern'd" from user code. -//===----------------------------------------------------------------------===// - -/// putchard - putchar that takes a double and returns 0. -extern "C" -double putchard(double X) { - putchar((char)X); - return 0; -} - //===----------------------------------------------------------------------===// // Main driver code. //===----------------------------------------------------------------------===// int main() { - LLVMContext &Context = getGlobalContext(); - // Install standard binary operators. // 1 is lowest precedence. BinopPrecedence['<'] = 10; BinopPrecedence['+'] = 20; BinopPrecedence['-'] = 20; - BinopPrecedence['*'] = 40; // highest. + BinopPrecedence['*'] = 40; // highest. // Prime the first token. fprintf(stderr, "ready> "); getNextToken(); // Make the module, which holds all the code. - TheModule = new Module("my cool jit", Context); + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); // Run the main "interpreter loop" now. 
MainLoop(); diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt index 2c01e120070a..89feed143adc 100644 --- a/examples/Kaleidoscope/Chapter4/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -3,14 +3,15 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine - MCJIT + Object RuntimeDyld ScalarOpts Support - TransformUtils native ) add_kaleidoscope_chapter(Kaleidoscope-Ch4 toy.cpp ) + +export_executable_symbols(Kaleidoscope-Ch4) diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index ad091e4496b7..4f77ec862b1b 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,9 +1,5 @@ +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/MCJIT.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" @@ -16,7 +12,10 @@ #include #include #include +#include "../include/KaleidoscopeJIT.h" + using namespace llvm; +using namespace llvm::orc; //===----------------------------------------------------------------------===// // Lexer @@ -66,7 +65,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -98,7 +97,7 @@ namespace { class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". @@ -106,8 +105,8 @@ class NumberExprAST : public ExprAST { double Val; public: - NumberExprAST(double val) : Val(val) {} - Value *Codegen() override; + NumberExprAST(double Val) : Val(Val) {} + Value *codegen() override; }; /// VariableExprAST - Expression class for referencing a variable, like "a". @@ -115,30 +114,32 @@ class VariableExprAST : public ExprAST { std::string Name; public: - VariableExprAST(const std::string &name) : Name(name) {} - Value *Codegen() override; + VariableExprAST(const std::string &Name) : Name(Name) {} + Value *codegen() override; }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} - Value *Codegen() override; + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; }; /// CallExprAST - Expression class for function calls. 
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -149,21 +150,22 @@ class PrototypeAST { std::vector Args; public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -194,41 +196,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -242,34 +261,14 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return llvm::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -284,7 +283,8 @@ static ExprAST *ParsePrimary() { /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -299,38 +299,39 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); + auto RHS = ParsePrimary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -349,300 +350,86 @@ static PrototypeAST *ParsePrototype() { // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. 
return ParsePrototype(); } -//===----------------------------------------------------------------------===// -// Quick and dirty hack -//===----------------------------------------------------------------------===// - -// FIXME: Obviously we can do better than this -std::string GenerateUniqueName(const char *root) { - static int i = 0; - char s[16]; - sprintf(s, "%s%d", root, i++); - std::string S = s; - return S; -} - -std::string MakeLegalFunctionName(std::string Name) { - std::string NewName; - if (!Name.length()) - return GenerateUniqueName("anon_func_"); - - // Start with what we have - NewName = Name; - - // Look for a numberic first character - if (NewName.find_first_of("0123456789") == 0) { - NewName.insert(0, 1, 'n'); - } - - // Replace illegal characters with their ASCII equivalent - std::string legal_elements = - "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - size_t pos; - while ((pos = NewName.find_first_not_of(legal_elements)) != - std::string::npos) { - char old_c = NewName.at(pos); - char new_str[16]; - sprintf(new_str, "%d", (int)old_c); - NewName = NewName.replace(pos, 1, new_str); - } - - return NewName; -} - -//===----------------------------------------------------------------------===// -// MCJIT helper class -//===----------------------------------------------------------------------===// - -class MCJITHelper { -public: - MCJITHelper(LLVMContext &C) : Context(C), OpenModule(NULL) {} - ~MCJITHelper(); - - Function *getFunction(const std::string FnName); - Module *getModuleForNewFunction(); - void *getPointerToFunction(Function *F); - void *getSymbolAddress(const std::string &Name); - void dump(); - -private: - typedef std::vector ModuleVector; - typedef std::vector EngineVector; - - LLVMContext &Context; - Module *OpenModule; - ModuleVector Modules; - EngineVector Engines; -}; - -class HelpingMemoryManager : public SectionMemoryManager { - HelpingMemoryManager(const HelpingMemoryManager &) = delete; - void operator=(const HelpingMemoryManager &) = delete; - -public: - HelpingMemoryManager(MCJITHelper *Helper) : MasterHelper(Helper) {} - ~HelpingMemoryManager() override {} - - /// This method returns the address of the specified symbol. - /// Our implementation will attempt to find symbols in other - /// modules associated with the MCJITHelper to cross link symbols - /// from one generated module to another. 
- uint64_t getSymbolAddress(const std::string &Name) override; - -private: - MCJITHelper *MasterHelper; -}; - -uint64_t HelpingMemoryManager::getSymbolAddress(const std::string &Name) { - uint64_t FnAddr = SectionMemoryManager::getSymbolAddress(Name); - if (FnAddr) - return FnAddr; - - uint64_t HelperFun = (uint64_t)MasterHelper->getSymbolAddress(Name); - if (!HelperFun) - report_fatal_error("Program used extern function '" + Name + - "' which could not be resolved!"); - - return HelperFun; -} - -MCJITHelper::~MCJITHelper() { - if (OpenModule) - delete OpenModule; - EngineVector::iterator begin = Engines.begin(); - EngineVector::iterator end = Engines.end(); - EngineVector::iterator it; - for (it = begin; it != end; ++it) - delete *it; -} - -Function *MCJITHelper::getFunction(const std::string FnName) { - ModuleVector::iterator begin = Modules.begin(); - ModuleVector::iterator end = Modules.end(); - ModuleVector::iterator it; - for (it = begin; it != end; ++it) { - Function *F = (*it)->getFunction(FnName); - if (F) { - if (*it == OpenModule) - return F; - - assert(OpenModule != NULL); - - // This function is in a module that has already been JITed. - // We need to generate a new prototype for external linkage. - Function *PF = OpenModule->getFunction(FnName); - if (PF && !PF->empty()) { - ErrorF("redefinition of function across modules"); - return 0; - } - - // If we don't have a prototype yet, create one. - if (!PF) - PF = Function::Create(F->getFunctionType(), Function::ExternalLinkage, - FnName, OpenModule); - return PF; - } - } - return NULL; -} - -Module *MCJITHelper::getModuleForNewFunction() { - // If we have a Module that hasn't been JITed, use that. - if (OpenModule) - return OpenModule; - - // Otherwise create a new Module. - std::string ModName = GenerateUniqueName("mcjit_module_"); - Module *M = new Module(ModName, Context); - Modules.push_back(M); - OpenModule = M; - return M; -} - -void *MCJITHelper::getPointerToFunction(Function *F) { - // See if an existing instance of MCJIT has this function. - EngineVector::iterator begin = Engines.begin(); - EngineVector::iterator end = Engines.end(); - EngineVector::iterator it; - for (it = begin; it != end; ++it) { - void *P = (*it)->getPointerToFunction(F); - if (P) - return P; - } - - // If we didn't find the function, see if we can generate it. - if (OpenModule) { - std::string ErrStr; - ExecutionEngine *NewEngine = - EngineBuilder(std::unique_ptr(OpenModule)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(std::unique_ptr( - new HelpingMemoryManager(this))) - .create(); - if (!NewEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - // Create a function pass manager for this engine - auto *FPM = new legacy::FunctionPassManager(OpenModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - OpenModule->setDataLayout(*NewEngine->getDataLayout()); - // Provide basic AliasAnalysis support for GVN. - FPM->add(createBasicAliasAnalysisPass()); - // Promote allocas to registers. - FPM->add(createPromoteMemoryToRegisterPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - FPM->add(createInstructionCombiningPass()); - // Reassociate expressions. - FPM->add(createReassociatePass()); - // Eliminate Common SubExpressions. - FPM->add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). 
-    FPM->add(createCFGSimplificationPass());
-    FPM->doInitialization();
-
-    // For each function in the module
-    Module::iterator it;
-    Module::iterator end = OpenModule->end();
-    for (it = OpenModule->begin(); it != end; ++it) {
-      // Run the FPM on this function
-      FPM->run(*it);
-    }
-
-    // We don't need this anymore
-    delete FPM;
-
-    OpenModule = NULL;
-    Engines.push_back(NewEngine);
-    NewEngine->finalizeObject();
-    return NewEngine->getPointerToFunction(F);
-  }
-  return NULL;
-}
-
-void *MCJITHelper::getSymbolAddress(const std::string &Name) {
-  // Look for the symbol in each of our execution engines.
-  EngineVector::iterator begin = Engines.begin();
-  EngineVector::iterator end = Engines.end();
-  EngineVector::iterator it;
-  for (it = begin; it != end; ++it) {
-    uint64_t FAddr = (*it)->getFunctionAddress(Name);
-    if (FAddr) {
-      return (void *)FAddr;
-    }
-  }
-  return NULL;
-}
-
-void MCJITHelper::dump() {
-  ModuleVector::iterator begin = Modules.begin();
-  ModuleVector::iterator end = Modules.end();
-  ModuleVector::iterator it;
-  for (it = begin; it != end; ++it)
-    (*it)->dump();
-}
 //===----------------------------------------------------------------------===//
 // Code Generation
 //===----------------------------------------------------------------------===//

-static MCJITHelper *JITHelper;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, Value *> NamedValues;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;

 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
 }

-Value *NumberExprAST::Codegen() {
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
+}
+
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }

-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
+  if (!V)
+    return ErrorV("Unknown variable name");
+  return V;
 }

-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;

   switch (Op) {
   case '+':
@@ -661,10 +448,10 @@ Value *BinaryExprAST::Codegen() {
   }
 }

-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = JITHelper->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");

   // If argument mismatch error.
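The getFunction/FunctionProtos pair introduced above is the heart of the new per-module design: once a callee has been JIT'd as part of an earlier module, the current module can no longer see its definition, so codegen re-emits a declaration from the saved prototype. A minimal standalone sketch of that cache pattern follows; Fn and Proto are hypothetical stand-ins for llvm::Function and the tutorial's PrototypeAST, not part of the patch.

#include <map>
#include <memory>
#include <string>

struct Fn { std::string Name; };   // stand-in for llvm::Function
struct Proto {                     // stand-in for PrototypeAST
  std::string Name;
  std::unique_ptr<Fn> codegen() const {
    return std::unique_ptr<Fn>(new Fn{Name}); // emit a fresh declaration
  }
};

// One table per open module, cleared whenever a module is handed to the JIT;
// the prototype map outlives every module.
static std::map<std::string, std::unique_ptr<Fn>> CurrentModule;
static std::map<std::string, std::unique_ptr<Proto>> Protos;

Fn *getFn(const std::string &Name) {
  auto MI = CurrentModule.find(Name);
  if (MI != CurrentModule.end())
    return MI->second.get();       // already declared in this module
  auto PI = Protos.find(Name);
  if (PI != Protos.end()) {        // known prototype: re-declare it here
    CurrentModule[Name] = PI->second->codegen();
    return CurrentModule[Name].get();
  }
  return nullptr;                  // truly unknown symbol
}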
@@ -673,94 +460,99 @@ Value *CallExprAST::Codegen() { std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); - std::string FnName = MakeLegalFunctionName(Name); - - Module *M = JITHelper->getModuleForNewFunction(); - - Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != FnName) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = JITHelper->getFunction(Name); - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + return TheFunction; } // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. 
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
+
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -769,10 +561,11 @@ static void HandleDefinition() {
 }

 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -782,15 +575,25 @@ static void HandleExtern() {
 }

 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = JITHelper->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {

-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -805,9 +608,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
     case tok_def:
       HandleDefinition();
       break;
@@ -827,7 +630,13 @@ static void MainLoop() {

 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }

+/// printd - printf that takes a double prints it as "%f\n", returning 0.
+extern "C" double printd(double X) {
+  fprintf(stderr, "%f\n", X);
+  return 0;
+}
+
@@ -839,8 +648,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
-  JITHelper = new MCJITHelper(Context);

   // Install standard binary operators.
   // 1 is lowest precedence.
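HandleTopLevelExpression now has a fixed lifecycle: codegen the expression into the open module, hand that module to the JIT, look up __anon_expr and call it, then remove the module again. A self-contained sketch of that lifecycle follows; FakeJIT is a hypothetical stand-in for KaleidoscopeJIT, mimicking only the addModule/findSymbol/removeModule surface the driver relies on.

#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Module { std::map<std::string, std::function<double()>> Syms; };

class FakeJIT {
  std::vector<std::unique_ptr<Module>> Mods;

public:
  typedef size_t ModuleHandle;
  ModuleHandle addModule(std::unique_ptr<Module> M) {
    Mods.push_back(std::move(M));
    return Mods.size() - 1;
  }
  std::function<double()> findSymbol(const std::string &Name) {
    // Search the newest module first, as a real JIT symbol lookup would.
    for (auto I = Mods.rbegin(), E = Mods.rend(); I != E; ++I)
      if (*I && (*I)->Syms.count(Name))
        return (*I)->Syms[Name];
    return nullptr;
  }
  void removeModule(ModuleHandle H) { Mods[H].reset(); } // free that code
};

int main() {
  FakeJIT JIT;
  std::unique_ptr<Module> M(new Module());
  M->Syms["__anon_expr"] = [] { return 42.0; }; // "codegen" the expression
  FakeJIT::ModuleHandle H = JIT.addModule(std::move(M));
  if (std::function<double()> FP = JIT.findSymbol("__anon_expr"))
    std::printf("Evaluated to %f\n", FP());     // mirrors the driver's output
  JIT.removeModule(H);                          // module discarded after use
  return 0;
}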
@@ -853,11 +660,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();

+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
+
+  InitializeModuleAndPassManager();
+
   // Run the main "interpreter loop" now.
   MainLoop();

-  // Print out all of the generated code.
-  JITHelper->dump();
-
   return 0;
 }
diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
index a938d9731fe8..c0ae70654c36 100644
--- a/examples/Kaleidoscope/Chapter5/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
@@ -13,3 +13,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch5
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch5)
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index db9904895739..eeca4775eeb1 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;

 //===----------------------------------------------------------------------===//
 // Lexer
@@ -84,7 +82,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');

-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }

@@ -116,7 +114,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };

 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -124,8 +122,8 @@ class NumberExprAST : public ExprAST {
   double Val;

 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };

 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -133,52 +131,57 @@ class VariableExprAST : public ExprAST {
   std::string Name;

 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  Value *Codegen() override;
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+  Value *codegen() override;
 };

 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;

 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };

 /// CallExprAST - Expression class for function calls.
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr Cond, Then, Else; public: - IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) - : Cond(cond), Then(then), Else(_else) {} - Value *Codegen() override; + IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, + std::unique_ptr Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + Value *codegen() override; }; /// ForExprAST - Expression class for for/in. class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - Value *Codegen() override; + ForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -189,21 +192,22 @@ class PrototypeAST { std::vector Args; public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -234,41 +238,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -282,60 +303,41 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique(IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. // condition. - ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(Cond, Then, Else); + return llvm::make_unique(std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -348,35 +350,36 @@ static ExprAST *ParseForExpr() { return Error("expected '=' after for"); getNextToken(); // eat '='. - ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. 
- ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// primary @@ -385,7 +388,7 @@ static ExprAST *ParseForExpr() { /// ::= parenexpr /// ::= ifexpr /// ::= forexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -404,7 +407,8 @@ static ExprAST *ParsePrimary() { /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -419,38 +423,39 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); + auto RHS = ParsePrimary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -469,33 +474,34 @@ static PrototypeAST *ParsePrototype() { // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. 
return ParsePrototype(); } @@ -504,31 +510,50 @@ static PrototypeAST *ParseExtern() { // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr TheModule; static IRBuilder<> Builder(getGlobalContext()); static std::map NamedValues; -static legacy::FunctionPassManager *TheFPM; +static std::unique_ptr TheFPM; +static std::unique_ptr TheJIT; +static std::map> FunctionProtos; Value *ErrorV(const char *Str) { Error(Str); - return 0; + return nullptr; } -Value *NumberExprAST::Codegen() { +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } -Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + return ErrorV("Unknown variable name"); + return V; } -Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) - return 0; +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { case '+': @@ -547,10 +572,10 @@ Value *BinaryExprAST::Codegen() { } } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { // Look up the name in the global module table. - Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + Function *CalleeF = getFunction(Callee); + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. @@ -559,18 +584,18 @@ Value *CallExprAST::Codegen() { std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Value *IfExprAST::Codegen() { - Value *CondV = Cond->Codegen(); - if (CondV == 0) - return 0; +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. CondV = Builder.CreateFCmpONE( @@ -590,9 +615,9 @@ Value *IfExprAST::Codegen() { // Emit then value. Builder.SetInsertPoint(ThenBB); - Value *ThenV = Then->Codegen(); - if (ThenV == 0) - return 0; + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. @@ -602,9 +627,9 @@ Value *IfExprAST::Codegen() { TheFunction->getBasicBlockList().push_back(ElseBB); Builder.SetInsertPoint(ElseBB); - Value *ElseV = Else->Codegen(); - if (ElseV == 0) - return 0; + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. @@ -621,27 +646,26 @@ Value *IfExprAST::Codegen() { return PN; } -Value *ForExprAST::Codegen() { - // Output this as: - // ... 
- // start = startexpr - // goto loop - // loop: - // variable = phi [start, loopheader], [nextvariable, loopend] - // ... - // bodyexpr - // ... - // loopend: - // step = stepexpr - // nextvariable = variable + step - // endcond = endexpr - // br endcond, loop, endloop - // outloop: - +// Output for-loop as: +// ... +// start = startexpr +// goto loop +// loop: +// variable = phi [start, loopheader], [nextvariable, loopend] +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// nextvariable = variable + step +// endcond = endexpr +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { // Emit the start code first, without 'variable' in scope. - Value *StartVal = Start->Codegen(); - if (StartVal == 0) - return 0; + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; // Make the new basic block for the loop header, inserting after current // block. @@ -669,15 +693,15 @@ Value *ForExprAST::Codegen() { // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. - if (Body->Codegen() == 0) - return 0; + if (!Body->codegen()) + return nullptr; // Emit the step value. - Value *StepVal; + Value *StepVal = nullptr; if (Step) { - StepVal = Step->Codegen(); - if (StepVal == 0) - return 0; + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; } else { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); @@ -686,9 +710,9 @@ Value *ForExprAST::Codegen() { Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); // Compute the end condition. - Value *EndCond = End->Codegen(); - if (EndCond == 0) - return EndCond; + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. EndCond = Builder.CreateFCmpONE( @@ -718,7 +742,7 @@ Value *ForExprAST::Codegen() { return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); @@ -726,60 +750,42 @@ Function *PrototypeAST::Codegen() { FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); Function *F = - Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. 
- NamedValues[Args[Idx]] = AI; - } + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); - // Optimize the function. + // Run the optimizer on the function. TheFPM->run(*TheFunction); return TheFunction; @@ -787,20 +793,40 @@ Function *FunctionAST::Codegen() { // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// -static ExecutionEngine *TheExecutionEngine; +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = llvm::make_unique(TheModule.get()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read function definition:"); - LF->dump(); + FnIR->dump(); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); } } else { // Skip token for error recovery. @@ -809,10 +835,11 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (Function *F = P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { fprintf(stderr, "Read extern: "); - F->dump(); + FnIR->dump(); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); } } else { // Skip token for error recovery. @@ -822,16 +849,25 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. - if (FunctionAST *F = ParseTopLevelExpr()) { - if (Function *LF = F->Codegen()) { - TheExecutionEngine->finalizeObject(); - // JIT the function, returning a function pointer. 
- void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { - // Cast it to the right type (takes no arguments, returns a double) so we - // can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)FPtr; + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); } } else { // Skip token for error recovery. @@ -846,9 +882,9 @@ static void MainLoop() { switch (CurTok) { case tok_eof: return; - case ';': + case ';': // ignore top-level semicolons. getNextToken(); - break; // ignore top-level semicolons. + break; case tok_def: HandleDefinition(); break; @@ -868,7 +904,13 @@ static void MainLoop() { /// putchard - putchar that takes a double and returns 0. extern "C" double putchard(double X) { - putchar((char)X); + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" double printd(double X) { + fprintf(stderr, "%f\n", X); return 0; } @@ -880,7 +922,6 @@ int main() { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); - LLVMContext &Context = getGlobalContext(); // Install standard binary operators. // 1 is lowest precedence. @@ -893,50 +934,12 @@ int main() { fprintf(stderr, "ready> "); getNextToken(); - // Make the module, which holds all the code. - std::unique_ptr Owner = make_unique("my cool jit", Context); - TheModule = Owner.get(); + TheJIT = llvm::make_unique(); - // Create the JIT. This takes ownership of the module. - std::string ErrStr; - TheExecutionEngine = - EngineBuilder(std::move(Owner)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(llvm::make_unique()) - .create(); - if (!TheExecutionEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - legacy::FunctionPassManager OurFPM(TheModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - TheModule->setDataLayout(*TheExecutionEngine->getDataLayout()); - // Provide basic AliasAnalysis support for GVN. - OurFPM.add(createBasicAliasAnalysisPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - OurFPM.add(createInstructionCombiningPass()); - // Reassociate expressions. - OurFPM.add(createReassociatePass()); - // Eliminate Common SubExpressions. - OurFPM.add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). - OurFPM.add(createCFGSimplificationPass()); - - OurFPM.doInitialization(); - - // Set the global so the code gen can use this. - TheFPM = &OurFPM; + InitializeModuleAndPassManager(); // Run the main "interpreter loop" now. MainLoop(); - TheFPM = 0; - - // Print out all of the generated code. 
-  TheModule->dump();
-
   return 0;
 }
diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
index 7ac1ca49c4f9..49627f07ddf0 100644
--- a/examples/Kaleidoscope/Chapter6/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
@@ -13,3 +13,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch6
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch6)
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index e978a3ea3682..4d04f7e888af 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;

 //===----------------------------------------------------------------------===//
 // Lexer
@@ -92,7 +90,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');

-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }

@@ -124,7 +122,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };

 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -132,8 +130,8 @@ class NumberExprAST : public ExprAST {
   double Val;

 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };

 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -141,63 +139,68 @@ class VariableExprAST : public ExprAST {
   std::string Name;

 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  Value *Codegen() override;
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+  Value *codegen() override;
 };

 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;

 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  Value *Codegen() override;
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
 };

 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;

 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };

 /// CallExprAST - Expression class for function calls.
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr Cond, Then, Else; public: - IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) - : Cond(cond), Then(then), Else(_else) {} - Value *Codegen() override; + IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, + std::unique_ptr Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + Value *codegen() override; }; /// ForExprAST - Expression class for for/in. class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - Value *Codegen() override; + ForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -206,15 +209,19 @@ public: class PrototypeAST { std::string Name; std::vector Args; - bool isOperator; + bool IsOperator; unsigned Precedence; // Precedence if a binary op. -public: - PrototypeAST(const std::string &name, const std::vector &args, - bool isoperator = false, unsigned prec = 0) - : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} - bool isUnaryOp() const { return isOperator && Args.size() == 1; } - bool isBinaryOp() const { return isOperator && Args.size() == 2; } +public: + PrototypeAST(const std::string &Name, std::vector Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); @@ -222,19 +229,18 @@ public: } unsigned getBinaryPrecedence() const { return Precedence; } - - Function *Codegen(); }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -265,41 +271,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. 
-ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -313,60 +336,41 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique(IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. // condition. - ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(Cond, Then, Else); + return llvm::make_unique(std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -379,35 +383,36 @@ static ExprAST *ParseForExpr() { return Error("expected '=' after for"); getNextToken(); // eat '='. 
- ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// primary @@ -416,7 +421,7 @@ static ExprAST *ParseForExpr() { /// ::= parenexpr /// ::= ifexpr /// ::= forexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -436,7 +441,7 @@ static ExprAST *ParsePrimary() { /// unary /// ::= primary /// ::= '!' unary -static ExprAST *ParseUnary() { +static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); @@ -444,14 +449,15 @@ static ExprAST *ParseUnary() { // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); - if (ExprAST *Operand = ParseUnary()) - return new UnaryExprAST(Opc, Operand); - return 0; + if (auto Operand = ParseUnary()) + return llvm::make_unique(Opc, std::move(Operand)); + return nullptr; } /// binoprhs /// ::= ('+' unary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -466,40 +472,41 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the unary expression after the binary operator. - ExprAST *RHS = ParseUnary(); + auto RHS = ParseUnary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= unary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParseUnary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParseUnary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' /// ::= binary LETTER number? (id, id) /// ::= unary LETTER (id) -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { std::string FnName; unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 
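For context around this hunk: in ParsePrototype, Kind records whether the prototype is an ordinary identifier (0), a unary operator (1), or a binary operator (2), and the check below enforces that the argument count matches. A user-defined operator then becomes an ordinary function whose name is the operator character pasted onto "unary" or "binary", which is exactly the name UnaryExprAST/BinaryExprAST codegen later looks up for non-builtin operators. A small self-contained sketch of the naming scheme; operatorFnName is an illustrative helper, not a function from the patch.

#include <cassert>
#include <string>

// Kind uses the same encoding as ParsePrototype: 1 = unary, 2 = binary.
static std::string operatorFnName(unsigned Kind, char Opcode) {
  assert(Kind == 1 || Kind == 2);
  return (Kind == 1 ? std::string("unary") : std::string("binary")) + Opcode;
}

int main() {
  assert(operatorFnName(2, '|') == "binary|"); // def binary| 5 (LHS RHS) ...
  assert(operatorFnName(1, '!') == "unary!");  // def unary! (v) ...
  return 0;
}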
@@ -557,33 +564,35 @@ static PrototypeAST *ParsePrototype() {
   if (Kind && ArgNames.size() != Kind)
     return ErrorP("Invalid number of operands for operator");
 
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+  return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
+                                         BinaryPrecedence);
 }
 
 /// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
+static std::unique_ptr<FunctionAST> ParseDefinition() {
   getNextToken(); // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0)
-    return 0;
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
 
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
+  if (auto E = ParseExpression())
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  return nullptr;
 }
 
 /// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  if (auto E = ParseExpression()) {
     // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
+    auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
+                                                 std::vector<std::string>());
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
   }
-  return 0;
+  return nullptr;
 }
 
 /// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
+static std::unique_ptr<PrototypeAST> ParseExtern() {
   getNextToken(); // eat extern.
   return ParsePrototype();
 }
@@ -592,43 +601,62 @@ static PrototypeAST *ParseExtern() {
 // Code Generation
 //===----------------------------------------------------------------------===//
 
-static Module *TheModule;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, Value *> NamedValues;
-static legacy::FunctionPassManager *TheFPM;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
 
 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
 }
 
-Value *NumberExprAST::Codegen() {
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
+}
+
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }
 
-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
+  if (!V)
+    return ErrorV("Unknown variable name");
+  return V;
 }
 
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0)
-    return 0;
+Value *UnaryExprAST::codegen() {
+  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;
 
-  Function *F = TheModule->getFunction(std::string("unary") + Opcode);
-  if (F == 0)
+  Function *F = getFunction(std::string("unary") + Opcode);
+  if (!F)
     return ErrorV("Unknown unary operator");
 
   return Builder.CreateCall(F, OperandV, "unop");
 }
 
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
 
   switch (Op) {
   case '+':
@@ -648,17 +676,17 @@ Value *BinaryExprAST::Codegen() {
 
   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
   // a call to it.
-  Function *F = TheModule->getFunction(std::string("binary") + Op);
+  Function *F = getFunction(std::string("binary") + Op);
   assert(F && "binary operator not found!");
 
-  Value *Ops[] = { L, R };
+  Value *Ops[] = {L, R};
   return Builder.CreateCall(F, Ops, "binop");
 }
 
-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");
 
   // If argument mismatch error.
@@ -667,18 +695,18 @@ Value *CallExprAST::Codegen() {
 
   std::vector<Value *> ArgsV;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0)
-      return 0;
+    ArgsV.push_back(Args[i]->codegen());
+    if (!ArgsV.back())
+      return nullptr;
   }
 
   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
 }
 
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0)
-    return 0;
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   CondV = Builder.CreateFCmpONE(
@@ -698,9 +726,9 @@ Value *IfExprAST::Codegen() {
 
   // Emit then value.
   Builder.SetInsertPoint(ThenBB);
 
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0)
-    return 0;
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
@@ -710,9 +738,9 @@ Value *IfExprAST::Codegen() {
   TheFunction->getBasicBlockList().push_back(ElseBB);
   Builder.SetInsertPoint(ElseBB);
 
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0)
-    return 0;
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
@@ -729,27 +757,26 @@ Value *IfExprAST::Codegen() {
   return PN;
 }
 
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop:
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-
+// Output for-loop as:
+//   ...
+//   start = startexpr
+//   goto loop
+// loop:
+//   variable = phi [start, loopheader], [nextvariable, loopend]
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   nextvariable = variable + step
+//   endcond = endexpr
+//   br endcond, loop, endloop
+// outloop:
+Value *ForExprAST::codegen() {
   // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0)
-    return 0;
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
 
   // Make the new basic block for the loop header, inserting after current
   // block.
@@ -777,15 +804,15 @@ Value *ForExprAST::Codegen() {
   // Emit the body of the loop.  This, like any other expr, can change the
   // current BB.  Note that we ignore the value computed by the body, but don't
   // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
+  if (!Body->codegen())
+    return nullptr;
 
   // Emit the step value.
-  Value *StepVal;
+  Value *StepVal = nullptr;
   if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0)
-      return 0;
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
   } else {
     // If not specified, use 1.0.
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
@@ -794,9 +821,9 @@ Value *ForExprAST::Codegen() {
   Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
 
   // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0)
-    return EndCond;
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   EndCond = Builder.CreateFCmpONE(
@@ -826,7 +853,7 @@ Value *ForExprAST::Codegen() {
   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
 }
 
-Function *PrototypeAST::Codegen() {
+Function *PrototypeAST::codegen() {
   // Make the function type:  double(double,double) etc.
   std::vector<Type *> Doubles(Args.size(),
                               Type::getDoubleTy(getGlobalContext()));
@@ -834,64 +861,46 @@ Function *PrototypeAST::Codegen() {
       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 
   Function *F =
-      Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
 
   // Set names for all arguments.
   unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
+  for (auto &Arg : F->args())
+    Arg.setName(Args[Idx++]);
 
   return F;
 }
 
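A brief aside, not part of the patch, on why FunctionProtos and getFunction() exist at all. Once each top-level expression is compiled into its own short-lived Module, a hypothetical session (illustrative; the prompt text comes from the tutorial, not this diff) hits the following situation:

    // ready> def add(a b) a + b;   // 'add' is emitted into module M1, and M1
    //                              // is then handed off to the JIT.
    // ready> add(1, 2);            // the call is emitted into a fresh M2.
    //
    // M2 contains no 'add', so TheModule->getFunction("add") alone would fail.
    // getFunction() falls back to FunctionProtos["add"]->codegen(), which puts
    // only a declaration into M2, roughly:
    //
    //   declare double @add(double, double)
    //
    // and the JIT's cross-module symbol resolution binds the call to the
    // definition living in M1.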
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
 
   // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
 
   // Create a new basic block to start insertion into.
   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   Builder.SetInsertPoint(BB);
 
-  if (Value *RetVal = Body->Codegen()) {
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[Arg.getName()] = &Arg;
+
+  if (Value *RetVal = Body->codegen()) {
     // Finish off the function.
     Builder.CreateRet(RetVal);
 
     // Validate the generated code, checking for consistency.
     verifyFunction(*TheFunction);
 
-    // Optimize the function.
+    // Run the optimizer on the function.
     TheFPM->run(*TheFunction);
 
     return TheFunction;
@@ -900,22 +909,42 @@ Function *FunctionAST::Codegen() {
 
   // Error reading body, remove function.
   TheFunction->eraseFromParent();
 
-  if (Proto->isBinaryOp())
+  if (P.isBinaryOp())
     BinopPrecedence.erase(Proto->getOperatorName());
-  return 0;
+  return nullptr;
 }
 
 //===----------------------------------------------------------------------===//
 // Top-Level parsing and JIT Driver
 //===----------------------------------------------------------------------===//
 
-static ExecutionEngine *TheExecutionEngine;
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+  TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+  // Create a new pass manager attached to it.
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
 
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -924,10 +953,11 @@ static void HandleDefinition() {
 }
 
 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -937,16 +967,25 @@ static void HandleExtern() {
 
 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      TheExecutionEngine->finalizeObject();
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
 
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -961,9 +1000,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
    case tok_def:
      HandleDefinition();
      break;
@@ -983,13 +1022,13 @@ static void MainLoop() {
 
 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }
 
 /// printd - printf that takes a double prints it as "%f\n", returning 0.
 extern "C" double printd(double X) {
-  printf("%f\n", X);
+  fprintf(stderr, "%f\n", X);
   return 0;
 }
 
@@ -1001,7 +1040,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
 
   // Install standard binary operators.
   // 1 is lowest precedence.
@@ -1014,50 +1052,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();
 
-  // Make the module, which holds all the code.
-  std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
-  TheModule = Owner.get();
+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
 
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine =
-      EngineBuilder(std::move(Owner))
-          .setErrorStr(&ErrStr)
-          .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
-          .create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  legacy::FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  TheModule->setDataLayout(*TheExecutionEngine->getDataLayout());
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
+  InitializeModuleAndPassManager();
 
   // Run the main "interpreter loop" now.
   MainLoop();
 
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
   return 0;
 }
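For orientation before the Chapter 7 diff: the TheJIT->addModule/findSymbol/removeModule calls above target the small ORC wrapper this patch adds at examples/Kaleidoscope/include/KaleidoscopeJIT.h. The interface below is paraphrased and trimmed to the members the tutorial uses; treat the exact typedefs as an assumption, not a quotation of the header:

    class KaleidoscopeJIT {
    public:
      // Opaque handle identifying one added module; pass it back to
      // removeModule() to free that module's compiled code.
      typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; // CompileLayerT is
                                                             // the wrapper's
                                                             // IRCompileLayer
      TargetMachine &getTargetMachine();       // used for setDataLayout() above
      ModuleHandleT addModule(std::unique_ptr<Module> M); // compile + expose
      void removeModule(ModuleHandleT H);      // discard the module's code
      JITSymbol findSymbol(const std::string Name); // e.g. "__anon_expr"
    };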
diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
index 8725e4761f78..e67d7928efe7 100644
--- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
@@ -3,14 +3,15 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
-  TransformUtils
   native
   )
 
 add_kaleidoscope_chapter(Kaleidoscope-Ch7
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch7)
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index b1a41fa01b76..5c0094013d97 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;
 
 //===----------------------------------------------------------------------===//
 // Lexer
 //===----------------------------------------------------------------------===//
@@ -97,7 +95,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');
 
-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }
 
@@ -129,7 +127,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };
 
 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -137,8 +135,8 @@ class NumberExprAST : public ExprAST {
   double Val;
 
 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };
 
 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -146,93 +144,103 @@ class VariableExprAST : public ExprAST {
   std::string Name;
 
 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
+  VariableExprAST(const std::string &Name) : Name(Name) {}
   const std::string &getName() const { return Name; }
-  Value *Codegen() override;
+  Value *codegen() override;
 };
 
 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;
 
 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  Value *Codegen() override;
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
 };
 
 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;
 
 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };
 
 /// CallExprAST - Expression class for function calls.
 class CallExprAST : public ExprAST {
   std::string Callee;
-  std::vector<ExprAST *> Args;
+  std::vector<std::unique_ptr<ExprAST>> Args;
 
 public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
-      : Callee(callee), Args(args) {}
-  Value *Codegen() override;
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+  Value *codegen() override;
 };
 
 /// IfExprAST - Expression class for if/then/else.
 class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
+  std::unique_ptr<ExprAST> Cond, Then, Else;
 
 public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-      : Cond(cond), Then(then), Else(_else) {}
-  Value *Codegen() override;
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+      : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+  Value *codegen() override;
 };
 
 /// ForExprAST - Expression class for for/in.
 class ForExprAST : public ExprAST {
   std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
 
 public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-      : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  Value *Codegen() override;
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+  Value *codegen() override;
 };
 
 /// VarExprAST - Expression class for var/in
 class VarExprAST : public ExprAST {
-  std::vector<std::pair<std::string, ExprAST *> > VarNames;
-  ExprAST *Body;
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+  std::unique_ptr<ExprAST> Body;
 
 public:
-  VarExprAST(const std::vector<std::pair<std::string, ExprAST *> > &varnames,
-             ExprAST *body)
-      : VarNames(varnames), Body(body) {}
-
-  Value *Codegen() override;
+  VarExprAST(
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+      std::unique_ptr<ExprAST> Body)
+      : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+  Value *codegen() override;
 };
 
 /// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
 class PrototypeAST {
   std::string Name;
   std::vector<std::string> Args;
-  bool isOperator;
+  bool IsOperator;
   unsigned Precedence; // Precedence if a binary op.
 
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-               bool isoperator = false, unsigned prec = 0)
-      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-
-  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args,
               bool IsOperator = false, unsigned Prec = 0)
+      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
+        Precedence(Prec) {}
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+
+  bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
 
   char getOperatorName() const {
     assert(isUnaryOp() || isBinaryOp());
@@ -240,21 +248,18 @@ public:
   }
 
   unsigned getBinaryPrecedence() const { return Precedence; }
-
-  Function *Codegen();
-
-  void CreateArgumentAllocas(Function *F);
 };
 
 /// FunctionAST - This class represents a function definition itself.
 class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
 
 public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
-
-  Function *Codegen();
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+  Function *codegen();
 };
 } // end anonymous namespace
 
@@ -285,41 +290,58 @@ static int GetTokPrecedence() {
 }
 
 /// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) {
+std::unique_ptr<ExprAST> Error(const char *Str) {
   fprintf(stderr, "Error: %s\n", Str);
-  return 0;
-}
-PrototypeAST *ErrorP(const char *Str) {
-  Error(Str);
-  return 0;
-}
-FunctionAST *ErrorF(const char *Str) {
-  Error(Str);
-  return 0;
+  return nullptr;
 }
 
-static ExprAST *ParseExpression();
+std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
+  Error(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  auto Result = llvm::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return std::move(Result);
+}
+
+/// parenexpr ::= '(' expression ')'
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+  auto V = ParseExpression();
+  if (!V)
+    return nullptr;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken(); // eat ).
+  return V;
+}
 
 /// identifierexpr
 ///   ::= identifier
 ///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
   std::string IdName = IdentifierStr;
 
   getNextToken(); // eat identifier.
 
   if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
+    return llvm::make_unique<VariableExprAST>(IdName);
 
   // Call.
   getNextToken(); // eat (
-  std::vector<ExprAST *> Args;
+  std::vector<std::unique_ptr<ExprAST>> Args;
   if (CurTok != ')') {
     while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg)
-        return 0;
-      Args.push_back(Arg);
+      if (auto Arg = ParseExpression())
+        Args.push_back(std::move(Arg));
+      else
+        return nullptr;
 
       if (CurTok == ')')
         break;
@@ -333,60 +355,41 @@ static ExprAST *ParseIdentifierExpr() {
   // Eat the ')'.
   getNextToken();
 
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken(); // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V)
-    return 0;
-
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken(); // eat ).
-  return V;
+  return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
 }
 
 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
+static std::unique_ptr<ExprAST> ParseIfExpr() {
   getNextToken(); // eat the if.
 
   // condition.
-  ExprAST *Cond = ParseExpression();
+  auto Cond = ParseExpression();
   if (!Cond)
-    return 0;
+    return nullptr;
 
   if (CurTok != tok_then)
     return Error("expected then");
   getNextToken(); // eat the then
 
-  ExprAST *Then = ParseExpression();
-  if (Then == 0)
-    return 0;
+  auto Then = ParseExpression();
+  if (!Then)
+    return nullptr;
 
   if (CurTok != tok_else)
     return Error("expected else");
 
   getNextToken();
 
-  ExprAST *Else = ParseExpression();
+  auto Else = ParseExpression();
   if (!Else)
-    return 0;
+    return nullptr;
 
-  return new IfExprAST(Cond, Then, Else);
+  return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
+                                      std::move(Else));
 }
 
 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
+static std::unique_ptr<ExprAST> ParseForExpr() {
   getNextToken(); // eat the for.
 
   if (CurTok != tok_identifier)
@@ -399,43 +402,44 @@ static ExprAST *ParseForExpr() {
     return Error("expected '=' after for");
   getNextToken(); // eat '='.
 
-  ExprAST *Start = ParseExpression();
-  if (Start == 0)
-    return 0;
+  auto Start = ParseExpression();
+  if (!Start)
+    return nullptr;
 
   if (CurTok != ',')
     return Error("expected ',' after for start value");
   getNextToken();
 
-  ExprAST *End = ParseExpression();
-  if (End == 0)
-    return 0;
+  auto End = ParseExpression();
+  if (!End)
+    return nullptr;
 
   // The step value is optional.
-  ExprAST *Step = 0;
+  std::unique_ptr<ExprAST> Step;
   if (CurTok == ',') {
     getNextToken();
     Step = ParseExpression();
-    if (Step == 0)
-      return 0;
+    if (!Step)
+      return nullptr;
   }
 
   if (CurTok != tok_in)
     return Error("expected 'in' after for");
   getNextToken(); // eat 'in'.
 
-  ExprAST *Body = ParseExpression();
-  if (Body == 0)
-    return 0;
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
 
-  return new ForExprAST(IdName, Start, End, Step, Body);
+  return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
+                                       std::move(Step), std::move(Body));
 }
 
 /// varexpr ::= 'var' identifier ('=' expression)?
 //                    (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
+static std::unique_ptr<ExprAST> ParseVarExpr() {
   getNextToken(); // eat the var.
 
-  std::vector<std::pair<std::string, ExprAST *> > VarNames;
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
 
   // At least one variable name is required.
   if (CurTok != tok_identifier)
@@ -446,16 +450,16 @@ static ExprAST *ParseVarExpr() {
     getNextToken(); // eat identifier.
 
     // Read the optional initializer.
-    ExprAST *Init = 0;
+    std::unique_ptr<ExprAST> Init = nullptr;
     if (CurTok == '=') {
       getNextToken(); // eat the '='.
 
       Init = ParseExpression();
-      if (Init == 0)
-        return 0;
+      if (!Init)
+        return nullptr;
     }
 
-    VarNames.push_back(std::make_pair(Name, Init));
+    VarNames.push_back(std::make_pair(Name, std::move(Init)));
 
     // End of var list, exit loop.
     if (CurTok != ',')
@@ -471,11 +475,11 @@ static ExprAST *ParseVarExpr() {
     return Error("expected 'in' keyword after 'var'");
   getNextToken(); // eat 'in'.
 
-  ExprAST *Body = ParseExpression();
-  if (Body == 0)
-    return 0;
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
 
-  return new VarExprAST(VarNames, Body);
+  return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
 }
 
 /// primary
@@ -485,7 +489,7 @@ static ExprAST *ParseVarExpr() {
 ///   ::= ifexpr
 ///   ::= forexpr
 ///   ::= varexpr
-static ExprAST *ParsePrimary() {
+static std::unique_ptr<ExprAST> ParsePrimary() {
   switch (CurTok) {
   default:
     return Error("unknown token when expecting an expression");
@@ -507,7 +511,7 @@ static ExprAST *ParsePrimary() {
 
 /// unary
 ///   ::= primary
 ///   ::= '!' unary
-static ExprAST *ParseUnary() {
+static std::unique_ptr<ExprAST> ParseUnary() {
   // If the current token is not an operator, it must be a primary expr.
   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
     return ParsePrimary();
@@ -515,14 +519,15 @@ static ExprAST *ParseUnary() {
   // If this is a unary operator, read it.
   int Opc = CurTok;
   getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
+  if (auto Operand = ParseUnary())
+    return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+  return nullptr;
 }
 
 /// binoprhs
 ///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
   // If this is a binop, find its precedence.
   while (1) {
     int TokPrec = GetTokPrecedence();
@@ -537,40 +542,41 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
     getNextToken(); // eat binop
 
     // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
+    auto RHS = ParseUnary();
     if (!RHS)
-      return 0;
+      return nullptr;
 
     // If BinOp binds less tightly with RHS than the operator after RHS, let
     // the pending operator take RHS as its LHS.
     int NextPrec = GetTokPrecedence();
     if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec + 1, RHS);
-      if (RHS == 0)
-        return 0;
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
    }
 
     // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+    LHS =
+        llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
   }
 }
 
 /// expression
 ///   ::= unary binoprhs
 ///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParseUnary();
+static std::unique_ptr<ExprAST> ParseExpression() {
  auto LHS = ParseUnary();
   if (!LHS)
-    return 0;
+    return nullptr;
 
-  return ParseBinOpRHS(0, LHS);
+  return ParseBinOpRHS(0, std::move(LHS));
 }
 
 /// prototype
 ///   ::= id '(' id* ')'
 ///   ::= binary LETTER number? (id, id)
 ///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
   std::string FnName;
 
   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
@@ -628,33 +634,35 @@ static PrototypeAST *ParsePrototype() {
   if (Kind && ArgNames.size() != Kind)
     return ErrorP("Invalid number of operands for operator");
 
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+  return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
+                                         BinaryPrecedence);
 }
 
 /// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
+static std::unique_ptr<FunctionAST> ParseDefinition() {
   getNextToken(); // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0)
-    return 0;
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
 
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
+  if (auto E = ParseExpression())
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  return nullptr;
 }
 
 /// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  if (auto E = ParseExpression()) {
     // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
+    auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
+                                                 std::vector<std::string>());
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
   }
-  return 0;
+  return nullptr;
 }
 
 /// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
+static std::unique_ptr<PrototypeAST> ParseExtern() {
   getNextToken(); // eat extern.
   return ParsePrototype();
 }
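The code-generation section that follows keeps every Kaleidoscope variable in a stack slot created in the function's entry block. As a rough, hand-written illustration (not output of this code), 'var x = 1.0 in x = x + 1.0' lowers to the load/store pattern below, which LLVM's mem2reg-style promotion can later rewrite back into SSA registers:

    // entry:
    //   %x = alloca double            ; CreateEntryBlockAlloca
    //   store double 1.0, double* %x  ; initializer
    //   %cur = load double, double* %x
    //   %new = fadd double %cur, 1.0
    //   store double %new, double* %x ; assignment via the '=' operator case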
@@ -663,14 +671,31 @@ static PrototypeAST *ParseExtern() {
 // Code Generation
 //===----------------------------------------------------------------------===//
 
-static Module *TheModule;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, AllocaInst *> NamedValues;
-static legacy::FunctionPassManager *TheFPM;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
 
 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
+}
+
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
 }
 
 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
@@ -679,64 +704,64 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                           const std::string &VarName) {
   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                    TheFunction->getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr,
                            VarName.c_str());
 }
 
-Value *NumberExprAST::Codegen() {
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }
 
-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  if (V == 0)
+  if (!V)
     return ErrorV("Unknown variable name");
 
   // Load the value.
   return Builder.CreateLoad(V, Name.c_str());
 }
 
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0)
-    return 0;
+Value *UnaryExprAST::codegen() {
  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;
 
-  Function *F = TheModule->getFunction(std::string("unary") + Opcode);
-  if (F == 0)
+  Function *F = getFunction(std::string("unary") + Opcode);
+  if (!F)
    return ErrorV("Unknown unary operator");
 
   return Builder.CreateCall(F, OperandV, "unop");
 }
 
-Value *BinaryExprAST::Codegen() {
+Value *BinaryExprAST::codegen() {
   // Special case '=' because we don't want to emit the LHS as an expression.
   if (Op == '=') {
     // Assignment requires the LHS to be an identifier.
     // This assume we're building without RTTI because LLVM builds that way by
     // default.  If you build LLVM with RTTI this can be changed to a
     // dynamic_cast for automatic error checking.
-    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS);
+    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
     if (!LHSE)
       return ErrorV("destination of '=' must be a variable");
 
     // Codegen the RHS.
-    Value *Val = RHS->Codegen();
-    if (Val == 0)
-      return 0;
+    Value *Val = RHS->codegen();
+    if (!Val)
+      return nullptr;
 
     // Look up the name.
     Value *Variable = NamedValues[LHSE->getName()];
-    if (Variable == 0)
+    if (!Variable)
      return ErrorV("Unknown variable name");
 
     Builder.CreateStore(Val, Variable);
     return Val;
   }
 
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
 
   switch (Op) {
   case '+':
@@ -756,17 +781,17 @@ Value *BinaryExprAST::Codegen() {
 
   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
   // a call to it.
-  Function *F = TheModule->getFunction(std::string("binary") + Op);
+  Function *F = getFunction(std::string("binary") + Op);
   assert(F && "binary operator not found!");
 
-  Value *Ops[] = { L, R };
+  Value *Ops[] = {L, R};
   return Builder.CreateCall(F, Ops, "binop");
 }
 
-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");
 
   // If argument mismatch error.
@@ -775,18 +800,18 @@ Value *CallExprAST::Codegen() {
 
   std::vector<Value *> ArgsV;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0)
-      return 0;
+    ArgsV.push_back(Args[i]->codegen());
+    if (!ArgsV.back())
+      return nullptr;
   }
 
   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
 }
 
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0)
-    return 0;
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   CondV = Builder.CreateFCmpONE(
@@ -806,9 +831,9 @@ Value *IfExprAST::Codegen() {
 
   // Emit then value.
   Builder.SetInsertPoint(ThenBB);
 
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0)
-    return 0;
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
@@ -818,9 +843,9 @@ Value *IfExprAST::Codegen() {
   TheFunction->getBasicBlockList().push_back(ElseBB);
   Builder.SetInsertPoint(ElseBB);
 
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0)
-    return 0;
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
@@ -837,36 +862,35 @@ Value *IfExprAST::Codegen() {
   return PN;
 }
 
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   var = alloca double
-  //   ...
-  //   start = startexpr
-  //   store start -> var
-  //   goto loop
-  // loop:
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   endcond = endexpr
-  //
-  //   curvar = load var
-  //   nextvar = curvar + step
-  //   store nextvar -> var
-  //   br endcond, loop, endloop
-  // outloop:
-
+// Output for-loop as:
+//   var = alloca double
+//   ...
+//   start = startexpr
+//   store start -> var
+//   goto loop
+// loop:
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   endcond = endexpr
+//
+//   curvar = load var
+//   nextvar = curvar + step
+//   store nextvar -> var
+//   br endcond, loop, endloop
+// outloop:
+Value *ForExprAST::codegen() {
   Function *TheFunction = Builder.GetInsertBlock()->getParent();
 
   // Create an alloca for the variable in the entry block.
   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
 
   // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0)
-    return 0;
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
 
   // Store the value into the alloca.
   Builder.CreateStore(StartVal, Alloca);
@@ -890,24 +914,24 @@ Value *ForExprAST::Codegen() {
   // Emit the body of the loop.  This, like any other expr, can change the
   // current BB.  Note that we ignore the value computed by the body, but don't
   // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
+  if (!Body->codegen())
+    return nullptr;
 
   // Emit the step value.
-  Value *StepVal;
+  Value *StepVal = nullptr;
   if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0)
-      return 0;
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
   } else {
     // If not specified, use 1.0.
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
   }
 
   // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0)
-    return EndCond;
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
 
   // Reload, increment, and restore the alloca.  This handles the case where
   // the body of the loop mutates the variable.
@@ -939,7 +963,7 @@ Value *ForExprAST::Codegen() {
   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
 }
 
-Value *VarExprAST::Codegen() {
+Value *VarExprAST::codegen() {
   std::vector<AllocaInst *> OldBindings;
 
   Function *TheFunction = Builder.GetInsertBlock()->getParent();
@@ -947,7 +971,7 @@ Value *VarExprAST::Codegen() {
   // Register all variables and emit their initializer.
   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
     const std::string &VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
+    ExprAST *Init = VarNames[i].second.get();
 
     // Emit the initializer before adding the variable to scope, this prevents
     // the initializer from referencing the variable itself, and permits stuff
@@ -956,9 +980,9 @@ Value *VarExprAST::Codegen() {
     //    var a = a in ...   # refers to outer 'a'.
     Value *InitVal;
     if (Init) {
-      InitVal = Init->Codegen();
-      if (InitVal == 0)
-        return 0;
+      InitVal = Init->codegen();
+      if (!InitVal)
+        return nullptr;
     } else { // If not specified, use 0.0.
       InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
     }
@@ -975,9 +999,9 @@ Value *VarExprAST::Codegen() {
   }
 
   // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body->Codegen();
-  if (BodyVal == 0)
-    return 0;
+  Value *BodyVal = Body->codegen();
+  if (!BodyVal)
+    return nullptr;
 
   // Pop all our variables from scope.
   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
@@ -987,7 +1011,7 @@ Value *VarExprAST::Codegen() {
   return BodyVal;
 }
 
-Function *PrototypeAST::Codegen() {
+Function *PrototypeAST::codegen() {
   // Make the function type:  double(double,double) etc.
   std::vector<Type *> Doubles(Args.size(),
                               Type::getDoubleTy(getGlobalContext()));
@@ -995,79 +1019,54 @@ Function *PrototypeAST::Codegen() {
       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 
   Function *F =
-      Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
 
   // Set names for all arguments.
   unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx)
-    AI->setName(Args[Idx]);
+  for (auto &Arg : F->args())
+    Arg.setName(Args[Idx++]);
 
   return F;
 }
 
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator AI = F->arg_begin();
-  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
-    // Create an alloca for this variable.
-    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
-    // Store the initial value into the alloca.
-    Builder.CreateStore(AI, Alloca);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = Alloca;
-  }
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
 
   // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
 
   // Create a new basic block to start insertion into.
   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   Builder.SetInsertPoint(BB);
 
-  // Add all arguments to the symbol table and create their allocas.
-  Proto->CreateArgumentAllocas(TheFunction);
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args()) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
 
-  if (Value *RetVal = Body->Codegen()) {
+    // Store the initial value into the alloca.
+    Builder.CreateStore(&Arg, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Arg.getName()] = Alloca;
+  }
+
+  if (Value *RetVal = Body->codegen()) {
     // Finish off the function.
     Builder.CreateRet(RetVal);
 
     // Validate the generated code, checking for consistency.
     verifyFunction(*TheFunction);
 
-    // Optimize the function.
+    // Run the optimizer on the function.
     TheFPM->run(*TheFunction);
 
     return TheFunction;
@@ -1076,22 +1075,42 @@ Function *FunctionAST::Codegen() {
 
   // Error reading body, remove function.
   TheFunction->eraseFromParent();
 
-  if (Proto->isBinaryOp())
+  if (P.isBinaryOp())
     BinopPrecedence.erase(Proto->getOperatorName());
-  return 0;
+  return nullptr;
 }
 
 //===----------------------------------------------------------------------===//
 // Top-Level parsing and JIT Driver
 //===----------------------------------------------------------------------===//
 
-static ExecutionEngine *TheExecutionEngine;
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+  TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+  // Create a new pass manager attached to it.
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
 
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -1100,10 +1119,11 @@ static void HandleDefinition() {
 }
 
 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -1113,16 +1133,25 @@ static void HandleExtern() {
 
 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      TheExecutionEngine->finalizeObject();
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
 
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -1137,9 +1166,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
    case tok_def:
      HandleDefinition();
      break;
@@ -1159,13 +1188,13 @@ static void MainLoop() {
 
 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }
 
 /// printd - printf that takes a double prints it as "%f\n", returning 0.
 extern "C" double printd(double X) {
-  printf("%f\n", X);
+  fprintf(stderr, "%f\n", X);
   return 0;
 }
 
@@ -1177,7 +1206,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
 
   // Install standard binary operators.
   // 1 is lowest precedence.
@@ -1191,52 +1219,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();
 
-  // Make the module, which holds all the code.
-  std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
-  TheModule = Owner.get();
+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
 
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine =
-      EngineBuilder(std::move(Owner))
-          .setErrorStr(&ErrStr)
-          .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
-          .create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  legacy::FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  TheModule->setDataLayout(*TheExecutionEngine->getDataLayout());
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Promote allocas to registers.
-  OurFPM.add(createPromoteMemoryToRegisterPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
+  InitializeModuleAndPassManager();
 
   // Run the main "interpreter loop" now.
   MainLoop();
 
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
   return 0;
 }
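One detail worth noting before the Chapter 8 diff: the CMakeLists hunks in this patch swap the MCJIT/RuntimeDyld components for Object and add export_executable_symbols() because the ORC-based KaleidoscopeJIT resolves unresolved externals against the running process itself (falling back to RTDyldMemoryManager::getSymbolAddressInProcess). A sketch of what that requires of the host binary; the surrounding claim about the resolver is an assumption about the helper header, not something shown in this diff:

    // The JIT can only bind 'extern putchard(x);' calls if the executable
    // exports its own symbols. Without export_executable_symbols() the linker
    // may strip or hide them, and TheJIT->findSymbol("putchard") would come
    // back null at run time.
    extern "C" double putchard(double X) {
      fputc((char)X, stderr);
      return 0;
    }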
diff --git a/examples/Kaleidoscope/Chapter8/CMakeLists.txt b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
index f94ed7436189..d9b5cc421be3 100644
--- a/examples/Kaleidoscope/Chapter8/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
@@ -1,8 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
-  MCJIT
-  RuntimeDyld
+  Object
   Support
   native
   )
@@ -10,3 +9,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch8
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch8)
diff --git a/examples/Kaleidoscope/Chapter8/toy.cpp b/examples/Kaleidoscope/Chapter8/toy.cpp
index 71bc2f684027..289209b3df49 100644
--- a/examples/Kaleidoscope/Chapter8/toy.cpp
+++ b/examples/Kaleidoscope/Chapter8/toy.cpp
@@ -1,27 +1,23 @@
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
 #include <cctype>
 #include <cstdio>
-#include <iostream>
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;
 
 //===----------------------------------------------------------------------===//
 // Lexer
 //===----------------------------------------------------------------------===//
@@ -96,20 +92,17 @@ struct DebugInfo {
   DICompileUnit *TheCU;
   DIType *DblTy;
   std::vector<DIScope *> LexicalBlocks;
-  std::map<const PrototypeAST *, DIScope *> FnScopeMap;
 
   void emitLocation(ExprAST *AST);
   DIType *getDoubleTy();
 } KSDbgInfo;
 
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal;             // Filled in if tok_number
 struct SourceLocation {
   int Line;
   int Col;
 };
 static SourceLocation CurLoc;
-static SourceLocation LexLoc = { 1, 0 };
+static SourceLocation LexLoc = {1, 0};
 
 static int advance() {
   int LastChar = getchar();
 
@@ -122,6 +115,9 @@ static int advance() {
   return LastChar;
 }
 
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
 /// gettok - Return the next token from standard input.
 static int gettok() {
   static int LastChar = ' ';
@@ -167,7 +163,7 @@ static int gettok() {
       LastChar = advance();
     } while (isdigit(LastChar) || LastChar == '.');
 
-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }
 
@@ -196,7 +192,7 @@ static int gettok() {
 //===----------------------------------------------------------------------===//
 
 namespace {
-std::ostream &indent(std::ostream &O, int size) {
+raw_ostream &indent(raw_ostream &O, int size) {
   return O << std::string(size, ' ');
 }
 
@@ -205,14 +201,14 @@ class ExprAST {
   SourceLocation Loc;
 
 public:
+  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+  virtual ~ExprAST() {}
+  virtual Value *codegen() = 0;
   int getLine() const { return Loc.Line; }
   int getCol() const { return Loc.Col; }
-  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
-  virtual std::ostream &dump(std::ostream &out, int ind) {
+  virtual raw_ostream &dump(raw_ostream &out, int ind) {
     return out << ':' << getLine() << ':' << getCol() << '\n';
   }
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
 };
 
 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -220,11 +216,11 @@ class NumberExprAST : public ExprAST {
   double Val;
 
 public:
-  NumberExprAST(double val) : Val(val) {}
-  std::ostream &dump(std::ostream &out, int ind) override {
+  NumberExprAST(double Val) : Val(Val) {}
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     return ExprAST::dump(out << Val, ind);
   }
-  Value *Codegen() override;
+  Value *codegen() override;
 };
 
 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -232,93 +228,99 @@ class VariableExprAST : public ExprAST {
   std::string Name;
 
 public:
-  VariableExprAST(SourceLocation Loc, const std::string &name)
-      : ExprAST(Loc), Name(name) {}
+  VariableExprAST(SourceLocation Loc, const std::string &Name)
+      : ExprAST(Loc), Name(Name) {}
   const std::string &getName() const { return Name; }
-  std::ostream &dump(std::ostream &out, int ind) override {
+  Value *codegen() override;
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     return ExprAST::dump(out << Name, ind);
   }
-  Value *Codegen() override;
 };
 
 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;
 
 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  std::ostream &dump(std::ostream &out, int ind) override {
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     ExprAST::dump(out << "unary" << Opcode, ind);
     Operand->dump(out, ind + 1);
     return out;
  }
-  Value *Codegen() override;
 };
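The Chapter 8 hunks above switch the AST dump() helpers from std::ostream to LLVM's raw_ostream, which is why <iostream> drops out of the includes. A small usage sketch, assuming only the classes shown above:

    #include "llvm/Support/raw_ostream.h"

    // Print an AST node to stderr; llvm::errs() is the raw_ostream analogue
    // of std::cerr, so the example needs no <iostream>.
    static void debugDump(ExprAST &E) {
      E.dump(llvm::errs(), /*ind=*/0) << '\n'; // dump() returns the stream
    }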
class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr<ExprAST> LHS, RHS; public: - BinaryExprAST(SourceLocation Loc, char op, ExprAST *lhs, ExprAST *rhs) - : ExprAST(Loc), Op(op), LHS(lhs), RHS(rhs) {} - std::ostream &dump(std::ostream &out, int ind) override { + BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "binary" << Op, ind); LHS->dump(indent(out, ind) << "LHS:", ind + 1); RHS->dump(indent(out, ind) << "RHS:", ind + 1); return out; } - Value *Codegen() override; }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector<ExprAST *> Args; + std::vector<std::unique_ptr<ExprAST>> Args; public: - CallExprAST(SourceLocation Loc, const std::string &callee, - std::vector<ExprAST *> &args) - : ExprAST(Loc), Callee(callee), Args(args) {} - std::ostream &dump(std::ostream &out, int ind) override { + CallExprAST(SourceLocation Loc, const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "call " << Callee, ind); - for (ExprAST *Arg : Args) + for (const auto &Arg : Args) Arg->dump(indent(out, ind + 1), ind + 1); return out; } - Value *Codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr<ExprAST> Cond, Then, Else; public: - IfExprAST(SourceLocation Loc, ExprAST *cond, ExprAST *then, ExprAST *_else) - : ExprAST(Loc), Cond(cond), Then(then), Else(_else) {} - std::ostream &dump(std::ostream &out, int ind) override { + IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond, + std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else) + : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)), + Else(std::move(Else)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "if", ind); Cond->dump(indent(out, ind) << "Cond:", ind + 1); Then->dump(indent(out, ind) << "Then:", ind + 1); Else->dump(indent(out, ind) << "Else:", ind + 1); return out; } - Value *Codegen() override; }; /// ForExprAST - Expression class for for/in.
class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr<ExprAST> Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - std::ostream &dump(std::ostream &out, int ind) override { + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "for", ind); Start->dump(indent(out, ind) << "Cond:", ind + 1); End->dump(indent(out, ind) << "End:", ind + 1); @@ -326,47 +328,49 @@ public: Body->dump(indent(out, ind) << "Body:", ind + 1); return out; } - Value *Codegen() override; }; /// VarExprAST - Expression class for var/in class VarExprAST : public ExprAST { - std::vector<std::pair<std::string, ExprAST *> > VarNames; - ExprAST *Body; + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + std::unique_ptr<ExprAST> Body; public: - VarExprAST(const std::vector<std::pair<std::string, ExprAST *> > &varnames, - ExprAST *body) - : VarNames(varnames), Body(body) {} - - std::ostream &dump(std::ostream &out, int ind) override { + VarExprAST( + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, + std::unique_ptr<ExprAST> Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "var", ind); for (const auto &NamedVar : VarNames) NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1); Body->dump(indent(out, ind) << "Body:", ind + 1); return out; } - Value *Codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, -/// which captures its argument names as well as if it is an operator. +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. class PrototypeAST { std::string Name; std::vector<std::string> Args; - bool isOperator; + bool IsOperator; unsigned Precedence; // Precedence if a binary op. int Line; public: - PrototypeAST(SourceLocation Loc, const std::string &name, - const std::vector<std::string> &args, bool isoperator = false, - unsigned prec = 0) - : Name(name), Args(args), isOperator(isoperator), Precedence(prec), - Line(Loc.Line) {} + PrototypeAST(SourceLocation Loc, const std::string &Name, + std::vector<std::string> Args, bool IsOperator = false, + unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec), Line(Loc.Line) {} + Function *codegen(); + const std::string &getName() const { return Name; } - bool isUnaryOp() const { return isOperator && Args.size() == 1; } - bool isBinaryOp() const { return isOperator && Args.size() == 2; } + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); @@ -374,29 +378,25 @@ public: } unsigned getBinaryPrecedence() const { return Precedence; } - - Function *Codegen(); - - void CreateArgumentAllocas(Function *F); - const std::vector<std::string> &getArgs() const { return Args; } + int getLine() const { return Line; } };
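// Illustrative aside (BuildExampleSum is not part of the tutorial sources; it
// only demonstrates the ownership model introduced above): with the AST nodes
// now holding their children through std::unique_ptr, a subtree such as
// "x + 1" is assembled by moving each child into its parent, and destroying
// the root frees the whole expression tree.
static std::unique_ptr<ExprAST> BuildExampleSum() {
  auto LHS = llvm::make_unique<VariableExprAST>(CurLoc, "x");
  auto RHS = llvm::make_unique<NumberExprAST>(1.0);
  return llvm::make_unique<BinaryExprAST>(CurLoc, '+', std::move(LHS),
                                          std::move(RHS));
}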
/// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - std::ostream &dump(std::ostream &out, int ind) { + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); + raw_ostream &dump(raw_ostream &out, int ind) { indent(out, ind) << "FunctionAST\n"; ++ind; indent(out, ind) << "Body:"; return Body ? Body->dump(out, ind) : out << "null\n"; } - - Function *Codegen(); }; } // end anonymous namespace @@ -427,25 +427,42 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr<ExprAST> Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr<PrototypeAST> ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = llvm::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { std::string IdName = IdentifierStr; SourceLocation LitLoc = CurLoc; @@ -453,17 +470,17 @@ getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(LitLoc, IdName); + return llvm::make_unique<VariableExprAST>(LitLoc, IdName); // Call. getNextToken(); // eat ( - std::vector<ExprAST *> Args; + std::vector<std::unique_ptr<ExprAST>> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -477,62 +494,43 @@ // Eat the ')'. getNextToken(); - return new CallExprAST(LitLoc, IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr<ExprAST> ParseIfExpr() { SourceLocation IfLoc = CurLoc; getNextToken(); // eat the if. // condition.
- ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(IfLoc, Cond, Then, Else); + return llvm::make_unique<IfExprAST>(IfLoc, std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr<ExprAST> ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -545,43 +543,44 @@ return Error("expected '=' after for"); getNextToken(); // eat '='. - ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr<ExprAST> Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression -static ExprAST *ParseVarExpr() { +static std::unique_ptr<ExprAST> ParseVarExpr() { getNextToken(); // eat the var. - std::vector<std::pair<std::string, ExprAST *> > VarNames; + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; // At least one variable name is required. if (CurTok != tok_identifier) @@ -592,16 +591,16 @@ getNextToken(); // eat identifier. // Read the optional initializer. - ExprAST *Init = 0; + std::unique_ptr<ExprAST> Init = nullptr; if (CurTok == '=') { getNextToken(); // eat the '='. Init = ParseExpression(); - if (Init == 0) - return 0; + if (!Init) + return nullptr; } - VarNames.push_back(std::make_pair(Name, Init)); + VarNames.push_back(std::make_pair(Name, std::move(Init))); // End of var list, exit loop. if (CurTok != ',') @@ -617,11 +616,11 @@ return Error("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new VarExprAST(VarNames, Body); + return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr /// ::= ifexpr /// ::= forexpr /// ::= varexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr<ExprAST> ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression");
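// Worked example for the unary parser that follows (illustrative, using the
// tutorial's user-defined operators): after "def unary!(v) if v then 0 else 1;"
// the input "!!x" parses as follows. ParseUnary sees '!', consumes it, and
// recurses; the inner call sees '!' again and recurses once more; the
// innermost call falls through to ParsePrimary and yields the variable x.
// Unwinding builds UnaryExprAST('!', UnaryExprAST('!', x)), and each level
// later codegens to a call to the user-supplied function "unary!".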
@@ -653,7 +652,7 @@ /// unary /// ::= primary /// ::= '!' unary -static ExprAST *ParseUnary() { +static std::unique_ptr<ExprAST> ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); @@ -661,14 +660,15 @@ // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); - if (ExprAST *Operand = ParseUnary()) - return new UnaryExprAST(Opc, Operand); - return 0; + if (auto Operand = ParseUnary()) + return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; } /// binoprhs /// ::= ('+' unary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -684,40 +684,41 @@ getNextToken(); // eat binop // Parse the unary expression after the binary operator. - ExprAST *RHS = ParseUnary(); + auto RHS = ParseUnary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinLoc, BinOp, LHS, RHS); + LHS = llvm::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS), + std::move(RHS)); } } /// expression /// ::= unary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParseUnary(); +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' /// ::= binary LETTER number? (id, id) /// ::= unary LETTER (id) -static PrototypeAST *ParsePrototype() { +static std::unique_ptr<PrototypeAST> ParsePrototype() { std::string FnName; SourceLocation FnLoc = CurLoc; @@ -777,35 +778,36 @@ if (Kind && ArgNames.size() != Kind) return ErrorP("Invalid number of operands for operator"); - return new PrototypeAST(FnLoc, FnName, ArgNames, Kind != 0, BinaryPrecedence); + return llvm::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0, + BinaryPrecedence); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr<FunctionAST> ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { SourceLocation FnLoc = CurLoc; - if (ExprAST *E = ParseExpression()) { + if (auto E = ParseExpression()) { // Make an anonymous proto.
- PrototypeAST *Proto = - new PrototypeAST(FnLoc, "main", std::vector<std::string>()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique<PrototypeAST>(FnLoc, "__anon_expr", + std::vector<std::string>()); + return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr<PrototypeAST> ParseExtern() { getNextToken(); // eat extern. return ParsePrototype(); } @@ -814,7 +816,7 @@ // Debug Info Support //===----------------------------------------------------------------------===// -static DIBuilder *DBuilder; +static std::unique_ptr<DIBuilder> DBuilder; DIType *DebugInfo::getDoubleTy() { if (DblTy) @@ -846,21 +848,36 @@ static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) { for (unsigned i = 0, e = NumArgs; i != e; ++i) EltTys.push_back(DblTy); - return DBuilder->createSubroutineType(Unit, - DBuilder->getOrCreateTypeArray(EltTys)); + return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys)); } //===----------------------------------------------------------------------===// // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr<Module> TheModule; static std::map<std::string, AllocaInst *> NamedValues; -static legacy::FunctionPassManager *TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; Value *ErrorV(const char *Str) { Error(Str); - return 0; + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; } /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of @@ -869,19 +886,19 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } -Value *NumberExprAST::Codegen() { +Value *NumberExprAST::codegen() { KSDbgInfo.emitLocation(this); return ConstantFP::get(getGlobalContext(), APFloat(Val)); }
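// Illustrative sketch (emitCallToSinExample is not part of the tutorial; it
// reuses this file's globals Builder and getFunction()): because prototypes
// now live in FunctionProtos rather than in any single Module, codegen can
// ask getFunction() for "sin" at any time and a fresh declaration is emitted
// into the current module if needed, exactly as CallExprAST::codegen does
// further below.
static Value *emitCallToSinExample(Value *Arg) {
  Function *SinF = getFunction("sin"); // assumes "extern sin(x);" was parsed
  if (!SinF)
    return nullptr;
  return Builder.CreateCall(SinF, Arg, "calltmp");
}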
-Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - if (V == 0) + if (!V) return ErrorV("Unknown variable name"); KSDbgInfo.emitLocation(this); @@ -889,20 +906,20 @@ return Builder.CreateLoad(V, Name.c_str()); } -Value *UnaryExprAST::Codegen() { - Value *OperandV = Operand->Codegen(); - if (OperandV == 0) - return 0; +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; - Function *F = TheModule->getFunction(std::string("unary") + Opcode); - if (F == 0) + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) return ErrorV("Unknown unary operator"); KSDbgInfo.emitLocation(this); return Builder.CreateCall(F, OperandV, "unop"); } -Value *BinaryExprAST::Codegen() { +Value *BinaryExprAST::codegen() { KSDbgInfo.emitLocation(this); // Special case '=' because we don't want to emit the LHS as an expression. @@ -911,27 +928,27 @@ // This assume we're building without RTTI because LLVM builds that way by // default. If you build LLVM with RTTI this can be changed to a // dynamic_cast for automatic error checking. - VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS); + VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); if (!LHSE) return ErrorV("destination of '=' must be a variable"); // Codegen the RHS. - Value *Val = RHS->Codegen(); - if (Val == 0) - return 0; + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; // Look up the name. Value *Variable = NamedValues[LHSE->getName()]; - if (Variable == 0) + if (!Variable) return ErrorV("Unknown variable name"); Builder.CreateStore(Val, Variable); return Val; } - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) - return 0; + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { case '+': @@ -951,19 +968,19 @@ // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. - Function *F = TheModule->getFunction(std::string("binary") + Op); + Function *F = getFunction(std::string("binary") + Op); assert(F && "binary operator not found!"); - Value *Ops[] = { L, R }; + Value *Ops[] = {L, R}; return Builder.CreateCall(F, Ops, "binop"); } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { KSDbgInfo.emitLocation(this); // Look up the name in the global module table. - Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + Function *CalleeF = getFunction(Callee); + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. @@ -972,20 +989,20 @@ std::vector<Value *> ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Value *IfExprAST::Codegen() { +Value *IfExprAST::codegen() { KSDbgInfo.emitLocation(this); - Value *CondV = Cond->Codegen(); - if (CondV == 0) - return 0; + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. CondV = Builder.CreateFCmpONE( @@ -1005,9 +1022,9 @@ // Emit then value.
Builder.SetInsertPoint(ThenBB); - Value *ThenV = Then->Codegen(); - if (ThenV == 0) - return 0; + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. @@ -1017,9 +1034,9 @@ TheFunction->getBasicBlockList().push_back(ElseBB); Builder.SetInsertPoint(ElseBB); - Value *ElseV = Else->Codegen(); - if (ElseV == 0) - return 0; + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. @@ -1036,27 +1053,26 @@ return PN; } -Value *ForExprAST::Codegen() { - // Output this as: - // var = alloca double - // ... - // start = startexpr - // store start -> var - // goto loop - // loop: - // ... - // bodyexpr - // ... - // loopend: - // step = stepexpr - // endcond = endexpr - // - // curvar = load var - // nextvar = curvar + step - // store nextvar -> var - // br endcond, loop, endloop - // outloop: - +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// goto loop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// endcond = endexpr +// +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { Function *TheFunction = Builder.GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. @@ -1065,9 +1081,9 @@ KSDbgInfo.emitLocation(this); // Emit the start code first, without 'variable' in scope. - Value *StartVal = Start->Codegen(); - if (StartVal == 0) - return 0; + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; // Store the value into the alloca. Builder.CreateStore(StartVal, Alloca); @@ -1091,24 +1107,24 @@ // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. - if (Body->Codegen() == 0) - return 0; + if (!Body->codegen()) + return nullptr; // Emit the step value. - Value *StepVal; + Value *StepVal = nullptr; if (Step) { - StepVal = Step->Codegen(); - if (StepVal == 0) - return 0; + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; } else { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } // Compute the end condition. - Value *EndCond = End->Codegen(); - if (EndCond == 0) - return EndCond; + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. @@ -1140,7 +1156,7 @@ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } -Value *VarExprAST::Codegen() { +Value *VarExprAST::codegen() { std::vector<AllocaInst *> OldBindings; Function *TheFunction = Builder.GetInsertBlock()->getParent(); @@ -1148,7 +1164,7 @@ // Register all variables and emit their initializer.
for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { const std::string &VarName = VarNames[i].first; - ExprAST *Init = VarNames[i].second; + ExprAST *Init = VarNames[i].second.get(); // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff @@ -1157,9 +1173,9 @@ // var a = a in ... # refers to outer 'a'. Value *InitVal; if (Init) { - InitVal = Init->Codegen(); - if (InitVal == 0) - return 0; + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; } else { // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); } @@ -1178,9 +1194,9 @@ KSDbgInfo.emitLocation(this); // Codegen the body, now that all vars are in scope. - Value *BodyVal = Body->Codegen(); - if (BodyVal == 0) - return 0; + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; // Pop all our variables from scope. for (unsigned i = 0, e = VarNames.size(); i != e; ++i) @@ -1190,7 +1206,7 @@ return BodyVal; } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); @@ -1198,106 +1214,80 @@ FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); Function *F = - Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) - AI->setName(Args[Idx]); + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); // Create a subprogram DIE for this function.
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(), KSDbgInfo.TheCU->getDirectory()); DIScope *FContext = Unit; - unsigned LineNo = Line; - unsigned ScopeLine = Line; + unsigned LineNo = P.getLine(); + unsigned ScopeLine = LineNo; DISubprogram *SP = DBuilder->createFunction( - FContext, Name, StringRef(), Unit, LineNo, - CreateFunctionType(Args.size(), Unit), false /* internal linkage */, - true /* definition */, ScopeLine, DINode::FlagPrototyped, false, F); - - KSDbgInfo.FnScopeMap[this] = SP; - return F; -} - -/// CreateArgumentAllocas - Create an alloca for each argument and register the -/// argument in the symbol table so that references to it will succeed. -void PrototypeAST::CreateArgumentAllocas(Function *F) { - Function::arg_iterator AI = F->arg_begin(); - for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { - // Create an alloca for this variable. - AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); - - // Create a debug descriptor for the variable. - DIScope *Scope = KSDbgInfo.LexicalBlocks.back(); - DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(), - KSDbgInfo.TheCU->getDirectory()); - DILocalVariable *D = DBuilder->createLocalVariable( - dwarf::DW_TAG_arg_variable, Scope, Args[Idx], Unit, Line, - KSDbgInfo.getDoubleTy(), Idx); - - DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), - DebugLoc::get(Line, 0, Scope), - Builder.GetInsertBlock()); - - // Store the initial value into the alloca. - Builder.CreateStore(AI, Alloca); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = Alloca; - } -} - -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; + FContext, P.getName(), StringRef(), Unit, LineNo, + CreateFunctionType(TheFunction->arg_size(), Unit), + false /* internal linkage */, true /* definition */, ScopeLine, + DINode::FlagPrototyped, false); + TheFunction->setSubprogram(SP); // Push the current scope. - KSDbgInfo.LexicalBlocks.push_back(KSDbgInfo.FnScopeMap[Proto]); + KSDbgInfo.LexicalBlocks.push_back(SP); // Unset the location for the prologue emission (leading instructions with no // location in a function are considered part of the prologue and the debugger // will run past them when breaking on a function) KSDbgInfo.emitLocation(nullptr); - // If this is an operator, install it. - if (Proto->isBinaryOp()) - BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + unsigned ArgIdx = 0; + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); - // Create a new basic block to start insertion into. - BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); - Builder.SetInsertPoint(BB); + // Create a debug descriptor for the variable. + DILocalVariable *D = DBuilder->createParameterVariable( + SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(), + true); - // Add all arguments to the symbol table and create their allocas. - Proto->CreateArgumentAllocas(TheFunction); + DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), + DebugLoc::get(LineNo, 0, SP), + Builder.GetInsertBlock()); - KSDbgInfo.emitLocation(Body); + // Store the initial value into the alloca. 
+ Builder.CreateStore(&Arg, Alloca); - if (Value *RetVal = Body->Codegen()) { + // Add arguments to variable symbol table. + NamedValues[Arg.getName()] = Alloca; + } + + KSDbgInfo.emitLocation(Body.get()); + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -1307,36 +1297,36 @@ // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); - // Optimize the function. - TheFPM->run(*TheFunction); - return TheFunction; } // Error reading body, remove function. TheFunction->eraseFromParent(); - if (Proto->isBinaryOp()) + if (P.isBinaryOp()) BinopPrecedence.erase(Proto->getOperatorName()); // Pop off the lexical block for the function since we added it // unconditionally. KSDbgInfo.LexicalBlocks.pop_back(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// -static ExecutionEngine *TheExecutionEngine; +static void InitializeModule() { + // Open a new module. + TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); +} static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (!F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (!FnAST->codegen()) fprintf(stderr, "Error reading function definition:"); - } } else { // Skip token for error recovery. getNextToken(); @@ -1344,10 +1334,11 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (!P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (!ProtoAST->codegen()) fprintf(stderr, "Error reading extern"); - } + else + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); } else { // Skip token for error recovery. getNextToken(); @@ -1356,8 +1347,8 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. - if (FunctionAST *F = ParseTopLevelExpr()) { - if (!F->Codegen()) { + if (auto FnAST = ParseTopLevelExpr()) { + if (!FnAST->codegen()) { fprintf(stderr, "Error generating code for top level expr"); } } else { @@ -1372,9 +1363,9 @@ static void MainLoop() { switch (CurTok) { case tok_eof: return; - case ';': + case ';': // ignore top-level semicolons. getNextToken(); - break; // ignore top-level semicolons. + break; case tok_def: HandleDefinition(); break; @@ -1394,13 +1385,13 @@ /// putchard - putchar that takes a double and returns 0. extern "C" double putchard(double X) { - putchar((char)X); + fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. extern "C" double printd(double X) { - printf("%f\n", X); + fprintf(stderr, "%f\n", X); return 0; } @@ -1412,7 +1403,6 @@ int main() { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); - LLVMContext &Context = getGlobalContext(); // Install standard binary operators. // 1 is lowest precedence. @@ -1425,9 +1415,9 @@ int main() { // Prime the first token. getNextToken(); - // Make the module, which holds all the code. - std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context); - TheModule = Owner.get(); + TheJIT = llvm::make_unique<KaleidoscopeJIT>(); + + InitializeModule();
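// A minimal usage sketch for TheJIT (addModule, findSymbol, and
// getTargetMachine come from the ../include/KaleidoscopeJIT.h header that
// this patch adds; treat the exact signatures as assumptions): after
// MainLoop() fills TheModule, the driver could hand the module to the JIT
// and invoke the anonymous top-level expression, mirroring what the Orc
// examples later in this patch do:
//
//   TheJIT->addModule(std::move(TheModule));
//   auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
//   double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
//   fprintf(stderr, "Evaluated to %f\n", FP());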
// Add the current debug info version into the module. TheModule->addModuleFlag(Module::Warning, "Debug Info Version", @@ -1438,7 +1428,7 @@ int main() { TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2); // Construct the DIBuilder, we do this here because we need the module. - DBuilder = new DIBuilder(*TheModule); + DBuilder = llvm::make_unique<DIBuilder>(*TheModule); // Create the compile unit for the module. // Currently down as "fib.ks" as a filename since we're redirecting stdin @@ -1446,47 +1436,9 @@ KSDbgInfo.TheCU = DBuilder->createCompileUnit( dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0); - // Create the JIT. This takes ownership of the module. - std::string ErrStr; - TheExecutionEngine = - EngineBuilder(std::move(Owner)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>()) - .create(); - if (!TheExecutionEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - legacy::FunctionPassManager OurFPM(TheModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - TheModule->setDataLayout(*TheExecutionEngine->getDataLayout()); -#if 0 - // Provide basic AliasAnalysis support for GVN. - OurFPM.add(createBasicAliasAnalysisPass()); - // Promote allocas to registers. - OurFPM.add(createPromoteMemoryToRegisterPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - OurFPM.add(createInstructionCombiningPass()); - // Reassociate expressions. - OurFPM.add(createReassociatePass()); - // Eliminate Common SubExpressions. - OurFPM.add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). - OurFPM.add(createCFGSimplificationPass()); - #endif - OurFPM.doInitialization(); - - // Set the global so the code gen can use this. - TheFPM = &OurFPM; - // Run the main "interpreter loop" now. MainLoop(); - TheFPM = 0; - // Finalize the debug info. DBuilder->finalize(); diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp index c9b2c6af5658..78184f5d32cd 100644 --- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp +++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp @@ -39,14 +39,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -87,7 +87,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -95,11 +95,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -140,7 +140,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -152,7 +152,7 @@ /// BinaryExprAST - Expression class for a binary operator.
struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, - std::unique_ptr<ExprAST> RHS) + std::unique_ptr<ExprAST> RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -224,7 +224,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -268,7 +268,7 @@ static std::map<char, int> BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -294,12 +294,12 @@ static std::unique_ptr<ExprAST> ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr<ExprAST> ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique<VariableExprAST>(IdName); - + // Call. getNextToken(); // eat ( std::vector<std::unique_ptr<ExprAST>> Args; @@ -319,7 +319,7 @@ static std::unique_ptr<ExprAST> ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); } @@ -336,7 +336,7 @@ static std::unique_ptr<ExprAST> ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -346,29 +346,29 @@ /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr<ExprAST> ParseIfExpr() { getNextToken(); // eat the if. - + // condition. auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), std::move(Else)); } @@ -379,26 +379,25 @@ static std::unique_ptr<ExprAST> ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr<ExprAST> Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; @@ -420,7 +419,7 @@ static std::unique_ptr<ExprAST> ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr<ExprAST> ParseVarExpr() { getNextToken(); // eat the var. @@ -430,7 +429,7 @@ static std::unique_ptr<ExprAST> ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier.
@@ -439,31 +438,31 @@ static std::unique_ptr<ExprAST> ParseVarExpr() { std::unique_ptr<ExprAST> Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique<VarExprAST>(std::move(VarBindings), std::move(Body)); } @@ -493,7 +492,7 @@ static std::unique_ptr<ExprAST> ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -509,21 +508,21 @@ static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); @@ -532,7 +531,7 @@ if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); } @@ -545,7 +544,7 @@ static std::unique_ptr<ExprAST> ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -555,10 +554,10 @@ /// ::= unary LETTER (id) static std::unique_ptr<PrototypeAST> ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -584,7 +583,7 @@ FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -594,23 +593,23 @@ } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector<std::string> ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator.
if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -691,10 +690,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map<std::string, std::unique_ptr<PrototypeAST>> PrototypeMap; - + LLVMContext &Context; std::unique_ptr<TargetMachine> TM; - + PrototypeMap Prototypes; }; @@ -717,7 +716,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -748,7 +747,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -760,7 +759,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -783,7 +782,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast<VariableExprAST&>(*LHS); + auto &LHSVar = static_cast<VariableExprAST&>(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -795,11 +794,11 @@ } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -812,7 +811,7 @@ "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -820,7 +819,7 @@ Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -836,7 +835,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -846,49 +845,49 @@ Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function.
BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -901,7 +900,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -914,40 +913,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -957,52 +956,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. 
- EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector<AllocaInst *> OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1014,22 +1012,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1042,7 +1040,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. - std::vector<Type*> Doubles(Args.size(), + std::vector<Type*> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1055,26 +1053,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1087,7 +1085,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table.
C.NamedValues[Args[Idx]] = Alloca; @@ -1096,19 +1094,19 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1121,7 +1119,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1170,16 +1168,14 @@ public: : Session(Session), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())), LazyEmitLayer(CompileLayer), - CompileCallbacks(LazyEmitLayer, CCMgrMemMgr, Session.getLLVMContext(), - reinterpret_cast<TargetAddress>(EarthShatteringKaboom), - 64) {} + CompileCallbacks(reinterpret_cast<TargetAddress>(EarthShatteringKaboom)) {} std::string mangle(const std::string &Name) { std::string MangledName; { raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, - *Session.getTarget().getDataLayout()); + Session.getTarget().createDataLayout()); } return MangledName; } @@ -1236,7 +1232,7 @@ private: RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) { auto DefI = FunctionDefs.find(Name); if (DefI == FunctionDefs.end()) - return 0; + return nullptr; // Return the address of the stub. // Take the FunctionAST out of the map. @@ -1262,8 +1258,7 @@ private: // the function. The resulting CallbackInfo type will let us set the // compile and update actions for the callback, and get a pointer to // the jit trampoline that we need to call to trigger those actions. - auto CallbackInfo = - CompileCallbacks.getCompileCallback(F->getContext()); + auto CallbackInfo = CompileCallbacks.getCompileCallback(); // Step 3) Create a stub that will indirectly call the body of this // function once it is compiled. Initially, set the function @@ -1313,7 +1308,7 @@ private: std::map<std::string, std::unique_ptr<FunctionAST>> FunctionDefs; - JITCompileCallbackManager<decltype(LazyEmitLayer), OrcX86_64> CompileCallbacks; + LocalJITCompileCallbackManager<OrcX86_64> CompileCallbacks; }; static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) { @@ -1350,7 +1345,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1393,20 +1388,20 @@ //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1443,4 +1438,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/initial/toy.cpp b/examples/Kaleidoscope/Orc/initial/toy.cpp index 7e99c0f5ba54..2a6bb92246d0 100644 --- a/examples/Kaleidoscope/Orc/initial/toy.cpp +++ b/examples/Kaleidoscope/Orc/initial/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
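Two details in the hunks above deserve a gloss. CreateEntryBlockAlloca always builds the alloca at the top of the function's entry block, which is exactly the placement LLVM's mem2reg pass requires to promote the slot to an SSA register; and the second CreateAlloca argument (changed here from 0 to nullptr) is the optional array-size operand, unset for a scalar slot. A self-contained sketch of the idiom; makeDoubleSlot is a hypothetical name, not part of the patch:

  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include <string>
  using namespace llvm;

  static AllocaInst *makeDoubleSlot(Function &F, LLVMContext &Ctx,
                                    const std::string &Name) {
    // Insert at the very start of the entry block so mem2reg can see it.
    IRBuilder<> TmpB(&F.getEntryBlock(), F.getEntryBlock().begin());
    return TmpB.CreateAlloca(Type::getDoubleTy(Ctx), nullptr, Name);
  }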
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1160,7 +1158,7 @@ public: typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; KaleidoscopeJIT(SessionContext &Session) - : DL(*Session.getTarget().getDataLayout()), + : DL(Session.getTarget().createDataLayout()), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())) {} std::string mangle(const std::string &Name) { @@ -1201,7 +1199,7 @@ public: } private: - const DataLayout &DL; + const DataLayout DL; ObjLayerT ObjectLayer; CompileLayerT CompileLayer; }; @@ -1242,7 +1240,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1285,20 +1283,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
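The member change from 'const DataLayout &DL' to 'const DataLayout DL' in the KaleidoscopeJIT hunk above is a correctness fix, not style: createDataLayout() returns the DataLayout by value rather than handing out a pointer into the TargetMachine, so a reference member would bind to a temporary and dangle. A minimal sketch of the ownership pattern, using a hypothetical LayoutHolder rather than the tutorial's JIT class:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  class LayoutHolder {
  public:
    // Copies the temporary returned by createDataLayout() into the member.
    LayoutHolder(TargetMachine &TM) : DL(TM.createDataLayout()) {}
    const DataLayout &getDataLayout() const { return DL; }

  private:
    const DataLayout DL; // owned by value, so it outlives the constructor call
  };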
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1335,4 +1333,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp index 4b4c191171b4..5205b406ed71 100644 --- a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp +++ b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
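One subtle fix repeated in these files is 'auto LHSVar' becoming 'auto &LHSVar' in BinaryExprAST::IRGen: plain auto deduces a value type, so the old code copy-constructed an entire VariableExprAST just to read its name, while auto& binds a reference to the node that LHS already owns. A minimal sketch of the deduction difference, with hypothetical Node types:

  struct Node { virtual ~Node() {} };
  struct NamedNode : Node { const char *Name = "x"; };

  void inspect(Node &N) {
    // auto Copy = static_cast<NamedNode &>(N); // deduces NamedNode: copies
    auto &Ref = static_cast<NamedNode &>(N);    // deduces NamedNode &: no copy
    (void)Ref.Name;
  }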
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1162,7 +1160,7 @@ public: typedef LazyEmitLayerT::ModuleSetHandleT ModuleHandleT; KaleidoscopeJIT(SessionContext &Session) - : DL(*Session.getTarget().getDataLayout()), + : DL(Session.getTarget().createDataLayout()), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())), LazyEmitLayer(CompileLayer) {} @@ -1204,7 +1202,7 @@ public: } private: - const DataLayout &DL; + const DataLayout DL; ObjLayerT ObjectLayer; CompileLayerT CompileLayer; LazyEmitLayerT LazyEmitLayer; @@ -1246,7 +1244,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1289,20 +1287,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
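The recurring change from CreateStore(AI, Alloca) to CreateStore(&*AI, Alloca) tracks the ilist iterator cleanup in this import: Function::arg_iterator no longer converts implicitly to Argument*, so callers must dereference the iterator and take the address explicitly. A small sketch of the new spelling; nameAllArgs is a hypothetical helper, not part of the patch:

  #include "llvm/IR/Function.h"
  #include <string>
  using namespace llvm;

  static void nameAllArgs(Function &F) {
    unsigned Idx = 0;
    for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
         AI != AE; ++AI) {
      Argument *A = &*AI; // iterator -> Argument& -> Argument*
      A->setName("arg" + std::to_string(Idx++));
    }
  }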
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1339,4 +1337,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp index ca34de7e2244..ebaff49e89b2 100644 --- a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp +++ b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1170,7 +1168,7 @@ public: { raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, - *Session.getTarget().getDataLayout()); + Session.getTarget().createDataLayout()); } return MangledName; } @@ -1223,7 +1221,7 @@ private: RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) { auto DefI = FunctionDefs.find(Name); if (DefI == FunctionDefs.end()) - return 0; + return nullptr; // Take the FunctionAST out of the map. auto FnAST = std::move(DefI->second); @@ -1277,7 +1275,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1320,20 +1318,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1370,4 +1368,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h new file mode 100644 index 000000000000..0c825cc94c0e --- /dev/null +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -0,0 +1,114 @@ +//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains a simple JIT definition for use in the kaleidoscope tutorials. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H +#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/IR/Mangler.h" +#include "llvm/Support/DynamicLibrary.h" + +namespace llvm { +namespace orc { + +class KaleidoscopeJIT { +public: + typedef ObjectLinkingLayer<> ObjLayerT; + typedef IRCompileLayer CompileLayerT; + typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; + + KaleidoscopeJIT() + : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { + llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); + } + + TargetMachine &getTargetMachine() { return *TM; } + + ModuleHandleT addModule(std::unique_ptr M) { + // We need a memory manager to allocate memory and resolve symbols for this + // new module. Create one that resolves symbols by looking back into the + // JIT. + auto Resolver = createLambdaResolver( + [&](const std::string &Name) { + if (auto Sym = findMangledSymbol(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + return RuntimeDyld::SymbolInfo(nullptr); + }, + [](const std::string &S) { return nullptr; }); + auto H = CompileLayer.addModuleSet(singletonSet(std::move(M)), + make_unique(), + std::move(Resolver)); + + ModuleHandles.push_back(H); + return H; + } + + void removeModule(ModuleHandleT H) { + ModuleHandles.erase( + std::find(ModuleHandles.begin(), ModuleHandles.end(), H)); + CompileLayer.removeModuleSet(H); + } + + JITSymbol findSymbol(const std::string Name) { + return findMangledSymbol(mangle(Name)); + } + +private: + + std::string mangle(const std::string &Name) { + std::string MangledName; + { + raw_string_ostream MangledNameStream(MangledName); + Mangler::getNameWithPrefix(MangledNameStream, Name, DL); + } + return MangledName; + } + + template static std::vector singletonSet(T t) { + std::vector Vec; + Vec.push_back(std::move(t)); + return Vec; + } + + JITSymbol findMangledSymbol(const std::string &Name) { + // Search modules in reverse order: from last added to first added. + // This is the opposite of the usual search order for dlsym, but makes more + // sense in a REPL where we want to bind to the newest available definition. 
+ for (auto H : make_range(ModuleHandles.rbegin(), ModuleHandles.rend())) + if (auto Sym = CompileLayer.findSymbolIn(H, Name, true)) + return Sym; + + // If we can't find the symbol in the JIT, try looking in the host process. + if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name)) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); + + return nullptr; + } + + std::unique_ptr TM; + const DataLayout DL; + ObjLayerT ObjectLayer; + CompileLayerT CompileLayer; + std::vector ModuleHandles; +}; + +} // End namespace orc. +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index b2c53a9bb10e..3c485d4c964d 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/TargetSelect.h" #include #include + using namespace llvm; static Function* createAdd1(Module *M) { @@ -38,7 +39,7 @@ static Function* createAdd1(Module *M) { cast(M->getOrInsertFunction("add1", Type::getInt32Ty(M->getContext()), Type::getInt32Ty(M->getContext()), - (Type *)0)); + nullptr)); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. @@ -49,7 +50,7 @@ static Function* createAdd1(Module *M) { // Get pointers to the integer argument of the add1 function... assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg - Argument *ArgX = Add1F->arg_begin(); // Get the arg + Argument *ArgX = &*Add1F->arg_begin(); // Get the arg ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the add instruction, inserting it into the end of BB. @@ -69,7 +70,7 @@ static Function *CreateFibFunction(Module *M) { cast(M->getOrInsertFunction("fib", Type::getInt32Ty(M->getContext()), Type::getInt32Ty(M->getContext()), - (Type *)0)); + nullptr)); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF); @@ -79,7 +80,7 @@ static Function *CreateFibFunction(Module *M) { Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2); // Get pointer to the integer argument of the add1 function... - Argument *ArgX = FibF->arg_begin(); // Get the arg. + Argument *ArgX = &*FibF->arg_begin(); // Get the arg. ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the true_block. 
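[Aside: the KaleidoscopeJIT header added above is easiest to follow from the caller's side. The sketch below is illustrative only and is not part of this patch; the empty module stands in for the tutorial's IRGen output, and any real program would populate it before adding it to the JIT.]

    #include "KaleidoscopeJIT.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/TargetSelect.h"
    #include <cstdint>

    using namespace llvm;

    int main() {
      // The JIT constructor calls EngineBuilder().selectTarget(), so the
      // native target must be registered first.
      InitializeNativeTarget();
      InitializeNativeTargetAsmPrinter();
      InitializeNativeTargetAsmParser();

      orc::KaleidoscopeJIT JIT;

      // A placeholder module; the tutorial's parser/IRGen would fill this in.
      auto M = llvm::make_unique<Module>("demo", getGlobalContext());
      auto H = JIT.addModule(std::move(M));

      // findSymbol mangles the name with the JIT's DataLayout, searches
      // modules newest-first, then falls back to symbols in the host process.
      if (auto Sym = JIT.findSymbol("__anon_expr")) {
        double (*FP)() = (double (*)())(intptr_t)Sym.getAddress();
        FP();
      }

      JIT.removeModule(H); // Frees the module's compiled code.
      return 0;
    }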
@@ -129,10 +130,10 @@ public: n = 0; waitFor = 0; - int result = pthread_cond_init( &condition, NULL ); + int result = pthread_cond_init( &condition, nullptr ); assert( result == 0 ); - result = pthread_mutex_init( &mutex, NULL ); + result = pthread_mutex_init( &mutex, nullptr ); assert( result == 0 ); } @@ -261,21 +262,21 @@ int main() { struct threadParams fib2 = { EE, fibF, 42 }; pthread_t add1Thread; - int result = pthread_create( &add1Thread, NULL, callFunc, &add1 ); + int result = pthread_create( &add1Thread, nullptr, callFunc, &add1 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; } pthread_t fibThread1; - result = pthread_create( &fibThread1, NULL, callFunc, &fib1 ); + result = pthread_create( &fibThread1, nullptr, callFunc, &fib1 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; } pthread_t fibThread2; - result = pthread_create( &fibThread2, NULL, callFunc, &fib2 ); + result = pthread_create( &fibThread2, nullptr, callFunc, &fib2 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h index f0bdddc50ab7..36dcb89e0e08 100644 --- a/include/llvm-c/Analysis.h +++ b/include/llvm-c/Analysis.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_ANALYSIS_H #define LLVM_C_ANALYSIS_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h index f3b388bc4fb4..d1fc302767ba 100644 --- a/include/llvm-c/BitReader.h +++ b/include/llvm-c/BitReader.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_BITREADER_H #define LLVM_C_BITREADER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -34,36 +34,45 @@ extern "C" { /* Builds a module from the bitcode in the specified memory buffer, returning a reference to the module via the OutModule parameter. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ -LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage); + Optionally returns a human-readable error message via OutMessage. + This is deprecated. Use LLVMParseBitcode2. */ +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, + char **OutMessage); + +/* Builds a module from the bitcode in the specified memory buffer, returning a + reference to the module via the OutModule parameter. Returns 0 on success. */ +LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule); + +/* This is deprecated. Use LLVMParseBitcodeInContext2. */ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage); +LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule); + /** Reads a module from the specified path, returning via the OutMP parameter a module provider which performs lazy deserialization. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ + Optionally returns a human-readable error message via OutMessage. + This is deprecated. Use LLVMGetBitcodeModuleInContext2. 
*/ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutM, - char **OutMessage); + LLVMModuleRef *OutM, char **OutMessage); +/** Reads a module from the specified path, returning via the OutMP parameter a + * module provider which performs lazy deserialization. Returns 0 on success. */ +LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM); + +/* This is deprecated. Use LLVMGetBitcodeModule2. */ LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); - -/** Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ -LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); - -/** Deprecated: Use LLVMGetBitcodeModule instead. */ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); +LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM); /** * @} diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h index f25ad3a445f5..797d03179ab3 100644 --- a/include/llvm-c/BitWriter.h +++ b/include/llvm-c/BitWriter.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_BITWRITER_H #define LLVM_C_BITWRITER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 9dbcbfea387f..c8fda15c5ed6 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -15,7 +15,8 @@ #ifndef LLVM_C_CORE_H #define LLVM_C_CORE_H -#include "llvm-c/Support.h" +#include "llvm-c/ErrorHandling.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -40,15 +41,6 @@ extern "C" { * the LLVM intermediate representation as well as other related types * and utilities. * - * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore - * parameters must be passed as base types. Despite the declared types, most - * of the functions provided operate only on branches of the type hierarchy. - * The declared parameter names are descriptive and specify which type is - * required. Additionally, each type hierarchy is documented along with the - * functions that operate upon it. For more detail, refer to LLVM's C++ code. - * If in doubt, refer to Core.cpp, which performs parameter downcasts in the - * form unwrap(Param). - * * Many exotic languages can interoperate with C code but have a harder time * with C++ due to name mangling. So in addition to C, this interface enables * tools written in such languages. @@ -62,74 +54,6 @@ extern "C" { * @{ */ -/* Opaque types. */ - -/** - * The top-level container for all LLVM global data. See the LLVMContext class. - */ -typedef struct LLVMOpaqueContext *LLVMContextRef; - -/** - * The top-level container for all other LLVM Intermediate Representation (IR) - * objects. - * - * @see llvm::Module - */ -typedef struct LLVMOpaqueModule *LLVMModuleRef; - -/** - * Each value in the LLVM IR has a type, an LLVMTypeRef. - * - * @see llvm::Type - */ -typedef struct LLVMOpaqueType *LLVMTypeRef; - -/** - * Represents an individual value in LLVM IR. - * - * This models llvm::Value. - */ -typedef struct LLVMOpaqueValue *LLVMValueRef; - -/** - * Represents a basic block of instructions in LLVM IR. - * - * This models llvm::BasicBlock. - */ -typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; - -/** - * Represents an LLVM basic block builder. 
- * - * This models llvm::IRBuilder. - */ -typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; - -/** - * Interface used to provide a module to JIT or interpreter. - * This is now just a synonym for llvm::Module, but we have to keep using the - * different type to keep binary compatibility. - */ -typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; - -/** @see llvm::PassManagerBase */ -typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; - -/** @see llvm::PassRegistry */ -typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; - -/** - * Used to get the users and usees of a Value. - * - * @see llvm::Use */ -typedef struct LLVMOpaqueUse *LLVMUseRef; - - -/** - * @see llvm::DiagnosticInfo - */ -typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef; - typedef enum { LLVMZExtAttribute = 1<<0, LLVMSExtAttribute = 1<<1, @@ -248,8 +172,12 @@ typedef enum { /* Exception Handling Operators */ LLVMResume = 58, - LLVMLandingPad = 59 - + LLVMLandingPad = 59, + LLVMCleanupRet = 61, + LLVMCatchRet = 62, + LLVMCatchPad = 63, + LLVMCleanupPad = 64, + LLVMCatchSwitch = 65 } LLVMOpcode; typedef enum { @@ -268,7 +196,8 @@ typedef enum { LLVMPointerTypeKind, /**< Pointers */ LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */ LLVMMetadataTypeKind, /**< Metadata */ - LLVMX86_MMXTypeKind /**< X86 MMX */ + LLVMX86_MMXTypeKind, /**< X86 MMX */ + LLVMTokenTypeKind /**< Tokens */ } LLVMTypeKind; typedef enum { @@ -428,36 +357,11 @@ void LLVMInitializeCore(LLVMPassRegistryRef R); @see ManagedStatic */ void LLVMShutdown(void); - /*===-- Error handling ----------------------------------------------------===*/ char *LLVMCreateMessage(const char *Message); void LLVMDisposeMessage(char *Message); -typedef void (*LLVMFatalErrorHandler)(const char *Reason); - -/** - * Install a fatal error handler. By default, if LLVM detects a fatal error, it - * will call exit(1). This may not be appropriate in many contexts. For example, - * doing exit(1) will bypass many crash reporting/tracing system tools. This - * function allows you to install a callback that will be invoked prior to the - * call to exit(1). - */ -void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler); - -/** - * Reset the fatal error handler. This resets LLVM's fatal error handling - * behavior to the default. - */ -void LLVMResetFatalErrorHandler(void); - -/** - * Enable LLVM's built-in stack trace code. This intercepts the OS's crash - * signals and prints which component of LLVM you were in at the time if the - * crash. 
- */ -void LLVMEnablePrettyStackTrace(void); - /** * @defgroup LLVMCCoreContext Contexts * @@ -808,6 +712,7 @@ LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); /** @@ -819,6 +724,7 @@ LLVMTypeRef LLVMInt8Type(void); LLVMTypeRef LLVMInt16Type(void); LLVMTypeRef LLVMInt32Type(void); LLVMTypeRef LLVMInt64Type(void); +LLVMTypeRef LLVMInt128Type(void); LLVMTypeRef LLVMIntType(unsigned NumBits); unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); @@ -1022,7 +928,6 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); * @} */ - /** * @defgroup LLVMCCoreTypeSequential Sequential Types * @@ -1178,6 +1083,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ConstantInt) \ macro(ConstantPointerNull) \ macro(ConstantStruct) \ + macro(ConstantTokenNone) \ macro(ConstantVector) \ macro(GlobalValue) \ macro(GlobalAlias) \ @@ -1215,6 +1121,11 @@ LLVMTypeRef LLVMX86MMXType(void); macro(SwitchInst) \ macro(UnreachableInst) \ macro(ResumeInst) \ + macro(CleanupReturnInst) \ + macro(CatchReturnInst) \ + macro(FuncletPadInst) \ + macro(CatchPadInst) \ + macro(CleanupPadInst) \ macro(UnaryInstruction) \ macro(AllocaInst) \ macro(CastInst) \ @@ -1950,7 +1861,7 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name); void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); /** - * Add a target-dependent attribute to a fuction + * Add a target-dependent attribute to a function * @see llvm::AttrBuilder::addAttribute() */ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, @@ -2427,7 +2338,7 @@ void LLVMInstructionEraseFromParent(LLVMValueRef Inst); * * @see llvm::Instruction::getOpCode() */ -LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); +LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); /** * Obtain the predicate of an instruction. @@ -2780,6 +2691,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); +LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst); +void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, @@ -3020,6 +2933,6 @@ LLVMBool LLVMIsMultithreaded(void); #ifdef __cplusplus } -#endif /* !defined(__cplusplus) */ +#endif -#endif /* !defined(LLVM_C_CORE_H) */ +#endif /* LLVM_C_CORE_H */ diff --git a/include/llvm-c/ErrorHandling.h b/include/llvm-c/ErrorHandling.h new file mode 100644 index 000000000000..5a80bc5e654f --- /dev/null +++ b/include/llvm-c/ErrorHandling.h @@ -0,0 +1,51 @@ +/*===-- llvm-c/ErrorHandling.h - Error Handling C Interface -------*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file defines the C interface to LLVM's error handling mechanism. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ERROR_HANDLING_H +#define LLVM_C_ERROR_HANDLING_H + +#include "llvm-c/Types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*LLVMFatalErrorHandler)(const char *Reason); + +/** + * Install a fatal error handler. By default, if LLVM detects a fatal error, it + * will call exit(1). This may not be appropriate in many contexts. For example, + * doing exit(1) will bypass many crash reporting/tracing system tools. This + * function allows you to install a callback that will be invoked prior to the + * call to exit(1). + */ +void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler); + +/** + * Reset the fatal error handler. This resets LLVM's fatal error handling + * behavior to the default. + */ +void LLVMResetFatalErrorHandler(void); + +/** + * Enable LLVM's built-in stack trace code. This intercepts the OS's crash + * signals and prints which component of LLVM you were in at the time if the + * crash. + */ +void LLVMEnablePrettyStackTrace(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index eb3ecabfa8a8..b72a91a8b137 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_EXECUTIONENGINE_H #define LLVM_C_EXECUTIONENGINE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" @@ -110,22 +110,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule( struct LLVMMCJITCompilerOptions *Options, size_t SizeOfOptions, char **OutError); -/** Deprecated: Use LLVMCreateExecutionEngineForModule instead. */ -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError); - -/** Deprecated: Use LLVMCreateInterpreterForModule instead. */ -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError); - -/** Deprecated: Use LLVMCreateJITCompilerForModule instead. */ -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError); - void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE); void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE); @@ -144,17 +128,9 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F); void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M); -/** Deprecated: Use LLVMAddModule instead. */ -void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP); - LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, LLVMModuleRef *OutMod, char **OutError); -/** Deprecated: Use LLVMRemoveModule instead. 
*/ -LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError); - LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, LLVMValueRef *OutFn); diff --git a/include/llvm-c/IRReader.h b/include/llvm-c/IRReader.h index 5001afb7ed7d..5b58d9921fb0 100644 --- a/include/llvm-c/IRReader.h +++ b/include/llvm-c/IRReader.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_IRREADER_H #define LLVM_C_IRREADER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h index 44194f8ea311..90c8396f7ad3 100644 --- a/include/llvm-c/Initialization.h +++ b/include/llvm-c/Initialization.h @@ -16,7 +16,7 @@ #ifndef LLVM_C_INITIALIZATION_H #define LLVM_C_INITIALIZATION_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Linker.h b/include/llvm-c/Linker.h index 9f98a3342d0b..4d9bd46a259b 100644 --- a/include/llvm-c/Linker.h +++ b/include/llvm-c/Linker.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_LINKER_H #define LLVM_C_LINKER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -27,17 +27,27 @@ typedef enum { should not be used. */ } LLVMLinkerMode; -/* Links the source module into the destination module, taking ownership - * of the source module away from the caller. Optionally returns a - * human-readable description of any errors that occurred in linking. - * OutMessage must be disposed with LLVMDisposeMessage. The return value - * is true if an error occurred, false otherwise. +/* Links the source module into the destination module. The source module is + * damaged. The only thing that can be done is destroy it. Optionally returns a + * human-readable description of any errors that occurred in linking. OutMessage + * must be disposed with LLVMDisposeMessage. The return value is true if an + * error occurred, false otherwise. * * Note that the linker mode parameter \p Unused is no longer used, and has - * no effect. */ + * no effect. + * + * This function is deprecated. Use LLVMLinkModules2 instead. + */ LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src, LLVMLinkerMode Unused, char **OutMessage); +/* Links the source module into the destination module. The source module is + * destroyed. + * The return value is true if an error occurred, false otherwise. + * Use the diagnostic handler to get any diagnostic message. +*/ +LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src); + #ifdef __cplusplus } #endif diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h index 9cab5c426c45..a2980e89fe3d 100644 --- a/include/llvm-c/Object.h +++ b/include/llvm-c/Object.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_OBJECT_H #define LLVM_C_OBJECT_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm/Config/llvm-config.h" #ifdef __cplusplus diff --git a/include/llvm-c/OrcBindings.h b/include/llvm-c/OrcBindings.h new file mode 100644 index 000000000000..f6aff916999a --- /dev/null +++ b/include/llvm-c/OrcBindings.h @@ -0,0 +1,134 @@ +/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. 
*| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMOrcJIT.a, which implements *| +|* JIT compilation of LLVM IR. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +|* Note: This interface is experimental. It is *NOT* stable, and may be *| +|* changed without warning. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ORCBINDINGS_H +#define LLVM_C_ORCBINDINGS_H + +#include "llvm-c/Object.h" +#include "llvm-c/Support.h" +#include "llvm-c/TargetMachine.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef; +typedef uint32_t LLVMOrcModuleHandle; +typedef uint64_t LLVMOrcTargetAddress; +typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, + void *LookupCtx); +typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack, + void *CallbackCtx); + +/** + * Create an ORC JIT stack. + * + * The client owns the resulting stack, and must call OrcDisposeInstance(...) + * to destroy it and free its memory. The JIT stack will take ownership of the + * TargetMachine, which will be destroyed when the stack is destroyed. The + * client should not attempt to dispose of the Target Machine, or it will result + * in a double-free. + */ +LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM); + +/** + * Mangle the given symbol. + * Memory will be allocated for MangledSymbol to hold the result. The client + */ +void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledSymbol, + const char *Symbol); + +/** + * Dispose of a mangled symbol. + */ + +void LLVMOrcDisposeMangledSymbol(char *MangledSymbol); + +/** + * Create a lazy compile callback. + */ +LLVMOrcTargetAddress +LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcLazyCompileCallbackFn Callback, + void *CallbackCtx); + +/** + * Create a named indirect call stub. + */ +void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress InitAddr); + +/** + * Set the pointer for the given indirect stub. + */ +void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress NewAddr); + +/** + * Add module to be eagerly compiled. + */ +LLVMOrcModuleHandle +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Add module to be lazily compiled one function at a time. + */ +LLVMOrcModuleHandle +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Add an object file. + */ +LLVMOrcModuleHandle +LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, LLVMObjectFileRef Obj, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Remove a module set from the JIT. + * + * This works for all modules that can be added via OrcAdd*, including object + * files. + */ +void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H); + +/** + * Get symbol address from JIT instance. 
+ */ +LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + const char *SymbolName); + +/** + * Dispose of an ORC JIT stack. + */ +void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); + +#ifdef __cplusplus +} +#endif /* extern "C" */ + +#endif /* LLVM_C_ORCBINDINGS_H */ diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h index eca3b7a42037..735d1fbc78cc 100644 --- a/include/llvm-c/Support.h +++ b/include/llvm-c/Support.h @@ -15,30 +15,12 @@ #define LLVM_C_SUPPORT_H #include "llvm/Support/DataTypes.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { #endif -/** - * @defgroup LLVMCSupportTypes Types and Enumerations - * - * @{ - */ - -typedef int LLVMBool; - -/** - * Used to pass regions of memory through LLVM interfaces. - * - * @see llvm::MemoryBuffer - */ -typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; - -/** - * @} - */ - /** * This function permanently loads the dynamic library at the given path. * It is safe to call this function multiple times for the same library. diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h index b465b4b88db5..24d2cb4c9598 100644 --- a/include/llvm-c/Target.h +++ b/include/llvm-c/Target.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TARGET_H #define LLVM_C_TARGET_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm/Config/llvm-config.h" #if defined(_MSC_VER) && !defined(inline) diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h index 8cf1f43cb3c5..303708093653 100644 --- a/include/llvm-c/TargetMachine.h +++ b/include/llvm-c/TargetMachine.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TARGETMACHINE_H #define LLVM_C_TARGETMACHINE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm-c/Target.h" #ifdef __cplusplus @@ -115,7 +115,7 @@ char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); LLVMDisposeMessage. */ char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); -/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */ +/** Returns the llvm::DataLayout used for this llvm:TargetMachine. */ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); /** Set the target machine's ASM verbosity. 
*/ diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 448078012eac..3af7425dd268 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -15,7 +15,7 @@ #ifndef LLVM_C_TRANSFORMS_IPO_H #define LLVM_C_TRANSFORMS_IPO_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h index 3d7a9d677eab..69786b341ab4 100644 --- a/include/llvm-c/Transforms/PassManagerBuilder.h +++ b/include/llvm-c/Transforms/PassManagerBuilder.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H #define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef; diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 48c19a6e3117..c989ee86b9f7 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TRANSFORMS_SCALAR_H #define LLVM_C_TRANSFORMS_SCALAR_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h index c9102da60297..a82ef49cb167 100644 --- a/include/llvm-c/Transforms/Vectorize.h +++ b/include/llvm-c/Transforms/Vectorize.h @@ -20,7 +20,7 @@ #ifndef LLVM_C_TRANSFORMS_VECTORIZE_H #define LLVM_C_TRANSFORMS_VECTORIZE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -51,4 +51,3 @@ void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM); #endif /* defined(__cplusplus) */ #endif - diff --git a/include/llvm-c/Types.h b/include/llvm-c/Types.h new file mode 100644 index 000000000000..19029584efcc --- /dev/null +++ b/include/llvm-c/Types.h @@ -0,0 +1,124 @@ +/*===-- llvm-c/Support.h - C Interface Types declarations ---------*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file defines types used by the the C interface to LLVM. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TYPES_H +#define LLVM_C_TYPES_H + +#include "llvm/Support/DataTypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup LLVMCSupportTypes Types and Enumerations + * + * @{ + */ + +typedef int LLVMBool; + +/* Opaque types. */ + +/** + * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore + * parameters must be passed as base types. Despite the declared types, most + * of the functions provided operate only on branches of the type hierarchy. + * The declared parameter names are descriptive and specify which type is + * required. Additionally, each type hierarchy is documented along with the + * functions that operate upon it. For more detail, refer to LLVM's C++ code. + * If in doubt, refer to Core.cpp, which performs parameter downcasts in the + * form unwrap(Param). + */ + +/** + * Used to pass regions of memory through LLVM interfaces. + * + * @see llvm::MemoryBuffer + */ +typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; + +/** + * The top-level container for all LLVM global data. See the LLVMContext class. 
+ */ +typedef struct LLVMOpaqueContext *LLVMContextRef; + +/** + * The top-level container for all other LLVM Intermediate Representation (IR) + * objects. + * + * @see llvm::Module + */ +typedef struct LLVMOpaqueModule *LLVMModuleRef; + +/** + * Each value in the LLVM IR has a type, an LLVMTypeRef. + * + * @see llvm::Type + */ +typedef struct LLVMOpaqueType *LLVMTypeRef; + +/** + * Represents an individual value in LLVM IR. + * + * This models llvm::Value. + */ +typedef struct LLVMOpaqueValue *LLVMValueRef; + +/** + * Represents a basic block of instructions in LLVM IR. + * + * This models llvm::BasicBlock. + */ +typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; + +/** + * Represents an LLVM basic block builder. + * + * This models llvm::IRBuilder. + */ +typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; + +/** + * Interface used to provide a module to JIT or interpreter. + * This is now just a synonym for llvm::Module, but we have to keep using the + * different type to keep binary compatibility. + */ +typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; + +/** @see llvm::PassManagerBase */ +typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; + +/** @see llvm::PassRegistry */ +typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; + +/** + * Used to get the users and usees of a Value. + * + * @see llvm::Use */ +typedef struct LLVMOpaqueUse *LLVMUseRef; + +/** + * @see llvm::DiagnosticInfo + */ +typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef; + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index cb3a69160454..691a0cd3f55c 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -374,8 +374,8 @@ extern lto_bool_t lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod); /** - * Sets the object module for code generation. This will transfer the ownship of - * the module to code generator. + * Sets the object module for code generation. This will transfer the ownership + * of the module to the code generator. * * \c cg and \c mod must both be in the same context. * diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 76615affb253..3fe04060fd59 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -142,6 +142,9 @@ public: /// @} static unsigned int semanticsPrecision(const fltSemantics &); + static ExponentType semanticsMinExponent(const fltSemantics &); + static ExponentType semanticsMaxExponent(const fltSemantics &); + static unsigned int semanticsSizeInBits(const fltSemantics &); /// IEEE-754R 5.11: Floating Point Comparison Relations. enum cmpResult { @@ -296,7 +299,7 @@ public: /// IEEE remainder. opStatus remainder(const APFloat &); /// C fmod, or llvm frem. - opStatus mod(const APFloat &, roundingMode); + opStatus mod(const APFloat &); opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); opStatus roundToIntegral(roundingMode); /// IEEE-754R 5.3.1: nextUp/nextDown. @@ -445,6 +448,9 @@ public: /// Returns true if and only if the number has the largest possible finite /// magnitude in the current semantics. bool isLargest() const; + + /// Returns true if and only if the number is an exact integer. 
+ bool isInteger() const; /// @} diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 5013f295f5c7..e2a0cb5e69dc 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -294,11 +294,12 @@ public: delete[] pVal; } - /// \brief Default constructor that creates an uninitialized APInt. + /// \brief Default constructor that creates an uninteresting APInt + /// representing a 1-bit zero value. /// /// This is useful for object deserialization (pair this with the static /// method Read). - explicit APInt() : BitWidth(1) {} + explicit APInt() : BitWidth(1), VAL(0) {} /// \brief Returns whether this instance allocated memory. bool needsCleanup() const { return !isSingleWord(); } @@ -1528,7 +1529,7 @@ public: /// \returns the nearest log base 2 of this APInt. Ties round up. /// /// NOTE: When we have a BitWidth of 1, we define: - /// + /// /// log2(0) = UINT32_MAX /// log2(1) = 0 /// diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index a187515f8592..a6552d0a2f36 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -21,6 +21,7 @@ namespace llvm { class APSInt : public APInt { bool IsUnsigned; + public: /// Default constructor that creates an uninitialized APInt. explicit APSInt() : IsUnsigned(false) {} @@ -246,8 +247,7 @@ public: return this->operator|(RHS); } - - APSInt operator^(const APSInt& RHS) const { + APSInt operator^(const APSInt &RHS) const { assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); return APSInt(static_cast(*this) ^ RHS, IsUnsigned); } @@ -286,7 +286,7 @@ public: } /// \brief Determine if two APSInts have the same value, zero- or - /// sign-extending as needed. + /// sign-extending as needed. static bool isSameValue(const APSInt &I1, const APSInt &I2) { return !compareValues(I1, I2); } diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index c8795fd89e33..517ba39849e1 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -10,6 +10,7 @@ #ifndef LLVM_ADT_ARRAYREF_H #define LLVM_ADT_ARRAYREF_H +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include @@ -85,7 +86,7 @@ namespace llvm { /// Construct an ArrayRef from a std::initializer_list. /*implicit*/ ArrayRef(const std::initializer_list &Vec) - : Data(Vec.begin() == Vec.end() ? (T*)0 : Vec.begin()), + : Data(Vec.begin() == Vec.end() ? (T*)nullptr : Vec.begin()), Length(Vec.size()) {} /// Construct an ArrayRef from ArrayRef. This uses SFINAE to @@ -148,7 +149,7 @@ namespace llvm { // copy - Allocate copy in Allocator and return ArrayRef to it. template ArrayRef copy(Allocator &A) { T *Buff = A.template Allocate(Length); - std::copy(begin(), end(), Buff); + std::uninitialized_copy(begin(), end(), Buff); return ArrayRef(Buff, Length); } @@ -156,8 +157,6 @@ namespace llvm { bool equals(ArrayRef RHS) const { if (Length != RHS.Length) return false; - if (Length == 0) - return true; return std::equal(begin(), end(), RHS.begin()); } @@ -339,6 +338,16 @@ namespace llvm { return Vec; } + /// Construct an ArrayRef from an ArrayRef (no-op) (const) + template ArrayRef makeArrayRef(const ArrayRef &Vec) { + return Vec; + } + + /// Construct an ArrayRef from an ArrayRef (no-op) + template ArrayRef &makeArrayRef(ArrayRef &Vec) { + return Vec; + } + /// Construct an ArrayRef from a C array. 
template ArrayRef makeArrayRef(const T (&Arr)[N]) { @@ -366,6 +375,10 @@ namespace llvm { template struct isPodLike > { static const bool value = true; }; + + template hash_code hash_value(ArrayRef S) { + return hash_combine_range(S.begin(), S.end()); + } } #endif diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index f58dd7356c7d..ad00d51f99e9 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -34,7 +34,7 @@ class BitVector { BitWord *Bits; // Actual bits. unsigned Size; // Size of bitvector in bits. - unsigned Capacity; // Size of allocated memory in BitWord. + unsigned Capacity; // Number of BitWords allocated in the Bits array. public: typedef unsigned size_type; @@ -566,8 +566,16 @@ private: if (AddBits) clear_unused_bits(); } + +public: + /// Return the size (in bytes) of the bit vector. + size_t getMemorySize() const { return Capacity * sizeof(BitWord); } }; +static inline size_t capacity_in_bytes(const BitVector &X) { + return X.getMemorySize(); +} + } // End llvm namespace namespace std { diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h index 21bc1e80c9d8..a26f37dfdc7d 100644 --- a/include/llvm/ADT/DeltaAlgorithm.h +++ b/include/llvm/ADT/DeltaAlgorithm.h @@ -68,7 +68,7 @@ private: /// \return - True on success. bool Search(const changeset_ty &Changes, const changesetlist_ty &Sets, changeset_ty &Res); - + protected: /// UpdatedSearchState - Callback used when the search state changes. virtual void UpdatedSearchState(const changeset_ty &Changes, diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 27f73157a29f..6ee1960b5c82 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -282,7 +282,7 @@ protected: "# initial buckets must be a power of two!"); const KeyT EmptyKey = getEmptyKey(); for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B) - new (&B->getFirst()) KeyT(EmptyKey); + ::new (&B->getFirst()) KeyT(EmptyKey); } void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) { @@ -300,7 +300,7 @@ protected: (void)FoundVal; // silence warning. assert(!FoundVal && "Key already in new map?"); DestBucket->getFirst() = std::move(B->getFirst()); - new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); + ::new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); incrementNumEntries(); // Free the value. 
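[Aside on the repeated new -> ::new changes in the DenseMap.h hunks above: an unqualified placement new-expression looks up operator new in the scope of the class being constructed first, so a key or value type that declares its own operator new hides the global placement form and the unqualified expression fails to compile. The :: qualifier forces the global placement operator from <new>. A minimal sketch, using a hypothetical Widget type not taken from this patch:]

    #include <cstddef>
    #include <new>

    struct Widget {
      // A class-specific allocation function hides the global placement form
      // for unqualified new-expressions naming Widget.
      static void *operator new(std::size_t N) { return ::operator new(N); }
      int V;
      explicit Widget(int V) : V(V) {}
    };

    void constructAt(void *Storage) {
      // new (Storage) Widget(1);   // Ill-formed: lookup finds only
      //                            // Widget::operator new(size_t).
      ::new (Storage) Widget(1);    // OK: uses ::operator new(size_t, void*).
    }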
@@ -324,11 +324,11 @@ protected: getNumBuckets() * sizeof(BucketT)); else for (size_t i = 0; i < getNumBuckets(); ++i) { - new (&getBuckets()[i].getFirst()) + ::new (&getBuckets()[i].getFirst()) KeyT(other.getBuckets()[i].getFirst()); if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) && !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey())) - new (&getBuckets()[i].getSecond()) + ::new (&getBuckets()[i].getSecond()) ValueT(other.getBuckets()[i].getSecond()); } } @@ -402,7 +402,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = Key; - new (&TheBucket->getSecond()) ValueT(Value); + ::new (&TheBucket->getSecond()) ValueT(Value); return TheBucket; } @@ -411,7 +411,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = Key; - new (&TheBucket->getSecond()) ValueT(std::move(Value)); + ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); return TheBucket; } @@ -419,7 +419,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = std::move(Key); - new (&TheBucket->getSecond()) ValueT(std::move(Value)); + ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); return TheBucket; } @@ -766,10 +766,10 @@ public: // Swap separately and handle any assymetry. std::swap(LHSB->getFirst(), RHSB->getFirst()); if (hasLHSValue) { - new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); + ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); LHSB->getSecond().~ValueT(); } else if (hasRHSValue) { - new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); + ::new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); RHSB->getSecond().~ValueT(); } } @@ -795,11 +795,11 @@ public: for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { BucketT *NewB = &LargeSide.getInlineBuckets()[i], *OldB = &SmallSide.getInlineBuckets()[i]; - new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); + ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); OldB->getFirst().~KeyT(); if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) && !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) { - new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); + ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); OldB->getSecond().~ValueT(); } } @@ -866,8 +866,8 @@ public: !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && "Too many inline buckets!"); - new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); - new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); + ::new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); + ::new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); ++TmpEnd; P->getSecond().~ValueT(); } diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index b0a053072079..a844ebcccf5b 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_DENSEMAPINFO_H #define LLVM_ADT_DENSEMAPINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" @@ -58,7 +59,7 @@ template<> struct DenseMapInfo { return LHS == RHS; } }; - + // Provide DenseMapInfo for unsigned ints. template<> struct DenseMapInfo { static inline unsigned getEmptyKey() { return ~0U; } @@ -190,6 +191,31 @@ template <> struct DenseMapInfo { } }; +// Provide DenseMapInfo for ArrayRefs. 
+template struct DenseMapInfo> { + static inline ArrayRef getEmptyKey() { + return ArrayRef(reinterpret_cast(~static_cast(0)), + size_t(0)); + } + static inline ArrayRef getTombstoneKey() { + return ArrayRef(reinterpret_cast(~static_cast(1)), + size_t(0)); + } + static unsigned getHashValue(ArrayRef Val) { + assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); + assert(Val.data() != getTombstoneKey().data() && + "Cannot hash the tombstone key!"); + return (unsigned)(hash_value(Val)); + } + static bool isEqual(ArrayRef LHS, ArrayRef RHS) { + if (RHS.data() == getEmptyKey().data()) + return LHS.data() == getEmptyKey().data(); + if (RHS.data() == getTombstoneKey().data()) + return LHS.data() == getTombstoneKey().data(); + return LHS == RHS; + } +}; + } // end namespace llvm #endif diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index d34024005dfe..ef09dce37980 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -42,6 +42,7 @@ class DenseSet { static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT), "DenseMap buckets unexpectedly large!"); MapTy TheMap; + public: typedef ValueT key_type; typedef ValueT value_type; @@ -79,6 +80,7 @@ public: class Iterator { typename MapTy::iterator I; friend class DenseSet; + public: typedef typename MapTy::iterator::difference_type difference_type; typedef ValueT value_type; @@ -99,6 +101,7 @@ public: class ConstIterator { typename MapTy::const_iterator I; friend class DenseSet; + public: typedef typename MapTy::const_iterator::difference_type difference_type; typedef ValueT value_type; @@ -148,7 +151,7 @@ public: detail::DenseSetEmpty Empty; return TheMap.insert(std::make_pair(V, Empty)); } - + // Range insertion of values. template void insert(InputIt I, InputIt E) { diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index d79b9acacfa9..c9317b8539b3 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -58,7 +58,6 @@ public: SetType &Visited; }; - // Generic Depth First Iterator template::NodeType*, 8>, @@ -76,21 +75,22 @@ class df_iterator : public std::iterator > VisitStack; + std::vector> VisitStack; + private: inline df_iterator(NodeType *Node) { this->Visited.insert(Node); - VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), - GT::child_begin(Node))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node))); } - inline df_iterator() { - // End is when stack is empty + inline df_iterator() { + // End is when stack is empty } inline df_iterator(NodeType *Node, SetType &S) : df_iterator_storage(S) { if (!S.count(Node)) { - VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), - GT::child_begin(Node))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node))); this->Visited.insert(Node); } } @@ -115,8 +115,8 @@ private: // Has our next sibling been visited? if (Next && this->Visited.insert(Next).second) { // No, do it now. - VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0), - GT::child_begin(Next))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Next, 0), GT::child_begin(Next))); return; } } @@ -195,7 +195,6 @@ public: } }; - // Provide global constructors that automatically figure out correct types... // template @@ -237,7 +236,6 @@ iterator_range> depth_first_ext(const T& G, return make_range(df_ext_begin(G, S), df_ext_end(G, S)); } - // Provide global definitions of inverse depth first iterators... 
template ::NodeType*, 8>, diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index 52d10c1c1245..c9205396591b 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -122,9 +122,10 @@ protected: /// is greater than twice the number of buckets. unsigned NumNodes; - ~FoldingSetImpl(); - explicit FoldingSetImpl(unsigned Log2InitSize = 6); + FoldingSetImpl(FoldingSetImpl &&Arg); + FoldingSetImpl &operator=(FoldingSetImpl &&RHS); + ~FoldingSetImpl(); public: //===--------------------------------------------------------------------===// @@ -137,7 +138,6 @@ public: void *NextInFoldingSetBucket; public: - Node() : NextInFoldingSetBucket(nullptr) {} // Accessors @@ -182,13 +182,11 @@ public: bool empty() const { return NumNodes == 0; } private: - /// GrowHashTable - Double the size of the hash table and rehash everything. /// void GrowHashTable(); protected: - /// GetNodeProfile - Instantiations of the FoldingSet template implement /// this function to gather data bits for the given node. virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0; @@ -269,6 +267,7 @@ template struct ContextualFoldingSetTrait class FoldingSetNodeIDRef { const unsigned *Data; size_t Size; + public: FoldingSetNodeIDRef() : Data(nullptr), Size(0) {} FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} @@ -393,6 +392,10 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, /// implementation of the folding set to the node class T. T must be a /// subclass of FoldingSetNode and implement a Profile function. /// +/// Note that this set type is movable and move-assignable. However, its +/// moved-from state is not a valid state for anything other than +/// move-assigning and destroying. This is primarily to enable movable APIs +/// that incorporate these objects. template class FoldingSet final : public FoldingSetImpl { private: /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a @@ -417,8 +420,13 @@ private: public: explicit FoldingSet(unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize) - {} + : FoldingSetImpl(Log2InitSize) {} + + FoldingSet(FoldingSet &&Arg) : FoldingSetImpl(std::move(Arg)) {} + FoldingSet &operator=(FoldingSet &&RHS) { + (void)FoldingSetImpl::operator=(std::move(RHS)); + return *this; + } typedef FoldingSetIterator iterator; iterator begin() { return iterator(Buckets); } @@ -498,7 +506,6 @@ public: Ctx getContext() const { return Context; } - typedef FoldingSetIterator iterator; iterator begin() { return iterator(Buckets); } iterator end() { return iterator(Buckets+NumBuckets); } @@ -614,9 +621,7 @@ public: } }; - -template -class FoldingSetIterator : public FoldingSetIteratorImpl { +template class FoldingSetIterator : public FoldingSetIteratorImpl { public: explicit FoldingSetIterator(void **Bucket) : FoldingSetIteratorImpl(Bucket) {} @@ -666,8 +671,7 @@ public: } }; - -template +template class FoldingSetBucketIterator : public FoldingSetBucketIteratorImpl { public: explicit FoldingSetBucketIterator(void **Bucket) : @@ -694,6 +698,7 @@ public: template class FoldingSetNodeWrapper : public FoldingSetNode { T data; + public: template explicit FoldingSetNodeWrapper(Ts &&... Args) @@ -716,12 +721,12 @@ public: /// information that would otherwise only be required for recomputing an ID. 
class FastFoldingSetNode : public FoldingSetNode { FoldingSetNodeID FastID; + protected: explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {} + public: - void Profile(FoldingSetNodeID &ID) const { - ID.AddNodeID(FastID); - } + void Profile(FoldingSetNodeID &ID) const { ID.AddNodeID(FastID); } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h index 748d3e4bf9ff..a1d26bd97045 100644 --- a/include/llvm/ADT/ImmutableList.h +++ b/include/llvm/ADT/ImmutableList.h @@ -28,7 +28,7 @@ class ImmutableListImpl : public FoldingSetNode { T Head; const ImmutableListImpl* Tail; - ImmutableListImpl(const T& head, const ImmutableListImpl* tail = 0) + ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr) : Head(head), Tail(tail) {} friend class ImmutableListFactory; @@ -72,7 +72,7 @@ public: // This constructor should normally only be called by ImmutableListFactory. // There may be cases, however, when one needs to extract the internal pointer // and reconstruct a list object from that pointer. - ImmutableList(const ImmutableListImpl* x = 0) : X(x) {} + ImmutableList(const ImmutableListImpl* x = nullptr) : X(x) {} const ImmutableListImpl* getInternalPointer() const { return X; @@ -81,7 +81,7 @@ public: class iterator { const ImmutableListImpl* L; public: - iterator() : L(0) {} + iterator() : L(nullptr) {} iterator(ImmutableList l) : L(l.getInternalPointer()) {} iterator& operator++() { L = L->getTail(); return *this; } @@ -128,7 +128,7 @@ public: /// getTail - Returns the tail of the list, which is another (possibly empty) /// ImmutableList. ImmutableList getTail() { - return X ? X->getTail() : 0; + return X ? X->getTail() : nullptr; } void Profile(FoldingSetNodeID& ID) const { @@ -190,7 +190,7 @@ public: } ImmutableList getEmptyList() const { - return ImmutableList(0); + return ImmutableList(nullptr); } ImmutableList create(const T& X) { @@ -226,4 +226,4 @@ struct isPodLike > { static const bool value = true; }; } // end llvm namespace -#endif +#endif // LLVM_ADT_IMMUTABLELIST_H diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index 438dec2333c5..7480cd73da61 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -55,7 +55,6 @@ struct ImutKeyValueInfo { } }; - template > class ImmutableMap { @@ -79,9 +78,11 @@ public: explicit ImmutableMap(const TreeTy* R) : Root(const_cast(R)) { if (Root) { Root->retain(); } } + ImmutableMap(const ImmutableMap &X) : Root(X.Root) { if (Root) { Root->retain(); } } + ImmutableMap &operator=(const ImmutableMap &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -90,6 +91,7 @@ public: } return *this; } + ~ImmutableMap() { if (Root) { Root->release(); } } @@ -99,11 +101,10 @@ public: const bool Canonicalize; public: - Factory(bool canonicalize = true) - : Canonicalize(canonicalize) {} - - Factory(BumpPtrAllocator& Alloc, bool canonicalize = true) - : F(Alloc), Canonicalize(canonicalize) {} + Factory(bool canonicalize = true) : Canonicalize(canonicalize) {} + + Factory(BumpPtrAllocator &Alloc, bool canonicalize = true) + : F(Alloc), Canonicalize(canonicalize) {} ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); } @@ -143,14 +144,12 @@ public: return Root; } - TreeTy *getRootWithoutRetain() const { - return Root; - } - + TreeTy *getRootWithoutRetain() const { return Root; } + void manualRetain() { if (Root) Root->retain(); } - + void 
manualRelease() { if (Root) Root->release(); } @@ -224,7 +223,7 @@ public: return nullptr; } - + /// getMaxElement - Returns the pair in the ImmutableMap for /// which key is the highest in the ordering of keys in the map. This /// method returns NULL if the map is empty. @@ -260,20 +259,21 @@ public: typedef typename ValInfo::data_type_ref data_type_ref; typedef ImutAVLTree TreeTy; typedef typename TreeTy::Factory FactoryTy; - + protected: TreeTy *Root; FactoryTy *Factory; - + public: /// Constructs a map from a pointer to a tree root. In general one /// should use a Factory object to create maps instead of directly /// invoking the constructor, but there are cases where make this /// constructor public is useful. - explicit ImmutableMapRef(const TreeTy* R, FactoryTy *F) - : Root(const_cast(R)), - Factory(F) { - if (Root) { Root->retain(); } + explicit ImmutableMapRef(const TreeTy *R, FactoryTy *F) + : Root(const_cast(R)), Factory(F) { + if (Root) { + Root->retain(); + } } explicit ImmutableMapRef(const ImmutableMap &X, @@ -282,21 +282,21 @@ public: Factory(F.getTreeFactory()) { if (Root) { Root->retain(); } } - - ImmutableMapRef(const ImmutableMapRef &X) - : Root(X.Root), - Factory(X.Factory) { - if (Root) { Root->retain(); } + + ImmutableMapRef(const ImmutableMapRef &X) : Root(X.Root), Factory(X.Factory) { + if (Root) { + Root->retain(); + } } ImmutableMapRef &operator=(const ImmutableMapRef &X) { if (Root != X.Root) { if (X.Root) X.Root->retain(); - + if (Root) Root->release(); - + Root = X.Root; Factory = X.Factory; } @@ -307,7 +307,7 @@ public: if (Root) Root->release(); } - + static inline ImmutableMapRef getEmptyMap(FactoryTy *F) { return ImmutableMapRef(0, F); } @@ -329,31 +329,34 @@ public: TreeTy *NewT = Factory->remove(Root, K); return ImmutableMapRef(NewT, Factory); } - + bool contains(key_type_ref K) const { return Root ? Root->contains(K) : false; } - + ImmutableMap asImmutableMap() const { return ImmutableMap(Factory->getCanonicalTree(Root)); } - + bool operator==(const ImmutableMapRef &RHS) const { return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root; } - + bool operator!=(const ImmutableMapRef &RHS) const { return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root; } - + bool isEmpty() const { return !Root; } - + //===--------------------------------------------------===// // For testing. //===--------------------------------------------------===// - - void verify() const { if (Root) Root->verify(); } - + + void verify() const { + if (Root) + Root->verify(); + } + //===--------------------------------------------------===// // Iterators. //===--------------------------------------------------===// @@ -370,38 +373,36 @@ public: iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } - - data_type* lookup(key_type_ref K) const { + + data_type *lookup(key_type_ref K) const { if (Root) { TreeTy* T = Root->find(K); if (T) return &T->getValue().second; } - - return 0; + + return nullptr; } - + /// getMaxElement - Returns the pair in the ImmutableMap for /// which key is the highest in the ordering of keys in the map. This /// method returns NULL if the map is empty. value_type* getMaxElement() const { return Root ? &(Root->getMaxElement()->getValue()) : 0; } - + //===--------------------------------------------------===// // Utility methods. //===--------------------------------------------------===// - + unsigned getHeight() const { return Root ? 
Root->getHeight() : 0; } - - static inline void Profile(FoldingSetNodeID& ID, const ImmutableMapRef &M) { + + static inline void Profile(FoldingSetNodeID &ID, const ImmutableMapRef &M) { ID.AddPointer(M.Root); } - - inline void Profile(FoldingSetNodeID& ID) const { - return Profile(ID, *this); - } + + inline void Profile(FoldingSetNodeID &ID) const { return Profile(ID, *this); } }; - + } // end namespace llvm -#endif +#endif // LLVM_ADT_IMMUTABLEMAP_H diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index 65b2da793d7c..8057ec10be00 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -154,7 +154,7 @@ public: template IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.get()) { - S.Obj = 0; + S.Obj = nullptr; } template @@ -190,7 +190,7 @@ public: } void resetWithoutRelease() { - Obj = 0; + Obj = nullptr; } private: diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index 855ab890392e..d9acaf6d23b0 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -159,6 +159,25 @@ template struct isPodLike > { template void operator==(const Optional &X, const Optional &Y); +template +bool operator==(const Optional &X, NoneType) { + return !X.hasValue(); +} + +template +bool operator==(NoneType, const Optional &X) { + return X == None; +} + +template +bool operator!=(const Optional &X, NoneType) { + return !(X == None); +} + +template +bool operator!=(NoneType, const Optional &X) { + return X != None; +} /// \brief Poison comparison between two \c Optional objects. Clients needs to /// explicitly compare the underlying values and account for empty \c Optional /// objects. diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h index 1ae2a77e7eaf..09267173fd77 100644 --- a/include/llvm/ADT/PackedVector.h +++ b/include/llvm/ADT/PackedVector.h @@ -83,9 +83,9 @@ public: PackedVector &Vec; const unsigned Idx; - reference(); // Undefined + reference(); // Undefined public: - reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) { } + reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) {} reference &operator=(T val) { Vec.setValue(Vec.Bits, Idx, val); @@ -96,16 +96,16 @@ public: } }; - PackedVector() { } + PackedVector() = default; explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) { } bool empty() const { return Bits.empty(); } - unsigned size() const { return Bits.size() >> (BitNum-1); } - + unsigned size() const { return Bits.size() >> (BitNum - 1); } + void clear() { Bits.clear(); } - - void resize(unsigned N) { Bits.resize(N << (BitNum-1)); } + + void resize(unsigned N) { Bits.resize(N << (BitNum - 1)); } void reserve(unsigned N) { Bits.reserve(N << (BitNum-1)); } @@ -135,24 +135,14 @@ public: return Bits != RHS.Bits; } - const PackedVector &operator=(const PackedVector &RHS) { - Bits = RHS.Bits; - return *this; - } - PackedVector &operator|=(const PackedVector &RHS) { Bits |= RHS.Bits; return *this; } - - void swap(PackedVector &RHS) { - Bits.swap(RHS.Bits); - } }; -// Leave BitNum=0 undefined. -template -class PackedVector; +// Leave BitNum=0 undefined. 
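// Usage sketch (illustrative): each PackedVector element occupies BitNum
// bits of the underlying BitVector rather than a full word.
#include "llvm/ADT/PackedVector.h"

void packedVectorDemo() {
  llvm::PackedVector<unsigned, 2> Vals; // unsigned values 0..3, 2 bits each
  Vals.push_back(3);
  Vals.push_back(1);
  unsigned First = Vals[0]; // 3, read back through the reference proxy
  (void)First;
}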
+template class PackedVector; } // end llvm namespace diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 45a40db85c04..0058d85d1ae4 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -21,8 +21,10 @@ namespace llvm { -template -struct DenseMapInfo; +template struct DenseMapInfo; + +template +struct PointerIntPairInfo; /// PointerIntPair - This class implements a pair of a pointer and small /// integer. It is designed to represent this in the space required by one @@ -38,83 +40,35 @@ struct DenseMapInfo; /// PointerIntPair, 1, bool> /// ... and the two bools will land in different bits. /// -template > +template , + typename Info = PointerIntPairInfo> class PointerIntPair { intptr_t Value; - static_assert(PtrTraits::NumLowBitsAvailable < - std::numeric_limits::digits, - "cannot use a pointer type that has all bits free"); - static_assert(IntBits <= PtrTraits::NumLowBitsAvailable, - "PointerIntPair with integer size too large for pointer"); - enum : uintptr_t { - /// PointerBitMask - The bits that come from the pointer. - PointerBitMask = - ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable)-1), - /// IntShift - The number of low bits that we reserve for other uses, and - /// keep zero. - IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable-IntBits, - - /// IntMask - This is the unshifted mask for valid bits of the int type. - IntMask = (uintptr_t)(((intptr_t)1 << IntBits)-1), - - // ShiftedIntMask - This is the bits for the integer shifted in place. - ShiftedIntMask = (uintptr_t)(IntMask << IntShift) - }; public: PointerIntPair() : Value(0) {} PointerIntPair(PointerTy PtrVal, IntType IntVal) { setPointerAndInt(PtrVal, IntVal); } - explicit PointerIntPair(PointerTy PtrVal) { - initWithPointer(PtrVal); - } + explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); } - PointerTy getPointer() const { - return PtrTraits::getFromVoidPointer( - reinterpret_cast(Value & PointerBitMask)); - } + PointerTy getPointer() const { return Info::getPointer(Value); } - IntType getInt() const { - return (IntType)((Value >> IntShift) & IntMask); - } + IntType getInt() const { return (IntType)Info::getInt(Value); } void setPointer(PointerTy PtrVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - // Preserve all low bits, just update the pointer. - Value = PtrWord | (Value & ~PointerBitMask); + Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { - intptr_t IntWord = static_cast(IntVal); - assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); - - // Preserve all bits other than the ones we are updating. - Value &= ~ShiftedIntMask; // Remove integer field. - Value |= IntWord << IntShift; // Set new integer. 
- } + void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); } void initWithPointer(PointerTy PtrVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - Value = PtrWord; + Value = Info::updatePointer(0, PtrVal); } void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - intptr_t IntWord = static_cast(IntVal); - assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); - - Value = PtrWord | (IntWord << IntShift); + Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal); } PointerTy const *getAddrOfPointer() const { @@ -128,11 +82,15 @@ public: return reinterpret_cast(&Value); } - void *getOpaqueValue() const { return reinterpret_cast(Value); } - void setFromOpaqueValue(void *Val) { Value = reinterpret_cast(Val);} + void *getOpaqueValue() const { return reinterpret_cast(Value); } + void setFromOpaqueValue(void *Val) { + Value = reinterpret_cast(Val); + } static PointerIntPair getFromOpaqueValue(void *V) { - PointerIntPair P; P.setFromOpaqueValue(V); return P; + PointerIntPair P; + P.setFromOpaqueValue(V); + return P; } // Allow PointerIntPairs to be created from const void * if and only if the @@ -142,23 +100,81 @@ public: return getFromOpaqueValue(const_cast(V)); } - bool operator==(const PointerIntPair &RHS) const {return Value == RHS.Value;} - bool operator!=(const PointerIntPair &RHS) const {return Value != RHS.Value;} - bool operator<(const PointerIntPair &RHS) const {return Value < RHS.Value;} - bool operator>(const PointerIntPair &RHS) const {return Value > RHS.Value;} - bool operator<=(const PointerIntPair &RHS) const {return Value <= RHS.Value;} - bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;} + bool operator==(const PointerIntPair &RHS) const { + return Value == RHS.Value; + } + bool operator!=(const PointerIntPair &RHS) const { + return Value != RHS.Value; + } + bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; } + bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; } + bool operator<=(const PointerIntPair &RHS) const { + return Value <= RHS.Value; + } + bool operator>=(const PointerIntPair &RHS) const { + return Value >= RHS.Value; + } +}; + +template +struct PointerIntPairInfo { + static_assert(PtrTraits::NumLowBitsAvailable < + std::numeric_limits::digits, + "cannot use a pointer type that has all bits free"); + static_assert(IntBits <= PtrTraits::NumLowBitsAvailable, + "PointerIntPair with integer size too large for pointer"); + enum : uintptr_t { + /// PointerBitMask - The bits that come from the pointer. + PointerBitMask = + ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1), + + /// IntShift - The number of low bits that we reserve for other uses, and + /// keep zero. + IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits, + + /// IntMask - This is the unshifted mask for valid bits of the int type. + IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1), + + // ShiftedIntMask - This is the bits for the integer shifted in place. 
+ ShiftedIntMask = (uintptr_t)(IntMask << IntShift) + }; + + static PointerT getPointer(intptr_t Value) { + return PtrTraits::getFromVoidPointer( + reinterpret_cast(Value & PointerBitMask)); + } + + static intptr_t getInt(intptr_t Value) { + return (Value >> IntShift) & IntMask; + } + + static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) { + intptr_t PtrWord = + reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); + assert((PtrWord & ~PointerBitMask) == 0 && + "Pointer is not sufficiently aligned"); + // Preserve all low bits, just update the pointer. + return PtrWord | (OrigValue & ~PointerBitMask); + } + + static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) { + intptr_t IntWord = static_cast(Int); + assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); + + // Preserve all bits other than the ones we are updating. + return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift; + } }; template struct isPodLike; -template -struct isPodLike > { - static const bool value = true; +template +struct isPodLike> { + static const bool value = true; }; - + // Provide specialization of DenseMapInfo for PointerIntPair. -template -struct DenseMapInfo > { +template +struct DenseMapInfo> { typedef PointerIntPair Ty; static Ty getEmptyKey() { uintptr_t Val = static_cast(-1); @@ -178,10 +194,10 @@ struct DenseMapInfo > { }; // Teach SmallPtrSet that PointerIntPair is "basically a pointer". -template -class PointerLikeTypeTraits > { +template +class PointerLikeTypeTraits< + PointerIntPair> { public: static inline void * getAsVoidPointer(const PointerIntPair &P) { @@ -195,9 +211,7 @@ public: getFromVoidPointer(const void *P) { return PointerIntPair::getFromOpaqueValue(P); } - enum { - NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits - }; + enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits }; }; } // end namespace llvm diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index f27b81113ec5..6b3fe5749ad5 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -21,492 +21,454 @@ namespace llvm { - template - struct PointerUnionTypeSelectorReturn { - typedef T Return; - }; +template struct PointerUnionTypeSelectorReturn { + typedef T Return; +}; - /// \brief Get a type based on whether two types are the same or not. For: - /// @code - /// typedef typename PointerUnionTypeSelector::Return Ret; - /// @endcode - /// Ret will be EQ type if T1 is same as T2 or NE type otherwise. - template - struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; - }; +/// Get a type based on whether two types are the same or not. +/// +/// For: +/// +/// \code +/// typedef typename PointerUnionTypeSelector::Return Ret; +/// \endcode +/// +/// Ret will be EQ type if T1 is same as T2 or NE type otherwise. 
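// Usage sketch (illustrative): Ret is int when T is float*, and bool
// otherwise.
#include "llvm/ADT/PointerUnion.h"
#include <type_traits>

template <typename T> struct SelectDemo {
  typedef typename llvm::PointerUnionTypeSelector<float *, T, int,
                                                  bool>::Return Ret;
};
static_assert(std::is_same<SelectDemo<float *>::Ret, int>::value, "");
static_assert(std::is_same<SelectDemo<double *>::Ret, bool>::value, "");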
+template +struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; +}; - template - struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; - }; +template +struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; +}; - template - struct PointerUnionTypeSelectorReturn< - PointerUnionTypeSelector > { - typedef typename PointerUnionTypeSelector::Return - Return; - }; +template +struct PointerUnionTypeSelectorReturn< + PointerUnionTypeSelector> { + typedef + typename PointerUnionTypeSelector::Return Return; +}; - /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion - /// for the two template arguments. - template - class PointerUnionUIntTraits { - public: - static inline void *getAsVoidPointer(void *P) { return P; } - static inline void *getFromVoidPointer(void *P) { return P; } - enum { - PT1BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), - PT2BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), - NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv - }; +/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion +/// for the two template arguments. +template class PointerUnionUIntTraits { +public: + static inline void *getAsVoidPointer(void *P) { return P; } + static inline void *getFromVoidPointer(void *P) { return P; } + enum { + PT1BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), + PT2BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), + NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv }; +}; - /// PointerUnion - This implements a discriminated union of two pointer types, - /// and keeps the discriminator bit-mangled into the low bits of the pointer. - /// This allows the implementation to be extremely efficient in space, but - /// permits a very natural and type-safe API. +/// A discriminated union of two pointer types, with the discriminator in the +/// low bit of the pointer. +/// +/// This implementation is extremely efficient in space due to leveraging the +/// low bits of the pointer, while exposing a natural and type-safe API. +/// +/// Common use patterns would be something like this: +/// PointerUnion P; +/// P = (int*)0; +/// printf("%d %d", P.is(), P.is()); // prints "1 0" +/// X = P.get(); // ok. +/// Y = P.get(); // runtime assertion failure. +/// Z = P.get(); // compile time failure. +/// P = (float*)0; +/// Y = P.get(); // ok. +/// X = P.get(); // runtime assertion failure. +template class PointerUnion { +public: + typedef PointerIntPair> + ValTy; + +private: + ValTy Val; + + struct IsPT1 { + static const int Num = 0; + }; + struct IsPT2 { + static const int Num = 1; + }; + template struct UNION_DOESNT_CONTAIN_TYPE {}; + +public: + PointerUnion() {} + + PointerUnion(PT1 V) + : Val(const_cast( + PointerLikeTypeTraits::getAsVoidPointer(V))) {} + PointerUnion(PT2 V) + : Val(const_cast(PointerLikeTypeTraits::getAsVoidPointer(V)), + 1) {} + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { + // Convert from the void* to one of the pointer types, to make sure that + // we recursively strip off low bits if we have a nested PointerUnion. + return !PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); + } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. 
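// Usage sketch (illustrative), spelling out template arguments of the kind
// the class comment above assumes:
#include "llvm/ADT/PointerUnion.h"

void pointerUnionDemo() {
  llvm::PointerUnion<int *, float *> P;
  int X = 0;
  P = &X;
  bool IsInt = P.is<int *>();        // true
  int *IP = P.get<int *>();          // ok
  float *FP = P.dyn_cast<float *>(); // null: P currently holds an int*
  (void)IsInt; (void)IP; (void)FP;
}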
+ template int is() const { + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsPT1, ::llvm::PointerUnionTypeSelector< + PT2, T, IsPT2, UNION_DOESNT_CONTAIN_TYPE>>::Return + Ty; + int TyNo = Ty::Num; + return static_cast(Val.getInt()) == TyNo; + } + + /// Returns the value of the specified pointer type. /// - /// Common use patterns would be something like this: - /// PointerUnion P; - /// P = (int*)0; - /// printf("%d %d", P.is(), P.is()); // prints "1 0" - /// X = P.get(); // ok. - /// Y = P.get(); // runtime assertion failure. - /// Z = P.get(); // compile time failure. - /// P = (float*)0; - /// Y = P.get(); // ok. - /// X = P.get(); // runtime assertion failure. - template - class PointerUnion { - public: - typedef PointerIntPair > ValTy; - private: - ValTy Val; + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + return PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); + } - struct IsPT1 { - static const int Num = 0; - }; - struct IsPT2 { - static const int Num = 1; - }; - template - struct UNION_DOESNT_CONTAIN_TYPE { }; + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } - public: - PointerUnion() {} + /// If the union is set to the first pointer type get an address pointing to + /// it. + PT1 const *getAddrOfPtr1() const { + return const_cast(this)->getAddrOfPtr1(); + } - PointerUnion(PT1 V) : Val( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))) { - } - PointerUnion(PT2 V) : Val( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V)), 1) { - } + /// If the union is set to the first pointer type get an address pointing to + /// it. + PT1 *getAddrOfPtr1() { + assert(is() && "Val is not the first pointer"); + assert( + get() == Val.getPointer() && + "Can't get the address because PointerLikeTypeTraits changes the ptr"); + return (PT1 *)Val.getAddrOfPointer(); + } - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { - // Convert from the void* to one of the pointer types, to make sure that - // we recursively strip off low bits if we have a nested PointerUnion. - return !PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); - } - explicit operator bool() const { return !isNull(); } + /// Assignment from nullptr which just clears the union. + const PointerUnion &operator=(std::nullptr_t) { + Val.initWithPointer(nullptr); + return *this; + } - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - typedef typename - ::llvm::PointerUnionTypeSelector > >::Return Ty; - int TyNo = Ty::Num; - return static_cast(Val.getInt()) == TyNo; - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - return PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief If the union is set to the first pointer type get an address - /// pointing to it. 
- PT1 const *getAddrOfPtr1() const { - return const_cast(this)->getAddrOfPtr1(); - } - - /// \brief If the union is set to the first pointer type get an address - /// pointing to it. - PT1 *getAddrOfPtr1() { - assert(is() && "Val is not the first pointer"); - assert(get() == Val.getPointer() && - "Can't get the address because PointerLikeTypeTraits changes the ptr"); - return (PT1 *)Val.getAddrOfPointer(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion &operator=(std::nullptr_t) { - Val.initWithPointer(nullptr); - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. - const PointerUnion &operator=(const PT1 &RHS) { - Val.initWithPointer( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); - return *this; - } - const PointerUnion &operator=(const PT2 &RHS) { - Val.setPointerAndInt( + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion &operator=(const PT1 &RHS) { + Val.initWithPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); + return *this; + } + const PointerUnion &operator=(const PT2 &RHS) { + Val.setPointerAndInt( const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS)), 1); - return *this; - } + return *this; + } - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion getFromOpaqueValue(void *VP) { - PointerUnion V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; - } + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion getFromOpaqueValue(void *VP) { + PointerUnion V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +template +static bool operator==(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() == rhs.getOpaqueValue(); +} + +template +static bool operator!=(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() != rhs.getOpaqueValue(); +} + +template +static bool operator<(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() < rhs.getOpaqueValue(); +} + +// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits)-1. +template +class PointerLikeTypeTraits> { +public: + static inline void *getAsVoidPointer(const PointerUnion &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion getFromVoidPointer(void *P) { + return PointerUnion::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion::ValTy>::NumLowBitsAvailable }; +}; - template - static bool operator==(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() == rhs.getOpaqueValue(); - } +/// A pointer union of three pointer types. See documentation for PointerUnion +/// for usage. 
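// Usage sketch (illustrative, assuming typically aligned pointee types so
// two low bits are free for the nested discriminators):
#include "llvm/ADT/PointerUnion.h"
#include <cassert>

void pointerUnion3Demo() {
  llvm::PointerUnion3<int *, float *, long *> U;
  long L = 1;
  U = &L;
  assert(U.is<long *>() && !U.is<int *>());
  assert(U.get<long *>() == &L);
}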
+template class PointerUnion3 { +public: + typedef PointerUnion InnerUnion; + typedef PointerUnion ValTy; - template - static bool operator!=(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() != rhs.getOpaqueValue(); - } +private: + ValTy Val; - template - static bool operator<(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() < rhs.getOpaqueValue(); - } - - // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits)-1. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion - getFromVoidPointer(void *P) { - return PointerUnion::getFromOpaqueValue(P); - } - - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; - }; - - - /// PointerUnion3 - This is a pointer union of three pointer types. See - /// documentation for PointerUnion for usage. - template - class PointerUnion3 { - public: - typedef PointerUnion InnerUnion; - typedef PointerUnion ValTy; - private: + struct IsInnerUnion { ValTy Val; - - struct IsInnerUnion { - ValTy Val; - IsInnerUnion(ValTy val) : Val(val) { } - template - int is() const { - return Val.template is() && - Val.template get().template is(); - } - template - T get() const { - return Val.template get().template get(); - } - }; - - struct IsPT3 { - ValTy Val; - IsPT3(ValTy val) : Val(val) { } - template - int is() const { - return Val.template is(); - } - template - T get() const { - return Val.template get(); - } - }; - - public: - PointerUnion3() {} - - PointerUnion3(PT1 V) { - Val = InnerUnion(V); + IsInnerUnion(ValTy val) : Val(val) {} + template int is() const { + return Val.template is() && + Val.template get().template is(); } - PointerUnion3(PT2 V) { - Val = InnerUnion(V); - } - PointerUnion3(PT3 V) { - Val = V; - } - - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { return Val.isNull(); } - explicit operator bool() const { return !isNull(); } - - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Ty(Val).template is(); - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Ty(Val).template get(); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion3 &operator=(std::nullptr_t) { - Val = nullptr; - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. 
- const PointerUnion3 &operator=(const PT1 &RHS) { - Val = InnerUnion(RHS); - return *this; - } - const PointerUnion3 &operator=(const PT2 &RHS) { - Val = InnerUnion(RHS); - return *this; - } - const PointerUnion3 &operator=(const PT3 &RHS) { - Val = RHS; - return *this; - } - - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion3 getFromOpaqueValue(void *VP) { - PointerUnion3 V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; + template T get() const { + return Val.template get().template get(); } }; - // Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion3 &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion3 - getFromVoidPointer(void *P) { - return PointerUnion3::getFromOpaqueValue(P); - } - - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; - }; - - /// PointerUnion4 - This is a pointer union of four pointer types. See - /// documentation for PointerUnion for usage. - template - class PointerUnion4 { - public: - typedef PointerUnion InnerUnion1; - typedef PointerUnion InnerUnion2; - typedef PointerUnion ValTy; - private: + struct IsPT3 { ValTy Val; - public: - PointerUnion4() {} - - PointerUnion4(PT1 V) { - Val = InnerUnion1(V); - } - PointerUnion4(PT2 V) { - Val = InnerUnion1(V); - } - PointerUnion4(PT3 V) { - Val = InnerUnion2(V); - } - PointerUnion4(PT4 V) { - Val = InnerUnion2(V); - } - - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { return Val.isNull(); } - explicit operator bool() const { return !isNull(); } - - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Val.template is() && - Val.template get().template is(); - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Val.template get().template get(); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion4 &operator=(std::nullptr_t) { - Val = nullptr; - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. 
- const PointerUnion4 &operator=(const PT1 &RHS) { - Val = InnerUnion1(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT2 &RHS) { - Val = InnerUnion1(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT3 &RHS) { - Val = InnerUnion2(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT4 &RHS) { - Val = InnerUnion2(RHS); - return *this; - } - - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion4 getFromOpaqueValue(void *VP) { - PointerUnion4 V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; - } + IsPT3(ValTy val) : Val(val) {} + template int is() const { return Val.template is(); } + template T get() const { return Val.template get(); } }; - // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion4 &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion4 - getFromVoidPointer(void *P) { - return PointerUnion4::getFromOpaqueValue(P); - } +public: + PointerUnion3() {} - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; + PointerUnion3(PT1 V) { Val = InnerUnion(V); } + PointerUnion3(PT2 V) { Val = InnerUnion(V); } + PointerUnion3(PT3 V) { Val = V; } + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { return Val.isNull(); } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. + template int is() const { + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsInnerUnion, + ::llvm::PointerUnionTypeSelector>::Return + Ty; + return Ty(Val).template is(); + } + + /// Returns the value of the specified pointer type. + /// + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsInnerUnion, + ::llvm::PointerUnionTypeSelector>::Return + Ty; + return Ty(Val).template get(); + } + + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } + + /// Assignment from nullptr which just clears the union. + const PointerUnion3 &operator=(std::nullptr_t) { + Val = nullptr; + return *this; + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion3 &operator=(const PT1 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT2 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT3 &RHS) { + Val = RHS; + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion3 getFromOpaqueValue(void *VP) { + PointerUnion3 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +// Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits,PT2bits)-2. 
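// Usage sketch (illustrative): these traits let pointer-based containers
// treat the union itself as a pointer, assuming the pointee types leave
// enough free low bits.
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"

void unionInSetDemo() {
  llvm::SmallPtrSet<llvm::PointerUnion3<int *, float *, long *>, 4> Set;
  int X = 0;
  Set.insert(&X); // &X is implicitly wrapped into the union
}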
+template +class PointerLikeTypeTraits> { +public: + static inline void *getAsVoidPointer(const PointerUnion3 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion3 getFromVoidPointer(void *P) { + return PointerUnion3::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion3::ValTy>::NumLowBitsAvailable }; +}; - // Teach DenseMap how to use PointerUnions as keys. - template - struct DenseMapInfo > { - typedef PointerUnion Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; +/// A pointer union of four pointer types. See documentation for PointerUnion +/// for usage. +template +class PointerUnion4 { +public: + typedef PointerUnion InnerUnion1; + typedef PointerUnion InnerUnion2; + typedef PointerUnion ValTy; - static inline Pair getEmptyKey() { - return Pair(FirstInfo::getEmptyKey()); - } - static inline Pair getTombstoneKey() { - return Pair(FirstInfo::getTombstoneKey()); - } - static unsigned getHashValue(const Pair &PairVal) { - intptr_t key = (intptr_t)PairVal.getOpaqueValue(); - return DenseMapInfo::getHashValue(key); - } - static bool isEqual(const Pair &LHS, const Pair &RHS) { - return LHS.template is() == RHS.template is() && - (LHS.template is() ? - FirstInfo::isEqual(LHS.template get(), - RHS.template get()) : - SecondInfo::isEqual(LHS.template get(), - RHS.template get())); - } +private: + ValTy Val; + +public: + PointerUnion4() {} + + PointerUnion4(PT1 V) { Val = InnerUnion1(V); } + PointerUnion4(PT2 V) { Val = InnerUnion1(V); } + PointerUnion4(PT3 V) { Val = InnerUnion2(V); } + PointerUnion4(PT4 V) { Val = InnerUnion2(V); } + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { return Val.isNull(); } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. + template int is() const { + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< + PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + return Val.template is() && Val.template get().template is(); + } + + /// Returns the value of the specified pointer type. + /// + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< + PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + return Val.template get().template get(); + } + + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } + + /// Assignment from nullptr which just clears the union. + const PointerUnion4 &operator=(std::nullptr_t) { + Val = nullptr; + return *this; + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. 
+ const PointerUnion4 &operator=(const PT1 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT2 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT3 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT4 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion4 getFromOpaqueValue(void *VP) { + PointerUnion4 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +// Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits,PT2bits)-2. +template +class PointerLikeTypeTraits> { +public: + static inline void * + getAsVoidPointer(const PointerUnion4 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion4 getFromVoidPointer(void *P) { + return PointerUnion4::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion4::ValTy>::NumLowBitsAvailable }; +}; + +// Teach DenseMap how to use PointerUnions as keys. +template struct DenseMapInfo> { + typedef PointerUnion Pair; + typedef DenseMapInfo FirstInfo; + typedef DenseMapInfo SecondInfo; + + static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); } + static inline Pair getTombstoneKey() { + return Pair(FirstInfo::getTombstoneKey()); + } + static unsigned getHashValue(const Pair &PairVal) { + intptr_t key = (intptr_t)PairVal.getOpaqueValue(); + return DenseMapInfo::getHashValue(key); + } + static bool isEqual(const Pair &LHS, const Pair &RHS) { + return LHS.template is() == RHS.template is() && + (LHS.template is() ? FirstInfo::isEqual(LHS.template get(), + RHS.template get()) + : SecondInfo::isEqual(LHS.template get(), + RHS.template get())); + } +}; + } #endif diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index 759a2db24f2a..ce343a161b7b 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -215,8 +215,8 @@ struct ipo_iterator : public po_iterator, SetType, External > { }; template -ipo_iterator ipo_begin(const T &G, bool Reverse = false) { - return ipo_iterator::begin(G, Reverse); +ipo_iterator ipo_begin(const T &G) { + return ipo_iterator::begin(G); } template @@ -225,8 +225,8 @@ ipo_iterator ipo_end(const T &G){ } template -iterator_range> inverse_post_order(const T &G, bool Reverse = false) { - return make_range(ipo_begin(G, Reverse), ipo_end(G)); +iterator_range> inverse_post_order(const T &G) { + return make_range(ipo_begin(G), ipo_end(G)); } // Provide global definitions of external inverse postorder iterators... diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index b68345a1dcf6..d4360fa8d218 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -196,6 +196,41 @@ inline mapped_iterator map_iterator(const ItTy &I, FuncTy F) { return mapped_iterator(I, F); } +/// \brief Metafunction to determine if type T has a member called rbegin(). +template struct has_rbegin { + template static char(&f(const U &, decltype(&U::rbegin)))[1]; + static char(&f(...))[2]; + const static bool value = sizeof(f(std::declval(), nullptr)) == 1; +}; + +// Returns an iterator_range over the given container which iterates in reverse. 
+// Note that the container must have rbegin()/rend() methods for this to work. +template +auto reverse(ContainerTy &&C, + typename std::enable_if::value>::type * = + nullptr) -> decltype(make_range(C.rbegin(), C.rend())) { + return make_range(C.rbegin(), C.rend()); +} + +// Returns a std::reverse_iterator wrapped around the given iterator. +template +std::reverse_iterator make_reverse_iterator(IteratorTy It) { + return std::reverse_iterator(It); +} + +// Returns an iterator_range over the given container which iterates in reverse. +// Note that the container must have begin()/end() methods which return +// bidirectional iterators for this to work. +template +auto reverse( + ContainerTy &&C, + typename std::enable_if::value>::type * = nullptr) + -> decltype(make_range(llvm::make_reverse_iterator(std::end(C)), + llvm::make_reverse_iterator(std::begin(C)))) { + return make_range(llvm::make_reverse_iterator(std::end(C)), + llvm::make_reverse_iterator(std::begin(C))); +} + //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// @@ -329,13 +364,28 @@ void DeleteContainerSeconds(Container &C) { } /// Provide wrappers to std::all_of which take ranges instead of having to pass -/// being/end explicitly. +/// begin/end explicitly. template bool all_of(R &&Range, UnaryPredicate &&P) { return std::all_of(Range.begin(), Range.end(), std::forward(P)); } +/// Provide wrappers to std::any_of which take ranges instead of having to pass +/// begin/end explicitly. +template +bool any_of(R &&Range, UnaryPredicate &&P) { + return std::any_of(Range.begin(), Range.end(), + std::forward(P)); +} + +/// Provide wrappers to std::find which take ranges instead of having to pass +/// begin/end explicitly. +template +auto find(R &&Range, const T &val) -> decltype(Range.begin()) { + return std::find(Range.begin(), Range.end(), val); +} + //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index 5abe76c12259..4af3d6d37e33 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -1,4 +1,4 @@ -//===- ScopedHashTable.h - A simple scoped hash table ---------------------===// +//===- ScopedHashTable.h - A simple scoped hash table -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -47,8 +47,8 @@ class ScopedHashTableVal { K Key; V Val; ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {} -public: +public: const K &getKey() const { return Key; } const V &getValue() const { return Val; } V &getValue() { return Val; } @@ -56,7 +56,7 @@ public: ScopedHashTableVal *getNextForKey() { return NextForKey; } const ScopedHashTableVal *getNextForKey() const { return NextForKey; } ScopedHashTableVal *getNextInScope() { return NextInScope; } - + template static ScopedHashTableVal *Create(ScopedHashTableVal *nextInScope, ScopedHashTableVal *nextForKey, @@ -66,12 +66,11 @@ public: // Set up the value. new (New) ScopedHashTableVal(key, val); New->NextInScope = nextInScope; - New->NextForKey = nextForKey; + New->NextForKey = nextForKey; return New; } - - template - void Destroy(AllocatorTy &Allocator) { + + template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. 
this->~ScopedHashTableVal(); Allocator.Deallocate(this); @@ -90,15 +89,16 @@ class ScopedHashTableScope { /// LastValInScope - This is the last value that was inserted for this scope /// or null if none have been inserted yet. ScopedHashTableVal *LastValInScope; - void operator=(ScopedHashTableScope&) = delete; - ScopedHashTableScope(ScopedHashTableScope&) = delete; + void operator=(ScopedHashTableScope &) = delete; + ScopedHashTableScope(ScopedHashTableScope &) = delete; + public: ScopedHashTableScope(ScopedHashTable &HT); ~ScopedHashTableScope(); ScopedHashTableScope *getParentScope() { return PrevScope; } const ScopedHashTableScope *getParentScope() const { return PrevScope; } - + private: friend class ScopedHashTable; ScopedHashTableVal *getLastValInScope() { @@ -109,10 +109,10 @@ private: } }; - -template > +template > class ScopedHashTableIterator { ScopedHashTableVal *Node; + public: ScopedHashTableIterator(ScopedHashTableVal *node) : Node(node) {} @@ -141,7 +141,6 @@ public: } }; - template class ScopedHashTable { public: @@ -149,23 +148,24 @@ public: /// to the name of the scope for this hash table. typedef ScopedHashTableScope ScopeTy; typedef unsigned size_type; + private: typedef ScopedHashTableVal ValTy; DenseMap TopLevelMap; ScopeTy *CurScope; - + AllocatorTy Allocator; - - ScopedHashTable(const ScopedHashTable&); // NOT YET IMPLEMENTED - void operator=(const ScopedHashTable&); // NOT YET IMPLEMENTED + + ScopedHashTable(const ScopedHashTable &); // NOT YET IMPLEMENTED + void operator=(const ScopedHashTable &); // NOT YET IMPLEMENTED friend class ScopedHashTableScope; + public: ScopedHashTable() : CurScope(nullptr) {} ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {} ~ScopedHashTable() { assert(!CurScope && TopLevelMap.empty() && "Scope imbalance!"); } - /// Access to the allocator. AllocatorTy &getAllocator() { return Allocator; } @@ -180,7 +180,7 @@ public: typename DenseMap::iterator I = TopLevelMap.find(Key); if (I != TopLevelMap.end()) return I->second->getValue(); - + return V(); } @@ -198,7 +198,7 @@ public: if (I == TopLevelMap.end()) return end(); return iterator(I->second); } - + ScopeTy *getCurScope() { return CurScope; } const ScopeTy *getCurScope() const { return CurScope; } diff --git a/include/llvm/ADT/SetOperations.h b/include/llvm/ADT/SetOperations.h index 71f5db380f6e..7c9f2fbe066e 100644 --- a/include/llvm/ADT/SetOperations.h +++ b/include/llvm/ADT/SetOperations.h @@ -39,7 +39,7 @@ bool set_union(S1Ty &S1, const S2Ty &S2) { template void set_intersect(S1Ty &S1, const S2Ty &S2) { for (typename S1Ty::iterator I = S1.begin(); I != S1.end();) { - const typename S1Ty::key_type &E = *I; + const auto &E = *I; ++I; if (!S2.count(E)) S1.erase(E); // Erase element if not in S2 } diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h index a7fd408c854a..bc563570c203 100644 --- a/include/llvm/ADT/SetVector.h +++ b/include/llvm/ADT/SetVector.h @@ -20,6 +20,7 @@ #ifndef LLVM_ADT_SETVECTOR_H #define LLVM_ADT_SETVECTOR_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include #include @@ -33,7 +34,7 @@ namespace llvm { /// property of a deterministic iteration order. The order of iteration is the /// order of insertion. 
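// Usage sketch (illustrative): membership tests are set-backed, but
// iteration visits elements in insertion order; the reverse iterators added
// in this hunk visit them newest-first.
#include "llvm/ADT/SetVector.h"

void setVectorDemo() {
  llvm::SetVector<int> SV;
  SV.insert(3);
  SV.insert(1);
  SV.insert(3);              // duplicate, ignored
  int Newest = *SV.rbegin(); // 1
  (void)Newest;
}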
template , - typename Set = SmallSet > + typename Set = DenseSet> class SetVector { public: typedef T value_type; @@ -44,6 +45,8 @@ public: typedef Vector vector_type; typedef typename vector_type::const_iterator iterator; typedef typename vector_type::const_iterator const_iterator; + typedef typename vector_type::const_reverse_iterator reverse_iterator; + typedef typename vector_type::const_reverse_iterator const_reverse_iterator; typedef typename vector_type::size_type size_type; /// \brief Construct an empty SetVector @@ -55,6 +58,8 @@ public: insert(Start, End); } + ArrayRef getArrayRef() const { return vector_; } + /// \brief Determine if the SetVector is empty or not. bool empty() const { return vector_.empty(); @@ -85,6 +90,26 @@ public: return vector_.end(); } + /// \brief Get an reverse_iterator to the end of the SetVector. + reverse_iterator rbegin() { + return vector_.rbegin(); + } + + /// \brief Get a const_reverse_iterator to the end of the SetVector. + const_reverse_iterator rbegin() const { + return vector_.rbegin(); + } + + /// \brief Get a reverse_iterator to the beginning of the SetVector. + reverse_iterator rend() { + return vector_.rend(); + } + + /// \brief Get a const_reverse_iterator to the beginning of the SetVector. + const_reverse_iterator rend() const { + return vector_.rend(); + } + /// \brief Return the last element of the SetVector. const T &back() const { assert(!empty() && "Cannot call back() on empty SetVector!"); @@ -150,7 +175,6 @@ public: return true; } - /// \brief Count the number of elements of a given key in the SetVector. /// \returns 0 if the element is not in the SetVector, 1 if it is. size_type count(const key_type &key) const { @@ -169,7 +193,7 @@ public: set_.erase(back()); vector_.pop_back(); } - + T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() { T Ret = back(); pop_back(); diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index ae3d645396fd..4aa3bc217f41 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -551,19 +551,18 @@ public: } private: - template + template void applyMask(const uint32_t *Mask, unsigned MaskWords) { - if (NumBaseBits == 64 && MaskWords >= 2) { - uint64_t M = Mask[0] | (uint64_t(Mask[1]) << 32); - if (InvertMask) M = ~M; - if (AddBits) setSmallBits(getSmallBits() | M); - else setSmallBits(getSmallBits() & ~M); - } else { - uint32_t M = Mask[0]; - if (InvertMask) M = ~M; - if (AddBits) setSmallBits(getSmallBits() | M); - else setSmallBits(getSmallBits() & ~M); - } + assert(MaskWords <= sizeof(uintptr_t) && "Mask is larger than base!"); + uintptr_t M = Mask[0]; + if (NumBaseBits == 64) + M |= uint64_t(Mask[1]) << 32; + if (InvertMask) + M = ~M; + if (AddBits) + setSmallBits(getSmallBits() | M); + else + setSmallBits(getSmallBits() & ~M); } }; diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 3e3c9c154ef4..3d98e8fac43b 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -48,6 +48,7 @@ class SmallPtrSetIteratorImpl; /// class SmallPtrSetImplBase { friend class SmallPtrSetIteratorImpl; + protected: /// SmallArray - Points to a fixed size set of buckets, used in 'small mode'. const void **SmallArray; @@ -133,6 +134,7 @@ private: void Grow(unsigned NewSize); void operator=(const SmallPtrSetImplBase &RHS) = delete; + protected: /// swap - Swaps the elements of two sets. /// Note: This method assumes that both sets have the same small size. 
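// Usage sketch (illustrative): SmallPtrSet stays heap-free in "small mode"
// until more than N (here 4) distinct pointers have been inserted.
#include "llvm/ADT/SmallPtrSet.h"

void smallPtrSetDemo() {
  llvm::SmallPtrSet<int *, 4> Ptrs;
  int A = 0, B = 0;
  Ptrs.insert(&A);
  Ptrs.insert(&B);
  bool Inserted = Ptrs.insert(&A).second; // false: &A is already present
  bool HasB = Ptrs.count(&B) != 0;        // true
  (void)Inserted; (void)HasB;
}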
@@ -148,6 +150,7 @@ class SmallPtrSetIteratorImpl { protected: const void *const *Bucket; const void *const *End; + public: explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E) : Bucket(BP), End(E) { @@ -178,14 +181,14 @@ protected: template class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { typedef PointerLikeTypeTraits PtrTraits; - + public: typedef PtrTy value_type; typedef PtrTy reference; typedef PtrTy pointer; typedef std::ptrdiff_t difference_type; typedef std::forward_iterator_tag iterator_category; - + explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) : SmallPtrSetIteratorImpl(BP, E) {} @@ -231,7 +234,6 @@ template struct RoundUpToPowerOfTwo { enum { Val = RoundUpToPowerOfTwoH::Val }; }; - /// \brief A templated base class for \c SmallPtrSet which provides the /// typesafe interface that is common across all small sizes. @@ -242,7 +244,8 @@ template class SmallPtrSetImpl : public SmallPtrSetImplBase { typedef PointerLikeTypeTraits PtrTraits; - SmallPtrSetImpl(const SmallPtrSetImpl&) = delete; + SmallPtrSetImpl(const SmallPtrSetImpl &) = delete; + protected: // Constructors that forward to the base. SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl &that) @@ -303,6 +306,7 @@ class SmallPtrSet : public SmallPtrSetImpl { enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; /// SmallStorage - Fixed size storage used in 'small mode'. const void *SmallStorage[SmallSizePowTwo]; + public: SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {} SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {} @@ -333,7 +337,6 @@ public: SmallPtrSetImplBase::swap(RHS); } }; - } namespace std { diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index bc6493554c8b..39a57b87b2a7 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -37,6 +37,7 @@ class SmallSet { std::set Set; typedef typename SmallVector::const_iterator VIterator; typedef typename SmallVector::iterator mutable_iterator; + public: typedef size_t size_type; SmallSet() {} @@ -92,7 +93,7 @@ public: for (; I != E; ++I) insert(*I); } - + bool erase(const T &V) { if (!isSmall()) return Set.erase(V); @@ -108,6 +109,7 @@ public: Vector.clear(); Set.clear(); } + private: bool isSmall() const { return Set.empty(); } diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index b9384702c3ba..d1062acbbb61 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -109,9 +109,13 @@ public: typedef const T *const_pointer; // forward iterator creation methods. + LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin() { return (iterator)this->BeginX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_iterator begin() const { return (const_iterator)this->BeginX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end() { return (iterator)this->EndX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_iterator end() const { return (const_iterator)this->EndX; } protected: iterator capacity_ptr() { return (iterator)this->CapacityX; } @@ -124,6 +128,7 @@ public: reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin());} + LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const { return end()-begin(); } size_type max_size() const { return size_type(-1) / sizeof(T); } @@ -135,10 +140,12 @@ public: /// Return a pointer to the vector's buffer, even if empty(). 
const_pointer data() const { return const_pointer(begin()); } + LLVM_ATTRIBUTE_ALWAYS_INLINE reference operator[](size_type idx) { assert(idx < size()); return begin()[idx]; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_reference operator[](size_type idx) const { assert(idx < size()); return begin()[idx]; diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 20cbe2cddfc2..e6e72413da4e 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -39,7 +39,6 @@ namespace llvm { /// etc) do not perform as well in practice as a linked list with this iterator /// kept up to date. They are also significantly more memory intensive. - template struct SparseBitVectorElement : public ilist_node > { @@ -204,6 +203,7 @@ public: BecameZero = allzero; return changed; } + // Intersect this Element with the complement of RHS and return true if this // one changed. BecameZero is set to true if this element became all-zero // bits. @@ -226,6 +226,7 @@ public: BecameZero = allzero; return changed; } + // Three argument version of intersectWithComplement that intersects // RHS1 & ~RHS2 into this element void intersectWithComplement(const SparseBitVectorElement &RHS1, @@ -408,12 +409,13 @@ class SparseBitVector { // bitmap. return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber; } + bool operator!=(const SparseBitVectorIterator &RHS) const { return !(*this == RHS); } - SparseBitVectorIterator(): BitVector(NULL) { - } + SparseBitVectorIterator(): BitVector(nullptr) { + } SparseBitVectorIterator(const SparseBitVector *RHS, bool end = false):BitVector(RHS) { @@ -453,6 +455,9 @@ public: // Assignment SparseBitVector& operator=(const SparseBitVector& RHS) { + if (this == &RHS) + return *this; + Elements.clear(); ElementListConstIter ElementIter = RHS.Elements.begin(); @@ -559,6 +564,9 @@ public: // Union our bitmap with the RHS and return true if we changed. bool operator|=(const SparseBitVector &RHS) { + if (this == &RHS) + return false; + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -587,6 +595,9 @@ public: // Intersect our bitmap with the RHS and return true if ours changed. bool operator&=(const SparseBitVector &RHS) { + if (this == &RHS) + return false; + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -619,9 +630,13 @@ public: ElementListIter IterTmp = Iter1; ++Iter1; Elements.erase(IterTmp); + changed = true; } } - Elements.erase(Iter1, Elements.end()); + if (Iter1 != Elements.end()) { + Elements.erase(Iter1, Elements.end()); + changed = true; + } CurrElementIter = Elements.begin(); return changed; } @@ -629,6 +644,14 @@ public: // Intersect our bitmap with the complement of the RHS and return true // if ours changed. bool intersectWithComplement(const SparseBitVector &RHS) { + if (this == &RHS) { + if (!empty()) { + clear(); + return true; + } + return false; + } + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -669,12 +692,20 @@ public: return intersectWithComplement(*RHS); } - // Three argument version of intersectWithComplement. // Result of RHS1 & ~RHS2 is stored into this bitmap. 
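// Usage sketch (illustrative): the aliasing guards added in this hunk make
// self-referential calls like these well-defined.
#include "llvm/ADT/SparseBitVector.h"

void sparseBitVectorDemo() {
  llvm::SparseBitVector<> A, B;
  A.set(1);
  A.set(5);
  B.set(5);
  A.intersectWithComplement(A, B); // A = A & ~B, leaving {1}
  bool Changed = (A &= A);         // false: self-intersection is a no-op
  (void)Changed;
}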
void intersectWithComplement(const SparseBitVector &RHS1, const SparseBitVector &RHS2) { + if (this == &RHS1) { + intersectWithComplement(RHS2); + return; + } else if (this == &RHS2) { + SparseBitVector RHS2Copy(RHS2); + intersectWithComplement(RHS1, RHS2Copy); + return; + } + Elements.clear(); CurrElementIter = Elements.begin(); ElementListConstIter Iter1 = RHS1.Elements.begin(); @@ -719,8 +750,6 @@ public: Elements.push_back(NewElement); ++Iter1; } - - return; } void intersectWithComplement(const SparseBitVector *RHS1, @@ -855,9 +884,6 @@ operator-(const SparseBitVector &LHS, return Result; } - - - // Dump a SparseBitVector to a stream template void dump(const SparseBitVector &LHS, raw_ostream &out) { @@ -875,4 +901,4 @@ void dump(const SparseBitVector &LHS, raw_ostream &out) { } } // end namespace llvm -#endif +#endif // LLVM_ADT_SPARSEBITVECTOR_H diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index d98abc375e8a..7c84e3ef6b4d 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -28,9 +28,11 @@ #include "llvm/Support/Atomic.h" #include "llvm/Support/Valgrind.h" +#include namespace llvm { class raw_ostream; +class raw_fd_ostream; class Statistic { public: @@ -170,6 +172,9 @@ void EnableStatistics(); /// \brief Check if statistics are enabled. bool AreStatisticsEnabled(); +/// \brief Return a file stream to print our output on. +std::unique_ptr CreateInfoOutputFile(); + /// \brief Print statistics to the file returned by CreateInfoOutputFile(). void PrintStatistics(); diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 9d038560bf92..700bb9e10ef7 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -30,6 +30,7 @@ namespace llvm { /// StringMapEntryBase - Shared base class of StringMapEntry instances. class StringMapEntryBase { unsigned StrLen; + public: explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {} @@ -48,6 +49,7 @@ protected: unsigned NumItems; unsigned NumTombstones; unsigned ItemSize; + protected: explicit StringMapImpl(unsigned itemSize) : TheTable(nullptr), @@ -85,8 +87,10 @@ protected: /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. StringMapEntryBase *RemoveKey(StringRef Key); + private: void init(unsigned Size); + public: static StringMapEntryBase *getTombstoneVal() { return (StringMapEntryBase*)-1; @@ -112,6 +116,7 @@ public: template class StringMapEntry : public StringMapEntryBase { StringMapEntry(StringMapEntry &E) = delete; + public: ValueTy second; @@ -205,7 +210,6 @@ public: } }; - /// StringMap - This is an unconventional map that is specialized for handling /// keys that are "strings", which are basically ranges of bytes. 
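On the Statistic.h hunk above: CreateInfoOutputFile() now returns an owning std::unique_ptr (hence the new include and the raw_fd_ostream forward declaration) instead of a stream the caller must manage by hand. Typical clients are unchanged; a sketch with an invented DEBUG_TYPE and counter name:

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "demo" // hypothetical component name
STATISTIC(NumWidgets, "Number of widgets processed");

int main() {
  llvm::EnableStatistics(); // normally driven by the -stats flag
  for (int I = 0; I != 3; ++I)
    ++NumWidgets;
  // Internally this writes to the stream obtained from the new
  // CreateInfoOutputFile(), whose lifetime the unique_ptr now manages.
  llvm::PrintStatistics();
  return 0;
}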
This does some /// funky memory allocation and hashing things to make it extremely efficient, @@ -213,9 +217,10 @@ public: template class StringMap : public StringMapImpl { AllocatorTy Allocator; + public: typedef StringMapEntry MapEntryTy; - + StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} @@ -227,6 +232,13 @@ public: : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))), Allocator(A) {} + StringMap(std::initializer_list> List) + : StringMapImpl(static_cast(sizeof(MapEntryTy))) { + for (const auto &P : List) { + insert(P); + } + } + StringMap(StringMap &&RHS) : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {} @@ -386,11 +398,10 @@ public: } }; - -template -class StringMapConstIterator { +template class StringMapConstIterator { protected: StringMapEntryBase **Ptr; + public: typedef StringMapEntry value_type; @@ -447,7 +458,6 @@ public: return static_cast*>(*this->Ptr); } }; - } #endif diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 95660a49f1f1..350032b8c4e7 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -10,6 +10,7 @@ #ifndef LLVM_ADT_STRINGREF_H #define LLVM_ADT_STRINGREF_H +#include "llvm/Support/Compiler.h" #include #include #include @@ -53,6 +54,7 @@ namespace llvm { // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version + LLVM_ATTRIBUTE_ALWAYS_INLINE static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { if (Length == 0) { return 0; } return ::memcmp(Lhs,Rhs,Length); @@ -73,6 +75,7 @@ namespace llvm { } /// Construct a string ref from a pointer and length. + LLVM_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ StringRef(const char *data, size_t length) : Data(data), Length(length) { assert((data || length == 0) && @@ -80,6 +83,7 @@ namespace llvm { } /// Construct a string ref from an std::string. + LLVM_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ StringRef(const std::string &Str) : Data(Str.data()), Length(Str.length()) {} @@ -104,12 +108,15 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). + LLVM_ATTRIBUTE_ALWAYS_INLINE const char *data() const { return Data; } /// empty - Check if the string is empty. + LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const { return Length == 0; } /// size - Get the string size. + LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const { return Length; } /// front - Get the first character in the string. @@ -133,6 +140,7 @@ namespace llvm { /// equals - Check for string equality, this is more efficient than /// compare() when the relative ordering of inequal strings isn't needed. + LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const { return (Length == RHS.Length && compareMemory(Data, RHS.Data, RHS.Length) == 0); @@ -145,6 +153,7 @@ namespace llvm { /// compare - Compare two strings; the result is -1, 0, or 1 if this string /// is lexicographically less than, equal to, or greater than the \p RHS. + LLVM_ATTRIBUTE_ALWAYS_INLINE int compare(StringRef RHS) const { // Check the prefix for a mismatch. if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) @@ -212,6 +221,7 @@ namespace llvm { /// @{ /// Check if this string starts with the given \p Prefix. 
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool startswith(StringRef Prefix) const {
       return Length >= Prefix.Length &&
              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
     }
@@ -221,6 +231,7 @@
     bool startswith_lower(StringRef Prefix) const;

     /// Check if this string ends with the given \p Suffix.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool endswith(StringRef Suffix) const {
       return Length >= Suffix.Length &&
              compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
@@ -237,6 +248,7 @@
     ///
     /// \returns The index of the first occurrence of \p C, or npos if not
     /// found.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t find(char C, size_t From = 0) const {
       size_t FindBegin = std::min(From, Length);
       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
@@ -402,6 +414,7 @@
     /// \param N The number of characters to include in the substring. If N
     /// exceeds the number of characters remaining in the string, the string
     /// suffix (starting with \p Start) will be returned.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef substr(size_t Start, size_t N = npos) const {
       Start = std::min(Start, Length);
       return StringRef(Data + Start, std::min(N, Length - Start));
@@ -409,6 +422,7 @@
     /// Return a StringRef equal to 'this' but with the first \p N elements
     /// dropped.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_front(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(N);
@@ -416,6 +430,7 @@
     /// Return a StringRef equal to 'this' but with the last \p N elements
     /// dropped.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_back(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(0, size()-N);
@@ -431,6 +446,7 @@
     /// substring. If this is npos, or less than \p Start, or exceeds the
     /// number of characters remaining in the string, the string suffix
     /// (starting with \p Start) will be returned.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef slice(size_t Start, size_t End) const {
       Start = std::min(Start, Length);
       End = std::min(std::max(Start, End), Length);
@@ -474,7 +490,7 @@
     /// Split into substrings around the occurrences of a separator string.
     ///
     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
     /// elements are added to A.
     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
     /// still count when considering \p MaxSplit.
@@ -489,6 +505,23 @@
                StringRef Separator, int MaxSplit = -1,
                bool KeepEmpty = true) const;

+    /// Split into substrings around the occurrences of a separator character.
+    ///
+    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
+    /// elements are added to A.
+    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
+    /// still count when considering \p MaxSplit.
+    /// A useful invariant is that
+    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
+    ///
+    /// \param A - Where to put the substrings.
+    /// \param Separator - The string to split on.
+    /// \param MaxSplit - The maximum number of times the string is split.
+    /// \param KeepEmpty - True if empty substrings should be added.
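The corrected sentence above is the heart of the split() change: MaxSplit bounds the number of splits performed, so up to MaxSplit + 1 pieces can land in the output vector. A sketch (after this patch the same call also accepts a plain char separator):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef S("a,b,c,d");
  llvm::SmallVector<llvm::StringRef, 4> Parts;

  // Two splits yield at most three elements; the tail stays unsplit.
  S.split(Parts, ",", /*MaxSplit=*/2, /*KeepEmpty=*/true);
  assert(Parts.size() == 3);
  assert(Parts[0] == "a" && Parts[1] == "b" && Parts[2] == "c,d");
  return 0;
}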
+ void split(SmallVectorImpl &A, char Separator, int MaxSplit = -1, + bool KeepEmpty = true) const; + /// Split into two substrings around the last occurrence of a separator /// character. /// @@ -530,10 +563,12 @@ namespace llvm { /// @name StringRef Comparison Operators /// @{ + LLVM_ATTRIBUTE_ALWAYS_INLINE inline bool operator==(StringRef LHS, StringRef RHS) { return LHS.equals(RHS); } + LLVM_ATTRIBUTE_ALWAYS_INLINE inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index 3e0cc200b6dd..08626dc7af84 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -23,6 +23,11 @@ namespace llvm { class StringSet : public llvm::StringMap { typedef llvm::StringMap base; public: + StringSet() = default; + StringSet(std::initializer_list S) { + for (StringRef X : S) + insert(X); + } std::pair insert(StringRef Key) { assert(!Key.empty()); diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h index 0393a0c373ef..42b0fc4bc441 100644 --- a/include/llvm/ADT/StringSwitch.h +++ b/include/llvm/ADT/StringSwitch.h @@ -14,6 +14,7 @@ #define LLVM_ADT_STRINGSWITCH_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include #include @@ -48,10 +49,12 @@ class StringSwitch { const T *Result; public: + LLVM_ATTRIBUTE_ALWAYS_INLINE explicit StringSwitch(StringRef S) : Str(S), Result(nullptr) { } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Case(const char (&S)[N], const T& Value) { if (!Result && N-1 == Str.size() && (std::memcmp(S, Str.data(), N-1) == 0)) { @@ -62,6 +65,7 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& EndsWith(const char (&S)[N], const T &Value) { if (!Result && Str.size() >= N-1 && std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) { @@ -72,6 +76,7 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& StartsWith(const char (&S)[N], const T &Value) { if (!Result && Str.size() >= N-1 && std::memcmp(S, Str.data(), N-1) == 0) { @@ -82,32 +87,66 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const T& Value) { - return Case(S0, Value).Case(S1, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const char (&S3)[N3], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) || + (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& 
Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const char (&S3)[N3], const char (&S4)[N4], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value) - .Case(S4, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) || + (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0) || + (N4-1 == Str.size() && std::memcmp(S4, Str.data(), N4-1) == 0))) { + Result = &Value; + } + + return *this; } + LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T& Value) const { if (Result) return *Result; @@ -115,6 +154,7 @@ public: return Value; } + LLVM_ATTRIBUTE_ALWAYS_INLINE operator R() const { assert(Result && "Fell off the end of a string-switch"); return *Result; diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index f29608f3d3d1..487aa46cf642 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -15,7 +15,7 @@ #include "llvm/ADT/SmallVector.h" namespace llvm { - + /// TinyPtrVector - This class is specialized for cases where there are /// normally 0 or 1 element in a vector, but is general enough to go beyond that /// when required. @@ -150,7 +150,6 @@ public: return Val.getAddrOfPtr1(); return Val.template get()->begin(); - } iterator end() { if (Val.template is()) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 947812d94ecb..e01db0a61fd5 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -50,6 +50,7 @@ public: armeb, // ARM (big endian): armeb aarch64, // AArch64 (little endian): aarch64 aarch64_be, // AArch64 (big endian): aarch64_be + avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) hexagon, // Hexagon: hexagon @@ -75,8 +76,8 @@ public: xcore, // XCore: xcore nvptx, // NVPTX: 32-bit nvptx64, // NVPTX: 64-bit - le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) - le64, // le64: generic little-endian 64-bit CPU (PNaCl / Emscripten) + le32, // le32: generic little-endian 32-bit CPU (PNaCl) + le64, // le64: generic little-endian 64-bit CPU (PNaCl) amdil, // AMDIL amdil64, // AMDIL with 64-bit pointers hsail, // AMD HSAIL @@ -92,12 +93,14 @@ public: enum SubArchType { NoSubArch, + ARMSubArch_v8_2a, ARMSubArch_v8_1a, ARMSubArch_v8, ARMSubArch_v7, ARMSubArch_v7em, ARMSubArch_v7m, ARMSubArch_v7s, + ARMSubArch_v7k, ARMSubArch_v6, ARMSubArch_v6m, ARMSubArch_v6k, @@ -124,7 +127,8 @@ public: MipsTechnologies, NVIDIA, CSR, - LastVendorType = CSR + Myriad, + LastVendorType = Myriad }; enum OSType { UnknownOS, @@ -153,7 +157,10 @@ public: NVCL, // NVIDIA OpenCL AMDHSA, // AMD HSA Runtime PS4, - LastOSType = PS4 + ELFIAMCU, + TvOS, // Apple tvOS + WatchOS, // Apple watchOS + LastOSType = WatchOS }; enum EnvironmentType { UnknownEnvironment, @@ -170,7 +177,9 @@ public: MSVC, Itanium, Cygnus, - LastEnvironmentType = Cygnus + AMDOpenCL, + CoreCLR, + LastEnvironmentType = CoreCLR }; enum ObjectFormatType { UnknownObjectFormat, @@ -205,7 +214,7 @@ public: /// @name Constructors /// @{ - /// \brief Default constructor is the same as an empty string and leaves all + /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. 
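Unrolling Cases() into a single comparison chain, as the hunks above do, lets the whole switch collapse into one always-inlined expression instead of a chain of Case() calls. Client code is unchanged; a toy sketch (the color mapping is invented):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

static int colorCode(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("red", 1)
      .Cases("green", "emerald", 2) // now one flattened memcmp chain
      .StartsWith("blue", 3)
      .Default(0);
}

int main() { return colorCode("emerald") == 2 ? 0 : 1; }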
Triple() : Data(), Arch(), Vendor(), OS(), Environment(), ObjectFormat() {} @@ -231,7 +240,7 @@ public: /// common case in which otherwise valid components are in the wrong order. static std::string normalize(StringRef Str); - /// \brief Return the normalized form of this triple's string. + /// Return the normalized form of this triple's string. std::string normalize() const { return normalize(Data); } /// @} @@ -259,7 +268,7 @@ public: /// getEnvironment - Get the parsed environment type of this triple. EnvironmentType getEnvironment() const { return Environment; } - /// \brief Parse the version number from the OS name component of the + /// Parse the version number from the OS name component of the /// triple, if present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). @@ -295,10 +304,15 @@ public: unsigned &Micro) const; /// getiOSVersion - Parse the version number as with getOSVersion. This should - /// only be called with IOS triples. + /// only be called with IOS or generic triples. void getiOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; + /// getWatchOSVersion - Parse the version number as with getOSVersion. This + /// should only be called with WatchOS or generic triples. + void getWatchOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const; + /// @} /// @name Direct Component Access /// @{ @@ -331,7 +345,7 @@ public: /// @name Convenience Predicates /// @{ - /// \brief Test whether the architecture is 64-bit + /// Test whether the architecture is 64-bit /// /// Note that this tests for 64-bit pointer width, and nothing else. Note /// that we intentionally expose only three predicates, 64-bit, 32-bit, and @@ -340,12 +354,12 @@ public: /// system is provided. bool isArch64Bit() const; - /// \brief Test whether the architecture is 32-bit + /// Test whether the architecture is 32-bit /// /// Note that this tests for 32-bit pointer width, and nothing else. bool isArch32Bit() const; - /// \brief Test whether the architecture is 16-bit + /// Test whether the architecture is 16-bit /// /// Note that this tests for 16-bit pointer width, and nothing else. bool isArch16Bit() const; @@ -396,13 +410,27 @@ public: } /// Is this an iOS triple. + /// Note: This identifies tvOS as a variant of iOS. If that ever + /// changes, i.e., if the two operating systems diverge or their version + /// numbers get out of sync, that will need to be changed. + /// watchOS has completely different version numbers so it is not included. bool isiOS() const { - return getOS() == Triple::IOS; + return getOS() == Triple::IOS || isTvOS(); } - /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). + /// Is this an Apple tvOS triple. + bool isTvOS() const { + return getOS() == Triple::TvOS; + } + + /// Is this an Apple watchOS triple. + bool isWatchOS() const { + return getOS() == Triple::WatchOS; + } + + /// isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS). bool isOSDarwin() const { - return isMacOSX() || isiOS(); + return isMacOSX() || isiOS() || isWatchOS(); } bool isOSNetBSD() const { @@ -427,16 +455,26 @@ public: return getOS() == Triple::Bitrig; } + bool isOSIAMCU() const { + return getOS() == Triple::ELFIAMCU; + } + + /// Checks if the environment could be MSVC. bool isWindowsMSVCEnvironment() const { return getOS() == Triple::Win32 && (getEnvironment() == Triple::UnknownEnvironment || getEnvironment() == Triple::MSVC); } + /// Checks if the environment is MSVC. 
bool isKnownWindowsMSVCEnvironment() const { return getOS() == Triple::Win32 && getEnvironment() == Triple::MSVC; } + bool isWindowsCoreCLREnvironment() const { + return getOS() == Triple::Win32 && getEnvironment() == Triple::CoreCLR; + } + bool isWindowsItaniumEnvironment() const { return getOS() == Triple::Win32 && getEnvironment() == Triple::Itanium; } @@ -449,60 +487,63 @@ public: return getOS() == Triple::Win32 && getEnvironment() == Triple::GNU; } - /// \brief Tests for either Cygwin or MinGW OS + /// Tests for either Cygwin or MinGW OS bool isOSCygMing() const { return isWindowsCygwinEnvironment() || isWindowsGNUEnvironment(); } - /// \brief Is this a "Windows" OS targeting a "MSVCRT.dll" environment. + /// Is this a "Windows" OS targeting a "MSVCRT.dll" environment. bool isOSMSVCRT() const { return isWindowsMSVCEnvironment() || isWindowsGNUEnvironment() || isWindowsItaniumEnvironment(); } - /// \brief Tests whether the OS is Windows. + /// Tests whether the OS is Windows. bool isOSWindows() const { return getOS() == Triple::Win32; } - /// \brief Tests whether the OS is NaCl (Native Client) + /// Tests whether the OS is NaCl (Native Client) bool isOSNaCl() const { return getOS() == Triple::NaCl; } - /// \brief Tests whether the OS is Linux. + /// Tests whether the OS is Linux. bool isOSLinux() const { return getOS() == Triple::Linux; } - /// \brief Tests whether the OS uses the ELF binary format. + /// Tests whether the OS uses the ELF binary format. bool isOSBinFormatELF() const { return getObjectFormat() == Triple::ELF; } - /// \brief Tests whether the OS uses the COFF binary format. + /// Tests whether the OS uses the COFF binary format. bool isOSBinFormatCOFF() const { return getObjectFormat() == Triple::COFF; } - /// \brief Tests whether the environment is MachO. + /// Tests whether the environment is MachO. bool isOSBinFormatMachO() const { return getObjectFormat() == Triple::MachO; } - /// \brief Tests whether the target is the PS4 CPU + /// Tests whether the target is the PS4 CPU bool isPS4CPU() const { return getArch() == Triple::x86_64 && getVendor() == Triple::SCEI && getOS() == Triple::PS4; } - /// \brief Tests whether the target is the PS4 platform + /// Tests whether the target is the PS4 platform bool isPS4() const { return getVendor() == Triple::SCEI && getOS() == Triple::PS4; } + /// Tests whether the target is Android + bool isAndroid() const { return getEnvironment() == Triple::Android; } + /// @} /// @name Mutators /// @{ @@ -553,7 +594,7 @@ public: /// @name Helpers to build variants of a particular triple. /// @{ - /// \brief Form a triple with a 32-bit variant of the current architecture. + /// Form a triple with a 32-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// @@ -561,7 +602,7 @@ public: /// architecture if no such variant can be found. llvm::Triple get32BitArchVariant() const; - /// \brief Form a triple with a 64-bit variant of the current architecture. + /// Form a triple with a 64-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// @@ -589,7 +630,7 @@ public: /// /// \param Arch the architecture name (e.g., "armv7s"). If it is an empty /// string then the triple's arch name is used. - const char* getARMCPUForArch(StringRef Arch = StringRef()) const; + StringRef getARMCPUForArch(StringRef Arch = StringRef()) const; /// @} /// @name Static helpers for IDs. 
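The Triple changes are easiest to check with a concrete query. Per the comments above, tvOS is treated as a variant of iOS while watchOS is kept separate because its version numbers differ, yet both remain Darwin. A sketch (the triple string is a plausible watch target, not taken from the patch):

#include "llvm/ADT/Triple.h"

int main() {
  llvm::Triple T("thumbv7k-apple-watchos2.0");
  bool Darwin = T.isOSDarwin(); // true: watchOS is a Darwin OS
  bool IOS = T.isiOS();         // false: watchOS is excluded on purpose
  bool Watch = T.isWatchOS();   // true
  return (Darwin && !IOS && Watch) ? 0 : 1;
}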
diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h
index a9cb2f5709eb..e1ab4b56023f 100644
--- a/include/llvm/ADT/UniqueVector.h
+++ b/include/llvm/ADT/UniqueVector.h
@@ -11,6 +11,7 @@
 #define LLVM_ADT_UNIQUEVECTOR_H

 #include <cassert>
+#include <cstddef>
 #include <map>
 #include <vector>

diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index a7b9306b3a73..3044a6c435f1 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -104,6 +104,53 @@ struct ilist_sentinel_traits {
   }
 };

+template <typename NodeTy> class ilist_half_node;
+template <typename NodeTy> class ilist_node;
+
+/// Traits with an embedded ilist_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_embedded_sentinel_traits {
+  /// Get hold of the node that marks the end of the list.
+  NodeTy *createSentinel() const {
+    // Since i(p)lists always publicly derive from their corresponding traits,
+    // placing a data member in this class will augment the i(p)list. But since
+    // the NodeTy is expected to publicly derive from ilist_node<NodeTy>,
+    // there is a legal viable downcast from it to NodeTy. We use this trick to
+    // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
+    // sentinel. Dereferencing the sentinel is forbidden (save the
+    // ilist_node<NodeTy>), so no one will ever notice the superposition.
+    return static_cast<NodeTy *>(&Sentinel);
+  }
+  static void destroySentinel(NodeTy *) {}
+
+  NodeTy *provideInitialHead() const { return createSentinel(); }
+  NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+  static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+  mutable ilist_node<NodeTy> Sentinel;
+};
+
+/// Traits with an embedded ilist_half_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_half_embedded_sentinel_traits {
+  /// Get hold of the node that marks the end of the list.
+  NodeTy *createSentinel() const {
+    // See comment in ilist_embedded_sentinel_traits::createSentinel().
+    return static_cast<NodeTy *>(&Sentinel);
+  }
+  static void destroySentinel(NodeTy *) {}
+
+  NodeTy *provideInitialHead() const { return createSentinel(); }
+  NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+  static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+  mutable ilist_half_node<NodeTy> Sentinel;
+};
+
 /// ilist_node_traits - A fragment for template traits for intrusive list
 /// that provides default node related operations.
 ///
@@ -173,8 +220,8 @@ private:
   template <class T> void operator-(T) const;

 public:
-  ilist_iterator(pointer NP) : NodePtr(NP) {}
-  ilist_iterator(reference NR) : NodePtr(&NR) {}
+  explicit ilist_iterator(pointer NP) : NodePtr(NP) {}
+  explicit ilist_iterator(reference NR) : NodePtr(&NR) {}
   ilist_iterator() : NodePtr(nullptr) {}

   // This is templated so that we can allow constructing a const iterator from
@@ -191,8 +238,10 @@ public:
     return *this;
   }

+  void reset(pointer NP) { NodePtr = NP; }
+
   // Accessors...
-  operator pointer() const {
+  explicit operator pointer() const {
     return NodePtr;
   }

@@ -202,11 +251,11 @@ public:
   pointer operator->() const { return &operator*(); }

   // Comparison operators
-  bool operator==(const ilist_iterator &RHS) const {
-    return NodePtr == RHS.NodePtr;
+  template <class Y> bool operator==(const ilist_iterator<Y> &RHS) const {
+    return NodePtr == RHS.getNodePtrUnchecked();
   }
-  bool operator!=(const ilist_iterator &RHS) const {
-    return NodePtr != RHS.NodePtr;
+  template <class Y> bool operator!=(const ilist_iterator<Y> &RHS) const {
+    return NodePtr != RHS.getNodePtrUnchecked();
   }

   // Increment and decrement operators...
@@ -422,7 +471,7 @@ public: this->setPrev(CurNode, New); this->addNodeToList(New); // Notify traits that we added a node... - return New; + return iterator(New); } iterator insertAfter(iterator where, NodeTy *New) { @@ -443,7 +492,7 @@ public: else Head = NextNode; this->setPrev(NextNode, PrevNode); - IT = NextNode; + IT.reset(NextNode); this->removeNodeFromList(Node); // Notify traits that we removed a node... // Set the next/prev pointers of the current node to null. This isn't @@ -461,12 +510,18 @@ public: return remove(MutIt); } + NodeTy *remove(NodeTy *IT) { return remove(iterator(IT)); } + NodeTy *remove(NodeTy &IT) { return remove(iterator(IT)); } + // erase - remove a node from the controlled sequence... and delete it. iterator erase(iterator where) { this->deleteNode(remove(where)); return where; } + iterator erase(NodeTy *IT) { return erase(iterator(IT)); } + iterator erase(NodeTy &IT) { return erase(iterator(IT)); } + /// Remove all nodes from the list like clear(), but do not call /// removeNodeFromList() or deleteNode(). /// @@ -522,7 +577,7 @@ private: this->setNext(Last, PosNext); this->setPrev(PosNext, Last); - this->transferNodesFromList(L2, First, PosNext); + this->transferNodesFromList(L2, iterator(First), iterator(PosNext)); // Now that everything is set, restore the pointers to the list sentinels. L2.setTail(L2Sentinel); @@ -579,6 +634,83 @@ public: void splice(iterator where, iplist &L2, iterator first, iterator last) { if (first != last) transfer(where, L2, first, last); } + void splice(iterator where, iplist &L2, NodeTy &N) { + splice(where, L2, iterator(N)); + } + void splice(iterator where, iplist &L2, NodeTy *N) { + splice(where, L2, iterator(N)); + } + + template + void merge(iplist &Right, Compare comp) { + if (this == &Right) + return; + iterator First1 = begin(), Last1 = end(); + iterator First2 = Right.begin(), Last2 = Right.end(); + while (First1 != Last1 && First2 != Last2) { + if (comp(*First2, *First1)) { + iterator Next = First2; + transfer(First1, Right, First2, ++Next); + First2 = Next; + } else { + ++First1; + } + } + if (First2 != Last2) + transfer(Last1, Right, First2, Last2); + } + void merge(iplist &Right) { return merge(Right, op_less); } + + template + void sort(Compare comp) { + // The list is empty, vacuously sorted. + if (empty()) + return; + // The list has a single element, vacuously sorted. + if (std::next(begin()) == end()) + return; + // Find the split point for the list. + iterator Center = begin(), End = begin(); + while (End != end() && std::next(End) != end()) { + Center = std::next(Center); + End = std::next(std::next(End)); + } + // Split the list into two. + iplist RightHalf; + RightHalf.splice(RightHalf.begin(), *this, Center, end()); + + // Sort the two sublists. + sort(comp); + RightHalf.sort(comp); + + // Merge the two sublists back together. + merge(RightHalf, comp); + } + void sort() { sort(op_less); } + + /// \brief Get the previous node, or \c nullptr for the list head. + NodeTy *getPrevNode(NodeTy &N) const { + auto I = N.getIterator(); + if (I == begin()) + return nullptr; + return &*std::prev(I); + } + /// \brief Get the previous node, or \c nullptr for the list head. + const NodeTy *getPrevNode(const NodeTy &N) const { + return getPrevNode(const_cast(N)); + } + + /// \brief Get the next node, or \c nullptr for the list tail. 
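The merge()/sort() members added above implement a textbook top-down merge sort on the intrusive list: locate the midpoint with a one-step/two-step walk, splice the right half into a scratch list, recurse, then merge by splicing runs back. A sketch with a hypothetical node type (iplist owns and deletes its nodes):

#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"

struct Entry : llvm::ilist_node<Entry> { // invented for illustration
  int Key = 0;
  Entry() = default;
  explicit Entry(int K) : Key(K) {}
};

int main() {
  llvm::iplist<Entry> L;
  for (int K : {3, 1, 2})
    L.push_back(new Entry(K));

  L.sort([](const Entry &A, const Entry &B) { return A.Key < B.Key; });
  return L.front().Key == 1 ? 0 : 1; // list now reads 1, 2, 3
}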
+ NodeTy *getNextNode(NodeTy &N) const { + auto Next = std::next(N.getIterator()); + if (Next == end()) + return nullptr; + return &*Next; + } + /// \brief Get the next node, or \c nullptr for the list tail. + const NodeTy *getNextNode(const NodeTy &N) const { + return getNextNode(const_cast(N)); + } }; diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h index 26d0b55e4093..7e5a0e0e5ad8 100644 --- a/include/llvm/ADT/ilist_node.h +++ b/include/llvm/ADT/ilist_node.h @@ -19,12 +19,15 @@ namespace llvm { template struct ilist_traits; +template struct ilist_embedded_sentinel_traits; +template struct ilist_half_embedded_sentinel_traits; /// ilist_half_node - Base class that provides prev services for sentinels. /// template class ilist_half_node { friend struct ilist_traits; + friend struct ilist_half_embedded_sentinel_traits; NodeTy *Prev; protected: NodeTy *getPrev() { return Prev; } @@ -36,6 +39,8 @@ protected: template struct ilist_nextprev_traits; +template class ilist_iterator; + /// ilist_node - Base class that provides next/prev services for nodes /// that use ilist_nextprev_traits or ilist_default_traits. /// @@ -43,6 +48,8 @@ template class ilist_node : private ilist_half_node { friend struct ilist_nextprev_traits; friend struct ilist_traits; + friend struct ilist_half_embedded_sentinel_traits; + friend struct ilist_embedded_sentinel_traits; NodeTy *Next; NodeTy *getNext() { return Next; } const NodeTy *getNext() const { return Next; } @@ -50,54 +57,64 @@ class ilist_node : private ilist_half_node { protected: ilist_node() : Next(nullptr) {} +public: + ilist_iterator getIterator() { + // FIXME: Stop downcasting to create the iterator (potential UB). + return ilist_iterator(static_cast(this)); + } + ilist_iterator getIterator() const { + // FIXME: Stop downcasting to create the iterator (potential UB). + return ilist_iterator(static_cast(this)); + } +}; + +/// An ilist node that can access its parent list. +/// +/// Requires \c NodeTy to have \a getParent() to find the parent node, and the +/// \c ParentTy to have \a getSublistAccess() to get a reference to the list. +template +class ilist_node_with_parent : public ilist_node { +protected: + ilist_node_with_parent() = default; + +private: + /// Forward to NodeTy::getParent(). + /// + /// Note: do not use the name "getParent()". We want a compile error + /// (instead of recursion) when the subclass fails to implement \a + /// getParent(). + const ParentTy *getNodeParent() const { + return static_cast(this)->getParent(); + } + public: /// @name Adjacent Node Accessors /// @{ - - /// \brief Get the previous node, or 0 for the list head. + /// \brief Get the previous node, or \c nullptr for the list head. NodeTy *getPrevNode() { - NodeTy *Prev = this->getPrev(); - - // Check for sentinel. - if (!Prev->getNext()) - return nullptr; - - return Prev; + // Should be separated to a reused function, but then we couldn't use auto + // (and would need the type of the list). + const auto &List = + getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); + return List.getPrevNode(*static_cast(this)); } - - /// \brief Get the previous node, or 0 for the list head. + /// \brief Get the previous node, or \c nullptr for the list head. const NodeTy *getPrevNode() const { - const NodeTy *Prev = this->getPrev(); - - // Check for sentinel. - if (!Prev->getNext()) - return nullptr; - - return Prev; + return const_cast(this)->getPrevNode(); } - /// \brief Get the next node, or 0 for the list tail. 
+ /// \brief Get the next node, or \c nullptr for the list tail. NodeTy *getNextNode() { - NodeTy *Next = getNext(); - - // Check for sentinel. - if (!Next->getNext()) - return nullptr; - - return Next; + // Should be separated to a reused function, but then we couldn't use auto + // (and would need the type of the list). + const auto &List = + getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); + return List.getNextNode(*static_cast(this)); } - - /// \brief Get the next node, or 0 for the list tail. + /// \brief Get the next node, or \c nullptr for the list tail. const NodeTy *getNextNode() const { - const NodeTy *Next = getNext(); - - // Check for sentinel. - if (!Next->getNext()) - return nullptr; - - return Next; + return const_cast(this)->getNextNode(); } - /// @} }; diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h index 523a86f02e08..3dd679bd9b79 100644 --- a/include/llvm/ADT/iterator_range.h +++ b/include/llvm/ADT/iterator_range.h @@ -20,6 +20,7 @@ #define LLVM_ADT_ITERATOR_RANGE_H #include +#include namespace llvm { @@ -32,6 +33,12 @@ class iterator_range { IteratorT begin_iterator, end_iterator; public: + //TODO: Add SFINAE to test that the Container's iterators match the range's + // iterators. + template + iterator_range(Container &&c) + //TODO: Consider ADL/non-member begin/end calls. + : begin_iterator(c.begin()), end_iterator(c.end()) {} iterator_range(IteratorT begin_iterator, IteratorT end_iterator) : begin_iterator(std::move(begin_iterator)), end_iterator(std::move(end_iterator)) {} @@ -51,6 +58,11 @@ template iterator_range make_range(T x, T y) { template iterator_range make_range(std::pair p) { return iterator_range(std::move(p.first), std::move(p.second)); } + +template +iterator_range()))> drop_begin(T &&t, int n) { + return make_range(std::next(begin(t), n), end(t)); +} } #endif diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 36f8199a0322..5cc840a64a62 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -41,10 +41,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/Analysis/MemoryLocation.h" namespace llvm { - +class BasicAAResult; class LoadInst; class StoreInst; class VAArgInst; @@ -55,6 +56,7 @@ class AnalysisUsage; class MemTransferInst; class MemIntrinsic; class DominatorTree; +class OrderedBasicBlock; /// The possible results of an alias query. /// @@ -84,261 +86,390 @@ enum AliasResult { MustAlias, }; -class AliasAnalysis { -protected: - const DataLayout *DL; - const TargetLibraryInfo *TLI; +/// Flags indicating whether a memory access modifies or references memory. +/// +/// This is no access at all, a modification, a reference, or both +/// a modification and a reference. These are specifically structured such that +/// they form a two bit matrix and bit-tests for 'mod' or 'ref' work with any +/// of the possible values. +enum ModRefInfo { + /// The access neither references nor modifies the value stored in memory. + MRI_NoModRef = 0, + /// The access references the value stored in memory. + MRI_Ref = 1, + /// The access modifies the value stored in memory. + MRI_Mod = 2, + /// The access both references and modifies the value stored in memory. + MRI_ModRef = MRI_Ref | MRI_Mod +}; -private: - AliasAnalysis *AA; // Previous Alias Analysis to chain to. +/// The locations at which a function might access memory. 
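Two small additions in the iterator_range.h hunk above: a range can now be constructed straight from a container, and drop_begin() yields a view that skips the first n elements. A sketch over a plain std::vector:

#include "llvm/ADT/iterator_range.h"
#include <vector>

int main() {
  std::vector<int> V = {10, 20, 30, 40};

  int Sum = 0;
  for (int X : llvm::drop_begin(V, 1)) // visits 20, 30, 40
    Sum += X;

  // The new converting constructor accepts the container directly.
  llvm::iterator_range<std::vector<int>::iterator> R(V);
  return (Sum == 90 && *R.begin() == 10) ? 0 : 1;
}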
+/// +/// These are primarily used in conjunction with the \c AccessKind bits to +/// describe both the nature of access and the locations of access for a +/// function call. +enum FunctionModRefLocation { + /// Base case is no access to memory. + FMRL_Nowhere = 0, + /// Access to memory via argument pointers. + FMRL_ArgumentPointees = 4, + /// Access to any memory. + FMRL_Anywhere = 8 | FMRL_ArgumentPointees +}; -protected: - /// InitializeAliasAnalysis - Subclasses must call this method to initialize - /// the AliasAnalysis interface before any other methods are called. This is - /// typically called by the run* methods of these subclasses. This may be - /// called multiple times. +/// Summary of how a function affects memory in the program. +/// +/// Loads from constant globals are not considered memory accesses for this +/// interface. Also, functions may freely modify stack space local to their +/// invocation without having to report it through these interfaces. +enum FunctionModRefBehavior { + /// This function does not perform any non-local loads or stores to memory. /// - void InitializeAliasAnalysis(Pass *P, const DataLayout *DL); + /// This property corresponds to the GCC 'const' attribute. + /// This property corresponds to the LLVM IR 'readnone' attribute. + /// This property corresponds to the IntrNoMem LLVM intrinsic flag. + FMRB_DoesNotAccessMemory = FMRL_Nowhere | MRI_NoModRef, - /// getAnalysisUsage - All alias analysis implementations should invoke this - /// directly (using AliasAnalysis::getAnalysisUsage(AU)). - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + /// The only memory references in this function (if it has any) are + /// non-volatile loads from objects pointed to by its pointer-typed + /// arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. + FMRB_OnlyReadsArgumentPointees = FMRL_ArgumentPointees | MRI_Ref, + /// The only memory references in this function (if it has any) are + /// non-volatile loads and stores from objects pointed to by its + /// pointer-typed arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag. + FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef, + + /// This function does not perform any non-local stores or volatile loads, + /// but may read from any memory location. + /// + /// This property corresponds to the GCC 'pure' attribute. + /// This property corresponds to the LLVM IR 'readonly' attribute. + /// This property corresponds to the IntrReadMem LLVM intrinsic flag. + FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref, + + /// This indicates that the function could not be classified into one of the + /// behaviors above. + FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef +}; + +class AAResults { public: - static char ID; // Class identification, replacement for typeinfo - AliasAnalysis() : DL(nullptr), TLI(nullptr), AA(nullptr) {} - virtual ~AliasAnalysis(); // We want to be subclassed + // Make these results default constructable and movable. We have to spell + // these out because MSVC won't synthesize them. + AAResults() {} + AAResults(AAResults &&Arg); + AAResults &operator=(AAResults &&Arg); + ~AAResults(); - /// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo - /// object, or null if no TargetLibraryInfo object is available. 
- /// - const TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } - - /// getTypeStoreSize - Return the DataLayout store size for the given type, - /// if known, or a conservative value otherwise. - /// - uint64_t getTypeStoreSize(Type *Ty); + /// Register a specific AA result. + template void addAAResult(AAResultT &AAResult) { + // FIXME: We should use a much lighter weight system than the usual + // polymorphic pattern because we don't own AAResult. It should + // ideally involve two pointers and no separate allocation. + AAs.emplace_back(new Model(AAResult, *this)); + } //===--------------------------------------------------------------------===// - /// Alias Queries... - /// + /// \name Alias Queries + /// @{ - /// alias - The main low level interface to the alias analysis implementation. + /// The main low level interface to the alias analysis implementation. /// Returns an AliasResult indicating whether the two pointers are aliased to - /// each other. This is the interface that must be implemented by specific + /// each other. This is the interface that must be implemented by specific /// alias analysis implementations. - virtual AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB); + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); - /// alias - A convenience wrapper. - AliasResult alias(const Value *V1, uint64_t V1Size, - const Value *V2, uint64_t V2Size) { + /// A convenience wrapper around the primary \c alias interface. + AliasResult alias(const Value *V1, uint64_t V1Size, const Value *V2, + uint64_t V2Size) { return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); } - /// alias - A convenience wrapper. + /// A convenience wrapper around the primary \c alias interface. AliasResult alias(const Value *V1, const Value *V2) { return alias(V1, MemoryLocation::UnknownSize, V2, MemoryLocation::UnknownSize); } - /// isNoAlias - A trivial helper function to check to see if the specified - /// pointers are no-alias. + /// A trivial helper function to check to see if the specified pointers are + /// no-alias. bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return alias(LocA, LocB) == NoAlias; } - /// isNoAlias - A convenience wrapper. - bool isNoAlias(const Value *V1, uint64_t V1Size, - const Value *V2, uint64_t V2Size) { + /// A convenience wrapper around the \c isNoAlias helper interface. + bool isNoAlias(const Value *V1, uint64_t V1Size, const Value *V2, + uint64_t V2Size) { return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); } - - /// isNoAlias - A convenience wrapper. + + /// A convenience wrapper around the \c isNoAlias helper interface. bool isNoAlias(const Value *V1, const Value *V2) { return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); } - - /// isMustAlias - A convenience wrapper. + + /// A trivial helper function to check to see if the specified pointers are + /// must-alias. bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return alias(LocA, LocB) == MustAlias; } - /// isMustAlias - A convenience wrapper. + /// A convenience wrapper around the \c isMustAlias helper interface. bool isMustAlias(const Value *V1, const Value *V2) { return alias(V1, 1, V2, 1) == MustAlias; } - - /// pointsToConstantMemory - If the specified memory location is - /// known to be constant, return true. If OrLocal is true and the - /// specified memory location is known to be "local" (derived from - /// an alloca), return true. Otherwise return false. 
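AAResults, introduced above, fronts a set of registered AA implementations with one query surface. A sketch of a client; it assumes AA was already populated via addAAResult() (for example with a BasicAAResult), which is outside this fragment, and mayOverlap is an invented helper name:

#include "llvm/Analysis/AliasAnalysis.h"

using namespace llvm;

static bool mayOverlap(AAResults &AA, const Value *A, const Value *B) {
  // isNoAlias()/isMustAlias() above are thin wrappers over this call;
  // MemoryLocation without a size means "unknown extent".
  return AA.alias(MemoryLocation(A), MemoryLocation(B)) != NoAlias;
}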
- virtual bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal = false); - /// pointsToConstantMemory - A convenient wrapper. + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); + + /// A convenience wrapper around the primary \c pointsToConstantMemory + /// interface. bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { return pointsToConstantMemory(MemoryLocation(P), OrLocal); } + /// @} //===--------------------------------------------------------------------===// - /// Simple mod/ref information... - /// - - /// ModRefResult - Represent the result of a mod/ref query. Mod and Ref are - /// bits which may be or'd together. - /// - enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 }; - - /// These values define additional bits used to define the - /// ModRefBehavior values. - enum { Nowhere = 0, ArgumentPointees = 4, Anywhere = 8 | ArgumentPointees }; - - /// ModRefBehavior - Summary of how a function affects memory in the program. - /// Loads from constant globals are not considered memory accesses for this - /// interface. Also, functions may freely modify stack space local to their - /// invocation without having to report it through these interfaces. - enum ModRefBehavior { - /// DoesNotAccessMemory - This function does not perform any non-local loads - /// or stores to memory. - /// - /// This property corresponds to the GCC 'const' attribute. - /// This property corresponds to the LLVM IR 'readnone' attribute. - /// This property corresponds to the IntrNoMem LLVM intrinsic flag. - DoesNotAccessMemory = Nowhere | NoModRef, - - /// OnlyReadsArgumentPointees - The only memory references in this function - /// (if it has any) are non-volatile loads from objects pointed to by its - /// pointer-typed arguments, with arbitrary offsets. - /// - /// This property corresponds to the LLVM IR 'argmemonly' attribute combined - /// with 'readonly' attribute. - /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. - OnlyReadsArgumentPointees = ArgumentPointees | Ref, - - /// OnlyAccessesArgumentPointees - The only memory references in this - /// function (if it has any) are non-volatile loads and stores from objects - /// pointed to by its pointer-typed arguments, with arbitrary offsets. - /// - /// This property corresponds to the LLVM IR 'argmemonly' attribute. - /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag. - OnlyAccessesArgumentPointees = ArgumentPointees | ModRef, - - /// OnlyReadsMemory - This function does not perform any non-local stores or - /// volatile loads, but may read from any memory location. - /// - /// This property corresponds to the GCC 'pure' attribute. - /// This property corresponds to the LLVM IR 'readonly' attribute. - /// This property corresponds to the IntrReadMem LLVM intrinsic flag. - OnlyReadsMemory = Anywhere | Ref, - - /// UnknownModRefBehavior - This indicates that the function could not be - /// classified into one of the behaviors above. - UnknownModRefBehavior = Anywhere | ModRef - }; + /// \name Simple mod/ref information + /// @{ /// Get the ModRef info associated with a pointer argument of a callsite. The /// result's bits are set to indicate the allowed aliasing ModRef kinds. 
Note /// that these bits do not necessarily account for the overall behavior of /// the function, but rather only provide additional per-argument /// information. - virtual ModRefResult getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); - /// getModRefBehavior - Return the behavior when calling the given call site. - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + /// Return the behavior of the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); - /// getModRefBehavior - Return the behavior when calling the given function. - /// For use when the call site is not known. - virtual ModRefBehavior getModRefBehavior(const Function *F); + /// Return the behavior when calling the given function. + FunctionModRefBehavior getModRefBehavior(const Function *F); - /// doesNotAccessMemory - If the specified call is known to never read or - /// write memory, return true. If the call only reads from known-constant - /// memory, it is also legal to return true. Calls that unwind the stack - /// are legal for this predicate. + /// Checks if the specified call is known to never read or write memory. + /// + /// Note that if the call only reads from known-constant memory, it is also + /// legal to return true. Also, calls that unwind the stack are legal for + /// this predicate. /// /// Many optimizations (such as CSE and LICM) can be performed on such calls /// without worrying about aliasing properties, and many calls have this /// property (e.g. calls to 'sin' and 'cos'). /// /// This property corresponds to the GCC 'const' attribute. - /// bool doesNotAccessMemory(ImmutableCallSite CS) { - return getModRefBehavior(CS) == DoesNotAccessMemory; + return getModRefBehavior(CS) == FMRB_DoesNotAccessMemory; } - /// doesNotAccessMemory - If the specified function is known to never read or - /// write memory, return true. For use when the call site is not known. + /// Checks if the specified function is known to never read or write memory. /// + /// Note that if the function only reads from known-constant memory, it is + /// also legal to return true. Also, function that unwind the stack are legal + /// for this predicate. + /// + /// Many optimizations (such as CSE and LICM) can be performed on such calls + /// to such functions without worrying about aliasing properties, and many + /// functions have this property (e.g. 'sin' and 'cos'). + /// + /// This property corresponds to the GCC 'const' attribute. bool doesNotAccessMemory(const Function *F) { - return getModRefBehavior(F) == DoesNotAccessMemory; + return getModRefBehavior(F) == FMRB_DoesNotAccessMemory; } - /// onlyReadsMemory - If the specified call is known to only read from - /// non-volatile memory (or not access memory at all), return true. Calls - /// that unwind the stack are legal for this predicate. + /// Checks if the specified call is known to only read from non-volatile + /// memory (or not access memory at all). + /// + /// Calls that unwind the stack are legal for this predicate. /// /// This property allows many common optimizations to be performed in the /// absence of interfering store instructions, such as CSE of strlen calls. /// /// This property corresponds to the GCC 'pure' attribute. 
- /// bool onlyReadsMemory(ImmutableCallSite CS) { return onlyReadsMemory(getModRefBehavior(CS)); } - /// onlyReadsMemory - If the specified function is known to only read from - /// non-volatile memory (or not access memory at all), return true. For use - /// when the call site is not known. + /// Checks if the specified function is known to only read from non-volatile + /// memory (or not access memory at all). /// + /// Functions that unwind the stack are legal for this predicate. + /// + /// This property allows many common optimizations to be performed in the + /// absence of interfering store instructions, such as CSE of strlen calls. + /// + /// This property corresponds to the GCC 'pure' attribute. bool onlyReadsMemory(const Function *F) { return onlyReadsMemory(getModRefBehavior(F)); } - /// onlyReadsMemory - Return true if functions with the specified behavior are - /// known to only read from non-volatile memory (or not access memory at all). - /// - static bool onlyReadsMemory(ModRefBehavior MRB) { - return !(MRB & Mod); + /// Checks if functions with the specified behavior are known to only read + /// from non-volatile memory (or not access memory at all). + static bool onlyReadsMemory(FunctionModRefBehavior MRB) { + return !(MRB & MRI_Mod); } - /// onlyAccessesArgPointees - Return true if functions with the specified - /// behavior are known to read and write at most from objects pointed to by - /// their pointer-typed arguments (with arbitrary offsets). - /// - static bool onlyAccessesArgPointees(ModRefBehavior MRB) { - return !(MRB & Anywhere & ~ArgumentPointees); + /// Checks if functions with the specified behavior are known to read and + /// write at most from objects pointed to by their pointer-typed arguments + /// (with arbitrary offsets). + static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { + return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); } - /// doesAccessArgPointees - Return true if functions with the specified - /// behavior are known to potentially read or write from objects pointed - /// to be their pointer-typed arguments (with arbitrary offsets). - /// - static bool doesAccessArgPointees(ModRefBehavior MRB) { - return (MRB & ModRef) && (MRB & ArgumentPointees); + /// Checks if functions with the specified behavior are known to potentially + /// read or write from objects pointed to be their pointer-typed arguments + /// (with arbitrary offsets). + static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { + return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees); } - /// getModRefInfo - Return information about whether or not an - /// instruction may read or write memory (without regard to a - /// specific location) - ModRefResult getModRefInfo(const Instruction *I) { + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + /// getModRefInfo (for call sites) - A convenience wrapper. + ModRefInfo getModRefInfo(ImmutableCallSite CS, const Value *P, + uint64_t Size) { + return getModRefInfo(CS, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for calls) - Return information about whether + /// a particular call modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CallInst *C, const MemoryLocation &Loc) { + return getModRefInfo(ImmutableCallSite(C), Loc); + } + + /// getModRefInfo (for calls) - A convenience wrapper. 
+ ModRefInfo getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) { + return getModRefInfo(C, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for invokes) - Return information about whether + /// a particular invoke modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) { + return getModRefInfo(ImmutableCallSite(I), Loc); + } + + /// getModRefInfo (for invokes) - A convenience wrapper. + ModRefInfo getModRefInfo(const InvokeInst *I, const Value *P, uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for loads) - Return information about whether + /// a particular load modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); + + /// getModRefInfo (for loads) - A convenience wrapper. + ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) { + return getModRefInfo(L, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for stores) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); + + /// getModRefInfo (for stores) - A convenience wrapper. + ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for fences) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { + // Conservatively correct. (We could possibly be a bit smarter if + // Loc is a alloca that doesn't escape.) + return MRI_ModRef; + } + + /// getModRefInfo (for fences) - A convenience wrapper. + ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for cmpxchges) - Return information about whether + /// a particular cmpxchg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, + const MemoryLocation &Loc); + + /// getModRefInfo (for cmpxchges) - A convenience wrapper. + ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P, + unsigned Size) { + return getModRefInfo(CX, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for atomicrmws) - Return information about whether + /// a particular atomicrmw modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc); + + /// getModRefInfo (for atomicrmws) - A convenience wrapper. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P, + unsigned Size) { + return getModRefInfo(RMW, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for va_args) - Return information about whether + /// a particular va_arg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for va_args) - A convenience wrapper. + ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchpads) - Return information about whether + /// a particular catchpad modifies or reads the specified memory location. 
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchpads) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P, + uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchrets) - Return information about whether + /// a particular catchret modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchrets) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P, + uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// Check whether or not an instruction may read or write memory (without + /// regard to a specific location). + /// + /// For function calls, this delegates to the alias-analysis specific + /// call-site mod-ref behavior queries. Otherwise it delegates to the generic + /// mod ref information query without a location. + ModRefInfo getModRefInfo(const Instruction *I) { if (auto CS = ImmutableCallSite(I)) { auto MRB = getModRefBehavior(CS); - if (MRB & ModRef) - return ModRef; - else if (MRB & Ref) - return Ref; - else if (MRB & Mod) - return Mod; - return NoModRef; + if (MRB & MRI_ModRef) + return MRI_ModRef; + else if (MRB & MRI_Ref) + return MRI_Ref; + else if (MRB & MRI_Mod) + return MRI_Mod; + return MRI_NoModRef; } return getModRefInfo(I, MemoryLocation()); } - /// getModRefInfo - Return information about whether or not an instruction may - /// read or write the specified memory location. An instruction - /// that doesn't read or write memory may be trivially LICM'd for example. - ModRefResult getModRefInfo(const Instruction *I, const MemoryLocation &Loc) { + /// Check whether or not an instruction may read or write the specified + /// memory location. + /// + /// An instruction that doesn't read or write memory may be trivially LICM'd + /// for example. + /// + /// This primarily delegates to specific helpers above. + ModRefInfo getModRefInfo(const Instruction *I, const MemoryLocation &Loc) { switch (I->getOpcode()) { case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc); case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc); @@ -350,196 +481,476 @@ public: return getModRefInfo((const AtomicRMWInst*)I, Loc); case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc); case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc); - default: return NoModRef; + case Instruction::CatchPad: + return getModRefInfo((const CatchPadInst *)I, Loc); + case Instruction::CatchRet: + return getModRefInfo((const CatchReturnInst *)I, Loc); + default: + return MRI_NoModRef; } } - /// getModRefInfo - A convenience wrapper. - ModRefResult getModRefInfo(const Instruction *I, - const Value *P, uint64_t Size) { + /// A convenience wrapper for constructing the memory location. + ModRefInfo getModRefInfo(const Instruction *I, const Value *P, + uint64_t Size) { return getModRefInfo(I, MemoryLocation(P, Size)); } - /// getModRefInfo (for call sites) - Return information about whether - /// a particular call site modifies or reads the specified memory location. - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc); + /// Return information about whether a call and an instruction may refer to + /// the same memory locations. 
+ ModRefInfo getModRefInfo(Instruction *I, ImmutableCallSite Call); - /// getModRefInfo (for call sites) - A convenience wrapper. - ModRefResult getModRefInfo(ImmutableCallSite CS, - const Value *P, uint64_t Size) { - return getModRefInfo(CS, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for calls) - Return information about whether - /// a particular call modifies or reads the specified memory location. - ModRefResult getModRefInfo(const CallInst *C, const MemoryLocation &Loc) { - return getModRefInfo(ImmutableCallSite(C), Loc); - } - - /// getModRefInfo (for calls) - A convenience wrapper. - ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) { - return getModRefInfo(C, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for invokes) - Return information about whether - /// a particular invoke modifies or reads the specified memory location. - ModRefResult getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) { - return getModRefInfo(ImmutableCallSite(I), Loc); - } - - /// getModRefInfo (for invokes) - A convenience wrapper. - ModRefResult getModRefInfo(const InvokeInst *I, - const Value *P, uint64_t Size) { - return getModRefInfo(I, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for loads) - Return information about whether - /// a particular load modifies or reads the specified memory location. - ModRefResult getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); - - /// getModRefInfo (for loads) - A convenience wrapper. - ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) { - return getModRefInfo(L, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for stores) - Return information about whether - /// a particular store modifies or reads the specified memory location. - ModRefResult getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); - - /// getModRefInfo (for stores) - A convenience wrapper. - ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){ - return getModRefInfo(S, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for fences) - Return information about whether - /// a particular store modifies or reads the specified memory location. - ModRefResult getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { - // Conservatively correct. (We could possibly be a bit smarter if - // Loc is a alloca that doesn't escape.) - return ModRef; - } - - /// getModRefInfo (for fences) - A convenience wrapper. - ModRefResult getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size){ - return getModRefInfo(S, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for cmpxchges) - Return information about whether - /// a particular cmpxchg modifies or reads the specified memory location. - ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, - const MemoryLocation &Loc); - - /// getModRefInfo (for cmpxchges) - A convenience wrapper. - ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, - const Value *P, unsigned Size) { - return getModRefInfo(CX, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for atomicrmws) - Return information about whether - /// a particular atomicrmw modifies or reads the specified memory location. - ModRefResult getModRefInfo(const AtomicRMWInst *RMW, - const MemoryLocation &Loc); - - /// getModRefInfo (for atomicrmws) - A convenience wrapper. 
- ModRefResult getModRefInfo(const AtomicRMWInst *RMW, - const Value *P, unsigned Size) { - return getModRefInfo(RMW, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for va_args) - Return information about whether - /// a particular va_arg modifies or reads the specified memory location. - ModRefResult getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); - - /// getModRefInfo (for va_args) - A convenience wrapper. - ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){ - return getModRefInfo(I, MemoryLocation(P, Size)); - } - /// getModRefInfo - Return information about whether a call and an instruction - /// may refer to the same memory locations. - ModRefResult getModRefInfo(Instruction *I, - ImmutableCallSite Call); - - /// getModRefInfo - Return information about whether two call sites may refer - /// to the same set of memory locations. See + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo - /// for details. - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); - /// callCapturesBefore - Return information about whether a particular call - /// site modifies or reads the specified memory location. - ModRefResult callCapturesBefore(const Instruction *I, - const MemoryLocation &MemLoc, - DominatorTree *DT); + /// \brief Return information about whether a particular call site modifies + /// or reads the specified memory location \p MemLoc before instruction \p I + /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up + /// instruction ordering queries inside the BasicBlock containing \p I. + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr); - /// callCapturesBefore - A convenience wrapper. - ModRefResult callCapturesBefore(const Instruction *I, const Value *P, - uint64_t Size, DominatorTree *DT) { - return callCapturesBefore(I, MemoryLocation(P, Size), DT); + /// \brief A convenience wrapper to synthesize a memory location. + ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, + uint64_t Size, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr) { + return callCapturesBefore(I, MemoryLocation(P, Size), DT, OBB); } + /// @} //===--------------------------------------------------------------------===// - /// Higher level methods for querying mod/ref information. - /// + /// \name Higher level methods for querying mod/ref information. + /// @{ - /// canBasicBlockModify - Return true if it is possible for execution of the - /// specified basic block to modify the location Loc. + /// Check if it is possible for execution of the specified basic block to + /// modify the location Loc. bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc); - /// canBasicBlockModify - A convenience wrapper. - bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){ + /// A convenience wrapper synthesizing a memory location. + bool canBasicBlockModify(const BasicBlock &BB, const Value *P, + uint64_t Size) { return canBasicBlockModify(BB, MemoryLocation(P, Size)); } - /// canInstructionRangeModRef - Return true if it is possible for the - /// execution of the specified instructions to mod\ref (according to the - /// mode) the location Loc.
The instructions to consider are all - /// of the instructions in the range of [I1,I2] INCLUSIVE. - /// I1 and I2 must be in the same basic block. + /// Check if it is possible for the execution of the specified instructions + /// to mod/ref (according to the mode) the location Loc. + /// + /// The instructions to consider are all of the instructions in the range of + /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, const MemoryLocation &Loc, - const ModRefResult Mode); + const ModRefInfo Mode); - /// canInstructionRangeModRef - A convenience wrapper. - bool canInstructionRangeModRef(const Instruction &I1, - const Instruction &I2, const Value *Ptr, - uint64_t Size, const ModRefResult Mode) { + /// A convenience wrapper synthesizing a memory location. + bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, + const Value *Ptr, uint64_t Size, + const ModRefInfo Mode) { return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode); } +private: + class Concept; + template <typename AAResultT> class Model; + + template <typename AAResultT> friend class AAResultBase; + + std::vector<std::unique_ptr<Concept>> AAs; +}; + +/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis +/// pointer or reference. +typedef AAResults AliasAnalysis; + +/// A private abstract base class describing the concept of an individual alias +/// analysis implementation. +/// +/// This interface is implemented by any \c Model instantiation. It is also the +/// interface which a type used to instantiate the model must provide. +/// +/// All of these methods model methods by the same name in the \c +/// AAResults class. Only differences and specifics to how the +/// implementations are called are documented here. +class AAResults::Concept { +public: + virtual ~Concept() = 0; + + /// An update API used internally by the AAResults to provide + /// a handle back to the top level aggregation. + virtual void setAAResults(AAResults *NewAAR) = 0; + //===--------------------------------------------------------------------===// - /// Methods that clients should call when they transform the program to allow - /// alias analyses to update their internal data structures. Note that these - /// methods may be called on any instruction, regardless of whether or not - /// they have pointer-analysis implications. - /// + /// \name Alias Queries + /// @{ - /// deleteValue - This method should be called whenever an LLVM Value is - /// deleted from the program, for example when an instruction is found to be - /// redundant and is eliminated. - /// - virtual void deleteValue(Value *V); + /// The main low level interface to the alias analysis implementation. + /// Returns an AliasResult indicating whether the two pointers are aliased to + /// each other. This is the interface that must be implemented by specific + /// alias analysis implementations. + virtual AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) = 0; - /// addEscapingUse - This method should be used whenever an escaping use is - /// added to a pointer value. Analysis implementations may either return - /// conservative responses for that value in the future, or may recompute - /// some or all internal state to continue providing precise responses.
- /// - /// Escaping uses are considered by anything _except_ the following: - /// - GEPs or bitcasts of the pointer - /// - Loads through the pointer - /// - Stores through (but not of) the pointer - virtual void addEscapingUse(Use &U); + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + virtual bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) = 0; - /// replaceWithNewValue - This method is the obvious combination of the two - /// above, and it provided as a helper to simplify client code. - /// - void replaceWithNewValue(Value *Old, Value *New) { - deleteValue(Old); + /// @} + //===--------------------------------------------------------------------===// + /// \name Simple mod/ref information + /// @{ + + /// Get the ModRef info associated with a pointer argument of a callsite. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note + /// that these bits do not necessarily account for the overall behavior of + /// the function, but rather only provide additional per-argument + /// information. + virtual ModRefInfo getArgModRefInfo(ImmutableCallSite CS, + unsigned ArgIdx) = 0; + + /// Return the behavior of the given call site. + virtual FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) = 0; + + /// Return the behavior when calling the given function. + virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0; + + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + virtual ModRefInfo getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) = 0; + + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: + /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo + virtual ModRefInfo getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) = 0; + + /// @} +}; + +/// A private class template which derives from \c Concept and wraps some other +/// type. +/// +/// This models the concept by directly forwarding each interface point to the +/// wrapped type which must implement a compatible interface. This provides +/// a type erased binding. 
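+///
+/// For illustration only, a concrete result gets wrapped roughly as below;
+/// MyAAResult is a hypothetical type implementing the concept, and TLI, P1,
+/// P2, Size1 and Size2 stand for values the client already has:
+///
+/// \code
+///   AAResults AAR;
+///   MyAAResult MyAA(TLI);
+///   AAR.addAAResult(MyAA); // Constructs a Model<MyAAResult> inside AAR.
+///   AliasResult AR = AAR.alias(MemoryLocation(P1, Size1),
+///                              MemoryLocation(P2, Size2));
+/// \endcode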
+template <typename AAResultT> class AAResults::Model final : public Concept { + AAResultT &Result; + +public: + explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) { + Result.setAAResults(&AAR); + } + ~Model() override {} + + void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); } + + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override { + return Result.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) override { + return Result.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) override { + return Result.getArgModRefInfo(CS, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { + return Result.getModRefBehavior(CS); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) override { + return Result.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) override { + return Result.getModRefInfo(CS, Loc); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { + return Result.getModRefInfo(CS1, CS2); } }; +/// A CRTP-driven "mixin" base class to help implement the function alias +/// analysis results concept. +/// +/// Because of the nature of many alias analysis implementations, they often +/// only implement a subset of the interface. This base class will attempt to +/// implement the remaining portions of the interface in terms of simpler forms +/// of the interface where possible, and otherwise provide conservatively +/// correct fallback implementations. +/// +/// Implementors of an alias analysis should derive from this CRTP, and then +/// override specific methods that they wish to customize. There is no need to +/// use virtual anywhere; the CRTP base class does static dispatch to the +/// derived type passed into it. +template <typename DerivedT> class AAResultBase { + // Expose some parts of the interface only to the AAResults::Model + // for wrapping. Specifically, this allows the model to call our + // setAAResults method without exposing it as a fully public API. + friend class AAResults::Model<DerivedT>; + + /// A pointer to the AAResults object that this AAResult is + /// aggregated within. May be null if not aggregated. + AAResults *AAR; + + /// Helper to dispatch calls back through the derived type. + DerivedT &derived() { return static_cast<DerivedT &>(*this); } + + /// A setter for the AAResults pointer, which is used to satisfy the + /// AAResults::Model contract. + void setAAResults(AAResults *NewAAR) { AAR = NewAAR; } + +protected: + /// This proxy class models a common pattern where we delegate to either the + /// top-level \c AAResults aggregation if one is registered, or to the + /// current result if none are registered. + class AAResultsProxy { + AAResults *AAR; + DerivedT &CurrentResult; + + public: + AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult) + : AAR(AAR), CurrentResult(CurrentResult) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal) + : CurrentResult.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { + return AAR ?
AAR->getArgModRefInfo(CS, ArgIdx) : CurrentResult.getArgModRefInfo(CS, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return AAR ? AAR->getModRefBehavior(CS) : CurrentResult.getModRefBehavior(CS); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { + return AAR ? AAR->getModRefInfo(CS, Loc) + : CurrentResult.getModRefInfo(CS, Loc); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + return AAR ? AAR->getModRefInfo(CS1, CS2) : CurrentResult.getModRefInfo(CS1, CS2); + } + }; + + const TargetLibraryInfo &TLI; + + explicit AAResultBase(const TargetLibraryInfo &TLI) : TLI(TLI) {} + + // Provide all the copy and move constructors so that derived types aren't + // constrained. + AAResultBase(const AAResultBase &Arg) : TLI(Arg.TLI) {} + AAResultBase(AAResultBase &&Arg) : TLI(Arg.TLI) {} + + /// Get a proxy for the best AA result set to query at this time. + /// + /// When this result is part of a larger aggregation, this will proxy to that + /// aggregation. When this result is used in isolation, it will just delegate + /// back to the derived class's implementation. + AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } + +public: + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return MayAlias; + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return false; + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { + return MRI_ModRef; + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + if (!CS.hasOperandBundles()) + // If CS has operand bundles then aliasing attributes from the function it + // calls do not directly apply to the CallSite. This can be made more + // precise in the future. + if (const Function *F = CS.getCalledFunction()) + return getBestAAResults().getModRefBehavior(F); + + return FMRB_UnknownModRefBehavior; + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return FMRB_UnknownModRefBehavior; + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); +}; + +/// Synthesize \c ModRefInfo for a call site and memory location by examining +/// the general behavior of the call site and any specific information for its +/// arguments. +/// +/// This essentially delegates across the alias analysis interface to collect +/// information which may be enough to (conservatively) fulfill the query.
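+///
+/// For illustration, with a readonly callee that only accesses its argument
+/// pointees (a strlen-like function; the IR below is hypothetical), the query
+/// refines as the comments indicate:
+///
+/// \code
+///   ; %n = call i64 @strlen(i8* %p)  ; readonly, argument pointees only
+///   ; getModRefInfo(CS, Loc) is at most MRI_Ref when Loc may alias (%p, 1),
+///   ; and MRI_NoModRef when Loc is disjoint from every pointer argument.
+/// \endcode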
+template <typename DerivedT> +ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + auto MRB = getBestAAResults().getModRefBehavior(CS); + if (MRB == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + ModRefInfo Mask = MRI_ModRef; + if (AAResults::onlyReadsMemory(MRB)) + Mask = MRI_Ref; + + if (AAResults::onlyAccessesArgPointees(MRB)) { + bool DoesAlias = false; + ModRefInfo AllArgsMask = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned ArgIdx = std::distance(CS.arg_begin(), AI); + MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI); + AliasResult ArgAlias = getBestAAResults().alias(ArgLoc, Loc); + if (ArgAlias != NoAlias) { + ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS, ArgIdx); + DoesAlias = true; + AllArgsMask = ModRefInfo(AllArgsMask | ArgMask); + } + } + } + if (!DoesAlias) + return MRI_NoModRef; + Mask = ModRefInfo(Mask & AllArgsMask); + } + + // If Loc is a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & MRI_Mod) && + getBestAAResults().pointsToConstantMemory(Loc, /*OrLocal*/ false)) + Mask = ModRefInfo(Mask & ~MRI_Mod); + + return Mask; +} + +/// Synthesize \c ModRefInfo for two call sites by examining the general +/// behavior of the call site and any specific information for its arguments. +/// +/// This essentially delegates across the alias analysis interface to collect +/// information which may be enough to (conservatively) fulfill the query. +template <typename DerivedT> +ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // If CS1 or CS2 are readnone, they don't interact. + auto CS1B = getBestAAResults().getModRefBehavior(CS1); + if (CS1B == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + auto CS2B = getBestAAResults().getModRefBehavior(CS2); + if (CS2B == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + // If they both only read from memory, there is no dependence. + if (AAResults::onlyReadsMemory(CS1B) && AAResults::onlyReadsMemory(CS2B)) + return MRI_NoModRef; + + ModRefInfo Mask = MRI_ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. + if (AAResults::onlyReadsMemory(CS1B)) + Mask = ModRefInfo(Mask & MRI_Ref); + + // If CS2 only accesses memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. + if (AAResults::onlyAccessesArgPointees(CS2B)) { + ModRefInfo R = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(CS2B)) { + for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), + E = CS2.arg_end(); + I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); + auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); + + // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence + // of CS1 on that location is the inverse.
+ ModRefInfo ArgMask = + getBestAAResults().getArgModRefInfo(CS2, CS2ArgIdx); + if (ArgMask == MRI_Mod) + ArgMask = MRI_ModRef; + else if (ArgMask == MRI_Ref) + ArgMask = MRI_Mod; + + ArgMask = ModRefInfo(ArgMask & + getBestAAResults().getModRefInfo(CS1, CS2ArgLoc)); + + R = ModRefInfo((R | ArgMask) & Mask); + if (R == Mask) + break; + } + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (AAResults::onlyAccessesArgPointees(CS1B)) { + ModRefInfo R = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(CS1B)) { + for (ImmutableCallSite::arg_iterator I = CS1.arg_begin(), + E = CS1.arg_end(); + I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); + auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); + + // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod + // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 + // might Ref, then we care only about a Mod by CS2. + ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS1, CS1ArgIdx); + ModRefInfo ArgR = getBestAAResults().getModRefInfo(CS2, CS1ArgLoc); + if (((ArgMask & MRI_Mod) != MRI_NoModRef && + (ArgR & MRI_ModRef) != MRI_NoModRef) || + ((ArgMask & MRI_Ref) != MRI_NoModRef && + (ArgR & MRI_Mod) != MRI_NoModRef)) + R = ModRefInfo((R | ArgMask) & Mask); + + if (R == Mask) + break; + } + } + return R; + } + + return Mask; +} + /// isNoAliasCall - Return true if this pointer is returned by a noalias /// function. bool isNoAliasCall(const Value *V); @@ -564,6 +975,98 @@ bool isIdentifiedObject(const Value *V); /// IdentifiedObjects. bool isIdentifiedFunctionLocal(const Value *V); +/// A manager for alias analyses. +/// +/// This class can have analyses registered with it and when run, it will run +/// all of them and aggregate their results into a single AA results interface +/// that dispatches across all of the alias analysis results available. +/// +/// Note that the order in which analyses are registered is very significant. +/// That is the order in which the results will be aggregated and queried. +/// +/// This manager effectively wraps the AnalysisManager for registering alias +/// analyses. When you register your alias analysis with this manager, it will +/// ensure the analysis itself is registered with its AnalysisManager. +class AAManager { +public: + typedef AAResults Result; + + // This type has value semantics. We have to spell these out because MSVC + // won't synthesize them. + AAManager() {} + AAManager(AAManager &&Arg) + : FunctionResultGetters(std::move(Arg.FunctionResultGetters)) {} + AAManager(const AAManager &Arg) + : FunctionResultGetters(Arg.FunctionResultGetters) {} + AAManager &operator=(AAManager &&RHS) { + FunctionResultGetters = std::move(RHS.FunctionResultGetters); + return *this; + } + AAManager &operator=(const AAManager &RHS) { + FunctionResultGetters = RHS.FunctionResultGetters; + return *this; + } + + /// Register a specific AA result.
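+  ///
+  /// For illustration, a new-PM pipeline might register several of the
+  /// analyses declared in this tree (a hypothetical setup):
+  ///
+  /// \code
+  ///   AAManager AA;
+  ///   AA.registerFunctionAnalysis<BasicAA>();
+  ///   AA.registerFunctionAnalysis<CFLAA>();
+  ///   // AA.run(F, AM) then aggregates both results, in registration order.
+  /// \endcode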
+ template <typename AnalysisT> void registerFunctionAnalysis() { + FunctionResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); + } + + Result run(Function &F, AnalysisManager<Function> &AM) { + Result R; + for (auto &Getter : FunctionResultGetters) + (*Getter)(F, AM, R); + return R; + } + +private: + SmallVector<void (*)(Function &F, AnalysisManager<Function> &AM, + AAResults &AAResults), + 4> FunctionResultGetters; + + template <typename AnalysisT> + static void getFunctionAAResultImpl(Function &F, + AnalysisManager<Function> &AM, + AAResults &AAResults) { + AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); + } +}; + +/// A wrapper pass to provide the legacy pass manager access to a suitably +/// prepared AAResults object. +class AAResultsWrapperPass : public FunctionPass { + std::unique_ptr<AAResults> AAR; + +public: + static char ID; + + AAResultsWrapperPass(); + + AAResults &getAAResults() { return *AAR; } + const AAResults &getAAResults() const { return *AAR; } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +FunctionPass *createAAResultsWrapperPass(); + +/// A wrapper pass around a callback which can be used to populate the +/// AAResults in the AAResultsWrapperPass from an external AA. +/// +/// The callback provided here will be used each time we prepare an AAResults +/// object, and will receive a reference to the function wrapper pass, the +/// function, and the AAResults object to populate. This should be used when +/// setting up a custom pass pipeline to inject a hook into the AA results. +ImmutablePass *createExternalAAWrapperPass( + std::function<void(Pass &, Function &, AAResults &)> Callback); + +/// A helper for the legacy pass manager to create an \c AAResults +/// object populated to the best of our ability for a particular function when +/// inside of a \c ModulePass or a \c CallGraphSCCPass. +AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 881699d09225..37fd69b081cc 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -20,13 +20,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include <vector> namespace llvm { -class AliasAnalysis; class LoadInst; class StoreInst; class VAArgInst; @@ -42,13 +42,14 @@ class AliasSet : public ilist_node<AliasSet> { AliasSet *AS; uint64_t Size; AAMDNodes AAInfo; + public: PointerRec(Value *V) : Val(V), PrevInList(nullptr), NextInList(nullptr), AS(nullptr), Size(0), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {} Value *getValue() const { return Val; } - + PointerRec *getNext() const { return NextInList; } bool hasAliasSet() const { return AS != nullptr; } @@ -156,7 +157,7 @@ class AliasSet : public ilist_node<AliasSet> { assert(i < UnknownInsts.size()); return UnknownInsts[i]; } - + public: /// Accessors... bool isRef() const { return Access & RefAccess; } @@ -190,6 +191,7 @@ public: class iterator : public std::iterator<std::forward_iterator_tag, PointerRec> { PointerRec *CurNode; + public: explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} @@ -282,14 +284,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { return OS; } - class AliasSetTracker { /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be /// notified whenever a Value is deleted.
- class ASTCallbackVH : public CallbackVH { + class ASTCallbackVH final : public CallbackVH { AliasSetTracker *AST; void deleted() override; void allUsesReplacedWith(Value *) override; + public: ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr); ASTCallbackVH &operator=(Value *V); @@ -347,7 +349,7 @@ public: bool remove(Instruction *I); void remove(AliasSet &AS); bool removeUnknown(Instruction *I); - + void clear(); /// getAliasSets - Return the alias sets that are active. @@ -398,7 +400,6 @@ public: /// void copyValue(Value *From, Value *To); - typedef ilist<AliasSet>::iterator iterator; typedef ilist<AliasSet>::const_iterator const_iterator; diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index 1f00b691b305..b903f96d55b2 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -66,7 +66,7 @@ public: /// \brief Add an @llvm.assume intrinsic to this function's cache. /// - /// The call passed in must be an instruction within this fuction and must + /// The call passed in must be an instruction within this function and must /// not already be in the cache. void registerAssumption(CallInst *CI); @@ -79,7 +79,7 @@ public: } /// \brief Access the list of assumption handles currently tracked for this - /// fuction. + /// function. /// /// Note that these produce weak handles that may be null. The caller must /// handle that case. @@ -140,7 +140,7 @@ public: class AssumptionCacheTracker : public ImmutablePass { /// A callback value handle applied to function objects, which we use to /// delete our cache of intrinsics for a function when it is deleted. - class FunctionCallbackVH : public CallbackVH { + class FunctionCallbackVH final : public CallbackVH { AssumptionCacheTracker *ACT; void deleted() override; diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h new file mode 100644 index 000000000000..181a9327024c --- /dev/null +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -0,0 +1,223 @@ +//===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's primary stateless and local alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H +#define LLVM_ANALYSIS_BASICALIASANALYSIS_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class AssumptionCache; +class DominatorTree; +class LoopInfo; + +/// This is the AA result object for the basic, local, and stateless alias +/// analysis. It implements the AA query interface in an entirely stateless +/// manner. As one consequence, it is never invalidated. While it does retain +/// some storage, that is used as an optimization and not to preserve +/// information from query to query.
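+///
+/// For illustration, a result can be built directly from per-function
+/// analyses (the wrapper passes below are the usual route; F, TLI, AC, DT and
+/// LI stand for analysis results the client already has):
+///
+/// \code
+///   BasicAAResult BAR(F.getParent()->getDataLayout(), TLI, AC, &DT, &LI);
+///   AliasResult AR = BAR.alias(MemoryLocation(P1, Size1),
+///                              MemoryLocation(P2, Size2));
+/// \endcode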
+class BasicAAResult : public AAResultBase<BasicAAResult> { + friend AAResultBase<BasicAAResult>; + + const DataLayout &DL; + AssumptionCache &AC; + DominatorTree *DT; + LoopInfo *LI; + +public: + BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, + AssumptionCache &AC, DominatorTree *DT = nullptr, + LoopInfo *LI = nullptr) + : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {} + + BasicAAResult(const BasicAAResult &Arg) + : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} + BasicAAResult(BasicAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), + LI(Arg.LI) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + + /// Chases pointers until we find a (constant global) or not. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. + FunctionModRefBehavior getModRefBehavior(const Function *F); + +private: + // A linear transformation of a Value; this class represents ZExt(SExt(V, + // SExtBits), ZExtBits) * Scale + Offset. + struct VariableGEPIndex { + + // An opaque Value - we can't decompose this further. + const Value *V; + + // We need to track what extensions we've done as we consider the same Value + // with different extensions as different variables in a GEP's linear + // expression; + // e.g.: if V == -1, then sext(x) != zext(x). + unsigned ZExtBits; + unsigned SExtBits; + + int64_t Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && ZExtBits == Other.ZExtBits && + SExtBits == Other.SExtBits && Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } + }; + + /// Track alias queries to guard against recursion. + typedef std::pair<MemoryLocation, MemoryLocation> LocPair; + typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; + AliasCacheTy AliasCache; + + /// Tracks phi nodes we have visited. + /// + /// When we interpret "Value" pointer equality as value equality we need to + /// make sure that the "Value" is not part of a cycle. Otherwise, two uses + /// could come from different "iterations" of a cycle and see different + /// values for the same "Value" pointer. + /// + /// The following example shows the problem: + /// %p = phi(%alloca1, %addr2) + /// %l = load %ptr + /// %addr1 = gep, %alloca2, 0, %l + /// %addr2 = gep %alloca2, 0, (%l + 1) + /// alias(%p, %addr1) -> MayAlias ! + /// store %l, ... + SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs; + + /// Tracks instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value *, 16> Visited; + + static const Value * + GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, + unsigned &ZExtBits, unsigned &SExtBits, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, + DominatorTree *DT, bool &NSW, bool &NUW); + + static const Value * + DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, + bool &MaxLookupReached, const DataLayout &DL, + AssumptionCache *AC, DominatorTree *DT); + /// \brief A Heuristic for aliasGEP that searches for a constant offset + /// between the variables. + /// + /// GetLinearExpression has some limitations, as generally zext(%x + 1) + /// != zext(%x) + zext(1) if the arithmetic overflows. GetLinearExpression + /// will therefore conservatively refuse to decompose these expressions. + /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if + /// the addition overflows. + bool + constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices, + uint64_t V1Size, uint64_t V2Size, int64_t BaseOffset, + AssumptionCache *AC, DominatorTree *DT); + + bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); + + void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src); + + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const AAMDNodes &V1AAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const AAMDNodes &PNAAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo); + + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const AAMDNodes &SIAAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, AAMDNodes V1AATag, + const Value *V2, uint64_t V2Size, AAMDNodes V2AATag); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class BasicAA { +public: + typedef BasicAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + BasicAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "BasicAliasAnalysis"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the BasicAAResult object. +class BasicAAWrapperPass : public FunctionPass { + std::unique_ptr<BasicAAResult> Result; + + virtual void anchor(); + +public: + static char ID; + + BasicAAWrapperPass(); + + BasicAAResult &getResult() { return *Result; } + const BasicAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +FunctionPass *createBasicAAWrapperPass(); + +/// A helper for the legacy pass manager to create a \c BasicAAResult object +/// populated to the best of our ability for a particular function when inside +/// of a \c ModulePass or a \c CallGraphSCCPass.
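+///
+/// For illustration, such a pass might combine this with the aggregation
+/// helper declared in AliasAnalysis.h (hypothetical client code):
+///
+/// \code
+///   BasicAAResult BAR = createLegacyPMBasicAAResult(*this, F);
+///   AAResults AAR = createLegacyPMAAResults(*this, F, BAR);
+/// \endcode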
+BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F); +} + +#endif diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h index f27c32df9283..6f2a2b522769 100644 --- a/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/include/llvm/Analysis/BlockFrequencyInfo.h @@ -21,26 +21,20 @@ namespace llvm { class BranchProbabilityInfo; +class LoopInfo; template <class BlockT> class BlockFrequencyInfoImpl; /// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to /// estimate IR basic block frequencies. -class BlockFrequencyInfo : public FunctionPass { +class BlockFrequencyInfo { typedef BlockFrequencyInfoImpl<BasicBlock> ImplType; std::unique_ptr<ImplType> BFI; public: - static char ID; - BlockFrequencyInfo(); + BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI, + const LoopInfo &LI); - ~BlockFrequencyInfo() override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnFunction(Function &F) override; - void releaseMemory() override; - void print(raw_ostream &O, const Module *M) const override; const Function *getFunction() const; void view() const; @@ -51,6 +45,13 @@ public: /// floating points. BlockFrequency getBlockFreq(const BasicBlock *BB) const; + // Set the frequency of the given basic block. + void setBlockFreq(const BasicBlock *BB, uint64_t Freq); + + /// calculate - compute block frequency info for the given function. + void calculate(const Function &F, const BranchProbabilityInfo &BPI, + const LoopInfo &LI); + // Print the block frequency Freq to OS using the current functions entry // frequency to convert freq into a relative decimal form. raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; @@ -60,7 +61,28 @@ public: raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const; uint64_t getEntryFreq() const; + void releaseMemory(); + void print(raw_ostream &OS) const; +}; +/// \brief Legacy analysis pass which computes \c BlockFrequencyInfo. +class BlockFrequencyInfoWrapperPass : public FunctionPass { + BlockFrequencyInfo BFI; + +public: + static char ID; + + BlockFrequencyInfoWrapperPass(); + ~BlockFrequencyInfoWrapperPass() override; + + BlockFrequencyInfo &getBFI() { return BFI; } + const BlockFrequencyInfo &getBFI() const { return BFI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; }; } diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 32d96090f456..387e9a887d93 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -84,7 +84,7 @@ public: /// \brief Add another mass. /// /// Adds another mass, saturating at \a isFull() rather than overflowing. - BlockMass &operator+=(const BlockMass &X) { + BlockMass &operator+=(BlockMass X) { uint64_t Sum = Mass + X.Mass; Mass = Sum < Mass ? UINT64_MAX : Sum; return *this; @@ -94,23 +94,23 @@ public: /// /// Subtracts another mass, saturating at \a isEmpty() rather than /// undeflowing. - BlockMass &operator-=(const BlockMass &X) { + BlockMass &operator-=(BlockMass X) { uint64_t Diff = Mass - X.Mass; Mass = Diff > Mass ?
0 : Diff; return *this; } - BlockMass &operator*=(const BranchProbability &P) { + BlockMass &operator*=(BranchProbability P) { Mass = P.scale(Mass); return *this; } - bool operator==(const BlockMass &X) const { return Mass == X.Mass; } - bool operator!=(const BlockMass &X) const { return Mass != X.Mass; } - bool operator<=(const BlockMass &X) const { return Mass <= X.Mass; } - bool operator>=(const BlockMass &X) const { return Mass >= X.Mass; } - bool operator<(const BlockMass &X) const { return Mass < X.Mass; } - bool operator>(const BlockMass &X) const { return Mass > X.Mass; } + bool operator==(BlockMass X) const { return Mass == X.Mass; } + bool operator!=(BlockMass X) const { return Mass != X.Mass; } + bool operator<=(BlockMass X) const { return Mass <= X.Mass; } + bool operator>=(BlockMass X) const { return Mass >= X.Mass; } + bool operator<(BlockMass X) const { return Mass < X.Mass; } + bool operator>(BlockMass X) const { return Mass > X.Mass; } /// \brief Convert to scaled number. /// @@ -122,20 +122,20 @@ public: raw_ostream &print(raw_ostream &OS) const; }; -inline BlockMass operator+(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator+(BlockMass L, BlockMass R) { return BlockMass(L) += R; } -inline BlockMass operator-(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator-(BlockMass L, BlockMass R) { return BlockMass(L) -= R; } -inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) { +inline BlockMass operator*(BlockMass L, BranchProbability R) { return BlockMass(L) *= R; } -inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) { +inline BlockMass operator*(BranchProbability L, BlockMass R) { return BlockMass(R) *= L; } -inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) { +inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) { return X.print(OS); } @@ -477,6 +477,8 @@ public: BlockFrequency getBlockFreq(const BlockNode &Node) const; + void setBlockFreq(const BlockNode &Node, uint64_t Freq); + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency &Freq) const; @@ -905,14 +907,15 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { public: const FunctionT *getFunction() const { return F; } - void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI); + void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI); BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {} using BlockFrequencyInfoImplBase::getEntryFreq; BlockFrequency getBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } + void setBlockFreq(const BlockT *BB, uint64_t Freq); Scaled64 getFloatingBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); } @@ -938,13 +941,13 @@ public: }; template <class BT> -void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, - const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI) { +void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, + const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI) { // Save the parameters. - this->BPI = BPI; - this->LI = LI; - this->F = F; + this->BPI = &BPI; + this->LI = &LI; + this->F = &F; // Clean up left-over data structures. BlockFrequencyInfoImplBase::clear(); @@ -952,8 +955,8 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, Nodes.clear(); // Initialize.
- DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n=================" - << std::string(F->getName().size(), '=') << "\n"); + DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" + << std::string(F.getName().size(), '=') << "\n"); initializeRPOT(); initializeLoops(); @@ -965,8 +968,23 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, finalizeMetrics(); } +template <class BT> +void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) { + if (Nodes.count(BB)) + BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq); + else { + // If BB is a newly added block after BFI is done, we need to create a new + // BlockNode for it assigned with a new index. The index can be determined + // by the size of Freqs. + BlockNode NewNode(Freqs.size()); + Nodes[BB] = NewNode; + Freqs.emplace_back(); + BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq); + } +} + template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { - const BlockT *Entry = F->begin(); + const BlockT *Entry = &F->front(); RPOT.reserve(F->size()); std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); std::reverse(RPOT.begin(), RPOT.end()); @@ -1155,6 +1173,13 @@ void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( updateLoopWithIrreducible(*OuterLoop); } +namespace { +// A helper function that converts a branch probability into weight. +inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) { + return Prob.getNumerator(); +} +} // namespace + template <class BT> bool BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, @@ -1171,10 +1196,8 @@ BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, const BlockT *BB = getBlock(Node); for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); SI != SE; ++SI) - // Do not dereference SI, or getEdgeWeight() is linear in the number of - // successors. if (!addToDist(Dist, OuterLoop, Node, getNode(*SI), - BPI->getEdgeWeight(BB, SI))) + getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI)))) // Irreducible backedge. return false; } @@ -1190,10 +1213,11 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { if (!F) return OS; OS << "block-frequency-info: " << F->getName() << "\n"; - for (const BlockT &BB : *F) - OS << " - " << bfi_detail::getBlockName(&BB) - << ": float = " << getFloatingBlockFreq(&BB) - << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + for (const BlockT &BB : *F) { + OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; + getFloatingBlockFreq(&BB).print(OS, 5) + << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + } // Add an extra newline for readability. OS << "\n"; diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 9d867567ba29..cfdf218491bd 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -25,9 +25,9 @@ namespace llvm { class LoopInfo; class raw_ostream; -/// \brief Analysis pass providing branch probability information. +/// \brief Analysis providing branch probability information. /// -/// This is a function analysis pass which provides information on the relative +/// This is a function analysis which provides information on the relative /// probabilities of each "edge" in the function's CFG where such an edge is /// defined by a pair (PredBlock and an index in the successors).
The /// probability of an edge from one block is always relative to the @@ -37,20 +37,14 @@ class raw_ostream; /// identify an edge, since we can have multiple edges from Src to Dst. /// As an example, we can have a switch which jumps to Dst with value 0 and /// value 10. -class BranchProbabilityInfo : public FunctionPass { +class BranchProbabilityInfo { public: - static char ID; + BranchProbabilityInfo() {} + BranchProbabilityInfo(Function &F, const LoopInfo &LI) { calculate(F, LI); } - BranchProbabilityInfo() : FunctionPass(ID) { - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - } + void releaseMemory(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; - - void releaseMemory() override; - - void print(raw_ostream &OS, const Module *M = nullptr) const override; + void print(raw_ostream &OS) const; /// \brief Get an edge's probability, relative to other out-edges of the Src. /// @@ -67,6 +61,9 @@ public: BranchProbability getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const; + BranchProbability getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const; + /// \brief Test if an edge is hot relative to other out-edges of the Src. /// /// Check whether this edge out of the source block is 'hot'. We define hot @@ -87,37 +84,22 @@ public: raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const; - /// \brief Get the raw edge weight calculated for the edge. + /// \brief Set the raw edge probability for the given edge. /// - /// This returns the raw edge weight. It is guaranteed to fall between 1 and - /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation. - /// This interface should be very carefully, and primarily by routines that - /// are updating the analysis by later calling setEdgeWeight. - uint32_t getEdgeWeight(const BasicBlock *Src, - unsigned IndexInSuccessors) const; - - /// \brief Get the raw edge weight calculated for the block pair. - /// - /// This returns the sum of all raw edge weights from Src to Dst. - /// It is guaranteed to fall between 1 and UINT32_MAX. - uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; - - uint32_t getEdgeWeight(const BasicBlock *Src, - succ_const_iterator Dst) const; - - /// \brief Set the raw edge weight for a given edge. - /// - /// This allows a pass to explicitly set the edge weight for an edge. It can - /// be used when updating the CFG to update and preserve the branch + /// This allows a pass to explicitly set the edge probability for an edge. It + /// can be used when updating the CFG to update and preserve the branch /// probability information. Read the implementation of how these edge - /// weights are calculated carefully before using! - void setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, - uint32_t Weight); + /// probabilities are calculated carefully before using! + void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors, + BranchProbability Prob); - static uint32_t getBranchWeightStackProtector(bool IsLikely) { - return IsLikely ? (1u << 20) - 1 : 1; + static BranchProbability getBranchProbStackProtector(bool IsLikely) { + static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20); + return IsLikely ? 
LikelyProb : LikelyProb.getCompl(); } + void calculate(Function &F, const LoopInfo& LI); + private: // Since we allow duplicate edges from one basic block to another, we use // a pair (PredBlock and an index in the successors) to specify an edge. @@ -131,10 +113,7 @@ private: // weight to just "inherit" the non-zero weight of an adjacent successor. static const uint32_t DEFAULT_WEIGHT = 16; - DenseMap<Edge, uint32_t> Weights; - - /// \brief Handle to the LoopInfo analysis. - LoopInfo *LI; + DenseMap<Edge, BranchProbability> Probs; /// \brief Track the last function we run over for printing. Function *LastF; @@ -145,19 +124,37 @@ private: /// \brief Track the set of blocks that always lead to a cold call. SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall; - /// \brief Get sum of the block successors' weights. - uint32_t getSumForBlock(const BasicBlock *BB) const; - bool calcUnreachableHeuristics(BasicBlock *BB); bool calcMetadataWeights(BasicBlock *BB); bool calcColdCallHeuristics(BasicBlock *BB); bool calcPointerHeuristics(BasicBlock *BB); - bool calcLoopBranchHeuristics(BasicBlock *BB); + bool calcLoopBranchHeuristics(BasicBlock *BB, const LoopInfo &LI); bool calcZeroHeuristics(BasicBlock *BB); bool calcFloatingPointHeuristics(BasicBlock *BB); bool calcInvokeHeuristics(BasicBlock *BB); }; +/// \brief Legacy analysis pass which computes \c BranchProbabilityInfo. +class BranchProbabilityInfoWrapperPass : public FunctionPass { + BranchProbabilityInfo BPI; + +public: + static char ID; + + BranchProbabilityInfoWrapperPass() : FunctionPass(ID) { + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + BranchProbabilityInfo &getBPI() { return BPI; } + const BranchProbabilityInfo &getBPI() const { return BPI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + } #endif diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index 7c4df780198c..35165f4061f1 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -40,7 +40,7 @@ void FindFunctionBackedges( /// Search for the specified successor of basic block BB and return its position /// in the terminator instruction's list of successors. It is an error to call /// this with a block that is not a successor. -unsigned GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ); +unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// Return true if the specified edge is a critical edge. Critical edges are /// edges from a block with multiple successors to a block with multiple diff --git a/include/llvm/Analysis/CFLAliasAnalysis.h b/include/llvm/Analysis/CFLAliasAnalysis.h new file mode 100644 index 000000000000..7473a454ab30 --- /dev/null +++ b/include/llvm/Analysis/CFLAliasAnalysis.h @@ -0,0 +1,158 @@ +//===- CFLAliasAnalysis.h - CFL-Based Alias Analysis Interface ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's primary stateless and local alias analysis.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <forward_list> + +namespace llvm { + +class CFLAAResult : public AAResultBase<CFLAAResult> { + friend AAResultBase<CFLAAResult>; + + struct FunctionInfo; + +public: + explicit CFLAAResult(const TargetLibraryInfo &TLI); + CFLAAResult(CFLAAResult &&Arg); + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + /// \brief Inserts the given Function into the cache. + void scan(Function *Fn); + + void evict(Function *Fn); + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional<FunctionInfo> &ensureCached(Function *Fn); + + AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + if (LocA.Ptr == LocB.Ptr) { + if (LocA.Size == LocB.Size) { + return MustAlias; + } else { + return PartialAlias; + } + } + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing + // a GlobalValue and ConstantExpr, but every query needs to have at least + // one Value tied to a Function, and neither GlobalValues nor ConstantExprs + // are. + if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { + return AAResultBase::alias(LocA, LocB); + } + + AliasResult QueryResult = query(LocA, LocB); + if (QueryResult == MayAlias) + return AAResultBase::alias(LocA, LocB); + + return QueryResult; + } + +private: + struct FunctionHandle final : public CallbackVH { + FunctionHandle(Function *Fn, CFLAAResult *Result) + : CallbackVH(Fn), Result(Result) { + assert(Fn != nullptr); + assert(Result != nullptr); + } + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + + private: + CFLAAResult *Result; + + void removeSelfFromCache() { + assert(Result != nullptr); + auto *Val = getValPtr(); + Result->evict(cast<Function>(Val)); + setValPtr(nullptr); + } + }; + + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap<Function *, Optional<FunctionInfo>> Cache; + std::forward_list<FunctionHandle> Handles; + + FunctionInfo buildSetsFrom(Function *F); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +/// +/// FIXME: We really should refactor CFL to use the analysis more heavily, and +/// in particular to leverage invalidation to trigger re-computation of sets. +class CFLAA { +public: + typedef CFLAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + CFLAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "CFLAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the CFLAAResult object. +class CFLAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLAAResult> Result; + +public: + static char ID; + + CFLAAWrapperPass(); + + CFLAAResult &getResult() { return *Result; } + const CFLAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createCFLAAWrapperPass - This pass implements a set-based approach to +// alias analysis. +// +ImmutablePass *createCFLAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 6a406cd24402..e7635eb1ab67 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -358,7 +358,7 @@ private: /// returned PreservedAnalysis set. class CGSCCAnalysisManagerFunctionProxy { public: - /// \brief Result proxy object for \c ModuleAnalysisManagerFunctionProxy. + /// \brief Result proxy object for \c CGSCCAnalysisManagerFunctionProxy. class Result { public: explicit Result(const CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {} diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 662ae0e6363c..5562e9b9465f 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -75,7 +75,8 @@ class CallGraphNode; class CallGraph { Module &M; - typedef std::map<const Function *, CallGraphNode *> FunctionMapTy; + typedef std::map<const Function *, std::unique_ptr<CallGraphNode>> + FunctionMapTy; /// \brief A map from \c Function* to \c CallGraphNode*. FunctionMapTy FunctionMap; @@ -90,7 +91,7 @@ class CallGraph { /// \brief This node has edges to it from all functions making indirect calls /// or calling an external function. - CallGraphNode *CallsExternalNode; + std::unique_ptr<CallGraphNode> CallsExternalNode; /// \brief Replace the function represented by this node by another. /// @@ -104,7 +105,8 @@ class CallGraph { void addToCallGraph(Function *F); public: - CallGraph(Module &M); + explicit CallGraph(Module &M); + CallGraph(CallGraph &&Arg); ~CallGraph(); void print(raw_ostream &OS) const; @@ -125,21 +127,23 @@ public: inline const CallGraphNode *operator[](const Function *F) const { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); - return I->second; + return I->second.get(); } /// \brief Returns the call graph node for the provided function. inline CallGraphNode *operator[](const Function *F) { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); - return I->second; + return I->second.get(); } /// \brief Returns the \c CallGraphNode which is used to represent /// undetermined calls into the callgraph. CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } - CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; } + CallGraphNode *getCallsExternalNode() const { + return CallsExternalNode.get(); + } //===--------------------------------------------------------------------- // Functions to keep a call graph up to date with a function that has been @@ -444,8 +448,10 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { static NodeType *getEntryNode(CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node!
} - typedef std::pair<const Function *, CallGraphNode *> PairTy; - typedef std::pointer_to_unary_function<PairTy, CallGraphNode &> DerefFun; + typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> + PairTy; + typedef std::pointer_to_unary_function<const PairTy &, CallGraphNode &> + DerefFun; // nodes_iterator/begin/end - Allow iteration over all nodes in the graph typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator; @@ -456,7 +462,7 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { return map_iterator(CG->end(), DerefFun(CGdereference)); } - static CallGraphNode &CGdereference(PairTy P) { return *P.second; } + static CallGraphNode &CGdereference(const PairTy &P) { return *P.second; } }; template <> @@ -465,8 +471,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits< static NodeType *getEntryNode(const CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node! } - typedef std::pair<const Function *, const CallGraphNode *> PairTy; - typedef std::pointer_to_unary_function<PairTy, const CallGraphNode &> + typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> + PairTy; + typedef std::pointer_to_unary_function<const PairTy &, const CallGraphNode &> DerefFun; // nodes_iterator/begin/end - Allow iteration over all nodes in the graph @@ -478,7 +485,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits< return map_iterator(CG->end(), DerefFun(CGdereference)); } - static const CallGraphNode &CGdereference(PairTy P) { return *P.second; } + static const CallGraphNode &CGdereference(const PairTy &P) { + return *P.second; + } }; } // End llvm namespace diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h index 667e1715775f..9c7f7bd34cce 100644 --- a/include/llvm/Analysis/CallGraphSCCPass.h +++ b/include/llvm/Analysis/CallGraphSCCPass.h @@ -30,7 +30,7 @@ class CallGraphNode; class CallGraph; class PMStack; class CallGraphSCC; - + class CallGraphSCCPass : public Pass { public: explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {} @@ -79,25 +79,26 @@ public: void getAnalysisUsage(AnalysisUsage &Info) const override; }; -/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. +/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. class CallGraphSCC { void *Context; // The CGPassManager object that is vending this. std::vector<CallGraphNode *> Nodes; + public: CallGraphSCC(void *context) : Context(context) {} - - void initialize(CallGraphNode*const*I, CallGraphNode*const*E) { + + void initialize(CallGraphNode *const *I, CallGraphNode *const *E) { Nodes.assign(I, E); } - + bool isSingular() const { return Nodes.size() == 1; } unsigned size() const { return Nodes.size(); } - + /// ReplaceNode - This informs the SCC and the pass manager that the specified /// Old node has been deleted, and New is to be used in its place. void ReplaceNode(CallGraphNode *Old, CallGraphNode *New); - - typedef std::vector<CallGraphNode*>::const_iterator iterator; + + typedef std::vector<CallGraphNode *>::const_iterator iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } }; diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index 8b7c7a90f7c0..8d2c095d8585 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -20,6 +20,7 @@ namespace llvm { class Use; class Instruction; class DominatorTree; + class OrderedBasicBlock; /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -41,10 +42,12 @@ namespace llvm { /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it /// or not.
Captures by the provided instruction are considered if the - /// final parameter is true. + /// final parameter is true. An ordered basic block in \p OBB could be used + /// to speed up capture-tracker queries. bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI = false); + DominatorTree *DT, bool IncludeI = false, + OrderedBasicBlock *OBB = nullptr); /// This callback is used in conjunction with PointerMayBeCaptured. In /// addition to the interface here, you'll need to provide your own getters diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index cb74e9f32d3d..ca50ee2f829a 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -36,8 +36,23 @@ public: DOTGraphTraitsViewer(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} + /// @brief Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be viewed. + /// + /// @param Analysis The current analysis result for this function. + virtual bool processFunction(Function &F, AnalysisT &Analysis) { + return true; + } + bool runOnFunction(Function &F) override { - GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + auto &Analysis = getAnalysis<AnalysisT>(); + + if (!processFunction(F, Analysis)) + return false; + + GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -63,8 +78,23 @@ public: DOTGraphTraitsPrinter(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} + /// @brief Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be printed. + /// + /// @param Analysis The current analysis result for this function. + virtual bool processFunction(Function &F, AnalysisT &Analysis) { + return true; + } + bool runOnFunction(Function &F) override { - GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + auto &Analysis = getAnalysis<AnalysisT>(); + + if (!processFunction(F, Analysis)) + return false; + + GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); std::string Filename = Name + "." + F.getName().str() + ".dot"; std::error_code EC; diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h new file mode 100644 index 000000000000..42932bfd3491 --- /dev/null +++ b/include/llvm/Analysis/DemandedBits.h @@ -0,0 +1,75 @@ +//===-- llvm/Analysis/DemandedBits.h - Determine demanded bits --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a demanded bits analysis. A demanded bit is one that +// contributes to a result; bits that are not demanded can be either zero or +// one without affecting control or data flow. For example in this sequence: +// +// %1 = add i32 %x, %y +// %2 = trunc i32 %1 to i16 +// +// Only the lowest 16 bits of %1 are demanded; the rest are removed by the +// trunc.
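+//
+// A minimal usage sketch (illustrative only; assumes a legacy-pass caller
+// that required this pass, and an integer-typed Instruction *I):
+//
+//   DemandedBits &DB = getAnalysis<DemandedBits>();
+//   APInt Demanded = DB.getDemandedBits(I); // bits of I that affect results
+//   if (DB.isInstructionDead(I))
+//     ;                                     // I feeds nothing demanded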
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H +#define LLVM_ANALYSIS_DEMANDED_BITS_H + +#include "llvm/Pass.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class FunctionPass; +class Function; +class Instruction; +class DominatorTree; +class AssumptionCache; + +struct DemandedBits : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DemandedBits(); + + bool runOnFunction(Function& F) override; + void getAnalysisUsage(AnalysisUsage& AU) const override; + void print(raw_ostream &OS, const Module *M) const override; + + /// Return the bits demanded from instruction I. + APInt getDemandedBits(Instruction *I); + + /// Return true if, during analysis, I could not be reached. + bool isInstructionDead(Instruction *I); + +private: + void performAnalysis(); + void determineLiveOperandBits(const Instruction *UserI, + const Instruction *I, unsigned OperandNo, + const APInt &AOut, APInt &AB, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2); + + AssumptionCache *AC; + DominatorTree *DT; + Function *F; + bool Analyzed; + + // The set of visited instructions (non-integer-typed only). + SmallPtrSet<Instruction *, 128> Visited; + DenseMap<Instruction *, APInt> AliveBits; +}; + +/// Create a demanded bits analysis pass. +FunctionPass *createDemandedBitsPass(); + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index a08ce574ea56..5290552b41dc 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -42,11 +42,11 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" namespace llvm { - class AliasAnalysis; class Loop; class LoopInfo; class ScalarEvolution; @@ -69,6 +69,15 @@ namespace llvm { /// as singly-linked lists, with the "next" fields stored in the dependence /// itself. class Dependence { + protected: + Dependence(const Dependence &) = default; + + // FIXME: When we move to MSVC 2015 as the base compiler for Visual Studio + // support, uncomment this line to allow a defaulted move constructor for + // Dependence. Currently, FullDependence relies on the copy constructor, but + // that is acceptable given the triviality of the class. + // Dependence(Dependence &&) = default; + public: Dependence(Instruction *Source, Instruction *Destination) :
- void setNextSuccessor(const Dependence *succ) { - NextSuccessor = succ; - } - + void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; } + /// dump - For debugging purposes, dumps a dependence to OS. /// void dump(raw_ostream &OS) const; + private: Instruction *Src, *Dst; const Dependence *NextPredecessor, *NextSuccessor; friend class DependenceAnalysis; }; - /// FullDependence - This class represents a dependence between two memory /// references in a function. It contains detailed information about the /// dependence (direction vectors, etc.) and is used when the compiler is @@ -216,11 +217,15 @@ namespace llvm { /// (for output, flow, and anti dependences), the dependence implies an /// ordering, where the source must precede the destination; in contrast, /// input dependences are unordered. - class FullDependence : public Dependence { + class FullDependence final : public Dependence { public: FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent, unsigned Levels); - ~FullDependence() override { delete[] DV; } + + FullDependence(FullDependence &&RHS) + : Dependence(std::move(RHS)), Levels(RHS.Levels), + LoopIndependent(RHS.LoopIndependent), Consistent(RHS.Consistent), + DV(std::move(RHS.DV)) {} /// isLoopIndependent - Returns true if this is a loop-independent /// dependence. @@ -268,16 +273,16 @@ namespace llvm { unsigned short Levels; bool LoopIndependent; bool Consistent; // Init to true, then refine. - DVEntry *DV; + std::unique_ptr DV; friend class DependenceAnalysis; }; - /// DependenceAnalysis - This class is the main dependence-analysis driver. /// class DependenceAnalysis : public FunctionPass { void operator=(const DependenceAnalysis &) = delete; DependenceAnalysis(const DependenceAnalysis &) = delete; + public: /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a @@ -387,6 +392,7 @@ namespace llvm { const SCEV *B; const SCEV *C; const Loop *AssociatedLoop; + public: /// isEmpty - Return true if the constraint is of kind Empty. bool isEmpty() const { return Kind == Empty; } @@ -453,7 +459,6 @@ namespace llvm { void dump(raw_ostream &OS) const; }; - /// establishNestingLevels - Examines the loop nesting of the Src and Dst /// instructions and establishes their shared loops. Sets the variables /// CommonLevels, SrcLevels, and MaxLevels. @@ -521,10 +526,10 @@ namespace llvm { /// in LoopNest. bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const; - /// Makes sure all subscript pairs share the same integer type by + /// Makes sure all subscript pairs share the same integer type by /// sign-extending as necessary. /// Sign-extending a subscript is safe because getelementptr assumes the - /// array subscripts are signed. + /// array subscripts are signed. void unifySubscriptType(ArrayRef Pairs); /// removeMatchingExtensions - Examines a subscript pair. @@ -806,7 +811,6 @@ namespace llvm { const SCEV *Delta) const; /// testBounds - Returns true iff the current bounds are plausible. 
- /// bool testBounds(unsigned char DirKind, unsigned Level, BoundInfo *Bound, @@ -913,9 +917,8 @@ namespace llvm { void updateDirection(Dependence::DVEntry &Level, const Constraint &CurConstraint) const; - bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl &Pair, - const SCEV *ElementSize); + bool tryDelinearize(Instruction *Src, Instruction *Dst, + SmallVectorImpl &Pair); public: static char ID; // Class identification, replacement for typeinfo diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h new file mode 100644 index 000000000000..aa2de571ba1b --- /dev/null +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -0,0 +1,48 @@ +//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The divergence analysis is an LLVM pass which can be used to find out +// if a branch instruction in a GPU program is divergent or not. It can help +// branch optimizations such as jump threading and loop unswitching to make +// better decisions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" + +namespace llvm { +class Value; +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet DivergentValues; +}; +} // End llvm namespace diff --git a/include/llvm/Analysis/EHPersonalities.h b/include/llvm/Analysis/EHPersonalities.h new file mode 100644 index 000000000000..59e9672b88e5 --- /dev/null +++ b/include/llvm/Analysis/EHPersonalities.h @@ -0,0 +1,94 @@ +//===- EHPersonalities.h - Compute EH-related information -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_EHPERSONALITIES_H +#define LLVM_ANALYSIS_EHPERSONALITIES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class BasicBlock; +class Function; +class Value; + +enum class EHPersonality { + Unknown, + GNU_Ada, + GNU_C, + GNU_CXX, + GNU_ObjC, + MSVC_X86SEH, + MSVC_Win64SEH, + MSVC_CXX, + CoreCLR +}; + +/// \brief See if the given exception handling personality function is one +/// that we understand. If so, return a description of it; otherwise return +/// Unknown. +EHPersonality classifyEHPersonality(const Value *Pers); + +/// \brief Returns true if this personality function catches asynchronous +/// exceptions. 
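+///
+/// For example (a sketch only; F is a hypothetical Function that has a
+/// personality set):
+///
+///   EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
+///   if (isAsynchronousEHPersonality(Pers))
+///     ; // faulting instructions may transfer control to a handler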
+inline bool isAsynchronousEHPersonality(EHPersonality Pers) { + // The two SEH personality functions can catch asynch exceptions. We assume + // unknown personalities don't catch asynch exceptions. + switch (Pers) { + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// \brief Returns true if this is a personality function that invokes +/// handler funclets (which must return to it). +inline bool isFuncletEHPersonality(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::MSVC_CXX: + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + case EHPersonality::CoreCLR: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// \brief Return true if this personality may be safely removed if there +/// are no invoke instructions remaining in the current function. +inline bool isNoOpWithoutInvoke(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::Unknown: + return false; + // All known personalities currently have this behavior + default: + return true; + } + llvm_unreachable("invalid enum"); +} + +bool canSimplifyInvokeNoUnwind(const Function *F); + +typedef TinyPtrVector ColorVector; + +/// \brief If an EH funclet personality is in use (see isFuncletEHPersonality), +/// this will recompute which blocks are in which funclet. It is possible that +/// some blocks are in multiple funclets. Consider this analysis to be +/// expensive. +DenseMap colorEHFunclets(Function &F); + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h new file mode 100644 index 000000000000..bcd102e7ded2 --- /dev/null +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -0,0 +1,160 @@ +//===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a simple mod/ref and alias analysis over globals. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_GLOBALSMODREF_H +#define LLVM_ANALYSIS_GLOBALSMODREF_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include + +namespace llvm { + +/// An alias analysis result set for globals. +/// +/// This focuses on handling aliasing properties of globals and interprocedural +/// function call mod/ref information. +class GlobalsAAResult : public AAResultBase { + friend AAResultBase; + + class FunctionInfo; + + const DataLayout &DL; + + /// The globals that do not have their addresses taken. + SmallPtrSet NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + SmallPtrSet IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + DenseMap AllocsForIndirectGlobals; + + /// For each function, keep track of what globals are modified or read. + DenseMap FunctionInfos; + + /// A map of functions to SCC. 
The SCCs are described by a simple integer + /// ID that is only useful for comparing for equality (are two functions + /// in the same SCC or not?) + DenseMap FunctionToSCCMap; + + /// Handle to clear this analysis on deletion of values. + struct DeletionCallbackHandle final : CallbackVH { + GlobalsAAResult *GAR; + std::list::iterator I; + + DeletionCallbackHandle(GlobalsAAResult &GAR, Value *V) + : CallbackVH(V), GAR(&GAR) {} + + void deleted() override; + }; + + /// List of callbacks for globals being tracked by this analysis. Note that + /// these objects are quite large, but we only anticipate having one per + /// global tracked by this analysis. There are numerous optimizations we + /// could perform to the memory utilization here if this becomes a problem. + std::list Handles; + + explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + +public: + GlobalsAAResult(GlobalsAAResult &&Arg); + + static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, + CallGraph &CG); + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(const Function *F); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + +private: + FunctionInfo *getFunctionInfo(const Function *F); + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, + SmallPtrSetImpl *Readers = nullptr, + SmallPtrSetImpl *Writers = nullptr, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalVariable *GV); + void CollectSCCMembership(CallGraph &CG); + + bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V); + ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS, + const GlobalValue *GV); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class GlobalsAA { +public: + typedef GlobalsAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + GlobalsAAResult run(Module &M, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "GlobalsAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the GlobalsAAResult object. 
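+///
+/// A rough sketch of typical use (illustrative only; assumes a legacy pass
+/// that added this wrapper to its analysis usage, and a Function Callee):
+///
+///   GlobalsAAResult &GAR = getAnalysis<GlobalsAAWrapperPass>().getResult();
+///   FunctionModRefBehavior B = GAR.getModRefBehavior(&Callee);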
+class GlobalsAAWrapperPass : public ModulePass { + std::unique_ptr Result; + +public: + static char ID; + + GlobalsAAWrapperPass(); + + GlobalsAAResult &getResult() { return *Result; } + const GlobalsAAResult &getResult() const { return *Result; } + + bool runOnModule(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createGlobalsAAWrapperPass - This pass provides alias and mod/ref info for +// global values that do not have their addresses taken. +// +ModulePass *createGlobalsAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 00dbcbdd7806..37d01490dac6 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -34,7 +34,7 @@ class DataLayout; /// The Expr member keeps track of the expression, User is the actual user /// instruction of the operand, and 'OperandValToReplace' is the operand of /// the User that is the use. -class IVStrideUse : public CallbackVH, public ilist_node { +class IVStrideUse final : public CallbackVH, public ilist_node { friend class IVUsers; public: IVStrideUse(IVUsers *P, Instruction* U, Value *O) diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 79ed74d82411..35f991cb3f67 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -23,7 +23,7 @@ class AssumptionCacheTracker; class CallSite; class DataLayout; class Function; -class TargetTransformInfoWrapperPass; +class TargetTransformInfo; namespace InlineConstants { // Various magic constants used to adjust heuristics. @@ -98,46 +98,31 @@ public: int getCostDelta() const { return Threshold - getCost(); } }; -/// \brief Cost analyzer used by inliner. -class InlineCostAnalysis : public CallGraphSCCPass { - TargetTransformInfoWrapperPass *TTIWP; - AssumptionCacheTracker *ACT; +/// \brief Get an InlineCost object representing the cost of inlining this +/// callsite. +/// +/// Note that threshold is passed into this function. Only costs below the +/// threshold are computed with any accuracy. The threshold can be used to +/// bound the computation necessary to determine whether the cost is +/// sufficiently low to warrant inlining. +/// +/// Also note that calling this function *dynamically* computes the cost of +/// inlining the callsite. It is an expensive, heavyweight call. +InlineCost getInlineCost(CallSite CS, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT); -public: - static char ID; - - InlineCostAnalysis(); - ~InlineCostAnalysis() override; - - // Pass interface implementation. - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnSCC(CallGraphSCC &SCC) override; - - /// \brief Get an InlineCost object representing the cost of inlining this - /// callsite. - /// - /// Note that threshold is passed into this function. Only costs below the - /// threshold are computed with any accuracy. The threshold can be used to - /// bound the computation necessary to determine whether the cost is - /// sufficiently low to warrant inlining. - /// - /// Also note that calling this function *dynamically* computes the cost of - /// inlining the callsite. It is an expensive, heavyweight call. - InlineCost getInlineCost(CallSite CS, int Threshold); - - /// \brief Get an InlineCost with the callee explicitly specified. 
- /// This allows you to calculate the cost of inlining a function via a - /// pointer. This behaves exactly as the version with no explicit callee - /// parameter in all other respects. - // - // Note: This is used by out-of-tree passes, please do not remove without - // adding a replacement API. - InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold); - - /// \brief Minimal filter to detect invalid constructs for inlining. - bool isInlineViable(Function &Callee); -}; +/// \brief Get an InlineCost with the callee explicitly specified. +/// This allows you to calculate the cost of inlining a function via a +/// pointer. This behaves exactly as the version with no explicit callee +/// parameter in all other respects. +// +InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT); +/// \brief Minimal filter to detect invalid constructs for inlining. +bool isInlineViable(Function &Callee); } #endif diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index d44c5ff4078d..ed313dae9ab1 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -207,7 +207,7 @@ namespace llvm { const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, - Instruction *CxtI = nullptr); + const Instruction *CxtI = nullptr); /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h index 5a339f10f50f..a1ded2554d44 100644 --- a/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -34,7 +34,7 @@ namespace llvm { class BasicBlock; template class DomTreeNodeBase; typedef DomTreeNodeBase DomTreeNode; -class DominatorTree; +template class DominatorTreeBase; /// \brief Determine the iterated dominance frontier, given a set of defining /// blocks, and optionally, a set of live-in blocks. @@ -47,7 +47,7 @@ class DominatorTree; class IDFCalculator { public: - IDFCalculator(DominatorTree &DT) : DT(DT), useLiveIn(false) {} + IDFCalculator(DominatorTreeBase &DT) : DT(DT), useLiveIn(false) {} /// \brief Give the IDF calculator the set of blocks in which the value is /// defined. This is equivalent to the set of starting blocks it should be @@ -85,7 +85,7 @@ public: void calculate(SmallVectorImpl &IDFBlocks); private: - DominatorTree &DT; + DominatorTreeBase &DT; bool useLiveIn; DenseMap DomLevels; const SmallPtrSetImpl *LiveInBlocks; diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index b0b9068de34b..ef3d5e8fe3df 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -54,7 +54,7 @@ namespace llvm { class PreservedAnalyses; class raw_ostream; -/// \brief A lazily constructed view of the call graph of a module. +/// A lazily constructed view of the call graph of a module. /// /// With the edges of this graph, the motivating constraint that we are /// attempting to maintain is that function-local optimization, CGSCC-local @@ -107,7 +107,7 @@ public: typedef SmallVector, 4> NodeVectorT; typedef SmallVectorImpl> NodeVectorImplT; - /// \brief A lazy iterator used for both the entry nodes and child nodes. + /// A lazy iterator used for both the entry nodes and child nodes. 
/// /// When this iterator is dereferenced, if not yet available, a function will /// be scanned for "calls" or uses of functions and its child information @@ -152,7 +152,7 @@ public: } }; - /// \brief A node in the call graph. + /// A node in the call graph. /// /// This represents a single node. Its primary roles are to cache the list of /// callees, de-duplicate and provide fast testing of whether a function is /// a callee, and facilitate iteration of child nodes in the graph. class Node { friend class LazyCallGraph; friend class LazyCallGraph::SCC; LazyCallGraph *G; Function &F; // We provide for the DFS numbering and Tarjan walk lowlink numbers to be // stored directly within the node. int DFSNumber; int LowLink; mutable NodeVectorT Callees; DenseMap<Function *, size_t> CalleeIndexMap; - /// \brief Basic constructor implements the scanning of F into Callees and + /// Basic constructor implements the scanning of F into Callees and /// CalleeIndexMap. Node(LazyCallGraph &G, Function &F); - /// \brief Internal helper to insert a callee. + /// Internal helper to insert a callee. void insertEdgeInternal(Function &Callee); - /// \brief Internal helper to insert a callee. + /// Internal helper to insert a callee. void insertEdgeInternal(Node &CalleeN); - /// \brief Internal helper to remove a callee from this node. + /// Internal helper to remove a callee from this node. void removeEdgeInternal(Function &Callee); public: typedef LazyCallGraph::iterator iterator; - Function &getFunction() const { - return F; - }; + Function &getFunction() const { return F; } iterator begin() const { return iterator(*G, Callees.begin(), Callees.end()); @@ -202,7 +200,7 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } }; - /// \brief An SCC of the call graph. + /// An SCC of the call graph. /// /// This represents a Strongly Connected Component of the call graph as /// a collection of call graph nodes. While the order of nodes in the SCC is @@ -226,7 +224,8 @@ public: public: typedef SmallVectorImpl<Node *>::const_iterator iterator; - typedef pointee_iterator<SmallPtrSetImpl<SCC *>::const_iterator> parent_iterator; + typedef pointee_iterator<SmallPtrSetImpl<SCC *>::const_iterator> + parent_iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } @@ -235,24 +234,24 @@ public: parent_iterator parent_end() const { return ParentSCCs.end(); } iterator_range<parent_iterator> parents() const { - return iterator_range<parent_iterator>(parent_begin(), parent_end()); + return make_range(parent_begin(), parent_end()); } - /// \brief Test if this SCC is a parent of \a C. + /// Test if this SCC is a parent of \a C. bool isParentOf(const SCC &C) const { return C.isChildOf(*this); } - /// \brief Test if this SCC is an ancestor of \a C. + /// Test if this SCC is an ancestor of \a C. bool isAncestorOf(const SCC &C) const { return C.isDescendantOf(*this); } - /// \brief Test if this SCC is a child of \a C. + /// Test if this SCC is a child of \a C. bool isChildOf(const SCC &C) const { return ParentSCCs.count(const_cast<SCC *>(&C)); } - /// \brief Test if this SCC is a descendant of \a C. + /// Test if this SCC is a descendant of \a C. bool isDescendantOf(const SCC &C) const; - /// \brief Short name useful for debugging or logging. + /// Short name useful for debugging or logging. /// /// We use the name of the first function in the SCC to name the SCC for /// the purposes of debugging and logging. @@ -267,22 +266,21 @@ public: /// Note that these methods sometimes have complex runtimes, so be careful /// how you call them. - /// \brief Insert an edge from one node in this SCC to another in this SCC. + /// Insert an edge from one node in this SCC to another in this SCC. /// /// By the definition of an SCC, this does not change the nature or make-up /// of any SCCs.
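///
/// A short sketch of the intended call pattern (illustrative only; CG is a
/// LazyCallGraph and F1/F2 are Functions already in the same SCC):
///
///   LazyCallGraph::Node &N1 = CG.get(F1), &N2 = CG.get(F2);
///   LazyCallGraph::SCC &C = *CG.lookupSCC(N1);
///   C.insertIntraSCCEdge(N1, N2);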
void insertIntraSCCEdge(Node &CallerN, Node &CalleeN); - /// \brief Insert an edge whose tail is in this SCC and head is in some - /// child SCC. + /// Insert an edge whose tail is in this SCC and head is in some child SCC. /// /// There must be an existing path from the caller to the callee. This /// operation is inexpensive and does not change the set of SCCs in the /// graph. void insertOutgoingEdge(Node &CallerN, Node &CalleeN); - /// \brief Insert an edge whose tail is in a descendant SCC and head is in - /// this SCC. + /// Insert an edge whose tail is in a descendant SCC and head is in this + /// SCC. /// /// There must be an existing path from the callee to the caller in this /// case. NB! This has the potential to be a very expensive function. It @@ -297,7 +295,7 @@ public: /// implementation for details, but that use case might impact users. SmallVector<SCC *, 1> insertIncomingEdge(Node &CallerN, Node &CalleeN); - /// \brief Remove an edge whose source is in this SCC and target is *not*. + /// Remove an edge whose source is in this SCC and target is *not*. /// /// This removes an inter-SCC edge. All inter-SCC edges originating from /// this SCC have been fully explored by any in-flight DFS SCC formation, @@ -309,7 +307,7 @@ public: /// them. void removeInterSCCEdge(Node &CallerN, Node &CalleeN); - /// \brief Remove an edge which is entirely within this SCC. + /// Remove an edge which is entirely within this SCC. /// /// Both the \a Caller and the \a Callee must be within this SCC. Removing /// such an edge may break cycles that form this SCC and thus this @@ -346,7 +344,7 @@ public: ///@} }; - /// \brief A post-order depth-first SCC iterator over the call graph. + /// A post-order depth-first SCC iterator over the call graph. /// /// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for /// the call graph, walking it lazily in depth-first post-order. That is, it @@ -358,7 +356,7 @@ public: friend class LazyCallGraph; friend class LazyCallGraph::Node; - /// \brief Nonce type to select the constructor for the end iterator. + /// Nonce type to select the constructor for the end iterator. struct IsAtEndT {}; LazyCallGraph *G; @@ -387,7 +385,7 @@ public: } }; - /// \brief Construct a graph for the given module. + /// Construct a graph for the given module. /// /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are @@ -410,22 +408,20 @@ public: } iterator_range<postorder_scc_iterator> postorder_sccs() { - return iterator_range<postorder_scc_iterator>(postorder_scc_begin(), - postorder_scc_end()); + return make_range(postorder_scc_begin(), postorder_scc_end()); } - /// \brief Lookup a function in the graph which has already been scanned and - /// added. + /// Lookup a function in the graph which has already been scanned and added. Node *lookup(const Function &F) const { return NodeMap.lookup(&F); } - /// \brief Lookup a function's SCC in the graph. + /// Lookup a function's SCC in the graph. /// /// \returns null if the function hasn't been assigned an SCC via the SCC /// iterator walk. SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); } - /// \brief Get a graph node for a given function, scanning it to populate the - /// graph data as necessary. + /// Get a graph node for a given function, scanning it to populate the graph + /// data as necessary.
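///
/// For example (a sketch; M is a hypothetical Module):
///
///   LazyCallGraph CG(M);                 // entry points only, no scanning
///   LazyCallGraph::Node &N = CG.get(F);  // F is scanned here, on demand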
Node &get(Function &F) { Node *&N = NodeMap[&F]; if (N) @@ -444,18 +440,18 @@ public: /// Once you begin manipulating a call graph's SCCs, you must perform all /// mutation of the graph via the SCC methods. - /// \brief Update the call graph after inserting a new edge. + /// Update the call graph after inserting a new edge. void insertEdge(Node &Caller, Function &Callee); - /// \brief Update the call graph after inserting a new edge. + /// Update the call graph after inserting a new edge. void insertEdge(Function &Caller, Function &Callee) { return insertEdge(get(Caller), Callee); } - /// \brief Update the call graph after deleting an edge. + /// Update the call graph after deleting an edge. void removeEdge(Node &Caller, Function &Callee); - /// \brief Update the call graph after deleting an edge. + /// Update the call graph after deleting an edge. void removeEdge(Function &Caller, Function &Callee) { return removeEdge(get(Caller), Callee); } @@ -463,57 +459,56 @@ public: ///@} private: - /// \brief Allocator that holds all the call graph nodes. + /// Allocator that holds all the call graph nodes. SpecificBumpPtrAllocator BPA; - /// \brief Maps function->node for fast lookup. + /// Maps function->node for fast lookup. DenseMap NodeMap; - /// \brief The entry nodes to the graph. + /// The entry nodes to the graph. /// /// These nodes are reachable through "external" means. Put another way, they /// escape at the module scope. NodeVectorT EntryNodes; - /// \brief Map of the entry nodes in the graph to their indices in - /// \c EntryNodes. + /// Map of the entry nodes in the graph to their indices in \c EntryNodes. DenseMap EntryIndexMap; - /// \brief Allocator that holds all the call graph SCCs. + /// Allocator that holds all the call graph SCCs. SpecificBumpPtrAllocator SCCBPA; - /// \brief Maps Function -> SCC for fast lookup. + /// Maps Function -> SCC for fast lookup. DenseMap SCCMap; - /// \brief The leaf SCCs of the graph. + /// The leaf SCCs of the graph. /// /// These are all of the SCCs which have no children. SmallVector LeafSCCs; - /// \brief Stack of nodes in the DFS walk. + /// Stack of nodes in the DFS walk. SmallVector, 4> DFSStack; - /// \brief Set of entry nodes not-yet-processed into SCCs. + /// Set of entry nodes not-yet-processed into SCCs. SmallVector SCCEntryNodes; - /// \brief Stack of nodes the DFS has walked but not yet put into a SCC. + /// Stack of nodes the DFS has walked but not yet put into a SCC. SmallVector PendingSCCStack; - /// \brief Counter for the next DFS number to assign. + /// Counter for the next DFS number to assign. int NextDFSNumber; - /// \brief Helper to insert a new function, with an already looked-up entry in + /// Helper to insert a new function, with an already looked-up entry in /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); - /// \brief Helper to update pointers back to the graph object during moves. + /// Helper to update pointers back to the graph object during moves. void updateGraphPtrs(); - /// \brief Helper to form a new SCC out of the top of a DFSStack-like + /// Helper to form a new SCC out of the top of a DFSStack-like /// structure. SCC *formSCC(Node *RootN, SmallVectorImpl &NodeStack); - /// \brief Retrieve the next node in the post-order SCC walk of the call graph. + /// Retrieve the next node in the post-order SCC walk of the call graph. 
SCC *getNextSCCInPostOrder(); }; @@ -535,17 +530,17 @@ template <> struct GraphTraits { static ChildIteratorType child_end(NodeType *N) { return N->end(); } }; -/// \brief An analysis pass which computes the call graph for a module. +/// An analysis pass which computes the call graph for a module. class LazyCallGraphAnalysis { public: - /// \brief Inform generic clients of the result type. + /// Inform generic clients of the result type. typedef LazyCallGraph Result; static void *ID() { return (void *)&PassID; } static StringRef name() { return "Lazy CallGraph Analysis"; } - /// \brief Compute the \c LazyCallGraph for the module \c M. + /// Compute the \c LazyCallGraph for the module \c M. /// /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. @@ -555,7 +550,7 @@ private: static char PassID; }; -/// \brief A pass which prints the call graph to a \c raw_ostream. +/// A pass which prints the call graph to a \c raw_ostream. /// /// This is primarily useful for testing the analysis. class LazyCallGraphPrinterPass { diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 1051cff5efb7..42002062dca2 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -25,7 +25,7 @@ namespace llvm { class Instruction; class TargetLibraryInfo; class Value; - + /// This pass computes, caches, and vends lazy value constraint information. class LazyValueInfo : public FunctionPass { AssumptionCache *AC; @@ -45,23 +45,22 @@ public: enum Tristate { Unknown = -1, False = 0, True = 1 }; - - + // Public query interface. - + /// Determine whether the specified value comparison with a constant is known /// to be true or false on the specified CFG edge. /// Pred is a CmpInst predicate. Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// Determine whether the specified value comparison with a constant is known /// to be true or false at the specified instruction /// (from an assume intrinsic). Pred is a CmpInst predicate. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI); - + /// Determine whether the specified value is known to be a /// constant at the end of the specified block. Return null if not. Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); @@ -70,14 +69,14 @@ public: /// constant on the specified edge. Return null if not. Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// Inform the analysis cache that we have threaded an edge from /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); - + /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - + // Implementation boilerplate. void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h deleted file mode 100644 index 6589ac13c746..000000000000 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ /dev/null @@ -1,71 +0,0 @@ -//===- LibCallAliasAnalysis.h - Implement AliasAnalysis for libcalls ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the LibCallAliasAnalysis class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H -#define LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" - -namespace llvm { - class LibCallInfo; - struct LibCallFunctionInfo; - - /// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo. - struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis { - static char ID; // Class identification - - LibCallInfo *LCI; - - explicit LibCallAliasAnalysis(LibCallInfo *LC = nullptr) - : FunctionPass(ID), LCI(LC) { - initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC) - : FunctionPass(ID), LCI(LC) { - initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - ~LibCallAliasAnalysis() override; - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - // TODO: Could compare two direct calls against each other if we cared to. - return AliasAnalysis::getModRefInfo(CS1, CS2); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnFunction(Function &F) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, - const MemoryLocation &Loc); - }; -} // End of llvm namespace - -#endif diff --git a/include/llvm/Analysis/LibCallSemantics.h b/include/llvm/Analysis/LibCallSemantics.h deleted file mode 100644 index b4bef310e590..000000000000 --- a/include/llvm/Analysis/LibCallSemantics.h +++ /dev/null @@ -1,225 +0,0 @@ -//===- LibCallSemantics.h - Describe library semantics --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines interfaces that can be used to describe language specific -// runtime library interfaces (e.g. libc, libm, etc) to LLVM optimizers. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LIBCALLSEMANTICS_H -#define LLVM_ANALYSIS_LIBCALLSEMANTICS_H - -#include "llvm/Analysis/AliasAnalysis.h" - -namespace llvm { -class InvokeInst; - - /// LibCallLocationInfo - This struct describes a set of memory locations that - /// are accessed by libcalls. Identification of a location is doing with a - /// simple callback function. - /// - /// For example, the LibCallInfo may be set up to model the behavior of - /// standard libm functions. The location that they may be interested in is - /// an abstract location that represents errno for the current target. 
In - /// this case, a location for errno is anything such that the predicate - /// returns true. On Mac OS X, this predicate would return true if the - /// pointer is the result of a call to "__error()". - /// - /// Locations can also be defined in a constant-sensitive way. For example, - /// it is possible to define a location that returns true iff it is passed - /// into the call as a specific argument. This is useful for modeling things - /// like "printf", which can store to memory, but only through pointers passed - /// with a '%n' constraint. - /// - struct LibCallLocationInfo { - // TODO: Flags: isContextSensitive etc. - - /// isLocation - Return a LocResult if the specified pointer refers to this - /// location for the specified call site. This returns "Yes" if we can tell - /// that the pointer *does definitely* refer to the location, "No" if we can - /// tell that the location *definitely does not* refer to the location, and - /// returns "Unknown" if we cannot tell for certain. - enum LocResult { - Yes, No, Unknown - }; - LocResult (*isLocation)(ImmutableCallSite CS, const MemoryLocation &Loc); - }; - - /// LibCallFunctionInfo - Each record in the array of FunctionInfo structs - /// records the behavior of one libcall that is known by the optimizer. This - /// captures things like the side effects of the call. Side effects are - /// modeled both universally (in the readnone/readonly) sense, but also - /// potentially against a set of abstract locations defined by the optimizer. - /// This allows an optimizer to define that some libcall (e.g. sqrt) is - /// side-effect free except that it might modify errno (thus, the call is - /// *not* universally readonly). Or it might say that the side effects - /// are unknown other than to say that errno is not modified. - /// - struct LibCallFunctionInfo { - /// Name - This is the name of the libcall this describes. - const char *Name; - - /// TODO: Constant folding function: Constant* vector -> Constant*. - - /// UniversalBehavior - This captures the absolute mod/ref behavior without - /// any specific context knowledge. For example, if the function is known - /// to be readonly, this would be set to 'ref'. If known to be readnone, - /// this is set to NoModRef. - AliasAnalysis::ModRefResult UniversalBehavior; - - /// LocationMRInfo - This pair captures info about whether a specific - /// location is modified or referenced by a libcall. - struct LocationMRInfo { - /// LocationID - ID # of the accessed location or ~0U for array end. - unsigned LocationID; - /// MRInfo - Mod/Ref info for this location. - AliasAnalysis::ModRefResult MRInfo; - }; - - /// DetailsType - Indicate the sense of the LocationDetails array. This - /// controls how the LocationDetails array is interpreted. - enum { - /// DoesOnly - If DetailsType is set to DoesOnly, then we know that the - /// *only* mod/ref behavior of this function is captured by the - /// LocationDetails array. If we are trying to say that 'sqrt' can only - /// modify errno, we'd have the {errnoloc,mod} in the LocationDetails - /// array and have DetailsType set to DoesOnly. - DoesOnly, - - /// DoesNot - If DetailsType is set to DoesNot, then the sense of the - /// LocationDetails array is completely inverted. This means that we *do - /// not* know everything about the side effects of this libcall, but we do - /// know things that the libcall cannot do. This is useful for complex - /// functions like 'ctime' which have crazy mod/ref behavior, but are - /// known to never read or write errno. 
In this case, we'd have - /// {errnoloc,modref} in the LocationDetails array and DetailsType would - /// be set to DoesNot, indicating that ctime does not read or write the - /// errno location. - DoesNot - } DetailsType; - - /// LocationDetails - This is a pointer to an array of LocationMRInfo - /// structs which indicates the behavior of the libcall w.r.t. specific - /// locations. For example, if this libcall is known to only modify - /// 'errno', it would have a LocationDetails array with the errno ID and - /// 'mod' in it. See the DetailsType field for how this is interpreted. - /// - /// In the "DoesOnly" case, this information is 'may' information for: there - /// is no guarantee that the specified side effect actually does happen, - /// just that it could. In the "DoesNot" case, this is 'must not' info. - /// - /// If this pointer is null, no details are known. - /// - const LocationMRInfo *LocationDetails; - }; - - - /// LibCallInfo - Abstract interface to query about library call information. - /// Instances of this class return known information about some set of - /// libcalls. - /// - class LibCallInfo { - // Implementation details of this object, private. - mutable void *Impl; - mutable const LibCallLocationInfo *Locations; - mutable unsigned NumLocations; - public: - LibCallInfo() : Impl(nullptr), Locations(nullptr), NumLocations(0) {} - virtual ~LibCallInfo(); - - //===------------------------------------------------------------------===// - // Accessor Methods: Efficient access to contained data. - //===------------------------------------------------------------------===// - - /// getLocationInfo - Return information about the specified LocationID. - const LibCallLocationInfo &getLocationInfo(unsigned LocID) const; - - - /// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to - /// the specified function if we have it. If not, return null. - const LibCallFunctionInfo *getFunctionInfo(const Function *F) const; - - - //===------------------------------------------------------------------===// - // Implementation Methods: Subclasses should implement these. - //===------------------------------------------------------------------===// - - /// getLocationInfo - Return descriptors for the locations referenced by - /// this set of libcalls. - virtual unsigned getLocationInfo(const LibCallLocationInfo *&Array) const { - return 0; - } - - /// getFunctionInfoArray - Return an array of descriptors that describe the - /// set of libcalls represented by this LibCallInfo object. This array is - /// terminated by an entry with a NULL name. - virtual const LibCallFunctionInfo *getFunctionInfoArray() const = 0; - }; - - enum class EHPersonality { - Unknown, - GNU_Ada, - GNU_C, - GNU_CXX, - GNU_ObjC, - MSVC_X86SEH, - MSVC_Win64SEH, - MSVC_CXX, - }; - - /// \brief See if the given exception handling personality function is one - /// that we understand. If so, return a description of it; otherwise return - /// Unknown. - EHPersonality classifyEHPersonality(const Value *Pers); - - /// \brief Returns true if this personality function catches asynchronous - /// exceptions. - inline bool isAsynchronousEHPersonality(EHPersonality Pers) { - // The two SEH personality functions can catch asynch exceptions. We assume - // unknown personalities don't catch asynch exceptions. 
- switch (Pers) { - case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: - return true; - default: return false; - } - llvm_unreachable("invalid enum"); - } - - /// \brief Returns true if this is an MSVC personality function. - inline bool isMSVCEHPersonality(EHPersonality Pers) { - // The two SEH personality functions can catch asynch exceptions. We assume - // unknown personalities don't catch asynch exceptions. - switch (Pers) { - case EHPersonality::MSVC_CXX: - case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: - return true; - default: return false; - } - llvm_unreachable("invalid enum"); - } - - /// \brief Return true if this personality may be safely removed if there - /// are no invoke instructions remaining in the current function. - inline bool isNoOpWithoutInvoke(EHPersonality Pers) { - switch (Pers) { - case EHPersonality::Unknown: - return false; - // All known personalities currently have this behavior - default: return true; - } - llvm_unreachable("invalid enum"); - } - - bool canSimplifyInvokeNoUnwind(const Function *F); - -} // end namespace llvm - -#endif diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 42667d2af14a..939663b0def1 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -14,11 +14,12 @@ #ifndef LLVM_ANALYSIS_LOADS_H #define LLVM_ANALYSIS_LOADS_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CommandLine.h" namespace llvm { -class AliasAnalysis; class DataLayout; class MDNode; @@ -29,15 +30,19 @@ class MDNode; bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align); +/// DefMaxInstsToScan - the default number of maximum instructions +/// to scan in the block, used by FindAvailableLoadedValue(). +extern cl::opt DefMaxInstsToScan; + /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at /// the instruction before ScanFrom) checking to see if we have the value at /// the memory address *Ptr locally available within a small number of /// instructions. If the value is available, return it. /// -/// If not, return the iterator for the last validated instruction that the +/// If not, return the iterator for the last validated instruction that the /// value would be live through. If we scanned the entire block and didn't /// find something that invalidates *Ptr or provides it, ScanFrom would be -/// left at begin() and this returns null. ScanFrom could also be left +/// left at begin() and this returns null. ScanFrom could also be left /// /// MaxInstsToScan specifies the maximum instructions to scan in the block. /// If it is set to 0, it will scan the whole block. You can also optionally @@ -48,7 +53,7 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// is found, it is left unmodified. 
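For illustration, a minimal sketch of the cl::opt idiom that DefMaxInstsToScan relies on (the diff rendering drops template arguments; the declaration is cl::opt<unsigned>). The option name, default, and helper below are hypothetical and not part of the patch:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical option mirroring DefMaxInstsToScan.
static cl::opt<unsigned> MaxInstsToScanOpt(
    "example-max-insts-to-scan", cl::init(6), cl::Hidden,
    cl::desc("Max instructions to scan per block (0 = whole block)"));

unsigned effectiveScanLimit(unsigned BlockSize) {
  // By the convention documented above, 0 means "scan the whole block".
  return MaxInstsToScanOpt == 0 ? BlockSize : MaxInstsToScanOpt.getValue();
}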
Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan = 6, + unsigned MaxInstsToScan = DefMaxInstsToScan, AliasAnalysis *AA = nullptr, AAMDNodes *AATags = nullptr); diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 476e4b6686bb..871d35e99b74 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -29,10 +29,11 @@ namespace llvm { class Value; class DataLayout; -class AliasAnalysis; class ScalarEvolution; class Loop; class SCEV; +class SCEVUnionPredicate; +class LoopAccessInfo; /// Optimization analysis message produced during vectorization. Messages inform /// the user why vectorization did not occur. @@ -136,6 +137,14 @@ public: // We couldn't determine the direction or the distance. Unknown, // Lexically forward. + // + // FIXME: If we only have loop-independent forward dependences (e.g. a + // read and write of A[i]), LAA will locally deem the dependence "safe" + // without querying the MemoryDepChecker. Therefore we can miss + // enumerating loop-independent forward dependences in + // getDependences. Note that as soon as there are different + // indices used to access the same array, the MemoryDepChecker *is* + // queried and the dependence list is complete. Forward, // Forward, but if vectorized, is likely to prevent store-to-load // forwarding. @@ -162,13 +171,20 @@ public: Dependence(unsigned Source, unsigned Destination, DepType Type) : Source(Source), Destination(Destination), Type(Type) {} + /// \brief Return the source instruction of the dependence. + Instruction *getSource(const LoopAccessInfo &LAI) const; + /// \brief Return the destination instruction of the dependence. + Instruction *getDestination(const LoopAccessInfo &LAI) const; + /// \brief Dependence types that don't prevent vectorization. static bool isSafeForVectorization(DepType Type); - /// \brief Dependence types that can be queried from the analysis. - static bool isInterestingDependence(DepType Type); + /// \brief Lexically forward dependence. + bool isForward() const; + /// \brief Lexically backward dependence. + bool isBackward() const; - /// \brief Lexically backward dependence types. + /// \brief May be a lexically backward dependence type (includes Unknown). bool isPossiblyBackward() const; /// \brief Print the dependence. \p Instr is used to map the instruction @@ -177,10 +193,10 @@ public: const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(ScalarEvolution *Se, const Loop *L) - : SE(Se), InnermostLoop(L), AccessIdx(0), + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) + : PSE(PSE), InnermostLoop(L), AccessIdx(0), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordInterestingDependences(true) {} + RecordDependences(true) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -218,14 +234,14 @@ public: /// vectorize the loop with a dynamic array access check. bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; } - /// \brief Returns the interesting dependences. If null is returned we - /// exceeded the MaxInterestingDependence threshold and this information is - /// not available. - const SmallVectorImpl *getInterestingDependences() const { - return RecordInterestingDependences ? &InterestingDependences : nullptr; + /// \brief Returns the memory dependences. 
If null is returned we exceeded + /// the MaxDependences threshold and this information is not + /// available. + const SmallVectorImpl *getDependences() const { + return RecordDependences ? &Dependences : nullptr; } - void clearInterestingDependences() { InterestingDependences.clear(); } + void clearDependences() { Dependences.clear(); } /// \brief The vector of memory access instructions. The indices are used as /// instruction identifiers in the Dependence class. @@ -233,12 +249,29 @@ public: return InstMap; } + /// \brief Generate a mapping between the memory instructions and their + /// indices according to program order. + DenseMap generateInstructionOrderMap() const { + DenseMap OrderMap; + + for (unsigned I = 0; I < InstMap.size(); ++I) + OrderMap[InstMap[I]] = I; + + return OrderMap; + } + /// \brief Find the set of instructions that read or write via \p Ptr. SmallVector getInstructionsForAccess(Value *Ptr, bool isWrite) const; private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and + /// applies dynamic knowledge to simplify SCEV expressions and convert them + /// to a more usable form. We need this in case assumptions about SCEV + /// expressions need to be made in order to avoid unknown dependences. For + /// example we might assume a unit stride for a pointer in order to prove + /// that a memory access is strided and doesn't wrap. + PredicatedScalarEvolution &PSE; const Loop *InnermostLoop; /// \brief Maps access locations (ptr, read/write) to program order. @@ -261,15 +294,14 @@ private: /// vectorization. bool SafeForVectorization; - //// \brief True if InterestingDependences reflects the dependences in the - //// loop. If false we exceeded MaxInterestingDependence and - //// InterestingDependences is invalid. - bool RecordInterestingDependences; + //// \brief True if Dependences reflects the dependences in the + //// loop. If false we exceeded MaxDependences and + //// Dependences is invalid. + bool RecordDependences; - /// \brief Interesting memory dependences collected during the analysis as - /// defined by isInterestingDependence. Only valid if - /// RecordInterestingDependences is true. - SmallVector InterestingDependences; + /// \brief Memory dependences collected during the analysis. Only valid if + /// RecordDependences is true. + SmallVector Dependences; /// \brief Check whether there is a plausible dependence between the two /// accesses. @@ -327,11 +359,17 @@ public: void reset() { Need = false; Pointers.clear(); + Checks.clear(); } /// Insert a pointer and calculate the start and end SCEVs. + /// \p We need Preds in order to compute the SCEV expression of the pointer + /// according to the assumptions that we've made during the analysis. + /// The method might also version the pointer stride according to \p Strides, + /// and change \p Preds. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides); + unsigned ASId, const ValueToValueMap &Strides, + PredicatedScalarEvolution &PSE); /// \brief No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -368,33 +406,38 @@ public: SmallVector Members; }; - /// \brief Groups pointers such that a single memcheck is required - /// between two different groups. This will clear the CheckingGroups vector - /// and re-compute it. We will only group dependecies if \p UseDependencies - /// is true, otherwise we will create a separate group for each pointer. 
- void groupChecks(MemoryDepChecker::DepCandidates &DepCands, - bool UseDependencies); + /// \brief A memcheck which is made up of a pair of grouped pointers. + /// + /// These *have* to be const for now, since checks are generated from + /// CheckingPtrGroups in LAI::addRuntimeChecks which is a const member + /// function. FIXME: once check-generation is moved inside this class (after + /// the PtrPartition hack is removed), we could drop const. + typedef std::pair<const CheckingPtrGroup *, const CheckingPtrGroup *> + PointerCheck; + + /// \brief Generate the checks and store them. This also performs the grouping + /// of pointers to reduce the number of memchecks necessary. + void generateChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// \brief Returns the checks that generateChecks created. + const SmallVector<PointerCheck, 4> &getChecks() const { return Checks; } /// \brief Decide if we need to add a check between two groups of pointers, /// according to needsChecking. - bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N, - const SmallVectorImpl<int> *PtrPartition) const; - - /// \brief Return true if any pointer requires run-time checking according - /// to needsChecking. - bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const; + bool needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const; /// \brief Returns the number of run-time checks required according to /// needsChecking. - unsigned getNumberOfChecks(const SmallVectorImpl<int> *PtrPartition) const; + unsigned getNumberOfChecks() const { return Checks.size(); } /// \brief Print the list run-time memory checks necessary. - /// - /// If \p PtrPartition is set, it contains the partition number for - /// pointers (-1 if the pointer belongs to multiple partitions). In this - /// case omit checks between pointers belonging to the same partition. - void print(raw_ostream &OS, unsigned Depth = 0, - const SmallVectorImpl<int> *PtrPartition = nullptr) const; + void print(raw_ostream &OS, unsigned Depth = 0) const; + + /// Print \p Checks. + void printChecks(raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + unsigned Depth = 0) const; /// This flag indicates if we need to add the runtime check. bool Need; @@ -405,18 +448,41 @@ public: /// Holds a partitioning of pointers into "check groups". SmallVector<CheckingPtrGroup, 2> CheckingGroups; -private: + /// \brief Check if pointers are in the same partition + /// + /// \p PtrToPartition contains the partition number for pointers (-1 if the + /// pointer belongs to multiple partitions). + static bool + arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition, + unsigned PtrIdx1, unsigned PtrIdx2); + /// \brief Decide whether we need to issue a run-time check for pointer at /// index \p I and \p J to prove their independence. - /// - /// If \p PtrPartition is set, it contains the partition number for - /// pointers (-1 if the pointer belongs to multiple partitions). In this - /// case omit checks between pointers belonging to the same partition. - bool needsChecking(unsigned I, unsigned J, - const SmallVectorImpl<int> *PtrPartition) const; + bool needsChecking(unsigned I, unsigned J) const; + + /// \brief Return PointerInfo for pointer at index \p PtrIdx. + const PointerInfo &getPointerInfo(unsigned PtrIdx) const { + return Pointers[PtrIdx]; + } + +private: + /// \brief Groups pointers such that a single memcheck is required + /// between two different groups. This will clear the CheckingGroups vector + /// and re-compute it. We will only group dependencies if \p UseDependencies + /// is true, otherwise we will create a separate group for each pointer. + void groupChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// Generate the checks and return them. + SmallVector<PointerCheck, 4> + generateChecks() const; /// Holds a pointer to the ScalarEvolution analysis. ScalarEvolution *SE; + + /// \brief Set of run-time checks required to establish independence of + /// otherwise may-aliasing pointers in the loop. + SmallVector<PointerCheck, 4> Checks; }; /// \brief Drive the analysis of memory accesses in the loop @@ -433,6 +499,13 @@ private: /// generates run-time checks to prove independence. This is done by /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the /// RuntimePointerCheck class. +/// +/// If pointers can wrap or can't be expressed as affine AddRec expressions by +/// ScalarEvolution, we will generate run-time checks by emitting a +/// SCEVUnionPredicate. +/// +/// Checks for both memory dependences and the SCEV predicates contained in the +/// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, @@ -450,9 +523,8 @@ public: /// \brief Number of memchecks required to prove independence of otherwise /// may-alias pointers. - unsigned getNumRuntimePointerChecks( - const SmallVectorImpl<int> *PtrPartition = nullptr) const { - return PtrRtChecking.getNumberOfChecks(PtrPartition); + unsigned getNumRuntimePointerChecks() const { + return PtrRtChecking.getNumberOfChecks(); } /// Return true if the block BB needs to be predicated in order for the loop @@ -472,13 +544,18 @@ public: /// Returns a pair of instructions where the first element is the first /// instruction generated in possibly a sequence of instructions and the /// second value is the final comparator value or NULL if no check is needed. - /// - /// If \p PtrPartition is set, it contains the partition number for pointers - /// (-1 if the pointer belongs to multiple partitions). In this case omit - /// checks between pointers belonging to the same partition. std::pair<Instruction *, Instruction *> - addRuntimeCheck(Instruction *Loc, - const SmallVectorImpl<int> *PtrPartition = nullptr) const; + addRuntimeChecks(Instruction *Loc) const; + + /// \brief Generate the instructions for the checks in \p PointerChecks. + /// + /// Returns a pair of instructions where the first element is the first + /// instruction generated in possibly a sequence of instructions and the + /// second value is the final comparator value or NULL if no check is needed. + std::pair<Instruction *, Instruction *> + addRuntimeChecks(Instruction *Loc, + const SmallVectorImpl<RuntimePointerChecking::PointerCheck> + &PointerChecks) const; /// \brief The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. @@ -510,6 +587,13 @@ public: return StoreToLoopInvariantAddress; } + /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts + /// them to a more usable form. All SCEV expressions during the analysis + /// should be re-written (and therefore simplified) according to PSE. + /// A user of LoopAccessAnalysis will need to emit the runtime checks + /// associated with this predicate. + PredicatedScalarEvolution PSE; + private: /// \brief Analyze the loop. Substitute symbolic strides using Strides.
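For illustration, a minimal sketch of a client of the LoopAccessInfo interface above; the helper function is hypothetical, and the pair semantics follow the addRuntimeChecks comment:

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

// Hypothetical helper: emit the run-time checks at Loc when the analysis
// could not prove independence statically.
static void emitChecksIfNeeded(const LoopAccessInfo &LAI, Instruction *Loc) {
  if (LAI.getNumRuntimePointerChecks() == 0)
    return; // no memchecks required; the dependences are safe as-is
  // first = first instruction emitted, second = final i1 comparator
  // (nullptr when no check ends up being needed).
  std::pair<Instruction *, Instruction *> Check = LAI.addRuntimeChecks(Loc);
  (void)Check; // a loop versioner would branch on Check.second
}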
void analyzeLoop(const ValueToValueMap &Strides); @@ -529,7 +613,6 @@ private: MemoryDepChecker DepChecker; Loop *TheLoop; - ScalarEvolution *SE; const DataLayout &DL; const TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -556,18 +639,24 @@ private: Value *stripIntegerCast(Value *V); ///\brief Return the SCEV corresponding to a pointer with the symbolic stride -///replaced with constant one. +/// replaced with constant one, assuming \p Preds is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add a new predicate to \p Preds. /// /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. -const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr = nullptr); /// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, +/// the address space, assuming \p Preds is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add a new predicate to \p Preds. +int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap); /// \brief This analysis provides dependence information for the memory accesses @@ -616,6 +705,17 @@ private: DominatorTree *DT; LoopInfo *LI; }; + +inline Instruction *MemoryDepChecker::Dependence::getSource( + const LoopAccessInfo &LAI) const { + return LAI.getDepChecker().getMemoryInstructions()[Source]; +} + +inline Instruction *MemoryDepChecker::Dependence::getDestination( + const LoopAccessInfo &LAI) const { + return LAI.getDepChecker().getMemoryInstructions()[Destination]; +} + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 3ec83f2c21fd..c219bd85a48a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,6 +37,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -72,6 +73,10 @@ class LoopBase { SmallPtrSet DenseBlockSet; + /// Indicator that this loops has been "unlooped", so there's no loop here + /// anymore. + bool IsUnloop = false; + LoopBase(const LoopBase &) = delete; const LoopBase& operator=(const LoopBase &) = delete; @@ -140,12 +145,22 @@ public: typedef typename std::vector::const_iterator block_iterator; block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } + inline iterator_range blocks() const { + return make_range(block_begin(), block_end()); + } /// getNumBlocks - Get the number of blocks in this loop in constant time. unsigned getNumBlocks() const { return Blocks.size(); } + /// Mark this loop as having been unlooped - the last backedge was removed and + /// we no longer have a loop. + void markUnlooped() { IsUnloop = true; } + + /// Return true if this no longer represents a loop. + bool isUnloop() const { return IsUnloop; } + /// isLoopExiting - True if terminator in the block can branch to another /// block that is outside of the current loop. 
/// @@ -398,6 +413,9 @@ public: /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; + /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT) const; + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. @@ -622,7 +640,7 @@ public: } /// Create the loop forest using a stable algorithm. - void Analyze(DominatorTreeBase &DomTree); + void analyze(const DominatorTreeBase &DomTree); // Debugging void print(raw_ostream &OS) const; @@ -642,6 +660,7 @@ class LoopInfo : public LoopInfoBase { LoopInfo(const LoopInfo &) = delete; public: LoopInfo() {} + explicit LoopInfo(const DominatorTreeBase &DomTree); LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast(Arg))) {} LoopInfo &operator=(LoopInfo &&RHS) { @@ -653,8 +672,9 @@ public: /// updateUnloop - Update LoopInfo after removing the last backedge from a /// loop--now the "unloop". This updates the loop forest and parent loops for - /// each block so that Unloop is no longer referenced, but the caller must - /// actually delete the Unloop object. + /// each block so that Unloop is no longer referenced, but does not actually + /// delete the Unloop object. Generally, the loop pass manager should manage + /// deleting the Unloop. void updateUnloop(Loop *Unloop); /// replacementPreservesLCSSAForm - Returns true if replacing From with To @@ -677,6 +697,78 @@ public: // it as a replacement will not break LCSSA form. return ToLoop->contains(getLoopFor(From->getParent())); } + + /// \brief Checks if moving a specific instruction can break LCSSA in any + /// loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast(U.getUser()); + auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. + + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. 
+ if (isa(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } }; // Allow clients to walk the list of nested loops... @@ -759,6 +851,19 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; +/// \brief Pass for printing a loop's contents as LLVM's text IR assembly. +class PrintLoopPass { + raw_ostream &OS; + std::string Banner; + +public: + PrintLoopPass(); + PrintLoopPass(raw_ostream &OS, const std::string &Banner = ""); + + PreservedAnalyses run(Loop &L); + static StringRef name() { return "PrintLoopPass"; } +}; + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index f5cc856f6247..824fc7e8f155 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -269,7 +269,7 @@ void LoopBase::verifyLoop() const { // A non-header loop shouldn't be reachable from outside the loop, // though it is permitted if the predecessor is not itself actually // reachable. - BlockT *EntryBB = BB->getParent()->begin(); + BlockT *EntryBB = &BB->getParent()->front(); for (BlockT *CB : depth_first(EntryBB)) for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) assert(CB != OutsideLoopPreds[i] && @@ -345,7 +345,7 @@ void LoopBase::print(raw_ostream &OS, unsigned Depth) const { template static void discoverAndMapSubloop(LoopT *L, ArrayRef Backedges, LoopInfoBase *LI, - DominatorTreeBase &DomTree) { + const DominatorTreeBase &DomTree) { typedef GraphTraits > InvBlockTraits; unsigned NumBlocks = 0; @@ -468,10 +468,10 @@ void PopulateLoopsDFS::insertIntoLoop(BlockT *Block) { /// insertions per block. template void LoopInfoBase:: -Analyze(DominatorTreeBase &DomTree) { +analyze(const DominatorTreeBase &DomTree) { // Postorder traversal of the dominator tree. - DomTreeNodeBase* DomRoot = DomTree.getRootNode(); + const DomTreeNodeBase *DomRoot = DomTree.getRootNode(); for (auto DomNode : post_order(DomRoot)) { BlockT *Header = DomNode->getBlock(); diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 8650000fcfb6..2cf734e53bb4 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -127,20 +127,9 @@ public: } public: - // Delete loop from the loop queue and loop nest (LoopInfo). - void deleteLoopFromQueue(Loop *L); - - // Insert loop into the loop queue and add it as a child of the - // given parent. - void insertLoop(Loop *L, Loop *ParentLoop); - - // Insert a loop into the loop queue. - void insertLoopIntoQueue(Loop *L); - - // Reoptimize this loop. LPPassManager will re-insert this loop into the - // queue. This allows LoopPass to change loop nest for the loop. This - // utility may send LPPassManager into infinite loops so use caution. - void redoLoop(Loop *L); + // Add a new loop into the loop queue as a child of the given parent, or at + // the top level if \c ParentLoop is null. 
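For illustration, a minimal sketch of the guard that movementPreservesLCSSAForm (defined above) is meant to support; the hoisting helper is hypothetical:

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// Hypothetical transform step: only move Inst if LCSSA survives the move.
static bool hoistIfLCSSASafe(LoopInfo &LI, Instruction *Inst,
                             Instruction *InsertPt) {
  if (!LI.movementPreservesLCSSAForm(Inst, InsertPt))
    return false;             // moving would introduce an LCSSA violation
  Inst->moveBefore(InsertPt); // safe: same rules the helper just checked
  return true;
}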
+ Loop &addLoop(Loop *ParentLoop); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info @@ -163,8 +152,6 @@ public: private: std::deque LQ; - bool skipThisLoop; - bool redoThisLoop; LoopInfo *LI; Loop *CurrentLoop; }; diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 805a43dfb070..87fb3efaf50e 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); -/// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast = false); - /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory and never returns null (such as operator new). bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 511898071c22..daa1ba91c071 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -28,7 +28,6 @@ namespace llvm { class FunctionPass; class Instruction; class CallSite; - class AliasAnalysis; class AssumptionCache; class MemoryDependenceAnalysis; class PredIteratorCache; @@ -97,6 +96,7 @@ namespace llvm { typedef PointerIntPair PairTy; PairTy Value; explicit MemDepResult(PairTy V) : Value(V) {} + public: MemDepResult() : Value(nullptr, Invalid) {} @@ -164,6 +164,7 @@ namespace llvm { bool operator!=(const MemDepResult &M) const { return Value != M.Value; } bool operator<(const MemDepResult &M) const { return Value < M.Value; } bool operator>(const MemDepResult &M) const { return Value > M.Value; } + private: friend class MemoryDependenceAnalysis; /// Dirty - Entries with this marker occur in a LocalDeps map or @@ -190,6 +191,7 @@ namespace llvm { class NonLocalDepEntry { BasicBlock *BB; MemDepResult Result; + public: NonLocalDepEntry(BasicBlock *bb, MemDepResult result) : BB(bb), Result(result) {} @@ -215,6 +217,7 @@ namespace llvm { class NonLocalDepResult { NonLocalDepEntry Entry; Value *Address; + public: NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) : Entry(bb, result), Address(address) {} @@ -261,6 +264,7 @@ namespace llvm { public: typedef std::vector NonLocalDepInfo; + private: /// ValueIsLoadPair - This is a pair where the bool is true if /// the dependence is a read only dependence, false if read/write. @@ -302,7 +306,6 @@ namespace llvm { SmallPtrSet > ReverseNonLocalPtrDepTy; ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; - /// PerInstNLInfo - This is the instruction we keep for each cached access /// that we have for an instruction. The pointer is an owning pointer and /// the bool indicates whether we have any dirty bits in the set. @@ -326,6 +329,7 @@ namespace llvm { AliasAnalysis *AA; DominatorTree *DT; AssumptionCache *AC; + const TargetLibraryInfo *TLI; PredIteratorCache PredCache; public: @@ -363,14 +367,13 @@ namespace llvm { /// that. 
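For illustration, a minimal sketch of a local query against MemoryDependenceAnalysis; getDependency is the class's standard single-instruction entry point (not shown in this hunk), and the surrounding helper and load are hypothetical:

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical client: classify what a load depends on within its block.
static void classifyLocalDep(MemoryDependenceAnalysis &MDA, LoadInst *LI) {
  MemDepResult Dep = MDA.getDependency(LI);
  if (Dep.isDef()) {
    // Dep.getInst() produces the loaded memory; a forwarding candidate.
  } else if (Dep.isClobber()) {
    // Dep.getInst() may overwrite the location; no forwarding.
  } else if (Dep.isNonLocal()) {
    // Fall back to getNonLocalPointerDependency for cross-block answers.
  }
}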
const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); - /// getNonLocalPointerDependency - Perform a full dependency query for an /// access to the QueryInst's specified memory location, returning the set /// of instructions that either define or clobber the value. /// /// Warning: For a volatile query instruction, the dependencies will be /// accurate, and thus usable for reordering, but it is never legal to - /// remove the query instruction. + /// remove the query instruction. /// /// This method assumes the pointer has a "NonLocal" dependency within /// QueryInst's parent basic block. @@ -394,12 +397,12 @@ namespace llvm { /// critical edges. void invalidateCachedPredecessors(); - /// getPointerDependencyFrom - Return the instruction on which a memory - /// location depends. If isLoad is true, this routine ignores may-aliases - /// with read-only operations. If isLoad is false, this routine ignores - /// may-aliases with reads from read-only locations. If possible, pass - /// the query instruction as well; this function may take advantage of - /// the metadata annotated to the query instruction to refine the result. + /// \brief Return the instruction on which a memory location depends. + /// If isLoad is true, this routine ignores may-aliases with read-only + /// operations. If isLoad is false, this routine ignores may-aliases + /// with reads from read-only locations. If possible, pass the query + /// instruction as well; this function may take advantage of the metadata + /// annotated to the query instruction to refine the result. /// /// Note that this is an uncached query, and thus may be inefficient. /// @@ -409,6 +412,21 @@ namespace llvm { BasicBlock *BB, Instruction *QueryInst = nullptr); + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst); + + /// This analysis looks for other loads and stores with invariant.group + /// metadata and the same pointer operand. Returns Unknown if it does not + /// find anything, and Def if it can be assumed that 2 instructions load or + /// store the same value. + /// FIXME: This analysis works only on single block because of restrictions + /// at the call site. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB); + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that /// looks at a memory location for a load (specified by MemLocBase, Offs, /// and Size) and compares it against a load. If the specified load could @@ -442,7 +460,6 @@ namespace llvm { /// verifyRemoved - Verify that the specified instruction does not occur /// in our internal data structures. void verifyRemoved(Instruction *Inst) const; - }; } // End llvm namespace diff --git a/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/include/llvm/Analysis/ObjCARCAliasAnalysis.h new file mode 100644 index 000000000000..ac01154bac6c --- /dev/null +++ b/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -0,0 +1,102 @@ +//===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H +#define LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Pass.h" + +namespace llvm { +namespace objcarc { + +/// \brief This is a simple alias analysis implementation that uses knowledge +/// of ARC constructs to answer queries. +/// +/// TODO: This class could be generalized to know about other ObjC-specific +/// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing +/// even though their offsets are dynamic. +class ObjCARCAAResult : public AAResultBase { + friend AAResultBase; + + const DataLayout &DL; + +public: + explicit ObjCARCAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI) + : AAResultBase(TLI), DL(DL) {} + ObjCARCAAResult(ObjCARCAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + using AAResultBase::getModRefBehavior; + FunctionModRefBehavior getModRefBehavior(const Function *F); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ObjCARCAA { +public: + typedef ObjCARCAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + ObjCARCAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "ObjCARCAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the ObjCARCAAResult object. 
+class ObjCARCAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + ObjCARCAAWrapperPass(); + + ObjCARCAAResult &getResult() { return *Result; } + const ObjCARCAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace objcarc +} // namespace llvm + +#endif diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h new file mode 100644 index 000000000000..29d99c9d316d --- /dev/null +++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -0,0 +1,287 @@ +//===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines common analysis utilities used by the ObjC ARC Optimizer. +/// ARC stands for Automatic Reference Counting and is a system for managing +/// reference counts for objects in Objective C. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H +#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { +class raw_ostream; +} + +namespace llvm { +namespace objcarc { + +/// \brief A handy option to enable/disable all ARC Optimizations. +extern bool EnableARCOpts; + +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. +inline bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer") || + M.getNamedValue("clang.arc.use"); +} + +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. 
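For illustration, a minimal sketch of the early-exit gate that EnableARCOpts and ModuleHasARC (declared above) make possible; the pass entry point is hypothetical:

#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace llvm::objcarc;

// Hypothetical module pass entry point.
static bool runARCOptSketch(Module &M) {
  // Skip modules that reference no ObjC runtime entry points at all.
  if (!EnableARCOpts || !ModuleHasARC(M))
    return false;
  // ... ARC-specific rewriting would go here ...
  return true;
}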
+inline const Value *GetUnderlyingObjCPtr(const Value *V, + const DataLayout &DL) { + for (;;) { + V = GetUnderlyingObject(V, DL); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + + return V; +} + +/// The RCIdentity root of a value \p V is a dominating value U for which +/// retaining or releasing U is equivalent to retaining or releasing V. In other +/// words, ARC operations on \p V are equivalent to ARC operations on \p U. +/// +/// We use this in the ARC optimizer to make it easier to match up ARC +/// operations by always mapping ARC operations to RCIdentityRoots instead of +/// pointers themselves. +/// +/// The two ways that we see RCIdentical values in ObjC are via: +/// +/// 1. PointerCasts +/// 2. Forwarding Calls that return their argument verbatim. +/// +/// Thus this function strips off pointer casts and forwarding calls. *NOTE* +/// This implies that two RCIdentical values must alias. +inline const Value *GetRCIdentityRoot(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + return V; +} + +/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just +/// casts away the const of the result. For documentation about what an +/// RCIdentityRoot is (and by extension what GetRCIdentityRoot does), look at +/// that function. +inline Value *GetRCIdentityRoot(Value *V) { + return const_cast<Value *>(GetRCIdentityRoot((const Value *)V)); +} + +/// \brief Assuming the given instruction is one of the special calls such as +/// objc_retain or objc_release, return the RCIdentity root of the argument of +/// the call. +inline Value *GetArgRCIdentityRoot(Value *Inst) { + return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0)); +} + +inline bool IsNullOrUndef(const Value *V) { + return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); +} + +inline bool IsNoopInstruction(const Instruction *I) { + return isa<BitCastInst>(I) || + (isa<GetElementPtrInst>(I) && + cast<GetElementPtrInst>(I)->hasAllZeroIndices()); +} + +/// \brief Test whether the given value is possibly a retainable object pointer. +inline bool IsPotentialRetainableObjPtr(const Value *Op) { + // Pointers to static or stack storage are not valid retainable object + // pointers. + if (isa<Constant>(Op) || isa<AllocaInst>(Op)) + return false; + // Special arguments cannot be a valid retainable object pointer. + if (const Argument *Arg = dyn_cast<Argument>(Op)) + if (Arg->hasByValAttr() || + Arg->hasInAllocaAttr() || + Arg->hasNestAttr() || + Arg->hasStructRetAttr()) + return false; + // Only consider values with pointer types. + // + // It seems intuitive to exclude function pointer types as well, since + // functions are never retainable object pointers; however, clang occasionally + // bitcasts retainable object pointers to function-pointer type temporarily. + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + if (!Ty) + return false; + // Conservatively assume anything else is a potential retainable object + // pointer. + return true; +} + +inline bool IsPotentialRetainableObjPtr(const Value *Op, + AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialRetainableObjPtr(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst.
+ return true; +} + +/// \brief Helper for GetARCInstKind. Determines what kind of construct CS +/// is. +inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (IsPotentialRetainableObjPtr(*I)) + return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; + + return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; +} + +/// \brief Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. +inline bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa(V) || isa(V) || + isa(V) || isa(V) || + isa(V)) + return true; + + if (const LoadInst *LI = dyn_cast(V)) { + const Value *Pointer = + GetRCIdentityRoot(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + + StringRef Section = GV->getSection(); + if (Section.find("__message_refs") != StringRef::npos || + Section.find("__objc_classrefs") != StringRef::npos || + Section.find("__objc_superrefs") != StringRef::npos || + Section.find("__objc_methname") != StringRef::npos || + Section.find("__cstring") != StringRef::npos) + return true; + } + } + + return false; +} + +enum class ARCMDKindID { + ImpreciseRelease, + CopyOnEscape, + NoObjCARCExceptions, +}; + +/// A cache of MDKinds used by various ARC optimizations. +class ARCMDKindCache { + Module *M; + + /// The Metadata Kind for clang.imprecise_release metadata. + llvm::Optional ImpreciseReleaseMDKind; + + /// The Metadata Kind for clang.arc.copy_on_escape metadata. + llvm::Optional CopyOnEscapeMDKind; + + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. 
+ llvm::Optional NoObjCARCExceptionsMDKind; + +public: + void init(Module *Mod) { + M = Mod; + ImpreciseReleaseMDKind = NoneType::None; + CopyOnEscapeMDKind = NoneType::None; + NoObjCARCExceptionsMDKind = NoneType::None; + } + + unsigned get(ARCMDKindID ID) { + switch (ID) { + case ARCMDKindID::ImpreciseRelease: + if (!ImpreciseReleaseMDKind) + ImpreciseReleaseMDKind = + M->getContext().getMDKindID("clang.imprecise_release"); + return *ImpreciseReleaseMDKind; + case ARCMDKindID::CopyOnEscape: + if (!CopyOnEscapeMDKind) + CopyOnEscapeMDKind = + M->getContext().getMDKindID("clang.arc.copy_on_escape"); + return *CopyOnEscapeMDKind; + case ARCMDKindID::NoObjCARCExceptions: + if (!NoObjCARCExceptionsMDKind) + NoObjCARCExceptionsMDKind = + M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); + return *NoObjCARCExceptionsMDKind; + } + llvm_unreachable("Covered switch isn't covered?!"); + } +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/lib/Transforms/ObjCARC/ARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h similarity index 94% rename from lib/Transforms/ObjCARC/ARCInstKind.h rename to include/llvm/Analysis/ObjCARCInstKind.h index 636c65c9b627..13efb4b160be 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.h +++ b/include/llvm/Analysis/ObjCARCInstKind.h @@ -1,4 +1,4 @@ -//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===// +//===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H -#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H +#ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H +#define LLVM_ANALYSIS_OBJCARCINSTKIND_H #include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" @@ -98,7 +98,7 @@ ARCInstKind GetFunctionClass(const Function *F); /// This is similar to GetARCInstKind except that it only detects objc /// runtime calls. This allows it to be faster. /// -static inline ARCInstKind GetBasicARCInstKind(const Value *V) { +inline ARCInstKind GetBasicARCInstKind(const Value *V) { if (const CallInst *CI = dyn_cast(V)) { if (const Function *F = CI->getCalledFunction()) return GetFunctionClass(F); diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h new file mode 100644 index 000000000000..5aa813eb4832 --- /dev/null +++ b/include/llvm/Analysis/OrderedBasicBlock.h @@ -0,0 +1,66 @@ +//===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrderedBasicBlock class. OrderedBasicBlock maintains +// an interface where clients can query if one instruction comes before another +// in a BasicBlock. Since BasicBlock currently lacks a reliable way to query +// relative position between instructions one can use OrderedBasicBlock to do +// such queries. OrderedBasicBlock is lazily built on a source BasicBlock and +// maintains an internal Instruction -> Position map. A OrderedBasicBlock +// instance should be discarded whenever the source BasicBlock changes. 
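For illustration, a minimal sketch of the query pattern OrderedBasicBlock is designed for; the helper and its names are hypothetical:

#include "llvm/Analysis/OrderedBasicBlock.h"
using namespace llvm;

// Hypothetical helper: order two instructions of the same block. The
// OrderedBasicBlock must be discarded and rebuilt if BB is later mutated.
static bool precedesInBlock(const BasicBlock *BB, const Instruction *A,
                            const Instruction *B) {
  OrderedBasicBlock OBB(BB); // numbering is filled in lazily per query
  return OBB.dominates(A, B);
}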
+// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ORDEREDBASICBLOCK_H +#define LLVM_ANALYSIS_ORDEREDBASICBLOCK_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/BasicBlock.h" + +namespace llvm { + +class Instruction; +class BasicBlock; + +class OrderedBasicBlock { +private: + /// \brief Map an instruction to its position in a BasicBlock. + SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; + + /// \brief Keep track of the last instruction inserted into \p NumberedInsts. + /// It speeds up queries for uncached instructions by providing a start point + /// for new queries in OrderedBasicBlock::comesBefore. + BasicBlock::const_iterator LastInstFound; + + /// \brief The position/number to tag the next instruction to be found. + unsigned NextInstPos; + + /// \brief The source BasicBlock to map. + const BasicBlock *BB; + + /// \brief Given no cached results, find if \p A comes before \p B in \p BB. + /// Cache and number instructions while walking \p BB. + bool comesBefore(const Instruction *A, const Instruction *B); + +public: + OrderedBasicBlock(const BasicBlock *BasicB); + + /// \brief Find out whether \p A dominates \p B, meaning whether \p A + /// comes before \p B in \p BB. This is a simplification that considers + /// cached instruction positions and ignores other basic blocks, being + /// only relevant to compare relative instruction positions inside \p BB. + bool dominates(const Instruction *A, const Instruction *B); +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h index cbdbb88f7407..f0f34f3a51f5 100644 --- a/include/llvm/Analysis/PHITransAddr.h +++ b/include/llvm/Analysis/PHITransAddr.h @@ -48,6 +48,7 @@ class PHITransAddr { /// InstInputs - The inputs for our symbolic address. SmallVector<Instruction*, 4> InstInputs; + public: PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC) : Addr(addr), DL(DL), TLI(nullptr), AC(AC) { @@ -55,9 +56,9 @@ public: if (Instruction *I = dyn_cast<Instruction>(Addr)) InstInputs.push_back(I); } - + Value *getAddr() const { return Addr; } - + /// NeedsPHITranslationFromBlock - Return true if moving from the specified /// BasicBlock to its predecessors requires PHI translation. bool NeedsPHITranslationFromBlock(BasicBlock *BB) const { @@ -68,12 +69,12 @@ public: return true; return false; } - + /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true /// if we have some hope of doing it. This should be used as a filter to /// avoid calling PHITranslateValue in hopeless situations. bool IsPotentiallyPHITranslatable() const; - + /// PHITranslateValue - PHI translate the current address up the CFG from /// CurBB to Pred, updating our state to reflect any needed changes. If /// 'MustDominate' is true, the translated value must dominate @@ -90,18 +91,19 @@ public: /// Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree &DT, - SmallVectorImpl<Instruction *> &NewInsts); - + SmallVectorImpl<Instruction *> &NewInsts); + void dump() const; - + /// Verify - Check internal consistency of this data structure. If the /// structure is valid, it returns true. If invalid, it prints errors and /// returns false.
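For illustration, a minimal sketch of the guard-then-translate sequence suggested by this interface; the helper is hypothetical, and it assumes PHITranslateValue's convention of returning true on failure:

#include "llvm/Analysis/PHITransAddr.h"
using namespace llvm;

// Hypothetical caller: translate Addr from CurBB into PredBB if needed.
static bool translateIfNeeded(PHITransAddr &Addr, BasicBlock *CurBB,
                              BasicBlock *PredBB, const DominatorTree *DT) {
  if (!Addr.NeedsPHITranslationFromBlock(CurBB))
    return true;  // address is already usable in PredBB
  if (!Addr.IsPotentiallyPHITranslatable())
    return false; // don't bother; translation is hopeless
  // Assumption: true return value signals failure, so invert it.
  return !Addr.PHITranslateValue(CurBB, PredBB, DT, /*MustDominate=*/false);
}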
bool Verify() const; + private: Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT); - + /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB /// block. All newly created instructions are added to the NewInsts list. @@ -109,8 +111,8 @@ private: /// Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree &DT, - SmallVectorImpl &NewInsts); - + SmallVectorImpl &NewInsts); + /// AddAsInput - If the specified value is an instruction, add it as an input. Value *AddAsInput(Value *V) { // If V is an instruction, it is now an input. @@ -118,7 +120,6 @@ private: InstInputs.push_back(VI); return V; } - }; } // end namespace llvm diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index d112ab1823b4..da17457d3446 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -22,27 +22,6 @@ namespace llvm { class ModulePass; class Pass; class PassInfo; - class LibCallInfo; - - //===--------------------------------------------------------------------===// - // - // createGlobalsModRefPass - This pass provides alias and mod/ref info for - // global values that do not have their addresses taken. - // - Pass *createGlobalsModRefPass(); - - //===--------------------------------------------------------------------===// - // - // createAliasDebugger - This pass helps debug clients of AA - // - Pass *createAliasDebugger(); - - //===--------------------------------------------------------------------===// - // - // createAliasAnalysisCounterPass - This pass counts alias queries and how the - // alias analysis implementation responds. - // - ModulePass *createAliasAnalysisCounterPass(); //===--------------------------------------------------------------------===// // @@ -53,59 +32,10 @@ namespace llvm { //===--------------------------------------------------------------------===// // - // createNoAAPass - This pass implements a "I don't know" alias analysis. - // - ImmutablePass *createNoAAPass(); - - //===--------------------------------------------------------------------===// - // - // createBasicAliasAnalysisPass - This pass implements the stateless alias - // analysis. - // - ImmutablePass *createBasicAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createCFLAliasAnalysisPass - This pass implements a set-based approach to + // createObjCARCAAWrapperPass - This pass implements ObjC-ARC-based // alias analysis. // - ImmutablePass *createCFLAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows - /// about the semantics of a set of libcalls specified by LCI. The newly - /// constructed pass takes ownership of the pointer that is provided. - /// - FunctionPass *createLibCallAliasAnalysisPass(LibCallInfo *LCI); - - //===--------------------------------------------------------------------===// - // - // createScalarEvolutionAliasAnalysisPass - This pass implements a simple - // alias analysis using ScalarEvolution queries. - // - FunctionPass *createScalarEvolutionAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createTypeBasedAliasAnalysisPass - This pass implements metadata-based - // type-based alias analysis. 
- // - ImmutablePass *createTypeBasedAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createScopedNoAliasAAPass - This pass implements metadata-based - // scoped noalias analysis. - // - ImmutablePass *createScopedNoAliasAAPass(); - - //===--------------------------------------------------------------------===// - // - // createObjCARCAliasAnalysisPass - This pass implements ObjC-ARC-based - // alias analysis. - // - ImmutablePass *createObjCARCAliasAnalysisPass(); + ImmutablePass *createObjCARCAAWrapperPass(); FunctionPass *createPAEvalPass(); diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 8560f1f67160..4988386fdc82 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -47,7 +47,7 @@ namespace llvm { -// RegionTraits - Class to be specialized for different users of RegionInfo +// Class to be specialized for different users of RegionInfo // (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to // pass around an unreasonable number of template parameters. template @@ -282,17 +282,16 @@ class RegionBase : public RegionNodeBase { // Save the BasicBlock RegionNodes that are element of this Region. mutable BBNodeMapT BBNodeMap; - /// verifyBBInRegion - Check if a BB is in this Region. This check also works + /// Check if a BB is in this Region. This check also works /// if the region is incorrectly built. (EXPENSIVE!) void verifyBBInRegion(BlockT *BB) const; - /// verifyWalk - Walk over all the BBs of the region starting from BB and + /// Walk over all the BBs of the region starting from BB and /// verify that all reachable basic blocks are elements of the region. /// (EXPENSIVE!) void verifyWalk(BlockT *BB, std::set *visitedBB) const; - /// verifyRegionNest - Verify if the region and its children are valid - /// regions (EXPENSIVE!) + /// Verify if the region and its children are valid regions (EXPENSIVE!) void verifyRegionNest() const; public: @@ -688,45 +687,50 @@ private: /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; - // isCommonDomFrontier - Returns true if BB is in the dominance frontier of + // Check whether the entries of BBtoRegion for the BBs of region + // SR are correct. Triggers an assertion if not. Calls itself recursively for + // subregions. + void verifyBBMap(const RegionT *SR) const; + + // Returns true if BB is in the dominance frontier of // entry, because it was inherited from exit. In the other case there is an // edge going from entry to BB without passing exit. bool isCommonDomFrontier(BlockT *BB, BlockT *entry, BlockT *exit) const; - // isRegion - Check if entry and exit surround a valid region, based on + // Check if entry and exit surround a valid region, based on // dominance tree and dominance frontier. bool isRegion(BlockT *entry, BlockT *exit) const; - // insertShortCut - Saves a shortcut pointing from entry to exit. + // Saves a shortcut pointing from entry to exit. // This function may extend this shortcut if possible. void insertShortCut(BlockT *entry, BlockT *exit, BBtoBBMap *ShortCut) const; - // getNextPostDom - Returns the next BB that postdominates N, while skipping + // Returns the next BB that postdominates N, while skipping // all post dominators that cannot finish a canonical region. DomTreeNodeT *getNextPostDom(DomTreeNodeT *N, BBtoBBMap *ShortCut) const; - // isTrivialRegion - A region is trivial, if it contains only one BB. 
+ // A region is trivial, if it contains only one BB. bool isTrivialRegion(BlockT *entry, BlockT *exit) const; - // createRegion - Creates a single entry single exit region. + // Creates a single entry single exit region. RegionT *createRegion(BlockT *entry, BlockT *exit); - // findRegionsWithEntry - Detect all regions starting with bb 'entry'. + // Detect all regions starting with bb 'entry'. void findRegionsWithEntry(BlockT *entry, BBtoBBMap *ShortCut); - // scanForRegions - Detects regions in F. + // Detects regions in F. void scanForRegions(FuncT &F, BBtoBBMap *ShortCut); - // getTopMostParent - Get the top most parent with the same entry block. + // Get the top most parent with the same entry block. RegionT *getTopMostParent(RegionT *region); - // buildRegionsTree - build the region hierarchy after all region detected. + // Build the region hierarchy after all region detected. void buildRegionsTree(DomTreeNodeT *N, RegionT *region); - // updateStatistics - Update statistic about created regions. + // Update statistic about created regions. virtual void updateStatistics(RegionT *R) = 0; - // calculate - detect all regions in function and build the region tree. + // Detect all regions in function and build the region tree. void calculate(FuncT &F); public: @@ -796,12 +800,6 @@ public: RegionT *getTopLevelRegion() const { return TopLevelRegion; } - /// @brief Update RegionInfo after a basic block was split. - /// - /// @param NewBB The basic block that was created before OldBB. - /// @param OldBB The old basic block. - void splitBlock(BlockT *NewBB, BlockT *OldBB); - /// @brief Clear the Node Cache for all Regions. /// /// @see Region::clearNodeCache() @@ -847,6 +845,19 @@ public: void recalculate(Function &F, DominatorTree *DT, PostDominatorTree *PDT, DominanceFrontier *DF); + +#ifndef NDEBUG + /// @brief Opens a viewer to show the GraphViz visualization of the regions. + /// + /// Useful during debugging as an alternative to dump(). + void view(); + + /// @brief Opens a viewer to show the GraphViz visualization of this region + /// without instructions in the BasicBlocks. + /// + /// Useful during debugging as an alternative to dump(). 
+  void viewOnly();
+#endif
 };
 
 class RegionInfoPass : public FunctionPass {
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index b31eefc15f78..134cd8f96fbe 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -236,7 +236,7 @@ std::string RegionBase<Tr>::getNameStr() const {
 template <class Tr>
 void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
   if (!contains(BB))
-    llvm_unreachable("Broken region found!");
+    llvm_unreachable("Broken region found: enumerated BB not in region!");
 
   BlockT *entry = getEntry(), *exit = getExit();
 
@@ -244,7 +244,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
                   SE = BlockTraits::child_end(BB);
        SI != SE; ++SI) {
     if (!contains(*SI) && exit != *SI)
-      llvm_unreachable("Broken region found!");
+      llvm_unreachable("Broken region found: edges leaving the region must go "
+                       "to the exit node!");
   }
 
   if (entry != BB) {
@@ -252,7 +253,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
                     SE = InvBlockTraits::child_end(BB);
          SI != SE; ++SI) {
       if (!contains(*SI))
-        llvm_unreachable("Broken region found!");
+        llvm_unreachable("Broken region found: edges entering the region must "
+                         "go to the entry node!");
     }
   }
 }
@@ -442,16 +444,14 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
   if (NumSuccessors == 0)
     return nullptr;
 
-  for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
-                  PE = InvBlockTraits::child_end(getExit());
-       PI != PE; ++PI) {
-    if (!DT->dominates(getEntry(), *PI))
-      return nullptr;
-  }
-
   RegionT *R = RI->getRegionFor(exit);
 
   if (R->getEntry() != exit) {
+    for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+                    PE = InvBlockTraits::child_end(getExit());
+         PI != PE; ++PI)
+      if (!contains(*PI))
+        return nullptr;
     if (Tr::getNumSuccessors(exit) == 1)
       return new RegionT(getEntry(), *BlockTraits::child_begin(exit), RI, DT);
     return nullptr;
@@ -460,13 +460,11 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
   while (R->getParent() && R->getParent()->getEntry() == exit)
     R = R->getParent();
 
-  if (!DT->dominates(getEntry(), R->getExit())) {
-    for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
-                    PE = InvBlockTraits::child_end(getExit());
-         PI != PE; ++PI) {
-      if (!DT->dominates(R->getExit(), *PI))
-        return nullptr;
-    }
+  for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+                  PE = InvBlockTraits::child_end(getExit());
+       PI != PE; ++PI) {
+    if (!(contains(*PI) || R->contains(*PI)))
+      return nullptr;
   }
 
   return new RegionT(getEntry(), R->getExit(), RI, DT);
@@ -541,6 +539,21 @@ RegionInfoBase<Tr>::~RegionInfoBase() {
   releaseMemory();
 }
 
+template <class Tr>
+void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
+  assert(R && "R must be non-null");
+  for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
+    if (I->isSubRegion()) {
+      const RegionT *SR = I->template getNodeAs<RegionT>();
+      verifyBBMap(SR);
+    } else {
+      BlockT *BB = I->template getNodeAs<BlockT>();
+      if (getRegionFor(BB) != R)
+        llvm_unreachable("BB map does not match region nesting");
+    }
+  }
+}
+
 template <class Tr>
 bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry,
                                              BlockT *exit) const {
@@ -786,7 +799,14 @@ void RegionInfoBase<Tr>::releaseMemory() {
 
 template <class Tr>
 void RegionInfoBase<Tr>::verifyAnalysis() const {
+  // Only verify regions if explicitly activated using XDEBUG or
+  // -verify-region-info.
+  if (!RegionInfoBase<Tr>::VerifyRegionInfo)
+    return;
+
   TopLevelRegion->verifyRegionNest();
+
+  verifyBBMap(TopLevelRegion);
 }
 
 // Region pass manager support.
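// Illustrative sketch (not part of this patch): the recursive walk that the
// new verifyBBMap above performs, written against the public RegionInfo API.
// The helper name checkBBMap is ours; everything else is existing LLVM API.
#include "llvm/Analysis/RegionInfo.h"
using namespace llvm;

static bool checkBBMap(const RegionInfo &RI, const Region *R) {
  for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
    if (I->isSubRegion()) {
      // Recurse: blocks of a nested region must map to that subregion.
      if (!checkBBMap(RI, I->getNodeAs<Region>()))
        return false;
    } else {
      // A plain block element must map back to exactly this region.
      if (RI.getRegionFor(I->getNodeAs<BasicBlock>()) != R)
        return false;
    }
  }
  return true;
}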
@@ -886,20 +906,6 @@ RegionInfoBase::getCommonRegion(SmallVectorImpl &BBs) const { return ret; } -template -void RegionInfoBase::splitBlock(BlockT *NewBB, BlockT *OldBB) { - RegionT *R = getRegionFor(OldBB); - - setRegionFor(NewBB, R); - - while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { - R->replaceEntry(NewBB); - R = R->getParent(); - } - - setRegionFor(OldBB, R); -} - template void RegionInfoBase::calculate(FuncT &F) { typedef typename std::add_pointer::type FuncPtrT; diff --git a/include/llvm/Analysis/RegionPrinter.h b/include/llvm/Analysis/RegionPrinter.h index 758748aad9e6..8f0035cfd8e6 100644 --- a/include/llvm/Analysis/RegionPrinter.h +++ b/include/llvm/Analysis/RegionPrinter.h @@ -17,10 +17,55 @@ namespace llvm { class FunctionPass; + class Function; + class RegionInfo; + FunctionPass *createRegionViewerPass(); FunctionPass *createRegionOnlyViewerPass(); FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); + +#ifndef NDEBUG + /// @brief Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Practical to call in the debugger. + /// Includes the instructions in each BasicBlock. + /// + /// @param RI The analysis to display. + void viewRegion(llvm::RegionInfo *RI); + + /// @brief Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Includes the instructions in each BasicBlock. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegion(const llvm::Function *F); + + /// @brief Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// + /// @param RI The analysis to display. + void viewRegionOnly(llvm::RegionInfo *RI); + + /// @brief Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegionOnly(const llvm::Function *F); +#endif } // End llvm namespace #endif diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index d47cab829ced..c08335de3e7d 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -23,10 +23,12 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" @@ -44,30 +46,33 @@ namespace llvm { class DataLayout; class TargetLibraryInfo; class LLVMContext; - class Loop; - class LoopInfo; class Operator; - class SCEVUnknown; class SCEV; - template<> struct FoldingSetTrait; + class SCEVAddRecExpr; + class SCEVConstant; + class SCEVExpander; + class SCEVPredicate; + class SCEVUnknown; - /// SCEV - This class represents an analyzed expression in the program. These - /// are opaque objects that the client is not allowed to do much with - /// directly. 
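// Illustrative sketch (not part of this patch): the viewRegion/viewRegionOnly
// helpers declared in RegionPrinter.h above are meant for interactive
// debugging in a !NDEBUG build. From gdb one might run:
//
//   (gdb) call llvm::viewRegion(F)       // F: const llvm::Function *
//   (gdb) call llvm::viewRegionOnly(F)   // block names only, no instructions
//
// or, equivalently, from temporary debugging code:
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
static void debugRegions(const llvm::Function *F) {
  llvm::viewRegion(F);     // runs a fresh RegionInfo analysis, opens a viewer
  llvm::viewRegionOnly(F); // same, without the instructions in each block
}
#endif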
+ template <> struct FoldingSetTrait; + template <> struct FoldingSetTrait; + + /// This class represents an analyzed expression in the program. These are + /// opaque objects that the client is not allowed to do much with directly. /// class SCEV : public FoldingSetNode { friend struct FoldingSetTrait; - /// FastID - A reference to an Interned FoldingSetNodeID for this node. - /// The ScalarEvolution's BumpPtrAllocator holds the data. + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. FoldingSetNodeIDRef FastID; // The SCEV baseclass this node corresponds to const unsigned short SCEVType; protected: - /// SubclassData - This field is initialized to zero and may be used in - /// subclasses to store miscellaneous information. + /// This field is initialized to zero and may be used in subclasses to store + /// miscellaneous information. unsigned short SubclassData; private: @@ -104,37 +109,32 @@ namespace llvm { unsigned getSCEVType() const { return SCEVType; } - /// getType - Return the LLVM type of this SCEV expression. + /// Return the LLVM type of this SCEV expression. /// Type *getType() const; - /// isZero - Return true if the expression is a constant zero. + /// Return true if the expression is a constant zero. /// bool isZero() const; - /// isOne - Return true if the expression is a constant one. + /// Return true if the expression is a constant one. /// bool isOne() const; - /// isAllOnesValue - Return true if the expression is a constant - /// all-ones value. + /// Return true if the expression is a constant all-ones value. /// bool isAllOnesValue() const; - /// isNonConstantNegative - Return true if the specified scev is negated, - /// but not a constant. + /// Return true if the specified scev is negated, but not a constant. bool isNonConstantNegative() const; - /// print - Print out the internal representation of this scalar to the - /// specified stream. This should really only be used for debugging - /// purposes. + /// Print out the internal representation of this scalar to the specified + /// stream. This should really only be used for debugging purposes. void print(raw_ostream &OS) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// dump - This method is used for debugging. + /// This method is used for debugging. /// void dump() const; -#endif }; // Specialize FoldingSetTrait for SCEV to avoid needing to compute @@ -157,11 +157,10 @@ namespace llvm { return OS; } - /// SCEVCouldNotCompute - An object of this class is returned by queries that - /// could not be answered. For example, if you ask for the number of - /// iterations of a linked-list traversal loop, you will get one of these. - /// None of the standard SCEV operations are valid on this class, it is just a - /// marker. + /// An object of this class is returned by queries that could not be answered. + /// For example, if you ask for the number of iterations of a linked-list + /// traversal loop, you will get one of these. None of the standard SCEV + /// operations are valid on this class, it is just a marker. struct SCEVCouldNotCompute : public SCEV { SCEVCouldNotCompute(); @@ -169,22 +168,162 @@ namespace llvm { static bool classof(const SCEV *S); }; - /// ScalarEvolution - This class is the main scalar evolution driver. Because - /// client code (intentionally) can't do much with the SCEV objects directly, - /// they must ask this class for services. 
- /// - class ScalarEvolution : public FunctionPass { + /// SCEVPredicate - This class represents an assumption made using SCEV + /// expressions which can be checked at run-time. + class SCEVPredicate : public FoldingSetNode { + friend struct FoldingSetTrait; + + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. + FoldingSetNodeIDRef FastID; + public: - /// LoopDisposition - An enum describing the relationship between a - /// SCEV and a loop. + enum SCEVPredicateKind { P_Union, P_Equal }; + + protected: + SCEVPredicateKind Kind; + ~SCEVPredicate() = default; + SCEVPredicate(const SCEVPredicate&) = default; + SCEVPredicate &operator=(const SCEVPredicate&) = default; + + public: + SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind); + + SCEVPredicateKind getKind() const { return Kind; } + + /// \brief Returns the estimated complexity of this predicate. + /// This is roughly measured in the number of run-time checks required. + virtual unsigned getComplexity() const { return 1; } + + /// \brief Returns true if the predicate is always true. This means that no + /// assumptions were made and nothing needs to be checked at run-time. + virtual bool isAlwaysTrue() const = 0; + + /// \brief Returns true if this predicate implies \p N. + virtual bool implies(const SCEVPredicate *N) const = 0; + + /// \brief Prints a textual representation of this predicate with an + /// indentation of \p Depth. + virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; + + /// \brief Returns the SCEV to which this predicate applies, or nullptr + /// if this is a SCEVUnionPredicate. + virtual const SCEV *getExpr() const = 0; + }; + + inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { + P.print(OS); + return OS; + } + + // Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute + // temporary FoldingSetNodeID values. + template <> + struct FoldingSetTrait + : DefaultFoldingSetTrait { + + static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) { + ID = X.FastID; + } + + static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID) { + return ID == X.FastID; + } + static unsigned ComputeHash(const SCEVPredicate &X, + FoldingSetNodeID &TempID) { + return X.FastID.ComputeHash(); + } + }; + + /// SCEVEqualPredicate - This class represents an assumption that two SCEV + /// expressions are equal, and this can be checked at run-time. We assume + /// that the left hand side is a SCEVUnknown and the right hand side a + /// constant. + class SCEVEqualPredicate final : public SCEVPredicate { + /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a + /// constant. + const SCEVUnknown *LHS; + const SCEVConstant *RHS; + + public: + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, + const SCEVConstant *RHS); + + /// Implementation of the SCEVPredicate interface + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth = 0) const override; + bool isAlwaysTrue() const override; + const SCEV *getExpr() const override; + + /// \brief Returns the left hand side of the equality. + const SCEVUnknown *getLHS() const { return LHS; } + + /// \brief Returns the right hand side of the equality. 
+    const SCEVConstant *getRHS() const { return RHS; }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVPredicate *P) {
+      return P->getKind() == P_Equal;
+    }
+  };
+
+  /// SCEVUnionPredicate - This class represents a composition of other
+  /// SCEV predicates, and is the class that most clients will interact with.
+  /// This is equivalent to a logical "AND" of all the predicates in the union.
+  class SCEVUnionPredicate final : public SCEVPredicate {
+  private:
+    typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>
+        PredicateMap;
+
+    /// Vector with references to all predicates in this union.
+    SmallVector<const SCEVPredicate *, 16> Preds;
+    /// Maps SCEVs to predicates for quick look-ups.
+    PredicateMap SCEVToPreds;
+
+  public:
+    SCEVUnionPredicate();
+
+    const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
+      return Preds;
+    }
+
+    /// \brief Adds a predicate to this union.
+    void add(const SCEVPredicate *N);
+
+    /// \brief Returns a reference to a vector containing all predicates
+    /// which apply to \p Expr.
+    ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
+
+    /// Implementation of the SCEVPredicate interface
+    bool isAlwaysTrue() const override;
+    bool implies(const SCEVPredicate *N) const override;
+    void print(raw_ostream &OS, unsigned Depth) const override;
+    const SCEV *getExpr() const override;
+
+    /// \brief We estimate the complexity of a union predicate as the number
+    /// of predicates in the union.
+    unsigned getComplexity() const override { return Preds.size(); }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVPredicate *P) {
+      return P->getKind() == P_Union;
+    }
+  };
+
+  /// The main scalar evolution driver. Because client code (intentionally)
+  /// can't do much with the SCEV objects directly, they must ask this class
+  /// for services.
+  class ScalarEvolution {
+  public:
+    /// An enum describing the relationship between a SCEV and a loop.
     enum LoopDisposition {
       LoopVariant,    ///< The SCEV is loop-variant (unknown).
       LoopInvariant,  ///< The SCEV is loop-invariant.
       LoopComputable  ///< The SCEV varies predictably with the loop.
     };
 
-    /// BlockDisposition - An enum describing the relationship between a
-    /// SCEV and a basic block.
+    /// An enum describing the relationship between a SCEV and a basic block.
     enum BlockDisposition {
       DoesNotDominateBlock,  ///< The SCEV does not dominate the block.
       DominatesBlock,        ///< The SCEV dominates the block.
@@ -207,9 +346,9 @@
     }
 
   private:
-    /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
-    /// notified whenever a Value is deleted.
-    class SCEVCallbackVH : public CallbackVH {
+    /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
+    /// Value is deleted.
+    class SCEVCallbackVH final : public CallbackVH {
       ScalarEvolution *SE;
       void deleted() override;
       void allUsesReplacedWith(Value *New) override;
@@ -221,35 +360,34 @@
     friend class SCEVExpander;
     friend class SCEVUnknown;
 
-    /// F - The function we are analyzing.
-    ///
-    Function *F;
+    /// The function we are analyzing.
+    ///
+    Function &F;
+
+    /// The target library information for the target we are targeting.
+    ///
+    TargetLibraryInfo &TLI;
 
     /// The tracker for @llvm.assume intrinsics in this function.
-    AssumptionCache *AC;
+    AssumptionCache &AC;
 
-    /// LI - The loop information for the function we are currently analyzing.
+    /// The dominator tree.
     ///
-    LoopInfo *LI;
+    DominatorTree &DT;
 
-    /// TLI - The target library information for the target we are targeting.
+ /// The loop information for the function we are currently analyzing. /// - TargetLibraryInfo *TLI; + LoopInfo &LI; - /// DT - The dominator tree. - /// - DominatorTree *DT; + /// This SCEV is used to represent unknown trip counts and things. + std::unique_ptr CouldNotCompute; - /// CouldNotCompute - This SCEV is used to represent unknown trip - /// counts and things. - SCEVCouldNotCompute CouldNotCompute; - - /// ValueExprMapType - The typedef for ValueExprMap. + /// The typedef for ValueExprMap. /// typedef DenseMap > ValueExprMapType; - /// ValueExprMap - This is a cache of the values we have analyzed so far. + /// This is a cache of the values we have analyzed so far. /// ValueExprMapType ValueExprMap; @@ -260,10 +398,14 @@ namespace llvm { /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds; - /// ExitLimit - Information about the number of loop iterations for which a - /// loop exit's branch condition evaluates to the not-taken path. This is a - /// temporary pair of exact and max expressions that are eventually - /// summarized in ExitNotTakenInfo and BackedgeTakenInfo. + /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a + /// predicate by splitting it into a set of independent predicates. + bool ProvingSplitPredicate; + + /// Information about the number of loop iterations for which a loop exit's + /// branch condition evaluates to the not-taken path. This is a temporary + /// pair of exact and max expressions that are eventually summarized in + /// ExitNotTakenInfo and BackedgeTakenInfo. struct ExitLimit { const SCEV *Exact; const SCEV *Max; @@ -272,16 +414,16 @@ namespace llvm { ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {} - /// hasAnyInfo - Test whether this ExitLimit contains any computed - /// information, or whether it's all SCEVCouldNotCompute values. + /// Test whether this ExitLimit contains any computed information, or + /// whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return !isa(Exact) || !isa(Max); } }; - /// ExitNotTakenInfo - Information about the number of times a particular - /// loop exit may be reached before exiting the loop. + /// Information about the number of times a particular loop exit may be + /// reached before exiting the loop. struct ExitNotTakenInfo { AssertingVH ExitingBlock; const SCEV *ExactNotTaken; @@ -289,14 +431,14 @@ namespace llvm { ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} - /// isCompleteList - Return true if all loop exits are computable. + /// Return true if all loop exits are computable. bool isCompleteList() const { return NextExit.getInt() == 0; } void setIncomplete() { NextExit.setInt(1); } - /// getNextExit - Return a pointer to the next exit's not-taken info. + /// Return a pointer to the next exit's not-taken info. ExitNotTakenInfo *getNextExit() const { return NextExit.getPointer(); } @@ -304,16 +446,16 @@ namespace llvm { void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); } }; - /// BackedgeTakenInfo - Information about the backedge-taken count - /// of a loop. This currently includes an exact count and a maximum count. + /// Information about the backedge-taken count of a loop. This currently + /// includes an exact count and a maximum count. /// class BackedgeTakenInfo { - /// ExitNotTaken - A list of computable exits and their not-taken counts. - /// Loops almost never have more than one computable exit. + /// A list of computable exits and their not-taken counts. 
Loops almost + /// never have more than one computable exit. ExitNotTakenInfo ExitNotTaken; - /// Max - An expression indicating the least maximum backedge-taken - /// count of the loop that is known, or a SCEVCouldNotCompute. + /// An expression indicating the least maximum backedge-taken count of the + /// loop that is known, or a SCEVCouldNotCompute. const SCEV *Max; public: @@ -324,80 +466,78 @@ namespace llvm { SmallVectorImpl< std::pair > &ExitCounts, bool Complete, const SCEV *MaxCount); - /// hasAnyInfo - Test whether this BackedgeTakenInfo contains any - /// computed information, or whether it's all SCEVCouldNotCompute - /// values. + /// Test whether this BackedgeTakenInfo contains any computed information, + /// or whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return ExitNotTaken.ExitingBlock || !isa(Max); } - /// getExact - Return an expression indicating the exact backedge-taken - /// count of the loop if it is known, or SCEVCouldNotCompute - /// otherwise. This is the number of times the loop header can be - /// guaranteed to execute, minus one. + /// Return an expression indicating the exact backedge-taken count of the + /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the + /// number of times the loop header can be guaranteed to execute, minus + /// one. const SCEV *getExact(ScalarEvolution *SE) const; - /// getExact - Return the number of times this loop exit may fall through - /// to the back edge, or SCEVCouldNotCompute. The loop is guaranteed not - /// to exit via this block before this number of iterations, but may exit - /// via another block. + /// Return the number of times this loop exit may fall through to the back + /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via + /// this block before this number of iterations, but may exit via another + /// block. const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; - /// getMax - Get the max backedge taken count for the loop. + /// Get the max backedge taken count for the loop. const SCEV *getMax(ScalarEvolution *SE) const; /// Return true if any backedge taken count expressions refer to the given /// subexpression. bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; - /// clear - Invalidate this result and free associated memory. + /// Invalidate this result and free associated memory. void clear(); }; - /// BackedgeTakenCounts - Cache the backedge-taken count of the loops for - /// this function as they are computed. + /// Cache the backedge-taken count of the loops for this function as they + /// are computed. DenseMap BackedgeTakenCounts; - /// ConstantEvolutionLoopExitValue - This map contains entries for all of - /// the PHI instructions that we attempt to compute constant evolutions for. - /// This allows us to avoid potentially expensive recomputation of these - /// properties. An instruction maps to null if we are unable to compute its - /// exit value. + /// This map contains entries for all of the PHI instructions that we + /// attempt to compute constant evolutions for. This allows us to avoid + /// potentially expensive recomputation of these properties. An instruction + /// maps to null if we are unable to compute its exit value. DenseMap ConstantEvolutionLoopExitValue; - /// ValuesAtScopes - This map contains entries for all the expressions - /// that we attempt to compute getSCEVAtScope information for, which can - /// be expensive in extreme cases. 
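// Illustrative sketch (not part of this patch): clients read the
// BackedgeTakenInfo described above through ScalarEvolution's public
// accessors rather than through this internal struct. SE and L are assumed
// to come from a pass that has already run the analysis.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void reportTripInfo(ScalarEvolution &SE, const Loop *L,
                           raw_ostream &OS) {
  const SCEV *Exact = SE.getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Exact))
    OS << "exact backedge-taken count: " << *Exact << "\n";
  // An upper bound may be known even when the exact count is not.
  const SCEV *Max = SE.getMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Max))
    OS << "max backedge-taken count: " << *Max << "\n";
}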
+ /// This map contains entries for all the expressions that we attempt to + /// compute getSCEVAtScope information for, which can be expensive in + /// extreme cases. DenseMap, 2> > ValuesAtScopes; - /// LoopDispositions - Memoized computeLoopDisposition results. + /// Memoized computeLoopDisposition results. DenseMap, 2>> LoopDispositions; - /// computeLoopDisposition - Compute a LoopDisposition value. + /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); - /// BlockDispositions - Memoized computeBlockDisposition results. + /// Memoized computeBlockDisposition results. DenseMap< const SCEV *, SmallVector, 2>> BlockDispositions; - /// computeBlockDisposition - Compute a BlockDisposition value. + /// Compute a BlockDisposition value. BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); - /// UnsignedRanges - Memoized results from getRange + /// Memoized results from getRange DenseMap UnsignedRanges; - /// SignedRanges - Memoized results from getRange + /// Memoized results from getRange DenseMap SignedRanges; - /// RangeSignHint - Used to parameterize getRange + /// Used to parameterize getRange enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED }; - /// setRange - Set the memoized range for the given SCEV. + /// Set the memoized range for the given SCEV. const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint, const ConstantRange &CR) { DenseMap &Cache = @@ -410,198 +550,275 @@ namespace llvm { return Pair.first->second; } - /// getRange - Determine the range for a particular SCEV. + /// Determine the range for a particular SCEV. ConstantRange getRange(const SCEV *S, RangeSignHint Hint); - /// createSCEV - We know that there is no SCEV for the specified value. - /// Analyze the expression. + /// We know that there is no SCEV for the specified value. Analyze the + /// expression. const SCEV *createSCEV(Value *V); - /// createNodeForPHI - Provide the special handling we need to analyze PHI - /// SCEVs. + /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); - /// createNodeForGEP - Provide the special handling we need to analyze GEP - /// SCEVs. + /// Helper function called from createNodeForPHI. + const SCEV *createAddRecFromPHI(PHINode *PN); + + /// Helper function called from createNodeForPHI. + const SCEV *createNodeFromSelectLikePHI(PHINode *PN); + + /// Provide special handling for a select-like instruction (currently this + /// is either a select instruction or a phi node). \p I is the instruction + /// being processed, and it is assumed equivalent to "Cond ? TrueVal : + /// FalseVal". + const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond, + Value *TrueVal, Value *FalseVal); + + /// Provide the special handling we need to analyze GEP SCEVs. const SCEV *createNodeForGEP(GEPOperator *GEP); - /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called - /// at most once for each SCEV+Loop pair. + /// Implementation code for getSCEVAtScope; called at most once for each + /// SCEV+Loop pair. /// const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); - /// ForgetSymbolicValue - This looks up computed SCEV values for all - /// instructions that depend on the given instruction and removes them from - /// the ValueExprMap map if they reference SymName. This is used during PHI - /// resolution. 
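// Illustrative sketch (not part of this patch): the UnsignedRanges and
// SignedRanges caches above sit behind getUnsignedRange/getSignedRange, so
// repeated queries for the same SCEV are cheap lookups. The helper name
// fitsInUnsignedByte is ours.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

static bool fitsInUnsignedByte(ScalarEvolution &SE, Value *V) {
  if (!SE.isSCEVable(V->getType()))
    return false;
  // The first query computes and memoizes the range; later ones hit the cache.
  ConstantRange CR = SE.getUnsignedRange(SE.getSCEV(V));
  return CR.getUnsignedMax().ule(255);
}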
+ /// This looks up computed SCEV values for all instructions that depend on + /// the given instruction and removes them from the ValueExprMap map if they + /// reference SymName. This is used during PHI resolution. void ForgetSymbolicName(Instruction *I, const SCEV *SymName); - /// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given - /// loop, lazily computing new values if the loop hasn't been analyzed - /// yet. + /// Return the BackedgeTakenInfo for the given loop, lazily computing new + /// values if the loop hasn't been analyzed yet. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); - /// ComputeBackedgeTakenCount - Compute the number of times the specified - /// loop will iterate. - BackedgeTakenInfo ComputeBackedgeTakenCount(const Loop *L); + /// Compute the number of times the specified loop will iterate. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); - /// ComputeExitLimit - Compute the number of times the backedge of the - /// specified loop will execute if it exits via the specified block. - ExitLimit ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// Compute the number of times the backedge of the specified loop will + /// execute if it exits via the specified block. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); - /// ComputeExitLimitFromCond - Compute the number of times the backedge of - /// the specified loop will execute if its exit condition were a conditional - /// branch of ExitCond, TBB, and FBB. - ExitLimit ComputeExitLimitFromCond(const Loop *L, + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a conditional branch of ExitCond, + /// TBB, and FBB. + ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool IsSubExpr); - /// ComputeExitLimitFromICmp - Compute the number of times the backedge of - /// the specified loop will execute if its exit condition were a conditional - /// branch of the ICmpInst ExitCond, TBB, and FBB. - ExitLimit ComputeExitLimitFromICmp(const Loop *L, + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a conditional branch of the ICmpInst + /// ExitCond, TBB, and FBB. + ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool IsSubExpr); - /// ComputeExitLimitFromSingleExitSwitch - Compute the number of times the - /// backedge of the specified loop will execute if its exit condition were a - /// switch with a single exiting case to ExitingBB. + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a switch with a single exiting case + /// to ExitingBB. ExitLimit - ComputeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, + computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBB, bool IsSubExpr); - /// ComputeLoadConstantCompareExitLimit - Given an exit condition - /// of 'icmp op load X, cst', try to see if we can compute the - /// backedge-taken count. - ExitLimit ComputeLoadConstantCompareExitLimit(LoadInst *LI, + /// Given an exit condition of 'icmp op load X, cst', try to see if we can + /// compute the backedge-taken count. 
+ ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS, const Loop *L, ICmpInst::Predicate p); - /// ComputeExitCountExhaustively - If the loop is known to execute a - /// constant number of times (the condition evolves only from constants), - /// try to evaluate a few iterations of the loop until we get the exit - /// condition gets a value of ExitWhen (true or false). If we cannot - /// evaluate the exit count of the loop, return CouldNotCompute. - const SCEV *ComputeExitCountExhaustively(const Loop *L, + /// Compute the exit limit of a loop that is controlled by a + /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip + /// count in these cases (since SCEV has no way of expressing them), but we + /// can still sometimes compute an upper bound. + /// + /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred + /// RHS`. + ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS, + const Loop *L, + ICmpInst::Predicate Pred); + + /// If the loop is known to execute a constant number of times (the + /// condition evolves only from constants), try to evaluate a few iterations + /// of the loop until we get the exit condition gets a value of ExitWhen + /// (true or false). If we cannot evaluate the exit count of the loop, + /// return CouldNotCompute. + const SCEV *computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen); - /// HowFarToZero - Return the number of times an exit condition comparing - /// the specified value to zero will execute. If not computable, return - /// CouldNotCompute. + /// Return the number of times an exit condition comparing the specified + /// value to zero will execute. If not computable, return CouldNotCompute. ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); - /// HowFarToNonZero - Return the number of times an exit condition checking - /// the specified value for nonzero will execute. If not computable, return + /// Return the number of times an exit condition checking the specified + /// value for nonzero will execute. If not computable, return /// CouldNotCompute. ExitLimit HowFarToNonZero(const SCEV *V, const Loop *L); - /// HowManyLessThans - Return the number of times an exit condition - /// containing the specified less-than comparison will execute. If not - /// computable, return CouldNotCompute. isSigned specifies whether the - /// less-than is signed. + /// Return the number of times an exit condition containing the specified + /// less-than comparison will execute. If not computable, return + /// CouldNotCompute. isSigned specifies whether the less-than is signed. ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr); ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr); - /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB - /// (which may not be an immediate predecessor) which has exactly one - /// successor from which BB is reachable, or null if no such block is - /// found. + /// Return a predecessor of BB (which may not be an immediate predecessor) + /// which has exactly one successor from which BB is reachable, or null if + /// no such block is found. std::pair getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); - /// isImpliedCond - Test whether the condition described by Pred, LHS, and - /// RHS is true whenever the given FoundCondValue value evaluates to true. 
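// Illustrative sketch (not part of this patch): the computeExitLimit*
// machinery above is private; transforms normally consume its results via
// the public trip-count helpers. A full-unrolling style heuristic, for
// example, might look like this (the threshold of 8 is ours, illustrative):
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static bool isSmallConstantTripLoop(ScalarEvolution &SE, Loop *L) {
  // getSmallConstantTripCount returns 0 when the count is unknown or
  // not constant.
  unsigned TripCount = SE.getSmallConstantTripCount(L);
  return TripCount != 0 && TripCount <= 8;
}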
+ /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the given FoundCondValue value evaluates to true. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Value *FoundCondValue, bool Inverse); - /// isImpliedCondOperands - Test whether the condition described by Pred, - /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, - /// and FoundRHS is true. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is + /// true. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, ICmpInst::Predicate FoundPred, + const SCEV *FoundLHS, const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// isImpliedCondOperandsHelper - Test whether the condition described by - /// Pred, LHS, and RHS is true whenever the condition described by Pred, - /// FoundLHS, and FoundRHS is true. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// isImpliedCondOperandsViaRanges - Test whether the condition described by - /// Pred, LHS, and RHS is true whenever the condition described by Pred, - /// FoundLHS, and FoundRHS is true. Utility function used by - /// isImpliedCondOperands. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. Utility function used by isImpliedCondOperands. bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is - /// in the header of its containing loop, we know the loop executes a - /// constant number of times, and the PHI node is just a recurrence - /// involving constants, fold it. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to rule out certain kinds of integer overflow, and + /// then tries to reason about arithmetic properties of the predicates. + bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// If we know that the specified Phi is in the header of its containing + /// loop, we know the loop executes a constant number of times, and the PHI + /// node is just a recurrence involving constants, fold it. Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L); - /// isKnownPredicateWithRanges - Test if the given expression is known to - /// satisfy the condition described by Pred and the known constant ranges - /// of LHS and RHS. + /// Test if the given expression is known to satisfy the condition described + /// by Pred and the known constant ranges of LHS and RHS. 
/// bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// forgetMemoizedResults - Drop memoized information computed for S. + /// Try to prove the condition described by "LHS Pred RHS" by ruling out + /// integer overflow. + /// + /// For instance, this will return true for "A s< (A + C)" if C is + /// positive. + bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to + /// prove them individually. + bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Try to match the Expr as "(L + R)". + bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, + SCEV::NoWrapFlags &Flags); + + /// Return true if More == (Less + C), where C is a constant. This is + /// intended to be used as a cheaper substitute for full SCEV subtraction. + bool computeConstantDifference(const SCEV *Less, const SCEV *More, + APInt &C); + + /// Drop memoized information computed for S. void forgetMemoizedResults(const SCEV *S); + /// Return an existing SCEV for V if there is one, otherwise return nullptr. + const SCEV *getExistingSCEV(Value *V); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; - // Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be equal - // to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is equivalent to - // proving no signed (resp. unsigned) wrap in {`Start`,+,`Step`} if - // `ExtendOpTy` is `SCEVSignExtendExpr` (resp. `SCEVZeroExtendExpr`). - // + /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be + /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is + /// equivalent to proving no signed (resp. unsigned) wrap in + /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr` + /// (resp. `SCEVZeroExtendExpr`). + /// template bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); + bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, bool &Increasing); + + /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" + /// is monotonically increasing or decreasing. In the former case set + /// `Increasing` to true and in the latter case set `Increasing` to false. + /// + /// A predicate is said to be monotonically increasing if may go from being + /// false to being true as the loop iterates, but never the other way + /// around. A predicate is said to be monotonically decreasing if may go + /// from being true to being false as the loop iterates, but never the other + /// way around. + bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, bool &Increasing); + + // Return SCEV no-wrap flags that can be proven based on reasoning + // about how poison produced from no-wrap flags on this value + // (e.g. a nuw add) would trigger undefined behavior on overflow. 
+ SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + public: - static char ID; // Pass identification, replacement for typeid - ScalarEvolution(); + ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT, LoopInfo &LI); + ~ScalarEvolution(); + ScalarEvolution(ScalarEvolution &&Arg); - LLVMContext &getContext() const { return F->getContext(); } + LLVMContext &getContext() const { return F.getContext(); } - /// isSCEVable - Test if values of the given type are analyzable within - /// the SCEV framework. This primarily includes integer types, and it - /// can optionally include pointer types if the ScalarEvolution class - /// has access to target-specific information. + /// Test if values of the given type are analyzable within the SCEV + /// framework. This primarily includes integer types, and it can optionally + /// include pointer types if the ScalarEvolution class has access to + /// target-specific information. bool isSCEVable(Type *Ty) const; - /// getTypeSizeInBits - Return the size in bits of the specified type, - /// for which isSCEVable must return true. + /// Return the size in bits of the specified type, for which isSCEVable must + /// return true. uint64_t getTypeSizeInBits(Type *Ty) const; - /// getEffectiveSCEVType - Return a type with the same bitwidth as - /// the given type and which represents how SCEV will treat the given - /// type, for which isSCEVable must return true. For pointer types, - /// this is the pointer-sized integer type. + /// Return a type with the same bitwidth as the given type and which + /// represents how SCEV will treat the given type, for which isSCEVable must + /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; - /// getSCEV - Return a SCEV expression for the full generality of the - /// specified expression. + /// Return a SCEV expression for the full generality of the specified + /// expression. 
const SCEV *getSCEV(Value *V); const SCEV *getConstant(ConstantInt *V); @@ -615,35 +832,24 @@ namespace llvm { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(LHS); - Ops.push_back(RHS); + SmallVector Ops = {LHS, RHS}; return getAddExpr(Ops, Flags); } const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(Op0); - Ops.push_back(Op1); - Ops.push_back(Op2); + SmallVector Ops = {Op0, Op1, Op2}; return getAddExpr(Ops, Flags); } const SCEV *getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) - { - SmallVector Ops; - Ops.push_back(LHS); - Ops.push_back(RHS); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SmallVector Ops = {LHS, RHS}; return getMulExpr(Ops, Flags); } const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(Op0); - Ops.push_back(Op1); - Ops.push_back(Op2); + SmallVector Ops = {Op0, Op1, Op2}; return getMulExpr(Ops, Flags); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); @@ -675,81 +881,80 @@ namespace llvm { const SCEV *getUnknown(Value *V); const SCEV *getCouldNotCompute(); - /// getSizeOfExpr - Return an expression for sizeof AllocTy that is type - /// IntTy + /// \brief Return a SCEV for the constant 0 of a specific type. + const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } + + /// \brief Return a SCEV for the constant 1 of a specific type. + const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } + + /// Return an expression for sizeof AllocTy that is type IntTy /// const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); - /// getOffsetOfExpr - Return an expression for offsetof on the given field - /// with type IntTy + /// Return an expression for offsetof on the given field with type IntTy /// const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); - /// getNegativeSCEV - Return the SCEV object corresponding to -V. + /// Return the SCEV object corresponding to -V. /// - const SCEV *getNegativeSCEV(const SCEV *V); + const SCEV *getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); - /// getNotSCEV - Return the SCEV object corresponding to ~V. + /// Return the SCEV object corresponding to ~V. /// const SCEV *getNotSCEV(const SCEV *V); - /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. + /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); - /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion - /// of the input value to the specified type. If the type must be - /// extended, it is zero extended. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); - /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion - /// of the input value to the specified type. If the type must be - /// extended, it is sign extended. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. 
If the type must be extended, it is sign extended. const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); - /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is zero extended. The conversion must not be narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. The + /// conversion must not be narrowing. const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); - /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is sign extended. The conversion must not be narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. The + /// conversion must not be narrowing. const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); - /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is extended with unspecified bits. The conversion must not be - /// narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is extended with + /// unspecified bits. The conversion must not be narrowing. const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); - /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the - /// input value to the specified type. The conversion must not be - /// widening. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. The conversion must not be widening. const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); - /// getUMaxFromMismatchedTypes - Promote the operands to the wider of - /// the types using zero-extension, and then perform a umax operation - /// with them. + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umax operation with them. const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - /// getUMinFromMismatchedTypes - Promote the operands to the wider of - /// the types using zero-extension, and then perform a umin operation - /// with them. + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umin operation with them. const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - /// getPointerBase - Transitively follow the chain of pointer-type operands - /// until reaching a SCEV that does not have a single pointer operand. This - /// returns a SCEVUnknown pointer for well-formed pointer-type expressions, - /// but corner cases do exist. + /// Transitively follow the chain of pointer-type operands until reaching a + /// SCEV that does not have a single pointer operand. This returns a + /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner + /// cases do exist. const SCEV *getPointerBase(const SCEV *V); - /// getSCEVAtScope - Return a SCEV expression for the specified value - /// at the specified scope in the program. The L value specifies a loop - /// nest to evaluate the expression at, where null is the top-level or a - /// specified loop is immediately inside of the loop. 
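// Illustrative sketch (not part of this patch): typical use of the
// conversion helpers documented above when mixing SCEVs of different bit
// widths. IntPtrTy is assumed to be the module's pointer-sized integer type.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static const SCEV *widenIndex(ScalarEvolution &SE, const SCEV *Idx,
                              Type *IntPtrTy) {
  // Zero-extends (or no-ops) the index; these helpers assert rather than
  // silently narrow.
  return SE.getTruncateOrZeroExtend(Idx, IntPtrTy);
}

static const SCEV *unsignedMax(ScalarEvolution &SE, const SCEV *A,
                               const SCEV *B) {
  // Widens the narrower operand with a zext, then forms the umax.
  return SE.getUMaxFromMismatchedTypes(A, B);
}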
+ /// Return a SCEV expression for the specified value at the specified scope + /// in the program. The L value specifies a loop nest to evaluate the + /// expression at, where null is the top-level or a specified loop is + /// immediately inside of the loop. /// /// This method can be used to compute the exit value for a variable defined /// in a loop by querying what the value will hold in the parent loop. @@ -758,19 +963,17 @@ namespace llvm { /// original value V is returned. const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); - /// getSCEVAtScope - This is a convenience function which does - /// getSCEVAtScope(getSCEV(V), L). + /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). const SCEV *getSCEVAtScope(Value *V, const Loop *L); - /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected - /// by a conditional between LHS and RHS. This is used to help avoid max - /// expressions in loop trip counts, and to eliminate casts. + /// Test whether entry to the loop is protected by a conditional between LHS + /// and RHS. This is used to help avoid max expressions in loop trip + /// counts, and to eliminate casts. bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is - /// protected by a conditional between LHS and RHS. This is used to - /// to eliminate casts. + /// Test whether the backedge of the loop is protected by a conditional + /// between LHS and RHS. This is used to to eliminate casts. bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); @@ -781,13 +984,13 @@ namespace llvm { /// the single exiting block passed to it. See that routine for details. unsigned getSmallConstantTripCount(Loop *L); - /// getSmallConstantTripCount - Returns the maximum trip count of this loop - /// as a normal unsigned value. Returns 0 if the trip count is unknown or - /// not constant. This "trip count" assumes that control exits via - /// ExitingBlock. More precisely, it is the number of times that control may - /// reach ExitingBlock before taking the branch. For loops with multiple - /// exits, it may not be the number times that the loop header executes if - /// the loop exits prematurely via another branch. + /// Returns the maximum trip count of this loop as a normal unsigned + /// value. Returns 0 if the trip count is unknown or not constant. This + /// "trip count" assumes that control exits via ExitingBlock. More + /// precisely, it is the number of times that control may reach ExitingBlock + /// before taking the branch. For loops with multiple exits, it may not be + /// the number times that the loop header executes if the loop exits + /// prematurely via another branch. unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock); /// \brief Returns the largest constant divisor of the trip count of the @@ -798,25 +1001,25 @@ namespace llvm { /// the single exiting block passed to it. See that routine for details. unsigned getSmallConstantTripMultiple(Loop *L); - /// getSmallConstantTripMultiple - Returns the largest constant divisor of - /// the trip count of this loop as a normal unsigned value, if - /// possible. This means that the actual trip count is always a multiple of - /// the returned value (don't forget the trip count could very well be zero - /// as well!). 
As explained in the comments for getSmallConstantTripCount, - /// this assumes that control exits the loop via ExitingBlock. + /// Returns the largest constant divisor of the trip count of this loop as a + /// normal unsigned value, if possible. This means that the actual trip + /// count is always a multiple of the returned value (don't forget the trip + /// count could very well be zero as well!). As explained in the comments + /// for getSmallConstantTripCount, this assumes that control exits the loop + /// via ExitingBlock. unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock); - // getExitCount - Get the expression for the number of loop iterations for - // which this loop is guaranteed not to exit via ExitingBlock. Otherwise - // return SCEVCouldNotCompute. + /// Get the expression for the number of loop iterations for which this loop + /// is guaranteed not to exit via ExitingBlock. Otherwise return + /// SCEVCouldNotCompute. const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock); - /// getBackedgeTakenCount - If the specified loop has a predictable - /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute - /// object. The backedge-taken count is the number of times the loop header - /// will be branched to from within the loop. This is one less than the - /// trip count of the loop, since it doesn't count the first iteration, - /// when the header is branched to from outside the loop. + /// If the specified loop has a predictable backedge-taken count, return it, + /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count + /// is the number of times the loop header will be branched to from within + /// the loop. This is one less than the trip count of the loop, since it + /// doesn't count the first iteration, when the header is branched to from + /// outside the loop. /// /// Note that it is not valid to call this method on a loop without a /// loop-invariant backedge-taken count (see @@ -824,24 +1027,23 @@ namespace llvm { /// const SCEV *getBackedgeTakenCount(const Loop *L); - /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except - /// return the least SCEV value that is known never to be less than the - /// actual backedge taken count. + /// Similar to getBackedgeTakenCount, except return the least SCEV value + /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); - /// hasLoopInvariantBackedgeTakenCount - Return true if the specified loop - /// has an analyzable loop-invariant backedge-taken count. + /// Return true if the specified loop has an analyzable loop-invariant + /// backedge-taken count. bool hasLoopInvariantBackedgeTakenCount(const Loop *L); - /// forgetLoop - This method should be called by the client when it has - /// changed a loop in a way that may effect ScalarEvolution's ability to - /// compute a trip count, or if the loop is deleted. This call is - /// potentially expensive for large loop bodies. + /// This method should be called by the client when it has changed a loop in + /// a way that may affect ScalarEvolution's ability to compute a trip count, + /// or if the loop is deleted. This call is potentially expensive for large + /// loop bodies. void forgetLoop(const Loop *L); - /// forgetValue - This method should be called by the client when it has - /// changed a value in a way that may effect its value, or which may - /// disconnect it from a def-use chain linking it to a loop.
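// ---- Editor's illustrative sketch (not part of the patch) ----
// Relating getBackedgeTakenCount to the trip count described above: the trip
// count is one more than the backedge-taken count. SE and L are assumed to
// be a ScalarEvolution instance and a Loop with an analyzable count.
const SCEV *BEC = SE.getBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(BEC)) {
  // getConstant builds a SCEVConstant of the matching type for the +1.
  const SCEV *TripCount =
      SE.getAddExpr(BEC, SE.getConstant(BEC->getType(), 1));
  (void)TripCount;
}
// ---- end sketch ----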
+ /// This method should be called by the client when it has changed a value + /// in a way that may affect its value, or which may disconnect it from a + /// def-use chain linking it to a loop. void forgetValue(Value *V); /// \brief Called when the client has changed the disposition of values in @@ -851,92 +1053,97 @@ namespace llvm { /// recompute is simpler. void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); } - /// GetMinTrailingZeros - Determine the minimum number of zero bits that S - /// is guaranteed to end in (at every loop iteration). It is, at the same - /// time, the minimum number of times S is divisible by 2. For example, - /// given {4,+,8} it returns 2. If S is guaranteed to be 0, it returns the - /// bitwidth of S. + /// Determine the minimum number of zero bits that S is guaranteed to end in + /// (at every loop iteration). It is, at the same time, the minimum number + /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. + /// If S is guaranteed to be 0, it returns the bitwidth of S. uint32_t GetMinTrailingZeros(const SCEV *S); - /// getUnsignedRange - Determine the unsigned range for a particular SCEV. + /// Determine the unsigned range for a particular SCEV. /// ConstantRange getUnsignedRange(const SCEV *S) { return getRange(S, HINT_RANGE_UNSIGNED); } - /// getSignedRange - Determine the signed range for a particular SCEV. + /// Determine the signed range for a particular SCEV. /// ConstantRange getSignedRange(const SCEV *S) { return getRange(S, HINT_RANGE_SIGNED); } - /// isKnownNegative - Test if the given expression is known to be negative. + /// Test if the given expression is known to be negative. /// bool isKnownNegative(const SCEV *S); - /// isKnownPositive - Test if the given expression is known to be positive. + /// Test if the given expression is known to be positive. /// bool isKnownPositive(const SCEV *S); - /// isKnownNonNegative - Test if the given expression is known to be - /// non-negative. + /// Test if the given expression is known to be non-negative. /// bool isKnownNonNegative(const SCEV *S); - /// isKnownNonPositive - Test if the given expression is known to be - /// non-positive. + /// Test if the given expression is known to be non-positive. /// bool isKnownNonPositive(const SCEV *S); - /// isKnownNonZero - Test if the given expression is known to be - /// non-zero. + /// Test if the given expression is known to be non-zero. /// bool isKnownNonZero(const SCEV *S); - /// isKnownPredicate - Test if the given expression is known to satisfy - /// the condition described by Pred, LHS, and RHS. + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS. /// bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with - /// predicate Pred. Return true iff any changes were made. If the - /// operands are provably equal or unequal, LHS and RHS are set to - /// the same value and Pred is set to either ICMP_EQ or ICMP_NE. + /// Return true if the result of the predicate LHS `Pred` RHS is loop + /// invariant with respect to L. Set InvariantPred, InvariantLHS and + /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the + /// loop invariant form of LHS `Pred` RHS.
+ bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, + const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS); + + /// Simplify LHS and RHS in a comparison with predicate Pred. Return true + /// iff any changes were made. If the operands are provably equal or + /// unequal, LHS and RHS are set to the same value and Pred is set to either + /// ICMP_EQ or ICMP_NE. /// bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth = 0); - /// getLoopDisposition - Return the "disposition" of the given SCEV with - /// respect to the given loop. + /// Return the "disposition" of the given SCEV with respect to the given + /// loop. LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L); - /// isLoopInvariant - Return true if the value of the given SCEV is - /// unchanging in the specified loop. + /// Return true if the value of the given SCEV is unchanging in the + /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); - /// hasComputableLoopEvolution - Return true if the given SCEV changes value - /// in a known way in the specified loop. This property being true implies - /// that the value is variant in the loop AND that we can emit an expression - /// to compute the value of the expression at any particular loop iteration. + /// Return true if the given SCEV changes value in a known way in the + /// specified loop. This property being true implies that the value is + /// variant in the loop AND that we can emit an expression to compute the + /// value of the expression at any particular loop iteration. bool hasComputableLoopEvolution(const SCEV *S, const Loop *L); - /// getLoopDisposition - Return the "disposition" of the given SCEV with - /// respect to the given block. + /// Return the "disposition" of the given SCEV with respect to the given + /// block. BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB); - /// dominates - Return true if elements that makes up the given SCEV - /// dominate the specified basic block. + /// Return true if elements that make up the given SCEV dominate the + /// specified basic block. bool dominates(const SCEV *S, const BasicBlock *BB); - /// properlyDominates - Return true if elements that makes up the given SCEV - /// properly dominate the specified basic block. + /// Return true if elements that make up the given SCEV properly dominate + /// the specified basic block. bool properlyDominates(const SCEV *S, const BasicBlock *BB); - /// hasOperand - Test whether the given SCEV has Op as a direct or - /// indirect operand. + /// Test whether the given SCEV has Op as a direct or indirect operand. bool hasOperand(const SCEV *S, const SCEV *Op) const; /// Return the size of an element read or written by Inst. @@ -948,11 +1155,8 @@ namespace llvm { SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize) const; - bool runOnFunction(Function &F) override; - void releaseMemory() override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &OS, const Module* = nullptr) const override; - void verifyAnalysis() const override; + void print(raw_ostream &OS) const; + void verify() const; /// Collect parametric terms occurring in step expressions.
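// ---- Editor's illustrative sketch (not part of the patch) ----
// Combining the query APIs above: prove an index stays below a bound and
// that the bound is invariant in a loop. SE, L, IdxVal and LenVal are
// assumed to be a ScalarEvolution instance, a Loop*, and two integer
// Value*s from the caller.
const SCEV *Idx = SE.getSCEV(IdxVal);
const SCEV *Len = SE.getSCEV(LenVal);
if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, Idx, Len) &&
    SE.isLoopInvariant(Len, L)) {
  // A client could, e.g., drop a redundant bounds check inside L here.
}
// ---- end sketch ----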
void collectParametricTerms(const SCEV *Expr, @@ -1034,6 +1238,18 @@ namespace llvm { SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize); + /// Return the DataLayout associated with the module this SCEV instance is + /// operating on. + const DataLayout &getDataLayout() const { + return F.getParent()->getDataLayout(); + } + + const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, + const SCEVConstant *RHS); + + /// Re-writes the SCEV according to the Predicates in \p Preds. + const SCEV *rewriteUsingPredicate(const SCEV *Scev, SCEVUnionPredicate &A); + private: /// Compute the backedge taken count knowing the interval difference, the /// stride and presence of the equality in the comparison. @@ -1054,13 +1270,112 @@ namespace llvm { private: FoldingSet<SCEV> UniqueSCEVs; + FoldingSet<SCEVPredicate> UniquePreds; BumpPtrAllocator SCEVAllocator; - /// FirstUnknown - The head of a linked list of all SCEVUnknown - /// values that have been allocated. This is used by releaseMemory - /// to locate them all and call their destructors. + /// The head of a linked list of all SCEVUnknown values that have been + /// allocated. This is used by releaseMemory to locate them all and call + /// their destructors. SCEVUnknown *FirstUnknown; }; + + /// \brief Analysis pass that exposes the \c ScalarEvolution for a function. + class ScalarEvolutionAnalysis { + static char PassID; + + public: + typedef ScalarEvolution Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "ScalarEvolutionAnalysis"; } + + ScalarEvolution run(Function &F, AnalysisManager<Function> *AM); + }; + + /// \brief Printer pass for the \c ScalarEvolutionAnalysis results. + class ScalarEvolutionPrinterPass { + raw_ostream &OS; + + public: + explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); + + static StringRef name() { return "ScalarEvolutionPrinterPass"; } + }; + + class ScalarEvolutionWrapperPass : public FunctionPass { + std::unique_ptr<ScalarEvolution> SE; + + public: + static char ID; + + ScalarEvolutionWrapperPass(); + + ScalarEvolution &getSE() { return *SE; } + const ScalarEvolution &getSE() const { return *SE; } + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void verifyAnalysis() const override; + }; + + /// An interface layer with SCEV used to manage how we see SCEV expressions + /// for values in the context of existing predicates. We can add new + /// predicates, but we cannot remove them. + /// + /// This layer has multiple purposes: + /// - provides a simple interface for SCEV versioning. + /// - guarantees that the order of transformations applied on a SCEV + /// expression for a single Value is consistent across two different + /// getSCEV calls. This means that, for example, once we've obtained + /// an AddRec expression for a certain value through expression + /// rewriting, we will continue to get an AddRec expression for that + /// Value. + /// - lowers the number of expression rewrites. + class PredicatedScalarEvolution { + public: + PredicatedScalarEvolution(ScalarEvolution &SE); + const SCEVUnionPredicate &getUnionPredicate() const; + /// \brief Returns the SCEV expression of V, in the context of the current + /// SCEV predicate.
+ /// The order of transformations applied on the expression of V returned + /// by ScalarEvolution is guaranteed to be preserved, even when adding new + /// predicates. + const SCEV *getSCEV(Value *V); + /// \brief Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + /// \brief Returns the ScalarEvolution analysis used. + ScalarEvolution *getSE() const { return &SE; } + + private: + /// \brief Increments the version number of the predicate. + /// This needs to be called every time the SCEV predicate changes. + void updateGeneration(); + /// Holds a SCEV and the version number of the SCEV predicate used to + /// perform the rewrite of the expression. + typedef std::pair<unsigned, const SCEV *> RewriteEntry; + /// Maps a SCEV to the rewrite result of that SCEV at a certain version + /// number. If this number doesn't match the current Generation, we will + /// need to do a rewrite. To preserve the transformation order of previous + /// rewrites, we will rewrite the previous result instead of the original + /// SCEV. + DenseMap<const SCEV *, RewriteEntry> RewriteMap; + /// The ScalarEvolution analysis. + ScalarEvolution &SE; + /// The SCEVPredicate that forms our context. We will rewrite all + /// expressions assuming that this predicate is true. + SCEVUnionPredicate Preds; + /// Marks the version of the SCEV predicate used. When rewriting a SCEV + /// expression we mark it with the version of the predicate. We use this to + /// figure out if the predicate has changed from the last rewrite of the + /// SCEV. If so, we need to perform a new rewrite. + unsigned Generation; + }; } #endif diff --git a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h new file mode 100644 index 000000000000..7bbbf5562047 --- /dev/null +++ b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -0,0 +1,79 @@ +//===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a SCEV-based alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple alias analysis implementation that uses ScalarEvolution to answer +/// queries. +class SCEVAAResult : public AAResultBase<SCEVAAResult> { + ScalarEvolution &SE; + +public: + explicit SCEVAAResult(const TargetLibraryInfo &TLI, ScalarEvolution &SE) + : AAResultBase(TLI), SE(SE) {} + SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + +private: + Value *GetBaseValue(const SCEV *S); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class SCEVAA { +public: + typedef SCEVAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + SCEVAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes.
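// ---- Editor's illustrative sketch (not part of the patch) ----
// How a client might use PredicatedScalarEvolution as documented above. SE
// is assumed to be a ScalarEvolution instance, V a Value*, and Pred some
// SCEVPredicate (e.g. obtained from ScalarEvolution::getEqualPredicate).
PredicatedScalarEvolution PSE(SE);
const SCEV *Before = PSE.getSCEV(V); // rewritten under the (empty) predicate
PSE.addPredicate(Pred);              // bumps the internal generation
const SCEV *After = PSE.getSCEV(V);  // consistent with the earlier rewrite
(void)Before; (void)After;
// ---- end sketch ----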
+ static StringRef name() { return "SCEVAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the SCEVAAResult object. +class SCEVAAWrapperPass : public FunctionPass { + std::unique_ptr<SCEVAAResult> Result; + +public: + static char ID; + + SCEVAAWrapperPass(); + + SCEVAAResult &getResult() { return *Result; } + const SCEVAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Creates an instance of \c SCEVAAWrapperPass. +FunctionPass *createSCEVAAWrapperPass(); + +} + +#endif diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 8ec2078258d1..b9939168a99d 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -117,9 +117,14 @@ namespace llvm { /// \brief Return true for expressions that may incur non-trivial cost to /// evaluate at runtime. - bool isHighCostExpansion(const SCEV *Expr, Loop *L) { + /// + /// At is an optional parameter which specifies the point in the code where + /// the user is going to expand this expression. Sometimes this knowledge + /// can lead to a more accurate cost estimation. + bool isHighCostExpansion(const SCEV *Expr, Loop *L, + const Instruction *At = nullptr) { SmallPtrSet<const SCEV *, 8> Processed; - return isHighCostExpansionHelper(Expr, L, Processed); + return isHighCostExpansionHelper(Expr, L, At, Processed); } /// \brief This method returns the canonical induction variable of the @@ -146,6 +151,22 @@ namespace llvm { /// block. Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I); + /// \brief Generates a code sequence that evaluates this predicate. + /// The inserted instructions will be at position \p Loc. + /// The result will be of type i1 and will have a value of 0 when the + /// predicate is false and 1 otherwise. + Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); + + /// \brief A specialized variant of expandCodeForPredicate, handling the + /// case when we are expanding code for a SCEVEqualPredicate. + Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, + Instruction *Loc); + + /// \brief A specialized variant of expandCodeForPredicate, handling the + /// case when we are expanding code for a SCEVUnionPredicate. + Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, + Instruction *Loc); + /// \brief Set the current IV increment loop and position. void setIVIncInsertPos(const Loop *L, Instruction *Pos) { assert(!CanonicalMode && @@ -193,11 +214,22 @@ namespace llvm { void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + /// \brief Try to find an LLVM IR value for S available at the point At. + /// + /// L is a hint which tells in which loop to look for a suitable value. + /// On success, return a value which is equivalent to the expanded S at + /// point At. Return nullptr if a value was not found. + /// + /// Note that this function does not perform an exhaustive search. I.e., if + /// it didn't find any value, it does not mean that there is no such value. + Value *findExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + private: LLVMContext &getContext() const { return SE.getContext(); } /// \brief Recursive helper function for isHighCostExpansion.
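// ---- Editor's illustrative sketch (not part of the patch) ----
// Tying the expander pieces together: consult the new cost interface before
// materializing an expression at a chosen insertion point. SE, DL, S, L and
// InsertPt are assumed to exist in the caller; the (SE, DataLayout, name)
// constructor is the usual SCEVExpander constructor of this era.
SCEVExpander Expander(SE, DL, "scev-sketch");
if (!Expander.isHighCostExpansion(S, L, InsertPt)) {
  // findExistingExpansion could be consulted first to reuse existing IR.
  Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);
  (void)V;
}
// ---- end sketch ----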
bool isHighCostExpansionHelper(const SCEV *S, Loop *L, + const Instruction *At, SmallPtrSetImpl &Processed); /// \brief Insert the specified binary operator, doing a small amount diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index da24de281d47..16992680577c 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -43,6 +43,7 @@ namespace llvm { SCEV(ID, scConstant), V(v) {} public: ConstantInt *getValue() const { return V; } + const APInt &getAPInt() const { return getValue()->getValue(); } Type *getType() const { return V->getType(); } @@ -404,7 +405,7 @@ namespace llvm { /// value, and only represent it as its LLVM Value. This is the "bottom" /// value for the analysis. /// - class SCEVUnknown : public SCEV, private CallbackVH { + class SCEVUnknown final : public SCEV, private CallbackVH { friend class ScalarEvolution; // Implement CallbackVH. @@ -553,12 +554,88 @@ namespace llvm { T.visitAll(Root); } + /// Recursively visits a SCEV expression and re-writes it. + template + class SCEVRewriteVisitor : public SCEVVisitor { + protected: + ScalarEvolution &SE; + public: + SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {} + + const SCEV *visitConstant(const SCEVConstant *Constant) { + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getTruncateExpr(Operand, Expr->getType()); + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getZeroExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getSignExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()), + ((SC*)this)->visit(Expr->getRHS())); + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getAddRecExpr(Operands, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getSMaxExpr(Operands); + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getUMaxExpr(Operands); + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + }; + typedef DenseMap 
<const Value *, Value *> ValueToValueMap; /// The SCEVParameterRewriter takes a scalar evolution expression and updates /// the SCEVUnknown components following the Map (Value -> Value). - struct SCEVParameterRewriter - : public SCEVVisitor<SCEVParameterRewriter, const SCEV *> { + class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> { public: static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToValueMap &Map, @@ -567,67 +644,8 @@ namespace llvm { return Rewriter.visit(Scev); } - SCEVParameterRewriter(ScalarEvolution &S, ValueToValueMap &M, bool C) - : SE(S), Map(M), InterpretConsts(C) {} - - const SCEV *visitConstant(const SCEVConstant *Constant) { - return Constant; - } - - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); - } - - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); - } - - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); - } - - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS())); - } - - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddRecExpr(Operands, Expr->getLoop(), - Expr->getNoWrapFlags()); - } - - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); - } - - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); - } + SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C) + : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { Value *V = Expr->getValue(); @@ -640,68 +658,26 @@ namespace llvm { return Expr; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; - } - private: - ScalarEvolution &SE; ValueToValueMap &Map; bool InterpretConsts; }; typedef DenseMap<const Loop *, const SCEV *> LoopToScevMapT; - /// The SCEVApplyRewriter takes a scalar evolution expression and applies + /// The SCEVLoopAddRecRewriter takes a scalar evolution expression and applies /// the Map (Loop -> SCEV) to all AddRecExprs.
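// ---- Editor's illustrative sketch (not part of the patch) ----
// The CRTP base above makes one-off rewriters short. A hypothetical rewriter
// that substitutes a single SCEVUnknown, overriding only the visit method it
// cares about and inheriting the structural recursion from
// SCEVRewriteVisitor:
class SingleUnknownRewriter
    : public SCEVRewriteVisitor<SingleUnknownRewriter> {
  const SCEV *From, *To;

public:
  SingleUnknownRewriter(ScalarEvolution &SE, const SCEV *From, const SCEV *To)
      : SCEVRewriteVisitor(SE), From(From), To(To) {}
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    return Expr == From ? To : Expr; // swap in the replacement expression
  }
};
// Usage: const SCEV *NewS = SingleUnknownRewriter(SE, OldU, Repl).visit(S);
// ---- end sketch ----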
- struct SCEVApplyRewriter - : public SCEVVisitor<SCEVApplyRewriter, const SCEV *> { + class SCEVLoopAddRecRewriter + : public SCEVRewriteVisitor<SCEVLoopAddRecRewriter> { public: static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map, ScalarEvolution &SE) { - SCEVApplyRewriter Rewriter(SE, Map); + SCEVLoopAddRecRewriter Rewriter(SE, Map); return Rewriter.visit(Scev); } - SCEVApplyRewriter(ScalarEvolution &S, LoopToScevMapT &M) - : SE(S), Map(M) {} - - const SCEV *visitConstant(const SCEVConstant *Constant) { - return Constant; - } - - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); - } - - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); - } - - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); - } - - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS())); - } + SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M) + : SCEVRewriteVisitor(SE), Map(M) {} const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { SmallVector<const SCEV *, 2> Operands; @@ -714,41 +690,18 @@ namespace llvm { if (0 == Map.count(L)) return Res; - const SCEVAddRecExpr *Rec = (const SCEVAddRecExpr *) Res; + const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res); return Rec->evaluateAtIteration(Map[L], SE); } - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); - } - - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); - } - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - return Expr; - } - - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; - } - private: - ScalarEvolution &SE; LoopToScevMapT &Map; }; /// Applies the Map (Loop -> SCEV) to the given Scev. static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map, ScalarEvolution &SE) { - return SCEVApplyRewriter::rewrite(Scev, Map, SE); + return SCEVLoopAddRecRewriter::rewrite(Scev, Map, SE); } } diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h new file mode 100644 index 000000000000..175561687157 --- /dev/null +++ b/include/llvm/Analysis/ScopedNoAliasAA.h @@ -0,0 +1,92 @@ +//===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a metadata-based scoped no-alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCOPEDNOALIASAA_H +#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple AA result which uses scoped-noalias metadata to answer queries. +class ScopedNoAliasAAResult : public AAResultBase { + friend AAResultBase; + +public: + explicit ScopedNoAliasAAResult(const TargetLibraryInfo &TLI) + : AAResultBase(TLI) {} + ScopedNoAliasAAResult(ScopedNoAliasAAResult &&Arg) + : AAResultBase(std::move(Arg)) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + +private: + bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; + void collectMDInDomain(const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ScopedNoAliasAA { +public: + typedef ScopedNoAliasAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + ScopedNoAliasAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "ScopedNoAliasAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the ScopedNoAliasAAResult object. +class ScopedNoAliasAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + ScopedNoAliasAAWrapperPass(); + + ScopedNoAliasAAResult &getResult() { return *Result; } + const ScopedNoAliasAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createScopedNoAliasAAWrapperPass - This pass implements metadata-based +// scoped noalias analysis. 
+// +ImmutablePass *createScopedNoAliasAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 9ccae5ff89b7..2c7f5dd73547 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -21,19 +21,19 @@ #include namespace llvm { - class Value; - class Constant; - class Argument; - class Instruction; - class PHINode; - class TerminatorInst; - class BasicBlock; - class Function; - class SparseSolver; - class raw_ostream; +class Value; +class Constant; +class Argument; +class Instruction; +class PHINode; +class TerminatorInst; +class BasicBlock; +class Function; +class SparseSolver; +class raw_ostream; + +template class SmallVectorImpl; - template class SmallVectorImpl; - /// AbstractLatticeFunction - This class is implemented by the dataflow instance /// to specify what the lattice values are and how they handle merges etc. /// This gives the client the power to compute lattice values from instructions, @@ -44,8 +44,10 @@ namespace llvm { class AbstractLatticeFunction { public: typedef void *LatticeVal; + private: LatticeVal UndefVal, OverdefinedVal, UntrackedVal; + public: AbstractLatticeFunction(LatticeVal undefVal, LatticeVal overdefinedVal, LatticeVal untrackedVal) { @@ -54,18 +56,16 @@ public: UntrackedVal = untrackedVal; } virtual ~AbstractLatticeFunction(); - + LatticeVal getUndefVal() const { return UndefVal; } LatticeVal getOverdefinedVal() const { return OverdefinedVal; } LatticeVal getUntrackedVal() const { return UntrackedVal; } - + /// IsUntrackedValue - If the specified Value is something that is obviously /// uninteresting to the analysis (and would always return UntrackedVal), /// this function can return true to avoid pointless work. - virtual bool IsUntrackedValue(Value *V) { - return false; - } - + virtual bool IsUntrackedValue(Value *V) { return false; } + /// ComputeConstant - Given a constant value, compute and return a lattice /// value corresponding to the specified constant. virtual LatticeVal ComputeConstant(Constant *C) { @@ -74,10 +74,8 @@ public: /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is /// one that the we want to handle through ComputeInstructionState. - virtual bool IsSpecialCasedPHI(PHINode *PN) { - return false; - } - + virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; } + /// GetConstant - If the specified lattice value is representable as an LLVM /// constant value, return it. Otherwise return null. The returned value /// must be in the same LLVM type as Val. @@ -90,42 +88,41 @@ public: virtual LatticeVal ComputeArgument(Argument *I) { return getOverdefinedVal(); // always safe } - + /// MergeValues - Compute and return the merge of the two specified lattice /// values. Merging should only move one direction down the lattice to /// guarantee convergence (toward overdefined). virtual LatticeVal MergeValues(LatticeVal X, LatticeVal Y) { return getOverdefinedVal(); // always safe, never useful. } - + /// ComputeInstructionState - Given an instruction and a vector of its operand /// values, compute the result value of the instruction. virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) { return getOverdefinedVal(); // always safe, never useful. } - + /// PrintValue - Render the specified lattice value to the specified stream. 
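// ---- Editor's illustrative sketch (not part of the patch) ----
// A minimal AbstractLatticeFunction subclass, assuming the three hypothetical
// lattice tokens below; every constant folds to overdefined, so the solver
// only computes reachability. This is just to show the subclassing contract.
static char UndefTok, OverdefTok, UntrackedTok;
class ReachabilityLattice : public AbstractLatticeFunction {
public:
  ReachabilityLattice()
      : AbstractLatticeFunction(&UndefTok, &OverdefTok, &UntrackedTok) {}
  LatticeVal ComputeConstant(Constant *C) override {
    return getOverdefinedVal(); // track no value information
  }
};
// The solver owns and deletes the lattice object (see ~SparseSolver below):
//   SparseSolver Solver(new ReachabilityLattice());
//   Solver.Solve(F);
// ---- end sketch ----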
virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; - /// SparseSolver - This class is a general purpose solver for Sparse Conditional /// Propagation with a programmable lattice function. /// class SparseSolver { typedef AbstractLatticeFunction::LatticeVal LatticeVal; - + /// LatticeFunc - This is the object that knows the lattice and how to do /// compute transfer functions. AbstractLatticeFunction *LatticeFunc; - - DenseMap ValueState; // The state each value is in. - SmallPtrSet BBExecutable; // The bbs that are executable. - - std::vector InstWorkList; // Worklist of insts to process. - - std::vector BBWorkList; // The BasicBlock work list - + + DenseMap ValueState; // The state each value is in. + SmallPtrSet BBExecutable; // The bbs that are executable. + + std::vector InstWorkList; // Worklist of insts to process. + + std::vector BBWorkList; // The BasicBlock work list + /// KnownFeasibleEdges - Entries in this set are edges which have already had /// PHI nodes retriggered. typedef std::pair Edge; @@ -133,17 +130,16 @@ class SparseSolver { SparseSolver(const SparseSolver&) = delete; void operator=(const SparseSolver&) = delete; + public: explicit SparseSolver(AbstractLatticeFunction *Lattice) - : LatticeFunc(Lattice) {} - ~SparseSolver() { - delete LatticeFunc; - } - + : LatticeFunc(Lattice) {} + ~SparseSolver() { delete LatticeFunc; } + /// Solve - Solve for constants and executable blocks. /// void Solve(Function &F); - + void Print(Function &F, raw_ostream &OS) const; /// getLatticeState - Return the LatticeVal object that corresponds to the @@ -153,7 +149,7 @@ public: DenseMap::const_iterator I = ValueState.find(V); return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal(); } - + /// getOrInitValueState - Return the LatticeVal object that corresponds to the /// value, initializing the value's state if it hasn't been entered into the /// map yet. This function is necessary because not all values should start @@ -161,7 +157,7 @@ public: /// constants should be marked as constants. /// LatticeVal getOrInitValueState(Value *V); - + /// isEdgeFeasible - Return true if the control flow edge from the 'From' /// basic block to the 'To' basic block is currently feasible. If /// AggressiveUndef is true, then this treats values with unknown lattice @@ -176,29 +172,28 @@ public: bool isBlockExecutable(BasicBlock *BB) const { return BBExecutable.count(BB); } - + private: /// UpdateState - When the state for some instruction is potentially updated, /// this function notices and adds I to the worklist if needed. void UpdateState(Instruction &Inst, LatticeVal V); - + /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB); - + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB /// work list if it is not already executable. void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); - + /// getFeasibleSuccessors - Return a vector of booleans to indicate which /// successors are reachable from a given terminator instruction. 
void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl &Succs, bool AggressiveUndef); - + void visitInst(Instruction &I); void visitPHINode(PHINode &I); void visitTerminatorInst(TerminatorInst &TI); - }; } // end namespace llvm diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 1c1fdfef980d..7798e3c88248 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -27,6 +27,86 @@ #define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, #endif +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z") + +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new(unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z") + +/// void *new(unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z") + +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z") + +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new[](unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z") + +/// void *new[](unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z") + /// int _IO_getc(_IO_FILE * 
__fp); TLI_DEFINE_ENUM_INTERNAL(under_IO_getc) TLI_DEFINE_STRING_INTERNAL("_IO_getc") @@ -406,6 +486,15 @@ TLI_DEFINE_STRING_INTERNAL("floorf") /// long double floorl(long double x); TLI_DEFINE_ENUM_INTERNAL(floorl) TLI_DEFINE_STRING_INTERNAL("floorl") +/// int fls(int i); +TLI_DEFINE_ENUM_INTERNAL(fls) +TLI_DEFINE_STRING_INTERNAL("fls") +/// int flsl(long int i); +TLI_DEFINE_ENUM_INTERNAL(flsl) +TLI_DEFINE_STRING_INTERNAL("flsl") +/// int flsll(long long int i); +TLI_DEFINE_ENUM_INTERNAL(flsll) +TLI_DEFINE_STRING_INTERNAL("flsll") /// double fmax(double x, double y); TLI_DEFINE_ENUM_INTERNAL(fmax) TLI_DEFINE_STRING_INTERNAL("fmax") @@ -664,6 +753,7 @@ TLI_DEFINE_STRING_INTERNAL("modff") /// long double modfl(long double value, long double *iptr); TLI_DEFINE_ENUM_INTERNAL(modfl) TLI_DEFINE_STRING_INTERNAL("modfl") + /// double nearbyint(double x); TLI_DEFINE_ENUM_INTERNAL(nearbyint) TLI_DEFINE_STRING_INTERNAL("nearbyint") diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index e0a1ee378274..7becdf033dd2 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -42,7 +42,7 @@ class PreservedAnalyses; /// /// This class constructs tables that hold the target library information and /// make it available. However, it is somewhat expensive to compute and only -/// depends on the triple. So users typicaly interact with the \c +/// depends on the triple. So users typically interact with the \c /// TargetLibraryInfo wrapper below. class TargetLibraryInfoImpl { friend class TargetLibraryInfo; @@ -201,13 +201,13 @@ public: } bool isFunctionVectorizable(StringRef F, unsigned VF) const { return Impl->isFunctionVectorizable(F, VF); - }; + } bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); - }; + } StringRef getVectorizedFunction(StringRef F, unsigned VF) const { return Impl->getVectorizedFunction(F, VF); - }; + } /// \brief Tests if the function is both available and a candidate for /// optimized code generation. diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 01f00896410e..3913cc3f107c 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -42,11 +42,13 @@ class Value; /// \brief Information about a load/store intrinsic defined by the target. struct MemIntrinsicInfo { MemIntrinsicInfo() - : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0), + : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), NumMemRefs(0), PtrVal(nullptr) {} bool ReadMem; bool WriteMem; - bool Vol; + /// True only if this memory operation is non-volatile, non-atomic, and + /// unordered. (See LoadInst/StoreInst for details on each) + bool IsSimple; // Same Id is set by the target for corresponding load/store intrinsics. unsigned short MatchingId; int NumMemRefs; @@ -97,11 +99,14 @@ public: /// /// Many APIs in this interface return a cost. This enum defines the /// fundamental values that should be used to interpret (and produce) those - /// costs. The costs are returned as an unsigned rather than a member of this + /// costs. The costs are returned as an int rather than a member of this /// enumeration because it is expected that the cost of one IR instruction /// may have a multiplicative factor to it or otherwise won't fit directly /// into the enum. Moreover, it is common to sum or average costs which works /// better as simple integral values. 
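// ---- Editor's illustrative sketch (not part of the patch) ----
// Referring to the TargetLibraryInfo hunk above: how a vectorizer client
// queries the vector-function mappings. TLI is assumed to be a populated
// TargetLibraryInfo; the function name and VF are illustrative.
if (TLI.isFunctionVectorizable("sinf", /*VF=*/4)) {
  StringRef VecName = TLI.getVectorizedFunction("sinf", /*VF=*/4);
  // VecName is non-empty here, e.g. a target-provided SIMD routine.
  (void)VecName;
}
// ---- end sketch ----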
Thus this enum only provides constants. + /// Also note that the returned costs are signed integers to make it natural + /// to add, subtract, and test with zero (a common boundary condition). It is + /// not expected that 2^32 is a realistic cost to be modeling at any point. /// /// Note that these costs should usually reflect the intersection of code-size /// cost and execution cost. A free instruction is typically one that folds @@ -128,15 +133,15 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - unsigned getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy = nullptr) const; + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const; /// \brief Estimate the cost of a GEP operation when lowered. /// /// The contract for this function is the same as \c getOperationCost except /// that it supports an interface that provides extra information specific to /// the GEP operation. - unsigned getGEPCost(const Value *Ptr, ArrayRef Operands) const; + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) const; /// \brief Estimate the cost of a function call when lowered. /// @@ -147,32 +152,31 @@ public: /// This is the most basic query for estimating call cost: it only knows the /// function type and (potentially) the number of arguments at the call site. /// The latter is only interesting for varargs function types. - unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; + int getCallCost(FunctionType *FTy, int NumArgs = -1) const; /// \brief Estimate the cost of calling a specific function when lowered. /// /// This overload adds the ability to reason about the particular function /// being called in the event it is a library call with special lowering. - unsigned getCallCost(const Function *F, int NumArgs = -1) const; + int getCallCost(const Function *F, int NumArgs = -1) const; /// \brief Estimate the cost of calling a specific function when lowered. /// /// This overload allows specifying a set of candidate argument values. - unsigned getCallCost(const Function *F, + int getCallCost(const Function *F, ArrayRef Arguments) const; + + /// \brief Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const; + + /// \brief Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const; - /// \brief Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) const; - - /// \brief Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) const; - /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when @@ -188,7 +192,7 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. 
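// ---- Editor's illustrative sketch (not part of the patch) ----
// The signed cost type described above makes accumulation and threshold
// tests natural. TTI and BB are assumed to be a TargetTransformInfo and a
// BasicBlock from the caller; the threshold is illustrative.
int Cost = 0;
for (Instruction &I : BB)
  Cost += TTI.getUserCost(&I); // TCC_Free (0) contributes nothing
const int Threshold = 25;
if (Cost <= Threshold) {
  // e.g. the block is cheap enough to speculate or unswitch
}
// ---- end sketch ----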
- unsigned getUserCost(const User *U) const; + int getUserCost(const User *U) const; /// \brief Return true if branch divergence exists. /// @@ -308,12 +312,17 @@ public: bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; - /// \brief Return true if the target works with masked instruction - /// AVX2 allows masks for consecutive load and store for i32 and i64 elements. - /// AVX-512 architecture will also allow masks for non-consecutive memory - /// accesses. - bool isLegalMaskedStore(Type *DataType, int Consecutive) const; - bool isLegalMaskedLoad(Type *DataType, int Consecutive) const; + /// \brief Return true if the target supports masked load/store + /// AVX2 and AVX-512 targets allow masks for consecutive load and store for + /// 32 and 64 bit elements. + bool isLegalMaskedStore(Type *DataType) const; + bool isLegalMaskedLoad(Type *DataType) const; + + /// \brief Return true if the target supports masked gather/scatter + /// AVX-512 fully supports gather and scatter for vectors with 32 and 64 + /// bits scalar type. + bool isLegalMaskedScatter(Type *DataType) const; + bool isLegalMaskedGather(Type *DataType) const; /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store @@ -350,6 +359,9 @@ public: /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; + /// \brief Enable matching of interleaved access groups. + bool enableInterleavedAccessVectorization() const; + /// \brief Return hardware support for population count. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; @@ -358,19 +370,19 @@ public: /// \brief Return the expected cost of supporting the floating point operation /// of the specified type. - unsigned getFPOpCost(Type *Ty) const; + int getFPOpCost(Type *Ty) const; /// \brief Return the expected cost of materializing for the given integer /// immediate of the specified type. - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + int getIntImmCost(const APInt &Imm, Type *Ty) const; /// \brief Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) const; - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const; + int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const; /// @} /// \name Vector Target Information @@ -410,43 +422,51 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) const; /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const; + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const; /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 
/// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds. - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, - Type *SubTp = nullptr) const; + int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, + Type *SubTp = nullptr) const; /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; /// \return The expected cost of control-flow related instructions such as /// Phi, Ret, Br. - unsigned getCFInstrCost(unsigned Opcode) const; + int getCFInstrCost(unsigned Opcode) const; /// \returns The expected cost of compare and select instructions. - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = nullptr) const; + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const; + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; /// \return The cost of Load and Store instructions. - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; /// \return The cost of masked Load and Store instructions. - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; + + /// \return The cost of Gather or Scatter operation + /// \p Opcode - is a type of memory access Load or Store + /// \p DataTy - a vector type of the data to be loaded or stored + /// \p Ptr - pointer [or vector of pointers] - address[es] in memory + /// \p VariableMask - true when the memory access is predicated with a mask + /// that is not a compile-time constant + /// \p Alignment - alignment of single element + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment) const; /// \return The cost of the interleaved memory operation. /// \p Opcode is the memory operation code @@ -456,11 +476,9 @@ public: /// load allows gaps) /// \p Alignment is the alignment of the memory operation /// \p AddressSpace is address space of the pointer. - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) const; + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace) const; /// \brief Calculate the cost of performing a vector reduction. /// @@ -475,16 +493,18 @@ public: /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const; + int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const; - /// \returns The cost of Intrinsic instructions. - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) const; + /// \returns The cost of Intrinsic instructions. Types analysis only. 
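// ---- Editor's illustrative sketch (not part of the patch) ----
// Using the legality and cost queries above together, as a vectorizer might
// when deciding how to lower a strided load. TTI, VecTy, Ptr and Alignment
// are assumed to come from the caller.
int GatherCost = -1;
if (TTI.isLegalMaskedGather(VecTy))
  GatherCost = TTI.getGatherScatterOpCost(Instruction::Load, VecTy, Ptr,
                                          /*VariableMask=*/true, Alignment);
// Compare against the scalarized cost before committing to a gather.
// ---- end sketch ----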
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys) const; + + /// \returns The cost of Intrinsic instructions. Analyses the real arguments. + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) const; /// \returns The cost of Call instructions. - unsigned getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) const; + int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) const; /// \returns The number of pieces into which the provided type must be /// split during legalization. Zero is returned when the answer is unknown. @@ -497,7 +517,7 @@ public: /// The 'IsComplex' parameter is a hint that the address computation is likely /// to involve multiple instructions and as such unlikely to be merged into /// the address indexing mode. - unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const; + int getAddressComputationCost(Type *Ty, bool IsComplex = false) const; /// \returns The cost, if any, of keeping values of the given types alive /// over a callsite. @@ -521,8 +541,8 @@ public: /// \returns True if the two functions have compatible attributes for inlining /// purposes. - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const; + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; /// @} @@ -542,18 +562,18 @@ class TargetTransformInfo::Concept { public: virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; - virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; - virtual unsigned getGEPCost(const Value *Ptr, - ArrayRef Operands) = 0; - virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0; - virtual unsigned getCallCost(const Function *F, int NumArgs) = 0; - virtual unsigned getCallCost(const Function *F, + virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; + virtual int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) = 0; + virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; + virtual int getCallCost(const Function *F, int NumArgs) = 0; + virtual int getCallCost(const Function *F, + ArrayRef Arguments) = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) = 0; - virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) = 0; - virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) = 0; - virtual unsigned getUserCost(const User *U) = 0; + virtual int getUserCost(const User *U) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isLoweredToCall(const Function *F) = 0; @@ -564,8 +584,10 @@ public: int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; - virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0; - virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0; + virtual bool isLegalMaskedStore(Type *DataType) = 0; + virtual bool isLegalMaskedLoad(Type *DataType) = 0; + virtual bool isLegalMaskedScatter(Type *DataType) = 0; + virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; @@ -576,14 +598,15 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool 
enableAggressiveInterleaving(bool LoopHasReductions) = 0; + virtual bool enableInterleavedAccessVectorization() = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; - virtual unsigned getFPOpCost(Type *Ty) = 0; - virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0; - virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) = 0; - virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) = 0; + virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; + virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; + virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; @@ -592,40 +615,44 @@ public: OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) = 0; - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) = 0; - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; - virtual unsigned getCFInstrCost(unsigned Opcode) = 0; - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) = 0; - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) = 0; - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) = 0; - virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) = 0; + virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; + virtual int getCFInstrCost(unsigned Opcode) = 0; + virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) = 0; + virtual int getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) = 0; + virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) = 0; + virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) = 0; + virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) = 0; + virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) = 0; - virtual unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) = 0; - virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) = 0; - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) = 0; - virtual unsigned getCallInstrCost(Function *F, Type *RetTy, + virtual int getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) = 0; + virtual int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; - virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0; + virtual int getAddressComputationCost(Type *Ty, bool 
IsComplex) = 0; virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; - virtual bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const = 0; + virtual bool areInlineCompatible(const Function *Caller, + const Function *Callee) const = 0; }; template @@ -640,32 +667,32 @@ public: return Impl.getDataLayout(); } - unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { return Impl.getOperationCost(Opcode, Ty, OpTy); } - unsigned getGEPCost(const Value *Ptr, - ArrayRef Operands) override { - return Impl.getGEPCost(Ptr, Operands); + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) override { + return Impl.getGEPCost(PointeeType, Ptr, Operands); } - unsigned getCallCost(FunctionType *FTy, int NumArgs) override { + int getCallCost(FunctionType *FTy, int NumArgs) override { return Impl.getCallCost(FTy, NumArgs); } - unsigned getCallCost(const Function *F, int NumArgs) override { + int getCallCost(const Function *F, int NumArgs) override { return Impl.getCallCost(F, NumArgs); } - unsigned getCallCost(const Function *F, - ArrayRef Arguments) override { + int getCallCost(const Function *F, + ArrayRef Arguments) override { return Impl.getCallCost(F, Arguments); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) override { + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) override { return Impl.getIntrinsicCost(IID, RetTy, ParamTys); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) override { + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) override { return Impl.getIntrinsicCost(IID, RetTy, Arguments); } - unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); } + int getUserCost(const User *U) override { return Impl.getUserCost(U); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); @@ -688,11 +715,17 @@ public: return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } - bool isLegalMaskedStore(Type *DataType, int Consecutive) override { - return Impl.isLegalMaskedStore(DataType, Consecutive); + bool isLegalMaskedStore(Type *DataType) override { + return Impl.isLegalMaskedStore(DataType); } - bool isLegalMaskedLoad(Type *DataType, int Consecutive) override { - return Impl.isLegalMaskedLoad(DataType, Consecutive); + bool isLegalMaskedLoad(Type *DataType) override { + return Impl.isLegalMaskedLoad(DataType); + } + bool isLegalMaskedScatter(Type *DataType) override { + return Impl.isLegalMaskedScatter(DataType); + } + bool isLegalMaskedGather(Type *DataType) override { + return Impl.isLegalMaskedGather(DataType); } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, @@ -715,24 +748,25 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } + bool enableInterleavedAccessVectorization() override { + return Impl.enableInterleavedAccessVectorization(); + } PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { return 
Impl.getPopcntSupport(IntTyWidthInBit); } bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } - unsigned getFPOpCost(Type *Ty) override { - return Impl.getFPOpCost(Ty); - } + int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } - unsigned getIntImmCost(const APInt &Imm, Type *Ty) override { + int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } - unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) override { + int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { return Impl.getIntImmCost(Opc, Idx, Imm, Ty); } - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) override { + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } unsigned getNumberOfRegisters(bool Vector) override { @@ -752,56 +786,62 @@ public: return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) override { + int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) override { return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { return Impl.getCastInstrCost(Opcode, Dst, Src); } - unsigned getCFInstrCost(unsigned Opcode) override { + int getCFInstrCost(unsigned Opcode) override { return Impl.getCFInstrCost(Opcode); } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) override { + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override { return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) override { + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) override { + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) override { + return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + } + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) override { + int getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) override { return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); } 
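A short usage sketch for the int-returning immediate-cost query; the result still compares against the TCC_* constants, and isFreeImm is a hypothetical helper:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper: can Imm be materialized for free for type Ty?
static bool isFreeImm(const TargetTransformInfo &TTI, const APInt &Imm,
                      Type *Ty) {
  return TTI.getIntImmCost(Imm, Ty) == TargetTransformInfo::TCC_Free;
}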
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) override { + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys) override { return Impl.getIntrinsicInstrCost(ID, RetTy, Tys); } - unsigned getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) override { + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args); + } + int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) override { return Impl.getCallInstrCost(F, RetTy, Tys); } unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); } - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override { + int getAddressComputationCost(Type *Ty, bool IsComplex) override { return Impl.getAddressComputationCost(Ty, IsComplex); } unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) override { @@ -815,9 +855,9 @@ public: Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const override { - return Impl.hasCompatibleFunctionAttributes(Caller, Callee); + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const override { + return Impl.areInlineCompatible(Caller, Callee); } }; @@ -856,7 +896,7 @@ public: /// /// The callback will be called with a particular function for which the TTI /// is needed and must return a TTI object for that function. - TargetIRAnalysis(std::function TTICallback); + TargetIRAnalysis(std::function TTICallback); // Value semantics. We spell out the constructors for MSVC. TargetIRAnalysis(const TargetIRAnalysis &Arg) @@ -872,7 +912,7 @@ public: return *this; } - Result run(Function &F); + Result run(const Function &F); private: static char PassID; @@ -887,10 +927,10 @@ private: /// the analysis and thus use a function_ref which would be lighter weight. /// This may also be less error prone as the callback is likely to reference /// the external TargetMachine, and that reference needs to never dangle. - std::function TTICallback; + std::function TTICallback; /// \brief Helper function used as the callback in the default constructor. - static Result getDefaultTTI(Function &F); + static Result getDefaultTTI(const Function &F); }; /// \brief Wrapper pass for TargetTransformInfo. @@ -914,7 +954,7 @@ public: explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); - TargetTransformInfo &getTTI(Function &F); + TargetTransformInfo &getTTI(const Function &F); }; /// \brief Create an analysis pass wrapper around a TTI object. diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 035cb04870a1..43815234051e 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -19,8 +19,10 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" +#include "llvm/Analysis/VectorUtils.h" namespace llvm { @@ -60,6 +62,14 @@ public: // Otherwise, the default basic cost is used. 
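The TargetIRAnalysis callback now receives a const Function &. A minimal sketch of constructing the analysis under the new signature; the conservative DataLayout-only TTI mirrors what the default constructor produces:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static TargetIRAnalysis makeConservativeTIRA() {
  return TargetIRAnalysis([](const Function &F) {
    // No TargetMachine available: fall back to the DataLayout-based TTI.
    return TargetTransformInfo(F.getParent()->getDataLayout());
  });
}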
return TTI::TCC_Basic; + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::URem: + return TTI::TCC_Expensive; + case Instruction::IntToPtr: { // An inttoptr cast is free so long as the input is a legal integer type // which doesn't contain values outside the range of a pointer. @@ -92,7 +102,8 @@ public: } } - unsigned getGEPCost(const Value *Ptr, ArrayRef Operands) { + unsigned getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -137,9 +148,6 @@ public: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: case Intrinsic::experimental_gc_relocate: // These intrinsics don't actually represent code after lowering. @@ -199,9 +207,13 @@ public: return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLegalMaskedStore(Type *DataType, int Consecutive) { return false; } + bool isLegalMaskedStore(Type *DataType) { return false; } - bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; } + bool isLegalMaskedLoad(Type *DataType) { return false; } + + bool isLegalMaskedScatter(Type *DataType) { return false; } + + bool isLegalMaskedGather(Type *DataType) { return false; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { @@ -226,6 +238,8 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool enableInterleavedAccessVectorization() { return false; } + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { return TTI::PSK_Software; } @@ -287,6 +301,12 @@ public: return 1; } + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, + unsigned Alignment) { + return 1; + } + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, @@ -299,6 +319,10 @@ public: ArrayRef Tys) { return 1; } + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) { + return 1; + } unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) { return 1; @@ -321,8 +345,8 @@ public: return nullptr; } - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const { + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const { return (Caller->getFnAttribute("target-cpu") == Callee->getFnAttribute("target-cpu")) && (Caller->getFnAttribute("target-features") == @@ -378,6 +402,61 @@ public: return static_cast(this)->getCallCost(F, Arguments.size()); } + using BaseT::getGEPCost; + + unsigned getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) { + const GlobalValue *BaseGV = nullptr; + if (Ptr != nullptr) { + // TODO: will remove this when pointers have an opaque type. 
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() == + PointeeType && + "explicit pointee type doesn't match operand's pointee type"); + BaseGV = dyn_cast(Ptr->stripPointerCasts()); + } + bool HasBaseReg = (BaseGV == nullptr); + int64_t BaseOffset = 0; + int64_t Scale = 0; + + // Assumes the address space is 0 when Ptr is nullptr. + unsigned AS = + (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); + auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands); + for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { + // We assume that the cost of Scalar GEP with constant index and the + // cost of Vector GEP with splat constant index are the same. + const ConstantInt *ConstIdx = dyn_cast(*I); + if (!ConstIdx) + if (auto Splat = getSplatValue(*I)) + ConstIdx = dyn_cast(Splat); + if (isa(*GTI)) { + int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); + if (ConstIdx) + BaseOffset += ConstIdx->getSExtValue() * ElementSize; + else { + // Needs scale register. + if (Scale != 0) + // No addressing mode takes two scale registers. + return TTI::TCC_Basic; + Scale = ElementSize; + } + } else { + StructType *STy = cast(*GTI); + // For structures the index is always splat or scalar constant + assert(ConstIdx && "Unexpected GEP index"); + uint64_t Field = ConstIdx->getZExtValue(); + BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); + } + } + + if (static_cast(this)->isLegalAddressingMode( + PointerType::get(*GTI, AS), const_cast(BaseGV), + BaseOffset, HasBaseReg, Scale, AS)) { + return TTI::TCC_Free; + } + return TTI::TCC_Basic; + } + using BaseT::getIntrinsicCost; unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -397,9 +476,9 @@ public: return TTI::TCC_Free; // Model all PHI nodes as free. if (const GEPOperator *GEP = dyn_cast(U)) { - SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - return static_cast(this) - ->getGEPCost(GEP->getPointerOperand(), Indices); + SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + return static_cast(this)->getGEPCost( + GEP->getSourceElementType(), GEP->getPointerOperand(), Indices); } if (auto CS = ImmutableCallSite(U)) { diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h new file mode 100644 index 000000000000..7b44ac73f1fa --- /dev/null +++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -0,0 +1,93 @@ +//===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a metadata-based TBAA. See the source file for +/// details on the algorithm. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H +#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple AA result that uses TBAA metadata to answer queries. 
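Matching the getUserCost change, callers now thread the GEP's explicit source element type through the cost query. A minimal sketch; gepCost is a hypothetical helper:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Hypothetical helper: query the cost of a GEP with its explicit pointee type.
static int gepCost(const TargetTransformInfo &TTI, const GEPOperator *GEP) {
  SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
  return TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                        Indices);
}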
+class TypeBasedAAResult : public AAResultBase { + friend AAResultBase; + +public: + explicit TypeBasedAAResult(const TargetLibraryInfo &TLI) + : AAResultBase(TLI) {} + TypeBasedAAResult(TypeBasedAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + FunctionModRefBehavior getModRefBehavior(const Function *F); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + +private: + bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class TypeBasedAA { +public: + typedef TypeBasedAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + TypeBasedAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "TypeBasedAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the TypeBasedAAResult object. +class TypeBasedAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + TypeBasedAAWrapperPass(); + + TypeBasedAAResult &getResult() { return *Result; } + const TypeBasedAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createTypeBasedAAWrapperPass - This pass implements metadata-based +// type-based alias analysis. +// +ImmutablePass *createTypeBasedAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 653821d02271..8e0291068472 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -16,20 +16,23 @@ #define LLVM_ANALYSIS_VALUETRACKING_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/DataTypes.h" namespace llvm { - class Value; - class Instruction; class APInt; - class DataLayout; - class StringRef; - class MDNode; + class AddOperator; class AssumptionCache; + class DataLayout; class DominatorTree; - class TargetLibraryInfo; + class Instruction; + class Loop; class LoopInfo; + class MDNode; + class StringRef; + class TargetLibraryInfo; + class Value; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -46,9 +49,10 @@ namespace llvm { const DominatorTree *DT = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero + /// \p KnownOne the set of bits that are known to be one void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero); - /// Returns true if LHS and RHS have no common bits set. 
+                                        APInt &KnownZero, APInt &KnownOne);
+  /// Return true if LHS and RHS have no common bits set.
   bool haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
                            AssumptionCache *AC = nullptr,
                            const Instruction *CxtI = nullptr,
@@ -66,7 +70,7 @@ namespace llvm {
   /// exactly one bit set when defined. For vectors return true if every
   /// element is known to be a power of two when defined. Supports values with
   /// integer or pointer type and vectors of integers. If 'OrZero' is set then
-  /// returns true if the given value is either a power of two or zero.
+  /// return true if the given value is either a power of two or zero.
   bool isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL,
                               bool OrZero = false, unsigned Depth = 0,
                               AssumptionCache *AC = nullptr,
@@ -82,6 +86,19 @@ namespace llvm {
                        const Instruction *CxtI = nullptr,
                        const DominatorTree *DT = nullptr);
+  /// Returns true if the given value is known to be non-negative.
+  bool isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth = 0,
+                          AssumptionCache *AC = nullptr,
+                          const Instruction *CxtI = nullptr,
+                          const DominatorTree *DT = nullptr);
+
+  /// isKnownNonEqual - Return true if the given values are known to be
+  /// non-equal when defined. Supports scalar integer types only.
+  bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                       AssumptionCache *AC = nullptr,
+                       const Instruction *CxtI = nullptr,
+                       const DominatorTree *DT = nullptr);
+
   /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
   /// this predicate to simplify operations downstream. Mask is known to be
   /// zero for bits that V cannot have.
@@ -118,12 +135,12 @@ namespace llvm {
                        bool LookThroughSExt = false,
                        unsigned Depth = 0);
-  /// CannotBeNegativeZero - Return true if we can prove that the specified FP 
+  /// CannotBeNegativeZero - Return true if we can prove that the specified FP
   /// value is never equal to -0.0.
   ///
   bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
-  /// CannotBeOrderedLessThanZero - Return true if we can prove that the 
+  /// CannotBeOrderedLessThanZero - Return true if we can prove that the
   /// specified FP value is either a NaN or never less than 0.0.
   ///
   bool CannotBeOrderedLessThanZero(const Value *V, unsigned Depth = 0);
@@ -134,7 +151,7 @@ namespace llvm {
   /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
   /// byte store (e.g. i16 0x1234), return null.
   Value *isBytewiseValue(Value *V);
-  
+
   /// FindInsertedValue - Given an aggregate and a sequence of indices, see if
   /// the scalar value indexed is already around as a register, for example if
   /// it were inserted directly into the aggregate.
@@ -156,7 +173,7 @@ namespace llvm {
     return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
                                             DL);
   }
-  
+
   /// getConstantStringInfo - This function computes the length of a
   /// null-terminated C string pointed to by V. If successful, it returns true
   /// and returns the string in Str. If unsuccessful, it returns false. This
@@ -227,7 +244,17 @@ namespace llvm {
                                       const Instruction *CtxI = nullptr,
                                       const DominatorTree *DT = nullptr,
                                       const TargetLibraryInfo *TLI = nullptr);
-  
+
+  /// Returns true if V is always a dereferenceable pointer with alignment
+  /// greater than or equal to that requested. If the context instruction is
+  /// specified, this performs context-sensitive analysis and returns true if
+  /// the pointer is dereferenceable at the specified instruction.
+  bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+                                          const DataLayout &DL,
+                                          const Instruction *CtxI = nullptr,
+                                          const DominatorTree *DT = nullptr,
+                                          const TargetLibraryInfo *TLI = nullptr);
+
   /// isSafeToSpeculativelyExecute - Return true if the instruction does not
   /// have any effects besides calculating the result and does not have
   /// undefined behavior.
@@ -257,6 +284,16 @@ namespace llvm {
                                     const DominatorTree *DT = nullptr,
                                     const TargetLibraryInfo *TLI = nullptr);
+  /// Returns true if the result or effects of the given instruction \p I
+  /// depend on or influence global memory.
+  /// Memory dependence arises for example if the instruction reads from
+  /// memory or may produce effects or undefined behavior. Memory dependent
+  /// instructions generally cannot be reordered with respect to other memory
+  /// dependent instructions or moved into non-dominated basic blocks.
+  /// Instructions which just compute a value based on the values of their
+  /// operands are not memory dependent.
+  bool mayBeMemoryDependent(const Instruction &I);
+
   /// isKnownNonNull - Return true if this pointer couldn't possibly be null by
   /// its definition. This returns true for allocas, non-extern-weak globals
   /// and byval arguments.
@@ -288,16 +325,98 @@ namespace llvm {
                                              AssumptionCache *AC,
                                              const Instruction *CxtI,
                                              const DominatorTree *DT);
-  
+  OverflowResult computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+                                             const DataLayout &DL,
+                                             AssumptionCache *AC = nullptr,
+                                             const Instruction *CxtI = nullptr,
+                                             const DominatorTree *DT = nullptr);
+  /// This version also leverages the sign bit of Add if known.
+  OverflowResult computeOverflowForSignedAdd(AddOperator *Add,
+                                             const DataLayout &DL,
+                                             AssumptionCache *AC = nullptr,
+                                             const Instruction *CxtI = nullptr,
+                                             const DominatorTree *DT = nullptr);
+
+  /// Return true if this function can prove that the instruction I will
+  /// always transfer execution to one of its successors (including the next
+  /// instruction that follows within a basic block). E.g. this is not
+  /// guaranteed for function calls that could loop infinitely.
+  ///
+  /// In other words, this function returns false for instructions that may
+  /// transfer execution or fail to transfer execution in a way that is not
+  /// captured in the CFG nor in the sequence of instructions within a basic
+  /// block.
+  ///
+  /// Undefined behavior is assumed not to happen, so e.g. division is
+  /// guaranteed to transfer execution to the following instruction even
+  /// though division by zero might cause undefined behavior.
+  bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
+
+  /// Return true if this function can prove that the instruction I
+  /// is executed for every iteration of the loop L.
+  ///
+  /// Note that this currently only considers the loop header.
+  bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+                                              const Loop *L);
+
+  /// Return true if this function can prove that I is guaranteed to yield
+  /// full-poison (all bits poison) if at least one of its operands is
+  /// full-poison (all bits poison).
+  ///
+  /// The exact rules for how poison propagates through instructions have
+  /// not been settled as of 2015-07-10, so this function is conservative
+  /// and only considers poison to be propagated in uncontroversial
+  /// cases. There is no attempt to track values that may be only partially
+  /// poison.
+ bool propagatesFullPoison(const Instruction *I); + + /// Return either nullptr or an operand of I such that I will trigger + /// undefined behavior if I is executed and that operand has a full-poison + /// value (all bits poison). + const Value *getGuaranteedNonFullPoisonOp(const Instruction *I); + + /// Return true if this function can prove that if PoisonI is executed + /// and yields a full-poison value (all bits poison), then that will + /// trigger undefined behavior. + /// + /// Note that this currently only considers the basic block that is + /// the parent of I. + bool isKnownNotFullPoison(const Instruction *PoisonI); + /// \brief Specific patterns of select instructions we can match. enum SelectPatternFlavor { SPF_UNKNOWN = 0, - SPF_SMIN, // Signed minimum - SPF_UMIN, // Unsigned minimum - SPF_SMAX, // Signed maximum - SPF_UMAX, // Unsigned maximum - SPF_ABS, // Absolute value - SPF_NABS // Negated absolute value + SPF_SMIN, /// Signed minimum + SPF_UMIN, /// Unsigned minimum + SPF_SMAX, /// Signed maximum + SPF_UMAX, /// Unsigned maximum + SPF_FMINNUM, /// Floating point minnum + SPF_FMAXNUM, /// Floating point maxnum + SPF_ABS, /// Absolute value + SPF_NABS /// Negated absolute value + }; + /// \brief Behavior when a floating point min/max is given one NaN and one + /// non-NaN as input. + enum SelectPatternNaNBehavior { + SPNB_NA = 0, /// NaN behavior not applicable. + SPNB_RETURNS_NAN, /// Given one NaN input, returns the NaN. + SPNB_RETURNS_OTHER, /// Given one NaN input, returns the non-NaN. + SPNB_RETURNS_ANY /// Given one NaN input, can return either (or + /// it has been determined that no operands can + /// be NaN). + }; + struct SelectPatternResult { + SelectPatternFlavor Flavor; + SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is + /// SPF_FMINNUM or SPF_FMAXNUM. + bool Ordered; /// When implementing this min/max pattern as + /// fcmp; select, does the fcmp have to be + /// ordered? + + /// \brief Return true if \p SPF is a min or a max pattern. + static bool isMinOrMax(SelectPatternFlavor SPF) { + return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS); + } }; /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind /// and providing the out parameter results if we successfully match. @@ -314,9 +433,26 @@ namespace llvm { /// /// -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt /// - SelectPatternFlavor matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, + SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp = nullptr); + /// Parse out a conservative ConstantRange from !range metadata. + /// + /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20). + ConstantRange getConstantRangeFromMetadata(MDNode &RangeMD); + + /// Return true if RHS is known to be implied by LHS. A & B must be i1 + /// (boolean) values or a vector of such values. Note that the truth table for + /// implication is the same as <=u on i1 values (but not <=s!). 
The truth + /// table for both is: + /// | T | F (B) + /// T | T | F + /// F | T | T + /// (A) + bool isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index d8e9ca42e623..531803adf5e4 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -14,15 +14,19 @@ #ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" namespace llvm { +struct DemandedBits; class GetElementPtrInst; class Loop; class ScalarEvolution; +class TargetTransformInfo; class Type; class Value; @@ -62,8 +66,8 @@ Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI); /// pointer. unsigned getGEPInductionOperand(const GetElementPtrInst *Gep); -/// \brief If the argument is a GEP, then returns the operand identified by -/// getGEPInductionOperand. However, if there is some other non-loop-invariant +/// \brief If the argument is a GEP, then returns the operand identified by +/// getGEPInductionOperand. However, if there is some other non-loop-invariant /// operand, it returns that instead. Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp); @@ -79,6 +83,50 @@ Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp); /// from the vector. Value *findScalarElement(Value *V, unsigned EltNo); +/// \brief Get splat value if the input is a splat vector or return nullptr. +/// The value may be extracted from a splat constants vector or from +/// a sequence of instructions that broadcast a single value into a vector. +const Value *getSplatValue(const Value *V); + +/// \brief Compute a map of integer instructions to their minimum legal type +/// size. +/// +/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int +/// type (e.g. i32) whenever arithmetic is performed on them. +/// +/// For targets with native i8 or i16 operations, usually InstCombine can shrink +/// the arithmetic type down again. However InstCombine refuses to create +/// illegal types, so for targets without i8 or i16 registers, the lengthening +/// and shrinking remains. +/// +/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when +/// their scalar equivalents do not, so during vectorization it is important to +/// remove these lengthens and truncates when deciding the profitability of +/// vectorization. +/// +/// This function analyzes the given range of instructions and determines the +/// minimum type size each can be converted to. It attempts to remove or +/// minimize type size changes across each def-use chain, so for example in the +/// following code: +/// +/// %1 = load i8, i8* +/// %2 = add i8 %1, 2 +/// %3 = load i16, i16* +/// %4 = zext i8 %2 to i32 +/// %5 = zext i16 %3 to i32 +/// %6 = add i32 %4, %5 +/// %7 = trunc i32 %6 to i16 +/// +/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes +/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}. +/// +/// If the optional TargetTransformInfo is provided, this function tries harder +/// to do less work by only looking at illegal types. 
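A short usage sketch for the new SelectPatternResult return type; isMinMaxIdiom is illustrative:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative: recognize integer and FP min/max idioms behind a select.
// For the new SPF_FMINNUM/SPF_FMAXNUM flavors, NaNBehavior and Ordered
// describe the NaN semantics of the matched pattern.
static bool isMinMaxIdiom(SelectInst *SI) {
  Value *LHS, *RHS;
  SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS);
  return SelectPatternResult::isMinOrMax(SPR.Flavor);
}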
+MapVector +computeMinimumValueSizes(ArrayRef Blocks, + DemandedBits &DB, + const TargetTransformInfo *TTI=nullptr); + } // llvm namespace #endif diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h index 52151409f946..96a15c1ec45c 100644 --- a/include/llvm/AsmParser/Parser.h +++ b/include/llvm/AsmParser/Parser.h @@ -18,6 +18,7 @@ namespace llvm { +class Constant; class LLVMContext; class Module; struct SlotMapping; @@ -79,6 +80,17 @@ std::unique_ptr parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err, SlotMapping *Slots = nullptr); +/// Parse a type and a constant value in the given string. +/// +/// The constant value can be any LLVM constant, including a constant +/// expression. +/// +/// \param Slots The optional slot mapping that will restore the parsing state +/// of the module. +/// \return null on error. +Constant *parseConstantValue(StringRef Asm, SMDiagnostic &Err, const Module &M, + const SlotMapping *Slots = nullptr); + } // End llvm namespace #endif diff --git a/include/llvm/AsmParser/SlotMapping.h b/include/llvm/AsmParser/SlotMapping.h index c5f61d25c3a8..bd7e8fcad8bc 100644 --- a/include/llvm/AsmParser/SlotMapping.h +++ b/include/llvm/AsmParser/SlotMapping.h @@ -14,6 +14,7 @@ #ifndef LLVM_ASMPARSER_SLOTMAPPING_H #define LLVM_ASMPARSER_SLOTMAPPING_H +#include "llvm/ADT/StringMap.h" #include "llvm/IR/TrackingMDRef.h" #include #include @@ -21,12 +22,19 @@ namespace llvm { class GlobalValue; +class Type; -/// This struct contains the mapping from the slot numbers to unnamed metadata -/// nodes and global values. +/// This struct contains the mappings from the slot numbers to unnamed metadata +/// nodes, global values and types. It also contains the mapping for the named +/// types. +/// It can be used to save the parsing state of an LLVM IR module so that the +/// textual references to the values in the module can be parsed outside of the +/// module's source. struct SlotMapping { std::vector GlobalValues; std::map MetadataNodes; + StringMap NamedTypes; + std::map Types; }; } // end namespace llvm diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h index ae915c688ba0..a1272cf156e5 100644 --- a/include/llvm/Bitcode/BitcodeWriterPass.h +++ b/include/llvm/Bitcode/BitcodeWriterPass.h @@ -29,8 +29,12 @@ class PreservedAnalyses; /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. +/// +/// If \c EmitFunctionSummary, emit the function summary index (currently +/// for use in ThinLTO optimization). ModulePass *createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); /// \brief Pass for writing a module of IR out to a bitcode file. /// @@ -39,15 +43,21 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str, class BitcodeWriterPass { raw_ostream &OS; bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: /// \brief Construct a bitcode writer pass around a particular output stream. /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). 
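The new stand-alone constant parser can be driven like this; a minimal sketch in which the parsed string is an arbitrary example:

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;

static Constant *parseAnswer(const Module &M) {
  SMDiagnostic Err;
  // Parses a type and a constant in one string; returns null and fills Err
  // on failure.
  return parseConstantValue("i32 42", Err, M);
}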
explicit BitcodeWriterPass(raw_ostream &OS, - bool ShouldPreserveUseListOrder = false) - : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false) + : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} /// \brief Run the bitcode writer pass, and output the module to the selected /// output stream. diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 4c040a7f3e22..c0cf6cde887f 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -325,6 +325,8 @@ public: // If we run out of data, stop at the end of the stream. if (BytesRead == 0) { + CurWord = 0; + BitsInCurWord = 0; Size = NextChar; return; } diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 9f23023a1419..438f4a6fb69b 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -15,6 +15,8 @@ #ifndef LLVM_BITCODE_BITSTREAMWRITER_H #define LLVM_BITCODE_BITSTREAMWRITER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" @@ -45,9 +47,9 @@ class BitstreamWriter { struct Block { unsigned PrevCodeSize; - unsigned StartSizeWord; + size_t StartSizeWord; std::vector> PrevAbbrevs; - Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} + Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} }; /// BlockScope - This tracks the current blocks that we have entered. @@ -61,12 +63,6 @@ class BitstreamWriter { }; std::vector BlockInfoRecords; - // BackpatchWord - Backpatch a 32-bit word in the output with the specified - // value. - void BackpatchWord(unsigned ByteNo, unsigned NewWord) { - support::endian::write32le(&Out[ByteNo], NewWord); - } - void WriteByte(unsigned char Value) { Out.push_back(Value); } @@ -77,12 +73,10 @@ class BitstreamWriter { reinterpret_cast(&Value + 1)); } - unsigned GetBufferOffset() const { - return Out.size(); - } + size_t GetBufferOffset() const { return Out.size(); } - unsigned GetWordIndex() const { - unsigned Offset = GetBufferOffset(); + size_t GetWordIndex() const { + size_t Offset = GetBufferOffset(); assert((Offset & 3) == 0 && "Not 32-bit aligned"); return Offset / 4; } @@ -99,10 +93,25 @@ public: /// \brief Retrieve the current position in the stream, in bits. uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + /// \brief Retrieve the number of bits currently used to encode an abbrev ID. + unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. //===--------------------------------------------------------------------===// + /// Backpatch a 32-bit word in the output at the given bit offset + /// with the specified value. 
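A sketch of requesting the new summary emission through the pass entry point; the wiring is illustrative:

#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void addSummaryEmittingWriter(legacy::PassManager &PM,
                                     raw_ostream &OS) {
  // EmitFunctionSummary=true additionally writes the ThinLTO function
  // summary block alongside the module.
  PM.add(createBitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false,
                                 /*EmitFunctionSummary=*/true));
}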
+ void BackpatchWord(uint64_t BitNo, unsigned NewWord) { + using namespace llvm::support; + unsigned ByteNo = BitNo / 8; + assert((!endian::readAtBitAlignment( + &Out[ByteNo], BitNo & 7)) && + "Expected to be patching over 0-value placeholders"); + endian::writeAtBitAlignment( + &Out[ByteNo], NewWord, BitNo & 7); + } + void Emit(uint32_t Val, unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Invalid value size!"); assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); @@ -200,7 +209,7 @@ public: EmitVBR(CodeLen, bitc::CodeLenWidth); FlushToWord(); - unsigned BlockSizeWordIndex = GetWordIndex(); + size_t BlockSizeWordIndex = GetWordIndex(); unsigned OldCodeSize = CurCodeSize; // Emit a placeholder, which will be replaced when the block is popped. @@ -231,11 +240,11 @@ public: FlushToWord(); // Compute the size of the block, in words, not counting the size field. - unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; - unsigned ByteNo = B.StartSizeWord*4; + size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1; + uint64_t BitNo = uint64_t(B.StartSizeWord) * 32; // Update the block size field in the header of this sub-block. - BackpatchWord(ByteNo, SizeInWords); + BackpatchWord(BitNo, SizeInWords); // Restore the inner block's code size and abbrev table. CurCodeSize = B.PrevCodeSize; @@ -285,10 +294,12 @@ private: /// EmitRecordWithAbbrevImpl - This is the core implementation of the record /// emission code. If BlobData is non-null, then it specifies an array of /// data that should be emitted as part of the Blob or Array operand that is - /// known to exist at the end of the record. - template - void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl &Vals, - StringRef Blob) { + /// known to exist at the end of the record. If Code is specified, then + /// it is the record code to emit before the Vals, which must not contain + /// the code. + template + void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef Vals, + StringRef Blob, Optional Code) { const char *BlobData = Blob.data(); unsigned BlobLen = (unsigned) Blob.size(); unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; @@ -297,9 +308,23 @@ private: EmitCode(Abbrev); + unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); + if (Code) { + assert(e && "Expected non-empty abbreviation"); + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); + + if (Op.isLiteral()) + EmitAbbreviatedLiteral(Op, Code.getValue()); + else { + assert(Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob && + "Expected literal or scalar"); + EmitAbbreviatedField(Op, Code.getValue()); + } + } + unsigned RecordIdx = 0; - for (unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); - i != e; ++i) { + for (; i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); @@ -307,7 +332,7 @@ private: ++RecordIdx; } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. - assert(i+2 == e && "array op not second to last?"); + assert(i + 2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); // If this record has blob data, emit it, otherwise we must have record @@ -381,32 +406,29 @@ public: /// EmitRecord - Emit the specified record to the stream, using an abbrev if /// we have one to compress the output. 
- template - void EmitRecord(unsigned Code, SmallVectorImpl &Vals, - unsigned Abbrev = 0) { + template + void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) { if (!Abbrev) { // If we don't have an abbrev to use, emit this in its fully unabbreviated // form. + auto Count = static_cast(makeArrayRef(Vals).size()); EmitCode(bitc::UNABBREV_RECORD); EmitVBR(Code, 6); - EmitVBR(static_cast(Vals.size()), 6); - for (unsigned i = 0, e = static_cast(Vals.size()); i != e; ++i) + EmitVBR(Count, 6); + for (unsigned i = 0, e = Count; i != e; ++i) EmitVBR64(Vals[i], 6); return; } - // Insert the code into Vals to treat it uniformly. - Vals.insert(Vals.begin(), Code); - - EmitRecordWithAbbrev(Abbrev, Vals); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code); } /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. /// Unlike EmitRecord, the code for the record should be included in Vals as /// the first entry. - template - void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl &Vals) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); + template + void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None); } /// EmitRecordWithBlob - Emit the specified record to the stream, using an @@ -414,29 +436,30 @@ public: /// specified by the pointer and length specified at the end. In contrast to /// EmitRecord, this routine expects that the first entry in Vals is the code /// of the record. - template - void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl &Vals, + template + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, StringRef Blob) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None); } - template - void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl &Vals, + template + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, const char *BlobData, unsigned BlobLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(BlobData, BlobLen), None); } /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records /// that end with an array. - template - void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl &Vals, - StringRef Array) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); + template + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + StringRef Array) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None); } - template - void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl &Vals, - const char *ArrayData, unsigned ArrayLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, - ArrayLen)); + template + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + const char *ArrayData, unsigned ArrayLen) { + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(ArrayData, ArrayLen), None); } //===--------------------------------------------------------------------===// diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 7130ee755237..bcc84bedbed0 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -23,28 +23,52 @@ namespace llvm { namespace bitc { // The only top-level block type defined is for a module. 
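With the container-based EmitRecord above, callers no longer need a mutable SmallVector just so the record code can be prepended. A minimal sketch; the block ID and record code are placeholders:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitstreamWriter.h"

using namespace llvm;

static void writeToyBlock(SmallVectorImpl<char> &Buffer) {
  BitstreamWriter Stream(Buffer);
  Stream.EnterSubblock(bitc::FIRST_APPLICATION_BLOCKID, /*CodeLen=*/3);
  // Any contiguous container works, and Vals is no longer mutated to carry
  // the record code at the front.
  uint64_t Vals[] = {1, 2, 3};
  Stream.EmitRecord(/*Code=*/1, Vals);
  Stream.ExitBlock(); // backpatches the block-length placeholder
}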
-  enum BlockIDs {
-    // Blocks
-    MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
+enum BlockIDs {
+  // Blocks
+  MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
 
-    // Module sub-block id's.
-    PARAMATTR_BLOCK_ID,
-    PARAMATTR_GROUP_BLOCK_ID,
+  // Module sub-block id's.
+  PARAMATTR_BLOCK_ID,
+  PARAMATTR_GROUP_BLOCK_ID,
 
-    CONSTANTS_BLOCK_ID,
-    FUNCTION_BLOCK_ID,
+  CONSTANTS_BLOCK_ID,
+  FUNCTION_BLOCK_ID,
 
-    UNUSED_ID1,
+  // Block intended to contain information on the bitcode versioning.
+  // Can be used to provide better error messages when we fail to parse a
+  // bitcode file.
+  IDENTIFICATION_BLOCK_ID,
 
-    VALUE_SYMTAB_BLOCK_ID,
-    METADATA_BLOCK_ID,
-    METADATA_ATTACHMENT_ID,
+  VALUE_SYMTAB_BLOCK_ID,
+  METADATA_BLOCK_ID,
+  METADATA_ATTACHMENT_ID,
 
-    TYPE_BLOCK_ID_NEW,
+  TYPE_BLOCK_ID_NEW,
 
-    USELIST_BLOCK_ID
-  };
+  USELIST_BLOCK_ID,
+  MODULE_STRTAB_BLOCK_ID,
+  FUNCTION_SUMMARY_BLOCK_ID,
+
+  OPERAND_BUNDLE_TAGS_BLOCK_ID,
+
+  METADATA_KIND_BLOCK_ID
+};
+
+/// Identification block contains a string that describes the producer details,
+/// and an epoch that defines the auto-upgrade capability.
+enum IdentificationCodes {
+  IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
+  IDENTIFICATION_CODE_EPOCH = 2,  // EPOCH: [epoch#]
+};
+
+/// The epoch that defines the auto-upgrade compatibility for the bitcode.
+///
+/// LLVM guarantees in a major release that a minor release can read bitcode
+/// generated by previous minor releases. We translate this by making the
+/// reader accept only bitcode with the same epoch, except for the X.0 release
+/// which also accepts N-1.
+enum { BITCODE_CURRENT_EPOCH = 0 };
 
   /// MODULE blocks have a number of optional fields and subblocks.
   enum ModuleCodes {
@@ -66,13 +90,21 @@ namespace bitc {
     MODULE_CODE_FUNCTION = 8,
 
     // ALIAS: [alias type, aliasee val#, linkage, visibility]
-    MODULE_CODE_ALIAS = 9,
+    MODULE_CODE_ALIAS_OLD = 9,
 
     // MODULE_CODE_PURGEVALS: [numvals]
     MODULE_CODE_PURGEVALS = 10,
 
     MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N]
     MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name]
+
+    MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset]
+
+    // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility]
+    MODULE_CODE_ALIAS = 14,
+
+    // METADATA_VALUES: [numvals]
+    MODULE_CODE_METADATA_VALUES = 15,
   };
 
   /// PARAMATTR blocks have code for defining a parameter attribute set.
@@ -121,7 +153,13 @@ namespace bitc {
     TYPE_CODE_STRUCT_NAME = 19,  // STRUCT_NAME: [strchr x N]
     TYPE_CODE_STRUCT_NAMED = 20, // STRUCT_NAMED: [ispacked, eltty x N]
 
-    TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N]
+    TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N]
+
+    TYPE_CODE_TOKEN = 22 // TOKEN
+  };
+
+  enum OperandBundleTagCode {
+    OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N]
   };
 
   // The type symbol table only has one code (TST_ENTRY_CODE).
@@ -129,10 +167,25 @@ namespace bitc {
     TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N]
   };
 
-  // The value symbol table only has one code (VST_ENTRY_CODE).
+  // Value symbol table codes.
   enum ValueSymtabCodes {
-    VST_CODE_ENTRY = 1,  // VST_ENTRY: [valid, namechar x N]
-    VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N]
+    VST_CODE_ENTRY = 1,   // VST_ENTRY: [valueid, namechar x N]
+    VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N]
+    VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N]
+    // VST_COMBINED_FNENTRY: [offset, namechar x N]
+    VST_CODE_COMBINED_FNENTRY = 4
+  };
+
+  // The module path symbol table only has one code (MST_CODE_ENTRY).
+ enum ModulePathSymtabCodes { + MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + }; + + // The function summary section uses different codes in the per-module + // and combined index cases. + enum FunctionSummarySymtabCodes { + FS_CODE_PERMODULE_ENTRY = 1, // FS_ENTRY: [valueid, islocal, instcount] + FS_CODE_COMBINED_ENTRY = 2, // FS_ENTRY: [modid, instcount] }; enum MetadataCodes { @@ -167,7 +220,9 @@ namespace bitc { METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY=31, // [distinct, tag, scope, entity, line, name] - METADATA_MODULE=32, // [distinct, scope, name, ...] + METADATA_MODULE = 32, // [distinct, scope, name, ...] + METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] + METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each @@ -287,6 +342,16 @@ namespace bitc { SYNCHSCOPE_CROSSTHREAD = 1 }; + /// Markers and flags for call instruction. + enum CallMarkersFlags { + CALL_TAIL = 0, + CALL_CCONV = 1, + CALL_MUSTTAIL = 14, + CALL_EXPLICIT_TYPE = 15, + CALL_NOTAIL = 16, + CALL_FMF = 17 // Call has optional fast-math-flags. + }; + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). enum FunctionCodes { @@ -354,6 +419,14 @@ namespace bitc { FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, // vol,ordering,synchscope] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] + FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] + FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] + FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...] + FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...] + FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] + // 53 is unused. + // 54 is unused. + FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] }; enum UseListCodes { @@ -407,7 +480,12 @@ namespace bitc { ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42, ATTR_KIND_CONVERGENT = 43, ATTR_KIND_SAFESTACK = 44, - ATTR_KIND_ARGMEMONLY = 45 + ATTR_KIND_ARGMEMONLY = 45, + ATTR_KIND_SWIFT_SELF = 46, + ATTR_KIND_SWIFT_ERROR = 47, + ATTR_KIND_NO_RECURSE = 48, + ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, + ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50 }; enum ComdatSelectionKindCodes { diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 452ec3bd0187..60d865fd2355 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -15,6 +15,7 @@ #define LLVM_BITCODE_READERWRITER_H #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" @@ -36,27 +37,54 @@ namespace llvm { ErrorOr> getLazyBitcodeModule(std::unique_ptr &&Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr, bool ShouldLazyLoadMetadata = false); /// Read the header of the specified stream and prepare for lazy /// deserialization and streaming of function bodies. 
- ErrorOr> getStreamedBitcodeModule( - StringRef Name, std::unique_ptr Streamer, - LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + ErrorOr> + getStreamedBitcodeModule(StringRef Name, + std::unique_ptr Streamer, + LLVMContext &Context); /// Read the header of the specified bitcode buffer and extract just the /// triple information. If successful, this returns a string. On error, this /// returns "". - std::string - getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + std::string getBitcodeTargetTriple(MemoryBufferRef Buffer, + LLVMContext &Context); + + /// Read the header of the specified bitcode buffer and extract just the + /// producer string information. If successful, this returns a string. On + /// error, this returns "". + std::string getBitcodeProducerString(MemoryBufferRef Buffer, + LLVMContext &Context); /// Read the specified bitcode file, returning the module. - ErrorOr> - parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + ErrorOr> parseBitcodeFile(MemoryBufferRef Buffer, + LLVMContext &Context); + + /// Check if the given bitcode buffer contains a function summary block. + bool hasFunctionSummary(MemoryBufferRef Buffer, + DiagnosticHandlerFunction DiagnosticHandler); + + /// Parse the specified bitcode buffer, returning the function info index. + /// If IsLazy is true, parse the entire function summary into + /// the index. Otherwise skip the function summary section, and only create + /// an index object with a map from function name to function summary offset. + /// The index is used to perform lazy function summary reading later. + ErrorOr> + getFunctionInfoIndex(MemoryBufferRef Buffer, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false); + + /// This method supports lazy reading of function summary data from the + /// combined index during function importing. When reading the combined index + /// file, getFunctionInfoIndex is first invoked with IsLazy=true. + /// Then this method is called for each function considered for importing, + /// to parse the summary information for the given function name into + /// the index. + std::error_code readFunctionSummary( + MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName, std::unique_ptr Index); /// \brief Write the specified module to the specified raw output stream. /// @@ -66,8 +94,18 @@ namespace llvm { /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a /// Value in \c M. These will be reconstructed exactly when \a M is /// deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); + + /// Write the specified function summary index to the given raw output stream, + /// where it will be written in a new bitcode block. This is used when + /// writing the combined index file for ThinLTO. + void WriteFunctionSummaryToFile(const FunctionInfoIndex &Index, + raw_ostream &Out); /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. 
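A hedged sketch of the lazy summary-reading flow described above; the wrapper name readOneSummary and its error handling are invented, while getFunctionInfoIndex and readFunctionSummary are the APIs declared in this header.

  #include "llvm/Bitcode/ReaderWriter.h"
  #include "llvm/Support/MemoryBuffer.h"

  std::error_code readOneSummary(llvm::MemoryBufferRef Buf,
                                 llvm::StringRef FnName,
                                 llvm::DiagnosticHandlerFunction DiagHandler) {
    // IsLazy=true builds only the function-name -> summary-offset map.
    llvm::ErrorOr<std::unique_ptr<llvm::FunctionInfoIndex>> IndexOrErr =
        llvm::getFunctionInfoIndex(Buf, DiagHandler, /*IsLazy=*/true);
    if (std::error_code EC = IndexOrErr.getError())
      return EC;
    // Pull the summary for a single import candidate into the index.
    return llvm::readFunctionSummary(Buf, DiagHandler, FnName,
                                     std::move(*IndexOrErr));
  }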
@@ -159,7 +197,7 @@ namespace llvm { BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, const Twine &Msg); void print(DiagnosticPrinter &DP) const override; - std::error_code getError() const { return EC; }; + std::error_code getError() const { return EC; } static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_Bitcode; diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index 82d1e8ada17d..38e64ad3be29 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_ANALYSIS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/CallSite.h" @@ -23,6 +24,8 @@ namespace llvm { class GlobalValue; +class MachineBasicBlock; +class MachineFunction; class TargetLoweringBase; class TargetLowering; class TargetMachine; @@ -37,7 +40,7 @@ struct EVT; /// Given an LLVM IR aggregate type and a sequence of insertvalue or /// extractvalue indices that identify a member, return the linearized index of /// the start of the member, i.e the number of element in memory before the -/// seeked one. This is disconnected from the number of bytes. +/// sought one. This is disconnected from the number of bytes. /// /// \param Ty is the type indexed by \p Indices. /// \param Indices is an optional pointer in the indices list to the current @@ -115,6 +118,9 @@ bool returnTypeIsEligibleForTailCall(const Function *F, // or we are in LTO. bool canBeOmittedFromSymbolTable(const GlobalValue *GV); +DenseMap +getFuncletMembership(const MachineFunction &MF); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index fe7efae325c4..f5e778b2f262 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -165,6 +165,9 @@ public: /// Return information about data layout. const DataLayout &getDataLayout() const; + /// Return the pointer size from the TargetMachine + unsigned getPointerSize() const; + /// Return information about subtarget. const MCSubtargetInfo &getSubtargetInfo() const; @@ -233,7 +236,12 @@ public: /// Print assembly representations of the jump tables used by the current /// function to the current output stream. /// - void EmitJumpTableInfo(); + virtual void EmitJumpTableInfo(); + + /// Emit the control variable for an emulated TLS variable. + virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue); /// Emit the specified global variable to the .s file. virtual void EmitGlobalVariable(const GlobalVariable *GV); @@ -254,7 +262,7 @@ public: const MCExpr *lowerConstant(const Constant *CV); /// \brief Print a general LLVM constant to the .s file. - void EmitGlobalConstant(const Constant *CV); + void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); /// \brief Unnamed constant global variables solely contaning a pointer to /// another globals variable act like a global variable "proxy", or GOT @@ -317,7 +325,9 @@ public: /// Targets can override this to change how global constants that are part of /// a C++ static/global constructor list are emitted. 
- virtual void EmitXXStructor(const Constant *CV) { EmitGlobalConstant(CV); } + virtual void EmitXXStructor(const DataLayout &DL, const Constant *CV) { + EmitGlobalConstant(DL, CV); + } /// Return true if the basic block has exactly one predecessor and the control /// transfer mechanism between the predecessor and this block is a @@ -404,9 +414,6 @@ public: void EmitULEB128(uint64_t Value, const char *Desc = nullptr, unsigned PadTo = 0) const; - /// Emit a .byte 42 directive for a DW_CFA_xxx value. - void EmitCFAByte(unsigned Val) const; - /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. /// Desc is a string saying what the encoding is specifying (e.g. "LSDA"). @@ -446,7 +453,16 @@ public: void emitCFIInstruction(const MCCFIInstruction &Inst) const; /// \brief Emit Dwarf abbreviation table. - void emitDwarfAbbrevs(const std::vector& Abbrevs) const; + template void emitDwarfAbbrevs(const T &Abbrevs) const { + // For each abbreviation. + for (const auto &Abbrev : Abbrevs) + emitDwarfAbbrev(*Abbrev); + + // Mark end of abbreviations. + EmitULEB128(0, "EOM(3)"); + } + + void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const; /// \brief Recursively emit Dwarf DIE tree. void emitDwarfDIE(const DIE &Die) const; @@ -532,7 +548,8 @@ private: void EmitLLVMUsedList(const ConstantArray *InitList); /// Emit llvm.ident metadata in an '.ident' directive. void EmitModuleIdents(Module &M); - void EmitXXStructorList(const Constant *List, bool isCtor); + void EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); }; } diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h new file mode 100644 index 000000000000..ac18eac8a1ce --- /dev/null +++ b/include/llvm/CodeGen/AtomicExpandUtils.h @@ -0,0 +1,57 @@ +//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/IRBuilder.h" + +namespace llvm { +class Value; +class AtomicRMWInst; + + +/// Parameters (see the expansion example below): +/// (the builder, %addr, %loaded, %new_val, ordering, +/// /* OUT */ %success, /* OUT */ %new_loaded) +typedef function_ref &, Value *, Value *, Value *, + AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; + +/// \brief Expand an atomic RMW instruction into a loop utilizing +/// cmpxchg. You'll want to make sure your target machine likes cmpxchg +/// instructions in the first place and that there isn't another, better, +/// transformation available (for example AArch32/AArch64 have linked loads). +/// +/// This is useful in passes which can't rewrite the more exotic RMW +/// instructions directly into a platform specific intrinsics (because, say, +/// those intrinsics don't exist). If such a pass is able to expand cmpxchg +/// instructions directly however, then, with this function, it could avoid two +/// extra module passes (avoiding passes by `-atomic-expand` and itself). A +/// specific example would be PNaCl's `RewriteAtomics` pass. +/// +/// Given: atomicrmw some_op iN* %addr, iN %incr ordering +/// +/// The standard expansion we produce is: +/// [...] 
+/// %init_loaded = load atomic iN* %addr +/// br label %loop +/// loop: +/// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] +/// %new = some_op iN %loaded, %incr +/// ; This is what the CreateCmpXchgInstFun callback produces: +/// %pair = cmpxchg iN* %addr, iN %loaded, iN %new +/// %new_loaded = extractvalue { iN, i1 } %pair, 0 +/// %success = extractvalue { iN, i1 } %pair, 1 +/// ; End callback produced IR +/// br i1 %success, label %atomicrmw.end, label %loop +/// atomicrmw.end: +/// [...] +/// +/// Returns true if the containing function was modified. +bool +expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); +} diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 9ba25169fda6..d99054eb6f36 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -166,7 +166,7 @@ public: } if (IID == Intrinsic::ctlz) { - if (getTLI()->isCheapToSpeculateCtlz()) + if (getTLI()->isCheapToSpeculateCtlz()) return TargetTransformInfo::TCC_Basic; return TargetTransformInfo::TCC_Expensive; } @@ -256,7 +256,7 @@ public: for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) if (isa<CallInst>(J) || isa<InvokeInst>(J)) { - ImmutableCallSite CS(J); + ImmutableCallSite CS(&*J); if (const Function *F = CS.getCalledFunction()) { if (!static_cast<T *>(this)->isLoweredToCall(F)) continue; @@ -302,12 +302,8 @@ public: if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; + return LT.first * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { @@ -496,13 +492,11 @@ public: // itself. Unless the corresponding extending load or truncating store is // legal, then this will scalarize. TargetLowering::LegalizeAction LA = TargetLowering::Expand; - EVT MemVT = getTLI()->getValueType(DL, Src, true); - if (MemVT.isSimple() && MemVT != MVT::Other) { - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); - } + EVT MemVT = getTLI()->getValueType(DL, Src); + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. @@ -530,7 +524,8 @@ public: VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); // Firstly, the cost of load/store operation. - unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace); + unsigned Cost = static_cast<T *>(this)->getMemoryOpCost( + Opcode, VecTy, Alignment, AddressSpace); // Then plus the cost of interleave operation. if (Opcode == Instruction::Load) { @@ -545,18 +540,20 @@ assert(Indices.size() <= Factor && "Interleaved memory op has too many members"); + for (unsigned Index : Indices) { assert(Index < Factor && "Invalid index for interleaved memory op"); // Extract elements from loaded vector for each sub vector.
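Looking back at AtomicExpandUtils.h above: a minimal sketch of a CreateCmpXchgInstFun callback that emits the "%pair = cmpxchg ..." section of the documented expansion. The choice of a plain cmpxchg and the monotonic failure ordering are assumptions, not requirements of the header.

  #include "llvm/CodeGen/AtomicExpandUtils.h"

  static void createCmpXchg(llvm::IRBuilder<> &Builder, llvm::Value *Addr,
                            llvm::Value *Loaded, llvm::Value *NewVal,
                            llvm::AtomicOrdering Ordering,
                            llvm::Value *&Success, llvm::Value *&NewLoaded) {
    llvm::Value *Pair = Builder.CreateAtomicCmpXchg(
        Addr, Loaded, NewVal, Ordering, /*FailureOrdering=*/llvm::Monotonic);
    NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
    Success = Builder.CreateExtractValue(Pair, 1, "success");
  }
  // Usage: expandAtomicRMWToCmpXchg(RMW, createCmpXchg);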
for (unsigned i = 0; i < NumSubElts; i++) - Cost += getVectorInstrCost(Instruction::ExtractElement, VT, - Index + i * Factor); + Cost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, VT, Index + i * Factor); } unsigned InsSubCost = 0; for (unsigned i = 0; i < NumSubElts; i++) - InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i); + InsSubCost += static_cast(this)->getVectorInstrCost( + Instruction::InsertElement, SubVT, i); Cost += Indices.size() * InsSubCost; } else { @@ -571,17 +568,51 @@ public: unsigned ExtSubCost = 0; for (unsigned i = 0; i < NumSubElts; i++) - ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - - Cost += Factor * ExtSubCost; + ExtSubCost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, SubVT, i); + Cost += ExtSubCost * Factor; for (unsigned i = 0; i < NumElts; i++) - Cost += getVectorInstrCost(Instruction::InsertElement, VT, i); + Cost += static_cast(this) + ->getVectorInstrCost(Instruction::InsertElement, VT, i); } return Cost; } + /// Get intrinsic cost based on arguments + unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Args) { + switch (IID) { + default: { + SmallVector Types; + for (Value *Op : Args) + Types.push_back(Op->getType()); + return getIntrinsicInstrCost(IID, RetTy, Types); + } + case Intrinsic::masked_scatter: { + Value *Mask = Args[3]; + bool VarMask = !isa(Mask); + unsigned Alignment = cast(Args[2])->getZExtValue(); + return + static_cast(this)->getGatherScatterOpCost(Instruction::Store, + Args[0]->getType(), + Args[1], VarMask, + Alignment); + } + case Intrinsic::masked_gather: { + Value *Mask = Args[2]; + bool VarMask = !isa(Mask); + unsigned Alignment = cast(Args[1])->getZExtValue(); + return + static_cast(this)->getGatherScatterOpCost(Instruction::Load, + RetTy, Args[0], VarMask, + Alignment); + } + } + } + + /// Get intrinsic cost based on argument types unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys) { unsigned ISD = 0; @@ -800,7 +831,7 @@ class BasicTTIImpl : public BasicTTIImplBase { const TargetLoweringBase *getTLI() const { return TLI; } public: - explicit BasicTTIImpl(const TargetMachine *ST, Function &F); + explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); // Provide value semantics. MSVC requires that we spell all of these out. BasicTTIImpl(const BasicTTIImpl &Arg) diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 91fb0a9d7e77..17c9415a81cb 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -20,6 +20,7 @@ namespace llvm { class LiveIntervals; class MachineBlockFrequencyInfo; class MachineLoopInfo; + class VirtRegMap; /// \brief Normalize the spill weight of a live interval /// @@ -51,6 +52,7 @@ namespace llvm { private: MachineFunction &MF; LiveIntervals &LIS; + VirtRegMap *VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; DenseMap Hint; @@ -58,10 +60,10 @@ namespace llvm { public: VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis, - const MachineLoopInfo &loops, + VirtRegMap *vrm, const MachineLoopInfo &loops, const MachineBlockFrequencyInfo &mbfi, NormalizingFn norm = normalizeSpillWeight) - : MF(mf), LIS(lis), Loops(loops), MBFI(mbfi), normalize(norm) {} + : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} /// \brief (re)compute li's spill weight and allocation hint. 
void calculateSpillWeightAndHint(LiveInterval &li); @@ -70,6 +72,7 @@ namespace llvm { /// \brief Compute spill weights and allocation hints for all virtual register /// live intervals. void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, + VirtRegMap *VRM, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm = diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 1fd4eeb46b38..415abb90da57 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -201,6 +201,7 @@ private: LLVMContext &Context; unsigned StackOffset; + unsigned MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; @@ -270,7 +271,18 @@ public: CallingConv::ID getCallingConv() const { return CallingConv; } bool isVarArg() const { return IsVarArg; } - unsigned getNextStackOffset() const { return StackOffset; } + /// getNextStackOffset - Return the next stack offset such that all stack + /// slots satisfy their alignment requirements. + unsigned getNextStackOffset() const { + return StackOffset; + } + + /// getAlignedCallFrameSize - Return the size of the call frame needed to + /// be able to store all arguments and such that the alignment requirement + /// of each of the arguments is satisfied. + unsigned getAlignedCallFrameSize() const { + return RoundUpToAlignment(StackOffset, MaxStackArgAlign); + } /// isAllocated - Return true if the specified register (or an alias) is /// allocated. @@ -357,7 +369,7 @@ public: /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive /// registers. If this is not possible, return zero. Otherwise, return the first /// register of the block that were allocated, marking the entire block as allocated. - unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { + unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { if (RegsRequired > Regs.size()) return 0; @@ -400,9 +412,10 @@ public: /// and alignment. unsigned AllocateStack(unsigned Size, unsigned Align) { assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. 
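// Worked example (hypothetical sizes/alignments) of the bookkeeping below,
// starting from StackOffset = 0 and MaxStackArgAlign = 1:
//   AllocateStack(4, 4) -> returns 0;  StackOffset = 4;  MaxStackArgAlign = 4
//   AllocateStack(8, 8) -> returns 8;  StackOffset = 16; MaxStackArgAlign = 8
//   AllocateStack(4, 4) -> returns 16; StackOffset = 20; MaxStackArgAlign = 8
// getNextStackOffset()      == 20
// getAlignedCallFrameSize() == RoundUpToAlignment(20, 8) == 24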
- StackOffset = ((StackOffset + Align - 1) & ~(Align - 1)); + StackOffset = RoundUpToAlignment(StackOffset, Align); unsigned Result = StackOffset; StackOffset += Size; + MaxStackArgAlign = std::max(Align, MaxStackArgAlign); MF.getFrameInfo()->ensureMaxAlignment(Align); return Result; } diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index bedb7d5549eb..0d37dc00422f 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -182,6 +182,11 @@ OverrideStackAlignment("stack-alignment", cl::desc("Override default stack alignment"), cl::init(0)); +cl::opt +StackRealign("stackrealign", + cl::desc("Force align the stack to the minimum alignment"), + cl::init(false)); + cl::opt TrapFuncName("trap-func", cl::Hidden, cl::desc("Emit a call to trap function rather than a trap instruction"), @@ -219,6 +224,10 @@ FunctionSections("function-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +cl::opt EmulatedTLS("emulated-tls", + cl::desc("Use emulated TLS model"), + cl::init(false)); + cl::opt UniqueSectionNames("unique-section-names", cl::desc("Give unique names to every section"), cl::init(true)); @@ -238,6 +247,26 @@ JTableType("jump-table-type", "Create one table per unique function type."), clEnumValEnd)); +cl::opt EABIVersion( + "meabi", cl::desc("Set EABI type (default depends on triple):"), + cl::init(EABI::Default), + cl::values(clEnumValN(EABI::Default, "default", + "Triple default EABI version"), + clEnumValN(EABI::EABI4, "4", "EABI version 4"), + clEnumValN(EABI::EABI5, "5", "EABI version 5"), + clEnumValN(EABI::GNU, "gnu", "EABI GNU"), clEnumValEnd)); + +cl::opt +DebuggerTuningOpt("debugger-tune", + cl::desc("Tune debug info for a particular debugger"), + cl::init(DebuggerKind::Default), + cl::values( + clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::SCE, "sce", + "SCE targets (e.g. PS4)"), + clEnumValEnd)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. 
static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -260,11 +289,14 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.DataSections = DataSections; Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; + Options.EmulatedTLS = EmulatedTLS; Options.MCOptions = InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; Options.ThreadModel = TMModel; + Options.EABIVersion = EABIVersion; + Options.DebuggerTuning = DebuggerTuningOpt; return Options; } @@ -325,6 +357,10 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, "disable-tail-calls", toStringRef(DisableTailCalls)); + if (StackRealign) + NewAttrs = NewAttrs.addAttribute(Ctx, AttributeSet::FunctionIndex, + "stackrealign"); + if (TrapFuncName.getNumOccurrences() > 0) for (auto &B : F) for (auto &I : B) diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index c44a7e0b6736..40ec201107e8 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -40,22 +40,51 @@ class InstrItineraryData; class DefaultVLIWScheduler; class SUnit; +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. +// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. +// +// e.g. terms x resource bit combinations that fit in uint32_t: +// 4 terms x 8 bits = 32 bits +// 3 terms x 10 bits = 30 bits +// 2 terms x 16 bits = 32 bits +// +// e.g. terms x resource bit combinations that fit in uint64_t: +// 8 terms x 8 bits = 64 bits +// 7 terms x 9 bits = 63 bits +// 6 terms x 10 bits = 60 bits +// 5 terms x 12 bits = 60 bits +// 4 terms x 16 bits = 64 bits <--- current +// 3 terms x 21 bits = 63 bits +// 2 terms x 32 bits = 64 bits +// +#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. +#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. + +typedef uint64_t DFAInput; +typedef int64_t DFAStateInput; +#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. +// -------------------------------------------------------------------- + class DFAPacketizer { private: - typedef std::pair UnsignPair; + typedef std::pair UnsignPair; + const InstrItineraryData *InstrItins; int CurrentState; - const int (*DFAStateInputTable)[2]; + const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; // CachedTable is a map from to ToState. DenseMap CachedTable; // ReadTable - Read the DFA transition table and update CachedTable. - void ReadTable(unsigned int state); + void ReadTable(unsigned state); public: - DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET); // Reset the current state to make all resources available. @@ -63,6 +92,12 @@ public: CurrentState = 0; } + // getInsnInput - Return the DFAInput for an instruction class. + DFAInput getInsnInput(unsigned InsnClass); + + // getInsnInput - Return the DFAInput for an instruction class input vector. + static DFAInput getInsnInput(const std::vector &InsnClass); + // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. 
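To make the "terms x resource bits" budget above concrete: with the current DFA_MAX_RESTERMS = 4 and DFA_MAX_RESOURCES = 16, each AND'ed resource term occupies one 16-bit lane of the 64-bit DFAInput. The helper below is an illustrative sketch only; the real encoding lives in DFAPacketizer.cpp and DFAPacketizerEmitter.cpp.

  #include <cstdint>

  typedef uint64_t DFAInput; // as in DFAPacketizer.h

  // Append one AND'ed resource term (at most 16 significant bits) to the
  // packed input; four such lanes fill the 64-bit DFAInput exactly.
  static DFAInput addDFATerm(DFAInput Packed, unsigned ResourceBits) {
    return (Packed << 16) | (ResourceBits & 0xFFFFu);
  }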
bool canReserveResources(const llvm::MCInstrDesc *MID); @@ -93,6 +128,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; + AliasAnalysis *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -106,7 +142,9 @@ protected: std::map<MachineInstr *, SUnit *> MIToSUnit; public: - VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, bool IsPostRA); + // The AliasAnalysis parameter can be nullptr. + VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + AliasAnalysis *AA); virtual ~VLIWPacketizerList(); @@ -126,8 +164,10 @@ public: return MII; } - // endPacket - End the current packet. - void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); + // End the current packet and reset the state of the packetizer. + // Overriding this function allows the target-specific packetizer + // to perform custom finalization. + virtual void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); // initPacketizerState - perform initialization before packetizing // an instruction. This function is supposed to be overridden by // the target dependent packetizer. virtual void initPacketizerState() { return; } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - virtual bool ignorePseudoInstruction(MachineInstr *I, - MachineBasicBlock *MBB) { + virtual bool ignorePseudoInstruction(const MachineInstr *I, + const MachineBasicBlock *MBB) { return false; } // isSoloInstruction - Return true if instruction MI cannot be packetized // with any other instruction, which means that MI itself is a packet. - virtual bool isSoloInstruction(MachineInstr *MI) { + virtual bool isSoloInstruction(const MachineInstr *MI) { + return true; + } + + // Check if the packetizer should try to add the given instruction to + // the current packet. One reason for which it may not be desirable + // to include an instruction in the current packet could be that it + // would cause a stall. + // If this function returns "false", the current packet will be ended, + // and the instruction will be added to the next packet.
+ virtual bool shouldAddToPacket(const MachineInstr *MI) { return true; } diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index f07712a676da..fa612d981dec 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -100,10 +100,8 @@ public: /// void Emit(const AsmPrinter *AP) const; -#ifndef NDEBUG void print(raw_ostream &O); void dump(); -#endif }; //===--------------------------------------------------------------------===// @@ -143,9 +141,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -164,9 +160,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -185,9 +179,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -203,9 +195,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -223,9 +213,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -252,9 +240,7 @@ public: : sizeof(int32_t); } -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -273,9 +259,7 @@ public: return 8; } -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -295,9 +279,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -444,10 +426,8 @@ public: /// unsigned SizeOf(const AsmPrinter *AP) const; -#ifndef NDEBUG void print(raw_ostream &O) const; void dump() const; -#endif }; struct IntrusiveBackListNode { @@ -566,64 +546,70 @@ class DIEValueList { ListTy List; public: - bool empty() const { return List.empty(); } - - class const_iterator; - class iterator - : public iterator_adaptor_base { - friend class const_iterator; - typedef iterator_adaptor_base iterator_adaptor; public: - iterator() = default; - explicit iterator(ListTy::iterator X) : iterator_adaptor(X) {} + value_iterator() = default; + explicit value_iterator(ListTy::iterator X) : iterator_adaptor(X) {} explicit operator bool() const { return bool(wrapped()); } DIEValue &operator*() const { return wrapped()->V; } }; - class const_iterator - : public iterator_adaptor_base { - typedef iterator_adaptor_base { + typedef iterator_adaptor_base iterator_adaptor; public: - const_iterator() = default; - 
const_iterator(DIEValueList::iterator X) : iterator_adaptor(X.wrapped()) {} - explicit const_iterator(ListTy::const_iterator X) : iterator_adaptor(X) {} + const_value_iterator() = default; + const_value_iterator(DIEValueList::value_iterator X) + : iterator_adaptor(X.wrapped()) {} + explicit const_value_iterator(ListTy::const_iterator X) + : iterator_adaptor(X) {} explicit operator bool() const { return bool(wrapped()); } const DIEValue &operator*() const { return wrapped()->V; } }; - iterator insert(BumpPtrAllocator &Alloc, DIEValue V) { + typedef iterator_range value_range; + typedef iterator_range const_value_range; + + value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue V) { List.push_back(*new (Alloc) Node(V)); - return iterator(ListTy::toIterator(List.back())); + return value_iterator(ListTy::toIterator(List.back())); } - template - iterator emplace(BumpPtrAllocator &Alloc, Ts &&... Args) { - return insert(Alloc, DIEValue(std::forward(Args)...)); + template + value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute, + dwarf::Form Form, T &&Value) { + return addValue(Alloc, DIEValue(Attribute, Form, std::forward(Value))); } - iterator begin() { return iterator(List.begin()); } - iterator end() { return iterator(List.end()); } - const_iterator begin() const { return const_iterator(List.begin()); } - const_iterator end() const { return const_iterator(List.end()); } + value_range values() { + return llvm::make_range(value_iterator(List.begin()), + value_iterator(List.end())); + } + const_value_range values() const { + return llvm::make_range(const_value_iterator(List.begin()), + const_value_iterator(List.end())); + } }; //===--------------------------------------------------------------------===// /// DIE - A structured debug information entry. Has an abbreviation which /// describes its organization. -class DIE : IntrusiveBackListNode { +class DIE : IntrusiveBackListNode, public DIEValueList { friend class IntrusiveBackList; -protected: /// Offset - Offset in debug info section. /// unsigned Offset; @@ -643,14 +629,7 @@ protected: DIE *Parent = nullptr; - /// Attribute values. - /// - DIEValueList Values; - -protected: - DIE() : Offset(0), Size(0) {} - -private: + DIE() = delete; explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {} public: @@ -677,20 +656,6 @@ public: return llvm::make_range(Children.begin(), Children.end()); } - typedef DIEValueList::iterator value_iterator; - typedef iterator_range value_range; - - value_range values() { - return llvm::make_range(Values.begin(), Values.end()); - } - - typedef DIEValueList::const_iterator const_value_iterator; - typedef iterator_range const_value_range; - - const_value_range values() const { - return llvm::make_range(Values.begin(), Values.end()); - } - DIE *getParent() const { return Parent; } /// Generate the abbreviation for this DIE. @@ -711,17 +676,6 @@ public: void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } - /// addValue - Add a value and attributes to a DIE. - /// - value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue Value) { - return Values.insert(Alloc, Value); - } - template - value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute, - dwarf::Form Form, T &&Value) { - return Values.emplace(Alloc, Attribute, Form, std::forward(Value)); - } - /// Add a child to the DIE. 
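Since DIE now inherits from DIEValueList, attributes are attached through the single addValue() entry point and read back through values(); a hedged usage sketch, assuming the allocator and DIEString are prepared by the caller as usual:

  #include "llvm/CodeGen/DIE.h"

  void attachName(llvm::BumpPtrAllocator &Alloc, llvm::DIE &Die,
                  llvm::DIEString Name) {
    // One addValue() call replaces the old insert()/emplace() pair.
    Die.addValue(Alloc, llvm::dwarf::DW_AT_name, llvm::dwarf::DW_FORM_strp,
                 Name);
    for (const llvm::DIEValue &V : Die.values()) // iterate via values()
      (void)V; // e.g. inspect V.getAttribute() / V.getForm()
  }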
DIE &addChild(DIE *Child) { assert(!Child->getParent() && "Child should be orphaned"); @@ -736,16 +690,14 @@ public: /// gives \a DIEValue::isNone) if no such attribute exists. DIEValue findAttribute(dwarf::Attribute Attribute) const; -#ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); -#endif }; //===--------------------------------------------------------------------===// /// DIELoc - Represents an expression location. // -class DIELoc : public DIE { +class DIELoc : public DIEValueList { mutable unsigned Size; // Size in bytes excluding size header. public: @@ -773,15 +725,13 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// /// DIEBlock - Represents a block of values. // -class DIEBlock : public DIE { +class DIEBlock : public DIEValueList { mutable unsigned Size; // Size in bytes excluding size header. public: @@ -806,9 +756,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; } // end llvm namespace diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index f04a7cd69664..cc4e37059bb8 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -419,11 +419,11 @@ protected: const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with two register operands and a result + /// \brief Emit a MachineInstr with a floating point immediate, and a result /// register in the given register class. - unsigned fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm); + unsigned fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm); /// \brief Emit a MachineInstr with two register operands, an immediate, and a /// result register in the given register class. @@ -432,23 +432,11 @@ protected: bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with two register operands, two immediates - /// operands, and a result register in the given register class. - unsigned fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, unsigned Op1, bool Op1IsKill, - uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with a single immediate operand, and a result /// register in the given register class. unsigned fastEmitInst_i(unsigned MachineInstrOpcode, const TargetRegisterClass *RC, uint64_t Imm); - /// \brief Emit a MachineInstr with a two immediate operands. - unsigned fastEmitInst_ii(unsigned MachineInstrOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2); - /// \brief Emit a MachineInstr for an extract_subreg from a specified index of /// a superregister to a specified type. unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, @@ -462,6 +450,11 @@ protected: /// immediate (fall-through) successor, and update the CFG. void fastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL); + /// Emit an unconditional branch to \p FalseMBB, obtains the branch weight + /// and adds TrueMBB and FalseMBB to the successor list. 
+ void finishCondBranch(const BasicBlock *BranchBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB); + /// \brief Update the value map to include the new mapping for this /// instruction, or insert an extra copy to get the result in a previous /// determined register. @@ -566,6 +559,9 @@ private: /// across heavy instructions like calls. void flushLocalValueMap(); + /// \brief Removes dead local value instructions after SavedLastLocalvalue. + void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); + /// \brief Insertion point before trying to select the current instruction. MachineBasicBlock::iterator SavedInsertPt; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 82c762ed850f..09a9991912da 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -62,6 +62,9 @@ public: /// registers. bool CanLowerReturn; + /// True if part of the CSRs will be handled via explicit copies. + bool SplitCSR; + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; @@ -72,7 +75,10 @@ public: /// ValueMap - Since we emit code for the function a basic block at a time, /// we must remember which virtual registers hold the values for /// cross-basic-block values. - DenseMap ValueMap; + DenseMap ValueMap; + + /// Track virtual registers created for exception pointers. + DenseMap CatchPadExceptionPointers; // Keep track of frame indices allocated for statepoints as they could be used // across basic block boundaries. @@ -99,7 +105,7 @@ public: /// RegFixups - Registers which need to be replaced after isel is done. DenseMap RegFixups; - /// StatepointStackSlots - A list of temporary stack slots (frame indices) + /// StatepointStackSlots - A list of temporary stack slots (frame indices) /// used to spill values at a statepoint. We store them here to enable /// reuse of the same stack slots across different statepoints in different /// basic blocks. @@ -111,11 +117,6 @@ public: /// MBB - The current insert position inside the current block. MachineBasicBlock::iterator InsertPt; -#ifndef NDEBUG - SmallPtrSet CatchInfoLost; - SmallPtrSet CatchInfoFound; -#endif - struct LiveOutInfo { unsigned NumSignBits : 31; bool IsValid : 1; @@ -161,10 +162,13 @@ public: } unsigned CreateReg(MVT VT); - + unsigned CreateRegs(Type *Ty); - + unsigned InitializeRegForValue(const Value *V) { + // Tokens never live in vregs. + if (V->getType()->isTokenTy()) + return 0; unsigned &R = ValueMap[V]; assert(R == 0 && "Already initialized this value register!"); return R = CreateRegs(V->getType()); @@ -231,6 +235,9 @@ public: /// getArgumentFrameIndex - Get frame index for the byval argument. int getArgumentFrameIndex(const Argument *A); + unsigned getCatchPadExceptionPointerVReg(const Value *CPI, + const TargetRegisterClass *RC); + private: void addSEHHandlersForLPads(ArrayRef LPads); diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index e883bd196ea3..163117b0781c 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -160,9 +160,9 @@ class GCModuleInfo : public ImmutablePass { public: /// Lookup the GCStrategy object associated with the given gc name. /// Objects are owned internally; No caller should attempt to delete the - /// returned objects. + /// returned objects. 
GCStrategy *getGCStrategy(const StringRef Name); - + /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. typedef std::vector> FuncInfoVec; diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index a1b8e895898f..3088a86a3260 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -117,11 +117,11 @@ public: /** @name Statepoint Specific Properties */ ///@{ - /// If the value specified can be reliably distinguished, returns true for + /// If the type specified can be reliably distinguished, returns true for /// pointers to GC managed locations and false for pointers to non-GC /// managed locations. Note a GCStrategy can always return 'None' (i.e. an /// empty optional indicating it can't reliably distinguish. - virtual Optional isGCManagedPointer(const Value *V) const { + virtual Optional isGCManagedPointer(const Type *Ty) const { return None; } ///@} diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index fa44301a2d4a..158ff3cd36a8 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -108,6 +108,10 @@ namespace ISD { /// and returns an outchain. EH_SJLJ_LONGJMP, + /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) + /// The target initializes the dispatch table here. + EH_SJLJ_SETUP_DISPATCH, + /// TargetConstant* - Like Constant*, but the DAG does not do any folding, /// simplification, or lowering of the constant. They are used for constants /// which are known to fit in the immediate fields of their users, or for @@ -332,7 +336,7 @@ namespace ISD { SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. - BSWAP, CTTZ, CTLZ, CTPOP, + BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF, @@ -364,9 +368,14 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// op #2 is a *carry value*. This operator checks the result of + /// "LHS - RHS - Carry", and can be used to compare two wide integers: + /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. + SETCCE, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded - /// integer shift operations, just like ADD/SUB_PARTS. The operation - /// ordering is: + /// integer shift operations. The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt SHL_PARTS, SRA_PARTS, SRL_PARTS, @@ -506,7 +515,15 @@ namespace ISD { FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, + /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two + /// values. + /// In the case where a single input is NaN, the non-NaN input is returned. + /// + /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. FMINNUM, FMAXNUM, + /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that + /// when a single input is NaN, NaN is returned. + FMINNAN, FMAXNAN, /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, @@ -575,6 +592,18 @@ namespace ISD { /// take a chain as input and return a chain. EH_LABEL, + /// CATCHPAD - Represents a catchpad instruction. + CATCHPAD, + + /// CATCHRET - Represents a return from a catch block funclet. Used for + /// MSVC compatible exception handling. 
Takes a chain operand and a + /// destination basic block operand. + CATCHRET, + + /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for + /// MSVC compatible exception handling. Takes only a chain operand. + CLEANUPRET, + /// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a /// value, the same type as the pointer type for the system, and an output /// chain. @@ -618,9 +647,11 @@ namespace ISD { PCMARKER, /// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic. - /// The only operand is a chain and a value and a chain are produced. The - /// value is the contents of the architecture specific cycle counter like - /// register (or other high accuracy low latency clock source) + /// It produces a chain and one i64 value. The only operand is a chain. + /// If i64 is not legal, the result will be expanded into smaller values. + /// Still, it returns an i64, so targets should set legality for i64. + /// The result is the content of the architecture-specific cycle + /// counter-like register (or other high accuracy low latency clock source). READCYCLECOUNTER, /// HANDLENODE node - Used as a handle for various purposes. @@ -719,6 +750,12 @@ namespace ISD { GC_TRANSITION_START, GC_TRANSITION_END, + /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of + /// the most recent dynamic alloca. For most targets that would be 0, but + /// for some others (e.g. PowerPC, PowerPC64) that would be compile-time + /// known nonzero constant. The only operand here is the chain. + GET_DYNAMIC_AREA_OFFSET, + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index 9e6ab7d45977..a404b9b70d3a 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -19,41 +19,40 @@ #include "llvm/IR/Intrinsics.h" namespace llvm { - class CallInst; - class Module; - class DataLayout; +class CallInst; +class Module; +class DataLayout; - class IntrinsicLowering { - const DataLayout& DL; +class IntrinsicLowering { + const DataLayout &DL; - - bool Warned; - public: - explicit IntrinsicLowering(const DataLayout &DL) : - DL(DL), Warned(false) {} + bool Warned; - /// AddPrototypes - This method, if called, causes all of the prototypes - /// that might be needed by an intrinsic lowering implementation to be - /// inserted into the module specified. - void AddPrototypes(Module &M); +public: + explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} - /// LowerIntrinsicCall - This method replaces a call with the LLVM function - /// which should be used to implement the specified intrinsic function call. - /// If an intrinsic function must be implemented by the code generator - /// (such as va_start), this function should print a message and abort. - /// - /// Otherwise, if an intrinsic function call can be lowered, the code to - /// implement it (often a call to a non-intrinsic function) is inserted - /// _after_ the call instruction and the call is deleted. The caller must - /// be capable of handling this kind of change. - /// - void LowerIntrinsicCall(CallInst *CI); + /// AddPrototypes - This method, if called, causes all of the prototypes + /// that might be needed by an intrinsic lowering implementation to be + /// inserted into the module specified. 
+ void AddPrototypes(Module &M); - /// LowerToByteSwap - Replace a call instruction into a call to bswap - /// intrinsic. Return false if it has determined the call is not a - /// simple integer bswap. - static bool LowerToByteSwap(CallInst *CI); - }; + /// LowerIntrinsicCall - This method replaces a call with the LLVM function + /// which should be used to implement the specified intrinsic function call. + /// If an intrinsic function must be implemented by the code generator + /// (such as va_start), this function should print a message and abort. + /// + /// Otherwise, if an intrinsic function call can be lowered, the code to + /// implement it (often a call to a non-intrinsic function) is inserted + /// _after_ the call instruction and the call is deleted. The caller must + /// be capable of handling this kind of change. + /// + void LowerIntrinsicCall(CallInst *CI); + + /// LowerToByteSwap - Replace a call instruction into a call to bswap + /// intrinsic. Return false if it has determined the call is not a + /// simple integer bswap. + static bool LowerToByteSwap(CallInst *CI); +}; } #endif diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 9b8b91c9b80e..0157bf9117e5 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetRegisterInfo.h" #include #include #include @@ -595,15 +596,15 @@ namespace llvm { class SubRange : public LiveRange { public: SubRange *Next; - unsigned LaneMask; + LaneBitmask LaneMask; /// Constructs a new SubRange object. - SubRange(unsigned LaneMask) + SubRange(LaneBitmask LaneMask) : Next(nullptr), LaneMask(LaneMask) { } /// Constructs a new SubRange object by copying liveness from @p Other. - SubRange(unsigned LaneMask, const LiveRange &Other, + SubRange(LaneBitmask LaneMask, const LiveRange &Other, BumpPtrAllocator &Allocator) : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { } @@ -677,7 +678,8 @@ namespace llvm { /// Creates a new empty subregister live range. The range is added at the /// beginning of the subrange list; subrange iterators stay valid. - SubRange *createSubRange(BumpPtrAllocator &Allocator, unsigned LaneMask) { + SubRange *createSubRange(BumpPtrAllocator &Allocator, + LaneBitmask LaneMask) { SubRange *Range = new (Allocator) SubRange(LaneMask); appendSubRange(Range); return Range; @@ -685,7 +687,8 @@ namespace llvm { /// Like createSubRange() but the new range is filled with a copy of the /// liveness information in @p CopyFrom. - SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, unsigned LaneMask, + SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, + LaneBitmask LaneMask, const LiveRange &CopyFrom) { SubRange *Range = new (Allocator) SubRange(LaneMask, CopyFrom, Allocator); appendSubRange(Range); @@ -842,11 +845,6 @@ namespace llvm { LiveIntervals &LIS; IntEqClasses EqClass; - // Note that values a and b are connected. - void Connect(unsigned a, unsigned b); - - unsigned Renumber(); - public: explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {} @@ -858,12 +856,12 @@ namespace llvm { /// the equivalence class assigned the VNI. unsigned getEqClass(const VNInfo *VNI) const { return EqClass[VNI->id]; } - /// Distribute - Distribute values in LIV[0] into a separate LiveInterval - /// for each connected component. 
LIV must have a LiveInterval for each - /// connected component. The LiveIntervals in Liv[1..] must be empty. - /// Instructions using LIV[0] are rewritten. - void Distribute(LiveInterval *LIV[], MachineRegisterInfo &MRI); - + /// Distribute values in \p LI into a separate LiveIntervals + /// for each connected component. LIV must have an empty LiveInterval for + /// each additional connected component. The first connected component is + /// left in \p LI. + void Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI); }; } diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 9673f80e0856..87421e2f83b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -22,6 +22,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -36,7 +37,6 @@ namespace llvm { extern cl::opt UseSegmentSetForPhysRegs; - class AliasAnalysis; class BitVector; class BlockFrequency; class LiveRangeCalc; @@ -147,13 +147,12 @@ extern cl::opt UseSegmentSetForPhysRegs; LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst); - /// shrinkToUses - After removing some uses of a register, shrink its live - /// range to just the remaining uses. This method does not compute reaching - /// defs for new uses, and it doesn't remove dead defs. - /// Dead PHIDef values are marked as unused. - /// New dead machine instructions are added to the dead vector. - /// Return true if the interval may have been separated into multiple - /// connected components. + /// After removing some uses of a register, shrink its live range to just + /// the remaining uses. This method does not compute reaching defs for new + /// uses, and it doesn't remove dead defs. + /// Dead PHIDef values are marked as unused. New dead machine instructions + /// are added to the dead vector. Returns true if the interval may have been + /// separated into multiple connected components. bool shrinkToUses(LiveInterval *li, SmallVectorImpl *dead = nullptr); @@ -161,6 +160,8 @@ extern cl::opt UseSegmentSetForPhysRegs; /// shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) /// that works on a subregister live range and only looks at uses matching /// the lane mask of the subregister range. + /// This may leave the subrange empty which needs to be cleaned up with + /// LiveInterval::removeEmptySubranges() afterwards. void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg); /// extendToIndices - Extend the live range of LI to reach all points in @@ -257,11 +258,6 @@ extern cl::opt UseSegmentSetForPhysRegs; Indexes->replaceMachineInstrInMaps(MI, NewMI); } - bool findLiveInMBBs(SlotIndex Start, SlotIndex End, - SmallVectorImpl &MBBs) const { - return Indexes->findLiveInMBBs(Start, End, MBBs); - } - VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -406,6 +402,10 @@ extern cl::opt UseSegmentSetForPhysRegs; /// that start at position @p Pos. void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos); + /// Split separate components in LiveInterval \p LI into separate intervals. + void splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl &SplitLIs); + private: /// Compute live intervals for all virtual registers. 
void computeVirtRegs(); @@ -440,7 +440,7 @@ extern cl::opt UseSegmentSetForPhysRegs; void repairOldRegInRange(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, - unsigned Reg, unsigned LaneMask = ~0u); + unsigned Reg, LaneBitmask LaneMask = ~0u); class HMEditor; }; diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 6475e7b4af37..3bdf5ae8d013 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -109,7 +109,7 @@ public: /// \brief Simulates liveness when stepping forward over an /// instruction(bundle): Remove killed-uses, add defs. This is the not /// recommended way, because it depends on accurate kill flags. If possible - /// use stepBackwards() instead of this function. + /// use stepBackward() instead of this function. /// The clobbers set will be the list of registers either defined or clobbered /// by a regmask. The operand will identify whether this is a regmask or /// register operand. @@ -122,9 +122,9 @@ public: void addLiveIns(const MachineBasicBlock *MBB, bool AddPristines = false); /// \brief Adds all live-out registers of basic block @p MBB; After prologue/ - /// epilogue insertion \p AddPristines should be set to true to insert the - /// pristine registers. - void addLiveOuts(const MachineBasicBlock *MBB, bool AddPristines = false); + /// epilogue insertion \p AddPristinesAndCSRs should be set to true. + void addLiveOuts(const MachineBasicBlock *MBB, + bool AddPristinesAndCSRs = false); typedef SparseSet::const_iterator const_iterator; const_iterator begin() const { return LiveRegs.begin(); } diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index c97c636abbb4..2271e3352aa2 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -21,6 +21,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +29,6 @@ namespace llvm { -class AliasAnalysis; class LiveIntervals; class MachineBlockFrequencyInfo; class MachineLoopInfo; diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h index 86a0c7bd626f..e169058ca563 100644 --- a/include/llvm/CodeGen/LiveRegMatrix.h +++ b/include/llvm/CodeGen/LiveRegMatrix.h @@ -32,13 +32,11 @@ namespace llvm { class LiveInterval; class LiveIntervalAnalysis; -class MachineRegisterInfo; class TargetRegisterInfo; class VirtRegMap; class LiveRegMatrix : public MachineFunctionPass { const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; LiveIntervals *LIS; VirtRegMap *VRM; diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h index f495507c66ec..3ffbe3d775b4 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStackAnalysis.h @@ -25,76 +25,74 @@ namespace llvm { - class LiveStacks : public MachineFunctionPass { - const TargetRegisterInfo *TRI; +class LiveStacks : public MachineFunctionPass { + const TargetRegisterInfo *TRI; - /// Special pool allocator for VNInfo's (LiveInterval val#). - /// - VNInfo::Allocator VNInfoAllocator; + /// Special pool allocator for VNInfo's (LiveInterval val#). + /// + VNInfo::Allocator VNInfoAllocator; - /// S2IMap - Stack slot indices to live interval mapping. 
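As a usage note for the LivePhysRegs fixes above (stepBackward() is the recommended direction), this is roughly how the class is driven from a late pass; a sketch only, assuming TRI and MBB come from a running MachineFunction pass:

  #include "llvm/CodeGen/LivePhysRegs.h"

  // Recompute which physical registers are live before each instruction by
  // walking the block bottom-up, seeded with the block's live-outs.
  static void walkLiveness(const llvm::TargetRegisterInfo *TRI,
                           llvm::MachineBasicBlock &MBB) {
    llvm::LivePhysRegs LiveRegs(TRI);
    LiveRegs.addLiveOuts(&MBB, /*AddPristinesAndCSRs=*/true);
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I)
      LiveRegs.stepBackward(*I); // LiveRegs = liveness just before *I
  }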
- /// - typedef std::unordered_map SS2IntervalMap; - SS2IntervalMap S2IMap; + /// S2IMap - Stack slot indices to live interval mapping. + /// + typedef std::unordered_map SS2IntervalMap; + SS2IntervalMap S2IMap; - /// S2RCMap - Stack slot indices to register class mapping. - std::map S2RCMap; - - public: - static char ID; // Pass identification, replacement for typeid - LiveStacks() : MachineFunctionPass(ID) { - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - } + /// S2RCMap - Stack slot indices to register class mapping. + std::map S2RCMap; - typedef SS2IntervalMap::iterator iterator; - typedef SS2IntervalMap::const_iterator const_iterator; - const_iterator begin() const { return S2IMap.begin(); } - const_iterator end() const { return S2IMap.end(); } - iterator begin() { return S2IMap.begin(); } - iterator end() { return S2IMap.end(); } +public: + static char ID; // Pass identification, replacement for typeid + LiveStacks() : MachineFunctionPass(ID) { + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + } - unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); } + typedef SS2IntervalMap::iterator iterator; + typedef SS2IntervalMap::const_iterator const_iterator; + const_iterator begin() const { return S2IMap.begin(); } + const_iterator end() const { return S2IMap.end(); } + iterator begin() { return S2IMap.begin(); } + iterator end() { return S2IMap.end(); } - LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC); + unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); } - LiveInterval &getInterval(int Slot) { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; - } + LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC); - const LiveInterval &getInterval(int Slot) const { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::const_iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; - } + LiveInterval &getInterval(int Slot) { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::iterator I = S2IMap.find(Slot); + assert(I != S2IMap.end() && "Interval does not exist for stack slot"); + return I->second; + } - bool hasInterval(int Slot) const { - return S2IMap.count(Slot); - } + const LiveInterval &getInterval(int Slot) const { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::const_iterator I = S2IMap.find(Slot); + assert(I != S2IMap.end() && "Interval does not exist for stack slot"); + return I->second; + } - const TargetRegisterClass *getIntervalRegClass(int Slot) const { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - std::map::const_iterator - I = S2RCMap.find(Slot); - assert(I != S2RCMap.end() && - "Register class info does not exist for stack slot"); - return I->second; - } + bool hasInterval(int Slot) const { return S2IMap.count(Slot); } - VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } + const TargetRegisterClass *getIntervalRegClass(int Slot) const { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + std::map::const_iterator I = + S2RCMap.find(Slot); + assert(I != S2RCMap.end() && + "Register class info does not exist for stack slot"); + return I->second; + } - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; + VNInfo::Allocator 
&getVNInfoAllocator() { return VNInfoAllocator; } - /// runOnMachineFunction - pass entry point - bool runOnMachineFunction(MachineFunction&) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; - /// print - Implement the dump method. - void print(raw_ostream &O, const Module* = nullptr) const override; - }; + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction &) override; + + /// print - Implement the dump method. + void print(raw_ostream &O, const Module * = nullptr) const override; +}; } #endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */ diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index 67b756d5e886..a569d5ec1f5e 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -1,4 +1,4 @@ -//===- MIRParser.h - MIR serialization format parser ----------------------===// +//===- MIRParser.h - MIR serialization format parser ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -37,7 +37,7 @@ class MIRParser : public MachineFunctionInitializer { public: MIRParser(std::unique_ptr Impl); MIRParser(const MIRParser &) = delete; - ~MIRParser(); + ~MIRParser() override; /// Parse the optional LLVM IR module that's embedded in the MIR file. /// @@ -78,4 +78,4 @@ createMIRParser(std::unique_ptr Contents, LLVMContext &Context); } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MIRPARSER_MIRPARSER_H diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 9798e5cef645..14d3744741c5 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -19,6 +19,7 @@ #define LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Support/YAMLTraits.h" #include @@ -72,54 +73,109 @@ template <> struct ScalarTraits { static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } }; +struct BlockStringValue { + StringValue Value; +}; + +template <> struct BlockScalarTraits { + static void output(const BlockStringValue &S, void *Ctx, raw_ostream &OS) { + return ScalarTraits::output(S.Value, Ctx, OS); + } + + static StringRef input(StringRef Scalar, void *Ctx, BlockStringValue &S) { + return ScalarTraits::input(Scalar, Ctx, S.Value); + } +}; + +/// A wrapper around unsigned which contains a source range that's being set +/// during parsing. 
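The UnsignedValue wrapper defined next exists so MIR diagnostics can point back at the exact token in the YAML input. A hypothetical client trait (Foo is not part of this patch) would use it like any other scalar field:

  struct Foo {
    llvm::yaml::UnsignedValue ID;
  };

  namespace llvm {
  namespace yaml {
  template <> struct MappingTraits<Foo> {
    static void mapping(IO &YamlIO, Foo &F) {
      // F.ID.SourceRange is filled in while parsing, so a later semantic
      // error (e.g. a dangling index) can be reported at the right spot.
      YamlIO.mapRequired("id", F.ID);
    }
  };
  } // end namespace yaml
  } // end namespace llvm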
+struct UnsignedValue { + unsigned Value; + SMRange SourceRange; + + UnsignedValue() : Value(0) {} + UnsignedValue(unsigned Value) : Value(Value) {} + + bool operator==(const UnsignedValue &Other) const { + return Value == Other.Value; + } +}; + +template <> struct ScalarTraits { + static void output(const UnsignedValue &Value, void *Ctx, raw_ostream &OS) { + return ScalarTraits::output(Value.Value, Ctx, OS); + } + + static StringRef input(StringRef Scalar, void *Ctx, UnsignedValue &Value) { + if (const auto *Node = + reinterpret_cast(Ctx)->getCurrentNode()) + Value.SourceRange = Node->getSourceRange(); + return ScalarTraits::input(Scalar, Ctx, Value.Value); + } + + static bool mustQuote(StringRef Scalar) { + return ScalarTraits::mustQuote(Scalar); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(yaml::IO &IO, + MachineJumpTableInfo::JTEntryKind &EntryKind) { + IO.enumCase(EntryKind, "block-address", + MachineJumpTableInfo::EK_BlockAddress); + IO.enumCase(EntryKind, "gp-rel64-block-address", + MachineJumpTableInfo::EK_GPRel64BlockAddress); + IO.enumCase(EntryKind, "gp-rel32-block-address", + MachineJumpTableInfo::EK_GPRel32BlockAddress); + IO.enumCase(EntryKind, "label-difference32", + MachineJumpTableInfo::EK_LabelDifference32); + IO.enumCase(EntryKind, "inline", MachineJumpTableInfo::EK_Inline); + IO.enumCase(EntryKind, "custom32", MachineJumpTableInfo::EK_Custom32); + } +}; + } // end namespace yaml } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::StringValue) LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::FlowStringValue) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::UnsignedValue) namespace llvm { namespace yaml { struct VirtualRegisterDefinition { - unsigned ID; + UnsignedValue ID; StringValue Class; - // TODO: Serialize the virtual register hints. + StringValue PreferredRegister; + // TODO: Serialize the target specific register hints. }; template <> struct MappingTraits { static void mapping(IO &YamlIO, VirtualRegisterDefinition &Reg) { YamlIO.mapRequired("id", Reg.ID); YamlIO.mapRequired("class", Reg.Class); + YamlIO.mapOptional("preferred-register", Reg.PreferredRegister, + StringValue()); // Don't print out when it's empty. } static const bool flow = true; }; -struct MachineBasicBlock { - unsigned ID; - StringValue Name; - unsigned Alignment = 0; - bool IsLandingPad = false; - bool AddressTaken = false; - // TODO: Serialize the successor weights. - std::vector Successors; - std::vector LiveIns; - std::vector Instructions; +struct MachineFunctionLiveIn { + StringValue Register; + StringValue VirtualRegister; }; -template <> struct MappingTraits { - static void mapping(IO &YamlIO, MachineBasicBlock &MBB) { - YamlIO.mapRequired("id", MBB.ID); - YamlIO.mapOptional("name", MBB.Name, - StringValue()); // Don't print out an empty name. - YamlIO.mapOptional("alignment", MBB.Alignment); - YamlIO.mapOptional("isLandingPad", MBB.IsLandingPad); - YamlIO.mapOptional("addressTaken", MBB.AddressTaken); - YamlIO.mapOptional("successors", MBB.Successors); - YamlIO.mapOptional("liveins", MBB.LiveIns); - YamlIO.mapOptional("instructions", MBB.Instructions); +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineFunctionLiveIn &LiveIn) { + YamlIO.mapRequired("reg", LiveIn.Register); + YamlIO.mapOptional( + "virtual-reg", LiveIn.VirtualRegister, + StringValue()); // Don't print the virtual register when it's empty. 
} + + static const bool flow = true; }; /// Serializable representation of stack object from the MachineFrameInfo class. @@ -128,16 +184,21 @@ template <> struct MappingTraits { /// determined by the object's type and frame information flags. /// Dead stack objects aren't serialized. /// -/// TODO: Determine isPreallocated flag by mapping between objects and local -/// objects (Serialize local objects). +/// The 'isPreallocated' flag is determined by the local offset. struct MachineStackObject { enum ObjectType { DefaultType, SpillSlot, VariableSized }; - // TODO: Serialize LLVM alloca reference. - unsigned ID; + UnsignedValue ID; + StringValue Name; + // TODO: Serialize unnamed LLVM alloca reference. ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + StringValue CalleeSavedRegister; + Optional LocalOffset; + StringValue DebugVar; + StringValue DebugExpr; + StringValue DebugLoc; }; template <> struct ScalarEnumerationTraits { @@ -151,6 +212,8 @@ template <> struct ScalarEnumerationTraits { template <> struct MappingTraits { static void mapping(yaml::IO &YamlIO, MachineStackObject &Object) { YamlIO.mapRequired("id", Object.ID); + YamlIO.mapOptional("name", Object.Name, + StringValue()); // Don't print out an empty name. YamlIO.mapOptional( "type", Object.Type, MachineStackObject::DefaultType); // Don't print the default type. @@ -158,6 +221,15 @@ template <> struct MappingTraits { if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("local-offset", Object.LocalOffset); + YamlIO.mapOptional("di-variable", Object.DebugVar, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("di-expression", Object.DebugExpr, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("di-location", Object.DebugLoc, + StringValue()); // Don't print it out when it's empty. } static const bool flow = true; @@ -167,13 +239,14 @@ template <> struct MappingTraits { /// MachineFrameInfo class. struct FixedMachineStackObject { enum ObjectType { DefaultType, SpillSlot }; - unsigned ID; + UnsignedValue ID; ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; bool IsImmutable = false; bool IsAliased = false; + StringValue CalleeSavedRegister; }; template <> @@ -198,22 +271,64 @@ template <> struct MappingTraits { YamlIO.mapOptional("isImmutable", Object.IsImmutable); YamlIO.mapOptional("isAliased", Object.IsAliased); } + YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, + StringValue()); // Don't print it out when it's empty. 
} static const bool flow = true; }; +struct MachineConstantPoolValue { + UnsignedValue ID; + StringValue Value; + unsigned Alignment = 0; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { + YamlIO.mapRequired("id", Constant.ID); + YamlIO.mapOptional("value", Constant.Value); + YamlIO.mapOptional("alignment", Constant.Alignment); + } +}; + +struct MachineJumpTable { + struct Entry { + UnsignedValue ID; + std::vector Blocks; + }; + + MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; + std::vector Entries; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) { + YamlIO.mapRequired("id", Entry.ID); + YamlIO.mapOptional("blocks", Entry.Blocks); + } +}; + } // end namespace yaml } // end namespace llvm +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineBasicBlock) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry) namespace llvm { namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineJumpTable &JT) { + YamlIO.mapRequired("kind", JT.Kind); + YamlIO.mapOptional("entries", JT.Entries); + } +}; + /// Serializable representation of MachineFrameInfo. /// /// Doesn't serialize attributes like 'StackAlignment', 'IsStackRealignable' and @@ -231,14 +346,14 @@ struct MachineFrameInfo { unsigned MaxAlignment = 0; bool AdjustsStack = false; bool HasCalls = false; - // TODO: Serialize StackProtectorIdx and FunctionContextIdx + StringValue StackProtector; + // TODO: Serialize FunctionContextIdx unsigned MaxCallFrameSize = 0; - // TODO: Serialize callee saved info. - // TODO: Serialize local frame objects. bool HasOpaqueSPAdjustment = false; bool HasVAStart = false; bool HasMustTailInVarArgFunc = false; - // TODO: Serialize save and restore MBB references. + StringValue SavePoint; + StringValue RestorePoint; }; template <> struct MappingTraits { @@ -252,10 +367,16 @@ template <> struct MappingTraits { YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment); YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack); YamlIO.mapOptional("hasCalls", MFI.HasCalls); + YamlIO.mapOptional("stackProtector", MFI.StackProtector, + StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize); YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment); YamlIO.mapOptional("hasVAStart", MFI.HasVAStart); YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc); + YamlIO.mapOptional("savePoint", MFI.SavePoint, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("restorePoint", MFI.RestorePoint, + StringValue()); // Don't print it out when it's empty. } }; @@ -269,14 +390,16 @@ struct MachineFunction { bool TracksRegLiveness = false; bool TracksSubRegLiveness = false; std::vector VirtualRegisters; + std::vector LiveIns; + Optional> CalleeSavedRegisters; // TODO: Serialize the various register masks. - // TODO: Serialize live in registers. 
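  // A minimal sketch, assuming the mappings in this struct stay as written:
  // a machine function serialized through them is expected to look roughly
  // like the following hand-written MIR document
  //
  //   name:              foo
  //   tracksRegLiveness: true
  //   liveins:
  //     - { reg: '%edi', virtual-reg: '%0' }
  //   body: |
  //     bb.0.entry:
  //       %0 = COPY %edi
  //       RETQ
  //
  // (the register syntax and opcode are illustrative, taken from X86).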
// Frame information MachineFrameInfo FrameInfo; std::vector FixedStackObjects; std::vector StackObjects; - - std::vector BasicBlocks; + std::vector Constants; /// Constant pool. + MachineJumpTable JumpTableInfo; + BlockStringValue Body; }; template <> struct MappingTraits { @@ -289,10 +412,15 @@ template <> struct MappingTraits { YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness); YamlIO.mapOptional("registers", MF.VirtualRegisters); + YamlIO.mapOptional("liveins", MF.LiveIns); + YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters); YamlIO.mapOptional("frameInfo", MF.FrameInfo); YamlIO.mapOptional("fixedStack", MF.FixedStackObjects); YamlIO.mapOptional("stack", MF.StackObjects); - YamlIO.mapOptional("body", MF.BasicBlocks); + YamlIO.mapOptional("constants", MF.Constants); + if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty()) + YamlIO.mapOptional("jumpTable", MF.JumpTableInfo); + YamlIO.mapOptional("body", MF.Body); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 5e5f45cae8fb..3d58c499823e 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include @@ -25,11 +27,15 @@ class Pass; class BasicBlock; class MachineFunction; class MCSymbol; +class MIPrinter; class SlotIndexes; class StringRef; class raw_ostream; class MachineBranchProbabilityInfo; +// Forward declaration to avoid circular include problem with TargetRegisterInfo +typedef unsigned LaneBitmask; + template <> struct ilist_traits : public ilist_default_traits { private: @@ -52,57 +58,76 @@ public: void addNodeToList(MachineInstr* N); void removeNodeFromList(MachineInstr* N); void transferNodesFromList(ilist_traits &SrcTraits, - ilist_iterator first, - ilist_iterator last); + ilist_iterator First, + ilist_iterator Last); void deleteNode(MachineInstr *N); private: void createNode(const MachineInstr &); }; -class MachineBasicBlock : public ilist_node { +class MachineBasicBlock + : public ilist_node_with_parent { +public: + /// Pair of physical register and lane mask. + /// This is not simply a std::pair typedef because the members should be named + /// clearly as they both have an integer type. + struct RegisterMaskPair { + public: + MCPhysReg PhysReg; + LaneBitmask LaneMask; + + RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask) + : PhysReg(PhysReg), LaneMask(LaneMask) {} + }; + +private: typedef ilist Instructions; Instructions Insts; const BasicBlock *BB; int Number; MachineFunction *xParent; - /// Predecessors/Successors - Keep track of the predecessor / successor - /// basicblocks. + /// Keep track of the predecessor / successor basic blocks. std::vector Predecessors; std::vector Successors; - /// Weights - Keep track of the weights to the successors. This vector - /// has the same order as Successors, or it is empty if we don't use it - /// (disable optimization). - std::vector Weights; - typedef std::vector::iterator weight_iterator; - typedef std::vector::const_iterator const_weight_iterator; + /// Keep track of the probabilities to the successors. This vector has the + /// same order as Successors, or it is empty if we don't use it (disable + /// optimization). 
+ std::vector Probs; + typedef std::vector::iterator probability_iterator; + typedef std::vector::const_iterator + const_probability_iterator; - /// LiveIns - Keep track of the physical registers that are livein of - /// the basicblock. - std::vector LiveIns; + /// Keep track of the physical registers that are livein of the basicblock. + typedef std::vector LiveInVector; + LiveInVector LiveIns; - /// Alignment - Alignment of the basic block. Zero if the basic block does - /// not need to be aligned. - /// The alignment is specified as log2(bytes). - unsigned Alignment; + /// Alignment of the basic block. Zero if the basic block does not need to be + /// aligned. The alignment is specified as log2(bytes). + unsigned Alignment = 0; - /// IsLandingPad - Indicate that this basic block is entered via an - /// exception handler. - bool IsLandingPad; + /// Indicate that this basic block is entered via an exception handler. + bool IsEHPad = false; - /// AddressTaken - Indicate that this basic block is potentially the - /// target of an indirect branch. - bool AddressTaken; + /// Indicate that this basic block is potentially the target of an indirect + /// branch. + bool AddressTaken = false; + + /// Indicate that this basic block is the entry block of an EH funclet. + bool IsEHFuncletEntry = false; + + /// Indicate that this basic block is the entry block of a cleanup funclet. + bool IsCleanupFuncletEntry = false; /// \brief since getSymbol is a relatively heavy-weight operation, the symbol /// is only computed once and is cached. - mutable MCSymbol *CachedMCSymbol; + mutable MCSymbol *CachedMCSymbol = nullptr; // Intrusive list support MachineBasicBlock() {} - explicit MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb); + explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB); ~MachineBasicBlock(); @@ -110,50 +135,44 @@ class MachineBasicBlock : public ilist_node { friend class MachineFunction; public: - /// getBasicBlock - Return the LLVM basic block that this instance - /// corresponded to originally. Note that this may be NULL if this instance - /// does not correspond directly to an LLVM basic block. - /// + /// Return the LLVM basic block that this instance corresponded to originally. + /// Note that this may be NULL if this instance does not correspond directly + /// to an LLVM basic block. const BasicBlock *getBasicBlock() const { return BB; } - /// getName - Return the name of the corresponding LLVM basic block, or - /// "(null)". + /// Return the name of the corresponding LLVM basic block, or "(null)". StringRef getName() const; - /// getFullName - Return a formatted string to identify this block and its - /// parent function. + /// Return a formatted string to identify this block and its parent function. std::string getFullName() const; - /// hasAddressTaken - Test whether this block is potentially the target - /// of an indirect branch. + /// Test whether this block is potentially the target of an indirect branch. bool hasAddressTaken() const { return AddressTaken; } - /// setHasAddressTaken - Set this block to reflect that it potentially - /// is the target of an indirect branch. + /// Set this block to reflect that it potentially is the target of an indirect + /// branch. void setHasAddressTaken() { AddressTaken = true; } - /// getParent - Return the MachineFunction containing this basic block. - /// + /// Return the MachineFunction containing this basic block. 
const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - - /// bundle_iterator - MachineBasicBlock iterator that automatically skips over - /// MIs that are inside bundles (i.e. walk top level MIs only). + /// MachineBasicBlock iterator that automatically skips over MIs that are + /// inside bundles (i.e. walk top level MIs only). template class bundle_iterator : public std::iterator { IterTy MII; public: - bundle_iterator(IterTy mii) : MII(mii) {} + bundle_iterator(IterTy MI) : MII(MI) {} - bundle_iterator(Ty &mi) : MII(mi) { - assert(!mi.isBundledWithPred() && + bundle_iterator(Ty &MI) : MII(MI) { + assert(!MI.isBundledWithPred() && "It's not legal to initialize bundle_iterator with a bundled MI"); } - bundle_iterator(Ty *mi) : MII(mi) { - assert((!mi || !mi->isBundledWithPred()) && + bundle_iterator(Ty *MI) : MII(MI) { + assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize bundle_iterator with a bundled MI"); } // Template allows conversion from const to nonconst. @@ -165,13 +184,13 @@ public: Ty &operator*() const { return *MII; } Ty *operator->() const { return &operator*(); } - operator Ty*() const { return MII; } + operator Ty *() const { return MII.getNodePtrUnchecked(); } - bool operator==(const bundle_iterator &x) const { - return MII == x.MII; + bool operator==(const bundle_iterator &X) const { + return MII == X.MII; } - bool operator!=(const bundle_iterator &x) const { - return !operator==(x); + bool operator!=(const bundle_iterator &X) const { + return !operator==(X); } // Increment and decrement operators... @@ -247,11 +266,16 @@ public: reverse_iterator rend () { return instr_rend(); } const_reverse_iterator rend () const { return instr_rend(); } + /// Support for MachineInstr::getNextNode(). + static Instructions MachineBasicBlock::*getSublistAccess(MachineInstr *) { + return &MachineBasicBlock::Insts; + } + inline iterator_range terminators() { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } inline iterator_range terminators() const { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } // Machine-CFG iterators @@ -301,16 +325,16 @@ public: bool succ_empty() const { return Successors.empty(); } inline iterator_range predecessors() { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range predecessors() const { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range successors() { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } inline iterator_range successors() const { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } // LiveIn management methods. @@ -318,131 +342,177 @@ public: /// Adds the specified register as a live in. Note that it is an error to add /// the same register to the same set more than once unless the intention is /// to call sortUniqueLiveIns after all registers are added. - void addLiveIn(unsigned Reg) { LiveIns.push_back(Reg); } + void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask = ~0u) { + LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask)); + } + void addLiveIn(const RegisterMaskPair &RegMaskPair) { + LiveIns.push_back(RegMaskPair); + } /// Sorts and uniques the LiveIns vector. 
It can be significantly faster to do /// this than repeatedly calling isLiveIn before calling addLiveIn for every /// LiveIn insertion. - void sortUniqueLiveIns() { - std::sort(LiveIns.begin(), LiveIns.end()); - LiveIns.erase(std::unique(LiveIns.begin(), LiveIns.end()), LiveIns.end()); - } + void sortUniqueLiveIns(); /// Add PhysReg as live in to this block, and ensure that there is a copy of /// PhysReg to a virtual register of class RC. Return the virtual register /// that is a copy of the live in PhysReg. - unsigned addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC); + unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC); - /// removeLiveIn - Remove the specified register from the live in set. - /// - void removeLiveIn(unsigned Reg); + /// Remove the specified register from the live in set. + void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u); - /// isLiveIn - Return true if the specified register is in the live in set. - /// - bool isLiveIn(unsigned Reg) const; + /// Return true if the specified register is in the live in set. + bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u) const; // Iteration support for live in sets. These sets are kept in sorted // order by their register number. - typedef std::vector::const_iterator livein_iterator; + typedef LiveInVector::const_iterator livein_iterator; livein_iterator livein_begin() const { return LiveIns.begin(); } livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + iterator_range liveins() const { + return make_range(livein_begin(), livein_end()); + } - /// getAlignment - Return alignment of the basic block. - /// The alignment is specified as log2(bytes). - /// + /// Get the clobber mask for the start of this basic block. Funclets use this + /// to prevent register allocation across funclet transitions. + const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const; + + /// Get the clobber mask for the end of the basic block. + /// \see getBeginClobberMask() + const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const; + + /// Return alignment of the basic block. The alignment is specified as + /// log2(bytes). unsigned getAlignment() const { return Alignment; } - /// setAlignment - Set alignment of the basic block. - /// The alignment is specified as log2(bytes). - /// + /// Set alignment of the basic block. The alignment is specified as + /// log2(bytes). void setAlignment(unsigned Align) { Alignment = Align; } - /// isLandingPad - Returns true if the block is a landing pad. That is - /// this basic block is entered via an exception handler. - bool isLandingPad() const { return IsLandingPad; } + /// Returns true if the block is a landing pad. That is this basic block is + /// entered via an exception handler. + bool isEHPad() const { return IsEHPad; } - /// setIsLandingPad - Indicates the block is a landing pad. That is - /// this basic block is entered via an exception handler. - void setIsLandingPad(bool V = true) { IsLandingPad = V; } + /// Indicates the block is a landing pad. That is this basic block is entered + /// via an exception handler. + void setIsEHPad(bool V = true) { IsEHPad = V; } - /// getLandingPadSuccessor - If this block has a successor that is a landing - /// pad, return it. Otherwise return NULL. + /// If this block has a successor that is a landing pad, return it. Otherwise + /// return NULL. 
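A short sketch of the intended pattern for the lane-mask-aware live-in API above; MBB, PhysReg and PartialReg are assumed to be provided by the surrounding pass:

  // Insert all live-ins first, then sort/unique once; this is cheaper than
  // calling isLiveIn() before every single addLiveIn().
  MBB.addLiveIn(PhysReg);                      // whole register, LaneMask = ~0u
  MBB.addLiveIn(PartialReg, /*LaneMask=*/0x1); // only the first lane is live
  MBB.sortUniqueLiveIns();

  for (const llvm::MachineBasicBlock::RegisterMaskPair &P : MBB.liveins())
    (void)P; // P.PhysReg plus P.LaneMask describe one live-in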
const MachineBasicBlock *getLandingPadSuccessor() const; + bool hasEHPadSuccessor() const; + + /// Returns true if this is the entry block of an EH funclet. + bool isEHFuncletEntry() const { return IsEHFuncletEntry; } + + /// Indicates if this is the entry block of an EH funclet. + void setIsEHFuncletEntry(bool V = true) { IsEHFuncletEntry = V; } + + /// Returns true if this is the entry block of a cleanup funclet. + bool isCleanupFuncletEntry() const { return IsCleanupFuncletEntry; } + + /// Indicates if this is the entry block of a cleanup funclet. + void setIsCleanupFuncletEntry(bool V = true) { IsCleanupFuncletEntry = V; } + // Code Layout methods. - /// moveBefore/moveAfter - move 'this' block before or after the specified - /// block. This only moves the block, it does not modify the CFG or adjust - /// potential fall-throughs at the end of the block. + /// Move 'this' block before or after the specified block. This only moves + /// the block, it does not modify the CFG or adjust potential fall-throughs at + /// the end of the block. void moveBefore(MachineBasicBlock *NewAfter); void moveAfter(MachineBasicBlock *NewBefore); - /// updateTerminator - Update the terminator instructions in block to account - /// for changes to the layout. If the block previously used a fallthrough, - /// it may now need a branch, and if it previously used branching it may now - /// be able to use a fallthrough. + /// Update the terminator instructions in block to account for changes to the + /// layout. If the block previously used a fallthrough, it may now need a + /// branch, and if it previously used branching it may now be able to use a + /// fallthrough. void updateTerminator(); // Machine-CFG mutators - /// addSuccessor - Add succ as a successor of this MachineBasicBlock. - /// The Predecessors list of succ is automatically updated. WEIGHT - /// parameter is stored in Weights list and it may be used by - /// MachineBranchProbabilityInfo analysis to calculate branch probability. + /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list + /// of Succ is automatically updated. PROB parameter is stored in + /// Probabilities list. The default probability is set as unknown. Mixing + /// known and unknown probabilities in successor list is not allowed. When all + /// successors have unknown probabilities, 1 / N is returned as the + /// probability for each successor, where N is the number of successors. /// /// Note that duplicate Machine CFG edges are not allowed. - /// - void addSuccessor(MachineBasicBlock *succ, uint32_t weight = 0); + void addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob = BranchProbability::getUnknown()); - /// Set successor weight of a given iterator. - void setSuccWeight(succ_iterator I, uint32_t weight); + /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list + /// of Succ is automatically updated. The probability is not provided because + /// BPI is not available (e.g. -O0 is used), in which case edge probabilities + /// won't be used. Using this interface can save some space. + void addSuccessorWithoutProb(MachineBasicBlock *Succ); - /// removeSuccessor - Remove successor from the successors list of this - /// MachineBasicBlock. The Predecessors list of succ is automatically updated. - /// - void removeSuccessor(MachineBasicBlock *succ); + /// Set successor probability of a given iterator. 
+ void setSuccProbability(succ_iterator I, BranchProbability Prob); - /// removeSuccessor - Remove specified successor from the successors list of - /// this MachineBasicBlock. The Predecessors list of succ is automatically - /// updated. Return the iterator to the element after the one removed. - /// - succ_iterator removeSuccessor(succ_iterator I); + /// Normalize probabilities of all successors so that the sum of them becomes + /// one. This is usually done when the current update on this MBB is done, and + /// the sum of its successors' probabilities is not guaranteed to be one. The + /// user is responsible for the correct use of this function. + /// MBB::removeSuccessor() has an option to do this automatically. + void normalizeSuccProbs() { + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + } - /// replaceSuccessor - Replace successor OLD with NEW and update weight info. - /// + /// Validate successors' probabilities and check if the sum of them is + /// approximate one. This only works in DEBUG mode. + void validateSuccProbs() const; + + /// Remove successor from the successors list of this MachineBasicBlock. The + /// Predecessors list of Succ is automatically updated. + /// If NormalizeSuccProbs is true, then normalize successors' probabilities + /// after the successor is removed. + void removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs = false); + + /// Remove specified successor from the successors list of this + /// MachineBasicBlock. The Predecessors list of Succ is automatically updated. + /// If NormalizeSuccProbs is true, then normalize successors' probabilities + /// after the successor is removed. + /// Return the iterator to the element after the one removed. + succ_iterator removeSuccessor(succ_iterator I, + bool NormalizeSuccProbs = false); + + /// Replace successor OLD with NEW and update probability info. void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Transfers all the successors from MBB to this machine basic block (i.e., + /// copies all the successors FromMBB and remove all the successors from + /// FromMBB). + void transferSuccessors(MachineBasicBlock *FromMBB); - /// transferSuccessors - Transfers all the successors from MBB to this - /// machine basic block (i.e., copies all the successors fromMBB and - /// remove all the successors from fromMBB). - void transferSuccessors(MachineBasicBlock *fromMBB); + /// Transfers all the successors, as in transferSuccessors, and update PHI + /// operands in the successor blocks which refer to FromMBB to refer to this. + void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB); - /// transferSuccessorsAndUpdatePHIs - Transfers all the successors, as - /// in transferSuccessors, and update PHI operands in the successor blocks - /// which refer to fromMBB to refer to this. - void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB); + /// Return true if any of the successors have probabilities attached to them. + bool hasSuccessorProbabilities() const { return !Probs.empty(); } - /// isPredecessor - Return true if the specified MBB is a predecessor of this - /// block. + /// Return true if the specified MBB is a predecessor of this block. bool isPredecessor(const MachineBasicBlock *MBB) const; - /// isSuccessor - Return true if the specified MBB is a successor of this - /// block. + /// Return true if the specified MBB is a successor of this block. 
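A sketch of how the probability-based successor API above is meant to be used; MBB, OtherMBB, ThenMBB, ElseMBB and NextMBB are placeholders for blocks owned by the caller:

  // Profile data available: attach explicit probabilities, then normalize so
  // they sum to one. Mixing known and unknown probabilities is not allowed.
  MBB->addSuccessor(ThenMBB, llvm::BranchProbability(1, 4));
  MBB->addSuccessor(ElseMBB, llvm::BranchProbability(3, 4));
  MBB->normalizeSuccProbs();

  // No profile data (e.g. at -O0): skip probabilities entirely and save space.
  OtherMBB->addSuccessorWithoutProb(NextMBB);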
bool isSuccessor(const MachineBasicBlock *MBB) const; - /// isLayoutSuccessor - Return true if the specified MBB will be emitted - /// immediately after this block, such that if this block exits by - /// falling through, control will transfer to the specified MBB. Note - /// that MBB need not be a successor at all, for example if this block - /// ends with an unconditional branch to some other block. + /// Return true if the specified MBB will be emitted immediately after this + /// block, such that if this block exits by falling through, control will + /// transfer to the specified MBB. Note that MBB need not be a successor at + /// all, for example if this block ends with an unconditional branch to some + /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// canFallThrough - Return true if the block can implicitly transfer - /// control to the block after it by falling off the end of it. This should - /// return false if it can reach the block after it, but it uses an explicit - /// branch to do so (e.g., a table jump). True is a conservative answer. + /// Return true if the block can implicitly transfer control to the block + /// after it by falling off the end of it. This should return false if it can + /// reach the block after it, but it uses an explicit branch to do so (e.g., a + /// table jump). True is a conservative answer. bool canFallThrough(); /// Returns a pointer to the first instruction in this block that is not a @@ -452,40 +522,44 @@ public: /// Returns end() is there's no non-PHI instruction. iterator getFirstNonPHI(); - /// SkipPHIsAndLabels - Return the first instruction in MBB after I that is - /// not a PHI or a label. This is the correct point to insert copies at the - /// beginning of a basic block. + /// Return the first instruction in MBB after I that is not a PHI or a label. + /// This is the correct point to insert copies at the beginning of a basic + /// block. iterator SkipPHIsAndLabels(iterator I); - /// getFirstTerminator - returns an iterator to the first terminator - /// instruction of this basic block. If a terminator does not exist, - /// it returns end() + /// Returns an iterator to the first terminator instruction of this basic + /// block. If a terminator does not exist, it returns end(). iterator getFirstTerminator(); const_iterator getFirstTerminator() const { return const_cast(this)->getFirstTerminator(); } - /// getFirstInstrTerminator - Same getFirstTerminator but it ignores bundles - /// and return an instr_iterator instead. + /// Same getFirstTerminator but it ignores bundles and return an + /// instr_iterator instead. instr_iterator getFirstInstrTerminator(); - /// getFirstNonDebugInstr - returns an iterator to the first non-debug - /// instruction in the basic block, or end() + /// Returns an iterator to the first non-debug instruction in the basic block, + /// or end(). iterator getFirstNonDebugInstr(); const_iterator getFirstNonDebugInstr() const { return const_cast(this)->getFirstNonDebugInstr(); } - /// getLastNonDebugInstr - returns an iterator to the last non-debug - /// instruction in the basic block, or end() + /// Returns an iterator to the last non-debug instruction in the basic block, + /// or end(). 
iterator getLastNonDebugInstr(); const_iterator getLastNonDebugInstr() const { return const_cast(this)->getLastNonDebugInstr(); } - /// SplitCriticalEdge - Split the critical edge from this block to the - /// given successor block, and return the newly created block, or null - /// if splitting is not possible. + /// Convenience function that returns true if the block ends in a return + /// instruction. + bool isReturnBlock() const { + return !empty() && back().isReturn(); + } + + /// Split the critical edge from this block to the given successor block, and + /// return the newly created block, or null if splitting is not possible. /// /// This function updates LiveVariables, MachineDominatorTree, and /// MachineLoopInfo, as applicable. @@ -570,7 +644,7 @@ public: /// remove_instr to remove individual instructions from a bundle. MachineInstr *remove(MachineInstr *I) { assert(!I->isBundled() && "Cannot remove bundled instructions"); - return Insts.remove(I); + return Insts.remove(instr_iterator(I)); } /// Remove the possibly bundled instruction from the instruction list @@ -605,30 +679,29 @@ public: From.getInstrIterator(), To.getInstrIterator()); } - /// removeFromParent - This method unlinks 'this' from the containing - /// function, and returns it, but does not delete it. + /// This method unlinks 'this' from the containing function, and returns it, + /// but does not delete it. MachineBasicBlock *removeFromParent(); - /// eraseFromParent - This method unlinks 'this' from the containing - /// function and deletes it. + /// This method unlinks 'this' from the containing function and deletes it. void eraseFromParent(); - /// ReplaceUsesOfBlockWith - Given a machine basic block that branched to - /// 'Old', change the code and CFG so that it branches to 'New' instead. + /// Given a machine basic block that branched to 'Old', change the code and + /// CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); - /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in - /// the CFG to be inserted. If we have proven that MBB can only branch to - /// DestA and DestB, remove any other MBB successors from the CFG. DestA and - /// DestB can be null. Besides DestA and DestB, retain other edges leading - /// to LandingPads (currently there can be only one; we don't check or require - /// that here). Note it is possible that DestA and/or DestB are LandingPads. + /// Various pieces of code can cause excess edges in the CFG to be inserted. + /// If we have proven that MBB can only branch to DestA and DestB, remove any + /// other MBB successors from the CFG. DestA and DestB can be null. Besides + /// DestA and DestB, retain other edges leading to LandingPads (currently + /// there can be only one; we don't check or require that here). Note it is + /// possible that DestA and/or DestB are LandingPads. bool CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, - bool isCond); + bool IsCond); - /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping - /// any DBG_VALUE instructions. Return UnknownLoc if there is none. + /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE + /// instructions. Return UnknownLoc if there is none. 
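For orientation, a sketch of the usual way these helpers combine when inserting compensation code; TII, DstReg and SrcReg are assumed to come from the surrounding pass, and BuildMI from MachineInstrBuilder.h:

  // Insert a COPY right before the first terminator so it executes on every
  // path that leaves the block, and give it a sensible debug location.
  llvm::MachineBasicBlock::iterator IP = MBB.getFirstTerminator();
  llvm::DebugLoc DL = MBB.findDebugLoc(IP);
  BuildMI(MBB, IP, DL, TII->get(llvm::TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);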
   DebugLoc findDebugLoc(instr_iterator MBBI);
   DebugLoc findDebugLoc(iterator MBBI) {
     return findDebugLoc(MBBI.getInstrIterator());
@@ -636,12 +709,9 @@ public:

   /// Possible outcome of a register liveness query to computeRegisterLiveness()
   enum LivenessQueryResult {
-    LQR_Live,            ///< Register is known to be live.
-    LQR_OverlappingLive, ///< Register itself is not live, but some overlapping
-                         ///< register is.
-    LQR_Dead,            ///< Register is known to be dead.
-    LQR_Unknown          ///< Register liveness not decidable from local
-                         ///< neighborhood.
+    LQR_Live,   ///< Register is known to be (at least partially) live.
+    LQR_Dead,   ///< Register is known to be fully dead.
+    LQR_Unknown ///< Register liveness not decidable from local neighborhood.
   };

   /// Return whether (physical) register \p Reg has been <def>ined and not
@@ -666,49 +736,43 @@ public:
   // Printing method used by LoopInfo.
   void printAsOperand(raw_ostream &OS, bool PrintType = true) const;

-  /// getNumber - MachineBasicBlocks are uniquely numbered at the function
-  /// level, unless they're not in a MachineFunction yet, in which case this
-  /// will return -1.
-  ///
+  /// MachineBasicBlocks are uniquely numbered at the function level, unless
+  /// they're not in a MachineFunction yet, in which case this will return -1.
   int getNumber() const { return Number; }
   void setNumber(int N) { Number = N; }

-  /// getSymbol - Return the MCSymbol for this basic block.
-  ///
+  /// Return the MCSymbol for this basic block.
   MCSymbol *getSymbol() const;

 private:
-  /// getWeightIterator - Return weight iterator corresponding to the I
-  /// successor iterator.
-  weight_iterator getWeightIterator(succ_iterator I);
-  const_weight_iterator getWeightIterator(const_succ_iterator I) const;
+  /// Return probability iterator corresponding to the I successor iterator.
+  probability_iterator getProbabilityIterator(succ_iterator I);
+  const_probability_iterator
+  getProbabilityIterator(const_succ_iterator I) const;

   friend class MachineBranchProbabilityInfo;
+  friend class MIPrinter;

-  /// getSuccWeight - Return weight of the edge from this block to MBB. This
-  /// method should NOT be called directly, but by using getEdgeWeight method
-  /// from MachineBranchProbabilityInfo class.
-  uint32_t getSuccWeight(const_succ_iterator Succ) const;
-
+  /// Return probability of the edge from this block to MBB. This method should
+  /// NOT be called directly, but by using the getEdgeProbability method from
+  /// the MachineBranchProbabilityInfo class.
+  BranchProbability getSuccProbability(const_succ_iterator Succ) const;

   // Methods used to maintain doubly linked list of blocks...
   friend struct ilist_traits<MachineBasicBlock>;

   // Machine-CFG mutators

-  /// addPredecessor - Remove pred as a predecessor of this MachineBasicBlock.
-  /// Don't do this unless you know what you're doing, because it doesn't
-  /// update pred's successors list. Use pred->addSuccessor instead.
-  ///
-  void addPredecessor(MachineBasicBlock *pred);
+  /// Add Pred as a predecessor of this MachineBasicBlock. Don't do this
+  /// unless you know what you're doing, because it doesn't update Pred's
+  /// successors list. Use Pred->addSuccessor instead.
+  void addPredecessor(MachineBasicBlock *Pred);

-  /// removePredecessor - Remove pred as a predecessor of this
-  /// MachineBasicBlock. Don't do this unless you know what you're
-  /// doing, because it doesn't update pred's successors list. Use
-  /// pred->removeSuccessor instead.
-  ///
-  void removePredecessor(MachineBasicBlock *pred);
+  /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this
+  /// unless you know what you're doing, because it doesn't update Pred's
+  /// successors list. Use Pred->removeSuccessor instead.
+  void removePredecessor(MachineBasicBlock *Pred);
 };

 raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);

@@ -726,7 +790,7 @@ struct MBB2NumberFunctor :

 //===--------------------------------------------------------------------===//
 // Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks...
+// MachineFunction as a graph of MachineBasicBlocks.
 //

 template <> struct GraphTraits<MachineBasicBlock *> {
@@ -756,7 +820,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
 };

 // Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks... and to walk it
+// MachineFunction as a graph of MachineBasicBlocks and to walk it
 // in inverse order. Inverse order for a function is considered
 // to be when traversing the predecessor edges of a MBB
 // instead of the successor edges.
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7ba749559c0f..81b0524cf0a4 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
 #include <climits>
+#include <numeric>

 namespace llvm {

@@ -44,20 +45,15 @@ public:
     AU.setPreservesAll();
   }

-  // Return edge weight. If we don't have any informations about it - return
-  // DEFAULT_WEIGHT.
-  uint32_t getEdgeWeight(const MachineBasicBlock *Src,
-                         const MachineBasicBlock *Dst) const;
+  // Return edge probability.
+  BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+                                       const MachineBasicBlock *Dst) const;

-  // Same thing, but using a const_succ_iterator from Src. This is faster when
-  // the iterator is already available.
-  uint32_t getEdgeWeight(const MachineBasicBlock *Src,
-                         MachineBasicBlock::const_succ_iterator Dst) const;
-
-  // Get sum of the block successors' weights, potentially scaling them to fit
-  // within 32-bits. If scaling is required, sets Scale based on the necessary
-  // adjustment. Any edge weights used with the sum should be divided by Scale.
-  uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const;
+  // Same as above, but using a const_succ_iterator from Src. This is faster
+  // when the iterator is already available.
+  BranchProbability
+  getEdgeProbability(const MachineBasicBlock *Src,
+                     MachineBasicBlock::const_succ_iterator Dst) const;

   // A 'Hot' edge is an edge which probability is >= 80%.
   bool isEdgeHot(const MachineBasicBlock *Src,
@@ -67,15 +63,6 @@ public:
   // NB: This routine's complexity is linear on the number of successors.
   MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const;

-  // Return a probability as a fraction between 0 (0% probability) and
-  // 1 (100% probability), however the value is never equal to 0, and can be 1
-  // only iff SRC block has only one successor.
-  // NB: This routine's complexity is linear on the number of successors of
-  // Src. Querying sequentially for each successor's probability is a quadratic
-  // query pattern.
-  BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
-                                       const MachineBasicBlock *Dst) const;
-
   // Print value between 0 (0% probability) and 1 (100% probability),
   // however the value is never equal to 0, and can be 1 only iff SRC block
   // has only one successor.
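A sketch of a client migrating from the removed weight-based API; MBPI, Src and Dst are assumed to be supplied by the caller:

  // getEdgeProbability() replaces getEdgeWeight(); there is no longer any
  // need to rescale by getSumForBlock() by hand.
  static bool isLikelyTaken(const llvm::MachineBranchProbabilityInfo &MBPI,
                            const llvm::MachineBasicBlock *Src,
                            const llvm::MachineBasicBlock *Dst) {
    // Matches the 80% threshold that isEdgeHot() documents above.
    return MBPI.getEdgeProbability(Src, Dst) >= llvm::BranchProbability(4, 5);
  }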
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
index 176af14dc317..f3891227746f 100644
--- a/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -17,13 +17,30 @@

 namespace llvm {

-/// Enumeration of instruction pattern supported by machine combiner
-///
-///
-namespace MachineCombinerPattern {
-// Forward declaration
-enum MC_PATTERN : int;
-} // end namespace MachineCombinerPattern
+/// These are instruction patterns matched by the machine combiner pass.
+enum class MachineCombinerPattern {
+  // These are commutative variants for reassociating a computation chain. See
+  // the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp.
+  REASSOC_AX_BY,
+  REASSOC_AX_YB,
+  REASSOC_XA_BY,
+  REASSOC_XA_YB,
+
+  // These are multiply-add patterns matched by the AArch64 machine combiner.
+  MULADDW_OP1,
+  MULADDW_OP2,
+  MULSUBW_OP1,
+  MULSUBW_OP2,
+  MULADDWI_OP1,
+  MULSUBWI_OP1,
+  MULADDX_OP1,
+  MULADDX_OP2,
+  MULSUBX_OP1,
+  MULSUBX_OP2,
+  MULADDXI_OP1,
+  MULSUBXI_OP1
+};
+
 } // end namespace llvm

 #endif
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 628400322f60..d2036c4a29a5 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -46,13 +46,6 @@ public:
   ///
   Type *getType() const { return Ty; }

-
-  /// getRelocationInfo - This method classifies the entry according to
-  /// whether or not it may generate a relocation entry. This must be
-  /// conservative, so if it might codegen to a relocatable entry, it should say
-  /// so. The return values are the same as Constant::getRelocationInfo().
-  virtual unsigned getRelocationInfo() const = 0;
-
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
                                         unsigned Alignment) = 0;

@@ -67,7 +60,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
   V.print(OS);
   return OS;
 }
-

 /// This class is a data container for one entry in a MachineConstantPool.
 /// It contains a pointer to the value and an offset from the start of
@@ -90,9 +82,9 @@ public:
     Val.ConstVal = V;
   }
   MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
-    : Alignment(A) {
-    Val.MachineCPVal = V;
-    Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
+      : Alignment(A) {
+    Val.MachineCPVal = V;
+    Alignment |= 1U << (sizeof(unsigned) * CHAR_BIT - 1);
   }

   /// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry
@@ -102,28 +94,20 @@ public:
     return (int)Alignment < 0;
   }

-  int getAlignment() const {
-    return Alignment & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
+  int getAlignment() const {
+    return Alignment & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
   }

   Type *getType() const;
-
-  /// getRelocationInfo - This method classifies the entry according to
-  /// whether or not it may generate a relocation entry. This must be
-  /// conservative, so if it might codegen to a relocatable entry, it should say
-  /// so. The return values are:
-  ///
-  /// 0: This constant pool entry is guaranteed to never have a relocation
-  ///    applied to it (because it holds a simple constant like '4').
-  /// 1: This entry has relocations, but the entries are guaranteed to be
-  ///    resolvable by the static linker, so the dynamic linker will never see
-  ///    them.
-  /// 2: This entry may have arbitrary relocations.
-  unsigned getRelocationInfo() const;
+
+  /// This method classifies the entry according to whether or not it may
+  /// generate a relocation entry. This must be conservative, so if it might
This must be conservative, so if it might + /// codegen to a relocatable entry, it should say so. + bool needsRelocation() const; SectionKind getSectionKind(const DataLayout *DL) const; }; - + /// The MachineConstantPool class keeps track of constants referenced by a /// function which must be spilled to memory. This is used for constants which /// are unable to be used directly as operands to instructions, which typically @@ -148,17 +132,18 @@ public: explicit MachineConstantPool(const DataLayout &DL) : PoolAlignment(1), DL(DL) {} ~MachineConstantPool(); - + /// getConstantPoolAlignment - Return the alignment required by /// the whole constant pool, of which the first element must be aligned. unsigned getConstantPoolAlignment() const { return PoolAlignment; } - + /// getConstantPoolIndex - Create a new entry in the constant pool or return /// an existing one. User must specify the minimum required alignment for /// the object. unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment); - unsigned getConstantPoolIndex(MachineConstantPoolValue *V,unsigned Alignment); - + unsigned getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment); + /// isEmpty - Return true if this constant pool contains no constants. bool isEmpty() const { return Constants.empty(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 735dd069cf7f..a69936f6e267 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -246,21 +246,29 @@ public: /// iterable by generic graph iterators. /// -template struct GraphTraits; +template +struct MachineDomTreeGraphTraitsBase { + typedef Node NodeType; + typedef ChildIterator ChildIteratorType; -template <> struct GraphTraits { - typedef MachineDomTreeNode NodeType; - typedef NodeType::iterator ChildIteratorType; - - static NodeType *getEntryNode(NodeType *N) { - return N; - } - static inline ChildIteratorType child_begin(NodeType* N) { + static NodeType *getEntryNode(NodeType *N) { return N; } + static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(NodeType* N) { - return N->end(); - } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } +}; + +template struct GraphTraits; + +template <> +struct GraphTraits + : public MachineDomTreeGraphTraitsBase {}; + +template <> +struct GraphTraits + : public MachineDomTreeGraphTraitsBase { }; template <> struct GraphTraits diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index cbc4e66ccc46..48e8ca75052e 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -101,6 +101,13 @@ class MachineFrameInfo { // cannot alias any other memory objects. bool isSpillSlot; + /// If true, this stack slot is used to spill a value (could be deopt + /// and/or GC related) over a statepoint. We know that the address of the + /// slot can't alias any LLVM IR value. This is very similar to a Spill + /// Slot, but is created by statepoint lowering in SelectionDAG, not the + /// register allocator. + bool isStatepointSpillSlot; + /// If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL.
const AllocaInst *Alloca; @@ -118,13 +125,24 @@ class MachineFrameInfo { StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, bool isSS, const AllocaInst *Val, bool A) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), Alloca(Val), PreAllocated(false), isAliased(A) {} + isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val), + PreAllocated(false), isAliased(A) {} }; /// The alignment of the stack. unsigned StackAlignment; /// Can the stack be realigned. + /// Targets that set this to false don't have the ability to overalign + /// their stack frame, and thus, overaligned allocas are all treated + /// as dynamic allocations and the target must handle them as part + /// of DYNAMIC_STACKALLOC lowering. + /// FIXME: There is room for improvement in this case, in terms of + /// grouping overaligned allocas into a "secondary stack frame" and + /// then only using a single alloca to allocate this frame and only a + /// single virtual register to access it. Currently, without such an + /// optimization, each such alloca gets its own dynamic + /// realignment. bool StackRealignable; /// The list of stack objects allocated. @@ -168,7 +186,7 @@ class MachineFrameInfo { /// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set /// to the distance between the initial SP and the value in FP. For many /// targets, this value is only used when generating debug info (via - /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the + /// TargetRegisterInfo::getFrameIndexReference); when generating code, the /// corresponding adjustments are performed directly. int OffsetAdjustment; @@ -198,7 +216,7 @@ class MachineFrameInfo { /// This contains the size of the largest call frame if the target uses frame /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo /// class). This information is important for frame pointer elimination. - /// If is only valid during and after prolog/epilog code insertion. + /// It is only valid during and after prolog/epilog code insertion. unsigned MaxCallFrameSize; /// The prolog/epilog code inserter fills in this vector with each @@ -288,6 +306,7 @@ public: /// Return the index for the stack protector object. int getStackProtectorIndex() const { return StackProtectorIdx; } void setStackProtectorIndex(int I) { StackProtectorIdx = I; } + bool hasStackProtectorIndex() const { return StackProtectorIdx != -1; } /// Return the index for the function context object. /// This object is used for SjLj exceptions. @@ -337,14 +356,14 @@ public: } /// Get the local offset mapping for an object. - std::pair getLocalFrameObjectMap(int i) { + std::pair getLocalFrameObjectMap(int i) const { assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() && "Invalid local object reference!"); return LocalFrameObjects[i]; } /// Return the number of objects allocated into the local object block. - int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); } + int64_t getLocalFrameObjectCount() const { return LocalFrameObjects.size(); } /// Set the size of the local object blob. void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; } @@ -361,7 +380,9 @@ public: /// Get whether the local allocation blob should be allocated together or /// let PEI allocate the locals in it directly.
- bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;} + bool getUseLocalStackAllocationBlock() const { + return UseLocalStackAllocationBlock; + } /// setUseLocalStackAllocationBlock - Set whether the local allocation blob /// should be allocated together or let PEI allocate the locals in it @@ -534,6 +555,12 @@ public: return Objects[ObjectIdx+NumFixedObjects].isSpillSlot; } + bool isStatepointSpillSlotObjectIndex(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot; + } + /// Returns true if the specified index corresponds to a dead object. bool isDeadObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && @@ -549,6 +576,13 @@ public: return Objects[ObjectIdx + NumFixedObjects].Size == 0; } + void markAsStatepointSpillSlotObjectIndex(int ObjectIdx) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot = true; + assert(isStatepointSpillSlotObjectIndex(ObjectIdx) && "inconsistent"); + } + /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index c15ee1c006cd..82c30d39afd6 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -38,10 +38,12 @@ class MachineJumpTableInfo; class MachineModuleInfo; class MCContext; class Pass; +class PseudoSourceValueManager; class TargetMachine; class TargetSubtargetInfo; class TargetRegisterClass; struct MachinePointerInfo; +struct WinEHFuncInfo; template <> struct ilist_traits @@ -102,10 +104,14 @@ class MachineFunction { // Keep track of constants which are spilled to memory MachineConstantPool *ConstantPool; - + // Keep track of jump tables for switch instructions MachineJumpTableInfo *JumpTableInfo; + // Keeps track of Windows exception handling related data. This will be null + // for functions that aren't using a funclet-based EH personality. + WinEHFuncInfo *WinEHInfo = nullptr; + // Function-level unique numbering for MachineBasicBlocks. When a // MachineBasicBlock is inserted into a MachineFunction is it automatically // numbered and this vector keeps track of the mapping from ID's to MBB's. @@ -131,7 +137,7 @@ class MachineFunction { /// this translation unit. /// unsigned FunctionNumber; - + /// Alignment - The alignment of the function. unsigned Alignment; @@ -145,6 +151,9 @@ class MachineFunction { /// True if the function includes any inline assembly. bool HasInlineAsm; + // Allocation management for pseudo source values. + std::unique_ptr PSVManager; + MachineFunction(const MachineFunction &) = delete; void operator=(const MachineFunction&) = delete; public: @@ -155,6 +164,8 @@ public: MachineModuleInfo &getMMI() const { return MMI; } MCContext &getContext() const { return Ctx; } + PseudoSourceValueManager &getPSVManager() const { return *PSVManager; } + /// Return the DataLayout attached to the Module associated to this MF. 
const DataLayout &getDataLayout() const; @@ -198,7 +209,7 @@ public: MachineFrameInfo *getFrameInfo() { return FrameInfo; } const MachineFrameInfo *getFrameInfo() const { return FrameInfo; } - /// getJumpTableInfo - Return the jump table info object for the current + /// getJumpTableInfo - Return the jump table info object for the current /// function. This object contains information about jump tables in the /// current function. If the current function has no jump tables, this will /// return null. @@ -209,13 +220,18 @@ public: /// does already exist, allocate one. MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind); - /// getConstantPool - Return the constant pool object for the current /// function. /// MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } + /// getWinEHFuncInfo - Return information about how the current function uses + /// Windows exception handling. Returns null for functions that don't use + /// funclets for exception handling. + const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; } + WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } + /// getAlignment - Return the alignment (log2, not bytes) of the function. /// unsigned getAlignment() const { return Alignment; } @@ -284,14 +300,14 @@ public: /// getNumBlockIDs - Return the number of MBB ID's allocated. /// unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); } - + /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, /// dense, and match the ordering of the blocks within the function. If a /// specific MachineBasicBlock is specified, only that block and those after /// it are renumbered. void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr); - + /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. /// @@ -326,6 +342,12 @@ public: typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; + /// Support for MachineBasicBlock::getNextNode(). + static BasicBlockListType MachineFunction::* + getSublistAccess(MachineBasicBlock *) { + return &MachineFunction::BasicBlocks; + } + /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. 
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC); @@ -358,15 +380,21 @@ public: void splice(iterator InsertPt, iterator MBBI) { BasicBlocks.splice(InsertPt, BasicBlocks, MBBI); } + void splice(iterator InsertPt, MachineBasicBlock *MBB) { + BasicBlocks.splice(InsertPt, BasicBlocks, MBB); + } void splice(iterator InsertPt, iterator MBBI, iterator MBBE) { BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE); } - void remove(iterator MBBI) { - BasicBlocks.remove(MBBI); - } - void erase(iterator MBBI) { - BasicBlocks.erase(MBBI); + void remove(iterator MBBI) { BasicBlocks.remove(MBBI); } + void remove(MachineBasicBlock *MBBI) { BasicBlocks.remove(MBBI); } + void erase(iterator MBBI) { BasicBlocks.erase(MBBI); } + void erase(MachineBasicBlock *MBBI) { BasicBlocks.erase(MBBI); } + + template + void sort(Comp comp) { + BasicBlocks.sort(comp); } //===--------------------------------------------------------------------===// @@ -425,7 +453,7 @@ public: unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - + /// getMachineMemOperand - Allocate a new MachineMemOperand by copying /// an existing one, adjusting by an offset and using the given size. /// MachineMemOperands are owned by the MachineFunction and need not be @@ -475,16 +503,19 @@ public: extractStoreMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End); + /// Allocate a string and populate it with the given external symbol name. + const char *createExternalSymbolName(StringRef Name); + //===--------------------------------------------------------------------===// // Label Manipulation. // - + /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. - MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx, + MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate = false) const; - + /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *getPICBaseSymbol() const; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index de7e0a29ea0d..978864e96ca5 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -23,6 +23,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" @@ -34,7 +35,6 @@ namespace llvm { template class SmallVectorImpl; -class AliasAnalysis; class TargetInstrInfo; class TargetRegisterClass; class TargetRegisterInfo; @@ -48,7 +48,8 @@ class MachineMemOperand; /// MachineFunction is deleted, all the contained MachineInstrs are deallocated /// without having their destructor called. /// -class MachineInstr : public ilist_node { +class MachineInstr + : public ilist_node_with_parent { public: typedef MachineMemOperand **mmo_iterator; @@ -64,8 +65,10 @@ public: NoFlags = 0, FrameSetup = 1 << 0, // Instruction is used as a part of // function frame setup code. - BundledPred = 1 << 1, // Instruction has bundled predecessors. - BundledSucc = 1 << 2 // Instruction has bundled successors. + FrameDestroy = 1 << 1, // Instruction is used as a part of + // function frame destruction code. + BundledPred = 1 << 2, // Instruction has bundled predecessors. + BundledSucc = 1 << 3 // Instruction has bundled successors. 
}; private: const MCInstrDesc *MCID; // Instruction descriptor. @@ -89,6 +92,12 @@ private: // information to AsmPrinter. uint8_t NumMemRefs; // Information on memory references. + // Note that MemRefs == nullptr means 'don't know', not 'no memory access'. + // Calling code must treat missing information conservatively. If the number + // of memory operands required to be precise exceeds the maximum value of + // NumMemRefs - currently 256 - we remove the operands entirely. Note also + // that this is a non-owning reference to a shared copy-on-write buffer owned + // by the MachineFunction and created via MF.allocateMemRefsArray. mmo_iterator MemRefs; DebugLoc debugLoc; // Source line information. @@ -293,42 +302,46 @@ public: const_mop_iterator operands_end() const { return Operands + NumOperands; } iterator_range operands() { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range operands() const { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range explicit_operands() { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range explicit_operands() const { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range implicit_operands() { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } iterator_range implicit_operands() const { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } + /// Returns a range over all explicit operands that are register definitions. + /// Implicit definitions are not included! iterator_range defs() { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } + /// \copydoc defs() iterator_range defs() const { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } + /// Returns a range that includes all operands that are register uses. + /// This may include unrelated operands which are not register uses. iterator_range uses() { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } + /// \copydoc uses() iterator_range uses() const { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// Returns the number of the operand iterator \p I points to. @@ -339,13 +352,16 @@ public: /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } + /// Return true if we don't have any memory operands which describe the + /// memory access done by this instruction. If this is true, calling code + /// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; } iterator_range memoperands() { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } iterator_range memoperands() const { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } /// Return true if this instruction has exactly one MachineMemOperand. @@ -489,8 +505,8 @@ public: } /// Return true if this instruction is convergent. - /// Convergent instructions can only be moved to locations that are - /// control-equivalent to their initial position. + /// Convergent instructions can not be made control-dependent on any + /// additional values. bool isConvergent(QueryType Type = AnyInBundle) const { return hasProperty(MCID::Convergent, Type); } @@ -897,6 +913,13 @@ public: return (Idx == -1) ? nullptr : &getOperand(Idx); } + const MachineOperand *findRegisterUseOperand( + unsigned Reg, bool isKill = false, + const TargetRegisterInfo *TRI = nullptr) const { + return const_cast(this)-> + findRegisterUseOperand(Reg, isKill, TRI); + } + /// Returns the operand index that is a def of the specified register or /// -1 if it is not found. If isDead is true, defs that are not dead are /// skipped. If Overlap is true, then it also looks for defs that merely @@ -1048,7 +1071,7 @@ public: /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void addRegisterDefReadUndef(unsigned Reg); + void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); /// We have determined MI defines a register. Make sure there is an operand /// defining Reg. @@ -1094,6 +1117,9 @@ public: /// bool hasUnmodeledSideEffects() const; + /// Returns true if it is illegal to fold a load across this instruction. + bool isLoadFoldBarrier() const; + /// Return true if all the defs of this instruction are dead. bool allDefsAreDead() const; @@ -1159,8 +1185,11 @@ public: assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs"); } - /// Clear this MachineInstr's memory reference descriptor list. - void clearMemRefs() { + /// Clear this MachineInstr's memory reference descriptor list. This resets + /// the memrefs to their most conservative state. This should be used only + /// as a last resort since it greatly pessimizes our knowledge of the memory + /// access performed by the instruction. + void dropMemRefs() { MemRefs = nullptr; NumMemRefs = 0; } @@ -1174,6 +1203,8 @@ public: } } + /// Add all implicit def and use operands to this instruction. + void addImplicitDefUseOperands(MachineFunction &MF); private: /// If this instruction is embedded into a MachineFunction, return the @@ -1181,9 +1212,6 @@ private: /// return null. MachineRegisterInfo *getRegInfo(); - /// Add all implicit def and use operands to this instruction. - void addImplicitDefUseOperands(MachineFunction &MF); - /// Unlink all of the register operands in this instruction from their /// respective use lists. This requires that the operands already be on their /// use lists. 
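The comments added in the MachineInstr hunks above tighten the contract around MemRefs: an empty memoperand list means "unknown access", not "no access", and dropMemRefs deliberately resets to that most conservative state. A short sketch of what the conservative treatment looks like from a caller's side; this is illustrative only, and mustAssumeWorstCase is a hypothetical helper:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Hypothetical helper: must MI be treated as potentially touching any memory?
static bool mustAssumeWorstCase(const MachineInstr &MI) {
  if (!MI.mayLoad() && !MI.mayStore())
    return false;                // instruction does not access memory at all
  if (MI.memoperands_empty())
    return true;                 // MemRefs == nullptr: 'don't know', assume the worst
  for (const MachineMemOperand *MMO : MI.memoperands())
    if (MMO->isVolatile())
      return true;               // volatile accesses must not be reordered
  return false;                  // precise, non-volatile memory operands
}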
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 4f68f38b7bbf..aa5f4b24df61 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -49,11 +49,10 @@ public: MachineInstrBuilder() : MF(nullptr), MI(nullptr) {} /// Create a MachineInstrBuilder for manipulating an existing instruction. - /// F must be the machine function that was used to allocate I. + /// F must be the machine function that was used to allocate I. MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {} /// Allow automatic conversion to the machine instruction we are working on. - /// operator MachineInstr*() const { return MI; } MachineInstr *operator->() const { return MI; } operator MachineBasicBlock::iterator() const { return MI; } @@ -62,11 +61,9 @@ public: /// explicitly. MachineInstr *getInstr() const { return MI; } - /// addReg - Add a new virtual register operand... - /// - const - MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, - unsigned SubReg = 0) const { + /// Add a new virtual register operand. + const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, + unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); MI->addOperand(*MF, MachineOperand::CreateReg(RegNo, @@ -82,8 +79,7 @@ public: return *this; } - /// addImm - Add a new immediate operand. - /// + /// Add a new immediate operand. const MachineInstrBuilder &addImm(int64_t Val) const { MI->addOperand(*MF, MachineOperand::CreateImm(Val)); return *this; @@ -204,44 +200,44 @@ public: // Add a displacement from an existing MachineOperand with an added offset. const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags = 0) const { + // If caller specifies new TargetFlags then use it, otherwise the + // default behavior is to copy the target flags from the existing + // MachineOperand. This means if the caller wants to clear the + // target flags it needs to do so explicitly. + if (0 == TargetFlags) + TargetFlags = Disp.getTargetFlags(); + switch (Disp.getType()) { default: llvm_unreachable("Unhandled operand type in addDisp()"); case MachineOperand::MO_Immediate: return addImm(Disp.getImm() + off); - case MachineOperand::MO_GlobalAddress: { - // If caller specifies new TargetFlags then use it, otherwise the - // default behavior is to copy the target flags from the existing - // MachineOperand. This means if the caller wants to clear the - // target flags it needs to do so explicitly. - if (TargetFlags) - return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off, - TargetFlags); + case MachineOperand::MO_ConstantPoolIndex: + return addConstantPoolIndex(Disp.getIndex(), Disp.getOffset() + off, + TargetFlags); + case MachineOperand::MO_GlobalAddress: return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off, - Disp.getTargetFlags()); - } + TargetFlags); } } /// Copy all the implicit operands from OtherMI onto this one. - const MachineInstrBuilder ©ImplicitOps(const MachineInstr *OtherMI) { + const MachineInstrBuilder & + copyImplicitOps(const MachineInstr *OtherMI) const { MI->copyImplicitOps(*MF, OtherMI); return *this; } }; -/// BuildMI - Builder interface. Specify how to create the initial instruction -/// itself. -/// +/// Builder interface. Specify how to create the initial instruction itself. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)); } -/// BuildMI - This version of the builder sets up the first operand as a +/// This version of the builder sets up the first operand as a /// destination virtual register. -/// inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID, @@ -250,10 +246,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, .addReg(DestReg, RegState::Define); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction before the given position in the given MachineBasicBlock, and -/// sets up the first operand as a destination virtual register. -/// +/// This version of the builder inserts the newly-built instruction before +/// the given position in the given MachineBasicBlock, and sets up the first +/// operand as a destination virtual register. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -282,7 +277,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, const MCInstrDesc &MCID, unsigned DestReg) { if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII = I; + MachineBasicBlock::instr_iterator MII(I); return BuildMI(BB, MII, DL, MCID, DestReg); } @@ -290,10 +285,9 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID, DestReg); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction before the given position in the given MachineBasicBlock, and -/// does NOT take a destination register. -/// +/// This version of the builder inserts the newly-built instruction before the +/// given position in the given MachineBasicBlock, and does NOT take a +/// destination register. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -319,7 +313,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, DebugLoc DL, const MCInstrDesc &MCID) { if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII = I; + MachineBasicBlock::instr_iterator MII(I); return BuildMI(BB, MII, DL, MCID); } @@ -327,20 +321,17 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction at the end of the given MachineBasicBlock, and does NOT take a -/// destination register. -/// +/// This version of the builder inserts the newly-built instruction at the end +/// of the given MachineBasicBlock, and does NOT take a destination register. inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, DebugLoc DL, const MCInstrDesc &MCID) { return BuildMI(*BB, BB->end(), DL, MCID); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction at the end of the given MachineBasicBlock, and sets up the first -/// operand as a destination virtual register. -/// +/// This version of the builder inserts the newly-built instruction at the +/// end of the given MachineBasicBlock, and sets up the first operand as a +/// destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, DebugLoc DL, const MCInstrDesc &MCID, @@ -348,11 +339,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } -/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect+offset /// address. The convention is that a DBG_VALUE is indirect iff the /// second operand is an immediate. -/// inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID, bool IsIndirect, unsigned Reg, unsigned Offset, @@ -377,10 +367,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, } } -/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect+offset /// address and inserts it at position I. -/// inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, const MCInstrDesc &MCID, bool IsIndirect, @@ -476,7 +465,7 @@ public: if (I == Begin) { if (!empty()) MI->bundleWithSucc(); - Begin = MI; + Begin = MI->getIterator(); return *this; } if (I == End) { diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 122022486345..4fbe206fceb9 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -28,7 +28,7 @@ namespace llvm { void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI); - + /// finalizeBundle - Same functionality as the previous finalizeBundle except /// the last instruction in the bundle is not provided as an input. This is /// used in cases where bundles are pre-determined by marking instructions @@ -44,23 +44,23 @@ bool finalizeBundles(MachineFunction &MF); /// getBundleStart - Returns the first instruction in the bundle containing MI. /// inline MachineInstr *getBundleStart(MachineInstr *MI) { - MachineBasicBlock::instr_iterator I = MI; + MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return I; + return &*I; } inline const MachineInstr *getBundleStart(const MachineInstr *MI) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return I; + return &*I; } /// Return an iterator pointing beyond the bundle containing MI. inline MachineBasicBlock::instr_iterator getBundleEnd(MachineInstr *MI) { - MachineBasicBlock::instr_iterator I = MI; + MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; return ++I; @@ -69,7 +69,7 @@ getBundleEnd(MachineInstr *MI) { /// Return an iterator pointing beyond the bundle containing MI. 
inline MachineBasicBlock::const_instr_iterator getBundleEnd(const MachineInstr *MI) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; return ++I; @@ -116,10 +116,10 @@ protected: /// explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) { if (WholeBundle) { - InstrI = getBundleStart(MI); + InstrI = getBundleStart(MI)->getIterator(); InstrE = MI->getParent()->instr_end(); } else { - InstrI = InstrE = MI; + InstrI = InstrE = MI->getIterator(); ++InstrE; } OpI = InstrI->operands_begin(); @@ -164,27 +164,32 @@ public: bool Tied; }; - /// PhysRegInfo - Information about a physical register used by a set of + /// Information about how a physical register Reg is used by a set of /// operands. struct PhysRegInfo { - /// Clobbers - Reg or an overlapping register is defined, or a regmask - /// clobbers Reg. - bool Clobbers; + /// There is a regmask operand indicating Reg is clobbered. + /// \see MachineOperand::CreateRegMask(). + bool Clobbered; - /// Defines - Reg or a super-register is defined. - bool Defines; + /// Reg or one of its aliases is defined. The definition may only cover + /// parts of the register. + bool Defined; + /// Reg or a super-register is defined. The definition covers the full + /// register. + bool FullyDefined; - /// Reads - Read or a super-register is read. - bool Reads; + /// Reg or one of its aliases is read. The register may only be read + /// partially. + bool Read; + /// Reg or a super-register is read. The full register is read. + bool FullyRead; - /// ReadsOverlap - Reg or an overlapping register is read. - bool ReadsOverlap; + /// Reg is FullyDefined and all defs of Reg or an overlapping register are + /// dead. + bool DeadDef; - /// DefinesDead - All defs of a Reg or a super-register are dead. - bool DefinesDead; - - /// There is a kill of Reg or a super-register. - bool Kills; + /// There is a use operand of Reg or a super-register with kill flag set. + bool Killed; }; /// analyzeVirtReg - Analyze how the current instruction or bundle uses a diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a73b92f9a252..1ca0d90465a4 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -27,6 +27,7 @@ namespace llvm { class FoldingSetNodeID; class MDNode; class raw_ostream; +class MachineFunction; class ModuleSlotTracker; /// MachinePointerInfo - This class contains a discriminated union of @@ -62,22 +63,23 @@ struct MachinePointerInfo { /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. - static MachinePointerInfo getConstantPool(); + static MachinePointerInfo getConstantPool(MachineFunction &MF); /// getFixedStack - Return a MachinePointerInfo record that refers to /// the specified FrameIndex. - static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0); + static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, + int64_t Offset = 0); /// getJumpTable - Return a MachinePointerInfo record that refers to a /// jump table entry. - static MachinePointerInfo getJumpTable(); + static MachinePointerInfo getJumpTable(MachineFunction &MF); /// getGOT - Return a MachinePointerInfo record that refers to a /// GOT entry. - static MachinePointerInfo getGOT(); + static MachinePointerInfo getGOT(MachineFunction &MF); /// getStack - stack pointer relative access.
- static MachinePointerInfo getStack(int64_t Offset); + static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset); }; diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 4cdfe2463c99..77571124a1b8 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -35,11 +35,12 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" @@ -59,7 +60,6 @@ class MachineFunction; class Module; class PointerType; class StructType; -struct WinEHFuncInfo; struct SEHHandler { // Filter or finally function. Null indicates a catch-all. @@ -79,13 +79,10 @@ struct LandingPadInfo { SmallVector EndLabels; // Labels after invoke. SmallVector SEHHandlers; // SEH handlers active at this lpad. MCSymbol *LandingPadLabel; // Label at beginning of landing pad. - const Function *Personality; // Personality function. std::vector TypeIds; // List of type ids (filters negative). - int WinEHState; // WinEH specific state number. explicit LandingPadInfo(MachineBasicBlock *MBB) - : LandingPadBlock(MBB), LandingPadLabel(nullptr), Personality(nullptr), - WinEHState(-1) {} + : LandingPadBlock(MBB), LandingPadLabel(nullptr) {} }; //===----------------------------------------------------------------------===// @@ -163,6 +160,13 @@ class MachineModuleInfo : public ImmutablePass { bool CallsEHReturn; bool CallsUnwindInit; + bool HasEHFunclets; + + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using + // -g. At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only. /// DbgInfoAvailable - True if debugging information is available /// in this module. @@ -182,8 +186,6 @@ class MachineModuleInfo : public ImmutablePass { EHPersonality PersonalityTypeCache; - DenseMap> FuncInfoMap; - public: static char ID; // Pass identification, replacement for typeid @@ -220,12 +222,6 @@ public: void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } - const Function *getWinEHParent(const Function *F) const; - WinEHFuncInfo &getWinEHFuncInfo(const Function *F); - bool hasWinEHFuncInfo(const Function *F) const { - return FuncInfoMap.count(getWinEHParent(F)) > 0; - } - /// getInfo - Keep track of various per-function pieces of information for /// backends that would like to do so. /// @@ -252,6 +248,9 @@ public: bool callsUnwindInit() const { return CallsUnwindInit; } void setCallsUnwindInit(bool b) { CallsUnwindInit = b; } + bool hasEHFunclets() const { return HasEHFunclets; } + void setHasEHFunclets(bool V) { HasEHFunclets = V; } + bool usesVAFloatArgument() const { return UsesVAFloatArgument; } @@ -318,16 +317,8 @@ public: /// addPersonality - Provide the personality function for the exception /// information.
- void addPersonality(MachineBasicBlock *LandingPad, - const Function *Personality); void addPersonality(const Function *Personality); - void addWinEHState(MachineBasicBlock *LandingPad, int State); - - /// getPersonalityIndex - Get index of the current personality function inside - /// Personalitites array - unsigned getPersonalityIndex() const; - /// getPersonalities - Return array of personality functions ever seen. const std::vector& getPersonalities() const { return Personalities; @@ -426,13 +417,6 @@ public: return FilterIds; } - /// getPersonality - Return a personality function if available. The presence - /// of one is required to emit exception handling info. - const Function *getPersonality() const; - - /// Classify the personality function amongst known EH styles. - EHPersonality getPersonalityType(); - /// setVariableDbgInfo - Collect information used to emit debugging /// information of a variable. void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index a67f9b5666b1..e7472145e71f 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -18,79 +18,71 @@ #include "llvm/CodeGen/MachineModuleInfo.h" namespace llvm { - class MCSymbol; +class MCSymbol; - /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation - /// for MachO targets. - class MachineModuleInfoMachO : public MachineModuleInfoImpl { - /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", - /// the value is something like "_foo". - DenseMap FnStubs; - - /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like - /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit - /// is true if this GV is external. - DenseMap GVStubs; - - /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like - /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs - /// these are for things with hidden visibility. The extra bit is true if - /// this GV is external. - DenseMap HiddenGVStubs; - - virtual void anchor(); // Out of line virtual method. - public: - MachineModuleInfoMachO(const MachineModuleInfo &) {} - - StubValueTy &getFnStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return FnStubs[Sym]; - } +/// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation +/// for MachO targets. +class MachineModuleInfoMachO : public MachineModuleInfoImpl { + /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", + /// the value is something like "_foo". + DenseMap FnStubs; - StubValueTy &getGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return GVStubs[Sym]; - } + /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit + /// is true if this GV is external. + DenseMap GVStubs; - StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return HiddenGVStubs[Sym]; - } + /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs + /// these are for things with hidden visibility. The extra bit is true if + /// this GV is external. + DenseMap HiddenGVStubs; - /// Accessor methods to return the set of stubs in sorted order. 
- SymbolListTy GetFnStubList() { - return getSortedStubs(FnStubs); - } - SymbolListTy GetGVStubList() { - return getSortedStubs(GVStubs); - } - SymbolListTy GetHiddenGVStubList() { - return getSortedStubs(HiddenGVStubs); - } - }; + virtual void anchor(); // Out of line virtual method. +public: + MachineModuleInfoMachO(const MachineModuleInfo &) {} - /// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation - /// for ELF targets. - class MachineModuleInfoELF : public MachineModuleInfoImpl { - /// GVStubs - These stubs are used to materialize global addresses in PIC - /// mode. - DenseMap GVStubs; + StubValueTy &getFnStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return FnStubs[Sym]; + } - virtual void anchor(); // Out of line virtual method. - public: - MachineModuleInfoELF(const MachineModuleInfo &) {} + StubValueTy &getGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return GVStubs[Sym]; + } - StubValueTy &getGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return GVStubs[Sym]; - } + StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return HiddenGVStubs[Sym]; + } - /// Accessor methods to return the set of stubs in sorted order. + /// Accessor methods to return the set of stubs in sorted order. + SymbolListTy GetFnStubList() { return getSortedStubs(FnStubs); } + SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } + SymbolListTy GetHiddenGVStubList() { return getSortedStubs(HiddenGVStubs); } +}; - SymbolListTy GetGVStubList() { - return getSortedStubs(GVStubs); - } - }; +/// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation +/// for ELF targets. +class MachineModuleInfoELF : public MachineModuleInfoImpl { + /// GVStubs - These stubs are used to materialize global addresses in PIC + /// mode. + DenseMap GVStubs; + + virtual void anchor(); // Out of line virtual method. +public: + MachineModuleInfoELF(const MachineModuleInfo &) {} + + StubValueTy &getGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return GVStubs[Sym]; + } + + /// Accessor methods to return the set of stubs in sorted order. + + SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } +}; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 5e607cdae48e..04191bc1b74f 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -73,7 +73,7 @@ private: /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. - std::vector PhysRegUseDefLists; + std::unique_ptr PhysRegUseDefLists; /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. @@ -95,20 +95,8 @@ private: return MO->Contents.Reg.Next; } - /// UsedRegUnits - This is a bit vector that is computed and set by the - /// register allocator, and must be kept up to date by passes that run after - /// register allocation (though most don't modify this). This is used - /// so that the code generator knows which callee save registers to save and - /// for other target specific uses. - /// This vector has bits set for register units that are modified in the - /// current function. It doesn't include registers clobbered by function - /// calls with register mask operands. 
- BitVector UsedRegUnits; - /// UsedPhysRegMask - Additional used physregs including aliases. /// This bit vector represents all the registers clobbered by function calls. - /// It can model things that UsedRegUnits can't, such as function calls that - /// clobber ymm7 but preserve the low half in xmm7. BitVector UsedPhysRegMask; /// ReservedRegs - This is a bit vector of reserved registers. The target @@ -246,7 +234,7 @@ public: static reg_iterator reg_end() { return reg_iterator(nullptr); } inline iterator_range reg_operands(unsigned Reg) const { - return iterator_range(reg_begin(Reg), reg_end()); + return make_range(reg_begin(Reg), reg_end()); } /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses @@ -262,8 +250,7 @@ public: inline iterator_range reg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_begin(Reg), - reg_instr_end()); + return make_range(reg_instr_begin(Reg), reg_instr_end()); } /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses @@ -278,8 +265,7 @@ public: } inline iterator_range reg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_begin(Reg), - reg_bundle_end()); + return make_range(reg_bundle_begin(Reg), reg_bundle_end()); } /// reg_empty - Return true if there are no instructions using or defining the @@ -299,8 +285,7 @@ public: inline iterator_range reg_nodbg_operands(unsigned Reg) const { - return iterator_range(reg_nodbg_begin(Reg), - reg_nodbg_end()); + return make_range(reg_nodbg_begin(Reg), reg_nodbg_end()); } /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk @@ -317,8 +302,7 @@ public: inline iterator_range reg_nodbg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_nodbg_begin(Reg), - reg_instr_nodbg_end()); + return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end()); } /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk @@ -333,10 +317,9 @@ public: return reg_bundle_nodbg_iterator(nullptr); } - inline iterator_range + inline iterator_range reg_nodbg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_nodbg_begin(Reg), - reg_bundle_nodbg_end()); + return make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end()); } /// reg_nodbg_empty - Return true if the only instructions using or defining @@ -354,7 +337,7 @@ public: static def_iterator def_end() { return def_iterator(nullptr); } inline iterator_range def_operands(unsigned Reg) const { - return iterator_range(def_begin(Reg), def_end()); + return make_range(def_begin(Reg), def_end()); } /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the @@ -370,8 +353,7 @@ public: inline iterator_range def_instructions(unsigned Reg) const { - return iterator_range(def_instr_begin(Reg), - def_instr_end()); + return make_range(def_instr_begin(Reg), def_instr_end()); } /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the @@ -386,8 +368,7 @@ public: } inline iterator_range def_bundles(unsigned Reg) const { - return iterator_range(def_bundle_begin(Reg), - def_bundle_end()); + return make_range(def_bundle_begin(Reg), def_bundle_end()); } /// def_empty - Return true if there are no instructions defining the @@ -412,7 +393,7 @@ public: static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range use_operands(unsigned Reg) const { - return iterator_range(use_begin(Reg), use_end()); + return make_range(use_begin(Reg), use_end()); } /// 
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the @@ -428,8 +409,7 @@ public: inline iterator_range use_instructions(unsigned Reg) const { - return iterator_range(use_instr_begin(Reg), - use_instr_end()); + return make_range(use_instr_begin(Reg), use_instr_end()); } /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the @@ -444,8 +424,7 @@ public: } inline iterator_range use_bundles(unsigned Reg) const { - return iterator_range(use_bundle_begin(Reg), - use_bundle_end()); + return make_range(use_bundle_begin(Reg), use_bundle_end()); } /// use_empty - Return true if there are no instructions using the specified @@ -474,8 +453,7 @@ public: inline iterator_range use_nodbg_operands(unsigned Reg) const { - return iterator_range(use_nodbg_begin(Reg), - use_nodbg_end()); + return make_range(use_nodbg_begin(Reg), use_nodbg_end()); } /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk @@ -492,8 +470,7 @@ public: inline iterator_range use_nodbg_instructions(unsigned Reg) const { - return iterator_range(use_instr_nodbg_begin(Reg), - use_instr_nodbg_end()); + return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end()); } /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk @@ -510,8 +487,7 @@ public: inline iterator_range use_nodbg_bundles(unsigned Reg) const { - return iterator_range(use_bundle_nodbg_begin(Reg), - use_bundle_nodbg_end()); + return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end()); } /// use_nodbg_empty - Return true if there are no non-Debug instructions @@ -540,7 +516,7 @@ public: /// apply sub registers to ToReg in order to obtain a final/proper physical /// register. void replaceRegWith(unsigned FromReg, unsigned ToReg); - + /// getVRegDef - Return the machine instr that defines the specified virtual /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. @@ -626,6 +602,12 @@ public: RegAllocHints[VReg].second = PrefReg; } + /// Specify the preferred register allocation hint for the specified virtual + /// register. + void setSimpleHint(unsigned VReg, unsigned PrefReg) { + setRegAllocationHint(VReg, /*Type=*/0, PrefReg); + } + /// getRegAllocationHint - Return the register allocation hint for the /// specified virtual register. std::pair @@ -650,41 +632,15 @@ public: /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are - /// ignored. + /// ignored. The register is also considered modified when it is set in the + /// UsedPhysRegMask. bool isPhysRegModified(unsigned PhysReg) const; - //===--------------------------------------------------------------------===// - // Physical Register Use Info - //===--------------------------------------------------------------------===// - - /// isPhysRegUsed - Return true if the specified register is used in this - /// function. Also check for clobbered aliases and registers clobbered by - /// function calls with register mask operands. - /// - /// This only works after register allocation. 
- bool isPhysRegUsed(unsigned Reg) const { - if (UsedPhysRegMask.test(Reg)) - return true; - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - if (UsedRegUnits.test(*Units)) - return true; - return false; - } - - /// Mark the specified register unit as used in this function. - /// This should only be called during and after register allocation. - void setRegUnitUsed(unsigned RegUnit) { - UsedRegUnits.set(RegUnit); - } - - /// setPhysRegUsed - Mark the specified register used in this function. - /// This should only be called during and after register allocation. - void setPhysRegUsed(unsigned Reg) { - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - UsedRegUnits.set(*Units); - } + /// Return true if the specified register is modified or read in this + /// function. This checks that no machine operands exist for the register or + /// any of its aliases. The register is also considered used when it is set + /// in the UsedPhysRegMask. + bool isPhysRegUsed(unsigned PhysReg) const; /// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used. /// This corresponds to the bit mask attached to register mask operands. @@ -692,15 +648,9 @@ public: UsedPhysRegMask.setBitsNotInMask(RegMask); } - /// setPhysRegUnused - Mark the specified register unused in this function. - /// This should only be called during and after register allocation. - void setPhysRegUnused(unsigned Reg) { - UsedPhysRegMask.reset(Reg); - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - UsedRegUnits.reset(*Units); - } + const BitVector &getUsedPhysRegsMask() const { return UsedPhysRegMask; } + void setUsedPhysRegMask(BitVector &Mask) { UsedPhysRegMask = Mask; } //===--------------------------------------------------------------------===// // Reserved Register Info @@ -797,7 +747,7 @@ public: /// Returns a mask covering all bits that can appear in lane masks of /// subregisters of the virtual register @p Reg. - unsigned getMaxLaneMaskForVReg(unsigned Reg) const; + LaneBitmask getMaxLaneMaskForVReg(unsigned Reg) const; /// defusechain_iterator - This class provides iterator support for machine /// operands in the function that use or define a specific register. If diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index e80e14e5ccf7..358fd5a3732a 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -77,6 +77,7 @@ #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H #define LLVM_CODEGEN_MACHINESCHEDULER_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" @@ -87,7 +88,6 @@ namespace llvm { extern cl::opt ForceTopDown; extern cl::opt ForceBottomUp; -class AliasAnalysis; class LiveIntervals; class MachineDominatorTree; class MachineLoopInfo; @@ -156,8 +156,12 @@ struct MachineSchedPolicy { bool OnlyTopDown; bool OnlyBottomUp; + // Disable heuristic that tries to fetch nodes from long dependency chains + // first. 
+ bool DisableLatencyHeuristic; + MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false) {} + OnlyBottomUp(false), DisableLatencyHeuristic(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -175,6 +179,8 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) {} + virtual void dumpPolicy() {} + /// Check if pressure tracking is needed before building the DAG and /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } @@ -222,6 +228,7 @@ public: class ScheduleDAGMI : public ScheduleDAGInstrs { protected: AliasAnalysis *AA; + LiveIntervals *LIS; std::unique_ptr SchedImpl; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -248,11 +255,11 @@ protected: #endif public: ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr S, - bool IsPostRA) - : ScheduleDAGInstrs(*C->MF, C->MLI, IsPostRA, - /*RemoveKillFlags=*/IsPostRA, C->LIS), - AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(), - CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) { + bool RemoveKillFlags) + : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), + LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), + CurrentTop(), CurrentBottom(), NextClusterPred(nullptr), + NextClusterSucc(nullptr) { #ifndef NDEBUG NumInstrsScheduled = 0; #endif @@ -261,6 +268,9 @@ public: // Provide a vtable anchor ~ScheduleDAGMI() override; + // Returns LiveIntervals instance for use in DAG mutators and such. + LiveIntervals *getLIS() const { return LIS; } + /// Return true if this DAG supports VReg liveness and RegPressure. virtual bool hasVRegLiveness() const { return false; } @@ -380,7 +390,7 @@ protected: public: ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S) - : ScheduleDAGMI(C, std::move(S), /*IsPostRA=*/false), + : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), RegClassInfo(C->RegClassInfo), DFSResult(nullptr), ShouldTrackPressure(false), RPTracker(RegPressure), TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} @@ -858,6 +868,8 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override; + void dumpPolicy() override; + bool shouldTrackPressure() const override { return RegionPolicy.ShouldTrackPressure; } @@ -915,7 +927,7 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override { /* no configurable policy */ - }; + } /// PostRA scheduling does not track pressure. 
bool shouldTrackPressure() const override { return false; } diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index a728df354677..04d6ee3be531 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -56,53 +56,66 @@ class MVT { FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 13, // 2 x i1 - v4i1 = 14, // 4 x i1 - v8i1 = 15, // 8 x i1 - v16i1 = 16, // 16 x i1 - v32i1 = 17, // 32 x i1 - v64i1 = 18, // 64 x i1 + v2i1 = 13, // 2 x i1 + v4i1 = 14, // 4 x i1 + v8i1 = 15, // 8 x i1 + v16i1 = 16, // 16 x i1 + v32i1 = 17, // 32 x i1 + v64i1 = 18, // 64 x i1 + v512i1 = 19, // 512 x i1 + v1024i1 = 20, // 1024 x i1 + + v1i8 = 21, // 1 x i8 + v2i8 = 22, // 2 x i8 + v4i8 = 23, // 4 x i8 + v8i8 = 24, // 8 x i8 + v16i8 = 25, // 16 x i8 + v32i8 = 26, // 32 x i8 + v64i8 = 27, // 64 x i8 + v128i8 = 28, //128 x i8 + v256i8 = 29, //256 x i8 + + v1i16 = 30, // 1 x i16 + v2i16 = 31, // 2 x i16 + v4i16 = 32, // 4 x i16 + v8i16 = 33, // 8 x i16 + v16i16 = 34, // 16 x i16 + v32i16 = 35, // 32 x i16 + v64i16 = 36, // 64 x i16 + v128i16 = 37, //128 x i16 + + v1i32 = 38, // 1 x i32 + v2i32 = 39, // 2 x i32 + v4i32 = 40, // 4 x i32 + v8i32 = 41, // 8 x i32 + v16i32 = 42, // 16 x i32 + v32i32 = 43, // 32 x i32 + v64i32 = 44, // 64 x i32 + + v1i64 = 45, // 1 x i64 + v2i64 = 46, // 2 x i64 + v4i64 = 47, // 4 x i64 + v8i64 = 48, // 8 x i64 + v16i64 = 49, // 16 x i64 + v32i64 = 50, // 32 x i64 + + v1i128 = 51, // 1 x i128 - v1i8 = 19, // 1 x i8 - v2i8 = 20, // 2 x i8 - v4i8 = 21, // 4 x i8 - v8i8 = 22, // 8 x i8 - v16i8 = 23, // 16 x i8 - v32i8 = 24, // 32 x i8 - v64i8 = 25, // 64 x i8 - v1i16 = 26, // 1 x i16 - v2i16 = 27, // 2 x i16 - v4i16 = 28, // 4 x i16 - v8i16 = 29, // 8 x i16 - v16i16 = 30, // 16 x i16 - v32i16 = 31, // 32 x i16 - v1i32 = 32, // 1 x i32 - v2i32 = 33, // 2 x i32 - v4i32 = 34, // 4 x i32 - v8i32 = 35, // 8 x i32 - v16i32 = 36, // 16 x i32 - v1i64 = 37, // 1 x i64 - v2i64 = 38, // 2 x i64 - v4i64 = 39, // 4 x i64 - v8i64 = 40, // 8 x i64 - v16i64 = 41, // 16 x i64 - v1i128 = 42, // 1 x i128 - FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, LAST_INTEGER_VECTOR_VALUETYPE = v1i128, - v2f16 = 43, // 2 x f16 - v4f16 = 44, // 4 x f16 - v8f16 = 45, // 8 x f16 - v1f32 = 46, // 1 x f32 - v2f32 = 47, // 2 x f32 - v4f32 = 48, // 4 x f32 - v8f32 = 49, // 8 x f32 - v16f32 = 50, // 16 x f32 - v1f64 = 51, // 1 x f64 - v2f64 = 52, // 2 x f64 - v4f64 = 53, // 4 x f64 - v8f64 = 54, // 8 x f64 + v2f16 = 52, // 2 x f16 + v4f16 = 53, // 4 x f16 + v8f16 = 54, // 8 x f16 + v1f32 = 55, // 1 x f32 + v2f32 = 56, // 2 x f32 + v4f32 = 57, // 4 x f32 + v8f32 = 58, // 8 x f32 + v16f32 = 59, // 16 x f32 + v1f64 = 60, // 1 x f64 + v2f64 = 61, // 2 x f64 + v4f64 = 62, // 4 x f64 + v8f64 = 63, // 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = v8f64, @@ -110,23 +123,26 @@ class MVT { FIRST_VECTOR_VALUETYPE = v2i1, LAST_VECTOR_VALUETYPE = v8f64, - x86mmx = 55, // This is an X86 MMX value + x86mmx = 64, // This is an X86 MMX value - Glue = 56, // This glues nodes together during pre-RA sched + Glue = 65, // This glues nodes together during pre-RA sched - isVoid = 57, // This has no value + isVoid = 66, // This has no value - Untyped = 58, // This value takes a register, but has + Untyped = 67, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 0, // This is always the beginning of the list. 
-    LAST_VALUETYPE =  59,   // This always remains at the end of the list.
+    LAST_VALUETYPE =  68,   // This always remains at the end of the list.

     // This is the current maximum for LAST_VALUETYPE.
     // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
     // This value must be a multiple of 32.
-    MAX_ALLOWED_VALUETYPE = 64,
+    MAX_ALLOWED_VALUETYPE = 96,
+
+    // Token - A value of type llvm::TokenTy
+    token = 249,

     // Metadata - This is MDNode or MDString.
     Metadata = 250,
@@ -238,14 +254,23 @@ class MVT {

     /// is512BitVector - Return true if this is a 512-bit vector type.
     bool is512BitVector() const {
-      return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 ||
-              SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
-              SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
+      return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
+              SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
+              SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
+              SimpleTy == MVT::v8i64);
     }

     /// is1024BitVector - Return true if this is a 1024-bit vector type.
     bool is1024BitVector() const {
-      return (SimpleTy == MVT::v16i64);
+      return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
+              SimpleTy == MVT::v64i16  || SimpleTy == MVT::v32i32 ||
+              SimpleTy == MVT::v16i64);
+    }
+
+    /// is2048BitVector - Return true if this is a 2048-bit vector type.
+    bool is2048BitVector() const {
+      return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
+              SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64);
     }

     /// isOverloaded - Return true if this is an overloaded type for TableGen.
@@ -282,35 +307,44 @@ class MVT {
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
-      case v2i1 :
-      case v4i1 :
-      case v8i1 :
-      case v16i1 :
-      case v32i1 :
-      case v64i1: return i1;
-      case v1i8 :
-      case v2i8 :
-      case v4i8 :
-      case v8i8 :
+      case v2i1:
+      case v4i1:
+      case v8i1:
+      case v16i1:
+      case v32i1:
+      case v64i1:
+      case v512i1:
+      case v1024i1: return i1;
+      case v1i8:
+      case v2i8:
+      case v4i8:
+      case v8i8:
       case v16i8:
       case v32i8:
-      case v64i8: return i8;
+      case v64i8:
+      case v128i8:
+      case v256i8: return i8;
       case v1i16:
       case v2i16:
       case v4i16:
       case v8i16:
       case v16i16:
-      case v32i16: return i16;
+      case v32i16:
+      case v64i16:
+      case v128i16: return i16;
       case v1i32:
       case v2i32:
       case v4i32:
       case v8i32:
-      case v16i32: return i32;
+      case v16i32:
+      case v32i32:
+      case v64i32: return i32;
       case v1i64:
       case v2i64:
       case v4i64:
       case v8i64:
-      case v16i64: return i64;
+      case v16i64:
+      case v32i64: return i64;
       case v1i128: return i128;
       case v2f16:
       case v4f16:
@@ -331,19 +365,28 @@ class MVT {
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
+      case v1024i1: return 1024;
+      case v512i1: return 512;
+      case v256i8: return 256;
+      case v128i8:
+      case v128i16: return 128;
+      case v64i1:
+      case v64i8:
+      case v64i16:
+      case v64i32: return 64;
       case v32i1:
       case v32i8:
-      case v32i16: return 32;
-      case v64i1:
-      case v64i8: return 64;
+      case v32i16:
+      case v32i32:
+      case v32i64: return 32;
       case v16i1:
       case v16i8:
       case v16i16:
       case v16i32:
       case v16i64:
       case v16f32: return 16;
-      case v8i1 :
-      case v8i8 :
+      case v8i1:
+      case v8i8:
       case v8i16:
       case v8i32:
       case v8i64:
@@ -390,6 +433,9 @@ class MVT {
       case vAny:
       case Any:
         llvm_unreachable("Value type is overloaded.");
+      case token:
+        llvm_unreachable("Token type is a sentinel that cannot be used "
+                         "in codegen and has no size");
       case Metadata:
         llvm_unreachable("Value type is metadata.");
       case i1  : return 1;
@@ -440,13 +486,22 @@ class MVT {
       case v4i64:
       case v8f32:
       case v4f64: return 256;
+ case v512i1: case v64i8: case v32i16: case v16i32: case v8i64: case v16f32: case v8f64: return 512; - case v16i64:return 1024; + case v1024i1: + case v128i8: + case v64i16: + case v32i32: + case v16i64: return 1024; + case v256i8: + case v128i16: + case v64i32: + case v32i64: return 2048; } } @@ -528,29 +583,35 @@ class MVT { default: break; case MVT::i1: - if (NumElements == 2) return MVT::v2i1; - if (NumElements == 4) return MVT::v4i1; - if (NumElements == 8) return MVT::v8i1; - if (NumElements == 16) return MVT::v16i1; - if (NumElements == 32) return MVT::v32i1; - if (NumElements == 64) return MVT::v64i1; + if (NumElements == 2) return MVT::v2i1; + if (NumElements == 4) return MVT::v4i1; + if (NumElements == 8) return MVT::v8i1; + if (NumElements == 16) return MVT::v16i1; + if (NumElements == 32) return MVT::v32i1; + if (NumElements == 64) return MVT::v64i1; + if (NumElements == 512) return MVT::v512i1; + if (NumElements == 1024) return MVT::v1024i1; break; case MVT::i8: - if (NumElements == 1) return MVT::v1i8; - if (NumElements == 2) return MVT::v2i8; - if (NumElements == 4) return MVT::v4i8; - if (NumElements == 8) return MVT::v8i8; - if (NumElements == 16) return MVT::v16i8; - if (NumElements == 32) return MVT::v32i8; - if (NumElements == 64) return MVT::v64i8; + if (NumElements == 1) return MVT::v1i8; + if (NumElements == 2) return MVT::v2i8; + if (NumElements == 4) return MVT::v4i8; + if (NumElements == 8) return MVT::v8i8; + if (NumElements == 16) return MVT::v16i8; + if (NumElements == 32) return MVT::v32i8; + if (NumElements == 64) return MVT::v64i8; + if (NumElements == 128) return MVT::v128i8; + if (NumElements == 256) return MVT::v256i8; break; case MVT::i16: - if (NumElements == 1) return MVT::v1i16; - if (NumElements == 2) return MVT::v2i16; - if (NumElements == 4) return MVT::v4i16; - if (NumElements == 8) return MVT::v8i16; - if (NumElements == 16) return MVT::v16i16; - if (NumElements == 32) return MVT::v32i16; + if (NumElements == 1) return MVT::v1i16; + if (NumElements == 2) return MVT::v2i16; + if (NumElements == 4) return MVT::v4i16; + if (NumElements == 8) return MVT::v8i16; + if (NumElements == 16) return MVT::v16i16; + if (NumElements == 32) return MVT::v32i16; + if (NumElements == 64) return MVT::v64i16; + if (NumElements == 128) return MVT::v128i16; break; case MVT::i32: if (NumElements == 1) return MVT::v1i32; @@ -558,6 +619,8 @@ class MVT { if (NumElements == 4) return MVT::v4i32; if (NumElements == 8) return MVT::v8i32; if (NumElements == 16) return MVT::v16i32; + if (NumElements == 32) return MVT::v32i32; + if (NumElements == 64) return MVT::v64i32; break; case MVT::i64: if (NumElements == 1) return MVT::v1i64; @@ -565,6 +628,7 @@ class MVT { if (NumElements == 4) return MVT::v4i64; if (NumElements == 8) return MVT::v8i64; if (NumElements == 16) return MVT::v16i64; + if (NumElements == 32) return MVT::v32i64; break; case MVT::i128: if (NumElements == 1) return MVT::v1i128; diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h new file mode 100644 index 000000000000..fa7002fa21fb --- /dev/null +++ b/include/llvm/CodeGen/ParallelCG.h @@ -0,0 +1,43 @@ +//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This header declares functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PARALLELCG_H +#define LLVM_CODEGEN_PARALLELCG_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class Module; +class TargetOptions; +class raw_pwrite_stream; + +/// Split M into OSs.size() partitions, and generate code for each. Writes +/// OSs.size() output files to the output streams in OSs. The resulting output +/// files if linked together are intended to be equivalent to the single output +/// file that would have been code generated from M. +/// +/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr(). +std::unique_ptr +splitCodeGen(std::unique_ptr M, ArrayRef OSs, + StringRef CPU, StringRef Features, const TargetOptions &Options, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CM = CodeModel::Default, + CodeGenOpt::Level OL = CodeGenOpt::Default, + TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile); + +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 5d8292174476..f45f0ed57d6b 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -120,9 +120,6 @@ protected: /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge; - /// Default setting for -enable-shrink-wrap on this target. - bool EnableShrinkWrap; - public: TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); // Dummy constructor. @@ -173,7 +170,8 @@ public: void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID); + void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter = true, bool PrintAfter = true); /// Allow the target to enable a specific standard pass by default. void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } @@ -228,7 +226,7 @@ public: /// /// This can also be used to plug a new MachineSchedStrategy into an instance /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, make_unique(C), /* IsPostRA= */false) + /// return new ScheduleDAGMI(C, make_unique(C), /*RemoveKillFlags=*/false) /// /// Return NULL to select the default (generic) machine scheduler. virtual ScheduleDAGInstrs * @@ -585,6 +583,9 @@ namespace llvm { /// StackSlotColoring - This pass performs stack slot coloring. extern char &StackSlotColoringID; + /// \brief This pass lays out funclets contiguously. + extern char &FuncletLayoutID; + /// createStackProtectorPass - This pass adds stack protectors to functions. /// FunctionPass *createStackProtectorPass(const TargetMachine *TM); @@ -639,6 +640,9 @@ namespace llvm { /// the intrinsic for later emission to the StackMap. extern char &StackMapLivenessID; + /// LiveDebugValues pass + extern char &LiveDebugValuesID; + /// createJumpInstrTables - This pass creates jump-instruction tables. 
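
// A sketch (not from the imported sources) of driving splitCodeGen() from
// ParallelCG.h above. The stream element type is assumed to be
// raw_pwrite_stream * and the return type std::unique_ptr<Module> (the
// template arguments do not survive in this hunk); M, OS0 and OS1 are
// assumed to exist, and error handling is omitted.
raw_pwrite_stream *Streams[] = {&OS0, &OS1}; // one stream per partition
std::unique_ptr<Module> Ret =
    splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
                 TargetOptions());
// With more than one output stream the module is consumed and Ret is null;
// linking the resulting object files is intended to be equivalent to
// single-stream code generation of M.
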
ModulePass *createJumpInstrTablesPass(); diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index a518b6233250..f67552030db4 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -14,97 +14,170 @@ #ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H #define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueMap.h" +#include namespace llvm { - class MachineFrameInfo; - class MachineMemOperand; - class raw_ostream; - raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); +class MachineFrameInfo; +class MachineMemOperand; +class raw_ostream; - /// PseudoSourceValue - Special value supplied for machine level alias - /// analysis. It indicates that a memory access references the functions - /// stack frame (e.g., a spill slot), below the stack frame (e.g., argument - /// space), or constant pool. - class PseudoSourceValue { - private: - friend class MachineMemOperand; // For printCustom(). +raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); - /// printCustom - Implement printing for PseudoSourceValue. This is called - /// from Value::print or Value's operator<<. - /// - virtual void printCustom(raw_ostream &O) const; - - public: - /// isFixed - Whether this is a FixedStackPseudoSourceValue. - bool isFixed; - - explicit PseudoSourceValue(bool isFixed = false); - - virtual ~PseudoSourceValue(); - - /// isConstant - Test whether the memory pointed to by this - /// PseudoSourceValue has a constant value. - /// - virtual bool isConstant(const MachineFrameInfo *) const; - - /// isAliased - Test whether the memory pointed to by this - /// PseudoSourceValue may also be pointed to by an LLVM IR Value. - virtual bool isAliased(const MachineFrameInfo *) const; - - /// mayAlias - Return true if the memory pointed to by this - /// PseudoSourceValue can ever alias an LLVM IR Value. - virtual bool mayAlias(const MachineFrameInfo *) const; - - /// A pseudo source value referencing a fixed stack frame entry, - /// e.g., a spill slot. - static const PseudoSourceValue *getFixedStack(int FI); - - /// A pseudo source value referencing the area below the stack frame of - /// a function, e.g., the argument space. - static const PseudoSourceValue *getStack(); - - /// A pseudo source value referencing the global offset table - /// (or something the like). - static const PseudoSourceValue *getGOT(); - - /// A pseudo source value referencing the constant pool. Since constant - /// pools are constant, this doesn't need to identify a specific constant - /// pool entry. - static const PseudoSourceValue *getConstantPool(); - - /// A pseudo source value referencing a jump table. Since jump tables are - /// constant, this doesn't need to identify a specific jump table. - static const PseudoSourceValue *getJumpTable(); +/// Special value supplied for machine level alias analysis. It indicates that +/// a memory access references the functions stack frame (e.g., a spill slot), +/// below the stack frame (e.g., argument space), or constant pool. +class PseudoSourceValue { +public: + enum PSVKind { + Stack, + GOT, + JumpTable, + ConstantPool, + FixedStack, + GlobalValueCallEntry, + ExternalSymbolCallEntry }; - /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue - /// for holding FixedStack values, which must include a frame - /// index. 
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
-   const int FI;
- public:
-   explicit FixedStackPseudoSourceValue(int fi) :
-     PseudoSourceValue(true), FI(fi) {}
+private:
+  PSVKind Kind;

-   /// classof - Methods for support type inquiry through isa, cast, and
-   /// dyn_cast:
-   ///
-   static inline bool classof(const PseudoSourceValue *V) {
-     return V->isFixed == true;
-   }
+  friend class MachineMemOperand; // For printCustom().

-   bool isConstant(const MachineFrameInfo *MFI) const override;
+  /// Implement printing for PseudoSourceValue. This is called from
+  /// Value::print or Value's operator<<.
+  virtual void printCustom(raw_ostream &O) const;

-   bool isAliased(const MachineFrameInfo *MFI) const override;
+public:
+  explicit PseudoSourceValue(PSVKind Kind);

-   bool mayAlias(const MachineFrameInfo *) const override;
+  virtual ~PseudoSourceValue();

-   void printCustom(raw_ostream &OS) const override;
+  PSVKind kind() const { return Kind; }

-   int getFrameIndex() const { return FI; }
- };
-} // End llvm namespace
+  bool isStack() const { return Kind == Stack; }
+  bool isGOT() const { return Kind == GOT; }
+  bool isConstantPool() const { return Kind == ConstantPool; }
+  bool isJumpTable() const { return Kind == JumpTable; }
+
+  /// Test whether the memory pointed to by this PseudoSourceValue has a
+  /// constant value.
+  virtual bool isConstant(const MachineFrameInfo *) const;
+
+  /// Test whether the memory pointed to by this PseudoSourceValue may also be
+  /// pointed to by an LLVM IR Value.
+  virtual bool isAliased(const MachineFrameInfo *) const;
+
+  /// Return true if the memory pointed to by this PseudoSourceValue can ever
+  /// alias an LLVM IR Value.
+  virtual bool mayAlias(const MachineFrameInfo *) const;
+};
+
+/// A specialized PseudoSourceValue for holding FixedStack values, which must
+/// include a frame index.
+class FixedStackPseudoSourceValue : public PseudoSourceValue {
+  const int FI;
+
+public:
+  explicit FixedStackPseudoSourceValue(int FI)
+      : PseudoSourceValue(FixedStack), FI(FI) {}
+
+  static inline bool classof(const PseudoSourceValue *V) {
+    return V->kind() == FixedStack;
+  }
+
+  bool isConstant(const MachineFrameInfo *MFI) const override;
+
+  bool isAliased(const MachineFrameInfo *MFI) const override;
+
+  bool mayAlias(const MachineFrameInfo *) const override;
+
+  void printCustom(raw_ostream &OS) const override;
+
+  int getFrameIndex() const { return FI; }
+};
+
+class CallEntryPseudoSourceValue : public PseudoSourceValue {
+protected:
+  CallEntryPseudoSourceValue(PSVKind Kind);
+
+public:
+  bool isConstant(const MachineFrameInfo *) const override;
+  bool isAliased(const MachineFrameInfo *) const override;
+  bool mayAlias(const MachineFrameInfo *) const override;
+};
+
+/// A specialized pseudo source value for holding GlobalValue values.
+class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
+  const GlobalValue *GV;
+
+public:
+  GlobalValuePseudoSourceValue(const GlobalValue *GV);
+
+  static inline bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GlobalValueCallEntry;
+  }
+
+  const GlobalValue *getValue() const { return GV; }
+};
+
+/// A specialized pseudo source value for holding external symbol values.
+class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue { + const char *ES; + +public: + ExternalSymbolPseudoSourceValue(const char *ES); + + static inline bool classof(const PseudoSourceValue *V) { + return V->kind() == ExternalSymbolCallEntry; + } + + const char *getSymbol() const { return ES; } +}; + +/// Manages creation of pseudo source values. +class PseudoSourceValueManager { + const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; + std::map> FSValues; + StringMap> + ExternalCallEntries; + ValueMap> + GlobalCallEntries; + +public: + PseudoSourceValueManager(); + + /// Return a pseudo source value referencing the area below the stack frame of + /// a function, e.g., the argument space. + const PseudoSourceValue *getStack(); + + /// Return a pseudo source value referencing the global offset table + /// (or something the like). + const PseudoSourceValue *getGOT(); + + /// Return a pseudo source value referencing the constant pool. Since constant + /// pools are constant, this doesn't need to identify a specific constant + /// pool entry. + const PseudoSourceValue *getConstantPool(); + + /// Return a pseudo source value referencing a jump table. Since jump tables + /// are constant, this doesn't need to identify a specific jump table. + const PseudoSourceValue *getJumpTable(); + + /// Return a pseudo source value referencing a fixed stack frame entry, + /// e.g., a spill slot. + const PseudoSourceValue *getFixedStack(int FI); + + const PseudoSourceValue *getGlobalValueCallEntry(const GlobalValue *GV); + + const PseudoSourceValue *getExternalSymbolCallEntry(const char *ES); +}; + +} // end namespace llvm #endif diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 6046e46547b2..4122811a9e5c 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -134,7 +134,7 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { hash_combine_range(OStart, OEnd)); } -/// \brief Holds graph-level metadata relevent to PBQP RA problems. +/// \brief Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: typedef ValuePool AllowedRegVecPool; diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index ca495778446f..5c7e9999cc9a 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -33,12 +33,10 @@ public: static MachinePassRegistry Registry; RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) - : MachinePassRegistryNode(N, D, (MachinePassCtor)C) - { - Registry.Add(this); + : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { + Registry.Add(this); } ~RegisterRegAlloc() { Registry.Remove(this); } - // Accessors. 
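
// A sketch (not from the imported sources) of the kind-based queries in the
// PseudoSourceValue hierarchy above. MMO is an assumed MachineMemOperand *;
// its getPseudoValue() accessor, returning the possibly-null attached
// PseudoSourceValue, is presumed available, and dbgs() assumes
// llvm/Support/Debug.h.
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
  if (const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PSV))
    dbgs() << "stack access on FI#" << FS->getFrameIndex() << '\n';
  else if (PSV->isConstantPool()) // kind() test wrapped by isConstantPool()
    dbgs() << "constant pool access\n";
}
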
// @@ -57,7 +55,6 @@ public: static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } - }; } // end namespace llvm diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 9d8843d1d74a..987634fb36c3 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -125,11 +125,13 @@ class PressureDiff { enum { MaxPSets = 16 }; PressureChange PressureChanges[MaxPSets]; -public: + typedef PressureChange* iterator; + iterator nonconst_begin() { return &PressureChanges[0]; } + iterator nonconst_end() { return &PressureChanges[MaxPSets]; } + +public: typedef const PressureChange* const_iterator; - iterator begin() { return &PressureChanges[0]; } - iterator end() { return &PressureChanges[MaxPSets]; } const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } @@ -191,30 +193,56 @@ struct RegPressureDelta { } }; -/// \brief A set of live virtual registers and physical register units. +/// A set of live virtual registers and physical register units. /// -/// Virtual and physical register numbers require separate sparse sets, but most -/// of the RegisterPressureTracker handles them uniformly. -struct LiveRegSet { - SparseSet PhysRegs; - SparseSet VirtRegs; +/// This is a wrapper around a SparseSet which deals with mapping register unit +/// and virtual register indexes to an index usable by the sparse set. +class LiveRegSet { +private: + SparseSet Regs; + unsigned NumRegUnits; + + unsigned getSparseIndexFromReg(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits; + assert(Reg < NumRegUnits); + return Reg; + } + unsigned getRegFromSparseIndex(unsigned SparseIndex) const { + if (SparseIndex >= NumRegUnits) + return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits); + return SparseIndex; + } + +public: + void clear(); + void init(const MachineRegisterInfo &MRI); bool contains(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.count(Reg); - return PhysRegs.count(Reg); + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.count(SparseIndex); } bool insert(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.insert(Reg).second; - return PhysRegs.insert(Reg).second; + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.insert(SparseIndex).second; } bool erase(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.erase(Reg); - return PhysRegs.erase(Reg); + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.erase(SparseIndex); + } + + size_t size() const { + return Regs.size(); + } + + template + void appendTo(ContainerT &To) const { + for (unsigned I : Regs) { + unsigned Reg = getRegFromSparseIndex(I); + To.push_back(Reg); + } } }; @@ -300,16 +328,12 @@ public: // position changes while pressure does not. void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } - /// \brief Get the SlotIndex for the first nondebug instruction including or - /// after the current position. - SlotIndex getCurrSlot() const; - /// Recede across the previous instruction. - bool recede(SmallVectorImpl *LiveUses = nullptr, + void recede(SmallVectorImpl *LiveUses = nullptr, PressureDiff *PDiff = nullptr); /// Advance across the current instruction. 
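
// A sketch (not from the imported sources) of the unified LiveRegSet
// introduced above: register units keep their own sparse index, virtual
// registers are offset by NumRegUnits, so a single SparseSet serves both
// namespaces. LiveRegs is an assumed, initialized LiveRegSet.
SmallVector<unsigned, 16> Live;
LiveRegs.appendTo(Live); // decodes sparse indexes back into register numbers
unsigned NumVirtLive = 0;
for (unsigned Reg : Live)
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    ++NumVirtLive;
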
-  bool advance();
+  void advance();

   /// Finalize the region boundaries and record live ins and live outs.
   void closeRegion();
@@ -326,17 +350,15 @@ public:
   ArrayRef<unsigned> getLiveThru() const { return LiveThruPressure; }

   /// Get the resulting register pressure over the traversed region.
-  /// This result is complete if either advance() or recede() has returned true,
-  /// or if closeRegion() was explicitly invoked.
+  /// This result is complete if closeRegion() was explicitly invoked.
   RegisterPressure &getPressure() { return P; }
   const RegisterPressure &getPressure() const { return P; }

   /// Get the register set pressure at the current position, which may be less
   /// than the pressure across the traversed region.
-  std::vector<unsigned> &getRegSetPressureAtPos() { return CurrSetPressure; }
-
-  void discoverLiveOut(unsigned Reg);
-  void discoverLiveIn(unsigned Reg);
+  const std::vector<unsigned> &getRegSetPressureAtPos() const {
+    return CurrSetPressure;
+  }

   bool isTopClosed() const;
   bool isBottomClosed() const;
@@ -412,7 +434,12 @@ public:
   void dump() const;

protected:
-  const LiveRange *getLiveRange(unsigned Reg) const;
+  void discoverLiveOut(unsigned Reg);
+  void discoverLiveIn(unsigned Reg);
+
+  /// \brief Get the SlotIndex for the first nondebug instruction including or
+  /// after the current position.
+  SlotIndex getCurrSlot() const;

   void increaseRegPressure(ArrayRef<unsigned> Regs);
   void decreaseRegPressure(ArrayRef<unsigned> Regs);
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index df3fd34e0af6..122c78534253 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -74,10 +74,6 @@ public:
   /// Start tracking liveness from the begin of the specific basic block.
   void enterBasicBlock(MachineBasicBlock *mbb);

-  /// Allow resetting register state info for multiple
-  /// passes over/within the same function.
-  void initRegState();
-
   /// Move the internal MBB iterator and update register states.
   void forward();

@@ -104,10 +100,8 @@ public:
     MBBI = I;
   }

-  MachineBasicBlock::iterator getCurrentPosition() const {
-    return MBBI;
-  }
-
+  MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; }
+
   /// Return if a specific register is currently used.
   bool isRegUsed(unsigned Reg, bool includeReserved = true) const;

@@ -152,7 +146,7 @@ public:
   }

   /// Tell the scavenger a register is used.
-  void setRegUsed(unsigned Reg);
+  void setRegUsed(unsigned Reg, LaneBitmask LaneMask = ~0u);
private:
   /// Returns true if a register is reserved. It is never "unused".
   bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
@@ -169,10 +163,10 @@ private:
   /// Processes the current instruction and fill the KillRegUnits and
   /// DefRegUnits bit vectors.
   void determineKillsAndDefs();
-
+
   /// Add all Reg Units that Reg contains to BV.
   void addRegUnits(BitVector &BV, unsigned Reg);
-
+
   /// Return the candidate register that is unused for the longest after
   /// StartMI. UseMI is set to the instruction where the search stopped.
   ///
@@ -182,6 +176,9 @@ private:
                            unsigned InstrLimit,
                            MachineBasicBlock::iterator &UseMI);

+  /// Allow resetting register state info for multiple
+  /// passes over/within the same function.
+  void initRegState();
 };

} // End llvm namespace
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 2be5de640e29..7db03459f9bf 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -231,13 +231,9 @@ namespace RTLIB {
     FPROUND_F80_F64,
     FPROUND_F128_F64,
     FPROUND_PPCF128_F64,
-    FPTOSINT_F32_I8,
-    FPTOSINT_F32_I16,
     FPTOSINT_F32_I32,
     FPTOSINT_F32_I64,
     FPTOSINT_F32_I128,
-    FPTOSINT_F64_I8,
-    FPTOSINT_F64_I16,
     FPTOSINT_F64_I32,
     FPTOSINT_F64_I64,
     FPTOSINT_F64_I128,
@@ -250,13 +246,9 @@ namespace RTLIB {
     FPTOSINT_PPCF128_I32,
     FPTOSINT_PPCF128_I64,
     FPTOSINT_PPCF128_I128,
-    FPTOUINT_F32_I8,
-    FPTOUINT_F32_I16,
     FPTOUINT_F32_I32,
     FPTOUINT_F32_I64,
     FPTOUINT_F32_I128,
-    FPTOUINT_F64_I8,
-    FPTOUINT_F64_I16,
     FPTOUINT_F64_I32,
     FPTOUINT_F64_I64,
     FPTOUINT_F64_I128,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 839131416560..bda9dbd51fff 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -20,11 +20,11 @@
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Target/TargetLowering.h"

namespace llvm {
-  class AliasAnalysis;
   class SUnit;
   class MachineConstantPool;
   class MachineFunction;
@@ -122,18 +122,7 @@ namespace llvm {
     }

     /// Return true if the specified SDep is equivalent except for latency.
-    bool overlaps(const SDep &Other) const {
-      if (Dep != Other.Dep) return false;
-      switch (Dep.getInt()) {
-      case Data:
-      case Anti:
-      case Output:
-        return Contents.Reg == Other.Contents.Reg;
-      case Order:
-        return Contents.OrdKind == Other.Contents.OrdKind;
-      }
-      llvm_unreachable("Invalid dependency kind!");
-    }
+    bool overlaps(const SDep &Other) const;

     bool operator==(const SDep &Other) const {
       return overlaps(Other) && Latency == Other.Latency;
@@ -157,19 +146,13 @@ namespace llvm {
     }

     //// getSUnit - Return the SUnit to which this edge points.
-    SUnit *getSUnit() const {
-      return Dep.getPointer();
-    }
+    SUnit *getSUnit() const;

     //// setSUnit - Assign the SUnit to which this edge points.
-    void setSUnit(SUnit *SU) {
-      Dep.setPointer(SU);
-    }
+    void setSUnit(SUnit *SU);

     /// getKind - Return an enum value representing the kind of the dependence.
-    Kind getKind() const {
-      return Dep.getInt();
-    }
+    Kind getKind() const;

     /// isCtrl - Shorthand for getKind() != SDep::Data.
     bool isCtrl() const {
@@ -374,7 +357,7 @@ namespace llvm {
     /// correspond to schedulable entities (e.g. instructions) and do not have a
     /// valid ID. Consequently, always check for boundary nodes before accessing
     /// an associative data structure keyed on node ID.
-    bool isBoundaryNode() const { return NodeNum == BoundaryID; };
+    bool isBoundaryNode() const { return NodeNum == BoundaryID; }

     /// setNode - Assign the representative SDNode for this SUnit.
     /// This may be used during pre-regalloc scheduling.
@@ -490,6 +473,30 @@ namespace llvm {
     void ComputeHeight();
   };

+  /// Return true if the specified SDep is equivalent except for latency.
+  inline bool SDep::overlaps(const SDep &Other) const {
+    if (Dep != Other.Dep)
+      return false;
+    switch (Dep.getInt()) {
+    case Data:
+    case Anti:
+    case Output:
+      return Contents.Reg == Other.Contents.Reg;
+    case Order:
+      return Contents.OrdKind == Other.Contents.OrdKind;
+    }
+    llvm_unreachable("Invalid dependency kind!");
+  }
+
+  //// getSUnit - Return the SUnit to which this edge points.
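
// A sketch (not from the imported sources) of the contract kept by the
// out-of-line SDep::overlaps() above: it compares everything except latency,
// while operator== (defined via overlaps) also requires equal latency.
// SU is an assumed SUnit; the SDep(SUnit *, Kind, unsigned) constructor and
// setLatency() are presumed available as in this header.
SDep A(&SU, SDep::Data, /*Reg=*/0);
SDep B = A;
B.setLatency(2);                 // same edge, different latency
assert(A.overlaps(B) && !(A == B));
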
+ inline SUnit *SDep::getSUnit() const { return Dep.getPointer(); } + + //// setSUnit - Assign the SUnit to which this edge points. + inline void SDep::setSUnit(SUnit *SU) { Dep.setPointer(SU); } + + /// getKind - Return an enum value representing the kind of the dependence. + inline SDep::Kind SDep::getKind() const { return Dep.getInt(); } + //===--------------------------------------------------------------------===// /// SchedulingPriorityQueue - This interface is used to plug different /// priorities computation algorithms into the list scheduler. It implements diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index b56d5ec8ce63..c574df094911 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -26,22 +26,32 @@ namespace llvm { class MachineFrameInfo; class MachineLoopInfo; class MachineDominatorTree; - class LiveIntervals; class RegPressureTracker; class PressureDiffs; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { unsigned VirtReg; + LaneBitmask LaneMask; SUnit *SU; - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} + VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) + : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; + /// Mapping from virtual register to SUnit including an operand index. + struct VReg2SUnitOperIdx : public VReg2SUnit { + unsigned OperandIndex; + + VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, + unsigned OperandIndex, SUnit *SU) + : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} + }; + /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -69,7 +79,10 @@ namespace llvm { /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2UseMap; + typedef SparseMultiSet VReg2SUnitMultiMap; + + typedef SparseMultiSet + VReg2SUnitOperIdxMultiMap; /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. @@ -78,15 +91,9 @@ namespace llvm { const MachineLoopInfo *MLI; const MachineFrameInfo *MFI; - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - /// TargetSchedModel provides an interface to the machine model. TargetSchedModel SchedModel; - /// isPostRA flag indicates vregs cannot be present. - bool IsPostRA; - /// True if the DAG builder should remove kill flags (in preparation for /// rescheduling). bool RemoveKillFlags; @@ -98,6 +105,9 @@ namespace llvm { /// it has taken responsibility for scheduling the terminator correctly. bool CanHandleTerminators; + /// Whether lane masks should get tracked. + bool TrackLaneMasks; + /// State specific to the current scheduling region. /// ------------------------------------------------ @@ -120,7 +130,7 @@ namespace llvm { /// After calling BuildSchedGraph, each vreg used in the scheduling region /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. - VReg2UseMap VRegUses; + VReg2SUnitMultiMap VRegUses; /// State internal to DAG building. /// ------------------------------- @@ -132,8 +142,12 @@ namespace llvm { Reg2SUnitsMap Defs; Reg2SUnitsMap Uses; - /// Track the last instruction in this region defining each virtual register. 
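
// A sketch (not from the imported sources) of why VReg2SUnit above now
// carries a LaneBitmask: with sub-register liveness, writes to disjoint
// lanes of one vreg can all be "current" definitions (see CurrentVRegDefs
// just below). VReg, SULo and SUHi are assumed; the mask values are
// hypothetical.
VReg2SUnit LoDef(VReg, /*LaneMask=*/0x1, &SULo);
VReg2SUnit HiDef(VReg, /*LaneMask=*/0x2, &SUHi);
assert((LoDef.LaneMask & HiDef.LaneMask) == 0); // disjoint lanes coexist
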
- VReg2SUnitMap VRegDefs; + /// Tracks the last instruction(s) in this region defining each virtual + /// register. There may be multiple current definitions for a register with + /// disjunct lanemasks. + VReg2SUnitMultiMap CurrentVRegDefs; + /// Tracks the last instructions in this region using each virtual register. + VReg2SUnitOperIdxMultiMap CurrentVRegUses; /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here @@ -154,17 +168,10 @@ namespace llvm { public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, - bool RemoveKillFlags = false, - LiveIntervals *LIS = nullptr); + bool RemoveKillFlags = false); ~ScheduleDAGInstrs() override {} - bool isPostRA() const { return IsPostRA; } - - /// \brief Expose LiveIntervals for use in DAG mutators and such. - LiveIntervals *getLIS() const { return LIS; } - /// \brief Get the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } @@ -206,7 +213,8 @@ namespace llvm { /// input. void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, - PressureDiffs *PDiffs = nullptr); + PressureDiffs *PDiffs = nullptr, + bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier. We want to @@ -253,6 +261,12 @@ namespace llvm { /// Other adjustments may be made to the instruction if necessary. Return /// true if the operand has been deleted, false if not. bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + /// Returns a mask for which lanes get read/written by the given (register) + /// machine operand. + LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + void collectVRegUses(SUnit *SU); }; /// newSUnit - Creates a new SUnit and return a ptr to it. diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index 51ac7f28527f..a7a6227664de 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -52,12 +52,6 @@ public: static RegisterScheduler *getList() { return (RegisterScheduler *)Registry.getList(); } - static FunctionPassCtor getDefault() { - return (FunctionPassCtor)Registry.getDefault(); - } - static void setDefault(FunctionPassCtor C) { - Registry.setDefault((MachinePassCtor)C); - } static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 1ee92380e690..a21e9ae881a7 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -31,7 +32,6 @@ namespace llvm { -class AliasAnalysis; class MachineConstantPoolValue; class MachineFunction; class MDNode; @@ -215,6 +215,8 @@ class SelectionDAG { /// Tracks dbg_value information through SDISel. SDDbgInfo *DbgInfo; + uint16_t NextPersistentId = 0; + public: /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. 
This allows the clients @@ -324,11 +326,10 @@ public: } iterator_range allnodes() { - return iterator_range(allnodes_begin(), allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } iterator_range allnodes() const { - return iterator_range(allnodes_begin(), - allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } /// Return the root tag of the SelectionDAG. @@ -532,7 +533,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 4 : 3)); + makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); } // Similar to last getCopyToReg() except parameter Reg is a SDValue @@ -541,7 +542,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Reg, N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 4 : 3)); + makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); } SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) { @@ -558,7 +559,7 @@ public: SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue }; return getNode(ISD::CopyFromReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 3 : 2)); + makeArrayRef(Ops, Glue.getNode() ? 3 : 2)); } SDValue getCondCode(ISD::CondCode Cond); @@ -670,7 +671,7 @@ public: SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops); + ArrayRef Ops, const SDNodeFlags *Flags = nullptr); SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef ResultTys, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, @@ -687,7 +688,7 @@ public: SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); - + // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs); @@ -901,6 +902,12 @@ public: /// the target's desired shift amount type. SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); + /// Expand the specified \c ISD::VAARG node as the Legalize pass would. + SDValue expandVAArg(SDNode *Node); + + /// Expand the specified \c ISD::VACOPY node as the Legalize pass would. + SDValue expandVACopy(SDNode *Node); + /// *Mutate* the specified node in-place to have the /// specified operands. If the resultant node already exists in the DAG, /// this does not modify the specified node, instead it returns the node that @@ -1072,6 +1079,10 @@ public: // target info. switch (Opcode) { case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::MUL: case ISD::MULHU: case ISD::MULHS: @@ -1088,6 +1099,8 @@ public: case ISD::ADDE: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: return true; default: return false; } @@ -1150,6 +1163,10 @@ public: const ConstantSDNode *Cst1, const ConstantSDNode *Cst2); + SDValue FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, ArrayRef Ops, + const SDNodeFlags *Flags = nullptr); + /// Constant fold a setcc to true or false. SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, SDLoc dl); @@ -1199,6 +1216,10 @@ public: /// other positive zero. bool isEqualTo(SDValue A, SDValue B) const; + /// Return true if A and B have no common bits set. As an example, this can + /// allow an 'add' to be transformed into an 'or'. 
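
// Why disjoint bits justify rewriting add as or (the transform the comment
// above refers to): if A & B == 0, no bit position receives two set bits,
// so the addition produces no carries and A + B == A | B.
static_assert((0x5u + 0xAu) == (0x5u | 0xAu),
              "disjoint-bit addition is carry-free");
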
+  bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
+
   /// Utility function used by legalize and lowering to
   /// "unroll" a vector operation by splitting out the scalars and operating
   /// on each element individually. If the ResNE is 0, fully unroll the vector
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 4821d1aae9e5..23816bde07c0 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -44,6 +44,7 @@ class GlobalValue;
 class MachineBasicBlock;
 class MachineConstantPoolValue;
 class SDNode;
+class BinaryWithFlagsSDNode;
 class Value;
 class MCSymbol;
 template <typename T> struct DenseMapInfo;
@@ -81,11 +82,6 @@ namespace ISD {
   /// all ConstantFPSDNode or undef.
   bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);

-  /// Return true if the specified node is a
-  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
-  /// element is not an undef.
-  bool isScalarToVector(const SDNode *N);
-
   /// Return true if the node has at least one operand
   /// and all operands of the specified node are ISD::UNDEF.
   bool allOperandsUndef(const SDNode *N);
@@ -139,7 +135,7 @@ public:
     return SDValue(Node, R);
   }

-  // Return true if this node is an operand of N.
+  /// Return true if this node is an operand of N.
   bool isOperandOf(const SDNode *N) const;

   /// Return the ValueType of the referenced return value.
@@ -167,6 +163,7 @@ public:
   inline bool isTargetMemoryOpcode() const;
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
+  inline bool isUndef() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -318,6 +315,61 @@ template<> struct simplify_type {
   }
 };

+/// These are IR-level optimization flags that may be propagated to SDNodes.
+/// TODO: This data structure should be shared by the IR optimizer and the
+/// backend.
+struct SDNodeFlags {
+private:
+  bool NoUnsignedWrap : 1;
+  bool NoSignedWrap : 1;
+  bool Exact : 1;
+  bool UnsafeAlgebra : 1;
+  bool NoNaNs : 1;
+  bool NoInfs : 1;
+  bool NoSignedZeros : 1;
+  bool AllowReciprocal : 1;
+
+public:
+  /// Default constructor turns off all optimization flags.
+  SDNodeFlags() {
+    NoUnsignedWrap = false;
+    NoSignedWrap = false;
+    Exact = false;
+    UnsafeAlgebra = false;
+    NoNaNs = false;
+    NoInfs = false;
+    NoSignedZeros = false;
+    AllowReciprocal = false;
+  }
+
+  // These are mutators for each flag.
+  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+  void setExact(bool b) { Exact = b; }
+  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
+  void setNoNaNs(bool b) { NoNaNs = b; }
+  void setNoInfs(bool b) { NoInfs = b; }
+  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+
+  // These are accessors for each flag.
+  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
+  bool hasNoSignedWrap() const { return NoSignedWrap; }
+  bool hasExact() const { return Exact; }
+  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
+  bool hasNoNaNs() const { return NoNaNs; }
+  bool hasNoInfs() const { return NoInfs; }
+  bool hasNoSignedZeros() const { return NoSignedZeros; }
+  bool hasAllowReciprocal() const { return AllowReciprocal; }
+
+  /// Return a raw encoding of the flags.
+  /// This function should only be used to add data to the NodeID value.
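
// Worked example for the raw encoding defined just below: each flag occupies
// one bit in declaration order, so setting Exact (bit 2) and NoNaNs (bit 4)
// yields (1 << 2) | (1 << 4) == 0x14.
SDNodeFlags F;        // all flags default to false
F.setExact(true);
F.setNoNaNs(true);
assert(F.getRawFlags() == 0x14u);
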
+ unsigned getRawFlags() const { + return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) | + (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) | + (NoSignedZeros << 6) | (AllowReciprocal << 7); + } +}; /// Represents one node in the SelectionDAG. /// @@ -374,6 +426,10 @@ private: friend struct ilist_traits; public: + /// Unique and persistent id per SDNode in the DAG. + /// Used for debug printing. + uint16_t PersistentId; + //===--------------------------------------------------------------------===// // Accessors // @@ -395,6 +451,9 @@ public: return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; } + /// Return true if the type of the node type undefined. + bool isUndef() const { return NodeType == ISD::UNDEF; } + /// Test if this node is a memory intrinsic (with valid pointer information). /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for /// non-memory intrinsics (with chains) that are not really instances of @@ -517,10 +576,10 @@ public: static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } inline iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } /// Return true if there are exactly NUSES uses of the indicated value. @@ -592,8 +651,8 @@ public: }; iterator_range op_values() const { - return iterator_range(value_op_iterator(op_begin()), - value_op_iterator(op_end())); + return make_range(value_op_iterator(op_begin()), + value_op_iterator(op_end())); } SDVTList getVTList() const { @@ -605,27 +664,11 @@ public: /// to which the glue operand points. Otherwise return NULL. SDNode *getGluedNode() const { if (getNumOperands() != 0 && - getOperand(getNumOperands()-1).getValueType() == MVT::Glue) + getOperand(getNumOperands()-1).getValueType() == MVT::Glue) return getOperand(getNumOperands()-1).getNode(); return nullptr; } - // If this is a pseudo op, like copyfromreg, look to see if there is a - // real target node glued to it. If so, return the target node. - const SDNode *getGluedMachineNode() const { - const SDNode *FoundNode = this; - - // Climb up glue edges until a machine-opcode node is found, or the - // end of the chain is reached. - while (!FoundNode->isMachineOpcode()) { - const SDNode *N = FoundNode->getGluedNode(); - if (!N) break; - FoundNode = N; - } - - return FoundNode; - } - /// If this node has a glue value with a user, return /// the user (there is at most one). Otherwise return NULL. SDNode *getGluedUser() const { @@ -635,6 +678,10 @@ public: return nullptr; } + /// This could be defined as a virtual function and implemented more simply + /// and directly, but it is not to avoid creating a vtable for this class. + const SDNodeFlags *getFlags() const; + /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } @@ -909,6 +956,9 @@ inline bool SDValue::isMachineOpcode() const { inline unsigned SDValue::getMachineOpcode() const { return Node->getMachineOpcode(); } +inline bool SDValue::isUndef() const { + return Node->isUndef(); +} inline bool SDValue::use_empty() const { return !Node->hasAnyUseOfValue(ResNo); } @@ -943,62 +993,6 @@ inline void SDUse::setNode(SDNode *N) { if (N) N->addUse(*this); } -/// These are IR-level optimization flags that may be propagated to SDNodes. -/// TODO: This data structure should be shared by the IR optimizer and the -/// the backend. 
-struct SDNodeFlags {
-private:
-  bool NoUnsignedWrap : 1;
-  bool NoSignedWrap : 1;
-  bool Exact : 1;
-  bool UnsafeAlgebra : 1;
-  bool NoNaNs : 1;
-  bool NoInfs : 1;
-  bool NoSignedZeros : 1;
-  bool AllowReciprocal : 1;
-
-public:
-  /// Default constructor turns off all optimization flags.
-  SDNodeFlags() {
-    NoUnsignedWrap = false;
-    NoSignedWrap = false;
-    Exact = false;
-    UnsafeAlgebra = false;
-    NoNaNs = false;
-    NoInfs = false;
-    NoSignedZeros = false;
-    AllowReciprocal = false;
-  }
-
-  // These are mutators for each flag.
-  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
-  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
-  void setExact(bool b) { Exact = b; }
-  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
-  void setNoNaNs(bool b) { NoNaNs = b; }
-  void setNoInfs(bool b) { NoInfs = b; }
-  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
-  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-
-  // These are accessors for each flag.
-  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
-  bool hasNoSignedWrap() const { return NoSignedWrap; }
-  bool hasExact() const { return Exact; }
-  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
-  bool hasNoNaNs() const { return NoNaNs; }
-  bool hasNoInfs() const { return NoInfs; }
-  bool hasNoSignedZeros() const { return NoSignedZeros; }
-  bool hasAllowReciprocal() const { return AllowReciprocal; }
-
-  /// Return a raw encoding of the flags.
-  /// This function should only be used to add data to the NodeID value.
-  unsigned getRawFlags() const {
-    return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
-           (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
-           (NoSignedZeros << 6) | (AllowReciprocal << 7);
-  }
-};
-
 /// This class is used for single-operand SDNodes. This is solely
 /// to allow co-allocation of node operands with the node itself.
 class UnarySDNode : public SDNode {
@@ -1080,6 +1074,9 @@ class HandleSDNode : public SDNode {
public:
   explicit HandleSDNode(SDValue X)
     : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
+    // HandleSDNodes are never inserted into the DAG, so they won't be
+    // auto-numbered. Use ID 65535 as a sentinel.
+    PersistentId = 0xffff;
     InitOperands(&Op, X);
   }
   ~HandleSDNode();
@@ -1497,6 +1494,15 @@ public:
   }
 };

+/// Returns true if \p V is a constant integer zero.
+bool isNullConstant(SDValue V);
+/// Returns true if \p V is an FP constant with a value of positive zero.
+bool isNullFPConstant(SDValue V);
+/// Returns true if \p V is an integer constant with all bits set.
+bool isAllOnesConstant(SDValue V);
+/// Returns true if \p V is a constant integer one.
+bool isOneConstant(SDValue V);
+
 class GlobalAddressSDNode : public SDNode {
   const GlobalValue *TheGlobal;
   int64_t Offset;
@@ -1697,6 +1703,14 @@ public:
   ConstantFPSDNode *
   getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;

+  /// \brief If this is a constant FP splat and the splatted constant FP is an
+  /// exact power of 2, return the log base 2 integer value.  Otherwise,
+  /// return -1.
+  ///
+  /// The BitWidth specifies the necessary bit precision.
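
// A sketch (not from the imported sources) of the query declared just below.
// BV is an assumed BuildVectorSDNode splatting the constant 8.0f; since
// 8.0 == 2^3 exactly, the call is expected to yield 3, and -1 for any
// non-power-of-2 splat.
BitVector UndefElements;
int32_t Log2 = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements,
                                                   /*BitWidth=*/32);
assert(Log2 == 3);
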
+ int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const; + bool isConstant() const; static inline bool classof(const SDNode *N) { @@ -2003,9 +2017,9 @@ class MaskedLoadStoreSDNode : public MemSDNode { public: friend class SelectionDAG; MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + SDValue *Operands, unsigned numOperands, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { InitOperands(Ops, Operands, numOperands); } @@ -2036,7 +2050,7 @@ public: ISD::LoadExtType getExtensionType() const { return ISD::LoadExtType(SubclassData & 3); - } + } const SDValue &getSrc0() const { return getOperand(3); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; @@ -2103,17 +2117,18 @@ public: class MaskedGatherSDNode : public MaskedGatherScatterSDNode { public: friend class SelectionDAG; - MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef Operands, + MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef Operands, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, Operands, VTs, MemVT, MMO) { assert(getValue().getValueType() == getValueType(0) && - "Incompatible type of the PathThru value in MaskedGatherSDNode"); - assert(getMask().getValueType().getVectorNumElements() == - getValueType(0).getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getMask().getValueType().getScalarType() == MVT::i1 && + "Incompatible type of the PassThru value in MaskedGatherSDNode"); + assert(getMask().getValueType().getVectorNumElements() == + getValueType(0).getVectorNumElements() && "Vector width mismatch between mask and data"); + assert(getIndex().getValueType().getVectorNumElements() == + getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); } static bool classof(const SDNode *N) { @@ -2131,11 +2146,12 @@ public: SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs, MemVT, MMO) { - assert(getMask().getValueType().getVectorNumElements() == - getValue().getValueType().getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getMask().getValueType().getScalarType() == MVT::i1 && + assert(getMask().getValueType().getVectorNumElements() == + getValue().getValueType().getVectorNumElements() && "Vector width mismatch between mask and data"); + assert(getIndex().getValueType().getVectorNumElements() == + getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and data"); } static bool classof(const SDNode *N) { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 9d6d6f5b1be0..7b621bee259f 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -155,7 +155,7 @@ namespace llvm { "Attempt to construct index with 0 pointer."); } - /// Returns true if this is a valid index. Invalid indicies do + /// Returns true if this is a valid index. Invalid indices do /// not point into an index table, and cannot be compared. 
   static bool classof(const SDNode *N) {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 9d6d6f5b1be0..7b621bee259f 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -155,7 +155,7 @@ namespace llvm {
              "Attempt to construct index with 0 pointer.");
     }
 
-    /// Returns true if this is a valid index. Invalid indicies do
+    /// Returns true if this is a valid index. Invalid indices do
     /// not point into an index table, and cannot be compared.
     bool isValid() const {
       return lie.getPointer();
@@ -272,7 +272,7 @@ namespace llvm {
     SlotIndex getNextSlot() const {
       Slot s = getSlot();
       if (s == Slot_Dead) {
-        return SlotIndex(listEntry()->getNextNode(), Slot_Block);
+        return SlotIndex(&*++listEntry()->getIterator(), Slot_Block);
       }
       return SlotIndex(listEntry(), s + 1);
     }
@@ -280,7 +280,7 @@ namespace llvm {
     /// Returns the next index. This is the index corresponding to the this
     /// index's slot, but for the next instruction.
     SlotIndex getNextIndex() const {
-      return SlotIndex(listEntry()->getNextNode(), getSlot());
+      return SlotIndex(&*++listEntry()->getIterator(), getSlot());
     }
 
     /// Returns the previous slot in the index list. This could be either the
@@ -292,7 +292,7 @@ namespace llvm {
     SlotIndex getPrevSlot() const {
       Slot s = getSlot();
       if (s == Slot_Block) {
-        return SlotIndex(listEntry()->getPrevNode(), Slot_Dead);
+        return SlotIndex(&*--listEntry()->getIterator(), Slot_Dead);
       }
       return SlotIndex(listEntry(), s - 1);
     }
@@ -300,7 +300,7 @@ namespace llvm {
     /// Returns the previous index. This is the index corresponding to this
     /// index's slot, but for the previous instruction.
     SlotIndex getPrevIndex() const {
-      return SlotIndex(listEntry()->getPrevNode(), getSlot());
+      return SlotIndex(&*--listEntry()->getIterator(), getSlot());
    }
 
  };
@@ -333,6 +333,8 @@ namespace llvm {
   /// This pass assigns indexes to each instruction.
   class SlotIndexes : public MachineFunctionPass {
   private:
+    // IndexListEntry allocator.
+    BumpPtrAllocator ileAllocator;
     typedef ilist<IndexListEntry> IndexList;
     IndexList indexList;
@@ -353,9 +355,6 @@ namespace llvm {
     /// and MBB id.
     SmallVector<IdxMBBPair, 8> idx2MBBMap;
 
-    // IndexListEntry allocator.
-    BumpPtrAllocator ileAllocator;
-
     IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
       IndexListEntry *entry =
         static_cast<IndexListEntry *>(
@@ -377,6 +376,11 @@ namespace llvm {
       initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     }
 
+    ~SlotIndexes() {
+      // The indexList's nodes are all allocated in the BumpPtrAllocator.
+      indexList.clearAndLeakNodesUnsafely();
+    }
+
     void getAnalysisUsage(AnalysisUsage &au) const override;
     void releaseMemory() override;
 
@@ -427,11 +431,11 @@ namespace llvm {
     /// Returns the next non-null index, if one exists.
     /// Otherwise returns getLastIndex().
     SlotIndex getNextNonNullIndex(SlotIndex Index) {
-      IndexList::iterator I = Index.listEntry();
+      IndexList::iterator I = Index.listEntry()->getIterator();
       IndexList::iterator E = indexList.end();
       while (++I != E)
         if (I->getInstr())
-          return SlotIndex(I, Index.getSlot());
+          return SlotIndex(&*I, Index.getSlot());
       // We reached the end of the function.
       return getLastIndex();
     }
@@ -502,49 +506,52 @@ namespace llvm {
       return getMBBRange(mbb).second;
     }
 
+    /// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block
+    /// begin and basic block)
+    typedef SmallVectorImpl<IdxMBBPair>::const_iterator MBBIndexIterator;
+    /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
+    /// equal to \p To.
+    MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
+      return std::lower_bound(I, idx2MBBMap.end(), To);
+    }
+    /// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex
+    /// that is greater or equal to \p Idx.
+    MBBIndexIterator findMBBIndex(SlotIndex Idx) const {
+      return advanceMBBIndex(idx2MBBMap.begin(), Idx);
+    }
+    /// Returns an iterator for the begin of the idx2MBBMap.
+    MBBIndexIterator MBBIndexBegin() const {
+      return idx2MBBMap.begin();
+    }
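Together with MBBIndexEnd() below, these accessors replace ad-hoc std::lower_bound calls over idx2MBBMap. A sketch of the intended usage pattern, e.g. collecting the blocks that touch a slot range the way the findLiveInMBBs helper removed below used to (SI, Start, Stop and MBBs are assumed names, not part of this patch):

    for (SlotIndexes::MBBIndexIterator I = SI.findMBBIndex(Start);
         I != SI.MBBIndexEnd() && I->first < Stop; ++I)
      MBBs.push_back(I->second);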
+    /// Return an iterator for the end of the idx2MBBMap.
+    MBBIndexIterator MBBIndexEnd() const {
+      return idx2MBBMap.end();
+    }
+
     /// Returns the basic block which the given index falls in.
     MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
       if (MachineInstr *MI = getInstructionFromIndex(index))
         return MI->getParent();
-      SmallVectorImpl<IdxMBBPair>::const_iterator I =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
-      // Take the pair containing the index
-      SmallVectorImpl<IdxMBBPair>::const_iterator J =
-        ((I != idx2MBBMap.end() && I->first > index) ||
-         (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I;
-      assert(J != idx2MBBMap.end() && J->first <= index &&
+      MBBIndexIterator I = findMBBIndex(index);
+      // Take the pair containing the index
+      MBBIndexIterator J =
+        ((I != MBBIndexEnd() && I->first > index) ||
+         (I == MBBIndexEnd() && !idx2MBBMap.empty())) ? std::prev(I) : I;
+
+      assert(J != MBBIndexEnd() && J->first <= index &&
             index < getMBBEndIdx(J->second) &&
             "index does not correspond to an MBB");
       return J->second;
     }
 
-    bool findLiveInMBBs(SlotIndex start, SlotIndex end,
-                        SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
-      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-      bool resVal = false;
-
-      while (itr != idx2MBBMap.end()) {
-        if (itr->first >= end)
-          break;
-        mbbs.push_back(itr->second);
-        resVal = true;
-        ++itr;
-      }
-      return resVal;
-    }
-
     /// Returns the MBB covering the given range, or null if the range covers
     /// more than one basic block.
     MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
 
       assert(start < end && "Backwards ranges not allowed.");
-
-      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-
-      if (itr == idx2MBBMap.end()) {
+      MBBIndexIterator itr = findMBBIndex(start);
+      if (itr == MBBIndexEnd()) {
         itr = std::prev(itr);
         return itr->second;
       }
@@ -580,11 +587,11 @@ namespace llvm {
       IndexList::iterator prevItr, nextItr;
       if (Late) {
         // Insert mi's index immediately before the following instruction.
-        nextItr = getIndexAfter(mi).listEntry();
+        nextItr = getIndexAfter(mi).listEntry()->getIterator();
         prevItr = std::prev(nextItr);
       } else {
         // Insert mi's index immediately after the preceding instruction.
- prevItr = getIndexBefore(mi).listEntry(); + prevItr = getIndexBefore(mi).listEntry()->getIterator(); nextItr = std::next(prevItr); } @@ -646,11 +653,11 @@ namespace llvm { if (nextMBB == mbb->getParent()->end()) { startEntry = &indexList.back(); endEntry = createEntry(nullptr, 0); - newItr = indexList.insertAfter(startEntry, endEntry); + newItr = indexList.insertAfter(startEntry->getIterator(), endEntry); } else { startEntry = createEntry(nullptr, 0); - endEntry = getMBBStartIdx(nextMBB).listEntry(); - newItr = indexList.insert(endEntry, startEntry); + endEntry = getMBBStartIdx(&*nextMBB).listEntry(); + newItr = indexList.insert(endEntry->getIterator(), startEntry); } SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index fdc1a9143ed2..972a616ad779 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -13,6 +13,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include #include diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10c099d2c2f5..2f1379131cbd 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -41,12 +41,12 @@ public: ~TargetLoweringObjectFileELF() override {} - void emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const override; /// Given a constant with the SectionKind, return a section that it should be /// placed in. - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, @@ -103,7 +103,7 @@ public: Mangler &Mang, const TargetMachine &TM) const override; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; /// The mach-o version of this method defaults to returning a stub reference. @@ -123,6 +123,9 @@ public: const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; + + void getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, + Mangler &Mang, const TargetMachine &TM) const override; }; @@ -140,8 +143,7 @@ public: const TargetMachine &TM) const override; void getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, - const TargetMachine &TM) const override; + Mangler &Mang, const TargetMachine &TM) const override; MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang, const TargetMachine &TM) const override; diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 751fac411ce6..81054aba066f 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -81,6 +81,12 @@ public: return nullptr; } + /// \brief Return true if this machine model includes an instruction-level + /// scheduling model or cycle-to-cycle itinerary data. 
+ bool hasInstrSchedModelOrItineraries() const { + return hasInstrSchedModel() || hasInstrItineraries(); + } + /// \brief Identify the processor corresponding to the current subtarget. unsigned getProcessorID() const { return SchedModel.getProcessorID(); } diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index e1a9fd38290b..929eb88a0393 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -89,6 +89,19 @@ namespace llvm { return VecTy; } + /// Return the type converted to an equivalently sized integer or vector + /// with integer element type. Similar to changeVectorElementTypeToInteger, + /// but also handles scalars. + EVT changeTypeToInteger() { + if (isVector()) + return changeVectorElementTypeToInteger(); + + if (isSimple()) + return MVT::getIntegerVT(getSizeInBits()); + + return changeExtendedTypeToInteger(); + } + /// isSimple - Test if the given EVT is simple (as opposed to being /// extended). bool isSimple() const { @@ -151,6 +164,11 @@ namespace llvm { return isSimple() ? V.is1024BitVector() : isExtended1024BitVector(); } + /// is2048BitVector - Return true if this is a 2048-bit vector type. + bool is2048BitVector() const { + return isSimple() ? V.is2048BitVector() : isExtended2048BitVector(); + } + /// isOverloaded - Return true if this is an overloaded type for TableGen. bool isOverloaded() const { return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); @@ -342,6 +360,7 @@ namespace llvm { // Methods for handling the Extended-type case in functions above. // These are all out-of-line to prevent users of this header file // from having a dependency on Type.h. + EVT changeExtendedTypeToInteger() const; EVT changeExtendedVectorElementTypeToInteger() const; static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth); static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, @@ -356,6 +375,7 @@ namespace llvm { bool isExtended256BitVector() const LLVM_READONLY; bool isExtended512BitVector() const LLVM_READONLY; bool isExtended1024BitVector() const LLVM_READONLY; + bool isExtended2048BitVector() const LLVM_READONLY; EVT getExtendedVectorElementType() const; unsigned getExtendedVectorNumElements() const LLVM_READONLY; unsigned getExtendedSizeInBits() const; diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 2b30f14f902c..f29ec42714e8 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -33,55 +33,70 @@ def f80 : ValueType<80 , 10>; // 80-bit floating point value def f128 : ValueType<128, 11>; // 128-bit floating point value def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value -def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value -def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v1i8 : ValueType<16, 19>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value -def v16i8 : ValueType<128, 23>; // 16 x i8 vector value -def v32i8 : ValueType<256, 24>; // 32 x i8 vector value -def v64i8 : ValueType<512, 25>; // 64 x i8 vector value -def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value -def 
v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
-def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,41>; // 16 x i64 vector value
-def v1i128 : ValueType<128, 42>; // 1 x i128 vector value
+def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
+def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
+def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
+def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
+def v512i1 : ValueType<512, 19>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,20>; //1024 x i1 vector value
 
-def v2f16 : ValueType<32 , 43>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 44>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 45>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 46>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 47>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 48>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 49>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 50>; // 16 x f32 vector value
-def v1f64 : ValueType<64, 51>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 52>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 53>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 54>; // 8 x f64 vector value
+def v1i8 : ValueType<16, 21>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 25>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 26>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 27>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,28>; //128 x i8 vector value
+def v256i8 : ValueType<2048,29>; //256 x i8 vector value
+
+def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 33>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 34>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 35>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,36>; // 64 x i16 vector value
+def v128i16: ValueType<2048,37>; //128 x i16 vector value
+
+def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 40>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 41>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 42>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,43>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,44>; // 64 x i32 vector value
+
+def v1i64 : ValueType<64 , 45>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 46>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 47>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 48>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,49>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,50>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 51>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value
+def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 54>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 57>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 58>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 59>; // 16 x f32 vector value
+def v1f64 : ValueType<64, 60>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 61>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 62>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 63>; // 8 x f64 vector value
 
-def x86mmx : ValueType<64 , 55>; // X86 MMX value
-def FlagVT : ValueType<0 , 56>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 57>; // Produces no value
-def untyped: ValueType<8 , 58>; // Produces an untyped value
+def x86mmx : ValueType<64 , 64>; // X86 MMX value
+def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 66>; // Produces no value
+def untyped: ValueType<8 , 67>; // Produces an untyped value
+def token : ValueType<0 , 249>; // TokenTy
 def MetadataVT: ValueType<0, 250>; // Metadata
 
 // Pseudo valuetype mapped to the current pointer size to any address space.
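In each def above, the first ValueType parameter is the type's size in bits and the second is its fixed position in the MVT enum, which is why the existing entries had to be renumbered when the new wide vector types were inserted. The new types surface in C++ as MVT members; a small illustration (illustrative values only):

    // v64i32 is one of the new 2048-bit types: 64 elements x 32 bits.
    MVT VT = MVT::getVectorVT(MVT::i32, 64);
    assert(VT == MVT::v64i32);
    assert(VT.getSizeInBits() == 2048 && VT.getVectorNumElements() == 64);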
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index 75638a058a30..70d558f5cfbd 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -14,145 +14,103 @@
 #ifndef LLVM_CODEGEN_WINEHFUNCINFO_H
 #define LLVM_CODEGEN_WINEHFUNCINFO_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/DenseMap.h"
 
 namespace llvm {
+class AllocaInst;
 class BasicBlock;
+class CatchReturnInst;
 class Constant;
 class Function;
 class GlobalVariable;
 class InvokeInst;
 class IntrinsicInst;
 class LandingPadInst;
+class MCExpr;
 class MCSymbol;
+class MachineBasicBlock;
 class Value;
 
-enum ActionType { Catch, Cleanup };
+// The following structs represent the .xdata tables for various
+// Windows-related EH personalities.
 
-class ActionHandler {
-public:
-  ActionHandler(BasicBlock *BB, ActionType Type)
-      : StartBB(BB), Type(Type), EHState(-1), HandlerBlockOrFunc(nullptr) {}
+typedef PointerUnion<const BasicBlock *, MachineBasicBlock *> MBBOrBasicBlock;
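A quick sketch of how a PointerUnion member like this is typically queried; Handler here stands in for any MBBOrBasicBlock field from the structs below (a hypothetical variable, not part of the patch):

    MBBOrBasicBlock Handler;
    if (const BasicBlock *BB = Handler.dyn_cast<const BasicBlock *>()) {
      // Still in IR form, before the handler is lowered.
      (void)BB;
    } else if (MachineBasicBlock *MBB = Handler.dyn_cast<MachineBasicBlock *>()) {
      // After instruction selection.
      (void)MBB;
    }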
-  ActionType getType() const { return Type; }
-  BasicBlock *getStartBlock() const { return StartBB; }
-
-  bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
-
-  void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
-  Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
-
-  void setEHState(int State) { EHState = State; }
-  int getEHState() const { return EHState; }
-
-private:
-  BasicBlock *StartBB;
-  ActionType Type;
-  int EHState;
-
-  // Can be either a BlockAddress or a Function depending on the EH personality.
-  Constant *HandlerBlockOrFunc;
-};
-
-class CatchHandler : public ActionHandler {
-public:
-  CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
-      : ActionHandler(BB, ActionType::Catch), Selector(Selector),
-        NextBB(NextBB), ExceptionObjectVar(nullptr),
-        ExceptionObjectIndex(-1) {}
-
-  // Method for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const ActionHandler *H) {
-    return H->getType() == ActionType::Catch;
-  }
-
-  Constant *getSelector() const { return Selector; }
-  BasicBlock *getNextBB() const { return NextBB; }
-
-  const Value *getExceptionVar() { return ExceptionObjectVar; }
-  TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
-  void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
-  void setExceptionVarIndex(int Index) { ExceptionObjectIndex = Index; }
-  int getExceptionVarIndex() const { return ExceptionObjectIndex; }
-  void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
-    ReturnTargets = Targets;
-  }
-
-private:
-  Constant *Selector;
-  BasicBlock *NextBB;
-  // While catch handlers are being outlined the ExceptionObjectVar field will
-  // be populated with the instruction in the parent frame that corresponds
-  // to the exception object (or nullptr if the catch does not use an
-  // exception object) and the ExceptionObjectIndex field will be -1.
-  // When the parseEHActions function is called to populate a vector of
-  // instances of this class, the ExceptionObjectVar field will be nullptr
-  // and the ExceptionObjectIndex will be the index of the exception object in
-  // the parent function's localescape block.
-  const Value *ExceptionObjectVar;
-  int ExceptionObjectIndex;
-  TinyPtrVector<BasicBlock *> ReturnTargets;
-};
-
-class CleanupHandler : public ActionHandler {
-public:
-  CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
-
-  // Method for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const ActionHandler *H) {
-    return H->getType() == ActionType::Cleanup;
-  }
-};
-
-void parseEHActions(const IntrinsicInst *II,
-                    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions);
-
-// The following structs respresent the .xdata for functions using C++
-// exceptions on Windows.
-
-struct WinEHUnwindMapEntry {
+struct CxxUnwindMapEntry {
   int ToState;
-  Function *Cleanup;
+  MBBOrBasicBlock Cleanup;
+};
+
+/// Similar to CxxUnwindMapEntry, but supports SEH filters.
+struct SEHUnwindMapEntry {
+  /// If unwinding continues through this handler, transition to the handler at
+  /// this state. This indexes into SEHUnwindMap.
+  int ToState = -1;
+
+  bool IsFinally = false;
+
+  /// Holds the filter expression function.
+  const Function *Filter = nullptr;
+
+  /// Holds the __except or __finally basic block.
+  MBBOrBasicBlock Handler;
 };
 
 struct WinEHHandlerType {
   int Adjectives;
+  /// The CatchObj starts out life as an LLVM alloca and is eventually turned
+  /// into a frame index.
+ union { + const AllocaInst *Alloca; + int FrameIndex; + } CatchObj = {}; GlobalVariable *TypeDescriptor; - int CatchObjRecoverIdx; - Function *Handler; + MBBOrBasicBlock Handler; }; struct WinEHTryBlockMapEntry { - int TryLow; - int TryHigh; + int TryLow = -1; + int TryHigh = -1; + int CatchHigh = -1; SmallVector HandlerArray; }; +enum class ClrHandlerType { Catch, Finally, Fault, Filter }; + +struct ClrEHUnwindMapEntry { + MBBOrBasicBlock Handler; + uint32_t TypeToken; + int Parent; + ClrHandlerType HandlerType; +}; + struct WinEHFuncInfo { - DenseMap RootLPad; - DenseMap LastInvoke; - DenseMap HandlerEnclosedState; - DenseMap LastInvokeVisited; - DenseMap LandingPadStateMap; - DenseMap CatchHandlerParentFrameObjIdx; - DenseMap CatchHandlerParentFrameObjOffset; - DenseMap CatchHandlerMaxState; - DenseMap HandlerBaseState; - SmallVector UnwindMap; + DenseMap EHPadStateMap; + DenseMap FuncletBaseStateMap; + DenseMap InvokeStateMap; + DenseMap + CatchRetSuccessorColorMap; + DenseMap> LabelToStateMap; + SmallVector CxxUnwindMap; SmallVector TryBlockMap; - SmallVector, 4> IPToStateList; + SmallVector SEHUnwindMap; + SmallVector ClrEHUnwindMap; int UnwindHelpFrameIdx = INT_MAX; - int UnwindHelpFrameOffset = -1; - unsigned NumIPToStateFuncsVisited = 0; + int PSPSymFrameIdx = INT_MAX; - /// localescape index of the 32-bit EH registration node. Set by - /// WinEHStatePass and used indirectly by SEH filter functions of the parent. - int EHRegNodeEscapeIndex = INT_MAX; + int getLastStateNumber() const { return CxxUnwindMap.size() - 1; } - WinEHFuncInfo() {} + void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin, + MCSymbol *InvokeEnd); + + int EHRegNodeFrameIndex = INT_MAX; + int EHRegNodeEndOffset = INT_MAX; + int SEHSetFrameOffset = INT_MAX; + + WinEHFuncInfo(); }; /// Analyze the IR in ParentFn and it's handlers to build WinEHFuncInfo, which @@ -161,5 +119,12 @@ struct WinEHFuncInfo { void calculateWinCXXEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); +void calculateSEHStateNumbers(const Function *ParentFn, + WinEHFuncInfo &FuncInfo); + +void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); + +void calculateCatchReturnSuccessorColors(const Function *Fn, + WinEHFuncInfo &FuncInfo); } #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index b9fd4504ad76..6a5ac889e6f0 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -324,9 +324,6 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H} -/* Define to 1 if you have that is POSIX.1 compatible. */ -#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H} - /* Define if the setupterm() function is supported this platform. */ #cmakedefine HAVE_TERMINFO ${HAVE_TERMINFO} @@ -423,8 +420,10 @@ /* Installation directory for data files */ #cmakedefine LLVM_DATADIR "${LLVM_DATADIR}" -/* Target triple LLVM will generate code for by default */ -#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" +/* Target triple LLVM will generate code for by default + * Doesn't use `cmakedefine` because it is allowed to be empty. 
+ */ +#define LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" /* Installation directory for documentation */ #cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}" diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 09706499ea30..498aa9ee6b5d 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -152,6 +152,9 @@ /* Define to 1 if you have the `shell32' library (-lshell32). */ #undef HAVE_LIBSHELL32 +/* Define to 1 if you have the `uuid' library (-luuid). */ +#undef HAVE_LIBUUID + /* Define to 1 if you have the `z' library (-lz). */ #undef HAVE_LIBZ @@ -229,9 +232,6 @@ /* Have pthread_rwlock_init */ #undef HAVE_PTHREAD_RWLOCK_INIT -/* Define to 1 if srand48/lrand48/drand48 exist in */ -#undef HAVE_RAND48 - /* Define to 1 if you have the `realpath' function. */ #undef HAVE_REALPATH @@ -250,15 +250,9 @@ /* Define to 1 if you have the `setrlimit' function. */ #undef HAVE_SETRLIMIT -/* Define to 1 if you have the `siglongjmp' function. */ -#undef HAVE_SIGLONGJMP - /* Define to 1 if you have the header file. */ #undef HAVE_SIGNAL_H -/* Define to 1 if you have the `sigsetjmp' function. */ -#undef HAVE_SIGSETJMP - /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H @@ -318,9 +312,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_UIO_H -/* Define to 1 if you have that is POSIX.1 compatible. */ -#undef HAVE_SYS_WAIT_H - /* Define if the setupterm() function is supported this platform. */ #undef HAVE_TERMINFO @@ -333,9 +324,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* Define to 1 if you have the header file. */ -#undef HAVE_UTIME_H - /* Define to 1 if the system has the type `u_int64_t'. */ #undef HAVE_U_INT64_T diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h new file mode 100644 index 000000000000..7728120d68de --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -0,0 +1,367 @@ +//===- CodeView.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H +#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H + +#include + +namespace llvm { +namespace codeview { + +enum class CallingConvention : uint8_t { + NearC = 0x00, // near right to left push, caller pops stack + FarC = 0x01, // far right to left push, caller pops stack + NearPascal = 0x02, // near left to right push, callee pops stack + FarPascal = 0x03, // far left to right push, callee pops stack + NearFast = 0x04, // near left to right push with regs, callee pops stack + FarFast = 0x05, // far left to right push with regs, callee pops stack + NearStdCall = 0x07, // near standard call + FarStdCall = 0x08, // far standard call + NearSysCall = 0x09, // near sys call + FarSysCall = 0x0a, // far sys call + ThisCall = 0x0b, // this call (this passed in register) + MipsCall = 0x0c, // Mips call + Generic = 0x0d, // Generic call sequence + AlphaCall = 0x0e, // Alpha call + PpcCall = 0x0f, // PPC call + SHCall = 0x10, // Hitachi SuperH call + ArmCall = 0x11, // ARM call + AM33Call = 0x12, // AM33 call + TriCall = 0x13, // TriCore Call + SH5Call = 0x14, // Hitachi SuperH-5 call + M32RCall = 0x15, // M32R Call + ClrCall = 0x16, // clr call + Inline = + 0x17, // Marker for routines always inlined and thus lacking a convention + NearVector = 0x18 // near left to right push with regs, callee pops stack +}; + +enum class ClassOptions : uint16_t { + None = 0x0000, + Packed = 0x0001, + HasConstructorOrDestructor = 0x0002, + HasOverloadedOperator = 0x0004, + Nested = 0x0008, + ContainsNestedClass = 0x0010, + HasOverloadedAssignmentOperator = 0x0020, + HasConversionOperator = 0x0040, + ForwardReference = 0x0080, + Scoped = 0x0100, + HasUniqueName = 0x0200, + Sealed = 0x0400, + Intrinsic = 0x2000 +}; + +inline ClassOptions operator|(ClassOptions a, ClassOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline ClassOptions operator&(ClassOptions a, ClassOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline ClassOptions operator~(ClassOptions a) { + return static_cast(~static_cast(a)); +} + +enum class FrameProcedureOptions : uint32_t { + None = 0x00000000, + HasAlloca = 0x00000001, + HasSetJmp = 0x00000002, + HasLongJmp = 0x00000004, + HasInlineAssembly = 0x00000008, + HasExceptionHandling = 0x00000010, + MarkedInline = 0x00000020, + HasStructuredExceptionHandling = 0x00000040, + Naked = 0x00000080, + SecurityChecks = 0x00000100, + AsynchronousExceptionHandling = 0x00000200, + NoStackOrderingForSecurityChecks = 0x00000400, + Inlined = 0x00000800, + StrictSecurityChecks = 0x00001000, + SafeBuffers = 0x00002000, + ProfileGuidedOptimization = 0x00040000, + ValidProfileCounts = 0x00080000, + OptimizedForSpeed = 0x00100000, + GuardCfg = 0x00200000, + GuardCfw = 0x00400000 +}; + +inline FrameProcedureOptions operator|(FrameProcedureOptions a, + FrameProcedureOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline FrameProcedureOptions operator&(FrameProcedureOptions a, + FrameProcedureOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline FrameProcedureOptions operator~(FrameProcedureOptions a) { + return static_cast(~static_cast(a)); +} + +enum class FunctionOptions : uint8_t { + None = 0x00, + CxxReturnUdt = 0x01, + Constructor = 0x02, + ConstructorWithVirtualBases = 0x04 +}; + +inline FunctionOptions operator|(FunctionOptions a, FunctionOptions b) { + return 
static_cast(static_cast(a) | + static_cast(b)); +} + +inline FunctionOptions operator&(FunctionOptions a, FunctionOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline FunctionOptions operator~(FunctionOptions a) { + return static_cast(~static_cast(a)); +} + +enum class HfaKind : uint8_t { + None = 0x00, + Float = 0x01, + Double = 0x02, + Other = 0x03 +}; + +enum class MemberAccess : uint8_t { + None = 0, + Private = 1, + Protected = 2, + Public = 3 +}; + +enum class MethodKind : uint8_t { + Vanilla = 0x00, + Virtual = 0x01, + Static = 0x02, + Friend = 0x03, + IntroducingVirtual = 0x04, + PureVirtual = 0x05, + PureIntroducingVirtual = 0x06 +}; + +enum class MethodOptions : uint16_t { + None = 0x0000, + Pseudo = 0x0020, + CompilerGenerated = 0x0100, + Sealed = 0x0200 +}; + +inline MethodOptions operator|(MethodOptions a, MethodOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline MethodOptions operator&(MethodOptions a, MethodOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline MethodOptions operator~(MethodOptions a) { + return static_cast(~static_cast(a)); +} + +enum class ModifierOptions : uint16_t { + None = 0x0000, + Const = 0x0001, + Volatile = 0x0002, + Unaligned = 0x0004 +}; + +inline ModifierOptions operator|(ModifierOptions a, ModifierOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline ModifierOptions operator&(ModifierOptions a, ModifierOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline ModifierOptions operator~(ModifierOptions a) { + return static_cast(~static_cast(a)); +} + +enum class ModuleSubstreamKind : uint32_t { + Symbols = 0xf1, + Lines = 0xf2, + StringTable = 0xf3, + FileChecksums = 0xf4, + FrameData = 0xf5, + InlineeLines = 0xf6, + CrossScopeImports = 0xf7, + CrossScopeExports = 0xf8 +}; + +enum class PointerKind : uint8_t { + Near16 = 0x00, // 16 bit pointer + Far16 = 0x01, // 16:16 far pointer + Huge16 = 0x02, // 16:16 huge pointer + BasedOnSegment = 0x03, // based on segment + BasedOnValue = 0x04, // based on value of base + BasedOnSegmentValue = 0x05, // based on segment value of base + BasedOnAddress = 0x06, // based on address of base + BasedOnSegmentAddress = 0x07, // based on segment address of base + BasedOnType = 0x08, // based on type + BasedOnSelf = 0x09, // based on self + Near32 = 0x0a, // 32 bit pointer + Far32 = 0x0b, // 16:32 pointer + Near64 = 0x0c // 64 bit pointer +}; + +enum class PointerMode : uint8_t { + Pointer = 0x00, // "normal" pointer + LValueReference = 0x01, // "old" reference + PointerToDataMember = 0x02, // pointer to data member + PointerToMemberFunction = 0x03, // pointer to member function + RValueReference = 0x04 // r-value reference +}; + +enum class PointerOptions : uint32_t { + None = 0x00000000, + Flat32 = 0x00000100, + Volatile = 0x00000200, + Const = 0x00000400, + Unaligned = 0x00000800, + Restrict = 0x00001000, + WinRTSmartPointer = 0x00080000 +}; + +inline PointerOptions operator|(PointerOptions a, PointerOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline PointerOptions operator&(PointerOptions a, PointerOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline PointerOptions operator~(PointerOptions a) { + return static_cast(~static_cast(a)); +} + +enum class PointerToMemberRepresentation : uint16_t { + Unknown = 0x00, // not specified (pre VC8) + SingleInheritanceData = 0x01, // member data, single inheritance + 
MultipleInheritanceData = 0x02, // member data, multiple inheritance + VirtualInheritanceData = 0x03, // member data, virtual inheritance + GeneralData = 0x04, // member data, most general + SingleInheritanceFunction = 0x05, // member function, single inheritance + MultipleInheritanceFunction = 0x06, // member function, multiple inheritance + VirtualInheritanceFunction = 0x07, // member function, virtual inheritance + GeneralFunction = 0x08 // member function, most general +}; + +enum class TypeRecordKind : uint16_t { + None = 0, + + VirtualTableShape = 0x000a, + Label = 0x000e, + EndPrecompiledHeader = 0x0014, + + Modifier = 0x1001, + Pointer = 0x1002, + Procedure = 0x1008, + MemberFunction = 0x1009, + + Oem = 0x100f, + Oem2 = 0x1011, + + ArgumentList = 0x1201, + FieldList = 0x1203, + BitField = 0x1205, + MethodList = 0x1206, + + BaseClass = 0x1400, + VirtualBaseClass = 0x1401, + IndirectVirtualBaseClass = 0x1402, + Index = 0x1404, + VirtualFunctionTablePointer = 0x1409, + + Enumerate = 0x1502, + Array = 0x1503, + Class = 0x1504, + Structure = 0x1505, + Union = 0x1506, + Enum = 0x1507, + Alias = 0x150a, + Member = 0x150d, + StaticMember = 0x150e, + Method = 0x150f, + NestedType = 0x1510, + OneMethod = 0x1511, + VirtualFunctionTable = 0x151d, + + FunctionId = 0x1601, + MemberFunctionId = 0x1602, + BuildInfo = 0x1603, + SubstringList = 0x1604, + StringId = 0x1605, + UdtSourceLine = 0x1606, + + SByte = 0x8000, + Int16 = 0x8001, + UInt16 = 0x8002, + Int32 = 0x8003, + UInt32 = 0x8004, + Single = 0x8005, + Double = 0x8006, + Float80 = 0x8007, + Float128 = 0x8008, + Int64 = 0x8009, + UInt64 = 0x800a, + Float48 = 0x800b, + Complex32 = 0x800c, + Complex64 = 0x800d, + Complex80 = 0x800e, + Complex128 = 0x800f, + VarString = 0x8010, + + Int128 = 0x8017, + UInt128 = 0x8018, + + Decimal = 0x8019, + Date = 0x801a, + Utf8String = 0x801b, + + Float16 = 0x801c +}; + +enum class VirtualTableSlotKind : uint8_t { + Near16 = 0x00, + Far16 = 0x01, + This = 0x02, + Outer = 0x03, + Meta = 0x04, + Near = 0x05, + Far = 0x06 +}; + +enum class WindowsRTClassKind : uint8_t { + None = 0x00, + RefClass = 0x01, + ValueClass = 0x02, + Interface = 0x03 +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/CodeViewOStream.h b/include/llvm/DebugInfo/CodeView/CodeViewOStream.h new file mode 100644 index 000000000000..14d057a249a5 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/CodeViewOStream.h @@ -0,0 +1,39 @@ +//===- CodeViewOStream.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
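A note on the flag enums in CodeView.h above (ClassOptions, FrameProcedureOptions, FunctionOptions, MethodOptions, ModifierOptions, PointerOptions): because they are scoped enums, they do not convert to integers implicitly, which is why each one carries explicit operator|, operator& and operator~ overloads. A minimal usage sketch (illustrative only):

    ClassOptions Opts = ClassOptions::Packed | ClassOptions::HasUniqueName;
    if ((Opts & ClassOptions::Packed) != ClassOptions::None) {
      // Packed is set; tests compare against None because there is no
      // implicit conversion to bool.
    }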
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H +#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { + +template class CodeViewOStream { +private: + CodeViewOStream(const CodeViewOStream &) = delete; + CodeViewOStream &operator=(const CodeViewOStream &) = delete; + +public: + typedef typename Writer::LabelType LabelType; + +public: + explicit CodeViewOStream(Writer &W); + +private: + uint64_t size() const { return W.tell(); } + +private: + Writer &W; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h new file mode 100644 index 000000000000..1ed62487aecc --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h @@ -0,0 +1,78 @@ +//===- FieldListRecordBuilder.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class MethodInfo { +public: + MethodInfo() : Access(), Kind(), Options(), Type(), VTableSlotOffset(-1) {} + + MethodInfo(MemberAccess Access, MethodKind Kind, MethodOptions Options, + TypeIndex Type, int32_t VTableSlotOffset) + : Access(Access), Kind(Kind), Options(Options), Type(Type), + VTableSlotOffset(VTableSlotOffset) {} + + MemberAccess getAccess() const { return Access; } + MethodKind getKind() const { return Kind; } + MethodOptions getOptions() const { return Options; } + TypeIndex getType() const { return Type; } + int32_t getVTableSlotOffset() const { return VTableSlotOffset; } + +private: + MemberAccess Access; + MethodKind Kind; + MethodOptions Options; + TypeIndex Type; + int32_t VTableSlotOffset; +}; + +class FieldListRecordBuilder : public ListRecordBuilder { +private: + FieldListRecordBuilder(const FieldListRecordBuilder &) = delete; + void operator=(const FieldListRecordBuilder &) = delete; + +public: + FieldListRecordBuilder(); + + void writeBaseClass(MemberAccess Access, TypeIndex Type, uint64_t Offset); + void writeEnumerate(MemberAccess Access, uint64_t Value, StringRef Name); + void writeIndirectVirtualBaseClass(MemberAccess Access, TypeIndex Type, + TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeMember(MemberAccess Access, TypeIndex Type, uint64_t Offset, + StringRef Name); + void writeOneMethod(MemberAccess Access, MethodKind Kind, + MethodOptions Options, TypeIndex Type, + int32_t VTableSlotOffset, StringRef Name); + void writeOneMethod(const MethodInfo &Method, StringRef Name); + void writeMethod(uint16_t OverloadCount, TypeIndex MethodList, + StringRef Name); + void writeNestedType(TypeIndex Type, StringRef Name); + void writeStaticMember(MemberAccess Access, TypeIndex Type, StringRef Name); + void writeVirtualBaseClass(MemberAccess Access, TypeIndex Type, + TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeVirtualBaseClass(TypeRecordKind Kind, 
MemberAccess Access, + TypeIndex Type, TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeVirtualFunctionTablePointer(TypeIndex Type); +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/FunctionId.h b/include/llvm/DebugInfo/CodeView/FunctionId.h new file mode 100644 index 000000000000..1af3da810b5a --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/FunctionId.h @@ -0,0 +1,56 @@ +//===- FunctionId.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H +#define LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H + +#include + +namespace llvm { +namespace codeview { + +class FunctionId { +public: + FunctionId() : Index(0) {} + + explicit FunctionId(uint32_t Index) : Index(Index) {} + + uint32_t getIndex() const { return Index; } + +private: + uint32_t Index; +}; + +inline bool operator==(const FunctionId &A, const FunctionId &B) { + return A.getIndex() == B.getIndex(); +} + +inline bool operator!=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() != B.getIndex(); +} + +inline bool operator<(const FunctionId &A, const FunctionId &B) { + return A.getIndex() < B.getIndex(); +} + +inline bool operator<=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() <= B.getIndex(); +} + +inline bool operator>(const FunctionId &A, const FunctionId &B) { + return A.getIndex() > B.getIndex(); +} + +inline bool operator>=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() >= B.getIndex(); +} +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h new file mode 100644 index 000000000000..a7cdbdaac32f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/Line.h @@ -0,0 +1,124 @@ +//===- Line.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LINE_H +#define LLVM_DEBUGINFO_CODEVIEW_LINE_H + +#include + +namespace llvm { +namespace codeview { + +class LineInfo { +public: + static const uint32_t AlwaysStepIntoLineNumber = 0xfeefee; + static const uint32_t NeverStepIntoLineNumber = 0xf00f00; + +private: + static const uint32_t StartLineMask = 0x00ffffff; + static const uint32_t EndLineDeltaMask = 0x7f000000; + static const int EndLineDeltaShift = 24; + static const uint32_t StatementFlag = 0x80000000u; + +public: + LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement); + + uint32_t getStartLine() const { return LineData & StartLineMask; } + + uint32_t getLineDelta() const { + return (LineData & EndLineDeltaMask) >> EndLineDeltaShift; + } + + uint32_t getEndLine() const { return getStartLine() + getLineDelta(); } + + bool isStatement() const { return (LineData & StatementFlag) != 0; } + + uint32_t getRawData() const { return LineData; } + + bool isAlwaysStepInto() const { + return getStartLine() == AlwaysStepIntoLineNumber; + } + + bool isNeverStepInto() const { + return getStartLine() == NeverStepIntoLineNumber; + } + +private: + uint32_t LineData; +}; + +class ColumnInfo { +private: + static const uint32_t StartColumnMask = 0x0000ffffu; + static const uint32_t EndColumnMask = 0xffff0000u; + static const int EndColumnShift = 16; + +public: + ColumnInfo(uint16_t StartColumn, uint16_t EndColumn) { + ColumnData = + (static_cast(StartColumn) & StartColumnMask) | + ((static_cast(EndColumn) << EndColumnShift) & EndColumnMask); + } + + uint16_t getStartColumn() const { + return static_cast(ColumnData & StartColumnMask); + } + + uint16_t getEndColumn() const { + return static_cast((ColumnData & EndColumnMask) >> + EndColumnShift); + } + + uint32_t getRawData() const { return ColumnData; } + +private: + uint32_t ColumnData; +}; + +class Line { +private: + int32_t CodeOffset; + LineInfo LineInf; + ColumnInfo ColumnInf; + +public: + Line(int32_t CodeOffset, uint32_t StartLine, uint32_t EndLine, + uint16_t StartColumn, uint16_t EndColumn, bool IsStatement) + : CodeOffset(CodeOffset), LineInf(StartLine, EndLine, IsStatement), + ColumnInf(StartColumn, EndColumn) {} + + Line(int32_t CodeOffset, LineInfo LineInf, ColumnInfo ColumnInf) + : CodeOffset(CodeOffset), LineInf(LineInf), ColumnInf(ColumnInf) {} + + LineInfo getLineInfo() const { return LineInf; } + + ColumnInfo getColumnInfo() const { return ColumnInf; } + + int32_t getCodeOffset() const { return CodeOffset; } + + uint32_t getStartLine() const { return LineInf.getStartLine(); } + + uint32_t getLineDelta() const { return LineInf.getLineDelta(); } + + uint32_t getEndLine() const { return LineInf.getEndLine(); } + + uint16_t getStartColumn() const { return ColumnInf.getStartColumn(); } + + uint16_t getEndColumn() const { return ColumnInf.getEndColumn(); } + + bool isStatement() const { return LineInf.isStatement(); } + + bool isAlwaysStepInto() const { return LineInf.isAlwaysStepInto(); } + + bool isNeverStepInto() const { return LineInf.isNeverStepInto(); } +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h new file mode 100644 index 000000000000..df0a2e08a418 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h @@ -0,0 +1,43 @@ +//===- ListRecordBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// 
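LineInfo's constructor is defined out of line, but the masks in Line.h above fully determine the packing: 24 bits of start line, a 7-bit end-line delta, and a statement flag in the top bit. A hedged sketch of the equivalent computation (clamping of out-of-range values is omitted; this is not the patch's implementation):

    uint32_t packLineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement) {
      uint32_t Delta = EndLine - StartLine;      // must fit in 7 bits
      return (StartLine & 0x00ffffff)            // StartLineMask
           | ((Delta << 24) & 0x7f000000)        // EndLineDelta{Mask,Shift}
           | (IsStatement ? 0x80000000u : 0);    // StatementFlag
    }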
+// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class ListRecordBuilder { +private: + ListRecordBuilder(const ListRecordBuilder &) = delete; + ListRecordBuilder &operator=(const ListRecordBuilder &) = delete; + +protected: + const int MethodKindShift = 2; + + explicit ListRecordBuilder(TypeRecordKind Kind); + +public: + llvm::StringRef str() { return Builder.str(); } + +protected: + void finishSubRecord(); + + TypeRecordBuilder &getBuilder() { return Builder; } + +private: + TypeRecordBuilder Builder; + SmallVector ContinuationOffsets; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h new file mode 100644 index 000000000000..5bfe2a068672 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h @@ -0,0 +1,68 @@ +//===- MemoryTypeTableBuilder.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H + +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +class MemoryTypeTableBuilder : public TypeTableBuilder { +public: + class Record { + public: + explicit Record(llvm::StringRef RData); + + const char *data() const { return Data.get(); } + uint16_t size() const { return Size; } + + private: + uint16_t Size; + std::unique_ptr Data; + }; + +private: + class RecordHash : std::unary_function { + public: + size_t operator()(llvm::StringRef Val) const { + return static_cast(llvm::hash_value(Val)); + } + }; + +public: + MemoryTypeTableBuilder() {} + + template void ForEachRecord(TFunc Func) { + uint32_t Index = TypeIndex::FirstNonSimpleIndex; + + for (const std::unique_ptr &R : Records) { + Func(TypeIndex(Index), R.get()); + ++Index; + } + } + +private: + virtual TypeIndex writeRecord(llvm::StringRef Data) override; + +private: + std::vector> Records; + std::unordered_map HashedRecords; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h new file mode 100644 index 000000000000..faa404d41b1f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h @@ -0,0 +1,35 @@ +//===- MethodListRecordBuilder.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
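Typical consumption of MemoryTypeTableBuilder above is through ForEachRecord, whose numbering starts at TypeIndex::FirstNonSimpleIndex (0x1000) because lower indices are reserved for the built-in simple types. A sketch, assuming OS is a raw_ostream available at the call site:

    MemoryTypeTableBuilder Table;
    // ... type records are written through the TypeTableBuilder interface ...
    Table.ForEachRecord([&](TypeIndex TI, MemoryTypeTableBuilder::Record *R) {
      OS << "type index 0x";
      OS.write_hex(TI.getIndex());
      OS << " occupies " << R->size() << " bytes\n";
    });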
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class MethodInfo; + +class MethodListRecordBuilder : public ListRecordBuilder { +private: + MethodListRecordBuilder(const MethodListRecordBuilder &) = delete; + MethodListRecordBuilder &operator=(const MethodListRecordBuilder &) = delete; + +public: + MethodListRecordBuilder(); + + void writeMethod(MemberAccess Access, MethodKind Kind, MethodOptions Options, + TypeIndex Type, int32_t VTableSlotOffset); + void writeMethod(const MethodInfo &Method); +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h new file mode 100644 index 000000000000..d3a541be4c62 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -0,0 +1,176 @@ +//===- TypeIndex.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H + +#include +#include + +namespace llvm { +namespace codeview { + +enum class SimpleTypeKind : uint32_t { + None = 0x0000, // uncharacterized type (no type) + Void = 0x0003, // void + NotTranslated = 0x0007, // type not translated by cvpack + HResult = 0x0008, // OLE/COM HRESULT + + SignedCharacter = 0x0010, // 8 bit signed + UnsignedCharacter = 0x0020, // 8 bit unsigned + NarrowCharacter = 0x0070, // really a char + WideCharacter = 0x0071, // wide char + + SByte = 0x0068, // 8 bit signed int + Byte = 0x0069, // 8 bit unsigned int + Int16Short = 0x0011, // 16 bit signed + UInt16Short = 0x0021, // 16 bit unsigned + Int16 = 0x0072, // 16 bit signed int + UInt16 = 0x0073, // 16 bit unsigned int + Int32Long = 0x0012, // 32 bit signed + UInt32Long = 0x0022, // 32 bit unsigned + Int32 = 0x0074, // 32 bit signed int + UInt32 = 0x0075, // 32 bit unsigned int + Int64Quad = 0x0013, // 64 bit signed + UInt64Quad = 0x0023, // 64 bit unsigned + Int64 = 0x0076, // 64 bit signed int + UInt64 = 0x0077, // 64 bit unsigned int + Int128 = 0x0078, // 128 bit signed int + UInt128 = 0x0079, // 128 bit unsigned int + + Float16 = 0x0046, // 16 bit real + Float32 = 0x0040, // 32 bit real + Float32PartialPrecision = 0x0045, // 32 bit PP real + Float48 = 0x0044, // 48 bit real + Float64 = 0x0041, // 64 bit real + Float80 = 0x0042, // 80 bit real + Float128 = 0x0043, // 128 bit real + + Complex32 = 0x0050, // 32 bit complex + Complex64 = 0x0051, // 64 bit complex + Complex80 = 0x0052, // 80 bit complex + Complex128 = 0x0053, // 128 bit complex + + Boolean8 = 0x0030, // 8 bit boolean + Boolean16 = 0x0031, // 16 bit boolean + Boolean32 = 0x0032, // 32 bit boolean + Boolean64 = 0x0033 // 64 bit boolean +}; + +enum class SimpleTypeMode : uint32_t { + Direct = 0x00000000, // Not a pointer + NearPointer = 0x00000100, // Near pointer + FarPointer = 0x00000200, // Far pointer + HugePointer = 0x00000300, // Huge pointer + NearPointer32 = 0x00000400, // 32 bit near pointer + FarPointer32 = 0x00000500, // 32 bit far pointer + NearPointer64 = 0x00000600, // 64 bit near pointer + NearPointer128 = 0x00000700 // 128 
bit near pointer +}; + +class TypeIndex { +public: + static const uint32_t FirstNonSimpleIndex = 0x1000; + static const uint32_t SimpleKindMask = 0x000000ff; + static const uint32_t SimpleModeMask = 0x00000700; + +public: + TypeIndex() : Index(0) {} + explicit TypeIndex(uint32_t Index) : Index(Index) {} + explicit TypeIndex(SimpleTypeKind Kind) + : Index(static_cast(Kind)) {} + TypeIndex(SimpleTypeKind Kind, SimpleTypeMode Mode) + : Index(static_cast(Kind) | static_cast(Mode)) {} + + uint32_t getIndex() const { return Index; } + bool isSimple() const { return Index < FirstNonSimpleIndex; } + + SimpleTypeKind getSimpleKind() const { + assert(isSimple()); + return static_cast(Index & SimpleKindMask); + } + + SimpleTypeMode getSimpleMode() const { + assert(isSimple()); + return static_cast(Index & SimpleModeMask); + } + + static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); } + static TypeIndex VoidPointer32() { + return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer32); + } + static TypeIndex VoidPointer64() { + return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64); + } + + static TypeIndex SignedCharacter() { + return TypeIndex(SimpleTypeKind::SignedCharacter); + } + static TypeIndex UnsignedCharacter() { + return TypeIndex(SimpleTypeKind::UnsignedCharacter); + } + static TypeIndex NarrowCharacter() { + return TypeIndex(SimpleTypeKind::NarrowCharacter); + } + static TypeIndex WideCharacter() { + return TypeIndex(SimpleTypeKind::WideCharacter); + } + static TypeIndex Int16Short() { + return TypeIndex(SimpleTypeKind::Int16Short); + } + static TypeIndex UInt16Short() { + return TypeIndex(SimpleTypeKind::UInt16Short); + } + static TypeIndex Int32() { return TypeIndex(SimpleTypeKind::Int32); } + static TypeIndex UInt32() { return TypeIndex(SimpleTypeKind::UInt32); } + static TypeIndex Int32Long() { return TypeIndex(SimpleTypeKind::Int32Long); } + static TypeIndex UInt32Long() { + return TypeIndex(SimpleTypeKind::UInt32Long); + } + static TypeIndex Int64() { return TypeIndex(SimpleTypeKind::Int64); } + static TypeIndex UInt64() { return TypeIndex(SimpleTypeKind::UInt64); } + static TypeIndex Int64Quad() { return TypeIndex(SimpleTypeKind::Int64Quad); } + static TypeIndex UInt64Quad() { + return TypeIndex(SimpleTypeKind::UInt64Quad); + } + + static TypeIndex Float32() { return TypeIndex(SimpleTypeKind::Float32); } + static TypeIndex Float64() { return TypeIndex(SimpleTypeKind::Float64); } + +private: + uint32_t Index; +}; + +inline bool operator==(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() == B.getIndex(); +} + +inline bool operator!=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() != B.getIndex(); +} + +inline bool operator<(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() < B.getIndex(); +} + +inline bool operator<=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() <= B.getIndex(); +} + +inline bool operator>(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() > B.getIndex(); +} + +inline bool operator>=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() >= B.getIndex(); +} +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h new file mode 100644 index 000000000000..21755f5d9b09 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -0,0 +1,270 @@ +//===- TypeRecord.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file 
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h new file mode 100644 index 000000000000..21755f5d9b09 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -0,0 +1,270 @@ +//===- TypeRecord.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include <cinttypes> + +namespace llvm { +namespace codeview { + +class TypeRecord { +protected: + explicit TypeRecord(TypeRecordKind Kind) : Kind(Kind) {} + +public: + TypeRecordKind getKind() const { return Kind; } + +private: + TypeRecordKind Kind; +}; + +class ModifierRecord : public TypeRecord { +public: + ModifierRecord(TypeIndex ModifiedType, ModifierOptions Options) + : TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType), + Options(Options) {} + + TypeIndex getModifiedType() const { return ModifiedType; } + ModifierOptions getOptions() const { return Options; } + +private: + TypeIndex ModifiedType; + ModifierOptions Options; +}; + +class ProcedureRecord : public TypeRecord { +public: + ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv, + FunctionOptions Options, uint16_t ParameterCount, + TypeIndex ArgumentList) + : TypeRecord(TypeRecordKind::Procedure), ReturnType(ReturnType), + CallConv(CallConv), Options(Options), ParameterCount(ParameterCount), + ArgumentList(ArgumentList) {} + + TypeIndex getReturnType() const { return ReturnType; } + CallingConvention getCallConv() const { return CallConv; } + FunctionOptions getOptions() const { return Options; } + uint16_t getParameterCount() const { return ParameterCount; } + TypeIndex getArgumentList() const { return ArgumentList; } + +private: + TypeIndex ReturnType; + CallingConvention CallConv; + FunctionOptions Options; + uint16_t ParameterCount; + TypeIndex ArgumentList; +}; + +class MemberFunctionRecord : public TypeRecord { +public: + MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType, + TypeIndex ThisType, CallingConvention CallConv, + FunctionOptions Options, uint16_t ParameterCount, + TypeIndex ArgumentList, int32_t ThisPointerAdjustment) + : TypeRecord(TypeRecordKind::MemberFunction), ReturnType(ReturnType), + ClassType(ClassType), ThisType(ThisType), CallConv(CallConv), + Options(Options), ParameterCount(ParameterCount), + ArgumentList(ArgumentList), + ThisPointerAdjustment(ThisPointerAdjustment) {} + + TypeIndex getReturnType() const { return ReturnType; } + TypeIndex getClassType() const { return ClassType; } + TypeIndex getThisType() const { return ThisType; } + CallingConvention getCallConv() const { return CallConv; } + FunctionOptions getOptions() const { return Options; } + uint16_t getParameterCount() const { return ParameterCount; } + TypeIndex getArgumentList() const { return ArgumentList; } + int32_t getThisPointerAdjustment() const { return ThisPointerAdjustment; } + +private: + TypeIndex ReturnType; + TypeIndex ClassType; + TypeIndex ThisType; + CallingConvention CallConv; + FunctionOptions Options; + uint16_t ParameterCount; + TypeIndex ArgumentList; + int32_t ThisPointerAdjustment; +}; + +class ArgumentListRecord : public TypeRecord { +public: + explicit ArgumentListRecord(llvm::ArrayRef<TypeIndex> ArgumentTypes) + : TypeRecord(TypeRecordKind::ArgumentList), ArgumentTypes(ArgumentTypes) { + } + + llvm::ArrayRef<TypeIndex> getArgumentTypes() const { return ArgumentTypes; } + +private: + llvm::ArrayRef<TypeIndex> ArgumentTypes; +}; +
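Type records cross-reference each other only through TypeIndex values, so a function type is assembled bottom-up: the argument list is emitted first and whatever index it receives is what the procedure record stores. A hypothetical sketch (the CallingConvention/FunctionOptions enumerators are assumed from CodeView.h, and the 0x1000 index merely illustrates the first non-simple slot):

    TypeIndex ArgTypes[] = {TypeIndex::Int32(), TypeIndex::Float64()};
    ArgumentListRecord ArgList(ArgTypes);        // (int, double)
    TypeIndex ArgListIndex(0x1000);              // index the list would get in the stream
    ProcedureRecord Proc(TypeIndex::Int32(),     // return type
                         CallingConvention::NearC, FunctionOptions::None,
                         /*ParameterCount=*/2, ArgListIndex);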
+class PointerRecordBase : public TypeRecord { +public: + PointerRecordBase(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode, + PointerOptions Options, uint8_t Size) + : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), + PtrKind(Kind), Mode(Mode), Options(Options), Size(Size) {} + + TypeIndex getReferentType() const { return ReferentType; } + PointerKind getPointerKind() const { return PtrKind; } + PointerMode getMode() const { return Mode; } + PointerOptions getOptions() const { return Options; } + uint8_t getSize() const { return Size; } + +private: + TypeIndex ReferentType; + PointerKind PtrKind; + PointerMode Mode; + PointerOptions Options; + uint8_t Size; +}; + +class PointerRecord : public PointerRecordBase { +public: + PointerRecord(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode, + PointerOptions Options, uint8_t Size) + : PointerRecordBase(ReferentType, Kind, Mode, Options, Size) {} +}; + +class PointerToMemberRecord : public PointerRecordBase { +public: + PointerToMemberRecord(TypeIndex ReferentType, PointerKind Kind, + PointerMode Mode, PointerOptions Options, uint8_t Size, + TypeIndex ContainingType, + PointerToMemberRepresentation Representation) + : PointerRecordBase(ReferentType, Kind, Mode, Options, Size), + ContainingType(ContainingType), Representation(Representation) {} + + TypeIndex getContainingType() const { return ContainingType; } + PointerToMemberRepresentation getRepresentation() const { + return Representation; + } + +private: + TypeIndex ContainingType; + PointerToMemberRepresentation Representation; +}; + +class ArrayRecord : public TypeRecord { +public: + ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size, + llvm::StringRef Name) + : TypeRecord(TypeRecordKind::Array), ElementType(ElementType), + IndexType(IndexType), Size(Size), Name(Name) {} + + TypeIndex getElementType() const { return ElementType; } + TypeIndex getIndexType() const { return IndexType; } + uint64_t getSize() const { return Size; } + llvm::StringRef getName() const { return Name; } + +private: + TypeIndex ElementType; + TypeIndex IndexType; + uint64_t Size; + llvm::StringRef Name; +}; + +class TagRecord : public TypeRecord { +protected: + TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, + TypeIndex FieldList, StringRef Name, StringRef UniqueName) + : TypeRecord(Kind), MemberCount(MemberCount), Options(Options), + FieldList(FieldList), Name(Name), UniqueName(UniqueName) {} + +public: + uint16_t getMemberCount() const { return MemberCount; } + ClassOptions getOptions() const { return Options; } + TypeIndex getFieldList() const { return FieldList; } + StringRef getName() const { return Name; } + StringRef getUniqueName() const { return UniqueName; } + +private: + uint16_t MemberCount; + ClassOptions Options; + TypeIndex FieldList; + StringRef Name; + StringRef UniqueName; +}; + +class AggregateRecord : public TagRecord { +public: + AggregateRecord(TypeRecordKind Kind, uint16_t MemberCount, + ClassOptions Options, HfaKind Hfa, + WindowsRTClassKind WinRTKind, TypeIndex FieldList, + TypeIndex DerivationList, TypeIndex VTableShape, + uint64_t Size, StringRef Name, StringRef UniqueName) + : TagRecord(Kind, MemberCount, Options, FieldList, Name, UniqueName), + Hfa(Hfa), WinRTKind(WinRTKind), DerivationList(DerivationList), + VTableShape(VTableShape), Size(Size) {} + + HfaKind getHfa() const { return Hfa; } + WindowsRTClassKind getWinRTKind() const { return WinRTKind; } + TypeIndex getDerivationList() const { return DerivationList; } + TypeIndex getVTableShape() const
{ return VTableShape; } + uint64_t getSize() const { return Size; } + +private: + HfaKind Hfa; + WindowsRTClassKind WinRTKind; + TypeIndex DerivationList; + TypeIndex VTableShape; + uint64_t Size; +}; + +class EnumRecord : public TagRecord { +public: + EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, + StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType) + : TagRecord(TypeRecordKind::Enum, MemberCount, Options, FieldList, Name, + UniqueName), + UnderlyingType(UnderlyingType) {} + + TypeIndex getUnderlyingType() const { return UnderlyingType; } + +private: + TypeIndex UnderlyingType; +}; + +class BitFieldRecord : public TypeRecord { +public: + BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset) + : TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize), + BitOffset(BitOffset) {} + + TypeIndex getType() const { return Type; } + uint8_t getBitOffset() const { return BitOffset; } + uint8_t getBitSize() const { return BitSize; } + +private: + TypeIndex Type; + uint8_t BitSize; + uint8_t BitOffset; +}; + +class VirtualTableShapeRecord : public TypeRecord { +public: + explicit VirtualTableShapeRecord(ArrayRef<VirtualTableSlotKind> Slots) + : TypeRecord(TypeRecordKind::VirtualTableShape), Slots(Slots) {} + + ArrayRef<VirtualTableSlotKind> getSlots() const { return Slots; } + +private: + ArrayRef<VirtualTableSlotKind> Slots; +}; +} +} + +#endif
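As a concrete reading of the records above: a C++ member such as `unsigned Flags : 3` stored two bits into its allocation unit could be modeled by pairing an underlying type index with the size and offset of the field (a sketch, not code from the tree):

    BitFieldRecord BF(TypeIndex::UInt32(), /*BitSize=*/3, /*BitOffset=*/2);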
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h b/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h new file mode 100644 index 000000000000..1f48cf70666d --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h @@ -0,0 +1,57 @@ +//===- TypeRecordBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace codeview { + +class TypeRecordBuilder { +private: + TypeRecordBuilder(const TypeRecordBuilder &) = delete; + TypeRecordBuilder &operator=(const TypeRecordBuilder &) = delete; + +public: + explicit TypeRecordBuilder(TypeRecordKind Kind); + + void writeUInt8(uint8_t Value); + void writeInt16(int16_t Value); + void writeUInt16(uint16_t Value); + void writeInt32(int32_t Value); + void writeUInt32(uint32_t Value); + void writeInt64(int64_t Value); + void writeUInt64(uint64_t Value); + void writeTypeIndex(TypeIndex TypeInd); + void writeTypeRecordKind(TypeRecordKind Kind); + void writeEncodedInteger(int64_t Value); + void writeEncodedSignedInteger(int64_t Value); + void writeEncodedUnsignedInteger(uint64_t Value); + void writeNullTerminatedString(const char *Value); + void writeNullTerminatedString(StringRef Value); + + llvm::StringRef str(); + + uint64_t size() const { return Stream.tell(); } + +private: + llvm::SmallVector<char, 256> Buffer; + llvm::raw_svector_ostream Stream; + llvm::support::endian::Writer<llvm::support::little> Writer; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h new file mode 100644 index 000000000000..9de110e8236f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h @@ -0,0 +1,37 @@ +//===- TypeSymbolEmitter.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { + +class TypeSymbolEmitter { +private: + TypeSymbolEmitter(const TypeSymbolEmitter &) = delete; + TypeSymbolEmitter &operator=(const TypeSymbolEmitter &) = delete; + +protected: + TypeSymbolEmitter() {} + +public: + virtual ~TypeSymbolEmitter() {} + +public: + virtual void writeUserDefinedType(TypeIndex TI, StringRef Name) = 0; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h new file mode 100644 index 000000000000..2c950e8af792 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -0,0 +1,60 @@ +//===- TypeTableBuilder.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +namespace codeview { + +class FieldListRecordBuilder; +class MethodListRecordBuilder; +class TypeRecordBuilder; + +class TypeTableBuilder { +private: + TypeTableBuilder(const TypeTableBuilder &) = delete; + TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; + +protected: + TypeTableBuilder(); + +public: + virtual ~TypeTableBuilder(); + +public: + TypeIndex writeModifier(const ModifierRecord &Record); + TypeIndex writeProcedure(const ProcedureRecord &Record); + TypeIndex writeMemberFunction(const MemberFunctionRecord &Record); + TypeIndex writeArgumentList(const ArgumentListRecord &Record); + TypeIndex writeRecord(TypeRecordBuilder &builder); + TypeIndex writePointer(const PointerRecord &Record); + TypeIndex writePointerToMember(const PointerToMemberRecord &Record); + TypeIndex writeArray(const ArrayRecord &Record); + TypeIndex writeAggregate(const AggregateRecord &Record); + TypeIndex writeEnum(const EnumRecord &Record); + TypeIndex writeBitField(const BitFieldRecord &Record); + TypeIndex writeVirtualTableShape(const VirtualTableShapeRecord &Record); + + TypeIndex writeFieldList(FieldListRecordBuilder &FieldList); + TypeIndex writeMethodList(MethodListRecordBuilder &MethodList); + +private: + virtual TypeIndex writeRecord(llvm::StringRef record) = 0; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 871e60c56b13..6659a97a042b 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -57,6 +57,10 @@ class DIInliningInfo { assert(Index < Frames.size()); return Frames[Index]; } + DILineInfo *getMutableFrame(unsigned Index) { + assert(Index < Frames.size()); + return &Frames[Index]; + } uint32_t getNumberOfFrames() const { return Frames.size(); } @@ -65,6 +69,15 @@ class DIInliningInfo { } }; +/// DIGlobal - container for description of a global variable. +struct DIGlobal { + std::string Name; + uint64_t Start; + uint64_t Size; + + DIGlobal() : Name(""), Start(0), Size(0) {} +}; + /// A DINameKind is passed to name search methods to specify a /// preference regarding the type of name resolution the caller wants. enum class DINameKind { None, ShortName, LinkageName }; @@ -99,6 +112,7 @@ enum DIDumpType { DIDT_LineDwo, DIDT_Loc, DIDT_LocDwo, + DIDT_Macro, DIDT_Ranges, DIDT_Pubnames, DIDT_Pubtypes, @@ -110,7 +124,9 @@ enum DIDumpType { DIDT_AppleNames, DIDT_AppleTypes, DIDT_AppleNamespaces, - DIDT_AppleObjC + DIDT_AppleObjC, + DIDT_CUIndex, + DIDT_TUIndex, }; class DIContext { @@ -140,17 +156,21 @@ private: /// to be used by the DIContext implementations when applying relocations /// on the fly. class LoadedObjectInfo { +protected: + LoadedObjectInfo(const LoadedObjectInfo &) = default; + LoadedObjectInfo() = default; + public: virtual ~LoadedObjectInfo() = default; - /// Obtain the Load Address of a section by Name. + /// Obtain the Load Address of a section by SectionRef. /// - /// Calculate the address of the section identified by the passed in Name. + /// Calculate the address of the given section. /// The section need not be present in the local address space. 
The addresses /// need to be consistent with the addresses used to query the DIContext and /// the output of this function should be deterministic, i.e. repeated calls with - /// the same Name should give the same address. - virtual uint64_t getSectionLoadAddress(StringRef Name) const = 0; + /// the same Sec should give the same address. + virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const = 0; /// If conveniently available, return the content of the given Section. /// @@ -162,7 +182,8 @@ public: /// local (unrelocated) object file and applied on the fly. Note that this method /// is used purely for optimization purposes in the common case of JITting in the /// local address space, so returning false should always be correct. - virtual bool getLoadedSectionContents(StringRef Name, StringRef &Data) const { + virtual bool getLoadedSectionContents(const object::SectionRef &Sec, + StringRef &Data) const { return false; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index 743f9c696e9e..bae3154b3b5f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -18,10 +18,13 @@ class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection) - : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} + StringRef SOS, StringRef AOS, StringRef LS, bool LE, + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *Entry) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection, + Entry) {} void dump(raw_ostream &OS); + static const DWARFSectionKind Section = DW_SECT_INFO; // VTable anchor. ~DWARFCompileUnit() override; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 423c0d32f1b5..c91012bc9a24 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" @@ -40,11 +41,14 @@ class DWARFContext : public DIContext { DWARFUnitSection<DWARFCompileUnit> CUs; std::vector<DWARFUnitSection<DWARFTypeUnit>> TUs; + std::unique_ptr<DWARFUnitIndex> CUIndex; + std::unique_ptr<DWARFUnitIndex> TUIndex; std::unique_ptr<DWARFDebugAbbrev> Abbrev; std::unique_ptr<DWARFDebugLoc> Loc; std::unique_ptr<DWARFDebugAranges> Aranges; std::unique_ptr<DWARFDebugLine> Line; std::unique_ptr<DWARFDebugFrame> DebugFrame; + std::unique_ptr<DWARFDebugMacro> Macro; DWARFUnitSection<DWARFCompileUnit> DWOCUs; std::vector<DWARFUnitSection<DWARFTypeUnit>> DWOTUs; @@ -143,6 +147,9 @@ public: return DWOCUs[index].get(); } + const DWARFUnitIndex &getCUIndex(); + const DWARFUnitIndex &getTUIndex(); + /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); @@ -161,6 +168,9 @@ public: /// Get a pointer to the parsed frame information object. const DWARFDebugFrame *getDebugFrame(); + /// Get a pointer to the parsed DebugMacro object. + const DWARFDebugMacro *getDebugMacro(); + /// Get a pointer to a parsed line table corresponding to a compile unit.
const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu); @@ -184,6 +194,7 @@ public: virtual const DWARFSection &getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; + virtual StringRef getMacinfoSection() = 0; virtual StringRef getPubNamesSection() = 0; virtual StringRef getPubTypesSection() = 0; virtual StringRef getGnuPubNamesSection() = 0; @@ -203,9 +214,11 @@ public: virtual const DWARFSection& getAppleTypesSection() = 0; virtual const DWARFSection& getAppleNamespacesSection() = 0; virtual const DWARFSection& getAppleObjCSection() = 0; + virtual StringRef getCUIndexSection() = 0; + virtual StringRef getTUIndexSection() = 0; static bool isSupportedVersion(unsigned version) { - return version == 2 || version == 3 || version == 4; + return version == 2 || version == 3 || version == 4 || version == 5; } private: /// Return the compile unit that includes an offset (relative to .debug_info). @@ -232,6 +245,7 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LineSection; StringRef StringSection; StringRef RangeSection; + StringRef MacinfoSection; StringRef PubNamesSection; StringRef PubTypesSection; StringRef GnuPubNamesSection; @@ -251,6 +265,8 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection AppleTypesSection; DWARFSection AppleNamespacesSection; DWARFSection AppleObjCSection; + StringRef CUIndexSection; + StringRef TUIndexSection; SmallVector<SmallString<32>, 4> UncompressedSections; @@ -268,6 +284,7 @@ public: const DWARFSection &getLineSection() override { return LineSection; } StringRef getStringSection() override { return StringSection; } StringRef getRangeSection() override { return RangeSection; } + StringRef getMacinfoSection() override { return MacinfoSection; } StringRef getPubNamesSection() override { return PubNamesSection; } StringRef getPubTypesSection() override { return PubTypesSection; } StringRef getGnuPubNamesSection() override { return GnuPubNamesSection; } @@ -293,6 +310,8 @@ public: StringRef getAddrSection() override { return AddrSection; } + StringRef getCUIndexSection() override { return CUIndexSection; } + StringRef getTUIndexSection() override { return TUIndexSection; } } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 93e7c790ccf9..760950b726b3 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -196,7 +196,7 @@ public: // Fills the Result argument with the file and line information // corresponding to Address. Returns true on success. - bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, + bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, DILineInfo &Result) const; @@ -247,7 +247,6 @@ private: const RelocAddrMap *RelocMap; LineTableMapTy LineTableMap; }; - } #endif
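getFileLineInfoForAddress is the usual entry point once a table has been parsed. A hedged sketch of a lookup (LineTable, CU, Address, and Kind are assumed to be in scope, and getCompilationDir() is assumed to be the unit's accessor for DW_AT_comp_dir):

    DILineInfo Result;
    if (LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
                                             Kind, Result))
      outs() << Result.FileName << ':' << Result.Line << '\n';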
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h new file mode 100644 index 000000000000..e17c993d275b --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -0,0 +1,59 @@ +//===-- DWARFDebugMacro.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H +#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Dwarf.h" + +namespace llvm { + +class raw_ostream; + +class DWARFDebugMacro { + /// A single macro entry within a macro list. + struct Entry { + /// The type of the macro entry. + uint32_t Type; + union { + /// The source line where the macro is defined. + uint64_t Line; + /// Vendor extension constant value. + uint64_t ExtConstant; + }; + + union { + /// The string (name, value) of the macro entry. + const char *MacroStr; + /// An unsigned integer indicating the identity of the source file. + uint64_t File; + /// Vendor extension string. + const char *ExtStr; + }; + }; + + typedef SmallVector<Entry, 4> MacroList; + + /// A list of all the macro entries in the debug_macinfo section. + MacroList Macros; + +public: + DWARFDebugMacro() {} + /// Print the macro list found within the debug_macinfo section. + void dump(raw_ostream &OS) const; + /// Parse the debug_macinfo section accessible via the 'data' parameter. + void parse(DataExtractor data); +}; + +} + +#endif
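Each .debug_macinfo entry begins with a ULEB128 type code whose operands follow the DWARF encoding, which is what parse() has to walk. A hypothetical sketch of the decode loop, not the body of DWARFDebugMacro.cpp:

    uint32_t Offset = 0;
    while (data.isValidOffset(Offset)) {
      Entry E;
      E.Type = data.getULEB128(&Offset);
      switch (E.Type) {
      case dwarf::DW_MACINFO_define:
      case dwarf::DW_MACINFO_undef:
        E.Line = data.getULEB128(&Offset);  // line of the #define/#undef
        E.MacroStr = data.getCStr(&Offset); // "NAME VALUE" string
        break;
      case dwarf::DW_MACINFO_start_file:
        E.Line = data.getULEB128(&Offset);  // line of the #include
        E.File = data.getULEB128(&Offset);  // index into the file table
        break;
      case dwarf::DW_MACINFO_end_file:
        break;                              // no operands
      }
      Macros.push_back(E);
    }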
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 7ddcc0d81d59..3c32a3e5b794 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -84,6 +84,9 @@ public: const DWARFUnit *u) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *u); + static bool skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize); static ArrayRef<uint8_t> getFixedFormSizes(uint8_t AddrSize, uint16_t Version); diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index f24e27819da2..894a88dce440 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -21,13 +21,17 @@ private: public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection) - : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} + StringRef SOS, StringRef AOS, StringRef LS, bool LE, - + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *Entry) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection, + Entry) {} uint32_t getHeaderSize() const override { return DWARFUnit::getHeaderSize() + 12; } void dump(raw_ostream &OS); + static const DWARFSectionKind Section = DW_SECT_TYPES; + protected: bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 5604b93f2205..681b2aa19a79 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include <vector> namespace llvm { @@ -39,28 +40,25 @@ public: virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0; void parse(DWARFContext &C, const DWARFSection &Section); - void parseDWO(DWARFContext &C, const DWARFSection &DWOSection); + void parseDWO(DWARFContext &C, const DWARFSection &DWOSection, + DWARFUnitIndex *Index = nullptr); protected: virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool isLittleEndian) = 0; + StringRef SOS, StringRef AOS, StringRef LS, + bool isLittleEndian) = 0; ~DWARFUnitSectionBase() = default; }; +const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, + DWARFSectionKind Kind); + /// Concrete instance of DWARFUnitSection, specialized for one Unit type. template <typename UnitType> class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>, public DWARFUnitSectionBase { - - struct UnitOffsetComparator { - bool operator()(uint32_t LHS, - const std::unique_ptr<UnitType> &RHS) const { - return LHS < RHS->getNextUnitOffset(); - } - }; - bool Parsed; public: @@ -73,8 +71,11 @@ public: typedef llvm::iterator_range<typename UnitVector::iterator> iterator_range; UnitType *getUnitForOffset(uint32_t Offset) const override { - auto *CU = std::upper_bound(this->begin(), this->end(), Offset, - UnitOffsetComparator()); + auto *CU = std::upper_bound( + this->begin(), this->end(), Offset, + [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) { + return LHS < RHS->getNextUnitOffset(); + }); if (CU != this->end()) return CU->get(); return nullptr; @@ -83,14 +84,16 @@ private: void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE) override { + StringRef SOS, StringRef AOS, StringRef LS, bool LE) override { if (Parsed) return; + const auto &Index = getDWARFUnitIndex(Context, UnitType::Section); DataExtractor Data(Section.Data, LE, 0); uint32_t Offset = 0; while (Data.isValidOffset(Offset)) { auto U = llvm::make_unique<UnitType>(Context, Section, DA, RS, SS, SOS, - AOS, LE, *this); + AOS, LS, LE, *this, + Index.getFromOffset(Offset)); if (!U->extract(Data, &Offset)) break; this->push_back(std::move(U)); @@ -108,6 +111,7 @@ class DWARFUnit { const DWARFDebugAbbrev *Abbrev; StringRef RangeSection; uint32_t RangeSectionBase; + StringRef LineSection; StringRef StringSection; StringRef StringOffsetSection; StringRef AddrOffsetSection; @@ -134,6 +138,8 @@ class DWARFUnit { }; std::unique_ptr<DWOHolder> DWO; + + const DWARFUnitIndex::Entry *IndexEntry; + protected: virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr); /// Size in bytes of the unit header.
@@ -142,13 +148,15 @@ protected: public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection); + StringRef SOS, StringRef AOS, StringRef LS, bool LE, + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *IndexEntry = nullptr); virtual ~DWARFUnit(); DWARFContext& getContext() const { return Context; } + StringRef getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } StringRef getStringOffsetSection() const { return StringOffsetSection; } void setAddrOffsetSection(StringRef AOS, uint32_t Base) { @@ -246,12 +254,19 @@ public: assert(!DieArray.empty()); auto it = std::lower_bound( DieArray.begin(), DieArray.end(), Offset, - [=](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) { + [](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) { return LHS.getOffset() < Offset; }); return it == DieArray.end() ? nullptr : &*it; } + uint32_t getLineTableOffset() const { + if (IndexEntry) + if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE)) + return Contrib->Offset; + return 0; + } + private: /// Size in bytes of the .debug_info data associated with this compile unit. size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
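getLineTableOffset() above is what lets split-DWARF consumers find a unit's slice of a shared .debug_line.dwo contribution. Done by hand against the index type introduced below, the same lookup would look roughly like this (Index, UnitOffset, LineSection, and LineData are hypothetical locals):

    if (const auto *E = Index.getFromOffset(UnitOffset))
      if (const auto *C = E->getOffset(DW_SECT_LINE))
        LineData = LineSection.substr(C->Offset, C->Length);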
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h new file mode 100644 index 000000000000..a85c2f9f0a23 --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h @@ -0,0 +1,81 @@ +//===-- DWARFUnitIndex.h --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H +#define LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> + +namespace llvm { + +enum DWARFSectionKind { + DW_SECT_INFO = 1, + DW_SECT_TYPES, + DW_SECT_ABBREV, + DW_SECT_LINE, + DW_SECT_LOC, + DW_SECT_STR_OFFSETS, + DW_SECT_MACINFO, + DW_SECT_MACRO, +}; + +class DWARFUnitIndex { + struct Header { + uint32_t Version; + uint32_t NumColumns; + uint32_t NumUnits; + uint32_t NumBuckets = 0; + + bool parse(DataExtractor IndexData, uint32_t *OffsetPtr); + void dump(raw_ostream &OS) const; + }; + +public: + class Entry { + public: + struct SectionContribution { + uint32_t Offset; + uint32_t Length; + }; + + private: + const DWARFUnitIndex *Index; + uint64_t Signature; + std::unique_ptr<SectionContribution[]> Contributions; + friend class DWARFUnitIndex; + + public: + const SectionContribution *getOffset(DWARFSectionKind Sec) const; + const SectionContribution *getOffset() const; + }; + +private: + struct Header Header; + + DWARFSectionKind InfoColumnKind; + int InfoColumn = -1; + std::unique_ptr<DWARFSectionKind[]> ColumnKinds; + std::unique_ptr<Entry[]> Rows; + + static StringRef getColumnHeader(DWARFSectionKind DS); + bool parseImpl(DataExtractor IndexData); + +public: + bool parse(DataExtractor IndexData); + DWARFUnitIndex(DWARFSectionKind InfoColumnKind) + : InfoColumnKind(InfoColumnKind) {} + void dump(raw_ostream &OS) const; + const Entry *getFromOffset(uint32_t Offset) const; +}; +} + +#endif diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h index 2bb97463f90d..9404a5922449 100644 --- a/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/include/llvm/DebugInfo/PDB/PDBContext.h @@ -32,8 +32,7 @@ class PDBContext : public DIContext { public: PDBContext(const object::COFFObjectFile &Object, - std::unique_ptr<IPDBSession> PDBSession, - bool RelativeAddress); + std::unique_ptr<IPDBSession> PDBSession); static bool classof(const DIContext *DICtx) { return DICtx->getKind() == CK_PDB; diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h index 2d19e792d3d0..a932a56bb953 100644 --- a/include/llvm/DebugInfo/PDB/PDBTypes.h +++ b/include/llvm/DebugInfo/PDB/PDBTypes.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_PDBTYPES_H #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Endian.h" #include <functional> #include <stdint.h> @@ -500,6 +501,35 @@ struct Variant { bool operator!=(const Variant &Other) const { return !(*this == Other); } }; +namespace PDB { +static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f', + 't', ' ', 'C', '/', 'C', '+', '+', ' ', + 'M', 'S', 'F', ' ', '7', '.', '0', '0', + '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'}; + +// The superblock is overlaid at the beginning of the file (offset 0). +// It starts with a magic header and is followed by information which describes +// the layout of the file system. +struct SuperBlock { + char MagicBytes[sizeof(Magic)]; + // The file system is split into a variable number of fixed size elements. + // These elements are referred to as blocks. The size of a block may vary + // from system to system. + support::ulittle32_t BlockSize; + // This field's purpose is not yet known. + support::ulittle32_t Unknown0; + // This contains the number of blocks resident in the file system. In + // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file.
+ support::ulittle32_t NumBlocks; + // This contains the number of bytes which make up the directory. + support::ulittle32_t NumDirectoryBytes; + // This field's purpose is not yet known. + support::ulittle32_t Unknown1; + // This contains the block # of the block map. + support::ulittle32_t BlockMapAddr; +}; +} + } // namespace llvm namespace std { @@ -513,4 +543,5 @@ template <> struct hash<llvm::PDB_VariantType> { }; } + #endif diff --git a/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/include/llvm/DebugInfo/Symbolize/DIPrinter.h new file mode 100644 index 000000000000..0703fb14da61 --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -0,0 +1,47 @@ +//===- llvm/DebugInfo/Symbolize/DIPrinter.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the DIPrinter class, which is responsible for printing +// structures defined in DebugInfo/DIContext.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H +#define LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H + +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +struct DILineInfo; +class DIInliningInfo; +struct DIGlobal; + +namespace symbolize { + +class DIPrinter { + raw_ostream &OS; + bool PrintFunctionNames; + bool PrintPretty; + void printName(const DILineInfo &Info, bool Inlined); + +public: + DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true, + bool PrintPretty = false) + : OS(OS), PrintFunctionNames(PrintFunctionNames), + PrintPretty(PrintPretty) {} + + DIPrinter &operator<<(const DILineInfo &Info); + DIPrinter &operator<<(const DIInliningInfo &Info); + DIPrinter &operator<<(const DIGlobal &Global); +}; +} +} + +#endif +
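DIPrinter is a thin formatting shim over a raw_ostream, so llvm-symbolizer-style output is just chained insertions. A hypothetical driver (Info and Global stand for results obtained elsewhere):

    DIPrinter Printer(outs(), /*PrintFunctionNames=*/true, /*PrintPretty=*/false);
    Printer << Info;   // a DILineInfo
    Printer << Global; // a DIGlobal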
diff --git a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h new file mode 100644 index 000000000000..ff9cc808875d --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -0,0 +1,53 @@ +//===-- SymbolizableModule.h ------------------------------------ C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SymbolizableModule interface. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H + +#include "llvm/DebugInfo/DIContext.h" +#include <cstdint> +#include <string> + +namespace llvm { +namespace object { +class ObjectFile; +} +} + +namespace llvm { +namespace symbolize { + +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; + +class SymbolizableModule { +public: + virtual ~SymbolizableModule() {} + virtual DILineInfo symbolizeCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual DIGlobal symbolizeData(uint64_t ModuleOffset) const = 0; + + // Return true if this is a 32-bit x86 PE COFF module. + virtual bool isWin32Module() const = 0; + + // Returns the preferred base of the module, i.e. where the loader would place + // it in memory assuming there were no conflicts. + virtual uint64_t getModulePreferredBase() const = 0; +}; + +} // namespace symbolize +} // namespace llvm + +#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h new file mode 100644 index 000000000000..ec3ae002659c --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -0,0 +1,105 @@ +//===-- Symbolize.h --------------------------------------------- C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Header for LLVM symbolization library. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H + +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ErrorOr.h" +#include <map> +#include <memory> +#include <string> + +namespace llvm { +namespace symbolize { + +using namespace object; +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; + +class LLVMSymbolizer { +public: + struct Options { + FunctionNameKind PrintFunctions; + bool UseSymbolTable : 1; + bool Demangle : 1; + bool RelativeAddresses : 1; + std::string DefaultArch; + std::vector<std::string> DsymHints; + Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName, + bool UseSymbolTable = true, bool Demangle = true, + bool RelativeAddresses = false, std::string DefaultArch = "") + : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable), + Demangle(Demangle), RelativeAddresses(RelativeAddresses), + DefaultArch(DefaultArch) {} + }; + + LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} + ~LLVMSymbolizer() { + flush(); + } + + ErrorOr<DILineInfo> symbolizeCode(const std::string &ModuleName, + uint64_t ModuleOffset); + ErrorOr<DIInliningInfo> symbolizeInlinedCode(const std::string &ModuleName, + uint64_t ModuleOffset); + ErrorOr<DIGlobal> symbolizeData(const std::string &ModuleName, + uint64_t ModuleOffset); + void flush(); + static std::string DemangleName(const std::string &Name, + const SymbolizableModule *ModInfo); + +private: + // Bundles together object file with code/data and object file with + // corresponding debug info. These objects can be the same. + typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair; + + ErrorOr<SymbolizableModule *> + getOrCreateModuleInfo(const std::string &ModuleName); + ObjectFile *lookUpDsymFile(const std::string &Path, + const MachOObjectFile *ExeObj, + const std::string &ArchName); + ObjectFile *lookUpDebuglinkObject(const std::string &Path, + const ObjectFile *Obj, + const std::string &ArchName); + + /// \brief Returns pair of pointers to object and debug object. + ErrorOr<ObjectPair> getOrCreateObjectPair(const std::string &Path, + const std::string &ArchName); + + /// \brief Return a pointer to object file at specified path, for a specified + /// architecture (e.g. if path refers to a Mach-O universal binary, only one + /// object file from it will be returned). + ErrorOr<ObjectFile *> getOrCreateObject(const std::string &Path, + const std::string &ArchName); + + std::map<std::string, ErrorOr<std::unique_ptr<SymbolizableModule>>> Modules; + + /// \brief Contains cached results of getOrCreateObjectPair().
+ std::map<std::pair<std::string, std::string>, ErrorOr<ObjectPair>> + ObjectPairForPathArch; + + /// \brief Contains parsed binary for each path, or parsing error. + std::map<std::string, ErrorOr<OwningBinary<Binary>>> BinaryForPath; + + /// \brief Parsed object file for path/architecture pair, where "path" refers + /// to Mach-O universal binary. + std::map<std::pair<std::string, std::string>, ErrorOr<std::unique_ptr<ObjectFile>>> + ObjectForUBPathAndArch; + + Options Opts; +}; + +} // namespace symbolize +} // namespace llvm + +#endif
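End to end, the class above is driven by handing it a binary path and an offset; failures come back through ErrorOr rather than being printed. A hedged sketch ("a.out" and the offset are placeholders):

    LLVMSymbolizer Symbolizer; // default Options
    if (auto ResOrErr = Symbolizer.symbolizeCode("a.out", 0x401000)) {
      const DILineInfo &Info = *ResOrErr;
      outs() << Info.FunctionName << " at " << Info.FileName << ':'
             << Info.Line << '\n';
    }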
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 821c0181ce83..a7302602dcd8 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -104,7 +104,12 @@ class ExecutionEngine { ExecutionEngineState EEState; /// The target data for the platform for which execution is being performed. - const DataLayout *DL; + /// + /// Note: the DataLayout is LLVMContext specific because it has an + /// internal cache based on type pointers. This makes it unsafe to reuse the + /// ExecutionEngine across contexts; we don't enforce this rule, but undefined + /// behavior can occur if the user tries to do it. + const DataLayout DL; /// Whether lazy JIT compilation is enabled. bool CompilingLazily; @@ -126,8 +131,6 @@ protected: /// optimize for the case where there is only one module. SmallVector<std::unique_ptr<Module>, 1> Modules; - void setDataLayout(const DataLayout *Val) { DL = Val; } - /// getMemoryForGV - Allocate memory for a global variable. virtual char *getMemoryForGV(const GlobalVariable *GV); @@ -194,7 +197,7 @@ public: //===--------------------------------------------------------------------===// - const DataLayout *getDataLayout() const { return DL; } + const DataLayout &getDataLayout() const { return DL; } /// removeModule - Remove a Module from the list of modules. Returns true if /// M is found. @@ -478,7 +481,8 @@ public: } protected: - ExecutionEngine() {} + ExecutionEngine(DataLayout DL) : DL(std::move(DL)) {} + explicit ExecutionEngine(DataLayout DL, std::unique_ptr<Module> M); explicit ExecutionEngine(std::unique_ptr<Module> M); void emitGlobals(); @@ -488,6 +492,9 @@ protected: GenericValue getConstantValue(const Constant *C); void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, Type *Ty); + +private: + void Init(std::unique_ptr<Module> M); }; namespace EngineKind { diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h index f49d0c487fe9..a14707840ad8 100644 --- a/include/llvm/ExecutionEngine/Interpreter.h +++ b/include/llvm/ExecutionEngine/Interpreter.h @@ -16,22 +16,12 @@ #define LLVM_EXECUTIONENGINE_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include <cstdlib> extern "C" void LLVMLinkInInterpreter(); namespace { struct ForceInterpreterLinking { - ForceInterpreterLinking() { - // We must reference the interpreter in such a way that compilers will not - // delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. As the compiler isn't smart enough - // to know that getenv() never returns -1, this will do the job. - if (std::getenv("bar") != (char*) -1) - return; - - LLVMLinkInInterpreter(); - } + ForceInterpreterLinking() { LLVMLinkInInterpreter(); } } ForceInterpreterLinking; } diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 9694b80d1928..7dab5d1bc67f 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -22,6 +22,7 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Transforms/Utils/Cloning.h" #include <list> +#include <memory> #include <set> #include "llvm/Support/Debug.h" @@ -36,56 +37,89 @@ namespace orc { /// added to the layer below. When a stub is called it triggers the extraction /// of the function body from the original module. The extracted body is then /// compiled and executed. -template <typename BaseLayerT, typename CompileCallbackMgrT, - typename PartitioningFtor = - std::function<std::set<Function*>(Function&)>> +template <typename BaseLayerT, typename CompileCallbackMgrT, + typename IndirectStubsMgrT> class CompileOnDemandLayer { private: - // Utility class for MapValue. Only materializes declarations for global - // variables. - class GlobalDeclMaterializer : public ValueMaterializer { + template <typename MaterializerFtor> + class LambdaMaterializer final : public ValueMaterializer { public: - typedef std::set<const Function*> StubSet; + LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {} + Value *materializeDeclFor(Value *V) final { return M(V); } - GlobalDeclMaterializer(Module &Dst, const StubSet *StubsToClone = nullptr) - : Dst(Dst), StubsToClone(StubsToClone) {} - - Value* materializeValueFor(Value *V) final { - if (auto *GV = dyn_cast<GlobalVariable>(V)) - return cloneGlobalVariableDecl(Dst, *GV); - else if (auto *F = dyn_cast<Function>(V)) { - auto *ClonedF = cloneFunctionDecl(Dst, *F); - if (StubsToClone && StubsToClone->count(F)) { - GlobalVariable *FnBodyPtr = - createImplPointer(*ClonedF->getType(), *ClonedF->getParent(), - ClonedF->getName() + "$orc_addr", nullptr); - makeStub(*ClonedF, *FnBodyPtr); - ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); - ClonedF->addFnAttr(Attribute::AlwaysInline); - } - return ClonedF; - } - // Else. - return nullptr; - } private: - Module &Dst; - const StubSet *StubsToClone; + MaterializerFtor M; }; + template <typename MaterializerFtor> + LambdaMaterializer<MaterializerFtor> + createLambdaMaterializer(MaterializerFtor M) { + return LambdaMaterializer<MaterializerFtor>(std::move(M)); + } + typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; - struct LogicalModuleResources { - std::shared_ptr<Module> SourceModule; - std::set<const Function*> StubsToClone; + class ModuleOwner { + public: + ModuleOwner() = default; + ModuleOwner(const ModuleOwner&) = delete; + ModuleOwner& operator=(const ModuleOwner&) = delete; + virtual ~ModuleOwner() { } + virtual Module& getModule() const = 0; }; + template <typename ModulePtrT> + class ModuleOwnerImpl : public ModuleOwner { + public: + ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {} + Module& getModule() const override { return *ModulePtr; } + private: + ModulePtrT ModulePtr; + }; + + template <typename ModulePtrT> + std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) { + return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr)); + } + + struct LogicalModuleResources { + std::unique_ptr<ModuleOwner> SourceModuleOwner; + std::set<const Function*> StubsToClone; + std::unique_ptr<IndirectStubsMgrT> StubsMgr; + + LogicalModuleResources() = default; + + // Explicit move constructor to make MSVC happy. + LogicalModuleResources(LogicalModuleResources &&Other) + : SourceModuleOwner(std::move(Other.SourceModuleOwner)), + StubsToClone(std::move(Other.StubsToClone)), + StubsMgr(std::move(Other.StubsMgr)) {} + + // Explicit move assignment to make MSVC happy.
+ LogicalModuleResources& operator=(LogicalModuleResources &&Other) { + SourceModuleOwner = std::move(Other.SourceModuleOwner); + StubsToClone = std::move(Other.StubsToClone); + StubsMgr = std::move(Other.StubsMgr); + return *this; + } + + JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { + if (Name.endswith("$stub_ptr") && !ExportedSymbolsOnly) { + assert(!ExportedSymbolsOnly && "Stubs are never exported"); + return StubsMgr->findPointer(Name.drop_back(9)); + } + return StubsMgr->findStub(Name, ExportedSymbolsOnly); + } + + }; + + + struct LogicalDylibResources { typedef std::function<RuntimeDyld::SymbolInfo(const std::string &)> SymbolResolverFtor; SymbolResolverFtor ExternalSymbolResolver; - PartitioningFtor Partitioner; }; typedef LogicalDylib<BaseLayerT, LogicalModuleResources, LogicalDylibResources> LogicalDylibList; public: + /// @brief Handle to a set of loaded modules. typedef typename LogicalDylibList::iterator ModuleSetHandleT; + /// @brief Module partitioning functor. + typedef std::function<std::set<Function*>(Function&)> PartitioningFtor; + + /// @brief Builder for IndirectStubsManagers. + typedef std::function<std::unique_ptr<IndirectStubsMgrT>()> + IndirectStubsManagerBuilderT; + /// @brief Construct a compile-on-demand layer instance. - CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr, - bool CloneStubsIntoPartitions) - : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr), + CompileOnDemandLayer(BaseLayerT &BaseLayer, PartitioningFtor Partition, + CompileCallbackMgrT &CallbackMgr, + IndirectStubsManagerBuilderT CreateIndirectStubsManager, + bool CloneStubsIntoPartitions = true) + : BaseLayer(BaseLayer), Partition(Partition), + CompileCallbackMgr(CallbackMgr), + CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), + CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} /// @brief Add a module to the compile-on-demand layer. @@ -122,17 +168,9 @@ public: return Resolver->findSymbol(Name); }; - LDResources.Partitioner = - [](Function &F) { - std::set<Function*> Partition; - Partition.insert(&F); - return Partition; - }; - // Process each of the modules in this module set. for (auto &M : Ms) - addLogicalModule(LogicalDylibs.back(), - std::shared_ptr<Module>(std::move(M))); + addLogicalModule(LogicalDylibs.back(), std::move(M)); return std::prev(LogicalDylibs.end()); } @@ -150,6 +188,10 @@ public: /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it exists. JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { + for (auto LDI = LogicalDylibs.begin(), LDE = LogicalDylibs.end(); + LDI != LDE; ++LDI) + if (auto Symbol = findSymbolIn(LDI, Name, ExportedSymbolsOnly)) + return Symbol; return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } @@ -162,85 +204,138 @@ private: - void addLogicalModule(CODLogicalDylib &LD, std::shared_ptr<Module> SrcM) { + template <typename ModulePtrT> + void addLogicalModule(CODLogicalDylib &LD, ModulePtrT SrcMPtr) { // Bump the linkage and rename any anonymous/private members in SrcM to // ensure that everything will resolve properly after we partition SrcM. - makeAllSymbolsExternallyAccessible(*SrcM); + makeAllSymbolsExternallyAccessible(*SrcMPtr); // Create a logical module handle for SrcM within the logical dylib. auto LMH = LD.createLogicalModule(); auto &LMResources = LD.getLogicalModuleResources(LMH); - LMResources.SourceModule = SrcM; - // Create the GVs-and-stubs module.
- auto GVsAndStubsM = llvm::make_unique<Module>( - (SrcM->getName() + ".globals_and_stubs").str(), - SrcM->getContext()); - GVsAndStubsM->setDataLayout(SrcM->getDataLayout()); + LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr)); + + Module &SrcM = LMResources.SourceModuleOwner->getModule(); + + // Create the GlobalValues module. + const DataLayout &DL = SrcM.getDataLayout(); + auto GVsM = llvm::make_unique<Module>((SrcM.getName() + ".globals").str(), + SrcM.getContext()); + GVsM->setDataLayout(DL); + + // Create function stubs. ValueToValueMapTy VMap; + { + typename IndirectStubsMgrT::StubInitsMap StubInits; + for (auto &F : SrcM) { + // Skip declarations. + if (F.isDeclaration()) + continue; - // Process module and create stubs. - // We create the stubs before copying the global variables as we know the - // stubs won't refer to any globals (they only refer to their implementation - // pointer) so there's no ordering/value-mapping issues. - for (auto &F : *SrcM) { + // Record all functions defined by this module. + if (CloneStubsIntoPartitions) + LMResources.StubsToClone.insert(&F); - // Skip declarations. - if (F.isDeclaration()) - continue; - - // Record all functions defined by this module. - if (CloneStubsIntoPartitions) - LMResources.StubsToClone.insert(&F); - - // For each definition: create a callback, a stub, and a function body - // pointer. Initialize the function body pointer to point at the callback, - // and set the callback to compile the function body. - auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext()); - Function *StubF = cloneFunctionDecl(*GVsAndStubsM, F, &VMap); - GlobalVariable *FnBodyPtr = - createImplPointer(*StubF->getType(), *StubF->getParent(), - StubF->getName() + "$orc_addr", - createIRTypedAddress(*StubF->getFunctionType(), - CCInfo.getAddress())); - makeStub(*StubF, *FnBodyPtr); - CCInfo.setCompileAction( - [this, &LD, LMH, &F]() { + // Create a callback, associate it with the stub for the function, + // and set the compile action to compile the partition containing the + // function. + auto CCInfo = CompileCallbackMgr.getCompileCallback(); + StubInits[mangle(F.getName(), DL)] = + std::make_pair(CCInfo.getAddress(), + JITSymbolBase::flagsFromGlobalValue(F)); + CCInfo.setCompileAction([this, &LD, LMH, &F]() { return this->extractAndCompile(LD, LMH, F); }); + } + + LMResources.StubsMgr = CreateIndirectStubsManager(); + auto EC = LMResources.StubsMgr->createStubs(StubInits); + (void)EC; + // FIXME: This should be propagated back to the user. Stub creation may + // fail for remote JITs. + assert(!EC && "Error generating stubs"); } - // Now clone the global variable declarations. - GlobalDeclMaterializer GDMat(*GVsAndStubsM); - for (auto &GV : SrcM->globals()) - if (!GV.isDeclaration()) - cloneGlobalVariableDecl(*GVsAndStubsM, GV, &VMap); + // Clone global variable decls. + for (auto &GV : SrcM.globals()) + if (!GV.isDeclaration() && !VMap.count(&GV)) + cloneGlobalVariableDecl(*GVsM, GV, &VMap); - // Then clone the initializers. - for (auto &GV : SrcM->globals()) - if (!GV.isDeclaration()) - moveGlobalVariableInitializer(GV, VMap, &GDMat); + // And the aliases. + for (auto &A : SrcM.aliases()) + if (!VMap.count(&A)) + cloneGlobalAliasDecl(*GVsM, A, VMap); - // Build a resolver for the stubs module and add it to the base layer. - auto GVsAndStubsResolver = createLambdaResolver( - [&LD](const std::string &Name) { + // Now we need to clone the GV and alias initializers.
+ + // Initializers may refer to functions declared (but not defined) in this + // module. Build a materializer to clone decls on demand. + auto Materializer = createLambdaMaterializer( + [this, &GVsM, &LMResources](Value *V) -> Value* { + if (auto *F = dyn_cast<Function>(V)) { + // Decls in the original module just get cloned. + if (F->isDeclaration()) + return cloneFunctionDecl(*GVsM, *F); + + // Definitions in the original module (which we have emitted stubs + // for at this point) get turned into a constant alias to the stub + // instead. + const DataLayout &DL = GVsM->getDataLayout(); + std::string FName = mangle(F->getName(), DL); + auto StubSym = LMResources.StubsMgr->findStub(FName, false); + unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); + ConstantInt *StubAddr = + ConstantInt::get(GVsM->getContext(), + APInt(PtrBitWidth, StubSym.getAddress())); + Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, + StubAddr, F->getType()); + return GlobalAlias::create(F->getFunctionType(), + F->getType()->getAddressSpace(), + F->getLinkage(), F->getName(), + Init, GVsM.get()); + } + // else.... + return nullptr; + }); + + // Clone the global variable initializers. + for (auto &GV : SrcM.globals()) + if (!GV.isDeclaration()) + moveGlobalVariableInitializer(GV, VMap, &Materializer); + + // Clone the global alias initializers. + for (auto &A : SrcM.aliases()) { + auto *NewA = cast<GlobalAlias>(VMap[&A]); + assert(NewA && "Alias not cloned?"); + Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr, + &Materializer); + NewA->setAliasee(cast<Constant>(Init)); + } + + // Build a resolver for the globals module and add it to the base layer. + auto GVsResolver = createLambdaResolver( + [&LD, LMH](const std::string &Name) { + auto &LMResources = LD.getLogicalModuleResources(LMH); + if (auto Sym = LMResources.StubsMgr->findStub(Name, false)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); return LD.getDylibResources().ExternalSymbolResolver(Name); }, [](const std::string &Name) { return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> GVsAndStubsMSet; - GVsAndStubsMSet.push_back(std::move(GVsAndStubsM)); - auto GVsAndStubsH = - BaseLayer.addModuleSet(std::move(GVsAndStubsMSet), + std::vector<std::unique_ptr<Module>> GVsMSet; + GVsMSet.push_back(std::move(GVsM)); + auto GVsH = + BaseLayer.addModuleSet(std::move(GVsMSet), llvm::make_unique<SectionMemoryManager>(), - std::move(GVsAndStubsResolver)); - LD.addToLogicalModule(LMH, GVsAndStubsH); + std::move(GVsResolver)); + LD.addToLogicalModule(LMH, GVsH); } - static std::string Mangle(StringRef Name, const DataLayout &DL) { + static std::string mangle(StringRef Name, const DataLayout &DL) { std::string MangledName; { raw_string_ostream MangledNameStream(MangledName); @@ -252,42 +347,35 @@ private: TargetAddress extractAndCompile(CODLogicalDylib &LD, LogicalModuleHandle LMH, Function &F) { - Module &SrcM = *LD.getLogicalModuleResources(LMH).SourceModule; + auto &LMResources = LD.getLogicalModuleResources(LMH); + Module &SrcM = LMResources.SourceModuleOwner->getModule(); // If F is a declaration we must already have compiled it. if (F.isDeclaration()) return 0; // Grab the name of the function being called here.
- std::string CalledFnName = Mangle(F.getName(), SrcM.getDataLayout()); + std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); - auto Partition = LD.getDylibResources().Partitioner(F); - auto PartitionH = emitPartition(LD, LMH, Partition); + auto Part = Partition(F); + auto PartH = emitPartition(LD, LMH, Part); TargetAddress CalledAddr = 0; - for (auto *SubF : Partition) { - std::string FName = SubF->getName(); - auto FnBodySym = - BaseLayer.findSymbolIn(PartitionH, Mangle(FName, SrcM.getDataLayout()), - false); - auto FnPtrSym = - BaseLayer.findSymbolIn(*LD.moduleHandlesBegin(LMH), - Mangle(FName + "$orc_addr", - SrcM.getDataLayout()), - false); + for (auto *SubF : Part) { + std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); + auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false); assert(FnBodySym && "Couldn't find function body."); - assert(FnPtrSym && "Couldn't find function body pointer."); TargetAddress FnBodyAddr = FnBodySym.getAddress(); - void *FnPtrAddr = reinterpret_cast( - static_cast(FnPtrSym.getAddress())); // If this is the function we're calling record the address so we can // return it from this function. if (SubF == &F) CalledAddr = FnBodyAddr; - memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t)); + // Update the function body pointer for the stub. + if (auto EC = LMResources.StubsMgr->updatePointer(FnName, FnBodyAddr)) + return 0; } return CalledAddr; @@ -296,13 +384,13 @@ private: template BaseLayerModuleSetHandleT emitPartition(CODLogicalDylib &LD, LogicalModuleHandle LMH, - const PartitionT &Partition) { + const PartitionT &Part) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = *LMResources.SourceModule; + Module &SrcM = LMResources.SourceModuleOwner->getModule(); // Create the module. std::string NewName = SrcM.getName(); - for (auto *F : Partition) { + for (auto *F : Part) { NewName += "."; NewName += F->getName(); } @@ -310,15 +398,51 @@ private: auto M = llvm::make_unique(NewName, SrcM.getContext()); M->setDataLayout(SrcM.getDataLayout()); ValueToValueMapTy VMap; - GlobalDeclMaterializer GDM(*M, &LMResources.StubsToClone); + + auto Materializer = createLambdaMaterializer([this, &LMResources, &M, + &VMap](Value *V) -> Value * { + if (auto *GV = dyn_cast(V)) + return cloneGlobalVariableDecl(*M, *GV); + + if (auto *F = dyn_cast(V)) { + // Check whether we want to clone an available_externally definition. + if (!LMResources.StubsToClone.count(F)) + return cloneFunctionDecl(*M, *F); + + // Ok - we want an inlinable stub. For that to work we need a decl + // for the stub pointer. + auto *StubPtr = createImplPointer(*F->getType(), *M, + F->getName() + "$stub_ptr", nullptr); + auto *ClonedF = cloneFunctionDecl(*M, *F); + makeStub(*ClonedF, *StubPtr); + ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); + ClonedF->addFnAttr(Attribute::AlwaysInline); + return ClonedF; + } + + if (auto *A = dyn_cast(V)) { + auto *Ty = A->getValueType(); + if (Ty->isFunctionTy()) + return Function::Create(cast(Ty), + GlobalValue::ExternalLinkage, A->getName(), + M.get()); + + return new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, + nullptr, A->getName(), nullptr, + GlobalValue::NotThreadLocal, + A->getType()->getAddressSpace()); + } + + return nullptr; + }); // Create decls in the new module. - for (auto *F : Partition) + for (auto *F : Part) cloneFunctionDecl(*M, *F, &VMap); // Move the function bodies. 
- for (auto *F : Partition) - moveFunctionBody(*F, VMap, &GDM); + for (auto *F : Part) + moveFunctionBody(*F, VMap, &Materializer); // Create memory manager and symbol resolver. auto MemMgr = llvm::make_unique<SectionMemoryManager>(); @@ -342,7 +466,10 @@ private: } BaseLayerT &BaseLayer; + PartitioningFtor Partition; CompileCallbackMgrT &CompileCallbackMgr; + IndirectStubsManagerBuilderT CreateIndirectStubsManager; + LogicalDylibList LogicalDylibs; bool CloneStubsIntoPartitions; }; diff --git a/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/include/llvm/ExecutionEngine/Orc/CompileUtils.h index 49a1fbadb295..1e7d211196f5 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -40,7 +40,6 @@ public: if (TM.addPassesToEmitMC(PM, Ctx, ObjStream)) llvm_unreachable("Target does not support MC emission."); PM.run(M); - ObjStream.flush(); std::unique_ptr<MemoryBuffer> ObjBuffer( new ObjectMemoryBuffer(std::move(ObjBufferSV))); ErrorOr<std::unique_ptr<object::ObjectFile>> Obj = diff --git a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h new file mode 100644 index 000000000000..9fa222c340f8 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h @@ -0,0 +1,108 @@ +//===--- GlobalMappingLayer.h - Inject mappings for JIT symbols -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Convenience layer for injecting symbols that will appear in calls to +// findSymbol. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H +#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H + +#include "JITSymbol.h" +#include <map> + +namespace llvm { +namespace orc { + +/// @brief Global mapping layer. +/// +/// This layer overrides the findSymbol method to first search a local symbol +/// table that the client can define. It can be used to inject new symbol +/// mappings into the JIT. Beware, however: symbols within a single IR module or +/// object file will still resolve locally (via RuntimeDyld's symbol table) - +/// such internal references cannot be overridden via this layer. +template <typename BaseLayerT> +class GlobalMappingLayer { +public: + /// @brief Handle to a set of added modules. + typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT; + + /// @brief Construct a GlobalMappingLayer with the given BaseLayer. + GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} + + /// @brief Add the given module set to the JIT. + /// @return A handle for the added modules. + template <typename ModuleSetT, typename MemoryManagerPtrT, + typename SymbolResolverPtrT> + ModuleSetHandleT addModuleSet(ModuleSetT Ms, + MemoryManagerPtrT MemMgr, + SymbolResolverPtrT Resolver) { + return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), + std::move(Resolver)); + } + + /// @brief Remove the module set associated with the handle H. + void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); } + + /// @brief Manually set the address to return for the given symbol. + void setGlobalMapping(const std::string &Name, TargetAddress Addr) { + SymbolTable[Name] = Addr; + } + + /// @brief Remove the given symbol from the global mapping. + void eraseGlobalMapping(const std::string &Name) { + SymbolTable.erase(Name); + } + + /// @brief Search for the given named symbol.
+ /// + /// This method will first search the local symbol table, returning + /// any symbol found there. If the symbol is not found in the local + /// table then this call will be passed through to the base layer. + /// + /// @param Name The name of the symbol to search for. + /// @param ExportedSymbolsOnly If true, search only for exported symbols. + /// @return A handle for the given named symbol, if it exists. + JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { + auto I = SymbolTable.find(Name); + if (I != SymbolTable.end()) + return JITSymbol(I->second, JITSymbolFlags::Exported); + return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); + } + + /// @brief Get the address of the given symbol in the context of the set of + /// modules represented by the handle H. This call is forwarded to the + /// base layer's implementation. + /// @param H The handle for the module set to search in. + /// @param Name The name of the symbol to search for. + /// @param ExportedSymbolsOnly If true, search only for exported symbols. + /// @return A handle for the given named symbol, if it is found in the + /// given module set. + JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + bool ExportedSymbolsOnly) { + return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); + } + + /// @brief Immediately emit and finalize the module set represented by the + /// given handle. + /// @param H Handle for module set to emit/finalize. + void emitAndFinalize(ModuleSetHandleT H) { + BaseLayer.emitAndFinalize(H); + } + +private: + BaseLayerT &BaseLayer; + std::map SymbolTable; +}; + +} // End namespace orc. +} // End namespace llvm. + +#endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index 637902200786..e4bed95fdabf 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -85,8 +85,6 @@ public: ModuleSetHandleT H = BaseLayer.addObjectSet(Objects, std::move(MemMgr), std::move(Resolver)); - BaseLayer.takeOwnershipOfBuffers(H, std::move(Buffers)); - return H; } diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 4b7fc5e84b9c..d6ee3a846b04 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -27,9 +27,8 @@ namespace llvm { namespace orc { -/// @brief Base class for JITLayer independent aspects of -/// JITCompileCallbackManager. -class JITCompileCallbackManagerBase { +/// @brief Target-independent base class for compile callback management. +class JITCompileCallbackManager { public: typedef std::function CompileFtor; @@ -51,18 +50,13 @@ public: CompileFtor &Compile; }; - /// @brief Construct a JITCompileCallbackManagerBase. + /// @brief Construct a JITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - /// @param NumTrampolinesPerBlock Number of trampolines to emit if there is no - /// available trampoline when getCompileCallback is - /// called. 
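// A minimal usage sketch for the GlobalMappingLayer added above (an
// illustration under assumed usage, not taken from the patch; the layer and
// method names come from this patch, while the TargetMachine `TM` and host
// function `myGetChar` are hypothetical stand-ins):
//
//   ObjectLinkingLayer<> ObjLayer;
//   IRCompileLayer<ObjectLinkingLayer<>> CompileLayer(ObjLayer,
//                                                     SimpleCompiler(*TM));
//   GlobalMappingLayer<IRCompileLayer<ObjectLinkingLayer<>>>
//       MapLayer(CompileLayer);
//
//   // Route "getchar" in JIT'd code to a host function; findSymbol consults
//   // this local table before delegating to the base layer.
//   MapLayer.setGlobalMapping("getchar",
//                             static_cast<TargetAddress>(
//                                 reinterpret_cast<uintptr_t>(&myGetChar)));
//   auto Sym = MapLayer.findSymbol("getchar", /*ExportedSymbolsOnly=*/true);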
- JITCompileCallbackManagerBase(TargetAddress ErrorHandlerAddress, - unsigned NumTrampolinesPerBlock) - : ErrorHandlerAddress(ErrorHandlerAddress), - NumTrampolinesPerBlock(NumTrampolinesPerBlock) {} + JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : ErrorHandlerAddress(ErrorHandlerAddress) {} - virtual ~JITCompileCallbackManagerBase() {} + virtual ~JITCompileCallbackManager() {} /// @brief Execute the callback for the given trampoline id. Called by the JIT /// to compile functions on demand. @@ -90,7 +84,11 @@ public: } /// @brief Reserve a compile callback. - virtual CompileCallbackInfo getCompileCallback(LLVMContext &Context) = 0; + CompileCallbackInfo getCompileCallback() { + TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); + auto &Compile = this->ActiveTrampolines[TrampolineAddr]; + return CompileCallbackInfo(TrampolineAddr, Compile); + } /// @brief Get a CompileCallbackInfo for an existing callback. CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) { @@ -113,77 +111,16 @@ public: protected: TargetAddress ErrorHandlerAddress; - unsigned NumTrampolinesPerBlock; typedef std::map TrampolineMapT; TrampolineMapT ActiveTrampolines; std::vector AvailableTrampolines; -}; - -/// @brief Manage compile callbacks. -template -class JITCompileCallbackManager : public JITCompileCallbackManagerBase { -public: - - /// @brief Construct a JITCompileCallbackManager. - /// @param JIT JIT layer to emit callback trampolines, etc. into. - /// @param Context LLVMContext to use for trampoline & resolve block modules. - /// @param ErrorHandlerAddress The address of an error handler in the target - /// process to be used if a compile callback fails. - /// @param NumTrampolinesPerBlock Number of trampolines to allocate whenever - /// there is no existing callback trampoline. - /// (Trampolines are allocated in blocks for - /// efficiency.) - JITCompileCallbackManager(JITLayerT &JIT, RuntimeDyld::MemoryManager &MemMgr, - LLVMContext &Context, - TargetAddress ErrorHandlerAddress, - unsigned NumTrampolinesPerBlock) - : JITCompileCallbackManagerBase(ErrorHandlerAddress, - NumTrampolinesPerBlock), - JIT(JIT), MemMgr(MemMgr) { - emitResolverBlock(Context); - } - - /// @brief Get/create a compile callback with the given signature. 
- CompileCallbackInfo getCompileCallback(LLVMContext &Context) final { - TargetAddress TrampolineAddr = getAvailableTrampolineAddr(Context); - auto &Compile = this->ActiveTrampolines[TrampolineAddr]; - return CompileCallbackInfo(TrampolineAddr, Compile); - } private: - std::vector<std::unique_ptr<Module>> - SingletonSet(std::unique_ptr<Module> M) { - std::vector<std::unique_ptr<Module>> Ms; - Ms.push_back(std::move(M)); - return Ms; - } - - void emitResolverBlock(LLVMContext &Context) { - std::unique_ptr<Module> M(new Module("resolver_block_module", - Context)); - TargetT::insertResolverBlock(*M, *this); - auto NonResolver = - createLambdaResolver( - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("External symbols in resolver block?"); - }, - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("Dylib symbols in resolver block?"); - }); - auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr, - std::move(NonResolver)); - JIT.emitAndFinalize(H); - auto ResolverBlockSymbol = - JIT.findSymbolIn(H, TargetT::ResolverBlockName, false); - assert(ResolverBlockSymbol && "Failed to insert resolver block"); - ResolverBlockAddr = ResolverBlockSymbol.getAddress(); - } - - TargetAddress getAvailableTrampolineAddr(LLVMContext &Context) { + TargetAddress getAvailableTrampolineAddr() { if (this->AvailableTrampolines.empty()) - grow(Context); + grow(); assert(!this->AvailableTrampolines.empty() && "Failed to grow available trampolines."); TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); @@ -191,35 +128,212 @@ private: return TrampolineAddr; } - void grow(LLVMContext &Context) { - assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); - std::unique_ptr<Module> M(new Module("trampoline_block", Context)); - auto GetLabelName = - TargetT::insertCompileCallbackTrampolines(*M, ResolverBlockAddr, - this->NumTrampolinesPerBlock, - this->ActiveTrampolines.size()); - auto NonResolver = - createLambdaResolver( - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("External symbols in trampoline block?"); - }, - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("Dylib symbols in trampoline block?"); - }); - auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr, - std::move(NonResolver)); - JIT.emitAndFinalize(H); - for (unsigned I = 0; I < this->NumTrampolinesPerBlock; ++I) { - std::string Name = GetLabelName(I); - auto TrampolineSymbol = JIT.findSymbolIn(H, Name, false); - assert(TrampolineSymbol && "Failed to emit trampoline."); - this->AvailableTrampolines.push_back(TrampolineSymbol.getAddress()); - } + // Create new trampolines - to be implemented in subclasses. + virtual void grow() = 0; + + virtual void anchor(); +}; + +/// @brief Manage compile callbacks for in-process JITs. +template <typename TargetT> +class LocalJITCompileCallbackManager : public JITCompileCallbackManager { +public: + + /// @brief Construct a LocalJITCompileCallbackManager. + /// @param ErrorHandlerAddress The address of an error handler in the target + /// process to be used if a compile callback fails. + LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : JITCompileCallbackManager(ErrorHandlerAddress) { + + /// Set up the resolver block.
+ std::error_code EC; + ResolverBlock = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(TargetT::ResolverCodeSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, EC)); + assert(!EC && "Failed to allocate resolver block"); + + TargetT::writeResolverCode(static_cast<uint8_t*>(ResolverBlock.base()), + &reenter, this); + + EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + assert(!EC && "Failed to mprotect resolver block"); } - JITLayerT &JIT; - RuntimeDyld::MemoryManager &MemMgr; - TargetAddress ResolverBlockAddr; +private: + + static TargetAddress reenter(void *CCMgr, void *TrampolineId) { + JITCompileCallbackManager *Mgr = + static_cast<JITCompileCallbackManager*>(CCMgr); + return Mgr->executeCompileCallback( + static_cast<TargetAddress>( + reinterpret_cast<uintptr_t>(TrampolineId))); + } + + void grow() override { + assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); + + std::error_code EC; + auto TrampolineBlock = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, EC)); + assert(!EC && "Failed to allocate trampoline block"); + + + unsigned NumTrampolines = + (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize; + + uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base()); + TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), + NumTrampolines); + + for (unsigned I = 0; I < NumTrampolines; ++I) + this->AvailableTrampolines.push_back( + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>( + TrampolineMem + (I * TargetT::TrampolineSize)))); + + EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + assert(!EC && "Failed to mprotect trampoline block"); + + TrampolineBlocks.push_back(std::move(TrampolineBlock)); + } + + sys::OwningMemoryBlock ResolverBlock; + std::vector<sys::OwningMemoryBlock> TrampolineBlocks; +}; + +/// @brief Base class for managing collections of named indirect stubs. +class IndirectStubsManager { +public: + + /// @brief Map type for initializing the manager. See createStubs. + typedef StringMap<std::pair<TargetAddress, JITSymbolFlags>> StubInitsMap; + + virtual ~IndirectStubsManager() {} + + /// @brief Create a single stub with the given name, target address and flags. + virtual std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) = 0; + + /// @brief Create StubInits.size() stubs with the given names, target + /// addresses, and flags. + virtual std::error_code createStubs(const StubInitsMap &StubInits) = 0; + + /// @brief Find the stub with the given name. If ExportedStubsOnly is true, + /// this will only return a result if the stub's flags indicate that it + /// is exported. + virtual JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) = 0; + + /// @brief Find the implementation-pointer for the stub. + virtual JITSymbol findPointer(StringRef Name) = 0; + + /// @brief Change the value of the implementation pointer for the stub. + virtual std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) = 0; +private: + virtual void anchor(); +}; + +/// @brief IndirectStubsManager implementation for a concrete target, e.g. +/// OrcX86_64. (See OrcTargetSupport.h).
+template +class LocalIndirectStubsManager : public IndirectStubsManager { +public: + + std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) override { + if (auto EC = reserveStubs(1)) + return EC; + + createStubInternal(StubName, StubAddr, StubFlags); + + return std::error_code(); + } + + std::error_code createStubs(const StubInitsMap &StubInits) override { + if (auto EC = reserveStubs(StubInits.size())) + return EC; + + for (auto &Entry : StubInits) + createStubInternal(Entry.first(), Entry.second.first, + Entry.second.second); + + return std::error_code(); + } + + JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + void *StubAddr = IndirectStubsInfos[Key.first].getStub(Key.second); + assert(StubAddr && "Missing stub address"); + auto StubTargetAddr = + static_cast(reinterpret_cast(StubAddr)); + auto StubSymbol = JITSymbol(StubTargetAddr, I->second.second); + if (ExportedStubsOnly && !StubSymbol.isExported()) + return nullptr; + return StubSymbol; + } + + JITSymbol findPointer(StringRef Name) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + void *PtrAddr = IndirectStubsInfos[Key.first].getPtr(Key.second); + assert(PtrAddr && "Missing pointer address"); + auto PtrTargetAddr = + static_cast(reinterpret_cast(PtrAddr)); + return JITSymbol(PtrTargetAddr, I->second.second); + } + + std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) override { + auto I = StubIndexes.find(Name); + assert(I != StubIndexes.end() && "No stub pointer for symbol"); + auto Key = I->second.first; + *IndirectStubsInfos[Key.first].getPtr(Key.second) = + reinterpret_cast(static_cast(NewAddr)); + return std::error_code(); + } + +private: + + std::error_code reserveStubs(unsigned NumStubs) { + if (NumStubs <= FreeStubs.size()) + return std::error_code(); + + unsigned NewStubsRequired = NumStubs - FreeStubs.size(); + unsigned NewBlockId = IndirectStubsInfos.size(); + typename TargetT::IndirectStubsInfo ISI; + if (auto EC = TargetT::emitIndirectStubsBlock(ISI, NewStubsRequired, + nullptr)) + return EC; + for (unsigned I = 0; I < ISI.getNumStubs(); ++I) + FreeStubs.push_back(std::make_pair(NewBlockId, I)); + IndirectStubsInfos.push_back(std::move(ISI)); + return std::error_code(); + } + + void createStubInternal(StringRef StubName, TargetAddress InitAddr, + JITSymbolFlags StubFlags) { + auto Key = FreeStubs.back(); + FreeStubs.pop_back(); + *IndirectStubsInfos[Key.first].getPtr(Key.second) = + reinterpret_cast(static_cast(InitAddr)); + StubIndexes[StubName] = std::make_pair(Key, StubFlags); + } + + std::vector IndirectStubsInfos; + typedef std::pair StubKey; + std::vector FreeStubs; + StringMap> StubIndexes; }; /// @brief Build a function pointer of FunctionType with the given constant @@ -236,7 +350,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, /// @brief Turn a function declaration into a stub function that makes an /// indirect call using the given function pointer. -void makeStub(Function &F, GlobalVariable &ImplPointer); +void makeStub(Function &F, Value &ImplPointer); /// @brief Raise linkage types and rename as necessary to ensure that all /// symbols are accessible for other modules. 
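// An illustrative sketch (assumed usage, not taken from the patch) of how
// the callback and stub managers above compose on x86-64; `compileFoo` is a
// hypothetical function returning the compiled body's address:
//
//   LocalJITCompileCallbackManager<OrcX86_64> CCMgr(/*ErrorHandlerAddress=*/0);
//   LocalIndirectStubsManager<OrcX86_64> StubsMgr;
//
//   // Reserve a callback and aim a named stub at it.
//   auto CCInfo = CCMgr.getCompileCallback();
//   StubsMgr.createStub("foo", CCInfo.getAddress(), JITSymbolFlags::Exported);
//
//   // The first call through the stub reaches the callback, which compiles
//   // the body and re-aims the stub before returning the new address.
//   CCInfo.setCompileAction([&]() {
//     TargetAddress FooBodyAddr = compileFoo(); // hypothetical
//     StubsMgr.updatePointer("foo", FooBodyAddr);
//     return FooBodyAddr;
//   });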
@@ -289,6 +403,10 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, ValueMaterializer *Materializer = nullptr, GlobalVariable *NewGV = nullptr); +/// @brief Clone a global alias declaration into the destination module. +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap); + } // End namespace orc. } // End namespace llvm. diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 93ba02b38706..a5286ff9adde 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -67,10 +67,10 @@ private: } else return nullptr; case Emitting: - // Calling "emit" can trigger external symbol lookup (e.g. to check for - // pre-existing definitions of common-symbol), but it will never find in - // this module that it would not have found already, so return null from - // here. + // Calling "emit" can trigger a recursive call to 'find' (e.g. to check + // for pre-existing definitions of common-symbol), but any symbol in + // this module would already have been found internally (in the + // RuntimeDyld that did the lookup), so just return a nullptr here. return nullptr; case Emitted: return B.findSymbolIn(Handle, Name, ExportedSymbolsOnly); diff --git a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h index 28700ef347d6..883fa9eac560 100644 --- a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h +++ b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h @@ -14,6 +14,10 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H #define LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H +#include "llvm/ExecutionEngine/Orc/JITSymbol.h" +#include +#include + namespace llvm { namespace orc { @@ -28,6 +32,12 @@ private: typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerHandleList; struct LogicalModule { + // Make this move-only to ensure it doesn't get duplicated across moves of + // LogicalDylib or anything like that. + LogicalModule(LogicalModule &&RHS) + : Resources(std::move(RHS.Resources)), + BaseLayerHandles(std::move(RHS.BaseLayerHandles)) {} + LogicalModule() = default; LogicalModuleResources Resources; BaseLayerHandleList BaseLayerHandles; }; @@ -46,6 +56,13 @@ public: BaseLayer.removeModuleSet(BLH); } + // If possible, remove this and ~LogicalDylib once the work in the dtor is + // moved to members (e.g. self-unregistering base layer handles).
+ LogicalDylib(LogicalDylib &&RHS) + : BaseLayer(std::move(RHS.BaseLayer)), + LogicalModules(std::move(RHS.LogicalModules)), + DylibResources(std::move(RHS.DylibResources)) {} + LogicalModuleHandle createLogicalModule() { LogicalModules.push_back(LogicalModule()); return std::prev(LogicalModules.end()); @@ -69,22 +86,27 @@ public: } JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH, - const std::string &Name) { + const std::string &Name, + bool ExportedSymbolsOnly) { + + if (auto StubSym = LMH->Resources.findSymbol(Name, ExportedSymbolsOnly)) + return StubSym; + for (auto BLH : LMH->BaseLayerHandles) - if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, false)) + if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) return Symbol; return nullptr; } JITSymbol findSymbolInternally(LogicalModuleHandle LMH, const std::string &Name) { - if (auto Symbol = findSymbolInLogicalModule(LMH, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMH, Name, false)) return Symbol; for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); LMI != LME; ++LMI) { if (LMI != LMH) - if (auto Symbol = findSymbolInLogicalModule(LMI, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMI, Name, false)) return Symbol; } @@ -92,11 +114,10 @@ public: } JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - for (auto &LM : LogicalModules) - for (auto BLH : LM.BaseLayerHandles) - if (auto Symbol = - BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) - return Symbol; + for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); + LMI != LME; ++LMI) + if (auto Sym = findSymbolInLogicalModule(LMI, Name, ExportedSymbolsOnly)) + return Sym; return nullptr; } @@ -106,7 +127,6 @@ protected: BaseLayerT BaseLayer; LogicalModuleList LogicalModules; LogicalDylibResources DylibResources; - }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index f3094dafae3c..2acfecfb94dc 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -39,9 +39,12 @@ protected: void operator=(const LinkedObjectSet&) = delete; public: LinkedObjectSet(RuntimeDyld::MemoryManager &MemMgr, - RuntimeDyld::SymbolResolver &Resolver) + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections) : RTDyld(llvm::make_unique(MemMgr, Resolver)), - State(Raw) {} + State(Raw) { + RTDyld->setProcessAllSections(ProcessAllSections); + } virtual ~LinkedObjectSet() {} @@ -64,18 +67,9 @@ protected: RTDyld->mapSectionAddress(LocalAddress, TargetAddr); } - void takeOwnershipOfBuffer(std::unique_ptr B) { - OwnedBuffers.push_back(std::move(B)); - } - protected: std::unique_ptr RTDyld; enum { Raw, Finalizing, Finalized } State; - - // FIXME: This ownership hack only exists because RuntimeDyldELF still - // wants to be able to inspect the original object when resolving - // relocations. As soon as that can be fixed this should be removed. - std::vector> OwnedBuffers; }; typedef std::list> LinkedObjectSetListT; @@ -83,16 +77,6 @@ protected: public: /// @brief Handle to a set of loaded objects. typedef LinkedObjectSetListT::iterator ObjSetHandleT; - - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. 
- template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - for (auto &MB : MBs) - (*H)->takeOwnershipOfBuffer(std::move(MB)); - } - }; /// @brief Default (no-op) action to perform when loading objects. @@ -117,16 +101,16 @@ private: class ConcreteLinkedObjectSet : public LinkedObjectSet { public: ConcreteLinkedObjectSet(MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) - : LinkedObjectSet(*MemMgr, *Resolver), MemMgr(std::move(MemMgr)), - Resolver(std::move(Resolver)) { } + SymbolResolverPtrT Resolver, + bool ProcessAllSections) + : LinkedObjectSet(*MemMgr, *Resolver, ProcessAllSections), + MemMgr(std::move(MemMgr)), Resolver(std::move(Resolver)) { } void Finalize() override { State = Finalizing; RTDyld->resolveRelocations(); RTDyld->registerEHFrames(); MemMgr->finalizeMemory(); - OwnedBuffers.clear(); State = Finalized; } @@ -137,9 +121,11 @@ private: template std::unique_ptr - createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { + createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver, + bool ProcessAllSections) { typedef ConcreteLinkedObjectSet LOS; - return llvm::make_unique(std::move(MemMgr), std::move(Resolver)); + return llvm::make_unique(std::move(MemMgr), std::move(Resolver), + ProcessAllSections); } public: @@ -158,7 +144,18 @@ public: NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(), NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor()) : NotifyLoaded(std::move(NotifyLoaded)), - NotifyFinalized(std::move(NotifyFinalized)) {} + NotifyFinalized(std::move(NotifyFinalized)), + ProcessAllSections(false) {} + + /// @brief Set the 'ProcessAllSections' flag. + /// + /// If set to true, all sections in each object file will be allocated using + /// the memory manager, rather than just the sections required for execution. + /// + /// This is kludgy, and may be removed in the future. + void setProcessAllSections(bool ProcessAllSections) { + this->ProcessAllSections = ProcessAllSections; + } /// @brief Add a set of objects (or archives) that will be treated as a unit /// for the purposes of symbol lookup and memory management. @@ -180,7 +177,8 @@ public: ObjSetHandleT Handle = LinkedObjSetList.insert( LinkedObjSetList.end(), - createLinkedObjectSet(std::move(MemMgr), std::move(Resolver))); + createLinkedObjectSet(std::move(MemMgr), std::move(Resolver), + ProcessAllSections)); LinkedObjectSet &LOS = **Handle; LoadedObjInfoList LoadedObjInfos; @@ -276,6 +274,7 @@ private: LinkedObjectSetListT LinkedObjSetList; NotifyLoadedFtor NotifyLoaded; NotifyFinalizedFtor NotifyFinalized; + bool ProcessAllSections; }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index 7af662085474..f96e83ed5a1a 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -87,14 +87,6 @@ public: BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr); } - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. - template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs)); - } - /// @brief Access the transform functor directly. 
 TransformFtor &getTransform() { return Transform; } diff --git a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h index 309f5a96090e..246d3e0a9fc6 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h +++ b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h @@ -9,42 +9,92 @@ // // Target specific code for Orc, e.g. callback assembly. // +// Target classes should be part of the JIT *target* process, not the host +// process (except where you're doing hosted JITing and the two are one and the +// same). +// //===----------------------------------------------------------------------===// #ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #include "IndirectionUtils.h" +#include "llvm/Support/Memory.h" namespace llvm { namespace orc { class OrcX86_64 { public: - static const char *ResolverBlockName; + static const unsigned PageSize = 4096; + static const unsigned PointerSize = 8; + static const unsigned TrampolineSize = 8; + static const unsigned ResolverCodeSize = 0x78; - /// @brief Insert module-level inline callback asm into module M for the - /// symbols managed by JITResolveCallbackHandler J. - static void insertResolverBlock(Module &M, - JITCompileCallbackManagerBase &JCBM); + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, + void *TrampolineId); - /// @brief Get a label name from the given index. - typedef std::function<std::string(unsigned)> LabelNameFtor; + /// @brief Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr); - /// @brief Insert the requested number of trampolines into the given module. - /// @param M Module to insert the call block into. - /// @param NumCalls Number of calls to create in the call block. - /// @param StartIndex Optional argument specifying the index suffix to start - /// with. - /// @return A functor that provides the symbol name for each entry in the call - /// block. + /// @brief Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines); + + /// @brief Provide information about stub blocks generated by the + /// emitIndirectStubsBlock function. + class IndirectStubsInfo { + friend class OrcX86_64; + public: + const static unsigned StubSize = 8; + const static unsigned PtrSize = 8; + + IndirectStubsInfo() : NumStubs(0) {} + IndirectStubsInfo(IndirectStubsInfo &&Other) + : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { + Other.NumStubs = 0; + } + IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) { + NumStubs = Other.NumStubs; + Other.NumStubs = 0; + StubsMem = std::move(Other.StubsMem); + return *this; + } + + /// @brief Number of stubs in this block. + unsigned getNumStubs() const { return NumStubs; } + + /// @brief Get a pointer to the stub at the given index, which must be in + /// the range 0 .. getNumStubs() - 1. + void* getStub(unsigned Idx) const { + return static_cast<uint64_t*>(StubsMem.base()) + Idx; + } + + /// @brief Get a pointer to the implementation-pointer at the given index, + /// which must be in the range 0 .. getNumStubs() - 1.
+ void** getPtr(unsigned Idx) const { + char *PtrsBase = + static_cast(StubsMem.base()) + NumStubs * StubSize; + return reinterpret_cast(PtrsBase) + Idx; + } + private: + unsigned NumStubs; + sys::OwningMemoryBlock StubsMem; + }; + + /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to + /// the nearest page size. /// - static LabelNameFtor insertCompileCallbackTrampolines( - Module &M, - TargetAddress TrampolineAddr, - unsigned NumCalls, - unsigned StartIndex = 0); - + /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k + /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 + /// will return a block of 1024 (2-pages worth). + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal); }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index a808d9231167..385b8d0a30b1 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -17,8 +17,10 @@ #include "JITSymbolFlags.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Memory.h" #include "llvm/DebugInfo/DIContext.h" +#include #include namespace llvm { @@ -59,26 +61,33 @@ public: class LoadedObjectInfo : public llvm::LoadedObjectInfo { friend class RuntimeDyldImpl; public: - LoadedObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : RTDyld(RTDyld), BeginIdx(BeginIdx), EndIdx(EndIdx) { } + typedef std::map ObjSectionToIDMap; + + LoadedObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : RTDyld(RTDyld), ObjSecToIDMap(ObjSecToIDMap) { } virtual object::OwningBinary getObjectForDebug(const object::ObjectFile &Obj) const = 0; - uint64_t getSectionLoadAddress(StringRef Name) const; + uint64_t + getSectionLoadAddress(const object::SectionRef &Sec) const override; protected: virtual void anchor(); RuntimeDyldImpl &RTDyld; - unsigned BeginIdx, EndIdx; + ObjSectionToIDMap ObjSecToIDMap; }; template struct LoadedObjectInfoHelper : LoadedObjectInfo { - LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {} + protected: + LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default; + LoadedObjectInfoHelper() = default; + + public: + LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, + LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {} std::unique_ptr clone() const override { return llvm::make_unique(static_cast(*this)); } @@ -87,7 +96,7 @@ public: /// \brief Memory Management. class MemoryManager { public: - virtual ~MemoryManager() {}; + virtual ~MemoryManager() {} /// Allocate a memory block of (at least) the given size suitable for /// executable code. The SectionID is a unique identifier assigned by the @@ -149,7 +158,7 @@ public: /// \brief Symbol resolution. class SymbolResolver { public: - virtual ~SymbolResolver() {}; + virtual ~SymbolResolver() {} /// This method returns the address of the specified function or variable. /// It is used to resolve symbols during module linking. 
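// An illustrative resolver sketch (assumed usage, not taken from the patch):
// the ORC layers above consume RuntimeDyld::SymbolResolver instances,
// typically built with createLambdaResolver as the CompileOnDemandLayer does
// for its globals module; `StubsMgr` is assumed to be an IndirectStubsManager
// in scope:
//
//   auto Resolver = createLambdaResolver(
//       [&](const std::string &Name) {
//         // External lookup: prefer a local stub if one exists.
//         if (auto Sym = StubsMgr->findStub(Name, false))
//           return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
//         return RuntimeDyld::SymbolInfo(nullptr);
//       },
//       [](const std::string &Name) {
//         return RuntimeDyld::SymbolInfo(nullptr); // no dylib-level symbols
//       });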
@@ -244,4 +253,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h index 0b0dcb021f14..7bb96eb8b71b 100644 --- a/include/llvm/ExecutionEngine/SectionMemoryManager.h +++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h @@ -83,10 +83,28 @@ public: virtual void invalidateInstructionCache(); private: + struct FreeMemBlock { + // The actual block of free memory + sys::MemoryBlock Free; + // If there is a pending allocation from the same reservation right before + // this block, store its index in PendingMem, to be able to update the + // pending region if part of this block is allocated, rather than having to + // create a new one. + unsigned PendingPrefixIndex; + }; + struct MemoryGroup { - SmallVector<sys::MemoryBlock, 16> AllocatedMem; - SmallVector<sys::MemoryBlock, 16> FreeMem; - sys::MemoryBlock Near; + // PendingMem contains all blocks of memory (subblocks of AllocatedMem) + // which have not yet had their permissions applied, but have been given + // out to the user. FreeMem contains all blocks of memory which have + // neither had their permissions applied nor been given out to the user. + SmallVector<sys::MemoryBlock, 16> PendingMem; + SmallVector<FreeMemBlock, 16> FreeMem; + + // All memory blocks that have been requested from the system + SmallVector<sys::MemoryBlock, 16> AllocatedMem; + + sys::MemoryBlock Near; }; uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size, @@ -103,4 +121,3 @@ private: } #endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H - diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h index fc04fe71cbf0..0092f49e49ad 100644 --- a/include/llvm/IR/Argument.h +++ b/include/llvm/IR/Argument.h @@ -21,8 +21,7 @@ namespace llvm { -template<typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename NodeTy> class SymbolTableListTraits; /// \brief LLVM Argument representation /// @@ -36,7 +35,7 @@ class Argument : public Value, public ilist_node<Argument> { virtual void anchor(); Function *Parent; - friend class SymbolTableListTraits<Argument, Function>; + friend class SymbolTableListTraits<Argument>; void setParent(Function *parent); public: @@ -64,8 +63,8 @@ public: /// containing function, return the number of bytes known to be /// dereferenceable. Otherwise, zero is returned. uint64_t getDereferenceableBytes() const; - - /// \brief If this argument has the dereferenceable_or_null attribute on + + /// \brief If this argument has the dereferenceable_or_null attribute on /// it in its containing function, return the number of bytes known to be /// dereferenceable. Otherwise, zero is returned. uint64_t getDereferenceableOrNullBytes() const; diff --git a/include/llvm/IR/AssemblyAnnotationWriter.h b/include/llvm/IR/AssemblyAnnotationWriter.h index 19e32a2dcdcc..6e1f5c43e12e 100644 --- a/include/llvm/IR/AssemblyAnnotationWriter.h +++ b/include/llvm/IR/AssemblyAnnotationWriter.h @@ -27,7 +27,6 @@ class formatted_raw_ostream; class AssemblyAnnotationWriter { public: - virtual ~AssemblyAnnotationWriter(); /// emitFunctionAnnot - This may be implemented to emit a string right before @@ -50,7 +49,7 @@ public: /// emitInstructionAnnot - This may be implemented to emit a string right /// before an instruction is emitted.
- virtual void emitInstructionAnnot(const Instruction *, + virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &) {} /// printInfoComment - This may be implemented to emit a comment to the diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 4d6d7da1fa5b..0e3373165407 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -33,6 +33,7 @@ class AttributeSetImpl; class AttributeSetNode; class Constant; template struct DenseMapInfo; +class Function; class LLVMContext; class Type; @@ -64,65 +65,15 @@ public: enum AttrKind { // IR-Level Attributes None, ///< No attributes have been set - Alignment, ///< Alignment of parameter (5 bits) - ///< stored as log2 of alignment with +1 bias - ///< 0 means unaligned (different from align(1)) - AlwaysInline, ///< inline=always - Builtin, ///< Callee is recognized as a builtin, despite - ///< nobuiltin attribute on its declaration. - ByVal, ///< Pass structure by value - InAlloca, ///< Pass structure in an alloca - Cold, ///< Marks function as being in a cold path. - Convergent, ///< Can only be moved to control-equivalent blocks - InlineHint, ///< Source said inlining was desirable - InReg, ///< Force argument to be passed in register - JumpTable, ///< Build jump-instruction tables and replace refs. - MinSize, ///< Function must be optimized for size first - Naked, ///< Naked function - Nest, ///< Nested function static chain - NoAlias, ///< Considered to not alias after call - NoBuiltin, ///< Callee isn't recognized as a builtin - NoCapture, ///< Function creates no aliases of pointer - NoDuplicate, ///< Call cannot be duplicated - NoImplicitFloat, ///< Disable implicit floating point insts - NoInline, ///< inline=never - NonLazyBind, ///< Function is called early and/or - ///< often, so lazy binding isn't worthwhile - NonNull, ///< Pointer is known to be not null - Dereferenceable, ///< Pointer is known to be dereferenceable - DereferenceableOrNull, ///< Pointer is either null or dereferenceable - NoRedZone, ///< Disable redzone - NoReturn, ///< Mark the function as not returning - NoUnwind, ///< Function doesn't unwind stack - OptimizeForSize, ///< opt_size - OptimizeNone, ///< Function must not be optimized. - ReadNone, ///< Function does not access memory - ReadOnly, ///< Function only reads from memory - ArgMemOnly, ///< Funciton can access memory only using pointers - ///< based on its arguments. - Returned, ///< Return value is always equal to this argument - ReturnsTwice, ///< Function can return twice - SExt, ///< Sign extended before/after call - StackAlignment, ///< Alignment of stack for function (3 bits) - ///< stored as log2 of alignment with +1 bias 0 - ///< means unaligned (different from - ///< alignstack=(1)) - StackProtect, ///< Stack protection. - StackProtectReq, ///< Stack protection required. - StackProtectStrong, ///< Strong Stack protection. - SafeStack, ///< Safe Stack protection. - StructRet, ///< Hidden pointer to structure to return - SanitizeAddress, ///< AddressSanitizer is on. - SanitizeThread, ///< ThreadSanitizer is on. - SanitizeMemory, ///< MemorySanitizer is on. 
- UWTable, ///< Function must be in a unwind table - ZExt, ///< Zero extended before/after call - + #define GET_ATTR_ENUM + #include "llvm/IR/Attributes.inc" EndAttrKinds ///< Sentinel value useful for loops }; + private: AttributeImpl *pImpl; Attribute(AttributeImpl *A) : pImpl(A) {} + public: Attribute() : pImpl(nullptr) {} @@ -189,11 +140,11 @@ public: unsigned getStackAlignment() const; /// \brief Returns the number of dereferenceable bytes from the - /// dereferenceable attribute (or zero if unknown). + /// dereferenceable attribute. uint64_t getDereferenceableBytes() const; /// \brief Returns the number of dereferenceable_or_null bytes from the - /// dereferenceable_or_null attribute (or zero if unknown). + /// dereferenceable_or_null attribute. uint64_t getDereferenceableOrNullBytes() const; /// \brief The Attribute is converted to a string of equivalent mnemonic. This @@ -226,6 +177,7 @@ public: ReturnIndex = 0U, FunctionIndex = ~0U }; + private: friend class AttrBuilder; friend class AttributeSetImpl; @@ -249,8 +201,8 @@ private: ArrayRef<std::pair<unsigned, AttributeSetNode*> > Attrs); - explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {} + public: AttributeSet() : pImpl(nullptr) {} @@ -276,6 +228,11 @@ public: AttributeSet addAttribute(LLVMContext &C, unsigned Index, StringRef Kind, StringRef Value) const; + /// Add an attribute to the attribute set at the given indices. Because + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttribute(LLVMContext &C, ArrayRef<unsigned> Indices, + Attribute A) const; + /// \brief Add attributes to the attribute set at the given index. Because /// attribute sets are immutable, this returns a new set. AttributeSet addAttributes(LLVMContext &C, unsigned Index, @@ -284,13 +241,13 @@ public: /// \brief Remove the specified attribute at the specified index from this /// attribute list. Because attribute lists are immutable, this returns the /// new list. - AttributeSet removeAttribute(LLVMContext &C, unsigned Index, + AttributeSet removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const; /// \brief Remove the specified attributes at the specified index from this /// attribute list. Because attribute lists are immutable, this returns the /// new list. - AttributeSet removeAttributes(LLVMContext &C, unsigned Index, + AttributeSet removeAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const; /// \brief Remove the specified attributes at the specified index from this @@ -439,6 +396,7 @@ class AttrBuilder { uint64_t StackAlignment; uint64_t DerefBytes; uint64_t DerefOrNullBytes; + public: AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0), @@ -511,8 +469,8 @@ public: /// \brief Retrieve the stack alignment attribute, if it exists. uint64_t getStackAlignment() const { return StackAlignment; } - /// \brief Retrieve the number of dereferenceable bytes, if the dereferenceable - /// attribute exists (zero is returned otherwise). + /// \brief Retrieve the number of dereferenceable bytes, if the + /// dereferenceable attribute exists (zero is returned otherwise). uint64_t getDereferenceableBytes() const { return DerefBytes; } /// \brief Retrieve the number of dereferenceable_or_null bytes, if the @@ -573,7 +531,14 @@ public: namespace AttributeFuncs { /// \brief Which attributes cannot be applied to a type. -AttrBuilder typeIncompatible(const Type *Ty); +AttrBuilder typeIncompatible(Type *Ty); + +/// \returns True if the two functions have compatible target-independent +/// attributes for inlining purposes.
+bool areInlineCompatible(const Function &Caller, const Function &Callee); + +/// \brief Merge caller's and callee's attributes. +void mergeAttributesForInlining(Function &Caller, const Function &Callee); } // end AttributeFuncs namespace diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td new file mode 100644 index 000000000000..797cd55427b3 --- /dev/null +++ b/include/llvm/IR/Attributes.td @@ -0,0 +1,192 @@ +/// Attribute base class. +class Attr<string S> { + // String representation of this attribute in the IR. + string AttrString = S; +} + +/// Enum attribute. +class EnumAttr<string S> : Attr<S>; + +/// StringBool attribute. +class StrBoolAttr<string S> : Attr<S>; + +/// Target-independent enum attributes. + +/// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias. +/// 0 means unaligned (different from align(1)). +def Alignment : EnumAttr<"align">; + +/// inline=always. +def AlwaysInline : EnumAttr<"alwaysinline">; + +/// Function can access memory only using pointers based on its arguments. +def ArgMemOnly : EnumAttr<"argmemonly">; + +/// Callee is recognized as a builtin, despite nobuiltin attribute on its +/// declaration. +def Builtin : EnumAttr<"builtin">; + +/// Pass structure by value. +def ByVal : EnumAttr<"byval">; + +/// Marks function as being in a cold path. +def Cold : EnumAttr<"cold">; + +/// Can only be moved to control-equivalent blocks. +def Convergent : EnumAttr<"convergent">; + +/// Pointer is known to be dereferenceable. +def Dereferenceable : EnumAttr<"dereferenceable">; + +/// Pointer is either null or dereferenceable. +def DereferenceableOrNull : EnumAttr<"dereferenceable_or_null">; + +/// Function may only access memory that is inaccessible from IR. +def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly">; + +/// Function may only access memory that is either inaccessible from the IR, +/// or pointed to by its pointer arguments. +def InaccessibleMemOrArgMemOnly : EnumAttr<"inaccessiblemem_or_argmemonly">; + +/// Pass structure in an alloca. +def InAlloca : EnumAttr<"inalloca">; + +/// Source said inlining was desirable. +def InlineHint : EnumAttr<"inlinehint">; + +/// Force argument to be passed in register. +def InReg : EnumAttr<"inreg">; + +/// Build jump-instruction tables and replace refs. +def JumpTable : EnumAttr<"jumptable">; + +/// Function must be optimized for size first. +def MinSize : EnumAttr<"minsize">; + +/// Naked function. +def Naked : EnumAttr<"naked">; + +/// Nested function static chain. +def Nest : EnumAttr<"nest">; + +/// Considered to not alias after call. +def NoAlias : EnumAttr<"noalias">; + +/// Callee isn't recognized as a builtin. +def NoBuiltin : EnumAttr<"nobuiltin">; + +/// Function creates no aliases of pointer. +def NoCapture : EnumAttr<"nocapture">; + +/// Call cannot be duplicated. +def NoDuplicate : EnumAttr<"noduplicate">; + +/// Disable implicit floating point insts. +def NoImplicitFloat : EnumAttr<"noimplicitfloat">; + +/// inline=never. +def NoInline : EnumAttr<"noinline">; + +/// Function is called early and/or often, so lazy binding isn't worthwhile. +def NonLazyBind : EnumAttr<"nonlazybind">; + +/// Pointer is known to be not null. +def NonNull : EnumAttr<"nonnull">; + +/// The function does not recurse. +def NoRecurse : EnumAttr<"norecurse">; + +/// Disable redzone. +def NoRedZone : EnumAttr<"noredzone">; + +/// Mark the function as not returning. +def NoReturn : EnumAttr<"noreturn">; + +/// Function doesn't unwind stack. +def NoUnwind : EnumAttr<"nounwind">; + +/// opt_size.
+def OptimizeForSize : EnumAttr<"optsize">; + +/// Function must not be optimized. +def OptimizeNone : EnumAttr<"optnone">; + +/// Function does not access memory. +def ReadNone : EnumAttr<"readnone">; + +/// Function only reads from memory. +def ReadOnly : EnumAttr<"readonly">; + +/// Return value is always equal to this argument. +def Returned : EnumAttr<"returned">; + +/// Function can return twice. +def ReturnsTwice : EnumAttr<"returns_twice">; + +/// Safe Stack protection. +def SafeStack : EnumAttr<"safestack">; + +/// Sign extended before/after call. +def SExt : EnumAttr<"signext">; + +/// Alignment of stack for function (3 bits) stored as log2 of alignment with +/// +1 bias; 0 means unaligned (different from alignstack=(1)). +def StackAlignment : EnumAttr<"alignstack">; + +/// Stack protection. +def StackProtect : EnumAttr<"ssp">; + +/// Stack protection required. +def StackProtectReq : EnumAttr<"sspreq">; + +/// Strong Stack protection. +def StackProtectStrong : EnumAttr<"sspstrong">; + +/// Hidden pointer to structure to return. +def StructRet : EnumAttr<"sret">; + +/// AddressSanitizer is on. +def SanitizeAddress : EnumAttr<"sanitize_address">; + +/// ThreadSanitizer is on. +def SanitizeThread : EnumAttr<"sanitize_thread">; + +/// MemorySanitizer is on. +def SanitizeMemory : EnumAttr<"sanitize_memory">; + +/// Function must be in an unwind table. +def UWTable : EnumAttr<"uwtable">; + +/// Zero extended before/after call. +def ZExt : EnumAttr<"zeroext">; + +/// Target-independent string attributes. +def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; +def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; +def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">; +def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">; + +class CompatRule<string F> { + // The name of the function called to check the attribute of the caller and + // callee and decide whether inlining should be allowed. The function's + // signature must match "bool(const Function&, const Function &)", where the + // first parameter is the reference to the caller and the second parameter is + // the reference to the callee. It must return false if the attributes of the + // caller and callee are incompatible, and true otherwise. + string CompatFunc = F; +} + +def : CompatRule<"isEqual<SanitizeAddressAttr>">; +def : CompatRule<"isEqual<SanitizeThreadAttr>">; +def : CompatRule<"isEqual<SanitizeMemoryAttr>">; + +class MergeRule<string F> { + // The name of the function called to merge the attributes of the caller and + // callee. The function's signature must match + // "void(Function&, const Function &)", where the first parameter is the + // reference to the caller and the second parameter is the reference to the + // callee. + string MergeFunc = F; +} + +def : MergeRule<"adjustCallerSSPLevel">; diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 66581bfedbe6..c6b54d308ce6 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -30,22 +30,9 @@ class LLVMContext; class BlockAddress; class Function; -// Traits for intrusive list of basic blocks...
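// An illustrative checker with the signature the CompatRule defs above
// expect (an assumed example in the spirit of the isEqual<...> instances;
// the TableGen-generated code supplies the real implementations):
//
//   static bool isEqualSanitizeAddressAttr(const Function &Caller,
//                                          const Function &Callee) {
//     return Caller.hasFnAttribute(Attribute::SanitizeAddress) ==
//            Callee.hasFnAttribute(Attribute::SanitizeAddress);
//   }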
-template<> struct ilist_traits - : public SymbolTableListTraits { - - BasicBlock *createSentinel() const; - static void destroySentinel(BasicBlock*) {} - - BasicBlock *provideInitialHead() const { return createSentinel(); } - BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); } - static void noteHead(BasicBlock*, BasicBlock*) {} - - static ValueSymbolTable *getSymTab(Function *ItemParent); -private: - mutable ilist_half_node Sentinel; -}; - +template <> +struct SymbolTableListSentinelTraits + : public ilist_half_embedded_sentinel_traits {}; /// \brief LLVM Basic Block Representation /// @@ -63,16 +50,17 @@ private: /// modifying a program. However, the verifier will ensure that basic blocks /// are "well formed". class BasicBlock : public Value, // Basic blocks are data objects also - public ilist_node { + public ilist_node_with_parent { friend class BlockAddress; public: - typedef iplist InstListType; + typedef SymbolTableList InstListType; + private: InstListType InstList; Function *Parent; void setParent(Function *parent); - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; BasicBlock(const BasicBlock &) = delete; void operator=(const BasicBlock &) = delete; @@ -171,7 +159,7 @@ public: /// \brief Unlink 'this' from the containing function and delete it. /// // \returns an iterator pointing to the element after the erased one. - iplist::iterator eraseFromParent(); + SymbolTableList::iterator eraseFromParent(); /// \brief Unlink this basic block from its current function and insert it /// into the function that \p MovePos lives in, right before \p MovePos. @@ -253,7 +241,7 @@ public: InstListType &getInstList() { return InstList; } /// \brief Returns a pointer to a member of the instruction list. - static iplist BasicBlock::*getSublistAccess(Instruction*) { + static InstListType BasicBlock::*getSublistAccess(Instruction*) { return &BasicBlock::InstList; } @@ -283,6 +271,8 @@ public: /// should be called while the predecessor still refers to this block. void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false); + bool canSplitPredecessors() const; + /// \brief Split the basic block into two basic blocks at the specified /// instruction. /// @@ -300,6 +290,9 @@ public: /// Also note that this doesn't preserve any passes. To split blocks while /// keeping loop information consistent, use the SplitBlock utility function. BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = ""); + BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "") { + return splitBasicBlock(I->getIterator(), BBName); + } /// \brief Returns true if there are any uses of this basic block other than /// direct branches, switches, etc. to it. @@ -309,6 +302,9 @@ public: /// basic block \p New instead of to it. void replaceSuccessorsPhiUsesWith(BasicBlock *New); + /// \brief Return true if this basic block is an exception handling block. + bool isEHPad() const { return getFirstNonPHI()->isEHPad(); } + /// \brief Return true if this basic block is a landing pad. /// /// Being a ``landing pad'' means that the basic block is the destination of @@ -337,12 +333,6 @@ private: } }; -// createSentinel is used to get hold of the node that marks the end of the -// list... (same trick used here as in ilist_traits) -inline BasicBlock *ilist_traits::createSentinel() const { - return static_cast(&Sentinel); -} - // Create wrappers for C Binding types (see CBindingWrapping.h). 
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef) diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h index f78220a52033..e9bf09333a23 100644 --- a/include/llvm/IR/CFG.h +++ b/include/llvm/IR/CFG.h @@ -107,149 +107,13 @@ inline pred_const_range predecessors(const BasicBlock *BB) { } //===----------------------------------------------------------------------===// -// BasicBlock succ_iterator definition +// BasicBlock succ_iterator helpers //===----------------------------------------------------------------------===// -template // Successor Iterator -class SuccIterator : public std::iterator { - typedef std::iterator - super; - -public: - typedef typename super::pointer pointer; - typedef typename super::reference reference; - -private: - Term_ Term; - unsigned idx; - typedef SuccIterator Self; - - inline bool index_is_valid(int idx) { - return idx >= 0 && (unsigned) idx < Term->getNumSuccessors(); - } - - /// \brief Proxy object to allow write access in operator[] - class SuccessorProxy { - Self it; - - public: - explicit SuccessorProxy(const Self &it) : it(it) {} - - SuccessorProxy(const SuccessorProxy&) = default; - - SuccessorProxy &operator=(SuccessorProxy r) { - *this = reference(r); - return *this; - } - - SuccessorProxy &operator=(reference r) { - it.Term->setSuccessor(it.idx, r); - return *this; - } - - operator reference() const { return *it; } - }; - -public: - explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator - } - inline SuccIterator(Term_ T, bool) // end iterator - : Term(T) { - if (Term) - idx = Term->getNumSuccessors(); - else - // Term == NULL happens, if a basic block is not fully constructed and - // consequently getTerminator() returns NULL. In this case we construct a - // SuccIterator which describes a basic block that has zero successors. - // Defining SuccIterator for incomplete and malformed CFGs is especially - // useful for debugging. - idx = 0; - } - - /// getSuccessorIndex - This is used to interface between code that wants to - /// operate on terminator instructions directly. 
- unsigned getSuccessorIndex() const { return idx; } - - inline bool operator==(const Self& x) const { return idx == x.idx; } - inline bool operator!=(const Self& x) const { return !operator==(x); } - - inline reference operator*() const { return Term->getSuccessor(idx); } - inline pointer operator->() const { return operator*(); } - - inline Self& operator++() { ++idx; return *this; } // Preincrement - - inline Self operator++(int) { // Postincrement - Self tmp = *this; ++*this; return tmp; - } - - inline Self& operator--() { --idx; return *this; } // Predecrement - inline Self operator--(int) { // Postdecrement - Self tmp = *this; --*this; return tmp; - } - - inline bool operator<(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx < x.idx; - } - - inline bool operator<=(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx <= x.idx; - } - inline bool operator>=(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx >= x.idx; - } - - inline bool operator>(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx > x.idx; - } - - inline Self& operator+=(int Right) { - unsigned new_idx = idx + Right; - assert(index_is_valid(new_idx) && "Iterator index out of bound"); - idx = new_idx; - return *this; - } - - inline Self operator+(int Right) const { - Self tmp = *this; - tmp += Right; - return tmp; - } - - inline Self& operator-=(int Right) { - return operator+=(-Right); - } - - inline Self operator-(int Right) const { - return operator+(-Right); - } - - inline int operator-(const Self& x) const { - assert(Term == x.Term && "Cannot work on iterators of different blocks!"); - int distance = idx - x.idx; - return distance; - } - - inline SuccessorProxy operator[](int offset) { - Self tmp = *this; - tmp += offset; - return SuccessorProxy(tmp); - } - - /// Get the source BB of this iterator. - inline BB_ *getSource() { - assert(Term && "Source not available, if basic block was malformed"); - return Term->getParent(); - } -}; - -typedef SuccIterator succ_iterator; -typedef SuccIterator succ_const_iterator; +typedef TerminatorInst::SuccIterator + succ_iterator; +typedef TerminatorInst::SuccIterator + succ_const_iterator; typedef llvm::iterator_range succ_range; typedef llvm::iterator_range succ_const_range; @@ -275,8 +139,8 @@ inline succ_const_range successors(const BasicBlock *BB) { return succ_const_range(succ_begin(BB), succ_end(BB)); } - -template struct isPodLike > { +template +struct isPodLike> { static const bool value = isPodLike::value; }; diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt index dd8e04f1510c..eade87e05bc9 100644 --- a/include/llvm/IR/CMakeLists.txt +++ b/include/llvm/IR/CMakeLists.txt @@ -1,5 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS Attributes.td) +tablegen(LLVM Attributes.inc -gen-attrs) + set(LLVM_TARGET_DEFINITIONS Intrinsics.td) - tablegen(LLVM Intrinsics.gen -gen-intrinsic) - add_public_tablegen_target(intrinsics_gen) diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 2841781e8a9e..f4b8a8a5a1c9 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -41,6 +41,7 @@ template getUser()); + return arg_begin() <= U && U < arg_end(); + } + + /// \brief Determine whether the passed iterator points to a bundle operand. 
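The replacement typedefs keep the public surface unchanged; successor iteration through the CFG.h helpers still reads as before. A trivial example, with an assumed helper name:

#include "llvm/IR/CFG.h"

// Count BB's successors via the succ_iterator range helper.
static unsigned countSuccessors(llvm::BasicBlock *BB) {
  unsigned N = 0;
  for (llvm::BasicBlock *Succ : llvm::successors(BB)) {
    (void)Succ;
    ++N;
  }
  return N;
}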
+ bool isBundleOperand(Value::const_user_iterator UI) const { + return isBundleOperand(&UI.getUse()); + } + + /// \brief Determine whether the passed use points to a bundle operand. + bool isBundleOperand(const Use *U) const { + assert(getInstruction() == U->getUser()); + if (!hasOperandBundles()) + return false; + unsigned OperandNo = U - (*this)->op_begin(); + return getBundleOperandsStartIndex() <= OperandNo && + OperandNo < getBundleOperandsEndIndex(); + } + + /// \brief Determine whether the passed iterator points to a data operand. + bool isDataOperand(Value::const_user_iterator UI) const { + return isDataOperand(&UI.getUse()); + } + + /// \brief Determine whether the passed use points to a data operand. + bool isDataOperand(const Use *U) const { + return data_operands_begin() <= U && U < data_operands_end(); + } + ValTy *getArgument(unsigned ArgNo) const { assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!"); return *(arg_begin() + ArgNo); @@ -137,8 +176,7 @@ public: /// it. unsigned getArgumentNo(const Use *U) const { assert(getInstruction() && "Not a call or invoke instruction!"); - assert(arg_begin() <= U && U < arg_end() - && "Argument # out of range!"); + assert(isArgOperand(U) && "Argument # out of range!"); return U - arg_begin(); } @@ -146,21 +184,55 @@ public: /// arguments at this call site. typedef IterTy arg_iterator; - /// arg_begin/arg_end - Return iterators corresponding to the actual argument - /// list for a call site. - IterTy arg_begin() const { - assert(getInstruction() && "Not a call or invoke instruction!"); - // Skip non-arguments - return (*this)->op_begin(); - } - - IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } bool arg_empty() const { return arg_end() == arg_begin(); } unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } + /// Given a value use iterator, returns the data operand that corresponds to + /// it. + /// Iterator must actually correspond to a data operand. + unsigned getDataOperandNo(Value::const_user_iterator UI) const { + return getDataOperandNo(&UI.getUse()); + } + + /// Given a use for a data operand, get the data operand number that + /// corresponds to it. + unsigned getDataOperandNo(const Use *U) const { + assert(getInstruction() && "Not a call or invoke instruction!"); + assert(isDataOperand(U) && "Data operand # out of range!"); + return U - data_operands_begin(); + } + + /// Type of iterator to use when looping over data operands at this call site + /// (see below). + typedef IterTy data_operand_iterator; + + /// data_operands_begin/data_operands_end - Return iterators iterating over + /// the call / invoke argument list and bundle operands. For invokes, this is + /// the set of instruction operands except the invoke target and the two + /// successor blocks; and for calls this is the set of instruction operands + /// except the call target. + + IterTy data_operands_begin() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + return (*this)->op_begin(); + } + IterTy data_operands_end() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + return (*this)->op_end() - (isCall() ? 
1 : 3); + } + iterator_range data_ops() const { + return make_range(data_operands_begin(), data_operands_end()); + } + bool data_operands_empty() const { + return data_operands_end() == data_operands_begin(); + } + unsigned data_operands_size() const { + return std::distance(data_operands_begin(), data_operands_end()); + } + /// getType - Return the type of the instruction that generated this call site /// Type *getType() const { return (*this)->getType(); } @@ -197,11 +269,11 @@ public: CALLSITE_DELEGATE_GETTER(getNumArgOperands()); } - ValTy *getArgOperand(unsigned i) const { + ValTy *getArgOperand(unsigned i) const { CALLSITE_DELEGATE_GETTER(getArgOperand(i)); } - bool isInlineAsm() const { + bool isInlineAsm() const { if (isCall()) return cast(getInstruction())->isInlineAsm(); return false; @@ -243,6 +315,17 @@ public: CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A)); } + /// \brief Return true if the data operand at index \p i directly or + /// indirectly has the attribute \p A. + /// + /// Normal call or invoke arguments have per operand attributes, as specified + /// in the attribute set attached to this instruction, while operand bundle + /// operands may have some attributes implied by the type of its containing + /// operand bundle. + bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const { + CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, A)); + } + /// @brief Extract the alignment for a call or parameter (0=unknown). uint16_t getParamAlignment(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); @@ -253,13 +336,20 @@ public: uint64_t getDereferenceableBytes(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); } - + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). uint64_t getDereferenceableOrNullBytes(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); } - + + /// @brief Determine if the parameter or return value is marked with NoAlias + /// attribute. + /// @param n The parameter to check. 1 is the first parameter, 0 is the return + bool doesNotAlias(unsigned n) const { + CALLSITE_DELEGATE_GETTER(doesNotAlias(n)); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { @@ -315,12 +405,62 @@ public: CALLSITE_DELEGATE_SETTER(setDoesNotThrow()); } + unsigned getNumOperandBundles() const { + CALLSITE_DELEGATE_GETTER(getNumOperandBundles()); + } + + bool hasOperandBundles() const { + CALLSITE_DELEGATE_GETTER(hasOperandBundles()); + } + + unsigned getBundleOperandsStartIndex() const { + CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex()); + } + + unsigned getBundleOperandsEndIndex() const { + CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex()); + } + + unsigned getNumTotalBundleOperands() const { + CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands()); + } + + OperandBundleUse getOperandBundleAt(unsigned Index) const { + CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index)); + } + + Optional getOperandBundle(StringRef Name) const { + CALLSITE_DELEGATE_GETTER(getOperandBundle(Name)); + } + + Optional getOperandBundle(uint32_t ID) const { + CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); + } + + IterTy arg_begin() const { + CALLSITE_DELEGATE_GETTER(arg_begin()); + } + + IterTy arg_end() const { + CALLSITE_DELEGATE_GETTER(arg_end()); + } + #undef CALLSITE_DELEGATE_GETTER #undef CALLSITE_DELEGATE_SETTER - /// @brief Determine whether this argument is not captured. 
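Taken together, the new predicates and the delegated bundle accessors let a pass treat calls and invokes uniformly. A minimal sketch (the function name is invented, and U is assumed to be a use of CS's instruction):

#include "llvm/IR/CallSite.h"
#include "llvm/Support/raw_ostream.h"

// Classify one use of the call site, then list its operand bundle tags.
static void inspect(llvm::CallSite CS, const llvm::Use *U) {
  if (CS.isArgOperand(U))
    llvm::errs() << "use is argument #" << CS.getArgumentNo(U) << "\n";
  else if (CS.isBundleOperand(U))
    llvm::errs() << "use is an operand bundle input\n";
  else
    // Arguments and bundle inputs together are the data operands; what
    // remains is the callee and, for invokes, the two successor blocks.
    llvm::errs() << "use is the callee or a successor\n";

  for (unsigned I = 0, E = CS.getNumOperandBundles(); I != E; ++I) {
    llvm::OperandBundleUse Bundle = CS.getOperandBundleAt(I);
    llvm::errs() << Bundle.getTagName() << ": " << Bundle.Inputs.size()
                 << " input(s)\n";
  }
}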
- bool doesNotCapture(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::NoCapture); + void getOperandBundlesAsDefs(SmallVectorImpl &Defs) const { + const Instruction *II = getInstruction(); + // Since this is actually a getter that "looks like" a setter, don't use the + // above macros to avoid confusion. + if (isCall()) + cast(II)->getOperandBundlesAsDefs(Defs); + else + cast(II)->getOperandBundlesAsDefs(Defs); + } + + /// @brief Determine whether this data operand is not captured. + bool doesNotCapture(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture); } /// @brief Determine whether this argument is passed by value. @@ -345,13 +485,13 @@ public: return paramHasAttr(arg_size(), Attribute::InAlloca); } - bool doesNotAccessMemory(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ReadNone); + bool doesNotAccessMemory(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); } - bool onlyReadsMemory(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ReadOnly) || - paramHasAttr(ArgNo + 1, Attribute::ReadNone); + bool onlyReadsMemory(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) || + dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); } /// @brief Return true if the return value is known to be not null. @@ -378,13 +518,6 @@ public: } private: - unsigned getArgumentEndOffset() const { - if (isCall()) - return 1; // Skip Callee - else - return 3; // Skip BB, BB, Callee - } - IterTy getCallee() const { if (isCall()) // Skip Callee return cast(getInstruction())->op_end() - 1; @@ -393,7 +526,7 @@ private: } }; -class CallSite : public CallSiteBase { public: diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 9872e6ec794d..bc050928266e 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -69,6 +69,12 @@ namespace CallingConv { // (almost) all registers. PreserveAll = 15, + // Swift - Calling convention for Swift. + Swift = 16, + + // CXX_FAST_TLS - Calling convention for access functions. + CXX_FAST_TLS = 17, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, @@ -144,7 +150,26 @@ namespace CallingConv { /// \brief MSVC calling convention that passes vectors and vector aggregates /// in SSE registers. - X86_VectorCall = 80 + X86_VectorCall = 80, + + /// \brief Calling convention used by HipHop Virtual Machine (HHVM) to + /// perform calls to and from translation cache, and for calling PHP + /// functions. + /// HHVM calling convention supports tail/sibling call elimination. + HHVM = 81, + + /// \brief HHVM calling convention for invoking C/C++ helpers. + HHVM_C = 82, + + /// X86_INTR - x86 hardware interrupt context. Callee may take one or two + /// parameters, where the 1st represents a pointer to hardware context frame + /// and the 2nd represents hardware error code, the presence of the later + /// depends on the interrupt vector taken. Valid for both 32- and 64-bit + /// subtargets. + X86_INTR = 83, + + /// The highest possible calling convention ID. Must be some 2^k - 1. 
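The conventions being added here are ordinary enumerators, so adopting one is a single call on a function (or a call site); HHVM is just the example:

#include "llvm/IR/Function.h"

// Mark F as using the HHVM convention introduced above (ID 81).
static void markAsHHVM(llvm::Function &F) {
  F.setCallingConv(llvm::CallingConv::HHVM);
}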
+ MaxID = 1023 }; } // End CallingConv namespace diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h index 4d4c15fb68cd..fb79e13af3a5 100644 --- a/include/llvm/IR/Comdat.h +++ b/include/llvm/IR/Comdat.h @@ -42,7 +42,7 @@ public: SelectionKind getSelectionKind() const { return SK; } void setSelectionKind(SelectionKind Val) { SK = Val; } StringRef getName() const; - void print(raw_ostream &OS) const; + void print(raw_ostream &OS, bool IsForDebug = false) const; void dump() const; private: diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 019b4343a133..bb88905aa57a 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -24,18 +24,18 @@ namespace llvm { /// This is an important base class in LLVM. It provides the common facilities /// of all constant values in an LLVM program. A constant is a value that is /// immutable at runtime. Functions are constants because their address is -/// immutable. Same with global variables. -/// +/// immutable. Same with global variables. +/// /// All constants share the capabilities provided in this class. All constants /// can have a null value. They can have an operand list. Constants can be /// simple (integer and floating point values), complex (arrays and structures), -/// or expression based (computations yielding a constant value composed of +/// or expression based (computations yielding a constant value composed of /// only certain operators and other constant values). -/// -/// Note that Constants are immutable (once created they never change) -/// and are fully shared by structural equivalence. This means that two -/// structurally equivalent constants will always have the same address. -/// Constants are created on demand as needed and never deleted: thus clients +/// +/// Note that Constants are immutable (once created they never change) +/// and are fully shared by structural equivalence. This means that two +/// structurally equivalent constants will always have the same address. +/// Constants are created on demand as needed and never deleted: thus clients /// don't have to worry about the lifetime of the objects. /// @brief LLVM Constant Representation class Constant : public User { @@ -59,7 +59,7 @@ public: /// getAllOnesValue. bool isAllOnesValue() const; - /// isNegativeZeroValue - Return true if the value is what would be returned + /// isNegativeZeroValue - Return true if the value is what would be returned /// by getZeroValueForNegation. bool isNegativeZeroValue() const; @@ -85,29 +85,14 @@ public: /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things. bool isConstantUsed() const; - - enum PossibleRelocationsTy { - NoRelocation = 0, - LocalRelocation = 1, - GlobalRelocations = 2 - }; - - /// getRelocationInfo - This method classifies the entry according to - /// whether or not it may generate a relocation entry. This must be - /// conservative, so if it might codegen to a relocatable entry, it should say - /// so. The return values are: - /// - /// NoRelocation: This constant pool entry is guaranteed to never have a - /// relocation applied to it (because it holds a simple constant like - /// '4'). - /// LocalRelocation: This entry has relocations, but the entries are - /// guaranteed to be resolvable by the static linker, so the dynamic - /// linker will never see them. - /// GlobalRelocations: This entry may have arbitrary relocations. 
+ + /// This method classifies the entry according to whether or not it may + /// generate a relocation entry. This must be conservative, so if it might + /// codegen to a relocatable entry, it should say so. /// - /// FIXME: This really should not be in VMCore. - PossibleRelocationsTy getRelocationInfo() const; - + /// FIXME: This really should not be in IR. + bool needsRelocation() const; + /// getAggregateElement - For aggregates (struct/array/vector) return the /// constant that corresponds to the specified element if possible, or null if /// not. This can return null if the element index is a ConstantExpr, or if @@ -159,8 +144,8 @@ public: /// getIntegerValue - Return the value for an integer or pointer constant, /// or a vector thereof, with the given scalar value. - static Constant *getIntegerValue(Type* Ty, const APInt &V); - + static Constant *getIntegerValue(Type *Ty, const APInt &V); + /// removeDeadConstantUsers - If there are any dead constant users dangling /// off of this constant, remove them. This method is useful for clients /// that want to check to see if a global is unused, but don't want to deal diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index 9ded3ca36a70..fb596a3bf16e 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -82,6 +82,17 @@ public: static ConstantRange makeSatisfyingICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other); + /// Return the largest range containing all X such that "X BinOpC C" does not + /// wrap (overflow). + /// + /// Example: + /// typedef OverflowingBinaryOperator OBO; + /// makeNoWrapRegion(Add, i8 1, OBO::NoSignedWrap) == [-128, 127) + /// makeNoWrapRegion(Add, i8 1, OBO::NoUnsignedWrap) == [0, -1) + /// makeNoWrapRegion(Add, i8 0, OBO::NoUnsignedWrap) == Full Set + static ConstantRange makeNoWrapRegion(Instruction::BinaryOps BinOp, + const APInt &C, unsigned NoWrapKind); + /// Return the lower value for this range. /// const APInt &getLower() const { return Lower; } @@ -207,7 +218,7 @@ public: /// Make this range have the bit width given by \p BitWidth. The /// value is zero extended, truncated, or left alone to make it that width. ConstantRange zextOrTrunc(uint32_t BitWidth) const; - + /// Make this range have the bit width given by \p BitWidth. The /// value is sign extended, truncated, or left alone to make it that width. ConstantRange sextOrTrunc(uint32_t BitWidth) const; @@ -258,7 +269,7 @@ public: /// Return a new range that is the logical not of the current set. /// ConstantRange inverse() const; - + /// Print out the bounds to a stream. /// void print(raw_ostream &OS) const; diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h index 0c7a84fc8bfe..a5a20c9c5701 100644 --- a/include/llvm/IR/Constants.h +++ b/include/llvm/IR/Constants.h @@ -590,7 +590,7 @@ public: /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. - static bool isElementTypeCompatible(const Type *Ty); + static bool isElementTypeCompatible(Type *Ty); /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. 
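The doc-comment examples for makeNoWrapRegion translate directly into code. A sketch reproducing the first one (a no-signed-wrap add of 1 on i8), assuming the wrap-flag constants come from llvm/IR/Operator.h:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"

// All 8-bit X for which "X + 1" cannot signed-wrap, i.e. [-128, 127).
static llvm::ConstantRange nswAddOneRegion() {
  llvm::APInt One(/*numBits=*/8, /*val=*/1);
  return llvm::ConstantRange::makeNoWrapRegion(
      llvm::Instruction::Add, One,
      llvm::OverflowingBinaryOperator::NoSignedWrap);
}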
@@ -795,7 +795,32 @@ public: } }; +//===----------------------------------------------------------------------===// +/// ConstantTokenNone - a constant token which is empty +/// +class ConstantTokenNone : public Constant { + void *operator new(size_t, unsigned) = delete; + ConstantTokenNone(const ConstantTokenNone &) = delete; + friend class Constant; + void destroyConstantImpl(); + Value *handleOperandChangeImpl(Value *From, Value *To, Use *U); + +protected: + explicit ConstantTokenNone(LLVMContext &Context) + : Constant(Type::getTokenTy(Context), ConstantTokenNoneVal, nullptr, 0) {} + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s, 0); } + +public: + /// Return the ConstantTokenNone. + static ConstantTokenNone *get(LLVMContext &Context); + + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + static bool classof(const Value *V) { + return V->getValueID() == ConstantTokenNoneVal; + } +}; /// BlockAddress - The address of a basic block. /// @@ -1175,7 +1200,8 @@ public: /// gets constant-folded, the type changes, or the expression is otherwise /// canonicalized. This parameter should almost always be \c false. Constant *getWithOperands(ArrayRef Ops, Type *Ty, - bool OnlyIfReduced = false) const; + bool OnlyIfReduced = false, + Type *SrcTy = nullptr) const; /// getAsInstruction - Returns an Instruction which implements the same /// operation as this ConstantExpr. The instruction is not linked to any basic diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index aa43c02d5cd8..aeec39541154 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -158,7 +158,9 @@ namespace llvm { /// Create debugging information entry for a c++ /// style reference or rvalue reference type. - DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy); + DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy, + uint64_t SizeInBits = 0, + uint64_t AlignInBits = 0); /// Create debugging information entry for a typedef. /// \param Ty Original type. @@ -375,15 +377,20 @@ namespace llvm { DIType *UnderlyingType, StringRef UniqueIdentifier = ""); /// Create subroutine type. - /// \param File File in which this subroutine is defined. /// \param ParameterTypes An array of subroutine parameter types. This /// includes return type at 0th index. /// \param Flags E.g.: LValueReference. /// These flags are used to emit dwarf attributes. - DISubroutineType *createSubroutineType(DIFile *File, - DITypeRefArray ParameterTypes, + DISubroutineType *createSubroutineType(DITypeRefArray ParameterTypes, unsigned Flags = 0); + /// Create an external type reference. + /// \param Tag Dwarf TAG. + /// \param File File in which the type is defined. + /// \param UniqueIdentifier A unique identifier for the type. + DICompositeType *createExternalTypeRef(unsigned Tag, DIFile *File, + StringRef UniqueIdentifier); + /// Create a new DIType* with "artificial" flag set. DIType *createArtificialType(DIType *Ty); @@ -450,26 +457,36 @@ namespace llvm { unsigned LineNo, DIType *Ty, bool isLocalToUnit, llvm::Constant *Val, MDNode *Decl = nullptr); - /// Create a new descriptor for the specified - /// local variable. - /// \param Tag Dwarf TAG. Usually DW_TAG_auto_variable or - /// DW_TAG_arg_variable. - /// \param Scope Variable scope. - /// \param Name Variable name. - /// \param File File where this variable is defined. - /// \param LineNo Line number. - /// \param Ty Variable Type - /// \param AlwaysPreserve Boolean. 
Set to true if debug info for this - /// variable should be preserved in optimized build. - /// \param Flags Flags, e.g. artificial variable. - /// \param ArgNo If this variable is an argument then this argument's - /// number. 1 indicates 1st argument. - DILocalVariable *createLocalVariable(unsigned Tag, DIScope *Scope, - StringRef Name, DIFile *File, - unsigned LineNo, DIType *Ty, + /// Create a new descriptor for an auto variable. This is a local variable + /// that is not a subprogram parameter. + /// + /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually + /// leads to a \a DISubprogram. + /// + /// If \c AlwaysPreserve, this variable will be referenced from its + /// containing subprogram, and will survive some optimizations. + DILocalVariable *createAutoVariable(DIScope *Scope, StringRef Name, + DIFile *File, unsigned LineNo, + DIType *Ty, bool AlwaysPreserve = false, - unsigned Flags = 0, - unsigned ArgNo = 0); + unsigned Flags = 0); + + /// Create a new descriptor for a parameter variable. + /// + /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually + /// leads to a \a DISubprogram. + /// + /// \c ArgNo is the index (starting from \c 1) of this variable in the + /// subprogram parameters. \c ArgNo should not conflict with other + /// parameters of the same subprogram. + /// + /// If \c AlwaysPreserve, this variable will be referenced from its + /// containing subprogram, and will survive some optimizations. + DILocalVariable *createParameterVariable(DIScope *Scope, StringRef Name, + unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, + bool AlwaysPreserve = false, + unsigned Flags = 0); /// Create a new descriptor for the specified /// variable which has a complex address expression for its address. @@ -499,15 +516,15 @@ namespace llvm { /// \param Flags e.g. is this function prototyped or not. /// These flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. - /// \param Fn llvm::Function pointer. - /// \param TParam Function template parameters. - DISubprogram * - createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, - DIFile *File, unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, - unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr, - MDNode *Decl = nullptr); + /// \param TParams Function template parameters. + DISubprogram *createFunction(DIScope *Scope, StringRef Name, + StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags = 0, + bool isOptimized = false, + DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// Identical to createFunction, /// except that the resulting DbgNode is meant to be RAUWed. @@ -515,18 +532,19 @@ namespace llvm { DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags = 0, - bool isOptimized = false, Function *Fn = nullptr, - MDNode *TParam = nullptr, MDNode *Decl = nullptr); + bool isOptimized = false, DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// FIXME: this is added for dragonegg. Once we update dragonegg /// to call resolve function, this will be removed. 
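A sketch of the split entry points from a frontend's perspective; DIB, Scope, File, and Ty stand for values the caller already has, and the variable name and line number are invented:

#include "llvm/IR/DIBuilder.h"

static llvm::DILocalVariable *
describeFirstParam(llvm::DIBuilder &DIB, llvm::DIScope *Scope,
                   llvm::DIFile *File, llvm::DIType *Ty) {
  // Parameters now get an explicit 1-based index instead of a fake
  // DW_TAG_arg_variable tag; auto variables use createAutoVariable().
  return DIB.createParameterVariable(Scope, "x", /*ArgNo=*/1, File,
                                     /*LineNo=*/10, Ty,
                                     /*AlwaysPreserve=*/true);
}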
- DISubprogram * - createFunction(DIScopeRef Scope, StringRef Name, StringRef LinkageName, - DIFile *File, unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, - unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr, - MDNode *Decl = nullptr); + DISubprogram *createFunction(DIScopeRef Scope, StringRef Name, + StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags = 0, + bool isOptimized = false, + DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// Create a new descriptor for the specified C++ method. /// See comments in \a DISubprogram* for descriptions of these fields. @@ -545,15 +563,14 @@ namespace llvm { /// \param Flags e.g. is this function prototyped or not. /// This flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. - /// \param Fn llvm::Function pointer. - /// \param TParam Function template parameters. + /// \param TParams Function template parameters. DISubprogram * createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0, DIType *VTableHolder = nullptr, unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr); + DITemplateParameterArray TParams = nullptr); /// This creates new descriptor for a namespace with the specified /// parent scope. @@ -685,7 +702,7 @@ namespace llvm { /// has a self-reference -- \a DIBuilder needs to track the array to /// resolve cycles. void replaceArrays(DICompositeType *&T, DINodeArray Elements, - DINodeArray TParems = DINodeArray()); + DINodeArray TParams = DINodeArray()); /// Replace a temporary node. /// diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index 892d6c9936c0..19a3a6661feb 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -475,7 +475,8 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) { class StructLayout { uint64_t StructSize; unsigned StructAlignment; - unsigned NumElements; + bool IsPadded : 1; + unsigned NumElements : 31; uint64_t MemberOffsets[1]; // variable sized array! public: uint64_t getSizeInBytes() const { return StructSize; } @@ -484,6 +485,10 @@ public: unsigned getAlignment() const { return StructAlignment; } + /// Returns whether the struct has padding or not between its fields. + /// NB: Padding in nested element is not taken into account. + bool hasPadding() const { return IsPadded; } + /// \brief Given a valid byte offset into the structure, returns the structure /// index that contains it. unsigned getElementContainingOffset(uint64_t Offset) const; diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h index 5429648ade2c..4caceacbb58e 100644 --- a/include/llvm/IR/DebugInfo.h +++ b/include/llvm/IR/DebugInfo.h @@ -44,9 +44,6 @@ DISubprogram *getDISubprogram(const MDNode *Scope); /// \returns a valid subprogram, if found. Otherwise, return \c nullptr. DISubprogram *getDISubprogram(const Function *F); -/// \brief Find underlying composite type. -DICompositeTypeBase *getDICompositeType(DIType *T); - /// \brief Generate map by visiting all retained types. 
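The new padding bit is queried through StructLayout; a minimal sketch:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

// True if DL inserts padding between STy's own fields (padding inside
// nested aggregates is not counted, per the comment above).
static bool structHasPadding(const llvm::DataLayout &DL,
                             llvm::StructType *STy) {
  return DL.getStructLayout(STy)->hasPadding();
}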
DITypeIdentifierMap generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes); @@ -108,23 +105,23 @@ public: typedef SmallVectorImpl::const_iterator scope_iterator; iterator_range compile_units() const { - return iterator_range(CUs.begin(), CUs.end()); + return make_range(CUs.begin(), CUs.end()); } iterator_range subprograms() const { - return iterator_range(SPs.begin(), SPs.end()); + return make_range(SPs.begin(), SPs.end()); } iterator_range global_variables() const { - return iterator_range(GVs.begin(), GVs.end()); + return make_range(GVs.begin(), GVs.end()); } iterator_range types() const { - return iterator_range(TYs.begin(), TYs.end()); + return make_range(TYs.begin(), TYs.end()); } iterator_range scopes() const { - return iterator_range(Scopes.begin(), Scopes.end()); + return make_range(Scopes.begin(), Scopes.end()); } unsigned compile_unit_count() const { return CUs.size(); } @@ -146,8 +143,6 @@ private: bool TypeMapInitialized; }; -DenseMap makeSubprogramMap(const Module &M); - } // end namespace llvm #endif diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def index d5de8683fd3b..9756c12264b4 100644 --- a/include/llvm/IR/DebugInfoFlags.def +++ b/include/llvm/IR/DebugInfoFlags.def @@ -32,5 +32,6 @@ HANDLE_DI_FLAG((1 << 11), Vector) HANDLE_DI_FLAG((1 << 12), StaticMember) HANDLE_DI_FLAG((1 << 13), LValueReference) HANDLE_DI_FLAG((1 << 14), RValueReference) +HANDLE_DI_FLAG((1 << 15), ExternalTypeRef) #undef HANDLE_DI_FLAG diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 9c5a95721d79..456313a70e83 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -20,15 +20,7 @@ // Helper macros for defining get() overrides. #define DEFINE_MDNODE_GET_UNPACK_IMPL(...) __VA_ARGS__ #define DEFINE_MDNODE_GET_UNPACK(ARGS) DEFINE_MDNODE_GET_UNPACK_IMPL ARGS -#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \ - static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ - return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \ - } \ - static CLASS *getIfExists(LLVMContext &Context, \ - DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ - return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \ - /* ShouldCreate */ false); \ - } \ +#define DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) \ static CLASS *getDistinct(LLVMContext &Context, \ DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Distinct); \ @@ -38,6 +30,16 @@ return Temp##CLASS( \ getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Temporary)); \ } +#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \ + static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ + return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \ + } \ + static CLASS *getIfExists(LLVMContext &Context, \ + DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ + return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \ + /* ShouldCreate */ false); \ + } \ + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) namespace llvm { @@ -67,8 +69,8 @@ public: operator Metadata *() const { return const_cast(MD); } - bool operator==(const TypedDINodeRef &X) const { return MD == X.MD; }; - bool operator!=(const TypedDINodeRef &X) const { return MD != X.MD; }; + bool operator==(const TypedDINodeRef &X) const { return MD == X.MD; } + bool operator!=(const TypedDINodeRef &X) const { return MD != X.MD; } /// \brief Create a reference. 
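The make_range cleanups are behavior-neutral, so DebugInfoFinder clients iterate as before. For instance (helper name assumed):

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Module.h"

static unsigned countCompileUnits(const llvm::Module &M) {
  llvm::DebugInfoFinder Finder;
  Finder.processModule(M);
  unsigned N = 0;
  for (llvm::DICompileUnit *CU : Finder.compile_units()) {
    (void)CU;
    ++N;
  }
  return N;
}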
/// @@ -97,6 +99,7 @@ class DITypeRefArray { const MDTuple *N = nullptr; public: + DITypeRefArray() = default; DITypeRefArray(const MDTuple *N) : N(N) {} explicit operator bool() const { return get(); } @@ -574,6 +577,7 @@ public: bool isStaticMember() const { return getFlags() & FlagStaticMember; } bool isLValueReference() const { return getFlags() & FlagLValueReference; } bool isRValueReference() const { return getFlags() & FlagRValueReference; } + bool isExternalTypeRef() const { return getFlags() & FlagExternalTypeRef; } DITypeRef getRef() const { return DITypeRef::get(this); } @@ -646,45 +650,21 @@ public: } }; -/// \brief Base class for DIDerivedType and DICompositeType. -/// -/// TODO: Delete; they're not really related. -class DIDerivedTypeBase : public DIType { -protected: - DIDerivedTypeBase(LLVMContext &C, unsigned ID, StorageType Storage, - unsigned Tag, unsigned Line, uint64_t SizeInBits, - uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, - ArrayRef Ops) - : DIType(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, OffsetInBits, - Flags, Ops) {} - ~DIDerivedTypeBase() = default; - -public: - DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } - Metadata *getRawBaseType() const { return getOperand(3); } - - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DIDerivedTypeKind || - MD->getMetadataID() == DICompositeTypeKind || - MD->getMetadataID() == DISubroutineTypeKind; - } -}; - /// \brief Derived types. /// /// This includes qualified types, pointers, references, friends, typedefs, and /// class members. /// /// TODO: Split out members (inheritance, fields, methods, etc.). -class DIDerivedType : public DIDerivedTypeBase { +class DIDerivedType : public DIType { friend class LLVMContextImpl; friend class MDNode; DIDerivedType(LLVMContext &C, StorageType Storage, unsigned Tag, unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, ArrayRef Ops) - : DIDerivedTypeBase(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits, - AlignInBits, OffsetInBits, Flags, Ops) {} + : DIType(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits, + AlignInBits, OffsetInBits, Flags, Ops) {} ~DIDerivedType() = default; static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag, @@ -732,6 +712,10 @@ public: TempDIDerivedType clone() const { return cloneImpl(); } + //// Get the base type this is derived from. + DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } + Metadata *getRawBaseType() const { return getOperand(3); } + /// \brief Get extra data associated with this derived type. /// /// Class type for pointer-to-members, objective-c property node for ivars, @@ -764,88 +748,23 @@ public: } }; -/// \brief Base class for DICompositeType and DISubroutineType. -/// -/// TODO: Delete; they're not really related. -class DICompositeTypeBase : public DIDerivedTypeBase { - unsigned RuntimeLang; - -protected: - DICompositeTypeBase(LLVMContext &C, unsigned ID, StorageType Storage, - unsigned Tag, unsigned Line, unsigned RuntimeLang, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - ArrayRef Ops) - : DIDerivedTypeBase(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, - OffsetInBits, Flags, Ops), - RuntimeLang(RuntimeLang) {} - ~DICompositeTypeBase() = default; - -public: - /// \brief Get the elements of the composite type. - /// - /// \note Calling this is only valid for \a DICompositeType. 
This assertion - /// can be removed once \a DISubroutineType has been separated from - /// "composite types". - DINodeArray getElements() const { - assert(!isa(this) && "no elements for DISubroutineType"); - return cast_or_null(getRawElements()); - } - DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); } - DITemplateParameterArray getTemplateParams() const { - return cast_or_null(getRawTemplateParams()); - } - StringRef getIdentifier() const { return getStringOperand(7); } - unsigned getRuntimeLang() const { return RuntimeLang; } - - Metadata *getRawElements() const { return getOperand(4); } - Metadata *getRawVTableHolder() const { return getOperand(5); } - Metadata *getRawTemplateParams() const { return getOperand(6); } - MDString *getRawIdentifier() const { return getOperandAs(7); } - - /// \brief Replace operands. - /// - /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision - /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track - /// of its movement if necessary. - /// @{ - void replaceElements(DINodeArray Elements) { -#ifndef NDEBUG - for (DINode *Op : getElements()) - assert(std::find(Elements->op_begin(), Elements->op_end(), Op) && - "Lost a member during member list replacement"); -#endif - replaceOperandWith(4, Elements.get()); - } - void replaceVTableHolder(DITypeRef VTableHolder) { - replaceOperandWith(5, VTableHolder); - } - void replaceTemplateParams(DITemplateParameterArray TemplateParams) { - replaceOperandWith(6, TemplateParams.get()); - } - /// @} - - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DICompositeTypeKind || - MD->getMetadataID() == DISubroutineTypeKind; - } -}; - /// \brief Composite types. /// /// TODO: Detach from DerivedTypeBase (split out MDEnumType?). /// TODO: Create a custom, unrelated node for DW_TAG_array_type. -class DICompositeType : public DICompositeTypeBase { +class DICompositeType : public DIType { friend class LLVMContextImpl; friend class MDNode; + unsigned RuntimeLang; + DICompositeType(LLVMContext &C, StorageType Storage, unsigned Tag, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, ArrayRef Ops) - : DICompositeTypeBase(C, DICompositeTypeKind, Storage, Tag, Line, - RuntimeLang, SizeInBits, AlignInBits, OffsetInBits, - Flags, Ops) {} + : DIType(C, DICompositeTypeKind, Storage, Tag, Line, SizeInBits, + AlignInBits, OffsetInBits, Flags, Ops), + RuntimeLang(RuntimeLang) {} ~DICompositeType() = default; static DICompositeType * @@ -903,6 +822,45 @@ public: TempDICompositeType clone() const { return cloneImpl(); } + DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } + DINodeArray getElements() const { + return cast_or_null(getRawElements()); + } + DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); } + DITemplateParameterArray getTemplateParams() const { + return cast_or_null(getRawTemplateParams()); + } + StringRef getIdentifier() const { return getStringOperand(7); } + unsigned getRuntimeLang() const { return RuntimeLang; } + + Metadata *getRawBaseType() const { return getOperand(3); } + Metadata *getRawElements() const { return getOperand(4); } + Metadata *getRawVTableHolder() const { return getOperand(5); } + Metadata *getRawTemplateParams() const { return getOperand(6); } + MDString *getRawIdentifier() const { return getOperandAs(7); } + + /// \brief Replace operands. 
+ /// + /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision + /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track + /// of its movement if necessary. + /// @{ + void replaceElements(DINodeArray Elements) { +#ifndef NDEBUG + for (DINode *Op : getElements()) + assert(std::find(Elements->op_begin(), Elements->op_end(), Op) && + "Lost a member during member list replacement"); +#endif + replaceOperandWith(4, Elements.get()); + } + void replaceVTableHolder(DITypeRef VTableHolder) { + replaceOperandWith(5, VTableHolder); + } + void replaceTemplateParams(DITemplateParameterArray TemplateParams) { + replaceOperandWith(6, TemplateParams.get()); + } + /// @} + static bool classof(const Metadata *MD) { return MD->getMetadataID() == DICompositeTypeKind; } @@ -918,17 +876,15 @@ template TypedDINodeRef TypedDINodeRef::get(const T *N) { /// \brief Type array for a subprogram. /// -/// TODO: Detach from CompositeType, and fold the array of types in directly -/// as operands. -class DISubroutineType : public DICompositeTypeBase { +/// TODO: Fold the array of types in directly as operands. +class DISubroutineType : public DIType { friend class LLVMContextImpl; friend class MDNode; DISubroutineType(LLVMContext &C, StorageType Storage, unsigned Flags, ArrayRef Ops) - : DICompositeTypeBase(C, DISubroutineTypeKind, Storage, - dwarf::DW_TAG_subroutine_type, 0, 0, 0, 0, 0, Flags, - Ops) {} + : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, + 0, 0, 0, 0, Flags, Ops) {} ~DISubroutineType() = default; static DISubroutineType *getImpl(LLVMContext &Context, unsigned Flags, @@ -957,7 +913,7 @@ public: DITypeRefArray getTypeArray() const { return cast_or_null(getRawTypeArray()); } - Metadata *getRawTypeArray() const { return getRawElements(); } + Metadata *getRawTypeArray() const { return getOperand(3); } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DISubroutineTypeKind; @@ -981,7 +937,9 @@ class DICompileUnit : public DIScope { : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), - DWOId(DWOId) {} + DWOId(DWOId) { + assert(Storage != Uniqued); + } ~DICompileUnit() = default; static DICompileUnit * @@ -991,15 +949,16 @@ class DICompileUnit : public DIScope { unsigned EmissionKind, DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId, - StorageType Storage, bool ShouldCreate = true) { + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, SourceLanguage, File, getCanonicalMDString(Context, Producer), IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion, getCanonicalMDString(Context, SplitDebugFilename), EmissionKind, EnumTypes.get(), RetainedTypes.get(), Subprograms.get(), GlobalVariables.get(), - ImportedEntities.get(), DWOId, Storage, ShouldCreate); + ImportedEntities.get(), Macros.get(), DWOId, Storage, + ShouldCreate); } static DICompileUnit * getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File, @@ -1007,40 +966,44 @@ class DICompileUnit : public DIScope { unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata 
*GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, StorageType Storage, - bool ShouldCreate = true); + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, + StorageType Storage, bool ShouldCreate = true); TempDICompileUnit cloneImpl() const { return getTemporary( getContext(), getSourceLanguage(), getFile(), getProducer(), isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(), getEmissionKind(), getEnumTypes(), getRetainedTypes(), getSubprograms(), - getGlobalVariables(), getImportedEntities(), DWOId); + getGlobalVariables(), getImportedEntities(), getMacros(), DWOId); } + static void get() = delete; + static void getIfExists() = delete; + public: - DEFINE_MDNODE_GET(DICompileUnit, - (unsigned SourceLanguage, DIFile *File, StringRef Producer, - bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, - StringRef SplitDebugFilename, unsigned EmissionKind, - DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, - DISubprogramArray Subprograms, - DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId), - (SourceLanguage, File, Producer, IsOptimized, Flags, - RuntimeVersion, SplitDebugFilename, EmissionKind, - EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities, DWOId)) - DEFINE_MDNODE_GET( + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( + DICompileUnit, + (unsigned SourceLanguage, DIFile *File, StringRef Producer, + bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, + StringRef SplitDebugFilename, unsigned EmissionKind, + DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, + DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId), + (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, + SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms, + GlobalVariables, ImportedEntities, Macros, DWOId)) + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( DICompileUnit, (unsigned SourceLanguage, Metadata *File, MDString *Producer, bool IsOptimized, MDString *Flags, unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, - Metadata *GlobalVariables, Metadata *ImportedEntities, uint64_t DWOId), + Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros, + uint64_t DWOId), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms, - GlobalVariables, ImportedEntities, DWOId)) + GlobalVariables, ImportedEntities, Macros, DWOId)) TempDICompileUnit clone() const { return cloneImpl(); } @@ -1066,7 +1029,11 @@ public: DIImportedEntityArray getImportedEntities() const { return cast_or_null(getRawImportedEntities()); } - unsigned getDWOId() const { return DWOId; } + DIMacroNodeArray getMacros() const { + return cast_or_null(getRawMacros()); + } + uint64_t getDWOId() const { return DWOId; } + void setDWOId(uint64_t DwoId) { DWOId = DwoId; } MDString *getRawProducer() const { return getOperandAs(1); } MDString *getRawFlags() const { return getOperandAs(2); } @@ -1078,6 +1045,7 @@ public: Metadata *getRawSubprograms() const { return getOperand(6); } Metadata *getRawGlobalVariables() const { return getOperand(7); } Metadata *getRawImportedEntities() const { return getOperand(8); } + Metadata *getRawMacros() const { return getOperand(9); } /// \brief Replace arrays. 
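Since get()/getIfExists() are deleted, a compile unit can now only be created distinct; code going through DIBuilder already gets that for free. A sketch, assuming the 3.8-era createCompileUnit signature and placeholder file names:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"

// Produces a distinct DICompileUnit attached to M's debug info.
static llvm::DICompileUnit *emitCU(llvm::Module &M) {
  llvm::DIBuilder DIB(M);
  return DIB.createCompileUnit(llvm::dwarf::DW_LANG_C99, "a.c", "/tmp",
                               "example-producer", /*isOptimized=*/false,
                               /*Flags=*/"", /*RuntimeVersion=*/0);
}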
/// @@ -1100,6 +1068,7 @@ public: void replaceImportedEntities(DIImportedEntityArray N) { replaceOperandWith(8, N.get()); } + void replaceMacros(DIMacroNodeArray N) { replaceOperandWith(9, N.get()); } /// @} static bool classof(const Metadata *MD) { @@ -1157,8 +1126,10 @@ class DILocation : public MDNode { } TempDILocation cloneImpl() const { - return getTemporary(getContext(), getLine(), getColumn(), getScope(), - getInlinedAt()); + // Get the raw scope/inlinedAt since it is possible to invoke this on + // a DILocation containing temporary metadata. + return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), + getRawInlinedAt()); } // Disallow replacing operands. @@ -1276,14 +1247,13 @@ class DISubprogram : public DILocalScope { DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Constant *Function, DITemplateParameterArray TemplateParams, - DISubprogram *Declaration, DILocalVariableArray Variables, - StorageType Storage, bool ShouldCreate = true) { + DITemplateParameterArray TemplateParams, DISubprogram *Declaration, + DILocalVariableArray Variables, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized, - Function ? ConstantAsMetadata::get(Function) : nullptr, TemplateParams.get(), Declaration, Variables.get(), Storage, ShouldCreate); } @@ -1292,17 +1262,16 @@ class DISubprogram : public DILocalScope { MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, - unsigned Flags, bool IsOptimized, Metadata *Function, - Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate = true); + unsigned Flags, bool IsOptimized, Metadata *TemplateParams, + Metadata *Declaration, Metadata *Variables, StorageType Storage, + bool ShouldCreate = true); TempDISubprogram cloneImpl() const { - return getTemporary(getContext(), getScope(), getName(), getLinkageName(), - getFile(), getLine(), getType(), isLocalToUnit(), - isDefinition(), getScopeLine(), getContainingType(), - getVirtuality(), getVirtualIndex(), getFlags(), - isOptimized(), getFunctionConstant(), - getTemplateParams(), getDeclaration(), getVariables()); + return getTemporary( + getContext(), getScope(), getName(), getLinkageName(), getFile(), + getLine(), getType(), isLocalToUnit(), isDefinition(), getScopeLine(), + getContainingType(), getVirtuality(), getVirtualIndex(), getFlags(), + isOptimized(), getTemplateParams(), getDeclaration(), getVariables()); } public: @@ -1312,13 +1281,12 @@ public: bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Constant *Function = nullptr, DITemplateParameterArray TemplateParams = nullptr, DISubprogram *Declaration = nullptr, DILocalVariableArray Variables = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, - VirtualIndex, Flags, IsOptimized, Function, TemplateParams, + VirtualIndex, Flags, IsOptimized, TemplateParams, Declaration, Variables)) 
DEFINE_MDNODE_GET( DISubprogram, @@ -1326,11 +1294,11 @@ public: unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Metadata *Function = nullptr, Metadata *TemplateParams = nullptr, - Metadata *Declaration = nullptr, Metadata *Variables = nullptr), + Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr, + Metadata *Variables = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized, - Function, TemplateParams, Declaration, Variables)) + TemplateParams, Declaration, Variables)) TempDISubprogram clone() const { return cloneImpl(); } @@ -1389,11 +1357,6 @@ public: return DITypeRef(getRawContainingType()); } - Constant *getFunctionConstant() const { - if (auto *C = cast_or_null(getRawFunction())) - return C->getValue(); - return nullptr; - } DITemplateParameterArray getTemplateParams() const { return cast_or_null(getRawTemplateParams()); } @@ -1407,30 +1370,11 @@ public: Metadata *getRawScope() const { return getOperand(1); } Metadata *getRawType() const { return getOperand(5); } Metadata *getRawContainingType() const { return getOperand(6); } - Metadata *getRawFunction() const { return getOperand(7); } - Metadata *getRawTemplateParams() const { return getOperand(8); } - Metadata *getRawDeclaration() const { return getOperand(9); } - Metadata *getRawVariables() const { return getOperand(10); } + Metadata *getRawTemplateParams() const { return getOperand(7); } + Metadata *getRawDeclaration() const { return getOperand(8); } + Metadata *getRawVariables() const { return getOperand(9); } - /// \brief Get a pointer to the function this subprogram describes. - /// - /// This dyn_casts \a getFunctionConstant() to \a Function. - /// - /// FIXME: Should this be looking through bitcasts? - Function *getFunction() const; - - /// \brief Replace the function. - /// - /// If \a isUniqued() and not \a isResolved(), this could node will be - /// RAUW'ed and deleted out from under the caller. Use a \a TrackingMDRef if - /// that's a problem. - /// @{ - void replaceFunction(Function *F); - void replaceFunction(ConstantAsMetadata *MD) { replaceOperandWith(7, MD); } - void replaceFunction(std::nullptr_t) { replaceOperandWith(7, nullptr); } - /// @} - - /// \brief Check if this subprogram decribes the given function. + /// \brief Check if this subprogram describes the given function. /// /// FIXME: Should this be looking through bitcasts? bool describes(const Function *F) const; @@ -1452,13 +1396,6 @@ public: Metadata *getRawScope() const { return getOperand(1); } - /// \brief Forwarding accessors to LexicalBlock. - /// - /// TODO: Remove these and update code to use \a DILexicalBlock directly. 
- /// @{ - inline unsigned getLine() const; - inline unsigned getColumn() const; - /// @} static bool classof(const Metadata *MD) { return MD->getMetadataID() == DILexicalBlockKind || MD->getMetadataID() == DILexicalBlockFileKind; @@ -1470,12 +1407,14 @@ class DILexicalBlock : public DILexicalBlockBase { friend class MDNode; unsigned Line; - unsigned Column; + uint16_t Column; DILexicalBlock(LLVMContext &C, StorageType Storage, unsigned Line, unsigned Column, ArrayRef Ops) : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Line(Line), - Column(Column) {} + Column(Column) { + assert(Column < (1u << 16) && "Expected 16-bit column"); + } ~DILexicalBlock() = default; static DILexicalBlock *getImpl(LLVMContext &Context, DILocalScope *Scope, @@ -1514,18 +1453,6 @@ public: } }; -unsigned DILexicalBlockBase::getLine() const { - if (auto *N = dyn_cast(this)) - return N->getLine(); - return 0; -} - -unsigned DILexicalBlockBase::getColumn() const { - if (auto *N = dyn_cast(this)) - return N->getColumn(); - return 0; -} - class DILexicalBlockFile : public DILexicalBlockBase { friend class LLVMContextImpl; friend class MDNode; @@ -1797,15 +1724,13 @@ public: }; /// \brief Base class for variables. -/// -/// TODO: Hardcode to DW_TAG_variable. class DIVariable : public DINode { unsigned Line; protected: - DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, - unsigned Line, ArrayRef Ops) - : DINode(C, ID, Storage, Tag, Ops), Line(Line) {} + DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line, + ArrayRef Ops) + : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {} ~DIVariable() = default; public: @@ -1850,8 +1775,7 @@ class DIGlobalVariable : public DIVariable { DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line, bool IsLocalToUnit, bool IsDefinition, ArrayRef Ops) - : DIVariable(C, DIGlobalVariableKind, Storage, dwarf::DW_TAG_variable, - Line, Ops), + : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {} ~DIGlobalVariable() = default; @@ -1923,8 +1847,6 @@ public: /// \brief Local variable. /// -/// TODO: Split between arguments and otherwise. -/// TODO: Use \c DW_TAG_variable instead of fake tags. /// TODO: Split up flags. 
class DILocalVariable : public DIVariable { friend class LLVMContextImpl; @@ -1933,42 +1855,42 @@ class DILocalVariable : public DIVariable { unsigned Arg; unsigned Flags; - DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Tag, - unsigned Line, unsigned Arg, unsigned Flags, - ArrayRef<Metadata *> Ops) - : DIVariable(C, DILocalVariableKind, Storage, Tag, Line, Ops), Arg(Arg), + DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Line, + unsigned Arg, unsigned Flags, ArrayRef<Metadata *> Ops) + : DIVariable(C, DILocalVariableKind, Storage, Line, Ops), Arg(Arg), Flags(Flags) {} ~DILocalVariable() = default; - static DILocalVariable *getImpl(LLVMContext &Context, unsigned Tag, - DIScope *Scope, StringRef Name, DIFile *File, - unsigned Line, DITypeRef Type, unsigned Arg, - unsigned Flags, StorageType Storage, + static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope, + StringRef Name, DIFile *File, unsigned Line, + DITypeRef Type, unsigned Arg, unsigned Flags, + StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Tag, Scope, getCanonicalMDString(Context, Name), - File, Line, Type, Arg, Flags, Storage, ShouldCreate); + return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File, + Line, Type, Arg, Flags, Storage, ShouldCreate); } - static DILocalVariable * - getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, - unsigned Flags, StorageType Storage, bool ShouldCreate = true); + static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope, + MDString *Name, Metadata *File, unsigned Line, + Metadata *Type, unsigned Arg, unsigned Flags, + StorageType Storage, + bool ShouldCreate = true); TempDILocalVariable cloneImpl() const { - return getTemporary(getContext(), getTag(), getScope(), getName(), - getFile(), getLine(), getType(), getArg(), getFlags()); + return getTemporary(getContext(), getScope(), getName(), getFile(), + getLine(), getType(), getArg(), getFlags()); } public: DEFINE_MDNODE_GET(DILocalVariable, - (unsigned Tag, DILocalScope *Scope, StringRef Name, - DIFile *File, unsigned Line, DITypeRef Type, unsigned Arg, + (DILocalScope * Scope, StringRef Name, DIFile *File, + unsigned Line, DITypeRef Type, unsigned Arg, unsigned Flags), - (Tag, Scope, Name, File, Line, Type, Arg, Flags)) + (Scope, Name, File, Line, Type, Arg, Flags)) DEFINE_MDNODE_GET(DILocalVariable, - (unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, - unsigned Arg, unsigned Flags), - (Tag, Scope, Name, File, Line, Type, Arg, Flags)) + (Metadata * Scope, MDString *Name, Metadata *File, + unsigned Line, Metadata *Type, unsigned Arg, + unsigned Flags), + (Scope, Name, File, Line, Type, Arg, Flags)) TempDILocalVariable clone() const { return cloneImpl(); } @@ -1979,6 +1901,7 @@ public: return cast<DILocalScope>(DIVariable::getScope()); } + bool isParameter() const { return Arg; } unsigned getArg() const { return Arg; } unsigned getFlags() const { return Flags; } @@ -1988,7 +1911,7 @@ public: /// \brief Check that a location is valid for this variable. /// /// Check that \c DL exists, is in the same subprogram, and has the same - /// inlined-at location as \c this. (Otherwise, it's not a valid attachemnt + /// inlined-at location as \c this. (Otherwise, it's not a valid attachment /// to a \a DbgInfoIntrinsic.)
bool isValidLocationForIntrinsic(const DILocation *DL) const { return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram(); } @@ -2284,6 +2207,165 @@ public: } }; +/// \brief Macro Info DWARF-like metadata node. +/// +/// A metadata node with a DWARF macro info (i.e., a constant named +/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode +/// because it's potentially used for non-DWARF output. +class DIMacroNode : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + +protected: + DIMacroNode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned MIType, + ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None) + : MDNode(C, ID, Storage, Ops1, Ops2) { + assert(MIType < 1u << 16); + SubclassData16 = MIType; + } + ~DIMacroNode() = default; + + template <class Ty> Ty *getOperandAs(unsigned I) const { + return cast_or_null<Ty>(getOperand(I)); + } + + StringRef getStringOperand(unsigned I) const { + if (auto *S = getOperandAs<MDString>(I)) + return S->getString(); + return StringRef(); + } + + static MDString *getCanonicalMDString(LLVMContext &Context, StringRef S) { + if (S.empty()) + return nullptr; + return MDString::get(Context, S); + } + +public: + unsigned getMacinfoType() const { return SubclassData16; } + + static bool classof(const Metadata *MD) { + switch (MD->getMetadataID()) { + default: + return false; + case DIMacroKind: + case DIMacroFileKind: + return true; + } + } +}; + +class DIMacro : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line, + ArrayRef<Metadata *> Ops) + : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacro() = default; + + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + StringRef Name, StringRef Value, StorageType Storage, + bool ShouldCreate = true) { + return getImpl(Context, MIType, Line, getCanonicalMDString(Context, Name), + getCanonicalMDString(Context, Value), Storage, ShouldCreate); + } + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + MDString *Name, MDString *Value, StorageType Storage, + bool ShouldCreate = true); + + TempDIMacro cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getName(), + getValue()); + } + +public: + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name, + StringRef Value = ""), + (MIType, Line, Name, Value)) + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name, + MDString *Value), + (MIType, Line, Name, Value)) + + TempDIMacro clone() const { return cloneImpl(); } + + unsigned getLine() const { return Line; } + + StringRef getName() const { return getStringOperand(0); } + StringRef getValue() const { return getStringOperand(1); } + + MDString *getRawName() const { return getOperandAs<MDString>(0); } + MDString *getRawValue() const { return getOperandAs<MDString>(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroKind; + } +}; + +class DIMacroFile : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType, + unsigned Line, ArrayRef<Metadata *> Ops) + : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacroFile() = default; + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, DIFile *File, + DIMacroNodeArray Elements, StorageType Storage, + bool ShouldCreate = true) {
return getImpl(Context, MIType, Line, static_cast<Metadata *>(File), + Elements.get(), Storage, ShouldCreate); } + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, Metadata *Elements, + StorageType Storage, bool ShouldCreate = true); + + TempDIMacroFile cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getFile(), + getElements()); + } + +public: + DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File, + DIMacroNodeArray Elements), + (MIType, Line, File, Elements)) + DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, + Metadata *File, Metadata *Elements), + (MIType, Line, File, Elements)) + + TempDIMacroFile clone() const { return cloneImpl(); } + + void replaceElements(DIMacroNodeArray Elements) { +#ifndef NDEBUG + for (DIMacroNode *Op : getElements()) + assert(std::find(Elements->op_begin(), Elements->op_end(), Op) != + Elements->op_end() && + "Lost a macro node during macro node list replacement"); +#endif + replaceOperandWith(1, Elements.get()); + } + + unsigned getLine() const { return Line; } + DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); } + + DIMacroNodeArray getElements() const { + return cast_or_null<MDTuple>(getRawElements()); + } + + Metadata *getRawFile() const { return getOperand(0); } + Metadata *getRawElements() const { return getOperand(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroFileKind; + } +}; + } // end namespace llvm #undef DEFINE_MDNODE_GET_UNPACK_IMPL diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h index 4a94499b4cf5..071e69b1e808 100644 --- a/include/llvm/IR/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -36,11 +36,12 @@ class StringRef; /// @brief Integer representation type class IntegerType : public Type { friend class LLVMContextImpl; - + protected: explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){ setSubclassData(NumBits); } + public: /// This enum is just used to hold constants we need for IntegerType. enum { @@ -90,6 +91,9 @@ public: } }; +unsigned Type::getIntegerBitWidth() const { + return cast<IntegerType>(this)->getBitWidth(); +} /// FunctionType - Class to represent function types /// @@ -108,7 +112,7 @@ public: /// FunctionType::get - Create a FunctionType taking no parameters. /// static FunctionType *get(Type *Result, bool isVarArg); - + /// isValidReturnType - Return true if the specified type is valid as a return /// type. static bool isValidReturnType(Type *RetTy); @@ -143,18 +147,30 @@ public: static_assert(AlignOf<FunctionType>::Alignment >= AlignOf<Type *>::Alignment, "Alignment sufficient for objects appended to FunctionType"); +bool Type::isFunctionVarArg() const { + return cast<FunctionType>(this)->isVarArg(); +} + +Type *Type::getFunctionParamType(unsigned i) const { + return cast<FunctionType>(this)->getParamType(i); +} + +unsigned Type::getFunctionNumParams() const { + return cast<FunctionType>(this)->getNumParams(); +} + /// CompositeType - Common super class of ArrayType, StructType, PointerType /// and VectorType. class CompositeType : public Type { protected: - explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { } -public: + explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) {} +public: /// getTypeAtIndex - Given an index value into the type, return the type of /// the element.
/// - Type *getTypeAtIndex(const Value *V); - Type *getTypeAtIndex(unsigned Idx); + Type *getTypeAtIndex(const Value *V) const; + Type *getTypeAtIndex(unsigned Idx) const; bool indexValid(const Value *V) const; bool indexValid(unsigned Idx) const; @@ -167,14 +183,13 @@ public: } }; - /// StructType - Class to represent struct types. There are two different kinds /// of struct types: Literal structs and Identified structs. /// /// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must /// always have a body when created. You can get one of these by using one of /// the StructType::get() forms. -/// +/// /// Identified structs (e.g. %foo or %42) may optionally have a name and are not /// uniqued. The names for identified structs are managed at the LLVMContext /// level, so there can only be a single identified struct with a given name in @@ -205,23 +220,20 @@ class StructType : public CompositeType { /// pointer to the symbol table entry (maintained by LLVMContext) for the /// struct. This is null if the type is an literal struct or if it is /// a identified type that has an empty name. - /// + /// void *SymbolTableEntry; -public: +public: /// StructType::create - This creates an identified struct. static StructType *create(LLVMContext &Context, StringRef Name); static StructType *create(LLVMContext &Context); - - static StructType *create(ArrayRef<Type*> Elements, - StringRef Name, + + static StructType *create(ArrayRef<Type *> Elements, StringRef Name, bool isPacked = false); - static StructType *create(ArrayRef<Type*> Elements); - static StructType *create(LLVMContext &Context, - ArrayRef<Type*> Elements, - StringRef Name, - bool isPacked = false); - static StructType *create(LLVMContext &Context, ArrayRef<Type*> Elements); + static StructType *create(ArrayRef<Type *> Elements); + static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements, + StringRef Name, bool isPacked = false); + static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements); static StructType *create(StringRef Name, Type *elt1, ...) LLVM_END_WITH_NULL; /// StructType::get - This static method is the primary way to create a @@ -232,7 +244,7 @@ public: /// StructType::get - Create an empty structure type. /// static StructType *get(LLVMContext &Context, bool isPacked = false); - + /// StructType::get - This static method is a convenience method for creating /// structure types by specifying the elements as arguments. Note that this /// method always returns a non-packed struct, and requires at least one /// element type. static StructType *get(Type *elt1, ...) LLVM_END_WITH_NULL; bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } - + /// isLiteral - Return true if this type is uniqued by structural /// equivalence, false if it is a struct definition. bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; } - + /// isOpaque - Return true if this is a type with an identity that has no body /// specified yet. These prints as 'opaque' in .ll files. bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; } /// isSized - Return true if this is a sized type. - bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const; - + bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const; + /// hasName - Return true if this is a named struct that has a non-empty name. bool hasName() const { return SymbolTableEntry != nullptr; } - + /// getName - Return the name for this struct type if it has an identity. /// This may return an empty string for an unnamed struct type.
Do not call /// this on an literal type. StringRef getName() const; - + /// setName - Change the name of this type to the specified name, or to a name /// with a suffix if there is a collision. Do not call this on an literal /// type. @@ -268,11 +280,10 @@ public: /// setBody - Specify a body for an opaque identified type. void setBody(ArrayRef<Type*> Elements, bool isPacked = false); void setBody(Type *elt1, ...) LLVM_END_WITH_NULL; - + /// isValidElementType - Return true if the specified type is valid as a /// element type. static bool isValidElementType(Type *ElemTy); - // Iterator access to the elements. typedef Type::subtype_iterator element_iterator; @@ -284,8 +295,8 @@ public: /// isLayoutIdentical - Return true if this is layout identical to the /// specified struct. - bool isLayoutIdentical(StructType *Other) const; - + bool isLayoutIdentical(StructType *Other) const; + /// Random access to the elements unsigned getNumElements() const { return NumContainedTys; } Type *getElementType(unsigned N) const { @@ -299,6 +310,18 @@ public: } }; +StringRef Type::getStructName() const { + return cast<StructType>(this)->getName(); +} + +unsigned Type::getStructNumElements() const { + return cast<StructType>(this)->getNumElements(); +} + +Type *Type::getStructElementType(unsigned N) const { + return cast<StructType>(this)->getElementType(N); +} + /// SequentialType - This is the superclass of the array, pointer and vector /// type classes. All of these represent "arrays" in memory. The array type /// represents a specifically sized array, pointer types are unsized/unknown @@ -330,6 +353,9 @@ public: } }; +Type *Type::getSequentialElementType() const { + return cast<SequentialType>(this)->getElementType(); +} /// ArrayType - Class to represent array types. /// @@ -339,6 +365,7 @@ class ArrayType : public SequentialType { ArrayType(const ArrayType &) = delete; const ArrayType &operator=(const ArrayType &) = delete; ArrayType(Type *ElType, uint64_t NumEl); + public: /// ArrayType::get - This static method is the primary way to construct an /// ArrayType @@ -357,6 +384,10 @@ public: } }; +uint64_t Type::getArrayNumElements() const { + return cast<ArrayType>(this)->getNumElements(); +} + /// VectorType - Class to represent vector types. /// class VectorType : public SequentialType { @@ -365,6 +396,7 @@ class VectorType : public SequentialType { VectorType(const VectorType &) = delete; const VectorType &operator=(const VectorType &) = delete; VectorType(Type *ElType, unsigned NumEl); + public: /// VectorType::get - This static method is the primary way to construct an /// VectorType. @@ -443,6 +475,9 @@ public: } }; +unsigned Type::getVectorNumElements() const { + return cast<VectorType>(this)->getNumElements(); +} /// PointerType - Class to represent pointers. /// @@ -450,6 +485,7 @@ class PointerType : public SequentialType { PointerType(const PointerType &) = delete; const PointerType &operator=(const PointerType &) = delete; explicit PointerType(Type *ElType, unsigned AddrSpace); + public: /// PointerType::get - This constructs a pointer to an object of the specified /// type in a numbered address space.
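The inline accessors added throughout this file (getStructNumElements, getVectorNumElements, getArrayNumElements, and friends) forward Type queries to the derived classes so that callers no longer spell the cast themselves. A hedged sketch of a caller, illustrative only; the is*Ty() checks guard the cast<> that each accessor performs internally:

    // Hypothetical helper, not part of this patch.
    static unsigned elementCount(Type *T) {
      if (T->isStructTy())
        return T->getStructNumElements();          // cast<StructType> inside
      if (T->isVectorTy())
        return T->getVectorNumElements();          // cast<VectorType> inside
      if (T->isArrayTy())
        return unsigned(T->getArrayNumElements()); // cast<ArrayType> inside
      return 0;
    }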
@@ -477,6 +513,10 @@ public: } }; +unsigned Type::getPointerAddressSpace() const { + return cast<PointerType>(getScalarType())->getAddressSpace(); +} + } // End llvm namespace #endif diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index f38313f82ea7..f69955e5ed48 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_DIAGNOSTICINFO_H #define LLVM_IR_DIAGNOSTICINFO_H -#include "llvm-c/Core.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Module.h" @@ -56,8 +55,11 @@ enum DiagnosticKind { DK_OptimizationRemark, DK_OptimizationRemarkMissed, DK_OptimizationRemarkAnalysis, + DK_OptimizationRemarkAnalysisFPCommute, + DK_OptimizationRemarkAnalysisAliasing, DK_OptimizationFailure, DK_MIRParser, + DK_PGOProfile, DK_FirstPluginKind }; @@ -99,6 +101,8 @@ public: /// The printed message must not end with '.' nor start with a severity /// keyword. virtual void print(DiagnosticPrinter &DP) const = 0; + + static const char *AlwaysPrint; }; typedef std::function<void(const DiagnosticInfo &)> DiagnosticHandlerFunction; @@ -210,19 +214,18 @@ public: /// Diagnostic information for the sample profiler. class DiagnosticInfoSampleProfile : public DiagnosticInfo { public: - DiagnosticInfoSampleProfile(const char *FileName, unsigned LineNum, + DiagnosticInfoSampleProfile(StringRef FileName, unsigned LineNum, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), LineNum(LineNum), Msg(Msg) {} - DiagnosticInfoSampleProfile(const char *FileName, const Twine &Msg, + DiagnosticInfoSampleProfile(StringRef FileName, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), LineNum(0), Msg(Msg) {} DiagnosticInfoSampleProfile(const Twine &Msg, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_SampleProfile, Severity), FileName(nullptr), - LineNum(0), Msg(Msg) {} + : DiagnosticInfo(DK_SampleProfile, Severity), LineNum(0), Msg(Msg) {} /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -231,13 +234,13 @@ public: return DI->getKind() == DK_SampleProfile; } - const char *getFileName() const { return FileName; } + StringRef getFileName() const { return FileName; } unsigned getLineNum() const { return LineNum; } const Twine &getMsg() const { return Msg; } private: /// Name of the input file associated with this diagnostic. - const char *FileName; + StringRef FileName; /// Line number where the diagnostic occurred. If 0, no line number will /// be emitted in the message. @@ -247,6 +250,31 @@ private: const Twine &Msg; }; +/// Diagnostic information for the PGO profiler. +class DiagnosticInfoPGOProfile : public DiagnosticInfo { +public: + DiagnosticInfoPGOProfile(const char *FileName, const Twine &Msg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_PGOProfile, Severity), FileName(FileName), Msg(Msg) {} + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_PGOProfile; + } + + const char *getFileName() const { return FileName; } + const Twine &getMsg() const { return Msg; } + +private: + /// Name of the input file associated with this diagnostic. + const char *FileName; + + /// Message to report. + const Twine &Msg; +}; + /// Common features for diagnostics dealing with optimization remarks.
class DiagnosticInfoOptimizationBase : public DiagnosticInfo { public: @@ -267,10 +295,6 @@ public: /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == DK_OptimizationRemark; - } - /// Return true if this optimization remark is enabled by one of /// of the LLVM command line flags (-pass-remarks, -pass-remarks-missed, /// or -pass-remarks-analysis). Note that this only handles the LLVM @@ -386,6 +410,69 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override; + +protected: + DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, + Msg) {} +}; + +/// Diagnostic information for optimization analysis remarks related to +/// floating-point non-commutativity. +class DiagnosticInfoOptimizationRemarkAnalysisFPCommute + : public DiagnosticInfoOptimizationRemarkAnalysis { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. The + /// front-end will append its own message related to options that address + /// floating-point non-commutativity. Note that this class does not copy this + /// message, so this reference must be valid for the whole life time of the + /// diagnostic. + DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkAnalysis( + DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {} + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute; + } +}; + +/// Diagnostic information for optimization analysis remarks related to +/// pointer aliasing. +class DiagnosticInfoOptimizationRemarkAnalysisAliasing + : public DiagnosticInfoOptimizationRemarkAnalysis { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. The + /// front-end will append its own message related to options that address + /// pointer aliasing legality. Note that this class does not copy this + /// message, so this reference must be valid for the whole life time of the + /// diagnostic. + DiagnosticInfoOptimizationRemarkAnalysisAliasing(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkAnalysis( + DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg) {} + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysisAliasing; + } }; /// Diagnostic information for machine IR parser. 
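Since the two new remark subclasses above differ only in their DiagnosticKind and classof, emitting one works like any other DiagnosticInfo. A hedged sketch, not part of this patch; Fn, DLoc and the pass name are assumed, and per the documented caveat the Twine message is not copied, so it must outlive the call:

    Fn.getContext().diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(
        "loop-vectorize", Fn, DLoc,
        "loop not vectorized: cannot prove it is safe to reorder "
        "floating-point operations"));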
@@ -438,6 +525,30 @@ void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg); +/// Emit an optimization analysis remark related to messages about +/// floating-point non-commutativity. \p PassName is the name of the pass +/// emitting the message. If -Rpass-analysis= is given and \p PassName matches +/// the regular expression in -Rpass, then the remark will be emitted. \p Fn is +/// the function triggering the remark, \p DLoc is the debug location where the +/// diagnostic is generated. \p Msg is the message string to use. +void emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg); + +/// Emit an optimization analysis remark related to messages about +/// pointer aliasing. \p PassName is the name of the pass emitting the message. +/// If -Rpass-analysis= is given and \p PassName matches the regular expression +/// in -Rpass, then the remark will be emitted. \p Fn is the function triggering +/// the remark, \p DLoc is the debug location where the diagnostic is generated. +/// \p Msg is the message string to use. +void emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg); + /// Diagnostic information for optimization failures. class DiagnosticInfoOptimizationFailure : public DiagnosticInfoOptimizationBase { diff --git a/include/llvm/IR/DiagnosticPrinter.h b/include/llvm/IR/DiagnosticPrinter.h index 735e3ad7a8b0..1bcd73738b66 100644 --- a/include/llvm/IR/DiagnosticPrinter.h +++ b/include/llvm/IR/DiagnosticPrinter.h @@ -63,7 +63,7 @@ protected: raw_ostream &Stream; public: - DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {}; + DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {} // Simple types. DiagnosticPrinter &operator<<(char C) override; diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index 27d989b0344c..37447c353b19 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -64,11 +64,30 @@ public: /// \brief Concrete subclass of DominatorTreeBase that is used to compute a /// normal dominator tree. +/// +/// Definition: A block is said to be forward statically reachable if there is +/// a path from the entry of the function to the block. A statically reachable +/// block may become statically unreachable during optimization. +/// +/// A forward unreachable block may appear in the dominator tree, or it may +/// not. If it does, dominance queries will return results as if all reachable +/// blocks dominate it. When asking for a Node corresponding to a potentially +/// unreachable block, calling code must handle the case where the block was +/// unreachable and the result of getNode() is nullptr. +/// +/// Generally, a block known to be unreachable when the dominator tree is +/// constructed will not be in the tree. One which becomes unreachable after +/// the dominator tree is initially constructed may still exist in the tree, +/// even if the tree is properly updated. Calling code should not rely on the +/// preceding statements; this is stated only to assist human understanding. 
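A sketch of the contract just described, using the explicit DominatorTree(Function &) constructor added in the class below; illustrative only, with F and BB assumed to be in scope:

    DominatorTree DT(F);             // the new explicit constructor below
    DomTreeNode *N = DT.getNode(&BB);
    if (!N) {
      // BB is (or became) forward unreachable: there is no tree node, and
      // dominance queries behave as if all reachable blocks dominate it.
    }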
class DominatorTree : public DominatorTreeBase<BasicBlock> { public: typedef DominatorTreeBase<BasicBlock> Base; DominatorTree() : DominatorTreeBase<BasicBlock>(false) {} + explicit DominatorTree(Function &F) : DominatorTreeBase<BasicBlock>(false) { + recalculate(F); + } DominatorTree(DominatorTree &&Arg) : Base(std::move(static_cast<Base &>(Arg))) {} @@ -122,31 +141,35 @@ public: // DominatorTree GraphTraits specializations so the DominatorTree can be // iterable by generic graph iterators. -template <> struct GraphTraits<DomTreeNode*> { - typedef DomTreeNode NodeType; - typedef NodeType::iterator ChildIteratorType; +template <class Node, class ChildIterator> struct DomTreeGraphTraitsBase { + typedef Node NodeType; + typedef ChildIterator ChildIteratorType; + typedef df_iterator<Node *, SmallPtrSet<NodeType *, 8>> nodes_iterator; - static NodeType *getEntryNode(NodeType *N) { - return N; - } + static NodeType *getEntryNode(NodeType *N) { return N; } static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(NodeType *N) { - return N->end(); - } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } - typedef df_iterator<DomTreeNode*> nodes_iterator; - - static nodes_iterator nodes_begin(DomTreeNode *N) { + static nodes_iterator nodes_begin(NodeType *N) { return df_begin(getEntryNode(N)); } - static nodes_iterator nodes_end(DomTreeNode *N) { + static nodes_iterator nodes_end(NodeType *N) { return df_end(getEntryNode(N)); } }; +template <> +struct GraphTraits<DomTreeNode *> + : public DomTreeGraphTraitsBase<DomTreeNode, DomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const DomTreeNode *> + : public DomTreeGraphTraitsBase<const DomTreeNode, + DomTreeNode::const_iterator> {}; + template <> struct GraphTraits<DominatorTree*> : public GraphTraits<DomTreeNode*> { static NodeType *getEntryNode(DominatorTree *DT) { diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index ec9f4cad094a..2a983930bf4d 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -32,28 +32,16 @@ namespace llvm { class FunctionType; class LLVMContext; +class DISubprogram; -template<> struct ilist_traits<Argument> - : public SymbolTableListTraits<Argument, Function> { - - Argument *createSentinel() const { - return static_cast<Argument*>(&Sentinel); - } - static void destroySentinel(Argument*) {} - - Argument *provideInitialHead() const { return createSentinel(); } - Argument *ensureHead(Argument*) const { return createSentinel(); } - static void noteHead(Argument*, Argument*) {} - - static ValueSymbolTable *getSymTab(Function *ItemParent); -private: - mutable ilist_half_node<Argument> Sentinel; -}; +template <> +struct SymbolTableListSentinelTraits<Argument> + : public ilist_half_embedded_sentinel_traits<Argument> {}; class Function : public GlobalObject, public ilist_node<Function> { public: - typedef iplist<Argument> ArgumentListType; - typedef iplist<BasicBlock> BasicBlockListType; + typedef SymbolTableList<Argument> ArgumentListType; + typedef SymbolTableList<BasicBlock> BasicBlockListType; // BasicBlock iterators... typedef BasicBlockListType::iterator iterator; @@ -73,10 +61,12 @@ private: /* * Value::SubclassData * - * bit 0 : HasLazyArguments - * bit 1 : HasPrefixData - * bit 2 : HasPrologueData - * bit 3-6: CallingConvention + * bit 0 : HasLazyArguments + * bit 1 : HasPrefixData + * bit 2 : HasPrologueData + * bit 3 : HasPersonalityFn + * bits 4-13 : CallingConvention + * bits 14-15 : [reserved] */ /// Bits from GlobalObject::GlobalObjectSubclassData. @@ -90,7 +80,7 @@ private: (Value ?
Mask : 0u)); } - friend class SymbolTableListTraits<Function, Module>; + friend class SymbolTableListTraits<Function>; void setParent(Module *parent); @@ -120,7 +110,7 @@ private: public: static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { - return new(1) Function(Ty, Linkage, N, M); + return new Function(Ty, Linkage, N, M); } ~Function() override; @@ -128,14 +118,6 @@ public: /// \brief Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// \brief Get the personality function associated with this function. - bool hasPersonalityFn() const { return getNumOperands() != 0; } - Constant *getPersonalityFn() const { - assert(hasPersonalityFn()); - return cast<Constant>(Op<0>()); - } - void setPersonalityFn(Constant *C); - Type *getReturnType() const; // Return the type of the ret val FunctionType *getFunctionType() const; // Return the FunctionType for me @@ -170,11 +152,13 @@ public: /// calling convention of this function. The enum values for the known /// calling conventions are defined in CallingConv.h. CallingConv::ID getCallingConv() const { - return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 3); + return static_cast<CallingConv::ID>((getSubclassDataFromValue() >> 4) & + CallingConv::MaxID); } void setCallingConv(CallingConv::ID CC) { - setValueSubclassData((getSubclassDataFromValue() & 7) | - (static_cast<unsigned>(CC) << 3)); + auto ID = static_cast<unsigned>(CC); + assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention"); + setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4)); } /// @brief Return the attribute list for this Function. @@ -267,13 +251,13 @@ public: uint64_t getDereferenceableBytes(unsigned i) const { return AttributeSets.getDereferenceableBytes(i); } - + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). uint64_t getDereferenceableOrNullBytes(unsigned i) const { return AttributeSets.getDereferenceableOrNullBytes(i); } - + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, @@ -299,10 +283,28 @@ public: return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Attribute::ArgMemOnly); } - void setOnlyAccessesArgMemory() { - addFnAttr(Attribute::ArgMemOnly); + void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + + /// @brief Determine if the function may only access memory that is + /// inaccessible from the IR. + bool onlyAccessesInaccessibleMemory() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOnly); } - + void setOnlyAccessesInaccessibleMemory() { + addFnAttr(Attribute::InaccessibleMemOnly); + } + + /// @brief Determine if the function may only access memory that is + // either inaccessible from the IR or pointed to by its arguments. + bool onlyAccessesInaccessibleMemOrArgMem() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOrArgMemOnly); + } + void setOnlyAccessesInaccessibleMemOrArgMem() { + addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + /// @brief Determine if the function cannot return. bool doesNotReturn() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, @@ -339,6 +341,15 @@ public: addFnAttr(Attribute::Convergent); } + /// Determine if the function is known not to recurse, directly or + /// indirectly.
+ bool doesNotRecurse() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoRecurse); + } + void setDoesNotRecurse() { + addFnAttr(Attribute::NoRecurse); + } /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. @@ -362,7 +373,8 @@ public: AttributeSets.hasAttribute(2, Attribute::StructRet); } - /// @brief Determine if the parameter does not alias other parameters. + /// @brief Determine if the parameter or return value is marked with NoAlias + /// attribute. /// @param n The parameter to check. 1 is the first parameter, 0 is the return bool doesNotAlias(unsigned n) const { return AttributeSets.hasAttribute(n, Attribute::NoAlias); @@ -395,6 +407,14 @@ public: addAttribute(n, Attribute::ReadOnly); } + /// Optimize this function for minimum size (-Oz). + bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }; + + /// Optimize this function for size (-Os) or minimum size (-Oz). + bool optForSize() const { + return hasFnAttribute(Attribute::OptimizeForSize) || optForMinSize(); + } + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a Function) from the Function Src to this one. void copyAttributesFrom(const GlobalValue *Src) override; @@ -417,7 +437,6 @@ public: /// void eraseFromParent() override; - /// Get the underlying elements of the Function... the basic block list is /// empty for external functions. /// @@ -429,13 +448,13 @@ public: CheckLazyArguments(); return ArgumentList; } - static iplist<Argument> Function::*getSublistAccess(Argument*) { + static ArgumentListType Function::*getSublistAccess(Argument*) { return &Function::ArgumentList; } const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; } BasicBlockListType &getBasicBlockList() { return BasicBlocks; } - static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) { + static BasicBlockListType Function::*getSublistAccess(BasicBlock*) { return &Function::BasicBlocks; } @@ -450,7 +469,6 @@ public: inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; } inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; } - //===--------------------------------------------------------------------===// // BasicBlock iterator forwarding functions // @@ -487,11 +505,11 @@ public: } iterator_range<arg_iterator> args() { - return iterator_range<arg_iterator>(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } iterator_range<const_arg_iterator> args() const { - return iterator_range<const_arg_iterator>(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// @} @@ -499,24 +517,33 @@ public: size_t arg_size() const; bool arg_empty() const; + /// \brief Check whether this function has a personality function. + bool hasPersonalityFn() const { + return getSubclassDataFromValue() & (1<<3); + } + + /// \brief Get the personality function associated with this function. + Constant *getPersonalityFn() const; + void setPersonalityFn(Constant *Fn); + + /// \brief Check whether this function has prefix data. bool hasPrefixData() const { return getSubclassDataFromValue() & (1<<1); } + /// \brief Get the prefix data associated with this function. Constant *getPrefixData() const; void setPrefixData(Constant *PrefixData); + /// \brief Check whether this function has prologue data. bool hasPrologueData() const { return getSubclassDataFromValue() & (1<<2); } + /// \brief Get the prologue data associated with this function.
Constant *getPrologueData() const; void setPrologueData(Constant *PrologueData); - /// Print the function to an output stream with an optional - /// AssemblyAnnotationWriter. - void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr) const; - /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the /// program, displaying the CFG of the current function with the code for each @@ -596,12 +623,27 @@ public: /// Drop all metadata from \c this not included in \c KnownIDs. void dropUnknownMetadata(ArrayRef<unsigned> KnownIDs); + /// \brief Set the attached subprogram. + /// + /// Calls \a setMetadata() with \a LLVMContext::MD_dbg. + void setSubprogram(DISubprogram *SP); + + /// \brief Get the attached subprogram. + /// + /// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result + /// to \a DISubprogram. + DISubprogram *getSubprogram() const; + private: + void allocHungoffUselist(); + template <int Idx> void setHungoffOperand(Constant *C); + // Shadow Value::setValueSubclassData with a private forwarding method so that // subclasses cannot accidentally use it. void setValueSubclassData(unsigned short D) { Value::setValueSubclassData(D); } + void setValueSubclassDataBit(unsigned Bit, bool On); bool hasMetadataHashEntry() const { return getGlobalObjectSubClassData() & HasMetadataHashEntryBit; @@ -613,18 +655,8 @@ private: void clearMetadata(); }; -inline ValueSymbolTable * -ilist_traits<BasicBlock>::getSymTab(Function *F) { - return F ? &F->getValueSymbolTable() : nullptr; -} - -inline ValueSymbolTable * -ilist_traits<Argument>::getSymTab(Function *F) { - return F ? &F->getValueSymbolTable() : nullptr; -} - template <> -struct OperandTraits<Function> : public OptionalOperandTraits<Function> {}; +struct OperandTraits<Function> : public HungoffOperandTraits<3> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value) diff --git a/include/llvm/IR/FunctionInfo.h b/include/llvm/IR/FunctionInfo.h new file mode 100644 index 000000000000..eba088a61bc0 --- /dev/null +++ b/include/llvm/IR/FunctionInfo.h @@ -0,0 +1,241 @@ +//===-- llvm/FunctionInfo.h - Function Info Index ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// @file +/// FunctionInfo.h This file contains the declarations of the classes that hold +/// the function info index and summary. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_FUNCTIONINFO_H +#define LLVM_IR_FUNCTIONINFO_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// \brief Function summary information to aid decisions and implementation of +/// importing. +/// +/// This is a separate class from FunctionInfo to enable lazy reading of this +/// function summary information from the combined index file during importing. +class FunctionSummary { +private: + /// \brief Path of module containing function IR, used to locate module when + /// importing this function.
+ /// + /// This is only used during parsing of the combined function index, or when + /// parsing the per-module index for creation of the combined function index, + /// not during writing of the per-module index which doesn't contain a + /// module path string table. + StringRef ModulePath; + + /// \brief Used to flag functions that have local linkage types and need to + /// have module identifier appended before placing into the combined + /// index, to disambiguate from other functions with the same name. + /// + /// This is only used in the per-module function index, as it is consumed + /// while creating the combined index. + bool IsLocalFunction; + + // The rest of the information is used to help decide whether importing + // is likely to be profitable. + // Other information will be added as the importing is tuned, such + // as hotness (when profile available), and other function characteristics. + + /// Number of instructions (ignoring debug instructions, e.g.) computed + /// during the initial compile step when the function index is first built. + unsigned InstCount; + +public: + /// Construct a summary object from summary data expected for all + /// summary records. + FunctionSummary(unsigned NumInsts) : InstCount(NumInsts) {} + + /// Set the path to the module containing this function, for use in + /// the combined index. + void setModulePath(StringRef ModPath) { ModulePath = ModPath; } + + /// Get the path to the module containing this function. + StringRef modulePath() const { return ModulePath; } + + /// Record whether this is a local function in the per-module index. + void setLocalFunction(bool IsLocal) { IsLocalFunction = IsLocal; } + + /// Check whether this was a local function, for use in creating + /// the combined index. + bool isLocalFunction() const { return IsLocalFunction; } + + /// Get the instruction count recorded for this function. + unsigned instCount() const { return InstCount; } +}; + +/// \brief Class to hold pointer to function summary and information required +/// for parsing it. +/// +/// For the per-module index, this holds the bitcode offset +/// of the corresponding function block. For the combined index, +/// after parsing of the \a ValueSymbolTable, this initially +/// holds the offset of the corresponding function summary bitcode +/// record. After parsing the associated summary information from the summary +/// block the \a FunctionSummary is populated and stored here. +class FunctionInfo { +private: + /// Function summary information used to help make ThinLTO importing + /// decisions. + std::unique_ptr<FunctionSummary> Summary; + + /// \brief The bitcode offset corresponding to either the associated + /// function's function body record, or its function summary record, + /// depending on whether this is a per-module or combined index. + /// + /// This bitcode offset is written to or read from the associated + /// \a ValueSymbolTable entry for the function. + /// For the per-module index this holds the bitcode offset of the + /// function's body record within bitcode module block in its module, + /// which is used during lazy function parsing or ThinLTO importing. + /// For the combined index this holds the offset of the corresponding + /// function summary record, to enable associating the combined index + /// VST records with the summary records. + uint64_t BitcodeIndex; + +public: + /// Constructor used during parsing of VST entries.
+ FunctionInfo(uint64_t FuncOffset) + : Summary(nullptr), BitcodeIndex(FuncOffset) {} + + /// Constructor used for per-module index bitcode writing. + FunctionInfo(uint64_t FuncOffset, + std::unique_ptr<FunctionSummary> FuncSummary) + : Summary(std::move(FuncSummary)), BitcodeIndex(FuncOffset) {} + + /// Record the function summary information parsed out of the function + /// summary block during parsing or combined index creation. + void setFunctionSummary(std::unique_ptr<FunctionSummary> FuncSummary) { + Summary = std::move(FuncSummary); + } + + /// Get the function summary recorded for this function. + FunctionSummary *functionSummary() const { return Summary.get(); } + + /// Get the bitcode index recorded for this function, depending on + /// the index type. + uint64_t bitcodeIndex() const { return BitcodeIndex; } + + /// Record the bitcode index for this function, depending on + /// the index type. + void setBitcodeIndex(uint64_t FuncOffset) { BitcodeIndex = FuncOffset; } +}; + +/// List of function info structures for a particular function name held +/// in the FunctionMap. Requires a vector in the case of multiple +/// COMDAT functions of the same name. +typedef std::vector<std::unique_ptr<FunctionInfo>> FunctionInfoList; + +/// Map from function name to corresponding function info structures. +typedef StringMap<FunctionInfoList> FunctionInfoMapTy; + +/// Type used for iterating through the function info map. +typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator; +typedef FunctionInfoMapTy::iterator funcinfo_iterator; + +/// String table to hold/own module path strings, which additionally holds the +/// module ID assigned to each module during the plugin step. The StringMap +/// makes a copy of and owns inserted strings. +typedef StringMap<uint64_t> ModulePathStringTableTy; + +/// Class to hold module path string table and function map, +/// and encapsulate methods for operating on them. +class FunctionInfoIndex { +private: + /// Map from function name to list of function information instances + /// for functions of that name (may be duplicates in the COMDAT case, e.g.). + FunctionInfoMapTy FunctionMap; + + /// Holds strings for combined index, mapping to the corresponding module ID. + ModulePathStringTableTy ModulePathStringTable; + +public: + FunctionInfoIndex() = default; + + // Disable the copy constructor and assignment operators, so + // no unexpected copying/moving occurs. + FunctionInfoIndex(const FunctionInfoIndex &) = delete; + void operator=(const FunctionInfoIndex &) = delete; + + funcinfo_iterator begin() { return FunctionMap.begin(); } + const_funcinfo_iterator begin() const { return FunctionMap.begin(); } + funcinfo_iterator end() { return FunctionMap.end(); } + const_funcinfo_iterator end() const { return FunctionMap.end(); } + + /// Get the list of function info objects for a given function. + const FunctionInfoList &getFunctionInfoList(StringRef FuncName) { + return FunctionMap[FuncName]; + } + + /// Get the list of function info objects for a given function. + const const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const { + return FunctionMap.find(FuncName); + } + + /// Add a function info for a function of the given name. + void addFunctionInfo(StringRef FuncName, std::unique_ptr<FunctionInfo> Info) { + FunctionMap[FuncName].push_back(std::move(Info)); + } + + /// Iterator to allow writer to walk through table during emission.
+ iterator_range<StringMap<uint64_t>::const_iterator> + modPathStringEntries() const { + return llvm::make_range(ModulePathStringTable.begin(), + ModulePathStringTable.end()); + } + + /// Get the module ID recorded for the given module path. + uint64_t getModuleId(const StringRef ModPath) const { + return ModulePathStringTable.lookup(ModPath); + } + + /// Add the given per-module index into this function index/summary, + /// assigning it the given module ID. Each module merged in should have + /// a unique ID, necessary for consistent renaming of promoted + /// static (local) variables. + void mergeFrom(std::unique_ptr<FunctionInfoIndex> Other, + uint64_t NextModuleId); + + /// Convenience method for creating a promoted global name + /// for the given value name of a local, and its original module's ID. + static std::string getGlobalNameForLocal(StringRef Name, uint64_t ModId) { + SmallString<256> NewName(Name); + NewName += ".llvm."; + raw_svector_ostream(NewName) << ModId; + return NewName.str(); + } + + /// Add a new module path, mapped to the given module Id, and return StringRef + /// owned by string table map. + StringRef addModulePath(StringRef ModPath, uint64_t ModId) { + return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) + .first->first(); + } + + /// Check if the given Module has any functions available for exporting + /// in the index. We consider any module present in the ModulePathStringTable + /// to have exported functions. + bool hasExportedFunctions(const Module &M) const { + return ModulePathStringTable.count(M.getModuleIdentifier()); + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h index 1d6c9157f0b8..6cb593c7a3da 100644 --- a/include/llvm/IR/GVMaterializer.h +++ b/include/llvm/IR/GVMaterializer.h @@ -18,12 +18,14 @@ #ifndef LLVM_IR_GVMATERIALIZER_H #define LLVM_IR_GVMATERIALIZER_H +#include "llvm/ADT/DenseMap.h" #include <system_error> #include <vector> namespace llvm { class Function; class GlobalValue; +class Metadata; class Module; class StructType; @@ -34,28 +36,25 @@ protected: public: virtual ~GVMaterializer(); - /// True if GV has been materialized and can be dematerialized back to - /// whatever backing store this GVMaterializer uses. - virtual bool isDematerializable(const GlobalValue *GV) const = 0; - /// Make sure the given GlobalValue is fully read. /// virtual std::error_code materialize(GlobalValue *GV) = 0; - /// If the given GlobalValue is read in, and if the GVMaterializer supports - /// it, release the memory for the GV, and set it up to be materialized - /// lazily. If the Materializer doesn't support this capability, this method - /// is a noop. - /// - virtual void dematerialize(GlobalValue *) {} - /// Make sure the entire Module has been completely read. /// - virtual std::error_code materializeModule(Module *M) = 0; + virtual std::error_code materializeModule() = 0; virtual std::error_code materializeMetadata() = 0; virtual void setStripDebugInfo() = 0; + /// Client should define this interface if the mapping between metadata + /// values and value ids needs to be preserved, e.g. across materializer + /// instantiations. If OnlyTempMD is true, only those that have remained + /// temporary metadata are recorded in the map.
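For illustration only: the promotion helper in FunctionInfoIndex above produces names of the form name.llvm.moduleid, which is what keeps renamed static (local) symbols unique across merged modules. The values here are made up; the saveMetadataList declaration follows:

    // "foo" was static in a module that the plugin step assigned ID 42:
    std::string Promoted =
        FunctionInfoIndex::getGlobalNameForLocal("foo", 42);
    // Promoted is now "foo.llvm.42".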
+ virtual void + saveMetadataList(DenseMap<const Metadata *, unsigned> &MetadataToIDs, + bool OnlyTempMD) {} + virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0; }; diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h index 6bba0ae29e98..7cb13fa33aa6 100644 --- a/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/include/llvm/IR/GetElementPtrTypeIterator.h @@ -78,7 +78,7 @@ namespace llvm { // current type directly. Type *operator->() const { return operator*(); } - Value *getOperand() const { return *OpIt; } + Value *getOperand() const { return const_cast<Value *>(&**OpIt); } generic_gep_type_iterator& operator++() { // Preincrement if (CurTy.getInt()) { diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index ce73b7af8ca1..b0772143309f 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -23,18 +23,17 @@ namespace llvm { class Module; -template <typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename ValueSubClass> class SymbolTableListTraits; class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> { - friend class SymbolTableListTraits<GlobalAlias, Module>; + friend class SymbolTableListTraits<GlobalAlias>; void operator=(const GlobalAlias &) = delete; GlobalAlias(const GlobalAlias &) = delete; void setParent(Module *parent); - GlobalAlias(PointerType *Ty, LinkageTypes Linkage, const Twine &Name, - Constant *Aliasee, Module *Parent); + GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, + const Twine &Name, Constant *Aliasee, Module *Parent); public: // allocate space for exactly one operand @@ -44,17 +43,19 @@ public: /// If a parent module is specified, the alias is automatically inserted into /// the end of the specified module's alias list. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Constant *Aliasee, - Module *Parent); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Constant *Aliasee, Module *Parent); // Without the Aliasee. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Module *Parent); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Module *Parent); // The module is taken from the Aliasee. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, GlobalValue *Aliasee); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + GlobalValue *Aliasee); // Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(LinkageTypes Linkage, const Twine &Name, diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h index f0552410b61d..ee111a046d73 100644 --- a/include/llvm/IR/GlobalObject.h +++ b/include/llvm/IR/GlobalObject.h @@ -27,9 +27,11 @@ class GlobalObject : public GlobalValue { GlobalObject(const GlobalObject &) = delete; protected: - GlobalObject(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name) - : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name), ObjComdat(nullptr) { + GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, + LinkageTypes Linkage, const Twine &Name, + unsigned AddressSpace = 0) + : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace), + ObjComdat(nullptr) { setGlobalValueSubClassData(0); } diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 2961369a7327..4fa4e7daeab0 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -65,15 +65,16 @@ public: }; protected: - GlobalValue(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name) - : Constant(Ty, VTy, Ops, NumOps), Linkage(Linkage), - Visibility(DefaultVisibility), UnnamedAddr(0), - DllStorageClass(DefaultStorageClass), + GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, + LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace) + : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps), + ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility), + UnnamedAddr(0), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), IntID((Intrinsic::ID)0U), Parent(nullptr) { setName(Name); } + Type *ValueType; // Note: VC++ treats enums as signed, so an extra bit is required to prevent // Linkage and Visibility from turning into negative values. LinkageTypes Linkage : 5; // The linkage of this global @@ -184,7 +185,7 @@ public: /// Global values are always pointers. PointerType *getType() const { return cast<PointerType>(User::getType()); } - Type *getValueType() const { return getType()->getElementType(); } + Type *getValueType() const { return ValueType; } static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage; @@ -236,7 +237,8 @@ public: /// Whether the definition of this global may be discarded if it is not used /// in its compilation unit. static bool isDiscardableIfUnused(LinkageTypes Linkage) { - return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage); + return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage) || + isAvailableExternallyLinkage(Linkage); } /// Whether the definition of this global may be replaced by something @@ -320,21 +322,11 @@ public: /// function has been read in yet or not. bool isMaterializable() const; - /// Returns true if this function was loaded from a GVMaterializer that's - /// still attached to its Module and that knows how to dematerialize the - /// function. - bool isDematerializable() const; - /// Make sure this GlobalValue is fully read. If the module is corrupt, this /// returns true and fills in the optional string with information about the /// problem. If successful, this returns false. std::error_code materialize(); - /// If this GlobalValue is read in, and if the GVMaterializer supports it, - /// release the memory for the function, and set it up to be materialized - /// lazily. If !isDematerializable(), this method is a noop.
- void dematerialize(); - /// @} /// Return true if the primary definition of this global value is outside of diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index a0159830ba3b..342bdc01bfbd 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -29,11 +29,10 @@ namespace llvm { class Module; class Constant; -template <typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename ValueSubClass> class SymbolTableListTraits; class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> { - friend class SymbolTableListTraits<GlobalVariable, Module>; + friend class SymbolTableListTraits<GlobalVariable>; void *operator new(size_t, unsigned) = delete; void operator=(const GlobalVariable &) = delete; GlobalVariable(const GlobalVariable &) = delete; @@ -106,18 +105,13 @@ public: /// hasUniqueInitializer - Whether the global variable has an initializer, and /// any changes made to the initializer will turn up in the final executable. inline bool hasUniqueInitializer() const { - return hasInitializer() && - // It's not safe to modify initializers of global variables with weak - // linkage, because the linker might choose to discard the initializer and - // use the initializer from another instance of the global variable - // instead. It is wrong to modify the initializer of a global variable - // with *_odr linkage because then different instances of the global may - // have different initializers, breaking the One Definition Rule. - !isWeakForLinker() && - // It is not safe to modify initializers of global variables with the - // external_initializer marker since the value may be changed at runtime - // before C++ initializers are evaluated. - !isExternallyInitialized(); + return + // We need to be sure this is the definition that will actually be used + isStrongDefinitionForLinker() && + // It is not safe to modify initializers of global variables with the + // external_initializer marker since the value may be changed at runtime + // before C++ initializers are evaluated. + !isExternallyInitialized(); } /// getInitializer - Return the initializer for this global variable. It is diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 6c67c79b6c0e..7fe04f2a091a 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" @@ -51,6 +52,7 @@ protected: /// \brief Common base class shared among various IRBuilders. class IRBuilderBase { DebugLoc CurDbgLocation; + protected: BasicBlock *BB; BasicBlock::iterator InsertPt; @@ -58,8 +60,8 @@ protected: MDNode *DefaultFPMathTag; FastMathFlags FMF; -public: +public: IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr) : Context(context), DefaultFPMathTag(FPMathTag), FMF() { ClearInsertionPoint(); } @@ -73,7 +75,7 @@ public: /// inserted into a block. void ClearInsertionPoint() { BB = nullptr; - InsertPt = nullptr; + InsertPt.reset(nullptr); } BasicBlock *GetInsertBlock() const { return BB; } @@ -91,8 +93,8 @@ public: /// the specified instruction.
void SetInsertPoint(Instruction *I) { BB = I->getParent(); - InsertPt = I; - assert(I != BB->end() && "Can't read debug loc from end()"); + InsertPt = I->getIterator(); + assert(InsertPt != BB->end() && "Can't read debug loc from end()"); SetCurrentDebugLocation(I->getDebugLoc()); } @@ -313,10 +315,8 @@ public: } /// \brief Fetch the type representing a 128-bit integer. - IntegerType *getInt128Ty() { - return Type::getInt128Ty(Context); - } - + IntegerType *getInt128Ty() { return Type::getInt128Ty(Context); } + /// \brief Fetch the type representing an N-bit integer. IntegerType *getIntNTy(unsigned N) { return Type::getIntNTy(Context, N); @@ -426,7 +426,7 @@ public: /// \brief Create a call to Masked Load intrinsic CallInst *CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, - Value *PassThru = 0, const Twine &Name = ""); + Value *PassThru = nullptr, const Twine &Name = ""); /// \brief Create a call to Masked Store intrinsic CallInst *CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, @@ -445,6 +445,16 @@ public: ArrayRef GCArgs, const Twine &Name = ""); + /// \brief Create a call to the experimental.gc.statepoint intrinsic to + /// start a new statepoint sequence. + CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, + Value *ActualCallee, uint32_t Flags, + ArrayRef CallArgs, + ArrayRef TransitionArgs, + ArrayRef DeoptArgs, + ArrayRef GCArgs, + const Twine &Name = ""); + // \brief Conveninence function for the common case when CallArgs are filled // in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be // .get()'ed to get the Value pointer. @@ -463,6 +473,15 @@ public: ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); + /// brief Create an invoke to the experimental.gc.statepoint intrinsic to + /// start a new statepoint sequence. + InvokeInst *CreateGCStatepointInvoke( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, + ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, + const Twine &Name = ""); + // Conveninence function for the common case when CallArgs are filled in using // makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be .get()'ed to // get the Value *. @@ -516,11 +535,11 @@ template > class IRBuilder : public IRBuilderBase, public Inserter { T Folder; + public: - IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(), + IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(), MDNode *FPMathTag = nullptr) - : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) { - } + : IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {} explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr) : IRBuilderBase(C, FPMathTag), Folder() { @@ -578,12 +597,15 @@ public: //===--------------------------------------------------------------------===// private: - /// \brief Helper to add branch weight metadata onto an instruction. + /// \brief Helper to add branch weight and unpredictable metadata onto an + /// instruction. /// \returns The annotated instruction. template - InstTy *addBranchWeights(InstTy *I, MDNode *Weights) { + InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) { if (Weights) I->setMetadata(LLVMContext::MD_prof, Weights); + if (Unpredictable) + I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable); return I; } @@ -620,18 +642,20 @@ public: /// \brief Create a conditional 'br Cond, TrueDest, FalseDest' /// instruction. 
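The new CreateGCStatepointCall/CreateGCStatepointInvoke overloads above add an explicit statepoint ID, a patchable-byte count, flags, and GC-transition arguments. A sketch against the simpler pre-existing overload, assuming the callee and two GC-managed pointers are already in scope; the ID is an arbitrary value echoed into the stackmap section:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Wrap a call in gc.statepoint with no patchable bytes, no deopt state and
// two GC-managed pointers; gc.relocate plumbing is added by later passes.
CallInst *emitStatepoint(IRBuilder<> &B, Value *Callee, Value *P, Value *Q) {
  Value *GCPtrs[] = {P, Q};
  return B.CreateGCStatepointCall(/*ID=*/0xABCDEF, /*NumPatchBytes=*/0,
                                  Callee, /*CallArgs=*/ArrayRef<Value *>(),
                                  /*DeoptArgs=*/ArrayRef<Value *>(), GCPtrs);
}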
BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, - MDNode *BranchWeights = nullptr) { - return Insert(addBranchWeights(BranchInst::Create(True, False, Cond), - BranchWeights)); + MDNode *BranchWeights = nullptr, + MDNode *Unpredictable = nullptr) { + return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond), + BranchWeights, Unpredictable)); } /// \brief Create a switch instruction with the specified value, default dest, /// and with a hint for the number of cases that will be added (for efficient /// allocation). SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10, - MDNode *BranchWeights = nullptr) { - return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases), - BranchWeights)); + MDNode *BranchWeights = nullptr, + MDNode *Unpredictable = nullptr) { + return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases), + BranchWeights, Unpredictable)); } /// \brief Create an indirect branch instruction with the specified address @@ -667,11 +691,45 @@ public: return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args), Name); } + InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest, + BasicBlock *UnwindDest, ArrayRef Args, + ArrayRef OpBundles, + const Twine &Name = "") { + return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args, + OpBundles), Name); + } ResumeInst *CreateResume(Value *Exn) { return Insert(ResumeInst::Create(Exn)); } + CleanupReturnInst *CreateCleanupRet(CleanupPadInst *CleanupPad, + BasicBlock *UnwindBB = nullptr) { + return Insert(CleanupReturnInst::Create(CleanupPad, UnwindBB)); + } + + CatchSwitchInst *CreateCatchSwitch(Value *ParentPad, BasicBlock *UnwindBB, + unsigned NumHandlers, + const Twine &Name = "") { + return Insert(CatchSwitchInst::Create(ParentPad, UnwindBB, NumHandlers), + Name); + } + + CatchPadInst *CreateCatchPad(Value *ParentPad, ArrayRef Args, + const Twine &Name = "") { + return Insert(CatchPadInst::Create(ParentPad, Args), Name); + } + + CleanupPadInst *CreateCleanupPad(Value *ParentPad, + ArrayRef Args = None, + const Twine &Name = "") { + return Insert(CleanupPadInst::Create(ParentPad, Args), Name); + } + + CatchReturnInst *CreateCatchRet(CatchPadInst *CatchPad, BasicBlock *BB) { + return Insert(CatchReturnInst::Create(CatchPad, BB)); + } + UnreachableInst *CreateUnreachable() { return Insert(new UnreachableInst(Context)); } @@ -700,6 +758,7 @@ private: I->setFastMathFlags(FMF); return I; } + public: Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { @@ -1326,18 +1385,22 @@ public: const Twine &Name = "") { if (V->getType() == DestTy) return V; - if (V->getType()->isPointerTy() && DestTy->isIntegerTy()) + if (V->getType()->getScalarType()->isPointerTy() && + DestTy->getScalarType()->isIntegerTy()) return CreatePtrToInt(V, DestTy, Name); - if (V->getType()->isIntegerTy() && DestTy->isPointerTy()) + if (V->getType()->getScalarType()->isIntegerTy() && + DestTy->getScalarType()->isPointerTy()) return CreateIntToPtr(V, DestTy, Name); return CreateBitCast(V, DestTy, Name); } + private: // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a // compile time error, instead of converting the string to bool for the // isSigned parameter. 
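The CreateCondBr and CreateSwitch overloads above now thread an optional second metadata node through addBranchMetadata, attaching !unpredictable next to the !prof weights. A short sketch, assuming the condition and both blocks already exist:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Emit a conditional branch carrying both profile weights and the new
// !unpredictable hint (an empty metadata node).
void emitHintedBranch(IRBuilder<> &B, Value *Cond, BasicBlock *TrueBB,
                      BasicBlock *FalseBB) {
  MDBuilder MDB(B.getContext());
  MDNode *Weights = MDB.createBranchWeights(/*TrueWeight=*/95,
                                            /*FalseWeight=*/5);
  MDNode *Unpredictable = MDNode::get(B.getContext(), None);
  B.CreateCondBr(Cond, TrueBB, FalseBB, Weights, Unpredictable);
}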
Value *CreateIntCast(Value *, Type *, const char *) = delete; + public: Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") { if (V->getType() == DestTy) @@ -1465,18 +1528,30 @@ public: } CallInst *CreateCall(Value *Callee, ArrayRef Args = None, + ArrayRef OpBundles = None, const Twine &Name = "") { - return Insert(CallInst::Create(Callee, Args), Name); + return Insert(CallInst::Create(Callee, Args, OpBundles), Name); + } + + CallInst *CreateCall(Value *Callee, ArrayRef Args, + const Twine &Name, MDNode *FPMathTag = nullptr) { + PointerType *PTy = cast(Callee->getType()); + FunctionType *FTy = cast(PTy->getElementType()); + return CreateCall(FTy, Callee, Args, Name, FPMathTag); } CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee, - ArrayRef Args, const Twine &Name = "") { - return Insert(CallInst::Create(FTy, Callee, Args), Name); + ArrayRef Args, const Twine &Name = "", + MDNode *FPMathTag = nullptr) { + CallInst *CI = CallInst::Create(FTy, Callee, Args); + if (isa(CI)) + CI = cast(AddFPMathAttributes(CI, FPMathTag, FMF)); + return Insert(CI, Name); } CallInst *CreateCall(Function *Callee, ArrayRef Args, - const Twine &Name = "") { - return CreateCall(Callee->getFunctionType(), Callee, Args, Name); + const Twine &Name = "", MDNode *FPMathTag = nullptr) { + return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag); } Value *CreateSelect(Value *C, Value *True, Value *False, @@ -1594,6 +1669,32 @@ public: Name); } + /// \brief Create an invariant.group.barrier intrinsic call, that stops + /// optimizer to propagate equality using invariant.group metadata. + /// If Ptr type is different from i8*, it's casted to i8* before call + /// and casted back to Ptr type after call. + Value *CreateInvariantGroupBarrier(Value *Ptr) { + Module *M = BB->getParent()->getParent(); + Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M, + Intrinsic::invariant_group_barrier); + + Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType(); + assert(ArgumentAndReturnType == + FnInvariantGroupBarrier->getFunctionType()->getParamType(0) && + "InvariantGroupBarrier should take and return the same type"); + Type *PtrType = Ptr->getType(); + + bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType; + if (PtrTypeConversionNeeded) + Ptr = CreateBitCast(Ptr, ArgumentAndReturnType); + + CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr}); + + if (PtrTypeConversionNeeded) + return CreateBitCast(Fn, PtrType); + return Fn; + } + /// \brief Return a vector value that contains \arg V broadcasted to \p /// NumElts elements. Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") { @@ -1676,6 +1777,6 @@ public: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef) -} +} // end namespace llvm -#endif +#endif // LLVM_IR_IRBUILDER_H diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h index 5f1d56f7e831..88b18e826daf 100644 --- a/include/llvm/IR/IRPrintingPasses.h +++ b/include/llvm/IR/IRPrintingPasses.h @@ -47,6 +47,12 @@ FunctionPass *createPrintFunctionPass(raw_ostream &OS, BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS, const std::string &Banner = ""); +/// Print out a name of an LLVM value without any prefixes. +/// +/// The name is surrounded with ""'s and escaped if it has any special or +/// non-printable characters in it. 
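CreateInvariantGroupBarrier, added above, hides the i8* plumbing: the intrinsic takes and returns i8*, and the builder inserts bitcasts in both directions when the pointer has another type. A sketch, with Ptr assumed to be some pointer-typed Value in scope:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Strip invariant.group knowledge from Ptr before a load whose result may
// legitimately differ from what the metadata promised.
Value *barrierThenLoad(IRBuilder<> &B, Value *Ptr) {
  Value *Escaped = B.CreateInvariantGroupBarrier(Ptr);
  // Escaped has been cast back to Ptr's original pointer type.
  return B.CreateLoad(Escaped);
}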
+void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name); + /// \brief Pass for printing a Module as LLVM's text IR assembly. /// /// Note: This pass is for use with the new pass manager. Use the create...Pass diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 08b51021116c..d2e9e48539ce 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -44,11 +44,12 @@ private: void operator=(const InlineAsm&) = delete; std::string AsmString, Constraints; + FunctionType *FTy; bool HasSideEffects; bool IsAlignStack; AsmDialect Dialect; - InlineAsm(PointerType *Ty, const std::string &AsmString, + InlineAsm(FunctionType *Ty, const std::string &AsmString, const std::string &Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect); ~InlineAsm() override; @@ -56,15 +57,15 @@ private: /// When the ConstantUniqueMap merges two types and makes two InlineAsms /// identical, it destroys one of them with this method. void destroyConstant(); -public: +public: /// InlineAsm::get - Return the specified uniqued inline asm string. /// static InlineAsm *get(FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack = false, AsmDialect asmDialect = AD_ATT); - + bool hasSideEffects() const { return HasSideEffects; } bool isAlignStack() const { return IsAlignStack; } AsmDialect getDialect() const { return Dialect; } @@ -74,11 +75,11 @@ public: PointerType *getType() const { return reinterpret_cast(Value::getType()); } - + /// getFunctionType - InlineAsm's are always pointers to functions. /// FunctionType *getFunctionType() const; - + const std::string &getAsmString() const { return AsmString; } const std::string &getConstraintString() const { return Constraints; } @@ -88,15 +89,15 @@ public: /// static bool Verify(FunctionType *Ty, StringRef Constraints); - // Constraint String Parsing + // Constraint String Parsing enum ConstraintPrefix { isInput, // 'x' isOutput, // '=x' isClobber // '~x' }; - + typedef std::vector ConstraintCodeVector; - + struct SubConstraintInfo { /// MatchingInput - If this is not -1, this is an output constraint where an /// input constraint is required to match it (e.g. "0"). The value is the @@ -113,80 +114,79 @@ public: typedef std::vector SubConstraintInfoVector; struct ConstraintInfo; typedef std::vector ConstraintInfoVector; - + struct ConstraintInfo { /// Type - The basic type of the constraint: input/output/clobber /// ConstraintPrefix Type; - + /// isEarlyClobber - "&": output operand writes result before inputs are all /// read. This is only ever set for an output operand. - bool isEarlyClobber; - + bool isEarlyClobber; + /// MatchingInput - If this is not -1, this is an output constraint where an /// input constraint is required to match it (e.g. "0"). The value is the /// constraint number that matches this one (for example, if this is /// constraint #0 and constraint #4 has the value "0", this will be 4). signed char MatchingInput; - + /// hasMatchingInput - Return true if this is an output constraint that has /// a matching input constraint. bool hasMatchingInput() const { return MatchingInput != -1; } - + /// isCommutative - This is set to true for a constraint that is commutative /// with the next operand. bool isCommutative; - + /// isIndirect - True if this operand is an indirect operand. This means /// that the address of the source or destination is present in the call /// instruction, instead of it being returned or passed in explicitly. 
This /// is represented with a '*' in the asm string. bool isIndirect; - + /// Code - The constraint code, either the register name (in braces) or the /// constraint letter/number. ConstraintCodeVector Codes; - + /// isMultipleAlternative - '|': has multiple-alternative constraints. bool isMultipleAlternative; - + /// multipleAlternatives - If there are multiple alternative constraints, /// this array will contain them. Otherwise it will be empty. SubConstraintInfoVector multipleAlternatives; - + /// The currently selected alternative constraint index. unsigned currentAlternativeIndex; - - ///Default constructor. + + /// Default constructor. ConstraintInfo(); - + /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the /// fields in this structure. If the constraint string is not understood, /// return true, otherwise return false. bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar); - + /// selectAlternative - Point this constraint to the alternative constraint /// indicated by the index. void selectAlternative(unsigned index); }; - + /// ParseConstraints - Split up the constraint string into the specific /// constraints and their prefixes. If this returns an empty vector, and if /// the constraint string itself isn't empty, there was an error parsing. static ConstraintInfoVector ParseConstraints(StringRef ConstraintString); - - /// ParseConstraints - Parse the constraints of this inlineasm object, + + /// ParseConstraints - Parse the constraints of this inlineasm object, /// returning them the same way that ParseConstraints(str) does. ConstraintInfoVector ParseConstraints() const { return ParseConstraints(Constraints); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == Value::InlineAsmVal; } - // These are helper methods for dealing with flags in the INLINEASM SDNode // in the backend. // @@ -203,7 +203,7 @@ public: // code. // Else: // Bit 30-16 - The register class ID to use for the operand. - + enum : uint32_t { // Fixed operands on an INLINEASM SDNode. Op_InputChain = 0, @@ -264,15 +264,15 @@ public: Flag_MatchingOperand = 0x80000000 }; - + static unsigned getFlagWord(unsigned Kind, unsigned NumOps) { assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!"); assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind"); return Kind | (NumOps << 3); } - + /// getFlagWordForMatchingOp - Augment an existing flag word returned by - /// getFlagWord with information indicating that this input operand is tied + /// getFlagWord with information indicating that this input operand is tied /// to a previous output operand. 
static unsigned getFlagWordForMatchingOp(unsigned InputFlag, unsigned MatchedOperandNo) { @@ -355,7 +355,6 @@ public: RC = High - 1; return true; } - }; } // End llvm namespace diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h index f3ce6490fb66..1baca21c73af 100644 --- a/include/llvm/IR/InstIterator.h +++ b/include/llvm/IR/InstIterator.h @@ -115,19 +115,18 @@ private: } }; - -typedef InstIterator, - Function::iterator, BasicBlock::iterator, - Instruction> inst_iterator; -typedef InstIterator, - Function::const_iterator, - BasicBlock::const_iterator, +typedef InstIterator, Function::iterator, + BasicBlock::iterator, Instruction> inst_iterator; +typedef InstIterator, + Function::const_iterator, BasicBlock::const_iterator, const Instruction> const_inst_iterator; +typedef iterator_range inst_range; +typedef iterator_range const_inst_range; inline inst_iterator inst_begin(Function *F) { return inst_iterator(*F); } inline inst_iterator inst_end(Function *F) { return inst_iterator(*F, true); } -inline iterator_range inst_range(Function *F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline inst_range instructions(Function *F) { + return inst_range(inst_begin(F), inst_end(F)); } inline const_inst_iterator inst_begin(const Function *F) { return const_inst_iterator(*F); @@ -135,13 +134,13 @@ inline const_inst_iterator inst_begin(const Function *F) { inline const_inst_iterator inst_end(const Function *F) { return const_inst_iterator(*F, true); } -inline iterator_range inst_range(const Function *F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline const_inst_range instructions(const Function *F) { + return const_inst_range(inst_begin(F), inst_end(F)); } inline inst_iterator inst_begin(Function &F) { return inst_iterator(F); } inline inst_iterator inst_end(Function &F) { return inst_iterator(F, true); } -inline iterator_range inst_range(Function &F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline inst_range instructions(Function &F) { + return inst_range(inst_begin(F), inst_end(F)); } inline const_inst_iterator inst_begin(const Function &F) { return const_inst_iterator(F); @@ -149,8 +148,8 @@ inline const_inst_iterator inst_begin(const Function &F) { inline const_inst_iterator inst_end(const Function &F) { return const_inst_iterator(F, true); } -inline iterator_range inst_range(const Function &F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline const_inst_range instructions(const Function &F) { + return const_inst_range(inst_begin(F), inst_end(F)); } } // End llvm namespace diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h index 581e860b8382..088d3e0fbfa5 100644 --- a/include/llvm/IR/InstVisitor.h +++ b/include/llvm/IR/InstVisitor.h @@ -169,6 +169,9 @@ public: RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);} RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);} RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);} + RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);} + RetTy visitCatchReturnInst(CatchReturnInst &I) { DELEGATE(TerminatorInst); } + RetTy visitCatchSwitchInst(CatchSwitchInst &I) { DELEGATE(TerminatorInst);} RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} @@ -200,6 +203,9 @@ public: RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);} RetTy 
visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); } RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); } + RetTy visitFuncletPadInst(FuncletPadInst &I) { DELEGATE(Instruction); } + RetTy visitCleanupPadInst(CleanupPadInst &I) { DELEGATE(FuncletPadInst); } + RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); } // Handle the special instrinsic instruction classes. RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);} diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index b791ded0e194..5091bb407833 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -16,9 +16,12 @@ #ifndef LLVM_IR_INSTRTYPES_H #define LLVM_IR_INSTRTYPES_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/OperandTraits.h" namespace llvm { @@ -51,8 +54,8 @@ protected: virtual BasicBlock *getSuccessorV(unsigned idx) const = 0; virtual unsigned getNumSuccessorsV() const = 0; virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0; -public: +public: /// Return the number of successors that this terminator has. unsigned getNumSuccessors() const { return getNumSuccessorsV(); @@ -75,8 +78,198 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } -}; + // \brief Returns true if this terminator relates to exception handling. + bool isExceptional() const { + switch (getOpcode()) { + case Instruction::CatchSwitch: + case Instruction::CatchRet: + case Instruction::CleanupRet: + case Instruction::Invoke: + case Instruction::Resume: + return true; + default: + return false; + } + } + + //===--------------------------------------------------------------------===// + // succ_iterator definition + //===--------------------------------------------------------------------===// + + template // Successor Iterator + class SuccIterator : public std::iterator { + typedef std::iterator + super; + + public: + typedef typename super::pointer pointer; + typedef typename super::reference reference; + + private: + Term TermInst; + unsigned idx; + typedef SuccIterator Self; + + inline bool index_is_valid(unsigned idx) { + return idx < TermInst->getNumSuccessors(); + } + + /// \brief Proxy object to allow write access in operator[] + class SuccessorProxy { + Self it; + + public: + explicit SuccessorProxy(const Self &it) : it(it) {} + + SuccessorProxy(const SuccessorProxy &) = default; + + SuccessorProxy &operator=(SuccessorProxy r) { + *this = reference(r); + return *this; + } + + SuccessorProxy &operator=(reference r) { + it.TermInst->setSuccessor(it.idx, r); + return *this; + } + + operator reference() const { return *it; } + }; + + public: + // begin iterator + explicit inline SuccIterator(Term T) : TermInst(T), idx(0) {} + // end iterator + inline SuccIterator(Term T, bool) : TermInst(T) { + if (TermInst) + idx = TermInst->getNumSuccessors(); + else + // Term == NULL happens, if a basic block is not fully constructed and + // consequently getTerminator() returns NULL. In this case we construct + // a SuccIterator which describes a basic block that has zero + // successors. + // Defining SuccIterator for incomplete and malformed CFGs is especially + // useful for debugging. + idx = 0; + } + + /// This is used to interface between code that wants to + /// operate on terminator instructions directly. 
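The InstIterator hunk above renames the inst_range(F) helper to instructions(F) and gives the range types names of their own. Usage is unchanged apart from the spelling; a sketch:

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Walk every instruction in F in block order, ignoring the basic-block
// boundaries that the iterator flattens away.
void dumpOpcodes(Function &F) {
  for (Instruction &I : instructions(F))
    errs() << I.getOpcodeName() << "\n";
}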
+ unsigned getSuccessorIndex() const { return idx; } + + inline bool operator==(const Self &x) const { return idx == x.idx; } + inline bool operator!=(const Self &x) const { return !operator==(x); } + + inline reference operator*() const { return TermInst->getSuccessor(idx); } + inline pointer operator->() const { return operator*(); } + + inline Self &operator++() { + ++idx; + return *this; + } // Preincrement + + inline Self operator++(int) { // Postincrement + Self tmp = *this; + ++*this; + return tmp; + } + + inline Self &operator--() { + --idx; + return *this; + } // Predecrement + inline Self operator--(int) { // Postdecrement + Self tmp = *this; + --*this; + return tmp; + } + + inline bool operator<(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx < x.idx; + } + + inline bool operator<=(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx <= x.idx; + } + inline bool operator>=(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx >= x.idx; + } + + inline bool operator>(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx > x.idx; + } + + inline Self &operator+=(int Right) { + unsigned new_idx = idx + Right; + assert(index_is_valid(new_idx) && "Iterator index out of bound"); + idx = new_idx; + return *this; + } + + inline Self operator+(int Right) const { + Self tmp = *this; + tmp += Right; + return tmp; + } + + inline Self &operator-=(int Right) { return operator+=(-Right); } + + inline Self operator-(int Right) const { return operator+(-Right); } + + inline int operator-(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot work on iterators of different blocks!"); + int distance = idx - x.idx; + return distance; + } + + inline SuccessorProxy operator[](int offset) { + Self tmp = *this; + tmp += offset; + return SuccessorProxy(tmp); + } + + /// Get the source BB of this iterator. 
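This iterator machinery feeds the successors() range that the class exposes a little further down, next to the new isExceptional() query. A sketch of the intended client-side use, assuming a well-formed block:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Enumerate a terminator's successors through the range adaptor and flag
// the terminators that participate in exception handling.
void visitSuccessors(BasicBlock &BB) {
  TerminatorInst *TI = BB.getTerminator();
  if (TI->isExceptional())
    errs() << BB.getName() << " ends in an EH terminator\n";
  for (BasicBlock *Succ : TI->successors())
    errs() << "  -> " << Succ->getName() << "\n";
}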
+ inline BB *getSource() { + assert(TermInst && "Source not available, if basic block was malformed"); + return TermInst->getParent(); + } + }; + + typedef SuccIterator succ_iterator; + typedef SuccIterator + succ_const_iterator; + typedef llvm::iterator_range succ_range; + typedef llvm::iterator_range succ_const_range; + +private: + inline succ_iterator succ_begin() { return succ_iterator(this); } + inline succ_const_iterator succ_begin() const { + return succ_const_iterator(this); + } + inline succ_iterator succ_end() { return succ_iterator(this, true); } + inline succ_const_iterator succ_end() const { + return succ_const_iterator(this, true); + } + +public: + inline succ_range successors() { + return succ_range(succ_begin(), succ_end()); + } + inline succ_const_range successors() const { + return succ_const_range(succ_begin(), succ_end()); + } +}; //===----------------------------------------------------------------------===// // UnaryInstruction Class @@ -95,6 +288,7 @@ protected: : Instruction(Ty, iType, &Op<0>(), 1, IAE) { Op<0>() = V; } + public: // allocate space for exactly one operand void *operator new(size_t s) { @@ -133,6 +327,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value) class BinaryOperator : public Instruction { void *operator new(size_t, unsigned) = delete; + protected: void init(BinaryOps iType); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, @@ -209,7 +404,7 @@ public: BO->setHasNoSignedWrap(true); return BO; } - + static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name = "") { BinaryOperator *BO = Create(Opc, V1, V2, Name); @@ -228,7 +423,7 @@ public: BO->setHasNoUnsignedWrap(true); return BO; } - + static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name = "") { BinaryOperator *BO = Create(Opc, V1, V2, Name); @@ -247,29 +442,29 @@ public: BO->setIsExact(true); return BO; } - -#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name = "") { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \ - } \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \ - } \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \ + +#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \ + static BinaryOperator *Create##NUWNSWEXACT##OPC(Value *V1, Value *V2, \ + const Twine &Name = "") { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \ + } \ + static BinaryOperator *Create##NUWNSWEXACT##OPC( \ + Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \ + } \ + static BinaryOperator *Create##NUWNSWEXACT##OPC( \ + Value *V1, Value *V2, const Twine &Name, Instruction *I) { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \ } - - DEFINE_HELPERS(Add, NSW) // CreateNSWAdd - DEFINE_HELPERS(Add, NUW) // CreateNUWAdd - DEFINE_HELPERS(Sub, NSW) // CreateNSWSub - DEFINE_HELPERS(Sub, NUW) // CreateNUWSub - DEFINE_HELPERS(Mul, NSW) // CreateNSWMul - DEFINE_HELPERS(Mul, NUW) // CreateNUWMul - DEFINE_HELPERS(Shl, NSW) // CreateNSWShl - DEFINE_HELPERS(Shl, NUW) // CreateNUWShl + + DEFINE_HELPERS(Add, NSW) // CreateNSWAdd + DEFINE_HELPERS(Add, NUW) 
// CreateNUWAdd + DEFINE_HELPERS(Sub, NSW) // CreateNSWSub + DEFINE_HELPERS(Sub, NUW) // CreateNUWSub + DEFINE_HELPERS(Mul, NSW) // CreateNSWMul + DEFINE_HELPERS(Mul, NUW) // CreateNUWMul + DEFINE_HELPERS(Shl, NSW) // CreateNSWShl + DEFINE_HELPERS(Shl, NUW) // CreateNUWShl DEFINE_HELPERS(SDiv, Exact) // CreateExactSDiv DEFINE_HELPERS(UDiv, Exact) // CreateExactUDiv @@ -277,7 +472,7 @@ public: DEFINE_HELPERS(LShr, Exact) // CreateExactLShr #undef DEFINE_HELPERS - + /// Helper functions to construct and inspect unary operations (NEG and NOT) /// via binary operators SUB and XOR: /// @@ -355,7 +550,7 @@ public: /// Convenience method to copy supported wrapping, exact, and fast-math flags /// from V to this instruction. void copyIRFlags(const Value *V); - + /// Logical 'and' of any supported wrapping, exact, and fast-math flags of /// V and this instruction. void andIRFlags(const Value *V); @@ -388,6 +583,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// @brief Base class of casting instructions. class CastInst : public UnaryInstruction { void anchor() override; + protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(Type *Ty, unsigned iType, Value *S, @@ -401,6 +597,7 @@ protected: : UnaryInstruction(Ty, iType, S, InsertAtEnd) { setName(NameStr); } + public: /// Provides a way to construct any of the CastInst subclasses using an /// opcode instead of the subclass's constructor. The opcode must be in the @@ -490,7 +687,7 @@ public: Value *S, ///< The pointer value to be casted (operand 0) Type *Ty, ///< The type to which cast should be made const Twine &Name = "", ///< Name for the instruction - Instruction *InsertBefore = 0 ///< Place to insert the instruction + Instruction *InsertBefore = nullptr ///< Place to insert the instruction ); /// @brief Create a BitCast, a PtrToInt, or an IntToPTr cast instruction. @@ -503,7 +700,7 @@ public: Value *S, ///< The pointer value to be casted (operand 0) Type *Ty, ///< The type to which cast should be made const Twine &Name = "", ///< Name for the instruction - Instruction *InsertBefore = 0 ///< Place to insert the instruction + Instruction *InsertBefore = nullptr ///< Place to insert the instruction ); /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts. @@ -677,18 +874,6 @@ public: /// This class is the base class for the comparison instructions. /// @brief Abstract base class of comparison instructions. class CmpInst : public Instruction { - void *operator new(size_t, unsigned) = delete; - CmpInst() = delete; -protected: - CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const Twine &Name = "", - Instruction *InsertBefore = nullptr); - - CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const Twine &Name, - BasicBlock *InsertAtEnd); - - void anchor() override; // Out of line virtual method. public: /// This enumeration lists the possible predicates for CmpInst subclasses. 
/// Values in the range 0-31 are reserved for FCmpInst, while values in the @@ -730,6 +915,22 @@ public: BAD_ICMP_PREDICATE = ICMP_SLE + 1 }; +private: + void *operator new(size_t, unsigned) = delete; + CmpInst() = delete; + +protected: + CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, + Value *LHS, Value *RHS, const Twine &Name = "", + Instruction *InsertBefore = nullptr); + + CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, + Value *LHS, Value *RHS, const Twine &Name, + BasicBlock *InsertAtEnd); + + void anchor() override; // Out of line virtual method. + +public: // allocate space for exactly two operands void *operator new(size_t s) { return User::operator new(s, 2); @@ -740,7 +941,7 @@ public: /// The specified Instruction is allowed to be a dereferenced end iterator. /// @brief Create a CmpInst static CmpInst *Create(OtherOps Op, - unsigned short predicate, Value *S1, + Predicate predicate, Value *S1, Value *S2, const Twine &Name = "", Instruction *InsertBefore = nullptr); @@ -748,7 +949,7 @@ public: /// two operands. Also automatically insert this instruction to the end of /// the BasicBlock specified. /// @brief Create a CmpInst - static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1, + static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, BasicBlock *InsertAtEnd); /// @brief Get the opcode casted to the right type @@ -775,7 +976,6 @@ public: bool isFPPredicate() const { return isFPPredicate(getPredicate()); } bool isIntPredicate() const { return isIntPredicate(getPredicate()); } - /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE, /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc. /// @returns the inverse predicate for the instruction's current predicate. @@ -833,6 +1033,19 @@ public: return isUnsigned(getPredicate()); } + /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert + /// @returns the signed version of the unsigned predicate pred. + /// @brief return the signed version of a predicate + static Predicate getSignedPredicate(Predicate pred); + + /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert + /// @returns the signed version of the predicate for this instruction (which + /// has to be an unsigned predicate). + /// @brief return the signed version of a predicate + Predicate getSignedPredicate() { + return getSignedPredicate(getPredicate()); + } + /// This is just a convenience. /// @brief Determine if this is true when both operands are the same. bool isTrueWhenEqual() const { @@ -847,23 +1060,23 @@ public: /// @returns true if the predicate is unsigned, false otherwise. /// @brief Determine if the predicate is an unsigned operation. - static bool isUnsigned(unsigned short predicate); + static bool isUnsigned(Predicate predicate); /// @returns true if the predicate is signed, false otherwise. /// @brief Determine if the predicate is an signed operation. - static bool isSigned(unsigned short predicate); + static bool isSigned(Predicate predicate); /// @brief Determine if the predicate is an ordered operation. - static bool isOrdered(unsigned short predicate); + static bool isOrdered(Predicate predicate); /// @brief Determine if the predicate is an unordered operation. - static bool isUnordered(unsigned short predicate); + static bool isUnordered(Predicate predicate); /// Determine if the predicate is true when comparing a value with itself. 
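With the predicate helpers above retyped from unsigned short to Predicate, and getSignedPredicate newly added, misuse is caught by the type system rather than by convention. A sketch of the new helper; per its comment it asserts if handed an already-signed predicate:

#include <cassert>
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Map an unsigned comparison to its signed counterpart.
void demoSignedPredicate() {
  CmpInst::Predicate P = CmpInst::ICMP_ULT;
  assert(CmpInst::isUnsigned(P));
  assert(CmpInst::getSignedPredicate(P) == CmpInst::ICMP_SLT);
}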
- static bool isTrueWhenEqual(unsigned short predicate); + static bool isTrueWhenEqual(Predicate predicate); /// Determine if the predicate is false when comparing a value with itself. - static bool isFalseWhenEqual(unsigned short predicate); + static bool isFalseWhenEqual(Predicate predicate); /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { @@ -882,6 +1095,7 @@ public: } return Type::getInt1Ty(opnd_type->getContext()); } + private: // Shadow Value::setValueSubclassData with a private forwarding method so that // subclasses cannot accidentally use it. @@ -890,7 +1104,6 @@ private: } }; - // FIXME: these are redundant if CmpInst < BinaryOperator template <> struct OperandTraits : public FixedNumOperandTraits { @@ -898,6 +1111,523 @@ struct OperandTraits : public FixedNumOperandTraits { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value) -} // End llvm namespace +//===----------------------------------------------------------------------===// +// FuncletPadInst Class +//===----------------------------------------------------------------------===// +class FuncletPadInst : public Instruction { +private: + void init(Value *ParentPad, ArrayRef Args, const Twine &NameStr); -#endif + FuncletPadInst(const FuncletPadInst &CPI); + + explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore); + explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, BasicBlock *InsertAtEnd); + +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + friend class CatchPadInst; + friend class CleanupPadInst; + FuncletPadInst *cloneImpl() const; + +public: + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// getNumArgOperands - Return the number of funcletpad arguments. + /// + unsigned getNumArgOperands() const { return getNumOperands() - 1; } + + /// Convenience accessors + + /// \brief Return the outer EH-pad this funclet is nested within. + /// + /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst + /// is a CatchPadInst. + Value *getParentPad() const { return Op<-1>(); } + void setParentPad(Value *ParentPad) { + assert(ParentPad); + Op<-1>() = ParentPad; + } + + /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument. + /// + Value *getArgOperand(unsigned i) const { return getOperand(i); } + void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } + + /// arg_operands - iteration adapter for range-for loops. + op_range arg_operands() { return op_range(op_begin(), op_end() - 1); } + + /// arg_operands - iteration adapter for range-for loops. + const_op_range arg_operands() const { + return const_op_range(op_begin(), op_end() - 1); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *I) { return I->isFuncletPad(); } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits + : public VariadicOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value) + +/// \brief A lightweight accessor for an operand bundle meant to be passed +/// around by value. 
+struct OperandBundleUse { + ArrayRef Inputs; + + OperandBundleUse() {} + explicit OperandBundleUse(StringMapEntry *Tag, ArrayRef Inputs) + : Inputs(Inputs), Tag(Tag) {} + + /// \brief Return true if the operand at index \p Idx in this operand bundle + /// has the attribute A. + bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const { + if (isDeoptOperandBundle()) + if (A == Attribute::ReadOnly || A == Attribute::NoCapture) + return Inputs[Idx]->getType()->isPointerTy(); + + // Conservative answer: no operands have any attributes. + return false; + }; + + /// \brief Return the tag of this operand bundle as a string. + StringRef getTagName() const { + return Tag->getKey(); + } + + /// \brief Return the tag of this operand bundle as an integer. + /// + /// Operand bundle tags are interned by LLVMContextImpl::getOrInsertBundleTag, + /// and this function returns the unique integer getOrInsertBundleTag + /// associated the tag of this operand bundle to. + uint32_t getTagID() const { + return Tag->getValue(); + } + + /// \brief Return true if this is a "deopt" operand bundle. + bool isDeoptOperandBundle() const { + return getTagID() == LLVMContext::OB_deopt; + } + + /// \brief Return true if this is a "funclet" operand bundle. + bool isFuncletOperandBundle() const { + return getTagID() == LLVMContext::OB_funclet; + } + +private: + /// \brief Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag. + StringMapEntry *Tag; +}; + +/// \brief A container for an operand bundle being viewed as a set of values +/// rather than a set of uses. +/// +/// Unlike OperandBundleUse, OperandBundleDefT owns the memory it carries, and +/// so it is possible to create and pass around "self-contained" instances of +/// OperandBundleDef and ConstOperandBundleDef. +template class OperandBundleDefT { + std::string Tag; + std::vector Inputs; + +public: + explicit OperandBundleDefT(std::string Tag, std::vector Inputs) + : Tag(std::move(Tag)), Inputs(std::move(Inputs)) {} + explicit OperandBundleDefT(std::string Tag, ArrayRef Inputs) + : Tag(std::move(Tag)), Inputs(Inputs) {} + + explicit OperandBundleDefT(const OperandBundleUse &OBU) { + Tag = OBU.getTagName(); + Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end()); + } + + ArrayRef inputs() const { return Inputs; } + + typedef typename std::vector::const_iterator input_iterator; + size_t input_size() const { return Inputs.size(); } + input_iterator input_begin() const { return Inputs.begin(); } + input_iterator input_end() const { return Inputs.end(); } + + StringRef getTag() const { return Tag; } +}; + +typedef OperandBundleDefT OperandBundleDef; +typedef OperandBundleDefT ConstOperandBundleDef; + +/// \brief A mixin to add operand bundle functionality to llvm instruction +/// classes. +/// +/// OperandBundleUser uses the descriptor area co-allocated with the host User +/// to store some meta information about which operands are "normal" operands, +/// and which ones belong to some operand bundle. +/// +/// The layout of an operand bundle user is +/// +/// +-----------uint32_t End-------------------------------------+ +/// | | +/// | +--------uint32_t Begin--------------------+ | +/// | | | | +/// ^ ^ v v +/// |------|------|----|----|----|----|----|---------|----|---------|----|----- +/// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. 
| Un +/// |------|------|----|----|----|----|----|---------|----|---------|----|----- +/// v v ^ ^ +/// | | | | +/// | +--------uint32_t Begin------------+ | +/// | | +/// +-----------uint32_t End-----------------------------+ +/// +/// +/// BOI0, BOI1 ... are descriptions of operand bundles in this User's use list. +/// These descriptions are installed and managed by this class, and they're all +/// instances of OperandBundleUser::BundleOpInfo. +/// +/// DU is an additional descriptor installed by User's 'operator new' to keep +/// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not +/// access or modify DU in any way, it's an implementation detail private to +/// User. +/// +/// The regular Use& vector for the User starts at U0. The operand bundle uses +/// are part of the Use& vector, just like normal uses. In the diagram above, +/// the operand bundle uses start at BOI0_U0. Each instance of BundleOpInfo has +/// information about a contiguous set of uses constituting an operand bundle, +/// and the total set of operand bundle uses themselves form a contiguous set of +/// uses (i.e. there are no gaps between uses corresponding to individual +/// operand bundles). +/// +/// This class does not know the location of the set of operand bundle uses +/// within the use list -- that is decided by the User using this class via the +/// BeginIdx argument in populateBundleOperandInfos. +/// +/// Currently operand bundle users with hung-off operands are not supported. +template class OperandBundleUser { +public: + /// \brief Return the number of operand bundles associated with this User. + unsigned getNumOperandBundles() const { + return std::distance(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Return true if this User has any operand bundles. + bool hasOperandBundles() const { return getNumOperandBundles() != 0; } + + /// \brief Return the index of the first bundle operand in the Use array. + unsigned getBundleOperandsStartIndex() const { + assert(hasOperandBundles() && "Don't call otherwise!"); + return bundle_op_info_begin()->Begin; + } + + /// \brief Return the index of the last bundle operand in the Use array. + unsigned getBundleOperandsEndIndex() const { + assert(hasOperandBundles() && "Don't call otherwise!"); + return bundle_op_info_end()[-1].End; + } + + /// \brief Return the total number operands (not operand bundles) used by + /// every operand bundle in this OperandBundleUser. + unsigned getNumTotalBundleOperands() const { + if (!hasOperandBundles()) + return 0; + + unsigned Begin = getBundleOperandsStartIndex(); + unsigned End = getBundleOperandsEndIndex(); + + assert(Begin <= End && "Should be!"); + return End - Begin; + } + + /// \brief Return the operand bundle at a specific index. + OperandBundleUse getOperandBundleAt(unsigned Index) const { + assert(Index < getNumOperandBundles() && "Index out of bounds!"); + return operandBundleFromBundleOpInfo(*(bundle_op_info_begin() + Index)); + } + + /// \brief Return the number of operand bundles with the tag Name attached to + /// this instruction. + unsigned countOperandBundlesOfType(StringRef Name) const { + unsigned Count = 0; + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + if (getOperandBundleAt(i).getTagName() == Name) + Count++; + + return Count; + } + + /// \brief Return the number of operand bundles with the tag ID attached to + /// this instruction. 
+ unsigned countOperandBundlesOfType(uint32_t ID) const { + unsigned Count = 0; + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + if (getOperandBundleAt(i).getTagID() == ID) + Count++; + + return Count; + } + + /// \brief Return an operand bundle by name, if present. + /// + /// It is an error to call this for operand bundle types that may have + /// multiple instances of them on the same instruction. + Optional getOperandBundle(StringRef Name) const { + assert(countOperandBundlesOfType(Name) < 2 && "Precondition violated!"); + + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) { + OperandBundleUse U = getOperandBundleAt(i); + if (U.getTagName() == Name) + return U; + } + + return None; + } + + /// \brief Return an operand bundle by tag ID, if present. + /// + /// It is an error to call this for operand bundle types that may have + /// multiple instances of them on the same instruction. + Optional getOperandBundle(uint32_t ID) const { + assert(countOperandBundlesOfType(ID) < 2 && "Precondition violated!"); + + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) { + OperandBundleUse U = getOperandBundleAt(i); + if (U.getTagID() == ID) + return U; + } + + return None; + } + + /// \brief Return the list of operand bundles attached to this instruction as + /// a vector of OperandBundleDefs. + /// + /// This function copies the OperandBundeUse instances associated with this + /// OperandBundleUser to a vector of OperandBundleDefs. Note: + /// OperandBundeUses and OperandBundleDefs are non-trivially *different* + /// representations of operand bundles (see documentation above). + void getOperandBundlesAsDefs(SmallVectorImpl &Defs) const { + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + Defs.emplace_back(getOperandBundleAt(i)); + } + + /// \brief Return the operand bundle for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + OperandBundleUse getOperandBundleForOperand(unsigned OpIdx) const { + return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx)); + } + + /// \brief Return true if this operand bundle user has operand bundles that + /// may read from the heap. + bool hasReadingOperandBundles() const { + // Implementation note: this is a conservative implementation of operand + // bundle semantics, where *any* operand bundle forces a callsite to be at + // least readonly. + return hasOperandBundles(); + } + + /// \brief Return true if this operand bundle user has operand bundles that + /// may write to the heap. + bool hasClobberingOperandBundles() const { + for (auto &BOI : bundle_op_infos()) { + if (BOI.Tag->second == LLVMContext::OB_deopt || + BOI.Tag->second == LLVMContext::OB_funclet) + continue; + + // This instruction has an operand bundle that is not known to us. + // Assume the worst. + return true; + } + + return false; + } + + /// \brief Return true if the bundle operand at index \p OpIdx has the + /// attribute \p A. + bool bundleOperandHasAttr(unsigned OpIdx, Attribute::AttrKind A) const { + auto &BOI = getBundleOpInfoForOperand(OpIdx); + auto OBU = operandBundleFromBundleOpInfo(BOI); + return OBU.operandHasAttr(OpIdx - BOI.Begin, A); + } + + /// \brief Return true if \p Other has the same sequence of operand bundle + /// tags with the same number of operands on each one of them as this + /// OperandBundleUser. 
+ bool hasIdenticalOperandBundleSchema( + const OperandBundleUser &Other) const { + if (getNumOperandBundles() != Other.getNumOperandBundles()) + return false; + + return std::equal(bundle_op_info_begin(), bundle_op_info_end(), + Other.bundle_op_info_begin()); + }; + +protected: + /// \brief Is the function attribute S disallowed by some operand bundle on + /// this operand bundle user? + bool isFnAttrDisallowedByOpBundle(StringRef S) const { + // Operand bundles only possibly disallow readnone, readonly and argmenonly + // attributes. All String attributes are fine. + return false; + } + + /// \brief Is the function attribute A disallowed by some operand bundle on + /// this operand bundle user? + bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const { + switch (A) { + default: + return false; + + case Attribute::ArgMemOnly: + return hasReadingOperandBundles(); + + case Attribute::ReadNone: + return hasReadingOperandBundles(); + + case Attribute::ReadOnly: + return hasClobberingOperandBundles(); + } + + llvm_unreachable("switch has a default case!"); + } + + /// \brief Used to keep track of an operand bundle. See the main comment on + /// OperandBundleUser above. + struct BundleOpInfo { + /// \brief The operand bundle tag, interned by + /// LLVMContextImpl::getOrInsertBundleTag. + StringMapEntry *Tag; + + /// \brief The index in the Use& vector where operands for this operand + /// bundle starts. + uint32_t Begin; + + /// \brief The index in the Use& vector where operands for this operand + /// bundle ends. + uint32_t End; + + bool operator==(const BundleOpInfo &Other) const { + return Tag == Other.Tag && Begin == Other.Begin && End == Other.End; + } + }; + + /// \brief Simple helper function to map a BundleOpInfo to an + /// OperandBundleUse. + OperandBundleUse + operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const { + auto op_begin = static_cast(this)->op_begin(); + ArrayRef Inputs(op_begin + BOI.Begin, op_begin + BOI.End); + return OperandBundleUse(BOI.Tag, Inputs); + } + + typedef BundleOpInfo *bundle_op_iterator; + typedef const BundleOpInfo *const_bundle_op_iterator; + + /// \brief Return the start of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + bundle_op_iterator bundle_op_info_begin() { + if (!static_cast(this)->hasDescriptor()) + return nullptr; + + uint8_t *BytesBegin = static_cast(this)->getDescriptor().begin(); + return reinterpret_cast(BytesBegin); + } + + /// \brief Return the start of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + const_bundle_op_iterator bundle_op_info_begin() const { + auto *NonConstThis = + const_cast *>(this); + return NonConstThis->bundle_op_info_begin(); + } + + /// \brief Return the end of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + bundle_op_iterator bundle_op_info_end() { + if (!static_cast(this)->hasDescriptor()) + return nullptr; + + uint8_t *BytesEnd = static_cast(this)->getDescriptor().end(); + return reinterpret_cast(BytesEnd); + } + + /// \brief Return the end of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + const_bundle_op_iterator bundle_op_info_end() const { + auto *NonConstThis = + const_cast *>(this); + return NonConstThis->bundle_op_info_end(); + } + + /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). 
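From the caller's side, the OperandBundleUser API above pairs with the OperandBundleDef-taking IRBuilder overloads earlier in this patch. A sketch that attaches a "deopt" bundle to a call and reads it back, with the callee and state value assumed to be in scope:

#include <cassert>
#include <vector>
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Build a call carrying one deoptimization operand bundle, then query it
// through the OperandBundleUser interface that CallInst mixes in.
CallInst *emitCallWithDeoptState(IRBuilder<> &B, Value *Callee, Value *State) {
  OperandBundleDef Deopt("deopt", std::vector<Value *>{State});
  CallInst *CI = B.CreateCall(Callee, /*Args=*/None, {Deopt});
  assert(CI->getNumOperandBundles() == 1);
  assert(CI->getOperandBundle(LLVMContext::OB_deopt).hasValue());
  return CI;
}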
+ iterator_range bundle_op_infos() { + return make_range(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). + iterator_range bundle_op_infos() const { + return make_range(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Populate the BundleOpInfo instances and the Use& vector from \p + /// Bundles. Return the op_iterator pointing to the Use& one past the last + /// last bundle operand use. + /// + /// Each \p OperandBundleDef instance is tracked by a OperandBundleInfo + /// instance allocated in this User's descriptor. + OpIteratorTy populateBundleOperandInfos(ArrayRef Bundles, + const unsigned BeginIndex) { + auto It = static_cast(this)->op_begin() + BeginIndex; + for (auto &B : Bundles) + It = std::copy(B.input_begin(), B.input_end(), It); + + auto *ContextImpl = static_cast(this)->getContext().pImpl; + auto BI = Bundles.begin(); + unsigned CurrentIndex = BeginIndex; + + for (auto &BOI : bundle_op_infos()) { + assert(BI != Bundles.end() && "Incorrect allocation?"); + + BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag()); + BOI.Begin = CurrentIndex; + BOI.End = CurrentIndex + BI->input_size(); + CurrentIndex = BOI.End; + BI++; + } + + assert(BI == Bundles.end() && "Incorrect allocation?"); + + return It; + } + + /// \brief Return the BundleOpInfo for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + const BundleOpInfo &getBundleOpInfoForOperand(unsigned OpIdx) const { + for (auto &BOI : bundle_op_infos()) + if (BOI.Begin <= OpIdx && OpIdx < BOI.End) + return BOI; + + llvm_unreachable("Did not find operand bundle for operand!"); + } + + /// \brief Return the total number of values used in \p Bundles. + static unsigned CountBundleInputs(ArrayRef Bundles) { + unsigned Total = 0; + for (auto &B : Bundles) + Total += B.input_size(); + return Total; + } +}; + +} // end llvm namespace + +#endif // LLVM_IR_INSTRTYPES_H diff --git a/include/llvm/IR/Instruction.def b/include/llvm/IR/Instruction.def index d46314cc761d..18711abb8060 100644 --- a/include/llvm/IR/Instruction.def +++ b/include/llvm/IR/Instruction.def @@ -1,21 +1,21 @@ //===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file contains descriptions of the various LLVM instructions. This is -// used as a central place for enumerating the different instructions and +// used as a central place for enumerating the different instructions and // should eventually be the place to put comments about the instructions. // //===----------------------------------------------------------------------===// // NOTE: NO INCLUDE GUARD DESIRED! -// Provide definitions of macros so that users of this file do not have to +// Provide definitions of macros so that users of this file do not have to // define everything to use it... 
// #ifndef FIRST_TERM_INST @@ -74,6 +74,20 @@ #define LAST_CAST_INST(num) #endif +#ifndef FIRST_FUNCLETPAD_INST +#define FIRST_FUNCLETPAD_INST(num) +#endif +#ifndef HANDLE_FUNCLETPAD_INST +#ifndef HANDLE_INST +#define HANDLE_FUNCLETPAD_INST(num, opcode, Class) +#else +#define HANDLE_FUNCLETPAD_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_FUNCLETPAD_INST +#define LAST_FUNCLETPAD_INST(num) +#endif + #ifndef FIRST_OTHER_INST #define FIRST_OTHER_INST(num) #endif @@ -88,92 +102,99 @@ #define LAST_OTHER_INST(num) #endif - // Terminator Instructions - These instructions are used to terminate a basic // block of the program. Every basic block must end with one of these // instructions for it to be a well formed basic block. // FIRST_TERM_INST ( 1) -HANDLE_TERM_INST ( 1, Ret , ReturnInst) -HANDLE_TERM_INST ( 2, Br , BranchInst) -HANDLE_TERM_INST ( 3, Switch , SwitchInst) -HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) -HANDLE_TERM_INST ( 5, Invoke , InvokeInst) -HANDLE_TERM_INST ( 6, Resume , ResumeInst) -HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst) - LAST_TERM_INST ( 7) +HANDLE_TERM_INST ( 1, Ret , ReturnInst) +HANDLE_TERM_INST ( 2, Br , BranchInst) +HANDLE_TERM_INST ( 3, Switch , SwitchInst) +HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) +HANDLE_TERM_INST ( 5, Invoke , InvokeInst) +HANDLE_TERM_INST ( 6, Resume , ResumeInst) +HANDLE_TERM_INST ( 7, Unreachable , UnreachableInst) +HANDLE_TERM_INST ( 8, CleanupRet , CleanupReturnInst) +HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst) +HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst) + LAST_TERM_INST (10) // Standard binary operators... - FIRST_BINARY_INST( 8) -HANDLE_BINARY_INST( 8, Add , BinaryOperator) -HANDLE_BINARY_INST( 9, FAdd , BinaryOperator) -HANDLE_BINARY_INST(10, Sub , BinaryOperator) -HANDLE_BINARY_INST(11, FSub , BinaryOperator) -HANDLE_BINARY_INST(12, Mul , BinaryOperator) -HANDLE_BINARY_INST(13, FMul , BinaryOperator) -HANDLE_BINARY_INST(14, UDiv , BinaryOperator) -HANDLE_BINARY_INST(15, SDiv , BinaryOperator) -HANDLE_BINARY_INST(16, FDiv , BinaryOperator) -HANDLE_BINARY_INST(17, URem , BinaryOperator) -HANDLE_BINARY_INST(18, SRem , BinaryOperator) -HANDLE_BINARY_INST(19, FRem , BinaryOperator) + FIRST_BINARY_INST(11) +HANDLE_BINARY_INST(11, Add , BinaryOperator) +HANDLE_BINARY_INST(12, FAdd , BinaryOperator) +HANDLE_BINARY_INST(13, Sub , BinaryOperator) +HANDLE_BINARY_INST(14, FSub , BinaryOperator) +HANDLE_BINARY_INST(15, Mul , BinaryOperator) +HANDLE_BINARY_INST(16, FMul , BinaryOperator) +HANDLE_BINARY_INST(17, UDiv , BinaryOperator) +HANDLE_BINARY_INST(18, SDiv , BinaryOperator) +HANDLE_BINARY_INST(19, FDiv , BinaryOperator) +HANDLE_BINARY_INST(20, URem , BinaryOperator) +HANDLE_BINARY_INST(21, SRem , BinaryOperator) +HANDLE_BINARY_INST(22, FRem , BinaryOperator) // Logical operators (integer operands) -HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical) -HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical) -HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic) -HANDLE_BINARY_INST(23, And , BinaryOperator) -HANDLE_BINARY_INST(24, Or , BinaryOperator) -HANDLE_BINARY_INST(25, Xor , BinaryOperator) - LAST_BINARY_INST(25) +HANDLE_BINARY_INST(23, Shl , BinaryOperator) // Shift left (logical) +HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical) +HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic) +HANDLE_BINARY_INST(26, And , BinaryOperator) +HANDLE_BINARY_INST(27, Or , 
BinaryOperator) +HANDLE_BINARY_INST(28, Xor , BinaryOperator) + LAST_BINARY_INST(28) // Memory operators... - FIRST_MEMORY_INST(26) -HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management -HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs -HANDLE_MEMORY_INST(28, Store , StoreInst ) -HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst) -HANDLE_MEMORY_INST(30, Fence , FenceInst ) -HANDLE_MEMORY_INST(31, AtomicCmpXchg , AtomicCmpXchgInst ) -HANDLE_MEMORY_INST(32, AtomicRMW , AtomicRMWInst ) - LAST_MEMORY_INST(32) + FIRST_MEMORY_INST(29) +HANDLE_MEMORY_INST(29, Alloca, AllocaInst) // Stack management +HANDLE_MEMORY_INST(30, Load , LoadInst ) // Memory manipulation instrs +HANDLE_MEMORY_INST(31, Store , StoreInst ) +HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst) +HANDLE_MEMORY_INST(33, Fence , FenceInst ) +HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(35) // Cast operators ... -// NOTE: The order matters here because CastInst::isEliminableCastPair +// NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. - FIRST_CAST_INST(33) -HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast -HANDLE_CAST_INST(45, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast - LAST_CAST_INST(45) + FIRST_CAST_INST(36) +HANDLE_CAST_INST(36, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(37, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(38, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(39, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(40, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(41, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(42, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(43, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(44, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(47, BitCast , BitCastInst ) // Type cast +HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast + LAST_CAST_INST(48) + + FIRST_FUNCLETPAD_INST(49) +HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst) +HANDLE_FUNCLETPAD_INST(50, CatchPad , CatchPadInst) + LAST_FUNCLETPAD_INST(50) // Other operators... - FIRST_OTHER_INST(46) -HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr. 
-HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. - LAST_OTHER_INST(59) + FIRST_OTHER_INST(51) +HANDLE_OTHER_INST(51, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr. +HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(56, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(57, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(63, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction. + LAST_OTHER_INST(64) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST @@ -191,6 +212,10 @@ HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. #undef HANDLE_CAST_INST #undef LAST_CAST_INST +#undef FIRST_FUNCLETPAD_INST +#undef HANDLE_FUNCLETPAD_INST +#undef LAST_FUNCLETPAD_INST + #undef FIRST_OTHER_INST #undef HANDLE_OTHER_INST #undef LAST_OTHER_INST diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 31f363f70a5b..03c45497fa95 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -30,25 +30,11 @@ class BasicBlock; struct AAMDNodes; template <> -struct ilist_traits - : public SymbolTableListTraits { +struct SymbolTableListSentinelTraits + : public ilist_half_embedded_sentinel_traits {}; - /// \brief Return a node that marks the end of a list. - /// - /// The sentinel is relative to this instance, so we use a non-static - /// method. 
- Instruction *createSentinel() const; - static void destroySentinel(Instruction *) {} - - Instruction *provideInitialHead() const { return createSentinel(); } - Instruction *ensureHead(Instruction *) const { return createSentinel(); } - static void noteHead(Instruction *, Instruction *) {} - -private: - mutable ilist_half_node Sentinel; -}; - -class Instruction : public User, public ilist_node { +class Instruction : public User, + public ilist_node_with_parent { void operator=(const Instruction &) = delete; Instruction(const Instruction &) = delete; @@ -80,6 +66,13 @@ public: const Module *getModule() const; Module *getModule(); + /// \brief Return the function this instruction belongs to. + /// + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + const Function *getFunction() const; + Function *getFunction(); + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. /// @@ -89,7 +82,7 @@ public: /// block and deletes it. /// /// \returns an iterator pointing to the element after the erased one - iplist::iterator eraseFromParent(); + SymbolTableList::iterator eraseFromParent(); /// Insert an unlinked instruction into a basic block immediately before /// the specified instruction. @@ -116,6 +109,7 @@ public: bool isBinaryOp() const { return isBinaryOp(getOpcode()); } bool isShift() { return isShift(getOpcode()); } bool isCast() const { return isCast(getOpcode()); } + bool isFuncletPad() const { return isFuncletPad(getOpcode()); } static const char* getOpcodeName(unsigned OpCode); @@ -148,6 +142,11 @@ public: return OpCode >= CastOpsBegin && OpCode < CastOpsEnd; } + /// @brief Determine if the OpCode is one of the FuncletPadInst instructions. + static inline bool isFuncletPad(unsigned OpCode) { + return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd; + } + //===--------------------------------------------------------------------===// // Metadata manipulation. //===--------------------------------------------------------------------===// @@ -204,20 +203,22 @@ public: void setMetadata(unsigned KindID, MDNode *Node); void setMetadata(StringRef Kind, MDNode *Node); - /// \brief Drop unknown metadata. + /// Drop all unknown metadata except for debug locations. + /// @{ /// Passes are required to drop metadata they don't understand. This is a /// convenience method for passes to do so. - void dropUnknownMetadata(ArrayRef KnownIDs); - void dropUnknownMetadata() { - return dropUnknownMetadata(None); + void dropUnknownNonDebugMetadata(ArrayRef KnownIDs); + void dropUnknownNonDebugMetadata() { + return dropUnknownNonDebugMetadata(None); } - void dropUnknownMetadata(unsigned ID1) { - return dropUnknownMetadata(makeArrayRef(ID1)); + void dropUnknownNonDebugMetadata(unsigned ID1) { + return dropUnknownNonDebugMetadata(makeArrayRef(ID1)); } - void dropUnknownMetadata(unsigned ID1, unsigned ID2) { + void dropUnknownNonDebugMetadata(unsigned ID1, unsigned ID2) { unsigned IDs[] = {ID1, ID2}; - return dropUnknownMetadata(IDs); + return dropUnknownNonDebugMetadata(IDs); } + /// @} /// setAAMetadata - Sets the metadata on this instruction from the /// AAMDNodes structure. @@ -388,6 +389,19 @@ public: return mayWriteToMemory() || mayThrow() || !mayReturn(); } + /// \brief Return true if the instruction is a variety of EH-block. 
+ bool isEHPad() const { + switch (getOpcode()) { + case Instruction::CatchSwitch: + case Instruction::CatchPad: + case Instruction::CleanupPad: + case Instruction::LandingPad: + return true; + default: + return false; + } + } + /// clone() - Create a copy of 'this' instruction that is identical in all /// ways except the following: /// * The instruction has no parent @@ -468,6 +482,13 @@ public: #include "llvm/IR/Instruction.def" }; + enum FuncletPadOps { +#define FIRST_FUNCLETPAD_INST(N) FuncletPadOpsBegin = N, +#define HANDLE_FUNCLETPAD_INST(N, OPC, CLASS) OPC = N, +#define LAST_FUNCLETPAD_INST(N) FuncletPadOpsEnd = N+1 +#include "llvm/IR/Instruction.def" + }; + enum OtherOps { #define FIRST_OTHER_INST(N) OtherOpsBegin = N, #define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N, @@ -489,7 +510,7 @@ private: (V ? HasMetadataBit : 0)); } - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; void setParent(BasicBlock *P); protected: // Instruction subclasses can stick up to 15 bits of stuff into the @@ -515,17 +536,6 @@ private: Instruction *cloneImpl() const; }; -inline Instruction *ilist_traits::createSentinel() const { - // Since i(p)lists always publicly derive from their corresponding traits, - // placing a data member in this class will augment the i(p)list. But since - // the NodeTy is expected to be publicly derive from ilist_node, - // there is a legal viable downcast from it to NodeTy. We use this trick to - // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the - // sentinel. Dereferencing the sentinel is forbidden (save the - // ilist_node), so no one will ever notice the superposition. - return static_cast(&Sentinel); -} - // Instruction* is only 4-byte aligned. template<> class PointerLikeTypeTraits { diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 07d5f111b9e1..d781c7af36d7 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -158,6 +159,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -166,7 +168,6 @@ private: } }; - //===----------------------------------------------------------------------===// // LoadInst Class //===----------------------------------------------------------------------===// @@ -176,6 +177,7 @@ private: /// class LoadInst : public UnaryInstruction { void AssertOK(); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -290,7 +292,6 @@ public: return getPointerOperand()->getType()->getPointerAddressSpace(); } - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; @@ -298,6 +299,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. 
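The FUNCLETPAD_INST macros and the FuncletPadOps enum above follow Instruction.def's X-macro convention: a client defines only the HANDLE_* hooks it cares about, includes the file, and the preprocessor stamps out one entry per opcode; every FIRST_*/HANDLE_*/LAST_* macro defaults to a no-op and is #undef'd at the end of the file, so each include starts clean. A minimal sketch of the pattern under those rules; the OpCodes enum and the opcodeName helper are illustrative, not part of this patch, and assume the LLVM headers are on the include path:

    #include <cstdio>

    // Expansion 1: one enumerator per opcode. Instruction.def forwards each
    // HANDLE_*_INST group to HANDLE_INST when only the latter is defined, so
    // this covers terminators, binops, funclet pads, and the rest.
    enum OpCodes {
    #define HANDLE_INST(num, opcode, Class) opcode##Op = num,
    #include "llvm/IR/Instruction.def"
    };

    // Expansion 2: map an opcode number back to its name. The .def file
    // #undefs HANDLE_INST at the end, so it must be redefined here.
    static const char *opcodeName(unsigned Op) {
      switch (Op) {
    #define HANDLE_INST(num, opcode, Class)                                    \
      case num:                                                                \
        return #opcode;
    #include "llvm/IR/Instruction.def"
      default:
        return "<unknown opcode>";
      }
    }

    int main() {
      std::printf("%s\n", opcodeName(CleanupPadOp)); // prints "CleanupPad"
    }

Keeping each group's numbering contiguous is what lets the range tests above, such as isFuncletPad against FuncletPadOpsBegin/FuncletPadOpsEnd, remain simple comparisons.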
@@ -306,7 +308,6 @@ private: } }; - //===----------------------------------------------------------------------===// // StoreInst Class //===----------------------------------------------------------------------===// @@ -316,6 +317,7 @@ private: class StoreInst : public Instruction { void *operator new(size_t, unsigned) = delete; void AssertOK(); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -344,7 +346,6 @@ public: SynchronizationScope SynchScope, BasicBlock *InsertAtEnd); - /// isVolatile - Return true if this is a store to a volatile memory /// location. /// @@ -422,6 +423,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -445,6 +447,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) class FenceInst : public Instruction { void *operator new(size_t, unsigned) = delete; void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -496,6 +499,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -517,6 +521,7 @@ class AtomicCmpXchgInst : public Instruction { void Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -648,6 +653,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -673,6 +679,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value) /// class AtomicRMWInst : public Instruction { void *operator new(size_t, unsigned) = delete; + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -795,6 +802,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: void Init(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, SynchronizationScope SynchScope); @@ -831,6 +839,8 @@ class GetElementPtrInst : public Instruction { Type *SourceElementType; Type *ResultElementType; + void anchor() override; + GetElementPtrInst(const GetElementPtrInst &GEPI); void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); @@ -1078,10 +1088,8 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr, init(Ptr, IdxList, NameStr); } - DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) - //===----------------------------------------------------------------------===// // ICmpInst Class //===----------------------------------------------------------------------===// @@ -1091,6 +1099,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// must be identical types. /// \brief Represent an integer comparison operator. 
class ICmpInst: public CmpInst { + void anchor() override; + void AssertOK() { assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && getPredicate() <= CmpInst::LAST_ICMP_PREDICATE && @@ -1226,7 +1236,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - }; //===----------------------------------------------------------------------===// @@ -1350,62 +1359,102 @@ public: /// field to indicate whether or not this is a tail call. The rest of the bits /// hold the calling convention of the call. /// -class CallInst : public Instruction { +class CallInst : public Instruction, + public OperandBundleUser { AttributeSet AttributeList; ///< parameter attributes for call FunctionType *FTy; CallInst(const CallInst &CI); - void init(Value *Func, ArrayRef Args, const Twine &NameStr) { + void init(Value *Func, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr) { init(cast( cast(Func->getType())->getElementType()), - Func, Args, NameStr); + Func, Args, Bundles, NameStr); } void init(FunctionType *FTy, Value *Func, ArrayRef Args, - const Twine &NameStr); + ArrayRef Bundles, const Twine &NameStr); void init(Value *Func, const Twine &NameStr); /// Construct a CallInst given a range of arguments. /// \brief Construct a CallInst from a range of arguments inline CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, - const Twine &NameStr, Instruction *InsertBefore); - inline CallInst(Value *Func, ArrayRef Args, const Twine &NameStr, + ArrayRef Bundles, const Twine &NameStr, + Instruction *InsertBefore); + inline CallInst(Value *Func, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr, Instruction *InsertBefore) : CallInst(cast( cast(Func->getType())->getElementType()), - Func, Args, NameStr, InsertBefore) {} + Func, Args, Bundles, NameStr, InsertBefore) {} + + inline CallInst(Value *Func, ArrayRef Args, const Twine &NameStr, + Instruction *InsertBefore) + : CallInst(Func, Args, None, NameStr, InsertBefore) {} /// Construct a CallInst given a range of arguments. /// \brief Construct a CallInst from a range of arguments inline CallInst(Value *Func, ArrayRef Args, - const Twine &NameStr, BasicBlock *InsertAtEnd); + ArrayRef Bundles, const Twine &NameStr, + BasicBlock *InsertAtEnd); explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore); CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd); + + friend class OperandBundleUser; + bool hasDescriptor() const { return HasDescriptor; } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. 
   friend class Instruction;
   CallInst *cloneImpl() const;
 
 public:
-  static CallInst *Create(Value *Func,
-                          ArrayRef<Value *> Args,
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
                           const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
     return Create(cast<FunctionType>(
                       cast<PointerType>(Func->getType())->getElementType()),
-                  Func, Args, NameStr, InsertBefore);
+                  Func, Args, Bundles, NameStr, InsertBefore);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr,
+                          Instruction *InsertBefore = nullptr) {
+    return Create(cast<FunctionType>(
+                      cast<PointerType>(Func->getType())->getElementType()),
+                  Func, Args, None, NameStr, InsertBefore);
   }
   static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
-                          const Twine &NameStr = "",
+                          const Twine &NameStr,
                           Instruction *InsertBefore = nullptr) {
     return new (unsigned(Args.size() + 1))
-        CallInst(Ty, Func, Args, NameStr, InsertBefore);
+        CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
   }
-  static CallInst *Create(Value *Func,
-                          ArrayRef<Value *> Args,
+  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
+                          const Twine &NameStr = "",
+                          Instruction *InsertBefore = nullptr) {
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles,
                           const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return new(unsigned(Args.size() + 1))
-      CallInst(Func, Args, NameStr, InsertAtEnd);
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return new (unsigned(Args.size() + 1))
+        CallInst(Func, Args, None, NameStr, InsertAtEnd);
   }
   static CallInst *Create(Value *F, const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
@@ -1415,6 +1464,16 @@ public:
                           BasicBlock *InsertAtEnd) {
     return new(1) CallInst(F, NameStr, InsertAtEnd);
   }
+
+  /// \brief Create a clone of \p CI with a different set of operand bundles
+  /// and insert it before \p InsertPt.
+  ///
+  /// The returned call instruction is identical to \p CI in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+                          Instruction *InsertPt = nullptr);
+
   /// CreateMalloc - Generate the IR for a call to malloc:
   /// 1. Compute the malloc call's argument as the specified type's size,
   ///    possibly multiplied by the array size if the array size is not
@@ -1445,16 +1504,21 @@ public:
   }
 
   // Note that 'musttail' implies 'tail'.
- enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 }; + enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2, + TCK_NoTail = 3 }; TailCallKind getTailCallKind() const { return TailCallKind(getSubclassDataFromInstruction() & 3); } bool isTailCall() const { - return (getSubclassDataFromInstruction() & 3) != TCK_None; + unsigned Kind = getSubclassDataFromInstruction() & 3; + return Kind == TCK_Tail || Kind == TCK_MustTail; } bool isMustTailCall() const { return (getSubclassDataFromInstruction() & 3) == TCK_MustTail; } + bool isNoTailCall() const { + return (getSubclassDataFromInstruction() & 3) == TCK_NoTail; + } void setTailCall(bool isTC = true) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) | unsigned(isTC ? TCK_Tail : TCK_None)); @@ -1469,28 +1533,58 @@ public: /// getNumArgOperands - Return the number of call arguments. /// - unsigned getNumArgOperands() const { return getNumOperands() - 1; } + unsigned getNumArgOperands() const { + return getNumOperands() - getNumTotalBundleOperands() - 1; + } /// getArgOperand/setArgOperand - Return/set the i-th call argument. /// - Value *getArgOperand(unsigned i) const { return getOperand(i); } - void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } - - /// arg_operands - iteration adapter for range-for loops. - iterator_range arg_operands() { - // The last operand in the op list is the callee - it's not one of the args - // so we don't want to iterate over it. - return iterator_range(op_begin(), op_end() - 1); + Value *getArgOperand(unsigned i) const { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperand(i); + } + void setArgOperand(unsigned i, Value *v) { + assert(i < getNumArgOperands() && "Out of bounds!"); + setOperand(i, v); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. + op_iterator arg_begin() { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + op_iterator arg_end() { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. + iterator_range arg_operands() { + return make_range(arg_begin(), arg_end()); + } + + /// \brief Return the iterator pointing to the beginning of the argument list. + const_op_iterator arg_begin() const { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + const_op_iterator arg_end() const { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), op_end() - 1); + return make_range(arg_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a call argument. - const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); } - Use &getArgOperandUse(unsigned i) { return getOperandUse(i); } + const Use &getArgOperandUse(unsigned i) const { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperandUse(i); + } + Use &getArgOperandUse(unsigned i) { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperandUse(i); + } /// getCallingConv/setCallingConv - Get or set the calling convention of this /// function call. 
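Taken together, the Create overloads and argument accessors above mean a transform can attach bundles at creation time without doing any operand bookkeeping itself: Create sizes the allocation (TotalOps plus DescriptorBytes) and populateBundleOperandInfos lays the bundle inputs out after the arguments. A sketch of what a caller might look like; the emitCallWithDeoptState helper, its parameters, and the exact OperandBundleDef constructor shape (a tag string plus its input values) are assumptions for illustration, not APIs confirmed by this patch:

    #include <cassert>
    #include <vector>
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static CallInst *emitCallWithDeoptState(Function *Callee,
                                            ArrayRef<Value *> Args,
                                            ArrayRef<Value *> DeoptState,
                                            Instruction *InsertPt) {
      // One bundle: tag "deopt", inputs = the abstract-state values.
      OperandBundleDef DeoptBundle(
          "deopt", std::vector<Value *>(DeoptState.begin(), DeoptState.end()));

      CallInst *CI = CallInst::Create(Callee, Args, {DeoptBundle}, "", InsertPt);

      // Bundle inputs sit between the plain arguments and the callee operand,
      // so the argument accessors still see exactly Args.
      assert(CI->getNumArgOperands() == Args.size() && "bundles are not args");
      return CI;
    }

Note that arg_end() subtracts getNumTotalBundleOperands() before the callee slot, which is why the assertion holds no matter how many bundle inputs were appended.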
@@ -1498,8 +1592,10 @@ public:
     return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
   }
   void setCallingConv(CallingConv::ID CC) {
+    auto ID = static_cast<unsigned>(CC);
+    assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
     setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
-                               (static_cast<unsigned>(CC) << 2));
+                               (ID << 2));
   }
 
   /// getAttributes - Return the parameter attributes for this call.
@@ -1541,6 +1637,21 @@ public:
   /// \brief Determine whether the call or the callee has the given attributes.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
 
+  /// \brief Return true if the data operand at index \p i has the attribute \p
+  /// A.
+  ///
+  /// Data operands include call arguments and values used in operand bundles,
+  /// but do not include the callee operand.  This routine dispatches to the
+  /// underlying AttributeList or the OperandBundleUser as appropriate.
+  ///
+  /// The index \p i is interpreted as
+  ///
+  ///  \p i == Attribute::ReturnIndex -> the return value
+  ///  \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+  ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+  ///  (\p i - 1) in the operand list.
+  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
   /// \brief Extract the alignment for a call or parameter (0=unknown).
   unsigned getParamAlignment(unsigned i) const {
     return AttributeList.getParamAlignment(i);
@@ -1557,7 +1668,14 @@ public:
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeList.getDereferenceableOrNullBytes(i);
   }
-  
+
+  /// @brief Determine if the parameter or return value is marked with NoAlias
+  /// attribute.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
+  /// value.
+  bool doesNotAlias(unsigned n) const {
+    return AttributeList.hasAttribute(n, Attribute::NoAlias);
+  }
+
   /// \brief Return true if the call should not be treated as a call to a
   /// builtin.
   bool isNoBuiltin() const {
@@ -1622,9 +1740,18 @@ public:
     addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
   }
 
+  /// \brief Determine if the call is convergent.
+  bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
+  void setConvergent() {
+    addAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
+  }
+
   /// \brief Determine if the call returns a structure through first
   /// pointer argument.
   bool hasStructRetAttr() const {
+    if (getNumArgOperands() == 0)
+      return false;
+
+    // Be friendly and also check the callee.
     return paramHasAttr(1, Attribute::StructRet);
   }
 
@@ -1671,12 +1798,17 @@ public:
   static inline bool classof(const Value *V) {
     return isa<CallInst>(V) && classof(cast<CallInst>(V));
   }
-private:
-  template <typename AttrKind>
-  bool hasFnAttrImpl(AttrKind A) const {
+private:
+  template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
     if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
       return true;
+
+    // Operand bundles override attributes on the called function, but don't
+    // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(A)) + return false; + if (const Function *F = getCalledFunction()) return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); return false; @@ -1694,24 +1826,28 @@ struct OperandTraits : public VariadicOperandTraits { }; CallInst::CallInst(Value *Func, ArrayRef Args, - const Twine &NameStr, BasicBlock *InsertAtEnd) - : Instruction(cast(cast(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Call, - OperandTraits::op_end(this) - (Args.size() + 1), - unsigned(Args.size() + 1), InsertAtEnd) { - init(Func, Args, NameStr); + ArrayRef Bundles, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction( + cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Call, OperandTraits::op_end(this) - + (Args.size() + CountBundleInputs(Bundles) + 1), + unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) { + init(Func, Args, Bundles, NameStr); } CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, - const Twine &NameStr, Instruction *InsertBefore) + ArrayRef Bundles, const Twine &NameStr, + Instruction *InsertBefore) : Instruction(Ty->getReturnType(), Instruction::Call, - OperandTraits::op_end(this) - (Args.size() + 1), - unsigned(Args.size() + 1), InsertBefore) { - init(Ty, Func, Args, NameStr); + OperandTraits::op_end(this) - + (Args.size() + CountBundleInputs(Bundles) + 1), + unsigned(Args.size() + CountBundleInputs(Bundles) + 1), + InsertBefore) { + init(Ty, Func, Args, Bundles, NameStr); } - // Note: if you get compile errors about private methods then // please update your code to use the high-level operand // interfaces. See line 943 above. @@ -1745,6 +1881,7 @@ class SelectInst : public Instruction { init(C, S1, S2); setName(NameStr); } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1845,6 +1982,7 @@ class ExtractElementInst : public Instruction { Instruction *InsertBefore = nullptr); ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1875,7 +2013,6 @@ public: return cast(getVectorOperand()->getType()); } - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -1906,8 +2043,9 @@ class InsertElementInst : public Instruction { InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr = "", Instruction *InsertBefore = nullptr); - InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, - const Twine &NameStr, BasicBlock *InsertAtEnd); + InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr, + BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2020,7 +2158,6 @@ public: return Mask; } - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ShuffleVector; @@ -2063,9 +2200,8 @@ class ExtractValueInst : public UnaryInstruction { const Twine &NameStr, BasicBlock *InsertAtEnd); // allocate space for exactly one operand - void *operator new(size_t s) { - return User::operator new(s, 1); - } + void *operator new(size_t s) { return User::operator new(s, 1); } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. 
friend class Instruction; @@ -2096,7 +2232,7 @@ public: inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2147,7 +2283,6 @@ ExtractValueInst::ExtractValueInst(Value *Agg, init(Idxs, NameStr); } - //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// @@ -2177,11 +2312,12 @@ class InsertValueInst : public Instruction { /// Constructors - These two constructors are convenience methods because one /// and two index insertvalue instructions are so common. - InsertValueInst(Value *Agg, Value *Val, - unsigned Idx, const Twine &NameStr = "", - Instruction *InsertBefore = nullptr); InsertValueInst(Value *Agg, Value *Val, unsigned Idx, - const Twine &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr); + InsertValueInst(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr, + BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2213,7 +2349,7 @@ public: inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2294,6 +2430,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value) // scientist's overactive imagination. // class PHINode : public Instruction { + void anchor() override; + void *operator new(size_t, unsigned) = delete; /// ReservedSpace - The number of operands actually allocated. NumOperands is /// the number actually in use. @@ -2319,6 +2457,7 @@ class PHINode : public Instruction { setName(NameStr); allocHungoffUses(ReservedSpace); } + protected: // allocHungoffUses - this is more complicated than the generic // User::allocHungoffUses, because we have to allocate Uses for the incoming @@ -2387,6 +2526,9 @@ public: return getOperand(i); } void setIncomingValue(unsigned i, Value *V) { + assert(V && "PHI node got a null value!"); + assert(getType() == V->getType() && + "All operands to PHI node must be the same type as the PHI node!"); setOperand(i, V); } static unsigned getOperandNumForIncomingValue(unsigned i) { @@ -2418,16 +2560,13 @@ public: } void setIncomingBlock(unsigned i, BasicBlock *BB) { + assert(BB && "PHI node got a null basic block!"); block_begin()[i] = BB; } /// addIncoming - Add an incoming value to the end of the PHI list /// void addIncoming(Value *V, BasicBlock *BB) { - assert(V && "PHI node got a null value!"); - assert(BB && "PHI node got a null basic block!"); - assert(getType() == V->getType() && - "All operands to PHI node must be the same type as the PHI node!"); if (getNumOperands() == ReservedSpace) growOperands(); // Get more space! // Initialize some new operands. @@ -2479,7 +2618,8 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - private: + +private: void growOperands(); }; @@ -2506,8 +2646,10 @@ class LandingPadInst : public Instruction { /// the number actually in use. 
unsigned ReservedSpace; LandingPadInst(const LandingPadInst &LP); + public: enum ClauseType { Catch, Filter }; + private: void *operator new(size_t, unsigned) = delete; // Allocate space for exactly zero operands. @@ -2618,6 +2760,7 @@ private: Instruction *InsertBefore = nullptr); ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd); explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2654,7 +2797,8 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - private: + +private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; void setSuccessorV(unsigned idx, BasicBlock *B) override; @@ -2693,6 +2837,7 @@ class BranchInst : public TerminatorInst { BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd); BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2740,7 +2885,7 @@ public: void setSuccessor(unsigned idx, BasicBlock *NewSucc) { assert(idx < getNumSuccessors() && "Successor # out of range for Branch!"); - *(&Op<-1>() - idx) = (Value*)NewSucc; + *(&Op<-1>() - idx) = NewSucc; } /// \brief Swap the successors of this branch instruction. @@ -2757,6 +2902,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -2803,25 +2949,23 @@ class SwitchInst : public TerminatorInst { /// constructor also autoinserts at the end of the specified BasicBlock. SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; SwitchInst *cloneImpl() const; public: - // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); template class CaseIteratorT { protected: - SwitchInstTy *SI; unsigned Index; public: - typedef CaseIteratorT Self; /// Initializes case iterator for given SwitchInst and for given @@ -2912,8 +3056,7 @@ public: typedef CaseIteratorT ParentTy; public: - - CaseIt(const ParentTy& Src) : ParentTy(Src) {} + CaseIt(const ParentTy &Src) : ParentTy(Src) {} CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} /// Sets the new value for current case. @@ -2983,12 +3126,12 @@ public: /// cases - iteration adapter for range-for loops. iterator_range cases() { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// cases - iteration adapter for range-for loops. iterator_range cases() const { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// Returns an iterator that points to the default case. 
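Since the cases() adapters above now simply wrap make_range, the usual way to consume them is a range-for; each CaseIt pairs a case value with its successor block. A small sketch of such a walk (dumpSwitchTargets is an illustrative helper name, and SI is assumed to point at a valid SwitchInst):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void dumpSwitchTargets(SwitchInst *SI) {
      for (SwitchInst::CaseIt Case : SI->cases()) {
        // Each explicit case maps a constant integer to a destination block.
        ConstantInt *CaseVal = Case.getCaseValue();
        BasicBlock *Dest = Case.getCaseSuccessor();
        errs() << "case " << CaseVal->getValue() << " -> " << Dest->getName()
               << "\n";
      }
      // The default destination is not part of the cases() range.
      errs() << "default -> " << SI->getDefaultDest()->getName() << "\n";
    }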
@@ -3056,7 +3199,7 @@ public: } void setSuccessor(unsigned idx, BasicBlock *NewSucc) { assert(idx < getNumSuccessors() && "Successor # out of range for switch!"); - setOperand(idx*2+1, (Value*)NewSucc); + setOperand(idx * 2 + 1, NewSucc); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3066,6 +3209,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3078,7 +3222,6 @@ struct OperandTraits : public HungoffOperandTraits<2> { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value) - //===----------------------------------------------------------------------===// // IndirectBrInst Class //===----------------------------------------------------------------------===// @@ -3111,6 +3254,7 @@ class IndirectBrInst : public TerminatorInst { /// here to make memory allocation more efficient. This constructor also /// autoinserts at the end of the specified BasicBlock. IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -3134,7 +3278,6 @@ public: const Value *getAddress() const { return getOperand(0); } void setAddress(Value *V) { setOperand(0, V); } - /// getNumDestinations - return the number of possible destinations in this /// indirectbr instruction. unsigned getNumDestinations() const { return getNumOperands()-1; } @@ -3156,7 +3299,7 @@ public: return cast(getOperand(i+1)); } void setSuccessor(unsigned i, BasicBlock *NewSucc) { - setOperand(i+1, (Value*)NewSucc); + setOperand(i + 1, NewSucc); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3166,6 +3309,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3178,7 +3322,6 @@ struct OperandTraits : public HungoffOperandTraits<1> { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value) - //===----------------------------------------------------------------------===// // InvokeInst Class //===----------------------------------------------------------------------===// @@ -3186,71 +3329,122 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value) /// InvokeInst - Invoke instruction. The SubclassData field is used to hold the /// calling convention of the call. /// -class InvokeInst : public TerminatorInst { +class InvokeInst : public TerminatorInst, + public OperandBundleUser { AttributeSet AttributeList; FunctionType *FTy; InvokeInst(const InvokeInst &BI); void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, const Twine &NameStr) { + ArrayRef Args, ArrayRef Bundles, + const Twine &NameStr) { init(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, NameStr); + Func, IfNormal, IfException, Args, Bundles, NameStr); } void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - const Twine &NameStr); + ArrayRef Bundles, const Twine &NameStr); /// Construct an InvokeInst given a range of arguments. 
/// /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, - const Twine &NameStr, Instruction *InsertBefore) + ArrayRef Args, ArrayRef Bundles, + unsigned Values, const Twine &NameStr, + Instruction *InsertBefore) : InvokeInst(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, Values, NameStr, + Func, IfNormal, IfException, Args, Bundles, Values, NameStr, InsertBefore) {} inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - unsigned Values, const Twine &NameStr, - Instruction *InsertBefore); + ArrayRef Bundles, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore); /// Construct an InvokeInst given a range of arguments. /// /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, - const Twine &NameStr, BasicBlock *InsertAtEnd); + ArrayRef Args, ArrayRef Bundles, + unsigned Values, const Twine &NameStr, + BasicBlock *InsertAtEnd); + + friend class OperandBundleUser; + bool hasDescriptor() const { return HasDescriptor; } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; InvokeInst *cloneImpl() const; public: - static InvokeInst *Create(Value *Func, - BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, const Twine &NameStr = "", + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + const Twine &NameStr, Instruction *InsertBefore = nullptr) { return Create(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, NameStr, InsertBefore); + Func, IfNormal, IfException, Args, None, NameStr, + InsertBefore); + } + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles = None, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + return Create(cast( + cast(Func->getType())->getElementType()), + Func, IfNormal, IfException, Args, Bundles, NameStr, + InsertBefore); } static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - const Twine &NameStr = "", + const Twine &NameStr, Instruction *InsertBefore = nullptr) { unsigned Values = unsigned(Args.size()) + 3; - return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, + return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None, Values, NameStr, InsertBefore); } + static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles = None, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3; + unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); + + return new (Values, DescriptorBytes) + InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values, + NameStr, InsertBefore); + } static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, const Twine &NameStr, BasicBlock *InsertAtEnd) { unsigned Values = unsigned(Args.size()) + 3; - return new(Values) InvokeInst(Func, IfNormal, IfException, Args, - Values, NameStr, InsertAtEnd); + return new (Values) InvokeInst(Func, IfNormal, 
                                   IfException, Args, None,
+                                   Values, NameStr, InsertAtEnd);
   }
+  static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+                            BasicBlock *IfException, ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles,
+                            const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
+    unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (Values, DescriptorBytes)
+        InvokeInst(Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
+                   InsertAtEnd);
+  }
+
+  /// \brief Create a clone of \p II with a different set of operand bundles
+  /// and insert it before \p InsertPt.
+  ///
+  /// The returned invoke instruction is identical to \p II in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
+                            Instruction *InsertPt = nullptr);
 
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3264,26 +3458,58 @@ public:
 
   /// getNumArgOperands - Return the number of invoke arguments.
   ///
-  unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+  unsigned getNumArgOperands() const {
+    return getNumOperands() - getNumTotalBundleOperands() - 3;
+  }
 
   /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
   ///
-  Value *getArgOperand(unsigned i) const { return getOperand(i); }
-  void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
-  /// arg_operands - iteration adapter for range-for loops.
-  iterator_range<op_iterator> arg_operands() {
-    return iterator_range<op_iterator>(op_begin(), op_end() - 3);
+  Value *getArgOperand(unsigned i) const {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperand(i);
+  }
+  void setArgOperand(unsigned i, Value *v) {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    setOperand(i, v);
   }
 
-  /// arg_operands - iteration adapter for range-for loops.
+  /// \brief Return the iterator pointing to the beginning of the argument list.
+  op_iterator arg_begin() { return op_begin(); }
+
+  /// \brief Return the iterator pointing to the end of the argument list.
+  op_iterator arg_end() {
+    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+    return op_end() - getNumTotalBundleOperands() - 3;
+  };
+
+  /// \brief Iteration adapter for range-for loops.
+  iterator_range<op_iterator> arg_operands() {
+    return make_range(arg_begin(), arg_end());
+  }
+
+  /// \brief Return the iterator pointing to the beginning of the argument list.
+  const_op_iterator arg_begin() const { return op_begin(); }
+
+  /// \brief Return the iterator pointing to the end of the argument list.
+  const_op_iterator arg_end() const {
+    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+    return op_end() - getNumTotalBundleOperands() - 3;
+  };
+
+  /// \brief Iteration adapter for range-for loops.
   iterator_range<const_op_iterator> arg_operands() const {
-    return iterator_range<const_op_iterator>(op_begin(), op_end() - 3);
+    return make_range(arg_begin(), arg_end());
   }
 
   /// \brief Wrappers for getting the \c Use of an invoke argument.
-  const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); }
-  Use &getArgOperandUse(unsigned i) { return getOperandUse(i); }
+  const Use &getArgOperandUse(unsigned i) const {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperandUse(i);
+  }
+  Use &getArgOperandUse(unsigned i) {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperandUse(i);
+  }
 
   /// getCallingConv/setCallingConv - Get or set the calling convention of this
   /// function call.
@@ -3291,7 +3517,9 @@ public:
     return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
   }
   void setCallingConv(CallingConv::ID CC) {
-    setInstructionSubclassData(static_cast<unsigned>(CC));
+    auto ID = static_cast<unsigned>(CC);
+    assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+    setInstructionSubclassData(ID);
   }
 
   /// getAttributes - Return the parameter attributes for this invoke.
@@ -3325,6 +3553,22 @@ public:
   /// \brief Determine whether the call or the callee has the given attributes.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
 
+  /// \brief Return true if the data operand at index \p i has the attribute \p
+  /// A.
+  ///
+  /// Data operands include invoke arguments and values used in operand bundles,
+  /// but do not include the invokee operand or the two successor blocks.
+  /// This routine dispatches to the underlying AttributeList or the
+  /// OperandBundleUser as appropriate.
+  ///
+  /// The index \p i is interpreted as
+  ///
+  ///  \p i == Attribute::ReturnIndex -> the return value
+  ///  \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+  ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+  ///  (\p i - 1) in the operand list.
+  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
   /// \brief Extract the alignment for a call or parameter (0=unknown).
   unsigned getParamAlignment(unsigned i) const {
     return AttributeList.getParamAlignment(i);
@@ -3335,13 +3579,20 @@ public:
   uint64_t getDereferenceableBytes(unsigned i) const {
     return AttributeList.getDereferenceableBytes(i);
   }
-  
+
   /// \brief Extract the number of dereferenceable_or_null bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeList.getDereferenceableOrNullBytes(i);
   }
 
+  /// @brief Determine if the parameter or return value is marked with NoAlias
+  /// attribute.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
+  /// value.
+  bool doesNotAlias(unsigned n) const {
+    return AttributeList.hasAttribute(n, Attribute::NoAlias);
+  }
+
   /// \brief Return true if the call should not be treated as a call to a
   /// builtin.
   bool isNoBuiltin() const {
@@ -3403,6 +3654,9 @@ public:
   /// \brief Determine if the call returns a structure through first
   /// pointer argument.
   bool hasStructRetAttr() const {
+    if (getNumArgOperands() == 0)
+      return false;
+
+    // Be friendly and also check the callee.
return paramHasAttr(1, Attribute::StructRet); } @@ -3495,23 +3749,23 @@ struct OperandTraits : public VariadicOperandTraits { InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - unsigned Values, const Twine &NameStr, - Instruction *InsertBefore) + ArrayRef Bundles, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore) : TerminatorInst(Ty->getReturnType(), Instruction::Invoke, OperandTraits::op_end(this) - Values, Values, InsertBefore) { - init(Ty, Func, IfNormal, IfException, Args, NameStr); + init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr); } -InvokeInst::InvokeInst(Value *Func, - BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, +InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd) - : TerminatorInst(cast(cast(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Invoke, - OperandTraits::op_end(this) - Values, - Values, InsertAtEnd) { - init(Func, IfNormal, IfException, Args, NameStr); + : TerminatorInst( + cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Invoke, OperandTraits::op_end(this) - Values, + Values, InsertAtEnd) { + init(Func, IfNormal, IfException, Args, Bundles, NameStr); } DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value) @@ -3528,6 +3782,7 @@ class ResumeInst : public TerminatorInst { explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr); ResumeInst(Value *Exn, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -3556,6 +3811,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3569,6 +3825,430 @@ struct OperandTraits : DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value) +//===----------------------------------------------------------------------===// +// CatchSwitchInst Class +//===----------------------------------------------------------------------===// +class CatchSwitchInst : public TerminatorInst { + void *operator new(size_t, unsigned) = delete; + /// ReservedSpace - The number of operands actually allocated. NumOperands is + /// the number actually in use. + unsigned ReservedSpace; + // Operand[0] = Outer scope + // Operand[1] = Unwind block destination + // Operand[n] = BasicBlock to go to on match + CatchSwitchInst(const CatchSwitchInst &CSI); + void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved); + void growOperands(unsigned Size); + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s); } + /// CatchSwitchInst ctor - Create a new switch instruction, specifying a + /// default destination. The number of additional handlers can be specified + /// here to make memory allocation more efficient. + /// This constructor can also autoinsert before another instruction. + CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + Instruction *InsertBefore); + + /// CatchSwitchInst ctor - Create a new switch instruction, specifying a + /// default destination. The number of additional handlers can be specified + /// here to make memory allocation more efficient. 
+ /// This constructor also autoinserts at the end of the specified BasicBlock. + CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + BasicBlock *InsertAtEnd); + +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + CatchSwitchInst *cloneImpl() const; + +public: + static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, + InsertBefore); + } + static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, + InsertAtEnd); + } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // Accessor Methods for CatchSwitch stmt + Value *getParentPad() const { return getOperand(0); } + void setParentPad(Value *ParentPad) { setOperand(0, ParentPad); } + + // Accessor Methods for CatchSwitch stmt + bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; } + bool unwindsToCaller() const { return !hasUnwindDest(); } + BasicBlock *getUnwindDest() const { + if (hasUnwindDest()) + return cast(getOperand(1)); + return nullptr; + } + void setUnwindDest(BasicBlock *UnwindDest) { + assert(UnwindDest); + assert(hasUnwindDest()); + setOperand(1, UnwindDest); + } + + /// getNumHandlers - return the number of 'handlers' in this catchswitch + /// instruction, except the default handler + unsigned getNumHandlers() const { + if (hasUnwindDest()) + return getNumOperands() - 2; + return getNumOperands() - 1; + } + +private: + static BasicBlock *handler_helper(Value *V) { return cast(V); } + static const BasicBlock *handler_helper(const Value *V) { + return cast(V); + } + +public: + typedef std::pointer_to_unary_function DerefFnTy; + typedef mapped_iterator handler_iterator; + typedef iterator_range handler_range; + + + typedef std::pointer_to_unary_function + ConstDerefFnTy; + typedef mapped_iterator const_handler_iterator; + typedef iterator_range const_handler_range; + + /// Returns an iterator that points to the first handler in CatchSwitchInst. + handler_iterator handler_begin() { + op_iterator It = op_begin() + 1; + if (hasUnwindDest()) + ++It; + return handler_iterator(It, DerefFnTy(handler_helper)); + } + /// Returns an iterator that points to the first handler in the + /// CatchSwitchInst. + const_handler_iterator handler_begin() const { + const_op_iterator It = op_begin() + 1; + if (hasUnwindDest()) + ++It; + return const_handler_iterator(It, ConstDerefFnTy(handler_helper)); + } + + /// Returns a read-only iterator that points one past the last + /// handler in the CatchSwitchInst. + handler_iterator handler_end() { + return handler_iterator(op_end(), DerefFnTy(handler_helper)); + } + /// Returns an iterator that points one past the last handler in the + /// CatchSwitchInst. + const_handler_iterator handler_end() const { + return const_handler_iterator(op_end(), ConstDerefFnTy(handler_helper)); + } + + /// handlers - iteration adapter for range-for loops. + handler_range handlers() { + return make_range(handler_begin(), handler_end()); + } + + /// handlers - iteration adapter for range-for loops. 
+
+  /// handlers - iteration adapter for range-for loops.
+  const_handler_range handlers() const {
+    return make_range(handler_begin(), handler_end());
+  }
+
+  /// addHandler - Add an entry to the switch instruction...
+  /// Note:
+  /// This action invalidates handler_end(). Old handler_end() iterator will
+  /// point to the added handler.
+  void addHandler(BasicBlock *Dest);
+
+  unsigned getNumSuccessors() const { return getNumOperands() - 1; }
+  BasicBlock *getSuccessor(unsigned Idx) const {
+    assert(Idx < getNumSuccessors() &&
+           "Successor # out of range for catchswitch!");
+    return cast<BasicBlock>(getOperand(Idx + 1));
+  }
+  void setSuccessor(unsigned Idx, BasicBlock *NewSucc) {
+    assert(Idx < getNumSuccessors() &&
+           "Successor # out of range for catchswitch!");
+    setOperand(Idx + 1, NewSucc);
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CatchSwitch;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchSwitchInst> : public HungoffOperandTraits<2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchSwitchInst, Value)
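
[Editorial note: a minimal usage sketch for the class above; the blocks and pad value are hypothetical. It builds a catchswitch that unwinds to the caller and adds one handler after construction.]

// Sketch only, assuming ParentPad, HandlerBB and CatchSwitchBB exist.
CatchSwitchInst *CS = CatchSwitchInst::Create(ParentPad, /*UnwindDest=*/nullptr,
                                              /*NumHandlers=*/1, "cs",
                                              CatchSwitchBB);
CS->addHandler(HandlerBB);          // note: invalidates the old handler_end()
for (BasicBlock *Handler : CS->handlers())
  (void)Handler;                    // visit each handler block
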
+
+//===----------------------------------------------------------------------===//
+//                               CleanupPadInst Class
+//===----------------------------------------------------------------------===//
+class CleanupPadInst : public FuncletPadInst {
+private:
+  explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+                          unsigned Values, const Twine &NameStr,
+                          Instruction *InsertBefore)
+      : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+                       NameStr, InsertBefore) {}
+  explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+                          unsigned Values, const Twine &NameStr,
+                          BasicBlock *InsertAtEnd)
+      : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+                       NameStr, InsertAtEnd) {}
+
+public:
+  static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args = None,
+                                const Twine &NameStr = "",
+                                Instruction *InsertBefore = nullptr) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore);
+  }
+  static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args,
+                                const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CleanupPadInst(ParentPad, Args, Values, NameStr, InsertAtEnd);
+  }
+
+  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CleanupPad;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//                               CatchPadInst Class
+//===----------------------------------------------------------------------===//
+class CatchPadInst : public FuncletPadInst {
+private:
+  explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+                        unsigned Values, const Twine &NameStr,
+                        Instruction *InsertBefore)
+      : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+                       NameStr, InsertBefore) {}
+  explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+                        unsigned Values, const Twine &NameStr,
+                        BasicBlock *InsertAtEnd)
+      : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+                       NameStr, InsertAtEnd) {}
+
+public:
+  static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+                              const Twine &NameStr = "",
+                              Instruction *InsertBefore = nullptr) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore);
+  }
+  static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+                              const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertAtEnd);
+  }
+
+  /// Convenience accessors
+  CatchSwitchInst *getCatchSwitch() const {
+    return cast<CatchSwitchInst>(Op<-1>());
+  }
+  void setCatchSwitch(Value *CatchSwitch) {
+    assert(CatchSwitch);
+    Op<-1>() = CatchSwitch;
+  }
+
+  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CatchPad;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
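
[Editorial note: a sketch of creating the two pad kinds; all values are hypothetical. On MSVC targets a catchpad normally carries typeinfo/flags/slot arguments; a single argument is used here only for brevity.]

// Sketch only, assuming CS (a CatchSwitchInst *), TypeInfoGV (a Value *),
// ParentPad, CatchPadBB and CleanupBB exist.
CatchPadInst *CP = CatchPadInst::Create(CS, {TypeInfoGV}, "catch", CatchPadBB);
CleanupPadInst *CLP = CleanupPadInst::Create(ParentPad, None, "cleanup",
                                             CleanupBB);
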
+
+//===----------------------------------------------------------------------===//
+//                               CatchReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CatchReturnInst : public TerminatorInst {
+  CatchReturnInst(const CatchReturnInst &RI);
+
+  void init(Value *CatchPad, BasicBlock *BB);
+  CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
+  CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  CatchReturnInst *cloneImpl() const;
+
+public:
+  static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+                                 Instruction *InsertBefore = nullptr) {
+    assert(CatchPad);
+    assert(BB);
+    return new (2) CatchReturnInst(CatchPad, BB, InsertBefore);
+  }
+  static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+                                 BasicBlock *InsertAtEnd) {
+    assert(CatchPad);
+    assert(BB);
+    return new (2) CatchReturnInst(CatchPad, BB, InsertAtEnd);
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// Convenience accessors.
+  CatchPadInst *getCatchPad() const { return cast<CatchPadInst>(Op<0>()); }
+  void setCatchPad(CatchPadInst *CatchPad) {
+    assert(CatchPad);
+    Op<0>() = CatchPad;
+  }
+
+  BasicBlock *getSuccessor() const { return cast<BasicBlock>(Op<1>()); }
+  void setSuccessor(BasicBlock *NewSucc) {
+    assert(NewSucc);
+    Op<1>() = NewSucc;
+  }
+  unsigned getNumSuccessors() const { return 1; }
+
+  Value *getParentPad() const {
+    return getCatchPad()->getCatchSwitch()->getParentPad();
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::CatchRet);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchReturnInst>
+    : public FixedNumOperandTraits<CatchReturnInst, 2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
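
[Editorial note: the matching return, sketched with hypothetical values; CleanupReturnInst below follows the same pattern with an optional unwind destination.]

// Sketch only, assuming CP (a CatchPadInst *), ContBB and CatchBodyBB exist.
CatchReturnInst::Create(CP, /*BB=*/ContBB, /*InsertAtEnd=*/CatchBodyBB);
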
+
+//===----------------------------------------------------------------------===//
+//                               CleanupReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CleanupReturnInst : public TerminatorInst {
+private:
+  CleanupReturnInst(const CleanupReturnInst &RI);
+
+  void init(Value *CleanupPad, BasicBlock *UnwindBB);
+  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+                    Instruction *InsertBefore = nullptr);
+  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+                    BasicBlock *InsertAtEnd);
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  CleanupReturnInst *cloneImpl() const;
+
+public:
+  static CleanupReturnInst *Create(Value *CleanupPad,
+                                   BasicBlock *UnwindBB = nullptr,
+                                   Instruction *InsertBefore = nullptr) {
+    assert(CleanupPad);
+    unsigned Values = 1;
+    if (UnwindBB)
+      ++Values;
+    return new (Values)
+        CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore);
+  }
+  static CleanupReturnInst *Create(Value *CleanupPad, BasicBlock *UnwindBB,
+                                   BasicBlock *InsertAtEnd) {
+    assert(CleanupPad);
+    unsigned Values = 1;
+    if (UnwindBB)
+      ++Values;
+    return new (Values)
+        CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertAtEnd);
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; }
+  bool unwindsToCaller() const { return !hasUnwindDest(); }
+
+  /// Convenience accessor.
+  CleanupPadInst *getCleanupPad() const {
+    return cast<CleanupPadInst>(Op<0>());
+  }
+  void setCleanupPad(CleanupPadInst *CleanupPad) {
+    assert(CleanupPad);
+    Op<0>() = CleanupPad;
+  }
+
+  unsigned getNumSuccessors() const { return hasUnwindDest() ? 1 : 0; }
+
+  BasicBlock *getUnwindDest() const {
+    return hasUnwindDest() ? cast<BasicBlock>(Op<1>()) : nullptr;
+  }
+  void setUnwindDest(BasicBlock *NewDest) {
+    assert(NewDest);
+    assert(hasUnwindDest());
+    Op<1>() = NewDest;
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::CleanupRet);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+template <>
+struct OperandTraits<CleanupReturnInst>
+    : public VariadicOperandTraits<CleanupReturnInst, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
+
 //===----------------------------------------------------------------------===//
 //                           UnreachableInst Class
 //===----------------------------------------------------------------------===//
@@ -3580,6 +4260,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
 ///
 class UnreachableInst : public TerminatorInst {
   void *operator new(size_t, unsigned) = delete;
+
 protected:
   // Note: Instruction needs to be a friend here to call cloneImpl.
   friend class Instruction;
@@ -3602,6 +4283,7 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
+
 private:
   BasicBlock *getSuccessorV(unsigned idx) const override;
   unsigned getNumSuccessorsV() const override;
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 2c8b6eb6f39a..169bcc021984 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -372,6 +372,39 @@ namespace llvm {
       return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
     }
   };
-}
+
+  /// This represents the llvm.instrprof_value_profile intrinsic.
+  class InstrProfValueProfileInst : public IntrinsicInst {
+  public:
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::instrprof_value_profile;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+
+    GlobalVariable *getName() const {
+      return cast<GlobalVariable>(
+          const_cast<Value *>(getArgOperand(0))->stripPointerCasts());
+    }
+
+    ConstantInt *getHash() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+    }
+
+    Value *getTargetValue() const {
+      return cast<Value>(const_cast<Value *>(getArgOperand(2)));
+    }
+
+    ConstantInt *getValueKind() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
+    }
+
+    // Returns the value site index.
+    ConstantInt *getIndex() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
+    }
+  };
+} // namespace llvm
 
 #endif
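
[Editorial note: a sketch of how lowering code can consume the new intrinsic through these accessors; the surrounding loop and names are hypothetical.]

// Sketch only, assuming Inst is an Instruction * being visited.
if (auto *VP = dyn_cast<InstrProfValueProfileInst>(Inst)) {
  GlobalVariable *NameVar = VP->getName();           // function name variable
  uint64_t FuncHash = VP->getHash()->getZExtValue();
  uint64_t SiteIdx  = VP->getIndex()->getZExtValue();
  Value *Target     = VP->getTargetValue();          // value being profiled
  (void)NameVar; (void)FuncHash; (void)SiteIdx; (void)Target;
}
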
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index 43b8325107fa..314e2aaecf4b 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -79,7 +79,7 @@ namespace Intrinsic {
   /// intrinsic. This is returned by getIntrinsicInfoTableEntries.
   struct IITDescriptor {
     enum IITDescriptorKind {
-      Void, VarArg, MMX, Metadata, Half, Float, Double,
+      Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
       SameVecWidthArgument, PtrToArgument, VecOfPtrsToElt
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index bbae720b4e12..5a95ddced538 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -73,8 +73,8 @@ def IntrNoReturn : IntrinsicProperty;
 // Parallels the noduplicate attribute on LLVM IR functions.
 def IntrNoDuplicate : IntrinsicProperty;
 
-// IntrConvergent - Calls to this intrinsic are convergent and may only be
-// moved to control equivalent blocks.
+// IntrConvergent - Calls to this intrinsic are convergent and may not be made
+// control-dependent on any additional values.
 // Parallels the convergent attribute on LLVM IR functions.
 def IntrConvergent : IntrinsicProperty;
 
@@ -150,16 +150,20 @@ def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
 def llvm_empty_ty : LLVMType<OtherVT>; // { }
 def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
 def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
+def llvm_token_ty : LLVMType<token>; // token
 
 def llvm_x86mmx_ty : LLVMType<x86mmx>;
 def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
 
-def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
-def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
-def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
-def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
-def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
-def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v2i1_ty    : LLVMType<v2i1>;    //  2 x i1
+def llvm_v4i1_ty    : LLVMType<v4i1>;    //  4 x i1
+def llvm_v8i1_ty    : LLVMType<v8i1>;    //  8 x i1
+def llvm_v16i1_ty   : LLVMType<v16i1>;   // 16 x i1
+def llvm_v32i1_ty   : LLVMType<v32i1>;   // 32 x i1
+def llvm_v64i1_ty   : LLVMType<v64i1>;   // 64 x i1
+def llvm_v512i1_ty  : LLVMType<v512i1>;  // 512 x i1
+def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
+
 def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
 def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
 def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
@@ -167,6 +171,8 @@ def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
 def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
 def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
 def llvm_v64i8_ty : LLVMType<v64i8>; // 64 x i8
+def llvm_v128i8_ty : LLVMType<v128i8>; //128 x i8
+def llvm_v256i8_ty : LLVMType<v256i8>; //256 x i8
 
 def llvm_v1i16_ty : LLVMType<v1i16>; // 1 x i16
 def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
@@ -174,17 +180,23 @@ def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
 def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
 def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
 def llvm_v32i16_ty : LLVMType<v32i16>; // 32 x i16
+def llvm_v64i16_ty : LLVMType<v64i16>; // 64 x i16
+def llvm_v128i16_ty : LLVMType<v128i16>; //128 x i16
 
 def llvm_v1i32_ty : LLVMType<v1i32>; // 1 x i32
 def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
 def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
 def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
 def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
+def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
+def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
+
 def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
 def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
 def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
 def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
+def llvm_v32i64_ty : LLVMType<v32i64>; // 32 x i64
 
 def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128
 
@@ -292,6 +304,8 @@ def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
 def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
                        GCCBuiltin<"__builtin_stack_restore">;
 
+def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>;
+
 // IntrReadWriteArgMem is more pessimistic than strictly necessary for prefetch,
 // however it does conveniently prevent the prefetch from being reordered
 // with respect to nearby accesses to the same memory.
@@ -319,6 +333,14 @@ def int_instrprof_increment : Intrinsic<[],
                                         [llvm_ptr_ty, llvm_i64_ty,
                                          llvm_i32_ty, llvm_i32_ty],
                                         []>;
 
+// A call to profile runtime for value profiling of target expressions
+// through instrumentation based profiling.
+def int_instrprof_value_profile : Intrinsic<[],
+                                            [llvm_ptr_ty, llvm_i64_ty,
+                                             llvm_i64_ty, llvm_i32_ty,
+                                             llvm_i32_ty],
+                                            []>;
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
 
@@ -399,6 +421,7 @@ let Properties = [IntrNoMem] in {
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
   def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+  def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
 }
 
 //===------------------------ Debugger Intrinsics -------------------------===//
 
@@ -428,17 +451,13 @@ def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
 def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
 def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
 
-// eh.begincatch takes a pointer returned by a landingpad instruction and
-// copies the exception object into the memory pointed to by the second
-// parameter. If the second parameter is null, no copy occurs.
-def int_eh_begincatch : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
-                                  [NoCapture<0>, NoCapture<1>]>;
-def int_eh_endcatch : Intrinsic<[], []>;
+// eh.exceptionpointer returns the pointer to the exception caught by
+// the given `catchpad`.
+def int_eh_exceptionpointer : Intrinsic<[llvm_anyptr_ty], [llvm_token_ty],
+                                        [IntrNoMem]>;
 
-// Represents the list of actions to take when an exception is thrown.
-def int_eh_actions : Intrinsic<[llvm_ptr_ty], [llvm_vararg_ty], []>;
-
-def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+// Gets the exception code from a catchpad token. Only used on some platforms.
+def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrNoMem]>;
 
 // __builtin_unwind_init is an undocumented GCC intrinsic that causes all
 // callee-saved registers to be saved and restored (regardless of whether they
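
[Editorial note: a sketch of emitting the replacement intrinsic from a frontend; the module, builder, and types are hypothetical. Because the result is anyptr, the declaration is looked up with the desired pointer type.]

// Sketch only, assuming M, Builder, CatchPad (a CatchPadInst *) and
// Int8PtrTy exist.
Function *ExnPtrFn = Intrinsic::getDeclaration(
    M, Intrinsic::eh_exceptionpointer, {Int8PtrTy});
Value *ExnPtr = Builder.CreateCall(ExnPtrFn, {CatchPad});
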
@@ -455,6 +474,7 @@ let Properties = [IntrNoMem] in {
 def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
 def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
+def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
 
 //===---------------- Generic Variable Attribute Intrinsics----------------===//
 //
@@ -523,6 +543,10 @@ def int_invariant_end   : Intrinsic<[],
                                      llvm_ptr_ty],
                                     [IntrReadWriteArgMem, NoCapture<2>]>;
 
+def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
+                                            [llvm_ptr_ty],
+                                            [IntrNoMem]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : Intrinsic<[],
@@ -543,21 +567,17 @@ def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty],
 
 //===------------------------ Garbage Collection Intrinsics ---------------===//
 // These are documented in docs/Statepoint.rst
 
-def int_experimental_gc_statepoint : Intrinsic<[llvm_i32_ty],
+def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
                                [llvm_i64_ty, llvm_i32_ty,
                                 llvm_anyptr_ty, llvm_i32_ty,
                                 llvm_i32_ty, llvm_vararg_ty],
                                [Throws]>;
 
-def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_i32_ty]>;
+def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
+                                           [IntrReadMem]>;
 def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty],
-                              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
-
-// Deprecated: will be removed in a couple of weeks
-def int_experimental_gc_result_int : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty]>;
-def int_experimental_gc_result_float : Intrinsic<[llvm_anyfloat_ty],
-                                                 [llvm_i32_ty]>;
-def int_experimental_gc_result_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty]>;
+                              [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrReadMem]>;
 
 //===-------------------------- Other Intrinsics --------------------------===//
 //
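
[Editorial note: a sketch of the token-based statepoint sequence this hunk switches to; the argument list is abbreviated and the declarations are hypothetical, see docs/Statepoints.rst for the real operand layout.]

// Sketch only, assuming Builder plus StatepointFn (an overload of
// llvm.experimental.gc.statepoint), GCResultFn (llvm.experimental.gc.result)
// and a prepared std::vector<Value *> StatepointArgs.
CallInst *Token  = Builder.CreateCall(StatepointFn, StatepointArgs, "statepoint");
CallInst *Result = Builder.CreateCall(GCResultFn, {Token}, "gc.result");
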
"__builtin_amdgpu_read_workdim">; } // End TargetPrefix = "AMDGPU" + +let TargetPrefix = "amdgcn" in { + +// SI only +def int_amdgcn_buffer_wbinvl1_sc : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, + Intrinsic<[], [], []>; + +// On CI+ +def int_amdgcn_buffer_wbinvl1_vol : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, + Intrinsic<[], [], []>; + +def int_amdgcn_buffer_wbinvl1 : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, + Intrinsic<[], [], []>; + +def int_amdgcn_s_dcache_inv : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, + Intrinsic<[], [], []>; + +// CI+ +def int_amdgcn_s_dcache_inv_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, + Intrinsic<[], [], []>; + +def int_amdgcn_dispatch_ptr : + GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + +// __builtin_amdgcn_interp_p1 , , , +def int_amdgcn_interp_p1 : + GCCBuiltin<"__builtin_amdgcn_interp_p1">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // This intrinsic reads from lds, but the memory + // values are constant, so it behaves like IntrNoMem. + +// __builtin_amdgcn_interp_p2 , , , , +def int_amdgcn_interp_p2 : + GCCBuiltin<"__builtin_amdgcn_interp_p2">, + Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is + // IntrNoMem. + +def int_amdgcn_mbcnt_lo : + GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_amdgcn_mbcnt_hi : + GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 1dff80878592..c1d911cefee2 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -405,36 +405,36 @@ def int_arm_neon_vrintp : Neon_1Arg_Intrinsic; // De-interleaving vector loads from N-element structures. // Source operands are the address and alignment. def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; // Vector load N-element structure to one lane. // Source operands are: the address, the N input vectors (since only one // lane is assigned), the lane number, and the alignment. 
 def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadArgMem]>;
 def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>,
                                        llvm_i32_ty, llvm_i32_ty],
                                       [IntrReadArgMem]>;
 def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>,
                                        LLVMMatchType<0>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadArgMem]>;
@@ -442,38 +442,38 @@ def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
 // Interleaving vector stores from N-element structures.
 // Source operands are: the address, the N vectors, and the alignment.
 def int_arm_neon_vst1 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
                                    llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst2 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, llvm_i32_ty],
                                   [IntrReadWriteArgMem]>;
 def int_arm_neon_vst3 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, LLVMMatchType<0>,
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, LLVMMatchType<1>,
                                    llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst4 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, LLVMMatchType<0>,
-                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, LLVMMatchType<1>,
+                                   LLVMMatchType<1>, llvm_i32_ty],
                                   [IntrReadWriteArgMem]>;
 
 // Vector store N-element structure from one lane.
 // Source operands are: the address, the N vectors, the lane number, and
 // the alignment.
 def int_arm_neon_vst2lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, llvm_i32_ty,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst3lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, LLVMMatchType<1>,
                                        llvm_i32_ty, llvm_i32_ty],
                                       [IntrReadWriteArgMem]>;
 def int_arm_neon_vst4lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, LLVMMatchType<0>,
-                                       LLVMMatchType<0>, llvm_i32_ty,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, LLVMMatchType<1>,
+                                       LLVMMatchType<1>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadWriteArgMem]>;
 
 // Vector bitwise select.
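
[Editorial note: since the NEON loads and stores above are now overloaded on the pointer operand as well as the vector type, a declaration lookup passes both types; a sketch with hypothetical module and types.]

// Sketch only, assuming M, a vector Type *VecTy and a pointer Type *PtrTy.
Function *Vld2 = Intrinsic::getDeclaration(M, Intrinsic::arm_neon_vld2,
                                           {VecTy, PtrTy});
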
diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td index 78ee651d20f9..ca6fcbd44337 100644 --- a/include/llvm/IR/IntrinsicsHexagon.td +++ b/include/llvm/IR/IntrinsicsHexagon.td @@ -32,14 +32,16 @@ class Hexagon_qi_mem_Intrinsic : Hexagon_Intrinsic; + // // DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) -> // Hexagon_void_si_Intrinsic // class Hexagon_void_si_Intrinsic : Hexagon_Intrinsic; + [], [llvm_ptr_ty], + []>; + // // DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) -> // Hexagon_hi_si_Intrinsic @@ -458,6 +460,11 @@ class Hexagon_mem_memdisisi_Intrinsic llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; +class Hexagon_v256_v256v256_Intrinsic + : Hexagon_Intrinsic; + // // Hexagon_sf_df_Intrinsic // @@ -756,6 +763,12 @@ def int_hexagon_circ_stb : Hexagon_mem_memsisisi_Intrinsic<"circ_stb">; +def int_hexagon_mm256i_vaddw : +Hexagon_v256_v256v256_Intrinsic<"_mm256i_vaddw">; + + +// This one above will not be auto-generated, +// so make sure, you don't overwrite this one. // // BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2) // @@ -4946,6 +4959,11 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">; // def int_hexagon_S2_deinterleave : Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">; +// +// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1) +// +def int_hexagon_prefetch : +Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">; def llvm_ptr32_ty : LLVMPointerType; def llvm_ptr64_ty : LLVMPointerType; @@ -4964,3 +4982,4392 @@ Hexagon_Intrinsic<"HEXAGON_S2_storew_locked", [llvm_i32_ty], def int_hexagon_S4_stored_locked : Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty], [llvm_ptr64_ty, llvm_i64_ty], [IntrReadWriteArgMem, NoCapture<0>]>; + +// V60 + +class Hexagon_v2048v2048_Intrinsic_T + : Hexagon_Intrinsic; + +// tag : V6_hi_W +// tag : V6_lo_W +class Hexagon_v512v1024_Intrinsic_T + : Hexagon_Intrinsic; + +// tag : V6_hi_W_128B +// tag : V6_lo_W_128B +class Hexagon_v1024v2048_Intrinsic_T + : Hexagon_Intrinsic; + +class Hexagon_v1024v1024_Intrinsic_T + : Hexagon_Intrinsic; + +// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1) +// tag : V6_hi +def int_hexagon_V6_hi : +Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_hi">; + +// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1) +// tag : V6_lo +def int_hexagon_V6_lo : +Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_lo">; + +// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1) +// tag : V6_hi_128B +def int_hexagon_V6_hi_128B : +Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_hi_128B">; + +// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1) +// tag : V6_lo_128B +def int_hexagon_V6_lo_128B : +Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_lo_128B">; + +// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1) +// tag : V6_vassignp +def int_hexagon_V6_vassignp : +Hexagon_v1024v1024_Intrinsic_T<"HEXAGON_V6_vassignp">; + +// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1) +// tag : V6_vassignp_128B +def int_hexagon_V6_vassignp_128B : +Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">; + + + +// +// Hexagon_iii_Intrinsic +// tag : S6_rol_i_r +class Hexagon_iii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLii_Intrinsic +// tag : S6_rol_i_p +class Hexagon_LLiLLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_iiii_Intrinsic +// tag : S6_rol_i_r_acc +class Hexagon_iiii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLiLLii_Intrinsic +// tag : S6_rol_i_p_acc +class Hexagon_LLiLLiLLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512i_Intrinsic +// tag : V6_valignb +class Hexagon_v512v512v512i_Intrinsic + : 
Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024i_Intrinsic +// tag : V6_valignb_128B +class Hexagon_v1024v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512i_Intrinsic +// tag : V6_vror +class Hexagon_v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024i_Intrinsic +// tag : V6_vror_128B +class Hexagon_v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512_Intrinsic +// tag : V6_vunpackub +class Hexagon_v1024v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024_Intrinsic +// tag : V6_vunpackub_128B +class Hexagon_v2048v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512_Intrinsic +// tag : V6_vunpackob +class Hexagon_v1024v1024v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024_Intrinsic +// tag : V6_vunpackob_128B +class Hexagon_v2048v2048v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512_Intrinsic +// tag : V6_vpackeb +class Hexagon_v512v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024_Intrinsic +// tag : V6_vpackeb_128B +class Hexagon_v1024v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048i_Intrinsic +// tag : V6_vdmpybus_dv_128B +class Hexagon_v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048i_Intrinsic +// tag : V6_vdmpybus_dv_acc_128B +class Hexagon_v2048v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512v512_Intrinsic +// tag : V6_vdmpyhvsat_acc +class Hexagon_v512v512v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024v1024_Intrinsic +// tag : V6_vdmpyhvsat_acc_128B +class Hexagon_v1024v1024v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v1024i_Intrinsic +// tag : V6_vdmpyhisat +class Hexagon_v512v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v2048i_Intrinsic +// tag : V6_vdmpyhisat_128B +class Hexagon_v1024v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v1024i_Intrinsic +// tag : V6_vdmpyhisat_acc +class Hexagon_v512v512v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v2048i_Intrinsic +// tag : V6_vdmpyhisat_acc_128B +class Hexagon_v1024v1024v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024ii_Intrinsic +// tag : V6_vrmpyubi +class Hexagon_v1024v1024ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048ii_Intrinsic +// tag : V6_vrmpyubi_128B +class Hexagon_v2048v2048ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024ii_Intrinsic +// tag : V6_vrmpyubi_acc +class Hexagon_v1024v1024v1024ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048ii_Intrinsic +// tag : V6_vrmpyubi_acc_128B +class Hexagon_v2048v2048v2048ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048_Intrinsic +// tag : V6_vaddb_dv_128B +class Hexagon_v2048v2048v2048_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512v512_Intrinsic +// tag : V6_vaddubh +class Hexagon_v1024v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024v1024_Intrinsic +// tag : V6_vaddubh_128B +class Hexagon_v2048v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512_Intrinsic +// tag : V6_vd0 +class Hexagon_v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024_Intrinsic +// tag : V6_vd0_128B +class Hexagon_v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v64iv512v512_Intrinsic +// tag : V6_vaddbq +class Hexagon_v512v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// 
Hexagon_v1024v128iv1024v1024_Intrinsic +// tag : V6_vaddbq_128B +class Hexagon_v1024v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512_Intrinsic +// tag : V6_vabsh +class Hexagon_v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024_Intrinsic +// tag : V6_vabsh_128B +class Hexagon_v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512v512_Intrinsic +// tag : V6_vmpybv_acc +class Hexagon_v1024v1024v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024v1024_Intrinsic +// tag : V6_vmpybv_acc_128B +class Hexagon_v2048v2048v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512i_Intrinsic +// tag : V6_vmpyub +class Hexagon_v1024v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024i_Intrinsic +// tag : V6_vmpyub_128B +class Hexagon_v2048v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512i_Intrinsic +// tag : V6_vmpyub_acc +class Hexagon_v1024v1024v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024i_Intrinsic +// tag : V6_vmpyub_acc_128B +class Hexagon_v2048v2048v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v64ii_Intrinsic +// tag : V6_vandqrt +class Hexagon_v512v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v128ii_Intrinsic +// tag : V6_vandqrt_128B +class Hexagon_v1024v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v64ii_Intrinsic +// tag : V6_vandqrt_acc +class Hexagon_v512v512v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v128ii_Intrinsic +// tag : V6_vandqrt_acc_128B +class Hexagon_v1024v1024v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv512i_Intrinsic +// tag : V6_vandvrt +class Hexagon_v64iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv1024i_Intrinsic +// tag : V6_vandvrt_128B +class Hexagon_v128iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv512i_Intrinsic +// tag : V6_vandvrt_acc +class Hexagon_v64iv64iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv1024i_Intrinsic +// tag : V6_vandvrt_acc_128B +class Hexagon_v128iv128iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv512v512_Intrinsic +// tag : V6_vgtw +class Hexagon_v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv1024v1024_Intrinsic +// tag : V6_vgtw_128B +class Hexagon_v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv512v512_Intrinsic +// tag : V6_vgtw_and +class Hexagon_v64iv64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv1024v1024_Intrinsic +// tag : V6_vgtw_and_128B +class Hexagon_v128iv128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv64i_Intrinsic +// tag : V6_pred_or +class Hexagon_v64iv64iv64i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv128i_Intrinsic +// tag : V6_pred_or_128B +class Hexagon_v128iv128iv128i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64i_Intrinsic +// tag : V6_pred_not +class Hexagon_v64iv64i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128i_Intrinsic +// tag : V6_pred_not_128B +class Hexagon_v128iv128i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64ii_Intrinsic +// tag : V6_pred_scalar2 +class Hexagon_v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128ii_Intrinsic +// tag : V6_pred_scalar2_128B +class Hexagon_v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v64iv512v512_Intrinsic +// tag : V6_vswap +class 
Hexagon_v1024v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v128iv1024v1024_Intrinsic +// tag : V6_vswap_128B +class Hexagon_v2048v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512v512i_Intrinsic +// tag : V6_vshuffvdd +class Hexagon_v1024v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024v1024i_Intrinsic +// tag : V6_vshuffvdd_128B +class Hexagon_v2048v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + + +// +// Hexagon_iv512i_Intrinsic +// tag : V6_extractw +class Hexagon_iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_iv1024i_Intrinsic +// tag : V6_extractw_128B +class Hexagon_iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512i_Intrinsic +// tag : V6_lvsplatw +class Hexagon_v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024i_Intrinsic +// tag : V6_lvsplatw_128B +class Hexagon_v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512LLii_Intrinsic +// tag : V6_vlutb +class Hexagon_v512v512LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024LLii_Intrinsic +// tag : V6_vlutb_128B +class Hexagon_v1024v1024LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512LLii_Intrinsic +// tag : V6_vlutb_acc +class Hexagon_v512v512v512LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024LLii_Intrinsic +// tag : V6_vlutb_acc_128B +class Hexagon_v1024v1024v1024LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048LLii_Intrinsic +// tag : V6_vlutb_dv_128B +class Hexagon_v2048v2048LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048LLii_Intrinsic +// tag : V6_vlutb_dv_acc_128B +class Hexagon_v2048v2048v2048LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512v512i_Intrinsic +// tag : V6_vlutvvb_oracc +class Hexagon_v512v512v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024v1024i_Intrinsic +// tag : V6_vlutvvb_oracc_128B +class Hexagon_v1024v1024v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512v512i_Intrinsic +// tag : V6_vlutvwh_oracc +class Hexagon_v1024v1024v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024v1024i_Intrinsic +// tag : V6_vlutvwh_oracc_128B +class Hexagon_v2048v2048v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLiLLi_Intrinsic +// tag : M6_vabsdiffb +class Hexagon_LLiLLiLLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLii_Intrinsic +// tag : S6_vsplatrbp +class Hexagon_LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r,SI_ftype_SISI,2) +// tag : S6_rol_i_r +def int_hexagon_S6_rol_i_r : +Hexagon_iii_Intrinsic<"HEXAGON_S6_rol_i_r">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p,DI_ftype_DISI,2) +// tag : S6_rol_i_p +def int_hexagon_S6_rol_i_p : +Hexagon_LLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_acc,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_acc +def int_hexagon_S6_rol_i_r_acc : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_acc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_acc,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_acc +def int_hexagon_S6_rol_i_p_acc : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_acc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_nac,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_nac +def int_hexagon_S6_rol_i_r_nac : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_nac">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_nac,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_nac +def int_hexagon_S6_rol_i_p_nac : 
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_nac">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_xacc,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_xacc +def int_hexagon_S6_rol_i_r_xacc : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_xacc,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_xacc +def int_hexagon_S6_rol_i_p_xacc : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_and,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_and +def int_hexagon_S6_rol_i_r_and : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_and">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_or,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_or +def int_hexagon_S6_rol_i_r_or : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_or">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_and,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_and +def int_hexagon_S6_rol_i_p_and : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_and">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_or,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_or +def int_hexagon_S6_rol_i_p_or : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_or">; + +// +// BUILTIN_INFO(HEXAGON.S2_cabacencbin,DI_ftype_DIDIQI,3) +// tag : S2_cabacencbin +def int_hexagon_S2_cabacencbin : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S2_cabacencbin">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignb,VI_ftype_VIVISI,3) +// tag : V6_valignb +def int_hexagon_V6_valignb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignb">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignb_128B,VI_ftype_VIVISI,3) +// tag : V6_valignb_128B +def int_hexagon_V6_valignb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignb,VI_ftype_VIVISI,3) +// tag : V6_vlalignb +def int_hexagon_V6_vlalignb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignb_128B,VI_ftype_VIVISI,3) +// tag : V6_vlalignb_128B +def int_hexagon_V6_vlalignb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignbi,VI_ftype_VIVISI,3) +// tag : V6_valignbi +def int_hexagon_V6_valignbi : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignbi">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignbi_128B,VI_ftype_VIVISI,3) +// tag : V6_valignbi_128B +def int_hexagon_V6_valignbi_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignbi_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignbi,VI_ftype_VIVISI,3) +// tag : V6_vlalignbi +def int_hexagon_V6_vlalignbi : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignbi">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignbi_128B,VI_ftype_VIVISI,3) +// tag : V6_vlalignbi_128B +def int_hexagon_V6_vlalignbi_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignbi_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vror,VI_ftype_VISI,2) +// tag : V6_vror +def int_hexagon_V6_vror : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vror">; + +// +// BUILTIN_INFO(HEXAGON.V6_vror_128B,VI_ftype_VISI,2) +// tag : V6_vror_128B +def int_hexagon_V6_vror_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vror_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackub,VD_ftype_VI,1) +// tag : V6_vunpackub +def int_hexagon_V6_vunpackub : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackub_128B,VD_ftype_VI,1) +// tag : V6_vunpackub_128B +def int_hexagon_V6_vunpackub_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackb,VD_ftype_VI,1) +// tag : V6_vunpackb +def 
int_hexagon_V6_vunpackb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackb_128B,VD_ftype_VI,1) +// tag : V6_vunpackb_128B +def int_hexagon_V6_vunpackb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackuh,VD_ftype_VI,1) +// tag : V6_vunpackuh +def int_hexagon_V6_vunpackuh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackuh_128B,VD_ftype_VI,1) +// tag : V6_vunpackuh_128B +def int_hexagon_V6_vunpackuh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackh,VD_ftype_VI,1) +// tag : V6_vunpackh +def int_hexagon_V6_vunpackh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackh_128B,VD_ftype_VI,1) +// tag : V6_vunpackh_128B +def int_hexagon_V6_vunpackh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackob,VD_ftype_VDVI,2) +// tag : V6_vunpackob +def int_hexagon_V6_vunpackob : +Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackob_128B,VD_ftype_VDVI,2) +// tag : V6_vunpackob_128B +def int_hexagon_V6_vunpackob_128B : +Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackoh,VD_ftype_VDVI,2) +// tag : V6_vunpackoh +def int_hexagon_V6_vunpackoh : +Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackoh_128B,VD_ftype_VDVI,2) +// tag : V6_vunpackoh_128B +def int_hexagon_V6_vunpackoh_128B : +Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeb,VI_ftype_VIVI,2) +// tag : V6_vpackeb +def int_hexagon_V6_vpackeb : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeb_128B,VI_ftype_VIVI,2) +// tag : V6_vpackeb_128B +def int_hexagon_V6_vpackeb_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeh,VI_ftype_VIVI,2) +// tag : V6_vpackeh +def int_hexagon_V6_vpackeh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeh_128B,VI_ftype_VIVI,2) +// tag : V6_vpackeh_128B +def int_hexagon_V6_vpackeh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackob,VI_ftype_VIVI,2) +// tag : V6_vpackob +def int_hexagon_V6_vpackob : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackob_128B,VI_ftype_VIVI,2) +// tag : V6_vpackob_128B +def int_hexagon_V6_vpackob_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackoh,VI_ftype_VIVI,2) +// tag : V6_vpackoh +def int_hexagon_V6_vpackoh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackoh_128B,VI_ftype_VIVI,2) +// tag : V6_vpackoh_128B +def int_hexagon_V6_vpackoh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat,VI_ftype_VIVI,2) +// tag : V6_vpackhub_sat +def int_hexagon_V6_vpackhub_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhub_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackhub_sat_128B +def int_hexagon_V6_vpackhub_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vpackhb_sat,VI_ftype_VIVI,2) +// tag : V6_vpackhb_sat +def int_hexagon_V6_vpackhb_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhb_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackhb_sat_128B +def int_hexagon_V6_vpackhb_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat,VI_ftype_VIVI,2) +// tag : V6_vpackwuh_sat +def int_hexagon_V6_vpackwuh_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwuh_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackwuh_sat_128B +def int_hexagon_V6_vpackwuh_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat,VI_ftype_VIVI,2) +// tag : V6_vpackwh_sat +def int_hexagon_V6_vpackwh_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwh_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackwh_sat_128B +def int_hexagon_V6_vpackwh_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzb,VD_ftype_VI,1) +// tag : V6_vzb +def int_hexagon_V6_vzb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzb_128B,VD_ftype_VI,1) +// tag : V6_vzb_128B +def int_hexagon_V6_vzb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsb,VD_ftype_VI,1) +// tag : V6_vsb +def int_hexagon_V6_vsb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsb_128B,VD_ftype_VI,1) +// tag : V6_vsb_128B +def int_hexagon_V6_vsb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzh,VD_ftype_VI,1) +// tag : V6_vzh +def int_hexagon_V6_vzh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzh_128B,VD_ftype_VI,1) +// tag : V6_vzh_128B +def int_hexagon_V6_vzh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsh,VD_ftype_VI,1) +// tag : V6_vsh +def int_hexagon_V6_vsh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsh_128B,VD_ftype_VI,1) +// tag : V6_vsh_128B +def int_hexagon_V6_vsh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus,VI_ftype_VISI,2) +// tag : V6_vdmpybus +def int_hexagon_V6_vdmpybus : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_128B,VI_ftype_VISI,2) +// tag : V6_vdmpybus_128B +def int_hexagon_V6_vdmpybus_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpybus_acc +def int_hexagon_V6_vdmpybus_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpybus_acc_128B +def int_hexagon_V6_vdmpybus_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv,VD_ftype_VDSI,2) +// tag : V6_vdmpybus_dv +def int_hexagon_V6_vdmpybus_dv : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_128B,VD_ftype_VDSI,2) +// tag : V6_vdmpybus_dv_128B +def int_hexagon_V6_vdmpybus_dv_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc,VD_ftype_VDVDSI,3) +// tag : V6_vdmpybus_dv_acc +def int_hexagon_V6_vdmpybus_dv_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vdmpybus_dv_acc_128B +def int_hexagon_V6_vdmpybus_dv_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb,VI_ftype_VISI,2) +// tag : V6_vdmpyhb +def int_hexagon_V6_vdmpyhb : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhb_128B +def int_hexagon_V6_vdmpyhb_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhb_acc +def int_hexagon_V6_vdmpyhb_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhb_acc_128B +def int_hexagon_V6_vdmpyhb_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv,VD_ftype_VDSI,2) +// tag : V6_vdmpyhb_dv +def int_hexagon_V6_vdmpyhb_dv : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_128B,VD_ftype_VDSI,2) +// tag : V6_vdmpyhb_dv_128B +def int_hexagon_V6_vdmpyhb_dv_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc,VD_ftype_VDVDSI,3) +// tag : V6_vdmpyhb_dv_acc +def int_hexagon_V6_vdmpyhb_dv_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vdmpyhb_dv_acc_128B +def int_hexagon_V6_vdmpyhb_dv_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat,VI_ftype_VIVI,2) +// tag : V6_vdmpyhvsat +def int_hexagon_V6_vdmpyhvsat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_128B,VI_ftype_VIVI,2) +// tag : V6_vdmpyhvsat_128B +def int_hexagon_V6_vdmpyhvsat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc,VI_ftype_VIVIVI,3) +// tag : V6_vdmpyhvsat_acc +def int_hexagon_V6_vdmpyhvsat_acc : +Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc_128B,VI_ftype_VIVIVI,3) +// tag : V6_vdmpyhvsat_acc_128B +def int_hexagon_V6_vdmpyhvsat_acc_128B : +Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat,VI_ftype_VISI,2) +// tag : V6_vdmpyhsat +def int_hexagon_V6_vdmpyhsat : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhsat_128B +def int_hexagon_V6_vdmpyhsat_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsat_acc +def int_hexagon_V6_vdmpyhsat_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsat_acc_128B +def int_hexagon_V6_vdmpyhsat_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vdmpyhisat,VI_ftype_VDSI,2) +// tag : V6_vdmpyhisat +def int_hexagon_V6_vdmpyhisat : +Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_128B,VI_ftype_VDSI,2) +// tag : V6_vdmpyhisat_128B +def int_hexagon_V6_vdmpyhisat_128B : +Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhisat_acc +def int_hexagon_V6_vdmpyhisat_acc : +Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc_128B,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhisat_acc_128B +def int_hexagon_V6_vdmpyhisat_acc_128B : +Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat,VI_ftype_VISI,2) +// tag : V6_vdmpyhsusat +def int_hexagon_V6_vdmpyhsusat : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhsusat_128B +def int_hexagon_V6_vdmpyhsusat_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsusat_acc +def int_hexagon_V6_vdmpyhsusat_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsusat_acc_128B +def int_hexagon_V6_vdmpyhsusat_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat,VI_ftype_VDSI,2) +// tag : V6_vdmpyhsuisat +def int_hexagon_V6_vdmpyhsuisat : +Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_128B,VI_ftype_VDSI,2) +// tag : V6_vdmpyhsuisat_128B +def int_hexagon_V6_vdmpyhsuisat_128B : +Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhsuisat_acc +def int_hexagon_V6_vdmpyhsuisat_acc : +Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc_128B,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhsuisat_acc_128B +def int_hexagon_V6_vdmpyhsuisat_acc_128B : +Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb,VD_ftype_VDSI,2) +// tag : V6_vtmpyb +def int_hexagon_V6_vtmpyb : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_128B,VD_ftype_VDSI,2) +// tag : V6_vtmpyb_128B +def int_hexagon_V6_vtmpyb_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc,VD_ftype_VDVDSI,3) +// tag : V6_vtmpyb_acc +def int_hexagon_V6_vtmpyb_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vtmpyb_acc_128B +def int_hexagon_V6_vtmpyb_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus,VD_ftype_VDSI,2) +// tag : V6_vtmpybus +def int_hexagon_V6_vtmpybus : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus_128B,VD_ftype_VDSI,2) +// tag : V6_vtmpybus_128B +def int_hexagon_V6_vtmpybus_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc,VD_ftype_VDVDSI,3) +// tag : 
+def int_hexagon_V6_vtmpybus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpybus_acc_128B
+def int_hexagon_V6_vtmpybus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb
+def int_hexagon_V6_vtmpyhb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_128B,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb_128B
+def int_hexagon_V6_vtmpyhb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc
+def int_hexagon_V6_vtmpyhb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc_128B
+def int_hexagon_V6_vtmpyhb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub,VI_ftype_VISI,2)
+// tag : V6_vrmpyub
+def int_hexagon_V6_vrmpyub :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpyub_128B
+def int_hexagon_V6_vrmpyub_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc
+def int_hexagon_V6_vrmpyub_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc_128B
+def int_hexagon_V6_vrmpyub_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv
+def int_hexagon_V6_vrmpyubv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv_128B
+def int_hexagon_V6_vrmpyubv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc
+def int_hexagon_V6_vrmpyubv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc_128B
+def int_hexagon_V6_vrmpyubv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv
+def int_hexagon_V6_vrmpybv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv_128B
+def int_hexagon_V6_vrmpybv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc
+def int_hexagon_V6_vrmpybv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc_128B
+def int_hexagon_V6_vrmpybv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi
+def int_hexagon_V6_vrmpyubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi_128B
+def int_hexagon_V6_vrmpyubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc
+def int_hexagon_V6_vrmpyubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc_128B
+def int_hexagon_V6_vrmpyubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus,VI_ftype_VISI,2)
+// tag : V6_vrmpybus
+def int_hexagon_V6_vrmpybus :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpybus_128B
+def int_hexagon_V6_vrmpybus_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc
+def int_hexagon_V6_vrmpybus_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc_128B
+def int_hexagon_V6_vrmpybus_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi
+def int_hexagon_V6_vrmpybusi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi_128B
+def int_hexagon_V6_vrmpybusi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc
+def int_hexagon_V6_vrmpybusi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc_128B
+def int_hexagon_V6_vrmpybusi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv
+def int_hexagon_V6_vrmpybusv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv_128B
+def int_hexagon_V6_vrmpybusv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc
+def int_hexagon_V6_vrmpybusv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc_128B
+def int_hexagon_V6_vrmpybusv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh
+def int_hexagon_V6_vdsaduh :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_128B,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh_128B
+def int_hexagon_V6_vdsaduh_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc
+def int_hexagon_V6_vdsaduh_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc_128B
+def int_hexagon_V6_vdsaduh_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi
+def int_hexagon_V6_vrsadubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi_128B
+def int_hexagon_V6_vrsadubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc
+def int_hexagon_V6_vrsadubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc_128B
+def int_hexagon_V6_vrsadubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw,VI_ftype_VISI,2)
+// tag : V6_vasrw
+def int_hexagon_V6_vasrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_128B,VI_ftype_VISI,2)
+// tag : V6_vasrw_128B
+def int_hexagon_V6_vasrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw,VI_ftype_VISI,2)
+// tag : V6_vaslw
+def int_hexagon_V6_vaslw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_128B,VI_ftype_VISI,2)
+// tag : V6_vaslw_128B
+def int_hexagon_V6_vaslw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw,VI_ftype_VISI,2)
+// tag : V6_vlsrw
+def int_hexagon_V6_vlsrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrw_128B
+def int_hexagon_V6_vlsrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv,VI_ftype_VIVI,2)
+// tag : V6_vasrwv
+def int_hexagon_V6_vasrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrwv_128B
+def int_hexagon_V6_vasrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv,VI_ftype_VIVI,2)
+// tag : V6_vaslwv
+def int_hexagon_V6_vaslwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslwv_128B
+def int_hexagon_V6_vaslwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv
+def int_hexagon_V6_vlsrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv_128B
+def int_hexagon_V6_vlsrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh,VI_ftype_VISI,2)
+// tag : V6_vasrh
+def int_hexagon_V6_vasrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh_128B,VI_ftype_VISI,2)
+// tag : V6_vasrh_128B
+def int_hexagon_V6_vasrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh,VI_ftype_VISI,2)
+// tag : V6_vaslh
+def int_hexagon_V6_vaslh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh_128B,VI_ftype_VISI,2)
+// tag : V6_vaslh_128B
+def int_hexagon_V6_vaslh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh,VI_ftype_VISI,2)
+// tag : V6_vlsrh
+def int_hexagon_V6_vlsrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrh_128B
+def int_hexagon_V6_vlsrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv,VI_ftype_VIVI,2)
+// tag : V6_vasrhv
+def int_hexagon_V6_vasrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrhv_128B
+def int_hexagon_V6_vasrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv,VI_ftype_VIVI,2)
+// tag : V6_vaslhv
+def int_hexagon_V6_vaslhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslhv_128B
+def int_hexagon_V6_vaslhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv
+def int_hexagon_V6_vlsrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv_128B
+def int_hexagon_V6_vlsrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh
+def int_hexagon_V6_vasrwh :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh_128B
+def int_hexagon_V6_vasrwh_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat
+def int_hexagon_V6_vasrwhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat_128B
+def int_hexagon_V6_vasrwhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat
+def int_hexagon_V6_vasrwhrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat_128B
+def int_hexagon_V6_vasrwhrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat
+def int_hexagon_V6_vasrwuhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat_128B
+def int_hexagon_V6_vasrwuhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh,VI_ftype_VIVI,2)
+// tag : V6_vroundwh
+def int_hexagon_V6_vroundwh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwh_128B
+def int_hexagon_V6_vroundwh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh
+def int_hexagon_V6_vroundwuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh_128B
+def int_hexagon_V6_vroundwuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat
+def int_hexagon_V6_vasrhubsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat_128B
+def int_hexagon_V6_vasrhubsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat
+def int_hexagon_V6_vasrhubrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat_128B
+def int_hexagon_V6_vasrhubrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat
+def int_hexagon_V6_vasrhbrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat_128B
+def int_hexagon_V6_vasrhbrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb,VI_ftype_VIVI,2)
+// tag : V6_vroundhb
+def int_hexagon_V6_vroundhb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhb_128B
+def int_hexagon_V6_vroundhb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub,VI_ftype_VIVI,2)
+// tag : V6_vroundhub
+def int_hexagon_V6_vroundhub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhub_128B
+def int_hexagon_V6_vroundhub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc
+def int_hexagon_V6_vaslw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc_128B
+def int_hexagon_V6_vaslw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc
+def int_hexagon_V6_vasrw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc_128B
+def int_hexagon_V6_vasrw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb,VI_ftype_VIVI,2)
+// tag : V6_vaddb
+def int_hexagon_V6_vaddb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddb_128B
+def int_hexagon_V6_vaddb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb,VI_ftype_VIVI,2)
+// tag : V6_vsubb
+def int_hexagon_V6_vsubb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubb_128B
+def int_hexagon_V6_vsubb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv
+def int_hexagon_V6_vaddb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv_128B
+def int_hexagon_V6_vaddb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv
+def int_hexagon_V6_vsubb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv_128B
+def int_hexagon_V6_vsubb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh,VI_ftype_VIVI,2)
+// tag : V6_vaddh
+def int_hexagon_V6_vaddh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddh_128B
+def int_hexagon_V6_vaddh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh,VI_ftype_VIVI,2)
+// tag : V6_vsubh
+def int_hexagon_V6_vsubh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubh_128B
+def int_hexagon_V6_vsubh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv
+def int_hexagon_V6_vaddh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv_128B
+def int_hexagon_V6_vaddh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv
+def int_hexagon_V6_vsubh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv_128B
+def int_hexagon_V6_vsubh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw,VI_ftype_VIVI,2)
+// tag : V6_vaddw
+def int_hexagon_V6_vaddw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddw_128B
+def int_hexagon_V6_vaddw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw,VI_ftype_VIVI,2)
+// tag : V6_vsubw
+def int_hexagon_V6_vsubw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubw_128B
+def int_hexagon_V6_vsubw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv
+def int_hexagon_V6_vaddw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv_128B
+def int_hexagon_V6_vaddw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv
+def int_hexagon_V6_vsubw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv_128B
+def int_hexagon_V6_vsubw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat
+def int_hexagon_V6_vaddubsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat_128B
+def int_hexagon_V6_vaddubsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv
+def int_hexagon_V6_vaddubsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv_128B
+def int_hexagon_V6_vaddubsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat,VI_ftype_VIVI,2)
+// tag : V6_vsububsat
+def int_hexagon_V6_vsububsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsububsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsububsat_128B
+def int_hexagon_V6_vsububsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv
+def int_hexagon_V6_vsububsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv_128B
+def int_hexagon_V6_vsububsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat
+def int_hexagon_V6_vadduhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vadduhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat_128B
+def int_hexagon_V6_vadduhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv
+def int_hexagon_V6_vadduhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv_128B
+def int_hexagon_V6_vadduhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat
+def int_hexagon_V6_vsubuhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat_128B
+def int_hexagon_V6_vsubuhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv
+def int_hexagon_V6_vsubuhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv_128B
+def int_hexagon_V6_vsubuhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat
+def int_hexagon_V6_vaddhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat_128B
+def int_hexagon_V6_vaddhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv
+def int_hexagon_V6_vaddhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv_128B
+def int_hexagon_V6_vaddhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat
+def int_hexagon_V6_vsubhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat_128B
+def int_hexagon_V6_vsubhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv
+def int_hexagon_V6_vsubhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv_128B
+def int_hexagon_V6_vsubhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat
+def int_hexagon_V6_vaddwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat_128B
+def int_hexagon_V6_vaddwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv
+def int_hexagon_V6_vaddwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv_128B
+def int_hexagon_V6_vaddwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat
+def int_hexagon_V6_vsubwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat_128B
+def int_hexagon_V6_vsubwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv
+def int_hexagon_V6_vsubwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv_128B
+def int_hexagon_V6_vsubwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub,VI_ftype_VIVI,2)
+// tag : V6_vavgub
+def int_hexagon_V6_vavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgub_128B
+def int_hexagon_V6_vavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd
+def int_hexagon_V6_vavgubrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd_128B
+def int_hexagon_V6_vavgubrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh,VI_ftype_VIVI,2)
+// tag : V6_vavguh
+def int_hexagon_V6_vavguh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguh_128B
+def int_hexagon_V6_vavguh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd
+def int_hexagon_V6_vavguhrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd_128B
+def int_hexagon_V6_vavguhrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh,VI_ftype_VIVI,2)
+// tag : V6_vavgh
+def int_hexagon_V6_vavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgh_128B
+def int_hexagon_V6_vavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd
+def int_hexagon_V6_vavghrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavghrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd_128B
+def int_hexagon_V6_vavghrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh,VI_ftype_VIVI,2)
+// tag : V6_vnavgh
+def int_hexagon_V6_vnavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgh_128B
+def int_hexagon_V6_vnavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw,VI_ftype_VIVI,2)
+// tag : V6_vavgw
+def int_hexagon_V6_vavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgw_128B
+def int_hexagon_V6_vavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd
+def int_hexagon_V6_vavgwrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgwrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd_128B
+def int_hexagon_V6_vavgwrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw,VI_ftype_VIVI,2)
+// tag : V6_vnavgw
+def int_hexagon_V6_vnavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgw_128B
+def int_hexagon_V6_vnavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub
+def int_hexagon_V6_vabsdiffub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub_128B
+def int_hexagon_V6_vabsdiffub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh
+def int_hexagon_V6_vabsdiffuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh_128B
+def int_hexagon_V6_vabsdiffuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh
+def int_hexagon_V6_vabsdiffh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh_128B
+def int_hexagon_V6_vabsdiffh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw
+def int_hexagon_V6_vabsdiffw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw_128B
+def int_hexagon_V6_vabsdiffw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub,VI_ftype_VIVI,2)
+// tag : V6_vnavgub
+def int_hexagon_V6_vnavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgub_128B
+def int_hexagon_V6_vnavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh,VD_ftype_VIVI,2)
+// tag : V6_vaddubh
+def int_hexagon_V6_vaddubh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddubh_128B
+def int_hexagon_V6_vaddubh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh,VD_ftype_VIVI,2)
+// tag : V6_vsububh
+def int_hexagon_V6_vsububh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsububh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh_128B,VD_ftype_VIVI,2)
+// tag : V6_vsububh_128B
+def int_hexagon_V6_vsububh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsububh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw,VD_ftype_VIVI,2)
+// tag : V6_vaddhw
+def int_hexagon_V6_vaddhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddhw_128B
+def int_hexagon_V6_vaddhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw,VD_ftype_VIVI,2)
+// tag : V6_vsubhw
+def int_hexagon_V6_vsubhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubhw_128B
+def int_hexagon_V6_vsubhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw,VD_ftype_VIVI,2)
+// tag : V6_vadduhw
+def int_hexagon_V6_vadduhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vadduhw_128B
+def int_hexagon_V6_vadduhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw
+def int_hexagon_V6_vsubuhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubuhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw_128B
+def int_hexagon_V6_vsubuhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0,VI_ftype_,0)
+// tag : V6_vd0
+def int_hexagon_V6_vd0 :
+Hexagon_v512_Intrinsic<"HEXAGON_V6_vd0">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0_128B,VI_ftype_,0)
+// tag : V6_vd0_128B
+def int_hexagon_V6_vd0_128B :
+Hexagon_v1024_Intrinsic<"HEXAGON_V6_vd0_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq
+def int_hexagon_V6_vaddbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq_128B
+def int_hexagon_V6_vaddbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq
+def int_hexagon_V6_vsubbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq_128B
+def int_hexagon_V6_vsubbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq
+def int_hexagon_V6_vaddbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq_128B
+def int_hexagon_V6_vaddbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq
+def int_hexagon_V6_vsubbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq_128B
+def int_hexagon_V6_vsubbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq
+def int_hexagon_V6_vaddhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq_128B
+def int_hexagon_V6_vaddhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq
+def int_hexagon_V6_vsubhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq_128B
+def int_hexagon_V6_vsubhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq
+def int_hexagon_V6_vaddhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq_128B
+def int_hexagon_V6_vaddhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq
+def int_hexagon_V6_vsubhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq_128B
+def int_hexagon_V6_vsubhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq
+def int_hexagon_V6_vaddwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq_128B
+def int_hexagon_V6_vaddwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq
+def int_hexagon_V6_vsubwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq_128B
+def int_hexagon_V6_vsubwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq
+def int_hexagon_V6_vaddwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq_128B
+def int_hexagon_V6_vaddwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq
+def int_hexagon_V6_vsubwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq_128B
+def int_hexagon_V6_vsubwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh,VI_ftype_VI,1)
+// tag : V6_vabsh
+def int_hexagon_V6_vabsh :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_128B
+def int_hexagon_V6_vabsh_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat,VI_ftype_VI,1)
+// tag : V6_vabsh_sat
+def int_hexagon_V6_vabsh_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_sat_128B
+def int_hexagon_V6_vabsh_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw,VI_ftype_VI,1)
+// tag : V6_vabsw
+def int_hexagon_V6_vabsw :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_128B
+def int_hexagon_V6_vabsw_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat,VI_ftype_VI,1)
+// tag : V6_vabsw_sat
+def int_hexagon_V6_vabsw_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_sat_128B
+def int_hexagon_V6_vabsw_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv,VD_ftype_VIVI,2)
+// tag : V6_vmpybv
+def int_hexagon_V6_vmpybv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybv_128B
+def int_hexagon_V6_vmpybv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc
+def int_hexagon_V6_vmpybv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc_128B
+def int_hexagon_V6_vmpybv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv
+def int_hexagon_V6_vmpyubv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv_128B
+def int_hexagon_V6_vmpyubv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc
+def int_hexagon_V6_vmpyubv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc_128B
+def int_hexagon_V6_vmpyubv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv
+def int_hexagon_V6_vmpybusv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv_128B
+def int_hexagon_V6_vmpybusv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc
+def int_hexagon_V6_vmpybusv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc_128B
+def int_hexagon_V6_vmpybusv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv
+def int_hexagon_V6_vmpabusv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv_128B
+def int_hexagon_V6_vmpabusv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv
+def int_hexagon_V6_vmpabuuv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv_128B
+def int_hexagon_V6_vmpabuuv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv
+def int_hexagon_V6_vmpyhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv_128B
+def int_hexagon_V6_vmpyhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc
+def int_hexagon_V6_vmpyhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc_128B
+def int_hexagon_V6_vmpyhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv
+def int_hexagon_V6_vmpyuhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv_128B
+def int_hexagon_V6_vmpyuhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc
+def int_hexagon_V6_vmpyuhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc_128B
+def int_hexagon_V6_vmpyuhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs
+def int_hexagon_V6_vmpyhvsrs :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs_128B
+def int_hexagon_V6_vmpyhvsrs_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus
+def int_hexagon_V6_vmpyhus :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus_128B
+def int_hexagon_V6_vmpyhus_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc
+def int_hexagon_V6_vmpyhus_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc_128B
+def int_hexagon_V6_vmpyhus_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih,VI_ftype_VIVI,2)
+// tag : V6_vmpyih
+def int_hexagon_V6_vmpyih :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyih_128B
+def int_hexagon_V6_vmpyih_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc
+def int_hexagon_V6_vmpyih_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc_128B
+def int_hexagon_V6_vmpyih_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh
+def int_hexagon_V6_vmpyewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh_128B
+def int_hexagon_V6_vmpyewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh
+def int_hexagon_V6_vmpyowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_128B
+def int_hexagon_V6_vmpyowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd
+def int_hexagon_V6_vmpyowh_rnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd_128B
+def int_hexagon_V6_vmpyowh_rnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc
+def int_hexagon_V6_vmpyowh_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc_128B
+def int_hexagon_V6_vmpyowh_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc
+def int_hexagon_V6_vmpyowh_rnd_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc_128B
+def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh
+def int_hexagon_V6_vmpyieoh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh_128B
+def int_hexagon_V6_vmpyieoh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh
+def int_hexagon_V6_vmpyiewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh_128B
+def int_hexagon_V6_vmpyiewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh
+def int_hexagon_V6_vmpyiowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh_128B
+def int_hexagon_V6_vmpyiowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc
+def int_hexagon_V6_vmpyiewh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc_128B
+def int_hexagon_V6_vmpyiewh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc
+def int_hexagon_V6_vmpyiewuh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc_128B
+def int_hexagon_V6_vmpyiewuh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub,VD_ftype_VISI,2)
+// tag : V6_vmpyub
+def int_hexagon_V6_vmpyub :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyub_128B
+def int_hexagon_V6_vmpyub_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc
+def int_hexagon_V6_vmpyub_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc_128B
+def int_hexagon_V6_vmpyub_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus,VD_ftype_VISI,2)
+// tag : V6_vmpybus
+def int_hexagon_V6_vmpybus :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_128B,VD_ftype_VISI,2)
+// tag : V6_vmpybus_128B
+def int_hexagon_V6_vmpybus_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc
+def int_hexagon_V6_vmpybus_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc_128B
+def int_hexagon_V6_vmpybus_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus,VD_ftype_VDSI,2)
+// tag : V6_vmpabus
+def int_hexagon_V6_vmpabus :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpabus_128B
+def int_hexagon_V6_vmpabus_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc
+def int_hexagon_V6_vmpabus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc_128B
+def int_hexagon_V6_vmpabus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb,VD_ftype_VDSI,2)
+// tag : V6_vmpahb
+def int_hexagon_V6_vmpahb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpahb_128B
+def int_hexagon_V6_vmpahb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc
+def int_hexagon_V6_vmpahb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc_128B
+def int_hexagon_V6_vmpahb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh,VD_ftype_VISI,2)
+// tag : V6_vmpyh
+def int_hexagon_V6_vmpyh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyh_128B
+def int_hexagon_V6_vmpyh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc
+def int_hexagon_V6_vmpyhsat_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc_128B
+def int_hexagon_V6_vmpyhsat_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss,VI_ftype_VISI,2)
+// tag : V6_vmpyhss
+def int_hexagon_V6_vmpyhss :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhss">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhss_128B
+def int_hexagon_V6_vmpyhss_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs
+def int_hexagon_V6_vmpyhsrs :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs_128B
+def int_hexagon_V6_vmpyhsrs_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh,VD_ftype_VISI,2)
+// tag : V6_vmpyuh
+def int_hexagon_V6_vmpyuh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyuh_128B
+def int_hexagon_V6_vmpyuh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc
+def int_hexagon_V6_vmpyuh_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc_128B
+def int_hexagon_V6_vmpyuh_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb,VI_ftype_VISI,2)
+// tag : V6_vmpyihb
+def int_hexagon_V6_vmpyihb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyihb_128B
+def int_hexagon_V6_vmpyihb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc
+def int_hexagon_V6_vmpyihb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc_128B
+def int_hexagon_V6_vmpyihb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb
+def int_hexagon_V6_vmpyiwb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb_128B
+def int_hexagon_V6_vmpyiwb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc
+def int_hexagon_V6_vmpyiwb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc_128B
+def int_hexagon_V6_vmpyiwb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh
+def int_hexagon_V6_vmpyiwh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh_128B
+def int_hexagon_V6_vmpyiwh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc
+def int_hexagon_V6_vmpyiwh_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc_128B
+def int_hexagon_V6_vmpyiwh_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand,VI_ftype_VIVI,2)
+// tag : V6_vand
+def int_hexagon_V6_vand :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vand">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand_128B,VI_ftype_VIVI,2)
+// tag : V6_vand_128B
+def int_hexagon_V6_vand_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vand_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor,VI_ftype_VIVI,2)
+// tag : V6_vor
+def int_hexagon_V6_vor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor_128B,VI_ftype_VIVI,2)
+// tag : V6_vor_128B
+def int_hexagon_V6_vor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor,VI_ftype_VIVI,2)
+// tag : V6_vxor
+def int_hexagon_V6_vxor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vxor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor_128B,VI_ftype_VIVI,2)
+// tag : V6_vxor_128B
+def int_hexagon_V6_vxor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vxor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot,VI_ftype_VI,1)
+// tag : V6_vnot
+def int_hexagon_V6_vnot :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnot">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot_128B,VI_ftype_VI,1)
+// tag : V6_vnot_128B
+def int_hexagon_V6_vnot_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnot_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt,VI_ftype_QVSI,2)
+// tag : V6_vandqrt
+def int_hexagon_V6_vandqrt :
+Hexagon_v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_128B,VI_ftype_QVSI,2)
+// tag : V6_vandqrt_128B
+def int_hexagon_V6_vandqrt_128B :
+Hexagon_v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc
+def int_hexagon_V6_vandqrt_acc :
+Hexagon_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc_128B,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc_128B
+def int_hexagon_V6_vandqrt_acc_128B :
int_hexagon_V6_vandqrt_acc_128B : +Hexagon_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt,QV_ftype_VISI,2) +// tag : V6_vandvrt +def int_hexagon_V6_vandvrt : +Hexagon_v64iv512i_Intrinsic<"HEXAGON_V6_vandvrt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_128B,QV_ftype_VISI,2) +// tag : V6_vandvrt_128B +def int_hexagon_V6_vandvrt_128B : +Hexagon_v128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc,QV_ftype_QVVISI,3) +// tag : V6_vandvrt_acc +def int_hexagon_V6_vandvrt_acc : +Hexagon_v64iv64iv512i_Intrinsic<"HEXAGON_V6_vandvrt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc_128B,QV_ftype_QVVISI,3) +// tag : V6_vandvrt_acc_128B +def int_hexagon_V6_vandvrt_acc_128B : +Hexagon_v128iv128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw,QV_ftype_VIVI,2) +// tag : V6_vgtw +def int_hexagon_V6_vgtw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_128B,QV_ftype_VIVI,2) +// tag : V6_vgtw_128B +def int_hexagon_V6_vgtw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_and +def int_hexagon_V6_vgtw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_and_128B +def int_hexagon_V6_vgtw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_or +def int_hexagon_V6_vgtw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_or_128B +def int_hexagon_V6_vgtw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_xor +def int_hexagon_V6_vgtw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_xor_128B +def int_hexagon_V6_vgtw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw,QV_ftype_VIVI,2) +// tag : V6_veqw +def int_hexagon_V6_veqw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqw">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_128B,QV_ftype_VIVI,2) +// tag : V6_veqw_128B +def int_hexagon_V6_veqw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_and,QV_ftype_QVVIVI,3) +// tag : V6_veqw_and +def int_hexagon_V6_veqw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_and_128B +def int_hexagon_V6_veqw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_or,QV_ftype_QVVIVI,3) +// tag : V6_veqw_or +def int_hexagon_V6_veqw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_or_128B +def int_hexagon_V6_veqw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqw_xor +def int_hexagon_V6_veqw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_xor">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_veqw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_xor_128B +def int_hexagon_V6_veqw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth,QV_ftype_VIVI,2) +// tag : V6_vgth +def int_hexagon_V6_vgth : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_128B,QV_ftype_VIVI,2) +// tag : V6_vgth_128B +def int_hexagon_V6_vgth_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_and,QV_ftype_QVVIVI,3) +// tag : V6_vgth_and +def int_hexagon_V6_vgth_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_and_128B +def int_hexagon_V6_vgth_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_or,QV_ftype_QVVIVI,3) +// tag : V6_vgth_or +def int_hexagon_V6_vgth_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_or_128B +def int_hexagon_V6_vgth_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgth_xor +def int_hexagon_V6_vgth_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_xor_128B +def int_hexagon_V6_vgth_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh,QV_ftype_VIVI,2) +// tag : V6_veqh +def int_hexagon_V6_veqh : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqh">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_128B,QV_ftype_VIVI,2) +// tag : V6_veqh_128B +def int_hexagon_V6_veqh_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_and,QV_ftype_QVVIVI,3) +// tag : V6_veqh_and +def int_hexagon_V6_veqh_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_and_128B +def int_hexagon_V6_veqh_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_or,QV_ftype_QVVIVI,3) +// tag : V6_veqh_or +def int_hexagon_V6_veqh_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_or_128B +def int_hexagon_V6_veqh_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqh_xor +def int_hexagon_V6_veqh_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_xor_128B +def int_hexagon_V6_veqh_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb,QV_ftype_VIVI,2) +// tag : V6_vgtb +def int_hexagon_V6_vgtb : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_128B,QV_ftype_VIVI,2) +// tag : V6_vgtb_128B +def int_hexagon_V6_vgtb_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_and +def int_hexagon_V6_vgtb_and : 
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_and_128B +def int_hexagon_V6_vgtb_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_or +def int_hexagon_V6_vgtb_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_or_128B +def int_hexagon_V6_vgtb_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_xor +def int_hexagon_V6_vgtb_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_xor_128B +def int_hexagon_V6_vgtb_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb,QV_ftype_VIVI,2) +// tag : V6_veqb +def int_hexagon_V6_veqb : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqb">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_128B,QV_ftype_VIVI,2) +// tag : V6_veqb_128B +def int_hexagon_V6_veqb_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_and,QV_ftype_QVVIVI,3) +// tag : V6_veqb_and +def int_hexagon_V6_veqb_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_and_128B +def int_hexagon_V6_veqb_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_or,QV_ftype_QVVIVI,3) +// tag : V6_veqb_or +def int_hexagon_V6_veqb_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_or_128B +def int_hexagon_V6_veqb_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqb_xor +def int_hexagon_V6_veqb_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_xor_128B +def int_hexagon_V6_veqb_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw,QV_ftype_VIVI,2) +// tag : V6_vgtuw +def int_hexagon_V6_vgtuw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_128B,QV_ftype_VIVI,2) +// tag : V6_vgtuw_128B +def int_hexagon_V6_vgtuw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_and +def int_hexagon_V6_vgtuw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_and_128B +def int_hexagon_V6_vgtuw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_or +def int_hexagon_V6_vgtuw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_or_128B +def int_hexagon_V6_vgtuw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vgtuw_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_xor +def int_hexagon_V6_vgtuw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_xor_128B +def int_hexagon_V6_vgtuw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh,QV_ftype_VIVI,2) +// tag : V6_vgtuh +def int_hexagon_V6_vgtuh : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_128B,QV_ftype_VIVI,2) +// tag : V6_vgtuh_128B +def int_hexagon_V6_vgtuh_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_and +def int_hexagon_V6_vgtuh_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_and_128B +def int_hexagon_V6_vgtuh_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_or +def int_hexagon_V6_vgtuh_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_or_128B +def int_hexagon_V6_vgtuh_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_xor +def int_hexagon_V6_vgtuh_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_xor_128B +def int_hexagon_V6_vgtuh_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub,QV_ftype_VIVI,2) +// tag : V6_vgtub +def int_hexagon_V6_vgtub : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_128B,QV_ftype_VIVI,2) +// tag : V6_vgtub_128B +def int_hexagon_V6_vgtub_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_and +def int_hexagon_V6_vgtub_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_and_128B +def int_hexagon_V6_vgtub_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_or +def int_hexagon_V6_vgtub_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_or_128B +def int_hexagon_V6_vgtub_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_xor +def int_hexagon_V6_vgtub_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_xor_128B +def int_hexagon_V6_vgtub_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or,QV_ftype_QVQV,2) +// tag : V6_pred_or +def int_hexagon_V6_pred_or : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_128B,QV_ftype_QVQV,2) +// 
tag : V6_pred_or_128B +def int_hexagon_V6_pred_or_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and,QV_ftype_QVQV,2) +// tag : V6_pred_and +def int_hexagon_V6_pred_and : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_128B,QV_ftype_QVQV,2) +// tag : V6_pred_and_128B +def int_hexagon_V6_pred_and_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_not,QV_ftype_QV,1) +// tag : V6_pred_not +def int_hexagon_V6_pred_not : +Hexagon_v64iv64i_Intrinsic<"HEXAGON_V6_pred_not">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_not_128B,QV_ftype_QV,1) +// tag : V6_pred_not_128B +def int_hexagon_V6_pred_not_128B : +Hexagon_v128iv128i_Intrinsic<"HEXAGON_V6_pred_not_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_xor,QV_ftype_QVQV,2) +// tag : V6_pred_xor +def int_hexagon_V6_pred_xor : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_xor_128B,QV_ftype_QVQV,2) +// tag : V6_pred_xor_128B +def int_hexagon_V6_pred_xor_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_n,QV_ftype_QVQV,2) +// tag : V6_pred_and_n +def int_hexagon_V6_pred_and_n : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and_n">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_n_128B,QV_ftype_QVQV,2) +// tag : V6_pred_and_n_128B +def int_hexagon_V6_pred_and_n_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_n_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_n,QV_ftype_QVQV,2) +// tag : V6_pred_or_n +def int_hexagon_V6_pred_or_n : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or_n">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_n_128B,QV_ftype_QVQV,2) +// tag : V6_pred_or_n_128B +def int_hexagon_V6_pred_or_n_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_n_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_scalar2,QV_ftype_SI,1) +// tag : V6_pred_scalar2 +def int_hexagon_V6_pred_scalar2 : +Hexagon_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_scalar2_128B,QV_ftype_SI,1) +// tag : V6_pred_scalar2_128B +def int_hexagon_V6_pred_scalar2_128B : +Hexagon_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmux,VI_ftype_QVVIVI,3) +// tag : V6_vmux +def int_hexagon_V6_vmux : +Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vmux">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmux_128B,VI_ftype_QVVIVI,3) +// tag : V6_vmux_128B +def int_hexagon_V6_vmux_128B : +Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vmux_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vswap,VD_ftype_QVVIVI,3) +// tag : V6_vswap +def int_hexagon_V6_vswap : +Hexagon_v1024v64iv512v512_Intrinsic<"HEXAGON_V6_vswap">; + +// +// BUILTIN_INFO(HEXAGON.V6_vswap_128B,VD_ftype_QVVIVI,3) +// tag : V6_vswap_128B +def int_hexagon_V6_vswap_128B : +Hexagon_v2048v128iv1024v1024_Intrinsic<"HEXAGON_V6_vswap_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxub,VI_ftype_VIVI,2) +// tag : V6_vmaxub +def int_hexagon_V6_vmaxub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxub_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxub_128B +def int_hexagon_V6_vmaxub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminub,VI_ftype_VIVI,2) +// tag : V6_vminub +def int_hexagon_V6_vminub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminub">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vminub_128B,VI_ftype_VIVI,2) +// tag : V6_vminub_128B +def int_hexagon_V6_vminub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxuh,VI_ftype_VIVI,2) +// tag : V6_vmaxuh +def int_hexagon_V6_vmaxuh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxuh_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxuh_128B +def int_hexagon_V6_vmaxuh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminuh,VI_ftype_VIVI,2) +// tag : V6_vminuh +def int_hexagon_V6_vminuh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminuh_128B,VI_ftype_VIVI,2) +// tag : V6_vminuh_128B +def int_hexagon_V6_vminuh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxh,VI_ftype_VIVI,2) +// tag : V6_vmaxh +def int_hexagon_V6_vmaxh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxh_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxh_128B +def int_hexagon_V6_vmaxh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminh,VI_ftype_VIVI,2) +// tag : V6_vminh +def int_hexagon_V6_vminh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminh_128B,VI_ftype_VIVI,2) +// tag : V6_vminh_128B +def int_hexagon_V6_vminh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxw,VI_ftype_VIVI,2) +// tag : V6_vmaxw +def int_hexagon_V6_vmaxw : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxw_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxw_128B +def int_hexagon_V6_vmaxw_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminw,VI_ftype_VIVI,2) +// tag : V6_vminw +def int_hexagon_V6_vminw : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminw_128B,VI_ftype_VIVI,2) +// tag : V6_vminw_128B +def int_hexagon_V6_vminw_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsathub,VI_ftype_VIVI,2) +// tag : V6_vsathub +def int_hexagon_V6_vsathub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsathub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsathub_128B,VI_ftype_VIVI,2) +// tag : V6_vsathub_128B +def int_hexagon_V6_vsathub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsathub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsatwh,VI_ftype_VIVI,2) +// tag : V6_vsatwh +def int_hexagon_V6_vsatwh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsatwh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsatwh_128B,VI_ftype_VIVI,2) +// tag : V6_vsatwh_128B +def int_hexagon_V6_vsatwh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatwh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffeb,VI_ftype_VIVI,2) +// tag : V6_vshuffeb +def int_hexagon_V6_vshuffeb : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffeb_128B,VI_ftype_VIVI,2) +// tag : V6_vshuffeb_128B +def int_hexagon_V6_vshuffeb_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffob,VI_ftype_VIVI,2) +// tag : V6_vshuffob +def int_hexagon_V6_vshuffob : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffob_128B,VI_ftype_VIVI,2) +// tag : V6_vshuffob_128B +def 
int_hexagon_V6_vshuffob_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufeh,VI_ftype_VIVI,2) +// tag : V6_vshufeh +def int_hexagon_V6_vshufeh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufeh_128B,VI_ftype_VIVI,2) +// tag : V6_vshufeh_128B +def int_hexagon_V6_vshufeh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoh,VI_ftype_VIVI,2) +// tag : V6_vshufoh +def int_hexagon_V6_vshufoh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoh_128B,VI_ftype_VIVI,2) +// tag : V6_vshufoh_128B +def int_hexagon_V6_vshufoh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffvdd,VD_ftype_VIVISI,3) +// tag : V6_vshuffvdd +def int_hexagon_V6_vshuffvdd : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vshuffvdd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffvdd_128B,VD_ftype_VIVISI,3) +// tag : V6_vshuffvdd_128B +def int_hexagon_V6_vshuffvdd_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealvdd,VD_ftype_VIVISI,3) +// tag : V6_vdealvdd +def int_hexagon_V6_vdealvdd : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vdealvdd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealvdd_128B,VD_ftype_VIVISI,3) +// tag : V6_vdealvdd_128B +def int_hexagon_V6_vdealvdd_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vdealvdd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeh,VD_ftype_VIVI,2) +// tag : V6_vshufoeh +def int_hexagon_V6_vshufoeh : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeh_128B,VD_ftype_VIVI,2) +// tag : V6_vshufoeh_128B +def int_hexagon_V6_vshufoeh_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeb,VD_ftype_VIVI,2) +// tag : V6_vshufoeb +def int_hexagon_V6_vshufoeb : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeb_128B,VD_ftype_VIVI,2) +// tag : V6_vshufoeb_128B +def int_hexagon_V6_vshufoeb_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealh,VI_ftype_VI,1) +// tag : V6_vdealh +def int_hexagon_V6_vdealh : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealh_128B,VI_ftype_VI,1) +// tag : V6_vdealh_128B +def int_hexagon_V6_vdealh_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb,VI_ftype_VI,1) +// tag : V6_vdealb +def int_hexagon_V6_vdealb : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb_128B,VI_ftype_VI,1) +// tag : V6_vdealb_128B +def int_hexagon_V6_vdealb_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb4w,VI_ftype_VIVI,2) +// tag : V6_vdealb4w +def int_hexagon_V6_vdealb4w : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdealb4w">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb4w_128B,VI_ftype_VIVI,2) +// tag : V6_vdealb4w_128B +def int_hexagon_V6_vdealb4w_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdealb4w_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffh,VI_ftype_VI,1) +// tag : V6_vshuffh +def int_hexagon_V6_vshuffh : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffh_128B,VI_ftype_VI,1) +// tag : V6_vshuffh_128B +def 
int_hexagon_V6_vshuffh_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffb,VI_ftype_VI,1) +// tag : V6_vshuffb +def int_hexagon_V6_vshuffb : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffb_128B,VI_ftype_VI,1) +// tag : V6_vshuffb_128B +def int_hexagon_V6_vshuffb_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_extractw,SI_ftype_VISI,2) +// tag : V6_extractw +def int_hexagon_V6_extractw : +Hexagon_iv512i_Intrinsic<"HEXAGON_V6_extractw">; + +// +// BUILTIN_INFO(HEXAGON.V6_extractw_128B,SI_ftype_VISI,2) +// tag : V6_extractw_128B +def int_hexagon_V6_extractw_128B : +Hexagon_iv1024i_Intrinsic<"HEXAGON_V6_extractw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vinsertwr,VI_ftype_VISI,2) +// tag : V6_vinsertwr +def int_hexagon_V6_vinsertwr : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vinsertwr">; + +// +// BUILTIN_INFO(HEXAGON.V6_vinsertwr_128B,VI_ftype_VISI,2) +// tag : V6_vinsertwr_128B +def int_hexagon_V6_vinsertwr_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vinsertwr_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_lvsplatw,VI_ftype_SI,1) +// tag : V6_lvsplatw +def int_hexagon_V6_lvsplatw : +Hexagon_v512i_Intrinsic<"HEXAGON_V6_lvsplatw">; + +// +// BUILTIN_INFO(HEXAGON.V6_lvsplatw_128B,VI_ftype_SI,1) +// tag : V6_lvsplatw_128B +def int_hexagon_V6_lvsplatw_128B : +Hexagon_v1024i_Intrinsic<"HEXAGON_V6_lvsplatw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vassign,VI_ftype_VI,1) +// tag : V6_vassign +def int_hexagon_V6_vassign : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vassign">; + +// +// BUILTIN_INFO(HEXAGON.V6_vassign_128B,VI_ftype_VI,1) +// tag : V6_vassign_128B +def int_hexagon_V6_vassign_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vassign_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcombine,VD_ftype_VIVI,2) +// tag : V6_vcombine +def int_hexagon_V6_vcombine : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcombine_128B,VD_ftype_VIVI,2) +// tag : V6_vcombine_128B +def int_hexagon_V6_vcombine_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb,VI_ftype_VIDISI,3) +// tag : V6_vlutb +def int_hexagon_V6_vlutb : +Hexagon_v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_128B,VI_ftype_VIDISI,3) +// tag : V6_vlutb_128B +def int_hexagon_V6_vlutb_128B : +Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_acc,VI_ftype_VIVIDISI,4) +// tag : V6_vlutb_acc +def int_hexagon_V6_vlutb_acc : +Hexagon_v512v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_acc_128B,VI_ftype_VIVIDISI,4) +// tag : V6_vlutb_acc_128B +def int_hexagon_V6_vlutb_acc_128B : +Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv,VD_ftype_VDDISI,3) +// tag : V6_vlutb_dv +def int_hexagon_V6_vlutb_dv : +Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_128B,VD_ftype_VDDISI,3) +// tag : V6_vlutb_dv_128B +def int_hexagon_V6_vlutb_dv_128B : +Hexagon_v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc,VD_ftype_VDVDDISI,4) +// tag : V6_vlutb_dv_acc +def int_hexagon_V6_vlutb_dv_acc : +Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc_128B,VD_ftype_VDVDDISI,4) +// tag : 
V6_vlutb_dv_acc_128B +def int_hexagon_V6_vlutb_dv_acc_128B : +Hexagon_v2048v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2) +// tag : V6_vdelta +def int_hexagon_V6_vdelta : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdelta">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdelta_128B,VI_ftype_VIVI,2) +// tag : V6_vdelta_128B +def int_hexagon_V6_vdelta_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdelta_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrdelta,VI_ftype_VIVI,2) +// tag : V6_vrdelta +def int_hexagon_V6_vrdelta : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrdelta">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrdelta_128B,VI_ftype_VIVI,2) +// tag : V6_vrdelta_128B +def int_hexagon_V6_vrdelta_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrdelta_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0w,VI_ftype_VI,1) +// tag : V6_vcl0w +def int_hexagon_V6_vcl0w : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0w">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0w_128B,VI_ftype_VI,1) +// tag : V6_vcl0w_128B +def int_hexagon_V6_vcl0w_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0w_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0h,VI_ftype_VI,1) +// tag : V6_vcl0h +def int_hexagon_V6_vcl0h : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0h">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0h_128B,VI_ftype_VI,1) +// tag : V6_vcl0h_128B +def int_hexagon_V6_vcl0h_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0h_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamtw,VI_ftype_VI,1) +// tag : V6_vnormamtw +def int_hexagon_V6_vnormamtw : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamtw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamtw_128B,VI_ftype_VI,1) +// tag : V6_vnormamtw_128B +def int_hexagon_V6_vnormamtw_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamtw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamth,VI_ftype_VI,1) +// tag : V6_vnormamth +def int_hexagon_V6_vnormamth : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamth_128B,VI_ftype_VI,1) +// tag : V6_vnormamth_128B +def int_hexagon_V6_vnormamth_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpopcounth,VI_ftype_VI,1) +// tag : V6_vpopcounth +def int_hexagon_V6_vpopcounth : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vpopcounth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpopcounth_128B,VI_ftype_VI,1) +// tag : V6_vpopcounth_128B +def int_hexagon_V6_vpopcounth_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vpopcounth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb,VI_ftype_VIVISI,3) +// tag : V6_vlutvvb +def int_hexagon_V6_vlutvvb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_128B,VI_ftype_VIVISI,3) +// tag : V6_vlutvvb_128B +def int_hexagon_V6_vlutvvb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc,VI_ftype_VIVIVISI,4) +// tag : V6_vlutvvb_oracc +def int_hexagon_V6_vlutvvb_oracc : +Hexagon_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc_128B,VI_ftype_VIVIVISI,4) +// tag : V6_vlutvvb_oracc_128B +def int_hexagon_V6_vlutvvb_oracc_128B : +Hexagon_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh,VD_ftype_VIVISI,3) +// tag : V6_vlutvwh +def int_hexagon_V6_vlutvwh : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vlutvwh_128B,VD_ftype_VIVISI,3) +// tag : V6_vlutvwh_128B +def int_hexagon_V6_vlutvwh_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc,VD_ftype_VDVIVISI,4) +// tag : V6_vlutvwh_oracc +def int_hexagon_V6_vlutvwh_oracc : +Hexagon_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc_128B,VD_ftype_VDVIVISI,4) +// tag : V6_vlutvwh_oracc_128B +def int_hexagon_V6_vlutvwh_oracc_128B : +Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">; + +// +// BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2) +// tag : M6_vabsdiffb +def int_hexagon_M6_vabsdiffb : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffb">; + +// +// BUILTIN_INFO(HEXAGON.M6_vabsdiffub,DI_ftype_DIDI,2) +// tag : M6_vabsdiffub +def int_hexagon_M6_vabsdiffub : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffub">; + +// +// BUILTIN_INFO(HEXAGON.S6_vsplatrbp,DI_ftype_SI,1) +// tag : S6_vsplatrbp +def int_hexagon_S6_vsplatrbp : +Hexagon_LLii_Intrinsic<"HEXAGON_S6_vsplatrbp">; + +// +// BUILTIN_INFO(HEXAGON.S6_vtrunehb_ppp,DI_ftype_DIDI,2) +// tag : S6_vtrunehb_ppp +def int_hexagon_S6_vtrunehb_ppp : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">; + +// +// BUILTIN_INFO(HEXAGON.S6_vtrunohb_ppp,DI_ftype_DIDI,2) +// tag : S6_vtrunohb_ppp +def int_hexagon_S6_vtrunohb_ppp : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">; diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index eb8f1e6cd079..06dfc329fe32 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -710,21 +710,39 @@ def int_ppc_vsx_xvrsqrtedp : GCCBuiltin<"__builtin_vsx_xvrsqrtedp">, def int_ppc_vsx_xvcmpeqdp : PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpeqdp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqdp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpeqsp : PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpeqsp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqsp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgedp : PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgedp_p : GCCBuiltin<"__builtin_vsx_xvcmpgedp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgesp : PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgesp_p : GCCBuiltin<"__builtin_vsx_xvcmpgesp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgtdp : PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgtdp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtdp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgtsp : PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xxleqv : PowerPC_VSX_Intrinsic<"xxleqv", [llvm_v4i32_ty], 
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td index 3ccde4742384..3953aef43dad 100644 --- a/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/include/llvm/IR/IntrinsicsWebAssembly.td @@ -13,4 +13,10 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.". + +// Note that memory_size is not IntrNoMem because it must be sequenced with +// respect to grow_memory calls. +def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>; +def int_wasm_grow_memory : Intrinsic<[], [llvm_anyint_ty], []>; + } diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index a3bc4af84308..18390f853510 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -22,10 +22,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; - // Restores the frame, base, and stack pointers as necessary after recovering - // from an exception. Any block resuming control flow in the parent function - // should call this before accessing any stack memory. - def int_x86_seh_restoreframe : Intrinsic<[], [], []>; + // Marks the EH registration node created in LLVM IR prior to code generation. + def int_x86_seh_ehregnode : Intrinsic<[], [llvm_ptr_ty], []>; // Given a pointer to the end of an EH registration object, returns the true // parent frame address that can be used with llvm.localrecover. @@ -1406,6 +1404,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpermil_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilpd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">, + 
Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128_mask">, Intrinsic<[llvm_v16i8_ty], @@ -1423,8 +1493,145 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; -} + def int_x86_avx512_mask_shuf_f32x4_256 : + GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f32x4 : + GCCBuiltin<"__builtin_ia32_shuf_f32x4_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f64x2_256 : + GCCBuiltin<"__builtin_ia32_shuf_f64x2_256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f64x2 : + GCCBuiltin<"__builtin_ia32_shuf_f64x2_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i32x4_256 : + GCCBuiltin<"__builtin_ia32_shuf_i32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i32x4 : + GCCBuiltin<"__builtin_ia32_shuf_i32x4_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i64x2_256 : + GCCBuiltin<"__builtin_ia32_shuf_i64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i64x2 : + GCCBuiltin<"__builtin_ia32_shuf_i64x2_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_128 : + GCCBuiltin<"__builtin_ia32_shufpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_256 : + GCCBuiltin<"__builtin_ia32_shufpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_512 : + GCCBuiltin<"__builtin_ia32_shufpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_128 : + GCCBuiltin<"__builtin_ia32_shufps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, 
llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_256 : + GCCBuiltin<"__builtin_ia32_shufps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_512 : + GCCBuiltin<"__builtin_ia32_shufps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_128 : + GCCBuiltin<"__builtin_ia32_movshdup128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_256 : + GCCBuiltin<"__builtin_ia32_movshdup256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_512 : + GCCBuiltin<"__builtin_ia32_movshdup512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_128 : + GCCBuiltin<"__builtin_ia32_movsldup128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_256 : + GCCBuiltin<"__builtin_ia32_movsldup256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_512 : + GCCBuiltin<"__builtin_ia32_movsldup512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_128 : + GCCBuiltin<"__builtin_ia32_movddup128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_256 : + GCCBuiltin<"__builtin_ia32_movddup256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_512 : + GCCBuiltin<"__builtin_ia32_movddup512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; +} // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". @@ -1526,6 +1733,38 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
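Reviewer note: the masked variants added throughout this hunk share one TableGen shape — the real operands come first, then a pass-through vector, then an integer mask whose width equals the lane count (llvm_i8_ty for 8 lanes, llvm_i16_ty for 16, llvm_i32_ty for 32). At the C level the mask surfaces as a __mmask type. A minimal sketch, assuming the usual _mm512_mask_movehdup_ps wrapper from <immintrin.h> for the __builtin_ia32_movshdup512_mask definition above (compile with -mavx512f):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m512 src = _mm512_set1_ps(0.0f);   /* pass-through lanes */
      __m512 a   = _mm512_set1_ps(3.0f);
      __mmask16 k = 0x00FF;                /* update the low 8 lanes only */
      /* Lanes with k=1 get movshdup(a); lanes with k=0 keep src. */
      __m512 r = _mm512_mask_movehdup_ps(src, k, a);
      float out[16];
      _mm512_storeu_ps(out, r);
      printf("%g %g\n", out[0], out[15]);  /* 3 0 */
      return 0;
    }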
def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_128 : + GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_256 : + GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_512 : + GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_128 : + GCCBuiltin<"__builtin_ia32_fpclassps128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_256 : + GCCBuiltin<"__builtin_ia32_fpclassps256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_512 : + GCCBuiltin<"__builtin_ia32_fpclassps512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_sd : + GCCBuiltin<"__builtin_ia32_fpclasssd">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ss : + GCCBuiltin<"__builtin_ia32_fpclassss">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; } // Vector extract sign mask @@ -1573,16 +1812,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, - Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], + Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadArgMem]>; def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], @@ -1596,24 +1835,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
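Reviewer note: the new fpclass intrinsics classify each lane against an immediate category bitmask and return a lane mask. A hedged sketch, assuming the usual _mm512_fpclass_ps_mask wrapper for __builtin_ia32_fpclassps512_mask and the ISA's category encoding, where bit 0 selects QNaN and bit 7 SNaN (compile with -mavx512dq):

    #include <immintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      __m512 v = _mm512_set_ps(0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, INFINITY, NAN);
      /* 0x81 tests the QNaN (bit 0) and SNaN (bit 7) categories. */
      __mmask16 nan_lanes = _mm512_fpclass_ps_mask(v, 0x81);
      printf("NaN lanes: 0x%04x\n", (unsigned)nan_lanes); /* 0x0001 */
      return 0;
    }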
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, Intrinsic<[], [llvm_ptr_ty, - llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>; + llvm_v2i64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + llvm_v4i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_pd_256 : GCCBuiltin<"__builtin_ia32_maskstorepd256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; + llvm_v4i64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps_256 : GCCBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; + llvm_v8i32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx512_mask_storeu_ps_512 : GCCBuiltin<"__builtin_ia32_storeups512_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], @@ -1946,6 +2192,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_128 : GCCBuiltin<"__builtin_ia32_psrlw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_256 : GCCBuiltin<"__builtin_ia32_psrlw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_128 : GCCBuiltin<"__builtin_ia32_psrlwi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_256 : GCCBuiltin<"__builtin_ia32_psrlwi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_512 : GCCBuiltin<"__builtin_ia32_psrlwi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; @@ -2167,39 +2432,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
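Reviewer note: the maskload/maskstore hunks above retype the mask operand from a float vector to a same-width integer vector, which matches how the AVX C intrinsics have always typed it (the mask is __m128i/__m256i, with the sign bit of each element selecting the lane). A minimal sketch using the standard <immintrin.h> wrappers (compile with -mavx):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      double src[2] = {1.5, 2.5};
      double dst[2] = {0.0, 0.0};
      /* Sign bit set in element 0 only: touch just the first lane. */
      __m128i mask = _mm_set_epi64x(0, -1);
      __m128d v = _mm_maskload_pd(src, mask);   /* {1.5, 0.0} */
      _mm_maskstore_pd(dst, mask, v);           /* dst[1] left unchanged */
      printf("%g %g\n", dst[0], dst[1]);        /* 1.5 0 */
      return 0;
    }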
- def int_x86_avx2_vbroadcast_ss_ps : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx2_vbroadcast_sd_pd_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx2_vbroadcast_ss_ps_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastb_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastb_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastb256">, - Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastw_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastw_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastw256">, - Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastd_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastd_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastq_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastq128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastq_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_pbroadcast_d_gpr_512 : GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty, @@ -2220,7 +2452,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, @@ -2231,20 +2463,124 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
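Reviewer note: the int_x86_avx2_permps fix just above retypes the shuffle-index operand as v8i32, matching both VPERMPS itself and the C intrinsic signature, where the indices are an integer vector; the deleted broadcast intrinsics are presumably now expressible as plain shufflevector IR. A small sketch, assuming _mm256_permutevar8x32_ps from <immintrin.h> (compile with -mavx2):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m256  a   = _mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0);
      __m256i idx = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); /* reverse */
      __m256  r   = _mm256_permutevar8x32_ps(a, idx);  /* r[i] = a[idx[i]] */
      float out[8];
      _mm256_storeu_ps(out, r);
      printf("%g %g\n", out[0], out[7]); /* 7 0 */
      return 0;
    }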
def int_x86_avx512_mask_vextractf32x4_512 : GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextracti32x4_512 : GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf32x4_256 : + GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti32x4_256 : + GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf64x2_256 : + GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti64x2_256 : + GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf64x2_512 : + GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti64x2_512 : + GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf32x8_512 : + GCCBuiltin<"__builtin_ia32_extractf32x8_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti32x8_512 : + GCCBuiltin<"__builtin_ia32_extracti32x8_mask">, + Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextractf64x4_512 : GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextracti64x4_512 : GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_256 : + GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_512 : + GCCBuiltin<"__builtin_ia32_insertf32x4_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x8_512 : + GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_256 : + GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, + 
Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_512 : + GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x4_512 : + GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_256 : + GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_512 : + GCCBuiltin<"__builtin_ia32_inserti32x4_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x8_512 : + GCCBuiltin<"__builtin_ia32_inserti32x8_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x2_256 : + GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x2_512 : + GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x4_512 : + GCCBuiltin<"__builtin_ia32_inserti64x4_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional load ops @@ -2354,6 +2690,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_psll_dq_512 : GCCBuiltin<"__builtin_ia32_pslldq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], + [IntrNoMem]>; } // Gather ops @@ -3544,6 +3886,43 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [llvm_ptr_ty], []>; } +//===----------------------------------------------------------------------===// +// XSAVE +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
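Before the XSAVE definitions that follow, a note on the vextract/vinsert hunks above: the 128-bit-lane selector operand is widened from llvm_i8_ty to llvm_i32_ty across the board; at the source level it remains a small immediate. A sketch assuming AVX-512F hardware and the immintrin.h wrappers _mm512_insertf32x4 / _mm512_extractf32x4_ps, which should map to the insertf32x4/extractf32x4 builtins named above (build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m512 v = _mm512_set1_ps(1.0f);
  __m128 lane = _mm_setr_ps(5.f, 6.f, 7.f, 8.f);
  // The 128-bit-lane index is an immediate; these hunks widen its IR
  // operand from i8 to i32, but in source it stays a constant.
  __m512 w = _mm512_insertf32x4(v, lane, 2);
  __m128 back = _mm512_extractf32x4_ps(w, 2);
  float out[4];
  _mm_storeu_ps(out, back);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 5 6 7 8
  return 0;
}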
+ def int_x86_xsave : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsave64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstor : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstor64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaveopt : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaveopt64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstors : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstors64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsavec : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsavec64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaves : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaves64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; +} + +//===----------------------------------------------------------------------===// +// Support protection key +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">, + Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; +} //===----------------------------------------------------------------------===// // Half float conversion @@ -3561,9 +3940,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -3657,6 +4048,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
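The XSAVE-family intrinsics above all take a save-area pointer plus the requested-feature bitmap split into two i32 halves (EDX:EAX, mirroring the instruction encoding). A hedged usage sketch via immintrin.h's _xsave/_xrstor, assuming -mxsave, an OS that has enabled the requested state components in XCR0, and that 4096 bytes is enough for the save area (the real size comes from CPUID leaf 0Dh):

#include <immintrin.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>

int main() {
  // 64-byte alignment is architecturally required; the 4096-byte size is
  // an assumption for this sketch.
  void *area = std::aligned_alloc(64, 4096);
  std::memset(area, 0, 4096);
  std::uint64_t mask = 0x7;  // x87 | SSE | AVX state components
  _xsave(area, mask);        // the 64-bit mask is split into the two i32
                             // operands (EDX:EAX) of int_x86_xsave
  _xrstor(area, mask);
  std::free(area);
  return 0;
}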
def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -3671,10 +4068,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -3686,10 +4087,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; @@ -3698,17 +4103,74 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
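The scalar truncating conversions in the hunk above (cvttss2si/cvttsd2si and their unsigned forms) gain a trailing llvm_i32_ty operand carrying an SAE constant, and move to the vcvtt* builtin names. A quick C++ check, assuming AVX-512F hardware and that immintrin.h's _mm_cvtt_roundss_si32 maps onto int_x86_avx512_cvttss2si (build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 x = _mm_set_ss(3.75f);
  // Truncating convert; _MM_FROUND_NO_EXC is the suppress-all-exceptions
  // constant carried by the new trailing llvm_i32_ty operand.
  int i = _mm_cvtt_roundss_si32(x, _MM_FROUND_NO_EXC);
  std::printf("%d\n", i);  // prints 3
  return 0;
}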
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2b_128 : GCCBuiltin<"__builtin_ia32_cvtmask2b128">, + Intrinsic<[llvm_v16i8_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_256 : GCCBuiltin<"__builtin_ia32_cvtmask2b256">, + Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_512 : GCCBuiltin<"__builtin_ia32_cvtmask2b512">, + Intrinsic<[llvm_v64i8_ty], [llvm_i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2w_128 : GCCBuiltin<"__builtin_ia32_cvtmask2w128">, + Intrinsic<[llvm_v8i16_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_256 : GCCBuiltin<"__builtin_ia32_cvtmask2w256">, + Intrinsic<[llvm_v16i16_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_512 : GCCBuiltin<"__builtin_ia32_cvtmask2w512">, + Intrinsic<[llvm_v32i16_ty], [llvm_i32_ty], [IntrNoMem]>; + + def 
int_x86_avx512_cvtmask2d_128 : GCCBuiltin<"__builtin_ia32_cvtmask2d128">, + Intrinsic<[llvm_v4i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_256 : GCCBuiltin<"__builtin_ia32_cvtmask2d256">, + Intrinsic<[llvm_v8i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_512 : GCCBuiltin<"__builtin_ia32_cvtmask2d512">, + Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2q_128 : GCCBuiltin<"__builtin_ia32_cvtmask2q128">, + Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_256 : GCCBuiltin<"__builtin_ia32_cvtmask2q256">, + Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_512 : GCCBuiltin<"__builtin_ia32_cvtmask2q512">, + Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; + } // Pack ops. @@ -3751,53 +4213,761 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; } +// Unpack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_unpckh_pd_128 : + GCCBuiltin<"__builtin_ia32_unpckhpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_256 : + GCCBuiltin<"__builtin_ia32_unpckhpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_512 : + GCCBuiltin<"__builtin_ia32_unpckhpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_128 : + GCCBuiltin<"__builtin_ia32_unpckhps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_256 : + GCCBuiltin<"__builtin_ia32_unpckhps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_512 : + GCCBuiltin<"__builtin_ia32_unpckhps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_128 : + GCCBuiltin<"__builtin_ia32_unpcklpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_256 : + GCCBuiltin<"__builtin_ia32_unpcklpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_512 : + GCCBuiltin<"__builtin_ia32_unpcklpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_128 : + GCCBuiltin<"__builtin_ia32_unpcklps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_256 : + GCCBuiltin<"__builtin_ia32_unpcklps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_512 : + GCCBuiltin<"__builtin_ia32_unpcklps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_128 : + 
GCCBuiltin<"__builtin_ia32_punpckhbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_256 : + GCCBuiltin<"__builtin_ia32_punpckhbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_512 : + GCCBuiltin<"__builtin_ia32_punpckhbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhdq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhdq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhdq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_128 : + GCCBuiltin<"__builtin_ia32_punpckhwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_256 : + GCCBuiltin<"__builtin_ia32_punpckhwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_512 : + GCCBuiltin<"__builtin_ia32_punpckhwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_128 : + GCCBuiltin<"__builtin_ia32_punpcklbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_256 : + GCCBuiltin<"__builtin_ia32_punpcklbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_512 : + GCCBuiltin<"__builtin_ia32_punpcklbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_128 : + GCCBuiltin<"__builtin_ia32_punpckldq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_256 : + GCCBuiltin<"__builtin_ia32_punpckldq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_512 : + GCCBuiltin<"__builtin_ia32_punpckldq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, 
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpcklqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpcklqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpcklqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_128 : + GCCBuiltin<"__builtin_ia32_punpcklwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_256 : + GCCBuiltin<"__builtin_ia32_punpcklwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_512 : + GCCBuiltin<"__builtin_ia32_punpcklwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; +} + // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttps2udq_512: GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttpd2dq_512: GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttpd2udq_512: GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, + def int_x86_avx512_mask_cvtdq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], + 
[IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtsd2ss_round : + GCCBuiltin<"__builtin_ia32_cvtsd2ss_round">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtss2sd_round : + GCCBuiltin<"__builtin_ia32_cvtss2sd_round">, + Intrinsic<[llvm_v2f64_ty], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps : + GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">, + 
Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtps2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtps2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_512 : + 
GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2dq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, 
llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, + 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtps2dq_512: GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2dq_512: GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtps2udq_512: GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2udq_512: GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def 
int_x86_avx512_mask_cvtudq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; } // Vector load with broadcast @@ -3805,28 +4975,183 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vbroadcast_ss_512 : GCCBuiltin<"__builtin_ia32_vbroadcastss512">, Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_ss_ps_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_512 : + GCCBuiltin<"__builtin_ia32_broadcastss512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_256 : + GCCBuiltin<"__builtin_ia32_broadcastss256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_128 : + GCCBuiltin<"__builtin_ia32_broadcastss128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_vbroadcast_sd_512 : GCCBuiltin<"__builtin_ia32_vbroadcastsd512">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_sd_pd_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_sd_pd_512 : + GCCBuiltin<"__builtin_ia32_broadcastsd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_sd_pd_256 : + GCCBuiltin<"__builtin_ia32_broadcastsd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">, + 
Intrinsic<[llvm_v32i8_ty], + [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastd_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + GCCBuiltin<"__builtin_ia32_pbroadcastd512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_128 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x4_256 : + GCCBuiltin<"__builtin_ia32_broadcastf32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x4_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x4_512">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x8_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x8_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v8f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + 
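All of the masked broadcasts above share one operand shape: source lane(s), pass-through vector, write mask. A sketch of that contract through immintrin.h's _mm512_mask_broadcastss_ps, which should correspond to int_x86_avx512_mask_broadcast_ss_ps_512 (assumes AVX-512F hardware; build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 a = _mm_set_ss(42.0f);           // lane to broadcast
  __m512 passthru = _mm512_setzero_ps();  // kept where a mask bit is 0
  __mmask16 k = 0x00FF;                   // write only the low 8 elements
  __m512 r = _mm512_mask_broadcastss_ps(passthru, k, a);
  float out[16];
  _mm512_storeu_ps(out, r);
  std::printf("%g %g\n", out[0], out[15]);  // prints 42 0
  return 0;
}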
+ def int_x86_avx512_mask_broadcastf64x2_256 : + GCCBuiltin<"__builtin_ia32_broadcastf64x2_256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf64x2_512 : + GCCBuiltin<"__builtin_ia32_broadcastf64x2_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf64x4_512 : + GCCBuiltin<"__builtin_ia32_broadcastf64x4_512">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v4f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x4_256 : + GCCBuiltin<"__builtin_ia32_broadcasti32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x4_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x4_512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x8_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x8_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v8i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x2_256 : + GCCBuiltin<"__builtin_ia32_broadcasti64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x2_512 : + GCCBuiltin<"__builtin_ia32_broadcasti64x2_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x4_512 : + GCCBuiltin<"__builtin_ia32_broadcasti64x4_512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v4i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pbroadcastq_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastq_i64_512 : Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_512 : + GCCBuiltin<"__builtin_ia32_broadcastmw512">, + Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_256 : + GCCBuiltin<"__builtin_ia32_broadcastmw256">, + Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_128 : + GCCBuiltin<"__builtin_ia32_broadcastmw128">, + Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_512 : + GCCBuiltin<"__builtin_ia32_broadcastmb512">, + Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_256 : + GCCBuiltin<"__builtin_ia32_broadcastmb256">, + Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_128 : + GCCBuiltin<"__builtin_ia32_broadcastmb128">, + Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; } // Vector sign and zero extend @@ -4071,15 +5396,36 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
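On the broadcastmw/broadcastmb definitions just above: they zero-extend a mask register and splat it into every vector element. A sketch assuming AVX-512CD hardware and immintrin.h's _mm512_broadcastmw_epi32 (build with -mavx512cd):

#include <immintrin.h>
#include <cstdio>

int main() {
  __mmask16 k = 0xBEEF;
  // VPBROADCASTMW2D: zero-extend the 16-bit mask and splat it into all
  // sixteen 32-bit elements (int_x86_avx512_broadcastmw_512 above).
  __m512i v = _mm512_broadcastmw_epi32(k);
  unsigned out[16];
  _mm512_storeu_si512(out, v);
  std::printf("0x%x 0x%x\n", out[0], out[15]);  // prints 0xbeef 0xbeef
  return 0;
}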
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">, + def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">, + def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - + def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -4099,12 +5445,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, @@ -4143,29 +5489,108 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
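Several scalar builtins in the hunk above are renamed to _round variants (rndscale, range, reduce, scalef, sqrt), each taking an explicit llvm_i32_ty rounding-control operand in place of MXCSR. A sketch with immintrin.h's _mm_sqrt_round_sd, the unmasked wrapper that presumably feeds int_x86_avx512_mask_sqrt_sd an all-ones mask (assumes AVX-512F; build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128d upper = _mm_set_sd(0.0);  // upper element passes through from here
  __m128d x = _mm_set_sd(2.0);
  // Embedded rounding control instead of MXCSR; the constant travels in
  // the trailing llvm_i32_ty operand of the _round intrinsics.
  __m128d r = _mm_sqrt_round_sd(upper, x,
                                _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  std::printf("%.17g\n", _mm_cvtsd_f64(r));  // ~1.4142135623730951
  return 0;
}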
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_128 : + GCCBuiltin<"__builtin_ia32_getmantpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_256 : + GCCBuiltin<"__builtin_ia32_getmantpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_512 : + GCCBuiltin<"__builtin_ia32_getmantpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_128 : + GCCBuiltin<"__builtin_ia32_getmantps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_256 : + GCCBuiltin<"__builtin_ia32_getmantps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_512 : + GCCBuiltin<"__builtin_ia32_getmantps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ss : + GCCBuiltin<"__builtin_ia32_getmantss_round">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_sd : + GCCBuiltin<"__builtin_ia32_getmantsd_round">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; @@ -4183,11 +5608,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -4199,14 +5624,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], + [IntrNoMem]>; } // FP logical ops let TargetPrefix = "x86" in { @@ -4511,6 +5939,54 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_128 : + GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_256 : + GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_512 : + GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_128 : + GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_256 : + GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_512 : + GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_128 : + GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_256 : + GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_512 : + GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; } // Gather and Scatter ops @@ -4807,27 +6283,71 @@ let TargetPrefix = "x86" in { llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; } -// AVX-512 conflict detection +// AVX-512 conflict detection instruction +// Instructions that count the number of leading zero bits let TargetPrefix = "x86" in { + def int_x86_avx512_mask_conflict_d_128 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_d_256 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, 
llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_conflict_d_512 : GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_conflict_q_128 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_q_256 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_512 : GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_d_128 : + GCCBuiltin<"__builtin_ia32_vplzcntd_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_d_256 : + GCCBuiltin<"__builtin_ia32_vplzcntd_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_d_512 : GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_q_128 : + GCCBuiltin<"__builtin_ia32_vplzcntq_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_q_256 : + GCCBuiltin<"__builtin_ia32_vplzcntq_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_q_512 : GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } @@ -4911,20 +6431,70 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
} let TargetPrefix = "x86" in { - def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">, + def int_x86_avx512_mask_valign_q_512 : + GCCBuiltin<"__builtin_ia32_alignq512_mask">, Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">, + def int_x86_avx512_mask_valign_d_512 : + GCCBuiltin<"__builtin_ia32_alignd512_mask">, Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_q_256 : + GCCBuiltin<"__builtin_ia32_alignq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_256 : + GCCBuiltin<"__builtin_ia32_alignd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_q_128 : + GCCBuiltin<"__builtin_ia32_alignq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_128 : + GCCBuiltin<"__builtin_ia32_alignd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_128 : + GCCBuiltin<"__builtin_ia32_palignr128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_256 : + GCCBuiltin<"__builtin_ia32_palignr256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_512 : + GCCBuiltin<"__builtin_ia32_palignr512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit + def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; @@ -5288,6 +6858,626 @@ let TargetPrefix = "x86" in { llvm_i8_ty], [IntrReadArgMem]>; } + +// truncate +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_pmov_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_128 : + 
GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, 
llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_256 : + 
GCCBuiltin<"__builtin_ia32_pmovqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_256 : + GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_256 : + GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_128 : + GCCBuiltin<"__builtin_ia32_pmovdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_128 : + GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_128 : + GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_256 : + GCCBuiltin<"__builtin_ia32_pmovdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + 
[IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_256 : + GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_256 : + GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_512 : + GCCBuiltin<"__builtin_ia32_pmovdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_512 : + GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_512 : + GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_256 : + 
GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovwb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovswb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovwb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovswb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">, + Intrinsic<[llvm_v16i8_ty], + 
[llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovwb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovswb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +} + +// Bitwise ternary logic +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_pternlog_d_128 : + GCCBuiltin<"__builtin_ia32_pternlogd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_128 : + GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_d_256 : + GCCBuiltin<"__builtin_ia32_pternlogd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_256 : + GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_d_512 : + GCCBuiltin<"__builtin_ia32_pternlogd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_512 : + GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_128 : + GCCBuiltin<"__builtin_ia32_pternlogq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_128 : + GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_256 : + GCCBuiltin<"__builtin_ia32_pternlogq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_256 : + GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], 
[IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_512 : + GCCBuiltin<"__builtin_ia32_pternlogq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_512 : + GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; +} + // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : @@ -5314,6 +7504,14 @@ let TargetPrefix = "x86" in { GCCBuiltin<"__builtin_ia32_cmppd128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_ss : + GCCBuiltin<"__builtin_ia32_cmpss_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_sd : + GCCBuiltin<"__builtin_ia32_cmpsd_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">, diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index e6c22090ab6d..c546fc3d1ee0 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_LLVMCONTEXT_H #define LLVM_IR_LLVMCONTEXT_H -#include "llvm-c/Core.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Options.h" @@ -60,7 +59,20 @@ public: MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" MD_nonnull = 11, // "nonnull" MD_dereferenceable = 12, // "dereferenceable" - MD_dereferenceable_or_null = 13 // "dereferenceable_or_null" + MD_dereferenceable_or_null = 13, // "dereferenceable_or_null" + MD_make_implicit = 14, // "make.implicit" + MD_unpredictable = 15, // "unpredictable" + MD_invariant_group = 16, // "invariant.group" + MD_align = 17 // "align" + }; + + /// Known operand bundle tag IDs, which always have the same value. All + /// operand bundle tags that LLVM has special knowledge of are listed here. + /// Additionally, this scheme allows LLVM to efficiently check for specific + /// operand bundle tags without comparing strings. + enum { + OB_deopt = 0, // "deopt" + OB_funclet = 1, // "funclet" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. @@ -71,6 +83,15 @@ public: /// custom metadata IDs registered in this LLVMContext. void getMDKindNames(SmallVectorImpl &Result) const; + /// getOperandBundleTags - Populate client supplied SmallVector with the + /// bundle tags registered in this LLVMContext. The bundle tags are ordered + /// by increasing bundle IDs. + /// \see LLVMContext::getOperandBundleTagID + void getOperandBundleTags(SmallVectorImpl &Result) const; + + /// getOperandBundleTagID - Maps a bundle tag to an integer ID. Every bundle + /// tag registered with an LLVMContext has an unique ID. 
+ uint32_t getOperandBundleTagID(StringRef Tag) const; typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, unsigned LocCookie); diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index 7f7889ad5fb3..b8e33478d6a9 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Pass.h" @@ -118,6 +119,7 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry { Pass *P; Value *V; Module *M; + public: explicit PassManagerPrettyStackEntry(Pass *p) : P(p), V(nullptr), M(nullptr) {} // When P is releaseMemory'd. @@ -130,7 +132,6 @@ public: void print(raw_ostream &OS) const override; }; - //===----------------------------------------------------------------------===// // PMStack // @@ -158,7 +159,6 @@ private: std::vector S; }; - //===----------------------------------------------------------------------===// // PMTopLevelManager // @@ -204,10 +204,7 @@ public: virtual ~PMTopLevelManager(); /// Add immutable pass and initialize it. - inline void addImmutablePass(ImmutablePass *P) { - P->initializePass(); - ImmutablePasses.push_back(P); - } + void addImmutablePass(ImmutablePass *P); inline SmallVectorImpl& getImmutablePasses() { return ImmutablePasses; @@ -231,12 +228,10 @@ public: PMStack activeStack; protected: - /// Collection of pass managers SmallVector PassManagers; private: - /// Collection of pass managers that are not directly maintained /// by this pass manager SmallVector IndirectPassManagers; @@ -253,7 +248,46 @@ private: /// Immutable passes are managed by top level manager. SmallVector ImmutablePasses; - DenseMap AnUsageMap; + /// Map from ID to immutable passes. + SmallDenseMap ImmutablePassMap; + + + /// A wrapper around AnalysisUsage for the purpose of uniquing. The wrapper + /// is used to avoid needing to make AnalysisUsage itself a folding set node. + struct AUFoldingSetNode : public FoldingSetNode { + AnalysisUsage AU; + AUFoldingSetNode(const AnalysisUsage &AU) : AU(AU) {} + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, AU); + } + static void Profile(FoldingSetNodeID &ID, const AnalysisUsage &AU) { + // TODO: We could consider sorting the dependency arrays within the + // AnalysisUsage (since they are conceptually unordered). + ID.AddBoolean(AU.getPreservesAll()); + auto ProfileVec = [&](const SmallVectorImpl& Vec) { + ID.AddInteger(Vec.size()); + for (AnalysisID AID : Vec) + ID.AddPointer(AID); + }; + ProfileVec(AU.getRequiredSet()); + ProfileVec(AU.getRequiredTransitiveSet()); + ProfileVec(AU.getPreservedSet()); + ProfileVec(AU.getUsedSet()); + } + }; + + // Contains all of the unique combinations of AnalysisUsage. This is helpful + // when we have multiple instances of the same pass since they'll usually + // have the same analysis usage and can share storage. + FoldingSet UniqueAnalysisUsages; + + // Allocator used for allocating AUFoldingSetNodes. This handles deletion of + // all allocated nodes in one fell swoop. + SpecificBumpPtrAllocator AUFoldingSetNodeAllocator; + + // Maps from a pass to its associated entry in UniqueAnalysisUsages. Does + // not own the storage associated with either key or value. + DenseMap AnUsageMap; /// Collection of PassInfo objects found via analysis IDs and in this top
This is used to memoize queries to the pass registry. @@ -262,8 +296,6 @@ private: mutable DenseMap AnalysisPassInfos; }; - - //===----------------------------------------------------------------------===// // PMDataManager @@ -271,7 +303,6 @@ private: /// used by pass managers. class PMDataManager { public: - explicit PMDataManager() : TPM(nullptr), Depth(0) { initializeAnalysisInfo(); } @@ -319,13 +350,12 @@ public: // passes that are managed by this manager. bool preserveHigherLevelAnalysis(Pass *P); - - /// Populate RequiredPasses with analysis pass that are required by - /// pass P and are available. Populate ReqPassNotAvailable with analysis - /// pass that are required by pass P but are not available. - void collectRequiredAnalysis(SmallVectorImpl &RequiredPasses, - SmallVectorImpl &ReqPassNotAvailable, - Pass *P); + /// Populate UsedPasses with analysis pass that are used or required by pass + /// P and are available. Populate ReqPassNotAvailable with analysis pass that + /// are required by pass P but are not available. + void collectRequiredAndUsedAnalyses( + SmallVectorImpl &UsedPasses, + SmallVectorImpl &ReqPassNotAvailable, Pass *P); /// All Required analyses should be available to the pass as it runs! Here /// we fill in the AnalysisImpls member of the pass so that it can @@ -351,6 +381,7 @@ public: enum PassDebuggingString S2, StringRef Msg); void dumpRequiredSet(const Pass *P) const; void dumpPreservedSet(const Pass *P) const; + void dumpUsedSet(const Pass *P) const; unsigned getNumContainedPasses() const { return (unsigned)PassVector.size(); @@ -374,7 +405,6 @@ public: } protected: - // Top level manager. PMTopLevelManager *TPM; @@ -439,9 +469,9 @@ public: /// doFinalization - Overrides ModulePass doFinalization for global /// finalization tasks - /// + /// using ModulePass::doFinalization; - + /// doFinalization - Run all of the finalizers for the function passes. /// bool doFinalization(Module &M) override; @@ -473,7 +503,6 @@ public: }; Timer *getPassTimer(Pass *); - } #endif diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index ceb1c736e5c7..35341e3271ff 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -60,6 +60,9 @@ public: /// \brief Return metadata containing a number of branch weights. MDNode *createBranchWeights(ArrayRef Weights); + /// Return metadata specifying that a branch or switch is unpredictable. + MDNode *createUnpredictable(); + /// Return metadata containing the entry count for a function. 
MDNode *createFunctionEntryCount(uint64_t Count); diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h index b72b259097c3..ea2f0c3f09f3 100644 --- a/include/llvm/IR/Mangler.h +++ b/include/llvm/IR/Mangler.h @@ -15,12 +15,12 @@ #define LLVM_IR_MANGLER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/raw_ostream.h" namespace llvm { class DataLayout; -class GlobalValue; template class SmallVectorImpl; class Twine; diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def index 857e4637d1e4..b1d22178e262 100644 --- a/include/llvm/IR/Metadata.def +++ b/include/llvm/IR/Metadata.def @@ -13,7 +13,8 @@ #if !(defined HANDLE_METADATA || defined HANDLE_METADATA_LEAF || \ defined HANDLE_METADATA_BRANCH || defined HANDLE_MDNODE_LEAF || \ - defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_MDNODE_LEAF_UNIQUABLE || defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF || \ defined HANDLE_SPECIALIZED_MDNODE_BRANCH) #error "Missing macro definition of HANDLE_METADATA*" @@ -34,6 +35,24 @@ #define HANDLE_METADATA_BRANCH(CLASS) HANDLE_METADATA(CLASS) #endif +// Handler for specialized and uniquable leaf nodes under MDNode. Defers to +// HANDLE_MDNODE_LEAF_UNIQUABLE if it's defined, otherwise to +// HANDLE_SPECIALIZED_MDNODE_LEAF. +#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE +#ifdef HANDLE_MDNODE_LEAF_UNIQUABLE +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ + HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) +#else +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ + HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) +#endif +#endif + +// Handler for leaf nodes under MDNode. +#ifndef HANDLE_MDNODE_LEAF_UNIQUABLE +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) HANDLE_MDNODE_LEAF(CLASS) +#endif + // Handler for leaf nodes under MDNode. 
#ifndef HANDLE_MDNODE_LEAF #define HANDLE_MDNODE_LEAF(CLASS) HANDLE_METADATA_LEAF(CLASS) @@ -59,43 +78,46 @@ HANDLE_METADATA_BRANCH(ValueAsMetadata) HANDLE_METADATA_LEAF(ConstantAsMetadata) HANDLE_METADATA_LEAF(LocalAsMetadata) HANDLE_MDNODE_BRANCH(MDNode) -HANDLE_MDNODE_LEAF(MDTuple) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILocation) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIExpression) +HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression) HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode) -HANDLE_SPECIALIZED_MDNODE_LEAF(GenericDINode) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubrange) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIEnumerator) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubrange) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIEnumerator) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIScope) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIBasicType) -HANDLE_SPECIALIZED_MDNODE_BRANCH(DIDerivedTypeBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIDerivedType) -HANDLE_SPECIALIZED_MDNODE_BRANCH(DICompositeTypeBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DICompositeType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubroutineType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIFile) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIBasicType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIDerivedType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICompositeType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubroutineType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFile) HANDLE_SPECIALIZED_MDNODE_LEAF(DICompileUnit) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILocalScope) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubprogram) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubprogram) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlock) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlockFile) -HANDLE_SPECIALIZED_MDNODE_LEAF(DINamespace) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIModule) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlock) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlockFile) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DINamespace) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIModule) HANDLE_SPECIALIZED_MDNODE_BRANCH(DITemplateParameter) -HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateTypeParameter) -HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateValueParameter) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateTypeParameter) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateValueParameter) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIGlobalVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILocalVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIObjCProperty) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariable) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) #undef HANDLE_METADATA #undef HANDLE_METADATA_LEAF #undef HANDLE_METADATA_BRANCH #undef HANDLE_MDNODE_LEAF +#undef HANDLE_MDNODE_LEAF_UNIQUABLE #undef HANDLE_MDNODE_BRANCH #undef HANDLE_SPECIALIZED_MDNODE_LEAF +#undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE #undef HANDLE_SPECIALIZED_MDNODE_BRANCH diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 
c639625bf16c..2ea591383f82 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -18,10 +18,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/MetadataTracking.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include @@ -32,9 +33,6 @@ class LLVMContext; class Module; class ModuleSlotTracker; -template - class SymbolTableListTraits; - enum LLVMConstants : uint32_t { DEBUG_METADATA_VERSION = 3 // Current debug info version number. }; @@ -86,7 +84,9 @@ public: DIImportedEntityKind, ConstantAsMetadataKind, LocalAsMetadataKind, - MDStringKind + MDStringKind, + DIMacroKind, + DIMacroFileKind }; protected: @@ -126,9 +126,10 @@ public: /// If \c M is provided, metadata nodes will be numbered canonically; /// otherwise, pointer addresses are substituted. /// @{ - void print(raw_ostream &OS, const Module *M = nullptr) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST, - const Module *M = nullptr) const; + void print(raw_ostream &OS, const Module *M = nullptr, + bool IsForDebug = false) const; + void print(raw_ostream &OS, ModuleSlotTracker &MST, const Module *M = nullptr, + bool IsForDebug = false) const; /// @} /// \brief Print as operand. @@ -196,6 +197,77 @@ private: void untrack(); }; +/// \brief API for tracking metadata references through RAUW and deletion. +/// +/// Shared API for updating \a Metadata pointers in subclasses that support +/// RAUW. +/// +/// This API is not meant to be used directly. See \a TrackingMDRef for a +/// user-friendly tracking reference. +class MetadataTracking { +public: + /// \brief Track the reference to metadata. + /// + /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD + /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets + /// deleted, \c MD will be set to \c nullptr. + /// + /// If tracking isn't supported, \c *MD will not change. + /// + /// \return true iff tracking is supported by \c MD. + static bool track(Metadata *&MD) { + return track(&MD, *MD, static_cast(nullptr)); + } + + /// \brief Track the reference to metadata for \a Metadata. + /// + /// As \a track(Metadata*&), but with support for calling back to \c Owner to + /// tell it that its operand changed. This could trigger \c Owner being + /// re-uniqued. + static bool track(void *Ref, Metadata &MD, Metadata &Owner) { + return track(Ref, MD, &Owner); + } + + /// \brief Track the reference to metadata for \a MetadataAsValue. + /// + /// As \a track(Metadata*&), but with support for calling back to \c Owner to + /// tell it that its operand changed. This could trigger \c Owner being + /// re-uniqued. + static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) { + return track(Ref, MD, &Owner); + } + + /// \brief Stop tracking a reference to metadata. + /// + /// Stops \c *MD from tracking \c MD. + static void untrack(Metadata *&MD) { untrack(&MD, *MD); } + static void untrack(void *Ref, Metadata &MD); + + /// \brief Move tracking from one reference to another. + /// + /// Semantically equivalent to \c untrack(MD) followed by \c track(New), + /// except that ownership callbacks are maintained. + /// + /// Note: it is an error if \c *MD does not equal \c New. + /// + /// \return true iff tracking is supported by \c MD. 
+ static bool retrack(Metadata *&MD, Metadata *&New) { + return retrack(&MD, *MD, &New); + } + static bool retrack(void *Ref, Metadata &MD, void *New); + + /// \brief Check whether metadata is replaceable. + static bool isReplaceable(const Metadata &MD); + + typedef PointerUnion OwnerTy; + +private: + /// \brief Track a reference to metadata for an owner. + /// + /// Generalized version of tracking. + static bool track(void *Ref, Metadata &MD, OwnerTy Owner); +}; + /// \brief Shared implementation of use-lists for replaceable metadata. /// /// Most metadata cannot be RAUW'ed. This is a shared implementation of @@ -572,10 +644,12 @@ struct AAMDNodes { template<> struct DenseMapInfo { static inline AAMDNodes getEmptyKey() { - return AAMDNodes(DenseMapInfo::getEmptyKey(), 0, 0); + return AAMDNodes(DenseMapInfo::getEmptyKey(), + nullptr, nullptr); } static inline AAMDNodes getTombstoneKey() { - return AAMDNodes(DenseMapInfo::getTombstoneKey(), 0, 0); + return AAMDNodes(DenseMapInfo::getTombstoneKey(), + nullptr, nullptr); } static unsigned getHashValue(const AAMDNodes &Val) { return DenseMapInfo::getHashValue(Val.TBAA) ^ @@ -830,10 +904,11 @@ public: /// \brief Resolve cycles. /// /// Once all forward declarations have been resolved, force cycles to be - /// resolved. + /// resolved. If \p MDMaterialized is true, then any temporary metadata + /// is ignored, otherwise it asserts when encountering temporary metadata. /// /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). - void resolveCycles(); + void resolveCycles(bool MDMaterialized = true); /// \brief Replace a temporary node with a permanent one. /// @@ -881,6 +956,7 @@ protected: void storeDistinctInContext(); template static T *storeImpl(T *N, StorageType Storage, StoreT &Store); + template static T *storeImpl(T *N, StorageType Storage); private: void handleChangedOperand(void *Ref, Metadata *New); @@ -913,13 +989,13 @@ private: N->recalculateHash(); } template - static void dispatchRecalculateHash(NodeTy *N, std::false_type) {} + static void dispatchRecalculateHash(NodeTy *, std::false_type) {} template static void dispatchResetHash(NodeTy *N, std::true_type) { N->setHash(0); } template - static void dispatchResetHash(NodeTy *N, std::false_type) {} + static void dispatchResetHash(NodeTy *, std::false_type) {} public: typedef const MDOperand *op_iterator; @@ -963,6 +1039,8 @@ public: static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B); static MDNode *getMostGenericRange(MDNode *A, MDNode *B); static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B); + static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B); + }; /// \brief Tuple of metadata. @@ -1125,7 +1203,6 @@ public: /// /// TODO: Inherit from Metadata. 
class NamedMDNode : public ilist_node { - friend class SymbolTableListTraits; friend struct ilist_traits; friend class LLVMContextImpl; friend class Module; @@ -1193,7 +1270,7 @@ public: void addOperand(MDNode *M); void setOperand(unsigned I, MDNode *New); StringRef getName() const; - void print(raw_ostream &ROS) const; + void print(raw_ostream &ROS, bool IsForDebug = false) const; void dump() const; // --------------------------------------------------------------------------- @@ -1208,13 +1285,13 @@ public: const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); } inline iterator_range operands() { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } inline iterator_range operands() const { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } }; } // end llvm namespace -#endif +#endif // LLVM_IR_METADATA_H diff --git a/include/llvm/IR/MetadataTracking.h b/include/llvm/IR/MetadataTracking.h deleted file mode 100644 index 541d9b3b1245..000000000000 --- a/include/llvm/IR/MetadataTracking.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- llvm/IR/MetadataTracking.h - Metadata tracking ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Low-level functions to enable tracking of metadata that could RAUW. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_IR_METADATATRACKING_H -#define LLVM_IR_METADATATRACKING_H - -#include "llvm/ADT/PointerUnion.h" -#include "llvm/Support/Casting.h" -#include - -namespace llvm { - -class Metadata; -class MetadataAsValue; - -/// \brief API for tracking metadata references through RAUW and deletion. -/// -/// Shared API for updating \a Metadata pointers in subclasses that support -/// RAUW. -/// -/// This API is not meant to be used directly. See \a TrackingMDRef for a -/// user-friendly tracking reference. -class MetadataTracking { -public: - /// \brief Track the reference to metadata. - /// - /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD - /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets - /// deleted, \c MD will be set to \c nullptr. - /// - /// If tracking isn't supported, \c *MD will not change. - /// - /// \return true iff tracking is supported by \c MD. - static bool track(Metadata *&MD) { - return track(&MD, *MD, static_cast(nullptr)); - } - - /// \brief Track the reference to metadata for \a Metadata. - /// - /// As \a track(Metadata*&), but with support for calling back to \c Owner to - /// tell it that its operand changed. This could trigger \c Owner being - /// re-uniqued. - static bool track(void *Ref, Metadata &MD, Metadata &Owner) { - return track(Ref, MD, &Owner); - } - - /// \brief Track the reference to metadata for \a MetadataAsValue. - /// - /// As \a track(Metadata*&), but with support for calling back to \c Owner to - /// tell it that its operand changed. This could trigger \c Owner being - /// re-uniqued. - static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) { - return track(Ref, MD, &Owner); - } - - /// \brief Stop tracking a reference to metadata. - /// - /// Stops \c *MD from tracking \c MD. 
- static void untrack(Metadata *&MD) { untrack(&MD, *MD); } - static void untrack(void *Ref, Metadata &MD); - - /// \brief Move tracking from one reference to another. - /// - /// Semantically equivalent to \c untrack(MD) followed by \c track(New), - /// except that ownership callbacks are maintained. - /// - /// Note: it is an error if \c *MD does not equal \c New. - /// - /// \return true iff tracking is supported by \c MD. - static bool retrack(Metadata *&MD, Metadata *&New) { - return retrack(&MD, *MD, &New); - } - static bool retrack(void *Ref, Metadata &MD, void *New); - - /// \brief Check whether metadata is replaceable. - static bool isReplaceable(const Metadata &MD); - - typedef PointerUnion OwnerTy; - -private: - /// \brief Track a reference to metadata for an owner. - /// - /// Generalized version of tracking. - static bool track(void *Ref, Metadata &MD, OwnerTy Owner); -}; - -} // end namespace llvm - -#endif diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 1668b95c8bd1..942f68543cb6 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -34,54 +35,6 @@ class LLVMContext; class RandomNumberGenerator; class StructType; -template<> struct ilist_traits - : public SymbolTableListTraits { - - // createSentinel is used to get hold of the node that marks the end of the - // list... (same trick used here as in ilist_traits) - Function *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(Function*) {} - - Function *provideInitialHead() const { return createSentinel(); } - Function *ensureHead(Function*) const { return createSentinel(); } - static void noteHead(Function*, Function*) {} - -private: - mutable ilist_node Sentinel; -}; - -template<> struct ilist_traits - : public SymbolTableListTraits { - // createSentinel is used to create a node that marks the end of the list. - GlobalVariable *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(GlobalVariable*) {} - - GlobalVariable *provideInitialHead() const { return createSentinel(); } - GlobalVariable *ensureHead(GlobalVariable*) const { return createSentinel(); } - static void noteHead(GlobalVariable*, GlobalVariable*) {} -private: - mutable ilist_node Sentinel; -}; - -template<> struct ilist_traits - : public SymbolTableListTraits { - // createSentinel is used to create a node that marks the end of the list. - GlobalAlias *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(GlobalAlias*) {} - - GlobalAlias *provideInitialHead() const { return createSentinel(); } - GlobalAlias *ensureHead(GlobalAlias*) const { return createSentinel(); } - static void noteHead(GlobalAlias*, GlobalAlias*) {} -private: - mutable ilist_node Sentinel; -}; - template<> struct ilist_traits : public ilist_default_traits { // createSentinel is used to get hold of a node that marks the end of @@ -96,6 +49,7 @@ template<> struct ilist_traits static void noteHead(NamedMDNode*, NamedMDNode*) {} void addNodeToList(NamedMDNode *) {} void removeNodeFromList(NamedMDNode *) {} + private: mutable ilist_node Sentinel; }; @@ -116,11 +70,11 @@ class Module { /// @{ public: /// The type for the list of global variables. 
- typedef iplist GlobalListType; + typedef SymbolTableList GlobalListType; /// The type for the list of functions. - typedef iplist FunctionListType; + typedef SymbolTableList FunctionListType; /// The type for the list of aliases. - typedef iplist AliasListType; + typedef SymbolTableList AliasListType; /// The type for the list of named metadata. typedef ilist NamedMDListType; /// The type of the comdat "symbol" table. @@ -328,6 +282,11 @@ public: /// registered in this LLVMContext. void getMDKindNames(SmallVectorImpl &Result) const; + /// Populate client supplied SmallVector with the bundle tags registered in + /// this LLVMContext. The bundle tags are ordered by increasing bundle IDs. + /// \see LLVMContext::getOperandBundleTagID + void getOperandBundleTags(SmallVectorImpl &Result) const; + /// Return the type with the specified name, or null if there is none by that /// name. StructType *getTypeByName(StringRef Name) const; @@ -472,7 +431,7 @@ public: /// Sets the GVMaterializer to GVM. This module must not yet have a /// Materializer. To reset the materializer for a module that already has one, - /// call MaterializeAllPermanently first. Destroying this module will destroy + /// call materializeAll first. Destroying this module will destroy /// its materializer without materializing any more GlobalValues. Without /// destroying the Module, there is no way to detach or destroy a materializer /// without materializing all the GVs it controls, to avoid leaving orphan @@ -480,27 +439,16 @@ public: void setMaterializer(GVMaterializer *GVM); /// Retrieves the GVMaterializer, if any, for this Module. GVMaterializer *getMaterializer() const { return Materializer.get(); } - - /// Returns true if this GV was loaded from this Module's GVMaterializer and - /// the GVMaterializer knows how to dematerialize the GV. - bool isDematerializable(const GlobalValue *GV) const; + bool isMaterialized() const { return !getMaterializer(); } /// Make sure the GlobalValue is fully read. If the module is corrupt, this /// returns true and fills in the optional string with information about the /// problem. If successful, this returns false. std::error_code materialize(GlobalValue *GV); - /// If the GlobalValue is read in, and if the GVMaterializer supports it, - /// release the memory for the function, and set it up to be materialized - /// lazily. If !isDematerializable(), this method is a no-op. - void dematerialize(GlobalValue *GV); - - /// Make sure all GlobalValues in this Module are fully read. - std::error_code materializeAll(); /// Make sure all GlobalValues in this Module are fully read and clear the - /// Materializer. If the module is corrupt, this DOES NOT clear the old /// Materializer. 
- std::error_code materializeAllPermanently(); + std::error_code materializeAll(); std::error_code materializeMetadata(); @@ -556,10 +504,10 @@ public: bool global_empty() const { return GlobalList.empty(); } iterator_range globals() { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } iterator_range globals() const { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } /// @} @@ -578,10 +526,10 @@ public: bool empty() const { return FunctionList.empty(); } iterator_range functions() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } iterator_range functions() const { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// @} @@ -596,10 +544,10 @@ public: bool alias_empty() const { return AliasList.empty(); } iterator_range aliases() { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } iterator_range aliases() const { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } /// @} @@ -620,12 +568,10 @@ public: bool named_metadata_empty() const { return NamedMDList.empty(); } iterator_range named_metadata() { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } iterator_range named_metadata() const { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } /// Destroy ConstantArrays in LLVMContext if they are not used. @@ -646,11 +592,12 @@ public: /// uselistorder directives so that use-lists can be recreated when reading /// the assembly. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW, - bool ShouldPreserveUseListOrder = false) const; + bool ShouldPreserveUseListOrder = false, + bool IsForDebug = false) const; /// Dump the module to stderr (for debugging). void dump() const; - + /// This function causes all the subinstructions to "let go" of all references /// that they are maintaining. This allows one to 'delete' a whole class at /// a time, even though there may be circular references... first all @@ -666,6 +613,10 @@ public: /// \brief Returns the Dwarf Version by checking module flags. unsigned getDwarfVersion() const; + /// \brief Returns the CodeView Version by checking module flags. + /// Returns zero if not present in module. + unsigned getCodeViewFlag() const; + /// @} /// @name Utility functions for querying and setting PIC level /// @{ @@ -676,6 +627,16 @@ public: /// \brief Set the PIC level (small or large model) void setPICLevel(PICLevel::Level PL); /// @} + + /// @name Utility functions for querying and setting PGO counts + /// @{ + + /// \brief Set maximum function count in PGO mode + void setMaximumFunctionCount(uint64_t); + + /// \brief Returns maximum function count in PGO mode + Optional getMaximumFunctionCount(); + /// @} }; /// A raw_ostream inserter for modules.
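The materialization hunks above fold materializeAllPermanently() into materializeAll() and drop the dematerialize()/isDematerializable() pair, so a module is either being lazily materialized or fully read. A minimal sketch of driving the new interface; the helper name and error reporting here are illustrative, not part of this patch:

    // Force a lazily loaded module to be fully read before walking it.
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    static bool fullyRead(llvm::Module &M) {
      // materializeAll() now also clears the materializer on success,
      // so isMaterialized() becomes true afterwards.
      if (std::error_code EC = M.materializeAll()) {
        llvm::errs() << "materialization failed: " << EC.message() << "\n";
        return false;
      }
      return M.isMaterialized();
    }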
@@ -693,7 +654,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef) inline Module *unwrap(LLVMModuleProviderRef MP) { return reinterpret_cast(MP); } - + } // End llvm namespace #endif diff --git a/include/llvm/IR/ModuleSlotTracker.h b/include/llvm/IR/ModuleSlotTracker.h index c37dcecf8e40..49730a66bdf6 100644 --- a/include/llvm/IR/ModuleSlotTracker.h +++ b/include/llvm/IR/ModuleSlotTracker.h @@ -17,6 +17,7 @@ namespace llvm { class Module; class Function; class SlotTracker; +class Value; /// Manage lifetime of a slot tracker for printing IR. /// @@ -61,6 +62,13 @@ public: /// Purge the currently incorporated function and incorporate \c F. If \c F /// is currently incorporated, this is a no-op. void incorporateFunction(const Function &F); + + /// Return the slot number of the specified local value. + /// + /// A function that defines this value should be incorporated prior to calling + /// this method. + /// Return -1 if the value is not in the function's SlotTracker. + int getLocalSlot(const Value *V); }; } // end namespace llvm diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 4166babd63e5..2ceb53d21b7a 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -203,7 +203,8 @@ public: for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { if (DebugLogging) - dbgs() << "Running pass: " << Passes[Idx]->name() << "\n"; + dbgs() << "Running pass: " << Passes[Idx]->name() << " on " + << IR.getName() << "\n"; PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM); diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 41154e6441a9..f4d7d8c44416 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -1272,6 +1272,46 @@ inline typename m_Intrinsic_Ty::Ty m_FMax(const Opnd0 &Op0, return m_Intrinsic(Op0, Op1); } +template struct Signum_match { + Opnd_t Val; + Signum_match(const Opnd_t &V) : Val(V) {} + + template bool match(OpTy *V) { + unsigned TypeSize = V->getType()->getScalarSizeInBits(); + if (TypeSize == 0) + return false; + + unsigned ShiftWidth = TypeSize - 1; + Value *OpL = nullptr, *OpR = nullptr; + + // This is the representation of signum we match: + // + // signum(x) == (x >> 63) | (-x >>u 63) + // + // An i1 value is its own signum, so it's correct to match + // + // signum(x) == (x >> 0) | (-x >>u 0) + // + // for i1 values. + + auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); + auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); + auto Signum = m_Or(LHS, RHS); + + return Signum.match(V) && OpL == OpR && Val.match(OpL); + } +}; + +/// \brief Matches a signum pattern. +/// +/// signum(x) = +/// x > 0 -> 1 +/// x == 0 -> 0 +/// x < 0 -> -1 +template inline Signum_match m_Signum(const Val_t &V) { + return Signum_match(V); +} + } // end namespace PatternMatch } // end namespace llvm diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index 4ab1f8497adb..7310c5697a7e 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -173,7 +173,7 @@ public: /// range adapter for call arguments iterator_range call_args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// \brief Return true if the call or the callee has the given attribute. 
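Signum_match above recognizes the shift-or expansion signum(x) == (x >> (w-1)) | (-x >>u (w-1)), where w is the scalar bit width. A short sketch of how a transform could consume the new m_Signum matcher; the wrapper function is hypothetical:

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns X when V computes signum(X) in the shift-or form
    // matched above, and nullptr otherwise.
    static Value *matchSignum(Value *V) {
      Value *X;
      if (match(V, m_Signum(m_Value(X))))
        return X;
      return nullptr;
    }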
@@ -201,8 +201,7 @@ public: /// range adapter for GC transition arguments iterator_range gc_transition_args() const { - return iterator_range(gc_transition_args_begin(), - gc_transition_args_end()); + return make_range(gc_transition_args_begin(), gc_transition_args_end()); } /// Number of additional arguments excluding those intended @@ -225,7 +224,7 @@ public: /// range adapter for vm state arguments iterator_range vm_state_args() const { - return iterator_range(vm_state_begin(), vm_state_end()); + return make_range(vm_state_begin(), vm_state_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { @@ -235,9 +234,13 @@ public: return getCallSite().arg_end(); } + unsigned gcArgsStartIdx() const { + return gc_args_begin() - getInstruction()->op_begin(); + } + /// range adapter for gc arguments iterator_range gc_args() const { - return iterator_range(gc_args_begin(), gc_args_end()); + return make_range(gc_args_begin(), gc_args_end()); } /// Get list of all gc relocates linked to this statepoint @@ -320,7 +323,7 @@ public: bool isTiedToInvoke() const { const Value *Token = RelocateCS.getArgument(0); - return isa(Token) || isa(Token); + return isa(Token) || isa(Token); } /// Get enclosed relocate intrinsic @@ -332,7 +335,7 @@ public: // This takes care both of relocates for call statepoints and relocates // on normal path of invoke statepoint. - if (!isa(Token)) { + if (!isa(Token)) { return cast(Token); } @@ -396,16 +399,10 @@ StatepointBase::getRelocates() LandingPadInst *LandingPad = cast(getInstruction())->getLandingPadInst(); - // Search for extract value from landingpad instruction to which - // gc relocates will be attached + // Search for gc relocates that are attached to this landingpad. for (const User *LandingPadUser : LandingPad->users()) { - if (!isa(LandingPadUser)) - continue; - - // gc relocates should be attached to this extract value - for (const User *U : LandingPadUser->users()) - if (isGCRelocate(U)) - Result.push_back(GCRelocateOperands(U)); + if (isGCRelocate(LandingPadUser)) + Result.push_back(GCRelocateOperands(LandingPadUser)); } return Result; } diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h index 0a5149c3d938..5fc48d10d63f 100644 --- a/include/llvm/IR/SymbolTableListTraits.h +++ b/include/llvm/IR/SymbolTableListTraits.h @@ -29,31 +29,66 @@ namespace llvm { class ValueSymbolTable; - -template class ilist_iterator; -template class iplist; -template struct ilist_traits; + +template class ilist_iterator; +template class iplist; +template struct ilist_traits; + +template +struct SymbolTableListSentinelTraits + : public ilist_embedded_sentinel_traits {}; + +/// Template metafunction to get the parent type for a symbol table list. +/// +/// Implementations create a typedef called \c type so that we only need a +/// single template parameter for the list and traits.
+template struct SymbolTableListParentType {}; +class Argument; +class BasicBlock; +class Function; +class Instruction; +class GlobalVariable; +class GlobalAlias; +class Module; +#define DEFINE_SYMBOL_TABLE_PARENT_TYPE(NODE, PARENT) \ + template <> struct SymbolTableListParentType { typedef PARENT type; }; +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Instruction, BasicBlock) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(BasicBlock, Function) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Argument, Function) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Function, Module) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalVariable, Module) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalAlias, Module) +#undef DEFINE_SYMBOL_TABLE_PARENT_TYPE + +template class SymbolTableList; // ValueSubClass - The type of objects that I hold, e.g. Instruction. // ItemParentClass - The type of object that owns the list, e.g. BasicBlock. // -template -class SymbolTableListTraits : public ilist_default_traits { - typedef ilist_traits TraitsClass; +template +class SymbolTableListTraits + : public ilist_nextprev_traits, + public SymbolTableListSentinelTraits, + public ilist_node_traits { + typedef SymbolTableList ListTy; + typedef + typename SymbolTableListParentType::type ItemParentClass; + public: SymbolTableListTraits() {} +private: /// getListOwner - Return the object that owns this list. If this is a list /// of instructions, it returns the BasicBlock that owns them. ItemParentClass *getListOwner() { size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass:: getSublistAccess(static_cast(nullptr))))); - iplist* Anchor(static_cast*>(this)); + ListTy *Anchor(static_cast(this)); return reinterpret_cast(reinterpret_cast(Anchor)- Offset); } - static iplist &getList(ItemParentClass *Par) { + static ListTy &getList(ItemParentClass *Par) { return Par->*(Par->getSublistAccess((ValueSubClass*)nullptr)); } @@ -61,9 +96,10 @@ public: return Par ? toPtr(Par->getValueSymbolTable()) : nullptr; } +public: void addNodeToList(ValueSubClass *V); void removeNodeFromList(ValueSubClass *V); - void transferNodesFromList(ilist_traits &L2, + void transferNodesFromList(SymbolTableListTraits &L2, ilist_iterator first, ilist_iterator last); //private: @@ -73,6 +109,14 @@ public: static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; } }; +/// List that automatically updates parent links and symbol tables. +/// +/// When nodes are inserted into and removed from this list, the associated +/// symbol table will be automatically updated. Similarly, parent links get +/// updated automatically. +template +class SymbolTableList : public iplist> {}; + } // End llvm namespace #endif diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h index e24112154e16..97efaff7a377 100644 --- a/include/llvm/IR/TrackingMDRef.h +++ b/include/llvm/IR/TrackingMDRef.h @@ -14,15 +14,11 @@ #ifndef LLVM_IR_TRACKINGMDREF_H #define LLVM_IR_TRACKINGMDREF_H -#include "llvm/IR/MetadataTracking.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" namespace llvm { -class Metadata; -class MDNode; -class ValueAsMetadata; - /// \brief Tracking metadata reference. /// /// This class behaves like \a TrackingVH, but for metadata. 
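The SymbolTableListParentType metafunction above is what lets the new SymbolTableList and SymbolTableListTraits take a single template parameter: the owning class is computed from the node type, which is why the per-owner ilist_traits specializations could be deleted from Module.h earlier in this patch. A standalone toy reduction of the same pattern, using stand-in types rather than LLVM's:

    #include <type_traits>

    struct Module;   // stand-in owner
    struct Function; // stand-in node

    // Primary template is empty; each node type opts in by specializing.
    template <typename NodeTy> struct ParentType {};
    template <> struct ParentType<Function> { typedef Module type; };

    // A list that needs only the node type; the owner type is derived.
    template <typename NodeTy> struct SymbolList {
      typedef typename ParentType<NodeTy>::type OwnerTy;
      OwnerTy *Owner = nullptr;
    };

    static_assert(std::is_same<SymbolList<Function>::OwnerTy, Module>::value,
                  "owner type is derived from the node type");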
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index 6ab0bd0631a0..b2920dd3de63 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_TYPE_H #define LLVM_IR_TYPE_H -#include "llvm-c/Core.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" @@ -38,10 +37,10 @@ template struct GraphTraits; /// they are never changed. Also note that only one instance of a particular /// type is ever created. Thus seeing if two types are equal is a matter of /// doing a trivial pointer comparison. To enforce that no two equal instances -/// are created, Type instances can only be created via static factory methods +/// are created, Type instances can only be created via static factory methods /// in class Type and in derived classes. Once allocated, Types are never /// free'd. -/// +/// class Type { public: //===--------------------------------------------------------------------===// @@ -63,45 +62,36 @@ public: LabelTyID, ///< 7: Labels MetadataTyID, ///< 8: Metadata X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific) + TokenTyID, ///< 10: Tokens // Derived types... see DerivedTypes.h file. // Make sure FirstDerivedTyID stays up to date! - IntegerTyID, ///< 10: Arbitrary bit width integers - FunctionTyID, ///< 11: Functions - StructTyID, ///< 12: Structures - ArrayTyID, ///< 13: Arrays - PointerTyID, ///< 14: Pointers - VectorTyID ///< 15: SIMD 'packed' format, or other vector type + IntegerTyID, ///< 11: Arbitrary bit width integers + FunctionTyID, ///< 12: Functions + StructTyID, ///< 13: Structures + ArrayTyID, ///< 14: Arrays + PointerTyID, ///< 15: Pointers + VectorTyID ///< 16: SIMD 'packed' format, or other vector type }; private: /// Context - This refers to the LLVMContext in which this type was uniqued. LLVMContext &Context; - // Due to Ubuntu GCC bug 910363: - // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363 - // Bitpack ID and SubclassData manually. - // Note: TypeID : low 8 bit; SubclassData : high 24 bit. - uint32_t IDAndSubclassData; + TypeID ID : 8; // The current base type of this type. + unsigned SubclassData : 24; // Space for subclasses to store data. protected: friend class LLVMContextImpl; explicit Type(LLVMContext &C, TypeID tid) - : Context(C), IDAndSubclassData(0), - NumContainedTys(0), ContainedTys(nullptr) { - setTypeID(tid); - } + : Context(C), ID(tid), SubclassData(0), + NumContainedTys(0), ContainedTys(nullptr) {} ~Type() = default; - void setTypeID(TypeID ID) { - IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00); - assert(getTypeID() == ID && "TypeID data too large for field"); - } - - unsigned getSubclassData() const { return IDAndSubclassData >> 8; } - + unsigned getSubclassData() const { return SubclassData; } + void setSubclassData(unsigned val) { - IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8); + SubclassData = val; // Ensure we don't have any accidental truncation. assert(getSubclassData() == val && "Subclass data too large for field"); } @@ -118,7 +108,7 @@ protected: Type * const *ContainedTys; public: - void print(raw_ostream &O) const; + void print(raw_ostream &O, bool IsForDebug = false) const; void dump() const; /// getContext - Return the LLVMContext in which this type was uniqued. @@ -131,7 +121,7 @@ public: /// getTypeID - Return the type id for the type. This will return one /// of the TypeID enum elements defined above. 
/// - TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); } + TypeID getTypeID() const { return ID; } /// isVoidTy - Return true if this is 'void'. bool isVoidTy() const { return getTypeID() == VoidTyID; } @@ -141,7 +131,7 @@ public: /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. bool isFloatTy() const { return getTypeID() == FloatTyID; } - + /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. bool isDoubleTy() const { return getTypeID() == DoubleTyID; } @@ -181,16 +171,19 @@ public: /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP. /// bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } - + /// isLabelTy - Return true if this is 'label'. bool isLabelTy() const { return getTypeID() == LabelTyID; } /// isMetadataTy - Return true if this is 'metadata'. bool isMetadataTy() const { return getTypeID() == MetadataTyID; } + /// isTokenTy - Return true if this is 'token'. + bool isTokenTy() const { return getTypeID() == TokenTyID; } + /// isIntegerTy - True if this is an instance of IntegerType. /// - bool isIntegerTy() const { return getTypeID() == IntegerTyID; } + bool isIntegerTy() const { return getTypeID() == IntegerTyID; } /// isIntegerTy - Return true if this is an IntegerType of the given width. bool isIntegerTy(unsigned Bitwidth) const; @@ -199,7 +192,7 @@ public: /// integer types. /// bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } - + /// isFunctionTy - True if this is an instance of FunctionType. /// bool isFunctionTy() const { return getTypeID() == FunctionTyID; } @@ -220,14 +213,14 @@ public: /// pointer types. /// bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); } - + /// isVectorTy - True if this is an instance of VectorType. /// bool isVectorTy() const { return getTypeID() == VectorTyID; } - /// canLosslesslyBitCastTo - Return true if this type could be converted - /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts - /// are valid for types of the same size only where no re-interpretation of + /// canLosslesslyBitCastTo - Return true if this type could be converted + /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts + /// are valid for types of the same size only where no re-interpretation of /// the bits is done. /// @brief Determine if this type could be losslessly bitcast to Ty bool canLosslesslyBitCastTo(Type *Ty) const; @@ -265,7 +258,7 @@ public: /// get the actual size for a particular target, it is reasonable to use the /// DataLayout subsystem to do this. /// - bool isSized(SmallPtrSetImpl *Visited = nullptr) const { + bool isSized(SmallPtrSetImpl *Visited = nullptr) const { // If it's a primitive, it is always sized. if (getTypeID() == IntegerTyID || isFloatingPointTy() || getTypeID() == PointerTyID || @@ -304,8 +297,7 @@ public: /// getScalarType - If this is a vector type, return the element type, /// otherwise return 'this'. - const Type *getScalarType() const LLVM_READONLY; - Type *getScalarType() LLVM_READONLY; + Type *getScalarType() const LLVM_READONLY; //===--------------------------------------------------------------------===// // Type Iteration support. @@ -344,30 +336,30 @@ public: // example) is shorthand for cast(Ty)->getNumElements(). This is // only intended to cover the core methods that are frequently used, helper // methods should not be added here. 
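The Type.h hunk above replaces the manually packed IDAndSubclassData word (a workaround for Ubuntu GCC bug 910363) with ordinary bit-fields, keeping the same low-8/high-24 split. A toy illustration of the layout being relied on; the struct and values here are ours, not LLVM's:

    #include <cassert>

    enum TypeID : unsigned { VoidTyID = 0, TokenTyID = 10, IntegerTyID = 11 };

    struct Packed {
      TypeID ID : 8;              // low 8 bits: the type id
      unsigned SubclassData : 24; // remaining 24 bits: subclass scratch space
    };

    int main() {
      Packed P{IntegerTyID, 0x123456};
      assert(P.ID == IntegerTyID && P.SubclassData == 0x123456);
      // On common ABIs this packs into one 32-bit word, matching the
      // old uint32_t IDAndSubclassData field.
    }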
- - unsigned getIntegerBitWidth() const; - Type *getFunctionParamType(unsigned i) const; - unsigned getFunctionNumParams() const; - bool isFunctionVarArg() const; - - StringRef getStructName() const; - unsigned getStructNumElements() const; - Type *getStructElementType(unsigned N) const; - - Type *getSequentialElementType() const; - - uint64_t getArrayNumElements() const; + inline unsigned getIntegerBitWidth() const; + + inline Type *getFunctionParamType(unsigned i) const; + inline unsigned getFunctionNumParams() const; + inline bool isFunctionVarArg() const; + + inline StringRef getStructName() const; + inline unsigned getStructNumElements() const; + inline Type *getStructElementType(unsigned N) const; + + inline Type *getSequentialElementType() const; + + inline uint64_t getArrayNumElements() const; Type *getArrayElementType() const { return getSequentialElementType(); } - unsigned getVectorNumElements() const; + inline unsigned getVectorNumElements() const; Type *getVectorElementType() const { return getSequentialElementType(); } Type *getPointerElementType() const { return getSequentialElementType(); } /// \brief Get the address space of this pointer or pointer vector type. - unsigned getPointerAddressSpace() const; - + inline unsigned getPointerAddressSpace() const; + //===--------------------------------------------------------------------===// // Static members exported by the Type class itself. Useful for getting // instances of Type. @@ -389,6 +381,7 @@ public: static Type *getFP128Ty(LLVMContext &C); static Type *getPPC_FP128Ty(LLVMContext &C); static Type *getX86_MMXTy(LLVMContext &C); + static Type *getTokenTy(LLVMContext &C); static IntegerType *getIntNTy(LLVMContext &C, unsigned N); static IntegerType *getInt1Ty(LLVMContext &C); static IntegerType *getInt8Ty(LLVMContext &C); @@ -396,7 +389,7 @@ public: static IntegerType *getInt32Ty(LLVMContext &C); static IntegerType *getInt64Ty(LLVMContext &C); static IntegerType *getInt128Ty(LLVMContext &C); - + //===--------------------------------------------------------------------===// // Convenience methods for getting pointer types with one of the above builtin // types as pointee. @@ -417,13 +410,13 @@ public: /// getPointerTo - Return a pointer to the current type. This is equivalent /// to PointerType::get(Foo, AddrSpace). - PointerType *getPointerTo(unsigned AddrSpace = 0); + PointerType *getPointerTo(unsigned AddrSpace = 0) const; private: /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. Since asking for /// their size is relatively uncommon, move this operation out of line. - bool isSizedDerivedType(SmallPtrSetImpl *Visited = nullptr) const; + bool isSizedDerivedType(SmallPtrSetImpl *Visited = nullptr) const; }; // Printing of types. @@ -439,13 +432,11 @@ template <> struct isa_impl { } }; - //===----------------------------------------------------------------------===// // Provide specializations of GraphTraits to be able to treat a type as a // graph of sub types. 
- -template <> struct GraphTraits { +template <> struct GraphTraits { typedef Type NodeType; typedef Type::subtype_iterator ChildIteratorType; @@ -483,7 +474,7 @@ inline Type **unwrap(LLVMTypeRef* Tys) { inline LLVMTypeRef *wrap(Type **Tys) { return reinterpret_cast(const_cast(Tys)); } - + } // End llvm namespace #endif diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h index 73a63ad0349e..5f3854377c16 100644 --- a/include/llvm/IR/TypeFinder.h +++ b/include/llvm/IR/TypeFinder.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declaration of the TypeFinder class. +// This file contains the declaration of the TypeFinder class. // //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h index 160d71b03e7f..a738677f8e5b 100644 --- a/include/llvm/IR/Use.h +++ b/include/llvm/IR/Use.h @@ -25,7 +25,6 @@ #ifndef LLVM_IR_USE_H #define LLVM_IR_USE_H -#include "llvm-c/Core.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h index b7c2418d348d..1cabf03d1b00 100644 --- a/include/llvm/IR/UseListOrder.h +++ b/include/llvm/IR/UseListOrder.h @@ -34,7 +34,7 @@ struct UseListOrder { UseListOrder(const Value *V, const Function *F, size_t ShuffleSize) : V(V), F(F), Shuffle(ShuffleSize) {} - UseListOrder() : V(0), F(0) {} + UseListOrder() : V(nullptr), F(nullptr) {} UseListOrder(UseListOrder &&X) : V(X.V), F(X.F), Shuffle(std::move(X.Shuffle)) {} UseListOrder &operator=(UseListOrder &&X) { @@ -53,4 +53,4 @@ typedef std::vector UseListOrderStack; } // end namespace llvm -#endif +#endif // LLVM_IR_USELISTORDER_H diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 93614fab5759..885ae197d228 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -19,6 +19,7 @@ #ifndef LLVM_IR_USER_H #define LLVM_IR_USER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Value.h" @@ -39,6 +40,9 @@ class User : public Value { friend struct HungoffOperandTraits; virtual void anchor(); + LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void * + allocateFixedOperandUser(size_t, unsigned, unsigned); + protected: /// Allocate a User with an operand pointer co-allocated. /// @@ -51,7 +55,17 @@ protected: /// This is used for subclasses which have a fixed number of operands. void *operator new(size_t Size, unsigned Us); - User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps) + /// Allocate a User with the operands co-allocated. If DescBytes is non-zero + /// then allocate an additional DescBytes bytes before the operands. These + /// bytes can be accessed by calling getDescriptor. + /// + /// DescBytes needs to be divisible by sizeof(void *). The allocated + /// descriptor, if any, is aligned to sizeof(void *) bytes. + /// + /// This is used for subclasses which have a fixed number of operands. + void *operator new(size_t Size, unsigned Us, unsigned DescBytes); + + User(Type *ty, unsigned vty, Use *, unsigned NumOps) : Value(ty, vty) { assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands"); NumUserOperands = NumOps; @@ -137,6 +151,12 @@ public: unsigned getNumOperands() const { return NumUserOperands; } + /// Returns the descriptor co-allocated with this User instance. 
+ ArrayRef getDescriptor() const; + + /// Returns the descriptor co-allocated with this User instance. + MutableArrayRef getDescriptor(); + /// Set the number of operands on a GlobalVariable. /// /// GlobalVariable always allocates space for a single operands, but @@ -150,19 +170,6 @@ public: NumUserOperands = NumOps; } - /// Set the number of operands on a Function. - /// - /// Function always allocates space for a single operands, but - /// doesn't always use it. - /// - /// FIXME: As that the number of operands is used to find the start of - /// the allocated memory in operator delete, we need to always think we have - /// 1 operand before delete. - void setFunctionNumOperands(unsigned NumOps) { - assert(NumOps <= 1 && "Function can only have 0 or 1 operands"); - NumUserOperands = NumOps; - } - /// \brief Subclasses with hung off uses need to manage the operand count /// themselves. In these instances, the operand count isn't used to find the /// OperandList, so there's no issue in having the operand count change. @@ -213,7 +220,7 @@ public: return value_op_iterator(op_end()); } iterator_range operand_values() { - return iterator_range(value_op_begin(), value_op_end()); + return make_range(value_op_begin(), value_op_end()); } /// \brief Drop all references to operands. diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def index c2a0639603ed..4c5d452fc3c3 100644 --- a/include/llvm/IR/Value.def +++ b/include/llvm/IR/Value.def @@ -70,6 +70,7 @@ HANDLE_CONSTANT(ConstantArray) HANDLE_CONSTANT(ConstantStruct) HANDLE_CONSTANT(ConstantVector) HANDLE_CONSTANT(ConstantPointerNull) +HANDLE_CONSTANT(ConstantTokenNone) HANDLE_METADATA_VALUE(MetadataAsValue) HANDLE_INLINE_ASM_VALUE(InlineAsm) @@ -79,7 +80,7 @@ HANDLE_INSTRUCTION(Instruction) // don't add new values here! HANDLE_CONSTANT_MARKER(ConstantFirstVal, Function) -HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantPointerNull) +HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantTokenNone) #undef HANDLE_GLOBAL_VALUE #undef HANDLE_CONSTANT diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 17a80c82d1bc..bb7ff278fdef 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -14,7 +14,6 @@ #ifndef LLVM_IR_VALUE_H #define LLVM_IR_VALUE_H -#include "llvm-c/Core.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/CBindingWrapping.h" @@ -104,12 +103,13 @@ protected: /// /// Note, this should *NOT* be used directly by any class other than User. /// User uses this value to find the Use list. - enum : unsigned { NumUserOperandsBits = 29 }; + enum : unsigned { NumUserOperandsBits = 28 }; unsigned NumUserOperands : NumUserOperandsBits; bool IsUsedByMD : 1; bool HasName : 1; bool HasHungOffUses : 1; + bool HasDescriptor : 1; private: template // UseT == 'Use' or 'const Use' @@ -201,8 +201,9 @@ public: /// \brief Implement operator<< on Value. /// @{ - void print(raw_ostream &O) const; - void print(raw_ostream &O, ModuleSlotTracker &MST) const; + void print(raw_ostream &O, bool IsForDebug = false) const; + void print(raw_ostream &O, ModuleSlotTracker &MST, + bool IsForDebug = false) const; /// @} /// \brief Print the name of this Value out to the specified raw_ostream. @@ -272,36 +273,91 @@ public: //---------------------------------------------------------------------- // Methods for handling the chain of uses of this Value. 
// - bool use_empty() const { return UseList == nullptr; } + // Materializing a function can introduce new uses, so these methods come in + // two variants: + // The methods that start with materialized_ check the uses that are + // currently known given which functions are materialized. Be very careful + // when using them since you might not get all uses. + // The methods that don't start with materialized_ assert that modules is + // fully materialized. +#ifdef NDEBUG + void assertModuleIsMaterialized() const {} +#else + void assertModuleIsMaterialized() const; +#endif - typedef use_iterator_impl use_iterator; + bool use_empty() const { + assertModuleIsMaterialized(); + return UseList == nullptr; + } + + typedef use_iterator_impl use_iterator; typedef use_iterator_impl const_use_iterator; - use_iterator use_begin() { return use_iterator(UseList); } - const_use_iterator use_begin() const { return const_use_iterator(UseList); } - use_iterator use_end() { return use_iterator(); } - const_use_iterator use_end() const { return const_use_iterator(); } + use_iterator materialized_use_begin() { return use_iterator(UseList); } + const_use_iterator materialized_use_begin() const { + return const_use_iterator(UseList); + } + use_iterator use_begin() { + assertModuleIsMaterialized(); + return materialized_use_begin(); + } + const_use_iterator use_begin() const { + assertModuleIsMaterialized(); + return materialized_use_begin(); + } + use_iterator use_end() { return use_iterator(); } + const_use_iterator use_end() const { return const_use_iterator(); } + iterator_range materialized_uses() { + return make_range(materialized_use_begin(), use_end()); + } + iterator_range materialized_uses() const { + return make_range(materialized_use_begin(), use_end()); + } iterator_range uses() { - return iterator_range(use_begin(), use_end()); + assertModuleIsMaterialized(); + return materialized_uses(); } iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + assertModuleIsMaterialized(); + return materialized_uses(); } - bool user_empty() const { return UseList == nullptr; } + bool user_empty() const { + assertModuleIsMaterialized(); + return UseList == nullptr; + } - typedef user_iterator_impl user_iterator; + typedef user_iterator_impl user_iterator; typedef user_iterator_impl const_user_iterator; - user_iterator user_begin() { return user_iterator(UseList); } - const_user_iterator user_begin() const { return const_user_iterator(UseList); } - user_iterator user_end() { return user_iterator(); } - const_user_iterator user_end() const { return const_user_iterator(); } - User *user_back() { return *user_begin(); } - const User *user_back() const { return *user_begin(); } + user_iterator materialized_user_begin() { return user_iterator(UseList); } + const_user_iterator materialized_user_begin() const { + return const_user_iterator(UseList); + } + user_iterator user_begin() { + assertModuleIsMaterialized(); + return materialized_user_begin(); + } + const_user_iterator user_begin() const { + assertModuleIsMaterialized(); + return materialized_user_begin(); + } + user_iterator user_end() { return user_iterator(); } + const_user_iterator user_end() const { return const_user_iterator(); } + User *user_back() { + assertModuleIsMaterialized(); + return *materialized_user_begin(); + } + const User *user_back() const { + assertModuleIsMaterialized(); + return *materialized_user_begin(); + } iterator_range users() { - return iterator_range(user_begin(), user_end()); + assertModuleIsMaterialized(); + 
return make_range(materialized_user_begin(), user_end()); } iterator_range users() const { - return iterator_range(user_begin(), user_end()); + assertModuleIsMaterialized(); + return make_range(materialized_user_begin(), user_end()); } /// \brief Return true if there is exactly one user of this value. @@ -493,7 +549,28 @@ private: template static Use *mergeUseLists(Use *L, Use *R, Compare Cmp) { Use *Merged; - mergeUseListsImpl(L, R, &Merged, Cmp); + Use **Next = &Merged; + + for (;;) { + if (!L) { + *Next = R; + break; + } + if (!R) { + *Next = L; + break; + } + if (Cmp(*R, *L)) { + *Next = R; + Next = &R->Next; + R = R->Next; + } else { + *Next = L; + Next = &L->Next; + L = L->Next; + } + } + return Merged; } @@ -586,25 +663,6 @@ template void Value::sortUseList(Compare Cmp) { } } -template -void Value::mergeUseListsImpl(Use *L, Use *R, Use **Next, Compare Cmp) { - if (!L) { - *Next = R; - return; - } - if (!R) { - *Next = L; - return; - } - if (Cmp(*R, *L)) { - *Next = R; - mergeUseListsImpl(L, R->Next, &R->Next, Cmp); - return; - } - *Next = L; - mergeUseListsImpl(L->Next, R, &L->Next, Cmp); -} - // isa - Provide some specializations of isa so that we don't have to include // the subtype header files to test to see if the value is a subclass... // diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h index 53fa80a626aa..3c2805913ef5 100644 --- a/include/llvm/IR/ValueHandle.h +++ b/include/llvm/IR/ValueHandle.h @@ -52,13 +52,21 @@ protected: Weak }; + ValueHandleBase(const ValueHandleBase &RHS) + : ValueHandleBase(RHS.PrevPair.getInt(), RHS) {} + + ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) + : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) { + if (isValid(V)) + AddToExistingUseList(RHS.getPrevPtr()); + } + private: PointerIntPair PrevPair; ValueHandleBase *Next; Value* V; - ValueHandleBase(const ValueHandleBase&) = delete; public: explicit ValueHandleBase(HandleBaseKind Kind) : PrevPair(nullptr, Kind), Next(nullptr), V(nullptr) {} @@ -67,11 +75,7 @@ public: if (isValid(V)) AddToUseList(); } - ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) - : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) { - if (isValid(V)) - AddToExistingUseList(RHS.getPrevPtr()); - } + ~ValueHandleBase() { if (isValid(V)) RemoveFromUseList(); @@ -145,6 +149,8 @@ public: WeakVH(const WeakVH &RHS) : ValueHandleBase(Weak, RHS) {} + WeakVH &operator=(const WeakVH &RHS) = default; + Value *operator=(Value *RHS) { return ValueHandleBase::operator=(RHS); } @@ -314,7 +320,6 @@ class TrackingVH : public ValueHandleBase { public: TrackingVH() : ValueHandleBase(Tracking) {} TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {} - TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {} operator ValueTy*() const { return getValPtr(); @@ -324,10 +329,6 @@ public: setValPtr(RHS); return getValPtr(); } - ValueTy *operator=(const TrackingVH &RHS) { - setValPtr(RHS.getValPtr()); - return getValPtr(); - } ValueTy *operator->() const { return getValPtr(); } ValueTy &operator*() const { return *getValPtr(); } @@ -339,15 +340,13 @@ public: /// when the underlying Value has RAUW called on it or is destroyed. This /// class can be used as the key of a map, as long as the user takes it out of /// the map before calling setValPtr() (since the map has to rearrange itself -/// when the pointer changes). Unlike ValueHandleBase, this class has a vtable -/// and a virtual destructor. +/// when the pointer changes). 
Unlike ValueHandleBase, this class has a vtable. class CallbackVH : public ValueHandleBase { virtual void anchor(); protected: - CallbackVH(const CallbackVH &RHS) - : ValueHandleBase(Callback, RHS) {} - - virtual ~CallbackVH() {} + ~CallbackVH() = default; + CallbackVH(const CallbackVH &) = default; + CallbackVH &operator=(const CallbackVH &) = default; void setValPtr(Value *P) { ValueHandleBase::operator=(P); diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 4d00b637609c..ad518ac053b2 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -214,8 +214,8 @@ private: // This CallbackVH updates its ValueMap when the contained Value changes, // according to the user's preferences expressed through the Config object. -template -class ValueMapCallbackVH : public CallbackVH { +template +class ValueMapCallbackVH final : public CallbackVH { friend class ValueMap; friend struct DenseMapInfo; typedef ValueMap ValueMapT; diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h index bf1fade1ccef..65bd7fc2fec1 100644 --- a/include/llvm/IR/ValueSymbolTable.h +++ b/include/llvm/IR/ValueSymbolTable.h @@ -14,13 +14,13 @@ #ifndef LLVM_IR_VALUESYMBOLTABLE_H #define LLVM_IR_VALUESYMBOLTABLE_H +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/Value.h" #include "llvm/Support/DataTypes.h" namespace llvm { - template - class SymbolTableListTraits; + template class SymbolTableListTraits; class BasicBlock; class Function; class NamedMDNode; @@ -33,12 +33,12 @@ namespace llvm { /// class ValueSymbolTable { friend class Value; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; /// @name Types /// @{ public: @@ -55,7 +55,6 @@ public: /// @name Constructors /// @{ public: - ValueSymbolTable() : vmap(0), LastUnique(0) {} ~ValueSymbolTable(); @@ -63,9 +62,8 @@ public: /// @name Accessors /// @{ public: - /// This method finds the value with the given \p Name in the - /// the symbol table. + /// the symbol table. /// @returns the value associated with the \p Name /// @brief Lookup a named Value. Value *lookup(StringRef Name) const { return vmap.lookup(Name); } @@ -97,30 +95,32 @@ public: /// @brief Get a const_iterator to the end of the symbol table. inline const_iterator end() const { return vmap.end(); } - -/// @} -/// @name Mutators -/// @{ + + /// @} + /// @name Mutators + /// @{ private: + ValueName *makeUniqueName(Value *V, SmallString<256> &UniqueName); + /// This method adds the provided value \p N to the symbol table. The Value - /// must have a name which is used to place the value in the symbol table. + /// must have a name which is used to place the value in the symbol table. /// If the inserted name conflicts, this renames the value. /// @brief Add a named value to the symbol table void reinsertValue(Value *V); - + /// createValueName - This method attempts to create a value name and insert /// it into the symbol table with the specified name. If it conflicts, it /// auto-renames the name and returns that instead. 
ValueName *createValueName(StringRef Name, Value *V); - + /// This method removes a value from the symbol table. It leaves the /// ValueName attached to the value, but it is no longer inserted in the /// symtab. void removeValueName(ValueName *V); - -/// @} -/// @name Internal Data -/// @{ + + /// @} + /// @name Internal Data + /// @{ private: ValueMap vmap; ///< The map that holds the symbol table. mutable uint32_t LastUnique; ///< Counter for tracking unique names diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h index 2d9ace0b62a0..523cd3d6df72 100644 --- a/include/llvm/IRReader/IRReader.h +++ b/include/llvm/IRReader/IRReader.h @@ -27,10 +27,11 @@ class LLVMContext; /// If the given file holds a bitcode image, return a Module /// for it which does lazy deserialization of function bodies. Otherwise, /// attempt to parse it as LLVM Assembly and return a fully populated -/// Module. -std::unique_ptr getLazyIRFileModule(StringRef Filename, - SMDiagnostic &Err, - LLVMContext &Context); +/// Module. The ShouldLazyLoadMetadata flag is passed down to the bitcode +/// reader to optionally enable lazy metadata loading. +std::unique_ptr +getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, + bool ShouldLazyLoadMetadata = false); /// If the given MemoryBuffer holds a bitcode image, return a Module /// for it. Otherwise, attempt to parse it as LLVM Assembly and return diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index e3b9a95f0a3d..cb2b1394e92b 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -53,9 +53,6 @@ void initializeInstrumentation(PassRegistry&); /// initializeAnalysis - Initialize all passes linked into the Analysis library. void initializeAnalysis(PassRegistry&); -/// initializeIPA - Initialize all passes linked into the IPA library. -void initializeIPA(PassRegistry&); - /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void initializeCodeGen(PassRegistry&); @@ -64,11 +61,8 @@ void initializeTarget(PassRegistry&); void initializeAAEvalPass(PassRegistry&); void initializeAddDiscriminatorsPass(PassRegistry&); -void initializeADCEPass(PassRegistry&); +void initializeADCELegacyPassPass(PassRegistry&); void initializeBDCEPass(PassRegistry&); -void initializeAliasAnalysisAnalysisGroup(PassRegistry&); -void initializeAliasAnalysisCounterPass(PassRegistry&); -void initializeAliasDebuggerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); @@ -76,13 +70,13 @@ void initializeAtomicExpandPass(PassRegistry&); void initializeSampleProfileLoaderPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); -void initializeBasicAliasAnalysisPass(PassRegistry&); +void initializeBasicAAWrapperPassPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry &); void initializeBlockExtractorPassPass(PassRegistry&); -void initializeBlockFrequencyInfoPass(PassRegistry&); +void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); -void initializeBranchProbabilityInfoPass(PassRegistry&); +void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); void initializeCallGraphPrinterPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); @@ -90,7 +84,8 @@ void initializeCFGOnlyPrinterPass(PassRegistry&); void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); -void initializeCFLAliasAnalysisPass(PassRegistry&); +void initializeCFLAAWrapperPassPass(PassRegistry&); +void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeForwardControlFlowIntegrityPass(PassRegistry&); void initializeFlattenCFGPassPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); @@ -102,6 +97,7 @@ void initializeConstantPropagationPass(PassRegistry&); void initializeMachineCopyPropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); +void initializeCrossDSOCFIPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCEPass(PassRegistry&); @@ -120,7 +116,10 @@ void initializeDominatorTreeWrapperPassPass(PassRegistry&); void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); +void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeGCOVProfilerPass(PassRegistry&); +void initializePGOInstrumentationGenPass(PassRegistry&); +void initializePGOInstrumentationUsePass(PassRegistry&); void initializeInstrProfilingPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); @@ -132,19 +131,21 @@ void initializeScalarizerPass(PassRegistry&); void initializeEarlyCSELegacyPassPass(PassRegistry &); void initializeEliminateAvailableExternallyPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); +void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); void 
initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); void initializeGlobalDCEPass(PassRegistry&); void initializeGlobalOptPass(PassRegistry&); -void initializeGlobalsModRefPass(PassRegistry&); +void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeIPCPPass(PassRegistry&); void initializeIPSCCPPass(PassRegistry&); void initializeIVUsersPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); void initializeInductiveRangeCheckEliminationPass(PassRegistry&); void initializeIndVarSimplifyPass(PassRegistry&); +void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&); void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstCountPass(PassRegistry&); @@ -155,7 +156,6 @@ void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); void initializeLICMPass(PassRegistry&); void initializeLazyValueInfoPass(PassRegistry&); -void initializeLibCallAliasAnalysisPass(PassRegistry&); void initializeLintPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); @@ -210,7 +210,7 @@ void initializeMergeFunctionsPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeNaryReassociatePass(PassRegistry&); void initializeNoAAPass(PassRegistry&); -void initializeObjCARCAliasAnalysisPass(PassRegistry&); +void initializeObjCARCAAWrapperPassPass(PassRegistry&); void initializeObjCARCAPElimPass(PassRegistry&); void initializeObjCARCExpandPass(PassRegistry&); void initializeObjCARCContractPass(PassRegistry&); @@ -245,14 +245,14 @@ void initializeRegionViewerPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); -void initializeSROAPass(PassRegistry&); +void initializeSROALegacyPassPass(PassRegistry&); void initializeSROA_DTPass(PassRegistry&); void initializeSROA_SSAUpPass(PassRegistry&); -void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&); -void initializeScalarEvolutionPass(PassRegistry&); +void initializeSCEVAAWrapperPassPass(PassRegistry&); +void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry &); void initializeSimpleInlinerPass(PassRegistry&); -void initializeShadowStackGCLoweringPass(PassRegistry&); +void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingPass(PassRegistry&); @@ -265,7 +265,7 @@ void initializeStackColoringPass(PassRegistry&); void initializeStackSlotColoringPass(PassRegistry&); void initializeStraightLineStrengthReducePass(PassRegistry &); void initializeStripDeadDebugInfoPass(PassRegistry&); -void initializeStripDeadPrototypesPassPass(PassRegistry&); +void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&); void initializeStripDebugDeclarePass(PassRegistry&); void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); @@ -276,8 +276,8 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeTwoAddressInstructionPassPass(PassRegistry&); -void 
initializeTypeBasedAliasAnalysisPass(PassRegistry&); -void initializeScopedNoAliasAAPass(PassRegistry&); +void initializeTypeBasedAAWrapperPassPass(PassRegistry&); +void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeUnifyFunctionExitNodesPass(PassRegistry&); void initializeUnreachableBlockElimPass(PassRegistry&); void initializeUnreachableMachineBlockElimPass(PassRegistry&); @@ -294,6 +294,7 @@ void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeMIRPrintingPassPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); +void initializeLiveDebugValuesPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); void initializeLoadCombinePass(PassRegistry&); void initializeRewriteSymbolsPass(PassRegistry&); @@ -304,6 +305,10 @@ void initializeDwarfEHPreparePass(PassRegistry&); void initializeFloat2IntPass(PassRegistry&); void initializeLoopDistributePass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); +void initializeDemandedBitsPass(PassRegistry&); +void initializeFuncletLayoutPass(PassRegistry &); +void initializeLoopLoadEliminationPass(PassRegistry&); +void initializeFunctionImportPassPass(PassRegistry &); } #endif diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h index 0c46fc048a43..3820b211a381 100644 --- a/include/llvm/LTO/LTOCodeGenerator.h +++ b/include/llvm/LTO/LTOCodeGenerator.h @@ -39,7 +39,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Linker/Linker.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include @@ -48,6 +48,7 @@ namespace llvm { class LLVMContext; class DiagnosticInfo; class GlobalValue; + class Linker; class Mangler; class MemoryBuffer; class TargetLibraryInfo; @@ -61,121 +62,135 @@ namespace llvm { struct LTOCodeGenerator { static const char *getVersionString(); - LTOCodeGenerator(); - LTOCodeGenerator(std::unique_ptr Context); + LTOCodeGenerator(LLVMContext &Context); ~LTOCodeGenerator(); - // Merge given module, return true on success. + /// Merge given module. Return true on success. bool addModule(struct LTOModule *); - // Set the destination module. - void setModule(struct LTOModule *); + /// Set the destination module. + void setModule(std::unique_ptr M); - void setTargetOptions(TargetOptions options); + void setTargetOptions(TargetOptions Options); void setDebugInfo(lto_debug_model); - void setCodePICModel(lto_codegen_model); + void setCodePICModel(Reloc::Model Model) { RelocModel = Model; } + + /// Set the file type to be emitted (assembly or object code). + /// The default is TargetMachine::CGFT_ObjectFile. + void setFileType(TargetMachine::CodeGenFileType FT) { FileType = FT; } - void setCpu(const char *mCpu) { MCpu = mCpu; } - void setAttr(const char *mAttr) { MAttr = mAttr; } - void setOptLevel(unsigned optLevel) { OptLevel = optLevel; } + void setCpu(const char *MCpu) { this->MCpu = MCpu; } + void setAttr(const char *MAttr) { this->MAttr = MAttr; } + void setOptLevel(unsigned OptLevel); void setShouldInternalize(bool Value) { ShouldInternalize = Value; } void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; } - void addMustPreserveSymbol(StringRef sym) { MustPreserveSymbols[sym] = 1; } + void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; } - // To pass options to the driver and optimization passes. 
-  // To pass options to the driver and optimization passes. These options are
-  // not necessarily for debugging purpose (The function name is misleading).
-  // This function should be called before LTOCodeGenerator::compilexxx(),
-  // and LTOCodeGenerator::writeMergedModules().
-  void setCodeGenDebugOptions(const char *opts);
+  /// Pass options to the driver and optimization passes.
+  ///
+  /// These options are not necessarily for debugging purpose (the function
+  /// name is misleading). This function should be called before
+  /// LTOCodeGenerator::compilexxx(), and
+  /// LTOCodeGenerator::writeMergedModules().
+  void setCodeGenDebugOptions(const char *Opts);

-  // Parse the options set in setCodeGenDebugOptions. Like
-  // setCodeGenDebugOptions, this must be called before
-  // LTOCodeGenerator::compilexxx() and LTOCodeGenerator::writeMergedModules()
+  /// Parse the options set in setCodeGenDebugOptions.
+  ///
+  /// Like \a setCodeGenDebugOptions(), this must be called before
+  /// LTOCodeGenerator::compilexxx() and
+  /// LTOCodeGenerator::writeMergedModules().
   void parseCodeGenDebugOptions();

-  // Write the merged module to the file specified by the given path.
-  // Return true on success.
-  bool writeMergedModules(const char *path, std::string &errMsg);
+  /// Write the merged module to the file specified by the given path. Return
+  /// true on success.
+  bool writeMergedModules(const char *Path);

-  // Compile the merged module into a *single* object file; the path to object
-  // file is returned to the caller via argument "name". Return true on
-  // success.
-  //
-  // NOTE that it is up to the linker to remove the intermediate object file.
-  // Do not try to remove the object file in LTOCodeGenerator's destructor
-  // as we don't who (LTOCodeGenerator or the obj file) will last longer.
-  bool compile_to_file(const char **name,
-                       bool disableInline,
-                       bool disableGVNLoadPRE,
-                       bool disableVectorization,
-                       std::string &errMsg);
+  /// Compile the merged module into a *single* output file; the path to output
+  /// file is returned to the caller via argument "name". Return true on
+  /// success.
+  ///
+  /// \note It is up to the linker to remove the intermediate output file. Do
+  /// not try to remove the object file in LTOCodeGenerator's destructor as we
+  /// don't know who (LTOCodeGenerator or the output file) will last longer.
+  bool compile_to_file(const char **Name, bool DisableVerify,
+                       bool DisableInline, bool DisableGVNLoadPRE,
+                       bool DisableVectorization);

-  // As with compile_to_file(), this function compiles the merged module into
-  // single object file. Instead of returning the object-file-path to the caller
-  // (linker), it brings the object to a buffer, and return the buffer to the
-  // caller. This function should delete intermediate object file once its content
-  // is brought to memory. Return NULL if the compilation was not successful.
+  /// As with compile_to_file(), this function compiles the merged module into
+  /// a single output file. Instead of returning the output file path to the
+  /// caller (linker), it brings the output to a buffer, and returns the buffer
+  /// to the caller. This function should delete the intermediate file once
+  /// its content is brought to memory. Return NULL if the compilation was not
+  /// successful.
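+  ///
+  /// Illustrative call shape (hypothetical client code, where \p CG is an
+  /// already-configured LTOCodeGenerator; not part of this interface):
+  /// \code
+  ///   std::unique_ptr<MemoryBuffer> Buf =
+  ///       CG.compile(/*DisableVerify=*/false, /*DisableInline=*/false,
+  ///                  /*DisableGVNLoadPRE=*/false,
+  ///                  /*DisableVectorization=*/false);
+  /// \endcode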
+  std::unique_ptr<MemoryBuffer> compile(bool DisableVerify, bool DisableInline,
+                                        bool DisableGVNLoadPRE,
+                                        bool DisableVectorization);

-  // Optimizes the merged module. Returns true on success.
-  bool optimize(bool disableInline,
-                bool disableGVNLoadPRE,
-                bool disableVectorization,
-                std::string &errMsg);
+  /// Optimizes the merged module. Returns true on success.
+  bool optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE,
+                bool DisableVectorization);

-  // Compiles the merged optimized module into a single object file. It brings
-  // the object to a buffer, and returns the buffer to the caller. Return NULL
-  // if the compilation was not successful.
-  std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
+  /// Compiles the merged optimized module into a single output file. It brings
+  /// the output to a buffer, and returns the buffer to the caller. Return NULL
+  /// if the compilation was not successful.
+  std::unique_ptr<MemoryBuffer> compileOptimized();
+
+  /// Compile the merged optimized module into out.size() output files each
+  /// representing a linkable partition of the module. If out contains more
+  /// than one element, code generation is done in parallel with out.size()
+  /// threads. Output files will be written to members of out. Returns true on
+  /// success.
+  bool compileOptimized(ArrayRef<raw_pwrite_stream *> Out);

   void setDiagnosticHandler(lto_diagnostic_handler_t, void *);

   LLVMContext &getContext() { return Context; }

+  void resetMergedModule() { MergedModule.reset(); }
+
 private:
   void initializeLTOPasses();

-  bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
-  bool compileOptimizedToFile(const char **name, std::string &errMsg);
+  bool compileOptimizedToFile(const char **Name);
   void applyScopeRestrictions();
   void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
                         std::vector<const char *> &MustPreserveList,
                         SmallPtrSetImpl<GlobalValue *> &AsmUsed,
                         Mangler &Mangler);
-  bool determineTarget(std::string &errMsg);
+  bool determineTarget();

   static void DiagnosticHandler(const DiagnosticInfo &DI, void *Context);

   void DiagnosticHandler2(const DiagnosticInfo &DI);

+  void emitError(const std::string &ErrMsg);
+
   typedef StringMap<uint8_t> StringSet;

-  void destroyMergedModule();
-  std::unique_ptr<LLVMContext> OwnedContext;
   LLVMContext &Context;
-  Linker IRLinker;
-  TargetMachine *TargetMach = nullptr;
+  std::unique_ptr<Module> MergedModule;
+  std::unique_ptr<Linker> TheLinker;
+  std::unique_ptr<TargetMachine> TargetMach;
   bool EmitDwarfDebugInfo = false;
   bool ScopeRestrictionsDone = false;
-  lto_codegen_model CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
+  Reloc::Model RelocModel = Reloc::Default;
   StringSet MustPreserveSymbols;
   StringSet AsmUndefinedRefs;
-  std::vector<char *> CodegenOptions;
+  std::vector<std::string> CodegenOptions;
+  std::string FeatureStr;
   std::string MCpu;
   std::string MAttr;
   std::string NativeObjectPath;
   TargetOptions Options;
+  CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
   unsigned OptLevel = 2;
   lto_diagnostic_handler_t DiagHandler = nullptr;
   void *DiagContext = nullptr;
-  LTOModule *OwnedModule = nullptr;
   bool ShouldInternalize = true;
   bool ShouldEmbedUselists = false;
+  TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
 };
 }
 #endif
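//===----------------------------------------------------------------------===//
// Usage sketch for the revised LTO interfaces above (hypothetical client
// code, not part of this patch): the LLVMContext is now borrowed rather than
// owned, module loading reports failure through ErrorOr, and compile_to_file
// takes the new DisableVerify flag. The input path, TargetOptions value, and
// error handling are illustrative assumptions.
//===----------------------------------------------------------------------===//
// #include "llvm/LTO/LTOCodeGenerator.h"
// #include "llvm/LTO/LTOModule.h"
//
//   static bool runLTOSketch(llvm::LLVMContext &Ctx, llvm::TargetOptions Opts) {
//     using namespace llvm;
//     ErrorOr<std::unique_ptr<LTOModule>> ModOrErr =
//         LTOModule::createFromFile(Ctx, "input.o", Opts);
//     if (!ModOrErr)
//       return false;                // Error code carried by the ErrorOr.
//     LTOCodeGenerator CG(Ctx);      // Borrows Ctx; diagnostics flow to it.
//     if (!CG.addModule(ModOrErr->get()))
//       return false;
//     CG.setOptLevel(2);             // Mapped to a CodeGenOpt level internally.
//     const char *ObjPath = nullptr; // The linker removes this intermediate.
//     return CG.compile_to_file(&ObjPath, /*DisableVerify=*/false,
//                               /*DisableInline=*/false,
//                               /*DisableGVNLoadPRE=*/false,
//                               /*DisableVectorization=*/false);
//   }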
diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h
index c4e2be627399..97b5865bd47f 100644
--- a/include/llvm/LTO/LTOModule.h
+++ b/include/llvm/LTO/LTOModule.h
@@ -74,6 +74,11 @@ public:
   static bool isBitcodeForTarget(MemoryBuffer *memBuffer,
                                  StringRef triplePrefix);

+  /// Returns a string representing the producer identification stored in the
+  /// bitcode, or "" if the bitcode does not contain any.
+  ///
+  static std::string getProducerString(MemoryBuffer *Buffer);
+
   /// Create a MemoryBuffer from a memory range with an optional name.
   static std::unique_ptr<MemoryBuffer> makeBuffer(const void *mem,
                                                   size_t length,
                                                   StringRef name = "");
@@ -86,25 +91,24 @@ public:
   /// InitializeAllTargetMCs();
   /// InitializeAllAsmPrinters();
   /// InitializeAllAsmParsers();
-  static LTOModule *createFromFile(const char *path, TargetOptions options,
-                                   std::string &errMsg);
-  static LTOModule *createFromOpenFile(int fd, const char *path, size_t size,
-                                       TargetOptions options,
-                                       std::string &errMsg);
-  static LTOModule *createFromOpenFileSlice(int fd, const char *path,
-                                            size_t map_size, off_t offset,
-                                            TargetOptions options,
-                                            std::string &errMsg);
-  static LTOModule *createFromBuffer(const void *mem, size_t length,
-                                     TargetOptions options, std::string &errMsg,
-                                     StringRef path = "");
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromFile(LLVMContext &Context, const char *path, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromOpenFile(LLVMContext &Context, int fd, const char *path,
+                     size_t size, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path,
+                          size_t map_size, off_t offset, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromBuffer(LLVMContext &Context, const void *mem, size_t length,
+                   TargetOptions options, StringRef path = "");

-  static LTOModule *createInLocalContext(const void *mem, size_t length,
-                                         TargetOptions options,
-                                         std::string &errMsg, StringRef path);
-  static LTOModule *createInContext(const void *mem, size_t length,
-                                    TargetOptions options, std::string &errMsg,
-                                    StringRef path, LLVMContext *Context);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createInLocalContext(const void *mem, size_t length, TargetOptions options,
+                       StringRef path);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createInContext(const void *mem, size_t length, TargetOptions options,
+                  StringRef path, LLVMContext *Context);

   const Module &getModule() const {
     return const_cast<LTOModule *>(this)->getModule();
@@ -113,6 +117,8 @@ public:
     return IRFile->getModule();
   }

+  std::unique_ptr<Module> takeModule() { return IRFile->takeModule(); }
+
   /// Return the Module's target triple.
   const std::string &getTargetTriple() {
     return getModule().getTargetTriple();
@@ -163,7 +169,7 @@ private:
   /// Parse the symbols from the module and module-level ASM and add them to
   /// either the defined or undefined lists.
-  bool parseSymbols(std::string &errMsg);
+  void parseSymbols();

   /// Add a symbol which isn't defined just yet to a list to be resolved later.
   void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
@@ -200,8 +206,9 @@ private:
   bool objcClassNameFromExpression(const Constant *c, std::string &name);

   /// Create an LTOModule (private version).
-  static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
-                                  std::string &errMsg, LLVMContext *Context);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
+                LLVMContext *Context);
 };
 }
 #endif
diff --git a/include/llvm/LibDriver/LibDriver.h b/include/llvm/LibDriver/LibDriver.h
index aaaa7b7d21c3..09495650c1b9 100644
--- a/include/llvm/LibDriver/LibDriver.h
+++ b/include/llvm/LibDriver/LibDriver.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // Defines an interface to a lib.exe-compatible driver that also understands
-// bitcode files. Used by llvm-lib and lld-link2 /lib.
+// bitcode files. Used by llvm-lib and lld-link /lib.
// //===----------------------------------------------------------------------===// diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index cea5530db3b8..29fcd93a2a1c 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -17,8 +17,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/DomPrinter.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" @@ -26,6 +29,9 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionPrinter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" @@ -52,21 +58,18 @@ namespace { (void) llvm::createAAEvalPass(); (void) llvm::createAggressiveDCEPass(); (void) llvm::createBitTrackingDCEPass(); - (void) llvm::createAliasAnalysisCounterPass(); - (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); - (void) llvm::createBasicAliasAnalysisPass(); - (void) llvm::createLibCallAliasAnalysisPass(nullptr); - (void) llvm::createScalarEvolutionAliasAnalysisPass(); - (void) llvm::createTypeBasedAliasAnalysisPass(); - (void) llvm::createScopedNoAliasAAPass(); + (void) llvm::createBasicAAWrapperPass(); + (void) llvm::createSCEVAAWrapperPass(); + (void) llvm::createTypeBasedAAWrapperPass(); + (void) llvm::createScopedNoAliasAAWrapperPass(); (void) llvm::createBoundsCheckingPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); - (void) llvm::createCFLAliasAnalysisPass(); + (void) llvm::createCFLAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); @@ -82,12 +85,15 @@ namespace { (void) llvm::createDomOnlyViewerPass(); (void) llvm::createDomViewerPass(); (void) llvm::createGCOVProfilerPass(); + (void) llvm::createPGOInstrumentationGenPass(); + (void) llvm::createPGOInstrumentationUsePass(); (void) llvm::createInstrProfilingPass(); + (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); - (void) llvm::createGlobalsModRefPass(); + (void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createInductiveRangeCheckEliminationPass(); @@ -110,8 +116,7 @@ namespace { (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); (void) llvm::createNaryReassociatePass(); - (void) llvm::createNoAAPass(); - (void) llvm::createObjCARCAliasAnalysisPass(); + (void) llvm::createObjCARCAAWrapperPass(); (void) llvm::createObjCARCAPElimPass(); (void) llvm::createObjCARCExpandPass(); (void) llvm::createObjCARCContractPass(); @@ -179,7 +184,7 @@ namespace { (void) llvm::createEliminateAvailableExternallyPass(); (void)new llvm::IntervalPartition(); - (void)new llvm::ScalarEvolution(); + 
      (void)new llvm::ScalarEvolutionWrapperPass();
      ((llvm::Function*)nullptr)->viewCFGOnly();
      llvm::RGPassManager RGM;
      ((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM);
diff --git a/include/llvm/Linker/IRMover.h b/include/llvm/Linker/IRMover.h
new file mode 100644
index 000000000000..a964cc4b72c5
--- /dev/null
+++ b/include/llvm/Linker/IRMover.h
@@ -0,0 +1,76 @@
+//===- IRMover.h ------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LINKER_IRMOVER_H
+#define LLVM_LINKER_IRMOVER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include <functional>
+
+namespace llvm {
+class GlobalValue;
+class MDNode;
+class Module;
+class StructType;
+class Type;
+
+class IRMover {
+  struct StructTypeKeyInfo {
+    struct KeyTy {
+      ArrayRef<Type *> ETypes;
+      bool IsPacked;
+      KeyTy(ArrayRef<Type *> E, bool P);
+      KeyTy(const StructType *ST);
+      bool operator==(const KeyTy &that) const;
+      bool operator!=(const KeyTy &that) const;
+    };
+    static StructType *getEmptyKey();
+    static StructType *getTombstoneKey();
+    static unsigned getHashValue(const KeyTy &Key);
+    static unsigned getHashValue(const StructType *ST);
+    static bool isEqual(const KeyTy &LHS, const StructType *RHS);
+    static bool isEqual(const StructType *LHS, const StructType *RHS);
+  };
+
+public:
+  class IdentifiedStructTypeSet {
+    // The set of opaque types in the composite module.
+    DenseSet<StructType *> OpaqueStructTypes;
+
+    // The set of identified but non opaque structures in the composite module.
+    DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypes;
+
+  public:
+    void addNonOpaque(StructType *Ty);
+    void switchToNonOpaque(StructType *Ty);
+    void addOpaque(StructType *Ty);
+    StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
+    bool hasType(StructType *Ty);
+  };
+
+  IRMover(Module &M);
+
+  typedef std::function<void(GlobalValue &)> ValueAdder;
+  /// Move in the provided values. The source is destroyed.
+  /// Returns true on error.
+  bool move(Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+            std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+            DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+            bool IsMetadataLinkingPostpass = false);
+  Module &getModule() { return Composite; }
+
+private:
+  Module &Composite;
+  IdentifiedStructTypeSet IdentifiedStructTypes;
+};
+
+} // End llvm namespace
+
+#endif
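//===----------------------------------------------------------------------===//
// Sketch of driving IRMover directly (hypothetical client code, not part of
// this patch): values in ValuesToLink are moved eagerly, and the AddLazyFor
// callback decides, per referenced global, whether additional values get
// pulled in. The permissive policy below links everything it is asked about.
//===----------------------------------------------------------------------===//
//
//   static bool moveOneFunction(llvm::IRMover &Mover, llvm::Module &Src,
//                               llvm::GlobalValue *F) {
//     // Src is consumed by the move, per the contract documented above.
//     return Mover.move(Src, {F},
//                       [](llvm::GlobalValue &GV,
//                          llvm::IRMover::ValueAdder Add) {
//                         Add(GV); // Link referenced globals in as well.
//                       }); // Returns true on error, as documented above.
//   }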
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index c43b90e9cd26..dde3f73883ca 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -10,10 +10,8 @@
 #ifndef LLVM_LINKER_LINKER_H
 #define LLVM_LINKER_LINKER_H

-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/Linker/IRMover.h"

 namespace llvm {
 class Module;
 class StructType;
@@ -25,71 +23,55 @@ class Type;
 /// module since it is assumed that the user of this class will want to do
 /// something with it after the linking.
 class Linker {
+  IRMover Mover;
+
 public:
-  struct StructTypeKeyInfo {
-    struct KeyTy {
-      ArrayRef<Type *> ETypes;
-      bool IsPacked;
-      KeyTy(ArrayRef<Type *> E, bool P);
-      KeyTy(const StructType *ST);
-      bool operator==(const KeyTy &that) const;
-      bool operator!=(const KeyTy &that) const;
-    };
-    static StructType *getEmptyKey();
-    static StructType *getTombstoneKey();
-    static unsigned getHashValue(const KeyTy &Key);
-    static unsigned getHashValue(const StructType *ST);
-    static bool isEqual(const KeyTy &LHS, const StructType *RHS);
-    static bool isEqual(const StructType *LHS, const StructType *RHS);
+  enum Flags {
+    None = 0,
+    OverrideFromSrc = (1 << 0),
+    LinkOnlyNeeded = (1 << 1),
+    InternalizeLinkedSymbols = (1 << 2)
   };

-  typedef DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypeSet;
-  typedef DenseSet<StructType *> OpaqueStructTypeSet;
+  Linker(Module &M);

-  struct IdentifiedStructTypeSet {
-    // The set of opaque types is the composite module.
-    OpaqueStructTypeSet OpaqueStructTypes;
-
-    // The set of identified but non opaque structures in the composite module.
-    NonOpaqueStructTypeSet NonOpaqueStructTypes;
-
-    void addNonOpaque(StructType *Ty);
-    void switchToNonOpaque(StructType *Ty);
-    void addOpaque(StructType *Ty);
-    StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
-    bool hasType(StructType *Ty);
-  };
-
-  Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
-  Linker(Module *M);
-  ~Linker();
-
-  Module *getModule() const { return Composite; }
-  void deleteModule();
-
-  /// \brief Link \p Src into the composite. The source is destroyed.
+  /// \brief Link \p Src into the composite.
+  ///
   /// Passing OverrideSymbols as true will have symbols from Src
   /// shadow those in the Dest.
+  /// For ThinLTO function importing/exporting the \p FunctionInfoIndex
+  /// is passed. If \p FunctionsToImport is provided, only the functions that
+  /// are part of the set will be imported from the source module.
+  /// The \p ValIDToTempMDMap is populated by the linker when function
+  /// importing is performed.
+  ///
   /// Returns true on error.
-  bool linkInModule(Module *Src, bool OverrideSymbols = false);
+  bool linkInModule(std::unique_ptr<Module> Src, unsigned Flags = Flags::None,
+                    const FunctionInfoIndex *Index = nullptr,
+                    DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+                    DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr);

-  /// \brief Set the composite to the passed-in module.
-  void setModule(Module *Dst);
+  /// This exists to implement the deprecated LLVMLinkModules C API. Don't use
+  /// for anything else.
+  bool linkInModuleForCAPI(Module &Src);

-  static bool LinkModules(Module *Dest, Module *Src,
-                          DiagnosticHandlerFunction DiagnosticHandler);
+  static bool linkModules(Module &Dest, std::unique_ptr<Module> Src,
+                          unsigned Flags = Flags::None);

-  static bool LinkModules(Module *Dest, Module *Src);
-
-private:
-  void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
-  Module *Composite;
-
-  IdentifiedStructTypeSet IdentifiedStructTypes;
-
-  DiagnosticHandlerFunction DiagnosticHandler;
+  /// \brief Link metadata from \p Src into the composite. The source is
+  /// destroyed.
+  ///
+  /// The \p ValIDToTempMDMap should have been populated earlier during
+  /// function importing from \p Src.
+  bool linkInMetadata(Module &Src,
+                      DenseMap<unsigned, MDNode *> *ValIDToTempMDMap);
 };

+/// Create a new module with exported local functions renamed and promoted
+/// for ThinLTO.
+std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M,
+                                               const FunctionInfoIndex *Index);
+
 } // End llvm namespace

 #endif
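//===----------------------------------------------------------------------===//
// Sketch of the new Linker ownership model (hypothetical client code, not
// part of this patch): the source module is now passed by unique_ptr and is
// destroyed by the link, and behavior is tuned with the Flags bitmask above.
//===----------------------------------------------------------------------===//
//
//   static bool mergeModulesSketch(llvm::Module &Dest,
//                                  std::unique_ptr<llvm::Module> Src) {
//     llvm::Linker L(Dest);
//     // Returns true on error; OverrideFromSrc lets Src's symbols shadow
//     // those already present in Dest.
//     return L.linkInModule(std::move(Src),
//                           llvm::Linker::Flags::OverrideFromSrc);
//   }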
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index 9aa4663ba0fc..552e1443e7d0 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -17,6 +17,7 @@

 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/SMLoc.h"

 namespace llvm {
 class MCContext;
@@ -26,11 +27,12 @@ class MCStreamer;
 class MCSymbol;

 struct ConstantPoolEntry {
-  ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz)
-      : Label(L), Value(Val), Size(Sz) {}
+  ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz, SMLoc Loc_)
+      : Label(L), Value(Val), Size(Sz), Loc(Loc_) {}
   MCSymbol *Label;
   const MCExpr *Value;
   unsigned Size;
+  SMLoc Loc;
 };

 // A class to keep track of assembler-generated constant pools that are used to
@@ -49,7 +51,7 @@ public:
   //
   // \returns a MCExpr that references the newly inserted value
   const MCExpr *addEntry(const MCExpr *Value, MCContext &Context,
-                         unsigned Size);
+                         unsigned Size, SMLoc Loc);

   // Emit the contents of the constant pool using the provided streamer.
   void emitEntries(MCStreamer &Streamer);
@@ -80,7 +82,7 @@ public:
   void emitAll(MCStreamer &Streamer);
   void emitForCurrentSection(MCStreamer &Streamer);
   const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr,
-                         unsigned Size);
+                         unsigned Size, SMLoc Loc);

 private:
   ConstantPool *getConstantPool(MCSection *Section);
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 2bfad2d355b8..51312ff80447 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -67,6 +67,11 @@ public:
   /// Get the number of target specific fixup kinds.
   virtual unsigned getNumFixupKinds() const = 0;

+  /// Map a relocation name used in .reloc to a fixup kind.
+  /// Returns true and sets MappedKind if Name is successfully mapped.
+  /// Otherwise returns false and leaves MappedKind unchanged.
+  virtual bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const;
+
   /// Get information on a fixup kind.
   virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;

diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 9bb0fa63c523..384584ef4ef0 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -414,6 +414,15 @@ public:
   /// syntactically correct.
   virtual bool isValidUnquotedName(StringRef Name) const;

+  /// Return true if the .section directive should be omitted when
+  /// emitting \p SectionName.
For example: + /// + /// shouldOmitSectionDirective(".text") + /// + /// returns false => .section .text,#alloc,#execinstr + /// returns true => .text + virtual bool shouldOmitSectionDirective(StringRef SectionName) const; + bool usesSunStyleELFSectionSwitchSyntax() const { return SunStyleELFSectionSwitchSyntax; } diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 0642af837e7e..c0bd12875839 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -10,23 +10,18 @@ #ifndef LLVM_MC_MCASSEMBLER_H #define LLVM_MC_MCASSEMBLER_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCLinkerOptimizationHint.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/DataTypes.h" -#include -#include // FIXME: Shouldn't be needed. +#include "llvm/MC/MCSymbol.h" namespace llvm { class raw_ostream; @@ -42,476 +37,6 @@ class MCSubtargetInfo; class MCValue; class MCAsmBackend; -class MCFragment : public ilist_node { - friend class MCAsmLayout; - - MCFragment(const MCFragment &) = delete; - void operator=(const MCFragment &) = delete; - -public: - enum FragmentType : uint8_t { - FT_Align, - FT_Data, - FT_CompactEncodedInst, - FT_Fill, - FT_Relaxable, - FT_Org, - FT_Dwarf, - FT_DwarfFrame, - FT_LEB, - FT_SafeSEH - }; - -private: - FragmentType Kind; - -protected: - bool HasInstructions; - -private: - /// \brief Should this fragment be aligned to the end of a bundle? - bool AlignToBundleEnd; - - uint8_t BundlePadding; - - /// LayoutOrder - The layout order of this fragment. - unsigned LayoutOrder; - - /// The data for the section this fragment is in. - MCSection *Parent; - - /// Atom - The atom this fragment is in, as represented by it's defining - /// symbol. - const MCSymbol *Atom; - - /// \name Assembler Backend Data - /// @{ - // - // FIXME: This could all be kept private to the assembler implementation. - - /// Offset - The offset of this fragment in its section. This is ~0 until - /// initialized. - uint64_t Offset; - - /// @} - -protected: - MCFragment(FragmentType Kind, bool HasInstructions, - uint8_t BundlePadding, MCSection *Parent = nullptr); - - ~MCFragment(); -private: - - // This is a friend so that the sentinal can be created. - friend struct ilist_sentinel_traits; - MCFragment(); - -public: - /// Destroys the current fragment. - /// - /// This must be used instead of delete as MCFragment is non-virtual. - /// This method will dispatch to the appropriate subclass. - void destroy(); - - FragmentType getKind() const { return Kind; } - - MCSection *getParent() const { return Parent; } - void setParent(MCSection *Value) { Parent = Value; } - - const MCSymbol *getAtom() const { return Atom; } - void setAtom(const MCSymbol *Value) { Atom = Value; } - - unsigned getLayoutOrder() const { return LayoutOrder; } - void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } - - /// \brief Does this fragment have instructions emitted into it? By default - /// this is false, but specific fragment types may set it to true. 
- bool hasInstructions() const { return HasInstructions; } - - /// \brief Should this fragment be placed at the end of an aligned bundle? - bool alignToBundleEnd() const { return AlignToBundleEnd; } - void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; } - - /// \brief Get the padding size that must be inserted before this fragment. - /// Used for bundling. By default, no padding is inserted. - /// Note that padding size is restricted to 8 bits. This is an optimization - /// to reduce the amount of space used for each fragment. In practice, larger - /// padding should never be required. - uint8_t getBundlePadding() const { return BundlePadding; } - - /// \brief Set the padding size for this fragment. By default it's a no-op, - /// and only some fragments have a meaningful implementation. - void setBundlePadding(uint8_t N) { BundlePadding = N; } - - void dump(); -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -/// -class MCEncodedFragment : public MCFragment { -protected: - MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions, - MCSection *Sec) - : MCFragment(FType, HasInstructions, 0, Sec) {} - -public: - static bool classof(const MCFragment *F) { - MCFragment::FragmentType Kind = F->getKind(); - switch (Kind) { - default: - return false; - case MCFragment::FT_Relaxable: - case MCFragment::FT_CompactEncodedInst: - case MCFragment::FT_Data: - return true; - } - } -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -/// -template -class MCEncodedFragmentWithContents : public MCEncodedFragment { - SmallVector Contents; - -protected: - MCEncodedFragmentWithContents(MCFragment::FragmentType FType, - bool HasInstructions, - MCSection *Sec) - : MCEncodedFragment(FType, HasInstructions, Sec) {} - -public: - SmallVectorImpl &getContents() { return Contents; } - const SmallVectorImpl &getContents() const { return Contents; } -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data and also have fixups registered. -/// -template -class MCEncodedFragmentWithFixups : - public MCEncodedFragmentWithContents { - - /// Fixups - The list of fixups in this fragment. - SmallVector Fixups; - -protected: - MCEncodedFragmentWithFixups(MCFragment::FragmentType FType, - bool HasInstructions, - MCSection *Sec) - : MCEncodedFragmentWithContents(FType, HasInstructions, - Sec) {} - -public: - typedef SmallVectorImpl::const_iterator const_fixup_iterator; - typedef SmallVectorImpl::iterator fixup_iterator; - - SmallVectorImpl &getFixups() { return Fixups; } - const SmallVectorImpl &getFixups() const { return Fixups; } - - fixup_iterator fixup_begin() { return Fixups.begin(); } - const_fixup_iterator fixup_begin() const { return Fixups.begin(); } - - fixup_iterator fixup_end() { return Fixups.end(); } - const_fixup_iterator fixup_end() const { return Fixups.end(); } - - static bool classof(const MCFragment *F) { - MCFragment::FragmentType Kind = F->getKind(); - return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data; - } -}; - -/// Fragment for data and encoded instructions. 
-/// -class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> { -public: - MCDataFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {} - - void setHasInstructions(bool V) { HasInstructions = V; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Data; - } -}; - -/// This is a compact (memory-size-wise) fragment for holding an encoded -/// instruction (non-relaxable) that has no fixups registered. When applicable, -/// it can be used instead of MCDataFragment and lead to lower memory -/// consumption. -/// -class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> { -public: - MCCompactEncodedInstFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) { - } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_CompactEncodedInst; - } -}; - -/// A relaxable fragment holds on to its MCInst, since it may need to be -/// relaxed during the assembler layout and relaxation stage. -/// -class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> { - - /// Inst - The instruction this is a fragment for. - MCInst Inst; - - /// STI - The MCSubtargetInfo in effect when the instruction was encoded. - /// Keep a copy instead of a reference to make sure that updates to STI - /// in the assembler are not seen here. - const MCSubtargetInfo STI; - -public: - MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI, - MCSection *Sec = nullptr) - : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec), - Inst(Inst), STI(STI) {} - - const MCInst &getInst() const { return Inst; } - void setInst(const MCInst &Value) { Inst = Value; } - - const MCSubtargetInfo &getSubtargetInfo() { return STI; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Relaxable; - } -}; - -class MCAlignFragment : public MCFragment { - - /// Alignment - The alignment to ensure, in bytes. - unsigned Alignment; - - /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead - /// of using the provided value. The exact interpretation of this flag is - /// target dependent. - bool EmitNops : 1; - - /// Value - Value to use for filling padding bytes. - int64_t Value; - - /// ValueSize - The size of the integer (in bytes) of \p Value. - unsigned ValueSize; - - /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment - /// cannot be satisfied in this width then this fragment is ignored. - unsigned MaxBytesToEmit; - -public: - MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, - unsigned MaxBytesToEmit, MCSection *Sec = nullptr) - : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment), - EmitNops(false), Value(Value), - ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} - - /// \name Accessors - /// @{ - - unsigned getAlignment() const { return Alignment; } - - int64_t getValue() const { return Value; } - - unsigned getValueSize() const { return ValueSize; } - - unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } - - bool hasEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Align; - } -}; - -class MCFillFragment : public MCFragment { - - /// Value - Value to use for filling bytes. 
- int64_t Value; - - /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if - /// this is a virtual fill fragment. - unsigned ValueSize; - - /// Size - The number of bytes to insert. - uint64_t Size; - -public: - MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size, - MCSection *Sec = nullptr) - : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize), - Size(Size) { - assert((!ValueSize || (Size % ValueSize) == 0) && - "Fill size must be a multiple of the value size!"); - } - - /// \name Accessors - /// @{ - - int64_t getValue() const { return Value; } - - unsigned getValueSize() const { return ValueSize; } - - uint64_t getSize() const { return Size; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Fill; - } -}; - -class MCOrgFragment : public MCFragment { - - /// Offset - The offset this fragment should start at. - const MCExpr *Offset; - - /// Value - Value to use for filling bytes. - int8_t Value; - -public: - MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr) - : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {} - - /// \name Accessors - /// @{ - - const MCExpr &getOffset() const { return *Offset; } - - uint8_t getValue() const { return Value; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Org; - } -}; - -class MCLEBFragment : public MCFragment { - - /// Value - The value this fragment should contain. - const MCExpr *Value; - - /// IsSigned - True if this is a sleb128, false if uleb128. - bool IsSigned; - - SmallString<8> Contents; - -public: - MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) - : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - const MCExpr &getValue() const { return *Value; } - - bool isSigned() const { return IsSigned; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_LEB; - } -}; - -class MCDwarfLineAddrFragment : public MCFragment { - - /// LineDelta - the value of the difference between the two line numbers - /// between two .loc dwarf directives. - int64_t LineDelta; - - /// AddrDelta - The expression for the difference of the two symbols that - /// make up the address delta between two .loc dwarf directives. - const MCExpr *AddrDelta; - - SmallString<8> Contents; - -public: - MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta, - MCSection *Sec = nullptr) - : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta), - AddrDelta(&AddrDelta) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - int64_t getLineDelta() const { return LineDelta; } - - const MCExpr &getAddrDelta() const { return *AddrDelta; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Dwarf; - } -}; - -class MCDwarfCallFrameFragment : public MCFragment { - - /// AddrDelta - The expression for the difference of the two symbols that - /// make up the address delta between two .cfi_* dwarf directives. 
- const MCExpr *AddrDelta; - - SmallString<8> Contents; - -public: - MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) - : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - const MCExpr &getAddrDelta() const { return *AddrDelta; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_DwarfFrame; - } -}; - -class MCSafeSEHFragment : public MCFragment { - const MCSymbol *Sym; - -public: - MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr) - : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {} - - /// \name Accessors - /// @{ - - const MCSymbol *getSymbol() { return Sym; } - const MCSymbol *getSymbol() const { return Sym; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_SafeSEH; - } -}; - // FIXME: This really doesn't belong here. See comments below. struct IndirectSymbolData { MCSymbol *Symbol; @@ -575,8 +100,6 @@ private: MCObjectWriter &Writer; - raw_ostream &OS; - SectionListType Sections; SymbolDataListType Symbols; @@ -591,6 +114,8 @@ private: /// List of declared file names std::vector FileNames; + MCDwarfLineTableParams LTParams; + /// The set of function symbols for which a .thumb_func directive has /// been seen. // @@ -607,6 +132,7 @@ private: unsigned RelaxAll : 1; unsigned SubsectionsViaSymbols : 1; + unsigned IncrementalLinkerCompatible : 1; /// ELF specific e_header flags // It would be good if there were an MCELFAssembler class to hold this. @@ -712,16 +238,13 @@ public: public: /// Construct a new assembler instance. - /// - /// \param OS The stream to output to. // // FIXME: How are we going to parameterize this? Two obvious options are stay // concrete and require clients to pass in a target like object. The other // option is to make this abstract, and have targets provide concrete // implementations as we do with AsmParser. MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, - MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, - raw_ostream &OS); + MCCodeEmitter &Emitter_, MCObjectWriter &Writer_); ~MCAssembler(); /// Reuse an assembler instance @@ -736,15 +259,28 @@ public: MCObjectWriter &getWriter() const { return Writer; } + MCDwarfLineTableParams getDWARFLinetableParams() const { return LTParams; } + void setDWARFLinetableParams(MCDwarfLineTableParams P) { LTParams = P; } + /// Finish - Do final processing and write the object to the output stream. /// \p Writer is used for custom object writer (as the MCJIT does), /// if not specified it is automatically created from backend. void Finish(); + // Layout all section and prepare them for emission. + void layout(MCAsmLayout &Layout); + // FIXME: This does not belong here. 
bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; } void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; } + bool isIncrementalLinkerCompatible() const { + return IncrementalLinkerCompatible; + } + void setIncrementalLinkerCompatible(bool Value) { + IncrementalLinkerCompatible = Value; + } + bool getRelaxAll() const { return RelaxAll; } void setRelaxAll(bool Value) { RelaxAll = Value; } @@ -856,13 +392,7 @@ public: /// \name Backend Data Access /// @{ - bool registerSection(MCSection &Section) { - if (Section.isRegistered()) - return false; - Sections.push_back(&Section); - Section.setIsRegistered(true); - return true; - } + bool registerSection(MCSection &Section); void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr); diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 41169e9a12a0..e5a9afd9968c 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -71,6 +72,10 @@ namespace llvm { /// objects. BumpPtrAllocator Allocator; + SpecificBumpPtrAllocator COFFAllocator; + SpecificBumpPtrAllocator ELFAllocator; + SpecificBumpPtrAllocator MachOAllocator; + /// Bindings of names to symbols. SymbolTable Symbols; @@ -108,7 +113,7 @@ namespace llvm { /// directive is used or it is an error. char *SecureLogFile; /// The stream that gets written to for the .secure_log_unique directive. - raw_ostream *SecureLog; + std::unique_ptr SecureLog; /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to /// catch errors if .secure_log_unique appears twice without /// .secure_log_reset appearing between them. @@ -203,9 +208,13 @@ namespace llvm { std::map COFFUniquingMap; StringMap ELFRelSecNames; + SpecificBumpPtrAllocator MCSubtargetAllocator; + /// Do automatic reset in destructor bool AutoReset; + bool HadError; + MCSymbol *createSymbolImpl(const StringMapEntry *Name, bool CanBeUnnamed); MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, @@ -376,6 +385,9 @@ namespace llvm { MCSectionCOFF *getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym); + // Create and save a copy of STI and return a reference to the copy. + MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI); + /// @} /// \name Dwarf Management @@ -494,9 +506,11 @@ namespace llvm { /// @} char *getSecureLogFile() { return SecureLogFile; } - raw_ostream *getSecureLog() { return SecureLog; } + raw_fd_ostream *getSecureLog() { return SecureLog.get(); } bool getSecureLogUsed() { return SecureLogUsed; } - void setSecureLog(raw_ostream *Value) { SecureLog = Value; } + void setSecureLog(std::unique_ptr Value) { + SecureLog = std::move(Value); + } void setSecureLogUsed(bool Value) { SecureLogUsed = Value; } void *allocate(unsigned Size, unsigned Align = 8) { @@ -504,11 +518,13 @@ namespace llvm { } void deallocate(void *Ptr) {} + bool hadError() { return HadError; } + void reportError(SMLoc L, const Twine &Msg); // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. // FIXME: We should really do something about that. 
LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, - const Twine &Msg) const; + const Twine &Msg); }; } // end namespace llvm @@ -538,7 +554,7 @@ namespace llvm { /// allocator supports it). /// \return The allocated memory. Could be NULL. inline void *operator new(size_t Bytes, llvm::MCContext &C, - size_t Alignment = 8) throw() { + size_t Alignment = 8) LLVM_NOEXCEPT { return C.allocate(Bytes, Alignment); } /// \brief Placement delete companion to the new above. @@ -547,8 +563,8 @@ inline void *operator new(size_t Bytes, llvm::MCContext &C, /// invoking it directly; see the new operator for more details. This operator /// is called implicitly by the compiler if a placement new expression using /// the MCContext throws in the object constructor. -inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) - throw () { +inline void operator delete(void *Ptr, llvm::MCContext &C, + size_t) LLVM_NOEXCEPT { C.deallocate(Ptr); } @@ -571,8 +587,8 @@ inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) /// \param Alignment The alignment of the allocated memory (if the underlying /// allocator supports it). /// \return The allocated memory. Could be NULL. -inline void *operator new[](size_t Bytes, llvm::MCContext& C, - size_t Alignment = 8) throw() { +inline void *operator new[](size_t Bytes, llvm::MCContext &C, + size_t Alignment = 8) LLVM_NOEXCEPT { return C.allocate(Bytes, Alignment); } @@ -582,7 +598,7 @@ inline void *operator new[](size_t Bytes, llvm::MCContext& C, /// invoking it directly; see the new[] operator for more details. This operator /// is called implicitly by the compiler if a placement new[] expression using /// the MCContext throws in the object constructor. -inline void operator delete[](void *Ptr, llvm::MCContext &C) throw () { +inline void operator delete[](void *Ptr, llvm::MCContext &C) LLVM_NOEXCEPT { C.deallocate(Ptr); } diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h index f9d66e0b15d7..326b2a1ac061 100644 --- a/include/llvm/MC/MCDirectives.h +++ b/include/llvm/MC/MCDirectives.h @@ -62,7 +62,9 @@ enum MCDataRegionType { enum MCVersionMinType { MCVM_IOSVersionMin, ///< .ios_version_min - MCVM_OSXVersionMin ///< .macosx_version_min + MCVM_OSXVersionMin, ///< .macosx_version_min + MCVM_TvOSVersionMin, ///< .tvos_version_min + MCVM_WatchOSVersionMin, ///< .watchos_version_min }; } // end namespace llvm diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 1e72dfee4ad1..8a50863a0c39 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCSection.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" @@ -31,7 +32,6 @@ namespace llvm { class MCAsmBackend; class MCContext; class MCObjectStreamer; -class MCSection; class MCStreamer; class MCSymbol; class SourceMgr; @@ -182,6 +182,19 @@ public: } }; +struct MCDwarfLineTableParams { + /// First special line opcode - leave room for the standard opcodes. + /// Note: If you want to change this, you'll have to update the + /// "StandardOpcodeLengths" table that is emitted in + /// \c Emit(). + uint8_t DWARF2LineOpcodeBase = 13; + /// Minimum line offset in a special line info. opcode. The value + /// -5 was chosen to give a reasonable range of values. + int8_t DWARF2LineBase = -5; + /// Range of line offsets in a special line info. opcode. 
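+  /// With these three parameters a DWARF "special opcode" encodes a
+  /// (LineDelta, AddrDelta) pair as
+  ///   Opcode = (LineDelta - DWARF2LineBase)
+  ///          + (DWARF2LineRange * AddrDelta) + DWARF2LineOpcodeBase
+  /// (worked example with the default values here, for illustration only:
+  /// LineDelta = 2 and AddrDelta = 3 encode as (2 + 5) + 14 * 3 + 13 = 62).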
+  uint8_t DWARF2LineRange = 14;
+};
+
 struct MCDwarfLineTableHeader {
   MCSymbol *Label;
   SmallVector<std::string, 3> MCDwarfDirs;
@@ -192,9 +205,11 @@ struct MCDwarfLineTableHeader {
   MCDwarfLineTableHeader() : Label(nullptr) {}
   unsigned getFile(StringRef &Directory, StringRef &FileName,
                    unsigned FileNumber = 0);
-  std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS) const;
+  std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS,
+                                         MCDwarfLineTableParams Params) const;
   std::pair<MCSymbol *, MCSymbol *>
-  Emit(MCStreamer *MCOS, ArrayRef<char> SpecialOpcodeLengths) const;
+  Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
+       ArrayRef<char> SpecialOpcodeLengths) const;
 };

 class MCDwarfDwoLineTable {
@@ -206,7 +221,7 @@ public:
   unsigned getFile(StringRef Directory, StringRef FileName) {
     return Header.getFile(Directory, FileName);
   }
-  void Emit(MCStreamer &MCOS) const;
+  void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params) const;
 };

 class MCDwarfLineTable {
@@ -215,10 +230,10 @@ class MCDwarfLineTable {

 public:
   // This emits the Dwarf file and the line tables for all Compile Units.
-  static void Emit(MCObjectStreamer *MCOS);
+  static void Emit(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params);

   // This emits the Dwarf file and the line tables for a given Compile Unit.
-  void EmitCU(MCObjectStreamer *MCOS) const;
+  void EmitCU(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params) const;

   unsigned getFile(StringRef &Directory, StringRef &FileName,
                    unsigned FileNumber = 0);
@@ -262,11 +277,12 @@ public:
 class MCDwarfLineAddr {
 public:
   /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
-  static void Encode(MCContext &Context, int64_t LineDelta, uint64_t AddrDelta,
-                     raw_ostream &OS);
+  static void Encode(MCContext &Context, MCDwarfLineTableParams Params,
+                     int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);

   /// Utility function to emit the encoding to a streamer.
- static void Emit(MCStreamer *MCOS, int64_t LineDelta, uint64_t AddrDelta); + static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta); }; class MCGenDwarfInfo { @@ -324,7 +340,8 @@ public: OpRestore, OpUndefined, OpRegister, - OpWindowSave + OpWindowSave, + OpGnuArgsSize }; private: @@ -438,6 +455,11 @@ public: return MCCFIInstruction(OpEscape, L, 0, 0, Vals); } + /// \brief A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE + static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size) { + return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, ""); + } + OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } @@ -457,7 +479,7 @@ public: int getOffset() const { assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || - Operation == OpAdjustCfaOffset); + Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize); return Offset; } diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 01f694d3b756..193dac018b2b 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -57,8 +57,6 @@ public: case Triple::PS4: case Triple::FreeBSD: return ELF::ELFOSABI_FREEBSD; - case Triple::Linux: - return ELF::ELFOSABI_LINUX; default: return ELF::ELFOSABI_NONE; } diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h index a5b257f5958b..6eb2c2c343ff 100644 --- a/include/llvm/MC/MCELFStreamer.h +++ b/include/llvm/MC/MCELFStreamer.h @@ -36,7 +36,6 @@ public: /// state management void reset() override { SeenIdent = false; - LocalCommons.clear(); BundleGroups.clear(); MCObjectStreamer::reset(); } @@ -69,7 +68,7 @@ public: void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitFileDirective(StringRef Filename) override; @@ -77,8 +76,6 @@ public: void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override; - void Flush() override; - void FinishImpl() override; void EmitBundleAlignMode(unsigned AlignPow2) override; @@ -97,14 +94,6 @@ private: bool SeenIdent; - struct LocalCommon { - const MCSymbol *Symbol; - uint64_t Size; - unsigned ByteAlignment; - }; - - std::vector LocalCommons; - /// BundleGroups - The stack of fragments holding the bundle-locked /// instructions. llvm::SmallVector BundleGroups; diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index b3a607351a82..1d6bdef0af27 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -20,6 +20,7 @@ class MCAsmLayout; class MCAssembler; class MCContext; class MCFixup; +class MCFragment; class MCSection; class MCStreamer; class MCSymbol; @@ -115,7 +116,7 @@ public: /// currently defined as the absolute section for constants, or /// otherwise the section associated with the first defined symbol in the /// expression. 
-  MCSection *findAssociatedSection() const;
+  MCFragment *findAssociatedFragment() const;

   /// @}
 };
@@ -187,6 +188,7 @@ public:
     VK_WEAKREF, // The link between the symbols in .weakref foo, bar

     VK_ARM_NONE,
+    VK_ARM_GOT_PREL,
     VK_ARM_TARGET1,
     VK_ARM_TARGET2,
     VK_ARM_PREL31,
@@ -556,7 +558,7 @@ public:
                                  const MCAsmLayout *Layout,
                                  const MCFixup *Fixup) const = 0;
   virtual void visitUsedExpr(MCStreamer& Streamer) const = 0;
-  virtual MCSection *findAssociatedSection() const = 0;
+  virtual MCFragment *findAssociatedFragment() const = 0;

   virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;

diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index ad99943df2c3..ad34d9494bb9 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -22,6 +22,8 @@ enum DecoderOps {
                   // uleb128 Val, uint16_t NumToSkip)
   OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
   OPC_Decode,         // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+  OPC_TryDecode,      // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx,
+                      //               uint16_t NumToSkip)
   OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
   OPC_Fail      // OPC_Fail()
 };
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
new file mode 100644
index 000000000000..7d6db525ce61
--- /dev/null
+++ b/include/llvm/MC/MCFragment.h
@@ -0,0 +1,506 @@
+//===- MCFragment.h - Fragment type hierarchy -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFRAGMENT_H
+#define LLVM_MC_MCFRAGMENT_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+class MCSection;
+class MCSymbol;
+class MCSubtargetInfo;
+
+class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
+  friend class MCAsmLayout;
+
+  MCFragment(const MCFragment &) = delete;
+  void operator=(const MCFragment &) = delete;
+
+public:
+  enum FragmentType : uint8_t {
+    FT_Align,
+    FT_Data,
+    FT_CompactEncodedInst,
+    FT_Fill,
+    FT_Relaxable,
+    FT_Org,
+    FT_Dwarf,
+    FT_DwarfFrame,
+    FT_LEB,
+    FT_SafeSEH,
+    FT_Dummy
+  };
+
+private:
+  FragmentType Kind;
+
+protected:
+  bool HasInstructions;
+
+private:
+  /// \brief Should this fragment be aligned to the end of a bundle?
+  bool AlignToBundleEnd;
+
+  uint8_t BundlePadding;
+
+  /// LayoutOrder - The layout order of this fragment.
+  unsigned LayoutOrder;
+
+  /// The data for the section this fragment is in.
+  MCSection *Parent;
+
+  /// Atom - The atom this fragment is in, as represented by its defining
+  /// symbol.
+  const MCSymbol *Atom;
+
+  /// \name Assembler Backend Data
+  /// @{
+  //
+  // FIXME: This could all be kept private to the assembler implementation.
+
+  /// Offset - The offset of this fragment in its section. This is ~0 until
+  /// initialized.
+  uint64_t Offset;
+
+  /// @}
+
+protected:
+  MCFragment(FragmentType Kind, bool HasInstructions,
+             uint8_t BundlePadding, MCSection *Parent = nullptr);
+
+  ~MCFragment();
+private:
+
+  // This is a friend so that the sentinel can be created.
+  friend struct ilist_sentinel_traits<MCFragment>;
+  MCFragment();
+
+public:
+  /// Destroys the current fragment.
+  ///
+  /// This must be used instead of delete as MCFragment is non-virtual.
+  /// This method will dispatch to the appropriate subclass.
+  void destroy();
+
+  FragmentType getKind() const { return Kind; }
+
+  MCSection *getParent() const { return Parent; }
+  void setParent(MCSection *Value) { Parent = Value; }
+
+  const MCSymbol *getAtom() const { return Atom; }
+  void setAtom(const MCSymbol *Value) { Atom = Value; }
+
+  unsigned getLayoutOrder() const { return LayoutOrder; }
+  void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+
+  /// \brief Does this fragment have instructions emitted into it? By default
+  /// this is false, but specific fragment types may set it to true.
+  bool hasInstructions() const { return HasInstructions; }
+
+  /// \brief Should this fragment be placed at the end of an aligned bundle?
+  bool alignToBundleEnd() const { return AlignToBundleEnd; }
+  void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
+
+  /// \brief Get the padding size that must be inserted before this fragment.
+  /// Used for bundling. By default, no padding is inserted.
+  /// Note that padding size is restricted to 8 bits. This is an optimization
+  /// to reduce the amount of space used for each fragment. In practice, larger
+  /// padding should never be required.
+  uint8_t getBundlePadding() const { return BundlePadding; }
+
+  /// \brief Set the padding size for this fragment. By default it's a no-op,
+  /// and only some fragments have a meaningful implementation.
+  void setBundlePadding(uint8_t N) { BundlePadding = N; }
+
+  /// \brief Return true if the given fragment has FT_Dummy type.
+  bool isDummy() const { return Kind == FT_Dummy; }
+
+  void dump();
+};
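+// Illustrative use of the hierarchy above (hypothetical client code, not part
+// of this patch): fragments are dispatched with the usual LLVM cast machinery
+// via classof(), and must be destroyed with destroy(), never delete, since
+// MCFragment is non-virtual.
+//
+//   uint64_t fillSizeOrZero(const MCFragment &F) {
+//     if (const auto *FF = dyn_cast<MCFillFragment>(&F)) // defined below
+//       return FF->getSize();
+//     return 0; // Other kinds are only sized during layout.
+//   }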
+class MCDummyFragment : public MCFragment {
+public:
+  explicit MCDummyFragment(MCSection *Sec)
+      : MCFragment(FT_Dummy, false, 0, Sec) {}
+  static bool classof(const MCFragment *F) { return F->getKind() == FT_Dummy; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+class MCEncodedFragment : public MCFragment {
+protected:
+  MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions,
+                    MCSection *Sec)
+      : MCFragment(FType, HasInstructions, 0, Sec) {}
+
+public:
+  static bool classof(const MCFragment *F) {
+    MCFragment::FragmentType Kind = F->getKind();
+    switch (Kind) {
+    default:
+      return false;
+    case MCFragment::FT_Relaxable:
+    case MCFragment::FT_CompactEncodedInst:
+    case MCFragment::FT_Data:
+      return true;
+    }
+  }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+template <unsigned ContentsSize>
+class MCEncodedFragmentWithContents : public MCEncodedFragment {
+  SmallVector<char, ContentsSize> Contents;
+
+protected:
+  MCEncodedFragmentWithContents(MCFragment::FragmentType FType,
+                                bool HasInstructions,
+                                MCSection *Sec)
+      : MCEncodedFragment(FType, HasInstructions, Sec) {}
+
+public:
+  SmallVectorImpl<char> &getContents() { return Contents; }
+  const SmallVectorImpl<char> &getContents() const { return Contents; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data and also have fixups registered.
+///
+template <unsigned ContentsSize, unsigned FixupsSize>
+class MCEncodedFragmentWithFixups :
+  public MCEncodedFragmentWithContents<ContentsSize> {
+
+  /// Fixups - The list of fixups in this fragment.
+  SmallVector<MCFixup, FixupsSize> Fixups;
+
+protected:
+  MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
+                              bool HasInstructions,
+                              MCSection *Sec)
+      : MCEncodedFragmentWithContents<ContentsSize>(FType, HasInstructions,
+                                                    Sec) {}
+
+public:
+  typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+  typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
+
+  SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+  const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
+
+  fixup_iterator fixup_begin() { return Fixups.begin(); }
+  const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+  fixup_iterator fixup_end() { return Fixups.end(); }
+  const_fixup_iterator fixup_end() const { return Fixups.end(); }
+
+  static bool classof(const MCFragment *F) {
+    MCFragment::FragmentType Kind = F->getKind();
+    return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
+  }
+};
+
+/// Fragment for data and encoded instructions.
+///
+class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
+public:
+  MCDataFragment(MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {}
+
+  void setHasInstructions(bool V) { HasInstructions = V; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Data;
+  }
+};
+
+/// This is a compact (memory-size-wise) fragment for holding an encoded
+/// instruction (non-relaxable) that has no fixups registered. When applicable,
+/// it can be used instead of MCDataFragment and lead to lower memory
+/// consumption.
+///
+class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> {
+public:
+  MCCompactEncodedInstFragment(MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) {
+  }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_CompactEncodedInst;
+  }
+};
+
+/// A relaxable fragment holds on to its MCInst, since it may need to be
+/// relaxed during the assembler layout and relaxation stage.
+///
+class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
+
+  /// Inst - The instruction this is a fragment for.
+  MCInst Inst;
+
+  /// STI - The MCSubtargetInfo in effect when the instruction was encoded.
+  const MCSubtargetInfo &STI;
+
+public:
+  MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI,
+                      MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec),
+        Inst(Inst), STI(STI) {}
+
+  const MCInst &getInst() const { return Inst; }
+  void setInst(const MCInst &Value) { Inst = Value; }
+
+  const MCSubtargetInfo &getSubtargetInfo() { return STI; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Relaxable;
+  }
+};
+
+class MCAlignFragment : public MCFragment {
+
+  /// Alignment - The alignment to ensure, in bytes.
+  unsigned Alignment;
+
+  /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+  /// of using the provided value. The exact interpretation of this flag is
+  /// target dependent.
+  bool EmitNops : 1;
+
+  /// Value - Value to use for filling padding bytes.
+  int64_t Value;
+
+  /// ValueSize - The size of the integer (in bytes) of \p Value.
+  unsigned ValueSize;
+
+  /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+  /// cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit; + +public: + MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit, MCSection *Sec = nullptr) + : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment), + EmitNops(false), Value(Value), + ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} + + /// \name Accessors + /// @{ + + unsigned getAlignment() const { return Alignment; } + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } + + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool Value) { EmitNops = Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Align; + } +}; + +class MCFillFragment : public MCFragment { + + /// Value - Value to use for filling bytes. + int64_t Value; + + /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if + /// this is a virtual fill fragment. + unsigned ValueSize; + + /// Size - The number of bytes to insert. + uint64_t Size; + +public: + MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size, + MCSection *Sec = nullptr) + : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize), + Size(Size) { + assert((!ValueSize || (Size % ValueSize) == 0) && + "Fill size must be a multiple of the value size!"); + } + + /// \name Accessors + /// @{ + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + uint64_t getSize() const { return Size; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Fill; + } +}; + +class MCOrgFragment : public MCFragment { + + /// Offset - The offset this fragment should start at. + const MCExpr *Offset; + + /// Value - Value to use for filling bytes. + int8_t Value; + +public: + MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr) + : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {} + + /// \name Accessors + /// @{ + + const MCExpr &getOffset() const { return *Offset; } + + uint8_t getValue() const { return Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Org; + } +}; + +class MCLEBFragment : public MCFragment { + + /// Value - The value this fragment should contain. + const MCExpr *Value; + + /// IsSigned - True if this is a sleb128, false if uleb128. + bool IsSigned; + + SmallString<8> Contents; + +public: + MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) + : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + const MCExpr &getValue() const { return *Value; } + + bool isSigned() const { return IsSigned; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_LEB; + } +}; + +class MCDwarfLineAddrFragment : public MCFragment { + + /// LineDelta - the value of the difference between the two line numbers + /// between two .loc dwarf directives. + int64_t LineDelta; + + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .loc dwarf directives. 
+ const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta, + MCSection *Sec = nullptr) + : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta), + AddrDelta(&AddrDelta) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + int64_t getLineDelta() const { return LineDelta; } + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Dwarf; + } +}; + +class MCDwarfCallFrameFragment : public MCFragment { + + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .cfi_* dwarf directives. + const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) + : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_DwarfFrame; + } +}; + +class MCSafeSEHFragment : public MCFragment { + const MCSymbol *Sym; + +public: + MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr) + : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {} + + /// \name Accessors + /// @{ + + const MCSymbol *getSymbol() { return Sym; } + const MCSymbol *getSymbol() const { return Sym; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_SafeSEH; + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 6a582e82d00e..88aab73d4058 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -15,12 +15,12 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include namespace llvm { class MCInst; - class MCRegisterInfo; class MCSubtargetInfo; class FeatureBitset; @@ -137,16 +137,16 @@ enum Flag { /// directly to describe itself. class MCInstrDesc { public: - unsigned short Opcode; // The opcode number - unsigned short NumOperands; // Num of args (may be more if variable_ops) - unsigned char NumDefs; // Num of args that are definitions - unsigned char Size; // Number of bytes in encoding. - unsigned short SchedClass; // enum identifying instr sched class - uint64_t Flags; // Flags identifying machine instr class - uint64_t TSFlags; // Target Specific Flag values - const uint16_t *ImplicitUses; // Registers implicitly read by this instr - const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr - const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands + unsigned short Opcode; // The opcode number + unsigned short NumOperands; // Num of args (may be more if variable_ops) + unsigned char NumDefs; // Num of args that are definitions + unsigned char Size; // Number of bytes in encoding. 
+  unsigned short SchedClass;     // enum identifying instr sched class
+  uint64_t Flags;                // Flags identifying machine instr class
+  uint64_t TSFlags;              // Target Specific Flag values
+  const MCPhysReg *ImplicitUses; // Registers implicitly read by this instr
+  const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr
+  const MCOperandInfo *OpInfo;   // 'NumOperands' entries about operands

   // Subtarget feature that this is deprecated on, if any
   // -1 implies this is not deprecated by any single feature. It may still be
   // deprecated due to a "complex" reason, below.
@@ -336,8 +336,8 @@ public:

   /// \brief Return true if this instruction is convergent.
   ///
-  /// Convergent instructions may only be moved to locations that are
-  /// control-equivalent to their original positions.
+  /// Convergent instructions may not be made control-dependent on any
+  /// additional values.
   bool isConvergent() const { return Flags & (1 << MCID::Convergent); }

   //===--------------------------------------------------------------------===//
@@ -472,7 +472,7 @@ public:
   /// marked as implicitly reading the 'CL' register, which it always does.
   ///
   /// This method returns null if the instruction has no implicit uses.
-  const uint16_t *getImplicitUses() const { return ImplicitUses; }
+  const MCPhysReg *getImplicitUses() const { return ImplicitUses; }

   /// \brief Return the number of implicit uses this instruction has.
   unsigned getNumImplicitUses() const {
@@ -494,7 +494,7 @@ public:
   /// EAX/EDX/EFLAGS registers.
   ///
   /// This method returns null if the instruction has no implicit defs.
-  const uint16_t *getImplicitDefs() const { return ImplicitDefs; }
+  const MCPhysReg *getImplicitDefs() const { return ImplicitDefs; }

   /// \brief Return the number of implicit defs this instruction has.
   unsigned getNumImplicitDefs() const {
@@ -509,7 +509,7 @@ public:
   /// \brief Return true if this instruction implicitly
   /// uses the specified physical register.
   bool hasImplicitUseOfPhysReg(unsigned Reg) const {
-    if (const uint16_t *ImpUses = ImplicitUses)
+    if (const MCPhysReg *ImpUses = ImplicitUses)
       for (; *ImpUses; ++ImpUses)
         if (*ImpUses == Reg)
           return true;
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index 161705de7c4e..b2871a9805e1 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -38,7 +38,7 @@ namespace llvm {
 ///
 ///   { 2, x|y, 1 }
 ///      indicates that the stage occupies either FU x or FU y for 2
-///      consecuative cycles and that the next stage starts one cycle
+///      consecutive cycles and that the next stage starts one cycle
 ///      after this stage starts. That is, the stage requirements
 ///      overlap in time.
 ///
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index 4b6f7ecc9fba..a519c4b71b03 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -160,7 +160,7 @@ class MCLOHContainer {
 public:
   typedef SmallVectorImpl<MCLOHDirective> LOHDirectives;

-  MCLOHContainer() : EmitSize(0) {};
+  MCLOHContainer() : EmitSize(0) {}

   /// Const accessor to the directives.
const LOHDirectives &getDirectives() const { diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index 175d73e72c10..cd3db957afc1 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/DataTypes.h" @@ -104,7 +105,7 @@ class MachObjectWriter : public MCObjectWriter { /// \name Symbol Table Data /// @{ - StringTableBuilder StringTable; + StringTableBuilder StringTable{StringTableBuilder::MachO}; std::vector LocalSymbolData; std::vector ExternalSymbolData; std::vector UndefinedSymbolData; @@ -159,19 +160,21 @@ public: /// @} - void writeHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols); + void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands, + unsigned LoadCommandsSize, bool SubsectionsViaSymbols); /// Write a segment load command. /// /// \param NumSections The number of sections in this segment. /// \param SectionDataSize The total size of the sections. - void writeSegmentLoadCommand(unsigned NumSections, uint64_t VMSize, + void writeSegmentLoadCommand(StringRef Name, unsigned NumSections, + uint64_t VMAddr, uint64_t VMSize, uint64_t SectionDataStartOffset, - uint64_t SectionDataSize); + uint64_t SectionDataSize, uint32_t MaxProt, + uint32_t InitProt); - void writeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSection &Sec, uint64_t FileOffset, + void writeSection(const MCAsmLayout &Layout, const MCSection &Sec, + uint64_t VMAddr, uint64_t FileOffset, unsigned Flags, uint64_t RelocationsStart, unsigned NumRelocations); void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, @@ -245,6 +248,11 @@ public: void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbol &A, + const MCSymbol &B, + bool InSet) const override; + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 99e3f92bfe26..cf2c3f12bb6b 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -35,16 +35,18 @@ protected: /// without an associated EH frame section. bool SupportsCompactUnwindWithoutEHFrame; - /// Some encoding values for EH. + /// OmitDwarfIfHaveCompactUnwind - True if the target object file + /// supports having some functions with compact unwind and other with + /// dwarf unwind. + bool OmitDwarfIfHaveCompactUnwind; + + /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values + /// for EH. unsigned PersonalityEncoding; unsigned LSDAEncoding; unsigned FDECFIEncoding; unsigned TTypeEncoding; - /// Section flags for eh_frame - unsigned EHSectionType; - unsigned EHSectionFlags; - /// Compact unwind encoding indicating that we should emit only an EH frame. unsigned CompactUnwindDwarfEHFrameOnly; @@ -114,6 +116,10 @@ protected: MCSection *DwarfStrOffDWOSection; MCSection *DwarfAddrSection; + // These are for Fission DWP files. + MCSection *DwarfCUIndexSection; + MCSection *DwarfTUIndexSection; + /// Section for newer gnu pubnames. 
MCSection *DwarfGnuPubNamesSection; /// Section for newer gnu pubtypes. @@ -147,10 +153,7 @@ protected: MCSection *EHFrameSection; // ELF specific sections. - MCSection *DataRelSection; - const MCSection *DataRelLocalSection; MCSection *DataRelROSection; - MCSection *DataRelROLocalSection; MCSection *MergeableConst4Section; MCSection *MergeableConst8Section; MCSection *MergeableConst16Section; @@ -200,6 +203,10 @@ public: bool getSupportsCompactUnwindWithoutEHFrame() const { return SupportsCompactUnwindWithoutEHFrame; } + bool getOmitDwarfIfHaveCompactUnwind() const { + return OmitDwarfIfHaveCompactUnwind; + } + bool getCommDirectiveSupportsAlignment() const { return CommDirectiveSupportsAlignment; } @@ -216,6 +223,7 @@ public: MCSection *getTextSection() const { return TextSection; } MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } + MCSection *getReadOnlySection() const { return ReadOnlySection; } MCSection *getLSDASection() const { return LSDASection; } MCSection *getCompactUnwindSection() const { return CompactUnwindSection; } MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } @@ -258,6 +266,8 @@ public: MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; } MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } + MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } + MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } MCSection *getCOFFDebugSymbolsSection() const { return COFFDebugSymbolsSection; @@ -271,12 +281,7 @@ public: MCSection *getFaultMapSection() const { return FaultMapSection; } // ELF specific sections. - MCSection *getDataRelSection() const { return DataRelSection; } - const MCSection *getDataRelLocalSection() const { - return DataRelLocalSection; - } MCSection *getDataRelROSection() const { return DataRelROSection; } - MCSection *getDataRelROLocalSection() const { return DataRelROLocalSection; } const MCSection *getMergeableConst4Section() const { return MergeableConst4Section; } @@ -325,8 +330,6 @@ public: MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getEHFrameSection() { - if (!EHFrameSection) - InitEHFrameSection(); return EHFrameSection; } @@ -346,9 +349,6 @@ private: void initELFMCObjectFileInfo(Triple T); void initCOFFMCObjectFileInfo(Triple T); - /// Initialize EHFrameSection on demand. 
- void InitEHFrameSection(); - public: const Triple &getTargetTriple() const { return TT; } }; diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index ce1fc80f2cf2..9fe2fda21353 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -92,7 +92,7 @@ public: void EmitLabel(MCSymbol *Symbol) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitULEB128Value(const MCExpr *Value) override; void EmitSLEB128Value(const MCExpr *Value) override; void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; @@ -112,7 +112,7 @@ public: unsigned MaxBytesToEmit = 0) override; void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit = 0) override; - bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value) override; + void emitValueToOffset(const MCExpr *Offset, unsigned char Value) override; void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, unsigned Discriminator, @@ -124,8 +124,9 @@ public: const MCSymbol *Label); void EmitGPRel32Value(const MCExpr *Value) override; void EmitGPRel64Value(const MCExpr *Value) override; + bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) override; void EmitFill(uint64_t NumBytes, uint8_t FillValue) override; - void EmitZeros(uint64_t NumBytes) override; void FinishImpl() override; /// Emit the absolute difference between two symbols if possible. diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 2211673efc31..63c833ac20d6 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -40,14 +40,18 @@ class MCObjectWriter { MCObjectWriter(const MCObjectWriter &) = delete; void operator=(const MCObjectWriter &) = delete; -protected: - raw_pwrite_stream &OS; + raw_pwrite_stream *OS; +protected: unsigned IsLittleEndian : 1; protected: // Can only create subclasses. 
   MCObjectWriter(raw_pwrite_stream &OS, bool IsLittleEndian)
-      : OS(OS), IsLittleEndian(IsLittleEndian) {}
+      : OS(&OS), IsLittleEndian(IsLittleEndian) {}
+
+  unsigned getInitialOffset() {
+    return OS->tell();
+  }

 public:
   virtual ~MCObjectWriter();
@@ -57,7 +61,8 @@ public:

   bool isLittleEndian() const { return IsLittleEndian; }

-  raw_ostream &getStream() { return OS; }
+  raw_pwrite_stream &getStream() { return *OS; }
+  void setStream(raw_pwrite_stream &NewOS) { OS = &NewOS; }

   /// \name High-Level API
   /// @{
@@ -91,6 +96,11 @@ public:
                                             const MCSymbolRefExpr *B,
                                             bool InSet) const;

+  virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbol &A,
+                                                      const MCSymbol &B,
+                                                      bool InSet) const;
+
   virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
                                                       const MCSymbol &SymA,
                                                       const MCFragment &FB,
@@ -113,30 +123,30 @@ public:
   /// \name Binary Output
   /// @{

-  void write8(uint8_t Value) { OS << char(Value); }
+  void write8(uint8_t Value) { *OS << char(Value); }

   void writeLE16(uint16_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeLE32(uint32_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeLE64(uint64_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeBE16(uint16_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void writeBE32(uint32_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void writeBE64(uint64_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void write16(uint16_t Value) {
@@ -164,9 +174,9 @@ public:
     const char Zeros[16] = {0};

     for (unsigned i = 0, e = N / 16; i != e; ++i)
-      OS << StringRef(Zeros, 16);
+      *OS << StringRef(Zeros, 16);

-    OS << StringRef(Zeros, N % 16);
+    *OS << StringRef(Zeros, N % 16);
   }

   void writeBytes(const SmallVectorImpl<char> &ByteVec,
@@ -180,7 +190,7 @@ public:
     assert(
         (ZeroFillSize == 0 || Str.size() <= ZeroFillSize) &&
         "data size greater than fill size, unexpected large write will occur");
-    OS << Str;
+    *OS << Str;
     if (ZeroFillSize)
       WriteZeros(ZeroFillSize - Str.size());
   }
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 62d39b26c860..1bb6d212784e 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -47,7 +47,8 @@ public:
   StringRef LexUntilEndOfStatement() override;
   StringRef LexUntilEndOfLine();

-  const AsmToken peekTok(bool ShouldSkipSpace = true) override;
+  size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+                    bool ShouldSkipSpace = true) override;

   bool isAtStartOfComment(const char *Ptr);
   bool isAtStatementSeparator(const char *Ptr);
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 71f15b37c331..55279f49529a 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -118,7 +118,7 @@ public:
 /// lexers.
 class MCAsmLexer {
   /// The current token, stored in the base class for faster access.
-  AsmToken CurTok;
+  SmallVector<AsmToken, 1> CurTok;

   /// The location and description of the current error
   SMLoc ErrLoc;
@@ -135,7 +135,7 @@ protected: // Can only create subclasses.
   virtual AsmToken LexToken() = 0;

-  void SetError(const SMLoc &errLoc, const std::string &err) {
+  void SetError(SMLoc errLoc, const std::string &err) {
     ErrLoc = errLoc;
     Err = err;
   }
@@ -148,7 +148,15 @@ public:
   /// The lexer will continuously return the end-of-file token once the end of
   /// the main input file has been reached.
   const AsmToken &Lex() {
-    return CurTok = LexToken();
+    assert(!CurTok.empty());
+    CurTok.erase(CurTok.begin());
+    if (CurTok.empty())
+      CurTok.emplace_back(LexToken());
+    return CurTok.front();
+  }
+
+  void UnLex(AsmToken const &Token) {
+    CurTok.insert(CurTok.begin(), Token);
   }

   virtual StringRef LexUntilEndOfStatement() = 0;
@@ -158,14 +166,28 @@ public:

   /// Get the current (last) lexed token.
   const AsmToken &getTok() const {
-    return CurTok;
+    return CurTok[0];
   }

   /// Look ahead at the next token to be lexed.
-  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
+  const AsmToken peekTok(bool ShouldSkipSpace = true) {
+    AsmToken Tok;
+
+    MutableArrayRef<AsmToken> Buf(Tok);
+    size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
+
+    assert(ReadCount == 1);
+    (void)ReadCount;
+
+    return Tok;
+  }
+
+  /// Look ahead an arbitrary number of tokens.
+  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+                            bool ShouldSkipSpace = true) = 0;

   /// Get the current error location
-  const SMLoc &getErrLoc() {
+  SMLoc getErrLoc() {
     return ErrLoc;
   }

@@ -175,13 +197,13 @@ public:
   }

   /// Get the kind of current token.
-  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+  AsmToken::TokenKind getKind() const { return getTok().getKind(); }

   /// Check if the current token has kind \p K.
-  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }

   /// Check if the current token has kind \p K.
-  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }

   /// Set whether spaces should be ignored by the lexer
   void setSkipSpace(bool val) { SkipSpace = val; }
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 077fd21e073c..30b25dcfdaec 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -71,6 +71,9 @@ public:
   bool Error(SMLoc L, const Twine &Msg) {
     return getParser().Error(L, Msg);
   }
+  void Note(SMLoc L, const Twine &Msg) {
+    getParser().Note(L, Msg);
+  }

   bool TokError(const Twine &Msg) {
     return getParser().TokError(Msg);
   }
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index a25108a0effb..a90d280c240c 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -30,8 +30,16 @@ class MCParsedAsmOperand {
   /// MS-style inline assembly.
   std::string Constraint;

+protected:
+  // This only seems to need to be movable (by ARMOperand) but ARMOperand has
+  // lots of members and MSVC doesn't support defaulted move ops, so to avoid
+  // that verbosity, just rely on defaulted copy ops. It's only the Constraint
+  // string member that would benefit from movement anyway.
+ MCParsedAsmOperand(const MCParsedAsmOperand &RHS) = default; + MCParsedAsmOperand &operator=(const MCParsedAsmOperand &) = default; + MCParsedAsmOperand() = default; + public: - MCParsedAsmOperand() {} virtual ~MCParsedAsmOperand() {} void setConstraint(StringRef C) { Constraint = C.str(); } diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 8e25ee18e08d..a4d5e0867232 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -632,7 +632,7 @@ private: unsigned Reg; const MCRegisterInfo *MCRI; bool IncludeSelf; - + MCRegUnitIterator RI; MCRegUnitRootIterator RRI; MCSuperRegIterator SI; @@ -652,10 +652,8 @@ public: } } - bool isValid() const { - return RI.isValid(); - } - + bool isValid() const { return RI.isValid(); } + unsigned operator*() const { assert (SI.isValid() && "Cannot dereference an invalid iterator."); return *SI; diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index c09791631056..d7f9b69a9a2c 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -183,7 +183,7 @@ struct MCSchedModel { // takes to recover from a branch misprediction. unsigned MispredictPenalty; static const unsigned DefaultMispredictPenalty = 10; - + bool PostRAScheduler; // default value is false bool CompleteModel; @@ -206,6 +206,9 @@ struct MCSchedModel { /// scheduling class (itinerary class or SchedRW list). bool isComplete() const { return CompleteModel; } + /// Return true if machine supports out of order execution. + bool isOutOfOrder() const { return MicroOpBufferSize > 1; } + unsigned getNumProcResourceKinds() const { return NumProcResourceKinds; } diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 2d0d4dfc5913..09a98929113a 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -18,12 +18,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Compiler.h" namespace llvm { -class MCAssembler; class MCAsmInfo; +class MCAssembler; class MCContext; class MCExpr; class MCFragment; @@ -92,6 +93,8 @@ private: unsigned IsRegistered : 1; + MCDummyFragment DummyFragment; + FragmentListType Fragments; /// Mapping from subsection number to insertion point for subsection numbers @@ -102,10 +105,9 @@ protected: MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin); SectionVariant Variant; SectionKind Kind; + ~MCSection(); public: - virtual ~MCSection(); - SectionKind getKind() const { return Kind; } SectionVariant getVariant() const { return Variant; } @@ -152,6 +154,14 @@ public: return const_cast(this)->getFragmentList(); } + /// Support for MCFragment::getNextNode(). 
+ static FragmentListType MCSection::*getSublistAccess(MCFragment *) { + return &MCSection::Fragments; + } + + const MCDummyFragment &getDummyFragment() const { return DummyFragment; } + MCDummyFragment &getDummyFragment() { return DummyFragment; } + MCSection::iterator begin(); MCSection::const_iterator begin() const { return const_cast(this)->begin(); diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h index 237f6d31fb1b..d94682c8c381 100644 --- a/include/llvm/MC/MCSectionCOFF.h +++ b/include/llvm/MC/MCSectionCOFF.h @@ -16,66 +16,63 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCSection.h" -#include "llvm/Support/COFF.h" namespace llvm { class MCSymbol; -/// MCSectionCOFF - This represents a section on Windows - class MCSectionCOFF : public MCSection { - // The memory for this string is stored in the same MCContext as *this. - StringRef SectionName; +/// This represents a section on Windows +class MCSectionCOFF final : public MCSection { + // The memory for this string is stored in the same MCContext as *this. + StringRef SectionName; - // FIXME: The following fields should not be mutable, but are for now so - // the asm parser can honor the .linkonce directive. + // FIXME: The following fields should not be mutable, but are for now so the + // asm parser can honor the .linkonce directive. - /// Characteristics - This is the Characteristics field of a section, - /// drawn from the enums below. - mutable unsigned Characteristics; + /// This is the Characteristics field of a section, drawn from the enums + /// below. + mutable unsigned Characteristics; - /// The COMDAT symbol of this section. Only valid if this is a COMDAT - /// section. Two COMDAT sections are merged if they have the same - /// COMDAT symbol. - MCSymbol *COMDATSymbol; + /// The COMDAT symbol of this section. Only valid if this is a COMDAT section. + /// Two COMDAT sections are merged if they have the same COMDAT symbol. 
+ MCSymbol *COMDATSymbol; - /// Selection - This is the Selection field for the section symbol, if - /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0 - mutable int Selection; + /// This is the Selection field for the section symbol, if it is a COMDAT + /// section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0 + mutable int Selection; - private: - friend class MCContext; - MCSectionCOFF(StringRef Section, unsigned Characteristics, - MCSymbol *COMDATSymbol, int Selection, SectionKind K, - MCSymbol *Begin) - : MCSection(SV_COFF, K, Begin), SectionName(Section), - Characteristics(Characteristics), COMDATSymbol(COMDATSymbol), - Selection(Selection) { - assert ((Characteristics & 0x00F00000) == 0 && - "alignment must not be set upon section creation"); - } - ~MCSectionCOFF() override; +private: + friend class MCContext; + MCSectionCOFF(StringRef Section, unsigned Characteristics, + MCSymbol *COMDATSymbol, int Selection, SectionKind K, + MCSymbol *Begin) + : MCSection(SV_COFF, K, Begin), SectionName(Section), + Characteristics(Characteristics), COMDATSymbol(COMDATSymbol), + Selection(Selection) { + assert((Characteristics & 0x00F00000) == 0 && + "alignment must not be set upon section creation"); + } - public: - /// ShouldOmitSectionDirective - Decides whether a '.section' directive - /// should be printed before the section name - bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; +public: + ~MCSectionCOFF(); - StringRef getSectionName() const { return SectionName; } - unsigned getCharacteristics() const { return Characteristics; } - MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; } - int getSelection() const { return Selection; } + /// Decides whether a '.section' directive should be printed before the + /// section name + bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; - void setSelection(int Selection) const; + StringRef getSectionName() const { return SectionName; } + unsigned getCharacteristics() const { return Characteristics; } + MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; } + int getSelection() const { return Selection; } - void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, - const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; - bool isVirtualSection() const override; + void setSelection(int Selection) const; - static bool classof(const MCSection *S) { - return S->getVariant() == SV_COFF; - } - }; + void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, + const MCExpr *Subsection) const override; + bool UseCodeAlign() const override; + bool isVirtualSection() const override; + + static bool classof(const MCSection *S) { return S->getVariant() == SV_COFF; } +}; } // end namespace llvm diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h index f6730371fe15..b3bb3ad4e02c 100644 --- a/include/llvm/MC/MCSectionELF.h +++ b/include/llvm/MC/MCSectionELF.h @@ -25,25 +25,24 @@ namespace llvm { class MCSymbol; -/// MCSectionELF - This represents a section on linux, lots of unix variants -/// and some bare metal systems. -class MCSectionELF : public MCSection { - /// SectionName - This is the name of the section. The referenced memory is - /// owned by TargetLoweringObjectFileELF's ELFUniqueMap. +/// This represents a section on linux, lots of unix variants and some bare +/// metal systems. +class MCSectionELF final : public MCSection { + /// This is the name of the section. 
The referenced memory is owned by + /// TargetLoweringObjectFileELF's ELFUniqueMap. StringRef SectionName; - /// Type - This is the sh_type field of a section, drawn from the enums below. + /// This is the sh_type field of a section, drawn from the enums below. unsigned Type; - /// Flags - This is the sh_flags field of a section, drawn from the enums. - /// below. + /// This is the sh_flags field of a section, drawn from the enums below. unsigned Flags; unsigned UniqueID; - /// EntrySize - The size of each entry in this section. This size only - /// makes sense for sections that contain fixed-sized entries. If a - /// section does not contain fixed-sized entries 'EntrySize' will be 0. + /// The size of each entry in this section. This size only makes sense for + /// sections that contain fixed-sized entries. If a section does not contain + /// fixed-sized entries 'EntrySize' will be 0. unsigned EntrySize; const MCSymbolELF *Group; @@ -62,14 +61,14 @@ private: if (Group) Group->setIsSignature(); } - ~MCSectionELF() override; void setSectionName(StringRef Name) { SectionName = Name; } public: + ~MCSectionELF(); - /// ShouldOmitSectionDirective - Decides whether a '.section' directive - /// should be printed before the section name + /// Decides whether a '.section' directive should be printed before the + /// section name bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; StringRef getSectionName() const { return SectionName; } diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 97227517c82d..658dfcda7268 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -20,19 +20,18 @@ namespace llvm { -/// MCSectionMachO - This represents a section on a Mach-O system (used by -/// Mac OS X). On a Mac system, these are also described in -/// /usr/include/mach-o/loader.h. -class MCSectionMachO : public MCSection { +/// This represents a section on a Mach-O system (used by Mac OS X). On a Mac +/// system, these are also described in /usr/include/mach-o/loader.h. +class MCSectionMachO final : public MCSection { char SegmentName[16]; // Not necessarily null terminated! char SectionName[16]; // Not necessarily null terminated! - /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES - /// field of a section, drawn from the enums below. + /// This is the SECTION_TYPE and SECTION_ATTRIBUTES field of a section, drawn + /// from the enums below. unsigned TypeAndAttributes; - /// Reserved2 - The 'reserved2' field of a section, used to represent the - /// size of stubs, for example. + /// The 'reserved2' field of a section, used to represent the size of stubs, + /// for example. unsigned Reserved2; MCSectionMachO(StringRef Segment, StringRef Section, unsigned TAA, @@ -64,12 +63,12 @@ public: return (TypeAndAttributes & Value) != 0; } - /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec". - /// This is a string that can appear after a .section directive in a mach-o - /// flavored .s file. If successful, this fills in the specified Out - /// parameters and returns an empty string. When an invalid section - /// specifier is present, this returns a string indicating the problem. - /// If no TAA was parsed, TAA is not altered, and TAAWasSet becomes false. + /// Parse the section specifier indicated by "Spec". This is a string that can + /// appear after a .section directive in a mach-o flavored .s file. 
If
+  /// successful, this fills in the specified Out parameters and returns an
+  /// empty string. When an invalid section specifier is present, this returns
+  /// a string indicating the problem. If no TAA was parsed, TAA is not altered,
+  /// and TAAWasSet becomes false.
   static std::string ParseSectionSpecifier(StringRef Spec,       // In.
                                            StringRef &Segment,   // Out.
                                            StringRef &Section,   // Out.
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 6b9b8a153845..494f02dfad3e 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWinEH.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/SMLoc.h"
@@ -33,7 +34,6 @@ class MCInst;
 class MCInstPrinter;
 class MCSection;
 class MCStreamer;
-class MCSymbol;
 class MCSymbolELF;
 class MCSymbolRefExpr;
 class MCSubtargetInfo;
@@ -134,7 +134,7 @@ public:
   /// Callback used to implement the ldr= pseudo.
   /// Add a new entry to the constant pool for the current section and return an
   /// MCExpr that can be used to refer to the constant pool location.
-  const MCExpr *addConstantPoolEntry(const MCExpr *);
+  const MCExpr *addConstantPoolEntry(const MCExpr *, SMLoc Loc);

   /// Callback used to implement the .ltorg directive.
   /// Emit contents of constant pool for the current section.
@@ -358,7 +358,7 @@ public:
   ///
   /// Each emitted symbol will be tracked in the ordering table,
   /// so we can sort on them later.
-  void AssignSection(MCSymbol *Symbol, MCSection *Section);
+  void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);

   /// \brief Emit a label for \p Symbol into the current section.
   ///
@@ -522,10 +522,9 @@ public:
   /// match a native machine width.
   /// \param Loc - The location of the expression for error reporting.
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             const SMLoc &Loc = SMLoc());
+                             SMLoc Loc = SMLoc());

-  void EmitValue(const MCExpr *Value, unsigned Size,
-                 const SMLoc &Loc = SMLoc());
+  void EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc = SMLoc());

   /// \brief Special case of EmitValue that avoids the client having
   /// to pass in a MCExpr for constant integers.
@@ -568,7 +567,7 @@ public:

   /// \brief Emit NumBytes worth of zeros.
   /// This function properly handles data in virtual sections.
-  virtual void EmitZeros(uint64_t NumBytes);
+  void EmitZeros(uint64_t NumBytes);

   /// \brief Emit some number of copies of \p Value until the byte alignment \p
   /// ByteAlignment is reached.
@@ -612,9 +611,7 @@ public:
   /// \param Offset - The offset to reach. This may be an expression, but the
   /// expression must be associated with the current section.
   /// \param Value - The value to use when filling bytes.
-  /// \return false on success, true if the offset was invalid.
-  virtual bool EmitValueToOffset(const MCExpr *Offset,
-                                 unsigned char Value = 0);
+  virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0);

   /// @}

@@ -662,6 +659,7 @@ public:
   virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
   virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
   virtual void EmitCFIEscape(StringRef Values);
+  virtual void EmitCFIGnuArgsSize(int64_t Size);
   virtual void EmitCFISignalFrame();
   virtual void EmitCFIUndefined(int64_t Register);
   virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
@@ -682,6 +680,16 @@ public:
   virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except);
   virtual void EmitWinEHHandlerData();

+  virtual void EmitSyntaxDirective();
+
+  /// \brief Emit a .reloc directive.
+  /// Returns true if the relocation could not be emitted because Name is not
+  /// known.
+  virtual bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+                                  const MCExpr *Expr, SMLoc Loc) {
+    return true;
+  }
+
   /// \brief Emit the given \p Instruction into the current section.
   virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);

@@ -704,9 +712,6 @@ public:
   /// the hasRawTextSupport() predicate. By default this aborts.
   void EmitRawText(const Twine &String);

-  /// \brief Causes any cached state to be written out.
-  virtual void Flush() {}
-
   /// \brief Streamer specific finalization.
   virtual void FinishImpl();
   /// \brief Finish emission of machine code.
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index d5ad4eebf9ef..446feefc4500 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -86,8 +86,9 @@ protected:
   void InitMCProcessorInfo(StringRef CPU, StringRef FS);

 public:
-  /// Set the features to the default for the given CPU.
-  void setDefaultFeatures(StringRef CPU);
+  /// Set the features to the default for the given CPU with an appended feature
+  /// string.
+  void setDefaultFeatures(StringRef CPU, StringRef FS);

   /// ToggleFeature - Toggle a feature and returns the re-computed feature
   /// bits. This version does not change the implied bits.
@@ -159,11 +160,8 @@ public:

   /// Check whether the CPU string is valid.
   bool isCPUStringValid(StringRef CPU) const {
-    auto Found = std::find_if(ProcDesc.begin(), ProcDesc.end(),
-                              [=](const SubtargetFeatureKV &KV) {
-                                return CPU == KV.Key;
-                              });
-    return Found != ProcDesc.end();
+    auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
+    return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
   }
 };
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index b2910dfccd63..c51ecfcb0c5c 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -56,19 +56,17 @@ protected:
     SymContentsCommon,
   };

-  // Special sentinel value for the absolute pseudo section.
-  //
-  // FIXME: Use a PointerInt wrapper for this?
-  static MCSection *AbsolutePseudoSection;
+  // Special sentinel value for the absolute pseudo fragment.
+  static MCFragment *AbsolutePseudoFragment;

   /// If a symbol has a Fragment, the section is implied, so we only need
   /// one pointer.
+  /// The special AbsolutePseudoFragment value is for absolute symbols.
+  /// If this is a variable symbol, this caches the variable value's fragment.
   /// FIXME: We might be able to simplify this by having the asm streamer create
   /// dummy fragments.
   /// If this is a section, then it gives the section the symbol is defined in.
This is null - /// for undefined symbols, and the special AbsolutePseudoSection value for - /// absolute symbols. If this is a variable symbol, this caches the variable - /// value's section. + /// for undefined symbols. /// /// If this is a fragment, then it gives the fragment this symbol's value is /// relative to, if any. @@ -76,8 +74,7 @@ protected: /// For the 'HasName' integer, this is true if this symbol is named. /// A named symbol will have a pointer to the name allocated in the bytes /// immediately prior to the MCSymbol. - mutable PointerIntPair, 1> - SectionOrFragmentAndHasName; + mutable PointerIntPair FragmentAndHasName; /// IsTemporary - True if this is an assembler temporary label, which /// typically does not survive in the .o file's symbol table. Usually @@ -155,7 +152,7 @@ protected: // MCContext creates and uniques these. Kind(Kind), IsUsedInReloc(false), SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) { Offset = 0; - SectionOrFragmentAndHasName.setInt(!!Name); + FragmentAndHasName.setInt(!!Name); if (Name) getNameEntryPtr() = Name; } @@ -179,20 +176,17 @@ private: MCSymbol(const MCSymbol &) = delete; void operator=(const MCSymbol &) = delete; - MCSection *getSectionPtr() const { - if (MCFragment *F = getFragment()) + MCSection *getSectionPtr(bool SetUsed = true) const { + if (MCFragment *F = getFragment(SetUsed)) { + assert(F != AbsolutePseudoFragment); return F->getParent(); - const auto &SectionOrFragment = SectionOrFragmentAndHasName.getPointer(); - assert(!SectionOrFragment.is() && "Section or null expected"); - MCSection *Section = SectionOrFragment.dyn_cast(); - if (Section || !isVariable()) - return Section; - return Section = getVariableValue()->findAssociatedSection(); + } + return nullptr; } /// \brief Get a reference to the name field. Requires that we have a name const StringMapEntry *&getNameEntryPtr() { - assert(SectionOrFragmentAndHasName.getInt() && "Name is required"); + assert(FragmentAndHasName.getInt() && "Name is required"); NameEntryStorageTy *Name = reinterpret_cast(this); return (*(Name - 1)).NameEntry; } @@ -203,7 +197,7 @@ private: public: /// getName - Get the symbol name. StringRef getName() const { - if (!SectionOrFragmentAndHasName.getInt()) + if (!FragmentAndHasName.getInt()) return StringRef(); return getNameEntryPtr()->first(); @@ -223,7 +217,7 @@ public: /// isUsed - Check if this is used. bool isUsed() const { return IsUsed; } - void setUsed(bool Value) const { IsUsed = Value; } + void setUsed(bool Value) const { IsUsed |= Value; } /// \brief Check if this symbol is redefinable. bool isRedefinable() const { return IsRedefinable; } @@ -248,37 +242,38 @@ public: /// isDefined - Check if this symbol is defined (i.e., it has an address). /// /// Defined symbols are either absolute or in some section. - bool isDefined() const { return getSectionPtr() != nullptr; } + bool isDefined(bool SetUsed = true) const { + return getFragment(SetUsed) != nullptr; + } /// isInSection - Check if this symbol is defined in some section (i.e., it /// is defined but not absolute). - bool isInSection() const { return isDefined() && !isAbsolute(); } - - /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). - bool isUndefined() const { return !isDefined(); } - - /// isAbsolute - Check if this is an absolute symbol. - bool isAbsolute() const { return getSectionPtr() == AbsolutePseudoSection; } - - /// Get the section associated with a defined, non-absolute symbol. 
-  MCSection &getSection() const {
-    assert(isInSection() && "Invalid accessor!");
-    return *getSectionPtr();
+  bool isInSection(bool SetUsed = true) const {
+    return isDefined(SetUsed) && !isAbsolute(SetUsed);
   }

-  /// Mark the symbol as defined in the section \p S.
-  void setSection(MCSection &S) {
-    assert(!isVariable() && "Cannot set section of variable");
-    assert(!SectionOrFragmentAndHasName.getPointer().is<MCFragment *>() &&
-           "Section or null expected");
-    SectionOrFragmentAndHasName.setPointer(&S);
+  /// isUndefined - Check if this symbol is undefined (i.e., implicitly defined).
+  bool isUndefined(bool SetUsed = true) const { return !isDefined(SetUsed); }
+
+  /// isAbsolute - Check if this is an absolute symbol.
+  bool isAbsolute(bool SetUsed = true) const {
+    return getFragment(SetUsed) == AbsolutePseudoFragment;
+  }
+
+  /// Get the section associated with a defined, non-absolute symbol.
+  MCSection &getSection(bool SetUsed = true) const {
+    assert(isInSection(SetUsed) && "Invalid accessor!");
+    return *getSectionPtr(SetUsed);
+  }
+
+  /// Mark the symbol as defined in the fragment \p F.
+  void setFragment(MCFragment *F) const {
+    assert(!isVariable() && "Cannot set fragment of variable");
+    FragmentAndHasName.setPointer(F);
   }

   /// Mark the symbol as undefined.
-  void setUndefined() {
-    SectionOrFragmentAndHasName.setPointer(
-        PointerUnion<MCSection *, MCFragment *>());
-  }
+  void setUndefined() { FragmentAndHasName.setPointer(nullptr); }

   bool isELF() const { return Kind == SymbolKindELF; }

@@ -295,10 +290,10 @@ public:
     return SymbolContents == SymContentsVariable;
   }

-  /// getVariableValue() - Get the value for variable symbols.
-  const MCExpr *getVariableValue() const {
+  /// getVariableValue - Get the value for variable symbols.
+  const MCExpr *getVariableValue(bool SetUsed = true) const {
     assert(isVariable() && "Invalid accessor!");
-    IsUsed = true;
+    IsUsed |= SetUsed;
     return Value;
   }

@@ -379,11 +374,13 @@ public:
     return SymbolContents == SymContentsCommon;
   }

-  MCFragment *getFragment() const {
-    return SectionOrFragmentAndHasName.getPointer().dyn_cast<MCFragment *>();
-  }
-  void setFragment(MCFragment *Value) const {
-    SectionOrFragmentAndHasName.setPointer(Value);
+  MCFragment *getFragment(bool SetUsed = true) const {
+    MCFragment *Fragment = FragmentAndHasName.getPointer();
+    if (Fragment || !isVariable())
+      return Fragment;
+    Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
+    FragmentAndHasName.setPointer(Fragment);
+    return Fragment;
   }

   bool isExternal() const { return IsExternal; }
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 36db3914f017..03b2dc9a282c 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -20,6 +20,7 @@ class AsmToken;
 class MCInst;
 class MCParsedAsmOperand;
 class MCStreamer;
+class MCSubtargetInfo;
 class SMLoc;
 class StringRef;
 template <typename T> class SmallVectorImpl;
@@ -29,6 +30,7 @@ typedef SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> OperandVector;
 enum AsmRewriteKind {
   AOK_Delete = 0,     // Rewrite should be ignored.
   AOK_Align,          // Rewrite align as .align.
+  AOK_EVEN,           // Rewrite even as .even.
   AOK_DotOperator,    // Rewrite a dot operator expression as an immediate.
                       // E.g., [eax].foo.bar -> [eax].8
   AOK_Emit,           // Rewrite _emit as .byte.
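The AsmRewriteKind enumerators double as indices into the AsmRewritePrecedence
table updated by the next hunk, so the two must stay in lockstep: adding
AOK_EVEN here without the matching precedence entry would silently misrank
rewrites. A minimal sketch of the lookup this layout enables (the helper below
is illustrative only, not part of this patch):

   // Order two rewrites recorded at the same source location: the table maps
   // each AsmRewriteKind to a small precedence value, and higher values win.
   static bool hasHigherPrecedence(AsmRewriteKind A, AsmRewriteKind B) {
     return AsmRewritePrecedence[A] > AsmRewritePrecedence[B];
   }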
@@ -44,6 +46,7 @@ enum AsmRewriteKind { const char AsmRewritePrecedence [] = { 0, // AOK_Delete 2, // AOK_Align + 2, // AOK_EVEN 2, // AOK_DotOperator 2, // AOK_Emit 4, // AOK_Imm @@ -92,7 +95,10 @@ private: MCTargetAsmParser(const MCTargetAsmParser &) = delete; void operator=(const MCTargetAsmParser &) = delete; protected: // Can only create subclasses. - MCTargetAsmParser(); + MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI); + + /// Create a copy of STI and return a non-const reference to it. + MCSubtargetInfo ©STI(); /// AvailableFeatures - The current set of available features. uint64_t AvailableFeatures; @@ -107,9 +113,14 @@ protected: // Can only create subclasses. /// Set of options which affects instrumentation of inline assembly. MCTargetOptions MCOptions; + /// Current STI. + const MCSubtargetInfo *STI; + public: ~MCTargetAsmParser() override; + const MCSubtargetInfo &getSTI() const; + uint64_t getAvailableFeatures() const { return AvailableFeatures; } void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; } @@ -143,6 +154,10 @@ public: /// \return True on failure. virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) = 0; + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + AsmToken Token, OperandVector &Operands) { + return ParseInstruction(Info, Name, Token.getLoc(), Operands); + } /// ParseDirective - Parse a target specific assembler directive /// @@ -156,10 +171,6 @@ public: /// \param DirectiveID - the identifier token of the directive. virtual bool ParseDirective(AsmToken DirectiveID) = 0; - /// mnemonicIsValid - This returns true if this is a valid mnemonic and false - /// otherwise. - virtual bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) = 0; - /// MatchAndEmitInstruction - Recognize a series of operands of a parsed /// instruction as an actual MCInst and emit it to the specified MCStreamer. /// This returns false on success and returns true on failure to match. 
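With this constructor change a target parser no longer owns a mutable
MCSubtargetInfo outright; it receives a const reference at construction and
must call copySTI() to obtain a private, writable copy before toggling
features. A hedged sketch of the resulting pattern (the subclass name and
feature string are invented for illustration, the required pure-virtual
overrides are elided, and ComputeAvailableFeatures stands in for the
TableGen-generated matcher helper):

   #include "llvm/MC/MCSubtargetInfo.h"
   #include "llvm/MC/MCTargetAsmParser.h"

   class ExampleAsmParser : public llvm::MCTargetAsmParser {
   public:
     ExampleAsmParser(const llvm::MCSubtargetInfo &STI,
                      const llvm::MCTargetOptions &Options)
         : MCTargetAsmParser(Options, STI) {}

     void switchMode() {
       // copySTI() clones the shared MCSubtargetInfo once and returns a
       // non-const reference, so mode switches stay local to this parser.
       llvm::MCSubtargetInfo &STI = copySTI();
       STI.ToggleFeature("example-mode"); // hypothetical feature name
       setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
     }
   };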
@@ -192,13 +203,18 @@ public: virtual void convertToMapAndConstraints(unsigned Kind, const OperandVector &Operands) = 0; + // Return whether this parser uses assignment statements with equals tokens + virtual bool equalIsAsmAssignment() { return true; }; + // Return whether this start of statement identifier is a label + virtual bool isLabel(AsmToken &Token) { return true; }; + virtual const MCExpr *applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind, MCContext &Ctx) { return nullptr; } - virtual void onLabelParsed(MCSymbol *Symbol) { }; + virtual void onLabelParsed(MCSymbol *Symbol) { } }; } // End llvm namespace diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 7f4f23eda27f..4b66a750cb7d 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -29,8 +29,10 @@ public: bool MCRelaxAll : 1; bool MCNoExecStack : 1; bool MCFatalWarnings : 1; + bool MCNoWarn : 1; bool MCSaveTempLabels : 1; bool MCUseDwarfDirectory : 1; + bool MCIncrementalLinkerCompatible : 1; bool ShowMCEncoding : 1; bool ShowMCInst : 1; bool AsmVerbose : 1; @@ -49,8 +51,10 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCRelaxAll) && ARE_EQUAL(MCNoExecStack) && ARE_EQUAL(MCFatalWarnings) && + ARE_EQUAL(MCNoWarn) && ARE_EQUAL(MCSaveTempLabels) && ARE_EQUAL(MCUseDwarfDirectory) && + ARE_EQUAL(MCIncrementalLinkerCompatible) && ARE_EQUAL(ShowMCEncoding) && ARE_EQUAL(ShowMCInst) && ARE_EQUAL(AsmVerbose) && diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index af23a92e6e99..5180208d33b6 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,6 +33,12 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); +cl::opt IncrementalLinkerCompatible( + "incremental-linker-compatible", + cl::desc( + "When used with filetype=obj, " + "emit an object file which can be used with an incremental linker")); + cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(0)); @@ -40,6 +46,12 @@ cl::opt ShowMCInst("asm-show-inst", cl::desc("Emit internal instruction representation to " "assembly file")); +cl::opt FatalWarnings("fatal-warnings", + cl::desc("Treat warnings as errors")); + +cl::opt NoWarn("no-warn", cl::desc("Suppress all warnings")); +cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn)); + cl::opt ABIName("target-abi", cl::Hidden, cl::desc("The name of the ABI to be targeted from the backend."), @@ -50,9 +62,12 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() { Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; + Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible; Options.DwarfVersion = DwarfVersion; Options.ShowMCInst = ShowMCInst; Options.ABIName = ABIName; + Options.MCFatalWarnings = FatalWarnings; + Options.MCNoWarn = NoWarn; return Options; } diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index 6bdf43685f21..ead08fd90ca0 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -35,10 +35,6 @@ class raw_ostream; /// relocation modifiers apply to the closest symbol or the whole /// expression. /// -/// In the general form, SymbolB can only be defined if SymbolA is, and both -/// must be in the same (non-external) section. 
The latter constraint is not -/// enforced, since a symbol's section may not be known at construction. -/// /// Note that this class must remain a simple POD value class, because we need /// it to live in unions etc. class MCValue { @@ -67,7 +63,6 @@ public: const MCSymbolRefExpr *SymB = nullptr, int64_t Val = 0, uint32_t RefKind = 0) { MCValue R; - assert((!SymB || SymA) && "Invalid relocatable MCValue!"); R.Cst = Val; R.SymA = SymA; R.SymB = SymB; diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h index 6fbc754f1125..fe1ada9b9e5b 100644 --- a/include/llvm/MC/MCWinCOFFStreamer.h +++ b/include/llvm/MC/MCWinCOFFStreamer.h @@ -73,7 +73,7 @@ protected: void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override; private: - LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const; + void Error(const Twine &Msg) const; }; } diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h index 2a18615eff62..4b5cf4357793 100644 --- a/include/llvm/MC/MachineLocation.h +++ b/include/llvm/MC/MachineLocation.h @@ -68,10 +68,6 @@ public: Register = R; Offset = O; } - -#ifndef NDEBUG - void dump(); -#endif }; inline bool operator!=(const MachineLocation &LHS, const MachineLocation &RHS) { diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h index 9e8b68f4340c..b09b93cfc377 100644 --- a/include/llvm/MC/SectionKind.h +++ b/include/llvm/MC/SectionKind.h @@ -1,4 +1,4 @@ -//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===// +//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements classes used to handle lowerings specific to common -// object file formats. -// -//===----------------------------------------------------------------------===// #ifndef LLVM_MC_SECTIONKIND_H #define LLVM_MC_SECTIONKIND_H @@ -99,21 +94,8 @@ class SectionKind { /// marked 'constant'. Common, - /// DataRel - This is the most general form of data that is written - /// to by the program, it can have random relocations to arbitrary - /// globals. - DataRel, - - /// DataRelLocal - This is writeable data that has a non-zero - /// initializer and has relocations in it, but all of the - /// relocations are known to be within the final linked image - /// the global is linked into. - DataRelLocal, - - /// DataNoRel - This is writeable data that has a non-zero - /// initializer, but whose initializer is known to have no - /// relocations. - DataNoRel, + /// This is writeable data that has a non-zero initializer. + Data, /// ReadOnlyWithRel - These are global variables that are never /// written to by the program, but that have relocations, so they @@ -121,15 +103,7 @@ class SectionKind { /// can write to them. If it chooses to, the dynamic linker can /// mark the pages these globals end up on as read-only after it is /// done with its relocation phase. - ReadOnlyWithRel, - - /// ReadOnlyWithRelLocal - This is data that is readonly by the - /// program, but must be writeable so that the dynamic linker - /// can perform relocations in it. This is used when we know - /// that all the relocations are to globals in this final - /// linked image. 
- ReadOnlyWithRelLocal - + ReadOnlyWithRel } K : 8; public: @@ -169,7 +143,7 @@ public: bool isThreadData() const { return K == ThreadData; } bool isGlobalWriteableData() const { - return isBSS() || isCommon() || isDataRel() || isReadOnlyWithRel(); + return isBSS() || isCommon() || isData() || isReadOnlyWithRel(); } bool isBSS() const { return K == BSS || K == BSSLocal || K == BSSExtern; } @@ -178,22 +152,10 @@ public: bool isCommon() const { return K == Common; } - bool isDataRel() const { - return K == DataRel || K == DataRelLocal || K == DataNoRel; - } - - bool isDataRelLocal() const { - return K == DataRelLocal || K == DataNoRel; - } - - bool isDataNoRel() const { return K == DataNoRel; } + bool isData() const { return K == Data; } bool isReadOnlyWithRel() const { - return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal; - } - - bool isReadOnlyWithRelLocal() const { - return K == ReadOnlyWithRelLocal; + return K == ReadOnlyWithRel; } private: static SectionKind get(Kind K) { @@ -224,13 +186,8 @@ public: static SectionKind getBSSLocal() { return get(BSSLocal); } static SectionKind getBSSExtern() { return get(BSSExtern); } static SectionKind getCommon() { return get(Common); } - static SectionKind getDataRel() { return get(DataRel); } - static SectionKind getDataRelLocal() { return get(DataRelLocal); } - static SectionKind getDataNoRel() { return get(DataNoRel); } + static SectionKind getData() { return get(Data); } static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); } - static SectionKind getReadOnlyWithRelLocal(){ - return get(ReadOnlyWithRelLocal); - } }; } // end namespace llvm diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h index 897d449254ea..adde86b45583 100644 --- a/include/llvm/MC/StringTableBuilder.h +++ b/include/llvm/MC/StringTableBuilder.h @@ -11,53 +11,51 @@ #define LLVM_MC_STRINGTABLEBUILDER_H #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/DenseMap.h" #include <cassert> namespace llvm { /// \brief Utility for building string tables with deduplicated suffixes. class StringTableBuilder { +public: + enum Kind { ELF, WinCOFF, MachO, RAW }; + +private: SmallString<256> StringTable; - StringMap<size_t> StringIndexMap; + DenseMap<StringRef, size_t> StringIndexMap; + size_t Size = 0; + Kind K; public: - /// \brief Add a string to the builder. Returns a StringRef to the internal - /// copy of s. Can only be used before the table is finalized. - StringRef add(StringRef s) { - assert(!isFinalized()); - return StringIndexMap.insert(std::make_pair(s, 0)).first->first(); - } + StringTableBuilder(Kind K); - enum Kind { - ELF, - WinCOFF, - MachO - }; + /// \brief Add a string to the builder. Returns the position of S in the + /// table. The position will be changed if finalize is used. + /// Can only be used before the table is finalized. + size_t add(StringRef S); /// \brief Analyze the strings and build the final table. No more strings can /// be added after this point. - void finalize(Kind kind); + void finalize(); /// \brief Retrieve the string table data. Can only be used after the table /// is finalized. - StringRef data() { + StringRef data() const { assert(isFinalized()); return StringTable; } /// \brief Get the offset of a string in the string table. Can only be used /// after the table is finalized.
- size_t getOffset(StringRef s) { - assert(isFinalized()); - assert(StringIndexMap.count(s) && "String is not in table!"); - return StringIndexMap[s]; - } + size_t getOffset(StringRef S) const; + const DenseMap<StringRef, size_t> &getMap() const { return StringIndexMap; } + size_t getSize() const { return Size; } void clear(); private: - bool isFinalized() { + bool isFinalized() const { return !StringTable.empty(); } }; diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index 2fb9b4ae2503..0d97b226d728 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -30,7 +30,7 @@ namespace llvm { // A container class for subtarget features. // This is convenient because std::bitset does not have a constructor // with an initializer list of set bits. -const unsigned MAX_SUBTARGET_FEATURES = 64; +const unsigned MAX_SUBTARGET_FEATURES = 128; class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> { public: // Cannot inherit constructors because it's not supported by VC++.. diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index 597f0d48c118..8dd042a2533f 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -37,7 +37,7 @@ struct ArchiveMemberHeader { llvm::StringRef getName() const; /// Members are not larger than 4GB. - uint32_t getSize() const; + ErrorOr<uint32_t> getSize() const; sys::fs::perms getAccessMode() const; sys::TimeValue getLastModified() const; @@ -52,6 +52,7 @@ class Archive : public Binary { virtual void anchor(); public: class Child { + friend Archive; const Archive *Parent; /// \brief Includes header but not padding byte. StringRef Data; @@ -62,19 +63,19 @@ public: return reinterpret_cast<const ArchiveMemberHeader *>(Data.data()); } + bool isThinMember() const; + public: - Child(const Archive *Parent, const char *Start); + Child(const Archive *Parent, const char *Start, std::error_code *EC); + Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile); bool operator ==(const Child &other) const { assert(Parent == other.Parent); return Data.begin() == other.Data.begin(); } - bool operator <(const Child &other) const { - return Data.begin() < other.Data.begin(); - } - - Child getNext() const; + const Archive *getParent() const { return Parent; } + ErrorOr<Child> getNext() const; ErrorOr<StringRef> getName() const; StringRef getRawName() const { return getHeader()->getName(); } @@ -90,9 +91,9 @@ public: return getHeader()->getAccessMode(); } /// \return the size of the archive member without the header or padding. - uint64_t getSize() const; + ErrorOr<uint64_t> getSize() const; /// \return the size in the archive header for this member. - uint64_t getRawSize() const; + ErrorOr<uint64_t> getRawSize() const; ErrorOr<StringRef> getBuffer() const; uint64_t getChildOffset() const; @@ -104,28 +105,32 @@ public: }; class child_iterator { - Child child; + ErrorOr<Child> child; public: - child_iterator() : child(Child(nullptr, nullptr)) {} + child_iterator() : child(Child(nullptr, nullptr, nullptr)) {} child_iterator(const Child &c) : child(c) {} - const Child *operator->() const { return &child; } - const Child &operator*() const { return child; } + child_iterator(std::error_code EC) : child(EC) {} + const ErrorOr<Child> *operator->() const { return &child; } + const ErrorOr<Child> &operator*() const { return child; } bool operator==(const child_iterator &other) const { - return child == other.child; + // We ignore error states so that comparisons with end() work, which + // allows range loops.
+ if (child.getError() || other.child.getError()) + return false; + return *child == *other.child; } bool operator!=(const child_iterator &other) const { return !(*this == other); } - bool operator<(const child_iterator &other) const { - return child < other.child; - } - + // Code in loops with child_iterators must check for errors on each loop + // iteration. And if there is an error break out of the loop. child_iterator &operator++() { // Preincrement - child = child.getNext(); + assert(child && "Can't increment iterator with error"); + child = child->getNext(); return *this; } }; @@ -145,7 +150,7 @@ public: , SymbolIndex(symi) , StringIndex(stri) {} StringRef getName() const; - ErrorOr getMember() const; + ErrorOr getMember() const; Symbol getNext() const; }; @@ -186,14 +191,13 @@ public: child_iterator child_begin(bool SkipInternal = true) const; child_iterator child_end() const; iterator_range children(bool SkipInternal = true) const { - return iterator_range(child_begin(SkipInternal), - child_end()); + return make_range(child_begin(SkipInternal), child_end()); } symbol_iterator symbol_begin() const; symbol_iterator symbol_end() const; iterator_range symbols() const { - return iterator_range(symbol_begin(), symbol_end()); + return make_range(symbol_begin(), symbol_end()); } // Cast methods. @@ -205,18 +209,17 @@ public: child_iterator findSym(StringRef name) const; bool hasSymbolTable() const; - child_iterator getSymbolTableChild() const { return SymbolTable; } - StringRef getSymbolTable() const { - // We know that the symbol table is not an external file, - // so we just assert there is no error. - return *SymbolTable->getBuffer(); - } + StringRef getSymbolTable() const { return SymbolTable; } uint32_t getNumberOfSymbols() const; private: - child_iterator SymbolTable; - child_iterator StringTable; - child_iterator FirstRegular; + StringRef SymbolTable; + StringRef StringTable; + + StringRef FirstRegularData; + uint16_t FirstRegularStartOfFile = -1; + void setFirstRegular(const Child &C); + unsigned Format : 2; unsigned IsThin : 1; mutable std::vector> ThinBuffers; diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h index 3648d0c77fb5..b5d2ba358080 100644 --- a/include/llvm/Object/ArchiveWriter.h +++ b/include/llvm/Object/ArchiveWriter.h @@ -24,17 +24,15 @@ class NewArchiveIterator { bool IsNewMember; StringRef Name; - object::Archive::child_iterator OldI; - - StringRef NewFilename; + object::Archive::Child OldMember; public: - NewArchiveIterator(object::Archive::child_iterator I, StringRef Name); - NewArchiveIterator(StringRef I, StringRef Name); + NewArchiveIterator(const object::Archive::Child &OldMember, StringRef Name); + NewArchiveIterator(StringRef FileName); bool isNewMember() const; StringRef getName() const; - object::Archive::child_iterator getOld() const; + const object::Archive::Child &getOld() const; StringRef getNew() const; llvm::ErrorOr getFD(sys::fs::file_status &NewStatus) const; @@ -43,7 +41,8 @@ public: std::pair writeArchive(StringRef ArcName, std::vector &NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic); + bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, + bool Thin); } #endif diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index a3d6d0d4d428..a0d1127781f6 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -41,7 +41,9 @@ protected: enum { ID_Archive, ID_MachOUniversalBinary, - ID_IR, // LLVM IR + ID_COFFImportFile, + ID_IR, // 
LLVM IR + ID_FunctionIndex, // Function summary index // Object and children. ID_StartObjects, @@ -113,10 +115,16 @@ public: return TypeID == ID_COFF; } + bool isCOFFImportFile() const { + return TypeID == ID_COFFImportFile; + } + bool isIR() const { return TypeID == ID_IR; } + bool isFunctionIndex() const { return TypeID == ID_FunctionIndex; } + bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 025a9dbc6bc0..1b0e2e36bd5e 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -653,8 +653,7 @@ protected: uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; void moveSectionNext(DataRefImpl &Sec) const override; std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; @@ -774,6 +773,7 @@ public: std::error_code getSectionContents(const coff_section *Sec, ArrayRef &Res) const; + uint64_t getImageBase() const; std::error_code getVaPtr(uint64_t VA, uintptr_t &Res) const; std::error_code getRvaPtr(uint32_t Rva, uintptr_t &Res) const; std::error_code getHintName(uint32_t Rva, uint16_t &Hint, diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h new file mode 100644 index 000000000000..b04a44ea60d2 --- /dev/null +++ b/include/llvm/Object/COFFImportFile.h @@ -0,0 +1,74 @@ +//===- COFFImportFile.h - COFF short import file implementation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF short import file is a special kind of file which contains +// only symbol names for DLL-exported symbols. This class implements +// SymbolicFile interface for the file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_COFF_IMPORT_FILE_H +#define LLVM_OBJECT_COFF_IMPORT_FILE_H + +#include "llvm/Object/COFF.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace object { + +class COFFImportFile : public SymbolicFile { +public: + COFFImportFile(MemoryBufferRef Source) + : SymbolicFile(ID_COFFImportFile, Source) {} + + static inline bool classof(Binary const *V) { return V->isCOFFImportFile(); } + + void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; } + + std::error_code printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const override { + if (Symb.p == 0) + OS << "__imp_"; + OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header)); + return std::error_code(); + } + + uint32_t getSymbolFlags(DataRefImpl Symb) const override { + return SymbolRef::SF_Global; + } + + basic_symbol_iterator symbol_begin_impl() const override { + return BasicSymbolRef(DataRefImpl(), this); + } + + basic_symbol_iterator symbol_end_impl() const override { + DataRefImpl Symb; + Symb.p = isCode() ? 
2 : 1; + return BasicSymbolRef(Symb, this); + } + + const coff_import_header *getCOFFImportHeader() const { + return reinterpret_cast( + Data.getBufferStart()); + } + +private: + bool isCode() const { + return getCOFFImportHeader()->getType() == COFF::IMPORT_CODE; + } +}; + +} // namespace object +} // namespace llvm + +#endif diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index cc271851e6b0..b0eaa3f5ed4d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -14,25 +14,9 @@ #ifndef LLVM_OBJECT_ELF_H #define LLVM_OBJECT_ELF_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/Object/ELFTypes.h" -#include "llvm/Object/Error.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include namespace llvm { namespace object { @@ -56,78 +40,6 @@ public: typedef typename std::conditional::type uintX_t; - /// \brief Iterate over constant sized entities. - template - class ELFEntityIterator { - public: - typedef ptrdiff_t difference_type; - typedef EntT value_type; - typedef std::forward_iterator_tag iterator_category; - typedef value_type &reference; - typedef value_type *pointer; - - /// \brief Default construct iterator. - ELFEntityIterator() : EntitySize(0), Current(nullptr) {} - ELFEntityIterator(uintX_t EntSize, const char *Start) - : EntitySize(EntSize), Current(Start) {} - - reference operator *() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return *reinterpret_cast(Current); - } - - pointer operator ->() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return reinterpret_cast(Current); - } - - bool operator ==(const ELFEntityIterator &Other) { - return Current == Other.Current; - } - - bool operator !=(const ELFEntityIterator &Other) { - return !(*this == Other); - } - - ELFEntityIterator &operator ++() { - assert(Current && "Attempted to increment an invalid iterator!"); - Current += EntitySize; - return *this; - } - - ELFEntityIterator &operator+(difference_type n) { - assert(Current && "Attempted to increment an invalid iterator!"); - Current += (n * EntitySize); - return *this; - } - - ELFEntityIterator &operator-(difference_type n) { - assert(Current && "Attempted to subtract an invalid iterator!"); - Current -= (n * EntitySize); - return *this; - } - - ELFEntityIterator operator ++(int) { - ELFEntityIterator Tmp = *this; - ++*this; - return Tmp; - } - - difference_type operator -(const ELFEntityIterator &Other) const { - assert(EntitySize == Other.EntitySize && - "Subtracting iterators of different EntitySize!"); - return (Current - Other.Current) / EntitySize; - } - - const char *get() const { return Current; } - - uintX_t getEntSize() const { return EntitySize; } - - private: - uintX_t EntitySize; - const char *Current; - }; - typedef Elf_Ehdr_Impl Elf_Ehdr; typedef Elf_Shdr_Impl Elf_Shdr; typedef Elf_Sym_Impl Elf_Sym; @@ -141,98 +53,22 @@ public: typedef Elf_Vernaux_Impl Elf_Vernaux; typedef Elf_Versym_Impl Elf_Versym; typedef Elf_Hash_Impl Elf_Hash; - typedef ELFEntityIterator Elf_Dyn_Iter; - typedef iterator_range Elf_Dyn_Range; - typedef ELFEntityIterator Elf_Rela_Iter; 
- typedef ELFEntityIterator Elf_Rel_Iter; + typedef Elf_GnuHash_Impl Elf_GnuHash; + typedef iterator_range Elf_Dyn_Range; typedef iterator_range Elf_Shdr_Range; - - /// \brief Archive files are 2 byte aligned, so we need this for - /// PointerIntPair to work. - template - class ArchivePointerTypeTraits { - public: - static inline const void *getAsVoidPointer(T *P) { return P; } - static inline T *getFromVoidPointer(const void *P) { - return static_cast(P); - } - enum { NumLowBitsAvailable = 1 }; - }; - typedef iterator_range Elf_Sym_Range; -private: - typedef SmallVector Sections_t; - typedef DenseMap IndexMap_t; - - StringRef Buf; - const uint8_t *base() const { return reinterpret_cast(Buf.data()); } +private: + + StringRef Buf; + const Elf_Ehdr *Header; const Elf_Shdr *SectionHeaderTable = nullptr; StringRef DotShstrtab; // Section header string table. - StringRef DotStrtab; // Symbol header string table. - const Elf_Shdr *dot_symtab_sec = nullptr; // Symbol table section. - const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section. - const Elf_Hash *HashTable = nullptr; - - const Elf_Shdr *SymbolTableSectionHeaderIndex = nullptr; - DenseMap ExtendedSymbolTable; - - const Elf_Shdr *dot_gnu_version_sec = nullptr; // .gnu.version - const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r - const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d - - /// \brief Represents a region described by entries in the .dynamic table. - struct DynRegionInfo { - DynRegionInfo() : Addr(nullptr), Size(0), EntSize(0) {} - /// \brief Address in current address space. - const void *Addr; - /// \brief Size in bytes of the region. - uintX_t Size; - /// \brief Size of each entity in the region. - uintX_t EntSize; - }; - - DynRegionInfo DynamicRegion; - DynRegionInfo DynHashRegion; - DynRegionInfo DynStrRegion; - DynRegionInfo DynRelaRegion; - - // Pointer to SONAME entry in dynamic string table - // This is set the first time getLoadName is called. - mutable const char *dt_soname = nullptr; - - // Records for each version index the corresponding Verdef or Vernaux entry. - // This is filled the first time LoadVersionMap() is called. - class VersionMapEntry : public PointerIntPair { - public: - // If the integer is 0, this is an Elf_Verdef*. - // If the integer is 1, this is an Elf_Vernaux*. - VersionMapEntry() : PointerIntPair(nullptr, 0) { } - VersionMapEntry(const Elf_Verdef *verdef) - : PointerIntPair(verdef, 0) { } - VersionMapEntry(const Elf_Vernaux *vernaux) - : PointerIntPair(vernaux, 1) { } - bool isNull() const { return getPointer() == nullptr; } - bool isVerdef() const { return !isNull() && getInt() == 0; } - bool isVernaux() const { return !isNull() && getInt() == 1; } - const Elf_Verdef *getVerdef() const { - return isVerdef() ? (const Elf_Verdef*)getPointer() : nullptr; - } - const Elf_Vernaux *getVernaux() const { - return isVernaux() ? 
(const Elf_Vernaux*)getPointer() : nullptr; - } - }; - mutable SmallVector VersionMap; - void LoadVersionDefs(const Elf_Shdr *sec) const; - void LoadVersionNeeds(const Elf_Shdr *ec) const; - void LoadVersionMap() const; - - void scanDynamicTable(); public: template @@ -240,25 +76,20 @@ public: template const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const; - const Elf_Shdr *getDotSymtabSec() const { return dot_symtab_sec; } - const Elf_Shdr *getDotDynSymSec() const { return DotDynSymSec; } - const Elf_Hash *getHashTable() const { return HashTable; } - ErrorOr getStringTable(const Elf_Shdr *Section) const; - const char *getDynamicString(uintX_t Offset) const; - ErrorOr getSymbolVersion(const Elf_Shdr *section, - const Elf_Sym *Symb, - bool &IsDefault) const; + ErrorOr getStringTableForSymtab(const Elf_Shdr &Section) const; + + ErrorOr> getSHNDXTable(const Elf_Shdr &Section) const; + void VerifyStrTab(const Elf_Shdr *sh) const; StringRef getRelocationTypeName(uint32_t Type) const; void getRelocationTypeName(uint32_t Type, SmallVectorImpl &Result) const; - /// \brief Get the symbol table section and symbol for a given relocation. - template - std::pair - getRelocationSymbol(const Elf_Shdr *RelSec, const RelT *Rel) const; + /// \brief Get the symbol for a given relocation. + const Elf_Sym *getRelocationSymbol(const Elf_Rel *Rel, + const Elf_Shdr *SymTab) const; ELFFile(StringRef Object, std::error_code &EC); @@ -273,111 +104,116 @@ public: Header->getDataEncoding() == ELF::ELFDATA2LSB; } + ErrorOr dynamic_table_begin(const Elf_Phdr *Phdr) const; + ErrorOr dynamic_table_end(const Elf_Phdr *Phdr) const; + ErrorOr dynamic_table(const Elf_Phdr *Phdr) const { + ErrorOr Begin = dynamic_table_begin(Phdr); + if (std::error_code EC = Begin.getError()) + return EC; + ErrorOr End = dynamic_table_end(Phdr); + if (std::error_code EC = End.getError()) + return EC; + return make_range(*Begin, *End); + } + const Elf_Shdr *section_begin() const; const Elf_Shdr *section_end() const; Elf_Shdr_Range sections() const { return make_range(section_begin(), section_end()); } - const Elf_Sym *symbol_begin() const; - const Elf_Sym *symbol_end() const; - Elf_Sym_Range symbols() const { - return make_range(symbol_begin(), symbol_end()); - } - - Elf_Dyn_Iter dynamic_table_begin() const; - /// \param NULLEnd use one past the first DT_NULL entry as the end instead of - /// the section size. 
- Elf_Dyn_Iter dynamic_table_end(bool NULLEnd = false) const; - Elf_Dyn_Range dynamic_table(bool NULLEnd = false) const { - return make_range(dynamic_table_begin(), dynamic_table_end(NULLEnd)); - } - - const Elf_Sym *dynamic_symbol_begin() const { - if (!DotDynSymSec) + const Elf_Sym *symbol_begin(const Elf_Shdr *Sec) const { + if (!Sec) return nullptr; - if (DotDynSymSec->sh_entsize != sizeof(Elf_Sym)) + if (Sec->sh_entsize != sizeof(Elf_Sym)) report_fatal_error("Invalid symbol size"); - return reinterpret_cast(base() + DotDynSymSec->sh_offset); + return reinterpret_cast(base() + Sec->sh_offset); } - - const Elf_Sym *dynamic_symbol_end() const { - if (!DotDynSymSec) + const Elf_Sym *symbol_end(const Elf_Shdr *Sec) const { + if (!Sec) return nullptr; - return reinterpret_cast(base() + DotDynSymSec->sh_offset + - DotDynSymSec->sh_size); + uint64_t Size = Sec->sh_size; + if (Size % sizeof(Elf_Sym)) + report_fatal_error("Invalid symbol table size"); + return symbol_begin(Sec) + Size / sizeof(Elf_Sym); + } + Elf_Sym_Range symbols(const Elf_Shdr *Sec) const { + return make_range(symbol_begin(Sec), symbol_end(Sec)); } - Elf_Sym_Range dynamic_symbols() const { - return make_range(dynamic_symbol_begin(), dynamic_symbol_end()); + typedef iterator_range Elf_Rela_Range; + + const Elf_Rela *rela_begin(const Elf_Shdr *sec) const { + if (sec->sh_entsize != sizeof(Elf_Rela)) + report_fatal_error("Invalid relocation entry size"); + return reinterpret_cast(base() + sec->sh_offset); } - Elf_Rela_Iter dyn_rela_begin() const { - if (DynRelaRegion.Addr) - return Elf_Rela_Iter(DynRelaRegion.EntSize, - (const char *)DynRelaRegion.Addr); - return Elf_Rela_Iter(0, nullptr); + const Elf_Rela *rela_end(const Elf_Shdr *sec) const { + uint64_t Size = sec->sh_size; + if (Size % sizeof(Elf_Rela)) + report_fatal_error("Invalid relocation table size"); + return rela_begin(sec) + Size / sizeof(Elf_Rela); } - Elf_Rela_Iter dyn_rela_end() const { - if (DynRelaRegion.Addr) - return Elf_Rela_Iter( - DynRelaRegion.EntSize, - (const char *)DynRelaRegion.Addr + DynRelaRegion.Size); - return Elf_Rela_Iter(0, nullptr); + Elf_Rela_Range relas(const Elf_Shdr *Sec) const { + return make_range(rela_begin(Sec), rela_end(Sec)); } - Elf_Rela_Iter rela_begin(const Elf_Shdr *sec) const { - return Elf_Rela_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset)); + const Elf_Rel *rel_begin(const Elf_Shdr *sec) const { + if (sec->sh_entsize != sizeof(Elf_Rel)) + report_fatal_error("Invalid relocation entry size"); + return reinterpret_cast(base() + sec->sh_offset); } - Elf_Rela_Iter rela_end(const Elf_Shdr *sec) const { - return Elf_Rela_Iter( - sec->sh_entsize, - (const char *)(base() + sec->sh_offset + sec->sh_size)); + const Elf_Rel *rel_end(const Elf_Shdr *sec) const { + uint64_t Size = sec->sh_size; + if (Size % sizeof(Elf_Rel)) + report_fatal_error("Invalid relocation table size"); + return rel_begin(sec) + Size / sizeof(Elf_Rel); } - Elf_Rel_Iter rel_begin(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset)); - } - - Elf_Rel_Iter rel_end(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset + sec->sh_size)); + typedef iterator_range Elf_Rel_Range; + Elf_Rel_Range rels(const Elf_Shdr *Sec) const { + return make_range(rel_begin(Sec), rel_end(Sec)); } /// \brief Iterate over program header table. 
- typedef ELFEntityIterator Elf_Phdr_Iter; - - Elf_Phdr_Iter program_header_begin() const { - return Elf_Phdr_Iter(Header->e_phentsize, - (const char*)base() + Header->e_phoff); + const Elf_Phdr *program_header_begin() const { + if (Header->e_phnum && Header->e_phentsize != sizeof(Elf_Phdr)) + report_fatal_error("Invalid program header size"); + return reinterpret_cast(base() + Header->e_phoff); } - Elf_Phdr_Iter program_header_end() const { - return Elf_Phdr_Iter(Header->e_phentsize, - (const char*)base() + - Header->e_phoff + - (Header->e_phnum * Header->e_phentsize)); + const Elf_Phdr *program_header_end() const { + return program_header_begin() + Header->e_phnum; + } + + typedef iterator_range Elf_Phdr_Range; + + const Elf_Phdr_Range program_headers() const { + return make_range(program_header_begin(), program_header_end()); } uint64_t getNumSections() const; uintX_t getStringTableIndex() const; - ELF::Elf64_Word getExtendedSymbolTableIndex(const Elf_Sym *symb) const; + uint32_t getExtendedSymbolTableIndex(const Elf_Sym *Sym, + const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const; const Elf_Ehdr *getHeader() const { return Header; } - ErrorOr getSection(const Elf_Sym *symb) const; + ErrorOr getSection(const Elf_Sym *Sym, + const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const; ErrorOr getSection(uint32_t Index) const; - const Elf_Sym *getSymbol(uint32_t index) const; - ErrorOr getStaticSymbolName(const Elf_Sym *Symb) const; - ErrorOr getDynamicSymbolName(const Elf_Sym *Symb) const; - ErrorOr getSymbolName(const Elf_Sym *Symb, bool IsDynamic) const; + const Elf_Sym *getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { + return &*(symbol_begin(Sec) + Index); + } ErrorOr getSectionName(const Elf_Shdr *Section) const; + template + ErrorOr> getSectionContentsAsArray(const Elf_Shdr *Sec) const; ErrorOr > getSectionContents(const Elf_Shdr *Sec) const; - StringRef getLoadName() const; }; typedef ELFFile> ELF32LEFile; @@ -385,118 +221,50 @@ typedef ELFFile> ELF64LEFile; typedef ELFFile> ELF32BEFile; typedef ELFFile> ELF64BEFile; -// Iterate through the version definitions, and place each Elf_Verdef -// in the VersionMap according to its index. template -void ELFFile::LoadVersionDefs(const Elf_Shdr *sec) const { - unsigned vd_size = sec->sh_size; // Size of section in bytes - unsigned vd_count = sec->sh_info; // Number of Verdef entries - const char *sec_start = (const char*)base() + sec->sh_offset; - const char *sec_end = sec_start + vd_size; - // The first Verdef entry is at the start of the section. - const char *p = sec_start; - for (unsigned i = 0; i < vd_count; i++) { - if (p + sizeof(Elf_Verdef) > sec_end) - report_fatal_error("Section ended unexpectedly while scanning " - "version definitions."); - const Elf_Verdef *vd = reinterpret_cast(p); - if (vd->vd_version != ELF::VER_DEF_CURRENT) - report_fatal_error("Unexpected verdef version"); - size_t index = vd->vd_ndx & ELF::VERSYM_VERSION; - if (index >= VersionMap.size()) - VersionMap.resize(index + 1); - VersionMap[index] = VersionMapEntry(vd); - p += vd->vd_next; - } -} +uint32_t ELFFile::getExtendedSymbolTableIndex( + const Elf_Sym *Sym, const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const { + assert(Sym->st_shndx == ELF::SHN_XINDEX); + unsigned Index = Sym - symbol_begin(SymTab); -// Iterate through the versions needed section, and place each Elf_Vernaux -// in the VersionMap according to its index. 
-template -void ELFFile::LoadVersionNeeds(const Elf_Shdr *sec) const { - unsigned vn_size = sec->sh_size; // Size of section in bytes - unsigned vn_count = sec->sh_info; // Number of Verneed entries - const char *sec_start = (const char *)base() + sec->sh_offset; - const char *sec_end = sec_start + vn_size; - // The first Verneed entry is at the start of the section. - const char *p = sec_start; - for (unsigned i = 0; i < vn_count; i++) { - if (p + sizeof(Elf_Verneed) > sec_end) - report_fatal_error("Section ended unexpectedly while scanning " - "version needed records."); - const Elf_Verneed *vn = reinterpret_cast(p); - if (vn->vn_version != ELF::VER_NEED_CURRENT) - report_fatal_error("Unexpected verneed version"); - // Iterate through the Vernaux entries - const char *paux = p + vn->vn_aux; - for (unsigned j = 0; j < vn->vn_cnt; j++) { - if (paux + sizeof(Elf_Vernaux) > sec_end) - report_fatal_error("Section ended unexpected while scanning auxiliary " - "version needed records."); - const Elf_Vernaux *vna = reinterpret_cast(paux); - size_t index = vna->vna_other & ELF::VERSYM_VERSION; - if (index >= VersionMap.size()) - VersionMap.resize(index + 1); - VersionMap[index] = VersionMapEntry(vna); - paux += vna->vna_next; - } - p += vn->vn_next; - } -} - -template -void ELFFile::LoadVersionMap() const { - // If there is no dynamic symtab or version table, there is nothing to do. - if (!DotDynSymSec || !dot_gnu_version_sec) - return; - - // Has the VersionMap already been loaded? - if (VersionMap.size() > 0) - return; - - // The first two version indexes are reserved. - // Index 0 is LOCAL, index 1 is GLOBAL. - VersionMap.push_back(VersionMapEntry()); - VersionMap.push_back(VersionMapEntry()); - - if (dot_gnu_version_d_sec) - LoadVersionDefs(dot_gnu_version_d_sec); - - if (dot_gnu_version_r_sec) - LoadVersionNeeds(dot_gnu_version_r_sec); -} - -template -ELF::Elf64_Word -ELFFile::getExtendedSymbolTableIndex(const Elf_Sym *symb) const { - assert(symb->st_shndx == ELF::SHN_XINDEX); - return ExtendedSymbolTable.lookup(symb); + // The size of the table was checked in getSHNDXTable. 
+ return ShndxTable[Index]; } template ErrorOr::Elf_Shdr *> -ELFFile::getSection(const Elf_Sym *symb) const { - uint32_t Index = symb->st_shndx; +ELFFile::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const { + uint32_t Index = Sym->st_shndx; if (Index == ELF::SHN_XINDEX) - return getSection(ExtendedSymbolTable.lookup(symb)); + return getSection(getExtendedSymbolTableIndex(Sym, SymTab, ShndxTable)); + if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE) return nullptr; - return getSection(symb->st_shndx); + return getSection(Sym->st_shndx); } template -const typename ELFFile::Elf_Sym * -ELFFile::getSymbol(uint32_t Index) const { - return &*(symbol_begin() + Index); -} +template +ErrorOr> +ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { + uintX_t Offset = Sec->sh_offset; + uintX_t Size = Sec->sh_size; -template -ErrorOr > -ELFFile::getSectionContents(const Elf_Shdr *Sec) const { - if (Sec->sh_offset + Sec->sh_size > Buf.size()) + if (Size % sizeof(T)) return object_error::parse_failed; - const uint8_t *Start = base() + Sec->sh_offset; - return makeArrayRef(Start, Sec->sh_size); + if (Offset + Size > Buf.size()) + return object_error::parse_failed; + + const T *Start = reinterpret_cast(base() + Offset); + return makeArrayRef(Start, Size / sizeof(T)); +} + +template +ErrorOr> +ELFFile::getSectionContents(const Elf_Shdr *Sec) const { + return getSectionContentsAsArray(Sec); } template @@ -536,18 +304,13 @@ void ELFFile::getRelocationTypeName(uint32_t Type, } template -template -std::pair::Elf_Shdr *, - const typename ELFFile::Elf_Sym *> -ELFFile::getRelocationSymbol(const Elf_Shdr *Sec, const RelT *Rel) const { - if (!Sec->sh_link) - return std::make_pair(nullptr, nullptr); - ErrorOr SymTableOrErr = getSection(Sec->sh_link); - if (std::error_code EC = SymTableOrErr.getError()) - report_fatal_error(EC.message()); - const Elf_Shdr *SymTable = *SymTableOrErr; - return std::make_pair( - SymTable, getEntry(SymTable, Rel->getSymbol(isMips64EL()))); +const typename ELFFile::Elf_Sym * +ELFFile::getRelocationSymbol(const Elf_Rel *Rel, + const Elf_Shdr *SymTab) const { + uint32_t Index = Rel->getSymbol(isMips64EL()); + if (Index == 0) + return nullptr; + return getEntry(SymTab, Index); } template @@ -584,10 +347,8 @@ ELFFile::ELFFile(StringRef Object, std::error_code &EC) Header = reinterpret_cast(base()); - if (Header->e_shoff == 0) { - scanDynamicTable(); + if (Header->e_shoff == 0) return; - } const uint64_t SectionTableOffset = Header->e_shoff; @@ -608,185 +369,25 @@ ELFFile::ELFFile(StringRef Object, std::error_code &EC) return; } - // Scan sections for special sections. - - for (const Elf_Shdr &Sec : sections()) { - switch (Sec.sh_type) { - case ELF::SHT_HASH: - if (HashTable) { - EC = object_error::parse_failed; - return; - } - HashTable = reinterpret_cast(base() + Sec.sh_offset); - break; - case ELF::SHT_SYMTAB_SHNDX: - if (SymbolTableSectionHeaderIndex) { - // More than one .symtab_shndx! - EC = object_error::parse_failed; - return; - } - SymbolTableSectionHeaderIndex = &Sec; - break; - case ELF::SHT_SYMTAB: { - if (dot_symtab_sec) { - // More than one .symtab! 
- EC = object_error::parse_failed; - return; - } - dot_symtab_sec = &Sec; - ErrorOr SectionOrErr = getSection(Sec.sh_link); - if ((EC = SectionOrErr.getError())) - return; - ErrorOr SymtabOrErr = getStringTable(*SectionOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DotStrtab = *SymtabOrErr; - } break; - case ELF::SHT_DYNSYM: { - if (DotDynSymSec) { - // More than one .dynsym! - EC = object_error::parse_failed; - return; - } - DotDynSymSec = &Sec; - ErrorOr SectionOrErr = getSection(Sec.sh_link); - if ((EC = SectionOrErr.getError())) - return; - ErrorOr SymtabOrErr = getStringTable(*SectionOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DynStrRegion.Addr = SymtabOrErr->data(); - DynStrRegion.Size = SymtabOrErr->size(); - DynStrRegion.EntSize = 1; - break; - } - case ELF::SHT_DYNAMIC: - if (DynamicRegion.Addr) { - // More than one .dynamic! - EC = object_error::parse_failed; - return; - } - DynamicRegion.Addr = base() + Sec.sh_offset; - DynamicRegion.Size = Sec.sh_size; - DynamicRegion.EntSize = Sec.sh_entsize; - break; - case ELF::SHT_GNU_versym: - if (dot_gnu_version_sec != nullptr) { - // More than one .gnu.version section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_sec = &Sec; - break; - case ELF::SHT_GNU_verdef: - if (dot_gnu_version_d_sec != nullptr) { - // More than one .gnu.version_d section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_d_sec = &Sec; - break; - case ELF::SHT_GNU_verneed: - if (dot_gnu_version_r_sec != nullptr) { - // More than one .gnu.version_r section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_r_sec = &Sec; - break; - } - } - // Get string table sections. - ErrorOr StrTabSecOrErr = getSection(getStringTableIndex()); - if ((EC = StrTabSecOrErr.getError())) - return; + uintX_t StringTableIndex = getStringTableIndex(); + if (StringTableIndex) { + ErrorOr StrTabSecOrErr = getSection(StringTableIndex); + if ((EC = StrTabSecOrErr.getError())) + return; - ErrorOr SymtabOrErr = getStringTable(*StrTabSecOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DotShstrtab = *SymtabOrErr; - - // Build symbol name side-mapping if there is one. - if (SymbolTableSectionHeaderIndex) { - const Elf_Word *ShndxTable = reinterpret_cast(base() + - SymbolTableSectionHeaderIndex->sh_offset); - for (const Elf_Sym &S : symbols()) { - if (*ShndxTable != ELF::SHN_UNDEF) - ExtendedSymbolTable[&S] = *ShndxTable; - ++ShndxTable; - } + ErrorOr StringTableOrErr = getStringTable(*StrTabSecOrErr); + if ((EC = StringTableOrErr.getError())) + return; + DotShstrtab = *StringTableOrErr; } - scanDynamicTable(); - EC = std::error_code(); } template -void ELFFile::scanDynamicTable() { - // Build load-address to file-offset map. - typedef IntervalMap< - uintX_t, uintptr_t, - IntervalMapImpl::NodeSizer::LeafSize, - IntervalMapHalfOpenInfo> LoadMapT; - typename LoadMapT::Allocator Alloc; - // Allocate the IntervalMap on the heap to work around MSVC bug where the - // stack doesn't get realigned despite LoadMap having alignment 8 (PR24113). 
- std::unique_ptr LoadMap(new LoadMapT(Alloc)); - - for (Elf_Phdr_Iter PhdrI = program_header_begin(), - PhdrE = program_header_end(); - PhdrI != PhdrE; ++PhdrI) { - if (PhdrI->p_type == ELF::PT_DYNAMIC) { - DynamicRegion.Addr = base() + PhdrI->p_offset; - DynamicRegion.Size = PhdrI->p_filesz; - DynamicRegion.EntSize = sizeof(Elf_Dyn); - continue; - } - if (PhdrI->p_type != ELF::PT_LOAD) - continue; - if (PhdrI->p_filesz == 0) - continue; - LoadMap->insert(PhdrI->p_vaddr, PhdrI->p_vaddr + PhdrI->p_filesz, - PhdrI->p_offset); - } - - auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * { - auto I = LoadMap->find(VAddr); - if (I == LoadMap->end()) - return nullptr; - return this->base() + I.value() + (VAddr - I.start()); - }; - - for (Elf_Dyn_Iter DynI = dynamic_table_begin(), DynE = dynamic_table_end(); - DynI != DynE; ++DynI) { - switch (DynI->d_tag) { - case ELF::DT_HASH: - if (HashTable) - continue; - HashTable = - reinterpret_cast(toMappedAddr(DynI->getPtr())); - break; - case ELF::DT_STRTAB: - if (!DynStrRegion.Addr) - DynStrRegion.Addr = toMappedAddr(DynI->getPtr()); - break; - case ELF::DT_STRSZ: - if (!DynStrRegion.Size) - DynStrRegion.Size = DynI->getVal(); - break; - case ELF::DT_RELA: - if (!DynRelaRegion.Addr) - DynRelaRegion.Addr = toMappedAddr(DynI->getPtr()); - break; - case ELF::DT_RELASZ: - DynRelaRegion.Size = DynI->getVal(); - break; - case ELF::DT_RELAENT: - DynRelaRegion.EntSize = DynI->getVal(); - } - } +static bool compareAddr(uint64_t VAddr, const Elf_Phdr_Impl *Phdr) { + return VAddr < Phdr->p_vaddr; } template @@ -803,64 +404,31 @@ const typename ELFFile::Elf_Shdr *ELFFile::section_end() const { } template -const typename ELFFile::Elf_Sym *ELFFile::symbol_begin() const { - if (!dot_symtab_sec) +ErrorOr::Elf_Dyn *> +ELFFile::dynamic_table_begin(const Elf_Phdr *Phdr) const { + if (!Phdr) return nullptr; - if (dot_symtab_sec->sh_entsize != sizeof(Elf_Sym)) - report_fatal_error("Invalid symbol size"); - return reinterpret_cast(base() + dot_symtab_sec->sh_offset); + assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header"); + uintX_t Offset = Phdr->p_offset; + if (Offset > Buf.size()) + return object_error::parse_failed; + return reinterpret_cast(base() + Offset); } template -const typename ELFFile::Elf_Sym *ELFFile::symbol_end() const { - if (!dot_symtab_sec) +ErrorOr::Elf_Dyn *> +ELFFile::dynamic_table_end(const Elf_Phdr *Phdr) const { + if (!Phdr) return nullptr; - return reinterpret_cast(base() + dot_symtab_sec->sh_offset + - dot_symtab_sec->sh_size); -} - -template -typename ELFFile::Elf_Dyn_Iter -ELFFile::dynamic_table_begin() const { - if (DynamicRegion.Addr) - return Elf_Dyn_Iter(DynamicRegion.EntSize, - (const char *)DynamicRegion.Addr); - return Elf_Dyn_Iter(0, nullptr); -} - -template -typename ELFFile::Elf_Dyn_Iter -ELFFile::dynamic_table_end(bool NULLEnd) const { - if (!DynamicRegion.Addr) - return Elf_Dyn_Iter(0, nullptr); - Elf_Dyn_Iter Ret(DynamicRegion.EntSize, - (const char *)DynamicRegion.Addr + DynamicRegion.Size); - - if (NULLEnd) { - Elf_Dyn_Iter Start = dynamic_table_begin(); - while (Start != Ret && Start->getTag() != ELF::DT_NULL) - ++Start; - - // Include the DT_NULL. 
- if (Start != Ret) - ++Start; - Ret = Start; - } - return Ret; -} - -template <class ELFT> -StringRef ELFFile<ELFT>::getLoadName() const { - if (!dt_soname) { - dt_soname = ""; - // Find the DT_SONAME entry - for (const auto &Entry : dynamic_table()) - if (Entry.getTag() == ELF::DT_SONAME) { - dt_soname = getDynamicString(Entry.getVal()); - break; - } - } - return dt_soname; + assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header"); + uintX_t Size = Phdr->p_filesz; + if (Size % sizeof(Elf_Dyn)) + return object_error::elf_invalid_dynamic_table_size; + // FIXME: Check for overflow? + uintX_t End = Phdr->p_offset + Size; + if (End > Buf.size()) + return object_error::parse_failed; + return reinterpret_cast<const Elf_Dyn *>(base() + End); } template <class ELFT> @@ -908,127 +476,52 @@ ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const { } template <class ELFT> -const char *ELFFile<ELFT>::getDynamicString(uintX_t Offset) const { - if (Offset >= DynStrRegion.Size) - return nullptr; - return (const char *)DynStrRegion.Addr + Offset; +ErrorOr<ArrayRef<typename ELFFile<ELFT>::Elf_Word>> +ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section) const { + assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX); + const Elf_Word *ShndxTableBegin = + reinterpret_cast<const Elf_Word *>(base() + Section.sh_offset); + uintX_t Size = Section.sh_size; + if (Size % sizeof(uint32_t)) + return object_error::parse_failed; + uintX_t NumSymbols = Size / sizeof(uint32_t); + const Elf_Word *ShndxTableEnd = ShndxTableBegin + NumSymbols; + if (reinterpret_cast<const char *>(ShndxTableEnd) > Buf.end()) + return object_error::parse_failed; + ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Section.sh_link); + if (std::error_code EC = SymTableOrErr.getError()) + return EC; + const Elf_Shdr &SymTable = **SymTableOrErr; + if (SymTable.sh_type != ELF::SHT_SYMTAB && + SymTable.sh_type != ELF::SHT_DYNSYM) + return object_error::parse_failed; + if (NumSymbols != (SymTable.sh_size / sizeof(Elf_Sym))) + return object_error::parse_failed; + return makeArrayRef(ShndxTableBegin, ShndxTableEnd); } template <class ELFT> ErrorOr<StringRef> -ELFFile<ELFT>::getStaticSymbolName(const Elf_Sym *Symb) const { - return Symb->getName(DotStrtab); -} - -template <class ELFT> -ErrorOr<StringRef> -ELFFile<ELFT>::getDynamicSymbolName(const Elf_Sym *Symb) const { - return StringRef(getDynamicString(Symb->st_name)); -} - -template <class ELFT> -ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Sym *Symb, - bool IsDynamic) const { - if (IsDynamic) - return getDynamicSymbolName(Symb); - return getStaticSymbolName(Symb); +ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec) const { + if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM) + return object_error::parse_failed; + ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link); + if (std::error_code EC = SectionOrErr.getError()) + return EC; + return getStringTable(*SectionOrErr); } template <class ELFT> ErrorOr<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const { uint32_t Offset = Section->sh_name; + if (Offset == 0) + return StringRef(); if (Offset >= DotShstrtab.size()) return object_error::parse_failed; return StringRef(DotShstrtab.data() + Offset); } -template <class ELFT> -ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section, - const Elf_Sym *symb, - bool &IsDefault) const { - StringRef StrTab; - if (section) { - ErrorOr<StringRef> StrTabOrErr = getStringTable(section); - if (std::error_code EC = StrTabOrErr.getError()) - return EC; - StrTab = *StrTabOrErr; - } - // Handle non-dynamic symbols. - if (section != DotDynSymSec && section != nullptr) { - // Non-dynamic symbols can have versions in their names - // A name of the form 'foo@V1' indicates version 'V1', non-default.
- // A name of the form 'foo@@V2' indicates version 'V2', default version. - ErrorOr SymName = symb->getName(StrTab); - if (!SymName) - return SymName; - StringRef Name = *SymName; - size_t atpos = Name.find('@'); - if (atpos == StringRef::npos) { - IsDefault = false; - return StringRef(""); - } - ++atpos; - if (atpos < Name.size() && Name[atpos] == '@') { - IsDefault = true; - ++atpos; - } else { - IsDefault = false; - } - return Name.substr(atpos); - } - - // This is a dynamic symbol. Look in the GNU symbol version table. - if (!dot_gnu_version_sec) { - // No version table. - IsDefault = false; - return StringRef(""); - } - - // Determine the position in the symbol table of this entry. - size_t entry_index = - (reinterpret_cast(symb) - DotDynSymSec->sh_offset - - reinterpret_cast(base())) / - sizeof(Elf_Sym); - - // Get the corresponding version index entry - const Elf_Versym *vs = getEntry(dot_gnu_version_sec, entry_index); - size_t version_index = vs->vs_index & ELF::VERSYM_VERSION; - - // Special markers for unversioned symbols. - if (version_index == ELF::VER_NDX_LOCAL || - version_index == ELF::VER_NDX_GLOBAL) { - IsDefault = false; - return StringRef(""); - } - - // Lookup this symbol in the version table - LoadVersionMap(); - if (version_index >= VersionMap.size() || VersionMap[version_index].isNull()) - return object_error::parse_failed; - const VersionMapEntry &entry = VersionMap[version_index]; - - // Get the version name string - size_t name_offset; - if (entry.isVerdef()) { - // The first Verdaux entry holds the name. - name_offset = entry.getVerdef()->getAux()->vda_name; - } else { - name_offset = entry.getVernaux()->vna_name; - } - - // Set IsDefault - if (entry.isVerdef()) { - IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN); - } else { - IsDefault = false; - } - - if (name_offset >= DynStrRegion.Size) - return object_error::parse_failed; - return StringRef(getDynamicString(name_offset)); -} - /// This function returns the hash value for a symbol in the .dynsym section /// Name of the API remains consistent as specified in the libelf /// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 6e8ace427a20..5823848aaacb 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -189,11 +189,13 @@ public: typedef typename ELFFile::Elf_Rela Elf_Rela; typedef typename ELFFile::Elf_Dyn Elf_Dyn; - typedef typename ELFFile::Elf_Dyn_Iter Elf_Dyn_Iter; - protected: ELFFile EF; + const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section. + const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section. 
+ ArrayRef ShndxTable; + void moveSymbolNext(DataRefImpl &Symb) const override; ErrorOr getSymbolName(DataRefImpl Symb) const override; ErrorOr getSymbolAddress(DataRefImpl Symb) const override; @@ -204,9 +206,9 @@ protected: uint8_t getSymbolOther(DataRefImpl Symb) const override; uint8_t getSymbolELFType(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; - section_iterator getSymbolSection(const Elf_Sym *Symb) const; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(const Elf_Sym *Symb, + const Elf_Shdr *SymTab) const; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; void moveSectionNext(DataRefImpl &Sec) const override; std::error_code getSectionName(DataRefImpl Sec, @@ -240,10 +242,6 @@ protected: return *EF.getSection(Rel.d.a); } - const Elf_Sym *toELFSymIter(DataRefImpl Sym) const { - return EF.template getEntry(Sym.d.a, Sym.d.b); - } - DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const { DataRefImpl DRI; if (!SymTable) { @@ -273,9 +271,9 @@ protected: return DRI; } - DataRefImpl toDRI(Elf_Dyn_Iter Dyn) const { + DataRefImpl toDRI(const Elf_Dyn *Dyn) const { DataRefImpl DRI; - DRI.p = reinterpret_cast(Dyn.get()); + DRI.p = reinterpret_cast(Dyn); return DRI; } @@ -304,7 +302,13 @@ public: const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; - const Elf_Sym *getSymbol(DataRefImpl Symb) const; + const Elf_Sym *getSymbol(DataRefImpl Sym) const { + return EF.template getEntry(Sym.d.a, Sym.d.b); + } + + const Elf_Shdr *getSection(DataRefImpl Sec) const { + return reinterpret_cast(Sec.p); + } basic_symbol_iterator symbol_begin_impl() const override; basic_symbol_iterator symbol_end_impl() const override; @@ -320,7 +324,6 @@ public: uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; unsigned getArch() const override; - StringRef getLoadName() const; std::error_code getPlatformFlags(unsigned &Result) const override { Result = EF.getHeader()->e_flags; @@ -352,7 +355,7 @@ void ELFObjectFile::moveSymbolNext(DataRefImpl &Sym) const { template ErrorOr ELFObjectFile::getSymbolName(DataRefImpl Sym) const { - const Elf_Sym *ESym = toELFSymIter(Sym); + const Elf_Sym *ESym = getSymbol(Sym); const Elf_Shdr *SymTableSec = *EF.getSection(Sym.d.a); const Elf_Shdr *StringTableSec = *EF.getSection(SymTableSec->sh_link); StringRef SymTable = *EF.getStringTable(StringTableSec); @@ -361,12 +364,12 @@ ErrorOr ELFObjectFile::getSymbolName(DataRefImpl Sym) const { template uint64_t ELFObjectFile::getSectionFlags(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_flags; + return getSection(Sec)->sh_flags; } template uint32_t ELFObjectFile::getSectionType(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_type; + return getSection(Sec)->sh_type; } template @@ -398,9 +401,11 @@ ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { } const Elf_Ehdr *Header = EF.getHeader(); + const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a); if (Header->e_type == ELF::ET_REL) { - ErrorOr SectionOrErr = EF.getSection(ESym); + ErrorOr SectionOrErr = + EF.getSection(ESym, SymTab, ShndxTable); if (std::error_code EC = SectionOrErr.getError()) return EC; const Elf_Shdr *Section = *SectionOrErr; @@ -413,7 +418,7 @@ ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { template uint32_t ELFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { - const Elf_Sym *Sym = toELFSymIter(Symb); + const 
Elf_Sym *Sym = getSymbol(Symb); if (Sym->st_shndx == ELF::SHN_COMMON) return Sym->st_value; return 0; @@ -421,22 +426,22 @@ uint32_t ELFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { template uint64_t ELFObjectFile::getSymbolSize(DataRefImpl Sym) const { - return toELFSymIter(Sym)->st_size; + return getSymbol(Sym)->st_size; } template uint64_t ELFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { - return toELFSymIter(Symb)->st_size; + return getSymbol(Symb)->st_size; } template uint8_t ELFObjectFile::getSymbolOther(DataRefImpl Symb) const { - return toELFSymIter(Symb)->st_other; + return getSymbol(Symb)->st_other; } template uint8_t ELFObjectFile::getSymbolELFType(DataRefImpl Symb) const { - return toELFSymIter(Symb)->getType(); + return getSymbol(Symb)->getType(); } template @@ -463,7 +468,7 @@ SymbolRef::Type ELFObjectFile::getSymbolType(DataRefImpl Symb) const { template uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { - const Elf_Sym *ESym = toELFSymIter(Sym); + const Elf_Sym *ESym = getSymbol(Sym); uint32_t Result = SymbolRef::SF_None; @@ -477,7 +482,8 @@ uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { Result |= SymbolRef::SF_Absolute; if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION || - ESym == EF.symbol_begin() || ESym == EF.dynamic_symbol_begin()) + ESym == EF.symbol_begin(DotSymtabSec) || + ESym == EF.symbol_begin(DotDynSymSec)) Result |= SymbolRef::SF_FormatSpecific; if (EF.getHeader()->e_machine == ELF::EM_ARM) { @@ -505,11 +511,12 @@ uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { } template -section_iterator -ELFObjectFile::getSymbolSection(const Elf_Sym *ESym) const { - ErrorOr ESecOrErr = EF.getSection(ESym); +ErrorOr +ELFObjectFile::getSymbolSection(const Elf_Sym *ESym, + const Elf_Shdr *SymTab) const { + ErrorOr ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable); if (std::error_code EC = ESecOrErr.getError()) - report_fatal_error(EC.message()); + return EC; const Elf_Shdr *ESec = *ESecOrErr; if (!ESec) @@ -521,23 +528,23 @@ ELFObjectFile::getSymbolSection(const Elf_Sym *ESym) const { } template -std::error_code -ELFObjectFile::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { - Res = getSymbolSection(getSymbol(Symb)); - return std::error_code(); +ErrorOr +ELFObjectFile::getSymbolSection(DataRefImpl Symb) const { + const Elf_Sym *Sym = getSymbol(Symb); + const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a); + return getSymbolSection(Sym, SymTab); } template void ELFObjectFile::moveSectionNext(DataRefImpl &Sec) const { - const Elf_Shdr *ESec = toELFShdrIter(Sec); + const Elf_Shdr *ESec = getSection(Sec); Sec = toDRI(++ESec); } template std::error_code ELFObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { - ErrorOr Name = EF.getSectionName(&*toELFShdrIter(Sec)); + ErrorOr Name = EF.getSectionName(&*getSection(Sec)); if (!Name) return Name.getError(); Result = *Name; @@ -546,50 +553,50 @@ std::error_code ELFObjectFile::getSectionName(DataRefImpl Sec, template uint64_t ELFObjectFile::getSectionAddress(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_addr; + return getSection(Sec)->sh_addr; } template uint64_t ELFObjectFile::getSectionSize(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_size; + return getSection(Sec)->sh_size; } template std::error_code ELFObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Result) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); Result = 
StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size); return std::error_code(); } template uint64_t ELFObjectFile::getSectionAlignment(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_addralign; + return getSection(Sec)->sh_addralign; } template bool ELFObjectFile::isSectionText(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_flags & ELF::SHF_EXECINSTR; + return getSection(Sec)->sh_flags & ELF::SHF_EXECINSTR; } template bool ELFObjectFile::isSectionData(DataRefImpl Sec) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && EShdr->sh_type == ELF::SHT_PROGBITS; } template bool ELFObjectFile::isSectionBSS(DataRefImpl Sec) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && EShdr->sh_type == ELF::SHT_NOBITS; } template bool ELFObjectFile::isSectionVirtual(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_type == ELF::SHT_NOBITS; + return getSection(Sec)->sh_type == ELF::SHT_NOBITS; } template @@ -636,7 +643,7 @@ ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (EF.getHeader()->e_type != ELF::ET_REL) return section_end(); - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); uintX_t Type = EShdr->sh_type; if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) return section_end(); @@ -668,9 +675,9 @@ ELFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { bool IsDyn = Rel.d.b & 1; DataRefImpl SymbolData; if (IsDyn) - SymbolData = toDRI(EF.getDotDynSymSec(), symbolIdx); + SymbolData = toDRI(DotDynSymSec, symbolIdx); else - SymbolData = toDRI(EF.getDotSymtabSec(), symbolIdx); + SymbolData = toDRI(DotSymtabSec, symbolIdx); return symbol_iterator(SymbolRef(SymbolData, this)); } @@ -714,12 +721,6 @@ ELFObjectFile::getRelocationAddend(DataRefImpl Rel) const { return (int64_t)getRela(Rel)->r_addend; } -template -const typename ELFFile::Elf_Sym * -ELFObjectFile::getSymbol(DataRefImpl Symb) const { - return &*toELFSymIter(Symb); -} - template const typename ELFObjectFile::Elf_Rel * ELFObjectFile::getRel(DataRefImpl Rel) const { @@ -737,21 +738,51 @@ ELFObjectFile::getRela(DataRefImpl Rela) const { template ELFObjectFile::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC) : ELFObjectFileBase( - getELFType(static_cast(ELFT::TargetEndianness) == - support::little, - ELFT::Is64Bits), + getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), Object), - EF(Data.getBuffer(), EC) {} + EF(Data.getBuffer(), EC) { + if (EC) + return; + for (const Elf_Shdr &Sec : EF.sections()) { + switch (Sec.sh_type) { + case ELF::SHT_DYNSYM: { + if (DotDynSymSec) { + // More than one .dynsym! + EC = object_error::parse_failed; + return; + } + DotDynSymSec = &Sec; + break; + } + case ELF::SHT_SYMTAB: { + if (DotSymtabSec) { + // More than one .symtab!
+ EC = object_error::parse_failed; + return; + } + DotSymtabSec = &Sec; + break; + } + case ELF::SHT_SYMTAB_SHNDX: { + ErrorOr> TableOrErr = EF.getSHNDXTable(Sec); + if ((EC = TableOrErr.getError())) + return; + ShndxTable = *TableOrErr; + break; + } + } + } +} template basic_symbol_iterator ELFObjectFile::symbol_begin_impl() const { - DataRefImpl Sym = toDRI(EF.getDotSymtabSec(), 0); + DataRefImpl Sym = toDRI(DotSymtabSec, 0); return basic_symbol_iterator(SymbolRef(Sym, this)); } template basic_symbol_iterator ELFObjectFile::symbol_end_impl() const { - const Elf_Shdr *SymTab = EF.getDotSymtabSec(); + const Elf_Shdr *SymTab = DotSymtabSec; if (!SymTab) return symbol_begin_impl(); DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym)); @@ -760,13 +791,13 @@ basic_symbol_iterator ELFObjectFile::symbol_end_impl() const { template elf_symbol_iterator ELFObjectFile::dynamic_symbol_begin() const { - DataRefImpl Sym = toDRI(EF.getDotDynSymSec(), 0); + DataRefImpl Sym = toDRI(DotDynSymSec, 0); return symbol_iterator(SymbolRef(Sym, this)); } template elf_symbol_iterator ELFObjectFile::dynamic_symbol_end() const { - const Elf_Shdr *SymTab = EF.getDotDynSymSec(); + const Elf_Shdr *SymTab = DotDynSymSec; DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym)); return basic_symbol_iterator(SymbolRef(Sym, this)); } @@ -781,19 +812,6 @@ section_iterator ELFObjectFile::section_end() const { return section_iterator(SectionRef(toDRI(EF.section_end()), this)); } -template -StringRef ELFObjectFile::getLoadName() const { - Elf_Dyn_Iter DI = EF.dynamic_table_begin(); - Elf_Dyn_Iter DE = EF.dynamic_table_end(); - - while (DI != DE && DI->getTag() != ELF::DT_SONAME) - ++DI; - - if (DI != DE) - return EF.getDynamicString(DI->getVal()); - return ""; -} - template uint8_t ELFObjectFile::getBytesInAddress() const { return ELFT::Is64Bits ? 8 : 4; @@ -807,10 +825,14 @@ StringRef ELFObjectFile::getFileFormatName() const { switch (EF.getHeader()->e_machine) { case ELF::EM_386: return "ELF32-i386"; + case ELF::EM_IAMCU: + return "ELF32-iamcu"; case ELF::EM_X86_64: return "ELF32-x86-64"; case ELF::EM_ARM: return (IsLittleEndian ? "ELF32-arm-little" : "ELF32-arm-big"); + case ELF::EM_AVR: + return "ELF32-avr"; case ELF::EM_HEXAGON: return "ELF32-hexagon"; case ELF::EM_MIPS: @@ -853,6 +875,7 @@ unsigned ELFObjectFile::getArch() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; switch (EF.getHeader()->e_machine) { case ELF::EM_386: + case ELF::EM_IAMCU: return Triple::x86; case ELF::EM_X86_64: return Triple::x86_64; @@ -860,6 +883,8 @@ unsigned ELFObjectFile::getArch() const { return Triple::aarch64; case ELF::EM_ARM: return Triple::arm; + case ELF::EM_AVR: + return Triple::avr; case ELF::EM_HEXAGON: return Triple::hexagon; case ELF::EM_MIPS: diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 27e987ba2852..07b312a7d77c 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -12,7 +12,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Error.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" @@ -307,14 +306,18 @@ struct Elf_Dyn_Base> { } d_un; }; -/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters. +/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters. 
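/// (Illustrative sketch, not part of the upstream header: the getters below
/// now return values at the file's native width, so a hypothetical .dynamic
/// walk, like the removed getLoadName() above, would read DT_SONAME so:)
/// \code
///   for (const Elf_Dyn *Dyn = Begin; Dyn->getTag() != ELF::DT_NULL; ++Dyn)
///     if (Dyn->getTag() == ELF::DT_SONAME)
///       SonameOffset = Dyn->getVal(); // uintX_t: 32- or 64-bit per ELFT
/// \endcode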
template struct Elf_Dyn_Impl : Elf_Dyn_Base { using Elf_Dyn_Base::d_tag; using Elf_Dyn_Base::d_un; - int64_t getTag() const { return d_tag; } - uint64_t getVal() const { return d_un.d_val; } - uint64_t getPtr() const { return d_un.d_ptr; } + typedef typename std::conditional::type intX_t; + typedef typename std::conditional::type uintX_t; + intX_t getTag() const { return d_tag; } + uintX_t getVal() const { return d_un.d_val; } + uintX_t getPtr() const { return d_un.d_ptr; } }; // Elf_Rel: Elf Relocation @@ -481,6 +484,30 @@ struct Elf_Hash_Impl { } }; +// .gnu.hash section +template +struct Elf_GnuHash_Impl { + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) + Elf_Word nbuckets; + Elf_Word symndx; + Elf_Word maskwords; + Elf_Word shift2; + + ArrayRef filter() const { + return ArrayRef(reinterpret_cast(&shift2 + 1), + maskwords); + } + + ArrayRef buckets() const { + return ArrayRef( + reinterpret_cast(filter().end()), nbuckets); + } + + ArrayRef values(unsigned DynamicSymCount) const { + return ArrayRef(buckets().end(), DynamicSymCount - symndx); + } +}; + // MIPS .reginfo section template struct Elf_Mips_RegInfo; diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h index aa320bb51a46..0f79a6ed0dd8 100644 --- a/include/llvm/Object/Error.h +++ b/include/llvm/Object/Error.h @@ -30,6 +30,7 @@ enum class object_error { string_table_non_null_end, invalid_section_index, bitcode_section_not_found, + elf_invalid_dynamic_table_size, macho_small_load_command, macho_load_segment_too_many_sections, macho_load_segment_too_small, diff --git a/include/llvm/Object/FunctionIndexObjectFile.h b/include/llvm/Object/FunctionIndexObjectFile.h new file mode 100644 index 000000000000..74b461dc7cc7 --- /dev/null +++ b/include/llvm/Object/FunctionIndexObjectFile.h @@ -0,0 +1,110 @@ +//===- FunctionIndexObjectFile.h - Function index file implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the FunctionIndexObjectFile template class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H +#define LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Object/SymbolicFile.h" + +namespace llvm { +class FunctionInfoIndex; +class Module; + +namespace object { +class ObjectFile; + +/// This class is used to read just the function summary index related +/// sections out of the given object (which may contain a single module's +/// bitcode or be a combined index bitcode file). It builds a FunctionInfoIndex +/// object. +class FunctionIndexObjectFile : public SymbolicFile { + std::unique_ptr Index; + +public: + FunctionIndexObjectFile(MemoryBufferRef Object, + std::unique_ptr I); + ~FunctionIndexObjectFile() override; + + // TODO: Walk through FunctionMap entries for function symbols. + // However, currently these interfaces are not used by any consumers. 
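  // Illustrative sketch (hypothetical caller, not part of the upstream
  // header): this class is normally reached through create() declared below,
  // not through the stubbed symbol interfaces that follow.
  //
  //   ErrorOr<std::unique_ptr<FunctionIndexObjectFile>> ObjOrErr =
  //       FunctionIndexObjectFile::create(Buffer, DiagnosticHandler);
  //   if (ObjOrErr)
  //     FunctionInfoIndex &Index = (*ObjOrErr)->getIndex();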
+ void moveSymbolNext(DataRefImpl &Symb) const override { + llvm_unreachable("not implemented"); + } + std::error_code printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return std::error_code(); + } + uint32_t getSymbolFlags(DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return 0; + } + basic_symbol_iterator symbol_begin_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + basic_symbol_iterator symbol_end_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + + const FunctionInfoIndex &getIndex() const { + return const_cast(this)->getIndex(); + } + FunctionInfoIndex &getIndex() { return *Index; } + std::unique_ptr takeIndex(); + + static inline bool classof(const Binary *v) { return v->isFunctionIndex(); } + + /// \brief Finds and returns bitcode embedded in the given object file, or an + /// error code if not found. + static ErrorOr findBitcodeInObject(const ObjectFile &Obj); + + /// \brief Finds and returns bitcode in the given memory buffer (which may + /// be either a bitcode file or a native object file with embedded bitcode), + /// or an error code if not found. + static ErrorOr + findBitcodeInMemBuffer(MemoryBufferRef Object); + + /// \brief Looks for function summary in the given memory buffer, + /// returns true if found, else false. + static bool + hasFunctionSummaryInMemBuffer(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler); + + /// \brief Parse function index in the given memory buffer. + /// Return new FunctionIndexObjectFile instance containing parsed function + /// summary/index. + static ErrorOr> + create(MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false); + + /// \brief Parse the function summary information for function with the + /// given name out of the given buffer. Parsed information is + /// stored on the index object saved in this object. + std::error_code + findFunctionSummaryInMemBuffer(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName); +}; +} + +/// Parse the function index out of an IR file and return the function +/// index object if found, or nullptr if not. +ErrorOr> +getFunctionIndexForFile(StringRef Path, + DiagnosticHandlerFunction DiagnosticHandler); +} + +#endif diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 489ecef5c996..e02ce3b21416 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -100,7 +100,7 @@ private: }; typedef content_iterator export_iterator; -/// MachORebaseEntry encapsulates the current state in the decompression of +/// MachORebaseEntry encapsulates the current state in the decompression of /// rebasing opcodes. 
This allows you to iterate through the compressed table of /// rebasing using: /// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) { @@ -116,7 +116,7 @@ public: bool operator==(const MachORebaseEntry &) const; void moveNext(); - + private: friend class MachOObjectFile; void moveToFirst(); @@ -210,8 +210,7 @@ public: uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; unsigned getSymbolSectionID(SymbolRef Symb) const; unsigned getSectionID(SectionRef Sec) const; @@ -423,6 +422,24 @@ public: return v->isMachO(); } + static uint32_t + getVersionMinMajor(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return (VersionOrSDK >> 16) & 0xffff; + } + + static uint32_t + getVersionMinMinor(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return (VersionOrSDK >> 8) & 0xff; + } + + static uint32_t + getVersionMinUpdate(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return VersionOrSDK & 0xff; + } + private: uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; @@ -504,4 +521,3 @@ inline const ObjectFile *DiceRef::getObjectFile() const { } #endif - diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 8dd525626218..ce0c891ee0c2 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -100,8 +100,7 @@ public: relocation_iterator relocation_begin() const; relocation_iterator relocation_end() const; iterator_range relocations() const { - return iterator_range(relocation_begin(), - relocation_end()); + return make_range(relocation_begin(), relocation_end()); } section_iterator getRelocatedSection() const; @@ -147,7 +146,7 @@ public: /// @brief Get section this symbol is defined in reference to. Result is /// end_sections() if it is undefined or is an absolute symbol. - std::error_code getSection(section_iterator &Result) const; + ErrorOr getSection() const; const ObjectFile *getObject() const; }; @@ -202,8 +201,8 @@ protected: virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const; virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0; virtual SymbolRef::Type getSymbolType(DataRefImpl Symb) const = 0; - virtual std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const = 0; + virtual ErrorOr + getSymbolSection(DataRefImpl Symb) const = 0; // Same as above for SectionRef. 
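  // Illustrative migration sketch (not part of the upstream header): a caller
  // of the previous out-parameter overload adapts to the ErrorOr-returning
  // getSymbolSection above like so:
  //
  //   ErrorOr<section_iterator> SecOrErr = Sym.getSection();
  //   if (std::error_code EC = SecOrErr.getError())
  //     return EC;                       // was out-parameter + error_code
  //   section_iterator Sec = *SecOrErr;  // section_end() if absolute/undefined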
friend class SectionRef; @@ -323,8 +322,8 @@ inline uint64_t SymbolRef::getCommonSize() const { return getObject()->getCommonSymbolSize(getRawDataRefImpl()); } -inline std::error_code SymbolRef::getSection(section_iterator &Result) const { - return getObject()->getSymbolSection(getRawDataRefImpl(), Result); +inline ErrorOr SymbolRef::getSection() const { + return getObject()->getSymbolSection(getRawDataRefImpl()); } inline SymbolRef::Type SymbolRef::getType() const { diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index 537997ac6318..0c5b38111a9c 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -15,6 +15,7 @@ #define LLVM_OBJECT_SYMBOLICFILE_H #include "llvm/Object/Binary.h" +#include "llvm/Support/Format.h" namespace llvm { namespace object { @@ -29,6 +30,12 @@ union DataRefImpl { DataRefImpl() { std::memset(this, 0, sizeof(DataRefImpl)); } }; +template +OStream& operator<<(OStream &OS, const DataRefImpl &D) { + OS << "(" << format("0x%x8", D.p) << " (" << format("0x%x8", D.d.a) << ", " << format("0x%x8", D.d.b) << "))"; + return OS; +} + inline bool operator==(const DataRefImpl &a, const DataRefImpl &b) { // Check bitwise identical. This is the only legal way to compare a union w/o // knowing which member is in use. @@ -94,6 +101,7 @@ public: // (e.g. section symbols) SF_Thumb = 1U << 8, // Thumb symbol in a 32-bit ARM binary SF_Hidden = 1U << 9, // Symbol has hidden visibility + SF_Const = 1U << 10, // Symbol value is constant }; BasicSymbolRef() : OwningObject(nullptr) { } diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h index e1b72b6267cf..99d329693de2 100644 --- a/include/llvm/Option/Arg.h +++ b/include/llvm/Option/Arg.h @@ -113,6 +113,7 @@ public: /// when rendered as a input (e.g., Xlinker). void renderAsInput(const ArgList &Args, ArgStringList &Output) const; + void print(raw_ostream &O) const; void dump() const; /// \brief Return a formatted version of the argument and diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index ef4005761b75..89771b5c3cf1 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -259,6 +259,9 @@ public: void AddLastArg(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1) const; + /// AddAllArgs - Render all arguments matching any of the given ids. + void AddAllArgs(ArgStringList &Output, ArrayRef Ids) const; + /// AddAllArgs - Render all arguments matching the given ids. void AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; @@ -303,6 +306,9 @@ public: const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS, StringRef RHS) const; + void print(raw_ostream &O) const; + void dump() const; + /// @} }; diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h index 96f51cf3317d..390e52774fea 100644 --- a/include/llvm/Option/OptTable.h +++ b/include/llvm/Option/OptTable.h @@ -50,8 +50,7 @@ public: private: /// \brief The static option information table. - const Info *OptionInfos; - unsigned NumOptionInfos; + ArrayRef OptionInfos; bool IgnoreCase; unsigned TheInputOptionID; @@ -74,14 +73,13 @@ private: } protected: - OptTable(const Info *OptionInfos, unsigned NumOptionInfos, - bool IgnoreCase = false); + OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); public: ~OptTable(); /// \brief Return the total number of option classes. 
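/// (Illustrative note with a hypothetical driver table: the count now comes
/// from the ArrayRef itself, so a subclass binds a static table without an
/// explicit length.)
/// \code
///   static const OptTable::Info InfoTable[] = { /* ... */ };
///   struct MyOptTable : OptTable {
///     MyOptTable() : OptTable(InfoTable) {} // size deduced via ArrayRef
///   };
/// \endcode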
- unsigned getNumOptions() const { return NumOptionInfos; } + unsigned getNumOptions() const { return OptionInfos.size(); } /// \brief Get the given Opt's Option instance, lazily creating it /// if necessary. diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h index 09be26c7cf20..494987a135ef 100644 --- a/include/llvm/Option/Option.h +++ b/include/llvm/Option/Option.h @@ -195,6 +195,7 @@ public: /// start. Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; + void print(raw_ostream &O) const; void dump() const; }; diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index 0b318fc8fb07..492a4ef464f8 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -36,11 +36,17 @@ namespace llvm { /// class AnalysisUsage { public: - typedef SmallVector VectorType; + typedef SmallVectorImpl VectorType; private: /// Sets of analyses required and preserved by a pass - VectorType Required, RequiredTransitive, Preserved; + // TODO: It's not clear that SmallVector is an appropriate data structure for + // this use case. The sizes were picked to minimize wasted space, but are + // otherwise fairly meaningless. + SmallVector Required; + SmallVector RequiredTransitive; + SmallVector Preserved; + SmallVector Used; bool PreservesAll; public: @@ -72,14 +78,32 @@ public: Preserved.push_back(&ID); return *this; } - ///@} - /// Add the specified Pass class to the set of analyses preserved by this pass. template AnalysisUsage &addPreserved() { Preserved.push_back(&PassClass::ID); return *this; } + ///@} + + ///@{ + /// Add the specified ID to the set of analyses used by this pass if it is + /// available. + AnalysisUsage &addUsedIfAvailableID(const void *ID) { + Used.push_back(ID); + return *this; + } + AnalysisUsage &addUsedIfAvailableID(char &ID) { + Used.push_back(&ID); + return *this; + } + /// Add the specified Pass class to the set of analyses used by this pass. + template + AnalysisUsage &addUsedIfAvailable() { + Used.push_back(&PassClass::ID); + return *this; + } + ///@} /// Add the Pass with the specified argument string to the set of analyses /// preserved by this pass. If no such Pass exists, do nothing. This can be @@ -108,6 +132,7 @@ public: return RequiredTransitive; } const VectorType &getPreservedSet() const { return Preserved; } + const VectorType &getUsedSet() const { return Used; } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h index d10761831b3a..cee4ade323e4 100644 --- a/include/llvm/PassInfo.h +++ b/include/llvm/PassInfo.h @@ -33,13 +33,13 @@ public: typedef Pass *(*TargetMachineCtor_t)(TargetMachine *); private: - const char *const PassName; // Nice name for Pass - const char *const PassArgument; // Command Line argument to run this pass - const void *PassID; - const bool IsCFGOnlyPass; // Pass only looks at the CFG. - const bool IsAnalysis; // True if an analysis pass. - const bool IsAnalysisGroup; // True if an analysis group.
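// Illustrative sketch for the AnalysisUsage::addUsedIfAvailable<> hook added
// above (hypothetical pass, not part of the upstream headers):
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTreeWrapperPass>();            // hard dependency
//     AU.addUsedIfAvailable<TargetLibraryInfoWrapperPass>(); // optional
//   }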
+ std::vector ItfImpl; // Interfaces implemented by this pass NormalCtor_t NormalCtor; TargetMachineCtor_t TargetMachineCtor; @@ -50,18 +50,16 @@ public: PassInfo(const char *name, const char *arg, const void *pi, NormalCtor_t normal, bool isCFGOnly, bool is_analysis, TargetMachineCtor_t machine = nullptr) - : PassName(name), PassArgument(arg), PassID(pi), - IsCFGOnlyPass(isCFGOnly), - IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), - TargetMachineCtor(machine) {} + : PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly), + IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), + TargetMachineCtor(machine) {} /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. This version is for use by analysis groups; it /// does not auto-register the pass. PassInfo(const char *name, const void *pi) - : PassName(name), PassArgument(""), PassID(pi), - IsCFGOnlyPass(false), - IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), - TargetMachineCtor(nullptr) {} + : PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false), + IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), + TargetMachineCtor(nullptr) {} /// getPassName - Return the friendly name for the pass, never returns null /// @@ -78,10 +76,8 @@ public: const void *getTypeInfo() const { return PassID; } /// Return true if this PassID implements the specified ID pointer. - bool isPassID(const void *IDPtr) const { - return PassID == IDPtr; - } - + bool isPassID(const void *IDPtr) const { return PassID == IDPtr; } + /// isAnalysisGroup - Return true if this is an analysis group, not a normal /// pass. /// @@ -91,7 +87,7 @@ public: /// isCFGOnlyPass - return true if this pass only looks at the CFG for the /// function. bool isCFGOnlyPass() const { return IsCFGOnlyPass; } - + /// getNormalCtor - Return a pointer to a function, that when called, creates /// an instance of the pass and returns it. This pointer may be null if there /// is no default constructor for the pass. diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h index 8c28ef5e7e61..e7fe1f53a4d4 100644 --- a/include/llvm/PassRegistry.h +++ b/include/llvm/PassRegistry.h @@ -17,7 +17,6 @@ #ifndef LLVM_PASSREGISTRY_H #define LLVM_PASSREGISTRY_H -#include "llvm-c/Core.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h index 6cb6516412e8..7c3d49f02e8f 100644 --- a/include/llvm/PassSupport.h +++ b/include/llvm/PassSupport.h @@ -26,7 +26,7 @@ #include "llvm/PassInfo.h" #include "llvm/PassRegistry.h" #include "llvm/Support/Atomic.h" -#include "llvm/Support/Valgrind.h" +#include "llvm/Support/Compiler.h" #include namespace llvm { diff --git a/include/llvm/ProfileData/CoverageMapping.h b/include/llvm/ProfileData/CoverageMapping.h index 3488e793d84f..3790e1358449 100644 --- a/include/llvm/ProfileData/CoverageMapping.h +++ b/include/llvm/ProfileData/CoverageMapping.h @@ -104,7 +104,7 @@ struct CounterExpression { }; /// \brief A Counter expression builder is used to construct the -/// counter expressions. It avoids unecessary duplication +/// counter expressions. It avoids unnecessary duplication /// and simplifies algebraic expressions. 
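/// (Illustrative sketch, not part of the upstream header: building the same
/// expression twice is expected to reuse one entry, not append a duplicate.)
/// \code
///   CounterExpressionBuilder Builder;
///   Counter A = Counter::getCounter(0), B = Counter::getCounter(1);
///   Counter Sum1 = Builder.add(A, B);
///   Counter Sum2 = Builder.add(A, B); // same expression as Sum1
/// \endcode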
class CounterExpressionBuilder { /// \brief A list of all the counter expressions @@ -236,7 +236,7 @@ class CounterMappingContext { public: CounterMappingContext(ArrayRef Expressions, - ArrayRef CounterValues = ArrayRef()) + ArrayRef CounterValues = None) : Expressions(Expressions), CounterValues(CounterValues) {} void setCounts(ArrayRef Counts) { CounterValues = Counts; } @@ -443,7 +443,7 @@ public: /// \brief Get the list of function instantiations in the file. /// - /// Fucntions that are instantiated more than once, such as C++ template + /// Functions that are instantiated more than once, such as C++ template /// specializations, have distinct coverage records for each instantiation. std::vector getInstantiations(StringRef Filename); diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 77055ba87268..4688759a3bd1 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -16,34 +16,310 @@ #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ #define LLVM_PROFILEDATA_INSTRPROF_H_ +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/ProfileData/InstrProfData.inc" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MD5.h" #include +#include #include #include +#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { + +class Function; +class GlobalVariable; +class Module; + +/// Return the name of the data section containing profile counter variables. +inline StringRef getInstrProfCountersSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR + : INSTR_PROF_CNTS_SECT_NAME_STR; +} + +/// Return the name of the data section containing names of instrumented +/// functions. +inline StringRef getInstrProfNameSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR + : INSTR_PROF_NAME_SECT_NAME_STR; +} + +/// Return the name of the data section containing per-function control +/// data. +inline StringRef getInstrProfDataSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR + : INSTR_PROF_DATA_SECT_NAME_STR; +} + +/// Return the name of the profile runtime entry point that does value +/// profiling for a given site. +inline StringRef getInstrProfValueProfFuncName() { + return INSTR_PROF_VALUE_PROF_FUNC_STR; +} + +/// Return the name of the section containing function coverage mapping +/// data. +inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { + return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; +} + +/// Return the name prefix of variables containing instrumented function names. +inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } + +/// Return the name prefix of variables containing per-function control data. +inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } + +/// Return the name prefix of profile counter variables. +inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } + +/// Return the name prefix of the COMDAT group for instrumentation variables +/// associated with a COMDAT function. +inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } + +/// Return the name of a coverage mapping variable (internal linkage) +/// for each instrumented source module. Such variables are allocated +/// in the __llvm_covmap section.
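/// (Illustrative note with a hypothetical function "foo": the prefixes above
/// yield "__profn_foo", "__profd_foo", "__profc_foo", and the COMDAT group
/// "__profv_foo".)
/// \code
///   std::string CntVar =
///       (Twine(getInstrProfCountersVarPrefix()) + "foo").str(); // __profc_foo
/// \endcode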
+inline StringRef getCoverageMappingVarName() { + return "__llvm_coverage_mapping"; +} + +/// Return the name of the function that registers all the per-function control +/// data at program startup time by calling __llvm_register_function. This +/// function has internal linkage and is called by the __llvm_profile_init +/// runtime method. This function is not generated for these platforms: +/// Darwin, Linux, and FreeBSD. +inline StringRef getInstrProfRegFuncsName() { + return "__llvm_profile_register_functions"; +} + +/// Return the name of the runtime interface that registers per-function control +/// data for one instrumented function. +inline StringRef getInstrProfRegFuncName() { + return "__llvm_profile_register_function"; +} + +/// Return the name of the runtime initialization method that is generated by +/// the compiler. The function calls __llvm_profile_register_functions and +/// __llvm_profile_override_default_filename functions if needed. This function +/// has internal linkage and is invoked at startup time via init_array. +inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } + +/// Return the name of the hook variable defined in the profile runtime library. +/// A reference to the variable causes the linker to link in the runtime +/// initialization module (which defines the hook variable). +inline StringRef getInstrProfRuntimeHookVarName() { + return "__llvm_profile_runtime"; +} + +/// Return the name of the compiler-generated function that references the +/// runtime hook variable. The function is a weak global. +inline StringRef getInstrProfRuntimeHookVarUseFuncName() { + return "__llvm_profile_runtime_user"; +} + +/// Return the name of the profile runtime interface that overrides the default +/// profile data file name. +inline StringRef getInstrProfFileOverriderFuncName() { + return "__llvm_profile_override_default_filename"; +} + +/// Return the modified name for function \c F suitable to be +/// used as the key for profile lookup. +std::string getPGOFuncName(const Function &F, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the modified name for a function suitable to be +/// used as the key for profile lookup. The function's original +/// name is \c RawFuncName and has linkage of type \c Linkage. +/// The function is defined in module \c FileName. +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c FuncName is the name of the function returned +/// by a call to \c getPGOFuncName. +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c FuncName is the name of the function +/// returned by a call to \c getPGOFuncName, \c M is the owning module, +/// and \c Linkage is the linkage of the instrumented function. +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName); + +/// Given a PGO function name, remove the filename prefix and return +/// the original (static) function name.
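/// (Illustrative sketch with hypothetical values: a function with local
/// linkage gets its defining file prepended, which this helper strips again.)
/// \code
///   std::string N = getPGOFuncName("bar", GlobalValue::InternalLinkage,
///                                  "foo.c");               // "foo.c:bar"
///   StringRef Orig = getFuncNameWithoutPrefix(N, "foo.c"); // "bar"
/// \endcode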
+StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName); + const std::error_category &instrprof_category(); enum class instrprof_error { - success = 0, - eof, - bad_magic, - bad_header, - unsupported_version, - unsupported_hash_type, - too_large, - truncated, - malformed, - unknown_function, - hash_mismatch, - count_mismatch, - counter_overflow + success = 0, + eof, + unrecognized_format, + bad_magic, + bad_header, + unsupported_version, + unsupported_hash_type, + too_large, + truncated, + malformed, + unknown_function, + hash_mismatch, + count_mismatch, + counter_overflow, + value_site_count_mismatch }; inline std::error_code make_error_code(instrprof_error E) { return std::error_code(static_cast(E), instrprof_category()); } +inline instrprof_error MergeResult(instrprof_error &Accumulator, + instrprof_error Result) { + // Prefer the first error encountered, as later errors may be secondary + // effects of the initial problem. + if (Accumulator == instrprof_error::success && + Result != instrprof_error::success) + Accumulator = Result; + return Accumulator; +} + +enum InstrProfValueKind : uint32_t { +#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +namespace object { +class SectionRef; +} + +namespace IndexedInstrProf { +uint64_t ComputeHash(StringRef K); +} + +/// A symbol table used for function PGO name look-up with keys +/// (such as pointers or MD5 hash values) that map to the function. A +/// function's PGO name or its name's MD5 hash is used to retrieve the +/// profile data of the function. See the \c getPGOFuncName() method for +/// details on how the PGO name is formed. +class InstrProfSymtab { +public: + typedef std::vector> AddrHashMap; + +private: + StringRef Data; + uint64_t Address; + // A map from MD5 hash keys to function name strings. + std::vector> HashNameMap; + // A map from function runtime address to function name MD5 hash. + // This map is only populated and used by the raw instr profile reader. + AddrHashMap AddrToMD5Map; + +public: + InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {} + + /// Create InstrProfSymtab from an object file section which + /// contains function PGO names that are uncompressed. + /// This interface is used by CoverageMappingReader. + std::error_code create(object::SectionRef &Section); + /// This interface is used by the reader of the CoverageMapping test + /// format. + inline std::error_code create(StringRef D, uint64_t BaseAddr); + /// Create InstrProfSymtab from a set of names iterable from + /// \p IterRange. This interface is used by IndexedProfReader. + template void create(const NameIterRange &IterRange); + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); + /// Update the symtab by adding \p FuncName to the table. This interface + /// is used by the raw and text profile readers. + void addFuncName(StringRef FuncName) { + HashNameMap.push_back(std::make_pair( + IndexedInstrProf::ComputeHash(FuncName), FuncName.str())); + } + /// Map a function address to its name's MD5 hash. This interface + /// is only used by the raw profiler reader.
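  /// (Illustrative sketch of the intended call sequence; per the comments
  /// above, finalizeSymtab() must precede any lookup.)
  /// \code
  ///   InstrProfSymtab Symtab;
  ///   Symtab.addFuncName("main");
  ///   Symtab.finalizeSymtab();
  ///   StringRef N = Symtab.getFuncName(IndexedInstrProf::ComputeHash("main"));
  /// \endcode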
+ void mapAddress(uint64_t Addr, uint64_t MD5Val) { + AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); + } + AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } + /// Return the function's PGO name from the function name's symbol + /// address in the object file. If an error occurs, return + /// an empty string. + StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); + /// Return the function's PGO name from the name's MD5 hash value. + /// If not found, return an empty string. + inline StringRef getFuncName(uint64_t FuncMD5Hash); +}; + +std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { + Data = D; + Address = BaseAddr; + return std::error_code(); +} + +template +void InstrProfSymtab::create(const NameIterRange &IterRange) { + for (auto Name : IterRange) + HashNameMap.push_back( + std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str())); + finalizeSymtab(); +} + +void InstrProfSymtab::finalizeSymtab() { + std::sort(HashNameMap.begin(), HashNameMap.end(), less_first()); + HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()), + HashNameMap.end()); + std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); + AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), + AddrToMD5Map.end()); +} + +StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + auto Result = + std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash, + [](const std::pair &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != HashNameMap.end()) + return Result->second; + return StringRef(); +} + +struct InstrProfValueSiteRecord { + /// Value profiling data pairs at a given value site. + std::list ValueData; + + InstrProfValueSiteRecord() { ValueData.clear(); } + template + InstrProfValueSiteRecord(InputIterator F, InputIterator L) + : ValueData(F, L) {} + + /// Sort ValueData ascending by Value + void sortByTargetValues() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Value < right.Value; + }); + } + + /// Merge data from another InstrProfValueSiteRecord. + /// Optionally scale merged counts by \p Weight. + instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight = 1); +}; + /// Profiling information for a single function. struct InstrProfRecord { InstrProfRecord() {} @@ -52,8 +328,258 @@ struct InstrProfRecord { StringRef Name; uint64_t Hash; std::vector Counts; + + typedef std::vector> ValueMapType; + + /// Return the number of value profile kinds with a non-zero number + /// of profile sites. + inline uint32_t getNumValueKinds() const; + /// Return the number of instrumented sites for ValueKind. + inline uint32_t getNumValueSites(uint32_t ValueKind) const; + /// Return the total number of ValueData for ValueKind. + inline uint32_t getNumValueData(uint32_t ValueKind) const; + /// Return the number of value data collected for ValueKind at profiling + /// site \p Site. + inline uint32_t getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const; + /// Return the array of profiled values at \p Site. + inline std::unique_ptr + getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; + inline void + getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; + /// Reserve space for NumValueSites sites.
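  /// (Illustrative sketch of the typical write path, hypothetical data.)
  /// \code
  ///   InstrProfRecord R;                  // Name/Hash/Counts set elsewhere
  ///   R.reserveSites(IPVK_IndirectCallTarget, 1);
  ///   InstrProfValueData VD[] = {{/*Value=*/0xdeadbeef, /*Count=*/7}};
  ///   R.addValueData(IPVK_IndirectCallTarget, /*Site=*/0, VD, 1, nullptr);
  /// \endcode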
+ inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); + /// Add ValueData for ValueKind at value Site. + void addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap); + + /// Merge the counts in \p Other into this one. + /// Optionally scale merged counts by \p Weight. + instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); + + /// Clear value data entries + void clearValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + getValueSitesForKind(Kind).clear(); + } + +private: + std::vector IndirectCallSites; + const std::vector & + getValueSitesForKind(uint32_t ValueKind) const { + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return IndirectCallSites; + default: + llvm_unreachable("Unknown value kind!"); + } + return IndirectCallSites; + } + + std::vector & + getValueSitesForKind(uint32_t ValueKind) { + return const_cast &>( + const_cast(this) + ->getValueSitesForKind(ValueKind)); + } + + // Map indirect call target name hash to name string. + uint64_t remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *HashKeys); + + // Merge Value Profile data from Src record to this record for ValueKind. + // Scale merged value counts by \p Weight. + instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, + uint64_t Weight); }; +uint32_t InstrProfRecord::getNumValueKinds() const { + uint32_t NumValueKinds = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NumValueKinds += !(getValueSitesForKind(Kind).empty()); + return NumValueKinds; +} + +uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { + uint32_t N = 0; + const std::vector &SiteRecords = + getValueSitesForKind(ValueKind); + for (auto &SR : SiteRecords) { + N += SR.ValueData.size(); + } + return N; +} + +uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { + return getValueSitesForKind(ValueKind).size(); +} + +uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const { + return getValueSitesForKind(ValueKind)[Site].ValueData.size(); +} + +std::unique_ptr InstrProfRecord::getValueForSite( + uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t)) const { + uint32_t N = getNumValueDataForSite(ValueKind, Site); + if (N == 0) + return std::unique_ptr(nullptr); + + auto VD = llvm::make_unique(N); + getValueForSite(VD.get(), ValueKind, Site, ValueMapper); + + return VD; +} + +void InstrProfRecord::getValueForSite(InstrProfValueData Dest[], + uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, + uint64_t)) const { + uint32_t I = 0; + for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { + Dest[I].Value = ValueMapper ? ValueMapper(ValueKind, V.Value) : V.Value; + Dest[I].Count = V.Count; + I++; + } +} + +void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { + std::vector &ValueSites = + getValueSitesForKind(ValueKind); + ValueSites.reserve(NumValueSites); +} + +inline support::endianness getHostEndianness() { + return sys::IsLittleEndianHost ? support::little : support::big; +} + +// Include definitions for value profile data +#define INSTR_PROF_VALUE_PROF_DATA +#include "llvm/ProfileData/InstrProfData.inc" + + /* + * Initialize the record for runtime value profile data. + * Return 0 if the initialization is successful, otherwise + * return 1. 
+ */ +int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, + const uint16_t *NumValueSites, + ValueProfNode **Nodes); + +/* Release memory allocated for the runtime record. */ +void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord); + +/* Return the size of ValueProfData structure that can be used to store + the value profile data collected at runtime. */ +uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record); + +/* Return a ValueProfData instance that stores the data collected at runtime. */ +ValueProfData * +serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, + ValueProfData *Dst); + +namespace IndexedInstrProf { + +enum class HashT : uint32_t { + MD5, + + Last = MD5 +}; + +static inline uint64_t MD5Hash(StringRef Str) { + MD5 Hash; + Hash.update(Str); + llvm::MD5::MD5Result Result; + Hash.final(Result); + // Return the least significant 8 bytes. Our MD5 implementation returns the + // result in little endian, so we may need to swap bytes. + using namespace llvm::support; + return endian::read(Result); +} + +inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return IndexedInstrProf::MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" +const uint64_t Version = INSTR_PROF_INDEX_VERSION; +const HashT HashType = HashT::MD5; + +inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } + +// This structure defines the file header of the LLVM profile +// data file in indexed-format. +struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t MaxFunctionCount; + uint64_t HashType; + uint64_t HashOffset; +}; + +} // end namespace IndexedInstrProf + +namespace RawInstrProf { + +const uint64_t Version = INSTR_PROF_RAW_VERSION; + +template inline uint64_t getMagic(); +template <> inline uint64_t getMagic() { + return INSTR_PROF_RAW_MAGIC_64; +} + +template <> inline uint64_t getMagic() { + return INSTR_PROF_RAW_MAGIC_32; +} + +// Per-function profile data header/control structure. +// The definition should match the structure defined in +// compiler-rt/lib/profile/InstrProfiling.h. +// It should also match the synthesized type in +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. +template struct LLVM_ALIGNAS(8) ProfileData { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; + +// File header structure of the LLVM profile data in raw format. +// The definition should match the header referenced in +// compiler-rt/lib/profile/InstrProfilingFile.c and +// InstrProfilingBuffer.c. +struct Header { +#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +} // end namespace RawInstrProf + +namespace coverage { + +// Profile coverage map has the following layout: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecord] +// [CovMapFunctionRecord] +// ... 
+// [ArrayEnd] +// [Encoded Region Mapping Data] +LLVM_PACKED_START +template struct CovMapFunctionRecord { + #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; +LLVM_PACKED_END + +} + } // end namespace llvm namespace std { diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc new file mode 100644 index 000000000000..48dae506cabb --- /dev/null +++ b/include/llvm/ProfileData/InstrProfData.inc @@ -0,0 +1,735 @@ +/*===-- InstrProfData.inc - instr profiling runtime structures -----------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the master file that defines all the data structures, signatures, + * and constant literals that are shared across the profiling runtime library, + * compiler (instrumentation), and host tools (reader/writer). The entities + * defined in this file affect the profile runtime ABI, the raw profile format, + * or both. + * + * The file has two identical copies. The master copy lives in LLVM and + * the other one sits in the compiler-rt/lib/profile directory. To make changes + * in this file, first modify the master copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * + * The first part of the file includes macros that define types, names, and + * initializers for the member fields of the core data structures. The field + * declarations for one structure are enabled by defining the field activation + * macro associated with that structure. Only one field activation record + * can be defined at a time, and the remaining definitions will be filtered out + * by the preprocessor. + * + * Examples of how the template is used to instantiate structure definitions: + * 1. To declare a structure: + * + * struct ProfData { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Type Name; + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 2. To construct LLVM type arrays for the struct type: + * + * Type *DataTypes[] = { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * LLVMType, + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 3. To construct a constant array for the initializers: + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Initializer, + * Constant *ConstantVals[] = { + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * + * The second part of the file includes definitions of all other entities that + * are related to the runtime ABI and format. When no field activation macro is + * defined, this file can be included to introduce the definitions. + * +\*===----------------------------------------------------------------------===*/ + +/* INSTR_PROF_DATA start. */ +/* Definition of member fields of the per-function control structure.
*/ +#ifndef INSTR_PROF_DATA +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif + +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ + NamePtr->getType()->getPointerElementType()->getArrayNumElements())) +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) +INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + Inc->getHash()->getZExtValue())) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), NamePtr, \ + ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \ + ConstantExpr::getBitCast(CounterPtr, \ + llvm::Type::getInt64PtrTy(Ctx))) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \ + FunctionAddr) +INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ + ConstantPointerNull::get(Int8PtrTy)) +INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) +#undef INSTR_PROF_DATA +/* INSTR_PROF_DATA end. */ + +/* INSTR_PROF_RAW_HEADER start */ +/* Definition of member fields of the raw profile header data structure. */ +#ifndef INSTR_PROF_RAW_HEADER +#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) +INSTR_PROF_RAW_HEADER(uint64_t, ValueDataSize, ValueDataSize) +INSTR_PROF_RAW_HEADER(uint64_t, ValueDataDelta, (uintptr_t)ValueDataBegin) +#undef INSTR_PROF_RAW_HEADER +/* INSTR_PROF_RAW_HEADER end */ + +/* VALUE_PROF_FUNC_PARAM start */ +/* Definition of parameter types of the runtime API used to do value profiling + * for a given value site. + */ +#ifndef VALUE_PROF_FUNC_PARAM +#define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType) +#define INSTR_PROF_COMMA +#else +#define INSTR_PROF_DATA_DEFINED +#define INSTR_PROF_COMMA , +#endif +VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) +#undef VALUE_PROF_FUNC_PARAM +#undef INSTR_PROF_COMMA +/* VALUE_PROF_FUNC_PARAM end */ + +/* VALUE_PROF_KIND start */ +#ifndef VALUE_PROF_KIND +#define VALUE_PROF_KIND(Enumerator, Value) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0) +/* These two kinds must be the last to be + * declared. This is to make sure the string + * array created with the template can be + * indexed with the kind value. 
+ */ +VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget) +VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) + +#undef VALUE_PROF_KIND +/* VALUE_PROF_KIND end */ + +/* COVMAP_FUNC_RECORD start */ +/* Definition of member fields of the function record structure in coverage + * map. + */ +#ifndef COVMAP_FUNC_RECORD +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ + NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ + llvm::Type::getInt8PtrTy(Ctx))) +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + NameValue.size())) +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + CoverageMapping.size())) +COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), FuncHash)) +#undef COVMAP_FUNC_RECORD +/* COVMAP_FUNC_RECORD end. */ + + +#ifdef INSTR_PROF_VALUE_PROF_DATA +#define INSTR_PROF_DATA_DEFINED + +/*! + * This is the header of the data structure that defines the on-disk + * layout of the value profile data of a particular kind for one function. + */ +typedef struct ValueProfRecord { + /* The kind of the value profile record. */ + uint32_t Kind; + /* + * The number of value profile sites. It is guaranteed to be non-zero; + * otherwise the record for this kind won't be emitted. + */ + uint32_t NumValueSites; + /* + * The first element of the array that stores the number of profiled + * values for each value site. The size of the array is NumValueSites. + * Since NumValueSites is greater than zero, there is at least one + * element in the array. + */ + uint8_t SiteCountArray[1]; + + /* + * The fake declaration is for documentation purposes only. + * Align the start of the next field on an 8 byte boundary. + uint8_t Padding[X]; + */ + + /* The array of value profile data. The size of the array is the sum + * of all elements in SiteCountArray[]. + InstrProfValueData ValueData[]; + */ + +#ifdef __cplusplus + /*! + * \brief Return the number of value sites. + */ + uint32_t getNumValueSites() const { return NumValueSites; } + /*! + * \brief Read data from this record and save it to Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap); + /* + * In-place byte swap: + * Do byte swap for this instance. \c Old is the original order before + * the swap, and \c New is the new byte order. + */ + void swapBytes(support::endianness Old, support::endianness New); +#endif +} ValueProfRecord; + +/*! + * Per-function header/control data structure for value profiling + * data in indexed format. + */ +typedef struct ValueProfData { + /* + * Total size in bytes including this field. It must be a multiple + * of sizeof(uint64_t). + */ + uint32_t TotalSize; + /* + * The number of value profile kinds that have value profile data. + * In this implementation, a value profile kind is considered to + * have profile data if the number of value profile sites for the + * kind is not zero. More aggressively, the implementation can + * choose to check the actual data value: if none of the value sites + * has any profiled values, the kind can be skipped. + */ + uint32_t NumValueKinds; + + /* + * Following is a sequence of variable-length records. The prefix/header + * of each record is defined by ValueProfRecord type.
The number of + * records is NumValueKinds. + * ValueProfRecord Record_1; + * ValueProfRecord Record_N; + */ + +#if __cplusplus + /*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ + static uint32_t getSize(const InstrProfRecord &Record); + /*! + * Return a pointer to \c ValueProfData instance ready to be streamed. + */ + static std::unique_ptr + serializeFrom(const InstrProfRecord &Record); + /*! + * Check the integrity of the record. Return the error code when + * an error is detected, otherwise return instrprof_error::success. + */ + instrprof_error checkIntegrity(); + /*! + * Return a pointer to \c ValueProfileData instance ready to be read. + * All data in the instance are properly byte swapped. The input + * data is assumed to be in little endian order. + */ + static ErrorOr> + getValueProfData(const unsigned char *SrcBuffer, + const unsigned char *const SrcBufferEnd, + support::endianness SrcDataEndianness); + /*! + * Swap byte order from \c Endianness order to host byte order. + */ + void swapBytesToHost(support::endianness Endianness); + /*! + * Swap byte order from host byte order to \c Endianness order. + */ + void swapBytesFromHost(support::endianness Endianness); + /*! + * Return the total size of \c ValueProfileData. + */ + uint32_t getSize() const { return TotalSize; } + /*! + * Read data from this data and save it to \c Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap); + void operator delete(void *ptr) { ::operator delete(ptr); } +#endif +} ValueProfData; + +/* + * The closure is designed to abstact away two types of value profile data: + * - InstrProfRecord which is the primary data structure used to + * represent profile data in host tools (reader, writer, and profile-use) + * - value profile runtime data structure suitable to be used by C + * runtime library. + * + * Both sources of data need to serialize to disk/memory-buffer in common + * format: ValueProfData. The abstraction allows compiler-rt's raw profiler + * writer to share the same format and code with indexed profile writer. + * + * For documentation of the member methods below, refer to corresponding methods + * in class InstrProfRecord. + */ +typedef struct ValueProfRecordClosure { + const void *Record; + uint32_t (*GetNumValueKinds)(const void *Record); + uint32_t (*GetNumValueSites)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); + + /* + * After extracting the value profile data from the value profile record, + * this method is used to map the in-memory value to on-disk value. If + * the method is null, value will be written out untranslated. + */ + uint64_t (*RemapValueData)(uint32_t, uint64_t Value); + void (*GetValueForSite)(const void *R, InstrProfValueData *Dst, uint32_t K, + uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)); + ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); +} ValueProfRecordClosure; + +/* + * A wrapper struct that represents value profile runtime data. + * Like InstrProfRecord class which is used by profiling host tools, + * ValueProfRuntimeRecord also implements the abstract intefaces defined in + * ValueProfRecordClosure so that the runtime data can be serialized using + * shared C implementation. 
+/*
+ * A wrapper struct that represents value profile runtime data.
+ * Like the InstrProfRecord class which is used by profiling host tools,
+ * ValueProfRuntimeRecord also implements the abstract interfaces defined in
+ * ValueProfRecordClosure so that the runtime data can be serialized using
+ * the shared C implementation. In this structure, the NumValueSites and Nodes
+ * members are the primary fields while other fields hold the derived
+ * information for fast implementation of the closure interfaces.
+ */
+typedef struct ValueProfRuntimeRecord {
+  /* Number of sites for each value profile kind. */
+  const uint16_t *NumValueSites;
+  /* An array of linked-list headers. The size of the array is the
+   * total number of value profile sites: sum(NumValueSites[*]). Each
+   * linked list stores the values profiled for a value profile site. */
+  ValueProfNode **Nodes;
+
+  /* Total number of value profile kinds which have at least one
+   * value profile site. */
+  uint32_t NumValueKinds;
+  /* An array recording the number of values tracked at each site.
+   * The size of the array is TotalNumValueSites. */
+  uint8_t *SiteCountArray[IPVK_Last + 1];
+  ValueProfNode **NodesKind[IPVK_Last + 1];
+} ValueProfRuntimeRecord;
+
+/* Forward declarations of C interfaces. */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+                                     const uint16_t *NumValueSites,
+                                     ValueProfNode **Nodes);
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord);
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record);
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+                             ValueProfData *Dst);
+uint32_t getNumValueKindsRT(const void *R);
+
+#undef INSTR_PROF_VALUE_PROF_DATA
+#endif /* INSTR_PROF_VALUE_PROF_DATA */
+
+
+#ifdef INSTR_PROF_COMMON_API_IMPL
+#define INSTR_PROF_DATA_DEFINED
+#ifdef __cplusplus
+#define INSTR_PROF_INLINE inline
+#else
+#define INSTR_PROF_INLINE
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/*!
+ * \brief Return the \c ValueProfRecord header size including the
+ * padding bytes.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) {
+  uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) +
+                  sizeof(uint8_t) * NumValueSites;
+  /* Round the size to a multiple of 8 bytes. */
+  Size = (Size + 7) & ~7;
+  return Size;
+}
+
+/*!
+ * \brief Return the total size of the value profile record including the
+ * header and the value data.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordSize(uint32_t NumValueSites,
+                                uint32_t NumValueData) {
+  return getValueProfRecordHeaderSize(NumValueSites) +
+         sizeof(InstrProfValueData) * NumValueData;
+}
+
+/*!
+ * \brief Return the pointer to the start of the value data array.
+ */
+INSTR_PROF_INLINE
+InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) {
+  return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize(
+                                                   This->NumValueSites));
+}
+
+/*!
+ * \brief Return the total number of value data for \c This record.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) {
+  uint32_t NumValueData = 0;
+  uint32_t I;
+  for (I = 0; I < This->NumValueSites; I++)
+    NumValueData += This->SiteCountArray[I];
+  return NumValueData;
+}
+
+/*!
+ * \brief Use this method to advance \c This to the next \c ValueProfRecord.
+ */
+INSTR_PROF_INLINE
+ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) {
+  uint32_t NumValueData = getValueProfRecordNumValueData(This);
+  return (ValueProfRecord *)((char *)This +
+                             getValueProfRecordSize(This->NumValueSites,
+                                                    NumValueData));
+}
+
+/*!
+ * \brief Return the first \c ValueProfRecord instance.
+ */ +INSTR_PROF_INLINE +ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { + return (ValueProfRecord *)((char *)This + sizeof(ValueProfData)); +} + +/* Closure based interfaces. */ + +/*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ +uint32_t getValueProfDataSize(ValueProfRecordClosure *Closure) { + uint32_t Kind; + uint32_t TotalSize = sizeof(ValueProfData); + const void *Record = Closure->Record; + uint32_t NumValueKinds = Closure->GetNumValueKinds(Record); + if (NumValueKinds == 0) + return TotalSize; + + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind); + if (!NumValueSites) + continue; + TotalSize += getValueProfRecordSize(NumValueSites, + Closure->GetNumValueData(Record, Kind)); + } + return TotalSize; +} + +/*! + * Extract value profile data of a function for the profile kind \c ValueKind + * from the \c Closure and serialize the data into \c This record instance. + */ +void serializeValueProfRecordFrom(ValueProfRecord *This, + ValueProfRecordClosure *Closure, + uint32_t ValueKind, uint32_t NumValueSites) { + uint32_t S; + const void *Record = Closure->Record; + This->Kind = ValueKind; + This->NumValueSites = NumValueSites; + InstrProfValueData *DstVD = getValueProfRecordValueData(This); + + for (S = 0; S < NumValueSites; S++) { + uint32_t ND = Closure->GetNumValueDataForSite(Record, ValueKind, S); + This->SiteCountArray[S] = ND; + Closure->GetValueForSite(Record, DstVD, ValueKind, S, + Closure->RemapValueData); + DstVD += ND; + } +} + +/*! + * Extract value profile data of a function from the \c Closure + * and serialize the data into \c DstData if it is not NULL or heap + * memory allocated by the \c Closure's allocator method. + */ +ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure, + ValueProfData *DstData) { + uint32_t Kind; + uint32_t TotalSize = getValueProfDataSize(Closure); + + ValueProfData *VPD = + DstData ? DstData : Closure->AllocValueProfData(TotalSize); + + VPD->TotalSize = TotalSize; + VPD->NumValueKinds = Closure->GetNumValueKinds(Closure->Record); + ValueProfRecord *VR = getFirstValueProfRecord(VPD); + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Closure->Record, Kind); + if (!NumValueSites) + continue; + serializeValueProfRecordFrom(VR, Closure, Kind, NumValueSites); + VR = getValueProfRecordNext(VR); + } + return VPD; +} + +/* + * The value profiler runtime library stores the value profile data + * for a given function in \c NumValueSites and \c Nodes structures. + * \c ValueProfRuntimeRecord class is used to encapsulate the runtime + * profile data and provides fast interfaces to retrieve the profile + * information. This interface is used to initialize the runtime record + * and pre-compute the information needed for efficient implementation + * of callbacks required by ValueProfRecordClosure class. 
+ */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+                                     const uint16_t *NumValueSites,
+                                     ValueProfNode **Nodes) {
+  unsigned I, J, S = 0, NumValueKinds = 0;
+  RuntimeRecord->NumValueSites = NumValueSites;
+  RuntimeRecord->Nodes = Nodes;
+  for (I = 0; I <= IPVK_Last; I++) {
+    uint16_t N = NumValueSites[I];
+    if (!N) {
+      RuntimeRecord->SiteCountArray[I] = 0;
+      continue;
+    }
+    NumValueKinds++;
+    RuntimeRecord->SiteCountArray[I] = (uint8_t *)calloc(N, 1);
+    if (!RuntimeRecord->SiteCountArray[I])
+      return 1;
+    RuntimeRecord->NodesKind[I] = Nodes ? &Nodes[S] : NULL;
+    for (J = 0; J < N; J++) {
+      /* Compute value count for each site. */
+      uint32_t C = 0;
+      ValueProfNode *Site = Nodes ? RuntimeRecord->NodesKind[I][J] : NULL;
+      while (Site) {
+        C++;
+        Site = Site->Next;
+      }
+      if (C > UCHAR_MAX)
+        C = UCHAR_MAX;
+      RuntimeRecord->SiteCountArray[I][J] = C;
+    }
+    S += N;
+  }
+  RuntimeRecord->NumValueKinds = NumValueKinds;
+  return 0;
+}
+
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord) {
+  unsigned I;
+  for (I = 0; I <= IPVK_Last; I++) {
+    if (RuntimeRecord->SiteCountArray[I])
+      free(RuntimeRecord->SiteCountArray[I]);
+  }
+}
+
+/* ValueProfRecordClosure interface implementation for
+ * ValueProfRuntimeRecord. */
+uint32_t getNumValueKindsRT(const void *R) {
+  return ((const ValueProfRuntimeRecord *)R)->NumValueKinds;
+}
+
+uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
+  return ((const ValueProfRuntimeRecord *)R)->NumValueSites[VK];
+}
+
+uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, uint32_t S) {
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  return Record->SiteCountArray[VK][S];
+}
+
+uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
+  unsigned I, S = 0;
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  if (Record->SiteCountArray[VK] == 0)
+    return 0;
+  for (I = 0; I < Record->NumValueSites[VK]; I++)
+    S += Record->SiteCountArray[VK][I];
+  return S;
+}
+
+void getValueForSiteRT(const void *R, InstrProfValueData *Dst, uint32_t VK,
+                       uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)) {
+  unsigned I, N = 0;
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  N = getNumValueDataForSiteRT(R, VK, S);
+  if (N == 0)
+    return;
+  ValueProfNode *VNode = Record->NodesKind[VK][S];
+  for (I = 0; I < N; I++) {
+    Dst[I] = VNode->VData;
+    VNode = VNode->Next;
+  }
+}
+
+ValueProfData *allocValueProfDataRT(size_t TotalSizeInBytes) {
+  return (ValueProfData *)calloc(TotalSizeInBytes, 1);
+}
+
+static ValueProfRecordClosure RTRecordClosure = {0,
+                                                 getNumValueKindsRT,
+                                                 getNumValueSitesRT,
+                                                 getNumValueDataRT,
+                                                 getNumValueDataForSiteRT,
+                                                 0,
+                                                 getValueForSiteRT,
+                                                 allocValueProfDataRT};
+
+/*
+ * Return the size of the ValueProfData structure needed to store the data
+ * recorded in the runtime record.
+ */
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) {
+  RTRecordClosure.Record = Record;
+  return getValueProfDataSize(&RTRecordClosure);
+}
+
+/*
+ * Return a ValueProfData instance that stores the data collected
+ * from the runtime. If \c DstData is provided by the caller, the value
+ * profile data will be stored in *DstData and DstData is returned,
+ * otherwise the method will allocate space for the value data and
+ * return a pointer to the newly allocated space.
+ */
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+                             ValueProfData *DstData) {
+  RTRecordClosure.Record = Record;
+  return serializeValueProfDataFrom(&RTRecordClosure, DstData);
+}
+
+
+#undef INSTR_PROF_COMMON_API_IMPL
+#endif /* INSTR_PROF_COMMON_API_IMPL */
+
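+/* Editorial note: a sketch of the intended call sequence for the runtime
+ * interfaces above (error handling elided; only names declared in this
+ * file are used, and the surrounding driver code is assumed):
+ *
+ *   ValueProfRuntimeRecord R;
+ *   if (initializeValueProfRuntimeRecord(&R, NumValueSites, Nodes))
+ *     return;                                    // allocation failed
+ *   uint32_t Size = getValueProfDataSizeRT(&R);  // bytes needed on disk
+ *   ValueProfData *VPD = serializeValueProfDataFromRT(&R, NULL);
+ *   // ... write VPD->TotalSize bytes to the raw profile buffer ...
+ *   free(VPD);                                   // NULL Dst => calloc'ed
+ *   finalizeValueProfRuntimeRecord(&R);
+ */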
+/*============================================================================*/
+
+
+#ifndef INSTR_PROF_DATA_DEFINED
+
+#ifndef INSTR_PROF_DATA_INC_
+#define INSTR_PROF_DATA_INC_
+
+/* Helper macros. */
+#define INSTR_PROF_SIMPLE_QUOTE(x) #x
+#define INSTR_PROF_QUOTE(x) INSTR_PROF_SIMPLE_QUOTE(x)
+#define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y
+#define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y)
+
+/* Magic number to detect file format and endianness.
+ * Use 255 at one end, since no UTF-8 file can use that character. Avoid 0,
+ * so that utilities, like strings, don't grab it as a string. 129 is also
+ * invalid UTF-8, and high enough to be interesting.
+ * Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
+ * for 32-bit platforms.
+ */
+#define INSTR_PROF_RAW_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+        (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+        (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129
+#define INSTR_PROF_RAW_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+        (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+        (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
+
+/* Raw profile format version. */
+#define INSTR_PROF_RAW_VERSION 2
+
+/* Runtime section names and name strings. */
+#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
+#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
+#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+
+#define INSTR_PROF_DATA_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
+#define INSTR_PROF_NAME_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
+#define INSTR_PROF_CNTS_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
+
+/* Macros to define start/stop section symbol for a given
+ * section on Linux. For instance
+ * INSTR_PROF_SECT_START(INSTR_PROF_DATA_SECT_NAME) will
+ * expand to __start___llvm_prf_data
+ */
+#define INSTR_PROF_SECT_START(Sect) \
+        INSTR_PROF_CONCAT(__start_,Sect)
+#define INSTR_PROF_SECT_STOP(Sect) \
+        INSTR_PROF_CONCAT(__stop_,Sect)
+
+/* Value Profiling API linkage name. */
+#define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
+#define INSTR_PROF_VALUE_PROF_FUNC_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
+
+/* InstrProfile per-function control data alignment. */
+#define INSTR_PROF_DATA_ALIGNMENT 8
+
+/* The data structure that represents a value tracked by the
+ * value profiler.
+ */
+typedef struct InstrProfValueData {
+  /* Profiled value. */
+  uint64_t Value;
+  /* Number of times the value appears in the training run. */
+  uint64_t Count;
+} InstrProfValueData;
+
+/* This is an internal data structure used by the value profiler. It
+ * is defined here so that the serialization code in LLVM can be shared
+ * with unit tests.
+ */
+typedef struct ValueProfNode {
+  InstrProfValueData VData;
+  struct ValueProfNode *Next;
+} ValueProfNode;
+
+#endif /* INSTR_PROF_DATA_INC_ */
+
+#else
+#undef INSTR_PROF_DATA_DEFINED
+#endif
+
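Editorial aside: a minimal, self-contained sketch of how the section-symbol
macros above compose. The extern symbols are synthesized by the Linux linker
for each profile section; the function name countCounters is illustrative
only, not part of LLVM:

    #include <stdint.h>

    #define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y
    #define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y)
    #define INSTR_PROF_SECT_START(Sect) INSTR_PROF_CONCAT(__start_,Sect)
    #define INSTR_PROF_SECT_STOP(Sect) INSTR_PROF_CONCAT(__stop_,Sect)

    /* Expands to __start___llvm_prf_cnts / __stop___llvm_prf_cnts. */
    extern uint64_t INSTR_PROF_SECT_START(__llvm_prf_cnts);
    extern uint64_t INSTR_PROF_SECT_STOP(__llvm_prf_cnts);

    /* Number of 8-byte counters placed in the counter section. */
    static uint64_t countCounters(void) {
      return (uint64_t)(&INSTR_PROF_SECT_STOP(__llvm_prf_cnts) -
                        &INSTR_PROF_SECT_START(__llvm_prf_cnts));
    }
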
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index f937e7d08d54..fed3e693e7a0 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -23,6 +23,7 @@
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/OnDiskHashTable.h"
+#include "llvm/Support/raw_ostream.h"
 #include <iterator>
 
 namespace llvm {
@@ -53,7 +54,7 @@ class InstrProfReader {
   std::error_code LastError;
 
 public:
-  InstrProfReader() : LastError(instrprof_error::success) {}
+  InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
   virtual ~InstrProfReader() {}
 
   /// Read the header. Required before reading first record.
@@ -64,7 +65,20 @@ public:
   InstrProfIterator begin() { return InstrProfIterator(this); }
   InstrProfIterator end() { return InstrProfIterator(); }
 
+  /// Return the PGO symtab. There are three different readers:
+  /// Raw, Text, and Indexed profile readers. The first two types
+  /// of readers are used only by the llvm-profdata tool, while the indexed
+  /// profile reader is also used by the llvm-cov tool and the compiler (
+  /// backend or frontend). Since creating the PGO symtab can incur
+  /// significant runtime and memory overhead (as it touches data
+  /// for the whole program), the InstrProfSymtab for the indexed profile
+  /// reader should be created on demand and it is recommended to be
+  /// used only for dumping purposes with llvm-profdata, not with the
+  /// compiler.
+  virtual InstrProfSymtab &getSymtab() = 0;
+
 protected:
+  std::unique_ptr<InstrProfSymtab> Symtab;
   /// Set the current std::error_code and return same.
   std::error_code error(std::error_code EC) {
     LastError = EC;
@@ -107,14 +121,24 @@ private:
   TextInstrProfReader(const TextInstrProfReader &) = delete;
   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
 
+  std::error_code readValueProfileData(InstrProfRecord &Record);
+
 public:
   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
 
+  /// Return true if the given buffer is in text instrprof format.
+  static bool hasFormat(const MemoryBuffer &Buffer);
+
   /// Read the header.
-  std::error_code readHeader() override { return success(); }
+  std::error_code readHeader() override;
   /// Read a single record.
   std::error_code readNextRecord(InstrProfRecord &Record) override;
+
+  InstrProfSymtab &getSymtab() override {
+    assert(Symtab.get());
+    return *Symtab.get();
+  }
 };
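+
+// Editorial note: a sketch (hypothetical driver code, not part of this
+// header) of how the readers in this file are typically consumed, e.g. by
+// llvm-profdata; exitWithError and dump are assumed helpers:
+//
+//   auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Buffer));
+//   if (std::error_code EC = ReaderOrErr.getError())
+//     exitWithError(EC);
+//   auto Reader = std::move(ReaderOrErr.get());
+//   for (const InstrProfRecord &Record : *Reader)
+//     dump(Record, Reader->getSymtab());  // symtab built lazily, see above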
 
 /// Reader for the raw instrprof binary format from runtime.
@@ -129,31 +153,19 @@ class RawInstrProfReader : public InstrProfReader {
 private:
   /// The profile data file contents.
   std::unique_ptr<MemoryBuffer> DataBuffer;
-  struct ProfileData {
-    const uint32_t NameSize;
-    const uint32_t NumCounters;
-    const uint64_t FuncHash;
-    const IntPtrT NamePtr;
-    const IntPtrT CounterPtr;
-  };
-  struct RawHeader {
-    const uint64_t Magic;
-    const uint64_t Version;
-    const uint64_t DataSize;
-    const uint64_t CountersSize;
-    const uint64_t NamesSize;
-    const uint64_t CountersDelta;
-    const uint64_t NamesDelta;
-  };
-
   bool ShouldSwapBytes;
   uint64_t CountersDelta;
   uint64_t NamesDelta;
-  const ProfileData *Data;
-  const ProfileData *DataEnd;
+  const RawInstrProf::ProfileData<IntPtrT> *Data;
+  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
   const uint64_t *CountersStart;
   const char *NamesStart;
+  const uint8_t *ValueDataStart;
   const char *ProfileEnd;
+  uint32_t ValueKindLast;
+  uint32_t CurValueDataSize;
+
+  InstrProfRecord::ValueMapType FunctionPtrToNameMap;
 
   RawInstrProfReader(const RawInstrProfReader &) = delete;
   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
@@ -165,13 +177,41 @@ public:
   std::error_code readHeader() override;
   std::error_code readNextRecord(InstrProfRecord &Record) override;
 
+  InstrProfSymtab &getSymtab() override {
+    assert(Symtab.get());
+    return *Symtab.get();
+  }
+
 private:
+  void createSymtab(InstrProfSymtab &Symtab);
   std::error_code readNextHeader(const char *CurrentPos);
-  std::error_code readHeader(const RawHeader &Header);
-  template <class IntT>
-  IntT swap(IntT Int) const {
+  std::error_code readHeader(const RawInstrProf::Header &Header);
+  template <class IntT> IntT swap(IntT Int) const {
     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
   }
+  support::endianness getDataEndianness() const {
+    support::endianness HostEndian = getHostEndianness();
+    if (!ShouldSwapBytes)
+      return HostEndian;
+    if (HostEndian == support::little)
+      return support::big;
+    else
+      return support::little;
+  }
+
+  inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
+    return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
+  }
+  std::error_code readName(InstrProfRecord &Record);
+  std::error_code readFuncHash(InstrProfRecord &Record);
+  std::error_code readRawCounts(InstrProfRecord &Record);
+  std::error_code readValueProfilingData(InstrProfRecord &Record);
+  bool atEnd() const { return Data == DataEnd; }
+  void advanceData() {
+    Data++;
+    ValueDataStart += CurValueDataSize;
+  }
+
   const uint64_t *getCounter(IntPtrT CounterPtr) const {
     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
     return CountersStart + Offset;
@@ -195,10 +235,15 @@ class InstrProfLookupTrait {
   std::vector<InstrProfRecord> DataBuffer;
   IndexedInstrProf::HashT HashType;
   unsigned FormatVersion;
+  // Endianness of the input value profile data.
+  // It should be LE by default, but can be changed
+  // for testing purposes.
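+  // (Editorial note, a hedged illustration rather than code from this
+  // header: a reader unit test can force the byte-swap path before
+  // deserializing records with
+  //     Reader.setValueProfDataEndianness(support::big);
+  // through IndexedInstrProfReader::setValueProfDataEndianness below.)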
+ support::endianness ValueProfDataEndianness; public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) - : HashType(HashType), FormatVersion(FormatVersion) {} + : HashType(HashType), FormatVersion(FormatVersion), + ValueProfDataEndianness(support::little) {} typedef ArrayRef data_type; @@ -209,6 +254,7 @@ public: static bool EqualKey(StringRef A, StringRef B) { return A == B; } static StringRef GetInternalKey(StringRef K) { return K; } + static StringRef GetExternalKey(StringRef K) { return K; } hash_value_type ComputeHash(StringRef K); @@ -224,11 +270,64 @@ public: return StringRef((const char *)D, N); } + bool readValueProfilingData(const unsigned char *&D, + const unsigned char *const End); data_type ReadData(StringRef K, const unsigned char *D, offset_type N); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + ValueProfDataEndianness = Endianness; + } +}; + +struct InstrProfReaderIndexBase { + // Read all the profile records with the same key pointed to the current + // iterator. + virtual std::error_code getRecords(ArrayRef &Data) = 0; + // Read all the profile records with the key equal to FuncName + virtual std::error_code getRecords(StringRef FuncName, + ArrayRef &Data) = 0; + virtual void advanceToNextKey() = 0; + virtual bool atEnd() const = 0; + virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; + virtual ~InstrProfReaderIndexBase() {} + virtual uint64_t getVersion() const = 0; + virtual void populateSymtab(InstrProfSymtab &) = 0; }; typedef OnDiskIterableChainedHashTable - InstrProfReaderIndex; + OnDiskHashTableImplV3; + +template +class InstrProfReaderIndex : public InstrProfReaderIndexBase { + +private: + std::unique_ptr HashTable; + typename HashTableImpl::data_iterator RecordIterator; + uint64_t FormatVersion; + +public: + InstrProfReaderIndex(const unsigned char *Buckets, + const unsigned char *const Payload, + const unsigned char *const Base, + IndexedInstrProf::HashT HashType, uint64_t Version); + + std::error_code getRecords(ArrayRef &Data) override; + std::error_code getRecords(StringRef FuncName, + ArrayRef &Data) override; + void advanceToNextKey() override { RecordIterator++; } + bool atEnd() const override { + return RecordIterator == HashTable->data_end(); + } + void setValueProfDataEndianness(support::endianness Endianness) override { + HashTable->getInfoObj().setValueProfDataEndianness(Endianness); + } + ~InstrProfReaderIndex() override {} + uint64_t getVersion() const override { return FormatVersion; } + void populateSymtab(InstrProfSymtab &Symtab) override { + Symtab.create(HashTable->keys()); + } +}; /// Reader for the indexed binary instrprof format. class IndexedInstrProfReader : public InstrProfReader { @@ -236,17 +335,15 @@ private: /// The profile data file contents. std::unique_ptr DataBuffer; /// The index into the profile data. - std::unique_ptr Index; - /// Iterator over the profile data. - InstrProfReaderIndex::data_iterator RecordIterator; - /// The file format version of the profile data. - uint64_t FormatVersion; + std::unique_ptr Index; /// The maximal execution count among all functions. 
uint64_t MaxFunctionCount; IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + public: + uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) : DataBuffer(std::move(DataBuffer)), Index(nullptr) {} @@ -258,9 +355,15 @@ public: /// Read a single record. std::error_code readNextRecord(InstrProfRecord &Record) override; + /// Return the pointer to InstrProfRecord associated with FuncName + /// and FuncHash + ErrorOr getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash); + /// Fill Counts with the profile data for the given function name. std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); + /// Return the maximum of all known function counts. uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } @@ -270,6 +373,16 @@ public: static ErrorOr> create(std::unique_ptr Buffer); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + Index->setValueProfDataEndianness(Endianness); + } + + // See description in the base class. This interface is designed + // to be used by llvm-profdata (for dumping). Avoid using this when + // the client is the compiler. + InstrProfSymtab &getSymtab() override; }; } // end namespace llvm diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index ce0bb5242498..e7f53de051c3 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -15,38 +15,43 @@ #ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H #define LLVM_PROFILEDATA_INSTRPROFWRITER_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include namespace llvm { /// Writer for instrumentation based profile data. class InstrProfWriter { public: - typedef SmallDenseMap, 1> CounterData; + typedef SmallDenseMap ProfilingData; + private: - StringMap FunctionData; + StringMap FunctionData; uint64_t MaxFunctionCount; + public: InstrProfWriter() : MaxFunctionCount(0) {} /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is - /// summed. - std::error_code addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef Counters); + /// summed. Optionally scale counts by \p Weight. + std::error_code addRecord(InstrProfRecord &&I, uint64_t Weight = 1); /// Write the profile to \c OS void write(raw_fd_ostream &OS); + /// Write the profile in text format to \c OS + void writeText(raw_fd_ostream &OS); + /// Write \c Record in text format to \c OS + static void writeRecordInText(const InstrProfRecord &Record, + InstrProfSymtab &Symtab, raw_fd_ostream &OS); /// Write the profile, returning the raw data. For testing. std::unique_ptr writeBuffer(); + // Internal interface for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness); + private: std::pair writeImpl(raw_ostream &OS); }; diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 1b82e55aa77a..8df3fe803209 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -11,14 +11,17 @@ // sample profile data. 
// //===----------------------------------------------------------------------===// + #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_ #define LLVM_PROFILEDATA_SAMPLEPROF_H_ -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" + +#include #include namespace llvm { @@ -32,13 +35,27 @@ enum class sampleprof_error { too_large, truncated, malformed, - unrecognized_format + unrecognized_format, + unsupported_writing_format, + truncated_name_table, + not_implemented, + counter_overflow }; inline std::error_code make_error_code(sampleprof_error E) { return std::error_code(static_cast(E), sampleprof_category()); } +inline sampleprof_error MergeResult(sampleprof_error &Accumulator, + sampleprof_error Result) { + // Prefer first error encountered as later errors may be secondary effects of + // the initial problem. + if (Accumulator == sampleprof_error::success && + Result != sampleprof_error::success) + Accumulator = Result; + return Accumulator; +} + } // end namespace llvm namespace std { @@ -57,7 +74,7 @@ static inline uint64_t SPMagic() { uint64_t('2') << (64 - 56) | uint64_t(0xff); } -static inline uint64_t SPVersion() { return 100; } +static inline uint64_t SPVersion() { return 102; } /// Represents the relative location of an instruction. /// @@ -69,36 +86,36 @@ static inline uint64_t SPVersion() { return 100; } /// that are on the same line but belong to different basic blocks /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). struct LineLocation { - LineLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {} - int LineOffset; - unsigned Discriminator; + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} + void print(raw_ostream &OS) const; + void dump() const; + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Discriminator < O.Discriminator); + } + + uint32_t LineOffset; + uint32_t Discriminator; }; -} // End namespace sampleprof +raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); -template <> struct DenseMapInfo { - typedef DenseMapInfo OffsetInfo; - typedef DenseMapInfo DiscriminatorInfo; - static inline sampleprof::LineLocation getEmptyKey() { - return sampleprof::LineLocation(OffsetInfo::getEmptyKey(), - DiscriminatorInfo::getEmptyKey()); - } - static inline sampleprof::LineLocation getTombstoneKey() { - return sampleprof::LineLocation(OffsetInfo::getTombstoneKey(), - DiscriminatorInfo::getTombstoneKey()); - } - static inline unsigned getHashValue(sampleprof::LineLocation Val) { - return DenseMapInfo>::getHashValue( - std::pair(Val.LineOffset, Val.Discriminator)); - } - static inline bool isEqual(sampleprof::LineLocation LHS, - sampleprof::LineLocation RHS) { - return LHS.LineOffset == RHS.LineOffset && - LHS.Discriminator == RHS.Discriminator; - } +/// Represents the relative location of a callsite. +/// +/// Callsite locations are specified by the line offset from the +/// beginning of the function (marked by the line where the function +/// head is), the discriminator value within that line, and the callee +/// function name. 
+struct CallsiteLocation : public LineLocation { + CallsiteLocation(uint32_t L, uint32_t D, StringRef N) + : LineLocation(L, D), CalleeName(N) {} + void print(raw_ostream &OS) const; + void dump() const; + + StringRef CalleeName; }; -namespace sampleprof { +raw_ostream &operator<<(raw_ostream &OS, const CallsiteLocation &Loc); /// Representation of a single sample record. /// @@ -112,52 +129,79 @@ namespace sampleprof { /// will be a list of one or more functions. class SampleRecord { public: - typedef StringMap CallTargetMap; + typedef StringMap CallTargetMap; SampleRecord() : NumSamples(0), CallTargets() {} /// Increment the number of samples for this record by \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addSamples(unsigned S) { - if (NumSamples <= std::numeric_limits::max() - S) - NumSamples += S; - else - NumSamples = std::numeric_limits::max(); + sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + S = SaturatingMultiply(S, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + NumSamples = SaturatingAdd(NumSamples, S, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; } /// Add called function \p F with samples \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addCalledTarget(StringRef F, unsigned S) { - unsigned &TargetSamples = CallTargets[F]; - if (TargetSamples <= std::numeric_limits::max() - S) - TargetSamples += S; - else - TargetSamples = std::numeric_limits::max(); + sampleprof_error addCalledTarget(StringRef F, uint64_t S, + uint64_t Weight = 1) { + uint64_t &TargetSamples = CallTargets[F]; + bool Overflowed; + if (Weight > 1) { + S = SaturatingMultiply(S, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; } /// Return true if this sample record contains function calls. bool hasCalls() const { return CallTargets.size() > 0; } - unsigned getSamples() const { return NumSamples; } + uint64_t getSamples() const { return NumSamples; } const CallTargetMap &getCallTargets() const { return CallTargets; } /// Merge the samples in \p Other into this record. - void merge(const SampleRecord &Other) { - addSamples(Other.getSamples()); - for (const auto &I : Other.getCallTargets()) - addCalledTarget(I.first(), I.second); + /// Optionally scale sample counts by \p Weight. + sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { + sampleprof_error Result = addSamples(Other.getSamples(), Weight); + for (const auto &I : Other.getCallTargets()) { + MergeResult(Result, addCalledTarget(I.first(), I.second, Weight)); + } + return Result; } + void print(raw_ostream &OS, unsigned Indent) const; + void dump() const; + private: - unsigned NumSamples; + uint64_t NumSamples; CallTargetMap CallTargets; }; -typedef DenseMap BodySampleMap; +raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); + +typedef std::map BodySampleMap; +class FunctionSamples; +typedef std::map CallsiteSampleMap; /// Representation of the samples collected for a function. 
/// @@ -167,59 +211,109 @@ typedef DenseMap BodySampleMap; class FunctionSamples { public: FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {} - void print(raw_ostream &OS = dbgs()); - void addTotalSamples(unsigned Num) { TotalSamples += Num; } - void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; } - void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) { - assert(LineOffset >= 0); - // When dealing with instruction weights, we use the value - // zero to indicate the absence of a sample. If we read an - // actual zero from the profile file, use the value 1 to - // avoid the confusion later on. - if (Num == 0) - Num = 1; - BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num); - } - void addCalledTargetSamples(int LineOffset, unsigned Discriminator, - std::string FName, unsigned Num) { - assert(LineOffset >= 0); - BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(FName, - Num); - } + void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; + void dump() const; + sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + Num = SaturatingMultiply(Num, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; - /// Return the sample record at the given location. - /// Each location is specified by \p LineOffset and \p Discriminator. - SampleRecord &sampleRecordAt(const LineLocation &Loc) { - return BodySamples[Loc]; + return sampleprof_error::success; + } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + Num = SaturatingMultiply(Num, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; + } + sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, + uint64_t Num, uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( + Num, Weight); + } + sampleprof_error addCalledTargetSamples(uint32_t LineOffset, + uint32_t Discriminator, + std::string FName, uint64_t Num, + uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( + FName, Num, Weight); } /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. - unsigned samplesAt(int LineOffset, unsigned Discriminator) { - return sampleRecordAt(LineLocation(LineOffset, Discriminator)).getSamples(); + /// If the location is not found in profile, return error. + ErrorOr findSamplesAt(uint32_t LineOffset, + uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + else + return ret->second.getSamples(); } - bool empty() const { return BodySamples.empty(); } + /// Return the function samples at the given callsite location. + FunctionSamples &functionSamplesAt(const CallsiteLocation &Loc) { + return CallsiteSamples[Loc]; + } + + /// Return a pointer to function samples at the given callsite location. 
+ const FunctionSamples * + findFunctionSamplesAt(const CallsiteLocation &Loc) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) { + return nullptr; + } else { + return &iter->second; + } + } + + bool empty() const { return TotalSamples == 0; } /// Return the total number of samples collected inside the function. - unsigned getTotalSamples() const { return TotalSamples; } + uint64_t getTotalSamples() const { return TotalSamples; } /// Return the total number of samples collected at the head of the /// function. - unsigned getHeadSamples() const { return TotalHeadSamples; } + uint64_t getHeadSamples() const { return TotalHeadSamples; } /// Return all the samples collected in the body of the function. const BodySampleMap &getBodySamples() const { return BodySamples; } + /// Return all the callsite samples collected in the body of the function. + const CallsiteSampleMap &getCallsiteSamples() const { + return CallsiteSamples; + } + /// Merge the samples in \p Other into this one. - void merge(const FunctionSamples &Other) { - addTotalSamples(Other.getTotalSamples()); - addHeadSamples(Other.getHeadSamples()); + /// Optionally scale samples by \p Weight. + sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { + sampleprof_error Result = sampleprof_error::success; + MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); + MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); for (const auto &I : Other.getBodySamples()) { const LineLocation &Loc = I.first; const SampleRecord &Rec = I.second; - sampleRecordAt(Loc).merge(Rec); + MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); } + for (const auto &I : Other.getCallsiteSamples()) { + const CallsiteLocation &Loc = I.first; + const FunctionSamples &Rec = I.second; + MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight)); + } + return Result; } private: @@ -227,12 +321,12 @@ private: /// /// Samples are cumulative, they include all the samples collected /// inside this function and all its inlined callees. - unsigned TotalSamples; + uint64_t TotalSamples; /// Total number of samples collected at the head of the function. /// This is an approximation of the number of calls made to this function /// at runtime. - unsigned TotalHeadSamples; + uint64_t TotalHeadSamples; /// Map instruction locations to collected samples. /// @@ -240,10 +334,53 @@ private: /// collected at the corresponding line offset. All line locations /// are an offset from the start of the function. BodySampleMap BodySamples; + + /// Map call sites to collected samples for the called function. + /// + /// Each entry in this map corresponds to all the samples + /// collected for the inlined function call at the given + /// location. For example, given: + /// + /// void foo() { + /// 1 bar(); + /// ... + /// 8 baz(); + /// } + /// + /// If the bar() and baz() calls were inlined inside foo(), this + /// map will contain two entries. One for all the samples collected + /// in the call to bar() at line offset 1, the other for all the samples + /// collected in the call to baz() at line offset 8. + CallsiteSampleMap CallsiteSamples; }; -} // End namespace sampleprof +raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); -} // End namespace llvm +/// Sort a LocationT->SampleT map by LocationT. +/// +/// It produces a sorted list of records by ascending +/// order of LocationT. 
+template <class LocationT, class SampleT> class SampleSorter {
+public:
+  typedef std::pair<const LocationT, SampleT> SamplesWithLoc;
+  typedef SmallVector<const SamplesWithLoc *, 20> SamplesWithLocList;
+
+  SampleSorter(const std::map<LocationT, SampleT> &Samples) {
+    for (const auto &I : Samples)
+      V.push_back(&I);
+    std::stable_sort(V.begin(), V.end(),
+                     [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
+                       return A->first < B->first;
+                     });
+  }
+  const SamplesWithLocList &get() const { return V; }
+
+private:
+  SamplesWithLocList V;
+};
+
+} // end namespace sampleprof
+
+} // end namespace llvm
 
 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H_
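Editorial aside: a short usage sketch for the SampleSorter defined above
(hypothetical caller; FS stands for some FunctionSamples instance, and the
printing function is illustrative, not LLVM API):

    // Print body samples in ascending (LineOffset, Discriminator) order,
    // using the operator<< overloads declared in SampleProf.h.
    void printBodySamples(llvm::raw_ostream &OS,
                          const llvm::sampleprof::FunctionSamples &FS) {
      using namespace llvm::sampleprof;
      SampleSorter<LineLocation, SampleRecord> Sorted(FS.getBodySamples());
      for (const auto *Entry : Sorted.get())
        OS << Entry->first << ": " << Entry->second << "\n";
    }
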
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index c082a1abe951..6db0fbb0e7ab 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -9,11 +9,181 @@
 //
 // This file contains definitions needed for reading sample profiles.
 //
+// NOTE: If you are making changes to this file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// Text format
+// -----------
+//
+// Sample profiles are written as ASCII text. The file is divided into
+// sections, which correspond to each of the functions executed at runtime.
+// Each section has the following format:
+//
+//    function1:total_samples:total_head_samples
+//     offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+//     offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+//     ...
+//     offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+//     offsetA[.discriminator]: fnA:num_of_total_samples
+//      offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
+//      ...
+//
+// This is a nested tree in which the indentation represents the nesting level
+// of the inline stack. There are no blank lines in the file, and the spacing
+// within a single line is fixed. Additional spaces will result in an error
+// while reading the file.
+//
+// Any line starting with the '#' character is completely ignored.
+//
+// Inlined calls are represented with indentation. The inline stack is a
+// stack of source locations in which the top of the stack represents the
+// leaf function, and the bottom of the stack represents the actual
+// symbol to which the instruction belongs.
+//
+// Function names must be mangled in order for the profile loader to
+// match them in the current translation unit. The two numbers in the
+// function header specify how many total samples were accumulated in the
+// function (first number), and the total number of samples accumulated
+// in the prologue of the function (second number). This head sample
+// count provides an indicator of how frequently the function is invoked.
+//
+// There are two types of lines in the function body.
+//
+// * Sampled line represents the profile information of a source location.
+// * Callsite line represents the profile information of a callsite.
+//
+// Each sampled line may contain several items. Some are optional (marked
+// below):
+//
+// a. Source line offset. This number represents the line number
+//    in the function where the sample was collected. The line number is
+//    always relative to the line where the symbol of the function is
+//    defined. So, if the function has its header at line 280, the offset
+//    13 is at line 293 in the file.
+//
+//    Note that this offset should never be a negative number. This could
+//    happen in cases like macros. The debug machinery will register the
+//    line number at the point of macro expansion. So, if the macro was
+//    expanded in a line before the start of the function, the profile
+//    converter should emit a 0 as the offset (this means that the optimizers
+//    will not be able to associate a meaningful weight to the instructions
+//    in the macro).
+//
+// b. [OPTIONAL] Discriminator. This is used if the sampled program
+//    was compiled with DWARF discriminator support
+//    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
+//    DWARF discriminators are unsigned integer values that allow the
+//    compiler to distinguish between multiple execution paths on the
+//    same source line location.
+//
+//    For example, consider the line of code ``if (cond) foo(); else bar();``.
+//    If the predicate ``cond`` is true 80% of the time, then the edge
+//    into function ``foo`` should be considered to be taken most of the
+//    time. But both calls to ``foo`` and ``bar`` are at the same source
+//    line, so a sample count at that line is not sufficient. The
+//    compiler needs to know which part of that line is taken more
+//    frequently.
+//
+//    This is what discriminators provide. In this case, the calls to
+//    ``foo`` and ``bar`` will be at the same line, but will have
+//    different discriminator values. This allows the compiler to correctly
+//    set edge weights into ``foo`` and ``bar``.
+//
+// c. Number of samples. This is an integer quantity representing the
+//    number of samples collected by the profiler at this source
+//    location.
+//
+// d. [OPTIONAL] Potential call targets and samples. If present, this
+//    line contains a call instruction. This models both direct and
+//    indirect calls. Each called target is listed together with the
+//    number of samples. For example,
+//
+//    130: 7  foo:3  bar:2  baz:7
+//
+//    The above means that at relative line offset 130 there is a call
+//    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
+//    with ``baz()`` being the relatively more frequently called target.
+//
+// Each callsite line may contain several items. Some are optional.
+//
+// a. Source line offset. This number represents the line number of the
+//    callsite that is inlined in the profiled binary.
+//
+// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
+//
+// c. Number of samples. This is an integer quantity representing the
+//    total number of samples collected for the inlined instance at this
+//    callsite.
+//
+//
+// Binary format
+// -------------
+//
+// This is a more compact encoding. Numbers are encoded as ULEB128 values
+// and all strings are encoded in a name table. The file is organized in
+// the following sections:
+//
+// MAGIC (uint64_t)
+//    File identifier computed by function SPMagic() (0x5350524f463432ff)
+//
+// VERSION (uint32_t)
+//    File format version number computed by SPVersion()
+//
+// NAME TABLE
+//    SIZE (uint32_t)
+//        Number of entries in the name table.
+//    NAMES
+//        A NUL-separated list of SIZE strings.
+//
+// FUNCTION BODY (one for each uninlined function body present in the profile)
+//    HEAD_SAMPLES (uint64_t) [only for top-level functions]
+//        Total number of samples collected at the head (prologue) of the
+//        function.
+//        NOTE: This field should only be present for top-level functions
+//              (i.e., not inlined into any caller). Inlined function calls
+//              have no prologue, so they don't need this.
+//    NAME_IDX (uint32_t)
+//        Index into the name table indicating the function name.
+//    SAMPLES (uint64_t)
+//        Total number of samples collected in this function.
+//    NRECS (uint32_t)
+//        Total number of sampling records in this function's profile.
+// BODY RECORDS +// A list of NRECS entries. Each entry contains: +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// SAMPLES (uint64_t) +// Number of samples collected at this location. +// NUM_CALLS (uint32_t) +// Number of non-inlined function calls made at this location. In the +// case of direct calls, this number will always be 1. For indirect +// calls (virtual functions and function pointers) this will +// represent all the actual functions called at runtime. +// CALL_TARGETS +// A list of NUM_CALLS entries for each called function: +// NAME_IDX (uint32_t) +// Index into the name table with the callee name. +// SAMPLES (uint64_t) +// Number of samples collected at the call site. +// NUM_INLINED_FUNCTIONS (uint32_t) +// Number of callees inlined into this function. +// INLINED FUNCTION RECORDS +// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined +// callees. +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// FUNCTION BODY +// A FUNCTION BODY entry describing the inlined function. //===----------------------------------------------------------------------===// #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -24,6 +194,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/GCOV.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -57,7 +228,7 @@ namespace sampleprof { /// /// The reader supports two file formats: text and binary. The text format /// is useful for debugging and testing, while the binary format is more -/// compact. They can both be used interchangeably. +/// compact and I/O efficient. They can both be used interchangeably. class SampleProfileReader { public: SampleProfileReader(std::unique_ptr B, LLVMContext &C) @@ -86,7 +257,7 @@ public: StringMap &getProfiles() { return Profiles; } /// \brief Report a parse error message. - void reportParseError(int64_t LineNumber, Twine Msg) const { + void reportError(int64_t LineNumber, Twine Msg) const { Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(), LineNumber, Msg)); } @@ -95,6 +266,10 @@ public: static ErrorOr> create(StringRef Filename, LLVMContext &C); + /// \brief Create a sample profile reader from the supplied memory buffer. + static ErrorOr> + create(std::unique_ptr &B, LLVMContext &C); + protected: /// \brief Map every function to its associated profile. /// @@ -120,6 +295,9 @@ public: /// \brief Read sample profiles from the associated file. std::error_code read() override; + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); }; class SampleProfileReaderBinary : public SampleProfileReader { @@ -153,14 +331,75 @@ protected: /// \returns the read value. ErrorOr readString(); + /// Read a string indirectly via the name table. + ErrorOr readStringFromTable(); + /// \brief Return true if we've reached the end of file. bool at_eof() const { return Data >= End; } + /// Read the contents of the given profile instance. 
+ std::error_code readProfile(FunctionSamples &FProfile); + /// \brief Points to the current location in the buffer. const uint8_t *Data; /// \brief Points to the end of the buffer. const uint8_t *End; + + /// Function name table. + std::vector NameTable; +}; + +typedef SmallVector InlineCallStack; + +// Supported histogram types in GCC. Currently, we only need support for +// call target histograms. +enum HistType { + HIST_TYPE_INTERVAL, + HIST_TYPE_POW2, + HIST_TYPE_SINGLE_VALUE, + HIST_TYPE_CONST_DELTA, + HIST_TYPE_INDIR_CALL, + HIST_TYPE_AVERAGE, + HIST_TYPE_IOR, + HIST_TYPE_INDIR_CALL_TOPN +}; + +class SampleProfileReaderGCC : public SampleProfileReader { +public: + SampleProfileReaderGCC(std::unique_ptr B, LLVMContext &C) + : SampleProfileReader(std::move(B), C), GcovBuffer(Buffer.get()) {} + + /// \brief Read and validate the file header. + std::error_code readHeader() override; + + /// \brief Read sample profiles from the associated file. + std::error_code read() override; + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + +protected: + std::error_code readNameTable(); + std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack, + bool Update, uint32_t Offset); + std::error_code readFunctionProfiles(); + std::error_code skipNextWord(); + template ErrorOr readNumber(); + ErrorOr readString(); + + /// \brief Read the section tag and check that it's the same as \p Expected. + std::error_code readSectionTag(uint32_t Expected); + + /// GCOV buffer containing the profile. + GCOVBuffer GcovBuffer; + + /// Function names in this profile. + std::vector Names; + + /// GCOV tags used to separate sections in the profile file. + static const uint32_t GCOVTagAFDOFileNames = 0xaa000000; + static const uint32_t GCOVTagAFDOFunction = 0xac000000; }; } // End namespace sampleprof diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index 302a82d32861..029dd2ebacb0 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -13,9 +13,8 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROFWRITER_H #define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" @@ -30,77 +29,102 @@ enum SampleProfileFormat { SPF_None = 0, SPF_Text, SPF_Binary, SPF_GCC }; /// \brief Sample-based profile writer. Base class. class SampleProfileWriter { public: - SampleProfileWriter(StringRef Filename, std::error_code &EC, - sys::fs::OpenFlags Flags) - : OS(Filename, EC, Flags) {} virtual ~SampleProfileWriter() {} - /// \brief Write sample profiles in \p S for function \p FName. + /// Write sample profiles in \p S for function \p FName. /// - /// \returns true if the file was updated successfully. False, otherwise. - virtual bool write(StringRef FName, const FunctionSamples &S) = 0; + /// \returns status code of the file update operation. + virtual std::error_code write(StringRef FName, const FunctionSamples &S) = 0; - /// \brief Write sample profiles in \p S for function \p F. - bool write(const Function &F, const FunctionSamples &S) { - return write(F.getName(), S); - } - - /// \brief Write all the sample profiles for all the functions in \p M. + /// Write all the sample profiles in the given map of samples. 
/// - /// \returns true if the file was updated successfully. False, otherwise. - bool write(const Module &M, StringMap &P) { - for (const auto &F : M) { - StringRef Name = F.getName(); - if (!write(Name, P[Name])) - return false; - } - return true; - } + /// \returns status code of the file update operation. + std::error_code write(const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; - /// \brief Write all the sample profiles in the given map of samples. - /// - /// \returns true if the file was updated successfully. False, otherwise. - bool write(StringMap &ProfileMap) { - for (auto &I : ProfileMap) { + for (const auto &I : ProfileMap) { StringRef FName = I.first(); - FunctionSamples &Profile = I.second; - if (!write(FName, Profile)) - return false; + const FunctionSamples &Profile = I.second; + if (std::error_code EC = write(FName, Profile)) + return EC; } - return true; + return sampleprof_error::success; } - /// \brief Profile writer factory. Create a new writer based on the value of - /// \p Format. + raw_ostream &getOutputStream() { return *OutputStream; } + + /// Profile writer factory. + /// + /// Create a new file writer based on the value of \p Format. static ErrorOr> create(StringRef Filename, SampleProfileFormat Format); + /// Create a new stream writer based on the value of \p Format. + /// For testing. + static ErrorOr> + create(std::unique_ptr &OS, SampleProfileFormat Format); + protected: + SampleProfileWriter(std::unique_ptr &OS) + : OutputStream(std::move(OS)) {} + + /// \brief Write a file header for the profile file. + virtual std::error_code + writeHeader(const StringMap &ProfileMap) = 0; + /// \brief Output stream where to emit the profile to. - raw_fd_ostream OS; + std::unique_ptr OutputStream; }; /// \brief Sample-based profile writer (text format). class SampleProfileWriterText : public SampleProfileWriter { public: - SampleProfileWriterText(StringRef F, std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_Text) {} + std::error_code write(StringRef FName, const FunctionSamples &S) override; - bool write(StringRef FName, const FunctionSamples &S) override; - bool write(const Module &M, StringMap &P) { - return SampleProfileWriter::write(M, P); +protected: + SampleProfileWriterText(std::unique_ptr &OS) + : SampleProfileWriter(OS), Indent(0) {} + + std::error_code + writeHeader(const StringMap &ProfileMap) override { + return sampleprof_error::success; } + +private: + /// Indent level to use when writing. + /// + /// This is used when printing inlined callees. + unsigned Indent; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; /// \brief Sample-based profile writer (binary format). 
class SampleProfileWriterBinary : public SampleProfileWriter { public: - SampleProfileWriterBinary(StringRef F, std::error_code &EC); + std::error_code write(StringRef F, const FunctionSamples &S) override; - bool write(StringRef F, const FunctionSamples &S) override; - bool write(const Module &M, StringMap &P) { - return SampleProfileWriter::write(M, P); - } +protected: + SampleProfileWriterBinary(std::unique_ptr &OS) + : SampleProfileWriter(OS), NameTable() {} + + std::error_code + writeHeader(const StringMap &ProfileMap) override; + std::error_code writeNameIdx(StringRef FName); + std::error_code writeBody(StringRef FName, const FunctionSamples &S); + +private: + void addName(StringRef FName); + void addNames(const FunctionSamples &S); + + MapVector NameTable; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; } // End namespace sampleprof diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def new file mode 100644 index 000000000000..2f99b0717adf --- /dev/null +++ b/include/llvm/Support/ARMTargetParser.def @@ -0,0 +1,223 @@ +//===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides defines to build up the ARM target parser's logic. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +#ifndef ARM_FPU +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) +#endif +ARM_FPU("invalid", FK_INVALID, FV_NONE, NS_None, FR_None) +ARM_FPU("none", FK_NONE, FV_NONE, NS_None, FR_None) +ARM_FPU("vfp", FK_VFP, FV_VFPV2, NS_None, FR_None) +ARM_FPU("vfpv2", FK_VFPV2, FV_VFPV2, NS_None, FR_None) +ARM_FPU("vfpv3", FK_VFPV3, FV_VFPV3, NS_None, FR_None) +ARM_FPU("vfpv3-fp16", FK_VFPV3_FP16, FV_VFPV3_FP16, NS_None, FR_None) +ARM_FPU("vfpv3-d16", FK_VFPV3_D16, FV_VFPV3, NS_None, FR_D16) +ARM_FPU("vfpv3-d16-fp16", FK_VFPV3_D16_FP16, FV_VFPV3_FP16, NS_None, FR_D16) +ARM_FPU("vfpv3xd", FK_VFPV3XD, FV_VFPV3, NS_None, FR_SP_D16) +ARM_FPU("vfpv3xd-fp16", FK_VFPV3XD_FP16, FV_VFPV3_FP16, NS_None, FR_SP_D16) +ARM_FPU("vfpv4", FK_VFPV4, FV_VFPV4, NS_None, FR_None) +ARM_FPU("vfpv4-d16", FK_VFPV4_D16, FV_VFPV4, NS_None, FR_D16) +ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FV_VFPV4, NS_None, FR_SP_D16) +ARM_FPU("fpv5-d16", FK_FPV5_D16, FV_VFPV5, NS_None, FR_D16) +ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FV_VFPV5, NS_None, FR_SP_D16) +ARM_FPU("fp-armv8", FK_FP_ARMV8, FV_VFPV5, NS_None, FR_None) +ARM_FPU("neon", FK_NEON, FV_VFPV3, NS_Neon, FR_None) +ARM_FPU("neon-fp16", FK_NEON_FP16, FV_VFPV3_FP16, NS_Neon, FR_None) +ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FV_VFPV4, NS_Neon, FR_None) +ARM_FPU("neon-fp-armv8", FK_NEON_FP_ARMV8, FV_VFPV5, NS_Neon, FR_None) +ARM_FPU("crypto-neon-fp-armv8", FK_CRYPTO_NEON_FP_ARMV8, FV_VFPV5, NS_Crypto, + FR_None) +ARM_FPU("softvfp", FK_SOFTVFP, FV_NONE, NS_None, FR_None) +#undef ARM_FPU + +#ifndef ARM_ARCH +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) +#endif +ARM_ARCH("invalid", AK_INVALID, nullptr, nullptr, + ARMBuildAttrs::CPUArch::Pre_v4, FK_NONE, AEK_NONE) +ARM_ARCH("armv2", AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv2a", AK_ARMV2A, "2A", "v2a", 
ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv3", AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv3m", AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv4", AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv4t", AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T, + FK_NONE, AEK_NONE) +ARM_ARCH("armv5t", AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T, + FK_NONE, AEK_NONE) +ARM_ARCH("armv5te", AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_DSP) +ARM_ARCH("armv5tej", AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ, + FK_NONE, AEK_DSP) +ARM_ARCH("armv6", AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6, + FK_VFPV2, AEK_DSP) +ARM_ARCH("armv6k", AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K, + FK_VFPV2, AEK_DSP) +ARM_ARCH("armv6t2", AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2, + FK_NONE, AEK_DSP) +ARM_ARCH("armv6kz", AK_ARMV6KZ, "6KZ", "v6kz", ARMBuildAttrs::CPUArch::v6KZ, + FK_VFPV2, (AEK_SEC | AEK_DSP)) +ARM_ARCH("armv6-m", AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M, + FK_NONE, AEK_NONE) +ARM_ARCH("armv7-a", AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7, + FK_NEON, AEK_DSP) +ARM_ARCH("armv7-r", AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7, + FK_NONE, (AEK_HWDIV | AEK_DSP)) +ARM_ARCH("armv7-m", AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7, + FK_NONE, AEK_HWDIV) +ARM_ARCH("armv7e-m", AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M, + FK_NONE, (AEK_HWDIV | AEK_DSP)) +ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +// Non-standard Arch names. 
+ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("iwmmxt2", AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("xscale", AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("armv7s", AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7, + FK_NEON_VFPV4, AEK_DSP) +ARM_ARCH("armv7k", AK_ARMV7K, "7-K", "v7k", ARMBuildAttrs::CPUArch::v7, + FK_NONE, AEK_DSP) +#undef ARM_ARCH + +#ifndef ARM_ARCH_EXT_NAME +#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) +#endif +// FIXME: This would be nicer were it tablegen +ARM_ARCH_EXT_NAME("invalid", AEK_INVALID, nullptr, nullptr) +ARM_ARCH_EXT_NAME("none", AEK_NONE, nullptr, nullptr) +ARM_ARCH_EXT_NAME("crc", AEK_CRC, "+crc", "-crc") +ARM_ARCH_EXT_NAME("crypto", AEK_CRYPTO, "+crypto","-crypto") +ARM_ARCH_EXT_NAME("fp", AEK_FP, nullptr, nullptr) +ARM_ARCH_EXT_NAME("idiv", (AEK_HWDIVARM | AEK_HWDIV), nullptr, nullptr) +ARM_ARCH_EXT_NAME("mp", AEK_MP, nullptr, nullptr) +ARM_ARCH_EXT_NAME("simd", AEK_SIMD, nullptr, nullptr) +ARM_ARCH_EXT_NAME("sec", AEK_SEC, nullptr, nullptr) +ARM_ARCH_EXT_NAME("virt", AEK_VIRT, nullptr, nullptr) +ARM_ARCH_EXT_NAME("fp16", AEK_FP16, "+fullfp16", "-fullfp16") +ARM_ARCH_EXT_NAME("os", AEK_OS, nullptr, nullptr) +ARM_ARCH_EXT_NAME("iwmmxt", AEK_IWMMXT, nullptr, nullptr) +ARM_ARCH_EXT_NAME("iwmmxt2", AEK_IWMMXT2, nullptr, nullptr) +ARM_ARCH_EXT_NAME("maverick", AEK_MAVERICK, nullptr, nullptr) +ARM_ARCH_EXT_NAME("xscale", AEK_XSCALE, nullptr, nullptr) +#undef ARM_ARCH_EXT_NAME + +#ifndef ARM_HW_DIV_NAME +#define ARM_HW_DIV_NAME(NAME, ID) +#endif +ARM_HW_DIV_NAME("invalid", AEK_INVALID) +ARM_HW_DIV_NAME("none", AEK_NONE) +ARM_HW_DIV_NAME("thumb", AEK_HWDIV) +ARM_HW_DIV_NAME("arm", AEK_HWDIVARM) +ARM_HW_DIV_NAME("arm,thumb", (AEK_HWDIVARM | AEK_HWDIV)) +#undef ARM_HW_DIV_NAME + +#ifndef ARM_CPU_NAME +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) +#endif +ARM_CPU_NAME("arm2", AK_ARMV2, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm3", AK_ARMV2A, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm6", AK_ARMV3, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm7m", AK_ARMV3M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm8", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm810", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm", AK_ARMV4, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("strongarm110", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm1100", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm1110", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm7tdmi", AK_ARMV4T, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm7tdmi-s", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm710t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm720t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9tdmi", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm920", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm920t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm922t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9312", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm940t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("ep9312", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm10tdmi", AK_ARMV5T, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1020t", AK_ARMV5T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) 
+ARM_CPU_NAME("arm946e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm966e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm968e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm10e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1020e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1022e", AK_ARMV5TE, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm926ej-s", AK_ARMV5TEJ, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1136j-s", AK_ARMV6, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1136jf-s", AK_ARMV6, FK_VFPV2, true, AEK_NONE) +ARM_CPU_NAME("arm1136jz-s", AK_ARMV6, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1176j-s", AK_ARMV6K, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1176jz-s", AK_ARMV6KZ, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("mpcore", AK_ARMV6K, FK_VFPV2, false, AEK_NONE) +ARM_CPU_NAME("mpcorenovfp", AK_ARMV6K, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1176jzf-s", AK_ARMV6KZ, FK_VFPV2, true, AEK_NONE) +ARM_CPU_NAME("arm1156t2-s", AK_ARMV6T2, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1156t2f-s", AK_ARMV6T2, FK_VFPV2, false, AEK_NONE) +ARM_CPU_NAME("cortex-m0", AK_ARMV6M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-m0plus", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-m1", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("sc000", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false, (AEK_SEC | AEK_MP)) +ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, AEK_SEC) +ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (AEK_SEC | AEK_MP)) +ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a15", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a17", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("krait", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-r4", AK_ARMV7R, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-r4f", AK_ARMV7R, FK_VFPV3_D16, false, AEK_NONE) +ARM_CPU_NAME("cortex-r5", AK_ARMV7R, FK_VFPV3_D16, false, + (AEK_MP | AEK_HWDIVARM)) +ARM_CPU_NAME("cortex-r7", AK_ARMV7R, FK_VFPV3_D16_FP16, false, + (AEK_MP | AEK_HWDIVARM)) +ARM_CPU_NAME("sc300", AK_ARMV7M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-m3", AK_ARMV7M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-m4", AK_ARMV7EM, FK_FPV4_SP_D16, true, AEK_NONE) +ARM_CPU_NAME("cortex-m7", AK_ARMV7EM, FK_FPV5_D16, false, AEK_NONE) +ARM_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC) +ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +// Non-standard Arch names. 
+ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("swift", AK_ARMV7S, FK_NEON_VFPV4, true, + (AEK_HWDIVARM | AEK_HWDIV)) +// Invalid CPU +ARM_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AEK_INVALID) +#undef ARM_CPU_NAME diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index 07da02d063c7..5268c8d16986 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -17,9 +17,15 @@ #include "llvm/Support/Compiler.h" #include +#include namespace llvm { -template + +namespace detail { + +// For everything other than an abstract class we can calulate alignment by +// building a class with a single character and a member of the given type. +template ::value> struct AlignmentCalcImpl { char x; #if defined(_MSC_VER) @@ -35,6 +41,25 @@ private: AlignmentCalcImpl() {} // Never instantiate. }; +// Abstract base class helper, this will have the minimal alignment and size +// for any abstract class. We don't even define its destructor because this +// type should never be used in a way that requires it. +struct AlignmentCalcImplBase { + virtual ~AlignmentCalcImplBase() = 0; +}; + +// When we have an abstract class type, specialize the alignment computation +// engine to create another abstract class that derives from both an empty +// abstract base class and the provided type. This has the same effect as the +// above except that it handles the fact that we can't actually create a member +// of type T. +template +struct AlignmentCalcImpl : AlignmentCalcImplBase, T { + virtual ~AlignmentCalcImpl() = 0; +}; + +} // End detail namespace. + /// AlignOf - A templated class that contains an enum value representing /// the alignment of the template argument. For example, /// AlignOf::Alignment represents the alignment of type "int". The @@ -50,11 +75,13 @@ struct AlignOf { // llvm::AlignOf::' [-Wenum-compare] // by using constexpr instead of enum. // (except on MSVC, since it doesn't support constexpr yet). - static constexpr unsigned Alignment = - static_cast(sizeof(AlignmentCalcImpl) - sizeof(T)); + static constexpr unsigned Alignment = static_cast( + sizeof(detail::AlignmentCalcImpl) - sizeof(T)); #else - enum { Alignment = - static_cast(sizeof(AlignmentCalcImpl) - sizeof(T)) }; + enum { + Alignment = static_cast(sizeof(detail::AlignmentCalcImpl) - + sizeof(T)) + }; #endif enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 }; enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 }; diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index f9b5cf22f97d..c608736fa956 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -222,6 +222,8 @@ public: // Without this, MemorySanitizer messages for values originated from here // will point to the allocation of the entire slab. __msan_allocated_memory(AlignedPtr, Size); + // Similarly, tell ASan about this space. + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -229,12 +231,16 @@ public: size_t PaddedSize = Size + Alignment - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); + // We own the new slab and don't want anyone reading anyting other than + // pieces returned from this method. So poison the whole slab. 
+ __asan_poison_memory_region(NewSlab, PaddedSize); CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize)); uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment); assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize); char *AlignedPtr = (char*)AlignedAddr; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -246,13 +252,16 @@ public: char *AlignedPtr = (char*)AlignedAddr; CurPtr = AlignedPtr + Size; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } // Pull in base class overloads. using AllocatorBase::Allocate; - void Deallocate(const void * /*Ptr*/, size_t /*Size*/) {} + void Deallocate(const void *Ptr, size_t Size) { + __asan_poison_memory_region(Ptr, Size); + } // Pull in base class overloads. using AllocatorBase::Deallocate; @@ -310,6 +319,10 @@ private: size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0); + // We own the new slab and don't want anyone reading anything other than + // pieces returned from this method. So poison the whole slab. + __asan_poison_memory_region(NewSlab, AllocatedSlabSize); + Slabs.push_back(NewSlab); CurPtr = (char *)(NewSlab); End = ((char *)NewSlab) + AllocatedSlabSize; diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h index 4304a253b287..1b45cc52973f 100644 --- a/include/llvm/Support/BlockFrequency.h +++ b/include/llvm/Support/BlockFrequency.h @@ -14,12 +14,12 @@ #ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H #define LLVM_SUPPORT_BLOCKFREQUENCY_H +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/DataTypes.h" namespace llvm { class raw_ostream; -class BranchProbability; // This class represents Block Frequency as a 64-bit value. class BlockFrequency { @@ -37,34 +37,38 @@ public: /// \brief Multiplies with a branch probability. The computation will never /// overflow. - BlockFrequency &operator*=(const BranchProbability &Prob); - const BlockFrequency operator*(const BranchProbability &Prob) const; + BlockFrequency &operator*=(BranchProbability Prob); + BlockFrequency operator*(BranchProbability Prob) const; /// \brief Divide by a non-zero branch probability using saturating /// arithmetic. - BlockFrequency &operator/=(const BranchProbability &Prob); - BlockFrequency operator/(const BranchProbability &Prob) const; + BlockFrequency &operator/=(BranchProbability Prob); + BlockFrequency operator/(BranchProbability Prob) const; /// \brief Adds another block frequency using saturating arithmetic. - BlockFrequency &operator+=(const BlockFrequency &Freq); - const BlockFrequency operator+(const BlockFrequency &Freq) const; + BlockFrequency &operator+=(BlockFrequency Freq); + BlockFrequency operator+(BlockFrequency Freq) const; + + /// \brief Subtracts another block frequency using saturating arithmetic. + BlockFrequency &operator-=(BlockFrequency Freq); + BlockFrequency operator-(BlockFrequency Freq) const; /// \brief Shift block frequency to the right by count digits saturating to 1. 
BlockFrequency &operator>>=(const unsigned count); - bool operator<(const BlockFrequency &RHS) const { + bool operator<(BlockFrequency RHS) const { return Frequency < RHS.Frequency; } - bool operator<=(const BlockFrequency &RHS) const { + bool operator<=(BlockFrequency RHS) const { return Frequency <= RHS.Frequency; } - bool operator>(const BlockFrequency &RHS) const { + bool operator>(BlockFrequency RHS) const { return Frequency > RHS.Frequency; } - bool operator>=(const BlockFrequency &RHS) const { + bool operator>=(BlockFrequency RHS) const { return Frequency >= RHS.Frequency; } }; diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h index a6429dd22a3b..26bc888d1cab 100644 --- a/include/llvm/Support/BranchProbability.h +++ b/include/llvm/Support/BranchProbability.h @@ -15,36 +15,59 @@ #define LLVM_SUPPORT_BRANCHPROBABILITY_H #include "llvm/Support/DataTypes.h" +#include <algorithm> #include <cassert> +#include <climits> +#include <numeric> namespace llvm { class raw_ostream; -// This class represents Branch Probability as a non-negative fraction. +// This class represents Branch Probability as a non-negative fraction that is +// no greater than 1. It uses a fixed-point-like implementation, in which the +// denominator is always a constant value (here we use 1<<31 for maximum +// precision). class BranchProbability { // Numerator uint32_t N; - // Denominator - uint32_t D; + // Denominator, which is a constant value. + static const uint32_t D = 1u << 31; + static const uint32_t UnknownN = UINT32_MAX; + + // Construct a BranchProbability with only numerator assuming the denominator + // is 1<<31. For internal use only. + explicit BranchProbability(uint32_t n) : N(n) {} public: - BranchProbability(uint32_t n, uint32_t d) : N(n), D(d) { - assert(d > 0 && "Denominator cannot be 0!"); - assert(n <= d && "Probability cannot be bigger than 1!"); - } + BranchProbability() : N(UnknownN) {} + BranchProbability(uint32_t Numerator, uint32_t Denominator); - static BranchProbability getZero() { return BranchProbability(0, 1); } - static BranchProbability getOne() { return BranchProbability(1, 1); } + bool isZero() const { return N == 0; } + bool isUnknown() const { return N == UnknownN; } + + static BranchProbability getZero() { return BranchProbability(0); } + static BranchProbability getOne() { return BranchProbability(D); } + static BranchProbability getUnknown() { return BranchProbability(UnknownN); } + // Create a BranchProbability object with the given numerator and 1<<31 + // as denominator. + static BranchProbability getRaw(uint32_t N) { return BranchProbability(N); } + // Create a BranchProbability object from 64-bit integers. + static BranchProbability getBranchProbability(uint64_t Numerator, + uint64_t Denominator); + + // Normalize given probabilities so that the sum of them becomes + // approximately one. + template <class ProbabilityIter> + static void normalizeProbabilities(ProbabilityIter Begin, + ProbabilityIter End); uint32_t getNumerator() const { return N; } - uint32_t getDenominator() const { return D; } + static uint32_t getDenominator() { return D; } // Return (1 - Probability). - BranchProbability getCompl() const { - return BranchProbability(D - N, D); - } + BranchProbability getCompl() const { return BranchProbability(D - N); } raw_ostream &print(raw_ostream &OS) const; @@ -66,24 +89,131 @@ public: /// \return \c Num divided by \c this.
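To make the fixed-point scheme concrete, here is a standalone sketch (plain arithmetic, not the LLVM class) of how an arbitrary N/D fraction is scaled into the constant 1<<31 denominator with round-to-nearest, the same rounding style used by normalizeProbabilities further down in this patch:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t FixedD = 1u << 31; // the constant denominator
  uint32_t n = 1, d = 3;            // represent the fraction 1/3
  assert(d > 0 && n <= d);
  // Scale into the fixed denominator, rounding to the nearest value.
  uint32_t N = (uint64_t(n) * FixedD + d / 2) / d;
  std::printf("1/3 ~ %u / 2^31 = %f\n", N, double(N) / FixedD);
  return 0;
}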
uint64_t scaleByInverse(uint64_t Num) const; - bool operator==(BranchProbability RHS) const { - return (uint64_t)N * RHS.D == (uint64_t)D * RHS.N; + BranchProbability &operator+=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + // Saturate the result in case of overflow. + N = (uint64_t(N) + RHS.N > D) ? D : N + RHS.N; + return *this; } - bool operator!=(BranchProbability RHS) const { - return !(*this == RHS); + + BranchProbability &operator-=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + // Saturate the result in case of underflow. + N = N < RHS.N ? 0 : N - RHS.N; + return *this; } + + BranchProbability &operator*=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (static_cast(N) * RHS.N + D / 2) / D; + return *this; + } + + BranchProbability &operator/=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + assert(RHS > 0 && "The divider cannot be zero."); + N /= RHS; + return *this; + } + + BranchProbability operator+(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob += RHS; + } + + BranchProbability operator-(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob -= RHS; + } + + BranchProbability operator*(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + + BranchProbability operator/(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob /= RHS; + } + + bool operator==(BranchProbability RHS) const { return N == RHS.N; } + bool operator!=(BranchProbability RHS) const { return !(*this == RHS); } + bool operator<(BranchProbability RHS) const { - return (uint64_t)N * RHS.D < (uint64_t)D * RHS.N; + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return N < RHS.N; + } + + bool operator>(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return RHS < *this; + } + + bool operator<=(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return !(RHS < *this); + } + + bool operator>=(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return !(*this < RHS); } - bool operator>(BranchProbability RHS) const { return RHS < *this; } - bool operator<=(BranchProbability RHS) const { return !(RHS < *this); } - bool operator>=(BranchProbability RHS) const { return !(*this < RHS); } }; -inline raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) { +inline raw_ostream &operator<<(raw_ostream &OS, BranchProbability Prob) { return Prob.print(OS); } +template +void BranchProbability::normalizeProbabilities(ProbabilityIter Begin, + ProbabilityIter End) { + if (Begin == End) + return; + + unsigned UnknownProbCount = 0; + uint64_t Sum = std::accumulate(Begin, End, uint64_t(0), + [&](uint64_t S, const BranchProbability &BP) { + if (!BP.isUnknown()) + return S + BP.N; + UnknownProbCount++; + return S; + }); + + if (UnknownProbCount > 0) { + BranchProbability ProbForUnknown = BranchProbability::getZero(); + // If the sum of all known probabilities is less than 
one, evenly distribute + // the complement of sum to unknown probabilities. Otherwise, set unknown + // probabilities to zeros and continue to normalize known probabilities. + if (Sum < BranchProbability::getDenominator()) + ProbForUnknown = BranchProbability::getRaw( + (BranchProbability::getDenominator() - Sum) / UnknownProbCount); + + std::replace_if(Begin, End, + [](const BranchProbability &BP) { return BP.isUnknown(); }, + ProbForUnknown); + + if (Sum <= BranchProbability::getDenominator()) + return; + } + + if (Sum == 0) { + BranchProbability BP(1, std::distance(Begin, End)); + std::fill(Begin, End, BP); + return; + } + + for (auto I = Begin; I != End; ++I) + I->N = (I->N * uint64_t(D) + Sum / 2) / Sum; +} + } #endif diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h index 786ba183b3b0..d4633aa7d3c6 100644 --- a/include/llvm/Support/CBindingWrapping.h +++ b/include/llvm/Support/CBindingWrapping.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_CBINDINGWRAPPING_H #include "llvm/Support/Casting.h" +#include "llvm-c/Types.h" #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ inline ty *unwrap(ref P) { \ diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h index 3c5ee06969d0..0162175efe3e 100644 --- a/include/llvm/Support/COFF.h +++ b/include/llvm/Support/COFF.h @@ -88,6 +88,7 @@ namespace COFF { IMAGE_FILE_MACHINE_AMD64 = 0x8664, IMAGE_FILE_MACHINE_ARM = 0x1C0, IMAGE_FILE_MACHINE_ARMNT = 0x1C4, + IMAGE_FILE_MACHINE_ARM64 = 0xAA64, IMAGE_FILE_MACHINE_EBC = 0xEBC, IMAGE_FILE_MACHINE_I386 = 0x14C, IMAGE_FILE_MACHINE_IA64 = 0x200, @@ -247,6 +248,7 @@ namespace COFF { enum SectionCharacteristics : uint32_t { SC_Invalid = 0xffffffff, + IMAGE_SCN_TYPE_NOLOAD = 0x00000002, IMAGE_SCN_TYPE_NO_PAD = 0x00000008, IMAGE_SCN_CNT_CODE = 0x00000020, IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 379d06a65741..943d2df37708 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -33,7 +33,6 @@ namespace llvm { -class BumpPtrStringSaver; class StringSaver; /// cl Namespace - This namespace contains all of the command line option @@ -206,9 +205,9 @@ class Option { unsigned AdditionalVals; // Greater than 0 for multi-valued option. public: - const char *ArgStr; // The argument string itself (ex: "help", "o") - const char *HelpStr; // The descriptive text message for -help - const char *ValueStr; // String describing what the value of this option is + StringRef ArgStr; // The argument string itself (ex: "help", "o") + StringRef HelpStr; // The descriptive text message for -help + StringRef ValueStr; // String describing what the value of this option is OptionCategory *Category; // The Category this option belongs to bool FullyInitialized; // Has addArgument been called?
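A usage sketch for the normalization helper defined above (assumes the patched BranchProbability.h and linking against LLVMSupport; the three-entry mix is invented):

#include "llvm/Support/BranchProbability.h"
#include <vector>
using llvm::BranchProbability;

void normalizeExample() {
  std::vector<BranchProbability> Probs = {
      BranchProbability(1, 4),          // 1/4, scaled to the 1<<31 denominator
      BranchProbability(1, 4),          // 1/4
      BranchProbability::getUnknown()}; // receives the remaining ~1/2
  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
  // The three entries now sum to approximately one.
}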
@@ -229,14 +228,14 @@ public: inline unsigned getNumAdditionalVals() const { return AdditionalVals; } // hasArgStr - Return true if the argstr != "" - bool hasArgStr() const { return ArgStr[0] != 0; } + bool hasArgStr() const { return !ArgStr.empty(); } //-------------------------------------------------------------------------=== // Accessor functions set by OptionModifiers // - void setArgStr(const char *S); - void setDescription(const char *S) { HelpStr = S; } - void setValueStr(const char *S) { ValueStr = S; } + void setArgStr(StringRef S); + void setDescription(StringRef S) { HelpStr = S; } + void setValueStr(StringRef S) { ValueStr = S; } void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { Occurrences = Val; } void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; } void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; } @@ -276,7 +275,7 @@ public: virtual void printOptionValue(size_t GlobalWidth, bool Force) const = 0; - virtual void getExtraOptionNames(SmallVectorImpl &) {} + virtual void getExtraOptionNames(SmallVectorImpl &) {} // addOccurrence - Wrapper around handleOccurrence that enforces Flags. // @@ -606,7 +605,7 @@ public: void initialize() {} - void getExtraOptionNames(SmallVectorImpl &OptionNames) { + void getExtraOptionNames(SmallVectorImpl &OptionNames) { // If there has been no argstr specified, that means that we need to add an // argument for every possible option. This ensures that our options are // vectored to us. @@ -715,14 +714,14 @@ public: // class basic_parser_impl { // non-template implementation of basic_parser public: - basic_parser_impl(Option &O) {} + basic_parser_impl(Option &) {} enum ValueExpected getValueExpectedFlagDefault() const { return ValueRequired; } - void getExtraOptionNames(SmallVectorImpl &) {} + void getExtraOptionNames(SmallVectorImpl &) {} void initialize() {} @@ -1206,8 +1205,7 @@ class opt : public Option, enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1368,8 +1366,7 @@ class list : public Option, public list_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1508,8 +1505,7 @@ class bits : public Option, public bits_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 141639839cc2..b3416bbfffb6 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -69,7 +69,7 @@ #if !defined(_MSC_VER) || defined(__clang__) || LLVM_MSC_PREREQ(1900) #define LLVM_NOEXCEPT noexcept #else -#define LLVM_NOEXCEPT +#define LLVM_NOEXCEPT throw() #endif /// \brief Does the compiler support ref-qualifiers for *this? @@ -189,7 +189,7 @@ /// 3.4 supported this but is buggy in various cases and produces unimplemented /// errors, just use it in GCC 4.0 and later. 
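For the LLVM_PACKED family defined just below, a self-contained sketch of the effect using the raw pragmas the macros wrap (the offsets assume a typical ABI where uint32_t is otherwise 4-byte aligned):

#include <cstdint>
#include <cstdio>

#pragma pack(push, 1) // what LLVM_PACKED_START expands to
struct PackedHeader {
  uint8_t Kind;
  uint32_t Size; // at offset 1 when packed, offset 4 otherwise
};
#pragma pack(pop) // what LLVM_PACKED_END expands to

int main() {
  static_assert(sizeof(PackedHeader) == 5, "packing removed the padding");
  std::printf("sizeof(PackedHeader) = %zu\n", sizeof(PackedHeader));
  return 0;
}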
#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0) -#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline)) +#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline #else @@ -293,6 +293,34 @@ # define LLVM_ALIGNAS(x) alignas(x) #endif +/// \macro LLVM_PACKED +/// \brief Used to specify a packed structure. +/// LLVM_PACKED( +/// struct A { +/// int i; +/// int j; +/// int k; +/// long long l; +/// }); +/// +/// LLVM_PACKED_START +/// struct B { +/// int i; +/// int j; +/// int k; +/// long long l; +/// }; +/// LLVM_PACKED_END +#ifdef _MSC_VER +# define LLVM_PACKED(d) __pragma(pack(push, 1)) d __pragma(pack(pop)) +# define LLVM_PACKED_START __pragma(pack(push, 1)) +# define LLVM_PACKED_END __pragma(pack(pop)) +#else +# define LLVM_PACKED(d) d __attribute__((packed)) +# define LLVM_PACKED_START _Pragma("pack(push, 1)") +# define LLVM_PACKED_END _Pragma("pack(pop)") +#endif + /// \macro LLVM_PTR_SIZE /// \brief A constant integer equivalent to the value of sizeof(void*). /// Generally used in combination with LLVM_ALIGNAS or when doing computation in @@ -333,8 +361,50 @@ /// \brief Whether LLVM itself is built with AddressSanitizer instrumentation. #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) # define LLVM_ADDRESS_SANITIZER_BUILD 1 +# include #else # define LLVM_ADDRESS_SANITIZER_BUILD 0 +# define __asan_poison_memory_region(p, size) +# define __asan_unpoison_memory_region(p, size) +#endif + +/// \macro LLVM_THREAD_SANITIZER_BUILD +/// \brief Whether LLVM itself is built with ThreadSanitizer instrumentation. +#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__) +# define LLVM_THREAD_SANITIZER_BUILD 1 +#else +# define LLVM_THREAD_SANITIZER_BUILD 0 +#endif + +#if LLVM_THREAD_SANITIZER_BUILD +// Thread Sanitizer is a tool that finds races in code. +// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . +// tsan detects these exact functions by name. +extern "C" { +void AnnotateHappensAfter(const char *file, int line, const volatile void *cv); +void AnnotateHappensBefore(const char *file, int line, const volatile void *cv); +void AnnotateIgnoreWritesBegin(const char *file, int line); +void AnnotateIgnoreWritesEnd(const char *file, int line); +} + +// This marker is used to define a happens-before arc. The race detector will +// infer an arc from the begin to the end when they share the same pointer +// argument. +# define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv) + +// This marker defines the destination of a happens-before arc. +# define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv) + +// Ignore any races on writes between here and the next TsanIgnoreWritesEnd. +# define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + +// Resume checking for racy writes. +# define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) +#else +# define TsanHappensBefore(cv) +# define TsanHappensAfter(cv) +# define TsanIgnoreWritesBegin() +# define TsanIgnoreWritesEnd() #endif /// \brief Mark debug helper function definitions like dump() that should not be diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h index c08c3c1f0d21..1a1c74368761 100644 --- a/include/llvm/Support/CrashRecoveryContext.h +++ b/include/llvm/Support/CrashRecoveryContext.h @@ -39,8 +39,6 @@ class CrashRecoveryContextCleanup; /// /// ... 
no crash was detected ... /// } -/// -/// Crash recovery contexts may not be nested. class CrashRecoveryContext { void *Impl; CrashRecoveryContextCleanup *head; @@ -109,10 +107,11 @@ class CrashRecoveryContextCleanup { protected: CrashRecoveryContext *context; CrashRecoveryContextCleanup(CrashRecoveryContext *context) - : context(context), cleanupFired(false) {} + : context(context), cleanupFired(false) {} + public: bool cleanupFired; - + virtual ~CrashRecoveryContextCleanup(); virtual void recoverResources() = 0; @@ -129,15 +128,16 @@ template class CrashRecoveryContextCleanupBase : public CrashRecoveryContextCleanup { protected: T *resource; - CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T* resource) - : CrashRecoveryContextCleanup(context), resource(resource) {} + CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T *resource) + : CrashRecoveryContextCleanup(context), resource(resource) {} + public: static DERIVED *create(T *x) { if (x) { if (CrashRecoveryContext *context = CrashRecoveryContext::GetCurrent()) return new DERIVED(context, x); } - return 0; + return nullptr; } }; @@ -146,9 +146,9 @@ class CrashRecoveryContextDestructorCleanup : public CrashRecoveryContextCleanupBase, T> { public: CrashRecoveryContextDestructorCleanup(CrashRecoveryContext *context, - T *resource) - : CrashRecoveryContextCleanupBase< - CrashRecoveryContextDestructorCleanup, T>(context, resource) {} + T *resource) + : CrashRecoveryContextCleanupBase< + CrashRecoveryContextDestructorCleanup, T>(context, resource) {} virtual void recoverResources() { this->resource->~T(); @@ -171,7 +171,7 @@ class CrashRecoveryContextReleaseRefCleanup : public CrashRecoveryContextCleanupBase, T> { public: - CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, + CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, T *resource) : CrashRecoveryContextCleanupBase, T>(context, resource) {} @@ -182,6 +182,7 @@ public: template > class CrashRecoveryContextCleanupRegistrar { CrashRecoveryContextCleanup *cleanup; + public: CrashRecoveryContextCleanupRegistrar(T *x) : cleanup(Cleanup::create(x)) { @@ -189,16 +190,14 @@ public: cleanup->getContext()->registerCleanup(cleanup); } - ~CrashRecoveryContextCleanupRegistrar() { - unregister(); - } - + ~CrashRecoveryContextCleanupRegistrar() { unregister(); } + void unregister() { if (cleanup && !cleanup->cleanupFired) cleanup->getContext()->unregisterCleanup(cleanup); - cleanup = 0; + cleanup = nullptr; } }; -} +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h index 95e37c01d7d5..4381b5bf1633 100644 --- a/include/llvm/Support/DOTGraphTraits.h +++ b/include/llvm/Support/DOTGraphTraits.h @@ -72,11 +72,12 @@ public: return ""; } - /// hasNodeAddressLabel - If this method returns true, the address of the node - /// is added to the label of the node. - template - static bool hasNodeAddressLabel(const void *, const GraphType &) { - return false; + // getNodeIdentifierLabel - Returns a string representing the + // address or other unique identifier of the node. (Only used if + // non-empty.) 
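A usage sketch for the recovery context documented above, assuming LLVMSupport is linked and crash recovery has been enabled process-wide; doWork is a hypothetical function that may crash:

#include "llvm/Support/CrashRecoveryContext.h"

static void doWork() { /* work that might crash */ }

bool runGuarded() {
  llvm::CrashRecoveryContext::Enable(); // opt in once per process
  llvm::CrashRecoveryContext CRC;
  if (!CRC.RunSafely(doWork))
    return false; // ... a crash was detected ...
  return true;    // ... no crash was detected ...
}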
+ template <typename GraphType> + static std::string getNodeIdentifierLabel(const void *, const GraphType &) { + return ""; } template <typename GraphType> diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h index fff4f986a6c0..6e213477d710 100644 --- a/include/llvm/Support/Debug.h +++ b/include/llvm/Support/Debug.h @@ -13,7 +13,7 @@ // // In particular, just wrap your code with the DEBUG() macro, and it will be // enabled automatically if you specify '-debug' on the command-line. -// Alternatively, you can also define the DEBUG_TYPE macro to "foo" to specify +// DEBUG() requires the DEBUG_TYPE macro to be defined. Set it to "foo" to specify // that your debug code belongs to class "foo". Be careful that you only do // this after including Debug.h and not around any #include of headers. Headers // should define and undef the macro around the code that needs to use the diff --git a/include/llvm/Support/Dwarf.def b/include/llvm/Support/Dwarf.def index 4b923b897e6f..b15070b3e9b0 100644 --- a/include/llvm/Support/Dwarf.def +++ b/include/llvm/Support/Dwarf.def @@ -99,10 +99,6 @@ HANDLE_DW_TAG(0x0041, type_unit) HANDLE_DW_TAG(0x0042, rvalue_reference_type) HANDLE_DW_TAG(0x0043, template_alias) -// Mock tags we use as discriminators. -HANDLE_DW_TAG(0x0100, auto_variable) // Tag for local (auto) variables. -HANDLE_DW_TAG(0x0101, arg_variable) // Tag for argument variables. - // New in DWARF v5. HANDLE_DW_TAG(0x0044, coarray_type) HANDLE_DW_TAG(0x0045, generic_subrange) @@ -117,6 +113,11 @@ HANDLE_DW_TAG(0x4106, GNU_template_template_param) HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack) HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack) HANDLE_DW_TAG(0x4200, APPLE_property) +HANDLE_DW_TAG(0xb000, BORLAND_property) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant) HANDLE_DW_OP(0x03, addr) HANDLE_DW_OP(0x06, deref) @@ -319,6 +320,7 @@ HANDLE_DW_LANG(0x0021, C_plus_plus_14) HANDLE_DW_LANG(0x0022, Fortran03) HANDLE_DW_LANG(0x0023, Fortran08) HANDLE_DW_LANG(0x8001, Mips_Assembler) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi) // DWARF attribute type encodings. HANDLE_DW_ATE(0x01, address) diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 17e9c1540a41..cea61bd75833 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -40,6 +40,7 @@ enum LLVMConstants : uint32_t { // LLVM mock tags (see also llvm/Support/Dwarf.def). DW_TAG_invalid = ~0U, // Tag for invalid results. DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. + DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. // Other constants. DWARF_VERSION = 4, // Default dwarf version we output. @@ -195,6 +196,7 @@ enum Attribute : uint16_t { DW_AT_dwo_name = 0x76, DW_AT_reference = 0x77, DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, @@ -230,6 +232,7 @@ enum Attribute : uint16_t { DW_AT_GNU_template_name = 0x2110, DW_AT_GNU_odr_signature = 0x210f, + DW_AT_GNU_macros = 0x2119, // Extensions for Fission proposal. DW_AT_GNU_dwo_name = 0x2130, @@ -238,6 +241,26 @@ enum Attribute : uint16_t { DW_AT_GNU_addr_base = 0x2133, DW_AT_GNU_pubnames = 0x2134, DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + + // Borland extensions.
+ DW_AT_BORLAND_property_read = 0x3b11, + DW_AT_BORLAND_property_write = 0x3b12, + DW_AT_BORLAND_property_implements = 0x3b13, + DW_AT_BORLAND_property_index = 0x3b14, + DW_AT_BORLAND_property_default = 0x3b15, + DW_AT_BORLAND_Delphi_unit = 0x3b20, + DW_AT_BORLAND_Delphi_class = 0x3b21, + DW_AT_BORLAND_Delphi_record = 0x3b22, + DW_AT_BORLAND_Delphi_metaclass = 0x3b23, + DW_AT_BORLAND_Delphi_constructor = 0x3b24, + DW_AT_BORLAND_Delphi_destructor = 0x3b25, + DW_AT_BORLAND_Delphi_anonymous_method = 0x3b26, + DW_AT_BORLAND_Delphi_interface = 0x3b27, + DW_AT_BORLAND_Delphi_ABI = 0x3b28, + DW_AT_BORLAND_Delphi_return = 0x3b29, + DW_AT_BORLAND_Delphi_frameptr = 0x3b30, + DW_AT_BORLAND_closure = 0x3b31, // LLVM project extensions. DW_AT_LLVM_include_path = 0x3e00, @@ -370,6 +393,14 @@ enum CallingConvention { DW_CC_program = 0x02, DW_CC_nocall = 0x03, DW_CC_lo_user = 0x40, + DW_CC_GNU_borland_fastcall_i386 = 0x41, + DW_CC_BORLAND_safecall = 0xb0, + DW_CC_BORLAND_stdcall = 0xb1, + DW_CC_BORLAND_pascal = 0xb2, + DW_CC_BORLAND_msfastcall = 0xb3, + DW_CC_BORLAND_msreturn = 0xb4, + DW_CC_BORLAND_thiscall = 0xb5, + DW_CC_BORLAND_fastcall = 0xb6, DW_CC_hi_user = 0xff }; @@ -429,6 +460,24 @@ enum MacinfoRecordType { DW_MACINFO_vendor_ext = 0xff }; +enum MacroEntryType { + // Macro Information Entry Type Encodings + DW_MACRO_define = 0x01, + DW_MACRO_undef = 0x02, + DW_MACRO_start_file = 0x03, + DW_MACRO_end_file = 0x04, + DW_MACRO_define_indirect = 0x05, + DW_MACRO_undef_indirect = 0x06, + DW_MACRO_transparent_include = 0x07, + DW_MACRO_define_indirect_sup = 0x08, + DW_MACRO_undef_indirect_sup = 0x09, + DW_MACRO_transparent_include_sup = 0x0a, + DW_MACRO_define_indirectx = 0x0b, + DW_MACRO_undef_indirectx = 0x0c, + DW_MACRO_lo_user = 0xe0, + DW_MACRO_hi_user = 0xff +}; + enum CallFrameInfo { // Call frame instruction encodings DW_CFA_extended = 0x00, @@ -596,6 +645,7 @@ const char *GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); /// /// \li \a getTag() returns \a DW_TAG_invalid on invalid input. /// \li \a getVirtuality() returns \a DW_VIRTUALITY_invalid on invalid input. +/// \li \a getMacinfo() returns \a DW_MACINFO_invalid on invalid input. /// /// @{ unsigned getTag(StringRef TagString); @@ -603,6 +653,7 @@ unsigned getOperationEncoding(StringRef OperationEncodingString); unsigned getVirtuality(StringRef VirtualityString); unsigned getLanguage(StringRef LanguageString); unsigned getAttributeEncoding(StringRef EncodingString); +unsigned getMacinfo(StringRef MacinfoString); /// @} /// \brief Returns the symbolic string representing Val when used as a value @@ -610,7 +661,7 @@ unsigned getAttributeEncoding(StringRef EncodingString); const char *AttributeValueString(uint16_t Attr, unsigned Val); /// \brief Describes an entry of the various gnu_pub* debug sections.
-/// +/// /// The gnu_pub* kind looks like: /// /// 0-3 reserved @@ -642,7 +693,6 @@ private: }; }; - } // End of namespace dwarf } // End of namespace llvm diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 94a4bfb22025..97708a7cdd63 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -429,6 +429,33 @@ enum { #include "ELFRelocs/ARM.def" }; +// AVR specific e_flags +enum : unsigned { + EF_AVR_ARCH_AVR1 = 1, + EF_AVR_ARCH_AVR2 = 2, + EF_AVR_ARCH_AVR25 = 25, + EF_AVR_ARCH_AVR3 = 3, + EF_AVR_ARCH_AVR31 = 31, + EF_AVR_ARCH_AVR35 = 35, + EF_AVR_ARCH_AVR4 = 4, + EF_AVR_ARCH_AVR5 = 5, + EF_AVR_ARCH_AVR51 = 51, + EF_AVR_ARCH_AVR6 = 6, + EF_AVR_ARCH_AVRTINY = 100, + EF_AVR_ARCH_XMEGA1 = 101, + EF_AVR_ARCH_XMEGA2 = 102, + EF_AVR_ARCH_XMEGA3 = 103, + EF_AVR_ARCH_XMEGA4 = 104, + EF_AVR_ARCH_XMEGA5 = 105, + EF_AVR_ARCH_XMEGA6 = 106, + EF_AVR_ARCH_XMEGA7 = 107 +}; + +// ELF Relocation types for AVR +enum { +#include "ELFRelocs/AVR.def" +}; + // Mips Specific e_flags enum : unsigned { EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions @@ -522,26 +549,28 @@ enum { ODK_PAGESIZE = 11 // Page size information }; -// Hexagon Specific e_flags -// Release 5 ABI +// Hexagon-specific e_flags enum { - // Object processor version flags, bits[3:0] + // Object processor version flags, bits[11:0] EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 + EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 + EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60 // Highest ISA version flags - EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[3:0] + EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] // of e_flags EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA - EF_HEXAGON_ISA_V5 = 0x00000040 // Hexagon V5 ISA + EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA + EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA + EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA }; -// Hexagon specific Section indexes for common small data -// Release 5 ABI +// Hexagon-specific section indexes for common small data enum { SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access @@ -747,7 +776,12 @@ enum : unsigned { SHF_MIPS_ADDR = 0x40000000, // Section data is string data by default. - SHF_MIPS_STRING = 0x80000000 + SHF_MIPS_STRING = 0x80000000, + + SHF_AMDGPU_HSA_GLOBAL = 0x00100000, + SHF_AMDGPU_HSA_READONLY = 0x00200000, + SHF_AMDGPU_HSA_CODE = 0x00400000, + SHF_AMDGPU_HSA_AGENT = 0x00800000 }; // Section Group Flags @@ -828,7 +862,12 @@ enum { STT_LOOS = 10, // Lowest operating system-specific symbol type STT_HIOS = 12, // Highest operating system-specific symbol type STT_LOPROC = 13, // Lowest processor-specific symbol type - STT_HIPROC = 15 // Highest processor-specific symbol type + STT_HIPROC = 15, // Highest processor-specific symbol type + + // AMDGPU symbol types + STT_AMDGPU_HSA_KERNEL = 10, + STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11, + STT_AMDGPU_HSA_METADATA = 12 }; enum { @@ -979,7 +1018,13 @@ enum { PT_MIPS_REGINFO = 0x70000000, // Register usage information. PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table. PT_MIPS_OPTIONS = 0x70000002, // Options segment. - PT_MIPS_ABIFLAGS = 0x70000003 // Abiflags segment. 
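Below, a sketch of how a consumer might classify an AVR object file from the e_flags values defined above; note that the 0x7f architecture mask is how GNU binutils defines EF_AVR_ARCH_MASK and is an assumption here, not part of this patch:

#include <cstdint>

enum : unsigned { EF_AVR_ARCH_AVR5 = 5 }; // mirrors the table above

// Hypothetical helper: true if the e_flags word says "avr5 family".
bool isAVR5(uint32_t EFlags) {
  return (EFlags & 0x7f) == EF_AVR_ARCH_AVR5; // 0x7f: assumed arch mask
}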
+ PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. + + // AMDGPU program header types. + PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, + PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, + PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, + PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003 }; // Segment flag bits. @@ -1139,8 +1184,10 @@ enum { DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs. DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic. DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT. - DT_MIPS_RWPLT = 0x70000034 // Points to the base + DT_MIPS_RWPLT = 0x70000034, // Points to the base // of a writable PLT. + DT_MIPS_RLD_MAP_REL = 0x70000035 // Relative offset of run time loader + // map, used for debugging. }; // DT_FLAGS values. diff --git a/include/llvm/Support/ELFRelocs/AVR.def b/include/llvm/Support/ELFRelocs/AVR.def new file mode 100644 index 000000000000..5692d6cb9aa0 --- /dev/null +++ b/include/llvm/Support/ELFRelocs/AVR.def @@ -0,0 +1,40 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_AVR_NONE, 0) +ELF_RELOC(R_AVR_32, 1) +ELF_RELOC(R_AVR_7_PCREL, 2) +ELF_RELOC(R_AVR_13_PCREL, 3) +ELF_RELOC(R_AVR_16, 4) +ELF_RELOC(R_AVR_16_PM, 5) +ELF_RELOC(R_AVR_LO8_LDI, 6) +ELF_RELOC(R_AVR_HI8_LDI, 7) +ELF_RELOC(R_AVR_HH8_LDI, 8) +ELF_RELOC(R_AVR_LO8_LDI_NEG, 9) +ELF_RELOC(R_AVR_HI8_LDI_NEG, 10) +ELF_RELOC(R_AVR_HH8_LDI_NEG, 11) +ELF_RELOC(R_AVR_LO8_LDI_PM, 12) +ELF_RELOC(R_AVR_HI8_LDI_PM, 13) +ELF_RELOC(R_AVR_HH8_LDI_PM, 14) +ELF_RELOC(R_AVR_LO8_LDI_PM_NEG, 15) +ELF_RELOC(R_AVR_HI8_LDI_PM_NEG, 16) +ELF_RELOC(R_AVR_HH8_LDI_PM_NEG, 17) +ELF_RELOC(R_AVR_CALL, 18) +ELF_RELOC(R_AVR_LDI, 19) +ELF_RELOC(R_AVR_6, 20) +ELF_RELOC(R_AVR_6_ADIW, 21) +ELF_RELOC(R_AVR_MS8_LDI, 22) +ELF_RELOC(R_AVR_MS8_LDI_NEG, 23) +ELF_RELOC(R_AVR_LO8_LDI_GS, 24) +ELF_RELOC(R_AVR_HI8_LDI_GS, 25) +ELF_RELOC(R_AVR_8, 26) +ELF_RELOC(R_AVR_8_LO8, 27) +ELF_RELOC(R_AVR_8_HI8, 28) +ELF_RELOC(R_AVR_8_HLO8, 29) +ELF_RELOC(R_AVR_SYM_DIFF, 30) +ELF_RELOC(R_AVR_16_LDST, 31) +ELF_RELOC(R_AVR_LDS_STS_16, 33) +ELF_RELOC(R_AVR_PORT6, 34) +ELF_RELOC(R_AVR_PORT5, 35) diff --git a/include/llvm/Support/ELFRelocs/PowerPC.def b/include/llvm/Support/ELFRelocs/PowerPC.def index b6c39419b0f7..e4f8ee0ebe2b 100644 --- a/include/llvm/Support/ELFRelocs/PowerPC.def +++ b/include/llvm/Support/ELFRelocs/PowerPC.def @@ -3,6 +3,68 @@ #error "ELF_RELOC must be defined" #endif +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. 
+ +#undef R_PPC_NONE +#undef R_PPC_ADDR32 +#undef R_PPC_ADDR24 +#undef R_PPC_ADDR16 +#undef R_PPC_ADDR16_LO +#undef R_PPC_ADDR16_HI +#undef R_PPC_ADDR16_HA +#undef R_PPC_ADDR14 +#undef R_PPC_ADDR14_BRTAKEN +#undef R_PPC_ADDR14_BRNTAKEN +#undef R_PPC_REL24 +#undef R_PPC_REL14 +#undef R_PPC_REL14_BRTAKEN +#undef R_PPC_REL14_BRNTAKEN +#undef R_PPC_GOT16 +#undef R_PPC_GOT16_LO +#undef R_PPC_GOT16_HI +#undef R_PPC_GOT16_HA +#undef R_PPC_PLTREL24 +#undef R_PPC_JMP_SLOT +#undef R_PPC_LOCAL24PC +#undef R_PPC_REL32 +#undef R_PPC_TLS +#undef R_PPC_DTPMOD32 +#undef R_PPC_TPREL16 +#undef R_PPC_TPREL16_LO +#undef R_PPC_TPREL16_HI +#undef R_PPC_TPREL16_HA +#undef R_PPC_TPREL32 +#undef R_PPC_DTPREL16 +#undef R_PPC_DTPREL16_LO +#undef R_PPC_DTPREL16_HI +#undef R_PPC_DTPREL16_HA +#undef R_PPC_DTPREL32 +#undef R_PPC_GOT_TLSGD16 +#undef R_PPC_GOT_TLSGD16_LO +#undef R_PPC_GOT_TLSGD16_HI +#undef R_PPC_GOT_TLSGD16_HA +#undef R_PPC_GOT_TLSLD16 +#undef R_PPC_GOT_TLSLD16_LO +#undef R_PPC_GOT_TLSLD16_HI +#undef R_PPC_GOT_TLSLD16_HA +#undef R_PPC_GOT_TPREL16 +#undef R_PPC_GOT_TPREL16_LO +#undef R_PPC_GOT_TPREL16_HI +#undef R_PPC_GOT_TPREL16_HA +#undef R_PPC_GOT_DTPREL16 +#undef R_PPC_GOT_DTPREL16_LO +#undef R_PPC_GOT_DTPREL16_HI +#undef R_PPC_GOT_DTPREL16_HA +#undef R_PPC_TLSGD +#undef R_PPC_TLSLD +#undef R_PPC_REL16 +#undef R_PPC_REL16_LO +#undef R_PPC_REL16_HI +#undef R_PPC_REL16_HA + ELF_RELOC(R_PPC_NONE, 0) /* No relocation. */ ELF_RELOC(R_PPC_ADDR32, 1) ELF_RELOC(R_PPC_ADDR24, 2) diff --git a/include/llvm/Support/ELFRelocs/PowerPC64.def b/include/llvm/Support/ELFRelocs/PowerPC64.def index 7b2a3cb2235b..3a47c5a07574 100644 --- a/include/llvm/Support/ELFRelocs/PowerPC64.def +++ b/include/llvm/Support/ELFRelocs/PowerPC64.def @@ -3,6 +3,97 @@ #error "ELF_RELOC must be defined" #endif +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. 
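A self-contained sketch of the failure mode this comment describes: if a system header leaves one of these names defined as an object-like macro, the macro argument is expanded before substitution and the table entry becomes garbage, hence the #undef block.

#define R_PPC64_ADDR32 1 // stand-in for the asm/elf.h definition

#define ELF_RELOC(name, value) int name = value; // hypothetical expansion
// Without the next line, ELF_RELOC(R_PPC64_ADDR32, 1) would expand to the
// ill-formed "int 1 = 1;".
#undef R_PPC64_ADDR32
ELF_RELOC(R_PPC64_ADDR32, 1) // now declares "int R_PPC64_ADDR32 = 1;"
#undef ELF_RELOC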
+ +#undef R_PPC64_NONE +#undef R_PPC64_ADDR32 +#undef R_PPC64_ADDR24 +#undef R_PPC64_ADDR16 +#undef R_PPC64_ADDR16_LO +#undef R_PPC64_ADDR16_HI +#undef R_PPC64_ADDR16_HA +#undef R_PPC64_ADDR14 +#undef R_PPC64_ADDR14_BRTAKEN +#undef R_PPC64_ADDR14_BRNTAKEN +#undef R_PPC64_REL24 +#undef R_PPC64_REL14 +#undef R_PPC64_REL14_BRTAKEN +#undef R_PPC64_REL14_BRNTAKEN +#undef R_PPC64_GOT16 +#undef R_PPC64_GOT16_LO +#undef R_PPC64_GOT16_HI +#undef R_PPC64_GOT16_HA +#undef R_PPC64_GLOB_DAT +#undef R_PPC64_JMP_SLOT +#undef R_PPC64_RELATIVE +#undef R_PPC64_REL32 +#undef R_PPC64_ADDR64 +#undef R_PPC64_ADDR16_HIGHER +#undef R_PPC64_ADDR16_HIGHERA +#undef R_PPC64_ADDR16_HIGHEST +#undef R_PPC64_ADDR16_HIGHESTA +#undef R_PPC64_REL64 +#undef R_PPC64_TOC16 +#undef R_PPC64_TOC16_LO +#undef R_PPC64_TOC16_HI +#undef R_PPC64_TOC16_HA +#undef R_PPC64_TOC +#undef R_PPC64_ADDR16_DS +#undef R_PPC64_ADDR16_LO_DS +#undef R_PPC64_GOT16_DS +#undef R_PPC64_GOT16_LO_DS +#undef R_PPC64_TOC16_DS +#undef R_PPC64_TOC16_LO_DS +#undef R_PPC64_TLS +#undef R_PPC64_DTPMOD64 +#undef R_PPC64_TPREL16 +#undef R_PPC64_TPREL16_LO +#undef R_PPC64_TPREL16_HI +#undef R_PPC64_TPREL16_HA +#undef R_PPC64_TPREL64 +#undef R_PPC64_DTPREL16 +#undef R_PPC64_DTPREL16_LO +#undef R_PPC64_DTPREL16_HI +#undef R_PPC64_DTPREL16_HA +#undef R_PPC64_DTPREL64 +#undef R_PPC64_GOT_TLSGD16 +#undef R_PPC64_GOT_TLSGD16_LO +#undef R_PPC64_GOT_TLSGD16_HI +#undef R_PPC64_GOT_TLSGD16_HA +#undef R_PPC64_GOT_TLSLD16 +#undef R_PPC64_GOT_TLSLD16_LO +#undef R_PPC64_GOT_TLSLD16_HI +#undef R_PPC64_GOT_TLSLD16_HA +#undef R_PPC64_GOT_TPREL16_DS +#undef R_PPC64_GOT_TPREL16_LO_DS +#undef R_PPC64_GOT_TPREL16_HI +#undef R_PPC64_GOT_TPREL16_HA +#undef R_PPC64_GOT_DTPREL16_DS +#undef R_PPC64_GOT_DTPREL16_LO_DS +#undef R_PPC64_GOT_DTPREL16_HI +#undef R_PPC64_GOT_DTPREL16_HA +#undef R_PPC64_TPREL16_DS +#undef R_PPC64_TPREL16_LO_DS +#undef R_PPC64_TPREL16_HIGHER +#undef R_PPC64_TPREL16_HIGHERA +#undef R_PPC64_TPREL16_HIGHEST +#undef R_PPC64_TPREL16_HIGHESTA +#undef R_PPC64_DTPREL16_DS +#undef R_PPC64_DTPREL16_LO_DS +#undef R_PPC64_DTPREL16_HIGHER +#undef R_PPC64_DTPREL16_HIGHERA +#undef R_PPC64_DTPREL16_HIGHEST +#undef R_PPC64_DTPREL16_HIGHESTA +#undef R_PPC64_TLSGD +#undef R_PPC64_TLSLD +#undef R_PPC64_REL16 +#undef R_PPC64_REL16_LO +#undef R_PPC64_REL16_HI +#undef R_PPC64_REL16_HA + ELF_RELOC(R_PPC64_NONE, 0) ELF_RELOC(R_PPC64_ADDR32, 1) ELF_RELOC(R_PPC64_ADDR24, 2) @@ -21,7 +112,9 @@ ELF_RELOC(R_PPC64_GOT16, 14) ELF_RELOC(R_PPC64_GOT16_LO, 15) ELF_RELOC(R_PPC64_GOT16_HI, 16) ELF_RELOC(R_PPC64_GOT16_HA, 17) +ELF_RELOC(R_PPC64_GLOB_DAT, 20) ELF_RELOC(R_PPC64_JMP_SLOT, 21) +ELF_RELOC(R_PPC64_RELATIVE, 22) ELF_RELOC(R_PPC64_REL32, 26) ELF_RELOC(R_PPC64_ADDR64, 38) ELF_RELOC(R_PPC64_ADDR16_HIGHER, 39) diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index fd59009e0d3a..bc93c9a66eef 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -77,6 +77,95 @@ inline void write(void *memory, value_type value) { &value, sizeof(value_type)); } + +template +using make_unsigned_t = typename std::make_unsigned::type; + +/// Read a value of a particular endianness from memory, for a location +/// that starts at the given bit offset within the first byte. +template +inline value_type readAtBitAlignment(const void *memory, uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + return read(memory); + else { + // Read two values and compose the result from them. 
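// Aside (illustrative values, not part of the patch): with value_type =
// uint8_t and startBit = 3, reading the two bytes as val[0] = 0xAB and
// val[1] = 0xCD, the composition below computes
//   lowerVal = 0xAB >> 3         = 0x15 (top 5 bits of the first byte)
//   upperVal = (0xCD & 0x7) << 5 = 0xA0 (low 3 bits of the second byte)
//   result   = 0x15 | 0xA0       = 0xB5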
+ value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Shift bits from the lower value into place. + make_unsigned_t lowerVal = val[0] >> startBit; + // Mask off upper bits after right shift in case of signed type. + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + lowerVal &= ((make_unsigned_t)1 << numBitsFirstVal) - 1; + + // Get the bits from the upper value. + make_unsigned_t upperVal = + val[1] & (((make_unsigned_t)1 << startBit) - 1); + // Shift them in to place. + upperVal <<= numBitsFirstVal; + + return lowerVal | upperVal; + } +} + +/// Write a value to memory with a particular endianness, for a location +/// that starts at the given bit offset within the first byte. +template +inline void writeAtBitAlignment(void *memory, value_type value, + uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + write(memory, value); + else { + // Read two values and shift the result into them. + value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Mask off any existing bits in the upper part of the lower value that + // we want to replace. + val[0] &= ((make_unsigned_t)1 << startBit) - 1; + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + make_unsigned_t lowerVal = value; + if (startBit > 0) { + // Mask off the upper bits in the new value that are not going to go into + // the lower value. This avoids a left shift of a negative value, which + // is undefined behavior. + lowerVal &= (((make_unsigned_t)1 << numBitsFirstVal) - 1); + // Now shift the new bits into place + lowerVal <<= startBit; + } + val[0] |= lowerVal; + + // Mask off any existing bits in the lower part of the upper value that + // we want to replace. + val[1] &= ~(((make_unsigned_t)1 << startBit) - 1); + // Next shift the bits that go into the upper value into position. + make_unsigned_t upperVal = value >> numBitsFirstVal; + // Mask off upper bits after right shift in case of signed type. + upperVal &= ((make_unsigned_t)1 << startBit) - 1; + val[1] |= upperVal; + + // Finally, rewrite values. 
+ val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + memcpy(LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + &val[0], sizeof(value_type) * 2); + } +} } // end namespace endian namespace detail { @@ -208,19 +297,47 @@ typedef detail::packed_endian_specific_integral unaligned_int64_t; namespace endian { -inline uint16_t read16le(const void *p) { return *(const ulittle16_t *)p; } -inline uint32_t read32le(const void *p) { return *(const ulittle32_t *)p; } -inline uint64_t read64le(const void *p) { return *(const ulittle64_t *)p; } -inline uint16_t read16be(const void *p) { return *(const ubig16_t *)p; } -inline uint32_t read32be(const void *p) { return *(const ubig32_t *)p; } -inline uint64_t read64be(const void *p) { return *(const ubig64_t *)p; } +template inline T read(const void *P) { + return *(const detail::packed_endian_specific_integral *)P; +} -inline void write16le(void *p, uint16_t v) { *(ulittle16_t *)p = v; } -inline void write32le(void *p, uint32_t v) { *(ulittle32_t *)p = v; } -inline void write64le(void *p, uint64_t v) { *(ulittle64_t *)p = v; } -inline void write16be(void *p, uint16_t v) { *(ubig16_t *)p = v; } -inline void write32be(void *p, uint32_t v) { *(ubig32_t *)p = v; } -inline void write64be(void *p, uint64_t v) { *(ubig64_t *)p = v; } +template inline uint16_t read16(const void *P) { + return read(P); +} +template inline uint32_t read32(const void *P) { + return read(P); +} +template inline uint64_t read64(const void *P) { + return read(P); +} + +inline uint16_t read16le(const void *P) { return read16(P); } +inline uint32_t read32le(const void *P) { return read32(P); } +inline uint64_t read64le(const void *P) { return read64(P); } +inline uint16_t read16be(const void *P) { return read16(P); } +inline uint32_t read32be(const void *P) { return read32(P); } +inline uint64_t read64be(const void *P) { return read64(P); } + +template inline void write(void *P, T V) { + *(detail::packed_endian_specific_integral *)P = V; +} + +template inline void write16(void *P, uint16_t V) { + write(P, V); +} +template inline void write32(void *P, uint32_t V) { + write(P, V); +} +template inline void write64(void *P, uint64_t V) { + write(P, V); +} + +inline void write16le(void *P, uint16_t V) { write16(P, V); } +inline void write32le(void *P, uint32_t V) { write32(P, V); } +inline void write64le(void *P, uint64_t V) { write64(P, V); } +inline void write16be(void *P, uint16_t V) { write16(P, V); } +inline void write32be(void *P, uint32_t V) { write32(P, V); } +inline void write64be(void *P, uint64_t V) { write64(P, V); } } // end namespace endian } // end namespace support } // end namespace llvm diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index 9afd52d1abc7..32f05e0e9610 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -61,22 +61,22 @@ namespace llvm { ~ScopedFatalErrorHandler() { remove_fatal_error_handler(); } }; - /// Reports a serious error, calling any installed error handler. These - /// functions are intended to be used for error conditions which are outside - /// the control of the compiler (I/O errors, invalid user input, etc.) - /// - /// If no error handler is installed the default is to print the message to - /// standard error, followed by a newline. - /// After the error handler is called this function will call exit(1), it - /// does not return. 
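A usage sketch for the templated endian helpers above (assumes the patched llvm/Support/Endian.h; the constant is arbitrary):

#include "llvm/Support/Endian.h"
#include <cstdio>

int main() {
  unsigned char Buf[4];
  llvm::support::endian::write32le(Buf, 0x11223344u);
  // Memory now holds 44 33 22 11 regardless of the host's endianness, so
  // re-reading it big-endian byte-swaps the value.
  std::printf("0x%08x\n",
              (unsigned)llvm::support::endian::read32be(Buf)); // 0x44332211
  return 0;
}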
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, - bool gen_crash_diag = true); +/// Reports a serious error, calling any installed error handler. These +/// functions are intended to be used for error conditions which are outside +/// the control of the compiler (I/O errors, invalid user input, etc.) +/// +/// If no error handler is installed the default is to print the message to +/// standard error, followed by a newline. +/// After the error handler is called this function will call exit(1), it +/// does not return. +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, + bool gen_crash_diag = true); /// This function calls abort(), and prints the optional message to stderr. /// Use the llvm_unreachable macro (that adds location info), instead of diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h index 589404f9b4ee..ca6ede73e8df 100644 --- a/include/llvm/Support/ErrorOr.h +++ b/include/llvm/Support/ErrorOr.h @@ -1,4 +1,4 @@ -//===- llvm/Support/ErrorOr.h - Error Smart Pointer -----------------------===// +//===- llvm/Support/ErrorOr.h - Error Smart Pointer -------------*- C++ -*-===// // // The LLVM Linker // @@ -91,6 +91,7 @@ private: typedef typename std::remove_reference::type &reference; typedef const typename std::remove_reference::type &const_reference; typedef typename std::remove_reference::type *pointer; + typedef const typename std::remove_reference::type *const_pointer; public: template @@ -183,10 +184,14 @@ public: return toPointer(getStorage()); } + const_pointer operator->() const { return toPointer(getStorage()); } + reference operator *() { return *getStorage(); } + const_reference operator*() const { return *getStorage(); } + private: template void copyConstruct(const ErrorOr &Other) { @@ -246,10 +251,14 @@ private: return Val; } + const_pointer toPointer(const_pointer Val) const { return Val; } + pointer toPointer(wrap *Val) { return &Val->get(); } + const_pointer toPointer(const wrap *Val) const { return &Val->get(); } + storage_type *getStorage() { assert(!HasError && "Cannot get value when an error exists!"); return reinterpret_cast(TStorage.buffer); diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h index fd8879c84622..3bcf64a8a08b 100644 --- a/include/llvm/Support/FileOutputBuffer.h +++ b/include/llvm/Support/FileOutputBuffer.h @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" namespace llvm { @@ -37,9 +38,8 @@ public: /// Factory method to create an OutputBuffer object which manages a read/write /// buffer of the specified size. When committed, the buffer will be written /// to the file at the specified path. 
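A small sketch of the const-qualified accessors the ErrorOr hunk above adds; makeGreeting is an illustrative helper, not an LLVM API.

#include "llvm/Support/ErrorOr.h"
#include <cassert>
#include <string>
#include <system_error>

static llvm::ErrorOr<std::string> makeGreeting(bool Ok) {
  if (!Ok)
    return std::make_error_code(std::errc::invalid_argument);
  return std::string("hello");
}

int main() {
  const llvm::ErrorOr<std::string> G = makeGreeting(true); // note: const object
  assert(G && "no error");
  assert(G->size() == 5); // const operator-> added above
  assert(*G == "hello");  // const operator* added above
  return 0;
}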
- static std::error_code create(StringRef FilePath, size_t Size, - std::unique_ptr &Result, - unsigned Flags = 0); + static ErrorOr> + create(StringRef FilePath, size_t Size, unsigned Flags = 0); /// Returns a pointer to the start of the buffer. uint8_t *getBufferStart() { diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index a736c324f8aa..4733ddb77575 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -95,21 +95,21 @@ enum perms { }; // Helper functions so that you can use & and | to manipulate perms bits: -inline perms operator|(perms l , perms r) { - return static_cast( - static_cast(l) | static_cast(r)); +inline perms operator|(perms l, perms r) { + return static_cast(static_cast(l) | + static_cast(r)); } -inline perms operator&(perms l , perms r) { - return static_cast( - static_cast(l) & static_cast(r)); +inline perms operator&(perms l, perms r) { + return static_cast(static_cast(l) & + static_cast(r)); } inline perms &operator|=(perms &l, perms r) { - l = l | r; - return l; + l = l | r; + return l; } inline perms &operator&=(perms &l, perms r) { - l = l & r; - return l; + l = l & r; + return l; } inline perms operator~(perms x) { return static_cast(~static_cast(x)); @@ -156,6 +156,7 @@ class file_status friend bool equivalent(file_status A, file_status B); file_type Type; perms Perms; + public: #if defined(LLVM_ON_UNIX) file_status() : fs_st_dev(0), fs_st_ino(0), fs_st_mtime(0), @@ -265,6 +266,20 @@ private: /// @name Physical Operators /// @{ +/// @brief Make \a path an absolute path. +/// +/// Makes \a path absolute using the \a current_directory if it is not already. +/// An empty \a path will result in the \a current_directory. +/// +/// /absolute/path => /absolute/path +/// relative/../path => /relative/../path +/// +/// @param path A path that is modified to be an absolute path. +/// @returns errc::success if \a path has been made absolute, otherwise a +/// platform-specific error_code. +std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path); + /// @brief Make \a path an absolute path. /// /// Makes \a path absolute using the current directory if it is not already. An @@ -285,7 +300,8 @@ std::error_code make_absolute(SmallVectorImpl &path); /// specific error_code. If IgnoreExisting is false, also returns /// error if the directory already existed. std::error_code create_directories(const Twine &path, - bool IgnoreExisting = true); + bool IgnoreExisting = true, + perms Perms = owner_all | group_all); /// @brief Create the directory in path. /// @@ -293,7 +309,8 @@ std::error_code create_directories(const Twine &path, /// @returns errc::success if is_directory(path), otherwise a platform /// specific error_code. If IgnoreExisting is false, also returns /// error if the directory already existed. -std::error_code create_directory(const Twine &path, bool IgnoreExisting = true); +std::error_code create_directory(const Twine &path, bool IgnoreExisting = true, + perms Perms = owner_all | group_all); /// @brief Create a link from \a from to \a to. /// @@ -375,9 +392,7 @@ inline bool exists(const Twine &Path) { /// /// @param Path Input path. /// @returns True if we can execute it, false otherwise. -inline bool can_execute(const Twine &Path) { - return !access(Path, AccessMode::Execute); -} +bool can_execute(const Twine &Path); /// @brief Can we write this file? 
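To illustrate the FileOutputBuffer::create change above, a sketch of the new ErrorOr-returning factory, assuming commit() still returns std::error_code in this revision; "out.bin" is an illustrative path.

#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <cstring>
#include <memory>

static std::error_code writeMagic() {
  llvm::ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> BufOrErr =
      llvm::FileOutputBuffer::create("out.bin", 4);
  if (std::error_code EC = BufOrErr.getError())
    return EC;                                   // creation failed
  std::unique_ptr<llvm::FileOutputBuffer> &Buf = *BufOrErr;
  std::memcpy(Buf->getBufferStart(), "\x7f" "ELF", 4); // fill the mapped region
  return Buf->commit();                          // flush buffer to disk
}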
/// @@ -531,15 +546,15 @@ std::error_code status_known(const Twine &path, bool &result); /// /// Generates a unique path suitable for a temporary file and then opens it as a /// file. The name is based on \a model with '%' replaced by a random char in -/// [0-9a-f]. If \a model is not an absolute path, a suitable temporary -/// directory will be prepended. +/// [0-9a-f]. If \a model is not an absolute path, the temporary file will be +/// created in the current directory. /// /// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s /// /// This is an atomic operation. Either the file is created and opened, or the /// file system is left untouched. /// -/// The intendend use is for files that are to be kept, possibly after +/// The intended use is for files that are to be kept, possibly after /// renaming them. For example, when running 'clang -c foo.o', the file can /// be first created as foo-abc123.o and then renamed. /// diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h index 4319a3ba2745..f0b437a0cbed 100644 --- a/include/llvm/Support/Format.h +++ b/include/llvm/Support/Format.h @@ -118,6 +118,7 @@ class FormattedString { unsigned Width; bool RightJustify; friend class raw_ostream; + public: FormattedString(StringRef S, unsigned W, bool R) : Str(S), Width(W), RightJustify(R) { } @@ -146,6 +147,7 @@ class FormattedNumber { bool Upper; bool HexPrefix; friend class raw_ostream; + public: FormattedNumber(uint64_t HV, int64_t DV, unsigned W, bool H, bool U, bool Prefix) @@ -178,7 +180,7 @@ inline FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, return FormattedNumber(N, 0, Width, true, Upper, false); } -/// format_decimal - Output \p N as a right justified, fixed-width decimal. If +/// format_decimal - Output \p N as a right justified, fixed-width decimal. If /// number will not fit in width, full number is still printed. Examples: /// OS << format_decimal(0, 5) => " 0" /// OS << format_decimal(255, 5) => " 255" @@ -188,7 +190,6 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) { return FormattedNumber(0, N, Width, false, false, false); } - } // end namespace llvm #endif diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h index c2e34bd3eaeb..544434f036a4 100644 --- a/include/llvm/Support/GCOV.h +++ b/include/llvm/Support/GCOV.h @@ -30,12 +30,11 @@ class GCOVBlock; class FileInfo; namespace GCOV { -enum GCOVVersion { V402, V404 }; -} // end GCOV namespace +enum GCOVVersion { V402, V404, V704 }; -/// GCOVOptions - A struct for passing gcov options between functions. -struct GCOVOptions { - GCOVOptions(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) +/// \brief A struct for passing gcov options between functions. +struct Options { + Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {} @@ -48,6 +47,7 @@ struct GCOVOptions { bool LongFileNames; bool NoOutput; }; +} // end GCOV namespace /// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific /// read operations. 
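A sketch (not part of the patch) of constructing the renamed GCOV::Options; the positional flags follow the constructor quoted above.

#include "llvm/Support/GCOV.h"

llvm::GCOV::Options makeDefaultGcovOptions() {
  // Positional arguments per the constructor above: AllBlocks, BranchInfo,
  // BranchCount, FuncCoverage, PreservePaths, UncondBranch, LongFileNames,
  // NoOutput.
  return llvm::GCOV::Options(/*AllBlocks=*/false, /*BranchInfo=*/false,
                             /*BranchCount=*/false, /*FuncCoverage=*/false,
                             /*PreservePaths=*/false, /*UncondBranch=*/false,
                             /*LongFileNames=*/false, /*NoOutput=*/false);
}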
@@ -90,6 +90,11 @@ public: Version = GCOV::V404; return true; } + if (VersionStr == "*704") { + Cursor += 4; + Version = GCOV::V704; + return true; + } errs() << "Unexpected version: " << VersionStr << ".\n"; return false; } @@ -390,7 +395,7 @@ class FileInfo { }; public: - FileInfo(const GCOVOptions &Options) + FileInfo(const GCOV::Options &Options) : Options(Options), LineInfo(), RunCount(0), ProgramCount(0) {} void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) { @@ -424,7 +429,7 @@ private: void printFuncCoverage(raw_ostream &OS) const; void printFileCoverage(raw_ostream &OS) const; - const GCOVOptions &Options; + const GCOV::Options &Options; StringMap LineInfo; uint32_t RunCount; uint32_t ProgramCount; diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 63678bb98bb1..8751f272cd29 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -371,8 +371,9 @@ public: void releaseMemory() { reset(); } /// getNode - return the (Post)DominatorTree node for the specified basic - /// block. This is the same as using operator[] on this class. - /// + /// block. This is the same as using operator[] on this class. The result + /// may (but is not required to) be null for a forward (backwards) + /// statically unreachable block. DomTreeNodeBase *getNode(NodeT *BB) const { auto I = DomTreeNodes.find(BB); if (I != DomTreeNodes.end()) @@ -380,6 +381,7 @@ public: return nullptr; } + /// See getNode. DomTreeNodeBase *operator[](NodeT *BB) const { return getNode(BB); } /// getRootNode - This returns the entry node for the CFG of the function. If @@ -732,13 +734,13 @@ public: for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), E = TraitsTy::nodes_end(&F); I != E; ++I) { - if (TraitsTy::child_begin(I) == TraitsTy::child_end(I)) - addRoot(I); + if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I)) + addRoot(&*I); // Prepopulate maps so that we don't get iterator invalidation issues // later. - this->IDoms[I] = nullptr; - this->DomTreeNodes[I] = nullptr; + this->IDoms[&*I] = nullptr; + this->DomTreeNodes[&*I] = nullptr; } Calculate>(*this, F); diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 7c065f939256..3e867dc6cbf1 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -21,7 +21,6 @@ /// //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H @@ -88,7 +87,7 @@ unsigned DFSPass(DominatorTreeBase& DT, // Increment the successor number for the next time we get to it. ++Worklist.back().second; - + // Visit the successor next, if it isn't already visited. 
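Per the clarified getNode() contract above, a statically unreachable block may have no tree node, so callers should check for null. A minimal sketch against the IR-level DominatorTree:

#include "llvm/IR/Dominators.h"
using namespace llvm;

BasicBlock *idomOrNull(DominatorTree &DT, BasicBlock *BB) {
  if (DomTreeNode *N = DT.getNode(BB))    // may be null for unreachable blocks
    if (DomTreeNode *IDom = N->getIDom()) // the entry block has no idom
      return IDom->getBlock();
  return nullptr; // no dominator information available
}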
typename GraphT::NodeType* Succ = *NextSucc; @@ -103,9 +102,9 @@ unsigned DFSPass(DominatorTreeBase& DT, return N; } -template -typename GraphT::NodeType* -Eval(DominatorTreeBase& DT, +template +typename GraphT::NodeType * +Eval(DominatorTreeBase &DT, typename GraphT::NodeType *VIn, unsigned LastLinked) { typename DominatorTreeBase::InfoRec &VInInfo = DT.Info[VIn]; @@ -117,7 +116,7 @@ Eval(DominatorTreeBase& DT, if (VInInfo.Parent >= LastLinked) Work.push_back(VIn); - + while (!Work.empty()) { typename GraphT::NodeType* V = Work.back(); typename DominatorTreeBase::InfoRec &VInfo = @@ -128,8 +127,8 @@ Eval(DominatorTreeBase& DT, if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) { Work.push_back(VAncestor); continue; - } - Work.pop_back(); + } + Work.pop_back(); // Update VInfo based on Ancestor info if (VInfo.Parent < LastLinked) @@ -169,7 +168,7 @@ void Calculate(DominatorTreeBase::NodeType>& DT, i != e; ++i) N = DFSPass(DT, DT.Roots[i], N); - // it might be that some blocks did not get a DFS number (e.g., blocks of + // it might be that some blocks did not get a DFS number (e.g., blocks of // infinite loops). In these cases an artificial exit node is required. MultipleRoots |= (DT.isPostDominator() && N != GraphTraits::size(&F)); @@ -287,7 +286,6 @@ void Calculate(DominatorTreeBase::NodeType>& DT, DT.updateDFSNumbers(); } - } #endif diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index b1af3d7c2632..86985c569464 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -175,8 +175,9 @@ public: O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. - if (DTraits.hasNodeAddressLabel(Node, G)) - O << "|" << static_cast(Node); + std::string Id = DTraits.getNodeIdentifierLabel(Node, G); + if (!Id.empty()) + O << "|" << DOT::EscapeString(Id); std::string NodeDesc = DTraits.getNodeDescription(Node, G); if (!NodeDesc.empty()) @@ -199,8 +200,9 @@ public: O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. - if (DTraits.hasNodeAddressLabel(Node, G)) - O << "|" << static_cast(Node); + std::string Id = DTraits.getNodeIdentifierLabel(Node, G); + if (!Id.empty()) + O << "|" << DOT::EscapeString(Id); std::string NodeDesc = DTraits.getNodeDescription(Node, G); if (!NodeDesc.empty()) diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h new file mode 100644 index 000000000000..20c28a5f8e45 --- /dev/null +++ b/include/llvm/Support/JamCRC.h @@ -0,0 +1,48 @@ +//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of JamCRC. +// +// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties +// of this CRC: +// Width : 32 +// Poly : 04C11DB7 +// Init : FFFFFFFF +// RefIn : True +// RefOut : True +// XorOut : 00000000 +// Check : 340BC6D9 (result of CRC for "123456789") +// +// N.B. We permit flexibility of the "Init" value. Some consumers of this need +// it to be zero. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JAMCRC_H +#define LLVM_SUPPORT_JAMCRC_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class JamCRC { +public: + JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} + + // \brief Update the CRC calculation with Data. + void update(ArrayRef Data); + + uint32_t getCRC() const { return CRC; } + +private: + uint32_t CRC; +}; +} // End of namespace llvm + +#endif diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h index 775127505923..54b8745de1c1 100644 --- a/include/llvm/Support/MachO.h +++ b/include/llvm/Support/MachO.h @@ -132,7 +132,9 @@ namespace llvm { LC_DYLIB_CODE_SIGN_DRS = 0x0000002Bu, LC_ENCRYPTION_INFO_64 = 0x0000002Cu, LC_LINKER_OPTION = 0x0000002Du, - LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu + LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu, + LC_VERSION_MIN_TVOS = 0x0000002Fu, + LC_VERSION_MIN_WATCHOS = 0x00000030u, }; enum : uint32_t { @@ -142,7 +144,6 @@ namespace llvm { SG_NORELOC = 0x4u, SG_PROTECTED_VERSION_1 = 0x8u, - // Constant masks for the "flags" field in llvm::MachO::section and // llvm::MachO::section_64 SECTION_TYPE = 0x000000ffu, // SECTION_TYPE @@ -334,7 +335,6 @@ namespace llvm { EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u }; - enum { // Constant masks for the "n_type" field in llvm::MachO::nlist and // llvm::MachO::nlist_64 @@ -385,7 +385,7 @@ namespace llvm { SELF_LIBRARY_ORDINAL = 0x0, MAX_LIBRARY_ORDINAL = 0xfd, DYNAMIC_LOOKUP_ORDINAL = 0xfe, - EXECUTABLE_ORDINAL = 0xff + EXECUTABLE_ORDINAL = 0xff }; enum StabType { @@ -506,7 +506,6 @@ namespace llvm { // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. ARM64_RELOC_ADDEND = 10, - // Constant values for the r_type field in an x86_64 architecture // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info // structure @@ -530,7 +529,6 @@ namespace llvm { VM_PROT_EXECUTE = 0x4 }; - // Structs from struct mach_header { @@ -784,7 +782,6 @@ namespace llvm { flags:8; }; - struct twolevel_hints_command { uint32_t cmd; uint32_t cmdsize; @@ -924,7 +921,6 @@ namespace llvm { uint64_t stacksize; }; - // Structs from struct fat_header { uint32_t magic; @@ -995,7 +991,6 @@ namespace llvm { uint64_t n_value; }; - // Byte order swapping functions for MachO structs inline void swapStruct(mach_header &mh) { diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index addd34e704bc..2e131e47177d 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -15,8 +15,8 @@ #define LLVM_SUPPORT_MANAGEDSTATIC_H #include "llvm/Support/Atomic.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Threading.h" -#include "llvm/Support/Valgrind.h" namespace llvm { diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 2cf7e0e5d0b3..8111aeebe6ee 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -63,7 +63,7 @@ template struct TrailingZerosCounter { } }; -#if __GNUC__ >= 4 || _MSC_VER +#if __GNUC__ >= 4 || defined(_MSC_VER) template struct TrailingZerosCounter { static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) @@ -71,7 +71,7 @@ template struct TrailingZerosCounter { #if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_ctz(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanForward(&Index, Val); return Index; @@ 
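Returning to the new JamCRC header above, a usage sketch; the expected value 0x340BC6D9 is the "Check" constant quoted in the file's header comment.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/JamCRC.h"
#include <cassert>

int main() {
  llvm::JamCRC CRC;                        // Init defaults to 0xFFFFFFFF
  const char Data[] = "123456789";
  CRC.update(llvm::makeArrayRef(Data, 9)); // exclude the terminating NUL
  assert(CRC.getCRC() == 0x340BC6D9u);     // the documented check value
  return 0;
}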
-87,7 +87,7 @@ template struct TrailingZerosCounter { #if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_ctzll(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanForward64(&Index, Val); return Index; @@ -132,7 +132,7 @@ template struct LeadingZerosCounter { } }; -#if __GNUC__ >= 4 || _MSC_VER +#if __GNUC__ >= 4 || defined(_MSC_VER) template struct LeadingZerosCounter { static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) @@ -140,7 +140,7 @@ template struct LeadingZerosCounter { #if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_clz(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanReverse(&Index, Val); return Index ^ 31; @@ -156,7 +156,7 @@ template struct LeadingZerosCounter { #if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_clzll(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanReverse64(&Index, Val); return Index ^ 63; @@ -313,7 +313,7 @@ inline bool isShiftedUInt(uint64_t x) { /// isUIntN - Checks if an unsigned integer fits into the given (dynamic) /// bit width. inline bool isUIntN(unsigned N, uint64_t x) { - return x == (x & (~0ULL >> (64 - N))); + return N >= 64 || x < (UINT64_C(1)<<(N)); } /// isIntN - Checks if an signed integer fits into the given (dynamic) @@ -552,7 +552,7 @@ inline uint32_t FloatToBits(float Float) { inline uint64_t MinAlign(uint64_t A, uint64_t B) { // The largest power of 2 that divides both A and B. // - // Replace "-Value" by "1+~Value" in the following commented code to avoid + // Replace "-Value" by "1+~Value" in the following commented code to avoid // MSVC warning C4146 // return (A | B) & -(A | B); return (A | B) & (1 + ~(A | B)); @@ -599,15 +599,27 @@ inline uint64_t PowerOf2Floor(uint64_t A) { /// Returns the next integer (mod 2**64) that is greater than or equal to /// \p Value and is a multiple of \p Align. \p Align must be non-zero. /// +/// If non-zero \p Skew is specified, the return value will be a minimal +/// integer that is greater than or equal to \p Value and equal to +/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than +/// \p Align, its value is adjusted to '\p Skew mod \p Align'. +/// /// Examples: /// \code /// RoundUpToAlignment(5, 8) = 8 /// RoundUpToAlignment(17, 8) = 24 /// RoundUpToAlignment(~0LL, 8) = 0 /// RoundUpToAlignment(321, 255) = 510 +/// +/// RoundUpToAlignment(5, 8, 7) = 7 +/// RoundUpToAlignment(17, 8, 1) = 17 +/// RoundUpToAlignment(~0LL, 8, 3) = 3 +/// RoundUpToAlignment(321, 255, 42) = 552 /// \endcode -inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) { - return (Value + Align - 1) / Align * Align; +inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, + uint64_t Skew = 0) { + Skew %= Align; + return (Value + Align - 1 - Skew) / Align * Align + Skew; } /// Returns the offset to the next integer (mod 2**64) that is greater than @@ -641,6 +653,70 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) { return int64_t(X << (64 - B)) >> (64 - B); } +/// \brief Add two unsigned integers, X and Y, of type T. +/// Clamp the result to the maximum representable value of T on overflow. +/// ResultOverflowed indicates if the result is larger than the maximum +/// representable value of type T. +template +typename std::enable_if::value, T>::type +SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? 
*ResultOverflowed : Dummy; + // Hacker's Delight, p. 29 + T Z = X + Y; + Overflowed = (Z < X || Z < Y); + if (Overflowed) + return std::numeric_limits::max(); + else + return Z; +} + +/// \brief Multiply two unsigned integers, X and Y, of type T. +/// Clamp the result to the maximum representable value of T on overflow. +/// ResultOverflowed indicates if the result is larger than the maximum +/// representable value of type T. +template +typename std::enable_if::value, T>::type +SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; + + // Hacker's Delight, p. 30 has a different algorithm, but we don't use that + // because it fails for uint16_t (where multiplication can have undefined + // behavior due to promotion to int), and requires a division in addition + // to the multiplication. + + Overflowed = false; + + // Log2(Z) would be either Log2Z or Log2Z + 1. + // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z + // will necessarily be less than Log2Max as desired. + int Log2Z = Log2_64(X) + Log2_64(Y); + const T Max = std::numeric_limits::max(); + int Log2Max = Log2_64(Max); + if (Log2Z < Log2Max) { + return X * Y; + } + if (Log2Z > Log2Max) { + Overflowed = true; + return Max; + } + + // We're going to use the top bit, and maybe overflow one + // bit past it. Multiply all but the bottom bit then add + // that on at the end. + T Z = (X >> 1) * Y; + if (Z & ~(Max >> 1)) { + Overflowed = true; + return Max; + } + Z <<= 1; + if (X & 1) + return SaturatingAdd(Z, Y, ResultOverflowed); + + return Z; +} + extern const float huge_valf; } // End llvm namespace diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h index b4305cb697d0..8103aea2fa25 100644 --- a/include/llvm/Support/Memory.h +++ b/include/llvm/Support/Memory.h @@ -1,4 +1,4 @@ -//===- llvm/Support/Memory.h - Memory Support --------------------*- C++ -*-===// +//===- llvm/Support/Memory.h - Memory Support -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -32,6 +32,7 @@ namespace sys { MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { } void *base() const { return Address; } size_t size() const { return Size; } + private: void *Address; ///< Address of first byte of memory area size_t Size; ///< Size, in bytes of the memory area @@ -70,7 +71,7 @@ namespace sys { /// If the address following \p NearBlock is not so aligned, it will be /// rounded up to the next allocation granularity boundary. /// - /// \r a non-null MemoryBlock if the function was successful, + /// \r a non-null MemoryBlock if the function was successful, /// otherwise a null MemoryBlock is with \p EC describing the error. /// /// @brief Allocate mapped memory. @@ -86,7 +87,7 @@ namespace sys { /// /// \r error_success if the function was successful, or an error_code /// describing the failure if an error occurred. - /// + /// /// @brief Release mapped memory. static std::error_code releaseMappedMemory(MemoryBlock &Block); @@ -131,7 +132,6 @@ namespace sys { /// @brief Release Read/Write/Execute memory. static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = nullptr); - /// InvalidateInstructionCache - Before the JIT can run a block of code /// that has been emitted it must invalidate the instruction cache on some /// platforms. @@ -155,6 +155,31 @@ namespace sys { /// as writable. static bool setRangeWritable(const void *Addr, size_t Size); }; + + /// Owning version of MemoryBlock. 
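A sketch of the saturating helpers above at the uint8_t boundary (255 is the maximum representable value); note that 15 * 17 == 255 fits exactly while 16 * 16 == 256 saturates.

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  bool Overflowed = false;
  assert(llvm::SaturatingAdd<uint8_t>(200, 100, &Overflowed) == 255);
  assert(Overflowed);                  // 300 does not fit in uint8_t
  assert(llvm::SaturatingMultiply<uint8_t>(16, 16, &Overflowed) == 255);
  assert(Overflowed);                  // 256 saturates to the maximum
  assert(llvm::SaturatingMultiply<uint8_t>(15, 17, &Overflowed) == 255);
  assert(!Overflowed);                 // 255 fits exactly: no saturation
  return 0;
}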
+ class OwningMemoryBlock { + public: + OwningMemoryBlock() = default; + explicit OwningMemoryBlock(MemoryBlock M) : M(M) {} + OwningMemoryBlock(OwningMemoryBlock &&Other) { + M = Other.M; + Other.M = MemoryBlock(); + } + OwningMemoryBlock& operator=(OwningMemoryBlock &&Other) { + M = Other.M; + Other.M = MemoryBlock(); + return *this; + } + ~OwningMemoryBlock() { + Memory::releaseMappedMemory(M); + } + void *base() const { return M.base(); } + size_t size() const { return M.size(); } + MemoryBlock getMemoryBlock() const { return M; } + private: + MemoryBlock M; + }; + } } diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index 81616d8ba3ac..73d643537a6f 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_MEMORYBUFFER_H #define LLVM_SUPPORT_MEMORYBUFFER_H -#include "llvm-c/Support.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/DataTypes.h" @@ -122,7 +121,8 @@ public: /// Open the specified file as a MemoryBuffer, or open stdin if the Filename /// is "-". static ErrorOr> - getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1); + getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1, + bool RequiresNullTerminator = true); /// Map a subrange of the specified file as a MemoryBuffer. static ErrorOr> @@ -151,6 +151,8 @@ class MemoryBufferRef { public: MemoryBufferRef() {} + MemoryBufferRef(MemoryBuffer& Buffer) + : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {} MemoryBufferRef(StringRef Buffer, StringRef Identifier) : Buffer(Buffer), Identifier(Identifier) {} diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h index 08e277ad5ce1..ac978d4c242c 100644 --- a/include/llvm/Support/OnDiskHashTable.h +++ b/include/llvm/Support/OnDiskHashTable.h @@ -53,6 +53,8 @@ namespace llvm { /// /// Write Data to Out. DataLen is the length from EmitKeyDataLength. /// static void EmitData(raw_ostream &Out, key_type_ref Key, /// data_type_ref Data, offset_type DataLen); +/// /// Determine if two keys are equal. Optional, only needed by contains. +/// static bool EqualKey(key_type_ref Key1, key_type_ref Key2); /// }; /// \endcode template class OnDiskChainedHashTableGenerator { @@ -122,13 +124,21 @@ public: /// Uses the provided Info instead of a stack allocated one. void insert(typename Info::key_type_ref Key, typename Info::data_type_ref Data, Info &InfoObj) { - ++NumEntries; if (4 * NumEntries >= 3 * NumBuckets) resize(NumBuckets * 2); insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj)); } + /// \brief Determine whether an entry has been inserted. + bool contains(typename Info::key_type_ref Key, Info &InfoObj) { + unsigned Hash = InfoObj.ComputeHash(Key); + for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next) + if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key)) + return true; + return false; + } + /// \brief Emit the table to Out, which must not be at offset 0. offset_type Emit(raw_ostream &Out) { Info InfoObj; @@ -161,8 +171,22 @@ public: LE.write(I->Hash); const std::pair &Len = InfoObj.EmitKeyDataLength(Out, I->Key, I->Data); +#ifdef NDEBUG InfoObj.EmitKey(Out, I->Key, Len.first); InfoObj.EmitData(Out, I->Key, I->Data, Len.second); +#else + // In asserts mode, check that the users length matches the data they + // wrote. 
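Relating to the OwningMemoryBlock class added above: a sketch of the RAII pattern it enables, under the assumption that allocateMappedMemory keeps its current signature. The mapping is released automatically when the owner is destroyed.

#include "llvm/Support/Memory.h"
#include <system_error>
using namespace llvm;

sys::OwningMemoryBlock allocatePage(std::error_code &EC) {
  sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
      4096, /*NearBlock=*/nullptr,
      sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
  return sys::OwningMemoryBlock(MB); // ownership moves out with the return
}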
+ uint64_t KeyStart = Out.tell(); + InfoObj.EmitKey(Out, I->Key, Len.first); + uint64_t DataStart = Out.tell(); + InfoObj.EmitData(Out, I->Key, I->Data, Len.second); + uint64_t End = Out.tell(); + assert(offset_type(DataStart - KeyStart) == Len.first && + "key length does not match bytes written"); + assert(offset_type(End - DataStart) == Len.second && + "data length does not match bytes written"); +#endif } } @@ -239,11 +263,12 @@ template class OnDiskChainedHashTable { Info InfoObj; public: + typedef Info InfoType; typedef typename Info::internal_key_type internal_key_type; typedef typename Info::external_key_type external_key_type; - typedef typename Info::data_type data_type; - typedef typename Info::hash_value_type hash_value_type; - typedef typename Info::offset_type offset_type; + typedef typename Info::data_type data_type; + typedef typename Info::hash_value_type hash_value_type; + typedef typename Info::offset_type offset_type; OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries, const unsigned char *Buckets, @@ -255,6 +280,21 @@ public: "'buckets' must have a 4-byte alignment"); } + /// Read the number of buckets and the number of entries from a hash table + /// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets + /// pointer past them. + static std::pair + readNumBucketsAndEntries(const unsigned char *&Buckets) { + assert((reinterpret_cast(Buckets) & 0x3) == 0 && + "buckets should be 4-byte aligned."); + using namespace llvm::support; + offset_type NumBuckets = + endian::readNext(Buckets); + offset_type NumEntries = + endian::readNext(Buckets); + return std::make_pair(NumBuckets, NumEntries); + } + offset_type getNumBuckets() const { return NumBuckets; } offset_type getNumEntries() const { return NumEntries; } const unsigned char *getBase() const { return Base; } @@ -275,6 +315,10 @@ public: : Key(K), Data(D), Len(L), InfoObj(InfoObj) {} data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); } + + const unsigned char *getDataPtr() const { return Data; } + offset_type getDataLen() const { return Len; } + bool operator==(const iterator &X) const { return X.Data == Data; } bool operator!=(const iterator &X) const { return X.Data != Data; } }; @@ -356,17 +400,11 @@ public: static OnDiskChainedHashTable *Create(const unsigned char *Buckets, const unsigned char *const Base, const Info &InfoObj = Info()) { - using namespace llvm::support; assert(Buckets > Base); - assert((reinterpret_cast(Buckets) & 0x3) == 0 && - "buckets should be 4-byte aligned."); - - offset_type NumBuckets = - endian::readNext(Buckets); - offset_type NumEntries = - endian::readNext(Buckets); - return new OnDiskChainedHashTable(NumBuckets, NumEntries, Buckets, - Base, InfoObj); + auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets); + return new OnDiskChainedHashTable(NumBucketsAndEntries.first, + NumBucketsAndEntries.second, + Buckets, Base, InfoObj); } }; @@ -385,40 +423,30 @@ public: typedef typename base_type::hash_value_type hash_value_type; typedef typename base_type::offset_type offset_type; - OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries, - const unsigned char *Buckets, - const unsigned char *Payload, - const unsigned char *Base, - const Info &InfoObj = Info()) - : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj), - Payload(Payload) {} - +private: /// \brief Iterates over all of the keys in the table. 
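A hypothetical Info trait satisfying the generator's documented contract, including the optional EqualKey required by the new contains(); all names and the on-disk layout choices here are illustrative, not part of the patch.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>

struct StringIntInfo {
  typedef llvm::StringRef key_type;
  typedef key_type key_type_ref;
  typedef uint32_t data_type;
  typedef data_type data_type_ref;
  typedef uint32_t hash_value_type;
  typedef uint32_t offset_type;

  static hash_value_type ComputeHash(key_type_ref K) {
    return static_cast<hash_value_type>(llvm::hash_value(K));
  }
  // Needed only by contains(): plain key comparison.
  static bool EqualKey(key_type_ref A, key_type_ref B) { return A == B; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(llvm::raw_ostream &Out, key_type_ref K, data_type_ref) {
    using namespace llvm::support;
    endian::Writer<little> LE(Out);
    offset_type KeyLen = K.size(), DataLen = 4;
    LE.write<uint16_t>(KeyLen);   // lengths precede the key/data payloads
    LE.write<uint16_t>(DataLen);
    return std::make_pair(KeyLen, DataLen);
  }
  static void EmitKey(llvm::raw_ostream &Out, key_type_ref K, offset_type) {
    Out << K;
  }
  static void EmitData(llvm::raw_ostream &Out, key_type_ref, data_type_ref D,
                       offset_type) {
    using namespace llvm::support;
    endian::Writer<little>(Out).write<uint32_t>(D);
  }
};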
- class key_iterator { + class iterator_base { const unsigned char *Ptr; offset_type NumItemsInBucketLeft; offset_type NumEntriesLeft; - Info *InfoObj; public: typedef external_key_type value_type; - key_iterator(const unsigned char *const Ptr, offset_type NumEntries, - Info *InfoObj) - : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries), - InfoObj(InfoObj) {} - key_iterator() - : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0), - InfoObj(0) {} + iterator_base(const unsigned char *const Ptr, offset_type NumEntries) + : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {} + iterator_base() + : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {} - friend bool operator==(const key_iterator &X, const key_iterator &Y) { + friend bool operator==(const iterator_base &X, const iterator_base &Y) { return X.NumEntriesLeft == Y.NumEntriesLeft; } - friend bool operator!=(const key_iterator &X, const key_iterator &Y) { + friend bool operator!=(const iterator_base &X, const iterator_base &Y) { return X.NumEntriesLeft != Y.NumEntriesLeft; } - key_iterator &operator++() { // Preincrement + /// Move to the next item. + void advance() { using namespace llvm::support; if (!NumItemsInBucketLeft) { // 'Items' starts with a 16-bit unsigned integer representing the @@ -435,25 +463,58 @@ public: --NumItemsInBucketLeft; assert(NumEntriesLeft); --NumEntriesLeft; + } + + /// Get the start of the item as written by the trait (after the hash and + /// immediately before the key and value length). + const unsigned char *getItem() const { + return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type); + } + }; + +public: + OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries, + const unsigned char *Buckets, + const unsigned char *Payload, + const unsigned char *Base, + const Info &InfoObj = Info()) + : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj), + Payload(Payload) {} + + /// \brief Iterates over all of the keys in the table. + class key_iterator : public iterator_base { + Info *InfoObj; + + public: + typedef external_key_type value_type; + + key_iterator(const unsigned char *const Ptr, offset_type NumEntries, + Info *InfoObj) + : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {} + key_iterator() : iterator_base(), InfoObj() {} + + key_iterator &operator++() { + this->advance(); return *this; } key_iterator operator++(int) { // Postincrement - key_iterator tmp = *this; ++*this; return tmp; + key_iterator tmp = *this; + ++*this; + return tmp; + } + + internal_key_type getInternalKey() const { + auto *LocalPtr = this->getItem(); + + // Determine the length of the key and the data. + auto L = Info::ReadKeyDataLength(LocalPtr); + + // Read the key. + return InfoObj->ReadKey(LocalPtr, L.first); } value_type operator*() const { - const unsigned char *LocalPtr = Ptr; - if (!NumItemsInBucketLeft) - LocalPtr += 2; // number of items in bucket - LocalPtr += sizeof(hash_value_type); // Skip the hash. - - // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(LocalPtr); - - // Read the key. - const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first); - return InfoObj->GetExternalKey(Key); + return InfoObj->GetExternalKey(getInternalKey()); } }; @@ -467,10 +528,7 @@ public: } /// \brief Iterates over all the entries in the table, returning the data. 
- class data_iterator { - const unsigned char *Ptr; - offset_type NumItemsInBucketLeft; - offset_type NumEntriesLeft; + class data_iterator : public iterator_base { Info *InfoObj; public: @@ -478,51 +536,24 @@ public: data_iterator(const unsigned char *const Ptr, offset_type NumEntries, Info *InfoObj) - : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries), - InfoObj(InfoObj) {} - data_iterator() - : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0), - InfoObj(nullptr) {} - - bool operator==(const data_iterator &X) const { - return X.NumEntriesLeft == NumEntriesLeft; - } - bool operator!=(const data_iterator &X) const { - return X.NumEntriesLeft != NumEntriesLeft; - } + : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {} + data_iterator() : iterator_base(), InfoObj() {} data_iterator &operator++() { // Preincrement - using namespace llvm::support; - if (!NumItemsInBucketLeft) { - // 'Items' starts with a 16-bit unsigned integer representing the - // number of items in this bucket. - NumItemsInBucketLeft = - endian::readNext(Ptr); - } - Ptr += sizeof(hash_value_type); // Skip the hash. - // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(Ptr); - Ptr += L.first + L.second; - assert(NumItemsInBucketLeft); - --NumItemsInBucketLeft; - assert(NumEntriesLeft); - --NumEntriesLeft; + this->advance(); return *this; } data_iterator operator++(int) { // Postincrement - data_iterator tmp = *this; ++*this; return tmp; + data_iterator tmp = *this; + ++*this; + return tmp; } value_type operator*() const { - const unsigned char *LocalPtr = Ptr; - if (!NumItemsInBucketLeft) - LocalPtr += 2; // number of items in bucket - LocalPtr += sizeof(hash_value_type); // Skip the hash. + auto *LocalPtr = this->getItem(); // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(LocalPtr); + auto L = Info::ReadKeyDataLength(LocalPtr); // Read the key. const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first); @@ -555,17 +586,12 @@ public: static OnDiskIterableChainedHashTable * Create(const unsigned char *Buckets, const unsigned char *const Payload, const unsigned char *const Base, const Info &InfoObj = Info()) { - using namespace llvm::support; assert(Buckets > Base); - assert((reinterpret_cast(Buckets) & 0x3) == 0 && - "buckets should be 4-byte aligned."); - - offset_type NumBuckets = - endian::readNext(Buckets); - offset_type NumEntries = - endian::readNext(Buckets); + auto NumBucketsAndEntries = + OnDiskIterableChainedHashTable::readNumBucketsAndEntries(Buckets); return new OnDiskIterableChainedHashTable( - NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj); + NumBucketsAndEntries.first, NumBucketsAndEntries.second, + Buckets, Payload, Base, InfoObj); } }; diff --git a/include/llvm/Support/Options.h b/include/llvm/Support/Options.h index 2742d3907c62..7b61b2308f57 100644 --- a/include/llvm/Support/Options.h +++ b/include/llvm/Support/Options.h @@ -71,7 +71,7 @@ private: /// \param Key unique key for option /// \param O option to map to \p Key /// - /// Allocated cl::Options are owened by the OptionRegistry and are deallocated + /// Allocated cl::Options are owned by the OptionRegistry and are deallocated /// on destruction or removal void addOption(void *Key, cl::Option *O); @@ -91,7 +91,7 @@ public: /// Options are keyed off the template parameters to generate unique static /// characters. 
The template parameters are (1) the type of the data the /// option stores (\p ValT), the class that will read the option (\p Base), - /// and the memeber that the class will store the data into (\p Mem). + /// and the member that the class will store the data into (\p Mem). template static void registerOption(const char *ArgStr, const char *Desc, const ValT &InitValue) { diff --git a/include/llvm/Support/OutputBuffer.h b/include/llvm/Support/OutputBuffer.h deleted file mode 100644 index 6b98e99e28e0..000000000000 --- a/include/llvm/Support/OutputBuffer.h +++ /dev/null @@ -1,166 +0,0 @@ -//=== OutputBuffer.h - Output Buffer ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Methods to output values to a data buffer. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_OUTPUTBUFFER_H -#define LLVM_SUPPORT_OUTPUTBUFFER_H - -#include -#include -#include - -namespace llvm { - - class OutputBuffer { - /// Output buffer. - std::vector &Output; - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; - public: - OutputBuffer(std::vector &Out, - bool is64bit, bool le) - : Output(Out), is64Bit(is64bit), isLittleEndian(le) {} - - // align - Emit padding into the file until the current output position is - // aligned to the specified power of two boundary. - void align(unsigned Boundary) { - assert(Boundary && (Boundary & (Boundary - 1)) == 0 && - "Must align to 2^k boundary"); - size_t Size = Output.size(); - - if (Size & (Boundary - 1)) { - // Add padding to get alignment to the correct place. - size_t Pad = Boundary - (Size & (Boundary - 1)); - Output.resize(Size + Pad); - } - } - - //===------------------------------------------------------------------===// - // Out Functions - Output the specified value to the data buffer. 
- - void outbyte(unsigned char X) { - Output.push_back(X); - } - void outhalf(unsigned short X) { - if (isLittleEndian) { - Output.push_back(X & 255); - Output.push_back(X >> 8); - } else { - Output.push_back(X >> 8); - Output.push_back(X & 255); - } - } - void outword(unsigned X) { - if (isLittleEndian) { - Output.push_back((X >> 0) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 24) & 255); - } else { - Output.push_back((X >> 24) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 0) & 255); - } - } - void outxword(uint64_t X) { - if (isLittleEndian) { - Output.push_back(unsigned(X >> 0) & 255); - Output.push_back(unsigned(X >> 8) & 255); - Output.push_back(unsigned(X >> 16) & 255); - Output.push_back(unsigned(X >> 24) & 255); - Output.push_back(unsigned(X >> 32) & 255); - Output.push_back(unsigned(X >> 40) & 255); - Output.push_back(unsigned(X >> 48) & 255); - Output.push_back(unsigned(X >> 56) & 255); - } else { - Output.push_back(unsigned(X >> 56) & 255); - Output.push_back(unsigned(X >> 48) & 255); - Output.push_back(unsigned(X >> 40) & 255); - Output.push_back(unsigned(X >> 32) & 255); - Output.push_back(unsigned(X >> 24) & 255); - Output.push_back(unsigned(X >> 16) & 255); - Output.push_back(unsigned(X >> 8) & 255); - Output.push_back(unsigned(X >> 0) & 255); - } - } - void outaddr32(unsigned X) { - outword(X); - } - void outaddr64(uint64_t X) { - outxword(X); - } - void outaddr(uint64_t X) { - if (!is64Bit) - outword((unsigned)X); - else - outxword(X); - } - void outstring(const std::string &S, unsigned Length) { - unsigned len_to_copy = static_cast(S.length()) < Length - ? static_cast(S.length()) : Length; - unsigned len_to_fill = static_cast(S.length()) < Length - ? Length - static_cast(S.length()) : 0; - - for (unsigned i = 0; i < len_to_copy; ++i) - outbyte(S[i]); - - for (unsigned i = 0; i < len_to_fill; ++i) - outbyte(0); - } - - //===------------------------------------------------------------------===// - // Fix Functions - Replace an existing entry at an offset. - - void fixhalf(unsigned short X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 8)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 0)) & 255; - } - void fixword(unsigned X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 24)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 16)) & 255; - P[2] = (X >> (isLittleEndian ? 16 : 8)) & 255; - P[3] = (X >> (isLittleEndian ? 24 : 0)) & 255; - } - void fixxword(uint64_t X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 56)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 48)) & 255; - P[2] = (X >> (isLittleEndian ? 16 : 40)) & 255; - P[3] = (X >> (isLittleEndian ? 24 : 32)) & 255; - P[4] = (X >> (isLittleEndian ? 32 : 24)) & 255; - P[5] = (X >> (isLittleEndian ? 40 : 16)) & 255; - P[6] = (X >> (isLittleEndian ? 48 : 8)) & 255; - P[7] = (X >> (isLittleEndian ? 
56 : 0)) & 255; - } - void fixaddr(uint64_t X, unsigned Offset) { - if (!is64Bit) - fixword((unsigned)X, Offset); - else - fixxword(X, Offset); - } - - unsigned char &operator[](unsigned Index) { - return Output[Index]; - } - const unsigned char &operator[](unsigned Index) const { - return Output[Index]; - } - }; - -} // end llvm namespace - -#endif // LLVM_SUPPORT_OUTPUTBUFFER_H diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h index 8fae853e2cf4..955cc991d9b7 100644 --- a/include/llvm/Support/Path.h +++ b/include/llvm/Support/Path.h @@ -61,7 +61,6 @@ public: reference operator*() const { return Component; } pointer operator->() const { return &Component; } const_iterator &operator++(); // preincrement - const_iterator &operator++(int); // postincrement bool operator==(const const_iterator &RHS) const; bool operator!=(const const_iterator &RHS) const { return !(*this == RHS); } @@ -87,7 +86,6 @@ public: reference operator*() const { return Component; } pointer operator->() const { return &Component; } reverse_iterator &operator++(); // preincrement - reverse_iterator &operator++(int); // postincrement bool operator==(const reverse_iterator &RHS) const; bool operator!=(const reverse_iterator &RHS) const { return !(*this == RHS); } }; @@ -218,7 +216,7 @@ StringRef root_name(StringRef path); /// @result The root directory of \a path if it has one, otherwise /// "". StringRef root_directory(StringRef path); - + /// @brief Get root path. /// /// Equivalent to root_name + root_directory. @@ -310,7 +308,7 @@ bool is_separator(char value); /// @result StringRef of the preferred separator, null-terminated. StringRef get_separator(); -/// @brief Get the typical temporary directory for the system, e.g., +/// @brief Get the typical temporary directory for the system, e.g., /// "/var/tmp" or "C:/TEMP" /// /// @param erasedOnReboot Whether to favor a path that is erased on reboot @@ -327,6 +325,22 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result); /// @result True if a home directory is set, false otherwise. bool home_directory(SmallVectorImpl &result); +/// @brief Get the user's cache directory. +/// +/// Expect the resulting path to be a directory shared with other +/// applications/services used by the user. Params \p Path1 to \p Path3 can be +/// used to append additional directory names to the resulting path. Recommended +/// pattern is //. +/// +/// @param Result Holds the resulting path. +/// @param Path1 Additional path to be appended to the user's cache directory +/// path. "" can be used to append nothing. +/// @param Path2 Second additional path to be appended. +/// @param Path3 Third additional path to be appended. +/// @result True if a cache directory path is set, false otherwise. +bool user_cache_directory(SmallVectorImpl &Result, const Twine &Path1, + const Twine &Path2 = "", const Twine &Path3 = ""); + /// @brief Has root name? /// /// root_name != "" @@ -403,6 +417,19 @@ bool is_absolute(const Twine &path); /// @result True if the path is relative, false if it is not. bool is_relative(const Twine &path); +/// @brief Remove redundant leading "./" pieces and consecutive separators. +/// +/// @param path Input path. +/// @result The cleaned-up \a path. +StringRef remove_leading_dotslash(StringRef path); + +/// @brief In-place remove any './' and optionally '../' components from a path. 
+/// +/// @param path processed path +/// @param remove_dot_dot specify if '../' should be removed +/// @result True if path was changed +bool remove_dots(SmallVectorImpl &path, bool remove_dot_dot = false); + } // end namespace path } // end namespace sys } // end namespace llvm diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h index 837082139214..c12d237b2796 100644 --- a/include/llvm/Support/PointerLikeTypeTraits.h +++ b/include/llvm/Support/PointerLikeTypeTraits.h @@ -15,59 +15,70 @@ #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H +#include "llvm/Support/AlignOf.h" #include "llvm/Support/DataTypes.h" namespace llvm { - -/// PointerLikeTypeTraits - This is a traits object that is used to handle -/// pointer types and things that are just wrappers for pointers as a uniform -/// entity. -template -class PointerLikeTypeTraits { + +/// A traits type that is used to handle pointer types and things that are just +/// wrappers for pointers as a uniform entity. +template class PointerLikeTypeTraits { // getAsVoidPointer // getFromVoidPointer // getNumLowBitsAvailable }; +namespace detail { +/// A tiny meta function to compute the log2 of a compile time constant. +template +struct ConstantLog2 + : std::integral_constant::value + 1> {}; +template <> struct ConstantLog2<1> : std::integral_constant {}; +} + // Provide PointerLikeTypeTraits for non-cvr pointers. -template -class PointerLikeTypeTraits { -public: - static inline void *getAsVoidPointer(T* P) { return P; } - static inline T *getFromVoidPointer(void *P) { - return static_cast(P); - } - - /// Note, we assume here that malloc returns objects at least 4-byte aligned. - /// However, this may be wrong, or pointers may be from something other than - /// malloc. In this case, you should specialize this template to reduce this. +template struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(T *P) { return P; } + static inline T *getFromVoidPointer(void *P) { return static_cast(P); } + + enum { + NumLowBitsAvailable = detail::ConstantLog2::Alignment>::value + }; +}; + +template <> struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(void *P) { return P; } + static inline void *getFromVoidPointer(void *P) { return P; } + + /// Note, we assume here that void* is related to raw malloc'ed memory and + /// that malloc returns objects at least 4-byte aligned. However, this may be + /// wrong, or pointers may be from something other than malloc. In this case, + /// you should specify a real typed pointer or avoid this template. /// /// All clients should use assertions to do a run-time check to ensure that /// this is actually true. enum { NumLowBitsAvailable = 2 }; }; - + // Provide PointerLikeTypeTraits for const pointers. -template -class PointerLikeTypeTraits { - typedef PointerLikeTypeTraits NonConst; +template class PointerLikeTypeTraits { + typedef PointerLikeTypeTraits NonConst; public: - static inline const void *getAsVoidPointer(const T* P) { - return NonConst::getAsVoidPointer(const_cast(P)); + static inline const void *getAsVoidPointer(const T *P) { + return NonConst::getAsVoidPointer(const_cast(P)); } static inline const T *getFromVoidPointer(const void *P) { - return NonConst::getFromVoidPointer(const_cast(P)); + return NonConst::getFromVoidPointer(const_cast(P)); } enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable }; }; // Provide PointerLikeTypeTraits for uintptr_t. 
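With the PointerLikeTypeTraits change above, the number of spare low bits in a T* now follows from the pointee's alignment via detail::ConstantLog2. A sketch, using a hypothetical 8-byte-aligned struct:

#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cstdint>

struct alignas(8) Node { uint64_t Payload; };

static_assert(llvm::detail::ConstantLog2<8>::value == 3, "log2(8) == 3");
static_assert(llvm::PointerLikeTypeTraits<Node *>::NumLowBitsAvailable == 3,
              "an 8-byte-aligned pointee leaves 3 tag bits in the pointer");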
-template<> -class PointerLikeTypeTraits { +template <> class PointerLikeTypeTraits { public: static inline void *getAsVoidPointer(uintptr_t P) { - return reinterpret_cast(P); + return reinterpret_cast(P); } static inline uintptr_t getFromVoidPointer(void *P) { return reinterpret_cast(P); @@ -75,7 +86,7 @@ public: // No bits are available! enum { NumLowBitsAvailable = 0 }; }; - + } // end namespace llvm #endif diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h index 96afb60d8e51..027f9433969d 100644 --- a/include/llvm/Support/PrettyStackTrace.h +++ b/include/llvm/Support/PrettyStackTrace.h @@ -66,6 +66,18 @@ namespace llvm { void print(raw_ostream &OS) const override; }; + /// Returns the topmost element of the "pretty" stack state. + const void* SavePrettyStackState(); + + /// Restores the topmost element of the "pretty" stack state to State, which + /// should come from a previous call to SavePrettyStackState(). This is + /// useful when using a CrashRecoveryContext in code that also uses + /// PrettyStackTraceEntries, to make sure the stack that's printed if a crash + /// happens after a crash that's been recovered by CrashRecoveryContext + /// doesn't have frames on it that were added in code unwound by the + /// CrashRecoveryContext. + void RestorePrettyStackState(const void* State); + } // end namespace llvm #endif diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h new file mode 100644 index 000000000000..5c1b8d5070d4 --- /dev/null +++ b/include/llvm/Support/Printable.h @@ -0,0 +1,52 @@ +//===--- Printable.h - Print function helpers -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Printable struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PRINTABLE_H +#define LLVM_SUPPORT_PRINTABLE_H + +#include + +namespace llvm { + +class raw_ostream; + +/// Simple wrapper around std::function. +/// This class is usefull to construct print helpers for raw_ostream. +/// +/// Example: +/// Printable PrintRegister(unsigned Register) { +/// return Printable([Register](raw_ostream &OS) { +/// OS << getRegisterName(Register); +/// } +/// } +/// ... OS << PrintRegister(Register); ... +/// +/// Implementation note: Ideally this would just be a typedef, but doing so +/// leads to operator << being ambiguous as function has matching constructors +/// in some STL versions. I have seen the problem on gcc 4.6 libstdc++ and +/// microsoft STL. +class Printable { +public: + std::function Print; + Printable(const std::function Print) + : Print(Print) {} +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) { + P.Print(OS); + return OS; +} + +} + +#endif diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h index b89a0f73ec68..43302101e3e0 100644 --- a/include/llvm/Support/Program.h +++ b/include/llvm/Support/Program.h @@ -67,8 +67,7 @@ struct ProcessInfo { /// \returns The fully qualified path to the first \p Name in \p Paths if it /// exists. \p Name if \p Name has slashes in it. Otherwise an error. 
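A usage sketch for the new Printable wrapper above, in the style of the header's own example; printHex is an illustrative name, not an LLVM API.

#include "llvm/Support/Format.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Printable printHex(unsigned Value) {
  // Capture the value and defer formatting until the stream insertion.
  return llvm::Printable(
      [Value](llvm::raw_ostream &OS) { OS << llvm::format("0x%08x", Value); });
}

int main() {
  llvm::outs() << "magic = " << printHex(0xDEADBEEF) << "\n";
  return 0;
}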
ErrorOr - findProgramByName(StringRef Name, - ArrayRef Paths = ArrayRef()); + findProgramByName(StringRef Name, ArrayRef Paths = None); // These functions change the specified standard stream (stdin or stdout) to // binary mode. They return errc::success if the specified stream diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h index e97f36a735fd..a38050d81903 100644 --- a/include/llvm/Support/Recycler.h +++ b/include/llvm/Support/Recycler.h @@ -28,53 +28,36 @@ namespace llvm { /// void PrintRecyclerStats(size_t Size, size_t Align, size_t FreeListSize); -/// RecyclerStruct - Implementation detail for Recycler. This is a -/// class that the recycler imposes on free'd memory to carve out -/// next/prev pointers. -struct RecyclerStruct { - RecyclerStruct *Prev, *Next; -}; - -template<> -struct ilist_traits : - public ilist_default_traits { - static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; } - static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; } - static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; } - static void setNext(RecyclerStruct *t, RecyclerStruct *n) { t->Next = n; } - - mutable RecyclerStruct Sentinel; - RecyclerStruct *createSentinel() const { - return &Sentinel; - } - static void destroySentinel(RecyclerStruct *) {} - - RecyclerStruct *provideInitialHead() const { return createSentinel(); } - RecyclerStruct *ensureHead(RecyclerStruct*) const { return createSentinel(); } - static void noteHead(RecyclerStruct*, RecyclerStruct*) {} - - static void deleteNode(RecyclerStruct *) { - llvm_unreachable("Recycler's ilist_traits shouldn't see a deleteNode call!"); - } -}; - /// Recycler - This class manages a linked-list of deallocated nodes /// and facilitates reusing deallocated memory in place of allocating /// new memory. /// template::Alignment> class Recycler { - /// FreeList - Doubly-linked list of nodes that have deleted contents and - /// are not in active use. - /// - iplist FreeList; + struct FreeNode { + FreeNode *Next; + }; + + /// List of nodes that have deleted contents and are not in active use. + FreeNode *FreeList = nullptr; + + FreeNode *pop_val() { + auto *Val = FreeList; + FreeList = FreeList->Next; + return Val; + } + + void push(FreeNode *N) { + N->Next = FreeList; + FreeList = N; + } public: ~Recycler() { // If this fails, either the callee has lost track of some allocation, // or the callee isn't tracking allocations and should just call // clear() before deleting the Recycler. - assert(FreeList.empty() && "Non-empty recycler deleted!"); + assert(!FreeList && "Non-empty recycler deleted!"); } /// clear - Release all the tracked allocations to the allocator. The @@ -82,8 +65,8 @@ public: /// deleted; calling clear is one way to ensure this. template void clear(AllocatorType &Allocator) { - while (!FreeList.empty()) { - T *t = reinterpret_cast(FreeList.remove(FreeList.begin())); + while (FreeList) { + T *t = reinterpret_cast(pop_val()); Allocator.Deallocate(t); } } @@ -93,9 +76,7 @@ public: /// /// There is no need to traverse the free list, pulling all the objects into /// cache. 
- void clear(BumpPtrAllocator&) { - FreeList.clearAndLeakNodesUnsafely(); - } + void clear(BumpPtrAllocator &) { FreeList = nullptr; } template SubClass *Allocate(AllocatorType &Allocator) { @@ -103,9 +84,8 @@ public: "Recycler allocation alignment is less than object align!"); static_assert(sizeof(SubClass) <= Size, "Recycler allocation size is less than object size!"); - return !FreeList.empty() ? - reinterpret_cast(FreeList.remove(FreeList.begin())) : - static_cast(Allocator.Allocate(Size, Align)); + return FreeList ? reinterpret_cast(pop_val()) + : static_cast(Allocator.Allocate(Size, Align)); } template @@ -115,14 +95,20 @@ public: template void Deallocate(AllocatorType & /*Allocator*/, SubClass* Element) { - FreeList.push_front(reinterpret_cast(Element)); + push(reinterpret_cast(Element)); } - void PrintStats() { - PrintRecyclerStats(Size, Align, FreeList.size()); - } + void PrintStats(); }; +template +void Recycler::PrintStats() { + size_t S = 0; + for (auto *I = FreeList; I; I = I->Next) + ++S; + PrintRecyclerStats(Size, Align, S); +} + } #endif diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 95c4e96f7f29..bbea97b289a6 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -37,7 +37,6 @@ namespace llvm { std::unique_ptr instantiate() const { return Ctor(); } }; - /// Traits for registry entries. If using other than SimpleRegistryEntry, it /// is necessary to define an alternate traits class. template @@ -53,7 +52,6 @@ namespace llvm { static const char *descof(const entry &Entry) { return Entry.getDesc(); } }; - /// A global registry used in conjunction with static constructors to make /// pluggable components (like targets or garbage collectors) "just work" when /// linked with an executable. @@ -102,7 +100,6 @@ namespace llvm { } }; - /// Iterators for registry entries. /// class iterator { @@ -122,10 +119,9 @@ namespace llvm { static iterator end() { return iterator(nullptr); } static iterator_range entries() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } - /// Abstract base class for registry listeners, which are informed when new /// entries are added to the registry. Simply subclass and instantiate: /// @@ -160,7 +156,7 @@ namespace llvm { } public: - listener() : Prev(ListenerTail), Next(0) { + listener() : Prev(ListenerTail), Next(nullptr) { if (Prev) Prev->Next = this; else @@ -180,7 +176,6 @@ namespace llvm { } }; - /// A static registration template. Use like such: /// /// Registry::Add @@ -210,7 +205,6 @@ namespace llvm { }; /// Registry::Parser now lives in llvm/Support/RegistryParser.h. - }; // Since these are defined in a header file, plugins must be sure to export @@ -228,6 +222,6 @@ namespace llvm { template typename Registry::listener *Registry::ListenerTail; -} +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_REGISTRY_H diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h index d5b4c57a8fd6..c6e9a14e82ac 100644 --- a/include/llvm/Support/SMLoc.h +++ b/include/llvm/Support/SMLoc.h @@ -22,6 +22,7 @@ namespace llvm { /// Represents a location in source code. 
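The Recycler rewrite above drops the doubly linked iplist and its sentinel machinery in favor of an intrusive singly linked free list threaded through the freed objects themselves. A standalone sketch of that technique, not LLVM's class, just the same idea over plain malloc/free:

    #include <cassert>
    #include <cstdlib>

    struct FreeNode { FreeNode *Next; };

    class TinyRecycler {
      FreeNode *FreeList = nullptr; // dead blocks, linked through themselves

    public:
      ~TinyRecycler() { assert(!FreeList && "call clear() before destroying"); }

      // Reuse a recycled block when available, else allocate a fresh one.
      void *allocate(std::size_t Size) {
        assert(Size >= sizeof(FreeNode) && "block too small to recycle");
        if (FreeNode *N = FreeList) {
          FreeList = N->Next;
          return N;
        }
        return std::malloc(Size);
      }

      // The dead block's own storage becomes the list node.
      void deallocate(void *Ptr) {
        auto *N = static_cast<FreeNode *>(Ptr);
        N->Next = FreeList;
        FreeList = N;
      }

      void clear() {
        while (FreeNode *N = FreeList) {
          FreeList = N->Next;
          std::free(N);
        }
      }
    };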
class SMLoc { const char *Ptr; + public: SMLoc() : Ptr(nullptr) {} @@ -53,11 +54,10 @@ public: assert(Start.isValid() == End.isValid() && "Start and end should either both be valid or both be invalid!"); } - + bool isValid() const { return Start.isValid(); } }; - + } // end namespace llvm #endif - diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 0a4262b7eec5..c6421efc8b49 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -282,7 +282,7 @@ int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) { /// /// As a convenience, returns the matching scale. If the output value of one /// number is zero, returns the scale of the other. If both are zero, which -/// scale is returned is unspecifed. +/// scale is returned is unspecified. template int16_t matchScales(DigitsT &LDigits, int16_t &LScale, DigitsT &RDigits, int16_t &RScale) { @@ -334,7 +334,7 @@ std::pair getSum(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) { static_assert(!std::numeric_limits::is_signed, "expected unsigned"); - // Check inputs up front. This is only relevent if addition overflows, but + // Check inputs up front. This is only relevant if addition overflows, but // testing here should catch more bugs. assert(LScale < INT16_MAX && "scale too large"); assert(RScale < INT16_MAX && "scale too large"); diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index 7e165d7f3a42..2a4d84bd891a 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -47,6 +47,9 @@ namespace sys { /// \brief Print the stack trace using the given \c raw_ostream object. void PrintStackTrace(raw_ostream &OS); + // Run all registered signal handlers. + void RunSignalHandlers(); + /// AddSignalHandler - Add a function to be called when an abort/kill signal /// is delivered to the process. The handler can have a cookie passed to it /// to identify what instance of the handler it is. diff --git a/include/llvm/Support/StreamingMemoryObject.h b/include/llvm/Support/StreamingMemoryObject.h index 7cb6438d1342..a5980c235946 100644 --- a/include/llvm/Support/StreamingMemoryObject.h +++ b/include/llvm/Support/StreamingMemoryObject.h @@ -50,8 +50,10 @@ public: /// starts (although it can be called anytime). void setKnownObjectSize(size_t size); + /// The number of bytes read at a time from the data streamer. + static const uint32_t kChunkSize = 4096 * 4; + private: - const static uint32_t kChunkSize = 4096 * 4; mutable std::vector Bytes; std::unique_ptr Streamer; mutable size_t BytesRead; // Bytes read from stream diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h index f3853ee91570..38fb7bb38339 100644 --- a/include/llvm/Support/StringSaver.h +++ b/include/llvm/Support/StringSaver.h @@ -18,25 +18,15 @@ namespace llvm { /// \brief Saves strings in the inheritor's stable storage and returns a stable /// raw character pointer. 
-class StringSaver { -protected: - ~StringSaver() {} - virtual const char *saveImpl(StringRef S); +class StringSaver final { + BumpPtrAllocator &Alloc; public: StringSaver(BumpPtrAllocator &Alloc) : Alloc(Alloc) {} const char *save(const char *S) { return save(StringRef(S)); } - const char *save(StringRef S) { return saveImpl(S); } + const char *save(StringRef S); const char *save(const Twine &S) { return save(StringRef(S.str())); } const char *save(std::string &S) { return save(StringRef(S)); } - -private: - BumpPtrAllocator &Alloc; -}; - -class BumpPtrStringSaver final : public StringSaver { -public: - BumpPtrStringSaver(BumpPtrAllocator &Alloc) : StringSaver(Alloc) {} }; } #endif diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index dab724895e86..c21019d0c5b8 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -20,7 +20,7 @@ #include namespace llvm { - class StringRef; +class StringRef; // Target specific information into their own namespaces. These should be // generated from TableGen because the information is already there, and there @@ -29,177 +29,117 @@ namespace llvm { // even if the back-end is not compiled with LLVM, plus we need to create a new // back-end to TableGen to create these clean tables. namespace ARM { - // FPU names. - enum FPUKind { - FK_INVALID = 0, - FK_NONE, - FK_VFP, - FK_VFPV2, - FK_VFPV3, - FK_VFPV3_FP16, - FK_VFPV3_D16, - FK_VFPV3_D16_FP16, - FK_VFPV3XD, - FK_VFPV3XD_FP16, - FK_VFPV4, - FK_VFPV4_D16, - FK_FPV4_SP_D16, - FK_FPV5_D16, - FK_FPV5_SP_D16, - FK_FP_ARMV8, - FK_NEON, - FK_NEON_FP16, - FK_NEON_VFPV4, - FK_NEON_FP_ARMV8, - FK_CRYPTO_NEON_FP_ARMV8, - FK_SOFTVFP, - FK_LAST - }; - - // FPU Version - enum FPUVersion { - FV_NONE = 0, - FV_VFPV2, - FV_VFPV3, - FV_VFPV3_FP16, - FV_VFPV4, - FV_VFPV5 - }; - - // An FPU name implies one of three levels of Neon support: - enum NeonSupportLevel { - NS_None = 0, ///< No Neon - NS_Neon, ///< Neon - NS_Crypto ///< Neon with Crypto - }; - - // An FPU name restricts the FPU in one of three ways: - enum FPURestriction { - FR_None = 0, ///< No restriction - FR_D16, ///< Only 16 D registers - FR_SP_D16 ///< Only single-precision instructions, with 16 D registers - }; - - // Arch names. - enum ArchKind { - AK_INVALID = 0, - AK_ARMV2, - AK_ARMV2A, - AK_ARMV3, - AK_ARMV3M, - AK_ARMV4, - AK_ARMV4T, - AK_ARMV5T, - AK_ARMV5TE, - AK_ARMV5TEJ, - AK_ARMV6, - AK_ARMV6K, - AK_ARMV6T2, - AK_ARMV6Z, - AK_ARMV6ZK, - AK_ARMV6M, - AK_ARMV6SM, - AK_ARMV7A, - AK_ARMV7R, - AK_ARMV7M, - AK_ARMV7EM, - AK_ARMV8A, - AK_ARMV8_1A, - // Non-standard Arch names. - AK_IWMMXT, - AK_IWMMXT2, - AK_XSCALE, - AK_ARMV5, - AK_ARMV5E, - AK_ARMV6J, - AK_ARMV6HL, - AK_ARMV7, - AK_ARMV7L, - AK_ARMV7HL, - AK_ARMV7S, - AK_LAST - }; - - // Arch extension modifiers for CPUs. - enum ArchExtKind { - AEK_INVALID = 0, - AEK_CRC, - AEK_CRYPTO, - AEK_FP, - AEK_HWDIV, - AEK_MP, - AEK_SIMD, - AEK_SEC, - AEK_VIRT, - // Unsupported extensions. - AEK_OS, - AEK_IWMMXT, - AEK_IWMMXT2, - AEK_MAVERICK, - AEK_XSCALE, - AEK_LAST - }; - - // ISA kinds. - enum ISAKind { - IK_INVALID = 0, - IK_ARM, - IK_THUMB, - IK_AARCH64 - }; - - // Endianness - // FIXME: BE8 vs. BE32? - enum EndianKind { - EK_INVALID = 0, - EK_LITTLE, - EK_BIG - }; - - // v6/v7/v8 Profile - enum ProfileKind { - PK_INVALID = 0, - PK_A, - PK_R, - PK_M - }; -} // namespace ARM - -// Target Parsers, one per architecture. 
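With the rewrite above, StringSaver is a concrete final class that copies each string into a BumpPtrAllocator and hands back a stable const char *. A short usage sketch, assuming only the interface shown in this hunk:

    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/StringSaver.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    int main() {
      llvm::BumpPtrAllocator Alloc;
      llvm::StringSaver Saver(Alloc);

      // Both pointers stay valid for the lifetime of Alloc, even though the
      // std::string below goes out of scope.
      const char *A = Saver.save("argv-style string");
      std::string Tmp = "temporary string";
      const char *B = Saver.save(Tmp);

      llvm::outs() << A << "\n" << B << "\n";
    }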
-class ARMTargetParser { - static StringRef getFPUSynonym(StringRef FPU); - static StringRef getArchSynonym(StringRef Arch); - -public: - static StringRef getCanonicalArchName(StringRef Arch); - - // Information by ID - static const char * getFPUName(unsigned FPUKind); - static unsigned getFPUVersion(unsigned FPUKind); - static unsigned getFPUNeonSupportLevel(unsigned FPUKind); - static unsigned getFPURestriction(unsigned FPUKind); - // FIXME: This should be moved to TargetTuple once it exists - static bool getFPUFeatures(unsigned FPUKind, - std::vector &Features); - static const char * getArchName(unsigned ArchKind); - static unsigned getArchAttr(unsigned ArchKind); - static const char * getCPUAttr(unsigned ArchKind); - static const char * getSubArch(unsigned ArchKind); - static const char * getArchExtName(unsigned ArchExtKind); - static const char * getDefaultCPU(StringRef Arch); - - // Parser - static unsigned parseFPU(StringRef FPU); - static unsigned parseArch(StringRef Arch); - static unsigned parseArchExt(StringRef ArchExt); - static unsigned parseCPUArch(StringRef CPU); - static unsigned parseArchISA(StringRef Arch); - static unsigned parseArchEndian(StringRef Arch); - static unsigned parseArchProfile(StringRef Arch); - static unsigned parseArchVersion(StringRef Arch); +// FPU names. +enum FPUKind { +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND, +#include "ARMTargetParser.def" + FK_LAST }; +// FPU Version +enum FPUVersion { + FV_NONE = 0, + FV_VFPV2, + FV_VFPV3, + FV_VFPV3_FP16, + FV_VFPV4, + FV_VFPV5 +}; + +// An FPU name implies one of three levels of Neon support: +enum NeonSupportLevel { + NS_None = 0, ///< No Neon + NS_Neon, ///< Neon + NS_Crypto ///< Neon with Crypto +}; + +// An FPU name restricts the FPU in one of three ways: +enum FPURestriction { + FR_None = 0, ///< No restriction + FR_D16, ///< Only 16 D registers + FR_SP_D16 ///< Only single-precision instructions, with 16 D registers +}; + +// Arch names. +enum ArchKind { +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID, +#include "ARMTargetParser.def" + AK_LAST +}; + +// Arch extension modifiers for CPUs. +enum ArchExtKind : unsigned { + AEK_INVALID = 0x0, + AEK_NONE = 0x1, + AEK_CRC = 0x2, + AEK_CRYPTO = 0x4, + AEK_FP = 0x8, + AEK_HWDIV = 0x10, + AEK_HWDIVARM = 0x20, + AEK_MP = 0x40, + AEK_SIMD = 0x80, + AEK_SEC = 0x100, + AEK_VIRT = 0x200, + AEK_DSP = 0x400, + AEK_FP16 = 0x800, + // Unsupported extensions. + AEK_OS = 0x8000000, + AEK_IWMMXT = 0x10000000, + AEK_IWMMXT2 = 0x20000000, + AEK_MAVERICK = 0x40000000, + AEK_XSCALE = 0x80000000, +}; + +// ISA kinds. +enum ISAKind { IK_INVALID = 0, IK_ARM, IK_THUMB, IK_AARCH64 }; + +// Endianness +// FIXME: BE8 vs. BE32? 
+enum EndianKind { EK_INVALID = 0, EK_LITTLE, EK_BIG }; + +// v6/v7/v8 Profile +enum ProfileKind { PK_INVALID = 0, PK_A, PK_R, PK_M }; + +StringRef getCanonicalArchName(StringRef Arch); + +// Information by ID +StringRef getFPUName(unsigned FPUKind); +unsigned getFPUVersion(unsigned FPUKind); +unsigned getFPUNeonSupportLevel(unsigned FPUKind); +unsigned getFPURestriction(unsigned FPUKind); + +// FIXME: These should be moved to TargetTuple once it exists +bool getFPUFeatures(unsigned FPUKind, std::vector &Features); +bool getHWDivFeatures(unsigned HWDivKind, std::vector &Features); +bool getExtensionFeatures(unsigned Extensions, + std::vector &Features); + +StringRef getArchName(unsigned ArchKind); +unsigned getArchAttr(unsigned ArchKind); +StringRef getCPUAttr(unsigned ArchKind); +StringRef getSubArch(unsigned ArchKind); +StringRef getArchExtName(unsigned ArchExtKind); +const char *getArchExtFeature(StringRef ArchExt); +StringRef getHWDivName(unsigned HWDivKind); + +// Information by Name +unsigned getDefaultFPU(StringRef CPU, unsigned ArchKind); +unsigned getDefaultExtensions(StringRef CPU, unsigned ArchKind); +StringRef getDefaultCPU(StringRef Arch); + +// Parser +unsigned parseHWDiv(StringRef HWDiv); +unsigned parseFPU(StringRef FPU); +unsigned parseArch(StringRef Arch); +unsigned parseArchExt(StringRef ArchExt); +unsigned parseCPUArch(StringRef CPU); +unsigned parseArchISA(StringRef Arch); +unsigned parseArchEndian(StringRef Arch); +unsigned parseArchProfile(StringRef Arch); +unsigned parseArchVersion(StringRef Arch); + +} // namespace ARM } // namespace llvm #endif diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index 40bf6fb20c9f..aec181b1d266 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -115,7 +115,7 @@ public: const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); typedef MCTargetAsmParser *(*MCAsmParserCtorTy)( - MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, + const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, const MCTargetOptions &Options); typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T, const MCSubtargetInfo &STI, @@ -141,7 +141,8 @@ public: typedef MCStreamer *(*COFFStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + bool RelaxAll, + bool IncrementalLinkerCompatible); typedef MCTargetStreamer *(*NullTargetStreamerCtorTy)(MCStreamer &S); typedef MCTargetStreamer *(*AsmTargetStreamerCtorTy)( MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, @@ -382,7 +383,7 @@ public: /// /// \param Parser The target independent parser implementation to use for /// parsing and lexing. 
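The ARMTargetParser class above is dissolved into free functions in the llvm::ARM namespace, with the enum values now generated from ARMTargetParser.def. A sketch of how a front end might drive them, assuming the signatures declared in this hunk; the arch and FPU strings are illustrative:

    #include "llvm/Support/TargetParser.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    int main() {
      // Map an architecture name to its ArchKind ID, then query metadata.
      unsigned AK = llvm::ARM::parseArch("armv7-a");
      if (AK == llvm::ARM::AK_INVALID)
        return 1;
      llvm::outs() << "arch: " << llvm::ARM::getArchName(AK) << "\n";

      // Expand a parsed FPU name into backend feature strings.
      unsigned FK = llvm::ARM::parseFPU("neon-vfpv4");
      std::vector<const char *> Features;
      if (llvm::ARM::getFPUFeatures(FK, Features))
        for (const char *F : Features)
          llvm::outs() << F << "\n";
    }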
- MCTargetAsmParser *createMCAsmParser(MCSubtargetInfo &STI, + MCTargetAsmParser *createMCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) const { @@ -437,6 +438,7 @@ public: MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, const MCSubtargetInfo &STI, bool RelaxAll, + bool IncrementalLinkerCompatible, bool DWARFMustBeAtTheEnd) const { MCStreamer *S; switch (T.getObjectFormat()) { @@ -444,7 +446,8 @@ public: llvm_unreachable("Unknown object format"); case Triple::COFF: assert(T.isOSWindows() && "only Windows COFF is supported"); - S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll); + S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll, + IncrementalLinkerCompatible); break; case Triple::MachO: if (MachOStreamerCtorFn) @@ -1133,8 +1136,8 @@ template struct RegisterMCAsmParser { } private: - static MCTargetAsmParser *Allocator(MCSubtargetInfo &STI, MCAsmParser &P, - const MCInstrInfo &MII, + static MCTargetAsmParser *Allocator(const MCSubtargetInfo &STI, + MCAsmParser &P, const MCInstrInfo &MII, const MCTargetOptions &Options) { return new MCAsmParserImpl(STI, P, MII, Options); } diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h index a86e953f00ea..582785cb69a5 100644 --- a/include/llvm/Support/TargetSelect.h +++ b/include/llvm/Support/TargetSelect.h @@ -25,11 +25,11 @@ extern "C" { #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" - + // Declare all of the target-MC-initialization functions that are available. #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetMC(); #include "llvm/Config/Targets.def" - + // Declare all of the available assembly printer initialization functions. #define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" @@ -54,7 +54,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); #include "llvm/Config/Targets.def" } - + /// InitializeAllTargets - The main program should call this function if it /// wants access to all available target machines that LLVM is configured to /// support, to make them available via the TargetRegistry. @@ -67,7 +67,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" } - + /// InitializeAllTargetMCs - The main program should call this function if it /// wants access to all available target MC that LLVM is configured to /// support, to make them available via the TargetRegistry. @@ -77,7 +77,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC(); #include "llvm/Config/Targets.def" } - + /// InitializeAllAsmPrinters - The main program should call this function if /// it wants all asm printers that LLVM is configured to support, to make them /// available via the TargetRegistry. @@ -87,7 +87,7 @@ namespace llvm { #define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" } - + /// InitializeAllAsmParsers - The main program should call this function if it /// wants all asm parsers that LLVM is configured to support, to make them /// available via the TargetRegistry. 
@@ -97,7 +97,7 @@ namespace llvm {
 #define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
 #include "llvm/Config/AsmParsers.def"
   }
-  
+
   /// InitializeAllDisassemblers - The main program should call this function if
   /// it wants all disassemblers that LLVM is configured to support, to make
   /// them available via the TargetRegistry.
@@ -107,9 +107,9 @@ namespace llvm {
 #define LLVM_DISASSEMBLER(TargetName) LLVMInitialize##TargetName##Disassembler();
 #include "llvm/Config/Disassemblers.def"
   }
-  
+
   /// InitializeNativeTarget - The main program should call this function to
-  /// initialize the native target corresponding to the host.  This is useful
+  /// initialize the native target corresponding to the host. This is useful
   /// for JIT applications to ensure that the target gets linked in correctly.
   ///
   /// It is legal for a client to make multiple calls to this function.
@@ -123,7 +123,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetAsmPrinter - The main program should call
   /// this function to initialize the native target asm printer.
@@ -135,7 +135,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetAsmParser - The main program should call
   /// this function to initialize the native target asm parser.
@@ -147,7 +147,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetDisassembler - The main program should call
   /// this function to initialize the native target disassembler.
@@ -159,8 +159,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
-  
+  }
 }

 #endif
diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h
new file mode 100644
index 000000000000..745334db4450
--- /dev/null
+++ b/include/llvm/Support/ThreadPool.h
@@ -0,0 +1,136 @@
+//===-- llvm/Support/ThreadPool.h - A ThreadPool implementation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_THREAD_POOL_H
+#define LLVM_SUPPORT_THREAD_POOL_H
+
+#include "llvm/Support/thread.h"
+
+#ifdef _MSC_VER
+// concrt.h depends on eh.h for __uncaught_exception declaration
+// even if we disable exceptions.
+#include <eh.h>
+
+// Disable warnings from ppltasks.h transitively included by <future>.
+#pragma warning(push)
+#pragma warning(disable:4530)
+#pragma warning(disable:4062)
+#endif
+
+#include <future>
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace llvm {
+
+/// A ThreadPool for asynchronous parallel execution on a defined number of
+/// threads.
+///
+/// The pool keeps a vector of threads alive, waiting on a condition variable
+/// for some work to become available.
+class ThreadPool {
+public:
+#ifndef _MSC_VER
+  using VoidTy = void;
+  using TaskTy = std::function<void()>;
+  using PackagedTaskTy = std::packaged_task<void()>;
+#else
+  // MSVC 2013 has a bug and can't use std::packaged_task<void()>;
+  // We force it to use bool(bool) instead.
+  using VoidTy = bool;
+  using TaskTy = std::function<bool(bool)>;
+  using PackagedTaskTy = std::packaged_task<bool(bool)>;
+#endif
+
+  /// Construct a pool with the number of cores available on the system (or
+  /// whatever the value returned by std::thread::hardware_concurrency() is).
+  ThreadPool();
+
+  /// Construct a pool of \p ThreadCount threads
+  ThreadPool(unsigned ThreadCount);
+
+  /// Blocking destructor: the pool will wait for all the threads to complete.
+  ~ThreadPool();
+
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  template <typename Function, typename... Args>
+  inline std::shared_future<VoidTy> async(Function &&F, Args &&... ArgList) {
+    auto Task =
+        std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
+#ifndef _MSC_VER
+    return asyncImpl(std::move(Task));
+#else
+    // This lambda has to be marked mutable because MSVC 2013's std::bind call
+    // operator isn't const qualified.
+    return asyncImpl([Task](VoidTy) mutable -> VoidTy {
+      Task();
+      return VoidTy();
+    });
+#endif
+  }
+
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  template <typename Function>
+  inline std::shared_future<VoidTy> async(Function &&F) {
+#ifndef _MSC_VER
+    return asyncImpl(std::forward<Function>(F));
+#else
+    return asyncImpl([F] (VoidTy) -> VoidTy { F(); return VoidTy(); });
+#endif
+  }
+
+  /// Blocking wait for all the threads to complete and the queue to be empty.
+  /// It is an error to try to add new tasks while blocking on this call.
+  void wait();
+
+private:
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  std::shared_future<VoidTy> asyncImpl(TaskTy F);
+
+  /// Threads in flight
+  std::vector<llvm::thread> Threads;
+
+  /// Tasks waiting for execution in the pool.
+  std::queue<PackagedTaskTy> Tasks;
+
+  /// Locking and signaling for accessing the Tasks queue.
+  std::mutex QueueLock;
+  std::condition_variable QueueCondition;
+
+  /// Locking and signaling for job completion
+  std::mutex CompletionLock;
+  std::condition_variable CompletionCondition;
+
+  /// Keep track of the number of threads actually busy
+  std::atomic<unsigned> ActiveThreads;
+
+#if LLVM_ENABLE_THREADS // avoids warning for unused variable
+  /// Signal for the destruction of the pool, asking threads to exit.
+  bool EnableFlag;
+#endif
+};
+}
+
+#endif // LLVM_SUPPORT_THREAD_POOL_H
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index 3cca1d6a9913..9007c132a99a 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -21,7 +21,7 @@ namespace llvm {
   bool llvm_is_multithreaded();

   /// llvm_execute_on_thread - Execute the given \p UserFn on a separate
-  /// thread, passing it the provided \p UserData and waits for thread 
+  /// thread, passing it the provided \p UserData and waits for thread
   /// completion.
   ///
   /// This function does not guarantee that the code will actually be executed
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index 2cd30e2aaf32..499fe7b7e70c 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -30,26 +30,25 @@ class TimeRecord {
   ssize_t MemUsed;          // Memory allocated (in bytes)
 public:
   TimeRecord() : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0) {}
-  
+
   /// getCurrentTime - Get the current time and memory usage. If Start is true
   /// we get the memory usage before the time, otherwise we get time before
   /// memory usage. This matters if the time to get the memory usage is
   /// significant and shouldn't be counted as part of a duration.
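A usage sketch for the ThreadPool added above: tasks go in through async(), and wait() blocks until the queue drains. This assumes a build with LLVM_ENABLE_THREADS; the returned shared_futures are deliberately ignored since their destructors do not block:

    #include "llvm/Support/ThreadPool.h"
    #include <atomic>

    int main() {
      std::atomic<int> Sum(0);
      llvm::ThreadPool Pool(4); // four worker threads

      for (int I = 1; I <= 100; ++I)
        Pool.async([&Sum, I] { Sum += I; });

      Pool.wait(); // every task has run once this returns
      return Sum == 5050 ? 0 : 1;
    }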
static TimeRecord getCurrentTime(bool Start = true); - - double getProcessTime() const { return UserTime+SystemTime; } + + double getProcessTime() const { return UserTime + SystemTime; } double getUserTime() const { return UserTime; } double getSystemTime() const { return SystemTime; } double getWallTime() const { return WallTime; } ssize_t getMemUsed() const { return MemUsed; } - - + // operator< - Allow sorting. bool operator<(const TimeRecord &T) const { // Sort by Wall Time elapsed, as it is the only thing really accurate return WallTime < T.WallTime; } - + void operator+=(const TimeRecord &RHS) { WallTime += RHS.WallTime; UserTime += RHS.UserTime; @@ -62,12 +61,12 @@ public: SystemTime -= RHS.SystemTime; MemUsed -= RHS.MemUsed; } - - /// print - Print the current timer to standard error, and reset the "Started" - /// flag. + + /// Print the current time record to \p OS, with a breakdown showing + /// contributions to the \p Total time record. void print(const TimeRecord &Total, raw_ostream &OS) const; }; - + /// Timer - This class is used to track the amount of time spent between /// invocations of its startTimer()/stopTimer() methods. Given appropriate OS /// support it can also keep track of the RSS of the program at various points. @@ -77,11 +76,13 @@ public: /// if they are never started. /// class Timer { - TimeRecord Time; + TimeRecord Time; // The total time captured + TimeRecord StartTime; // The time startTimer() was last called std::string Name; // The name of this time variable. - bool Started; // Has this time variable ever been started? + bool Running; // Is the timer currently running? + bool Triggered; // Has the timer ever been triggered? TimerGroup *TG; // The TimerGroup this Timer is in. - + Timer **Prev, *Next; // Doubly linked list of timers in the group. public: explicit Timer(StringRef N) : TG(nullptr) { init(N); } @@ -99,25 +100,31 @@ public: explicit Timer() : TG(nullptr) {} void init(StringRef N); void init(StringRef N, TimerGroup &tg); - + const std::string &getName() const { return Name; } bool isInitialized() const { return TG != nullptr; } - - /// startTimer - Start the timer running. Time between calls to - /// startTimer/stopTimer is counted by the Timer class. Note that these calls - /// must be correctly paired. - /// + + /// Check if startTimer() has ever been called on this timer. + bool hasTriggered() const { return Triggered; } + + /// Start the timer running. Time between calls to startTimer/stopTimer is + /// counted by the Timer class. Note that these calls must be correctly + /// paired. void startTimer(); - /// stopTimer - Stop the timer. - /// + /// Stop the timer. void stopTimer(); + /// Clear the timer state. + void clear(); + + /// Return the duration for which this timer has been running. + TimeRecord getTotalTime() const { return Time; } + private: friend class TimerGroup; }; - /// The TimeRegion class is used as a helper class to call the startTimer() and /// stopTimer() methods of the Timer class. When the object is constructed, it /// starts the timer specified as its argument. When it is destroyed, it stops @@ -126,6 +133,7 @@ private: class TimeRegion { Timer *T; TimeRegion(const TimeRegion &) = delete; + public: explicit TimeRegion(Timer &t) : T(&t) { T->startTimer(); @@ -138,7 +146,6 @@ public: } }; - /// NamedRegionTimer - This class is basically a combination of TimeRegion and /// Timer. It allows you to declare a new timer, AND specify the region to /// time, all in one statement. All timers with the same name are merged. 
This
@@ -151,7 +158,6 @@ struct NamedRegionTimer : public TimeRegion {
                    bool Enabled = true);
 };

-
 /// The TimerGroup class is used to group together related timers into a single
 /// report that is printed when the TimerGroup is destroyed. It is illegal to
 /// destroy a TimerGroup object before all of the Timers in it are gone. A
@@ -160,11 +166,12 @@ struct NamedRegionTimer : public TimeRegion {
 class TimerGroup {
   std::string Name;
   Timer *FirstTimer;   // First timer in the group.
-  std::vector<std::pair<TimeRecord, std::string> > TimersToPrint;
-  
+  std::vector<std::pair<TimeRecord, std::string>> TimersToPrint;
+
   TimerGroup **Prev, *Next; // Doubly linked list of TimerGroup's.
   TimerGroup(const TimerGroup &TG) = delete;
   void operator=(const TimerGroup &TG) = delete;
+
 public:
   explicit TimerGroup(StringRef name);
   ~TimerGroup();
@@ -173,10 +180,10 @@ public:

   /// print - Print any started timers in this group and zero them.
   void print(raw_ostream &OS);
-  
+
   /// printAll - This static method prints all timers and clears them all out.
   static void printAll(raw_ostream &OS);
-  
+
 private:
   friend class Timer;
   void addTimer(Timer &T);
diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h
new file mode 100644
index 000000000000..8529746eeccc
--- /dev/null
+++ b/include/llvm/Support/TrailingObjects.h
@@ -0,0 +1,349 @@
+//===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This header defines support for implementing classes that have
+/// some trailing object (or arrays of objects) appended to them. The
+/// main purpose is to make it obvious where this idiom is being used,
+/// and to make the usage more idiomatic and more difficult to get
+/// wrong.
+///
+/// The TrailingObject template abstracts away the reinterpret_cast,
+/// pointer arithmetic, and size calculations used for the allocation
+/// and access of appended arrays of objects, and takes care that they
+/// are all allocated at their required alignment. Additionally, it
+/// ensures that the base type is final -- deriving from a class that
+/// expects data appended immediately after it is typically not safe.
+///
+/// Users are expected to derive from this template, and provide
+/// numTrailingObjects implementations for each trailing type except
+/// the last, e.g. like this sample:
+///
+/// \code
+/// class VarLengthObj : private TrailingObjects<VarLengthObj, int, double> {
+///   friend TrailingObjects;
+///
+///   unsigned NumInts, NumDoubles;
+///   size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }
+/// };
+/// \endcode
+///
+/// You can access the appended arrays via 'getTrailingObjects', and
+/// determine the size needed for allocation via
+/// 'additionalSizeToAlloc' and 'totalSizeToAlloc'.
+///
+/// All the methods implemented by this class are intended for use
+/// by the implementation of the class, not as part of its interface
+/// (thus, private inheritance is suggested).
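The Timer changes above split the old Started flag into Running and Triggered and add the getTotalTime()/hasTriggered() accessors. A sketch of the usual pattern, driving a Timer through a TimeRegion inside a TimerGroup; the group prints its report when it is destroyed:

    #include "llvm/Support/Timer.h"
    #include "llvm/Support/raw_ostream.h"

    static void doWork() { /* workload under measurement */ }

    int main() {
      llvm::TimerGroup Group("my passes");
      llvm::Timer T;
      T.init("parsing", Group);

      {
        llvm::TimeRegion R(T); // startTimer() now, stopTimer() at scope exit
        doWork();
      }

      if (T.hasTriggered()) // true once startTimer() has ever run
        llvm::outs() << "wall: " << T.getTotalTime().getWallTime() << "\n";
    }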
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_TRAILINGOBJECTS_H +#define LLVM_SUPPORT_TRAILINGOBJECTS_H + +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/type_traits.h" +#include +#include + +namespace llvm { + +namespace trailing_objects_internal { +/// Helper template to calculate the max alignment requirement for a set of +/// objects. +template class AlignmentCalcHelper { +private: + enum { + FirstAlignment = AlignOf::Alignment, + RestAlignment = AlignmentCalcHelper::Alignment, + }; + +public: + enum { + Alignment = FirstAlignment > RestAlignment ? FirstAlignment : RestAlignment + }; +}; + +template class AlignmentCalcHelper { +public: + enum { Alignment = AlignOf::Alignment }; +}; + +/// The base class for TrailingObjects* classes. +class TrailingObjectsBase { +protected: + /// OverloadToken's purpose is to allow specifying function overloads + /// for different types, without actually taking the types as + /// parameters. (Necessary because member function templates cannot + /// be specialized, so overloads must be used instead of + /// specialization.) + template struct OverloadToken {}; +}; + +/// This helper template works-around MSVC 2013's lack of useful +/// alignas() support. The argument to LLVM_ALIGNAS(), in MSVC, is +/// required to be a literal integer. But, you *can* use template +/// specialization to select between a bunch of different LLVM_ALIGNAS +/// expressions... +template +class TrailingObjectsAligner : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(1) TrailingObjectsAligner<1> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(2) TrailingObjectsAligner<2> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(4) TrailingObjectsAligner<4> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(8) TrailingObjectsAligner<8> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(16) TrailingObjectsAligner<16> : public TrailingObjectsBase { +}; +template <> +class LLVM_ALIGNAS(32) TrailingObjectsAligner<32> : public TrailingObjectsBase { +}; + +// Just a little helper for transforming a type pack into the same +// number of a different type. e.g.: +// ExtractSecondType::type +template struct ExtractSecondType { + typedef Ty2 type; +}; + +// TrailingObjectsImpl is somewhat complicated, because it is a +// recursively inheriting template, in order to handle the template +// varargs. Each level of inheritance picks off a single trailing type +// then recurses on the rest. The "Align", "BaseTy", and +// "TopTrailingObj" arguments are passed through unchanged through the +// recursion. "PrevTy" is, at each level, the type handled by the +// level right above it. + +template +struct TrailingObjectsImpl { + // The main template definition is never used -- the two + // specializations cover all possibilities. +}; + +template +struct TrailingObjectsImpl + : public TrailingObjectsImpl { + + typedef TrailingObjectsImpl + ParentType; + + // Ensure the methods we inherit are not hidden. + using ParentType::getTrailingObjectsImpl; + using ParentType::additionalSizeToAllocImpl; + + static LLVM_CONSTEXPR bool requiresRealignment() { + return llvm::AlignOf::Alignment < llvm::AlignOf::Alignment; + } + + // These two functions are helper functions for + // TrailingObjects::getTrailingObjects. 
They recurse to the left -- + // the result for each type in the list of trailing types depends on + // the result of calling the function on the type to the + // left. However, the function for the type to the left is + // implemented by a *subclass* of this class, so we invoke it via + // the TopTrailingObj, which is, via the + // curiously-recurring-template-pattern, the most-derived type in + // this recursion, and thus, contains all the overloads. + static const NextTy * + getTrailingObjectsImpl(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + auto *Ptr = TopTrailingObj::getTrailingObjectsImpl( + Obj, TrailingObjectsBase::OverloadToken()) + + TopTrailingObj::callNumTrailingObjects( + Obj, TrailingObjectsBase::OverloadToken()); + + if (requiresRealignment()) + return reinterpret_cast( + llvm::alignAddr(Ptr, llvm::alignOf())); + else + return reinterpret_cast(Ptr); + } + + static NextTy * + getTrailingObjectsImpl(BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + auto *Ptr = TopTrailingObj::getTrailingObjectsImpl( + Obj, TrailingObjectsBase::OverloadToken()) + + TopTrailingObj::callNumTrailingObjects( + Obj, TrailingObjectsBase::OverloadToken()); + + if (requiresRealignment()) + return reinterpret_cast( + llvm::alignAddr(Ptr, llvm::alignOf())); + else + return reinterpret_cast(Ptr); + } + + // Helper function for TrailingObjects::additionalSizeToAlloc: this + // function recurses to superclasses, each of which requires one + // fewer size_t argument, and adds its own size. + static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl( + size_t SizeSoFar, size_t Count1, + typename ExtractSecondType::type... MoreCounts) { + return additionalSizeToAllocImpl( + (requiresRealignment() + ? llvm::RoundUpToAlignment(SizeSoFar, llvm::alignOf()) + : SizeSoFar) + + sizeof(NextTy) * Count1, + MoreCounts...); + } +}; + +// The base case of the TrailingObjectsImpl inheritance recursion, +// when there's no more trailing types. +template +struct TrailingObjectsImpl + : public TrailingObjectsAligner { + // This is a dummy method, only here so the "using" doesn't fail -- + // it will never be called, because this function recurses backwards + // up the inheritance chain to subclasses. + static void getTrailingObjectsImpl(); + + static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(size_t SizeSoFar) { + return SizeSoFar; + } + + template static void verifyTrailingObjectsAlignment() {} +}; + +} // end namespace trailing_objects_internal + +// Finally, the main type defined in this file, the one intended for users... + +/// See the file comment for details on the usage of the +/// TrailingObjects type. +template +class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl< + trailing_objects_internal::AlignmentCalcHelper< + TrailingTys...>::Alignment, + BaseTy, TrailingObjects, + BaseTy, TrailingTys...> { + + template + friend struct trailing_objects_internal::TrailingObjectsImpl; + + template class Foo {}; + + typedef trailing_objects_internal::TrailingObjectsImpl< + trailing_objects_internal::AlignmentCalcHelper::Alignment, + BaseTy, TrailingObjects, BaseTy, TrailingTys...> + ParentType; + using TrailingObjectsBase = trailing_objects_internal::TrailingObjectsBase; + + using ParentType::getTrailingObjectsImpl; + + // This function contains only a static_assert BaseTy is final. The + // static_assert must be in a function, and not at class-level + // because BaseTy isn't complete at class instantiation time, but + // will be by the time this function is instantiated. 
+ static void verifyTrailingObjectsAssertions() { +#ifdef LLVM_IS_FINAL + static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final."); +#endif + } + + // These two methods are the base of the recursion for this method. + static const BaseTy * + getTrailingObjectsImpl(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj; + } + + static BaseTy * + getTrailingObjectsImpl(BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj; + } + + // callNumTrailingObjects simply calls numTrailingObjects on the + // provided Obj -- except when the type being queried is BaseTy + // itself. There is always only one of the base object, so that case + // is handled here. (An additional benefit of indirecting through + // this function is that consumers only say "friend + // TrailingObjects", and thus, only this class itself can call the + // numTrailingObjects function.) + static size_t + callNumTrailingObjects(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return 1; + } + + template + static size_t callNumTrailingObjects(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj->numTrailingObjects(TrailingObjectsBase::OverloadToken()); + } + +public: + // make this (privately inherited) class public. + using ParentType::OverloadToken; + + /// Returns a pointer to the trailing object array of the given type + /// (which must be one of those specified in the class template). The + /// array may have zero or more elements in it. + template const T *getTrailingObjects() const { + verifyTrailingObjectsAssertions(); + // Forwards to an impl function with overloads, since member + // function templates can't be specialized. + return this->getTrailingObjectsImpl( + static_cast(this), + TrailingObjectsBase::OverloadToken()); + } + + /// Returns a pointer to the trailing object array of the given type + /// (which must be one of those specified in the class template). The + /// array may have zero or more elements in it. + template T *getTrailingObjects() { + verifyTrailingObjectsAssertions(); + // Forwards to an impl function with overloads, since member + // function templates can't be specialized. + return this->getTrailingObjectsImpl( + static_cast(this), TrailingObjectsBase::OverloadToken()); + } + + /// Returns the size of the trailing data, if an object were + /// allocated with the given counts (The counts are in the same order + /// as the template arguments). This does not include the size of the + /// base object. The template arguments must be the same as those + /// used in the class; they are supplied here redundantly only so + /// that it's clear what the counts are counting in callers. + template + static LLVM_CONSTEXPR typename std::enable_if< + std::is_same, Foo>::value, size_t>::type + additionalSizeToAlloc( + typename trailing_objects_internal::ExtractSecondType< + TrailingTys, size_t>::type... Counts) { + return ParentType::additionalSizeToAllocImpl(0, Counts...); + } + + /// Returns the total size of an object if it were allocated with the + /// given trailing object counts. This is the same as + /// additionalSizeToAlloc, except it *does* include the size of the base + /// object. + template + static LLVM_CONSTEXPR typename std::enable_if< + std::is_same, Foo>::value, size_t>::type + totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType< + TrailingTys, size_t>::type... 
Counts) { + return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...); + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h index 9f738dff1107..134698c3ec6b 100644 --- a/include/llvm/Support/UnicodeCharRanges.h +++ b/include/llvm/Support/UnicodeCharRanges.h @@ -51,6 +51,11 @@ public: /// the constructor, so it makes sense to create as few UnicodeCharSet /// instances per each array of ranges, as possible. #ifdef NDEBUG + + // FIXME: This could use constexpr + static_assert. This way we + // may get rid of NDEBUG in this header. Unfortunately there are some + // problems to get this working with MSVC 2013. Change this when + // the support for MSVC 2013 is dropped. LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {} #else UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h index cebf75c49c19..12b0dc961daa 100644 --- a/include/llvm/Support/Valgrind.h +++ b/include/llvm/Support/Valgrind.h @@ -20,17 +20,6 @@ #include "llvm/Support/Compiler.h" #include -#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) -// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact -// functions by name. -extern "C" { -void AnnotateHappensAfter(const char *file, int line, const volatile void *cv); -void AnnotateHappensBefore(const char *file, int line, const volatile void *cv); -void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line); -} -#endif - namespace llvm { namespace sys { // True if Valgrind is controlling this process. @@ -39,34 +28,6 @@ namespace sys { // Discard valgrind's translation of code in the range [Addr .. Addr + Len). // Otherwise valgrind may continue to execute the old version of the code. void ValgrindDiscardTranslations(const void *Addr, size_t Len); - -#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) - // Thread Sanitizer is a valgrind tool that finds races in code. - // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . - - // This marker is used to define a happens-before arc. The race detector will - // infer an arc from the begin to the end when they share the same pointer - // argument. - #define TsanHappensBefore(cv) \ - AnnotateHappensBefore(__FILE__, __LINE__, cv) - - // This marker defines the destination of a happens-before arc. - #define TsanHappensAfter(cv) \ - AnnotateHappensAfter(__FILE__, __LINE__, cv) - - // Ignore any races on writes between here and the next TsanIgnoreWritesEnd. - #define TsanIgnoreWritesBegin() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - // Resume checking for racy writes. 
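A compilable version of the VarLengthObj sample from the header comment above, showing the allocation half of the idiom via totalSizeToAlloc; malloc and placement new stand in for whatever allocator a real client uses:

    #include "llvm/Support/TrailingObjects.h"
    #include <cstddef>
    #include <cstdlib>
    #include <new>

    class VarLengthObj final
        : private llvm::TrailingObjects<VarLengthObj, int, double> {
      friend TrailingObjects;

      unsigned NumInts, NumDoubles;
      // Required for every trailing type except the last.
      size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }

      VarLengthObj(unsigned NumInts, unsigned NumDoubles)
          : NumInts(NumInts), NumDoubles(NumDoubles) {}

    public:
      static VarLengthObj *create(unsigned NumInts, unsigned NumDoubles) {
        void *Mem =
            std::malloc(totalSizeToAlloc<int, double>(NumInts, NumDoubles));
        return new (Mem) VarLengthObj(NumInts, NumDoubles);
      }

      int *ints() { return getTrailingObjects<int>(); }
      double *doubles() { return getTrailingObjects<double>(); }
    };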
- #define TsanIgnoreWritesEnd() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) -#else - #define TsanHappensBefore(cv) - #define TsanHappensAfter(cv) - #define TsanIgnoreWritesBegin() - #define TsanIgnoreWritesEnd() -#endif } } diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h index 0fbb7d2e6c7e..b056ab6c1ce2 100644 --- a/include/llvm/Support/YAMLParser.h +++ b/include/llvm/Support/YAMLParser.h @@ -145,11 +145,12 @@ public: unsigned int getType() const { return TypeID; } void *operator new(size_t Size, BumpPtrAllocator &Alloc, - size_t Alignment = 16) throw() { + size_t Alignment = 16) LLVM_NOEXCEPT { return Alloc.Allocate(Size, Alignment); } - void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() { + void operator delete(void *Ptr, BumpPtrAllocator &Alloc, + size_t Size) LLVM_NOEXCEPT { Alloc.Deallocate(Ptr, Size); } @@ -157,7 +158,7 @@ protected: std::unique_ptr &Doc; SMRange SourceRange; - void operator delete(void *) throw() {} + void operator delete(void *) LLVM_NOEXCEPT = delete; ~Node() = default; diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index c04294a5e87a..fb2badfd93ba 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -10,7 +10,6 @@ #ifndef LLVM_SUPPORT_YAMLTRAITS_H #define LLVM_SUPPORT_YAMLTRAITS_H - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" @@ -29,7 +28,6 @@ namespace llvm { namespace yaml { - /// This class should be specialized by any type that needs to be converted /// to/from a YAML mapping. For example: /// @@ -52,7 +50,6 @@ struct MappingTraits { // static const bool flow = true; }; - /// This class should be specialized by any integral type that converts /// to/from a YAML scalar where there is a one-to-one mapping between /// in-memory values and a string in YAML. For example: @@ -70,7 +67,6 @@ struct ScalarEnumerationTraits { // static void enumeration(IO &io, T &value); }; - /// This class should be specialized by any integer type that is a union /// of bit values and the YAML representation is a flow sequence of /// strings. For example: @@ -88,7 +84,6 @@ struct ScalarBitSetTraits { // static void bitset(IO &io, T &value); }; - /// This class should be specialized by type that requires custom conversion /// to/from a yaml scalar. For example: /// @@ -149,7 +144,6 @@ struct BlockScalarTraits { // static StringRef input(StringRef Scalar, void *ctxt, T &Value); }; - /// This class should be specialized by any type that needs to be converted /// to/from a YAML sequence. For example: /// @@ -175,7 +169,6 @@ struct SequenceTraits { // static const bool flow = true; }; - /// This class should be specialized by any type that needs to be converted /// to/from a list of YAML documents. template @@ -185,7 +178,6 @@ struct DocumentListTraits { // static T::value_type& element(IO &io, T &seq, size_t index); }; - // Only used by compiler if both template types are the same template struct SameType; @@ -194,8 +186,6 @@ struct SameType; template struct MissingTrait; - - // Test if ScalarEnumerationTraits is defined on type T. template struct has_ScalarEnumerationTraits @@ -213,7 +203,6 @@ public: (sizeof(test >(nullptr)) == 1); }; - // Test if ScalarBitSetTraits is defined on type T. template struct has_ScalarBitSetTraits @@ -230,7 +219,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - // Test if ScalarTraits is defined on type T. 
template struct has_ScalarTraits @@ -252,7 +240,6 @@ public: (sizeof(test>(nullptr, nullptr, nullptr)) == 1); }; - // Test if BlockScalarTraits is defined on type T. template struct has_BlockScalarTraits @@ -272,7 +259,6 @@ public: (sizeof(test>(nullptr, nullptr)) == 1); }; - // Test if MappingTraits is defined on type T. template struct has_MappingTraits @@ -305,8 +291,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - - // Test if SequenceTraits is defined on type T. template struct has_SequenceMethodTraits @@ -323,7 +307,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - // has_FlowTraits will cause an error with some compilers because // it subclasses int. Using this wrapper only instantiates the // real has_FlowTraits only if the template type is a class. @@ -353,14 +336,11 @@ public: static bool const value = sizeof(f(nullptr)) == 2; }; - - // Test if SequenceTraits is defined on type T template struct has_SequenceTraits : public std::integral_constant::value > { }; - // Test if DocumentListTraits is defined on type T template struct has_DocumentListTraits @@ -453,7 +433,6 @@ inline bool needsQuotes(StringRef S) { return false; } - template struct missingTraits : public std::integral_constant::value @@ -654,8 +633,6 @@ private: void *Ctxt; }; - - template typename std::enable_if::value,void>::type yamlize(IO &io, T &Val, bool) { @@ -676,7 +653,6 @@ yamlize(IO &io, T &Val, bool) { } } - template typename std::enable_if::value,void>::type yamlize(IO &io, T &Val, bool) { @@ -791,7 +767,6 @@ yamlize(IO &io, T &Seq, bool) { } } - template<> struct ScalarTraits { static void output(const bool &, void*, llvm::raw_ostream &); @@ -883,8 +858,6 @@ struct ScalarTraits { static bool mustQuote(StringRef) { return false; } }; - - // Utility for use within MappingTraits<>::mapping() method // to [de]normalize an object for use with YAML conversion. template @@ -917,14 +890,12 @@ private: TFinal &Result; }; - - // Utility for use within MappingTraits<>::mapping() method // to [de]normalize an object for use with YAML conversion. template struct MappingNormalizationHeap { MappingNormalizationHeap(IO &i_o, TFinal &Obj) - : io(i_o), BufPtr(NULL), Result(Obj) { + : io(i_o), BufPtr(nullptr), Result(Obj) { if ( io.outputting() ) { BufPtr = new (&Buffer) TNorm(io, Obj); } @@ -953,8 +924,6 @@ private: TFinal &Result; }; - - /// /// The Input class is used to parse a yaml document into in-memory structs /// and vectors. @@ -1083,7 +1052,6 @@ private: void setError(HNode *hnode, const Twine &message); void setError(Node *node, const Twine &message); - public: // These are only used by operator>>. They could be private // if those templated things could be made friends. @@ -1105,9 +1073,6 @@ private: bool ScalarMatchFound; }; - - - /// /// The Output class is used to generate a yaml document from in-memory structs /// and vectors. @@ -1181,9 +1146,6 @@ private: bool NeedsNewLine; }; - - - /// YAML I/O does conversion based on types. But often native data types /// are just a typedef of built in intergral types (e.g. int). But the C++ /// type matching system sees through the typedef and all the typedefed types @@ -1206,8 +1168,6 @@ private: _base value; \ }; - - /// /// Use these types instead of uintXX_t in any mapping to have /// its yaml output formatted as hexadecimal. 
@@ -1217,7 +1177,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint16_t, Hex16) LLVM_YAML_STRONG_TYPEDEF(uint32_t, Hex32) LLVM_YAML_STRONG_TYPEDEF(uint64_t, Hex64) - template<> struct ScalarTraits { static void output(const Hex8 &, void*, llvm::raw_ostream &); @@ -1246,7 +1205,6 @@ struct ScalarTraits { static bool mustQuote(StringRef) { return false; } }; - // Define non-member operator>> so that Input can stream in a document list. template inline @@ -1303,7 +1261,6 @@ operator>>(Input &yin, T &docSeq) { return yin; } - // Define non-member operator<< so that Output can stream out document list. template inline @@ -1372,11 +1329,9 @@ operator<<(Output &yout, T &seq) { return yout; } - } // namespace yaml } // namespace llvm - /// Utility for declaring that a std::vector of a particular type /// should be considered a YAML sequence. #define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \ @@ -1436,6 +1391,4 @@ operator<<(Output &yout, T &seq) { } \ } - - #endif // LLVM_SUPPORT_YAMLTRAITS_H diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h index 19f9c2c4b155..b46fd7f730c9 100644 --- a/include/llvm/Support/circular_raw_ostream.h +++ b/include/llvm/Support/circular_raw_ostream.h @@ -17,8 +17,7 @@ #include "llvm/Support/raw_ostream.h" -namespace llvm -{ +namespace llvm { /// circular_raw_ostream - A raw_ostream which *can* save its data /// to a circular buffer, or can pass it through directly to an /// underlying stream if specified with a buffer of zero. @@ -154,5 +153,4 @@ namespace llvm }; } // end llvm namespace - #endif diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 28e512c86941..d1e96f892a4b 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -218,14 +218,13 @@ public: // Formatted output, see the leftJustify() function in Support/Format.h. raw_ostream &operator<<(const FormattedString &); - + // Formatted output, see the formatHex() function in Support/Format.h. raw_ostream &operator<<(const FormattedNumber &); - + /// indent - Insert 'NumSpaces' spaces. raw_ostream &indent(unsigned NumSpaces); - /// Changes the foreground color of text that will be output from this point /// forward. /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to @@ -246,7 +245,7 @@ public: /// outputting colored text, or before program exit. virtual raw_ostream &resetColor() { return *this; } - /// Reverses the forground and background colors. + /// Reverses the foreground and background colors. virtual raw_ostream &reverseColor() { return *this; } /// This function determines if this stream is connected to a "tty" or @@ -316,7 +315,7 @@ private: }; /// An abstract base class for streams implementations that also support a -/// pwrite operation. This is usefull for code that can mostly stream out data, +/// pwrite operation. This is useful for code that can mostly stream out data, /// but needs to patch in a header that needs to know the output size. class raw_pwrite_stream : public raw_ostream { virtual void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) = 0; @@ -350,10 +349,6 @@ class raw_fd_ostream : public raw_pwrite_stream { /// bool Error; - /// Controls whether the stream should attempt to use atomic writes, when - /// possible. - bool UseAtomicWrites; - uint64_t pos; bool SupportsSeeking; @@ -403,16 +398,6 @@ public: /// to the offset specified from the beginning of the file. 
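The Hex32 strong typedef above changes only which ScalarTraits is selected, so a field renders as hexadecimal while an ordinary uint32_t stays decimal. A sketch with a hypothetical Section struct and its MappingTraits specialization, using the Output operator shown above:

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    struct Section {
      llvm::yaml::Hex32 Address;
      uint32_t Size;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Section> {
      static void mapping(IO &io, Section &S) {
        io.mapRequired("address", S.Address); // emitted as 0x...
        io.mapRequired("size", S.Size);       // emitted as decimal
      }
    };
    } // end namespace yaml
    } // end namespace llvm

    int main() {
      Section S;
      S.Address = 0xDEADBEEF;
      S.Size = 64;
      llvm::yaml::Output Yout(llvm::outs());
      Yout << S;
    }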
uint64_t seek(uint64_t off); - /// Set the stream to attempt to use atomic writes for individual output - /// routines where possible. - /// - /// Note that because raw_ostream's are typically buffered, this flag is only - /// sensible when used on unbuffered streams which will flush their output - /// immediately. - void SetUseAtomicWrites(bool Value) { - UseAtomicWrites = Value; - } - raw_ostream &changeColor(enum Colors colors, bool bold=false, bool bg=false) override; raw_ostream &resetColor() override; @@ -471,6 +456,7 @@ class raw_string_ostream : public raw_ostream { /// Return the current position within the stream, not counting the bytes /// currently in the buffer. uint64_t current_pos() const override { return OS.size(); } + public: explicit raw_string_ostream(std::string &O) : OS(O) {} ~raw_string_ostream() override; @@ -485,6 +471,9 @@ public: /// A raw_ostream that writes to an SmallVector or SmallString. This is a /// simple adaptor class. This class does not encounter output errors. +/// raw_svector_ostream operates without a buffer, delegating all memory +/// management to the SmallString. Thus the SmallString is always up-to-date, +/// may be used directly and there is no need to call flush(). class raw_svector_ostream : public raw_pwrite_stream { SmallVectorImpl &OS; @@ -493,32 +482,23 @@ class raw_svector_ostream : public raw_pwrite_stream { void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) override; - /// Return the current position within the stream, not counting the bytes - /// currently in the buffer. + /// Return the current position within the stream. uint64_t current_pos() const override; -protected: - // Like the regular constructor, but doesn't call init. - explicit raw_svector_ostream(SmallVectorImpl &O, unsigned); - void init(); - public: /// Construct a new raw_svector_ostream. /// /// \param O The vector to write to; this should generally have at least 128 /// bytes free to avoid any extraneous memory overhead. - explicit raw_svector_ostream(SmallVectorImpl &O); - ~raw_svector_ostream() override; + explicit raw_svector_ostream(SmallVectorImpl &O) : OS(O) { + SetUnbuffered(); + } + ~raw_svector_ostream() override {} + void flush() = delete; - /// This is called when the SmallVector we're appending to is changed outside - /// of the raw_svector_ostream's control. It is only safe to do this if the - /// raw_svector_ostream has previously been flushed. - void resync(); - - /// Flushes the stream contents to the target vector and return a StringRef - /// for the vector contents. - StringRef str(); + /// Return a StringRef for the vector contents. + StringRef str() { return StringRef(OS.data(), OS.size()); } }; /// A raw_ostream that discards all output. 
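With the rewrite above, raw_svector_ostream keeps the SmallString current at all times: there is no internal buffer, flush() is deleted, and str() is just a StringRef over the vector. A sketch:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SmallString<128> Buffer;
      llvm::raw_svector_ostream OS(Buffer);

      OS << "answer = " << 42;

      // No flush() call is needed (or even possible): Buffer is up to date.
      return OS.str() == "answer = 42" ? 0 : 1;
    }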
@@ -541,12 +521,10 @@ class buffer_ostream : public raw_svector_ostream { SmallVector<char, 0> Buffer; public: - buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer, 0), OS(OS) { - init(); - } - ~buffer_ostream() { OS << str(); } + buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer), OS(OS) {} + ~buffer_ostream() override { OS << str(); } }; } // end llvm namespace -#endif +#endif // LLVM_SUPPORT_RAW_OSTREAM_H diff --git a/include/llvm/Support/thread.h b/include/llvm/Support/thread.h new file mode 100644 index 000000000000..2d130418a57f --- /dev/null +++ b/include/llvm/Support/thread.h @@ -0,0 +1,66 @@ +//===-- llvm/Support/thread.h - Wrapper for <thread> ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header is a wrapper for <thread> that works around problems with the +// MSVC headers when exceptions are disabled. It also provides llvm::thread, +// which is either a typedef of std::thread or a replacement that calls the +// function synchronously depending on the value of LLVM_ENABLE_THREADS. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_THREAD_H +#define LLVM_SUPPORT_THREAD_H +#include "llvm/Config/llvm-config.h" + +#if LLVM_ENABLE_THREADS + +#ifdef _MSC_VER +// concrt.h depends on eh.h for __uncaught_exception declaration +// even if we disable exceptions. +#include <eh.h> + +// Suppress 'C++ exception handler used, but unwind semantics are not enabled.' +#pragma warning(push) +#pragma warning(disable:4530) +#endif + +#include <thread> + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +namespace llvm { +typedef std::thread thread; +} + +#else // !LLVM_ENABLE_THREADS + +#include <utility> + +namespace llvm { + +struct thread { + thread() {} + thread(thread &&other) {} + template <class Function, class... Args> + explicit thread(Function &&f, Args &&... args) { + f(std::forward<Args>(args)...); + } + thread(const thread &) = delete; + + void join() {} +}; + +} + +#endif // LLVM_ENABLE_THREADS + +#endif diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index 45465aea004b..88385c3fae1e 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -93,6 +93,15 @@ struct add_const_past_pointer< } +// If the compiler supports detecting whether a class is final, define +// an LLVM_IS_FINAL macro. If it cannot be defined properly, this +// macro will be left undefined. +#if __cplusplus >= 201402L +#define LLVM_IS_FINAL(Ty) std::is_final<Ty>() +#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0) +#define LLVM_IS_FINAL(Ty) __is_final(Ty) +#endif + #ifdef LLVM_DEFINED_HAS_FEATURE #undef __has_feature #endif diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index b4642c991192..eb1c5c78b9c0 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -366,7 +366,7 @@ class TypedInit : public Init { protected: explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {} - ~TypedInit() { + ~TypedInit() override { // If this is a DefInit we need to delete the RecordRecTy.
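The new Support/thread.h above lets client code stay identical whether or not LLVM_ENABLE_THREADS is set: with threading disabled, the functor simply runs synchronously in the constructor and join() is a no-op. A minimal sketch of that usage:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/thread.h"

    static void hello(int Id) { llvm::errs() << "worker " << Id << "\n"; }

    int main() {
      llvm::thread T(hello, 1); // concurrent, or synchronous without threads
      T.join();                 // no-op in the single-threaded fallback
      return 0;
    }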
if (getKind() == IK_DefInit) delete Ty; @@ -547,7 +547,7 @@ public: class StringInit : public TypedInit { std::string Value; - explicit StringInit(const std::string &V) + explicit StringInit(StringRef V) : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {} StringInit(const StringInit &Other) = delete; @@ -836,8 +836,6 @@ public: class VarInit : public TypedInit { Init *VarName; - explicit VarInit(const std::string &VN, RecTy *T) - : TypedInit(IK_VarInit, T), VarName(StringInit::get(VN)) {} explicit VarInit(Init *VN, RecTy *T) : TypedInit(IK_VarInit, T), VarName(VN) {} @@ -1589,6 +1587,6 @@ Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, const std::string &Name, const std::string &Scoper); -} // End llvm namespace +} // end llvm namespace -#endif +#endif // LLVM_TABLEGEN_RECORD_H diff --git a/include/llvm/Target/CostTable.h b/include/llvm/Target/CostTable.h index 34f6041137c1..2499f5c3189c 100644 --- a/include/llvm/Target/CostTable.h +++ b/include/llvm/Target/CostTable.h @@ -15,64 +15,54 @@ #ifndef LLVM_TARGET_COSTTABLE_H_ #define LLVM_TARGET_COSTTABLE_H_ +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineValueType.h" + namespace llvm { /// Cost Table Entry -template struct CostTblEntry { int ISD; - TypeTy Type; + MVT::SimpleValueType Type; unsigned Cost; }; /// Find in cost table, TypeTy must be comparable to CompareTy by == -template -int CostTableLookup(const CostTblEntry *Tbl, unsigned len, int ISD, - CompareTy Ty) { - for (unsigned int i = 0; i < len; ++i) - if (ISD == Tbl[i].ISD && Ty == Tbl[i].Type) - return i; +inline const CostTblEntry *CostTableLookup(ArrayRef Tbl, + int ISD, MVT Ty) { + auto I = std::find_if(Tbl.begin(), Tbl.end(), + [=](const CostTblEntry &Entry) { + return ISD == Entry.ISD && Ty == Entry.Type; }); + if (I != Tbl.end()) + return I; // Could not find an entry. - return -1; -} - -/// Find in cost table, TypeTy must be comparable to CompareTy by == -template -int CostTableLookup(const CostTblEntry(&Tbl)[N], int ISD, - CompareTy Ty) { - return CostTableLookup(Tbl, N, ISD, Ty); + return nullptr; } /// Type Conversion Cost Table -template struct TypeConversionCostTblEntry { int ISD; - TypeTy Dst; - TypeTy Src; + MVT::SimpleValueType Dst; + MVT::SimpleValueType Src; unsigned Cost; }; /// Find in type conversion cost table, TypeTy must be comparable to CompareTy /// by == -template -int ConvertCostTableLookup(const TypeConversionCostTblEntry *Tbl, - unsigned len, int ISD, CompareTy Dst, - CompareTy Src) { - for (unsigned int i = 0; i < len; ++i) - if (ISD == Tbl[i].ISD && Src == Tbl[i].Src && Dst == Tbl[i].Dst) - return i; +inline const TypeConversionCostTblEntry * +ConvertCostTableLookup(ArrayRef Tbl, + int ISD, MVT Dst, MVT Src) { + auto I = std::find_if(Tbl.begin(), Tbl.end(), + [=](const TypeConversionCostTblEntry &Entry) { + return ISD == Entry.ISD && Src == Entry.Src && + Dst == Entry.Dst; + }); + if (I != Tbl.end()) + return I; // Could not find an entry. 
- return -1; -} - -/// Find in type conversion cost table, TypeTy must be comparable to CompareTy -/// by == -template -int ConvertCostTableLookup(const TypeConversionCostTblEntry(&Tbl)[N], - int ISD, CompareTy Dst, CompareTy Src) { - return ConvertCostTableLookup(Tbl, N, ISD, Dst, Src); + return nullptr; } } // namespace llvm diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index e0aea181a639..79046b2b7352 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -441,6 +441,30 @@ class Instruction { string PostEncoderMethod = ""; string DecoderMethod = ""; + // Is the instruction decoder method able to completely determine if the + // given instruction is valid or not. If the TableGen definition of the + // instruction specifies bitpattern A??B where A and B are static bits, the + // hasCompleteDecoder flag says whether the decoder method fully handles the + // ?? space, i.e. if it is a final arbiter for the instruction validity. + // If not then the decoder attempts to continue decoding when the decoder + // method fails. + // + // This allows to handle situations where the encoding is not fully + // orthogonal. Example: + // * InstA with bitpattern 0b0000????, + // * InstB with bitpattern 0b000000?? but the associated decoder method + // DecodeInstB() returns Fail when ?? is 0b00 or 0b11. + // + // The decoder tries to decode a bitpattern that matches both InstA and + // InstB bitpatterns first as InstB (because it is the most specific + // encoding). In the default case (hasCompleteDecoder = 1), when + // DecodeInstB() returns Fail the bitpattern gets rejected. By setting + // hasCompleteDecoder = 0 in InstB, the decoder is informed that + // DecodeInstB() is not able to determine if all possible values of ?? are + // valid or not. If DecodeInstB() returns Fail the decoder will attempt to + // decode the bitpattern as InstA too. + bit hasCompleteDecoder = 1; + /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc. bits<64> TSFlags = 0; @@ -595,6 +619,8 @@ class Operand : DAGOperand { string PrintMethod = "printOperand"; string EncoderMethod = ""; string DecoderMethod = ""; + bit hasCompleteDecoder = 1; + string OperandNamespace = "MCOI"; string OperandType = "OPERAND_UNKNOWN"; dag MIOperandInfo = (ops); @@ -910,9 +936,6 @@ class AsmParser { // ShouldEmitMatchRegisterName - Set to false if the target needs a hand // written register name matcher bit ShouldEmitMatchRegisterName = 1; - - /// Does the instruction mnemonic allow '.' - bit MnemonicContainsDot = 0; } def DefaultAsmParser : AsmParser; @@ -940,6 +963,15 @@ class AsmParserVariant { // register tokens as constrained registers, instead of tokens, for the // purposes of matching. 
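Returning to the CostTable.h hunk above: callers now receive a typed entry pointer instead of an index, which removes the -1 sentinel. A sketch of a lookup against the new ArrayRef-based API; the table contents and the fallback cost of 4 are invented for illustration:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/Target/CostTable.h"
    using namespace llvm;

    static const CostTblEntry ShiftCostTbl[] = {
        {ISD::SHL, MVT::v4i32, 1},
        {ISD::SRL, MVT::v4i32, 2},
    };

    static unsigned getShiftCost(int ISDOpc, MVT VT) {
      if (const CostTblEntry *Entry = CostTableLookup(ShiftCostTbl, ISDOpc, VT))
        return Entry->Cost;
      return 4; // no table entry: assume a conservative default
    }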
string RegisterPrefix = ""; + + // TokenizingCharacters - Characters that are standalone tokens + string TokenizingCharacters = "[]*!"; + + // SeparatorCharacters - Characters that are not tokens + string SeparatorCharacters = " \t,"; + + // BreakCharacters - Characters that start new identifiers + string BreakCharacters = ""; } def DefaultAsmParserVariant : AsmParserVariant; diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index 9d4e7a04d905..0c6c1f1468c4 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -46,6 +46,8 @@ namespace ISD { static const uint64_t SplitOffs = 11; static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca static const uint64_t InAllocaOffs = 12; + static const uint64_t SplitEnd = 1ULL<<13; ///< Last part of a split + static const uint64_t SplitEndOffs = 13; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size @@ -103,6 +105,9 @@ namespace ISD { bool isSplit() const { return Flags & Split; } void setSplit() { Flags |= One << SplitOffs; } + bool isSplitEnd() const { return Flags & SplitEnd; } + void setSplitEnd() { Flags |= One << SplitEndOffs; } + unsigned getOrigAlign() const { return (unsigned) ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2); diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h index 3af2227410f7..cadd07d71f12 100644 --- a/include/llvm/Target/TargetFrameLowering.h +++ b/include/llvm/Target/TargetFrameLowering.h @@ -70,6 +70,18 @@ public: /// unsigned getStackAlignment() const { return StackAlignment; } + /// alignSPAdjust - This method aligns the stack adjustment to the correct + /// alignment. + /// + int alignSPAdjust(int SPAdj) const { + if (SPAdj < 0) { + SPAdj = -RoundUpToAlignment(-SPAdj, StackAlignment); + } else { + SPAdj = RoundUpToAlignment(SPAdj, StackAlignment); + } + return SPAdj; + } + /// getTransientStackAlignment - This method returns the number of bytes to /// which the stack pointer must be aligned at all times, even between /// calls. @@ -84,6 +96,11 @@ public: return StackRealignable; } + /// Return the skew that has to be applied to stack alignment under + /// certain conditions (e.g. stack was adjusted before function \p MF + /// was called). + virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + /// getOffsetOfLocalArea - This method returns the offset of the local area /// from the stack pointer on entrance to a function. /// @@ -129,6 +146,11 @@ public: return false; } + /// Returns true if the target will correctly handle shrink wrapping. + virtual bool enableShrinkWrapping(const MachineFunction &MF) const { + return false; + } + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF, @@ -136,6 +158,10 @@ public: virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; + /// Replace a StackProbe stub (if any) with the actual probe code inline + virtual void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + /// Adjust the prologue to have the function use segmented stacks. This works /// by adding a check even before the "normal" function prologue. virtual void adjustForSegmentedStacks(MachineFunction &MF, @@ -207,10 +233,6 @@ public: // has any stack objects. 
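The new TargetFrameLowering::alignSPAdjust above rounds the magnitude of the adjustment up and preserves its sign. A worked example with a 16-byte StackAlignment, where TFL is an in-scope TargetFrameLowering reference assumed for illustration:

    assert(TFL.alignSPAdjust(24) == 32);   // 24 rounds up to 32
    assert(TFL.alignSPAdjust(-24) == -32); // magnitude rounds up, sign kept
    assert(TFL.alignSPAdjust(32) == 32);   // already aligned: unchanged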
However, targets may want to override this. virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; - /// getFrameIndexOffset - Returns the displacement from the frame register to - /// the stack frame of the specified index. - virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - /// getFrameIndexReference - This method should return the base register /// and offset used to reference a frame index location. The offset is /// returned directly, and the base register is returned via FrameReg. @@ -218,10 +240,11 @@ public: unsigned &FrameReg) const; /// Same as above, except that the 'base register' will always be RSP, not - /// RBP on x86. This is used exclusively for lowering STATEPOINT nodes. + /// RBP on x86. This is generally used for emitting statepoint or EH tables + /// that use offsets from RSP. /// TODO: This should really be a parameterizable choice. virtual int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { + unsigned &FrameReg) const { // default to calling normal version, we override this on x86 only llvm_unreachable("unimplemented for non-x86"); return 0; @@ -246,6 +269,10 @@ public: RegScavenger *RS = nullptr) const { } + virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const { + report_fatal_error("WinEH not implemented for this target"); + } + /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog /// code insertion to eliminate call frame setup and destroy pseudo /// instructions (but only if the Target is using them). It is responsible diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 8b314f454b18..0cebcf1c6b5d 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { @@ -38,7 +39,6 @@ class SelectionDAG; class ScheduleDAG; class TargetRegisterClass; class TargetRegisterInfo; -class BranchProbability; class TargetSubtargetInfo; class TargetSchedModel; class DFAPacketizer; @@ -54,13 +54,18 @@ class TargetInstrInfo : public MCInstrInfo { TargetInstrInfo(const TargetInstrInfo &) = delete; void operator=(const TargetInstrInfo &) = delete; public: - TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u) - : CallFrameSetupOpcode(CFSetupOpcode), - CallFrameDestroyOpcode(CFDestroyOpcode) { - } + TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, + unsigned CatchRetOpcode = ~0u) + : CallFrameSetupOpcode(CFSetupOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode), + CatchRetOpcode(CatchRetOpcode) {} virtual ~TargetInstrInfo(); + static bool isGenericOpcode(unsigned Opc) { + return Opc <= TargetOpcode::GENERIC_OP_END; + } + /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, @@ -94,6 +99,41 @@ protected: return false; } + /// This method commutes the operands of the given machine instruction MI. + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. 
+ /// + /// If a target has any instructions that are commutable but require + /// converting to different instructions or making non-trivial changes + /// to commute them, this method can be overloaded to do that. + /// The default implementation simply swaps the commutable operands. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + virtual MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const; + + /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable + /// operand indices to (ResultIdx1, ResultIdx2). + /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be + /// predefined to some indices or be undefined (designated by the special + /// value 'CommuteAnyOperandIndex'). + /// The predefined result indices cannot be re-defined. + /// The function returns true iff after the result pair redefinition + /// the fixed result pair is equal to or equivalent to the source pair of + /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that + /// the pairs (x,y) and (y,x) are equivalent. + static bool fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2); + private: /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is /// set and the target hook isReallyTriviallyReMaterializable returns false, @@ -111,6 +151,8 @@ public: unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } + /// Returns the actual stack pointer adjustment made by an instruction /// as part of a call sequence. By default, only call frame setup/destroy /// instructions adjust the stack, but targets may want to override this @@ -250,20 +292,51 @@ public: return nullptr; } - /// If a target has any instructions that are commutable but require - /// converting to different instructions or making non-trivial changes to - /// commute them, this method can overloaded to do that. - /// The default implementation simply swaps the commutable operands. - /// If NewMI is false, MI is modified in place and returned; otherwise, a - /// new machine instruction is created and returned. Do not call this - /// method for a non-commutable instruction, but there may be some cases - /// where this method fails and returns null. - virtual MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const; + // This constant can be used as an input value of operand index passed to + // the method findCommutedOpIndices() to tell the method that the + // corresponding operand index is not pre-defined and that the method + // can pick any commutable operand. + static const unsigned CommuteAnyOperandIndex = ~0U; - /// If specified MI is commutable, return the two operand indices that would - /// swap value. Return false if the instruction - /// is not in a form which this routine understands. + /// This method commutes the operands of the given machine instruction MI. + /// + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. 
OpIdx1 and OpIdx2 arguments may be set to a special value + /// 'CommuteAnyOperandIndex', which means that the method is free to choose + /// any arbitrarily chosen commutable operand. If both arguments are set to + /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable + /// operands; then commutes them if such operands could be found. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction or + /// for non-commuable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr * + commuteInstruction(MachineInstr *MI, + bool NewMI = false, + unsigned OpIdx1 = CommuteAnyOperandIndex, + unsigned OpIdx2 = CommuteAnyOperandIndex) const; + + /// Returns true iff the routine could find two commutable operands in the + /// given machine instruction. + /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. + /// If any of the INPUT values is set to the special value + /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable + /// operand, then returns its index in the corresponding argument. + /// If both of INPUT values are set to 'CommuteAnyOperandIndex' then method + /// looks for 2 commutable operands. + /// If INPUT values refer to some operands of MI, then the method simply + /// returns true if the corresponding operands are commutable and returns + /// false otherwise. + /// + /// For example, calling this method this way: + /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex; + /// findCommutedOpIndices(MI, Op1, Op2); + /// can be interpreted as a query asking to find an operand that would be + /// commutable with the operand#1. virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; @@ -511,7 +584,7 @@ public: virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -526,7 +599,7 @@ public: unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -538,7 +611,7 @@ public: /// will be properly predicted. virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -724,13 +797,30 @@ public: /// order since the pattern evaluator stops checking as soon as it finds a /// faster sequence. /// \param Root - Instruction that could be combined with one of its operands - /// \param Pattern - Vector of possible combination patterns + /// \param Patterns - Vector of possible combination patterns virtual bool getMachineCombinerPatterns( MachineInstr &Root, - SmallVectorImpl &Pattern) const { + SmallVectorImpl &Patterns) const; + + /// Return true if the input \P Inst is part of a chain of dependent ops + /// that are suitable for reassociation, otherwise return false. + /// If the instruction's operands must be commuted to have a previous + /// instruction of the same type define the first source operand, \P Commuted + /// will be set to true. 
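Putting the commute API above together: a caller can pin one operand index and let the target pick the other via CommuteAnyOperandIndex, then perform the commute. A sketch, assuming MI and TII are in scope:

    unsigned OpIdx1 = 1; // pin operand #1
    unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; // target's choice
    if (TII->findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
      // In-place commute (NewMI = false); may still return nullptr on failure.
      if (MachineInstr *Commuted =
              TII->commuteInstruction(MI, /*NewMI=*/false, OpIdx1, OpIdx2))
        MI = Commuted;
    }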
+ bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; + + /// Return true when \P Inst is both associative and commutative. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { return false; } + /// Return true when \P Inst has reassociable operands in the same \P MBB. + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const; + + /// Return true when \P Inst has reassociable sibling. + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence. The client /// has to decide whether the actual replacement is beneficial or not. @@ -742,12 +832,26 @@ public: /// \param InstrIdxForVirtReg - map of virtual register to instruction in /// InsInstr that defines it virtual void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const { + DenseMap &InstrIdxForVirtReg) const; + + /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to + /// reduce critical path length. + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const; + + /// This is an architecture-specific helper function of reassociateOps. + /// Set special operand attributes for new instructions after reassociation. + virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { return; - } + }; /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } @@ -819,10 +923,6 @@ protected: } public: - /// Returns true for the specified load / store if folding is possible. - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - ArrayRef Ops) const; - /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. If this is /// possible, returns true as well as the new instructions by reference. @@ -1266,8 +1366,73 @@ public: return 5; } + /// Return an array that contains the ids of the target indices (used for the + /// TargetIndex machine operand) and their names. + /// + /// MIR Serialization is able to serialize only the target indices that are + /// defined by this method. + virtual ArrayRef> + getSerializableTargetIndices() const { + return None; + } + + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + virtual std::pair + decomposeMachineOperandsTargetFlags(unsigned /*TF*/) const { + return std::make_pair(0u, 0u); + } + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the bitmask target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. 
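A target opts its operand flags into MIR serialization by overriding the hook documented above. A sketch for a hypothetical target; MyTargetInstrInfo, MyII::MO_LO16/MO_HI16, and the flag names are all invented:

    ArrayRef<std::pair<unsigned, const char *>>
    MyTargetInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
      static const std::pair<unsigned, const char *> Flags[] = {
          {MyII::MO_LO16, "my-lo16"}, {MyII::MO_HI16, "my-hi16"}};
      return makeArrayRef(Flags);
    }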
+ virtual ArrayRef> + getSerializableBitmaskMachineOperandTargetFlags() const { + return None; + } + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; + unsigned CatchRetOpcode; +}; + +/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +template<> +struct DenseMapInfo { + typedef DenseMapInfo RegInfo; + + static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), + RegInfo::getEmptyKey()); + } + static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), + RegInfo::getTombstoneKey()); + } + /// \brief Reuse getHashValue implementation from + /// std::pair. + static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { + std::pair PairVal = + std::make_pair(Val.Reg, Val.SubReg); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, + const TargetInstrInfo::RegSubRegPair &RHS) { + return RegInfo::isEqual(LHS.Reg, RHS.Reg) && + RegInfo::isEqual(LHS.SubReg, RHS.SubReg); + } }; } // End llvm namespace diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td index cc74006dc9fe..a37bbf2474c5 100644 --- a/include/llvm/Target/TargetItinerary.td +++ b/include/llvm/Target/TargetItinerary.td @@ -134,3 +134,19 @@ class ProcessorItineraries fu, list bp, // info. Subtargets using NoItineraries can bypass the scheduler's // expensive HazardRecognizer because no reservation table is needed. def NoItineraries : ProcessorItineraries<[], [], []>; + +//===----------------------------------------------------------------------===// +// Combo Function Unit data - This is a map of combo function unit names to +// the list of functional units that are included in the combination. +// +class ComboFuncData funclist> { + FuncUnit TheComboFunc = ComboFunc; + list FuncList = funclist; +} + +//===----------------------------------------------------------------------===// +// Combo Function Units - This is a list of all combo function unit data. +class ComboFuncUnits cfd> { + list CFD = cfd; +} + diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 4412d9b3c68e..140c36591acc 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -83,20 +83,22 @@ class TargetLoweringBase { public: /// This enum indicates whether operations are valid for a target, and if not, /// what action should be used to make them valid. - enum LegalizeAction { + enum LegalizeAction : uint8_t { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. Expand, // Try to expand this to other ops, otherwise use a libcall. + LibCall, // Don't try to expand this to other ops, always use a libcall. Custom // Use the LowerOperation hook to implement custom lowering. }; /// This enum indicates whether a types are legal for a target, and if not, /// what action should be used to make them valid. - enum LegalizeTypeAction { + enum LegalizeTypeAction : uint8_t { TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. 
TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -124,16 +126,17 @@ public: // mask (ex: x86 blends). }; - /// Enum that specifies what a AtomicRMWInst is expanded to, if at all. Exists - /// because different targets have different levels of support for these - /// atomic RMW instructions, and also have different options w.r.t. what they - /// should expand to. - enum class AtomicRMWExpansionKind { - None, // Don't expand the instruction. - LLSC, // Expand the instruction into loadlinked/storeconditional; used - // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional` - // returns true. - CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. + /// Enum that specifies what an atomic load/AtomicRMWInst is expanded + /// to, if at all. Exists because different targets have different levels of + /// support for these atomic instructions, and also have different options + /// w.r.t. what they should expand to. + enum class AtomicExpansionKind { + None, // Don't expand the instruction. + LLSC, // Expand the instruction into loadlinked/storeconditional; used + // by ARM/AArch64. + LLOnly, // Expand the (load) instruction into just a load-linked, which has + // greater atomic guarantees than a normal load. + CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. }; static ISD::NodeType getExtendForContent(BooleanContent Content) { @@ -226,7 +229,11 @@ public: /// Return true if integer divide is usually cheaper than a sequence of /// several shifts, adds, and multiplies for this target. - bool isIntDivCheap() const { return IntDivIsCheap; } + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, AttributeSet Attr) const { + return false; + } /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x) bool isFsqrtCheap() const { @@ -242,9 +249,6 @@ public: return BypassSlowDivWidths; } - /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra. - bool isPow2SDivCheap() const { return Pow2SDivIsCheap; } - /// Return true if Flow Control is an expensive operation that should be /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } @@ -409,20 +413,20 @@ public: class ValueTypeActionImpl { /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum /// that indicates how instruction selection should deal with the type. - uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; public: ValueTypeActionImpl() { - std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 0); + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); } LegalizeTypeAction getTypeAction(MVT VT) const { - return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy]; + return ValueTypeActions[VT.SimpleTy]; } void setTypeAction(MVT VT, LegalizeTypeAction Action) { - unsigned I = VT.SimpleTy; - ValueTypeActions[I] = Action; + ValueTypeActions[VT.SimpleTy] = Action; } }; @@ -546,8 +550,7 @@ public: // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. 
if (Op > array_lengthof(OpActions[0])) return Custom; - unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - return (LegalizeAction)OpActions[I][Op]; + return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } /// Return true if the specified operation is legal on this target or can be @@ -591,7 +594,7 @@ public: unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType]; + return LoadExtActions[ValI][MemI][ExtType]; } /// Return true if the specified load with extension is legal on this target. @@ -617,7 +620,7 @@ public: unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)TruncStoreActions[ValI][MemI]; + return TruncStoreActions[ValI][MemI]; } /// Return true if the specified store with truncation is legal on this @@ -672,9 +675,9 @@ public: ((unsigned)VT.SimpleTy >> 4) < array_lengthof(CondCodeActions[0]) && "Table isn't big enough!"); // See setCondCodeAction for how this is encoded. - uint32_t Shift = 2 * (VT.SimpleTy & 0xF); - uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 4]; - LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0x3); + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; + LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); assert(Action != Promote && "Can't promote condition code!"); return Action; } @@ -832,6 +835,10 @@ public: return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); } + unsigned getGatherAllAliasesMaxDepth() const { + return GatherAllAliasesMaxDepth; + } + /// \brief Get maximum # of store operations permitted for llvm.memset /// /// This function returns the maximum number of store operations permitted @@ -878,6 +885,14 @@ public: return false; } + /// Return true if the target supports a memory access of this type for the + /// given address space and alignment. If the access is allowed, the optional + /// final parameter returns if the access is also fast (as defined by the + /// target). + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace = 0, unsigned Alignment = 1, + bool *Fast = nullptr) const; + /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. /// @@ -930,15 +945,19 @@ public: } /// If a physical register, this returns the register that receives the - /// exception address on entry to a landing pad. - unsigned getExceptionPointerRegister() const { - return ExceptionPointerRegister; + /// exception address on entry to an EH pad. + virtual unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; } /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. 
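The new allowsMemoryAccess above bundles the legality check with the fast-path query. For example, probing a 4-byte-aligned i64 access, with TLI, Ctx, and DL assumed in scope:

    bool Fast = false;
    if (TLI.allowsMemoryAccess(Ctx, DL, MVT::i64, /*AddrSpace=*/0,
                               /*Alignment=*/4, &Fast)) {
      // The access is legal; Fast says whether the target also
      // considers the misaligned form cheap.
    }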
- unsigned getExceptionSelectorRegister() const { - return ExceptionSelectorRegister; + virtual unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; } /// Returns the target's jmp_buf size in bytes (if never set, the default is @@ -987,6 +1006,10 @@ public: return false; } + /// If the target has a standard location for the unsafe stack pointer, + /// returns the address of that location. Otherwise, returns nullptr. + virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; + /// Returns true if a cast between SrcAS and DestAS is a noop. virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { return false; @@ -1009,8 +1032,8 @@ public: int InstructionOpcodeToISD(unsigned Opcode) const; /// Estimate the cost of type-legalization and the legalized type. - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + std::pair getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; /// @} @@ -1018,10 +1041,6 @@ public: /// \name Helpers for atomic expansion. /// @{ - /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional - /// and expand AtomicCmpXchgInst. - virtual bool hasLoadLinkedStoreConditional() const { return false; } - /// Perform a load-linked operation on Addr, returning a "Value *" with the /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See @@ -1093,6 +1112,14 @@ public: } /// @} + // Emits code that executes when the comparison result in the ll/sc + // expansion of a cmpxchg instruction is such that the store-conditional will + // not execute. This makes it possible to balance out the load-linked with + // a dedicated instruction, if desired. + // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would + // be unnecessarily held, except if clrex, inserted by this hook, is executed. + virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} + /// Returns true if the given (atomic) store should be expanded by the /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { @@ -1102,18 +1129,25 @@ public: /// Returns true if arguments should be sign-extended in lib calls. virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { return IsSigned; - } + } - /// Returns true if the given (atomic) load should be expanded by the - /// IR-level AtomicExpand pass into a load-linked instruction - /// (through emitLoadLinked()). - virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; } + /// Returns how the given (atomic) load should be expanded by the + /// IR-level AtomicExpand pass. + virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { + return AtomicExpansionKind::None; + } + + /// Returns true if the given atomic cmpxchg should be expanded by the + /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence + /// (through emitLoadLinked() and emitStoreConditional()). + virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { + return false; + } /// Returns how the IR-level AtomicExpand pass should expand the given /// AtomicRMW, if at all. Default is to never expand. 
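Under the renamed AtomicExpansionKind (the hook rename appears just below), a target now answers with an expansion strategy rather than a bool. A sketch for a hypothetical LL/SC target; MyTargetLowering and the 64-bit cutoff are invented:

    TargetLowering::AtomicExpansionKind
    MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
      unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
      return Bits <= 64 ? AtomicExpansionKind::LLSC
                        : AtomicExpansionKind::None;
    }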
- virtual AtomicRMWExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { - return AtomicRMWExpansionKind::None; + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { + return AtomicExpansionKind::None; } /// On some platforms, an AtomicRMW that never actually modifies the value @@ -1204,18 +1238,6 @@ protected: StackPointerRegisterToSaveRestore = R; } - /// If set to a physical register, this sets the register that receives the - /// exception address on entry to a landing pad. - void setExceptionPointerRegister(unsigned R) { - ExceptionPointerRegister = R; - } - - /// If set to a physical register, this sets the register that receives the - /// exception typeid on entry to a landing pad. - void setExceptionSelectorRegister(unsigned R) { - ExceptionSelectorRegister = R; - } - /// Tells the code generator not to expand operations into sequences that use /// the select operations if possible. void setSelectIsExpensive(bool isExpensive = true) { @@ -1244,11 +1266,6 @@ protected: /// control. void setJumpIsExpensive(bool isExpensive = true); - /// Tells the code generator that integer divide is expensive, and if - /// possible, should be replaced by an alternate sequence of instructions not - /// containing an integer divide. - void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; } - /// Tells the code generator that fsqrt is cheap, and should not be replaced /// with an alternative sequence of instructions. void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; } @@ -1264,10 +1281,6 @@ protected: BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; } - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; } - /// Add the specified register class as an available regclass for the /// specified value type. This indicates the selector can handle values of /// that class natively. @@ -1279,7 +1292,7 @@ protected: /// Remove all register classes. 
void clearRegisterClasses() { - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*)); + std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); AvailableRegClasses.clear(); } @@ -1302,7 +1315,7 @@ protected: void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); - OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; + OpActions[(unsigned)VT.SimpleTy][Op] = Action; } /// Indicate that the specified load with extension does not work with the @@ -1311,7 +1324,7 @@ protected: LegalizeAction Action) { assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action; + LoadExtActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = Action; } /// Indicate that the specified truncating store does not work with the @@ -1319,7 +1332,7 @@ protected: void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action; + TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } /// Indicate that the specified indexed load does or does not work with the @@ -1356,12 +1369,13 @@ protected: LegalizeAction Action) { assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && "Table isn't big enough!"); - /// The lower 5 bits of the SimpleTy index into Nth 2bit set from the 32-bit - /// value and the upper 27 bits index into the second dimension of the array + assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); + /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit + /// value and the upper 29 bits index into the second dimension of the array /// to select what 32-bit value to use. - uint32_t Shift = 2 * (VT.SimpleTy & 0xF); - CondCodeActions[CC][VT.SimpleTy >> 4] &= ~((uint32_t)0x3 << Shift); - CondCodeActions[CC][VT.SimpleTy >> 4] |= (uint32_t)Action << Shift; + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); + CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; } /// If Opc/OrigVT is specified as being promoted, the promotion code defaults @@ -1504,23 +1518,24 @@ public: return false; } - /// Return true if it's free to truncate a value of type Ty1 to type - /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 + /// Return true if it's free to truncate a value of type FromTy to type + /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. - virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// Targets must return false when FromTy <= ToTy. + virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { return false; } - /// Return true if a truncation from Ty1 to Ty2 is permitted when deciding + /// Return true if a truncation from FromTy to ToTy is permitted when deciding /// whether a call is in tail position. Typically this means that both results /// would be assigned to the same register or stack slot, but it could mean /// the target performs adequate checks of its own before proceeding with the - /// tail call. - virtual bool allowTruncateForTailCall(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// tail call. Targets must return false when FromTy <= ToTy. 
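The tightened isTruncateFree contract above ("must return false when FromTy <= ToTy") is easy to satisfy with a strict size comparison. An x86-flavoured sketch; MyTargetLowering is hypothetical:

    bool MyTargetLowering::isTruncateFree(Type *FromTy, Type *ToTy) const {
      if (!FromTy->isIntegerTy() || !ToTy->isIntegerTy())
        return false;
      // Strictly narrowing only, e.g. i32 -> i16 via a sub-register.
      return FromTy->getPrimitiveSizeInBits() > ToTy->getPrimitiveSizeInBits();
    }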
+ virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { return false; } - virtual bool isTruncateFree(EVT /*VT1*/, EVT /*VT2*/) const { + virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } @@ -1553,19 +1568,21 @@ public: return isExtFreeImpl(I); } - /// Return true if any actual instruction that defines a value of type Ty1 - /// implicitly zero-extends the value to Ty2 in the result register. + /// Return true if any actual instruction that defines a value of type FromTy + /// implicitly zero-extends the value to ToTy in the result register. /// - /// This does not necessarily include registers defined in unknown ways, such - /// as incoming arguments, or copies from unknown virtual registers. Also, if - /// isTruncateFree(Ty2, Ty1) is true, this does not necessarily apply to - /// truncate instructions. e.g. on x86-64, all instructions that define 32-bit - /// values implicit zero-extend the result out to 64 bits. - virtual bool isZExtFree(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// The function should return true when it is likely that the truncate can + /// be freely folded with an instruction defining a value of FromTy. If + /// the defining instruction is unknown (because you're looking at a + /// function argument, PHI, etc.) then the target may require an + /// explicit truncate, which is not necessarily free, but this function + /// does not deal with those cases. + /// Targets must return false when FromTy >= ToTy. + virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { return false; } - virtual bool isZExtFree(EVT /*VT1*/, EVT /*VT2*/) const { + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } @@ -1699,6 +1716,12 @@ public: return false; } + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR + // even if the vector itself has multiple uses. + virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { + return false; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -1755,12 +1778,6 @@ private: /// combined with "shift" to BitExtract instructions. bool HasExtractBitsInsn; - /// Tells the code generator not to expand integer divides by constants into a - /// sequence of muls, adds, and shifts. This is a hack until a real cost - /// model is in place. If we ever optimize for size, this will be set to true - /// unconditionally. - bool IntDivIsCheap; - // Don't expand fsqrt with an approximation based on the inverse sqrt. bool FsqrtIsCheap; @@ -1770,10 +1787,6 @@ private: /// div/rem when the operands are positive and less than 256. DenseMap BypassSlowDivWidths; - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - bool Pow2SDivIsCheap; - /// Tells the code generator that it shouldn't generate extra flow control /// instructions and should attempt to combine flow control instructions via /// predication. @@ -1841,14 +1854,6 @@ private: /// llvm.savestack/llvm.restorestack should save and restore. unsigned StackPointerRegisterToSaveRestore; - /// If set to a physical register, this specifies the register that receives - /// the exception address on entry to a landing pad. - unsigned ExceptionPointerRegister; - - /// If set to a physical register, this specifies the register that receives - /// the exception typeid on entry to a landing pad. 
- unsigned ExceptionSelectorRegister; - /// This indicates the default register class to use for each ValueType the /// target supports natively. const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; @@ -1880,17 +1885,17 @@ private: /// operations are Legal (aka, supported natively by the target), but /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. - uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; + LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; /// For each load extension type and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with a load of a /// specific value type and extension type. - uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] - [ISD::LAST_LOADEXT_TYPE]; + LegalizeAction LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] + [ISD::LAST_LOADEXT_TYPE]; /// For each value type pair keep a LegalizeAction that indicates whether a /// truncating store of a specific value type and truncating type is legal. - uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; + LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; /// For each indexed mode and each value type, keep a pair of LegalizeAction /// that indicates how instruction selection should deal with the load / @@ -1903,11 +1908,12 @@ private: /// For each condition code (ISD::CondCode) keep a LegalizeAction that /// indicates how instruction selection should deal with the condition code. /// - /// Because each CC action takes up 2 bits, we need to have the array size be + /// Because each CC action takes up 4 bits, we need to have the array size be /// large enough to fit all of the value types. This can be done by rounding - /// up the MVT::LAST_VALUETYPE value to the next multiple of 16. - uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 15) / 16]; + /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. + uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; +protected: ValueTypeActionImpl ValueTypeActions; private: @@ -1947,6 +1953,12 @@ protected: /// is[Z|FP]ExtFree of the related types is not true. virtual bool isExtFreeImpl(const Instruction *I) const { return false; } + /// Depth that GatherAllAliases should should continue looking for chain + /// dependencies when trying to find a more preferrable chain. As an + /// approximation, this should be more than the number of consecutive stores + /// expected to be merged. + unsigned GatherAllAliasesMaxDepth; + /// \brief Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of @@ -1993,7 +2005,7 @@ protected: unsigned MaxStoresPerMemmove; /// Maximum number of store instructions that may be substituted for a call to - /// memmove, used for functions with OpSize attribute. + /// memmove, used for functions with OptSize attribute. unsigned MaxStoresPerMemmoveOptSize; /// Tells the code generator that select is more expensive than a branch if @@ -2087,9 +2099,9 @@ public: /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
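The updated makeLibCall takes an ArrayRef of operands instead of a pointer/count pair (its new signature follows just below). A sketch lowering a 64-bit signed division, with DAG, TLI, and Op assumed in scope:

    SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
    std::pair<SDValue, SDValue> CallInfo =
        TLI.makeLibCall(DAG, RTLIB::SDIV_I64, MVT::i64, Ops,
                        /*isSigned=*/true, SDLoc(Op));
    SDValue Result = CallInfo.first; // second is the output chain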
std::pair makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, - EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, - SDLoc dl, bool doesNotReturn = false, + EVT RetVT, ArrayRef Ops, + bool isSigned, SDLoc dl, + bool doesNotReturn = false, bool isReturnValueUsed = true) const; //===--------------------------------------------------------------------===// @@ -2251,6 +2263,29 @@ public: return false; } + /// Return true if the target supports that a subset of CSRs for the given + /// machine function is handled explicitly via copies. + virtual bool supportSplitCSR(MachineFunction *MF) const { + return false; + } + + /// Perform necessary initialization to handle a subset of CSRs explicitly + /// via copies. This function is called at the beginning of instruction + /// selection. + virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { + llvm_unreachable("Not Implemented"); + } + + /// Insert explicit copies in entry and exit blocks. We copy a subset of + /// CSRs to virtual registers in the entry block, and copy them back to + /// physical registers in the exit blocks. This function is called at the end + /// of instruction selection. + virtual void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl &Exits) const { + llvm_unreachable("Not Implemented"); + } + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -2726,16 +2761,21 @@ public: SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector *Created) const; + + /// Targets may override this function to provide custom SDIV lowering for + /// power-of-2 denominators. If the target returns an empty SDValue, LLVM + /// assumes SDIV is expensive and replaces it with a series of other integer + /// operations. virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector *Created) const { - return SDValue(); - } + std::vector *Created) const; - /// Indicate whether this target prefers to combine the given number of FDIVs - /// with the same divisor. - virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const { - return false; + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. + virtual unsigned combineRepeatedFPDivisors() const { + return 0; } /// Hooks for building estimates in place of slower divisions and square @@ -2821,6 +2861,10 @@ public: virtual bool useLoadStackGuardNode() const { return false; } + + /// Lower TLS global address SDNode for target independent emulated TLS model. 
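combineRepeatedFPDivisors above switches from a yes/no answer to a use-count threshold, with zero disabling the combine entirely. A sketch of a target that wants the reciprocal transform once a divisor has two users; MyTargetLowering is hypothetical:

    unsigned MyTargetLowering::combineRepeatedFPDivisors() const {
      return 2; // rewrite x/d, y/d as r = 1/d; x*r, y*r at >= 2 uses
    }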
+ virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 5b626c244ba0..cb52698c58b9 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -42,16 +42,15 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { void operator=(const TargetLoweringObjectFile&) = delete; protected: - const DataLayout *DL; bool SupportIndirectSymViaGOTPCRel; bool SupportGOTPCRelWithOffset; public: MCContext &getContext() const { return *Ctx; } - TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr), - SupportIndirectSymViaGOTPCRel(false), - SupportGOTPCRelWithOffset(true) {} + TargetLoweringObjectFile() + : MCObjectFileInfo(), Ctx(nullptr), SupportIndirectSymViaGOTPCRel(false), + SupportGOTPCRelWithOffset(true) {} virtual ~TargetLoweringObjectFile(); @@ -60,8 +59,7 @@ public: /// implementations a chance to set up their default sections. virtual void Initialize(MCContext &ctx, const TargetMachine &TM); - virtual void emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, + virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const; /// Emit the module flags that the platform cares about. @@ -71,7 +69,8 @@ public: /// Given a constant with the SectionKind, return a section that it should be /// placed in. - virtual MCSection *getSectionForConstant(SectionKind Kind, + virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C) const; /// Classify the specified global variable into a set of target independent @@ -94,8 +93,7 @@ public: } virtual void getNameWithPrefix(SmallVectorImpl &OutName, - const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM) const; virtual MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang, diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index f1e9d1718f5a..74e91b5790cb 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -76,7 +76,12 @@ protected: // Can only create subclasses. /// The Target that this machine was created for. const Target &TheTarget; - /// For ABI type size and alignment. + /// DataLayout for the target: keep ABI type size and alignment. + /// + /// The DataLayout is created based on the string representation provided + /// during construction. It is kept here only to avoid reparsing the string + /// but should not really be used during compilation, because it has an + /// internal cache that is context specific. const DataLayout DL; /// Triple string, CPU name, and target feature strings the TargetMachine @@ -97,6 +102,12 @@ protected: // Can only create subclasses. const MCSubtargetInfo *STI; unsigned RequireStructuredCFG : 1; + unsigned O0WantsFastISel : 1; + + /// This API is here to support the C API, deprecated in 3.7 release. + /// This should never be used outside of legacy existing client. + const DataLayout &getDataLayout() const { return DL; } + friend struct C_API_PRIVATE_ACCESS; public: mutable TargetOptions Options; @@ -125,15 +136,23 @@ public: return *static_cast(getSubtargetImpl(F)); } - /// Deprecated in 3.7, will be removed in 3.8. 
Use createDataLayout() instead.
-  ///
-  /// This method returns a pointer to the DataLayout for the target. It should
-  /// be unchanging for every subtarget.
-  const DataLayout *getDataLayout() const { return &DL; }
-
   /// Create a DataLayout.
   const DataLayout createDataLayout() const { return DL; }

+  /// Test if a DataLayout is compatible with the CodeGen for this target.
+  ///
+  /// The LLVM Module owns a DataLayout that is used for the target independent
+  /// optimizations and code generation. This hook provides a target specific
+  /// check on the validity of this DataLayout.
+  bool isCompatibleDataLayout(const DataLayout &Candidate) const {
+    return DL == Candidate;
+  }
+
+  /// Get the pointer size for this target.
+  ///
+  /// This is the only time the DataLayout in the TargetMachine is used.
+  unsigned getPointerSize() const { return DL.getPointerSize(); }
+
   /// \brief Reset the target options based on the function's attributes.
   // FIXME: Remove TargetOptions that affect per-function code generation
   // from TargetMachine.
@@ -172,6 +191,8 @@ public:
   void setOptLevel(CodeGenOpt::Level Level) const;

   void setFastISel(bool Enable) { Options.EnableFastISel = Enable; }
+  bool getO0WantsFastISel() { return O0WantsFastISel; }
+  void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; }

   bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }

@@ -234,6 +255,13 @@ public:
     return true;
   }

+  /// True if subtarget inserts the final scheduling pass on its own.
+  ///
+  /// Branch relaxation, which must happen after block placement, can
+  /// on some targets (e.g. SystemZ) expose additional post-RA
+  /// scheduling opportunities.
+  virtual bool targetSchedulesPostRAScheduling() const { return false; }
+
   void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
                          Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
   MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const;
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 50197191109d..db37bdb62582 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -126,8 +126,12 @@ enum {
   /// Loading instruction that may page fault, bundled with associated
   /// information on how to handle such a page fault. It is intended to support
   /// "zero cost" null checks in managed languages by allowing LLVM to fold
-  /// comparisions into existing memory operations.
+  /// comparisons into existing memory operations.
   FAULTING_LOAD_OP = 22,
+
+  /// GENERIC_OP_END - This must be the last enum value in this list.
+  /// The target-specific post-isel opcode values start here.
+  GENERIC_OP_END = FAULTING_LOAD_OP,
 };
 } // end namespace TargetOpcode
 } // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index d52cb60cf108..d98d0fa0ed5f 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -58,24 +58,53 @@ namespace llvm {
     };
   }

+  enum class EABI {
+    Unknown,
+    Default, // Default means not specified.
+    EABI4,   // Target-specific (either 4, 5 or gnu depending on triple).
+    EABI5,
+    GNU
+  };
+
+  /// Identify a debugger for "tuning" the debug info.
+  ///
+  /// The "debugger tuning" concept allows us to present a more intuitive
+  /// interface that unpacks into different sets of defaults for the various
+  /// individual feature-flag settings that suit the preferences of the
+  /// various debuggers.
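// A minimal sketch of the isCompatibleDataLayout() hook above as seen from
// the client side; checkModule is a hypothetical helper, not part of the patch.
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

static void checkModule(const llvm::Module &M, const llvm::TargetMachine &TM) {
  // The Module owns the DataLayout used by the optimizer; refuse to
  // proceed when it disagrees with the TargetMachine's layout.
  if (!TM.isCompatibleDataLayout(M.getDataLayout()))
    llvm::report_fatal_error("module DataLayout is incompatible with target");
}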
However, it's worth remembering that debuggers are
+  /// not the only consumers of debug info, and some variations in DWARF might
+  /// better be treated as target/platform issues. Fundamentally,
+  /// o if the feature is useful (or not) to a particular debugger, regardless
+  ///   of the target, that's a tuning decision;
+  /// o if the feature is useful (or not) on a particular platform, regardless
+  ///   of the debugger, that's a target decision.
+  /// It's not impossible to see both factors in some specific case.
+  ///
+  /// The "tuning" should be used to set defaults for individual feature flags
+  /// in DwarfDebug; if a given feature has a more specific command-line option,
+  /// that option should take precedence over the tuning.
+  enum class DebuggerKind {
+    Default, // No specific tuning requested.
+    GDB,     // Tune debug info for gdb.
+    LLDB,    // Tune debug info for lldb.
+    SCE      // Tune debug info for SCE targets (e.g. PS4).
+  };
+
   class TargetOptions {
   public:
     TargetOptions()
-        : PrintMachineCode(false),
-          LessPreciseFPMADOption(false), UnsafeFPMath(false),
-          NoInfsFPMath(false), NoNaNsFPMath(false),
-          HonorSignDependentRoundingFPMathOption(false),
-          NoZerosInBSS(false),
-          GuaranteedTailCallOpt(false),
-          StackAlignmentOverride(0),
+        : PrintMachineCode(false), LessPreciseFPMADOption(false),
+          UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
+          HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
+          GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
           EnableFastISel(false), PositionIndependentExecutable(false),
           UseInitArray(false), DisableIntegratedAS(false),
           CompressDebugSections(false), FunctionSections(false),
           DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
-          FloatABIType(FloatABI::Default),
+          EmulatedTLS(false), FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
-          JTType(JumpTable::Single),
-          ThreadModel(ThreadModel::POSIX) {}
+          JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX),
+          EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default) {}

     /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
     /// option is specified on the command line, and should enable debugging
@@ -172,6 +201,10 @@ namespace llvm {
     /// Emit target-specific trap instruction for 'unreachable' IR instructions.
     unsigned TrapUnreachable : 1;

+    /// EmulatedTLS - This flag enables the emulated TLS model, using the
+    /// emutls functions in the runtime library.
+    unsigned EmulatedTLS : 1;
+
     /// FloatABIType - This setting is set when the -float-abi=xxx option is specified
     /// on the command line. This setting may either be Default, Soft, or Hard.
     /// Default selects the target's default behavior. Soft selects the ABI for
@@ -200,7 +233,7 @@ namespace llvm {

     /// This class encapsulates options for reciprocal-estimate code generation.
     TargetRecip Reciprocals;
-
+
     /// JTType - This flag specifies the type of jump-instruction table to
     /// create for functions that have the jumptable attribute.
     JumpTable::JumpTableType JTType;

@@ -209,6 +242,12 @@ namespace llvm {
     /// for things like atomics.
     ThreadModel::Model ThreadModel;

+    /// EABIVersion - This flag specifies the EABI version.
+    EABI EABIVersion;
+
+    /// Which debugger to tune for.
+    DebuggerKind DebuggerTuning;
+
     /// Machine level options.
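// A hedged example of populating the new TargetOptions fields declared above
// from a hypothetical front end; the specific choices (emulated TLS, EABI5,
// LLDB tuning) are illustrative only.
#include "llvm/Target/TargetOptions.h"

llvm::TargetOptions makeOptions() {
  llvm::TargetOptions Opts;                       // defaults per the ctor above
  Opts.EmulatedTLS = 1;                           // lower TLS via the emutls runtime
  Opts.EABIVersion = llvm::EABI::EABI5;           // e.g. from an -meabi style flag
  Opts.DebuggerTuning = llvm::DebuggerKind::LLDB; // tune DWARF defaults for lldb
  return Opts;
}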
MCTargetOptions MCOptions; }; @@ -231,11 +270,14 @@ inline bool operator==(const TargetOptions &LHS, ARE_EQUAL(PositionIndependentExecutable) && ARE_EQUAL(UseInitArray) && ARE_EQUAL(TrapUnreachable) && + ARE_EQUAL(EmulatedTLS) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && ARE_EQUAL(Reciprocals) && ARE_EQUAL(JTType) && ARE_EQUAL(ThreadModel) && + ARE_EQUAL(EABIVersion) && + ARE_EQUAL(DebuggerTuning) && ARE_EQUAL(MCOptions); #undef ARE_EQUAL } diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h index 4cc3672d758d..210d49324848 100644 --- a/include/llvm/Target/TargetRecip.h +++ b/include/llvm/Target/TargetRecip.h @@ -31,20 +31,20 @@ public: /// Initialize all or part of the operations from command-line options or /// a front end. TargetRecip(const std::vector &Args); - + /// Set whether a particular reciprocal operation is enabled and how many /// refinement steps are needed when using it. Use "all" to set enablement /// and refinement steps for all operations. - void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps); + void setDefaults(StringRef Key, bool Enable, unsigned RefSteps); /// Return true if the reciprocal operation has been enabled by default or /// from the command-line. Return false if the operation has been disabled /// by default or from the command-line. - bool isEnabled(const StringRef &Key) const; + bool isEnabled(StringRef Key) const; /// Return the number of iterations necessary to refine the /// the result of a machine instruction for the given reciprocal operation. - unsigned getRefinementSteps(const StringRef &Key) const; + unsigned getRefinementSteps(StringRef Key) const; bool operator==(const TargetRecip &Other) const; @@ -52,14 +52,14 @@ private: enum { Uninitialized = -1 }; - + struct RecipParams { int8_t Enabled; int8_t RefinementSteps; - + RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {} }; - + std::map RecipMap; typedef std::map::iterator RecipIter; typedef std::map::const_iterator ConstRecipIter; diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 0ee936a76211..fccaad4705d5 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -21,6 +21,8 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Printable.h" #include #include @@ -32,6 +34,24 @@ class RegScavenger; template class SmallVectorImpl; class VirtRegMap; class raw_ostream; +class LiveRegMatrix; + +/// A bitmask representing the covering of a register with sub-registers. +/// +/// This is typically used to track liveness at sub-register granularity. +/// Lane masks for sub-register indices are similar to register units for +/// physical registers. The individual bits in a lane mask can't be assigned +/// any specific meaning. They can be used to check if two sub-register +/// indices overlap. 
+/// +/// Iff the target has a register such that: +/// +/// getSubReg(Reg, A) overlaps getSubReg(Reg, B) +/// +/// then: +/// +/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 +typedef unsigned LaneBitmask; class TargetRegisterClass { public: @@ -45,7 +65,7 @@ public: const vt_iterator VTs; const uint32_t *SubClassMask; const uint16_t *SuperRegIndices; - const unsigned LaneMask; + const LaneBitmask LaneMask; /// Classes with a higher priority value are assigned first by register /// allocators using a greedy heuristic. The value is in the range [0,63]. const uint8_t AllocationPriority; @@ -54,8 +74,7 @@ public: const sc_iterator SuperClasses; ArrayRef (*OrderFunc)(const MachineFunction&); - /// getID() - Return the register class ID number. - /// + /// Return the register class ID number. unsigned getID() const { return MC->getID(); } /// begin/end - Return all of the registers in this class. @@ -63,46 +82,42 @@ public: iterator begin() const { return MC->begin(); } iterator end() const { return MC->end(); } - /// getNumRegs - Return the number of registers in this class. - /// + /// Return the number of registers in this class. unsigned getNumRegs() const { return MC->getNumRegs(); } - /// getRegister - Return the specified register in the class. - /// + /// Return the specified register in the class. unsigned getRegister(unsigned i) const { return MC->getRegister(i); } - /// contains - Return true if the specified register is included in this - /// register class. This does not include virtual registers. + /// Return true if the specified register is included in this register class. + /// This does not include virtual registers. bool contains(unsigned Reg) const { return MC->contains(Reg); } - /// contains - Return true if both registers are in this class. + /// Return true if both registers are in this class. bool contains(unsigned Reg1, unsigned Reg2) const { return MC->contains(Reg1, Reg2); } - /// getSize - Return the size of the register in bytes, which is also the size + /// Return the size of the register in bytes, which is also the size /// of a stack slot allocated to hold a spilled copy of this register. unsigned getSize() const { return MC->getSize(); } - /// getAlignment - Return the minimum required alignment for a register of - /// this class. + /// Return the minimum required alignment for a register of this class. unsigned getAlignment() const { return MC->getAlignment(); } - /// getCopyCost - Return the cost of copying a value between two registers in - /// this class. A negative number means the register class is very expensive + /// Return the cost of copying a value between two registers in this class. + /// A negative number means the register class is very expensive /// to copy e.g. status flag register classes. int getCopyCost() const { return MC->getCopyCost(); } - /// isAllocatable - Return true if this register class may be used to create - /// virtual registers. + /// Return true if this register class may be used to create virtual + /// registers. bool isAllocatable() const { return MC->isAllocatable(); } - /// hasType - return true if this TargetRegisterClass has the ValueType vt. - /// + /// Return true if this TargetRegisterClass has the ValueType vt. 
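// A small sketch of the LaneBitmask contract described above: with precise
// lane masks, sub-register indices A and B of the same register overlap
// exactly when their masks intersect. TRI, A and B are assumed inputs.
#include "llvm/Target/TargetRegisterInfo.h"

static bool subRegIndicesOverlap(const llvm::TargetRegisterInfo &TRI,
                                 unsigned A, unsigned B) {
  return (TRI.getSubRegIndexLaneMask(A) & TRI.getSubRegIndexLaneMask(B)) != 0;
}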
bool hasType(MVT vt) const { for(int i = 0; VTs[i] != MVT::Other; ++i) if (MVT(VTs[i]) == vt) @@ -122,41 +137,39 @@ public: return I; } - /// hasSubClass - return true if the specified TargetRegisterClass + /// Return true if the specified TargetRegisterClass /// is a proper sub-class of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *RC) const { return RC != this && hasSubClassEq(RC); } - /// hasSubClassEq - Returns true if RC is a sub-class of or equal to this - /// class. + /// Returns true if RC is a sub-class of or equal to this class. bool hasSubClassEq(const TargetRegisterClass *RC) const { unsigned ID = RC->getID(); return (SubClassMask[ID / 32] >> (ID % 32)) & 1; } - /// hasSuperClass - return true if the specified TargetRegisterClass is a + /// Return true if the specified TargetRegisterClass is a /// proper super-class of this TargetRegisterClass. bool hasSuperClass(const TargetRegisterClass *RC) const { return RC->hasSubClass(this); } - /// hasSuperClassEq - Returns true if RC is a super-class of or equal to this - /// class. + /// Returns true if RC is a super-class of or equal to this class. bool hasSuperClassEq(const TargetRegisterClass *RC) const { return RC->hasSubClassEq(this); } - /// getSubClassMask - Returns a bit vector of subclasses, including this one. + /// Returns a bit vector of subclasses, including this one. /// The vector is indexed by class IDs, see hasSubClassEq() above for how to /// use it. const uint32_t *getSubClassMask() const { return SubClassMask; } - /// getSuperRegIndices - Returns a 0-terminated list of sub-register indices - /// that project some super-register class into this register class. The list - /// has an entry for each Idx such that: + /// Returns a 0-terminated list of sub-register indices that project some + /// super-register class into this register class. The list has an entry for + /// each Idx such that: /// /// There exists SuperRC where: /// For all Reg in SuperRC: @@ -166,23 +179,23 @@ public: return SuperRegIndices; } - /// getSuperClasses - Returns a NULL terminated list of super-classes. The + /// Returns a NULL-terminated list of super-classes. The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. sc_iterator getSuperClasses() const { return SuperClasses; } - /// isASubClass - return true if this TargetRegisterClass is a subset + /// Return true if this TargetRegisterClass is a subset /// class of at least one other TargetRegisterClass. bool isASubClass() const { return SuperClasses[0] != nullptr; } - /// getRawAllocationOrder - Returns the preferred order for allocating - /// registers from this register class in MF. The raw order comes directly - /// from the .td file and may include reserved registers that are not - /// allocatable. Register allocators should also make sure to allocate + /// Returns the preferred order for allocating registers from this register + /// class in MF. The raw order comes directly from the .td file and may + /// include reserved registers that are not allocatable. + /// Register allocators should also make sure to allocate /// callee-saved registers only after all the volatiles are used. The /// RegisterClassInfo class provides filtered allocation orders with /// callee-saved registers moved to the end. @@ -200,13 +213,13 @@ public: /// Returns the combination of all lane masks of register in this class. 
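// A worked example (editorial) of the SubClassMask indexing used by
// hasSubClassEq() above: one bit per register-class ID, packed into 32-bit
// words, so ID 37 lands in word 37 / 32 == 1 at bit 37 % 32 == 5.
unsigned ID = 37; // arbitrary class ID, for illustration only
bool IsSub = (SubClassMask[ID / 32] >> (ID % 32)) & 1; // word 1, bit 5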
/// The lane masks of the registers are the combination of all lane masks /// of their subregisters. - unsigned getLaneMask() const { + LaneBitmask getLaneMask() const { return LaneMask; } }; -/// TargetRegisterInfoDesc - Extra information, not in MCRegisterDesc, about -/// registers. These are used by codegen, not by MC. +/// Extra information, not in MCRegisterDesc, about registers. +/// These are used by codegen, not by MC. struct TargetRegisterInfoDesc { unsigned CostPerUse; // Extra cost of instructions using register. bool inAllocatableClass; // Register belongs to an allocatable regclass. @@ -232,7 +245,7 @@ private: const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen const char *const *SubRegIndexNames; // Names of subreg indexes. // Pointer to array of lane masks, one per sub-reg index. - const unsigned *SubRegIndexLaneMasks; + const LaneBitmask *SubRegIndexLaneMasks; regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses unsigned CoveringLanes; @@ -242,7 +255,7 @@ protected: regclass_iterator RegClassBegin, regclass_iterator RegClassEnd, const char *const *SRINames, - const unsigned *SRILaneMasks, + const LaneBitmask *SRILaneMasks, unsigned CoveringLanes); virtual ~TargetRegisterInfo(); public: @@ -270,77 +283,74 @@ public: return int(Reg) >= (1 << 30); } - /// stackSlot2Index - Compute the frame index from a register value - /// representing a stack slot. + /// Compute the frame index from a register value representing a stack slot. static int stackSlot2Index(unsigned Reg) { assert(isStackSlot(Reg) && "Not a stack slot"); return int(Reg - (1u << 30)); } - /// index2StackSlot - Convert a non-negative frame index to a stack slot - /// register value. + /// Convert a non-negative frame index to a stack slot register value. static unsigned index2StackSlot(int FI) { assert(FI >= 0 && "Cannot hold a negative frame index."); return FI + (1u << 30); } - /// isPhysicalRegister - Return true if the specified register number is in + /// Return true if the specified register number is in /// the physical register namespace. static bool isPhysicalRegister(unsigned Reg) { assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); return int(Reg) > 0; } - /// isVirtualRegister - Return true if the specified register number is in + /// Return true if the specified register number is in /// the virtual register namespace. static bool isVirtualRegister(unsigned Reg) { assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); return int(Reg) < 0; } - /// virtReg2Index - Convert a virtual register number to a 0-based index. + /// Convert a virtual register number to a 0-based index. /// The first virtual register in a function will get the index 0. static unsigned virtReg2Index(unsigned Reg) { assert(isVirtualRegister(Reg) && "Not a virtual register"); return Reg & ~(1u << 31); } - /// index2VirtReg - Convert a 0-based index to a virtual register number. + /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. static unsigned index2VirtReg(unsigned Index) { return Index | (1u << 31); } - /// getMinimalPhysRegClass - Returns the Register Class of a physical - /// register of the given type, picking the most sub register class of - /// the right type that contains this physreg. + /// Returns the Register Class of a physical register of the given type, + /// picking the most sub register class of the right type that contains this + /// physreg. 
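// A worked example of the register-number encodings implemented by the
// static helpers above; the values follow directly from the shown arithmetic
// (virtual registers set bit 31, stack slots live at or above 1u << 30).
unsigned V = llvm::TargetRegisterInfo::index2VirtReg(0);   // 0x80000000
unsigned I = llvm::TargetRegisterInfo::virtReg2Index(V);   // 0 again
unsigned S = llvm::TargetRegisterInfo::index2StackSlot(7); // (1u << 30) + 7
int      F = llvm::TargetRegisterInfo::stackSlot2Index(S); // 7 again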
const TargetRegisterClass * getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const; - /// getAllocatableClass - Return the maximal subclass of the given register - /// class that is alloctable, or NULL. + /// Return the maximal subclass of the given register class that is + /// allocatable or NULL. const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const; - /// getAllocatableSet - Returns a bitset indexed by register number - /// indicating if a register is allocatable or not. If a register class is - /// specified, returns the subset for the class. + /// Returns a bitset indexed by register number indicating if a register is + /// allocatable or not. If a register class is specified, returns the subset + /// for the class. BitVector getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC = nullptr) const; - /// getCostPerUse - Return the additional cost of using this register instead + /// Return the additional cost of using this register instead /// of other registers in its class. unsigned getCostPerUse(unsigned RegNo) const { return InfoDesc[RegNo].CostPerUse; } - /// isInAllocatableClass - Return true if the register is in the allocation - /// of any register class. + /// Return true if the register is in the allocation of any register class. bool isInAllocatableClass(unsigned RegNo) const { return InfoDesc[RegNo].inAllocatableClass; } - /// getSubRegIndexName - Return the human-readable symbolic target-specific + /// Return the human-readable symbolic target-specific /// name for the specified SubRegIndex. const char *getSubRegIndexName(unsigned SubIdx) const { assert(SubIdx && SubIdx < getNumSubRegIndices() && @@ -348,44 +358,15 @@ public: return SubRegIndexNames[SubIdx-1]; } - /// getSubRegIndexLaneMask - Return a bitmask representing the parts of a - /// register that are covered by SubIdx. + /// Return a bitmask representing the parts of a register that are covered by + /// SubIdx \see LaneBitmask. /// - /// Lane masks for sub-register indices are similar to register units for - /// physical registers. The individual bits in a lane mask can't be assigned - /// any specific meaning. They can be used to check if two sub-register - /// indices overlap. - /// - /// If the target has a register such that: - /// - /// getSubReg(Reg, A) overlaps getSubReg(Reg, B) - /// - /// then: - /// - /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 - /// - /// The converse is not necessarily true. If two lane masks have a common - /// bit, the corresponding sub-registers may not overlap, but it can be - /// assumed that they usually will. /// SubIdx == 0 is allowed, it has the lane mask ~0u. - unsigned getSubRegIndexLaneMask(unsigned SubIdx) const { + LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const { assert(SubIdx < getNumSubRegIndices() && "This is not a subregister index"); return SubRegIndexLaneMasks[SubIdx]; } - /// Returns true if the given lane mask is imprecise. - /// - /// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of - /// bits, so for targets with more than 31 disjunct subregister indices there - /// may be cases where: - /// getSubReg(Reg,A) does not overlap getSubReg(Reg,B) - /// but we still have - /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0. - /// This function returns true in those cases. 
- static bool isImpreciseLaneMask(unsigned LaneMask) { - return LaneMask & 0x80000000u; - } - /// The lane masks returned by getSubRegIndexLaneMask() above can only be /// used to determine if sub-registers overlap - they can't be used to /// determine if a set of sub-registers completely cover another @@ -409,10 +390,10 @@ public: /// /// If (MaskA & ~(MaskB & Covering)) == 0, then SubA is completely covered by /// SubB. - unsigned getCoveringLanes() const { return CoveringLanes; } + LaneBitmask getCoveringLanes() const { return CoveringLanes; } - /// regsOverlap - Returns true if the two registers are equal or alias each - /// other. The registers may be virtual register. + /// Returns true if the two registers are equal or alias each other. + /// The registers may be virtual registers. bool regsOverlap(unsigned regA, unsigned regB) const { if (regA == regB) return true; if (isVirtualRegister(regA) || isVirtualRegister(regB)) @@ -429,7 +410,7 @@ public: return false; } - /// hasRegUnit - Returns true if Reg contains RegUnit. + /// Returns true if Reg contains RegUnit. bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) if (*Units == RegUnit) @@ -437,18 +418,23 @@ public: return false; } - /// getCalleeSavedRegs - Return a null-terminated list of all of the - /// callee saved registers on this target. The register should be in the - /// order of desired callee-save stack frame offset. The first register is - /// closest to the incoming stack pointer if stack grows down, and vice versa. + /// Return a null-terminated list of all of the callee-saved registers on + /// this target. The register should be in the order of desired callee-save + /// stack frame offset. The first register is closest to the incoming stack + /// pointer if stack grows down, and vice versa. /// virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; - /// getCallPreservedMask - Return a mask of call-preserved registers for the - /// given calling convention on the current function. The mask should - /// include all call-preserved aliases. This is used by the register - /// allocator to determine which registers can be live across a call. + virtual const MCPhysReg* + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { + return nullptr; + } + + /// Return a mask of call-preserved registers for the given calling convention + /// on the current function. The mask should include all call-preserved + /// aliases. This is used by the register allocator to determine which + /// registers can be live across a call. /// /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries. /// A set bit indicates that all bits of the corresponding register are @@ -469,13 +455,18 @@ public: return nullptr; } + /// Return a register mask that clobbers everything. + virtual const uint32_t *getNoPreservedMask() const { + llvm_unreachable("target does not provide no presered mask"); + } + /// Return all the call-preserved register masks defined for this target. virtual ArrayRef getRegMasks() const = 0; virtual ArrayRef getRegMaskNames() const = 0; - /// getReservedRegs - Returns a bitset indexed by physical register number - /// indicating if a register is a special register that has particular uses - /// and should be considered unavailable at all times, e.g. SP, RA. 
This is
+  /// Returns a bitset indexed by physical register number indicating if a
+  /// register is a special register that has particular uses and should be
+  /// considered unavailable at all times, e.g. SP, RA. This is
   /// used by register scavenger to determine what registers are free.
   virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;

@@ -484,14 +475,14 @@ public:
   /// remove pseudo-registers that should be ignored).
   virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const { }

-  /// getMatchingSuperReg - Return a super-register of the specified register
+  /// Return a super-register of the specified register
   /// Reg so its sub-register of index SubIdx is Reg.
   unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
                                const TargetRegisterClass *RC) const {
     return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC);
   }

-  /// getMatchingSuperRegClass - Return a subclass of the specified register
+  /// Return a subclass of the specified register
   /// class A so that each register in it has a sub-register of the
   /// specified sub-register index which is in the specified register class B.
   ///
@@ -500,7 +491,16 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;

-  /// getSubClassWithSubReg - Returns the largest legal sub-class of RC that
+  // For a copy-like instruction that defines a register of class DefRC with
+  // subreg index DefSubReg, reading from another source with class SrcRC and
+  // subregister SrcSubReg, return true if this is a preferable copy
+  // instruction, or if an earlier use should be used instead.
+  virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                                    unsigned DefSubReg,
+                                    const TargetRegisterClass *SrcRC,
+                                    unsigned SrcSubReg) const;
+
+  /// Returns the largest legal sub-class of RC that
   /// supports the sub-register index Idx.
   /// If no such sub-class exists, return NULL.
   /// If all registers in RC already have an Idx sub-register, return RC.
@@ -518,7 +518,7 @@ public:
     return RC;
   }

-  /// composeSubRegIndices - Return the subregister index you get from composing
+  /// Return the subregister index you get from composing
   /// two subregister indices.
   ///
   /// The special null sub-register index composes as the identity.
@@ -541,10 +541,11 @@ public:
   /// Transforms a LaneMask computed for one subregister to the lanemask that
   /// would have been computed when composing the subsubregisters with IdxA
   /// first. @sa composeSubRegIndices()
-  unsigned composeSubRegIndexLaneMask(unsigned IdxA, unsigned LaneMask) const {
+  LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA,
+                                         LaneBitmask Mask) const {
     if (!IdxA)
-      return LaneMask;
-    return composeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
+      return Mask;
+    return composeSubRegIndexLaneMaskImpl(IdxA, Mask);
   }

   /// Debugging helper: dump register in human readable form to dbgs() stream.
@@ -558,13 +559,13 @@ protected:
   }

   /// Overridden by TableGen in targets that have sub-registers.
-  virtual unsigned
-  composeSubRegIndexLaneMaskImpl(unsigned, unsigned) const {
+  virtual LaneBitmask
+  composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
     llvm_unreachable("Target has no sub-registers");
   }

 public:
-  /// getCommonSuperRegClass - Find a common super-register class if it exists.
+  /// Find a common super-register class if it exists.
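// The call-preserved register masks documented a little earlier pack one bit
// per register into (TRI::getNumRegs() + 31) / 32 words; a hedged helper
// showing the conventional bit test (illustrative, not part of the patch):
#include <cstdint>

static bool isCallPreserved(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1; // set bit => register preserved
}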
/// /// Find a register class, SuperRC and two sub-register indices, PreA and /// PreB, such that: @@ -605,44 +606,47 @@ public: return (unsigned)(regclass_end()-regclass_begin()); } - /// getRegClass - Returns the register class associated with the enumeration - /// value. See class MCOperandInfo. + /// Returns the register class associated with the enumeration value. + /// See class MCOperandInfo. const TargetRegisterClass *getRegClass(unsigned i) const { assert(i < getNumRegClasses() && "Register Class ID out of range"); return RegClassBegin[i]; } - /// getRegClassName - Returns the name of the register class. + /// Returns the name of the register class. const char *getRegClassName(const TargetRegisterClass *Class) const { return MCRegisterInfo::getRegClassName(Class->MC); } - /// getCommonSubClass - find the largest common subclass of A and B. Return - /// NULL if there is no common subclass. + /// Find the largest common subclass of A and B. + /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const; + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; - /// getPointerRegClass - Returns a TargetRegisterClass used for pointer - /// values. If a target supports multiple different pointer register classes, + /// Returns a TargetRegisterClass used for pointer values. + /// If a target supports multiple different pointer register classes, /// kind specifies which one is indicated. virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { llvm_unreachable("Target didn't implement getPointerRegClass!"); } - /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. If it is possible to copy the register - /// directly without using a cross register class copy, return the specified - /// RC. Returns NULL if it is not possible to copy between a two registers of - /// the specified class. + /// Returns a legal register class to copy a register in the specified class + /// to or from. If it is possible to copy the register directly without using + /// a cross register class copy, return the specified RC. Returns NULL if it + /// is not possible to copy between two registers of the specified class. virtual const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const { return RC; } - /// getLargestLegalSuperClass - Returns the largest super class of RC that is - /// legal to use in the current sub-target and has the same spill size. + /// Returns the largest super class of RC that is legal to use in the current + /// sub-target and has the same spill size. /// The returned register class can be used to create virtual registers which /// means that all its registers can be copied and spilled. virtual const TargetRegisterClass * @@ -653,9 +657,9 @@ public: return RC; } - /// getRegPressureLimit - Return the register pressure "high water mark" for - /// the specific register class. The scheduler is in high register pressure - /// mode (for the specific register class) if it goes over the limit. + /// Return the register pressure "high water mark" for the specific register + /// class. The scheduler is in high register pressure mode (for the specific + /// register class) if it goes over the limit. 
/// Note: this is the old register pressure model that relies on a manually
   /// specified representative register class per value type.
@@ -664,6 +668,15 @@ public:
     return 0;
   }

+  /// Return a heuristic for the machine scheduler to compare the profitability
+  /// of increasing one register pressure set versus another. The scheduler
+  /// will prefer increasing the register pressure of the set which returns
+  /// the largest value for this function.
+  virtual unsigned getRegPressureSetScore(const MachineFunction &MF,
+                                          unsigned PSetID) const {
+    return PSetID;
+  }
+
   /// Get the weight in units of pressure for this register class.
   virtual const RegClassWeight &getRegClassWeight(
     const TargetRegisterClass *RC) const = 0;
@@ -709,14 +722,15 @@ public:
                                      ArrayRef<MCPhysReg> Order,
                                      SmallVectorImpl<MCPhysReg> &Hints,
                                      const MachineFunction &MF,
-                                     const VirtRegMap *VRM = nullptr) const;
+                                     const VirtRegMap *VRM = nullptr,
+                                     const LiveRegMatrix *Matrix = nullptr)
+    const;

-  /// updateRegAllocHint - A callback to allow the target a chance to update
-  /// register allocation hints when a register is "changed" (e.g. coalesced)
-  /// to another register. e.g. On ARM, some virtual registers should target
-  /// register pairs, if one of the pair is coalesced to another register, the
-  /// allocation hint of the other half of the pair should be changed to point
-  /// to the new register.
+  /// A callback to allow the target a chance to update register allocation
+  /// hints when a register is "changed" (e.g. coalesced) to another register.
+  /// e.g. On ARM, some virtual registers should target register pairs,
+  /// if one of the pair is coalesced to another register, the allocation hint
+  /// of the other half of the pair should be changed to point to the new register.
   virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg,
                                   MachineFunction &MF) const {
     // Do nothing.
   }
@@ -738,73 +752,72 @@ public:
   /// register if it is available.
   virtual unsigned getCSRFirstUseCost() const { return 0; }

-  /// requiresRegisterScavenging - returns true if the target requires (and can
-  /// make use of) the register scavenger.
+  /// Returns true if the target requires (and can make use of) the register
+  /// scavenger.
   virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
     return false;
   }

-  /// useFPForScavengingIndex - returns true if the target wants to use
-  /// frame pointer based accesses to spill to the scavenger emergency spill
-  /// slot.
+  /// Returns true if the target wants to use frame pointer based accesses to
+  /// spill to the scavenger emergency spill slot.
   virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
     return true;
   }

-  /// requiresFrameIndexScavenging - returns true if the target requires post
-  /// PEI scavenging of registers for materializing frame index constants.
+  /// Returns true if the target requires post PEI scavenging of registers for
+  /// materializing frame index constants.
   virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
     return false;
   }

-  /// requiresVirtualBaseRegisters - Returns true if the target wants the
-  /// LocalStackAllocation pass to be run and virtual base registers
-  /// used for more efficient stack access.
+  /// Returns true if the target wants the LocalStackAllocation pass to be run
+  /// and virtual base registers used for more efficient stack access.
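// A hedged sketch of a backend opting in to the scavenging hooks above;
// MyRegisterInfo is a hypothetical TargetRegisterInfo subclass and the
// return values are illustrative, not prescriptive.
bool MyRegisterInfo::requiresRegisterScavenging(
    const llvm::MachineFunction &MF) const {
  return true; // request register scavenger support
}

bool MyRegisterInfo::requiresFrameIndexScavenging(
    const llvm::MachineFunction &MF) const {
  return true; // scavenge registers for frame-index materialization post-PEI
}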
  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
     return false;
   }

-  /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
-  /// the stack frame of the given function for the specified register. e.g. On
-  /// x86, if the frame register is required, the first fixed stack object is
-  /// reserved as its spill slot. This tells PEI not to create a new stack frame
+  /// Return true if target has reserved a spill slot in the stack frame of
+  /// the given function for the specified register. e.g. On x86, if the frame
+  /// register is required, the first fixed stack object is reserved as its
+  /// spill slot. This tells PEI not to create a new stack frame
   /// object for the given register. It should be called only after
-  /// processFunctionBeforeCalleeSavedScan().
+  /// determineCalleeSaves().
   virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
                                     int &FrameIdx) const {
     return false;
   }

-  /// trackLivenessAfterRegAlloc - returns true if the live-ins should be tracked
-  /// after register allocation.
+  /// Returns true if the live-ins should be tracked after register allocation.
   virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     return false;
   }

-  /// needsStackRealignment - true if storage within the function requires the
-  /// stack pointer to be aligned more than the normal calling convention calls
-  /// for.
-  virtual bool needsStackRealignment(const MachineFunction &MF) const {
-    return false;
-  }
+  /// True if the stack can be realigned for the target.
+  virtual bool canRealignStack(const MachineFunction &MF) const;

-  /// getFrameIndexInstrOffset - Get the offset from the referenced frame
-  /// index in the instruction, if there is one.
+  /// True if storage within the function requires the stack pointer to be
+  /// aligned more than the normal calling convention calls for.
+  /// This cannot be overridden by the target, but canRealignStack can be
+  /// overridden.
+  bool needsStackRealignment(const MachineFunction &MF) const;
+
+  /// Get the offset from the referenced frame index in the instruction,
+  /// if there is one.
   virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                            int Idx) const {
     return 0;
   }

-  /// needsFrameBaseReg - Returns true if the instruction's frame index
-  /// reference would be better served by a base register other than FP
-  /// or SP. Used by LocalStackFrameAllocation to determine which frame index
+  /// Returns true if the instruction's frame index reference would be better
+  /// served by a base register other than FP or SP.
+  /// Used by LocalStackFrameAllocation to determine which frame index
   /// references it should create new base registers for.
   virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     return false;
   }

-  /// materializeFrameBaseRegister - Insert defining instruction(s) for
-  /// BaseReg to be a pointer to FrameIdx before insertion point I.
+  /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+  /// before insertion point I.
   virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                             unsigned BaseReg, int FrameIdx,
                                             int64_t Offset) const {
@@ -812,24 +825,23 @@ public:
                      "target");
   }

-  /// resolveFrameIndex - Resolve a frame index operand of an instruction
+  /// Resolve a frame index operand of an instruction
   /// to reference the indicated base register plus offset instead.
  virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                  int64_t Offset) const {
     llvm_unreachable("resolveFrameIndex does not exist on this target");
   }

-  /// isFrameOffsetLegal - Determine whether a given base register plus offset
-  /// immediate is encodable to resolve a frame index.
+  /// Determine whether a given base register plus offset immediate is
+  /// encodable to resolve a frame index.
   virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
                                   int64_t Offset) const {
     llvm_unreachable("isFrameOffsetLegal does not exist on this target");
   }

-
-  /// saveScavengerRegister - Spill the register so it can be used by the
-  /// register scavenger. Return true if the register was spilled, false
-  /// otherwise. If this function does not spill the register, the scavenger
+  /// Spill the register so it can be used by the register scavenger.
+  /// Return true if the register was spilled, false otherwise.
+  /// If this function does not spill the register, the scavenger
   /// will instead spill it to the emergency spill slot.
   ///
   virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
@@ -840,13 +852,13 @@ public:
     return false;
   }

-  /// eliminateFrameIndex - This method must be overridden to eliminate abstract
-  /// frame indices from instructions which may use them. The instruction
-  /// referenced by the iterator contains an MO_FrameIndex operand which must be
-  /// eliminated by this method. This method may modify or replace the
-  /// specified instruction, as long as it keeps the iterator pointing at the
-  /// finished product. SPAdj is the SP adjustment due to call frame setup
-  /// instruction. FIOperandNum is the FI operand number.
+  /// This method must be overridden to eliminate abstract frame indices from
+  /// instructions which may use them. The instruction referenced by the
+  /// iterator contains an MO_FrameIndex operand which must be eliminated by
+  /// this method. This method may modify or replace the specified instruction,
+  /// as long as it keeps the iterator pointing at the finished product.
+  /// SPAdj is the SP adjustment due to call frame setup instruction.
+  /// FIOperandNum is the FI operand number.
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                    int SPAdj, unsigned FIOperandNum,
                                    RegScavenger *RS = nullptr) const = 0;
@@ -935,7 +947,6 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
   }
 };

-/// PrintReg - Helper class for printing registers on a raw_ostream.
 /// Prints virtual and physical registers with or without a TRI instance.
 ///
 /// The format is:
@@ -946,24 +957,10 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
 ///   %physreg17      - a physical register when no TRI instance given.
 ///
 /// Usage: OS << PrintReg(Reg, TRI) << '\n';
-///
-class PrintReg {
-  const TargetRegisterInfo *TRI;
-  unsigned Reg;
-  unsigned SubIdx;
-public:
-  explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr,
-                    unsigned subidx = 0)
-    : TRI(tri), Reg(reg), SubIdx(subidx) {}
-  void print(raw_ostream&) const;
-};
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr,
+                   unsigned SubRegIdx = 0);

-static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
-  PR.print(OS);
-  return OS;
-}
-
-/// PrintRegUnit - Helper class for printing register units on a raw_ostream.
+/// Create Printable object to print register units on a \ref raw_ostream.
/// /// Register units are named after their root registers: /// @@ -971,36 +968,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { /// FP0~ST7 - Dual roots. /// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -/// -class PrintRegUnit { -protected: - const TargetRegisterInfo *TRI; - unsigned Unit; -public: - PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) - : TRI(tri), Unit(unit) {} - void print(raw_ostream&) const; -}; +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { - PR.print(OS); - return OS; -} +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -/// PrintVRegOrUnit - It is often convenient to track virtual registers and -/// physical register units in the same list. -class PrintVRegOrUnit : protected PrintRegUnit { -public: - PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) - : PrintRegUnit(VRegOrUnit, tri) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const PrintVRegOrUnit &PR) { - PR.print(OS); - return OS; -} +/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. +Printable PrintLaneMask(LaneBitmask LaneMask); } // End llvm namespace diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 4abbe3793995..565473658404 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -80,6 +80,11 @@ class SDTCisSameNumEltsAs : SDTypeConstraint { int OtherOperandNum = OtherOp; } +// SDTCisSameSizeAs - The two specified operands have identical size. +class SDTCisSameSizeAs : SDTypeConstraint { + int OtherOperandNum = OtherOp; +} + //===----------------------------------------------------------------------===// // Selection DAG Type Profile definitions. 
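// Usage of the Printable-based helpers introduced just above; this mirrors
// the documented "OS << PrintReg(Reg, TRI)" pattern. Reg, Unit, Mask and
// TRI are assumed to be supplied by the caller.
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"

void dumpRegState(unsigned Reg, unsigned Unit, llvm::LaneBitmask Mask,
                  const llvm::TargetRegisterInfo *TRI) {
  llvm::dbgs() << llvm::PrintReg(Reg, TRI) << " unit "
               << llvm::PrintRegUnit(Unit, TRI) << " lanes "
               << llvm::PrintLaneMask(Mask) << '\n';
}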
// @@ -186,6 +191,10 @@ def SDTBrind : SDTypeProfile<0, 1, [ // brind SDTCisPtrTy<0> ]>; +def SDTCatchret : SDTypeProfile<0, 2, [ // catchret + SDTCisVT<0, OtherVT>, SDTCisVT<1, OtherVT> +]>; + def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap def SDTLoad : SDTypeProfile<1, 1, [ // load @@ -201,11 +210,12 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store ]>; def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store - SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2> + SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2> ]>; def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3> + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, + SDTCisSameNumEltsAs<0, 2> ]>; def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather @@ -387,6 +397,7 @@ def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>; def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; +def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; @@ -412,6 +423,8 @@ def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp>; def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp>; +def fminnan : SDNode<"ISD::FMINNAN" , SDTFPBinOp>; +def fmaxnan : SDNode<"ISD::FMAXNAN" , SDTFPBinOp>; def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>; def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; @@ -447,6 +460,12 @@ def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; +def catchret : SDNode<"ISD::CATCHRET" , SDTCatchret, + [SDNPHasChain, SDNPSideEffect]>; +def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTNone, [SDNPHasChain]>; +def catchpad : SDNode<"ISD::CATCHPAD" , SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone, @@ -513,6 +532,9 @@ def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; + +// vector_extract/vector_insert are deprecated. extractelt/insertelt +// are preferred. def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", @@ -523,7 +545,7 @@ def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", // This operator does not do subvector type checking. The ARM // backend, at least, needs it. def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", - SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, + SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, []>; // This operator does subvector type checking. 
@@ -815,6 +837,21 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT() == MVT::f64; }]>; +def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + // indexed store fragments. def istore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset), [{ @@ -889,6 +926,24 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), return cast(N)->getMemoryVT() == MVT::f32; }]>; +// nontemporal store fragments. +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->isNonTemporal(); +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (nontemporalstore node:$val, node:$ptr), [{ + StoreSDNode *St = cast(N); + return St->getAlignment() >= St->getMemoryVT().getStoreSize(); +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (nontemporalstore node:$val, node:$ptr), [{ + StoreSDNode *St = cast(N); + return St->getAlignment() < St->getMemoryVT().getStoreSize(); +}]>; + // setcc convenience fragments. def setoeq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>; diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h index 53db5aa84292..a7143ac3fa66 100644 --- a/include/llvm/Target/TargetSelectionDAGInfo.h +++ b/include/llvm/Target/TargetSelectionDAGInfo.h @@ -21,7 +21,7 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the +/// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. /// class TargetSelectionDAGInfo { @@ -32,8 +32,8 @@ public: explicit TargetSelectionDAGInfo() = default; virtual ~TargetSelectionDAGInfo(); - /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a - /// memcpy. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memcpy. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -56,8 +56,8 @@ public: return SDValue(); } - /// EmitTargetCodeForMemmove - Emit target-specific code that performs a - /// memmove. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memmove. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -72,8 +72,8 @@ public: return SDValue(); } - /// EmitTargetCodeForMemset - Emit target-specific code that performs a - /// memset. 
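// The aligned/unaligned nontemporal store fragments above branch on one C++
// predicate; restated here editorially with the same logic as the embedded
// PatFrag code.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isAlignedNonTemporalStore(const llvm::StoreSDNode *St) {
  // "Aligned" means the store's alignment covers the full store size of its
  // memory VT, exactly as the alignednontemporalstore fragment checks.
  return St->isNonTemporal() &&
         St->getAlignment() >= St->getMemoryVT().getStoreSize();
}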
This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memset. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple stores and can be more /// efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -87,11 +87,10 @@ public: return SDValue(); } - /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a - /// memcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memcmp, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memcmp and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -101,11 +100,10 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForMemchr - Emit target-specific code that performs a - /// memchr, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memchr and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memchr, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memchr and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Src, SDValue Char, SDValue Length, @@ -113,8 +111,8 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a - /// strcpy or stpcpy, in cases where that is faster than a libcall. + /// Emit target-specific code that performs a strcpy or stpcpy, in cases + /// where that is faster than a libcall. /// The first returned SDValue is the result of the copy (the start /// of the destination string for strcpy, a pointer to the null terminator /// for stpcpy) and the second is the chain. Both SDValues can be null @@ -128,11 +126,10 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcmp - Emit target-specific code that performs a - /// strcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the strcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a strcmp, in cases where that is + /// faster than a libcall. + /// The first returned SDValue is the result of the strcmp and the second is + /// the chain. Both SDValues can be null if a normal libcall should be used. 
  virtual std::pair<SDValue, SDValue>
   EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl,
                           SDValue Chain,
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 07c0c66bfa18..d50aa4932f8f 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_TARGET_TARGETSUBTARGETINFO_H

 #include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/CodeGen.h"

@@ -81,6 +82,11 @@ public:
   virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
     return nullptr;
   }
+  /// Targets can subclass this hook to select a different DAG scheduler.
+  virtual RegisterScheduler::FunctionPassCtor
+  getDAGScheduler(CodeGenOpt::Level) const {
+    return nullptr;
+  }

   /// getRegisterInfo - If register information is available, return it. If
   /// not, return null. This is kept separate from RegInfo until RegInfo has
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 2ea47301bb4c..0c374a070ce8 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -16,9 +16,11 @@
 #define LLVM_TRANSFORMS_IPO_H

 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"

 namespace llvm {

+class FunctionInfoIndex;
 class ModulePass;
 class Pass;
 class Function;
@@ -84,6 +86,10 @@ ModulePass *createEliminateAvailableExternallyPass();
 ModulePass *createGVExtractionPass(std::vector<GlobalValue *> &GVs,
                                    bool deleteFn = false);

+//===----------------------------------------------------------------------===//
+/// This pass performs iterative function importing from other modules.
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr);
+
 //===----------------------------------------------------------------------===//
 /// createFunctionInliningPass - Return a new pass object that uses a heuristic
 /// to inline direct function calls to small functions.
@@ -209,6 +215,15 @@ ModulePass *createBarrierNoopPass();
 /// to bitsets.
 ModulePass *createLowerBitSetsPass();

+/// \brief This pass exports CFI checks for use by external modules.
+ModulePass *createCrossDSOCFIPass();
+
+//===----------------------------------------------------------------------===//
+// SampleProfilePass - Loads sample profile data from disk and generates
+// IR metadata to reflect the profile.
+ModulePass *createSampleProfileLoaderPass();
+ModulePass *createSampleProfileLoaderPass(StringRef Name);
+
 } // End llvm namespace

 #endif
diff --git a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
new file mode 100644
index 000000000000..0ff4afe79b0c
--- /dev/null
+++ b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -0,0 +1,35 @@
+//===-- ForceFunctionAttrs.h - Force function attrs for debugging ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Super simple passes to force specific function attrs from the commandline
+/// into the IR for debugging purposes.
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H +#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Pass which forces specific function attributes into the IR, primarily as +/// a debugging tool. +class ForceFunctionAttrsPass { +public: + static StringRef name() { return "ForceFunctionAttrsPass"; } + PreservedAnalyses run(Module &M); +}; + +/// Create a legacy pass manager instance of a pass to force function attrs. +Pass *createForceFunctionAttrsLegacyPass(); + +} + +#endif // LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h new file mode 100644 index 000000000000..d7707790a017 --- /dev/null +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -0,0 +1,43 @@ +//===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUNCTIONIMPORT_H +#define LLVM_FUNCTIONIMPORT_H + +#include "llvm/ADT/StringMap.h" +#include <functional> + +namespace llvm { +class LLVMContext; +class Module; +class FunctionInfoIndex; + +/// The function importer automatically imports functions from other modules +/// based on the provided summary information. +class FunctionImporter { + + /// The summaries index used to trigger importing. + const FunctionInfoIndex &Index; + + /// Factory function to load a Module for a given identifier. + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader; + +public: + /// Create a Function Importer. + FunctionImporter( + const FunctionInfoIndex &Index, + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader) + : Index(Index), ModuleLoader(ModuleLoader) {} + + /// Import functions in Module \p M based on the summary information. + bool importFunctions(Module &M); +}; +} + +#endif // LLVM_FUNCTIONIMPORT_H
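[Editor's note: a brief usage sketch, not part of the patch. The module loader is supplied by the client as any callable; parseModuleForImport is a hypothetical stand-in for whatever on-demand bitcode loading the client performs.]

FunctionImporter Importer(Index, [&](StringRef Identifier) {
  // Hypothetical helper: parse the named bitcode file into a Module.
  return parseModuleForImport(Identifier, Context);
});
// Pull referenced function bodies into the destination module.
bool Changed = Importer.importFunctions(DestModule);

diff --git a/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/include/llvm/Transforms/IPO/InferFunctionAttrs.h new file mode 100644 index 000000000000..80afc02c62ae --- /dev/null +++ b/include/llvm/Transforms/IPO/InferFunctionAttrs.h @@ -0,0 +1,38 @@ +//===-- InferFunctionAttrs.h - Infer implicit function attributes ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Interfaces for passes which infer implicit function attributes from the +/// name and signature of function declarations. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H +#define LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A pass which infers function attributes from the names and signatures of +/// function declarations in a module. +class InferFunctionAttrsPass { +public: + static StringRef name() { return "InferFunctionAttrsPass"; } + PreservedAnalyses run(Module &M, AnalysisManager<Module> *AM); +}; + +/// Create a legacy pass manager instance of a pass to infer function +/// attributes.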
+Pass *createInferFunctionAttrsLegacyPass(); + +} + +#endif // LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index 6a644ad4a63b..58ef0cbbfb5d 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -20,11 +20,11 @@ #include "llvm/Analysis/CallGraphSCCPass.h" namespace llvm { - class CallSite; - class DataLayout; - class InlineCost; - template - class SmallPtrSet; +class AssumptionCacheTracker; +class CallSite; +class DataLayout; +class InlineCost; +template class SmallPtrSet; /// Inliner - This class contains all of the helper code which is used to /// perform the inlining operations that do not depend on the policy. @@ -84,6 +84,9 @@ private: /// shouldInline - Return true if the inliner should attempt to /// inline at the given CallSite. bool shouldInline(CallSite CS); + +protected: + AssumptionCacheTracker *ACT; }; } // End llvm namespace diff --git a/include/llvm/Transforms/IPO/LowerBitSets.h b/include/llvm/Transforms/IPO/LowerBitSets.h index 55d7d84560a0..e5fb7b98fcb3 100644 --- a/include/llvm/Transforms/IPO/LowerBitSets.h +++ b/include/llvm/Transforms/IPO/LowerBitSets.h @@ -26,8 +26,9 @@ namespace llvm { class DataLayout; -class GlobalVariable; +class GlobalObject; class Value; +class raw_ostream; struct BitSetInfo { // The indices of the set bits in the bitset. @@ -55,8 +56,10 @@ struct BitSetInfo { bool containsGlobalOffset(uint64_t Offset) const; bool containsValue(const DataLayout &DL, - const DenseMap &GlobalLayout, + const DenseMap &GlobalLayout, Value *V, uint64_t COffset = 0) const; + + void print(raw_ostream &OS) const; }; struct BitSetBuilder { diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 1334dd0da23c..a4e7bce8ef4a 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -15,9 +15,11 @@ #ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H #define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#include #include namespace llvm { +class FunctionInfoIndex; class Pass; class TargetLibraryInfoImpl; class TargetMachine; @@ -81,6 +83,11 @@ public: /// run after everything else. EP_OptimizerLast, + /// EP_VectorizerStart - This extension point allows adding optimization + /// passes before the vectorizer and other highly target specific + /// optimization passes are executed. + EP_VectorizerStart, + /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that /// should not be disabled by O0 optimization level. The passes will be /// inserted after the inlining pass. @@ -109,6 +116,9 @@ public: /// added to the per-module passes. Pass *Inliner; + /// The function summary index to use for function importing. + const FunctionInfoIndex *FunctionIndex; + bool DisableTailCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; diff --git a/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/include/llvm/Transforms/IPO/StripDeadPrototypes.h new file mode 100644 index 000000000000..9dddd12871c4 --- /dev/null +++ b/include/llvm/Transforms/IPO/StripDeadPrototypes.h @@ -0,0 +1,34 @@ +//===-- StripDeadPrototypes.h - Remove unused function declarations -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for +// dead declarations, and removes them. Dead declarations are declarations of +// functions for which no implementation is available (i.e., declarations for +// unused library functions). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H +#define LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Pass to remove unused function declarations. +class StripDeadPrototypesPass { +public: + static StringRef name() { return "StripDeadPrototypesPass"; } + PreservedAnalyses run(Module &M); +}; + +} + +#endif // LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h index a6bad343db43..5d2b2d000009 100644 --- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h +++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h @@ -60,13 +60,13 @@ public: /// AddInitialGroup - Add the specified batch of stuff in reverse order, /// which should only be done when the worklist is empty and when the group /// has no duplicates. - void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { + void AddInitialGroup(ArrayRef<Instruction *> List) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); - Worklist.reserve(NumEntries+16); - WorklistMap.resize(NumEntries); - DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (unsigned Idx = 0; NumEntries; --NumEntries) { - Instruction *I = List[NumEntries-1]; + Worklist.reserve(List.size()+16); + WorklistMap.resize(List.size()); + DEBUG(dbgs() << "IC: ADDING: " << List.size() << " instrs to worklist\n"); + unsigned Idx = 0; + for (Instruction *I : reverse(List)) { WorklistMap.insert(std::make_pair(I, Idx++)); Worklist.push_back(I); } diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 250e3893cb15..38dfeb04ace3 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_H #include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" #include #if defined(__GNUC__) && defined(__linux__) && !defined(ANDROID) @@ -33,6 +34,16 @@ inline void *getDFSanRetValTLSPtrForJIT() { namespace llvm { +class TargetMachine; + +/// Instrumentation passes often insert conditional checks into entry blocks. +/// Call this function before splitting the entry block to move instructions +/// that must remain in the entry block up before the split point. Static +/// allocas and llvm.localescape calls, for example, must remain in the entry +/// block. +BasicBlock::iterator PrepareToSplitEntryBlock(BasicBlock &BB, + BasicBlock::iterator IP); + class ModulePass; class FunctionPass; @@ -68,6 +79,11 @@ struct GCOVOptions { ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); +// PGO Instrumentation +ModulePass *createPGOInstrumentationGenPass(); +ModulePass * +createPGOInstrumentationUsePass(StringRef Filename = StringRef("")); +
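[Editor's note: illustrative only, not part of the patch. A pass using the PrepareToSplitEntryBlock helper declared above would hoist the must-stay-in-entry instructions before splitting; F, DT, and LI are assumed to come from the surrounding pass.]

BasicBlock &Entry = F.getEntryBlock();
BasicBlock::iterator IP = Entry.getFirstInsertionPt();
// Move static allocas and llvm.localescape calls above the split point.
IP = PrepareToSplitEntryBlock(Entry, IP);
// Splitting at IP is now safe, e.g. via SplitBlock(&Entry, &*IP, &DT, &LI).

/// Options for the frontend instrumentation based profiling pass.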
struct InstrProfOptions { InstrProfOptions() : NoRedZone(false) {} @@ -84,8 +100,10 @@ ModulePass *createInstrProfilingPass( const InstrProfOptions &Options = InstrProfOptions()); // Insert AddressSanitizer (address sanity checking) instrumentation -FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false); -ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false); +FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, + bool Recover = false); +ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false, + bool Recover = false); // Insert MemorySanitizer instrumentation (detection of uninitialized reads) FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0); @@ -134,7 +152,25 @@ FunctionPass *createBoundsCheckingPass(); /// \brief This pass splits the stack into a safe stack and an unsafe stack to /// protect against stack-based overflow vulnerabilities. -FunctionPass *createSafeStackPass(); +FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); + +/// \brief Calculate what to divide by to scale counts. +/// +/// Given the maximum count, calculate a divisor that will scale all the +/// weights to strictly less than UINT32_MAX. +static inline uint64_t calculateCountScale(uint64_t MaxCount) { + return MaxCount < UINT32_MAX ? 1 : MaxCount / UINT32_MAX + 1; +} + +/// \brief Scale an individual branch count. +/// +/// Scale a 64-bit weight down to 32-bits using \c Scale. +/// +static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) { + uint64_t Scaled = Count / Scale; + assert(Scaled <= UINT32_MAX && "overflow 32-bits"); + return Scaled; +} } // End llvm namespace diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 4676c95d7cd4..9173de1112f3 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -93,7 +93,7 @@ FunctionPass *createBitTrackingDCEPass(); // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // -FunctionPass *createSROAPass(bool RequiresDomTree = true); +FunctionPass *createSROAPass(); //===----------------------------------------------------------------------===// // @@ -161,7 +161,8 @@ Pass *createLoopStrengthReducePass(); // It can also be configured to focus on size optimizations only. // Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, - bool OnlyOptimizeForSize = false); + bool OnlyOptimizeForSize = false, + bool MergeExternalByDefault = false); //===----------------------------------------------------------------------===// // @@ -405,13 +406,6 @@ FunctionPass *createLowerExpectIntrinsicPass(); // FunctionPass *createPartiallyInlineLibCallsPass(); -//===----------------------------------------------------------------------===// -// -// SampleProfilePass - Loads sample profile data from disk and generates -// IR metadata to reflect the profile. -FunctionPass *createSampleProfileLoaderPass(); -FunctionPass *createSampleProfileLoaderPass(StringRef Name); - //===----------------------------------------------------------------------===// // // ScalarizerPass - Converts vector operations into scalar operations @@ -486,6 +480,12 @@ FunctionPass *createNaryReassociatePass(); // FunctionPass *createLoopDistributePass(); +//===----------------------------------------------------------------------===// +// +// LoopLoadElimination - Perform loop-aware load elimination. 
+// +FunctionPass *createLoopLoadEliminationPass(); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Scalar/ADCE.h b/include/llvm/Transforms/Scalar/ADCE.h new file mode 100644 index 000000000000..f9bc7b77c14a --- /dev/null +++ b/include/llvm/Transforms/Scalar/ADCE.h @@ -0,0 +1,38 @@ +//===- ADCE.h - Aggressive dead code elimination --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Aggressive Dead Code Elimination +// pass. This pass optimistically assumes that all instructions are dead until +// proven otherwise, allowing it to eliminate dead computations that other DCE +// passes do not catch, particularly involving loop computations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H +#define LLVM_TRANSFORMS_SCALAR_ADCE_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A DCE pass that assumes instructions are dead until proven otherwise. +/// +/// This pass eliminates dead code by optimistically assuming that all +/// instructions are dead until proven otherwise. This allows it to eliminate +/// dead computations that other DCE passes do not catch, particularly involving +/// loop computations. +class ADCEPass { +public: + static StringRef name() { return "ADCEPass"; } + PreservedAnalyses run(Function &F); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h new file mode 100644 index 000000000000..f90cc7b686ba --- /dev/null +++ b/include/llvm/Transforms/Scalar/SROA.h @@ -0,0 +1,129 @@ +//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for LLVM's Scalar Replacement of +/// Aggregates pass. This pass provides both aggregate splitting and the +/// primary SSA formation used in the compiler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SROA_H +#define LLVM_TRANSFORMS_SCALAR_SROA_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A private "module" namespace for types and utilities used by SROA. These +/// are implementation details and should not be used by clients. +namespace sroa { +class AllocaSliceRewriter; +class AllocaSlices; +class Partition; +class SROALegacyPass; +} + +/// \brief An optimization pass providing Scalar Replacement of Aggregates. +/// +/// This pass takes allocations which can be completely analyzed (that is, they +/// don't escape) and tries to turn them into scalar SSA values. There are +/// a few steps to this process. +/// +/// 1) It takes allocations of aggregates and analyzes the ways in which they +/// are used to try to split them into smaller allocations, ideally of +/// a single scalar data type. 
It will split up memcpy and memset accesses +/// as necessary and try to isolate individual scalar accesses. +/// 2) It will transform accesses into forms which are suitable for SSA value +/// promotion. This can be replacing a memset with a scalar store of an +/// integer value, or it can involve speculating operations on a PHI or +/// select to be a PHI or select of the results. +/// 3) Finally, this will try to detect a pattern of accesses which map cleanly +/// onto insert and extract operations on a vector value, and convert them to +/// this form. By doing so, it will enable promotion of vector aggregates to +/// SSA vector values. +class SROA { + LLVMContext *C; + DominatorTree *DT; + AssumptionCache *AC; + + /// \brief Worklist of alloca instructions to simplify. + /// + /// Each alloca in the function is added to this. Each new alloca formed gets + /// added to it as well to recursively simplify unless that alloca can be + /// directly promoted. Finally, each time we rewrite a use of an alloca other + /// than the one being actively rewritten, we add it back onto the list if not + /// already present to ensure it is re-visited. + SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist; + + /// \brief A collection of instructions to delete. + /// We try to batch deletions to simplify code and make things a bit more + /// efficient. + SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts; + + /// \brief Post-promotion worklist. + /// + /// Sometimes we discover an alloca which has a high probability of becoming + /// viable for SROA after a round of promotion takes place. In those cases, + /// the alloca is enqueued here for re-processing. + /// + /// Note that we have to be very careful to clear allocas out of this list in + /// the event they are deleted. + SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist; + + /// \brief A collection of alloca instructions we can directly promote. + std::vector<AllocaInst *> PromotableAllocas; + + /// \brief A worklist of PHIs to speculate prior to promoting allocas. + /// + /// All of these PHIs have been checked for the safety of speculation and by + /// being speculated will allow promoting allocas currently in the promotable + /// queue. + SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs; + + /// \brief A worklist of select instructions to speculate prior to promoting + /// allocas. + /// + /// All of these select instructions have been checked for the safety of + /// speculation and by being speculated will allow promoting allocas + /// currently in the promotable queue. + SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects; + +public: + SROA() : C(nullptr), DT(nullptr), AC(nullptr) {} + + static StringRef name() { return "SROA"; } + + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); + +private: + friend class sroa::AllocaSliceRewriter; + friend class sroa::SROALegacyPass; + + /// Helper used by both the public run method and by the legacy pass.
+ PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC); + + bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); + AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, + sroa::Partition &P); + bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS); + bool runOnAlloca(AllocaInst &AI); + void clobberUse(Use &U); + void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas); + bool promoteAllocas(Function &F); +}; + +} + +#endif diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 9b919b62ee41..13c856dfdc9a 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -22,7 +22,6 @@ namespace llvm { -class AliasAnalysis; class MemoryDependenceAnalysis; class DominatorTree; class LoopInfo; @@ -40,7 +39,7 @@ void DeleteDeadBlock(BasicBlock *BB); /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA = nullptr, +void FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceAnalysis *MemDep = nullptr); /// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it @@ -54,7 +53,6 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr); /// if possible. The return value indicates success or failure. bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - AliasAnalysis *AA = nullptr, MemoryDependenceAnalysis *MemDep = nullptr); // ReplaceInstWithValue - Replace all uses of an instruction (specified by BI) @@ -82,27 +80,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To); /// This provides a builder interface for overriding the default options used /// during critical edge splitting. struct CriticalEdgeSplittingOptions { - AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; bool MergeIdenticalEdges; bool DontDeleteUselessPHIs; bool PreserveLCSSA; - CriticalEdgeSplittingOptions() - : AA(nullptr), DT(nullptr), LI(nullptr), MergeIdenticalEdges(false), - DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} - - /// \brief Basic case of setting up all the analysis. - CriticalEdgeSplittingOptions(AliasAnalysis *AA, DominatorTree *DT = nullptr, + CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) - : AA(AA), DT(DT), LI(LI), MergeIdenticalEdges(false), - DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} - - /// \brief A common pattern is to preserve the dominator tree and loop - /// info but not care about AA. - CriticalEdgeSplittingOptions(DominatorTree *DT, LoopInfo *LI) - : AA(nullptr), DT(DT), LI(LI), MergeIdenticalEdges(false), + : DT(DT), LI(LI), MergeIdenticalEdges(false), DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} CriticalEdgeSplittingOptions &setMergeIdenticalEdges() { @@ -214,15 +200,13 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, /// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more /// details on this case. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's -/// complicated to handle the case where one of the edges being split -/// is an exit of a loop with other exits). +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). /// BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix, - AliasAnalysis *AA = nullptr, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, bool PreserveLCSSA = false); @@ -234,17 +218,15 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, /// OrigBB is cloned into both of the new basic blocks. The new blocks are given /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, -/// it does not preserve LoopSimplify (because it's complicated to handle the -/// case where one of the edges being split is an exit of a loop with other -/// exits). +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). /// void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, - AliasAnalysis *AA = nullptr, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, bool PreserveLCSSA = false); diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 2caa9a2462df..92a1d52f1011 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -20,9 +20,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include <functional> namespace llvm { @@ -43,14 +45,21 @@ class DataLayout; class Loop; class LoopInfo; class AllocaInst; -class AliasAnalysis; class AssumptionCacheTracker; class DominatorTree; -/// CloneModule - Return an exact copy of the specified module +/// Return an exact copy of the specified module /// -Module *CloneModule(const Module *M); -Module *CloneModule(const Module *M, ValueToValueMapTy &VMap); +std::unique_ptr<Module> CloneModule(const Module *M); +std::unique_ptr<Module> CloneModule(const Module *M, ValueToValueMapTy &VMap); + +/// Return a copy of the specified module. The ShouldCloneDefinition function +/// controls whether a specific GlobalValue's definition is cloned. If the +/// function returns false, the module copy will contain an external reference +/// in place of the global definition. +std::unique_ptr<Module> +CloneModule(const Module *M, ValueToValueMapTy &VMap, + std::function<bool(const GlobalValue *)> ShouldCloneDefinition); /// ClonedCodeInfo - This struct can be used to capture information about code /// being cloned, while it is being cloned. @@ -65,6 +74,11 @@ struct ClonedCodeInfo { /// size. bool ContainsDynamicAllocas; + /// All cloned call sites that have operand bundles attached are appended to + /// this vector.
This vector may contain nulls or undefs if some of the + /// originally inserted callsites were DCE'ed after they were cloned. + std::vector OperandBundleCallSites; + ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {} }; @@ -193,14 +207,12 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, class InlineFunctionInfo { public: explicit InlineFunctionInfo(CallGraph *cg = nullptr, - AliasAnalysis *AA = nullptr, AssumptionCacheTracker *ACT = nullptr) - : CG(cg), AA(AA), ACT(ACT) {} + : CG(cg), ACT(ACT) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; - AliasAnalysis *AA; AssumptionCacheTracker *ACT; /// StaticAllocas - InlineFunction fills this in with all static allocas that @@ -228,11 +240,11 @@ public: /// function by one level. /// bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); /// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index a1bb367ac7b6..81b376f0c212 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -15,6 +15,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -40,7 +41,6 @@ class DataLayout; class TargetLibraryInfo; class TargetTransformInfo; class DIBuilder; -class AliasAnalysis; class DominatorTree; template class SmallVectorImpl; @@ -271,11 +271,34 @@ bool LowerDbgDeclare(Function &F); /// an alloca, if any. DbgDeclareInst *FindAllocaDbgDeclare(Value *V); -/// \brief Replaces llvm.dbg.declare instruction when an alloca is replaced with -/// a new value. If Deref is true, tan additional DW_OP_deref is prepended to -/// the expression. +/// \brief Replaces llvm.dbg.declare instruction when the address it describes +/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is +/// prepended to the expression. If Offset is non-zero, a constant displacement +/// is added to the expression (after the optional Deref). Offset can be +/// negative. +bool replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset); + +/// \brief Replaces llvm.dbg.declare instruction when the alloca it describes +/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is +/// prepended to the expression. If Offset is non-zero, a constant displacement +/// is added to the expression (after the optional Deref). Offset can be +/// negative. New llvm.dbg.declare is inserted immediately before AI. bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref); + DIBuilder &Builder, bool Deref, int Offset = 0); + +/// \brief Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. 
+void changeToUnreachable(Instruction *I, bool UseLLVMTrap); + +/// Replace 'BB's terminator with one that does not have an unwind successor +/// block. Rewrites `invoke` to `call`, etc. Updates any PHIs in unwind +/// successor. +/// +/// \param BB Block whose terminator will be replaced. Its terminator must +/// have an unwind successor. +void removeUnwindEdge(BasicBlock *BB); /// \brief Remove all blocks that cannot be reached from the function's entry. /// @@ -291,6 +314,22 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> Kn /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge); +/// \brief Replace each use of 'From' with 'To' if that use is dominated by +/// the given BasicBlock. Returns the number of replacements made. +unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, + const BasicBlock *BB); + + +/// \brief Return true if the CallSite CS calls a gc leaf function. +/// +/// A leaf function is a function that does not safepoint the thread during its +/// execution. During a call or invoke to such a function, the caller's stack +/// does not have to be made parseable. +/// +/// Most passes can and should ignore this information, and it is only used +/// during lowering by the GC infrastructure. +bool callsGCLeafFunction(ImmutableCallSite CS); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 15747bc7f1ac..17aaee03e4a8 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -15,11 +15,11 @@ #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" namespace llvm { -class AliasAnalysis; class AliasSet; class AliasSetTracker; class AssumptionCache; @@ -85,24 +85,35 @@ public: RecurrenceDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence), - MinMaxKind(MRK_Invalid) {} + MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr), + RecurrenceType(nullptr), IsSigned(false) {} RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - MinMaxRecurrenceKind MK) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} + MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl<Instruction *> &CI) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + CastInsts.insert(CI.begin(), CI.end()); + } /// This POD struct holds information about a potential recurrence operation.
class InstDesc { public: - InstDesc(bool IsRecur, Instruction *I) - : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), + UnsafeAlgebraInst(UAI) {} - InstDesc(Instruction *I, MinMaxRecurrenceKind K) - : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K) {} + InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + UnsafeAlgebraInst(UAI) {} bool isRecurrence() { return IsRecurrence; } + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } Instruction *getPatternInst() { return PatternLastInst; } @@ -115,6 +126,8 @@ public: Instruction *PatternLastInst; // If this is a min/max pattern the comparison predicate. MinMaxRecurrenceKind MinMaxKind; + // Recurrence has unsafe algebra. + Instruction *UnsafeAlgebraInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -125,7 +138,7 @@ public: static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr); - /// Returns true if instuction I has multiple uses in Insts + /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl<Instruction *> &Insts); @@ -167,6 +180,51 @@ public: Instruction *getLoopExitInstr() { return LoopExitInstr; } + /// Returns true if the recurrence has unsafe algebra which requires a relaxed + /// floating-point model. + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + /// Returns first unsafe algebra instruction in the PHI node's use-chain. + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + + /// Returns true if the recurrence kind is an integer kind. + static bool isIntegerRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is a floating point kind. + static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is an arithmetic kind. + static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); + + /// Determines if Phi may have been type-promoted. If Phi has a single user + /// that ANDs the Phi with a type mask, return the user. RT is updated to + /// account for the narrower bit width represented by the mask, and the AND + /// instruction is added to CI. + static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI); + + /// Returns true if all the source operands of a recurrence are either + /// SExtInsts or ZExtInsts. This function is intended to be used with + /// lookThroughAnd to determine if the recurrence has been type-promoted. The + /// source operands are added to CI, and IsSigned is updated to indicate if + /// all source operands are SExtInsts. + static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit, + Type *RT, bool &IsSigned, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI); + + /// Returns the type of the recurrence. This type can be narrower than the + /// actual type of the Phi if the recurrence has been type-promoted. + Type *getRecurrenceType() { return RecurrenceType; } + + /// Returns a reference to the instructions used for type-promoting the + /// recurrence.
+ SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; } + + /// Returns true if all source operands of the recurrence are SExtInsts. + bool isSigned() { return IsSigned; } + private: // The starting value of the recurrence. // It does not have to be zero! @@ -177,19 +235,74 @@ private: RecurrenceKind Kind; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind; + // First occurrence of unsafe algebra in the PHI's use-chain. + Instruction *UnsafeAlgebraInst; + // The type of the recurrence. + Type *RecurrenceType; + // True if all source operands of the recurrence are SExtInsts. + bool IsSigned; + // Instructions used for type-promoting the recurrence. + SmallPtrSet<Instruction *, 8> CastInsts; }; -BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); +/// A struct for saving information about induction variables. +class InductionDescriptor { +public: + /// This enum represents the kinds of inductions that we support. + enum InductionKind { + IK_NoInduction, ///< Not an induction variable. + IK_IntInduction, ///< Integer induction variable. Step = C. + IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem). + }; + +public: + /// Default constructor - creates an invalid induction. + InductionDescriptor() + : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + + /// Get the consecutive direction. Returns: + /// 0 - unknown or non-consecutive. + /// 1 - consecutive and increasing. + /// -1 - consecutive and decreasing. + int getConsecutiveDirection() const; + + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + /// For integer induction, returns StartValue + Index * StepValue. + /// For pointer induction, returns StartValue[Index * StepValue]. + /// FIXME: The newly created binary instructions should contain nsw/nuw + /// flags, which can be found from the original scalar operations. + Value *transform(IRBuilder<> &B, Value *Index) const; + + Value *getStartValue() const { return StartValue; } + InductionKind getKind() const { return IK; } + ConstantInt *getStepValue() const { return StepValue; } + + static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + InductionDescriptor &D); + +private: + /// Private constructor - used by \c isInductionPHI. + InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step); + + /// Start value. + TrackingVH<Value> StartValue; + /// Induction kind. + InductionKind IK; + /// Step value. + ConstantInt *StepValue; +}; + +BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA); /// \brief Simplify each loop in a loop nest recursively. /// /// This takes a potentially un-simplified loop L (and its children) and turns -/// it into a simplified loop nest with preheaders and single backedges. It -/// will optionally update \c AliasAnalysis and \c ScalarEvolution analyses if -/// passed into it. -bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr, - AssumptionCache *AC = nullptr); +/// it into a simplified loop nest with preheaders and single backedges. It will +/// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null. +bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, bool PreserveLCSSA);
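[Editor's note: illustrative call site, not part of the patch, showing the new explicit-analyses signature of simplifyLoop; L, DT, LI, SE, and AC are assumed to have been obtained from the surrounding pass.]

// The caller now passes analyses directly instead of a Pass pointer.
bool Changed = simplifyLoop(L, DT, LI, SE, AC, /*PreserveLCSSA=*/true);

/// \brief Put loop into LCSSA form. /// @@ -203,7 +316,7 @@ bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, /// /// Returns true if any modifications are made to the loop.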
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE = nullptr); + ScalarEvolution *SE); /// \brief Put a loop nest into LCSSA form. /// @@ -215,7 +328,7 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, /// /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE = nullptr); + ScalarEvolution *SE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in @@ -242,10 +355,10 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over -/// the stores in the loop, looking for stores to Must pointers which are +/// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks /// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop, -/// AliasSet information for all instructions of the loop and loop safety +/// AliasSet information for all instructions of the loop and loop safety /// information as arguments. It returns changed status. bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &, SmallVectorImpl<Instruction *> &, @@ -254,15 +367,13 @@ bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &, LICMSafetyInfo *); /// \brief Computes safety information for a loop -/// checks loop body & header for the possiblity of may throw +/// checks loop body & header for the possibility of a may-throw /// exception, it takes LICMSafetyInfo and loop as argument. /// Updates safety information in LICMSafetyInfo argument. void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *); -/// \brief Checks if the given PHINode in a loop header is an induction -/// variable. Returns true if this is an induction PHI along with the step -/// value. -bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&); +/// \brief Returns the instructions that use values defined in the loop. +SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L); } #endif diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h index 009fba48c6a3..3b70594e0b63 100644 --- a/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/include/llvm/Transforms/Utils/LoopVersioning.h @@ -16,13 +16,17 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { class Loop; class LoopAccessInfo; class LoopInfo; +class ScalarEvolution; /// \brief This class emits a version of the loop where run-time checks ensure /// that may-alias pointers can't overlap. @@ -31,13 +35,13 @@ class LoopInfo; /// already has a preheader. class LoopVersioning { public: + /// \brief Expects LoopAccessInfo, Loop, LoopInfo, DominatorTree as input. + /// It uses the runtime checks provided by the user. If \p UseLAIChecks is true, + /// we will retain the default checks made by LAI. Otherwise, construct an + /// object having no checks and we expect the user to add them.
LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector<int, 8> *PtrToPartition = nullptr); - - /// \brief Returns true if we need memchecks to disambiguate may-aliasing - /// accesses. - bool needsRuntimeChecks() const; + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks = true); /// \brief Performs the CFG manipulation part of versioning the loop including /// the DominatorTree and LoopInfo updates. @@ -52,15 +56,11 @@ public: /// analyze L /// if versioning is necessary version L /// transform L - void versionLoop(Pass *P); + void versionLoop() { versionLoop(findDefsUsedOutsideOfLoop(VersionedLoop)); } - /// \brief Adds the necessary PHI nodes for the versioned loops based on the - /// loop-defined values used outside of the loop. - /// - /// This needs to be called after versionLoop if there are defs in the loop - /// that are used outside the loop. FIXME: this should be invoked internally - /// by versionLoop and made private. - void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside); + /// \brief Same but if the client has already precomputed the set of values + /// used outside the loop, this API allows passing that. + void versionLoop(const SmallVectorImpl<Instruction *> &DefsUsedOutside); /// \brief Returns the versioned loop. Control flows here if pointers in the /// loop don't alias (i.e. all memchecks passed). (This loop is actually the @@ -71,7 +71,21 @@ public: /// loop may alias (i.e. one of the memchecks failed). Loop *getNonVersionedLoop() { return NonVersionedLoop; } + /// \brief Sets the runtime alias checks for versioning the loop. + void setAliasChecks( + const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks); + + /// \brief Sets the runtime SCEV checks for versioning the loop. + void setSCEVChecks(SCEVUnionPredicate Check); + private: + /// \brief Adds the necessary PHI nodes for the versioned loops based on the + /// loop-defined values used outside of the loop. + /// + /// This needs to be called after versionLoop if there are defs in the loop + /// that are used outside the loop. + void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside); + /// \brief The original loop. This becomes the "versioned" one. I.e., /// control flows here if pointers in the loop don't alias. Loop *VersionedLoop; @@ -79,21 +93,21 @@ private: /// loop may alias (memchecks failed). Loop *NonVersionedLoop; - /// \brief For each memory pointer it contains the partitionId it is used in. - /// If nullptr, no partitioning is used. - /// - /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck(). - /// If the pointer is used in multiple partitions the entry is set to -1. - const SmallVector<int, 8> *PtrToPartition; - /// \brief This maps the instructions from VersionedLoop to their counterpart /// in NonVersionedLoop. ValueToValueMapTy VMap; + /// \brief The set of alias checks that we are versioning for. + SmallVector<RuntimePointerChecking::PointerCheck, 4> AliasChecks; + + /// \brief The set of SCEV checks that we are versioning for. + SCEVUnionPredicate Preds; + /// \brief Analyses used.
const LoopAccessInfo &LAI; LoopInfo *LI; DominatorTree *DT; + ScalarEvolution *SE; }; } diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h index 622265bae143..0f23d34de5db 100644 --- a/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/include/llvm/Transforms/Utils/ModuleUtils.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include <utility> // for std::pair namespace llvm { @@ -56,7 +57,8 @@ Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast); /// respectively. std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, - ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs); + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, + StringRef VersionCheckName = StringRef()); } // End llvm namespace #endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index ed0841c46c27..425ecd3cfb5e 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -378,7 +378,7 @@ public: void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) { for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end(); BBI != BBE; ++BBI) { - PhiT *SomePHI = Traits::InstrIsPHI(BBI); + PhiT *SomePHI = Traits::InstrIsPHI(&*BBI); if (!SomePHI) break; if (CheckIfPHIMatches(SomePHI)) { diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h index dcb1d67cbf75..3c55e64537c7 100644 --- a/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -25,7 +25,7 @@ class CastInst; class DominatorTree; class IVUsers; class Loop; -class LPPassManager; +class LoopInfo; class PHINode; class ScalarEvolution; @@ -57,13 +57,14 @@ public: /// simplifyUsersOfIV - Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVVisitor *V = nullptr); +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead); +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead); } // namespace llvm diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 41159603aae5..410a075aeb98 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -131,8 +131,11 @@ private: Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); Value *optimizeFabs(CallInst *CI, IRBuilder<> &B); + Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B); + Value *optimizeLog(CallInst *CI, IRBuilder<> &B); Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B); Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B); + Value *optimizeTan(CallInst *CI, IRBuilder<> &B); // Integer Library Call Optimizations Value *optimizeFFS(CallInst *CI, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h new file mode 100644 index 000000000000..7d896d1993d6 --- /dev/null +++ b/include/llvm/Transforms/Utils/SplitModule.h @@ -0,0 +1,43 @@ +//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H +#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H + +#include +#include + +namespace llvm { + +class Module; +class StringRef; + +/// Splits the module M into N linkable partitions. The function ModuleCallback +/// is called N times passing each individual partition as the MPart argument. +/// +/// FIXME: This function does not deal with the somewhat subtle symbol +/// visibility issues around module splitting, including (but not limited to): +/// +/// - Internal symbols should not collide with symbols defined outside the +/// module. +/// - Internal symbols defined in module-level inline asm should be visible to +/// each partition. 
+void SplitModule( + std::unique_ptr<Module> M, unsigned N, + std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback); + +} // End llvm namespace + +#endif
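[Editor's note: a usage sketch, not part of the patch, splitting a module into four partitions and writing each to its own bitcode file; the output naming is illustrative, and the necessary Bitcode/ReaderWriter.h, StringExtras.h, and raw_ostream.h includes are assumed.]

unsigned PartIdx = 0;
SplitModule(std::move(M), 4, [&](std::unique_ptr<Module> MPart) {
  std::error_code EC;
  // Each partition is independently linkable, enabling parallel codegen.
  raw_fd_ostream OS("part" + utostr(PartIdx++) + ".bc", EC, sys::fs::F_None);
  WriteBitcodeToFile(MPart.get(), OS);
});

diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 7f2cf8d7f59e..710817cddf6a 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -21,20 +21,23 @@ namespace llvm { class AssumptionCache; +class DominatorTree; class Loop; class LoopInfo; class LPPassManager; class MDNode; class Pass; +class ScalarEvolution; bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, - LoopInfo *LI, Pass *PP, LPPassManager *LPM, - AssumptionCache *AC); + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, bool PreserveLCSSA); bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopInfo *LI, - LPPassManager *LPM); + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 047ab818711b..469022f34c56 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -38,13 +38,34 @@ namespace llvm { /// to materialize Values on demand. class ValueMaterializer { virtual void anchor(); // Out of line method. - public: - virtual ~ValueMaterializer() {} - /// materializeValueFor - The client should implement this method if they - /// want to generate a mapped Value on demand. For example, if linking - /// lazily. - virtual Value *materializeValueFor(Value *V) = 0; + protected: + ~ValueMaterializer() = default; + ValueMaterializer() = default; + ValueMaterializer(const ValueMaterializer&) = default; + ValueMaterializer &operator=(const ValueMaterializer&) = default; + + public: + /// The client should implement this method if they want to generate a + /// mapped Value on demand. For example, if linking lazily. + virtual Value *materializeDeclFor(Value *V) = 0; + + /// If the data being mapped is recursive, the above function can map + /// just the declaration and this is called to compute the initializer. + /// It is called after the mapping is recorded, so it doesn't need to worry + /// about recursion. + virtual void materializeInitFor(GlobalValue *New, GlobalValue *Old); + + /// If the client needs to handle temporary metadata it must implement + /// these methods. + virtual Metadata *mapTemporaryMetadata(Metadata *MD) { return nullptr; } + virtual void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) {} + + /// The client should implement this method if some metadata need + /// not be mapped, for example DISubprogram metadata for functions not + /// linked into the destination module. + virtual bool isMetadataNeeded(Metadata *MD) { return true; } }; /// RemapFlags - These are flags that the value mapping APIs allow. @@ -59,7 +80,20 @@ namespace llvm { /// RF_IgnoreMissingEntries - If this flag is set, the remapper ignores /// entries that are not in the value map. If it is unset, it aborts if an /// operand is asked to be remapped which doesn't exist in the mapping. - RF_IgnoreMissingEntries = 2 + RF_IgnoreMissingEntries = 2, + + /// Instruct the remapper to move distinct metadata instead of duplicating + /// it when there are module-level changes.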
+ RF_MoveDistinctMDs = 4, + + /// Any global values not in value map are mapped to null instead of + /// mapping to self. Illegal if RF_IgnoreMissingEntries is also set. + RF_NullMapMissingGlobalValues = 8, + + /// Set when there is still temporary metadata that must be handled, + /// such as when we are doing function importing and will materialize + /// and link metadata as a postpass. + RF_HaveUnmaterializedMetadata = 16, }; static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index dcc5ce1059ff..0adce0c9602d 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -190,17 +190,15 @@ module LLVM_Utils { // Exclude this; it's fundamentally non-modular. exclude header "Support/PluginLoader.h" - // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts - // with the Analysis module (which also defines an llvm::GCOVOptions). - exclude header "Support/GCOV.h" - // FIXME: Mislayered? exclude header "Support/TargetRegistry.h" // These are intended for textual inclusion. + textual header "Support/ARMTargetParser.def" textual header "Support/Dwarf.def" textual header "Support/ELFRelocs/AArch64.def" textual header "Support/ELFRelocs/ARM.def" + textual header "Support/ELFRelocs/AVR.def" textual header "Support/ELFRelocs/Hexagon.def" textual header "Support/ELFRelocs/i386.def" textual header "Support/ELFRelocs/Mips.def" @@ -210,6 +208,12 @@ module LLVM_Utils { textual header "Support/ELFRelocs/SystemZ.def" textual header "Support/ELFRelocs/x86_64.def" } + + // This part of the module is usable from both C and C++ code. + module ConvertUTF { + header "Support/ConvertUTF.h" + export * + } } module LLVM_CodeGen_MachineValueType { diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 44d137dffd22..35f2e97622fa 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -25,9 +25,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -40,44 +47,72 @@ #include "llvm/Pass.h" using namespace llvm; -// Register the AliasAnalysis interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) -char AliasAnalysis::ID = 0; +/// Allow disabling BasicAA from the AA results. This is particularly useful +/// when testing to isolate a single AA implementation. 
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index dcc5ce1059ff..0adce0c9602d 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -190,17 +190,15 @@ module LLVM_Utils {
     // Exclude this; it's fundamentally non-modular.
     exclude header "Support/PluginLoader.h"
 
-    // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts
-    // with the Analysis module (which also defines an llvm::GCOVOptions).
-    exclude header "Support/GCOV.h"
-
     // FIXME: Mislayered?
     exclude header "Support/TargetRegistry.h"
 
     // These are intended for textual inclusion.
+    textual header "Support/ARMTargetParser.def"
     textual header "Support/Dwarf.def"
     textual header "Support/ELFRelocs/AArch64.def"
     textual header "Support/ELFRelocs/ARM.def"
+    textual header "Support/ELFRelocs/AVR.def"
     textual header "Support/ELFRelocs/Hexagon.def"
     textual header "Support/ELFRelocs/i386.def"
     textual header "Support/ELFRelocs/Mips.def"
@@ -210,6 +208,12 @@ module LLVM_Utils {
     textual header "Support/ELFRelocs/SystemZ.def"
     textual header "Support/ELFRelocs/x86_64.def"
   }
+
+  // This part of the module is usable from both C and C++ code.
+  module ConvertUTF {
+    header "Support/ConvertUTF.h"
+    export *
+  }
 }
 
 module LLVM_CodeGen_MachineValueType {
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 44d137dffd22..35f2e97622fa 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -25,9 +25,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -40,44 +47,72 @@
 #include "llvm/Pass.h"
 using namespace llvm;
 
-// Register the AliasAnalysis interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
-char AliasAnalysis::ID = 0;
+/// Allow disabling BasicAA from the AA results. This is particularly useful
+/// when testing to isolate a single AA implementation.
+static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
+                                    cl::init(false));
+
+AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) {
+  for (auto &AA : AAs)
+    AA->setAAResults(this);
+}
+
+AAResults &AAResults::operator=(AAResults &&Arg) {
+  AAs = std::move(Arg.AAs);
+  for (auto &AA : AAs)
+    AA->setAAResults(this);
+  return *this;
+}
+
+AAResults::~AAResults() {
+// FIXME: It would be nice to at least clear out the pointers back to this
+// aggregation here, but we end up with non-nesting lifetimes in the legacy
+// pass manager that prevent this from working. In the legacy pass manager
+// we'll end up with dangling references here in some cases.
+#if 0
+  for (auto &AA : AAs)
+    AA->setAAResults(nullptr);
+#endif
+}
 
 //===----------------------------------------------------------------------===//
 // Default chaining methods
 //===----------------------------------------------------------------------===//
 
-AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->alias(LocA, LocB);
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+                             const MemoryLocation &LocB) {
+  for (const auto &AA : AAs) {
+    auto Result = AA->alias(LocA, LocB);
+    if (Result != MayAlias)
+      return Result;
+  }
+  return MayAlias;
 }
 
-bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
-                                           bool OrLocal) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->pointsToConstantMemory(Loc, OrLocal);
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+                                       bool OrLocal) {
+  for (const auto &AA : AAs)
+    if (AA->pointsToConstantMemory(Loc, OrLocal))
+      return true;
+
+  return false;
 }
 
-AliasAnalysis::ModRefResult
-AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->getArgModRefInfo(CS, ArgIdx);
+ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+  ModRefInfo Result = MRI_ModRef;
+
+  for (const auto &AA : AAs) {
+    Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
+
+    // Early-exit the moment we reach the bottom of the lattice.
+    if (Result == MRI_NoModRef)
+      return Result;
+  }
+
+  return Result;
 }
 
-void AliasAnalysis::deleteValue(Value *V) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  AA->deleteValue(V);
-}
-
-void AliasAnalysis::addEscapingUse(Use &U) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  AA->addEscapingUse(U);
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
   // We may have two calls
   if (auto CS = ImmutableCallSite(I)) {
     // Check if the two calls modify the same memory
@@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
     // is that if the call references what this instruction
     // defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I); - if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) - return AliasAnalysis::ModRef; + if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) + return MRI_ModRef; } - return AliasAnalysis::NoModRef; + return MRI_NoModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + ModRefInfo Result = MRI_ModRef; - ModRefBehavior MRB = getModRefBehavior(CS); - if (MRB == DoesNotAccessMemory) - return NoModRef; + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); - ModRefResult Mask = ModRef; - if (onlyReadsMemory(MRB)) - Mask = Ref; - - if (onlyAccessesArgPointees(MRB)) { - bool doesAlias = false; - ModRefResult AllArgsMask = NoModRef; - if (doesAccessArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) { - const Value *Arg = *AI; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned ArgIdx = std::distance(CS.arg_begin(), AI); - MemoryLocation ArgLoc = - MemoryLocation::getForArgument(CS, ArgIdx, *TLI); - if (!isNoAlias(ArgLoc, Loc)) { - ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx); - doesAlias = true; - AllArgsMask = ModRefResult(AllArgsMask | ArgMask); - } - } - } - if (!doesAlias) - return NoModRef; - Mask = ModRefResult(Mask & AllArgsMask); + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; } - // If Loc is a constant memory location, the call definitely could not - // modify the memory location. - if ((Mask & Mod) && pointsToConstantMemory(Loc)) - Mask = ModRefResult(Mask & ~Mod); - - // If this is the end of the chain, don't forward. - if (!AA) return Mask; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); + return Result; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + ModRefInfo Result = MRI_ModRef; - // If CS1 or CS2 are readnone, they don't interact. - ModRefBehavior CS1B = getModRefBehavior(CS1); - if (CS1B == DoesNotAccessMemory) return NoModRef; + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); - ModRefBehavior CS2B = getModRefBehavior(CS2); - if (CS2B == DoesNotAccessMemory) return NoModRef; - - // If they both only read from memory, there is no dependence. - if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) - return NoModRef; - - AliasAnalysis::ModRefResult Mask = ModRef; - - // If CS1 only reads memory, the only dependence on CS2 can be - // from CS1 reading memory written by CS2. - if (onlyReadsMemory(CS1B)) - Mask = ModRefResult(Mask & Ref); - - // If CS2 only access memory through arguments, accumulate the mod/ref - // information from CS1's references to the memory referenced by - // CS2's arguments. 
- if (onlyAccessesArgPointees(CS2B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) { - for (ImmutableCallSite::arg_iterator - I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI); - - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of - // CS1 on that location is the inverse. - ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == Mod) - ArgMask = ModRef; - else if (ArgMask == Ref) - ArgMask = Mod; - - R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask); - if (R == Mask) - break; - } - } - return R; + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; } - // If CS1 only accesses memory through arguments, check if CS2 references - // any of the memory referenced by CS1's arguments. If not, return NoModRef. - if (onlyAccessesArgPointees(CS1B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) { - for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI); + return Result; +} - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. - ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || - ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef)) - R = ModRefResult((R | ArgMask) & Mask); +FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; - if (R == Mask) - break; - } - } - return R; + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS)); + + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; } - // If this is the end of the chain, don't forward. - if (!AA) return Mask; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); + return Result; } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; - ModRefBehavior Min = UnknownModRefBehavior; + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F)); - // Call back into the alias analysis with the other form of getModRefBehavior - // to see if it can give a better response. - if (const Function *F = CS.getCalledFunction()) - Min = getModRefBehavior(F); + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; + } - // If this is the end of the chain, don't forward. 
- if (!AA) return Min; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any result we've managed to compute. - return ModRefBehavior(AA->getModRefBehavior(CS) & Min); -} - -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(const Function *F) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getModRefBehavior(F); + return Result; } //===----------------------------------------------------------------------===// -// AliasAnalysis non-virtual helper method implementation +// Helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const LoadInst *L, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!L->isUnordered()) - return ModRef; + return MRI_ModRef; // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) - return NoModRef; + return MRI_NoModRef; // Otherwise, a load just reads. - return Ref; + return MRI_Ref; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!S->isUnordered()) - return ModRef; + return MRI_ModRef; if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. if (!alias(MemoryLocation::get(S), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. if (pointsToConstantMemory(Loc)) - return NoModRef; - + return MRI_NoModRef; } // Otherwise, a store just writes. - return Mod; + return MRI_Mod; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, + const MemoryLocation &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. if (!alias(MemoryLocation::get(V), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. if (pointsToConstantMemory(Loc)) - return NoModRef; + return MRI_NoModRef; } // Otherwise, a va_arg reads and writes. - return ModRef; + return MRI_ModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, - const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchpad reads and writes. + return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchret reads and writes. 
+  return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+                                    const MemoryLocation &Loc) {
   // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
   if (CX->getSuccessOrdering() > Monotonic)
-    return ModRef;
+    return MRI_ModRef;
 
   // If the cmpxchg address does not alias the location, it does not access it.
   if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  return ModRef;
+  return MRI_ModRef;
 }
 
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW,
-                             const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+                                    const MemoryLocation &Loc) {
   // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
   if (RMW->getOrdering() > Monotonic)
-    return ModRef;
+    return MRI_ModRef;
 
   // If the atomicrmw address does not alias the location, it does not access it.
   if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  return ModRef;
+  return MRI_ModRef;
 }
 
-// FIXME: this is really just shoring-up a deficiency in alias analysis.
-// BasicAA isn't willing to spend linear time determining whether an alloca
-// was captured before or after this particular call, while we are. However,
-// with a smarter AA in place, this test is just wasting compile time.
-AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
-    const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) {
+/// \brief Return information about whether a particular call site modifies
+/// or reads the specified memory location \p MemLoc before instruction \p I
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+/// instruction-ordering queries inside the BasicBlock containing \p I.
+/// FIXME: this is really just shoring-up a deficiency in alias analysis.
+/// BasicAA isn't willing to spend linear time determining whether an alloca
+/// was captured before or after this particular call, while we are. However,
+/// with a smarter AA in place, this test is just wasting compile time.
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
+                                         const MemoryLocation &MemLoc,
+                                         DominatorTree *DT,
+                                         OrderedBasicBlock *OBB) {
   if (!DT)
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
-  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
+  const Value *Object =
+      GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
   if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
       isa<Constant>(Object))
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
   ImmutableCallSite CS(I);
   if (!CS.getInstruction() || CS.getInstruction() == Object)
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
   if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
                                        /* StoreCaptures */ true, I, DT,
-                                       /* include Object */ true))
-    return AliasAnalysis::ModRef;
+                                       /* include Object */ true,
+                                       /* OrderedBasicBlock */ OBB))
+    return MRI_ModRef;
 
   unsigned ArgNo = 0;
-  AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
+  ModRefInfo R = MRI_NoModRef;
   for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
        CI != CE; ++CI, ++ArgNo) {
     // Only look at the no-capture or byval pointer arguments.  If this
@@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
     if (CS.doesNotAccessMemory(ArgNo))
       continue;
     if (CS.onlyReadsMemory(ArgNo)) {
-      R = AliasAnalysis::Ref;
+      R = MRI_Ref;
      continue;
     }
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
   }
   return R;
 }
 
-// AliasAnalysis destructor: DO NOT move this to the header file for
-// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
-// the AliasAnalysis.o file in the current .a file, causing alias analysis
-// support to not be included in the tool correctly!
-//
-AliasAnalysis::~AliasAnalysis() {}
-
-/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
-/// AliasAnalysis interface before any other methods are called.
-///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
-  DL = NewDL;
-  auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  TLI = TLIP ? &TLIP->getTLI() : nullptr;
-  AA = &P->getAnalysis<AliasAnalysis>();
-}
-
-// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)).
-void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<AliasAnalysis>(); // All AA's chain
-}
-
-/// getTypeStoreSize - Return the DataLayout store size for the given type,
-/// if known, or a conservative value otherwise.
-///
-uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
-  return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize;
-}
-
 /// canBasicBlockModify - Return true if it is possible for execution of the
 /// specified basic block to modify the location Loc.
 ///
-bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
-                                        const MemoryLocation &Loc) {
-  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
+bool AAResults::canBasicBlockModify(const BasicBlock &BB,
+                                    const MemoryLocation &Loc) {
+  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
 }
 
 /// canInstructionRangeModRef - Return true if it is possible for the
@@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
 /// mode) the location Loc. The instructions to consider are all
 /// of the instructions in the range of [I1,I2] INCLUSIVE.
 /// I1 and I2 must be in the same basic block.
-bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
-                                              const Instruction &I2,
-                                              const MemoryLocation &Loc,
-                                              const ModRefResult Mode) {
+bool AAResults::canInstructionRangeModRef(const Instruction &I1,
+                                          const Instruction &I2,
+                                          const MemoryLocation &Loc,
+                                          const ModRefInfo Mode) {
   assert(I1.getParent() == I2.getParent() &&
          "Instructions not in same basic block!");
-  BasicBlock::const_iterator I = &I1;
-  BasicBlock::const_iterator E = &I2;
+  BasicBlock::const_iterator I = I1.getIterator();
+  BasicBlock::const_iterator E = I2.getIterator();
   ++E;  // Convert from inclusive to exclusive range.
 
   for (; I != E; ++I) // Check every instruction in range
-    if (getModRefInfo(I, Loc) & Mode)
+    if (getModRefInfo(&*I, Loc) & Mode)
       return true;
   return false;
 }
 
+// Provide a definition for the root virtual destructor.
+AAResults::Concept::~Concept() {}
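[Editor's note: a hedged sketch of querying the AAResults aggregation defined above; the helper function is hypothetical, while the AAResults entry points are the ones implemented in this file. Each query walks every registered AA and stops early at the bottom of the lattice.]

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool storeMayClobberLoad(AAResults &AA, StoreInst &SI, LoadInst &LI) {
      // NoAlias from any single AA settles the question for the whole set.
      if (AA.alias(MemoryLocation::get(&SI), MemoryLocation::get(&LI)) == NoAlias)
        return false;
      // ModRefInfo is a bitmask; intersect with MRI_Mod to ask "may it write?".
      return (AA.getModRefInfo(&SI, MemoryLocation::get(&LI)) & MRI_Mod) != 0;
    }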
+
+namespace {
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+  typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+
+  CallbackT CB;
+
+  static char ID;
+
+  ExternalAAWrapperPass() : ImmutablePass(ID) {
+    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
+  explicit ExternalAAWrapperPass(CallbackT CB)
+      : ImmutablePass(ID), CB(std::move(CB)) {
+    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+};
+}
+
+char ExternalAAWrapperPass::ID = 0;
+INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
+                false, true)
+
+ImmutablePass *
+llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) {
+  return new ExternalAAWrapperPass(std::move(Callback));
+}
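[Editor's note: a hedged sketch of how a plugin or target might use the hook above; MyAAResult and the surrounding setup are hypothetical, while createExternalAAWrapperPass and the std::function callback signature come from this patch.]

    llvm::legacy::PassManager PM;
    static MyAAResult MyAA; // must outlive the pipeline; sketch only
    PM.add(llvm::createExternalAAWrapperPass(
        [](llvm::Pass &P, llvm::Function &F, llvm::AAResults &AAR) {
          // Consulted alongside BasicAA, TBAA, etc. by AAResultsWrapperPass.
          AAR.addAAResult(MyAA);
        }));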
+
+AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) {
+  initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char AAResultsWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa",
+                      "Function Alias Analysis Results", false, true)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
+INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
+                    "Function Alias Analysis Results", false, true)
+
+FunctionPass *llvm::createAAResultsWrapperPass() {
+  return new AAResultsWrapperPass();
+}
+
+/// Run the wrapper pass to rebuild an aggregation over known AA passes.
+///
+/// This is the legacy pass manager's interface to the new-style AA results
+/// aggregation object. Because this is somewhat shoe-horned into the legacy
+/// pass manager, we hard code all the specific alias analyses available into
+/// it. While the particular set enabled is configured via commandline flags,
+/// adding a new alias analysis to LLVM will require adding support for it to
+/// this list.
+bool AAResultsWrapperPass::runOnFunction(Function &F) {
+  // NB! This *must* be reset before adding new AA results to the new
+  // AAResults object because in the legacy pass manager, each instance
+  // of these will refer to the *same* immutable analyses, registering and
+  // unregistering themselves with them. We need to carefully tear down the
+  // previous object first, in this case replacing it with an empty one, before
+  // registering new results.
+  AAR.reset(new AAResults());
+
+  // BasicAA is always available for function analyses. Also, we add it first
+  // so that it can trump TBAA results when it proves MustAlias.
+  // FIXME: TBAA should have an explicit mode to support this and then we
+  // should reconsider the ordering here.
+  if (!DisableBasicAA)
+    AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+
+  // Populate the results with the currently available AAs.
+  if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass =
+          getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+
+  // If available, run an external AA providing callback over the results as
+  // well.
+  if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+    if (WrapperPass->CB)
+      WrapperPass->CB(*this, F, *AAR);
+
+  // Analyses don't mutate the IR, so return false.
+  return false;
+}
+
+void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicAAWrapperPass>();
+
+  // We also need to mark all the alias analysis passes we will potentially
+  // probe in runOnFunction as used here to ensure the legacy pass manager
+  // preserves them. This hard coding of lists of alias analyses is specific to
+  // the legacy pass manager.
+  AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
+  AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
+  AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>();
+  AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
+  AU.addUsedIfAvailable<SCEVAAWrapperPass>();
+  AU.addUsedIfAvailable<CFLAAWrapperPass>();
+}
+
+AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
+                                        BasicAAResult &BAR) {
+  AAResults AAR;
+
+  // Add in our explicitly constructed BasicAA results.
+  if (!DisableBasicAA)
+    AAR.addAAResult(BAR);
+
+  // Populate the results with the other currently available AAs.
+  if (auto *WrapperPass =
+          P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass =
+          P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+
+  return AAR;
+}
+
 /// isNoAliasCall - Return true if this pointer is returned by a noalias
 /// function.
 bool llvm::isNoAliasCall(const Value *V) {
-  if (isa<CallInst>(V) || isa<InvokeInst>(V))
-    return ImmutableCallSite(cast<Instruction>(V))
-        .paramHasAttr(0, Attribute::NoAlias);
+  if (auto CS = ImmutableCallSite(V))
+    return CS.paramHasAttr(0, Attribute::NoAlias);
   return false;
 }
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
deleted file mode 100644
index 9b6a5a44d80c..000000000000
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass which can be used to count how many alias queries
-// are being made and how the alias analysis implementation being used responds.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt -PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); -static cl::opt -PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); - -namespace { - class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { - unsigned No, May, Partial, Must; - unsigned NoMR, JustRef, JustMod, MR; - Module *M; - public: - static char ID; // Class identification, replacement for typeinfo - AliasAnalysisCounter() : ModulePass(ID) { - initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); - No = May = Partial = Must = 0; - NoMR = JustRef = JustMod = MR = 0; - } - - void printLine(const char *Desc, unsigned Val, unsigned Sum) { - errs() << " " << Val << " " << Desc << " responses (" - << Val*100/Sum << "%)\n"; - } - ~AliasAnalysisCounter() override { - unsigned AASum = No+May+Partial+Must; - unsigned MRSum = NoMR+JustRef+JustMod+MR; - if (AASum + MRSum) { // Print a report if any counted queries occurred... - errs() << "\n===== Alias Analysis Counter Report =====\n" - << " Analysis counted:\n" - << " " << AASum << " Total Alias Queries Performed\n"; - if (AASum) { - printLine("no alias", No, AASum); - printLine("may alias", May, AASum); - printLine("partial alias", Partial, AASum); - printLine("must alias", Must, AASum); - errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" - << May*100/AASum << "%/" - << Partial*100/AASum << "%/" - << Must*100/AASum<<"%\n\n"; - } - - errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; - if (MRSum) { - printLine("no mod/ref", NoMR, MRSum); - printLine("ref", JustRef, MRSum); - printLine("mod", JustMod, MRSum); - printLine("mod/ref", MR, MRSum); - errs() << " Mod/Ref Analysis Counter Summary: " <M = &M; - InitializeAliasAnalysis(this, &M.getDataLayout()); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired(); - AU.setPreservesAll(); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - // FIXME: We could count these too... - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return getAnalysis().pointsToConstantMemory(Loc, OrLocal); - } - - // Forwarding functions: just delegate to a real AA implementation, counting - // the number of responses... 
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override {
-      return AliasAnalysis::getModRefInfo(CS1,CS2);
-    }
-  };
-}
-
-char AliasAnalysisCounter::ID = 0;
-INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
-                   "Count Alias Analysis Query Responses", false, true, false)
-
-ModulePass *llvm::createAliasAnalysisCounterPass() {
-  return new AliasAnalysisCounter();
-}
-
-AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA,
-                                        const MemoryLocation &LocB) {
-  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
-
-  const char *AliasString = nullptr;
-  switch (R) {
-  case NoAlias:      No++;      AliasString = "No alias"; break;
-  case MayAlias:     May++;     AliasString = "May alias"; break;
-  case PartialAlias: Partial++; AliasString = "Partial alias"; break;
-  case MustAlias:    Must++;    AliasString = "Must alias"; break;
-  }
-
-  if (PrintAll || (PrintAllFailures && R == MayAlias)) {
-    errs() << AliasString << ":\t";
-    errs() << "[" << LocA.Size << "B] ";
-    LocA.Ptr->printAsOperand(errs(), true, M);
-    errs() << ", ";
-    errs() << "[" << LocB.Size << "B] ";
-    LocB.Ptr->printAsOperand(errs(), true, M);
-    errs() << "\n";
-  }
-
-  return R;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
-                                    const MemoryLocation &Loc) {
-  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
-
-  const char *MRString = nullptr;
-  switch (R) {
-  case NoModRef: NoMR++;    MRString = "NoModRef"; break;
-  case Ref:      JustRef++; MRString = "JustRef"; break;
-  case Mod:      JustMod++; MRString = "JustMod"; break;
-  case ModRef:   MR++;      MRString = "ModRef"; break;
-  }
-
-  if (PrintAll || (PrintAllFailures && R == ModRef)) {
-    errs() << MRString << ": Ptr: ";
-    errs() << "[" << Loc.Size << "B] ";
-    Loc.Ptr->printAsOperand(errs(), true, M);
-    errs() << "\t<->" << *CS.getInstruction() << '\n';
-  }
-  return R;
-}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 5d1b001fe161..12917b650e5e 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
@@ -57,7 +59,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.setPreservesAll();
     }
 
@@ -81,7 +83,7 @@ namespace {
 char AAEval::ID = 0;
 INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
                       "Exhaustive Alias Analysis Precision Evaluator", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AAEval, "aa-eval",
                     "Exhaustive Alias Analysis Precision Evaluator", false, true)
 
@@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) {
 }
 
 bool AAEval::runOnFunction(Function &F) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   SetVector<Value *> Pointers;
-  SetVector<CallSite> CallSites;
+  SmallSetVector<CallSite, 16> CallSites;
   SetVector<Value *> Loads;
   SetVector<Value *> Stores;
 
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
-       I != E; ++I)
-    if (I->getType()->isPointerTy())    // Add all pointer arguments.
-      Pointers.insert(I);
+  for (auto &I : F.args())
+    if (I.getType()->isPointerTy()) // Add all pointer arguments.
+      Pointers.insert(&I);
 
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
     if (I->getType()->isPointerTy()) // Add all pointer instructions.
@@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) {
       if (!isa<Function>(Callee) && isInterestingPointer(Callee))
         Pointers.insert(Callee);
       // Consider formals.
-      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-           AI != AE; ++AI)
-        if (isInterestingPointer(*AI))
-          Pointers.insert(*AI);
+      for (Use &DataOp : CS.data_ops())
+        if (isInterestingPointer(DataOp))
+          Pointers.insert(DataOp);
       CallSites.insert(CS);
     } else {
       // Consider all operands.
@@ -188,12 +190,12 @@
        I1 != E; ++I1) {
     uint64_t I1Size = MemoryLocation::UnknownSize;
     Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
-    if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+    if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
 
     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       uint64_t I2Size = MemoryLocation::UnknownSize;
       Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
-      if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+      if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
 
       switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
       case NoAlias:
@@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) {
   }
 
   // Mod/ref alias analysis: compare all pairs of calls and values
-  for (SetVector<CallSite>::iterator C = CallSites.begin(),
-         Ce = CallSites.end(); C != Ce; ++C) {
+  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
     Instruction *I = C->getInstruction();
 
     for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
          V != Ve; ++V) {
       uint64_t Size = MemoryLocation::UnknownSize;
       Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
-      if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+      if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
 
       switch (AA.getModRefInfo(*C, *V, Size)) {
-      case AliasAnalysis::NoModRef:
+      case MRI_NoModRef:
         PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
         ++NoModRefCount;
         break;
-      case AliasAnalysis::Mod:
+      case MRI_Mod:
         PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
         ++ModCount;
         break;
-      case AliasAnalysis::Ref:
+      case MRI_Ref:
        PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
        ++RefCount;
        break;
-      case AliasAnalysis::ModRef:
+      case MRI_ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
        ++ModRefCount;
        break;
@@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) {
   }
 
   // Mod/ref alias analysis: compare all pairs of calls
-  for (SetVector<CallSite>::iterator C = CallSites.begin(),
-         Ce = CallSites.end(); C != Ce; ++C) {
-    for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+    for (auto D = CallSites.begin(); D != Ce; ++D) {
      if (D == C) continue;
      switch (AA.getModRefInfo(*C, *D)) {
-      case AliasAnalysis::NoModRef:
+      case MRI_NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
        ++NoModRefCount;
        break;
-      case AliasAnalysis::Mod:
+      case MRI_Mod:
        PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
        ++ModCount;
        break;
-      case AliasAnalysis::Ref:
+      case MRI_Ref:
        PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
        ++RefCount;
break; - case AliasAnalysis::ModRef: + case MRI_ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRefCount; break; diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp deleted file mode 100644 index e5107b3bc827..000000000000 --- a/lib/Analysis/AliasDebugger.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass checks alias analysis users to ensure that if they -// create a new value, they do not query AA without informing it of the value. -// It acts as a shim over any other AA pass you want. -// -// Yes keeping track of every value in the program is expensive, but this is -// a debugging pass. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include -using namespace llvm; - -namespace { - - class AliasDebugger : public ModulePass, public AliasAnalysis { - - //What we do is simple. Keep track of every value the AA could - //know about, and verify that queries are one of those. - //A query to a value that didn't exist when the AA was created - //means someone forgot to update the AA when creating new values - - std::set Vals; - - public: - static char ID; // Class identification, replacement for typeinfo - AliasDebugger() : ModulePass(ID) { - initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class - - for(Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - Vals.insert(&*I); - for (User::const_op_iterator OI = I->op_begin(), - OE = I->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - - for(Module::iterator I = M.begin(), - E = M.end(); I != E; ++I){ - Vals.insert(&*I); - if(!I->isDeclaration()) { - for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); - AI != AE; ++AI) - Vals.insert(&*AI); - for (Function::const_iterator FI = I->begin(), FE = I->end(); - FI != FE; ++FI) - for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - Vals.insert(&*BI); - for (User::const_op_iterator OI = BI->op_begin(), - OE = BI->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - } - - } - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. 
- void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - assert(Vals.find(LocA.Ptr) != Vals.end() && - "Never seen value in AA before"); - assert(Vals.find(LocB.Ptr) != Vals.end() && - "Never seen value in AA before"); - return AliasAnalysis::alias(LocA, LocB); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::getModRefInfo(CS, Loc); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1,CS2); - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - } - - void deleteValue(Value *V) override { - assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); - AliasAnalysis::deleteValue(V); - } - - }; -} - -char AliasDebugger::ID = 0; -INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", - "AA use debugger", false, true, false) - -Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } - diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 54d0f4304e1f..3094049b3cc3 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (AA.getModRefInfo(UnknownInsts[i], - MemoryLocation(Ptr, Size, AAInfo)) != - AliasAnalysis::NoModRef) + MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef) return true; } @@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { ImmutableCallSite C1(getUnknownInst(i)), C2(Inst); - if (!C1 || !C2 || - AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || - AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) + if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || + AA.getModRefInfo(C2, C1) != MRI_NoModRef) return true; } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo( - Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) != - AliasAnalysis::NoModRef) + if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), + I.getAAInfo())) != MRI_NoModRef) return true; return false; @@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; if (!FoundSet) { // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. 
} @@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA)) continue; if (!FoundSet) // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. else if (!Cur->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. } @@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) { AliasSet::AccessLattice Access = AliasSet::RefAccess; bool NewPtr; + const DataLayout &DL = LI->getModule()->getDataLayout(); AliasSet &AS = addPointer(LI->getOperand(0), - AA.getTypeStoreSize(LI->getType()), + DL.getTypeStoreSize(LI->getType()), AAInfo, Access, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) { AliasSet::AccessLattice Access = AliasSet::ModAccess; bool NewPtr; + const DataLayout &DL = SI->getModule()->getDataLayout(); Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), - AA.getTypeStoreSize(Val->getType()), + DL.getTypeStoreSize(Val->getType()), AAInfo, Access, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) { } void AliasSetTracker::add(BasicBlock &BB) { - for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - add(I); + for (auto &I : BB) + add(&I); } void AliasSetTracker::add(const AliasSetTracker &AST) { @@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { } bool AliasSetTracker::remove(LoadInst *LI) { - uint64_t Size = AA.getTypeStoreSize(LI->getType()); + const DataLayout &DL = LI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); @@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) { } bool AliasSetTracker::remove(StoreInst *SI) { - uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + const DataLayout &DL = SI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType()); AAMDNodes AAInfo; SI->getAAMetadata(AAInfo); @@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) { // dangling pointers to deleted instructions. // void AliasSetTracker::deleteValue(Value *PtrVal) { - // Notify the alias analysis implementation that this value is gone. - AA.deleteValue(PtrVal); - // If this is a call instruction, remove the callsite from the appropriate // AliasSet (if present). 
   if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
@@ -650,11 +649,12 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
     }
 
     bool runOnFunction(Function &F) override {
-      Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+      auto &AAWP = getAnalysis<AAResultsWrapperPass>();
+      Tracker = new AliasSetTracker(AAWP.getAAResults());
 
       for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
         Tracker->add(&*I);
@@ -668,6 +668,6 @@ namespace {
 char AliasSetPrinter::ID = 0;
 INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
                       "Alias Set Printer", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
                     "Alias Set Printer", false, true)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 842ff0a14e2f..9c1ac000be2c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -20,23 +20,23 @@ using namespace llvm;
 /// initializeAnalysis - Initialize all passes linked into the Analysis library.
 void llvm::initializeAnalysis(PassRegistry &Registry) {
-  initializeAliasAnalysisAnalysisGroup(Registry);
-  initializeAliasAnalysisCounterPass(Registry);
   initializeAAEvalPass(Registry);
-  initializeAliasDebuggerPass(Registry);
   initializeAliasSetPrinterPass(Registry);
-  initializeNoAAPass(Registry);
-  initializeBasicAliasAnalysisPass(Registry);
-  initializeBlockFrequencyInfoPass(Registry);
-  initializeBranchProbabilityInfoPass(Registry);
+  initializeBasicAAWrapperPassPass(Registry);
+  initializeBlockFrequencyInfoWrapperPassPass(Registry);
+  initializeBranchProbabilityInfoWrapperPassPass(Registry);
+  initializeCallGraphWrapperPassPass(Registry);
+  initializeCallGraphPrinterPass(Registry);
+  initializeCallGraphViewerPass(Registry);
   initializeCostModelAnalysisPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
   initializeCFGOnlyViewerPass(Registry);
   initializeCFGOnlyPrinterPass(Registry);
-  initializeCFLAliasAnalysisPass(Registry);
+  initializeCFLAAWrapperPassPass(Registry);
   initializeDependenceAnalysisPass(Registry);
   initializeDelinearizationPass(Registry);
+  initializeDemandedBitsPass(Registry);
   initializeDivergenceAnalysisPass(Registry);
   initializeDominanceFrontierPass(Registry);
   initializeDomViewerPass(Registry);
@@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializePostDomPrinterPass(Registry);
   initializePostDomOnlyViewerPass(Registry);
   initializePostDomOnlyPrinterPass(Registry);
+  initializeAAResultsWrapperPassPass(Registry);
+  initializeGlobalsAAWrapperPassPass(Registry);
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
   initializeLazyValueInfoPass(Registry);
-  initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
   initializeLoopInfoWrapperPassPass(Registry);
   initializeMemDepPrinterPass(Registry);
   initializeMemDerefPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
+  initializeObjCARCAAWrapperPassPass(Registry);
   initializePostDominatorTreePass(Registry);
   initializeRegionInfoPassPass(Registry);
   initializeRegionViewerPass(Registry);
   initializeRegionPrinterPass(Registry);
   initializeRegionOnlyViewerPass(Registry);
   initializeRegionOnlyPrinterPass(Registry);
-  initializeScalarEvolutionPass(Registry);
-  initializeScalarEvolutionAliasAnalysisPass(Registry);
+  initializeSCEVAAWrapperPassPass(Registry);
+  initializeScalarEvolutionWrapperPassPass(Registry);
   initializeTargetTransformInfoWrapperPassPass(Registry);
-  initializeTypeBasedAliasAnalysisPass(Registry);
-  initializeScopedNoAliasAAPass(Registry);
+  initializeTypeBasedAAWrapperPassPass(Registry);
+  initializeScopedNoAliasAAWrapperPassPass(Registry);
 }
 
 void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
   initializeAnalysis(*unwrap(R));
 }
 
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+  initializeAnalysis(*unwrap(R));
+}
+
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
   raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
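[Editor's note: both C API entry points above now funnel into the same initializeAnalysis(). A hedged sketch of a C-API client; the wrapper function name is hypothetical and the llvm-c calls are assumed from this era's headers.]

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"

    static void initAnalysisForTool(void) {
      LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
      LLVMInitializeAnalysis(R); // registers aa-eval and the AA wrapper passes
      LLVMInitializeIPA(R);      // kept for compatibility; same registrations
    }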
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 35863542f437..00f346ea115d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,24 +13,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -42,6 +39,18 @@
 #include <algorithm>
 using namespace llvm;
 
+/// Enable analysis of recursive PHI nodes.
+static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
+                                          cl::init(false));
+
+/// SearchLimitReached / SearchTimes shows how often the limit to decompose
+/// GEPs is reached. It will affect the precision of basic alias analysis.
+#define DEBUG_TYPE "basicaa"
+STATISTIC(SearchLimitReached, "Number of times the limit to "
+                              "decompose GEPs is reached");
+STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
+
 /// Cutoff after which to stop analysing a set of phi nodes potentially involved
 /// in a cycle. Because we are analysing 'through' phi nodes we need to be
 /// careful with value equivalence. We use reachability to make sure a value
 /// cannot be involved in a cycle.
@@ -57,8 +66,8 @@
 static const unsigned MaxLookupSearchDepth = 6;
 
 //===----------------------------------------------------------------------===//
 // Useful predicates
 //===----------------------------------------------------------------------===//
 
-/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
-/// object that never escapes from the function.
+/// Returns true if the pointer is to a function-local object that never
+/// escapes from the function.
 static bool isNonEscapingLocalObject(const Value *V) {
   // If this is a local allocation, check to see if it escapes.
   if (isa<AllocaInst>(V) || isNoAliasCall(V))
@@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) {
   return false;
 }
 
-/// isEscapeSource - Return true if the pointer is one which would have
-/// been considered an escape by isNonEscapingLocalObject.
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
 static bool isEscapeSource(const Value *V) {
   if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
     return true;
@@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) {
   return false;
 }
 
-/// getObjectSize - Return the size of the object specified by V, or
-/// UnknownSize if unknown.
+/// Returns the size of the object specified by V, or UnknownSize if unknown.
 static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
@@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
   return MemoryLocation::UnknownSize;
 }
 
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
+/// Returns true if we can prove that the object specified by V is smaller than
+/// Size.
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
                                 const DataLayout &DL,
                                 const TargetLibraryInfo &TLI) {
@@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
+  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
 
   return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
 }
 
-/// isObjectSize - Return true if we can prove that the object specified
-/// by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size,
-                         const DataLayout &DL, const TargetLibraryInfo &TLI) {
+/// Returns true if we can prove that the object specified by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+                         const TargetLibraryInfo &TLI) {
   uint64_t ObjectSize = getObjectSize(V, DL, TLI);
   return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
 }
@@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size,
 // GetElementPtr Instruction Decomposition and Analysis
 //===----------------------------------------------------------------------===//
 
-namespace {
-  enum ExtensionKind {
-    EK_NotExtended,
-    EK_SignExt,
-    EK_ZeroExt
-  };
-
-  struct VariableGEPIndex {
-    const Value *V;
-    ExtensionKind Extension;
-    int64_t Scale;
-
-    bool operator==(const VariableGEPIndex &Other) const {
-      return V == Other.V && Extension == Other.Extension &&
-        Scale == Other.Scale;
-    }
-
-    bool operator!=(const VariableGEPIndex &Other) const {
-      return !operator==(Other);
-    }
-  };
-}
-
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*, and return whether we looked
-/// through any sign or zero extends. The incoming Value is known to have
-/// IntegerType and it may already be sign or zero extended.
+/// Analyzes the specified value as a linear expression: "A*V + B", where A and
+/// B are constant integers.
+///
+/// Returns the scale and offset values as APInts and return V as a Value*, and
+/// return whether we looked through any sign or zero extends. The incoming
+/// Value is known to have IntegerType and it may already be sign or zero
+/// extended.
 ///
 /// Note that this looks through extends, so the high bits may not be
 /// represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, - ExtensionKind &Extension, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, DominatorTree *DT) { +/*static*/ const Value *BasicAAResult::GetLinearExpression( + const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, + unsigned &SExtBits, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, return V; } - if (BinaryOperator *BOp = dyn_cast(V)) { + if (const ConstantInt *Const = dyn_cast(V)) { + // if it's a constant, just convert it to an offset and remove the variable. + // If we've been called recursively the Offset bit width will be greater + // than the constant's (the Offset's always as wide as the outermost call), + // so we'll zext here and process any extension in the isa & + // isa cases below. + Offset += Const->getValue().zextOrSelf(Offset.getBitWidth()); + assert(Scale == 0 && "Constant values don't have a scale"); + return V; + } + + if (const BinaryOperator *BOp = dyn_cast(V)) { if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { + + // If we've been called recursively then Offset and Scale will be wider + // that the BOp operands. We'll always zext it here as we'll process sign + // extensions below (see the isa / isa cases). + APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth()); + switch (BOp->getOpcode()) { - default: break; + default: + // We don't understand this instruction, so we can't decompose it any + // further. + Scale = 1; + Offset = 0; + return V; case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, - BOp, DT)) - break; - // FALL THROUGH. + BOp, DT)) { + Scale = 1; + Offset = 0; + return V; + } + // FALL THROUGH. case Instruction::Add: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset += RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset += RHS; + break; + case Instruction::Sub: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset -= RHS; + break; case Instruction::Mul: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset *= RHSC->getValue(); - Scale *= RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset *= RHS; + Scale *= RHS; + break; case Instruction::Shl: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset <<= RHSC->getValue().getLimitedValue(); - Scale <<= RHSC->getValue().getLimitedValue(); + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset <<= RHS.getLimitedValue(); + Scale <<= RHS.getLimitedValue(); + // the semantics of nsw and nuw for left shifts don't match those of + // multiplications, so we won't propagate them. 
+        NSW = NUW = false;
         return V;
       }
+
+      if (isa<OverflowingBinaryOperator>(BOp)) {
+        NUW &= BOp->hasNoUnsignedWrap();
+        NSW &= BOp->hasNoSignedWrap();
+      }
+      return V;
     }
   }
 
   // Since GEP indices are sign extended anyway, we don't care about the high
   // bits of a sign or zero extended value - just scales and offsets.  The
   // extensions have to be consistent though.
-  if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
-      (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+  if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
     Value *CastOp = cast<CastInst>(V)->getOperand(0);
-    unsigned OldWidth = Scale.getBitWidth();
+    unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();
     unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
-    Scale = Scale.trunc(SmallWidth);
-    Offset = Offset.trunc(SmallWidth);
-    Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+    unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits;
+    const Value *Result =
+        GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
+                            Depth + 1, AC, DT, NSW, NUW);
 
-    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
-                                        Depth + 1, AC, DT);
-    Scale = Scale.zext(OldWidth);
-    Offset = Offset.zext(OldWidth);
+    // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
+    // by just incrementing the number of bits we've extended by.
+    unsigned ExtendedBy = NewWidth - SmallWidth;
+
+    if (isa<SExtInst>(V) && ZExtBits == 0) {
+      // sext(sext(%x, a), b) == sext(%x, a + b)
+
+      if (NSW) {
+        // We haven't sign-wrapped, so it's valid to decompose sext(%x + c)
+        // into sext(%x) + sext(c). We'll sext the Offset ourselves:
+        unsigned OldWidth = Offset.getBitWidth();
+        Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth);
+      } else {
+        // We may have signed-wrapped, so don't decompose sext(%x + c) into
+        // sext(%x) + sext(c).
+        Scale = 1;
+        Offset = 0;
+        Result = CastOp;
+        ZExtBits = OldZExtBits;
+        SExtBits = OldSExtBits;
+      }
+      SExtBits += ExtendedBy;
+    } else {
+      // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b)
+
+      if (!NUW) {
+        // We may have unsigned-wrapped, so don't decompose zext(%x + c) into
+        // zext(%x) + zext(c).
+        Scale = 1;
+        Offset = 0;
+        Result = CastOp;
+        ZExtBits = OldZExtBits;
+        SExtBits = OldSExtBits;
+      }
+      ZExtBits += ExtendedBy;
+    }
 
     return Result;
   }
@@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
   return V;
 }
 
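The sext case above only distributes sext(%x + c) into sext(%x) + sext(c) when the narrow addition carries nsw; if the add may wrap, the two expressions disagree. A small sketch of the failure case at i8 width, with ordinary two's-complement C++ integers standing in for IR values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t X = 120;                        // the i8 add 120 + 10 wraps
      int8_t Narrow = (int8_t)(X + 10);      // -126 after wrapping
      int64_t SextOfAdd = (int64_t)Narrow;   // sext(%x + 10) == -126
      int64_t AddOfSext = (int64_t)X + 10;   // sext(%x) + 10  ==  130
      std::printf("sext(x+10)=%lld  sext(x)+10=%lld\n",
                  (long long)SextOfAdd, (long long)AddOfSext);
      // Without nsw the decomposition would wrongly equate these two values;
      // with nsw the wrap above would be poison in IR, so the rewrite is safe.
      return 0;
    }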
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
+/// If V is a symbolic pointer expression, decompose it into a base pointer
+/// with a constant offset and a number of scaled symbolic offsets.
 ///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale
+/// in the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As
+/// such, the gep cannot necessarily be reconstructed from its decomposed form.
 ///
 /// When DataLayout is around, this function is capable of analyzing everything
 /// that GetUnderlyingObject can look through. To be able to do that
 /// GetUnderlyingObject and DecomposeGEPExpression must use the same search
-/// depth (MaxLookupSearchDepth).
-/// When DataLayout not is around, it just looks through pointer casts.
-///
-static const Value *
-DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
-                       SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       bool &MaxLookupReached, const DataLayout &DL,
-                       AssumptionCache *AC, DominatorTree *DT) {
+/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks
+/// through pointer casts.
+/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
+    const Value *V, int64_t &BaseOffs,
+    SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
+    const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
   MaxLookupReached = false;
+  SearchTimes++;
 
   BaseOffs = 0;
   do {
@@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         // updated when GetUnderlyingObject is updated). TLI should be
         // provided also.
         if (const Value *Simplified =
-              SimplifyInstruction(const_cast<Instruction*>(I), DL)) {
+                SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
           V = Simplified;
           continue;
         }
@@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
-    for (User::const_op_iterator I = GEPOp->op_begin()+1,
-         E = GEPOp->op_end(); I != E; ++I) {
-      Value *Index = *I;
+    for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
+         I != E; ++I) {
+      const Value *Index = *I;
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
         // For a struct, add the member offset.
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-        if (FieldNo == 0) continue;
+        if (FieldNo == 0)
+          continue;
 
         BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
 
       // For an array/pointer, add the element offset, explicitly scaled.
-      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
-        if (CIdx->isZero()) continue;
+      if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+        if (CIdx->isZero())
+          continue;
         BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
         continue;
       }
 
       uint64_t Scale = DL.getTypeAllocSize(*GTI);
-      ExtensionKind Extension = EK_NotExtended;
+      unsigned ZExtBits = 0, SExtBits = 0;
 
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      if (DL.getPointerSizeInBits(AS) > Width)
-        Extension = EK_SignExt;
+      unsigned PointerSize = DL.getPointerSizeInBits(AS);
+      if (PointerSize > Width)
+        SExtBits += PointerSize - Width;
 
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
-                                  0, AC, DT);
+      bool NSW = true, NUW = true;
+      Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
                                  SExtBits, DL, 0, AC, DT, NSW, NUW);
 
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getSExtValue()*Scale;
+      BaseOffs += IndexOffset.getSExtValue() * Scale;
       Scale *= IndexScale.getSExtValue();
 
       // If we already had an occurrence of this index variable, merge this
@@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
       for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
-        if (VarIndices[i].V == Index &&
-            VarIndices[i].Extension == Extension) {
+        if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
+            VarIndices[i].SExtBits == SExtBits) {
           Scale += VarIndices[i].Scale;
-          VarIndices.erase(VarIndices.begin()+i);
+          VarIndices.erase(VarIndices.begin() + i);
           break;
         }
       }
 
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
+      if (unsigned ShiftBits = 64 - PointerSize) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
 
       if (Scale) {
-        VariableGEPIndex Entry = {Index, Extension,
+        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
                                   static_cast<int64_t>(Scale)};
         VarIndices.push_back(Entry);
       }
@@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 
   // If the chain of expressions is too deep, just return early.
   MaxLookupReached = true;
+  SearchLimitReached++;
   return V;
 }
 
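DecomposeGEPExpression folds struct field offsets and constant indices into BaseOffs and keeps each remaining index as a scaled symbolic term. A standalone sketch of that arithmetic for a hypothetical layout (the offsets are assumed for illustration, not queried from DataLayout):

    #include <cstdint>
    #include <cstdio>

    // struct S { int32_t a; int32_t b[10]; };  (assumed: a at 0, b at 4)
    // Decompose &base[i].b[j] the way DecomposeGEPExpression would:
    //   byte offset = i * sizeof(S) + offsetof(S, b) + j * sizeof(int32_t)
    int main() {
      const int64_t SizeOfS = 44;   // assumed: 4 + 10*4, no padding
      const int64_t OffsetOfB = 4;  // assumed field offset
      const int64_t ElemSize = 4;   // sizeof(int32_t)

      int64_t BaseOffs = OffsetOfB; // constant part: the struct field offset
      // Symbolic part, one (Scale, V) entry per variable index:
      //   {Scale = SizeOfS,  V = i}  from the leading GEP index
      //   {Scale = ElemSize, V = j}  from the array index
      int64_t i = 2, j = 5;         // example values for the variables
      int64_t Offset = BaseOffs + SizeOfS * i + ElemSize * j;
      std::printf("byte offset = %lld\n", (long long)Offset); // 4+88+20 = 112
      return 0;
    }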
-//===----------------------------------------------------------------------===//
-// BasicAliasAnalysis Pass
-//===----------------------------------------------------------------------===//
-
-#ifndef NDEBUG
-static const Function *getParent(const Value *V) {
-  if (const Instruction *inst = dyn_cast<Instruction>(V))
-    return inst->getParent()->getParent();
-
-  if (const Argument *arg = dyn_cast<Argument>(V))
-    return arg->getParent();
-
-  return nullptr;
-}
-
-static bool notDifferentParent(const Value *O1, const Value *O2) {
-
-  const Function *F1 = getParent(O1);
-  const Function *F2 = getParent(O2);
-
-  return !F1 || !F2 || F1 == F2;
-}
-#endif
-
-namespace {
-  /// BasicAliasAnalysis - This is the primary alias analysis implementation.
-  struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
-    static char ID; // Class identification, replacement for typeinfo
-    BasicAliasAnalysis() : ImmutablePass(ID) {
-      initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool doInitialization(Module &M) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
-      AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetLibraryInfoWrapperPass>();
-    }
-
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override {
-      assert(AliasCache.empty() && "AliasCache must be cleared after use!");
-      assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
-             "BasicAliasAnalysis doesn't support interprocedural queries.");
-      AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
-                                     LocB.Ptr, LocB.Size, LocB.AATags);
-      // AliasCache rarely has more than 1 or 2 elements, always use
-      // shrink_and_clear so it quickly returns to the inline capacity of the
-      // SmallDenseMap if it ever grows larger.
-      // FIXME: This should really be shrink_to_inline_capacity_and_clear().
-      AliasCache.shrink_and_clear();
-      VisitedPhiBBs.clear();
-      return Alias;
-    }
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override;
-
-    /// pointsToConstantMemory - Chase pointers until we find a (constant
-    /// global) or not.
-    bool pointsToConstantMemory(const MemoryLocation &Loc,
-                                bool OrLocal) override;
-
-    /// Get the location associated with a pointer argument of a callsite.
-    ModRefResult getArgModRefInfo(ImmutableCallSite CS,
-                                  unsigned ArgIdx) override;
-
-    /// getModRefBehavior - Return the behavior when calling the given
-    /// call site.
-    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-
-    /// getModRefBehavior - Return the behavior when calling the given function.
-    /// For use when the call site is not known.
-    ModRefBehavior getModRefBehavior(const Function *F) override;
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    void *getAdjustedAnalysisPointer(const void *ID) override {
-      if (ID == &AliasAnalysis::ID)
-        return (AliasAnalysis *)this;
-      return this;
-    }
-
-  private:
-    // AliasCache - Track alias queries to guard against recursion.
-    typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
-    typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
-    AliasCacheTy AliasCache;
-
-    /// \brief Track phi nodes we have visited. When interpret "Value" pointer
-    /// equality as value equality we need to make sure that the "Value" is not
-    /// part of a cycle. Otherwise, two uses could come from different
-    /// "iterations" of a cycle and see different values for the same "Value"
-    /// pointer.
-    /// The following example shows the problem:
-    ///   %p = phi(%alloca1, %addr2)
-    ///   %l = load %ptr
-    ///   %addr1 = gep, %alloca2, 0, %l
-    ///   %addr2 = gep %alloca2, 0, (%l + 1)
-    ///   alias(%p, %addr1) -> MayAlias !
-    ///   store %l, ...
-    SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs;
-
-    // Visited - Track instructions visited by pointsToConstantMemory.
-    SmallPtrSet<const Value *, 16> Visited;
-
-    /// \brief Check whether two Values can be considered equivalent.
-    ///
-    /// In addition to pointer equivalence of \p V1 and \p V2 this checks
-    /// whether they can not be part of a cycle in the value graph by looking at
-    /// all visited phi nodes an making sure that the phis cannot reach the
-    /// value. We have to do this because we are looking through phi nodes (That
-    /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
-    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
-
-    /// \brief Dest and Src are the variable indices from two decomposed
-    /// GetElementPtr instructions GEP1 and GEP2 which have common base
-    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-    /// difference between the two pointers.
-    void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
-                            const SmallVectorImpl<VariableGEPIndex> &Src);
-
-    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
-    // instruction against another.
-    AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
-                         const AAMDNodes &V1AAInfo,
-                         const Value *V2, uint64_t V2Size,
-                         const AAMDNodes &V2AAInfo,
-                         const Value *UnderlyingV1, const Value *UnderlyingV2);
-
-    // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
-    // instruction against another.
-    AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
-                         const AAMDNodes &PNAAInfo,
-                         const Value *V2, uint64_t V2Size,
-                         const AAMDNodes &V2AAInfo);
-
-    /// aliasSelect - Disambiguate a Select instruction against another value.
-    AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
-                            const AAMDNodes &SIAAInfo,
-                            const Value *V2, uint64_t V2Size,
-                            const AAMDNodes &V2AAInfo);
-
-    AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
-                           AAMDNodes V1AATag,
-                           const Value *V2, uint64_t V2Size,
-                           AAMDNodes V2AATag);
-  };
-} // End of anonymous namespace
-
-// Register this pass...
-char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
-                         "Basic Alias Analysis (stateless AA impl)",
-                         false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
-                       "Basic Alias Analysis (stateless AA impl)",
-                       false, true, false)
-
-
-ImmutablePass *llvm::createBasicAliasAnalysisPass() {
-  return new BasicAliasAnalysis();
-}
-
-/// pointsToConstantMemory - Returns whether the given pointer value
-/// points to memory that is local to the function, with global constants being
-/// considered local to all functions.
-bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
-                                                bool OrLocal) {
+/// Returns whether the given pointer value points to memory that is local to
+/// the function, with global constants being considered local to all
+/// functions.
+bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+                                           bool OrLocal) {
   assert(Visited.empty() && "Visited must be cleared after use!");
 
   unsigned MaxLookup = 8;
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
-      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
     }
 
     // An alloca instruction defines local memory.
@@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
       // others.  GV may even be a declaration, not a definition.
       if (!GV->isConstant()) {
         Visited.clear();
-        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
       }
       continue;
     }
@@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
       // Don't bother inspecting phi nodes with many operands.
       if (PN->getNumIncomingValues() > MaxLookup) {
         Visited.clear();
-        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
       }
       for (Value *IncValue : PN->incoming_values())
         Worklist.push_back(IncValue);
@@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
 
     // Otherwise be conservative.
     Visited.clear();
-    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
 
   } while (!Worklist.empty() && --MaxLookup);
 
@@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS,
   return false;
 }
 
-/// getModRefBehavior - Return the behavior when calling the given call site.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+/// Returns the behavior when calling the given call site.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
   if (CS.doesNotAccessMemory())
     // Can't do better than this.
-    return DoesNotAccessMemory;
+    return FMRB_DoesNotAccessMemory;
 
-  ModRefBehavior Min = UnknownModRefBehavior;
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
 
   // If the callsite knows it only reads memory, don't return worse
   // than that.
   if (CS.onlyReadsMemory())
-    Min = OnlyReadsMemory;
+    Min = FMRB_OnlyReadsMemory;
 
   if (CS.onlyAccessesArgMemory())
-    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+  // The AAResultBase base class has some smarts, let's use them.
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
 }
 
-/// getModRefBehavior - Return the behavior when calling the given function.
-/// For use when the call site is not known.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+/// Returns the behavior when calling the given function. For use when the call
+/// site is not known.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
   // If the function declares it doesn't access memory, we can't do better.
   if (F->doesNotAccessMemory())
-    return DoesNotAccessMemory;
+    return FMRB_DoesNotAccessMemory;
 
-  // For intrinsics, we can check the table.
-  if (Intrinsic::ID iid = F->getIntrinsicID()) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/IR/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
-  }
-
-  ModRefBehavior Min = UnknownModRefBehavior;
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
 
   // If the function declares it only reads memory, go with that.
   if (F->onlyReadsMemory())
-    Min = OnlyReadsMemory;
+    Min = FMRB_OnlyReadsMemory;
 
   if (F->onlyAccessesArgMemory())
-    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  const TargetLibraryInfo &TLI =
-      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   if (isMemsetPattern16(F, TLI))
-    Min = OnlyAccessesArgumentPointees;
+    Min = FMRB_OnlyAccessesArgumentPointees;
 
   // Otherwise be conservative.
-  return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
 }
 
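FunctionModRefBehavior values act as bit sets, so intersecting them with & can only remove capabilities, never add them. A toy sketch of that lattice; the bit layout here is made up for illustration and does not match LLVM's actual enum values:

    #include <cstdio>

    // Low bits say *what* may happen (mod/ref), high bits say *where*
    // (argument pointees vs. anywhere, where "anywhere" includes args).
    enum Behavior : unsigned {
      Ref = 1, Mod = 2,                  // what
      ArgPointees = 4, Anywhere = 4 | 8, // where
      DoesNotAccessMemory = 0,
      OnlyReadsMemory = Anywhere | Ref,
      OnlyAccessesArgumentPointees = ArgPointees | Mod | Ref,
      UnknownBehavior = Anywhere | Mod | Ref,
    };

    int main() {
      // A call known to be both readonly and argmemonly: intersect both facts.
      unsigned Min = UnknownBehavior;
      Min &= OnlyReadsMemory;              // drops the Mod bit
      Min &= OnlyAccessesArgumentPointees; // drops the "anywhere" bit
      std::printf("combined = %u, reads-args-only = %u\n", Min,
                  (unsigned)(ArgPointees | Ref)); // both print 5
      return 0;
    }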
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+                                           unsigned ArgIdx) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
     switch (II->getIntrinsicID()) {
     default:
@@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
     case Intrinsic::memset:
     case Intrinsic::memcpy:
     case Intrinsic::memmove:
       assert((ArgIdx == 0 || ArgIdx == 1) &&
              "Invalid argument index for memory intrinsic");
-      return ArgIdx ? Ref : Mod;
+      return ArgIdx ? MRI_Ref : MRI_Mod;
     }
 
   // We can bound the aliasing properties of memset_pattern16 just as we can
   // for memcpy/memset.  This is particularly important because the
   // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
   // whenever possible.
   if (CS.getCalledFunction() &&
-      isMemsetPattern16(CS.getCalledFunction(), *TLI)) {
+      isMemsetPattern16(CS.getCalledFunction(), TLI)) {
     assert((ArgIdx == 0 || ArgIdx == 1) &&
            "Invalid argument index for memset_pattern16");
-    return ArgIdx ? Ref : Mod;
+    return ArgIdx ? MRI_Ref : MRI_Mod;
   }
   // FIXME: Handle memset_pattern4 and memset_pattern8 also.
 
-  return AliasAnalysis::getArgModRefInfo(CS, ArgIdx);
+  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+    return MRI_Ref;
+
+  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+    return MRI_NoModRef;
+
+  return AAResultBase::getArgModRefInfo(CS, ArgIdx);
 }
 
 static bool isAssumeIntrinsic(ImmutableCallSite CS) {
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
-  if (II && II->getIntrinsicID() == Intrinsic::assume)
-    return true;
-
-  return false;
+  return II && II->getIntrinsicID() == Intrinsic::assume;
 }
 
-bool BasicAliasAnalysis::doInitialization(Module &M) {
-  InitializeAliasAnalysis(this, &M.getDataLayout());
-  return true;
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+  if (const Instruction *inst = dyn_cast<Instruction>(V))
+    return inst->getParent()->getParent();
+
+  if (const Argument *arg = dyn_cast<Argument>(V))
+    return arg->getParent();
+
+  return nullptr;
 }
 
-/// getModRefInfo - Check to see if the specified callsite can clobber the
-/// specified memory object.  Since we only look at local properties of this
-/// function, we really can't say much about this query.  We do, however, use
-/// simple "address taken" analysis on local objects.
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                                  const MemoryLocation &Loc) {
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+  const Function *F1 = getParent(O1);
+  const Function *F2 = getParent(O2);
+
+  return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) {
+  assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+         "BasicAliasAnalysis doesn't support interprocedural queries.");
+
+  // If we have a directly cached entry for these locations, we have recursed
+  // through this once, so just return the cached results. Notably, when this
+  // happens, we don't clear the cache.
+  auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
+  if (CacheIt != AliasCache.end())
+    return CacheIt->second;
+
+  AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
+                                 LocB.Size, LocB.AATags);
+  // AliasCache rarely has more than 1 or 2 elements, always use
+  // shrink_and_clear so it quickly returns to the inline capacity of the
+  // SmallDenseMap if it ever grows larger.
+  // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+  AliasCache.shrink_and_clear();
+  VisitedPhiBBs.clear();
+  return Alias;
+}
 
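The rewritten alias() consults AliasCache first, so a query that recurses back into itself picks up the conservative MayAlias entry that aliasCheck seeds before descending, instead of looping forever. A minimal sketch of that guard pattern with hypothetical stand-in types:

    #include <cstdio>
    #include <map>
    #include <utility>

    enum Result { MayAlias, NoAlias };
    using Query = std::pair<int, int>; // stands in for (LocA, LocB)

    static std::map<Query, Result> Cache;
    static Result resolve(Query Q);

    static Result check(Query Q) {
      // Seed the cache with the conservative answer *before* recursing, so a
      // cyclic chain of queries terminates with MayAlias instead of looping.
      auto Ins = Cache.insert({Q, MayAlias});
      if (!Ins.second)
        return Ins.first->second; // already in flight or already answered
      Result R = resolve(Q);
      Ins.first->second = R;
      return R;
    }

    static Result resolve(Query Q) {
      if (Q.first == Q.second)
        return check({Q.second, Q.first}); // artificial cycle for the demo
      return NoAlias;
    }

    int main() {
      std::printf("%d %d\n", check({1, 2}), check({3, 3})); // 1 0
      return 0;
    }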
+/// Checks to see if the specified callsite can clobber the specified memory
+/// object.
+///
+/// Since we only look at local properties of this function, we really can't
+/// say much about this query. We do, however, use simple "address taken"
+/// analysis on local objects.
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+                                        const MemoryLocation &Loc) {
   assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
 
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
@@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   if (isa<AllocaInst>(Object))
     if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
       if (CI->isTailCall())
-        return NoModRef;
+        return MRI_NoModRef;
 
   // If the pointer is to a locally allocated object that does not escape,
   // then the call can not mod/ref the pointer unless the call takes the pointer
@@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
       // escape.
-      if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) {
+      AliasResult AR =
+          getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+      if (AR) {
         PassedAsArg = true;
         break;
       }
     }
 
     if (!PassedAsArg)
-      return NoModRef;
+      return MRI_NoModRef;
   }
 
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
   if (isAssumeIntrinsic(CS))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return AliasAnalysis::getModRefInfo(CS, Loc);
+  // The AAResultBase base class has some smarts, let's use them.
+  return AAResultBase::getModRefInfo(CS, Loc);
 }
 
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
-                                  ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
+                                        ImmutableCallSite CS2) {
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
   if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return AliasAnalysis::getModRefInfo(CS1, CS2);
+  // The AAResultBase base class has some smarts, let's use them.
+  return AAResultBase::getModRefInfo(CS1, CS2);
 }
 
-/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
-/// operators, both having the exact same pointer operand.
+/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
+/// both having the exact same pointer operand.
 static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
                                             uint64_t V1Size,
                                             const GEPOperator *GEP2,
@@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
   ConstantInt *C2 =
       dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
 
-  // If the last (struct) indices aren't constants, we can't say anything.
-  // If they're identical, the other indices might be also be dynamically
-  // equal, so the GEPs can alias.
-  if (!C1 || !C2 || C1 == C2)
+  // If the last (struct) indices are constants and are equal, the other
+  // indices might also be dynamically equal, so the GEPs can alias.
+  if (C1 && C2 && C1 == C2)
     return MayAlias;
 
   // Find the last-indexed type of the GEP, i.e., the type you'd get if
@@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
     IntermediateIndices.push_back(GEP1->getOperand(i + 1));
   }
 
-  StructType *LastIndexedStruct =
-      dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
-          GEP1->getSourceElementType(), IntermediateIndices));
+  auto *Ty = GetElementPtrInst::getIndexedType(
+      GEP1->getSourceElementType(), IntermediateIndices);
+  StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
 
-  if (!LastIndexedStruct)
+  if (isa<SequentialType>(Ty)) {
+    // We know that:
+    // - both GEPs begin indexing from the exact same pointer;
+    // - the last indices in both GEPs are constants, indexing into a
+    //   sequential type (array or pointer);
+    // - both GEPs only index through arrays prior to that.
+    //
+    // Because array indices greater than the number of elements are valid in
+    // GEPs, unless we know the intermediate indices are identical between
+    // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
+    // partially overlap. We also need to check that the loaded size matches
+    // the element size, otherwise we could still have overlap.
+    const uint64_t ElementSize =
+        DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
+    if (V1Size != ElementSize || V2Size != ElementSize)
+      return MayAlias;
+
+    for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
+      if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
+        return MayAlias;
+
+    // Now we know that the array/pointer that GEP1 indexes into and that
+    // GEP2 indexes into must either precisely overlap or be disjoint. Because
+    // they cannot partially overlap and because fields in an array cannot
+    // overlap, if we can prove the final indices are different between GEP1
+    // and GEP2, we can conclude GEP1 and GEP2 don't alias.
+
+    // If the last indices are constants, we've already checked they don't
+    // equal each other so we can exit early.
+    if (C1 && C2)
+      return NoAlias;
+    if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
+                        GEP2->getOperand(GEP2->getNumOperands() - 1),
+                        DL))
+      return NoAlias;
+    return MayAlias;
+  } else if (!LastIndexedStruct || !C1 || !C2) {
+    return MayAlias;
+  }
 
   // We know that:
   // - both GEPs begin indexing from the exact same pointer;
@@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
   return MayAlias;
 }
 
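The sequential-type case above only concludes NoAlias when both accesses cover exactly one element, all intermediate indices match, and the final indices differ. A numeric sketch of why each condition matters, for a hypothetical int32_t A[8][4]:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // &A[i][1] vs &A[i][2]: same base, identical intermediate index i,
      // different final constant index, 4-byte accesses of 4-byte elements.
      const int64_t RowBytes = 4 * sizeof(int32_t);
      const int64_t Elem = sizeof(int32_t);
      for (int64_t i = 0; i < 8; ++i) {
        int64_t Off1 = i * RowBytes + 1 * Elem;
        int64_t Off2 = i * RowBytes + 2 * Elem;
        // Whole elements can only coincide or be disjoint, never overlap:
        if (!(Off1 + Elem <= Off2 || Off2 + Elem <= Off1))
          std::printf("overlap at i=%lld\n", (long long)i); // never fires
      }
      // Counterexample: if the access size were 8 bytes (two elements), the
      // accesses at offsets 4 and 8 within a row would partially overlap,
      // which is why the V1Size/V2Size == ElementSize check is required.
      return 0;
    }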
-/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
-/// against another pointer.  We know that V1 is a GEP, but we don't know
-/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
-/// UnderlyingV2 is the same for V2.
+/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
+/// another pointer.
 ///
-AliasResult BasicAliasAnalysis::aliasGEP(
-    const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo,
-    const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo,
-    const Value *UnderlyingV1, const Value *UnderlyingV2) {
+/// We know that V1 is a GEP, but we don't know anything about V2.
+/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// V2.
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+                                    const AAMDNodes &V1AAInfo, const Value *V2,
+                                    uint64_t V2Size, const AAMDNodes &V2AAInfo,
+                                    const Value *UnderlyingV1,
+                                    const Value *UnderlyingV2) {
   int64_t GEP1BaseOffset;
   bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
 
-  // We have to get two AssumptionCaches here because GEP1 and V2 may be from
-  // different functions.
-  // FIXME: This really doesn't make any sense. We get a dominator tree below
-  // that can only refer to a single function. But this function (aliasGEP) is
-  // a method on an immutable pass that can be called when there *isn't*
-  // a single function. The old pass management layer makes this "work", but
-  // this isn't really a clean solution.
-  AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
-  AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
-  if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
-    AC1 = &ACT.getAssumptionCache(
-        const_cast<Function &>(*GEP1I->getParent()->getParent()));
-  if (auto *I2 = dyn_cast<Instruction>(V2))
-    AC2 = &ACT.getAssumptionCache(
-        const_cast<Function &>(*I2->getParent()->getParent()));
-
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
   // If we have two gep instructions with must-alias or not-alias'ing base
   // pointers, figure out if the indexes to the GEP tell us anything about the
   // derived pointer.
@@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP(
     // identical.
     if ((BaseAlias == MayAlias) && V1Size == V2Size) {
       // Do the base pointers alias assuming type and size.
-      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
-                                                V1AAInfo, UnderlyingV2,
-                                                V2Size, V2AAInfo);
+      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
+                                                UnderlyingV2, V2Size, V2AAInfo);
       if (PreciseBaseAlias == NoAlias) {
         // See if the computed offset from the common pointer tells us about the
         // relation of the resulting pointer.
@@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP(
         SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
         const Value *GEP2BasePtr =
             DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                                   GEP2MaxLookupReached, *DL, AC2, DT);
+                                   GEP2MaxLookupReached, DL, &AC, DT);
         const Value *GEP1BasePtr =
             DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                                   GEP1MaxLookupReached, *DL, AC1, DT);
+                                   GEP1MaxLookupReached, DL, &AC, DT);
         // DecomposeGEPExpression and GetUnderlyingObject should return the
         // same result except when DecomposeGEPExpression has no DataLayout.
+        // FIXME: They always have a DataLayout so this should become an
+        // assert.
         if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-          assert(!DL &&
-                 "DecomposeGEPExpression and GetUnderlyingObject disagree!");
           return MayAlias;
         }
         // If the max search depth is reached the result is undefined
@@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP(
 
     // If we get a No or May, then return it immediately, no amount of analysis
     // will improve this situation.
-    if (BaseAlias != MustAlias) return BaseAlias;
+    if (BaseAlias != MustAlias)
+      return BaseAlias;
 
     // Otherwise, we have a MustAlias.  Since the base pointers alias each other
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, *DL, AC1, DT);
+                               GEP1MaxLookupReached, DL, &AC, DT);
 
     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, *DL, AC2, DT);
+                               GEP2MaxLookupReached, DL, &AC, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
+    // FIXME: They always have a DataLayout so this should become an assert.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-      assert(!DL &&
-             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
 
     // If we know the two GEPs are based off of the exact same pointer (and not
     // just the same underlying object), see if that tells us anything about
     // the resulting pointers.
-    if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
-      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+    if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
       // If we couldn't find anything interesting, don't abandon just yet.
       if (R != MayAlias)
         return R;
@@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
 
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, *DL, AC1, DT);
+                               GEP1MaxLookupReached, DL, &AC, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
+    // FIXME: They always have a DataLayout so this should become an assert.
     if (GEP1BasePtr != UnderlyingV1) {
-      assert(!DL &&
-             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
      return MayAlias;
    }
    // If the max search depth is reached the result is undefined
@@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP(
     }
   }
 
-  // Try to distinguish something like &A[i][1] against &A[42][0].
-  // Grab the least significant bit set in any of the scales.
   if (!GEP1VariableIndices.empty()) {
     uint64_t Modulo = 0;
-    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
-      Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+    bool AllPositive = true;
+    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+      // Try to distinguish something like &A[i][1] against &A[42][0].
+      // Grab the least significant bit set in any of the scales. We
+      // don't need std::abs here (even if the scale's negative) as we'll
+      // be ^'ing Modulo with itself later.
+      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+
+      if (AllPositive) {
+        // If the Value could change between cycles, then any reasoning about
+        // the Value this cycle may not hold in the next cycle. We'll just
+        // give up if we can't determine conditions that hold for every cycle:
+        const Value *V = GEP1VariableIndices[i].V;
+
+        bool SignKnownZero, SignKnownOne;
+        ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+                       0, &AC, nullptr, DT);
+
+        // Zero-extension widens the variable, and so forces the sign
+        // bit to zero.
+        bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+        SignKnownZero |= IsZExt;
+        SignKnownOne &= !IsZExt;
+
+        // If the variable begins with a zero then we know it's
+        // positive, regardless of whether the value is signed or
+        // unsigned.
+        int64_t Scale = GEP1VariableIndices[i].Scale;
+        AllPositive =
+            (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+      }
+    }
+
     Modulo = Modulo ^ (Modulo & (Modulo - 1));
 
     // We can compute the difference between the two addresses
@@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP(
         V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
         V1Size <= Modulo - ModOffset)
       return NoAlias;
+
+    // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+    // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+    // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+    if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+      return NoAlias;
+
+    if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+                                GEP1BaseOffset, &AC, DT))
+      return NoAlias;
   }
 
   // Statically, we can see that the base objects are the same, but the
@@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
   return MayAlias;
 }
 
-/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
-/// instruction against another.
-AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI,
-                                            uint64_t SISize,
-                                            const AAMDNodes &SIAAInfo,
-                                            const Value *V2, uint64_t V2Size,
-                                            const AAMDNodes &V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
+/// against another.
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+                                       const AAMDNodes &SIAAInfo,
+                                       const Value *V2, uint64_t V2Size,
+                                       const AAMDNodes &V2AAInfo) {
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
     if (SI->getCondition() == SI2->getCondition()) {
-      AliasResult Alias =
-        aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
-                   SI2->getTrueValue(), V2Size, V2AAInfo);
+      AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+                                     SI2->getTrueValue(), V2Size, V2AAInfo);
       if (Alias == MayAlias)
        return MayAlias;
      AliasResult ThisAlias =
-        aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
-                   SI2->getFalseValue(), V2Size, V2AAInfo);
+          aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+                     SI2->getFalseValue(), V2Size, V2AAInfo);
       return MergeAliasResults(ThisAlias, Alias);
     }
 
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
   AliasResult Alias =
-    aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+      aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
   if (Alias == MayAlias)
     return MayAlias;
 
   AliasResult ThisAlias =
-    aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+      aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
   return MergeAliasResults(ThisAlias, Alias);
 }
 
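In the aliasGEP hunk above, Modulo ^ (Modulo & (Modulo - 1)) keeps only the lowest set bit of the OR of the scales, so the variable part of the GEP can only move the pointer in multiples of Modulo; the constant offset modulo that stride then bounds where the two accesses can land. A worked sketch with assumed sizes and scales:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Modulo = 0;
      Modulo |= 20; // scale of one variable index (e.g. x*20 from A[x][x])
      Modulo |= 4;  // scale of another (e.g. x*4)
      Modulo = Modulo ^ (Modulo & (Modulo - 1)); // lowest set bit -> 4

      // GEP1 = Base + 4*k + BaseOffset for integers k; with BaseOffset = 1
      // and V2 = Base, GEP1 is always congruent to 1 mod 4.
      uint64_t BaseOffset = 1, ModOffset = BaseOffset & (Modulo - 1);
      uint64_t V1Size = 2, V2Size = 1;
      // Within each stride, V2's access occupies [0, V2Size) and GEP1's
      // occupies [ModOffset, ModOffset + V1Size): disjoint when V2 ends at or
      // before ModOffset and V1 fits before the next stride boundary.
      bool NoAlias = ModOffset >= V2Size && V1Size <= Modulo - ModOffset;
      std::printf("Modulo=%llu ModOffset=%llu -> %s\n",
                  (unsigned long long)Modulo, (unsigned long long)ModOffset,
                  NoAlias ? "NoAlias" : "MayAlias");
      return 0;
    }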
-// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
-// against another.
-AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
-                                         const AAMDNodes &PNAAInfo,
-                                         const Value *V2, uint64_t V2Size,
-                                         const AAMDNodes &V2AAInfo) {
+/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
+/// another.
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+                                    const AAMDNodes &PNAAInfo, const Value *V2,
+                                    uint64_t V2Size,
+                                    const AAMDNodes &V2AAInfo) {
   // Track phi nodes we have visited. We use this information when we determine
   // value equivalence.
   VisitedPhiBBs.insert(PN->getParent());
@@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
 
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       AliasResult ThisAlias =
-        aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
-                   PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
-                   V2Size, V2AAInfo);
+          aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
+                     PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+                     V2Size, V2AAInfo);
       Alias = MergeAliasResults(ThisAlias, Alias);
       if (Alias == MayAlias)
         break;
@@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
     return Alias;
   }
 
-  SmallPtrSet<Value*, 4> UniqueSrc;
-  SmallVector<Value*, 4> V1Srcs;
+  SmallPtrSet<Value *, 4> UniqueSrc;
+  SmallVector<Value *, 4> V1Srcs;
+  bool isRecursive = false;
   for (Value *PV1 : PN->incoming_values()) {
     if (isa<PHINode>(PV1))
       // If any of the source itself is a PHI, return MayAlias conservatively
@@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
       // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
       // and 'n' are the number of PHI sources.
       return MayAlias;
+
+    if (EnableRecPhiAnalysis)
+      if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+        // Check whether the incoming value is a GEP that advances the pointer
+        // result of this PHI node (e.g. in a loop). If this is the case, we
+        // would recurse and always get a MayAlias. Handle this case specially
+        // below.
+        if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+            isa<ConstantInt>(PV1GEP->idx_begin())) {
+          isRecursive = true;
+          continue;
+        }
+      }
+
     if (UniqueSrc.insert(PV1).second)
       V1Srcs.push_back(PV1);
   }
 
+  // If this PHI node is recursive, set the size of the accessed memory to
+  // unknown to represent all the possible values the GEP could advance the
+  // pointer to.
+  if (isRecursive)
+    PNSize = MemoryLocation::UnknownSize;
+
-  AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
-                                 V1Srcs[0], PNSize, PNAAInfo);
+  AliasResult Alias =
+      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+
   // Early exit if the check of the first PHI source against V2 is MayAlias.
   // Other results are not possible.
   if (Alias == MayAlias)
@@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
     Value *V = V1Srcs[i];
 
-    AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
-                                       V, PNSize, PNAAInfo);
+    AliasResult ThisAlias =
+        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
     Alias = MergeAliasResults(ThisAlias, Alias);
     if (Alias == MayAlias)
       break;
@@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   return Alias;
 }
 
-// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
-// such as array references.
-//
-AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
-                                           AAMDNodes V1AAInfo, const Value *V2,
-                                           uint64_t V2Size,
-                                           AAMDNodes V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
+/// array references.
+AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+                                      AAMDNodes V1AAInfo, const Value *V2,
+                                      uint64_t V2Size, AAMDNodes V2AAInfo) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are.
   if (V1Size == 0 || V2Size == 0)
     return NoAlias;
 
@@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     return MustAlias;
 
   if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
-    return NoAlias;  // Scalars cannot alias each other
+    return NoAlias; // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
-  const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
+  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
 
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
-  if (DL)
-    if ((V1Size != MemoryLocation::UnknownSize &&
-         isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
-        (V2Size != MemoryLocation::UnknownSize &&
-         isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
-      return NoAlias;
+  if ((V1Size != MemoryLocation::UnknownSize &&
+       isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+      (V2Size != MemoryLocation::UnknownSize &&
+       isObjectSmallerThan(O1, V2Size, DL, TLI)))
+    return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
   // otherwise infinitely recursive queries.
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (V1 > V2)
     std::swap(Locs.first, Locs.second);
   std::pair<AliasCacheTy::iterator, bool> Pair =
-    AliasCache.insert(std::make_pair(Locs, MayAlias));
+      AliasCache.insert(std::make_pair(Locs, MayAlias));
   if (!Pair.second)
     return Pair.first->second;
 
@@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
-    AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result =
+        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
-    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
-                                  V2, V2Size, V2AAInfo);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
-    AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
-                                     V2, V2Size, V2AAInfo);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result =
+        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   // If both pointers are pointing into the same object and one of the
   // accesses is accessing the entire object, then the accesses must
   // overlap in some way.
-  if (DL && O1 == O2)
+  if (O1 == O2)
     if ((V1Size != MemoryLocation::UnknownSize &&
-         isObjectSize(O1, V1Size, *DL, *TLI)) ||
+         isObjectSize(O1, V1Size, DL, TLI)) ||
         (V2Size != MemoryLocation::UnknownSize &&
-         isObjectSize(O2, V2Size, *DL, *TLI)))
+         isObjectSize(O2, V2Size, DL, TLI)))
       return AliasCache[Locs] = PartialAlias;
 
-  AliasResult Result =
-      AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo),
-                           MemoryLocation(V2, V2Size, V2AAInfo));
+  // Recurse back into the best AA results we have, potentially with refined
+  // memory locations. We have already ensured that BasicAA has a MayAlias
+  // cache result for these, so any recursion back into BasicAA won't loop.
+  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
   return AliasCache[Locs] = Result;
 }
 
-bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
-                                                       const Value *V2) {
+/// Check whether two Values can be considered equivalent.
+///
+/// In addition to pointer equivalence of \p V1 and \p V2 this checks whether
+/// they can not be part of a cycle in the value graph by looking at all
+/// visited phi nodes and making sure that the phis cannot reach the value. We
+/// have to do this because we are looking through phi nodes (that is, we say
+/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
+                                                  const Value *V2) {
   if (V != V2)
     return false;
 
@@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
     return false;
 
-  // Use dominance or loop info if available.
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
-
   // Make sure that the visited phis cannot reach the Value. This ensures that
   // the Values cannot come from different iterations of a potential cycle the
   // phi nodes could be involved in.
   for (auto *P : VisitedPhiBBs)
-    if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
+    if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
       return false;
 
   return true;
 }
 
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-void BasicAliasAnalysis::GetIndexDifference(
+/// Computes the symbolic difference between two de-composed GEPs.
+///
+/// Dest and Src are the variable indices from two decomposed GetElementPtr
+/// instructions GEP1 and GEP2 which have common base pointers.
+void BasicAAResult::GetIndexDifference(
     SmallVectorImpl<VariableGEPIndex> &Dest,
     const SmallVectorImpl<VariableGEPIndex> &Src) {
   if (Src.empty())
@@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference(
 
   for (unsigned i = 0, e = Src.size(); i != e; ++i) {
     const Value *V = Src[i].V;
-    ExtensionKind Extension = Src[i].Extension;
+    unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
     int64_t Scale = Src[i].Scale;
 
     // Find V in Dest.  This is N^2, but pointer indices almost never have more
     // than a few variable indexes.
     for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
       if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
-          Dest[j].Extension != Extension)
+          Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
         continue;
 
       // If we found it, subtract off Scale V's from the entry in Dest.  If it
@@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference(
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (Scale) {
-      VariableGEPIndex Entry = { V, Extension, -Scale };
+      VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
       Dest.push_back(Entry);
     }
   }
 }
 
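constantOffsetHeuristic below reasons about the minimum distance between %x and %x + c under wrapping arithmetic: at bit width n it is min(c mod 2^n, 2^n - (c mod 2^n)). A sketch reproducing the i3 example from the comment (7 + 5 wraps to 4, so the minimum distance is 3):

    #include <cstdint>
    #include <cstdio>

    // Minimum wrapped distance between x and x + C at the given bit width.
    static uint64_t minWrappedDistance(uint64_t C, unsigned Bits) {
      uint64_t Mask = (Bits == 64) ? ~0ULL : ((1ULL << Bits) - 1);
      uint64_t Diff = C & Mask;             // c mod 2^Bits, going "forward"
      uint64_t Wrapped = (0 - Diff) & Mask; // 2^Bits - Diff, the other way
      return Diff < Wrapped ? Diff : Wrapped;
    }

    int main() {
      // "add i3 %i, 5": if %i == 7 then (7 + 5) mod 8 == 4, distance 3.
      std::printf("%llu\n", (unsigned long long)minWrappedDistance(5, 3));
      return 0;
    }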
+bool BasicAAResult::constantOffsetHeuristic(
+    const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
+    uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+    DominatorTree *DT) {
+  if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
+      V2Size == MemoryLocation::UnknownSize)
+    return false;
+
+  const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+
+  if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
+      Var0.Scale != -Var1.Scale)
+    return false;
+
+  unsigned Width = Var1.V->getType()->getIntegerBitWidth();
+
+  // We'll strip off the Extensions of Var0 and Var1 and do another round
+  // of GetLinearExpression decomposition. In the example above, if Var0
+  // is zext(%x + 1) we should get V1 == %x and V1Offset == 1.
+
+  APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
+      V1Offset(Width, 0);
+  bool NSW = true, NUW = true;
+  unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
+  const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
+                                        V0SExtBits, DL, 0, AC, DT, NSW, NUW);
+  NSW = true, NUW = true;
+  const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
+                                        V1SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+  if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
+      V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
+    return false;
+
+  // We have a hit - Var0 and Var1 only differ by a constant offset!
+
+  // If we've been sext'ed then zext'd the maximum difference between Var0 and
+  // Var1 is possible to calculate, but we're just interested in the absolute
+  // minimum difference between the two. The minimum distance may occur due to
+  // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so
+  // the minimum distance between %i and %i + 5 is 3.
+  APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
+  MinDiff = APIntOps::umin(MinDiff, Wrapped);
+  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+
+  // We can't definitely say whether GEP1 is before or after V2 due to wrapping
+  // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
+  // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
+  // V2Size can fit in the MinDiffBytes gap.
+  return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
+         V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+char BasicAA::PassID;
+
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return BasicAAResult(F.getParent()->getDataLayout(),
+                       AM->getResult<TargetLibraryAnalysis>(F),
+                       AM->getResult<AssumptionAnalysis>(F),
+                       AM->getCachedResult<DominatorTreeAnalysis>(F),
+                       AM->getCachedResult<LoopAnalysis>(F));
+}
+
+BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
+  initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char BasicAAWrapperPass::ID = 0;
+void BasicAAWrapperPass::anchor() {}
+
+INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
+                      "Basic Alias Analysis (stateless AA impl)", true, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
+                    "Basic Alias Analysis (stateless AA impl)", true, true)
+
+FunctionPass *llvm::createBasicAAWrapperPass() {
+  return new BasicAAWrapperPass();
+}
+
+bool BasicAAWrapperPass::runOnFunction(Function &F) {
+  auto &ACT = getAnalysis<AssumptionCacheTracker>();
+  auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
+  Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+                                 ACT.getAssumptionCache(F),
+                                 DTWP ? &DTWP->getDomTree() : nullptr,
+                                 LIWP ? &LIWP->getLoopInfo() : nullptr));
+
+  return false;
+}
+
+void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
+  return BasicAAResult(
+      F.getParent()->getDataLayout(),
+      P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 3d819eb596d4..90b7a339a0fe 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> {
   typedef Function::const_iterator nodes_iterator;
 
   static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
-    return G->getFunction()->begin();
+    return &G->getFunction()->front();
   }
   static ChildIteratorType child_begin(const NodeType *N) {
     return succ_begin(N);
   }
@@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public DefaultDOTGraphTraits {
 } // end namespace llvm
 #endif
 
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
-                      "Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
-                    "Block Frequency Analysis", true, true)
+BlockFrequencyInfo::BlockFrequencyInfo() {}
 
-char BlockFrequencyInfo::ID = 0;
-
-
-BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
-  initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
+                                       const BranchProbabilityInfo &BPI,
+                                       const LoopInfo &LI) {
+  calculate(F, BPI, LI);
 }
 
-BlockFrequencyInfo::~BlockFrequencyInfo() {}
-
-void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BranchProbabilityInfo>();
-  AU.addRequired<LoopInfoWrapperPass>();
-  AU.setPreservesAll();
-}
-
-bool BlockFrequencyInfo::runOnFunction(Function &F) {
-  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
getAnalysis().getLoopInfo(); +void BlockFrequencyInfo::calculate(const Function &F, + const BranchProbabilityInfo &BPI, + const LoopInfo &LI) { if (!BFI) BFI.reset(new ImplType); - BFI->doFunction(&F, &BPI, &LI); + BFI->calculate(F, BPI, LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); #endif - return false; -} - -void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } - -void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { - if (BFI) BFI->print(O); } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI ? BFI->getBlockFreq(BB) : 0; } +void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, + uint64_t Freq) { + assert(BFI && "Expected analysis to be available"); + BFI->setBlockFreq(BB, Freq); +} + /// Pop up a ghostview window with the current block frequency propagation /// rendered using dot. void BlockFrequencyInfo::view() const { @@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, uint64_t BlockFrequencyInfo::getEntryFreq() const { return BFI ? BFI->getEntryFreq() : 0; } + +void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } + +void BlockFrequencyInfo::print(raw_ostream &OS) const { + if (BFI) + BFI->print(OS); +} + + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) + +char BlockFrequencyInfoWrapperPass::ID = 0; + + +BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass() + : FunctionPass(ID) { + initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {} + +void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BFI.print(OS); +} + +void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); } + +bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = + getAnalysis().getBPI(); + LoopInfo &LI = getAnalysis().getLoopInfo(); + BFI.calculate(F, BPI, LI); + return false; +} diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 6ceda06aac14..48e23af2690a 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { return Freqs[Node.Index].Scaled; } +void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, + uint64_t Freq) { + assert(Node.isValid() && "Expected valid node"); + assert(Node.Index < Freqs.size() && "Expected legal index"); + Freqs[Node.Index].Integer = Freq; +} + std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { return std::string(); @@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; DEBUG(dbgs() << " - Add back edge mass for node " << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); - Dist.addLocal(HeaderNode, BackedgeMass.getMass()); + if (BackedgeMass.getMass() > 0) + Dist.addLocal(HeaderNode, 
BackedgeMass.getMass()); + else + DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); } DitheringDistributer D(Dist, LoopMass); diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 430b41241edf..cf0cc8da6ef8 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -27,13 +27,13 @@ using namespace llvm; #define DEBUG_TYPE "branch-prob" -INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) -char BranchProbabilityInfo::ID = 0; +char BranchProbabilityInfoWrapperPass::ID = 0; // Weights are for internal use only. They are used by heuristics to help to // estimate edges' probability. Example: @@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -// Standard weight value. Used when none of the heuristics set weight for -// the edge. -static const uint32_t NORMAL_WEIGHT = 16; - -// Minimum weight of an edge. Please note, that weight is NEVER 0. -static const uint32_t MIN_WEIGHT = 1; - /// \brief Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an @@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) return false; - uint32_t UnreachableWeight = - std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, UnreachableWeight); + // If the terminator is an InvokeInst, check only the normal destination block + // as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(TI)) + if (PostDominatedByUnreachable.count(II->getNormalDest())) { + PostDominatedByUnreachable.insert(BB); + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). 
+ return false; + } - if (ReachableEdges.empty()) + if (ReachableEdges.empty()) { + BranchProbability Prob(1, UnreachableEdges.size()); + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t ReachableWeight = - std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), - NORMAL_WEIGHT); - for (SmallVectorImpl::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ReachableWeight); + } + + BranchProbability UnreachableProb(UR_TAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + UnreachableEdges.size()); + BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + ReachableEdges.size()); + + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, UnreachableProb); + for (unsigned SuccIdx : ReachableEdges) + setEdgeProbability(BB, SuccIdx, ReachableProb); return true; } @@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - uint32_t W = Weights[i] / ScalingFactor; - WeightSum += W; - setEdgeWeight(BB, i, W); + Weights[i] /= ScalingFactor; + WeightSum += Weights[i]; } + + if (WeightSum == 0) { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {1, e}); + } else { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {Weights[i], static_cast(WeightSum)}); + } + assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); @@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) return false; - uint32_t ColdWeight = - std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl::iterator I = ColdEdges.begin(), - E = ColdEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ColdWeight); - - if (NormalEdges.empty()) + if (NormalEdges.empty()) { + BranchProbability Prob(1, ColdEdges.size()); + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t NormalWeight = std::max( - CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); - for (SmallVectorImpl::iterator I = NormalEdges.begin(), - E = NormalEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, NormalWeight); + } + + BranchProbability ColdProb(CC_TAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + ColdEdges.size()); + BranchProbability NormalProb(CC_NONTAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + NormalEdges.size()); + + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, ColdProb); + for (unsigned SuccIdx : NormalEdges) + setEdgeProbability(BB, SuccIdx, NormalProb); return true; } @@ -312,15 +328,18 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(PH_TAKEN_WEIGHT, + PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. 
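These hunks all repeat one conversion: raw integer edge weights become normalized BranchProbability fractions. Each group of edges contributes a fixed weight, the denominator sums the weights of the groups actually present, and a group's share is split evenly over its edges. A standalone sketch of the pattern (the 124/4 weights here are illustrative stand-ins, not this file's constants):

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    void splitExample() {
      // One backedge group (weight 124) and one exiting group (weight 4):
      unsigned Denom = 124 + 4;
      BranchProbability BackProb(124, Denom);    // ~96.9% to the backedge
      BranchProbability ExitGroup(4, Denom);     // ~3.1% shared by the exits
      BranchProbability PerExit = ExitGroup / 2; // two exits -> ~1.6% each
      // Degenerate case (cf. calcMetadataWeights): all-zero weights fall
      // back to the uniform distribution, 1/N per successor.
      BranchProbability Uniform(1, 3);
      (void)BackProb; (void)PerExit; (void)Uniform;
    }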
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { - Loop *L = LI->getLoopFor(BB); +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB, + const LoopInfo &LI) { + Loop *L = LI.getLoopFor(BB); if (!L) return false; @@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (BackEdges.empty() && ExitingEdges.empty()) return false; - if (uint32_t numBackEdges = BackEdges.size()) { - uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; - if (backWeight < NORMAL_WEIGHT) - backWeight = NORMAL_WEIGHT; + // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and + // normalize them so that they sum up to one. + SmallVector Probs(3, BranchProbability::getZero()); + unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); + if (!BackEdges.empty()) + Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!InEdges.empty()) + Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!ExitingEdges.empty()) + Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom); - for (SmallVectorImpl::iterator EI = BackEdges.begin(), - EE = BackEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, backWeight); - } + if (uint32_t numBackEdges = BackEdges.size()) { + auto Prob = Probs[0] / numBackEdges; + for (unsigned SuccIdx : BackEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numInEdges = InEdges.size()) { - uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; - if (inWeight < NORMAL_WEIGHT) - inWeight = NORMAL_WEIGHT; - - for (SmallVectorImpl::iterator EI = InEdges.begin(), - EE = InEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, inWeight); - } + auto Prob = Probs[1] / numInEdges; + for (unsigned SuccIdx : InEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numExitingEdges = ExitingEdges.size()) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; - if (exitWeight < MIN_WEIGHT) - exitWeight = MIN_WEIGHT; - - for (SmallVectorImpl::iterator EI = ExitingEdges.begin(), - EE = ExitingEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, exitWeight); - } + auto Prob = Probs[2] / numExitingEdges; + for (unsigned SuccIdx : ExitingEdges) + setEdgeProbability(BB, SuccIdx, Prob); } return true; @@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(ZH_TAKEN_WEIGHT, + ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(FPH_TAKEN_WEIGHT, + FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(IH_TAKEN_WEIGHT, + 
IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb); + setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl()); return true; } -void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.setPreservesAll(); -} - -bool BranchProbabilityInfo::runOnFunction(Function &F) { - DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() - << " ----\n\n"); - LastF = &F; // Store the last function we ran on for printing. - LI = &getAnalysis().getLoopInfo(); - assert(PostDominatedByUnreachable.empty()); - assert(PostDominatedByColdCall.empty()); - - // Walk the basic blocks in post-order so that we can build up state about - // the successors of a block iteratively. - for (auto BB : post_order(&F.getEntryBlock())) { - DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - if (calcUnreachableHeuristics(BB)) - continue; - if (calcMetadataWeights(BB)) - continue; - if (calcColdCallHeuristics(BB)) - continue; - if (calcLoopBranchHeuristics(BB)) - continue; - if (calcPointerHeuristics(BB)) - continue; - if (calcZeroHeuristics(BB)) - continue; - if (calcFloatingPointHeuristics(BB)) - continue; - calcInvokeHeuristics(BB); - } - - PostDominatedByUnreachable.clear(); - PostDominatedByColdCall.clear(); - return false; -} - void BranchProbabilityInfo::releaseMemory() { - Weights.clear(); + Probs.clear(); } -void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { +void BranchProbabilityInfo::print(raw_ostream &OS) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, // or the function it is currently running over. assert(LastF && "Cannot print prior to running over a function"); - for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); - BI != BE; ++BI) { - for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); - SI != SE; ++SI) { - printEdgeProbability(OS << " ", BI, *SI); + for (const auto &BI : *LastF) { + for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; + ++SI) { + printEdgeProbability(OS << " ", &BI, *SI); } } } -uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { - uint32_t Sum = 0; - - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum >= PrevSum); (void) PrevSum; - } - - return Sum; -} - bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { - uint32_t Sum = 0; - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(BB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(BB, Succ); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = Succ; } } // Hot probability is at least 4/5 = 80% - if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) + if (MaxProb > BranchProbability(4, 5)) return MaxSucc; return nullptr; } -/// Get the raw edge 
weight for the edge. If can't find it, return -/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index -/// to the successors. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { - DenseMap::const_iterator I = - Weights.find(std::make_pair(Src, IndexInSuccessors)); +/// Get the raw edge probability for the edge. If can't find it, return a +/// default probability 1/N where N is the number of successors. Here an edge is +/// specified using PredBlock and an +/// index to the successors. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors) const { + auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); - if (I != Weights.end()) + if (I != Probs.end()) return I->second; - return DEFAULT_WEIGHT; + return {1, + static_cast(std::distance(succ_begin(Src), succ_end(Src)))}; } -uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, - succ_const_iterator Dst) const { - return getEdgeWeight(Src, Dst.getSuccessorIndex()); +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeProbability(Src, Dst.getSuccessorIndex()); } -/// Get the raw edge weight calculated for the block pair. This returns the sum -/// of all raw edge weights from Src to Dst. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { - uint32_t Weight = 0; - bool FoundWeight = false; - DenseMap::const_iterator MapI; +/// Get the raw edge probability calculated for the block pair. This returns the +/// sum of all raw edge probabilities from Src to Dst. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + const BasicBlock *Dst) const { + auto Prob = BranchProbability::getZero(); + bool FoundProb = false; for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) if (*I == Dst) { - MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); - if (MapI != Weights.end()) { - FoundWeight = true; - Weight += MapI->second; + auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Probs.end()) { + FoundProb = true; + Prob += MapI->second; } } - return (!FoundWeight) ? DEFAULT_WEIGHT : Weight; + uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src)); + return FoundProb ? Prob : BranchProbability(1, succ_num); } -/// Set the edge weight for a given edge specified by PredBlock and an index -/// to the successors. -void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, - uint32_t Weight) { - Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getName() << " -> " - << IndexInSuccessors << " successor weight to " - << Weight << "\n"); -} - -/// Get an edge's probability, relative to other out-edges from Src. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { - uint32_t N = getEdgeWeight(Src, IndexInSuccessors); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); -} - -/// Get the probability of going from Src to Dst. It returns the sum of all -/// probabilities for edges from Src to Dst. 
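The replacement getters above synthesize a uniform probability when nothing was recorded, and the block-pair overload sums over parallel edges (a switch can reach one successor through several indices). A hedged mock of the fallback logic, using std::map in place of the analysis's DenseMap:

    #include "llvm/IR/CFG.h"
    #include "llvm/Support/BranchProbability.h"
    #include <iterator>
    #include <map>
    using namespace llvm;

    typedef std::pair<const BasicBlock *, unsigned> EdgeKey;

    BranchProbability
    lookupOrUniform(const std::map<EdgeKey, BranchProbability> &Probs,
                    const BasicBlock *Src, unsigned SuccIdx) {
      auto It = Probs.find(std::make_pair(Src, SuccIdx));
      if (It != Probs.end())
        return It->second;
      // Fallback: 1/N over the N successor slots of Src's terminator.
      uint32_t N = std::distance(succ_begin(Src), succ_end(Src));
      return BranchProbability(1, N);
    }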
-BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { - - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); +/// Set the edge probability for a given edge specified by PredBlock and an +/// index to the successors. +void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors, + BranchProbability Prob) { + Probs[std::make_pair(Src, IndexInSuccessors)] = Prob; + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors + << " successor probability to " << Prob << "\n"); } raw_ostream & @@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, return OS; } + +void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) { + DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); + LastF = &F; // Store the last function we ran on for printing. + assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) + continue; + if (calcMetadataWeights(BB)) + continue; + if (calcColdCallHeuristics(BB)) + continue; + if (calcLoopBranchHeuristics(BB, LI)) + continue; + if (calcPointerHeuristics(BB)) + continue; + if (calcZeroHeuristics(BB)) + continue; + if (calcFloatingPointHeuristics(BB)) + continue; + calcInvokeHeuristics(BB); + } + + PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); +} + +void BranchProbabilityInfoWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { + const LoopInfo &LI = getAnalysis().getLoopInfo(); + BPI.calculate(F, LI); + return false; +} + +void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } + +void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BPI.print(OS); +} diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index e15109bd2702..0dfd57d3cb6b 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F, /// and return its position in the terminator instruction's list of /// successors. It is an error to call this with a block that is not a /// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { - TerminatorInst *Term = BB->getTerminator(); +unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, + const BasicBlock *Succ) { + const TerminatorInst *Term = BB->getTerminator(); #ifndef NDEBUG unsigned e = Term->getNumSuccessors(); #endif @@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. 
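The iterator change in the CFG.cpp hunk below is the ilist/pointer split: an Instruction pointer no longer converts implicitly to a BasicBlock::const_iterator, hence the new A->getIterator() spelling. The scan, as a standalone helper (an editorial sketch, not the exported API):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Returns true if B occurs at or after A inside A's block.
    static bool reachesLaterInBlock(const Instruction *A, const Instruction *B) {
      const BasicBlock *BB = A->getParent();
      for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end();
           I != E; ++I)
        if (&*I == B)
          return true;
      return false;
    }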
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E; + ++I) { if (&*I == B) return true; } diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp index fe1c088886bc..4843ed6587a8 100644 --- a/lib/Analysis/CFLAliasAnalysis.cpp +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -27,18 +27,17 @@ // time. //===----------------------------------------------------------------------===// +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "StratifiedSets.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -47,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include @@ -55,6 +53,19 @@ using namespace llvm; #define DEBUG_TYPE "cfl-aa" +CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {} +CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + +// \brief Information we have about a function and would like to keep around +struct CFLAAResult::FunctionInfo { + StratifiedSets Sets; + // Lots of functions have < 4 returns. Adjust as necessary. + SmallVector ReturnedValues; + + FunctionInfo(StratifiedSets &&S, SmallVector &&RV) + : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} +}; + // Try to go from a Value* to a Function*. Never returns nullptr. static Optional parentFunctionOfValue(Value *); @@ -141,129 +152,13 @@ struct Edge { : From(From), To(To), Weight(W), AdditionalAttrs(A) {} }; -// \brief Information we have about a function and would like to keep around -struct FunctionInfo { - StratifiedSets Sets; - // Lots of functions have < 4 returns. Adjust as necessary. - SmallVector ReturnedValues; - - FunctionInfo(StratifiedSets &&S, SmallVector &&RV) - : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} -}; - -struct CFLAliasAnalysis; - -struct FunctionHandle : public CallbackVH { - FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) - : CallbackVH(Fn), CFLAA(CFLAA) { - assert(Fn != nullptr); - assert(CFLAA != nullptr); - } - - ~FunctionHandle() override {} - - void deleted() override { removeSelfFromCache(); } - void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - -private: - CFLAliasAnalysis *CFLAA; - - void removeSelfFromCache(); -}; - -struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { -private: - /// \brief Cached mapping of Functions to their StratifiedSets. - /// If a function's sets are currently being built, it is marked - /// in the cache as an Optional without a value. This way, if we - /// have any kind of recursion, it is discernable from a function - /// that simply has empty sets. 
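The cache the comment above describes is a three-state memoization: key absent (never scanned), key present with an empty Optional (scan in progress, i.e. recursion), key present with a value (done). A minimal sketch of that pattern, with int standing in for FunctionInfo:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static DenseMap<Function *, Optional<int>> Cache;

    // A present key holding an empty Optional marks "currently being built",
    // so a recursive query can detect the cycle instead of re-scanning.
    static bool isInProgress(Function *Fn) {
      auto It = Cache.find(Fn);
      return It != Cache.end() && !It->second.hasValue();
    }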
- DenseMap> Cache; - std::forward_list Handles; - -public: - static char ID; - - CFLAliasAnalysis() : ImmutablePass(ID) { - initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - ~CFLAliasAnalysis() override {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - } - - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - - /// \brief Inserts the given Function into the cache. - void scan(Function *Fn); - - void evict(Function *Fn) { Cache.erase(Fn); } - - /// \brief Ensures that the given function is available in the cache. - /// Returns the appropriate entry from the cache. - const Optional &ensureCached(Function *Fn) { - auto Iter = Cache.find(Fn); - if (Iter == Cache.end()) { - scan(Fn); - Iter = Cache.find(Fn); - assert(Iter != Cache.end()); - assert(Iter->second.hasValue()); - } - return Iter->second; - } - - AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - if (LocA.Ptr == LocB.Ptr) { - if (LocA.Size == LocB.Size) { - return MustAlias; - } else { - return PartialAlias; - } - } - - // Comparisons between global variables and other constants should be - // handled by BasicAA. - // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing - // a GlobalValue and ConstantExpr, but every query needs to have at least - // one Value tied to a Function, and neither GlobalValues nor ConstantExprs - // are. - if (isa(LocA.Ptr) && isa(LocB.Ptr)) { - return AliasAnalysis::alias(LocA, LocB); - } - - AliasResult QueryResult = query(LocA, LocB); - if (QueryResult == MayAlias) - return AliasAnalysis::alias(LocA, LocB); - - return QueryResult; - } - - bool doInitialization(Module &M) override; -}; - -void FunctionHandle::removeSelfFromCache() { - assert(CFLAA != nullptr); - auto *Val = getValPtr(); - CFLAA->evict(cast(Val)); - setValPtr(nullptr); -} - // \brief Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor { - CFLAliasAnalysis &AA; + CFLAAResult &AA; SmallVectorImpl &Output; public: - GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl &Output) + GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl &Output) : AA(AA), Output(Output) {} void visitInstruction(Instruction &) { @@ -480,6 +375,8 @@ public: } template void visitCallLikeInst(InstT &Inst) { + // TODO: Add support for noalias args/all the other fun function attributes + // that we can tack on. SmallVector Targets; if (getPossibleTargets(&Inst, Targets)) { if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) @@ -488,8 +385,16 @@ public: Output.clear(); } + // Because the function is opaque, we need to note that anything + // could have happened to the arguments, and that the result could alias + // just about anything, too. + // The goal of the loop is in part to unify many Values into one set, so we + // don't care if the function is void there. for (Value *V : Inst.arg_operands()) Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); + if (Inst.getNumArgOperands() == 0 && + Inst.getType() != Type::getVoidTy(Inst.getContext())) + Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); } void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } @@ -624,7 +529,7 @@ public: // ----- Various Edge iterators for the graph ----- // // \brief Iterator for edges. 
Because this graph is bidirected, we don't - // allow modificaiton of the edges using this iterator. Additionally, the + // allow modification of the edges using this iterator. Additionally, the // iterator becomes invalid if you add edges to or from the node you're // getting the edges of. struct EdgeIterator : public std::iterator> GraphT; typedef DenseMap NodeMapT; } -// -- Setting up/registering CFLAA pass -- // -char CFLAliasAnalysis::ID = 0; - -INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", - "CFL-Based AA implementation", false, true, false) - -ImmutablePass *llvm::createCFLAliasAnalysisPass() { - return new CFLAliasAnalysis(); -} - //===----------------------------------------------------------------------===// // Function declarations that require types defined in the namespace above //===----------------------------------------------------------------------===// @@ -751,12 +646,10 @@ static Optional valueToAttrIndex(Value *Val); static EdgeType flipWeight(EdgeType); // Gets edges of the given Instruction*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, Instruction *, - SmallVectorImpl &); +static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl &); // Gets edges of the given ConstantExpr*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *, - SmallVectorImpl &); +static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl &); // Gets the "Level" that one should travel in StratifiedSets // given an EdgeType. @@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType); // Builds the graph needed for constructing the StratifiedSets for the // given function -static void buildGraphFrom(CFLAliasAnalysis &, Function *, +static void buildGraphFrom(CFLAAResult &, Function *, SmallVectorImpl &, NodeMapT &, GraphT &); // Gets the edges of a ConstantExpr as if it was an Instruction. This // function also acts on any nested ConstantExprs, adding the edges // of those to the given SmallVector as well. -static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, +static void constexprToEdges(CFLAAResult &, ConstantExpr &, SmallVectorImpl &); // Given an Instruction, this will add it to the graph, along with any @@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 // addInstructionToGraph would add both the `load` and `getelementptr` // instructions to the graph appropriately. -static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &, +static void addInstructionToGraph(CFLAAResult &, Instruction &, SmallVectorImpl &, NodeMapT &, GraphT &); // Notes whether it would be pointless to add the given Value to our sets. static bool canSkipAddingToSets(Value *Val); -// Builds the graph + StratifiedSets for a function. -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); - static Optional parentFunctionOfValue(Value *Val) { if (auto *Inst = dyn_cast(Val)) { auto *Bb = Inst->getParent(); @@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) { } static bool hasUsefulEdges(ConstantExpr *CE) { - // ConstantExpr doens't have terminators, invokes, or fences, so only needs + // ConstantExpr doesn't have terminators, invokes, or fences, so only needs // to check for compares. 
return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; @@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) { llvm_unreachable("Incomplete coverage of EdgeType enum"); } -static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, +static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst, SmallVectorImpl &Output) { assert(hasUsefulEdges(Inst) && "Expected instructions to have 'useful' edges"); @@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, v.visit(Inst); } -static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE, +static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE, SmallVectorImpl &Output) { assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges"); GetEdgesVisitor v(Analysis, Output); @@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) { llvm_unreachable("Incomplete switch coverage"); } -static void constexprToEdges(CFLAliasAnalysis &Analysis, +static void constexprToEdges(CFLAAResult &Analysis, ConstantExpr &CExprToCollapse, SmallVectorImpl &Results) { SmallVector Worklist; @@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis, } } -static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, +static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst, SmallVectorImpl &ReturnedValues, NodeMapT &Map, GraphT &Graph) { const auto findOrInsertNode = [&Map, &Graph](Value *Val) { @@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, // buy us much that we don't already have. I'd like to add interprocedural // analysis prior to this however, in case that somehow requires the graph // produced by this for efficient execution -static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, +static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn, SmallVectorImpl &ReturnedValues, NodeMapT &Map, GraphT &Graph) { for (auto &Bb : Fn->getBasicBlockList()) @@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) { return false; } -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { +// Builds the graph + StratifiedSets for a function. +CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) { NodeMapT Map; GraphT Graph; SmallVector ReturnedValues; - buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); + buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph); DenseMap NodeValueMap; NodeValueMap.resize(Map.size()); @@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { return FunctionInfo(Builder.build(), std::move(ReturnedValues)); } -void CFLAliasAnalysis::scan(Function *Fn) { +void CFLAAResult::scan(Function *Fn) { auto InsertPair = Cache.insert(std::make_pair(Fn, Optional())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); - FunctionInfo Info(buildSetsFrom(*this, Fn)); + FunctionInfo Info(buildSetsFrom(Fn)); Cache[Fn] = std::move(Info); Handles.push_front(FunctionHandle(Fn, this)); } -AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } + +/// \brief Ensures that the given function is available in the cache. +/// Returns the appropriate entry from the cache. 
+const Optional & +CFLAAResult::ensureCached(Function *Fn) { + auto Iter = Cache.find(Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; +} + +AliasResult CFLAAResult::query(const MemoryLocation &LocA, + const MemoryLocation &LocB) { auto *ValA = const_cast(LocA.Ptr); auto *ValB = const_cast(LocB.Ptr); @@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, return NoAlias; } -bool CFLAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +CFLAAResult CFLAA::run(Function &F, AnalysisManager *AM) { + return CFLAAResult(AM->getResult(F)); +} + +char CFLAA::PassID; + +char CFLAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) + +ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } + +CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { + initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool CFLAAWrapperPass::doInitialization(Module &M) { + Result.reset( + new CFLAAResult(getAnalysis().getTLI())); + return false; +} + +bool CFLAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 3ec79adba57f..69623619a8b0 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -1,8 +1,6 @@ add_llvm_library(LLVMAnalysis AliasAnalysis.cpp - AliasAnalysisCounter.cpp AliasAnalysisEvaluator.cpp - AliasDebugger.cpp AliasSetTracker.cpp Analysis.cpp AssumptionCache.cpp @@ -14,16 +12,23 @@ add_llvm_library(LLVMAnalysis CFGPrinter.cpp CFLAliasAnalysis.cpp CGSCCPassManager.cpp + CallGraph.cpp + CallGraphSCCPass.cpp + CallPrinter.cpp CaptureTracking.cpp CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp Delinearization.cpp + DemandedBits.cpp DependenceAnalysis.cpp DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp + EHPersonalities.cpp + GlobalsModRef.cpp IVUsers.cpp + InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp @@ -31,8 +36,6 @@ add_llvm_library(LLVMAnalysis IteratedDominanceFrontier.cpp LazyCallGraph.cpp LazyValueInfo.cpp - LibCallAliasAnalysis.cpp - LibCallSemantics.cpp Lint.cpp Loads.cpp LoopAccessAnalysis.cpp @@ -44,7 +47,10 @@ add_llvm_library(LLVMAnalysis MemoryDependenceAnalysis.cpp MemoryLocation.cpp ModuleDebugInfoPrinter.cpp - NoAliasAnalysis.cpp + ObjCARCAliasAnalysis.cpp + ObjCARCAnalysisUtils.cpp + ObjCARCInstKind.cpp + OrderedBasicBlock.cpp PHITransAddr.cpp PostDominators.cpp PtrUseVisitor.cpp @@ -69,5 +75,3 @@ add_llvm_library(LLVMAnalysis ) add_dependencies(LLVMAnalysis intrinsics_gen) - -add_subdirectory(IPA) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/CallGraph.cpp similarity index 91% rename from lib/Analysis/IPA/CallGraph.cpp rename to lib/Analysis/CallGraph.cpp index e2799d965a7d..7cec962678e8 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -22,7 +22,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(new 
CallGraphNode(nullptr)) { + CallsExternalNode(llvm::make_unique(nullptr)) { // Add every function to the call graph. for (Function &F : M) addToCallGraph(&F); @@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M) Root = ExternalCallingNode; } +CallGraph::CallGraph(CallGraph &&Arg) + : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), + ExternalCallingNode(Arg.ExternalCallingNode), + CallsExternalNode(std::move(Arg.CallsExternalNode)) { + Arg.FunctionMap.clear(); + Arg.Root = nullptr; + Arg.ExternalCallingNode = nullptr; +} + CallGraph::~CallGraph() { // CallsExternalNode is not in the function map, delete it explicitly. - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; + if (CallsExternalNode) + CallsExternalNode->allReferencesDropped(); // Reset all node's use counts to zero before deleting them to prevent an // assertion from firing. @@ -43,8 +52,6 @@ CallGraph::~CallGraph() { for (auto &I : FunctionMap) I.second->allReferencesDropped(); #endif - for (auto &I : FunctionMap) - delete I.second; } void CallGraph::addToCallGraph(Function *F) { @@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) { // If this function is not defined in this translation unit, it could call // anything. if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode); + Node->addCalledFunction(CallSite(), CallsExternalNode.get()); // Look for calls by this function. for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) @@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) { // Indirect calls of intrinsics are not allowed so no need to check. // We can be more precise here by using TargetArg returned by // Intrinsic::isLeaf. - Node->addCalledFunction(CS, CallsExternalNode); + Node->addCalledFunction(CS, CallsExternalNode.get()); else if (!Callee->isIntrinsic()) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } @@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const { Nodes.reserve(FunctionMap.size()); for (auto I = begin(), E = end(); I != E; ++I) - Nodes.push_back(I->second); + Nodes.push_back(I->second.get()); std::sort(Nodes.begin(), Nodes.end(), [](CallGraphNode *LHS, CallGraphNode *RHS) { @@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const { CN->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); } -#endif // removeFunctionFromModule - Unlink the function from this module, returning // it. 
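The CallGraph hunks here move node ownership from raw pointers into std::unique_ptr held directly in the function map: lookups hand out non-owning raw pointers, and erasing a map entry frees the node, which is what lets the destructor and removeFunctionFromModule drop their manual deletes. A reduced sketch of that ownership scheme (hypothetical names; the patch itself uses llvm::make_unique):

    #include <map>
    #include <memory>

    struct Node { int Data; };
    static std::map<int, std::unique_ptr<Node>> FunctionMap;

    // Mirrors getOrInsertFunction: the map slot owns the node; callers
    // only ever borrow it.
    static Node *getOrInsert(int Key) {
      std::unique_ptr<Node> &Slot = FunctionMap[Key];
      if (!Slot)
        Slot.reset(new Node());
      return Slot.get();
    }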
Because this removes the function from the module, the call graph node @@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { assert(CGN->empty() && "Cannot remove function from call " "graph if it references other functions!"); Function *F = CGN->getFunction(); // Get the function for the call graph node - delete CGN; // Delete the call graph node for this func FunctionMap.erase(F); // Remove the call graph node from the map M.getFunctionList().remove(F); @@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { "Pointing CallGraphNode at a function that already exists"); FunctionMapTy::iterator I = FunctionMap.find(From); I->second->F = const_cast(To); - FunctionMap[To] = I->second; + FunctionMap[To] = std::move(I->second); FunctionMap.erase(I); } @@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { // it will insert a new CallGraphNode for the specified function if one does // not already exist. CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { - CallGraphNode *&CGN = FunctionMap[F]; + auto &CGN = FunctionMap[F]; if (CGN) - return CGN; + return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - return CGN = new CallGraphNode(const_cast(F)); + CGN = llvm::make_unique(const_cast(F)); + return CGN.get(); } //===----------------------------------------------------------------------===// @@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } -#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it @@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { G->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } -#endif diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp similarity index 100% rename from lib/Analysis/IPA/CallGraphSCCPass.cpp rename to lib/Analysis/CallGraphSCCPass.cpp diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp similarity index 100% rename from lib/Analysis/IPA/CallPrinter.cpp rename to lib/Analysis/CallPrinter.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 52ef807aeb59..1add2fa77566 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -52,63 +53,6 @@ namespace { bool Captured; }; - struct NumberedInstCache { - SmallDenseMap NumberedInsts; - BasicBlock::const_iterator LastInstFound; - unsigned LastInstPos; - const BasicBlock *BB; - - NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) { - LastInstFound = BB->end(); - } - - /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out - /// instruction while walking 'BB'. 
- const Instruction *find(const Instruction *A, const Instruction *B) { - const Instruction *Inst = nullptr; - assert(!(LastInstFound == BB->end() && LastInstPos != 0) && - "Instruction supposed to be in NumberedInsts"); - - // Start the search with the instruction found in the last lookup round. - auto II = BB->begin(); - auto IE = BB->end(); - if (LastInstFound != IE) - II = std::next(LastInstFound); - - // Number all instructions up to the point where we find 'A' or 'B'. - for (++LastInstPos; II != IE; ++II, ++LastInstPos) { - Inst = cast(II); - NumberedInsts[Inst] = LastInstPos; - if (Inst == A || Inst == B) - break; - } - - assert(II != IE && "Instruction not found?"); - LastInstFound = II; - return Inst; - } - - /// \brief Find out whether 'A' dominates 'B', meaning whether 'A' - /// comes before 'B' in 'BB'. This is a simplification that considers - /// cached instruction positions and ignores other basic blocks, being - /// only relevant to compare relative instructions positions inside 'BB'. - bool dominates(const Instruction *A, const Instruction *B) { - assert(A->getParent() == B->getParent() && - "Instructions must be in the same basic block!"); - - unsigned NA = NumberedInsts.lookup(A); - unsigned NB = NumberedInsts.lookup(B); - if (NA && NB) - return NA < NB; - if (NA) - return true; - if (NB) - return false; - - return A == find(A, B); - } - }; - /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. /// Only support the case where the Value is defined in the same basic block @@ -116,8 +60,8 @@ namespace { struct CapturesBefore : public CaptureTracker { CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, - bool IncludeI) - : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT), + bool IncludeI, OrderedBasicBlock *IC) + : OrderedBB(IC), BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} void tooManyUses() override { Captured = true; } @@ -131,18 +75,18 @@ namespace { // Compute the case where both instructions are inside the same basic // block. Since instructions in the same BB as BeforeHere are numbered in - // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable' + // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable' // which are very expensive for large basic blocks. if (BB == BeforeHere->getParent()) { // 'I' dominates 'BeforeHere' => not safe to prune. // - // The value defined by an invoke dominates an instruction only if it - // dominates every instruction in UseBB. A PHI is dominated only if - // the instruction dominates every possible use in the UseBB. Since + // The value defined by an invoke dominates an instruction only + // if it dominates every instruction in UseBB. A PHI is dominated only + // if the instruction dominates every possible use in the UseBB. Since // UseBB == BB, avoid pruning. 
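The cache being deleted here survives as the new OrderedBasicBlock utility: instruction positions are assigned lazily, so the first intra-block order query pays for a partial walk of the block and later queries are plain map lookups. A reduced sketch of lazy order numbering, with plain ints standing in for instructions:

    #include <map>
    #include <vector>

    struct LazyOrder {
      const std::vector<int> &Seq;  // stands in for the basic block
      std::map<int, unsigned> Pos;  // positions assigned so far
      unsigned NextIdx;

      explicit LazyOrder(const std::vector<int> &S) : Seq(S), NextIdx(0) {}

      // Number elements only as far as needed to find V.
      unsigned positionOf(int V) {
        auto It = Pos.find(V);
        if (It != Pos.end())
          return It->second;
        while (NextIdx < Seq.size()) {
          Pos[Seq[NextIdx]] = NextIdx;
          if (Seq[NextIdx] == V)
            return NextIdx++;
          ++NextIdx;
        }
        return ~0u; // not present
      }

      bool comesBefore(int A, int B) { return positionOf(A) < positionOf(B); }
    };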
if (isa(BeforeHere) || isa(I) || I == BeforeHere) return false; - if (!LocalInstCache.dominates(BeforeHere, I)) + if (!OrderedBB->dominates(BeforeHere, I)) return false; // 'BeforeHere' comes before 'I', it's safe to prune if we also @@ -157,10 +101,7 @@ namespace { SmallVector Worklist; Worklist.append(succ_begin(BB), succ_end(BB)); - if (!isPotentiallyReachableFromMany(Worklist, BB, DT)) - return true; - - return false; + return !isPotentiallyReachableFromMany(Worklist, BB, DT); } // If the value is defined in the same basic block as use and BeforeHere, @@ -196,7 +137,7 @@ namespace { return true; } - NumberedInstCache LocalInstCache; + OrderedBasicBlock *OrderedBB; const Instruction *BeforeHere; DominatorTree *DT; @@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V, /// returning the value (or part of it) from the function counts as capturing /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it -/// or not. +/// or not. A ordered basic block \p OBB can be used in order to speed up +/// queries about relative order among instructions in the same basic block. bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI) { + DominatorTree *DT, bool IncludeI, + OrderedBasicBlock *OBB) { assert(!isa(V) && "It doesn't make sense to ask whether a global is captured."); + bool UseNewOBB = OBB == nullptr; if (!DT) return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); + if (UseNewOBB) + OBB = new OrderedBasicBlock(I->getParent()); // TODO: See comment in PointerMayBeCaptured regarding what could be done // with StoreCaptures. - CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB); PointerMayBeCaptured(V, &CB); + + if (UseNewOBB) + delete OBB; return CB.Captured; } @@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { // that loading a value from a pointer does not cause the pointer to be // captured, even though the loaded value might be the pointer itself // (think of self-referential objects). - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) + CallSite::data_operand_iterator B = + CS.data_operands_begin(), E = CS.data_operands_end(); + for (CallSite::data_operand_iterator A = B; A != E; ++A) if (A->get() == V && !CS.doesNotCapture(A - B)) // The parameter is not marked 'nocapture' - captured. if (Tracker->captured(U)) diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 46a2c43b1690..4090b4cd752b 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector &WorkSet, continue; // If all uses of this value are ephemeral, then so is this value. - bool FoundNEUse = false; - for (const User *I : V->users()) - if (!EphValues.count(I)) { - FoundNEUse = true; - break; - } - - if (FoundNEUse) + if (!std::all_of(V->user_begin(), V->user_end(), + [&](const User *U) { return EphValues.count(U); })) continue; EphValues.insert(V); @@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Skip ephemeral values. - if (EphValues.count(II)) + if (EphValues.count(&*II)) continue; // Special handling for calls. 
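The completeEphemeralValues change above compresses a hand-rolled flag-and-break loop into a single std::all_of over the use list: a value is ephemeral only if every one of its users already is. The same shape drops in anywhere an all-users predicate is needed; a self-contained sketch:

    #include <algorithm>
    #include <set>
    #include <vector>

    static bool allUsersEphemeral(const std::vector<int> &Users,
                                  const std::set<int> &EphValues) {
      return std::all_of(Users.begin(), Users.end(),
                         [&](int U) { return EphValues.count(U) != 0; });
    }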
@@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
     if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
       ++NumVectorInsts;
 
+    if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB))
+      notDuplicatable = true;
+
     if (const CallInst *CI = dyn_cast<CallInst>(II))
       if (CI->cannotDuplicate())
         notDuplicatable = true;
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 02a5aef03223..ccb56631b846 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
 
   // Look through ptr->int and ptr->ptr casts.
   if (CE->getOpcode() == Instruction::PtrToInt ||
-      CE->getOpcode() == Instruction::BitCast ||
-      CE->getOpcode() == Instruction::AddrSpaceCast)
+      CE->getOpcode() == Instruction::BitCast)
     return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
 
   // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
@@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
       return GV->getInitializer();
 
+  if (auto *GA = dyn_cast<GlobalAlias>(C))
+    if (GA->getAliasee() && !GA->mayBeOverridden())
+      return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL);
+
   // If the loaded value isn't a constant expr, we can't handle it.
   ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
   if (!CE)
@@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::sqrt:
   case Intrinsic::sin:
   case Intrinsic::cos:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
   case Intrinsic::pow:
   case Intrinsic::powi:
   case Intrinsic::bswap:
@@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   // return true for a name like "cos\0blah" which strcmp would return equal to
   // "cos", but has length 8.
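The "cos\0blah" remark above is precisely why the matching below compares StringRef values rather than calling strcmp: StringRef equality also compares lengths, so an embedded NUL cannot spoof a shorter name. A quick demonstration:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    #include <cstring>

    void nulExample() {
      llvm::StringRef Name("cos\0blah", 8);         // embedded NUL, length 8
      assert(Name != "cos");                        // lengths differ: not equal
      assert(std::strcmp(Name.data(), "cos") == 0); // strcmp stops at the NUL
    }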
switch (Name[0]) { - default: return false; + default: + return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || + Name == "atan2" || Name == "acosf" || Name == "asinf" || + Name == "atanf" || Name == "atan2f"; case 'c': - return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + return Name == "ceil" || Name == "cos" || Name == "cosh" || + Name == "ceilf" || Name == "cosf" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2"; + return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "fmod" || Name == "floor"; + return Name == "fabs" || Name == "floor" || Name == "fmod" || + Name == "fabsf" || Name == "floorf" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10"; + return Name == "log" || Name == "log10" || Name == "logf" || + Name == "log10f"; case 'p': - return Name == "pow"; + return Name == "pow" || Name == "powf"; case 's': return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sqrtf"; + Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh"; + return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; } } @@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), V); } + if (IntrinsicID == Intrinsic::floor) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::ceil) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::trunc) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::rint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::nearbyint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. 
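All five new cases above funnel into APFloat::roundToIntegral and differ only in rounding mode (floor uses rmTowardNegative, ceil rmTowardPositive, trunc rmTowardZero, rint and nearbyint rmNearestTiesToEven), so the fold never touches the host libm. A standalone spot-check:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using llvm::APFloat;

    void roundingExample() {
      APFloat V(-2.5);
      V.roundToIntegral(APFloat::rmTowardNegative);    // floor(-2.5) == -3.0
      assert(V.isExactlyValue(-3.0));

      APFloat W(2.5);
      W.roundToIntegral(APFloat::rmNearestTiesToEven); // rint(2.5) == 2.0
      assert(W.isExactlyValue(2.0));                   // ties go to even
    }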
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: return ConstantFoldFP(exp2, V, Ty); - case Intrinsic::floor: - return ConstantFoldFP(floor, V, Ty); - case Intrinsic::ceil: - return ConstantFoldFP(ceil, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, switch (Name[0]) { case 'a': - if (Name == "acos" && TLI->has(LibFunc::acos)) + if ((Name == "acos" && TLI->has(LibFunc::acos)) || + (Name == "acosf" && TLI->has(LibFunc::acosf))) return ConstantFoldFP(acos, V, Ty); - else if (Name == "asin" && TLI->has(LibFunc::asin)) + else if ((Name == "asin" && TLI->has(LibFunc::asin)) || + (Name == "asinf" && TLI->has(LibFunc::asinf))) return ConstantFoldFP(asin, V, Ty); - else if (Name == "atan" && TLI->has(LibFunc::atan)) + else if ((Name == "atan" && TLI->has(LibFunc::atan)) || + (Name == "atanf" && TLI->has(LibFunc::atanf))) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if (Name == "ceil" && TLI->has(LibFunc::ceil)) + if ((Name == "ceil" && TLI->has(LibFunc::ceil)) || + (Name == "ceilf" && TLI->has(LibFunc::ceilf))) return ConstantFoldFP(ceil, V, Ty); - else if (Name == "cos" && TLI->has(LibFunc::cos)) + else if ((Name == "cos" && TLI->has(LibFunc::cos)) || + (Name == "cosf" && TLI->has(LibFunc::cosf))) return ConstantFoldFP(cos, V, Ty); - else if (Name == "cosh" && TLI->has(LibFunc::cosh)) + else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) || + (Name == "coshf" && TLI->has(LibFunc::coshf))) return ConstantFoldFP(cosh, V, Ty); - else if (Name == "cosf" && TLI->has(LibFunc::cosf)) - return ConstantFoldFP(cos, V, Ty); break; case 'e': - if (Name == "exp" && TLI->has(LibFunc::exp)) + if ((Name == "exp" && TLI->has(LibFunc::exp)) || + (Name == "expf" && TLI->has(LibFunc::expf))) return ConstantFoldFP(exp, V, Ty); - - if (Name == "exp2" && TLI->has(LibFunc::exp2)) { + if ((Name == "exp2" && TLI->has(LibFunc::exp2)) || + (Name == "exp2f" && TLI->has(LibFunc::exp2f))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
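The exp2-as-pow fallback above rests on the identity 2^x = pow(2, x): for modest integer exponents both are exact in double on typical hosts, while general arguments match only to the host libm's accuracy. A quick numeric spot-check of the exact cases:

    #include <cassert>
    #include <cmath>

    void exp2Example() {
      // pow(2, x) stands in for exp2(x) when the host lacks C99 exp2.
      assert(std::pow(2.0, 10.0) == 1024.0); // exact in double
      assert(std::pow(2.0, -3.0) == 0.125);
    }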
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); - } break; case 'f': - if (Name == "fabs" && TLI->has(LibFunc::fabs)) + if ((Name == "fabs" && TLI->has(LibFunc::fabs)) || + (Name == "fabsf" && TLI->has(LibFunc::fabsf))) return ConstantFoldFP(fabs, V, Ty); - else if (Name == "floor" && TLI->has(LibFunc::floor)) + else if ((Name == "floor" && TLI->has(LibFunc::floor)) || + (Name == "floorf" && TLI->has(LibFunc::floorf))) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) + if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) || + (Name == "logf" && V > 0 && TLI->has(LibFunc::logf))) return ConstantFoldFP(log, V, Ty); - else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f))) return ConstantFoldFP(log10, V, Ty); else if (IntrinsicID == Intrinsic::sqrt && (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } break; case 's': - if (Name == "sin" && TLI->has(LibFunc::sin)) + if ((Name == "sin" && TLI->has(LibFunc::sin)) || + (Name == "sinf" && TLI->has(LibFunc::sinf))) return ConstantFoldFP(sin, V, Ty); - else if (Name == "sinh" && TLI->has(LibFunc::sinh)) + else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) || + (Name == "sinhf" && TLI->has(LibFunc::sinhf))) return ConstantFoldFP(sinh, V, Ty); - else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) + else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) || + (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) - return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sinf" && TLI->has(LibFunc::sinf)) - return ConstantFoldFP(sin, V, Ty); break; case 't': - if (Name == "tan" && TLI->has(LibFunc::tan)) + if ((Name == "tan" && TLI->has(LibFunc::tan)) || + (Name == "tanf" && TLI->has(LibFunc::tanf))) return ConstantFoldFP(tan, V, Ty); - else if (Name == "tanh" && TLI->has(LibFunc::tanh)) + else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) || + (Name == "tanhf" && TLI->has(LibFunc::tanhf))) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (!TLI) return nullptr; - if (Name == "pow" && TLI->has(LibFunc::pow)) + if ((Name == "pow" && TLI->has(LibFunc::pow)) || + (Name == "powf" && TLI->has(LibFunc::powf))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if (Name == "fmod" && TLI->has(LibFunc::fmod)) + if ((Name == "fmod" && TLI->has(LibFunc::fmod)) || + (Name == "fmodf" && TLI->has(LibFunc::fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if (Name == "atan2" && TLI->has(LibFunc::atan2)) + if ((Name == "atan2" && TLI->has(LibFunc::atan2)) || + (Name == "atan2f" && TLI->has(LibFunc::atan2f))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index b529c1a70aa3..0383cbfbbe4c 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector ActualMask = SI->getShuffleMask(); - if (Mask != ActualMask) - return 
false;
-
-  return true;
+  return Mask == ActualMask;
 }
 
 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
@@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
     return -1;
 
   switch (I->getOpcode()) {
-  case Instruction::GetElementPtr:{
-    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
-    return TTI->getAddressComputationCost(ValTy);
-  }
+  case Instruction::GetElementPtr:
+    return TTI->getUserCost(I);
 
   case Instruction::Ret:
   case Instruction::PHI:
@@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   }
   case Instruction::Call:
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Type *, 4> Tys;
+      SmallVector<Value *, 4> Args;
       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
-        Tys.push_back(II->getArgOperand(J)->getType());
+        Args.push_back(II->getArgOperand(J));
 
       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
-                                        Tys);
+                                        Args);
     }
     return -1;
   default:
@@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
 
   for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
     for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
-      Instruction *Inst = it;
+      Instruction *Inst = &*it;
       unsigned Cost = getInstructionCost(Inst);
       if (Cost != (unsigned)-1)
         OS << "Cost Model: Found an estimated cost of " << Cost;
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 9d1578603268..baee8b3b084b 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -60,12 +60,12 @@ public:
 void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<LoopInfoWrapperPass>();
-  AU.addRequired<ScalarEvolution>();
+  AU.addRequired<ScalarEvolutionWrapperPass>();
 }
 
 bool Delinearization::runOnFunction(Function &F) {
   this->F = &F;
-  SE = &getAnalysis<ScalarEvolution>();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   return false;
 }
@@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
       if (!BasePointer)
         break;
       AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
-      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
-
-      // Do not try to delinearize memory accesses that are not AddRecs.
-      if (!AR)
-        break;
-
       O << "\n";
       O << "Inst:" << *Inst << "\n";
       O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
-      O << "AddRec: " << *AR << "\n";
+      O << "AccessFunction: " << *AccessFn << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
-      SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
+      SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
       if (Subscripts.size() == 0 || Sizes.size() == 0 ||
           Subscripts.size() != Sizes.size()) {
         O << "failed to delinearize\n";
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
new file mode 100644
index 000000000000..912c5ceb754d
--- /dev/null
+++ b/lib/Analysis/DemandedBits.cpp
@@ -0,0 +1,392 @@
+//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow.
+// For example in this sequence:
+//
+//   %1 = add i32 %x, %y
+//   %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "demanded-bits"
+
+char DemandedBits::ID = 0;
+INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis",
+                    false, false)
+
+DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) {
+  initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
+}
+
+void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.setPreservesAll();
+}
+
+static bool isAlwaysLive(Instruction *I) {
+  return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+      I->isEHPad() || I->mayHaveSideEffects();
+}
+
+void DemandedBits::determineLiveOperandBits(
+    const Instruction *UserI, const Instruction *I, unsigned OperandNo,
+    const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
+    APInt &KnownZero2, APInt &KnownOne2) {
+  unsigned BitWidth = AB.getBitWidth();
+
+  // We're called once per operand, but for some instructions, we need to
+  // compute known bits of both operands in order to determine the live bits of
+  // either (when both operands are instructions themselves). We don't,
+  // however, want to do this twice, so we cache the result in APInts that live
+  // in the caller. For the two-relevant-operands case, both operand values are
+  // provided here.
+  auto ComputeKnownBits =
+      [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+        const DataLayout &DL = I->getModule()->getDataLayout();
+        KnownZero = APInt(BitWidth, 0);
+        KnownOne = APInt(BitWidth, 0);
+        computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+                         AC, UserI, DT);
+
+        if (V2) {
+          KnownZero2 = APInt(BitWidth, 0);
+          KnownOne2 = APInt(BitWidth, 0);
+          computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+                           0, AC, UserI, DT);
+        }
+      };
+
+  switch (UserI->getOpcode()) {
+  default: break;
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap:
+        // The alive bits of the input are the swapped alive bits of
+        // the output.
+        AB = AOut.byteSwap();
+        break;
+      case Intrinsic::ctlz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the left of, and including, the leftmost bit
+          // known to be one.
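+          // For example: with BitWidth = 8 and KnownOne = 0b00010000, the
+          // leftmost known-one bit is bit 4, KnownOne.countLeadingZeros() is
+          // 3, and AB becomes getHighBitsSet(8, 4) = 0b11110000; no lower
+          // input bit can change the leading-zero count.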
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getHighBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+        }
+        break;
+      case Intrinsic::cttz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the right of, and including, the rightmost bit
+          // known to be one.
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getLowBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+        }
+        break;
+      }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // Find the highest live output bit. We don't need any more input
+    // bits than that (adds, and thus subtracts, ripple only to the
+    // left).
+    AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+    break;
+  case Instruction::Shl:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.lshr(ShiftAmt);
+
+        // If the shift is nuw/nsw, then the high bits are not dead
+        // (because we've promised that they *must* be zero).
+        const ShlOperator *S = cast<ShlOperator>(UserI);
+        if (S->hasNoSignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+        else if (S->hasNoUnsignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::LShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<LShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::AShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+        // Because the high input bit is replicated into the
+        // high-order bits of the result, if we need any of those
+        // bits, then we must keep the highest input bit.
+        if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+            .getBoolValue())
+          AB.setBit(BitWidth-1);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<AShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::And:
+    AB = AOut;
+
+    // For bits that are known zero, the corresponding bits in the
+    // other operand are dead (unless they're both zero, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownZero2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownZero & ~KnownZero2);
+    }
+    break;
+  case Instruction::Or:
+    AB = AOut;
+
+    // For bits that are known one, the corresponding bits in the
+    // other operand are dead (unless they're both one, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
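+    // For example: if AOut = 0b1111 and the other operand has
+    // KnownOne = 0b0011, the two low bits of this operand cannot change the
+    // result, so AB becomes 0b1100.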
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownOne2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownOne & ~KnownOne2);
+    }
+    break;
+  case Instruction::Xor:
+  case Instruction::PHI:
+    AB = AOut;
+    break;
+  case Instruction::Trunc:
+    AB = AOut.zext(BitWidth);
+    break;
+  case Instruction::ZExt:
+    AB = AOut.trunc(BitWidth);
+    break;
+  case Instruction::SExt:
+    AB = AOut.trunc(BitWidth);
+    // Because the high input bit is replicated into the
+    // high-order bits of the result, if we need any of those
+    // bits, then we must keep the highest input bit.
+    if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+                                      AOut.getBitWidth() - BitWidth))
+        .getBoolValue())
+      AB.setBit(BitWidth-1);
+    break;
+  case Instruction::Select:
+    if (OperandNo != 0)
+      AB = AOut;
+    break;
+  case Instruction::ICmp:
+    // Count the number of leading zeroes in each operand.
+    ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+    auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
+                                     KnownZero2.countLeadingOnes());
+    AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
+    break;
+  }
+}
+
+bool DemandedBits::runOnFunction(Function& Fn) {
+  F = &Fn;
+  Analyzed = false;
+  return false;
+}
+
+void DemandedBits::performAnalysis() {
+  if (Analyzed)
+    // Analysis already completed for this function.
+    return;
+  Analyzed = true;
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  Visited.clear();
+  AliveBits.clear();
+
+  SmallVector<Instruction*, 128> Worklist;
+
+  // Collect the set of "root" instructions that are known live.
+  for (Instruction &I : instructions(*F)) {
+    if (!isAlwaysLive(&I))
+      continue;
+
+    DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+    // For integer-valued instructions, set up an initial empty set of alive
+    // bits and add the instruction to the work list. For other instructions
+    // add their operands to the work list (for integer-valued operands, mark
+    // all bits as live).
+    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+      if (!AliveBits.count(&I)) {
+        AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+        Worklist.push_back(&I);
+      }
+
+      continue;
+    }
+
+    // Non-integer-typed instructions...
+    for (Use &OI : I.operands()) {
+      if (Instruction *J = dyn_cast<Instruction>(OI)) {
+        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+        Worklist.push_back(J);
+      }
+    }
+    // To save memory, we don't add I to the Visited set here. Instead, we
+    // check isAlwaysLive on every instruction when searching for dead
+    // instructions later (we need to check isAlwaysLive for the
+    // integer-typed instructions anyway).
+  }
+
+  // Propagate liveness backwards to operands.
+  while (!Worklist.empty()) {
+    Instruction *UserI = Worklist.pop_back_val();
+
+    DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+    APInt AOut;
+    if (UserI->getType()->isIntegerTy()) {
+      AOut = AliveBits[UserI];
+      DEBUG(dbgs() << " Alive Out: " << AOut);
+    }
+    DEBUG(dbgs() << "\n");
+
+    if (!UserI->getType()->isIntegerTy())
+      Visited.insert(UserI);
+
+    APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+    // Compute the set of alive bits for each operand. These are ORed into the
+    // existing set, if any, and if that changes the set of alive bits, the
+    // operand is added to the work-list.
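+    // For example, if an operand's recorded alive bits are 0b0011 and this
+    // user demands 0b0100 of it, the merged set becomes 0b0111 and the
+    // operand is pushed back onto the work-list.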
+ for (Use &OI : UserI->operands()) { + if (Instruction *I = dyn_cast(OI)) { + if (IntegerType *IT = dyn_cast(I->getType())) { + unsigned BitWidth = IT->getBitWidth(); + APInt AB = APInt::getAllOnesValue(BitWidth); + if (UserI->getType()->isIntegerTy() && !AOut && + !isAlwaysLive(UserI)) { + AB = APInt(BitWidth, 0); + } else { + // If all bits of the output are dead, then all bits of the input + // Bits of each operand that are used to compute alive bits of the + // output are alive, all others are dead. + determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, + KnownZero, KnownOne, + KnownZero2, KnownOne2); + } + + // If we've added to the set of alive bits (or the operand has not + // been previously visited), then re-queue the operand to be visited + // again. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + + APInt ABNew = AB | ABPrev; + if (ABNew != ABPrev || ABI == AliveBits.end()) { + AliveBits[I] = std::move(ABNew); + Worklist.push_back(I); + } + } else if (!Visited.count(I)) { + Worklist.push_back(I); + } + } + } + } +} + +APInt DemandedBits::getDemandedBits(Instruction *I) { + performAnalysis(); + + const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); + if (AliveBits.count(I)) + return AliveBits[I]; + return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType())); +} + +bool DemandedBits::isInstructionDead(Instruction *I) { + performAnalysis(); + + return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() && + !isAlwaysLive(I); +} + +void DemandedBits::print(raw_ostream &OS, const Module *M) const { + // This is gross. But the alternative is making all the state mutable + // just because of this one debugging method. + const_cast(this)->performAnalysis(); + for (auto &KV : AliveBits) { + OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " + << *KV.first << "\n"; + } +} + +FunctionPass *llvm::createDemandedBitsPass() { + return new DemandedBits(); +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 4826ac407d7f..4040ad3cacd5 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", "Dependence Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(DependenceAnalysis, "da", "Dependence Analysis", true, true) @@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() { bool DependenceAnalysis::runOnFunction(Function &F) { this->F = &F; - AA = &getAnalysis(); - SE = &getAnalysis(); + AA = &getAnalysis().getAAResults(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); return false; } @@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() { void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); AU.addRequiredTransitive(); } @@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, : Dependence(Source, Destination), Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = 
true; - DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; + if (CommonLevels) + DV = make_unique(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA, void DependenceAnalysis::Constraint::setDistance(const SCEV *D, const Loop *CurLoop) { Kind = Distance; - A = SE->getConstant(D->getType(), 1); + A = SE->getOne(D->getType()); B = SE->getNegativeSCEV(A); C = SE->getNegativeSCEV(D); AssociatedLoop = CurLoop; @@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, if (!C1B2_C2B1 || !C1A2_C2A1 || !A1B2_A2B1 || !A2B1_A1B2) return false; - APInt Xtop = C1B2_C2B1->getValue()->getValue(); - APInt Xbot = A1B2_A2B1->getValue()->getValue(); - APInt Ytop = C1A2_C2A1->getValue()->getValue(); - APInt Ybot = A2B1_A1B2->getValue()->getValue(); + APInt Xtop = C1B2_C2B1->getAPInt(); + APInt Xbot = A1B2_A2B1->getAPInt(); + APInt Ytop = C1A2_C2A1->getAPInt(); + APInt Ybot = A2B1_A1B2->getAPInt(); DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); @@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, } if (const SCEVConstant *CUB = collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { - APInt UpperBound = CUB->getValue()->getValue(); + APInt UpperBound = CUB->getAPInt(); DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { X->setEmpty(); @@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, const Value *B) { const Value *AObj = GetUnderlyingObject(A, DL); const Value *BObj = GetUnderlyingObject(B, DL); - return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), - BObj, AA->getTypeStoreSize(BObj->getType())); + return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), + BObj, DL.getTypeStoreSize(BObj->getType())); } @@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, // Can we compute distance? 
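// For example, with subscripts a*i + c1 (source) and a*i + c2 (sink), a
// common coefficient a == 1, and Delta == 2, the sdivrem below yields
// Distance == 2 and Remainder == 0, i.e. a constant dependence distance of
// two iterations.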
if (isa(Delta) && isa(Coeff)) { - APInt ConstDelta = cast(Delta)->getValue()->getValue(); - APInt ConstCoeff = cast(Coeff)->getValue()->getValue(); + APInt ConstDelta = cast(Delta)->getAPInt(); + APInt ConstCoeff = cast(Coeff)->getAPInt(); APInt Distance = ConstDelta; // these need to be initialized APInt Remainder = ConstDelta; APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); @@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); // compute SplitIter for use by DependenceAnalysis::getSplitIteration() - SplitIter = - SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), - Delta), - SE->getMulExpr(SE->getConstant(Delta->getType(), 2), - ConstCoeff)); + SplitIter = SE->getUDivExpr( + SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); const SCEVConstant *ConstDelta = dyn_cast(Delta); @@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, return true; } Result.DV[Level].Splitable = false; - Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + Result.DV[Level].Distance = SE->getZero(Delta->getType()); return false; } } // check that Coeff divides Delta - APInt APDelta = ConstDelta->getValue()->getValue(); - APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt APDelta = ConstDelta->getAPInt(); + APInt APCoeff = ConstCoeff->getAPInt(); APInt Distance = APDelta; // these need to be initialzed APInt Remainder = APDelta; APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); @@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactSIVindependence; ++ExactSIVsuccesses; @@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *CUB = collectConstantUpperBound(CurLoop, Delta->getType())) { - UM = CUB->getValue()->getValue(); + UM = CUB->getAPInt(); DEBUG(dbgs() << "\t UM = " << UM << "\n"); UMvalid = true; } @@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, static bool isRemainderZero(const SCEVConstant *Dividend, const SCEVConstant *Divisor) { - APInt ConstDividend = Dividend->getValue()->getValue(); - APInt ConstDivisor = Divisor->getValue()->getValue(); + APInt ConstDividend = Dividend->getAPInt(); + APInt ConstDivisor = Divisor->getAPInt(); return ConstDividend.srem(ConstDivisor) == 0; } @@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); - NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), - DstCoeff, Delta, CurLoop); + NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { if (Level < CommonLevels) { @@ -1775,8 +1774,8 @@ bool 
DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), - Delta, CurLoop); + NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { if (Level < CommonLevels) { @@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactRDIVindependence; return true; @@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // SrcUM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(SrcLoop, Delta->getType())) { - SrcUM = UpperBound->getValue()->getValue(); + SrcUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); SrcUMvalid = true; } @@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(DstLoop, Delta->getType())) { - DstUM = UpperBound->getValue()->getValue(); + DstUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); DstUMvalid = true; } @@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEVConstant *ConstOp = getConstantPart(Product); if (!ConstOp) return false; - APInt ConstOpValue = ConstOp->getValue()->getValue(); + APInt ConstOpValue = ConstOp->getAPInt(); ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOpValue.abs()); } @@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, } if (!Constant) return false; - APInt ConstDelta = cast(Constant)->getValue()->getValue(); + APInt ConstDelta = cast(Constant)->getAPInt(); DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); if (ConstDelta == 0) return false; @@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); else Constant = cast(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = 
getConstantPart(Product); else Constant = cast(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, // or constant, in which case we give up on this direction. continue; } - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); if (RunningGCD != 0) { @@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, // If the difference is 0, we won't need to know the number of iterations. if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) Bound[K].Lower[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) Bound[K].Upper[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); } } @@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); Bound[K].Lower[Dependence::DVEntry::LT] = @@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 
if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); Bound[K].Lower[Dependence::DVEntry::GT] = @@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, // X^+ = max(X, 0) const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { - return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMaxExpr(X, SE->getZero(X->getType())); } // X^- = min(X, 0) const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { - return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMinExpr(X, SE->getZero(X->getType())); } @@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo * DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, const SCEV *&Constant) const { - const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + const SCEV *Zero = SE->getZero(Subscript->getType()); CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; for (unsigned K = 1; K <= MaxLevels; ++K) { CI[K].Coeff = Zero; @@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { const SCEVAddRecExpr *AddRec = dyn_cast(Expr); if (!AddRec) - return SE->getConstant(Expr->getType(), 0); + return SE->getZero(Expr->getType()); if (AddRec->getLoop() == TargetLoop) return AddRec->getStepRecurrence(*SE); return findCoefficient(AddRec->getStart(), TargetLoop); @@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Bconst = dyn_cast(B); const SCEVConstant *Cconst = dyn_cast(C); if (!Bconst || !Cconst) return false; - APInt Beta = Bconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Beta = Bconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivB = Charlie.sdiv(Beta); assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); const SCEV *AP_K = findCoefficient(Dst, CurLoop); @@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast(A); const SCEVConstant *Cconst = dyn_cast(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast(A); const SCEVConstant *Cconst = dyn_cast(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. 
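/// For example, an access A[i][j] into "double A[n][m]" appears to SCEV as
/// roughly {{A,+,(8*m)}<outer>,+,8}<inner>; delinearization recovers the
/// per-loop subscripts i and j so each loop level can be tested
/// independently.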
-bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, - const SCEV *DstSCEV, - SmallVectorImpl &Pair, - const SCEV *ElementSize) { +bool DependenceAnalysis::tryDelinearize(Instruction *Src, + Instruction *Dst, + SmallVectorImpl &Pair) +{ + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); + + Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + + // Below code mimics the code in Delinearization.cpp + const SCEV *SrcAccessFn = + SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = + SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = - dyn_cast(SE->getPointerBase(SrcSCEV)); + dyn_cast(SE->getPointerBase(SrcAccessFn)); const SCEVUnknown *DstBase = - dyn_cast(SE->getPointerBase(DstSCEV)); + dyn_cast(SE->getPointerBase(DstAccessFn)); if (!SrcBase || !DstBase || SrcBase != DstBase) return false; - SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); - DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEV *ElementSize = SE->getElementSize(Src); + if (ElementSize != SE->getElementSize(Dst)) + return false; + + const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); + const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); @@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) { } #endif - // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. @@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinerized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { @@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, return nullptr; } - auto Final = make_unique(Result); - Result.DV = nullptr; - return std::move(Final); + return make_unique(std::move(Result)); } @@ -3852,10 +3863,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinerized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index e5ee2959c15d..5ae6d74130a7 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines divergence analysis which determines whether a branch in a -// GPU program is divergent. 
It can help branch optimizations such as jump
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
 // threading and loop unswitching to make better decisions.
 //
 // GPU programs typically use the SIMD execution model, where multiple threads
@@ -61,75 +61,31 @@
 // 2. memory as black box. It conservatively considers values loaded from
 // generic or local address as divergent. This can be improved by leveraging
 // pointer analysis.
+//
 //===----------------------------------------------------------------------===//
 
-#include <vector>
-#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include <vector>
 using namespace llvm;
 
-#define DEBUG_TYPE "divergence"
-
-namespace {
-class DivergenceAnalysis : public FunctionPass {
-public:
-  static char ID;
-
-  DivergenceAnalysis() : FunctionPass(ID) {
-    initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<PostDominatorTree>();
-    AU.setPreservesAll();
-  }
-
-  bool runOnFunction(Function &F) override;
-
-  // Print all divergent branches in the function.
-  void print(raw_ostream &OS, const Module *) const override;
-
-  // Returns true if V is divergent.
-  bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
-
-  // Returns true if V is uniform/non-divergent.
-  bool isUniform(const Value *V) const { return !isDivergent(V); }
-
-private:
-  // Stores all divergent values.
-  DenseSet<const Value *> DivergentValues;
-};
-} // End of anonymous namespace
-
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
-                      false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
-                    false, true)
-
 namespace {
 
 class DivergencePropagator {
 public:
-  DivergencePropagator(Function &F, TargetTransformInfo &TTI,
-                       DominatorTree &DT, PostDominatorTree &PDT,
-                       DenseSet<const Value *> &DV)
+  DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+                       PostDominatorTree &PDT, DenseSet<const Value *> &DV)
       : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
   void populateWithSourcesOfDivergence();
   void propagate();
@@ -140,7 +96,7 @@ private:
   // A helper function that explores sync dependents of TI.
   void exploreSyncDependency(TerminatorInst *TI);
   // Computes the influence region from Start to End. This region includes all
-  // basic blocks on any path from Start to End.
+  // basic blocks on any simple path from Start to End.
   void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
                               DenseSet<BasicBlock *> &InfluenceRegion);
   // Finds all users of I that are outside the influence region, and add these
@@ -153,13 +109,13 @@ private:
   DominatorTree &DT;
   PostDominatorTree &PDT;
   std::vector<Value *> Worklist; // Stack for DFS.
-  DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet &DV; // Stores all divergent values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); DV.insert(&I); @@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { for (auto I = IPostDom->begin(); isa(I); ++I) { // A PHINode is uniform if it returns the same value no matter which path is // taken. - if (!cast(I)->hasConstantValue() && DV.insert(I).second) - Worklist.push_back(I); + if (!cast(I)->hasConstantValue() && DV.insert(&*I).second) + Worklist.push_back(&*I); } // Propagation rule 2: if a value defined in a loop is used outside, the user @@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion( } } +// A helper function for computeInfluenceRegion that adds successors of "ThisBB" +// to the influence region. +static void +addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, + DenseSet &InfluenceRegion, + std::vector &InfluenceStack) { + for (BasicBlock *Succ : successors(ThisBB)) { + if (Succ != End && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } +} + void DivergencePropagator::computeInfluenceRegion( BasicBlock *Start, BasicBlock *End, DenseSet &InfluenceRegion) { assert(PDT.properlyDominates(End, Start) && "End does not properly dominate Start"); + + // The influence region starts from the end of "Start" to the beginning of + // "End". Therefore, "Start" should not be in the region unless "Start" is in + // a loop that doesn't contain "End". std::vector InfluenceStack; - InfluenceStack.push_back(Start); - InfluenceRegion.insert(Start); + addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); while (!InfluenceStack.empty()) { BasicBlock *BB = InfluenceStack.back(); InfluenceStack.pop_back(); - for (BasicBlock *Succ : successors(BB)) { - if (End != Succ && InfluenceRegion.insert(Succ).second) - InfluenceStack.push_back(Succ); - } + addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); } } @@ -286,10 +254,25 @@ void DivergencePropagator::propagate() { } /// end namespace anonymous +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + FunctionPass *llvm::createDivergenceAnalysisPass() { return new DivergenceAnalysis(); } +void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + bool DivergenceAnalysis::runOnFunction(Function &F) { auto *TTIWP = getAnalysisIfAvailable(); if (TTIWP == nullptr) @@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.count(&Arg)) OS << "DIVERGENT: " << Arg << "\n"; } - // Iterate instructions using inst_range to ensure a deterministic order. - for (auto &I : inst_range(F)) { + // Iterate instructions using instructions() to ensure a deterministic order. 
+ for (auto &I : instructions(F)) { if (DivergentValues.count(&I)) OS << "DIVERGENT:" << I << "\n"; } diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp new file mode 100644 index 000000000000..01be8b38fadd --- /dev/null +++ b/lib/Analysis/EHPersonalities.cpp @@ -0,0 +1,106 @@ +//===- EHPersonalities.cpp - Compute EH-related information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// See if the given exception handling personality function is one that we +/// understand. If so, return a description of it; otherwise return Unknown. +EHPersonality llvm::classifyEHPersonality(const Value *Pers) { + const Function *F = + Pers ? dyn_cast(Pers->stripPointerCasts()) : nullptr; + if (!F) + return EHPersonality::Unknown; + return StringSwitch(F->getName()) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Default(EHPersonality::Unknown); +} + +bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { + EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); + // We can't simplify any invokes to nounwind functions if the personality + // function wants to catch asynch exceptions. The nounwind attribute only + // implies that the function does not throw synchronous exceptions. + return !isAsynchronousEHPersonality(Personality); +} + +DenseMap llvm::colorEHFunclets(Function &F) { + SmallVector, 16> Worklist; + BasicBlock *EntryBlock = &F.getEntryBlock(); + DenseMap BlockColors; + + // Build up the color map, which maps each block to its set of 'colors'. + // For any block B the "colors" of B are the set of funclets F (possibly + // including a root "funclet" representing the main function) such that + // F will need to directly contain B or a copy of B (where the term "directly + // contain" is used to distinguish from being "transitively contained" in + // a nested funclet). + // + // Note: Despite not being a funclet in the truest sense, a catchswitch is + // considered to belong to its own funclet for the purposes of coloring. + + DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for " + << F.getName() << "\n"); + + Worklist.push_back({EntryBlock, EntryBlock}); + + while (!Worklist.empty()) { + BasicBlock *Visiting; + BasicBlock *Color; + std::tie(Visiting, Color) = Worklist.pop_back_val(); + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << "Visiting " << Visiting->getName() << ", " + << Color->getName() << "\n"); + Instruction *VisitingHead = Visiting->getFirstNonPHI(); + if (VisitingHead->isEHPad()) { + // Mark this funclet head as a member of itself. 
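+      // For example, when the walk reaches a catchpad block, that block
+      // becomes the current color, so the blocks it reaches are attributed
+      // to the catchpad funclet rather than to its parent.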
+ Color = Visiting; + } + // Note that this is a member of the given color. + ColorVector &Colors = BlockColors[Visiting]; + if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end()) + Colors.push_back(Color); + else + continue; + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Assigned color \'" << Color->getName() + << "\' to block \'" << Visiting->getName() + << "\'.\n"); + + BasicBlock *SuccColor = Color; + TerminatorInst *Terminator = Visiting->getTerminator(); + if (auto *CatchRet = dyn_cast(Terminator)) { + Value *ParentPad = CatchRet->getParentPad(); + if (isa(ParentPad)) + SuccColor = EntryBlock; + else + SuccColor = cast(ParentPad)->getParent(); + } + + for (BasicBlock *Succ : successors(Visiting)) + Worklist.push_back({Succ, SuccColor}); + } + return BlockColors; +} diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp new file mode 100644 index 000000000000..ab2263ae374e --- /dev/null +++ b/lib/Analysis/GlobalsModRef.cpp @@ -0,0 +1,1002 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +#define DEBUG_TYPE "globalsmodref-aa" + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +// An option to enable unsafe alias results from the GlobalsModRef analysis. +// When enabled, GlobalsModRef will provide no-alias results which in extremely +// rare cases may not be conservatively correct. In particular, in the face of +// transforms which cause assymetry between how effective GetUnderlyingObject +// is for two pointers, it may produce incorrect results. +// +// These unsafe results have been returned by GMR for many years without +// causing significant issues in the wild and so we provide a mechanism to +// re-enable them for users of LLVM that have a particular performance +// sensitivity and no known issues. The option also makes it easy to evaluate +// the performance impact of these results. 
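+//
+// (The option below defaults to off; it can be re-enabled by passing
+// -enable-unsafe-globalsmodref-alias-results to a tool that runs this
+// analysis, e.g. opt.)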
+static cl::opt EnableUnsafeGlobalsModRefAliasResults( + "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden); + +/// The mod/ref information collected for a particular function. +/// +/// We collect information about mod/ref behavior of a function here, both in +/// general and as pertains to specific globals. We only have this detailed +/// information when we know *something* useful about the behavior. If we +/// saturate to fully general mod/ref, we remove the info for the function. +class GlobalsAAResult::FunctionInfo { + typedef SmallDenseMap GlobalInfoMapType; + + /// Build a wrapper struct that has 8-byte alignment. All heap allocations + /// should provide this much alignment at least, but this makes it clear we + /// specifically rely on this amount of alignment. + struct LLVM_ALIGNAS(8) AlignedMap { + AlignedMap() {} + AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} + GlobalInfoMapType Map; + }; + + /// Pointer traits for our aligned map. + struct AlignedMapPointerTraits { + static inline void *getAsVoidPointer(AlignedMap *P) { return P; } + static inline AlignedMap *getFromVoidPointer(void *P) { + return (AlignedMap *)P; + } + enum { NumLowBitsAvailable = 3 }; + static_assert(AlignOf::Alignment >= (1 << NumLowBitsAvailable), + "AlignedMap insufficiently aligned to have enough low bits."); + }; + + /// The bit that flags that this function may read any global. This is + /// chosen to mix together with ModRefInfo bits. + enum { MayReadAnyGlobal = 4 }; + + /// Checks to document the invariants of the bit packing here. + static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, + "ModRef and the MayReadAnyGlobal flag bits overlap."); + static_assert(((MayReadAnyGlobal | MRI_ModRef) >> + AlignedMapPointerTraits::NumLowBitsAvailable) == 0, + "Insufficient low bits to store our flag and ModRef info."); + +public: + FunctionInfo() : Info() {} + ~FunctionInfo() { + delete Info.getPointer(); + } + // Spell out the copy ond move constructors and assignment operators to get + // deep copy semantics and correct move semantics in the face of the + // pointer-int pair. + FunctionInfo(const FunctionInfo &Arg) + : Info(nullptr, Arg.Info.getInt()) { + if (const auto *ArgPtr = Arg.Info.getPointer()) + Info.setPointer(new AlignedMap(*ArgPtr)); + } + FunctionInfo(FunctionInfo &&Arg) + : Info(Arg.Info.getPointer(), Arg.Info.getInt()) { + Arg.Info.setPointerAndInt(nullptr, 0); + } + FunctionInfo &operator=(const FunctionInfo &RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(nullptr, RHS.Info.getInt()); + if (const auto *RHSPtr = RHS.Info.getPointer()) + Info.setPointer(new AlignedMap(*RHSPtr)); + return *this; + } + FunctionInfo &operator=(FunctionInfo &&RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt()); + RHS.Info.setPointerAndInt(nullptr, 0); + return *this; + } + + /// Returns the \c ModRefInfo info for this function. + ModRefInfo getModRefInfo() const { + return ModRefInfo(Info.getInt() & MRI_ModRef); + } + + /// Adds new \c ModRefInfo for this function to its state. + void addModRefInfo(ModRefInfo NewMRI) { + Info.setInt(Info.getInt() | NewMRI); + } + + /// Returns whether this function may read any global variable, and we don't + /// know which global. + bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; } + + /// Sets this function as potentially reading from any global. 
+ void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); } + + /// Returns the \c ModRefInfo info for this function w.r.t. a particular + /// global, which may be more precise than the general information above. + ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { + ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; + if (AlignedMap *P = Info.getPointer()) { + auto I = P->Map.find(&GV); + if (I != P->Map.end()) + GlobalMRI = ModRefInfo(GlobalMRI | I->second); + } + return GlobalMRI; + } + + /// Add mod/ref info from another function into ours, saturating towards + /// MRI_ModRef. + void addFunctionInfo(const FunctionInfo &FI) { + addModRefInfo(FI.getModRefInfo()); + + if (FI.mayReadAnyGlobal()) + setMayReadAnyGlobal(); + + if (AlignedMap *P = FI.Info.getPointer()) + for (const auto &G : P->Map) + addModRefInfoForGlobal(*G.first, G.second); + } + + void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) { + AlignedMap *P = Info.getPointer(); + if (!P) { + P = new AlignedMap(); + Info.setPointer(P); + } + auto &GlobalMRI = P->Map[&GV]; + GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); + } + + /// Clear a global's ModRef info. Should be used when a global is being + /// deleted. + void eraseModRefInfoForGlobal(const GlobalValue &GV) { + if (AlignedMap *P = Info.getPointer()) + P->Map.erase(&GV); + } + +private: + /// All of the information is encoded into a single pointer, with a three bit + /// integer in the low three bits. The high bit provides a flag for when this + /// function may read any global. The low two bits are the ModRefInfo. And + /// the pointer, when non-null, points to a map from GlobalValue to + /// ModRefInfo specific to that GlobalValue. + PointerIntPair Info; +}; + +void GlobalsAAResult::DeletionCallbackHandle::deleted() { + Value *V = getValPtr(); + if (auto *F = dyn_cast(V)) + GAR->FunctionInfos.erase(F); + + if (GlobalValue *GV = dyn_cast(V)) { + if (GAR->NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and + // remove any AllocRelatedValues for it. + if (GAR->IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (auto I = GAR->AllocsForIndirectGlobals.begin(), + E = GAR->AllocsForIndirectGlobals.end(); + I != E; ++I) + if (I->second == GV) + GAR->AllocsForIndirectGlobals.erase(I); + } + + // Scan the function info we have collected and remove this global + // from all of them. + for (auto &FIPair : GAR->FunctionInfos) + FIPair.second.eraseModRefInfoForGlobal(*GV); + } + } + + // If this is an allocation related to an indirect global, remove it. + GAR->AllocsForIndirectGlobals.erase(V); + + // And clear out the handle. + setValPtr(nullptr); + GAR->Handles.erase(I); + // This object is now destroyed! 
+
+FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+  if (FunctionInfo *FI = getFunctionInfo(F)) {
+    if (FI->getModRefInfo() == MRI_NoModRef)
+      Min = FMRB_DoesNotAccessMemory;
+    else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+      Min = FMRB_OnlyReadsMemory;
+  }
+
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+FunctionModRefBehavior
+GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+  if (const Function *F = CS.getCalledFunction())
+    if (FunctionInfo *FI = getFunctionInfo(F)) {
+      if (FI->getModRefInfo() == MRI_NoModRef)
+        Min = FMRB_DoesNotAccessMemory;
+      else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+        Min = FMRB_OnlyReadsMemory;
+    }
+
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the function info for the function, or null if we don't have
+/// anything useful to say about it.
+GlobalsAAResult::FunctionInfo *
+GlobalsAAResult::getFunctionInfo(const Function *F) {
+  auto I = FunctionInfos.find(F);
+  if (I != FunctionInfos.end())
+    return &I->second;
+  return nullptr;
+}
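
Both overloads above intersect their answer with the base analysis by bitwise AND, which works because the behavior values form a small bit-set lattice. A hypothetical three-level sketch of that meet operation:

enum Behavior : unsigned {
  DoesNotAccess = 0, // neither mods nor refs memory
  ReadsOnly = 1,     // ref bit only
  Unknown = 3        // mod and ref bits set
};

inline Behavior meet(Behavior A, Behavior B) { return Behavior(A & B); }

// e.g. meet(ReadsOnly, Unknown) == ReadsOnly: another analysis proving
// "reads only" can only sharpen the combined answer, never weaken it.
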
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsAAResult::AnalyzeGlobals(Module &M) {
+  SmallPtrSet<Function *, 32> TrackedFunctions;
+  for (Function &F : M)
+    if (F.hasLocalLinkage())
+      if (!AnalyzeUsesOfPointer(&F)) {
+        // Remember that we are tracking this global.
+        NonAddressTakenGlobals.insert(&F);
+        TrackedFunctions.insert(&F);
+        Handles.emplace_front(*this, &F);
+        Handles.front().I = Handles.begin();
+        ++NumNonAddrTakenFunctions;
+      }
+
+  SmallPtrSet<Function *, 16> Readers, Writers;
+  for (GlobalVariable &GV : M.globals())
+    if (GV.hasLocalLinkage()) {
+      if (!AnalyzeUsesOfPointer(&GV, &Readers,
+                                GV.isConstant() ? nullptr : &Writers)) {
+        // Remember that we are tracking this global, and the mod/ref fns
+        NonAddressTakenGlobals.insert(&GV);
+        Handles.emplace_front(*this, &GV);
+        Handles.front().I = Handles.begin();
+
+        for (Function *Reader : Readers) {
+          if (TrackedFunctions.insert(Reader).second) {
+            Handles.emplace_front(*this, Reader);
+            Handles.front().I = Handles.begin();
+          }
+          FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+        }
+
+        if (!GV.isConstant()) // No need to keep track of writers to constants
+          for (Function *Writer : Writers) {
+            if (TrackedFunctions.insert(Writer).second) {
+              Handles.emplace_front(*this, Writer);
+              Handles.front().I = Handles.begin();
+            }
+            FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+          }
+        ++NumNonAddrTakenGlobalVars;
+
+        // If this global holds a pointer type, see if it is an indirect
+        // global.
+        if (GV.getType()->getElementType()->isPointerTy() &&
+            AnalyzeIndirectGlobalMemory(&GV))
+          ++NumIndirectGlobalVars;
+      }
+      Readers.clear();
+      Writers.clear();
+    }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
+                                           SmallPtrSetImpl<Function *> *Readers,
+                                           SmallPtrSetImpl<Function *> *Writers,
+                                           GlobalValue *OkayStoreDest) {
+  if (!V->getType()->isPointerTy())
+    return true;
+
+  for (Use &U : V->uses()) {
+    User *I = U.getUser();
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      if (Readers)
+        Readers->insert(LI->getParent()->getParent());
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+      if (V == SI->getOperand(1)) {
+        if (Writers)
+          Writers->insert(SI->getParent()->getParent());
+      } else if (SI->getOperand(1) != OkayStoreDest) {
+        return true; // Storing the pointer
+      }
+    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers))
+        return true;
+    } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+        return true;
+    } else if (auto CS = CallSite(I)) {
+      // Make sure that this is just the function being called, not that it is
+      // passing into the function.
+      if (CS.isDataOperand(&U)) {
+        // Detect calls to free.
+        if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+          if (Writers)
+            Writers->insert(CS->getParent()->getParent());
+        } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
+          Function *ParentF = CS->getParent()->getParent();
+          // A nocapture argument may be read from or written to, but does not
+          // escape unless the call can somehow recurse.
+          //
+          // nocapture "indicates that the callee does not make any copies of
+          // the pointer that outlive itself". Therefore if we directly or
+          // indirectly recurse, we must treat the pointer as escaping.
+          if (FunctionToSCCMap[ParentF] ==
+              FunctionToSCCMap[CS.getCalledFunction()])
+            return true;
+          if (Readers)
+            Readers->insert(ParentF);
+          if (Writers)
+            Writers->insert(ParentF);
+        } else {
+          return true; // Argument of an unknown call.
+        }
+        // If the Callee is not ReadNone, it may read the global,
+        // and if it is not ReadOnly, it may also write to it.
+        Function *CalleeF = CS.getCalledFunction();
+        if (!CalleeF->doesNotAccessMemory()) {
+          if (Readers)
+            Readers->insert(CalleeF);
+          if (Writers && !CalleeF->onlyReadsMemory())
+            Writers->insert(CalleeF);
+        }
+      }
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return true; // Allow comparison against null.
+    } else {
+      return true;
+    }
+  }
+
+  return false;
+}
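
A source-level illustration of why the same-SCC check above voids the nocapture reasoning; the functions are hypothetical. g may promise not to keep copies of P that outlive g itself, but if g can re-enter f, f's body runs again while g still holds P, so from f's perspective the pointer behaves as if it escaped:

void g(int *P); // imagine: declared with a nocapture-style guarantee

void f(int *P) {
  g(P); // safe to treat P as non-escaping only if g can never reach f again
}

void g(int *P) {
  static bool Reenter = true;
  if (Reenter) {
    Reenter = false;
    f(P); // f and g now share an SCC, so P must be treated as escaping
  }
}
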
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+  // Keep track of values related to the allocation of the memory, e.g. the
+  // value produced by the malloc call and any casts.
+  std::vector<Value *> AllocRelatedValues;
+
+  // If the initializer is a valid pointer, bail.
+  if (Constant *C = GV->getInitializer())
+    if (!C->isNullValue())
+      return false;
+
+  // Walk the user list of the global. If we find anything other than a direct
+  // load or store, bail out.
+  for (User *U : GV->users()) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // The pointer loaded from the global can only be used in simple ways:
+      // we allow addressing of it and loading and storing to it. We do *not*
+      // allow storing the loaded pointer somewhere else or passing it to a
+      // function.
+      if (AnalyzeUsesOfPointer(LI))
+        return false; // Loaded pointer escapes.
+      // TODO: Could try some IP mod/ref of the loaded pointer.
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // Storing the global itself.
+      if (SI->getOperand(0) == GV)
+        return false;
+
+      // If storing the null pointer, ignore it.
+      if (isa<ConstantPointerNull>(SI->getOperand(0)))
+        continue;
+
+      // Check the value being stored.
+      Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+                                       GV->getParent()->getDataLayout());
+
+      if (!isAllocLikeFn(Ptr, &TLI))
+        return false; // Too hard to analyze.
+
+      // Analyze all uses of the allocation. If any of them are used in a
+      // non-simple way (e.g. stored to another global) bail out.
+      if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+                               GV))
+        return false; // Loaded pointer escapes.
+
+      // Remember that this allocation is related to the indirect global.
+      AllocRelatedValues.push_back(Ptr);
+    } else {
+      // Something complex, bail out.
+      return false;
+    }
+  }
+
+  // Okay, this is an indirect global. Remember all of the allocations for
+  // this global in AllocsForIndirectGlobals.
+  while (!AllocRelatedValues.empty()) {
+    AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+    Handles.emplace_front(*this, AllocRelatedValues.back());
+    Handles.front().I = Handles.begin();
+    AllocRelatedValues.pop_back();
+  }
+  IndirectGlobals.insert(GV);
+  Handles.emplace_front(*this, GV);
+  Handles.front().I = Handles.begin();
+  return true;
+}
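
The source-level pattern this function recognizes, in a hypothetical example: an internal global that only ever holds freshly allocated memory, so the pointee is "owned" by the global and can be disambiguated from other pointers:

static int *Cache; // internal linkage; the global's own address is never taken

void init() {
  Cache = new int[16](); // every store is a fresh, otherwise-unused allocation
}

int lookup(unsigned I) {
  return Cache[I]; // loads use the memory directly; the pointer never escapes
}
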
+
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
+  unsigned SCCID = 0;
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    for (auto *CGN : SCC)
+      if (Function *F = CGN->getFunction())
+        FunctionToSCCMap[F] = SCCID;
+    ++SCCID;
+  }
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) {
+      // Calls externally or is weak - can't say anything useful. Remove any
+      // existing function records (may have been created when scanning
+      // globals).
+      for (auto *Node : SCC)
+        FunctionInfos.erase(Node->getFunction());
+      continue;
+    }
+
+    FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+    bool KnowNothing = false;
+
+    // Collect the mod/ref properties due to called functions. We only compute
+    // one mod-ref set.
+    for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+      Function *F = SCC[i]->getFunction();
+      if (!F) {
+        KnowNothing = true;
+        break;
+      }
+
+      if (F->isDeclaration()) {
+        // Try to get mod/ref behaviour from function attributes.
+        if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
+          // Can't do better than that!
+        } else if (F->onlyReadsMemory()) {
+          FI.addModRefInfo(MRI_Ref);
+          if (!F->isIntrinsic())
+            // This function might call back into the module and read a global
+            // - consider every global as possibly being read by this function.
+            FI.setMayReadAnyGlobal();
+        } else if (F->onlyAccessesArgMemory() ||
+                   F->onlyAccessesInaccessibleMemOrArgMem()) {
+          // This function may only access (read/write) memory pointed to by
+          // its arguments. If this pointer is to a global, this escaping use
+          // of the pointer is captured in AnalyzeUsesOfPointer().
+          FI.addModRefInfo(MRI_ModRef);
+        } else {
+          FI.addModRefInfo(MRI_ModRef);
+          // Can't say anything useful unless it's an intrinsic - they don't
+          // read or write global variables of the kind considered here.
+          KnowNothing = !F->isIntrinsic();
+        }
+        continue;
+      }
+
+      for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+           CI != E && !KnowNothing; ++CI)
+        if (Function *Callee = CI->second->getFunction()) {
+          if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
+            // Propagate function effect up.
+            FI.addFunctionInfo(*CalleeFI);
+          } else {
+            // Can't say anything about it. However, if it is inside our SCC,
+            // then nothing needs to be done.
+            CallGraphNode *CalleeNode = CG[Callee];
+            if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+              KnowNothing = true;
+          }
+        } else {
+          KnowNothing = true;
+        }
+    }
+
+    // If we can't say anything useful about this SCC, remove all SCC functions
+    // from the FunctionInfos map.
+    if (KnowNothing) {
+      for (auto *Node : SCC)
+        FunctionInfos.erase(Node->getFunction());
+      continue;
+    }
+
+    // Scan the function bodies for explicit loads or stores.
+    for (auto *Node : SCC) {
+      if (FI.getModRefInfo() == MRI_ModRef)
+        break; // The mod/ref lattice saturates here.
+      for (Instruction &I : instructions(Node->getFunction())) {
+        if (FI.getModRefInfo() == MRI_ModRef)
+          break; // The mod/ref lattice saturates here.
+
+        // We handle calls specially because the graph-relevant aspects are
+        // handled above.
+        if (auto CS = CallSite(&I)) {
+          if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+            // FIXME: It is completely unclear why this is necessary and not
+            // handled by the above graph code.
+            FI.addModRefInfo(MRI_ModRef);
+          } else if (Function *Callee = CS.getCalledFunction()) {
+            // The callgraph doesn't include intrinsic calls.
+            if (Callee->isIntrinsic()) {
+              FunctionModRefBehavior Behaviour =
+                  AAResultBase::getModRefBehavior(Callee);
+              FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+            }
+          }
+          continue;
+        }
+
+        // For all non-call instructions we use the primary predicates for
+        // whether they read or write memory.
+        if (I.mayReadFromMemory())
+          FI.addModRefInfo(MRI_Ref);
+        if (I.mayWriteToMemory())
+          FI.addModRefInfo(MRI_Mod);
+      }
+    }
+
+    if ((FI.getModRefInfo() & MRI_Mod) == 0)
+      ++NumReadMemFunctions;
+    if (FI.getModRefInfo() == MRI_NoModRef)
+      ++NumNoMemFunctions;
+
+    // Finally, now that we know the full effect on this SCC, clone the
+    // information to each function in the SCC.
+    // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+    // get invalidated if DenseMap decides to re-hash.
+    FunctionInfo CachedFI = FI;
+    for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+      FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+  }
+}
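
The hazard behind the CachedFI copy at the end of the function above, sketched with a hypothetical map: llvm::DenseMap stores its elements inline, so an insertion that triggers growth can rehash and move them, invalidating references obtained earlier:

#include "llvm/ADT/DenseMap.h"
using namespace llvm;

void referenceInvalidation() {
  DenseMap<int, int> M;
  int &First = M[1]; // a reference into the table
  for (int I = 2; I < 100; ++I)
    M[I] = I;        // growth may rehash and move the buckets...
  // ...so 'First' may now dangle. Copying the value out before inserting
  // (the CachedFI pattern above) avoids the problem.
  (void)First;       // must not be used after the insertions
}
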
+
+// GV is a non-escaping global. V is a pointer address that has been loaded
+// from. If we can prove that V must escape, we can conclude that a load from
+// V cannot alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+                                               const Value *V,
+                                               int &Depth,
+                                               const DataLayout &DL) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (isa<GlobalValue>(Input) || isa<Argument>(Input) ||
+        isa<CallInst>(Input) || isa<InvokeInst>(Input))
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      //
+      // (Transitive) loads from a global are also safe - if this aliased
+      // another global, its address would escape, so no alias.
+      continue;
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    return false;
+  } while (!Inputs.empty());
+
+  // All inputs were known to be no-alias.
+  return true;
+}
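
A skeleton of the traversal shape used above, under assumed generic node and expansion types: an explicit worklist with a visited set for breadth, plus a small depth budget so compile time stays bounded, where running out of budget answers conservatively:

#include <unordered_set>
#include <vector>

template <typename NodeT, typename ExpandFn>
bool provedWithinBudget(NodeT Start, ExpandFn Expand, int MaxDepth = 4) {
  std::unordered_set<NodeT> Visited{Start};
  std::vector<NodeT> Worklist{Start};
  int Depth = 0;
  while (!Worklist.empty()) {
    NodeT N = Worklist.back();
    Worklist.pop_back();
    if (++Depth > MaxDepth)
      return false; // budget exhausted: give the conservative answer
    for (NodeT Next : Expand(N)) // Expand(N) yields nodes to look through
      if (Visited.insert(Next).second)
        Worklist.push_back(Next);
  }
  return true; // every reachable input was handled within budget
}
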
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would
+// have changed program behavior by introducing the observed escape, making
+// it an invalid transform.
+//
+// This property does require that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, cannot
+// rely on the results of GMR::alias. This seems a reasonable restriction,
+// although currently there is no way to enforce it. There is also no
+// realistic optimization pass that would make this mistake. The closest
+// example is a transformation pass which does reg2mem of SSA values but
+// stores them into global variables temporarily before restoring the global
+// variable's value. This could be useful to expose "benign" races for
+// example. However, it seems reasonable to require that a pass which
+// introduces escapes of global variables in this way either not trust AA
+// results while the escape is active, or be forced to operate as a module
+// pass that cannot co-exist with an alias analysis such as GMR.
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+                                                 const Value *V) {
+  // In order to know that the underlying object cannot alias the
+  // non-addr-taken global, we must know that it would have to be an escape.
+  // Thus if the underlying object is a function argument, a load from
+  // a global, or the return of a function, it cannot alias. We can also
+  // recurse through PHI nodes and select nodes provided all of their inputs
+  // resolve to one of these known-escaping roots.
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  int Depth = 0;
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+      // If one input is the very global we're querying against, then we can't
+      // conclude no-alias.
+      if (InputGV == GV)
+        return false;
+
+      // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+      // FIXME: The condition can be refined, but be conservative for now.
+      auto *GVar = dyn_cast<GlobalVariable>(GV);
+      auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+      if (GVar && InputGVar &&
+          !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+          !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+        Type *GVType = GVar->getInitializer()->getType();
+        Type *InputGVType = InputGVar->getInitializer()->getType();
+        if (GVType->isSized() && InputGVType->isSized() &&
+            (DL.getTypeAllocSize(GVType) > 0) &&
+            (DL.getTypeAllocSize(InputGVType) > 0))
+          continue;
+      }
+
+      // Conservatively return false, even though we could be smarter
+      // (e.g. look through GlobalAliases).
+      return false;
+    }
+
+    if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+        isa<InvokeInst>(Input)) {
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      continue;
+    }
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      // A pointer loaded from a global would have been captured, and we know
+      // that the global is non-escaping, so no alias.
+      const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+      if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+        // The load does not alias with GV.
+        continue;
+      // Otherwise, a load could come from anywhere, so bail.
+      return false;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    // FIXME: It would be good to handle other obvious no-alias cases here, but
+    // it isn't clear how to do so reasonably without building a small version
+    // of BasicAA into this code. We could recurse into AAResultBase::alias
+    // here but that seems likely to go poorly as we're inside the
+    // implementation of such a query. Until then, just conservatively return
+    // false.
+    return false;
+  } while (!Inputs.empty());
+
+  // If all the inputs to V were definitively no-alias, then V is no-alias.
+  return true;
+}
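
What this conclusion buys in practice, in a hypothetical translation unit: if the unit never takes the address of an internal global, no incoming pointer can name it, so accesses through that pointer never alias the global:

static int Counter; // internal; '&Counter' appears nowhere in this TU

int bump(int *P) {
  Counter = 1;
  *P = 2;         // cannot store to Counter: P would require an escape of it
  return Counter; // safe to fold to 1 under this analysis
}
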
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+                                   const MemoryLocation &LocB) {
+  // Get the base object these pointers point to.
+  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+  // If either of the underlying values is a global, they may be non-addr-taken
+  // globals, which we can answer queries about.
+  const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+  const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+  if (GV1 || GV2) {
+    // If the global's address is taken, pretend we don't know it's a pointer
+    // to the global.
+    if (GV1 && !NonAddressTakenGlobals.count(GV1))
+      GV1 = nullptr;
+    if (GV2 && !NonAddressTakenGlobals.count(GV2))
+      GV2 = nullptr;
+
+    // If the two pointers are derived from two different non-addr-taken
+    // globals we know these can't alias.
+    if (GV1 && GV2 && GV1 != GV2)
+      return NoAlias;
+
+    // If one is and the other isn't, it isn't strictly safe but we can fake
+    // this result if necessary for performance. This does not appear to be
+    // a common problem in practice.
+    if (EnableUnsafeGlobalsModRefAliasResults)
+      if ((GV1 || GV2) && GV1 != GV2)
+        return NoAlias;
+
+    // Check for a special case where a non-escaping global can be used to
+    // conclude no-alias.
+    if ((GV1 || GV2) && GV1 != GV2) {
+      const GlobalValue *GV = GV1 ? GV1 : GV2;
+      const Value *UV = GV1 ? UV2 : UV1;
+      if (isNonEscapingGlobalNoAlias(GV, UV))
+        return NoAlias;
+    }
+
+    // Otherwise if they are both derived from the same addr-taken global, we
+    // can't know the two accesses don't overlap.
+  }
+
+  // These pointers may be based on the memory owned by an indirect global. If
+  // so, we may be able to handle this. First check to see if the base pointer
+  // is a direct load from an indirect global.
+  GV1 = GV2 = nullptr;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV1 = GV;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV2 = GV;
+
+  // These pointers may also be from an allocation for the indirect global. If
+  // so, also handle them.
+  if (!GV1)
+    GV1 = AllocsForIndirectGlobals.lookup(UV1);
+  if (!GV2)
+    GV2 = AllocsForIndirectGlobals.lookup(UV2);
+
+  // Now that we know whether the two pointers are related to indirect
+  // globals, use this to disambiguate the pointers. If the pointers are based
+  // on different indirect globals they cannot alias.
+  if (GV1 && GV2 && GV1 != GV2)
+    return NoAlias;
+
+  // If one is based on an indirect global and the other isn't, it isn't
+  // strictly safe but we can fake this result if necessary for performance.
+  // This does not appear to be a common problem in practice.
+  if (EnableUnsafeGlobalsModRefAliasResults)
+    if ((GV1 || GV2) && GV1 != GV2)
+      return NoAlias;
+
+  return AAResultBase::alias(LocA, LocB);
+}
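
The indirect-global half of alias() at source level, in a hypothetical example: two internal globals that each own their allocation can be told apart, so the two stores below provably do not overlap:

static int *A, *B; // indirect globals: only ever hold fresh allocations

void setup() {
  A = new int;
  B = new int;
}

void touch() {
  *A = 1; // based on the memory owned by A
  *B = 2; // based on the memory owned by B: no overlap with *A
}
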
+
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+                                                     const GlobalValue *GV) {
+  if (CS.doesNotAccessMemory())
+    return MRI_NoModRef;
+  ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+  // Iterate through all the arguments to the called function. If any argument
+  // is based on GV, return the conservative result.
+  for (auto &A : CS.args()) {
+    SmallVector<Value *, 4> Objects;
+    GetUnderlyingObjects(A, Objects, DL);
+
+    // All objects must be identified.
+    if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject))
+      return ConservativeResult;
+
+    if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+      return ConservativeResult;
+  }
+
+  // We identified all objects in the argument list, and none of them were GV.
+  return MRI_NoModRef;
+}
+
+ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+                                          const MemoryLocation &Loc) {
+  unsigned Known = MRI_ModRef;
+
+  // If we are asking for mod/ref info of a direct call with a pointer to a
+  // global we are tracking, return information if we have it.
+  if (const GlobalValue *GV =
+          dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+    if (GV->hasLocalLinkage())
+      if (const Function *F = CS.getCalledFunction())
+        if (NonAddressTakenGlobals.count(GV))
+          if (const FunctionInfo *FI = getFunctionInfo(F))
+            Known = FI->getModRefInfoForGlobal(*GV) |
+                    getModRefInfoForArgument(CS, GV);
+
+  if (Known == MRI_NoModRef)
+    return MRI_NoModRef; // No need to query other mod/ref analyses
+  return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+}
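
The shape of the argument check in getModRefInfoForArgument, sketched with hypothetical types: only when every underlying object of an argument is identified, and none of them is GV, may the call be said not to touch GV:

#include <algorithm>
#include <vector>

bool mayArgumentTouch(const std::vector<const void *> &Objects,
                      const void *GV,
                      bool (*identified)(const void *)) {
  if (!std::all_of(Objects.begin(), Objects.end(), identified))
    return true; // some object is unknown: stay conservative
  return std::find(Objects.begin(), Objects.end(), GV) != Objects.end();
}
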
+
+GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
+                                 const TargetLibraryInfo &TLI)
+    : AAResultBase(TLI), DL(DL) {}
+
+GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
+    : AAResultBase(std::move(Arg)), DL(Arg.DL),
+      NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
+      IndirectGlobals(std::move(Arg.IndirectGlobals)),
+      AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
+      FunctionInfos(std::move(Arg.FunctionInfos)),
+      Handles(std::move(Arg.Handles)) {
+  // Update the parent for each DeletionCallbackHandle.
+  for (auto &H : Handles) {
+    assert(H.GAR == &Arg);
+    H.GAR = this;
+  }
+}
+
+/*static*/ GlobalsAAResult
+GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+                               CallGraph &CG) {
+  GlobalsAAResult Result(M.getDataLayout(), TLI);
+
+  // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+  Result.CollectSCCMembership(CG);
+
+  // Find non-addr taken globals.
+  Result.AnalyzeGlobals(M);
+
+  // Propagate on CG.
+  Result.AnalyzeCallGraph(CG, M);
+
+  return Result;
+}
+
+GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) {
+  return GlobalsAAResult::analyzeModule(M,
+                                        AM->getResult<TargetLibraryAnalysis>(M),
+                                        AM->getResult<CallGraphAnalysis>(M));
+}
+
+char GlobalsAA::PassID;
+
+char GlobalsAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
+                      "Globals Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
+                    "Globals Alias Analysis", false, true)
+
+ModulePass *llvm::createGlobalsAAWrapperPass() {
+  return new GlobalsAAWrapperPass();
+}
+
+GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
+  initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+  Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
+      M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+  return false;
+}
+
+bool GlobalsAAWrapperPass::doFinalization(Module &M) {
+  Result.reset();
+  return false;
+}
+
+void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<CallGraphWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
deleted file mode 100644
index 6095136d60a1..000000000000
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_llvm_library(LLVMipa
-  CallGraph.cpp
-  CallGraphSCCPass.cpp
-  CallPrinter.cpp
-  GlobalsModRef.cpp
-  IPA.cpp
-  InlineCost.cpp
-  )
-
-add_dependencies(LLVMipa intrinsics_gen)
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
deleted file mode 100644
index 28fb49c89019..000000000000
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass provides alias and mod/ref information for global values
-// that do not have their address taken, and keeps track of whether functions
-// read or write memory (are "pure"). For this simple (but very common) case,
-// we can provide pretty accurate and useful information.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include -using namespace llvm; - -#define DEBUG_TYPE "globalsmodref-aa" - -STATISTIC(NumNonAddrTakenGlobalVars, - "Number of global vars without address taken"); -STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); -STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); -STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); -STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); - -namespace { -/// FunctionRecord - One instance of this structure is stored for every -/// function in the program. Later, the entries for these functions are -/// removed if the function is found to call an external function (in which -/// case we know nothing about it. -struct FunctionRecord { - /// GlobalInfo - Maintain mod/ref info for all of the globals without - /// addresses taken that are read or written (transitively) by this - /// function. - std::map GlobalInfo; - - /// MayReadAnyGlobal - May read global variables, but it is not known which. - bool MayReadAnyGlobal; - - unsigned getInfoForGlobal(const GlobalValue *GV) const { - unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map::const_iterator I = - GlobalInfo.find(GV); - if (I != GlobalInfo.end()) - Effect |= I->second; - return Effect; - } - - /// FunctionEffect - Capture whether or not this function reads or writes to - /// ANY memory. If not, we can do a lot of aggressive analysis on it. - unsigned FunctionEffect; - - FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} -}; - -/// GlobalsModRef - The actual analysis pass. -class GlobalsModRef : public ModulePass, public AliasAnalysis { - /// NonAddressTakenGlobals - The globals that do not have their addresses - /// taken. - std::set NonAddressTakenGlobals; - - /// IndirectGlobals - The memory pointed to by this global is known to be - /// 'owned' by the global. - std::set IndirectGlobals; - - /// AllocsForIndirectGlobals - If an instruction allocates memory for an - /// indirect global, this map indicates which one. - std::map AllocsForIndirectGlobals; - - /// FunctionInfo - For each function, keep track of what globals are - /// modified or read. - std::map FunctionInfo; - -public: - static char ID; - GlobalsModRef() : ModulePass(ID) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); - - // Find non-addr taken globals. - AnalyzeGlobals(M); - - // Propagate on CG. 
- AnalyzeCallGraph(getAnalysis().getCallGraph(), M); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired(); - AU.setPreservesAll(); // Does not transform code - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1, CS2); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } - - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function *F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } - - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } - - void deleteValue(Value *V) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - -private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map::iterator I = - FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } - - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector &Readers, - std::vector &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); -}; -} - -char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) - -Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } - -/// AnalyzeGlobals - Scan through the users of all of the internal -/// GlobalValue's in the program. 
If none of them have their "address taken" -/// (really, their address passed to something nontrivial), record this fact, -/// and record the functions that they are used directly in. -void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector Readers, Writers; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global. - NonAddressTakenGlobals.insert(I); - ++NumNonAddrTakenFunctions; - } - Readers.clear(); - Writers.clear(); - } - - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global, and the mod/ref fns - NonAddressTakenGlobals.insert(I); - - for (unsigned i = 0, e = Readers.size(); i != e; ++i) - FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - - if (!I->isConstant()) // No need to keep track of writers to constants - for (unsigned i = 0, e = Writers.size(); i != e; ++i) - FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; - ++NumNonAddrTakenGlobalVars; - - // If this global holds a pointer type, see if it is an indirect global. - if (I->getType()->getElementType()->isPointerTy() && - AnalyzeIndirectGlobalMemory(I)) - ++NumIndirectGlobalVars; - } - Readers.clear(); - Writers.clear(); - } -} - -/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. -/// If this is used by anything complex (i.e., the address escapes), return -/// true. Also, while we are at it, keep track of those functions that read and -/// write to the value. -/// -/// If OkayStoreDest is non-null, stores into this global are allowed. -bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, - std::vector &Readers, - std::vector &Writers, - GlobalValue *OkayStoreDest) { - if (!V->getType()->isPointerTy()) - return true; - - for (Use &U : V->uses()) { - User *I = U.getUser(); - if (LoadInst *LI = dyn_cast(I)) { - Readers.push_back(LI->getParent()->getParent()); - } else if (StoreInst *SI = dyn_cast(I)) { - if (V == SI->getOperand(1)) { - Writers.push_back(SI->getParent()->getParent()); - } else if (SI->getOperand(1) != OkayStoreDest) { - return true; // Storing the pointer - } - } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { - if (AnalyzeUsesOfPointer(I, Readers, Writers)) - return true; - } else if (Operator::getOpcode(I) == Instruction::BitCast) { - if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) - return true; - } else if (auto CS = CallSite(I)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - if (!CS.isCallee(&U)) { - // Detect calls to free. - if (isFreeCall(I, TLI)) - Writers.push_back(CS->getParent()->getParent()); - else - return true; // Argument of an unknown call. - } - } else if (ICmpInst *ICI = dyn_cast(I)) { - if (!isa(ICI->getOperand(1))) - return true; // Allow comparison against null. - } else { - return true; - } - } - - return false; -} - -/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable -/// which holds a pointer type. See if the global always points to non-aliased -/// heap memory: that is, all initializers of the globals are allocations, and -/// those allocations have no use other than initialization of the global. -/// Further, all loads out of GV must directly use the memory, not store the -/// pointer somewhere. 
If this is true, we consider the memory pointed to by -/// GV to be owned by GV and can disambiguate other pointers from it. -bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { - // Keep track of values related to the allocation of the memory, f.e. the - // value produced by the malloc call and any casts. - std::vector AllocRelatedValues; - - // Walk the user list of the global. If we find anything other than a direct - // load or store, bail out. - for (User *U : GV->users()) { - if (LoadInst *LI = dyn_cast(U)) { - // The pointer loaded from the global can only be used in simple ways: - // we allow addressing of it and loading storing to it. We do *not* allow - // storing the loaded pointer somewhere else or passing to a function. - std::vector ReadersWriters; - if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) - return false; // Loaded pointer escapes. - // TODO: Could try some IP mod/ref of the loaded pointer. - } else if (StoreInst *SI = dyn_cast(U)) { - // Storing the global itself. - if (SI->getOperand(0) == GV) - return false; - - // If storing the null pointer, ignore it. - if (isa(SI->getOperand(0))) - continue; - - // Check the value being stored. - Value *Ptr = GetUnderlyingObject(SI->getOperand(0), - GV->getParent()->getDataLayout()); - - if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. - - // Analyze all uses of the allocation. If any of them are used in a - // non-simple way (e.g. stored to another global) bail out. - std::vector ReadersWriters; - if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. - - // Remember that this allocation is related to the indirect global. - AllocRelatedValues.push_back(Ptr); - } else { - // Something complex, bail out. - return false; - } - } - - // Okay, this is an indirect global. Remember all of the allocations for - // this global in AllocsForIndirectGlobals. - while (!AllocRelatedValues.empty()) { - AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; - AllocRelatedValues.pop_back(); - } - IndirectGlobals.insert(GV); - return true; -} - -/// AnalyzeCallGraph - At this point, we know the functions where globals are -/// immediately stored to and read from. Propagate this information up the call -/// graph to all callers and compute the mod/ref info for all memory for each -/// function. -void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { - // We do a bottom-up SCC traversal of the call graph. In other words, we - // visit all callees before callers (leaf-first). - for (scc_iterator I = scc_begin(&CG); !I.isAtEnd(); ++I) { - const std::vector &SCC = *I; - assert(!SCC.empty() && "SCC with no functions?"); - - if (!SCC[0]->getFunction()) { - // Calls externally - can't say anything useful. Remove any existing - // function records (may have been created when scanning globals). - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - FunctionInfo.erase(SCC[i]->getFunction()); - continue; - } - - FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; - - bool KnowNothing = false; - unsigned FunctionEffect = 0; - - // Collect the mod/ref properties due to called functions. We only compute - // one mod-ref set. - for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); - if (!F) { - KnowNothing = true; - break; - } - - if (F->isDeclaration()) { - // Try to get mod/ref behaviour from function attributes. - if (F->doesNotAccessMemory()) { - // Can't do better than that! 
- } else if (F->onlyReadsMemory()) { - FunctionEffect |= Ref; - if (!F->isIntrinsic()) - // This function might call back into the module and read a global - - // consider every global as possibly being read by this function. - FR.MayReadAnyGlobal = true; - } else { - FunctionEffect |= ModRef; - // Can't say anything useful unless it's an intrinsic - they don't - // read or write global variables of the kind considered here. - KnowNothing = !F->isIntrinsic(); - } - continue; - } - - for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); - CI != E && !KnowNothing; ++CI) - if (Function *Callee = CI->second->getFunction()) { - if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { - // Propagate function effect up. - FunctionEffect |= CalleeFR->FunctionEffect; - - // Incorporate callee's effects on globals into our info. - for (const auto &G : CalleeFR->GlobalInfo) - FR.GlobalInfo[G.first] |= G.second; - FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; - } else { - // Can't say anything about it. However, if it is inside our SCC, - // then nothing needs to be done. - CallGraphNode *CalleeNode = CG[Callee]; - if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) - KnowNothing = true; - } - } else { - KnowNothing = true; - } - } - - // If we can't say anything useful about this SCC, remove all SCC functions - // from the FunctionInfo map. - if (KnowNothing) { - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - FunctionInfo.erase(SCC[i]->getFunction()); - continue; - } - - // Scan the function bodies for explicit loads or stores. - for (auto *Node : SCC) { - if (FunctionEffect == ModRef) - break; // The mod/ref lattice saturates here. - for (Instruction &I : inst_range(Node->getFunction())) { - if (FunctionEffect == ModRef) - break; // The mod/ref lattice saturates here. - - // We handle calls specially because the graph-relevant aspects are - // handled above. - if (auto CS = CallSite(&I)) { - if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) { - // FIXME: It is completely unclear why this is necessary and not - // handled by the above graph code. - FunctionEffect |= ModRef; - } else if (Function *Callee = CS.getCalledFunction()) { - // The callgraph doesn't include intrinsic calls. - if (Callee->isIntrinsic()) { - ModRefBehavior Behaviour = - AliasAnalysis::getModRefBehavior(Callee); - FunctionEffect |= (Behaviour & ModRef); - } - } - continue; - } - - // All non-call instructions we use the primary predicates for whether - // thay read or write memory. - if (I.mayReadFromMemory()) - FunctionEffect |= Ref; - if (I.mayWriteToMemory()) - FunctionEffect |= Mod; - } - } - - if ((FunctionEffect & Mod) == 0) - ++NumReadMemFunctions; - if (FunctionEffect == 0) - ++NumNoMemFunctions; - FR.FunctionEffect = FunctionEffect; - - // Finally, now that we know the full effect on this SCC, clone the - // information to each function in the SCC. - for (unsigned i = 1, e = SCC.size(); i != e; ++i) - FunctionInfo[SCC[i]->getFunction()] = FR; - } -} - -/// alias - If one of the pointers is to a global that we are tracking, and the -/// other is some random pointer, we know there cannot be an alias, because the -/// address of the global isn't taken. -AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - // Get the base object these pointers point to. 
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL); - const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL); - - // If either of the underlying values is a global, they may be non-addr-taken - // globals, which we can answer queries about. - const GlobalValue *GV1 = dyn_cast(UV1); - const GlobalValue *GV2 = dyn_cast(UV2); - if (GV1 || GV2) { - // If the global's address is taken, pretend we don't know it's a pointer to - // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) - GV1 = nullptr; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) - GV2 = nullptr; - - // If the two pointers are derived from two different non-addr-taken - // globals, or if one is and the other isn't, we know these can't alias. - if ((GV1 || GV2) && GV1 != GV2) - return NoAlias; - - // Otherwise if they are both derived from the same addr-taken global, we - // can't know the two accesses don't overlap. - } - - // These pointers may be based on the memory owned by an indirect global. If - // so, we may be able to handle this. First check to see if the base pointer - // is a direct load from an indirect global. - GV1 = GV2 = nullptr; - if (const LoadInst *LI = dyn_cast(UV1)) - if (GlobalVariable *GV = dyn_cast(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV1 = GV; - if (const LoadInst *LI = dyn_cast(UV2)) - if (const GlobalVariable *GV = dyn_cast(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV2 = GV; - - // These pointers may also be from an allocation for the indirect global. If - // so, also handle them. - if (AllocsForIndirectGlobals.count(UV1)) - GV1 = AllocsForIndirectGlobals[UV1]; - if (AllocsForIndirectGlobals.count(UV2)) - GV2 = AllocsForIndirectGlobals[UV2]; - - // Now that we know whether the two pointers are related to indirect globals, - // use this to disambiguate the pointers. If either pointer is based on an - // indirect global and if they are not both based on the same indirect global, - // they cannot alias. - if ((GV1 || GV2) && GV1 != GV2) - return NoAlias; - - return AliasAnalysis::alias(LocA, LocB); -} - -AliasAnalysis::ModRefResult -GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - unsigned Known = ModRef; - - // If we are asking for mod/ref info of a direct call with a pointer to a - // global we are tracking, return information if we have it. - const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); - if (const GlobalValue *GV = - dyn_cast(GetUnderlyingObject(Loc.Ptr, DL))) - if (GV->hasLocalLinkage()) - if (const Function *F = CS.getCalledFunction()) - if (NonAddressTakenGlobals.count(GV)) - if (const FunctionRecord *FR = getFunctionInfo(F)) - Known = FR->getInfoForGlobal(GV); - - if (Known == NoModRef) - return NoModRef; // No need to query other mod/ref analyses - return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); -} - -//===----------------------------------------------------------------------===// -// Methods to update the analysis as a result of the client transformation. -// -void GlobalsModRef::deleteValue(Value *V) { - if (GlobalValue *GV = dyn_cast(V)) { - if (NonAddressTakenGlobals.erase(GV)) { - // This global might be an indirect global. If so, remove it and remove - // any AllocRelatedValues for it. - if (IndirectGlobals.erase(GV)) { - // Remove any entries in AllocsForIndirectGlobals for this global. 
- for (std::map::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); - I != E;) { - if (I->second == GV) { - AllocsForIndirectGlobals.erase(I++); - } else { - ++I; - } - } - } - } - } - - // Otherwise, if this is an allocation related to an indirect global, remove - // it. - AllocsForIndirectGlobals.erase(V); - - AliasAnalysis::deleteValue(V); -} - -void GlobalsModRef::addEscapingUse(Use &U) { - // For the purposes of this analysis, it is conservatively correct to treat - // a newly escaping value equivalently to a deleted one. We could perhaps - // be more precise by processing the new use and attempting to update our - // saved analysis results to accommodate it. - deleteValue(U); - - AliasAnalysis::addEscapingUse(U); -} diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp deleted file mode 100644 index 806bfb81b6d5..000000000000 --- a/lib/Analysis/IPA/IPA.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===-- IPA.cpp -----------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the common initialization routines for the IPA library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm-c/Initialization.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -/// initializeIPA - Initialize all passes linked into the IPA library. -void llvm::initializeIPA(PassRegistry &Registry) { - initializeCallGraphWrapperPassPass(Registry); - initializeCallGraphPrinterPass(Registry); - initializeCallGraphViewerPass(Registry); - initializeGlobalsModRefPass(Registry); -} - -void LLVMInitializeIPA(LLVMPassRegistryRef R) { - initializeIPA(*unwrap(R)); -} diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 926787d3be91..e0c5d8fa5f5a 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } @@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { *L->getHeader()->getParent()); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); // Collect ephemeral values so that AddUsersIfInteresting skips them. EphValues.clear(); @@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. 
for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(&*I); return false; } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/InlineCost.cpp similarity index 95% rename from lib/Analysis/IPA/InlineCost.cpp rename to lib/Analysis/InlineCost.cpp index c0d2e375cb04..a86a703ed9d6 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor { /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee - /// itself. + /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. + /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); // Custom analysis routines. @@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor { bool visitSwitchInst(SwitchInst &SI); bool visitIndirectBrInst(IndirectBrInst &IBI); bool visitResumeInst(ResumeInst &RI); + bool visitCleanupReturnInst(CleanupReturnInst &RI); + bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); public: @@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } return Base::visitCallSite(CS); @@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { return false; } +bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a cleanupret instruction. + return false; +} + +bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a catchret instruction. + return false; +} + bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { // FIXME: It might be reasonably to discount the cost of instructions leading // to unreachable as they have the lowest possible impact on both runtime and @@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, continue; // Skip ephemeral values. - if (EphValues.count(I)) + if (EphValues.count(&*I)) continue; ++NumInstructions; if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; - // If the instruction is floating point, and the target says this operation is - // expensive or the function has the "use-soft-float" attribute, this may - // eventually become a library call. Treat the cost as such. + // If the instruction is floating point, and the target says this operation + // is expensive or the function has the "use-soft-float" attribute, this may + // eventually become a library call. Treat the cost as such. if (I->getType()->isFloatingPointTy()) { bool hasSoftFloatAttr = false; - // If the function has the "use-soft-float" attribute, mark it as expensive. 
+ // If the function has the "use-soft-float" attribute, mark it as + // expensive. if (F.hasFnAttribute("use-soft-float")) { Attribute Attr = F.getFnAttribute("use-soft-float"); StringRef Val = Attr.getValueAsString(); @@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. - if (Base::visit(I)) + if (Base::visit(&*I)) ++NumInstructionsSimplified; else Cost += InlineConstants::InstrCost; @@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FAI != FAE; ++FAI, ++CAI) { assert(CAI != CS.arg_end()); if (Constant *C = dyn_cast(CAI)) - SimplifiedValues[FAI] = C; + SimplifiedValues[&*FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (isa(PtrArg)) { - SROAArgValues[FAI] = PtrArg; + SROAArgValues[&*FAI] = PtrArg; SROAArgCosts[PtrArg] = 0; } } @@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); - return Cost < Threshold; + return Cost <= std::max(0, Threshold); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() { } #endif -INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) - -char InlineCostAnalysis::ID = 0; - -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {} - -InlineCostAnalysis::~InlineCostAnalysis() {} - -void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TTIWP = &getAnalysis(); - ACT = &getAnalysis(); - return false; -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - /// \brief Test that two functions either have or have not the given attribute /// at the same time. 
template @@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI) { - return TTI.hasCompatibleFunctionAttributes(Caller, Callee) && - attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && - attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && - attributeMatches(Caller, Callee, Attribute::SanitizeThread); + return TTI.areInlineCompatible(Caller, Callee) && + AttributeFuncs::areInlineCompatible(*Caller, *Callee); } -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { +InlineCost llvm::getInlineCost(CallSite CS, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT); +} + +InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { // Cannot inline indirect calls. if (!Callee) return llvm::InlineCost::getNever(); @@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, - TTIWP->getTTI(*Callee))) + if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI)) return llvm::InlineCost::getNever(); // Don't inline this call if the caller has the optnone attribute. @@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS); + CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool InlineCostAnalysis::isInlineViable(Function &F) { +bool llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches or @@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (isa(BI->getTerminator()) || BI->hasAddressTaken()) return false; - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - CallSite CS(II); + for (auto &II : *BI) { + CallSite CS(&II); if (!CS) continue; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a7f8f5c8c99b..b89ff268d11e 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return DT->dominates(I, P); } - // Otherwise, if the instruction is in the entry block, and is not an invoke, + // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && !isa(I)) @@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL, // Is the set of underlying objects all noalias calls? 
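A recurring mechanical change in this patch (visible in isInlineViable above and in the `&*I` edits elsewhere) is that ilist iterators no longer convert implicitly to `Instruction *`, so code takes the address of the dereferenced element instead. A small sketch of the target idiom, assuming this revision's C++ API; countCallSites is a made-up helper, not LLVM code:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Count the call sites in a function using range-based iteration and the
    // address-of-reference idiom the patch converts to.
    static unsigned countCallSites(Function &F) {
      unsigned N = 0;
      for (BasicBlock &BB : F)
        for (Instruction &I : BB) {
          CallSite CS(&I); // was: CallSite CS(II) on a BasicBlock::iterator
          if (!CS)
            continue;
          ++N;
        }
      return N;
    }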
auto IsNAC = [](SmallVectorImpl &Objects) { - return std::all_of(Objects.begin(), Objects.end(), - [](Value *V){ return isNoAliasCall(V); }); + return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall); }; // Is the set of underlying objects all things which must be disjoint from @@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // X >=u 1 -> X if (match(RHS, m_One())) return LHS; + if (isImpliedCondition(RHS, LHS, Q.DL)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SGE: + /// For signed comparison, the values for an i1 are 0 and -1 + /// respectively. This maps into a truth table of: + /// LHS | RHS | LHS >=s RHS | LHS implies RHS + /// 0 | 0 | 1 (0 >= 0) | 1 + /// 0 | 1 | 1 (0 >= -1) | 1 + /// 1 | 0 | 0 (-1 >= 0) | 0 + /// 1 | 1 | 1 (-1 >= -1) | 1 + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); break; case ICmpInst::ICMP_SLT: // X X @@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (match(RHS, m_One())) return LHS; break; + case ICmpInst::ICMP_ULE: + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); + break; } } @@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { // 'and x, CI2' produces [0, CI2]. Upper = CI2->getValue() + 1; + } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) { + // 'add nuw x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); } - if (Lower != Upper) { - ConstantRange LHS_CR = ConstantRange(Lower, Upper); + + ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) + : ConstantRange(Width, true); + + if (auto *I = dyn_cast(LHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); + + if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(RHS->getContext()); if (RHS_CR.inverse().contains(LHS_CR)) @@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If both operands have range metadata, use the metadata + // to simplify the comparison. + if (isa(RHS) && isa(LHS)) { + auto RHS_Instr = dyn_cast(RHS); + auto LHS_Instr = dyn_cast(LHS); + + if (RHS_Instr->getMetadata(LLVMContext::MD_range) && + LHS_Instr->getMetadata(LLVMContext::MD_range)) { + auto RHS_CR = getConstantRangeFromMetadata( + *RHS_Instr->getMetadata(LLVMContext::MD_range)); + auto LHS_CR = getConstantRangeFromMetadata( + *LHS_Instr->getMetadata(LLVMContext::MD_range)); + + auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); + if (Satisfied_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + + auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( + CmpInst::getInversePredicate(Pred), RHS_CR); + if (InversedSatisfied_CR.contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + // Compare of cast, for example (zext X) != 0 -> X != 0 if (isa(LHS) && (isa(RHS) || isa(RHS))) { Instruction *LI = cast(LHS); @@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp eq|ne X, Y -> false|true if X != Y + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + LLVMContext &Ctx = LHS->getType()->getContext(); + return Pred == ICmpInst::ICMP_NE ? 
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + } + // Special logic for binary operators. BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); @@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, - Instruction *CxtI) { + const Instruction *CxtI) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, break; } + // In general, it is possible for computeKnownBits to determine all bits in a + // value even when the operands are not all constants. + if (!Result && I->getType()->isIntegerTy()) { + unsigned BitWidth = I->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); + if ((KnownZero | KnownOne).isAllOnesValue()) + Result = ConstantInt::get(I->getContext(), KnownOne); + } + /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index 3039ddea4ff5..bddf1a3ac201 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -15,9 +15,6 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = IPA - [component_0] type = Library name = Analysis diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index c8d0410c1e0f..0f0f31e62ac7 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { assert(CalleeC.isDescendantOf(*this) && "Callee must be a descendant of the Caller."); - // The only change required is to add this SCC to the parent set of the callee. + // The only change required is to add this SCC to the parent set of the + // callee. CalleeC.ParentSCCs.insert(this); } @@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS( } SmallVector -LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, - Node &CalleeN) { +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) { // First remove it from the node. CallerN.removeEdgeInternal(CalleeN.getFunction()); @@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, // the leaf SCC list. if (!IsLeafSCC && !ResultSCCs.empty()) G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), - G->LeafSCCs.end()); + G->LeafSCCs.end()); // Return the new list of SCCs. return ResultSCCs; diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index a6ae7f2229c5..0d1d34e0cb4f 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -64,10 +65,10 @@ class LVILatticeVal { enum LatticeValueTy { /// This Value has no known value yet. undefined, - + /// This Value has a specific constant value. constant, - + /// This Value is known to not have the specified value. 
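On the new SimplifyInstruction fallback above: when computeKnownBits proves every bit of an integer result, that is, when KnownZero and KnownOne together cover the full width, the instruction folds to the constant KnownOne. A standalone model of that test using plain 8-bit masks rather than APInt:

    #include <cassert>
    #include <cstdint>

    // Model: KnownZero/KnownOne are disjoint bitmasks over an 8-bit value. If
    // their union covers every bit, the value is fully determined and equals
    // KnownOne.
    struct KnownBits8 {
      uint8_t Zero, One;
      bool allBitsKnown() const { return uint8_t(Zero | One) == 0xFF; }
    };

    int main() {
      // Suppose analysis proved the low nibble is 0b0101 and the rest is zero.
      KnownBits8 K{/*Zero=*/0xFA, /*One=*/0x05};
      assert(K.allBitsKnown()); // (KnownZero | KnownOne).isAllOnesValue()
      assert(K.One == 0x05);    // the simplified constant is KnownOne
      return 0;
    }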
notconstant, @@ -77,13 +78,13 @@ class LVILatticeVal { /// This value is not known to be constant, and we know that it has a value. overdefined }; - + /// Val: This stores the current lattice value along with the Constant* for /// the constant if this is a 'constant' or 'notconstant' value. LatticeValueTy Tag; Constant *Val; ConstantRange Range; - + public: LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} @@ -104,29 +105,34 @@ public: Res.markConstantRange(CR); return Res; } + static LVILatticeVal getOverdefined() { + LVILatticeVal Res; + Res.markOverdefined(); + return Res; + } bool isUndefined() const { return Tag == undefined; } bool isConstant() const { return Tag == constant; } bool isNotConstant() const { return Tag == notconstant; } bool isConstantRange() const { return Tag == constantrange; } bool isOverdefined() const { return Tag == overdefined; } - + Constant *getConstant() const { assert(isConstant() && "Cannot get the constant of a non-constant!"); return Val; } - + Constant *getNotConstant() const { assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); return Val; } - + ConstantRange getConstantRange() const { assert(isConstantRange() && "Cannot get the constant-range of a non-constant-range!"); return Range; } - + /// Return true if this is a change in status. bool markOverdefined() { if (isOverdefined()) @@ -150,7 +156,7 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); @@ -168,27 +174,27 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markConstantRange(const ConstantRange NewR) { if (isConstantRange()) { if (NewR.isEmptySet()) return markOverdefined(); - + bool changed = Range != NewR; Range = NewR; return changed; } - + assert(isUndefined()); if (NewR.isEmptySet()) return markOverdefined(); - + Tag = constantrange; Range = NewR; return true; } - + /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { @@ -267,7 +273,7 @@ public: return markConstantRange(NewR); } }; - + } // end anonymous namespace. namespace llvm { @@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; - struct LVIValueHandle : public CallbackVH { + struct LVIValueHandle final : public CallbackVH { LazyValueInfoCache *Parent; - + LVIValueHandle(Value *V, LazyValueInfoCache *P) : CallbackVH(V), Parent(P) { } @@ -308,24 +314,27 @@ namespace { }; } -namespace { +namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. - typedef std::map, LVILatticeVal> ValueCacheEntryTy; + /// Over-defined lattice values are recorded in OverDefinedCache to reduce + /// memory overhead. + typedef SmallDenseMap, LVILatticeVal, 4> + ValueCacheEntryTy; /// This is all of the cached information for all values, /// mapped from Value* to key information. std::map ValueCache; - + /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. 
This is required - /// for cache updating. - typedef std::pair, Value*> OverDefinedPairTy; - DenseSet OverDefinedCache; + /// over-defined at the end of that block. + typedef DenseMap, SmallPtrSet> + OverDefinedCacheTy; + OverDefinedCacheTy OverDefinedCache; /// Keep track of all blocks that we have ever seen, so we /// don't spend time removing unused blocks from our caches. @@ -357,9 +366,13 @@ namespace { void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { SeenBlocks.insert(BB); - lookup(Val)[BB] = Result; + + // Insert over-defined values into their own cache to reduce memory + // overhead. if (Result.isOverdefined()) - OverDefinedCache.insert(std::make_pair(BB, Val)); + OverDefinedCache[BB].insert(Val); + else + lookup(Val)[BB] = Result; } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); @@ -382,11 +395,39 @@ namespace { Instruction *BBI); void solve(); - + ValueCacheEntryTy &lookup(Value *V) { return ValueCache[LVIValueHandle(V, this)]; } + bool isOverdefined(Value *V, BasicBlock *BB) const { + auto ODI = OverDefinedCache.find(BB); + + if (ODI == OverDefinedCache.end()) + return false; + + return ODI->second.count(V); + } + + bool hasCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return true; + + LVIValueHandle ValHandle(V, this); + auto I = ValueCache.find(ValHandle); + if (I == ValueCache.end()) + return false; + + return I->second.count(BB); + } + + LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return LVILatticeVal::getOverdefined(); + + return lookup(V)[BB]; + } + public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. @@ -402,15 +443,15 @@ namespace { /// value for the specified Value* that is true on the specified edge. LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// This is the update interface to inform the cache that an edge from /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - + /// This is part of the update interface to inform the cache /// that a block has been deleted. void eraseBlock(BasicBlock *BB); - + /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -425,15 +466,17 @@ namespace { } // end anonymous namespace void LVIValueHandle::deleted() { - typedef std::pair, Value*> OverDefinedPairTy; - - SmallVector ToErase; - for (const OverDefinedPairTy &P : Parent->OverDefinedCache) - if (P.second == getValPtr()) - ToErase.push_back(P); - for (const OverDefinedPairTy &P : ToErase) - Parent->OverDefinedCache.erase(P); - + SmallVector, 4> ToErase; + for (auto &I : Parent->OverDefinedCache) { + SmallPtrSetImpl &ValueSet = I.second; + if (ValueSet.count(getValPtr())) + ValueSet.erase(getValPtr()); + if (ValueSet.empty()) + ToErase.push_back(I.first); + } + for (auto &BB : ToErase) + Parent->OverDefinedCache.erase(BB); + // This erasure deallocates *this, so it MUST happen after we're done // using any and all members of *this. 
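The value-handle logic just above walks the reworked OverDefinedCache, which now maps each block to the set of values that are over-defined there and drops a block's entry once its set becomes empty. A standalone analog using std containers (the patch itself uses DenseMap and SmallPtrSet); the names here are illustrative:

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Analog of the reworked OverDefinedCache: block -> set of over-defined
    // values, with empty per-block sets erased eagerly, as in deleted().
    using OverDefinedCache = std::map<std::string, std::set<std::string>>;

    void eraseValue(OverDefinedCache &Cache, const std::string &V) {
      std::vector<std::string> ToErase;
      for (auto &Entry : Cache) {
        Entry.second.erase(V);
        if (Entry.second.empty())
          ToErase.push_back(Entry.first);
      }
      for (const auto &BB : ToErase)
        Cache.erase(BB);
    }

    int main() {
      OverDefinedCache Cache{{"bb1", {"v1", "v2"}}, {"bb2", {"v1"}}};
      eraseValue(Cache, "v1");
      assert(Cache.count("bb1") && !Cache.count("bb2")); // bb2's set emptied
      return 0;
    }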
   Parent->ValueCache.erase(*this);
@@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
     return;
   SeenBlocks.erase(I);
 
-  SmallVector<OverDefinedPairTy, 4> ToErase;
-  for (const OverDefinedPairTy& P : OverDefinedCache)
-    if (P.first == BB)
-      ToErase.push_back(P);
-  for (const OverDefinedPairTy &P : ToErase)
-    OverDefinedCache.erase(P);
+  auto ODI = OverDefinedCache.find(BB);
+  if (ODI != OverDefinedCache.end())
+    OverDefinedCache.erase(ODI);
 
-  for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
-       I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+  for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
     I->second.erase(BB);
 }
 
@@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() {
     if (solveBlockValue(e.second, e.first)) {
       // The work item was completely processed.
       assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
-      assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+      assert(hasCachedValueInfo(e.second, e.first) &&
+             "Result should be in cache!");
 
       BlockValueStack.pop();
       BlockValueSet.erase(e);
@@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
   if (isa<Constant>(Val))
     return true;
 
-  LVIValueHandle ValHandle(Val, this);
-  std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I =
-    ValueCache.find(ValHandle);
-  if (I == ValueCache.end()) return false;
-  return I->second.count(BB);
+  return hasCachedValueInfo(Val, BB);
 }
 
 LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
@@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
     return LVILatticeVal::get(VC);
 
   SeenBlocks.insert(BB);
-  return lookup(Val)[BB];
+  return getCachedValueInfo(Val, BB);
+}
+
+static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+  switch (BBI->getOpcode()) {
+  default: break;
+  case Instruction::Load:
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+      if (isa<IntegerType>(BBI->getType())) {
+        ConstantRange Result = getConstantRangeFromMetadata(*Ranges);
+        return LVILatticeVal::getRange(Result);
+      }
+    break;
+  };
+  // Nothing known - Note that we do not want overdefined here. We may know
+  // something else about the value and not having range metadata shouldn't
+  // cause us to throw away those facts.
+  return LVILatticeVal();
 }
 
 bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
   if (isa<Constant>(Val))
     return true;
 
-  if (lookup(Val).count(BB)) {
+  if (hasCachedValueInfo(Val, BB)) {
     // If we have a cached value, use that.
     DEBUG(dbgs() << "  reuse BB '" << BB->getName()
-                 << "' val=" << lookup(Val)[BB] << '\n');
+                 << "' val=" << getCachedValueInfo(Val, BB) << '\n');
 
     // Since we're reusing a cached value, we don't need to update the
    // OverDefinedCache. The cache will have been properly updated whenever the
@@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
   // Hold off inserting this value into the Cache in case we have to return
   // false and come back later.
   LVILatticeVal Res;
-  
+
   Instruction *BBI = dyn_cast<Instruction>(Val);
   if (!BBI || BBI->getParent() != BB) {
     if (!solveBlockValueNonLocal(Res, Val, BB))
@@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
     return true;
   }
 
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
-    Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+  // If this value is a nonnull pointer, record its range and bail out.
+ PointerType *PT = dyn_cast(BBI->getType()); + if (PT && isKnownNonNull(BBI)) { + Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT)); insertResult(Val, BB, Res); return true; } + // If this is an instruction which supports range metadata, return the + // implied range. TODO: This should be an intersection, not a union. + Res.mergeIn(getFromRangeMetadata(BBI), DL); + // We can only analyze the definitions of certain classes of instructions // (integral binops and casts at the moment), so bail if this isn't one. LVILatticeVal Result; @@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, PointerType *PTy = cast(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } - + BBLV = Result; return true; } @@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, BBLV = Result; return true; } - + bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB) { LVILatticeVal Result; // Start Undefined. @@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, if (Result.isOverdefined()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred.\n"); - + BBLV = Result; return true; } @@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, BBLV.markOverdefined(); return true; } - + ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); IntegerType *ResultTy = cast(BBI->getType()); @@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, case Instruction::Or: Result.markConstantRange(LHSRange.binaryOr(RHSRange)); break; - + // Unhandled instructions are overdefined. default: DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, Result.markOverdefined(); break; } - + BBLV = Result; return true; } @@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, /// Val is not constrained on the edge. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, LVILatticeVal &Result) { - // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast(BBFrom->getTerminator())) { // If this is a conditional branch and only one successor goes to BBTo, then @@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, bool isTrueDest = BI->getSuccessor(0) == BBTo; assert(BI->getSuccessor(!isTrueDest) == BBTo && "BBTo isn't a successor of BBFrom"); - + // If V is the condition of the branch itself, then we know exactly what // it is. if (BI->getCondition() == Val) { @@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, Type::getInt1Ty(Val->getContext()), isTrueDest)); return true; } - + // If the condition of the branch is an equality comparison, we may be // able to infer the value. 
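On getFromRangeMetadata above: !range metadata on a load, call, or invoke seeds the lattice with a constant range, while the absence of metadata deliberately yields "undefined" rather than "overdefined", so later facts can still refine the value. A standalone model of that contract in plain C++17; the half-open pair stands in for ConstantRange:

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>

    // Model of the contract: optional [Lo, Hi) metadata seeds a lattice value;
    // missing metadata leaves the value Undefined, not Overdefined.
    struct Lattice {
      enum Kind { Undefined, ConstantRange, Overdefined } K = Undefined;
      int64_t Lo = 0, Hi = 0; // valid when K == ConstantRange
    };

    Lattice fromRangeMetadata(
        const std::optional<std::pair<int64_t, int64_t>> &MD) {
      if (MD)
        return {Lattice::ConstantRange, MD->first, MD->second};
      return {}; // nothing known; deliberately not Overdefined
    }

    int main() {
      Lattice L = fromRangeMetadata(std::make_pair(int64_t(0), int64_t(256)));
      assert(L.K == Lattice::ConstantRange && L.Hi == 256);
      assert(fromRangeMetadata(std::nullopt).K == Lattice::Undefined);
      return 0;
    }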
if (ICmpInst *ICI = dyn_cast(BI->getCondition())) @@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - + assert(BlockValueStack.empty() && BlockValueSet.empty()); pushBlockValue(std::make_pair(BB, V)); @@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { << CxtI->getName() << "'\n"); LVILatticeVal Result; + if (auto *I = dyn_cast(V)) + Result = getFromRangeMetadata(I); mergeAssumeBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); @@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); - + LVILatticeVal Result; if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { solve(); @@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - // When an edge in the graph has been threaded, values that we could not - // determine a value for before (i.e. were marked overdefined) may be possible - // to solve now. We do NOT try to proactively update these values. Instead, - // we clear their entries from the cache, and allow lazy updating to recompute - // them when needed. - + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be + // possible to solve now. We do NOT try to proactively update these values. + // Instead, we clear their entries from the cache, and allow lazy updating to + // recompute them when needed. + // The updating process is fairly simple: we need to drop cached info // for all values that were marked overdefined in OldSucc, and for those same // values in any successor of OldSucc (except NewSucc) in which they were // also marked overdefined. std::vector worklist; worklist.push_back(OldSucc); - - DenseSet ClearSet; - for (OverDefinedPairTy &P : OverDefinedCache) - if (P.first == OldSucc) - ClearSet.insert(P.second); - + + auto I = OverDefinedCache.find(OldSucc); + if (I == OverDefinedCache.end()) + return; // Nothing to process here. + SmallVector ValsToClear(I->second.begin(), I->second.end()); + // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already // visited will have had their overdefined markers cleared already, and we @@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, while (!worklist.empty()) { BasicBlock *ToUpdate = worklist.back(); worklist.pop_back(); - + // Skip blocks only accessible through NewSucc. if (ToUpdate == NewSucc) continue; - + bool changed = false; - for (Value *V : ClearSet) { + for (Value *V : ValsToClear) { // If a value was marked overdefined in OldSucc, and is here too... - DenseSet::iterator OI = - OverDefinedCache.find(std::make_pair(ToUpdate, V)); - if (OI == OverDefinedCache.end()) continue; + auto OI = OverDefinedCache.find(ToUpdate); + if (OI == OverDefinedCache.end()) + continue; + SmallPtrSetImpl &ValueSet = OI->second; + if (!ValueSet.count(V)) + continue; - // Remove it from the caches. 
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; - ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); + ValueSet.erase(V); + if (ValueSet.empty()) + OverDefinedCache.erase(OI); - assert(CI != Entry.end() && "Couldn't find entry to update?"); - Entry.erase(CI); - OverDefinedCache.erase(OI); - - // If we removed anything, then we potentially need to update + // If we removed anything, then we potentially need to update // blocks successors too. changed = true; } if (!changed) continue; - + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); } } @@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, } /// Determine whether the specified value is known to be a -/// constant on the specified edge. Return null if not. +/// constant on the specified edge. Return null if not. Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { @@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - + if (Result.isConstantRange()) { ConstantInt *CI = dyn_cast(C); if (!CI) return LazyValueInfo::Unknown; - + ConstantRange CR = Result.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::False; } - + // Handle more complex predicates. ConstantRange TrueValues = ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); @@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return LazyValueInfo::False; return LazyValueInfo::Unknown; } - + if (Result.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". @@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, } return LazyValueInfo::Unknown; } - + return LazyValueInfo::Unknown; } @@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, if (Ret != Unknown) return Ret; - // TODO: Move this logic inside getValueAt so that it can be cached rather - // than re-queried on each call. This would also allow us to merge the - // underlying lattice values to get more information + // Note: The following bit of code is somewhat distinct from the rest of LVI; + // LVI as a whole tries to compute a lattice value which is conservatively + // correct at a given location. In this case, we have a predicate which we + // weren't able to prove about the merged result, and we're pushing that + // predicate back along each incoming edge to see if we can prove it + // separately for each input. As a motivating example, consider: + // bb1: + // %v1 = ... ; constantrange<1, 5> + // br label %merge + // bb2: + // %v2 = ... ; constantrange<10, 20> + // br label %merge + // merge: + // %phi = phi [%v1, %v2] ; constantrange<1,20> + // %pred = icmp eq i32 %phi, 8 + // We can't tell from the lattice value for '%phi' that '%pred' is false + // along each path, but by checking the predicate over each input separately, + // we can. + // We limit the search to one step backwards from the current BB and value. 
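A standalone model of the PHI reasoning described in the comment above, using the same numbers: the merged range [1,20) for %phi cannot decide `icmp eq %phi, 8`, but testing the predicate against each incoming range separately proves it false on every edge. Plain C++, with a half-open interval standing in for constantrange:

    #include <cassert>
    #include <cstdint>

    struct Range { int64_t Lo, Hi; };        // half-open [Lo, Hi)
    enum Tristate { False, True, Unknown };

    // Decide `x == C` for all x in R, if possible.
    Tristate eqVsRange(const Range &R, int64_t C) {
      if (C < R.Lo || C >= R.Hi) return False;      // C lies outside the range
      if (R.Lo == C && R.Hi == C + 1) return True;  // range is exactly {C}
      return Unknown;
    }

    int main() {
      Range Incoming[] = {{1, 5}, {10, 20}};  // %v1 and %v2 in the example
      Tristate Baseline = eqVsRange(Incoming[0], 8);
      for (const Range &R : Incoming)
        if (eqVsRange(R, 8) != Baseline) { Baseline = Unknown; break; }
      assert(Baseline == False);              // provably false on every edge

      Range Merged{1, 20};                    // the lattice value for %phi
      assert(eqVsRange(Merged, 8) == Unknown);// merged info alone is too weak
      return 0;
    }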
+ // We could consider extending this to search further backwards through the + // CFG and/or value graph, but there are non-obvious compile time vs quality + // tradeoffs. if (CxtI) { - // For a comparison where the V is outside this block, it's possible - // that we've branched on it before. Look to see if the value is known - // on all incoming edges. BasicBlock *BB = CxtI->getParent(); + + // Function entry or an unreachable block. Bail to avoid confusing + // analysis below. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (PI != PE && - (!isa(V) || - cast(V)->getParent() != BB)) { + if (PI == PE) + return Unknown; + + // If V is a PHI node in the same block as the context, we need to ask + // questions about the predicate as applied to the incoming value along + // each edge. This is useful for eliminating cases where the predicate is + // known along all incoming edges. + if (auto *PHI = dyn_cast(V)) + if (PHI->getParent() == BB) { + Tristate Baseline = Unknown; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) { + Value *Incoming = PHI->getIncomingValue(i); + BasicBlock *PredBB = PHI->getIncomingBlock(i); + // Note that PredBB may be BB itself. + Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, + CxtI); + + // Keep going as long as we've seen a consistent known result for + // all inputs. + Baseline = (i == 0) ? Result /* First iteration */ + : (Baseline == Result ? Baseline : Unknown); /* All others */ + if (Baseline == Unknown) + break; + } + if (Baseline != Unknown) + return Baseline; + } + + // For a comparison where the V is outside this block, it's possible + // that we've branched on it before. Look to see if the value is known + // on all incoming edges. + if (!isa(V) || + cast(V)->getParent() != BB) { // For predecessor edge, determine if the comparison is true or false - // on that edge. If they're all true or all false, we can conclude + // on that edge. If they're all true or all false, we can conclude // the value of the comparison in this block. Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI); if (Baseline != Unknown) { diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp deleted file mode 100644 index 991a0e3e2752..000000000000 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LibCallAliasAnalysis class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -using namespace llvm; - -// Register this pass... 
-char LibCallAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", - "LibCall Alias Analysis", false, true, false) - -FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { - return new LibCallAliasAnalysis(LCI); -} - -LibCallAliasAnalysis::~LibCallAliasAnalysis() { - delete LCI; -} - -void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code -} - -bool LibCallAliasAnalysis::runOnFunction(Function &F) { - // set up super class - InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); - return false; -} - -/// AnalyzeLibCallDetails - Given a call to a function with the specified -/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call -/// vs the specified pointer/size. -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, - const MemoryLocation &Loc) { - // If we have a function, check to see what kind of mod/ref effects it - // has. Start by including any info globally known about the function. - AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; - if (MRInfo == NoModRef) return MRInfo; - - // If that didn't tell us that the function is 'readnone', check to see - // if we have detailed info and if 'P' is any of the locations we know - // about. - const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == nullptr) - return MRInfo; - - // If the details array is of the 'DoesNot' kind, we only know something if - // the pointer is a match for one of the locations in 'Details'. If we find a - // match, we can prove some interactions cannot happen. - // - if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { - // Find out if the pointer refers to a known location. - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res != LibCallLocationInfo::Yes) continue; - - // If we find a match against a location that we 'do not' interact with, - // learn this info into MRInfo. - return ModRefResult(MRInfo & ~Details[i].MRInfo); - } - return MRInfo; - } - - // If the details are of the 'DoesOnly' sort, we know something if the pointer - // is a match for one of the locations in 'Details'. Also, if we can prove - // that the pointers is *not* one of the locations in 'Details', we know that - // the call is NoModRef. - assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); - - // Find out if the pointer refers to a known location. - bool NoneMatch = true; - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res == LibCallLocationInfo::No) continue; - - // If we don't know if this pointer points to the location, then we have to - // assume it might alias in some case. - if (Res == LibCallLocationInfo::Unknown) { - NoneMatch = false; - continue; - } - - // If we know that this pointer definitely is pointing into the location, - // merge in this information. 
- return ModRefResult(MRInfo & Details[i].MRInfo); - } - - // If we found that the pointer is guaranteed to not match any of the - // locations in our 'DoesOnly' rule, then we know that the pointer must point - // to some other location. Since the libcall doesn't mod/ref any other - // locations, return NoModRef. - if (NoneMatch) - return NoModRef; - - // Otherwise, return any other info gained so far. - return MRInfo; -} - -// getModRefInfo - Check to see if the specified callsite can clobber the -// specified memory object. -// -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - ModRefResult MRInfo = ModRef; - - // If this is a direct call to a function that LCI knows about, get the - // information about the runtime function. - if (LCI) { - if (const Function *F = CS.getCalledFunction()) { - if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { - MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); - if (MRInfo == NoModRef) return NoModRef; - } - } - } - - // The AliasAnalysis base class has some smarts, lets use them. - return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); -} diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp deleted file mode 100644 index 003c81e87b60..000000000000 --- a/lib/Analysis/LibCallSemantics.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- LibCallSemantics.cpp - Describe library semantics ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements interfaces that can be used to describe language -// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM -// optimizers. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Function.h" -using namespace llvm; - -/// This impl pointer in ~LibCallInfo is actually a StringMap. This -/// helper does the cast. -static StringMap *getMap(void *Ptr) { - return static_cast *>(Ptr); -} - -LibCallInfo::~LibCallInfo() { - delete getMap(Impl); -} - -const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { - // Get location info on the first call. - if (NumLocations == 0) - NumLocations = getLocationInfo(Locations); - - assert(LocID < NumLocations && "Invalid location ID!"); - return Locations[LocID]; -} - - -/// Return the LibCallFunctionInfo object corresponding to -/// the specified function if we have it. If not, return null. -const LibCallFunctionInfo * -LibCallInfo::getFunctionInfo(const Function *F) const { - StringMap *Map = getMap(Impl); - - /// If this is the first time we are querying for this info, lazily construct - /// the StringMap to index it. - if (!Map) { - Impl = Map = new StringMap(); - - const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (!Array) return nullptr; - - // We now have the array of entries. Populate the StringMap. - for (unsigned i = 0; Array[i].Name; ++i) - (*Map)[Array[i].Name] = Array+i; - } - - // Look up this function in the string map. - return Map->lookup(F->getName()); -} - -/// See if the given exception handling personality function is one that we -/// understand. 
If so, return a description of it; otherwise return Unknown. -EHPersonality llvm::classifyEHPersonality(const Value *Pers) { - const Function *F = dyn_cast(Pers->stripPointerCasts()); - if (!F) - return EHPersonality::Unknown; - return StringSwitch(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Default(EHPersonality::Unknown); -} - -bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { - EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); - // We can't simplify any invokes to nounwind functions if the personality - // function wants to catch asynch exceptions. The nounwind attribute only - // implies that the function does not throw synchronous exceptions. - return !isAsynchronousEHPersonality(Personality); -} diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 0b9308a573a0..2dfb09c95ad6 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -49,6 +49,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" @@ -98,12 +99,13 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); - Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; - Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, SmallPtrSetImpl &Visited) const; public: Module *Mod; + const DataLayout *DL; AliasAnalysis *AA; AssumptionCache *AC; DominatorTree *DT; @@ -121,7 +123,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) @@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", // bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); - AA = &getAnalysis(); + DL = &F.getParent()->getDataLayout(); + AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); TLI = &getAnalysis().getTLI(); @@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - const DataLayout &DL = CS->getModule()->getDataLayout(); visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, MemRef::Callee); - if (Function *F = dyn_cast(findValue(Callee, DL, + if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { Assert(CS.getCallingConv() == 
F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", @@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) { for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { - Argument *Formal = PI++; + Argument *Formal = &*PI++; Assert(Formal->getType() == Actual->getType(), "Undefined behavior: Call argument type mismatches " "callee parameter type", @@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) { if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { Type *Ty = cast(Formal->getType())->getElementType(); - visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - DL.getABITypeAlignment(Ty), Ty, + visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), + DL->getABITypeAlignment(Ty), Ty, MemRef::Read | MemRef::Write); } } @@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) { if (CS.isCall() && cast(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); + Value *Obj = findValue(*AI, /*OffsetOk=*/true); Assert(!isa(Obj), "Undefined behavior: Call with \"tail\" keyword references " "alloca", @@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. uint64_t Size = 0; if (const ConstantInt *Len = - dyn_cast(findValue(MCI->getLength(), DL, + dyn_cast(findValue(MCI->getLength(), /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); @@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) { visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; - - case Intrinsic::eh_begincatch: - visitEHBeginCatch(II); - break; - case Intrinsic::eh_endcatch: - visitEHEndCatch(II); - break; } } @@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) { "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { - Value *Obj = - findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); + Value *Obj = findValue(V, /*OffsetOk=*/true); Assert(!isa(Obj), "Unusual: Returning alloca value", &I); } } @@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I, if (Size == 0) return; - Value *UnderlyingObject = - findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); Assert(!isa(UnderlyingObject), "Undefined behavior: Null pointer dereference", &I); Assert(!isa(UnderlyingObject), @@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, // Check for buffer overflows and misalignment. // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. - auto &DL = I.getModule()->getDataLayout(); int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is // something we can handle and if so extract the size of this base object // along with its alignment. 
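On the Lint changes above: the pass now caches the module's DataLayout once in runOnFunction, so helpers such as findValue no longer thread a DL parameter through every call. A minimal sketch of the pattern, assuming this revision's legacy pass API; DLCachingPass is a made-up name, not part of the patch:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"

    namespace {
    struct DLCachingPass : public llvm::FunctionPass {
      static char ID;
      const llvm::DataLayout *DL = nullptr;
      DLCachingPass() : llvm::FunctionPass(ID) {}

      bool runOnFunction(llvm::Function &F) override {
        // Cached once here; every visit* helper can then use DL directly.
        DL = &F.getParent()->getDataLayout();
        return false;
      }
    };
    char DLCachingPass::ID = 0;
    }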
@@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast(Base)) { Type *ATy = AI->getAllocatedType(); if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = DL.getTypeAllocSize(ATy); + BaseSize = DL->getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL.getABITypeAlignment(ATy); + BaseAlign = DL->getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); if (GTy->isSized()) - BaseSize = DL.getTypeAllocSize(GTy); + BaseSize = DL->getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL.getABITypeAlignment(GTy); + BaseAlign = DL->getABITypeAlignment(GTy); } } @@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I, // Accesses that say that the memory is more aligned than it is are not // defined. if (Align == 0 && Ty && Ty->isSized()) - Align = DL.getABITypeAlignment(Ty); + Align = DL->getABITypeAlignment(Ty); Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I, void Lint::visitLoadInst(LoadInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getType()), I.getAlignment(), + DL->getTypeStoreSize(I.getType()), I.getAlignment(), I.getType(), MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getOperand(0)->getType()), + DL->getTypeStoreSize(I.getOperand(0)->getType()), I.getAlignment(), I.getOperand(0)->getType(), MemRef::Write); } @@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) { } void Lint::visitLShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast( - findValue(I.getOperand(1), I.getModule()->getDataLayout(), - /*OffsetOk=*/false))) + if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(1), + /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } -static bool -allPredsCameFromLandingPad(BasicBlock *BB, - SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - if (BB->isLandingPad()) - return true; - // If we find a block with no predecessors, the search failed. 
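On the visitLShr/visitAShr/visitShl hunks above: the lint rule itself is unchanged (a shift count is only defined when it is strictly less than the bit width of the shifted type); only the shape of the findValue call changed. A minimal standalone model of the check:

    #include <cassert>
    #include <cstdint>

    // Model of the rule behind "Undefined result: Shift count out of range";
    // the patch expresses it as CI->getValue().ult(BitWidth).
    bool shiftCountInRange(uint64_t Count, unsigned BitWidth) {
      return Count < BitWidth;
    }

    int main() {
      assert(shiftCountInRange(31, 32));
      assert(!shiftCountInRange(32, 32)); // would be flagged by the lint
      return 0;
    }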
- if (pred_empty(BB)) - return false; - for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) - return false; - } - return true; -} - -static bool -allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, - IntrinsicInst **SecondBeginCatch, - SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { - IntrinsicInst *IC = dyn_cast(I); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) - return true; - // If we find another begincatch while looking for an endcatch, - // that's also an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { - *SecondBeginCatch = IC; - return false; - } - } - - // If we reach a block with no successors while searching, the - // search has failed. - if (succ_empty(BB)) - return false; - // Otherwise, search all of the successors. - for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; - if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHBeginCatch(IntrinsicInst *II) { - // The checks in this function make a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block the isn't dominated by the catch - // block while we are searching the catch block's successors for a call - // to end catch intrinsic, then it is possible that it will be legal for - // a path through this block to never reach a call to llvm.eh.endcatch. - // An analogous statement could be made about our search for a landing - // pad among the catch block's predecessors. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.begincatch without having previously visited - // a landingpad instruction and that no path is possible at runtime that - // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch - // (mentally adjusting for the fact that in reality these calls will be - // removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - SmallSet VisitedBlocks; - BasicBlock *CatchBB = II->getParent(); - - // The begin catch must occur in a landing pad block or all paths - // to it must have come from a landing pad. - Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), - "llvm.eh.begincatch may be reachable without passing a landingpad", - II); - - // Reset the visited block list. - VisitedBlocks.clear(); - - IntrinsicInst *SecondBeginCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. 
- bool EndCatchFound = allSuccessorsReachEndCatch( - CatchBB, std::next(static_cast(II)), - &SecondBeginCatch, VisitedBlocks); - Assert( - SecondBeginCatch == nullptr, - "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", - II, SecondBeginCatch); - Assert(EndCatchFound, - "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", - II); -} - -static bool allPredCameFromBeginCatch( - BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, - IntrinsicInst **SecondEndCatch, SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - // Look for a begincatch in this block. - for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; - ++RI) { - IntrinsicInst *IC = dyn_cast(&*RI); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) - return true; - // If we find another end catch before we find a begin catch, that's - // an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { - *SecondEndCatch = IC; - return false; - } - // If we encounter a landingpad instruction, the search failed. - if (isa(*RI)) - return false; - } - // If while searching we find a block with no predeccesors, - // the search failed. - if (pred_empty(BB)) - return false; - // Search any predecessors we haven't seen before. - for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHEndCatch(IntrinsicInst *II) { - // The check in this function makes a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block the isn't post-dominated by the - // end catch block while we are searching the end catch block's predecessors - // for a call to the begin catch intrinsic, then it is possible that it will - // be legal for a path to reach the end catch block without ever having - // called llvm.eh.begincatch. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.endcatch without having previously visited - // a call to llvm.eh.begincatch (mentally adjusting for the fact that in - // reality these calls will be removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - BasicBlock *EndCatchBB = II->getParent(); - - // Alls paths to the end catch call must pass through a begin catch call. - - // If llvm.eh.begincatch wasn't called in the current block, we'll use this - // lambda to recursively look for it in predecessors. - SmallSet VisitedBlocks; - IntrinsicInst *SecondEndCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. 
-  bool BeginCatchFound =
-      allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
-                                &SecondEndCatch, VisitedBlocks);
-  Assert(
-      SecondEndCatch == nullptr,
-      "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
-      II, SecondEndCatch);
-  Assert(BeginCatchFound,
-         "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
-         II);
-}
-
 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                    AssumptionCache *AC) {
   // Assume undef could be zero.
@@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
 }

 void Lint::visitExtractElementInst(ExtractElementInst &I) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(
-          findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
-                    /*OffsetOk=*/false)))
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+                                                        /*OffsetOk=*/false)))
     Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
            "Undefined result: extractelement index out of range", &I);
 }

 void Lint::visitInsertElementInst(InsertElementInst &I) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(
-          findValue(I.getOperand(2), I.getModule()->getDataLayout(),
-                    /*OffsetOk=*/false)))
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+                                                        /*OffsetOk=*/false)))
     Assert(CI->getValue().ult(I.getType()->getNumElements()),
            "Undefined result: insertelement index out of range", &I);
 }

 void Lint::visitUnreachableInst(UnreachableInst &I) {
   // This isn't undefined behavior, it's merely suspicious.
-  Assert(&I == I.getParent()->begin() ||
-             std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+  Assert(&I == &I.getParent()->front() ||
+             std::prev(I.getIterator())->mayHaveSideEffects(),
          "Unusual: unreachable immediately preceded by instruction without "
          "side effects",
          &I);
@@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
 /// Most analysis passes don't require this logic, because instcombine
 /// will simplify most of these kinds of things away. But it's a goal of
 /// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
   SmallPtrSet<Value *, 4> Visited;
-  return findValueImpl(V, DL, OffsetOk, Visited);
+  return findValueImpl(V, OffsetOk, Visited);
 }

 /// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                            SmallPtrSetImpl<Value *> &Visited) const {
   // Detect self-referential values.
   if (!Visited.insert(V).second)
@@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
   // TODO: Look through eliminable cast pairs.
   // TODO: Look through calls with unique return values.
   // TODO: Look through vector insert/extract/shuffle.
-  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
+  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
   if (LoadInst *L = dyn_cast<LoadInst>(V)) {
-    BasicBlock::iterator BBI = L;
+    BasicBlock::iterator BBI = L->getIterator();
     BasicBlock *BB = L->getParent();
     SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
     for (;;) {
       if (!VisitedBlocks.insert(BB).second)
         break;
-      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
-                                              BB, BBI, 6, AA))
-        return findValueImpl(U, DL, OffsetOk, Visited);
+      if (Value *U =
+              FindAvailableLoadedValue(L->getPointerOperand(),
+                                       BB, BBI, DefMaxInstsToScan, AA))
+        return findValueImpl(U, OffsetOk, Visited);
       if (BBI != BB->begin()) break;
       BB = BB->getUniquePredecessor();
       if (!BB) break;
@@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     if (Value *W = PN->hasConstantValue())
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
-    if (CI->isNoopCast(DL))
-      return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
+    if (CI->isNoopCast(*DL))
+      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
   } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
     if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                      Ex->getIndices()))
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // Same as above, but for ConstantExpr instead of Instruction.
     if (Instruction::isCast(CE->getOpcode())) {
       if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                                CE->getOperand(0)->getType(), CE->getType(),
-                               DL.getIntPtrType(V->getType())))
-        return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
+                               DL->getIntPtrType(V->getType())))
+        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
       ArrayRef<unsigned> Indices = CE->getIndices();
       if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
         if (W != V)
-          return findValueImpl(W, DL, OffsetOk, Visited);
+          return findValueImpl(W, OffsetOk, Visited);
     }
   }

   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
-      return findValueImpl(W, DL, OffsetOk, Visited);
+    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+      return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
+    if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   }

   return V;
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 624c5a18d679..4b2fa3c6505a 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
   // from/to. If so, the previous load or store would have already trapped,
   // so there is no harm doing an extra load (also, CSE will later eliminate
   // the load entirely).
-  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+  BasicBlock::iterator BBI = ScanFrom->getIterator(),
+                       E = ScanFrom->getParent()->begin();

   // We can at least always strip pointer casts even though we can't use the
   // base here.
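The hunk that follows turns the hard-coded scan budget of 6 into the DefMaxInstsToScan option that the Lint change above already passes to FindAvailableLoadedValue. As a minimal standalone sketch of the bounded backward scan being parameterized (plain C++ with invented Inst and findAvailableValue stand-ins, not the LLVM API):

    #include <cstddef>
    #include <optional>
    #include <string>
    #include <vector>

    // Toy stand-in for an instruction; the real scan walks llvm::Instruction.
    struct Inst {
      bool IsStore = false;
      std::string Addr;        // address written, when IsStore
      std::string StoredValue; // value written, when IsStore
    };

    // Walk backwards from position Pos, looking for a store that makes the
    // value at Addr available. Give up once Limit instructions have been
    // examined (the role DefMaxInstsToScan plays below), or when a store
    // that may clobber Addr is seen. The real routine also reuses prior
    // loads and consults alias analysis; this models only the scan budget.
    std::optional<std::string> findAvailableValue(const std::vector<Inst> &BB,
                                                  std::size_t Pos,
                                                  const std::string &Addr,
                                                  unsigned Limit) {
      unsigned Scanned = 0;
      while (Pos > 0) {
        const Inst &I = BB[--Pos];
        if (++Scanned > Limit)
          return std::nullopt; // scan budget exhausted
        if (I.IsStore && I.Addr == Addr)
          return I.StoredValue; // the stored value is available here
        if (I.IsStore)
          return std::nullopt; // may write the same memory; be conservative
      }
      return std::nullopt;
    }

Since the limit becomes a cl::opt, it should also be tunable without rebuilding, e.g. opt -available-load-scan-limit=12; the flag name is taken from the hunk below, and reaching it through clang would presumably require -mllvm.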
@@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
   return false;
 }

+/// DefMaxInstsToScan - the default number of maximum instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
+/// threading in part by eliminating partially redundant loads.
+/// At that point, the value of MaxInstsToScan was already set to '6'
+/// without documented explanation.
+cl::opt<unsigned>
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+  cl::desc("Use this to specify the default maximum number of instructions "
+           "to scan backward from a given instruction, when searching for "
+           "available loaded value"));
+
 /// \brief Scan the ScanBB block backwards to see if we have the value at the
 /// memory address *Ptr locally available within a small number of instructions.
 ///
@@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   while (ScanFrom != ScanBB->begin()) {
     // We must ignore debug info directives when counting (otherwise they
     // would affect codegen).
-    Instruction *Inst = --ScanFrom;
+    Instruction *Inst = &*--ScanFrom;

     if (isa<DbgInfoIntrinsic>(Inst))
       continue;
@@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,

       // If we have alias analysis and it says the store won't modify the loaded
       // value, ignore the store.
-      if (AA &&
-          (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
-           AliasAnalysis::Mod) == 0)
+      if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
         continue;

       // Otherwise the store that may or may not alias the pointer, bail out.
@@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
       // If alias analysis claims that it really won't modify the load,
       // ignore it.
       if (AA &&
-          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
-           AliasAnalysis::Mod) == 0)
+          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
         continue;

       // May modify the pointer, bail out.
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index becbae4c5b50..d7896ade3543 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;

-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
-    "max-interesting-dependences", cl::Hidden,
-    cl::desc("Maximum number of interesting dependences collected by "
-             "loop-access analysis (default = 100)"),
-    cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+    MaxDependences("max-dependences", cl::Hidden,
+                   cl::desc("Maximum number of dependences collected by "
+                            "loop-access analysis (default = 100)"),
+                   cl::init(100));

 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
@@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {
   return V;
 }

-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                             const ValueToValueMap &PtrToStride,
                                             Value *Ptr, Value *OrigPtr) {
-
-  const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

   // If there is an entry in the map return the SCEV of the pointer with the
   // symbolic stride replaced by one.
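The next hunk replaces the unconditional stride rewrite with a SCEV predicate: rather than assuming a symbolic stride is 1, the analysis records a Stride == 1 predicate in the PredicatedScalarEvolution and leaves it to clients to emit a runtime guard. A source-level illustration of the situation being handled (a hypothetical kernel, not code from this patch):

    // The stride is only known at run time, so Y[i * Stride] is not an
    // affine access unless Stride is pinned down. Under the predicated SCEV,
    // LAA can analyze the loop assuming Stride == 1; a vectorized version is
    // then valid only behind a runtime test of that predicate, with this
    // scalar loop kept as the fallback.
    void saxpy_strided(float *X, float *Y, float A, int N, int Stride) {
      for (int i = 0; i < N; ++i)
        Y[i * Stride] = A * X[i * Stride] + Y[i * Stride];
    }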
@@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
     ValueToValueMap RewriteMap;
     RewriteMap[StrideVal] = One;

-    const SCEV *ByOne =
-        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
-    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+    ScalarEvolution *SE = PSE.getSE();
+    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+    const auto *CT =
+        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+    auto *Expr = PSE.getSCEV(Ptr);
+
+    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                  << "\n");
-    return ByOne;
+    return Expr;
   }

   // Otherwise, just return the SCEV of the original pointer.
-  return SE->getSCEV(Ptr);
+  return OrigSCEV;
 }

 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides) {
+                                    const ValueToValueMap &Strides,
+                                    PredicatedScalarEvolution &PSE) {
   // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
+  ScalarEvolution *SE = PSE.getSE();
   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+  const SCEV *ScStart = AR->getStart();
   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
-  Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
-                        Sc);
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // For expressions with negative step, the upper bound is ScStart and the
+  // lower bound is ScEnd.
+  if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+    if (CStep->getValue()->isNegative())
+      std::swap(ScStart, ScEnd);
+  } else {
+    // Fallback case: the step is not constant, but we can still
+    // get the upper and lower bounds of the interval by using min/max
+    // expressions.
+    ScStart = SE->getUMinExpr(ScStart, ScEnd);
+    ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+  }
+
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
 }

-bool RuntimePointerChecking::needsChecking(
-    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
-    const SmallVectorImpl<int> *PtrPartition) const {
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+  SmallVector<PointerCheck, 4> Checks;
+
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+      if (needsChecking(CGI, CGJ))
+        Checks.push_back(std::make_pair(&CGI, &CGJ));
+    }
+  }
+  return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+  assert(Checks.empty() && "Checks is not empty");
+  groupChecks(DepCands, UseDependencies);
+  Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+                                           const CheckingPtrGroup &N) const {
   for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
     for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
-      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+      if (needsChecking(M.Members[I], N.Members[J]))
         return true;
   return false;
 }
@@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(

   CheckingGroups.clear();

+  // If we need to check two pointers to the same underlying object
+  // with a non-constant difference, we shouldn't perform any pointer
+  // grouping with those pointers. This is because we can easily get
+  // into cases where the resulting check would return false, even when
+  // the accesses are safe.
+  //
+  // The following example shows this:
+  // for (i = 0; i < 1000; ++i)
+  //   a[5000 + i * m] = a[i] + a[i + 9000]
+  //
+  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+  // (0, 10000) which is always false. However, if m is 1, there is no
+  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+  // us to perform an accurate check in this case.
+  //
+  // The above case requires that we have an UnknownDependence between
+  // accesses to the same underlying object. This cannot happen unless
+  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+  // is also false. In this case we will use the fallback path and create
+  // separate checking groups for all pointers.
+
   // If we don't have the dependency partitions, construct a new
-  // checking pointer group for each pointer.
+  // checking pointer group for each pointer. This is also required
+  // for correctness, because in this case we can have checking between
+  // pointers to the same underlying object.
   if (!UseDependencies) {
     for (unsigned I = 0; I < Pointers.size(); ++I)
       CheckingGroups.push_back(CheckingPtrGroup(I, *this));
@@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(
   // don't process them twice.
   SmallSet<unsigned, 2> Seen;

-  // Go through all equivalence classes, get the the "pointer check groups"
+  // Go through all equivalence classes, get the "pointer check groups"
   // and add them to the overall solution. We use the order in which accesses
   // appear in 'Pointers' to enforce determinism.
for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks( } } -bool RuntimePointerChecking::needsChecking( - unsigned I, unsigned J, const SmallVectorImpl *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( + const SmallVectorImpl &PtrToPartition, unsigned PtrIdx1, + unsigned PtrIdx2) { + return (PtrToPartition[PtrIdx1] != -1 && + PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { const PointerInfo &PointerI = Pointers[I]; const PointerInfo &PointerJ = Pointers[J]; @@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking( if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; - // If PtrPartition is set omit checks between pointers of the same partition. - // Partition number -1 means that the pointer is used in multiple partitions. - // In this case we can't omit the check. - if (PtrPartition && (*PtrPartition)[I] != -1 && - (*PtrPartition)[I] == (*PtrPartition)[J]) - return false; - return true; } -void RuntimePointerChecking::print( - raw_ostream &OS, unsigned Depth, - const SmallVectorImpl *PtrPartition) const { - - OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( + raw_ostream &OS, const SmallVectorImpl &Checks, + unsigned Depth) const { unsigned N = 0; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) - for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { - OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; + for (const auto &Check : Checks) { + const auto &First = Check.first->Members, &Second = Check.second->Members; - for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" - << "\n"; - } + OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Against group " << J << ":\n"; + OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; + for (unsigned K = 0; K < First.size(); ++K) + OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; - for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" - << "\n"; - } - } - - OS.indent(Depth) << "Grouped accesses:\n"; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) { - OS.indent(Depth + 2) << "Group " << I << ":\n"; - OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low - << " High: " << *CheckingGroups[I].High << ")\n"; - for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { - OS.indent(Depth + 6) << "Member: " - << *Pointers[CheckingGroups[I].Members[J]].Expr - << "\n"; - } + OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; + for (unsigned K = 0; K < Second.size(); ++K) + OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } -unsigned RuntimePointerChecking::getNumberOfChecks( - const SmallVectorImpl *PtrPartition) const { +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { - unsigned NumPartitions = CheckingGroups.size(); - unsigned CheckCount = 0; + 
OS.indent(Depth) << "Run-time memory checks:\n"; + printChecks(OS, Checks, Depth); - for (unsigned I = 0; I < NumPartitions; ++I) - for (unsigned J = I + 1; J < NumPartitions; ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) - CheckCount++; - return CheckCount; -} + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + const auto &CG = CheckingGroups[I]; -bool RuntimePointerChecking::needsAnyChecking( - const SmallVectorImpl *PtrPartition) const { - unsigned NumPointers = Pointers.size(); - - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) - return true; - return false; + OS.indent(Depth + 2) << "Group " << &CG << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High + << ")\n"; + for (unsigned J = 0; J < CG.Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr + << "\n"; + } + } } namespace { @@ -390,9 +424,10 @@ public: typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false) {} + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -435,7 +470,7 @@ public: /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); - DepChecker.clearInterestingDependences(); + DepChecker.clearDependences(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -477,14 +512,18 @@ private: /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared /// while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; + + /// The SCEV predicate containing all the SCEV-related assumptions. + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, - const ValueToValueMap &Strides, Value *Ptr) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, + const ValueToValueMap &Strides, Value *Ptr, + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // Each access has its own dependence set. 
       DepId = RunningDepId++;

-      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);

       DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
     } else {
@@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
   }

   if (NeedRTCheck && CanDoRT)
-    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

-  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                << " pointer comparisons.\n");

   RtCheck.Need = NeedRTCheck;
@@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {
         GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
         DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
         for (Value *UnderlyingObj : TempObjects) {
+          // nullptr never aliases; don't join sets for pointers that have
+          // "null" in their UnderlyingObjects list.
+          if (isa<ConstantPointerNull>(UnderlyingObj))
+            continue;
+
           UnderlyingObjToAccessMap::iterator Prev =
             ObjToLastAccess.find(UnderlyingObj);
           if (Prev != ObjToLastAccess.end())
@@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 }

 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
-                       const ValueToValueMap &StridesMap) {
-  const Type *Ty = Ptr->getType();
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+                       const Loop *Lp, const ValueToValueMap &StridesMap) {
+  Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");

   // Make sure that the pointer does not point to aggregate types.
-  const PointerType *PtrTy = cast<PointerType>(Ty);
+  auto *PtrTy = cast<PointerType>(Ty);
   if (PtrTy->getElementType()->isAggregateType()) {
     DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
                  << "\n");
     return 0;
   }

-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR) {
@@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
-          << *Ptr << " SCEV: " << *PtrScev << "\n");
+                 << *Ptr << " SCEV: " << *PtrScev << "\n");
     return 0;
   }

   // Check the step is constant.
-  const SCEV *Step = AR->getStepRecurrence(*SE);
+  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

   // Calculate the pointer stride and check if it is constant.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
   int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());

-  const APInt &APStepVal = C->getValue()->getValue();
+  const APInt &APStepVal = C->getAPInt();

   // Huge step value - give up.
if (APStepVal.getBitWidth() > 64) @@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: + case ForwardButPreventsForwarding: + case Unknown: return false; case BackwardVectorizable: - case Unknown: - case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return true; @@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { - case NoDep: case Forward: case ForwardButPreventsForwarding: - return false; + return true; + case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: - return true; + return false; } llvm_unreachable("unexpected DepType!"); } @@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned TypeByteSize = DL.getTypeAllocSize(ATy); // Negative distances are not plausible dependencies. - const APInt &Val = C->getValue()->getValue(); + const APInt &Val = C->getAPInt(); if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); if (IsTrueDataDependence && @@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Could be improved to assert type sizes are the same (i32 == float, etc). 
if (Val == 0) { if (ATy == BTy) - return Dependence::NoDep; + return Dependence::Forward; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } @@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, isDependent(*A.first, A.second, *B.first, B.second, Strides); SafeForVectorization &= Dependence::isSafeForVectorization(Type); - // Gather dependences unless we accumulated MaxInterestingDependence + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. - if (RecordInterestingDependences) { - if (Dependence::isInterestingDependence(Type)) - InterestingDependences.push_back( - Dependence(A.second, B.second, Type)); + if (RecordDependences) { + if (Type != Dependence::NoDep) + Dependences.push_back(Dependence(A.second, B.second, Type)); - if (InterestingDependences.size() >= MaxInterestingDependence) { - RecordInterestingDependences = false; - InterestingDependences.clear(); + if (Dependences.size() >= MaxDependences) { + RecordDependences = false; + Dependences.clear(); DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } - if (!RecordInterestingDependences && !SafeForVectorization) + if (!RecordDependences && !SafeForVectorization) return false; } ++OI; @@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Interesting Dependences: " - << InterestingDependences.size() << "\n"); + DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(LoopAccessReport(&*it) << "instruction cannot be vectorized"); CanVecMem = false; return; @@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. 
bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair LoopAccessInfo::addRuntimeCheck( - Instruction *Loc, const SmallVectorImpl *PtrPartition) const { - if (!PtrRtChecking.Need) - return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH Start; + TrackingVH End; +}; +} // end anonymous namespace - SmallVector, 2> Starts; - SmallVector, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, + Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { + Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, TheLoop)) { + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); + return {Ptr, Ptr}; + } else { + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; + + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; + } +} + +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. 
+static SmallVector, 4> expandBounds( + const SmallVectorImpl &PointerChecks, + Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { + SmallVector, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + std::transform( + PointerChecks.begin(), PointerChecks.end(), + std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerChecking::PointerCheck &Check) { + PointerBounds + First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), + Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} + +std::pair LoopAccessInfo::addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl &PointerChecks) + const { + auto *SE = PSE.getSE(); + SCEVExpander Exp(*SE, DL, "induction"); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; - - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - const RuntimePointerChecking::CheckingPtrGroup &CG = - PtrRtChecking.CheckingGroups[i]; - Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; - const SCEV *Sc = SE->getSCEV(Ptr); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); - Starts.push_back(Ptr); - Ends.push_back(Ptr); - } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - Value *Start = nullptr, *End = nullptr; - - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); - Starts.push_back(Start); - Ends.push_back(End); - } - } - IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { - const RuntimePointerChecking::CheckingPtrGroup &CGI = - PtrRtChecking.CheckingGroups[i]; - const RuntimePointerChecking::CheckingPtrGroup &CGJ = - PtrRtChecking.CheckingGroups[j]; - if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) - continue; + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); - unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); - unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); - assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && - (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); - - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, - "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; } + MemoryRuntimeCheck = IsConflict; } if (!MemoryRuntimeCheck) @@ -1661,12 +1737,20 @@ std::pair LoopAccessInfo::addRuntimeCheck( return std::make_pair(FirstInst, Check); } +std::pair +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { + if (!PtrRtChecking.Need) + return std::make_pair(nullptr, nullptr); + + return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} + LoopAccessInfo::LoopAccessInfo(Loop *L, 
ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { - OS.indent(Depth) << "Interesting Dependences:\n"; - for (auto &Dep : *InterestingDependences) { + if (auto *Dependences = DepChecker.getDependences()) { + OS.indent(Depth) << "Dependences:\n"; + for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); OS << "\n"; } } else - OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; + OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. PtrRtChecking.print(OS, Depth); @@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Store to invariant address was " << (StoreToLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; + + OS.indent(Depth) << "SCEV assumptions:\n"; + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & @@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, - Strides); + LAI = + llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { } bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); @@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { } void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 6b6faf8a66c3..9ab9eead584f 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return false; if (I->mayReadFromMemory()) return false; - // The landingpad instruction is immobile. 
-  if (isa<LandingPadInst>(I))
+  // EH block instructions are immobile.
+  if (I->isEHPad())
     return false;
   // Determine the insertion point, unless one was given.
   if (!InsertPt) {
@@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,

   // Hoist.
   I->moveBefore(InsertPt);
+
+  // There is a possibility of hoisting this instruction above some arbitrary
+  // condition. Any metadata defined on it can be control dependent on this
+  // condition. Conservatively strip it here so that we don't give any wrong
+  // information to the optimizer.
+  I->dropUnknownNonDebugMetadata();
+
   Changed = true;
   return true;
 }
@@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
 bool Loop::isLCSSAForm(DominatorTree &DT) const {
   for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
     BasicBlock *BB = *BI;
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) {
+      // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+      // optimizations, so for the purposes of considering LCSSA form, we
+      // can ignore them.
+      if (I->getType()->isTokenTy())
+        continue;
+
       for (Use &U : I->uses()) {
         Instruction *UI = cast<Instruction>(U.getUser());
         BasicBlock *UserBB = UI->getParent();
@@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
             DT.isReachableFromEntry(UserBB))
           return false;
       }
+    }
   }

   return true;
 }

+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
+  if (!isLCSSAForm(DT))
+    return false;
+
+  return std::all_of(begin(), end(), [&](const Loop *L) {
+    return L->isRecursivelyLCSSAForm(DT);
+  });
+}
+
 /// isLoopSimplifyForm - Return true if the Loop is in the form that
 /// the LoopSimplify form transforms loops to, which is sometimes called
 /// normal form.
@@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const {
     if (isa<IndirectBrInst>((*I)->getTerminator()))
       return false;

-    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
+    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
       if (II->cannotDuplicate())
         return false;
+      // Return false if any loop blocks contain invokes to EH-pads other than
+      // landingpads; we don't know how to split those edges yet.
+      auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI();
+      if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI))
+        return false;
+    }

     for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
       if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
         if (CI->cannotDuplicate())
           return false;
       }
+      if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I))
+        return false;
     }
   }
   return true;
@@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
   return NearLoop;
 }

-/// updateUnloop - The last backedge has been removed from a loop--now the
-/// "unloop". Find a new parent for the blocks contained within unloop and
-/// update the loop tree. We don't necessarily have valid dominators at this
-/// point, but LoopInfo is still valid except for the removal of this loop.
-///
-/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
-/// checking first is illegal.
+LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+  analyze(DomTree);
+}
+
 void LoopInfo::updateUnloop(Loop *Unloop) {
+  Unloop->markUnlooped();

   // First handle the special case of no parent loop to simplify the algorithm.
   if (!Unloop->getParentLoop()) {
@@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
   // objects. I don't want to add that kind of complexity until the scope of
   // the problem is better understood.
   LoopInfo LI;
-  LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+  LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));
   return LI;
 }

@@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
   return PreservedAnalyses::all();
 }

+PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
+PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner)
+    : OS(OS), Banner(Banner) {}
+
+PreservedAnalyses PrintLoopPass::run(Loop &L) {
+  OS << Banner;
+  for (auto *Block : L.blocks())
+    if (Block)
+      Block->print(OS);
+    else
+      OS << "Printing <null> block";
+  return PreservedAnalyses::all();
+}
+
 //===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
@@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",

 bool LoopInfoWrapperPass::runOnFunction(Function &) {
   releaseMemory();
-  LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+  LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
   return false;
 }
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index e9fcf02118b9..dc424734dd56 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
@@ -27,35 +28,26 @@ namespace {

 /// PrintLoopPass - Print a Function corresponding to a Loop.
 ///
-class PrintLoopPass : public LoopPass {
-private:
-  std::string Banner;
-  raw_ostream &Out; // raw_ostream to print on.
+class PrintLoopPassWrapper : public LoopPass {
+  PrintLoopPass P;

 public:
   static char ID;
-  PrintLoopPass(const std::string &B, raw_ostream &o)
-      : LoopPass(ID), Banner(B), Out(o) {}
+  PrintLoopPassWrapper() : LoopPass(ID) {}
+  PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner)
+      : LoopPass(ID), P(OS, Banner) {}

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }

   bool runOnLoop(Loop *L, LPPassManager &) override {
-    Out << Banner;
-    for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
-         b != be;
-         ++b) {
-      if (*b)
-        (*b)->print(Out);
-      else
-        Out << "Printing <null> block";
-    }
+    P.run(*L);
     return false;
   }
 };

-char PrintLoopPass::ID = 0;
+char PrintLoopPassWrapper::ID = 0;
 }

 //===----------------------------------------------------------------------===//
@@ -66,81 +58,34 @@ char LPPassManager::ID = 0;

 LPPassManager::LPPassManager()
   : FunctionPass(ID), PMDataManager() {
-  skipThisLoop = false;
-  redoThisLoop = false;
   LI = nullptr;
   CurrentLoop = nullptr;
 }

-/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
-void LPPassManager::deleteLoopFromQueue(Loop *L) {
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+Loop &LPPassManager::addLoop(Loop *ParentLoop) {
+  // Create a new loop. LI will take ownership.
+  Loop *L = new Loop();

-  LI->updateUnloop(L);
+  // Insert into the loop nest and the loop queue.
+  if (!ParentLoop) {
+    // This is the top level loop.
+    LI->addTopLevelLoop(L);
+    LQ.push_front(L);
+    return *L;
+  }

-  // Notify passes that the loop is being deleted.
-  deleteSimpleAnalysisLoop(L);
-
-  // If L is current loop then skip rest of the passes and let
-  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
-  // and continue applying other passes on CurrentLoop.
-  if (CurrentLoop == L)
-    skipThisLoop = true;
-
-  delete L;
-
-  if (skipThisLoop)
-    return;
-
-  for (std::deque<Loop *>::iterator I = LQ.begin(),
-         E = LQ.end(); I != E; ++I) {
-    if (*I == L) {
-      LQ.erase(I);
+  ParentLoop->addChildLoop(L);
+  // Insert L into the loop queue after the parent loop.
+  for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
+    if (*I == L->getParentLoop()) {
+      // deque does not support insert after.
+      ++I;
+      LQ.insert(I, 1, L);
       break;
     }
   }
-}
-
-// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
-void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
-
-  assert (CurrentLoop != L && "Cannot insert CurrentLoop");
-
-  // Insert into loop nest
-  if (ParentLoop)
-    ParentLoop->addChildLoop(L);
-  else
-    LI->addTopLevelLoop(L);
-
-  insertLoopIntoQueue(L);
-}
-
-void LPPassManager::insertLoopIntoQueue(Loop *L) {
-  // Insert L into loop queue
-  if (L == CurrentLoop)
-    redoLoop(L);
-  else if (!L->getParentLoop())
-    // This is top level loop.
-    LQ.push_front(L);
-  else {
-    // Insert L after the parent loop.
-    for (std::deque<Loop *>::iterator I = LQ.begin(),
-           E = LQ.end(); I != E; ++I) {
-      if (*I == L->getParentLoop()) {
-        // deque does not support insert after.
-        ++I;
-        LQ.insert(I, 1, L);
-        break;
-      }
-    }
-  }
-}
-
-// Reoptimize this loop. LPPassManager will re-insert this loop into the
-// queue. This allows LoopPass to change loop nest for the loop. This
-// utility may send LPPassManager into infinite loops so use caution.
-void LPPassManager::redoLoop(Loop *L) {
-  assert (CurrentLoop == L && "Can redo only CurrentLoop");
-  redoThisLoop = true;
+  return *L;
 }

 /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
@@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) {

   // Walk Loops
   while (!LQ.empty()) {
-    CurrentLoop = LQ.back();
-    skipThisLoop = false;
-    redoThisLoop = false;
-
+    CurrentLoop = LQ.back();
     // Run all passes on the current Loop.
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
@@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) {

       if (Changed)
         dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
-                     skipThisLoop ? "<deleted>" :
-                                    CurrentLoop->getHeader()->getName());
+                     CurrentLoop->isUnloop()
+                         ? "<deleted>"
+                         : CurrentLoop->getHeader()->getName());
       dumpPreservedSet(P);

-      if (!skipThisLoop) {
+      if (CurrentLoop->isUnloop()) {
+        // Notify passes that the loop is being deleted.
+        deleteSimpleAnalysisLoop(CurrentLoop);
+      } else {
         // Manually check that this loop is still healthy. This is done
         // instead of relying on LoopInfo::verifyLoop since LoopInfo
         // is a function pass and it's really expensive to verify every
@@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) {
       removeNotPreservedAnalysis(P);
       recordAvailableAnalysis(P);
-      removeDeadPasses(P,
-                       skipThisLoop ? "<deleted>" :
-                                      CurrentLoop->getHeader()->getName(),
+      removeDeadPasses(P, CurrentLoop->isUnloop()
+                              ? "<deleted>"
+                              : CurrentLoop->getHeader()->getName(),
                        ON_LOOP_MSG);

-      if (skipThisLoop)
+      if (CurrentLoop->isUnloop())
         // Do not run other passes on this loop.
         break;
     }

     // If the loop was deleted, release all the loop passes. This frees up
     // some memory, and avoids trouble with the pass manager trying to call
     // verifyAnalysis on them.
-    if (skipThisLoop)
+    if (CurrentLoop->isUnloop()) {
       for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
         Pass *P = getContainedPass(Index);
         freePass(P, "", ON_LOOP_MSG);
       }
+      delete CurrentLoop;
+    }

     // Pop the loop from queue after running all passes.
     LQ.pop_back();
-
-    if (redoThisLoop)
-      LQ.push_back(CurrentLoop);
   }

   // Finalization
@@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {

 Pass *LoopPass::createPrinterPass(raw_ostream &O,
                                   const std::string &Banner) const {
-  return new PrintLoopPass(Banner, O);
+  return new PrintLoopPassWrapper(O, Banner);
 }

 // Check if this pass is suitable for the current LPPassManager, if
diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile
index 4af6d350a645..93fd7f9bdd93 100644
--- a/lib/Analysis/Makefile
+++ b/lib/Analysis/Makefile
@@ -9,7 +9,6 @@ LEVEL = ../..

 LIBRARYNAME = LLVMAnalysis
-DIRS = IPA
 BUILD_ARCHIVE = 1

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index da3b829b6d31..078cefe51807 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -49,7 +49,7 @@ namespace {
     void print(raw_ostream &OS, const Module * = nullptr) const override;

     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequiredTransitive<AliasAnalysis>();
+      AU.addRequiredTransitive<AAResultsWrapperPass>();
       AU.addRequiredTransitive<MemoryDependenceAnalysis>();
       AU.setPreservesAll();
     }
@@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
   // All this code uses non-const interfaces because MemDep is not
   // const-friendly, though nothing is actually modified.
-  for (auto &I : inst_range(F)) {
+  for (auto &I : instructions(F)) {
     Instruction *Inst = &I;

     if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
@@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
 }

 void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
-  for (const auto &I : inst_range(*F)) {
+  for (const auto &I : instructions(*F)) {
     const Instruction *Inst = &I;

     DepSetMap::const_iterator DI = Deps.find(Inst);
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index fa292a28ec87..36f1424c8cf9 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -22,7 +22,8 @@ using namespace llvm;

 namespace {
   struct MemDerefPrinter : public FunctionPass {
-    SmallVector<Value *, 4> Vec;
+    SmallVector<Value *, 4> Deref;
+    SmallPtrSet<Value *, 4> DerefAndAligned;

     static char ID; // Pass identification, replacement for typeid
     MemDerefPrinter() : FunctionPass(ID) {
@@ -34,7 +35,8 @@ namespace {
     bool runOnFunction(Function &F) override;
     void print(raw_ostream &OS, const Module * = nullptr) const override;
     void releaseMemory() override {
-      Vec.clear();
+      Deref.clear();
+      DerefAndAligned.clear();
     }
   };
 }
@@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() {

 bool MemDerefPrinter::runOnFunction(Function &F) {
   const DataLayout &DL = F.getParent()->getDataLayout();
-  for (auto &I: inst_range(F)) {
+  for (auto &I: instructions(F)) {
     if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       Value *PO = LI->getPointerOperand();
       if (isDereferenceablePointer(PO, DL))
-        Vec.push_back(PO);
+        Deref.push_back(PO);
+      if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+        DerefAndAligned.insert(PO);
     }
   }
   return false;
@@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) {

 void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
   OS << "The following are dereferenceable:\n";
-  for (auto &V: Vec) {
+  for (Value *V: Deref) {
     V->print(OS);
+    if (DerefAndAligned.count(V))
+
OS << "\t(aligned)"; + else + OS << "\t(unaligned)"; OS << "\n\n"; } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8ddac8ffb971..b19ecadd3161 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define DEBUG_TYPE "memory-builtins" -enum AllocType { +enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null CallocLike = 1<<2, // allocates + bzero @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = { {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long) + {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow) + {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long) + {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow) {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, @@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return nullptr; - unsigned i = 0; - bool found = false; - for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (AllocationFnData[i].Func == TLIFn) { - found = true; - break; - } - } - if (!found) + const AllocFnsTy *FnData = + std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData), + [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; }); + + if (FnData == std::end(AllocationFnData)) return nullptr; - const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) return nullptr; @@ -184,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } -/// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); -} - /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory and never returns null (such as operator new). 
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, @@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + TLIFn == LibFunc::ZdaPv || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return nullptr; @@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { // always generate code immediately before the instruction being // processed, so that the generated code dominates the same BBs - Instruction *PrevInsertPoint = Builder.GetInsertPoint(); + BuilderTy::InsertPointGuard Guard(Builder); if (Instruction *I = dyn_cast(V)) Builder.SetInsertPoint(I); @@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { Result = unknown(); } - if (PrevInsertPoint) - Builder.SetInsertPoint(PrevInsertPoint); - // Don't reuse CacheIt since it may be invalid at this point. 
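
Editorial sketch: the ObjectSizeOffsetEvaluator hunk above trades manual save/restore of the builder position for an RAII guard. A minimal illustration of the pattern, outside the patch (the helper name is hypothetical; the guard type is the real IRBuilder API):

    void emitBefore(llvm::IRBuilder<> &Builder, llvm::Instruction *I,
                    llvm::Value *A, llvm::Value *B) {
      // The guard records the current insertion block/point and debug loc.
      llvm::IRBuilder<>::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);       // temporarily emit just before I
      Builder.CreateAdd(A, B, "tmp");  // lands at the redirected position
    }                                  // old insertion point restored here

Because the guard restores the position on every exit path, the removed PrevInsertPoint bookkeeping and its null check become unnecessary.
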
CacheMap[V] = Result; return Result; @@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { // compute offset/size for each PHI incoming pointer for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { - Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt()); SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); if (!bothKnown(EdgeData)) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 782a67bf72d5..3e80bfe1fdfb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,7 +22,9 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -static const unsigned int BlockScanLimit = 100; + +static cl::opt<unsigned> BlockScanLimit( "memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)")); // Limit on the number of memdep results to process. static const unsigned int NumResultsLimit = 100; @@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0; INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() { void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } bool MemoryDependenceAnalysis::runOnFunction(Function &F) { - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ?
&DTWP->getDomTree() : nullptr; + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return false; } @@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction *, -static AliasAnalysis::ModRefResult -GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { +static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, + const TargetLibraryInfo &TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::Ref; + return MRI_Ref; } if (LI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::Mod; + return MRI_Mod; } if (SI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Loc = MemoryLocation::get(V); - return AliasAnalysis::ModRef; + return MRI_ModRef; } - if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure Loc = MemoryLocation(CI->getArgOperand(0)); - return AliasAnalysis::Mod; + return MRI_Mod; } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; case Intrinsic::invariant_end: II->getAAMetadata(AAInfo); Loc = MemoryLocation( @@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; default: break; } @@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) - return AliasAnalysis::ModRef; + return MRI_ModRef; if (Inst->mayReadFromMemory()) - return AliasAnalysis::Ref; - return AliasAnalysis::NoModRef; + return MRI_Ref; + return MRI_NoModRef; } /// getCallSiteDependencyFrom - Private helper for finding the local @@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; // If this inst is a memory op, get the pointer it accessed MemoryLocation Loc; - AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + ModRefInfo MR = GetLocation(Inst, Loc, *TLI); if (Loc.Ptr) { // A simple instruction. - if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef) return MemDepResult::getClobber(Inst); continue; } @@ -224,10 +233,10 @@ if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. switch (AA->getModRefInfo(CS, InstCS)) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated.
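
An orientation note on the enum used throughout these hunks: the mechanical rewrite from AliasAnalysis::ModRefResult to the free-standing ModRefInfo keeps the bit-flag encoding, which (quoting the 3.8-era llvm/Analysis/AliasAnalysis.h from memory, so verify against the header) is:

    enum ModRefInfo {
      MRI_NoModRef = 0,
      MRI_Ref = 1,
      MRI_Mod = 2,
      MRI_ModRef = MRI_Ref | MRI_Mod
    };
    // This encoding is why tests in these hunks, such as !(MR & MRI_Mod)
    // and MR != MRI_NoModRef, remain plain integer bit tests.
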
- if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && + if (isReadOnlyCall && !(MR & MRI_Mod) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); @@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // If we could not obtain a pointer for the instruction and the instruction // touches memory then assume that this is a dependency. - if (MR != AliasAnalysis::NoModRef) + if (MR != MRI_NoModRef) return MemDepResult::getClobber(Inst); } @@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { + if (QueryInst != nullptr) { + if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { + MemDepResult invariantGroupDependency = + getInvariantGroupPointerDependency(LI, BB); + + if (invariantGroupDependency.isDef()) + return invariantGroupDependency; + } + } + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult +MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB) { + Value *LoadOperand = LI->getPointerOperand(); + // It is not safe to walk the use list of a global value, because function + // passes aren't allowed to look outside their functions. + if (isa<GlobalValue>(LoadOperand)) + return MemDepResult::getUnknown(); + + auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); + if (!InvariantGroupMD) + return MemDepResult::getUnknown(); + + MemDepResult Result = MemDepResult::getUnknown(); + llvm::SmallSet<Value *, 14> Seen; + // Queue to process all pointers that are equivalent to load operand. + llvm::SmallVector<Value *, 8> LoadOperandsQueue; + LoadOperandsQueue.push_back(LoadOperand); + while (!LoadOperandsQueue.empty()) { + Value *Ptr = LoadOperandsQueue.pop_back_val(); + if (isa<GlobalValue>(Ptr)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { + if (!Seen.count(BCI->getOperand(0))) { + LoadOperandsQueue.push_back(BCI->getOperand(0)); + Seen.insert(BCI->getOperand(0)); + } + } + + for (Use &Us : Ptr->uses()) { + auto *U = dyn_cast<Instruction>(Us.getUser()); + if (!U || U == LI || !DT->dominates(U, LI)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(U)) { + if (!Seen.count(BCI)) { + LoadOperandsQueue.push_back(BCI); + Seen.insert(BCI); + } + continue; + } + // If we hit a load/store with the same invariant.group metadata (and the + // same pointer operand), we can assume that the value pointed to by the + // pointer operand didn't change. + if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && + U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + return MemDepResult::getDef(U); + } + } + return Result; +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst) { + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; @@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const DataLayout &DL = BB->getModule()->getDataLayout(); + // Create a numbered basic block to lazily compute and cache instruction + // positions inside a BB. This is used to provide fast queries for relative + // position between two instructions in a BB and can be used by + // AliasAnalysis::callCapturesBefore. + OrderedBasicBlock OBB(BB); + // Walk backwards through the basic block, looking for dependencies.
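
Before the scan loop resumes below, a sketch of how a client typically consumes this analysis under the legacy pass manager (the helper is hypothetical; the MemDepResult accessors are the real API):

    void inspect(llvm::MemoryDependenceAnalysis &MD, llvm::LoadInst *LI,
                 llvm::raw_ostream &OS) {
      llvm::MemDepResult Dep = MD.getDependency(LI); // in-block query
      if (Dep.isDef())            // found the unique access LI depends on,
        Dep.getInst()->print(OS); // e.g. a store through an invariant.group
                                  // pointer located by the walk above
      else if (Dep.isNonLocal())
        OS << "dependency lies outside the query block\n";
    }
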
while (ScanIt != BB->begin()) { - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; if (IntrinsicInst *II = dyn_cast(Inst)) // Debug intrinsics don't (and can't) cause dependencies. @@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) + if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef) continue; // Ok, this store might clobber the query pointer. Check to see if it is @@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL); @@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( continue; // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. - if (MR == AliasAnalysis::ModRef) - MR = AA->callCapturesBefore(Inst, MemLoc, DT); + if (MR == MRI_ModRef) + MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB); switch (MR) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; - case AliasAnalysis::Mod: + case MRI_Mod: return MemDepResult::getClobber(Inst); - case AliasAnalysis::Ref: + case MRI_Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) @@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getNonFuncLocal(); } else { MemoryLocation MemLoc; - AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI); if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. - bool isLoad = !(MR & AliasAnalysis::Mod); + bool isLoad = !(MR & MRI_Mod); if (IntrinsicInst *II = dyn_cast(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; - LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent, QueryInst); + LocalCache = getPointerDependencyFrom( + MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst); } else if (isa(QueryInst) || isa(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + LocalCache = getCallSiteDependencyFrom( + QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent); } else // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); @@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { BasicBlock::iterator ScanPos = DirtyBB->end(); if (ExistingResult) { if (Instruction *Inst = ExistingResult->getResult().getInst()) { - ScanPos = Inst; + ScanPos = Inst->getIterator(); // We're removing QueryInst's use of Inst. 
RemoveFromReverseMap(ReverseNonLocalDeps, Inst, QueryCS.getInstruction()); @@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock( assert(ExistingResult->getResult().getInst()->getParent() == BB && "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; - ScanPos = ExistingResult->getResult().getInst(); + ScanPos = ExistingResult->getResult().getInst()->getIterator(); // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); - RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } @@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // the entire block to get to this point. MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) - NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator()); ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { @@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); - AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); } /// verifyRemoved - Verify that the specified instruction does not occur diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp deleted file mode 100644 index 322a9a80de4c..000000000000 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the default implementation of the Alias Analysis interface -// that simply returns "I don't know" for all queries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - /// NoAA - This class implements the -no-aa pass, which always returns "I - /// don't know" for alias queries. NoAA is unlike other alias analysis - /// implementations, in that it does not chain to a previous analysis. As - /// such it doesn't follow many of the rules that other alias analyses must. - /// - struct NoAA : public ImmutablePass, public AliasAnalysis { - static char ID; // Class identification, replacement for typeinfo - NoAA() : ImmutablePass(ID) { - initializeNoAAPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override {} - - bool doInitialization(Module &M) override { - // Note: NoAA does not call InitializeAliasAnalysis because it's - // special and does not support chaining. 
- DL = &M.getDataLayout(); - return true; - } - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - return MayAlias; - } - - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - return UnknownModRefBehavior; - } - ModRefBehavior getModRefBehavior(const Function *F) override { - return UnknownModRefBehavior; - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return false; - } - ModRefResult getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) override { - return ModRef; - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - return ModRef; - } - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return ModRef; - } - - void deleteValue(Value *V) override {} - void addEscapingUse(Use &U) override {} - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - }; -} // End of anonymous namespace - -// Register this pass... -char NoAA::ID = 0; -INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", - "No Alias Analysis (always returns 'may' alias)", - true, true, true) - -ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp similarity index 53% rename from lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp rename to lib/Analysis/ObjCARCAliasAnalysis.cpp index 3893aab76b2a..25f660ffe221 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -18,66 +18,46 @@ /// used. Naive LLVM IR transformations which would otherwise be /// behavior-preserving may break these assumptions. /// +/// TODO: Theoretically we could check for dependencies between objc_* calls +/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls. +/// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" -#include "ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/PassSupport.h" #define DEBUG_TYPE "objc-arc-aa" -namespace llvm { - class Function; - class Value; -} - using namespace llvm; using namespace llvm::objcarc; -// Register this pass... 
-char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -bool ObjCARCAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // First, strip off no-ops, including ObjC-specific no-ops, and try making a // precise alias query. const Value *SA = GetRCIdentityRoot(LocA.Ptr); const Value *SB = GetRCIdentityRoot(LocB.Ptr); AliasResult Result = - AliasAnalysis::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), - MemoryLocation(SB, LocB.Size, LocB.AATags)); + AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), + MemoryLocation(SB, LocB.Size, LocB.AATags)); if (Result != MayAlias) return Result; // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA, *DL); - const Value *UB = GetUnderlyingObjCPtr(SB, *DL); + const Value *UA = GetUnderlyingObjCPtr(SA, DL); + const Value *UB = GetUnderlyingObjCPtr(SB, DL); if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(MemoryLocation(UA), MemoryLocation(UB)); + Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB)); // We can't use MustAlias or PartialAlias results here because // GetUnderlyingObjCPtr may return an offsetted pointer value. if (Result == NoAlias) @@ -89,55 +69,47 @@ AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA, return MayAlias; } -bool ObjCARCAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); // First, strip off no-ops, including ObjC-specific no-ops, and try making // a precise alias query. const Value *S = GetRCIdentityRoot(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory( + if (AAResultBase::pointsToConstantMemory( MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal)) return true; // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S, *DL); + const Value *U = GetUnderlyingObjCPtr(S, DL); if (U != S) - return AliasAnalysis::pointsToConstantMemory(MemoryLocation(U), OrLocal); + return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal); // If that failed, fail. We don't need to chain here, since that's covered // by the earlier precise query. return false; } -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. 
- return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) { if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); switch (GetFunctionClass(F)) { case ARCInstKind::NoopCast: - return DoesNotAccessMemory; + return FMRB_DoesNotAccessMemory; default: break; } - return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); } -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); switch (GetBasicARCInstKind(CS.getInstruction())) { case ARCInstKind::Retain: @@ -151,18 +123,48 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // These functions don't access any memory visible to the compiler. // Note that this doesn't include objc_retainBlock, because it updates // pointers when it copies block data. - return NoModRef; + return MRI_NoModRef; default: break; } - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); +ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) { + return ObjCARCAAResult(F.getParent()->getDataLayout(), + AM->getResult<TargetLibraryAnalysis>(F)); +} + +char ObjCARCAA::PassID; + +char ObjCARCAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) + +ImmutablePass *llvm::createObjCARCAAWrapperPass() { + return new ObjCARCAAWrapperPass(); +} + +ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) { + initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ObjCARCAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ObjCARCAAResult( + M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; +} + +bool ObjCARCAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp new file mode 100644 index 000000000000..e3e74aa249da --- /dev/null +++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -0,0 +1,28 @@ +//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// \brief A handy option to enable/disable all ARC Optimizations. +bool llvm::objcarc::EnableARCOpts; +static cl::opt +EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), + cl::location(EnableARCOpts), + cl::init(true)); diff --git a/lib/Transforms/ObjCARC/ARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp similarity index 99% rename from lib/Transforms/ObjCARC/ARCInstKind.cpp rename to lib/Analysis/ObjCARCInstKind.cpp index afb873a355a7..133b63513c87 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.cpp +++ b/lib/Analysis/ObjCARCInstKind.cpp @@ -19,7 +19,9 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Intrinsics.h" using namespace llvm; @@ -91,7 +93,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { .Default(ARCInstKind::CallOrUser); // One argument. - const Argument *A0 = AI++; + const Argument *A0 = &*AI++; if (AI == AE) // Argument is a pointer. if (PointerType *PTy = dyn_cast(A0->getType())) { @@ -129,7 +131,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { } // Two arguments, first is i8**. - const Argument *A1 = AI++; + const Argument *A1 = &*AI++; if (AI == AE) if (PointerType *PTy = dyn_cast(A0->getType())) if (PointerType *Pte = dyn_cast(PTy->getElementType())) diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp new file mode 100644 index 000000000000..0f0016f22cc0 --- /dev/null +++ b/lib/Analysis/OrderedBasicBlock.cpp @@ -0,0 +1,85 @@ +//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the OrderedBasicBlock class. OrderedBasicBlock +// maintains an interface where clients can query if one instruction comes +// before another in a BasicBlock. Since BasicBlock currently lacks a reliable +// way to query relative position between instructions one can use +// OrderedBasicBlock to do such queries. OrderedBasicBlock is lazily built on a +// source BasicBlock and maintains an internal Instruction -> Position map. A +// OrderedBasicBlock instance should be discarded whenever the source +// BasicBlock changes. +// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. 
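
A sketch of the intended usage, ahead of the implementation below (the wrapper function is hypothetical; the OrderedBasicBlock API is the one defined in this file):

    #include "llvm/Analysis/OrderedBasicBlock.h"
    #include "llvm/IR/Instruction.h"

    // Build once per unchanged block, query many times: the first query walks
    // and numbers instructions until A or B is found; later queries are just
    // map lookups.
    bool comesFirst(const llvm::Instruction *A, const llvm::Instruction *B) {
      llvm::OrderedBasicBlock OBB(A->getParent());
      return OBB.dominates(A, B); // true iff A precedes B inside the block
    }
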
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Instruction.h" +using namespace llvm; + +OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB) + : NextInstPos(0), BB(BasicB) { + LastInstFound = BB->end(); +} + +/// \brief Given no cached results, find if \p A comes before \p B in \p BB. +/// Cache and number out instruction while walking \p BB. +bool OrderedBasicBlock::comesBefore(const Instruction *A, + const Instruction *B) { + const Instruction *Inst = nullptr; + assert(!(LastInstFound == BB->end() && NextInstPos != 0) && + "Instruction supposed to be in NumberedInsts"); + + // Start the search with the instruction found in the last lookup round. + auto II = BB->begin(); + auto IE = BB->end(); + if (LastInstFound != IE) + II = std::next(LastInstFound); + + // Number all instructions up to the point where we find 'A' or 'B'. + for (; II != IE; ++II) { + Inst = cast(II); + NumberedInsts[Inst] = NextInstPos++; + if (Inst == A || Inst == B) + break; + } + + assert(II != IE && "Instruction not found?"); + assert((Inst == A || Inst == B) && "Should find A or B"); + LastInstFound = II; + return Inst == A; +} + +/// \brief Find out whether \p A dominates \p B, meaning whether \p A +/// comes before \p B in \p BB. This is a simplification that considers +/// cached instruction positions and ignores other basic blocks, being +/// only relevant to compare relative instructions positions inside \p BB. +bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) { + assert(A->getParent() == B->getParent() && + "Instructions must be in the same basic block!"); + + // First we lookup the instructions. If they don't exist, lookup will give us + // back ::end(). If they both exist, we compare the numbers. Otherwise, if NA + // exists and NB doesn't, it means NA must come before NB because we would + // have numbered NB as well if it didn't. The same is true for NB. If it + // exists, but NA does not, NA must come after it. If neither exist, we need + // to number the block and cache the results (by calling comesBefore). 
+ auto NAI = NumberedInsts.find(A); + auto NBI = NumberedInsts.find(B); + if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end()) + return NAI->second < NBI->second; + if (NAI != NumberedInsts.end()) + return true; + if (NBI != NumberedInsts.end()) + return false; + + return comesBefore(A, B); +} diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 8cd85348fdcc..f59d26730327 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -21,6 +21,9 @@ #include #include #include +#ifndef NDEBUG +#include "llvm/Analysis/RegionPrinter.h" +#endif using namespace llvm; @@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_, calculate(F); } +#ifndef NDEBUG +void RegionInfo::view() { viewRegion(this); } + +void RegionInfo::viewOnly() { viewRegionOnly(this); } +#endif + //===----------------------------------------------------------------------===// // RegionInfoPass implementation // diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index d7f510984881..acb218d5fea0 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include "llvm/IR/LegacyPassManager.h" +#endif using namespace llvm; @@ -55,25 +58,22 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } }; -template<> -struct DOTGraphTraits : public DOTGraphTraits { +template <> +struct DOTGraphTraits : public DOTGraphTraits { DOTGraphTraits (bool isSimple = false) : DOTGraphTraits(isSimple) {} - static std::string getGraphName(RegionInfoPass *DT) { - return "Region Graph"; - } + static std::string getGraphName(const RegionInfo *) { return "Region Graph"; } - std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); - return DOTGraphTraits::getNodeLabel(Node, - reinterpret_cast(RI.getTopLevelRegion())); + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits::getNodeLabel( + Node, reinterpret_cast(G->getTopLevelRegion())); } std::string getEdgeAttributes(RegionNode *srcNode, - GraphTraits::ChildIteratorType CI, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); + GraphTraits::ChildIteratorType CI, + RegionInfo *G) { RegionNode *destNode = *CI; if (srcNode->isSubRegion() || destNode->isSubRegion()) @@ -83,7 +83,7 @@ struct DOTGraphTraits : public DOTGraphTraits { BasicBlock *srcBB = srcNode->getNodeAs(); BasicBlock *destBB = destNode->getNodeAs(); - Region *R = RI.getRegionFor(destBB); + Region *R = G->getRegionFor(destBB); while (R && R->getParent()) if (R->getParent()->getEntry() == destBB) @@ -91,7 +91,7 @@ struct DOTGraphTraits : public DOTGraphTraits { else break; - if (R->getEntry() == destBB && R->contains(srcBB)) + if (R && R->getEntry() == destBB && R->contains(srcBB)) return "constraint=false"; return ""; @@ -99,8 +99,7 @@ struct DOTGraphTraits : public DOTGraphTraits { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. 
- static void printRegionCluster(const Region &R, - GraphWriter &GW, + static void printRegionCluster(const Region &R, GraphWriter &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) @@ -132,50 +131,81 @@ struct DOTGraphTraits : public DOTGraphTraits { O.indent(2 * depth) << "}\n"; } - static void addCustomGraphFeatures(const RegionInfoPass* RIP, - GraphWriter &GW) { - const RegionInfo &RI = RIP->getRegionInfo(); + static void addCustomGraphFeatures(const RegionInfo *G, + GraphWriter &GW) { raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*RI.getTopLevelRegion(), GW, 4); + printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; } //end namespace llvm namespace { -struct RegionViewer - : public DOTGraphTraitsViewer { +struct RegionInfoPassGraphTraits { + static RegionInfo *getGraph(RegionInfoPass *RIP) { + return &RIP->getRegionInfo(); + } +}; + +struct RegionPrinter + : public DOTGraphTraitsPrinter { static char ID; - RegionViewer() : DOTGraphTraitsViewer("reg", ID){ + RegionPrinter() + : DOTGraphTraitsPrinter("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter { + static char ID; + RegionOnlyPrinter() + : DOTGraphTraitsPrinter("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyPrinter::ID = 0; + +struct RegionViewer + : public DOTGraphTraitsViewer { + static char ID; + RegionViewer() + : DOTGraphTraitsViewer("reg", ID) { initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewer { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer("regonly", ID) { + RegionOnlyViewer() + : DOTGraphTraitsViewer("regonly", ID) { initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyViewer::ID = 0; -struct RegionPrinter - : public DOTGraphTraitsPrinter { - static char ID; - RegionPrinter() : - DOTGraphTraitsPrinter("reg", ID) { - initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); - } -}; -char RegionPrinter::ID = 0; } //end anonymous namespace INITIALIZE_PASS(RegionPrinter, "dot-regions", "Print regions of function to 'dot' file", true, true) +INITIALIZE_PASS( + RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file (with no function bodies)", true, + true) + INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", true, true) @@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", "View regions of function (with no function bodies)", true, true) -namespace { - -struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter { - static char ID; - RegionOnlyPrinter() : - DOTGraphTraitsPrinter("reg", ID) { - initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); - } -}; +FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); } +FunctionPass *llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); } -char RegionOnlyPrinter::ID = 0; -INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", - "Print regions of function to 'dot' file " - "(with no function bodies)", - true, true) - FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } @@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() { return new 
RegionOnlyViewer(); } -FunctionPass* llvm::createRegionPrinterPass() { - return new RegionPrinter(); +#ifndef NDEBUG +static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { + assert(RI && "Argument must be non-null"); + + llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); + std::string GraphName = DOTGraphTraits::getGraphName(RI); + + llvm::ViewGraph(RI, "reg", ShortNames, + Twine(GraphName) + " for '" + F->getName() + "' function"); } -FunctionPass* llvm::createRegionOnlyPrinterPass() { - return new RegionOnlyPrinter(); +static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { + assert(F && "Argument must be non-null"); + assert(!F->isDeclaration() && "Function must have an implementation"); + + // The viewer and analysis passes do not modify anything, so we can safely + // remove the const qualifier + auto NonConstF = const_cast(F); + + llvm::legacy::FunctionPassManager FPM(NonConstF->getParent()); + FPM.add(ViewerPass); + FPM.doInitialization(); + FPM.run(*NonConstF); + FPM.doFinalization(); } +void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } + +void llvm::viewRegion(const Function *F) { + invokeFunctionPass(F, createRegionViewerPass()); +} + +void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } + +void llvm::viewRegionOnly(const Function *F) { + invokeFunctionPass(F, createRegionOnlyViewerPass()); +} +#endif diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 9c7c1754e387..34074efd1ceb 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -83,11 +83,13 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SaveAndRestore.h" #include using namespace llvm; @@ -114,16 +116,6 @@ static cl::opt VerifySCEV("verify-scev", cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -char ScalarEvolution::ID = 0; - //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } -#endif void SCEV::print(raw_ostream &OS) const { switch (static_cast(getSCEVType())) { @@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const { if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). 
- return SC->getValue()->getValue().isNegative(); + return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : @@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { //===----------------------------------------------------------------------===// namespace { - /// SCEVComplexityCompare - Return true if the complexity of the LHS is less - /// than the complexity of the RHS. This comparator is used to canonicalize - /// expressions. - class SCEVComplexityCompare { - const LoopInfo *const LI; - public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS. This comparator is used to canonicalize +/// expressions. +class SCEVComplexityCompare { + const LoopInfo *const LI; +public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; - } + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast(LHS); - const SCEVUnknown *RU = cast(RHS); + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast(LHS); + const SCEVUnknown *RU = cast(RHS); - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. - const Value *LV = LU->getValue(), *RV = RU->getValue(); + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. 
- bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; - // Sort arguments by their position. - if (const Argument *LA = dyn_cast(LV)) { - const Argument *RA = cast(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } - - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast(LV)) { - const Instruction *RInst = cast(RV); - - // Compare loop depths. - const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } - - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } - - return 0; + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LV)) { + const Argument *RA = cast(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - case scConstant: { - const SCEVConstant *LC = cast(LHS); - const SCEVConstant *RC = cast(RHS); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast(LV)) { + const Instruction *RInst = cast(RV); - // Compare constant values. - const APInt &LA = LC->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; - } - - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast(LHS); - const SCEVAddRecExpr *RA = cast(RHS); - - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - // Lexicographically compare. - for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } - - return 0; - } - - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast(LHS); - const SCEVNAryExpr *RC = cast(RHS); - - // Lexicographically compare n-ary expressions. 
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); return (int)LNumOps - (int)RNumOps; } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast(LHS); - const SCEVUDivExpr *RC = cast(RHS); + return 0; + } - // Lexicographically compare udiv expressions. - long X = compare(LC->getLHS(), RC->getLHS()); + case scConstant: { + const SCEVConstant *LC = cast(LHS); + const SCEVConstant *RC = cast(RHS); + + // Compare constant values. + const APInt &LA = LC->getAPInt(); + const APInt &RA = RC->getAPInt(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast(LHS); + const SCEVAddRecExpr *RA = cast(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast(LHS); - const SCEVCastExpr *RC = cast(RHS); - - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } - - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + return 0; } - }; -} + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast(LHS); + const SCEVUDivExpr *RC = cast(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast(LHS); + const SCEVCastExpr *RC = cast(RHS); + + // Compare cast expressions by operand. 
+ return compare(LC->getOperand(), RC->getOperand()); + } + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); + } +}; +} // end anonymous namespace /// GroupByComplexity - Given a list of SCEV objects, order them by their /// complexity, and group objects of the same complexity together by value. @@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl &Ops, } } -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; + } + }; + FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); @@ -771,8 +760,8 @@ public: void visitConstant(const SCEVConstant *Numerator) { if (const SCEVConstant *D = dyn_cast(Denominator)) { - APInt NumeratorVal = Numerator->getValue()->getValue(); - APInt DenominatorVal = D->getValue()->getValue(); + APInt NumeratorVal = Numerator->getAPInt(); + APInt DenominatorVal = D->getAPInt(); uint32_t NumeratorBW = NumeratorVal.getBitWidth(); uint32_t DenominatorBW = DenominatorVal.getBitWidth(); @@ -792,17 +781,15 @@ public: void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; - assert(Numerator->isAffine() && "Numerator should be affine"); + if (!Numerator->isAffine()) + return cannotDivide(Numerator); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); // Bail out if the types do not match. Type *Ty = Denominator->getType(); if (Ty != StartQ->getType() || Ty != StartR->getType() || - Ty != StepQ->getType() || Ty != StepR->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + Ty != StepQ->getType() || Ty != StepR->getType()) + return cannotDivide(Numerator); Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), @@ -818,11 +805,8 @@ public: divide(SE, Op, Denominator, &Q, &R); // Bail out if types do not match. - if (Ty != Q->getType() || Ty != R->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType() || Ty != R->getType()) + return cannotDivide(Numerator); Qs.push_back(Q); Rs.push_back(R); @@ -845,11 +829,8 @@ public: bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { // Bail out if types do not match. - if (Ty != Op->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Op->getType()) + return cannotDivide(Numerator); if (FoundDenominatorTerm) { Qs.push_back(Op); @@ -865,11 +846,8 @@ public: } // Bail out if types do not match. 
- if (Ty != Q->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType()) + return cannotDivide(Numerator); FoundDenominatorTerm = true; Qs.push_back(Q); @@ -884,11 +862,8 @@ public: return; } - if (!isa(Denominator)) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (!isa(Denominator)) + return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. ValueToValueMap RewriteMap; @@ -908,15 +883,12 @@ public: // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); - if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { - // This SCEV does not seem to simplify: fail the division here. - Quotient = Zero; - Remainder = Numerator; - return; - } + // This SCEV does not seem to simplify: fail the division here. + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) + return cannotDivide(Numerator); divide(SE, Diff, Denominator, &Q, &R); - assert(R == Zero && - "(Numerator - Remainder) should evenly divide Denominator"); + if (R != Zero) + return cannotDivide(Numerator); Quotient = Q; } @@ -924,11 +896,18 @@ private: SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) : SE(S), Denominator(Denominator) { - Zero = SE.getConstant(Denominator->getType(), 0); - One = SE.getConstant(Denominator->getType(), 1); + Zero = SE.getZero(Denominator->getType()); + One = SE.getOne(Denominator->getType()); - // By default, we don't know how to divide Expr by Denominator. - // Providing the default here simplifies the rest of the code. + // We generally do not know how to divide Expr by Denominator. We + // initialize the division to a "cannot divide" state to simplify the rest + // of the code. + cannotDivide(Numerator); + } + + // Convenience function for giving up on the division. We set the quotient to + // be equal to zero and the remainder to be equal to the numerator. + void cannotDivide(const SCEV *Numerator) { Quotient = Zero; Remainder = Numerator; } @@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { SmallVector Operands; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + for (const SCEV *Op : AddRec->operands()) + Operands.push_back(getTruncateExpr(Op, Ty)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } @@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // `Step`: // 1. NSW/NUW flags on the step increment. 
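Aside: the hunk above folds every repeated "Quotient = Zero; Remainder = Numerator; return;" block into one cannotDivide() helper; the void-returning "return cannotDivide(N);" idiom makes each bail-out a single line while preserving the invariant Numerator == Quotient * Denominator + Remainder. A toy version of the convention with plain integers and a hypothetical ToyDivision type:

#include <cassert>

struct ToyDivision {
  long Quotient = 0, Remainder = 0;

  // Giving up still leaves a consistent result: Q = 0, R = Numerator.
  void cannotDivide(long Numerator) {
    Quotient = 0;
    Remainder = Numerator;
  }

  void divide(long Numerator, long Denominator) {
    if (Denominator == 0)
      return cannotDivide(Numerator); // single-line bail-out, as in the patch
    Quotient = Numerator / Denominator;
    Remainder = Numerator % Denominator;
  }
};

int main() {
  ToyDivision D;
  D.divide(17, 5);
  assert(D.Quotient == 3 && D.Remainder == 2);
  D.divide(17, 0); // "cannot divide"
  assert(D.Quotient == 0 && D.Remainder == 17);
}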
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); + auto PreStartFlags = + ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); + const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); const SCEVAddRecExpr *PreAR = dyn_cast( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ExtendOpTraits::getOverflowLimitForStep(Step, &Pred, SE); if (OverflowLimit && - SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) return PreStart; - } + return nullptr; } @@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, if (!StartC) return false; - APInt StartAI = StartC->getValue()->getValue(); + APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddPointer(PreStart); + ID.AddPointer(Step); + ID.AddPointer(L); + void *IP = nullptr; + const auto *PreAR = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. - const SCEVAddRecExpr *PreAR = [&]() { - FoldingSetNodeID ID; - ID.AddInteger(scAddRecExpr); - ID.AddPointer(PreStart); - ID.AddPointer(Step); - ID.AddPointer(L); - void *IP = nullptr; - return static_cast( - this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - }(); - if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) const SCEV *DeltaS = getConstant(StartC->getType(), Delta); ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; @@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } } + if (auto *SA = dyn_cast(Op)) { + // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) + if (SA->getNoWrapFlags(SCEV::FlagNUW)) { + // If the addition does not unsign overflow then we can, by definition, + // commute the zero extension with the addition operation. + SmallVector Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getZeroExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNUW); + } + } + // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; @@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 - if (auto SA = dyn_cast(Op)) { + if (auto *SA = dyn_cast(Op)) { if (SA->getNumOperands() == 2) { - auto SC1 = dyn_cast(SA->getOperand(0)); - auto SMul = dyn_cast(SA->getOperand(1)); + auto *SC1 = dyn_cast(SA->getOperand(0)); + auto *SMul = dyn_cast(SA->getOperand(1)); if (SMul && SC1) { - if (auto SC2 = dyn_cast(SMul->getOperand(0))) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + if (auto *SC2 = dyn_cast(SMul->getOperand(0))) { + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty), @@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } } } + + // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) 
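Aside: the new zext-over-add fold above (and its sext dual below) is justified purely by arithmetic: when the narrow addition provably cannot wrap, widening commutes with it. An exhaustive 8-bit check of the unsigned identity, as a standalone sketch:

#include <cassert>
#include <cstdint>

// Checks the identity behind zext((A + B)<nuw>) --> zext(A) + zext(B):
// if an 8-bit unsigned add does not wrap, zero-extension distributes
// over it.
int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      if (A + B > 255)
        continue; // would wrap: the nuw precondition fails, no claim made
      uint16_t Wide = uint16_t(uint8_t(A + B));  // zext(A + B)
      assert(Wide == uint16_t(A) + uint16_t(B)); // zext(A) + zext(B)
    }
}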
+ if (SA->getNoWrapFlags(SCEV::FlagNSW)) { + // If the addition does not sign overflow then we can, by definition, + // commute the sign extension with the addition operation. + SmallVector Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getSignExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNSW); + } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the @@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If Start and Step are constants, check if we can apply this // transformation: // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 - auto SC1 = dyn_cast(Start); - auto SC2 = dyn_cast(Step); + auto *SC1 = dyn_cast(Start); + auto *SC2 = dyn_cast(Step); if (SC1 && SC2) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty); - const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, - L, AR->getNoWrapFlags()); + const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, + AR->getNoWrapFlags()); return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); } } @@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast(Op)) - if (SC->getValue()->getValue().isNegative()) + if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. @@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap &M, // Pull a buried constant out to the outside. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; - AccumulatedConstant += Scale * C->getValue()->getValue(); + AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. We're especially interested in multiplies @@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap &M, const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = - Scale * cast(Mul->getOperand(0))->getValue()->getValue(); + Scale * cast(Mul->getOperand(0))->getAPInt(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. const SCEVAddExpr *Add = cast(Mul->getOperand(1)); @@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap &M, // the map. SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); const SCEV *Key = SE.getMulExpr(MulOps); - std::pair::iterator, bool> Pair = - M.insert(std::make_pair(Key, NewScale)); + auto Pair = M.insert(std::make_pair(Key, NewScale)); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { @@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap &M, return Interesting; } -namespace { - struct APIntCompare { - bool operator()(const APInt &LHS, const APInt &RHS) const { - return LHS.ult(RHS); - } - }; -} - // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. 
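Aside: the C1 < C2 rewrites in this hunk (sext(C1 + C2*x) --> C1 + sext(C2*x), and the addrec form sext{C1,+,C2} --> C1 + sext{0,+,C2}) rely on C2 being a power of two larger than C1, so adding C1 to a multiple of C2 can never carry across the narrow type's sign bit. A brute-force 8-bit spot check with C1 = 1, C2 = 4 (relies on two's-complement narrowing, which C++20 guarantees):

#include <cassert>
#include <cstdint>

int main() {
  const int C1 = 1, C2 = 4;
  for (int X = -128; X < 128; ++X) {
    int8_t Prod = static_cast<int8_t>(C2 * X);    // C2*x, wrapped to 8 bits
    int8_t Sum  = static_cast<int8_t>(Prod + C1); // C1 + C2*x in 8 bits
    int SExtSum  = Sum;  // sext(C1 + C2*x)
    int SExtProd = Prod; // sext(C2*x)
    assert(SExtSum == C1 + SExtProd);
  }
}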
static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl &Ops, - SCEV::NoWrapFlags OldFlags) { + SCEV::NoWrapFlags Flags) { using namespace std::placeholders; + typedef OverflowingBinaryOperator OBO; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; @@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; SCEV::NoWrapFlags SignOrUnsignWrap = - ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask); + ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. - auto IsKnownNonNegative = - std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1); + auto IsKnownNonNegative = [&](const SCEV *S) { + return SE->isKnownNonNegative(S); + }; - if (SignOrUnsignWrap == SCEV::FlagNSW && - std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative)) - return ScalarEvolution::setFlags(OldFlags, - (SCEV::NoWrapFlags)SignOrUnsignMask); + if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) + Flags = + ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); - return OldFlags; + SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); + + if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && + Ops.size() == 2 && isa(Ops[0])) { + + // (A + C) --> (A + C) if the addition does not sign overflow + // (A + C) --> (A + C) if the addition does not unsign overflow + + const APInt &C = cast(Ops[0])->getAPInt(); + if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { + auto NSWRegion = + ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap); + if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + } + if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { + auto NUWRegion = + ConstantRange::makeNoWrapRegion(Instruction::Add, C, + OBO::NoUnsignedWrap); + if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + } + } + + return Flags; } /// getAddExpr - Get a canonical add expression, or something simpler if @@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, "SCEVAddExpr operand types don't match!"); #endif - Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); + + Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; @@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
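Aside: StrengthenNoWrapFlags above now proves NSW/NUW for (C + A) by asking whether the known range of A sits inside the no-wrap region of "add C", via ConstantRange::makeNoWrapRegion. The idea in miniature, with 8-bit integers standing in for ConstantRange and a hypothetical inNoSignedWrapRegion helper:

#include <cassert>
#include <cstdint>

// For a fixed constant C, the signed no-wrap region of "x + C" in 8 bits
// is exactly the set of x where the mathematical sum stays in [-128, 127].
bool inNoSignedWrapRegion(int X, int C) {
  int Sum = X + C; // exact, computed in int
  return Sum >= -128 && Sum <= 127;
}

int main() {
  for (int C = -128; C < 128; ++C)
    for (int X = -128; X < 128; ++X) {
      int8_t Wrapped = static_cast<int8_t>(X + C); // 8-bit add (may wrap)
      assert((Wrapped == X + C) == inNoSignedWrapRegion(X, C));
    }
}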
- Ops[0] = getConstant(LHSC->getValue()->getValue() + - RHSC->getValue()->getValue()); + Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); @@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, break; } LargeMulOps.push_back(T->getOperand()); - } else if (const SCEVConstant *C = - dyn_cast(M->getOperand(j))) { + } else if (const auto *C = dyn_cast(M->getOperand(j))) { LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); } else { Ok = false; @@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops.data(), Ops.size(), APInt(BitWidth, 1), *this)) { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; + // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; - for (SmallVectorImpl::const_iterator I = NewOps.begin(), - E = NewOps.end(); I != E; ++I) - MulOpLists[M.find(*I)->second].push_back(*I); + for (const SCEV *NewOp : NewOps) + MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); - for (std::map, APIntCompare>::iterator - I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) - if (I->first != 0) - Ops.push_back(getMulExpr(getConstant(I->first), - getAddExpr(I->second))); + for (auto &MulOp : MulOpLists) + if (MulOp.first != 0) + Ops.push_back(getMulExpr(getConstant(MulOp.first), + getAddExpr(MulOp.second))); if (Ops.empty()) - return getConstant(Ty, 0); + return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops); @@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getConstant(Ty, 1); + const SCEV *One = getOne(Ty); const SCEV *AddOne = getAddExpr(One, InnerMul); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); if (Ops.size() == 2) return OuterMul; @@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, AddRec->op_end()); for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) - if (const SCEVAddRecExpr *OtherAddRec = - dyn_cast(Ops[OtherIdx])) + if (const auto *OtherAddRec = dyn_cast(Ops[OtherIdx])) if (OtherAddRec->getLoop() == AddRecLoop) { for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { @@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, "SCEVMulExpr operand types don't match!"); #endif - Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); + + Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; @@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
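Aside: the MulOpLists rework earlier in this hunk groups add operands by their constant scale (a map keyed with APIntCompare) before rebuilding the expression, so each distinct scale produces exactly one multiply. A toy version of that grouping step, with strings standing in for SCEV operands:

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Groups 2*a + 3*b + 2*c into 2*(a + c) + 3*b.
int main() {
  std::vector<std::pair<std::string, long>> ScaledOps = {
      {"a", 2}, {"b", 3}, {"c", 2}};

  std::map<long, std::vector<std::string>> ByScale;
  for (auto &Op : ScaledOps)
    ByScale[Op.second].push_back(Op.first); // one bucket per constant scale

  assert(ByScale[2].size() == 2); // {a, c} share the scale 2
  assert(ByScale[3].size() == 1); // {b}
}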
- ConstantInt *Fold = ConstantInt::get(getContext(), - LHSC->getValue()->getValue() * - RHSC->getValue()->getValue()); + ConstantInt *Fold = + ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), - E = Add->op_end(); I != E; ++I) { - const SCEV *Mul = getMulExpr(Ops[0], *I); + for (const SCEV *AddOp : Add->operands()) { + const SCEV *Mul = getMulExpr(Ops[0], AddOp); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps); - } - else if (const SCEVAddRecExpr * - AddRec = dyn_cast(Ops[1])) { + } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; - for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), - E = AddRec->op_end(); I != E; ++I) { - Operands.push_back(getMulExpr(Ops[0], *I)); - } + for (const SCEV *AddRecOp : AddRec->operands()) + Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } @@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SmallVector AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); + const SCEV *Term = getZero(Ty); for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), @@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); - unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned LZ = RHSC->getAPInt().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. - if (!RHSC->getValue()->getValue().isPowerOf2()) + if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); @@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *Step = dyn_cast(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. - const APInt &StepInt = Step->getValue()->getValue(); - const APInt &DivInt = RHSC->getValue()->getValue(); + const APInt &StepInt = Step->getAPInt(); + const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; - for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) - Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop(), - SCEV::FlagNW); + for (const SCEV *Op : AR->operands()) + Operands.push_back(getUDivExpr(Op, RHS)); + return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). 
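Aside: the {X,+,N}/C --> {X/C,+,N/C} fold above is legal when the division distributes exactly over the recurrence; the code checks N % C == 0 plus a zero-extension identity to rule out wrapping. A simplified numeric check under the stronger assumption that C divides both the start X and the step N:

#include <cassert>

int main() {
  const long X = 6, N = 4, C = 2; // C divides X and C divides N
  for (long I = 0; I < 1000; ++I)
    assert((X + N * I) / C == X / C + (N / C) * I);
}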
Safe when C%N=0. @@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { - const APInt &StartInt = StartC->getValue()->getValue(); + const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, @@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { SmallVector Operands; - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + for (const SCEV *Op : M->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { @@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + for (const SCEV *Op : A->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { @@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue().abs(); - APInt B = C2->getValue()->getValue().abs(); + APInt A = C1->getAPInt().abs(); + APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); @@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. - if (const SCEVConstant *LHSCst = - dyn_cast(Mul->getOperand(0))) { + if (const auto *LHSCst = dyn_cast(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector Operands; Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // check. APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { - LHSCst = cast( - getConstant(LHSCst->getValue()->getValue().udiv(Factor))); - RHSCst = cast( - getConstant(RHSCst->getValue()->getValue().udiv(Factor))); + LHSCst = + cast(getConstant(LHSCst->getAPInt().udiv(Factor))); + RHSCst = + cast(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector Operands; Operands.push_back(LHSCst); Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, // Canonicalize nested AddRecs in by nesting them in order of loop depth. if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { const Loop *NestedLoop = NestedAR->getLoop(); - if (L->contains(NestedLoop) ? - (L->getLoopDepth() < NestedLoop->getLoopDepth()) : - (!NestedLoop->contains(L) && - DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + if (L->contains(NestedLoop) + ? 
(L->getLoopDepth() < NestedLoop->getLoopDepth()) + : (!NestedLoop->contains(L) && + DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { SmallVector NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this // requirement. - bool AllInvariant = true; - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - if (!isLoopInvariant(Operands[i], L)) { - AllInvariant = false; - break; - } + bool AllInvariant = all_of( + Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); + if (AllInvariant) { // Create a recurrence for the outer loop with the same step size. // @@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); - AllInvariant = true; - for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) - if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { - AllInvariant = false; - break; - } + AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { + return isLoopInvariant(Op, NestedLoop); + }); + if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. // @@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any - // context. + // context. This can be fixed similarly to how these flags are handled for + // adds. SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + const SCEV *TotalOffset = getZero(IntPtrTy); // The address space is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = PointerType::getUnqual(PointeeType); @@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::smax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
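Aside: both max folds here follow the same shape as the add/mul constant folding: scan the complexity-sorted operand list and repeatedly merge adjacent constants with APIntOps::smax/umax. The same loop over plain longs:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<long> Ops = {7, 42, -3, 42, 5};
  long Folded = Ops[0];
  for (std::size_t I = 1; I < Ops.size(); ++I)
    Folded = std::max(Folded, Ops[I]); // "found two constants, fold them"
  assert(Folded == 42);
}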
- ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::umax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - return getConstant(IntTy, - F->getParent()->getDataLayout().getTypeAllocSize(AllocTy)); + return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, @@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant( - IntTy, - F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset( - FieldNo)); + IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { @@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { /// for which isSCEVable must return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - return F->getParent()->getDataLayout().getTypeSizeInBits(Ty); + return getDataLayout().getTypeSizeInBits(Ty); } /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isIntegerTy()) { + if (Ty->isIntegerTy()) return Ty; - } // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - return F->getParent()->getDataLayout().getIntPtrType(Ty); + return getDataLayout().getIntPtrType(Ty); } const SCEV *ScalarEvolution::getCouldNotCompute() { - return &CouldNotCompute; + return CouldNotCompute.get(); } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const { // Helper class working with SCEVTraversal to figure out if a SCEV contains // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne // is set iff if find such SCEVUnknown. 
@@ -3300,9 +3302,7 @@ namespace { } bool isDone() const { return FindOne; } }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const { FindInvalidSCEVUnknown F; SCEVTraversal ST(F); ST.visitAll(S); @@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + const SCEV *S = getExistingSCEV(V); + if (S == nullptr) { + S = createSCEV(V); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + } + return S; +} + +const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; - else - ValueExprMap.erase(I); + ValueExprMap.erase(I); } - const SCEV *S = createSCEV(V); - - // The process of creating a SCEV for V may have caused other SCEVs - // to have been created, so it's necessary to insert the new entry - // from scratch, rather than trying to remember the insert position - // above. - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); - return S; + return nullptr; } /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// -const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMulExpr(V, - getConstant(cast(Constant::getAllOnesValue(Ty)))); + return getMulExpr( + V, getConstant(cast(Constant::getAllOnesValue(Ty))), Flags); } /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V @@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags) { - assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); - // Fast path: X - X --> 0. if (LHS == RHS) - return getConstant(LHS->getType(), 0); + return getZero(LHS->getType()); - // X - Y --> X + -Y. - // X -(nsw || nuw) Y --> X + -Y. - return getAddExpr(LHS, getNegativeSCEV(RHS)); + // We represent LHS - RHS as LHS + (-1)*RHS. This transformation + // makes it so that we cannot make much use of NUW. + auto AddFlags = SCEV::FlagAnyWrap; + const bool RHSIsNotMinSigned = + !getSignedRange(RHS).getSignedMin().isMinSignedValue(); + if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { + // Let M be the minimum representable signed value. Then (-1)*RHS + // signed-wraps if and only if RHS is M. That can happen even for + // a NSW subtraction because e.g. (-1)*M signed-wraps even though + // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + + // (-1)*RHS, we need to prove that RHS != M. + // + // If LHS is non-negative and we know that LHS - RHS does not + // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap + // either by proving that RHS > M or that LHS >= 0. + if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { + AddFlags = SCEV::FlagNSW; + } + } + + // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - + // RHS is NSW and LHS >= 0. 
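Aside: the getSCEV/getExistingSCEV split above separates the cache probe (which may return nothing) from creation, because creating a SCEV can itself populate the map, forcing the insert to happen from scratch. The pattern in miniature, with a hypothetical Cache type mapping ints to strings:

#include <cassert>
#include <string>
#include <unordered_map>
#include <utility>

struct Cache {
  std::unordered_map<int, std::string> Map;

  // Lookup half: usable on its own, nullptr on a miss.
  const std::string *getExisting(int V) {
    auto It = Map.find(V);
    return It == Map.end() ? nullptr : &It->second;
  }

  // Create half: inserts from scratch rather than reusing a stale
  // insertion position, since creation may have touched the map.
  const std::string &get(int V) {
    if (const std::string *S = getExisting(V))
      return *S;
    std::string Created = "expr(" + std::to_string(V) + ")";
    return Map.emplace(V, std::move(Created)).first->second;
  }
};

int main() {
  Cache C;
  assert(C.getExisting(1) == nullptr);
  assert(C.get(1) == "expr(1)");
  assert(C.getExisting(1) != nullptr); // now cached
}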
+ // + // The difficulty here is that the NSW flag may have been proven + // relative to a loop that is to be found in a recurrence in LHS and + // not in RHS. Applying NSW to (-1)*M may then let the NSW have a + // larger scope than intended. + auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { if (const SCEVCastExpr *Cast = dyn_cast(V)) { return getPointerBase(Cast->getOperand()); - } - else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { + } else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { const SCEV *PtrOp = nullptr; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - if ((*I)->getType()->isPointerTy()) { + for (const SCEV *NAryOp : NAry->operands()) { + if (NAryOp->getType()->isPointerTy()) { // Cannot find the base of an expression with multiple pointer operands. if (PtrOp) return V; - PtrOp = *I; + PtrOp = NAryOp; } } if (!PtrOp) @@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I).second) continue; - ValueExprMapType::iterator It = - ValueExprMap.find_as(static_cast(I)); + auto It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { } } -/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in -/// a loop header, making it a potential recurrence, or it doesn't. -/// -const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { - if (const Loop *L = LI->getLoopFor(PN->getParent())) - if (L->getHeader() == PN->getParent()) { - // The loop may have multiple entrances or multiple exits; we can analyze - // this phi as an addrec if it has a unique entry value and a unique - // backedge value. - Value *BEValueV = nullptr, *StartValueV = nullptr; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (L->contains(PN->getIncomingBlock(i))) { - if (!BEValueV) { - BEValueV = V; - } else if (BEValueV != V) { - BEValueV = nullptr; +namespace { +class SCEVInitRewriter : public SCEVRewriteVisitor { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVInitRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); + } + + SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + // Only allow AddRecExprs for this loop. + if (Expr->getLoop() == L) + return Expr->getStart(); + Valid = false; + return Expr; + } + + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; + +class SCEVShiftRewriter : public SCEVRewriteVisitor { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVShiftRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? 
Result : SE.getCouldNotCompute(); + } + + SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + // Only allow AddRecExprs for this loop. + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (Expr->getLoop() == L && Expr->isAffine()) + return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); + Valid = false; + return Expr; + } + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return nullptr; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; break; } - } else if (!StartValueV) { - StartValueV = V; - } else if (StartValueV != V) { - StartValueV = nullptr; - break; + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa(Accum) && + cast(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. 
+ if (const AddOperator *OBO = dyn_cast(BEValueV)) { + if (OBO->getOperand(0) == PN) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } + } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. + } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; } } - if (BEValueV && StartValueV) { - // While we are analyzing this PHI node, handle its value symbolically. - const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && - "PHI node already processed?"); - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); - - // Using this symbolic name for the PHI, analyze the value coming around - // the back-edge. - const SCEV *BEValue = getSCEV(BEValueV); - - // NOTE: If BEValue is loop invariant, we know that the PHI node just - // has a special value for the first iteration of the loop. - - // If the value coming around the backedge is an add with the symbolic - // value we just inserted, then we found a simple induction variable! - if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { - // If there is a single occurrence of the symbolic value, replace it - // with a recurrence. - unsigned FoundIndex = Add->getNumOperands(); - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (Add->getOperand(i) == SymbolicName) - if (FoundIndex == e) { - FoundIndex = i; - break; - } - - if (FoundIndex != Add->getNumOperands()) { - // Create an add with everything but the specified operand. - SmallVector Ops; - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); - const SCEV *Accum = getAddExpr(Ops); - - // This is not a valid addrec if the step amount is varying each - // loop iteration, but is not itself an addrec in this loop. - if (isLoopInvariant(Accum, L) || - (isa(Accum) && - cast(Accum)->getLoop() == L)) { - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - - // If the increment doesn't overflow, then neither the addrec nor - // the post-increment will overflow. 
- if (const AddOperator *OBO = dyn_cast(BEValueV)) { - if (OBO->getOperand(0) == PN) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); - } - } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { - // If the increment is an inbounds GEP, then we know the address - // space cannot be wrapped around. We cannot make any guarantee - // about signed or unsigned overflow because pointers are - // unsigned but we may have a negative index from the base - // pointer. We can guarantee that no unsigned wrap occurs if the - // indices form a positive value. - if (GEP->isInBounds() && GEP->getOperand(0) == PN) { - Flags = setFlags(Flags, SCEV::FlagNW); - - const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); - if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) - Flags = setFlags(Flags, SCEV::FlagNUW); - } - - // We cannot transfer nuw and nsw flags from subtraction - // operations -- sub nuw X, Y is not the same as add nuw X, -Y - // for instance. - } - - const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - - // Since the no-wrap flags are on the increment, they apply to the - // post-incremented value as well. - if (isLoopInvariant(Accum, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), - Accum, L, Flags); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } - } - } else if (const SCEVAddRecExpr *AddRec = - dyn_cast(BEValue)) { - // Otherwise, this could be a loop like this: - // i = 0; for (j = 1; ..; ++j) { .... i = j; } - // In this case, j = {1,+,1} and BEValue is j. - // Because the other in-value of i (0) fits the evolution of BEValue - // i really is an addrec evolution. - if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV *StartVal = getSCEV(StartValueV); - - // If StartVal = j.start - j.stride, we can use StartVal as the - // initial step of the addrec evolution. - if (StartVal == getMinusSCEV(AddRec->getOperand(0), - AddRec->getOperand(1))) { - // FIXME: For constant StartVal, we should be able to infer - // no-wrap flags. - const SCEV *PHISCEV = - getAddRecExpr(StartVal, AddRec->getOperand(1), L, - SCEV::FlagAnyWrap); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } - } + } else { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. 
+ // + // We can generalize this saying that i is the shifted value of BEValue + // by one iteration: + // PHI(f(0), f({1,+,1})) --> f({0,+,1}) + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + if (Shifted != getCouldNotCompute() && + Start != getCouldNotCompute()) { + const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + return Shifted; } } } + } + + return nullptr; +} + +// Checks if the SCEV S is available at BB. S is considered available at BB +// if S can be materialized at BB without introducing a fault. +static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, + BasicBlock *BB) { + struct CheckAvailable { + bool TraversalDone = false; + bool Available = true; + + const Loop *L = nullptr; // The loop BB is in (can be nullptr) + BasicBlock *BB = nullptr; + DominatorTree &DT; + + CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT) + : L(L), BB(BB), DT(DT) {} + + bool setUnavailable() { + TraversalDone = true; + Available = false; + return false; + } + + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: + case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + // These expressions are available if their operand(s) is/are. + return true; + + case scAddRecExpr: { + // We allow add recurrences that are on the loop BB is in, or some + // outer loop. This guarantees availability because the value of the + // add recurrence at BB is simply the "current" value of the induction + // variable. We can relax this in the future; for instance an add + // recurrence on a sibling dominating loop is also available at BB. + const auto *ARLoop = cast(S)->getLoop(); + if (L && (ARLoop == L || ARLoop->contains(L))) + return true; + + return setUnavailable(); + } + + case scUnknown: { + // For SCEVUnknown, we check for simple dominance. + const auto *SU = cast(S); + Value *V = SU->getValue(); + + if (isa(V)) + return false; + + if (isa(V) && DT.dominates(cast(V), BB)) + return false; + + return setUnavailable(); + } + + case scUDivExpr: + case scCouldNotCompute: + // We do not try to smart about these at all. + return setUnavailable(); + } + llvm_unreachable("switch should be fully covered!"); + } + + bool isDone() { return TraversalDone; } + }; + + CheckAvailable CA(L, BB, DT); + SCEVTraversal ST(CA); + + ST.visitAll(S); + return CA.Available; +} + +// Try to match a control flow sequence that branches out at BI and merges back +// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful +// match. 
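Aside: SCEVInitRewriter/SCEVShiftRewriter formalize the comment above: the backedge value is the PHI "shifted" by one iteration, so PHI(f(0), f({1,+,1})) is f({0,+,1}). A direct simulation of the loop in the comment ("i = 0; for (j = 1; ..; ++j) i = j;") with an illustrative affine f(j) = 3*j + 5:

#include <cassert>

long f(long J) { return 3 * J + 5; }

int main() {
  long I = f(0); // start value coming into the loop header
  for (long J = 1; J <= 100; ++J) {
    // At the top of iteration J, the PHI holds f(J - 1): f evaluated on
    // the shifted recurrence {0,+,1}.
    assert(I == f(J - 1));
    I = f(J); // backedge value: f evaluated on {1,+,1}
  }
}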
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, + Value *&C, Value *&LHS, Value *&RHS) { + C = BI->getCondition(); + + BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); + BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); + + if (!LeftEdge.isSingleEdge()) + return false; + + assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); + + Use &LeftUse = Merge->getOperandUse(0); + Use &RightUse = Merge->getOperandUse(1); + + if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { + LHS = LeftUse; + RHS = RightUse; + return true; + } + + if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { + LHS = RightUse; + RHS = LeftUse; + return true; + } + + return false; +} + +const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { + if (PN->getNumIncomingValues() == 2) { + const Loop *L = LI.getLoopFor(PN->getParent()); + + // We don't want to break LCSSA, even in a SCEV expression tree. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) + return nullptr; + + // Try to match + // + // br %cond, label %left, label %right + // left: + // br label %merge + // right: + // br label %merge + // merge: + // V = phi [ %x, %left ], [ %y, %right ] + // + // as "select %cond, %x, %y" + + BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); + assert(IDom && "At least the entry block should dominate PN"); + + auto *BI = dyn_cast(IDom->getTerminator()); + Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; + + if (BI && BI->isConditional() && + BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && + IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) && + IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())) + return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); + } + + return nullptr; +} + +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const SCEV *S = createAddRecFromPHI(PN)) + return S; + + if (const SCEV *S = createNodeFromSelectLikePHI(PN)) + return S; // If the PHI has a single incoming value, follow that value, unless the // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = - SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC)) - if (LI->replacementPreservesLCSSAForm(PN, V)) + if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) + if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } +const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, + Value *Cond, + Value *TrueVal, + Value *FalseVal) { + // Handle "constant" branch or select. This can occur for instance when a + // loop pass transforms an inner loop and moves on to process the outer loop. + if (auto *CI = dyn_cast(Cond)) + return getSCEV(CI->isOne() ? TrueVal : FalseVal); + + // Try to match some simple smax or umax patterns. + auto *ICI = dyn_cast(Cond); + if (!ICI) + return getUnknown(I); + + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa(RHS) && cast(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa(RHS) && cast(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + + return getUnknown(I); +} + /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// @@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVConstant *C = dyn_cast(S)) - return C->getValue()->getValue().countTrailingZeros(); + return C->getAPInt().countTrailingZeros(); if (const SCEVTruncateExpr *T = dyn_cast(S)) return std::min(GetMinTrailingZeros(T->getOperand()), @@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. 
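Aside: the select lowering above rests on identities like "a >s b ? a+x : b+x == smax(a, b) + x", detected by checking that the two arm-minus-operand differences agree (LDiff == RDiff). The identity itself, checked exhaustively over 8-bit operand values (widened to int so the +x in the test never wraps):

#include <algorithm>
#include <cassert>
#include <initializer_list>

int main() {
  for (int A = -128; A < 128; ++A)
    for (int B = -128; B < 128; ++B)
      for (int X : {-7, 0, 13}) {
        int Select = (A > B) ? A + X : B + X;
        assert(Select == std::max(A, B) + X);
      }
}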
unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, + nullptr, &DT); return Zeros.countTrailingOnes(); } @@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { /// GetRangeFromMetadata - Helper method to assign a range to V from /// metadata present in the IR. static Optional GetRangeFromMetadata(Value *V) { - if (Instruction *I = dyn_cast(V)) { - if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) { - ConstantRange TotalRange( - cast(I->getType())->getBitWidth(), false); - - unsigned NumRanges = MD->getNumOperands() / 2; - assert(NumRanges >= 1); - - for (unsigned i = 0; i < NumRanges; ++i) { - ConstantInt *Lower = - mdconst::extract(MD->getOperand(2 * i + 0)); - ConstantInt *Upper = - mdconst::extract(MD->getOperand(2 * i + 1)); - ConstantRange Range(Lower->getValue(), Upper->getValue()); - TotalRange = TotalRange.unionWith(Range); - } - - return TotalRange; - } - } + if (Instruction *I = dyn_cast(V)) + if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) + return getConstantRangeFromMetadata(*MD); return None; } @@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S, return I->second; if (const SCEVConstant *C = dyn_cast(S)) - return setRange(C, SignHint, ConstantRange(C->getValue()->getValue())); + return setRange(C, SignHint, ConstantRange(C->getAPInt())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S, if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) if (const SCEVConstant *C = dyn_cast(AddRec->getStart())) if (!C->getValue()->isZero()) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(C->getAPInt(), APInt(BitWidth, 0))); // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. @@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S, // Split here to avoid paying the compile-time cost of calling both // computeKnownBits and ComputeNumSignBits. This restriction can be lifted // if needed. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); if (Ones != ~Zeros + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S, return setRange(S, SignHint, ConservativeResult); } -/// createSCEV - We know that there is no SCEV for the specified value. -/// Analyze the expression. 
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { + if (isa(V)) return SCEV::FlagAnyWrap; + const BinaryOperator *BinOp = cast(V); + + // Return early if there are no flags to propagate to the SCEV. + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (BinOp->hasNoUnsignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (BinOp->hasNoSignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + if (Flags == SCEV::FlagAnyWrap) { + return SCEV::FlagAnyWrap; + } + + // Here we check that BinOp is in the header of the innermost loop + // containing BinOp, since we only deal with instructions in the loop + // header. The actual loop we need to check later will come from an add + // recurrence, but getting that requires computing the SCEV of the operands, + // which can be expensive. This check we can do cheaply to rule out some + // cases early. + Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent()); + if (innermostContainingLoop == nullptr || + innermostContainingLoop->getHeader() != BinOp->getParent()) + return SCEV::FlagAnyWrap; + + // Only proceed if we can prove that BinOp does not yield poison. + if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap; + + // At this point we know that if V is executed, then it does not wrap + // according to at least one of NSW or NUW. If V is not executed, then we do + // not know if the calculation that V represents would wrap. Multiple + // instructions can map to the same SCEV. If we apply NSW or NUW from V to + // the SCEV, we must guarantee no wrapping for that SCEV also when it is + // derived from other instructions that map to the same SCEV. We cannot make + // that guarantee for cases where V is not executed. So we need to find the + // loop that V is considered in relation to and prove that V is executed for + // every iteration of that loop. That implies that the value that V + // calculates does not wrap anywhere in the loop, so then we can apply the + // flags to the SCEV. + // + // We check isLoopInvariant to disambiguate in case we are adding two + // recurrences from different loops, so that we know which loop to prove + // that V is executed in. + for (int OpIndex = 0; OpIndex < 2; ++OpIndex) { + const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex)); + if (auto *AddRec = dyn_cast(Op)) { + const int OtherOpIndex = 1 - OpIndex; + const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex)); + if (isLoopInvariant(OtherOp, AddRec->getLoop()) && + isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop())) + return Flags; + } + } + return SCEV::FlagAnyWrap; +} + +/// createSCEV - We know that there is no SCEV for the specified value. Analyze +/// the expression. /// const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) @@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. - if (!DT->isReachableFromEntry(I->getParent())) + if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(V); } else if (ConstantExpr *CE = dyn_cast(V)) Opcode = CE->getOpcode(); else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) - return getConstant(V->getType(), 0); + return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast(V)) return GA->mayBeOverridden() ? 
getUnknown(V) : getSCEV(GA->getAliasee()); else @@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. - // - // Don't apply this instruction's NSW or NUW flags to the new - // expression. The instruction may be guarded by control flow that the - // no-wrap behavior depends on. Non-control-equivalent instructions can be - // mapped to the same SCEV expression, and it would be incorrect to transfer - // NSW/NUW semantics to those operations. SmallVector AddOps; - AddOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { - unsigned Opcode = Op->getValueID() - Value::InstructionVal; - if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast(Op); + unsigned Opcode = U ? U->getOpcode() : 0; + if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) { + assert(Op != V && "V should be an add"); + AddOps.push_back(getSCEV(Op)); break; - U = cast(Op); - const SCEV *Op1 = getSCEV(U->getOperand(1)); + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + AddOps.push_back(OpSCEV); + break; + } + + // If a NUW or NSW flag can be applied to the SCEV for this + // addition, then compute the SCEV for this addition by itself + // with a separate call to getAddExpr. We need to do that + // instead of pushing the operands of the addition onto AddOps, + // since the flags are only known to apply to this particular + // addition - they may not apply to other additions that can be + // formed with operands from AddOps. + const SCEV *RHS = getSCEV(U->getOperand(1)); + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); + else + AddOps.push_back(getAddExpr(LHS, RHS, Flags)); + break; + } + if (Opcode == Instruction::Sub) - AddOps.push_back(getNegativeSCEV(Op1)); + AddOps.push_back(getNegativeSCEV(RHS)); else - AddOps.push_back(Op1); + AddOps.push_back(RHS); } - AddOps.push_back(getSCEV(U->getOperand(0))); return getAddExpr(AddOps); } + case Instruction::Mul: { - // Don't transfer NSW/NUW for the same reason as AddExpr. 
SmallVector MulOps; - MulOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); - Op->getValueID() == Instruction::Mul + Value::InstructionVal; - Op = U->getOperand(0)) { - U = cast(Op); + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast(Op); + if (!U || U->getOpcode() != Instruction::Mul) { + assert(Op != V && "V should be a mul"); + MulOps.push_back(getSCEV(Op)); + break; + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + MulOps.push_back(OpSCEV); + break; + } + + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1)), Flags)); + break; + } + MulOps.push_back(getSCEV(U->getOperand(1))); } - MulOps.push_back(getSCEV(U->getOperand(0))); return getMulExpr(MulOps); } case Instruction::UDiv: return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Sub: - return getMinusSCEV(getSCEV(U->getOperand(0)), - getSCEV(U->getOperand(1))); + return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)), + getNoWrapFlagsFromUB(U)); case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. @@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(), + 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (SA->getValue().uge(BitWidth)) break; + // It is currently not resolved how to interpret NSW for left + // shift by BitWidth - 1, so we avoid applying flags in that + // case. Remove this check (or this comment) once the situation + // is resolved. See + // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html + // and http://reviews.llvm.org/D8890 . + auto Flags = SCEV::FlagAnyWrap; + if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U); + Constant *X = ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); - return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags); } break; @@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return createNodeForPHI(cast(U)); case Instruction::Select: - // This could be a smax or umax that was lowered earlier. - // Try to recover it. - if (ICmpInst *ICI = dyn_cast(U->getOperand(0))) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - switch (ICI->getPredicate()) { - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - // a >s b ? a+x : b+x -> smax(a, b)+x - // a >s b ? 
b+x : a+x -> smin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getSMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getSMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - // a >u b ? a+x : b+x -> umax(a, b)+x - // a >u b ? b+x : a+x -> umin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_NE: - // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, One); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - case ICmpInst::ICMP_EQ: - // n == 0 ? 1+x : n+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, One); - const SCEV *RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - default: - break; - } - } + // U can also be a select constant expr, which we let fall through. Since + // createNodeForSelect only works for a condition that is an `ICmpInst`, and + // constant expressions cannot have instructions as operands, we'd have + // returned getUnknown for select constant expressions anyway. + if (isa<Instruction>(U)) + return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), + U->getOperand(1), U->getOperand(2)); default: // We cannot analyze this expression. break; @@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L, return 1; // Get the trip count from the BE count by adding 1. - const SCEV *TCMul = getAddExpr(ExitCount, - getConstant(ExitCount->getType(), 1)); + const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType())); // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt // to factor simple cases.
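The removed cases are pure integer identities, now handled in createNodeForSelectOrPHI. A self-contained check (plain C++, exhaustive at 8 bits; the offset X is an arbitrary example value, and this is an illustration rather than SCEV code):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t X = 0x2A; // arbitrary offset 'x'
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      // a >u b ? a+x : b+x  ->  umax(a, b)+x
      uint8_t Sel = (uint8_t)(A > B ? A + X : B + X);
      assert(Sel == (uint8_t)(std::max(A, B) + X));
      // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
      uint8_t SelNZ = (uint8_t)(A != 0 ? A + X : 1 + X);
      assert(SelNZ == (uint8_t)(std::max(A, 1u) + X));
    }
  return 0;
}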
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) @@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Pair.second) return Pair.first->second; - // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. - BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + BackedgeTakenInfo Result = computeBackedgeTakenCount(L); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && @@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { } // Re-lookup the insert position, since the call to - // ComputeBackedgeTakenCount above could result in a + // computeBackedgeTakenCount above could result in a // recursive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. @@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. A computable result can only be return for loops with a single exit. -/// Returning the minimum taken count among all exits is incorrect because one -/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that -/// the limit of each loop test is never skipped. This is a valid assumption as -/// long as the loop exits via that test. For precise results, it is the -/// caller's responsibility to specify the relevant loop exit using +/// exits. A computable result can only be returned for loops with a single +/// exit. Returning the minimum taken count among all exits is incorrect +/// because one of the loop's exit limits may have been skipped. HowFarToZero +/// assumes that the limit of each loop test is never skipped. This is a valid +/// assumption as long as the loop exits via that test. For precise results, it +/// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { @@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() { delete[] ExitNotTaken.getNextExit(); } -/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = ComputeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is // considered greater than any computable EL.Max.
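As a rough illustration of the aggregation in this function (simplified: the must-exit/may-exit distinction is folded down to a single max, CouldNotCompute is modeled as an empty optional, and the counts are made up):

#include <algorithm>
#include <cassert>
#include <optional>
#include <vector>

struct ToyExitLimit {
  std::optional<unsigned> Exact; // empty models CouldNotCompute
  std::optional<unsigned> Max;
  bool DominatesLatch;
};

int main() {
  std::vector<ToyExitLimit> Exits = {
      {8, 8, true},                // countable exit dominating the latch
      {std::nullopt, 100, false}}; // early exit with no exact count
  bool CouldComputeBECount = true;
  std::optional<unsigned> MaxBECount;
  for (const auto &EL : Exits) {
    if (!EL.Exact)
      CouldComputeBECount = false; // one uncountable exit spoils the exact count
    if (EL.Max && EL.DominatesLatch)
      MaxBECount = std::max(MaxBECount.value_or(0), *EL.Max);
  }
  assert(!CouldComputeBECount && MaxBECount && *MaxBECount == 8);
  return 0;
}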
if (EL.Max != getCouldNotCompute() && Latch && - DT->dominates(ExitBB, Latch)) { + DT.dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) MustExitMaxBECount = EL.Max; else { @@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } -/// ComputeExitLimit - Compute the number of times the backedge of the specified -/// loop will execute if it exits via the specified block. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { - // Okay, we've chosen an exiting block. See what condition causes us to - // exit at this block and remember the exit block and whether all other targets + // Okay, we've chosen an exiting block. See what condition causes us to exit + // at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; BasicBlock *Exit = nullptr; @@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (!Pred) return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); - for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { - BasicBlock *PredSucc = PredTerm->getSuccessor(i); + for (const BasicBlock *PredSucc : PredTerm->successors()) { if (PredSucc == BB) continue; // If the predecessor has a successor that isn't BB and isn't @@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. - return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), + return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), /*ControlsExit=*/IsOnlyExit); } if (SwitchInst *SI = dyn_cast(Term)) - return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, + return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } -/// ComputeExitLimitFromCond - Compute the number of times the +/// computeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. /// @@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { /// condition is true and can infer that failing to meet the condition prior to /// integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, +ScalarEvolution::computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. 
bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, return getCouldNotCompute(); else // The backedge is never taken. - return getConstant(CI->getType(), 0); + return getZero(CI->getType()); } // If it's not an integer or pointer comparison then compute it the hard way. - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } -/// ComputeExitLimitFromICmp - Compute the number of times the -/// backedge of the specified loop will execute if its exit condition -/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, +ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { ExitLimit ItCnt = - ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + computeLoadConstantCompareExitLimit(LI, RHS, L, Cond); if (ItCnt.hasAnyInfo()) return ItCnt; } + ExitLimit ShiftEL = computeShiftCompareExitLimit( + ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond); + if (ShiftEL.hasAnyInfo()) + return ShiftEL; + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); @@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange( - ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + ICmpInst::makeConstantRange(Cond, RHSC->getAPInt())); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; @@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, break; } default: -#if 0 - dbgs() << "ComputeBackedgeTakenCount "; - if (ExitCond->getOperand(0)->getType()->isUnsigned()) - dbgs() << "[unsigned] "; - dbgs() << *LHS << " " - << Instruction::getOpcodeName(Instruction::ICmp) - << " " << *RHS << "\n"; -#endif break; } - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, +ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsExit) { @@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast<ConstantInt>(Val)->getValue(); } -/// ComputeLoadConstantCompareExitLimit - Given an exit condition of +/// computeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeLoadConstantCompareExitLimit( +ScalarEvolution::computeLoadConstantCompareExitLimit( LoadInst *LI, Constant *RHS, const Loop *L, @@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Result = ConstantExpr::getICmp(predicate, Result, RHS); if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure if (cast<ConstantInt>(Result)->getValue().isMinValue()) { -#if 0 - dbgs() << "\n***\n*** Computed loop count " << *ItCst - << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() - << "***\n"; -#endif ++NumArrayLenItCounts; return getConstant(ItCst); // Found terminating iteration! } @@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); } +ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( + Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { + ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); + if (!RHS) + return getCouldNotCompute(); + + const BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return getCouldNotCompute(); + + const BasicBlock *Predecessor = L->getLoopPredecessor(); + if (!Predecessor) + return getCouldNotCompute(); + + // Return true if V is of the form "LHS `shift_op` <positive constant>". + // Return LHS in OutLHS and shift_op in OutOpCode. + auto MatchPositiveShift = + [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { + + using namespace PatternMatch; + + ConstantInt *ShiftAmt; + if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::LShr; + else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::AShr; + else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::Shl; + else + return false; + + return ShiftAmt->getValue().isStrictlyPositive(); + }; + + // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in + // + // loop: + // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] + // %iv.shifted = lshr i32 %iv, <positive constant> + // + // Return true on a successful match. Return the corresponding PHI node (%iv + // above) in PNOut and the opcode of the shift operation in OpCodeOut.
+ auto MatchShiftRecurrence = + [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { + Optional<Instruction::BinaryOps> PostShiftOpCode; + + { + Instruction::BinaryOps OpC; + Value *V; + + // If we encounter a shift instruction, "peel off" the shift operation, + // and remember that we did so. Later when we inspect %iv's backedge + // value, we will make sure that the backedge value uses the same + // operation. + // + // Note: the peeled shift operation does not have to be the same + // instruction as the one feeding into the PHI's backedge value. We only + // really care about it being the same *kind* of shift instruction -- + // that's all that is required for our later inferences to hold. + if (MatchPositiveShift(LHS, V, OpC)) { + PostShiftOpCode = OpC; + LHS = V; + } + } + + PNOut = dyn_cast<PHINode>(LHS); + if (!PNOut || PNOut->getParent() != L->getHeader()) + return false; + + Value *BEValue = PNOut->getIncomingValueForBlock(Latch); + Value *OpLHS; + + return + // The backedge value for the PHI node must be a shift by a positive + // amount + MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && + + // of the PHI node itself + OpLHS == PNOut && + + // and the kind of shift should match the kind of shift we peeled + // off, if any. + (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); + }; + + PHINode *PN; + Instruction::BinaryOps OpCode; + if (!MatchShiftRecurrence(LHS, PN, OpCode)) + return getCouldNotCompute(); + + const DataLayout &DL = getDataLayout(); + + // The key rationale for this optimization is that for some kinds of shift + // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 + // within a finite number of iterations. If the condition guarding the + // backedge (in the sense that the backedge is taken if the condition is true) + // is false for the value the shift recurrence stabilizes to, then we know + // that the backedge is taken only a finite number of times. + + ConstantInt *StableValue = nullptr; + switch (OpCode) { + default: + llvm_unreachable("Impossible case!"); + + case Instruction::AShr: { + // {K,ashr,<positive>} stabilizes to signum(K) in at most + // bitwidth(K) iterations. + Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); + bool KnownZero, KnownOne; + ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); + auto *Ty = cast<IntegerType>(RHS->getType()); + if (KnownZero) + StableValue = ConstantInt::get(Ty, 0); + else if (KnownOne) + StableValue = ConstantInt::get(Ty, -1, true); + else + return getCouldNotCompute(); + + break; + } + case Instruction::LShr: + case Instruction::Shl: + // Both {K,lshr,<positive>} and {K,shl,<positive>} + // stabilize to 0 in at most bitwidth(K) iterations. + StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); + break; + } + + auto *Result = + ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); + assert(Result->getType()->isIntegerTy(1) && + "Otherwise cannot be an operand to a branch instruction"); + + if (Result->isZeroValue()) { + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *UpperBound = + getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); + return ExitLimit(getCouldNotCompute(), UpperBound); + } + + return getCouldNotCompute(); +} /// CanConstantFold - Return true if we can constant fold an instruction of the /// specified type, assuming that all operands were constants.
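The "stabilizes within bitwidth(K) iterations" claims above can be checked directly. A standalone sketch over every 8-bit start value (plain C++, not SCEV code; assumes arithmetic right shift for signed operands, which C++20 guarantees and mainstream compilers have long provided):

#include <cassert>
#include <cstdint>

int main() {
  for (int K = -128; K <= 127; ++K) {
    uint8_t U = (uint8_t)K; // {K,lshr,1} reaches 0 within 8 steps
    int8_t S = (int8_t)K;   // {K,ashr,1} reaches signum-like 0 or -1
    uint8_t L = (uint8_t)K; // {K,shl,1} reaches 0 within 8 steps
    for (int I = 0; I < 8; ++I) {
      U = (uint8_t)(U >> 1);
      S = (int8_t)(S >> 1);
      L = (uint8_t)(L << 1);
    }
    assert(U == 0 && L == 0 && S == (K < 0 ? -1 : 0));
  }
  return 0;
}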
@@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = nullptr; - for (Instruction::op_iterator OpI = UseInst->op_begin(), - OpE = UseInst->op_end(); OpI != OpE; ++OpI) { + for (Value *Op : UseInst->operands()) { + if (isa(Op)) continue; - if (isa(*OpI)) continue; - - Instruction *OpInst = dyn_cast(*OpI); + Instruction *OpInst = dyn_cast(Op); if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast(OpInst); @@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast(V); if (!I || !canConstantEvolve(I, L)) return nullptr; - if (PHINode *PN = dyn_cast(I)) { + if (PHINode *PN = dyn_cast(I)) return PN; - } // Record non-constant instructions contained by the loop. DenseMap PHIMap; @@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, TLI); } + +// If every incoming value to PN except the one for BB is a specific Constant, +// return that, else return nullptr. +static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { + Constant *IncomingVal = nullptr; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingBlock(i) == BB) + continue; + + auto *CurrentVal = dyn_cast(PN->getIncomingValue(i)); + if (!CurrentVal) + return nullptr; + + if (IncomingVal != CurrentVal) { + if (IncomingVal) + return nullptr; + IncomingVal = CurrentVal; + } + } + + return IncomingVal; +} + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence @@ -5462,8 +5946,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - DenseMap::const_iterator I = - ConstantEvolutionLoopExitValue.find(PN); + auto I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // Since the loop is canonicalized, the PHI node must have two entries. One - // entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. - bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return nullptr; + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return RetVal = nullptr; - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + Value *BEValue = PN->getIncomingValueForBlock(Latch); // Execute the loop symbolically to determine the exit value. 
if (BEs.getActiveBits() >= 32) @@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (; ; ++IterationNum) { if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! @@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = - EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. SmallVector, 8> PHIsToCompute; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; - PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + PHIsToCompute.emplace_back(PHI, I.second); } // We use two distinct loops because EvaluateExpression may invalidate any // iterators into CurrentIterVals. - for (SmallVectorImpl >::const_iterator - I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = I->first; + for (const auto &I : PHIsToCompute) { + PHINode *PHI = I.first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } - if (NextPHI != I->second) + if (NextPHI != I.second) StoppedEvolving = false; } @@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, } } -/// ComputeExitCountExhaustively - If the loop is known to execute a -/// constant number of times (the condition evolves only from constants), -/// try to evaluate a few iterations of the loop until we get the exit -/// condition gets a value of ExitWhen (true or false). If we cannot -/// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, +const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); @@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // One entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. 
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Should follow from NumIncomingValues == 2!"); + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } @@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // the loop symbolically to determine when the condition gets a value of // "ExitWhen". unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ - ConstantInt *CondVal = dyn_cast_or_null( - EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI)); + auto *CondVal = dyn_cast_or_null( + EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // calling EvaluateExpression on them because that may invalidate iterators // into CurrentIterVals. SmallVector PHIsToCompute; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI->getParent() != Header) continue; PHIsToCompute.push_back(PHI); } - for (SmallVectorImpl::const_iterator I = PHIsToCompute.begin(), - E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = *I; + for (PHINode *PHI : PHIsToCompute) { Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + SmallVector, 2> &Values = + ValuesAtScopes[V]; // Check to see if we've folded this expression at this loop before. - SmallVector, 2> &Values = ValuesAtScopes[V]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == L) - return Values[u].second ? Values[u].second : V; - } - Values.push_back(std::make_pair(L, static_cast(nullptr))); + for (auto &LS : Values) + if (LS.first == L) + return LS.second ? LS.second : V; + + Values.emplace_back(L, nullptr); + // Otherwise compute it. 
const SCEV *C = computeSCEVAtScope(V, L); - SmallVector, 2> &Values2 = ValuesAtScopes[V]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == L) { - Values2[u - 1].second = C; + for (auto &LS : reverse(ValuesAtScopes[V])) + if (LS.first == L) { + LS.second = C; break; } - } return C; } @@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // exit value from the loop without using SCEVs. if (const SCEVUnknown *SU = dyn_cast(V)) { if (Instruction *I = dyn_cast(SU->getValue())) { - const Loop *LI = (*this->LI)[I->getParent()]; + const Loop *LI = this->LI[I->getParent()]; if (LI && LI->getParentLoop() == L) // Looking for loop exit value. if (PHINode *PN = dyn_cast(I)) if (PN->getParent() == LI->getHeader()) { @@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. - Constant *RV = getConstantEvolutionLoopExitValue(PN, - BTCC->getValue()->getValue(), - LI); + Constant *RV = + getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); if (RV) return getSCEV(RV); } } @@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (CanConstantFold(I)) { SmallVector Operands; bool MadeImprovement = false; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { if (Constant *C = dyn_cast(Op)) { Operands.push_back(C); continue; @@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { Constant *C = nullptr; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], DL, TLI); + Operands[1], DL, &TLI); else if (const LoadInst *LI = dyn_cast(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, - DL, TLI); + DL, &TLI); if (!C) return V; return getSCEV(C); } @@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(CNC, CNC); } - uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); - const APInt &L = LC->getValue()->getValue(); - const APInt &M = MC->getValue()->getValue(); - const APInt &N = NC->getValue()->getValue(); + uint32_t BitWidth = LC->getAPInt().getBitWidth(); + const APInt &L = LC->getAPInt(); + const APInt &M = MC->getAPInt(); + const APInt &N = NC->getAPInt(); APInt Two(BitWidth, 2); APInt Four(BitWidth, 4); @@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { const SCEVConstant *R1 = dyn_cast(Roots.first); const SCEVConstant *R2 = dyn_cast(Roots.second); if (R1 && R2) { -#if 0 - dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 - << " sol#2: " << *R2 << "\n"; -#endif // Pick the smallest positive root value. 
if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, @@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // For negative steps (counting down to zero): // N = Start/-Step // First compute the unsigned distance from zero in the direction of Step. - bool CountDown = StepC->getValue()->getValue().isNegative(); + bool CountDown = StepC->getAPInt().isNegative(); const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); // Handle unitary steps, which cannot wraparound. @@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // done by counting and comparing the number of trailing zeros of Step and // Distance. if (!CountDown) { - const APInt &StepV = StepC->getValue()->getValue(); + const APInt &StepV = StepC->getAPInt(); // StepV.isPowerOf2() returns true if StepV is a positive power of two. It // also returns true if StepV is maximally negative (eg, INT_MIN), but that // case is not handled as this code is guarded by !CountDown. if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, Step); + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { + // Here we've constrained the equation to be of the form + // + // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) + // + // where we're operating on a W bit wide integer domain and k is + // non-negative. The smallest unsigned solution for X is the trip count. + // + // (0) is equivalent to: + // + // 2^(N + k) * Distance' - 2^N * X = L * 2^W + // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N + // <=> 2^k * Distance' - X = L * 2^(W - N) + // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) + // + // The smallest X satisfying (1) is unsigned remainder of dividing the LHS + // by 2^(W - N). + // + // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) + // + // E.g. say we're solving + // + // 2 * Val = 2 * X (in i8) ... (3) + // + // then from (2), we get X = Val URem i8 128 (k = 0 in this case). + // + // Note: It is tempting to solve (3) by setting X = Val, but Val is not + // necessarily the smallest unsigned value of X that satisfies (3). + // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) + // is i8 1, not i8 -127 + + const auto *ModuloResult = getUDivExactExpr(Distance, Step); + + // Since SCEV does not have a URem node, we construct one using a truncate + // and a zero extend. + + unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); + auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); + auto *WideTy = Distance->getType(); + + return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + } } // If the condition controls loop exit (the loop exits only if the expression @@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) - return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), - -StartC->getValue()->getValue(), + return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), *this); return getCouldNotCompute(); } @@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // already. If so, the backedge will execute zero times.
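The "smallest X" subtlety worked through above is easy to confirm at 8 bits. A standalone sketch for Step = 2 (so N = 1, W = 8; plain C++, illustration only): the smallest unsigned solution of Step*X == Step*Val (mod 2^W) is Val urem 2^(W-N), not Val itself, exactly as in the i8 -127 example:

#include <cassert>

int main() {
  for (unsigned Val = 0; Val <= 0xFF; ++Val) {
    unsigned Smallest = ~0u;
    for (unsigned X = 0; X <= 0xFF; ++X)
      if (((2 * X) & 0xFF) == ((2 * Val) & 0xFF)) {
        Smallest = X; // first hit is the smallest solution
        break;
      }
    assert(Smallest == Val % 128); // Val urem 2^(W - N)
  }
  assert(0x81u % 128 == 1); // Val = i8 -127: smallest X is 1, not -127
  return 0;
}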
if (const SCEVConstant *C = dyn_cast(V)) { if (!C->getValue()->isNullValue()) - return getConstant(C->getType(), 0); + return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. - if (Loop *L = LI->getLoopFor(BB)) + if (Loop *L = LI.getLoopFor(BB)) return std::make_pair(L->getLoopPredecessor(), L->getHeader()); return std::pair(); @@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; + auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { + // Not all instructions that are "identical" compute the same value. For + // instance, two distinct alloca instructions allocating the same type are + // identical and do not read memory; but compute distinct values. + return A->isIdenticalTo(B) && (isa(A) || isa(A)); + }; + // Otherwise, if they're both SCEVUnknown, it's possible that they hold // two different instructions with the same value. Check for this case. if (const SCEVUnknown *AU = dyn_cast(A)) if (const SCEVUnknown *BU = dyn_cast(B)) if (const Instruction *AI = dyn_cast(AU->getValue())) if (const Instruction *BI = dyn_cast(BU->getValue())) - if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + if (ComputesEqualValues(AI, BI)) return true; // Otherwise assume they may have a different value. @@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // If there's a constant operand, canonicalize comparisons with boundary // cases, and canonicalize *-or-equal comparisons to regular comparisons. if (const SCEVConstant *RC = dyn_cast(RHS)) { - const APInt &RA = RC->getValue()->getValue(); + const APInt &RA = RC->getAPInt(); switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: @@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { - LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - SCEV::FlagNUW); + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { - RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - SCEV::FlagNUW); + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { @@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, if (LeftGuarded && RightGuarded) return true; + if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) + return true; + // Otherwise see what can be done with known constant ranges. 
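The unsigned canonicalizations in this hunk rest on identities that hold whenever the decremented side cannot wrap, which is what the unsigned-min checks guarantee. A standalone exhaustive check at 8 bits (plain C++, illustration only):

#include <cassert>

int main() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      if (A >= 1) // a u<= b  <=>  (a - 1) u< b, since a - 1 cannot wrap
        assert((A <= B) == (A - 1 < B));
      if (B >= 1) // a u>= b  <=>  a u> (b - 1), since b - 1 cannot wrap
        assert((A >= B) == (A > B - 1));
    }
  return 0;
}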
return isKnownPredicateWithRanges(Pred, LHS, RHS); } +bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); + +#ifndef NDEBUG + // Verify an invariant: inverting the predicate should turn a monotonically + // increasing change to a monotonically decreasing one, and vice versa. + bool IncreasingSwapped; + bool ResultSwapped = isMonotonicPredicateImpl( + LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); + + assert(Result == ResultSwapped && "should be able to analyze both!"); + if (ResultSwapped) + assert(Increasing == !IncreasingSwapped && + "monotonicity should flip as we flip the predicate"); +#endif + + return Result; +} + +bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + + // A zero step value for LHS means the induction variable is essentially a + // loop invariant value. We don't really depend on the predicate actually + // flipping from false to true (for increasing predicates, and the other way + // around for decreasing predicates), all we care about is that *if* the + // predicate changes then it only changes from false to true. + // + // A zero step value in itself is not very useful, but there may be places + // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be + // as general as possible. + + switch (Pred) { + default: + return false; // Conservative answer + + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (!LHS->getNoWrapFlags(SCEV::FlagNUW)) + return false; + + Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE; + return true; + + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!LHS->getNoWrapFlags(SCEV::FlagNSW)) + return false; + + const SCEV *Step = LHS->getStepRecurrence(*this); + + if (isKnownNonNegative(Step)) { + Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE; + return true; + } + + if (isKnownNonPositive(Step)) { + Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE; + return true; + } + + return false; + } + + } + + llvm_unreachable("switch has default clause!"); +} + +bool ScalarEvolution::isLoopInvariantPredicate( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS) { + + // If there is a loop-invariant, force it into the RHS, otherwise bail out. + if (!isLoopInvariant(RHS, L)) { + if (!isLoopInvariant(LHS, L)) + return false; + + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + const SCEVAddRecExpr *ArLHS = dyn_cast(LHS); + if (!ArLHS || ArLHS->getLoop() != L) + return false; + + bool Increasing; + if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) + return false; + + // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to + // true as the loop iterates, and the backedge is control dependent on + // "ArLHS `Pred` RHS" == true then we can reason as follows: + // + // * if the predicate was false in the first iteration then the predicate + // is never evaluated again, since the loop exits without taking the + // backedge. 
+ // * if the predicate was true in the first iteration then it will + // continue to be true for all future iterations since it is + // monotonically increasing. + // + // For both the above possibilities, we can replace the loop varying + // predicate with its value on the first iteration of the loop (which is + // loop invariant). + // + // A similar reasoning applies for a monotonically decreasing predicate, by + // replacing true with false and false with true in the above two bullets. + + auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); + + if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) + return false; + + InvariantPred = Pred; + InvariantLHS = ArLHS->getStart(); + InvariantRHS = RHS; + return true; +} + bool ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { @@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, return false; } +bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + + // Match Result to (X + Y) where Y is a constant integer. + // Return Y via OutY. + auto MatchBinaryAddToConst = + [this](const SCEV *Result, const SCEV *X, APInt &OutY, + SCEV::NoWrapFlags ExpectedFlags) { + const SCEV *NonConstOp, *ConstOp; + SCEV::NoWrapFlags FlagsPresent; + + if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) || + !isa<SCEVConstant>(ConstOp) || NonConstOp != X) + return false; + + OutY = cast<SCEVConstant>(ConstOp)->getAPInt(); + return (FlagsPresent & ExpectedFlags) == ExpectedFlags; + }; + + APInt C; + + switch (Pred) { + default: + break; + + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: + // X s<= (X + C) if C >= 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) + return true; + + // (X + C) s<= X if C <= 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && + !C.isStrictlyPositive()) + return true; + break; + + case ICmpInst::ICMP_SGT: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: + // X s< (X + C) if C > 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && + C.isStrictlyPositive()) + return true; + + // (X + C) s< X if C < 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()) + return true; + break; + } + + return false; +} + +bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) + return false; + + // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting on + // the stack can result in exponential time complexity. + SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); + + // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L + // + // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use + // isKnownPredicate. isKnownPredicate is more powerful, but also more + // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the + // interesting cases seen in practice. We can consider "upgrading" L >= 0 to + // use isKnownPredicate later if needed. + return isKnownNonNegative(RHS) && + isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && + isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); +} + /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is /// protected by a conditional between LHS and RHS. This is used to /// eliminate casts.
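The X s<= (X + C) / (X + C) s<= X rules above only need the nsw precondition. An exhaustive standalone check at 8 bits (plain C++; the explicit no-signed-wrap test stands in for the FlagNSW requirement):

#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int C = -128; C <= 127; ++C) {
      int Sum = X + C; // exact sum, then test whether it fits in i8
      if (Sum < -128 || Sum > 127)
        continue; // would wrap: the nsw precondition fails
      if (C >= 0)
        assert(X <= Sum); // X s<= (X + C) if C >= 0
      if (C <= 0)
        assert(Sum <= X); // (X + C) s<= X if C <= 0
    }
  return 0;
}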
@@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->getSuccessor(0) != L->getHeader())) return true; + // We don't want more than one activation of the following loops on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true); + + // See if we can exploit a trip count to prove the predicate. + const auto &BETakenInfo = getBackedgeTakenInfo(L); + const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); + if (LatchBECount != getCouldNotCompute()) { + // We know that Latch branches back to the loop header exactly + // LatchBECount times. This means the backedge condition at Latch is + // equivalent to "{0,+,1} u< LatchBECount". + Type *Ty = LatchBECount->getType(); + auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); + const SCEV *LoopCounter = + getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); + if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, + LatchBECount)) + return true; + } + // Check conditions due to any @llvm.assume intrinsics. - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast<CallInst>(AssumeVH); - if (!DT->dominates(CI, Latch->getTerminator())) + if (!DT.dominates(CI, Latch->getTerminator())) continue; if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) return true; } - struct ClearWalkingBEDominatingCondsOnExit { - ScalarEvolution &SE; - - explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) - : SE(SE){}; - - ~ClearWalkingBEDominatingCondsOnExit() { - SE.WalkingBEDominatingConds = false; - } - }; - - // We don't want more than one activation of the following loop on the stack - // -- that can lead to O(n!) time complexity. - if (WalkingBEDominatingConds) - return false; - - WalkingBEDominatingConds = true; - ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); - // If the loop is not reachable from the entry block, we risk running into an // infinite loop as we walk up into the dom tree. These loops do not matter // anyway, so we just return a conservative answer when we see them. - if (!DT->isReachableFromEntry(L->getHeader())) + if (!DT.isReachableFromEntry(L->getHeader())) return false; - for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; - DTN != HeaderDTN; - DTN = DTN->getIDom()) { + for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; + DTN != HeaderDTN; DTN = DTN->getIDom()) { assert(DTN && "should reach the loop header before reaching the root!"); @@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, // We're constructively (and conservatively) enumerating edges within the // loop body that dominate the latch. The dominator tree better agree // with us on this: - assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + assert(DT.dominates(DominatingEdge, Latch) && "should be!"); if (isImpliedCond(Pred, LHS, RHS, Condition, BB != ContinuePredicate->getSuccessor(0))) @@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC->assumptions()) {
+  for (auto &AssumeVH : AC.assumptions()) {
     if (!AssumeVH)
       continue;
     auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT->dominates(CI, L->getHeader()))
+    if (!DT.dominates(CI, L->getHeader()))
       continue;
 
     if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
@@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
   return false;
 }
 
+namespace {
 /// RAII wrapper to prevent recursive application of isImpliedCond.
 /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
 /// currently evaluating isImpliedCond.
@@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate {
       LoopPreds.erase(Cond);
   }
 };
+} // end anonymous namespace
 
 /// isImpliedCond - Test whether the condition described by Pred, LHS,
 /// and RHS is true whenever the given Cond value evaluates to true.
@@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
   const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
 
+  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                    const SCEV *RHS,
+                                    ICmpInst::Predicate FoundPred,
+                                    const SCEV *FoundLHS,
+                                    const SCEV *FoundRHS) {
   // Balance the types.
   if (getTypeSizeInBits(LHS->getType()) <
       getTypeSizeInBits(FoundLHS->getType())) {
@@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    RHS, LHS, FoundLHS, FoundRHS);
   }
 
+  // Unsigned comparison is the same as signed comparison when both the
+  // operands are non-negative.
+  if (CmpInst::isUnsigned(FoundPred) &&
+      CmpInst::getSignedPredicate(FoundPred) == Pred &&
+      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
+    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
   // Check if we can make progress by sharpening ranges.
   if (FoundPred == ICmpInst::ICMP_NE &&
       (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
@@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       APInt Min = ICmpInst::isSigned(Pred)
                       ? getSignedRange(V).getSignedMin()
                       : getUnsignedRange(V).getUnsignedMin();
 
-      if (Min == C->getValue()->getValue()) {
+      if (Min == C->getAPInt()) {
         // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
         // This is true even if (Min + 1) wraps around -- in case of
         // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
@@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   return false;
 }
 
+bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
+                                     const SCEV *&L, const SCEV *&R,
+                                     SCEV::NoWrapFlags &Flags) {
+  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
+  if (!AE || AE->getNumOperands() != 2)
+    return false;
+
+  L = AE->getOperand(0);
+  R = AE->getOperand(1);
+  Flags = AE->getNoWrapFlags();
+  return true;
+}
+
+bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
+                                                const SCEV *More,
+                                                APInt &C) {
+  // We avoid subtracting expressions here because this function is usually
+  // fairly deep in the call stack (i.e. is called many times).
+
+  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
+    const auto *LAR = cast<SCEVAddRecExpr>(Less);
+    const auto *MAR = cast<SCEVAddRecExpr>(More);
+
+    if (LAR->getLoop() != MAR->getLoop())
+      return false;
+
+    // We look at affine expressions only; not for correctness but to keep
+    // getStepRecurrence cheap.
+    if (!LAR->isAffine() || !MAR->isAffine())
+      return false;
+
+    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
+      return false;
+
+    Less = LAR->getStart();
+    More = MAR->getStart();
+
+    // fall through
+  }
+
+  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
+    const auto &M = cast<SCEVConstant>(More)->getAPInt();
+    const auto &L = cast<SCEVConstant>(Less)->getAPInt();
+    C = M - L;
+    return true;
+  }
+
+  const SCEV *L, *R;
+  SCEV::NoWrapFlags Flags;
+  if (splitBinaryAdd(Less, L, R, Flags))
+    if (const auto *LC = dyn_cast<SCEVConstant>(L))
+      if (R == More) {
+        C = -(LC->getAPInt());
+        return true;
+      }
+
+  if (splitBinaryAdd(More, L, R, Flags))
+    if (const auto *LC = dyn_cast<SCEVConstant>(L))
+      if (R == Less) {
+        C = LC->getAPInt();
+        return true;
+      }
+
+  return false;
+}
+
+bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
+    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+    const SCEV *FoundLHS, const SCEV *FoundRHS) {
+  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
+    return false;
+
+  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!AddRecLHS)
+    return false;
+
+  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
+  if (!AddRecFoundLHS)
+    return false;
+
+  // We'd like to let SCEV reason about control dependencies, so we constrain
+  // both the inequalities to be about add recurrences on the same loop.  This
+  // way we can use isLoopEntryGuardedByCond later.
+
+  const Loop *L = AddRecFoundLHS->getLoop();
+  if (L != AddRecLHS->getLoop())
+    return false;
+
+  //  FoundLHS u< FoundRHS u< -C  =>  (FoundLHS + C) u< (FoundRHS + C) ... (1)
+  //
+  //  FoundLHS s< FoundRHS s< INT_MIN - C  =>  (FoundLHS + C) s< (FoundRHS + C)
+  //                                                                  ... (2)
+  //
+  // Informal proof for (2), assuming (1) [*]:
+  //
+  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
+  //
+  // Then
+  //
+  //   FoundLHS s< FoundRHS s< INT_MIN - C
+  //   <=>  (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C      [ using (3) ]
+  //   <=>  (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C)    [ using (1) ]
+  //   <=>  (FoundLHS + INT_MIN + C + INT_MIN) s<
+  //        (FoundRHS + INT_MIN + C + INT_MIN)                      [ using (3) ]
+  //   <=>  FoundLHS + C s< FoundRHS + C
+  //
+  // [*]: (1) can be proved by ruling out overflow.
+  //
+  // [**]: This can be proved by analyzing all four possibilities:
+  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
+  //    (A s>= 0, B s>= 0).
+  //
+  // Note:
+  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
+  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS
+  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS
+  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is
+  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
+  // C)".
+
+  APInt LDiff, RDiff;
+  if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
+      !computeConstantDifference(FoundRHS, RHS, RDiff) ||
+      LDiff != RDiff)
+    return false;
+
+  if (LDiff == 0)
+    return true;
+
+  APInt FoundRHSLimit;
+
+  if (Pred == CmpInst::ICMP_ULT) {
+    FoundRHSLimit = -RDiff;
+  } else {
+    assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
+    FoundRHSLimit =
+        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+  }
+
+  // Try to prove (1) or (2), as needed.
+  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+                                  getConstant(FoundRHSLimit));
+}
+
 /// isImpliedCondOperands - Test whether the condition described by Pred,
 /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
 /// and FoundRHS is true.
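The [**] step above, (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)), is also easy to confirm exhaustively at 8 bits; a standalone sketch (plain C++, independent of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      // Adding INT_MIN modulo 2^8 biases [-128, 127] onto [0, 255].
      uint8_t UA = (uint8_t)(A + 128);
      uint8_t UB = (uint8_t)(B + 128);
      assert((A < B) == (UA < UB));
    }
  return 0;
}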
@@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
     return true;
 
+  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                      FoundLHS, FoundRHS) ||
          // ~x < ~y --> x > y
@@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
 /// If Expr computes ~A, return A else return nullptr
 static const SCEV *MatchNotExpr(const SCEV *Expr) {
   const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-  if (!Add || Add->getNumOperands() != 2) return nullptr;
-
-  const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
-  if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+  if (!Add || Add->getNumOperands() != 2 ||
+      !Add->getOperand(0)->isAllOnesValue())
     return nullptr;
 
   const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-  if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
-
-  const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
-  if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+      !AddRHS->getOperand(0)->isAllOnesValue())
     return nullptr;
 
   return AddRHS->getOperand(1);
@@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
   const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
   if (!MaxExpr) return false;
 
-  auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
-  return It != MaxExpr->op_end();
+  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
 }
 
 
@@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
   return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
 }
 
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+                                           ICmpInst::Predicate Pred,
+                                           const SCEV *LHS, const SCEV *RHS) {
+
+  // If both sides are affine addrecs for the same loop, with equal
+  // steps, and we know the recurrences don't wrap, then we only
+  // need to check the predicate on the starting values.
+
+  if (!ICmpInst::isRelational(Pred))
+    return false;
+
+  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!LAR)
+    return false;
+  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+  if (!RAR)
+    return false;
+  if (LAR->getLoop() != RAR->getLoop())
+    return false;
+  if (!LAR->isAffine() || !RAR->isAffine())
+    return false;
+
+  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+    return false;
+
+  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+                         SCEV::FlagNSW : SCEV::FlagNUW;
+  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+    return false;
+
+  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
 
 /// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
 /// expression?
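A toy model of IsKnownPredicateViaAddRecStart just above: two no-wrap affine recurrences over the same loop with equal steps keep the order of their starts on every iteration, so comparing the starts is enough. A standalone sketch with illustrative constants (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // {4,+,3} s< {10,+,3}: same loop, same step, assumed no-wrap.
  int64_t LHS = 4, RHS = 10;
  const int64_t Step = 3;
  for (int Iter = 0; Iter < 1000; ++Iter) {
    assert(LHS < RHS); // the relation between the starts is preserved
    LHS += Step;
    RHS += Step;
  }
  return 0;
}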
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   auto IsKnownPredicateFull =
       [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
     return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
-           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+           IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+           isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
   };
 
   switch (Pred) {
@@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
       !isa<SCEVConstant>(AddLHS->getOperand(0)))
     return false;
 
-  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
 
   // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
   // antecedent "`FoundLHS` `Pred` `FoundRHS`".
@@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
 
   // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
   // for `LHS`:
-  APInt Addend =
-      cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+  APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
   ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
 
   // We can also compute the range of values for `LHS` that satisfy the
   // consequent, "`LHS` `Pred` `RHS`":
-  APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+  APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
   ConstantRange SatisfyingLHSRange =
       ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
 
@@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
   if (NoWrap) return false;
 
   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
-  const SCEV *One = getConstant(Stride->getType(), 1);
+  const SCEV *One = getOne(Stride->getType());
 
   if (IsSigned) {
     APInt MaxRHS = getSignedRange(RHS).getSignedMax();
@@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
   if (NoWrap) return false;
 
   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
-  const SCEV *One = getConstant(Stride->getType(), 1);
+  const SCEV *One = getOne(Stride->getType());
 
   if (IsSigned) {
     APInt MinRHS = getSignedRange(RHS).getSignedMin();
@@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
 // stride and presence of the equality in the comparison.
 const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
                                             bool Equality) {
-  const SCEV *One = getConstant(Step->getType(), 1);
+  const SCEV *One = getOne(Step->getType());
   Delta = Equality ?
              getAddExpr(Delta, Step) :
              getAddExpr(Delta, getMinusSCEV(Step, One));
   return getUDivExpr(Delta, Step);
@@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     // overflow, in which case if RHS - Start is a constant, we don't need to
     // do a max operation since we can just figure it out statically
     if (NoWrap && isa<SCEVConstant>(Diff)) {
-      APInt D = dyn_cast<SCEVConstant>(Diff)->getValue()->getValue();
+      APInt D = dyn_cast<SCEVConstant>(Diff)->getAPInt();
       if (D.isNegative())
         End = Start;
     } else
@@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
     // overflow, in which case if RHS - Start is a constant, we don't need to
     // do a max operation since we can just figure it out statically
     if (NoWrap && isa<SCEVConstant>(Diff)) {
-      APInt D = dyn_cast<SCEVConstant>(Diff)->getValue()->getValue();
+      APInt D = dyn_cast<SCEVConstant>(Diff)->getAPInt();
       if (!D.isNegative())
         End = Start;
     } else
@@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
     if (!SC->getValue()->isZero()) {
       SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
-      Operands[0] = SE.getConstant(SC->getType(), 0);
+      Operands[0] = SE.getZero(SC->getType());
       const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
                                              getNoWrapFlags(FlagNW));
-      if (const SCEVAddRecExpr *ShiftedAddRec =
-            dyn_cast<SCEVAddRecExpr>(Shifted))
+      if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
         return ShiftedAddRec->getNumIterationsInRange(
-            Range.subtract(SC->getValue()->getValue()), SE);
+            Range.subtract(SC->getAPInt()), SE);
       // This is strange and shouldn't happen.
       return SE.getCouldNotCompute();
     }
 
   // The only time we can solve this is when we have all constant indices.
   // Otherwise, we cannot determine the overflow conditions.
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (!isa<SCEVConstant>(getOperand(i)))
-      return SE.getCouldNotCompute();
-
+  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
+    return SE.getCouldNotCompute();
 
   // Okay at this point we know that all elements of the chrec are constants
   // and that the start element is zero.
@@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   // iteration exits.
   unsigned BitWidth = SE.getTypeSizeInBits(getType());
   if (!Range.contains(APInt(BitWidth, 0)))
-    return SE.getConstant(getType(), 0);
+    return SE.getZero(getType());
 
   if (isAffine()) {
     // If this is an affine expression then we have this situation:
@@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     // If A is negative then the lower of the range is the last possible loop
     // value.  Also note that we already checked for a full range.
     APInt One(BitWidth,1);
-    APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+    APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
     APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
 
     // The exit value should be (End+A)/A.
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
                                              FlagAnyWrap);
 
     // Next, solve the constructed addrec
-    std::pair<const SCEV *, const SCEV *> Roots =
-      SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+    auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
     if (R1) {
       // Pick the smallest positive root value.
-      if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
-                                                      R1->getValue(),
-                                                      R2->getValue()))) {
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
+              ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
         if (!CB->getZExtValue())
           std::swap(R1, R2);   // R1 is the minimum root now.
 
@@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
       if (Range.contains(R1Val->getValue())) {
         // The next iteration must be out of the range...
         ConstantInt *NextVal =
-                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+            ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
 
         R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
         if (!Range.contains(R1Val->getValue()))
@@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
       // If R1 was not in the range, then it is a good return value.  Make
       // sure that R1-1 WAS in the range though, just in case.
       ConstantInt *NextVal =
-             ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+          ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
       R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
       if (Range.contains(R1Val->getValue()))
         return R1;
@@ -7644,9 +8551,84 @@ struct SCEVCollectTerms {
   }
   bool isDone() const { return false; }
 };
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+  bool &ContainsAddRec;
+
+  SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+    ContainsAddRec = false;
+  }
+
+  bool follow(const SCEV *S) {
+    if (isa<SCEVAddRecExpr>(S)) {
+      ContainsAddRec = true;
+
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+//   8 * (100 +  %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array
+// size parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that
+// are spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+  SmallVectorImpl<const SCEV *> &Terms;
+  ScalarEvolution &SE;
+
+  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+                              ScalarEvolution &SE)
+      : Terms(T), SE(SE) {}
+
+  bool follow(const SCEV *S) {
+    if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+      bool HasAddRec = false;
+      SmallVector<const SCEV *, 0> Operands;
+      for (auto Op : Mul->operands()) {
+        if (isa<SCEVUnknown>(Op)) {
+          Operands.push_back(Op);
+        } else {
+          bool ContainsAddRec;
+          SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+          visitAll(Op, ContainsAddRecVisitor);
+          HasAddRec |= ContainsAddRec;
+        }
+      }
+      if (Operands.size() == 0)
+        return true;
+
+      if (!HasAddRec)
+        return false;
+
+      Terms.push_back(SE.getMulExpr(Operands));
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
 }
 
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters
+/// in two places:
+///   1) The strides of AddRec expressions.
+///   2) Unknowns that are multiplied with AddRec expressions.
 void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
                                              SmallVectorImpl<const SCEV *> &Terms) {
   SmallVector<const SCEV *, 4> Strides;
@@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
       for (const SCEV *T : Terms)
         dbgs() << *T << "\n";
     });
+
+  SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+  visitAll(Expr, MulCollector);
 }
 
 static bool findArrayDimensionsRec(ScalarEvolution &SE,
@@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
   return true;
 }
 
-namespace {
-struct FindParameter {
-  bool FoundParameter;
-  FindParameter() : FoundParameter(false) {}
-
-  bool follow(const SCEV *S) {
-    if (isa<SCEVUnknown>(S)) {
-      FoundParameter = true;
-      // Stop recursion: we found a parameter.
-      return false;
-    }
-    // Keep looking.
-    return true;
-  }
-  bool isDone() const {
-    // Stop recursion if we have found a parameter.
-    return FoundParameter;
-  }
-};
-}
-
 // Returns true when S contains at least a SCEVUnknown parameter.
 static inline bool containsParameters(const SCEV *S) {
+  struct FindParameter {
+    bool FoundParameter;
+    FindParameter() : FoundParameter(false) {}
+
+    bool follow(const SCEV *S) {
+      if (isa<SCEVUnknown>(S)) {
+        FoundParameter = true;
+        // Stop recursion: we found a parameter.
+        return false;
+      }
+      // Keep looking.
+      return true;
+    }
+    bool isDone() const {
+      // Stop recursion if we have found a parameter.
+      return FoundParameter;
+    }
+  };
+
   FindParameter F;
   SCEVTraversal<FindParameter> ST(F);
   ST.visitAll(S);
@@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
 
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
-  // Divide all terms by the element size.
+  // Try to divide all terms by the element size. If a term is not divisible
+  // by the element size, keep the original term.
   for (const SCEV *&Term : Terms) {
     const SCEV *Q, *R;
     SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
-    Term = Q;
+    if (!Q->isZero())
+      Term = Q;
   }
 
   SmallVector<const SCEV *, 4> NewTerms;
@@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions(
   if (Sizes.empty())
     return;
 
-  if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
+  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
     if (!AR->isAffine())
       return;
 
@@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
 // ScalarEvolution Class Implementation
 //===----------------------------------------------------------------------===//
 
-ScalarEvolution::ScalarEvolution()
-    : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
-      LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
-  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+                                 AssumptionCache &AC, DominatorTree &DT,
+                                 LoopInfo &LI)
+    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+      CouldNotCompute(new SCEVCouldNotCompute()),
+      WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+      ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
+      FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+      CouldNotCompute(std::move(Arg.CouldNotCompute)),
+      ValueExprMap(std::move(Arg.ValueExprMap)),
+      WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+      ConstantEvolutionLoopExitValue(
+          std::move(Arg.ConstantEvolutionLoopExitValue)),
+      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+      LoopDispositions(std::move(Arg.LoopDispositions)),
+      BlockDispositions(std::move(Arg.BlockDispositions)),
+      UnsignedRanges(std::move(Arg.UnsignedRanges)),
+      SignedRanges(std::move(Arg.SignedRanges)),
+      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+      UniquePreds(std::move(Arg.UniquePreds)),
+      SCEVAllocator(std::move(Arg.SCEVAllocator)),
+      FirstUnknown(Arg.FirstUnknown) {
+  Arg.FirstUnknown = nullptr;
+}
 
-bool ScalarEvolution::runOnFunction(Function &F) {
-  this->F = &F;
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return false;
-}
-
-void ScalarEvolution::releaseMemory() {
+ScalarEvolution::~ScalarEvolution() {
   // Iterate through all the SCEVUnknown instances and call their
   // destructors, so that they release their references to their values.
-  for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
-    U->~SCEVUnknown();
+  for (SCEVUnknown *U = FirstUnknown; U;) {
+    SCEVUnknown *Tmp = U;
+    U = U->Next;
+    Tmp->~SCEVUnknown();
+  }
   FirstUnknown = nullptr;
 
   ValueExprMap.clear();
 
   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
-  for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator I =
-         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
-       I != E; ++I) {
-    I->second.clear();
-  }
+  for (auto &BTCI : BackedgeTakenCounts)
+    BTCI.second.clear();
 
   assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
   assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
-
-  BackedgeTakenCounts.clear();
-  ConstantEvolutionLoopExitValue.clear();
-  ValuesAtScopes.clear();
-  LoopDispositions.clear();
-  BlockDispositions.clear();
-  UnsignedRanges.clear();
-  SignedRanges.clear();
-  UniqueSCEVs.clear();
-  SCEVAllocator.Reset();
-}
-
-void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<AssumptionCacheTracker>();
-  AU.addRequiredTransitive<LoopInfoWrapperPass>();
-  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
-  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
 }
 
 bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   OS << "\n";
 }
 
-void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+void ScalarEvolution::print(raw_ostream &OS) const {
   // ScalarEvolution's implementation of the print method is to print
   // out SCEV values of all instructions that are interesting.  Doing
   // this potentially causes it to create new SCEV objects though,
@@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
   OS << "Classifying expressions for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
-  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
-      OS << *I << '\n';
+  for (Instruction &I : instructions(F))
+    if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+      OS << I << '\n';
       OS << "  -->  ";
-      const SCEV *SV = SE.getSCEV(&*I);
+      const SCEV *SV = SE.getSCEV(&I);
       SV->print(OS);
       if (!isa<SCEVCouldNotCompute>(SV)) {
         OS << " U: ";
@@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
         SE.getSignedRange(SV).print(OS);
       }
 
-      const Loop *L = LI->getLoopFor((*I).getParent());
+      const Loop *L = LI.getLoopFor(I.getParent());
 
       const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
       if (AtUse != SV) {
@@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   }
 
   OS << "Determining loop execution counts for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+  for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
     PrintLoopInfo(OS, &SE, *I);
 }
 
@@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
 
     // This recurrence is variant w.r.t. L if any of its operands
    // are variant.
-    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
-         I != E; ++I)
-      if (!isLoopInvariant(*I, L))
+    for (auto *Op : AR->operands())
+      if (!isLoopInvariant(Op, L))
         return LoopVariant;
 
     // Otherwise it's loop-invariant.
@@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
   case scMulExpr:
   case scUMaxExpr:
   case scSMaxExpr: {
-    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool HasVarying = false;
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      LoopDisposition D = getLoopDisposition(*I, L);
+    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+      LoopDisposition D = getLoopDisposition(Op, L);
       if (D == LoopVariant)
         return LoopVariant;
       if (D == LoopComputable)
@@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
     // invariant if they are not contained in the specified loop.
     // Instructions are never considered invariant in the function body
    // (null loop) because they are defined within the "loop".
-    if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
       return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
     return LoopInvariant;
   case scCouldNotCompute:
@@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
     // produces the addrec's value is a PHI, and a PHI effectively properly
    // dominates its entire containing block.
     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
-    if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
       return DoesNotDominateBlock;
   }
   // FALL THROUGH into SCEVNAryExpr handling.
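For context, the dispositions computed above back the public loop-invariance and dominance queries that client passes actually call; a hedged sketch of typical use (the helper name is illustrative, not part of this patch):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Conservative check that S could be materialized in BB: it must be
// invariant with respect to L, and its operands must be available at BB.
static bool safeToMaterializeAt(ScalarEvolution &SE, const SCEV *S,
                                const Loop *L, const BasicBlock *BB) {
  return SE.isLoopInvariant(S, L) && SE.dominates(S, BB);
}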
@@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   case scSMaxExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool Proper = true;
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      BlockDisposition D = getBlockDisposition(*I, BB);
+    for (const SCEV *NAryOp : NAry->operands()) {
+      BlockDisposition D = getBlockDisposition(NAryOp, BB);
       if (D == DoesNotDominateBlock)
         return DoesNotDominateBlock;
       if (D == DominatesBlock)
@@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
         dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
       if (I->getParent() == BB)
         return DominatesBlock;
-      if (DT->properlyDominates(I->getParent(), BB))
+      if (DT.properlyDominates(I->getParent(), BB))
         return ProperlyDominatesBlock;
       return DoesNotDominateBlock;
     }
@@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
   return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
 }
 
-namespace {
-// Search for a SCEV expression node within an expression tree.
-// Implements SCEVTraversal::Visitor.
-struct SCEVSearch {
-  const SCEV *Node;
-  bool IsFound;
-
-  SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
-
-  bool follow(const SCEV *S) {
-    IsFound |= (S == Node);
-    return !IsFound;
-  }
-  bool isDone() const { return IsFound; }
-};
-}
-
 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+  // Search for a SCEV expression node within an expression tree.
+  // Implements SCEVTraversal::Visitor.
+  struct SCEVSearch {
+    const SCEV *Node;
+    bool IsFound;
+
+    SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+    bool follow(const SCEV *S) {
+      IsFound |= (S == Node);
+      return !IsFound;
+    }
+    bool isDone() const { return IsFound; }
+  };
+
   SCEVSearch Search(Op);
   visitAll(S, Search);
   return Search.IsFound;
@@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
 /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
 static void
 getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
-  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
-    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+  std::string &S = Map[L];
+  if (S.empty()) {
+    raw_string_ostream OS(S);
+    SE.getBackedgeTakenCount(L)->print(OS);
 
-    std::string &S = Map[L];
-    if (S.empty()) {
-      raw_string_ostream OS(S);
-      SE.getBackedgeTakenCount(L)->print(OS);
-
-      // false and 0 are semantically equivalent. This can happen in dead loops.
-      replaceSubString(OS.str(), "false", "0");
-      // Remove wrap flags, their use in SCEV is highly fragile.
-      // FIXME: Remove this when SCEV gets smarter about them.
-      replaceSubString(OS.str(), "<nw>", "");
-      replaceSubString(OS.str(), "<nsw>", "");
-      replaceSubString(OS.str(), "<nuw>", "");
-    }
+    // false and 0 are semantically equivalent. This can happen in dead loops.
+    replaceSubString(OS.str(), "false", "0");
+    // Remove wrap flags, their use in SCEV is highly fragile.
+    // FIXME: Remove this when SCEV gets smarter about them.
+    replaceSubString(OS.str(), "<nw>", "");
+    replaceSubString(OS.str(), "<nsw>", "");
+    replaceSubString(OS.str(), "<nuw>", "");
  }
+
+  for (auto *R : reverse(*L))
+    getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
 }
 
-void ScalarEvolution::verifyAnalysis() const {
-  if (!VerifySCEV)
-    return;
-
+void ScalarEvolution::verify() const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
   // Gather stringified backedge taken counts for all loops using SCEV's caches.
   // FIXME: It would be much better to store actual values instead of strings,
   //        but SCEV pointers will change if we drop the caches.
   VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
     getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
 
-  // Gather stringified backedge taken counts for all loops without using
-  // SCEV's caches.
-  SE.releaseMemory();
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
-    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+  // Gather stringified backedge taken counts for all loops using a fresh
+  // ScalarEvolution object.
+  ScalarEvolution SE2(F, TLI, AC, DT, LI);
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
 
   // Now compare whether they're the same with and without caches. This allows
   // verifying that no pass changed the cache.
@@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const {
 
   // TODO: Verify more things.
 }
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+                                             AnalysisManager<Function> *AM) {
+  return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+                         AM->getResult<AssumptionAnalysis>(F),
+                         AM->getResult<DominatorTreeAnalysis>(F),
+                         AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+  AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+                      "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+                    "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+  SE.reset(new ScalarEvolution(
+      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+  return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+  SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AssumptionCacheTracker>();
+  AU.addRequiredTransitive<LoopInfoWrapperPass>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+const SCEVPredicate *
+ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
+                                   const SCEVConstant *RHS) {
+  FoldingSetNodeID ID;
+  // Unique this node based on the arguments
+  ID.AddInteger(SCEVPredicate::P_Equal);
+  ID.AddPointer(LHS);
+  ID.AddPointer(RHS);
+  void *IP = nullptr;
+  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
+    return S;
+  SCEVEqualPredicate *Eq = new (SCEVAllocator)
+      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+  UniquePreds.InsertNode(Eq, IP);
+  return Eq;
+}
+
+namespace {
+class SCEVPredicateRewriter
+    : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
+public:
+  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+                             SCEVUnionPredicate &A) {
+    SCEVPredicateRewriter Rewriter(SE, A);
+    return Rewriter.visit(Scev);
+  }
+
+  SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P)
+      : SCEVRewriteVisitor(SE), P(P) {}
+
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    auto ExprPreds = P.getPredicatesForExpr(Expr);
+    for (auto *Pred : ExprPreds)
+      if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
+        if (IPred->getLHS() == Expr)
+          return IPred->getRHS();
+
+    return Expr;
+  }
+
+private:
+  SCEVUnionPredicate &P;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev,
+                                                   SCEVUnionPredicate &Preds) {
+  return SCEVPredicateRewriter::rewrite(Scev, *this, Preds);
+}
+
+/// SCEV predicates
+SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
+                             SCEVPredicateKind Kind)
+    : FastID(ID), Kind(Kind) {}
+
+SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
+                                       const SCEVUnknown *LHS,
+                                       const SCEVConstant *RHS)
+    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+
+bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
+  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
+
+  if (!Op)
+    return false;
+
+  return Op->LHS == LHS && Op->RHS == RHS;
+}
+
+bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+
+const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+
+void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
+  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+}
+
+/// Union predicates don't get cached so create a dummy set ID for it.
+SCEVUnionPredicate::SCEVUnionPredicate()
+    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+
+bool SCEVUnionPredicate::isAlwaysTrue() const {
+  return all_of(Preds,
+                [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
+}
+
+ArrayRef<const SCEVPredicate *>
+SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
+  auto I = SCEVToPreds.find(Expr);
+  if (I == SCEVToPreds.end())
+    return ArrayRef<const SCEVPredicate *>();
+  return I->second;
+}
+
+bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
+  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
+    return all_of(Set->Preds,
+                  [this](const SCEVPredicate *I) { return this->implies(I); });
+
+  auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
+  if (ScevPredsIt == SCEVToPreds.end())
+    return false;
+  auto &SCEVPreds = ScevPredsIt->second;
+
+  return any_of(SCEVPreds,
+                [N](const SCEVPredicate *I) { return I->implies(N); });
+}
+
+const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
+
+void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
+  for (auto Pred : Preds)
+    Pred->print(OS, Depth);
+}
+
+void SCEVUnionPredicate::add(const SCEVPredicate *N) {
+  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
+    for (auto Pred : Set->Preds)
+      add(Pred);
+    return;
+  }
+
+  if (implies(N))
+    return;
+
+  const SCEV *Key = N->getExpr();
+  assert(Key && "Only SCEVUnionPredicate doesn't have an "
+                "associated expression!");
+
+  SCEVToPreds[Key].push_back(N);
+  Preds.push_back(N);
+}
+
+PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE)
+    : SE(SE), Generation(0) {}
+
+const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
+  const SCEV *Expr = SE.getSCEV(V);
+  RewriteEntry &Entry = RewriteMap[Expr];
+
+  // If we already have an entry and the version matches, return it.
+  if (Entry.second && Generation == Entry.first)
+    return Entry.second;
+
+  // We found an entry but it's stale.  Rewrite the stale entry
+  // according to the current predicate.
+  if (Entry.second)
+    Expr = Entry.second;
+
+  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+  Entry = {Generation, NewSCEV};
+
+  return NewSCEV;
+}
+
+void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
+  if (Preds.implies(&Pred))
+    return;
+  Preds.add(&Pred);
+  updateGeneration();
+}
+
+const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
+  return Preds;
+}
+
+void PredicatedScalarEvolution::updateGeneration() {
+  // If the generation number wrapped, recompute everything.
+  if (++Generation == 0) {
+    for (auto &II : RewriteMap) {
+      const SCEV *Rewritten = II.second.second;
+      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+    }
+  }
+}
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6bc0d85a61f9..2e50c80c4e73 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,125 +19,42 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 
-namespace {
-  /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
-  /// implementation that uses ScalarEvolution to answer queries.
-  class ScalarEvolutionAliasAnalysis : public FunctionPass,
-                                       public AliasAnalysis {
-    ScalarEvolution *SE;
-
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
-      initializeScalarEvolutionAliasAnalysisPass(
-          *PassRegistry::getPassRegistry());
-    }
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
-      if (PI == &AliasAnalysis::ID)
-        return (AliasAnalysis*)this;
-      return this;
-    }
-
-  private:
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-    bool runOnFunction(Function &F) override;
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-
-    Value *GetBaseValue(const SCEV *S);
-  };
-}  // End of anonymous namespace
-
-// Register this pass...
-char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
-                   "ScalarEvolution-based Alias Analysis", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
-                    "ScalarEvolution-based Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
-  return new ScalarEvolutionAliasAnalysis();
-}
-
-void
-ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<ScalarEvolution>();
-  AU.setPreservesAll();
-  AliasAnalysis::getAnalysisUsage(AU);
-}
-
-bool
-ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
-  InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
-  SE = &getAnalysis<ScalarEvolution>();
-  return false;
-}
-
-/// GetBaseValue - Given an expression, try to find a
-/// base value.  Return null if none was found.
-Value *
-ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-    // In an addrec, assume that the base will be in the start, rather
-    // than the step.
-    return GetBaseValue(AR->getStart());
-  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
-    // If there's a pointer operand, it'll be sorted at the end of the list.
-    const SCEV *Last = A->getOperand(A->getNumOperands()-1);
-    if (Last->getType()->isPointerTy())
-      return GetBaseValue(Last);
-  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
-    // This is a leaf node.
-    return U->getValue();
-  }
-  // No identified object found.
-  return nullptr;
-}
-
-AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
-                                                const MemoryLocation &LocB) {
+AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
+                                const MemoryLocation &LocB) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
   if (LocA.Size == 0 || LocB.Size == 0)
     return NoAlias;
 
-  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
-  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
-  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+  // This is SCEVAAResult. Get the SCEVs!
+  const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr));
+  const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));
 
   // If they evaluate to the same expression, it's a MustAlias.
-  if (AS == BS) return MustAlias;
+  if (AS == BS)
+    return MustAlias;
 
   // If something is known about the difference between the two addresses,
   // see if it's enough to prove a NoAlias.
-  if (SE->getEffectiveSCEVType(AS->getType()) ==
-      SE->getEffectiveSCEVType(BS->getType())) {
-    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+  if (SE.getEffectiveSCEVType(AS->getType()) ==
+      SE.getEffectiveSCEVType(BS->getType())) {
+    unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
     APInt ASizeInt(BitWidth, LocA.Size);
     APInt BSizeInt(BitWidth, LocB.Size);
 
     // Compute the difference between the two pointers.
-    const SCEV *BA = SE->getMinusSCEV(BS, AS);
+    const SCEV *BA = SE.getMinusSCEV(BS, AS);
 
     // Test whether the difference is known to be great enough that memory of
     // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
    // are non-zero, which is special-cased above.
-    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
-        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+    if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
+        (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
       return NoAlias;
 
     // Folding the subtraction while preserving range information can be tricky
@@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
     // and try again to see if things fold better that way.
 
     // Compute the difference between the two pointers.
-    const SCEV *AB = SE->getMinusSCEV(AS, BS);
+    const SCEV *AB = SE.getMinusSCEV(AS, BS);
 
     // Test whether the difference is known to be great enough that memory of
     // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
     // are non-zero, which is special-cased above.
-    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
-        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+    if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) &&
+        (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))
       return NoAlias;
   }
 
@@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
     return NoAlias;
 
   // Forward the query to the next analysis.
-  return AliasAnalysis::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB);
+}
+
+/// Given an expression, try to find a base value.
+///
+/// Returns null if none was found.
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // In an addrec, assume that the base will be in the start, rather
+    // than the step.
+    return GetBaseValue(AR->getStart());
+  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // If there's a pointer operand, it'll be sorted at the end of the list.
+    const SCEV *Last = A->getOperand(A->getNumOperands() - 1);
+    if (Last->getType()->isPointerTy())
+      return GetBaseValue(Last);
+  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // This is a leaf node.
+    return U->getValue();
+  }
+  // No identified object found.
+  return nullptr;
+}
+
+SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F),
+                      AM->getResult<ScalarEvolutionAnalysis>(F));
+}
+
+char SCEVAA::PassID;
+
+char SCEVAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa",
+                      "ScalarEvolution-based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa",
+                    "ScalarEvolution-based Alias Analysis", false, true)
+
+FunctionPass *llvm::createSCEVAAWrapperPass() {
+  return new SCEVAAWrapperPass();
+}
+
+SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) {
+  initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SCEVAAWrapperPass::runOnFunction(Function &F) {
+  Result.reset(
+      new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+                       getAnalysis<ScalarEvolutionWrapperPass>().getSE()));
+  return false;
+}
+
+void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<ScalarEvolutionWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index fee2a2d0d183..921403ddc0fd 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
       // Create a new cast, and leave the old cast in place in case
      // it is being used as an insert point.  Clear its operand
      // so that it doesn't hold anything live.
-      Ret = CastInst::Create(Op, V, Ty, "", IP);
+      Ret = CastInst::Create(Op, V, Ty, "", &*IP);
       Ret->takeName(CI);
       CI->replaceAllUsesWith(Ret);
       CI->setOperand(0, UndefValue::get(V->getType()));
@@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
 
   // Create a new cast.
   if (!Ret)
-    Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
 
   // We assert at the end of the function since IP might point to an
   // instruction with different dominance properties than a cast
   // (an invoke for example) and not dominate BIP (but the cast does).
-  assert(SE.DT->dominates(Ret, BIP));
+  assert(SE.DT.dominates(Ret, &*BIP));
 
   rememberInstruction(Ret);
   return Ret;
 }
 
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+                                                 BasicBlock *MustDominate) {
+  BasicBlock::iterator IP = ++I->getIterator();
+  if (auto *II = dyn_cast<InvokeInst>(I))
+    IP = II->getNormalDest()->begin();
+
+  while (isa<PHINode>(IP))
+    ++IP;
+
+  while (IP->isEHPad()) {
+    if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+      ++IP;
+    } else if (isa<CatchSwitchInst>(IP)) {
+      IP = MustDominate->getFirstInsertionPt();
+    } else {
+      llvm_unreachable("unexpected eh pad!");
+    }
+  }
+
+  return IP;
+}
+
 /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
 /// which must be possible with a noop cast, doing what we can to share
 /// the casts.
@@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     while ((isa<BitCastInst>(IP) &&
             isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
             cast<BitCastInst>(IP)->getOperand(0) != A) ||
-           isa<DbgInfoIntrinsic>(IP) ||
-           isa<LandingPadInst>(IP))
+           isa<DbgInfoIntrinsic>(IP))
       ++IP;
     return ReuseOrCreateCast(A, Ty, Op, IP);
   }
 
   // Cast the instruction immediately after the instruction.
   Instruction *I = cast<Instruction>(V);
-  BasicBlock::iterator IP = I; ++IP;
-  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
-    IP = II->getNormalDest()->begin();
-  while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
-    ++IP;
+  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
   return ReuseOrCreateCast(I, Ty, Op, IP);
 }
 
@@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
       ScanLimit++;
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
           IP->getOperand(1) == RHS)
-        return IP;
+        return &*IP;
       if (IP == BlockBegin) break;
     }
   }
@@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   BuilderType::InsertPointGuard Guard(Builder);
 
   // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
     if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
     BasicBlock *Preheader = L->getLoopPreheader();
     if (!Preheader) break;
 
     // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+    Builder.SetInsertPoint(Preheader->getTerminator());
   }
 
   // If we haven't found this binop, insert it.
@@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
   // Check for divisibility.
   if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
     ConstantInt *CI =
-      ConstantInt::get(SE.getContext(),
-                       C->getValue()->getValue().sdiv(
-                                                  FC->getValue()->getValue()));
+        ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
     // If the quotient is zero and the remainder is non-zero, reject
     // the value at this scale. It will be considered for subsequent
     // smaller scales.
     if (!CI->isZero()) {
       const SCEV *Div = SE.getConstant(CI);
       S = Div;
-      Remainder =
-        SE.getAddExpr(Remainder,
-                      SE.getConstant(C->getValue()->getValue().srem(
-                                                  FC->getValue()->getValue())));
+      Remainder = SE.getAddExpr(
+          Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
       return true;
     }
   }
@@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
     // of the given factor. If so, we can factor it.
     const SCEVConstant *FC = cast<SCEVConstant>(Factor);
     if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
-      if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+      if (!C->getAPInt().srem(FC->getAPInt())) {
         SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
-        NewMulOps[0] = SE.getConstant(
-            C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+        NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
         S = SE.getMulExpr(NewMulOps);
         return true;
       }
@@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
     if (!ElSize->isZero()) {
       SmallVector<const SCEV *, 8> NewOps;
-      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-        const SCEV *Op = Ops[i];
+      for (const SCEV *Op : Ops) {
         const SCEV *Remainder = SE.getConstant(Ty, 0);
         if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
           // Op now has ElSize factored out.
@@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         } else {
           // The operand was not divisible, so add it to the list of operands
           // we'll scan next iteration.
-          NewOps.push_back(Ops[i]);
+          NewOps.push_back(Op);
         }
       }
       // If we made any changes, update Ops.
@@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                 Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
 
     assert(!isa<Instruction>(V) ||
-           SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
 
     // Expand the operands for a plain byte offset.
     Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         ScanLimit++;
         if (IP->getOpcode() == Instruction::GetElementPtr &&
             IP->getOperand(0) == V && IP->getOperand(1) == Idx)
-          return IP;
+          return &*IP;
         if (IP == BlockBegin) break;
       }
     }
@@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     BuilderType::InsertPointGuard Guard(Builder);
 
     // Move the insertion point out of as many loops as we can.
-    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
       if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
       BasicBlock *Preheader = L->getLoopPreheader();
       if (!Preheader) break;
 
       // Ok, move up a level.
-      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+      Builder.SetInsertPoint(Preheader->getTerminator());
     }
 
     // Emit a GEP.
@@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
 
   // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
     if (!L->isLoopInvariant(V)) break;
 
-    bool AnyIndexNotLoopInvariant = false;
-    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
-         E = GepIndices.end(); I != E; ++I)
-      if (!L->isLoopInvariant(*I)) {
-        AnyIndexNotLoopInvariant = true;
-        break;
-      }
+    bool AnyIndexNotLoopInvariant =
+        std::any_of(GepIndices.begin(), GepIndices.end(),
+                    [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
     if (AnyIndexNotLoopInvariant)
       break;
 
@@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     if (!Preheader) break;
 
     // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+    Builder.SetInsertPoint(Preheader->getTerminator());
   }
 
   // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
@@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   Value *Casted = V;
   if (V->getType() != PTy)
     Casted = InsertNoopCastOfTo(Casted, PTy);
-  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
-                                 GepIndices,
-                                 "scevgep");
+  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
   rememberInstruction(GEP);
 
@@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
 /// expression, according to PickMostRelevantLoop.
 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
   // Test whether we've already computed the most relevant loop for this SCEV.
-  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
-    RelevantLoops.insert(std::make_pair(S, nullptr));
+  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
   if (!Pair.second)
     return Pair.first->second;
 
@@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
     return nullptr;
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
-      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
     // A non-instruction has no relevant loops.
     return nullptr;
   }
@@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
     const Loop *L = nullptr;
     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
       L = AR->getLoop();
-    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
-         I != E; ++I)
-      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+    for (const SCEV *Op : N->operands())
+      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
     return RelevantLoops[N] = L;
   }
   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
     const Loop *Result = getRelevantLoop(C->getOperand());
     return RelevantLoops[C] = Result;
   }
   if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
-    const Loop *Result =
-      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
-                           getRelevantLoop(D->getRHS()),
-                           *SE.DT);
+    const Loop *Result = PickMostRelevantLoop(
+        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
     return RelevantLoops[D] = Result;
   }
   llvm_unreachable("Unexpected SCEV type!");
@@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Sort by loop. Use a stable sort so that constants follow non-constants and
   // pointer operands precede non-pointer operands.
-  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
 
   // Emit instructions to add all the operands.  Hoist as much as possible
Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. Value *Sum = nullptr; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { const Loop *CurLoop = I->first; const SCEV *Op = I->second; if (!Sum) { @@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) { - const SCEV *Op = I->second; + for (const auto &I : OpsAndLoops) { + const SCEV *Op = I.second; if (!Prod) { // This is the first operand. Just expand it. Prod = expand(Op); @@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { - const APInt &RHS = SC->getValue()->getValue(); + const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, ConstantInt::get(Ty, RHS.logBase2())); @@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, for (User::op_iterator OI = IncV->op_begin()+1, OE = IncV->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) + if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } // Advance to the next instruction. @@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast(IncV->getOperand(1)); - if (!OInst || SE.DT->dominates(OInst, InsertPos)) + if (!OInst || SE.DT.dominates(OInst, InsertPos)) return dyn_cast(IncV->getOperand(0)); return nullptr; } case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); case Instruction::GetElementPtr: - for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); - I != E; ++I) { + for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { if (isa(*I)) continue; if (Instruction *OInst = dyn_cast(*I)) { - if (!SE.DT->dominates(OInst, InsertPos)) + if (!SE.DT.dominates(OInst, InsertPos)) return nullptr; } if (allowScale) { @@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, /// it available to other uses in this loop. Recursively hoist any operands, /// until we reach a value that dominates InsertPos. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) return true; // InsertPos must itself dominate IncV so that IncV's new position satisfies // its existing users. - if (isa(InsertPos) - || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + if (isa(InsertPos) || + !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) return false; // Check that the chain of IV operands leading back to Phi can be hoisted. 
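hoistIVInc, whose body continues in the next hunk, records the chain of IV operands and then moves the chain in reverse so that every operand lands before its user. A hedged sketch of that reverse-order move using only Instruction::moveBefore (hoistChain is a hypothetical helper, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"

// Chain is filled from the user toward its operands; visiting it in
// reverse places definitions before their uses at InsertPos.
static void hoistChain(llvm::SmallVectorImpl<llvm::Instruction *> &Chain,
                       llvm::Instruction *InsertPos) {
  for (auto I = Chain.rbegin(), E = Chain.rend(); I != E; ++I)
    (*I)->moveBefore(InsertPos);
}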
@@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { // IncV is safe to hoist. IVIncs.push_back(IncV); IncV = Oper; - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) break; } - for (SmallVectorImpl::reverse_iterator I = IVIncs.rbegin(), - E = IVIncs.rend(); I != E; ++I) { + for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { (*I)->moveBefore(InsertPos); } return true; @@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, } /// \brief Check whether we can cheaply express the requested SCEV in terms of -/// the available PHI SCEV by truncation and/or invertion of the step. +/// the available PHI SCEV by truncation and/or inversion of the step. static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, @@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Only try partially matching scevs that need truncation and/or // step-inversion if we know this loop is outside the current loop. - bool TryNonMatchingSCEV = IVIncInsertLoop && - SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + bool TryNonMatchingSCEV = + IVIncInsertLoop && + SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast(I); ++I) { - if (!SE.isSCEVable(PN->getType())) + for (auto &I : *L->getHeader()) { + auto *PN = dyn_cast(&I); + if (!PN || !SE.isSCEVable(PN->getType())) continue; const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(PN)); @@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Potentially, move the increment. We have made sure in // isExpandedAddRecExprPHI or hoistIVInc that this is possible. if (L == IVIncInsertLoop) - hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); + hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. @@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, PostIncLoops.clear(); // Expand code for the start value. - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getHeader()->begin()); + Value *StartV = + expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front()); // StartV must be hoisted into L's preheader to dominate the new phi. assert(!isa(StartV) || - SE.DT->properlyDominates(cast(StartV)->getParent(), - L->getHeader())); + SE.DT.properlyDominates(cast(StartV)->getParent(), + L->getHeader())); // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. @@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. 
It does not apply if we emit a @@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = - cast(TransformForPostIncUse(Normalize, S, nullptr, - nullptr, Loops, SE, *SE.DT)); + Normalized = cast(TransformForPostIncUse( + Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - if (isa(Result) - && !SE.DT->dominates(cast(Result), - Builder.GetInsertPoint())) { + if (isa(Result) && + !SE.DT.dominates(cast(Result), + &*Builder.GetInsertPoint())) { // The induction variable's postinc expansion does not dominate this use. // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch @@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. BuilderType::InsertPointGuard Guard(Builder); - StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - std::next(BasicBlock::iterator(cast(V))); - BuilderType::InsertPointGuard Guard(Builder); - while (isa(NewInsertPt) || isa(NewInsertPt) || - isa(NewInsertPt)) - ++NewInsertPt; + findInsertPointAfter(cast(V), Builder.GetInsertBlock()); V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - NewInsertPt); + &*NewInsertPt); return V; } @@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *Header = L->getHeader(); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - Header->begin()); + &Header->front()); rememberInstruction(CanonicalIV); SmallSet PredSeen; @@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { - Builder.SetInsertPoint(IP->getParent(), IP); + assert(IP); + Builder.SetInsertPoint(IP); return expandCodeFor(SH, Ty); } @@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. - Instruction *InsertPt = Builder.GetInsertPoint(); - for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + Instruction *InsertPt = &*Builder.GetInsertPoint(); + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; L = L->getParentLoop()) if (SE.isLoopInvariant(S, L)) { if (!L) break; @@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. 
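Many hunks in this revision, including the one that follows, prepend &* to calls like getFirstInsertionPt(): with this tree's ilist changes those calls yield a BasicBlock::iterator rather than an Instruction*, and the iterator no longer converts implicitly. A small sketch of the round-trip, assuming this revision's iterator-returning API:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static llvm::Instruction *firstInsertionPoint(llvm::BasicBlock &BB) {
  llvm::BasicBlock::iterator It = BB.getFirstInsertionPt();
  llvm::Instruction *I = &*It; // iterator -> pointer, as in the hunks below
  return I;                    // I->getIterator() goes back the other way
}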
- InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); while (InsertPt != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || isa(InsertPt))) { - InsertPt = std::next(BasicBlock::iterator(InsertPt)); + InsertPt = &*std::next(InsertPt->getIterator()); } break; } // Check to see if we already expanded this here. - std::map, TrackingVH >::iterator - I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; BuilderType::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. Value *V = visit(S); @@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast(expandCodeFor(H, nullptr, - L->getHeader()->begin())); + PHINode *V = + cast(expandCodeFor(H, nullptr, &L->getHeader()->front())); return V; } @@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *Phi = dyn_cast(I); ++I) { - Phis.push_back(Phi); + for (auto &I : *L->getHeader()) { + if (auto *PN = dyn_cast(&I)) + Phis.push_back(PN); + else + break; } + if (TTI) std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { // Put pointers at the back and make sure pointer < pointer = false. @@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, DenseMap ExprToIVMap; // Process phis from wide to narrow. Map wide phis to their truncation // so narrow phis can reuse them. - for (SmallVectorImpl::const_iterator PIter = Phis.begin(), - PEnd = Phis.end(); PIter != PEnd; ++PIter) { - PHINode *Phi = *PIter; + for (PHINode *Phi : Phis) { + auto SimplifyPHINode = [&](PHINode *PN) -> Value * { + if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + return V; + if (!SE.isSCEVable(PN->getType())) + return nullptr; + auto *Const = dyn_cast(SE.getSCEV(PN)); + if (!Const) + return nullptr; + return Const->getValue(); + }; // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
-    if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
+    if (Value *V = SimplifyPHINode(Phi)) {
+      if (V->getType() != Phi->getType())
+        continue;
       Phi->replaceAllUsesWith(V);
       DeadInsts.emplace_back(Phi);
       ++NumElim;
@@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
         if (OrigInc->getType() != IsomorphicInc->getType()) {
           Instruction *IP = nullptr;
           if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
-            IP = PN->getParent()->getFirstInsertionPt();
+            IP = &*PN->getParent()->getFirstInsertionPt();
           else
             IP = OrigInc->getNextNode();
@@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
       ++NumElim;
       Value *NewIV = OrigPhiRef;
       if (OrigPhiRef->getType() != Phi->getType()) {
-        IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+        IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
         Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
         NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
       }
@@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
   return NumElim;
 }
+Value *SCEVExpander::findExistingExpansion(const SCEV *S,
+                                           const Instruction *At, Loop *L) {
+  using namespace llvm::PatternMatch;
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Look for a suitable value in simple conditions at the loop exits.
+  for (BasicBlock *BB : ExitingBlocks) {
+    ICmpInst::Predicate Pred;
+    Instruction *LHS, *RHS;
+    BasicBlock *TrueBB, *FalseBB;
+
+    if (!match(BB->getTerminator(),
+               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+                    TrueBB, FalseBB)))
+      continue;
+
+    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+      return LHS;
+
+    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+      return RHS;
+  }
+
+  // There is potential to make this significantly smarter, but this simple
+  // heuristic already gets some interesting cases.
+
+  // Cannot find a suitable value.
+  return nullptr;
+}
+
 bool SCEVExpander::isHighCostExpansionHelper(
-    const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+    const SCEV *S, Loop *L, const Instruction *At,
+    SmallPtrSetImpl<const SCEV *> &Processed) {
+
+  // If we can find an existing value for this SCEV available at the point "At"
+  // then consider the expression cheap.
+  if (At && findExistingExpansion(S, At, L) != nullptr)
+    return false;
   // Zero/One operand expressions
   switch (S->getSCEVType()) {
@@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
   case scConstant:
     return false;
   case scTruncate:
-    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
-                                     Processed);
+    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
+                                     L, At, Processed);
   case scZeroExtend:
     return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
-                                     L, Processed);
+                                     L, At, Processed);
   case scSignExtend:
     return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
-                                     L, Processed);
+                                     L, At, Processed);
   }
   if (!Processed.insert(S).second)
@@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
   if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
     // If the divisor is a power of two and the SCEV type fits in a native
-    // integer, consider the divison cheap irrespective of whether it occurs in
+    // integer, consider the division cheap irrespective of whether it occurs in
     // the user code since it can be lowered into a right shift.
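The comment above ends on the observation that a power-of-two unsigned division lowers to a right shift; visitUDivExpr earlier in this file emits exactly that via Instruction::LShr and APInt::logBase2(). The underlying arithmetic fact, x udiv 2^k == x lshr k, in a tiny self-contained form before the hunk resumes (shiftAmountForUDiv is a hypothetical helper):

#include "llvm/ADT/APInt.h"

// Returns the shift amount k such that dividing by Divisor equals
// shifting right by k, or -1 when Divisor is not a power of two.
static int shiftAmountForUDiv(const llvm::APInt &Divisor) {
  if (!Divisor.isPowerOf2())
    return -1;
  return static_cast<int>(Divisor.logBase2()); // x / 2^k == x >> k (unsigned)
}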
     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
-      if (SC->getValue()->getValue().isPowerOf2()) {
+      if (SC->getAPInt().isPowerOf2()) {
         const DataLayout &DL =
             L->getHeader()->getParent()->getParent()->getDataLayout();
         unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
     if (!ExitingBB)
       return true;
-    BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
-    if (!ExitingBI || !ExitingBI->isConditional())
+    // At the beginning of this function we already tried to find an existing
+    // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+    // pattern involving division. This is just a simple search heuristic.
+    if (!At)
+      At = &ExitingBB->back();
+    if (!findExistingExpansion(
+            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
       return true;
-
-    ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
-    if (!OrigCond)
-      return true;
-
-    const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
-    RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
-    if (RHS != S) {
-      const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
-      LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
-      if (LHS != S)
-        return true;
-    }
   }
   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
@@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
   // BackedgeTakenCount. They may already exist in program code, and if not,
   // they are not too expensive to rematerialize.
   if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      if (isHighCostExpansionHelper(*I, L, Processed))
+    for (auto *Op : NAry->operands())
+      if (isHighCostExpansionHelper(Op, L, At, Processed))
         return true;
-    }
   }
   // If we haven't recognized an expensive SCEV pattern, assume it's an
@@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper(
   return false;
 }
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+                                            Instruction *IP) {
+  assert(IP);
+  switch (Pred->getKind()) {
+  case SCEVPredicate::P_Union:
+    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+  case SCEVPredicate::P_Equal:
+    return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+  }
+  llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+                                          Instruction *IP) {
+  Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+  Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+  Builder.SetInsertPoint(IP);
+  auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+  return I;
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+                                          Instruction *IP) {
+  auto *BoolType = IntegerType::get(IP->getContext(), 1);
+  Value *Check = ConstantInt::getNullValue(BoolType);
+
+  // Loop over all checks in this set.
+  for (auto Pred : Union->getPredicates()) {
+    auto *NextCheck = expandCodeForPredicate(Pred, IP);
+    Builder.SetInsertPoint(IP);
+    Check = Builder.CreateOr(Check, NextCheck);
+  }
+
+  return Check;
+}
+
 namespace {
 // Search for a SCEV subexpression that is not safe to expand.
Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index b238fe43cc60..b7fd5d506175 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { SmallVector Operands; const Loop *L = AR->getLoop(); // The addrec conceptually uses its operands at loop entry. - Instruction *LUser = L->getHeader()->begin(); + Instruction *LUser = &L->getHeader()->front(); // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index a5fca3e79b37..029997adab9e 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -32,22 +32,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" + using namespace llvm; // A handy option for disabling scoped no-alias functionality. The same effect // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. -static cl::opt -EnableScopedNoAlias("enable-scoped-noalias", cl::init(true)); +static cl::opt EnableScopedNoAlias("enable-scoped-noalias", + cl::init(true)); namespace { /// AliasScopeNode - This is a simple wrapper around an MDNode which provides @@ -57,7 +58,7 @@ class AliasScopeNode { const MDNode *Node; public: - AliasScopeNode() : Node(0) {} + AliasScopeNode() : Node(nullptr) {} explicit AliasScopeNode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this AliasScopeNode. @@ -70,79 +71,74 @@ public: return dyn_cast_or_null(Node->getOperand(1)); } }; +} // end of anonymous namespace -/// ScopedNoAliasAA - This is a simple alias analysis -/// implementation that uses scoped-noalias metadata to answer queries. -class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis { -public: - static char ID; // Class identification, replacement for typeinfo - ScopedNoAliasAA() : ImmutablePass(ID) { - initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); - } +AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + if (!EnableScopedNoAlias) + return AAResultBase::alias(LocA, LocB); - bool doInitialization(Module &M) override; + // Get the attached MDNodes. + const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope; - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. 
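SCEVExpander::findExistingExpansion, added earlier in this patch, leans on llvm::PatternMatch to destructure a conditional branch over an integer compare. The same matcher call, isolated into a standalone sketch (splitExitCondition is a hypothetical wrapper around the exact pattern the patch uses):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

// Binds LHS/RHS when BB ends in: br i1 (icmp pred %lhs, %rhs), %t, %f.
static bool splitExitCondition(llvm::BasicBlock *BB, llvm::Instruction *&LHS,
                               llvm::Instruction *&RHS) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::BasicBlock *TrueBB, *FalseBB;
  return match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    TrueBB, FalseBB));
}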
- void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } + const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias; -protected: - bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; - void collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl &Nodes) const; + if (!mayAliasInScopes(AScopes, BNoAlias)) + return NoAlias; -private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; -}; -} // End of anonymous namespace + if (!mayAliasInScopes(BScopes, ANoAlias)) + return NoAlias; -// Register this pass... -char ScopedNoAliasAA::ID = 0; -INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias", - "Scoped NoAlias Alias Analysis", false, true, false) - -ImmutablePass *llvm::createScopedNoAliasAAPass() { - return new ScopedNoAliasAA(); + // If they may alias, chain to the next AliasAnalysis. + return AAResultBase::alias(LocA, LocB); } -bool ScopedNoAliasAA::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS, Loc); + + if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( + LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + Loc.AATags.NoAlias)) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS, Loc); } -void -ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS1, CS2); + + if (!mayAliasInScopes( + CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS1, CS2); } -void -ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl &Nodes) const { +void ScopedNoAliasAAResult::collectMDInDomain( + const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const { for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i) if (const MDNode *MD = dyn_cast(List->getOperand(i))) if (AliasScopeNode(MD).getDomain() == Domain) Nodes.insert(MD); } -bool -ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, - const MDNode *NoAlias) const { +bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, + const MDNode *NoAlias) const { if (!Scopes || !NoAlias) return true; @@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode 
*Scopes, return true; } -AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - if (!EnableScopedNoAlias) - return AliasAnalysis::alias(LocA, LocB); - - // Get the attached MDNodes. - const MDNode *AScopes = LocA.AATags.Scope, - *BScopes = LocB.AATags.Scope; - - const MDNode *ANoAlias = LocA.AATags.NoAlias, - *BNoAlias = LocB.AATags.NoAlias; - - if (!mayAliasInScopes(AScopes, BNoAlias)) - return NoAlias; - - if (!mayAliasInScopes(BScopes, ANoAlias)) - return NoAlias; - - // If they may alias, chain to the next AliasAnalysis. - return AliasAnalysis::alias(LocA, LocB); +ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, + AnalysisManager *AM) { + return ScopedNoAliasAAResult(AM->getResult(F)); } -bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +char ScopedNoAliasAA::PassID; + +char ScopedNoAliasAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) + +ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() { + return new ScopedNoAliasAAWrapperPass(); } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) { - return AliasAnalysis::getModRefBehavior(CS); +ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) { + initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry()); } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(const Function *F) { - return AliasAnalysis::getModRefBehavior(F); +bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ScopedNoAliasAAResult( + getAnalysis().getTLI())); + return false; } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS, Loc); - - if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( - LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - Loc.AATags.NoAlias)) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS, Loc); +bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS1, CS2); - - if (!mayAliasInScopes( - CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS1, CS2); +void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); } - diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index edd82f5fe296..f5a927b80525 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) { void 
 SparseSolver::Print(Function &F, raw_ostream &OS) const {
   OS << "\nFUNCTION: " << F.getName() << "\n";
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    if (!BBExecutable.count(BB))
+  for (auto &BB : F) {
+    if (!BBExecutable.count(&BB))
       OS << "INFEASIBLE: ";
     OS << "\t";
-    if (BB->hasName())
-      OS << BB->getName() << ":\n";
+    if (BB.hasName())
+      OS << BB.getName() << ":\n";
     else
       OS << "; anon bb\n";
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      LatticeFunc->PrintValue(getLatticeState(I), OS);
-      OS << *I << "\n";
+    for (auto &I : BB) {
+      LatticeFunc->PrintValue(getLatticeState(&I), OS);
+      OS << I << "\n";
     }
     OS << "\n";
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 635c50ca6e53..e00f4aed07fc 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -61,10 +61,19 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   }
 #endif // !NDEBUG
+  if (T.getArch() == Triple::r600 ||
+      T.getArch() == Triple::amdgcn) {
+    TLI.setUnavailable(LibFunc::ldexp);
+    TLI.setUnavailable(LibFunc::ldexpf);
+    TLI.setUnavailable(LibFunc::ldexpl);
+  }
+
   // There are no library implementations of memcpy and memset for AMD gpus and
   // these can be difficult to lower in the backend.
   if (T.getArch() == Triple::r600 ||
-      T.getArch() == Triple::amdgcn) {
+      T.getArch() == Triple::amdgcn ||
+      T.getArch() == Triple::wasm32 ||
+      T.getArch() == Triple::wasm64) {
     TLI.setUnavailable(LibFunc::memcpy);
     TLI.setUnavailable(LibFunc::memset);
     TLI.setUnavailable(LibFunc::memset_pattern16);
@@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   }
   // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+  // All versions of watchOS support it.
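The TargetLibraryInfo hunks above all follow one pattern inside initialize(): inspect the llvm::Triple, then mark library routines unavailable for targets whose C runtime lacks them (ldexp* on AMD GPUs, mem* on wasm, memset_pattern16 outside Darwin). That shape in isolation, before the Darwin version checks resume below (disableWasmMemRoutines is a hypothetical helper restating the wasm hunk):

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

static void disableWasmMemRoutines(llvm::TargetLibraryInfoImpl &TLI,
                                   const llvm::Triple &T) {
  // Mirrors the hunk above: wasm has no library memcpy/memset to call.
  if (T.getArch() == llvm::Triple::wasm32 ||
      T.getArch() == llvm::Triple::wasm64) {
    TLI.setUnavailable(llvm::LibFunc::memcpy);
    TLI.setUnavailable(llvm::LibFunc::memset);
  }
}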
if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc::memset_pattern16); } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) TLI.setUnavailable(LibFunc::memset_pattern16); - } else { + } else if (!T.isWatchOS()) { TLI.setUnavailable(LibFunc::memset_pattern16); } @@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } break; case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: TLI.setUnavailable(LibFunc::exp10l); - if (T.isOSVersionLT(7, 0)) { + if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || + (T.isOSVersionLT(9, 0) && + (T.getArch() == Triple::x86 || + T.getArch() == Triple::x86_64)))) { TLI.setUnavailable(LibFunc::exp10); TLI.setUnavailable(LibFunc::exp10f); } else { @@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and // Linux (GLIBC): // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html switch (T.getOS()) { case Triple::Darwin: case Triple::MacOSX: case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // ffsll is available on at least FreeBSD and Linux (GLIBC): - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html switch (T.getOS()) { + case Triple::Darwin: + case Triple::MacOSX: + case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc::ffsll); } + // The following functions are available on at least FreeBSD: + // http://svn.freebsd.org/base/head/lib/libc/string/fls.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c + if (!T.isOSFreeBSD()) { + TLI.setUnavailable(LibFunc::fls); + TLI.setUnavailable(LibFunc::flsl); + TLI.setUnavailable(LibFunc::flsll); + } + // The following functions are available on at least Linux: if (!T.isOSLinux()) { TLI.setUnavailable(LibFunc::dunder_strdup); diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 7d1c3fbef68a..9c1d3fd4f582 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { return *this; } -unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const { - return TTIImpl->getOperationCost(Opcode, Ty, OpTy); +int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, - int NumArgs) const { - return TTIImpl->getCallCost(FTy, NumArgs); +int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { + int Cost = TTIImpl->getCallCost(FTy, NumArgs); + 
assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getCallCost(const Function *F, - ArrayRef Arguments) const { - return TTIImpl->getCallCost(F, Arguments); +int TargetTransformInfo::getCallCost(const Function *F, + ArrayRef Arguments) const { + int Cost = TTIImpl->getCallCost(F, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) const { - return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); +int TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { + int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getUserCost(const User *U) const { - return TTIImpl->getUserCost(U); +int TargetTransformInfo::getUserCost(const User *U) const { + int Cost = TTIImpl->getUserCost(U); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { @@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedStore(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { + return TTIImpl->isLegalMaskedStore(DataType); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { + return TTIImpl->isLegalMaskedLoad(DataType); +} + +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { + return TTIImpl->isLegalMaskedGather(DataType); +} + +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { + return TTIImpl->isLegalMaskedGather(DataType); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { - return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale, AddrSpace); + int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } +bool TargetTransformInfo::enableInterleavedAccessVectorization() const { + return TTIImpl->enableInterleavedAccessVectorization(); +} + TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); @@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } -unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { - return TTIImpl->getFPOpCost(Ty); +int TargetTransformInfo::getFPOpCost(Type *Ty) const { + int Cost = TTIImpl->getFPOpCost(Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned 
TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } -unsigned TargetTransformInfo::getArithmeticInstrCost( +int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) const { - return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index, Type *SubTp) const { - return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); +int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, + Type *SubTp) const { + int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - return TTIImpl->getCastInstrCost(Opcode, Dst, Src); +int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - return TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + int Cost = TTIImpl->getCFInstrCost(Opcode); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); +int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - 
return TTIImpl->getVectorInstrCost(Opcode, Val, Index); +int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getInterleavedMemoryOpCost( +int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) const { + int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) const { - return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) const { - return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); -} - -unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys) const { - return TTIImpl->getCallInstrCost(F, RetTy, Tys); + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, - bool IsComplex) const { - return TTIImpl->getAddressComputationCost(Tp, IsComplex); +int TargetTransformInfo::getAddressComputationCost(Type *Tp, + bool IsComplex) const { + int Cost = 
TTIImpl->getAddressComputationCost(Tp, IsComplex); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const { - return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const { + int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned @@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } -bool TargetTransformInfo::hasCompatibleFunctionAttributes( - const Function *Caller, const Function *Callee) const { - return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee); +bool TargetTransformInfo::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + return TTIImpl->areInlineCompatible(Caller, Callee); } TargetTransformInfo::Concept::~Concept() {} @@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( - std::function TTICallback) + std::function TTICallback) : TTICallback(TTICallback) {} -TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) { return TTICallback(F); } char TargetIRAnalysis::PassID; -TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } @@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( *PassRegistry::getPassRegistry()); } -TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { TTI = TIRA.run(F); return *TTI; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 4e9c6f678ebd..805f3efb0814 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -121,15 +121,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be @@ -138,199 +136,138 @@ using namespace llvm; static cl::opt EnableTBAA("enable-tbaa", cl::init(true)); namespace { - /// TBAANode - This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAANode { - const MDNode *Node; +/// TBAANode - This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. 
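Every TargetTransformInfo forwarding function in the hunks above was converted from unsigned to int and now validates its result the same way. The recurring wrapper shape, reduced to a dependency-free sketch (ImplT and getWidgetCost are placeholders, not real TTI API):

#include <cassert>

struct ImplT {
  int getWidgetCost(int N) const { return 2 * N; } // stand-in cost model
};

// Forward to the implementation, then reject negative costs, exactly as
// the TargetTransformInfo wrappers above now do.
static int getWidgetCost(const ImplT &Impl, int N) {
  int Cost = Impl.getWidgetCost(N);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}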
+class TBAANode { + const MDNode *Node; - public: - TBAANode() : Node(nullptr) {} - explicit TBAANode(const MDNode *N) : Node(N) {} +public: + TBAANode() : Node(nullptr) {} + explicit TBAANode(const MDNode *N) : Node(N) {} - /// getNode - Get the MDNode for this TBAANode. - const MDNode *getNode() const { return Node; } + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } - /// getParent - Get this TBAANode's Alias tree parent. - TBAANode getParent() const { - if (Node->getNumOperands() < 2) - return TBAANode(); - MDNode *P = dyn_cast_or_null(Node->getOperand(1)); - if (!P) - return TBAANode(); - // Ok, this node has a valid parent. Return it. - return TBAANode(P); - } + /// getParent - Get this TBAANode's Alias tree parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast_or_null(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } - /// TypeIsImmutable - Test if this TBAANode represents a type for objects - /// which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { - if (Node->getNumOperands() < 3) - return false; - ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(2)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(2)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTagNode { - /// This node should be created with createTBAAStructTagNode. - const MDNode *Node; +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; - public: - explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} +public: + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} - /// Get the MDNode for this TBAAStructTagNode. - const MDNode *getNode() const { return Node; } + /// Get the MDNode for this TBAAStructTagNode. + const MDNode *getNode() const { return Node; } - const MDNode *getBaseType() const { - return dyn_cast_or_null(Node->getOperand(0)); - } - const MDNode *getAccessType() const { - return dyn_cast_or_null(Node->getOperand(1)); - } - uint64_t getOffset() const { - return mdconst::extract(Node->getOperand(2))->getZExtValue(); - } - /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for - /// objects which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. 
- bool TypeIsImmutable() const { - if (Node->getNumOperands() < 4) - return false; - ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(3)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; + const MDNode *getBaseType() const { + return dyn_cast_or_null(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null(Node->getOperand(1)); + } + uint64_t getOffset() const { + return mdconst::extract(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTypeNode { - /// This node should be created with createTBAAStructTypeNode. - const MDNode *Node; +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. + const MDNode *Node; - public: - TBAAStructTypeNode() : Node(nullptr) {} - explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} +public: + TBAAStructTypeNode() : Node(nullptr) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} - /// Get the MDNode for this TBAAStructTypeNode. - const MDNode *getNode() const { return Node; } + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } - /// Get this TBAAStructTypeNode's field in the type DAG with - /// given offset. Update the offset to be relative to the field type. - TBAAStructTypeNode getParent(uint64_t &Offset) const { - // Parent can be omitted for the root node. - if (Node->getNumOperands() < 2) - return TBAAStructTypeNode(); + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); - // Fast path for a scalar type node and a struct type node with a single - // field. - if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 - ? 0 - : mdconst::extract(Node->getOperand(2)) - ->getZExtValue(); - Offset -= Cur; - MDNode *P = dyn_cast_or_null(Node->getOperand(1)); - if (!P) - return TBAAStructTypeNode(); - return TBAAStructTypeNode(P); - } - - // Assume the offsets are in order. We return the previous field if - // the current offset is bigger than the given offset. - unsigned TheIdx = 0; - for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { - uint64_t Cur = mdconst::extract(Node->getOperand(Idx + 1)) - ->getZExtValue(); - if (Cur > Offset) { - assert(Idx >= 3 && - "TBAAStructTypeNode::getParent should have an offset match!"); - TheIdx = Idx - 2; - break; - } - } - // Move along the last field. 
- if (TheIdx == 0) - TheIdx = Node->getNumOperands() - 2; - uint64_t Cur = mdconst::extract(Node->getOperand(TheIdx + 1)) - ->getZExtValue(); + // Fast path for a scalar type node and a struct type node with a single + // field. + if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 + ? 0 + : mdconst::extract(Node->getOperand(2)) + ->getZExtValue(); Offset -= Cur; - MDNode *P = dyn_cast_or_null(Node->getOperand(TheIdx)); + MDNode *P = dyn_cast_or_null(Node->getOperand(1)); if (!P) return TBAAStructTypeNode(); return TBAAStructTypeNode(P); } - }; -} -namespace { - /// TypeBasedAliasAnalysis - This is a simple alias analysis - /// implementation that uses TypeBased to answer queries. - class TypeBasedAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - TypeBasedAliasAnalysis() : ImmutablePass(ID) { - initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. + unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = mdconst::extract(Node->getOperand(Idx + 1)) + ->getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } } - - bool doInitialization(Module &M) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - bool Aliases(const MDNode *A, const MDNode *B) const; - bool PathAliases(const MDNode *A, const MDNode *B) const; - - private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; - }; -} // End of anonymous namespace - -// Register this pass... -char TypeBasedAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", - "Type-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { - return new TypeBasedAliasAnalysis(); -} - -bool TypeBasedAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; -} - -void -TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); + // Move along the last field. 
+ if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = mdconst::extract(Node->getOperand(TheIdx + 1)) + ->getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } +}; } /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat @@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) { return isa(MD->getOperand(0)) && MD->getNumOperands() >= 3; } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool -TypeBasedAliasAnalysis::Aliases(const MDNode *A, - const MDNode *B) const { - // Make sure that both MDNodes are struct-path aware. - if (isStructPathTBAA(A) && isStructPathTBAA(B)) - return PathAliases(A, B); - - // Keep track of the root node for A and B. - TBAANode RootA, RootB; - - // Climb the tree from A to see if we reach B. - for (TBAANode T(A); ; ) { - if (T.getNode() == B) - // B is an ancestor of A. - return true; - - RootA = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Climb the tree from B to see if we reach A. - for (TBAANode T(B); ; ) { - if (T.getNode() == A) - // A is an ancestor of B. - return true; - - RootB = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -/// Test whether the struct-path tag represented by A may alias the -/// struct-path tag represented by B. -bool -TypeBasedAliasAnalysis::PathAliases(const MDNode *A, - const MDNode *B) const { - // Verify that both input nodes are struct-path aware. - assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); - assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); - - // Keep track of the root node for A and B. - TBAAStructTypeNode RootA, RootB; - TBAAStructTagNode TagA(A), TagB(B); - - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. - - // Start from the base type of A, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the base type of B or - // until we reach the Root node. - // Compare the adjusted offset once we have the same base. - - // Climb the type DAG from base type of A to see if we reach base type of B. - const MDNode *BaseA = TagA.getBaseType(); - const MDNode *BaseB = TagB.getBaseType(); - uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); - for (TBAAStructTypeNode T(BaseA); ; ) { - if (T.getNode() == BaseB) - // Base type of A encloses base type of B, check if the offsets match. - return OffsetA == OffsetB; - - RootA = T; - // Follow the edge with the correct offset, OffsetA will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetA); - if (!T.getNode()) - break; - } - - // Reset OffsetA and climb the type DAG from base type of B to see if we reach - // base type of A. - OffsetA = TagA.getOffset(); - for (TBAAStructTypeNode T(BaseB); ; ) { - if (T.getNode() == BaseA) - // Base type of B encloses base type of A, check if the offsets match. 
- return OffsetA == OffsetB; - - RootB = T; - // Follow the edge with the correct offset, OffsetB will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetB); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { if (!EnableTBAA) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must // be conservative. const MDNode *AM = LocA.AATags.TBAA; - if (!AM) return AliasAnalysis::alias(LocA, LocB); + if (!AM) + return AAResultBase::alias(LocA, LocB); const MDNode *BM = LocB.AATags.TBAA; - if (!BM) return AliasAnalysis::alias(LocA, LocB); + if (!BM) + return AAResultBase::alias(LocA, LocB); // If they may alias, chain to the next AliasAnalysis. if (Aliases(AM, BM)) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Otherwise return a definitive result. return NoAlias; } -bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { if (!EnableTBAA) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); const MDNode *M = Loc.AATags.TBAA; - if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + if (!M) + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. @@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) return true; - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +FunctionModRefBehavior +TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { if (!EnableTBAA) - return AliasAnalysis::getModRefBehavior(CS); + return AAResultBase::getModRefBehavior(CS); - ModRefBehavior Min = UnknownModRefBehavior; + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) - Min = OnlyReadsMemory; + Min = FMRB_OnlyReadsMemory; - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { // Functions don't have metadata. Just chain to the next implementation. 
- return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); if (const MDNode *L = Loc.AATags.TBAA) if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); if (const MDNode *M1 = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (const MDNode *M2 = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); } bool MDNode::isTBAAVtableAccess() const { if (!isStructPathTBAA(this)) { - if (getNumOperands() < 1) return false; + if (getNumOperands() < 1) + return false; if (MDString *Tag1 = dyn_cast(getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } return false; } // For struct-path aware TBAA, we use the access type of the tag. - if (getNumOperands() < 2) return false; + if (getNumOperands() < 2) + return false; MDNode *Tag = cast_or_null(getOperand(1)); - if (!Tag) return false; + if (!Tag) + return false; if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } - return false; + return false; } MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { @@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null(A->getOperand(1)); - if (!A) return nullptr; + if (!A) + return nullptr; B = cast_or_null(B->getOperand(1)); - if (!B) return nullptr; + if (!B) + return nullptr; } SmallSetVector PathA; @@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { int IB = PathB.size() - 1; MDNode *Ret = nullptr; - while (IA >= 0 && IB >=0) { + while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; else @@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) + return PathAliases(A, B); + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A);;) { + if (T.getNode() == B) + // B is an ancestor of A. 
+ return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B);;) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + + // Keep track of the root node for A and B. + TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA);;) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB);;) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. 
+  return false;
+}
+
+TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char TypeBasedAA::PassID;
+
+char TypeBasedAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa",
+                      "Type-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
+                    false, true)
+
+ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
+  return new TypeBasedAAWrapperPass();
+}
+
+TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
+  initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
+  Result.reset(new TypeBasedAAResult(
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+  return false;
+}
+
+bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
+  Result.reset();
+  return false;
+}
+
+void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index fa0d7798cae9..314ec9c1886e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -43,7 +44,7 @@ const unsigned MaxDepth = 6;
 
 /// Enable an experimental feature to leverage information about dominating
 /// conditions to compute known bits.  The individual options below control how
-/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
 /// run into compile time problems when testing, scale them back and report
 /// your findings.
 static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
 /// conditions?
 static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
                                                    cl::Hidden,
-                                                   cl::init(20000));
+                                                   cl::init(20));
 
 // Controls the number of uses of the value searched for possible
 // dominating comparisons.
 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
-                                              cl::Hidden, cl::init(2000));
+                                              cl::Hidden, cl::init(20));
 
 // If true, don't consider only compares whose only use is a branch.
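// ---- Editor's aside (illustrative sketch, not part of the patch) ----------
// The struct-path TBAA tags that TypeBasedAAResult consumes above are plain
// MDNodes; MDBuilder provides helpers to build them. The helper name
// buildExampleTag is hypothetical, but createTBAARoot,
// createTBAAScalarTypeNode and createTBAAStructTagNode are the MDBuilder
// entry points as of this era; treat the exact signatures as assumptions.
#include "llvm/IR/MDBuilder.h"

static llvm::MDNode *buildExampleTag(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Root = MDB.createTBAARoot("Simple C/C++ TBAA");
  llvm::MDNode *Char = MDB.createTBAAScalarTypeNode("omnipotent char", Root);
  llvm::MDNode *Int  = MDB.createTBAAScalarTypeNode("int", Char);
  // Access-tag operands: [0] base type, [1] access type, [2] offset - exactly
  // what TBAAStructTagNode::getBaseType/getAccessType/getOffset read above.
  return MDB.createTBAAStructTagNode(Int, Int, /*Offset=*/0);
}
// ----------------------------------------------------------------------------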
 static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
@@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
   return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
 }
 
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+                              AssumptionCache *AC, const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  bool NonNegative, Negative;
+  ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+  return NonNegative;
+}
+
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                            const Query &Q);
+
+bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                           AssumptionCache *AC, const Instruction *CxtI,
+                           const DominatorTree *DT) {
+  return ::isKnownNonEqual(V1, V2, DL, Query(AC,
+                                             safeCxtI(V1, safeCxtI(V2, CxtI)),
+                                             DT));
+}
+
 static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
                               unsigned Depth, const Query &Q);
 
@@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
   }
 
   // If low bits are zero in either operand, output low known-0 bits.
-  // Also compute a conserative estimate for high known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
   // More trickiness is possible, but this is sufficient for the
   // interesting case of alignment computation.
   KnownOne.clearAllBits();
@@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
 }
 
 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
-                                             APInt &KnownZero) {
+                                             APInt &KnownZero,
+                                             APInt &KnownOne) {
   unsigned BitWidth = KnownZero.getBitWidth();
   unsigned NumRanges = Ranges.getNumOperands() / 2;
   assert(NumRanges >= 1);
 
-  // Use the high end of the ranges to find leading zeros.
-  unsigned MinLeadingZeros = BitWidth;
+  KnownZero.setAllBits();
+  KnownOne.setAllBits();
+
   for (unsigned i = 0; i < NumRanges; ++i) {
     ConstantInt *Lower =
         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
     ConstantInt *Upper =
         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
     ConstantRange Range(Lower->getValue(), Upper->getValue());
-    if (Range.isWrappedSet())
-      MinLeadingZeros = 0; // -1 has no zeros
-    unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
-    MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
-  }
-  KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+
+    // The first CommonPrefixBits of all values in Range are equal.
+    unsigned CommonPrefixBits =
+        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+
+    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
+    KnownOne &= Range.getUnsignedMax() & Mask;
+    KnownZero &= ~Range.getUnsignedMax() & Mask;
+  }
 }
 
 static bool isEphemeralValueOf(Instruction *I, const Value *E) {
@@ -374,20 +398,20 @@
   SmallPtrSet<const Value *, 32> Visited;
   SmallPtrSet<const Value *, 16> EphValues;
 
+  // The instruction defining an assumption's condition itself is always
+  // considered ephemeral to that assumption (even if it has other
+  // non-ephemeral users). See r246696's test case for an example.
+  if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+    return true;
+
   while (!WorkSet.empty()) {
     const Value *V = WorkSet.pop_back_val();
     if (!Visited.insert(V).second)
       continue;
 
     // If all uses of this value are ephemeral, then so is this value.
-    bool FoundNEUse = false;
-    for (const User *I : V->users())
-      if (!EphValues.count(I)) {
-        FoundNEUse = true;
-        break;
-      }
-
-    if (!FoundNEUse) {
+    if (std::all_of(V->user_begin(), V->user_end(),
+                    [&](const User *U) { return EphValues.count(U); })) {
       if (V == E)
         return true;
 
@@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
     for (BasicBlock::const_iterator I =
            std::next(BasicBlock::const_iterator(Q.CxtI)),
                      IE(Inv); I != IE; ++I)
-      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
        return false;
 
     return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
     // of the block); the common case is that the assume will come first.
     for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
          IE = Inv->getParent()->end(); I != IE; ++I)
-      if (I == Q.CxtI)
+      if (&*I == Q.CxtI)
         return true;
 
     // The context must come first...
     for (BasicBlock::const_iterator I =
            std::next(BasicBlock::const_iterator(Q.CxtI)),
                      IE(Inv); I != IE; ++I)
-      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
         return false;
 
     return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
   if (!Q.DT || !Q.CxtI)
     return;
   Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+  // The context instruction might be in a statically unreachable block.  If
+  // so, asking dominator queries may yield surprising results.  (e.g. the
+  // block may not have a dom tree node)
+  if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+    return;
 
   // Avoid useless work
   if (auto VI = dyn_cast<Instruction>(V))
@@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
     // instruction.  Finding a condition where one path dominates the context
     // isn't enough because both the true and false cases could merge before
     // the context instruction we're actually interested in.  Instead, we need
-    // to ensure that the taken *edge* dominates the context instruction.
+    // to ensure that the taken *edge* dominates the context instruction.  We
+    // know that the edge must be reachable since we started from a reachable
+    // block.
     BasicBlock *BB0 = BI->getSuccessor(0);
     BasicBlockEdge Edge(BI->getParent(), BB0);
     if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
@@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
   }
 }
 
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively, and a
+// shift amount, compute the implied known-zero or known-one bits of the shift
+// operator's result respectively for that shift amount. The results from
+// calling KZF and KOF are conservatively combined for all permitted shift
+// amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+              APInt &KnownZero, APInt &KnownOne,
+              APInt &KnownZero2, APInt &KnownOne2,
+              const DataLayout &DL, unsigned Depth, const Query &Q,
+              KZFunctor KZF, KOFunctor KOF) {
+  unsigned BitWidth = KnownZero.getBitWidth();
+
+  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+    KnownZero = KZF(KnownZero, ShiftAmt);
+    KnownOne = KOF(KnownOne, ShiftAmt);
+    return;
+  }
+
+  computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+  // Note: We cannot use KnownZero.getLimitedValue() here, because if
+  // BitWidth > 64 and any upper bits are known, we'll end up returning the
+  // limit value (which implies all bits are known).
+  uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+  uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+  // It would be more-clearly correct to use the two temporaries for this
+  // calculation. Reusing the APInts here to prevent unnecessary allocations.
+  KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+  // If we know the shifter operand is nonzero, we can sometimes infer more
+  // known bits. However this is expensive to compute, so be lazy about it and
+  // only compute it when absolutely necessary.
+  Optional<bool> ShifterOperandIsNonZero;
+
+  // Early exit if we can't constrain any well-defined shift amount.
+  if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+    ShifterOperandIsNonZero =
+        isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+    if (!*ShifterOperandIsNonZero)
+      return;
+  }
+
+  computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+  KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+    // Combine the shifted known input bits only for those shift amounts
+    // compatible with its known constraints.
+    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+      continue;
+    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+      continue;
+    // If we know the shifter is nonzero, we may be able to infer more known
+    // bits. This check is sunk down as far as possible to avoid the expensive
+    // call to isKnownNonZero if the cheaper checks above fail.
+    if (ShiftAmt == 0) {
+      if (!ShifterOperandIsNonZero.hasValue())
+        ShifterOperandIsNonZero =
+            isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+      if (*ShifterOperandIsNonZero)
+        continue;
+    }
+
+    KnownZero &= KZF(KnownZero2, ShiftAmt);
+    KnownOne &= KOF(KnownOne2, ShiftAmt);
+  }
+
+  // If there are no compatible shift amounts, then we've proven that the shift
+  // amount must be >= the BitWidth, and the result is undefined. We could
+  // return anything we'd like, but we need to make sure the sets of known bits
+  // stay disjoint (it should be better for some other code to actually
+  // propagate the undef than to pick a value here using known bits).
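// Editor's aside (illustrative sketch, not part of the patch): the loop above
// intersects the KZF/KOF results over every shift amount compatible with the
// shifter's known bits. A self-contained 8-bit model of the same combining
// rule, specialized to shl and omitting the isKnownNonZero refinement; every
// name below is hypothetical:
#include <cstdint>

static void toyShlKnownBits(uint8_t AmtKnownZero, uint8_t AmtKnownOne,
                            uint8_t KZ2, uint8_t KO2,   // known bits of operand 0
                            uint8_t &KZ, uint8_t &KO) { // known bits of the shl
  KZ = KO = 0xFF; // start from "all bits known" and intersect
  for (unsigned Amt = 0; Amt < 8; ++Amt) {
    if ((Amt & ~AmtKnownZero) != Amt)
      continue; // Amt has a one where the amount is known to be zero
    if ((Amt | AmtKnownOne) != Amt)
      continue; // Amt has a zero where the amount is known to be one
    KZ &= uint8_t((KZ2 << Amt) | ((1u << Amt) - 1)); // vacated low bits are zero
    KO &= uint8_t(KO2 << Amt);
  }
  if (KZ & KO) // no feasible amount: mirrors the disjointness reset below
    KZ = KO = 0;
}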
+ if ((KnownZero & KnownOne) != 0) + KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} + static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { @@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // here we handle the more general case of adding any odd number by + // matching the form add(x, add(x, y)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + Value *Y = nullptr; + if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y)))) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } break; } case Instruction::Or: { @@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || + SrcTy->isFloatingPointTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } - case Instruction::Shl: + case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne << ShiftAmt; + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::LShr: + } + case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::lshr(KnownZero, ShiftAmt) | + // High bits known zero. + APInt::getHighBitsSet(BitWidth, ShiftAmt); + }; - // Unsigned shift right. 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // high bits known zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::lshr(KnownOne, ShiftAmt); + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::AShr: + } + case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::ashr(KnownZero, ShiftAmt); + }; - // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::ashr(KnownOne, ShiftAmt); + }; - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. - KnownZero |= HighBits; - else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. - KnownOne |= HighBits; - } + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; + } case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, @@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, case Instruction::Call: case Instruction::Invoke: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); // If a range metadata is attached to this IntrinsicInst, intersect the // explicit range specified by the metadata and the implicit range of // the intrinsic. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. + unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. 
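// Editor's aside: a worked instance of the ctpop bound above. For a 32-bit
// value with 24 bits already known zero, BitsPossiblySet = 32 - 24 = 8, and
// APInt(32, 8).countLeadingZeros() == 28, so the high 28 bits of the
// population count are known zero: any count <= 8 fits in the low 4 bits.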
+ break; + } + case Intrinsic::fabs: { + Type *Ty = II->getType(); + APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); + KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } } +static unsigned getAlignment(const Value *V, const DataLayout &DL) { + unsigned Align = 0; + if (auto *GO = dyn_cast(V)) { + Align = GO->getAlignment(); + if (Align == 0) { + if (auto *GVar = dyn_cast(GO)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (GVar->isStrongDefinitionForLinker()) + Align = DL.getPreferredAlignment(GVar); + else + Align = DL.getABITypeAlignment(ObjectType); + } + } + } + } else if (const Argument *A = dyn_cast(V)) { + Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + + if (!Align && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = DL.getABITypeAlignment(EltTy); + } + } else if (const AllocaInst *AI = dyn_cast(V)) + Align = AI->getAlignment(); + else if (auto CS = ImmutableCallSite(V)) + Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); + else if (const LoadInst *LI = dyn_cast(V)) + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + Align = CI->getLimitedValue(); + } + + return Align; +} + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned BitWidth = KnownZero.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->isFPOrFPVectorTy() || V->getType()->getScalarType()->isPointerTy()) && - "Not integer or pointer type!"); + "Not integer, floating point, or pointer type!"); assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && @@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } - // The address of an aligned GlobalValue has trailing zeros. - if (auto *GO = dyn_cast(V)) { - unsigned Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, - countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - return; - } - - if (Argument *A = dyn_cast(V)) { - unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. 
- Type *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } - - if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - - // Don't give up yet... there might be an assumption that provides more - // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Or a dominating condition for that matter - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, - Depth, Q); - return; - } - // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Operator *I = dyn_cast(V)) computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + + // Aligned pointers have trailing zeros - refine KnownZero set + if (V->getType()->isPointerTy()) { + unsigned Align = getAlignment(V, DL); + if (Align) + KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + } + // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition // strictly refines KnownZero and KnownOne. Therefore, we run them after // computeKnownBitsFromOperator. @@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; + + // If the shifter operand is a constant, and all of the bits shifted + // out are known to be zero, and X is known non-zero then at least one + // non-zero bit must remain. + if (ConstantInt *Shift = dyn_cast(Y)) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); + // Is there a known one in the portion not shifted out? + if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + return true; + // Are all the bits to be shifted out known zero? + if (KnownZero.countTrailingOnes() >= ShiftVal) + return isKnownNonZero(X, DL, Depth, Q); + } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } + // PHI + else if (PHINode *PN = dyn_cast(V)) { + // Try and detect a recurrence that monotonically increases from a + // starting value, as these are common as induction variables. + if (PN->getNumIncomingValues() == 2) { + Value *Start = PN->getIncomingValue(0); + Value *Induction = PN->getIncomingValue(1); + if (isa(Induction) && !isa(Start)) + std::swap(Start, Induction); + if (ConstantInt *C = dyn_cast(Start)) { + if (!C->isZero() && !C->isNegative()) { + ConstantInt *X; + if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || + match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && + !X->isNegative()) + return true; + } + } + } + } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); @@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return KnownOne != 0; } +/// Return true if V2 == V1 + X, where X is known non-zero. 
+static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL,
+                           const Query &Q) {
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+  if (!BO || BO->getOpcode() != Instruction::Add)
+    return false;
+  Value *Op = nullptr;
+  if (V2 == BO->getOperand(0))
+    Op = BO->getOperand(1);
+  else if (V2 == BO->getOperand(1))
+    Op = BO->getOperand(0);
+  else
+    return false;
+  return isKnownNonZero(Op, DL, 0, Q);
+}
+
+/// Return true if it is known that V1 != V2.
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                            const Query &Q) {
+  if (V1->getType()->isVectorTy() || V1 == V2)
+    return false;
+  if (V1->getType() != V2->getType())
+    // We can't look through casts yet.
+    return false;
+  if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q))
+    return true;
+
+  if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+    // Are any known bits in V1 contradictory to known bits in V2? If V1
+    // has a known zero where V2 has a known one, they must not be equal.
+    auto BitWidth = Ty->getBitWidth();
+    APInt KnownZero1(BitWidth, 0);
+    APInt KnownOne1(BitWidth, 0);
+    computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q);
+    APInt KnownZero2(BitWidth, 0);
+    APInt KnownOne2(BitWidth, 0);
+    computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q);
+
+    auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+    if (OppositeBits.getBoolValue())
+      return true;
+  }
+  return false;
+}
+
 /// Return true if 'V & Mask' is known to be zero.  We use this predicate to
 /// simplify operations downstream. Mask is known to be zero for bits that V
 /// cannot have.
@@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
     }
 
     // This insert value inserts something else than what we are looking for.
-    // See if the (aggregrate) value inserted into has the value we are
+    // See if the (aggregate) value inserted into has the value we are
    // looking for, then.
    if (*req_idx != *i)
      return FindInsertedValue(I->getAggregateOperand(), idx_range,
@@ -2560,7 +2798,7 @@
  }
 
  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
-    // If we're extracting a value from an aggregrate that was extracted from
+    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.
@@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
   return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
 }
 
-/// Return true if Value is always a dereferenceable pointer.
-///
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+                      const DataLayout &DL) {
+  APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+  if (!BaseAlign) {
+    Type *Ty = Base->getType()->getPointerElementType();
+    if (!Ty->isSized())
+      return false;
+    BaseAlign = DL.getABITypeAlignment(Ty);
+  }
+
+  APInt Alignment(Offset.getBitWidth(), Align);
+
+  assert(Alignment.isPowerOf2() && "must be a power of 2!");
+  return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+  Type *Ty = Base->getType();
+  assert(Ty->isSized() && "must be sized");
+  APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+  return isAligned(Base, Offset, Align, DL);
+}
+
 /// Test if V is always a pointer to allocated and suitably aligned memory for
 /// a simple load or store.
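// Editor's aside: a numeric instance of the isAligned check above. With a
// base known to be 16-byte aligned, a constant Offset of 8, and a required
// Align of 8, BaseAlign.uge(Alignment) holds (16 >= 8) and
// (Offset & (Alignment - 1)) == (8 & 7) == 0, so the access is accepted as
// 8-byte aligned.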
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - SmallPtrSetImpl &Visited) { +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // These are obviously ok. - if (isa(V)) return true; + // These are obviously ok if aligned. + if (isa(V)) + return isAligned(V, Align, DL); // It's not always safe to follow a bitcast, for example: // bitcast i8* (alloca i8) to i32* @@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, if (STy->isSized() && DTy->isSized() && (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); } // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); // byval arguments are okay. if (const Argument *A = dyn_cast(V)) if (A->hasByValAttr()) - return true; - + return isAligned(V, Align, DL); + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) - return true; + return isAligned(V, Align, DL); // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast(V)) { @@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, Type *Ty = VTy->getPointerElementType(); const Value *Base = GEP->getPointerOperand(); - // Conservatively require that the base pointer be fully dereferenceable. + // Conservatively require that the base pointer be fully dereferenceable + // and aligned. if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(Base, DL, CtxI, - DT, TLI, Visited)) + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) return false; - + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); if (!GEP->accumulateConstantOffset(DL, Offset)) return false; - - // Check if the load is within the bounds of the underlying object. + + // Check if the load is within the bounds of the underlying object + // and offset is aligned. 
uint64_t LoadSize = DL.getTypeStoreSize(Ty); Type *BaseType = Base->getType()->getPointerElementType(); - return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && + !(Offset & APInt(Offset.getBitWidth(), Align-1)); } // For gc.relocate, look through relocations if (const IntrinsicInst *I = dyn_cast(V)) if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { GCRelocateOperands RelocateInst(I); - return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); } if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. Type *VTy = V->getType(); Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + if (Ty->isSized()) { APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - + if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, - CtxI, DT, TLI)) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) return true; } SmallPtrSet Visited; - return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited); + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); } bool llvm::isSafeToSpeculativelyExecute(const Value *V, @@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const LoadInst *LI = cast(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. 
+ LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { @@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: - case Instruction::LandingPad: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: + case Instruction::LandingPad: case Instruction::Resume: + case Instruction::CatchSwitch: + case Instruction::CatchPad: + case Instruction::CatchRet: + case Instruction::CleanupPad: + case Instruction::CleanupRet: return false; // Misc instructions which have effects } } +bool llvm::mayBeMemoryDependent(const Instruction &I) { + return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} + /// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const Argument *A = dyn_cast(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // Global values are not null unless extern weak. + // A global variable in address space 0 is non null unless extern weak. + // Other address spaces may have null as a valid address for a global, + // so we can't assume anything. if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + return !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0; // A Load tagged w/nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) @@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + unsigned NumUsesExplored = 0; for (auto U : V->users()) { // Avoid massive lists @@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } -static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, +static OverflowResult computeOverflowForSignedAdd( + Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + if (Add && Add->hasNoSignedWrap()) { + return OverflowResult::NeverOverflows; + } + + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + + if ((LHSKnownNonNegative && RHSKnownNegative) || + (LHSKnownNegative && RHSKnownNonNegative)) { + // The sign bits are opposite: this CANNOT overflow. + return OverflowResult::NeverOverflows; + } + + // The remaining code needs Add to be available. Early returns if not so. 
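// Editor's aside: the opposite-sign case above is the standard two's
// complement argument. When sign(LHS) != sign(RHS), the true sum always fits;
// e.g. for i8, LHS in [-128, -1] and RHS in [0, 127] gives LHS + RHS in
// [-128, 126], which never leaves the representable range.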
+ if (!Add) + return OverflowResult::MayOverflow; + + // If the sign of Add is the same as at least one of the operands, this add + // CANNOT overflow. This is particularly useful when the sum is + // @llvm.assume'ed non-negative rather than proved so from analyzing its + // operands. + bool LHSOrRHSKnownNonNegative = + (LHSKnownNonNegative || RHSKnownNonNegative); + bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { + bool AddKnownNonNegative, AddKnownNegative; + ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, + /*Depth=*/0, AC, CxtI, DT); + if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || + (AddKnownNegative && LHSOrRHSKnownNegative)) { + return OverflowResult::NeverOverflows; + } + } + + return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), + Add, DL, AC, CxtI, DT); +} + +OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +} + +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { + // FIXME: This conservative implementation can be relaxed. E.g. most + // atomic operations are guaranteed to terminate on most platforms + // and most functions terminate. + + return !I->isAtomic() && // atomics may never succeed on some platforms + !isa(I) && // could throw and might not terminate + !isa(I) && // might not terminate and could throw to + // non-successor (see bug 24185 for details). + !isa(I) && // has no successors + !isa(I); // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L) { + // The loop header is guaranteed to be executed for every iteration. + // + // FIXME: Relax this constraint to cover all basic blocks that are + // guaranteed to be executed at every iteration. + if (I->getParent() != L->getHeader()) return false; + + for (const Instruction &LI : *L->getHeader()) { + if (&LI == I) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; + } + llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // These operations all propagate poison unconditionally. Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; + + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; + + case Instruction::Shl: { + // Left shift *by* a poison value is poison. The number of + // positions to shift is unsigned, so no negative values are + // possible there. Left shift by zero places preserves poison. So + // it only remains to consider left shift of poison by a positive + // number of places. 
+    //
+    // A left shift by a positive number of places leaves the lowest order bit
+    // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+    // make the poison operand violate that flag, yielding a fresh full-poison
+    // value.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+  }
+
+  case Instruction::Mul: {
+    // A multiplication by zero yields a non-poison zero result, so we need to
+    // rule out zero as an operand. Conservatively, multiplication by a
+    // non-zero constant is not multiplication by zero.
+    //
+    // Multiplication by a non-zero constant can leave some bits
+    // non-poisoned. For example, a multiplication by 2 leaves the lowest
+    // order bit unpoisoned. So we need to consider that.
+    //
+    // Multiplication by 1 preserves poison. If the multiplication has a
+    // no-wrap flag, then we can make the poison operand violate that flag
+    // when multiplied by any integer other than 0 and 1.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+      for (Value *V : OBO->operands()) {
+        if (auto *CI = dyn_cast<ConstantInt>(V)) {
+          // A ConstantInt cannot yield poison, so we can assume that it is
+          // the other operand that is poison.
+          return !CI->isZero();
+        }
+      }
+    }
+    return false;
+  }
+
+  case Instruction::GetElementPtr:
+    // A GEP implicitly represents a sequence of additions, subtractions,
+    // truncations, sign extensions and multiplications. The multiplications
+    // are by the non-zero sizes of some set of types, so we do not have to be
+    // concerned with multiplication by zero. If the GEP is in-bounds, then
+    // these operations are implicitly no-signed-wrap so poison is propagated
+    // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+    return cast<GEPOperator>(I)->isInBounds();
+
+  default:
+    return false;
+  }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+    case Instruction::Store:
+      return cast<StoreInst>(I)->getPointerOperand();
+
+    case Instruction::Load:
+      return cast<LoadInst>(I)->getPointerOperand();
+
+    case Instruction::AtomicCmpXchg:
+      return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+    case Instruction::AtomicRMW:
+      return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+      return I->getOperand(1);
+
+    default:
+      return nullptr;
+  }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
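// Editor's aside: the two helpers above compose as follows on a typical
// pattern (hypothetical IR, with %p playing the role of PoisonI below):
//   %q = add nsw i32 %p, 1   ; propagatesFullPoison -> %q is full poison too
//   %r = sdiv i32 %x, %q     ; getGuaranteedNonFullPoisonOp -> returns %q
// If %r executes after %p in the same block, executing it with %q full
// poison would be undefined, so the scan below may conclude that %p cannot
// have been full poison.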
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Store:
+    return cast<StoreInst>(I)->getPointerOperand();
+
+  case Instruction::Load:
+    return cast<LoadInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicCmpXchg:
+    return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicRMW:
+    return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+    return I->getOperand(1);
+
+  default:
+    return nullptr;
+  }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
+    if (YieldsPoison.count(&*I)) {
+      for (const User *User : I->users()) {
+        const Instruction *UserI = cast<Instruction>(User);
+        if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+          YieldsPoison.insert(User);
+      }
+    }
+  }
+  return false;
+}
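+
+// Example (illustrative only, all in one basic block):
+//   %idx = add nsw i32 %a, %b                          ; PoisonI
+//   %gep = getelementptr inbounds i32, i32* %p, i32 %idx
+//   store i32 0, i32* %gep
+// The inbounds GEP propagates full poison to %gep, and %gep is the store's
+// guaranteed-non-poison pointer operand, so the walk above returns true.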
+
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+  if (FMF.noNaNs())
+    return true;
+
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isNaN();
+  return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isZero();
+  return false;
+}
+
-static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+                                              FastMathFlags FMF,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
                                               Value *&LHS, Value *&RHS) {
   LHS = CmpLHS;
   RHS = CmpRHS;
 
-  // (icmp X, Y) ? X : Y
-  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
-    switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
-    case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMAX;
-    case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMAX;
-    case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMIN;
-    case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMIN;
+  // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
+  // return inconsistent results between implementations.
+  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero, or if we don't care about signed zeroes.
+  switch (Pred) {
+  default: break;
+  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+        !isKnownNonZero(CmpRHS))
+      return {SPF_UNKNOWN, SPNB_NA, false};
+  }
+
+  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+  bool Ordered = false;
+
+  // When given one NaN and one non-NaN input:
+  //  - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+  //  - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+  //    ordered comparison fails), which could be NaN or non-NaN.
+  // so here we discover exactly what NaN behavior is required/accepted.
+  if (CmpInst::isFPPredicate(Pred)) {
+    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+    if (LHSSafe && RHSSafe) {
+      // Both operands are known non-NaN.
+      NaNBehavior = SPNB_RETURNS_ANY;
+    } else if (CmpInst::isOrdered(Pred)) {
+      // An ordered comparison will return false when given a NaN, so it
+      // returns the RHS.
+      Ordered = true;
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
+    } else {
+      Ordered = false;
+      // An unordered comparison will return true when given a NaN, so it
+      // returns the LHS.
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
     }
   }
 
-  // (icmp X, Y) ? Y : X
   if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+    std::swap(CmpLHS, CmpRHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+    if (NaNBehavior == SPNB_RETURNS_NAN)
+      NaNBehavior = SPNB_RETURNS_OTHER;
+    else if (NaNBehavior == SPNB_RETURNS_OTHER)
+      NaNBehavior = SPNB_RETURNS_NAN;
+    Ordered = !Ordered;
+  }
+
+  // ([if]cmp X, Y) ? X : Y
+  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
     switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
+    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
     case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMIN;
+    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
     case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMIN;
+    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
     case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMAX;
+    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
     case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMAX;
+    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+    case FCmpInst::FCMP_UGT:
+    case FCmpInst::FCMP_UGE:
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+    case FCmpInst::FCMP_ULT:
+    case FCmpInst::FCMP_ULE:
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
     }
   }
@@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
       // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
       // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
       if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
-        return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }
 
       // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
       // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
       if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
-        return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }
     }
@@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
           match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
         LHS = TrueVal;
         RHS = FalseVal;
-        return SPF_SMIN;
+        return {SPF_SMIN, SPNB_NA, false};
       }
     }
   }
 
   // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5)
 
-  return SPF_UNKNOWN;
+  return {SPF_UNKNOWN, SPNB_NA, false};
 }
 
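+// Illustrative note (an exposition aid, not from the original patch):
+// lookThroughCast below lets the matcher handle selects whose compare runs
+// on a widened operand, e.g. "select (fcmp olt (fpext float %a to double),
+// 2.0), ..." by translating the double constant back to float so min/max
+// matching can proceed at the original type.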
-static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
-                                 Instruction::CastOps *CastOp) {
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+                              Instruction::CastOps *CastOp) {
   CastInst *CI = dyn_cast<CastInst>(V1);
   Constant *C = dyn_cast<Constant>(V2);
-  if (!CI || !C)
+  CastInst *CI2 = dyn_cast<CastInst>(V2);
+  if (!CI)
     return nullptr;
   *CastOp = CI->getOpcode();
 
+  if (CI2) {
+    // If V1 and V2 are both the same cast from the same type, we can look
+    // through V1.
+    if (CI2->getOpcode() == CI->getOpcode() &&
+        CI2->getSrcTy() == CI->getSrcTy())
+      return CI2->getOperand(0);
+    return nullptr;
+  } else if (!C) {
+    return nullptr;
+  }
+
   if (isa<SExtInst>(CI) && CmpI->isSigned()) {
     Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
     // This is only valid if the truncated value can be sign-extended
@@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
   if (isa<ZExtInst>(CI))
     return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
 
+  if (isa<FPToUIInst>(CI))
+    return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<FPToSIInst>(CI))
+    return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<UIToFPInst>(CI))
+    return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+  if (isa<SIToFPInst>(CI))
+    return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+  if (isa<FPTruncInst>(CI))
+    return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+  if (isa<FPExtInst>(CI))
+    return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
   return nullptr;
 }
 
-SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+SelectPatternResult llvm::matchSelectPattern(Value *V,
                                              Value *&LHS, Value *&RHS,
                                              Instruction::CastOps *CastOp) {
   SelectInst *SI = dyn_cast<SelectInst>(V);
-  if (!SI) return SPF_UNKNOWN;
+  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
 
-  ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
-  if (!CmpI) return SPF_UNKNOWN;
+  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
 
-  ICmpInst::Predicate Pred = CmpI->getPredicate();
+  CmpInst::Predicate Pred = CmpI->getPredicate();
   Value *CmpLHS = CmpI->getOperand(0);
   Value *CmpRHS = CmpI->getOperand(1);
   Value *TrueVal = SI->getTrueValue();
   Value *FalseVal = SI->getFalseValue();
+  FastMathFlags FMF;
+  if (isa<FCmpInst>(CmpI))
+    FMF = CmpI->getFastMathFlags();
 
   // Bail out early.
   if (CmpI->isEquality())
-    return SPF_UNKNOWN;
+    return {SPF_UNKNOWN, SPNB_NA, false};
 
   // Deal with type mismatches.
   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   cast<CastInst>(TrueVal)->getOperand(0), C,
                                   LHS, RHS);
-    if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   C, cast<CastInst>(FalseVal)->getOperand(0),
                                   LHS, RHS);
   }
-  return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                               LHS, RHS);
 }
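+
+// Usage sketch (illustrative only; S is assumed to point at a select
+// "%s = select i1 %c, i32 %a, i32 %b" with "%c = icmp ugt i32 %a, %b"):
+//   Value *L, *R;
+//   SelectPatternResult SPR = matchSelectPattern(S, L, R);
+//   // SPR.Flavor == SPF_UMAX; L is %a, R is %b.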
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+  const unsigned NumRanges = Ranges.getNumOperands() / 2;
+  assert(NumRanges >= 1 && "Must have at least one range!");
+  assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+  auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+  auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+  ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+  for (unsigned i = 1; i < NumRanges; ++i) {
+    auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+    auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+    // Note: unionWith will potentially create a range that contains values not
+    // contained in any of the original N ranges.
+    CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+  }
+
+  return CR;
+}
+
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                            const DataLayout &DL, unsigned Depth,
+                            AssumptionCache *AC, const Instruction *CxtI,
+                            const DominatorTree *DT) {
+  assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
+  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+    return true;
+
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLE: {
+    const APInt *C;
+
+    // LHS s<= LHS +_{nsw} C   if C >= 0
+    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
+      return !C->isNegative();
+    return false;
+  }
+
+  case CmpInst::ICMP_ULE: {
+    const APInt *C;
+
+    // LHS u<= LHS +_{nuw} C   for any C
+    if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+      return true;
+
+    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
+    auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+                                       const APInt *&CA, const APInt *&CB) {
+      if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
+          match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
+        return true;
+
+      // If X & C == 0 then (X | C) == X +_{nuw} C
+      if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
+          match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
+        unsigned BitWidth = CA->getBitWidth();
+        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+        computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+
+        if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+          return true;
+      }
+
+      return false;
+    };
+
+    Value *X;
+    const APInt *CLHS, *CRHS;
+    if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
+      return CLHS->ule(*CRHS);
+
+    return false;
+  }
+  }
+}
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
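+/// For example (illustrative only): "%x s< %y" implies "%x s< %z" when
+/// %z = add nsw i32 %y, 1, since BLHS s<= ALHS holds trivially and
+/// isTruePredicate proves ARHS s<= BRHS from the nsw add of a non-negative
+/// constant.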
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+                                  Value *ARHS, Value *BLHS, Value *BRHS,
+                                  const DataLayout &DL, unsigned Depth,
+                                  AssumptionCache *AC, const Instruction *CxtI,
+                                  const DominatorTree *DT) {
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE:
+    return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+  }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+                              unsigned Depth, AssumptionCache *AC,
+                              const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  assert(LHS->getType() == RHS->getType() && "mismatched type");
+  Type *OpTy = LHS->getType();
+  assert(OpTy->getScalarType()->isIntegerTy(1));
+
+  // LHS ==> RHS by definition
+  if (LHS == RHS) return true;
+
+  if (OpTy->isVectorTy())
+    // TODO: extending the code below to handle vectors
+    return false;
+  assert(OpTy->isIntegerTy(1) && "implied by above");
+
+  ICmpInst::Predicate APred, BPred;
+  Value *ALHS, *ARHS;
+  Value *BLHS, *BRHS;
+
+  if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+      !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+    return false;
+
+  if (APred == BPred)
+    return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+                                 CxtI, DT);
+
+  return false;
+}
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 8c671ef0ef0e..4b244ec5e1f6 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,13 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// \brief Identify if the intrinsic is trivially vectorizable.
 /// This method returns true if the intrinsic's argument types are all
@@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
 ///    d) call should only reads memory.
 /// If all these condition is met then return ValidIntrinsicID
 /// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
 llvm::checkUnaryFloatSignature(const CallInst &I,
                                Intrinsic::ID ValidIntrinsicID) {
   if (I.getNumArgOperands() != 1 ||
@@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I,
 ///    d) call should only reads memory.
 /// If all these condition is met then return ValidIntrinsicID
 /// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
 llvm::checkBinaryFloatSignature(const CallInst &I,
                                 Intrinsic::ID ValidIntrinsicID) {
   if (I.getNumArgOperands() != 2 ||
@@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,
 /// \brief Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its ID, in case it does not found it return not_intrinsic.
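+/// For example (illustrative only): a read-only call to "float @sinf(float)"
+/// that the TargetLibraryInfo recognizes maps to Intrinsic::sin via
+/// checkUnaryFloatSignature.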
-llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
-                                                const TargetLibraryInfo *TLI) {
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+                                          const TargetLibraryInfo *TLI) {
   // If we have an intrinsic call, check if it is trivially vectorizable.
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
     Intrinsic::ID ID = II->getIntrinsicID();
@@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
       cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
 
   // Walk backwards and try to peel off zeros.
-  while (LastOperand > 1 &&
-         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
     // Find the type we're currently indexing into.
     gep_type_iterator GEPTI = gep_type_begin(Gep);
     std::advance(GEPTI, LastOperand - 1);
@@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
 /// \brief If the argument is a GEP, then returns the operand identified by
 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
 /// operand, it returns that instead.
-llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
-                                      Loop *Lp) {
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
   if (!GEP)
     return Ptr;
@@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
 }
 
 /// \brief If a value has only one user that is a CastInst, return it.
-llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
-  llvm::Value *UniqueCast = nullptr;
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+  Value *UniqueCast = nullptr;
   for (User *U : Ptr->users()) {
     CastInst *CI = dyn_cast<CastInst>(U);
     if (CI && CI->getType() == Ty) {
@@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
 
 /// \brief Get the stride of a pointer access in a loop. Looks for symbolic
 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
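+/// For example (illustrative only): for an access "a[i * %n]", the pointer's
+/// SCEV is a recurrence whose step is "%n * sizeof(element)"; the element
+/// size is divided out below and the loop-invariant %n is returned.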
-llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
-                                        Loop *Lp) {
-  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy || PtrTy->isAggregateType())
     return nullptr;
 
   // Try to remove a gep instruction to make the pointer (actually index at this
   // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
   // pointer, otherwise, we are analyzing the index.
-  llvm::Value *OrigPtr = Ptr;
+  Value *OrigPtr = Ptr;
 
   // The size of the pointer access.
   int64_t PtrAccessSize = 1;
@@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
   if (M->getOperand(0)->getSCEVType() != scConstant)
     return nullptr;
 
-  const APInt &APStepVal =
-      cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
 
   // Huge step value - give up.
   if (APStepVal.getBitWidth() > 64)
@@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
   if (!U)
     return nullptr;
 
-  llvm::Value *Stride = U->getValue();
+  Value *Stride = U->getValue();
   if (!Lp->isLoopInvariant(Stride))
     return nullptr;
 
@@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
 /// \brief Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
-llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
   VectorType *VTy = cast<VectorType>(V->getType());
   unsigned Width = VTy->getNumElements();
@@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
   // Extract a value from a vector add operation with a constant zero.
   Value *Val = nullptr;
   Constant *Con = nullptr;
-  if (match(V,
-            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
-                                      llvm::PatternMatch::m_Constant(Con)))) {
+  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
     if (Constant *Elt = Con->getAggregateElement(EltNo))
       if (Elt->isNullValue())
         return findScalarElement(Val, EltNo);
-  }
 
   // Otherwise, we don't know.
   return nullptr;
 }
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only 2 cases:
+/// the input value is (1) a splat constants vector or (2) a sequence
+/// of instructions that broadcast a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+  if (auto *C = dyn_cast<Constant>(V))
+    if (isa<VectorType>(V->getType()))
+      return C->getSplatValue();
+
+  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+  if (!ShuffleInst)
+    return nullptr;
+  // All-zero (or undef) shuffle mask elements.
+  for (int MaskElt : ShuffleInst->getShuffleMask())
+    if (MaskElt != 0 && MaskElt != -1)
+      return nullptr;
+  // The first shuffle source is 'insertelement' with index 0.
+  auto *InsertEltInst =
+      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+    return nullptr;
+
+  return InsertEltInst->getOperand(1);
+}
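+
+// Example (illustrative only): getSplatValue returns %f for
+//   %ins   = insertelement <4 x float> undef, float %f, i32 0
+//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
+//                          <4 x i32> zeroinitializer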
+
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+                               const TargetTransformInfo *TTI) {
+
+  // DemandedBits will give us every value's live-out bits. But we want
+  // to ensure no extra casts would need to be inserted, so every DAG
+  // of connected values must have the same minimum bitwidth.
+  EquivalenceClasses<Value *> ECs;
+  SmallVector<Value *, 16> Worklist;
+  SmallPtrSet<Value *, 4> Roots;
+  SmallPtrSet<Value *, 16> Visited;
+  DenseMap<Value *, uint64_t> DBits;
+  SmallPtrSet<Instruction *, 4> InstructionSet;
+  MapVector<Instruction *, uint64_t> MinBWs;
+
+  // Determine the roots. We work bottom-up, from truncs or icmps.
+  bool SeenExtFromIllegalType = false;
+  for (auto *BB : Blocks)
+    for (auto &I : *BB) {
+      InstructionSet.insert(&I);
+
+      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+          !TTI->isTypeLegal(I.getOperand(0)->getType()))
+        SeenExtFromIllegalType = true;
+
+      // Only deal with non-vector integers up to 64-bits wide.
+      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+          !I.getType()->isVectorTy() &&
+          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+        // Don't make work for ourselves. If we know the loaded type is legal,
+        // don't add it to the worklist.
+        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+          continue;
+
+        Worklist.push_back(&I);
+        Roots.insert(&I);
+      }
+    }
+  // Early exit.
+  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+    return MinBWs;
+
+  // Now proceed breadth-first, unioning values together.
+  while (!Worklist.empty()) {
+    Value *Val = Worklist.pop_back_val();
+    Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+    if (Visited.count(Val))
+      continue;
+    Visited.insert(Val);
+
+    // Non-instructions terminate a chain successfully.
+    if (!isa<Instruction>(Val))
+      continue;
+    Instruction *I = cast<Instruction>(Val);
+
+    // If we encounter a type that is larger than 64 bits, we can't represent
+    // it so bail out.
+    if (DB.getDemandedBits(I).getBitWidth() > 64)
+      return MapVector<Instruction *, uint64_t>();
+
+    uint64_t V = DB.getDemandedBits(I).getZExtValue();
+    DBits[Leader] |= V;
+
+    // Casts, loads and instructions outside of our range terminate a chain
+    // successfully.
+    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+        !InstructionSet.count(I))
+      continue;
+
+    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+    // transform anything that relies on them.
+    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+        !I->getType()->isIntegerTy()) {
+      DBits[Leader] |= ~0ULL;
+      continue;
+    }
+
+    // We don't modify the types of PHIs. Reductions will already have been
+    // truncated if possible, and inductions' sizes will have been chosen by
+    // indvars.
+    if (isa<PHINode>(I))
+      continue;
+
+    if (DBits[Leader] == ~0ULL)
+      // All bits demanded, no point continuing.
+      continue;
+
+    for (Value *O : cast<User>(I)->operands()) {
+      ECs.unionSets(Leader, O);
+      Worklist.push_back(O);
+    }
+  }
+
+  // Now we've discovered all values, walk them to see if there are
+  // any users we didn't see. If there are, we can't optimize that
+  // chain.
+  for (auto &I : DBits)
+    for (auto *U : I.first->users())
+      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+    uint64_t LeaderDemandedBits = 0;
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+      LeaderDemandedBits |= DBits[*MI];
+
+    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+                     llvm::countLeadingZeros(LeaderDemandedBits);
+    // Round up to a power of 2
+    if (!isPowerOf2_64((uint64_t)MinBW))
+      MinBW = NextPowerOf2(MinBW);
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+      if (!isa<Instruction>(*MI))
+        continue;
+      Type *Ty = (*MI)->getType();
+      if (Roots.count(*MI))
+        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+      if (MinBW < Ty->getScalarSizeInBits())
+        MinBWs[cast<Instruction>(*MI)] = MinBW;
+    }
+  }
+
+  return MinBWs;
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 5c4bab734b2b..26eca230bb31 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -105,7 +105,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
     Pair[1] += hexDigitValue(*Buffer);
   }
   Pair[0] = 0;
-  for (int i=0; i<16; i++, Buffer++) {
+  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
     Pair[0] *= 16;
     Pair[0] += hexDigitValue(*Buffer);
   }
@@ -523,9 +523,14 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(zeroinitializer);
   KEYWORD(undef);
   KEYWORD(null);
+  KEYWORD(none);
   KEYWORD(to);
+  KEYWORD(caller);
+  KEYWORD(within);
+  KEYWORD(from);
   KEYWORD(tail);
   KEYWORD(musttail);
+  KEYWORD(notail);
   KEYWORD(target);
   KEYWORD(triple);
   KEYWORD(unwind);
@@ -586,6 +591,10 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(preserve_mostcc);
   KEYWORD(preserve_allcc);
   KEYWORD(ghccc);
+  KEYWORD(x86_intrcc);
+  KEYWORD(hhvmcc);
+  KEYWORD(hhvm_ccc);
+  KEYWORD(cxx_fast_tlscc);
   KEYWORD(cc);
   KEYWORD(c);
@@ -601,6 +610,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(convergent);
   KEYWORD(dereferenceable);
   KEYWORD(dereferenceable_or_null);
+  KEYWORD(inaccessiblememonly);
+  KEYWORD(inaccessiblemem_or_argmemonly);
   KEYWORD(inlinehint);
   KEYWORD(inreg);
   KEYWORD(jumptable);
@@ -613,6 +624,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(noduplicate);
   KEYWORD(noimplicitfloat);
   KEYWORD(noinline);
+  KEYWORD(norecurse);
   KEYWORD(nonlazybind);
   KEYWORD(nonnull);
   KEYWORD(noredzone);
@@ -690,6 +702,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("label",     Type::getLabelTy(Context));
   TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
   TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
+  TYPEKEYWORD("token",     Type::getTokenTy(Context));
 #undef TYPEKEYWORD
 
   // Keywords for instructions.
@@ -749,6 +762,11 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(extractvalue, ExtractValue); INSTKEYWORD(insertvalue, InsertValue); INSTKEYWORD(landingpad, LandingPad); + INSTKEYWORD(cleanupret, CleanupRet); + INSTKEYWORD(catchret, CatchRet); + INSTKEYWORD(catchswitch, CatchSwitch); + INSTKEYWORD(catchpad, CatchPad); + INSTKEYWORD(cleanuppad, CleanupPad); #undef INSTKEYWORD #define DWKEYWORD(TYPE, TOKEN) \ @@ -763,6 +781,7 @@ lltok::Kind LLLexer::LexIdentifier() { DWKEYWORD(VIRTUALITY, DwarfVirtuality); DWKEYWORD(LANG, DwarfLang); DWKEYWORD(OP, DwarfOp); + DWKEYWORD(MACINFO, DwarfMacinfo); #undef DWKEYWORD if (Keyword.startswith("DIFlag")) { diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 1c6e7bd18d0e..3471a2dbd05c 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -13,6 +13,7 @@ #include "LLParser.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/CallingConv.h" @@ -48,6 +49,32 @@ bool LLParser::Run() { ValidateEndOfModule(); } +bool LLParser::parseStandaloneConstantValue(Constant *&C, + const SlotMapping *Slots) { + restoreParsingState(Slots); + Lex.Lex(); + + Type *Ty = nullptr; + if (ParseType(Ty) || parseConstantValue(Ty, C)) + return true; + if (Lex.getKind() != lltok::Eof) + return Error(Lex.getLoc(), "expected end of string"); + return false; +} + +void LLParser::restoreParsingState(const SlotMapping *Slots) { + if (!Slots) + return; + NumberedVals = Slots->GlobalValues; + NumberedMetadata = Slots->MetadataNodes; + for (const auto &I : Slots->NamedTypes) + NamedTypes.insert( + std::make_pair(I.getKey(), std::make_pair(I.second, LocTy()))); + for (const auto &I : Slots->Types) + NumberedTypes.insert( + std::make_pair(I.first, std::make_pair(I.second, LocTy()))); +} + /// ValidateEndOfModule - Do final validity and sanity checks at the end of the /// module. bool LLParser::ValidateEndOfModule() { @@ -158,7 +185,7 @@ bool LLParser::ValidateEndOfModule() { // Look for intrinsic functions and CallInst that need to be upgraded for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) - UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + UpgradeCallsToIntrinsic(&*FI++); // must be post-increment, as we remove UpgradeDebugInfo(*M); @@ -169,6 +196,10 @@ bool LLParser::ValidateEndOfModule() { // the mapping from LLParser as it doesn't need it anymore. 
Slots->GlobalValues = std::move(NumberedVals); Slots->MetadataNodes = std::move(NumberedMetadata); + for (const auto &I : NamedTypes) + Slots->NamedTypes.insert(std::make_pair(I.getKey(), I.second.first)); + for (const auto &I : NumberedTypes) + Slots->Types.insert(std::make_pair(I.first, I.second.first)); return false; } @@ -647,6 +678,12 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, return Error(NameLoc, "symbol with local linkage must have default visibility"); + Type *Ty; + LocTy ExplicitTypeLoc = Lex.getLoc(); + if (ParseType(Ty) || + ParseToken(lltok::comma, "expected comma after alias's type")) + return true; + Constant *Aliasee; LocTy AliaseeLoc = Lex.getLoc(); if (Lex.getKind() != lltok::kw_bitcast && @@ -669,11 +706,35 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, auto *PTy = dyn_cast(AliaseeType); if (!PTy) return Error(AliaseeLoc, "An alias must have pointer type"); + unsigned AddrSpace = PTy->getAddressSpace(); + + if (Ty != PTy->getElementType()) + return Error( + ExplicitTypeLoc, + "explicit pointee type doesn't match operand's pointee type"); + + GlobalValue *GVal = nullptr; + + // See if the alias was forward referenced, if so, prepare to replace the + // forward reference. + if (!Name.empty()) { + GVal = M->getNamedValue(Name); + if (GVal) { + if (!ForwardRefVals.erase(Name)) + return Error(NameLoc, "redefinition of global '@" + Name + "'"); + } + } else { + auto I = ForwardRefValIDs.find(NumberedVals.size()); + if (I != ForwardRefValIDs.end()) { + GVal = I->second.first; + ForwardRefValIDs.erase(I); + } + } // Okay, create the alias but do not insert it into the module yet. std::unique_ptr GA( - GlobalAlias::create(PTy, (GlobalValue::LinkageTypes)Linkage, Name, - Aliasee, /*Parent*/ nullptr)); + GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage, + Name, Aliasee, /*Parent*/ nullptr)); GA->setThreadLocalMode(TLM); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); @@ -682,27 +743,17 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, if (Name.empty()) NumberedVals.push_back(GA.get()); - // See if this value already exists in the symbol table. If so, it is either - // a redefinition or a definition of a forward reference. - if (GlobalValue *Val = M->getNamedValue(Name)) { - // See if this was a redefinition. If so, there is no entry in - // ForwardRefVals. - std::map >::iterator - I = ForwardRefVals.find(Name); - if (I == ForwardRefVals.end()) - return Error(NameLoc, "redefinition of global named '@" + Name + "'"); - - // Otherwise, this was a definition of forward ref. Verify that types - // agree. - if (Val->getType() != GA->getType()) - return Error(NameLoc, - "forward reference and definition of alias have different types"); + if (GVal) { + // Verify that types agree. + if (GVal->getType() != GA->getType()) + return Error( + ExplicitTypeLoc, + "forward reference and definition of alias have different types"); // If they agree, just RAUW the old value with the alias and remove the // forward ref info. - Val->replaceAllUsesWith(GA.get()); - Val->eraseFromParent(); - ForwardRefVals.erase(I); + GVal->replaceAllUsesWith(GA.get()); + GVal->eraseFromParent(); } // Insert into the module, we know its name won't collide now. 
@@ -767,12 +818,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (!Name.empty()) { GVal = M->getNamedValue(Name); if (GVal) { - if (!ForwardRefVals.erase(Name) || !isa(GVal)) + if (!ForwardRefVals.erase(Name)) return Error(NameLoc, "redefinition of global '@" + Name + "'"); } } else { - std::map >::iterator - I = ForwardRefValIDs.find(NumberedVals.size()); + auto I = ForwardRefValIDs.find(NumberedVals.size()); if (I != ForwardRefValIDs.end()) { GVal = I->second.first; ForwardRefValIDs.erase(I); @@ -903,14 +953,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, } // Target-dependent attributes: case lltok::StringConstant: { - std::string Attr = Lex.getStrVal(); - Lex.Lex(); - std::string Val; - if (EatIfPresent(lltok::equal) && - ParseStringConstant(Val)) + if (ParseStringAttribute(B)) return true; - - B.addAttribute(Attr, Val); continue; } @@ -951,6 +995,10 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; + case lltok::kw_inaccessiblememonly: + B.addAttribute(Attribute::InaccessibleMemOnly); break; + case lltok::kw_inaccessiblemem_or_argmemonly: + B.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; @@ -963,6 +1011,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break; case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; @@ -1015,6 +1064,17 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // GlobalValue Reference/Resolution Routines. //===----------------------------------------------------------------------===// +static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy, + const std::string &Name) { + if (auto *FT = dyn_cast(PTy->getElementType())) + return Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); + else + return new GlobalVariable(*M, PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, nullptr, Name, + nullptr, GlobalVariable::NotThreadLocal, + PTy->getAddressSpace()); +} + /// GetGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. @@ -1033,8 +1093,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefVals.find(Name); + auto I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) Val = I->second.first; } @@ -1048,15 +1107,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, } // Otherwise, create a new forward reference for this value and remember it. 
- GlobalValue *FwdVal; - if (FunctionType *FT = dyn_cast(PTy->getElementType())) - FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); - else - FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, Name, - nullptr, GlobalVariable::NotThreadLocal, - PTy->getAddressSpace()); - + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -1073,8 +1124,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefValIDs.find(ID); + auto I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) Val = I->second.first; } @@ -1088,13 +1138,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { } // Otherwise, create a new forward reference for this value and remember it. - GlobalValue *FwdVal; - if (FunctionType *FT = dyn_cast(PTy->getElementType())) - FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); - else - FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, ""); - + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, ""); ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -1217,6 +1261,19 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { ParseToken(lltok::rparen, "expected ')' in address space"); } +/// ParseStringAttribute +/// := StringConstant +/// := StringConstant '=' StringConstant +bool LLParser::ParseStringAttribute(AttrBuilder &B) { + std::string Attr = Lex.getStrVal(); + Lex.Lex(); + std::string Val; + if (EatIfPresent(lltok::equal) && ParseStringConstant(Val)) + return true; + B.addAttribute(Attr, Val); + return false; +} + /// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes. bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { bool HaveError = false; @@ -1228,6 +1285,11 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. return HaveError; + case lltok::StringConstant: { + if (ParseStringAttribute(B)) + return true; + continue; + } case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -1309,6 +1371,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. return HaveError; + case lltok::StringConstant: { + if (ParseStringAttribute(B)) + return true; + continue; + } case lltok::kw_dereferenceable: { uint64_t Bytes; if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) @@ -1323,6 +1390,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { B.addDereferenceableOrNullAttr(Bytes); continue; } + case lltok::kw_align: { + unsigned Alignment; + if (ParseOptionalAlignment(Alignment)) + return true; + B.addAlignmentAttr(Alignment); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1330,7 +1404,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; // Error handling. 
- case lltok::kw_align: case lltok::kw_byval: case lltok::kw_inalloca: case lltok::kw_nest: @@ -1473,6 +1546,10 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'preserve_mostcc' /// ::= 'preserve_allcc' /// ::= 'ghccc' +/// ::= 'x86_intrcc' +/// ::= 'hhvmcc' +/// ::= 'hhvm_ccc' +/// ::= 'cxx_fast_tlscc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1501,6 +1578,10 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_ghccc: CC = CallingConv::GHC; break; + case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; + case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; + case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; + case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); @@ -1883,7 +1964,59 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, return false; } +/// ParseOptionalOperandBundles +/// ::= /*empty*/ +/// ::= '[' OperandBundle [, OperandBundle ]* ']' +/// +/// OperandBundle +/// ::= bundle-tag '(' ')' +/// ::= bundle-tag '(' Type Value [, Type Value ]* ')' +/// +/// bundle-tag ::= String Constant +bool LLParser::ParseOptionalOperandBundles( + SmallVectorImpl &BundleList, PerFunctionState &PFS) { + LocTy BeginLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lsquare)) + return false; + while (Lex.getKind() != lltok::rsquare) { + // If this isn't the first operand bundle, we need a comma. + if (!BundleList.empty() && + ParseToken(lltok::comma, "expected ',' in input list")) + return true; + + std::string Tag; + if (ParseStringConstant(Tag)) + return true; + + if (ParseToken(lltok::lparen, "expected '(' in operand bundle")) + return true; + + std::vector Inputs; + while (Lex.getKind() != lltok::rparen) { + // If this isn't the first input, we need a comma. + if (!Inputs.empty() && + ParseToken(lltok::comma, "expected ',' in input list")) + return true; + + Type *Ty = nullptr; + Value *Input = nullptr; + if (ParseType(Ty) || ParseValue(Ty, Input, PFS)) + return true; + Inputs.push_back(Input); + } + + BundleList.emplace_back(std::move(Tag), std::move(Inputs)); + + Lex.Lex(); // Lex the ')'. + } + + if (BundleList.empty()) + return Error(BeginLoc, "operand bundle set must not be empty"); + + Lex.Lex(); // Lex the ']'. + return false; +} /// ParseArgumentList - Parse the argument list for a function type or function /// prototype. @@ -2146,31 +2279,29 @@ LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f, : P(p), F(f), FunctionNumber(functionNumber) { // Insert unnamed arguments into the NumberedVals list. - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); - AI != E; ++AI) - if (!AI->hasName()) - NumberedVals.push_back(AI); + for (Argument &A : F.args()) + if (!A.hasName()) + NumberedVals.push_back(&A); } LLParser::PerFunctionState::~PerFunctionState() { // If there were any forward referenced non-basicblock values, delete them. 
- for (std::map >::iterator - I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I) - if (!isa(I->second.first)) { - I->second.first->replaceAllUsesWith( - UndefValue::get(I->second.first->getType())); - delete I->second.first; - I->second.first = nullptr; - } - for (std::map >::iterator - I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I) - if (!isa(I->second.first)) { - I->second.first->replaceAllUsesWith( - UndefValue::get(I->second.first->getType())); - delete I->second.first; - I->second.first = nullptr; - } + for (const auto &P : ForwardRefVals) { + if (isa(P.second.first)) + continue; + P.second.first->replaceAllUsesWith( + UndefValue::get(P.second.first->getType())); + delete P.second.first; + } + + for (const auto &P : ForwardRefValIDs) { + if (isa(P.second.first)) + continue; + P.second.first->replaceAllUsesWith( + UndefValue::get(P.second.first->getType())); + delete P.second.first; + } } bool LLParser::PerFunctionState::FinishFunction() { @@ -2189,16 +2320,15 @@ bool LLParser::PerFunctionState::FinishFunction() { /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. -Value *LLParser::PerFunctionState::GetVal(const std::string &Name, - Type *Ty, LocTy Loc) { +Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty, + LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = F.getValueSymbolTable().lookup(Name); // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefVals.find(Name); + auto I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) Val = I->second.first; } @@ -2222,25 +2352,24 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, // Otherwise, create a new forward reference for this value and remember it. Value *FwdVal; - if (Ty->isLabelTy()) + if (Ty->isLabelTy()) { FwdVal = BasicBlock::Create(F.getContext(), Name, &F); - else + } else { FwdVal = new Argument(Ty, Name); + } ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } -Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, - LocTy Loc) { +Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr; // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefValIDs.find(ID); + auto I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) Val = I->second.first; } @@ -2263,10 +2392,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, // Otherwise, create a new forward reference for this value and remember it. 
Value *FwdVal; - if (Ty->isLabelTy()) + if (Ty->isLabelTy()) { FwdVal = BasicBlock::Create(F.getContext(), "", &F); - else + } else { FwdVal = new Argument(Ty); + } ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -2295,14 +2425,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, return P.Error(NameLoc, "instruction expected to be numbered '%" + Twine(NumberedVals.size()) + "'"); - std::map >::iterator FI = - ForwardRefValIDs.find(NameID); + auto FI = ForwardRefValIDs.find(NameID); if (FI != ForwardRefValIDs.end()) { - if (FI->second.first->getType() != Inst->getType()) + Value *Sentinel = FI->second.first; + if (Sentinel->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + getTypeString(FI->second.first->getType()) + "'"); - FI->second.first->replaceAllUsesWith(Inst); - delete FI->second.first; + + Sentinel->replaceAllUsesWith(Inst); + delete Sentinel; ForwardRefValIDs.erase(FI); } @@ -2311,14 +2442,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, } // Otherwise, the instruction had a name. Resolve forward refs and set it. - std::map >::iterator - FI = ForwardRefVals.find(NameStr); + auto FI = ForwardRefVals.find(NameStr); if (FI != ForwardRefVals.end()) { - if (FI->second.first->getType() != Inst->getType()) + Value *Sentinel = FI->second.first; + if (Sentinel->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + getTypeString(FI->second.first->getType()) + "'"); - FI->second.first->replaceAllUsesWith(Inst); - delete FI->second.first; + + Sentinel->replaceAllUsesWith(Inst); + delete Sentinel; ForwardRefVals.erase(FI); } @@ -2421,6 +2553,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_null: ID.Kind = ValID::t_Null; break; case lltok::kw_undef: ID.Kind = ValID::t_Undef; break; case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break; + case lltok::kw_none: ID.Kind = ValID::t_None; break; case lltok::lbrace: { // ValID ::= '{' ConstVector '}' @@ -2430,9 +2563,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of struct constant")) return true; - ID.ConstantStructElts = new Constant*[Elts.size()]; + ID.ConstantStructElts = make_unique(Elts.size()); ID.UIntVal = Elts.size(); - memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + memcpy(ID.ConstantStructElts.get(), Elts.data(), + Elts.size() * sizeof(Elts[0])); ID.Kind = ValID::t_ConstantStruct; return false; } @@ -2451,8 +2585,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (isPackedStruct) { - ID.ConstantStructElts = new Constant*[Elts.size()]; - memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + ID.ConstantStructElts = make_unique(Elts.size()); + memcpy(ID.ConstantStructElts.get(), Elts.data(), + Elts.size() * sizeof(Elts[0])); ID.UIntVal = Elts.size(); ID.Kind = ValID::t_PackedConstantStruct; return false; @@ -2891,7 +3026,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } } - SmallPtrSet Visited; + SmallPtrSet Visited; if (!Indices.empty() && !Ty->isSized(&Visited)) return Error(ID.Loc, "base element of getelementptr must be sized"); @@ -3066,6 +3201,11 @@ struct DwarfTagField : public MDUnsignedField { DwarfTagField(dwarf::Tag DefaultTag) : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {} }; +struct DwarfMacinfoTypeField : public MDUnsignedField { + DwarfMacinfoTypeField() : MDUnsignedField(0, 
dwarf::DW_MACINFO_vendor_ext) {} + DwarfMacinfoTypeField(dwarf::MacinfoRecordType DefaultType) + : MDUnsignedField(DefaultType, dwarf::DW_MACINFO_vendor_ext) {} +}; struct DwarfAttEncodingField : public MDUnsignedField { DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {} }; @@ -3157,6 +3297,26 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) { return false; } +template <> +bool LLParser::ParseMDField(LocTy Loc, StringRef Name, + DwarfMacinfoTypeField &Result) { + if (Lex.getKind() == lltok::APSInt) + return ParseMDField(Loc, Name, static_cast(Result)); + + if (Lex.getKind() != lltok::DwarfMacinfo) + return TokError("expected DWARF macinfo type"); + + unsigned Macinfo = dwarf::getMacinfo(Lex.getStrVal()); + if (Macinfo == dwarf::DW_MACINFO_invalid) + return TokError( + "invalid DWARF macinfo type" + Twine(" '") + Lex.getStrVal() + "'"); + assert(Macinfo <= Result.Max && "Expected valid DWARF macinfo type"); + + Result.assign(Macinfo); + Lex.Lex(); + return false; +} + template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfVirtualityField &Result) { @@ -3569,8 +3729,11 @@ bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) { /// isOptimized: true, flags: "-O2", runtimeVersion: 1, /// splitDebugFilename: "abc.debug", emissionKind: 1, /// enums: !1, retainedTypes: !2, subprograms: !3, -/// globals: !4, imports: !5, dwoId: 0x0abcd) +/// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd) bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 'distinct', required for !DICompileUnit"); + #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ REQUIRED(file, MDField, (/* AllowNull */ false)); \ @@ -3585,16 +3748,16 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { OPTIONAL(subprograms, MDField, ); \ OPTIONAL(globals, MDField, ); \ OPTIONAL(imports, MDField, ); \ + OPTIONAL(macros, MDField, ); \ OPTIONAL(dwoId, MDUnsignedField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT(DICompileUnit, - (Context, language.Val, file.Val, producer.Val, - isOptimized.Val, flags.Val, runtimeVersion.Val, - splitDebugFilename.Val, emissionKind.Val, enums.Val, - retainedTypes.Val, subprograms.Val, globals.Val, - imports.Val, dwoId.Val)); + Result = DICompileUnit::getDistinct( + Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val, + runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val, + retainedTypes.Val, subprograms.Val, globals.Val, imports.Val, macros.Val, + dwoId.Val); return false; } @@ -3604,9 +3767,10 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { /// isDefinition: true, scopeLine: 8, containingType: !3, /// virtuality: DW_VIRTUALTIY_pure_virtual, /// virtualIndex: 10, flags: 11, -/// isOptimized: false, function: void ()* @_Z3foov, -/// templateParams: !4, declaration: !5, variables: !6) +/// isOptimized: false, templateParams: !4, declaration: !5, +/// variables: !6) bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { + auto Loc = Lex.getLoc(); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(scope, MDField, ); \ OPTIONAL(name, MDStringField, ); \ @@ -3622,19 +3786,23 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(flags, DIFlagField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ - OPTIONAL(function, MDConstant, ); 
\ OPTIONAL(templateParams, MDField, ); \ OPTIONAL(declaration, MDField, ); \ OPTIONAL(variables, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS + if (isDefinition.Val && !IsDistinct) + return Lex.Error( + Loc, + "missing 'distinct', required for !DISubprogram when 'isDefinition'"); + Result = GET_OR_DISTINCT( - DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, - scopeLine.Val, containingType.Val, virtuality.Val, - virtualIndex.Val, flags.Val, isOptimized.Val, function.Val, - templateParams.Val, declaration.Val, variables.Val)); + DISubprogram, + (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val, + type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val, + containingType.Val, virtuality.Val, virtualIndex.Val, flags.Val, + isOptimized.Val, templateParams.Val, declaration.Val, variables.Val)); return false; } @@ -3685,6 +3853,39 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) { return false; } +/// ParseDIMacro: +/// ::= !DIMacro(macinfo: type, line: 9, name: "SomeMacro", value: "SomeValue") +bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + REQUIRED(type, DwarfMacinfoTypeField, ); \ + REQUIRED(line, LineField, ); \ + REQUIRED(name, MDStringField, ); \ + OPTIONAL(value, MDStringField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacro, + (Context, type.Val, line.Val, name.Val, value.Val)); + return false; +} + +/// ParseDIMacroFile: +/// ::= !DIMacroFile(line: 9, file: !2, nodes: !3) +bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \ + REQUIRED(line, LineField, ); \ + REQUIRED(file, MDField, ); \ + OPTIONAL(nodes, MDField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacroFile, + (Context, type.Val, line.Val, file.Val, nodes.Val)); + return false; +} + + /// ParseDIModule: /// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG", /// includePath: "/usr/include", isysroot: "/") @@ -3762,24 +3963,25 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { } /// ParseDILocalVariable: -/// ::= !DILocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo", +/// ::= !DILocalVariable(arg: 7, scope: !0, name: "foo", +/// file: !1, line: 7, type: !2, arg: 2, flags: 7) +/// ::= !DILocalVariable(scope: !0, name: "foo", /// file: !1, line: 7, type: !2, arg: 2, flags: 7) bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(tag, DwarfTagField, ); \ REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(name, MDStringField, ); \ + OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(type, MDField, ); \ - OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \ OPTIONAL(flags, DIFlagField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS Result = GET_OR_DISTINCT(DILocalVariable, - (Context, tag.Val, scope.Val, name.Val, file.Val, - line.Val, type.Val, arg.Val, flags.Val)); + (Context, scope.Val, name.Val, file.Val, line.Val, + type.Val, arg.Val, flags.Val)); return false; } @@ -3969,13 +4171,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, V = PFS->GetVal(ID.StrVal, Ty, ID.Loc); return V == nullptr; case ValID::t_InlineAsm: { 
- PointerType *PTy = dyn_cast(Ty); - FunctionType *FTy = - PTy ? dyn_cast(PTy->getElementType()) : nullptr; - if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) + if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, - (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2))); + V = InlineAsm::get(ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1, + (ID.UIntVal >> 1) & 1, + (InlineAsm::AsmDialect(ID.UIntVal >> 2))); return false; } case ValID::t_GlobalName: @@ -4035,6 +4235,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "invalid type for null constant"); V = Constant::getNullValue(Ty); return false; + case ValID::t_None: + if (!Ty->isTokenTy()) + return Error(ID.Loc, "invalid type for none constant"); + V = Constant::getNullValue(Ty); + return false; case ValID::t_Constant: if (ID.ConstantVal->getType() != Ty) return Error(ID.Loc, "constant expression type mismatch"); @@ -4056,8 +4261,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "element " + Twine(i) + " of struct initializer doesn't match struct element type"); - V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts, - ID.UIntVal)); + V = ConstantStruct::get( + ST, makeArrayRef(ID.ConstantStructElts.get(), ID.UIntVal)); } else return Error(ID.Loc, "constant expression type mismatch"); return false; @@ -4065,11 +4270,35 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, llvm_unreachable("Invalid ValID"); } +bool LLParser::parseConstantValue(Type *Ty, Constant *&C) { + C = nullptr; + ValID ID; + auto Loc = Lex.getLoc(); + if (ParseValID(ID, /*PFS=*/nullptr)) + return true; + switch (ID.Kind) { + case ValID::t_APSInt: + case ValID::t_APFloat: + case ValID::t_Undef: + case ValID::t_Constant: + case ValID::t_ConstantStruct: + case ValID::t_PackedConstantStruct: { + Value *V; + if (ConvertValIDToValue(Ty, ID, V, /*PFS=*/nullptr)) + return true; + assert(isa(V) && "Expected a constant value"); + C = cast(V); + return false; + } + default: + return Error(Loc, "expected a constant value"); + } +} + bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { V = nullptr; ValID ID; - return ParseValID(ID, PFS) || - ConvertValIDToValue(Ty, ID, V, PFS); + return ParseValID(ID, PFS) || ConvertValIDToValue(Ty, ID, V, PFS); } bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) { @@ -4242,8 +4471,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (!FunctionName.empty()) { // If this was a definition of a forward reference, remove the definition // from the forward reference table and fill in the forward ref. - std::map >::iterator FRVI = - ForwardRefVals.find(FunctionName); + auto FRVI = ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); if (!Fn) @@ -4265,8 +4493,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } else { // If this is a definition of a forward referenced function, make sure the // types agree. 
-    std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
-      = ForwardRefValIDs.find(NumberedVals.size());
+    auto I = ForwardRefValIDs.find(NumberedVals.size());
     if (I != ForwardRefValIDs.end()) {
       Fn = cast<Function>(I->second.first);
       if (Fn->getType() != PFT)
@@ -4498,6 +4725,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
   case lltok::kw_invoke:     return ParseInvoke(Inst, PFS);
   case lltok::kw_resume:     return ParseResume(Inst, PFS);
+  case lltok::kw_cleanupret: return ParseCleanupRet(Inst, PFS);
+  case lltok::kw_catchret:   return ParseCatchRet(Inst, PFS);
+  case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
+  case lltok::kw_catchpad:   return ParseCatchPad(Inst, PFS);
+  case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
@@ -4580,6 +4812,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_call:     return ParseCall(Inst, PFS, CallInst::TCK_None);
   case lltok::kw_tail:     return ParseCall(Inst, PFS, CallInst::TCK_Tail);
   case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail);
+  case lltok::kw_notail:   return ParseCall(Inst, PFS, CallInst::TCK_NoTail);
   // Memory.
   case lltok::kw_alloca:   return ParseAlloc(Inst, PFS);
   case lltok::kw_load:     return ParseLoad(Inst, PFS);
@@ -4798,15 +5031,15 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
+  SmallVector<OperandBundleDef, 2> BundleList;

   BasicBlock *NormalBB, *UnwindBB;
-  if (ParseOptionalCallingConv(CC) ||
-      ParseOptionalReturnAttrs(RetAttrs) ||
+  if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
-      ParseValID(CalleeID) ||
-      ParseParameterList(ArgList, PFS) ||
+      ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
       ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
                                  NoBuiltinLoc) ||
+      ParseOptionalOperandBundles(BundleList, PFS) ||
       ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
       ParseTypeAndBasicBlock(NormalBB, PFS) ||
       ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
@@ -4829,6 +5062,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
     Ty = FunctionType::get(RetType, ParamTypes, false);
   }

+  CalleeID.FTy = Ty;
+
   // Look up the callee.
   Value *Callee;
   if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -4880,7 +5115,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   // Finish off the Attribute and check them
   AttributeSet PAL = AttributeSet::get(Context, Attrs);

-  InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args);
+  InvokeInst *II =
+      InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args, BundleList);
   II->setCallingConv(CC);
   II->setAttributes(PAL);
   ForwardRefAttrGroups[II] = FwdRefAttrGrps;
@@ -4900,6 +5136,183 @@ bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
   return false;
 }

+bool LLParser::ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+                                  PerFunctionState &PFS) {
+  if (ParseToken(lltok::lsquare, "expected '[' in catchpad/cleanuppad"))
+    return true;
+
+  while (Lex.getKind() != lltok::rsquare) {
+    // If this isn't the first argument, we need a comma.
+    if (!Args.empty() &&
+        ParseToken(lltok::comma, "expected ',' in argument list"))
+      return true;
+
+    // Parse the argument.
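+    // Each argument is written as 'Type Value'; metadata operands are routed
+    // through ParseMetadataAsValue below. A sketch of the surface syntax this
+    // accepts (hypothetical IR, names invented for illustration):
+    //   %cp = catchpad within %cs [i8* %exn.slot, i32 4]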
+    LocTy ArgLoc;
+    Type *ArgTy = nullptr;
+    if (ParseType(ArgTy, ArgLoc))
+      return true;
+
+    Value *V;
+    if (ArgTy->isMetadataTy()) {
+      if (ParseMetadataAsValue(V, PFS))
+        return true;
+    } else {
+      if (ParseValue(ArgTy, V, PFS))
+        return true;
+    }
+    Args.push_back(V);
+  }
+
+  Lex.Lex();  // Lex the ']'.
+  return false;
+}
+
+/// ParseCleanupRet
+///   ::= 'cleanupret' from Value unwind ('to' 'caller' | TypeAndValue)
+bool LLParser::ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CleanupPad = nullptr;
+
+  if (ParseToken(lltok::kw_from, "expected 'from' after cleanupret"))
+    return true;
+
+  if (ParseValue(Type::getTokenTy(Context), CleanupPad, PFS))
+    return true;
+
+  if (ParseToken(lltok::kw_unwind, "expected 'unwind' in cleanupret"))
+    return true;
+
+  BasicBlock *UnwindBB = nullptr;
+  if (Lex.getKind() == lltok::kw_to) {
+    Lex.Lex();
+    if (ParseToken(lltok::kw_caller, "expected 'caller' in cleanupret"))
+      return true;
+  } else {
+    if (ParseTypeAndBasicBlock(UnwindBB, PFS)) {
+      return true;
+    }
+  }
+
+  Inst = CleanupReturnInst::Create(CleanupPad, UnwindBB);
+  return false;
+}
+
+/// ParseCatchRet
+///   ::= 'catchret' from Parent Value 'to' TypeAndValue
+bool LLParser::ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CatchPad = nullptr;
+
+  if (ParseToken(lltok::kw_from, "expected 'from' after catchret"))
+    return true;
+
+  if (ParseValue(Type::getTokenTy(Context), CatchPad, PFS))
+    return true;
+
+  BasicBlock *BB;
+  if (ParseToken(lltok::kw_to, "expected 'to' in catchret") ||
+      ParseTypeAndBasicBlock(BB, PFS))
+    return true;
+
+  Inst = CatchReturnInst::Create(CatchPad, BB);
+  return false;
+}
+
+/// ParseCatchSwitch
+///   ::= 'catchswitch' within Parent
+bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *ParentPad;
+  LocTy BBLoc;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after catchswitch"))
+    return true;
+
+  if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+      Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for catchswitch");
+
+  if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+    return true;
+
+  if (ParseToken(lltok::lsquare, "expected '[' with catchswitch labels"))
+    return true;
+
+  SmallVector<BasicBlock *, 32> Table;
+  do {
+    BasicBlock *DestBB;
+    if (ParseTypeAndBasicBlock(DestBB, PFS))
+      return true;
+    Table.push_back(DestBB);
+  } while (EatIfPresent(lltok::comma));
+
+  if (ParseToken(lltok::rsquare, "expected ']' after catchswitch labels"))
+    return true;
+
+  if (ParseToken(lltok::kw_unwind,
+                 "expected 'unwind' after catchswitch scope"))
+    return true;
+
+  BasicBlock *UnwindBB = nullptr;
+  if (EatIfPresent(lltok::kw_to)) {
+    if (ParseToken(lltok::kw_caller, "expected 'caller' in catchswitch"))
+      return true;
+  } else {
+    if (ParseTypeAndBasicBlock(UnwindBB, PFS))
+      return true;
+  }
+
+  auto *CatchSwitch =
+      CatchSwitchInst::Create(ParentPad, UnwindBB, Table.size());
+  for (BasicBlock *DestBB : Table)
+    CatchSwitch->addHandler(DestBB);
+  Inst = CatchSwitch;
+  return false;
+}
+
+/// ParseCatchPad
+///   ::= 'catchpad' within Parent ParamList
+bool LLParser::ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CatchSwitch = nullptr;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after catchpad"))
+    return true;
+
+  if (Lex.getKind() != lltok::LocalVar && Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for catchpad");
+
+  if (ParseValue(Type::getTokenTy(Context), CatchSwitch, PFS))
+    return true;
+
+  SmallVector<Value *, 8> Args;
+  if (ParseExceptionArgs(Args, PFS))
+    return true;
+
+  Inst = CatchPadInst::Create(CatchSwitch, Args);
+  return false;
+}
+
+/// ParseCleanupPad
+///   ::= 'cleanuppad' within Parent ParamList
+bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *ParentPad = nullptr;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after cleanuppad"))
+    return true;
+
+  if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+      Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for cleanuppad");
+
+  if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+    return true;
+
+  SmallVector<Value *, 8> Args;
+  if (ParseExceptionArgs(Args, PFS))
+    return true;
+
+  Inst = CleanupPadInst::Create(ParentPad, Args);
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Binary Operators.
 //===----------------------------------------------------------------------===//
@@ -5196,12 +5609,14 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
 }

 /// ParseCall
-///   ::= 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
-///   ::= 'tail' 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
-///   ::= 'musttail' 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
+///   ::= 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'tail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'musttail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'notail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
 bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
                          CallInst::TailCallKind TCK) {
   AttrBuilder RetAttrs, FnAttrs;
@@ -5212,20 +5627,29 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
+  SmallVector<OperandBundleDef, 2> BundleList;
   LocTy CallLoc = Lex.getLoc();

-  if ((TCK != CallInst::TCK_None &&
-       ParseToken(lltok::kw_call, "expected 'tail call'")) ||
-      ParseOptionalCallingConv(CC) ||
-      ParseOptionalReturnAttrs(RetAttrs) ||
+  if (TCK != CallInst::TCK_None &&
+      ParseToken(lltok::kw_call,
+                 "expected 'tail call', 'musttail call', or 'notail call'"))
+    return true;
+
+  FastMathFlags FMF = EatFastMathFlagsIfPresent();
+
+  if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
       ParseValID(CalleeID) ||
       ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
                          PFS.getFunction().isVarArg()) ||
-      ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
-                                 BuiltinLoc))
+      ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) ||
+      ParseOptionalOperandBundles(BundleList, PFS))
     return true;

+  if (FMF.any() && !RetType->isFPOrFPVectorTy())
+    return Error(CallLoc, "fast-math-flags specified for call without "
+                          "floating-point scalar or vector return type");
+
   // If RetType is a non-function pointer type, then this is the short syntax
   // for the call, which means that RetType is just the return type.  Infer the
   // rest of the function argument types from the arguments that are present.
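// Taken together, the added productions let ParseCall accept forms like the
// following (illustrative IR only, not part of this patch):
//   %r = notail call fast float @fma(float %a, float %b, float %c)
//   call void @g() [ "deopt"(i32 0, i32 1) ]
// The fast-math-flags check above rejects 'fast' and friends on calls whose
// return type is not a floating-point scalar or vector.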
@@ -5242,6 +5666,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     Ty = FunctionType::get(RetType, ParamTypes, false);
   }

+  CalleeID.FTy = Ty;
+
   // Look up the callee.
   Value *Callee;
   if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -5293,9 +5719,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   // Finish off the Attribute and check them
   AttributeSet PAL = AttributeSet::get(Context, Attrs);

-  CallInst *CI = CallInst::Create(Ty, Callee, Args);
+  CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList);
   CI->setTailCallKind(TCK);
   CI->setCallingConv(CC);
+  if (FMF.any())
+    CI->setFastMathFlags(FMF);
   CI->setAttributes(PAL);
   ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
   Inst = CI;
@@ -5614,7 +6042,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
     Indices.push_back(Val);
   }

-  SmallPtrSet<Type*, 4> Visited;
+  SmallPtrSet<Type *, 4> Visited;
   if (!Indices.empty() && !Ty->isSized(&Visited))
     return Error(Loc, "base element of getelementptr must be sized");

diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 6e57b3e0667d..f61a5e5e3a38 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -46,29 +46,32 @@ namespace llvm {
   /// or a symbolic (%var) reference.  This is just a discriminated union.
   struct ValID {
     enum {
-      t_LocalID, t_GlobalID,      // ID in UIntVal.
-      t_LocalName, t_GlobalName,  // Name in StrVal.
-      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
-      t_Null, t_Undef, t_Zero,    // No value.
-      t_EmptyArray,               // No value:  []
-      t_Constant,                 // Value in ConstantVal.
-      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
-      t_ConstantStruct,           // Value in ConstantStructElts.
-      t_PackedConstantStruct      // Value in ConstantStructElts.
-    } Kind;
+      t_LocalID, t_GlobalID,           // ID in UIntVal.
+      t_LocalName, t_GlobalName,       // Name in StrVal.
+      t_APSInt, t_APFloat,             // Value in APSIntVal/APFloatVal.
+      t_Null, t_Undef, t_Zero, t_None, // No value.
+      t_EmptyArray,                    // No value:  []
+      t_Constant,                      // Value in ConstantVal.
+      t_InlineAsm,                     // Value in FTy/StrVal/StrVal2/UIntVal.
+      t_ConstantStruct,                // Value in ConstantStructElts.
+      t_PackedConstantStruct           // Value in ConstantStructElts.
+    } Kind = t_LocalID;

     LLLexer::LocTy Loc;
     unsigned UIntVal;
+    FunctionType *FTy = nullptr;
     std::string StrVal, StrVal2;
     APSInt APSIntVal;
-    APFloat APFloatVal;
+    APFloat APFloatVal{0.0};
     Constant *ConstantVal;
-    Constant **ConstantStructElts;
+    std::unique_ptr<Constant *[]> ConstantStructElts;

-    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
-    ~ValID() {
-      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
-        delete [] ConstantStructElts;
+    ValID() = default;
+    ValID(const ValID &RHS)
+        : Kind(RHS.Kind), Loc(RHS.Loc), UIntVal(RHS.UIntVal), FTy(RHS.FTy),
+          StrVal(RHS.StrVal), StrVal2(RHS.StrVal2), APSIntVal(RHS.APSIntVal),
+          APFloatVal(RHS.APFloatVal), ConstantVal(RHS.ConstantVal) {
+      assert(!RHS.ConstantStructElts);
     }

     bool operator<(const ValID &RHS) const {
@@ -143,6 +146,8 @@ namespace llvm {
           Slots(Slots), BlockAddressPFS(nullptr) {}
     bool Run();

+    bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
+
     LLVMContext &getContext() { return Context; }

   private:
@@ -154,6 +159,10 @@ namespace llvm {
       return Error(Lex.getLoc(), Msg);
     }

+    /// Restore the internal name and slot mappings using the mappings that
+    /// were created at an earlier parsing stage.
+    void restoreParsingState(const SlotMapping *Slots);
+
     /// GetGlobalVal - Get a value with the specified name or ID, creating a
     /// forward reference record if needed.  This can return null if the value
     /// exists but does not have the right type.
@@ -210,6 +219,8 @@ namespace llvm {
       return ParseUInt64(Val);
     }

+    bool ParseStringAttribute(AttrBuilder &B);
+
     bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
     bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
     bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
@@ -343,10 +354,12 @@ namespace llvm {
     bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
                              PerFunctionState *PFS);

+    bool parseConstantValue(Type *Ty, Constant *&C);
     bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
     bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
       return ParseValue(Ty, V, &PFS);
     }
+
     bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
                     PerFunctionState &PFS) {
       Loc = Lex.getLoc();
@@ -381,6 +394,13 @@ namespace llvm {
                             bool IsMustTailCall = false,
                             bool InVarArgsFunc = false);

+    bool
+    ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
+                                PerFunctionState &PFS);
+
+    bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+                            PerFunctionState &PFS);
+
     // Constant Parsing.
     bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
     bool ParseGlobalValue(Type *Ty, Constant *&V);
@@ -441,6 +461,11 @@ namespace llvm {
     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);

     bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
                          unsigned OperandType);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 691f085f0c9f..29a7f16d3c20 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -49,10 +49,14 @@ namespace lltok {
     kw_external, kw_thread_local, kw_localdynamic, kw_initialexec,
     kw_localexec, kw_zeroinitializer,
-    kw_undef, kw_null,
+    kw_undef, kw_null, kw_none,
     kw_to,
+    kw_caller,
+    kw_within,
+    kw_from,
     kw_tail,
     kw_musttail,
+    kw_notail,
     kw_target,
     kw_triple,
     kw_unwind,
@@ -96,6 +100,9 @@ namespace lltok {
     kw_webkit_jscc, kw_anyregcc, kw_preserve_mostcc, kw_preserve_allcc,
     kw_ghccc,
+    kw_x86_intrcc,
+    kw_hhvmcc, kw_hhvm_ccc,
+    kw_cxx_fast_tlscc,

     // Attributes:
     kw_attributes,
@@ -109,6 +116,8 @@ namespace lltok {
     kw_convergent,
     kw_dereferenceable,
     kw_dereferenceable_or_null,
+    kw_inaccessiblememonly,
+    kw_inaccessiblemem_or_argmemonly,
     kw_inlinehint,
     kw_inreg,
     kw_jumptable,
@@ -121,6 +130,7 @@ namespace lltok {
     kw_noduplicate,
     kw_noimplicitfloat,
     kw_noinline,
+    kw_norecurse,
     kw_nonlazybind,
     kw_nonnull,
     kw_noredzone,
@@ -177,7 +187,8 @@ namespace lltok {
     kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,

     kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
-    kw_unreachable,
+    kw_unreachable, kw_cleanupret, kw_catchswitch, kw_catchret, kw_catchpad,
+    kw_cleanuppad,

     kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
     kw_getelementptr,
@@ -209,6 +220,7 @@ namespace lltok {
     DwarfLang,     // DW_LANG_foo
     DwarfOp,       // DW_OP_foo
     DIFlag,        // DIFlagFoo
+    DwarfMacinfo,  // DW_MACINFO_foo

     // Type valued tokens (TyVal).
    Type,

diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 9145a54f2a7b..4e55e62ecf5c 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -66,3 +66,15 @@ std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
   MemoryBufferRef F(AsmString, "<string>");
   return parseAssembly(F, Err, Context, Slots);
 }
+
+Constant *llvm::parseConstantValue(StringRef Asm, SMDiagnostic &Err,
+                                   const Module &M, const SlotMapping *Slots) {
+  SourceMgr SM;
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Asm);
+  SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
+  Constant *C;
+  if (LLParser(Asm, SM, Err, const_cast<Module *>(&M))
+          .parseStandaloneConstantValue(C, Slots))
+    return nullptr;
+  return C;
+}
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 289c76e85b4b..385c18a40006 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm-c/BitReader.h"
+#include "llvm-c/Core.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
@@ -22,12 +23,25 @@ using namespace llvm;
 /* Builds a module from the bitcode in the specified memory buffer, returning a
    reference to the module via the OutModule parameter. Returns 0 on success.
    Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
-                          LLVMModuleRef *OutModule, char **OutMessage) {
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule,
+                          char **OutMessage) {
   return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
                                    OutMessage);
 }

+LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf,
+                           LLVMModuleRef *OutModule) {
+  return LLVMParseBitcodeInContext2(wrap(&getGlobalContext()), MemBuf,
+                                    OutModule);
+}
+
+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+  auto *Message = reinterpret_cast<std::string *>(C);
+  raw_string_ostream Stream(*Message);
+  DiagnosticPrinterRawOStream DP(Stream);
+  DI.print(DP);
+}
+
 LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
                                    LLVMMemoryBufferRef MemBuf,
                                    LLVMModuleRef *OutModule,
@@ -35,18 +49,36 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
   MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
   LLVMContext &Ctx = *unwrap(ContextRef);

+  LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+      Ctx.getDiagnosticHandler();
+  void *OldDiagnosticContext = Ctx.getDiagnosticContext();
   std::string Message;
-  raw_string_ostream Stream(Message);
-  DiagnosticPrinterRawOStream DP(Stream);
+  Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
+
+  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+
+  Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);

-  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(
-      Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); });
   if (ModuleOrErr.getError()) {
-    if (OutMessage) {
-      Stream.flush();
+    if (OutMessage)
       *OutMessage = strdup(Message.c_str());
-    }
-    *OutModule = wrap((Module*)nullptr);
+    *OutModule = wrap((Module *)nullptr);
+    return 1;
+  }
+
+  *OutModule = wrap(ModuleOrErr.get().release());
+  return 0;
+}
+
+LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef,
+                                    LLVMMemoryBufferRef MemBuf,
+                                    LLVMModuleRef *OutModule) {
+  MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
+  LLVMContext &Ctx = *unwrap(ContextRef);
+
+  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf,
Ctx); + if (ModuleOrErr.getError()) { + *OutModule = wrap((Module *)nullptr); return 1; } @@ -59,26 +91,50 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, Optionally returns a human-readable error message via OutMessage. */ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutM, - char **OutMessage) { + LLVMModuleRef *OutM, char **OutMessage) { + LLVMContext &Ctx = *unwrap(ContextRef); + LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler = + Ctx.getDiagnosticHandler(); + void *OldDiagnosticContext = Ctx.getDiagnosticContext(); + std::string Message; + Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true); std::unique_ptr Owner(unwrap(MemBuf)); ErrorOr> ModuleOrErr = - getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef)); + getLazyBitcodeModule(std::move(Owner), Ctx); Owner.release(); + Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true); - if (std::error_code EC = ModuleOrErr.getError()) { + if (ModuleOrErr.getError()) { *OutM = wrap((Module *)nullptr); if (OutMessage) - *OutMessage = strdup(EC.message().c_str()); + *OutMessage = strdup(Message.c_str()); return 1; } *OutM = wrap(ModuleOrErr.get().release()); return 0; +} +LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM) { + LLVMContext &Ctx = *unwrap(ContextRef); + std::unique_ptr Owner(unwrap(MemBuf)); + + ErrorOr> ModuleOrErr = + getLazyBitcodeModule(std::move(Owner), Ctx); + Owner.release(); + + if (ModuleOrErr.getError()) { + *OutM = wrap((Module *)nullptr); + return 1; + } + + *OutM = wrap(ModuleOrErr.get().release()); + return 0; } LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, @@ -87,20 +143,7 @@ LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, OutMessage); } -/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ -LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { - return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf, - reinterpret_cast(OutMP), - OutMessage); -} - -/* Deprecated: Use LLVMGetBitcodeModule instead. 
*/ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { - return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf, - OutMP, OutMessage); +LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM) { + return LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), MemBuf, OutM); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index c04e8b9f1f37..2e670d584ecc 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" @@ -93,35 +94,35 @@ public: void resolveConstantForwardRefs(); }; -class BitcodeReaderMDValueList { +class BitcodeReaderMetadataList { unsigned NumFwdRefs; bool AnyFwdRefs; unsigned MinFwdRef; unsigned MaxFwdRef; - std::vector MDValuePtrs; + std::vector MetadataPtrs; LLVMContext &Context; public: - BitcodeReaderMDValueList(LLVMContext &C) + BitcodeReaderMetadataList(LLVMContext &C) : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {} // vector compatibility methods - unsigned size() const { return MDValuePtrs.size(); } - void resize(unsigned N) { MDValuePtrs.resize(N); } - void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); } - void clear() { MDValuePtrs.clear(); } - Metadata *back() const { return MDValuePtrs.back(); } - void pop_back() { MDValuePtrs.pop_back(); } - bool empty() const { return MDValuePtrs.empty(); } + unsigned size() const { return MetadataPtrs.size(); } + void resize(unsigned N) { MetadataPtrs.resize(N); } + void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); } + void clear() { MetadataPtrs.clear(); } + Metadata *back() const { return MetadataPtrs.back(); } + void pop_back() { MetadataPtrs.pop_back(); } + bool empty() const { return MetadataPtrs.empty(); } Metadata *operator[](unsigned i) const { - assert(i < MDValuePtrs.size()); - return MDValuePtrs[i]; + assert(i < MetadataPtrs.size()); + return MetadataPtrs[i]; } void shrinkTo(unsigned N) { assert(N <= size() && "Invalid shrinkTo request!"); - MDValuePtrs.resize(N); + MetadataPtrs.resize(N); } Metadata *getValueFwdRef(unsigned Idx); @@ -131,17 +132,27 @@ public: class BitcodeReader : public GVMaterializer { LLVMContext &Context; - DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule = nullptr; std::unique_ptr Buffer; std::unique_ptr StreamFile; BitstreamCursor Stream; + // Next offset to start scanning for lazy parsing of function bodies. uint64_t NextUnreadBit = 0; + // Last function offset found in the VST. + uint64_t LastFunctionBlockBit = 0; bool SeenValueSymbolTable = false; + uint64_t VSTOffset = 0; + // Contains an arbitrary and optional string identifying the bitcode producer + std::string ProducerIdentification; + // Number of module level metadata records specified by the + // MODULE_CODE_METADATA_VALUES record. + unsigned NumModuleMDs = 0; + // Support older bitcode without the MODULE_CODE_METADATA_VALUES record. 
+ bool SeenModuleValuesRecord = false; std::vector TypeList; BitcodeReaderValueList ValueList; - BitcodeReaderMDValueList MDValueList; + BitcodeReaderMetadataList MetadataList; std::vector ComdatList; SmallVector InstructionList; @@ -157,7 +168,7 @@ class BitcodeReader : public GVMaterializer { /// is thus not represented here. As such all indices are off by one. std::vector MAttributes; - /// \brief The set of attribute groups. + /// The set of attribute groups. std::map MAttributeGroups; /// While parsing a function body, this is a list of the basic blocks for the @@ -208,23 +219,24 @@ class BitcodeReader : public GVMaterializer { /// (e.g.) blockaddress forward references. bool WillMaterializeAllForwardRefs = false; - /// Functions that have block addresses taken. This is usually empty. - SmallPtrSet BlockAddressesTaken; - /// True if any Metadata block has been materialized. bool IsMetadataMaterialized = false; bool StripDebugInfo = false; + /// Functions that need to be matched with subprograms when upgrading old + /// metadata. + SmallDenseMap FunctionsWithSPs; + + std::vector BundleTags; + public: std::error_code error(BitcodeError E, const Twine &Message); std::error_code error(BitcodeError E); std::error_code error(const Twine &Message); - BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler); - BitcodeReader(LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context); + BitcodeReader(LLVMContext &Context); ~BitcodeReader() override { freeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -233,11 +245,9 @@ public: void releaseBuffer(); - bool isDematerializable(const GlobalValue *GV) const override; std::error_code materialize(GlobalValue *GV) override; - std::error_code materializeModule(Module *M) override; + std::error_code materializeModule() override; std::vector getIdentifiedStructTypes() const override; - void dematerialize(GlobalValue *GV) override; /// \brief Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. @@ -249,6 +259,9 @@ public: /// \returns true if an error occurred. ErrorOr parseTriple(); + /// Cheap mechanism to just extract the identification block out of bitcode. + ErrorOr parseIdentificationBlock(); + static uint64_t decodeSignRotatedValue(uint64_t V); /// Materialize any deferred Metadata block. @@ -256,7 +269,20 @@ public: void setStripDebugInfo() override; + /// Save the mapping between the metadata values and the corresponding + /// value id that were recorded in the MetadataList during parsing. If + /// OnlyTempMD is true, then only record those entries that are still + /// temporary metadata. This interface is used when metadata linking is + /// performed as a postpass, such as during function importing. + void saveMetadataList(DenseMap &MetadataToIDs, + bool OnlyTempMD) override; + private: + /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the + // ProducerIdentification data member, and do some basic enforcement on the + // "epoch" encoded in the bitcode. 
+ std::error_code parseBitcodeVersion(); + std::vector IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); @@ -268,7 +294,7 @@ private: return ValueList.getValueFwdRef(ID, Ty); } Metadata *getFnMetadataByID(unsigned ID) { - return MDValueList.getValueFwdRef(ID); + return MetadataList.getValueFwdRef(ID); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID @@ -351,21 +377,28 @@ private: /// a corresponding error code. std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code parseModule(uint64_t ResumeBit, + bool ShouldLazyLoadMetadata = false); std::error_code parseAttributeBlock(); std::error_code parseAttributeGroupBlock(); std::error_code parseTypeTable(); std::error_code parseTypeTableBody(); + std::error_code parseOperandBundleTags(); - std::error_code parseValueSymbolTable(); + ErrorOr recordValue(SmallVectorImpl &Record, + unsigned NameIndex, Triple &TT); + std::error_code parseValueSymbolTable(uint64_t Offset = 0); std::error_code parseConstants(); + std::error_code rememberAndSkipFunctionBodies(); std::error_code rememberAndSkipFunctionBody(); /// Save the positions of the Metadata blocks and skip parsing the blocks. std::error_code rememberAndSkipMetadata(); std::error_code parseFunctionBody(Function *F); std::error_code globalCleanup(); std::error_code resolveGlobalAndAliasInits(); - std::error_code parseMetadata(); + std::error_code parseMetadata(bool ModuleLevel = false); + std::error_code parseMetadataKinds(); + std::error_code parseMetadataKindRecord(SmallVectorImpl &Record); std::error_code parseMetadataAttachment(Function &F); ErrorOr parseModuleTriple(); std::error_code parseUseLists(); @@ -376,6 +409,94 @@ private: Function *F, DenseMap::iterator DeferredFunctionInfoIterator); }; + +/// Class to manage reading and parsing function summary index bitcode +/// files/sections. +class FunctionIndexBitcodeReader { + DiagnosticHandlerFunction DiagnosticHandler; + + /// Eventually points to the function index built during parsing. + FunctionInfoIndex *TheIndex = nullptr; + + std::unique_ptr Buffer; + std::unique_ptr StreamFile; + BitstreamCursor Stream; + + /// \brief Used to indicate whether we are doing lazy parsing of summary data. + /// + /// If false, the summary section is fully parsed into the index during + /// the initial parse. Otherwise, if true, the caller is expected to + /// invoke \a readFunctionSummary for each summary needed, and the summary + /// section is thus parsed lazily. + bool IsLazy = false; + + /// Used to indicate whether caller only wants to check for the presence + /// of the function summary bitcode section. All blocks are skipped, + /// but the SeenFuncSummary boolean is set. + bool CheckFuncSummaryPresenceOnly = false; + + /// Indicates whether we have encountered a function summary section + /// yet during parsing, used when checking if file contains function + /// summary section. + bool SeenFuncSummary = false; + + /// \brief Map populated during function summary section parsing, and + /// consumed during ValueSymbolTable parsing. + /// + /// Used to correlate summary records with VST entries. 
For the per-module + /// index this maps the ValueID to the parsed function summary, and + /// for the combined index this maps the summary record's bitcode + /// offset to the function summary (since in the combined index the + /// VST records do not hold value IDs but rather hold the function + /// summary record offset). + DenseMap> SummaryMap; + + /// Map populated during module path string table parsing, from the + /// module ID to a string reference owned by the index's module + /// path string table, used to correlate with combined index function + /// summary records. + DenseMap ModuleIdMap; + +public: + std::error_code error(BitcodeError E, const Twine &Message); + std::error_code error(BitcodeError E); + std::error_code error(const Twine &Message); + + FunctionIndexBitcodeReader(MemoryBuffer *Buffer, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + FunctionIndexBitcodeReader(DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + ~FunctionIndexBitcodeReader() { freeState(); } + + void freeState(); + + void releaseBuffer(); + + /// Check if the parser has encountered a function summary section. + bool foundFuncSummary() { return SeenFuncSummary; } + + /// \brief Main interface to parsing a bitcode buffer. + /// \returns true if an error occurred. + std::error_code parseSummaryIndexInto(std::unique_ptr Streamer, + FunctionInfoIndex *I); + + /// \brief Interface for parsing a function summary lazily. + std::error_code parseFunctionSummary(std::unique_ptr Streamer, + FunctionInfoIndex *I, + size_t FunctionSummaryOffset); + +private: + std::error_code parseModule(); + std::error_code parseValueSymbolTable(); + std::error_code parseEntireSummary(); + std::error_code parseModuleStringTable(); + std::error_code initStream(std::unique_ptr Streamer); + std::error_code initStreamFromBuffer(); + std::error_code initLazyStream(std::unique_ptr Streamer); +}; } // namespace BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC, @@ -397,43 +518,51 @@ static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, return error(DiagnosticHandler, EC, EC.message()); } -static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, +static std::error_code error(LLVMContext &Context, std::error_code EC, const Twine &Message) { - return error(DiagnosticHandler, - make_error_code(BitcodeError::CorruptedBitcode), Message); + return error([&](const DiagnosticInfo &DI) { Context.diagnose(DI); }, EC, + Message); +} + +static std::error_code error(LLVMContext &Context, std::error_code EC) { + return error(Context, EC, EC.message()); +} + +static std::error_code error(LLVMContext &Context, const Twine &Message) { + return error(Context, make_error_code(BitcodeError::CorruptedBitcode), + Message); } std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) { - return ::error(DiagnosticHandler, make_error_code(E), Message); + if (!ProducerIdentification.empty()) { + return ::error(Context, make_error_code(E), + Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); + } + return ::error(Context, make_error_code(E), Message); } std::error_code BitcodeReader::error(const Twine &Message) { - return ::error(DiagnosticHandler, - make_error_code(BitcodeError::CorruptedBitcode), Message); + if (!ProducerIdentification.empty()) { + return ::error(Context, 
make_error_code(BitcodeError::CorruptedBitcode), + Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); + } + return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode), + Message); } std::error_code BitcodeReader::error(BitcodeError E) { - return ::error(DiagnosticHandler, make_error_code(E)); + return ::error(Context, make_error_code(E)); } -static DiagnosticHandlerFunction getDiagHandler(DiagnosticHandlerFunction F, - LLVMContext &C) { - if (F) - return F; - return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; -} +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context) + : Context(Context), Buffer(Buffer), ValueList(Context), + MetadataList(Context) {} -BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler) - : Context(Context), - DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(Buffer), ValueList(Context), MDValueList(Context) {} - -BitcodeReader::BitcodeReader(LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler) - : Context(Context), - DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(nullptr), ValueList(Context), MDValueList(Context) {} +BitcodeReader::BitcodeReader(LLVMContext &Context) + : Context(Context), Buffer(nullptr), ValueList(Context), + MetadataList(Context) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -472,7 +601,7 @@ void BitcodeReader::freeState() { Buffer = nullptr; std::vector().swap(TypeList); ValueList.clear(); - MDValueList.clear(); + MetadataList.clear(); std::vector().swap(ComdatList); std::vector().swap(MAttributes); @@ -779,6 +908,8 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { OldV->replaceAllUsesWith(V); delete PrevVal; } + + return; } @@ -904,7 +1035,7 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() { } } -void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { +void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) { if (Idx == size()) { push_back(MD); return; @@ -913,7 +1044,7 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { if (Idx >= size()) resize(Idx+1); - TrackingMDRef &OldMD = MDValuePtrs[Idx]; + TrackingMDRef &OldMD = MetadataPtrs[Idx]; if (!OldMD) { OldMD.reset(MD); return; @@ -925,11 +1056,11 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { --NumFwdRefs; } -Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { +Metadata *BitcodeReaderMetadataList::getValueFwdRef(unsigned Idx) { if (Idx >= size()) resize(Idx + 1); - if (Metadata *MD = MDValuePtrs[Idx]) + if (Metadata *MD = MetadataPtrs[Idx]) return MD; // Track forward refs to be resolved later. @@ -944,11 +1075,11 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { // Create and return a placeholder, which will later be RAUW'd. Metadata *MD = MDNode::getTemporary(Context, None).release(); - MDValuePtrs[Idx].reset(MD); + MetadataPtrs[Idx].reset(MD); return MD; } -void BitcodeReaderMDValueList::tryToResolveCycles() { +void BitcodeReaderMetadataList::tryToResolveCycles() { if (!AnyFwdRefs) // Nothing to do. return; @@ -959,7 +1090,7 @@ void BitcodeReaderMDValueList::tryToResolveCycles() { // Resolve any cycles. 
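  // Forward references were handed out as temporary nodes by getValueFwdRef();
  // now that the whole block has been read, each uniquable node in the forward
  // reference window can be resolved. A self-referential pair such as
  // !0 = !{!1} and !1 = !{!0} (illustrative) only becomes uniqued here.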
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) { - auto &MD = MDValuePtrs[I]; + auto &MD = MetadataPtrs[I]; auto *N = dyn_cast_or_null(MD); if (!N) continue; @@ -1102,6 +1233,10 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; + case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: + return Attribute::InaccessibleMemOnly; + case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: + return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: @@ -1126,6 +1261,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::NoImplicitFloat; case bitc::ATTR_KIND_NO_INLINE: return Attribute::NoInline; + case bitc::ATTR_KIND_NO_RECURSE: + return Attribute::NoRecurse; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: @@ -1360,6 +1497,9 @@ std::error_code BitcodeReader::parseTypeTableBody() { case bitc::TYPE_CODE_X86_MMX: // X86_MMX ResultTy = Type::getX86_MMXTy(Context); break; + case bitc::TYPE_CODE_TOKEN: // TOKEN + ResultTy = Type::getTokenTy(Context); + break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.size() < 1) return error("Invalid record"); @@ -1524,7 +1664,107 @@ std::error_code BitcodeReader::parseTypeTableBody() { } } -std::error_code BitcodeReader::parseValueSymbolTable() { +std::error_code BitcodeReader::parseOperandBundleTags() { + if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID)) + return error("Invalid record"); + + if (!BundleTags.empty()) + return error("Invalid multiple blocks"); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Tags are implicitly mapped to integers by their order. + + if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG) + return error("Invalid record"); + + // OPERAND_BUNDLE_TAG: [strchr x N] + BundleTags.emplace_back(); + if (convertToString(Record, 0, BundleTags.back())) + return error("Invalid record"); + Record.clear(); + } +} + +/// Associate a value with its name from the given index in the provided record. +ErrorOr BitcodeReader::recordValue(SmallVectorImpl &Record, + unsigned NameIndex, Triple &TT) { + SmallString<128> ValueName; + if (convertToString(Record, NameIndex, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size() || !ValueList[ValueID]) + return error("Invalid record"); + Value *V = ValueList[ValueID]; + + StringRef NameStr(ValueName.data(), ValueName.size()); + if (NameStr.find_first_of(0) != StringRef::npos) + return error("Invalid value name"); + V->setName(NameStr); + auto *GO = dyn_cast(V); + if (GO) { + if (GO->getComdat() == reinterpret_cast(1)) { + if (TT.isOSBinFormatMachO()) + GO->setComdat(nullptr); + else + GO->setComdat(TheModule->getOrInsertComdat(V->getName())); + } + } + return V; +} + +/// Parse the value symbol table at either the current parsing location or +/// at the given bit offset if provided. 
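+/// For example (hypothetical numbers): for a module-level VST recorded at
+/// word offset 40, parseValueSymbolTable(40) saves the current bit position,
+/// jumps to bit 40 * 32, reads the table (recording per-function body
+/// offsets), and then jumps back so lazy materialization can proceed.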
+std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) { + uint64_t CurrentBit; + // Pass in the Offset to distinguish between calling for the module-level + // VST (where we want to jump to the VST offset) and the function-level + // VST (where we don't). + if (Offset > 0) { + // Save the current parsing location so we can jump back at the end + // of the VST read. + CurrentBit = Stream.GetCurrentBitNo(); + Stream.JumpToBit(Offset * 32); +#ifndef NDEBUG + // Do some checking if we are in debug mode. + BitstreamEntry Entry = Stream.advance(); + assert(Entry.Kind == BitstreamEntry::SubBlock); + assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID); +#else + // In NDEBUG mode ignore the output so we don't get an unused variable + // warning. + Stream.advance(); +#endif + } + + // Compute the delta between the bitcode indices in the VST (the word offset + // to the word-aligned ENTER_SUBBLOCK for the function block, and that + // expected by the lazy reader. The reader's EnterSubBlock expects to have + // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID + // (size BlockIDWidth). Note that we access the stream's AbbrevID width here + // just before entering the VST subblock because: 1) the EnterSubBlock + // changes the AbbrevID width; 2) the VST block is nested within the same + // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same + // AbbrevID width before calling EnterSubBlock; and 3) when we want to + // jump to the FUNCTION_BLOCK using this offset later, we don't want + // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK. + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); @@ -1542,6 +1782,8 @@ std::error_code BitcodeReader::parseValueSymbolTable() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: + if (Offset > 0) + Stream.JumpToBit(CurrentBit); return std::error_code(); case BitstreamEntry::Record: // The interesting case. @@ -1554,23 +1796,39 @@ std::error_code BitcodeReader::parseValueSymbolTable() { default: // Default behavior: unknown type. break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] - if (convertToString(Record, 1, ValueName)) - return error("Invalid record"); - unsigned ValueID = Record[0]; - if (ValueID >= ValueList.size() || !ValueList[ValueID]) - return error("Invalid record"); - Value *V = ValueList[ValueID]; + ErrorOr ValOrErr = recordValue(Record, 1, TT); + if (std::error_code EC = ValOrErr.getError()) + return EC; + ValOrErr.get(); + break; + } + case bitc::VST_CODE_FNENTRY: { + // VST_FNENTRY: [valueid, offset, namechar x N] + ErrorOr ValOrErr = recordValue(Record, 2, TT); + if (std::error_code EC = ValOrErr.getError()) + return EC; + Value *V = ValOrErr.get(); - V->setName(StringRef(ValueName.data(), ValueName.size())); - if (auto *GO = dyn_cast(V)) { - if (GO->getComdat() == reinterpret_cast(1)) { - if (TT.isOSBinFormatMachO()) - GO->setComdat(nullptr); - else - GO->setComdat(TheModule->getOrInsertComdat(V->getName())); - } + auto *GO = dyn_cast(V); + if (!GO) { + // If this is an alias, need to get the actual Function object + // it aliases, in order to set up the DeferredFunctionInfo entry below. 
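+        // (An alias shares its aliasee's body offset, so the lazy
+        // materialization map is keyed on the underlying Function.)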
+ auto *GA = dyn_cast(V); + if (GA) + GO = GA->getBaseObject(); + assert(GO); } - ValueName.clear(); + + uint64_t FuncWordOffset = Record[1]; + Function *F = dyn_cast(GO); + assert(F); + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; break; } case bitc::VST_CODE_BBENTRY: { @@ -1588,19 +1846,51 @@ std::error_code BitcodeReader::parseValueSymbolTable() { } } +/// Parse a single METADATA_KIND record, inserting result in MDKindMap. +std::error_code +BitcodeReader::parseMetadataKindRecord(SmallVectorImpl &Record) { + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Kind = Record[0]; + SmallString<8> Name(Record.begin() + 1, Record.end()); + + unsigned NewKind = TheModule->getMDKindID(Name.str()); + if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) + return error("Conflicting METADATA_KIND records"); + return std::error_code(); +} + static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; } -std::error_code BitcodeReader::parseMetadata() { +/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing +/// module level metadata. +std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { IsMetadataMaterialized = true; - unsigned NextMDValueNo = MDValueList.size(); + unsigned NextMetadataNo = MetadataList.size(); + if (ModuleLevel && SeenModuleValuesRecord) { + // Now that we are parsing the module level metadata, we want to restart + // the numbering of the MD values, and replace temp MD created earlier + // with their real values. If we saw a METADATA_VALUE record then we + // would have set the MetadataList size to the number specified in that + // record, to support parsing function-level metadata first, and we need + // to reset back to 0 to fill the MetadataList in with the parsed module + // The function-level metadata parsing should have reset the MetadataList + // size back to the value reported by the METADATA_VALUE record, saved in + // NumModuleMDs. + assert(NumModuleMDs == MetadataList.size() && + "Expected MetadataList to only contain module level values"); + NextMetadataNo = 0; + } if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) return error("Invalid record"); SmallVector Record; - auto getMD = - [&](unsigned ID) -> Metadata *{ return MDValueList.getValueFwdRef(ID); }; + auto getMD = [&](unsigned ID) -> Metadata * { + return MetadataList.getValueFwdRef(ID); + }; auto getMDOrNull = [&](unsigned ID) -> Metadata *{ if (ID) return getMD(ID - 1); @@ -1624,7 +1914,10 @@ std::error_code BitcodeReader::parseMetadata() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - MDValueList.tryToResolveCycles(); + MetadataList.tryToResolveCycles(); + assert((!(ModuleLevel && SeenModuleValuesRecord) || + NumModuleMDs == MetadataList.size()) && + "Inconsistent bitcode: METADATA_VALUES mismatch"); return std::error_code(); case BitstreamEntry::Record: // The interesting case. 
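// Every operand reference in the record cases below goes through
// MetadataList.getValueFwdRef(), so a record may name metadata that has not
// been parsed yet; a temporary node stands in until tryToResolveCycles() runs
// at EndBlock. An illustrative textual equivalent (not from this patch):
//   !2 = !{!3}        ; parsed first, !3 materializes as a temporary
//   !3 = !{!"leaf"}   ; parsed later, the temporary is replaced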
@@ -1652,7 +1945,8 @@ std::error_code BitcodeReader::parseMetadata() { unsigned Size = Record.size(); NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); for (unsigned i = 0; i != Size; ++i) { - MDNode *MD = dyn_cast_or_null(MDValueList.getValueFwdRef(Record[i])); + MDNode *MD = + dyn_cast_or_null(MetadataList.getValueFwdRef(Record[i])); if (!MD) return error("Invalid record"); NMD->addOperand(MD); @@ -1669,7 +1963,7 @@ std::error_code BitcodeReader::parseMetadata() { // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. auto dropRecord = [&] { - MDValueList.assignValue(MDNode::get(Context, None), NextMDValueNo++); + MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); }; if (Record.size() != 2) { dropRecord(); @@ -1682,9 +1976,9 @@ std::error_code BitcodeReader::parseMetadata() { break; } - MDValueList.assignValue( + MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_OLD_NODE: { @@ -1699,7 +1993,7 @@ std::error_code BitcodeReader::parseMetadata() { if (!Ty) return error("Invalid record"); if (Ty->isMetadataTy()) - Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); + Elts.push_back(MetadataList.getValueFwdRef(Record[i + 1])); else if (!Ty->isVoidTy()) { auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty)); @@ -1709,7 +2003,7 @@ std::error_code BitcodeReader::parseMetadata() { } else Elts.push_back(nullptr); } - MDValueList.assignValue(MDNode::get(Context, Elts), NextMDValueNo++); + MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); break; } case bitc::METADATA_VALUE: { @@ -1720,9 +2014,9 @@ std::error_code BitcodeReader::parseMetadata() { if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_DISTINCT_NODE: @@ -1732,10 +2026,10 @@ std::error_code BitcodeReader::parseMetadata() { SmallVector Elts; Elts.reserve(Record.size()); for (unsigned ID : Record) - Elts.push_back(ID ? MDValueList.getValueFwdRef(ID - 1) : nullptr); - MDValueList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) - : MDNode::get(Context, Elts), - NextMDValueNo++); + Elts.push_back(ID ? MetadataList.getValueFwdRef(ID - 1) : nullptr); + MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) + : MDNode::get(Context, Elts), + NextMetadataNo++); break; } case bitc::METADATA_LOCATION: { @@ -1744,13 +2038,13 @@ std::error_code BitcodeReader::parseMetadata() { unsigned Line = Record[1]; unsigned Column = Record[2]; - MDNode *Scope = cast(MDValueList.getValueFwdRef(Record[3])); + MDNode *Scope = cast(MetadataList.getValueFwdRef(Record[3])); Metadata *InlinedAt = - Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr; - MDValueList.assignValue( + Record[4] ? MetadataList.getValueFwdRef(Record[4] - 1) : nullptr; + MetadataList.assignValue( GET_OR_DISTINCT(DILocation, Record[0], (Context, Line, Column, Scope, InlinedAt)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -1766,63 +2060,65 @@ std::error_code BitcodeReader::parseMetadata() { auto *Header = getMDString(Record[3]); SmallVector DwarfOps; for (unsigned I = 4, E = Record.size(); I != E; ++I) - DwarfOps.push_back(Record[I] ? 
MDValueList.getValueFwdRef(Record[I] - 1) - : nullptr); - MDValueList.assignValue(GET_OR_DISTINCT(GenericDINode, Record[0], - (Context, Tag, Header, DwarfOps)), - NextMDValueNo++); + DwarfOps.push_back( + Record[I] ? MetadataList.getValueFwdRef(Record[I] - 1) : nullptr); + MetadataList.assignValue( + GET_OR_DISTINCT(GenericDINode, Record[0], + (Context, Tag, Header, DwarfOps)), + NextMetadataNo++); break; } case bitc::METADATA_SUBRANGE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, Record[0], (Context, Record[1], unrotateSign(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_ENUMERATOR: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue(GET_OR_DISTINCT(DIEnumerator, Record[0], - (Context, unrotateSign(Record[1]), - getMDString(Record[2]))), - NextMDValueNo++); + MetadataList.assignValue( + GET_OR_DISTINCT( + DIEnumerator, Record[0], + (Context, unrotateSign(Record[1]), getMDString(Record[2]))), + NextMetadataNo++); break; } case bitc::METADATA_BASIC_TYPE: { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIBasicType, Record[0], (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_DERIVED_TYPE: { if (Record.size() != 12) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIDerivedType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], getMDOrNull(Record[5]), getMDOrNull(Record[6]), Record[7], Record[8], Record[9], Record[10], getMDOrNull(Record[11]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_COMPOSITE_TYPE: { if (Record.size() != 16) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DICompositeType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], @@ -1831,17 +2127,17 @@ std::error_code BitcodeReader::parseMetadata() { getMDOrNull(Record[11]), Record[12], getMDOrNull(Record[13]), getMDOrNull(Record[14]), getMDString(Record[15]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_SUBROUTINE_TYPE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, Record[0], (Context, Record[1], getMDOrNull(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } @@ -1849,12 +2145,12 @@ std::error_code BitcodeReader::parseMetadata() { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIModule, Record[0], (Context, getMDOrNull(Record[1]), - getMDString(Record[2]), getMDString(Record[3]), - getMDString(Record[4]), getMDString(Record[5]))), - NextMDValueNo++); + getMDString(Record[2]), getMDString(Record[3]), + getMDString(Record[4]), getMDString(Record[5]))), + NextMetadataNo++); break; } @@ -1862,180 +2158,218 @@ std::error_code BitcodeReader::parseMetadata() { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]), getMDString(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_COMPILE_UNIT: { - if (Record.size() 
< 14 || Record.size() > 15) + if (Record.size() < 14 || Record.size() > 16) return error("Invalid record"); - MDValueList.assignValue( - GET_OR_DISTINCT( - DICompileUnit, Record[0], - (Context, Record[1], getMDOrNull(Record[2]), - getMDString(Record[3]), Record[4], getMDString(Record[5]), - Record[6], getMDString(Record[7]), Record[8], - getMDOrNull(Record[9]), getMDOrNull(Record[10]), - getMDOrNull(Record[11]), getMDOrNull(Record[12]), - getMDOrNull(Record[13]), Record.size() == 14 ? 0 : Record[14])), - NextMDValueNo++); + // Ignore Record[0], which indicates whether this compile unit is + // distinct. It's always distinct. + MetadataList.assignValue( + DICompileUnit::getDistinct( + Context, Record[1], getMDOrNull(Record[2]), + getMDString(Record[3]), Record[4], getMDString(Record[5]), + Record[6], getMDString(Record[7]), Record[8], + getMDOrNull(Record[9]), getMDOrNull(Record[10]), + getMDOrNull(Record[11]), getMDOrNull(Record[12]), + getMDOrNull(Record[13]), + Record.size() <= 15 ? 0 : getMDOrNull(Record[15]), + Record.size() <= 14 ? 0 : Record[14]), + NextMetadataNo++); break; } case bitc::METADATA_SUBPROGRAM: { - if (Record.size() != 19) + if (Record.size() != 18 && Record.size() != 19) return error("Invalid record"); - MDValueList.assignValue( - GET_OR_DISTINCT( - DISubprogram, Record[0], - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getMDOrNull(Record[6]), Record[7], Record[8], Record[9], - getMDOrNull(Record[10]), Record[11], Record[12], Record[13], - Record[14], getMDOrNull(Record[15]), getMDOrNull(Record[16]), - getMDOrNull(Record[17]), getMDOrNull(Record[18]))), - NextMDValueNo++); + bool HasFn = Record.size() == 19; + DISubprogram *SP = GET_OR_DISTINCT( + DISubprogram, + Record[0] || Record[8], // All definitions should be distinct. + (Context, getMDOrNull(Record[1]), getMDString(Record[2]), + getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], + getMDOrNull(Record[6]), Record[7], Record[8], Record[9], + getMDOrNull(Record[10]), Record[11], Record[12], Record[13], + Record[14], getMDOrNull(Record[15 + HasFn]), + getMDOrNull(Record[16 + HasFn]), getMDOrNull(Record[17 + HasFn]))); + MetadataList.assignValue(SP, NextMetadataNo++); + + // Upgrade sp->function mapping to function->sp mapping. + if (HasFn && Record[15]) { + if (auto *CMD = dyn_cast(getMDOrNull(Record[15]))) + if (auto *F = dyn_cast(CMD->getValue())) { + if (F->isMaterializable()) + // Defer until materialized; unmaterialized functions may not have + // metadata. 
+ FunctionsWithSPs[F] = SP; + else if (!F->empty()) + F->setSubprogram(SP); + } + } break; } case bitc::METADATA_LEXICAL_BLOCK: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlock, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { if (Record.size() != 4) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlockFile, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_NAMESPACE: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DINamespace, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4])), - NextMDValueNo++); + NextMetadataNo++); + break; + } + case bitc::METADATA_MACRO: { + if (Record.size() != 5) + return error("Invalid record"); + + MetadataList.assignValue( + GET_OR_DISTINCT(DIMacro, Record[0], + (Context, Record[1], Record[2], + getMDString(Record[3]), getMDString(Record[4]))), + NextMetadataNo++); + break; + } + case bitc::METADATA_MACRO_FILE: { + if (Record.size() != 5) + return error("Invalid record"); + + MetadataList.assignValue( + GET_OR_DISTINCT(DIMacroFile, Record[0], + (Context, Record[1], Record[2], + getMDOrNull(Record[3]), getMDOrNull(Record[4]))), + NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_TYPE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, - Record[0], - (Context, getMDString(Record[1]), - getMDOrNull(Record[2]))), - NextMDValueNo++); + MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, + Record[0], + (Context, getMDString(Record[1]), + getMDOrNull(Record[2]))), + NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_VALUE: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DITemplateValueParameter, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_GLOBAL_VAR: { if (Record.size() != 11) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIGlobalVariable, Record[0], (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. - if (Record.size() != 9 && Record.size() != 10) + if (Record.size() < 8 || Record.size() > 10) return error("Invalid record"); - MDValueList.assignValue( + // 2nd field used to be an artificial tag, either DW_TAG_auto_variable or + // DW_TAG_arg_variable. 
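Every METADATA_* case in this region funnels through the same helper: Record[0] carries the node's distinctness flag, and GET_OR_DISTINCT dispatches to the uniqued or the distinct constructor accordingly. The macro is defined near the top of parseMetadata() and #undef'd at its end (visible in a later hunk); its shape is essentially:

    #define GET_OR_DISTINCT(CLASS, DISTINCT, ARGS)                             \
      (DISTINCT ? CLASS::getDistinct ARGS : CLASS::get ARGS)

So GET_OR_DISTINCT(DISubrange, Record[0], (Context, ...)) expands to Record[0] ? DISubrange::getDistinct(Context, ...) : DISubrange::get(Context, ...), which is why most cases need no explicit uniqued/distinct branching of their own; METADATA_COMPILE_UNIT above is the exception, since compile units are now forced distinct regardless of the flag.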
+ bool HasTag = Record.size() > 8; + MetadataList.assignValue( GET_OR_DISTINCT(DILocalVariable, Record[0], - (Context, Record[1], getMDOrNull(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), - Record[5], getMDOrNull(Record[6]), Record[7], - Record[8])), - NextMDValueNo++); + (Context, getMDOrNull(Record[1 + HasTag]), + getMDString(Record[2 + HasTag]), + getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], + getMDOrNull(Record[5 + HasTag]), Record[6 + HasTag], + Record[7 + HasTag])), + NextMetadataNo++); break; } case bitc::METADATA_EXPRESSION: { if (Record.size() < 1) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, Record[0], (Context, makeArrayRef(Record).slice(1))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_OBJC_PROPERTY: { if (Record.size() != 8) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIObjCProperty, Record[0], (Context, getMDString(Record[1]), getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getMDOrNull(Record[7]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_IMPORTED_ENTITY: { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIImportedEntity, Record[0], (Context, Record[1], getMDOrNull(Record[2]), getMDOrNull(Record[3]), Record[4], getMDString(Record[5]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_STRING: { std::string String(Record.begin(), Record.end()); llvm::UpgradeMDStringConstant(String); Metadata *MD = MDString::get(Context, String); - MDValueList.assignValue(MD, NextMDValueNo++); + MetadataList.assignValue(MD, NextMetadataNo++); break; } case bitc::METADATA_KIND: { - if (Record.size() < 2) - return error("Invalid record"); - - unsigned Kind = Record[0]; - SmallString<8> Name(Record.begin()+1, Record.end()); - - unsigned NewKind = TheModule->getMDKindID(Name.str()); - if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) - return error("Conflicting METADATA_KIND records"); + // Support older bitcode files that had METADATA_KIND records in a + // block with METADATA_BLOCK_ID. + if (std::error_code EC = parseMetadataKindRecord(Record)) + return EC; break; } } @@ -2043,6 +2377,43 @@ std::error_code BitcodeReader::parseMetadata() { #undef GET_OR_DISTINCT } +/// Parse the metadata kinds out of the METADATA_KIND_BLOCK. +std::error_code BitcodeReader::parseMetadataKinds() { + if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + // Read all the records. + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned Code = Stream.readRecord(Entry.ID, Record); + switch (Code) { + default: // Default behavior: ignore. + break; + case bitc::METADATA_KIND: { + if (std::error_code EC = parseMetadataKindRecord(Record)) + return EC; + break; + } + } + } +} + /// Decode a signed value stored with the sign bit in the LSB for dense VBR /// encoding. 
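The encoding that decodeSignRotatedValue() below undoes rotates the sign into bit 0, so small-magnitude negative numbers stay small and remain cheap under VBR. A standalone round-trip sketch may make the scheme concrete (the helper names here are ours, not the reader's; INT64_MIN has no positive counterpart and is encoded as the otherwise-unused value 1):

    #include <cassert>
    #include <cstdint>

    static uint64_t encodeSignRotated(int64_t V) {
      if (V >= 0)
        return uint64_t(V) << 1;        // 0, 1, 2, ... -> 0, 2, 4, ...
      if (V == INT64_MIN)
        return 1;                       // special case: "-0" means MININT
      return (uint64_t(-V) << 1) | 1;   // -1, -2, ... -> 3, 5, ...
    }

    static int64_t decodeSignRotated(uint64_t V) {
      if ((V & 1) == 0)
        return int64_t(V >> 1);         // even values are non-negative
      if (V != 1)
        return -int64_t(V >> 1);        // odd values are negative
      return INT64_MIN;                 // 1 decodes to INT64_MIN
    }

    int main() {
      for (int64_t X : {int64_t(0), int64_t(-1), int64_t(42), INT64_MIN})
        assert(decodeSignRotated(encodeSignRotated(X)) == X);
    }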
uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { @@ -2410,11 +2781,12 @@ std::error_code BitcodeReader::parseConstants() { Type *SelectorTy = Type::getInt1Ty(Context); - // If CurTy is a vector of length n, then Record[0] must be a - // vector. Otherwise, it must be a single bit. + // The selector might be an i1 or an + // Get the type from the ValueList before getting a forward ref. if (VectorType *VTy = dyn_cast(CurTy)) - SelectorTy = VectorType::get(Type::getInt1Ty(Context), - VTy->getNumElements()); + if (Value *V = ValueList[Record[0]]) + if (SelectorTy != V->getType()) + SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements()); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], SelectorTy), @@ -2567,9 +2939,6 @@ std::error_code BitcodeReader::parseConstants() { if (!Fn) return error("Invalid record"); - // Don't let Fn get dematerialized. - BlockAddressesTaken.insert(Fn); - // If the function is already parsed we can insert the block address right // away. BasicBlock *BB; @@ -2584,7 +2953,7 @@ std::error_code BitcodeReader::parseConstants() { return error("Invalid ID"); ++BBI; } - BB = BBI; + BB = &*BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. @@ -2652,7 +3021,7 @@ std::error_code BitcodeReader::parseUseLists() { V = ValueList[ID]; unsigned NumUses = 0; SmallDenseMap Order; - for (const Use &U : V->uses()) { + for (const Use &U : V->materialized_uses()) { if (++NumUses > Record.size()) break; Order[&U] = Record[NumUses - 1]; @@ -2688,7 +3057,7 @@ std::error_code BitcodeReader::materializeMetadata() { for (uint64_t BitPos : DeferredMetadataInfo) { // Move the bit stream to the saved position. Stream.JumpToBit(BitPos); - if (std::error_code EC = parseMetadata()) + if (std::error_code EC = parseMetadata(true)) return EC; } DeferredMetadataInfo.clear(); @@ -2697,6 +3066,25 @@ std::error_code BitcodeReader::materializeMetadata() { void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } +void BitcodeReader::saveMetadataList( + DenseMap &MetadataToIDs, bool OnlyTempMD) { + for (unsigned ID = 0; ID < MetadataList.size(); ++ID) { + Metadata *MD = MetadataList[ID]; + auto *N = dyn_cast_or_null(MD); + // Save all values if !OnlyTempMD, otherwise just the temporary metadata. + if (!OnlyTempMD || (N && N->isTemporary())) { + // Will call this after materializing each function, in order to + // handle remapping of the function's instructions/metadata. + // See if we already have an entry in that case. + if (OnlyTempMD && MetadataToIDs.count(MD)) { + assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id"); + continue; + } + MetadataToIDs[MD] = ID; + } + } +} + /// When we see the block for a function body, remember where it is and then /// skip it. This lets us lazily deserialize the functions. std::error_code BitcodeReader::rememberAndSkipFunctionBody() { @@ -2709,6 +3097,9 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBody() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); + assert( + (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) && + "Mismatch between VST and scanned function offsets"); DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. @@ -2741,10 +3132,91 @@ std::error_code BitcodeReader::globalCleanup() { return std::error_code(); } -std::error_code BitcodeReader::parseModule(bool Resume, +/// Support for lazy parsing of function bodies. 
This is required if we +/// either have an old bitcode file without a VST forward declaration record, +/// or if we have an anonymous function being materialized, since anonymous +/// functions do not have a name and are therefore not in the VST. +std::error_code BitcodeReader::rememberAndSkipFunctionBodies() { + Stream.JumpToBit(NextUnreadBit); + + if (Stream.AtEndOfStream()) + return error("Could not find function in stream"); + + if (!SeenFirstFunctionBody) + return error("Trying to materialize functions before seeing function blocks"); + + // An old bitcode file with the symbol table at the end would have + // finished the parse greedily. + assert(SeenValueSymbolTable); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + default: + return error("Expect SubBlock"); + case BitstreamEntry::SubBlock: + switch (Entry.ID) { + default: + return error("Expect function block"); + case bitc::FUNCTION_BLOCK_ID: + if (std::error_code EC = rememberAndSkipFunctionBody()) + return EC; + NextUnreadBit = Stream.GetCurrentBitNo(); + return std::error_code(); + } + } + } +} + +std::error_code BitcodeReader::parseBitcodeVersion() { + if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) + return error("Invalid record"); + + // Read all the records. + SmallVector Record; + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + default: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: // Default behavior: reject + return error("Invalid value"); + case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x + // N] + convertToString(Record, 0, ProducerIdentification); + break; + } + case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] + unsigned epoch = (unsigned)Record[0]; + if (epoch != bitc::BITCODE_CURRENT_EPOCH) { + return error( + Twine("Incompatible epoch: Bitcode '") + Twine(epoch) + + "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'"); + } + } + } + } +} + +std::error_code BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); + if (ResumeBit) + Stream.JumpToBit(ResumeBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); @@ -2785,9 +3257,23 @@ std::error_code BitcodeReader::parseModule(bool Resume, return EC; break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (std::error_code EC = parseValueSymbolTable()) - return EC; - SeenValueSymbolTable = true; + if (!SeenValueSymbolTable) { + // Either this is an old form VST without function index and an + // associated VST forward declaration record (which would have caused + // the VST to be jumped to and parsed before it was encountered + // normally in the stream), or there were no function blocks to + // trigger an earlier parsing of the VST. + assert(VSTOffset == 0 || FunctionsWithBodies.empty()); + if (std::error_code EC = parseValueSymbolTable()) + return EC; + SeenValueSymbolTable = true; + } else { + // We must have had a VST forward declaration record, which caused + // the parser to jump to and parse the VST earlier. 
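Abstracting away the bitstream, the deferral described above works like this: DeferredFunctionInfo maps each function to the bit offset of its body, with 0 meaning "not located yet". New bitcode pre-populates the offsets from VST entries; old bitcode, and anonymous functions that have no VST entry, fall back to scanning forward one function block at a time. A toy model of that loop (types and names here are ours, under the assumption that scanning always advances by exactly one body):

    #include <cassert>
    #include <cstdint>
    #include <deque>
    #include <map>
    #include <string>

    struct LazyBodyIndex {
      std::map<std::string, uint64_t> DeferredFunctionInfo;      // 0 = unknown
      std::deque<std::pair<std::string, uint64_t>> UnscannedBodies; // "stream"

      // Fallback path, analogous to rememberAndSkipFunctionBodies().
      void scanNextBody() {
        assert(!UnscannedBodies.empty() && "Could not find function in stream");
        auto [Name, Bit] = UnscannedBodies.front();
        UnscannedBodies.pop_front();
        DeferredFunctionInfo[Name] = Bit;
      }

      uint64_t materializeOffset(const std::string &Name) {
        uint64_t &Bit = DeferredFunctionInfo[Name];
        while (Bit == 0)        // same loop shape as findFunctionInStream()
          scanNextBody();
        return Bit;
      }
    };

    int main() {
      LazyBodyIndex Idx;
      Idx.UnscannedBodies = {{"f", 128}, {"g", 512}};
      assert(Idx.materializeOffset("g") == 512); // scans past f's body first
    }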
+ assert(VSTOffset > 0); + if (Stream.SkipBlock()) + return error("Invalid record"); + } break; case bitc::CONSTANTS_BLOCK_ID: if (std::error_code EC = parseConstants()) @@ -2802,7 +3288,11 @@ std::error_code BitcodeReader::parseModule(bool Resume, break; } assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); - if (std::error_code EC = parseMetadata()) + if (std::error_code EC = parseMetadata(true)) + return EC; + break; + case bitc::METADATA_KIND_BLOCK_ID: + if (std::error_code EC = parseMetadataKinds()) return EC; break; case bitc::FUNCTION_BLOCK_ID: @@ -2815,8 +3305,39 @@ std::error_code BitcodeReader::parseModule(bool Resume, SeenFirstFunctionBody = true; } + if (VSTOffset > 0) { + // If we have a VST forward declaration record, make sure we + // parse the VST now if we haven't already. It is needed to + // set up the DeferredFunctionInfo vector for lazy reading. + if (!SeenValueSymbolTable) { + if (std::error_code EC = + BitcodeReader::parseValueSymbolTable(VSTOffset)) + return EC; + SeenValueSymbolTable = true; + // Fall through so that we record the NextUnreadBit below. + // This is necessary in case we have an anonymous function that + // is later materialized. Since it will not have a VST entry we + // need to fall back to the lazy parse to find its offset. + } else { + // If we have a VST forward declaration record, but have already + // parsed the VST (just above, when the first function body was + // encountered here), then we are resuming the parse after + // materializing functions. The ResumeBit points to the + // start of the last function block recorded in the + // DeferredFunctionInfo map. Skip it. + if (Stream.SkipBlock()) + return error("Invalid record"); + continue; + } + } + + // Support older bitcode files that did not have the function + // index in the VST, nor a VST forward declaration record, as + // well as anonymous functions that do not have VST entries. + // Build the DeferredFunctionInfo vector on the fly. if (std::error_code EC = rememberAndSkipFunctionBody()) return EC; + // Suspend parsing when we reach the function bodies. Subsequent // materialization calls will resume it when necessary. If the bitcode // file is old, the symbol table will be at the end instead and will not @@ -2830,6 +3351,10 @@ std::error_code BitcodeReader::parseModule(bool Resume, if (std::error_code EC = parseUseLists()) return EC; break; + case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: + if (std::error_code EC = parseOperandBundleTags()) + return EC; + break; } continue; @@ -2840,7 +3365,8 @@ std::error_code BitcodeReader::parseModule(bool Resume, // Read a record. - switch (Stream.readRecord(Entry.ID, Record)) { + auto BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { default: break; // Default behavior, ignore unknown content. 
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) @@ -3012,11 +3538,14 @@ std::error_code BitcodeReader::parseModule(bool Resume, auto *FTy = dyn_cast(Ty); if (!FTy) return error("Invalid type for value"); + auto CC = static_cast(Record[1]); + if (CC & ~CallingConv::MaxID) + return error("Invalid calling convention ID"); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); - Func->setCallingConv(static_cast(Record[1])); + Func->setCallingConv(CC); bool isProto = Record[2]; uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); @@ -3079,35 +3608,51 @@ std::error_code BitcodeReader::parseModule(bool Resume, } break; } - // ALIAS: [alias type, aliasee val#, linkage] - // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass] - case bitc::MODULE_CODE_ALIAS: { - if (Record.size() < 3) + // ALIAS: [alias type, addrspace, aliasee val#, linkage] + // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass] + case bitc::MODULE_CODE_ALIAS: + case bitc::MODULE_CODE_ALIAS_OLD: { + bool NewRecord = BitCode == bitc::MODULE_CODE_ALIAS; + if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); + unsigned OpNum = 0; + Type *Ty = getTypeByID(Record[OpNum++]); if (!Ty) return error("Invalid record"); - auto *PTy = dyn_cast(Ty); - if (!PTy) - return error("Invalid type for value"); - auto *NewGA = - GlobalAlias::create(PTy, getDecodedLinkage(Record[2]), "", TheModule); + unsigned AddrSpace; + if (!NewRecord) { + auto *PTy = dyn_cast(Ty); + if (!PTy) + return error("Invalid type for value"); + Ty = PTy->getElementType(); + AddrSpace = PTy->getAddressSpace(); + } else { + AddrSpace = Record[OpNum++]; + } + + auto Val = Record[OpNum++]; + auto Linkage = Record[OpNum++]; + auto *NewGA = GlobalAlias::create( + Ty, AddrSpace, getDecodedLinkage(Linkage), "", TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. - if (Record.size() > 3 && !NewGA->hasLocalLinkage()) - // FIXME: Change to an error if non-default in 4.0. - NewGA->setVisibility(getDecodedVisibility(Record[3])); - if (Record.size() > 4) - NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[4])); + if (OpNum != Record.size()) { + auto VisInd = OpNum++; + if (!NewGA->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. 
+ NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); + } + if (OpNum != Record.size()) + NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); else - upgradeDLLImportExportLinkage(NewGA, Record[2]); - if (Record.size() > 5) - NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[5])); - if (Record.size() > 6) - NewGA->setUnnamedAddr(Record[6]); + upgradeDLLImportExportLinkage(NewGA, Linkage); + if (OpNum != Record.size()) + NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); + if (OpNum != Record.size()) + NewGA->setUnnamedAddr(Record[OpNum++]); ValueList.push_back(NewGA); - AliasInits.push_back(std::make_pair(NewGA, Record[1])); + AliasInits.push_back(std::make_pair(NewGA, Val)); break; } /// MODULE_CODE_PURGEVALS: [numvals] @@ -3117,11 +3662,52 @@ std::error_code BitcodeReader::parseModule(bool Resume, return error("Invalid record"); ValueList.shrinkTo(Record[0]); break; + /// MODULE_CODE_VSTOFFSET: [offset] + case bitc::MODULE_CODE_VSTOFFSET: + if (Record.size() < 1) + return error("Invalid record"); + VSTOffset = Record[0]; + break; + /// MODULE_CODE_METADATA_VALUES: [numvals] + case bitc::MODULE_CODE_METADATA_VALUES: + if (Record.size() < 1) + return error("Invalid record"); + assert(!IsMetadataMaterialized); + // This record contains the number of metadata values in the module-level + // METADATA_BLOCK. It is used to support lazy parsing of metadata as + // a postpass, where we will parse function-level metadata first. + // This is needed because the ids of metadata are assigned implicitly + // based on their ordering in the bitcode, with the function-level + // metadata ids starting after the module-level metadata ids. Otherwise, + // we would have to parse the module-level metadata block to prime the + // MetadataList when we are lazy loading metadata during function + // importing. Initialize the MetadataList size here based on the + // record value, regardless of whether we are doing lazy metadata + // loading, so that we have consistent handling and assertion + // checking in parseMetadata for module-level metadata. + NumModuleMDs = Record[0]; + SeenModuleValuesRecord = true; + assert(MetadataList.size() == 0); + MetadataList.resize(NumModuleMDs); + break; } Record.clear(); } } +/// Helper to read the header common to all bitcode files. +static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { + // Sniff for the signature. + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return false; + return true; +} + std::error_code BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, Module *M, bool ShouldLazyLoadMetadata) { @@ -3131,12 +3717,7 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, return EC; // Sniff for the signature. 
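For reference, the signature sniffed by hasValidBitcodeHeader() is the four bytes 'B', 'C', 0xC0, 0xDE. The two pairs of Read(4) calls consume the 0xC0 and 0xDE bytes nibble-by-nibble, low nibble first, which is why the values checked are 0x0, 0xC then 0xE, 0xD. On plain bytes the same check is simply:

    #include <cstddef>
    #include <cstdint>

    // Standalone restatement of the magic check (names are ours).
    static bool startsWithBitcodeMagic(const uint8_t *Buf, size_t Size) {
      return Size >= 4 && Buf[0] == 'B' && Buf[1] == 'C' &&
             Buf[2] == 0xC0 && Buf[3] == 0xDE;
    }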
- if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily @@ -3153,8 +3734,13 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); + if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { + parseBitcodeVersion(); + continue; + } + if (Entry.ID == bitc::MODULE_BLOCK_ID) - return parseModule(false, ShouldLazyLoadMetadata); + return parseModule(0, ShouldLazyLoadMetadata); if (Stream.SkipBlock()) return error("Invalid record"); @@ -3204,12 +3790,7 @@ ErrorOr BitcodeReader::parseTriple() { return EC; // Sniff for the signature. - if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily @@ -3239,6 +3820,41 @@ ErrorOr BitcodeReader::parseTriple() { } } +ErrorOr BitcodeReader::parseIdentificationBlock() { + if (std::error_code EC = initStream(nullptr)) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + + case BitstreamEntry::SubBlock: + if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { + if (std::error_code EC = parseBitcodeVersion()) + return EC; + return ProducerIdentification; + } + // Ignore other sub-blocks. + if (Stream.SkipBlock()) + return error("Malformed block"); + continue; + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; + } + } +} + /// Parse metadata attachments. std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) @@ -3274,7 +3890,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { auto K = MDKindMap.find(Record[I]); if (K == MDKindMap.end()) return error("Invalid ID"); - Metadata *MD = MDValueList.getValueFwdRef(Record[I + 1]); + Metadata *MD = MetadataList.getValueFwdRef(Record[I + 1]); F.setMetadata(K->second, cast(MD)); } continue; @@ -3288,7 +3904,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { MDKindMap.find(Kind); if (I == MDKindMap.end()) return error("Invalid ID"); - Metadata *Node = MDValueList.getValueFwdRef(Record[i + 1]); + Metadata *Node = MetadataList.getValueFwdRef(Record[i + 1]); if (isa(Node)) // Drop the attachment. This used to be legal, but there's no // upgrade path. 
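Both the old inline METADATA_KIND handling and the new parseMetadataKindRecord()/parseMetadataKinds() path feed the same structure: MDKindMap translates the kind numbers stored in this particular bitcode file into the loading module's kind IDs, and the attachment parser below rejects IDs it has never registered. Reduced to its essentials (hypothetical standalone types, not the reader's):

    #include <map>

    struct KindRemapper {
      std::map<unsigned, unsigned> MDKindMap; // bitcode kind -> module kind

      // Mirrors the "Conflicting METADATA_KIND records" check.
      bool registerKind(unsigned BitcodeKind, unsigned ModuleKind) {
        return MDKindMap.insert({BitcodeKind, ModuleKind}).second;
      }
      // Returns -1 on the "Invalid ID" error path.
      int remap(unsigned BitcodeKind) const {
        auto It = MDKindMap.find(BitcodeKind);
        return It == MDKindMap.end() ? -1 : int(It->second);
      }
    };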
@@ -3303,17 +3919,17 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { } } -static std::error_code typeCheckLoadStoreInst(DiagnosticHandlerFunction DH, - Type *ValType, Type *PtrType) { +static std::error_code typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { + LLVMContext &Context = PtrType->getContext(); if (!isa(PtrType)) - return error(DH, "Load/Store operand is not a pointer type"); + return error(Context, "Load/Store operand is not a pointer type"); Type *ElemType = cast(PtrType)->getElementType(); if (ValType && ValType != ElemType) - return error(DH, "Explicit load/store type does not match pointee type of " - "pointer operand"); + return error(Context, "Explicit load/store type does not match pointee " + "type of pointer operand"); if (!PointerType::isLoadableOrStorableType(ElemType)) - return error(DH, "Cannot load/store from pointer"); + return error(Context, "Cannot load/store from pointer"); return std::error_code(); } @@ -3324,11 +3940,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); - unsigned ModuleMDValueListSize = MDValueList.size(); + unsigned ModuleMetadataListSize = MetadataList.size(); // Add all the function arguments to the value table. - for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ValueList.push_back(I); + for (Argument &I : F->args()) + ValueList.push_back(&I); unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; @@ -3344,6 +3960,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { return nullptr; }; + std::vector OperandBundles; + // Read all the records. SmallVector Record; while (1) { @@ -3452,8 +4070,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { unsigned ScopeID = Record[2], IAID = Record[3]; MDNode *Scope = nullptr, *IA = nullptr; - if (ScopeID) Scope = cast(MDValueList.getValueFwdRef(ScopeID-1)); - if (IAID) IA = cast(MDValueList.getValueFwdRef(IAID-1)); + if (ScopeID) + Scope = cast(MetadataList.getValueFwdRef(ScopeID - 1)); + if (IAID) + IA = cast(MetadataList.getValueFwdRef(IAID - 1)); LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); I = nullptr; @@ -3515,7 +4135,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { CurBB->getInstList().push_back(Temp); } } else { - I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + auto CastOp = (Instruction::CastOps)Opc; + if (!CastInst::castIsValid(CastOp, Op, ResTy)) + return error("Invalid cast"); + I = CastInst::Create(CastOp, Op, ResTy); } InstructionList.push_back(I); break; @@ -3811,6 +4434,110 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } break; } + case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#] + if (Record.size() != 1 && Record.size() != 2) + return error("Invalid record"); + unsigned Idx = 0; + Value *CleanupPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + if (!CleanupPad) + return error("Invalid record"); + BasicBlock *UnwindDest = nullptr; + if (Record.size() == 2) { + UnwindDest = getBasicBlock(Record[Idx++]); + if (!UnwindDest) + return error("Invalid record"); + } + + I = CleanupReturnInst::Create(CleanupPad, UnwindDest); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#] + if (Record.size() != 2) + return error("Invalid record"); + unsigned Idx = 0; + Value *CatchPad = + getValue(Record, Idx++, NextValueNo, 
Type::getTokenTy(Context)); + if (!CatchPad) + return error("Invalid record"); + BasicBlock *BB = getBasicBlock(Record[Idx++]); + if (!BB) + return error("Invalid record"); + + I = CatchReturnInst::Create(CatchPad, BB); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?] + // We must have, at minimum, the outer scope and the number of arguments. + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Idx = 0; + + Value *ParentPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + + unsigned NumHandlers = Record[Idx++]; + + SmallVector Handlers; + for (unsigned Op = 0; Op != NumHandlers; ++Op) { + BasicBlock *BB = getBasicBlock(Record[Idx++]); + if (!BB) + return error("Invalid record"); + Handlers.push_back(BB); + } + + BasicBlock *UnwindDest = nullptr; + if (Idx + 1 == Record.size()) { + UnwindDest = getBasicBlock(Record[Idx++]); + if (!UnwindDest) + return error("Invalid record"); + } + + if (Record.size() != Idx) + return error("Invalid record"); + + auto *CatchSwitch = + CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers); + for (BasicBlock *Handler : Handlers) + CatchSwitch->addHandler(Handler); + I = CatchSwitch; + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHPAD: + case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*] + // We must have, at minimum, the outer scope and the number of arguments. + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Idx = 0; + + Value *ParentPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + + unsigned NumArgOperands = Record[Idx++]; + + SmallVector Args; + for (unsigned Op = 0; Op != NumArgOperands; ++Op) { + Value *Val; + if (getValueTypePair(Record, Idx, NextValueNo, Val)) + return error("Invalid record"); + Args.push_back(Val); + } + + if (Record.size() != Idx) + return error("Invalid record"); + + if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD) + I = CleanupPadInst::Create(ParentPad, Args); + else + I = CatchPadInst::Create(ParentPad, Args); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { @@ -3973,10 +4700,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } } - I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops); + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles); + OperandBundles.clear(); InstructionList.push_back(I); - cast(I) - ->setCallingConv(static_cast(~(1U << 13) & CCInfo)); + cast(I)->setCallingConv( + static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); break; } @@ -4081,6 +4809,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { uint64_t AlignRecord = Record[3]; const uint64_t InAllocaMask = uint64_t(1) << 5; const uint64_t ExplicitTypeMask = uint64_t(1) << 6; + // Reserve bit 7 for SwiftError flag. 
+ // const uint64_t SwiftErrorMask = uint64_t(1) << 7; const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask; bool InAlloca = AlignRecord & InAllocaMask; Type *Ty = getTypeByID(Record[0]); @@ -4115,8 +4845,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 3 == Record.size()) Ty = getTypeByID(Record[OpNum++]); - if (std::error_code EC = - typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType())) + if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); @@ -4140,8 +4869,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 5 == Record.size()) Ty = getTypeByID(Record[OpNum++]); - if (std::error_code EC = - typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType())) + if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); @@ -4175,8 +4903,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OpNum + 2 != Record.size()) return error("Invalid record"); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Val->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) @@ -4199,8 +4927,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OpNum + 4 != Record.size()) return error("Invalid record"); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Val->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == NotAtomic || Ordering == Acquire || @@ -4237,8 +4965,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Cmp->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return EC; AtomicOrdering FailureOrdering; if (Record.size() < 7) @@ -4299,7 +5027,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_CALL: { - // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] + // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...] 
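The CCInfo word unpacked in the CALL case below packs several flags around the calling convention. A sketch of the layout, assuming the CALL_* bit positions this revision defines in LLVMBitCodes.h (CALL_TAIL = 0, CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15, CALL_NOTAIL = 16, CALL_FMF = 17); the convention itself occupies bits 1-10, hence the 0x7ff mask used in the code:

    #include <cstdint>

    struct CallFlags {
      bool Tail, MustTail, NoTail, HasExplicitType, HasFMF;
      unsigned CConv;
    };

    static CallFlags unpackCCInfo(uint64_t CCInfo) {
      CallFlags F;
      F.Tail            = CCInfo & (1u << 0);   // CALL_TAIL
      F.CConv           = (CCInfo & 0x7ff) >> 1; // CALL_CCONV, 10 bits
      F.MustTail        = CCInfo & (1u << 14);  // CALL_MUSTTAIL
      F.HasExplicitType = CCInfo & (1u << 15);  // CALL_EXPLICIT_TYPE
      F.NoTail          = CCInfo & (1u << 16);  // CALL_NOTAIL
      F.HasFMF          = CCInfo & (1u << 17);  // CALL_FMF
      return F;
    }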
if (Record.size() < 3) return error("Invalid record"); @@ -4307,8 +5035,15 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { AttributeSet PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; + FastMathFlags FMF; + if ((CCInfo >> bitc::CALL_FMF) & 1) { + FMF = getDecodedFastMathFlags(Record[OpNum++]); + if (!FMF.any()) + return error("Fast math flags indicator set for call with no FMF"); + } + FunctionType *FTy = nullptr; - if (CCInfo >> 15 & 1 && + if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit call type is not a function type"); @@ -4354,17 +5089,26 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } } - I = CallInst::Create(FTy, Callee, Args); + I = CallInst::Create(FTy, Callee, Args, OperandBundles); + OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( - static_cast((~(1U << 14) & CCInfo) >> 1)); + static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); CallInst::TailCallKind TCK = CallInst::TCK_None; - if (CCInfo & 1) + if (CCInfo & 1 << bitc::CALL_TAIL) TCK = CallInst::TCK_Tail; - if (CCInfo & (1 << 14)) + if (CCInfo & (1 << bitc::CALL_MUSTTAIL)) TCK = CallInst::TCK_MustTail; + if (CCInfo & (1 << bitc::CALL_NOTAIL)) + TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); + if (FMF.any()) { + if (!isa(I)) + return error("Fast-math-flags specified for call without " + "floating-point scalar or vector return type"); + I->setFastMathFlags(FMF); + } break; } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] @@ -4379,6 +5123,28 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { InstructionList.push_back(I); break; } + + case bitc::FUNC_CODE_OPERAND_BUNDLE: { + // A call or an invoke can be optionally prefixed with some variable + // number of operand bundle blocks. These blocks are read into + // OperandBundles and consumed at the next call or invoke instruction. + + if (Record.size() < 1 || Record[0] >= BundleTags.size()) + return error("Invalid record"); + + std::vector Inputs; + + unsigned OpNum = 1; + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return error("Invalid record"); + Inputs.push_back(Op); + } + + OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs)); + continue; + } } // Add instruction to end of current BB. If there is no current BB, reject @@ -4387,6 +5153,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { delete I; return error("Invalid instruction with no BB"); } + if (!OperandBundles.empty()) { + delete I; + return error("Operand bundles found with no consumer"); + } CurBB->getInstList().push_back(I); // If this was a terminator instruction, move to the next block. @@ -4402,6 +5172,9 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OutOfRecordLoop: + if (!OperandBundles.empty()) + return error("Operand bundles found with no consumer"); + // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (!A->getParent()) { @@ -4421,7 +5194,7 @@ OutOfRecordLoop: // Trim the value list down to the size it was before we parsed this function. 
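The FUNC_CODE_OPERAND_BUNDLE case above follows an accumulate-and-consume protocol: bundle records stash their data in a side vector, the next call or invoke attaches and clears it, and leftovers at the next non-call instruction or at function end are the "Operand bundles found with no consumer" error. A toy model of that protocol (hypothetical types):

    #include <string>
    #include <vector>

    struct Bundle { std::string Tag; std::vector<int> Inputs; };

    struct FunctionParserModel {
      std::vector<Bundle> OperandBundles;

      void onBundleRecord(Bundle B) { OperandBundles.push_back(std::move(B)); }

      void onCallRecord() {
        // Build the call/invoke with OperandBundles attached...
        OperandBundles.clear(); // ...then consume them.
      }

      // False means "Operand bundles found with no consumer".
      bool onOtherInstruction() const { return OperandBundles.empty(); }
    };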
ValueList.shrinkTo(ModuleValueListSize); - MDValueList.shrinkTo(ModuleMDValueListSize); + MetadataList.shrinkTo(ModuleMetadataListSize); std::vector().swap(FunctionBBs); return std::error_code(); } @@ -4431,11 +5204,14 @@ std::error_code BitcodeReader::findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) - return error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = parseModule(true)) + // This is the fallback handling for the old format bitcode that + // didn't contain the function index in the VST, or when we have + // an anonymous function which would not have a VST entry. + // Assert that we have one of those two cases. + assert(VSTOffset == 0 || !F->hasName()); + // Parse the next body in the stream and set its position in the + // DeferredFunctionInfo map. + if (std::error_code EC = rememberAndSkipFunctionBodies()) return EC; } return std::error_code(); @@ -4448,8 +5224,12 @@ std::error_code BitcodeReader::findFunctionInStream( void BitcodeReader::releaseBuffer() { Buffer.release(); } std::error_code BitcodeReader::materialize(GlobalValue *GV) { - if (std::error_code EC = materializeMetadata()) - return EC; + // In older bitcode we must materialize the metadata before parsing + // any functions, in order to set up the MetadataList properly. + if (!SeenModuleValuesRecord) { + if (std::error_code EC = materializeMetadata()) + return EC; + } Function *F = dyn_cast(GV); // If it's not a function or is already material, ignore the request. @@ -4476,7 +5256,8 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { - for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) { + for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end(); + UI != UE;) { User *U = *UI; ++UI; if (CallInst *CI = dyn_cast(U)) @@ -4484,41 +5265,16 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { } } + // Finish fn->subprogram upgrade for materialized functions. + if (DISubprogram *SP = FunctionsWithSPs.lookup(F)) + F->setSubprogram(SP); + // Bring in any functions that this function forward-referenced via // blockaddresses. return materializeForwardReferencedFunctions(); } -bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { - const Function *F = dyn_cast(GV); - if (!F || F->isDeclaration()) - return false; - - // Dematerializing F would leave dangling references that wouldn't be - // reconnected on re-materialization. - if (BlockAddressesTaken.count(F)) - return false; - - return DeferredFunctionInfo.count(const_cast(F)); -} - -void BitcodeReader::dematerialize(GlobalValue *GV) { - Function *F = dyn_cast(GV); - // If this function isn't dematerializable, this is a noop. - if (!F || !isDematerializable(F)) - return; - - assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); - - // Just forget the function body, we can remat it later. 
- F->dropAllReferences(); - F->setIsMaterializable(true); -} - -std::error_code BitcodeReader::materializeModule(Module *M) { - assert(M == TheModule && - "Can only Materialize the Module this BitcodeReader is attached to."); - +std::error_code BitcodeReader::materializeModule() { if (std::error_code EC = materializeMetadata()) return EC; @@ -4527,16 +5283,16 @@ std::error_code BitcodeReader::materializeModule(Module *M) { // Iterate over the module, deserializing any functions that are still on // disk. - for (Module::iterator F = TheModule->begin(), E = TheModule->end(); - F != E; ++F) { - if (std::error_code EC = materialize(F)) + for (Function &F : *TheModule) { + if (std::error_code EC = materialize(&F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - parseModule(true); + // At this point, if there are any function bodies, parse the rest of + // the bits in the module past the last function block we have recorded + // through either lazy scanning or the VST. + if (LastFunctionBlockBit || NextUnreadBit) + parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit + : NextUnreadBit); // Check that all block address forward references got resolved (as we // promised above). @@ -4561,7 +5317,7 @@ std::error_code BitcodeReader::materializeModule(Module *M) { for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) UpgradeInstWithTBAATag(InstsWithTBAATag[I]); - UpgradeDebugInfo(*M); + UpgradeDebugInfo(*TheModule); return std::error_code(); } @@ -4622,6 +5378,416 @@ BitcodeReader::initLazyStream(std::unique_ptr Streamer) { return std::error_code(); } +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E, + const Twine &Message) { + return ::error(DiagnosticHandler, make_error_code(E), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(const Twine &Message) { + return ::error(DiagnosticHandler, + make_error_code(BitcodeError::CorruptedBitcode), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E) { + return ::error(DiagnosticHandler, make_error_code(E)); +} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy, bool CheckFuncSummaryPresenceOnly) + : DiagnosticHandler(DiagnosticHandler), Buffer(Buffer), IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy, + bool CheckFuncSummaryPresenceOnly) + : DiagnosticHandler(DiagnosticHandler), Buffer(nullptr), IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +void FunctionIndexBitcodeReader::freeState() { Buffer = nullptr; } + +void FunctionIndexBitcodeReader::releaseBuffer() { Buffer.release(); } + +// Specialized value symbol table parser used when reading function index +// blocks where we don't actually create global values. +// At the end of this routine the function index is populated with a map +// from function name to FunctionInfo. The function info contains +// the function block's bitcode offset as well as the offset into the +// function summary section. 
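Reduced to its data shape, what this VST pass populates looks roughly like the following (hypothetical standalone types): each entry pairs a function name with an offset, either the function body's bit offset (per-module VST_CODE_FNENTRY) or the summary record's offset (combined-index VST_CODE_COMBINED_FNENTRY), plus the parsed summary when reading eagerly. A name can map to several entries, since a COMDAT may yield multiple function infos for one name (see the lookup loop in readFunctionSummary later in this patch):

    #include <cstdint>
    #include <map>
    #include <memory>
    #include <string>

    struct FunctionSummaryStub { unsigned InstCount = 0; };

    struct FunctionInfoStub {
      uint64_t BitcodeOffset = 0;                  // body or summary offset
      std::unique_ptr<FunctionSummaryStub> Summary; // null when IsLazy
    };

    using FunctionInfoMapStub = std::multimap<std::string, FunctionInfoStub>;

    static void addFunctionInfo(FunctionInfoMapStub &M, const std::string &Name,
                                FunctionInfoStub Info) {
      M.emplace(Name, std::move(Info));
    }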
+std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() { + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). + break; + case bitc::VST_CODE_FNENTRY: { + // VST_FNENTRY: [valueid, offset, namechar x N] + if (convertToString(Record, 2, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + uint64_t FuncOffset = Record[1]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(ValueID); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + case bitc::VST_CODE_COMBINED_FNENTRY: { + // VST_FNENTRY: [offset, namechar x N] + if (convertToString(Record, 1, ValueName)) + return error("Invalid record"); + uint64_t FuncSummaryOffset = Record[0]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncSummaryOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(FuncSummaryOffset); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + } + } +} + +// Parse just the blocks needed for function index building out of the module. +// At the end of this routine the function Index is populated with a map +// from function name to FunctionInfo. The function info contains +// either the parsed function summary information (when parsing summaries +// eagerly), or just to the function summary record's offset +// if parsing lazily (IsLazy). +std::error_code FunctionIndexBitcodeReader::parseModule() { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return error("Invalid record"); + + // Read the function index for this module. + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + + case BitstreamEntry::SubBlock: + if (CheckFuncSummaryPresenceOnly) { + if (Entry.ID == bitc::FUNCTION_SUMMARY_BLOCK_ID) { + SeenFuncSummary = true; + // No need to parse the rest since we found the summary. + return std::error_code(); + } + if (Stream.SkipBlock()) + return error("Invalid record"); + continue; + } + switch (Entry.ID) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return error("Invalid record"); + break; + case bitc::BLOCKINFO_BLOCK_ID: + // Need to parse these to get abbrev ids (e.g. 
for VST) + if (Stream.ReadBlockInfoBlock()) + return error("Malformed block"); + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (std::error_code EC = parseValueSymbolTable()) + return EC; + break; + case bitc::FUNCTION_SUMMARY_BLOCK_ID: + SeenFuncSummary = true; + if (IsLazy) { + // Lazy parsing of summary info, skip it. + if (Stream.SkipBlock()) + return error("Invalid record"); + } else if (std::error_code EC = parseEntireSummary()) + return EC; + break; + case bitc::MODULE_STRTAB_BLOCK_ID: + if (std::error_code EC = parseModuleStringTable()) + return EC; + break; + } + continue; + + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; + } + } +} + +// Eagerly parse the entire function summary block (i.e. for all functions +// in the index). This populates the FunctionSummary objects in +// the index. +std::error_code FunctionIndexBitcodeReader::parseEntireSummary() { + if (Stream.EnterSubBlock(bitc::FUNCTION_SUMMARY_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. The record format depends on whether this + // is a per-module index or a combined index file. In the per-module + // case the records contain the associated value's ID for correlation + // with VST entries. In the combined index the correlation is done + // via the bitcode offset of the summary records (which were saved + // in the combined index VST entries). The records also contain + // information used for ThinLTO renaming and importing. + Record.clear(); + uint64_t CurRecordBit = Stream.GetCurrentBitNo(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + // FS_PERMODULE_ENTRY: [valueid, islocal, instcount] + case bitc::FS_CODE_PERMODULE_ENTRY: { + unsigned ValueID = Record[0]; + bool IsLocal = Record[1]; + unsigned InstCount = Record[2]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setLocalFunction(IsLocal); + // The module path string ref set in the summary must be owned by the + // index's module string table. Since we don't have a module path + // string table section in the per-module index, we create a single + // module path string table entry with an empty (0) ID to take + // ownership. + FS->setModulePath( + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + SummaryMap[ValueID] = std::move(FS); + } + // FS_COMBINED_ENTRY: [modid, instcount] + case bitc::FS_CODE_COMBINED_ENTRY: { + uint64_t ModuleId = Record[0]; + unsigned InstCount = Record[1]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setModulePath(ModuleIdMap[ModuleId]); + SummaryMap[CurRecordBit] = std::move(FS); + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the module string table block into the Index. +// This populates the ModulePathStringTable map in the index. 
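The mapping built by the routine below has two halves: the index owns each interned module path keyed by its numeric module ID, and ModuleIdMap lets later FS_COMBINED_ENTRY records resolve their modid back to that owned string. A sketch of the shape (hypothetical types; std::map node stability makes the stored pointers safe):

    #include <cstdint>
    #include <map>
    #include <string>

    struct ModuleStringTableModel {
      std::map<std::string, uint64_t> ModulePathStringTable; // owned paths
      std::map<uint64_t, const std::string *> ModuleIdMap;   // modid -> path

      // One MST_CODE_ENTRY: [modid, namechar x N].
      void addEntry(uint64_t ModuleId, const std::string &Path) {
        auto It = ModulePathStringTable.emplace(Path, ModuleId).first;
        ModuleIdMap[ModuleId] = &It->first;
      }
    };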
+std::error_code FunctionIndexBitcodeReader::parseModuleStringTable() { + if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + SmallString<128> ModulePath; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + case bitc::MST_CODE_ENTRY: { + // MST_ENTRY: [modid, namechar x N] + if (convertToString(Record, 1, ModulePath)) + return error("Invalid record"); + uint64_t ModuleId = Record[0]; + StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = ModulePathInMap; + ModulePath.clear(); + break; + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the function info index from the bitcode streamer into the given index. +std::error_code FunctionIndexBitcodeReader::parseSummaryIndexInto( + std::unique_ptr Streamer, FunctionInfoIndex *I) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + if (Stream.AtEndOfStream()) { + // We didn't really read a proper Module block. + return error("Malformed block"); + } + + BitstreamEntry Entry = + Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); + + if (Entry.Kind != BitstreamEntry::SubBlock) + return error("Malformed block"); + + // If we see a MODULE_BLOCK, parse it to find the blocks needed for + // building the function summary index. + if (Entry.ID == bitc::MODULE_BLOCK_ID) + return parseModule(); + + if (Stream.SkipBlock()) + return error("Invalid record"); + } +} + +// Parse the function information at the given offset in the buffer into +// the index. Used to support lazy parsing of function summaries from the +// combined index during importing. +// TODO: This function is not yet complete as it won't have a consumer +// until ThinLTO function importing is added. +std::error_code FunctionIndexBitcodeReader::parseFunctionSummary( + std::unique_ptr Streamer, FunctionInfoIndex *I, + size_t FunctionSummaryOffset) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + Stream.JumpToBit(FunctionSummaryOffset); + + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + default: + return error("Malformed block"); + case BitstreamEntry::Record: + // The expected case. + break; + } + + // TODO: Read a record. This interface will be completed when ThinLTO + // importing is added so that it can be tested. 
+ SmallVector Record; + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::FS_CODE_COMBINED_ENTRY: + default: + return error("Invalid record"); + } + + return std::error_code(); +} + +std::error_code +FunctionIndexBitcodeReader::initStream(std::unique_ptr Streamer) { + if (Streamer) + return initLazyStream(std::move(Streamer)); + return initStreamFromBuffer(); +} + +std::error_code FunctionIndexBitcodeReader::initStreamFromBuffer() { + const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) + return error("Invalid bitcode signature"); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) + return error("Invalid bitcode wrapper header"); + + StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); + Stream.init(&*StreamFile); + + return std::error_code(); +} + +std::error_code FunctionIndexBitcodeReader::initLazyStream( + std::unique_ptr Streamer) { + // Check and strip off the bitcode wrapper; BitstreamReader expects never to + // see it. + auto OwnedBytes = + llvm::make_unique(std::move(Streamer)); + StreamingMemoryObject &Bytes = *OwnedBytes; + StreamFile = llvm::make_unique(std::move(OwnedBytes)); + Stream.init(&*StreamFile); + + unsigned char buf[16]; + if (Bytes.readBytes(buf, 16, 0) != 16) + return error("Invalid bitcode signature"); + + if (!isBitcode(buf, buf + 16)) + return error("Invalid bitcode signature"); + + if (isBitcodeWrapper(buf, buf + 4)) { + const unsigned char *bitcodeStart = buf; + const unsigned char *bitcodeEnd = buf + 16; + SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); + Bytes.dropLeadingBytes(bitcodeStart - buf); + Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart); + } + return std::error_code(); +} + namespace { class BitcodeErrorCategoryType : public std::error_category { const char *name() const LLVM_NOEXCEPT override { @@ -4669,7 +5835,7 @@ getBitcodeModuleImpl(std::unique_ptr Streamer, StringRef Name, if (MaterializeAll) { // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) + if (std::error_code EC = M->materializeAll()) return cleanupOnError(EC); } else { // Resolve forward references from blockaddresses. 
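The wrapper header skipped in initStreamFromBuffer() and initLazyStream() above is, assuming the layout used by LLVM's isBitcodeWrapper()/SkipBitcodeWrapperHeader(), five little-endian 32-bit words: Magic (0x0B17C0DE), Version, BitcodeOffset, BitcodeSize, CPUType; the raw 'BC\xC0\xDE' stream then begins at BitcodeOffset. A byte-level sketch (names are ours):

    #include <cstddef>
    #include <cstdint>

    static uint32_t readLE32(const uint8_t *P) {
      return uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
             uint32_t(P[3]) << 24;
    }

    // Advances Begin past a wrapper header if one is present and sane.
    static bool skipWrapperHeader(const uint8_t *&Begin, const uint8_t *End) {
      if (End - Begin < 20 || readLE32(Begin) != 0x0B17C0DE)
        return false; // not wrapped; Begin already points at raw bitcode
      uint32_t Offset = readLE32(Begin + 8);
      uint32_t Size = readLE32(Begin + 12);
      if (uint64_t(Offset) + Size > uint64_t(End - Begin))
        return false; // malformed wrapper
      Begin += Offset; // jump to the embedded bitcode
      return true;
    }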
@@ -4690,10 +5856,8 @@ getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
 static ErrorOr<std::unique_ptr<Module>>
 getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
                          LLVMContext &Context, bool MaterializeAll,
-                         DiagnosticHandlerFunction DiagnosticHandler,
                          bool ShouldLazyLoadMetadata = false) {
-  BitcodeReader *R =
-      new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
+  BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
 
   ErrorOr<std::unique_ptr<Module>> Ret =
       getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
@@ -4705,41 +5869,124 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
   return Ret;
 }
 
-ErrorOr<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(
-    std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
-    DiagnosticHandlerFunction DiagnosticHandler, bool ShouldLazyLoadMetadata) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
+                           LLVMContext &Context, bool ShouldLazyLoadMetadata) {
   return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
-                                  DiagnosticHandler, ShouldLazyLoadMetadata);
+                                  ShouldLazyLoadMetadata);
 }
 
-ErrorOr<std::unique_ptr<Module>> llvm::getStreamedBitcodeModule(
-    StringRef Name, std::unique_ptr<DataStreamer> Streamer,
-    LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getStreamedBitcodeModule(StringRef Name,
+                               std::unique_ptr<DataStreamer> Streamer,
+                               LLVMContext &Context) {
   std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
-  BitcodeReader *R = new BitcodeReader(Context, DiagnosticHandler);
+  BitcodeReader *R = new BitcodeReader(Context);
 
   return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
                               false);
 }
 
-ErrorOr<std::unique_ptr<Module>>
-llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
-                       DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+                                                        LLVMContext &Context) {
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
-  return getLazyBitcodeModuleImpl(std::move(Buf), Context, true,
-                                  DiagnosticHandler);
+  return getLazyBitcodeModuleImpl(std::move(Buf), Context, true);
   // TODO: Restore the use-lists to the in-memory state when the bitcode was
   // written. We must defer until the Module has been fully materialized.
 }
 
-std::string
-llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context,
-                             DiagnosticHandlerFunction DiagnosticHandler) {
+std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer,
+                                         LLVMContext &Context) {
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
-  auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context,
-                                            DiagnosticHandler);
+  auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
   ErrorOr<std::string> Triple = R->parseTriple();
   if (Triple.getError())
     return "";
   return Triple.get();
 }
+
+std::string llvm::getBitcodeProducerString(MemoryBufferRef Buffer,
+                                           LLVMContext &Context) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  BitcodeReader R(Buf.release(), Context);
+  ErrorOr<std::string> ProducerString = R.parseIdentificationBlock();
+  if (ProducerString.getError())
+    return "";
+  return ProducerString.get();
+}
+
+// Parse the specified bitcode buffer, returning the function info index.
+// If IsLazy is false, parse the entire function summary into
+// the index. Otherwise skip the function summary section, and only create
+// an index object with a map from function name to function summary offset.
+// The index is used to perform lazy function summary reading later.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+llvm::getFunctionInfoIndex(MemoryBufferRef Buffer,
+                           DiagnosticHandlerFunction DiagnosticHandler,
+                           bool IsLazy) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, IsLazy);
+
+  auto Index = llvm::make_unique<FunctionInfoIndex>();
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return EC;
+  };
+
+  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get()))
+    return cleanupOnError(EC);
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return std::move(Index);
+}
+
+// Check if the given bitcode buffer contains a function summary block.
+bool llvm::hasFunctionSummary(MemoryBufferRef Buffer,
+                              DiagnosticHandlerFunction DiagnosticHandler) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, false, true);
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return false;
+  };
+
+  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr))
+    return cleanupOnError(EC);
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return R.foundFuncSummary();
+}
+
+// This method supports lazy reading of function summary data from the combined
+// index during ThinLTO function importing. When reading the combined index
+// file, getFunctionInfoIndex is first invoked with IsLazy=true.
+// Then this method is called for each function considered for importing,
+// to parse the summary information for the given function name into
+// the index.
+std::error_code llvm::readFunctionSummary(
+    MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler,
+    StringRef FunctionName, std::unique_ptr<FunctionInfoIndex> Index) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler);
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return EC;
+  };
+
+  // Lookup the given function name in the FunctionMap, which may
+  // contain a list of function infos in the case of a COMDAT. Walk through
+  // and parse each function summary info at the function summary offset
+  // recorded when parsing the value symbol table.
+  for (const auto &FI : Index->getFunctionInfoList(FunctionName)) {
+    size_t FunctionSummaryOffset = FI->bitcodeIndex();
+    if (std::error_code EC =
+            R.parseFunctionSummary(nullptr, Index.get(), FunctionSummaryOffset))
+      return cleanupOnError(EC);
+  }
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
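[Editor's aside: the comment above describes the intended lazy-reading protocol, and the in-code TODO notes that no consumer exists yet in this patch. As a rough sketch of the call sequence a future ThinLTO importer might use — hypothetical driver code, where `CombinedIndexBuf` is a MemoryBufferRef for the combined index file and `DiagHandler` is a DiagnosticHandlerFunction, using only APIs introduced here:]

// Build a lazy index first: function summaries are skipped, only the
// name -> summary-offset map from the VST is populated.
ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
    llvm::getFunctionInfoIndex(CombinedIndexBuf, DiagHandler, /*IsLazy=*/true);
if (std::error_code EC = IndexOrErr.getError())
  return EC;
std::unique_ptr<FunctionInfoIndex> Index = std::move(IndexOrErr.get());

// For each import candidate, materialize just that function's summary.
// Note that readFunctionSummary takes the index by value, consuming the
// unique_ptr; a caller looping over many names would need to thread the
// pointer through accordingly.
if (std::error_code EC = llvm::readFunctionSummary(
        CombinedIndexBuf, DiagHandler, "candidate_fn", std::move(Index)))
  return EC;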
+ return std::error_code(); +} diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1a70ba5ac127..a1f87863757b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -13,14 +13,18 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "ValueEnumerator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" @@ -174,6 +178,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::InaccessibleMemOnly: + return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; + case Attribute::InaccessibleMemOrArgMemOnly: + return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: @@ -198,6 +206,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; case Attribute::NoInline: return bitc::ATTR_KIND_NO_INLINE; + case Attribute::NoRecurse: + return bitc::ATTR_KIND_NO_RECURSE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: @@ -405,6 +415,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; + case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; @@ -573,10 +584,41 @@ static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) { } } -// Emit top-level description of module, including target triple, inline asm, -// descriptors for global variables, and function prototype info. -static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, - BitstreamWriter &Stream) { +/// Write a record that will eventually hold the word offset of the +/// module-level VST. For now the offset is 0, which will be backpatched +/// after the real VST is written. Returns the bit offset to backpatch. +static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST, + BitstreamWriter &Stream) { + if (VST.empty()) + return 0; + + // Write a placeholder value in for the offset of the real VST, + // which is written after the function blocks so that it can include + // the offset of each function. The placeholder offset will be + // updated when the real VST is written. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET)); + // Blocks are 32-bit aligned, so we can use a 32-bit word offset to + // hold the real VST offset. Must use fixed instead of VBR as we don't + // know how many VBR chunks to reserve ahead of time. 
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv); + + // Emit the placeholder + uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; + Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals); + + // Compute and return the bit offset to the placeholder, which will be + // patched when the real VST is written. We can simply subtract the 32-bit + // fixed size from the current bit number to get the location to backpatch. + return Stream.GetCurrentBitNo() - 32; +} + +/// Emit top-level description of module, including target triple, inline asm, +/// descriptors for global variables, and function prototype info. +/// Returns the bit offset to backpatch with the location of the real VST. +static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE, + BitstreamWriter &Stream) { // Emit various pieces of data attached to a module. if (!M->getTargetTriple().empty()) WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(), @@ -725,7 +767,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the alias information. for (const GlobalAlias &A : M->aliases()) { // ALIAS: [alias type, aliasee val#, linkage, visibility] - Vals.push_back(VE.getTypeID(A.getType())); + Vals.push_back(VE.getTypeID(A.getValueType())); + Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); Vals.push_back(getEncodedLinkage(A)); Vals.push_back(getEncodedVisibility(A)); @@ -736,6 +779,25 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); } + + // Write a record indicating the number of module-level metadata IDs + // This is needed because the ids of metadata are assigned implicitly + // based on their ordering in the bitcode, with the function-level + // metadata ids starting after the module-level metadata ids. For + // function importing where we lazy load the metadata as a postpass, + // we want to avoid parsing the module-level metadata before parsing + // the imported functions. 
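[Editor's aside: the backpatch contract introduced above is easy to misread, so here is a condensed restatement. This is illustrative only — in the patch the emission and the patching are split between WriteValueSymbolTableForwardDecl and WriteValueSymbolTable — and it assumes the `Stream`, `VSTOffsetAbbrev`, and `BitcodeStartBit` names used there.]

// Emit the placeholder: the record's single operand is a Fixed(32) field,
// so it occupies exactly the 32 bits before the current position.
uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; // 0 = not yet known
Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals);
uint64_t VSTOffsetPlaceholder = Stream.GetCurrentBitNo() - 32;

// ... after all function blocks, just before the real VST is written ...
uint64_t VSTOffsetBits = Stream.GetCurrentBitNo() - BitcodeStartBit;
assert((VSTOffsetBits & 31) == 0 && "VST block not 32-bit aligned");
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffsetBits / 32);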
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv); + Vals.push_back(VE.numMDs()); + Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev); + Vals.clear(); + + uint64_t VSTOffsetPlaceholder = + WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream); + return VSTOffsetPlaceholder; } static uint64_t GetOptimizationFlags(const Value *V) { @@ -943,7 +1005,8 @@ static void WriteDICompileUnit(const DICompileUnit *N, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + assert(N->isDistinct() && "Expected distinct compile units"); + Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); @@ -958,6 +1021,7 @@ static void WriteDICompileUnit(const DICompileUnit *N, Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); + Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); @@ -982,7 +1046,6 @@ static void WriteDISubprogram(const DISubprogram *N, const ValueEnumerator &VE, Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); - Record.push_back(VE.getMetadataOrNullID(N->getRawFunction())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); @@ -1034,6 +1097,33 @@ static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE, Record.clear(); } +static void WriteDIMacro(const DIMacro *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); + + Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); + Record.clear(); +} + +static void WriteDIMacroFile(const DIMacroFile *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, + unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getFile())); + Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); + + Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); + Record.clear(); +} + static void WriteDIModule(const DIModule *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { @@ -1100,7 +1190,6 @@ static void WriteDILocalVariable(const DILocalVariable *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); - Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); @@ -1310,16 +1399,15 @@ static void WriteMetadataAttachment(const Function &F, Record.clear(); } - for 
(Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) { MDs.clear(); - I->getAllMetadataOtherThanDebugLoc(MDs); + I.getAllMetadataOtherThanDebugLoc(MDs); // If no metadata, ignore instruction. if (MDs.empty()) continue; - Record.push_back(VE.getInstructionID(I)); + Record.push_back(VE.getInstructionID(&I)); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); @@ -1342,7 +1430,7 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { if (Names.empty()) return; - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3); for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); @@ -1356,6 +1444,33 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void WriteOperandBundleTags(const Module *M, BitstreamWriter &Stream) { + // Write metadata kinds + // + // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG + // + // OPERAND_BUNDLE_TAG - [strchr x N] + + SmallVector Tags; + M->getOperandBundleTags(Tags); + + if (Tags.empty()) + return; + + Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3); + + SmallVector Record; + + for (auto Tag : Tags) { + Record.append(Tag.begin(), Tag.end()); + + Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0); + Record.clear(); + } + + Stream.ExitBlock(); +} + static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); @@ -1664,6 +1779,23 @@ static bool PushValueAndType(const Value *V, unsigned InstID, return false; } +static void WriteOperandBundles(BitstreamWriter &Stream, ImmutableCallSite CS, + unsigned InstID, ValueEnumerator &VE) { + SmallVector Record; + LLVMContext &C = CS.getInstruction()->getContext(); + + for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + const auto &Bundle = CS.getOperandBundleAt(i); + Record.push_back(C.getOperandBundleTagID(Bundle.getTagName())); + + for (auto &Input : Bundle.Inputs) + PushValueAndType(Input, InstID, Record, VE); + + Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record); + Record.clear(); + } +} + /// pushValue - Like PushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). 
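[Editor's aside: several abbreviations in this patch (the identification string, VST entries, module path strings) use the bitstream's Char6 encoding. For reference, a small sketch of the 6-bit alphabet; this mirrors BitCodeAbbrevOp::isChar6/EncodeChar6 and is shown only for illustration — see also getStringEncoding below, which decides when Char6 is applicable.]

#include <cassert>

// Char6 packs [a-zA-Z0-9._] into 6 bits: 'a'-'z' -> 0-25, 'A'-'Z' -> 26-51,
// '0'-'9' -> 52-61, '.' -> 62, '_' -> 63. Any other byte forces a wider
// fixed-width encoding for the whole string.
static unsigned encodeChar6(char C) {
  if (C >= 'a' && C <= 'z') return C - 'a';
  if (C >= 'A' && C <= 'Z') return C - 'A' + 26;
  if (C >= '0' && C <= '9') return C - '0' + 52;
  if (C == '.') return 62;
  assert(C == '_' && "not a Char6 character");
  return 63;
}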
static void pushValue(const Value *V, unsigned InstID, @@ -1806,10 +1938,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue(SI.getCondition(), InstID, Vals, VE); Vals.push_back(VE.getValueID(SI.getDefaultDest())); - for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); - i != e; ++i) { - Vals.push_back(VE.getValueID(i.getCaseValue())); - Vals.push_back(VE.getValueID(i.getCaseSuccessor())); + for (SwitchInst::ConstCaseIt Case : SI.cases()) { + Vals.push_back(VE.getValueID(Case.getCaseValue())); + Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); } } break; @@ -1826,6 +1957,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const InvokeInst *II = cast(&I); const Value *Callee = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); + + if (II->hasOperandBundles()) + WriteOperandBundles(Stream, II, InstID, VE); + Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeID(II->getAttributes())); @@ -1851,6 +1986,49 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_RESUME; PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; + case Instruction::CleanupRet: { + Code = bitc::FUNC_CODE_INST_CLEANUPRET; + const auto &CRI = cast(I); + pushValue(CRI.getCleanupPad(), InstID, Vals, VE); + if (CRI.hasUnwindDest()) + Vals.push_back(VE.getValueID(CRI.getUnwindDest())); + break; + } + case Instruction::CatchRet: { + Code = bitc::FUNC_CODE_INST_CATCHRET; + const auto &CRI = cast(I); + pushValue(CRI.getCatchPad(), InstID, Vals, VE); + Vals.push_back(VE.getValueID(CRI.getSuccessor())); + break; + } + case Instruction::CleanupPad: + case Instruction::CatchPad: { + const auto &FuncletPad = cast(I); + Code = isa(FuncletPad) ? bitc::FUNC_CODE_INST_CATCHPAD + : bitc::FUNC_CODE_INST_CLEANUPPAD; + pushValue(FuncletPad.getParentPad(), InstID, Vals, VE); + + unsigned NumArgOperands = FuncletPad.getNumArgOperands(); + Vals.push_back(NumArgOperands); + for (unsigned Op = 0; Op != NumArgOperands; ++Op) + PushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals, VE); + break; + } + case Instruction::CatchSwitch: { + Code = bitc::FUNC_CODE_INST_CATCHSWITCH; + const auto &CatchSwitch = cast(I); + + pushValue(CatchSwitch.getParentPad(), InstID, Vals, VE); + + unsigned NumHandlers = CatchSwitch.getNumHandlers(); + Vals.push_back(NumHandlers); + for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) + Vals.push_back(VE.getValueID(CatchPadBB)); + + if (CatchSwitch.hasUnwindDest()) + Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); + break; + } case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; @@ -1902,6 +2080,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 64"); AlignRecord |= AI.isUsedWithInAlloca() << 5; AlignRecord |= 1 << 6; + // Reserve bit 7 for SwiftError flag. 
+ // AlignRecord |= AI.isSwiftError() << 7; Vals.push_back(AlignRecord); break; } @@ -1971,11 +2151,23 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const CallInst &CI = cast(I); FunctionType *FTy = CI.getFunctionType(); + if (CI.hasOperandBundles()) + WriteOperandBundles(Stream, &CI, InstID, VE); + Code = bitc::FUNC_CODE_INST_CALL; Vals.push_back(VE.getAttributeID(CI.getAttributes())); - Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) | - unsigned(CI.isMustTailCall()) << 14 | 1 << 15); + + unsigned Flags = GetOptimizationFlags(&I); + Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | + unsigned(CI.isTailCall()) << bitc::CALL_TAIL | + unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL | + 1 << bitc::CALL_EXPLICIT_TYPE | + unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL | + unsigned(Flags != 0) << bitc::CALL_FMF); + if (Flags != 0) + Vals.push_back(Flags); + Vals.push_back(VE.getTypeID(FTy)); PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee @@ -2008,56 +2200,149 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.clear(); } -// Emit names for globals/functions etc. -static void WriteValueSymbolTable(const ValueSymbolTable &VST, - const ValueEnumerator &VE, - BitstreamWriter &Stream) { - if (VST.empty()) return; +enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; + +/// Determine the encoding to use for the given string name and length. +static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { + bool isChar6 = true; + for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + if (isChar6) + isChar6 = BitCodeAbbrevOp::isChar6(*C); + if ((unsigned char)*C & 128) + // don't bother scanning the rest. + return SE_Fixed8; + } + if (isChar6) + return SE_Char6; + else + return SE_Fixed7; +} + +/// Emit names for globals/functions etc. The VSTOffsetPlaceholder, +/// BitcodeStartBit and FunctionIndex are only passed for the module-level +/// VST, where we are including a function bitcode index and need to +/// backpatch the VST forward declaration record. +static void WriteValueSymbolTable( + const ValueSymbolTable &VST, const ValueEnumerator &VE, + BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0, + uint64_t BitcodeStartBit = 0, + DenseMap> *FunctionIndex = + nullptr) { + if (VST.empty()) { + // WriteValueSymbolTableForwardDecl should have returned early as + // well. Ensure this handling remains in sync by asserting that + // the placeholder offset is not set. + assert(VSTOffsetPlaceholder == 0); + return; + } + + if (VSTOffsetPlaceholder > 0) { + // Get the offset of the VST we are writing, and backpatch it into + // the VST forward declaration record. + uint64_t VSTOffset = Stream.GetCurrentBitNo(); + // The BitcodeStartBit was the stream offset of the actual bitcode + // (e.g. excluding any initial darwin header). + VSTOffset -= BitcodeStartBit; + assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); + } + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY + // records, which are not used in the per-function VSTs. + unsigned FnEntry8BitAbbrev; + unsigned FnEntry7BitAbbrev; + unsigned FnEntry6BitAbbrev; + if (VSTOffsetPlaceholder > 0) { + // 8-bit fixed-width VST_FNENTRY function strings. 
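[Editor's aside: to make getStringEncoding's three-way split concrete, a few sample classifications under its rules — illustrative names, not values from the patch:]

// All characters in the Char6 alphabet -> SE_Char6.
assert(getStringEncoding("main", 4) == SE_Char6);
// '$' is 7-bit ASCII but outside the Char6 alphabet -> SE_Fixed7.
assert(getStringEncoding("_Z3foo$1", 8) == SE_Fixed7);
// Any byte with the high bit set forces full 8-bit units -> SE_Fixed8.
assert(getStringEncoding("caf\xC3\xA9", 5) == SE_Fixed8);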
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width VST_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 VST_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + } + // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; - for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); - SI != SE; ++SI) { - - const ValueName &Name = *SI; - + for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. - bool is7Bit = true; - bool isChar6 = true; - for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength(); - C != E; ++C) { - if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) { - is7Bit = false; - break; // don't bother scanning the rest. - } - } + StringEncoding Bits = + getStringEncoding(Name.getKeyData(), Name.getKeyLength()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; + NameVals.push_back(VE.getValueID(Name.getValue())); + + Function *F = dyn_cast(Name.getValue()); + if (!F) { + // If value is an alias, need to get the aliased base object to + // see if it is a function. + auto *GA = dyn_cast(Name.getValue()); + if (GA && GA->getBaseObject()) + F = dyn_cast(GA->getBaseObject()); + } // VST_ENTRY: [valueid, namechar x N] + // VST_FNENTRY: [valueid, funcoffset, namechar x N] // VST_BBENTRY: [bbid, namechar x N] unsigned Code; - if (isa(SI->getValue())) { + if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; - if (isChar6) + if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; + } else if (F && !F->isDeclaration()) { + // Must be the module-level VST, where we pass in the Index and + // have a VSTOffsetPlaceholder. The function-level VST should not + // contain any Function symbols. + assert(FunctionIndex); + assert(VSTOffsetPlaceholder > 0); + + // Save the word offset of the function (from the start of the + // actual bitcode written to the stream). 
+ assert(FunctionIndex->count(F) == 1); + uint64_t BitcodeIndex = + (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit; + assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); + NameVals.push_back(BitcodeIndex / 32); + + Code = bitc::VST_CODE_FNENTRY; + AbbrevToUse = FnEntry8BitAbbrev; + if (Bits == SE_Char6) + AbbrevToUse = FnEntry6BitAbbrev; + else if (Bits == SE_Fixed7) + AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; - if (isChar6) + if (Bits == SE_Char6) AbbrevToUse = VST_ENTRY_6_ABBREV; - else if (is7Bit) + else if (Bits == SE_Fixed7) AbbrevToUse = VST_ENTRY_7_ABBREV; } - NameVals.push_back(VE.getValueID(SI->getValue())); - for (const char *P = Name.getKeyData(), - *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) - NameVals.push_back((unsigned char)*P); + for (const auto P : Name.getKey()) + NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); @@ -2066,6 +2351,66 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, Stream.ExitBlock(); } +/// Emit function names and summary offsets for the combined index +/// used by ThinLTO. +static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + // 8-bit fixed-width VST_COMBINED_FNENTRY function strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width VST_COMBINED_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 VST_COMBINED_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + + // FIXME: We know if the type names can use 7-bit ascii. + SmallVector NameVals; + + for (const auto &FII : Index) { + for (const auto &FI : FII.getValue()) { + NameVals.push_back(FI->bitcodeIndex()); + + StringRef FuncName = FII.first(); + + // Figure out the encoding to use for the name. + StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size()); + + // VST_COMBINED_FNENTRY: [funcsumoffset, namechar x N] + unsigned AbbrevToUse = FnEntry8BitAbbrev; + if (Bits == SE_Char6) + AbbrevToUse = FnEntry6BitAbbrev; + else if (Bits == SE_Fixed7) + AbbrevToUse = FnEntry7BitAbbrev; + + for (const auto P : FuncName) + NameVals.push_back((unsigned char)P); + + // Emit the finished record. 
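[Editor's aside: the function offsets in VST_CODE_FNENTRY and the VST offset record rely on the same invariant — blocks begin on 32-bit boundaries, so a bit position can be stored as a word index. A compact restatement, where `FuncStartBit` stands in for the value captured at the top of WriteFunction and `Cursor` for a reader-side BitstreamCursor; both names are placeholders:]

// Writer side: take the block's start bit relative to the start of the
// bitcode proper (BitcodeStartBit skips any darwin wrapper); it is always
// a multiple of 32, so only the word count is recorded.
uint64_t RelBit = FuncStartBit - BitcodeStartBit;
assert((RelBit & 31) == 0 && "function block not 32-bit aligned");
uint64_t WordOffset = RelBit / 32;

// Reader side: scale the stored word index back to a bit position before
// seeking to the function block.
Cursor.JumpToBit(WordOffset * 32);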
+ Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + } + Stream.ExitBlock(); +} + static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, BitstreamWriter &Stream) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); @@ -2100,9 +2445,34 @@ static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, Stream.ExitBlock(); } -/// WriteFunction - Emit a function body to the module stream. -static void WriteFunction(const Function &F, ValueEnumerator &VE, - BitstreamWriter &Stream) { +/// \brief Save information for the given function into the function index. +/// +/// At a minimum this saves the bitcode index of the function record that +/// was just written. However, if we are emitting function summary information, +/// for example for ThinLTO, then a \a FunctionSummary object is created +/// to hold the provided summary information. +static void SaveFunctionInfo( + const Function &F, + DenseMap> &FunctionIndex, + unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) { + std::unique_ptr FuncSummary; + if (EmitFunctionSummary) { + FuncSummary = llvm::make_unique(NumInsts); + FuncSummary->setLocalFunction(F.hasLocalLinkage()); + } + FunctionIndex[&F] = + llvm::make_unique(BitcodeIndex, std::move(FuncSummary)); +} + +/// Emit a function body to the module stream. +static void WriteFunction( + const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream, + DenseMap> &FunctionIndex, + bool EmitFunctionSummary) { + // Save the bitcode index of the start of this function block for recording + // in the VST. + uint64_t BitcodeIndex = Stream.GetCurrentBitNo(); + Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); @@ -2128,6 +2498,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + unsigned NumInsts = 0; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2135,6 +2506,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); + if (!isa(I)) + ++NumInsts; + if (!I->getType()->isVoidTy()) ++InstID; @@ -2171,6 +2545,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); + + SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex, + EmitFunctionSummary); } // Emit blockinfo, which defines the standard abbreviations etc. @@ -2348,9 +2725,183 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } +/// Write the module path strings, currently only used when generating +/// a combined index file. +static void WriteModStrings(const FunctionInfoIndex &I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); + + // TODO: See which abbrev sizes we actually need to emit + + // 8-bit fixed-width MST_ENTRY strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width MST_ENTRY strings. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 MST_ENTRY strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const StringMapEntry &MPSE : I.modPathStringEntries()) { + StringEncoding Bits = + getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + NameVals.push_back(MPSE.getValue()); + + for (const auto P : MPSE.getKey()) + NameVals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +// Helper to emit a single function summary record. +static void WritePerModuleFunctionSummaryRecord( + SmallVector &NameVals, FunctionSummary *FS, unsigned ValueID, + unsigned FSAbbrev, BitstreamWriter &Stream) { + assert(FS); + NameVals.push_back(ValueID); + NameVals.push_back(FS->isLocalFunction()); + NameVals.push_back(FS->instCount()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); +} + +/// Emit the per-module function summary section alongside the rest of +/// the module's bitcode. +static void WritePerModuleFunctionSummary( + DenseMap> &FunctionIndex, + const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_PERMODULE_ENTRY. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // islocal + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (auto &I : FunctionIndex) { + // Skip anonymous functions. We will emit a function summary for + // any aliases below. + if (!I.first->hasName()) + continue; + + WritePerModuleFunctionSummaryRecord( + NameVals, I.second->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(I.first->getName())), + FSAbbrev, Stream); + } + + for (const GlobalAlias &A : M->aliases()) { + if (!A.getBaseObject()) + continue; + const Function *F = dyn_cast(A.getBaseObject()); + if (!F || F->isDeclaration()) + continue; + + assert(FunctionIndex.count(F) == 1); + WritePerModuleFunctionSummaryRecord( + NameVals, FunctionIndex[F]->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev, + Stream); + } + + Stream.ExitBlock(); +} + +/// Emit the combined function summary section into the combined index +/// file. +static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_COMBINED_ENTRY. 
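[Editor's aside: for quick reference, the shape of the two summary records defined in this block, paraphrased from the emission code above; this is a reading aid, not an additional format guarantee.]

// FS_CODE_PERMODULE_ENTRY: [valueid(VBR8), islocal(Fixed1), instcount(VBR8)]
//   valueid   - module-level value ID of the function (resolved via the VST)
//   islocal   - FunctionSummary::isLocalFunction()
//   instcount - non-debug instruction count gathered in WriteFunction
//
// FS_CODE_COMBINED_ENTRY: [modid(VBR8), instcount(VBR8)]
//   modid     - module ID assigned by the MODULE_STRTAB_BLOCK (MST_CODE_ENTRY)
//   instcount - as above, carried over from the per-module summary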
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const auto &FII : I) { + for (auto &FI : FII.getValue()) { + FunctionSummary *FS = FI->functionSummary(); + assert(FS); + + NameVals.push_back(I.getModuleId(FS->modulePath())); + NameVals.push_back(FS->instCount()); + + // Record the starting offset of this summary entry for use + // in the VST entry. Add the current code size since the + // reader will invoke readRecord after the abbrev id read. + FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); + } + } + + Stream.ExitBlock(); +} + +// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the +// current llvm version, and a record for the epoch number. +static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); + + // Write the "user readable" string identifying the bitcode producer + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + auto StringAbbrev = Stream.EmitAbbrev(Abbv); + WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING, + "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream); + + // Write the epoch version + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + auto EpochAbbrev = Stream.EmitAbbrev(Abbv); + SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; + Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev); + Stream.ExitBlock(); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, - bool ShouldPreserveUseListOrder) { + bool ShouldPreserveUseListOrder, + uint64_t BitcodeStartBit, bool EmitFunctionSummary) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; @@ -2377,7 +2928,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. - WriteModuleInfo(M, VE, Stream); + uint64_t VSTOffsetPlaceholder = WriteModuleInfo(M, VE, Stream); // Emit constants. WriteModuleConstants(VE, Stream); @@ -2388,17 +2939,25 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, // Emit metadata. WriteModuleMetadataStore(M, Stream); - // Emit names for globals/functions etc. - WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); - // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(nullptr, VE, Stream); + WriteOperandBundleTags(M, Stream); + // Emit function bodies. + DenseMap> FunctionIndex; for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) if (!F->isDeclaration()) - WriteFunction(*F, VE, Stream); + WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary); + + // Need to write after the above call to WriteFunction which populates + // the summary information in the index. 
+ if (EmitFunctionSummary) + WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream); + + WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, + VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); Stream.ExitBlock(); } @@ -2473,10 +3032,22 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, Buffer.push_back(0); } +/// Helper to write the header common to all bitcode files. +static void WriteBitcodeHeader(BitstreamWriter &Stream) { + // Emit the file header. + Stream.Emit((unsigned)'B', 8); + Stream.Emit((unsigned)'C', 8); + Stream.Emit(0x0, 4); + Stream.Emit(0xC, 4); + Stream.Emit(0xE, 4); + Stream.Emit(0xD, 4); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder) { + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -2489,17 +3060,20 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Emit the module into the buffer. { BitstreamWriter Stream(Buffer); + // Save the start bit of the actual bitcode, in case there is space + // saved at the start for the darwin header above. The reader stream + // will start at the bitcode, and we need the offset of the VST + // to line up. + uint64_t BitcodeStartBit = Stream.GetCurrentBitNo(); // Emit the file header. - Stream.Emit((unsigned)'B', 8); - Stream.Emit((unsigned)'C', 8); - Stream.Emit(0x0, 4); - Stream.Emit(0xC, 4); - Stream.Emit(0xE, 4); - Stream.Emit(0xD, 4); + WriteBitcodeHeader(Stream); + + WriteIdentificationBlock(M, Stream); // Emit the module. - WriteModule(M, Stream, ShouldPreserveUseListOrder); + WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, + EmitFunctionSummary); } if (TT.isOSDarwin()) @@ -2508,3 +3082,38 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } + +// Write the specified function summary index to the given raw output stream, +// where it will be written in a new bitcode block. This is used when +// writing the combined index file for ThinLTO. +void llvm::WriteFunctionSummaryToFile(const FunctionInfoIndex &Index, + raw_ostream &Out) { + SmallVector Buffer; + Buffer.reserve(256 * 1024); + + BitstreamWriter Stream(Buffer); + + // Emit the bitcode header. + WriteBitcodeHeader(Stream); + + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + SmallVector Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + + // Write the module paths in the combined index. + WriteModStrings(Index, Stream); + + // Write the function summary combined index records. + WriteCombinedFunctionSummary(Index, Stream); + + // Need a special VST writer for the combined index (we don't have a + // real VST and real values when this is invoked). 
+ WriteCombinedValueSymbolTable(Index, Stream); + + Stream.ExitBlock(); + + Out.write((char *)&Buffer.front(), Buffer.size()); +} diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 3165743576ec..24de99a34d33 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,7 +19,7 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M) { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, EmitFunctionSummary); return PreservedAnalyses::all(); } @@ -27,17 +27,21 @@ namespace { class WriteBitcodePass : public ModulePass { raw_ostream &OS; // raw_ostream to print on bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder) + explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) : ModulePass(ID), OS(o), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} const char *getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, + EmitFunctionSummary); return false; } }; @@ -46,6 +50,8 @@ namespace { char WriteBitcodePass::ID = 0; ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder) { - return new WriteBitcodePass(Str, ShouldPreserveUseListOrder); + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { + return new WriteBitcodePass(Str, ShouldPreserveUseListOrder, + EmitFunctionSummary); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 44dd604f8823..e07563b5a390 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -87,15 +87,9 @@ static OrderMap orderModule(const Module &M) { if (!isa(A.getAliasee())) orderValue(A.getAliasee(), OM); for (const Function &F : M) { - if (F.hasPrefixData()) - if (!isa(F.getPrefixData())) - orderValue(F.getPrefixData(), OM); - if (F.hasPrologueData()) - if (!isa(F.getPrologueData())) - orderValue(F.getPrologueData(), OM); - if (F.hasPersonalityFn()) - if (!isa(F.getPersonalityFn())) - orderValue(F.getPersonalityFn(), OM); + for (const Use &U : F.operands()) + if (!isa(U.get())) + orderValue(U.get(), OM); } OM.LastGlobalConstantID = OM.size(); @@ -273,12 +267,8 @@ static UseListOrderStack predictUseListOrder(const Module &M) { for (const GlobalAlias &A : M.aliases()) predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); for (const Function &F : M) { - if (F.hasPrefixData()) - predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack); - if (F.hasPrologueData()) - predictValueUseListOrder(F.getPrologueData(), nullptr, OM, Stack); - if (F.hasPersonalityFn()) - predictValueUseListOrder(F.getPersonalityFn(), nullptr, OM, Stack); + for (const Use &U : F.operands()) + predictValueUseListOrder(U.get(), nullptr, OM, Stack); } return Stack; @@ -321,20 +311,10 @@ ValueEnumerator::ValueEnumerator(const Module &M, for (const GlobalAlias &GA : M.aliases()) EnumerateValue(GA.getAliasee()); - // Enumerate the prefix data constants. + // Enumerate any optional Function data. 
for (const Function &F : M) - if (F.hasPrefixData()) - EnumerateValue(F.getPrefixData()); - - // Enumerate the prologue data constants. - for (const Function &F : M) - if (F.hasPrologueData()) - EnumerateValue(F.getPrologueData()); - - // Enumerate the personality functions. - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasPersonalityFn()) - EnumerateValue(I->getPersonalityFn()); + for (const Use &U : F.operands()) + EnumerateValue(U.get()); // Enumerate the metadata type. // @@ -425,7 +405,7 @@ unsigned ValueEnumerator::getValueID(const Value *V) const { void ValueEnumerator::dump() const { print(dbgs(), ValueMap, "Default"); dbgs() << '\n'; - print(dbgs(), MDValueMap, "MetaData"); + print(dbgs(), MetadataMap, "MetaData"); dbgs() << '\n'; } @@ -512,10 +492,8 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { /// Insert all of the values referenced by named metadata in the specified /// module. void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { - for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); - I != E; ++I) - EnumerateNamedMDNode(I); + for (const auto &I : M.named_metadata()) + EnumerateNamedMDNode(&I); } void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { @@ -544,7 +522,7 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph. // // Return early if there's already an ID. - if (!MDValueMap.insert(std::make_pair(MD, 0)).second) + if (!MetadataMap.insert(std::make_pair(MD, 0)).second) return; // Visit operands first to minimize RAUW. @@ -557,10 +535,10 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { HasDILocation |= isa(MD); HasGenericDINode |= isa(MD); - // Replace the dummy ID inserted above with the correct one. MDValueMap may + // Replace the dummy ID inserted above with the correct one. MetadataMap may // have changed by inserting operands, so we need a fresh lookup here. MDs.push_back(MD); - MDValueMap[MD] = MDs.size(); + MetadataMap[MD] = MDs.size(); } /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata @@ -568,12 +546,12 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { void ValueEnumerator::EnumerateFunctionLocalMetadata( const LocalAsMetadata *Local) { // Check to see if it's already in! - unsigned &MDValueID = MDValueMap[Local]; - if (MDValueID) + unsigned &MetadataID = MetadataMap[Local]; + if (MetadataID) return; MDs.push_back(Local); - MDValueID = MDs.size(); + MetadataID = MDs.size(); EnumerateValue(Local->getValue()); @@ -729,23 +707,20 @@ void ValueEnumerator::incorporateFunction(const Function &F) { NumModuleMDs = MDs.size(); // Adding function arguments to the value table. - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I) - EnumerateValue(I); + for (const auto &I : F.args()) + EnumerateValue(&I); FirstFuncConstantID = Values.size(); // Add all function-level constants to the value table. 
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) { - if ((isa(*OI) && !isa(*OI)) || - isa(*OI)) - EnumerateValue(*OI); + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) + for (const Use &OI : I.operands()) { + if ((isa(OI) && !isa(OI)) || isa(OI)) + EnumerateValue(OI); } - BasicBlocks.push_back(BB); - ValueMap[BB] = BasicBlocks.size(); + BasicBlocks.push_back(&BB); + ValueMap[&BB] = BasicBlocks.size(); } // Optimize the constant layout. @@ -759,18 +734,17 @@ void ValueEnumerator::incorporateFunction(const Function &F) { SmallVector FnLocalMDVector; // Add all of the instructions. - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) { - if (auto *MD = dyn_cast(&*OI)) + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + for (const Use &OI : I.operands()) { + if (auto *MD = dyn_cast(&OI)) if (auto *Local = dyn_cast(MD->getMetadata())) // Enumerate metadata after the instructions they might refer to. FnLocalMDVector.push_back(Local); } - if (!I->getType()->isVoidTy()) - EnumerateValue(I); + if (!I.getType()->isVoidTy()) + EnumerateValue(&I); } } @@ -784,7 +758,7 @@ void ValueEnumerator::purgeFunction() { for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) ValueMap.erase(Values[i].first); for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i) - MDValueMap.erase(MDs[i]); + MetadataMap.erase(MDs[i]); for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) ValueMap.erase(BasicBlocks[i]); @@ -797,8 +771,8 @@ void ValueEnumerator::purgeFunction() { static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, DenseMap &IDMap) { unsigned Counter = 0; - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - IDMap[BB] = ++Counter; + for (const BasicBlock &BB : *F) + IDMap[&BB] = ++Counter; } /// getGlobalBasicBlockID - This returns the function-specific ID for the diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 92d166e3ba92..9fb8325150e9 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -63,7 +63,7 @@ private: std::vector MDs; SmallVector FunctionLocalMDs; typedef DenseMap MetadataMapType; - MetadataMapType MDValueMap; + MetadataMapType MetadataMap; bool HasMDString; bool HasDILocation; bool HasGenericDINode; @@ -93,7 +93,7 @@ private: /// before incorporation. unsigned NumModuleValues; - /// When a function is incorporated, this is the size of the MDValues list + /// When a function is incorporated, this is the size of the Metadatas list /// before incorporation. 
unsigned NumModuleMDs; @@ -117,8 +117,9 @@ public: return ID - 1; } unsigned getMetadataOrNullID(const Metadata *MD) const { - return MDValueMap.lookup(MD); + return MetadataMap.lookup(MD); } + unsigned numMDs() const { return MDs.size(); } bool hasMDString() const { return HasMDString; } bool hasDILocation() const { return HasDILocation; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5fe4c4bcaec4..4060db74a9b7 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -142,16 +142,15 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(!State); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); std::vector &KillIndices = State->GetKillIndices(); std::vector &DefIndices = State->GetDefIndices(); // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -365,9 +364,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers - // defined in a call must not be changed (ABI). + // defined in a call must not be changed (ABI). Inline assembly may + // reference either system calls or the register directly. Skip it until we + // can tell user specified registers from compiler-specified. if (MI->isCall() || MI->hasExtraDefRegAllocReq() || - TII->isPredicated(MI)) { + TII->isPredicated(MI) || MI->isInlineAsm()) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -429,6 +430,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // If MI's uses have special allocation requirement, don't allow // any use registers to be changed. Also assume all registers // used in a call must not be changed (ABI). + // Inline Assembly register uses also cannot be safely changed. // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: @@ -444,7 +446,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // changed. bool Special = MI->isCall() || MI->hasExtraSrcRegAllocReq() || - TII->isPredicated(MI); + TII->isPredicated(MI) || MI->isInlineAsm(); // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. @@ -509,15 +511,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { // Check all references that need rewriting for Reg. For each, use // the corresponding register class to narrow the set of registers // that are appropriate for renaming. 
- std::pair::iterator, - std::multimap::iterator> - Range = State->GetRegRefs().equal_range(Reg); - for (std::multimap::iterator Q = Range.first, - QE = Range.second; Q != QE; ++Q) { - const TargetRegisterClass *RC = Q->second.RC; + for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) { + const TargetRegisterClass *RC = Q.second.RC; if (!RC) continue; BitVector RCBV = TRI->getAllocatableSet(MF, RC); @@ -685,9 +680,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also // defines 'NewReg' via an early-clobber operand. - auto Range = RegRefs.equal_range(Reg); - for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) { - auto UseMI = Q->second.Operand->getParent(); + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + MachineInstr *UseMI = Q.second.Operand->getParent(); int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI); if (Idx == -1) continue; @@ -698,6 +692,20 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } } + // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining + // 'Reg' is an early-clobber define and that instruction also uses + // 'NewReg'. + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) + continue; + + MachineInstr *DefMI = Q.second.Operand->getParent(); + if (DefMI->readsRegister(NewReg, TRI)) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair(Reg, NewReg)); } @@ -920,23 +928,16 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Update the references to the old register CurrReg to // refer to the new register NewReg. - std::pair::iterator, - std::multimap::iterator> - Range = RegRefs.equal_range(CurrReg); - for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) { - Q->second.Operand->setReg(NewReg); + for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) { + Q.second.Operand->setReg(NewReg); // If the SU for the instruction being updated has debug // information related to the anti-dependency register, make // sure to update that as well. - const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second.Operand->getParent()) + if (DVI->second == Q.second.Operand->getParent()) UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index dc9bcff56121..40451c0d6c19 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -29,12 +29,13 @@ using namespace llvm; // Compare VirtRegMap::getRegAllocPref(). 
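[Editor's aside: the recurring change in this file is one modernization — wrapping a multimap's equal_range iterator pair in llvm::make_range so it can drive a range-based for loop. A standalone sketch of the idiom with generic names, not code from the patch:]

#include "llvm/ADT/iterator_range.h"
#include <map>
#include <string>

void visitRefs(const std::multimap<unsigned, std::string> &RegRefs,
               unsigned Reg) {
  // equal_range returns a {first, second} iterator pair; make_range adapts
  // it to something a range-based for loop accepts directly.
  for (const auto &Entry : llvm::make_range(RegRefs.equal_range(Reg)))
    (void)Entry.second; // use the mapped value here
}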
AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo) + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) : Pos(0) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); rewind(); DEBUG({ diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 02b2d9250bc8..2aee3a63a2b1 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -24,6 +24,7 @@ namespace llvm { class RegisterClassInfo; class VirtRegMap; +class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY AllocationOrder { SmallVector<MCPhysReg, 8> Hints; @@ -37,7 +38,8 @@ public: /// @param RegClassInfo Information about reserved and allocatable registers. AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo); + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); /// Get the allocation order without reordered hints. ArrayRef<MCPhysReg> getOrder() const { return Order; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 98d4c8afc7b9..75579a2b4559 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -25,6 +26,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" @@ -515,7 +517,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !isSafeToSpeculativelyExecute(BBI)) + !isSafeToSpeculativelyExecute(&*BBI)) return false; } @@ -643,3 +645,97 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { return !GS.IsCompared; } + +static void collectFuncletMembers( + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, + const MachineBasicBlock *MBB) { + // Add this MBB to our funclet. + auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet)); + + // Don't revisit blocks. + if (!P.second) { + assert(P.first->second == Funclet && "MBB is part of two funclets!"); + return; + } + + bool IsReturn = false; + int NumTerminators = 0; + for (const MachineInstr &MI : MBB->terminators()) { + IsReturn |= MI.isReturn(); + ++NumTerminators; + } + assert((!IsReturn || NumTerminators == 1) && + "Expected only one terminator when a return is present!"); + + // Returns are boundaries where funclet transfer can occur, don't follow + // successors. + if (IsReturn) + return; + + for (const MachineBasicBlock *SMBB : MBB->successors()) + if (!SMBB->isEHPad()) + collectFuncletMembers(FuncletMembership, Funclet, SMBB); +} + +DenseMap<const MachineBasicBlock *, int> +llvm::getFuncletMembership(const MachineFunction &MF) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership; + + // We don't have anything to do if there aren't any EH pads.
+ if (!MF.getMMI().hasEHFunclets()) + return FuncletMembership; + + int EntryBBNumber = MF.front().getNumber(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; + SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; + SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; + SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; + for (const MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) { + FuncletBlocks.push_back(&MBB); + } else if (IsSEH && MBB.isEHPad()) { + SEHCatchPads.push_back(&MBB); + } else if (MBB.pred_empty()) { + UnreachableBlocks.push_back(&MBB); + } + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to + // transfer control to another funclet. + if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + continue; + + // FIXME: SEH CatchPads are not necessarily in the parent function: + // they could be inside a finally block. + const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB(); + const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB(); + CatchRetSuccessors.push_back( + {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()}); + } + + // We don't have anything to do if there aren't any EH pads. + if (FuncletBlocks.empty()) + return FuncletMembership; + + // Identify all the basic blocks reachable from the function entry. + collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); + // All blocks not part of a funclet are in the parent function. + for (const MachineBasicBlock *MBB : UnreachableBlocks) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Next, identify all the blocks inside the funclets. + for (const MachineBasicBlock *MBB : FuncletBlocks) + collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); + // SEH CatchPads aren't really funclets, handle them separately. + for (const MachineBasicBlock *MBB : SEHCatchPads) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair : + CatchRetSuccessors) + collectFuncletMembers(FuncletMembership, CatchRetPair.second, + CatchRetPair.first); + return FuncletMembership; +} diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 0bad7954b980..ade2d7105b88 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -73,7 +73,6 @@ void ARMException::endFunction(const MachineFunction *MF) { const Function *Per = nullptr; if (F->hasPersonalityFn()) Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - assert(!MMI->getPersonality() || Per == MMI->getPersonality()); bool forceEmitPersonality = F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); @@ -115,9 +114,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector<const GlobalValue *>::const_reverse_iterator - I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalValue *GV = *I; + for (const GlobalValue *GV : reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 125047e7bbb5..be7eafbeb83d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -135,11 +135,14 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return *TM.getObjFileLowering(); } -/// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getDataLayout(); + return MMI->getModule()->getDataLayout(); } +// Do not use the cached DataLayout because some clients use it without a Module +// (llvm-dsymutil, llvm-dwarfdump). +unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } + const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); return MF->getSubtarget(); @@ -193,10 +196,18 @@ bool AsmPrinter::doInitialization(Module &M) { unsigned Major, Minor, Update; TT.getOSVersion(Major, Minor, Update); // If there is a version specified, Major will be non-zero. - if (Major) - OutStreamer->EmitVersionMin((TT.isMacOSX() ? - MCVM_OSXVersionMin : MCVM_IOSVersionMin), - Major, Minor, Update); + if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else + VersionType = MCVM_IOSVersionMin; + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + } } // Allow the target to emit any magic that it wants at the start of the file.
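The funclet-membership code added to Analysis.cpp above is, at heart, a graph coloring by flood fill: seed a color at the function entry and at every funclet entry, then propagate it across non-EH-pad successor edges, stopping at returns. A minimal standalone sketch of that idea, using std::map and a toy Block type as stand-ins for DenseMap and MachineBasicBlock (hypothetical types, not the LLVM API):

#include <map>
#include <vector>

struct Block {
  int Number;
  bool IsReturn = false;
  bool IsEHPad = false;
  std::vector<const Block *> Succs;
};

static void collectMembers(std::map<const Block *, int> &Membership,
                           int Funclet, const Block *B) {
  if (!Membership.insert({B, Funclet}).second)
    return;                  // already colored; never revisit a block
  if (B->IsReturn)
    return;                  // returns bound the funclet; don't follow successors
  for (const Block *S : B->Succs)
    if (!S->IsEHPad)         // funclet entries are seeded with their own color
      collectMembers(Membership, Funclet, S);
}

Seeding collectMembers once per funclet entry, with that entry's block number as the color, reproduces the membership map that getFuncletMembership builds.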
@@ -224,28 +235,20 @@ bool AsmPrinter::doInitialization(Module &M) { TM.getTargetFeatureString())); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", + OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { - bool skip_dwarf = false; - if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), DbgTimerName, CodeViewLineTablesGroupName)); - // FIXME: Don't emit DWARF debug info if there's at least one function - // with AddressSanitizer instrumentation. - // This is a band-aid fix for PR22032. - for (auto &F : M.functions()) { - if (F.hasFnAttribute(Attribute::SanitizeAddress)) { - skip_dwarf = true; - break; - } - } } - if (!skip_dwarf) { + if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); } @@ -340,8 +343,51 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { return TM.getSymbol(GV, *Mang); } +static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + +/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. +void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue) { + MCSection *TLSVarSection = getObjFileLowering().getDataSection(); + OutStreamer->SwitchSection(TLSVarSection); + MCSymbol *GVSym = getSymbol(GV); + EmitLinkage(GV, EmittedSym); // same linkage as GV + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + unsigned WordSize = DL.getPointerSize(); + unsigned Alignment = DL.getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && + TM.Options.EmulatedTLS; + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. 
if (EmitSpecialLLVMGlobal(GV)) @@ -352,7 +398,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose()) { + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer->GetCommentOS() << '\n'; @@ -360,7 +408,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + MCSymbol *EmittedSym = IsEmuTLSVar ? + getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -371,17 +424,29 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + + bool AllZeroInitValue = false; + const Constant *InitValue = GV->getInitializer(); + if (isa(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + if (IsEmuTLSVar) + EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -390,6 +455,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; @@ -434,12 +501,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - MCSection *TheSection = + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. + + MCSection *TheSection = IsEmuTLSVar ? + getObjFileLowering().getReadOnlySection() : getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. 
- if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. // .globl _foo @@ -459,7 +535,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -473,7 +549,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(AlignLog, GV); OutStreamer->EmitLabel(MangSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), + GV->getInitializer()); } OutStreamer->AddBlankLine(); @@ -490,7 +567,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); + unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); OutStreamer->EmitIntValue(0, PtrSize); @@ -502,16 +579,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, GVSym); + // emutls_t.* symbols are only used in the current compilation unit. + if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); - OutStreamer->EmitLabel(GVSym); + OutStreamer->EmitLabel(EmittedInitSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast(GVSym), + OutStreamer->emitELFSize(cast(EmittedInitSym), MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); @@ -545,7 +624,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prefix data. if (F->hasPrefixData()) - EmitGlobalConstant(F->getPrefixData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. @@ -580,7 +659,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prologue data. if (F->hasPrologueData()) - EmitGlobalConstant(F->getPrologueData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -640,19 +719,27 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. 
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer->AddComment(Twine("implicit-def: ") + - MMI->getContext().getRegisterInfo()->getName(RegNo)); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + OutStreamer->AddComment(OS.str()); OutStreamer->AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { - std::string Str = "kill:"; + std::string Str; + raw_string_ostream OS(Str); + OS << "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); - Str += ' '; - Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); - Str += (Op.isDef() ? "<def>" : "<kill>"); + OS << ' ' + << PrintReg(Op.getReg(), + AP.MF->getSubtarget().getRegisterInfo()) + << (Op.isDef() ? "<def>" : "<kill>"); } AP.OutStreamer->AddComment(Str); AP.OutStreamer->AddBlankLine(); @@ -688,6 +775,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + for (unsigned i = 0; i < Expr->getNumElements(); ++i) { + if (Deref) { + // We currently don't support extra Offsets or derefs after the first + // one. Bail out early instead of emitting an incorrect comment. + OS << " [complex expression]"; + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_deref) { + Deref = true; + continue; + } else if (Op == dwarf::DW_OP_bit_piece) { + // There can't be any operands after this in a valid expression. + break; + } + uint64_t ExtraOffset = Expr->getElement(i++); + if (Op == dwarf::DW_OP_plus) + Offset += ExtraOffset; + else { + assert(Op == dwarf::DW_OP_minus); + Offset -= ExtraOffset; + } + } + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -727,7 +839,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (Deref) OS << '['; - OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); + OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } if (Deref) @@ -888,7 +1000,7 @@ void AsmPrinter::EmitFunctionBody() { EmitFunctionBodyEnd(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MAI->hasDotTypeDotSizeDirective()) { + MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1047,20 +1159,17 @@ bool AsmPrinter::doFinalization(Module &M) { // Output stubs for external and common global variables. MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOF.getDataRelSection()); - const DataLayout *DL = TM.getDataLayout(); + OutStreamer->SwitchSection(TLOF.getDataSection()); + const DataLayout &DL = M.getDataLayout(); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), - DL->getPointerSize()); + DL.getPointerSize()); } } } - // Make sure we wrote out everything we need. - OutStreamer->Flush(); - // Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, @@ -1103,10 +1212,29 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = Alias.getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(Alias.getValueType()); + OutStreamer->emitELFSize(cast(Name), + MCConstantExpr::create(Size, OutContext)); + } } GCModuleInfo *MI = getAnalysisIfAvailable(); @@ -1120,16 +1248,16 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { - MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr); OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); OutStreamer->EmitLabel(AddrSymbol); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + unsigned PtrSize = M.getDataLayout().getPointerSize(0); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -1169,7 +1297,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - NeedsLocalForSize) { + MMI->hasEHFunclets() || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1206,14 +1334,14 @@ void AsmPrinter::EmitConstantPool() { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - SectionKind Kind = - CPE.getSectionKind(TM.getDataLayout()); + SectionKind Kind = CPE.getSectionKind(&getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); + MCSection *S = + getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. 
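For reference, the control variable that EmitEmulatedTLSControlVariable lays out a few hunks earlier is four pointer-sized words: allocation size, alignment, a zeroed slot owned by the emutls runtime, and a pointer to the __emutls_t.* initializer image (or null when the initial value is all zeros). A sketch of the equivalent in-memory view, read directly off the EmitIntValue/EmitSymbolValue sequence in that hunk; the struct and field names here are illustrative, not from LLVM:

#include <cstdint>

// Mirrors the four words emitted for __emutls_v.<name>.
struct EmuTLSControl {
  uintptr_t Size;        // DL.getTypeAllocSize of the TLS variable
  uintptr_t Align;       // 1 << AlignLog
  uintptr_t Reserved;    // emitted as 0; filled in lazily by the runtime
  const void *InitImage; // __emutls_t.<name>, or null for zero-init
};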
@@ -1260,14 +1388,13 @@ void AsmPrinter::EmitConstantPool() { OutStreamer->EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); - Offset = NewOffset + - TM.getDataLayout()->getTypeAllocSize(Ty); + Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); OutStreamer->EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else - EmitGlobalConstant(CPE.Val.ConstVal); + EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); } } } @@ -1276,7 +1403,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getTarget().getDataLayout(); + const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1296,8 +1423,7 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getDataLayout()))); + EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. @@ -1335,7 +1461,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) + if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); @@ -1409,8 +1535,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = - MJTI->getEntrySize(*TM.getDataLayout()); + unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); OutStreamer->EmitValue(Value, EntrySize); } @@ -1435,7 +1560,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); if (GV->getName() == "llvm.global_ctors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1447,7 +1573,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1485,7 +1612,8 @@ struct Structor { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { +void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. 
if (!isa(List)) return; @@ -1520,8 +1648,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getDataLayout(); - unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + unsigned Align = Log2_32(DL.getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); @@ -1542,7 +1669,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { OutStreamer->SwitchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) EmitAlignment(Align); - EmitXXStructor(S.Func); + EmitXXStructor(DL, S.Func); } } @@ -1621,8 +1748,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), - NumBits); + NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1668,7 +1794,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1682,11 +1808,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); - // Generate a symbolic expression for the byte address - APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); - cast(CE)->accumulateConstantOffset(DL, OffsetAI); + APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); + cast(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0)); if (!OffsetAI) @@ -1707,7 +1831,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -1718,7 +1842,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. @@ -1769,10 +1893,13 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, + AsmPrinter &AP, const Constant *BaseCV = nullptr, uint64_t Offset = 0); +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. 
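The .fill aggregation above rests on a simple invariant: a constant may be emitted as a single fill directive only if every byte of its encoding is identical. A minimal byte-level version of the check (the isRepeatedByteSequence overloads in the patch work on Constant nodes rather than raw bytes):

#include <cstddef>
#include <cstdint>

// Returns the repeated byte value, or -1 if the bytes are not all identical.
static int repeatedByte(const uint8_t *Data, size_t Size) {
  if (Size == 0)
    return -1;
  for (size_t I = 1; I != Size; ++I)
    if (Data[I] != Data[0])
      return -1;
  return Data[0]; // suitable for a single .fill of Size bytes
}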
@@ -1789,9 +1916,9 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) { /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. -static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { +static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { if (const ConstantInt *CI = dyn_cast(V)) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType()); + uint64_t Size = DL.getTypeAllocSizeInBits(V->getType()); assert(Size % 8 == 0); // Extend the element to take zero padding into account. @@ -1806,7 +1933,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { // byte. assert(CA->getNumOperands() != 0 && "Should be a CAZ"); Constant *Op0 = CA->getOperand(0); - int Byte = isRepeatedByteSequence(Op0, TM); + int Byte = isRepeatedByteSequence(Op0, DL); if (Byte == -1) return -1; @@ -1823,15 +1950,14 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - AsmPrinter &AP){ +static void emitGlobalConstantDataSequential(const DataLayout &DL, + const ConstantDataSequential *CDS, + AsmPrinter &AP) { // See if we can aggregate this into a .fill, if so, emit it as such. - int Value = isRepeatedByteSequence(CDS, AP.TM); + int Value = isRepeatedByteSequence(CDS, DL); if (Value != -1) { - uint64_t Bytes = - AP.TM.getDataLayout()->getTypeAllocSize( - CDS->getType()); + uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) return AP.OutStreamer->EmitFill(Bytes, Value); @@ -1851,37 +1977,11 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } - } else if (ElementByteSize == 4) { - // FP Constants are printed as integer constants to avoid losing - // precision. - assert(CDS->getElementType()->isFloatTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - float F; - uint32_t I; - }; - - F = CDS->getElementAsFloat(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 4); - } } else { - assert(CDS->getElementType()->isDoubleTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - double F; - uint64_t I; - }; - - F = CDS->getElementAsDouble(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 8); - } + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast(CDS->getElementAsConstant(I)), AP); } - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1890,12 +1990,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, +static void emitGlobalConstantArray(const DataLayout &DL, + const ConstantArray *CA, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
- int Value = isRepeatedByteSequence(CA, AP.TM); - const DataLayout &DL = *AP.TM.getDataLayout(); + int Value = isRepeatedByteSequence(CA, DL); if (Value != -1) { uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); @@ -1903,17 +2003,17 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { - emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); } } } -static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { +static void emitGlobalConstantVector(const DataLayout &DL, + const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AP); + emitGlobalConstantImpl(DL, CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1921,21 +2021,21 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, +static void emitGlobalConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getDataLayout(); - unsigned Size = DL->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = DL->getStructLayout(CS->getType()); + unsigned Size = DL.getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL.getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Print the actual field value. - emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; @@ -1974,8 +2074,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getDataLayout()->isBigEndian() && - !CFP->getType()->isPPC_FP128Ty()) { + if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) @@ -1993,13 +2092,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. 
- const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -2016,7 +2115,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Big endian: // * Record the extra bits to emit. // * Realign the raw data to emit the chunks of 64-bits. - if (DL->isBigEndian()) { + if (DL.isBigEndian()) { // Basically the structure of the raw data is a chunk of 64-bits cells: // 0 1 BitWidth / 64 // [chunk1][chunk2] ... [chunkN]. @@ -2037,7 +2136,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // quantities at a time. const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; AP.OutStreamer->EmitIntValue(Val, 8); } @@ -2045,8 +2144,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( - CI->getType()); + uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) @@ -2094,7 +2192,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) return; - const GlobalValue *BaseGV = dyn_cast(BaseCst); + const GlobalValue *BaseGV = dyn_cast_or_null(BaseCst); if (!BaseGV) return; @@ -2149,10 +2247,10 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, - const Constant *BaseCV, uint64_t Offset) { - const DataLayout *DL = AP.TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, const Constant *BaseCV, + uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); // Globals with sub-elements such as combinations of arrays and structs // are handled recursively by emitGlobalConstantImpl. Keep track of the @@ -2189,32 +2287,32 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } if (const ConstantDataSequential *CDS = dyn_cast(CV)) - return emitGlobalConstantDataSequential(CDS, AP); + return emitGlobalConstantDataSequential(DL, CDS, AP); if (const ConstantArray *CVA = dyn_cast(CV)) - return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast(CV)) - return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). 
if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AP); + return emitGlobalConstantImpl(DL, CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, *DL); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AP); + return emitGlobalConstantImpl(DL, New, AP); } } if (const ConstantVector *V = dyn_cast(CV)) - return emitGlobalConstantVector(V, AP); + return emitGlobalConstantVector(DL, V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. @@ -2230,11 +2328,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV) { - uint64_t Size = - TM.getDataLayout()->getTypeAllocSize(CV->getType()); +void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, *this); + emitGlobalConstantImpl(DL, CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2272,10 +2369,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) - + "_" + Twine(CPID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "CPI" + Twine(getFunctionNumber()) + "_" + + Twine(CPID)); } /// GetJTISymbol - Return the symbol for the specified jump table entry. @@ -2286,10 +2383,10 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + - Twine(UID) + "_set_" + Twine(MBBID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); } MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, @@ -2301,7 +2398,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, /// Return the MCSymbol for the specified ExternalSymbol. MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; - Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout()); + Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout()); return OutContext.getOrCreateSymbol(NameStr); } @@ -2376,6 +2473,14 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. 
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + // End the previous funclet and start a new one. + if (MBB.isEHFuncletEntry()) { + for (const HandlerInfo &HI : Handlers) { + HI.Handler->endFunclet(); + HI.Handler->beginFunclet(MBB); + } + } + // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); @@ -2389,20 +2494,28 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (isVerbose()) OutStreamer->AddComment("Block address taken"); - for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) - OutStreamer->EmitLabel(Sym); + // MBBs can have their address taken as part of CodeGen without having + // their corresponding BB's address taken in IR + if (BB->hasAddressTaken()) + for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + OutStreamer->EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB.getBasicBlock()) - if (BB->hasName()) - OutStreamer->AddComment("%" + BB->getName()); + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + BB->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, BB->getModule()); + OutStreamer->GetCommentOS() << '\n'; + } + } emitBasicBlockLoopComments(MBB, LI, *this); } // Print the main label for the block. - if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { + if (MBB.pred_empty() || + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); @@ -2440,7 +2553,7 @@ bool AsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) + if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ad180b6667c0..504c5d283cba 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -47,7 +47,7 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { OutStreamer->EmitSLEB128IntValue(Value); } -/// EmitULEB128 - emit the specified signed leb128 value. +/// EmitULEB128 - emit the specified unsigned leb128 value. void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) @@ -56,18 +56,6 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, OutStreamer->EmitULEB128IntValue(Value, PadTo); } -/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
-void AsmPrinter::EmitCFAByte(unsigned Val) const { - if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) - OutStreamer->AddComment("DW_CFA_offset + Reg (" + - Twine(Val - dwarf::DW_CFA_offset) + ")"); - else - OutStreamer->AddComment(dwarf::CallFrameString(Val)); - } - OutStreamer->EmitIntValue(Val, 1); -} - static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { case dwarf::DW_EH_PE_absptr: @@ -134,7 +122,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getDataLayout()->getPointerSize(); + return MF->getDataLayout().getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -228,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpDefCfaOffset: OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; case MCCFIInstruction::OpDefCfa: OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; @@ -246,6 +237,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpSameValue: OutStreamer->EmitCFISameValue(Inst.getRegister()); break; + case MCCFIInstruction::OpGnuArgsSize: + OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + break; + case MCCFIInstruction::OpEscape: + OutStreamer->EmitCFIEscape(Inst.getValues()); + break; } } @@ -284,17 +281,10 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { } } -void -AsmPrinter::emitDwarfAbbrevs(const std::vector& Abbrevs) const { - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbrevs) { - // Emit the abbrevations code (base 1 index.) - EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); +void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { + // Emit the abbreviations code (base 1 index.) + EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); - // Emit the abbreviations data. - Abbrev->Emit(this); - } - - // Mark end of abbreviations. - EmitULEB128(0, "EOM(3)"); + // Emit the abbreviations data. + Abbrev.Emit(this); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index f1efe9d835e0..e59961f85769 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -19,6 +19,7 @@ namespace llvm { +class MachineBasicBlock; class MachineFunction; class MachineInstr; class MCSymbol; @@ -50,6 +51,11 @@ public: /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; + /// \brief Emit target-specific EH funclet machinery. + virtual void beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym = nullptr) {} + virtual void endFunclet() {} + /// \brief Process beginning of an instruction. virtual void beginInstruction(const MachineInstr *MI) = 0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 793e62960dd6..4171657b5285 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -127,19 +127,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, std::unique_ptr Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); - // Create a temporary copy of the original STI because the parser may modify - // it. 
For example, when switching between arm and thumb mode. If the target - // needs to emit code to return to the original state it can do so in - // emitInlineAsmEnd(). - MCSubtargetInfo TmpSTI = STI; - // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and // we only need MCInstrInfo for asm parsing. We create one unconditionally // because it's not subtarget dependent. std::unique_ptr MII(TM.getTarget().createMCInstrInfo()); std::unique_ptr TAP(TM.getTarget().createMCAsmParser( - TmpSTI, *Parser, *MII, MCOptions)); + STI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -154,7 +148,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STI, &TmpSTI); + emitInlineAsmEnd(STI, &TAP->getSTI()); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -512,9 +506,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << DL->getPrivateGlobalPrefix(); + const DataLayout &DL = MF->getDataLayout(); + OS << DL.getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 0cc829fffc54..df1997bcb72c 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -24,16 +24,19 @@ namespace llvm { class ByteStreamer { - public: - virtual ~ByteStreamer() {} + protected: + ~ByteStreamer() = default; + ByteStreamer(const ByteStreamer&) = default; + ByteStreamer() = default; + public: // For now we're just handling the calls we need for dwarf emission/hashing. 
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 46dbc7693698..bf794f7f70f6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -86,7 +86,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
   AP->EmitULEB128(0, "EOM(2)");
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEAbbrev::print(raw_ostream &O) {
   O << "Abbreviation @"
     << format("0x%lx", (long)(intptr_t)this)
@@ -104,12 +104,13 @@ void DIEAbbrev::print(raw_ostream &O) {
       << '\n';
   }
 }
+
+LLVM_DUMP_METHOD
 void DIEAbbrev::dump() { print(dbgs()); }
-#endif
 
 DIEAbbrev DIE::generateAbbrev() const {
   DIEAbbrev Abbrev(Tag, hasChildren());
-  for (const DIEValue &V : Values)
+  for (const DIEValue &V : values())
     Abbrev.AddAttribute(V.getAttribute(), V.getForm());
   return Abbrev;
 }
@@ -144,36 +145,35 @@ DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
   return DIEValue();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+                        StringRef Type, unsigned Size, unsigned IndentCount) {
+  O << Type << ": Size: " << Size << "\n";
+
+  unsigned I = 0;
+  const std::string Indent(IndentCount, ' ');
+  for (const auto &V : Values.values()) {
+    O << Indent;
+    O << "Blk[" << I++ << "]";
+    O << "  " << dwarf::FormEncodingString(V.getForm()) << " ";
+    V.print(O);
+    O << "\n";
+  }
+}
+
+LLVM_DUMP_METHOD
 void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   const std::string Indent(IndentCount, ' ');
-  bool isBlock = getTag() == 0;
+  O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+    << ", Offset: " << Offset << ", Size: " << Size << "\n";
 
-  if (!isBlock) {
-    O << Indent
-      << "Die: "
-      << format("0x%lx", (long)(intptr_t)this)
-      << ", Offset: " << Offset
-      << ", Size: " << Size << "\n";
-
-    O << Indent
-      << dwarf::TagString(getTag())
-      << " "
-      << dwarf::ChildrenString(hasChildren()) << "\n";
-  } else {
-    O << "Size: " << Size << "\n";
-  }
+  O << Indent << dwarf::TagString(getTag()) << " "
+    << dwarf::ChildrenString(hasChildren()) << "\n";
 
   IndentCount += 2;
-  unsigned I = 0;
-  for (const auto &V : Values) {
+  for (const auto &V : values()) {
     O << Indent;
-
-    if (!isBlock)
-      O << dwarf::AttributeString(V.getAttribute());
-    else
-      O << "Blk[" << I++ << "]";
-
+    O << dwarf::AttributeString(V.getAttribute());
     O << "  " << dwarf::FormEncodingString(V.getForm()) << " ";
     V.print(O);
     O << "\n";
@@ -183,13 +183,13 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   for (const auto &Child : children())
     Child.print(O, IndentCount + 4);
 
-  if (!isBlock) O << "\n";
+  O << "\n";
 }
 
+LLVM_DUMP_METHOD
 void DIE::dump() {
   print(dbgs());
 }
-#endif
 
 void DIEValue::EmitValue(const AsmPrinter *AP) const {
   switch (Ty) {
@@ -215,7 +215,7 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
   llvm_unreachable("Unknown DIE kind");
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEValue::print(raw_ostream &O) const {
   switch (Ty) {
   case isNone:
@@ -228,10 +228,10 @@ void DIEValue::print(raw_ostream &O) const {
   }
 }
 
+LLVM_DUMP_METHOD
 void DIEValue::dump() const {
   print(dbgs());
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEInteger Implementation
 //===----------------------------------------------------------------------===//
@@ -264,7 +264,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   case dwarf::DW_FORM_addr:
-    Size = Asm->getDataLayout().getPointerSize(); break;
+    Size = Asm->getPointerSize();
+    break;
   case dwarf::DW_FORM_ref_addr:
     Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
     break;
@@ -294,21 +295,21 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
   case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
-  case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+  case dwarf::DW_FORM_addr:
+    return AP->getPointerSize();
   case dwarf::DW_FORM_ref_addr:
     if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
-      return AP->getDataLayout().getPointerSize();
+      return AP->getPointerSize();
     return sizeof(int32_t);
   default: llvm_unreachable("DIE Value form not supported yet");
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEInteger::print(raw_ostream &O) const {
   O << "Int: " << (int64_t)Integer << "  0x";
   O.write_hex(Integer);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEExpr Implementation
@@ -326,12 +327,11 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELabel Implementation
@@ -352,12 +352,11 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEDelta Implementation
@@ -375,14 +374,13 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEDelta::print(raw_ostream &O) const {
   O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEString Implementation
@@ -431,11 +429,10 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   return DIEInteger(S.getOffset()).SizeOf(AP, Form);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEString::print(raw_ostream &O) const {
   O << "String: " << S.getString();
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEEntry Implementation
@@ -472,15 +469,14 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
   const DwarfDebug *DD = AP->getDwarfDebug();
   assert(DD && "Expected Dwarf Debug info to be available");
   if (DD->getDwarfVersion() == 2)
-    return AP->getDataLayout().getPointerSize();
+    return AP->getPointerSize();
   return sizeof(int32_t);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEEntry::print(raw_ostream &O) const {
   O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIETypeSignature Implementation
@@ -491,11 +487,10 @@ void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
   Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIETypeSignature::print(raw_ostream &O) const {
   O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELoc Implementation
@@ -505,7 +500,7 @@ void DIETypeSignature::print(raw_ostream &O) const {
 ///
 unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (const auto &V : Values)
+    for (const auto &V : values())
       Size += V.SizeOf(AP);
   }
 
@@ -525,7 +520,7 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
     Asm->EmitULEB128(Size); break;
   }
 
-  for (const auto &V : Values)
+  for (const auto &V : values())
     V.EmitValue(Asm);
 }
 
@@ -543,12 +538,10 @@ unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELoc::print(raw_ostream &O) const {
-  O << "ExprLoc: ";
-  DIE::print(O, 5);
+  printValues(O, *this, "ExprLoc", Size, 5);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEBlock Implementation
@@ -558,7 +551,7 @@ void DIELoc::print(raw_ostream &O) const {
 ///
 unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (const auto &V : Values)
+    for (const auto &V : values())
       Size += V.SizeOf(AP);
   }
 
@@ -576,7 +569,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
   }
 
-  for (const auto &V : Values)
+  for (const auto &V : values())
     V.EmitValue(Asm);
 }
 
@@ -592,12 +585,10 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEBlock::print(raw_ostream &O) const {
-  O << "Blk: ";
-  DIE::print(O, 5);
+  printValues(O, *this, "Blk", Size, 5);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELocList Implementation
@@ -608,7 +599,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
     return 4;
   if (Form == dwarf::DW_FORM_sec_offset)
     return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
 /// EmitValue - Emit label value.
@@ -619,6 +610,5 @@ void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5e60156fdfc9..02010654a6f4 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -469,38 +469,6 @@ void DIEHash::computeHash(const DIE &Die) {
   Hash.update(makeArrayRef((uint8_t)'\0'));
 }
 
-/// This is based on the type signature computation given in section 7.27 of the
-/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
-/// with the exception that we are hashing only the context and the name of the
-/// type.
-uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
-
-  // Add the contexts to the hash. We won't be computing the ODR hash for
-  // function local types so it's safe to use the generic context hashing
-  // algorithm here.
-  // FIXME: If we figure out how to account for linkage in some way we could
-  // actually do this with a slight modification to the parent hash algorithm.
-  if (const DIE *Parent = Die.getParent())
-    addParentContext(*Parent);
-
-  // Add the current DIE information.
-
-  // Add the DWARF tag of the DIE.
-  addULEB128(Die.getTag());
-
-  // Add the name of the type to the hash.
-  addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
-
-  // Now get the result.
-  MD5::MD5Result Result;
-  Hash.final(Result);
-
-  // ... take the least significant 8 bytes and return those. Our MD5
-  // implementation always returns its results in little endian, swap bytes
-  // appropriately.
-  return support::endian::read64le(Result + 8);
-}
-
 /// This is based on the type signature computation given in section 7.27 of the
 /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
 /// with the inclusion of the full CU and all top level CU entities.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 833ca0276fdb..44f0ce88523d 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -84,9 +84,6 @@ class DIEHash {
 public:
   DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
 
-  /// \brief Computes the ODR signature.
-  uint64_t computeDIEODRSignature(const DIE &Die);
-
   /// \brief Computes the CU signature.
   uint64_t computeCUSignature(const DIE &Die);
 
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index afffa839a606..bbe53249a084 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,6 +9,8 @@
 
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -17,7 +19,6 @@
 namespace llvm {
 class AsmPrinter;
-class DebugLocStream;
 
 /// \brief This struct describes location entries emitted in the .debug_loc
 /// section.
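The DIEHash hunk above deletes the unused ODR-signature variant; the scheme that survives (and the DwarfDebug::makeTypeSignature() that appears later in this patch) hashes an identifier with MD5 and keeps only the least significant 64 bits. A minimal sketch of that computation, mirroring the deleted body:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"

using namespace llvm;

// Hash the identifier and keep the low 64 bits. LLVM's MD5 returns its
// result in little-endian order, so the last eight bytes hold the least
// significant half of the digest.
static uint64_t typeSignature(StringRef Identifier) {
  MD5 Hash;
  Hash.update(Identifier);
  MD5::MD5Result Result;
  Hash.final(Result);
  return support::endian::read64le(Result + 8);
}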
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index f8cdde203187..4ad3e1867328 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -41,7 +41,7 @@ void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
   DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
 }
 
-void DwarfAccelTable::ComputeBucketCount(void) {
+void DwarfAccelTable::ComputeBucketCount() {
   // First get the number of unique hashes.
   std::vector<unsigned> uniques(Data.size());
   for (size_t i = 0, e = Data.size(); i < e; ++i)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2c212c7ecee1..6665c16159a0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -78,12 +78,11 @@ void DwarfCFIException::endModule() {
     return;
 
   // Emit references to all used personality functions
-  const std::vector<const Function *> &Personalities = MMI->getPersonalities();
-  for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
-    if (!Personalities[i])
+  for (const Function *Personality : MMI->getPersonalities()) {
+    if (!Personality)
       continue;
-    MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
-    TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym);
+    MCSymbol *Sym = Asm->getSymbol(Personality);
+    TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
   }
 }
 
@@ -108,7 +107,6 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   const Function *Per = nullptr;
   if (F->hasPersonalityFn())
     Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
-  assert(!MMI->getPersonality() || Per == MMI->getPersonality());
 
   // Emit a personality function even when there are no landing pads
   bool forceEmitPersonality =
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index fc54a2925beb..725063a8177b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -151,28 +151,33 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
     DIELoc *Loc = new (DIEValueAllocator) DIELoc;
     const MCSymbol *Sym = Asm->getSymbol(Global);
     if (Global->isThreadLocal()) {
-      // FIXME: Make this work with -gsplit-dwarf.
-      unsigned PointerSize = Asm->getDataLayout().getPointerSize();
-      assert((PointerSize == 4 || PointerSize == 8) &&
-             "Add support for other sizes if necessary");
-      // Based on GCC's support for TLS:
-      if (!DD->useSplitDwarf()) {
-        // 1) Start with a constNu of the appropriate pointer size
-        addUInt(*Loc, dwarf::DW_FORM_data1,
-                PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
-        // 2) containing the (relocated) offset of the TLS variable
-        //    within the module's TLS block.
-        addExpr(*Loc, dwarf::DW_FORM_udata,
-                Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+      if (Asm->TM.Options.EmulatedTLS) {
+        // TODO: add debug info for emulated thread local mode.
       } else {
-        addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
-        addUInt(*Loc, dwarf::DW_FORM_udata,
-                DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+        // FIXME: Make this work with -gsplit-dwarf.
+        unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+        assert((PointerSize == 4 || PointerSize == 8) &&
+               "Add support for other sizes if necessary");
+        // Based on GCC's support for TLS:
+        if (!DD->useSplitDwarf()) {
+          // 1) Start with a constNu of the appropriate pointer size
+          addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+                                                  ? dwarf::DW_OP_const4u
+                                                  : dwarf::DW_OP_const8u);
+          // 2) containing the (relocated) offset of the TLS variable
+          //    within the module's TLS block.
+          addExpr(*Loc, dwarf::DW_FORM_udata,
+                  Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+        } else {
+          addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+          addUInt(*Loc, dwarf::DW_FORM_udata,
+                  DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+        }
+        // 3) followed by an OP to make the debugger do a TLS lookup.
+        addUInt(*Loc, dwarf::DW_FORM_data1,
+                DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+                                      : dwarf::DW_OP_form_tls_address);
       }
-      // 3) followed by an OP to make the debugger do a TLS lookup.
-      addUInt(*Loc, dwarf::DW_FORM_data1,
-              DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
-                                    : dwarf::DW_OP_form_tls_address);
     } else {
       DD->addArangeLabel(SymbolCU(this, Sym));
       addOpAddress(*Loc, Sym);
@@ -338,9 +343,9 @@ void DwarfCompileUnit::constructScopeDIE(
   // Skip imported directives in gmlt-like data.
   if (!includeMinimalInlineScopes()) {
     // There is no need to emit empty lexical block DIE.
-    for (const auto &E : DD->findImportedEntitiesForScope(DS))
+    for (const auto *IE : ImportedEntities[DS])
       Children.push_back(
-          constructImportedEntityDIE(cast<DIImportedEntity>(E.second)));
+          constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
   }
 
   // If there are only other scopes as children, put them directly in the
@@ -435,6 +440,9 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
           getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
   addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+  if (IA->getDiscriminator())
+    addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+            IA->getDiscriminator());
 
   // Add name to the name table, we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -517,8 +525,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
       unsigned FrameReg = 0;
       const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
       int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
-      assert(Expr != DV.getExpression().end() &&
-             "Wrong number of expressions");
+      assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
       DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
       DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
       ++Expr;
@@ -597,8 +604,8 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
   return ObjectPointer;
 }
 
-void
-DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+    LexicalScope *Scope) {
   DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
   if (AbsDef)
     return;
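For readers following the TLS hunk above: the location expression being assembled is a short DWARF opcode sequence. The following illustrative helper (not an LLVM API; the real code routes this through DIELoc/addUInt/addExpr with a relocated symbol rather than a raw offset) shows the shape of what is emitted:

#include "llvm/Support/Dwarf.h"
#include <cstdint>
#include <vector>

using namespace llvm;

// For a 4- or 8-byte pointer, the non-split-DWARF expression is:
//   DW_OP_const<N>u <offset of the variable in the TLS block>
//   DW_OP_form_tls_address      (or DW_OP_GNU_push_tls_address when
//                                tuning for GDB, or when DWARF < 3)
std::vector<uint64_t> tlsLocationOps(unsigned PointerSize, uint64_t Offset,
                                     bool UseGNUTLSOpcode) {
  std::vector<uint64_t> Ops;
  Ops.push_back(PointerSize == 4 ? dwarf::DW_OP_const4u
                                 : dwarf::DW_OP_const8u);
  Ops.push_back(Offset); // the real code emits a relocated symbol here
  Ops.push_back(UseGNUTLSOpcode ? dwarf::DW_OP_GNU_push_tls_address
                                : dwarf::DW_OP_form_tls_address);
  return Ops;
}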
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 509c9432bcbf..2e2846790cc1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -39,6 +39,12 @@ class DwarfCompileUnit : public DwarfUnit {
   /// The start of the unit within its section.
   MCSymbol *LabelBegin;
 
+  typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
+  typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
+      ImportedEntityMap;
+
+  ImportedEntityMap ImportedEntities;
+
   /// GlobalNames - A map of globally visible named entities for this unit.
   StringMap<const DIE *> GlobalNames;
 
@@ -98,6 +104,10 @@ public:
 
   unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
 
+  void addImportedEntity(const DIImportedEntity* IE) {
+    ImportedEntities[IE->getScope()].push_back(IE);
+  }
+
   /// addRange - Add an address range to the list of ranges for this unit.
   void addRange(RangeSpan Range);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7d03a3930d7d..3466f3469f1c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -104,6 +105,14 @@
 DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
                  cl::init(Default));
 
+static cl::opt<DefaultOnOff>
+DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+                  cl::desc("Emit DWARF linkage-name attributes."),
+                  cl::values(clEnumVal(Default, "Default for platform"),
+                             clEnumVal(Enable, "Enabled"),
+                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
+                  cl::init(Default));
+
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";
 
@@ -176,9 +185,9 @@ const DIType *DbgVariable::getType() const {
     if (tag == dwarf::DW_TAG_pointer_type)
       subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
 
-    auto Elements = cast<DICompositeTypeBase>(subType)->getElements();
+    auto Elements = cast<DICompositeType>(subType)->getElements();
     for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
-      auto *DT = cast<DIDerivedTypeBase>(Elements[i]);
+      auto *DT = cast<DIDerivedType>(Elements[i]);
       if (getName() == DT->getName())
         return resolve(DT->getBaseType());
     }
@@ -194,45 +203,67 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
     : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
       PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
-      UsedNonDefaultText(false),
       SkeletonHolder(A, "skel_string", DIEValueAllocator),
       IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
-      IsPS4(Triple(A->getTargetTriple()).isPS4()),
       AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                        dwarf::DW_FORM_data4)),
       AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                       dwarf::DW_FORM_data4)),
       AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                            dwarf::DW_FORM_data4)),
-      AccelTypes(TypeAtoms) {
+      AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
 
   CurFn = nullptr;
   CurMI = nullptr;
+  Triple TT(Asm->getTargetTriple());
 
-  // Turn on accelerator tables for Darwin by default, pubnames by
-  // default for non-Darwin/PS4, and handle split dwarf.
+  // Make sure we know our "debugger tuning."  The target option takes
+  // precedence; fall back to triple-based defaults.
+  if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+    DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+  else if (IsDarwin || TT.isOSFreeBSD())
+    DebuggerTuning = DebuggerKind::LLDB;
+  else if (TT.isPS4CPU())
+    DebuggerTuning = DebuggerKind::SCE;
+  else
+    DebuggerTuning = DebuggerKind::GDB;
+
+  // Turn on accelerator tables for LLDB by default.
   if (DwarfAccelTables == Default)
-    HasDwarfAccelTables = IsDarwin;
+    HasDwarfAccelTables = tuneForLLDB();
   else
    HasDwarfAccelTables = DwarfAccelTables == Enable;
 
+  // Handle split DWARF. Off by default for now.
   if (SplitDwarf == Default)
     HasSplitDwarf = false;
   else
     HasSplitDwarf = SplitDwarf == Enable;
 
+  // Pubnames/pubtypes on by default for GDB.
   if (DwarfPubSections == Default)
-    HasDwarfPubSections = !IsDarwin && !IsPS4;
+    HasDwarfPubSections = tuneForGDB();
  else
     HasDwarfPubSections = DwarfPubSections == Enable;
 
+  // SCE does not use linkage names.
+  if (DwarfLinkageNames == Default)
+    UseLinkageNames = !tuneForSCE();
+  else
+    UseLinkageNames = DwarfLinkageNames == Enable;
+
   unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
   DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
                                     : MMI->getModule()->getDwarfVersion();
+  // Use dwarf 4 by default if nothing is requested.
+  DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
 
-  // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
-  // Everybody else uses GNU's.
-  UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+  // Work around a GDB bug. GDB doesn't support the standard opcode;
+  // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+  // is defined as of DWARF 3.
+  // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+  // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+  UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
 
   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
 
@@ -300,18 +331,6 @@ void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
   }
 }
 
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
-  if (!Context)
-    return false;
-  if (isa<DISubprogram>(Context))
-    return true;
-  if (auto *T = dyn_cast<DIType>(Context))
-    return isSubprogramContext(resolve(T->getScope()));
-  return false;
-}
-
 /// Check whether we should create a DIE for the given Scope, return true
 /// if we don't create a DIE (the corresponding DIE is null).
 bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -416,6 +435,16 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
   else
     NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
 
+  if (DIUnit->getDWOId()) {
+    // This CU is either a clang module DWO or a skeleton CU.
+    NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+                  DIUnit->getDWOId());
+    if (!DIUnit->getSplitDebugFilename().empty())
+      // This is a prefabricated skeleton CU.
+      NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+                      DIUnit->getSplitDebugFilename());
+  }
+
   CUMap.insert(std::make_pair(DIUnit, &NewCU));
   CUDieMap.insert(std::make_pair(&Die, &NewCU));
   return NewCU;
 }
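The constructor hunk above introduces "debugger tuning": one explicit choice of debugger (overridable via TargetOptions, otherwise derived from the triple) from which several feature defaults follow. A self-contained sketch of the mapping the diff encodes (illustrative types; the real DebuggerKind lives in llvm/Target/TargetOptions.h):

#include <string>

enum class DebuggerKind { Default, GDB, LLDB, SCE };

struct TuningDefaults {
  bool AccelTables;  // LLDB wants Apple accelerator tables
  bool PubSections;  // GDB wants pubnames/pubtypes
  bool LinkageNames; // SCE does not want DW_AT_linkage_name
};

// Mirrors the defaults above: HasDwarfAccelTables = tuneForLLDB(),
// HasDwarfPubSections = tuneForGDB(), UseLinkageNames = !tuneForSCE().
TuningDefaults defaultsFor(DebuggerKind K) {
  return {/*AccelTables=*/K == DebuggerKind::LLDB,
          /*PubSections=*/K == DebuggerKind::GDB,
          /*LinkageNames=*/K != DebuggerKind::SCE};
}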
@@ -436,8 +465,6 @@ void DwarfDebug::beginModule() {
 
   const Module *M = MMI->getModule();
 
-  FunctionDIs = makeSubprogramMap(*M);
-
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
   if (!CU_Nodes)
     return;
@@ -449,12 +476,7 @@ void DwarfDebug::beginModule() {
     auto *CUNode = cast<DICompileUnit>(N);
     DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
     for (auto *IE : CUNode->getImportedEntities())
-      ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
-    // Stable sort to preserve the order of appearance of imported entities.
-    // This is to avoid out-of-order processing of interdependent declarations
-    // within the same scope, e.g. { namespace A = base; namespace B = A; }
-    std::stable_sort(ScopesWithImportedEntities.begin(),
-                     ScopesWithImportedEntities.end(), less_first());
+      CU.addImportedEntity(IE);
     for (auto *GV : CUNode->getGlobalVariables())
       CU.getOrCreateGlobalVariableDIE(GV);
     for (auto *SP : CUNode->getSubprograms())
@@ -467,7 +489,10 @@ void DwarfDebug::beginModule() {
     for (auto *Ty : CUNode->getRetainedTypes()) {
       // The retained types array by design contains pointers to
       // MDNodes rather than DIRefs. Unique them here.
-      CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
+      DIType *RT = cast<DIType>(resolve(Ty->getRef()));
+      if (!RT->isExternalTypeRef())
+        // There is no point in force-emitting a forward declaration.
+        CU.getOrCreateTypeDIE(RT);
     }
     // Emit imported_modules last so that the relevant context is already
     // available.
@@ -1061,12 +1086,8 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
   for (const auto &MBB : *MF)
     for (const auto &MI : MBB)
       if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
-          MI.getDebugLoc()) {
-        // Did the target forget to set the FrameSetup flag for CFI insns?
-        assert(!MI.isCFIInstruction() &&
-               "First non-frame-setup instruction is a CFI instruction.");
+          MI.getDebugLoc())
         return MI.getDebugLoc();
-      }
   return DebugLoc();
 }
 
@@ -1079,8 +1100,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo())
     return;
 
-  auto DI = FunctionDIs.find(MF->getFunction());
-  if (DI == FunctionDIs.end())
+  auto DI = MF->getFunction()->getSubprogram();
+  if (!DI)
     return;
 
   // Grab the lexical scopes for the function, if we don't have any of those
@@ -1127,7 +1148,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
       // The first mention of a function argument gets the CurrentFnBegin
       // label, so arguments are visible when breaking at function entry.
       const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
-      if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+      if (DIVar->isParameter() &&
          getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
         LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
         if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
@@ -1171,7 +1192,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
          "endFunction should be called with the same function as beginFunction");
 
   if (!MMI->hasDebugInfo() || LScopes.empty() ||
-      !FunctionDIs.count(MF->getFunction())) {
+      !MF->getFunction()->getSubprogram()) {
     // If we don't have a lexical scope for this function then there will
     // be a hole in the range information. Keep note of this by setting the
     // previously used section to nullptr.
@@ -1863,7 +1884,7 @@ void DwarfDebug::emitDebugLineDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
   Asm->OutStreamer->SwitchSection(
       Asm->getObjFileLowering().getDwarfLineDWOSection());
-  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer);
+  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
 }
 
 // Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -1884,7 +1905,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
   return &SplitTypeUnitFileTable;
 }
 
-static uint64_t makeTypeSignature(StringRef Identifier) {
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
   MD5 Hash;
   Hash.update(Identifier);
   // ... take the least significant 8 bytes and return those. Our MD5
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 01f34c6eb81c..4c613a905450 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
 #include <memory>
 
 namespace llvm {
@@ -48,24 +49,6 @@ class DwarfTypeUnit;
 class DwarfUnit;
 class MachineModuleInfo;
 
-//===----------------------------------------------------------------------===//
-/// This class is used to record source line correspondence.
-class SrcLineInfo {
-  unsigned Line;     // Source line number.
-  unsigned Column;   // Source column.
-  unsigned SourceID; // Source ID number.
-  MCSymbol *Label;   // Label in code ID number.
-public:
-  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
-      : Line(L), Column(C), SourceID(S), Label(label) {}
-
-  // Accessors
-  unsigned getLine() const { return Line; }
-  unsigned getColumn() const { return Column; }
-  unsigned getSourceID() const { return SourceID; }
-  MCSymbol *getLabel() const { return Label; }
-};
-
 //===----------------------------------------------------------------------===//
 /// This class is used to track local variable information.
 ///
@@ -127,14 +110,14 @@ public:
   // Accessors.
   const DILocalVariable *getVariable() const { return Var; }
   const DILocation *getInlinedAt() const { return IA; }
-  const ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+  ArrayRef<const DIExpression *> getExpression() const { return Expr; }
   void setDIE(DIE &D) { TheDIE = &D; }
   DIE *getDIE() const { return TheDIE; }
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
   unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
   StringRef getName() const { return Var->getName(); }
   const MachineInstr *getMInsn() const { return MInsn; }
-  const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+  ArrayRef<int> getFrameIndex() const { return FrameIndex; }
 
   void addMMIEntry(const DbgVariable &V) {
     assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
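A side note on the accessor hunk just above: dropping the top-level `const` from a by-value `ArrayRef` return is a deliberate cleanup. `ArrayRef` is a non-owning (pointer, length) view, so returning it by value is already cheap, and `const` on a by-value return type adds nothing. A minimal sketch of the resulting pattern, with an illustrative type rather than the LLVM `DbgVariable`:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

struct Var {
  llvm::SmallVector<int, 4> FrameIndices;
  // Return a cheap view by value; callers must not outlive the owner.
  llvm::ArrayRef<int> getFrameIndices() const { return FrameIndices; }
};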
@@ -156,7 +139,8 @@ public:
 
   // Translate tag to proper Dwarf tag.
   dwarf::Tag getTag() const {
-    if (Var->getTag() == dwarf::DW_TAG_arg_variable)
+    // FIXME: Why don't we just infer this tag and store it all along?
+    if (Var->isParameter())
       return dwarf::DW_TAG_formal_parameter;
 
     return dwarf::DW_TAG_variable;
@@ -282,11 +266,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Holders for the various debug information flags that we might need to
   /// have exposed. See accessor functions below for description.
 
-  /// Holder for imported entities.
-  typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
-      ImportedEntityMap;
-  ImportedEntityMap ScopesWithImportedEntities;
-
   /// Map from MDNodes for user-defined types to the type units that
   /// describe them.
   DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
@@ -298,16 +277,12 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Whether to emit the pubnames/pubtypes sections.
   bool HasDwarfPubSections;
 
-  /// Whether or not to use AT_ranges for compilation units.
-  bool HasCURanges;
-
-  /// Whether we emitted a function into a section other than the
-  /// default text.
-  bool UsedNonDefaultText;
-
   /// Whether to use the GNU TLS opcode (instead of the standard opcode).
   bool UseGNUTLSOpcode;
 
+  /// Whether to emit DW_AT_[MIPS_]linkage_name.
+  bool UseLinkageNames;
+
   /// Version of dwarf we're emitting.
   unsigned DwarfVersion;
 
@@ -338,7 +313,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// True iff there are multiple CUs in this module.
   bool SingleCU;
   bool IsDarwin;
-  bool IsPS4;
 
   AddressPool AddrPool;
 
@@ -347,7 +321,8 @@ class DwarfDebug : public AsmPrinterHandler {
   DwarfAccelTable AccelNamespace;
   DwarfAccelTable AccelTypes;
 
-  DenseMap<const Function *, DISubprogram *> FunctionDIs;
+  // Identify a debugger for "tuning" the debug info.
+  DebuggerKind DebuggerTuning;
 
   MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
 
@@ -372,12 +347,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Construct a DIE for this abstract scope.
   void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
 
-  /// Compute the size and offset of a DIE given an incoming Offset.
-  unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
-
-  /// Compute the size and offset of all the DIEs.
-  void computeSizeAndOffsets();
-
   /// Collect info for variables that were optimized out.
   void collectDeadVariables();
 
@@ -443,9 +412,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Emit visible names into a debug ranges section.
   void emitDebugRanges();
 
-  /// Emit inline info using custom format.
-  void emitDebugInlineInfo();
-
   /// DWARF 5 Experimental Split Dwarf Emitters
 
   /// Initialize common features of skeleton units.
@@ -456,10 +422,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// section.
   DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
 
-  /// Construct the split debug info compile unit for the debug info
-  /// section.
-  DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
-
   /// Emit the debug info dwo section.
   void emitDebugInfoDWO();
 
@@ -544,6 +506,9 @@ public:
   /// Process end of an instruction.
   void endInstruction() override;
 
+  /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
+  static uint64_t makeTypeSignature(StringRef Identifier);
+
   /// Add a DIE to the set of types that we're going to pull into
   /// type units.
   void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
@@ -558,10 +523,22 @@ public:
     SymSize[Sym] = Size;
   }
 
+  /// Returns whether to emit DW_AT_[MIPS_]linkage_name.
+  bool useLinkageNames() const { return UseLinkageNames; }
+
   /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
   /// standard DW_OP_form_tls_address opcode
   bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
 
+  /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+  ///
+  /// Returns whether we are "tuning" for a given debugger.
+  /// @{
+  bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+  bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+  bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+  /// @}
+
   // Experimental DWARF5 features.
 
   /// Returns whether or not to emit tables that dwarf consumers can
@@ -604,9 +581,6 @@ public:
   DwarfCompileUnit *lookupUnit(const DIE *CU) const {
     return CUDieMap.lookup(CU);
   }
-
-  /// isSubprogramContext - Return true if Context is either a subprogram
-  /// or another context nested inside a subprogram.
-  bool isSubprogramContext(const MDNode *Context);
 
   void addSubprogramNames(const DISubprogram *SP, DIE &Die);
 
@@ -622,14 +596,6 @@ public:
 
   const MachineFunction *getCurrentFunction() const { return CurFn; }
 
-  iterator_range<ImportedEntityMap::const_iterator>
-  findImportedEntitiesForScope(const MDNode *Scope) const {
-    return make_range(std::equal_range(
-        ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
-        std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
-        less_first()));
-  }
-
   /// A helper function to check whether the DIE for a given Scope is
   /// going to be null.
   bool isLexicalScopeDIENull(LexicalScope *Scope);
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a2799b8d6300..7b5b831da166 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -211,12 +211,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
     return AddMachineRegPiece(MachineReg, SizeInBits,
                               getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
   }
-  case dwarf::DW_OP_plus: {
-    // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+  case dwarf::DW_OP_plus:
+  case dwarf::DW_OP_minus: {
+    // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
+    // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
    auto N = I.getNext();
     if (N != E && N->getOp() == dwarf::DW_OP_deref) {
       unsigned Offset = I->getArg(0);
-      ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+      ValidReg = AddMachineRegIndirect(
+          MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
       std::advance(I, 2);
       break;
     } else
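The DwarfExpression hunks here deal with an asymmetry in DWARF: there is a single-opcode form for adding a constant (DW_OP_plus_uconst) but no DW_OP_minus_uconst, so subtracting a constant must be spelled as a push followed by a subtract. A small sketch of the two emission patterns, using illustrative helpers over a plain opcode vector:

#include "llvm/Support/Dwarf.h"
#include <cstdint>
#include <vector>

using namespace llvm;

// Add a constant: one opcode with an inline ULEB operand.
void emitAddConst(std::vector<uint64_t> &Ops, uint64_t N) {
  Ops.push_back(dwarf::DW_OP_plus_uconst);
  Ops.push_back(N);
}

// Subtract a constant: no DW_OP_minus_uconst exists, so push N with
// DW_OP_constu and then apply DW_OP_minus -- exactly what the
// AddExpression() case added below does.
void emitSubConst(std::vector<uint64_t> &Ops, uint64_t N) {
  Ops.push_back(dwarf::DW_OP_constu);
  Ops.push_back(N);
  Ops.push_back(dwarf::DW_OP_minus);
}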
@@ -255,6 +258,12 @@ void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
       EmitOp(dwarf::DW_OP_plus_uconst);
       EmitUnsigned(I->getArg(0));
       break;
+    case dwarf::DW_OP_minus:
+      // There is no OP_minus_uconst.
+      EmitOp(dwarf::DW_OP_constu);
+      EmitUnsigned(I->getArg(0));
+      EmitOp(dwarf::DW_OP_minus);
+      break;
     case dwarf::DW_OP_deref:
       EmitOp(dwarf::DW_OP_deref);
       break;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 355582298e5e..d75fea5d8c8a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -192,18 +192,19 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
                  DIEInteger(1));
 }
 
-void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, uint64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(false, Integer);
   Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
 }
 
-void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+                        uint64_t Integer) {
   addUInt(Block, (dwarf::Attribute)0, Form, Integer);
 }
 
-void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(true, Integer);
@@ -222,9 +223,10 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
                 DIEString(DU->getStringPool().getEntry(*Asm, String)));
 }
 
-DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute,
-                                        dwarf::Form Form,
-                                        const MCSymbol *Label) {
+DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
+                                                 dwarf::Attribute Attribute,
+                                                 dwarf::Form Form,
+                                                 const MCSymbol *Label) {
   return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
 }
 
@@ -277,6 +279,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
                dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
 }
 
+void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+                                    StringRef Identifier) {
+  uint64_t Signature = DD->makeTypeSignature(Identifier);
+  Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
+               DIEInteger(Signature));
+}
+
 void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
                             DIEEntry Entry) {
   const DIE *DieCU = Die.getUnitOrNull();
@@ -292,8 +301,6 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
 }
 
 DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
-  assert(Tag != dwarf::DW_TAG_auto_variable &&
-         Tag != dwarf::DW_TAG_arg_variable);
   DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
   if (N)
     insertDIE(N, &Die);
@@ -445,7 +452,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
 
   // Find the __forwarding field and the variable field in the __Block_byref
   // struct.
-  DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements();
+  DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
   const DIDerivedType *varField = nullptr;
   const DIDerivedType *forwardingField = nullptr;
 
@@ -506,34 +513,35 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
 
 /// Return true if type encoding is unsigned.
 static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
-  if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) {
+  if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+    // FIXME: Enums without a fixed underlying type have unknown signedness
+    // here, leading to incorrectly emitted constants.
+    if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+      return false;
+
+    // (Pieces of) aggregate types that get hacked apart by SROA may be
+    // represented by a constant. Encode them as unsigned bytes.
+    return true;
+  }
+
+  if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
     dwarf::Tag T = (dwarf::Tag)Ty->getTag();
     // Encode pointer constants as unsigned bytes. This is used at least for
     // null pointer constant emission.
-    // (Pieces of) aggregate types that get hacked apart by SROA may also be
-    // represented by a constant. Encode them as unsigned bytes.
     // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
     // here, but accept them for now due to a bug in SROA producing bogus
     // dbg.values.
-    if (T == dwarf::DW_TAG_array_type ||
-        T == dwarf::DW_TAG_class_type ||
-        T == dwarf::DW_TAG_pointer_type ||
+    if (T == dwarf::DW_TAG_pointer_type ||
         T == dwarf::DW_TAG_ptr_to_member_type ||
         T == dwarf::DW_TAG_reference_type ||
-        T == dwarf::DW_TAG_rvalue_reference_type ||
-        T == dwarf::DW_TAG_structure_type ||
-        T == dwarf::DW_TAG_union_type)
+        T == dwarf::DW_TAG_rvalue_reference_type)
       return true;
     assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
            T == dwarf::DW_TAG_volatile_type ||
-           T == dwarf::DW_TAG_restrict_type ||
-           T == dwarf::DW_TAG_enumeration_type);
-    if (DITypeRef Deriv = DTy->getBaseType())
-      return isUnsignedDIType(DD, DD->resolve(Deriv));
-    // FIXME: Enums without a fixed underlying type have unknown signedness
-    // here, leading to incorrectly emitted constants.
-    assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
-    return false;
+           T == dwarf::DW_TAG_restrict_type);
+    DITypeRef Deriv = DTy->getBaseType();
+    assert(Deriv && "Expected valid base type");
+    return isUnsignedDIType(DD, DD->resolve(Deriv));
   }
 
   auto *BTy = cast<DIBasicType>(Ty);
@@ -659,7 +667,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
 }
 
 void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
-  if (!LinkageName.empty())
+  if (!LinkageName.empty() && DD->useLinkageNames())
     addString(Die,
               DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
                                          : dwarf::DW_AT_MIPS_linkage_name,
@@ -685,6 +693,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
     return getOrCreateNameSpace(NS);
   if (auto *SP = dyn_cast<DISubprogram>(Context))
     return getOrCreateSubprogramDIE(SP);
+  if (auto *M = dyn_cast<DIModule>(Context))
+    return getOrCreateModule(M);
   return getDIE(Context);
 }
 
@@ -700,7 +710,8 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
 
   constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
 
-  updateAcceleratorTables(Context, Ty, TyDIE);
+  if (!Ty->isExternalTypeRef())
+    updateAcceleratorTables(Context, Ty, TyDIE);
   return &TyDIE;
 }
 
@@ -753,7 +764,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
                                         const DIType *Ty, const DIE &TyDIE) {
   if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
     bool IsImplementation = 0;
-    if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) {
+    if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
       // A runtime language of 0 actually means C/C++ and that any
       // non-negative value is some version of Objective-C/C++.
       IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
@@ -795,8 +806,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
 
   // Reverse iterate over our list to go from the outermost construct to the
   // innermost.
-  for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
-    const DIScope *Ctx = *I;
+  for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
     StringRef Name = Ctx->getName();
     if (Name.empty() && isa<DINamespace>(Ctx))
       Name = "(anonymous namespace)";
@@ -843,7 +853,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
   // Add size if non-zero (derived types might be zero-sized.)
   if (Size && Tag != dwarf::DW_TAG_pointer_type
-           && Tag != dwarf::DW_TAG_ptr_to_member_type)
+           && Tag != dwarf::DW_TAG_ptr_to_member_type
+           && Tag != dwarf::DW_TAG_reference_type
+           && Tag != dwarf::DW_TAG_rvalue_reference_type)
     addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
 
   if (Tag == dwarf::DW_TAG_ptr_to_member_type)
@@ -899,6 +911,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
 }
 
 void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+  if (CTy->isExternalTypeRef()) {
+    StringRef Identifier = CTy->getIdentifier();
+    assert(!Identifier.empty() && "external type ref without identifier");
+    addFlag(Buffer, dwarf::DW_AT_declaration);
+    return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
+  }
+
   // Add name if not anonymous or intermediate type.
   StringRef Name = CTy->getName();
 
@@ -1134,6 +1153,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
            "definition DIE was created in "
            "getOrCreateSubprogramDIE");
     DeclLinkageName = SPDecl->getLinkageName();
+    unsigned DeclID =
+        getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
+    unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+    if (DeclID != DefID)
+      addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+    if (SP->getLine() != SPDecl->getLine())
+      addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
   }
 
   // Add function template parameters.
@@ -1180,11 +1207,10 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
                                         Language == dwarf::DW_LANG_ObjC))
     addFlag(SPDie, dwarf::DW_AT_prototyped);
 
-  const DISubroutineType *SPTy = SP->getType();
-  assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
-         "the type of a subprogram should be a subroutine");
+  DITypeRefArray Args;
+  if (const DISubroutineType *SPTy = SP->getType())
+    Args = SPTy->getTypeArray();
 
-  auto Args = SPTy->getTypeArray();
   // Add a return type. If this is a type like a C/C++ void type we don't add a
   // return type.
   if (Args.size())
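The retyping of addUInt/addSInt/addLabel above, from DIE to DIEValueList, is the usual trick of widening a helper to the common base so one overload serves DIE, DIELoc, and DIEBlock alike. A toy sketch of the shape of that refactor, with illustrative names rather than the LLVM classes:

#include <cstdint>
#include <utility>
#include <vector>

// Common base owning the attribute/value list.
struct ValueList {
  std::vector<std::pair<int, uint64_t>> Values; // (attribute, integer)
  void addValue(int Attr, uint64_t V) { Values.emplace_back(Attr, V); }
};

struct Die : ValueList {};      // stands in for DIE
struct LocBlock : ValueList {}; // stands in for DIELoc/DIEBlock

// One helper, typed on the base, works for every list-bearing node.
void addUInt(ValueList &List, int Attr, uint64_t V) {
  List.addValue(Attr, V);
}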
   /// Returns the DIE map slot for the specified debug variable.
   ///
   /// We delegate the request to DwarfDebug when the MDNode can be part of the
@@ -186,14 +176,14 @@ public:
   void addFlag(DIE &Die, dwarf::Attribute Attribute);
 
   /// Add an unsigned integer attribute data and value.
-  void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               uint64_t Integer);
+  void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+               Optional<dwarf::Form> Form, uint64_t Integer);
 
-  void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
+  void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
 
   /// Add an signed integer attribute data and value.
-  void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               int64_t Integer);
+  void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+               Optional<dwarf::Form> Form, int64_t Integer);
 
   void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
 
@@ -206,8 +196,10 @@ public:
   void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
 
   /// Add a Dwarf label attribute data and value.
-  DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
-                               dwarf::Form Form, const MCSymbol *Label);
+  DIEValueList::value_iterator addLabel(DIEValueList &Die,
+                                        dwarf::Attribute Attribute,
+                                        dwarf::Form Form,
+                                        const MCSymbol *Label);
 
   void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
 
@@ -228,7 +220,11 @@ public:
   /// Add a DIE attribute data and value.
   void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
 
+  /// Add a type's DW_AT_signature and set the declaration flag.
   void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
+  /// Add an attribute containing the type signature for a unique identifier.
+  void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+                           StringRef Identifier);
 
   /// Add block data.
   void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 49ef8d3ddc8f..e24dcb1bffd4 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -662,9 +662,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
     Entry = TypeInfos.size();
   }
 
-  for (std::vector<const GlobalValue *>::const_reverse_iterator
-         I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
-    const GlobalValue *GV = *I;
+  for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
+                                          TypeInfos.rend())) {
     if (VerboseAsm)
       Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
     Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index e42e082acbf9..c6a0e9d0524c 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -76,10 +76,6 @@ protected:
                             SmallVectorImpl<ActionEntry> &Actions,
                             SmallVectorImpl<unsigned> &FirstActions);
 
-  /// Return `true' if this is a call to a function marked `nounwind'. Return
-  /// `false' otherwise.
-  bool callToNoUnwindFunction(const MachineInstr *MI);
-
   void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      RangeMapType &PadMap);
 
@@ -131,6 +127,10 @@ public:
   void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
   void beginInstruction(const MachineInstr *MI) override {}
   void endInstruction() override {}
+
+  /// Return `true' if this is a call to a function marked `nounwind'. Return
+  /// `false' otherwise.
+  static bool callToNoUnwindFunction(const MachineInstr *MI);
 };
 }
 
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index eb9e4c10daf4..6a023b998b32 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -48,7 +48,7 @@ void llvm::linkErlangGCPrinter() {}
 void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
                                      AsmPrinter &AP) {
   MCStreamer &OS = *AP.OutStreamer;
-  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+  unsigned IntPtrSize = M.getDataLayout().getPointerSize();
 
   // Put this in a custom .note section.
   OS.SwitchSection(
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 2ceec61ab5ca..c09ef6adea69 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -93,7 +93,7 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
 ///
 void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
                                             AsmPrinter &AP) {
-  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+  unsigned IntPtrSize = M.getDataLayout().getPointerSize();
 
   AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
   EmitCamlGlobal(M, AP, "code_end");
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6610ac78f8c4..c2c0f84e5c92 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -27,15 +27,15 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
   auto *Scope = cast<DIScope>(S);
   StringRef Dir = Scope->getDirectory(),
             Filename = Scope->getFilename();
-  char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
-  if (Result)
-    return Result;
+  std::string &Filepath =
+      DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
+  if (!Filepath.empty())
+    return Filepath;
 
   // Clang emits directory and relative filename info into the IR, but CodeView
   // operates on full paths.  We could change Clang to emit full paths too, but
   // that would increase the IR size and probably not needed for other users.
   // For now, just concatenate and canonicalize the path here.
-  std::string Filepath;
   if (Filename.find(':') == 1)
     Filepath = Filename;
   else
@@ -74,8 +74,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
     Filepath.erase(Cursor, 1);
 
-  Result = strdup(Filepath.c_str());
-  return StringRef(Result);
+  return Filepath;
 }
 
 void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
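The getFullFilepath() change above swaps a raw `char *` cache (filled with strdup and freed in a destructor, removed further below) for a map of std::string, where an empty slot doubles as "not computed yet". A minimal sketch of the resulting caching pattern, with hypothetical names:

#include <map>
#include <string>
#include <utility>

// operator[] default-constructs the slot; an empty string means the
// path has not been computed. No strdup, no manual free on teardown.
std::string &fullPath(
    std::map<std::pair<std::string, std::string>, std::string> &Cache,
    const std::string &Dir, const std::string &File) {
  std::string &Path = Cache[std::make_pair(Dir, File)];
  if (Path.empty())
    Path = Dir + "/" + File; // the real code also canonicalizes separators
  return Path;
}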
Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 43d1a432712e..78068e07c16f 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { } } FileNameRegistry; - typedef std::map, char *> + typedef std::map, std::string> DirAndFilenameToFilepathMapTy; DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; StringRef getFullFilepath(const MDNode *S); @@ -116,14 +116,6 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() override { - for (DirAndFilenameToFilepathMapTy::iterator - I = DirAndFilenameToFilepathMap.begin(), - E = DirAndFilenameToFilepathMap.end(); - I != E; ++I) - free(I->second); - } - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index a2b9316aa875..48b7104f24c3 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -37,6 +38,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -62,9 +64,9 @@ void WinException::beginFunction(const MachineFunction *MF) { // If any landing pads survive, we need an EH table. bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasEHFunclets = MMI->hasEHFunclets(); const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); shouldEmitMoves = Asm->needsSEHMoves(); @@ -78,49 +80,23 @@ void WinException::beginFunction(const MachineFunction *MF) { F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); - shouldEmitPersonality = forceEmitPersonality || (hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per); + shouldEmitPersonality = + forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && + PerEncoding != dwarf::DW_EH_PE_omit && Per); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - // If we're not using CFI, we don't want the CFI or the personality. If - // WinEHPrepare outlined something, we should emit the LSDA. + // If we're not using CFI, we don't want the CFI or the personality, but we + // might want EH tables if we had EH pads. 
if (!Asm->MAI->usesWindowsCFI()) { - bool HasOutlinedChildren = - F->hasFnAttribute("wineh-parent") && F == ParentF; - shouldEmitLSDA = HasOutlinedChildren; + shouldEmitLSDA = hasEHFunclets; shouldEmitPersonality = false; return; } - // If this was an outlined handler, we need to define the label corresponding - // to the offset of the parent frame relative to the stack pointer after the - // prologue. - if (F != ParentF) { - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); - auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { - MCSymbol *HandlerTypeParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(F->getName())); - - // Emit a symbol assignment. - Asm->OutStreamer->EmitAssignment( - HandlerTypeParentFrameOffset, - MCConstantExpr::create(I->second, Asm->OutContext)); - } - } - - if (shouldEmitMoves || shouldEmitPersonality) - Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); - - if (shouldEmitPersonality) { - const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); - } + beginFunclet(MF->front(), Asm->CurrentFnSym); } /// endFunction - Gather and emit post-function exception information. @@ -134,43 +110,158 @@ void WinException::endFunction(const MachineFunction *MF) { if (F->hasPersonalityFn()) Per = classifyEHPersonality(F->getPersonalityFn()); - // Get rid of any dead landing pads if we're not using a Windows EH scheme. In - // Windows EH schemes, the landing pad is not actually reachable. It only - // exists so that we can emit the right table data. - if (!isMSVCEHPersonality(Per)) + // Get rid of any dead landing pads if we're not using funclets. In funclet + // schemes, the landing pad is not actually reachable. It only exists so + // that we can emit the right table data. + if (!isFuncletEHPersonality(Per)) MMI->TidyLandingPads(); + endFunclet(); + + // endFunclet will emit the necessary .xdata tables for x64 SEH. + if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets()) + return; + if (shouldEmitPersonality || shouldEmitLSDA) { Asm->OutStreamer->PushSection(); - if (shouldEmitMoves || shouldEmitPersonality) { - // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); - } else { - // Just switch sections to the right xdata section. This use of - // CurrentFnSym assumes that we only emit the LSDA when ending the parent - // function. - MCSection *XData = WinEH::UnwindEmitter::getXDataSection( - Asm->CurrentFnSym, Asm->OutContext); - Asm->OutStreamer->SwitchSection(XData); - } + // Just switch sections to the right xdata section. This use of CurrentFnSym + // assumes that we only emit the LSDA when ending the parent function. + MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym, + Asm->OutContext); + Asm->OutStreamer->SwitchSection(XData); // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. 
if (Per == EHPersonality::MSVC_Win64SEH)
-      emitCSpecificHandlerTable();
+      emitCSpecificHandlerTable(MF);
     else if (Per == EHPersonality::MSVC_X86SEH)
       emitExceptHandlerTable(MF);
     else if (Per == EHPersonality::MSVC_CXX)
       emitCXXFrameHandler3Table(MF);
+    else if (Per == EHPersonality::CoreCLR)
+      emitCLRExceptionTable(MF);
     else
       emitExceptionTable();

     Asm->OutStreamer->PopSection();
   }
+}

+/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+                                   const MachineBasicBlock *MBB) {
+  if (!MBB)
+    return nullptr;
+
+  assert(MBB->isEHFuncletEntry());
+
+  // Give catches and cleanups a name based on their parent function and
+  // their funclet entry block's number.
+  const MachineFunction *MF = MBB->getParent();
+  const Function *F = MF->getFunction();
+  StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+  MCContext &Ctx = MF->getContext();
+  StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+  return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+                               Twine(MBB->getNumber()) + "@?0?" +
+                               FuncLinkageName + "@4HA");
+}
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+                                MCSymbol *Sym) {
+  CurrentFuncletEntry = &MBB;
+
+  const Function *F = Asm->MF->getFunction();
+  // If a symbol was not provided for the funclet, invent one.
+  if (!Sym) {
+    Sym = getMCSymbolForMBB(Asm, &MBB);
+
+    // Describe our funclet symbol as a function with internal linkage.
+    Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+    Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+    Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                         << COFF::SCT_COMPLEX_TYPE_SHIFT);
+    Asm->OutStreamer->EndCOFFSymbolDef();
+
+    // We want our funclet's entry point to be aligned such that no nops will
+    // be present after the label.
+    Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+                       F);
+
+    // Now that we've emitted the alignment directive, point at our funclet.
+    Asm->OutStreamer->EmitLabel(Sym);
+  }
+
+  // Mark 'Sym' as starting our funclet.
   if (shouldEmitMoves || shouldEmitPersonality)
+    Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+
+  if (shouldEmitPersonality) {
+    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+    const Function *PerFn = nullptr;
+
+    // Determine which personality routine we are using for this funclet.
+    if (F->hasPersonalityFn())
+      PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+    const MCSymbol *PersHandlerSym =
+        TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
+
+    // Classify the personality routine so that we may reason about it.
+    EHPersonality Per = EHPersonality::Unknown;
+    if (F->hasPersonalityFn())
+      Per = classifyEHPersonality(F->getPersonalityFn());
+
+    // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
+    if (Per != EHPersonality::MSVC_CXX ||
+        !CurrentFuncletEntry->isCleanupFuncletEntry())
+      Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+  }
+}
+
+void WinException::endFunclet() {
+  // No funclet to process? Great, we have nothing to do.
+  if (!CurrentFuncletEntry)
+    return;
+
+  if (shouldEmitMoves || shouldEmitPersonality) {
+    const Function *F = Asm->MF->getFunction();
+    EHPersonality Per = EHPersonality::Unknown;
+    if (F->hasPersonalityFn())
+      Per = classifyEHPersonality(F->getPersonalityFn());
+
+    // The .seh_handlerdata directive implicitly switches section; push the
+    // current section so that we may return to it.
+ Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && + !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // If this is a C++ catch funclet (or the parent function), + // emit a reference to the LSDA for the parent function. + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", FuncLinkageName)); + Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + !CurrentFuncletEntry->isEHFuncletEntry()) { + // If this is the parent function in Win64 SEH, emit the LSDA immediately + // following .seh_handlerdata. + emitCSpecificHandlerTable(Asm->MF); + } + + // Switch back to the previous section now that we are done writing to + // .xdata. + Asm->OutStreamer->PopSection(); + + // Emit a .seh_endproc directive to mark the end of the function. Asm->OutStreamer->EmitWinCFIEndProc(); + } + + // Let's make sure we don't try to end the same funclet twice. + CurrentFuncletEntry = nullptr; } const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { @@ -188,6 +279,202 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { return create32bitRef(Asm->getSymbol(GV)); } +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { + return MCBinaryExpr::createAdd(create32bitRef(Label), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OffsetOf, Asm->OutContext), + MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext); +} + +const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +int WinException::getFrameIndexOffset(int FrameIndex, + const WinEHFuncInfo &FuncInfo) { + const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + if (Asm->MAI->usesWindowsCFI()) + return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. + assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; +} + +namespace { + +/// Top-level state used to represent unwind to caller +const int NullState = -1; + +struct InvokeStateChange { + /// EH Label immediately after the last invoke in the previous state, or + /// nullptr if the previous state was the null state. + const MCSymbol *PreviousEndLabel; + + /// EH label immediately before the first invoke in the new state, or nullptr + /// if the new state is the null state. + const MCSymbol *NewStartLabel; + + /// State of the invoke following NewStartLabel, or NullState to indicate + /// the presence of calls which may unwind to caller. + int NewState; +}; + +/// Iterator that reports all the invoke state changes in a range of machine +/// basic blocks. 
Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+  InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+                            MachineFunction::const_iterator MFI,
+                            MachineFunction::const_iterator MFE,
+                            MachineBasicBlock::const_iterator MBBI,
+                            int BaseState)
+      : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+    LastStateChange.PreviousEndLabel = nullptr;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    scan();
+  }
+
+public:
+  static iterator_range<InvokeStateChangeIterator>
+  range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+        MachineFunction::const_iterator End, int BaseState = NullState) {
+    // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+    // the end of the last block.
+    assert(Begin != End);
+    auto BlockBegin = Begin->begin();
+    auto BlockEnd = std::prev(End)->end();
+    return make_range(
+        InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+        InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+  }
+
+  // Iterator methods.
+  bool operator==(const InvokeStateChangeIterator &O) const {
+    assert(BaseState == O.BaseState);
+    // Must be visiting same block.
+    if (MFI != O.MFI)
+      return false;
+    // Must be visiting same instr.
+    if (MBBI != O.MBBI)
+      return false;
+    // At end of block/instr iteration, we can still have two distinct states:
+    // one to report the final EndLabel, and another indicating the end of the
+    // state change iteration. Check for CurrentEndLabel equality to
+    // distinguish these.
+    return CurrentEndLabel == O.CurrentEndLabel;
+  }
+
+  bool operator!=(const InvokeStateChangeIterator &O) const {
+    return !operator==(O);
+  }
+  InvokeStateChange &operator*() { return LastStateChange; }
+  InvokeStateChange *operator->() { return &LastStateChange; }
+  InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+  InvokeStateChangeIterator &scan();
+
+  const WinEHFuncInfo &EHInfo;
+  const MCSymbol *CurrentEndLabel = nullptr;
+  MachineFunction::const_iterator MFI;
+  MachineFunction::const_iterator MFE;
+  MachineBasicBlock::const_iterator MBBI;
+  InvokeStateChange LastStateChange;
+  bool VisitingInvoke = false;
+  int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+  bool IsNewBlock = false;
+  for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+    if (IsNewBlock)
+      MBBI = MFI->begin();
+    for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      const MachineInstr &MI = *MBBI;
+      if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+          MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+        // Indicate a change of state to the null state. We don't have
+        // start/end EH labels handy but the caller won't expect them for
+        // null state regions.
+        LastStateChange.PreviousEndLabel = CurrentEndLabel;
+        LastStateChange.NewStartLabel = nullptr;
+        LastStateChange.NewState = BaseState;
+        CurrentEndLabel = nullptr;
+        // Don't re-visit this instr on the next scan
+        ++MBBI;
+        return *this;
+      }
+
+      // All other state changes are at EH labels before/after invokes.
+      if (!MI.isEHLabel())
+        continue;
+      MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+      if (Label == CurrentEndLabel) {
+        VisitingInvoke = false;
+        continue;
+      }
+      auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+      // Ignore EH labels that aren't the ones inserted before an invoke
+      if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+        continue;
+      auto &StateAndEnd = InvokeMapIter->second;
+      int NewState = StateAndEnd.first;
+      // Keep track of the fact that we're between EH start/end labels so
+      // we know not to treat the invoke we'll see as unwinding to caller.
+      VisitingInvoke = true;
+      if (NewState == LastStateChange.NewState) {
+        // The state isn't actually changing here. Record the new end and
+        // keep going.
+        CurrentEndLabel = StateAndEnd.second;
+        continue;
+      }
+      // Found a state change to report
+      LastStateChange.PreviousEndLabel = CurrentEndLabel;
+      LastStateChange.NewStartLabel = Label;
+      LastStateChange.NewState = NewState;
+      // Start keeping track of the new current end
+      CurrentEndLabel = StateAndEnd.second;
+      // Don't re-visit this instr on the next scan
+      ++MBBI;
+      return *this;
+    }
+  }
+  // Iteration hit the end of the block range.
+  if (LastStateChange.NewState != BaseState) {
+    // Report the end of the last new state
+    LastStateChange.PreviousEndLabel = CurrentEndLabel;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    // Leave CurrentEndLabel non-null to distinguish this state from end.
+    assert(CurrentEndLabel != nullptr);
+    return *this;
+  }
+  // We've reported all state changes and hit the end state.
+  CurrentEndLabel = nullptr;
+  return *this;
+}
+
 /// Emit the language-specific data that __C_specific_handler expects. This
 /// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
 /// up after faults with __try, __except, and __finally. The typeinfo values
@@ -216,135 +503,156 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
 /// imagerel32 LabelLPad; // Zero means __finally.
 /// } Entries[NumEntries];
 /// };
-void WinException::emitCSpecificHandlerTable() {
-  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+  auto &OS = *Asm->OutStreamer;
+  MCContext &Ctx = Asm->OutContext;
+  const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();

-  // Simplifying assumptions for first implementation:
-  // - Cleanups are not implemented.
-  // - Filters are not implemented.
+  bool VerboseAsm = OS.isVerboseAsm();
+  auto AddComment = [&](const Twine &Comment) {
+    if (VerboseAsm)
+      OS.AddComment(Comment);
+  };

-  // The Itanium LSDA table sorts similar landing pads together to simplify the
-  // actions table, but we don't need that.
-  SmallVector<const LandingPadInfo *, 4> LandingPads;
-  LandingPads.reserve(PadInfos.size());
-  for (const auto &LP : PadInfos)
-    LandingPads.push_back(&LP);
+  // Emit a label assignment with the SEH frame offset so we can use it for
+  // llvm.x86.seh.recoverfp.
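+  // For illustration only (the exact symbol spelling is an assumption, not
+  // taken from this patch): for a function whose linkage name is "f", the
+  // assignment emitted below would print roughly as
+  //   "f$parent_frame_offset" = 16
+  // and llvm.x86.seh.recoverfp adds that constant to the establisher frame
+  // pointer to recover the parent function's frame.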
+ StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); - // Compute label ranges for call sites as we would for the Itanium LSDA, but - // use an all zero action table because we aren't using these actions. - SmallVector FirstActions; - FirstActions.resize(LandingPads.size()); - SmallVector CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); + // Use the assembler to compute the number of table entries through label + // difference and division. + MCSymbol *TableBegin = + Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true); + MCSymbol *TableEnd = + Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true); + const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin); + const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); + const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); + AddComment("Number of call sites"); + OS.EmitValue(EntryCount, 4); - MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); - MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); + OS.EmitLabel(TableBegin); - // Emit the number of table entries. - unsigned NumEntries = 0; - for (const CallSiteEntry &CSE : CallSites) { - if (!CSE.LPad) - continue; // Ignore gaps. - NumEntries += CSE.LPad->SEHHandlers.size(); + // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only + // models exceptions from invokes. LLVM also allows arbitrary reordering of + // the code, so our tables end up looking a bit different. Rather than + // trying to match MSVC's tables exactly, we emit a denormalized table. For + // each range of invokes in the same state, we emit table entries for all + // the actions that would be taken in that state. This means our tables are + // slightly bigger, which is OK. + const MCSymbol *LastStartLabel = nullptr; + int LastEHState = -1; + // Break out before we enter into a finally funclet. + // FIXME: We need to emit separate EH tables for cleanups. + MachineFunction::const_iterator End = MF->end(); + MachineFunction::const_iterator Stop = std::next(MF->begin()); + while (Stop != End && !Stop->isEHFuncletEntry()) + ++Stop; + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) { + // Emit all the actions for the state we just transitioned out of + // if it was not the null state + if (LastEHState != -1) + emitSEHActionsForRange(FuncInfo, LastStartLabel, + StateChange.PreviousEndLabel, LastEHState); + LastStartLabel = StateChange.NewStartLabel; + LastEHState = StateChange.NewState; } - Asm->OutStreamer->EmitIntValue(NumEntries, 4); - // If there are no actions, we don't need to iterate again. - if (NumEntries == 0) - return; + OS.EmitLabel(TableEnd); +} - // Emit the four-label records for each call site entry. The table has to be - // sorted in layout order, and the call sites should already be sorted. - for (const CallSiteEntry &CSE : CallSites) { - // Ignore gaps. Unlike the Itanium model, unwinding through a frame without - // an EH table entry will propagate the exception rather than terminating - // the program. 
- if (!CSE.LPad) - continue; - const LandingPadInfo *LPad = CSE.LPad; +void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; - // Compute the label range. We may reuse the function begin and end labels - // rather than forming new ones. - const MCExpr *Begin = - create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); - const MCExpr *End; - if (CSE.EndLabel) { - // The interval is half-open, so we have to add one to include the return - // address of the last invoke in the range. - End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel), - MCConstantExpr::create(1, Asm->OutContext), - Asm->OutContext); + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + assert(BeginLabel && EndLabel); + while (State != -1) { + const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State]; + const MCExpr *FilterOrFinally; + const MCExpr *ExceptOrNull; + auto *Handler = UME.Handler.get(); + if (UME.IsFinally) { + FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler)); + ExceptOrNull = MCConstantExpr::create(0, Ctx); } else { - End = create32bitRef(EHFuncEndSym); + // For an except, the filter can be 1 (catch-all) or a function + // label. + FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter) + : MCConstantExpr::create(1, Ctx); + ExceptOrNull = create32bitRef(Handler->getSymbol()); } - // Emit an entry for each action. - for (SEHHandler Handler : LPad->SEHHandlers) { - Asm->OutStreamer->EmitValue(Begin, 4); - Asm->OutStreamer->EmitValue(End, 4); + AddComment("LabelStart"); + OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + AddComment("LabelEnd"); + OS.EmitValue(getLabelPlusOne(EndLabel), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" + : "CatchAll"); + OS.EmitValue(FilterOrFinally, 4); + AddComment(UME.IsFinally ? "Null" : "ExceptionHandler"); + OS.EmitValue(ExceptOrNull, 4); - // Emit the filter or finally function pointer, if present. Otherwise, - // emit '1' to indicate a catch-all. - const Function *F = Handler.FilterOrFinally; - if (F) - Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4); - else - Asm->OutStreamer->EmitIntValue(1, 4); - - // Emit the recovery address, if present. Otherwise, this must be a - // finally. 
- const BlockAddress *BA = Handler.RecoverBA; - if (BA) - Asm->OutStreamer->EmitValue( - create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4); - else - Asm->OutStreamer->EmitIntValue(0, 4); - } + assert(UME.ToState < State && "states should decrease"); + State = UME.ToState; } } void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); auto &OS = *Asm->OutStreamer; - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef ParentLinkageName = - GlobalValue::getRealLinkageName(ParentF->getName()); + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + SmallVector, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; if (shouldEmitPersonality) { - FuncInfoXData = Asm->OutContext.getOrCreateSymbol( - Twine("$cppxdata$", ParentLinkageName)); - OS.EmitValue(create32bitRef(FuncInfoXData), 4); - - extendIP2StateTable(MF, ParentF, FuncInfo); - - // Defer emission until we've visited the parent function and all the catch - // handlers. Cleanups don't contribute to the ip2state table, so don't count - // them. - if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) - return; - ++FuncInfo.NumIPToStateFuncsVisited; - if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) - return; + // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from + // IPs to state numbers. + FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName)); + computeIP2StateTable(MF, FuncInfo, IPToStateTable); } else { - FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName); - emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName); + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName); } + int UnwindHelpOffset = 0; + if (Asm->MAI->usesWindowsCFI()) + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); + MCSymbol *UnwindMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr; MCSymbol *IPToStateXData = nullptr; - if (!FuncInfo.UnwindMap.empty()) + if (!FuncInfo.CxxUnwindMap.empty()) UnwindMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$stateUnwindMap$", ParentLinkageName)); + Twine("$stateUnwindMap$", FuncLinkageName)); if (!FuncInfo.TryBlockMap.empty()) - TryBlockMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$tryMap$", ParentLinkageName)); - if (!FuncInfo.IPToStateList.empty()) - IPToStateXData = Asm->OutContext.getOrCreateSymbol( - Twine("$ip2state$", ParentLinkageName)); + TryBlockMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName)); + if (!IPToStateTable.empty()) + IPToStateXData = + Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName)); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; // FuncInfo { // uint32_t MagicNumber @@ -363,17 +671,38 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
OS.EmitValueToAlignment(4); OS.EmitLabel(FuncInfoXData); - OS.EmitIntValue(0x19930522, 4); // MagicNumber - OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState - OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap - OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks - OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap - OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries - OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap - if (Asm->MAI->usesWindowsCFI()) - OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp - OS.EmitIntValue(0, 4); // ESTypeList - OS.EmitIntValue(1, 4); // EHFlags + + AddComment("MagicNumber"); + OS.EmitIntValue(0x19930522, 4); + + AddComment("MaxState"); + OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + + AddComment("UnwindMap"); + OS.EmitValue(create32bitRef(UnwindMapXData), 4); + + AddComment("NumTryBlocks"); + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + + AddComment("TryBlockMap"); + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + + AddComment("IPMapEntries"); + OS.EmitIntValue(IPToStateTable.size(), 4); + + AddComment("IPToStateXData"); + OS.EmitValue(create32bitRef(IPToStateXData), 4); + + if (Asm->MAI->usesWindowsCFI()) { + AddComment("UnwindHelp"); + OS.EmitIntValue(UnwindHelpOffset, 4); + } + + AddComment("ESTypeList"); + OS.EmitIntValue(0, 4); + + AddComment("EHFlags"); + OS.EmitIntValue(1, 4); // UnwindMapEntry { // int32_t ToState; @@ -381,9 +710,14 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (UnwindMapXData) { OS.EmitLabel(UnwindMapXData); - for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { - OS.EmitIntValue(UME.ToState, 4); // ToState - OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action + for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { + MCSymbol *CleanupSym = + getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast()); + AddComment("ToState"); + OS.EmitIntValue(UME.ToState, 4); + + AddComment("Action"); + OS.EmitValue(create32bitRef(CleanupSym), 4); } } @@ -398,33 +732,49 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(TryBlockMapXData); SmallVector HandlerMaps; for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; - MCSymbol *HandlerMapXData = nullptr; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; if (!TBME.HandlerArray.empty()) HandlerMapXData = Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") .concat(Twine(I)) .concat("$") - .concat(ParentLinkageName)); - + .concat(FuncLinkageName)); HandlerMaps.push_back(HandlerMapXData); - int CatchHigh = -1; - for (WinEHHandlerType &HT : TBME.HandlerArray) - CatchHigh = - std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + // TBMEs should form intervals. 
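+    // Illustrative numbering (made up, not from this patch): for
+    //   try { try { ... } catch (A) { ... } } catch (B) { ... }
+    // the states might be 0 = outer try body, 1 = inner try body,
+    // 2 = catch (A), 3 = catch (B), giving TBMEs
+    //   { TryLow=1, TryHigh=1, CatchHigh=2 } and
+    //   { TryLow=0, TryHigh=2, CatchHigh=3 },
+    // which satisfy the interval asserts below.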
+ assert(0 <= TBME.TryLow && "bad trymap interval"); + assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval"); + assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval"); + assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) && + "bad trymap interval"); - assert(TBME.TryLow <= TBME.TryHigh); - OS.EmitIntValue(TBME.TryLow, 4); // TryLow - OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh - OS.EmitIntValue(CatchHigh, 4); // CatchHigh - OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches - OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray + AddComment("TryLow"); + OS.EmitIntValue(TBME.TryLow, 4); + + AddComment("TryHigh"); + OS.EmitIntValue(TBME.TryHigh, 4); + + AddComment("CatchHigh"); + OS.EmitIntValue(TBME.CatchHigh, 4); + + AddComment("NumCatches"); + OS.EmitIntValue(TBME.HandlerArray.size(), 4); + + AddComment("HandlerArray"); + OS.EmitValue(create32bitRef(HandlerMapXData), 4); + } + + // All funclets use the same parent frame offset currently. + unsigned ParentFrameOffset = 0; + if (shouldEmitPersonality) { + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF); } for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; MCSymbol *HandlerMapXData = HandlerMaps[I]; if (!HandlerMapXData) continue; @@ -438,32 +788,34 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { // Get the frame escape label with the offset of the catch object. If - // the index is -1, then there is no catch object, and we should emit an - // offset of zero, indicating that no copy will occur. + // the index is INT_MAX, then there is no catch object, and we should + // emit an offset of zero, indicating that no copy will occur. 
const MCExpr *FrameAllocOffsetRef = nullptr; - if (HT.CatchObjRecoverIdx >= 0) { - MCSymbol *FrameAllocOffset = - Asm->OutContext.getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(ParentF->getName()), - HT.CatchObjRecoverIdx); - FrameAllocOffsetRef = MCSymbolRefExpr::create( - FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + if (HT.CatchObj.FrameIndex != INT_MAX) { + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); } else { FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); } - OS.EmitIntValue(HT.Adjectives, 4); // Adjectives - OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type - OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset - OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler + MCSymbol *HandlerSym = + getMCSymbolForMBB(Asm, HT.Handler.dyn_cast()); + + AddComment("Adjectives"); + OS.EmitIntValue(HT.Adjectives, 4); + + AddComment("Type"); + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + + AddComment("CatchObjOffset"); + OS.EmitValue(FrameAllocOffsetRef, 4); + + AddComment("Handler"); + OS.EmitValue(create32bitRef(HandlerSym), 4); if (shouldEmitPersonality) { - MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(HT.Handler->getName())); - const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create( - ParentFrameOffset, Asm->OutContext); - OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + AddComment("ParentFrameOffset"); + OS.EmitIntValue(ParentFrameOffset, 4); } } } @@ -475,87 +827,65 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (IPToStateXData) { OS.EmitLabel(IPToStateXData); - for (auto &IPStatePair : FuncInfo.IPToStateList) { - OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP - OS.EmitIntValue(IPStatePair.second, 4); // State + for (auto &IPStatePair : IPToStateTable) { + AddComment("IP"); + OS.EmitValue(IPStatePair.first, 4); + AddComment("ToState"); + OS.EmitIntValue(IPStatePair.second, 4); } } } -void WinException::extendIP2StateTable(const MachineFunction *MF, - const Function *ParentF, - WinEHFuncInfo &FuncInfo) { - const Function *F = MF->getFunction(); +void WinException::computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl> &IPToStateTable) { - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector LandingPads; - const std::vector &PadInfos = MMI->getLandingPads(); - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - RangeMapType PadMap; - computePadMap(LandingPads, PadMap); - - // The end label of the previous invoke or nounwind try-range. - MCSymbol *LastLabel = Asm->getFunctionBegin(); - - // Whether there is a potentially throwing instruction (currently this means - // an ordinary call) between the end of the previous try-range and now. - bool SawPotentiallyThrowing = false; - - int LastEHState = -2; - - // The parent function and the catch handlers contribute to the 'ip2state' - // table. - - // Include ip2state entries for the beginning of the main function and - // for catch handler functions. 
- if (F == ParentF) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - LastEHState = -1; - } else if (FuncInfo.HandlerBaseState.count(F)) { - FuncInfo.IPToStateList.push_back( - std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F])); - LastEHState = FuncInfo.HandlerBaseState[F]; - } - for (const auto &MBB : *MF) { - for (const auto &MI : MBB) { - if (!MI.isEHLabel()) { - if (MI.isCall()) - SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); - continue; + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + // Find the end of the funclet + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + break; } + } - // End of the previous try-range? - MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); - if (BeginLabel == LastLabel) - SawPotentiallyThrowing = false; + // Don't emit ip2state entries for cleanup funclets. Any interesting + // exceptional actions in cleanups must be handled in a separate IR + // function. + if (FuncletStart->isCleanupFuncletEntry()) + continue; - // Beginning of a new try-range? - RangeMapType::const_iterator L = PadMap.find(BeginLabel); - if (L == PadMap.end()) - // Nope, it was just some random label. - continue; + MCSymbol *StartLabel; + int BaseState; + if (FuncletStart == MF->begin()) { + BaseState = NullState; + StartLabel = Asm->getFunctionBegin(); + } else { + auto *FuncletPad = + cast(FuncletStart->getBasicBlock()->getFirstNonPHI()); + assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0); + BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second; + StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart); + } + assert(StartLabel && "need local function start label"); + IPToStateTable.push_back( + std::make_pair(create32bitRef(StartLabel), BaseState)); - const PadRange &P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // FIXME: Should this be using FuncInfo.HandlerBaseState? - if (SawPotentiallyThrowing && LastEHState != -1) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - SawPotentiallyThrowing = false; - LastEHState = -1; - } - - if (LandingPad->WinEHState != LastEHState) - FuncInfo.IPToStateList.push_back( - std::make_pair(BeginLabel, LandingPad->WinEHState)); - LastEHState = LandingPad->WinEHState; - LastLabel = LandingPad->EndLabels[P.RangeIndex]; + for (const auto &StateChange : InvokeStateChangeIterator::range( + FuncInfo, FuncletStart, FuncletEnd, BaseState)) { + // Compute the label to report as the start of this entry; use the EH + // start label for the invoke if we have one, otherwise (this is a call + // which may unwind to our caller and does not have an EH start label, so) + // use the previous end label. + const MCSymbol *ChangeLabel = StateChange.NewStartLabel; + if (!ChangeLabel) + ChangeLabel = StateChange.PreviousEndLabel; + // Emit an entry indicating that PCs after 'Label' have this EH state. + IPToStateTable.push_back( + std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + // FIXME: assert that NewState is between CatchLow and CatchHigh. } } } @@ -566,15 +896,15 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // registration in order to recover the parent frame pointer. 
Now that we know // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. - assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX && - "no EH reg node localescape index"); + MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName); - MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol( - FLinkageName, FuncInfo.EHRegNodeEscapeIndex); - const MCExpr *RegistrationOffsetSymRef = - MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef); + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + unsigned UnusedReg; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int64_t Offset = TFI->getFrameIndexReference( + *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); + const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); } /// Emit the language-specific data that _except_handler3 and 4 expect. This is @@ -585,7 +915,13 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { const Function *F = MF->getFunction(); StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F); + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); // Emit the __ehtable label that we use for llvm.x86.seh.lsda. @@ -611,58 +947,290 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // // Only the EHCookieOffset field appears to vary, and it appears to be the // offset from the final saved SP value to the retaddr. + AddComment("GSCookieOffset"); OS.EmitIntValue(-2, 4); + AddComment("GSCookieXOROffset"); OS.EmitIntValue(0, 4); // FIXME: Calculate. + AddComment("EHCookieOffset"); OS.EmitIntValue(9999, 4); + AddComment("EHCookieXOROffset"); OS.EmitIntValue(0, 4); BaseState = -2; } - // Build a list of pointers to LandingPadInfos and then sort by WinEHState. - const std::vector &PadInfos = MMI->getLandingPads(); - SmallVector LPads; - LPads.reserve((PadInfos.size())); - for (const LandingPadInfo &LPInfo : PadInfos) - LPads.push_back(&LPInfo); - std::sort(LPads.begin(), LPads.end(), - [](const LandingPadInfo *L, const LandingPadInfo *R) { - return L->WinEHState < R->WinEHState; - }); - - // For each action in each lpad, emit one of these: - // struct ScopeTableEntry { - // int32_t EnclosingLevel; - // int32_t (__cdecl *Filter)(); - // void *HandlerOrFinally; - // }; - // - // The "outermost" action will use BaseState as its enclosing level. Each - // other action will refer to the previous state as its enclosing level. 
- int CurState = 0; - for (const LandingPadInfo *LPInfo : LPads) { - int EnclosingLevel = BaseState; - assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 == - LPInfo->WinEHState && - "gaps in the SEH scope table"); - for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend(); - I != E; ++I) { - const SEHHandler &Handler = *I; - const BlockAddress *BA = Handler.RecoverBA; - const Function *F = Handler.FilterOrFinally; - assert(F && "cannot catch all in 32-bit SEH without filter function"); - const MCExpr *FilterOrNull = - create32bitRef(BA ? Asm->getSymbol(F) : nullptr); - const MCExpr *ExceptOrFinally = create32bitRef( - BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F)); - - OS.EmitIntValue(EnclosingLevel, 4); - OS.EmitValue(FilterOrNull, 4); - OS.EmitValue(ExceptOrFinally, 4); - - // The next state unwinds to this state. - EnclosingLevel = CurState; - CurState++; - } + assert(!FuncInfo.SEHUnwindMap.empty()); + for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) { + auto *Handler = UME.Handler.get(); + const MCSymbol *ExceptOrFinally = + UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol(); + // -1 is usually the base state for "unwind to caller", but for + // _except_handler4 it's -2. Do that replacement here if necessary. + int ToState = UME.ToState == -1 ? BaseState : UME.ToState; + AddComment("ToState"); + OS.EmitIntValue(ToState, 4); + AddComment(UME.IsFinally ? "Null" : "FilterFunction"); + OS.EmitValue(create32bitRef(UME.Filter), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler"); + OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + } +} + +static int getRank(const WinEHFuncInfo &FuncInfo, int State) { + int Rank = 0; + while (State != -1) { + ++Rank; + State = FuncInfo.ClrEHUnwindMap[State].Parent; + } + return Rank; +} + +static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getRank(FuncInfo, Left); + int RightRank = getRank(FuncInfo, Right); + + while (LeftRank < RightRank) { + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + --RightRank; + } + + while (RightRank < LeftRank) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + --LeftRank; + } + + while (Left != Right) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + } + + return Left; +} + +void WinException::emitCLRExceptionTable(const MachineFunction *MF) { + // CLR EH "states" are really just IDs that identify handlers/funclets; + // states, handlers, and funclets all have 1:1 mappings between them, and a + // handler/funclet's "state" is its index in the ClrEHUnwindMap. + MCStreamer &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + MCSymbol *FuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *FuncEndSym = Asm->getFunctionEnd(); + + // A ClrClause describes a protected region. + struct ClrClause { + const MCSymbol *StartLabel; // Start of protected region + const MCSymbol *EndLabel; // End of protected region + int State; // Index of handler protecting the protected region + int EnclosingState; // Index of funclet enclosing the protected region + }; + SmallVector Clauses; + + // Build a map from handler MBBs to their corresponding states (i.e. their + // indices in the ClrEHUnwindMap). 
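+  // A made-up example of the numbering this relies on: for
+  //   try { try { ... } fault { ... } } catch (C) { ... }
+  // the map could be state 0 = catch C (Parent = -1) and state 1 = the
+  // fault (Parent = 0); every entry's Parent is a smaller index than the
+  // entry itself, which the assert below checks.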
+ int NumStates = FuncInfo.ClrEHUnwindMap.size(); + assert(NumStates > 0 && "Don't need exception table!"); + DenseMap HandlerStates; + for (int State = 0; State < NumStates; ++State) { + MachineBasicBlock *HandlerBlock = + FuncInfo.ClrEHUnwindMap[State].Handler.get(); + HandlerStates[HandlerBlock] = State; + // Use this loop through all handlers to verify our assumption (used in + // the MinEnclosingState computation) that ancestors have lower state + // numbers than their descendants. + assert(FuncInfo.ClrEHUnwindMap[State].Parent < State && + "ill-formed state numbering"); + } + // Map the main function to the NullState. + HandlerStates[&MF->front()] = NullState; + + // Write out a sentinel indicating the end of the standard (Windows) xdata + // and the start of the additional (CLR) info. + OS.EmitIntValue(0xffffffff, 4); + // Write out the number of funclets + OS.EmitIntValue(NumStates, 4); + + // Walk the machine blocks/instrs, computing and emitting a few things: + // 1. Emit a list of the offsets to each handler entry, in lexical order. + // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end. + // 3. Compute the list of ClrClauses, in the required order (inner before + // outer, earlier before later; the order by which a forward scan with + // early termination will find the innermost enclosing clause covering + // a given address). + // 4. A map (MinClauseMap) from each handler index to the index of the + // outermost funclet/function which contains a try clause targeting the + // key handler. This will be used to determine IsDuplicate-ness when + // emitting ClrClauses. The NullState value is used to indicate that the + // top-level function contains a try clause targeting the key handler. + // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for + // try regions we entered before entering the PendingState try but which + // we haven't yet exited. + SmallVector, 4> HandlerStack; + // EndSymbolMap and MinClauseMap are maps described above. + std::unique_ptr EndSymbolMap(new MCSymbol *[NumStates]); + SmallVector MinClauseMap((size_t)NumStates, NumStates); + + // Visit the root function and each funclet. + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + int FuncletState = HandlerStates[&*FuncletStart]; + // Find the end of the funclet + MCSymbol *EndSymbol = FuncEndSym; + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd); + break; + } + } + // Emit the function/funclet end and, if this is a funclet (and not the + // root function), record it in the EndSymbolMap. + OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + if (FuncletState != NullState) { + // Record the end of the handler. + EndSymbolMap[FuncletState] = EndSymbol; + } + + // Walk the state changes in this function/funclet and compute its clauses. + // Funclets always start in the null state. 
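+    // Sketch of the walk on made-up states: suppose a funclet reports the
+    // transitions null -> 1, 1 -> 0, 0 -> null, where state 1 is a child of
+    // state 0. The first transition opens state 1's region; 1 -> 0 closes
+    // the clause for state 1 while its ancestor 0 stays open; and the final
+    // transition back to null closes state 0's clause, leaving HandlerStack
+    // empty again as asserted after the loop.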
+ const MCSymbol *CurrentStartLabel = nullptr; + int CurrentState = NullState; + assert(HandlerStack.empty()); + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { + // Close any try regions we're not still under + int AncestorState = + getAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != AncestorState) { + assert(CurrentState != NullState && "Failed to find ancestor!"); + // Close the pending clause + Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, + CurrentState, FuncletState}); + // Now the parent handler is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent; + // Pop the new start label from the handler stack if we've exited all + // descendants of the corresponding handler. + if (HandlerStack.back().second == CurrentState) + CurrentStartLabel = HandlerStack.pop_back_val().first; + } + + if (StateChange.NewState != CurrentState) { + // For each clause we're starting, update the MinClauseMap so we can + // know which is the topmost funclet containing a clause targeting + // it. + for (int EnteredState = StateChange.NewState; + EnteredState != CurrentState; + EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) { + int &MinEnclosingState = MinClauseMap[EnteredState]; + if (FuncletState < MinEnclosingState) + MinEnclosingState = FuncletState; + } + // Save the previous current start/label on the stack and update to + // the newly-current start/state. + HandlerStack.emplace_back(CurrentStartLabel, CurrentState); + CurrentStartLabel = StateChange.NewStartLabel; + CurrentState = StateChange.NewState; + } + } + assert(HandlerStack.empty()); + } + + // Now emit the clause info, starting with the number of clauses. + OS.EmitIntValue(Clauses.size(), 4); + for (ClrClause &Clause : Clauses) { + // Emit a CORINFO_EH_CLAUSE : + /* + struct CORINFO_EH_CLAUSE + { + CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag + DWORD TryOffset; + DWORD TryLength; // actually TryEndOffset + DWORD HandlerOffset; + DWORD HandlerLength; // actually HandlerEndOffset + union + { + DWORD ClassToken; // use for catch clauses + DWORD FilterOffset; // use for filter clauses + }; + }; + + enum CORINFO_EH_CLAUSE_FLAGS + { + CORINFO_EH_CLAUSE_NONE = 0, + CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter + CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause + CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause + }; + typedef enum CorExceptionFlag + { + COR_ILEXCEPTION_CLAUSE_NONE, + COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause + COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause + COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This + // clause was duplicated + // to a funclet which was + // pulled out of line + } CorExceptionFlag; + */ + // Add 1 to the start/end of the EH clause; the IP associated with a + // call when the runtime does its scan is the IP of the next instruction + // (the one to which control will return after the call), so we need + // to add 1 to the end of the clause to cover that offset. We also add + // 1 to the start of the clause to make sure that the ranges reported + // for all clauses are disjoint. 
Note that we'll need some additional + // logic when machine traps are supported, since in that case the IP + // that the runtime uses is the offset of the faulting instruction + // itself; if such an instruction immediately follows a call but the + // two belong to different clauses, we'll need to insert a nop between + // them so the runtime can distinguish the point to which the call will + // return from the point at which the fault occurs. + + const MCExpr *ClauseBegin = + getOffsetPlusOne(Clause.StartLabel, FuncBeginSym); + const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym); + + const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State]; + MachineBasicBlock *HandlerBlock = Entry.Handler.get(); + MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock); + const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym); + MCSymbol *EndSym = EndSymbolMap[Clause.State]; + const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym); + + uint32_t Flags = 0; + switch (Entry.HandlerType) { + case ClrHandlerType::Catch: + // Leaving bits 0-2 clear indicates catch. + break; + case ClrHandlerType::Filter: + Flags |= 1; + break; + case ClrHandlerType::Finally: + Flags |= 2; + break; + case ClrHandlerType::Fault: + Flags |= 4; + break; + } + if (Clause.EnclosingState != MinClauseMap[Clause.State]) { + // This is a "duplicate" clause; the handler needs to be entered from a + // frame above the one holding the invoke. + assert(Clause.EnclosingState > MinClauseMap[Clause.State]); + Flags |= 8; + } + OS.EmitIntValue(Flags, 4); + + // Write the clause start/end + OS.EmitValue(ClauseBegin, 4); + OS.EmitValue(ClauseEnd, 4); + + // Write out the handler start/end + OS.EmitValue(HandlerBegin, 4); + OS.EmitValue(HandlerEnd, 4); + + // Write out the type token or filter offset + assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); + OS.EmitIntValue(Entry.TypeToken, 4); } } diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h index 669c9cc366ba..acb301016910 100644 --- a/lib/CodeGen/AsmPrinter/WinException.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -21,6 +21,7 @@ class Function; class GlobalValue; class MachineFunction; class MCExpr; +class Value; struct WinEHFuncInfo; class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { @@ -36,7 +37,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// True if this is a 64-bit target and we should use image relative offsets. bool useImageRel32 = false; - void emitCSpecificHandlerTable(); + /// Pointer to the current funclet entry BB. + const MachineBasicBlock *CurrentFuncletEntry = nullptr; + + void emitCSpecificHandlerTable(const MachineFunction *MF); + + void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State); /// Emit the EH table data for 32-bit and 64-bit functions using /// the __CxxFrameHandler3 personality. @@ -47,8 +55,11 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// tables. void emitExceptHandlerTable(const MachineFunction *MF); - void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF, - WinEHFuncInfo &FuncInfo); + void emitCLRExceptionTable(const MachineFunction *MF); + + void computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl> &IPToStateTable); /// Emits the label used with llvm.x86.seh.recoverfp, which is used by /// outlined funclets. 
@@ -57,6 +68,16 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
   const MCExpr *create32bitRef(const MCSymbol *Value);
   const MCExpr *create32bitRef(const GlobalValue *GV);
+  const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+  const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+  const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+                                 const MCSymbol *OffsetFrom);
+
+  /// Gets the offset that we should use in a table for a stack object with the
+  /// given index. For targets using CFI (Win64, etc), this is relative to the
+  /// established SP at the end of the prologue. For targets without CFI (Win32
+  /// only), it is relative to the frame pointer.
+  int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);

 public:
   //===--------------------------------------------------------------------===//
@@ -74,6 +95,10 @@ public:
   /// Gather and emit post-function exception information.
   void endFunction(const MachineFunction *) override;
+
+  /// \brief Emit target-specific EH funclet machinery.
+  void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+  void endFunclet() override;
 };
 }
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 530ab46db03b..d12fdb246984 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,14 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// target-specific instructions which implement the same semantics in a way
+// that better fits the target backend. This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
//
//===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/AtomicExpandUtils.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -20,6 +24,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -44,13 +49,17 @@ namespace {
   private:
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
-    bool expandAtomicLoad(LoadInst *LI);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+    bool tryExpandAtomicLoad(LoadInst *LI);
     bool expandAtomicLoadToLL(LoadInst *LI);
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
-    bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+    bool expandAtomicOpToLLSC(
+        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -108,7 +117,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
         FenceOrdering = RMWI->getOrdering();
         RMWI->setOrdering(Monotonic);
         IsStore = IsLoad = true;
-      } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                  (isAtLeastRelease(CASI->getSuccessOrdering()) ||
                   isAtLeastAcquire(CASI->getSuccessOrdering()))) {
         // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -126,10 +135,28 @@
       }
     }

-    if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
-      MadeChange |= expandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+
+      MadeChange |= tryExpandAtomicLoad(LI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
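+        // A minimal IR sketch of the conversion done below (names and types
+        // are illustrative):
+        //   store atomic float %f, float* %p seq_cst, align 4
+        // becomes
+        //   %1 = bitcast float %f to i32
+        //   %2 = bitcast float* %p to i32*
+        //   store atomic i32 %1, i32* %2 seq_cst, align 4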
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
     } else if (RMWI) {
       // There are two different ways of expanding RMW instructions:
       // - into a load if it is idempotent
@@ -141,7 +168,7 @@
       } else {
         MadeChange |= tryExpandAtomicRMW(RMWI);
       }
-    } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+    } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
   }
@@ -169,11 +196,56 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
   return (LeadingFence || TrailingFence);
 }

-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(
+        LI, LI->getPointerOperand(), LI->getOrdering(),
+        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
-  else
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }

 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -184,6 +256,7 @@
   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val = TLI->emitLoadLinked(Builder, LI->getPointerOperand(),
                                    LI->getOrdering());
+  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -209,6 +282,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
   return true;
 }

+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all.
The backends learned to deal with the bitcast +/// idiom because that was the only way of expressing the notion of an atomic +/// float or vector store. The long term plan is to teach each backend to +/// instruction select from the original atomic store, but as a migration +/// mechanism, we convert back to the old format which the backends understand. +/// Each backend will need individual work to recognize the new format. +StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { + IRBuilder<> Builder(SI); + auto *M = SI->getModule(); + Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), + M->getDataLayout()); + Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); + + Value *Addr = SI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); + NewSI->setAlignment(SI->getAlignment()); + NewSI->setVolatile(SI->isVolatile()); + NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); + SI->eraseFromParent(); + return NewSI; +} + bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. So we replace them by an @@ -226,23 +328,15 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { return tryExpandAtomicRMW(AI); } -bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { - switch (TLI->shouldExpandAtomicRMWInIR(AI)) { - case TargetLoweringBase::AtomicRMWExpansionKind::None: - return false; - case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { - assert(TLI->hasLoadLinkedStoreConditional() && - "TargetLowering requested we expand AtomicRMW instruction into " - "load-linked/store-conditional combos, but such instructions aren't " - "supported"); - - return expandAtomicRMWToLLSC(AI); - } - case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { - return expandAtomicRMWToCmpXchg(AI); - } - } - llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); +static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + Value* Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -282,10 +376,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } } -bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(), + [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), + Builder, Loaded, + AI->getValOperand()); + }); + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: + return expandAtomicRMWToCmpXchg(AI,
createCmpXchgInstFun); + default: + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); + } +} + +bool AtomicExpand::expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp) { + BasicBlock *BB = I->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); @@ -303,11 +415,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { // atomicrmw.end: // fence? // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); + // This grabs the DebugLoc from I. + IRBuilder<> Builder(I); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove @@ -320,8 +432,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + Value *NewVal = PerformOp(Builder, Loaded); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); @@ -331,72 +442,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // %init_loaded = load atomic iN* %addr - // br label %loop - // loop: - // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] - // %new = some_op iN %loaded, %incr - // %pair = cmpxchg iN* %addr, iN %loaded, iN %new - // %new_loaded = extractvalue { iN, i1 } %pair, 0 - // %success = extractvalue { iN, i1 } %pair, 1 - // br i1 %success, label %atomicrmw.end, label %loop - // atomicrmw.end: - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we want a load. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries.
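(To make the new PerformOp parameter concrete: a hypothetical additional call site could look like the sketch below. The two real callers are tryExpandAtomicLoad, which passes an identity lambda, and tryExpandAtomicRMW, which wraps performAtomicOp; this exact call does not appear in the patch.)

    // Hypothetical caller: expand an atomic add through the generic LL/SC
    // loop, computing the new value from the load-linked result.
    expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
                         [&](IRBuilder<> &Builder, Value *Loaded) {
                           return Builder.CreateAdd(Loaded, AI->getValOperand());
                         });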
- Builder.SetInsertPoint(LoopBB); - PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); - Loaded->addIncoming(InitLoaded, BB); - - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); - - Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - Loaded->addIncoming(NewLoaded, LoopBB); - - Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); - Builder.CreateCondBr(Success, ExitBB, LoopBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - - AI->replaceAllUsesWith(NewLoaded); - AI->eraseFromParent(); + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); return true; } @@ -424,7 +471,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure + // label %cmpxchg.nostore // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 @@ -432,6 +479,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // cmpxchg.success: // fence? // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end @@ -440,9 +490,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); @@ -466,7 +517,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( @@ -482,6 +533,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { /*IsLoad=*/true); Builder.CreateBr(ExitBB); + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). 
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); @@ -556,9 +614,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { - if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) - expandAtomicLoad(ResultingLoad); + tryExpandAtomicLoad(ResultingLoad); return true; } return false; } + +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + assert(AI); + + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
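(The loop being assembled in llvm::expandAtomicRMWToCmpXchg here has the same semantics as this portable C++; an illustration only, not part of the patch.)

    #include <atomic>

    // Portable analogue of the cmpxchg expansion: recompute the new value and
    // retry the compare-and-swap until no other thread intervened.
    int atomicAdd(std::atomic<int> &A, int Incr) {
      int Loaded = A.load(std::memory_order_relaxed);   // %init_loaded
      while (!A.compare_exchange_strong(                // %pair = cmpxchg
                 Loaded, Loaded + Incr,                 // %new = add %loaded, %incr
                 std::memory_order_seq_cst)) {
        // On failure, compare_exchange_strong refreshes Loaded (the phi).
      }
      return Loaded;                                    // the old value
    }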
+ Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *NewLoaded = nullptr; + Value *Success = nullptr; + + CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + Success, NewLoaded); + assert(Success && NewLoaded); + + Loaded->addIncoming(NewLoaded, LoopBB); + + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index db00910cd018..a67e194356d8 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -33,6 +33,6 @@ cl::opt<unsigned> cl::desc("Threshold for partial unrolling"), cl::Hidden); -BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 618266731c06..604feeddd355 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -12,7 +12,8 @@ // it then removes. // // Note that this pass must be run after register allocation, it cannot handle -// SSA form. +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). // //===----------------------------------------------------------------------===// @@ -20,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,7 +97,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge(); + PassConfig->getEnableTailMerge(); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, getAnalysis<MachineBlockFrequencyInfo>(), getAnalysis<MachineBranchProbabilityInfo>()); @@ -132,6 +134,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. MF->erase(MBB); + FuncletMembership.erase(MBB); } /// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def @@ -150,9 +153,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); + } else { + ImpDefRegs.insert(Reg); + } ++I; } if (ImpDefRegs.empty()) @@ -163,8 +170,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!TII->isUnpredicatedTerminator(I)) return false; // See if it uses any of the implicitly defined registers.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -208,14 +214,17 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) - MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - MadeChange |= OptimizeImpDefsBlock(MBB); + if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true)) + MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(&MBB); } + // Recalculate funclet membership. + FuncletMembership = getFuncletMembership(MF); + bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = TailMergeBlocks(MF); @@ -235,12 +244,9 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Walk the function to find jump tables that are live. BitVector JTIsLive(JTI->getJumpTables().size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); + for (const MachineBasicBlock &BB : MF) { + for (const MachineInstr &I : BB) + for (const MachineOperand &Op : I.operands()) { if (!Op.isJTI()) continue; // Remember that this JT is live. @@ -365,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } // Back past possible debugging pseudos at beginning of block. This matters // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for + // are present at the beginning. (This way, the various checks later for // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; @@ -426,7 +432,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. - MachineFunction::iterator MBBI = &CurMBB; + MachineFunction::iterator MBBI = CurMBB.getIterator(); MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); @@ -445,6 +451,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // For targets that use the register scavenger, we must maintain LiveIns. MaintainLiveIns(&CurMBB, NewMBB); + // Add the new block to the funclet.
+ const auto &FuncletI = FuncletMembership.find(&CurMBB); + if (FuncletI != FuncletMembership.end()) + FuncletMembership[NewMBB] = FuncletI->second; + return NewMBB; } @@ -479,7 +490,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { - MachineBasicBlock *NextBB = I; + MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); @@ -549,14 +560,23 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction /// in each block. -static bool ProfitableToMerge(MachineBasicBlock *MBB1, - MachineBasicBlock *MBB2, - unsigned minCommonTailLength, - unsigned &CommonTailLen, - MachineBasicBlock::iterator &I1, - MachineBasicBlock::iterator &I2, - MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { +static bool +ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, + unsigned minCommonTailLength, unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB, + DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + // It is never profitable to tail-merge blocks from two different funclets. + if (!FuncletMembership.empty()) { + auto Funclet1 = FuncletMembership.find(MBB1); + assert(Funclet1 != FuncletMembership.end()); + auto Funclet2 = FuncletMembership.find(MBB2); + assert(Funclet2 != FuncletMembership.end()); + if (Funclet1->second != Funclet2->second) + return false; + } + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; @@ -600,12 +620,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && - (I1 == MBB1->begin() || I2 == MBB2->begin())) - return true; - - return false; + return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + (I1 == MBB1->begin() || I2 == MBB2->begin()); } /// ComputeSameTails - Look through all the blocks in MergePotentials that have @@ -634,7 +650,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, - SuccBB, PredBB)) { + SuccBB, PredBB, + FuncletMembership)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -776,7 +793,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, if (MBBICommon->mayLoad() || MBBICommon->mayStore()) if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->clearMemRefs(); + MBBICommon->dropMemRefs(); ++MBBI; ++MBBICommon; @@ -840,8 +857,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // block, which we can't jump to), we can treat all blocks with this same // tail at once. Use PredBB if that is one of the possibilities, as that // will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> - getParent()->begin(); + MachineBasicBlock *EntryBB = + &MergePotentials.front().getBlock()->getParent()->front(); unsigned commonTailIndex = SameTails.size(); // If there are two blocks, check to see if one can be made to fall through // into the other. @@ -917,12 +934,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E && MergePotentials.size() < TailMergeThreshold; ++I) { - if (TriedMerging.count(I)) - continue; - if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + for (MachineBasicBlock &MBB : MF) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB)); } // If this is a large problem, avoid visiting the same basic blocks @@ -958,13 +974,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { I != E; ++I) { if (I->pred_size() < 2) continue; SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = std::prev(I); + MachineBasicBlock *IBB = &*I; + MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { - MachineBasicBlock *PBB = *P; + for (MachineBasicBlock *PBB : I->predecessors()) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (TriedMerging.count(PBB)) continue; @@ -977,7 +993,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // Skip blocks which may jump to a landing pad. Can't tail merge these. - if (PBB->getLandingPadSuccessor()) + if (PBB->hasEHPadSuccessor()) continue; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -990,18 +1006,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (TII->ReverseBranchCondition(NewCond)) continue; // This is the QBB case described above - if (!FBB) - FBB = std::next(MachineFunction::iterator(PBB)); + if (!FBB) { + auto Next = ++PBB->getIterator(); + if (Next != MF.end()) + FBB = &*Next; + } } // Failing case: the only way IBB can be reached from PBB is via // exception handling. Happens for landing pads. Would be nice to have // a bit in the edge so we didn't have to do all this. - if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; + if (IBB->isEHPad()) { + MachineFunction::iterator IP = ++PBB->getIterator(); MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) - PredNextBB = IP; + PredNextBB = &*IP; if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; @@ -1027,7 +1046,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB)); } } @@ -1042,7 +1061,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks.
- PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks + PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -1080,13 +1099,19 @@ void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { if (TailMBB.succ_size() <= 1) return; - auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); - uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto SumEdgeFreq = + std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0)) + .getFrequency(); auto EdgeFreq = EdgeFreqLs.begin(); - for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); - SuccI != SuccE; ++SuccI, ++EdgeFreq) - TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); + if (SumEdgeFreq > 0) { + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) { + auto Prob = BranchProbability::getBranchProbability( + EdgeFreq->getFrequency(), SumEdgeFreq); + TailMBB.setSuccProbability(SuccI, Prob); + } + } } //===----------------------------------------------------------------------===// @@ -1098,10 +1123,12 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); + // Renumbering blocks alters funclet membership, recalculate it. + FuncletMembership = getFuncletMembership(MF); for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. @@ -1111,6 +1138,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { ++NumDeadBlocks; } } + return MadeChange; } @@ -1167,20 +1195,31 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineFunction &MF = *MBB->getParent(); ReoptimizeBlock: - MachineFunction::iterator FallThrough = MBB; + MachineFunction::iterator FallThrough = MBB->getIterator(); ++FallThrough; + // Make sure MBB and FallThrough belong to the same funclet. + bool SameFunclet = true; + if (!FuncletMembership.empty() && FallThrough != MF.end()) { + auto MBBFunclet = FuncletMembership.find(MBB); + assert(MBBFunclet != FuncletMembership.end()); + auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); + assert(FallThroughFunclet != FuncletMembership.end()); + SameFunclet = MBBFunclet->second == FallThroughFunclet->second; + } + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be // optimized away. - if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && + SameFunclet) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! - } else if (FallThrough->isLandingPad()) { + } else if (FallThrough->isEHPad()) { // Don't rewrite to a landing pad fallthough. That could lead to the case // where a BB jumps to more than one landing pad. // TODO: Is it ever worth rewriting predecessors which don't already @@ -1190,12 +1229,12 @@ ReoptimizeBlock: // instead. 
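(For intuition on the arithmetic in setCommonTailEdgeWeights above: each successor edge now receives frequency_i / sum(frequencies) rather than a value scaled against UINT32_MAX. A self-contained sketch, with plain integers standing in for BlockFrequency:)

    #include <cstdint>
    #include <vector>

    // Each edge probability is its frequency over the sum of all edge
    // frequencies; if the sum is zero, the probabilities are left untouched.
    std::vector<double> edgeProbabilities(const std::vector<uint64_t> &Freqs) {
      uint64_t Sum = 0;
      for (uint64_t F : Freqs)
        Sum += F;                                   // the std::accumulate above
      std::vector<double> Probs;
      if (Sum > 0)
        for (uint64_t F : Freqs)
          Probs.push_back(double(F) / double(Sum)); // BranchProbability(F, Sum)
      return Probs;
    }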
while (!MBB->pred_empty()) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); - Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) - MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); + MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough); MadeChange = true; } return MadeChange; @@ -1237,7 +1276,7 @@ ReoptimizeBlock: // AnalyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken() && !MBB->isLandingPad()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); // Remove redundant DBG_VALUEs first. @@ -1333,7 +1372,7 @@ ReoptimizeBlock: TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. - MBB->moveAfter(--MF.end()); + MBB->moveAfter(&MF.back()); MadeChange = true; ++NumBranchOpts; return MadeChange; @@ -1371,7 +1410,7 @@ ReoptimizeBlock: // other blocks across it. if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and @@ -1468,14 +1507,11 @@ ReoptimizeBlock: // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); - if (!MBB->isLandingPad()) { + if (!MBB->isEHPad()) { // Check all the predecessors of this block. If one of them has no fall // throughs, move this block right after it. - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - E = MBB->pred_end(); PI != E; ++PI) { + for (MachineBasicBlock *PredBB : MBB->predecessors()) { // Analyze the branch at the end of the pred. - MachineBasicBlock *PredBB = *PI; - MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && @@ -1493,8 +1529,7 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = - std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } @@ -1507,11 +1542,9 @@ ReoptimizeBlock: if (!CurFallsThru) { // Check all successors to see if we can move this block before it. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock *SuccBB : MBB->successors()) { // Analyze the branch at the end of the block before the succ. - MachineBasicBlock *SuccBB = *SI; - MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); // If this block doesn't already fall-through to that successor, and if // the succ doesn't already have a block that can fall through into it, // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB && !SuccPrev->canFallThrough() && !CurUnAnalyzable && - !SuccBB->isLandingPad()) { + !SuccBB->isEHPad()) { MBB->moveBefore(SuccBB); MadeChange = true; goto ReoptimizeBlock; @@ -1531,10 +1564,18 @@ ReoptimizeBlock: // removed, move this block to the end of the function. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && - PrevBB.isSuccessor(FallThrough)) { - MBB->moveAfter(--MF.end()); + PrevBB.isSuccessor(&*FallThrough)) { + MBB->moveAfter(&MF.back()); MadeChange = true; return MadeChange; } @@ -1553,7 +1594,7 @@ ReoptimizeBlock: bool BranchFolder::HoistCommonCode(MachineFunction &MF) { bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= HoistCommonCodeInSuccs(MBB); } @@ -1564,15 +1605,23 @@ bool BranchFolder::HoistCommonCode(MachineFunction &MF) { /// its 'true' successor. static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB) { - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - E = BB->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock *SuccBB : BB->successors()) if (SuccBB != TrueBB) return SuccBB; - } return nullptr; } +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + /// findHoistingInsertPosAndDeps - Find the location to move common instructions /// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous /// instruction is the flag setting instruction, the previous instruction is /// the preferred location. This function also gathers uses and defs of the /// instructions from the insertion point to the end of the block. The data is /// used by HoistCommonCodeInSuccs to ensure safety. @@ -1590,16 +1639,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!TII->isUnpredicatedTerminator(Loc)) return MBB->end(); - for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = Loc->getOperand(i); + for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1608,8 +1655,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // If the terminator defines a register, make sure we don't hoist // the instruction whose def might be clobbered by the terminator.
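(Usage sketch for the new addRegAndItsAliases helper, mirroring the call sites around it; hypothetical fragment, assuming a MachineInstr *Term and the TRI pointer already in scope. Any set-like Container with insert() works, e.g. the SmallSet and SmallPtrSet uses nearby.)

    // Collect every register a terminator reads, physical aliases included,
    // so later code won't hoist a def of any of them past the terminator.
    SmallSet<unsigned, 4> Uses;
    for (const MachineOperand &MO : Term->operands())
      if (MO.isReg() && MO.isUse() && MO.getReg())
        addRegAndItsAliases(MO.getReg(), TRI, Uses);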
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1626,8 +1672,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, --PI; bool IsDef = false; - for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { // If PI has a regmask operand, it is probably a call. Separate away. if (MO.isRegMask()) return Loc; @@ -1636,8 +1681,10 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - if (Uses.count(Reg)) + if (Uses.count(Reg)) { IsDef = true; + break; + } } if (!IsDef) // The condition setting instruction is not just before the conditional @@ -1657,23 +1704,22 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Find out what registers are live. Note this routine is ignoring other live // registers which are only used by instructions in successor blocks. - for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Uses.erase(*SubRegs); // Use sub-registers to be conservative + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } } - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1737,8 +1783,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool IsSafe = true; - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (MachineOperand &MO : TIB->operands()) { // Don't attempt to hoist instructions with register masks. if (MO.isRegMask()) { IsSafe = false; @@ -1793,28 +1838,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; // Remove kills from LocalDefsSet, these registers had short live ranges. - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } } // Track local defs so we can update liveins. 
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; LocalDefs.push_back(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.insert(*AI); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); } HasDups = true; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 46c05dc0600a..d759d53e27f2 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -54,6 +54,7 @@ namespace llvm { typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; SmallPtrSet<const MachineBasicBlock*, 16> TriedMerging; + DenseMap<const MachineBasicBlock *, int> FuncletMembership; class SameTailElt { MPIterator MPIter; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index eb7552970d3f..a078c3c707a0 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,3 +1,8 @@ +set(system_libs) +if(CMAKE_HOST_UNIX AND LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD) + set(system_libs ${system_libs} pthread) +endif() + add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp @@ -20,7 +25,9 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + LiveDebugValues.cpp FaultMaps.cpp + FuncletLayout.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp @@ -80,6 +87,7 @@ add_llvm_library(LLVMCodeGen OptimizePHIs.cpp PHIElimination.cpp PHIEliminationUtils.cpp + ParallelCG.cpp Passes.cpp PeepholeOptimizer.cpp PostRASchedulerList.cpp @@ -128,6 +136,8 @@ add_llvm_library(LLVMCodeGen ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP + + LINK_LIBS ${system_libs} ) add_dependencies(LLVMCodeGen intrinsics_gen) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index d08fae09323c..abc655ac34ca 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -24,6 +25,7 @@ using namespace llvm; void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, + VirtRegMap *VRM, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm) { @@ -31,7 +33,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); + VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) @@ -74,7 +76,10 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, // Check if all values in LI are rematerializable static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + VirtRegMap *VRM, const TargetInstrInfo &TII) { + unsigned Reg = LI.reg; + unsigned Original = VRM ?
VRM->getOriginal(Reg) : 0; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -86,6 +91,36 @@ static bool isRematerializable(const LiveInterval &LI, MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); assert(MI && "Dead valno in interval"); + // Trace copies introduced by live range splitting. The inline + // spiller can rematerialize through these copies, so the spill + // weight must reflect this. + if (VRM) { + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match, this + // copy came from a split. + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + VRM->getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. + const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + } + } + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) return false; } @@ -188,7 +223,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo())) + if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; li.weight = normalize(totalWeight, li.getSize(), numInstr); diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index fb29b1db7a43..23c0d542560e 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; + MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; + unsigned SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. @@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, // as allocated so that future queries don't return the same registers, i.e. // when i64 and f64 are both passed in GPRs.
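(A compressed model of the copy-chain walk added to isRematerializable in the CalcSpillWeights hunk above; illustrative only, with a hypothetical MachineRegisterInfo lookup standing in for the LiveIntervals-based query the real code performs.)

    // Follow full COPY instructions back to the instruction that actually
    // defines the value; that instruction is then tested for remat.
    while (MI->isFullCopy()) {
      unsigned Src = MI->getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Src))
        break;                     // physreg source: cannot trace further
      MI = MRI->getVRegDef(Src);   // assumed: MRI is a MachineRegisterInfo*
    }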
StackOffset = SavedStackOffset; + MaxStackArgAlign = SavedMaxStackArgAlign; Locs.resize(NumLocs); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 155c5ecec772..dc13b5b11d30 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandISelPseudosPass(Registry); initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); + initializeFuncletLayoutPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); @@ -66,6 +67,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); + initializeLiveDebugValuesPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6ab6acc03722..5844124d8565 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -63,6 +64,9 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumAndsAdded, + "Number of and mask instructions added to form ext loads"); +STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); @@ -109,25 +113,18 @@ static cl::opt<bool> StressExtLdPromotion( namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; -struct TypeIsSExt { - Type *Ty; - bool IsSExt; - TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {} -}; +typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. const TargetMachine *TM; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - /// CurInstIterator - As we scan instructions optimizing them, this is the - /// next instruction to optimize. Xforms that can invalidate this should - /// update it. + /// As we scan instructions optimizing them, this is the next instruction + /// to optimize. Transforms that can invalidate this should update it. BasicBlock::iterator CurInstIterator; /// Keeps track of non-local addresses that have been sunk into a block. @@ -141,10 +138,10 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway. + /// True if CFG is modified in any way. bool ModifiedDT; - /// OptSize - True if optimizing for size. + /// True if optimizing for size. bool OptSize; /// DataLayout for the Function being processed.
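(What the new TypeIsSExt typedef buys, as a self-contained sketch: PointerIntPair is the real LLVM ADT used above, while the surrounding names are illustrative. The bool rides in the low alignment bits of the pointer, so each map entry stays pointer-sized instead of carrying a separate struct field.)

    #include "llvm/ADT/PointerIntPair.h"

    static int Dummy;
    // (pointer, IsSExt) packed into one pointer-sized value; int* stands in
    // for Type* here so the sketch compiles on its own.
    llvm::PointerIntPair<int *, 1, bool> TypeAndSExt(&Dummy, /*IsSExt=*/true);
    int *Ptr = TypeAndSExt.getPointer();  // &Dummy
    bool SExt = TypeAndSExt.getInt();     // true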
@@ -167,30 +164,33 @@ class TypePromotionTransaction; } private: - bool EliminateFallThrough(Function &F); - bool EliminateMostlyEmptyBlocks(Function &F); - bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; - void EliminateMostlyEmptyBlock(BasicBlock *BB); - bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); - bool OptimizeInst(Instruction *I, bool& ModifiedDT); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, + bool eliminateFallThrough(Function &F); + bool eliminateMostlyEmptyBlocks(Function &F); + bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); - bool OptimizeInlineAsmInst(CallInst *CS); - bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); - bool MoveExtToFormExtLoad(Instruction *&I); - bool OptimizeExtUses(Instruction *I); - bool OptimizeSelectInst(SelectInst *SI); - bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); - bool OptimizeExtractElementInst(Instruction *Inst); - bool DupRetToEnableTailCallOpts(BasicBlock *BB); - bool PlaceDbgValues(Function &F); + bool optimizeInlineAsmInst(CallInst *CS); + bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool moveExtToFormExtLoad(Instruction *&I); + bool optimizeExtUses(Instruction *I); + bool optimizeLoadExt(LoadInst *I); + bool optimizeSelectInst(SelectInst *SI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool optimizeSwitchInst(SwitchInst *CI); + bool optimizeExtractElementInst(Instruction *Inst); + bool dupRetToEnableTailCallOpts(BasicBlock *BB); + bool placeDbgValues(Function &F); bool sinkAndCmp(Function &F); - bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); + void stripInvariantGroupMetadata(Instruction &I); }; } @@ -218,7 +218,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); + OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -231,12 +231,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Eliminate blocks that contain only PHI nodes and an // unconditional branch. - EverMadeChange |= EliminateMostlyEmptyBlocks(F); + EverMadeChange |= eliminateMostlyEmptyBlocks(F); // llvm.dbg.value is far away from the value then iSel may not be able // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F); + EverMadeChange |= placeDbgValues(F); // If there is a mask, compare against zero, and branch that can be combined // into a single target instruction, push the mask and compare into branch @@ -251,9 +251,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end(); ) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; - MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) @@ -296,7 +296,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Merge pairs of basic blocks with unconditional branches, connected by // a single edge. if (EverMadeChange || MadeChange) - MadeChange |= EliminateFallThrough(F); + MadeChange |= eliminateFallThrough(F); EverMadeChange |= MadeChange; } @@ -314,14 +314,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } -/// EliminateFallThrough - Merge basic blocks which are connected -/// by a single edge, where one of the basic blocks has a single successor -/// pointing to the other basic block, which has a single predecessor. -bool CodeGenPrepare::EliminateFallThrough(Function &F) { +/// Merge basic blocks which are connected by a single edge, where one of the +/// basic blocks has a single successor pointing to the other basic block, +/// which has a single predecessor. +bool CodeGenPrepare::eliminateFallThrough(Function &F) { bool Changed = false; // Scan all of the blocks in the function, except for the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If the destination block has a single pred, then this is a trivial // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); @@ -342,22 +342,21 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { BB->moveBefore(&BB->getParent()->getEntryBlock()); // We have erased a block. Update the iterator. - I = BB; + I = BB->getIterator(); } } return Changed; } -/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, -/// debug info directives, and an unconditional branch. Passes before isel -/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for -/// isel. Start by eliminating these blocks so we can split them the way we -/// want them. -bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { +/// Eliminate blocks that contain only PHI nodes, debug info directives, and an +/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split +/// edges in ways that are non-optimal for isel. Start by eliminating these +/// blocks so we can split them the way we want them. +bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If this block doesn't end with an uncond branch, ignore it. BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isUnconditional()) continue; // If the instruction before the branch (skipping debug info) isn't a phi // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI; + BasicBlock::iterator BBI = BI->getIterator(); if (BBI != BB->begin()) { --BBI; while (isa<DbgInfoIntrinsic>(BBI)) { @@ -383,19 +382,19 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { if (DestBB == BB) continue; - if (!CanMergeBlocks(BB, DestBB)) + if (!canMergeBlocks(BB, DestBB)) continue; - EliminateMostlyEmptyBlock(BB); + eliminateMostlyEmptyBlock(BB); MadeChange = true; } return MadeChange; } -/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a -/// single uncond branch between them, and BB contains no other non-phi +/// Return true if we can merge BB into DestBB if there is a single +/// unconditional branch between them, and BB contains no other non-phi /// instructions. -bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, +bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const { // We only want to eliminate blocks whose phi nodes are used by phi nodes in // the successor. If there are more complex condition (e.g. preheaders), @@ -461,9 +460,9 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, } -/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and -/// an unconditional branch in it. -void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { +/// Eliminate a basic block that has only phi's and an unconditional branch in +/// it. +void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); @@ -594,6 +593,14 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, continue; } + if (RelocatedBase->getParent() != ToReplace->getParent()) { + // Base and derived relocates are in different basic blocks. + // In this case transform is only valid when base dominates derived + // relocate. However it would be too expensive to check dominance + // for each such relocate, so we skip the whole transformation. + continue; + } + Value *Base = ThisRelocate.getBasePtr(); auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); if (!Derived || Derived->getPointerOperand() != Base) @@ -631,21 +638,20 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // In this case, we can not find the bitcast any more. So we insert a new bitcast // no matter there is already one or not. In this way, we can handle all cases, and // the extra bitcast should be optimized away in later passes. - Instruction *ActualRelocatedBase = RelocatedBase; + Value *ActualRelocatedBase = RelocatedBase; if (RelocatedBase->getType() != Base->getType()) { ActualRelocatedBase = - cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + Builder.CreateBitCast(RelocatedBase, Base->getType()); } Value *Replacement = Builder.CreateGEP( Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); - Instruction *ReplacementInst = cast<Instruction>(Replacement); Replacement->takeName(ToReplace); // If the newly generated derived pointer's type does not match the original derived // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
- Instruction *ActualReplacement = ReplacementInst; - if (ReplacementInst->getType() != ToReplace->getType()) { + Value *ActualReplacement = Replacement; + if (Replacement->getType() != ToReplace->getType()) { ActualReplacement = - cast(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + Builder.CreateBitCast(Replacement, ToReplace->getType()); } ToReplace->replaceAllUsesWith(ActualReplacement); ToReplace->eraseFromParent(); @@ -723,6 +729,12 @@ static bool SinkCast(CastInst *CI) { // Preincrement use iterator so we don't invalidate it. ++UI; + // If the block selected to receive the cast is an EH pad that does not + // allow non-PHI instructions before the terminator, we can't sink the + // cast. + if (UserBB->getTerminator()->isEHPad()) + continue; + // If this user is in the same block as the cast, don't change the cast. if (UserBB == DefBB) continue; @@ -731,9 +743,9 @@ static bool SinkCast(CastInst *CI) { if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedCast = - CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", - InsertPt); + assert(InsertPt != UserBB->end()); + InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), + CI->getType(), "", &*InsertPt); } // Replace a use of the cast with a use of the new cast. @@ -751,10 +763,9 @@ static bool SinkCast(CastInst *CI) { return MadeChange; } -/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), -/// sink it into user blocks to reduce the number of virtual -/// registers that must be created and coalesced. +/// If the specified cast instruction is a noop copy (e.g. it's casting from +/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to +/// reduce the number of virtual registers that must be created and coalesced. /// /// Return true if any changes are made. /// @@ -789,8 +800,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, return SinkCast(CI); } -/// CombineUAddWithOverflow - try to combine CI into a call to the -/// llvm.uadd.with.overflow intrinsic if possible. +/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if +/// possible. /// /// Return true if any changes were made. static bool CombineUAddWithOverflow(CmpInst *CI) { @@ -818,7 +829,7 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { assert(*AddI->user_begin() == CI && "expected!"); #endif - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); auto *InsertPt = AddI->hasOneUse() ? CI : AddI; @@ -836,16 +847,16 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { return true; } -/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce -/// the number of virtual registers that must be created and coalesced. This is -/// a clear win except on targets with multiple condition code registers -/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// Sink the given CmpInst into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. This is a clear win except on +/// targets with multiple condition code registers (PowerPC), where it might +/// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. 
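// Aside: the icmp/add pair that CombineUAddWithOverflow folds into
// @llvm.uadd.with.overflow computes nothing more exotic than this (a hedged
// sketch in plain C++; uaddWithOverflow is an illustrative name, not an
// LLVM API):

#include <cassert>
#include <cstdint>
#include <limits>

static bool uaddWithOverflow(uint32_t a, uint32_t b, uint32_t &sum) {
  sum = a + b;     // wraps modulo 2^32, like the IR `add`
  return sum < a;  // true iff the add wrapped: the intrinsic's overflow bit
}

int main() {
  uint32_t s;
  assert(!uaddWithOverflow(1, 2, s) && s == 3);
  assert(uaddWithOverflow(std::numeric_limits<uint32_t>::max(), 1, s) && s == 0);
  return 0;
}

// Packaging the two results into one intrinsic lets isel pick a single
// add-then-branch-on-carry sequence instead of an add plus a separate compare.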
static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); - /// InsertedCmp - Only insert a cmp in each block once. + /// Only insert a cmp in each block once. DenseMap InsertedCmps; bool MadeChange = false; @@ -872,10 +883,10 @@ static bool SinkCmpExpression(CmpInst *CI) { if (!InsertedCmp) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); InsertedCmp = - CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), CI->getOperand(0), - CI->getOperand(1), "", InsertPt); + CmpInst::Create(CI->getOpcode(), CI->getPredicate(), + CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); } // Replace a use of the cmp with a use of the new cmp. @@ -903,8 +914,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return false; } -/// isExtractBitsCandidateUse - Check if the candidates could -/// be combined with shift instruction, which includes: +/// Check if the candidates could be combined with a shift instruction, which +/// includes: /// 1. Truncate instruction /// 2. And instruction and the imm is a mask of the low bits: /// imm & (imm+1) == 0 @@ -922,8 +933,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) { return true; } -/// SinkShiftAndTruncate - sink both shift and truncate instruction -/// to the use of truncate's BB. +/// Sink both shift and truncate instruction to the use of truncate's BB. static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap &InsertedShifts, @@ -970,20 +980,22 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, if (!InsertedShift && !InsertedTrunc) { BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + assert(InsertPt != TruncUserBB->end()); // Sink the shift if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); // Sink the trunc BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); TruncInsertPt++; + assert(TruncInsertPt != TruncUserBB->end()); InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, - TruncI->getType(), "", TruncInsertPt); + TruncI->getType(), "", &*TruncInsertPt); MadeChange = true; @@ -993,10 +1005,10 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, return MadeChange; } -/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if -/// the uses could potentially be combined with this shift instruction and -/// generate BitExtract instruction. It will only be applied if the architecture -/// supports BitExtract instruction. Here is an example: +/// Sink the shift *right* instruction into user blocks if the uses could +/// potentially be combined with this shift instruction and generate BitExtract +/// instruction. It will only be applied if the architecture supports BitExtract +/// instruction. 
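// Aside: the "mask of the low bits" test quoted above, imm & (imm+1) == 0, is
// worth seeing on concrete values (plain C++ sketch; isLowBitMask is an
// illustrative name). The BitExtract example from the doc comment resumes
// just below.

#include <cassert>
#include <cstdint>

static bool isLowBitMask(uint64_t m) {
  // m looks like 0b000...0111...1 exactly when m+1 is a power of two, i.e.
  // adding one carries out of the contiguous low run in a single step.
  return (m & (m + 1)) == 0;
}

int main() {
  assert(isLowBitMask(0x0) && isLowBitMask(0x1) && isLowBitMask(0xFF));
  assert(isLowBitMask(0x7FFF));
  assert(!isLowBitMask(0xF0) && !isLowBitMask(0x5));
  return 0;
}

// An `and` with such a mask (or a truncate, which is the same thing at a
// narrower width) is precisely what a bit-extract instruction can absorb
// together with the shift being sunk.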
Here is an example: /// BB1: /// %x.extract.shift = lshr i64 %arg1, 32 /// BB2: @@ -1067,13 +1079,14 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, if (!InsertedShift) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); MadeChange = true; } @@ -1089,10 +1102,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// ScalarizeMaskedLoad() translates masked load intrinsic, like +// Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, whith loading element one-by-one if +// to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set // // %1 = bitcast i8* %addr to i32* @@ -1126,35 +1139,68 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // static void ScalarizeMaskedLoad(CallInst *CI) { Value *Ptr = CI->getArgOperand(0); - Value *Src0 = CI->getArgOperand(3); + Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); - VectorType *VecType = dyn_cast(CI->getType()); - Type *EltTy = VecType->getElementType(); + Value *Src0 = CI->getArgOperand(3); + unsigned AlignVal = cast(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast(CI->getType()); assert(VecType && "Unexpected return type of masked load intrinsic"); + Type *EltTy = CI->getType()->getVectorElementType(); + IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); BasicBlock *CondBlock = nullptr; BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); + Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Short-cut if the mask is all-true. + bool IsAllOnesMask = isa(Mask) && + cast(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. 
+  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
 
   // Bitcast %addr from i8* to EltTy*
   Type *NewPtrType =
     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+  unsigned VectorWidth = VecType->getNumElements();
 
   Value *UndefVal = UndefValue::get(VecType);
 
   // The result vector
   Value *VResult = UndefVal;
 
+  if (isa<ConstantVector>(Mask)) {
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+        continue;
+      Value *Gep =
+          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+      LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+      VResult = Builder.CreateInsertElement(VResult, Load,
+                                            Builder.getInt32(Idx));
+    }
+    Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+    CI->replaceAllUsesWith(NewI);
+    CI->eraseFromParent();
+    return;
+  }
+
   PHINode *Phi = nullptr;
   Value *PrevPhi = UndefVal;
-  unsigned VectorWidth = VecType->getNumElements();
 
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
@@ -1182,16 +1228,17 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
     //  %Elt = load i32* %EltAddr
     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
     //
-    CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+    CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
     Builder.SetInsertPoint(InsertPt);
 
     Value *Gep =
         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
-    LoadInst* Load = Builder.CreateLoad(Gep, false);
+    LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
 
     // Create "else" block, fill it in the next iteration
-    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -1208,7 +1255,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
   CI->eraseFromParent();
 }
 
-// ScalarizeMaskedStore() translates masked store intrinsic, like
+// Translate a masked store intrinsic, like
 //  void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
 //                               <16 x i1> %mask)
 // to a chain of basic blocks, that stores element one-by-one if
@@ -1237,34 +1284,61 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
 // br label %else2
 //   . . .
 static void ScalarizeMaskedStore(CallInst *CI) {
-  Value *Ptr  = CI->getArgOperand(1);
   Value *Src = CI->getArgOperand(0);
+  Value *Ptr  = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
   Value *Mask = CI->getArgOperand(3);
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
-  Type *EltTy = VecType->getElementType();
-
   assert(VecType && "Unexpected data type in masked store intrinsic");
+  Type *EltTy = VecType->getElementType();
+
   IRBuilder<> Builder(CI->getContext());
   Instruction *InsertPt = CI;
   BasicBlock *IfBlock = CI->getParent();
   Builder.SetInsertPoint(InsertPt);
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
+  // Short-cut if the mask is all-true.
+  bool IsAllOnesMask = isa<ConstantVector>(Mask) &&
+    cast<ConstantVector>(Mask)->isAllOnesValue();
+
+  if (IsAllOnesMask) {
+    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+    CI->eraseFromParent();
+    return;
+  }
+
+  // Adjust alignment for the scalar instruction.
+ AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); + + if (isa(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.store, label %else + // br i1 %to_store, label %cond.store, label %else // Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, @@ -1276,13 +1350,259 @@ static void ScalarizeMaskedStore(CallInst *CI) { // %EltAddr = getelementptr i32* %1, i32 0 // %store i32 %OneElt, i32* %EltAddr // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); Builder.SetInsertPoint(InsertPt); - + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateStore(OneElt, Gep); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . 
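// Aside: stripped of the control-flow plumbing, every one of these masked
// expansions computes a simple per-lane rule. A gather, modeled in plain C++
// (an ordinary loop stands in for the chain of cond.load blocks; all names
// are illustrative):

#include <array>
#include <cassert>
#include <cstddef>

int main() {
  std::array<int, 4> data = {10, 20, 30, 40};
  std::array<const int *, 4> ptrs = {&data[3], &data[2], &data[1], &data[0]};
  std::array<bool, 4> mask = {true, false, true, false};
  std::array<int, 4> passthru = {-1, -2, -3, -4};
  std::array<int, 4> result;
  for (std::size_t i = 0; i < ptrs.size(); ++i)
    result[i] = mask[i] ? *ptrs[i] : passthru[i]; // one cond.load per lane
  assert(result == (std::array<int, 4>{40, -2, 20, -4}));
  return 0;
}

// A masked load is the special case where the lane pointers are consecutive
// offsets from one base. The constant-mask fast paths in the code above are
// this loop with the `if` resolved at compile time; the general path spells
// the same loop out as cond.load/else blocks stitched together with PHIs.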
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void ScalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked scatter 
intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+//                                  <16 x i1> %Mask)
+// to a chain of basic blocks, that stores element one-by-one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+//   . . .
+static void ScalarizeMaskedScatter(CallInst *CI) {
+  Value *Src = CI->getArgOperand(0);
+  Value *Ptrs = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
+  Value *Mask = CI->getArgOperand(3);
+
+  assert(isa<VectorType>(Src->getType()) &&
+         "Unexpected data type in masked scatter intrinsic");
+  assert(isa<VectorType>(Ptrs->getType()) &&
+         isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+         "Vector of pointers is expected in masked scatter intrinsic");
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+  unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+  // Shorten the way if the mask is a vector of constants.
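// Aside: the scatter is the store-side twin of the gather above; its per-lane
// rule, modeled the same way (plain C++, illustrative names). Lanes may alias
// each other, which is why the expansion keeps the stores in lane order:

#include <array>
#include <cassert>
#include <cstddef>

int main() {
  std::array<int, 4> dst = {0, 0, 0, 0};
  std::array<int, 4> src = {10, 20, 30, 40};
  std::array<int *, 4> ptrs = {&dst[0], &dst[1], &dst[2], &dst[2]};
  std::array<bool, 4> mask = {true, false, true, true};
  for (std::size_t i = 0; i < ptrs.size(); ++i)
    if (mask[i])
      *ptrs[i] = src[i];  // one cond.store block per set mask bit
  // Lanes 2 and 3 alias: the later lane wins, as the block chain guarantees.
  assert(dst == (std::array<int, 4>{10, 0, 40, 0}));
  return 0;
}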
+ bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = + Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); @@ -1295,7 +1615,86 @@ static void ScalarizeMaskedStore(CallInst *CI) { CI->eraseFromParent(); } -bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { +/// If counting leading or trailing zeros is an expensive operation and a zero +/// input is defined, add a check for zero to avoid calling the intrinsic. +/// +/// We want to transform: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) +/// +/// into: +/// entry: +/// %cmpz = icmp eq i64 %A, 0 +/// br i1 %cmpz, label %cond.end, label %cond.false +/// cond.false: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) +/// br label %cond.end +/// cond.end: +/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +/// +/// If the transform is performed, return true and set ModifiedDT to true. +static bool despeculateCountZeros(IntrinsicInst *CountZeros, + const TargetLowering *TLI, + const DataLayout *DL, + bool &ModifiedDT) { + if (!TLI || !DL) + return false; + + // If a zero input is undefined, it doesn't make sense to despeculate that. + if (match(CountZeros->getOperand(1), m_One())) + return false; + + // If it's cheap to speculate, there's nothing to do. + auto IntrinsicID = CountZeros->getIntrinsicID(); + if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || + (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) + return false; + + // Only handle legal scalar cases. Anything else requires too much work. + Type *Ty = CountZeros->getType(); + unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); + if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize()) + return false; + + // The intrinsic will be sunk behind a compare against zero and branch. + BasicBlock *StartBlock = CountZeros->getParent(); + BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); + + // Create another block after the count zero intrinsic. 
A PHI will be added + // in this block to select the result of the intrinsic or the bit-width + // constant if the input to the intrinsic is zero. + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); + BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); + + // Set up a builder to create a compare, conditional branch, and PHI. + IRBuilder<> Builder(CountZeros->getContext()); + Builder.SetInsertPoint(StartBlock->getTerminator()); + Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); + + // Replace the unconditional branch that was created by the first split with + // a compare against zero and a conditional branch. + Value *Zero = Constant::getNullValue(Ty); + Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + Builder.CreateCondBr(Cmp, EndBlock, CallBlock); + StartBlock->getTerminator()->eraseFromParent(); + + // Create a PHI in the end block to select either the output of the intrinsic + // or the bit width of the operand. + Builder.SetInsertPoint(&EndBlock->front()); + PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); + CountZeros->replaceAllUsesWith(PN); + Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); + PN->addIncoming(BitWidth, StartBlock); + PN->addIncoming(CountZeros, CallBlock); + + // We are explicitly handling the zero case, so we can set the intrinsic's + // undefined zero argument to 'true'. This will also prevent reprocessing the + // intrinsic; we only despeculate when a zero input is defined. + CountZeros->setArgOperand(1, Builder.getTrue()); + ModifiedDT = true; + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -1311,7 +1710,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } // Sink address computing for memory operands into the block. - if (OptimizeInlineAsmInst(CI)) + if (optimizeInlineAsmInst(CI)) return true; } @@ -1372,14 +1771,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. 
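// Aside: what despeculateCountZeros builds is the IR form of this function
// (plain C++; __builtin_ctzll is the GCC/Clang builtin, standing in for
// @llvm.cttz with the zero-is-undef flag set):

#include <cassert>
#include <cstdint>

static uint64_t despeculatedCttz(uint64_t x) {
  if (x == 0)                 // %cmpz = icmp eq i64 %A, 0; br i1 %cmpz, ...
    return 64;                // the PHI incoming from the entry block
  return __builtin_ctzll(x);  // cond.false: a zero input never reaches it
}

int main() {
  assert(despeculatedCttz(0) == 64);
  assert(despeculatedCttz(8) == 3);
  assert(despeculatedCttz(1ULL << 63) == 63);
  return 0;
}

// Because the zero case is now handled by an explicit branch, the intrinsic's
// second argument can be flipped to true, which is also what prevents the
// pass from reprocessing the same call.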
- if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { CurInstIterator = BB->begin(); SunkAddrs.clear(); } @@ -1387,7 +1786,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) { + if (!TTI->isLegalMaskedLoad(CI->getType())) { ScalarizeMaskedLoad(CI); ModifiedDT = true; return true; @@ -1395,13 +1794,29 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { ScalarizeMaskedStore(CI); ModifiedDT = true; return true; } return false; } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + ScalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + ScalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); @@ -1415,6 +1830,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } + case Intrinsic::invariant_group_barrier: + II->replaceAllUsesWith(II->getArgOperand(0)); + II->eraseFromParent(); + return true; + + case Intrinsic::cttz: + case Intrinsic::ctlz: + // If counting zeros is expensive, try to avoid it. + return despeculateCountZeros(II, TLI, DL, ModifiedDT); } if (TLI) { @@ -1426,7 +1850,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { Type *AccessTy; if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) + if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) return true; } } @@ -1447,9 +1871,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } -/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return -/// instructions to the predecessor to enable tail call optimizations. The -/// case it is currently looking for is: +/// Look for opportunities to duplicate return instructions to the predecessor +/// to enable tail call optimizations. The case it is currently looking for is: /// @code /// bb0: /// %tmp0 = tail call i32 @f0() @@ -1478,7 +1901,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; @@ -1597,7 +2020,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { namespace { -/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// This is an extended version of TargetLowering::AddrMode /// which holds actual Value*'s for register values. struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg; @@ -1709,10 +2132,10 @@ class TypePromotionTransaction { public: /// \brief Record the position of \p Inst. 
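// Aside: a toy of the ExtAddrMode record introduced above: an x86-style
// base + index*scale + displacement shape that the matcher tries to fill in
// from IR. (Plain C++; only a subset of the real struct's fields, and every
// name here is illustrative.)

#include <cassert>
#include <cstdint>

struct ToyAddrMode {
  int64_t BaseOffs = 0;    // constant displacement folded from GEPs/adds
  uint64_t BaseReg = 0;    // value feeding the base register
  uint64_t ScaledReg = 0;  // value feeding the index register
  int64_t Scale = 0;       // 0 means "no index in use yet"

  uint64_t effectiveAddress() const {
    return BaseReg + ScaledReg * static_cast<uint64_t>(Scale) +
           static_cast<uint64_t>(BaseOffs);
  }
};

int main() {
  ToyAddrMode AM;
  AM.BaseReg = 0x1000;  // e.g. an incoming pointer
  AM.ScaledReg = 5;     // e.g. a loop index
  AM.Scale = 8;         // folded from `shl i64 %i, 3` (Scale = 1 << 3)
  AM.BaseOffs = 16;     // folded from a constant GEP offset
  assert(AM.effectiveAddress() == 0x1000 + 5 * 8 + 16);
  return 0;
}

// Each match step either claims one of these slots for a value or gives up,
// which is why so much of the matcher below is snapshot-and-rollback.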
InsertionHandler(Instruction *Inst) { - BasicBlock::iterator It = Inst; + BasicBlock::iterator It = Inst->getIterator(); HasPrevInstruction = (It != (Inst->getParent()->begin())); if (HasPrevInstruction) - Point.PrevInst = --It; + Point.PrevInst = &*--It; else Point.BB = Inst->getParent(); } @@ -1724,7 +2147,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); Inst->insertAfter(Point.PrevInst); } else { - Instruction *Position = Point.BB->getFirstInsertionPt(); + Instruction *Position = &*Point.BB->getFirstInsertionPt(); if (Inst->getParent()) Inst->moveBefore(Position); else @@ -1797,7 +2220,7 @@ class TypePromotionTransaction { Value *Val = Inst->getOperand(It); OriginalValues.push_back(Val); // Set a dummy one. - // We could use OperandSetter here, but that would implied an overhead + // We could use OperandSetter here, but that would imply an overhead // that we are not willing to pay. Inst->setOperand(It, UndefValue::get(Val->getType())); } @@ -2111,7 +2534,7 @@ class AddressingModeMatcher { unsigned AddrSpace; Instruction *MemoryInst; - /// AddrMode - This is the addressing mode that we're building up. This is + /// This is the addressing mode that we're building up. This is /// part of the return value of this addressing mode matching stuff. ExtAddrMode &AddrMode; @@ -2122,9 +2545,8 @@ class AddressingModeMatcher { /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; - /// IgnoreProfitability - This is set to true when we should not do - /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode - /// always returns true. + /// This is set to true when we should not do profitability checks. + /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl &AMI, @@ -2143,7 +2565,7 @@ class AddressingModeMatcher { } public: - /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// Find the maximal addressing mode that a load/store of V can fold, /// give an access type of AccessTy. This returns a list of involved /// instructions in AddrModeInsts. /// \p InsertedInsts The instructions inserted by other CodeGenPrepare @@ -2161,32 +2583,32 @@ public: bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT).MatchAddr(V, 0); + PromotedInsts, TPT).matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } private: - bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); - bool MatchAddr(Value *V, unsigned Depth); - bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool matchAddr(Value *V, unsigned Depth); + bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, bool *MovedAway = nullptr); - bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + bool isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); - bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, + bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); + bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; -/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 
+/// Try adding ScaleReg*Scale to the current addressing mode. /// Return true and update AddrMode if this addr mode is legal for the target, /// false if not. -bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, +bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth) { // If Scale is 1, then this is the same as adding ScaleReg to the addressing // mode. Just process that directly. if (Scale == 1) - return MatchAddr(ScaleReg, Depth); + return matchAddr(ScaleReg, Depth); // If the scale is 0, it takes nothing to add this. if (Scale == 0) @@ -2233,9 +2655,9 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, return true; } -/// MightBeFoldableInst - This is a little filter, which returns true if an -/// addressing computation involving I might be folded into a load/store -/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// This is a little filter, which returns true if an addressing computation +/// involving I might be folded into a load/store accessing it. +/// This doesn't need to be perfect, but needs to accept at least /// the set of instructions that MatchOperationAddr can. static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { @@ -2301,9 +2723,7 @@ class TypePromotionHelper { /// \brief Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { - if (isa(Inst) && OpIdx == 0) - return false; - return true; + return !(isa(Inst) && OpIdx == 0); } /// \brief Utility function to promote the operand of \p Ext when this @@ -2413,8 +2833,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, Value *OpndVal = Inst->getOperand(0); // Check if we can use this operand in the extension. - // If the type is larger than the result type of the extension, - // we cannot. + // If the type is larger than the result type of the extension, we cannot. if (!OpndVal->getType()->isIntegerTy() || OpndVal->getType()->getIntegerBitWidth() > ConsideredExtType->getIntegerBitWidth()) @@ -2433,18 +2852,16 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // #1 get the type of the operand and check the kind of the extended bits. const Type *OpndType; InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); - if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt) - OpndType = It->second.Ty; + if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) + OpndType = It->second.getPointer(); else if ((IsSExt && isa(Opnd)) || (!IsSExt && isa(Opnd))) OpndType = Opnd->getOperand(0)->getType(); else return false; - // #2 check that the truncate just drop extended bits. - if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) - return true; - - return false; + // #2 check that the truncate just drops extended bits. + return Inst->getType()->getIntegerBitWidth() >= + OpndType->getIntegerBitWidth(); } TypePromotionHelper::Action TypePromotionHelper::getAction( @@ -2553,7 +2970,7 @@ Value *TypePromotionHelper::promoteOperandForOther( } TPT.replaceAllUsesWith(ExtOpnd, Trunc); - // Restore the operand of Ext (which has been replace by the previous call + // Restore the operand of Ext (which has been replaced by the previous call // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 
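// Aside: canGetThrough's rule for truncates, "the truncate just drops
// extended bits", checked on concrete values (plain C++; the casts mirror
// sext/trunc at the named widths):

#include <cassert>
#include <cstdint>

int main() {
  int8_t x = -5;
  int32_t wide = x;                  // sext i8 -> i32, the recorded promotion
  int16_t viaTrunc = (int16_t)wide;  // trunc i32 -> i16; 16 >= 8, so legal
  int16_t direct = x;                // sext i8 -> i16 computed directly
  assert(viaTrunc == direct);
  return 0;
}

// Had the truncated width dipped below the original 8 bits, the truncate
// would chop into the payload rather than the copies of the sign bit; the
// bit-width comparison in the code above is exactly that guard.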
TPT.setOperand(Ext, 0, ExtOpnd); } @@ -2631,8 +3048,7 @@ Value *TypePromotionHelper::promoteOperandForOther( return ExtOpnd; } -/// IsPromotionProfitable - Check whether or not promoting an instruction -/// to a wider type was profitable. +/// Check whether or not promoting an instruction to a wider type is profitable. /// \p NewCost gives the cost of extension instructions created by the /// promotion. /// \p OldCost gives the cost of extension instructions before the promotion @@ -2640,7 +3056,7 @@ Value *TypePromotionHelper::promoteOperandForOther( /// matched in the addressing mode the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool AddressingModeMatcher::IsPromotionProfitable( +bool AddressingModeMatcher::isPromotionProfitable( unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); // The cost of the new extensions is greater than the cost of the @@ -2656,9 +3072,9 @@ bool AddressingModeMatcher::IsPromotionProfitable( return isPromotedInstructionLegal(TLI, DL, PromotedOperand); } -/// MatchOperationAddr - Given an instruction or constant expr, see if we can -/// fold the operation into the addressing mode. If so, update the addressing -/// mode and return true, otherwise return false without modifying AddrMode. +/// Given an instruction or constant expr, see if we can fold the operation +/// into the addressing mode. If so, update the addressing mode and return +/// true, otherwise return false without modifying AddrMode. /// If \p MovedAway is not NULL, it contains the information of whether or /// not AddrInst has to be folded into the addressing mode on success. /// If \p MovedAway == true, \p AddrInst will not be part of the addressing @@ -2667,7 +3083,7 @@ bool AddressingModeMatcher::IsPromotionProfitable( /// This state can happen when AddrInst is a sext, since it may be moved away. /// Therefore, AddrInst may not be valid when MovedAway is true and it must /// not be referenced anymore. -bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, +bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, bool *MovedAway) { // Avoid exponential behavior on extremely deep expression trees. @@ -2680,13 +3096,13 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, switch (Opcode) { case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); case Instruction::IntToPtr: { auto AS = AddrInst->getType()->getPointerAddressSpace(); auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); // This inttoptr is a no-op if the integer type is pointer sized. if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::BitCast: @@ -2698,14 +3114,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, // and we don't want to mess around with them. Assume it knows what it // is doing. 
AddrInst->getOperand(0)->getType() != AddrInst->getType()) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::AddrSpaceCast: { unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::Add: { @@ -2719,8 +3135,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - if (MatchAddr(AddrInst->getOperand(1), Depth+1) && - MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(1), Depth+1) && + matchAddr(AddrInst->getOperand(0), Depth+1)) return true; // Restore the old addr mode info. @@ -2729,8 +3145,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TPT.rollback(LastKnownGood); // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. - if (MatchAddr(AddrInst->getOperand(0), Depth+1) && - MatchAddr(AddrInst->getOperand(1), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1) && + matchAddr(AddrInst->getOperand(1), Depth+1)) return true; // Otherwise we definitely can't merge the ADD in. @@ -2752,7 +3168,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (Opcode == Instruction::Shl) Scale = 1LL << Scale; - return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); } case Instruction::GetElementPtr: { // Scan the GEP. We check it if it contains constant offsets and at most @@ -2791,7 +3207,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (ConstantOffset == 0 || TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. - if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1)) return true; } AddrMode.BaseOffs -= ConstantOffset; @@ -2806,7 +3222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.BaseOffs += ConstantOffset; // Match the base operand of the GEP. - if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { // If it couldn't be matched, just stuff the value in a register. if (AddrMode.HasBaseReg) { AddrMode = BackupAddrMode; @@ -2818,7 +3234,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, } // Match the remaining variable portion of the GEP. - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If it couldn't be matched, try stuffing the base into a register // instead of matching it, and retrying the match of the scale. @@ -2829,7 +3245,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.HasBaseReg = true; AddrMode.BaseReg = AddrInst->getOperand(0); AddrMode.BaseOffs += ConstantOffset; - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If even that didn't work, bail. 
AddrMode = BackupAddrMode; @@ -2879,12 +3295,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, ExtAddrMode BackupAddrMode = AddrMode; unsigned OldSize = AddrModeInsts.size(); - if (!MatchAddr(PromotedOperand, Depth) || - // The total of the new cost is equals to the cost of the created + if (!matchAddr(PromotedOperand, Depth) || + // The total of the new cost is equal to the cost of the created // instructions. - // The total of the old cost is equals to the cost of the extension plus + // The total of the old cost is equal to the cost of the extension plus // what we have saved in the addressing mode. - !IsPromotionProfitable(CreatedInstsCost, + !isPromotionProfitable(CreatedInstsCost, ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; @@ -2899,12 +3315,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return false; } -/// MatchAddr - If we can, try to add the value of 'Addr' into the current -/// addressing mode. If Addr can't be added to AddrMode this returns false and -/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type -/// or intptr_t for the target. +/// If we can, try to add the value of 'Addr' into the current addressing mode. +/// If Addr can't be added to AddrMode this returns false and leaves AddrMode +/// unmodified. This assumes that Addr is either a pointer type or intptr_t +/// for the target. /// -bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { +bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { // Start a transaction at this point that we will rollback if the matching // fails. TypePromotionTransaction::ConstRestorationPt LastKnownGood = @@ -2929,8 +3345,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // Check to see if it is possible to fold this operation. bool MovedAway = false; - if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { - // This instruction may have been move away. If so, there is nothing + if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { + // This instruction may have been moved away. If so, there is nothing // to check here. if (MovedAway) return true; @@ -2938,7 +3354,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // *profitable* to do so. We use a simple cost model to avoid increasing // register pressure too much. if (I->hasOneUse() || - IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { AddrModeInsts.push_back(I); return true; } @@ -2950,7 +3366,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { TPT.rollback(LastKnownGood); } } else if (ConstantExpr *CE = dyn_cast(Addr)) { - if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + if (matchOperationAddr(CE, CE->getOpcode(), Depth)) return true; TPT.rollback(LastKnownGood); } else if (isa(Addr)) { @@ -2983,9 +3399,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { return false; } -/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified -/// inline asm call are due to memory operands. If so, return true, otherwise -/// return false. +/// Check to see if all uses of OpVal by the specified inline asm call are due +/// to memory operands. If so, return true, otherwise return false. 
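// Aside: the Add and GEP cases above share one idiom: copy the half-built
// state, match speculatively, restore wholesale on failure. Reduced to its
// skeleton in plain C++ (ToyMode/tryFold are illustrative names; in the
// patch the pair is BackupAddrMode/AddrMode, with TPT.rollback covering the
// promotion side):

#include <cassert>

struct ToyMode { int baseOffs = 0; bool hasBaseReg = false; };

static bool tryFold(ToyMode &mode, bool deeperMatchSucceeds) {
  ToyMode backup = mode;  // ExtAddrMode BackupAddrMode = AddrMode;
  mode.baseOffs += 8;     // speculative edits made while matching
  mode.hasBaseReg = true;
  if (deeperMatchSucceeds)
    return true;          // keep the speculative edits
  mode = backup;          // AddrMode = BackupAddrMode; (plus TPT.rollback)
  return false;
}

int main() {
  ToyMode m;
  assert(!tryFold(m, false) && m.baseOffs == 0 && !m.hasBaseReg);
  assert(tryFold(m, true) && m.baseOffs == 8 && m.hasBaseReg);
  return 0;
}

// The transaction object exists because promotions create real instructions;
// those need undoing too, not just the plain-old-data addressing record.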
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetMachine &TM) { const Function *F = CI->getParent()->getParent(); @@ -3011,8 +3426,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, return true; } -/// FindAllMemoryUses - Recursively walk all the uses of I until we find a -/// memory use. If we find an obviously non-foldable instruction, return true. +/// Recursively walk all the uses of I until we find a memory use. +/// If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. static bool FindAllMemoryUses( Instruction *I, @@ -3059,11 +3474,11 @@ static bool FindAllMemoryUses( return false; } -/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at -/// the use site that we're folding it into. If so, there is no cost to -/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values -/// that we know are live at the instruction already. -bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, +/// Return true if Val is already known to be live at the use site that we're +/// folding it into. If so, there is no cost to include it in the addressing +/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the +/// instruction already. +bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) @@ -3085,11 +3500,11 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, return Val->isUsedInBasicBlock(MemoryInst->getParent()); } -/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing -/// mode of the machine to fold the specified instruction into a load or store -/// that ultimately uses it. However, the specified instruction has multiple -/// uses. Given this, it may actually increase register pressure to fold it -/// into the load. For example, consider this code: +/// It is possible for the addressing mode of the machine to fold the specified +/// instruction into a load or store that ultimately uses it. +/// However, the specified instruction has multiple uses. +/// Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: /// /// X = ... /// Y = X+1 @@ -3107,7 +3522,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, /// X was live across 'load Z' for other reasons, we actually *would* want to /// fold the addressing mode in the Z case. This would make Y die earlier. bool AddressingModeMatcher:: -IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, +isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { if (IgnoreProfitability) return true; @@ -3124,9 +3539,9 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. 
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) BaseReg = nullptr; - if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) ScaledReg = nullptr; // If folding this instruction (and it's subexprs) didn't extend any live @@ -3171,7 +3586,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, MemoryInst, Result, InsertedInsts, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; - bool Success = Matcher.MatchAddr(Address, 0); + bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); // The match was to check the profitability, the changes made are not @@ -3192,7 +3607,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, } // end anonymous namespace -/// IsNonLocalValue - Return true if the specified values are defined in a +/// Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { if (Instruction *I = dyn_cast(V)) @@ -3200,16 +3615,15 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions often have -/// addressing modes that can do significant amounts of computation. As such, -/// instruction selection will try to get the load or store to do as much -/// computation as possible for the program. The problem is that isel can only -/// see within a single block. As such, we sink as much legal addressing mode -/// stuff into the block as possible. +/// Load and Store Instructions often have addressing modes that can do +/// significant amounts of computation. As such, instruction selection will try +/// to get the load or store to do as much computation as possible for the +/// program. The problem is that isel can only see within a single block. As +/// such, we sink as much legal addressing mode work into the block as possible. /// /// This method is used to optimize both load/store and inline asms with memory /// operands. -bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, +bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; @@ -3530,12 +3944,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. // Use a WeakVH to hold onto it in case this happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); - if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { // If the iterator instruction was recursively deleted, start over at the // start of the block. CurInstIterator = BB->begin(); @@ -3546,10 +3960,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return true; } -/// OptimizeInlineAsmInst - If there are any memory operands, use -/// OptimizeMemoryInst to sink their address computing into the block when -/// possible / profitable. 
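// Aside: both call sites of the WeakVH guard (here and in optimizeCallInst)
// use the same trick: hold a self-invalidating handle on the current
// instruction, run a simplification that may recursively delete it, then test
// the handle instead of a possibly dangling iterator. std::weak_ptr gives a
// faithful toy of it (plain C++; LLVM's WeakVH nulls itself when its Value
// dies):

#include <cassert>
#include <memory>

int main() {
  auto inst = std::make_shared<int>(42);  // stands in for &*CurInstIterator
  std::weak_ptr<int> handle = inst;       // WeakVH IterHandle(&*CurInstIterator)
  inst.reset();                           // recursive simplification deletes it
  if (handle.expired()) {
    // The handle no longer matches the iterator: restart the scan from
    // BB->begin(), as the patch does after resetting CurInstIterator.
  }
  assert(handle.expired());
  return 0;
}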
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { +/// If there are any memory operands, use OptimizeMemoryInst to sink their +/// address computing into the block when possible / profitable. +bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = @@ -3566,7 +3979,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); - MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); + MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); } else if (OpInfo.Type == InlineAsm::isInput) ArgNo++; } @@ -3646,7 +4059,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { /// %add = add nuw i64 %zext, 4 /// \encode /// Thanks to the promotion, we can match zext(load i32*) to i64. -bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, +bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl &Exts, unsigned CreatedInstsCost = 0) { @@ -3696,7 +4109,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with @@ -3713,13 +4126,13 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, return false; } -/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same -/// basic block as the load, unless conditions are unfavorable. This allows -/// SelectionDAG to fold the extend into the load. +/// Move a zext or sext fed by a load into the same basic block as the load, +/// unless conditions are unfavorable. This allows SelectionDAG to fold the +/// extend into the load. /// \p I[in/out] the extension may be modified during the process if some /// promotions apply. /// -bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { +bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { // Try to promote a chain of computation if it allows to form // an extended load. TypePromotionTransaction TPT; @@ -3730,7 +4143,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { // Look for a load being extended. 
LoadInst *LI = nullptr; Instruction *OldExt = I; - bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); + bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); if (!LI || !I) { assert(!HasPromoted && !LI && "If we did not match any load instruction " "the code must remain the same"); @@ -3780,7 +4193,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { return true; } -bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { +bool CodeGenPrepare::optimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); // If the result of a {s|z}ext and its source are both live out, rewrite all @@ -3838,7 +4251,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!InsertedTrunc) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); InsertedInsts.insert(InsertedTrunc); } @@ -3851,9 +4265,202 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return MadeChange; } -/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be -/// turned into an explicit branch. -static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { +// Find loads whose uses only use some of the loaded value's bits. Add an "and" +// just after the load if the target can fold this into one extload instruction, +// with the hope of eliminating some of the other later "and" instructions using +// the loaded value. "and"s that are made trivially redundant by the insertion +// of the new "and" are removed by this function, while others (e.g. those whose +// path from the load goes through a phi) are left for isel to potentially +// remove. +// +// For example: +// +// b0: +// x = load i32 +// ... +// b1: +// y = and x, 0xff +// z = use y +// +// becomes: +// +// b0: +// x = load i32 +// x' = and x, 0xff +// ... +// b1: +// z = use x' +// +// whereas: +// +// b0: +// x1 = load i32 +// ... +// b1: +// x2 = load i32 +// ... +// b2: +// x = phi x1, x2 +// y = and x, 0xff +// +// becomes (after a call to optimizeLoadExt for each load): +// +// b0: +// x1 = load i32 +// x1' = and x1, 0xff +// ... +// b1: +// x2 = load i32 +// x2' = and x2, 0xff +// ... +// b2: +// x = phi x1', x2' +// y = and x, 0xff +// + +bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { + + if (!Load->isSimple() || + !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) + return false; + + // Skip loads we've already transformed or have no reason to transform. + if (Load->hasOneUse()) { + User *LoadUser = *Load->user_begin(); + if (cast(LoadUser)->getParent() == Load->getParent() && + !dyn_cast(LoadUser)) + return false; + } + + // Look at all uses of Load, looking through phis, to determine how many bits + // of the loaded value are needed. + SmallVector WorkList; + SmallPtrSet Visited; + SmallVector AndsToMaybeRemove; + for (auto *U : Load->users()) + WorkList.push_back(cast(U)); + + EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); + unsigned BitWidth = LoadResultVT.getSizeInBits(); + APInt DemandBits(BitWidth, 0); + APInt WidestAndBits(BitWidth, 0); + + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(I).second) + continue; + + // For a PHI node, push all of its users. 
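The worklist that follows walks the load's transitive users, looking through phis, and unions the bits each user demands; the And/Shl/Trunc switch below supplies the per-user masks. As a standalone model of that arithmetic, with plain 64-bit integers in place of APInt (the Use struct and its fields are invented for this sketch):

#include <cassert>
#include <cstdint>
#include <vector>

struct Use {
  enum Kind { And, Shl, Trunc } K;
  uint64_t Imm; // and-mask, shift amount, or truncated bit width
};

uint64_t demandedBits(const std::vector<Use> &Uses, unsigned BitWidth) {
  assert(BitWidth >= 1 && BitWidth <= 64);
  uint64_t AllOnes = ~0ULL >> (64 - BitWidth);
  uint64_t Demand = 0;
  for (const Use &U : Uses) {
    switch (U.K) {
    case Use::And: // only bits kept by the mask are observable
      Demand |= U.Imm;
      break;
    case Use::Shl: // bits shifted out the top are never observed
      assert(U.Imm < BitWidth);
      Demand |= AllOnes >> U.Imm;
      break;
    case Use::Trunc: // only the low Imm bits survive the truncate
      assert(U.Imm >= 1 && U.Imm <= BitWidth);
      Demand |= ~0ULL >> (64 - U.Imm);
      break;
    }
  }
  return Demand & AllOnes;
}

With a single user `and x, 0xff`, this returns 0xff, and an i8-sized mask is exactly the case the pass can turn into a zero-extending narrow load.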
+ if (auto *Phi = dyn_cast(I)) { + for (auto *U : Phi->users()) + WorkList.push_back(cast(U)); + continue; + } + + switch (I->getOpcode()) { + case llvm::Instruction::And: { + auto *AndC = dyn_cast(I->getOperand(1)); + if (!AndC) + return false; + APInt AndBits = AndC->getValue(); + DemandBits |= AndBits; + // Keep track of the widest and mask we see. + if (AndBits.ugt(WidestAndBits)) + WidestAndBits = AndBits; + if (AndBits == WidestAndBits && I->getOperand(0) == Load) + AndsToMaybeRemove.push_back(I); + break; + } + + case llvm::Instruction::Shl: { + auto *ShlC = dyn_cast(I->getOperand(1)); + if (!ShlC) + return false; + uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); + auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); + DemandBits |= ShlDemandBits; + break; + } + + case llvm::Instruction::Trunc: { + EVT TruncVT = TLI->getValueType(*DL, I->getType()); + unsigned TruncBitWidth = TruncVT.getSizeInBits(); + auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); + DemandBits |= TruncBits; + break; + } + + default: + return false; + } + } + + uint32_t ActiveBits = DemandBits.getActiveBits(); + // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the + // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, + // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but + // (and (load x) 1) is not matched as a single instruction, rather as a LDR + // followed by an AND. + // TODO: Look into removing this restriction by fixing backends to either + // return false for isLoadExtLegal for i1 or have them select this pattern to + // a single instruction. + // + // Also avoid hoisting if we didn't see any ands with the exact DemandBits + // mask, since these are the only ands that will be removed by isel. + if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) || + WidestAndBits != DemandBits) + return false; + + LLVMContext &Ctx = Load->getType()->getContext(); + Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); + EVT TruncVT = TLI->getValueType(*DL, TruncTy); + + // Reject cases that won't be matched as extloads. + if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || + !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) + return false; + + IRBuilder<> Builder(Load->getNextNode()); + auto *NewAnd = dyn_cast( + Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); + + // Replace all uses of load with new and (except for the use of load in the + // new and itself). + Load->replaceAllUsesWith(NewAnd); + NewAnd->setOperand(0, Load); + + // Remove any and instructions that are now redundant. + for (auto *And : AndsToMaybeRemove) + // Check that the and mask is the same as the one we decided to put on the + // new and. + if (cast(And->getOperand(1))->getValue() == DemandBits) { + And->replaceAllUsesWith(NewAnd); + if (&*CurInstIterator == And) + CurInstIterator = std::next(And->getIterator()); + And->eraseFromParent(); + ++NumAndUses; + } + + ++NumAndsAdded; + return true; +} + +/// Check if V (an operand of a select instruction) is an expensive instruction +/// that is only used once. +static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { + auto *I = dyn_cast(V); + // If it's safe to speculatively execute, then it should not have side + // effects; therefore, it's safe to sink and possibly *not* execute. 
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && + TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; +} + +/// Returns true if a SelectInst should be turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the @@ -3861,28 +4468,36 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { CmpInst *Cmp = dyn_cast(SI->getCondition()); - // If the branch is predicted right, an out of order CPU can avoid blocking on - // the compare. Emit cmovs on compares with a memory operand as branches to - // avoid stalls on the load from memory. If the compare has more than one use - // there's probably another cmov or setcc around so it's not worth emitting a - // branch. - if (!Cmp) + // If a branch is predictable, an out-of-order CPU can avoid blocking on its + // comparison condition. If the compare has more than one use, there's + // probably another cmov or setcc around, so it's not worth emitting a branch. + if (!Cmp || !Cmp->hasOneUse()) return false; Value *CmpOp0 = Cmp->getOperand(0); Value *CmpOp1 = Cmp->getOperand(1); - // We check that the memory operand has one use to avoid uses of the loaded - // value directly after the compare, making branches unprofitable. - return Cmp->hasOneUse() && - ((isa(CmpOp0) && CmpOp0->hasOneUse()) || - (isa(CmpOp1) && CmpOp1->hasOneUse())); + // Emit "cmov on compare with a memory operand" as a branch to avoid stalls + // on a load from memory. But if the load is used more than once, do not + // change the select to a branch because the load is probably needed + // regardless of whether the branch is taken or not. + if ((isa(CmpOp0) && CmpOp0->hasOneUse()) || + (isa(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive and only needed on one side + // of the select, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; } /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. -bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { +bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -3902,34 +4517,97 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { // We have efficient codegen support for the select instruction. // Check if it is profitable to keep this 'select'. if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(SI)) + !isFormingBranchFromSelectProfitable(TTI, SI)) return false; } ModifiedDT = true; + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the destination(s) of the new branch. 
+ // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); - BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - // Create a new block serving as the landing pad for the branch. - BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", - NextBlock->getParent(), NextBlock); - - // Move the unconditional branch from the block with the select in it into our - // landing pad block. + // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(NextBlock, SmallBlock); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + auto *TrueInst = cast(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + auto *FalseInst = cast(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } // Insert the real conditional branch based on the original condition. - BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + if (TrueBlock == nullptr) { + BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + FalseBlock = StartBlock; + } else { + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + } // The select itself is replaced with a PHI Node. 
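Before the PHI-creation code below, it is worth seeing why the sinking pays off. A standalone C++ illustration, where expensive() is an invented stand-in for any costly, single-use operand:

#include <cmath>

static double expensive(double X) { return std::sqrt(X) + std::log1p(X); }

// As a select: both operands are computed before the condition is consulted.
double viaSelect(bool C, double X) {
  double T = expensive(X); // paid even when C is false
  return C ? T : 0.0;
}

// As a branch with the operand sunk (the "select.true.sink" block above):
// the cost is paid only on the path that actually needs the value.
double viaBranch(bool C, double X) {
  if (C)
    return expensive(X);
  return 0.0;
}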
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), StartBlock); - PN->addIncoming(SI->getFalseValue(), SmallBlock); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + SI->replaceAllUsesWith(PN); SI->eraseFromParent(); @@ -3955,7 +4633,7 @@ static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases /// it's often worth sinking a shufflevector splat down to its use so that /// codegen can spot all lanes are identical. -bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { BasicBlock *DefBB = SVI->getParent(); // Only do this xform if variable vector shifts are particularly expensive. @@ -3987,9 +4665,10 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { if (!InsertedShuffle) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), - SVI->getOperand(1), - SVI->getOperand(2), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedShuffle = + new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getOperand(2), "", &*InsertPt); } UI->replaceUsesOfWith(SVI, InsertedShuffle); @@ -4005,6 +4684,49 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { + if (!TLI || !DL) + return false; + + Value *Cond = SI->getCondition(); + Type *OldType = Cond->getType(); + LLVMContext &Context = Cond->getContext(); + MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); + unsigned RegWidth = RegType.getSizeInBits(); + + if (RegWidth <= cast(OldType)->getBitWidth()) + return false; + + // If the register width is greater than the type width, expand the condition + // of the switch instruction and each case constant to the width of the + // register. By widening the type of the switch condition, subsequent + // comparisons (for case comparisons) will not need to be extended to the + // preferred register width, so we will potentially eliminate N-1 extends, + // where N is the number of cases in the switch. + auto *NewType = Type::getIntNTy(Context, RegWidth); + + // Zero-extend the switch condition and case constants unless the switch + // condition is a function argument that is already being sign-extended. + // In that case, we can avoid an unnecessary mask/extension by sign-extending + // everything instead. + Instruction::CastOps ExtType = Instruction::ZExt; + if (auto *Arg = dyn_cast(Cond)) + if (Arg->hasSExtAttr()) + ExtType = Instruction::SExt; + + auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); + ExtInst->insertBefore(SI); + SI->setCondition(ExtInst); + for (SwitchInst::CaseIt Case : SI->cases()) { + APInt NarrowConst = Case.getCaseValue()->getValue(); + APInt WideConst = (ExtType == Instruction::ZExt) ? + NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); + Case.setValue(ConstantInt::get(Context, WideConst)); + } + + return true; +} + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. 
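Returning to optimizeSwitchInst above: the condition is extended once, and every case constant is widened to match, so per-case comparisons happen at register width with no per-case extends. A standalone sketch of the constant-widening step, assuming i8 cases and a 32-bit register type:

#include <cstdint>
#include <vector>

enum class Ext { Zero, Sign }; // mirrors the pass's ZExt/SExt choice

std::vector<uint32_t> widenCases(const std::vector<uint8_t> &Cases, Ext E) {
  std::vector<uint32_t> Wide;
  Wide.reserve(Cases.size());
  for (uint8_t C : Cases)
    Wide.push_back(E == Ext::Zero
                       ? uint32_t(C)                    // zext: pad with zeros
                       : uint32_t(int32_t(int8_t(C)))); // sext: replicate sign
  return Wide;
}

Sign extension is chosen only when the condition is an argument already carrying a signext attribute, which avoids inserting a mask or extra extension in front of the switch.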
@@ -4138,7 +4860,7 @@ class VectorPromoteHelper { /// \brief Generate a constant vector with \p Val with the same /// number of elements as the transition. /// \p UseSplat defines whether or not \p Val should be replicated - /// accross the whole vector. + /// across the whole vector. /// In other words, if UseSplat == true, we generate , /// otherwise we generate a vector with as many undef as possible: /// where \p Val is only @@ -4320,7 +5042,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. -bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { +bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { unsigned CombineCost = UINT_MAX; if (DisableStoreExtract || !TLI || (!StressStoreExtract && @@ -4372,7 +5094,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { return false; } -bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -4413,8 +5135,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { - bool MadeChange = MoveExtToFormExtLoad(I); - return MadeChange | OptimizeExtUses(I); + bool MadeChange = moveExtToFormExtLoad(I); + return MadeChange | optimizeExtUses(I); } } return false; @@ -4425,17 +5147,21 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast(I)) { + stripInvariantGroupMetadata(*LI); if (TLI) { + bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + return Modified; } return false; } if (StoreInst *SI = dyn_cast(I)) { + stripInvariantGroupMetadata(*SI); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, SI->getOperand(1), + return optimizeMemoryInst(I, SI->getOperand(1), SI->getOperand(0)->getType(), AS); } return false; @@ -4460,23 +5186,26 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; - OptimizeInst(NC, ModifiedDT); + optimizeInst(NC, ModifiedDT); return true; } return false; } if (CallInst *CI = dyn_cast(I)) - return OptimizeCallInst(CI, ModifiedDT); + return optimizeCallInst(CI, ModifiedDT); if (SelectInst *SI = dyn_cast(I)) - return OptimizeSelectInst(SI); + return optimizeSelectInst(SI); if (ShuffleVectorInst *SVI = dyn_cast(I)) - return OptimizeShuffleVectorInst(SVI); + return optimizeShuffleVectorInst(SVI); + + if (auto *Switch = dyn_cast(I)) + return optimizeSwitchInst(Switch); if (isa(I)) - return OptimizeExtractElementInst(I); + return optimizeExtractElementInst(I); return false; } @@ -4484,17 +5213,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. 
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;

   CurInstIterator = BB.begin();
   while (CurInstIterator != BB.end()) {
-    MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+    MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
     if (ModifiedDT)
       return true;
   }
-  MadeChange |= DupRetToEnableTailCallOpts(&BB);
+  MadeChange |= dupRetToEnableTailCallOpts(&BB);

   return MadeChange;
 }

@@ -4502,12 +5231,12 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
 // llvm.dbg.value is far away from the value then iSel may not be able
 // to handle it properly. iSel will drop llvm.dbg.value if it can not
 // find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+bool CodeGenPrepare::placeDbgValues(Function &F) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     Instruction *PrevNonDbgInst = nullptr;
     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
-      Instruction *Insn = BI++;
+      Instruction *Insn = &*BI++;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
       // Leave dbg.values that refer to an alloca alone. These
       // intrinsics describe the address of a variable (= the alloca)
@@ -4521,10 +5250,14 @@
       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());

       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+        // If VI is a phi in a block with an EHPad terminator, we can't insert
+        // after it.
+        if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+          continue;
         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
         DVI->removeFromParent();
         if (isa<PHINode>(VI))
-          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+          DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
         else
           DVI->insertAfter(VI);
         MadeChange = true;
@@ -4548,7 +5281,7 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
     return false;
   bool MadeChange = false;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
-    BasicBlock *BB = I++;
+    BasicBlock *BB = &*I++;

     // Does this BB end with the following?
     //   %andVal = and %val, #single-bit-set
@@ -4671,6 +5404,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
       continue;

+    auto *Br1 = cast<BranchInst>(BB.getTerminator());
+    if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+      continue;
+
     unsigned Opc;
     Value *Cond1, *Cond2;
     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
@@ -4697,7 +5434,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
     // Update original basic block by using the first condition directly by the
     // branch instruction and removing the no longer needed and/or instruction.
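What splitBranchCondition produces for the `and` case, sketched as a standalone C++ function before the diff continues below (the `or` case is the mirror image; block names and the TBB/FBB callbacks are illustrative, not the pass's code):

// In IR terms:
//   bb:       br i1 (and i1 %c1, %c2), label %tbb, label %fbb
// becomes
//   bb:       br i1 %c1, label %bb.split, label %fbb
//   bb.split: br i1 %c2, label %tbb, label %fbb
void splitAndBranch(bool C1, bool C2, void (&TBB)(), void (&FBB)()) {
  if (C1) {   // first branch tests %c1 alone
    if (C2) { // second branch tests %c2
      TBB();
      return;
    }
  }
  FBB();
}

Two cheap, individually predictable branches can beat one branch fed by a materialized `and`, which is also why the new MD_unpredictable check above bails out when profiling says the condition will not predict well.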
- auto *Br1 = cast(BB.getTerminator()); Br1->setCondition(Cond1); LogicOp->eraseFromParent(); @@ -4828,3 +5564,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } + +void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { + if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) + I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); +} diff --git a/lib/CodeGen/CoreCLRGC.cpp b/lib/CodeGen/CoreCLRGC.cpp index 28c97ba71bd9..ff7c0d5dc0ac 100644 --- a/lib/CodeGen/CoreCLRGC.cpp +++ b/lib/CodeGen/CoreCLRGC.cpp @@ -38,9 +38,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional isGCManagedPointer(const Value *V) const override { + Optional isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast(V->getType()); + const PointerType *PT = cast(Ty); // We pick addrspace(1) as our GC managed heap. return (1 == PT->getAddressSpace()); } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index dba280fd5aa2..c924ba30c8a1 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -52,14 +52,13 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Clear "do not change" set. KeepRegs.reset(); - bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Classes[Reg] = reinterpret_cast(-1); KillIndices[Reg] = BBSize; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 0a188c0935ad..af6b6a392b75 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -31,10 +31,39 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +namespace { + DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; + } + + /// Return the DFAInput for an instruction class input vector. + /// This function is used in both DFAPacketizer.cpp and in + /// DFAPacketizerEmitter.cpp. + DFAInput getDFAInsnInput(const std::vector &InsnClass) { + DFAInput InsnInput = 0; + assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; + } +} +// -------------------------------------------------------------------- + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, + const DFAStateInput (*SIT)[2], const unsigned *SET): InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) {} + DFAStateEntryTable(SET) { + // Make sure DFA types are large enough for the number of terms & resources. 
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); +} // @@ -60,26 +89,42 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } +// +// getInsnInput - Return the DFAInput for an instruction class. +// +DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { + // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. + DFAInput InsnInput = 0; + unsigned i = 0; + for (const InstrStage *IS = InstrItins->beginStage(InsnClass), + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); + assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + } + return InsnInput; +} + +// getInsnInput - Return the DFAInput for an instruction class input vector. +DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { + return getDFAInsnInput(InsnClass); +} // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); return (CachedTable.count(StateTrans) != 0); } - // reserveResources - Reserve the resources occupied by a MCInstrDesc and // change the current state to reflect that change. void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); assert(CachedTable.count(StateTrans) != 0); CurrentState = CachedTable[StateTrans]; @@ -104,32 +149,35 @@ namespace llvm { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { +private: + AliasAnalysis *AA; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - bool IsPostRA); + AliasAnalysis *AA); // Schedule - Actual scheduling work. void schedule() override; }; } DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, - MachineLoopInfo &MLI, bool IsPostRA) - : ScheduleDAGInstrs(MF, &MLI, IsPostRA) { + MachineLoopInfo &MLI, + AliasAnalysis *AA) + : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. 
-  buildSchedGraph(nullptr);
+  buildSchedGraph(AA);
 }

 // VLIWPacketizerList Ctor
 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
-                                       MachineLoopInfo &MLI, bool IsPostRA)
-    : MF(MF) {
+                                       MachineLoopInfo &MLI, AliasAnalysis *AA)
+    : MF(MF), AA(AA) {
   TII = MF.getSubtarget().getInstrInfo();
   ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
-  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
+  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA);
 }

 // VLIWPacketizerList Dtor
@@ -147,7 +195,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
                                    MachineInstr *MI) {
   if (CurrentPacketMIs.size() > 1) {
     MachineInstr *MIFirst = CurrentPacketMIs.front();
-    finalizeBundle(*MBB, MIFirst, MI);
+    finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
   }
   CurrentPacketMIs.clear();
   ResourceTracker->clearResources();
@@ -191,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
       // Ask DFA if machine resource is available for MI.
       bool ResourceAvail = ResourceTracker->canReserveResources(MI);

-      if (ResourceAvail) {
+      if (ResourceAvail && shouldAddToPacket(MI)) {
         // Dependency check for MI with instructions in CurrentPacketMIs.
         for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
              VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
@@ -210,7 +258,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
         } // !isLegalToPacketizeTogether.
       } // For all instructions in CurrentPacketMIs.
       } else {
-        // End the packet if resource is not available.
+        // End the packet if resource is not available, or if the instruction
+        // should not be added to the current packet.
         endPacket(MBB, MI);
       }

diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 941129b5cc95..b11b49717c45 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -101,26 +101,22 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
   // Loop over all instructions in all blocks, from bottom to top, so that it's
   // more likely that chains of dependent but ultimately dead instructions will
   // be cleaned up.
-  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
-       I != E; ++I) {
-    MachineBasicBlock *MBB = &*I;
-
+  for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
    // Start out assuming that reserved registers are live out of this block.
     LivePhysRegs = MRI->getReservedRegs();

    // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
     // live across blocks, but some targets (x86) can have flags live out of a
     // block.
-    for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
-         E = MBB->succ_end(); S != E; S++)
-      for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
-           LI != (*S)->livein_end(); LI++)
-        LivePhysRegs.set(*LI);
+    for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+         E = MBB.succ_end(); S != E; S++)
+      for (const auto &LI : (*S)->liveins())
+        LivePhysRegs.set(LI.PhysReg);

     // Now scan the instructions and delete dead ones, tracking physreg
     // liveness as we go.
-    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
-         MIE = MBB->rend(); MII != MIE; ) {
+    for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
+         MIE = MBB.rend(); MII != MIE; ) {
       MachineInstr *MI = &*MII;

       // If the instruction is dead, delete it!
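The bottom-up scan above can be restated as a standalone sketch: registers are plain ints, a std::set stands in for the pass's LivePhysRegs bit vector, and register aliasing, reserved registers, and debug values are deliberately omitted:

#include <set>
#include <vector>

struct Inst {
  std::vector<int> Defs;   // registers written
  std::vector<int> Uses;   // registers read
  bool SideEffects = false;
};

// Walk the block bottom-up; delete an instruction when nothing it defines is
// live below it and it has no side effects, then update liveness and continue.
void eliminateDead(std::vector<Inst> &Block, std::set<int> Live) {
  for (int i = int(Block.size()) - 1; i >= 0; --i) {
    const Inst &I = Block[i];
    bool Dead = !I.SideEffects && !I.Defs.empty();
    for (int D : I.Defs)
      if (Live.count(D))
        Dead = false; // some later instruction reads this def
    if (Dead) {
      Block.erase(Block.begin() + i);
      continue; // liveness is unchanged: the deleted defs were not live
    }
    for (int D : I.Defs)
      Live.erase(D);  // a def ends the live range above it
    for (int U : I.Uses)
      Live.insert(U); // a use starts one
  }
}

Scanning bottom-up is what lets whole chains die in one pass: once the last user is deleted, its operands' defs become dead in the same traversal.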
@@ -132,7 +128,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; - MIE = MBB->rend(); + MIE = MBB.rend(); // MII is now pointing to the next instruction to process, // so don't increment it. continue; diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index e019dfbc8f7d..eae78a950d9a 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -192,9 +192,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { if (Resumes.empty()) return false; - // Check the personality, don't do anything if it's for MSVC. + // Check the personality, don't do anything if it's funclet-based. EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); - if (isMSVCEHPersonality(Pers)) + if (isFuncletEHPersonality(Pers)) return false; LLVMContext &Ctx = Fn.getContext(); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index fbc4d97c4987..f3536d74111e 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -538,11 +538,11 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { // Fix up the CFG, temporarily leave Head without any successors. Head->removeSuccessor(TBB); - Head->removeSuccessor(FBB); + Head->removeSuccessor(FBB, true); if (TBB != Tail) - TBB->removeSuccessor(Tail); + TBB->removeSuccessor(Tail, true); if (FBB != Tail) - FBB->removeSuccessor(Tail); + FBB->removeSuccessor(Tail, true); // Fix up Head's terminators. // It should become a single branch or a fallthrough. diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5b09cf1a0fd7..c550008da025 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -375,9 +375,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // This is the entry block. if (MBB->pred_empty()) { - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), - e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndices(*i)) { + for (const auto &LI : MBB->liveins()) { + for (int rx : regIndices(LI.PhysReg)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -559,12 +558,11 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; - for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); - I != E; ++I) { + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { // Update liveness, including the current instruction's defs. - LiveRegSet.stepBackward(*I); + LiveRegSet.stepBackward(I); - if (UndefMI == &*I) { + if (UndefMI == &I) { if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); @@ -733,12 +731,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // If no relevant registers are used in the function, we can skip it // completely. 
bool anyregs = false; - for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { anyregs = true; break; } + } if (!anyregs) return false; // Initialize the AliasMap on the first use. @@ -752,7 +751,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { AliasMap[*AI].push_back(i); } - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &*MF->begin(); ReversePostOrderTraversal RPOT(Entry); SmallVector Loops; for (ReversePostOrderTraversal::rpo_iterator @@ -761,22 +760,19 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { enterBasicBlock(MBB); if (SeenUnknownBackEdge) Loops.push_back(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - visitInstr(I); + for (MachineInstr &MI : *MBB) + visitInstr(&MI); processUndefReads(MBB); leaveBasicBlock(MBB); } // Visit all the loop blocks again in order to merge DomainValues from // back-edges. - for (unsigned i = 0, e = Loops.size(); i != e; ++i) { - MachineBasicBlock *MBB = Loops[i]; + for (MachineBasicBlock *MBB : Loops) { enterBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - if (!I->isDebugValue()) - processDefs(I, false); + for (MachineInstr &MI : *MBB) + if (!MI.isDebugValue()) + processDefs(&MI, false); processUndefReads(MBB); leaveBasicBlock(MBB); } diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 55e809e24278..90ddac94f93b 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -50,7 +50,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // Iterate through each instruction in the function, looking for pseudos. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ) { MachineInstr *MI = MBBI++; @@ -63,7 +63,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // The expansion may involve new basic blocks. if (NewMBB != MBB) { MBB = NewMBB; - I = NewMBB; + I = NewMBB->getIterator(); MBBI = NewMBB->begin(); MBBE = NewMBB->end(); } diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp new file mode 100644 index 000000000000..8b2f505ff028 --- /dev/null +++ b/lib/CodeGen/FuncletLayout.cpp @@ -0,0 +1,55 @@ +//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations which result in +// funclets being contiguous. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "funclet-layout" + +namespace { +class FuncletLayout : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + FuncletLayout() : MachineFunctionPass(ID) { + initializeFuncletLayoutPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; +}; +} + +char FuncletLayout::ID = 0; +char &llvm::FuncletLayoutID = FuncletLayout::ID; +INITIALIZE_PASS(FuncletLayout, "funclet-layout", + "Contiguously Lay Out Funclets", false, false) + +bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { + DenseMap FuncletMembership = + getFuncletMembership(F); + if (FuncletMembership.empty()) + return false; + + F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto FuncletX = FuncletMembership.find(&X); + auto FuncletY = FuncletMembership.find(&Y); + assert(FuncletX != FuncletMembership.end()); + assert(FuncletY != FuncletMembership.end()); + return FuncletX->second < FuncletY->second; + }); + + // Conservatively assume we changed something. + return true; +} diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index d8edd7e4063f..484d31737b2e 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -158,7 +158,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots, // Search for initializers in the initial BB. SmallPtrSet InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) + for (; !CouldBecomeSafePoint(&*IP); ++IP) if (StoreInst *SI = dyn_cast(IP)) if (AllocaInst *AI = dyn_cast(SI->getOperand(1)->stripPointerCasts())) @@ -320,7 +320,9 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + unsigned FrameReg; // FIXME: surely GCRoot ought to store the + // register that the offset is from? + RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); ++RI; } } diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 6f9e8394081e..dd9a84086181 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -108,10 +108,9 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, // FIXME: this could be a transitional option, and we probably need to remove // it if only we are sure this optimization could always benefit all targets. -static cl::opt +static cl::opt EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, - cl::desc("Enable global merge pass on external linkage"), - cl::init(false)); + cl::desc("Enable global merge pass on external linkage")); STATISTIC(NumMerged, "Number of globals merged"); namespace { @@ -129,11 +128,14 @@ namespace { /// FIXME: This could learn about optsize, and be used in the cost model. bool OnlyOptimizeForSize; + /// Whether we should merge global variables that have external linkage. + bool MergeExternalGlobals; + bool doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const; /// \brief Merge everything in \p Globals for which the corresponding bit /// in \p GlobalSet is set. 
- bool doMerge(SmallVectorImpl &Globals, + bool doMerge(const SmallVectorImpl &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const; @@ -158,9 +160,11 @@ namespace { static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetMachine *TM = nullptr, unsigned MaximalOffset = 0, - bool OnlyOptimizeForSize = false) + bool OnlyOptimizeForSize = false, + bool MergeExternalGlobals = false) : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), - OnlyOptimizeForSize(OnlyOptimizeForSize) { + OnlyOptimizeForSize(OnlyOptimizeForSize), + MergeExternalGlobals(MergeExternalGlobals) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -189,14 +193,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort( - Globals.begin(), Globals.end(), - [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast(GV1->getType())->getElementType(); - Type *Ty2 = cast(GV2->getType())->getElementType(); - - return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2)); - }); + std::stable_sort(Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + return DL.getTypeAllocSize(GV1->getValueType()) < + DL.getTypeAllocSize(GV2->getValueType()); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -207,7 +208,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, // If we want to be smarter, look at all uses of each global, to try to // discover all sets of globals used together, and how many times each of - // these sets occured. + // these sets occurred. // // Keep this reasonably efficient, by having an append-only list of all sets // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of @@ -302,8 +303,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Function *ParentFn = I->getParent()->getParent(); // If we're only optimizing for size, ignore non-minsize functions. - if (OnlyOptimizeForSize && - !ParentFn->hasFnAttribute(Attribute::MinSize)) + if (OnlyOptimizeForSize && !ParentFn->optForMinSize()) continue; size_t UGSIdx = GlobalUsesByFunction[ParentFn]; @@ -406,15 +406,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, return Changed; } -bool GlobalMerge::doMerge(SmallVectorImpl &Globals, +bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const { + assert(Globals.size() > 1); Type *Int32Ty = Type::getInt32Ty(M.getContext()); auto &DL = M.getDataLayout(); - assert(Globals.size() > 1); - DEBUG(dbgs() << " Trying to merge set, starts with #" << GlobalSet.find_first() << "\n"); @@ -425,58 +424,44 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, std::vector Tys; std::vector Inits; - bool HasExternal = false; - GlobalVariable *TheFirstExternal = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { - Type *Ty = Globals[j]->getType()->getElementType(); + Type *Ty = Globals[j]->getValueType(); MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); - - if (Globals[j]->hasExternalLinkage() && !HasExternal) { - HasExternal = true; - TheFirstExternal = Globals[j]; - } } - // If merged variables doesn't have external linkage, we needn't to expose - // the symbol after merging. 
- GlobalValue::LinkageTypes Linkage = HasExternal - ? GlobalValue::ExternalLinkage - : GlobalValue::InternalLinkage; - StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - // If merged variables have external linkage, we use symbol name of the - // first variable merged as the suffix of global symbol name. This would - // be able to avoid the link-time naming conflict for globalm symbols. GlobalVariable *MergedGV = new GlobalVariable( - M, MergedTy, isConst, Linkage, MergedInit, - HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName() - : "_MergedGlobals", - nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, + "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) { + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, idx++) + ConstantInt::get(Int32Ty, idx), }; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); - if (Linkage != GlobalValue::InternalLinkage) { - // Generate a new alias... - auto *PTy = cast(GEP->getType()); - GlobalAlias::create(PTy, Linkage, Name, GEP, &M); + // When the linkage is not internal we must emit an alias for the original + // variable name as it may be accessed from another object. On non-Mach-O + // we can also emit an alias for internal linkage as it's safe to do so. + // It's not safe on Mach-O as the alias (and thus the portion of the + // MergedGlobals variable) may be dead stripped at link time. + if (Linkage != GlobalValue::InternalLinkage || + !TM->getTargetTriple().isOSBinFormatMachO()) { + GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); } NumMerged++; @@ -535,61 +520,57 @@ bool GlobalMerge::doInitialization(Module &M) { setMustKeepGlobalVariables(M); // Grab all non-const globals. - for (Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { + for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) continue; - if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && - !I->hasInternalLinkage()) + if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && + !GV.hasInternalLinkage()) continue; - PointerType *PT = dyn_cast(I->getType()); + PointerType *PT = dyn_cast(GV.getType()); assert(PT && "Global variable is not a pointer!"); unsigned AddressSpace = PT->getAddressSpace(); // Ignore fancy-aligned globals for now. - unsigned Alignment = DL.getPreferredAlignment(I); - Type *Ty = I->getType()->getElementType(); + unsigned Alignment = DL.getPreferredAlignment(&GV); + Type *Ty = GV.getValueType(); if (Alignment > DL.getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. 
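The shape of doMerge's output, modeled in standalone C++ before the diff resumes below: the anonymous struct plays the role of the _MergedGlobals variable, field accesses correspond to the constant inbounds GEPs, and the references stand in for the GlobalAlias symbols kept for externally visible names. The names here are illustrative:

#include <cstdio>

// Before the pass: two small globals, two symbols, two base addresses.
// After: one contiguous allocation addressed at fixed offsets.
static struct {
  int X;
  int Y;
} _MergedGlobals;

static int &X = _MergedGlobals.X; // alias keeps the original name usable
static int &Y = _MergedGlobals.Y;

int main() {
  X = 1;
  Y = 2;
  // One base register can now reach both values at constant offsets.
  std::printf("%d %d\n", _MergedGlobals.X, _MergedGlobals.Y);
  return 0;
}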
- if (I->getName().startswith("llvm.") || - I->getName().startswith(".llvm.")) + if (GV.getName().startswith("llvm.") || + GV.getName().startswith(".llvm.")) continue; // Ignore all "required" globals: - if (isMustKeepGlobalVariable(I)) + if (isMustKeepGlobalVariable(&GV)) continue; if (DL.getTypeAllocSize(Ty) < MaxOffset) { - if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) - BSSGlobals[AddressSpace].push_back(I); - else if (I->isConstant()) - ConstGlobals[AddressSpace].push_back(I); + if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(&GV); + else if (GV.isConstant()) + ConstGlobals[AddressSpace].push_back(&GV); else - Globals[AddressSpace].push_back(I); + Globals[AddressSpace].push_back(&GV); } } - for (DenseMap >::iterator - I = Globals.begin(), E = Globals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : Globals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); - for (DenseMap >::iterator - I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : BSSGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); if (EnableGlobalMergeOnConst) - for (DenseMap >::iterator - I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, true, I->first); + for (auto &P : ConstGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, true, P.first); return Changed; } @@ -604,6 +585,9 @@ bool GlobalMerge::doFinalization(Module &M) { } Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, - bool OnlyOptimizeForSize) { - return new GlobalMerge(TM, Offset, OnlyOptimizeForSize); + bool OnlyOptimizeForSize, + bool MergeExternalByDefault) { + bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ? 
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE); + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ee0532bfc630..c38c9d22266e 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include using namespace llvm; @@ -190,10 +191,10 @@ namespace { private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -218,7 +219,7 @@ namespace { bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Cycle, unsigned Extra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, Prediction); } @@ -227,7 +228,7 @@ namespace { unsigned TCycle, unsigned TExtra, MachineBasicBlock &FBB, unsigned FCycle, unsigned FExtra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return TCycle > 0 && FCycle > 0 && TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, Prediction); @@ -462,11 +463,11 @@ bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { /// getNextBlock - Returns the next block in the function blocks ordering. If /// it is the end, returns NULL. static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator I = BB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); if (++I == E) return nullptr; - return I; + return &*I; } /// ValidSimple - Returns true if the 'true' block (along with its @@ -474,7 +475,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -501,7 +502,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -530,10 +531,10 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; if (!TExit && blockAlwaysFallThrough(TrueBBI)) { - MachineFunction::iterator I = TrueBBI.BB; + MachineFunction::iterator I = TrueBBI.BB->getIterator(); if (++I == TrueBBI.BB->getParent()->end()) return false; - TExit = I; + TExit = &*I; } return TExit && TExit == FalseBBI.BB; } @@ -948,10 +949,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, /// candidates. 
void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *BB = I; - AnalyzeBlock(BB, Tokens); - } + for (auto &BB : MF) + AnalyzeBlock(&BB, Tokens); // Sort to favor more complex ifcvt scheme. std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); @@ -961,14 +960,14 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator PI = BB; + MachineFunction::iterator PI = BB->getIterator(); MachineFunction::iterator I = std::next(PI); - MachineFunction::iterator TI = ToBB; + MachineFunction::iterator TI = ToBB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); while (I != TI) { // Check isSuccessor to avoid case where the next block is empty, but // it's not a successor. - if (I == E || !I->empty() || !PI->isSuccessor(I)) + if (I == E || !I->empty() || !PI->isSuccessor(&*I)) return false; PI = I++; } @@ -1114,7 +1113,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); @@ -1153,28 +1152,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// Scale down weights to fit into uint32_t. NewTrue is the new weight -/// for successor TrueBB, and NewFalse is the new weight for successor -/// FalseBB. -static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse, - MachineBasicBlock *MBB, - const MachineBasicBlock *TrueBB, - const MachineBasicBlock *FalseBB, - const MachineBranchProbabilityInfo *MBPI) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - if (*SI == TrueBB) - MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale)); - else if (*SI == FalseBB) - MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale)); - else - MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale); - } -} - /// IfConvertTriangle - If convert a triangle sub-CFG. /// bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { @@ -1231,16 +1208,14 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != nullptr; - uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; - uint32_t WeightScale = 0; + BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; if (HasEarlyExit) { - // Get weights before modifying CvtBBI->BB and BBI.BB. - CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); - CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); - BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); - BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); - SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + // Get probabilities before modifying CvtBBI->BB and BBI.BB. 
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); } if (CvtBBI->BB->pred_size() > 1) { @@ -1251,7 +1226,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1268,22 +1243,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); - BBI.BB->addSuccessor(CvtBBI->FalseBB); - // Update the edge weight for both CvtBBI->FalseBB and NextBBI. - // New_Weight(BBI.BB, NextBBI->BB) = - // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) + - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB) - // New_Weight(BBI.BB, CvtBBI->FalseBB) = - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) - uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; - uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; - // We need to scale down all weights of BBI.BB to fit uint32_t. - // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to - // the next block. - ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB), - CvtBBI->FalseBB, MBPI); + // Update the edge probability for both CvtBBI->FalseBB and NextBBI. + // NewNext = New_Prob(BBI.BB, NextBBI->BB) = + // Prob(BBI.BB, NextBBI->BB) + + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(BBI.BB); + auto NewNext = BBNext + BBCvt * CvtNext; + auto NewTrueBBIter = + std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + if (NewTrueBBIter != BBI.BB->succ_end()) + BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); + + auto NewFalse = BBCvt * CvtFalse; + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); + BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); } // Merge in the 'false' block if the 'false' block has no other @@ -1526,7 +1502,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { - BBI.BB->addSuccessor(TailBB); + BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } @@ -1536,7 +1512,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB); + BBI.BB->removeSuccessor(BBI2->BB, true); RemoveExtraEdges(BBI); // Update block info. 
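The probability update in the IfConvertTriangle hunk above is easier to see with concrete numbers. Below is a minimal standalone sketch, using plain doubles in place of LLVM's fixed-point llvm::BranchProbability; the variable names mirror the hunk, and the example edge weights are made up for illustration.

#include <cstdio>

int main() {
  // Sketch only: the real code reads these via MBPI->getEdgeProbability().
  double BBNext = 0.3;   // Prob(BBI.BB -> NextBBI->BB)
  double BBCvt = 0.7;    // Prob(BBI.BB -> CvtBBI->BB)
  double CvtNext = 0.6;  // Prob(CvtBBI->BB -> NextBBI->BB)
  double CvtFalse = 0.4; // Prob(CvtBBI->BB -> CvtBBI->FalseBB)

  // After the triangle is if-converted, CvtBBI->BB is folded into BBI.BB and
  // its outgoing probability mass is redistributed onto BBI.BB's edges:
  double NewNext = BBNext + BBCvt * CvtNext; // 0.3 + 0.7 * 0.6 = 0.72
  double NewFalse = BBCvt * CvtFalse;        // 0.7 * 0.4       = 0.28

  // The two results sum to 1, so no renormalization is needed afterwards.
  std::printf("NewNext=%.2f NewFalse=%.2f sum=%.2f\n", NewNext, NewFalse,
              NewNext + NewFalse);
}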
@@ -1686,25 +1662,94 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
   ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB,
                    FromBBI.BB->begin(), FromBBI.BB->end());
 
-  std::vector Succs(FromBBI.BB->succ_begin(),
-                    FromBBI.BB->succ_end());
+  // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+  // unknown probabilities into known ones.
+  // FIXME: This usage is too tricky and in the future we would like to
+  // eliminate all unknown probabilities in MBB.
+  ToBBI.BB->normalizeSuccProbs();
+
+  SmallVector FromSuccs(FromBBI.BB->succ_begin(),
+                        FromBBI.BB->succ_end());
   MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
   MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+  // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
+  // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+  auto To2FromProb = BranchProbability::getZero();
+  if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
+    To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
+    // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+    // edge probability being merged to other edges when this edge is removed
+    // later.
+    ToBBI.BB->setSuccProbability(
+        std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
+        BranchProbability::getZero());
+  }
 
-  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
-    MachineBasicBlock *Succ = Succs[i];
+  for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FromSuccs[i];
     // Fallthrough edge can't be transferred.
     if (Succ == FallThrough)
       continue;
+
+    auto NewProb = BranchProbability::getZero();
+    if (AddEdges) {
+      // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+      // which is a portion of the edge probability from FromBBI.BB to Succ. The
+      // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+      // FromBBI.BB is a successor of ToBBI.BB; see the comment below for the
+      // exception).
+      NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+
+      // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
+      // only happens when if-converting a diamond CFG and FromBBI.BB is the
+      // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
+      // could just use the probabilities on FromBBI.BB's out-edges when adding
+      // new successors.
+      if (!To2FromProb.isZero())
+        NewProb *= To2FromProb;
+    }
+
     FromBBI.BB->removeSuccessor(Succ);
-    if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
-      ToBBI.BB->addSuccessor(Succ);
+
+    if (AddEdges) {
+      // If the edge from ToBBI.BB to Succ already exists, update the
+      // probability of this edge by adding NewProb to it. An example is shown
+      // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+      // don't have to set C as A's successor as it already is. We only need to
+      // update the edge probability on A->C. Note that B will not be
+      // immediately removed from A's successors. It is possible that B->D is
+      // not removed either if D is a fallthrough of B. Later the edge A->D
+      // (generated here) and B->D will be combined into one edge. To maintain
+      // correct edge probability of this combined edge, we need to set the edge
+      // probability of A->B to zero, which is already done above. The edge
+      // probability on A->D is calculated by scaling the original probability
+      // on A->B by the probability of B->D.
+ // + // Before ifcvt: After ifcvt (assume B->D is kept): + // + // A A + // /| /|\ + // / B / B| + // | /| | || + // |/ | | |/ + // C D C D + // + if (ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); + else + ToBBI.BB->addSuccessor(Succ, NewProb); + } } // Now FromBBI always falls through to the next block! if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); + // Normalize the probabilities of ToBBI.BB's successors with all adjustment + // we've done above. + ToBBI.BB->normalizeSuccProbs(); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 93e04876a8f3..39c1b9fb9a66 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -107,6 +108,98 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; + +/// \brief Detect re-ordering hazards and dependencies. +/// +/// This class keeps track of defs and uses, and can be queried if a given +/// machine instruction can be re-ordered from after the machine instructions +/// seen so far to before them. +class HazardDetector { + DenseSet RegDefs; + DenseSet RegUses; + const TargetRegisterInfo &TRI; + bool hasSeenClobber; + +public: + explicit HazardDetector(const TargetRegisterInfo &TRI) : + TRI(TRI), hasSeenClobber(false) {} + + /// \brief Make a note of \p MI for later queries to isSafeToHoist. + /// + /// May clobber this HazardDetector instance. \see isClobbered. + void rememberInstruction(MachineInstr *MI); + + /// \brief Return true if it is safe to hoist \p MI from after all the + /// instructions seen so far (via rememberInstruction) to before it. + bool isSafeToHoist(MachineInstr *MI); + + /// \brief Return true if this instance of HazardDetector has been clobbered + /// (i.e. has no more useful information). + /// + /// A HazardDetecter is clobbered when it sees a construct it cannot + /// understand, and it would have to return a conservative answer for all + /// future queries. Having a separate clobbered state lets the client code + /// bail early, without making queries about all of the future instructions + /// (which would have returned the most conservative answer anyway). + /// + /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector + /// is an error. + bool isClobbered() { return hasSeenClobber; } +}; +} + + +void HazardDetector::rememberInstruction(MachineInstr *MI) { + assert(!isClobbered() && + "Don't add instructions to a clobbered hazard detector"); + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { + hasSeenClobber = true; + return; + } + + for (auto *MMO : MI->memoperands()) { + // Right now we don't want to worry about LLVM's memory model. 
+ if (!MMO->isUnordered()) { + hasSeenClobber = true; + return; + } + } + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + } +} + +bool HazardDetector::isSafeToHoist(MachineInstr *MI) { + assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); + + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; } bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { @@ -132,10 +225,10 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineBasicBlock &MBB, SmallVectorImpl &NullCheckList) { typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; - MDNode *BranchMD = - MBB.getBasicBlock() - ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit") - : nullptr; + MDNode *BranchMD = nullptr; + if (auto *BB = MBB.getBasicBlock()) + BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit); + if (!BranchMD) return false; @@ -188,7 +281,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we want to end up with // - // Def = TrappingLoad (%RAX + ), LblNull + // Def = FaultingLoad (%RAX + ), LblNull // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: @@ -199,38 +292,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // LblNull: // callq throw_NullPointerException // + // + // To see why this is legal, consider the two possibilities: + // + // 1. %RAX is null: since we constrain to be less than PageSize, the + // load instruction dereferences the null page, causing a segmentation + // fault. + // + // 2. %RAX is not null: in this case we know that the load cannot fault, as + // otherwise the load would've faulted in the original program too and the + // original program would've been undefined. + // + // This reasoning cannot be extended to justify hoisting through arbitrary + // control flow. For instance, in the example below (in pseudo-C) + // + // if (ptr == null) { throw_npe(); unreachable; } + // if (some_cond) { return 42; } + // v = ptr->field; // LD + // ... + // + // we cannot (without code duplication) use the load marked "LD" to null check + // ptr -- clause (2) above does not apply in this case. In the above program + // the safety of ptr->field can be dependent on some_cond; and, for instance, + // ptr could be some non-null invalid reference that never gets loaded from + // because some_cond is always true. unsigned PointerReg = MBP.LHS.getReg(); - // As we scan NotNullSucc for a suitable load instruction, we keep track of - // the registers defined and used by the instructions we scan past. This bit - // of information lets us decide if it is legal to hoist the load instruction - // we find (if we do find such an instruction) to before NotNullSucc. - DenseSet RegDefs, RegUses; - - // Returns true if it is safe to reorder MI to before NotNullSucc. - auto IsSafeToHoist = [&](MachineInstr *MI) { - // Right now we don't want to worry about LLVM's memory model. 
This can be - // made more precise later. - for (auto *MMO : MI->memoperands()) - if (!MMO->isUnordered()) - return false; - - for (auto &MO : MI->operands()) { - if (MO.isReg() && MO.getReg()) { - for (unsigned Reg : RegDefs) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-write or read-after-write - - if (MO.isDef()) - for (unsigned Reg : RegUses) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-read - } - } - - return true; - }; + HazardDetector HD(*TRI); for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; ++MII) { @@ -238,37 +327,16 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( unsigned BaseReg, Offset; if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MI->getDesc().getNumDefs() == 1 && - IsSafeToHoist(MI)) { + Offset < PageSize && MI->getDesc().getNumDefs() <= 1 && + HD.isSafeToHoist(MI)) { NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, NullSucc); return true; } - // MI did not match our criteria for conversion to a trapping load. Check - // if we can continue looking. - - if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + HD.rememberInstruction(MI); + if (HD.isClobbered()) return false; - - for (auto *MMO : MI->memoperands()) - // Right now we don't want to worry about LLVM's memory model. - if (!MMO->isUnordered()) - return false; - - // It _may_ be okay to reorder a later load instruction across MI. Make a - // note of its operands so that we can make the legality check if we find a - // suitable load instruction: - - for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.getReg()) - continue; - - if (MO.isDef()) - RegDefs.insert(MO.getReg()); - else - RegUses.insert(MO.getReg()); - } } return false; @@ -281,14 +349,19 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB, MCSymbol *HandlerLabel) { + const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for + // all targets. 
+ DebugLoc DL; unsigned NumDefs = LoadMI->getDesc().getNumDefs(); - assert(NumDefs == 1 && "other cases unhandled!"); - (void)NumDefs; + assert(NumDefs <= 1 && "other cases unhandled!"); - unsigned DefReg = LoadMI->defs().begin()->getReg(); - assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && - "expected exactly one def!"); + unsigned DefReg = NoRegister; + if (NumDefs != 0) { + DefReg = LoadMI->defs().begin()->getReg(); + assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && + "expected exactly one def!"); + } auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg) .addSym(HandlerLabel) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 9989f233d09c..e31013266bc7 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -141,7 +141,7 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis()), LSS(pass.getAnalysis()), - AA(&pass.getAnalysis()), + AA(&pass.getAnalysis().getAAResults()), MDT(pass.getAnalysis()), Loops(pass.getAnalysis()), VRM(vrm), MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), @@ -329,8 +329,8 @@ static raw_ostream &operator<<(raw_ostream &OS, if (SVI.KillsSource) OS << " kill"; OS << " deps["; - for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i) - OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def; + for (VNInfo *Dep : SVI.Deps) + OS << ' ' << Dep->id << '@' << Dep->def; OS << " ]"; if (SVI.DefMI) OS << " def: " << *SVI.DefMI; @@ -383,9 +383,8 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); unsigned SpillDepth = ~0u; - for (TinyPtrVector::iterator DepI = Deps->begin(), - DepE = Deps->end(); DepI != DepE; ++DepI) { - SibValueMap::iterator DepSVI = SibValues.find(*DepI); + for (VNInfo *Dep : *Deps) { + SibValueMap::iterator DepSVI = SibValues.find(Dep); assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); SibValueInfo &DepSV = DepSVI->second; if (!DepSV.SpillMBB) @@ -566,12 +565,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Create entries for all the PHIs. Don't add them to the worklist, we // are processing all of them in one go here. - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i]))); + for (VNInfo *PHI : PHIs) + SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); // Add every PHI as a dependent of all the non-PHIs. - for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) { - VNInfo *NonPHI = NonPHIs[i]; + for (VNInfo *NonPHI : NonPHIs) { // Known value? Try an insertion. 
std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); @@ -654,8 +652,7 @@ void InlineSpiller::analyzeSiblingValues() { return; LiveInterval &OrigLI = LIS.getInterval(Original); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); VI != VE; ++VI) { @@ -831,9 +828,8 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { if (VNI->isPHIDef()) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); + for (MachineBasicBlock *P : MBB->predecessors()) { + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -920,8 +916,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewVReg); MO.setIsKill(); @@ -944,8 +940,7 @@ void InlineSpiller::reMaterializeAll() { // Try to remat before all uses of snippets. bool anyRemat = false; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::reg_bundle_iterator RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); @@ -963,8 +958,7 @@ void InlineSpiller::reMaterializeAll() { return; // Remove any values that were completely rematted. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { @@ -989,8 +983,7 @@ void InlineSpiller::reMaterializeAll() { // Get rid of deleted and empty intervals. unsigned ResultPos = 0; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { if (!LIS.hasInterval(Reg)) continue; @@ -1098,9 +1091,9 @@ foldMemoryOperand(ArrayRef > Ops, // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector FoldOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i].second; - assert(MI == Ops[i].first && "Instruction conflict during operand folding"); + for (const auto &OpPair : Ops) { + unsigned Idx = OpPair.second; + assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); @@ -1139,7 +1132,7 @@ foldMemoryOperand(ArrayRef > Ops, continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); - if (RI.Defines) + if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); @@ -1152,10 +1145,9 @@ foldMemoryOperand(ArrayRef > Ops, // Insert any new instructions other than FoldMI into the LIS maps. 
assert(!MIS.empty() && "Unexpected empty span of instructions!"); - for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); - MII != End; ++MII) - if (&*MII != FoldMI) - LIS.InsertMachineInstrInMaps(&*MII); + for (MachineInstr &MI : MIS) + if (&MI != FoldMI) + LIS.InsertMachineInstrInMaps(&MI); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. @@ -1301,11 +1293,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Rewrite instruction operands. bool hasLiveDef = false; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); MO.setReg(NewVReg); if (MO.isUse()) { - if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) + if (!OpPair.first->isRegTiedToDefOperand(OpPair.second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1335,14 +1327,14 @@ void InlineSpiller::spillAll() { VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + for (unsigned Reg : RegsToSpill) + StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - spillAroundUses(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + spillAroundUses(Reg); // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { @@ -1351,9 +1343,9 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (unsigned Reg : RegsToSpill) { for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end(); + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { MachineInstr *MI = &*(RI++); assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); @@ -1364,8 +1356,8 @@ void InlineSpiller::spillAll() { } // Delete all spilled registers. 
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + Edit->eraseVirtReg(Reg); } void InlineSpiller::spill(LiveRangeEdit &edit) { diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index fd5749bfefa0..f8cc24724580 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -144,7 +144,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { PrevPos = Start; } - MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + MachineFunction::const_iterator MFI = + MF->getBlockNumbered(MBBNum)->getIterator(); BlockInterference *BI = &Blocks[MBBNum]; ArrayRef RegMaskSlots; ArrayRef RegMaskBits; diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index 53c8adc05d77..724f1d61abe2 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; static cl::opt LowerInterleavedAccesses( "lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); static unsigned MaxFactor; // The maximum supported interleave factor. @@ -271,7 +271,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { SmallVector DeadInsts; bool Changed = false; - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (LoadInst *LI = dyn_cast(&I)) Changed |= lowerInterleavedLoad(LI, DeadInsts); diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 2c95e9e7d0d3..2962f8701625 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -35,24 +35,24 @@ static void EnsureFunctionExists(Module &M, const char *Name, M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); } -static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, +static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, const char *FName, const char *DName, const char *LDName) { // Insert definitions for all the floating point types. - switch((int)Fn->arg_begin()->getType()->getTypeID()) { + switch((int)Fn.arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: - EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(), Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: - EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(), Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(), - Fn->arg_begin()->getType()); + EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(), + Fn.arg_begin()->getType()); break; } } @@ -67,7 +67,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Type *RetTy) { // If we haven't already looked up this function, check to see if the // program already contains a function with this name. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); // Get or insert the definition now. 
std::vector ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) @@ -75,7 +75,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Constant* FCache = M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false)); - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); SmallVector Args(ArgBegin, ArgEnd); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); @@ -94,20 +94,20 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->isDeclaration() && !I->use_empty()) - switch (I->getIntrinsicID()) { + for (auto &F : M) + if (F.isDeclaration() && !F.use_empty()) + switch (F.getIntrinsicID()) { default: break; case Intrinsic::setjmp: - EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(), Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: - EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: - EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: @@ -132,31 +132,31 @@ void IntrinsicLowering::AddPrototypes(Module &M) { DL.getIntPtrType(Context), nullptr); break; case Intrinsic::sqrt: - EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl"); break; case Intrinsic::sin: - EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl"); break; case Intrinsic::cos: - EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl"); break; case Intrinsic::pow: - EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl"); break; case Intrinsic::log: - EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl"); break; case Intrinsic::log2: - EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l"); break; case Intrinsic::log10: - EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l"); break; case Intrinsic::exp: - EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl"); break; case Intrinsic::exp2: - EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l"); break; } } @@ -167,8 +167,8 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); - - IRBuilder<> Builder(IP->getParent(), IP); + + IRBuilder<> Builder(IP); switch(BitSize) { default: llvm_unreachable("Unhandled type size of value to byteswap!"); @@ -268,7 +268,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); unsigned BitSize = 
V->getType()->getPrimitiveSizeInBits(); unsigned WordSize = (BitSize + 63) / 64; @@ -301,7 +301,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { /// instruction IP. static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); for (unsigned i = 1; i < BitSize; i <<= 1) { @@ -338,7 +338,7 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, } void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI); LLVMContext &Context = CI->getContext(); const Function *Callee = CI->getCalledFunction(); @@ -424,6 +424,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } + case Intrinsic::get_dynamic_area_offset: + errs() << "WARNING: this target does not support the custom llvm.get." + "dynamic.area.offset. It is being lowered to a constant 0\n"; + // Just lower it to a constant 0 because for most targets + // @llvm.get.dynamic.area.offset is lowered to zero. + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0)); + break; case Intrinsic::returnaddress: case Intrinsic::frameaddress: errs() << "WARNING: this target does not support the llvm." @@ -589,7 +596,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { return false; // Okay, we can do this xform, do so now. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index 18ed77607c6a..69b6a0f380aa 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG MIRParser type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core Instrumentation MC Scalar Support Target TransformUtils +required_libraries = Analysis BitReader BitWriter Core Instrumentation MC Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 37299eb664cf..1c27377feee7 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -82,7 +82,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, } TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(BasicTTIImpl(this, F)); }); } @@ -125,9 +125,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PM.add(new MachineFunctionAnalysis(*TM, MFInitializer)); // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); if (EnableFastISelOption == cl::BOU_TRUE || (TM->getOptLevel() == CodeGenOpt::None && - EnableFastISelOption != cl::BOU_FALSE)) + TM->getO0WantsFastISel())) TM->setFastISel(true); // Ask the target for an isel. 
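The mask table that LowerCTPOP (in the IntrinsicLowering.cpp hunks above) feeds into the IR it emits drives a standard word-parallel population count: each round adds adjacent i-bit fields into a field of width 2*i. A minimal sketch of the same reduction on a host uint64_t; popcount64 is illustrative and not part of the patch.

#include <cstdint>
#include <cstdio>

static unsigned popcount64(uint64_t V) {
  // Same mask progression LowerCTPOP uses for a 64-bit word.
  static const uint64_t MaskValues[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  unsigned ct = 0;
  for (unsigned i = 1; i <= 32; i <<= 1, ++ct)
    V = (V & MaskValues[ct]) + ((V >> i) & MaskValues[ct]);
  return static_cast<unsigned>(V);
}

int main() {
  std::printf("%u %u %u\n", popcount64(0), popcount64(0xFF),
              popcount64(~0ULL)); // prints: 0 8 64
}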
@@ -202,6 +203,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; } @@ -254,6 +256,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr AsmStreamer(getTarget().createMCObjectStreamer( T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp new file mode 100644 index 000000000000..98d30b95dd2d --- /dev/null +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -0,0 +1,405 @@ +//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass implements a data flow analysis that propagates debug location +/// information by inserting additional DBG_VALUE instructions into the machine +/// instruction stream. The pass internally builds debug location liveness +/// ranges to determine the points where additional DBG_VALUEs need to be +/// inserted. +/// +/// This is a separate pass from DbgValueHistoryCalculator to facilitate +/// testing and improve modularity. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "live-debug-values" + +STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); + +namespace { + +class LiveDebugValues : public MachineFunctionPass { + +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + typedef std::pair + InlinedVariable; + + /// A potentially inlined instance of a variable. + struct DebugVariable { + const DILocalVariable *Var; + const DILocation *InlinedAt; + + DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt) + : Var(_var), InlinedAt(_inlinedAt) {} + + bool operator==(const DebugVariable &DV) const { + return (Var == DV.Var) && (InlinedAt == DV.InlinedAt); + } + }; + + /// Member variables and functions for Range Extension across basic blocks. + struct VarLoc { + DebugVariable Var; + const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr. + + VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {} + + bool operator==(const VarLoc &V) const; + }; + + typedef std::list VarLocList; + typedef SmallDenseMap VarLocInMBB; + + bool OLChanged; // OutgoingLocs got changed for this bb. + bool MBBJoined; // The MBB was joined. 
+
+  void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
+  void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
+  void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+                              VarLocInMBB &OutLocs);
+  void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+
+  void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+
+  bool ExtendRanges(MachineFunction &MF);
+
+public:
+  static char ID;
+
+  /// Default construct and initialize the pass.
+  LiveDebugValues();
+
+  /// Tell the pass manager which passes we depend on and what
+  /// information we preserve.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Print to ostream with a message.
+  void printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+                        raw_ostream &Out) const;
+
+  /// Calculate the liveness information for the given machine function.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
+
+char LiveDebugValues::ID = 0;
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+                false, false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+  initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// \brief If @MI is a DBG_VALUE with a debug value described by a defined
+// register, returns the number of this register; otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+  assert(MI.isDebugValue());
+  assert(MI.getNumOperands() == 4);
+  // If the location of the variable is described using a register (directly or
+  // indirectly), that register is always the first operand.
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+// \brief This function takes two DBG_VALUE instructions and returns true
+// if their offsets are equal; otherwise returns false.
+static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) {
+  assert(MI1.isDebugValue());
+  assert(MI1.getNumOperands() == 4);
+
+  assert(MI2.isDebugValue());
+  assert(MI2.getNumOperands() == 4);
+
+  if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue())
+    return true;
+
+  // Check if both MIs are indirect and they are equal.
+ if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue()) + return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm(); + + return false; +} + +//===----------------------------------------------------------------------===// +// Debug Range Extension Implementation +//===----------------------------------------------------------------------===// + +void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const { + Out << "Printing " << msg << ":\n"; + for (const auto &L : V) { + Out << "MBB: " << L.first->getName() << ":\n"; + for (const auto &VLL : L.second) { + Out << " Var: " << VLL.Var.Var->getName(); + Out << " MI: "; + (*VLL.MI).dump(); + Out << "\n"; + } + } + Out << "\n"; +} + +bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const { + return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) && + (areOffsetsEqual(*MI, *V.MI)); +} + +/// End all previous ranges related to @MI and start a new range from @MI +/// if it is a DBG_VALUE instr. +void LiveDebugValues::transferDebugValue(MachineInstr &MI, + VarLocList &OpenRanges) { + if (!MI.isDebugValue()) + return; + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + // End all previous ranges of Var. + OpenRanges.erase( + std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { return (Var == V.Var); }), + OpenRanges.end()); + + // Add Var to OpenRanges from this DBG_VALUE. + // TODO: Currently handles DBG_VALUE which has only reg as location. + if (isDescribedByReg(MI)) { + VarLoc V(Var, &MI); + OpenRanges.push_back(std::move(V)); + } +} + +/// A definition of a register may mark the end of a range. +void LiveDebugValues::transferRegisterDef(MachineInstr &MI, + VarLocList &OpenRanges) { + for (const MachineOperand &MO : MI.operands()) { + if (!(MO.isReg() && MO.isDef() && MO.getReg() && + TRI->isPhysicalRegister(MO.getReg()))) + continue; + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { + return (*RAI == + isDescribedByReg(*V.MI)); + }), + OpenRanges.end()); + } +} + +/// Terminate all open ranges at the end of the current basic block. +void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, + VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + const MachineBasicBlock *CurMBB = MI.getParent(); + if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + return; + + if (OpenRanges.empty()) + return; + + if (OutLocs.find(CurMBB) == OutLocs.end()) { + // Create space for new Outgoing locs entries. + VarLocList VLL; + OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); + } + auto OL = OutLocs.find(CurMBB); + assert(OL != OutLocs.end()); + VarLocList &VLL = OL->second; + + for (auto OR : OpenRanges) { + // Copy OpenRanges to OutLocs, if not already present. + assert(OR.MI->isDebugValue()); + DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump();); + if (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { + VLL.push_back(std::move(OR)); + OLChanged = true; + } + } + OpenRanges.clear(); +} + +/// This routine creates OpenRanges and OutLocs. 
+void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + transferDebugValue(MI, OpenRanges); + transferRegisterDef(MI, OpenRanges); + transferTerminatorInst(MI, OpenRanges, OutLocs); +} + +/// This routine joins the analysis results of all incoming edges in @MBB by +/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same +/// source variable in all the predecessors of @MBB reside in the same location. +void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, + VarLocInMBB &InLocs) { + DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); + + MBBJoined = false; + + VarLocList InLocsT; // Temporary incoming locations. + + // For all predecessors of this MBB, find the set of VarLocs that can be + // joined. + for (auto p : MBB.predecessors()) { + auto OL = OutLocs.find(p); + // Join is null in case of empty OutLocs from any of the pred. + if (OL == OutLocs.end()) + return; + + // Just copy over the Out locs to incoming locs for the first predecessor. + if (p == *MBB.pred_begin()) { + InLocsT = OL->second; + continue; + } + + // Join with this predecessor. + VarLocList &VLL = OL->second; + InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), + [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), + InLocsT.end()); + } + + if (InLocsT.empty()) + return; + + if (InLocs.find(&MBB) == InLocs.end()) { + // Create space for new Incoming locs entries. + VarLocList VLL; + InLocs.insert(std::make_pair(&MBB, std::move(VLL))); + } + auto IL = InLocs.find(&MBB); + assert(IL != InLocs.end()); + VarLocList &ILL = IL->second; + + // Insert DBG_VALUE instructions, if not already inserted. + for (auto ILT : InLocsT) { + if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) { + return (ILT == I); + }) == ILL.end()) { + // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a + // new range is started for the var from the mbb's beginning by inserting + // a new DBG_VALUE. transfer() will end this range however appropriate. + const MachineInstr *DMI = ILT.MI; + MachineInstr *MI = + BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + DEBUG(dbgs() << "Inserted: "; MI->dump();); + ++NumInserted; + MBBJoined = true; // rerun transfer(). + + VarLoc V(ILT.Var, MI); + ILL.push_back(std::move(V)); + } + } +} + +/// Calculate the liveness information for the given machine function and +/// extend ranges across basic blocks. +bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { + + DEBUG(dbgs() << "\nDebug Range Extension\n"); + + bool Changed = false; + OLChanged = MBBJoined = false; + + VarLocList OpenRanges; // Ranges that are open until end of bb. + VarLocInMBB OutLocs; // Ranges that exist beyond bb. + VarLocInMBB InLocs; // Ranges that are incoming after joining. + + std::deque BBWorklist; + + // Initialize every mbb with OutLocs. + for (auto &MBB : MF) + for (auto &MI : MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); + + // Construct a worklist of MBBs. 
+ for (auto &MBB : MF) + BBWorklist.push_back(&MBB); + + // Perform join() and transfer() using the worklist until the ranges converge + // Ranges have converged when the worklist is empty. + while (!BBWorklist.empty()) { + MachineBasicBlock *MBB = BBWorklist.front(); + BBWorklist.pop_front(); + + join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + Changed = true; + for (auto &MI : *MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == + BBWorklist.end()) // add if not already present. + BBWorklist.push_back(s); + } + } + } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); + return Changed; +} + +bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + bool Changed = false; + + Changed |= ExtendRanges(MF); + + return Changed; +} diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 15715513452d..6dac7dbd15bf 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -91,9 +91,7 @@ public: bool dominates(MachineBasicBlock *MBB) { if (LBlocks.empty()) LS.getMachineBasicBlocks(DL, LBlocks); - if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB)) - return true; - return false; + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); } }; } // end anonymous namespace @@ -512,7 +510,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { bool Changed = false; for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE;) { if (!MBBI->isDebugValue()) { @@ -536,65 +534,49 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { return Changed; } -void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveRange *LR, const VNInfo *VNI, - SmallVectorImpl *Kills, +/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a +/// data-flow analysis to propagate them beyond basic block boundaries. +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, + const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { - SmallVector Todo; - Todo.push_back(Idx); - do { - SlotIndex Start = Todo.pop_back_val(); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); - SlotIndex Stop = LIS.getMBBEndIdx(MBB); - LocMap::iterator I = locInts.find(Start); + SlotIndex Start = Idx; + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); - // Limit to VNI's live range. - bool ToEnd = true; - if (LR && VNI) { - LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); - if (!Segment || Segment->valno != VNI) { - if (Kills) - Kills->push_back(Start); - continue; - } - if (Segment->end < Stop) - Stop = Segment->end, ToEnd = false; + // Limit to VNI's live range. 
+ bool ToEnd = true; + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { + if (Kills) + Kills->push_back(Start); + return; } + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; + } - // There could already be a short def at Start. - if (I.valid() && I.start() <= Start) { - // Stop when meeting a different location or an already extended interval. - Start = Start.getNextSlot(); - if (I.value() != LocNo || I.stop() != Start) - continue; - // This is a one-slot placeholder. Just skip it. - ++I; - } + // There could already be a short def at Start. + if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + return; + // This is a one-slot placeholder. Just skip it. + ++I; + } - // Limited by the next def. - if (I.valid() && I.start() < Stop) - Stop = I.start(), ToEnd = false; - // Limited by VNI's live range. - else if (!ToEnd && Kills) - Kills->push_back(Stop); - - if (Start >= Stop) - continue; + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); + if (Start < Stop) I.insert(Start, Stop, LocNo); - - // If we extended to the MBB end, propagate down the dominator tree. - if (!ToEnd) - continue; - const std::vector &Children = - MDT.getNode(MBB)->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) { - MachineBasicBlock *MBB = Children[i]->getBlock(); - if (UVS.dominates(MBB)) - Todo.push_back(LIS.getMBBStartIdx(MBB)); - } - } while (!Todo.empty()); } void @@ -763,7 +745,7 @@ static void removeDebugValues(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!FunctionDIs.count(mf.getFunction())) { + if (!mf.getFunction()->getSubprogram()) { removeDebugValues(mf); return false; } @@ -1004,11 +986,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, SlotIndex Stop = I.stop(); unsigned LocNo = I.value(); DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); + SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
while(Stop > MBBEnd) { @@ -1016,9 +998,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, Start = MBBEnd; if (++MBB == MFEnd) break; - MBBEnd = LIS.getMBBEndIdx(MBB); + MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); } DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1047,7 +1029,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { } bool LiveDebugVariables::doInitialization(Module &M) { - FunctionDIs = makeSubprogramMap(M); return Pass::doInitialization(M); }
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 694aa1770c9c..3d36f4d2494a 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -33,7 +33,6 @@ class VirtRegMap; class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { void *pImpl; - DenseMap<const Function *, DISubprogram *> FunctionDIs; public: static char ID; // Pass identification, replacement for typeid
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index d75e4417cb03..efad36ffa3f1 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> @@ -865,7 +864,7 @@ void LiveInterval::constructMainRangeFromSubranges( // - If any of the subranges is live at a point the main liverange has to be // live too, conversely if no subrange is live the main range mustn't be // live either. - // We do this by scannig through all the subranges simultaneously creating new + // We do this by scanning through all the subranges simultaneously creating new // segments in the main range as segment starts/ends come up in the subranges. assert(hasSubRanges() && "expected subranges to be present"); assert(segments.empty() && valnos.empty() && "expected empty main range"); @@ -889,7 +888,7 @@ void LiveInterval::constructMainRangeFromSubranges( Segment CurrentSegment; bool ConstructingSegment = false; bool NeedVNIFixup = false; - unsigned ActiveMask = 0; + LaneBitmask ActiveMask = 0; SlotIndex Pos = First; while (true) { SlotIndex NextPos = Last; enum { NOTHING, BEGIN_SEGMENT, END_SEGMENT, } Event = NOTHING; // Which subregister lanes are affected by the current event. - unsigned EventMask = 0; + LaneBitmask EventMask = 0; // Whether a BEGIN_SEGMENT is also a valno definition point. bool IsDef = false; // Find the next begin or end of a subrange segment. Combine masks if we @@ -1066,7 +1065,7 @@ void LiveInterval::print(raw_ostream &OS) const { super::print(OS); // Print subranges for (const SubRange &SR : subranges()) { - OS << format(" L%04X ", SR.LaneMask) << SR; + OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR; } } @@ -1101,8 +1100,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { super::verify(); // Make sure SubRanges are fine and LaneMasks are disjoint. - unsigned Mask = 0; - unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjoint from any previous subrange masks. assert((Mask & SR.LaneMask) == 0); @@ -1110,6 +1109,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { // Subrange masks should be contained in the maximum lane mask for the vreg. assert((Mask & ~MaxMask) == 0); + // Empty subranges must be removed. + assert(!SR.empty()); SR.verify(); // Main liverange should cover subrange. @@ -1370,11 +1371,42 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], - MachineRegisterInfo &MRI) { - assert(LIV[0] && "LIV[0] must be set"); - LiveInterval &LI = *LIV[0]; +template <typename LiveRangeT, typename EqClassesT> +static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], + EqClassesT VNIClasses) { + // Move segments to new intervals. + LiveRange::iterator J = LR.begin(), E = LR.end(); + while (J != E && VNIClasses[J->valno->id] == 0) + ++J; + for (LiveRange::iterator I = J; I != E; ++I) { + if (unsigned eq = VNIClasses[I->valno->id]) { + assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && + "New intervals should be empty"); + SplitLRs[eq-1]->segments.push_back(*I); + } else + *J++ = *I; + } + LR.segments.erase(J, E); + // Transfer VNInfos to their new owners and renumber them. + unsigned j = 0, e = LR.getNumValNums(); + while (j != e && VNIClasses[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LR.getValNumInfo(i); + if (unsigned eq = VNIClasses[i]) { + VNI->id = SplitLRs[eq-1]->getNumValNums(); + SplitLRs[eq-1]->valnos.push_back(VNI); + } else { + VNI->id = j; + LR.valnos[j++] = VNI; + } + } + LR.valnos.resize(j); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI) { // Rewrite instructions. for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), RE = MRI.reg_end(); RI != RE;) { @@ -1396,38 +1428,41 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], // NULL. If the use is tied to a def, VNI will be the defined value. if (!VNI) continue; - MO.setReg(LIV[getEqClass(VNI)]->reg); + if (unsigned EqClass = getEqClass(VNI)) + MO.setReg(LIV[EqClass-1]->reg); } - // Move runs to new intervals. - LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && EqClass[J->valno->id] == 0) - ++J; - for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = EqClass[I->valno->id]) { - assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && - "New intervals should be empty"); - LIV[eq]->segments.push_back(*I); - } else - *J++ = *I; - } - // TODO: do not cheat anymore by simply cleaning all subranges - LI.clearSubRanges(); - LI.segments.erase(J, E); - - // Transfer VNInfos to their new owners and renumber them. - unsigned j = 0, e = LI.getNumValNums(); - while (j != e && EqClass[j] == 0) - ++j; - for (unsigned i = j; i != e; ++i) { - VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = EqClass[i]) { - VNI->id = LIV[eq]->getNumValNums(); - LIV[eq]->valnos.push_back(VNI); - } else { - VNI->id = j; - LI.valnos[j++] = VNI; + // Distribute subregister liveranges. + if (LI.hasSubRanges()) { + unsigned NumComponents = EqClass.getNumClasses(); + SmallVector<unsigned, 8> VNIMapping; + SmallVector<SubRange*, 8> SubRanges; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &SR : LI.subranges()) { + // Create new subranges in the split intervals and construct a mapping + // for the VNInfos in the subrange. + unsigned NumValNos = SR.valnos.size(); + VNIMapping.clear(); + VNIMapping.reserve(NumValNos); + SubRanges.clear(); + SubRanges.resize(NumComponents-1, nullptr); + for (unsigned I = 0; I < NumValNos; ++I) { + const VNInfo &VNI = *SR.valnos[I]; + const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); + assert(MainRangeVNI != nullptr + && "SubRange def must have corresponding main range def"); + unsigned ComponentNum = getEqClass(MainRangeVNI); + VNIMapping.push_back(ComponentNum); + if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { + SubRanges[ComponentNum-1] + = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + } + } + DistributeRange(SR, SubRanges.data(), VNIMapping); } + LI.removeEmptySubRanges(); } - LI.valnos.resize(j); + + // Distribute main liverange. + DistributeRange(LI, LIV, EqClass); }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c00b010e763b..9451d92bd7ae 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -48,7 +47,7 @@ char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) @@ -76,8 +75,8 @@ cl::opt<bool> UseSegmentSetForPhysRegs( void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); // LiveVariables isn't really required by this analysis, it is only required // here to make sure it is live during TwoAddressInstructionPass and // PHIElimination. This is temporary. @@ -124,7 +123,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ -198,9 +197,16 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); + bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); - computeDeadValues(LI, nullptr); + LRCalc->calculate(LI, ShouldTrackSubRegLiveness); + bool SeparatedComponents = computeDeadValues(LI, nullptr); + if (SeparatedComponents) { + assert(ShouldTrackSubRegLiveness + && "Separated components should only occur for unused subreg defs"); + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } } void LiveIntervals::computeVirtRegs() { @@ -216,19 +222,31 @@ void LiveIntervals::computeRegMasks() { RegMaskBlocks.resize(MF->getNumBlockIDs()); // Find all instructions with regmask operands.
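// A minimal illustrative sketch (not part of the patch): the splitting
// pattern the hunks above introduce. ConnectedVNInfoEqClasses::Classify()
// partitions a live interval's value numbers into connected components, and
// the new Distribute() overload moves each nonzero class into a freshly
// created interval; LiveIntervals::splitSeparateComponents(), added later in
// this patch, wraps exactly this call sequence. 'splitIntoComponents' is a
// hypothetical name for the sketch.
static void splitIntoComponents(LiveIntervals &LIS, MachineRegisterInfo &MRI,
                                LiveInterval &LI) {
  ConnectedVNInfoEqClasses ConEQ(LIS);
  unsigned NumComp = ConEQ.Classify(&LI);
  if (NumComp <= 1)
    return; // The liveness already forms a single connected component.
  SmallVector<LiveInterval *, 8> SplitLIs;
  const TargetRegisterClass *RC = MRI.getRegClass(LI.reg);
  for (unsigned I = 1; I < NumComp; ++I) {
    // Equivalence class 0 stays in LI; every other class gets its own vreg.
    unsigned NewVReg = MRI.createVirtualRegister(RC);
    SplitLIs.push_back(&LIS.createEmptyInterval(NewVReg));
  }
  ConEQ.Distribute(LI, SplitLIs.data(), MRI);
}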
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; + for (MachineBasicBlock &MBB : *MF) { + std::pair &RMB = RegMaskBlocks[MBB.getNumber()]; RMB.first = RegMaskSlots.size(); - for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); - MI != ME; ++MI) - for (const MachineOperand &MO : MI->operands()) { + + // Some block starts, such as EH funclets, create masks. + if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + + for (MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) continue; - RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); + RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); } + } + + // Some block ends, such as funclet returns, create masks. + if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + // Compute the number of register mask instructions in this block. RMB.second = RegMaskSlots.size() - RMB.first; } @@ -296,18 +314,17 @@ void LiveIntervals::computeLiveInRegUnits() { // Check all basic blocks for live-ins. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { - const MachineBasicBlock *MBB = MFI; + const MachineBasicBlock *MBB = &*MFI; // We only care about ABI blocks: Entry + landing pads. - if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty()) continue; // Create phi-defs at Begin for all live-in registers. SlotIndex Begin = Indexes->getMBBStartIdx(MBB); DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); - for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), - LIE = MBB->livein_end(); LII != LIE; ++LII) { - for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; LiveRange *LR = RegUnitRanges[Unit]; if (!LR) { @@ -396,9 +413,6 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, } } -/// shrinkToUses - After removing some uses of a register, shrink its live -/// range to just the remaining uses. This method does not compute reaching -/// defs for new uses, and it doesn't remove dead defs. bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); @@ -406,9 +420,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, && "Can only shrink virtual registers"); // Shrink subregister live ranges. + bool NeedsCleanup = false; for (LiveInterval::SubRange &S : li->subranges()) { shrinkToUses(S, li->reg); + if (S.empty()) + NeedsCleanup = true; } + if (NeedsCleanup) + li->removeEmptySubRanges(); // Find all the values used, including PHI kills. 
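// Illustrative sketch (an assumption about intent, not from the patch): why
// the shrinkToUses() hunk above defers subrange removal. Erasing a subrange
// while iterating LI.subranges() would invalidate the iteration, so
// emptiness is only noted during the scan and removeEmptySubRanges() runs
// once afterwards, preserving the new verifier invariant that no empty
// subrange survives.
static void pruneEmptySubRanges(LiveInterval &LI) {
  bool NeedsCleanup = false;
  for (const LiveInterval::SubRange &S : LI.subranges())
    if (S.empty())
      NeedsCleanup = true; // Defer removal until iteration has finished.
  if (NeedsCleanup)
    LI.removeEmptySubRanges();
}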
ShrinkToUsesWorkList WorkList; @@ -456,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl *dead) { - bool PHIRemoved = false; + bool MayHaveSplitComponents = false; for (auto VNI : LI.valnos) { if (VNI->isUnused()) continue; @@ -466,10 +485,13 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. - if (MRI->shouldTrackSubRegLiveness(LI.reg)) { + bool DeadBeforeDef = false; + unsigned VReg = LI.reg; + if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); - MI->addRegisterDefReadUndef(LI.reg); + MI->setRegisterDefReadUndef(VReg); + DeadBeforeDef = true; } } @@ -480,19 +502,27 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, VNI->markUnused(); LI.removeSegment(I); DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); - PHIRemoved = true; + MayHaveSplitComponents = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(LI.reg, TRI); + MI->addRegisterDead(VReg, TRI); + + // If we have a dead def that is completely separate from the rest of + // the liverange then we rewrite it to use a different VReg to not violate + // the rule that the liveness of a virtual register forms a connected + // component. This should only happen if subregister liveness is tracked. + if (DeadBeforeDef) + MayHaveSplitComponents = true; + if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } } - return PHIRemoved; + return MayHaveSplitComponents; } void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) @@ -512,8 +542,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) // Maybe the operand is for a subregister we don't care about. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg); - if ((SubRegMask & SR.LaneMask) == 0) + LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if ((LaneMask & SR.LaneMask) == 0) continue; } // We only need to visit each instruction once. @@ -712,7 +742,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 // are actually never written by %vreg2. After assignment the // flag at the read instruction is invalid. - unsigned DefinedLanesMask; + LaneBitmask DefinedLanesMask; if (!SRs.empty()) { // Compute a mask of lanes that are defined. DefinedLanesMask = 0; @@ -736,7 +766,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { continue; if (MO.isUse()) { // Reading any undefined lanes? 
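// A small sketch (hypothetical helper) of exactly the test that follows:
// during kill-flag computation a use cannot be marked killed if it reads
// any lane outside DefinedLanesMask, the set of lanes actually written by
// the subregister defs seen so far. (Assumption: at this revision
// LaneBitmask is a plain 'unsigned' typedef, so the unsigned -> LaneBitmask
// changes throughout these hunks document intent rather than change
// behavior.)
static bool readsUndefinedLanes(LaneBitmask UseMask,
                                LaneBitmask DefinedLanesMask) {
  return (UseMask & ~DefinedLanesMask) != 0;
}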
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); if ((UseMask & ~DefinedLanesMask) != 0) goto CancelKill; } else if (MO.getSubReg() == 0) { @@ -944,7 +974,7 @@ public: LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) == 0) continue; @@ -968,7 +998,7 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { if (!Updated.insert(&LR).second) return; DEBUG({ @@ -976,7 +1006,7 @@ private: if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); if (LaneMask != 0) - dbgs() << format(" L%04X", LaneMask); + dbgs() << " L" << PrintLaneMask(LaneMask); } else { dbgs() << PrintRegUnit(Reg, &TRI); } @@ -1098,7 +1128,7 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { // First look for a kill at OldIdx. LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); @@ -1175,7 +1205,7 @@ private: } // Return the last use of reg between NewIdx and OldIdx. - SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) { + SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; @@ -1255,7 +1285,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, const MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, const unsigned Reg, - const unsigned LaneMask) { + LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(endIdx); SlotIndex lastUseIdx; if (LII != LR.end() && LII->start < endIdx) @@ -1282,7 +1312,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, continue; unsigned SubReg = MO.getSubReg(); - unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); if ((Mask & LaneMask) == 0) continue; @@ -1412,3 +1442,20 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { } LI.removeEmptySubRanges(); } + +void LiveIntervals::splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl &SplitLIs) { + ConnectedVNInfoEqClasses ConEQ(*this); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp <= 1) + return; + DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); + unsigned Reg = LI.reg; + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + for (unsigned I = 1; I < NumComp; ++I) { + unsigned NewVReg = MRI->createVirtualRegister(RegClass); + LiveInterval &NewLI = createEmptyInterval(NewVReg); + SplitLIs.push_back(&NewLI); + } + ConEQ.Distribute(LI, SplitLIs.data(), *MRI); +} diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index cbd98e3f3450..efbbcbe23e15 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -68,7 +68,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { /// Simulates liveness when stepping forward over an 
instruction(bundle): Remove /// killed-uses, add defs. This is not the recommended way, because it depends -/// on accurate kill flags. If possible use stepBackwards() instead of this +/// on accurate kill flags. If possible use stepBackward() instead of this /// function. void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineInstr*>> &Clobbers) { @@ -128,8 +128,8 @@ void LivePhysRegs::dump() const { /// Add live-in registers of basic block \p MBB to \p LiveRegs. static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { - for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end())) - LiveRegs.addReg(Reg); + for (const auto &LI : MBB.liveins()) + LiveRegs.addReg(LI.PhysReg); } /// Add pristine registers to the given \p LiveRegs. This function removes @@ -147,11 +147,19 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, } void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, - bool AddPristines) { - if (AddPristines) { + bool AddPristinesAndCSRs) { + if (AddPristinesAndCSRs) { const MachineFunction &MF = *MBB->getParent(); addPristines(*this, MF, *TRI); + if (!MBB->isReturnBlock()) { + // The return block has no successors whose live-ins we could merge + // below. So instead we add the callee saved registers manually. + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + addReg(*I); + } } + + // To get the live-outs we simply merge the live-ins of all successors. for (const MachineBasicBlock *Succ : MBB->successors()) ::addLiveIns(*this, *Succ); }
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index bb2877ae31a8..c408615d42e2 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -64,23 +64,23 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { unsigned SubReg = MO.getSubReg(); if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); // If this is the first time we see a subregister def, initialize // subranges by creating a copy of the main range. if (!LI.hasSubRanges() && !LI.empty()) { - unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); LI.createSubRangeFrom(*Alloc, ClassMask, LI); } for (LiveInterval::SubRange &S : LI.subranges()) { // A mask for subregs common to the existing subrange and current def. - unsigned Common = S.LaneMask & Mask; + LaneBitmask Common = S.LaneMask & Mask; if (Common == 0) continue; // A mask for subregs covered by the subrange but not the current def. - unsigned LRest = S.LaneMask & ~Mask; + LaneBitmask LRest = S.LaneMask & ~Mask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { // Split current subrange into Common and LRest ranges. @@ -138,7 +138,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, + LaneBitmask Mask) { // Visit all operands that read Reg. This may include partial defs.
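// Sketch (hypothetical helper, not part of the patch) of the lane-overlap
// filter applied by extendToUses() below: a subregister use only extends a
// subrange whose lane mask intersects the lanes the operand actually reads,
// while a full-register operand (SubReg == 0) reads every lane. This is a
// different test from the kill-flag check sketched earlier: there we asked
// whether a use reads lanes nobody defined; here we ask whether a use is
// relevant to one particular subrange at all.
static bool useOverlapsSubRange(const TargetRegisterInfo &TRI, unsigned SubReg,
                                LaneBitmask SubRangeMask) {
  if (SubReg == 0)
    return true;
  LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubReg);
  return (UseMask & SubRangeMask) != 0;
}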
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { @@ -157,7 +158,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { continue; unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); // Ignore uses not covering the current subrange. if ((SubRegMask & Mask) == 0) continue; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 34d99534834b..ff38c68820f1 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -129,7 +129,7 @@ class LiveRangeCalc { /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask); + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); /// Reset Map and Seen fields. void resetLiveOutMap(); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 08bbe0c3f379..5ce364ae661e 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -226,7 +226,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, return true; const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (const LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) return true; @@ -349,8 +349,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; + unsigned VReg = LI->reg; if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI->reg); + TheDelegate->LRE_WillShrinkVirtReg(VReg); if (!LIS.shrinkToUses(LI, &Dead)) continue; @@ -360,7 +361,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // them results in incorrect code. bool BeingSpilled = false; for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { - if (LI->reg == RegsBeingSpilled[i]) { + if (VReg == RegsBeingSpilled[i]) { BeingSpilled = true; break; } @@ -370,29 +371,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // LI may have been separated, create new intervals. LI->RenumberValues(); - ConnectedVNInfoEqClasses ConEQ(LIS); - unsigned NumComp = ConEQ.Classify(LI); - if (NumComp <= 1) - continue; - ++NumFracRanges; - bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; - DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); - SmallVector Dups(1, LI); - for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createEmptyIntervalFrom(LI->reg)); + SmallVector SplitLIs; + LIS.splitSeparateComponents(*LI, SplitLIs); + if (!SplitLIs.empty()) + ++NumFracRanges; + + unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + for (const LiveInterval *SplitLI : SplitLIs) { // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
- if (IsOriginal) - VRM->setIsSplitFromReg(Dups.back()->reg, 0); + if (Original != VReg && Original != 0) + VRM->setIsSplitFromReg(SplitLI->reg, Original); if (TheDelegate) - TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); } - ConEQ.Distribute(&Dups[0], MRI); - DEBUG({ - for (unsigned i = 0; i != NumComp; ++i) - dbgs() << '\t' << *Dups[i] << '\n'; - }); } } @@ -411,7 +404,7 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg)) diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 9ea031d38d29..7ee87c1e650f 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,12 +15,11 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -49,7 +48,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); LIS = &getAnalysis(); VRM = &getAnalysis(); @@ -78,7 +76,7 @@ bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; - unsigned Mask = (*Units).second; + LaneBitmask Mask = (*Units).second; for (LiveInterval::SubRange &S : VRegInterval.subranges()) { if (S.LaneMask & Mask) { if (Func(Unit, S)) @@ -101,7 +99,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index b355393e76f7..06b86d82daf1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -522,11 +522,15 @@ void LiveVariables::runOnInstr(MachineInstr *MI, continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) { - MO.setIsKill(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { - MO.setIsDead(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsDead(false); DefRegs.push_back(MOReg); } } @@ -559,11 +563,10 @@ void LiveVariables::runOnInstr(MachineInstr *MI, void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Mark live-in registers as live-in. 
SmallVector Defs; - for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), - EE = MBB->livein_end(); II != EE; ++II) { - assert(TargetRegisterInfo::isPhysicalRegister(*II) && + for (const auto &LI : MBB->liveins()) { + assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, nullptr, Defs); + HandlePhysRegDef(LI.PhysReg, nullptr, Defs); } // Loop over all of the instructions, processing them. @@ -599,14 +602,12 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->isLandingPad()) + if (SuccMBB->isEHPad()) continue; - for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), - LE = SuccMBB->livein_end(); LI != LE; ++LI) { - unsigned LReg = *LI; - if (!TRI->isInAllocatableClass(LReg)) + for (const auto &LI : SuccMBB->liveins()) { + if (!TRI->isInAllocatableClass(LI.PhysReg)) // Ignore other live-ins, e.g. those that are live into landing pads. - LiveOuts.insert(LReg); + LiveOuts.insert(LI.PhysReg); } } @@ -640,7 +641,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &MF->front(); SmallPtrSet Visited; for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 837842914b4c..eb60005764c0 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -325,7 +325,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Sort the frame references by local offset array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); - MachineBasicBlock *Entry = Fn.begin(); + MachineBasicBlock *Entry = &Fn.front(); unsigned BaseReg = 0; int64_t BaseOffset = 0; diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt index 04ae72290f93..2852124786e3 100644 --- a/lib/CodeGen/MIRParser/LLVMBuild.txt +++ b/lib/CodeGen/MIRParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MIRParser parent = CodeGen -required_libraries = Core Support Target AsmParser CodeGen +required_libraries = AsmParser CodeGen Core MC Support Target diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 482c33ae2235..28f9d4e298f9 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include @@ -54,15 +55,132 @@ public: } // end anonymous namespace +MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { + this->Kind = Kind; + this->Range = Range; + return *this; +} + +MIToken &MIToken::setStringValue(StringRef StrVal) { + StringValue = StrVal; + return *this; +} + +MIToken &MIToken::setOwnedStringValue(std::string StrVal) { + StringValueStorage = std::move(StrVal); + StringValue = StringValueStorage; + return *this; +} + +MIToken &MIToken::setIntegerValue(APSInt IntVal) { + 
this->IntVal = std::move(IntVal); + return *this; +} + /// Skip the leading whitespace characters and return the updated cursor. static Cursor skipWhitespace(Cursor C) { - while (isspace(C.peek())) + while (isblank(C.peek())) C.advance(); return C; } +static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } + +/// Skip a line comment and return the updated cursor. +static Cursor skipComment(Cursor C) { + if (C.peek() != ';') + return C; + while (!isNewlineChar(C.peek()) && !C.isEOF()) + C.advance(); + return C; +} + +/// Return true if the given character satisfies the following regular +/// expression: [-a-zA-Z$._0-9] static bool isIdentifierChar(char C) { - return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; +} + +/// Unescapes the given string value. +/// +/// Expects the string value to be quoted. +static std::string unescapeQuotedString(StringRef Value) { + assert(Value.front() == '"' && Value.back() == '"'); + Cursor C = Cursor(Value.substr(1, Value.size() - 2)); + + std::string Str; + Str.reserve(C.remaining().size()); + while (!C.isEOF()) { + char Char = C.peek(); + if (Char == '\\') { + if (C.peek(1) == '\\') { + // Two '\' become one + Str += '\\'; + C.advance(2); + continue; + } + if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { + Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); + C.advance(3); + continue; + } + } + Str += Char; + C.advance(); + } + return Str; +} + +/// Lex a string constant using the following regular expression: \"[^\"]*\" +static Cursor lexStringConstant( + Cursor C, + function_ref ErrorCallback) { + assert(C.peek() == '"'); + for (C.advance(); C.peek() != '"'; C.advance()) { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '\"'"); + return None; + } + } + C.advance(); + return C; +} + +static Cursor lexName( + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, + function_ref ErrorCallback) { + auto Range = C; + C.advance(PrefixLength); + if (C.peek() == '"') { + if (Cursor R = lexStringConstant(C, ErrorCallback)) { + StringRef String = Range.upto(R); + Token.reset(Type, String) + .setOwnedStringValue( + unescapeQuotedString(String.drop_front(PrefixLength))); + return R; + } + Token.reset(MIToken::Error, Range.remaining()); + return Range; + } + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(Type, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(PrefixLength)); + return C; +} + +static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { + if (C.peek() != 'i' || !isdigit(C.peek(1))) + return None; + auto Range = C; + C.advance(); // Skip 'i' + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::IntegerType, Range.upto(C)); + return C; } static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { @@ -70,32 +188,70 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("_", MIToken::underscore) .Case("implicit", MIToken::kw_implicit) .Case("implicit-def", MIToken::kw_implicit_define) + .Case("def", MIToken::kw_def) .Case("dead", MIToken::kw_dead) .Case("killed", MIToken::kw_killed) .Case("undef", MIToken::kw_undef) + .Case("internal", MIToken::kw_internal) + .Case("early-clobber", MIToken::kw_early_clobber) + .Case("debug-use", MIToken::kw_debug_use) + .Case("tied-def", MIToken::kw_tied_def) + .Case("frame-setup", MIToken::kw_frame_setup) + 
.Case("debug-location", MIToken::kw_debug_location) + .Case(".cfi_same_value", MIToken::kw_cfi_same_value) + .Case(".cfi_offset", MIToken::kw_cfi_offset) + .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("blockaddress", MIToken::kw_blockaddress) + .Case("target-index", MIToken::kw_target_index) + .Case("half", MIToken::kw_half) + .Case("float", MIToken::kw_float) + .Case("double", MIToken::kw_double) + .Case("x86_fp80", MIToken::kw_x86_fp80) + .Case("fp128", MIToken::kw_fp128) + .Case("ppc_fp128", MIToken::kw_ppc_fp128) + .Case("target-flags", MIToken::kw_target_flags) + .Case("volatile", MIToken::kw_volatile) + .Case("non-temporal", MIToken::kw_non_temporal) + .Case("invariant", MIToken::kw_invariant) + .Case("align", MIToken::kw_align) + .Case("stack", MIToken::kw_stack) + .Case("got", MIToken::kw_got) + .Case("jump-table", MIToken::kw_jump_table) + .Case("constant-pool", MIToken::kw_constant_pool) + .Case("call-entry", MIToken::kw_call_entry) + .Case("liveout", MIToken::kw_liveout) + .Case("address-taken", MIToken::kw_address_taken) + .Case("landing-pad", MIToken::kw_landing_pad) + .Case("liveins", MIToken::kw_liveins) + .Case("successors", MIToken::kw_successors) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { - if (!isalpha(C.peek()) && C.peek() != '_') + if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') return None; auto Range = C; while (isIdentifierChar(C.peek())) C.advance(); auto Identifier = Range.upto(C); - Token = MIToken(getIdentifierKind(Identifier), Identifier); + Token.reset(getIdentifierKind(Identifier), Identifier) + .setStringValue(Identifier); return C; } static Cursor maybeLexMachineBasicBlock( Cursor C, MIToken &Token, function_ref ErrorCallback) { - if (!C.remaining().startswith("%bb.")) + bool IsReference = C.remaining().startswith("%bb."); + if (!IsReference && !C.remaining().startswith("bb.")) return None; auto Range = C; - C.advance(4); // Skip '%bb.' + unsigned PrefixLength = IsReference ? 4 : 3; + C.advance(PrefixLength); // Skip '%bb.' or 'bb.' if (!isdigit(C.peek())) { - Token = MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), "expected a number after '%bb.'"); return C; } @@ -103,26 +259,103 @@ static Cursor maybeLexMachineBasicBlock( while (isdigit(C.peek())) C.advance(); StringRef Number = NumberRange.upto(C); - unsigned StringOffset = 4 + Number.size(); // Drop '%bb.' + unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.' if (C.peek() == '.') { C.advance(); // Skip '.' ++StringOffset; while (isIdentifierChar(C.peek())) C.advance(); } - Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), - StringOffset); + Token.reset(IsReference ? 
MIToken::MachineBasicBlock + : MIToken::MachineBasicBlockLabel, + Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); return C; } +static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = Rule.size() + Number.size(); + if (C.peek() == '.') { + C.advance(); + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(Kind, Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); +} + +static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { + return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); +} + +static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); +} + +static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); +} + +static Cursor maybeLexIRBlock( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + const StringRef Rule = "%ir-block."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexIRValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + const StringRef Rule = "%ir."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRValue); + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = MIToken(MIToken::VirtualRegister, Range.upto(C), - APSInt(NumberRange.upto(C))); + Token.reset(MIToken::VirtualRegister, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } @@ -135,41 +368,112 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { C.advance(); // Skip '%' while (isIdentifierChar(C.peek())) C.advance(); - Token = MIToken(MIToken::NamedRegister, Range.upto(C), - /*StringOffset=*/1); // Drop the '%' + Token.reset(MIToken::NamedRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' return C; } -static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { +static Cursor maybeLexGlobalValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { if (C.peek() != '@') return None; + if (!isdigit(C.peek(1))) + return lexName(C, Token, 
MIToken::NamedGlobalValue, /*PrefixLength=*/1, + ErrorCallback); auto Range = C; - C.advance(); // Skip the '@' - // TODO: add support for quoted names. - if (!isdigit(C.peek())) { - while (isIdentifierChar(C.peek())) - C.advance(); - Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), - /*StringOffset=*/1); // Drop the '@' - return C; - } + C.advance(1); // Skip the '@' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = - MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); + Token.reset(MIToken::GlobalValue, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } -static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { +static Cursor maybeLexExternalSymbol( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '$') + return None; + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, + ErrorCallback); +} + +static bool isValidHexFloatingPointPrefix(char C) { + return C == 'H' || C == 'K' || C == 'L' || C == 'M'; +} + +static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || C.peek(1) != 'x') + return None; + Cursor Range = C; + C.advance(2); // Skip '0x' + if (isValidHexFloatingPointPrefix(C.peek())) + C.advance(); + while (isxdigit(C.peek())) + C.advance(); + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { + C.advance(); + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(C.peek())) + C.advance(); + if ((C.peek() == 'e' || C.peek() == 'E') && + (isdigit(C.peek(1)) || + ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { + C.advance(2); + while (isdigit(C.peek())) + C.advance(); + } + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) return None; auto Range = C; C.advance(); while (isdigit(C.peek())) C.advance(); + if (C.peek() == '.') + return lexFloatingPointLiteral(Range, C, Token); StringRef StrVal = Range.upto(C); - Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); + Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); + return C; +} + +static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { + return StringSwitch(Identifier) + .Case("!tbaa", MIToken::md_tbaa) + .Case("!alias.scope", MIToken::md_alias_scope) + .Case("!noalias", MIToken::md_noalias) + .Case("!range", MIToken::md_range) + .Default(MIToken::Error); +} + +static Cursor maybeLexExlaim( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '!') + return None; + auto Range = C; + C.advance(1); + if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { + Token.reset(MIToken::exclaim, Range.upto(C)); + return C; + } + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + Token.reset(getMetadataKeywordKind(StrVal), StrVal); + if (Token.isError()) + ErrorCallback(Token.location(), + "use of unknown metadata keyword '" + StrVal + "'"); return C; } @@ -181,44 +485,119 @@ static MIToken::TokenKind symbolToken(char C) { return MIToken::equal; case ':': return MIToken::colon; + case '(': + return MIToken::lparen; + case ')': + return MIToken::rparen; + case '{': + return MIToken::lbrace; + case '}': + return MIToken::rbrace; + case '+': + return MIToken::plus; + case '-': + 
return MIToken::minus; default: return MIToken::Error; } } static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { - auto Kind = symbolToken(C.peek()); + MIToken::TokenKind Kind; + unsigned Length = 1; + if (C.peek() == ':' && C.peek(1) == ':') { + Kind = MIToken::coloncolon; + Length = 2; + } else + Kind = symbolToken(C.peek()); if (Kind == MIToken::Error) return None; auto Range = C; + C.advance(Length); + Token.reset(Kind, Range.upto(C)); + return C; +} + +static Cursor maybeLexNewline(Cursor C, MIToken &Token) { + if (!isNewlineChar(C.peek())) + return None; + auto Range = C; C.advance(); - Token = MIToken(Kind, Range.upto(C)); + Token.reset(MIToken::Newline, Range.upto(C)); + return C; +} + +static Cursor maybeLexEscapedIRValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '`') + return None; + auto Range = C; + C.advance(); + auto StrRange = C; + while (C.peek() != '`') { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '`'"); + Token.reset(MIToken::Error, Range.remaining()); + return C; + } + C.advance(); + } + StringRef Value = StrRange.upto(C); + C.advance(); + Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); return C; } StringRef llvm::lexMIToken( StringRef Source, MIToken &Token, function_ref ErrorCallback) { - auto C = skipWhitespace(Cursor(Source)); + auto C = skipComment(skipWhitespace(Cursor(Source))); if (C.isEOF()) { - Token = MIToken(MIToken::Eof, C.remaining()); + Token.reset(MIToken::Eof, C.remaining()); return C.remaining(); } - if (Cursor R = maybeLexIdentifier(C, Token)) + if (Cursor R = maybeLexIntegerType(C, Token)) return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexIdentifier(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexJumpTableIndex(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexFixedStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexConstantPoolItem(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) + return R.remaining(); if (Cursor R = maybeLexRegister(C, Token)) return R.remaining(); - if (Cursor R = maybeLexGlobalValue(C, Token)) + if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexIntegerLiteral(C, Token)) + if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNumericalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexSymbol(C, Token)) return R.remaining(); + if (Cursor R = maybeLexNewline(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) + return R.remaining(); - Token = MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), Twine("unexpected character '") + Twine(C.peek()) + "'"); return C.remaining(); diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 55460b56e7d6..ff54aa3554d8 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ 
b/lib/CodeGen/MIRParser/MILexer.h @@ -30,50 +30,119 @@ struct MIToken { // Markers Eof, Error, + Newline, // Tokens with no info. comma, equal, underscore, colon, + coloncolon, + exclaim, + lparen, + rparen, + lbrace, + rbrace, + plus, + minus, // Keywords kw_implicit, kw_implicit_define, + kw_def, kw_dead, kw_killed, kw_undef, + kw_internal, + kw_early_clobber, + kw_debug_use, + kw_tied_def, + kw_frame_setup, + kw_debug_location, + kw_cfi_same_value, + kw_cfi_offset, + kw_cfi_def_cfa_register, + kw_cfi_def_cfa_offset, + kw_cfi_def_cfa, + kw_blockaddress, + kw_target_index, + kw_half, + kw_float, + kw_double, + kw_x86_fp80, + kw_fp128, + kw_ppc_fp128, + kw_target_flags, + kw_volatile, + kw_non_temporal, + kw_invariant, + kw_align, + kw_stack, + kw_got, + kw_jump_table, + kw_constant_pool, + kw_call_entry, + kw_liveout, + kw_address_taken, + kw_landing_pad, + kw_liveins, + kw_successors, + + // Named metadata keywords + md_tbaa, + md_alias_scope, + md_noalias, + md_range, // Identifier tokens Identifier, + IntegerType, NamedRegister, + MachineBasicBlockLabel, MachineBasicBlock, + StackObject, + FixedStackObject, NamedGlobalValue, GlobalValue, + ExternalSymbol, // Other tokens IntegerLiteral, - VirtualRegister + FloatingPointLiteral, + VirtualRegister, + ConstantPoolItem, + JumpTableIndex, + NamedIRBlock, + IRBlock, + NamedIRValue, + IRValue, + QuotedIRValue // `` }; private: TokenKind Kind; - unsigned StringOffset; StringRef Range; + StringRef StringValue; + std::string StringValueStorage; APSInt IntVal; public: - MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range) {} + MIToken() : Kind(Error) {} - MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal, - unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {} + MIToken &reset(TokenKind Kind, StringRef Range); + + MIToken &setStringValue(StringRef StrVal); + MIToken &setOwnedStringValue(std::string StrVal); + MIToken &setIntegerValue(APSInt IntVal); TokenKind kind() const { return Kind; } bool isError() const { return Kind == Error; } + bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } + + bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } + bool isRegister() const { return Kind == NamedRegister || Kind == underscore || Kind == VirtualRegister; @@ -81,7 +150,14 @@ public: bool isRegisterFlag() const { return Kind == kw_implicit || Kind == kw_implicit_define || - Kind == kw_dead || Kind == kw_killed || Kind == kw_undef; + Kind == kw_def || Kind == kw_dead || Kind == kw_killed || + Kind == kw_undef || Kind == kw_internal || + Kind == kw_early_clobber || Kind == kw_debug_use; + } + + bool isMemoryOperandFlag() const { + return Kind == kw_volatile || Kind == kw_non_temporal || + Kind == kw_invariant; } bool is(TokenKind K) const { return Kind == K; } @@ -90,13 +166,19 @@ public: StringRef::iterator location() const { return Range.begin(); } - StringRef stringValue() const { return Range.drop_front(StringOffset); } + StringRef range() const { return Range; } + + /// Return the token's string value. 
+ StringRef stringValue() const { return StringValue; } const APSInt &integerValue() const { return IntVal; } bool hasIntegerValue() const { return Kind == IntegerLiteral || Kind == MachineBasicBlock || - Kind == GlobalValue || Kind == VirtualRegister; + Kind == MachineBasicBlockLabel || Kind == StackObject || + Kind == FixedStackObject || Kind == GlobalValue || + Kind == VirtualRegister || Kind == ConstantPoolItem || + Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; } };
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c00011288a60..f2f6584fb6c8 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -14,12 +14,20 @@ #include "MIParser.h" #include "MILexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -30,15 +38,20 @@ using namespace llvm; namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source -/// range. -struct MachineOperandWithLocation { +/// range and other attributes. +struct ParsedMachineOperand { MachineOperand Operand; StringRef::iterator Begin; StringRef::iterator End; + Optional<unsigned> TiedDefIdx; - MachineOperandWithLocation(const MachineOperand &Operand, - StringRef::iterator Begin, StringRef::iterator End) - : Operand(Operand), Begin(Begin), End(End) {} + ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin, + StringRef::iterator End, Optional<unsigned> &TiedDefIdx) + : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) { + if (TiedDefIdx) + assert(Operand.isReg() && Operand.isUse() && + "Only used register operands can be tied"); + } }; class MIParser { @@ -58,6 +71,16 @@ class MIParser { StringMap<const uint32_t *> Names2RegMasks; /// Maps from subregister names to subregister indices. StringMap<unsigned> Names2SubRegIndices; + /// Maps from slot numbers to function's unnamed basic blocks. + DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; + /// Maps from slot numbers to function's unnamed values. + DenseMap<unsigned, const Value *> Slots2Values; + /// Maps from target index names to target indices. + StringMap<int> Names2TargetIndices; + /// Maps from direct target flag names to the direct target flag values. + StringMap<unsigned> Names2DirectTargetFlags; + /// Maps from bitmask target flag names to the bitmask target flag values. + StringMap<unsigned> Names2BitmaskTargetFlags; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, @@ -76,19 +99,66 @@ public: /// This function always returns true.
bool error(StringRef::iterator Loc, const Twine &Msg); + bool + parseBasicBlockDefinitions(DenseMap &MBBSlots); + bool parseBasicBlocks(); bool parse(MachineInstr *&MI); - bool parseMBB(MachineBasicBlock *&MBB); - bool parseNamedRegister(unsigned &Reg); + bool parseStandaloneMBB(MachineBasicBlock *&MBB); + bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneStackObject(int &FI); + bool parseStandaloneMDNode(MDNode *&Node); + + bool + parseBasicBlockDefinition(DenseMap &MBBSlots); + bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlockLiveins(MachineBasicBlock &MBB); + bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); bool parseRegister(unsigned &Reg); bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); - bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); + bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseRegisterOperand(MachineOperand &Dest, + Optional &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); + bool parseIRConstant(StringRef::iterator Loc, StringRef Source, + const Constant *&C); + bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseTypedImmediateOperand(MachineOperand &Dest); + bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); bool parseMBBOperand(MachineOperand &Dest); + bool parseStackFrameIndex(int &FI); + bool parseStackObjectOperand(MachineOperand &Dest); + bool parseFixedStackFrameIndex(int &FI); + bool parseFixedStackObjectOperand(MachineOperand &Dest); + bool parseGlobalValue(GlobalValue *&GV); bool parseGlobalAddressOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest); + bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseJumpTableIndexOperand(MachineOperand &Dest); + bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMDNode(MDNode *&Node); + bool parseMetadataOperand(MachineOperand &Dest); + bool parseCFIOffset(int &Offset); + bool parseCFIRegister(unsigned &Reg); + bool parseCFIOperand(MachineOperand &Dest); + bool parseIRBlock(BasicBlock *&BB, const Function &F); + bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseTargetIndexOperand(MachineOperand &Dest); + bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); + bool parseMachineOperand(MachineOperand &Dest, + Optional &TiedDefIdx); + bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + Optional &TiedDefIdx); + bool parseOffset(int64_t &Offset); + bool parseAlignment(unsigned &Alignment); + bool parseOperandsOffset(MachineOperand &Op); + bool parseIRValue(const Value *&V); + bool parseMemoryOperandFlag(unsigned &Flags); + bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); + bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseMachineMemoryOperand(MachineMemOperand *&Dest); private: /// Convert the integer literal in the current token into an unsigned integer. @@ -96,15 +166,31 @@ private: /// Return true if an error occurred. bool getUnsigned(unsigned &Result); + /// Convert the integer literal in the current token into an uint64. + /// + /// Return true if an error occurred. + bool getUint64(uint64_t &Result); + + /// If the current token is of the given kind, consume it and return false. + /// Otherwise report an error and return true. 
+ bool expectAndConsume(MIToken::TokenKind TokenKind); + + /// If the current token is of the given kind, consume it and return true. + /// Otherwise return false. + bool consumeIfPresent(MIToken::TokenKind TokenKind); + void initNames2InstrOpCodes(); /// Try to convert an instruction name to an opcode. Return true if the /// instruction name is invalid. bool parseInstrName(StringRef InstrName, unsigned &OpCode); - bool parseInstruction(unsigned &OpCode); + bool parseInstruction(unsigned &OpCode, unsigned &Flags); - bool verifyImplicitOperands(ArrayRef Operands, + bool assignRegisterTies(MachineInstr &MI, + ArrayRef Operands); + + bool verifyImplicitOperands(ArrayRef Operands, const MCInstrDesc &MCID); void initNames2Regs(); @@ -126,6 +212,34 @@ private: /// /// Return 0 if the name isn't a subregister index class. unsigned getSubRegIndex(StringRef Name); + + const BasicBlock *getIRBlock(unsigned Slot); + const BasicBlock *getIRBlock(unsigned Slot, const Function &F); + + const Value *getIRValue(unsigned Slot); + + void initNames2TargetIndices(); + + /// Try to convert a name of target index to the corresponding target index. + /// + /// Return true if the name isn't a name of a target index. + bool getTargetIndex(StringRef Name, int &Index); + + void initNames2DirectTargetFlags(); + + /// Try to convert a name of a direct target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a direct flag. + bool getDirectTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2BitmaskTargetFlags(); + + /// Try to convert a name of a bitmask target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a bitmask target flag. + bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); }; } // end anonymous namespace @@ -134,7 +248,7 @@ MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots) : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} + PFS(PFS), IRSlots(IRSlots) {} void MIParser::lex() { CurrentSource = lexMIToken( @@ -146,49 +260,378 @@ bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); - Error = SMDiagnostic( - SM, SMLoc(), - SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, - Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); + const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); + if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { + // Create an ordinary diagnostic when the source manager's buffer is the + // source string. + Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); + return true; + } + // Create a diagnostic for a YAML string literal. 
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, + Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), + Source, None, None); return true; } -bool MIParser::parse(MachineInstr *&MI) { - lex(); +static const char *toString(MIToken::TokenKind TokenKind) { + switch (TokenKind) { + case MIToken::comma: + return "','"; + case MIToken::equal: + return "'='"; + case MIToken::colon: + return "':'"; + case MIToken::lparen: + return "'('"; + case MIToken::rparen: + return "')'"; + default: + return "<unknown token>"; + } +} - // Parse any register operands before '=' - // TODO: Allow parsing of multiple operands before '=' - MachineOperand MO = MachineOperand::CreateImm(0); - SmallVector<MachineOperandWithLocation, 8> Operands; - if (Token.isRegister() || Token.isRegisterFlag()) { - auto Loc = Token.location(); - if (parseRegisterOperand(MO, /*IsDef=*/true)) +bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return error(Twine("expected ") + toString(TokenKind)); + lex(); + return false; +} + +bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return false; + lex(); + return true; +} + +bool MIParser::parseBasicBlockDefinition( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + assert(Token.is(MIToken::MachineBasicBlockLabel)); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + auto Loc = Token.location(); + auto Name = Token.stringValue(); + lex(); + bool HasAddressTaken = false; + bool IsLandingPad = false; + unsigned Alignment = 0; + BasicBlock *BB = nullptr; + if (consumeIfPresent(MIToken::lparen)) { + do { + // TODO: Report an error when the same attribute is specified more than once. + switch (Token.kind()) { + case MIToken::kw_address_taken: + HasAddressTaken = true; + lex(); + break; + case MIToken::kw_landing_pad: + IsLandingPad = true; + lex(); + break; + case MIToken::kw_align: + if (parseAlignment(Alignment)) + return true; + break; + case MIToken::IRBlock: + // TODO: Report an error when both a name and an IR block are specified. + if (parseIRBlock(BB, *MF.getFunction())) + return true; + lex(); + break; + default: + break; + } + } while (consumeIfPresent(MIToken::comma)); + if (expectAndConsume(MIToken::rparen)) return true; - Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); - if (Token.isNot(MIToken::equal)) - return error("expected '='"); + } + if (expectAndConsume(MIToken::colon)) + return true; + + if (!Name.empty()) { + BB = dyn_cast_or_null<BasicBlock>( + MF.getFunction()->getValueSymbolTable().lookup(Name)); + if (!BB) + return error(Loc, Twine("basic block '") + Name + + "' is not defined in the function '" + + MF.getName() + "'"); + } + auto *MBB = MF.CreateMachineBasicBlock(BB); + MF.insert(MF.end(), MBB); + bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; + if (!WasInserted) + return error(Loc, Twine("redefinition of machine basic block with id #") + + Twine(ID)); + if (Alignment) + MBB->setAlignment(Alignment); + if (HasAddressTaken) + MBB->setHasAddressTaken(); + MBB->setIsEHPad(IsLandingPad); + return false; +} + +bool MIParser::parseBasicBlockDefinitions( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + lex(); + // Skip until the first machine basic block.
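// A condensed, self-contained sketch (hypothetical MiniParser type, not the
// patch's API) of the recursive-descent convention the helpers above
// establish: a boolean 'true' always signals an error, consumeIfPresent()
// drives optional punctuation, and comma-separated lists become do/while
// loops, the same shape as parseBasicBlockDefinition()'s attribute list.
struct MiniParser {
  MIToken Token;                // current lookahead token
  void lex();                   // advance to the next token
  bool parseItem();             // parse one list element; true on error
  bool consumeIfPresent(MIToken::TokenKind Kind) {
    if (Token.isNot(Kind))
      return false;
    lex();
    return true;
  }
  bool parseCommaSeparatedList() {
    do {
      if (parseItem())
        return true;
    } while (consumeIfPresent(MIToken::comma));
    return false;
  }
};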
+  while (Token.is(MIToken::Newline))
+    lex();
+  if (Token.isErrorOrEOF())
+    return Token.isError();
+  if (Token.isNot(MIToken::MachineBasicBlockLabel))
+    return error("expected a basic block definition before instructions");
+  unsigned BraceDepth = 0;
+  do {
+    if (parseBasicBlockDefinition(MBBSlots))
+      return true;
+    bool IsAfterNewline = false;
+    // Skip until the next machine basic block.
+    while (true) {
+      if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+          Token.isErrorOrEOF())
+        break;
+      else if (Token.is(MIToken::MachineBasicBlockLabel))
+        return error("basic block definition should be located at the start of "
+                     "the line");
+      else if (consumeIfPresent(MIToken::Newline)) {
+        IsAfterNewline = true;
+        continue;
+      }
+      IsAfterNewline = false;
+      if (Token.is(MIToken::lbrace))
+        ++BraceDepth;
+      if (Token.is(MIToken::rbrace)) {
+        if (!BraceDepth)
+          return error("extraneous closing brace ('}')");
+        --BraceDepth;
+      }
+      lex();
+    }
+    // Verify that we closed all of the '{' at the end of a file or a block.
+    if (!Token.isError() && BraceDepth)
+      return error("expected '}'"); // FIXME: Report a note that shows '{'.
+  } while (!Token.isErrorOrEOF());
+  return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+  assert(Token.is(MIToken::kw_liveins));
+  lex();
+  if (expectAndConsume(MIToken::colon))
+    return true;
+  if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+    return false;
+  do {
+    if (Token.isNot(MIToken::NamedRegister))
+      return error("expected a named register");
+    unsigned Reg = 0;
+    if (parseRegister(Reg))
+      return true;
+    MBB.addLiveIn(Reg);
+    lex();
+  } while (consumeIfPresent(MIToken::comma));
+  return false;
+}
+
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+  assert(Token.is(MIToken::kw_successors));
+  lex();
+  if (expectAndConsume(MIToken::colon))
+    return true;
+  if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+    return false;
+  do {
+    if (Token.isNot(MIToken::MachineBasicBlock))
+      return error("expected a machine basic block reference");
+    MachineBasicBlock *SuccMBB = nullptr;
+    if (parseMBBReference(SuccMBB))
+      return true;
+    lex();
+    unsigned Weight = 0;
+    if (consumeIfPresent(MIToken::lparen)) {
+      if (Token.isNot(MIToken::IntegerLiteral))
+        return error("expected an integer literal after '('");
+      if (getUnsigned(Weight))
+        return true;
+      lex();
+      if (expectAndConsume(MIToken::rparen))
+        return true;
+    }
+    MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+  } while (consumeIfPresent(MIToken::comma));
+  MBB.normalizeSuccProbs();
+  return false;
+}
+
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+  // Skip the definition.
+  assert(Token.is(MIToken::MachineBasicBlockLabel));
+  lex();
+  if (consumeIfPresent(MIToken::lparen)) {
+    while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+      lex();
+    consumeIfPresent(MIToken::rparen);
+  }
+  consumeIfPresent(MIToken::colon);
+
+  // Parse the liveins and successors.
+  // N.B.: Multiple lists of successors and liveins are allowed and they're
+  // merged into one.
+  // Example:
+  //   liveins: %edi
+  //   liveins: %esi
+  //
+  // is equivalent to
+  //   liveins: %edi, %esi
+  while (true) {
+    if (Token.is(MIToken::kw_successors)) {
+      if (parseBasicBlockSuccessors(MBB))
+        return true;
+    } else if (Token.is(MIToken::kw_liveins)) {
+      if (parseBasicBlockLiveins(MBB))
+        return true;
+    } else if (consumeIfPresent(MIToken::Newline)) {
+      continue;
+    } else
+      break;
+    if (!Token.isNewlineOrEOF())
+      return error("expected line break at the end of a list");
     lex();
   }

-  unsigned OpCode;
-  if (Token.isError() || parseInstruction(OpCode))
+  // Parse the instructions.
+  bool IsInBundle = false;
+  MachineInstr *PrevMI = nullptr;
+  while (true) {
+    if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
+      return false;
+    else if (consumeIfPresent(MIToken::Newline))
+      continue;
+    if (consumeIfPresent(MIToken::rbrace)) {
+      // The first parsing pass should verify that all closing '}' have an
+      // opening '{'.
+      assert(IsInBundle);
+      IsInBundle = false;
+      continue;
+    }
+    MachineInstr *MI = nullptr;
+    if (parse(MI))
+      return true;
+    MBB.insert(MBB.end(), MI);
+    if (IsInBundle) {
+      PrevMI->setFlag(MachineInstr::BundledSucc);
+      MI->setFlag(MachineInstr::BundledPred);
+    }
+    PrevMI = MI;
+    if (Token.is(MIToken::lbrace)) {
+      if (IsInBundle)
+        return error("nested instruction bundles are not allowed");
+      lex();
+      // This instruction is the start of the bundle.
+      MI->setFlag(MachineInstr::BundledSucc);
+      IsInBundle = true;
+      if (!Token.is(MIToken::Newline))
+        // The next instruction can be on the same line.
+        continue;
+    }
+    assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+    lex();
+  }
+  return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+  lex();
+  // Skip until the first machine basic block.
+  while (Token.is(MIToken::Newline))
+    lex();
+  if (Token.isErrorOrEOF())
+    return Token.isError();
+  // The first parsing pass should have verified that this token is a MBB label
+  // in the 'parseBasicBlockDefinitions' method.
+  assert(Token.is(MIToken::MachineBasicBlockLabel));
+  do {
+    MachineBasicBlock *MBB = nullptr;
+    if (parseMBBReference(MBB))
+      return true;
+    if (parseBasicBlock(*MBB))
+      return true;
+    // The method 'parseBasicBlock' should parse the whole block until the next
+    // block or the end of file.
+    assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+  } while (Token.isNot(MIToken::Eof));
+  return false;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
+  // Parse any register operands before '='
+  MachineOperand MO = MachineOperand::CreateImm(0);
+  SmallVector<ParsedMachineOperand, 8> Operands;
+  while (Token.isRegister() || Token.isRegisterFlag()) {
+    auto Loc = Token.location();
+    Optional<unsigned> TiedDefIdx;
+    if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
+      return true;
+    Operands.push_back(
+        ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+    if (Token.isNot(MIToken::comma))
+      break;
+    lex();
+  }
+  if (!Operands.empty() && expectAndConsume(MIToken::equal))
     return true;

-  // TODO: Parse the instruction flags and memory operands.
+  unsigned OpCode, Flags = 0;
+  if (Token.isError() || parseInstruction(OpCode, Flags))
+    return true;

   // Parse the remaining machine operands.
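+  // A fully parsed instruction looks like (illustrative operands and
+  // metadata ids):
+  //
+  //   %eax = MOV32rm %rdi, 1, _, 0, _, debug-location !12 :: (load 4 from %ir.x)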
-  while (Token.isNot(MIToken::Eof)) {
+  while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+         Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
     auto Loc = Token.location();
-    if (parseMachineOperand(MO))
+    Optional<unsigned> TiedDefIdx;
+    if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
       return true;
-    Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
-    if (Token.is(MIToken::Eof))
+    Operands.push_back(
+        ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+    if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+        Token.is(MIToken::lbrace))
       break;
     if (Token.isNot(MIToken::comma))
       return error("expected ',' before the next machine operand");
     lex();
   }

+  DebugLoc DebugLocation;
+  if (Token.is(MIToken::kw_debug_location)) {
+    lex();
+    if (Token.isNot(MIToken::exclaim))
+      return error("expected a metadata node after 'debug-location'");
+    MDNode *Node = nullptr;
+    if (parseMDNode(Node))
+      return true;
+    DebugLocation = DebugLoc(Node);
+  }
+
+  // Parse the machine memory operands.
+  SmallVector<MachineMemOperand *, 2> MemOperands;
+  if (Token.is(MIToken::coloncolon)) {
+    lex();
+    while (!Token.isNewlineOrEOF()) {
+      MachineMemOperand *MemOp = nullptr;
+      if (parseMachineMemoryOperand(MemOp))
+        return true;
+      MemOperands.push_back(MemOp);
+      if (Token.isNewlineOrEOF())
+        break;
+      if (Token.isNot(MIToken::comma))
+        return error("expected ',' before the next machine memory operand");
+      lex();
+    }
+  }
+
   const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
   if (!MCID.isVariadic()) {
     // FIXME: Move the implicit operand verification to the machine verifier.
@@ -197,13 +640,22 @@ bool MIParser::parse(MachineInstr *&MI) {
   }

   // TODO: Check for extraneous machine operands.
-  MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+  MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+  MI->setFlags(Flags);
   for (const auto &Operand : Operands)
     MI->addOperand(MF, Operand.Operand);
+  if (assignRegisterTies(*MI, Operands))
+    return true;
+  if (MemOperands.empty())
+    return false;
+  MachineInstr::mmo_iterator MemRefs =
+      MF.allocateMemRefsArray(MemOperands.size());
+  std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
+  MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
   return false;
 }

-bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
   lex();
   if (Token.isNot(MIToken::MachineBasicBlock))
     return error("expected a machine basic block reference");
@@ -216,18 +668,52 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
   return false;
 }

-bool MIParser::parseNamedRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
   lex();
   if (Token.isNot(MIToken::NamedRegister))
     return error("expected a named register");
   if (parseRegister(Reg))
-    return 0;
+    return true;
   lex();
   if (Token.isNot(MIToken::Eof))
     return error("expected end of string after the register reference");
   return false;
 }

+bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+  lex();
+  if (Token.isNot(MIToken::VirtualRegister))
+    return error("expected a virtual register");
+  if (parseRegister(Reg))
+    return true;
+  lex();
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the register reference");
+  return false;
+}
+
+bool MIParser::parseStandaloneStackObject(int &FI) {
+  lex();
+  if (Token.isNot(MIToken::StackObject))
+    return error("expected a stack object");
+  if (parseStackFrameIndex(FI))
+    return true;
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the stack object reference");
+  return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+  lex();
+  if (Token.isNot(MIToken::exclaim))
+    return error("expected a metadata node");
+  if (parseMDNode(Node))
+    return true;
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the metadata node");
+  return false;
+}
+
 static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
   assert(MO.isImplicit());
   return MO.isDef() ? "implicit-def" : "implicit";
@@ -239,8 +725,18 @@ static std::string getRegisterName(const TargetRegisterInfo *TRI,
   return StringRef(TRI->getName(Reg)).lower();
 }

-bool MIParser::verifyImplicitOperands(
-    ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) {
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+                                ArrayRef<ParsedMachineOperand> Operands) {
+  for (const auto &I : Operands) {
+    if (ImplicitOperand.isIdenticalTo(I.Operand))
+      return true;
+  }
+  return false;
+}
+
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+                                      const MCInstrDesc &MCID) {
   if (MCID.isCall())
     // We can't verify call instructions as they can contain arbitrary implicit
     // register and register mask operands.
@@ -249,48 +745,32 @@ bool MIParser::verifyImplicitOperands(
   // Gather all the expected implicit operands.
   SmallVector<MachineOperand, 4> ImplicitOperands;
   if (MCID.ImplicitDefs)
-    for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+    for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
       ImplicitOperands.push_back(
           MachineOperand::CreateReg(*ImpDefs, true, true));
   if (MCID.ImplicitUses)
-    for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+    for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
      ImplicitOperands.push_back(
           MachineOperand::CreateReg(*ImpUses, false, true));

   const auto *TRI = MF.getSubtarget().getRegisterInfo();
   assert(TRI && "Expected target register info");
-  size_t I = ImplicitOperands.size(), J = Operands.size();
-  while (I) {
-    --I;
-    if (J) {
-      --J;
-      const auto &ImplicitOperand = ImplicitOperands[I];
-      const auto &Operand = Operands[J].Operand;
-      if (ImplicitOperand.isIdenticalTo(Operand))
-        continue;
-      if (Operand.isReg() && Operand.isImplicit()) {
-        return error(Operands[J].Begin,
-                     Twine("expected an implicit register operand '") +
-                         printImplicitRegisterFlag(ImplicitOperand) + " %" +
-                         getRegisterName(TRI, ImplicitOperand.getReg()) + "'");
-      }
-    }
-    // TODO: Fix source location when Operands[J].end is right before '=', i.e:
-    // insead of reporting an error at this location:
-    // %eax = MOV32r0
-    //                ^
-    // report the error at the following location:
-    // %eax = MOV32r0
-    //               ^
-    return error(J < Operands.size() ? Operands[J].End : Token.location(),
+  for (const auto &I : ImplicitOperands) {
+    if (isImplicitOperandIn(I, Operands))
+      continue;
+    return error(Operands.empty() ? Token.location() : Operands.back().End,
                  Twine("missing implicit register operand '") +
-                     printImplicitRegisterFlag(ImplicitOperands[I]) + " %" +
-                     getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'");
+                     printImplicitRegisterFlag(I) + " %" +
+                     getRegisterName(TRI, I.getReg()) + "'");
   }
   return false;
 }

-bool MIParser::parseInstruction(unsigned &OpCode) {
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+  if (Token.is(MIToken::kw_frame_setup)) {
+    Flags |= MachineInstr::FrameSetup;
+    lex();
+  }
   if (Token.isNot(MIToken::Identifier))
     return error("expected a machine instruction");
   StringRef InstrName = Token.stringValue();
@@ -330,6 +810,7 @@ bool MIParser::parseRegister(unsigned &Reg) {
 }

 bool MIParser::parseRegisterFlag(unsigned &Flags) {
+  const unsigned OldFlags = Flags;
   switch (Token.kind()) {
   case MIToken::kw_implicit:
     Flags |= RegState::Implicit;
@@ -337,6 +818,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
   case MIToken::kw_implicit_define:
     Flags |= RegState::ImplicitDefine;
     break;
+  case MIToken::kw_def:
+    Flags |= RegState::Define;
+    break;
   case MIToken::kw_dead:
     Flags |= RegState::Dead;
     break;
@@ -346,11 +830,22 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
   case MIToken::kw_undef:
     Flags |= RegState::Undef;
     break;
-  // TODO: report an error when we specify the same flag more than once.
-  // TODO: parse the other register flags.
+  case MIToken::kw_internal:
+    Flags |= RegState::InternalRead;
+    break;
+  case MIToken::kw_early_clobber:
+    Flags |= RegState::EarlyClobber;
+    break;
+  case MIToken::kw_debug_use:
+    Flags |= RegState::Debug;
+    break;
   default:
     llvm_unreachable("The current token should be a register flag");
   }
+  if (OldFlags == Flags)
+    // We know that the same flag is specified more than once when the flags
+    // weren't modified.
+    return error("duplicate '" + Token.stringValue() + "' register flag");
   lex();
   return false;
 }
@@ -368,7 +863,59 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
   return false;
 }

-bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+  if (!consumeIfPresent(MIToken::kw_tied_def))
+    return error("expected 'tied-def' after '('");
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal after 'tied-def'");
+  if (getUnsigned(TiedDefIdx))
+    return true;
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  return false;
+}
+
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+                                  ArrayRef<ParsedMachineOperand> Operands) {
+  SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+  for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+    if (!Operands[I].TiedDefIdx)
+      continue;
+    // The parser ensures that this operand is a register use, so we just have
+    // to check the tied-def operand.
+    unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+    if (DefIdx >= E)
+      return error(Operands[I].Begin,
+                   Twine("use of invalid tied-def operand index '" +
+                         Twine(DefIdx) + "'; instruction has only ") +
+                       Twine(E) + " operands");
+    const auto &DefOperand = Operands[DefIdx].Operand;
+    if (!DefOperand.isReg() || !DefOperand.isDef())
+      // FIXME: add note with the def operand.
+      return error(Operands[I].Begin,
+                   Twine("use of invalid tied-def operand index '") +
+                       Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+                       " isn't a defined register");
+    // Check that the tied-def operand wasn't tied elsewhere.
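+    // (An operand may be tied to at most one other operand, so a def index
+    // that is already paired is reported as an error below.)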
+    for (const auto &TiedPair : TiedRegisterPairs) {
+      if (TiedPair.first == DefIdx)
+        return error(Operands[I].Begin,
+                     Twine("the tied-def operand #") + Twine(DefIdx) +
+                         " is already tied with another register operand");
+    }
+    TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+  }
+  // FIXME: Verify that for non INLINEASM instructions, the def and use tied
+  // indices must be less than tied max.
+  for (const auto &TiedPair : TiedRegisterPairs)
+    MI.tieOperands(TiedPair.first, TiedPair.second);
+  return false;
+}
+
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+                                    Optional<unsigned> &TiedDefIdx,
+                                    bool IsDef) {
   unsigned Reg;
   unsigned Flags = IsDef ? RegState::Define : 0;
   while (Token.isRegisterFlag()) {
@@ -385,10 +932,17 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
     if (parseSubRegisterIndex(SubReg))
       return true;
   }
+  if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) {
+    unsigned Idx;
+    if (parseRegisterTiedDefIndex(Idx))
+      return true;
+    TiedDefIdx = Idx;
+  }
   Dest = MachineOperand::CreateReg(
       Reg, Flags & RegState::Define, Flags & RegState::Implicit,
       Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
-      /*isEarlyClobber=*/false, SubReg);
+      Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+      Flags & RegState::InternalRead);
   return false;
 }

@@ -396,13 +950,55 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
   assert(Token.is(MIToken::IntegerLiteral));
   const APSInt &Int = Token.integerValue();
   if (Int.getMinSignedBits() > 64)
-    // TODO: Replace this with an error when we can parse CIMM Machine Operands.
-    llvm_unreachable("Can't parse large integer literals yet!");
+    return error("integer literal is too large to be an immediate operand");
   Dest = MachineOperand::CreateImm(Int.getExtValue());
   lex();
   return false;
 }

+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+                               const Constant *&C) {
+  auto Source = StringValue.str(); // The source has to be null terminated.
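+  // This parses the IR constant part of operands such as 'i32 42' or
+  // 'float 3.25' (illustrative values).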
+  SMDiagnostic Err;
+  C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+                         &IRSlots);
+  if (!C)
+    return error(Loc + Err.getColumnNo(), Err.getMessage());
+  return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+  if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+    return true;
+  lex();
+  return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::IntegerType));
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal");
+  const Constant *C = nullptr;
+  if (parseIRConstant(Loc, C))
+    return true;
+  Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+  return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::FloatingPointLiteral))
+    return error("expected a floating point literal");
+  const Constant *C = nullptr;
+  if (parseIRConstant(Loc, C))
+    return true;
+  Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+  return false;
+}
+
 bool MIParser::getUnsigned(unsigned &Result) {
   assert(Token.hasIntegerValue() && "Expected a token with an integer value");
   const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
@@ -414,7 +1010,8 @@ bool MIParser::getUnsigned(unsigned &Result) {
 }

 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
-  assert(Token.is(MIToken::MachineBasicBlock));
+  assert(Token.is(MIToken::MachineBasicBlock) ||
+         Token.is(MIToken::MachineBasicBlockLabel));
   unsigned Number;
   if (getUnsigned(Number))
     return true;
@@ -438,16 +1035,66 @@ bool MIParser::parseMBBOperand(MachineOperand &Dest) {
   return false;
 }

-bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+bool MIParser::parseStackFrameIndex(int &FI) {
+  assert(Token.is(MIToken::StackObject));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+  if (ObjectInfo == PFS.StackObjectSlots.end())
+    return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+                 "'");
+  StringRef Name;
+  if (const auto *Alloca =
+          MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+    Name = Alloca->getName();
+  if (!Token.stringValue().empty() && Token.stringValue() != Name)
+    return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+                 "' isn't '" + Token.stringValue() + "'");
+  lex();
+  FI = ObjectInfo->second;
+  return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+  int FI;
+  if (parseStackFrameIndex(FI))
+    return true;
+  Dest = MachineOperand::CreateFI(FI);
+  return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+  assert(Token.is(MIToken::FixedStackObject));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+  if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+    return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+                 Twine(ID) + "'");
+  lex();
+  FI = ObjectInfo->second;
+  return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+  int FI;
+  if (parseFixedStackFrameIndex(FI))
+    return true;
+  Dest = MachineOperand::CreateFI(FI);
+  return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
   switch (Token.kind()) {
   case MIToken::NamedGlobalValue: {
-    auto Name = Token.stringValue();
     const Module *M = MF.getFunction()->getParent();
-    if (const auto *GV = M->getNamedValue(Name)) {
-      Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
-      break;
-    }
-    return error(Twine("use of undefined global value '@") + Name + "'");
+    GV = M->getNamedValue(Token.stringValue());
+    if (!GV)
+      return error(Twine("use of undefined global value '") + Token.range() +
+                   "'");
+    break;
   }
   case MIToken::GlobalValue: {
     unsigned GVIdx;
@@ -456,36 +1103,323 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
     if (GVIdx >= IRSlots.GlobalValues.size())
       return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
                    "'");
-    Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx],
-                                    /*Offset=*/0);
+    GV = IRSlots.GlobalValues[GVIdx];
     break;
   }
   default:
     llvm_unreachable("The current token should be a global value");
   }
-  // TODO: Parse offset and target flags.
+  return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+  GlobalValue *GV = nullptr;
+  if (parseGlobalValue(GV))
+    return true;
+  lex();
+  Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::ConstantPoolItem));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+  if (ConstantInfo == PFS.ConstantPoolSlots.end())
+    return error("use of undefined constant '%const." + Twine(ID) + "'");
+  lex();
+  Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::JumpTableIndex));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+  if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+    return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+  lex();
+  Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+  return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::ExternalSymbol));
+  const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+  lex();
+  Dest = MachineOperand::CreateES(Symbol);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+  assert(Token.is(MIToken::exclaim));
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected metadata id after '!'");
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto NodeInfo = IRSlots.MetadataNodes.find(ID);
+  if (NodeInfo == IRSlots.MetadataNodes.end())
+    return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+  lex();
+  Node = NodeInfo->second.get();
+  return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+  MDNode *Node = nullptr;
+  if (parseMDNode(Node))
+    return true;
+  Dest = MachineOperand::CreateMetadata(Node);
+  return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected a cfi offset");
+  if (Token.integerValue().getMinSignedBits() > 32)
+    return error("expected a 32 bit integer (the cfi offset is too large)");
+  Offset = (int)Token.integerValue().getExtValue();
   lex();
   return false;
 }

-bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+bool MIParser::parseCFIRegister(unsigned &Reg) {
+  if (Token.isNot(MIToken::NamedRegister))
+    return error("expected a cfi register");
+  unsigned LLVMReg;
+  if (parseRegister(LLVMReg))
+    return true;
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+  if (DwarfReg < 0)
+    return error("invalid DWARF register");
+  Reg = (unsigned)DwarfReg;
+  lex();
+  return false;
+}
+
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+  auto Kind = Token.kind();
+  lex();
+  auto &MMI = MF.getMMI();
+  int Offset;
+  unsigned Reg;
+  unsigned CFIIndex;
+  switch (Kind) {
+  case MIToken::kw_cfi_same_value:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_offset:
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIOffset(Offset))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+    break;
+  case MIToken::kw_cfi_def_cfa_register:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_def_cfa_offset:
+    if (parseCFIOffset(Offset))
+      return true;
+    // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
+    CFIIndex = MMI.addFrameInst(
+        MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+    break;
+  case MIToken::kw_cfi_def_cfa:
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIOffset(Offset))
+      return true;
+    // NB: MCCFIInstruction::createDefCfa negates the offset.
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+    break;
+  default:
+    // TODO: Parse the other CFI operands.
+    llvm_unreachable("The current token should be a cfi operand");
+  }
+  Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+  return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+  switch (Token.kind()) {
+  case MIToken::NamedIRBlock: {
+    BB = dyn_cast_or_null<BasicBlock>(
+        F.getValueSymbolTable().lookup(Token.stringValue()));
+    if (!BB)
+      return error(Twine("use of undefined IR block '") + Token.range() + "'");
+    break;
+  }
+  case MIToken::IRBlock: {
+    unsigned SlotNumber = 0;
+    if (getUnsigned(SlotNumber))
+      return true;
+    BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+    if (!BB)
+      return error(Twine("use of undefined IR block '%ir-block.") +
+                   Twine(SlotNumber) + "'");
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be an IR block reference");
+  }
+  return false;
+}
+
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_blockaddress));
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  if (Token.isNot(MIToken::GlobalValue) &&
+      Token.isNot(MIToken::NamedGlobalValue))
+    return error("expected a global value");
+  GlobalValue *GV = nullptr;
+  if (parseGlobalValue(GV))
+    return true;
+  auto *F = dyn_cast<Function>(GV);
+  if (!F)
+    return error("expected an IR function reference");
+  lex();
+  if (expectAndConsume(MIToken::comma))
+    return true;
+  BasicBlock *BB = nullptr;
+  if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+    return error("expected an IR block reference");
+  if (parseIRBlock(BB, *F))
+    return true;
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_target_index));
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  if (Token.isNot(MIToken::Identifier))
+    return error("expected the name of the target index");
+  int Index = 0;
+  if (getTargetIndex(Token.stringValue(), Index))
+    return error("use of undefined target index '" + Token.stringValue() +
+                 "'");
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_liveout));
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  while (true) {
+    if (Token.isNot(MIToken::NamedRegister))
+      return error("expected a named register");
+    unsigned Reg = 0;
+    if (parseRegister(Reg))
+      return true;
+    lex();
+    Mask[Reg / 32] |= 1U << (Reg % 32);
+    // TODO: Report an error if the same register is used more than once.
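+    // The whole operand has the form 'liveout(%rax, %rbx)' (illustrative
+    // registers); entries are comma separated until the closing ')'.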
+    if (Token.isNot(MIToken::comma))
+      break;
+    lex();
+  }
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateRegLiveOut(Mask);
+  return false;
+}
+
+bool MIParser::parseMachineOperand(MachineOperand &Dest,
+                                   Optional<unsigned> &TiedDefIdx) {
   switch (Token.kind()) {
   case MIToken::kw_implicit:
   case MIToken::kw_implicit_define:
+  case MIToken::kw_def:
   case MIToken::kw_dead:
   case MIToken::kw_killed:
   case MIToken::kw_undef:
+  case MIToken::kw_internal:
+  case MIToken::kw_early_clobber:
+  case MIToken::kw_debug_use:
   case MIToken::underscore:
   case MIToken::NamedRegister:
   case MIToken::VirtualRegister:
-    return parseRegisterOperand(Dest);
+    return parseRegisterOperand(Dest, TiedDefIdx);
   case MIToken::IntegerLiteral:
     return parseImmediateOperand(Dest);
+  case MIToken::IntegerType:
+    return parseTypedImmediateOperand(Dest);
+  case MIToken::kw_half:
+  case MIToken::kw_float:
+  case MIToken::kw_double:
+  case MIToken::kw_x86_fp80:
+  case MIToken::kw_fp128:
+  case MIToken::kw_ppc_fp128:
+    return parseFPImmediateOperand(Dest);
   case MIToken::MachineBasicBlock:
     return parseMBBOperand(Dest);
+  case MIToken::StackObject:
+    return parseStackObjectOperand(Dest);
+  case MIToken::FixedStackObject:
+    return parseFixedStackObjectOperand(Dest);
   case MIToken::GlobalValue:
   case MIToken::NamedGlobalValue:
     return parseGlobalAddressOperand(Dest);
+  case MIToken::ConstantPoolItem:
+    return parseConstantPoolIndexOperand(Dest);
+  case MIToken::JumpTableIndex:
+    return parseJumpTableIndexOperand(Dest);
+  case MIToken::ExternalSymbol:
+    return parseExternalSymbolOperand(Dest);
+  case MIToken::exclaim:
+    return parseMetadataOperand(Dest);
+  case MIToken::kw_cfi_same_value:
+  case MIToken::kw_cfi_offset:
+  case MIToken::kw_cfi_def_cfa_register:
+  case MIToken::kw_cfi_def_cfa_offset:
+  case MIToken::kw_cfi_def_cfa:
+    return parseCFIOperand(Dest);
+  case MIToken::kw_blockaddress:
+    return parseBlockAddressOperand(Dest);
+  case MIToken::kw_target_index:
+    return parseTargetIndexOperand(Dest);
+  case MIToken::kw_liveout:
+    return parseLiveoutRegisterMaskOperand(Dest);
   case MIToken::Error:
     return true;
   case MIToken::Identifier:
@@ -496,12 +1430,314 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest) {
   }
   // fallthrough
   default:
-    // TODO: parse the other machine operands.
+    // FIXME: Parse the MCSymbol machine operand.
     return error("expected a machine operand");
   }
   return false;
 }

+bool MIParser::parseMachineOperandAndTargetFlags(
+    MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+  unsigned TF = 0;
+  bool HasTargetFlags = false;
+  if (Token.is(MIToken::kw_target_flags)) {
+    HasTargetFlags = true;
+    lex();
+    if (expectAndConsume(MIToken::lparen))
+      return true;
+    if (Token.isNot(MIToken::Identifier))
+      return error("expected the name of the target flag");
+    if (getDirectTargetFlag(Token.stringValue(), TF)) {
+      if (getBitmaskTargetFlag(Token.stringValue(), TF))
+        return error("use of undefined target flag '" + Token.stringValue() +
+                     "'");
+    }
+    lex();
+    while (Token.is(MIToken::comma)) {
+      lex();
+      if (Token.isNot(MIToken::Identifier))
+        return error("expected the name of the target flag");
+      unsigned BitFlag = 0;
+      if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+        return error("use of undefined target flag '" + Token.stringValue() +
+                     "'");
+      // TODO: Report an error when using a duplicate bit target flag.
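+      // The operand form is 'target-flags(<direct flag>[, <bit flags>...])'
+      // followed by the operand itself; flag names are target specific, e.g.
+      // 'target-flags(aarch64-page) @var' (illustrative).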
+      TF |= BitFlag;
+      lex();
+    }
+    if (expectAndConsume(MIToken::rparen))
+      return true;
+  }
+  auto Loc = Token.location();
+  if (parseMachineOperand(Dest, TiedDefIdx))
+    return true;
+  if (!HasTargetFlags)
+    return false;
+  if (Dest.isReg())
+    return error(Loc, "register operands can't have target flags");
+  Dest.setTargetFlags(TF);
+  return false;
+}
+
+bool MIParser::parseOffset(int64_t &Offset) {
+  if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+    return false;
+  StringRef Sign = Token.range();
+  bool IsNegative = Token.is(MIToken::minus);
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal after '" + Sign + "'");
+  if (Token.integerValue().getMinSignedBits() > 64)
+    return error("expected 64-bit integer (too large)");
+  Offset = Token.integerValue().getExtValue();
+  if (IsNegative)
+    Offset = -Offset;
+  lex();
+  return false;
+}
+
+bool MIParser::parseAlignment(unsigned &Alignment) {
+  assert(Token.is(MIToken::kw_align));
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected an integer literal after 'align'");
+  if (getUnsigned(Alignment))
+    return true;
+  lex();
+  return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+  int64_t Offset = 0;
+  if (parseOffset(Offset))
+    return true;
+  Op.setOffset(Offset);
+  return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+  switch (Token.kind()) {
+  case MIToken::NamedIRValue: {
+    V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+    break;
+  }
+  case MIToken::IRValue: {
+    unsigned SlotNumber = 0;
+    if (getUnsigned(SlotNumber))
+      return true;
+    V = getIRValue(SlotNumber);
+    break;
+  }
+  case MIToken::NamedGlobalValue:
+  case MIToken::GlobalValue: {
+    GlobalValue *GV = nullptr;
+    if (parseGlobalValue(GV))
+      return true;
+    V = GV;
+    break;
+  }
+  case MIToken::QuotedIRValue: {
+    const Constant *C = nullptr;
+    if (parseIRConstant(Token.location(), Token.stringValue(), C))
+      return true;
+    V = C;
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be an IR value reference");
+  }
+  if (!V)
+    return error(Twine("use of undefined IR value '") + Token.range() + "'");
+  return false;
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+  assert(Token.hasIntegerValue());
+  if (Token.integerValue().getActiveBits() > 64)
+    return error("expected 64-bit integer (too large)");
+  Result = Token.integerValue().getZExtValue();
+  return false;
+}
+
+bool MIParser::parseMemoryOperandFlag(unsigned &Flags) {
+  const unsigned OldFlags = Flags;
+  switch (Token.kind()) {
+  case MIToken::kw_volatile:
+    Flags |= MachineMemOperand::MOVolatile;
+    break;
+  case MIToken::kw_non_temporal:
+    Flags |= MachineMemOperand::MONonTemporal;
+    break;
+  case MIToken::kw_invariant:
+    Flags |= MachineMemOperand::MOInvariant;
+    break;
+  // TODO: parse the target specific memory operand flags.
+  default:
+    llvm_unreachable("The current token should be a memory operand flag");
+  }
+  if (OldFlags == Flags)
+    // We know that the same flag is specified more than once when the flags
+    // weren't modified.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag"); + lex(); + return false; +} + +bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { + switch (Token.kind()) { + case MIToken::kw_stack: + PSV = MF.getPSVManager().getStack(); + break; + case MIToken::kw_got: + PSV = MF.getPSVManager().getGOT(); + break; + case MIToken::kw_jump_table: + PSV = MF.getPSVManager().getJumpTable(); + break; + case MIToken::kw_constant_pool: + PSV = MF.getPSVManager().getConstantPool(); + break; + case MIToken::FixedStackObject: { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::kw_call_entry: { + lex(); + switch (Token.kind()) { + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + PSV = MF.getPSVManager().getGlobalValueCallEntry(GV); + break; + } + case MIToken::ExternalSymbol: + PSV = MF.getPSVManager().getExternalSymbolCallEntry( + MF.createExternalSymbolName(Token.stringValue())); + break; + default: + return error( + "expected a global value or an external symbol after 'call-entry'"); + } + break; + } + default: + llvm_unreachable("The current token should be pseudo source value"); + } + lex(); + return false; +} + +bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { + if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || + Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) { + const PseudoSourceValue *PSV = nullptr; + if (parseMemoryPseudoSourceValue(PSV)) + return true; + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(PSV, Offset); + return false; + } + if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && + Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue) && + Token.isNot(MIToken::QuotedIRValue)) + return error("expected an IR value reference"); + const Value *V = nullptr; + if (parseIRValue(V)) + return true; + if (!V->getType()->isPointerTy()) + return error("expected a pointer IR value"); + lex(); + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(V, Offset); + return false; +} + +bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { + if (expectAndConsume(MIToken::lparen)) + return true; + unsigned Flags = 0; + while (Token.isMemoryOperandFlag()) { + if (parseMemoryOperandFlag(Flags)) + return true; + } + if (Token.isNot(MIToken::Identifier) || + (Token.stringValue() != "load" && Token.stringValue() != "store")) + return error("expected 'load' or 'store' memory operation"); + if (Token.stringValue() == "load") + Flags |= MachineMemOperand::MOLoad; + else + Flags |= MachineMemOperand::MOStore; + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected the size integer literal after memory operation"); + uint64_t Size; + if (getUint64(Size)) + return true; + lex(); + + const char *Word = Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + MachinePointerInfo Ptr = MachinePointerInfo(); + if (parseMachinePointerInfo(Ptr)) + return true; + unsigned BaseAlignment = Size; + AAMDNodes AAInfo; + MDNode *Range = nullptr; + while (consumeIfPresent(MIToken::comma)) { + switch (Token.kind()) { + case MIToken::kw_align: + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::md_tbaa: + lex(); + if (parseMDNode(AAInfo.TBAA)) + return true; + break; + case MIToken::md_alias_scope: + lex(); + if (parseMDNode(AAInfo.Scope)) + return true; + break; + case MIToken::md_noalias: + lex(); + if (parseMDNode(AAInfo.NoAlias)) + return true; + break; + case MIToken::md_range: + lex(); + if (parseMDNode(Range)) + return true; + break; + // TODO: Report an error on duplicate metadata nodes. + default: + return error("expected 'align' or '!tbaa' or '!alias.scope' or " + "'!noalias' or '!range'"); + } + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = + MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range); + return false; +} + void MIParser::initNames2InstrOpCodes() { if (!Names2InstrOpCodes.empty()) return; @@ -583,18 +1819,162 @@ unsigned MIParser::getSubRegIndex(StringRef Name) { return SubRegInfo->getValue(); } -bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); +static void initSlots2BasicBlocks( + const Function &F, + DenseMap &Slots2BasicBlocks) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (auto &BB : F) { + if (BB.hasName()) + continue; + int Slot = MST.getLocalSlot(&BB); + if (Slot == -1) + continue; + Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB)); + } +} + +static const BasicBlock *getIRBlockFromSlot( + unsigned Slot, + const DenseMap &Slots2BasicBlocks) { + auto BlockInfo = Slots2BasicBlocks.find(Slot); + if (BlockInfo == Slots2BasicBlocks.end()) + return nullptr; + return BlockInfo->second; +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot) { + if (Slots2BasicBlocks.empty()) + initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + return getIRBlockFromSlot(Slot, Slots2BasicBlocks); +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { + if (&F == MF.getFunction()) + return getIRBlock(Slot); + DenseMap CustomSlots2BasicBlocks; + initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); + return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); +} + +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. 
+static void initSlots2Values(const Function &F,
+                             DenseMap<unsigned, const Value *> &Slots2Values) {
+  ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+  MST.incorporateFunction(F);
+  for (const auto &Arg : F.args())
+    mapValueToSlot(&Arg, MST, Slots2Values);
+  for (const auto &BB : F) {
+    mapValueToSlot(&BB, MST, Slots2Values);
+    for (const auto &I : BB)
+      mapValueToSlot(&I, MST, Slots2Values);
+  }
+}
+
+const Value *MIParser::getIRValue(unsigned Slot) {
+  if (Slots2Values.empty())
+    initSlots2Values(*MF.getFunction(), Slots2Values);
+  auto ValueInfo = Slots2Values.find(Slot);
+  if (ValueInfo == Slots2Values.end())
+    return nullptr;
+  return ValueInfo->second;
+}
+
+void MIParser::initNames2TargetIndices() {
+  if (!Names2TargetIndices.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  for (const auto &I : Indices)
+    Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getTargetIndex(StringRef Name, int &Index) {
+  initNames2TargetIndices();
+  auto IndexInfo = Names2TargetIndices.find(Name);
+  if (IndexInfo == Names2TargetIndices.end())
+    return true;
+  Index = IndexInfo->second;
+  return false;
+}
+
+void MIParser::initNames2DirectTargetFlags() {
+  if (!Names2DirectTargetFlags.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2DirectTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
+  initNames2DirectTargetFlags();
+  auto FlagInfo = Names2DirectTargetFlags.find(Name);
+  if (FlagInfo == Names2DirectTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+void MIParser::initNames2BitmaskTargetFlags() {
+  if (!Names2BitmaskTargetFlags.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2BitmaskTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
+  initNames2BitmaskTargetFlags();
+  auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+  if (FlagInfo == Names2BitmaskTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+                                             PerFunctionMIParsingState &PFS,
+                                             const SlotMapping &IRSlots,
+                                             SMDiagnostic &Error) {
+  SourceMgr SM;
+  SM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+      SMLoc());
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src,
+                                    const PerFunctionMIParsingState &PFS,
+                                    const SlotMapping &IRSlots,
+                                    SMDiagnostic &Error) {
+  SourceMgr SM;
+  SM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+      SMLoc());
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks();
 }

 bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
                              MachineFunction &MF, StringRef Src,
                              const PerFunctionMIParsingState &PFS,
                              const SlotMapping &IRSlots, SMDiagnostic &Error) {
-  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB);
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB);
 }

 bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
@@ -602,5 +1982,30 @@ bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
                                        const PerFunctionMIParsingState &PFS,
                                        const SlotMapping &IRSlots,
                                        SMDiagnostic &Error) {
-  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg);
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+                                         MachineFunction &MF, StringRef Src,
+                                         const PerFunctionMIParsingState &PFS,
+                                         const SlotMapping &IRSlots,
+                                         SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneVirtualRegister(Reg);
+}
+
+bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM,
+                                     MachineFunction &MF, StringRef Src,
+                                     const PerFunctionMIParsingState &PFS,
+                                     const SlotMapping &IRSlots,
+                                     SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+                       StringRef Src, const PerFunctionMIParsingState &PFS,
+                       const SlotMapping &IRSlots, SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node);
+}

diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index fca4c4e6f885..8aef704ab36c 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -19,9 +19,11 @@
 namespace llvm {

+class BasicBlock;
 class MachineBasicBlock;
 class MachineInstr;
 class MachineFunction;
+class MDNode;
 struct SlotMapping;
 class SMDiagnostic;
 class SourceMgr;
@@ -29,11 +31,42 @@ class SourceMgr;

 struct PerFunctionMIParsingState {
   DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
   DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+  DenseMap<unsigned, int> FixedStackObjectSlots;
+  DenseMap<unsigned, int> StackObjectSlots;
+  DenseMap<unsigned, unsigned> ConstantPoolSlots;
+  DenseMap<unsigned, unsigned> JumpTableSlots;
 };

-bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
-                       StringRef Src, const PerFunctionMIParsingState &PFS,
-                       const SlotMapping &IRSlots, SMDiagnostic &Error);
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet - that makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+                                       PerFunctionMIParsingState &PFS,
+                                       const SlotMapping &IRSlots,
+                                       SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(MachineFunction &MF, StringRef Src,
+                              const PerFunctionMIParsingState &PFS,
+                              const SlotMapping &IRSlots, SMDiagnostic &Error);

 bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
                        MachineFunction &MF, StringRef Src,
@@ -46,6 +79,21 @@ bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
                                  const SlotMapping &IRSlots,
                                  SMDiagnostic &Error);

+bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+                                   MachineFunction &MF, StringRef Src,
+                                   const PerFunctionMIParsingState &PFS,
+                                   const SlotMapping &IRSlots,
+                                   SMDiagnostic &Error);
+
+bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF,
+                               StringRef Src,
+                               const PerFunctionMIParsingState &PFS,
+                               const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+                 StringRef Src, const PerFunctionMIParsingState &PFS,
+                 const SlotMapping &IRSlots, SMDiagnostic &Error);
+
 } // end namespace llvm

 #endif

diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 16b0e1655891..422efbc5ce57 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -20,8 +20,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/IR/BasicBlock.h"
@@ -95,30 +97,53 @@ public:
   /// Return true if error occurred.
   bool initializeMachineFunction(MachineFunction &MF);

-  /// Initialize the machine basic block using it's YAML representation.
-  ///
-  /// Return true if an error occurred.
-  bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   const yaml::MachineBasicBlock &YamlMBB,
-                                   const PerFunctionMIParsingState &PFS);
+  bool initializeRegisterInfo(MachineFunction &MF,
+                              const yaml::MachineFunction &YamlMF,
+                              PerFunctionMIParsingState &PFS);

-  bool
-  initializeRegisterInfo(const MachineFunction &MF,
-                         MachineRegisterInfo &RegInfo,
-                         const yaml::MachineFunction &YamlMF,
-                         DenseMap<unsigned, unsigned> &VirtualRegisterSlots);
+  void inferRegisterInfo(MachineFunction &MF,
+                         const yaml::MachineFunction &YamlMF);

-  bool initializeFrameInfo(MachineFrameInfo &MFI,
-                           const yaml::MachineFunction &YamlMF);
+  bool initializeFrameInfo(MachineFunction &MF,
+                           const yaml::MachineFunction &YamlMF,
+                           PerFunctionMIParsingState &PFS);
+
+  bool parseCalleeSavedRegister(MachineFunction &MF,
+                                PerFunctionMIParsingState &PFS,
+                                std::vector<CalleeSavedInfo> &CSIInfo,
+                                const yaml::StringValue &RegisterSource,
+                                int FrameIdx);
+
+  bool parseStackObjectsDebugInfo(MachineFunction &MF,
+                                  PerFunctionMIParsingState &PFS,
+                                  const yaml::MachineStackObject &Object,
+                                  int FrameIdx);
+
+  bool initializeConstantPool(MachineConstantPool &ConstantPool,
+                              const yaml::MachineFunction &YamlMF,
+                              const MachineFunction &MF,
+                              DenseMap<unsigned, unsigned> &ConstantPoolSlots);
+
+  bool initializeJumpTableInfo(MachineFunction &MF,
+                               const yaml::MachineJumpTable &YamlJTI,
+                               PerFunctionMIParsingState &PFS);

 private:
+  bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+                   MachineFunction &MF, const PerFunctionMIParsingState &PFS);
+
+  bool parseMBBReference(MachineBasicBlock *&MBB,
+                         const yaml::StringValue &Source, MachineFunction &MF,
+                         const PerFunctionMIParsingState &PFS);
+
   /// Return a MIR diagnostic converted from an MI string diagnostic.
   SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
                                     SMRange SourceRange);

-  /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
-  SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
-                                        SMRange SourceRange);
+  /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+  /// block scalar string.
+  SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+                                       SMRange SourceRange);

   /// Create an empty function with the given name.
   void createDummyFunction(StringRef Name, Module &M);
@@ -200,7 +225,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
     M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
                       Context, &IRSlots);
     if (!M) {
-      reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
+      reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
       return M;
     }
     In.nextDocument();
@@ -261,88 +286,56 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
   MF.setHasInlineAsm(YamlMF.HasInlineAsm);
   PerFunctionMIParsingState PFS;
-  if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF,
-                             PFS.VirtualRegisterSlots))
+  if (initializeRegisterInfo(MF, YamlMF, PFS))
     return true;
-  if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF))
-    return true;
-
-  const auto &F = *MF.getFunction();
-  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
-    const BasicBlock *BB = nullptr;
-    const yaml::StringValue &Name = YamlMBB.Name;
-    if (!Name.Value.empty()) {
-      BB = dyn_cast_or_null<BasicBlock>(
-          F.getValueSymbolTable().lookup(Name.Value));
-      if (!BB)
-        return error(Name.SourceRange.Start,
-                     Twine("basic block '") + Name.Value +
-                         "' is not defined in the function '" + MF.getName() +
-                         "'");
-    }
-    auto *MBB = MF.CreateMachineBasicBlock(BB);
-    MF.insert(MF.end(), MBB);
-    bool WasInserted =
-        PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
-    if (!WasInserted)
-      return error(Twine("redefinition of machine basic block with id #") +
-                   Twine(YamlMBB.ID));
-  }
-
-  if (YamlMF.BasicBlocks.empty())
-    return error(Twine("machine function '") + Twine(MF.getName()) +
-                 "' requires at least one machine basic block in its body");
-  // Initialize the machine basic blocks after creating them all so that the
-  // machine instructions parser can resolve the MBB references.
-  unsigned I = 0;
-  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
-    if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
-                                    PFS))
+  if (!YamlMF.Constants.empty()) {
+    auto *ConstantPool = MF.getConstantPool();
+    assert(ConstantPool && "Constant pool must be created");
+    if (initializeConstantPool(*ConstantPool, YamlMF, MF,
+                               PFS.ConstantPoolSlots))
       return true;
   }
-  return false;
-}

-bool MIRParserImpl::initializeMachineBasicBlock(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    const yaml::MachineBasicBlock &YamlMBB,
-    const PerFunctionMIParsingState &PFS) {
-  MBB.setAlignment(YamlMBB.Alignment);
-  if (YamlMBB.AddressTaken)
-    MBB.setHasAddressTaken();
-  MBB.setIsLandingPad(YamlMBB.IsLandingPad);
   SMDiagnostic Error;
-  // Parse the successors.
-  for (const auto &MBBSource : YamlMBB.Successors) {
-    MachineBasicBlock *SuccMBB = nullptr;
-    if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots,
-                          Error))
-      return error(Error, MBBSource.SourceRange);
-    // TODO: Report an error when adding the same successor more than once.
-    MBB.addSuccessor(SuccMBB);
+  if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS,
+                                        IRSlots, Error)) {
+    reportDiagnostic(
+        diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+    return true;
   }
-  // Parse the liveins.
-  for (const auto &LiveInSource : YamlMBB.LiveIns) {
-    unsigned Reg = 0;
-    if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS,
-                                    IRSlots, Error))
-      return error(Error, LiveInSource.SourceRange);
-    MBB.addLiveIn(Reg);
-  }
-  // Parse the instructions.
- for (const auto &MISource : YamlMBB.Instructions) { - MachineInstr *MI = nullptr; - if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error)) - return error(Error, MISource.SourceRange); - MBB.insert(MBB.end(), MI); + + if (MF.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); + // Initialize the frame information after creating all the MBBs so that the + // MBB references in the frame information can be resolved. + if (initializeFrameInfo(MF, YamlMF, PFS)) + return true; + // Initialize the jump table after creating all the MBBs so that the MBB + // references can be resolved. + if (!YamlMF.JumpTableInfo.Entries.empty() && + initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) + return true; + // Parse the machine instructions after creating all of the MBBs so that the + // parser can resolve the MBB references. + if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots, + Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; } + inferRegisterInfo(MF, YamlMF); + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MF.getRegInfo().freezeReservedRegs(MF); + MF.verify(); return false; } -bool MIRParserImpl::initializeRegisterInfo( - const MachineFunction &MF, MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF, - DenseMap &VirtualRegisterSlots) { +bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) RegInfo.leaveSSA(); @@ -351,6 +344,7 @@ bool MIRParserImpl::initializeRegisterInfo( RegInfo.invalidateLiveness(); RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { const auto *RC = getRegClass(MF, VReg.Class.Value); @@ -359,15 +353,71 @@ bool MIRParserImpl::initializeRegisterInfo( Twine("use of undefined register class '") + VReg.Class.Value + "'"); unsigned Reg = RegInfo.createVirtualRegister(RC); - // TODO: Report an error when the same virtual register with the same ID is - // redefined. - VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg)); + if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) + .second) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { + unsigned PreferredReg = 0; + if (parseNamedRegisterReference(PreferredReg, SM, MF, + VReg.PreferredRegister.Value, PFS, + IRSlots, Error)) + return error(Error, VReg.PreferredRegister.SourceRange); + RegInfo.setSimpleHint(Reg, PreferredReg); + } } + + // Parse the liveins. + for (const auto &LiveIn : YamlMF.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS, + IRSlots, Error)) + return error(Error, LiveIn.Register.SourceRange); + unsigned VReg = 0; + if (!LiveIn.VirtualRegister.Value.empty()) { + if (parseVirtualRegisterReference( + VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error)) + return error(Error, LiveIn.VirtualRegister.SourceRange); + } + RegInfo.addLiveIn(Reg, VReg); + } + + // Parse the callee saved register mask. 
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size()); + if (!YamlMF.CalleeSavedRegisters) + return false; + for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots, + Error)) + return error(Error, RegSource.SourceRange); + CalleeSavedRegisterMask[Reg] = true; + } + RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip()); return false; } -bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, - const yaml::MachineFunction &YamlMF) { +void MIRParserImpl::inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF) { + if (YamlMF.CalleeSavedRegisters) + return; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask()); + } + } + } +} + +bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); @@ -383,7 +433,20 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + if (!YamlMFI.SavePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) + return true; + MFI.setSavePoint(MBB); + } + if (!YamlMFI.RestorePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) + return true; + MFI.setRestorePoint(MBB); + } + std::vector CSIInfo; // Initialize the fixed frame objects. for (const auto &Object : YamlMF.FixedStackObjects) { int ObjectIdx; @@ -393,24 +456,187 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); MFI.setObjectAlignment(ObjectIdx, Object.Alignment); - // TODO: Store the mapping between fixed object IDs and object indices to - // parse fixed stack object references correctly. + if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, + ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of fixed stack object '%fixed-stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; } // Initialize the ordinary frame objects. 
for (const auto &Object : YamlMF.StackObjects) { int ObjectIdx; + const AllocaInst *Alloca = nullptr; + const yaml::StringValue &Name = Object.Name; + if (!Name.Value.empty()) { + Alloca = dyn_cast_or_null( + F.getValueSymbolTable().lookup(Name.Value)); + if (!Alloca) + return error(Name.SourceRange.Start, + "alloca instruction named '" + Name.Value + + "' isn't defined in the function '" + F.getName() + + "'"); + } if (Object.Type == yaml::MachineStackObject::VariableSized) - ObjectIdx = - MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr); + ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); else ObjectIdx = MFI.CreateStackObject( Object.Size, Object.Alignment, - Object.Type == yaml::MachineStackObject::SpillSlot); + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); MFI.setObjectOffset(ObjectIdx, Object.Offset); - // TODO: Store the mapping between object IDs and object indices to parse - // stack object references correctly. + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of stack object '%stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; + if (Object.LocalOffset) + MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx)) + return true; } + MFI.setCalleeSavedInfo(CSIInfo); + if (!CSIInfo.empty()) + MFI.setCalleeSavedInfoValid(true); + + // Initialize the various stack object references after initializing the + // stack objects. + if (!YamlMFI.StackProtector.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS, + IRSlots, Error)) + return error(Error, YamlMFI.StackProtector.SourceRange); + MFI.setStackProtectorIndex(FI); + } + return false; +} + +bool MIRParserImpl::parseCalleeSavedRegister( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + std::vector &CSIInfo, + const yaml::StringValue &RegisterSource, int FrameIdx) { + if (RegisterSource.Value.empty()) + return false; + unsigned Reg = 0; + SMDiagnostic Error; + if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS, + IRSlots, Error)) + return error(Error, RegisterSource.SourceRange); + CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + return false; +} + +/// Verify that given node is of a certain type. Return true on error. +template +static bool typecheckMDNode(T *&Result, MDNode *Node, + const yaml::StringValue &Source, + StringRef TypeString, MIRParserImpl &Parser) { + if (!Node) + return false; + Result = dyn_cast(Node); + if (!Result) + return Parser.error(Source.SourceRange.Start, + "expected a reference to a '" + TypeString + + "' metadata node"); + return false; +} + +bool MIRParserImpl::parseStackObjectsDebugInfo( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, int FrameIdx) { + // Debug information can only be attached to stack objects; Fixed stack + // objects aren't supported. 
+ assert(FrameIdx >= 0 && "Expected a stack object frame index"); + MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; + if (parseMDNode(Var, Object.DebugVar, MF, PFS) || + parseMDNode(Expr, Object.DebugExpr, MF, PFS) || + parseMDNode(Loc, Object.DebugLoc, MF, PFS)) + return true; + if (!Var && !Expr && !Loc) + return false; + DILocalVariable *DIVar = nullptr; + DIExpression *DIExpr = nullptr; + DILocation *DILoc = nullptr; + if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) || + typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || + typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) + return true; + MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + return false; +} + +bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + if (Source.Value.empty()) + return false; + SMDiagnostic Error; + if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::initializeConstantPool( + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap &ConstantPoolSlots) { + const auto &M = *MF.getFunction()->getParent(); + SMDiagnostic Error; + for (const auto &YamlConstant : YamlMF.Constants) { + const Constant *Value = dyn_cast_or_null( + parseConstantValue(YamlConstant.Value.Value, Error, M)); + if (!Value) + return error(Error, YamlConstant.Value.SourceRange); + unsigned Alignment = + YamlConstant.Alignment + ? YamlConstant.Alignment + : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) + .second) + return error(YamlConstant.ID.SourceRange.Start, + Twine("redefinition of constant pool item '%const.") + + Twine(YamlConstant.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::initializeJumpTableInfo( + MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS) { + MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind); + for (const auto &Entry : YamlJTI.Entries) { + std::vector Blocks; + for (const auto &MBBSource : Entry.Blocks) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, MBBSource.Value, MF, PFS)) + return true; + Blocks.push_back(MBB); + } + unsigned Index = JTI->createJumpTableIndex(Blocks); + if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index)) + .second) + return error(Entry.ID.SourceRange.Start, + Twine("redefinition of jump table entry '%jump-table.") + + Twine(Entry.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + SMDiagnostic Error; + if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); return false; } @@ -430,8 +656,8 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, Error.getFixIts()); } -SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, - SMRange SourceRange) { +SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { assert(SourceRange.isValid()); // Translate the location of the 
error from the location in the llvm IR string diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index d5cf9244199e..175cb0d51437 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -14,13 +14,20 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" @@ -31,11 +38,38 @@ using namespace llvm; namespace { +/// This structure describes how to print out stack object references. +struct FrameIndexOperand { + std::string Name; + unsigned ID; + bool IsFixed; + + FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed) + : Name(Name.str()), ID(ID), IsFixed(IsFixed) {} + + /// Return an ordinary stack object reference. + static FrameIndexOperand create(StringRef Name, unsigned ID) { + return FrameIndexOperand(Name, ID, /*IsFixed=*/false); + } + + /// Return a fixed stack object reference. + static FrameIndexOperand createFixed(unsigned ID) { + return FrameIndexOperand("", ID, /*IsFixed=*/true); + } +}; + +} // end anonymous namespace + +namespace llvm { + /// This class prints out the machine functions using the MIR serialization /// format. class MIRPrinter { raw_ostream &OS; DenseMap RegisterMaskIds; + /// Maps from stack object indices to operand indices which will be used when + /// printing frame index machine operands. 
+  DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
 
 public:
   MIRPrinter(raw_ostream &OS) : OS(OS) {}
@@ -44,11 +78,16 @@ public:
   void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
                const TargetRegisterInfo *TRI);
-  void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI);
-  void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB,
-               const MachineBasicBlock &MBB);
+  void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+               const MachineFrameInfo &MFI);
+  void convert(yaml::MachineFunction &MF,
+               const MachineConstantPool &ConstantPool);
+  void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+               const MachineJumpTableInfo &JTI);
   void convertStackObjects(yaml::MachineFunction &MF,
-                           const MachineFrameInfo &MFI);
+                           const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
+                           ModuleSlotTracker &MST,
+                           const TargetRegisterInfo *TRI);
 
 private:
   void initRegisterMaskIds(const MachineFunction &MF);
@@ -60,18 +99,32 @@ class MIPrinter {
   raw_ostream &OS;
   ModuleSlotTracker &MST;
   const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+  const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
 
 public:
   MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
-            const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
-      : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {}
+            const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+            const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+      : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+        StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+  void print(const MachineBasicBlock &MBB);
 
   void print(const MachineInstr &MI);
   void printMBBReference(const MachineBasicBlock &MBB);
-  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
+  void printIRBlockReference(const BasicBlock &BB);
+  void printIRValueReference(const Value &V);
+  void printStackObjectReference(int FrameIndex);
+  void printOffset(int64_t Offset);
+  void printTargetFlags(const MachineOperand &Op);
+  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+             unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false);
+  void print(const MachineMemOperand &Op);
+
+  void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
 };
 
-} // end anonymous namespace
+} // end namespace llvm
 
 namespace llvm {
 namespace yaml {
@@ -103,6 +156,12 @@ static void printReg(unsigned Reg, raw_ostream &OS,
     llvm_unreachable("Can't print this kind of register yet");
 }
 
+static void printReg(unsigned Reg, yaml::StringValue &Dest,
+                     const TargetRegisterInfo *TRI) {
+  raw_string_ostream OS(Dest.Value);
+  printReg(Reg, OS, TRI);
+}
+
 void MIRPrinter::print(const MachineFunction &MF) {
   initRegisterMaskIds(MF);
 
@@ -112,23 +171,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
   YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
   YamlMF.HasInlineAsm = MF.hasInlineAsm();
   convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
-  convert(YamlMF.FrameInfo, *MF.getFrameInfo());
-  convertStackObjects(YamlMF, *MF.getFrameInfo());
-
-  int I = 0;
   ModuleSlotTracker MST(MF.getFunction()->getParent());
+  MST.incorporateFunction(*MF.getFunction());
+  convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
+  convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
+                      MF.getSubtarget().getRegisterInfo());
+  if (const auto *ConstantPool = MF.getConstantPool())
+    convert(YamlMF, *ConstantPool);
+  if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+    convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+  raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+  bool IsNewlineNeeded = false;
   for (const auto &MBB : MF) {
-    // TODO: Allow printing of non sequentially numbered MBBs.
-    // This is currently needed as the basic block references get their index
-    // from MBB.getNumber(), thus it should be sequential so that the parser can
-    // map back to the correct MBBs when parsing the output.
-    assert(MBB.getNumber() == I++ &&
-           "Can't print MBBs that aren't sequentially numbered");
-    (void)I;
-    yaml::MachineBasicBlock YamlMBB;
-    convert(MST, YamlMBB, MBB);
-    YamlMF.BasicBlocks.push_back(YamlMBB);
+    if (IsNewlineNeeded)
+      StrOS << "\n";
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .print(MBB);
+    IsNewlineNeeded = true;
   }
+  StrOS.flush();
   yaml::Output Out(OS);
   Out << YamlMF;
 }
@@ -147,11 +208,38 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
     VReg.ID = I;
     VReg.Class =
         StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+    unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+    if (PreferredReg)
+      printReg(PreferredReg, VReg.PreferredRegister, TRI);
     MF.VirtualRegisters.push_back(VReg);
   }
+
+  // Print the live ins.
+  for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+    yaml::MachineFunctionLiveIn LiveIn;
+    printReg(I->first, LiveIn.Register, TRI);
+    if (I->second)
+      printReg(I->second, LiveIn.VirtualRegister, TRI);
+    MF.LiveIns.push_back(LiveIn);
+  }
+  // The used physical register mask is printed as an inverted callee saved
+  // register mask.
+  const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
+  if (UsedPhysRegMask.none())
+    return;
+  std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+  for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
+    if (!UsedPhysRegMask[I]) {
+      yaml::FlowStringValue Reg;
+      printReg(I, Reg, TRI);
+      CalleeSavedRegisters.push_back(Reg);
+    }
+  }
+  MF.CalleeSavedRegisters = CalleeSavedRegisters;
 }
 
-void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+                         yaml::MachineFrameInfo &YamlMFI,
                          const MachineFrameInfo &MFI) {
   YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
   YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
@@ -166,10 +254,23 @@ void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
   YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
   YamlMFI.HasVAStart = MFI.hasVAStart();
   YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+  if (MFI.getSavePoint()) {
+    raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printMBBReference(*MFI.getSavePoint());
+  }
+  if (MFI.getRestorePoint()) {
+    raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printMBBReference(*MFI.getRestorePoint());
+  }
 }
 
 void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
-                                     const MachineFrameInfo &MFI) {
+                                     const MachineFrameInfo &MFI,
+                                     MachineModuleInfo &MMI,
+                                     ModuleSlotTracker &MST,
+                                     const TargetRegisterInfo *TRI) {
   // Process fixed stack objects.
   unsigned ID = 0;
   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -177,7 +278,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
       continue;
 
     yaml::FixedMachineStackObject YamlObject;
-    YamlObject.ID = ID++;
+    YamlObject.ID = ID;
     YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                           ? yaml::FixedMachineStackObject::SpillSlot
                           : yaml::FixedMachineStackObject::DefaultType;
@@ -187,8 +288,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
     YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
     YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
     MF.FixedStackObjects.push_back(YamlObject);
-    // TODO: Store the mapping between fixed object IDs and object indices to
-    // print the fixed stack object references correctly.
+    StackObjectOperandMapping.insert(
+        std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
   }
 
   // Process ordinary stack objects.
@@ -198,7 +299,10 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
       continue;
 
     yaml::MachineStackObject YamlObject;
-    YamlObject.ID = ID++;
+    YamlObject.ID = ID;
+    if (const auto *Alloca = MFI.getObjectAllocation(I))
+      YamlObject.Name.Value =
+          Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
     YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                           ? yaml::MachineStackObject::SpillSlot
                           : MFI.isVariableSizedObjectIndex(I)
@@ -209,47 +313,100 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
     YamlObject.Alignment = MFI.getObjectAlignment(I);
     MF.StackObjects.push_back(YamlObject);
-    // TODO: Store the mapping between object IDs and object indices to print
-    // the stack object references correctly.
+    StackObjectOperandMapping.insert(std::make_pair(
+        I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+  }
+
+  for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+    yaml::StringValue Reg;
+    printReg(CSInfo.getReg(), Reg, TRI);
+    auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    if (StackObject.IsFixed)
+      MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+    else
+      MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+  }
+  for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+    auto LocalObject = MFI.getLocalFrameObjectMap(I);
+    auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
+    MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+  }
+
+  // Print the stack object references in the frame information class after
+  // converting the stack objects.
+  if (MFI.hasStackProtectorIndex()) {
+    raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printStackObjectReference(MFI.getStackProtectorIndex());
+  }
+
+  // Print the debug variable information.
+  for (MachineModuleInfo::VariableDbgInfo &DebugVar :
+       MMI.getVariableDbgInfo()) {
+    auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
+    auto &Object = MF.StackObjects[StackObject.ID];
+    {
+      raw_string_ostream StrOS(Object.DebugVar.Value);
+      DebugVar.Var->printAsOperand(StrOS, MST);
+    }
+    {
+      raw_string_ostream StrOS(Object.DebugExpr.Value);
+      DebugVar.Expr->printAsOperand(StrOS, MST);
+    }
+    {
+      raw_string_ostream StrOS(Object.DebugLoc.Value);
+      DebugVar.Loc->printAsOperand(StrOS, MST);
+    }
+  }
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+                         const MachineConstantPool &ConstantPool) {
+  unsigned ID = 0;
+  for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+    // TODO: Serialize target specific constant pool entries.
+    if (Constant.isMachineConstantPoolEntry())
+      llvm_unreachable("Can't print target specific constant pool entries yet");
+
+    yaml::MachineConstantPoolValue YamlConstant;
+    std::string Str;
+    raw_string_ostream StrOS(Str);
+    Constant.Val.ConstVal->printAsOperand(StrOS);
+    YamlConstant.ID = ID++;
+    YamlConstant.Value = StrOS.str();
+    YamlConstant.Alignment = Constant.getAlignment();
+    MF.Constants.push_back(YamlConstant);
   }
 }
 
 void MIRPrinter::convert(ModuleSlotTracker &MST,
-                         yaml::MachineBasicBlock &YamlMBB,
-                         const MachineBasicBlock &MBB) {
-  assert(MBB.getNumber() >= 0 && "Invalid MBB number");
-  YamlMBB.ID = (unsigned)MBB.getNumber();
-  // TODO: Serialize unnamed BB references.
-  if (const auto *BB = MBB.getBasicBlock())
-    YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>";
-  else
-    YamlMBB.Name.Value = "<null>";
-  YamlMBB.Alignment = MBB.getAlignment();
-  YamlMBB.AddressTaken = MBB.hasAddressTaken();
-  YamlMBB.IsLandingPad = MBB.isLandingPad();
-  for (const auto *SuccMBB : MBB.successors()) {
+                         yaml::MachineJumpTable &YamlJTI,
+                         const MachineJumpTableInfo &JTI) {
+  YamlJTI.Kind = JTI.getEntryKind();
+  unsigned ID = 0;
+  for (const auto &Table : JTI.getJumpTables()) {
     std::string Str;
-    raw_string_ostream StrOS(Str);
-    MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB);
-    YamlMBB.Successors.push_back(StrOS.str());
-  }
-  // Print the live in registers.
-  const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
-  assert(TRI && "Expected target register info");
-  for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) {
-    std::string Str;
-    raw_string_ostream StrOS(Str);
-    printReg(*I, StrOS, TRI);
-    YamlMBB.LiveIns.push_back(StrOS.str());
-  }
-  // Print the machine instructions.
-  YamlMBB.Instructions.reserve(MBB.size());
-  std::string Str;
-  for (const auto &MI : MBB) {
-    raw_string_ostream StrOS(Str);
-    MIPrinter(StrOS, MST, RegisterMaskIds).print(MI);
-    YamlMBB.Instructions.push_back(StrOS.str());
-    Str.clear();
+    yaml::MachineJumpTable::Entry Entry;
+    Entry.ID = ID++;
+    for (const auto *MBB : Table.MBBs) {
+      raw_string_ostream StrOS(Str);
+      MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+          .printMBBReference(*MBB);
+      Entry.Blocks.push_back(StrOS.str());
+      Str.clear();
+    }
+    YamlJTI.Entries.push_back(Entry);
   }
 }
 
@@ -260,26 +417,137 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
     RegisterMaskIds.insert(std::make_pair(Mask, I++));
 }
 
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+  assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+  OS << "bb." << MBB.getNumber();
+  bool HasAttributes = false;
+  if (const auto *BB = MBB.getBasicBlock()) {
+    if (BB->hasName()) {
+      OS << "." << BB->getName();
+    } else {
+      HasAttributes = true;
+      OS << " (";
+      int Slot = MST.getLocalSlot(BB);
+      if (Slot == -1)
+        OS << "<ir-block badref>";
+      else
+        OS << (Twine("%ir-block.") + Twine(Slot)).str();
+    }
+  }
+  if (MBB.hasAddressTaken()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "address-taken";
+    HasAttributes = true;
+  }
+  if (MBB.isEHPad()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "landing-pad";
+    HasAttributes = true;
+  }
+  if (MBB.getAlignment()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "align " << MBB.getAlignment();
+    HasAttributes = true;
+  }
+  if (HasAttributes)
+    OS << ")";
+  OS << ":\n";
+
+  bool HasLineAttributes = false;
+  // Print the successors
+  if (!MBB.succ_empty()) {
+    OS.indent(2) << "successors: ";
+    for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+      if (I != MBB.succ_begin())
+        OS << ", ";
+      printMBBReference(**I);
+      if (MBB.hasSuccessorProbabilities())
+        OS << '(' << MBB.getSuccProbability(I) << ')';
+    }
+    OS << "\n";
+    HasLineAttributes = true;
+  }
+
+  // Print the live in registers.
+  const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  if (!MBB.livein_empty()) {
+    OS.indent(2) << "liveins: ";
+    bool First = true;
+    for (const auto &LI : MBB.liveins()) {
+      if (!First)
+        OS << ", ";
+      First = false;
+      printReg(LI.PhysReg, OS, TRI);
+      if (LI.LaneMask != ~0u)
+        OS << ':' << PrintLaneMask(LI.LaneMask);
+    }
+    OS << "\n";
+    HasLineAttributes = true;
+  }
+
+  if (HasLineAttributes)
+    OS << "\n";
+  bool IsInBundle = false;
+  for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+    const MachineInstr &MI = *I;
+    if (IsInBundle && !MI.isInsideBundle()) {
+      OS.indent(2) << "}\n";
+      IsInBundle = false;
+    }
+    OS.indent(IsInBundle ? 4 : 2);
+    print(MI);
+    if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+      OS << " {";
+      IsInBundle = true;
+    }
+    OS << "\n";
+  }
+  if (IsInBundle)
+    OS.indent(2) << "}\n";
+}
+
+/// Return true when an instruction has tied register that can't be determined
+/// by the instruction's descriptor.
+static bool hasComplexRegisterTies(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+  for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+    const auto &Operand = MI.getOperand(I);
+    if (!Operand.isReg() || Operand.isDef())
+      // Ignore the defined registers as MCID marks only the uses as tied.
+      continue;
+    int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+    int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
+    if (ExpectedTiedIdx != TiedIdx)
+      return true;
+  }
+  return false;
+}
+
 void MIPrinter::print(const MachineInstr &MI) {
   const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
   const auto *TRI = SubTarget.getRegisterInfo();
   assert(TRI && "Expected target register info");
   const auto *TII = SubTarget.getInstrInfo();
   assert(TII && "Expected target instruction info");
+  if (MI.isCFIInstruction())
+    assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
 
+  bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
   unsigned I = 0, E = MI.getNumOperands();
   for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
          !MI.getOperand(I).isImplicit();
        ++I) {
     if (I)
       OS << ", ";
-    print(MI.getOperand(I), TRI);
+    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true);
   }
 
   if (I)
     OS << " = ";
+  if (MI.getFlag(MachineInstr::FrameSetup))
+    OS << "frame-setup ";
   OS << TII->getName(MI.getOpcode());
-  // TODO: Print the instruction flags, machine mem operands.
   if (I < E)
     OS << ' ';
 
@@ -287,9 +555,27 @@ void MIPrinter::print(const MachineInstr &MI) {
   for (; I < E; ++I) {
     if (NeedComma)
       OS << ", ";
-    print(MI.getOperand(I), TRI);
+    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
     NeedComma = true;
   }
+
+  if (MI.getDebugLoc()) {
+    if (NeedComma)
+      OS << ',';
+    OS << " debug-location ";
+    MI.getDebugLoc()->printAsOperand(OS, MST);
+  }
+
+  if (!MI.memoperands_empty()) {
+    OS << " :: ";
+    bool NeedComma = false;
+    for (const auto *Op : MI.memoperands()) {
+      if (NeedComma)
+        OS << ", ";
+      print(*Op);
+      NeedComma = true;
+    }
+  }
 }
 
 void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
@@ -300,32 +586,225 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
   }
 }
 
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+static void printIRSlotNumber(raw_ostream &OS, int Slot) {
+  if (Slot == -1)
+    OS << "<badref>";
+  else
+    OS << Slot;
+}
+
+void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
+  OS << "%ir-block.";
+  if (BB.hasName()) {
+    printLLVMNameWithoutPrefix(OS, BB.getName());
+    return;
+  }
+  const Function *F = BB.getParent();
+  int Slot;
+  if (F == MST.getCurrentFunction()) {
+    Slot = MST.getLocalSlot(&BB);
+  } else {
+    ModuleSlotTracker CustomMST(F->getParent(),
+                                /*ShouldInitializeAllMetadata=*/false);
+    CustomMST.incorporateFunction(*F);
+    Slot = CustomMST.getLocalSlot(&BB);
+  }
+  printIRSlotNumber(OS, Slot);
+}
+
+void MIPrinter::printIRValueReference(const Value &V) {
+  if (isa<GlobalValue>(V)) {
+    V.printAsOperand(OS, /*PrintType=*/false, MST);
+    return;
+  }
+  if (isa<Constant>(V)) {
+    // Machine memory operands can load/store to/from constant value pointers.
+    OS << '`';
+    V.printAsOperand(OS, /*PrintType=*/true, MST);
+    OS << '`';
+    return;
+  }
+  OS << "%ir.";
+  if (V.hasName()) {
+    printLLVMNameWithoutPrefix(OS, V.getName());
+    return;
+  }
+  printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+  auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+  assert(ObjectInfo != StackObjectOperandMapping.end() &&
+         "Invalid frame index");
+  const FrameIndexOperand &Operand = ObjectInfo->second;
+  if (Operand.IsFixed) {
+    OS << "%fixed-stack." << Operand.ID;
+    return;
+  }
+  OS << "%stack." << Operand.ID;
+  if (!Operand.Name.empty())
+    OS << '.' << Operand.Name;
+}
+
+void MIPrinter::printOffset(int64_t Offset) {
+  if (Offset == 0)
+    return;
+  if (Offset < 0) {
+    OS << " - " << -Offset;
+    return;
+  }
+  OS << " + " << Offset;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags) {
+    if (I.first == TF) {
+      return I.second;
+    }
+  }
+  return nullptr;
+}
+
+void MIPrinter::printTargetFlags(const MachineOperand &Op) {
+  if (!Op.getTargetFlags())
+    return;
+  const auto *TII =
+      Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+  OS << "target-flags(";
+  const bool HasDirectFlags = Flags.first;
+  const bool HasBitmaskFlags = Flags.second;
+  if (!HasDirectFlags && !HasBitmaskFlags) {
+    OS << ") ";
+    return;
+  }
+  if (HasDirectFlags) {
+    if (const auto *Name = getTargetFlagName(TII, Flags.first))
+      OS << Name;
+    else
+      OS << "<unknown target flag>";
+  }
+  if (!HasBitmaskFlags) {
+    OS << ") ";
+    return;
+  }
+  bool IsCommaNeeded = HasDirectFlags;
+  unsigned BitMask = Flags.second;
+  auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &Mask : BitMasks) {
+    // Check if the flag's bitmask has the bits of the current mask set.
+    if ((BitMask & Mask.first) == Mask.first) {
+      if (IsCommaNeeded)
+        OS << ", ";
+      IsCommaNeeded = true;
+      OS << Mask.second;
+      // Clear the bits which were serialized from the flag's bitmask.
+      BitMask &= ~(Mask.first);
+    }
+  }
+  if (BitMask) {
+    // When the resulting flag's bitmask isn't zero, we know that we didn't
+    // serialize all of the bit flags.
+    if (IsCommaNeeded)
+      OS << ", ";
+    OS << "<unknown bitmask target flag>";
+  }
+  OS << ") ";
+}
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  for (const auto &I : Indices) {
+    if (I.first == Index) {
+      return I.second;
+    }
+  }
+  return nullptr;
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+                      unsigned I, bool ShouldPrintRegisterTies, bool IsDef) {
+  printTargetFlags(Op);
   switch (Op.getType()) {
   case MachineOperand::MO_Register:
-    // TODO: Print the other register flags.
     if (Op.isImplicit())
       OS << (Op.isDef() ? "implicit-def " : "implicit ");
+    else if (!IsDef && Op.isDef())
+      // Print the 'def' flag only when the operand is defined after '='.
+      OS << "def ";
+    if (Op.isInternalRead())
+      OS << "internal ";
    if (Op.isDead())
      OS << "dead ";
    if (Op.isKill())
      OS << "killed ";
    if (Op.isUndef())
      OS << "undef ";
+    if (Op.isEarlyClobber())
+      OS << "early-clobber ";
+    if (Op.isDebug())
+      OS << "debug-use ";
     printReg(Op.getReg(), OS, TRI);
     // Print the sub register.
     if (Op.getSubReg() != 0)
       OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+    if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
+      OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
     break;
   case MachineOperand::MO_Immediate:
     OS << Op.getImm();
     break;
+  case MachineOperand::MO_CImmediate:
+    Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
+  case MachineOperand::MO_FPImmediate:
+    Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
   case MachineOperand::MO_MachineBasicBlock:
     printMBBReference(*Op.getMBB());
     break;
+  case MachineOperand::MO_FrameIndex:
+    printStackObjectReference(Op.getIndex());
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "%const." << Op.getIndex();
+    printOffset(Op.getOffset());
+    break;
+  case MachineOperand::MO_TargetIndex: {
+    OS << "target-index(";
+    if (const auto *Name = getTargetIndexName(
+            *Op.getParent()->getParent()->getParent(), Op.getIndex()))
+      OS << Name;
+    else
+      OS << "<unknown>";
+    OS << ')';
+    printOffset(Op.getOffset());
+    break;
+  }
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "%jump-table." << Op.getIndex();
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << '$';
+    printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+    printOffset(Op.getOffset());
+    break;
   case MachineOperand::MO_GlobalAddress:
     Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
-    // TODO: Print offset and target flags.
+    printOffset(Op.getOffset());
+    break;
+  case MachineOperand::MO_BlockAddress:
+    OS << "blockaddress(";
+    Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+                                                        MST);
+    OS << ", ";
+    printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
+    OS << ')';
+    printOffset(Op.getOffset());
     break;
   case MachineOperand::MO_RegisterMask: {
     auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
@@ -335,9 +814,157 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
       llvm_unreachable("Can't print this machine register mask yet.");
     break;
   }
+  case MachineOperand::MO_RegisterLiveOut: {
+    const uint32_t *RegMask = Op.getRegLiveOut();
+    OS << "liveout(";
+    bool IsCommaNeeded = false;
+    for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+      if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+        if (IsCommaNeeded)
+          OS << ", ";
+        printReg(Reg, OS, TRI);
+        IsCommaNeeded = true;
+      }
+    }
+    OS << ")";
+    break;
+  }
+  case MachineOperand::MO_Metadata:
+    Op.getMetadata()->printAsOperand(OS, MST);
+    break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
+    break;
+  case MachineOperand::MO_CFIIndex: {
+    const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
+    print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+    break;
+  }
+  }
+}
+
+void MIPrinter::print(const MachineMemOperand &Op) {
+  OS << '(';
+  // TODO: Print operand's target specific flags.
+  if (Op.isVolatile())
+    OS << "volatile ";
+  if (Op.isNonTemporal())
+    OS << "non-temporal ";
+  if (Op.isInvariant())
+    OS << "invariant ";
+  if (Op.isLoad())
+    OS << "load ";
+  else {
+    assert(Op.isStore() && "Non load machine operand must be a store");
+    OS << "store ";
+  }
+  OS << Op.getSize() << (Op.isLoad() ? " from " : " into ");
+  if (const Value *Val = Op.getValue()) {
+    printIRValueReference(*Val);
+  } else {
+    const PseudoSourceValue *PVal = Op.getPseudoValue();
+    assert(PVal && "Expected a pseudo source value");
+    switch (PVal->kind()) {
+    case PseudoSourceValue::Stack:
+      OS << "stack";
+      break;
+    case PseudoSourceValue::GOT:
+      OS << "got";
+      break;
+    case PseudoSourceValue::JumpTable:
+      OS << "jump-table";
+      break;
+    case PseudoSourceValue::ConstantPool:
+      OS << "constant-pool";
+      break;
+    case PseudoSourceValue::FixedStack:
+      printStackObjectReference(
+          cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
+      break;
+    case PseudoSourceValue::GlobalValueCallEntry:
+      OS << "call-entry ";
+      cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+          OS, /*PrintType=*/false, MST);
+      break;
+    case PseudoSourceValue::ExternalSymbolCallEntry:
+      OS << "call-entry $";
+      printLLVMNameWithoutPrefix(
+          OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+      break;
+    }
+  }
+  printOffset(Op.getOffset());
+  if (Op.getBaseAlignment() != Op.getSize())
+    OS << ", align " << Op.getBaseAlignment();
+  auto AAInfo = Op.getAAInfo();
+  if (AAInfo.TBAA) {
+    OS << ", !tbaa ";
+    AAInfo.TBAA->printAsOperand(OS, MST);
+  }
+  if (AAInfo.Scope) {
+    OS << ", !alias.scope ";
+    AAInfo.Scope->printAsOperand(OS, MST);
+  }
+  if (AAInfo.NoAlias) {
+    OS << ", !noalias ";
+    AAInfo.NoAlias->printAsOperand(OS, MST);
+  }
+  if (Op.getRanges()) {
+    OS << ", !range ";
+    Op.getRanges()->printAsOperand(OS, MST);
+  }
+  OS << ')';
+}
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+                             const TargetRegisterInfo *TRI) {
+  int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+  if (Reg == -1) {
+    OS << "<badreg>";
+    return;
+  }
+  printReg(Reg, OS, TRI);
+}
+
+void MIPrinter::print(const MCCFIInstruction &CFI,
+                      const TargetRegisterInfo *TRI) {
+  switch (CFI.getOperation()) {
+  case MCCFIInstruction::OpSameValue:
+    OS << ".cfi_same_value ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpOffset:
+    OS << ".cfi_offset ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", " << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpDefCfaRegister:
+    OS << ".cfi_def_cfa_register ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpDefCfaOffset:
+    OS << ".cfi_def_cfa_offset ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    OS << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpDefCfa:
+    OS << ".cfi_def_cfa ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", " << CFI.getOffset();
+    break;
   default:
-    // TODO: Print the other machine operands.
-    llvm_unreachable("Can't print this machine operand at the moment");
+    // TODO: Print the other CFI Operations.
+    OS << "<unserializable cfi operation>";
+    break;
   }
 }
 
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 13d61e65d7e0..8e7566a4e46b 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -40,7 +40,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
-  virtual bool runOnMachineFunction(MachineFunction &MF) override {
+  bool runOnMachineFunction(MachineFunction &MF) override {
     std::string Str;
     raw_string_ostream StrOS(Str);
     printMIR(StrOS, MF);
@@ -48,7 +48,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
     return false;
   }
 
-  virtual bool doFinalization(Module &M) override {
+  bool doFinalization(Module &M) override {
     printMIR(OS, M);
     OS << MachineFunctions;
     return false;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 5d3f7ebaed29..76099f28499b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -38,22 +39,21 @@ using namespace llvm;
 
 #define DEBUG_TYPE "codegen"
 
-MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
-  : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
-    AddressTaken(false), CachedMCSymbol(nullptr) {
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+    : BB(B), Number(-1), xParent(&MF) {
   Insts.Parent = this;
 }
 
 MachineBasicBlock::~MachineBasicBlock() {
 }
 
-/// getSymbol - Return the MCSymbol for this basic block.
-///
+/// Return the MCSymbol for this basic block.
 MCSymbol *MachineBasicBlock::getSymbol() const {
   if (!CachedMCSymbol) {
     const MachineFunction *MF = getParent();
     MCContext &Ctx = MF->getContext();
     const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+    assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
     CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
                                            Twine(MF->getFunctionNumber()) +
                                            "_" + Twine(getNumber()));
@@ -68,9 +68,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   return OS;
 }
 
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
-/// parent pointer of the MBB, the MBB numbering, and any instructions in the
-/// MBB to be on the right operand list for registers.
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
+/// operand list for registers.
 ///
 /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
 /// gets the next available unique MBB number. If it is removed from a
@@ -91,10 +91,8 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
   N->Number = -1;
 }
 
-
-/// addNodeToList (MI) - When we add an instruction to a basic block
-/// list, we update its parent pointer and add its operands from reg use/def
-/// lists if appropriate.
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
 void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   assert(!N->getParent() && "machine instruction already in a basic block");
   N->setParent(Parent);
@@ -105,9 +103,8 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   N->AddRegOperandsToUseLists(MF->getRegInfo());
 }
 
-/// removeNodeFromList (MI) - When we remove an instruction from a basic block
-/// list, we update its parent pointer and remove its operands from reg use/def
-/// lists if appropriate.
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
 void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   assert(N->getParent() && "machine instruction not in a basic block");
 
@@ -118,23 +115,22 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   N->setParent(nullptr);
 }
 
-/// transferNodesFromList (MI) - When moving a range of instructions from one
-/// MBB list to another, we need to update the parent pointers and the use/def
-/// lists.
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
 void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &fromList,
-                      ilist_iterator<MachineInstr> first,
-                      ilist_iterator<MachineInstr> last) {
-  assert(Parent->getParent() == fromList.Parent->getParent() &&
+transferNodesFromList(ilist_traits<MachineInstr> &FromList,
+                      ilist_iterator<MachineInstr> First,
+                      ilist_iterator<MachineInstr> Last) {
+  assert(Parent->getParent() == FromList.Parent->getParent() &&
         "MachineInstr parent mismatch!");
 
   // Splice within the same MBB -> no change.
-  if (Parent == fromList.Parent) return;
+  if (Parent == FromList.Parent) return;
 
   // If splicing between two blocks within the same function, just update the
   // parent pointers.
-  for (; first != last; ++first)
-    first->setParent(Parent);
+  for (; First != Last; ++First)
+    First->setParent(Parent);
 }
 
 void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
@@ -208,11 +204,18 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
   if (succ_size() > 2)
     return nullptr;
   for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
-    if ((*I)->isLandingPad())
+    if ((*I)->isEHPad())
       return *I;
   return nullptr;
 }
 
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+  for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+    if ((*I)->isEHPad())
+      return true;
+  return false;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineBasicBlock::dump() const {
   print(dbgs());
@@ -271,7 +274,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     LBB->printAsOperand(OS, /*PrintType=*/false, MST);
     Comma = ", ";
   }
-  if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+  if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
   if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
   if (Alignment)
     OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
@@ -283,8 +286,11 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
   if (!livein_empty()) {
     if (Indexes) OS << '\t';
     OS << "    Live Ins:";
-    for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
-      OS << ' ' << PrintReg(*I, TRI);
+    for (const auto &LI : make_range(livein_begin(), livein_end())) {
+      OS << ' ' << PrintReg(LI.PhysReg, TRI);
+      if (LI.LaneMask != ~0u)
+        OS << ':' << PrintLaneMask(LI.LaneMask);
+    }
     OS << '\n';
   }
   // Print the preds of this block according to the CFG.
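 // A minimal illustrative sketch of the lane-mask-aware live-in list used in
 // the hunk above; assumptions: the RegisterMaskPair field names follow the
 // r256633 headers, and `MBB` is a hypothetical block pointer, not a name from
 // this patch:
 //
 //   for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins()) {
 //     MCPhysReg PhysReg = LI.PhysReg; // physical register that is live-in
 //     LaneBitmask Mask = LI.LaneMask; // ~0u means every lane is live
 //     if (Mask != ~0u)
 //       dbgs() << PrintReg(PhysReg) << " is only partially live-in\n";
 //   }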
@@ -298,8 +304,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
 
   for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
     if (Indexes) {
-      if (Indexes->hasIndex(I))
-        OS << Indexes->getInstructionIndex(I);
+      if (Indexes->hasIndex(&*I))
+        OS << Indexes->getInstructionIndex(&*I);
       OS << '\t';
     }
     OS << '\t';
@@ -314,35 +320,63 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     OS << "    Successors according to CFG:";
     for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
       OS << " BB#" << (*SI)->getNumber();
-      if (!Weights.empty())
-        OS << '(' << *getWeightIterator(SI) << ')';
+      if (!Probs.empty())
+        OS << '(' << *getProbabilityIterator(SI) << ')';
     }
     OS << '\n';
   }
 }
 
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+                                       bool /*PrintType*/) const {
   OS << "BB#" << getNumber();
 }
 
-void MachineBasicBlock::removeLiveIn(unsigned Reg) {
-  std::vector<unsigned>::iterator I =
-    std::find(LiveIns.begin(), LiveIns.end(), Reg);
-  if (I != LiveIns.end())
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+  LiveInVector::iterator I = std::find_if(
+      LiveIns.begin(), LiveIns.end(),
+      [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+  if (I == LiveIns.end())
+    return;
+
+  I->LaneMask &= ~LaneMask;
+  if (I->LaneMask == 0)
     LiveIns.erase(I);
 }
 
-bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
-  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
-  return I != livein_end();
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+  livein_iterator I = std::find_if(
+      LiveIns.begin(), LiveIns.end(),
+      [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+  return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+  std::sort(LiveIns.begin(), LiveIns.end(),
+            [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+              return LI0.PhysReg < LI1.PhysReg;
+            });
+  // Liveins are sorted by physreg now we can merge their lanemasks.
+  LiveInVector::const_iterator I = LiveIns.begin();
+  LiveInVector::const_iterator J;
+  LiveInVector::iterator Out = LiveIns.begin();
+  for (; I != LiveIns.end(); ++Out, I = J) {
+    unsigned PhysReg = I->PhysReg;
+    LaneBitmask LaneMask = I->LaneMask;
+    for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+      LaneMask |= J->LaneMask;
+    Out->PhysReg = PhysReg;
+    Out->LaneMask = LaneMask;
+  }
+  LiveIns.erase(Out, LiveIns.end());
 }
 
 unsigned
-MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
   assert(getParent() && "MBB must be inserted in function");
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
   assert(RC && "Register class is required");
-  assert((isLandingPad() || this == &getParent()->front()) &&
+  assert((isEHPad() || this == &getParent()->front()) &&
          "Only the entry block and landing pads can have physreg live ins");
 
   bool LiveIn = isLiveIn(PhysReg);
@@ -370,12 +404,11 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
 }
 
 void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
-  getParent()->splice(NewAfter, this);
+  getParent()->splice(NewAfter->getIterator(), getIterator());
 }
 
 void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
-  MachineFunction::iterator BBI = NewBefore;
-  getParent()->splice(++BBI, this);
+  getParent()->splice(++NewBefore->getIterator(), getIterator());
 }
 
 void MachineBasicBlock::updateTerminator() {
@@ -385,7 +418,7 @@ void MachineBasicBlock::updateTerminator() {
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   SmallVector<MachineOperand, 4> Cond;
-  DebugLoc dl;  // FIXME: this is nowhere
+  DebugLoc DL;  // FIXME: this is nowhere
   bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
   (void) B;
   assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -400,7 +433,7 @@ void MachineBasicBlock::updateTerminator() {
     // its layout successor, insert a branch. First we have to locate the
     // only non-landing-pad successor, as that is the fallthrough block.
     for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
-      if ((*SI)->isLandingPad())
+      if ((*SI)->isEHPad())
         continue;
       assert(!TBB && "Found more than one non-landing-pad successor!");
       TBB = *SI;
@@ -414,7 +447,7 @@ void MachineBasicBlock::updateTerminator() {
       // Finally update the unconditional successor to be reached via a branch
       // if it would not be reached by fallthrough.
       if (!isLayoutSuccessor(TBB))
-        TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
     }
   } else {
     if (FBB) {
@@ -425,10 +458,10 @@ void MachineBasicBlock::updateTerminator() {
         if (TII->ReverseBranchCondition(Cond))
           return;
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, FBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
       } else if (isLayoutSuccessor(FBB)) {
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
       }
     } else {
       // Walk through the successors and find the successor which is not
@@ -436,7 +469,7 @@ void MachineBasicBlock::updateTerminator() {
      // as the fallthrough successor.
MachineBasicBlock *FallthroughBB = nullptr; for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isLandingPad() || *SI == TBB) + if ((*SI)->isEHPad() || *SI == TBB) continue; assert(!FallthroughBB && "Found more than one fallthrough successor."); FallthroughBB = *SI; @@ -445,14 +478,14 @@ void MachineBasicBlock::updateTerminator() { // We fallthrough to the same basic block as the conditional jump // targets. Remove the conditional jump, leaving unconditional // fallthrough. - // FIXME: This does not seem like a reasonable pattern to support, but it - // has been seen in the wild coming out of degenerate ARM test cases. + // FIXME: This does not seem like a reasonable pattern to support, but + // it has been seen in the wild coming out of degenerate ARM test cases. TII->RemoveBranch(*this); // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); return; } @@ -461,55 +494,69 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); } } } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { +void MachineBasicBlock::validateSuccProbs() const { +#ifndef NDEBUG + int64_t Sum = 0; + for (auto Prob : Probs) + Sum += Prob.getNumerator(); + // Due to precision issue, we assume that the sum of probabilities is one if + // the difference between the sum of their numerators and the denominator is + // no greater than the number of successors. + assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <= + Probs.size() && + "The sum of successors's probabilities exceeds one."); +#endif // NDEBUG +} - // If we see non-zero value for the first time it means we actually use Weight - // list, so we fill all Weights with 0's. - if (weight != 0 && Weights.empty()) - Weights.resize(Successors.size()); +void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob) { + // Probability list is either empty (if successor list isn't empty, this means + // disabled optimization) or has the same size as successor list. + if (!(Probs.empty() && !Successors.empty())) + Probs.push_back(Prob); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - if (weight != 0 || !Weights.empty()) - Weights.push_back(weight); +void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { + // We need to make sure probability list is either empty or has the same size + // of successor list. When this function is called, we can safely delete all + // probability in the list. 
+ Probs.clear(); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - Successors.push_back(succ); - succ->addPredecessor(this); - } - -void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { - succ->removePredecessor(this); - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - assert(I != Successors.end() && "Not a current successor!"); - - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); - } - - Successors.erase(I); +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + removeSuccessor(I, NormalizeSuccProbs); } MachineBasicBlock::succ_iterator -MachineBasicBlock::removeSuccessor(succ_iterator I) { +MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) { assert(I != Successors.end() && "Not a current successor!"); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); + // If probability list is empty it means we don't use it (disabled + // optimization). + if (!Probs.empty()) { + probability_iterator WI = getProbabilityIterator(I); + Probs.erase(WI); + if (NormalizeSuccProbs) + normalizeSuccProbs(); } (*I)->removePredecessor(this); @@ -537,74 +584,77 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, } } assert(OldI != E && "Old is not a successor of this block"); - Old->removePredecessor(this); // If New isn't already a successor, let it take Old's place. if (NewI == E) { + Old->removePredecessor(this); New->addPredecessor(this); *OldI = New; return; } // New is already a successor. - // Update its weight instead of adding a duplicate edge. - if (!Weights.empty()) { - weight_iterator OldWI = getWeightIterator(OldI); - *getWeightIterator(NewI) += *OldWI; - Weights.erase(OldWI); + // Update its probability instead of adding a duplicate edge. + if (!Probs.empty()) { + auto ProbIter = getProbabilityIterator(NewI); + if (!ProbIter->isUnknown()) + *ProbIter += *getProbabilityIterator(OldI); } - Successors.erase(OldI); + removeSuccessor(OldI); } -void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { - Predecessors.push_back(pred); +void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { + Predecessors.push_back(Pred); } -void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); +void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); + // If probability list is empty it means we don't use it (disabled optimization). 
+ if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + FromMBB->removeSuccessor(Succ); } } void -MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); + FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == fromMBB) + if (MO.getMBB() == FromMBB) MO.setMBB(this); } } + normalizeSuccProbs(); } bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { @@ -621,14 +671,14 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { } bool MachineBasicBlock::canFallThrough() { - MachineFunction::iterator Fallthrough = this; + MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == getParent()->end()) return false; // If FallthroughBlock isn't a successor, no fallthrough is possible. - if (!isSuccessor(Fallthrough)) + if (!isSuccessor(&*Fallthrough)) return false; // Analyze the branches, if any, at the end of the block. @@ -666,11 +716,11 @@ MachineBasicBlock * MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (Succ->isLandingPad()) + if (Succ->isEHPad()) return nullptr; MachineFunction *MF = getParent(); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc DL; // FIXME: this is nowhere // Performance might be harmed on HW that implements branching using exec mask // where both sides of the branches are always executed. 
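For intuition, the successor hunks above replace the old parallel Weights vector with a Probs vector that must stay index-aligned with Successors, and removals may renormalize what remains. Here is a minimal standalone sketch of that invariant, not code from this patch: LLVM's fixed-point BranchProbability is simplified to raw numerators over a fixed denominator, and the names are invented for the sketch.

#include <cassert>
#include <cstdint>
#include <vector>

// Simplified stand-in for MachineBasicBlock's successor bookkeeping: Probs
// must either be empty (probability tracking disabled) or stay the same
// length as Successors, and removals may renormalize the remainder.
struct Block {
  static const uint32_t Denom = 1u << 31; // fixed common denominator

  std::vector<Block *> Successors;
  std::vector<uint32_t> Probs; // numerators over Denom, parallel to Successors

  void addSuccessor(Block *Succ, uint32_t Num) {
    Successors.push_back(Succ);
    Probs.push_back(Num);
    assert(Probs.size() == Successors.size() && "lists out of sync");
  }

  void removeSuccessor(size_t I, bool Normalize) {
    Successors.erase(Successors.begin() + I);
    Probs.erase(Probs.begin() + I);
    if (Normalize)
      normalizeSuccProbs();
  }

  // Rescale the remaining numerators so they sum to Denom again.
  void normalizeSuccProbs() {
    uint64_t Sum = 0;
    for (uint32_t N : Probs)
      Sum += N;
    if (Sum == 0)
      return;
    for (uint32_t &N : Probs)
      N = static_cast<uint32_t>(uint64_t(N) * Denom / Sum);
  }
};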
@@ -719,7 +769,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LV) for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { if (!OI->isReg() || OI->getReg() == 0 || @@ -739,7 +789,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { @@ -761,7 +811,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (Indexes) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - Terminators.push_back(I); + Terminators.push_back(&*I); } updateTerminator(); @@ -770,7 +820,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { SmallVector NewTerminators; for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - NewTerminators.push_back(I); + NewTerminators.push_back(&*I); for (SmallVectorImpl::iterator I = Terminators.begin(), E = Terminators.end(); I != E; ++I) { @@ -784,17 +834,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, - dl); + TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); I != E; ++I) { // Some instructions may have been moved to NMBB by updateTerminator(), // so we first remove any instruction that already has an index. - if (Indexes->hasIndex(I)) - Indexes->removeMachineInstrFromMaps(I); - Indexes->insertMachineInstrInMaps(I); + if (Indexes->hasIndex(&*I)) + Indexes->removeMachineInstrFromMaps(&*I); + Indexes->insertMachineInstrInMaps(&*I); } } } @@ -808,9 +857,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { i->getOperand(ni+1).setMBB(NMBB); // Inherit live-ins from the successor - for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) - NMBB->addLiveIn(*I); + for (const auto &LI : Succ->liveins()) + NMBB->addLiveIn(LI); // Update LiveVariables. const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -822,7 +870,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) - LV->getVarInfo(Reg).Kills.push_back(I); + LV->getVarInfo(Reg).Kills.push_back(&*I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -834,10 +882,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { // After splitting the edge and updating SlotIndexes, live intervals may be // in one of two situations, depending on whether this block was the last in - // the function. If the original block was the last in the function, all live - // intervals will end prior to the beginning of the new split block. If the - // original block was not at the end of the function, all live intervals will - // extend to the end of the new split block. + // the function. 
If the original block was the last in the function, all + // live intervals will end prior to the beginning of the new split block. If + // the original block was not at the end of the function, all live intervals + // will extend to the end of the new split block. bool isLastMBB = std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); @@ -861,7 +909,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); - assert(VNI && "PHI sources should be live out of their predecessors."); + assert(VNI && + "PHI sources should be live out of their predecessors."); LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } @@ -941,7 +990,7 @@ static void unbundleSingleMI(MachineInstr *MI) { MachineBasicBlock::instr_iterator MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) { - unbundleSingleMI(I); + unbundleSingleMI(&*I); return Insts.erase(I); } @@ -964,25 +1013,22 @@ MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) { return Insts.insert(I, MI); } -/// removeFromParent - This method unlinks 'this' from the containing function, -/// and returns it, but does not delete it. +/// This method unlinks 'this' from the containing function, and returns it, but +/// does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->remove(this); return this; } - -/// eraseFromParent - This method unlinks 'this' from the containing function, -/// and deletes it. +/// This method unlinks 'this' from the containing function, and deletes it. void MachineBasicBlock::eraseFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->erase(this); } - -/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to -/// 'Old', change the code and CFG so that it branches to 'New' instead. +/// Given a machine basic block that branched to 'Old', change the code and CFG +/// so that it branches to 'New' instead. void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); @@ -1004,46 +1050,44 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } -/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the -/// CFG to be inserted. If we have proven that MBB can only branch to DestA and -/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be -/// null. +/// Various pieces of code can cause excess edges in the CFG to be inserted. If +/// we have proven that MBB can only branch to DestA and DestB, remove any other +/// MBB successors from the CFG. DestA and DestB can be null. /// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, - bool isCond) { + bool IsCond) { // The values of DestA and DestB frequently come from a call to the // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial // values from there. // // 1. If both DestA and DestB are null, then the block ends with no branches // (it falls through to its successor). - // 2. If DestA is set, DestB is null, and isCond is false, then the block ends + // 2. 
If DestA is set, DestB is null, and IsCond is false, then the block ends // with only an unconditional branch. - // 3. If DestA is set, DestB is null, and isCond is true, then the block ends + // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends // with a conditional branch that falls through to a successor (DestB). - // 4. If DestA and DestB is set and isCond is true, then the block ends with a + // 4. If DestA and DestB are set and IsCond is true, then the block ends with a // conditional branch followed by an unconditional branch. DestA is the // 'true' destination and DestB is the 'false' destination. bool Changed = false; - MachineFunction::iterator FallThru = - std::next(MachineFunction::iterator(this)); + MachineFunction::iterator FallThru = std::next(getIterator()); if (!DestA && !DestB) { // Block falls through to successor. - DestA = FallThru; - DestB = FallThru; + DestA = &*FallThru; + DestB = &*FallThru; } else if (DestA && !DestB) { - if (isCond) + if (IsCond) // Block ends in conditional jump that falls through to successor. - DestB = FallThru; + DestB = &*FallThru; } else { - assert(DestA && DestB && isCond && + assert(DestA && DestB && IsCond && "CFG in a bad state. Cannot correct CFG edges"); } @@ -1054,7 +1098,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; if (!SeenMBBs.insert(MBB).second || - (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { + (MBB != DestA && MBB != DestB && !MBB->isEHPad())) { // This is a superfluous edge, remove it. SI = removeSuccessor(SI); Changed = true; @@ -1063,11 +1107,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } } + if (Changed) + normalizeSuccProbs(); return Changed; } -/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping -/// any DBG_VALUE instructions. Return UnknownLoc if there is none. +/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE +/// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; @@ -1083,40 +1129,55 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return DL; } -/// getSuccWeight - Return weight of the edge from this block to MBB. -/// -uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { - if (Weights.empty()) - return 0; +/// Return probability of the edge from this block to MBB. +BranchProbability +MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { + if (Probs.empty()) + return BranchProbability(1, succ_size()); - return *getWeightIterator(Succ); + const auto &Prob = *getProbabilityIterator(Succ); + if (Prob.isUnknown()) { + // For unknown probabilities, collect the sum of all known ones, and evenly + // distribute the complement of the sum to each unknown probability. + unsigned KnownProbNum = 0; + auto Sum = BranchProbability::getZero(); + for (auto &P : Probs) { + if (!P.isUnknown()) { + Sum += P; + KnownProbNum++; + } + } + return Sum.getCompl() / (Probs.size() - KnownProbNum); + } else + return Prob; } -/// Set successor weight of a given iterator.
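For intuition, the unknown-probability handling in getSuccProbability() above amounts to the following arithmetic, sketched here with plain doubles instead of BranchProbability (names are invented for the sketch):

#include <cstddef>
#include <vector>

// Each entry is an edge probability in [0, 1], or -1.0 meaning "unknown".
double succProbability(const std::vector<double> &Probs, size_t Idx) {
  if (Probs[Idx] >= 0.0)
    return Probs[Idx]; // known probabilities are returned as-is
  double KnownSum = 0.0;
  size_t KnownNum = 0;
  for (double P : Probs)
    if (P >= 0.0) {
      KnownSum += P;
      ++KnownNum;
    }
  // Distribute the complement of the known sum evenly over unknown edges.
  return (1.0 - KnownSum) / double(Probs.size() - KnownNum);
}

With {0.5, unknown, unknown}, each unknown edge reports (1 - 0.5) / 2 = 0.25, matching the Sum.getCompl() / (Probs.size() - KnownProbNum) expression above.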
+void MachineBasicBlock::setSuccProbability(succ_iterator I, + BranchProbability Prob) { + assert(!Prob.isUnknown()); + if (Probs.empty()) return; - *getWeightIterator(I) = weight; + *getProbabilityIterator(I) = Prob; } -/// getWeightIterator - Return wight iterator corresonding to the I successor -/// iterator -MachineBasicBlock::weight_iterator MachineBasicBlock:: -getWeightIterator(MachineBasicBlock::succ_iterator I) { - assert(Weights.size() == Successors.size() && "Async weight list!"); - size_t index = std::distance(Successors.begin(), I); - assert(index < Weights.size() && "Not a current successor!"); - return Weights.begin() + index; -} - -/// getWeightIterator - Return wight iterator corresonding to the I successor -/// iterator -MachineBasicBlock::const_weight_iterator MachineBasicBlock:: -getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { - assert(Weights.size() == Successors.size() && "Async weight list!"); +/// Return probability iterator corresponding to the I successor iterator +MachineBasicBlock::const_probability_iterator +MachineBasicBlock::getProbabilityIterator( + MachineBasicBlock::const_succ_iterator I) const { + assert(Probs.size() == Successors.size() && "Async probability list!"); const size_t index = std::distance(Successors.begin(), I); - assert(index < Weights.size() && "Not a current successor!"); - return Weights.begin() + index; + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; +} + +/// Return probability iterator corresponding to the I successor iterator. +MachineBasicBlock::probability_iterator +MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { + assert(Probs.size() == Successors.size() && "Async probability list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; } /// Return whether (physical) register "Reg" has been defined and not killed @@ -1138,33 +1199,33 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, do { --I; - MachineOperandIteratorBase::PhysRegInfo Analysis = + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.Defines) - // Outputs happen after inputs so they take precedence if both are - // present. - return Analysis.DefinesDead ? LQR_Dead : LQR_Live; + // Defs happen after uses so they take precedence if both are present. - if (Analysis.Kills || Analysis.Clobbers) - // Register killed, so isn't live. + // Register is dead after a dead def of the full register. + if (Info.DeadDef) return LQR_Dead; - - else if (Analysis.ReadsOverlap) - // Defined or read without a previous kill - live. - return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; - + // Register is (at least partially) live after a def. + if (Info.Defined) + return LQR_Live; + // Register is dead after a full kill or clobber and no def. + if (Info.Killed || Info.Clobbered) + return LQR_Dead; + // Register must be live if we read it. + if (Info.Read) + return LQR_Live; } while (I != begin() && --N > 0); } // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. - for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); - RAI.isValid(); ++RAI) { + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); + ++RAI) if (isLiveIn(*RAI)) - return (*RAI == Reg) ?
LQR_Live : LQR_OverlappingLive; - } + return LQR_Live; return LQR_Dead; } @@ -1176,16 +1237,14 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // If this is the last insn in the block, don't search forwards. if (I != end()) { for (++I; I != end() && N > 0; ++I, --N) { - MachineOperandIteratorBase::PhysRegInfo Analysis = + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.ReadsOverlap) - // Used, therefore must have been live. - return (Analysis.Reads) ? - LQR_Live : LQR_OverlappingLive; - - else if (Analysis.Clobbers || Analysis.Defines) - // Defined (but not read) therefore cannot have been live. + // Register is live when we read it here. + if (Info.Read) + return LQR_Live; + // Register is dead if we can fully overwrite or clobber it here. + if (Info.FullyDefined || Info.Clobbered) return LQR_Dead; } } @@ -1193,3 +1252,17 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // At this point we have no idea of the liveness of the register. return LQR_Unknown; } + +const uint32_t * +MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const { + // EH funclet entry does not preserve any registers. + return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr; +} + +const uint32_t * +MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { + // If we see a return block with successors, this must be a funclet return, + // which does not preserve any registers. If there are no successors, we don't + // care what kind of return it is, putting a mask after it is a no-op. + return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr; +} diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9151d99089d6..9119e31bdb3c 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -57,7 +57,7 @@ struct GraphTraits { static inline const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return &G->getFunction()->front(); } static ChildIteratorType child_begin(const NodeType *N) { @@ -143,7 +143,7 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { MachineLoopInfo &MLI = getAnalysis(); if (!MBFI) MBFI.reset(new ImplType); - MBFI->doFunction(&F, &MBPI, &MLI); + MBFI->calculate(F, MBPI, MLI); #ifndef NDEBUG if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { view(); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 2969bad4ff98..f5e305645011 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -51,7 +51,7 @@ using namespace llvm; #define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); -STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(NumUncondBranches, "Number of unconditional branches"); STATISTIC(CondBranchTakenFreq, "Potential frequency of taking conditional branches"); STATISTIC(UncondBranchTakenFreq, @@ -62,6 +62,11 @@ static cl::opt AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +static cl::opt + AlignAllLoops("align-all-loops", + cl::desc("Force the alignment of all loops in the function."), + cl::init(0), cl::Hidden); + // FIXME: Find a good default for this flag and remove the flag. 
static cl::opt<unsigned> ExitBlockBias( "block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " @@ -81,6 +86,29 @@ static cl::opt<unsigned> OutlineOptionalThreshold( "instruction count below this threshold"), cl::init(4), cl::Hidden); +static cl::opt<unsigned> LoopToColdBlockRatio( + "loop-to-cold-block-ratio", + cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " + "(frequency of block) is greater than this ratio"), + cl::init(5), cl::Hidden); + +static cl::opt<bool> + PreciseRotationCost("precise-rotation-cost", + cl::desc("Model the cost of loop rotation more " + "precisely by using profile data."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> MisfetchCost( + "misfetch-cost", + cl::desc("Cost that models the probabilistic risk of an instruction " + "misfetch due to a jump, compared to falling through, whose cost " + "is zero."), + cl::init(1), cl::Hidden); + +static cl::opt<unsigned> JumpInstCost("jump-inst-cost", + cl::desc("Cost of jump instructions."), + cl::init(1), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -246,9 +274,12 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); + BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, const BlockFilterSet &LoopBlockSet); + void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); public: @@ -354,31 +385,56 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; - // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we manually compute probabilities using the edge - // weights. This is suboptimal as it means that the somewhat subtle - // definition of edge weight semantics is encoded here as well. We should - // improve the MBPI interface to efficiently support query patterns such as - // this. - uint32_t BestWeight = 0; - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock *Succ : BB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); - continue; - } - if (Succ != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); - continue; - } + auto BestProb = BranchProbability::getZero(); - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // Adjust edge probabilities by excluding edges pointing to blocks that are + // either not in BlockFilter or are already in the current chain. Consider the + // following CFG: + // + // --->A + // | / \ + // | B C + // | \ / \ + // ----D E + // + // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after + // A->C is chosen as a fall-through, D won't be selected as a successor of C + // due to CFG constraint (the probability of C->D is not greater than + // HotProb).
If we exclude E that is not in BlockFilter when calculating the + // probability of C->D, D will be selected and we will get A C D B as the + // layout of this loop. + auto AdjustedSumProb = BranchProbability::getOne(); + SmallVector Successors; + for (MachineBasicBlock *Succ : BB->successors()) { + bool SkipSucc = false; + if (BlockFilter && !BlockFilter->count(Succ)) { + SkipSucc = true; + } else { + BlockChain *SuccChain = BlockToChain[Succ]; + if (SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(Succ) + << " -> Already merged!\n"); + SkipSucc = true; + } else if (Succ != *SuccChain->begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); + continue; + } + } + if (SkipSucc) + AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ); + else + Successors.push_back(Succ); + } + + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock *Succ : Successors) { + BranchProbability SuccProb; + uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator(); + uint32_t SuccProbD = AdjustedSumProb.getNumerator(); + if (SuccProbN >= SuccProbD) + SuccProb = BranchProbability::getOne(); + else + SuccProb = BranchProbability(SuccProbN, SuccProbD); // If we outline optional branches, look whether Succ is unavoidable, i.e. // dominates all terminators of the MachineFunction. If it does, other @@ -406,6 +462,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. + BlockChain &SuccChain = *BlockToChain[Succ]; if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb @@ -415,8 +472,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Make sure that a hot successor doesn't have a globally more // important predecessor. + auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BlockFrequency CandidateEdgeFreq = - MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl(); bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || @@ -440,10 +498,10 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestWeight >= SuccWeight) + if (BestSucc && BestProb >= SuccProb) continue; BestSucc = Succ; - BestWeight = SuccWeight; + BestProb = SuccProb; } return BestSucc; } @@ -505,14 +563,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( const BlockFilterSet *BlockFilter) { for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; ++I) { - if (BlockFilter && !BlockFilter->count(I)) + if (BlockFilter && !BlockFilter->count(&*I)) continue; - if (BlockToChain[I] != &PlacedChain) { + if (BlockToChain[&*I] != &PlacedChain) { PrevUnplacedBlockIt = I; // Now select the head of the chain to which the unplaced block belongs // as the block to place. This will force the entire chain to be placed, // and satisfies the requirements of merging chains. 
- return *BlockToChain[I]->begin(); + return *BlockToChain[&*I]->begin(); } } return nullptr; @@ -672,13 +730,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; bool HasLoopingSucc = false; - // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights and manually compute the - // probabilities to avoid quadratic behavior. - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); for (MachineBasicBlock *Succ : MBB->successors()) { - if (Succ->isLandingPad()) + if (Succ->isEHPad()) continue; if (Succ == MBB) continue; @@ -690,10 +743,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); if (LoopBlockSet.count(Succ)) { DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (" << SuccWeight << ")\n"); + << getBlockName(Succ) << " (" << SuccProb << ")\n"); HasLoopingSucc = true; continue; } @@ -705,7 +758,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, BlocksExitingToOuterLoop.insert(MBB); } - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; @@ -791,6 +843,188 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } +/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// +/// With profile data, we can determine the cost in terms of missed fall through +/// opportunities when rotating a loop chain and select the best rotation. +/// Basically, there are three kinds of cost to consider for each rotation: +/// 1. The possibly missed fall through edge (if it exists) from BB out of +/// the loop to the loop header. +/// 2. The possibly missed fall through edges (if they exist) from the loop +/// exits to BB out of the loop. +/// 3. The missed fall through edge (if it exists) from the last BB to the +/// first BB in the loop chain. +/// Therefore, the cost for a given rotation is the sum of costs listed above. +/// We select the best rotation with the smallest cost. +void MachineBlockPlacement::rotateLoopWithProfile( + BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + auto HeaderBB = L.getHeader(); + auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto RotationPos = LoopChain.end(); + + BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); + + // A utility lambda that scales up a block frequency by dividing it by a + // branch probability which is the reciprocal of the scale. + auto ScaleBlockFrequency = [](BlockFrequency Freq, + unsigned Scale) -> BlockFrequency { + if (Scale == 0) + return 0; + // Use operator / between BlockFrequency and BranchProbability to implement + // saturating multiplication. + return Freq / BranchProbability(1, Scale); + }; + + // Compute the cost of the missed fall-through edge to the loop header if the + // chain head is not the loop header. As we only consider natural loops with + // single header, this computation can be done only once. 
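// For intuition (numbers invented for illustration): with the default
// MisfetchCost = 1 and JumpInstCost = 1, a predecessor outside the loop with
// frequency 100 and a 1/2 edge probability into the header gives
// EdgeFreq = 50 and FallThruCost = ScaleBlockFrequency(50, 1) = 50; if the
// header is that predecessor's only successor, the unconditional jump adds
// another 50, so it contributes a candidate HeaderFallThroughCost of 100.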
+ BlockFrequency HeaderFallThroughCost(0); + for (auto *Pred : HeaderBB->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + auto EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); + // If the predecessor has only an unconditional jump to the header, we + // need to consider the cost of this jump. + if (Pred->succ_size() == 1) + FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost); + HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost); + } + } + + // Here we collect all exit blocks in the loop, and for each exit we find its + // hottest exit edge. For each loop rotation, we define the loop exit cost + // as the sum of frequencies of exit edges we collect here, excluding the exit + // edge from the tail of the loop chain. + SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; + for (auto BB : LoopChain) { + auto LargestExitEdgeProb = BranchProbability::getZero(); + for (auto *Succ : BB->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) { + auto SuccProb = MBPI->getEdgeProbability(BB, Succ); + LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb); + } + } + if (LargestExitEdgeProb > BranchProbability::getZero()) { + auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb; + ExitsWithFreq.emplace_back(BB, ExitFreq); + } + } + + // In this loop we iterate over every block in the loop chain and calculate + // the cost assuming the block is the head of the loop chain. When the loop + // ends, we should have found the best candidate as the loop chain's head. + for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()), + EndIter = LoopChain.end(); + Iter != EndIter; Iter++, TailIter++) { + // TailIter is used to track the tail of the loop chain if the block we are + // checking (pointed by Iter) is the head of the chain. + if (TailIter == LoopChain.end()) + TailIter = LoopChain.begin(); + + auto TailBB = *TailIter; + + // Calculate the cost by putting this BB to the top. + BlockFrequency Cost = 0; + + // If the current BB is not the loop header, we need to take into account + // the cost of the missed fall through edge from outside of the loop to the + // header. + if (Iter != HeaderIter) + Cost += HeaderFallThroughCost; + + // Collect the loop exit cost by summing up frequencies of all exit edges + // except the one from the chain tail. + for (auto &ExitWithFreq : ExitsWithFreq) + if (TailBB != ExitWithFreq.first) + Cost += ExitWithFreq.second; + + // The cost of breaking the once fall-through edge from the tail to the top + // of the loop chain. Here we need to consider three cases: + // 1. If the tail node has only one successor, then we will get an + // additional jmp instruction. So the cost here is (MisfetchCost + + // JumpInstCost) * tail node frequency. + // 2. If the tail node has two successors, then we may still get an + // additional jmp instruction if the layout successor after the loop + // chain is not its CFG successor. Note that the more frequently executed + // jmp instruction will be put ahead of the other one. Assume the + // frequencies of those two branches are x and y, where x is the frequency + // of the edge to the chain head, then the cost will be + // (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency. + // 3.
If the tail node has more than two successors (this rarely happens), + // we won't consider any additional cost. + if (TailBB->isSuccessor(*Iter)) { + auto TailBBFreq = MBFI->getBlockFreq(TailBB); + if (TailBB->succ_size() == 1) + Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(), + MisfetchCost + JumpInstCost); + else if (TailBB->succ_size() == 2) { + auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter); + auto TailToHeadFreq = TailBBFreq * TailToHeadProb; + auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2) + ? TailBBFreq * TailToHeadProb.getCompl() + : TailToHeadFreq; + Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) + + ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost); + } + } + + DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter) + << " to the top: " << Cost.getFrequency() << "\n"); + + if (Cost < SmallestRotationCost) { + SmallestRotationCost = Cost; + RotationPos = Iter; + } + } + + if (RotationPos != LoopChain.end()) { + DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos) + << " to the top\n"); + std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); + } +} + +/// \brief Collect blocks in the given loop that are to be placed. +/// +/// When profile data is available, exclude cold blocks from the returned set; +/// otherwise, collect all blocks in the loop. +MachineBlockPlacement::BlockFilterSet +MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { + BlockFilterSet LoopBlockSet; + + // Filter cold blocks off from LoopBlockSet when profile data is available. + // Collect the sum of frequencies of incoming edges to the loop header from + // outside. If we treat the loop as a super block, this is the frequency of + // the loop. Then for each block in the loop, we calculate the ratio between + // its frequency and the frequency of the loop block. When it is too small, + // don't add it to the loop chain. If there are outer loops, then this block + // will be merged into the first outer loop chain for which this block is not + // cold anymore. This needs precise profile data and we only do this when + // profile data is available. + if (F.getFunction()->getEntryCount()) { + BlockFrequency LoopFreq(0); + for (auto LoopPred : L.getHeader()->predecessors()) + if (!L.contains(LoopPred)) + LoopFreq += MBFI->getBlockFreq(LoopPred) * + MBPI->getEdgeProbability(LoopPred, L.getHeader()); + + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); + if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) + continue; + LoopBlockSet.insert(LoopBB); + } + } else + LoopBlockSet.insert(L.block_begin(), L.block_end()); + + return LoopBlockSet; +} + /// \brief Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code @@ -805,19 +1039,27 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, buildLoopChains(F, *InnerLoop); SmallVector BlockWorkList; - BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); + + // Check if we have profile data for this function. If yes, we will rotate + // this loop by modeling costs more precisely which requires the profile data + // for better layout. + bool RotateLoopWithProfile = + PreciseRotationCost && F.getFunction()->getEntryCount(); // First check to see if there is an obviously preferable top block for the // loop. 
This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + // When we use profile data to rotate the loop, this is unnecessary. + MachineBasicBlock *LoopTop = + RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. MachineBasicBlock *ExitingBB = nullptr; - if (LoopTop == L.getHeader()) + if (!RotateLoopWithProfile && LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -828,7 +1070,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineBasicBlock *LoopBB : L.getBlocks()) { + + for (MachineBasicBlock *LoopBB : LoopBlockSet) { BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; @@ -848,7 +1091,11 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, } buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); - rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + + if (RotateLoopWithProfile) + rotateLoopWithProfile(LoopChain, L, LoopBlockSet); + else + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -889,7 +1136,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the assumptions of the remaining algorithm. SmallVector Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = FI; + MachineBasicBlock *BB = &*FI; BlockChain *Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve @@ -900,8 +1147,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(std::next(FI)); - MachineBasicBlock *NextBB = NextFI; + MachineFunction::iterator NextFI = std::next(FI); + MachineBasicBlock *NextBB = &*NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. assert(NextFI != FE && "Can't fallthrough past the last block."); @@ -1004,7 +1251,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); + MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -1035,14 +1282,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // If PrevBB has a two-way branch, try to re-order the branches - // such that we branch to the successor with higher weight first. + // such that we branch to the successor with higher probability first. 
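// For example (numbers invented for illustration): if MBPI reports
// P(PrevBB->FBB) = 7/10 and P(PrevBB->TBB) = 3/10, the branch condition is
// reversed below so that the hotter edge to FBB becomes the primary branch
// target.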
if (TBB && !Cond.empty() && FBB && - MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + MBPI->getEdgeProbability(PrevBB, FBB) > + MBPI->getEdgeProbability(PrevBB, TBB) && !TII->ReverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " << getBlockName(PrevBB) << "\n"); - DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) - << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DEBUG(dbgs() << " Edge probability: " + << MBPI->getEdgeProbability(PrevBB, FBB) << " vs " + << MBPI->getEdgeProbability(PrevBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); @@ -1064,13 +1313,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. + // FIXME: Use Function::optForSize(). if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% - BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; for (MachineBasicBlock *ChainBB : FunctionChain) { if (ChainBB == *FunctionChain.begin()) @@ -1084,6 +1334,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + if (AlignAllLoops) { + ChainBB->setAlignment(AlignAllLoops); + continue; + } + unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) continue; // Don't care about loop alignment. @@ -1224,4 +1479,3 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { return false; } - diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 6fbc2be70486..cf6d4018cb70 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -28,91 +28,48 @@ char MachineBranchProbabilityInfo::ID = 0; void MachineBranchProbabilityInfo::anchor() { } -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { - // First we compute the sum with 64-bits of precision, ensuring that cannot - // overflow by bounding the number of weights considered. Hopefully no one - // actually needs 2^32 successors. - assert(MBB->succ_size() < UINT32_MAX); - uint64_t Sum = 0; - Scale = 1; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight; - } - - // If the computed sum fits in 32-bits, we're done. - if (Sum <= UINT32_MAX) - return Sum; - - // Otherwise, compute the scale necessary to cause the weights to fit, and - // re-sum with that scale applied. 
- assert((Sum / UINT32_MAX) < UINT32_MAX); - Scale = (Sum / UINT32_MAX) + 1; - Sum = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight / Scale; - } - assert(Sum <= UINT32_MAX); - return Sum; +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { + return Src->getSuccProbability(Dst); } -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - MachineBasicBlock::const_succ_iterator Dst) const { - uint32_t Weight = Src->getSuccWeight(Dst); - if (!Weight) - return DEFAULT_WEIGHT; - return Weight; -} - -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // This is a linear search. Try to use the const_succ_iterator version when // possible. - return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); + return getEdgeProbability(Src, + std::find(Src->succ_begin(), Src->succ_end(), Dst)); } bool MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - // FIXME: Compare against a static "hot" BranchProbability. - return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); + static BranchProbability HotProb(4, 5); + return getEdgeProbability(Src, Dst) > HotProb; } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); MachineBasicBlock *MaxSucc = nullptr; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(MBB, I); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = *I; } } - if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) + static BranchProbability HotProb(4, 5); + if (getEdgeProbability(MBB, MaxSucc) >= HotProb) return MaxSucc; return nullptr; } -BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( - const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { - uint32_t Scale = 1; - uint32_t D = getSumForBlock(Src, Scale); - uint32_t N = getEdgeWeight(Src, Dst) / Scale; - - return BranchProbability(N, D); -} - raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( raw_ostream &OS, const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 87aaaa0834cf..021707b7c3c7 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -57,7 +57,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); @@ -111,7 +111,7 @@ char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 
INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) @@ -714,7 +714,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index f33d0e6a28e5..fa43c4dfa05a 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -10,6 +10,7 @@ // The machine combiner pass uses machine trace metrics to ensure the combined // instructions does not lengthen the critical path or the resource depth. //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "machine-combiner" #include "llvm/ADT/Statistic.h" @@ -68,10 +69,10 @@ private: MachineTraceMetrics::Trace BlockTrace); bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl &InsInstrs, - DenseMap &InstrIdxForVirtReg, - bool NewCodeHasLessInsts); + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, @@ -122,9 +123,9 @@ unsigned MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace) { - SmallVector InstrDepth; - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which @@ -180,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, /// \returns Latency of \p NewRoot unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // Check each definition in NewRoot and compute the latency unsigned NewRootLatency = 0; @@ -202,62 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, UseMO->findRegisterUseOperandIdx(MO.getReg())); } else { - LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } NewRootLatency = std::max(NewRootLatency, LatencyOp); } return NewRootLatency; } -/// True when the new instruction sequence does not lengthen the critical path -/// and the new sequence has less instructions or the new sequence improves the -/// critical path. +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + Default // The critical path must not be lengthened. +}; + +static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { + // TODO: If C++ ever gets a real enum class, make this part of the + // MachineCombinerPattern class. 
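+ // For example (illustration, not from the source): the REASSOC patterns
+ // rewrite a serial chain like ((a + b) + c) + d into (a + b) + (c + d),
+ // cutting the dependency depth from 3 to 2, which is why they demand
+ // MustReduceDepth instead of the slack-based default check.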
+ switch (P) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_BY: + case MachineCombinerPattern::REASSOC_XA_YB: + return CombinerObjective::MustReduceDepth; + default: + return CombinerObjective::Default; + } +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical -/// path. This is decided by the formula: -/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). -/// If the new sequence has an equal length critical path but does not reduce -/// the number of instructions (NewCodeHasLessInsts is false), then it is not -/// considered an improvement. The slack is the number of cycles Root can be -/// delayed before the critical patch becomes longer. +/// path. The definition of "improve" may be restricted by specifying that the +/// new path improves the data dependency chain (MustReduceDepth). bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, - bool NewCodeHasLessInsts) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + MachineCombinerPattern Pattern) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // NewRoot is the last instruction in the \p InsInstrs vector. - // Get depth and latency of NewRoot. unsigned NewRootIdx = InsInstrs.size() - 1; MachineInstr *NewRoot = InsInstrs[NewRootIdx]; - unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); - unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - // Get depth, latency and slack of Root. + // Get depth and latency of NewRoot and Root. + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; + dbgs() << " RootDepth: " << RootDepth << "\n"); + + // For a transform such as reassociation, the cost equation is + // conservatively calculated so that we must improve the depth (data + // dependency cycles) in the critical path to proceed with the transform. + // Being conservative also protects against inaccuracies in the underlying + // machine trace metrics and CPU models. + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + return NewRootDepth < RootDepth; + + // A more flexible cost calculation for the critical path includes the slack + // of the original code sequence. This may allow the transform to proceed + // even if the instruction depths (data dependency cycles) become worse. 
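+ // Worked example (numbers invented): with NewRootDepth = 6, RootDepth = 5,
+ // NewRootLatency = 2, RootLatency = 2 and RootSlack = 1, the check below is
+ // 6 + 2 <= 5 + 2 + 1, i.e. 8 <= 8, so the transform is accepted even though
+ // the depth regressed; under MustReduceDepth it would already have been
+ // rejected above, since 6 < 5 does not hold.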
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); unsigned RootLatency = TSchedModel.computeInstrLatency(Root); unsigned RootSlack = BlockTrace.getInstrSlack(Root); - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth - << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency - << " RootSlack: " << RootSlack << "\n"; - dbgs() << " NewRootDepth + NewRootLatency " + DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootLatency: " << RootLatency << "\n"; + dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency = " << NewRootDepth + NewRootLatency << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack " + dbgs() << " RootDepth + RootLatency + RootSlack = " << RootDepth + RootLatency + RootSlack << "\n";); unsigned NewCycleCount = NewRootDepth + NewRootLatency; unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; - if (NewCodeHasLessInsts) - return NewCycleCount <= OldCycleCount; - else - return NewCycleCount < OldCycleCount; + return NewCycleCount <= OldCycleCount; } /// helper routine to convert instructions into SC @@ -271,11 +296,14 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } + /// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; // Compute current resource length @@ -310,7 +338,7 @@ bool MachineCombiner::preservesResourceLen( bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; - if (!TSchedModel.hasInstrSchedModel()) + if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; return false; } @@ -332,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { auto &MI = *BlockIter++; DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); - SmallVector Patterns; + SmallVector Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -358,54 +386,55 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // mostly one pattern, and getMachineCombinerPatterns() can order patterns // based on an internal cost heuristic. - if (TII->getMachineCombinerPatterns(MI, Patterns)) { - for (auto P : Patterns) { - SmallVector InsInstrs; - SmallVector DelInstrs; - DenseMap InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + if (!TII->getMachineCombinerPatterns(MI, Patterns)) + continue; + + for (auto P : Patterns) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. 
+ if (!NewInstCount) + continue; + + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, P) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); Traces->verifyAnalysis(); - TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, - InstrIdxForVirtReg); - unsigned NewInstCount = InsInstrs.size(); - unsigned OldInstCount = DelInstrs.size(); - // Found pattern, but did not generate alternative sequence. - // This can happen e.g. when an immediate could not be materialized - // in a single instruction. - if (!NewInstCount) - continue; - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (doSubstitute(NewInstCount, OldInstCount) || - (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, - NewInstCount < OldInstCount) && - preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { - for (auto *InstrPtr : InsInstrs) - MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); - for (auto *InstrPtr : DelInstrs) - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - - Changed = true; - ++NumInstCombined; - - Traces->invalidate(MBB); - Traces->verifyAnalysis(); - // Eagerly stop after the first pattern fires. - break; - } else { - // Cleanup instructions of the alternative code sequence. There is no - // use for them. - MachineFunction *MF = MBB->getParent(); - for (auto *InstrPtr : InsInstrs) - MF->DeleteMachineInstr(InstrPtr); - } - InstrIdxForVirtReg.clear(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. 
+ MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); } + InstrIdxForVirtReg.clear(); } } @@ -420,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); Traces = &getAnalysis(); - MinInstr = 0; - - OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + MinInstr = nullptr; + OptSize = MF.getFunction()->optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 9856e70edaef..ca4bb1c6ad49 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionInitializer.h" @@ -26,6 +27,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -44,6 +47,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt + AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); + void MachineFunctionInitializer::anchor() {} //===----------------------------------------------------------------------===// @@ -79,12 +87,27 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Use Function::optForSize(). if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); + if (AlignAllFunctions) + Alignment = AlignAllFunctions; + FunctionNumber = FunctionNum; JumpTableInfo = nullptr; + + if (isFuncletEHPersonality(classifyEHPersonality( + F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + WinEHInfo = new (Allocator) WinEHFuncInfo(); + } + + assert(TM.isCompatibleDataLayout(getDataLayout()) && + "Can't create a MachineFunction using a Module with a " + "Target-incompatible DataLayout attached\n"); + + PSVManager = llvm::make_unique(); } MachineFunction::~MachineFunction() { @@ -117,6 +140,11 @@ MachineFunction::~MachineFunction() { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); } + + if (WinEHInfo) { + WinEHInfo->~WinEHFuncInfo(); + Allocator.Deallocate(WinEHInfo); + } } const DataLayout &MachineFunction::getDataLayout() const { @@ -149,7 +177,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBB == nullptr) MBBI = begin(); else - MBBI = MBB; + MBBI = MBB->getIterator(); // Figure out the block number this should have. 
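The WinEHFuncInfo handling in the MachineFunction constructor and destructor above is the usual bump-allocator idiom: the arena hands out raw memory and never runs destructors, so the owner placement-news the object in and must invoke the destructor explicitly on teardown. A self-contained sketch; Arena and Info are invented stand-ins for BumpPtrAllocator and WinEHFuncInfo:

    #include <cstdlib>
    #include <new>

    struct Arena { // stand-in: a real bump allocator carves memory from slabs
      void *Allocate(std::size_t Size) { return std::malloc(Size); }
      void Deallocate(void *P) { std::free(P); }
    };

    struct Info { // stand-in for WinEHFuncInfo
      int State = -1;
      ~Info() {} // side tables would be released here
    };

    int main() {
      Arena Allocator;
      Info *I = new (Allocator.Allocate(sizeof(Info))) Info(); // placement new
      I->~Info();              // the arena never calls destructors itself
      Allocator.Deallocate(I); // a real bump allocator frees slabs en masse
      return 0;
    }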
unsigned BlockNo = 0; @@ -169,7 +197,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); - MBBNumbering[BlockNo] = MBBI; + MBBNumbering[BlockNo] = &*MBBI; MBBI->setNumber(BlockNo); } } @@ -322,6 +350,13 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +const char *MachineFunction::createExternalSymbolName(StringRef Name) { + char *Dest = Allocator.Allocate(Name.size() + 1); + std::copy(Name.begin(), Name.end(), Dest); + Dest[Name.size()] = 0; + return Dest; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); @@ -593,10 +628,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { BV.set(*CSR); // Saved CSRs are not pristine. - const std::vector &CSI = getCalleeSavedInfo(); - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) - BV.reset(I->getReg()); + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); return BV; } @@ -801,42 +835,26 @@ Type *MachineConstantPoolEntry::getType() const { return Val.ConstVal->getType(); } - -unsigned MachineConstantPoolEntry::getRelocationInfo() const { +bool MachineConstantPoolEntry::needsRelocation() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getRelocationInfo(); - return Val.ConstVal->getRelocationInfo(); + return true; + return Val.ConstVal->needsRelocation(); } SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { - SectionKind Kind; - switch (getRelocationInfo()) { + if (needsRelocation()) + return SectionKind::getReadOnlyWithRel(); + switch (DL->getTypeAllocSize(getType())) { + case 4: + return SectionKind::getMergeableConst4(); + case 8: + return SectionKind::getMergeableConst8(); + case 16: + return SectionKind::getMergeableConst16(); default: - llvm_unreachable("Unknown section kind"); - case Constant::GlobalRelocations: - Kind = SectionKind::getReadOnlyWithRel(); - break; - case Constant::LocalRelocation: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case Constant::NoRelocation: - switch (DL->getTypeAllocSize(getType())) { - case 4: - Kind = SectionKind::getMergeableConst4(); - break; - case 8: - Kind = SectionKind::getMergeableConst8(); - break; - case 16: - Kind = SectionKind::getMergeableConst16(); - break; - default: - Kind = SectionKind::getReadOnly(); - break; - } + return SectionKind::getReadOnly(); } - return Kind; } MachineConstantPool::~MachineConstantPool() { diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index aaf06a70da74..05463fc6a1ef 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -13,11 +13,14 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" @@ -49,13 +52,16 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // passes 
explicitly. This does not include setPreservesCFG, // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index fdc4226ad926..1eb2edcd7cec 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt<bool> PrintWholeRegMask( + "print-whole-regmask", + cl::desc("Print the full contents of regmask operands in IR dumps"), + cl::init(true), cl::Hidden); + //===----------------------------------------------------------------------===// // MachineOperand Implementation //===----------------------------------------------------------------------===// @@ -407,9 +413,26 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; - case MachineOperand::MO_RegisterMask: - OS << "<regmask>"; + case MachineOperand::MO_RegisterMask: { + unsigned NumRegsInMask = 0; + unsigned NumRegsEmitted = 0; + OS << "<regmask"; + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintWholeRegMask || NumRegsEmitted <= 10) { + OS << " " << PrintReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + OS << ">"; break; + } case MachineOperand::MO_RegisterLiveOut: OS << "<regliveout>"; break; @@ -443,26 +466,28 @@ unsigned MachinePointerInfo::getAddrSpace() const { /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. -MachinePointerInfo MachinePointerInfo::getConstantPool() { - return MachinePointerInfo(PseudoSourceValue::getConstantPool()); +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); } /// getFixedStack - Return a MachinePointerInfo record that refers to the /// specified FrameIndex.
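In the MO_RegisterMask printer above, the MaskWord/MaskBit arithmetic is the standard packed-bitvector probe: one bit per physical register, 32 per word, and a set bit means the register is preserved across the call. A minimal sketch with invented mask contents:

    #include <cstdint>
    #include <cstdio>

    static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
      return Mask[Reg / 32] & (1u << (Reg % 32)); // MaskWord and MaskBit
    }

    int main() {
      const uint32_t Mask[2] = {0x00000005u, 0x00000001u}; // regs 0, 2, 32 set
      const unsigned Regs[] = {0, 1, 2, 32, 33};
      for (unsigned Reg : Regs)
        std::printf("reg %2u: %s\n", Reg,
                    isPreserved(Mask, Reg) ? "preserved" : "clobbered");
      return 0;
    }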
-MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { - return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); } -MachinePointerInfo MachinePointerInfo::getJumpTable() { - return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); } -MachinePointerInfo MachinePointerInfo::getGOT() { - return MachinePointerInfo(PseudoSourceValue::getGOT()); +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); } -MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { - return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, @@ -606,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) - for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -841,7 +868,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF, bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); - for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { + for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; @@ -865,13 +892,13 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, if (isBundle()) { // Both instructions are bundles, compare MIs inside the bundle. 
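The MachinePointerInfo hunks above move pseudo source values from process-wide singletons to a per-function PSVManager, so each factory gains a MachineFunction parameter. A hypothetical call-site migration (DAG stands for whatever SelectionDAG-like object the caller already has in scope):

    // Before: the PSVs were global, so no context was required.
    //   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool();
    //
    // After: the PSVs belong to the function being compiled.
    //   MachineFunction &MF = DAG.getMachineFunction();
    //   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);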
- MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator I1 = getIterator(); MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); - MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator I2 = Other->getIterator(); MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); while (++I1 != E1 && I1->isInsideBundle()) { ++I2; - if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check)) return false; } } @@ -976,7 +1003,7 @@ unsigned MachineInstr::getNumExplicitOperands() const { void MachineInstr::bundleWithPred() { assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); setFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->setFlag(BundledSucc); @@ -985,7 +1012,7 @@ void MachineInstr::bundleWithPred() { void MachineInstr::bundleWithSucc() { assert(!isBundledWithSucc() && "MI is already bundled with its successor"); setFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->setFlag(BundledPred); @@ -994,7 +1021,7 @@ void MachineInstr::bundleWithSucc() { void MachineInstr::unbundleFromPred() { assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); clearFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->clearFlag(BundledSucc); @@ -1003,7 +1030,7 @@ void MachineInstr::unbundleFromPred() { void MachineInstr::unbundleFromSucc() { assert(isBundledWithSucc() && "MI isn't bundled with its successor"); clearFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->clearFlag(BundledPred); @@ -1139,7 +1166,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( /// Return the number of instructions inside the MI bundle, not counting the /// header instruction. unsigned MachineInstr::getBundleSize() const { - MachineBasicBlock::const_instr_iterator I = this; + MachineBasicBlock::const_instr_iterator I = getIterator(); unsigned Size = 0; while (I->isBundledWithSucc()) ++Size, ++I; @@ -1501,6 +1528,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const { return false; } +bool MachineInstr::isLoadFoldBarrier() const { + return mayStore() || isCall() || hasUnmodeledSideEffects(); +} + /// allDefsAreDead - Return true if all the defs of this instruction are dead. 
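The cluster of '= this' to '= getIterator()' changes above reflects the intrusive-list rework in this import: nodes no longer convert implicitly to their list iterators, and iterators no longer convert implicitly to pointers. Schematically (MI/MBB stand for any ilist node):

    // Before: implicit conversions in both directions.
    //   MachineBasicBlock::instr_iterator Pred = this;            // node -> iterator
    //   MachineBasicBlock *Prior = std::prev(MachineFunction::iterator(MBB));
    //
    // After: both directions are spelled out.
    //   MachineBasicBlock::instr_iterator Pred = getIterator();   // explicit
    //   MachineBasicBlock *Prior = &*std::prev(MBB->getIterator()); // iterator -> pointer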
/// bool MachineInstr::allDefsAreDead() const { @@ -1615,7 +1646,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, FirstOp = false; } - for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1706,13 +1736,16 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } bool HaveSemi = false; - const unsigned PrintableFlags = FrameSetup; + const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; if (Flags & FrameSetup) OS << "FrameSetup"; + + if (Flags & FrameDestroy) + OS << "FrameDestroy"; } if (!memoperands_empty()) { @@ -1755,7 +1788,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, DebugLoc InlinedAtDL(InlinedAt); if (InlinedAtDL && MF) { OS << " inlined @[ "; - InlinedAtDL.print(OS); + InlinedAtDL.print(OS); OS << " ]"; } } @@ -1902,11 +1935,11 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; - MO.setIsUndef(); + MO.setIsUndef(IsUndef); } } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index cd820ee1ac52..3eaf4c5dea0f 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -293,15 +293,17 @@ MachineOperandIteratorBase::PhysRegInfo MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI) { bool AllDefsDead = true; - PhysRegInfo PRI = {false, false, false, false, false, false}; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); - if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) - PRI.Clobbers = true; // Regmask clobbers Reg. + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { + PRI.Clobbered = true; + continue; + } if (!MO.isReg()) continue; @@ -310,33 +312,28 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) continue; - bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); - bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); - - if (IsRegOrSuperReg && MO.readsReg()) { - // Reg or a super-reg is read, and perhaps killed also. - PRI.Reads = true; - PRI.Kills = MO.isKill(); - } - - if (IsRegOrOverlapping && MO.readsReg()) { - PRI.ReadsOverlap = true;// Reg or an overlapping register is read. - } - - if (!MO.isDef()) + if (!TRI->regsOverlap(MOReg, Reg)) continue; - if (IsRegOrSuperReg) { - PRI.Defines = true; // Reg or a super-register is defined. + bool Covered = TRI->isSuperRegisterEq(MOReg, Reg); + if (MO.readsReg()) { + PRI.Read = true; + if (Covered) { + PRI.FullyRead = true; + if (MO.isKill()) + PRI.Killed = true; + } + } else if (MO.isDef()) { + PRI.Defined = true; + if (Covered) + PRI.FullyDefined = true; if (!MO.isDead()) AllDefsDead = false; } - if (IsRegOrOverlapping) - PRI.Clobbers = true; // Reg or an overlapping reg is defined. } - if (AllDefsDead && PRI.Defines) - PRI.DefinesDead = true; // Reg or super-register was defined and was dead. 
+ if (AllDefsDead && PRI.FullyDefined) + PRI.DeadDef = true; return PRI; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index e9ea5ed9648c..a8368e9c80d6 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -138,7 +138,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); @@ -153,7 +153,7 @@ namespace { } private: - /// CandidateInfo - Keep track of information about hoisting candidates. + /// Keep track of information about hoisting candidates. struct CandidateInfo { MachineInstr *MI; unsigned Def; @@ -162,149 +162,76 @@ namespace { : MI(mi), Def(def), FI(fi) {} }; - /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop - /// invariants out to the preheader. void HoistRegionPostRA(); - /// HoistPostRA - When an instruction is found to only use loop invariant - /// operands that is safe to hoist, this instruction is called to do the - /// dirty work. void HoistPostRA(MachineInstr *MI, unsigned Def); - /// ProcessMI - Examine the instruction for potentai LICM candidate. Also - /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet &StoredFIs, SmallVectorImpl &Candidates); - /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the - /// current loop. void AddToLiveIns(unsigned Reg); - /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's - /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); - /// IsLoopInvariantInst - Returns true if the instruction is loop - /// invariant. I.e., all virtual register operands are defined outside of - /// the loop, physical registers aren't accessed (explicitly or implicitly), - /// and the instruction is hoistable. - /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasLoopPHIUse - Return true if the specified instruction is used by any - /// phi node in the current loop. bool HasLoopPHIUse(const MachineInstr *MI) const; - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const; bool IsCheapInstruction(MachineInstr &MI) const; - /// CanCauseHighRegPressure - Visit BBs from header to current BB, - /// check if hoisting an instruction of the given cost matrix can cause high - /// register pressure. bool CanCauseHighRegPressure(const DenseMap &Cost, bool Cheap); - /// UpdateBackTraceRegPressure - Traverse the back trace from header to - /// the current block and update their register pressures to reflect the - /// effect of hoisting MI from the current block to the preheader. void UpdateBackTraceRegPressure(const MachineInstr *MI); - /// IsProfitableToHoist - Return true if it is potentially profitable to - /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); - /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. - /// If not then a load from this mbb may not be safe to hoist. 
bool IsGuaranteedToExecute(MachineBasicBlock *BB); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); - /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given - /// dominator tree node if its a leaf or all of its children are done. Walk - /// up the dominator tree to destroy ancestors which are now done. - void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); + void ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap); - /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all - /// blocks dominated by the specified header block, and that are in the - /// current loop) in depth first order w.r.t the DominatorTree. This allows - /// us to visit definitions before uses, allowing us to hoist a loop body in - /// one pass without iteration. - /// void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// SinkIntoLoop - Sink instructions into loops if profitable. This - /// especially tries to prevent register spills caused by register pressure - /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// InitRegPressure - Find all virtual register references that are liveout - /// of the preheader to initialize the starting "register pressure". Note - /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); - /// calcRegisterCost - Calculate the additional register pressure that the - /// registers used in MI cause. - /// - /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to - /// figure out which usages are live-ins. - /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef); - /// UpdateRegPressure - Update estimate of register pressure after the - /// specified instruction. void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); - /// ExtractHoistableLoad - Unfold a load from the given machineinstr if - /// the load itself could be hoisted. Return the unfolded and hoistable - /// load, or null if the load couldn't be unfolded or if it wouldn't - /// be hoistable. MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - /// LookForDuplicate - Find an instruction amount PrevMIs that is a - /// duplicate of MI. Return this instruction if it's found. - const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs); + const MachineInstr * + LookForDuplicate(const MachineInstr *MI, + std::vector &PrevMIs); - /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on - /// the preheader that compute the same value. If it's found, do a RAU on - /// with the definition of the existing instruction rather than hoisting - /// the instruction to the preheader. - bool EliminateCSE(MachineInstr *MI, - DenseMap >::iterator &CI); + bool EliminateCSE( + MachineInstr *MI, + DenseMap>::iterator &CI); - /// MayCSE - Return true if the given instruction will be CSE'd if it's - /// hoisted out of the loop. bool MayCSE(MachineInstr *MI); - /// Hoist - When an instruction is found to only use loop invariant operands - /// that is safe to hoist, this instruction is called to do the dirty work. - /// It returns true if the instruction is hoisted. 
bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); - /// InitCSEMap - Initialize the CSE map with instructions that are in the - /// current loop preheader that may become duplicates of instructions that - /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); - /// getCurPreheader - Get the preheader for the current loop, splitting - /// a critical edge if needed. MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -315,12 +242,11 @@ INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most -/// loop that has a unique predecessor. +/// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. if (!CurLoop->getLoopPredecessor()) @@ -367,7 +293,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // Get our Loop information... MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { @@ -402,9 +328,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return Changed; } -/// InstructionStoresToFI - Return true if instruction stores to the -/// specified frame. +/// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + // If we lost memory operands, conservatively assume that the instruction + // writes to all slots. + if (MI->memoperands_empty()) + return true; for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { if (!(*o)->isStore() || !(*o)->getPseudoValue()) @@ -418,7 +347,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } -/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// Examine the instruction as a potential LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, @@ -506,8 +435,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } } -/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop -/// invariants out to the preheader. +/// Walk the specified region of the CFG and hoist loop invariants out to the +/// preheader. void MachineLICM::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -529,15 +458,13 @@ void MachineLICM::HoistRegionPostRA() { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) continue; + if (ML && ML->getHeader()->isEHPad()) continue; // Conservatively treat live-ins as an external def. // FIXME: That means a reload that is reused in successor block(s) will not // be LICM'ed.
- for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + for (const auto &LI : BB->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) PhysRegDefs.set(*AI); } @@ -601,8 +528,8 @@ void MachineLICM::HoistRegionPostRA() { } } -/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current -/// loop, and make sure it is not killed by any instructions in the loop. +/// Add register 'Reg' to the livein sets of BBs in the current loop, and make +/// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { @@ -622,9 +549,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { } } -/// HoistPostRA - When an instruction is found to only use loop invariant -/// operands that is safe to hoist, this instruction is called to do the -/// dirty work. +/// When an instruction is found to only use loop invariant operands that are +/// safe to hoist, this instruction is called to do the dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -646,8 +572,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } -// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. -// If not then a load from this mbb may not be safe to hoist. +/// Check if this mbb is guaranteed to execute. If not, then a load from this mbb +/// may not be safe to hoist. bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -679,9 +605,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) { BackTrace.pop_back(); } -/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given -/// dominator tree node if its a leaf or all of its children are done. Walk -/// up the dominator tree to destroy ancestors which are now done. +/// Destroy scope for the MBB that corresponds to the given dominator tree node +/// if it's a leaf or all of its children are done. Walk up the dominator tree to +/// destroy ancestors which are now done. void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) { @@ -701,11 +627,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, } } -/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all -/// blocks dominated by the specified header block, and that are in the -/// current loop) in depth first order w.r.t the DominatorTree. This allows -/// us to visit definitions before uses, allowing us to hoist a loop body in -/// one pass without iteration. +/// Walk the specified loop in the CFG (defined by all blocks dominated by the +/// specified header block, and that are in the current loop) in depth first +/// order w.r.t the DominatorTree. This allows us to visit definitions before +/// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -727,7 +652,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) + if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. @@ -786,6 +711,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. void MachineLICM::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -796,8 +724,8 @@ void MachineLICM::SinkIntoLoop() { I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side effects, e.g. those of a call. - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) - Candidates.push_back(I); + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + Candidates.push_back(&*I); } for (MachineInstr *I : Candidates) { @@ -837,9 +765,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// InitRegPressure - Find all virtual register references that are liveout of -/// the preheader to initialize the starting "register pressure". Note this -/// does not count live through (livein but not used) registers. +/// Find all virtual register references that are liveout of the preheader to +/// initialize the starting "register pressure". Note this does not count live +/// through (livein but not used) registers. void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -858,8 +786,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } -/// UpdateRegPressure - Update estimate of register pressure after the -/// specified instruction. +/// Update estimate of register pressure after the specified instruction. void MachineLICM::UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); @@ -872,6 +799,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, } } +/// Calculate the additional register pressure that the registers used in MI +/// cause. +/// +/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to +/// figure out which usages are live-ins. +/// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap<unsigned, int> MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { @@ -915,23 +848,28 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, return Cost; } -/// isLoadFromGOTOrConstantPool - Return true if this machine instruction -/// loads from global offset table or constant pool. -static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.
+ if (MI.memoperands_empty()) + return true; + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { - if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + if (PSV->isGOT() || PSV->isConstantPool()) return true; } } return false; } -/// IsLICMCandidate - Returns true if the instruction may be a suitable -/// candidate for LICM. e.g. If the instruction is a call, then it's obviously -/// not safe to hoist it. +/// Returns true if the instruction may be a suitable candidate for LICM. +/// E.g., if the instruction is a call, then it's obviously not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; @@ -944,16 +882,16 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // from constant memory are not safe to speculate all the time, for example // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) return false; return true; } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side +/// Returns true if the instruction is loop invariant. +/// I.e., all virtual register operands are defined outside of the loop, +/// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. /// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { @@ -1007,8 +945,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasLoopPHIUse - Return true if the specified instruction is used by a -/// phi node and hoisting it could cause a copy to be inserted. +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector<const MachineInstr *, 4> Work(1, MI); do { @@ -1042,9 +980,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { return false; } -/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop, return true if the target considered -/// it 'high'. +/// Compute operand latency between a def of 'Reg' and a use in the current +/// loop; return true if the target considers it high. bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) @@ -1074,8 +1011,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, return false; } -/// IsCheapInstruction - Return true if the instruction is marked "cheap" or -/// the operand latency between its def and a use is one or less. +/// Return true if the instruction is marked "cheap" or the operand latency +/// between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) return true; @@ -1099,9 +1036,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { return isCheap; } -/// CanCauseHighRegPressure - Visit BBs from header to current BB, check -/// if hoisting an instruction of the given cost matrix can cause high -/// register pressure.
+/// Visit BBs from header to current BB; check if hoisting an instruction of the +/// given cost matrix can cause high register pressure. bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { @@ -1124,9 +1060,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, return false; } -/// UpdateBackTraceRegPressure - Traverse the back trace from header to the -/// current block and update their register pressures to reflect the effect -/// of hoisting MI from the current block to the preheader. +/// Traverse the back trace from header to the current block and update their +/// register pressures to reflect the effect of hoisting MI from the current +/// block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. @@ -1139,8 +1075,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { RP[RPIdAndCost.first] += RPIdAndCost.second; } -/// IsProfitableToHoist - Return true if it is potentially profitable to hoist -/// the given loop invariant. +/// Return true if it is potentially profitable to hoist the given loop +/// invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1230,6 +1166,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return true; } +/// Unfold a load from the given machineinstr if the load itself could be +/// hoisted. Return the unfolded and hoistable load, or null if the load +/// couldn't be unfolded or if it wouldn't be hoistable. MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) @@ -1287,6 +1226,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { return NewMIs[0]; } +/// Initialize the CSE map with instructions that are in the current loop +/// preheader that may become duplicates of instructions that are hoisted +/// out of the loop. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -1295,6 +1237,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } +/// Find an instruction among PrevMIs that is a duplicate of MI. +/// Return this instruction if it's found. const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr *> &PrevMIs) { @@ -1306,6 +1250,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, return nullptr; } +/// Given a LICM'ed instruction, look for an instruction on the preheader that +/// computes the same value. If it's found, replace all uses of MI with the +/// definition of the existing instruction rather than hoisting the instruction +/// to the preheader. bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr *> >::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate @@ -1363,8 +1311,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } -/// MayCSE - Return true if the given instruction will be CSE'd if it's -/// hoisted out of the loop. +/// Return true if the given instruction will be CSE'd if it's hoisted out of +/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap >::iterator @@ -1377,9 +1325,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { return LookForDuplicate(MI, CI->second) != nullptr; } -/// Hoist - When an instruction is found to use only loop invariant operands +/// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. -/// +/// It returns true if the instruction is hoisted. bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { @@ -1441,6 +1389,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { return true; } +/// Get the preheader for the current loop, splitting a critical edge if needed. MachineBasicBlock *MachineLICM::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index ce6abdd870b3..2f5c9e05cc7b 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -37,7 +37,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Analyze(getAnalysis().getBase()); + LI.analyze(getAnalysis().getBase()); return false; } @@ -51,11 +51,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); if (TopMBB != Begin) { - MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator()); while (contains(PriorMBB)) { TopMBB = PriorMBB; if (TopMBB == Begin) break; - PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + PriorMBB = &*std::prev(TopMBB->getIterator()); } } return TopMBB; @@ -65,11 +65,12 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); if (BotMBB != std::prev(End)) { - MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB)); + MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator()); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break; - NextMBB = std::next(MachineFunction::iterator(BotMBB)); + if (BotMBB == &*std::next(BotMBB->getIterator())) + break; + NextMBB = &*std::next(BotMBB->getIterator()); } } return BotMBB; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 6a206249d834..1956a701d8e6 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -9,12 +9,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -35,7 +35,7 @@ char MachineModuleInfo::ID = 0; MachineModuleInfoImpl::~MachineModuleInfoImpl() {} namespace llvm { 
-class MMIAddrLabelMapCallbackPtr : CallbackVH { +class MMIAddrLabelMapCallbackPtr final : CallbackVH { MMIAddrLabelMap *Map; public: MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} @@ -209,9 +209,8 @@ bool MachineModuleInfo::doInitialization(Module &M) { CurCallSite = 0; CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; - // Always emit some info, by default "no personality" info. - Personalities.push_back(nullptr); PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -249,6 +248,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; VariableDbgInfos.clear(); } @@ -314,32 +314,11 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { return LandingPadLabel; } -/// addPersonality - Provide the personality function for the exception -/// information. -void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - const Function *Personality) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.Personality = Personality; - addPersonality(Personality); -} - void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - // If this is the first personality we're adding go - // ahead and add it at the beginning. - if (!Personalities[0]) - Personalities[0] = Personality; - else - Personalities.push_back(Personality); -} - -void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, - int State) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.WinEHState = State; + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. @@ -481,56 +460,3 @@ try_next:; FilterIds.push_back(0); // terminator return FilterID; } - -/// getPersonality - Return the personality function for the current function. -const Function *MachineModuleInfo::getPersonality() const { - for (const LandingPadInfo &LPI : LandingPads) - if (LPI.Personality) - return LPI.Personality; - return nullptr; -} - -EHPersonality MachineModuleInfo::getPersonalityType() { - if (PersonalityTypeCache == EHPersonality::Unknown) { - if (const Function *F = getPersonality()) - PersonalityTypeCache = classifyEHPersonality(F); - } - return PersonalityTypeCache; -} - -/// getPersonalityIndex - Return unique index for current personality -/// function. NULL/first personality function should always get zero index. -unsigned MachineModuleInfo::getPersonalityIndex() const { - const Function* Personality = nullptr; - - // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - if (LandingPads[i].Personality) { - Personality = LandingPads[i].Personality; - break; - } - - for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { - if (Personalities[i] == Personality) - return i; - } - - // This will happen if the current personality function is - // in the zero index. 
- return 0; -} - -const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { - StringRef WinEHParentName = - F->getFnAttribute("wineh-parent").getValueAsString(); - if (WinEHParentName.empty() || WinEHParentName == F->getName()) - return F; - return F->getParent()->getFunction(WinEHParentName); -} - -WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { - auto &Ptr = FuncInfoMap[getWinEHParent(F)]; - if (!Ptr) - Ptr.reset(new WinEHFuncInfo); - return *Ptr; -} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index e883ce523134..03c82f46da63 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -27,13 +27,11 @@ void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), TracksSubRegLiveness(false) { + unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); - UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); - - // Create the physreg use/def lists. - PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr); + UsedPhysRegMask.resize(NumRegs); + PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); } /// setRegClass - Set the register class of the specified virtual register. @@ -117,6 +115,8 @@ void MachineRegisterInfo::clearVirtRegs() { } #endif VRegInfo.clear(); + for (auto &I : LiveIns) + I.second = 0; } void MachineRegisterInfo::verifyUseList(unsigned Reg) const { @@ -394,8 +394,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const -{ +LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. 
assert(TargetRegisterInfo::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); @@ -468,11 +467,8 @@ static bool isNoReturnDef(const MachineOperand &MO) { if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) return false; const Function *Called = getCalledFunction(MI); - if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) - || !Called->hasFnAttribute(Attribute::NoUnwind)) - return false; - - return true; + return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || + !Called->hasFnAttribute(Attribute::NoUnwind)); } bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { @@ -488,3 +484,15 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { } return false; } + +bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid(); + ++AliasReg) { + if (!reg_nodbg_empty(*AliasReg)) + return true; + } + return false; +} diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a48e54caf3fe..bcee15c7c75f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden, static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -106,7 +111,7 @@ public: void print(raw_ostream &O, const Module* = nullptr) const override; protected: - void scheduleRegions(ScheduleDAGInstrs &Scheduler); + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); }; /// MachineScheduler runs after coalescing and before register allocation. @@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", @@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequiredID(MachineDominatorsID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } else if (!mf.getSubtarget().enableMachineScheduler()) return false; - DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
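isPhysRegUsed above must look through register aliases: on x86, for example, a use of AX should also mark EAX used. The query pattern, schematically (TRI and reg_nodbg_empty are members of the surrounding class):

    // for (MCRegAliasIterator AliasReg(PhysReg, TRI, /*IncludeSelf=*/true);
    //      AliasReg.isValid(); ++AliasReg)
    //   if (!reg_nodbg_empty(*AliasReg)) // any non-debug def or use of an alias
    //     return true;                   // e.g. a use of AX marks EAX as used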
MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); PassConfig = &getAnalysis<TargetPassConfig>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LIS = &getAnalysis<LiveIntervals>(); @@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, false); DEBUG(LIS->dump()); if (VerifyScheduling) @@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, true); if (VerifyScheduling) MF->verify(this, "After post machine scheduling."); @@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, - const TargetInstrInfo *TII, - bool IsPostRA) { + const TargetInstrInfo *TII) { return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler.startBlock(MBB); + Scheduler.startBlock(&*MBB); #ifndef NDEBUG if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) continue; @@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || - isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingInstrs) { - if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) break; if (!I->isDebugValue()) ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == std::prev(RegionEnd)) { @@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ?
"PostRA " : "") - << "MI Scheduling **********\n"); + DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler.finishBlock(); - if (Scheduler.isPostRA()) { - // FIXME: Ideally, no further passes should rely on kill flags. However, - // thumb2 size reduction is currently an exception. - Scheduler.fixupKills(MBB); - } + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. + if (FixKillFlags) + Scheduler.fixupKills(&*MBB); } Scheduler.finalizeSchedule(); } @@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { LLVM_DUMP_METHOD void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << "Queue " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + // Build the DAG. buildSchedGraph(AA); @@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track @@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { } // RegisterPressureTracker guarantees that readsReg is true for LiveUses. assert(VNI && "No live value at use."); - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - SUnit *SU = UI->SU; - DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; // If this use comes before the reaching def, it cannot be a last use, so // descrease its pressure change. 
if (!SU->isScheduled && SU != &ExitSU) { LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) - getPressureDiff(SU).addPressureChange(Reg, true, &MRI); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { /// only includes instructions that have DAG nodes, not scheduling boundaries. /// /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() { } bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned LiveOutHeight = DefSU->getHeight(); unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; // Visit all local users of the vreg def. - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - if (UI->SU == &ExitSU) + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) continue; // Only consider uses of the phi. LiveQueryResult LRQ = - LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + LI.Query(LIS->getInstructionIndex(SU->getInstr())); if (!LRQ.valueIn()->isPHIDef()) continue; @@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // overestimate in strange cases. This allows cyclic latency to be // estimated as the minimum slack of the vreg's depth or height. 
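A short worked example for the computation that follows, with numbers invented for illustration: suppose the def's LiveOutDepth is 12 and LiveOutHeight is 9, and the using SU has depth 4 and height 10, with a DefSU latency of 2.

  // Requires <algorithm> for std::min; values are illustrative only.
  unsigned CyclicLatency = 12 - 4;  // depth slack: LiveOutDepth - depth = 8
  unsigned LiveInHeight = 10 + 2;   // SU height + DefSU->Latency = 12
  if (LiveInHeight > 9)             // compare against LiveOutHeight
    CyclicLatency = std::min(CyclicLatency, 12u - 9u); // min(8, 3) = 3
  else
    CyclicLatency = 0;              // no height slack, no cyclic latency

The reported cyclic latency is the minimum of the depth and height slacks, 3c in this example.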
unsigned CyclicLatency = 0; - if (LiveOutDepth > UI->SU->getDepth()) - CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); - unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; if (LiveInHeight > LiveOutHeight) { if (LiveInHeight - LiveOutHeight < CyclicLatency) CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { CyclicLatency = 0; DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } @@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Update top scheduled pressure. TopRPTracker.advance(); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } } @@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector<unsigned, 8> LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); } @@ -1349,25 +1397,49 @@ namespace { /// \brief Post-process the DAG to create cluster edges between instructions /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { - const TargetInstrInfo *TII; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} void apply(ScheduleDAGMI *DAG) override; }; } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGMI *DAG) { // For now, assume targets can only fuse with the branch. - MachineInstr *Branch = DAG->ExitSU.getInstr(); + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); if (!Branch) return; - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { - SUnit *SU = &DAG->SUnits[--Idx]; - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be scheduled in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) continue; // Create a single weak edge from SU to ExitSU.
The only effect is to cause @@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling // of SU, we could create an artificial edge from the deepest root, but it // hasn't been needed yet. - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } } @@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { Latency = Cand.SU->getDepth(); break; } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; @@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { - int TryRank = TryP.getPSetOrMax(); - int CandRank = CandP.getPSetOrMax(); + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { + unsigned TryPSet = TryP.getPSetOrMax(); + unsigned CandPSet = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. - if (TryRank == CandRank) { + if (TryPSet == CandPSet) { return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, Reason); } @@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP, Reason)) { return true; } + + int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : + std::numeric_limits<int>::max(); + + int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : + std::numeric_limits<int>::max(); + // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); @@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } DEBUG(if (TryCand.RPDelta.Excess.isValid()) - dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, - TryCand, Cand, RegExcess)) + TryCand, Cand, RegExcess, TRI, + DAG->MF)) return; // Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical)) + TryCand, Cand, RegCritical, TRI, + DAG->MF)) return; // For loops that are acyclic path limited, aggressively schedule for latency. @@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax)) + TryCand, Cand, RegMax, TRI, + DAG->MF)) return; // Avoid critical resource consumption and balance the schedule. @@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. - if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited - && tryLatency(TryCand, Cand, Zone)) { + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { return; } @@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Bot ONLY1\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Top ONLY1\n"); return SU; } CandPolicy NoPolicy; @@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(make_unique(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(make_unique(DAG->TII)); + DAG->addMutation(make_unique(*DAG->TII, *DAG->TRI)); return DAG; } @@ -3254,12 +3346,10 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->Preds.size() > 10 || Node->Succs.size() > 10); - } - - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return false; + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1b9be50068a9..5e6d6190c638 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -150,7 +150,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) @@ -268,7 +268,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { PDT = &getAnalysis(); LI = &getAnalysis(); MBFI = UseBlockFreqInfo ? 
&getAnalysis() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool EverMadeChange = false; @@ -667,7 +667,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; return SuccToSinkTo; @@ -686,7 +686,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore, if (!MI->isSafeToMove(AA, SawStore)) return false; - // Convergent operations may only be moved to control equivalent locations. + // Convergent operations may not be made control-dependent on additional + // values. if (MI->isConvergent()) return false; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index d9a6b68462eb..f7edacd5ebaf 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, // Update RegUnits to reflect live registers after UseMI. // First kills. - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + for (unsigned Kill : Kills) + for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) RegUnits.erase(*Units); // Second, live defs. - for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { - unsigned DefOp = LiveDefOps[i]; + for (unsigned DefOp : LiveDefOps) { for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; @@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrDepths && "Missing depth info"); assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - const LiveInReg &LIR = TBI.LiveIns[i]; + for (const LiveInReg &LIR : TBI.LiveIns) { if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index ca35ec5fdcf8..cdcd8eb4fbdf 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" @@ -42,7 +43,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -204,18 +204,19 @@ namespace { void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); + template void report(const char *msg, ilist_iterator I) { + report(msg, &*I); + } void report(const char *msg, const MachineFunction *MF); void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); - void report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI); - void report(const char *msg, const 
MachineBasicBlock *MBB, - const LiveInterval &LI); - void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); - void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); + + void report_context(const LiveInterval &LI) const; + void report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const; + void report_context(const LiveRange::Segment &S) const; + void report_context(const VNInfo &VNI) const; void verifyInlineAsm(const MachineInstr *MI); @@ -233,9 +234,11 @@ namespace { void verifyLiveRangeSegment(const LiveRange&, const LiveRange::const_iterator I, unsigned, unsigned); - void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0); + void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); void verifyStackFrame(); + + void verifySlotIndexes() const; }; struct MachineVerifierPass : public MachineFunctionPass { @@ -273,6 +276,19 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { .runOnMachineFunction(const_cast(*this)); } +void MachineVerifier::verifySlotIndexes() const { + if (Indexes == nullptr) + return; + + // Ensure the IdxMBB list is sorted by slot indexes. + SlotIndex Last; + for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(), + E = Indexes->MBBIndexEnd(); I != E; ++I) { + assert(!Last.isValid() || I->first > Last); + Last = I->first; + } +} + bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { foundErrors = 0; @@ -295,10 +311,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { Indexes = PASS->getAnalysisIfAvailable(); } + verifySlotIndexes(); + visitMachineFunctionBefore(); for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { - visitMachineBasicBlockBefore(MFI); + visitMachineBasicBlockBefore(&*MFI); // Keep track of the current bundle header. const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? @@ -306,7 +324,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { - if (MBBI->getParent() != MFI) { + if (MBBI->getParent() != &*MFI) { report("Bad instruction parent pointer", MFI); errs() << "Instruction: " << *MBBI; continue; @@ -315,20 +333,22 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { // Check for consistent bundle flags. if (InBundle && !MBBI->isBundledWithPred()) report("Missing BundledPred flag, " - "BundledSucc was set on predecessor", MBBI); + "BundledSucc was set on predecessor", + &*MBBI); if (!InBundle && MBBI->isBundledWithPred()) report("BundledPred flag is set, " - "but BundledSucc not set on predecessor", MBBI); + "but BundledSucc not set on predecessor", + &*MBBI); // Is this a bundle header? 
if (!MBBI->isInsideBundle()) { if (CurBundle) visitMachineBundleAfter(CurBundle); - CurBundle = MBBI; + CurBundle = &*MBBI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) report("No bundle header", MBBI); - visitMachineInstrBefore(MBBI); + visitMachineInstrBefore(&*MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { const MachineInstr &MI = *MBBI; const MachineOperand &Op = MI.getOperand(I); @@ -341,7 +361,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineOperand(&Op, I); } - visitMachineInstrAfter(MBBI); + visitMachineInstrAfter(&*MBBI); // Was this the last bundled instruction? InBundle = MBBI->isBundledWithSucc(); @@ -350,7 +370,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBundleAfter(CurBundle); if (InBundle) report("BundledSucc flag set on last instruction in block", &MFI->back()); - visitMachineBasicBlockAfter(MFI); + visitMachineBasicBlockAfter(&*MFI); } visitMachineFunctionAfter(); @@ -375,7 +395,10 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { if (!foundErrors++) { if (Banner) errs() << "# " << Banner << '\n'; - MF->print(errs(), Indexes); + if (LiveInts != nullptr) + LiveInts->print(errs()); + else + MF->print(errs(), Indexes); } errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; @@ -399,7 +422,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) errs() << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(errs(), TM); + MI->print(errs(), /*SkipOpers=*/true); + errs() << '\n'; } void MachineVerifier::report(const char *msg, @@ -411,36 +435,24 @@ void MachineVerifier::report(const char *msg, errs() << "\n"; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI) { - report(msg, MF); +void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveInterval &LI) { - report(msg, MBB); - errs() << "- interval: " << LI << '\n'; -} - -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MBB); - errs() << "- liverange: " << LR << '\n'; +void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const { errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + errs() << "- liverange: " << LR << '\n'; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MF); - errs() << "- liverange: " << LR << '\n'; - errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; - if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); +void MachineVerifier::report_context(const LiveRange::Segment &S) const { + errs() << "- segment: " << S << '\n'; +} + +void MachineVerifier::report_context(const VNInfo &VNI) const { + errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -507,11 +519,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MRI->isSSA()) { // If 
this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); - LI != LE; ++LI) { - unsigned reg = *LI; - if (isAllocatable(reg) && !MBB->isLandingPad() && + for (const auto &LI : MBB->liveins()) { + if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && MBB != MBB->getParent()->begin()) { report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); } @@ -522,7 +531,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallPtrSet LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - if ((*I)->isLandingPad()) + if ((*I)->isEHPad()) LandingPadSuccs.insert(*I); if (!FunctionBlocks.count(*I)) report("MBB has successor that isn't part of the function.", MBB); @@ -547,10 +556,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); + const Function *Fn = MF->getFunction(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && - BB && isa(BB->getTerminator()))) + BB && isa(BB->getTerminator())) && + !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -562,7 +573,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. - MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { // It's possible that the block legitimately ends with a noreturn @@ -575,7 +586,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); - } else if (!MBB->isSuccessor(MBBI)) { + } else if (!MBB->isSuccessor(&*MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } @@ -613,7 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. 
- MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); @@ -628,7 +639,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); - } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { + } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } @@ -680,13 +691,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) { - if (!TargetRegisterInfo::isPhysicalRegister(*I)) { + for (const auto &LI : MBB->liveins()) { + if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } @@ -822,9 +832,12 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumDefs = MCID.getNumDefs(); + if (MCID.getOpcode() == TargetOpcode::PATCHPOINT) + NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0; // The first MCID.NumDefs operands must be explicit register defines - if (MONum < MCID.getNumDefs()) { + if (MONum < NumDefs) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); @@ -972,13 +985,38 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { case MachineOperand::MO_FrameIndex: if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && LiveInts && !LiveInts->isNotInMIMap(MI)) { - LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + int FI = MO->getIndex(); + LiveInterval &LI = LiveStks->getInterval(FI); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { + + bool stores = MI->mayStore(); + bool loads = MI->mayLoad(); + // For a memory-to-memory move, we need to check if the frame + // index is used for storing or loading, by inspecting the + // memory operands. 
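Across this file the patch replaces the report() overloads that bundled all context into one call with small report_context() helpers. The resulting pattern, sketched here with an illustrative condition and variables rather than a specific verifier check:

  if (BrokenSegment) {
    report("Live segment is malformed", MBB); // location and headline first
    report_context(LR, Reg, LaneMask);        // then register/lanemask/range
    report_context(S);                        // then the offending segment
  }

Each helper appends "- key: value" context lines to the error, so a check attaches exactly the context it actually has.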
+ if (stores && loads) { + for (auto *MMO : MI->memoperands()) { + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + if (PSV == nullptr) continue; + const FixedStackPseudoSourceValue *Value = + dyn_cast(PSV); + if (Value == nullptr) continue; + if (Value->getFrameIndex() != FI) continue; + + if (MMO->isStore()) + loads = false; + else + stores = false; + break; + } + if (loads == stores) + report("Missing fixed stack memoperand.", MI); + } + if (loads && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } - if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { + if (stores && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } @@ -1387,40 +1425,39 @@ void MachineVerifier::verifyLiveIntervals() { void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const VNInfo *VNI, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { if (VNI->isUnused()) return; const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << '\n'; + report("Value not live at VNInfo def and not marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live\n"; + report("Live segment at def has different VNInfo", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LR << '\n'; + report("Invalid VNInfo definition index", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + report("PHIDef VNInfo is not defined at MBB start", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } return; } @@ -1428,8 +1465,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("No instruction at VNInfo def index", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } @@ -1457,60 +1495,67 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - errs() << "Valno #" << VNI->id << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(*VNI); } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. 
if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LR, - Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Early clobber def must be at an early-clobber slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Non-PHI, non-early clobber def must be at a register slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } } void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg, unsigned LaneMask) { + unsigned Reg, LaneBitmask LaneMask) +{ const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR, Reg, LaneMask); - errs() << S << " has a bad valno\n"; + report("Foreign valno in live segment", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + report_context(*VNI); } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Live segment valno is marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad start of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment must begin at MBB entry or valno def", MBB); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad end of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } @@ -1527,26 +1572,26 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment ends at B slot of an instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. 
if (!SlotIndex::isSameInstr(S.start, S.end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LR, - Reg, LaneMask); - errs() << S << '\n'; + report("Live segment ending at dead slot spans instructions", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1555,9 +1600,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + "redefined by an EC def in the same instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1587,14 +1632,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); - errs() << S << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(S); } } } } // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; + MachineFunction::const_iterator MFI = MBB->getIterator(); // Is this live segment the beginning of a non-PHIDef VN? if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. @@ -1604,10 +1650,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LR, MFI)); + assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isLandingPad()) { + MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -1616,7 +1662,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Is VNI a PHI-def in the current block? bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); + VNI->def == LiveInts->getMBBStartIdx(&*MFI); // Check that VNI is live-out of all predecessors. for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), @@ -1626,22 +1672,23 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << '\n'; + report("Register not marked live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + errs() << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + << PEnd << '\n'; continue; } // Only PHI-defs can take different predecessor values. 
if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR, Reg, - LaneMask); + report("Different value live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); errs() << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; + << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id + << " live into BB#" << MFI->getNumber() << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } if (&*MFI == EndMBB) @@ -1651,7 +1698,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { for (const VNInfo *VNI : LR.valnos) verifyLiveRangeValue(LR, VNI, Reg, LaneMask); @@ -1664,24 +1711,35 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); + if ((Mask & SR.LaneMask) != 0) { + report("Lane masks of sub ranges overlap in live interval", MF); + report_context(LI); + } + if ((SR.LaneMask & ~MaxMask) != 0) { + report("Subrange lanemask is invalid", MF); + report_context(LI); + } + if (SR.empty()) { + report("Subrange must not be empty", MF); + report_context(SR, LI.reg, SR.LaneMask); + } Mask |= SR.LaneMask; verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); + if (!LI.covers(SR)) { + report("A Subrange is not covered by the main range", MF); + report_context(LI); + } } // Check the LI only has one connected component. ConnectedVNInfoEqClasses ConEQ(*LiveInts); unsigned NumComp = ConEQ.Classify(&LI); if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); + report("Multiple connected components in live interval", MF); + report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { errs() << comp << ": valnos"; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index d3433018004c..2c937926d0a7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -548,7 +548,7 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI) { - if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) + if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) return false; // Quick exit for basic blocks without PHIs. const MachineLoop *CurLoop = MLI ? 
MLI->getLoopFor(&MBB) : nullptr; diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp index 99bbad1cc280..4cabc3a8c1fd 100644 --- a/lib/CodeGen/PHIEliminationUtils.cpp +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -28,7 +28,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert // the copy before the call/invoke instruction. - if (!SuccMBB->isLandingPad()) + if (!SuccMBB->isEHPad()) return MBB->getFirstTerminator(); // Discover any defs/uses in this basic block. diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp new file mode 100644 index 000000000000..e73ba0296045 --- /dev/null +++ b/lib/CodeGen/ParallelCG.cpp @@ -0,0 +1,96 @@ +//===-- ParallelCG.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/thread.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr<Module> +llvm::splitCodeGen(std::unique_ptr<Module> M, + ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU, + StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL, FileType); + return M; + } + + std::vector<thread> Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the codegen. + // We do it by serializing partition modules to bitcode (while still on the + // main thread, in order to avoid data races) and spinning up new threads + // which deserialize the partitions into separate contexts. + // FIXME: Provide a more direct way to do this in LLVM.
+ SmallVector<char, 0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()]; + Threads.emplace_back( + [TheTarget, CPU, Features, Options, RM, CM, OL, FileType, + ThreadOS](const SmallVector<char, 0> &BC) { + LLVMContext Ctx; + ErrorOr<std::unique_ptr<Module>> MOrErr = + parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()), + "<split-module>"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features, + Options, RM, CM, OL, FileType); + }, + // Pass BC using std::move to ensure that it gets moved rather than + // copied into the thread's context. + std::move(BC)); + }); + + for (thread &T : Threads) + T.join(); + + return {}; +} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 024d166a4987..873f7125b82a 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -13,7 +13,11 @@ //===---------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" @@ -52,9 +56,6 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt<cl::boolOrDefault> - EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, - cl::desc("enable the shrink-wrapping pass")); static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); @@ -95,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it -// wouldn't be part of the standard pass pipeline, and the target would just add -// a PostRA scheduling pass wherever it wants. -static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). +// Targets can return true in targetSchedulesPostRAScheduling() and +// insert a PostRA scheduling pass wherever it wants. +cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early.
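// Hedged background for the round-trip that follows (illustrative note,
// not part of the patch): an LLVMContext is not thread-safe, and every
// Module is bound to the context that created it, so a partition must be
// re-materialized in a private context before a worker thread may touch
// it. The shape of the per-thread step, with PartitionBuffer standing in
// for the serialized bitcode:
//
//   LLVMContext Ctx;                            // private context per worker
//   ErrorOr<std::unique_ptr<Module>> MOrErr =
//       parseBitcodeFile(PartitionBuffer, Ctx); // Module now bound to Ctx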
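A hedged usage sketch for the splitCodeGen() entry point added in ParallelCG.cpp above; M is assumed to be a std::unique_ptr<Module>, the two pre-opened output streams are assumptions, and the parameter values shown are illustrative defaults:

  std::vector<llvm::raw_pwrite_stream *> Streams = {&OS0, &OS1};
  M = llvm::splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
                         llvm::TargetOptions(), llvm::Reloc::Default,
                         llvm::CodeModel::Default, llvm::CodeGenOpt::Default,
                         llvm::TargetMachine::CGFT_ObjectFile);

With one stream, splitCodeGen() codegens in place and hands the module back; with several, it consumes the module, emits one object per stream on its own thread, and returns null.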
@@ -188,6 +189,29 @@ char TargetPassConfig::ID = 0; char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0; +namespace { +struct InsertedPass { + AnalysisID TargetPassID; + IdentifyingPassPtr InsertedPassID; + bool VerifyAfter; + bool PrintAfter; + + InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) + : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), + VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {} + + Pass *getInsertedPass() const { + assert(InsertedPassID.isValid() && "Illegal Pass ID!"); + if (InsertedPassID.isInstance()) + return InsertedPassID.getInstance(); + Pass *NP = Pass::createPass(InsertedPassID.getID()); + assert(NP && "Pass ID not registered"); + return NP; + } +}; +} + namespace llvm { class PassConfigImpl { public: @@ -202,7 +226,7 @@ public: /// Store the pairs of <AnalysisID, IdentifyingPassPtr> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; + SmallVector<InsertedPass, 4> InsertedPasses; }; } // namespace llvm @@ -217,7 +241,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), StopAfter(nullptr), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) { + DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -225,6 +249,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // including this pass itself. initializeCodeGen(*PassRegistry::getPassRegistry()); + // Also register alias analysis passes required by codegen passes. + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); @@ -232,14 +260,15 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. void TargetPassConfig::insertPass(AnalysisID TargetPassID, - IdentifyingPassPtr InsertedPassID) { + IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) { assert(((!InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getID()) || (InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getInstance()->getPassID())) && "Insert a pass after itself!"); - std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); - Impl->InsertedPasses.push_back(P); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter, + PrintAfter); } /// createPassConfig - Create a pass configuration object to be used by @@ -304,21 +333,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { } // Add the passes after the pass P if there is any.
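A hedged sketch of the extended insertPass() hook as seen from a target's pass config; the target and pass names are placeholders, not from this patch:

  void MyTargetPassConfig::addPreRegAlloc() {
    // Run MyTargetFixupPassID after PHI elimination without triggering
    // -verify-machineinstrs or -print-after-all for the inserted pass.
    insertPass(&PHIEliminationID, &MyTargetFixupPassID,
               /*VerifyAfter=*/false, /*PrintAfter=*/false);
  }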
- for (SmallVectorImpl >::iterator - I = Impl->InsertedPasses.begin(), - E = Impl->InsertedPasses.end(); - I != E; ++I) { - if ((*I).first == PassID) { - assert((*I).second.isValid() && "Illegal Pass ID!"); - Pass *NP; - if ((*I).second.isInstance()) - NP = (*I).second.getInstance(); - else { - NP = Pass::createPass((*I).second.getID()); - assert(NP && "Pass ID not registered"); - } - addPass(NP, false, false); - } + for (auto IP : Impl->InsertedPasses) { + if (IP.TargetPassID == PassID) + addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter); } } else { delete P; @@ -380,10 +397,10 @@ void TargetPassConfig::addIRPasses() { // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. if (UseCFLAA) - addPass(createCFLAliasAnalysisPass()); - addPass(createTypeBasedAliasAnalysisPass()); - addPass(createScopedNoAliasAAPass()); - addPass(createBasicAliasAnalysisPass()); + addPass(createCFLAAWrapperPass()); + addPass(createTypeBasedAAWrapperPass()); + addPass(createScopedNoAliasAAWrapperPass()); + addPass(createBasicAAWrapperPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. @@ -461,7 +478,7 @@ void TargetPassConfig::addISelPrepare() { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. - addPass(createSafeStackPass()); + addPass(createSafeStackPass(TM)); addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -539,8 +556,9 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getEnableShrinkWrap()) + if (getOptLevel() != CodeGenOpt::None) addPass(&ShrinkWrapID); + addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -557,7 +575,10 @@ void TargetPassConfig::addMachinePasses() { addPass(&ImplicitNullChecksID); // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None) { + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM->targetSchedulesPostRAScheduling()) { if (MISchedPostRA) addPass(&PostMachineSchedulerID); else @@ -576,7 +597,10 @@ void TargetPassConfig::addMachinePasses() { addPreEmitPass(); + addPass(&FuncletLayoutID, false); + addPass(&StackMapLivenessID, false); + addPass(&LiveDebugValuesID, false); AddingMachinePasses = false; } @@ -613,27 +637,12 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineCSEID, false); addPass(&MachineSinkingID); - addPass(&PeepholeOptimizerID, false); + addPass(&PeepholeOptimizerID); // Clean-up the dead code that may have been generated by peephole // rewriting. addPass(&DeadMachineInstructionElimID); } -bool TargetPassConfig::getEnableShrinkWrap() const { - switch (EnableShrinkWrapOpt) { - case cl::BOU_UNSET: - return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; - // If EnableShrinkWrap is set, it takes precedence on whatever the - // target sets. The rational is that we assume we want to test - // something related to shrink-wrapping. 
- case cl::BOU_TRUE: - return true; - case cl::BOU_FALSE: - return false; - } - llvm_unreachable("Invalid shrink-wrapping state"); -} - //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// @@ -717,7 +726,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { addPass(&PHIEliminationID, false); addPass(&TwoAddressInstructionPassID, false); - addPass(RegAllocPass); + if (RegAllocPass) + addPass(RegAllocPass); } /// Add standard target-independent passes that are tightly coupled with @@ -748,25 +758,27 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // PreRA instruction scheduling. addPass(&MachineSchedulerID); - // Add the selected register allocation pass. - addPass(RegAllocPass); + if (RegAllocPass) { + // Add the selected register allocation pass. + addPass(RegAllocPass); - // Allow targets to change the register assignments before rewriting. - addPreRewrite(); + // Allow targets to change the register assignments before rewriting. + addPreRewrite(); - // Finally rewrite virtual registers. - addPass(&VirtRegRewriterID); + // Finally rewrite virtual registers. + addPass(&VirtRegRewriterID); - // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - addPass(&StackSlotColoringID); + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(&StackSlotColoringID); - // Run post-ra machine LICM to hoist reloads / remats. - // - // FIXME: can this move into MachineLateOptimization? - addPass(&PostRAMachineLICMID); + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(&PostRAMachineLICMID); + } } //===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index ebe05e3f2731..52b42b624ee1 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -43,7 +43,7 @@ // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable -// if it loads to virtual registers and the virtual register defined has +// if it loads to virtual registers and the virtual register defined has // a single use. // // - Optimize Copies and Bitcast (more generally, target specific copies): @@ -98,6 +98,16 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); +static cl::opt DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + +// Limit the number of PHI instructions to process +// in PeepholeOptimizer::getNextSource. 
+static cl::opt RewritePHILimit( + "rewrite-phi-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the length of PHI chains to lookup")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -105,8 +115,11 @@ STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { + class ValueTrackerResult; + class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -130,6 +143,10 @@ namespace { } } + /// \brief Track Def -> Use info used for rewriting copies. + typedef SmallDenseMap + RewriteMapTy; + private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -137,17 +154,38 @@ namespace { bool optimizeSelect(MachineInstr *MI, SmallPtrSetImpl &LocalMIs); bool optimizeCondBranch(MachineInstr *MI); - bool optimizeCopyOrBitcast(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl &LocalMIs); - bool findNextSource(unsigned &Reg, unsigned &SubReg); + bool findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); + + /// \brief If copy instruction \p MI is a virtual register copy, track it in + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was + /// previously seen as a copy, replace the uses of this copy with the + /// previously seen copy's destination register. + bool foldRedundantCopy(MachineInstr *MI, + SmallSet &CopySrcRegs, + DenseMap &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap &NAPhysToVirtMIs); + bool isLoadFoldable(MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates); @@ -171,6 +209,69 @@ namespace { } }; + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the + /// returned sources for a given search and the instructions where the sources + /// were tracked from. + class ValueTrackerResult { + private: + /// Track all sources found by one ValueTracker query. + SmallVector RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. 
+ const MachineInstr *Inst; + + public: + ValueTrackerResult() : Inst(nullptr) {} + ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { + addSource(Reg, SubReg); + } + + bool isValid() const { return getNumSources() > 0; } + + void setInst(const MachineInstr *I) { Inst = I; } + const MachineInstr *getInst() const { return Inst; } + + void clear() { + RegSrcs.clear(); + Inst = nullptr; + } + + void addSource(unsigned SrcReg, unsigned SrcSubReg) { + RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); + RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + + unsigned getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; + } + + unsigned getSrcSubReg(int Idx) const { + assert(Idx < getNumSources() && "SubReg source out of index"); + return RegSrcs[Idx].SubReg; + } + + bool operator==(const ValueTrackerResult &Other) { + if (Other.getInst() != getInst()) + return false; + + if (Other.getNumSources() != getNumSources()) + return false; + + for (int i = 0, e = Other.getNumSources(); i != e; ++i) + if (Other.getSrcReg(i) != getSrcReg(i) || + Other.getSrcSubReg(i) != getSrcSubReg(i)) + return false; + return true; + } + }; + /// \brief Helper class to track the possible sources of a value defined by /// a (chain of) copy related instructions. /// Given a definition (instruction and definition index), this class @@ -213,23 +314,25 @@ namespace { /// \brief Dispatcher to the right underlying implementation of /// getNextSource. - bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceImpl(); /// \brief Specialized version of getNextSource for Copy instructions. - bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromCopy(); /// \brief Specialized version of getNextSource for Bitcast instructions. - bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromBitcast(); /// \brief Specialized version of getNextSource for RegSequence /// instructions. - bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromRegSequence(); /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. - bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromInsertSubreg(); /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. - bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromExtractSubreg(); /// \brief Specialized version of getNextSource for SubregToReg /// instructions. - bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromSubregToReg(); + /// \brief Specialized version of getNextSource for PHI instructions. + ValueTrackerResult getNextSourceFromPHI(); public: /// \brief Create a ValueTracker instance for the value defined by \p Reg. @@ -276,16 +379,10 @@ namespace { /// \brief Following the use-def chain, get the next available source /// for the tracked value. - /// When the returned value is not nullptr, \p SrcReg gives the register - /// that contain the tracked value. 
- /// \note The sub register index returned in \p SrcSubReg must be used - /// on \p SrcReg to access the actual value. - /// \return Unless the returned value is nullptr (i.e., no source found), - /// \p SrcReg gives the register of the next source used in the returned - /// instruction and \p SrcSubReg the sub-register index to be used on that - /// source to get the tracked value. When nullptr is returned, no - /// alternative source has been found. - const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); + /// \return A ValueTrackerResult containing a set of registers + /// and sub registers with tracked values. A ValueTrackerResult with + /// an empty set of registers means no source was found. + ValueTrackerResult getNextSource(); /// \brief Get the last register where the initial value can be found. /// Initially this is the register of the definition. @@ -303,11 +400,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads -/// a single register and writes a single register and it does not modify the -/// source, and if the source value is preserved as a sub-register of the -/// result, then replace all reachable uses of the source with the subreg of the -/// result. +/// If instruction is a copy-like instruction, i.e. it reads a single register +/// and writes a single register and it does not modify the source, and if the +/// source value is preserved as a sub-register of the result, then replace all +/// reachable uses of the source with the subreg of the result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all @@ -458,10 +554,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// optimizeCmpInstr - If the instruction is a compare and the previous -/// instruction it's comparing against all ready sets (or could be modified to -/// set) the same flag as the compare, then we can remove the comparison and use -/// the flag from the previous instruction. +/// If the instruction is a compare and the previous instruction it's comparing +/// against already sets (or could be modified to set) the same flag as the +/// compare, then we can remove the comparison and use the flag from the +/// previous instruction. bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a @@ -506,88 +602,138 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { return TII->optimizeCondBranch(MI); } -/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) -/// share the same register file. -static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, - const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) { - // Same register class. - if (DefRC == SrcRC) - return true; - - // Both operands are sub registers. Check if they share a register class. - unsigned SrcIdx, DefIdx; - if (SrcSubReg && DefSubReg) - return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, - SrcIdx, DefIdx) != nullptr; - // At most one of the register is a sub register, make it Src to avoid - // duplicating the test. 
- if (!SrcSubReg) { - std::swap(DefSubReg, SrcSubReg); - std::swap(DefRC, SrcRC); - } - - // One of the register is a sub register, check if we can get a superclass. - if (SrcSubReg) - return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; - // Plain copy. - return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; -} - /// \brief Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. -/// When true is returned, \p Reg and \p SubReg are updated with the -/// register number and sub-register index of the new source. +/// When true is returned, the \p RewriteMap can be used by the client to +/// retrieve all Def -> Use along the way up to the next source. Any found +/// Use that is not itself a key for another entry, is the next source to +/// use. During the search for the next source, multiple sources can be found +/// given multiple incoming sources of a PHI instruction. In this case, we +/// look in each PHI source for the next source; all found next sources must +/// share the same register file as \p Reg and \p SubReg. The client should +/// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { +bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap) { // Do not try to find a new source for a physical register. // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; - const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - unsigned DefSubReg = SubReg; - unsigned Src; - unsigned SrcSubReg; - bool ShouldRewrite = false; + SmallVector SrcToLook; + TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); + SrcToLook.push_back(CurSrcPair); - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); - do { - unsigned CopySrcReg, CopySrcSubReg; - if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) - break; - Src = CopySrcReg; - SrcSubReg = CopySrcSubReg; + unsigned PHICount = 0; + while (!SrcToLook.empty() && PHICount < RewritePHILimit) { + TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers + if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) + return false; - // Do not extend the live-ranges of physical registers as they add - // constraints to the register allocator. - // Moreover, if we want to extend the live-range of a physical register, - // unlike SSA virtual register, we will have to check that they are not - // redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(Src)) - break; + CurSrcPair = Pair; + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, + !DisableAdvCopyOpt, TII); + ValueTrackerResult Res; + bool ShouldRewrite = false; - const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); + do { + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + Res = ValTracker.getNextSource(); + if (!Res.isValid()) + break; - // If this source does not incur a cross register bank copy, use it. 
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, - SrcSubReg); - } while (!ShouldRewrite); + // Insert the Def -> Use entry for the recently found source. + ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); + if (CurSrcRes.isValid()) { + assert(CurSrcRes == Res && "ValueTrackerResult found must match"); + // An existent entry with multiple sources is a PHI cycle we must avoid. + // Otherwise it's an entry with a valid next source we already found. + if (CurSrcRes.getNumSources() > 1) { + DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); + return false; + } + break; + } + RewriteMap.insert(std::make_pair(CurSrcPair, Res)); + + // ValueTrackerResult usually have one source unless it's the result from + // a PHI instruction. Add the found PHI edges to be looked up further. + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; + for (unsigned i = 0; i < NumSrcs; ++i) + SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( + Res.getSrcReg(i), Res.getSrcSubReg(i))); + break; + } + + CurSrcPair.Reg = Res.getSrcReg(0); + CurSrcPair.SubReg = Res.getSrcSubReg(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, + // we will have to check that they aren't redefine before the related use. + if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + return false; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); + ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, + CurSrcPair.SubReg); + } while (!ShouldRewrite); + + // Continue looking for new sources... + if (Res.isValid()) + continue; + + // Do not continue searching for a new source if the there's at least + // one use-def which cannot be rewritten. + if (!ShouldRewrite) + return false; + } + + if (PHICount >= RewritePHILimit) { + DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + return false; + } // If we did not find a more suitable source, there is nothing to optimize. - if (!ShouldRewrite || Src == Reg) - return false; + return CurSrcPair.Reg != Reg; +} - Reg = Src; - SubReg = SrcSubReg; - return true; +/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// guaranteed to have the same register class. This is necessary whenever we +/// successfully traverse a PHI instruction and find suitable sources coming +/// from its edges. By inserting a new PHI, we provide a rewritten PHI def +/// suitable to be used in a new COPY instruction. +static MachineInstr * +insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + const SmallVectorImpl &SrcRegs, + MachineInstr *OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); + + const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); + unsigned NewVR = MRI->createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI->getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewVR); + + unsigned MBBOpIdx = 2; + for (auto RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); + MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. 
+ MRI->clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; + } + + return MIB; } namespace { @@ -624,7 +770,7 @@ public: /// This source defines the whole definition, i.e., /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). /// - /// The second and subsequent calls will return false, has there is only one + /// The second and subsequent calls will return false, as there is only one /// rewritable source. /// /// \return True if a rewritable source has been found, false otherwise. @@ -632,9 +778,9 @@ public: virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, unsigned &TrackReg, unsigned &TrackSubReg) { - // If CurrentSrcIdx == 1, this means this function has already been - // called once. CopyLike has one defintiion and one argument, thus, - // there is nothing else to rewrite. + // If CurrentSrcIdx == 1, this means this function has already been called + // once. CopyLike has one definition and one argument, thus, there is + // nothing else to rewrite. if (!CopyLike.isCopy() || CurrentSrcIdx == 1) return false; // This is the first call to getNextRewritableSource. @@ -653,7 +799,7 @@ public: /// \brief Rewrite the current source with \p NewReg and \p NewSubReg /// if possible. - /// \return True if the rewritting was possible, false otherwise. + /// \return True if the rewriting was possible, false otherwise. virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { if (!CopyLike.isCopy() || CurrentSrcIdx != 1) return false; @@ -662,6 +808,157 @@ public: MOSrc.setSubReg(NewSubReg); return true; } + + /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find + /// the new source to use for rewrite. If \p HandleMultipleSources is true and + /// multiple sources for a given \p Def are found along the way, we found a + /// PHI instructions that needs to be rewritten. + /// TODO: HandleMultipleSources should be removed once we test PHI handling + /// with coalescable copies. + TargetInstrInfo::RegSubRegPair + getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap, + bool HandleMultipleSources = true) { + + TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); + do { + ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); + // If there are no entries on the map, LookupSrc is the new source. + if (!Res.isValid()) + return LookupSrc; + + // There's only one source for this definition, keep searching... + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs == 1) { + LookupSrc.Reg = Res.getSrcReg(0); + LookupSrc.SubReg = Res.getSrcSubReg(0); + continue; + } + + // TODO: Remove once multiple srcs w/ coalescable copies are supported. + if (!HandleMultipleSources) + break; + + // Multiple sources, recurse into each source to find a new source + // for it. Then, rewrite the PHI accordingly to its new edges. + SmallVector NewPHISrcs; + for (unsigned i = 0; i < NumSrcs; ++i) { + TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), + Res.getSrcSubReg(i)); + NewPHISrcs.push_back( + getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); + } + + // Build the new PHI node and return its def register as the new source. 
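The recursion just shown in getNewSource, look up the current pair in the RewriteMap, follow single-source entries, and recurse over every edge of a multi-source (PHI) entry, can be modeled compactly. A runnable sketch with plain ints standing in for RegSubRegPair (illustrative only; a real rewriter would materialize a new PHI over the resolved edges and return its def):

    #include <iostream>
    #include <map>
    #include <vector>

    using RewriteMap = std::map<int, std::vector<int>>;

    int getNewSource(const RewriteMap &RM, int Def) {
      auto It = RM.find(Def);
      if (It == RM.end())
        return Def;                       // no entry: Def is the new source
      const std::vector<int> &Srcs = It->second;
      if (Srcs.size() == 1)
        return getNewSource(RM, Srcs[0]); // single source: keep walking
      // Multiple sources: resolve each PHI edge recursively. Here we only
      // print them; the LLVM code builds a fresh PHI from these values.
      for (int S : Srcs)
        std::cout << "phi edge resolves to " << getNewSource(RM, S) << '\n';
      return Def;
    }

    int main() {
      RewriteMap RM{{10, {7}}, {7, {3, 4}}, {3, {1}}};
      std::cout << "new source for 10: " << getNewSource(RM, 10) << '\n';
    }

The termination argument mirrors the patch: findNextSource refuses to record PHI cycles and bounds the number of PHIs visited, so the map walked here is acyclic.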
+ MachineInstr *OrigPHI = const_cast(Res.getInst()); + MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); + DEBUG(dbgs() << "-- getNewSource\n"); + DEBUG(dbgs() << " Replacing: " << *OrigPHI); + DEBUG(dbgs() << " With: " << *NewPHI); + const MachineOperand &MODef = NewPHI->getOperand(0); + return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + + } while (1); + + return TargetInstrInfo::RegSubRegPair(0, 0); + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + virtual MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) { + return nullptr; + } +}; + +/// \brief Helper class to rewrite uncoalescable copy like instructions +/// into new COPY (coalescable friendly) instructions. +class UncoalescableRewriter : public CopyRewriter { +protected: + const TargetInstrInfo &TII; + MachineRegisterInfo &MRI; + /// The number of defs in the bitcast + unsigned NumDefs; + +public: + UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) + : CopyRewriter(MI), TII(TII), MRI(MRI) { + NumDefs = MI.getDesc().getNumDefs(); + } + + /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. + /// + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) + return false; + + while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { + ++CurrentSrcIdx; + if (CurrentSrcIdx == NumDefs) + return false; + } + + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + + CurrentSrcIdx++; + return true; + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) override { + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + + // Find the new source to use in the COPY rewrite. + TargetInstrInfo::RegSubRegPair NewSrc = + getNewSource(&MRI, &TII, Def, RewriteMap); + + // Insert the COPY. 
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); + unsigned NewVR = MRI.createVirtualRegister(DefRC); + + MachineInstr *NewCopy = + BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewVR) + .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + + DEBUG(dbgs() << "-- RewriteSource\n"); + DEBUG(dbgs() << " Replacing: " << CopyLike); + DEBUG(dbgs() << " With: " << *NewCopy); + MRI.replaceRegWith(Def.Reg, NewVR); + MRI.clearKillFlags(NewVR); + + // We extended the lifetime of NewSrc.Reg, clear the kill flags to + // account for that. + MRI.clearKillFlags(NewSrc.Reg); + + return NewCopy; + } }; /// \brief Specialized rewriter for INSERT_SUBREG instruction. @@ -699,7 +996,7 @@ public: // partial definition. TrackReg = MODef.getReg(); if (MODef.getSubReg()) - // Bails if we have to compose sub-register indices. + // Bail if we have to compose sub-register indices. return false; TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); return true; @@ -740,7 +1037,7 @@ public: CurrentSrcIdx = 1; const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); SrcReg = MOExtractedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if (MOExtractedReg.getSubReg()) return false; @@ -818,7 +1115,7 @@ public: } const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); SrcReg = MOInsertedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if ((SrcSubReg = MOInsertedReg.getSubReg())) return false; @@ -828,7 +1125,7 @@ public: const MachineOperand &MODef = CopyLike.getOperand(0); TrackReg = MODef.getReg(); - // If we have to compose sub-registers, bails. + // If we have to compose sub-registers, bail. return MODef.getSubReg() == 0; } @@ -850,7 +1147,13 @@ public: /// \return A pointer to a dynamically allocated CopyRewriter or nullptr /// if no rewriter works for \p MI. static CopyRewriter *getCopyRewriter(MachineInstr &MI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { + // Handle uncoalescable copy-like instructions. + if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())) + return new UncoalescableRewriter(MI, TII, MRI); + switch (MI.getOpcode()) { default: return nullptr; @@ -874,7 +1177,7 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI, /// the same register bank. /// New copies issued by this optimization are register allocator /// friendly. This optimization does not remove any copy as it may -/// overconstraint the register allocator, but replaces some operands +/// overconstrain the register allocator, but replaces some operands /// when possible. /// \pre isCoalescableCopy(*MI) is true. /// \return True, when \p MI has been rewritten. False otherwise. @@ -889,25 +1192,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { bool Changed = false; // Get the right rewriter for the current copy. - std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII)); - // If none exists, bails out. + std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. if (!CpyRewriter) return false; // Rewrite each rewritable source. 
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, TrackSubReg)) { - unsigned NewSrc = TrackReg; - unsigned NewSubReg = TrackSubReg; - // Try to find a more suitable source. - // If we failed to do so, or get the actual source, - // move to the next source. - if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) + // Keep track of PHI nodes and its incoming edges when looking for sources. + RewriteMapTy RewriteMap; + // Try to find a more suitable source. If we failed to do so, or get the + // actual source, move to the next source. + if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) continue; + + // Get the new source to rewrite. TODO: Only enable handling of multiple + // sources (PHIs) once we have a motivating example and testcases for it. + TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); + TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( + MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); + if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) + continue; + // Rewrite source. - if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { + if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { // We may have extended the live-range of NewSrc, account for that. - MRI->clearKillFlags(NewSrc); + MRI->clearKillFlags(NewSrc.Reg); Changed = true; } } @@ -936,61 +1247,53 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); // Check if we can rewrite all the values defined by this instruction. - SmallVector< - std::pair, - 4> RewritePairs; - for (const MachineOperand &MODef : MI->defs()) { - if (MODef.isDead()) - // We can ignore those. - continue; + SmallVector RewritePairs; + // Get the right rewriter for the current copy. + std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source by generating new COPYs. This works + // differently from optimizeCoalescableCopy since it first makes sure that all + // definitions can be rewritten. + RewriteMapTy RewriteMap; + unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; + while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, + CopyDefSubReg)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) return false; // If we do not know how to rewrite this definition, there is no point // in trying to kill this instruction. - TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); - TargetInstrInfo::RegSubRegPair Src = Def; - if (!findNextSource(Src.Reg, Src.SubReg)) + TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); + if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) return false; - RewritePairs.push_back(std::make_pair(Def, Src)); + + RewritePairs.push_back(Def); } + // The change is possible for all defs, do it. - for (const auto &PairDefSrc : RewritePairs) { - const auto &Def = PairDefSrc.first; - const auto &Src = PairDefSrc.second; + for (const auto &Def : RewritePairs) { // Rewrite the "copy" in a way the register coalescer understands. 
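optimizeUncoalescableCopy, whose rewrite loop continues below, is deliberately two-phase: it first proves that every definition has a rewritable source (returning early with no side effects otherwise), and only then materializes the new COPYs. A minimal sketch of that validate-then-apply shape, with illustrative types rather than the LLVM API:

    #include <iostream>
    #include <optional>
    #include <vector>

    // Toy stand-in for findNextSource: even defs have a source, odd do not.
    std::optional<int> findNextSource(int Def) {
      return Def % 2 == 0 ? std::optional<int>(Def / 2) : std::nullopt;
    }

    bool rewriteAll(const std::vector<int> &Defs) {
      std::vector<int> Planned;
      for (int D : Defs) {              // phase 1: plan, no side effects
        auto Src = findNextSource(D);
        if (!Src)
          return false;                 // any failure aborts before mutation
        Planned.push_back(*Src);
      }
      for (size_t I = 0; I < Defs.size(); ++I) // phase 2: apply
        std::cout << "rewrite " << Defs[I] << " -> " << Planned[I] << '\n';
      return true;
    }

    int main() {
      std::cout << rewriteAll({8, 4}) << '\n'; // succeeds, both rewritten
      std::cout << rewriteAll({8, 3}) << '\n'; // aborts, nothing rewritten
    }

Separating the phases is what makes it safe for the caller to erase the original instruction unconditionally once the apply loop has run.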
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && - "We do not rewrite physical registers"); - const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVR = MRI->createVirtualRegister(DefRC); - MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - NewVR).addReg(Src.Reg, 0, Src.SubReg); - NewCopy->getOperand(0).setSubReg(Def.SubReg); - if (Def.SubReg) - NewCopy->getOperand(0).setIsUndef(); + MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); + assert(NewCopy && "Should be able to always generate a new copy"); LocalMIs.insert(NewCopy); - MRI->replaceRegWith(Def.Reg, NewVR); - MRI->clearKillFlags(NewVR); - // We extended the lifetime of Src. - // Clear the kill flags to account for that. - MRI->clearKillFlags(Src.Reg); } + // MI is now dead. MI->eraseFromParent(); ++NumUncoalescableCopies; return true; } -/// isLoadFoldable - Check whether MI is a candidate for folding into a later -/// instruction. We only fold loads to virtual registers and the virtual -/// register defined has a single use. +/// Check whether MI is a candidate for folding into a later instruction. +/// We only fold loads to virtual registers and the virtual register defined +/// has a single use. bool PeepholeOptimizer::isLoadFoldable( - MachineInstr *MI, - SmallSet &FoldAsLoadDefCandidates) { + MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates) { if (!MI->canFoldAsLoad() || !MI->mayLoad()) return false; const MCInstrDesc &MCID = MI->getDesc(); @@ -1010,9 +1313,9 @@ bool PeepholeOptimizer::isLoadFoldable( return false; } -bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, - SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs) { +bool PeepholeOptimizer::isMoveImmediate( + MachineInstr *MI, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); if (!MI->isMoveImmediate()) return false; @@ -1028,23 +1331,26 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, return false; } -/// foldImmediate - Try folding register operands that are defined by move -/// immediate instructions, i.e. a trivial constant folding optimization, if +/// Try folding register operands that are defined by move immediate +/// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. -bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, - SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs) { +bool PeepholeOptimizer::foldImmediate( + MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; DenseMap::iterator II = ImmDefMIs.find(Reg); - assert(II != ImmDefMIs.end()); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { ++NumImmFold; return true; @@ -1053,6 +1359,117 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, return false; } +// FIXME: This is very simple and misses some cases which should be handled when +// motivating examples are found. 
+// +// The copy rewriting logic should look at uses as well as defs and be able to +// eliminate copies across blocks. +// +// Later copies that are subregister extracts will also not be eliminated since +// only the first copy is considered. +// +// e.g. +// %vreg1 = COPY %vreg0 +// %vreg2 = COPY %vreg0:sub1 +// +// Should replace %vreg2 uses with %vreg1:sub1 +bool PeepholeOptimizer::foldRedundantCopy( + MachineInstr *MI, SmallSet &CopySrcRegs, + DenseMap &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + if (CopySrcRegs.insert(SrcReg).second) { + // First copy of this reg seen. + CopyMIs.insert(std::make_pair(SrcReg, MI)); + return false; + } + + MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); + + // Can't replace different subregister extracts. + if (SrcSubReg != PrevSrcSubReg) + return false; + + unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + + // Only replace if the copy register class is the same. + // + // TODO: If we have multiple copies to different register classes, we may want + // to track multiple copies of the same source register. + if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) + return false; + + MRI->replaceRegWith(DstReg, PrevDstReg); + + // Lifetime of the previous copy has been extended. + MRI->clearKillFlags(PrevDstReg); + return true; +} + +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. 
Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; @@ -1070,9 +1487,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - + for (MachineBasicBlock &MBB : MF) { bool SeenMoveImm = false; // During this forward scan, at some point it needs to answer the question @@ -1086,8 +1501,19 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap ImmDefMIs; SmallSet FoldAsLoadDefCandidates; - for (MachineBasicBlock::iterator - MII = I->begin(), MIE = I->end(); MII != MIE; ) { + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. + // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. + DenseMap NAPhysToVirtMIs; + + // Set of virtual registers that are copied from. + SmallSet CopySrcRegs; + DenseMap CopySrcMIs; + + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE; ) { MachineInstr *MI = &*MII; // We may be erasing MI below, increment MII now. ++MII; @@ -1097,20 +1523,60 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - // If there exists an instruction which belongs to the following - // categories, we will discard the load candidates. - if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) { + // If we run into an instruction we can't fold across, discard + // the load candidates. + if (MI->isLoadFoldBarrier()) FoldAsLoadDefCandidates.clear(); + + if (MI->isPosition() || MI->isPHI()) + continue; + + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. 
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); continue; } - if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefCandidates.clear(); if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || - (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); @@ -1129,17 +1595,26 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { + LocalMIs.erase(MI); + MI->eraseFromParent(); + Changed = true; + continue; + } + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) - Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later @@ -1190,8 +1665,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } -bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. @@ -1199,30 +1673,27 @@ bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { assert(Def->isBitcast() && "Invalid definition"); // Bail if there are effects that a plain copy will not expose. if (Def->hasUnmodeledSideEffects()) - return false; + return ValueTrackerResult(); // Bitcasts with more than one def are not supported. if (Def->getDesc().getNumDefs() != 1) - return false; + return ValueTrackerResult(); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of the src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); unsigned SrcIdx = Def->getNumOperands(); for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; @@ -1230,25 +1701,25 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, const MachineOperand &MO = Def->getOperand(OpIdx); if (!MO.isReg() || !MO.getReg()) continue; + // Ignore dead implicit defs. 
+ if (MO.isImplicit() && MO.isDead()) + continue; assert(!MO.isDef() && "We should have skipped all the definitions by now"); if (SrcIdx != EndOpIdx) // Multiple sources? - return false; + return ValueTrackerResult(); SrcIdx = OpIdx; } const MachineOperand &Src = Def->getOperand(SrcIdx); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { assert((Def->isRegSequence() || Def->isRegSequenceLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subregs, bail out. // The case we are checking is Def. = REG_SEQUENCE. // This should almost never happen as the SSA property is tracked at // the register level (as opposed to the subreg level). @@ -1262,16 +1733,16 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, // have this case. // If we can ascertain (or force) that this never happens, we could // turn that into an assertion. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); SmallVector RegSeqInputRegs; if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... @@ -1279,41 +1750,38 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, for (auto &RegSeqInput : RegSeqInputRegs) { if (RegSeqInput.SubIdx == DefSubReg) { if (RegSeqInput.SubReg) - // Bails if we have to compose sub registers. - return false; + // Bail if we have to compose sub registers. + return ValueTrackerResult(); - SrcReg = RegSeqInput.Reg; - SrcSubReg = RegSeqInput.SubReg; - return true; + return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); } } // If the subreg we are tracking is super-defined by another subreg, // we could follow this value. However, this would require to compose // the subreg and we do not do that for now. - return false; + return ValueTrackerResult(); } -bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subreg, bail out. // Same remark as getNextSourceFromRegSequence. // I.e., this may be turned into an assert. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPair BaseReg; TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = INSERT_SUBREG v0, v1, sub1 @@ -1323,9 +1791,7 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, // #1 Check if the inserted register matches the required sub index. 
if (InsertedReg.SubIdx == DefSubReg) { - SrcReg = InsertedReg.Reg; - SrcSubReg = InsertedReg.SubReg; - return true; + return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); } // #2 Otherwise, if the sub register we are looking for is not partial // defined by the inserted element, we can look through the main @@ -1333,10 +1799,10 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, const MachineOperand &MODef = Def->getOperand(DefIdx); // If the result register (Def) and the base register (v0) do not // have the same register class or if we have to compose - // subregisters, bails out. + // subregisters, bail out. if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || BaseReg.SubReg) - return false; + return ValueTrackerResult(); // Get the TRI and check if the inserted sub-register overlaps with the // sub-register we are tracking. @@ -1344,121 +1810,138 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, if (!TRI || (TRI->getSubRegIndexLaneMask(DefSubReg) & TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) - return false; + return ValueTrackerResult(); // At this point, the value is available in v0 via the same subreg // we used for Def. - SrcReg = BaseReg.Reg; - SrcSubReg = DefSubReg; - return true; + return ValueTrackerResult(BaseReg.Reg, DefSubReg); } -bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) && "Invalid definition"); // We are looking at: // Def = EXTRACT_SUBREG v0, sub0 - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // Indeed, if DefSubReg != 0, we would have to compose it with sub0. if (DefSubReg) - return false; + return ValueTrackerResult(); if (!TII) // We could handle the EXTRACT_SUBREG here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) - return false; + return ValueTrackerResult(); - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. if (ExtractSubregInputReg.SubReg) - return false; + return ValueTrackerResult(); // Otherwise, the value is available in the v0.sub0. - SrcReg = ExtractSubregInputReg.Reg; - SrcSubReg = ExtractSubregInputReg.SubIdx; - return true; + return ValueTrackerResult(ExtractSubregInputReg.Reg, + ExtractSubregInputReg.SubIdx); } -bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { assert(Def->isSubregToReg() && "Invalid definition"); // We are looking at: // Def = SUBREG_TO_REG Imm, v0, sub0 - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // If DefSubReg != sub0, we would have to check that all the bits // we track are included in sub0 and if yes, we would have to // determine the right subreg in v0. if (DefSubReg != Def->getOperand(3).getImm()) - return false; - // Bails if we have to compose sub registers. + return ValueTrackerResult(); + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 
if (Def->getOperand(2).getSubReg()) - return false; + return ValueTrackerResult(); - SrcReg = Def->getOperand(2).getReg(); - SrcSubReg = Def->getOperand(3).getImm(); - return true; + return ValueTrackerResult(Def->getOperand(2).getReg(), + Def->getOperand(3).getImm()); } -bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { +/// \brief Explore each PHI incoming operand and return its sources +ValueTrackerResult ValueTracker::getNextSourceFromPHI() { + assert(Def->isPHI() && "Invalid definition"); + ValueTrackerResult Res; + + // If we look for a different subreg, bail as we do not support composing + // subregs yet. + if (Def->getOperand(0).getSubReg() != DefSubReg) + return ValueTrackerResult(); + + // Return all register sources for PHI instructions. + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { + auto &MO = Def->getOperand(i); + assert(MO.isReg() && "Invalid PHI instruction"); + Res.addSource(MO.getReg(), MO.getSubReg()); + } + + return Res; +} + +ValueTrackerResult ValueTracker::getNextSourceImpl() { assert(Def && "This method needs a valid definition"); assert( (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); if (Def->isCopy()) - return getNextSourceFromCopy(SrcReg, SrcSubReg); + return getNextSourceFromCopy(); if (Def->isBitcast()) - return getNextSourceFromBitcast(SrcReg, SrcSubReg); + return getNextSourceFromBitcast(); // All the remaining cases involve "complex" instructions. - // Bails if we did not ask for the advanced tracking. + // Bail if we did not ask for the advanced tracking. if (!UseAdvancedTracking) - return false; + return ValueTrackerResult(); if (Def->isRegSequence() || Def->isRegSequenceLike()) - return getNextSourceFromRegSequence(SrcReg, SrcSubReg); + return getNextSourceFromRegSequence(); if (Def->isInsertSubreg() || Def->isInsertSubregLike()) - return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); + return getNextSourceFromInsertSubreg(); if (Def->isExtractSubreg() || Def->isExtractSubregLike()) - return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); + return getNextSourceFromExtractSubreg(); if (Def->isSubregToReg()) - return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); - return false; + return getNextSourceFromSubregToReg(); + if (Def->isPHI()) + return getNextSourceFromPHI(); + return ValueTrackerResult(); } -const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSource() { // If we reach a point where we cannot move up in the use-def chain, // there is nothing we can get. if (!Def) - return nullptr; + return ValueTrackerResult(); - const MachineInstr *PrevDef = nullptr; - // Try to find the next source. - if (getNextSourceImpl(SrcReg, SrcSubReg)) { + ValueTrackerResult Res = getNextSourceImpl(); + if (Res.isValid()) { // Update definition, definition index, and subregister for the // next call of getNextSource. // Update the current register. - Reg = SrcReg; - // Update the return value before moving up in the use-def chain. - PrevDef = Def; + bool OneRegSrc = Res.getNumSources() == 1; + if (OneRegSrc) + Reg = Res.getSrcReg(0); + // Update the result before moving up in the use-def chain + // with the instruction containing the last found sources. + Res.setInst(Def); + // If we can still move up in the use-def chain, move to the next - // defintion. - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + // definition. 
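The new getNextSourceFromPHI relies on the PHI operand layout: operand 0 is the def, and the rest come in (value, predecessor block) pairs, which is why its loop starts at index 1 and steps by 2. In miniature, with integers standing in for registers and blocks:

    #include <cstdio>
    #include <vector>

    // Toy PHI: Operands[0] is the def; after that, operands come in
    // (value, predecessor-block) pairs.
    struct PHI {
      std::vector<int> Operands; // [def, v1, bb1, v2, bb2, ...]
    };

    std::vector<int> collectIncomingValues(const PHI &Phi) {
      std::vector<int> Sources;
      for (unsigned i = 1, e = Phi.Operands.size(); i < e; i += 2)
        Sources.push_back(Phi.Operands[i]); // skip the block operands
      return Sources;
    }

    int main() {
      PHI Phi{{100, 1, 10, 2, 20, 3, 30}};
      for (int V : collectIncomingValues(Phi))
        std::printf("incoming value: %d\n", V); // 1, 2, 3
    }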
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = SrcSubReg; - return PrevDef; + DefSubReg = Res.getSrcSubReg(0); + return Res; } } // If we end up here, this means we will not be able to find another source - // for the next iteration. - // Make sure any new call to getNextSource bails out early by cutting the - // use-def chain. + // for the next iteration. Make sure any new call to getNextSource bails out + // early by cutting the use-def chain. Def = nullptr; - return PrevDef; + return Res; } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 6f76116da1eb..b95dffd05c46 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -196,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList( const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { const InstrItineraryData *InstrItins = MF.getSubtarget().getInstrItineraryData(); @@ -267,7 +267,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getSubtarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis(); - AliasAnalysis *AA = &getAnalysis(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); TargetPassConfig *PassConfig = &getAnalysis(); RegClassInfo.runOnMachineFunction(Fn); @@ -302,8 +302,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { CriticalPathRCs); // Loop over all of the basic blocks - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { + for (auto &MBB : Fn) { #ifndef NDEBUG // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod if (DebugDiv > 0) { @@ -311,25 +310,25 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (bbcnt++ % DebugDiv != DebugMod) continue; dbgs() << "*** DEBUG scheduling " << Fn.getName() - << ":BB#" << MBB->getNumber() << " ***\n"; + << ":BB#" << MBB.getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.startBlock(MBB); + Scheduler.startBlock(&MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. - MachineBasicBlock::iterator Current = MBB->end(); - unsigned Count = MBB->size(), CurrentCount = Count; - for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineBasicBlock::iterator Current = MBB.end(); + unsigned Count = MBB.size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { MachineInstr *MI = std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. 
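The scheduling loop that continues below walks each block bottom-up and cuts a new region at every boundary instruction (post-RA, calls included), scheduling whatever accumulated since the last cut; the boundary instruction itself is not scheduled. A compact model of that carving, counting the non-empty regions a block produces:

    #include <cstdio>
    #include <vector>

    struct Instr { bool IsBoundary; };

    int countRegions(const std::vector<Instr> &Block) {
      int Regions = 0;
      int Pending = 0; // instructions accumulated since the last cut
      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        if (I->IsBoundary) {
          if (Pending) ++Regions; // schedule what was gathered so far
          Pending = 0;            // the boundary itself is not scheduled
        } else {
          ++Pending;
        }
      }
      if (Pending) ++Regions; // region reaching the top of the block
      return Regions;
    }

    int main() {
      // Block top-to-bottom: i0, i1, CALL, i3 -> regions {i3} and {i0, i1}.
      std::vector<Instr> B = {{false}, {false}, {true}, {false}};
      std::printf("regions: %d\n", countRegions(B)); // 2
    }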
- if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { + Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -343,9 +342,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); - assert((MBB->begin() == Current || CurrentCount != 0) && + assert((MBB.begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -355,7 +354,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(MBB); + Scheduler.fixupKills(&MBB); } return true; @@ -400,8 +399,12 @@ void SchedulePostRATDList::schedule() { } DEBUG(dbgs() << "********** List Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + dbgs() << '\n'; + } + ); AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 5f8194983484..d27ea2f51867 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -58,7 +58,7 @@ INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -96,7 +96,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // This is a physreg implicit-def. // Look for the first instruction to use or define an alias. - MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserMI = MI->getIterator(); MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); bool Found = false; for (++UserMI; UserMI != UserE; ++UserMI) { @@ -151,7 +151,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) if (MBBI->isImplicitDef()) - WorkList.insert(MBBI); + WorkList.insert(&*MBBI); if (WorkList.empty()) continue; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 6ca69a124297..939c50027b02 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -71,8 +71,9 @@ private: // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Save and Restore blocks of the current function. - MachineBasicBlock *SaveBlock; + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. + SmallVector SaveBlocks; SmallVector RestoreBlocks; // Flag to control whether to use the register scavenger to resolve @@ -91,9 +92,6 @@ private: int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. 
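Several hunks here (UserMI = MI->getIterator(), WorkList.insert(&*MBBI), Scheduler.startBlock(&MBB)) make previously implicit iterator/pointer conversions explicit. The idiom in miniature, with std::list standing in for the machine IR's intrusive lists:

    #include <cstdio>
    #include <list>

    struct Node { int Id; };

    int main() {
      std::list<Node> L = {{1}, {2}, {3}};
      std::list<Node>::iterator It = L.begin();
      ++It;
      Node *P = &*It; // explicit conversion, as in WorkList.insert(&*MBBI)
      std::printf("node %d\n", P->Id); // node 2
    }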
- bool isReturnBlock(const MachineBasicBlock *MBB) const; }; } // namespace @@ -128,10 +126,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - /// Compute the set of return blocks void PEI::calculateSets(MachineFunction &Fn) { const MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -142,25 +136,25 @@ void PEI::calculateSets(MachineFunction &Fn) { // Use the points found by shrink-wrapping, if any. if (MFI->getSavePoint()) { - SaveBlock = MFI->getSavePoint(); + SaveBlocks.push_back(MFI->getSavePoint()); assert(MFI->getRestorePoint() && "Both restore and save must be set"); MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block // then the end point is unreachable and we do not need to insert any // epilogue. - if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock)) + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) RestoreBlocks.push_back(RestoreBlock); return; } // Save refs to entry and return blocks. - SaveBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - RestoreBlocks.push_back(MBB); - - return; + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } } /// StackObjSet - A set of stack object indexes @@ -195,7 +189,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // place all spills in the entry block, all restores in return blocks. calculateSets(Fn); - // Add the code to save and restore the callee saved registers + // Add the code to save and restore the callee saved registers. if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); @@ -237,6 +231,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; + SaveBlocks.clear(); RestoreBlocks.clear(); return true; } @@ -407,7 +402,7 @@ static void updateLiveness(MachineFunction &MF) { const MachineBasicBlock *CurBB = WorkList.pop_back_val(); // By construction, the region that is after the save point is // dominated by the Save and post-dominated by the Restore. - if (CurBB == Save) + if (CurBB == Save && Save != Restore) continue; // Enqueue all the successors not already visited. // Those are by construction either before Save or after Restore. @@ -419,10 +414,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - for (MachineBasicBlock *MBB : Visited) + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(CSI[i].getReg()); + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } } } @@ -446,18 +444,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = SaveBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Insert the spill to the stack frame. 
- unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), - RC, TRI); + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(Fn); // Restore using target interface. for (MachineBasicBlock *MBB : RestoreBlocks) { @@ -500,7 +500,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { static inline void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { + unsigned &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI->getObjectSize(FrameIdx); @@ -512,7 +512,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align, Skew); if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); @@ -530,12 +530,12 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet &ProtectedObjs, MachineFrameInfo *MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign) { + int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); ProtectedObjs.insert(i); } } @@ -563,6 +563,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; + // Skew to be applied to alignment. + unsigned Skew = TFI.getStackAlignmentSkew(Fn); + // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
// We currently don't support filling in holes in between fixed sized @@ -593,7 +596,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -602,7 +605,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -624,7 +627,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target @@ -635,7 +638,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -662,7 +665,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { StackObjSet AddrOfObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { @@ -695,11 +698,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); } // Then assign frame offsets to stack objects that are not used to spill @@ -719,7 +722,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (ProtectedObjs.count(i)) continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); } // Make sure the special register scavenging spill slot is closest to the @@ -729,7 +732,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -754,7 +757,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - Offset = RoundUpToAlignment(Offset, StackAlign); + Offset = RoundUpToAlignment(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... 
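Every rounding step in this function now goes through the three-argument RoundUpToAlignment(Offset, Align, Skew). Its assumed semantics: return the smallest value at or above Offset that is congruent to Skew modulo Align, so a zero skew degenerates to ordinary alignment. A self-contained version with a worked example:

    #include <cstdint>
    #include <cstdio>

    // Assumed behavior of the skewed round-up used by the frame layout code.
    uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew) {
      Skew %= Align;
      return (Value + Align - 1 - Skew) / Align * Align + Skew;
    }

    int main() {
      // With Align = 16 and Skew = 4, valid offsets are 4, 20, 36, ...
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(21, 16, 4)); // 36
      // Skew = 0 is the classic round-up: 21 -> 32.
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(21, 16, 0)); // 32
    }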
@@ -771,18 +774,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); // Add epilogue to restore the callee-save registers in each exiting block. for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(Fn, *RestoreBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.inlineStackProbe(Fn, *SaveBlock); + // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -790,7 +799,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -800,25 +810,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; - MachineModuleInfo &MMI = Fn.getMMI(); - const Function *F = Fn.getFunction(); - const Function *ParentF = MMI.getWinEHParent(F); - unsigned FrameReg; - if (F == ParentF) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - // FIXME: This should be unconditional but we have bugs in the preparation - // pass. - if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) - FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( - Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); - } else if (MMI.hasWinEHFuncInfo(F)) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) - FuncInfo.CatchHandlerParentFrameObjOffset[F] = - TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); - } - // Store SPAdj at exit of a basic block. SmallVector SPState; SPState.resize(Fn.getNumBlockIDs()); @@ -841,12 +832,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { } // Handle the unreachable blocks. - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + for (auto &BB : Fn) { + if (Reachable.count(&BB)) // Already handled in DFS traversal. 
continue; int SPAdj = 0; - replaceFrameIndices(BB, Fn, SPAdj); + replaceFrameIndices(&BB, Fn, SPAdj); } } @@ -889,11 +880,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (!MI->getOperand(i).isFI()) continue; - // Frame indicies in debug values are encoded in a target independent + // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue()) { - assert(i == 0 && "Frame indicies can only appear as the first " + assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(1); @@ -979,7 +970,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - RS->enterBasicBlock(BB); + RS->enterBasicBlock(&*BB); int SPAdj = 0; @@ -1026,12 +1017,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - // Make sure MRI now accounts this register as used. - MRI.setPhysRegUsed(ScratchReg); - // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. @@ -1044,7 +1031,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // problem because we need the spill code before I: Move I to just // prior to J. if (I != std::prev(J)) { - BB->splice(J, BB, I); + BB->splice(J, &*BB, I); // Before we move I, we need to prepare the RS to visit I again. // Specifically, RS will assert if it sees uses of registers that diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b1c341d3a681..1f46417e61e7 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -22,87 +23,38 @@ #include using namespace llvm; -namespace { -struct PSVGlobalsTy { - // PseudoSourceValues are immutable so don't need locking. - const PseudoSourceValue PSVs[4]; - sys::Mutex Lock; // Guards FSValues, but not the values inside it. 
-  std::map<int, const PseudoSourceValue *> FSValues;
-
-  PSVGlobalsTy() : PSVs() {}
-  ~PSVGlobalsTy() {
-    for (std::map<int, const PseudoSourceValue *>::iterator
-           I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
-      delete I->second;
-    }
-  }
-};
-
-static ManagedStatic<PSVGlobalsTy> PSVGlobals;
-
-} // anonymous namespace
-
-const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &PSVGlobals->PSVs[0]; }
-const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &PSVGlobals->PSVs[1]; }
-const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &PSVGlobals->PSVs[2]; }
-const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &PSVGlobals->PSVs[3]; }
-
 static const char *const PSVNames[] = {
-  "Stack",
-  "GOT",
-  "JumpTable",
-  "ConstantPool"
-};
+    "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+    "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
 
-PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
 
 PseudoSourceValue::~PseudoSourceValue() {}
 
 void PseudoSourceValue::printCustom(raw_ostream &O) const {
-  O << PSVNames[this - PSVGlobals->PSVs];
-}
-
-const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
-  PSVGlobalsTy &PG = *PSVGlobals;
-  sys::ScopedLock locked(PG.Lock);
-  const PseudoSourceValue *&V = PG.FSValues[FI];
-  if (!V)
-    V = new FixedStackPseudoSourceValue(FI);
-  return V;
+  O << PSVNames[Kind];
 }
 
 bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
-  if (this == getStack())
+  if (isStack())
     return false;
-  if (this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
+  if (isGOT() || isConstantPool() || isJumpTable())
     return true;
   llvm_unreachable("Unknown PseudoSourceValue!");
 }
 
-bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
-  if (this == getStack() ||
-      this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+  if (isStack() || isGOT() || isConstantPool() || isJumpTable())
     return false;
   llvm_unreachable("Unknown PseudoSourceValue!");
 }
 
-bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
-  if (this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
-    return false;
-  return true;
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+  return !(isGOT() || isConstantPool() || isJumpTable());
 }
 
-bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+bool FixedStackPseudoSourceValue::isConstant(
+    const MachineFrameInfo *MFI) const {
   return MFI && MFI->isImmutableObjectIndex(FI);
 }
 
@@ -122,3 +74,69 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
 void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
   OS << "FixedStack" << FI;
 }
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
+    : PseudoSourceValue(Kind) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+  return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+  return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+  return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+    const GlobalValue *GV)
+    : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
+
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
+    : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
+
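The rewrite replaces pointer-identity tests against lazily allocated singletons with an explicit PSVKind tag carried by every value, so the predicates become plain kind tests. Reduced to its essentials (a sketch, not the real class hierarchy):

    #include <cstdio>

    enum PSVKind { Stack, GOT, JumpTable, ConstantPool, FixedStack,
                   GlobalValueCallEntry, ExternalSymbolCallEntry };

    struct PSV {
      PSVKind Kind;
      bool isStack() const { return Kind == Stack; }
      bool isGOT() const { return Kind == GOT; }
      bool isConstantPool() const { return Kind == ConstantPool; }
      bool isJumpTable() const { return Kind == JumpTable; }
      bool mayAlias() const { // mirrors the rewritten mayAlias above
        return !(isGOT() || isConstantPool() || isJumpTable());
      }
    };

    int main() {
      PSV V{GOT};
      std::printf("mayAlias: %d\n", V.mayAlias()); // 0
    }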
+PseudoSourceValueManager::PseudoSourceValueManager() + : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT), + JumpTablePSV(PseudoSourceValue::JumpTable), + ConstantPoolPSV(PseudoSourceValue::ConstantPool) {} + +const PseudoSourceValue *PseudoSourceValueManager::getStack() { + return &StackPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; } + +const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() { + return &ConstantPoolPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() { + return &JumpTablePSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) { + std::unique_ptr &V = FSValues[FI]; + if (!V) + V = llvm::make_unique(FI); + return V.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { + std::unique_ptr &E = + GlobalCallEntries[GV]; + if (!E) + E = llvm::make_unique(GV); + return E.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { + std::unique_ptr &E = + ExternalCallEntries[ES]; + if (!E) + E = llvm::make_unique(ES); + return E.get(); +} diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 0090332a8123..cfe367d5115c 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -133,8 +133,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -223,7 +223,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVector PhysRegSpillCands; // Check for an available register in this class. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); while (unsigned PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { @@ -276,7 +276,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { getAnalysis(), getAnalysis()); - calculateSpillWeightsAndHints(*LIS, *MF, + calculateSpillWeightsAndHints(*LIS, *MF, VRM, getAnalysis(), getAnalysis()); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fd3d4d78968b..f4c076fea0e7 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -799,10 +799,9 @@ void RAFast::AllocateBasicBlock() { MachineBasicBlock::iterator MII = MBB->begin(); // Add live-in registers as live. - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (MRI->isAllocatable(*I)) - definePhysReg(MII, *I, regReserved); + for (const auto &LI : MBB->liveins()) + if (MRI->isAllocatable(LI.PhysReg)) + definePhysReg(MII, LI.PhysReg, regReserved); SmallVector VirtDead; SmallVector Coalesced; @@ -986,10 +985,6 @@ void RAFast::AllocateBasicBlock() { } } - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - // Track registers defined by instruction - early clobbers and tied uses at // this point. 
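The PseudoSourceValueManager getters above share one get-or-create shape: index a map of unique_ptr slots, populate the slot on first use, and hand out a stable raw pointer that lives as long as the manager. A minimal standalone rendition, using std::make_unique where the tree uses llvm::make_unique:

    #include <cstdio>
    #include <map>
    #include <memory>

    struct FixedStackValue {
      explicit FixedStackValue(int FI) : FI(FI) {}
      int FI;
    };

    class Manager {
      std::map<int, std::unique_ptr<FixedStackValue>> FSValues;

    public:
      const FixedStackValue *getFixedStack(int FI) {
        std::unique_ptr<FixedStackValue> &V = FSValues[FI]; // empty slot on first lookup
        if (!V)
          V = std::make_unique<FixedStackValue>(FI);
        return V.get(); // stable as long as the manager lives
      }
    };

    int main() {
      Manager M;
      const FixedStackValue *A = M.getFixedStack(3);
      const FixedStackValue *B = M.getFixedStack(3);
      std::printf("same object: %d\n", A == B); // 1
    }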
UsedInInstr.clear(); @@ -1050,10 +1045,6 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); @@ -1103,12 +1094,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Add the clobber lists for all the instructions we skipped earlier. - for (const MCInstrDesc *Desc : SkippedInstrs) - if (const uint16_t *Defs = Desc->getImplicitDefs()) - while (*Defs) - MRI->setPhysRegUsed(*Defs++); - // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. MRI->clearVirtRegs(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 7ebcf7f54856..945cb9e2c993 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -86,6 +86,14 @@ static cl::opt EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +static cl::opt EnableDeferredSpilling( + "enable-deferred-spilling", cl::Hidden, + cl::desc("Instead of spilling a variable right away, defer the actual " + "code insertion to the end of the allocation. That way the " + "allocator might still find a suitable coloring for this " + "variable because of other evicted variables."), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass, /// Live range will be spilled. No more splitting will be attempted. RS_Spill, + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + /// There is nothing more we can do to this live range. Abort compilation /// if it can't be assigned. RS_Done @@ -414,6 +427,7 @@ const char *const RAGreedy::StageName[] = { "RS_Split", "RS_Split2", "RS_Spill", + "RS_Memory", "RS_Done" }; #endif @@ -447,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -536,6 +550,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Prio = Size; + } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + // Memory operand should be considered last. + // Change the priority such that Memory operand are assigned in + // the reverse order that they came in. + // TODO: Make this a member variable and probably do something about hints. + static unsigned MemOp = 0; + Prio = MemOp++; } else { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. 
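The RS_Memory branch above derives the priority from a monotonically increasing counter, and the allocator's priority queue is assumed to pop the highest priority first, so deferred ranges come back in reverse arrival order. The effect in isolation:

    #include <cstdio>
    #include <queue>
    #include <utility>

    int main() {
      // (Prio, id) pairs in a max-heap, modeling the allocator's queue.
      std::priority_queue<std::pair<unsigned, char>> Q;
      unsigned MemOp = 0;
      for (char Id : {'a', 'b', 'c'})
        Q.push({MemOp++, Id});
      while (!Q.empty()) {
        std::printf("%c ", Q.top().second); // c b a
        Q.pop();
      }
      std::printf("\n");
    }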
@@ -637,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); unsigned PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -2450,7 +2471,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical @@ -2512,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; // Finally spill VirtReg itself. - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); - spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + // TODO: This is experimental and in particular, we do not model + // the live range splitting done by spilling correctly. + // We would need a deep integration with the spiller to do the + // right thing here. Anyway, that is still good for early testing. + setStage(VirtReg, RS_Memory); + DEBUG(dbgs() << "Do as if this register is in memory\n"); + NewVRegs.push_back(VirtReg.reg); + } else { + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); - if (VerifyEnabled) - MF->verify(this, "After spilling"); + if (VerifyEnabled) + MF->verify(this, "After spilling"); + } // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. 
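Stripped of allocator plumbing, the deferred-spilling change is a two-visit protocol: on the first would-be spill, mark the range as living in memory and requeue it; only spill for real if it comes up again. A sketch of that control flow, with an invented Stage enum mirroring the RS_* names:

    #include <cstdio>

    enum Stage { RS_New, RS_Memory, RS_Done };

    Stage selectOrSplit(Stage S, bool EnableDeferredSpilling) {
      if (EnableDeferredSpilling && S < RS_Memory) {
        std::printf("defer: pretend the register lives in memory\n");
        return RS_Memory; // requeued; evictions may still free a color for it
      }
      std::printf("spill for real\n");
      return RS_Done;
    }

    int main() {
      Stage S = RS_New;
      S = selectOrSplit(S, true); // first visit: defer
      S = selectOrSplit(S, true); // second visit: spill
    }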
@@ -2555,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); DEBUG(LIS->dump()); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index eeff73d0f2a0..fd28b05ed80a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -497,8 +498,8 @@ void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); - au.addRequired(); - au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); au.addRequired(); @@ -724,11 +725,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { MachineBlockFrequencyInfo &MBFI = getAnalysis(); - calculateSpillWeightsAndHints(LIS, MF, getAnalysis(), MBFI, - normalizePBQPSpillWeight); - VirtRegMap &VRM = getAnalysis(); + calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis(), + MBFI, normalizePBQPSpillWeight); + std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); MF.getRegInfo().freezeReservedRegs(MF); @@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. +static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }); } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index c911b9b47ea2..e7b32179bde5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -93,7 +92,7 @@ namespace { /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. - unsigned ShrinkMask; + LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. @@ -164,15 +163,13 @@ namespace { /// LaneMask are split as necessary. 
@p LaneMask are the lanes that
    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
    /// lanemasks already adjusted to the coalesced register.
-    /// @returns false if live range conflicts couldn't get resolved.
-    bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
-                           unsigned LaneMask, CoalescerPair &CP);
+    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+                           LaneBitmask LaneMask, CoalescerPair &CP);
 
     /// Join the liveranges of two subregisters. Joins @p RRange into
     /// @p LRange, @p RRange may be invalid afterwards.
-    /// @returns false if live range conflicts couldn't get resolved.
-    bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
-                          unsigned LaneMask, const CoalescerPair &CP);
+    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+                          LaneBitmask LaneMask, const CoalescerPair &CP);
 
     /// We found a non-trivially-coalescable copy. If the source value number is
     /// defined by a copy from the destination reg see if we can merge these two
@@ -224,30 +221,17 @@ namespace {
     /// Dst, we can drop \p Copy.
     bool applyTerminalRule(const MachineInstr &Copy) const;
 
-    /// Check whether or not \p LI is composed by multiple connected
-    /// components and if that is the case, fix that.
-    void splitNewRanges(LiveInterval *LI) {
-      ConnectedVNInfoEqClasses ConEQ(*LIS);
-      unsigned NumComps = ConEQ.Classify(LI);
-      if (NumComps <= 1)
-        return;
-      SmallVector<LiveInterval*, 8> NewComps(1, LI);
-      for (unsigned i = 1; i != NumComps; ++i) {
-        unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
-        NewComps.push_back(&LIS->createEmptyInterval(VReg));
-      }
-
-      ConEQ.Distribute(&NewComps[0], *MRI);
-    }
-
     /// Wrapper method for \see LiveIntervals::shrinkToUses.
     /// This method does the proper fixing of the live-ranges when the afore
     /// mentioned method returns true.
     void shrinkToUses(LiveInterval *LI,
                       SmallVectorImpl<MachineInstr *> *Dead = nullptr) {
-      if (LIS->shrinkToUses(LI, Dead))
-        // We may have created multiple connected components, split them.
-        splitNewRanges(LI);
+      if (LIS->shrinkToUses(LI, Dead)) {
+        /// Check whether or not \p LI is composed by multiple connected
+        /// components and if that is the case, fix that.
+        SmallVector<LiveInterval*, 8> SplitLIs;
+        LIS->splitSeparateComponents(*LI, SplitLIs);
+      }
     }
 
   public:
@@ -275,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                     "Simple Register Coalescing", false, false)
 
@@ -453,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
 
 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
-  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<LiveIntervals>();
   AU.addPreserved<LiveIntervals>();
   AU.addPreserved<SlotIndexes>();
@@ -679,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   unsigned UseOpIdx;
   if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
     return false;
-  unsigned Op1, Op2, NewDstIdx;
-  if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
-    return false;
-  if (Op1 == UseOpIdx)
-    NewDstIdx = Op2;
-  else if (Op2 == UseOpIdx)
-    NewDstIdx = Op1;
-  else
+
+  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+  // commutable operand which is expressed by 'CommuteAnyOperandIndex' value
+  // passed to the method.
That _other_ operand is chosen by + // the findCommutedOpIndices() method. + // + // That is obviously an area for improvement in case of instructions having + // more than 2 operands. For example, if some instruction has 3 commutable + // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, + // op#2<->op#3) of commute transformation should be considered/tried here. + unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -719,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = TII->commuteInstruction(DefMI); + MachineInstr *NewMI = + TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && @@ -804,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -812,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); - unsigned AMask = SA.LaneMask; + LaneBitmask AMask = SA.LaneMask; for (LiveInterval::SubRange &SB : IntB.subranges()) { - unsigned BMask = SB.LaneMask; - unsigned Common = BMask & AMask; + LaneBitmask BMask = SB.LaneMask; + LaneBitmask Common = BMask & AMask; if (Common == 0) continue; - DEBUG( - dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common)); - unsigned BRest = BMask & ~AMask; + DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) + << " into " << PrintLaneMask(Common) << '\n'); + LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; if (BRest != 0) { SB.LaneMask = BRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) + << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); } else { @@ -842,7 +832,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, AMask &= ~BMask; } if (AMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); @@ -1107,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. 
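The FIXME spanning this hunk describes the new findCommutedOpIndices contract: the caller pins one operand index and passes the CommuteAnyOperandIndex sentinel for the other, which the target either narrows to a concrete commutable partner or rejects. A toy instruction whose operands 1 and 2 form the commutable pair; the resolution logic here is illustrative, not LLVM's:

    #include <cstdio>

    constexpr unsigned CommuteAnyOperandIndex = ~0u;

    // Narrow any sentinel index to a concrete operand, then verify the pair.
    bool findCommutedOpIndices(unsigned &OpIdx1, unsigned &OpIdx2) {
      auto fixOne = [](unsigned &Idx, unsigned Concrete) {
        if (Idx == CommuteAnyOperandIndex) Idx = Concrete;
        return Idx == Concrete;
      };
      if (OpIdx1 == 1 || OpIdx2 == 2) return fixOne(OpIdx1, 1) && fixOne(OpIdx2, 2);
      if (OpIdx1 == 2 || OpIdx2 == 1) return fixOne(OpIdx1, 2) && fixOne(OpIdx2, 1);
      return false;
    }

    int main() {
      unsigned Use = 2, Other = CommuteAnyOperandIndex;
      if (findCommutedOpIndices(Use, Other))
        std::printf("commute operands %u and %u\n", Use, Other); // 2 and 1
    }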
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { - unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { if ((SR.LaneMask & SrcMask) == 0) continue; @@ -1128,7 +1118,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { DstLI.MergeValueNumberInto(VNI, PrevVNI); // The affected subregister segments can be removed. - unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); for (LiveInterval::SubRange &SR : DstLI.subranges()) { if ((SR.LaneMask & DstMask) == 0) continue; @@ -1147,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { continue; const MachineInstr &MI = *MO.getParent(); SlotIndex UseIdx = LIS->getInstructionIndex(&MI); - unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; if (UseMask != ~0u && DstLI.hasSubRanges()) { isLive = false; @@ -1213,10 +1203,10 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); } - unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); bool IsUndef = true; SlotIndex MIIdx = UseMI->isDebugValue() ? LIS->getSlotIndexes()->getIndexBefore(UseMI) @@ -1445,8 +1435,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & ShrinkMask) == 0) continue; - DEBUG(dbgs() << "Shrink LaneUses (Lane " - << format("%04X", S.LaneMask) << ")\n"); + DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); LIS->shrinkToUses(S, LI.reg); } LI.removeEmptySubRanges(); @@ -1644,7 +1634,7 @@ class JoinVals { const unsigned SubIdx; /// The LaneMask that this liverange will occupy the coalesced register. May /// be smaller than the lanemask produced by SubIdx when merging subranges. - const unsigned LaneMask; + const LaneBitmask LaneMask; /// This is true when joining sub register ranges, false when joining main /// ranges. @@ -1699,11 +1689,11 @@ class JoinVals { ConflictResolution Resolution; /// Lanes written by this def, 0 for unanalyzed values. - unsigned WriteLanes; + LaneBitmask WriteLanes; /// Lanes with defined values in this register. Other lanes are undef and /// safe to clobber. - unsigned ValidLanes; + LaneBitmask ValidLanes; /// Value in LI being redefined by this def. VNInfo *RedefVNI; @@ -1744,7 +1734,7 @@ class JoinVals { /// Compute the bitmask of lanes actually written by DefMI. /// Set Redef if there are any partial register definitions that depend on the /// previous value of the register. - unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. std::pair followCopyChain(const VNInfo *VNI) const; @@ -1780,12 +1770,12 @@ class JoinVals { /// entry to TaintedVals. /// /// Returns false if the tainted lanes extend beyond the basic block. 
-  bool taintExtent(unsigned, unsigned, JoinVals&,
-                   SmallVectorImpl<std::pair<SlotIndex,unsigned> >&);
+  bool taintExtent(unsigned, LaneBitmask, JoinVals&,
+                   SmallVectorImpl<std::pair<SlotIndex,LaneBitmask> >&);
 
   /// Return true if MI uses any of the given Lanes from Reg.
   /// This does not include partial redefinitions of Reg.
-  bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+  bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const;
 
   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
   /// be pruned:
@@ -1796,7 +1786,7 @@ class JoinVals {
   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 
 public:
-  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
            SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
            bool TrackSubRegLiveness)
@@ -1822,8 +1812,8 @@ public:
 
   /// Removes subranges starting at copies that get removed. This sometimes
   /// happens when undefined subranges are copied around. These ranges contain
-  /// no usefull information and can be removed.
-  void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
+  /// no useful information and can be removed.
+  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
 
   /// Erase any machine instructions that have been coalesced away.
   /// Add erased instructions to ErasedInstrs.
@@ -1840,9 +1830,9 @@ public:
 };
 } // end anonymous namespace
 
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
   const {
-  unsigned L = 0;
+  LaneBitmask L = 0;
   for (const MachineOperand &MO : DefMI->operands()) {
     if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
       continue;
@@ -1879,7 +1869,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
     ValueIn = nullptr;
     for (const LiveInterval::SubRange &S : LI.subranges()) {
       // Transform lanemask to a mask in the joined live interval.
-      unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+      LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
       if ((SMask & LaneMask) == 0)
         continue;
       LiveQueryResult LRQ = S.Query(Def);
@@ -1928,7 +1918,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   const MachineInstr *DefMI = nullptr;
   if (VNI->isPHIDef()) {
     // Conservatively assume that all lanes in a PHI are valid.
-    unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+    LaneBitmask Lanes = SubRangeJoin ?
1 : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); @@ -2190,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) { } bool JoinVals:: -taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, - SmallVectorImpl > &TaintExtent) { +taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl > &TaintExtent) { VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); @@ -2230,7 +2220,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, } bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) const { + LaneBitmask Lanes) const { if (MI->isDebugValue()) return false; for (const MachineOperand &MO : MI->operands()) { @@ -2264,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the // join, those lanes will be tainted with a wrong value. Get the extent of // the tainted lanes. - unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; - SmallVector, 8> TaintExtent; + LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector, 8> TaintExtent; if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) // Tainted lanes would extend beyond the basic block. return false; @@ -2384,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; @@ -2401,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); if (ValueOut != nullptr && Q.valueIn() == nullptr) { - DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask) + DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); LIS->pruneValue(S, Def, nullptr); DidPrune = true; @@ -2410,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) continue; } // If a subrange ends at the copy, then a value was copied but only - // partially used later. Shrink the subregister range apropriately. + // partially used later. Shrink the subregister range appropriately. if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { - DEBUG(dbgs() << "\t\tDead uses at sublane " - << format("%04X", S.LaneMask) << " at " << Def << "\n"); + DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); ShrinkMask |= S.LaneMask; } } @@ -2477,8 +2467,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, } } -bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, const CoalescerPair &CP) { SmallVector NewVNInfo; JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, @@ -2492,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, // ranges get mapped to the "overflow" lane mask bit which creates unexpected // interferences. 
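The unsigned-to-LaneBitmask migration running through these hunks is, at this revision, assumed to be a pure renaming: one bit per subregister lane in a plain 32-bit mask, with bitwise AND as the overlap test and a designated bit standing in for lanes that no longer fit. For example:

    #include <cstdio>

    typedef unsigned LaneBitmask; // assumed underlying type at this revision

    int main() {
      LaneBitmask SubLo = 0x3;   // hypothetical lanes covered by sub_lo
      LaneBitmask SubHi = 0xC;   // hypothetical lanes covered by sub_hi
      LaneBitmask Tracked = 0x2; // lanes the tracker is following
      // Disjointness test, as used when deciding whether an inserted
      // subregister clobbers the lanes being tracked:
      std::printf("overlaps lo: %d\n", (Tracked & SubLo) != 0); // 1
      std::printf("overlaps hi: %d\n", (Tracked & SubHi) != 0); // 0
    }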
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2521,36 +2513,37 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return true; + return; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); - return true; } -bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP) { + LaneBitmask LaneMask, + CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &R : LI.subranges()) { - unsigned RMask = R.LaneMask; + LaneBitmask RMask = R.LaneMask; // LaneMask of subregisters common to subrange R and ToMerge. - unsigned Common = RMask & LaneMask; + LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. if (Common == 0) continue; - DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " + << PrintLaneMask(Common) << '\n'); // LaneMask of subregisters contained in the R range but not in ToMerge, // they have to split into their own subrange. - unsigned LRest = RMask & ~LaneMask; + LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { R.LaneMask = LRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = LI.createSubRangeFrom(Allocator, Common, R); } else { @@ -2559,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) - return false; + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); LaneMask &= ~RMask; } if (LaneMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } - return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { @@ -2602,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // create initial subranges if necessary. unsigned DstIdx = CP.getDstIdx(); if (!LHS.hasSubRanges()) { - unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(DstIdx); + LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. 
assert(Mask != 0); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. for (LiveInterval::SubRange &R : LHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); R.LaneMask = Mask; } } @@ -2619,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); - bool Abort = false; if (!RHS.hasSubRanges()) { - unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(SrcIdx); - if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) - Abort = true; + LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - if (!mergeSubRangeInto(LHS, R, Mask, CP)) { - Abort = true; - break; - } + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); } } - if (Abort) { - // This shouldn't have happened :-( - // However we are aware of at least one existing problem where we - // can't merge subranges when multiple ranges end up in the - // "overflow bit" 32. As a workaround we drop all subregister ranges - // which means we loose some precision but are back to a well defined - // state. - assert(TargetRegisterInfo::isImpreciseLaneMask( - CP.getNewRC()->getLaneMask()) - && "SubRange merge should only fail when merging into bit 32."); - DEBUG(dbgs() << "\tSubrange join aborted!\n"); - LHS.clearSubRanges(); - RHS.clearSubRanges(); - } else { - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); - } + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2799,7 +2770,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { !isTerminalReg(DstReg, Copy, MRI)) return false; - // DstReg is a terminal node. Check if it inteferes with any other + // DstReg is a terminal node. Check if it interferes with any other // copy involving SrcReg. 
   const MachineBasicBlock *OrigBB = Copy.getParent();
   const LiveInterval &DstLI = LIS->getInterval(DstReg);
@@ -2904,7 +2875,7 @@ void RegisterCoalescer::joinAllIntervals() {
   std::vector<MBBPriorityInfo> MBBs;
   MBBs.reserve(MF->size());
   for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
-    MachineBasicBlock *MBB = I;
+    MachineBasicBlock *MBB = &*I;
     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
                                    JoinSplitEdges && isSplitEdge(MBB)));
   }
@@ -2943,7 +2914,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   TRI = STI.getRegisterInfo();
   TII = STI.getInstrInfo();
   LIS = &getAnalysis<LiveIntervals>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   Loops = &getAnalysis<MachineLoopInfo>();
   if (EnableGlobalCopies == cl::BOU_UNSET)
     JoinGlobalCopies = STI.enableJoinGlobalCopies();
@@ -2981,22 +2952,25 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
     if (MRI->recomputeRegClass(Reg)) {
       DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
                    << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+      ++NumInflated;
+
       LiveInterval &LI = LIS->getInterval(Reg);
-      unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
-      if (MaxMask == 0) {
+      if (LI.hasSubRanges()) {
         // If the inflated register class does not support subregisters anymore
         // remove the subranges.
-        LI.clearSubRanges();
-      } else {
+        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+          LI.clearSubRanges();
+        } else {
 #ifndef NDEBUG
-        // If subranges are still supported, then the same subregs should still
-        // be supported.
-        for (LiveInterval::SubRange &S : LI.subranges()) {
-          assert ((S.LaneMask & ~MaxMask) == 0);
-        }
+          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+          // If subranges are still supported, then the same subregs
+          // should still be supported.
+          for (LiveInterval::SubRange &S : LI.subranges()) {
+            assert((S.LaneMask & ~MaxMask) == 0);
+          }
 #endif
+        }
       }
-      ++NumInflated;
     }
   }
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index c3786e552a13..8382b0912bde 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -59,12 +59,12 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << "Max Pressure: ";
   dumpRegSetPressure(MaxSetPressure, TRI);
   dbgs() << "Live In: ";
-  for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
-    dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " ";
+  for (unsigned Reg : LiveInRegs)
+    dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
   dbgs() << '\n';
   dbgs() << "Live Out: ";
-  for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
-    dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " ";
+  for (unsigned Reg : LiveOutRegs)
+    dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
   dbgs() << '\n';
 }
 
@@ -78,11 +78,13 @@ void RegPressureTracker::dump() const {
 }
 
 void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+  const char *sep = "";
   for (const PressureChange &Change : *this) {
-    if (!Change.isValid() || Change.getUnitInc() == 0)
-      continue;
-    dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet())
+    if (!Change.isValid())
+      break;
+    dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
            << " " << Change.getUnitInc();
+    sep = " ";
   }
   dbgs() << '\n';
 }
@@ -90,8 +92,8 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
 
 /// Increase the current pressure as impacted by these registers and bump
 /// the high water mark if needed.
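The rewritten PressureDiff::dump() above uses the empty-first-separator idiom to print a delimited list without a leading or trailing separator. A small sketch of just that idiom, with stand-in data rather than the LLVM types:

#include <cstdio>

int main() {
  const int UnitIncs[] = {2, -1, 3};
  const char *sep = ""; // empty before the first item, a real separator after
  for (int Inc : UnitIncs) {
    std::printf("%sPSet %d", sep, Inc);
    sep = " ";
  }
  std::printf("\n"); // prints: "PSet 2 PSet -1 PSet 3"
  return 0;
}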
void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + for (unsigned RegUnit : RegUnits) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { CurrSetPressure[*PSetI] += Weight; @@ -104,8 +106,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { /// Simply decrease the current pressure as impacted by these registers. void RegPressureTracker::decreaseRegPressure(ArrayRef RegUnits) { - for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) - decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); + for (unsigned RegUnit : RegUnits) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); } /// Clear the result so it can be used for another round of pressure tracking. @@ -157,10 +159,22 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { +void LiveRegSet::init(const MachineRegisterInfo &MRI) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned NumRegUnits = TRI.getNumRegs(); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + Regs.setUniverse(NumRegUnits + NumVirtRegs); + this->NumRegUnits = NumRegUnits; +} + +void LiveRegSet::clear() { + Regs.clear(); +} + +static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return &LIS->getInterval(Reg); - return LIS->getCachedRegUnit(Reg); + return &LIS.getInterval(Reg); + return LIS.getCachedRegUnit(Reg); } void RegPressureTracker::reset() { @@ -176,8 +190,7 @@ void RegPressureTracker::reset() { else static_cast(P).reset(); - LiveRegs.PhysRegs.clear(); - LiveRegs.VirtRegs.clear(); + LiveRegs.clear(); UntiedDefs.clear(); } @@ -210,8 +223,7 @@ void RegPressureTracker::init(const MachineFunction *mf, P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + LiveRegs.init(*MRI); if (TrackUntiedDefs) UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } @@ -250,14 +262,8 @@ void RegPressureTracker::closeTop() { static_cast(P).TopPos = CurrPos; assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); - P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveInRegs.push_back(*I); - std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); - P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), - P.LiveInRegs.end()); + P.LiveInRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveInRegs); } /// Set the boundary for the bottom of the region and summarize live outs. 
@@ -268,21 +274,14 @@ void RegPressureTracker::closeBottom() { static_cast(P).BottomPos = CurrPos; assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); - P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveOutRegs.push_back(*I); - std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); - P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), - P.LiveOutRegs.end()); + P.LiveOutRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveOutRegs); } /// Finalize the region boundaries and record live ins and live outs. void RegPressureTracker::closeRegion() { if (!isTopClosed() && !isBottomClosed()) { - assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() && - "no region boundary"); + assert(LiveRegs.size() == 0 && "no region boundary"); return; } if (!isBottomClosed()) @@ -299,8 +298,7 @@ void RegPressureTracker::closeRegion() { void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); - for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { - unsigned Reg = P.LiveOutRegs[i]; + for (unsigned Reg : P.LiveOutRegs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !RPTracker.hasUntiedDef(Reg)) { increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); @@ -315,71 +313,113 @@ static bool containsReg(ArrayRef RegUnits, unsigned RegUnit) { } namespace { -/// Collect this instruction's unique uses and defs into SmallVectors for -/// processing defs and uses in order. -/// -/// FIXME: always ignore tied opers -class RegisterOperands { - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - bool IgnoreDead; +/// List of register defined and used by a machine instruction. +class RegisterOperands { public: SmallVector Uses; SmallVector Defs; SmallVector DeadDefs; - RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri, bool ID = false): - TRI(tri), MRI(mri), IgnoreDead(ID) {} + void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); - /// Push this operand's register onto the correct vector. - void collect(const MachineOperand &MO) { + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers +class RegisterOperandsCollector { + RegisterOperands &RegOpers; + const TargetRegisterInfo &TRI; + const MachineRegisterInfo &MRI; + bool IgnoreDead; + + RegisterOperandsCollector(RegisterOperands &RegOpers, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) + : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} + + void collectInstr(const MachineInstr &MI) const { + for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) + collectOperand(*OperI); + + // Remove redundant physreg dead defs. 
+    SmallVectorImpl<unsigned>::iterator I =
+        std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+                       std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+    RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+  }
+
+  /// Push this operand's register onto the correct vectors.
+  void collectOperand(const MachineOperand &MO) const {
     if (!MO.isReg() || !MO.getReg())
       return;
+    unsigned Reg = MO.getReg();
     if (MO.readsReg())
-      pushRegUnits(MO.getReg(), Uses);
+      pushRegUnits(Reg, RegOpers.Uses);
     if (MO.isDef()) {
       if (MO.isDead()) {
         if (!IgnoreDead)
-          pushRegUnits(MO.getReg(), DeadDefs);
-      }
-      else
-        pushRegUnits(MO.getReg(), Defs);
+          pushRegUnits(Reg, RegOpers.DeadDefs);
+      } else
+        pushRegUnits(Reg, RegOpers.Defs);
     }
   }
 
-protected:
-  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
+  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const {
     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
       if (containsReg(RegUnits, Reg))
         return;
       RegUnits.push_back(Reg);
-    }
-    else if (MRI->isAllocatable(Reg)) {
-      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+    } else if (MRI.isAllocatable(Reg)) {
+      for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
         if (containsReg(RegUnits, *Units))
           continue;
         RegUnits.push_back(*Units);
       }
     }
   }
+
+  friend class RegisterOperands;
 };
-} // namespace
 
-/// Collect physical and virtual register operands.
-static void collectOperands(const MachineInstr *MI,
-                            RegisterOperands &RegOpers) {
-  for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
-    RegOpers.collect(*OperI);
-
-  // Remove redundant physreg dead defs.
-  SmallVectorImpl<unsigned>::iterator I =
-      std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
-                     std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
-  RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+void RegisterOperands::collect(const MachineInstr &MI,
+                               const TargetRegisterInfo &TRI,
+                               const MachineRegisterInfo &MRI,
+                               bool IgnoreDead) {
+  RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+  Collector.collectInstr(MI);
 }
 
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+                                      const LiveIntervals &LIS) {
+  SlotIndex SlotIdx = LIS.getInstructionIndex(&MI);
+  for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin();
+       RI != Defs.end(); /*empty*/) {
+    unsigned Reg = *RI;
+    const LiveRange *LR = getLiveRange(LIS, Reg);
+    if (LR != nullptr) {
+      LiveQueryResult LRQ = LR->Query(SlotIdx);
+      if (LRQ.isDeadDef()) {
+        // LiveIntervals knows this is a dead def even though its
+        // MachineOperand is not flagged as such.
+        DeadDefs.push_back(Reg);
+        RI = Defs.erase(RI);
+        continue;
+      }
+    }
+    ++RI;
+  }
+}
+
+} // namespace
+
 /// Initialize an array of N PressureDiffs.
 void PressureDiffs::init(unsigned N) {
   Size = N;
@@ -399,7 +439,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
   int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
   for (; PSetI.isValid(); ++PSetI) {
     // Find an existing entry in the pressure diff for this PSet.
-    PressureDiff::iterator I = begin(), E = end();
+    PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
     for (; I != E && I->isValid(); ++I) {
       if (I->getPSet() >= *PSetI)
         break;
@@ -411,10 +451,20 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
     if (!I->isValid() || I->getPSet() != *PSetI) {
       PressureChange PTmp = PressureChange(*PSetI);
       for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
-        std::swap(*J,PTmp);
+        std::swap(*J, PTmp);
     }
     // Update the units for this pressure set.
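The new detectDeadDefs() above walks Defs while erasing entries it moves to DeadDefs. A sketch of that erase-while-iterating pattern, shown with std::vector in place of SmallVectorImpl (the iterator-returning erase() semantics are the same) and a stand-in liveness query:

#include <cassert>
#include <vector>

static bool isDeadDef(unsigned Reg) { return Reg % 2 == 0; } // stand-in query

int main() {
  std::vector<unsigned> Defs = {1, 2, 3, 4};
  std::vector<unsigned> DeadDefs;
  for (std::vector<unsigned>::iterator RI = Defs.begin();
       RI != Defs.end(); /*empty*/) {
    if (isDeadDef(*RI)) {
      DeadDefs.push_back(*RI);
      RI = Defs.erase(RI); // erase returns the next valid iterator
      continue;
    }
    ++RI;
  }
  assert(Defs.size() == 2 && DeadDefs.size() == 2);
  return 0;
}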
- I->setUnitInc(I->getUnitInc() + Weight); + unsigned NewUnitInc = I->getUnitInc() + Weight; + if (NewUnitInc != 0) { + I->setUnitInc(NewUnitInc); + } else { + // Remove entry + PressureDiff::iterator J; + for (J = std::next(I); J != E && J->isValid(); ++J, ++I) + *I = *J; + if (J != E) + *I = *J; + } } } @@ -423,18 +473,18 @@ static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, const MachineRegisterInfo *MRI) { assert(!PDiff.begin()->isValid() && "stale PDiff"); - for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, MRI); } /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef Regs) { - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - if (LiveRegs.insert(Regs[i])) - increaseRegPressure(Regs[i]); + for (unsigned Reg : Regs) { + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); } } @@ -465,13 +515,9 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// registers that are both defined and used by the instruction. If a pressure /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. -bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, +void RegPressureTracker::recede(SmallVectorImpl *LiveUses, PressureDiff *PDiff) { - // Check for the top of the analyzable region. - if (CurrPos == MBB->begin()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->begin()); if (!isBottomClosed()) closeBottom(); @@ -483,11 +529,8 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, do --CurrPos; while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + assert(!CurrPos->isDebugValue()); - if (CurrPos->isDebugValue()) { - closeRegion(); - return false; - } SlotIndex SlotIdx; if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); @@ -496,8 +539,11 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, if (RequireIntervals && isTopClosed()) static_cast(P).openTop(SlotIdx); - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); if (PDiff) collectPDiff(*PDiff, RegOpers, MRI); @@ -508,37 +554,19 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, // Kill liveness at live defs. // TODO: consider earlyclobbers? - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (DeadDef) { - // LiveIntervals knows this is a dead even though it's MachineOperand is - // not flagged as such. Since this register will not be recorded as - // live-out, increase its PDiff value to avoid underflowing pressure. 
- if (PDiff) - PDiff->addPressureChange(Reg, false, MRI); - } else { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (!LRQ.isKill() && !LRQ.valueDefined()) @@ -552,24 +580,18 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, } } if (TrackUntiedDefs) { - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) UntiedDefs.insert(Reg); } } - return true; } /// Advance across the current instruction. -bool RegPressureTracker::advance() { +void RegPressureTracker::advance() { assert(!TrackUntiedDefs && "unsupported mode"); - // Check for the bottom of the analyzable region. - if (CurrPos == MBB->end()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->end()); if (!isTopClosed()) closeTop(); @@ -585,11 +607,10 @@ bool RegPressureTracker::advance() { static_cast(P).openBottom(CurrPos); } - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*CurrPos, *TRI, *MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { // Discover live-ins. bool isLive = LiveRegs.contains(Reg); if (!isLive) @@ -597,24 +618,21 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); lastUse = LR && LR->Query(SlotIdx).isKill(); - } - else { + } else { // Allocatable physregs are always single-use before register rewriting. lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); } if (lastUse && isLive) { LiveRegs.erase(Reg); decreaseRegPressure(Reg); - } - else if (!lastUse && !isLive) + } else if (!lastUse && !isLive) increaseRegPressure(Reg); } // Generate liveness for defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (LiveRegs.insert(Reg)) increaseRegPressure(Reg); } @@ -627,7 +645,6 @@ bool RegPressureTracker::advance() { do ++CurrPos; while (CurrPos != MBB->end() && CurrPos->isDebugValue()); - return true; } /// Find the max change in excess pressure across all sets. @@ -653,8 +670,7 @@ static void computeExcessPressureDelta(ArrayRef OldPressureVec, PDiff = 0; // Under the limit else PDiff = PNew - Limit; // Just exceeded limit. - } - else if (Limit > PNew) + } else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. if (PDiff) { @@ -719,34 +735,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). 
- RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); - collectOperands(MI, RegOpers); - - // Boost max pressure for all dead defs together. - // Since CurrSetPressure and MaxSetPressure - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + assert(RegOpers.DeadDefs.size() == 0); + if (RequireIntervals) + RegOpers.detectDeadDefs(*MI, *LIS); // Kill liveness at live defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - SlotIndex SlotIdx = LIS->getInstructionIndex(MI); - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (!DeadDef) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) increaseRegPressure(Reg); } @@ -853,7 +854,8 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, unsigned MNew = MOld; // Ignore DeadDefs here because they aren't captured by PressureChange. unsigned PNew = POld + PDiffI->getUnitInc(); - assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) + && "PSet overflow/underflow"); if (PNew > MOld) MNew = PNew; // Check if current pressure has exceeded the limit. @@ -892,19 +894,13 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). -static bool findUseBetween(unsigned Reg, - SlotIndex PriorUseIdx, SlotIndex NextUseIdx, - const MachineRegisterInfo *MRI, +static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, + SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, const LiveIntervals *LIS) { - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI->use_instr_nodbg_begin(Reg), - UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) { - const MachineInstr* MI = &*UI; - if (MI->isDebugValue()) - continue; - SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); - if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) - return true; + for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { + SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; } return false; } @@ -919,8 +915,8 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); - collectOperands(MI, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. 
@@ -928,21 +924,18 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (RequireIntervals) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); - if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) decreaseRegPressure(Reg); - } } - } - else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Allocatable physregs are always single-use before register rewriting. decreaseRegPressure(Reg); } @@ -966,7 +959,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This is expensive for an on-the-fly query because it calls /// bumpDownwardPressure to recompute the pressure sets based on current /// liveness. We don't yet have a fast version of downward pressure tracking -/// analagous to getUpwardPressureDelta. +/// analogous to getUpwardPressureDelta. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef CriticalPSets, diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 4176686d1f7f..8fa1bf74b7e2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -31,9 +31,12 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" /// setUsed - Set the register units of this register as used. -void RegScavenger::setRegUsed(unsigned Reg) { - for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) - RegUnitsAvailable.reset(*RUI); +void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { + for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + LaneBitmask UnitMask = (*RUI).second; + if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + RegUnitsAvailable.reset((*RUI).first); + } } void RegScavenger::initRegState() { @@ -50,9 +53,8 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setRegUsed(*I); + for (const auto &LI : MBB->liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); // Pristine CSRs are also unavailable. 
   const MachineFunction &MF = *MBB->getParent();
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 76a7fef58fcc..efde61ece639 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -372,7 +372,6 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
       dbgs() << "\n";
     }
   }
-  dbgs() << "\n";
 }
 #endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 390b6d25954e..fb82ab7a5555 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/IntEqClasses.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -51,15 +51,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
 
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo *mli,
-                                     bool IsPostRAFlag, bool RemoveKillFlags,
-                                     LiveIntervals *lis)
-    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
-      IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
-      CanHandleTerminators(false), FirstDbgValue(nullptr) {
-  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+                                     bool RemoveKillFlags)
+    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+      RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
+      TrackLaneMasks(false), FirstDbgValue(nullptr) {
   DbgValues.clear();
-  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
-         "Virtual registers must be removed prior to PostRA scheduling");
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
   SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -230,11 +226,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
       if (TRI->isPhysicalRegister(Reg))
         Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
-      else {
-        assert(!IsPostRA && "Virtual register encountered after regalloc.");
-        if (MO.readsReg()) // ignore undef operands
-          addVRegUseDeps(&ExitSU, i);
-      }
+      else if (MO.readsReg()) // ignore undef operands
+        addVRegUseDeps(&ExitSU, i);
     }
   } else {
     // For others, e.g. fallthrough, conditional branch, assume the exit
@@ -242,11 +235,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
     assert(Uses.empty() && "Uses in set before adding deps?");
     for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
            SE = BB->succ_end(); SI != SE; ++SI)
-      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
-             E = (*SI)->livein_end(); I != E; ++I) {
-        unsigned Reg = *I;
-        if (!Uses.contains(Reg))
-          Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+      for (const auto &LI : (*SI)->liveins()) {
+        if (!Uses.contains(LI.PhysReg))
+          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
       }
   }
 }
@@ -371,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   }
 }
 
+LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
+{
+  unsigned Reg = MO.getReg();
+  // No point in tracking lanemasks if we don't have interesting subregisters.
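The new getLaneMaskForMO() beginning above picks a lane mask per operand: everything when the register class has no disjunct subregisters, the class's full mask for a whole-register operand, or the subregister's lanes otherwise. A sketch of that decision with the TargetRegisterInfo queries replaced by stand-in data (the names here are assumptions, not LLVM API):

#include <cassert>
#include <cstdint>

typedef uint32_t LaneBitmask;

struct FakeRegClass {
  bool HasDisjunctSubRegs;
  LaneBitmask ClassLaneMask;
};

static LaneBitmask laneMaskFor(const FakeRegClass &RC, unsigned SubReg,
                               LaneBitmask SubRegLaneMask) {
  // Without interesting subregisters, track the register as a whole.
  if (!RC.HasDisjunctSubRegs)
    return ~0u;
  // A full-register operand covers every lane of the class.
  if (SubReg == 0)
    return RC.ClassLaneMask;
  // A subregister operand covers only that subregister's lanes.
  return SubRegLaneMask;
}

int main() {
  FakeRegClass Simple = {false, 0x1};
  FakeRegClass Tuple = {true, 0xF};
  assert(laneMaskFor(Simple, 3, 0x2) == ~0u);
  assert(laneMaskFor(Tuple, 0, 0x0) == 0xF);
  assert(laneMaskFor(Tuple, 1, 0x3) == 0x3);
  return 0;
}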
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. @@ -378,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); - // Singly defined vregs do not have output/anti dependencies. - // The current operand is a def, so we have at least one. - // Check here if there are any others... + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } + + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } + + // Shortcut: Singly defined vregs do not have output/anti dependencies. if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest def of this vreg. + // Add output dependence to the next nearest defs of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. 
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI == VRegDefs.end()) - VRegDefs.insert(VReg2SUnit(Reg, SU)); - else { - SUnit *DefSU = DefI->SU; - if (DefSU != SU && DefSU != &ExitSU) { - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } - DefI->SU = SU; + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. + if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -416,59 +492,34 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); - // Record this local VReg use. - VReg2UseMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; + + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, SU)); - - // Lookup this operand's reaching definition. - assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveQueryResult LRQ - = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); - VNInfo *VNI = LRQ.valueIn(); - - // VNI will be valid because MachineOperand::readsReg() is checked by caller. 
- assert(VNI && "No value to read by operand"); - MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); - // Phis and other noninstructions (after coalescing) have a NULL Def. - if (Def) { - SUnit *DefSU = getSUnit(Def); - if (DefSU) { - // The reaching Def lives within this scheduling region. - // Create a data dependence. - SDep dep(DefSU, SDep::Data, Reg); - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - - const TargetSubtargetInfo &ST = MF.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); - SU->addPred(dep); - } - } - - // Add antidependence to the following def of the vreg it uses. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { - if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && - (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) - return true; - return false; + return MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasOrderedMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA))); } // This MI might have either incomplete info, or known to be unsafe @@ -508,7 +559,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return false; } -/// This returns true if the two MIs need a chain edge betwee them. +/// This returns true if the two MIs need a chain edge between them. /// If these are not even memory operations, we still may need /// chain deps between them. The question really is - could /// these two MIs be reordered during scheduling from memory dependency @@ -670,7 +721,7 @@ static inline void addChainDependency(AliasAnalysis *AA, unsigned TrueMemOrderLatency = 0, bool isNormalMemory = false) { // If this is a false dependency, - // do not add the edge, but rememeber the rejected node. + // do not add the edge, but remember the rejected node. if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); @@ -685,7 +736,7 @@ static inline void addChainDependency(AliasAnalysis *AA, } } -/// Create an SUnit for each real instruction, numbered in top-down toplological +/// Create an SUnit for each real instruction, numbered in top-down topological /// order. The instruction order A < B, implies that no edge exists from B to A. /// /// Map each real instruction to its SUnit. @@ -743,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() { } } +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. 
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs) { + PressureDiffs *PDiffs, + bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -766,7 +844,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // We build scheduling units by walking a block's instruction list from bottom // to top. - // Remember where a generic side-effecting instruction is as we procede. + // Remember where a generic side-effecting instruction is as we proceed. SUnit *BarrierChain = nullptr, *AliasChain = nullptr; // Memory references to specific known memory locations are tracked @@ -787,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + VRegUses.clear(); - VRegDefs.setUniverse(MRI.getNumVirtRegs()); - VRegUses.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(NumVirtRegs); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -818,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); + collectVRegUses(SU); } assert( @@ -835,7 +918,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (TRI->isPhysicalRegister(Reg)) addPhysRegDeps(SU, j); else { - assert(!IsPostRA && "Virtual register encountered!"); if (MO.isDef()) { HasVRegDef = true; addVRegDefDeps(SU, j); @@ -890,7 +972,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. 
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -903,27 +985,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); } for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -937,7 +1019,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -961,7 +1043,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. @@ -986,7 +1068,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); @@ -996,15 +1078,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. 
if (AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; @@ -1012,7 +1094,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1020,7 +1102,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); PendingLoads.push_back(SU); @@ -1044,7 +1126,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); @@ -1052,11 +1134,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); @@ -1068,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - VRegDefs.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); PendingLoads.clear(); } @@ -1080,11 +1163,9 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; + for (const auto &LI : (*SI)->liveins()) { // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } @@ -1103,7 +1184,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // Once we set a kill flag on an instruction, we bail out, as otherwise we // might set it on too many operands. We will clear as many flags as we // can though. 
- MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (Begin != End) { for (MachineOperand &MO : (--End)->operands()) { @@ -1237,7 +1318,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { toggleKillFlag(MI, MO); DEBUG(MI->dump()); DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) DEBUG(Begin->dump()); diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index b2e4617720ff..1150d26e559b 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -43,9 +43,12 @@ namespace llvm { return (Node->NumPreds > 10 || Node->NumSuccs > 10); } - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast(Node); + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3b29306bb54a..0872d7a9a228 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -156,13 +156,16 @@ namespace { void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } + /// Replaces all uses of the results of one DAG node with new values. 
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true) { SDValue To[] = { Res0, Res1 }; @@ -233,18 +236,17 @@ namespace { SDValue visitADDE(SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); - SDValue visitSREM(SDNode *N); - SDValue visitUREM(SDNode *N); + SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitSMULO(SDNode *N); SDValue visitUMULO(SDNode *N); - SDValue visitSDIVREM(SDNode *N); - SDValue visitUDIVREM(SDNode *N); + SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); @@ -265,6 +267,7 @@ namespace { SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); + SDValue visitSETCCE(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -298,6 +301,10 @@ namespace { SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + SDValue visitSTORE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -312,9 +319,11 @@ namespace { SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); + SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); + SDValue visitFMULForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -338,14 +347,17 @@ namespace { unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); - SDValue BuildReciprocalEstimate(SDValue Op); - SDValue BuildRsqrtEstimate(SDValue Op); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -374,6 +386,10 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Do FindBetterChain for a store and any possibly adjacent stores on + /// consecutive chains. + bool findBetterNeighborChains(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. 
struct MemOpLink { @@ -388,19 +404,37 @@ unsigned SequenceNum; }; + /// This is a helper function for visitMUL to check the profitability + /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// MulNode is the original multiply, AddNode is (add x, c1), + /// and ConstNode is c2. + bool isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode); + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const; + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load x) c) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. LoadedVT returns the type + /// of the original loaded value. NarrowLoad returns whether the load would + /// need to be narrowed in order to match. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad); + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumElem, + EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. @@ -409,7 +443,7 @@ void getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -427,9 +461,7 @@ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - auto *F = DAG.getMachineFunction().getFunction(); - ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize); + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list @@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Op.hasOneUse() && "Unknown reuse!"); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { @@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), - Op.getOperand(0)); + Op.getOperand(0), Flags); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath); @@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0)); + Op.getOperand(1), Op.getOperand(0), Flags); case ISD::FMUL: case ISD::FDIV: @@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1)); + LegalOperations, Depth+1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: @@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) - AddToWorklist(I); + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); - case ISD::SREM: return visitSREM(N); - case ISD::UREM: return visitUREM(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: return visitSMULO(N); case ISD::UMULO: return visitUMULO(N); - case ISD::SDIVREM: return visitSDIVREM(N); - case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); } return SDValue(); } @@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS. 
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { SDValue Ops[] = {N1, N0}; - SDNode *CSENode; - if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - &BinNode->Flags); - } else { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); - } + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } @@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static bool isNullConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); -} - -static bool isNullFPConstant(SDValue V) { - ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); - return Const != nullptr && Const->isZero() && !Const->isNegative(); -} - -static bool isAllOnesConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); -} - -static bool isOneConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isOne(); -} - /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a /// ConstantSDNode pointer; else return nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { @@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. - if (VT.isInteger() && !VT.isVector()) { - APInt LHSZero, LHSOne; - APInt RHSZero, RHSOne; - DAG.computeKnownBits(N0, LHSZero, LHSOne); - - if (LHSZero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSZero, RHSOne); - - // If all possibly-set bits on the LHS are clear on the RHS, return an OR. - // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ - if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); - } - } - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && @@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) { - SDLoc DL(N); + if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), - DAG.getNode(ISD::CARRY_FALSE, DL, - MVT::Glue)); - } + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e.
(xor x, -1) + no borrow if (isAllOnesConstant(N0)) - return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } @@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + if (isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); // reassociate mul if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) @@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return SDValue(); } +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + EVT VT = Node->getValueType(0); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ?
ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. + unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) { - SDLoc DL(N); + if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } + // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), - N0, N1); + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) - return SDValue(); - // Target-specific implementation of sdiv x, pow2. - SDValue Res = BuildSDIVPow2(N); - if (Res.getNode()) + if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); - SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = @@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // If integer divide is expensive and we satisfy the requirements, emit an - // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildSDIV(N); - if (Op.getNode()) return Op; - } + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildSDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDLoc DL(N); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildUDIV(N); - if (Op.getNode()) return Op; - } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + // udiv, urem -> udivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSREM(SDNode *N) { +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); - // fold (srem c1, c2) -> c1%c2 + // fold (rem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, - N0C, N1C)) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; - // If we know the sign bits of both operands are zero, strength reduce to a - // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); - } - // If X/C can be simplified by the division-by-constant logic, lower - // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorklist(Div.getNode()); - SDValue OptimizedDiv = combine(Div.getNode()); - if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorklist(Mul.getNode()); - return Sub; + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } - } - - // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); - // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) - return N1; - - return SDValue(); -} - -SDValue DAGCombiner::visitUREM(SDNode *N) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - - // fold (urem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, - N0C, N1C)) - return Folded; - // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); - } - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, + } else { + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } } } } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); + // To avoid mangling nodes, this simplification requires that the combine() + // call for the speculative DIV must not cause a DIVREM conversion. We guard + // against this by skipping the simplification if isIntDivCheap(). When + // div is not cheap, combine will not return a DIVREM. Regardless, + // checking cheapness here makes sense since the simplification results in + // fatter code. + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { + unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); + assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && + (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); return Sub; } } + // srem, urem -> sdivrem, udivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); - if (Res.getNode()) return Res; +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); - return SDValue(); -} + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; -SDValue DAGCombiner::visitUDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); - if (Res.getNode()) return Res; + // fold (op c1, c2) -> c1 op c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } @@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, return SDValue(); } +bool
DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad) { + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { + // ZEXTLOAD will match without needing to change the size of the value being + // loaded. + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) + return false; + + NarrowLoad = true; + return true; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { - + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) { + if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, @@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - - // Do not change the width of a volatile load. - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { + } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible.
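A minimal standalone sketch of the mask test that isAndLoadExtLoad performs above (editorial illustration only, not part of the patch; plain C++ with no LLVM APIs, and narrowLoadBits is a hypothetical name): an AND constant can be folded into a zero-extending load only when it is a contiguous low-bit mask, and the count of its active bits gives the width of the narrowed load.

#include <cassert>
#include <cstdint>

// Returns the bit width of the narrow zextload implied by `mask`,
// or 0 when `mask` is not of the form (1 << n) - 1.
unsigned narrowLoadBits(uint64_t mask) {
  if (mask == 0)
    return 0;
  // A contiguous low-bit mask plus one is a power of two (0xFF + 1 == 0x100),
  // so it shares no set bits with the mask itself.
  if ((mask & (mask + 1)) != 0)
    return 0; // set bits are not contiguous from bit 0
  // Count the active bits, i.e. the index of the highest set bit plus one.
  unsigned bits = 0;
  while (bits < 64 && (mask >> bits))
    ++bits;
  return bits;
}

int main() {
  assert(narrowLoadBits(0xFF) == 8);    // (and (load x), 0xff) -> zextload i8
  assert(narrowLoadBits(0xFFFF) == 16); // (and (load x), 0xffff) -> zextload i16
  assert(narrowLoadBits(0xF0) == 0);    // not a low-bit mask: no fold
}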
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) - SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; - BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or @@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isa<ConstantSDNode>(Op.getOperand(1))) { + if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { } // Return true if we can prove that, whenever Neg and Pos are both in the -// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate -// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { - // If OpSize is a power of 2 then: +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { + // If EltSize is a power of 2 then: // - // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) - // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // - // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // - // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // - // for all Neg and Pos.
Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // - // Neg == OpSize - Pos [B] + // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined - // behavior if Pos == 0 (and consequently Neg == OpSize). + // behavior if Pos == 0 (and consequently Neg == EltSize). // - // We could actually use [A] whenever OpSize is a power of 2, but the + // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. for - // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // - // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && - isPowerOf2_64(OpSize) && - Neg.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { - Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(OpSize); + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + if (NegC->getAPIntValue() == EltSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(EltSize); + } + } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; - ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); - // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && - Pos.getOpcode() == ISD::AND && - Pos.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) - Pos = Pos.getOperand(0); + if (MaskLoBits && Pos.getOpcode() == ISD::AND) + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + if (PosC->getAPIntValue() == EltSize - 1) + Pos = Pos.getOperand(0); // The condition we need is now: // - // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // - // OpSize & Mask == NegC & Mask + // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). APInt Width; if (Pos == NegOp1) Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes: // - // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // - // NegC & Mask == (OpSize - PosC) & Mask - // OpSize & Mask == (NegC + PosC) & Mask - else if (Pos.getOpcode() == ISD::ADD && - Pos.getOperand(0) == NegOp1 && - Pos.getOperand(1).getOpcode() == ISD::Constant) - Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + - NegC->getAPIntValue()); - else + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else return false; - // Now we just need to check that OpSize & Mask == Width & Mask. + // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) - // Opsize & Mask is 0 since Mask is Opsize - 1. + // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; - return Width == OpSize; + return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite @@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); - std::swap(LHSMask , RHSMask ); + std::swap(LHSMask, RHSMask); } - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); @@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (LHSShiftAmt.getOpcode() == ISD::Constant && - RHSShiftAmt.getOpcode() == ISD::Constant) { - uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != OpSizeInBits) + if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { + uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != EltSizeInBits) return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, @@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) { - APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); + SDValue Mask = DAG.getConstant(AllBits, DL, VT); if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); - Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, + DAG.getConstant(RHSBits, DL, VT))); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); - Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, + DAG.getConstant(LHSBits, DL, VT))); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } return Rot.getNode(); @@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && @@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C && !N1C->isOpaque()) { - SDValue NewSHL = visitShiftByConstant(N, N1C); - if (NewSHL.getNode()) - return NewSHL; + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + if (SDValue Folded = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } } + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + return SDValue(); } @@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRA = visitShiftByConstant(N, N1C); - if (NewSRA.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; - } return SDValue(); } @@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } @@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRL = visitShiftByConstant(N, N1C); - if (NewSRL.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; - } // Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: @@ -4973,6 +5087,79 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. + if (VT0 == MVT::i1) { + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its preferred form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right if we find the inner select already exists in the DAG, + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. + bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y @@ -4990,10 +5177,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue FMinMax = - combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), - N1, N2, CC, TLI, DAG); - if (FMinMax) + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) return FMinMax; } @@ -5006,69 +5192,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } - if (VT0 == MVT::i1) { - if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - // select (and Cond0, Cond1), X, Y - // -> select Cond0, (select Cond1, X, Y), Y - if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, - InnerSelect, N2); - } - // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) - if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, - InnerSelect); - } - } - - // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y - if (N1->getOpcode() == ISD::SELECT) { - SDValue N1_0 = N1->getOperand(0); - SDValue N1_1 = N1->getOperand(1); - SDValue N1_2 = N1->getOperand(2); - if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { - // Create the actual and node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); - } - // Otherwise see if we can optimize the "and" to a better pattern. - if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); - } - } - // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y - if (N2->getOpcode() == ISD::SELECT) { - SDValue N2_0 = N2->getOperand(0); - SDValue N2_1 = N2->getOperand(1); - SDValue N2_2 = N2->getOperand(2); - if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { - // Create the actual or node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); - } - // Otherwise see if we can optimize to a better pattern.
- if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); - } - } - } - return SDValue(); } @@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SDValue CV = ConvertSelectToConcatVector(N, DAG); - if (CV.getNode()) + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } @@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). @@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); - if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); SDValue SetCC = DAG.getSetCC(DL, SetCCVT, @@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (truncate x)) -> (and x, mask) - if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { - + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted
the truncate, if needed, but not what's under it. AddToWorklist(oye); } - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - SDValue Op = N0.getOperand(0); - if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. + if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } - return DAG.getZeroExtendInReg(Op, SDLoc(N), - N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && @@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, - SetCCs, TLI); + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Watch out for shift count overflow though.
       if (Amt >= Mask.getBitWidth()) break;
       APInt NewMask = Mask << Amt;
-      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
-      if (SimplifyLHS.getNode())
+      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
                            SimplifyLHS, V.getOperand(1));
     }
@@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   unsigned VTBits = VT.getScalarType().getSizeInBits();
   unsigned EVTBits = EVT.getScalarType().getSizeInBits();
 
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (sext_in_reg c1) -> c1
-  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+  if (isConstantIntBuildVectorOrConstantInt(N0))
     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
 
   // If the input is already sign extended, just drop the extension.
@@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
 
   // fold (sext_in_reg (load x)) -> (smaller sextload x)
   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
-  SDValue NarrowLoad = ReduceLoadWidth(N);
-  if (NarrowLoad.getNode())
+  if (SDValue NarrowLoad = ReduceLoadWidth(N))
     return NarrowLoad;
 
   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                        BSwap, N1);
   }
 
-  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
-  // into a build_vector.
-  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
-    SmallVector<SDValue, 8> Elts;
-    unsigned NumElts = N0->getNumOperands();
-    unsigned ShAmt = VTBits - EVTBits;
-
-    for (unsigned i = 0; i != NumElts; ++i) {
-      SDValue Op = N0->getOperand(i);
-      if (Op->getOpcode() == ISD::UNDEF) {
-        Elts.push_back(Op);
-        continue;
-      }
-
-      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
-      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
-                                     SDLoc(Op), Op.getValueType()));
-    }
-
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
-  }
-
   return SDValue();
 }
 
@@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
-    SDValue Reduced = ReduceLoadWidth(N);
-    if (Reduced.getNode())
+    if (SDValue Reduced = ReduceLoadWidth(N))
       return Reduced;
+
     // Handle the case where the load remains an extending load even
     // after truncation.
     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
   return SDValue();
 }
 
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+  // and Lo parts; on big-endian machines it doesn't.
+  return DAG.getDataLayout().isBigEndian() ?
1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. 
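// ---- illustrative aside (not part of the patch) ----------------------------
// A ppc_fp128 value is the sum of two doubles, so negating it must flip the
// sign of *both* halves; that is why the same flip bit is paired into both
// i64 words of the BUILD_PAIR above. For fabs the flip bit is the Hi
// double's sign, so the signs flip only when the value is negative. A
// single-double sketch of the underlying xor trick:
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  double D = -2.25, Neg;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  Bits ^= 1ull << 63; // fneg as an integer xor of the sign bit
  std::memcpy(&Neg, &Bits, sizeof(Neg));
  assert(Neg == 2.25);
  return 0;
}
// ---- end aside --------------------------------------------------------------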
+ // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. - if (N0.getOpcode() == ISD::BUILD_PAIR) { - SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); - if (CombineLD.getNode()) + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; - } // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of @@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { ShuffleVectorSDNode *SVN = cast(N0); // If operands are a bitcast, peek through if it casts the original VT. - // If operands are a UNDEF or constant, just bitcast back to original VT. + // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && - Op.getOperand(0)->getValueType(0) == VT) + Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) @@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. 
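// ---- illustrative aside (not part of the patch) ----------------------------
// The ppc_fp128 fcopysign form sketched above flips the sign of cst exactly
// when sign(cst) != sign(x), i.e. the flip bit is the sign bit of (cst ^ x).
// A scalar double analogue, checked against std::copysign:
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
static double copysignViaXor(double Cst, double X) {
  uint64_t C, V;
  std::memcpy(&C, &Cst, sizeof(C));
  std::memcpy(&V, &X, sizeof(V));
  C ^= (C ^ V) & (1ull << 63); // flip iff the signs differ
  double R;
  std::memcpy(&R, &C, sizeof(R));
  return R;
}
int main() {
  assert(copysignViaXor(3.0, -1.0) == std::copysign(3.0, -1.0));
  assert(copysignViaXor(-3.0, 1.0) == 3.0);
  return 0;
}
// ---- end aside --------------------------------------------------------------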
- bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { @@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { @@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. - bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); @@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
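// ---- illustrative aside (not part of the patch) ----------------------------
// AllowFusion gates these folds because fma(x, y, z) skips the intermediate
// rounding of x*y, so the fused form is not bit-identical to the separate
// multiply and add. A standalone case where the results differ on IEEE
// doubles:
#include <cmath>
#include <cstdio>
int main() {
  double X = 1.0 + std::ldexp(1.0, -27); // 1 + 2^-27
  double Unfused = X * X - 1.0;          // x*x rounds before the subtract
  double Fused = std::fma(X, X, -1.0);   // exact x*x feeds the subtract
  std::printf("unfused=%a fused=%a\n", Unfused, Fused); // last bits differ
  return 0;
}
// ---- end aside --------------------------------------------------------------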
- if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode) { @@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } +/// Try to perform FMA combining on a given FMUL node. +SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) + // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0)) + return FMA; + + // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) + // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) + // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); + if (XC0 && XC0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y); + if (XC0 && XC0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0)) + return FMA; + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - ConstantFPSDNode *N1CFP = dyn_cast(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations)); + GetNegatedExpression(N0, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. 
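// ---- illustrative aside (not part of the patch) ----------------------------
// The FuseFADD/FuseFSUB folds above rest on distributing the multiply over
// the +/-1.0 term, e.g. (x + 1) * y == x*y + y == fma(x, y, y). A quick
// standalone check with values where every step is exact:
#include <cassert>
#include <cmath>
int main() {
  double X = 4.0, Y = 9.0;
  assert(std::fma(X, Y, Y) == (X + 1.0) * Y);  // (x + 1) * y
  assert(std::fma(X, Y, -Y) == (X - 1.0) * Y); // (x - 1) * y
  assert(std::fma(-X, Y, Y) == (1.0 - X) * Y); // (1 - x) * y
  return 0;
}
// ---- end aside --------------------------------------------------------------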
if (Options.UnsafeFPMath) { @@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) @@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { - 
ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT)); + N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT)); + N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math // FADD -> FMA combines: - SDValue Fused = visitFADDForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - SDValue Fused = visitFSUBForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) { @@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // the second operand of the outer multiply are constants. 
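// ---- illustrative aside (not part of the patch) ----------------------------
// The constant re-associations in this UnsafeFPMath block can change the
// rounded result, which is why they are not done by default: FP addition is
// not associative. A minimal standalone demonstration:
#include <cassert>
#include <cmath>
int main() {
  double Y = std::ldexp(1.0, 53);             // 2^53: ulp(Y) == 2.0
  assert((Y + 1.0) + 1.0 != Y + (1.0 + 1.0)); // 2^53 vs 2^53 + 2
  return 0;
}
// ---- end aside --------------------------------------------------------------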
if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } @@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); } @@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } + // TODO: The FMA node should have flags that propagate to these nodes. if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FMUL && - N0 == N2.getOperand(0) && - N2.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); - } + // TODO: FMA nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. 
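// ---- illustrative aside (not part of the patch) ----------------------------
// Unlike the reassociations above, fmul X, 2.0 -> fadd X, X needs no
// fast-math: multiplying by a power of two only adjusts the exponent, so
// both forms are exact for every finite X. A standalone spot-check:
#include <cassert>
#include <initializer_list>
int main() {
  for (double X : {1.0, 0.1, -3.5e-300, 6.02e23})
    assert(X + X == 2.0 * X);
  return 0;
}
// ---- end aside --------------------------------------------------------------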
+ SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } - // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (Options.UnsafeFPMath && - N0.getOpcode() == ISD::FMUL && N1CFP && - N0.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), - N2); + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } } // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } - // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT))); - - // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT))); + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } + } return SDValue(); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags *Flags = N->getFlags(); + if (!UnsafeMath && !Flags->hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal. 
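// ---- illustrative aside (not part of the patch) ----------------------------
// What combineRepeatedFPDivisors buys: several divisions by the same value
// become one division plus cheap multiplies, at the cost of one extra
// rounding step per quotient, hence the reciprocal/unsafe-math gating. A
// standalone sketch of the rewrite:
#include <cstdio>
int main(int argc, char **) {
  double D = 3.0 + argc; // divisor the compiler cannot see through
  double A = 10.0, B = 20.0, C = 30.0;
  double Recip = 1.0 / D;                                     // the one FDIV
  std::printf("%g %g %g\n", A * Recip, B * Recip, C * Recip); // three FMULs
  return 0;
}
// ---- end aside --------------------------------------------------------------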
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  if (N0CFP && N0CFP->isExactlyValue(1.0))
+    return SDValue();
+
+  // Exit early if the target does not want this transform or if there can't
+  // possibly be enough uses of the divisor to make the transform worthwhile.
+  SDValue N1 = N->getOperand(1);
+  unsigned MinUses = TLI.combineRepeatedFPDivisors();
+  if (!MinUses || N1->use_size() < MinUses)
+    return SDValue();
+
+  // Find all FDIV users of the same divisor.
+  // Use a set because duplicates may be present in the user list.
+  SetVector<SDNode *> Users;
+  for (auto *U : N1->uses()) {
+    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+      // This division is eligible for optimization only if global unsafe math
+      // is enabled or if this division allows reciprocal formation.
+      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+        Users.insert(U);
+    }
+  }
+
+  // Now that we have the actual number of divisor uses, make sure it meets
+  // the minimum threshold specified by the target.
+  if (Users.size() < MinUses)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+  // Dividend / Divisor -> Dividend * Reciprocal
+  for (auto *U : Users) {
+    SDValue Dividend = U->getOperand(0);
+    if (Dividend != FPOne) {
+      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+                                    Reciprocal, Flags);
+      CombineTo(U, NewNode);
+    } else if (U != Reciprocal.getNode()) {
+      // In the absence of fast-math-flags, this user node is always the
+      // same node as Reciprocal, but with FMF they may be different nodes.
+      CombineTo(U, Reciprocal);
+    }
+  }
+  return SDValue(N, 0); // N was replaced.
+}
+
 SDValue DAGCombiner::visitFDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
+  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
 
   // fold vector ops
   if (VT.isVector())
@@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
 
   // fold (fdiv c1, c2) -> c1/c2
   if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
 
   if (Options.UnsafeFPMath) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
-                          DAG.getConstantFP(Recip, DL, VT));
+                          DAG.getConstantFP(Recip, DL, VT), Flags);
     }
 
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1)) { + if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } @@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } - // Combine multiple FDIVs with the same divisor into multiple FMULs by the - // reciprocal. - // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) - // Notice that this is not always beneficial. One reason is different target - // may have different costs for FDIV and FMUL, so sometimes the cost of two - // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason - // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". - if (Options.UnsafeFPMath) { - // Skip if current node is a reciprocal. - if (N0CFP && N0CFP->isExactlyValue(1.0)) - return SDValue(); - - // Find all FDIV users of the same divisor. - // Use a set because duplicates may be present in the user list. 
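// ---- illustrative aside (not part of the patch) ----------------------------
// The BuildRsqrtEstimate calls above stand for a target-provided
// low-precision 1/sqrt(x) estimate that is then refined; the usual
// refinement is the Newton-Raphson step e' = e * (1.5 - 0.5 * x * e * e).
// A standalone sketch with a made-up starting estimate:
#include <cmath>
#include <cstdio>
int main() {
  double X = 2.0;
  double E = 0.7; // stand-in for a hardware rsqrt estimate
  for (int i = 0; i < 3; ++i)
    E = E * (1.5 - 0.5 * X * E * E); // one Newton step per iteration
  std::printf("refined=%.17g exact=%.17g\n", E, 1.0 / std::sqrt(X));
  return 0;
}
// ---- end aside --------------------------------------------------------------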
- SetVector Users; - for (auto *U : N1->uses()) - if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) - Users.insert(U); - - if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // FIXME: This optimization requires some level of fast-math, so the - // created reciprocal node should at least have the 'allowReciprocal' - // fast-math-flag set. - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); - - // Dividend / Divisor -> Dividend * Reciprocal - for (auto *U : Users) { - SDValue Dividend = U->getOperand(0); - if (Dividend != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, - Reciprocal); - CombineTo(U, NewNode); - } else if (U != Reciprocal.getNode()) { - // In the absence of fast-math-flags, this user node is always the - // same node as Reciprocal, but with FMF they may be different nodes. - CombineTo(U, Reciprocal); - } - } - return SDValue(N, 0); // N was replaced. - } - } + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; return SDValue(); } @@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast(N)->Flags); return SDValue(); } @@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) return SDValue(); + // TODO: FSQRT nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); if (!RV) return SDValue(); - + EVT VT = RV.getValueType(); SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + EVT CCVT = getSetCCResultType(VT); SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); @@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. 
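// ---- illustrative aside (not part of the patch) ----------------------------
// The visitFSQRT transform above computes sqrt(x) as x * (1/sqrt(x)), which
// is NaN at x == 0 because it evaluates 0 * inf; the emitted select forces
// that case back to 0. A standalone scalar model of the guard:
#include <cassert>
#include <cmath>
static double sqrtViaRsqrt(double X) {
  double RV = X * (1.0 / std::sqrt(X)); // stand-in for the estimate path
  return X == 0.0 ? 0.0 : RV;           // the select emitted by the combine
}
int main() {
  assert(sqrtViaRsqrt(4.0) == 2.0);
  assert(sqrtViaRsqrt(0.0) == 0.0); // without the select this is NaN
  return 0;
}
// ---- end aside --------------------------------------------------------------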
+  SDValue N1 = N->getOperand(1);
+  EVT N1VT = N1->getValueType(0);
+  EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+  return (N1.getOpcode() == ISD::FP_EXTEND ||
+          N1.getOpcode() == ISD::FP_ROUND) &&
+         (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+}
+
 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
 
   // copysign(x, fp_extend(y)) -> copysign(x, y)
   // copysign(x, fp_round(y)) -> copysign(x, y)
-  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                        N0, N1.getOperand(0));
 
@@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
       APFloat CVal = CFP1->getValueAPF();
       CVal.changeSign();
       if (Level >= AfterLegalizeDAG &&
-          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
-           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
-        return DAG.getNode(
-            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+          (TLI.isFPImmLegal(CVal, VT) ||
+           TLI.isOperationLegal(ISD::ConstantFP, VT)))
+        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+                                       N0.getOperand(1)),
+                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
     }
   }
 
@@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
 
   if (N0CFP && N1CFP) {
     const APFloat &C0 = N0CFP->getValueAPF();
     const APFloat &C1 = N1CFP->getValueAPF();
-    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   }
 
-  if (N0CFP) {
-    EVT VT = N->getValueType(0);
-    // Canonicalize to constant on RHS.
+  // Canonicalize to constant on RHS.
+  if (isConstantFPBuildVectorOrConstantFP(N0) &&
+      !isConstantFPBuildVectorOrConstantFP(N1))
     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
-  }
 
   return SDValue();
 }
@@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
 
   if (N0CFP && N1CFP) {
     const APFloat &C0 = N0CFP->getValueAPF();
     const APFloat &C1 = N1CFP->getValueAPF();
-    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   }
 
-  if (N0CFP) {
-    EVT VT = N->getValueType(0);
-    // Canonicalize to constant on RHS.
+  // Canonicalize to constant on RHS.
+  if (isConstantFPBuildVectorOrConstantFP(N0) &&
+      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
-  }
 
   return SDValue();
 }
@@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
       SDValue Op1 = TheXor->getOperand(1);
       if (Op0.getOpcode() == Op1.getOpcode()) {
         // Avoid missing important xor optimizations.
- SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); @@ -9722,8 +10089,8 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getValueType(0), - LS.Inst->getOperand(0).getValueType())) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) @@ -10625,30 +10992,109 @@ struct BaseIndexOffset { }; } // namespace +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode) { + APInt Val; + + // If the add only has one use, this would be OK to do. + if (AddNode.getNode()->hasOneUse()) + return true; + + // Walk all the users of the constant with which we're multiplying. + for (SDNode *Use : ConstNode->uses()) { + + if (Use == MulNode) // This use is the one we're on right now. Skip it. + continue; + + if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + SDNode *OtherOp; + SDNode *MulVar = AddNode.getOperand(0).getNode(); + + // OtherOp is what we're multiplying against the constant. + if (Use->getOperand(0) == ConstNode) + OtherOp = Use->getOperand(1).getNode(); + else + OtherOp = Use->getOperand(0).getNode(); + + // Check to see if multiply is with the same operand of our "add". + // + // ConstNode = CONST + // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // ... + // AddNode = (A + c1) <-- MulVar is A. + // = AddNode * ConstNode <-- current visiting instruction. + // + // If we make this transformation, we will have a common + // multiply (ConstNode * A) that we can save. + if (OtherOp == MulVar) + return true; + + // Now check to see if a future expansion will give us a common + // multiply. + // + // ConstNode = CONST + // AddNode = (A + c1) + // ... = AddNode * ConstNode <-- current visiting instruction. + // ... + // OtherOp = (A + c2) + // Use = OtherOp * ConstNode <-- visiting Use. + // + // If we make this transformation, we will have a common + // multiply (CONST * A) after we also do the same transformation + // to the "t2" instruction. + if (OtherOp->getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + OtherOp->getOperand(0).getNode() == MulVar) + return true; + } + } + + // Didn't find a case where this would be profitable. 
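// ---- illustrative aside (not part of the patch) ----------------------------
// The profitability walk above looks for a second multiply of the same
// variable by the same constant: once (A + c1) * C and (A + c2) * C are both
// distributed, A * C becomes a common subexpression and one multiply
// disappears. A standalone integer sketch of the payoff:
#include <cassert>
int main() {
  long A = 7, C = 13, c1 = 3, c2 = 5;
  long T = A * C;       // shared multiply after the transform
  long R1 = T + c1 * C; // was (A + c1) * C; c1*C folds to a constant
  long R2 = T + c2 * C; // was (A + c2) * C; c2*C folds to a constant
  assert(R1 == (A + c1) * C && R2 == (A + c2) * C);
  return 0;
}
// ---- end aside --------------------------------------------------------------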
+  return false;
+}
+
 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL,
                                                   ArrayRef<MemOpLink> Stores,
+                                                  SmallVectorImpl<SDValue> &Chains,
                                                   EVT Ty) const {
   SmallVector<SDValue, 8> BuildVector;
-  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
-    BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+    StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+    Chains.push_back(St->getChain());
+    BuildVector.push_back(St->getValue());
+  }
 
   return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
 }
 
 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
-    unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+    unsigned NumStores, bool IsConstantSrc, bool UseVector) {
   // Make sure we have something to merge.
-  if (NumElem < 2)
+  if (NumStores < 2)
     return false;
 
   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   unsigned LatestNodeUsed = 0;
 
-  for (unsigned i=0; i < NumElem; ++i) {
+  for (unsigned i=0; i < NumStores; ++i) {
     // Find a chain for the new wide-store operand. Notice that some
     // of the store nodes that we found may not be selected for inclusion
     // in the wide store. The chain we use needs to be the chain of the
@@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
       LatestNodeUsed = i;
   }
 
+  SmallVector<SDValue, 8> Chains;
+
   // The latest Node in the DAG.
   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
 
   SDLoc DL(StoreNodes[0].MemNode);
 
   SDValue StoredVal;
   if (UseVector) {
-    // Find a legal type for the vector store.
-    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+    bool IsVec = MemVT.isVector();
+    unsigned Elts = NumStores;
+    if (IsVec) {
+      // When merging vector stores, get the total number of elements.
+      Elts *= MemVT.getVectorNumElements();
+    }
+    // Get the type for the merged vector store.
+    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
     if (IsConstantSrc) {
-      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
     } else {
       SmallVector<SDValue, 8> Ops;
-      for (unsigned i = 0; i < NumElem ; ++i) {
+      for (unsigned i = 0; i < NumStores; ++i) {
         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
         SDValue Val = St->getValue();
-        // All of the operands of a BUILD_VECTOR must have the same type.
+        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
         if (Val.getValueType() != MemVT)
           return false;
         Ops.push_back(Val);
+        Chains.push_back(St->getChain());
      }
 
       // Build the extracted vector elements back into a vector.
-      StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
-    }
+      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+                              DL, Ty, Ops);
+    }
   } else {
     // We should always use a vector store when merging extracted vector
     // elements, so this path implies a store of constants.
     assert(IsConstantSrc && "Merged vector elements should use vector store");
 
-    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
     APInt StoreInt(SizeInBits, 0);
 
     // Construct a single integer constant which is made of the smaller
     // constant inputs.
     bool IsLE = DAG.getDataLayout().isLittleEndian();
-    for (unsigned i = 0; i < NumElem ; ++i) {
-      unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+    for (unsigned i = 0; i < NumStores; ++i) {
+      unsigned Idx = IsLE ?
(NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { @@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Replace the last store with the new store CombineTo(LatestOp, NewStore); // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); @@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -static bool allowableAlignment(const SelectionDAG &DAG, - const TargetLowering &TLI, EVT EVTTy, - unsigned AS, unsigned Align) { - if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) - return true; - - Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); - return (Align >= ABIAlignment); -} - void DAGCombiner::getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl &StoreNodes, SmallVectorImpl &AliasLoadNodes) { @@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast(*I)) { + if (I.getOperandNo() != 0) + continue; + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + + if (Ptr.equalBaseIndex(BasePtr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (Index->getMemoryVT() != MemVT) break; + // We do not allow under-aligned stores in order to prevent + // overriding stores. NOTE: this is a bad hack. Alignment SHOULD + // be irrelevant here; what MATTERS is that we not move memory + // operations that potentially overlap past each-other. + if (Index->getAlignment() < MemVT.getStoreSize()) + break; + // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) return false; - // Don't merge vectors into wider inputs. 
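// ---- illustrative aside (not part of the patch) ----------------------------
// The StoreInt loop above packs the merged constants so the value stored at
// the lowest address lands in the lowest byte of the wide store; on a
// little-endian target that means shifting in the highest-address value
// first. A standalone sketch (assumes a little-endian host, e.g. x86):
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  const uint8_t Vals[4] = {0x11, 0x22, 0x33, 0x44}; // four i8 stores
  uint32_t StoreInt = 0;
  for (int i = 3; i >= 0; --i)
    StoreInt = (StoreInt << 8) | Vals[i]; // 0x44332211
  uint8_t Mem[4];
  std::memcpy(Mem, &StoreInt, 4); // the single merged i32 store
  assert(std::memcmp(Mem, Vals, 4) == 0); // bytes land at the right addresses
  return 0;
}
// ---- end aside --------------------------------------------------------------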
- if (MemVT.isVector() || !MemVT.isSimple()) + if (!MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that @@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsLoadSrc = isa(StoredVal); bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) + return false; + + // Don't merge vectors into wider vectors if the source data comes from loads. + // TODO: This restriction can be lifted by using logic similar to the + // ExtractVecSrc case. + if (MemVT.isVector() && IsLoadSrc) return false; // Only look at ends of store sequences. @@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We need to make sure that these nodes do not interfere with // any of the store nodes. SmallVector AliasLoadNodes; - + // Save the StoreSDNodes that we find in the chain. SmallVector StoreNodes; getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); - + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // Sort the memory operands according to their distance from the base pointer. + // Sort the memory operands according to their distance from the + // base pointer. As a secondary criteria: make sure stores coming + // later in the code come first in the list. This is important for + // the non-UseAA case, because we're merging stores into the FINAL + // store along a chain which potentially contains aliasing stores. + // Thus, if there are multiple stores to the same address, the last + // one can be considered for merging but not the others. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { return LHS.OffsetFromBase < RHS.OffsetFromBase || (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); + LHS.SequenceNum < RHS.SequenceNum); }); // Scan the memory operations on the chain and find the first non-consecutive @@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } - bool Alias = false; // Check if this store interferes with any of the loads that we found. - for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) - if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { - Alias = true; - break; - } - // We found a load that alias with this store. Stop the sequence. - if (Alias) + // If we find a load that alias with this store. Stop the sequence. + if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), + [&](LSBaseSDNode* Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. @@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. 
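// ---- illustrative aside (not part of the patch) ----------------------------
// The std::sort above orders merge candidates by offset from the common base
// and breaks ties by sequence number, so of several stores to one address
// only the intended one leads. An equivalent comparator over a plain struct:
#include <algorithm>
#include <cassert>
#include <vector>
struct MemOpLite { long OffsetFromBase; unsigned SequenceNum; };
int main() {
  std::vector<MemOpLite> Ops = {{8, 0}, {0, 2}, {0, 1}};
  std::sort(Ops.begin(), Ops.end(), [](MemOpLite L, MemOpLite R) {
    return L.OffsetFromBase < R.OffsetFromBase ||
           (L.OffsetFromBase == R.OffsetFromBase &&
            L.SequenceNum < R.SequenceNum);
  });
  assert(Ops[0].SequenceNum == 1 && Ops[2].OffsetFromBase == 8);
  return 0;
}
// ---- end aside --------------------------------------------------------------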
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i+1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { LastLegalType = i + 1; } } - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, + FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + LastLegalVectorType = i + 1; } } - - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if (NoVectors) { - LastLegalVectorType = 0; - } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, - LastLegalVectorType, - FirstStoreAS)) { - LastLegalVectorType = 0; - } - // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; @@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecEltSrc) { - unsigned NumElem = 0; + if (IsExtractVecSrc) { + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); + unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a vector. // It should be possible to handle mixed sources, but load sources need // more careful handling (see the block of code below that handles // consecutive loads). - if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) return false; // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. 
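// Sketch of the legality probe pattern used above, with hypothetical IsLegal
// and IsFast callbacks standing in for TLI.isTypeLegal and
// TLI.allowsMemoryAccess (whose IsFast out-parameter must also be checked,
// as the patch does): grow the candidate type one element at a time and
// remember the widest width that still works.
#include <functional>

unsigned lastLegalWidth(unsigned MaxElems,
                        const std::function<bool(unsigned)> &IsLegal,
                        const std::function<bool(unsigned)> &IsFast) {
  unsigned LastLegal = 0;
  for (unsigned NumElems = 1; NumElems <= MaxElems; ++NumElems)
    if (IsLegal(NumElems) && IsFast(NumElems))
      LastLegal = NumElems; // widest workable store type seen so far
  return LastLegal;
}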
+ Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) - NumElem = i + 1; + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + NumStoresToMerge = i + 1; } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, false, true); } @@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads much share the same chain. + // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; @@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, - FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign, &IsFastLd) && + IsFastLd) LastLegalIntegerType = i+1; } } @@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. 
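// Standalone sketch of the consecutiveness test used when scanning the
// merged loads above: candidate i extends the run only if it sits exactly i
// elements past the first operand. Offsets are assumed to be already
// decomposed relative to a common base pointer.
#include <cstdint>
#include <vector>

unsigned lastConsecutive(const std::vector<int64_t> &OffsetsFromBase,
                         int64_t ElementSizeBytes) {
  unsigned Last = 0;
  for (unsigned i = 1; i < OffsetsFromBase.size(); ++i) {
    if (OffsetsFromBase[i] - OffsetsFromBase[0] !=
        ElementSizeBytes * static_cast<int64_t>(i))
      break;
    Last = i;
  }
  return Last; // index of the last operand contiguous with the first
}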
+ SmallVector MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; igetChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); - // Replace one of the loads with the new load. - LoadSDNode *Ld = cast(LoadNodes[0].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - - // Remove the rest of the load chains. - for (unsigned i = 1; i < NumElem ; ++i) { - // Replace all chain users of the old load nodes with the chain of the new - // load node. + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); } // Replace the last store with the new store. @@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return true; } +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + +SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { + SDValue Value = ST->getValue(); + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + SDLoc DL(ST); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + const ConstantFPSDNode *CFP = cast(Value); + + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. 
+ + SDValue Tmp; + switch (CFP->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + return SDValue(); + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); + return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); + } + + return SDValue(); + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, DL, Tmp, + Ptr, ST->getMemOperand()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + SDValue St0 = DAG.getStore(Chain, DL, Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment(), AAInfo); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, DL, Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment, AAInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + St0, St1); + } + + return SDValue(); + } +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); @@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) return Chain; - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - if (ConstantFPSDNode *CFP = dyn_cast(Value)) { - // NOTE: If the original store is volatile, this transform must not increase - // the number of stores. For example, on x86-32 an f64 can be stored in one - // processor operation but an i64 (which is not legal) requires two. So the - // transform should not be done in this case. - if (Value.getOpcode() != ISD::TargetConstantFP) { - SDValue Tmp; - switch (CFP->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unknown FP type"); - case MVT::f16: // We don't do this for these yet. - case MVT::f80: - case MVT::f128: - case MVT::ppcf128: - break; - case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; - Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
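// A minimal host-side sketch of the f64 splitting performed above: bitcast
// the constant to a 64-bit integer, take the two 32-bit halves, and swap
// them on big-endian targets so the memory image is unchanged. memcpy stands
// in for bitcastToAPInt; the second half is stored at Ptr + 4 with alignment
// MinAlign(Align, 4) in the real code.
#include <cstdint>
#include <cstring>
#include <utility>

void splitF64Store(double V, bool BigEndian, uint32_t Out[2]) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // type-pun without UB
  uint32_t Lo = static_cast<uint32_t>(Bits & 0xFFFFFFFFu);
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
  if (BigEndian)
    std::swap(Lo, Hi);
  Out[0] = Lo; // first 32-bit store
  Out[1] = Hi; // second 32-bit store, 4 bytes higher
}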
- bitcastToAPInt().getZExtValue(), SDLoc(CFP), - MVT::i32); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - break; - case MVT::f64: - if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; - Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), SDLoc(CFP), MVT::i64); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - - if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - // Many FP stores are not made apparent until after legalize, e.g. for - // argument passing. Since this is so common, custom legalize the - // 64-bit integer store into two 32-bit stores. - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Lo, Hi); - - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - - SDLoc DL(N); - - SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); - Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - St0, St1); - } - - break; - } - } - } - // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { @@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try transforming a pair floating point load / store ops to integer // load / store ops. - SDValue NewST = TransformFPLoadStorePair(N); - if (NewST.getNode()) + if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA @@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { UseAA = false; #endif if (UseAA && ST->isUnindexed()) { - // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); + // FIXME: We should do this even without AA enabled. AA will just allow + // FindBetterChain to work in more situations. The problem with this is that + // any combine that expects memory operations to be on consecutive chains + // first needs to be updated to look for users of the same chain. - // If there is a better chain. - if (Chain != BetterChain) { - SDValue ReplStore; - - // Replace the chain to avoid dependency. - if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemoryVT(), ST->getMemOperand()); - } else { - ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemOperand()); - } - - // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, Chain, ReplStore); - - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Don't add users to work list. 
- return CombineTo(N, Token, false); + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. + if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); } } @@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return SDValue(N, 0); } + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. + if (isa(Value)) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + return ReduceLoadOpStoreWidth(N); } @@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue EltNo = N->getOperand(1); - bool ConstEltNo = isa(EltNo); + ConstantSDNode *ConstEltNo = dyn_cast(EltNo); + + // extract_vector_elt (build_vector x, y), 1 -> y + if (ConstEltNo && + InVec.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VT) && + (InVec.hasOneUse() || + TLI.aggressivelyPreferBuildVectorSources(VT))) { + SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + EVT InEltVT = Elt.getValueType(); + + // Sometimes build_vector's scalar input types do not match result type. + if (NVT == InEltVT) + return Elt; + + // TODO: It may be useful to truncate if free if the build_vector implicitly + // converts. + } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because @@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. - if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo) { - int Elt = cast(EltNo)->getZExtValue(); + if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(InVec); // Find the new index to extract from. - int OrigElt = SVOp->getMaskElt(Elt); + int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) @@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); } -SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { - // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of - // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector - // inputs come from at most two distinct vectors, turn this into a shuffle - // node. +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector Mask; + + for (SDValue Op : N->ops()) { + // Peek through any bitcast. 
+ while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + + // Peek through any bitcast. + while (ExtVec.getOpcode() == ISD::BITCAST) + ExtVec = ExtVec.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); @@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. 
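// Sketch of the index rescaling above: when the extracted vector is seen
// through a bitcast, the subvector index is expressed in the pre-bitcast
// element count and must be converted to units of the result's elements.
// Returns nothing when the two element counts do not divide evenly, matching
// the bail-out in the patch.
#include <optional>

std::optional<int> scaleExtractIndex(int ExtIdx, int NumExtElts, int NumElts) {
  if (NumExtElts % NumElts == 0)
    return ExtIdx / (NumExtElts / NumElts); // bitcast widened the elements
  if (NumElts % NumExtElts == 0)
    return ExtIdx * (NumElts / NumExtElts); // bitcast narrowed the elements
  return std::nullopt;                      // incompatible counts: give up
}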
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), - ArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); + makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } @@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + if (N0->getOpcode() == ISD::AND) { + ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); + if (AndConst && AndConst->getAPIntValue() == 0xffff) { + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), + N0.getOperand(0)); + } + } + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector Indices; - unsigned NumElts = RHS.getNumOperands(); + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (isAllOnesConstant(Elt)) + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + + SmallVector Indices; + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } + + APInt Bits; + if (isa(Elt)) + Bits = cast(Elt)->getAPIntValue(); + else if (isa(Elt)) + Bits = cast(Elt)->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) Indices.push_back(i); - else if (isNullConstant(Elt)) - Indices.push_back(NumElts+i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. 
- EVT EltVT = RVT.getVectorElementType(); - SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, dl, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); - } + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; return SDValue(); } @@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Try to convert a constant mask AND into a shuffle clear mask. if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; - // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold - // this operation. - if (LHS.getOpcode() == ISD::BUILD_VECTOR && - RHS.getOpcode() == ISD::BUILD_VECTOR) { - // Check if both vectors are constants. If not bail out. - if (!(cast(LHS)->isConstant() && - cast(RHS)->isConstant())) - return SDValue(); - - SmallVector Ops; - for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - SDValue LHSOp = LHS.getOperand(i); - SDValue RHSOp = RHS.getOperand(i); - - // Can't fold divide by zero. - if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || - N->getOpcode() == ISD::FDIV) { - if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast(RHSOp.getNode())->isZero())) - break; - } - - EVT VT = LHSOp.getValueType(); - EVT RVT = RHSOp.getValueType(); - if (RVT != VT) { - // Integer BUILD_VECTOR operands may have types larger than the element - // size (e.g., when the element type is not legal). Prior to type - // legalization, the types may not match between the two BUILD_VECTORS. - // Truncate one of the operands to make them match. - if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); - } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); - VT = RVT; - } - } - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, - LHSOp, RHSOp); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::Constant && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); - } - // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). 
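// Standalone sketch of BuildClearMask's per-sub-element test above, assuming
// little-endian bit numbering and uint64_t lanes in place of APInt: each
// constant lane is cut into Split pieces, and every piece must be all-ones
// (keep the element) or all-zeros (take the zero vector) for the AND to
// become a shuffle-with-zero.
#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::vector<int>>
buildClearMask(const std::vector<uint64_t> &Lanes, int Split, int LaneBits) {
  int SubBits = LaneBits / Split;
  uint64_t SubMask = (SubBits == 64) ? ~0ull : ((1ull << SubBits) - 1);
  int NumSubElts = static_cast<int>(Lanes.size()) * Split;
  std::vector<int> Indices;
  for (int i = 0; i != NumSubElts; ++i) {
    uint64_t Bits = (Lanes[i / Split] >> ((i % Split) * SubBits)) & SubMask;
    if (Bits == SubMask)
      Indices.push_back(i);              // all-ones: keep this sub-element
    else if (Bits == 0)
      Indices.push_back(i + NumSubElts); // all-zeros: pull from zero vector
    else
      return std::nullopt;               // mixed bits: not a clear mask
  }
  return Indices;
}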
@@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT VT = N->getValueType(0); SDValue UndefVector = LHS.getOperand(1); SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0)); + LHS.getOperand(0), RHS.getOperand(0), + N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); @@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); - return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, - false, false, Alignment); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); } } @@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Get a SetCC of the condition // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) { + TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { @@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is the equivalent of setcc - // FIXME: Turn all of these into setcc if setcc if setcc is legal - // otherwise, go ahead with the folds. - if (0 && isNullConstant(N3) && isOneConstant(N2)) { - EVT XType = N0.getValueType(); - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); - if (Res.getValueType() != VT) - Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); - return Res; - } - - // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (isNullConstant(N1) && CC == ISD::SETEQ && - (!LegalOperations || - TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); - return DAG.getNode(ISD::SRL, DL, XType, Ctlz, - DAG.getConstant(Log2_32(XType.getSizeInBits()), - SDLoc(Ctlz), - getShiftAmountTy(Ctlz.getValueType()))); - } - // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (isNullConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue NegN0 = DAG.getNode(ISD::SUB, DL, - XType, DAG.getConstant(0, DL, XType), N0); - SDValue NotN0 = DAG.getNOT(DL, N0, XType); - return DAG.getNode(ISD::SRL, DL, XType, - DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(XType))); - } - // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (isAllOnesConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, - XType)); - } - } - // Check to see if this is an integer abs. 
// select_cc setg[te] X, 0, X, -X -> // select_cc setgt X, -1, X, -X -> @@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // Newton iterations: Est = Est + Est (1 - Arg * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } @@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } return Est; @@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); AddToWorklist(HalfEst.getNode()); 
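// Scalar sketches of the two Newton-Raphson refinements threaded with flags
// above, written with plain floats instead of SDNodes. recipStep mirrors
// BuildReciprocalEstimate (Est = Est + Est * (1 - Arg * Est)) and rsqrtStep
// mirrors BuildRsqrtNRTwoConst (Est = (-0.5 * Est) * (Arg * Est * Est - 3)).
inline float recipStep(float Arg, float Est) {
  return Est + Est * (1.0f - Arg * Est);
}

inline float rsqrtStep(float Arg, float Est) {
  return (-0.5f * Est) * (Arg * Est * Est - 3.0f);
}
// Each step roughly doubles the number of correct bits, which is why the
// iteration count is derived from the precision of the initial estimate.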
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); AddToWorklist(Est.getNode()); } return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations) : - BuildRsqrtNRTwoConst(Op, Est, Iterations); + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); } return Est; } @@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll - // find more and revert to original chain since the xform is unlikely to be - // profitable. + // chain until we reach the depth limit. // // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. - if (Depth > 6 || Aliases.size() == 2) { + if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; @@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. 
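// Sketch of the chain walk started above, assuming a simplified store record
// that already carries its decomposed base pointer (BaseIndexOffset.Base in
// the patch) and whose PrevInChain link stands in for getChain(). Stores are
// chained newest-to-oldest; the walk stops at the first store with a
// different base, mirroring the equalBaseIndex test.
#include <cstdint>
#include <vector>

struct SimpleStore {
  const void *Base;
  int64_t Offset;
  SimpleStore *PrevInChain;
};

std::vector<SimpleStore *> collectChainedStores(SimpleStore *St) {
  std::vector<SimpleStore *> Chained{St};
  for (SimpleStore *I = St->PrevInChain; I && I->Base == St->Base;
       I = I->PrevInChain)
    Chained.push_back(I); // same base pointer: candidate for a better chain
  return Chained;
}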
+ SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast(NextInChain)) { + // We found a store node. Use it for the next iteration. + ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. + for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 2b9ba2c1b534..cfbb20947acc 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -118,9 +118,9 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap::iterator VI = LocalValueMap.find(I); + DenseMap::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; + FuncInfo.ValueMap[&*I] = VI->second; } return true; } @@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0) + // CALLSEQ_START(0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::eh_actions: { - unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(II); assert(DI->getVariable() && "Missing variable"); @@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. 
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa(I)) - if (!handlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. + removeDeadLocalValueCode(SavedLastLocalValue); return false; + } DbgLoc = I->getDebugLoc(); @@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) { LibInfo->hasOptimizedCodeGen(Func)) return false; - // Don't handle Intrinsic::trap if a trap funciton is specified. + // Don't handle Intrinsic::trap if a trap function is specified. if (F && F->getIntrinsicID() == Intrinsic::trap && Call->hasFnAttr("trap-func-name")) return false; @@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. - if (isa(I)) + if (isa(I)) { + // PHI node handling may have generated local value instructions. + // We remove them because SelectionDAGISel will generate them again. + removeDeadLocalValueCode(SavedLastLocalValue); FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + } return false; } @@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector(), DbgLoc); } - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), - MSucc->getBasicBlock()); - FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(MSucc); +} + +void FastISel::finishCondBranch(const BasicBlock *BranchBB, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB) { + // Add TrueMBB as successor unless it is equal to the FalseMBB: This can + // happen in degenerate IR and MachineIR forbids to have a block twice in the + // successor/predecessor lists. + if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); } /// Emit an FNeg operation. 
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, unsigned Op1, - bool Op1IsKill, uint64_t Imm1, - uint64_t Imm2) { - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); - Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cc306cbf5ae4..b62bd2bd63ee 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = MF->getMMI(); + const TargetFrameLowering *TFI = 
MF->getSubtarget().getFrameLowering(); // Check whether the function can return without sret-demotion. SmallVector Outs; @@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (const AllocaInst *AI = dyn_cast(I)) { - // Static allocas can be folded into the initial stack frame adjustment. - if (AI->isStaticAlloca()) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast(AI->getArraySize()); - Type *Ty = AI->getAllocatedType(); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } else { - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa(I) || isa(I)) { - ImmutableCallSite CS(I); + ImmutableCallSite CS(&*I); if (isa(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->setHasVAStart(true); } - // If we have a musttail call in a variadic funciton, we need to ensure we + // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. if (const auto *CI = dyn_cast(I)) { if (CI->isMustTailCall() && Fn->isVarArg()) @@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(I)) - if (!isa(I) || - !StaticAllocaMap.count(cast(I))) - InitializeRegForValue(I); + if (isUsedOutsideOfDefiningBlock(&*I)) + if (!isa(I) || !StaticAllocaMap.count(cast(I))) + InitializeRegForValue(&*I); // Collect llvm.dbg.declare information. This is done now instead of // during the initial isel pass through the IR so that it is done @@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. 
- PreferredExtendType[I] = getPreferredExtendForValue(I); + PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(); BB != EB; ++BB) { - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); - MBBMap[BB] = MBB; + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB->isEHPad()) { + const Instruction *I = BB->getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa(I)) { + MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + if (isa(I)) { + assert(&*BB->begin() == I && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa(I)) + assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); + MBBMap[&*BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could @@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. SmallVector LPads; for (BB = Fn->begin(); BB != EB; ++BB) { - if (const auto *Invoke = dyn_cast(BB->getTerminator())) - MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - if (BB->isLandingPad()) - LPads.push_back(BB->getLandingPadInst()); + const Instruction *FNP = BB->getFirstNonPHI(); + if (BB->isEHPad() && MBBMap.count(&*BB)) + MBBMap[&*BB]->setIsEHPad(); + if (const auto *LPI = dyn_cast(FNP)) + LPads.push_back(LPI); } - // If this is an MSVC EH personality, we need to do a bit more work. - EHPersonality Personality = EHPersonality::Unknown; - if (Fn->hasPersonalityFn()) - Personality = classifyEHPersonality(Fn->getPersonalityFn()); - if (!isMSVCEHPersonality(Personality)) + // If this personality uses funclets, we need to do a bit more work. + if (!Fn->hasPersonalityFn()) + return; + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (!isFuncletEHPersonality(Personality)) return; - if (Personality == EHPersonality::MSVC_Win64SEH || - Personality == EHPersonality::MSVC_X86SEH) { - addSEHHandlersForLPads(LPads); - } + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); - WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn); - if (Personality == EHPersonality::MSVC_CXX) { - const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); - } + calculateCatchReturnSuccessorColors(&fn, EHInfo); - // Copy the state numbers to LandingPadInfo for the current function, which - // could be a handler or the parent. This should happen for 32-bit SEH and - // C++ EH. 
- if (Personality == EHPersonality::MSVC_CXX || - Personality == EHPersonality::MSVC_X86SEH) { - for (const LandingPadInst *LP : LPads) { - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); - } - } -} - -void FunctionLoweringInfo::addSEHHandlersForLPads( - ArrayRef LPads) { - MachineModuleInfo &MMI = MF->getMMI(); - - // Iterate over all landing pads with llvm.eh.actions calls. - for (const LandingPadInst *LP : LPads) { - const IntrinsicInst *ActionsCall = - dyn_cast(LP->getNextNode()); - if (!ActionsCall || - ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) - continue; - - // Parse the llvm.eh.actions call we found. - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - SmallVector, 4> Actions; - parseEHActions(ActionsCall, Actions); - - // Iterate EH actions from most to least precedence, which means - // iterating in reverse. - for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { - ActionHandler *Action = I->get(); - if (auto *CH = dyn_cast(Action)) { - const auto *Filter = - dyn_cast(CH->getSelector()->stripPointerCasts()); - assert((Filter || CH->getSelector()->isNullValue()) && - "expected function or catch-all"); - const auto *RecoverBA = - cast(CH->getHandlerBlockOrFunc()); - MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.CatchObj.Alloca) { + assert(StaticAllocaMap.count(H.CatchObj.Alloca)); + H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; } else { - assert(isa(Action)); - const auto *Fini = cast(Action->getHandlerBlockOrFunc()); - MMI.addSEHCleanupHandler(LPadMBB, Fini); + H.CatchObj.FrameIndex = INT_MAX; } + if (H.Handler) + H.Handler = MBBMap[H.Handler.get()]; } } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const BasicBlock *BB = UME.Handler.get(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const BasicBlock *BB = CME.Handler.get(); + CME.Handler = MBBMap[BB]; + } } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void FunctionLoweringInfo::clear() { - assert(CatchInfoFound.size() == CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); - MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); -#ifndef NDEBUG - CatchInfoLost.clear(); - CatchInfoFound.clear(); -#endif LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); @@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return 0; } +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + /// ComputeUsesVAFloatArgument - Determine if any floating-point values are /// being passed to this variadic function, and set the MachineModuleInfo's /// usesVAFloatArgument flag if so. 
This flag is used to emit an undefined @@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, MachineBasicBlock *MBB) { - MMI.addPersonality( - MBB, - cast( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())); + if (const auto *PF = dyn_cast( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MMI.addPersonality(PF); if (I.isCleanup()) MMI.addCleanup(MBB); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fbc8f1e89f6e..f46767f6c4a1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -39,6 +39,10 @@ using namespace llvm; #define DEBUG_TYPE "legalizedag" +namespace { + +struct FloatSignAsInt; + //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -51,7 +55,6 @@ using namespace llvm; /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. 
/// -namespace { class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -130,7 +133,11 @@ private: SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); - SDValue ExpandFCOPYSIGN(SDNode *Node); + void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, @@ -138,6 +145,7 @@ private: SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, SDLoc dl); + SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); SDValue ExpandBSWAP(SDValue Op, SDLoc dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); @@ -146,10 +154,11 @@ private: SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); - std::pair ExpandAtomic(SDNode *Node); - - void ExpandNode(SDNode *Node); + // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: @@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, false, Alignment); + SDValue Result = DAG.getExtLoad( + ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, + false, false, false, Alignment); return Result; } SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, false, - Alignment); + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); return Result; } @@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, int SPFI = cast(StackPtr.getNode())->getIndex(); // Store the vector. - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + SDValue Ch = DAG.getStore( + DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, 0); // Truncate or zero extend offset to target pointer type. - unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, @@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. - return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, - false, 0); + return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), SPFI), + false, false, false, 0); } @@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), - StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DL.getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
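// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the predicate that
// allowsMemoryAccess folds together in the hunks around here. Previously each
// call site asked whether misaligned accesses are allowed and, if not,
// compared the access alignment against the type's ABI alignment itself; now
// one query answers "is this access legal as-is?". The booleans below are
// stand-ins for the target and DataLayout queries.
static bool allowsAccess(bool TargetAllowsMisaligned, unsigned AccessAlign,
                         unsigned ABIAlign) {
  // Either the target tolerates misalignment, or the access is already at
  // least ABI-aligned; otherwise the legalizer must expand it.
  return TargetAllowsMisaligned || AccessAlign >= ABIAlign;
}

int main() {
  // A 2-byte-aligned access to a type with 4-byte ABI alignment on a strict
  // target must be expanded.
  bool MustExpand = !allowsAccess(false, 2, 4);
  return MustExpand ? 0 : 1;
}
// ---------------------------------------------------------------------------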
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); - } - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); break; } case TargetLowering::Custom: { @@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Res.getValue(1); } } else { - // If this is an unaligned load and the target doesn't support - // it, expand it. + // If this is an unaligned load and the target doesn't support it, + // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); - } - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); @@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Load.getValue(1); break; } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. 
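// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: why the hunk below
// loads f16 as an integer and converts. A plain extending load would leave
// the upper bits of the wider FP register undefined, whereas FP16_TO_FP is a
// genuine conversion, so ZEXTLOAD + convert is exact. The standalone IEEE
// binary16 decoder here (normals, denormals, inf/NaN) makes the
// "conversion, not bit-extension" point concrete.
#include <cstdint>
#include <cstring>

static float halfToFloat(uint16_t H) {
  uint32_t Sign = (H >> 15) & 1, Exp = (H >> 10) & 0x1F, Frac = H & 0x3FF;
  uint32_t Out;
  if (Exp == 0x1F)          // Inf/NaN: widen the exponent field, keep payload.
    Out = (Sign << 31) | 0x7F800000u | (Frac << 13);
  else if (Exp != 0)        // Normal: rebias the exponent from 15 to 127.
    Out = (Sign << 31) | ((Exp + 112) << 23) | (Frac << 13);
  else if (Frac == 0)       // Signed zero.
    Out = Sign << 31;
  else {                    // Denormal: renormalize into a float normal.
    unsigned Shift = 0;
    while (!(Frac & 0x400)) { Frac <<= 1; ++Shift; }
    Frac &= 0x3FF;
    Out = (Sign << 31) | ((113 - Shift) << 23) | (Frac << 13);
  }
  float F;
  std::memcpy(&F, &Out, sizeof(F));
  return F;
}

int main() {
  // 0x3C00 is 1.0 in binary16; merely zero-extending those bits would yield a
  // tiny float denormal, which is exactly the bug the conversion avoids.
  return halfToFloat(0x3C00) == 1.0f ? 0 : 1;
}
// ---------------------------------------------------------------------------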
+ if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } } assert(!SrcVT.isVector() && @@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READCYCLECOUNTER: + // READCYCLECOUNTER returns an i64, even if type legalization might have + // expanded that to several smaller types. 
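// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the hazard guarded by
// the hasPredecessorHelper() check added to
// ExpandExtractFromVectorThroughStack just below. Reusing a store's chain for
// a new load is only safe if the load's index does not (transitively) depend
// on that store; otherwise the rewrite forms a cycle. A DFS over operand
// edges with a shared visited set is the standard test. Node is a minimal
// stand-in for SDNode.
#include <cassert>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// Returns true if Target is reachable from N through operand edges, i.e.
// Target is a (transitive) predecessor of N.
static bool dependsOn(Node *N, Node *Target, std::set<Node *> &Visited) {
  if (N == Target)
    return true;
  if (!Visited.insert(N).second)
    return false; // already explored from this node
  for (Node *Op : N->Operands)
    if (dependsOn(Op, Target, Visited))
      return true;
  return false;
}

int main() {
  Node Store, Add, Idx;
  Add.Operands = {&Store}; // index is computed from the store's value
  Idx.Operands = {&Add};
  std::set<Node *> Visited;
  assert(dependsOn(&Idx, &Store, Visited)); // unsafe: skip this store
  return 0;
}
// ---------------------------------------------------------------------------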
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); + break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name @@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); + if (ExpandNode(Node)) + return; + // FALL THROUGH + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); @@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. + + // Caches for hasPredecessorHelper + SmallPtrSet Visited; + SmallVector Worklist; + SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; @@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); int FI = cast(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, @@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast(FIPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. SmallVector Stores; @@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { false, false, false, 0); } -SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - SDLoc dl(Node); - SDValue Tmp1 = Node->getOperand(0); - SDValue Tmp2 = Node->getOperand(1); +namespace { +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; +}; +} - // Get the sign bit of the RHS. First obtain a value that has the same - // sign as the sign bit, i.e. negative if and only if the sign bit is 1. - SDValue SignBit; - EVT FloatVT = Tmp2.getValueType(); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); +/// Bitcast a floating-point value to an integer value. Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. 
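// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the byte-addressing
// trick getSignAsIntValue (below) relies on when no integer type can hold the
// whole float. The sign bit of an IEEE double lives in the highest-addressed
// byte on a little-endian host, so loading that single byte and testing bit 7
// recovers the sign; this mirrors the i8 load with SignMask = bit 7 built by
// the code below. Big-endian hosts read at offset zero instead.
#include <cstring>

static bool signBitViaByteLoad(double D) {
  unsigned char Bytes[sizeof(double)];
  std::memcpy(Bytes, &D, sizeof(D));
  // Assumes a little-endian host, matching the !isBigEndian() branch below.
  return (Bytes[sizeof(double) - 1] & 0x80) != 0;
}

int main() {
  return (!signBitViaByteLoad(1.0) && signBitViaByteLoad(-0.0)) ? 0 : 1;
}
// ---------------------------------------------------------------------------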
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + SDLoc DL, SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { - // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); - } else { - auto &DL = DAG.getDataLayout(); - // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(DL); - // First create a temporary that is aligned for both the load and store. - SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); - // Then store the float to it. - SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), - false, false, 0); - if (DL.isBigEndian()) { - assert(FloatVT.isByteSized() && "Unsupported floating point type!"); - // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, false, 0); - } else { // Little endian - SDValue LoadPtr = StackPtr; - // The float may be wider than the integer we are going to load. Advance - // the pointer so that the loaded integer will contain the sign bit. - unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); - unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, dl, - LoadPtr.getValueType())); - // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, false, 0); - // Move the sign bit to the top bit of the loaded integer. - unsigned BitShift = LoadTy.getSizeInBits() - - (FloatVT.getSizeInBits() - 8 * ByteOffset); - assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); - if (BitShift) - SignBit = DAG.getNode( - ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType(), DL))); - } + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignBit(NumBits); + return; } - // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, - DAG.getConstant(0, dl, SignBit.getValueType()), - ISD::SETLT); - // Get the absolute value of the result. - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); - // Select between the nabs and abs value based on the sign bit of - // the input. - return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast(StackPtr.getNode())->getIndex(); + // Then store the float to it. 
+ State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo, false, false, 0); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; + } else { + // Advance the pointer so that the loaded byte will contain the sign bit. + unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); + } + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, + IntPtr, State.IntPointerInfo, MVT::i8, + false, false, false, 0); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + SDLoc DL, SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. + SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8, false, false, + 0); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo, false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform values to integer, copy the sign bit and transform back. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + assert(SignAsInt.SignMask == MagAsInt.SignMask); + SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + ClearSignMask); + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. 
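// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the pure-integer form
// of ExpandFCOPYSIGN's fallback path above. Clear the magnitude's sign bit
// with ~SignMask, take the sign bit from the other operand, and OR the two
// together, which is exactly the ClearedSign | SignBit combination.
#include <cstdint>
#include <cstring>

static float copysignViaBits(float Mag, float Sign) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, 4);
  std::memcpy(&S, &Sign, 4);
  const uint32_t SignMask = 0x80000000u;
  uint32_t R = (M & ~SignMask) | (S & SignMask);
  float Out;
  std::memcpy(&Out, &R, 4);
  return Out;
}

int main() {
  return copysignViaBits(3.5f, -1.0f) == -3.5f ? 0 : 1;
}
// ---------------------------------------------------------------------------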
+ EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. + FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast(FIPtr); int SPFI = StackPtrFI->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { FrameIndexSDNode *StackPtrFI = cast(StackPtr); int SPFI = StackPtrFI->getIndex(); - SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), - StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - Node->getValueType(0).getVectorElementType(), - false, false, 0); - return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, false, 0); + SDValue Ch = DAG.getTruncStore( + DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), + Node->getValueType(0).getVectorElementType(), false, false, 0); + return DAG.getLoad( + Node->getValueType(0), dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, false, 0); } static bool @@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); } SmallSet DefinedValues; @@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// Return true if divmod libcall is available. -static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, - const TargetLowering &TLI) { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; - case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; - } - - return TLI.getLibcallName(LC) != nullptr; -} - -/// Only issue divrem libcall if both quotient and remainder are needed. 
-static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { - // The other use might have been replaced with a divrem already. - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - unsigned OtherOpcode = 0; - if (isSigned) - OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - else - OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - - SDValue Op0 = Node->getOperand(0); - SDValue Op1 = Node->getOperand(1); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User == Node) - continue; - if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && - User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) - return true; - } - return false; -} - /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, @@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, SDLoc dl) { + // TODO: Should any fast-math-flags be set for the created nodes? + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) - FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, Alignment); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Open code the operations for BITREVERSE. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2; + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift = Shift.shl(J); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + /// Open code the operations for BSWAP of the specified operation. 
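// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: ExpandBITREVERSE's
// open-coded loop (added just above) run on a host integer. For each
// destination bit J = Sz-1-I, the value is shifted so that source bit I lands
// on bit J, masked down to that single bit, and ORed into the accumulator.
#include <cassert>
#include <cstdint>

static uint8_t bitreverse8(uint8_t Op) {
  const unsigned Sz = 8;
  uint8_t Tmp = 0;
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    uint8_t Tmp2 =
        (I < J) ? (uint8_t)(Op << (J - I)) : (uint8_t)(Op >> (I - J));
    Tmp |= (uint8_t)(Tmp2 & (1u << J)); // keep only destination bit J
  }
  return Tmp;
}

int main() {
  assert(bitreverse8(0x01) == 0x80);
  assert(bitreverse8(0xB4) == 0x2D); // 1011'0100 -> 0010'1101
  return 0;
}
// ---------------------------------------------------------------------------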
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); @@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } -std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { - unsigned Opc = Node->getOpcode(); - MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - - return ExpandChainLibCall(LC, Node, false); -} - -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; @@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: { - // If the target didn't lower this, lower it to '__sync_synchronize()' call - // FIXME: handle "fence singlethread" more efficiently. - TargetLowering::ArgListTy Args; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - - std::pair CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); @@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Swap.getValue(1)); break; } - // By default, atomic intrinsics are marked Legal and lowered. Targets - // which don't support them directly, however, may want libcalls, in which - // case they mark them Expand, and we get here. - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_CMP_SWAP: { - std::pair Tmp = ExpandAtomic(Node); - Results.push_back(Tmp.first); - Results.push_back(Tmp.second); - break; - } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. 
Expanding the resulting @@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } - case ISD::TRAP: { - // If this operation is not supported, lower it to 'abort()' call - TargetLowering::ArgListTy Args; - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - std::pair CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), @@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); @@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::VAARG: { - const Value *V = cast(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); - - SDValue VAListLoad = - DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); - SDValue VAList = VAListLoad; - - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - - VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, dl, - VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, dl, - VAList.getValueType())); - } - - // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( - VT.getTypeForEVT(*DAG.getContext())), - dl, VAList.getValueType())); - // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, - MachinePointerInfo(V), false, false, 0); - // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, false, 0)); + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; - } - case ISD::VACOPY: { - // This defaults to loading a pointer from the input and storing it to the - // output, returning the chain. - const Value *VD = cast(Node->getOperand(3))->getValue(); - const Value *VS = cast(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); - Results.push_back(Tmp1); + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); break; - } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
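// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the pointer round-up
// that the removed open-coded VAARG expansion above performed (and that the
// shared DAG.expandVAArg presumably still performs): bump the list pointer to
// the next multiple of a power-of-two alignment via add-then-mask.
#include <cassert>
#include <cstdint>

static uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (Ptr + Align - 1) & ~(Align - 1);
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);
  assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
  return 0;
}
// ---------------------------------------------------------------------------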
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); } break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; - case ISD::FABS: { - // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, dl, VT); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, ISD::SETUGT); - Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); - Results.push_back(Tmp1); + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); break; - } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - case ISD::FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); - break; - case ISD::FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); - break; - case ISD::FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); - break; case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); - bool isSIN = Node->getOpcode() == ISD::FSIN; // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || @@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); - if (!isSIN) + if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); - } else if (isSIN) { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); } break; } - case ISD::FSINCOS: - // Expand into sincos libcall. 
- ExpandSinCosLibCall(Node, Results); - break; - case ISD::FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); - break; - case ISD::FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); - break; - case ISD::FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); - break; - case ISD::FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); - break; - case ISD::FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); - break; - case ISD::FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); - break; - case ISD::FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); - break; - case ISD::FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); - break; - case ISD::FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); - break; - case ISD::FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); - break; - case ISD::FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); - break; - case ISD::FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); - break; - case ISD::FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); - break; - case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); - break; - case ISD::FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); - break; - case ISD::FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); - break; case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); - case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); - break; - case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); - break; - case ISD::FP16_TO_FP: { - if (Node->getValueType(0) == MVT::f32) { - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); - break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. 
Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } - - // We can extend to types bigger than f32 in two steps without changing the - // result. Since "f16 -> f32" is much more commonly available, give CodeGen - // the option of emitting that before resorting to a libcall. - SDValue Res = - DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); - Results.push_back( - DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - } - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); - break; } } - - RTLIB::Libcall LC = - RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); - Results.push_back(ExpandLibCall(LC, Node, false)); break; - } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. @@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags *Flags = &cast(Node)->Flags; Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); } break; } @@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - // If div is legal, it's better to do the normal expansion - !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && - useDivRem(Node, isSigned, false))) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::UDIV: @@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - useDivRem(Node, isSigned, true))) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::MULHU: @@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1.getValue(1)); break; } - case ISD::SDIVREM: - case ISD::UDIVREM: - // Expand into divrem libcall - ExpandDivRemLibCall(Node, Results); - break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); - break; } - - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128); - Results.push_back(Tmp1); break; } case ISD::SADDO: @@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, - false, false, false, 0); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4091,17 +3947,277 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + // Replace the original node with the legalized result. 
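// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the "X % Y ->
// X - (X/Y)*Y" rewrite kept above for targets where only division is legal.
// Truncating division makes the identity hold for negative operands as well
// (INT_MIN / -1 excluded, which is undefined for both forms).
#include <cassert>

static int remViaDiv(int X, int Y) {
  return X - (X / Y) * Y;
}

int main() {
  assert(remViaDiv(7, 3) == 7 % 3);
  assert(remViaDiv(-7, 3) == -7 % 3);
  assert(remViaDiv(7, -3) == 7 % -3);
  return 0;
}
// ---------------------------------------------------------------------------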
+ if (Results.empty()) + return false; + + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + SmallVector Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + std::pair CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + 
RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + } + // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || - Node->getOpcode() == ISD::SETCC) { + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::BR_CC) @@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; @@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl))); break; } + case ISD::FCOPYSIGN: + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND.
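+ // e.g. when an f32 fcopysign is promoted through f64, the value still + // carries only f32 precision, so the final fp_round loses no bits; the + // 'isTrunc' flag below records exactly that (a clarifying note, not + // part of the upstream comment).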
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: @@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2, DAG.getIntPtrConstant(0, dl))); break; } + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g.
v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to different vector type with the same total bit size. + // + // e.g. v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } } // Replace the original node with the legalized result. @@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() { for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; - SDNode *N = NI; + SDNode *N = &*NI; if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3c50a4155731..6c0193a76732 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; - case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); break; + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result.
- if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { @@ -178,7 +199,7 @@ ... RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -191,7 +212,7 @@ ... RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -203,10 +224,13 @@ ... RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
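+ // Roughly, in integer bits (a sketch for IEEE types, where SignMask has + // only the sign bit set): + // result = (bits(x) & ~SignMask) | (bits(y) & SignMask);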
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
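+ // Roughly, in integer bits (a sketch for IEEE types): bits(x) ^ SignMask.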
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, dl).first; + NVT, Ops, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, false, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; @@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { @@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, Signed, dl).first; + Op, Signed, dl).first; } //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. 
+ // core about this to re-analyze. if (Res.getNode() == N) return true; @@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } @@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { + bool Signed = N->getOpcode() == ISD::FP_TO_SINT; + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} + EVT NVT = EVT(); + SDLoc dl(N); + + // If the result is not legal, eg: fp -> i1, then it needs to be promoted to + // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly + // match, eg. we don't have fp -> i8 conversions. + // Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to be big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ?
RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + + // Truncate the result if the libcall returns a larger type. + return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; + Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, break; } + // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), @@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. + // TODO: Are there fast-math-flags to propagate to this FSUB? 
return DAG.getSelectCC(dl, N->getOperand(0), Tmp, DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, @@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), false, dl).first; } @@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); - - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9f060a09a0f3..cd114d668e20 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; - case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SDIV: - case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -180,7 +185,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), N->getMemOperand(),
N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. @@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getMemoryVT(), N->getMemOperand()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); - SDValue Mask = N->getMask(); - EVT NewMaskVT = getSetCCResultType(NVT); - if (NewMaskVT != N->getMask().getValueType()) - Mask = PromoteTargetBoolean(Mask, NewMaskVT); SDLoc dl(N); - SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - Mask, ExtSrc0, N->getMemoryVT(), + N->getMask(), ExtSrc0, N->getMemoryVT(), N->getMemOperand(), ISD::SEXTLOAD); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); + assert(NVT == ExtSrc0.getValueType() && + "Gather result type and the passThru argument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + N->getIndex()}; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + /// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); @@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return Mul; } -SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); @@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } -SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - EVT MaskVT = Mask.getValueType(); SDLoc dl(N); bool TruncateStore = false; - if (!TLI.isTypeLegal(DataVT)) { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { - DataOp = GetPromotedInteger(DataOp); - if (!TLI.isTypeLegal(MaskVT)) - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); - TruncateStore = true; - } + if (OpNo == 2) { + // Mask comes before the data operand. If the data operand is legal, we just + // promote the mask. + // When the data operand has illegal type, we should legalize the data + // operand first. The mask will be promoted/split/widened according to + // the data operand type.
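+ // e.g. storing legal v4i32 data under a v4i1 mask: only the mask is + // promoted here, typically to the target's setcc result type for v4i32 + // (an illustrative case, assuming v4i1 is not a legal type).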
+ if (TLI.isTypeLegal(DataVT)) + Mask = PromoteTargetBoolean(Mask, DataVT); else { - assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && - "Unexpected data legalization in MSTORE"); - DataOp = GetWidenedVector(DataOp); + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + + else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); else { - EVT BoolVT = getSetCCResultType(DataOp.getValueType()); - - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } } + } else { // Data operand + assert(OpNo == 3 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; } - else - Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), TruncateStore); } -SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { assert(OpNo == 2 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); @@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N,
Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } -void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { - SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); - SplitInteger(Res, Lo, Hi); -} - void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } } +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, Lo, Hi); } @@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, Lo, Hi); } @@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one.
ReplaceValueWith(SDValue(N, 1), Ch); } @@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, - Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); return; } @@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough.
+ NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); @@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.getCondCode(CCCode)), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the @@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
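// Roughly, for an expanded i64 -> f64 conversion (a sketch in C of the // fudge-factor trick the code below implements): // double r = (double)(int64_t)x + ((int64_t)x < 0 ? 0x1p64 : 0.0);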
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), - FudgePtr, - MachinePointerInfo::getConstantPool(), - MVT::f32, - false, false, false, Alignment); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } @@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 54cfaf570619..2a0b0aa44794 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // (for example because it was created but not used). In general, we cannot // distinguish between new nodes and deleted nodes. SmallVector NewNodes; - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { // Remember nodes marked NewNode - they are subject to extra checking below. - if (I->getNodeId() == NewNode) - NewNodes.push_back(I); + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); - for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { - SDValue Res(I, i); + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); bool Failed = false; unsigned Mapped = 0; if (ReplacedValues.find(Res) != ReplacedValues.end()) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); UI != UE; ++UI) if (UI.getUse().getResNo() == i) assert(UI->getNodeId() == NewNode && @@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; - if (I->getNodeId() != Processed) { + if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes // marked NewNode too, since a deleted node may have been reallocated as // another node that has not been seen by the LegalizeTypes machinery. - if ((I->getNodeId() == NewNode && Mapped > 1) || - (I->getNodeId() != NewNode && Mapped != 0)) { + if ((Node.getNodeId() == NewNode && Mapped > 1) || + (Node.getNodeId() != NewNode && Mapped != 0)) { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; @@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() { // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' // (and remembering them) if they are leaves and assigning 'Unanalyzed' if // non-leaves. 
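
// Scalar model of the ExpandIntOp_UINT_TO_FP path above (illustrative, not
// the LLVM code): convert as signed, then add back 2^64 (the "fudge factor"
// fetched from the constant pool) whenever the sign bit was set. A model
// like this can differ from a direct conversion in the last bit because it
// rounds twice; it only illustrates the shape of the expansion.
#include <cstdint>
static double modelUIntToFP(uint64_t X) {
  double SignedConv = (double)(int64_t)X; // the SINT_TO_FP conversion
  double Fudge = (X >> 63) ? 18446744073709551616.0 : 0.0; // 2^64 or 0.0
  return SignedConv + Fudge; // the final FADD
}
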
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - if (I->getNumOperands() == 0) { - I->setNodeId(ReadyToProcess); - Worklist.push_back(I); + for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { - I->setNodeId(Unanalyzed); + Node.setNodeId(Unanalyzed); } } @@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -409,40 +411,48 @@ NodeDone: // In a debug build, scan all the nodes to make sure we found them all. This // ensures that there are no cycles and that everything got processed. #ifndef NDEBUG - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { bool Failed = false; // Check that all result types are legal. - if (!IgnoreNodeResults(I)) - for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(I->getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. 
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) - if (!IgnoreNodeResults(I->getOperand(i).getNode()) && - !isTypeLegal(I->getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } - if (I->getNodeId() != Processed) { - if (I->getNodeId() == NewNode) + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) dbgs() << "New node not analyzed?\n"; - else if (I->getNodeId() == Unanalyzed) + else if (Node.getNodeId() == Unanalyzed) dbgs() << "Unanalyzed node not noticed?\n"; - else if (I->getNodeId() > 0) + else if (Node.getNodeId() > 0) dbgs() << "Operand not processed?\n"; - else if (I->getNodeId() == ReadyToProcess) + else if (Node.getNodeId() == ReadyToProcess) dbgs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); dbgs() << "\n"; + Node.dump(&DAG); dbgs() << "\n"; llvm_unreachable(nullptr); } } @@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } @@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to
@@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
   return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
 }
 
+/// WidenTargetBoolean - Widen the given target boolean to a target boolean
+/// of the given type. The boolean vector is widened and then promoted to match
+/// the target boolean type of the given ValVT.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+                                             bool WithZeroes) {
+  SDLoc dl(Bool);
+  EVT BoolVT = Bool.getValueType();
+
+  assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+         TLI.isTypeLegal(ValVT) &&
+         "Unexpected types in WidenTargetBoolean");
+  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+                                ValVT.getVectorNumElements());
+  Bool = ModifyToType(Bool, WideVT, WithZeroes);
+  return PromoteTargetBoolean(Bool, ValVT);
+}
+
 /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
 /// bits in Hi.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d1131a74cf17..8ba19f76797f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -72,6 +72,20 @@ private:
     return TLI.getTypeAction(*DAG.getContext(), VT) ==
            TargetLowering::TypeLegal;
   }
 
+  /// isSimpleLegalType - Return true if this is a simple legal type.
+  bool isSimpleLegalType(EVT VT) const {
+    return VT.isSimple() && TLI.isTypeLegal(VT);
+  }
+
+  /// isLegalInHWReg - Return true if this type can be passed in registers.
+  /// For example, x86_64's f128 should be legal in registers, with only
+  /// some operations converted to library calls or integer bitwise
+  /// operations.
+  bool isLegalInHWReg(EVT VT) const {
+    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+    return VT == NVT && isSimpleLegalType(VT);
+  }
+
   EVT getSetCCResultType(EVT VT) const {
     return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   }
@@ -173,6 +187,11 @@ private:
   std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
 
   SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+  /// Modify the bit vector to match the SetCC result type of ValVT.
+  /// The bit vector is widened with zeroes when WithZeroes is true.
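
// For intuition, a scalar sketch of what WidenTargetBoolean above does when
// WithZeroes is set (illustrative, not the LLVM API): the padding lanes of
// the widened mask start out inactive instead of undefined. The caller is
// assumed to request WideNumElts >= Mask.size().
#include <cstddef>
#include <cstdint>
#include <vector>
static std::vector<std::uint8_t>
widenMaskWithZeroes(const std::vector<std::uint8_t> &Mask,
                    std::size_t WideNumElts) {
  std::vector<std::uint8_t> Wide(WideNumElts, 0); // padding lanes are inactive
  for (std::size_t i = 0; i != Mask.size(); ++i)
    Wide[i] = Mask[i]; // original lanes keep their values
  return Wide;
}
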
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); + void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -234,6 +253,7 @@ private: SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); @@ -246,21 +266,22 @@ private: SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_SDIV(SDNode *N); SDValue PromoteIntRes_SELECT(SDNode *N); SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_UDIV(SDNode *N); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -276,7 +297,6 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); @@ -284,7 +304,6 @@ private: SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); @@ -294,6 +313,8 @@ private: SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -312,8 +333,6 @@ private: // Integer Result Expansion. 
void ExpandIntegerResult(SDNode *N, unsigned ResNo); - void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi); void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -322,6 +341,7 @@ private: void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -333,6 +353,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -354,12 +375,10 @@ private: // Integer Operand Expansion. bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); - SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); - SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -375,32 +394,48 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. 
+  void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+    // When the result type can be kept in HW registers, the converted
+    // NewRes node could have the same type. We can save the effort of
+    // cloning every user of N in SoftenFloatOperand or other legalization
+    // functions by calling ReplaceValueWith here to update all users.
+    if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+      ReplaceValueWith(SDValue(N, ResNo), NewRes);
+  }
+
+  // Convert Float Results to Integer for Non-HW-supported Operations.
+  bool SoftenFloatResult(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
-  SDValue SoftenFloatRes_BITCAST(SDNode *N);
+  SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
-  SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+  SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
-  SDValue SoftenFloatRes_FABS(SDNode *N);
+  SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
   SDValue SoftenFloatRes_FADD(SDNode *N);
   SDValue SoftenFloatRes_FCEIL(SDNode *N);
-  SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+  SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FCOS(SDNode *N);
   SDValue SoftenFloatRes_FDIV(SDNode *N);
   SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -412,7 +447,7 @@ private:
   SDValue SoftenFloatRes_FMA(SDNode *N);
   SDValue SoftenFloatRes_FMUL(SDNode *N);
   SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
-  SDValue SoftenFloatRes_FNEG(SDNode *N);
+  SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
   SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
   SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -425,21 +460,25 @@ private:
   SDValue SoftenFloatRes_FSQRT(SDNode *N);
   SDValue SoftenFloatRes_FSUB(SDNode *N);
   SDValue SoftenFloatRes_FTRUNC(SDNode *N);
-  SDValue SoftenFloatRes_LOAD(SDNode *N);
-  SDValue SoftenFloatRes_SELECT(SDNode *N);
-  SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+  SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_UNDEF(SDNode *N);
   SDValue SoftenFloatRes_VAARG(SDNode *N);
   SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
 
-  // Operand Float to Integer Conversion.
+  // Return true if we can skip softening the given operand or SDNode because
+  // it was softened before by SoftenFloatResult and references to the operand
+  // were replaced by ReplaceValueWith.
+  bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+  // Convert Float Operand to Integer for Non-HW-supported Operations.
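
// Two of the softened operations declared above need no library call at all:
// once an f32 is carried as an i32 with identical bits, FNEG and FABS reduce
// to integer bitwise operations on the sign bit. A minimal sketch
// (illustrative, not the LLVM implementation):
#include <cstdint>
static uint32_t softenedFNeg(uint32_t Bits) { return Bits ^ 0x80000000u; } // flip sign
static uint32_t softenedFAbs(uint32_t Bits) { return Bits & 0x7fffffffu; } // clear sign
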
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); - SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); - SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -575,7 +614,6 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); @@ -617,20 +655,18 @@ private: void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); @@ -650,6 +686,7 @@ private: SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -680,8 +717,8 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); - SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); @@ -693,6 +730,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); @@ -707,9 +745,11 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue 
WidenVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Utilities Support: LegalizeVectorTypes.cpp @@ -745,8 +785,10 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, EVT WidenVT); - + /// When FillWithZeroes is "on" the vector will be widened with + /// zeroes. + /// By default, the vector will be widened with undefined values. + SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 14d8f7762086..593c346df770 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 83d4ad5ea1f4..f61f631e2ff8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,8 @@ class VectorLegalizer { SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -159,7 +161,7 @@ bool VectorLegalizer::Run() { DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) - LegalizeOp(SDValue(I, 0)); + LegalizeOp(SDValue(&*I, 0)); // Finally, it's possible the root changed. Get the new root. 
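
// The VectorLegalizer::Run change above uses the &*I idiom: the node iterator
// dereferences to a node reference, so &*I recovers the raw pointer. A
// self-contained illustration with an assumed container (not SelectionDAG's
// node list):
#include <list>
static void visitAll(std::list<int> &Nodes) {
  for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
    int *P = &*I; // dereference the iterator, then take the element's address
    *P += 1;      // operate through the pointer, as LegalizeOp(SDValue(&*I, 0)) does
  }
}
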
SDValue OldRoot = DAG.getRoot(); @@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { assert(Result.getValue(1).use_empty() && "There are still live users of the old chain!"); return LegalizeOp(Lowered); - } else { - return TranslateLegalizeResults(Op, Lowered); } + return TranslateLegalizeResults(Op, Lowered); } case TargetLowering::Expand: Changed = true; @@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UDIV: case ISD::SREM: case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ROTL: case ISD::ROTR: case ISD::BSWAP: + case ISD::BITREVERSE: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FABS: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; @@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) @@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the scalar operation, it's probably cheaper to unroll it. + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) + return DAG.UnrollVectorOp(Op.getNode()); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. 
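
// A scalar sketch of the mask-and-shift ladder that those vector SHL/SRL/
// AND/OR operations make possible (illustrative; the vector expansion itself
// is left to LegalizeDAG, as the code below notes):
#include <cstdint>
static uint32_t bitreverse32(uint32_t V) {
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap adjacent bits
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap bit pairs
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 8) & 0x00FF00FFu) | ((V & 0x00FF00FFu) << 8); // swap bytes
  return (V >> 16) | (V << 16);                            // swap half-words
}
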
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || + !TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. + return Op; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { // Convert hi and lo to floats // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); @@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) + return Op; + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 51cd6619f783..d0187d36dee2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::FPOW: case ISD::FREM: @@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { @@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->isInvariant(), N->getOriginalAlignment(), N->getAAInfo()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
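
// Scalar model of the two-part ExpandUINT_TO_FLOAT scheme above
// (illustrative, not the LLVM code): split the unsigned value into halves,
// convert each half via a signed conversion, and recombine as
// fHI * 2^HW + fLO. Each half is exact in a double, so the only rounding
// happens at the final add.
#include <cstdint>
static double modelUIntToFPSplit(uint64_t X) {
  uint32_t HI = (uint32_t)(X >> 32);
  uint32_t LO = (uint32_t)X;
  double fHI = (double)(int64_t)HI * 4294967296.0; // SINT_TO_FP, then FMUL by 2^32
  double fLO = (double)(int64_t)LO;                // SINT_TO_FP of the low half
  return fHI + fLO;                                // the final FADD
}
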
ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); return Result; @@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; @@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: @@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::MUL: case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); + const SDNodeFlags *Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(LD, 1), Ch); } @@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); + SDValue Src0 = MLD->getSrc0(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); @@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
Alignment/2 : Alignment; + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0 = MLD->getSrc0(); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MLD, 1), Ch); @@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + SDValue Index = MGT->getIndex(); unsigned Alignment = MGT->getOriginalAlignment(); + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; + // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(MGT, 1), Ch); } @@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_TruncateHelper(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; @@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
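
// The Mask, Src0 and Index handling above all repeat one pattern, shown here
// as a single hypothetical helper (not an existing LLVM function; it assumes
// the DAGTypeLegalizer context and <tuple> for std::tie): reuse the halves
// the type legalizer already produced when the operand itself is being
// split, otherwise split it on the spot.
void DAGTypeLegalizer::getSplitHalves(SDValue Op, SDValue &Lo, SDValue &Hi) {
  if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSplitVector)
    GetSplitVector(Op, Lo, Hi); // halves already exist; just fetch them
  else
    std::tie(Lo, Hi) = DAG.SplitVector(Op, SDLoc(Op)); // split manually
}
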
ReplaceValueWith(SDValue(MGT, 1), Ch); @@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); + MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); // if Alignment is equal to the vector size, // take the half of it for the second part @@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); + // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); - SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); - - SDValue PtrLo, PtrHi; - if (Ptr.getValueType().isVector()) // gather form vector of pointers - std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); else - PtrLo = PtrHi = Ptr; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. 
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} //===----------------------------------------------------------------------===// @@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast(N)); break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast(N)); + break; case ISD::ADD: case ISD::AND: @@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = WidenVecRes_Binary(N); break; case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: @@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: @@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. 
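
// The FCOPYSIGN unrolling above computes, per element, a value whose
// magnitude comes from the first operand and whose sign comes from the
// second. A scalar model of one lane at a time (illustrative, not the
// LLVM code):
#include <cmath>
static void copysignLanes(const double *Mag, const double *Sgn,
                          double *Out, int NumLanes) {
  for (int i = 0; i < NumLanes; ++i)
    Out[i] = std::copysign(Mag[i], Sgn[i]); // magnitude of Mag, sign of Sgn
}
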
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } @@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); + EOp1, EOp2, Flags); } CurNumElts = 0; } @@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - + const SDNodeFlags *Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } } @@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { @@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } @@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. 
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { + + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Src0 = GetWidenedVector(N->getValue()); + unsigned NumElts = WideVT.getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen the Index operand + SDValue Index = N->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } +SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { + // The result (and first input) is legal, but the second input is illegal. + // We can't do much to fix that, so just unroll and let the extracts off of + // the second input be widened as needed later. 
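
// Scalar model of why WidenVecRes_MGATHER above widens the mask with zeroes
// (WidenTargetBoolean(Mask, WideVT, true)): the padding lanes must stay
// inactive so the widened gather can never touch memory through them.
// Illustrative sketch, not the LLVM API:
#include <cstddef>
#include <cstdint>
static void modelMaskedGather(const int32_t *Base, const int32_t *Index,
                              const uint8_t *Mask, int32_t *Out,
                              std::size_t NumLanes) {
  for (std::size_t i = 0; i != NumLanes; ++i)
    Out[i] = Mask[i] ? Base[Index[i]] : 0; // zero-padded lanes never load
}
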
+ return DAG.UnrollVectorOp(N); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { false); } +SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only data operand of mscatter"); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue DataOp = MSC->getValue(); + SDValue Mask = MSC->getMask(); + + // Widen the value + SDValue WideVal = GetWidenedVector(DataOp); + EVT WideVT = WideVal.getValueType(); + unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen index + SDValue Index = MSC->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), dl, Ops, + MSC->getMemOperand()); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { +/// FillWithZeroes specifies that the vector should be widened with zeroes. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, + bool FillWithZeroes) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. EVT InVT = InOp.getValueType(); @@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { unsigned NumConcat = WidenNumElts / InNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; + Ops[i] = FillVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } @@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; + Ops[Idx] = FillVal; return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 6303422b9ae9..622e06f0da2a 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TII = STI.getInstrInfo(); ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now - // do not let it procede.
+ // do not let it proceed. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); unsigned NumRC = TRI->getNumRegClasses(); @@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { } // Now see if there are no other dependencies - // to instructions alredy in the packet. + // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well - // ignor order deps. + // ignore order deps. if (I->isCtrl()) continue; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e9bd52034ffd..91024e672f9c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -141,8 +141,8 @@ private: /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; - std::vector<SUnit*> LiveRegDefs; - std::vector<SUnit*> LiveRegGens; + std::unique_ptr<SUnit*[]> LiveRegDefs; + std::unique_ptr<SUnit*[]> LiveRegGens; // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. @@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); - LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector<SUnit*> &LiveRegDefs, + SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { @@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered /// by RegMask, and add them to LRegs. static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, - std::vector<SUnit*> &LiveRegDefs, + ArrayRef<SUnit*> LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. @@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else i += NumVals; @@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } } if (const uint32_t *RegMask = getNodeRegMask(Node)) - CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2737,7 +2739,7 @@ static bool
canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 159c28cd2a61..5cc806668b12 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -86,12 +86,6 @@ namespace llvm { /// flagged together nodes with a single SUnit. void BuildSchedGraph(AliasAnalysis *AA); - /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is - /// CopyToReg and its only active data operands are CopyFromReg within a - /// single block loop. - /// - void InitVRegCycleFlag(SUnit *SU); - /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 14f44ccc60ce..abbc48e10e46 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// isScalarToVector - Return true if the specified node is a -/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low -/// element is not an undef. -bool ISD::isScalarToVector(const SDNode *N) { - if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) - return true; - - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - if (N->getOperand(0).getOpcode() == ISD::UNDEF) - return false; - unsigned NumElems = N->getNumOperands(); - if (NumElems == 1) - return false; - for (unsigned i = 1; i < NumElems; ++i) { - SDValue V = N->getOperand(i); - if (V.getOpcode() != ISD::UNDEF) - return false; - } - return true; -} - /// allOperandsUndef - Return true if the node has at least one operand /// and all operands of the specified node are ISD::UNDEF. bool ISD::allOperandsUndef(const SDNode *N) { @@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, ID.AddInteger(Op.getResNo()); } } + /// Add logical or fast math flag values to FoldingSetNodeID value. 
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, const SDNodeFlags *Flags) { - if (!Flags || !isBinOpWithFlags(Opcode)) + if (!isBinOpWithFlags(Opcode)) return; - unsigned RawFlags = Flags->getRawFlags(); - // If no flags are set, do not alter the ID. We must match the ID of nodes - // that were created without explicitly specifying flags. This also saves time - // and allows a gradual increase in API usage of the optional optimization - // flags. - if (RawFlags != 0) - ID.AddInteger(RawFlags); + unsigned RawFlags = 0; + if (Flags) + RawFlags = Flags->getRawFlags(); + ID.AddInteger(RawFlags); } static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - if (auto *Node = dyn_cast(N)) - AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags); + AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); } static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, @@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() { SmallVector DeadNodes; // Add all obviously-dead nodes to the DeadNodes worklist. - for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) - if (I->use_empty()) - DeadNodes.push_back(I); + for (SDNode &Node : allnodes()) + if (Node.use_empty()) + DeadNodes.push_back(&Node); RemoveDeadNodes(DeadNodes); @@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) { void SelectionDAG::InsertNode(SDNode *N) { AllNodes.push_back(N); #ifndef NDEBUG + N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif } @@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(nullptr) { - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); while (!AllNodes.empty()) - DeallocateNode(AllNodes.begin()); + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif } BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, @@ -1023,7 +1003,7 @@ void SelectionDAG::clear() { static_cast(nullptr)); EntryNode.UseList = nullptr; - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); } @@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, - TargetFlags); + SDNode *N = + new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; - ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, SDLoc(Op), ShTy, Op); + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast(Node->getOperand(3))->getValue(); + const Value *VS = cast(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSizeInBits(), - VT2.getStoreSizeInBits())/8; + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); @@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); } break; } @@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Tmp == 1) return 1; // Early out. 
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + return std::min(Tmp, Tmp2); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ const int rIndex = Items - 1 - cast(Op.getOperand(1))->getZExtValue(); - // If the sign portion ends in our element the substraction gives correct + // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } @@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + APInt AZero, AOne; + APInt BZero, BOne; + computeKnownBits(A, AZero, AOne); + computeKnownBits(B, BZero, BOne); + return (AZero | BZero).isAllOnesValue(); +} + /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { @@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { - EVT SVT = VT.getScalarType(); - EVT InVT = BV->getValueType(0); - EVT InSVT = InVT.getScalarType(); - - // Find legal integer scalar type for constant promotion and - // ensure that its scalar size is at least as large as source. - EVT LegalSVT = SVT; - if (SVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); - if (LegalSVT.bitsLT(SVT)) break; - } - - // Let the above scalar folding handle the folding of each element. - SmallVector Ops; - for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - SDValue OpN = BV->getOperand(i); - EVT OpVT = OpN.getValueType(); - - // Build vector (integer) scalar operands may need implicit - // truncation - do this before constant folding. - if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) - OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); - - OpN = getNode(Opcode, DL, SVT, OpN); - - // Legalize the (integer) scalar constant if necessary. 
- if (LegalSVT != SVT) - OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); - - if (OpN.getOpcode() != ISD::UNDEF && - OpN.getOpcode() != ISD::Constant && - OpN.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(OpN); - } - if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); - break; + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; } } } @@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid sext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) @@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid zext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid anyext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) @@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && - "Invalid truncate node, src < dst!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); if (OpOpcode == ISD::TRUNCATE) return 
getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || @@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0)); + Operand.getNode()->getOperand(0), + &cast(Operand.getNode())->Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3182,6 +3198,10 @@ static std::pair FoldValue(unsigned Opcode, const APInt &C1, case ISD::SRA: return std::make_pair(C1.ashr(C2), true); case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, + ArrayRef Ops, + const SDNodeFlags *Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast(Op); + return (Op.getOpcode() == ISD::UNDEF) || + (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || + !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + + // Constant fold each scalar lane separately. + SmallVector ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. 
+ if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (ScalarResult.getOpcode() != ISD::UNDEF && + ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + assert(ScalarResults.size() == NumElts && + "Unexpected number of scalar results for BUILD_VECTOR"); + return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); +} + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); + ConstantFPSDNode *N1CFP = dyn_cast(N1); + ConstantFPSDNode *N2CFP = dyn_cast(N2); + + // Canonicalize constant to RHS if commutative. + if (isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::MUL: case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FREM: if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { - // 0+x --> x - if (ConstantFPSDNode *CFP = dyn_cast(N1)) - if (CFP->getValueAPF().isZero()) - return N2; // x+0 --> x - if (ConstantFPSDNode *CFP = dyn_cast(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FSUB) { // x-0 --> x - if (ConstantFPSDNode *CFP = dyn_cast(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FMUL) { - ConstantFPSDNode *CFP = dyn_cast(N1); - SDValue V = N2; - - // If the first operand isn't the constant, try the second - if (!CFP) { - CFP = dyn_cast(N2); - V = N1; - } - - if (CFP) { - // 0*x --> 0 - if (CFP->isZero()) - return SDValue(CFP,0); - // 1*x --> x - if (CFP->isExactlyValue(1.0)) - return V; - } + // x*0 --> 0 + if (N2CFP && N2CFP->isZero()) + return N2; + // x*1 --> x + if (N2CFP && N2CFP->isExactlyValue(1.0)) + return N1; } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && 
VT.bitsLE(N1.getValueType()) && - isa(N2) && "Invalid FP_ROUND!"); + N2C && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getValueType() != VT.getScalarType()) break; if (Op.getOpcode() == ISD::UNDEF) { - Ops.push_back(Op); + Ops.push_back(getUNDEF(VT.getScalarType())); continue; } if (ConstantSDNode *C = dyn_cast(Op)) { APInt Val = C->getAPIntValue(); + Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); Ops.push_back(SignExtendInReg(Val)); continue; } @@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return N1.getOperand(N2C->getZExtValue()); // EXTRACT_ELEMENT of a constant int is also very common. - if (ConstantSDNode *C = dyn_cast(N1)) { + if (N1C) { unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); - APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; - case ISD::EXTRACT_SUBVECTOR: { - SDValue Index = N2; + case ISD::EXTRACT_SUBVECTOR: if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); @@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa(Index)) { - assert((VT.getVectorNumElements() + - cast(Index)->getZExtValue() + if (N2C) { + assert((VT.getVectorNumElements() + N2C->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; } - } // Perform trivial constant folding. if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; - // Canonicalize constant to RHS if commutative. - if (N1C && !N2C && isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast(N1); - ConstantFPSDNode *N2CFP = dyn_cast(N2); if (N1CFP) { - if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Canonicalize constant to RHS if commutative. - std::swap(N1CFP, N2CFP); - std::swap(N1, N2); - } else if (N2CFP) { + if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; case ISD::FREM : - s = V1.mod(V2, APFloat::rmNearestTiesToEven); + s = V1.mod(V2); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { return getConstantFP(V1, DL, VT); @@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
- ConstantSDNode *N1C = dyn_cast(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast(N1); @@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, break; case ISD::SETCC: { // Use FoldSetCC to simplify SETCC's. - SDValue Simp = FoldSetCC(VT, N1, N2, cast(N3)->get(), DL); - if (Simp.getNode()) return Simp; + if (SDValue V = FoldSetCC(VT, N1, N2, cast(N3)->get(), DL)) + return V; + // Vector constant folding. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return V; break; } case ISD::SELECT: - if (N1C) { + if (ConstantSDNode *N1C = dyn_cast(N1)) { if (N1C->getZExtValue()) return N2; // select true, X, Y -> X return N3; // select false, X, Y -> Y @@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, return true; } +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction()->optForMinSize(); + return MF.getFunction()->optForSize(); +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, @@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, true, DstPtrInfo, SrcPtrInfo); } + checkAddrSpaceIsValidForLibcall(TLI, 
DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc // memcpy is not guaranteed to be safe. libc memcpys aren't required to // respect volatile, so they may do things like read or write memory @@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + // Emit a library call. Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; @@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast(Ptr)) - return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); // If this is (FI+Offset1)+Offset2, we can model it. if (Ptr.getOpcode() != ISD::ADD || @@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { return MachinePointerInfo(); int FI = cast(Ptr.getOperand(0))->getIndex(); - return MachinePointerInfo::getFixedStack(FI, Offset+ - cast(Ptr.getOperand(1))->getSExtValue()); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast(Ptr.getOperand(1))->getSExtValue()); } /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast(OffsetOp)) - return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.getOpcode() == ISD::UNDEF) - return InferPointerInfo(Ptr); + return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } @@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr, Offset); + PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = + MaskedGatherSDNode *N = new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, VTs, VT, MMO); CSEMap.InsertNode(N, IP); @@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops) { + ArrayRef Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); - case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; } @@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef Ops) { "Update with wrong number of operands"); // If no operands changed just return the input node. - if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) return N; // See if the modified node already exists. @@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = I++; + SDNode *N = &*I++; checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. N->setNodeId(DAGSize++); - allnodes_iterator Q = N; + allnodes_iterator Q(N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); @@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Visit all the nodes. As we iterate, move nodes into sorted order, // such that by the time the end is reached all nodes will be sorted. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : allnodes()) { + SDNode *N = &Node; checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
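An aside on the mechanical iterator changes visible in the AssignTopologicalOrder hunks on either side of this point: the patch replaces implicit conversions between node pointers and list iterators ("SDNode *N = I++;" becomes "SDNode *N = &*I++;", "allnodes_iterator Q = N;" becomes "allnodes_iterator Q(N);"), apparently so that ilist iterator/pointer conversions are always explicit. The underlying idiom - take the element's address and advance the iterator before mutating the list - is what keeps these traversals safe while nodes are inserted or removed. A minimal standalone sketch of that idiom, with std::list standing in for the intrusive node list (none of this is LLVM code):

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Nodes = {1, 2, 3, 4};
      for (auto I = Nodes.begin(), E = Nodes.end(); I != E;) {
        auto Cur = I++;       // step past Cur before any mutation
        if (*Cur % 2 == 0)
          Nodes.erase(Cur);   // invalidates only Cur; I remains valid
      }
      assert(Nodes.size() == 2); // the odd elements survive
    }

The same reasoning applies when AllNodes.insert and AllNodes.remove reposition the current node during the topological sort below.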
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() { P->setNodeId(Degree); } } - if (I == SortedPos) { + if (&Node == SortedPos) { #ifndef NDEBUG - SDNode *S = ++I; + allnodes_iterator I(N); + SDNode *S = &*++I; dbgs() << "Overran sorted position:\n"; S->dumprFull(this); dbgs() << "\n"; dbgs() << "Checking if this is due to cycles\n"; @@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { // SDNode Class //===----------------------------------------------------------------------===// +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } +const SDNodeFlags *SDNode::getFlags() const { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + return &FlagsNode->Flags; + return nullptr; +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } switch (N->getOpcode()) { - default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); break; + } case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; @@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c3c0eb101a0..d2ea85ab4d22 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +64,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <utility> using namespace llvm; #define DEBUG_TYPE "isel" @@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision", cl::init(0)); static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag",
cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); @@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && @@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) @@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (!isa<TerminatorInst>(&I) && !HasTailCall) + if (!isa<TerminatorInst>(&I) && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - IsIndirect, Offset, dl, DbgSDNodeOrder); + false, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + CatchPadMBB->setIsEHFuncletEntry(); + + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsSEH = isAsynchronousEHPersonality(Pers); + if (IsSEH) { + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (TargetMBB != NextBlock(FuncInfo.MBB) || + TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB))); + return; + } + + // Figure out the funclet membership for the catchret's successor. + // This will be used by the FuncletLayout pass to determine how to order the + // BB's. + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + assert(SuccessorColor && "No parent funclet for catchret!"); + MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; + assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); + + // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB), + DAG.getBasicBlock(SuccessorColorMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node.
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); @@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); - SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs; @@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (!BPI) - return 0; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); - return BPI->getEdgeWeight(SrcBB, DstBB); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max( + std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); } -void SelectionDAGBuilder:: -addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight /* = 0 */) { - if (!Weight) - Weight = getEdgeWeight(Src, Dst); - Src->addSuccessor(Dst, Weight); +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } } - static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; @@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TWeight, - uint32_t FWeight) { + BranchProbability TProb, + BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast(Cond)) { + } else { + const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. 
- llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TWeight, FWeight); + TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TWeight, FWeight); + nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } -/// Scale down both weights to fit into uint32_t. -static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - NewTrue = NewTrue / Scale; - NewFalse = NewFalse / Scale; -} - /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, - uint32_t FWeight) { + Instruction::BinaryOps Opc, + BranchProbability TProb, + BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || @@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TWeight, FWeight); + TProb, FProb); return; } // Create TmpBB after CurBB. - MachineFunction::iterator BBI = CurBB; + MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); @@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice - // assumes that + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. - uint64_t NewTrueWeight = TWeight; - uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = TWeight; - NewFalseWeight = 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. 
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice - // assumes that - // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. - uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; - uint64_t NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = 2 * (uint64_t)TWeight; - NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } } @@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), - getEdgeWeight(BrMBB, Succ1MBB)); + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. 
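
The splits above are chosen so that the merged two-block branch reproduces the original edge probabilities. A standalone numeric check of both identities, with doubles standing in for the fixed-point BranchProbability arithmetic:

    #include <cassert>
    #include <cstdio>

    // A is the original true probability, B the false probability (A + B == 1).
    int main() {
      double A = 0.7, B = 0.3;

      // Or case: BB1 gets {A/2, A/2 + B}; TmpBB gets {A/2, B} normalized,
      // i.e. {A/(1+B), 2B/(1+B)}. Requirement:
      //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == A.
      double OrTrue = A / 2 + (A / 2 + B) * ((A / 2) / (A / 2 + B));
      assert(OrTrue > A - 1e-9 && OrTrue < A + 1e-9);

      // And case: BB1 gets {A + B/2, B/2}; TmpBB gets {A, B/2} normalized,
      // i.e. {2A/(1+A), B/(1+A)}. Requirement:
      //   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB) == B.
      double AndFalse = B / 2 + (A + B / 2) * ((B / 2) / (A + B / 2));
      assert(AndFalse > B - 1e-9 && AndFalse < B + 1e-9);

      std::printf("or: %f == %f, and: %f == %f\n", OrTrue, A, AndFalse, B);
    }
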
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - nullptr, 0, false, getCurSDLoc(), false, false).second; + None, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } - // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. - addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); - // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. - addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), @@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. 
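
As the visitBitTestCase comment above notes, the added probabilities behave like relative weights, so normalizeSuccProbs only has to rescale them to sum to one. A toy version over doubles:

    #include <cstdio>
    #include <vector>

    // Rescale relative successor probabilities so they sum to one,
    // mirroring what MachineBasicBlock::normalizeSuccProbs must achieve.
    void normalize(std::vector<double> &Probs) {
      double Sum = 0;
      for (double P : Probs)
        Sum += P;
      if (Sum <= 0)
        return;  // nothing sensible to do
      for (double &P : Probs)
        P /= Sum;
    }

    int main() {
      std::vector<double> P{0.5, 0.75};  // e.g. ExtraProb and BranchProbToNext
      normalize(P);
      std::printf("%f %f\n", P[0], P[1]);  // 0.4 0.6
    }
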
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast(Callee); @@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info - addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + SmallVector, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) return; SmallVector ValueVTs; @@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. 
Clusters[DstIndex - 1].High = CaseVal; - Clusters[DstIndex - 1].Weight += CC.Weight; - assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); + Clusters[DstIndex - 1].Prob += CC.Prob; } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); @@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; - addSuccessorWithWeight(IndirectBrMBB, Succ); + addSuccessorWithProb(IndirectBrMBB, Succ); } + IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), @@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), @@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(const_cast(&I), LHS, RHS); - ISD::NodeType Opc = ISD::DELETED_NODE; - switch (SPF) { - case SPF_UMAX: Opc = ISD::UMAX; break; - case SPF_UMIN: Opc = ISD::UMIN; break; - case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - default: break; - } - EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); - while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && + VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); - if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && - // If the underlying comparison instruction is used by any other instruction, - // the consumed instructions won't be destroyed, so it is not profitable - // to convert to a min/max. + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. 
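
The cluster merge in sortAndRangeify above now simply adds probabilities; the old overflow assert on 32-bit weights is gone. A standalone sketch of the merge loop with toy clusters:

    #include <cstdio>
    #include <vector>

    // A sorted case cluster: value range [Low, High], destination block id,
    // and probability. Illustrative types, not LLVM's CaseCluster.
    struct ToyCluster { long Low, High; int BB; double Prob; };

    // Fold a cluster into its predecessor when it is adjacent
    // (High + 1 == Low) and targets the same block, summing probabilities.
    void mergeNeighbors(std::vector<ToyCluster> &Cs) {
      size_t Dst = 0;
      for (size_t Src = 0; Src < Cs.size(); ++Src) {
        if (Dst > 0 && Cs[Dst - 1].BB == Cs[Src].BB &&
            Cs[Dst - 1].High + 1 == Cs[Src].Low) {
          Cs[Dst - 1].High = Cs[Src].High;
          Cs[Dst - 1].Prob += Cs[Src].Prob;  // Prob += CC.Prob
        } else {
          Cs[Dst++] = Cs[Src];
        }
      }
      Cs.resize(Dst);
    }

    int main() {
      std::vector<ToyCluster> Cs{{1, 2, 7, 0.2}, {3, 5, 7, 0.3}, {9, 9, 8, 0.5}};
      mergeNeighbors(Cs);
      std::printf("%zu clusters, first prob %f\n", Cs.size(), Cs[0].Prob);
    }
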
+ bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) + Opc = ISD::FMINNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINNAN; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) + Opc = ISD::FMAXNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. cast(&I)->getCondition()->hasOneUse()) { OpCode = Opc; LHSVal = getValue(LHS); @@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. 
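
The opcode selection above encodes a small decision table: which node is picked depends on the matched pattern's NaN behavior and, when either behavior is acceptable, on which node is legal (checking the scalar type if the vector op would be scalarized). A standalone sketch of the FMIN side, with illustrative enums in place of SelectionDAG types:

    #include <cstdio>

    enum NaNBehavior { ReturnsNaN, ReturnsOther, ReturnsAny };
    enum Opcode { None, FMinNan, FMinNum };

    Opcode pickFMin(NaNBehavior NB, bool NumLegal, bool NanLegal) {
      switch (NB) {
      case ReturnsNaN:   return FMinNan;  // must propagate NaN
      case ReturnsOther: return FMinNum;  // must return the non-NaN operand
      case ReturnsAny:                    // either is fine: prefer what's legal
        if (NumLegal) return FMinNum;
        if (NanLegal) return FMinNan;
        return None;
      }
      return None;
    }

    int main() {
      std::printf("%d\n", pickFMin(ReturnsAny, false, true));  // FMinNan
    }
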
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
-static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers.
+// Example:
+//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+//   %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer - it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector - we
+// extract the splat value and use it as a uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
                            SelectionDAGBuilder* SDB) {
-  assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
-  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!Gep || Gep->getNumOperands() > 2)
-    return false;
-  ShuffleVectorInst *ShuffleInst =
-   dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
-  if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
-      cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
-      Instruction::InsertElement)
-    return false;
-
-  Ptr = cast<Instruction>(ShuffleInst->getOperand(0))->getOperand(1);
   SelectionDAG& DAG = SDB->DAG;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // Check is the Ptr is inside current basic block
-  // If not, look for the shuffle instruction
-  if (SDB->findValue(Ptr))
-    Base = SDB->getValue(Ptr);
-  else if (SDB->findValue(ShuffleInst)) {
-    SDValue ShuffleNode = SDB->getValue(ShuffleInst);
-    SDLoc sdl = ShuffleNode;
-    Base = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, sdl,
-        ShuffleNode.getValueType().getScalarType(), ShuffleNode,
-        DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-    SDB->setValue(Ptr, Base);
-  }
-  else
+  LLVMContext &Context = *DAG.getContext();
+
+  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP || GEP->getNumOperands() > 2)
     return false;
-  Value *IndexVal = Gep->getOperand(1);
-  if (SDB->findValue(IndexVal)) {
-    Index = SDB->getValue(IndexVal);
+  const Value *GEPPtr = GEP->getPointerOperand();
+  if (!GEPPtr->getType()->isVectorTy())
+    Ptr = GEPPtr;
+  else if (!(Ptr = getSplatValue(GEPPtr)))
+    return false;
 
-    if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+  Value *IndexVal = GEP->getOperand(1);
+
+  // The operands of the GEP may be defined in another basic block.
+  // In this case we'll not find nodes for the operands.
+  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+    return false;
+
+  Base = SDB->getValue(Ptr);
+  Index = SDB->getValue(IndexVal);
+
+  // Suppress sign extension.
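
The uniform-base test above hinges on recognizing a splat. A toy, self-contained version of that idea over plain integers standing in for the vector of base pointers (illustrative only; LLVM uses getSplatValue on IR values):

    #include <cstdio>
    #include <vector>

    // If every lane of the base-pointer vector is identical, a single
    // scalar base can stand for the whole vector, and the gather/scatter
    // collapses to base + index form.
    static bool getUniformBaseToy(const std::vector<long> &Lanes, long &Base) {
      if (Lanes.empty())
        return false;
      for (long L : Lanes)
        if (L != Lanes[0])
          return false;  // not a splat: no uniform base
      Base = Lanes[0];
      return true;
    }

    int main() {
      long Base = 0;
      std::vector<long> V{64, 64, 64, 64};
      std::printf("%d base=%ld\n", getUniformBaseToy(V, Base), Base);
    }
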
+ if (SExtInst* Sext = dyn_cast(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) - Value *Ptr = I.getArgOperand(1); + const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); @@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); - Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), @@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue InChain = DAG.getRoot(); if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) - Value *Ptr = I.getArgOperand(0); + const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); @@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? 
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getTruncatedArgReg - Find underlying register used for an truncated -// argument. -static unsigned getTruncatedArgReg(const SDValue &N) { - if (N.getOpcode() != ISD::TRUNCATE) +// getUnderlyingArgReg - Find underlying register used for a truncated or +// bitcasted argument. 
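
ExpandPowI above relies on binary decomposition: square-and-multiply needs on the order of popcount(Val) + floor(log2(Val)) multiplies, which is what the "< 7" size budget counts. A standalone version of the same loop (the DAG form additionally skips the first and last redundant multiplies):

    #include <cstdio>

    double powi(double X, unsigned Val) {
      double Res = 1.0;          // logically starts equal to 1.0
      double CurSquare = X;      // X^1, then X^2, X^4, ...
      while (Val) {
        if (Val & 1)
          Res *= CurSquare;      // fold in this power of two
        CurSquare *= CurSquare;
        Val >>= 1;
      }
      return Res;
    }

    int main() {
      std::printf("%f\n", powi(2.0, 13));  // prints 8192
    }
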
+static unsigned getUnderlyingArgReg(const SDValue &N) { + switch (N.getOpcode()) { + case ISD::CopyFromReg: + return cast(N.getOperand(1))->getReg(); + case ISD::BITCAST: + case ISD::AssertZext: + case ISD::AssertSext: + case ISD::TRUNCATE: + return getUnderlyingArgReg(N.getOperand(0)); + default: return 0; - - const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || - Ext.getOpcode() == ISD::AssertSext) { - const SDValue &CFR = Ext.getOperand(0); - if (CFR.getOpcode() == ISD::CopyFromReg) - return cast(CFR.getOperand(1))->getReg(); - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); } - return 0; } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function @@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { - unsigned Reg; - if (N.getOpcode() == ISD::CopyFromReg) - Reg = cast(N.getOperand(1))->getReg(); - else - Reg = getTruncatedArgReg(N); + unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); @@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa(Address); - - const AllocaInst *AI = dyn_cast(Address); - - if (isParameter && !AI) { - FrameIndexSDNode *FINode = dyn_cast(N.getNode()); - if (FINode) - // Byval parameter. We have a frame index at this point. 
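
getUnderlyingArgReg above generalizes the old truncate-only walk to any chain of register-preserving wrappers. A toy model of the same recursion, with a hypothetical node struct in place of SDValue:

    #include <cstdio>

    // Walk through "transparent" wrappers (truncate, bitcast, assert-ext)
    // until a copy-from-reg is found; return its register, or 0 otherwise.
    enum Kind { CopyFromReg, Bitcast, Truncate, AssertExt, Other };
    struct ToyNode { Kind K; const ToyNode *Op; unsigned Reg; };

    unsigned underlyingArgReg(const ToyNode *N) {
      switch (N->K) {
      case CopyFromReg: return N->Reg;
      case Bitcast:
      case Truncate:
      case AssertExt:   return underlyingArgReg(N->Op);
      default:          return 0;
      }
    }

    int main() {
      ToyNode Copy{CopyFromReg, nullptr, 41};
      ToyNode Trunc{Truncate, &Copy, 0};
      ToyNode Cast{Bitcast, &Trunc, 0};
      std::printf("%u\n", underlyingArgReg(&Cast));  // prints 41
    }
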
- SDV = DAG.getFrameIndexDbgValue( - Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); - else { - // Address is an argument, so try to emit its dbg value using - // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); - return nullptr; - } - } else if (AI) + bool isParameter = Variable->isParameter() || isa(Address); + auto FINode = dyn_cast(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, + FINode->getIndex(), 0, dl, SDNodeOrder); + } else if (isa(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); + return nullptr; + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa(V) || Offset != 0; if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - IsIndirect, N)) { + false, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - IsIndirect, Offset, dl, SDNodeOrder); + false, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. 
+ if (PtrTy != ResTy) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); @@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); - case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { visitStatepoint(I); return nullptr; } - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; @@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::eh_begincatch: - case Intrinsic::eh_endcatch: - llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "cannot get exception code on this platform"); + // Get the exception pointer vreg, copy from it, and resize it to fit. 
+ const auto *CPI = cast(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); - unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + if (Intrinsic == Intrinsic::eh_exceptioncode) + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } @@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); @@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + assert(CLI.CS); + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + EHInfo->addIPToStateRange(cast(CLI.CS->getInstruction()), + BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair Result = lowerInvokable(CLI, LandingPad); + std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. -std::pair -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - Type *ReturnTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , @@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, - LandingPad, true); + std::pair Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); - else if (j != 0) + else if (j != 0) { MyFlags.Flags.setOrigAlign(1); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); @@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { if (FastISel) return A->use_empty(); - const BasicBlock *Entry = A->getParent()->begin(); + const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) - if (cast(U)->getParent() != Entry || isa(U)) + if (cast(U)->getParent() != &Entry || isa(U)) return false; // Use not in entry block. return true; @@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. + if (Idx == 1) + Flags.setByVal(); + } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 - else if (i > 0) + else if (i > 0) { MyFlags.Flags.setOrigAlign(1); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) @@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // If this argument is unused then remember its value. It is used to generate // debugging information. if (I->use_empty() && NumValues) { - SDB->setUnusedArgValue(I, InVals[i]); + SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { @@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Note down frame index. 
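
The new SplitEnd flag above marks the last piece of a value split across several register-sized parts, complementing the existing Split flag on the first piece. A standalone sketch of the flag assignment loop (PartFlags is an illustrative stand-in for ISD::ArgFlagsTy):

    #include <cstdio>
    #include <vector>

    struct PartFlags { bool Split = false, SplitEnd = false; unsigned OrigAlign = 8; };

    // Part 0 of a multi-part value is tagged Split; every non-first part has
    // its original alignment forced to 1; the last part is also tagged
    // SplitEnd, mirroring the j == 0 / j == NumParts - 1 cases above.
    std::vector<PartFlags> flagParts(unsigned NumParts) {
      std::vector<PartFlags> Flags(NumParts);
      for (unsigned J = 0; J < NumParts; ++J) {
        if (NumParts > 1 && J == 0)
          Flags[J].Split = true;
        else if (J != 0) {
          Flags[J].OrigAlign = 1;
          if (J == NumParts - 1)
            Flags[J].SplitEnd = true;
        }
      }
      return Flags;
    }

    int main() {
      auto F = flagParts(3);
      std::printf("%d %d\n", F[0].Split, F[2].SplitEnd);  // 1 1
    }
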
if (FrameIndexSDNode *FI = dyn_cast(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(I, Res); + SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // If this argument is live outside of the entry block, insert a copy from @@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // uses with vregs. unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[I] = Reg; + FuncInfo->ValueMap[&*I] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(I); - SDB->CopyToExportRegsIfNeeded(I); + if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&*I); + SDB->CopyToExportRegsIfNeeded(&*I); } } @@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB, // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI = ParentMBB; + MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); + SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { - MachineFunction::iterator I = MBB; + MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; - return I; + return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). @@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, CaseCluster &JTCluster) { assert(First <= Last); - uint32_t Weight = 0; + auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector Table; - DenseMap JTWeights; + DenseMap JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); - Weight += Clusters[I].Weight; - assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + Prob += Clusters[I].Prob; APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 
1 : 2; @@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); - JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } - unsigned NumDests = JTWeights.size(); + unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { @@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; - addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } + JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) @@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Weight); + JTCases.size() - 1, Prob); return true; } @@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; - uint32_t TotalWeight = 0; + auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. unsigned j; @@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) - CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; - // Update Mask, Bits and ExtraWeight. + // Update Mask, Bits and ExtraProb. 
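
buildJumpTable above both fills the dense table (one slot per case value) and accumulates a per-destination probability, JTProbs, which then feeds the successor edges. A toy version of that construction:

    #include <cstdio>
    #include <map>
    #include <vector>

    // Illustrative case cluster: value range, destination block id, probability.
    struct ToyCluster { long Low, High; int BB; double Prob; };

    int main() {
      std::vector<ToyCluster> Cs{{0, 1, 10, 0.25}, {2, 2, 11, 0.25}, {3, 4, 10, 0.5}};
      std::vector<int> Table;
      std::map<int, double> JTProbs;
      for (const ToyCluster &C : Cs) {
        for (long V = C.Low; V <= C.High; ++V)
          Table.push_back(C.BB);    // one slot per case value
        JTProbs[C.BB] += C.Prob;    // accumulate per destination
      }
      std::printf("slots=%zu p(BB10)=%f\n", Table.size(), JTProbs[10]);  // 5, 0.75
    }
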
     uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
     uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
     assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
     CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
     CB->Bits += Hi - Lo + 1;
-    CB->ExtraWeight += Clusters[i].Weight;
-    TotalWeight += Clusters[i].Weight;
-    assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+    CB->ExtraProb += Clusters[i].Prob;
+    TotalProb += Clusters[i].Prob;
   }
 
   BitTestInfo BTI;
   std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
-    // Sort by weight first, number of bits second.
-    if (a.ExtraWeight != b.ExtraWeight)
-      return a.ExtraWeight > b.ExtraWeight;
+    // Sort by probability first, number of bits second.
+    if (a.ExtraProb != b.ExtraProb)
+      return a.ExtraProb > b.ExtraProb;
     return a.Bits > b.Bits;
   });
 
   for (auto &CB : CBV) {
     MachineBasicBlock *BitTestBB =
         FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
-    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
   }
   BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
-                            SI->getCondition(), -1U, MVT::Other, false, nullptr,
-                            nullptr, std::move(BTI));
+                            SI->getCondition(), -1U, MVT::Other, false,
+                            ContiguousRange, nullptr, nullptr, std::move(BTI),
+                            TotalProb);
 
   BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
-                                    BitTestCases.size() - 1, TotalWeight);
+                                    BitTestCases.size() - 1, TotalProb);
   return true;
 }
@@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                                         MachineBasicBlock *DefaultMBB) {
   MachineFunction *CurMF = FuncInfo.MF;
   MachineBasicBlock *NextMBB = nullptr;
-  MachineFunction::iterator BBI = W.MBB;
+  MachineFunction::iterator BBI(W.MBB);
   if (++BBI != FuncInfo.MF->end())
-    NextMBB = BBI;
+    NextMBB = &*BBI;
 
   unsigned Size = W.LastCluster - W.FirstCluster + 1;
@@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                          ISD::SETEQ);
 
       // Update successor info.
-      // Both Small and Big will jump to Small.BB, so we sum up the weights.
-      addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
-      addSuccessorWithWeight(
-          SwitchMBB, DefaultMBB,
-          // The default destination is the first successor in IR.
-          BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
-              : 0);
+      // Both Small and Big will jump to Small.BB, so we sum up the
+      // probabilities.
+      addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+      if (BPI)
+        addSuccessorWithProb(
+            SwitchMBB, DefaultMBB,
+            // The default destination is the first successor in IR.
+            BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+      else
+        addSuccessorWithProb(SwitchMBB, DefaultMBB);
 
       // Insert the true branch.
       SDValue BrCond =
@@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
   }
 
   if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Order cases by weight so the most likely case will be checked first.
+    // Order cases by probability so the most likely case will be checked first.
     std::sort(W.FirstCluster, W.LastCluster + 1,
               [](const CaseCluster &a, const CaseCluster &b) {
-      return a.Weight > b.Weight;
+      return a.Prob > b.Prob;
     });
 
     // Rearrange the case blocks so that the last one falls through if possible
-    // without without changing the order of weights.
+    // without changing the order of probabilities.
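
The bit-test mask arithmetic above packs a contiguous case range into set bits: (-1ULL >> (63 - (Hi - Lo))) << Lo produces Hi - Lo + 1 ones starting at bit Lo. A standalone check of that expression:

    #include <cstdint>
    #include <cstdio>

    // One set bit per case value in [Lo, Hi] relative to the low bound.
    static uint64_t rangeMask(unsigned Lo, unsigned Hi) {
      return (~0ULL >> (63 - (Hi - Lo))) << Lo;  // same as the -1ULL form above
    }

    int main() {
      // Case values 2..4 relative to the low bound: bits 2, 3 and 4.
      std::printf("0x%llx\n", (unsigned long long)rangeMask(2, 4));  // 0x1c
    }
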
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; - if (I->Weight > W.LastCluster->Weight) + if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); @@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - // Compute total weight. - uint32_t UnhandledWeights = 0; - for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { - UnhandledWeights += I->Weight; - assert(UnhandledWeights >= I->Weight && "Weight overflow!"); - } + // Compute total probability. + BranchProbability DefaultProb = W.DefaultProb; + BranchProbability UnhandledProbs = DefaultProb; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { @@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } + UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { @@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + auto JumpProb = I->Prob; + auto FallthroughProb = UnhandledProbs; + + // If the default statement is a target of the jump table, we evenly + // distribute the default probability to successors of CurMBB. Also + // update the probability on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpProb += DefaultProb / 2; + FallthroughProb -= DefaultProb / 2; + JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); + break; + } + } + + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. @@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } - // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; - CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, - UnhandledWeights); + // The false probability is the sum of all unhandled cases. 
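A minimal sketch of the default-probability split performed in the jump-table case above (assumes the BranchProbability header from this tree; the operators used are exactly those exercised by the patch):

#include "llvm/Support/BranchProbability.h"
using llvm::BranchProbability;

// When the jump table can also reach the default destination, half of the
// default probability moves onto the jump-table edge and comes off the
// fallthrough edge, as in the lowerWorkItem hunk above.
static void splitDefaultProb(BranchProbability &JumpProb,
                             BranchProbability &FallthroughProb,
                             BranchProbability DefaultProb) {
  JumpProb += DefaultProb / 2;
  FallthroughProb -= DefaultProb / 2;
}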
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, + UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { - if (X.Weight != CC.Weight) - return X.Weight > CC.Weight; + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; // Ties are broken by comparing the case value. return X.Low->getValue().slt(CC.Low->getValue()); @@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); - // Balance the tree based on branch weights to create a near-optimal (in terms - // of search time given key frequency) binary search tree. See e.g. Kurt + // Balance the tree based on branch probabilities to create a near-optimal (in + // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to - // find a partitioning of the clusters which balances the weight on both - // sides. If LeftWeight and RightWeight are equal, alternate which side is - // taken to ensure 0-weight nodes are distributed evenly. + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { - if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) - LeftWeight += (++LastLeft)->Weight; + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; else - RightWeight += (--FirstRight)->Weight; + RightProb += (--FirstRight)->Prob; I++; } @@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, @@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. 
ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftWeight, RightWeight); + LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = - BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; - Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 700675453fe7..49a3872d20c8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,6 +17,7 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -30,7 +31,6 @@ namespace llvm { class AddrSpaceCastInst; -class AliasAnalysis; class AllocaInst; class BasicBlock; class BitCastInst; @@ -154,39 +154,39 @@ private: unsigned JTCasesIndex; unsigned BTCasesIndex; }; - uint32_t Weight; + BranchProbability Prob; static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, - MachineBasicBlock *MBB, uint32_t Weight) { + MachineBasicBlock *MBB, BranchProbability Prob) { CaseCluster C; C.Kind = CC_Range; C.Low = Low; C.High = High; C.MBB = MBB; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High, unsigned JTCasesIndex, - uint32_t Weight) { + BranchProbability Prob) { CaseCluster C; C.Kind = CC_JumpTable; C.Low = Low; C.High = High; C.JTCasesIndex = JTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, - unsigned BTCasesIndex, uint32_t Weight) { + unsigned BTCasesIndex, BranchProbability Prob) { CaseCluster C; C.Kind = CC_BitTests; C.Low = Low; C.High = High; C.BTCasesIndex = BTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } }; @@ -198,13 +198,13 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; - uint32_t ExtraWeight; + BranchProbability ExtraProb; CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, - uint32_t Weight): - Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } - CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} 
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {} }; typedef std::vector<CaseBits> CaseBitsVector; @@ -217,13 +217,13 @@ private: /// blocks needed by multi-case switch statements. struct CaseBlock { CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, - MachineBasicBlock *truebb, MachineBasicBlock *falsebb, - MachineBasicBlock *me, - uint32_t trueweight = 0, uint32_t falseweight = 0) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), - TrueWeight(trueweight), FalseWeight(falseweight) { } + const Value *cmpmiddle, MachineBasicBlock *truebb, + MachineBasicBlock *falsebb, MachineBasicBlock *me, + BranchProbability trueprob = BranchProbability::getUnknown(), + BranchProbability falseprob = BranchProbability::getUnknown()) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob), + FalseProb(falseprob) {} // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; @@ -239,8 +239,8 @@ private: // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; - // TrueWeight/FalseWeight - branch weights. - uint32_t TrueWeight, FalseWeight; + // TrueProb/FalseProb - branch probabilities. + BranchProbability TrueProb, FalseProb; }; struct JumpTable { @@ -272,32 +272,35 @@ private: struct BitTestCase { BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - uint32_t Weight): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; - uint32_t ExtraWeight; + BranchProbability ExtraProb; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, - MachineBasicBlock* P, MachineBasicBlock* D, - BitTestInfo C): - First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, BranchProbability Pr) + : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), + Prob(Pr) {} APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; + BranchProbability Prob; + BranchProbability DefaultProb; }; /// Minimum jump table density, in percent. @@ -339,6 +342,7 @@ private: CaseClusterIt LastCluster; const ConstantInt *GE; const ConstantInt *LT; + BranchProbability DefaultProb; }; typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; @@ -515,6 +519,7 @@ private: void resetPerFunctionState() { FailureMBB = nullptr; Guard = nullptr; + GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -592,10 +597,6 @@ public: /// FunctionLoweringInfo &FuncInfo; - /// OptLevel - What optimization level we're generating code for. /// - CodeGenOpt::Level OptLevel; - /// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI; @@ -613,7 +614,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + DAG(dag), FuncInfo(funcinfo), HasTailCall(false) { } @@ -692,19 +693,20 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc, - uint32_t TW, uint32_t FW); + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TW, + BranchProbability FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TW, uint32_t FW); + BranchProbability TW, BranchProbability FW); bool ShouldEmitAsBranches(const std::vector &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); std::pair lowerCallOperands( ImmutableCallSite CS, @@ -712,7 +714,7 @@ public: unsigned NumArgs, SDValue Callee, Type *ReturnTy, - MachineBasicBlock *LandingPad = nullptr, + const BasicBlock *EHPadBB = nullptr, bool IsPatchPoint = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the @@ -722,11 +724,11 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); private: - std::pair lowerInvokable( - TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad); + std::pair + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); // Terminator instructions. 
void visitRet(const ReturnInst &I); @@ -734,11 +736,18 @@ private: void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); - uint32_t getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const; - void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight = 0); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -748,7 +757,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); @@ -842,7 +851,7 @@ private: void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); void visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); // These three are implemented in StatepointLowering.cpp void visitStatepoint(const CallInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5b9b18286fae..a1c6c4c1dd63 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -30,6 +31,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + std::string SDNode::getOperationName(const SelectionDAG *G) const { switch (getOpcode()) { default: @@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; case ISD::ConstantPool: return "ConstantPool"; case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; @@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; case ISD::FMAXNUM: return "fmaxnum"; + case ISD::FMINNAN: return "fminnan"; + case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case 
ISD::SELECT_CC: return "select_cc"; @@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; @@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; case ISD::CTTZ: return "cttz"; case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETO: return "seto"; case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; + case ISD::SETUEQ: return "setueq"; case ISD::SETUGT: return "setugt"; case ISD::SETUGE: return "setuge"; case ISD::SETULT: return "setult"; @@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); @@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (const void*)this << ": "; - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; if (getValueType(i) == MVT::Other) @@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { else OS << getValueType(i).getEVTString(); } - OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { @@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << ']'; } - if (unsigned Order = getIROrder()) - OS << " [ORD=" << Order << ']'; + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; - if (!G) - return; + if (!G) + return; - DILocation *L = getDebugLoc(); - if (!L) - return; + DILocation *L = getDebugLoc(); + if (!L) + return; - if (auto *Scope = L->getScope()) - OS << Scope->getFilename(); - else - OS << ""; - OS << ':' << L->getLine(); - if (unsigned C = L->getColumn()) - OS << ':' << C; + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << ""; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; + } +} + +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. 
+static bool shouldPrintInline(const SDNode &Node) { + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (const SDValue &Op : N->op_values()) + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode())) + continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)Op.getNode() << ": "; + } - dbgs() << '\n'; dbgs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N) || N->use_empty())) DumpNodes(N, 2, this); } @@ -573,10 +606,30 @@ void SelectionDAG::dump() const { } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; print_types(OS, G); + OS << " = " << getOperationName(G); print_details(OS, G); } +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << ""; + return false; + } else if (shouldPrintInline(*Value.getNode())) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + typedef SmallPtrSet VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { @@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, // Having printed this SDNode, walk the children: for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - if (i) OS << ","; OS << " "; - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. 
- OS << (const void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); } OS << "\n"; @@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const { } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); + printr(OS, G); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; + printOperand(OS, G, getOperand(i)); } - print_details(OS, G); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 97ece8b9248a..853a21a15eb9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" @@ -263,13 +264,17 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - SavedFastISel = IS.TM.Options.EnableFastISel; - if (NewOptLevel == CodeGenOpt::None) - IS.TM.setFastISel(true); DEBUG(dbgs() << "\nChanging optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + DEBUG(dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") + << "\n"); + } } ~OptLevelChanger() { @@ -293,6 +298,11 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + if (OptLevel == CodeGenOpt::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) @@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeTargetLibraryInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } @@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) - AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. 
/// -static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { +static void SplitCriticalSideEffectEdges(Function &Fn) { // Loop for blocks with phi nodes. - for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - PHINode *PN = dyn_cast(BB->begin()); + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast(BB.begin()); if (!PN) continue; ReprocessBlock: @@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // are potentially trapping constant expressions. Constant expressions are // the only potentially trapping value that can occur as the argument to a // PHI. - for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast(I)); ++I) + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast(PN->getIncomingValue(i)); if (!CE || !CE->canTrap()) continue; @@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Okay, we have to split this edge. SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, BB), - CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast(Fn), AA); + SplitCriticalSideEffectEdges(const_cast(Fn)); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) - FuncInfo->BPI = &getAnalysis(); + FuncInfo->BPI = &getAnalysis().getBPI(); else FuncInfo->BPI = nullptr; @@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); + FuncInfo->SplitCSR = false; + SmallVector Returns; + + // We split CSR if the target supports it for the given function + // and the function has only return exits. + if (TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. + for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa(Term)) + continue; + if (isa(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - MachineBasicBlock *EntryMBB = MF->begin(); const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + // Insert copies in the entry block and the return blocks. 
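A standalone restatement of the SplitCSR legality scan from the hunk above (assumes the LLVM IR headers from this tree; extracted for clarity, not the patch's literal code): callee-saved-register splitting stays enabled only if every block with no successors terminates in a return or an unreachable.

#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool hasOnlyReturnExits(const Function &Fn) {
  for (const BasicBlock &BB : Fn) {
    if (!succ_empty(&BB))
      continue; // only exit blocks matter
    const TerminatorInst *Term = BB.getTerminator();
    if (!isa<ReturnInst>(Term) && !isa<UnreachableInst>(Term))
      return false; // e.g. a resume-terminated exit disables SplitCSR
  }
  return true;
}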
+ if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), @@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() { // graph) and preceding back toward the beginning (the entry // node). while (ISelPosition != CurDAG->allnodes_begin()) { - SDNode *Node = --ISelPosition; + SDNode *Node = &*--ISelPosition; // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, // but there are currently some corner cases that it misses. Also, this // makes it theoretically possible to disable the DAGCombiner. @@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (const auto *CPI = dyn_cast(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + return true; + } + + if (!LLVMBB->isLandingPad()) + return true; + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - // If this is an MSVC-style personality function, we need to split the landing - // pad into several BBs. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MF->getMMI().addPersonality(MBB, cast(LPadInst->getParent() - ->getParent() - ->getPersonalityFn() - ->stripPointerCasts())); - EHPersonality Personality = MF->getMMI().getPersonalityType(); - - if (isMSVCEHPersonality(Personality)) { - SmallVector ClauseBBs; - const IntrinsicInst *ActionsCall = - dyn_cast(LLVMBB->getFirstInsertionPt()); - // Get all invoke BBs that unwind to this landingpad. 
- SmallVector InvokeBBs(MBB->pred_begin(), - MBB->pred_end()); - if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { - // If this is a call to llvm.eh.actions followed by indirectbr, then we've - // run WinEHPrepare, and we should remove this block from the machine CFG. - // Mark the targets of the indirectbr as landingpads instead. - for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { - MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); - - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); - } - } - - // Remove the edge from the invoke to the lpad. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->removeSuccessor(MBB); - - // Don't select instructions for the landingpad. - return false; - } - // Mark exception register as live in. - if (unsigned Reg = TLI->getExceptionPointerRegister()) + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI->getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); return true; @@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() { static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. - !isa(I) && // Terminators aren't folded. + !isa(I) && // Terminators aren't folded. !isa(I) && // Debug instructions aren't folded. - !isa(I) && // Landingpad instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } @@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (LLVMBB->isLandingPad()) + if (LLVMBB->isEHPad()) if (!PrepareEHLandingPad()) continue; @@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = std::prev(BI); + const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // then see if there is a load right before the selected instructions. // Try to fold the load if so. 
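The backwards scan implemented just below, restated as a self-contained sketch (assumes the LLVM IR headers; FoldedOrDead stands in for isFoldedOrDeadInstruction): walk from the selected instruction toward the block start, skipping instructions that were folded away, to find the load-fold candidate.

#include <iterator>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static const Instruction *
findFoldCandidate(const Instruction *Inst, const Instruction *Begin,
                  bool (*FoldedOrDead)(const Instruction *)) {
  const Instruction *BeforeInst = Inst;
  while (BeforeInst != Begin) {
    BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
    if (!FoldedOrDead(BeforeInst))
      break; // first live instruction before Inst
  }
  return BeforeInst;
}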
const Instruction *BeforeInst = Inst; - while (BeforeInst != Begin) { - BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // For the purpose of debugging, just abort. report_fatal_error("FastISel didn't select the entire block"); - if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst->getType()); @@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; - SelectBasicBlock(Inst, BI, HadTailCall); + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. // We also need to delete any previously emitted instructions. @@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } - uint32_t UnhandledWeight = 0; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) - UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; - + BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; + UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); - else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; + else + NextMBB = SDB->BitTestCases[i].Default; + + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledProb, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes @@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() { /// one preferred by the target. 
/// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { - RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - - if (!Ctor) { - Ctor = ISHeuristic; - RegisterScheduler::setDefault(Ctor); - } - - return Ctor(this, OptLevel); + return ISHeuristic(this, OptLevel); } //===----------------------------------------------------------------------===// @@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. @@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } /// CheckSame - Implements OP_CheckSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl > &RecordedNodes) { @@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckChildSame - Implements OP_CheckChildXSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl > &RecordedNodes, @@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. 
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL, unsigned ChildNo) { @@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast(N)->getVT() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && C->getSExtValue() == Val; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) @@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t 
Val = MatcherTable[MatcherIndex++]; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 4df5ede388fc..2764688518c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -80,9 +80,16 @@ namespace llvm { return true; } - static bool hasNodeAddressLabel(const SDNode *Node, - const SelectionDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast(Node); +#endif + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 34688df4765b..050ec2116c5d 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); AllocatedStackSlots.push_back(true); return SpillSlot; @@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, return Builder.DAG.getFrameIndex(FI, ValueType); } // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the suceeding path involes a bit of complexity that caused a - // minor bug previously. Unless performance shows this matters, please + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please // keep this code as simple as possible. NextSlotToAllocate++; } @@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, static Optional findPreviousSpillSlot(const Value *Val, SelectionDAGBuilder &Builder, int LookUpDepth) { - // Can not look any futher - give up now + // Can not look any further - give up now if (LookUpDepth <= 0) return Optional(); @@ -196,7 +200,7 @@ static Optional findPreviousSpillSlot(const Value *Val, /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. -/// This helps to avoid series of loads and stores that only serve to resuffle +/// This helps to avoid series of loads and stores that only serve to reshuffle /// values on the stack between calls. 
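Illustrative sketch of the slot-reuse policy documented above (simplified, hypothetical types, not the patch's code; the real implementation works on frame indexes and SDValues): remember the stack slot each value was spilled to at the previous statepoint and hand the same slot back, so consecutive safepoints do not reshuffle values between slots.

#include <map>

struct SpillSlotCache {
  std::map<const void *, int> PreviousSlot; // value identity -> frame index
  int allocate(const void *V, int FreshFI) {
    auto It = PreviousSlot.find(V);
    if (It != PreviousSlot.end())
      return It->second;       // reuse the slot from the last statepoint
    PreviousSlot[V] = FreshFI; // first spill: record the fresh slot
    return FreshFI;
  }
};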
static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { @@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, SmallVectorImpl &Relocs, SelectionDAGBuilder &Builder) { - // This is horribly ineffecient, but I don't care right now + // This is horribly inefficient, but I don't care right now SmallSet Seen; SmallVector NewBases, NewPtrs, NewRelocs; @@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); - SDValue ActualCallee = Builder.getValue(ISP.getCalledValue()); + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. + + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); @@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); @@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // ch, glue = callseq_end ch, glue // get_return_value ch, glue // - // get_return_value can either be a CopyFromReg to grab the return value from - // %RAX, or it can be a LOAD to load a value returned by reference via a stack - // slot. + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. - if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || - CallEnd->getOpcode() == ISD::LOAD)) - CallEnd = CallEnd->getOperand(0).getNode(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - if (HasDef) { - if (CS.isInvoke()) { - // Result value will be used in different basic block for invokes - // so we need to export it now. But statepoint call has a different type - // than the actuall call. It means that standart exporting mechanism will - // create register of the wrong type. 
So instead we need to create - register with correct type and save value into it manually. + // Export the result value if needed + const Instruction *GCResult = ISP.getGCResult(); + if (HasDef && GCResult) { + if (GCResult->getParent() != CS.getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. + // Default exporting mechanism will not work here because statepoint call + // has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics - completelly and make statepoint call to return a tuple. + completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), @@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } @@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // chaining stores one after another, this may allow // a bit more optimal scheduling for them Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - MachinePointerInfo::getFixedStack(Index), + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), false, false, 0); Builder.StatepointLowering.setLocation(Incoming, Loc); @@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } @@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is the case for allocas - // and constants. For this values we can avoid emiting spill load while + // and constants. For these values we can avoid emitting spill load while // visiting corresponding gc_relocate. // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited value. + // We do this only to check that we are not relocating any unvisited + // value.
SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponging values so that it would automatically + // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values does not use original // value. - if (StatepointSite.getCallSite().isInvoke()) + if (RelocateOpers.getUnderlyingCallSite().getParent() != + StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { } void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { + ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check. Don't do this for invokes. It would be too - // expensive to preserve this information across different basic blocks - if (!CS.isInvoke()) { - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); - } + // Consistency check. Check only relocates in the same basic block as their + // statepoint. + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Get call node, we will replace it later with statepoint SDNode *CallNode = - lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Replace original call DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root - // Remove originall call node + // Remove original call node DAG.DeleteNode(CallNode); // DON'T set the root - under the assumption that it's already set past the @@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { Instruction *I = cast<Instruction>(CI.getArgOperand(0)); assert(isStatepoint(I) && "first argument must be a statepoint token"); - if (isa<InvokeInst>(I)) { - // For invokes we should have stored call result in a virtual register. + if (I->getParent() != CI.getParent()) { + // Statepoint is in a different basic block so we should have stored call + // result in a virtual register. // We can not use default getValue() functionality to copy value from this // register because statepoint and actuall call return types can be // different, and getValue() will use CopyFromReg of the wrong type, @@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { #ifndef NDEBUG // Consistency check - // We skip this check for invoke statepoints. It would be too expensive to - // preserve validation info through different basic blocks.
- if (!RelocateOpers.isTiedToInvoke()) { + // We skip this check for relocates not in the same basic block as their + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { StatepointLowering.relocCallVisited(CI); } #endif @@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities + // it may allow more scheduling opportunities. SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(*DerivedPtrLocation), - false, false, false, 0); + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fbf651277c7f..c64d882d69a4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, + ArrayRef<SDValue> Ops, bool isSigned, SDLoc dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; - Args.reserve(NumOps); + Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; + for (SDValue Op : Ops) { + Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); - Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), @@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, return LowerCallTo(CLI); } - -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. +/// Soften the operands of a comparison. This code is shared among BR_CC, +/// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, @@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Expand into one or more soft-fp libcall(s). RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + bool ShouldInvertCC = false; switch (CCCode) { case ISD::SETEQ: case ISD::SETOEQ: @@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; break; - default: + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ?
RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: + // Invert CC for unordered comparisons + ShouldInvertCC = true; switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + case ISD::SETULT: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; break; - case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; - break; case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGT: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + case ISD::SETUGE: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); - SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + SDValue Ops[2] = {NewLHS, NewRHS}; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, @@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// getJumpTableEncoding - Return the entry encoding for a jump table in the -/// current function. The returned value is a member of the -/// MachineJumpTableInfo::JTEntryKind enum. +/// Return the entry encoding for a jump table in the current function. The +/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) @@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } -/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the -/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an -/// MCExpr. 
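// A worked example of the softenSetCCOperands rewrite above, assuming the
// usual libgcc comparison helpers for f32 (the helper names are the common
// RTLIB defaults, not spelled out in this patch). SETONE still expands to
// two ordered libcalls OR'ed together:
//
//   setone x, y   ==>   (__ltsf2(x, y) < 0) | (__gtsf2(x, y) > 0)
//
// while the new ShouldInvertCC path handles the remaining unordered
// predicates with a single ordered comparison whose condition is inverted
// once at the end, e.g.:
//
//   setult x, y   ==>   !(__gesf2(x, y) >= 0)
//
// which is equivalent because __gesf2 returns a negative value both when
// x < y and when the operands are unordered.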
+/// This returns the relocation base for the given PIC jumptable, the same as +/// getPICJumpTableRelocBase, but as an MCExpr. const MCExpr * TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,MCContext &Ctx) const{ @@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. +/// Check to see if the specified operand of the specified instruction is a +/// constant integer. If so, check to see if there are any bits set in the +/// constant that are not demanded. If so, shrink the constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { SDLoc dl(Op); @@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, return false; } -/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the -/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening -/// cast, but it could be generalized for targets with other types of -/// implicit widening casts. +/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be +/// generalized for targets with other types of implicit widening casts. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, @@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } -/// SimplifyDemandedBits - Look at Op. At this point, we know that only the -/// DemandedMask bits of the result of Op are ever used downstream. If we can -/// use this information to simplify Op, create a new simplified DAG node and -/// return true, returning the original and new nodes in Old and New. Otherwise, -/// analyze the expression and return a mask of KnownOne and KnownZero bits for -/// the expression (used to simplify the caller). The KnownZero/One bits may -/// only be accurate for those bits in the DemandedMask. +/// Look at Op. At this point, we know that only the DemandedMask bits of the +/// result of Op are ever used downstream. If we can use this information to +/// simplify Op, create a new simplified DAG node and return true, returning the +/// original and new nodes in Old and New. Otherwise, analyze the expression and +/// return a mask of KnownOne and KnownZero bits for the expression (used to +/// simplify the caller). The KnownZero/One bits may only be accurate for those +/// bits in the DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, @@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? 
Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. @@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeKnownBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the KnownZero/KnownOne bitsets. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } -/// ComputeNumSignBitsForTargetNode - This method can be implemented by -/// targets that want to expose additional information about sign bits to the -/// DAG Combiner. +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &, unsigned Depth) const { @@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from computeKnownBits in that it doesn't need to -/// determine which bit is set. -/// +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't need to determine which bit is set. static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // A left-shift of a constant one will have exactly one bit set, because // shifting the bit off the end is undefined. @@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands -/// and cc. If it is unable to simplify it, return a null SDValue. +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. 
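// Before SimplifySetCC below, a concrete instance of the demanded-bits
// helpers documented above (constants invented for illustration). With
// DemandedMask = 0x000F, ShrinkDemandedConstant rewrites
//
//   (X & 0xFFFF)   -->   (X & 0x000F)
//
// and since the new mask now covers every demanded bit, later demanded-bits
// reasoning can usually remove the AND entirely. ShrinkDemandedOp plays the
// same game on the operation width, e.g. turning an i64 add whose upper half
// is never used into zext(trunc(X) + trunc(Y)) when the casts are free.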
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, @@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (ConstantSDNode *N1C = dyn_cast(N1.getNode())) { + if (auto *N1C = dyn_cast(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an @@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs - if (ConstantSDNode *C = dyn_cast(N0->getOperand(1))) + if (auto *C = dyn_cast(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); PreExt = N0->getOperand(0); @@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, MinBits = N0->getOperand(0).getValueSizeInBits(); PreExt = N0->getOperand(0); Signed = true; - } else if (LoadSDNode *LN0 = dyn_cast(N0)) { + } else if (auto *LN0 = dyn_cast(N0)) { // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); @@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); - if (ConstantSDNode *AndRHS = - dyn_cast(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy(DL) : getShiftAmountTy(N0.getValueType(), DL); @@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { - if (ConstantSDNode *AndRHS = - dyn_cast(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); @@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Constant fold or commute setcc. SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); if (O.getNode()) return O; - } else if (ConstantFPSDNode *CFP = dyn_cast(N1.getNode())) { + } else if (auto *CFP = dyn_cast(N1.getNode())) { // If the RHS of an FP comparison is a constant, simplify it away in // some cases. if (CFP->getValueAPF().isNaN()) { @@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // to be careful about increasing register pressure needlessly. 
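// A worked instance of the "(X & -256) == 256 -> (X >> 8) == 1" fold from
// the code above. -256 is 0xFFFFFF00 in i32, so the AND clears the low eight
// bits and the compare can only succeed when bits [31:8] of X hold the value
// 1. Shifting right by countTrailingZeros(0xFFFFFF00) == 8 gives
//
//   (X & 0xFFFFFF00) == 0x00000100   <=>   (srl X, 8) == 1
//
// with a smaller, often cheaper-to-materialize RHS constant; the guard
// (AndRHSC & C1) == C1 is what ensures no compared bit is thrown away by
// the mask.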
bool LegalRHSImm = false; - if (ConstantSDNode *RHSC = dyn_cast(N1)) { - if (ConstantSDNode *LHSR = dyn_cast(N0.getOperand(1))) { + if (auto *RHSC = dyn_cast(N1)) { + if (auto *LHSR = dyn_cast(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(0), @@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Turn (C1-X) == C2 --> X == C1-C2 - if (ConstantSDNode *SUBC = dyn_cast(N0.getOperand(0))) { + if (auto *SUBC = dyn_cast(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(1), @@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return SDValue(); } -/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the -/// node is a GlobalAddress + offset. +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { - if (isa(N)) { - GlobalAddressSDNode *GASD = cast(N); + if (auto *GASD = dyn_cast(N)) { GA = GASD->getGlobal(); Offset += GASD->getOffset(); return true; @@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast(N2); - if (V) { + if (auto *V = dyn_cast(N2)) { Offset += V->getSExtValue(); return true; } } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast(N1); - if (V) { + if (auto *V = dyn_cast(N1)) { Offset += V->getSExtValue(); return true; } @@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, return false; } - -SDValue TargetLowering:: -PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { // Default implementation: no optimization. return SDValue(); } @@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const { return C_Unknown; } -/// LowerXConstraint - try to replace an X constraint, which matches anything, -/// with another that has more specific requirements based on the type of the -/// corresponding operand. +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; @@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return nullptr; } -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, @@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, //===----------------------------------------------------------------------===// // Constraint Selection. -/// isMatchingInputConstraint - Return true of this is an input operand that is -/// a matching constraint like "4". 
+/// Return true if this is an input operand that is a matching constraint like +/// "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); return isdigit(static_cast<unsigned char>(ConstraintCode[0])); } -/// getMatchedOperand - If this is an input matching constraint, this method -/// returns the output operand it matches. +/// If this is an input matching constraint, this method returns the output +/// operand it matches. unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { assert(!ConstraintCode.empty() && "No known constraint!"); return atoi(ConstraintCode.c_str()); } - -/// ParseConstraints - Split up the constraint string from the inline -/// assembly value into the specific constraints and their prefixes, -/// and also tie in the associated operand values. +/// Split up the constraint string from the inline assembly value into the +/// specific constraints and their prefixes, and also tie in the associated +/// operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { - /// ConstraintOperands - Information about all of the constraints. + /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); unsigned maCount = 0; // Largest number of multiple alternative constraints. @@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL, " incompatible type!"); } } - } } return ConstraintOperands; } - -/// getConstraintGenerality - Return an integer indicating how general CT -/// is. +/// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Other: @@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight return weight; } -/// ChooseConstraint - If there are multiple different constraints that we -/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// If there are multiple different constraints that we could pick for this +/// operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: /// Other -> immediates and magic values /// Register -> one specific register @@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, OpInfo.ConstraintType = BestType; } -/// ComputeConstraintToUse - Determines the constraint code and constraint -/// type to use for the specific AsmOperandInfo, setting -/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
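// A sketch of the input these routines see (the asm statement is invented
// for the example). Given
//
//   asm("add %2, %0" : "=r"(out) : "0"(out), "imr"(in));
//
// ParseConstraints produces one AsmOperandInfo per operand. For the "0"
// operand, isMatchingInputConstraint() is true (the code is a digit) and
// getMatchedOperand() returns 0, tying the input to the output register.
// For "imr", ChooseConstraint weighs the alternatives by generality,
// roughly: 'i' when the operand is a known immediate, otherwise the general
// register constraint 'r', with memory as the fallback for indirect values.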
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG) const { @@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, return Mul; } +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const { + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(); +} + /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to address of TLS variable xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast<Module *>(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null<GlobalVariable>( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as a call. Inform MFI that function has calls. + // At least for X86 targets, maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target? + MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index e7b2a8e72d2c..878eeeed0f6a 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -112,7 +112,7 @@ public: case 1: // Find all 'return', 'resume', and 'unwind' instructions. while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; + BasicBlock *CurBB = &*StateBB++; // Branches and invokes do not escape, only unwind, resume, and return // do. @@ -120,7 +120,7 @@ public: if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; - Builder.SetInsertPoint(TI->getParent(), TI); + Builder.SetInsertPoint(TI); return &Builder; } @@ -163,8 +163,8 @@ public: // Split the basic block containing the function call.
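// Stepping back to LowerToTLSEmulatedModel above, here is its effect in C
// terms (the variable name is invented; the __emutls names come from the
// code itself). A source-level access such as
//
//   __thread int xyz;
//   ... xyz = 42; ...
//
// is lowered as if it had been written
//
//   int *p = (int *)__emutls_get_address(&__emutls_v.xyz);
//   *p = 42;
//
// where __emutls_v.xyz is the control variable found or created by
// getOrInsertGlobal and __emutls_get_address is resolved by the runtime.
// This trades register-relative TLS access for a libcall, but needs no
// native TLS relocations from the target.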
BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + BasicBlock *NewBB = CallBB->splitBasicBlock( + CI->getIterator(), CallBB->getName() + ".cont"); // Remove the unconditional branch inserted at the end of CallBB. CallBB->getInstList().pop_back(); @@ -184,7 +184,7 @@ public: delete CI; } - Builder.SetInsertPoint(RI->getParent(), RI); + Builder.SetInsertPoint(RI); return &Builder; } } diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 4463cc7d3c51..f8aa1e2b0b9a 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -43,9 +43,11 @@ // points must be in the same loop. // Property #3 is ensured via the MachineBlockFrequencyInfo. // -// If this pass found points matching all this properties, then -// MachineFrameInfo is updated this that information. +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with this information. //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -61,11 +63,14 @@ #include "llvm/CodeGen/Passes.h" // To know about callee-saved. #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" // To query the target about frame lowering. #include "llvm/Target/TargetFrameLowering.h" // To know about frame setup operation. #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" // To access TargetInstrInfo. #include "llvm/Target/TargetSubtargetInfo.h" @@ -78,6 +83,10 @@ STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); +static cl::opt + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); + namespace { /// \brief Class to determine where the safe point to insert the /// prologue and epilogue are. @@ -113,18 +122,38 @@ class ShrinkWrap : public MachineFunctionPass { unsigned FrameDestroyOpcode; /// Entry block. const MachineBasicBlock *Entry; + typedef SmallSetVector SetOfRegs; + /// Registers that need to be saved for the current function. + mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. + MachineFunction *MachineFunc; /// \brief Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. - bool useOrDefCSROrFI(const MachineInstr &MI) const; + bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { + if (CurrentCSRs.empty()) { + BitVector SavedRegs; + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); + + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + CurrentCSRs.insert((unsigned)Reg); + } + return CurrentCSRs; + } /// \brief Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. 
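// On the caching in getCurrentCSRs above: determineCalleeSaves is not cheap,
// so the callee-saved set is computed lazily, once per function, and init()
// (below) clears CurrentCSRs so a stale set never leaks into the next
// function. The typical consumer is the regmask check added to
// useOrDefCSROrFI below, shaped like:
//
//   for (unsigned Reg : getCurrentCSRs(RS))
//     if (MO.clobbersPhysReg(Reg))
//       return true; // this regmask clobbers a CSR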
- void updateSaveRestorePoints(MachineBasicBlock &MBB); + void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); /// \brief Initialize the pass for \p MF. void init(MachineFunction &MF) { @@ -140,6 +169,8 @@ class ShrinkWrap : public MachineFunctionPass { FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); Entry = &MF.front(); + CurrentCSRs.clear(); + MachineFunc = &MF; ++NumFunc; } @@ -148,6 +179,9 @@ class ShrinkWrap : public MachineFunctionPass { /// shrink-wrapping. bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + /// \brief Check if shrink wrapping is enabled for this target and function. + static bool isShrinkWrapEnabled(const MachineFunction &MF); + public: static char ID; @@ -185,27 +219,34 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) -bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, + RegScavenger *RS) const { if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } for (const MachineOperand &MO : MI.operands()) { - bool UseCSR = false; + bool UseOrDefCSR = false; if (MO.isReg()) { unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs(RS)) { + if (MO.clobbersPhysReg(Reg)) { + UseOrDefCSR = true; + break; + } + } } - // TODO: Handle regmask more accurately. - // For now, be conservative about them. - if (UseCSR || MO.isFI() || MO.isRegMask()) { - DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() - << "): " << MI << '\n'); + if (UseOrDefCSR || MO.isFI()) { + DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); return true; } } @@ -225,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, return IDom; } -void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, + RegScavenger *RS) { // Get rid of the easy cases first. if (!Save) Save = &MBB; @@ -246,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator)) + if (!useOrDefCSROrFI(Terminator, RS)) continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { @@ -277,7 +319,24 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { while (Save && Restore && (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Post-dominance is not enough in loops to ensure that all uses/defs + // are after the prologue and before the epilogue at runtime. + // E.g., + // while(1) { + // Save + // Restore + // if (...) + // break; + // use/def CSRs + // } + // All the uses/defs of CSRs are dominated by Save and post-dominated + // by Restore. 
However, the CSRs uses are still reachable after + // Restore and before Save are executed. + // + // For now, just push the restore/save points outside of loops. + // FIXME: Refine the criteria to still find interesting cases + // for loops. + MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { // Fix (A). if (!SaveDominatesRestore) { Save = MDT->findNearestCommonDominator(Save, Restore); @@ -288,35 +347,72 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { Restore = MPDT->findNearestCommonDominator(Restore, Save); // Fix (C). - if (Save && Restore && Save != Restore && - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { - if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) - // Push Save outside of this loop. - Save = FindIDom<>(*Save, Save->predecessors(), *MDT); - else + if (Save && Restore && + (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { + // Push Save outside of this loop if immediate dominator is different + // from save block. If immediate dominator is not different, bail out. + MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (IDom != Save) + Save = IDom; + else { + Save = nullptr; + break; + } + } else { + // If the loop does not exit, there is no point in looking + // for a post-dominator outside the loop. + SmallVector ExitBlocks; + MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); // Push Restore outside of this loop. - Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + // Look for the immediate post-dominator of the loop exits. + MachineBasicBlock *IPdom = Restore; + for (MachineBasicBlock *LoopExitBB: ExitBlocks) { + IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); + if (!IPdom) + break; + } + // If the immediate post-dominator is not in a less nested loop, + // then we are stuck in a program with an infinite loop. + // In that case, we will not find a safe point, hence, bail out. + if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) + Restore = IPdom; + else { + Restore = nullptr; + break; + } + } } } } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (MF.empty()) + if (MF.empty() || !isShrinkWrapEnabled(MF)) return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); init(MF); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + for (MachineBasicBlock &MBB : MF) { DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); + if (MBB.isEHFuncletEntry()) { + DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); + return false; + } + for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI)) + if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB); + updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. 
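// The shape of function this pass improves, as an invented example. With
// prologue insertion at the entry, the early-exit path pays for the frame
// even though it touches nothing that needs one:
//
//   bb0: if (c) return 0;   // no CSR use/def, no frame index
//   bb1: ...clobbers a callee-saved register...
//   bb2: return x;
//
// Shrink-wrapping picks Save = bb1 (dominating every point flagged by
// useOrDefCSROrFI) and Restore = bb2 (post-dominating them), so the fast
// path through bb0 executes neither prologue nor epilogue.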
if (!ArePointsInteresting()) { @@ -368,7 +464,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB); + updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { @@ -386,3 +482,30 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { ++NumCandidates; return false; } + +bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return TFI->enableShrinkWrapping(MF) && + // Windows with CFI has some limitations that make it impossible + // to use shrink-wrapping. + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + // Sanitizers look at the value of the stack at the location + // of the crash. Since a crash can happen anywhere, the + // frame must be lowered before anything else happens for the + // sanitizers to be able to get a correct stack frame. + !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); + // If EnableShrinkWrap is set, it takes precedence over whatever the + // target sets. The rationale is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; } + llvm_unreachable("Invalid shrink-wrapping state"); +} diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index d236e1f5ab6f..e1f242a08de1 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -50,7 +50,7 @@ class SjLjEHPrepare : public FunctionPass { Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; + Constant *BuiltinSetupDispatchFn; Constant *FrameAddrFn; Constant *StackAddrFn; Constant *StackRestoreFn; @@ -112,7 +112,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) { FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + BuiltinSetupDispatchFn = + Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); @@ -178,8 +179,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> Builder( - std::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + auto *SelI = cast<Instruction>(SelVal); + IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator())); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -190,7 +191,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// it with all of the data that we know at this point.
Value *SjLjEHPrepare::setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads) { - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. auto &DL = F.getParent()->getDataLayout(); unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", - EntryBB->begin()); + &EntryBB->front()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; - IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + IRBuilder<> Builder(LPI->getParent(), + LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. Value *FCData = @@ -250,21 +252,20 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; + assert(AfterAllocaInsPt != F.front().end()); - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; - ++AI) { - Type *Ty = AI->getType(); + for (auto &AI : F.args()) { + Type *Ty = AI.getType(); // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); - Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, - AI->getName() + ".tmp", - AfterAllocaInsPt); - AI->replaceAllUsesWith(SI); + Instruction *SI = SelectInst::Create( + TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt); + AI.replaceAllUsesWith(SI); // Reset the operand, because it was clobbered by the RAUW above. - SI->setOperand(1, AI); + SI->setOperand(1, &AI); } } @@ -279,7 +280,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. - Instruction *Inst = II; + Instruction *Inst = &*II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && @@ -360,7 +361,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. - LPI->moveBefore(UnwindBlock->begin()); + LPI->moveBefore(&UnwindBlock->front()); } } @@ -400,7 +401,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. @@ -421,9 +422,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); - Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); + // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf. + Builder.CreateCall(BuiltinSetupDispatchFn, {}); // Store a pointer to the function context so that the back-end will know // where to look for it.
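// Roughly what the rewritten entry block looks like once
// setupEntryBlockAndCallSites has run with the change above (a sketch; the
// register function name assumes the pass's usual _Unwind_SjLj_* runtime,
// which this hunk does not show):
//
//   %fn_context = alloca { ... }                 ; function context
//   ...store frameaddress/stacksave results into the jmpbuf slots...
//   call void @llvm.eh.sjlj.setup.dispatch()
//   call void @_Unwind_SjLj_Register(i8* %ctx)
//
// The old code bitcast the jmpbuf and passed it to eh.sjlj.setjmp; the new
// setup_dispatch intrinsic takes no operands, which is why the bitcast and
// its argument disappear in this hunk.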
@@ -475,7 +475,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(I); + StackAddr->insertAfter(&*I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 025ae70ed888..c9d23f67bdee 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -172,8 +172,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, // optionally includes an additional position prior to MBB->begin(), indicated // by the includeStart flag. This is done so that we can iterate MIs in a MBB // in parallel with SlotIndexes, but there should be a better way to do this. - IndexList::iterator ListB = startIdx.listEntry(); - IndexList::iterator ListI = endIdx.listEntry(); + IndexList::iterator ListB = startIdx.listEntry()->getIterator(); + IndexList::iterator ListI = endIdx.listEntry()->getIterator(); MachineBasicBlock::iterator MBBI = End; bool pastStart = false; while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 97a5424aa560..d30cfc27bf4b 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -188,9 +187,9 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { BlockFrequencies.resize(mf.getNumBlockIDs()); MBFI = &getAnalysis(); setThreshold(MBFI->getEntryFreq()); - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI->getBlockFreq(I); + for (auto &I : mf) { + unsigned Num = I.getNumber(); + BlockFrequencies[Num] = MBFI->getBlockFreq(&I); } // We never change the function. diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index dab1dfe4f1f8..51dddabed2d9 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -56,6 +56,7 @@ void SplitAnalysis::clear() { SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + // FIXME: Handle multiple EH pad successors. const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair &LSP = LastSplitPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); @@ -176,10 +177,11 @@ bool SplitAnalysis::calcLiveBlockInfo() { UseE = UseSlots.end(); // Loop over basic blocks where CurLI is live. - MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + MachineFunction::iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); for (;;) { BlockInfo BI; - BI.MBB = MFI; + BI.MBB = &*MFI; SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); @@ -259,7 +261,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { if (LVI->start < Stop) ++MFI; else - MFI = LIS.getMBBFromIndex(LVI->start); + MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); } assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); @@ -275,8 +277,9 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { unsigned Count = 0; // Loop over basic blocks where li is live. 
- MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); - SlotIndex Stop = LIS.getMBBEndIdx(MFI); + MachineFunction::const_iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); + SlotIndex Stop = LIS.getMBBEndIdx(&*MFI); for (;;) { ++Count; LVI = li->advanceTo(LVI, Stop); @@ -284,7 +287,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { return Count; do { ++MFI; - Stop = LIS.getMBBEndIdx(MFI); + Stop = LIS.getMBBEndIdx(&*MFI); } while (Stop <= LVI->start); } } @@ -864,9 +867,9 @@ bool SplitEditor::transferValues() { // This value has multiple defs in RegIdx, but it wasn't rematerialized, // so the live range is accurate. Add live-in blocks in [Start;End) to the // LiveInBlocks. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex BlockStart, BlockEnd; - std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB); // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { @@ -875,7 +878,7 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) - LRC.setLiveOutValue(MBB, VNI); + LRC.setLiveOutValue(&*MBB, VNI); // Skip to the next block for live-in. ++MBB; @@ -886,23 +889,23 @@ bool SplitEditor::transferValues() { assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { DEBUG(dbgs() << ">BB#" << MBB->getNumber()); - BlockEnd = LIS.getMBBEndIdx(MBB); + BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) - LRC.setLiveOutValue(MBB, VNI); // Live-out as well. + LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. } else { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[MBB]); - LRC.setLiveOutValue(MBB, nullptr); + LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.setLiveOutValue(&*MBB, nullptr); } } BlockStart = BlockEnd; @@ -1081,16 +1084,14 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = &LIS.getInterval(Edit->get(i)); - unsigned NumComp = ConEQ.Classify(li); - if (NumComp <= 1) - continue; - DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); - SmallVector dups; - dups.push_back(li); - for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->createEmptyInterval()); - ConEQ.Distribute(&dups[0], MRI); + unsigned VReg = Edit->get(i); + LiveInterval &LI = LIS.getInterval(VReg); + SmallVector SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + unsigned Original = VRM.getOriginal(VReg); + for (LiveInterval *SplitLI : SplitLIs) + VRM.setIsSplitFromReg(SplitLI->reg, Original); + // The new intervals all map back to i. 
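// A sketch of what the splitSeparateComponents call above does (register
// numbers invented). If %vreg5 is left with two live ranges that share no
// values, say [a,b) and [c,d), the second component moves to a fresh
// register:
//
//   %vreg5 : [a,b)[c,d)   -->   %vreg5 : [a,b)   +   %vreg9 : [c,d)
//
// and VRM.setIsSplitFromReg records that %vreg9 descends from the original
// register, so later spill decisions can still trace its history.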
if (LRMap) LRMap->resize(Edit->size(), i); diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 116eef66c580..b3cd8b3d80bb 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -94,7 +94,9 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + auto &DL = AP.MF->getDataLayout(); + + unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index bcea37a3aafa..db3fef524b30 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -373,7 +373,7 @@ bool StackProtector::InsertStackProtectors() { Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast(BB->getTerminator()); if (!RI) continue; @@ -433,7 +433,7 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { @@ -453,22 +453,20 @@ bool StackProtector::InsertStackProtectors() { LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); - unsigned SuccessWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(true); - unsigned FailureWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(false); + auto SuccessProb = + BranchProbabilityInfo::getBranchProbStackProtector(true); + auto FailureProb = + BranchProbabilityInfo::getBranchProbStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) - .createBranchWeights(SuccessWeight, FailureWeight); + .createBranchWeights(SuccessProb.getNumerator(), + FailureProb.getNumerator()); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. 
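// The guard check built above, sketched as IR (the guard symbol and block
// names are illustrative). After splitBasicBlock:
//
//   %1 = load i8*, i8** @__stack_chk_guard   ; StackGuardVar
//   %2 = load i8*, i8** %StackGuardSlot      ; AI
//   %3 = icmp eq i8* %1, %2
//   br i1 %3, label %SP_return, label %FailBB, !prof !N
//
// The change swaps raw branch weights for BranchProbability numerators, so
// !N still encodes an overwhelmingly likely success edge, just derived from
// the same probability type used elsewhere in the analysis.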
- if (!HasPrologue) - return false; - - return true; + return HasPrologue; } /// CreateFailBB - Create a basic block to jump to when the stack protector diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index a5a175f2c8f0..51f4d0e68172 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -318,7 +318,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (NewFI == -1 || (NewFI == (int)SS)) continue; - const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI); + const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI); SmallVectorImpl &RefMMOs = SSRefs[SS]; for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) RefMMOs[i]->setValue(NewSV); diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp index 95dfd75018c1..3f60e18fafa9 100644 --- a/lib/CodeGen/StatepointExampleGC.cpp +++ b/lib/CodeGen/StatepointExampleGC.cpp @@ -34,9 +34,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional isGCManagedPointer(const Value *V) const override { + Optional isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast(V->getType()); + const PointerType *PT = cast(Ty); // For the sake of this example GC, we arbitrarily pick addrspace(1) as our // GC managed heap. We know that a pointer into this heap needs to be // updated and that no other pointer does. Note that addrspace(1) is used diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 237460cd9051..d2fbf533a787 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -59,7 +59,7 @@ TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); typedef std::vector > AvailableValsTy; namespace { - /// TailDuplicatePass - Perform tail duplication. + /// Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -69,11 +69,11 @@ namespace { std::unique_ptr RS; bool PreRegAlloc; - // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + // A list of virtual registers for which to update SSA form. SmallVector SSAUpdateVRs; - // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of - // source virtual registers. + // For each virtual register in SSAUpdateVals keep a list of source virtual + // registers. DenseMap SSAUpdateVals; public: @@ -161,7 +161,7 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; SmallSetVector Preds(MBB->pred_begin(), MBB->pred_end()); MachineBasicBlock::iterator MI = MBB->begin(); @@ -207,7 +207,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } -/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. +/// Tail duplicate the block and cleanup. bool TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, @@ -310,9 +310,9 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, return true; } -/// TailDuplicateBlocks - Look for small blocks that are unconditionally -/// branched to and do not fall through. Tail-duplicate their instructions -/// into their predecessors to eliminate (dynamic) branches. 
+/// Look for small blocks that are unconditionally branched to and do not fall +/// through. Tail-duplicate their instructions into their predecessors to +/// eliminate (dynamic) branches. bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; @@ -322,7 +322,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { } for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; if (NumTails == TailDupLimit) break; @@ -375,8 +375,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, } } -/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for -/// SSA update. +/// Add a definition and source virtual registers pair for SSA update. void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB) { DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(OrigReg); @@ -390,9 +389,8 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, } } -/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. -/// Remember the source register that's contributed by PredBB and update SSA -/// update map. +/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the +/// source register that's contributed by PredBB and update SSA update map. void TailDuplicatePass::ProcessPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, @@ -422,7 +420,7 @@ void TailDuplicatePass::ProcessPHI( MI->eraseFromParent(); } -/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// Duplicate a TailBB instruction to PredBB and update /// the source operands due to earlier PHI translation. void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, @@ -459,9 +457,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, PredBB->insert(PredBB->instr_end(), NewMI); } -/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor -/// blocks, the successors have gained new predecessors. Update the PHI -/// instructions in them accordingly. +/// After FromBB is tail duplicated into its predecessor blocks, the successors +/// have gained new predecessors. Update the PHI instructions in them +/// accordingly. void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl<MachineBasicBlock *> &TDBBs, @@ -545,7 +543,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, } } -/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. +/// Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool IsSimple, @@ -563,6 +561,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && + // FIXME: Use Function::optForSize(). MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else @@ -584,30 +583,51 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { + for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated.
- if (I->isNotDuplicable()) + if (MI.isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->isReturn()) + if (PreRegAlloc && MI.isReturn()) return false; // Avoid duplicating calls before register allocation. Calls present a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->isCall()) + if (PreRegAlloc && MI.isCall()) return false; - if (!I->isPHI() && !I->isDebugValue()) + if (!MI.isPHI() && !MI.isDebugValue()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; } + // Check if any of the successors of TailBB has a PHI node in which the + // value corresponding to TailBB uses a subregister. + // If a phi node uses a register paired with a subregister, the actual + // "value type" of the phi may differ from the type of the register without + // any subregisters. Due to a bug, tail duplication may add a new operand + // without a necessary subregister, producing invalid code. This is + // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. + // Disable tail duplication for this case for now, until the problem is + // fixed. + for (auto SB : TailBB.successors()) { + for (auto &I : *SB) { + if (!I.isPHI()) + break; + unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); + assert(Idx != 0); + MachineOperand &PU = I.getOperand(Idx); + if (PU.getSubReg() != 0) + return false; + } + } + if (HasIndirectbr && PreRegAlloc) return true; @@ -620,7 +640,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, return canCompletelyDuplicateBB(TailBB); } -/// isSimpleBB - True if this BB has only one unconditional jump. +/// True if this BB has only one unconditional jump. bool TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { if (TailBB->succ_size() != 1) @@ -636,22 +656,16 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { static bool bothUsedInPHI(const MachineBasicBlock &A, SmallPtrSet<MachineBasicBlock *, 8> SuccsB) { - for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), - SE = A.succ_end(); SI != SE; ++SI) { - MachineBasicBlock *BB = *SI; + for (MachineBasicBlock *BB : A.successors()) if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) return true; - } return false; } bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), - PE = BB.pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : BB.predecessors()) { if (PredBB->succ_size() > 1) return false; @@ -680,7 +694,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; - if (PredBB->getLandingPadSuccessor()) + if (PredBB->hasEHPadSuccessor()) continue; if (bothUsedInPHI(*PredBB, Succs)) @@ -696,7 +710,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); + MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); // Make PredFBB explicit.
if (PredCond.empty()) @@ -731,19 +745,19 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); - uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); - PredBB->removeSuccessor(TailBB); - unsigned NumSuccessors = PredBB->succ_size(); - assert(NumSuccessors <= 1); - if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) - PredBB->addSuccessor(NewTarget, Weight); + if (!PredBB->isSuccessor(NewTarget)) + PredBB->replaceSuccessor(TailBB, NewTarget); + else { + PredBB->removeSuccessor(TailBB, true); + assert(PredBB->succ_size() <= 1); + } TDBBs.push_back(PredBB); } return Changed; } -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, @@ -798,13 +812,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); - for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), - E = TailBB->livein_end(); I != E; ++I) { - if (!RS->isRegUsed(*I, false)) + for (const auto &LI : TailBB->liveins()) { + if (!RS->isRegUsed(LI.PhysReg, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. - PredBB->addLiveIn(*I); + PredBB->addLiveIn(LI); } } @@ -845,7 +858,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); + PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); Changed = true; ++NumTailDups; @@ -854,7 +867,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by @@ -960,8 +973,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, return Changed; } -/// RemoveDeadBlock - Remove the specified dead machine basic block from the -/// function, updating the CFG. +/// Remove the specified dead machine basic block from the function, updating +/// the CFG.
void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index f3cccd82a5c5..679ade185e1c 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -32,25 +33,22 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { return Attr.getValueAsString() == "true"; } -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. This is the default implementation -/// which is overridden for some targets. -int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); -} - +/// Returns the displacement from the frame register to the stack +/// frame of the specified index, along with the frame register used +/// (in output arg FrameReg). This is the default implementation which +/// is overridden for some targets. int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever // getFrameRegister() says. The target can override this if it's doing // something different. FrameReg = RI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); } bool TargetFrameLowering::needsFrameIndexResolution( @@ -84,3 +82,13 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(Reg); } } + +unsigned TargetFrameLowering::getStackAlignmentSkew( + const MachineFunction &MF) const { + // When HHVM function is called, the stack is skewed as the return address + // is removed from the stack before we enter the function. + if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + return MF.getTarget().getPointerSize(); + + return 0; +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 97ca0253d376..6eaf991ac700 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -118,23 +118,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MBB->addSuccessor(NewDest); } -// commuteInstruction - The default implementation of this method just exchanges -// the two operands returned by findCommutedOpIndices. -MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI) const { +MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned Idx1, + unsigned Idx2) const { const MCInstrDesc &MCID = MI->getDesc(); bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. 
return nullptr; - unsigned Idx1, Idx2; - if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - assert(MI->isCommutable() && "Precondition violation: MI must be commutable."); - return nullptr; - } + unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1; + unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2; + assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) && + CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 && + "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands."); assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); @@ -184,9 +185,53 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, return MI; } -/// findCommutedOpIndices - If specified MI is commutable, return the two -/// operand indices that would swap value. Return true if the instruction -/// is not in a form which this routine understands. +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose + // any commutable operand, which is done in findCommutedOpIndices() method + // called below. + if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) && + !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) { + assert(MI->isCommutable() && + "Precondition violation: MI must be commutable."); + return nullptr; + } + return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + +bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2) { + if (ResultIdx1 == CommuteAnyOperandIndex && + ResultIdx2 == CommuteAnyOperandIndex) { + ResultIdx1 = CommutableOpIdx1; + ResultIdx2 = CommutableOpIdx2; + } else if (ResultIdx1 == CommuteAnyOperandIndex) { + if (ResultIdx2 == CommutableOpIdx1) + ResultIdx1 = CommutableOpIdx2; + else if (ResultIdx2 == CommutableOpIdx2) + ResultIdx1 = CommutableOpIdx1; + else + return false; + } else if (ResultIdx2 == CommuteAnyOperandIndex) { + if (ResultIdx1 == CommutableOpIdx1) + ResultIdx2 = CommutableOpIdx2; + else if (ResultIdx1 == CommutableOpIdx2) + ResultIdx2 = CommutableOpIdx1; + else + return false; + } else + // Check that the result operand indices match the given commutable + // operand indices. + return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) || + (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1); + + return true; +} + bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { @@ -196,10 +241,15 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = MCID.getNumDefs(); - SrcOpIdx2 = SrcOpIdx1 + 1; + unsigned CommutableOpIdx1 = MCID.getNumDefs(); + unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1; + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) // No idea. 
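// --- Editor's illustration, not part of the patch ---
// How fixCommutedOpIndices() resolves the CommuteAnyOperandIndex wildcard
// (hypothetical values; assume the instruction's commutable operands are
// operands 1 and 2):
//
//   unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex, Idx2 = 2;
//   fixCommutedOpIndices(Idx1, Idx2, 1, 2); // true; wildcard Idx1 becomes 1
//
//   unsigned Idx3 = 0, Idx4 = 2;
//   fixCommutedOpIndices(Idx3, Idx4, 1, 2); // false; (0, 2) is not a
//                                           // permutation of (1, 2)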
@@ -207,7 +257,6 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, return true; } - bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { if (!MI->isTerminator()) return false; @@ -315,7 +364,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!MF.getTarget().getDataLayout()->isLittleEndian()) { + if (!MF.getDataLayout().isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -384,11 +433,6 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - ArrayRef<unsigned> Ops) const { - return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); -} - static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { @@ -489,10 +533,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -517,6 +560,217 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, return --Pos; } +bool TargetInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And they need to be in the trace (otherwise, they won't have a depth). + return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB; +} + +bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + return MI1->getOpcode() == AssocOpcode && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +// 1.
The operation must be associative and commutative. +// 2. The instruction must have virtual register definitions for its +// operands in the same basic block. +// 3. The instruction must have a reassociable sibling. +bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) const { + return isAssociativeAndCommutative(Inst) && + hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted); +} + +// The concept of the reassociation pass is that these operations can benefit +// from this kind of transformation: +// +// A = ? op ? +// B = A op X (Prev) +// C = B op Y (Root) +// --> +// A = ? op ? +// B = X op Y +// C = A op B +// +// breaking the dependency between A and B, allowing them to be executed in +// parallel (or back-to-back in a pipeline) instead of depending on each other. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool TargetInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// Attempt the reassociation transformation to reduce critical path length. +/// See the above comments before getMachineCombinerPatterns(). +void TargetInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + + // This array encodes the operand index for each parameter because the + // operands may be commuted. Each row corresponds to a pattern value, + // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = { + { 1, 1, 2, 2 }, + { 1, 2, 2, 1 }, + { 2, 1, 1, 2 }, + { 2, 2, 1, 1 } + }; + + int Row; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; + case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; + case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; + case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; + default: llvm_unreachable("unexpected MachineCombinerPattern"); + } + + MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); + MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); + MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); + MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); + MachineOperand &OpC = Root.getOperand(0); + + unsigned RegA = OpA.getReg(); + unsigned RegB = OpB.getReg(); + unsigned RegX = OpX.getReg(); + unsigned RegY = OpY.getReg(); + unsigned RegC = OpC.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) + MRI.constrainRegClass(RegA, RC); + if (TargetRegisterInfo::isVirtualRegister(RegB)) + MRI.constrainRegClass(RegB, RC); + if (TargetRegisterInfo::isVirtualRegister(RegX)) + MRI.constrainRegClass(RegX, RC); + if (TargetRegisterInfo::isVirtualRegister(RegY)) + MRI.constrainRegClass(RegY, RC); + if (TargetRegisterInfo::isVirtualRegister(RegC)) + MRI.constrainRegClass(RegC, RC); + + // Create a new virtual register for the result of (X op Y) instead of + // recycling RegB because the MachineCombiner's computation of the critical + // path requires a new register definition rather than an existing one. + unsigned NewVR = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + unsigned Opcode = Root.getOpcode(); + bool KillA = OpA.isKill(); + bool KillX = OpX.isKill(); + bool KillY = OpY.isKill(); + + // Create new instructions for insertion. + MachineInstrBuilder MIB1 = + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); + MachineInstrBuilder MIB2 = + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); + + setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); + + // Record new instructions for insertion and old instructions for deletion. + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(&Prev); + DelInstrs.push_back(&Root); +} + +void TargetInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. + MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + + assert(Prev && "Unknown pattern for machine combiner"); + + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + return; +} + /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot.
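// --- Editor's illustration, not part of the patch ---
// The REASSOC_* rewrite on a concrete sequence. With REASSOC_AX_BY, row 0 of
// OpIdx reads A and X from Prev's operands 1 and 2, and B and Y from Root's:
//
//   %vB = ADD %vA, %vX        ; Prev  (B = A op X)
//   %vC = ADD %vB, %vY        ; Root  (C = B op Y)
// becomes
//   %vNew = ADD %vX, %vY      ; MIB1 defines the fresh register NewVR
//   %vC   = ADD %vA, %vNew    ; MIB2
//
// The chain through %vB is gone, so the two new adds can issue in parallel;
// MachineCombiner then decides whether the shorter critical path pays off.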
@@ -661,6 +915,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { return 0; int SPAdj = MI->getOperand(0).getImm(); + SPAdj = TFI->alignSPAdjust(SPAdj); if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) @@ -686,10 +941,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // modification. const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) - return true; - - return false; + return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index ecfd65931574..36a31c9d6461 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -247,13 +247,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; @@ -266,13 +262,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; @@ -501,10 +493,6 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -512,10 +500,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOSINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; if (RetVT == MVT::i32) return FPTOSINT_F64_I32; if (RetVT == MVT::i64) @@ -551,10 +535,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. 
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -562,10 +542,6 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOUINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; if (RetVT == MVT::i32) return FPTOUINT_F64_I32; if (RetVT == MVT::i64) @@ -758,17 +734,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - IntDivIsCheap = false; FsqrtIsCheap = false; - Pow2SDivIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; EnableExtLdPromotion = false; HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; @@ -778,6 +750,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MinFunctionAlignment = 0; PrefFunctionAlignment = 0; PrefLoopAlignment = 0; + GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; MinimumJumpTableEntries = 4; @@ -814,6 +787,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINNAN, VT, Expand); + setOperationAction(ISD::FMAXNAN, VT, Expand); setOperationAction(ISD::FMAD, VT, Expand); setOperationAction(ISD::SMIN, VT, Expand); setOperationAction(ISD::SMAX, VT, Expand); @@ -828,6 +803,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + setOperationAction(ISD::BITREVERSE, VT, Expand); + // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -838,11 +815,17 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + + // For most targets @llvm.get.dynamic.area.offset just returns 0. + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + // Most targets also ignore the @llvm.readcyclecounter intrinsic. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants.
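// --- Editor's illustration, not part of the patch ---
// initActions() seeds every value type with conservative defaults that
// backends selectively override. For the BITREVERSE default added above, a
// target with native bit-reverse hardware would opt back in like so
// (hypothetical backend code inside its TargetLowering constructor):
//
//   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
//   // All other types keep Expand and are lowered to shift/mask sequences.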
@@ -1111,6 +1094,19 @@ MachineBasicBlock* TargetLoweringBase::emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // We're handling multiple types of operands here: + // PATCHPOINT MetaArgs - live-in, read only, direct + // STATEPOINT Deopt Spill - live-through, read only, indirect + // STATEPOINT Deopt Alloca - live-through, read only, direct + // (We're currently conservative and mark the deopt slots read/write in + // practice.) + // STATEPOINT GC Spill - live-through, read/write, indirect + // STATEPOINT GC Alloca - live-through, read/write, direct + // The live-in vs live-through is handled already (the live through ones are + // all stack slots), but we need to handle the different type of stackmap + // operands and memory effects here. // MI changes inside this loop as we grow operands. for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { @@ -1126,10 +1122,24 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Copy operands before the frame-index. for (unsigned i = 0; i < OperIdx; ++i) MIB.addOperand(MI->getOperand(i)); - // Add frame index operands: direct-mem-ref tag, #FI, offset. - MIB.addImm(StackMaps::DirectMemRefOp); - MIB.addOperand(MI->getOperand(OperIdx)); - MIB.addImm(0); + // Add frame index operands recognized by stackmaps.cpp + if (MFI.isStatepointSpillSlotObjectIndex(FI)) { + // indirect-mem-ref tag, size, #FI, offset. + // Used for spills inserted by StatepointLowering. This codepath is not + // used for patchpoints/stackmaps at all, for these spilling is done via + // foldMemoryOperand callback only. + assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(MFI.getObjectSize(FI)); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } else { + // direct-mem-ref tag, #FI, offset. + // Used by patchpoint, and direct alloca arguments to statepoints + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } // Copy the operands after the frame index. for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) MIB.addOperand(MI->getOperand(i)); @@ -1139,7 +1149,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); // Add a new memory operand for this FI. - const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); unsigned Flags = MachineMemOperand::MOLoad; @@ -1148,8 +1157,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, Flags |= MachineMemOperand::MOVolatile; } MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), Flags, - TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(MF, FI), Flags, + MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1274,20 +1283,14 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } + // Decide how to handle f16. If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). if (!isTypeLegal(MVT::f16)) { - // If the target has native f32 support, promote f16 operations to f32. If - // f32 is not supported, generate soft float library calls. 
- if (isTypeLegal(MVT::f32)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); - } else { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } // Loop over all of the vector value types to see which need transformations. @@ -1528,6 +1531,29 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, + unsigned Alignment, + bool *Fast) const { + // Check if the specified alignment is sufficient based on the data layout. + // TODO: While using the data layout works in practice, a better solution + // would be to implement this check directly (make this a virtual function). + // For example, the ABI alignment may change based on software platform while + // this function should only be affected by hardware implementation. + Type *Ty = VT.getTypeForEVT(Context); + if (Alignment >= DL.getABITypeAlignment(Ty)) { + // Assume that an access that meets the ABI-specified alignment is fast. + if (Fast != nullptr) + *Fast = true; + return true; + } + + // This is a misaligned access. + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); +} + + //===----------------------------------------------------------------------===// // TargetTransformInfo Helpers //===----------------------------------------------------------------------===// @@ -1546,6 +1572,11 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Invoke: return 0; case Resume: return 0; case Unreachable: return 0; + case CleanupRet: return 0; + case CatchRet: return 0; + case CatchPad: return 0; + case CatchSwitch: return 0; + case CleanupPad: return 0; case Add: return ISD::ADD; case FAdd: return ISD::FADD; case Sub: return ISD::SUB; @@ -1603,13 +1634,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<unsigned, MVT> +std::pair<int, MVT> TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - unsigned Cost = 1; + int Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. @@ -1622,11 +1653,28 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } } +Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!TM.getTargetTriple().isAndroid()) + return nullptr; + + // Android provides a libc function to retrieve the address of the current + // thread's unsafe stack pointer.
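// --- Editor's illustration, not part of the patch ---
// On Android, getSafeStackPointerLocation() materializes IR along these
// lines (sketch; the helper is the libc function named in the code below):
//
//   %unsafe_stack_ptr = call i8** @__safestack_pointer_address()
//
// i.e. a call returning the address of the current thread's unsafe stack
// pointer. On every other target the hook returns nullptr and the SafeStack
// pass falls back to its own default lookup scheme.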
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + Value *Fn = M->getOrInsertFunction("__safestack_pointer_address", + StackPtrTy->getPointerTo(0), nullptr); + return IRB.CreateCall(Fn); +} + //===----------------------------------------------------------------------===// // Loop Strength Reduction hooks //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2f78763d8e02..58ae9cc53bda 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" @@ -32,6 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -58,9 +60,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( report_fatal_error("We do not support this DWARF encoding yet!"); } -void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, - const MCSymbol *Sym) const { +void TargetLoweringObjectFileELF::emitPersonalityValue( + MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); MCSymbolELF *Label = @@ -72,9 +73,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue, unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, Flags, 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -232,14 +233,8 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".tdata"; if (Kind.isThreadBSS()) return ".tbss"; - if (Kind.isDataNoRel()) + if (Kind.isData()) return ".data"; - if (Kind.isDataRelLocal()) - return ".data.rel.local"; - if (Kind.isDataRel()) - return ".data.rel"; - if (Kind.isReadOnlyWithRelLocal()) - return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return ".data.rel.ro"; } @@ -282,8 +277,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! - unsigned Align = - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); + unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); @@ -350,9 +345,8 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( /// Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in.
-MCSection * -TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -362,7 +356,6 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, if (Kind.isReadOnly()) return ReadOnlySection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return DataRelROSection; } @@ -507,7 +500,7 @@ emitModuleFlags(MCStreamer &Streamer, // Get the section. MCSectionMachO *S = getContext().getMachOSection( - Segment, Section, TAA, StubSize, SectionKind::getDataNoRel()); + Segment, Section, TAA, StubSize, SectionKind::getData()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); @@ -589,14 +582,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -634,12 +629,11 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( return DataSection; } -MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + if (Kind.isData() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) @@ -706,7 +700,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { - // Although MachO 32-bit targets do not explictly have a GOTPCREL relocation + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the // computation of deltas to final external symbols. Example: @@ -740,7 +734,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( // non_lazy_ptr stubs.
SmallString<128> Name; StringRef Suffix = "$non_lazy_ptr"; - Name += DL->getPrivateGlobalPrefix(); + Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix(); Name += Sym->getName(); Name += Suffix; MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); @@ -763,6 +757,29 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( return MCBinaryExpr::createSub(LHS, RHS, Ctx); } +static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, + const MCSection &Section) { + if (!AsmInfo.isSectionAtomizableBySymbols(Section)) + return true; + + // If it is not dead stripped, it is safe to use private labels. + const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); + if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) + return true; + + return false; +} + +void TargetLoweringObjectFileMachO::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); + bool CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// @@ -918,7 +935,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( COMDATSymName, Selection); } else { SmallString<256> TmpData; - getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM); + Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection); } @@ -943,8 +960,9 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( } void TargetLoweringObjectFileCOFF::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && ((isa<Function>(GV) && TM.getFunctionSections()) || (isa<GlobalVariable>(GV) && TM.getDataSections()))) @@ -1043,7 +1061,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( raw_string_ostream FlagOS(Flag); Mang.getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); - if (Flag[0] == DL->getGlobalPrefix()) + if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 61a66b623928..0a7042ac3db5 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -11,13 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "target-reg-info" using namespace llvm; @@ -34,54 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); } +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); +} + +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is allocatable, or NULL.
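// --- Editor's illustration, not part of the patch ---
// The refactoring above turns the Print* helpers into factories returning
// Printable, a thin wrapper around a printing lambda, so call sites can
// stream the result directly instead of constructing a printer object:
//
//   DEBUG(dbgs() << "assigned " << PrintReg(Reg, TRI) << " covering "
//                << PrintLaneMask(Mask) << '\n');
//
// Each helper captures its arguments by value in the lambda, so the returned
// Printable carries everything it needs to print later.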
const TargetRegisterClass * @@ -161,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, const TargetRegisterInfo *TRI, const MVT::SimpleValueType SVT = MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -179,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class is the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * @@ -260,13 +291,55 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, return BestRC; } +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) { + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != nullptr; + } + + // At most one of the registers is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the registers is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; + + // Plain copy. + return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; +} + +bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const { + // If this source does not incur a cross register bank copy, use it. + return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg); +} + // Compute target-independent register allocator hints to help eliminate copies.
void TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const { + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); @@ -295,6 +368,26 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(Phys); } +bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +bool TargetRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); + if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + if (canRealignStack(MF)) + return true; + DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + } + return false; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 299380d9268b..fc656396ade8 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -211,11 +211,9 @@ unsigned TargetSchedModel::computeOperandLatency( if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() && SchedModel.isComplete()) { - std::string Err; - raw_string_ostream ss(Err); - ss << "DefIdx " << DefIdx << " exceeds machine model writes for " - << *DefMI; - report_fatal_error(ss.str()); + errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + llvm_unreachable("incomplete machine model"); } #endif // FIXME: Automatically giving all implicit defs defaultDefLatency is diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 1e30821dc741..c6bae2434586 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -83,21 +83,20 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // The current basic block being processed. MachineBasicBlock *MBB; - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. + // Keep track of the distance of a MI from the start of the current basic block. DenseMap<MachineInstr*, unsigned> DistanceMap; // Set of already processed instructions in the current block. SmallPtrSet<MachineInstr*, 8> Processed; - // SrcRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies from physical registers to - // virtual registers. e.g. v1024 = move r0. + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies from physical registers to virtual + // registers. e.g. v1024 = move r0. DenseMap<unsigned, unsigned> SrcRegMap; - // DstRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies to physical registers from - // virtual registers. e.g. r1 = move v1024.
+ // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies to physical registers from virtual + // registers. e.g. r1 = move v1024. DenseMap DstRegMap; bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, @@ -110,8 +109,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, unsigned Dist); - bool commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist); + bool commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); @@ -133,6 +132,11 @@ class TwoAddressInstructionPass : public MachineFunctionPass { unsigned SrcIdx, unsigned DstIdx, unsigned Dist, bool shouldOnlyCommute); + bool tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist); void scanUses(unsigned DstReg); void processCopy(MachineInstr *MI); @@ -151,7 +155,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -160,7 +164,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - /// runOnMachineFunction - Pass entry point. + /// Pass entry point. bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -168,7 +172,7 @@ public: char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) @@ -176,10 +180,9 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); -/// sink3AddrInstruction - A two-address instruction has been converted to a -/// three-address instruction to avoid clobbering a register. Try to sink it -/// past the instruction that would kill the above mentioned register to reduce -/// register pressure. +/// A two-address instruction has been converted to a three-address instruction +/// to avoid clobbering a register. Try to sink it past the instruction that +/// would kill the above mentioned register to reduce register pressure. 
bool TwoAddressInstructionPass:: sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineBasicBlock::iterator OldPos) { @@ -195,8 +198,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, unsigned DefReg = 0; SmallSet UseRegs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -231,10 +233,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, KillMI = LIS->getInstructionFromIndex(I->end); } if (!KillMI) { - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = *UI; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { if (!UseMO.isKill()) continue; KillMI = UseMO.getParent(); @@ -312,8 +311,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, return true; } -/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg -/// in current BB. +/// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { MachineInstr *Ret = nullptr; @@ -351,10 +349,10 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, return false; } -/// noUseAfterLastDef - Return true if there are no intervening uses between the -/// last instruction in the MBB that defines the specified register and the -/// two-address instruction which is being processed. It also returns the last -/// def location by reference +/// Return true if there are no intervening uses between the last instruction +/// in the MBB that defines the specified register and the two-address +/// instruction which is being processed. It also returns the last def location +/// by reference. bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; @@ -375,9 +373,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, return !(LastUse > LastDef && LastUse < Dist); } -/// isCopyToReg - Return true if the specified MI is a copy instruction or -/// a extract_subreg instruction. It also returns the source and destination -/// registers and whether they are physical registers by reference. +/// Return true if the specified MI is a copy instruction or an extract_subreg +/// instruction. It also returns the source and destination registers and +/// whether they are physical registers by reference. static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, unsigned &SrcReg, unsigned &DstReg, bool &IsSrcPhys, bool &IsDstPhys) { @@ -397,8 +395,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return true; } -/// isPLainlyKilled - Test if the given register value, which is used by the -// given instruction, is killed by the given instruction. +/// Test if the given register value, which is used by the +/// given instruction, is killed by the given instruction. 
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && @@ -424,7 +422,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, return MI->killsRegister(Reg); } -/// isKilled - Test if the given register value, which is used by the given +/// Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. /// @@ -472,8 +470,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, } } -/// isTwoAddrUse - Return true if the specified MI uses the specified register -/// as a two-address use. If so, return the destination register by reference. +/// Return true if the specified MI uses the specified register as a two-address +/// use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -488,8 +486,8 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findOnlyInterestingUse - Given a register, if has a single in-basic block -/// use, return the use instruction if it's a copy or a two-address use. +/// Given a register, if it has a single in-basic block use, return the use +/// instruction if it's a copy or a two-address use. static MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, MachineRegisterInfo *MRI, @@ -516,8 +514,8 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, return nullptr; } -/// getMappedReg - Return the physical register the specified virtual register -/// might be mapped to. +/// Return the physical register the specified virtual register might be mapped +/// to. static unsigned getMappedReg(unsigned Reg, DenseMap &RegMap) { while (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -531,8 +529,7 @@ getMappedReg(unsigned Reg, DenseMap &RegMap) { return 0; } -/// regsAreCompatible - Return true if the two registers are equal or aliased. -/// +/// Return true if the two registers are equal or aliased. static bool regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { if (RegA == RegB) @@ -543,8 +540,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToCommute - Return true if it's potentially profitable to commute -/// the two-address instruction that's being processed. +/// Return true if it's potentially profitable to commute the two-address +/// instruction that's being processed. bool TwoAddressInstructionPass:: isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, @@ -642,15 +639,15 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// commuteInstruction - Commute a two-address instruction and update the basic -/// block, distance map, and live variables if needed. Return true if it is -/// successful. -bool TwoAddressInstructionPass:: -commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist) { - MachineInstr *MI = mi; +/// Commute a two-address instruction and update the basic block, distance map, +/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, + unsigned RegCIdx, + unsigned Dist) { + unsigned RegC = MI->getOperand(RegCIdx).getReg(); DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); - MachineInstr *NewMI = TII->commuteInstruction(MI); + MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx); if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); @@ -672,8 +669,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi, return true; } -/// isProfitableToConv3Addr - Return true if it is profitable to convert the -/// given 2-address instruction to a 3-address one. +/// Return true if it is profitable to convert the given 2-address instruction +/// to a 3-address one. bool TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: @@ -689,17 +686,18 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// convertInstTo3Addr - Convert the specified two-address instruction into a -/// three address one. Return true if this transformation was successful. +/// Convert the specified two-address instruction into a three address one. +/// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned RegA, unsigned RegB, unsigned Dist) { // FIXME: Why does convertToThreeAddress() need an iterator reference? - MachineFunction::iterator MFI = MBB; + MachineFunction::iterator MFI = MBB->getIterator(); MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); - assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + assert(MBB->getIterator() == MFI && + "convertToThreeAddress changed iterator reference"); if (!NewMI) return false; @@ -730,8 +728,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, return true; } -/// scanUses - Scan forward recursively for only uses, update maps if the use -/// is a copy or a two-address instruction. +/// Scan forward recursively for only uses, update maps if the use is a copy or +/// a two-address instruction. void TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector VirtRegPairs; @@ -777,8 +775,8 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) { } } -/// processCopy - If the specified instruction is not yet processed, process it -/// if it's a copy. For a copy instruction, we find the physical registers the +/// If the specified instruction is not yet processed, process it if it's a +/// copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. /// v1024 = mov r0 @@ -813,9 +811,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { return; } -/// rescheduleMIBelowKill - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill -/// instruction in order to eliminate the need for the copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg', +/// consider moving the instruction below the kill instruction in order to +/// eliminate the need for the copy.
bool TwoAddressInstructionPass:: rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -871,8 +869,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, SmallSet Uses; SmallSet Kills; SmallSet Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -914,8 +911,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, OtherMI->isBranch() || OtherMI->isTerminator()) // Don't move past calls, etc. return false; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -984,8 +980,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, return true; } -/// isDefTooClose - Return true if the re-scheduling will put the given -/// instruction too close to the defs of its register dependencies. +/// Return true if the re-scheduling will put the given instruction too close +/// to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { @@ -1004,10 +1000,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// rescheduleKillAboveMI - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the -/// current two-address instruction in order to eliminate the need for the -/// copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg', +/// consider moving the kill instruction above the current two-address +/// instruction in order to eliminate the need for the copy. bool TwoAddressInstructionPass:: rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1055,8 +1050,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, SmallSet Kills; SmallSet Defs; SmallSet LiveDefs; - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = KillMI->getOperand(i); + for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1094,8 +1088,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Don't move past calls, etc. return false; SmallVector OtherDefs; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1155,13 +1148,68 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, return true; }
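Both rescheduling routines above begin by bucketing the instruction's register operands into use/kill/def sets, so that the later "is it safe to move across this instruction" checks reduce to cheap set-membership tests. A self-contained sketch of that bucketing step; SimpleOp and std::set are stand-ins for MachineOperand and the pass's SmallSets:

#include <cstdio>
#include <set>
#include <vector>

// Toy stand-in for a register MachineOperand.
struct SimpleOp {
  unsigned Reg;
  bool IsUse;  // otherwise a def
  bool IsKill; // only meaningful for uses
};

int main() {
  // r1 = add r1(kill), r2  -- one def, one killed use, one plain use.
  std::vector<SimpleOp> Ops = {
      {1, false, false}, {1, true, true}, {2, true, false}};

  std::set<unsigned> Uses, Kills, Defs;
  for (const SimpleOp &MO : Ops) {
    if (MO.IsUse) {
      Uses.insert(MO.Reg);
      if (MO.IsKill)
        Kills.insert(MO.Reg);
    } else {
      Defs.insert(MO.Reg);
    }
  }

  // With the sets built, legality questions about moving past another
  // instruction become a handful of membership tests, which is how the
  // pass uses them.
  std::printf("uses=%zu kills=%zu defs=%zu\n", Uses.size(), Kills.size(),
              Defs.size());
  return 0;
}

-/// tryInstructionTransform - For the case where an instruction has a single -/// pair of tied register operands, attempt some transformations that may -/// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if no copy needs to be -/// inserted to untie mi's operands (either because they were untied, or -/// because mi was rescheduled, and will be visited again later). If the -/// shouldOnlyCommute flag is true, only instruction commutation is attempted.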
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the +/// given machine instruction to improve opportunities for coalescing and +/// elimination of a register to register copy. +/// +/// 'DstOpIdx' specifies the index of the MI def operand. +/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx' +/// operand is killed by the given instruction. +/// The 'Dist' argument provides the distance of MI from the start of the +/// current basic block and it is used to determine if it is profitable +/// to commute operands in the instruction. +/// +/// Returns true if the transformation happened. Otherwise, returns false. +bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist) { + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); + unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + unsigned OpsNum = MI->getDesc().getNumOperands(); + unsigned OtherOpIdx = MI->getDesc().getNumDefs(); + for (; OtherOpIdx < OpsNum; OtherOpIdx++) { + // The call of findCommutedOpIndices below only checks if BaseOpIdx + // and OtherOpIdx are commutable; it does not really search for + // other commutable operands and does not change the values of passed + // variables. + if (OtherOpIdx == BaseOpIdx || + !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx)) + continue; + + unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + bool AggressiveCommute = false; + + // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp + // operands. This makes the live ranges of DstOp and OtherOp joinable. + bool DoCommute = + !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + + if (!DoCommute && + isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { + DoCommute = true; + AggressiveCommute = true; + } + + // If it's profitable to commute, try to do so. + if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return true; + } + } + return false; +} + +/// For the case where an instruction has a single pair of tied register +/// operands, attempt some transformations that may either eliminate the tied +/// operands or improve the opportunities for coalescing away the register copy. +/// Returns true if no copy needs to be inserted to untie mi's operands +/// (either because they were untied, or because mi was rescheduled, and will +/// be visited again later). If the shouldOnlyCommute flag is true, only +/// instruction commutation is attempted. bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1181,51 +1229,18 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); - // Check if it is profitable to commute the operands. - unsigned SrcOp1, SrcOp2; - unsigned regC = 0; - unsigned regCIdx = ~0U; - bool TryCommute = false; - bool AggressiveCommute = false; - if (MI.isCommutable() && MI.getNumOperands() >= 3 && - TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { - if (SrcIdx == SrcOp1) - regCIdx = SrcOp2; - else if (SrcIdx == SrcOp2) - regCIdx = SrcOp1; - - if (regCIdx != ~0U) { - regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) - // If C dies but B does not, swap the B and C operands. - // This makes the live ranges of A and C joinable.
- TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { - TryCommute = true; - AggressiveCommute = true; - } - } - } + bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); // If the instruction is convertible to 3 Addr, instead // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` // instead of the following code. - // addl %esi, %edi - // movl %edi, %eax + // addl %esi, %edi + // movl %edi, %eax // ret - bool Commuted = false; - - // If it's profitable to commute, try to do so. - if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { - Commuted = true; - ++NumCommuted; - if (AggressiveCommute) - ++NumAggrCommuted; - if (!MI.isConvertibleTo3Addr()) - return false; - } + if (Commuted && !MI.isConvertibleTo3Addr()) + return false; if (shouldOnlyCommute) return false; @@ -1237,6 +1252,13 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return true; } + // If we commuted, regB may have changed so we should re-sample it to avoid + // confusing the three address conversion below. + if (Commuted) { + regB = MI.getOperand(SrcIdx).getReg(); + regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + } + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. @@ -1348,10 +1370,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, SmallVector OrigRegs; if (LIS) { - for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), - MOE = MI.operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg()) - OrigRegs.push_back(MOI->getReg()); + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) + OrigRegs.push_back(MO.getReg()); } } @@ -1536,12 +1557,10 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SrcRegMap[RegA] = RegB; } - if (AllUsesCopied) { if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && MO.isUse()) { if (MO.isKill()) { @@ -1578,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // regB is still used in this instruction, but a kill flag was // removed from a different tied use of regB, so now we need to add // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { MO.setIsKill(true); break; @@ -1588,8 +1606,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } }
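The refactoring above moves the commute search into tryInstructionCommute, whose loop shape is: pin one tied operand, scan every other operand index for a commutable partner, and commute on the first hit that a profitability test accepts. The sketch below reproduces just that loop shape under toy assumptions; isCommutable and commute are stand-ins for the TII hooks findCommutedOpIndices and commuteInstruction, and the "larger value wins" heuristic is purely illustrative:

#include <cstdio>
#include <utility>
#include <vector>

// Placeholder for TII->findCommutedOpIndices: here any two distinct
// operand indices are commutable.
static bool isCommutable(unsigned A, unsigned B) { return A != B; }

// Placeholder commute: swap the two operand values and report success.
static bool commute(std::vector<unsigned> &Ops, unsigned A, unsigned B) {
  std::swap(Ops[A], Ops[B]);
  return true;
}

// Scan every candidate partner for BaseOpIdx and commute on the first
// operand that the (toy) profitability test accepts -- the same loop
// structure as tryInstructionCommute.
static bool tryCommute(std::vector<unsigned> &Ops, unsigned BaseOpIdx,
                       unsigned FirstNonDef) {
  for (unsigned OtherOpIdx = FirstNonDef; OtherOpIdx < Ops.size();
       ++OtherOpIdx) {
    if (OtherOpIdx == BaseOpIdx || !isCommutable(BaseOpIdx, OtherOpIdx))
      continue;
    bool Profitable = Ops[OtherOpIdx] > Ops[BaseOpIdx]; // toy heuristic
    if (Profitable && commute(Ops, BaseOpIdx, OtherOpIdx))
      return true;
  }
  return false;
}

int main() {
  std::vector<unsigned> Ops = {0, 7, 9}; // operand 0 is the def
  bool Did = tryCommute(Ops, /*BaseOpIdx=*/1, /*FirstNonDef=*/1);
  std::printf("commuted: %d, ops: %u %u %u\n", Did, Ops[0], Ops[1], Ops[2]);
  return 0;
}

-/// runOnMachineFunction - Reduce two-address instructions to two operands. -/// +/// Reduce two-address instructions to two operands.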
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); @@ -1599,7 +1616,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); OptLevel = TM.getOptLevel(); bool MadeChange = false; @@ -1614,7 +1631,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { TiedOperandMap TiedOperands; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { - MBB = MBBI; + MBB = &*MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); @@ -1661,8 +1678,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { - // The tied operands have been eliminated or shifted further down the - // block to ease elimination. Continue processing with 'nmi'. + // The tied operands have been eliminated or shifted further down + // the block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); mi = nmi; continue; @@ -1671,9 +1688,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - processTiedPairs(mi, OI->second, Dist); + for (auto &TO : TiedOperands) { + processTiedPairs(mi, TO.second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index d393e103104d..8c9631e435bf 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -71,8 +71,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { // in them. std::vector DeadBlocks; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (!Reachable.count(I)) { - BasicBlock *BB = I; + if (!Reachable.count(&*I)) { + BasicBlock *BB = &*I; DeadBlocks.push_back(BB); while (PHINode *PN = dyn_cast(BB->begin())) { PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); @@ -131,7 +131,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // in them. std::vector DeadBlocks; for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Test for deadness. if (!Reachable.count(BB)) { @@ -167,7 +167,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // Cleanup PHI nodes. for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Prune unneeded PHI entries. 
SmallPtrSet preds(BB->pred_begin(), BB->pred_end()); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 2912bdd63426..bf1c0dce9e56 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -163,11 +163,12 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; - SparseSet PhysRegs; void rewrite(); void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; + void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + public: static char ID; VirtRegRewriter() : MachineFunctionPass(ID) {} @@ -237,10 +238,52 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { return true; } +void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, + unsigned PhysReg) const { + assert(!LI.empty()); + assert(LI.hasSubRanges()); + + typedef std::pair SubRangeIteratorPair; + SmallVector SubRanges; + SlotIndex First; + SlotIndex Last; + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SubRanges.push_back(std::make_pair(&SR, SR.begin())); + if (!First.isValid() || SR.segments.front().start < First) + First = SR.segments.front().start; + if (!Last.isValid() || SR.segments.back().end > Last) + Last = SR.segments.back().end; + } + + // Check all mbb start positions between First and Last while + // simultaneously advancing an iterator for each subrange. + for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First); + MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) { + SlotIndex MBBBegin = MBBI->first; + // Advance all subrange iterators so that their end position is just + // behind MBBBegin (or the iterator is at the end). + LaneBitmask LaneMask = 0; + for (auto &RangeIterPair : SubRanges) { + const LiveInterval::SubRange *SR = RangeIterPair.first; + LiveInterval::const_iterator &SRI = RangeIterPair.second; + while (SRI != SR->end() && SRI->end <= MBBBegin) + ++SRI; + if (SRI == SR->end()) + continue; + if (SRI->start <= MBBBegin) + LaneMask |= SR->LaneMask; + } + if (LaneMask == 0) + continue; + MachineBasicBlock *MBB = MBBI->second; + MBB->addLiveIn(PhysReg, LaneMask); + } +} + // Compute MBB live-in lists from virtual register live ranges and their // assignments. void VirtRegRewriter::addMBBLiveIns() { - SmallVector LiveIn; for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); if (MRI->reg_nodbg_empty(VirtReg)) @@ -254,31 +297,18 @@ void VirtRegRewriter::addMBBLiveIns() { assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); if (LI.hasSubRanges()) { - for (LiveInterval::SubRange &S : LI.subranges()) { - for (const auto &Seg : S.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) { - unsigned SubReg = SR.getSubReg(); - unsigned SubRegIndex = SR.getSubRegIndex(); - unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex); - if ((SubRegLaneMask & S.LaneMask) == 0) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - LiveIn[i]->addLiveIn(SubReg); - } - } - LiveIn.clear(); - } - } + addLiveInsForSubRanges(LI, PhysReg); } else {
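addLiveInsForSubRanges above is essentially a k-way merge: one pass over the sorted MBB start indexes, one monotonic cursor per subrange, and a lane mask ORed together from every subrange whose current segment covers the block start. A self-contained sketch of the same merge over plain integer intervals; the Seg struct, lane mask values and block starts are all made up for illustration:

#include <cstdio>
#include <vector>

struct Seg { int Start, End; }; // half-open [Start, End), sorted, disjoint

int main() {
  // Two "subranges" with their lane masks, plus sorted block start points.
  std::vector<Seg> SubRange[2] = {{{0, 10}, {30, 50}}, {{5, 40}}};
  unsigned LaneMaskOf[2] = {0x1, 0x2};
  std::vector<int> BlockStarts = {0, 8, 20, 35, 60};

  size_t Iter[2] = {0, 0}; // one cursor per subrange, advanced monotonically
  for (int Begin : BlockStarts) {
    unsigned LaneMask = 0;
    for (int R = 0; R < 2; ++R) {
      // Skip segments that end at or before this block start.
      while (Iter[R] < SubRange[R].size() && SubRange[R][Iter[R]].End <= Begin)
        ++Iter[R];
      // If the current segment has already started, this subrange is
      // live-in at this block.
      if (Iter[R] < SubRange[R].size() && SubRange[R][Iter[R]].Start <= Begin)
        LaneMask |= LaneMaskOf[R];
    }
    if (LaneMask) // the real code calls MBB->addLiveIn(PhysReg, LaneMask)
      std::printf("block@%d live-in lanes: 0x%x\n", Begin, LaneMask);
  }
  return 0;
}

- // Scan the segments of LI.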
- for (const auto &Seg : LI.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - LiveIn[i]->addLiveIn(PhysReg); - LiveIn.clear(); + // Go over MBB begin positions and see if we have segments covering them. + // The following works because segments and the MBBIndex list are both + // sorted by slot indexes. + SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(); + for (const auto &Seg : LI) { + I = Indexes->advanceMBBIndex(I, Seg.start); + for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) { + MachineBasicBlock *MBB = I->second; + MBB->addLiveIn(PhysReg); + } } } } @@ -305,7 +335,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); - unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) @@ -319,54 +349,15 @@ void VirtRegRewriter::rewrite() { SmallVector SuperDeads; SmallVector SuperDefs; SmallVector SuperKills; - SmallPtrSet NoReturnInsts; - - // Here we have a SparseSet to hold which PhysRegs are actually encountered - // in the MF we are about to iterate over so that later when we call - // setPhysRegUsed, we are only doing it for physRegs that were actually found - // in the program and not for all of the possible physRegs for the given - // target architecture. If the target has a lot of physRegs, then for a small - // program there will be a significant compile time reduction here. - PhysRegs.clear(); - PhysRegs.setUniverse(TRI->getNumRegs()); - - // The function with uwtable should guarantee that the stack unwinder - // can unwind the stack to the previous frame. Thus, we can't apply the - // noreturn optimization if the caller function has uwtable attribute. - bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { - MachineInstr *MI = MII; + MachineInstr *MI = &*MII; ++MII; - // Check if this instruction is a call to a noreturn function. If this - // is a call to noreturn function and we don't need the stack unwinding - // functionality (i.e. this function does not have uwtable attribute and - // the callee function has the nounwind attribute), then we can ignore - // the definitions set by this instruction. - if (!HasUWTable && IsExitBB && MI->isCall()) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (!MO.isGlobal()) - continue; - const Function *Func = dyn_cast(MO.getGlobal()); - if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || - // We need to keep correct unwind information - // even if the function will not return, since the - // runtime may need it. 
- !Func->hasFnAttribute(Attribute::NoUnwind)) - continue; - NoReturnInsts.insert(MI); - break; - } - } - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -375,15 +366,6 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // If we encounter a VirtReg or PhysReg then get at the PhysReg and add - // it to the physreg bitset. Later we use only the PhysRegs that were - // actually encountered in the MF to populate the MRI's used physregs. - if (MO.isReg() && MO.getReg()) - PhysRegs.insert( - TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? - VRM->getPhys(MO.getReg()) : - MO.getReg()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -418,14 +400,6 @@ void VirtRegRewriter::rewrite() { MO.setIsUndef(true); } else if (!MO.isDead()) { assert(MO.isDef()); - // Things get tricky when we ran out of lane mask bits and - // merged multiple lanes into the overflow bit: In this case - // our subregister liveness tracking isn't precise and we can't - // know what subregister parts are undefined, fall back to the - // implicit super-register def then. - unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg); - if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask)) - SuperDefs.push_back(PhysReg); } } @@ -470,29 +444,5 @@ void VirtRegRewriter::rewrite() { } } } - - // Tell MRI about physical registers in use. - if (NoReturnInsts.empty()) { - for (SparseSet::iterator - RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) - if (!MRI->reg_nodbg_empty(*RegI)) - MRI->setPhysRegUsed(*RegI); - } else { - for (SparseSet::iterator - I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { - unsigned Reg = *I; - if (MRI->reg_nodbg_empty(Reg)) - continue; - // Check if this register has a use that will impact the rest of the - // code. Uses in debug and noreturn instructions do not impact the - // generated code. 
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { - if (!NoReturnInsts.count(&It)) { - MRI->setPhysRegUsed(Reg); - break; - } - } - } - } } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 0d26ed333ca7..52fb922c935a 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -18,66 +18,40 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/WinEHFuncInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "winehprepare" +static cl::opt DisableDemotion( + "disable-demotion", cl::Hidden, + cl::desc( + "Clone multicolor basic blocks but do not demote cross funclet values"), + cl::init(false)); + +static cl::opt DisableCleanups( + "disable-cleanups", cl::Hidden, + cl::desc("Do not remove implausible terminators or other similar cleanups"), + cl::init(false)); + namespace { - -// This map is used to model frame variable usage during outlining, to -// construct a structure type to hold the frame variables in a frame -// allocation block, and to remap the frame variable allocas (including -// spill locations as needed) to GEPs that get the variable from the -// frame allocation structure. -typedef MapVector> FrameVarInfoMap; - -// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't -// quite null. -AllocaInst *getCatchObjectSentinel() { - return static_cast(nullptr) + 1; -} - -typedef SmallSet VisitedBlockSet; - -class LandingPadActions; -class LandingPadMap; - -typedef DenseMap CatchHandlerMapTy; -typedef DenseMap CleanupHandlerMapTy; - + class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
- WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID) { - if (TM) - TheTriple = TM->getTargetTriple(); - } + WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -90,264 +64,27 @@ public: } private: - bool prepareExceptionHandlers(Function &F, - SmallVectorImpl &LPads); - void identifyEHBlocks(Function &F, SmallVectorImpl &LPads); - void promoteLandingPadValues(LandingPadInst *LPad); - void demoteValuesLiveAcrossHandlers(Function &F, - SmallVectorImpl &LPads); - void findSEHEHReturnPoints(Function &F, - SetVector &EHReturnBlocks); - void findCXXEHReturnPoints(Function &F, - SetVector &EHReturnBlocks); - void getPossibleReturnTargets(Function *ParentF, Function *HandlerF, - SetVector &Targets); - void completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &VarInfo); - Function *createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, Value *&ParentFP); - bool outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo); - void addStubInvokeToHandlerIfNeeded(Function *Handler); + void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot); + void + insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, + SmallVectorImpl> &Worklist); + AllocaInst *insertPHILoads(PHINode *PN, Function &F); + void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, + DenseMap &Loads, Function &F); + bool prepareExplicitEH(Function &F); + void colorFunclets(Function &F); - void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); - CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks); - void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, - BasicBlock *EndBB); - - void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); - - Triple TheTriple; + void demotePHIsOnFunclets(Function &F); + void cloneCommonBlocks(Function &F); + void removeImplausibleInstructions(Function &F); + void cleanupPreparedFunclets(Function &F); + void verifyPreparedFunclets(Function &F); // All fields are reset by runOnFunction. - DominatorTree *DT = nullptr; - const TargetLibraryInfo *LibInfo = nullptr; EHPersonality Personality = EHPersonality::Unknown; - CatchHandlerMapTy CatchHandlerMap; - CleanupHandlerMapTy CleanupHandlerMap; - DenseMap LPadMaps; - SmallPtrSet NormalBlocks; - SmallPtrSet EHBlocks; - SetVector EHReturnBlocks; - // This maps landing pad instructions found in outlined handlers to - // the landing pad instruction in the parent function from which they - // were cloned. The cloned/nested landing pad is used as the key - // because the landing pad may be cloned into multiple handlers. - // This map will be used to add the llvm.eh.actions call to the nested - // landing pads after all handlers have been outlined. - DenseMap NestedLPtoOriginalLP; - - // This maps blocks in the parent function which are destinations of - // catch handlers to cloned blocks in (other) outlined handlers. This - // handles the case where a nested landing pads has a catch handler that - // returns to a handler function rather than the parent function. - // The original block is used as the key here because there should only - // ever be one handler function from which the cloned block is not pruned. 
- // The original block will be pruned from the parent function after all - // handlers have been outlined. This map will be used to adjust the - // return instructions of handlers which return to the block that was - // outlined into a handler. This is done after all handlers have been - // outlined but before the outlined code is pruned from the parent function. - DenseMap LPadTargetBlocks; - - // Map from outlined handler to call to parent local address. Only used for - // 32-bit EH. - DenseMap HandlerToParentFP; - - AllocaInst *SEHExceptionCodeSlot = nullptr; -}; - -class WinEHFrameVariableMaterializer : public ValueMaterializer { -public: - WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP, - FrameVarInfoMap &FrameVarInfo); - ~WinEHFrameVariableMaterializer() override {} - - Value *materializeValueFor(Value *V) override; - - void escapeCatchObject(Value *V); - -private: - FrameVarInfoMap &FrameVarInfo; - IRBuilder<> Builder; -}; - -class LandingPadMap { -public: - LandingPadMap() : OriginLPad(nullptr) {} - void mapLandingPad(const LandingPadInst *LPad); - - bool isInitialized() { return OriginLPad != nullptr; } - - bool isOriginLandingPadBlock(const BasicBlock *BB) const; - bool isLandingPadSpecificInst(const Instruction *Inst) const; - - void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const; - -private: - const LandingPadInst *OriginLPad; - // We will normally only see one of each of these instructions, but - // if more than one occurs for some reason we can handle that. - TinyPtrVector ExtractedEHPtrs; - TinyPtrVector ExtractedSelectors; -}; - -class WinEHCloningDirectorBase : public CloningDirector { -public: - WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : Materializer(HandlerFn, ParentFP, VarInfo), - SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), - Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), - LPadMap(LPadMap), ParentFP(ParentFP) {} - - CloningAction handleInstruction(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - - virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) = 0; - virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, - BasicBlock *NewBB) = 0; - virtual CloningAction handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, - BasicBlock *NewBB) = 0; - virtual CloningAction handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, - BasicBlock *NewBB) = 0; - virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) = 0; - - ValueMaterializer *getValueMaterializer() override { return &Materializer; } - -protected: - WinEHFrameVariableMaterializer Materializer; - Type *SelectorIDType; - Type *Int8PtrType; - LandingPadMap &LPadMap; - - /// The value representing the parent frame pointer. 
- Value *ParentFP; -}; - -class WinEHCatchDirector : public WinEHCloningDirectorBase { -public: - WinEHCatchDirector( - Function *CatchFn, Value *ParentFP, Value *Selector, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap, - DenseMap &NestedLPads, - DominatorTree *DT, SmallPtrSetImpl &EHBlocks) - : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap), - CurrentSelector(Selector->stripPointerCasts()), - ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads), - DT(DT), EHBlocks(EHBlocks) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; - - Value *getExceptionVar() { return ExceptionObjectVar; } - TinyPtrVector &getReturnTargets() { return ReturnTargets; } - -private: - Value *CurrentSelector; - - Value *ExceptionObjectVar; - TinyPtrVector ReturnTargets; - - // This will be a reference to the field of the same name in the WinEHPrepare - // object which instantiates this WinEHCatchDirector object. 
- DenseMap &NestedLPtoOriginalLP; - DominatorTree *DT; - SmallPtrSetImpl &EHBlocks; -}; - -class WinEHCleanupDirector : public WinEHCloningDirectorBase { -public: - WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo, - LPadMap) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; -}; - -class LandingPadActions { -public: - LandingPadActions() : HasCleanupHandlers(false) {} - - void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } - void insertCleanupHandler(CleanupHandler *Action) { - Actions.push_back(Action); - HasCleanupHandlers = true; - } - - bool includesCleanup() const { return HasCleanupHandlers; } - - SmallVectorImpl &actions() { return Actions; } - SmallVectorImpl::iterator begin() { return Actions.begin(); } - SmallVectorImpl::iterator end() { return Actions.end(); } - -private: - // Note that this class does not own the ActionHandler objects in this vector. - // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap - // in the WinEHPrepare class. - SmallVector Actions; - bool HasCleanupHandlers; + DenseMap BlockColors; + MapVector> FuncletBlocks; }; } // end anonymous namespace @@ -361,2536 +98,987 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { } bool WinEHPrepare::runOnFunction(Function &Fn) { - // No need to prepare outlined handlers. - if (Fn.hasFnAttribute("wineh-parent")) - return false; - - SmallVector LPads; - SmallVector Resumes; - for (BasicBlock &BB : Fn) { - if (auto *LP = BB.getLandingPadInst()) - LPads.push_back(LP); - if (auto *Resume = dyn_cast(BB.getTerminator())) - Resumes.push_back(Resume); - } - - // No need to prepare functions that lack landing pads. - if (LPads.empty()) + if (!Fn.hasPersonalityFn()) return false; // Classify the personality to see what kind of preparation we need. Personality = classifyEHPersonality(Fn.getPersonalityFn()); - // Do nothing if this is not an MSVC personality. - if (!isMSVCEHPersonality(Personality)) + // Do nothing if this is not a funclet-based personality. + if (!isFuncletEHPersonality(Personality)) return false; - DT = &getAnalysis().getDomTree(); - LibInfo = &getAnalysis().getTLI(); - - // If there were any landing pads, prepareExceptionHandlers will make changes. 
- prepareExceptionHandlers(Fn, LPads); - return true; + return prepareExplicitEH(Fn); } bool WinEHPrepare::doFinalization(Module &M) { return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); -} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB); - -// Finds blocks reachable from the starting set Worklist. Does not follow unwind -// edges or blocks listed in StopPoints. -static void findReachableBlocks(SmallPtrSetImpl &ReachableBBs, - SetVector &Worklist, - const SetVector *StopPoints) { - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - - // Don't cross blocks that we should stop at. - if (StopPoints && StopPoints->count(BB)) - continue; - - if (!ReachableBBs.insert(BB).second) - continue; // Already visited. - - // Don't follow unwind edges of invokes. - if (auto *II = dyn_cast(BB->getTerminator())) { - Worklist.insert(II->getNormalDest()); - continue; - } - - // Otherwise, follow all successors. - Worklist.insert(succ_begin(BB), succ_end(BB)); - } -} - -// Attempt to find an instruction where a block can be split before -// a call to llvm.eh.begincatch and its operands. If the block -// begins with the begincatch call or one of its adjacent operands -// the block will not be split. -static Instruction *findBeginCatchSplitPoint(BasicBlock *BB, - IntrinsicInst *II) { - // If the begincatch call is already the first instruction in the block, - // don't split. - Instruction *FirstNonPHI = BB->getFirstNonPHI(); - if (II == FirstNonPHI) - return nullptr; - - // If either operand is in the same basic block as the instruction and - // isn't used by another instruction before the begincatch call, include it - // in the split block. - auto *Op0 = dyn_cast(II->getOperand(0)); - auto *Op1 = dyn_cast(II->getOperand(1)); - - Instruction *I = II->getPrevNode(); - Instruction *LastI = II; - - while (I == Op0 || I == Op1) { - // If the block begins with one of the operands and there are no other - // instructions between the operand and the begincatch call, don't split. - if (I == FirstNonPHI) - return nullptr; - - LastI = I; - I = I->getPrevNode(); - } - - // If there is at least one instruction in the block before the begincatch - // call and its operands, split the block at either the begincatch or - // its operand. - return LastI; -} - -/// Find all points where exceptional control rejoins normal control flow via -/// llvm.eh.endcatch. Add them to the normal bb reachability worklist. -void WinEHPrepare::findCXXEHReturnPoints( - Function &F, SetVector &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - for (Instruction &I : *BB) { - if (match(&I, m_Intrinsic())) { - Instruction *SplitPt = - findBeginCatchSplitPoint(BB, cast(&I)); - if (SplitPt) { - // Split the block before the llvm.eh.begincatch call to allow - // cleanup and catch code to be distinguished later. - // Do not update BBI because we still need to process the - // portion of the block that we are splitting off. - SplitBlock(BB, SplitPt, DT); - break; - } - } - if (match(&I, m_Intrinsic())) { - // Split the block after the call to llvm.eh.endcatch if there is - // anything other than an unconditional branch, or if the successor - // starts with a phi. 
- auto *Br = dyn_cast(I.getNextNode()); - if (!Br || !Br->isUnconditional() || - isa(Br->getSuccessor(0)->begin())) { - DEBUG(dbgs() << "splitting block " << BB->getName() - << " with llvm.eh.endcatch\n"); - BBI = SplitBlock(BB, I.getNextNode(), DT); - } - // The next BB is normal control flow. - EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0)); - break; - } - } - } -} - -static bool isCatchAllLandingPad(const BasicBlock *BB) { - const LandingPadInst *LP = BB->getLandingPadInst(); - if (!LP) - return false; - unsigned N = LP->getNumClauses(); - return (N > 0 && LP->isCatch(N - 1) && - isa(LP->getClause(N - 1))); -} - -/// Find all points where exceptions control rejoins normal control flow via -/// selector dispatch. -void WinEHPrepare::findSEHEHReturnPoints( - Function &F, SetVector &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - // If the landingpad is a catch-all, treat the whole lpad as if it is - // reachable from normal control flow. - // FIXME: This is imprecise. We need a better way of identifying where a - // catch-all starts and cleanups stop. As far as LLVM is concerned, there - // is no difference. - if (isCatchAllLandingPad(BB)) { - EHReturnBlocks.insert(BB); - continue; - } - - BasicBlock *CatchHandler; - BasicBlock *NextBB; - Constant *Selector; - if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { - // Split the edge if there are multiple predecessors. This creates a place - // where we can insert EH recovery code. - if (!CatchHandler->getSinglePredecessor()) { - DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() - << " to " << CatchHandler->getName() << '\n'); - BBI = CatchHandler = SplitCriticalEdge( - BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler)); - } - EHReturnBlocks.insert(CatchHandler); - } - } -} - -void WinEHPrepare::identifyEHBlocks(Function &F, - SmallVectorImpl &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // Build a set of all non-exceptional blocks and exceptional blocks. - // - Non-exceptional blocks are blocks reachable from the entry block while - // not following invoke unwind edges. - // - Exceptional blocks are blocks reachable from landingpads. Analysis does - // not follow llvm.eh.endcatch blocks, which mark a transition from - // exceptional to normal control. - - if (Personality == EHPersonality::MSVC_CXX) - findCXXEHReturnPoints(F, EHReturnBlocks); - else - findSEHEHReturnPoints(F, EHReturnBlocks); - - DEBUG({ - dbgs() << "identified the following blocks as EH return points:\n"; - for (BasicBlock *BB : EHReturnBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -// Join points should not have phis at this point, unless they are a -// landingpad, in which case we will demote their phis later. -#ifndef NDEBUG - for (BasicBlock *BB : EHReturnBlocks) - assert((BB->isLandingPad() || !isa(BB->begin())) && - "non-lpad EH return block has phi"); -#endif - - // Normal blocks are the blocks reachable from the entry block and all EH - // return points. - SetVector Worklist; - Worklist = EHReturnBlocks; - Worklist.insert(&F.getEntryBlock()); - findReachableBlocks(NormalBlocks, Worklist, nullptr); - DEBUG({ - dbgs() << "marked the following blocks as normal:\n"; - for (BasicBlock *BB : NormalBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - - // Exceptional blocks are the blocks reachable from landingpads that don't - // cross EH return points. 
- Worklist.clear(); - for (auto *LPI : LPads) - Worklist.insert(LPI->getParent()); - findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks); - DEBUG({ - dbgs() << "marked the following blocks as exceptional:\n"; - for (BasicBlock *BB : EHBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -} - -/// Ensure that all values live into and out of exception handlers are stored -/// in memory. -/// FIXME: This falls down when values are defined in one handler and live into -/// another handler. For example, a cleanup defines a value used only by a -/// catch handler. -void WinEHPrepare::demoteValuesLiveAcrossHandlers( - Function &F, SmallVectorImpl &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // identifyEHBlocks() should have been called before this function. - assert(!NormalBlocks.empty()); - - // Try to avoid demoting EH pointer and selector values. They get in the way - // of our pattern matching. - SmallPtrSet EHVals; - for (BasicBlock &BB : F) { - LandingPadInst *LP = BB.getLandingPadInst(); - if (!LP) - continue; - EHVals.insert(LP); - for (User *U : LP->users()) { - auto *EI = dyn_cast(U); - if (!EI) - continue; - EHVals.insert(EI); - for (User *U2 : EI->users()) { - if (auto *PN = dyn_cast(U2)) - EHVals.insert(PN); - } - } - } - - SetVector ArgsToDemote; - SetVector InstrsToDemote; - for (BasicBlock &BB : F) { - bool IsNormalBB = NormalBlocks.count(&BB); - bool IsEHBB = EHBlocks.count(&BB); - if (!IsNormalBB && !IsEHBB) - continue; // Blocks that are neither normal nor EH are unreachable. - for (Instruction &I : BB) { - for (Value *Op : I.operands()) { - // Don't demote static allocas, constants, and labels. - if (isa(Op) || isa(Op) || isa(Op)) - continue; - auto *AI = dyn_cast(Op); - if (AI && AI->isStaticAlloca()) - continue; - - if (auto *Arg = dyn_cast(Op)) { - if (IsEHBB) { - DEBUG(dbgs() << "Demoting argument " << *Arg - << " used by EH instr: " << I << "\n"); - ArgsToDemote.insert(Arg); - } - continue; - } - - // Don't demote EH values. - auto *OpI = cast(Op); - if (EHVals.count(OpI)) - continue; - - BasicBlock *OpBB = OpI->getParent(); - // If a value is produced and consumed in the same BB, we don't need to - // demote it. - if (OpBB == &BB) - continue; - bool IsOpNormalBB = NormalBlocks.count(OpBB); - bool IsOpEHBB = EHBlocks.count(OpBB); - if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) { - DEBUG({ - dbgs() << "Demoting instruction live in-out from EH:\n"; - dbgs() << "Instr: " << *OpI << '\n'; - dbgs() << "User: " << I << '\n'; - }); - InstrsToDemote.insert(OpI); - } - } - } - } - - // Demote values live into and out of handlers. - // FIXME: This demotion is inefficient. We should insert spills at the point - // of definition, insert one reload in each handler that uses the value, and - // insert reloads in the BB used to rejoin normal control flow. - Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt(); - for (Instruction *I : InstrsToDemote) - DemoteRegToStack(*I, false, AllocaInsertPt); - - // Demote arguments separately, and only for uses in EH blocks. 
- for (Argument *Arg : ArgsToDemote) { - auto *Slot = new AllocaInst(Arg->getType(), nullptr, - Arg->getName() + ".reg2mem", AllocaInsertPt); - SmallVector Users(Arg->user_begin(), Arg->user_end()); - for (User *U : Users) { - auto *I = dyn_cast(U); - if (I && EHBlocks.count(I->getParent())) { - auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I); - U->replaceUsesOfWith(Arg, Reload); - } - } - new StoreInst(Arg, Slot, AllocaInsertPt); - } - - // Demote landingpad phis, as the landingpad will be removed from the machine - // CFG. - for (LandingPadInst *LPI : LPads) { - BasicBlock *BB = LPI->getParent(); - while (auto *Phi = dyn_cast(BB->begin())) - DemotePHIToStack(Phi, AllocaInsertPt); - } - - DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and " - << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n"); -} - -bool WinEHPrepare::prepareExceptionHandlers( - Function &F, SmallVectorImpl &LPads) { - // Don't run on functions that are already prepared. - for (LandingPadInst *LPad : LPads) { - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) - if (match(&Inst, m_Intrinsic())) - return false; - } - - identifyEHBlocks(F, LPads); - demoteValuesLiveAcrossHandlers(F, LPads); - - // These containers are used to re-map frame variables that are used in - // outlined catch and cleanup handlers. They will be populated as the - // handlers are outlined. - FrameVarInfoMap FrameVarInfo; - - bool HandlersOutlined = false; - - Module *M = F.getParent(); - LLVMContext &Context = M->getContext(); - - // Create a new function to receive the handler contents. - PointerType *Int8PtrType = Type::getInt8PtrTy(Context); - Type *Int32Type = Type::getInt32Ty(Context); - Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); - - if (isAsynchronousEHPersonality(Personality)) { - // FIXME: Switch the ehptr type to i32 and then switch this. - SEHExceptionCodeSlot = - new AllocaInst(Int8PtrType, nullptr, "seh_exception_code", - F.getEntryBlock().getFirstInsertionPt()); - } - - // In order to handle the case where one outlined catch handler returns - // to a block within another outlined catch handler that would otherwise - // be unreachable, we need to outline the nested landing pad before we - // outline the landing pad which encloses it. - if (!isAsynchronousEHPersonality(Personality)) - std::sort(LPads.begin(), LPads.end(), - [this](LandingPadInst *const &L, LandingPadInst *const &R) { - return DT->properlyDominates(R->getParent(), L->getParent()); - }); - - // This container stores the llvm.eh.recover and IndirectBr instructions - // that make up the body of each landing pad after it has been outlined. - // We need to defer the population of the target list for the indirectbr - // until all landing pads have been outlined so that we can handle the - // case of blocks in the target that are reached only from nested - // landing pads. - SmallVector, 4> LPadImpls; - - for (LandingPadInst *LPad : LPads) { - // Look for evidence that this landingpad has already been processed. - bool LPadHasActionList = false; - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) { - if (match(&Inst, m_Intrinsic())) { - LPadHasActionList = true; - break; - } - } - - // If we've already outlined the handlers for this landingpad, - // there's nothing more to do here. 
- if (LPadHasActionList) - continue; - - // If either of the values in the aggregate returned by the landing pad is - // extracted and stored to memory, promote the stored value to a register. - promoteLandingPadValues(LPad); - - LandingPadActions Actions; - mapLandingPadBlocks(LPad, Actions); - - HandlersOutlined |= !Actions.actions().empty(); - for (ActionHandler *Action : Actions) { - if (Action->hasBeenProcessed()) - continue; - BasicBlock *StartBB = Action->getStartBlock(); - - // SEH doesn't do any outlining for catches. Instead, pass the handler - // basic block addr to llvm.eh.actions and list the block as a return - // target. - if (isAsynchronousEHPersonality(Personality)) { - if (auto *CatchAction = dyn_cast(Action)) { - processSEHCatchHandler(CatchAction, StartBB); - continue; - } - } - - outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); - } - - // Split the block after the landingpad instruction so that it is just a - // call to llvm.eh.actions followed by indirectbr. - assert(!isa(LPadBB->begin()) && "lpad phi not removed"); - SplitBlock(LPadBB, LPad->getNextNode(), DT); - // Erase the branch inserted by the split so we can insert indirectbr. - LPadBB->getTerminator()->eraseFromParent(); - - // Replace all extracted values with undef and ultimately replace the - // landingpad with undef. - SmallVector SEHCodeUses; - SmallVector EHUndefs; - for (User *U : LPad->users()) { - auto *E = dyn_cast(U); - if (!E) - continue; - assert(E->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned Idx = *E->idx_begin(); - assert((Idx == 0 || Idx == 1) && "unexpected index"); - if (Idx == 0 && isAsynchronousEHPersonality(Personality)) - SEHCodeUses.push_back(E); - else - EHUndefs.push_back(E); - } - for (Instruction *E : EHUndefs) { - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); - - // Rewrite uses of the exception pointer to loads of an alloca. - while (!SEHCodeUses.empty()) { - Instruction *E = SEHCodeUses.pop_back_val(); - SmallVector Uses; - for (Use &U : E->uses()) - Uses.push_back(&U); - for (Use *U : Uses) { - auto *I = cast(U->getUser()); - if (isa(I)) - continue; - if (auto *Phi = dyn_cast(I)) - SEHCodeUses.push_back(Phi); - else - U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I)); - } - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - - // Add a call to describe the actions for this landing pad. - std::vector ActionArgs; - for (ActionHandler *Action : Actions) { - // Action codes from docs are: 0 cleanup, 1 catch. - if (auto *CatchAction = dyn_cast(Action)) { - ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); - ActionArgs.push_back(CatchAction->getSelector()); - // Find the frame escape index of the exception object alloca in the - // parent. 
- int FrameEscapeIdx = -1; - Value *EHObj = const_cast(CatchAction->getExceptionVar()); - if (EHObj && !isa(EHObj)) { - auto I = FrameVarInfo.find(EHObj); - assert(I != FrameVarInfo.end() && - "failed to map llvm.eh.begincatch var"); - FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I); - } - ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx)); - } else { - ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); - } - ActionArgs.push_back(Action->getHandlerBlockOrFunc()); - } - CallInst *Recover = - CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB); - - SetVector ReturnTargets; - for (ActionHandler *Action : Actions) { - if (auto *CatchAction = dyn_cast(Action)) { - const auto &CatchTargets = CatchAction->getReturnTargets(); - ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end()); - } - } - IndirectBrInst *Branch = - IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB); - for (BasicBlock *Target : ReturnTargets) - Branch->addDestination(Target); - - if (!isAsynchronousEHPersonality(Personality)) { - // C++ EH must repopulate the targets later to handle the case of - // targets that are reached indirectly through nested landing pads. - LPadImpls.push_back(std::make_pair(Recover, Branch)); - } - - } // End for each landingpad - - // If nothing got outlined, there is no more processing to be done. - if (!HandlersOutlined) - return false; - - // Replace any nested landing pad stubs with the correct action handler. - // This must be done before we remove unreachable blocks because it - // cleans up references to outlined blocks that will be deleted. - for (auto &LPadPair : NestedLPtoOriginalLP) - completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo); - NestedLPtoOriginalLP.clear(); - - // Update the indirectbr instructions' target lists if necessary. - SetVector CheckedTargets; - SmallVector, 4> ActionList; - for (auto &LPadImplPair : LPadImpls) { - IntrinsicInst *Recover = cast(LPadImplPair.first); - IndirectBrInst *Branch = LPadImplPair.second; - - // Get a list of handlers called by - parseEHActions(Recover, ActionList); - - // Add an indirect branch listing possible successors of the catch handlers. - SetVector ReturnTargets; - for (const auto &Action : ActionList) { - if (auto *CA = dyn_cast(Action.get())) { - Function *Handler = cast(CA->getHandlerBlockOrFunc()); - getPossibleReturnTargets(&F, Handler, ReturnTargets); - } - } - ActionList.clear(); - // Clear any targets we already knew about. - for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) { - BasicBlock *KnownTarget = Branch->getDestination(I); - if (ReturnTargets.count(KnownTarget)) - ReturnTargets.remove(KnownTarget); - } - for (BasicBlock *Target : ReturnTargets) { - Branch->addDestination(Target); - // The target may be a block that we excepted to get pruned. - // If it is, it may contain a call to llvm.eh.endcatch. - if (CheckedTargets.insert(Target)) { - // Earlier preparations guarantee that all calls to llvm.eh.endcatch - // will be followed by an unconditional branch. - auto *Br = dyn_cast(Target->getTerminator()); - if (Br && Br->isUnconditional() && - Br != Target->getFirstNonPHIOrDbgOrLifetime()) { - Instruction *Prev = Br->getPrevNode(); - if (match(cast(Prev), m_Intrinsic())) - Prev->eraseFromParent(); - } - } - } - } - LPadImpls.clear(); - - F.addFnAttr("wineh-parent", F.getName()); - - // Delete any blocks that were only used by handlers that were outlined above. 
- removeUnreachableBlocks(F); - - BasicBlock *Entry = &F.getEntryBlock(); - IRBuilder<> Builder(F.getParent()->getContext()); - Builder.SetInsertPoint(Entry->getFirstInsertionPt()); - - Function *FrameEscapeFn = - Intrinsic::getDeclaration(M, Intrinsic::localescape); - Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::localrecover); - SmallVector AllocasToEscape; - - // Scan the entry block for an existing call to llvm.localescape. We need to - // keep escaping those objects. - for (Instruction &I : F.front()) { - auto *II = dyn_cast(&I); - if (II && II->getIntrinsicID() == Intrinsic::localescape) { - auto Args = II->arg_operands(); - AllocasToEscape.append(Args.begin(), Args.end()); - II->eraseFromParent(); - break; - } - } - - // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers with calls to llvm.localrecover. - for (auto &VarInfoEntry : FrameVarInfo) { - Value *ParentVal = VarInfoEntry.first; - TinyPtrVector &Allocas = VarInfoEntry.second; - AllocaInst *ParentAlloca = cast(ParentVal); - - // FIXME: We should try to sink unescaped allocas from the parent frame into - // the child frame. If the alloca is escaped, we have to use the lifetime - // markers to ensure that the alloca is only live within the child frame. - - // Add this alloca to the list of things to escape. - AllocasToEscape.push_back(ParentAlloca); - - // Next replace all outlined allocas that are mapped to it. - for (AllocaInst *TempAlloca : Allocas) { - if (TempAlloca == getCatchObjectSentinel()) - continue; // Skip catch parameter sentinels. - Function *HandlerFn = TempAlloca->getParent()->getParent(); - llvm::Value *FP = HandlerToParentFP[HandlerFn]; - assert(FP); - - // FIXME: Sink this localrecover into the blocks where it is used. - Builder.SetInsertPoint(TempAlloca); - Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); - Value *RecoverArgs[] = { - Builder.CreateBitCast(&F, Int8PtrType, ""), FP, - llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; - Instruction *RecoveredAlloca = - Builder.CreateCall(RecoverFrameFn, RecoverArgs); - - // Add a pointer bitcast if the alloca wasn't an i8. - if (RecoveredAlloca->getType() != TempAlloca->getType()) { - RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); - RecoveredAlloca = cast( - Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType())); - } - TempAlloca->replaceAllUsesWith(RecoveredAlloca); - TempAlloca->removeFromParent(); - RecoveredAlloca->takeName(TempAlloca); - delete TempAlloca; - } - } // End for each FrameVarInfo entry. - - // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry - // block. - Builder.SetInsertPoint(&F.getEntryBlock().back()); - Builder.CreateCall(FrameEscapeFn, AllocasToEscape); - - if (SEHExceptionCodeSlot) { - if (isAllocaPromotable(SEHExceptionCodeSlot)) { - SmallPtrSet UserBlocks; - for (User *U : SEHExceptionCodeSlot->users()) { - if (auto *Inst = dyn_cast(U)) - UserBlocks.insert(Inst->getParent()); - } - PromoteMemToReg(SEHExceptionCodeSlot, *DT); - // After the promotion, kill off dead instructions. 
- for (BasicBlock *BB : UserBlocks) - SimplifyInstructionsInBlock(BB, LibInfo); - } - } - - // Clean up the handler action maps we created for this function - DeleteContainerSeconds(CatchHandlerMap); - CatchHandlerMap.clear(); - DeleteContainerSeconds(CleanupHandlerMap); - CleanupHandlerMap.clear(); - HandlerToParentFP.clear(); - DT = nullptr; - LibInfo = nullptr; - SEHExceptionCodeSlot = nullptr; - EHBlocks.clear(); - NormalBlocks.clear(); - EHReturnBlocks.clear(); - - return HandlersOutlined; -} - -void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { - // If the return values of the landing pad instruction are extracted and - // stored to memory, we want to promote the store locations to reg values. - SmallVector EHAllocas; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts are often passed to store instructions. - // In unoptimized code the stored value will often be loaded and then stored - // again. - for (auto *U : LPad->users()) { - ExtractValueInst *Extract = dyn_cast(U); - if (!Extract) - continue; - - for (auto *EU : Extract->users()) { - if (auto *Store = dyn_cast(EU)) { - auto *AV = cast(Store->getPointerOperand()); - EHAllocas.push_back(AV); - } - } - } - - // We can't do this without a dominator tree. - assert(DT); - - if (!EHAllocas.empty()) { - PromoteMemToReg(EHAllocas, *DT); - EHAllocas.clear(); - } - - // After promotion, some extracts may be trivially dead. Remove them. - SmallVector Users(LPad->user_begin(), LPad->user_end()); - for (auto *U : Users) - RecursivelyDeleteTriviallyDeadInstructions(U); -} - -void WinEHPrepare::getPossibleReturnTargets(Function *ParentF, - Function *HandlerF, - SetVector &Targets) { - for (BasicBlock &BB : *HandlerF) { - // If the handler contains landing pads, check for any - // handlers that may return directly to a block in the - // parent function. - if (auto *LPI = BB.getLandingPadInst()) { - IntrinsicInst *Recover = cast(LPI->getNextNode()); - SmallVector, 4> ActionList; - parseEHActions(Recover, ActionList); - for (const auto &Action : ActionList) { - if (auto *CH = dyn_cast(Action.get())) { - Function *NestedF = cast(CH->getHandlerBlockOrFunc()); - getPossibleReturnTargets(ParentF, NestedF, Targets); - } - } - } - - auto *Ret = dyn_cast(BB.getTerminator()); - if (!Ret) - continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast(Ret->getReturnValue()); - - // If this is the handler for a nested landing pad, the - // return address may have been remapped to a block in the - // parent handler. We're not interested in those. - if (BA->getFunction() != ParentF) - continue; - - Targets.insert(BA->getBasicBlock()); - } -} - -void WinEHPrepare::completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &FrameVarInfo) { - // Get the nested block and erase the unreachable instruction that was - // temporarily inserted as its terminator. - LLVMContext &Context = ParentFn->getContext(); - BasicBlock *OutlinedBB = OutlinedLPad->getParent(); - // If the nested landing pad was outlined before the landing pad that enclosed - // it, it will already be in outlined form. In that case, we just need to see - // if the returns and the enclosing branch instruction need to be updated. 
- IndirectBrInst *Branch = - dyn_cast(OutlinedBB->getTerminator()); - if (!Branch) { - // If the landing pad wasn't in outlined form, it should be a stub with - // an unreachable terminator. - assert(isa(OutlinedBB->getTerminator())); - OutlinedBB->getTerminator()->eraseFromParent(); - // That should leave OutlinedLPad as the last instruction in its block. - assert(&OutlinedBB->back() == OutlinedLPad); - } - - // The original landing pad will have already had its action intrinsic - // built by the outlining loop. We need to clone that into the outlined - // location. It may also be necessary to add references to the exception - // variables to the outlined handler in which this landing pad is nested - // and remap return instructions in the nested handlers that should return - // to an address in the outlined handler. - Function *OutlinedHandlerFn = OutlinedBB->getParent(); - BasicBlock::const_iterator II = OriginalLPad; - ++II; - // The instruction after the landing pad should now be a call to eh.actions. - const Instruction *Recover = II; - const IntrinsicInst *EHActions = cast(Recover); - - // Remap the return target in the nested handler. - SmallVector ActionTargets; - SmallVector, 4> ActionList; - parseEHActions(EHActions, ActionList); - for (const auto &Action : ActionList) { - auto *Catch = dyn_cast(Action.get()); - if (!Catch) - continue; - // The dyn_cast to function here selects C++ catch handlers and skips - // SEH catch handlers. - auto *Handler = dyn_cast(Catch->getHandlerBlockOrFunc()); - if (!Handler) - continue; - // Visit all the return instructions, looking for places that return - // to a location within OutlinedHandlerFn. - for (BasicBlock &NestedHandlerBB : *Handler) { - auto *Ret = dyn_cast(NestedHandlerBB.getTerminator()); - if (!Ret) - continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast(Ret->getReturnValue()); - // The original target will have been in the main parent function, - // but if it is the address of a block that has been outlined, it - // should be a block that was outlined into OutlinedHandlerFn. - assert(BA->getFunction() == ParentFn); - - // Ignore targets that aren't part of an outlined handler function. - if (!LPadTargetBlocks.count(BA->getBasicBlock())) - continue; - - // If the return value is the address ofF a block that we - // previously outlined into the parent handler function, replace - // the return instruction and add the mapped target to the list - // of possible return addresses. - BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()]; - assert(MappedBB->getParent() == OutlinedHandlerFn); - BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB); - Ret->eraseFromParent(); - ReturnInst::Create(Context, NewBA, &NestedHandlerBB); - ActionTargets.push_back(NewBA); - } - } - ActionList.clear(); - - if (Branch) { - // If the landing pad was already in outlined form, just update its targets. - for (unsigned int I = Branch->getNumDestinations(); I > 0; --I) - Branch->removeDestination(I); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - Branch->addDestination(Target->getBasicBlock()); - } else { - // If the landing pad was previously stubbed out, fill in its outlined form. - IntrinsicInst *NewEHActions = cast(EHActions->clone()); - OutlinedBB->getInstList().push_back(NewEHActions); - - // Insert an indirect branch into the outlined landing pad BB. 
- IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - IBr->addDestination(Target->getBasicBlock()); - } -} - -// This function examines a block to determine whether the block ends with a -// conditional branch to a catch handler based on a selector comparison. -// This function is used both by the WinEHPrepare::findSelectorComparison() and -// WinEHCleanupDirector::handleTypeIdFor(). -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB) { - ICmpInst::Predicate Pred; - BasicBlock *TBB, *FBB; - Value *LHS, *RHS; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB))) - return false; - - if (!match(LHS, - m_Intrinsic(m_Constant(Selector))) && - !match(RHS, m_Intrinsic(m_Constant(Selector)))) - return false; - - if (Pred == CmpInst::ICMP_EQ) { - CatchHandler = TBB; - NextBB = FBB; - return true; - } - - if (Pred == CmpInst::ICMP_NE) { - CatchHandler = FBB; - NextBB = TBB; - return true; - } - - return false; -} - -static bool isCatchBlock(BasicBlock *BB) { - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - if (match(cast(II), m_Intrinsic())) - return true; - } - return false; -} - -static BasicBlock *createStubLandingPad(Function *Handler) { - // FIXME: Finish this! - LLVMContext &Context = Handler->getContext(); - BasicBlock *StubBB = BasicBlock::Create(Context, "stub"); - Handler->getBasicBlockList().push_back(StubBB); - IRBuilder<> Builder(StubBB); - LandingPadInst *LPad = Builder.CreateLandingPad( - llvm::StructType::get(Type::getInt8PtrTy(Context), - Type::getInt32Ty(Context), nullptr), - 0); - // Insert a call to llvm.eh.actions so that we don't try to outline this lpad. - Function *ActionIntrin = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions); - Builder.CreateCall(ActionIntrin, {}, "recover"); - LPad->setCleanup(true); - Builder.CreateUnreachable(); - return StubBB; -} - -// Cycles through the blocks in an outlined handler function looking for an -// invoke instruction and inserts an invoke of llvm.donothing with an empty -// landing pad if none is found. The code that generates the .xdata tables for -// the handler needs at least one landing pad to identify the parent function's -// personality. -void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) { - ReturnInst *Ret = nullptr; - UnreachableInst *Unreached = nullptr; - for (BasicBlock &BB : *Handler) { - TerminatorInst *Terminator = BB.getTerminator(); - // If we find an invoke, there is nothing to be done. - auto *II = dyn_cast(Terminator); - if (II) - return; - // If we've already recorded a return instruction, keep looking for invokes. - if (!Ret) - Ret = dyn_cast(Terminator); - // If we haven't recorded an unreachable instruction, try this terminator. - if (!Unreached) - Unreached = dyn_cast(Terminator); - } - - // If we got this far, the handler contains no invokes. We should have seen - // at least one return or unreachable instruction. We'll insert an invoke of - // llvm.donothing ahead of that instruction. - assert(Ret || Unreached); - TerminatorInst *Term; - if (Ret) - Term = Ret; - else - Term = Unreached; - BasicBlock *OldRetBB = Term->getParent(); - BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT); - // SplitBlock adds an unconditional branch instruction at the end of the - // parent block. 
We want to replace that with an invoke call, so we can - // erase it now. - OldRetBB->getTerminator()->eraseFromParent(); - BasicBlock *StubLandingPad = createStubLandingPad(Handler); - Function *F = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); - InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); -} - -// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering -// usually doesn't build LLVM IR, so that's probably the wrong place. -Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, - Value *&ParentFP) { - // x64 uses a two-argument prototype where the parent FP is the second - // argument. x86 uses no arguments, just the incoming EBP value. - LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - FunctionType *FnType; - if (TheTriple.getArch() == Triple::x86_64) { - Type *ArgTys[2] = {Int8PtrType, Int8PtrType}; - FnType = FunctionType::get(RetTy, ArgTys, false); - } else { - FnType = FunctionType::get(RetTy, None, false); - } - - Function *Handler = - Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M); - BasicBlock *Entry = BasicBlock::Create(Context, "entry"); - Handler->getBasicBlockList().push_front(Entry); - if (TheTriple.getArch() == Triple::x86_64) { - ParentFP = &(Handler->getArgumentList().back()); - } else { - assert(M); - Function *FrameAddressFn = - Intrinsic::getDeclaration(M, Intrinsic::frameaddress); - Function *RecoverFPFn = - Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp); - IRBuilder<> Builder(&Handler->getEntryBlock()); - Value *EBP = - Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp"); - Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType); - ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP}); - } - return Handler; -} - -bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo) { - Module *M = SrcFn->getParent(); - LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - - // Create a new function to receive the handler contents. - Value *ParentFP; - Function *Handler; - if (Action->getType() == Catch) { - Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M, - ParentFP); - } else { - Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context), - SrcFn->getName() + ".cleanup", M, ParentFP); - } - Handler->setPersonalityFn(SrcFn->getPersonalityFn()); - HandlerToParentFP[Handler] = ParentFP; - Handler->addFnAttr("wineh-parent", SrcFn->getName()); - BasicBlock *Entry = &Handler->getEntryBlock(); - - // Generate a standard prolog to setup the frame recovery structure. 
- IRBuilder<> Builder(Context); - Builder.SetInsertPoint(Entry); - Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); - - std::unique_ptr Director; - - ValueToValueMapTy VMap; - - LandingPadMap &LPadMap = LPadMaps[LPad]; - if (!LPadMap.isInitialized()) - LPadMap.mapLandingPad(LPad); - if (auto *CatchAction = dyn_cast(Action)) { - Constant *Sel = CatchAction->getSelector(); - Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo, - LPadMap, NestedLPtoOriginalLP, DT, - EHBlocks)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - ConstantInt::get(Type::getInt32Ty(Context), 1)); - } else { - Director.reset( - new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - UndefValue::get(Type::getInt32Ty(Context))); - } - - SmallVector Returns; - ClonedCodeInfo OutlinedFunctionInfo; - - // If the start block contains PHI nodes, we need to map them. - BasicBlock::iterator II = StartBB->begin(); - while (auto *PN = dyn_cast(II)) { - bool Mapped = false; - // Look for PHI values that we have already mapped (such as the selector). - for (Value *Val : PN->incoming_values()) { - if (VMap.count(Val)) { - VMap[PN] = VMap[Val]; - Mapped = true; - } - } - // If we didn't find a match for this value, map it as an undef. - if (!Mapped) { - VMap[PN] = UndefValue::get(PN->getType()); - } - ++II; - } - - // The landing pad value may be used by PHI nodes. It will ultimately be - // eliminated, but we need it in the map for intermediate handling. - VMap[LPad] = UndefValue::get(LPad->getType()); - - // Skip over PHIs and, if applicable, landingpad instructions. - II = StartBB->getFirstInsertionPt(); - - CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, - /*ModuleLevelChanges=*/false, Returns, "", - &OutlinedFunctionInfo, Director.get()); - - // Move all the instructions in the cloned "entry" block into our entry block. - // Depending on how the parent function was laid out, the block that will - // correspond to the outlined entry block may not be the first block in the - // list. We can recognize it, however, as the cloned block which has no - // predecessors. Any other block wouldn't have been cloned if it didn't - // have a predecessor which was also cloned. - Function::iterator ClonedIt = std::next(Function::iterator(Entry)); - while (!pred_empty(ClonedIt)) - ++ClonedIt; - BasicBlock *ClonedEntryBB = ClonedIt; - assert(ClonedEntryBB); - Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList()); - ClonedEntryBB->eraseFromParent(); - - // Make sure we can identify the handler's personality later. - addStubInvokeToHandlerIfNeeded(Handler); - - if (auto *CatchAction = dyn_cast(Action)) { - WinEHCatchDirector *CatchDirector = - reinterpret_cast(Director.get()); - CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); - CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); - - // Look for blocks that are not part of the landing pad that we just - // outlined but terminate with a call to llvm.eh.endcatch and a - // branch to a block that is in the handler we just outlined. - // These blocks will be part of a nested landing pad that intends to - // return to an address in this handler. This case is best handled - // after both landing pads have been outlined, so for now we'll just - // save the association of the blocks in LPadTargetBlocks. The - // return instructions which are created from these branches will be - // replaced after all landing pads have been outlined. 
- for (const auto MapEntry : VMap) { - // VMap maps all values and blocks that were just cloned, but dead - // blocks which were pruned will map to nullptr. - if (!isa(MapEntry.first) || MapEntry.second == nullptr) - continue; - const BasicBlock *MappedBB = cast(MapEntry.first); - for (auto *Pred : predecessors(const_cast(MappedBB))) { - auto *Branch = dyn_cast(Pred->getTerminator()); - if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1) - continue; - BasicBlock::iterator II = const_cast(Branch); - --II; - if (match(cast(II), m_Intrinsic())) { - // This would indicate that a nested landing pad wants to return - // to a block that is outlined into two different handlers. - assert(!LPadTargetBlocks.count(MappedBB)); - LPadTargetBlocks[MappedBB] = cast(MapEntry.second); - } - } - } - } // End if (CatchAction) - - Action->setHandlerBlockOrFunc(Handler); - - return true; -} - -/// This BB must end in a selector dispatch. All we need to do is pass the -/// handler block to llvm.eh.actions and list it as a possible indirectbr -/// target. -void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction, - BasicBlock *StartBB) { - BasicBlock *HandlerBB; - BasicBlock *NextBB; - Constant *Selector; - bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB); - if (Res) { - // If this was EH dispatch, this must be a conditional branch to the handler - // block. - // FIXME: Handle instructions in the dispatch block. Currently we drop them, - // leading to crashes if some optimization hoists stuff here. - assert(CatchAction->getSelector() && HandlerBB && - "expected catch EH dispatch"); - } else { - // This must be a catch-all. Split the block after the landingpad. - assert(CatchAction->getSelector()->isNullValue() && "expected catch-all"); - HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT); - } - IRBuilder<> Builder(HandlerBB->getFirstInsertionPt()); - Function *EHCodeFn = Intrinsic::getDeclaration( - StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode); - Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode"); - Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType()); - Builder.CreateStore(Code, SEHExceptionCodeSlot); - CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB)); - TinyPtrVector Targets(HandlerBB); - CatchAction->setReturnTargets(Targets); -} - -void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { - // Each instance of this class should only ever be used to map a single - // landing pad. - assert(OriginLPad == nullptr || OriginLPad == LPad); - - // If the landing pad has already been mapped, there's nothing more to do. - if (OriginLPad == LPad) - return; - - OriginLPad = LPad; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts will have been promoted to reg values before - // this routine is called. 
- for (auto *U : LPad->users()) { - const ExtractValueInst *Extract = dyn_cast(U); - if (!Extract) - continue; - assert(Extract->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned int Idx = *(Extract->idx_begin()); - assert((Idx == 0 || Idx == 1) && - "Unexpected operation: extracting an unknown landing pad element"); - if (Idx == 0) { - ExtractedEHPtrs.push_back(Extract); - } else if (Idx == 1) { - ExtractedSelectors.push_back(Extract); - } - } -} - -bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { - return BB->getLandingPadInst() == OriginLPad; -} - -bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { - if (Inst == OriginLPad) - return true; - for (auto *Extract : ExtractedEHPtrs) { - if (Inst == Extract) - return true; - } - for (auto *Extract : ExtractedSelectors) { - if (Inst == Extract) - return true; - } - return false; -} - -void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const { - // Remap all landing pad extract instructions to the specified values. - for (auto *Extract : ExtractedEHPtrs) - VMap[Extract] = EHPtrValue; - for (auto *Extract : ExtractedSelectors) - VMap[Extract] = SelectorValue; -} - -static bool isLocalAddressCall(const Value *V) { - return match(const_cast(V), m_Intrinsic()); -} - -CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If this is one of the boilerplate landing pad instructions, skip it. - // The instruction will have already been remapped in VMap. - if (LPadMap.isLandingPadSpecificInst(Inst)) - return CloningDirector::SkipInstruction; - - // Nested landing pads that have not already been outlined will be cloned as - // stubs, with just the landingpad instruction and an unreachable instruction. - // When all landingpads have been outlined, we'll replace this with the - // llvm.eh.actions call and indirect branch created when the landing pad was - // outlined. - if (auto *LPad = dyn_cast(Inst)) { - return handleLandingPad(VMap, LPad, NewBB); - } - - // Nested landing pads that have already been outlined will be cloned in their - // outlined form, but we need to intercept the ibr instruction to filter out - // targets that do not return to the handler we are outlining. - if (auto *IBr = dyn_cast(Inst)) { - return handleIndirectBr(VMap, IBr, NewBB); - } - - if (auto *Invoke = dyn_cast(Inst)) - return handleInvoke(VMap, Invoke, NewBB); - - if (auto *Resume = dyn_cast(Inst)) - return handleResume(VMap, Resume, NewBB); - - if (auto *Cmp = dyn_cast(Inst)) - return handleCompare(VMap, Cmp, NewBB); - - if (match(Inst, m_Intrinsic())) - return handleBeginCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic())) - return handleEndCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic())) - return handleTypeIdFor(VMap, Inst, NewBB); - - // When outlining llvm.localaddress(), remap that to the second argument, - // which is the FP of the parent. - if (isLocalAddressCall(Inst)) { - VMap[Inst] = ParentFP; - return CloningDirector::SkipInstruction; - } - - // Continue with the default cloning behavior. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // If the instruction after the landing pad is a call to llvm.eh.actions - // the landing pad has already been outlined. 
In this case, we should - // clone it because it may return to a block in the handler we are - // outlining now that would otherwise be unreachable. The landing pads - // are sorted before outlining begins to enable this case to work - // properly. - const Instruction *NextI = LPad->getNextNode(); - if (match(NextI, m_Intrinsic())) - return CloningDirector::CloneInstruction; - - // If the landing pad hasn't been outlined yet, the landing pad we are - // outlining now does not dominate it and so it cannot return to a block - // in this handler. In that case, we can just insert a stub landing - // pad now and patch it up later. - Instruction *NewInst = LPad->clone(); - if (LPad->hasName()) - NewInst->setName(LPad->getName()); - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast(NewInst)] = LPad; - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // The argument to the call is some form of the first element of the - // landingpad aggregate value, but that doesn't matter. It isn't used - // here. - // The second argument is an outparameter where the exception object will be - // stored. Typically the exception object is a scalar, but it can be an - // aggregate when catching by value. - // FIXME: Leave something behind to indicate where the exception object lives - // for this handler. Should it be part of llvm.eh.actions? - assert(ExceptionObjectVar == nullptr && "Multiple calls to " - "llvm.eh.begincatch found while " - "outlining catch handler."); - ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); - if (isa(ExceptionObjectVar)) - return CloningDirector::SkipInstruction; - assert(cast(ExceptionObjectVar)->isStaticAlloca() && - "catch parameter is not static alloca"); - Materializer.escapeCatchObject(ExceptionObjectVar); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast(Inst); - // It might be interesting to track whether or not we are inside a catch - // function, but that might make the algorithm more brittle than it needs - // to be. - - // The end catch call can occur in one of two places: either in a - // landingpad block that is part of the catch handlers exception mechanism, - // or at the end of the catch block. However, a catch-all handler may call - // end catch from the original landing pad. If the call occurs in a nested - // landing pad block, we must skip it and continue so that the landing pad - // gets cloned. - auto *ParentBB = IntrinCall->getParent(); - if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) - return CloningDirector::SkipInstruction; - - // If an end catch occurs anywhere else we want to terminate the handler - // with a return to the code that follows the endcatch call. If the - // next instruction is not an unconditional branch, we need to split the - // block to provide a clear target for the return instruction. 
- BasicBlock *ContinueBB; - auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); - const BranchInst *Branch = dyn_cast(Next); - if (!Branch || !Branch->isUnconditional()) { - // We're interrupting the cloning process at this location, so the - // const_cast we're doing here will not cause a problem. - ContinueBB = SplitBlock(const_cast(ParentBB), - const_cast(cast(Next))); - } else { - ContinueBB = Branch->getSuccessor(0); - } - - ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); - ReturnTargets.push_back(ContinueBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast(Inst); - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector) - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - else - VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - // Tell the caller not to clone this instruction. - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // If this indirect branch is not part of a landing pad block, just clone it. - const BasicBlock *ParentBB = IBr->getParent(); - if (!ParentBB->isLandingPad()) - return CloningDirector::CloneInstruction; - - // If it is part of a landing pad, we want to filter out target blocks - // that are not part of the handler we are outlining. - const LandingPadInst *LPad = ParentBB->getLandingPadInst(); - - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast(VMap[LPad])] = LPad; - - // We should only get here for landing pads that have already been outlined. - assert(match(LPad->getNextNode(), m_Intrinsic())); - - // Copy the indirectbr, but only include targets that were previously - // identified as EH blocks and are dominated by the nested landing pad. - SetVector ReturnTargets; - for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) { - auto *TargetBB = IBr->getDestination(I); - if (EHBlocks.count(const_cast(TargetBB)) && - DT->dominates(ParentBB, TargetBB)) { - DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n"); - ReturnTargets.insert(TargetBB); - } - } - IndirectBrInst *NewBranch = - IndirectBrInst::Create(const_cast(IBr->getAddress()), - ReturnTargets.size(), NewBB); - for (auto *Target : ReturnTargets) - NewBranch->addDestination(const_cast(Target)); - - // The operands and targets of the branch instruction are remapped later - // because it is a terminator. Tell the cloning code to clone the - // blocks we just added to the target list. - return CloningDirector::CloneSuccessors; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, BasicBlock *NewBB) { - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, BasicBlock *NewBB) { - // Resume instructions shouldn't be reachable from catch handlers. 
- // We still need to handle it, but it will be pruned. - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - const IntrinsicInst *IntrinCall = nullptr; - if (match(Compare->getOperand(0), m_Intrinsic())) { - IntrinCall = dyn_cast(Compare->getOperand(0)); - } else if (match(Compare->getOperand(1), - m_Intrinsic())) { - IntrinCall = dyn_cast(Compare->getOperand(1)); - } - if (IntrinCall) { - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector->stripPointerCasts()) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - } else { - VMap[Compare] = ConstantInt::get(SelectorIDType, 0); - } - return CloningDirector::SkipInstruction; - } - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // The MS runtime will terminate the process if an exception occurs in a - // cleanup handler, so we shouldn't encounter landing pads in the actual - // cleanup code, but they may appear in catch blocks. Depending on where - // we started cloning we may see one, but it will get dropped during dead - // block pruning. - Instruction *NewInst = new UnreachableInst(NewBB->getContext()); - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup code may flow into catch blocks or the catch block may be part - // of a branch that will be optimized away. We'll insert a return - // instruction now, but it may be pruned before the cloning process is - // complete. - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup handlers nested within catch handlers may begin with a call to - // eh.endcatch. We can just ignore that instruction. - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If we encounter a selector comparison while cloning a cleanup handler, - // we want to stop cloning immediately. Anything after the dispatch - // will be outlined into a different handler. - BasicBlock *CatchHandler; - Constant *Selector; - BasicBlock *NextBB; - if (isSelectorDispatch(const_cast(Inst->getParent()), - CatchHandler, Selector, NextBB)) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; - } - // If eg.typeid.for is called for any other reason, it can be ignored. 
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // No special handling is required for cleanup cloning. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( - ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) { - // All invokes in cleanup handlers can be replaced with calls. - SmallVector CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); - // Insert a normal call instruction... - CallInst *NewCall = - CallInst::Create(const_cast(Invoke->getCalledValue()), CallArgs, - Invoke->getName(), NewBB); - NewCall->setCallingConv(Invoke->getCallingConv()); - NewCall->setAttributes(Invoke->getAttributes()); - NewCall->setDebugLoc(Invoke->getDebugLoc()); - VMap[Invoke] = NewCall; - - // Remap the operands. - llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer); - - // Insert an unconditional branch to the normal destination. - BranchInst::Create(Invoke->getNormalDest(), NewBB); - - // The unwind destination won't be cloned into the new function, so - // we don't need to clean up its phi nodes. - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block. - return CloningDirector::CloneSuccessors; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleResume( - ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction -WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - if (match(Compare->getOperand(0), m_Intrinsic()) || - match(Compare->getOperand(1), m_Intrinsic())) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - return CloningDirector::SkipInstruction; - } - return CloningDirector::CloneInstruction; -} - -WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( - Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo) - : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { - BasicBlock *EntryBB = &OutlinedFn->getEntryBlock(); - - // New allocas should be inserted in the entry block, but after the parent FP - // is established if it is an instruction. - Instruction *InsertPoint = EntryBB->getFirstInsertionPt(); - if (auto *FPInst = dyn_cast(ParentFP)) - InsertPoint = FPInst->getNextNode(); - Builder.SetInsertPoint(EntryBB, InsertPoint); -} - -Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { - // If we're asked to materialize a static alloca, we temporarily create an - // alloca in the outlined function and add this to the FrameVarInfo map. When - // all the outlining is complete, we'll replace these temporary allocas with - // calls to llvm.localrecover. - if (auto *AV = dyn_cast(V)) { - assert(AV->isStaticAlloca() && - "cannot materialize un-demoted dynamic alloca"); - AllocaInst *NewAlloca = dyn_cast(AV->clone()); - Builder.Insert(NewAlloca, AV->getName()); - FrameVarInfo[AV].push_back(NewAlloca); - return NewAlloca; - } - - if (isa(V) || isa(V)) { - Function *Parent = isa(V) - ? 
cast(V)->getParent()->getParent() - : cast(V)->getParent(); - errs() - << "Failed to demote instruction used in exception handler of function " - << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n"; - errs() << " " << *V << '\n'; - report_fatal_error("WinEHPrepare failed to demote instruction"); - } - - // Don't materialize other values. - return nullptr; -} - -void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { - // Catch parameter objects have to live in the parent frame. When we see a use - // of a catch parameter, add a sentinel to the multimap to indicate that it's - // used from another handler. This will prevent us from trying to sink the - // alloca into the handler and ensure that the catch parameter is present in - // the call to llvm.localescape. - FrameVarInfo[V].push_back(getCatchObjectSentinel()); -} - -// This function maps the catch and cleanup handlers that are reachable from the -// specified landing pad. The landing pad sequence will have this basic shape: -// -// -// -// -// -// -// -// -// ... -// -// Any of the cleanup slots may be absent. The cleanup slots may be occupied by -// any arbitrary control flow, but all paths through the cleanup code must -// eventually reach the next selector comparison and no path can skip to a -// different selector comparisons, though some paths may terminate abnormally. -// Therefore, we will use a depth first search from the start of any given -// cleanup block and stop searching when we find the next selector comparison. -// -// If the landingpad instruction does not have a catch clause, we will assume -// that any instructions other than selector comparisons and catch handlers can -// be ignored. In practice, these will only be the boilerplate instructions. -// -// The catch handlers may also have any control structure, but we are only -// interested in the start of the catch handlers, so we don't need to actually -// follow the flow of the catch handlers. The start of the catch handlers can -// be located from the compare instructions, but they can be skipped in the -// flow by following the contrary branch. -void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, - LandingPadActions &Actions) { - unsigned int NumClauses = LPad->getNumClauses(); - unsigned int HandlersFound = 0; - BasicBlock *BB = LPad->getParent(); - - DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); - - if (NumClauses == 0) { - findCleanupHandlers(Actions, BB, nullptr); - return; - } - - VisitedBlockSet VisitedBlocks; - - while (HandlersFound != NumClauses) { - BasicBlock *NextBB = nullptr; - - // Skip over filter clauses. - if (LPad->isFilter(HandlersFound)) { - ++HandlersFound; - continue; - } - - // See if the clause we're looking for is a catch-all. - // If so, the catch begins immediately. - Constant *ExpectedSelector = - LPad->getClause(HandlersFound)->stripPointerCasts(); - if (isa(ExpectedSelector)) { - // The catch all must occur last. - assert(HandlersFound == NumClauses - 1); - - // There can be additional selector dispatches in the call chain that we - // need to ignore. - BasicBlock *CatchBlock = nullptr; - Constant *Selector; - while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchBlock->getName() << "\n"); - BB = NextBB; - } - - // Add the catch handler to the action list. 
- CatchHandler *Action = nullptr; - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - // If the CatchHandlerMap already has an entry for this BB, re-use it. - Action = CatchHandlerMap[BB]; - assert(Action->getSelector() == ExpectedSelector); - } else { - // We don't expect a selector dispatch, but there may be a call to - // llvm.eh.begincatch, which separates catch handling code from - // cleanup code in the same control flow. This call looks for the - // begincatch intrinsic. - Action = findCatchHandler(BB, NextBB, VisitedBlocks); - if (Action) { - // For C++ EH, check if there is any interesting cleanup code before - // we begin the catch. This is important because cleanups cannot - // rethrow exceptions but code called from catches can. For SEH, it - // isn't important if some finally code before a catch-all is executed - // out of line or after recovering from the exception. - if (Personality == EHPersonality::MSVC_CXX) - findCleanupHandlers(Actions, BB, BB); - } else { - // If an action was not found, it means that the control flows - // directly into the catch-all handler and there is no cleanup code. - // That's an expected situation and we must create a catch action. - // Since this is a catch-all handler, the selector won't actually - // appear in the code anywhere. ExpectedSelector here is the constant - // null ptr that we got from the landing pad instruction. - Action = new CatchHandler(BB, ExpectedSelector, nullptr); - CatchHandlerMap[BB] = Action; - } - } - Actions.insertCatchHandler(Action); - DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n"); - ++HandlersFound; - - // Once we reach a catch-all, don't expect to hit a resume instruction. - BB = nullptr; - break; - } - - CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); - assert(CatchAction); - - // See if there is any interesting code executed before the dispatch. - findCleanupHandlers(Actions, BB, CatchAction->getStartBlock()); - - // When the source program contains multiple nested try blocks the catch - // handlers can get strung together in such a way that we can encounter - // a dispatch for a selector that we've already had a handler for. - if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) { - ++HandlersFound; - - // Add the catch handler to the action list. - DEBUG(dbgs() << " Found catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); - Actions.insertCatchHandler(CatchAction); - } else { - // Under some circumstances optimized IR will flow unconditionally into a - // handler block without checking the selector. This can only happen if - // the landing pad has a catch-all handler and the handler for the - // preceeding catch clause is identical to the catch-call handler - // (typically an empty catch). In this case, the handler must be shared - // by all remaining clauses. - if (isa( - CatchAction->getSelector()->stripPointerCasts())) { - DEBUG(dbgs() << " Applying early catch-all handler in block " - << CatchAction->getStartBlock()->getName() - << " to all remaining clauses.\n"); - Actions.insertCatchHandler(CatchAction); - return; - } - - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); - } - - // Move on to the block after the catch handler. - BB = NextBB; - } - - // If we didn't wind up in a catch-all, see if there is any interesting code - // executed before the resume. 
- findCleanupHandlers(Actions, BB, BB); - - // It's possible that some optimization moved code into a landingpad that - // wasn't - // previously being used for cleanup. If that happens, we need to execute - // that - // extra code from a cleanup handler. - if (Actions.includesCleanup() && !LPad->isCleanup()) - LPad->setCleanup(true); -} - -// This function searches starting with the input block for the next -// block that terminates with a branch whose condition is based on a selector -// comparison. This may be the input block. See the mapLandingPadBlocks -// comments for a discussion of control flow assumptions. -// -CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, - BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks) { - // See if we've already found a catch handler use it. - // Call count() first to avoid creating a null entry for blocks - // we haven't seen before. - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - CatchHandler *Action = cast(CatchHandlerMap[BB]); - NextBB = Action->getNextBB(); - return Action; - } - - // VisitedBlocks applies only to the current search. We still - // need to consider blocks that we've visited while mapping other - // landing pads. - VisitedBlocks.insert(BB); - - BasicBlock *CatchBlock = nullptr; - Constant *Selector = nullptr; - - // If this is the first time we've visited this block from any landing pad - // look to see if it is a selector dispatch block. - if (!CatchHandlerMap.count(BB)) { - if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - CatchHandler *Action = new CatchHandler(BB, Selector, NextBB); - CatchHandlerMap[BB] = Action; - return Action; - } - // If we encounter a block containing an llvm.eh.begincatch before we - // find a selector dispatch block, the handler is assumed to be - // reached unconditionally. This happens for catch-all blocks, but - // it can also happen for other catch handlers that have been combined - // with the catch-all handler during optimization. - if (isCatchBlock(BB)) { - PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext()); - Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy); - CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr); - CatchHandlerMap[BB] = Action; - return Action; - } - } - - // Visit each successor, looking for the dispatch. - // FIXME: We expect to find the dispatch quickly, so this will probably - // work better as a breadth first search. - for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; - - CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks); - if (Action) - return Action; - } - return nullptr; -} - -// These are helper functions to combine repeated code from findCleanupHandlers. -static void createCleanupHandler(LandingPadActions &Actions, - CleanupHandlerMapTy &CleanupHandlerMap, - BasicBlock *BB) { - CleanupHandler *Action = new CleanupHandler(BB); - CleanupHandlerMap[BB] = Action; - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Found cleanup code in block " - << Action->getStartBlock()->getName() << "\n"); -} - -static CallSite matchOutlinedFinallyCall(BasicBlock *BB, - Instruction *MaybeCall) { - // Look for finally blocks that Clang has already outlined for us. 
-static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
-                                         Instruction *MaybeCall) {
-  // Look for finally blocks that Clang has already outlined for us.
-  //   %fp = call i8* @llvm.localaddress()
-  //   call void @"fin$parent"(iN 1, i8* %fp)
-  if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
-    MaybeCall = MaybeCall->getNextNode();
-  CallSite FinallyCall(MaybeCall);
-  if (!FinallyCall || FinallyCall.arg_size() != 2)
-    return CallSite();
-  if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
-    return CallSite();
-  if (!isLocalAddressCall(FinallyCall.getArgument(1)))
-    return CallSite();
-  return FinallyCall;
-}
-
-static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
-  // Skip blocks containing only a single unconditional branch.
-  while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
-    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
-    if (Br && Br->isUnconditional())
-      BB = Br->getSuccessor(0);
-    else
-      return BB;
-  }
-  return BB;
-}
-
-// This function searches starting with the input block for the next block
-// that contains code that is not part of a catch handler and would not be
-// eliminated during handler outlining.
-//
-void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
-                                       BasicBlock *StartBB,
-                                       BasicBlock *EndBB) {
-  // Here we will skip over the following:
-  //
-  // landing pad prolog:
-  //
-  // Unconditional branches
-  //
-  // Selector dispatch
-  //
-  // Resume pattern
-  //
-  // Anything else marks the start of an interesting block
-
-  BasicBlock *BB = StartBB;
-  // Anything other than an unconditional branch will kick us out of this loop
-  // one way or another.
-  while (BB) {
-    BB = followSingleUnconditionalBranches(BB);
-    // If we've already scanned this block, don't scan it again.  If it is
-    // a cleanup block, there will be an action in the CleanupHandlerMap.
-    // If we've scanned it and it is not a cleanup block, there will be a
-    // nullptr in the CleanupHandlerMap.  If we have not scanned it, there
-    // will be no entry in the CleanupHandlerMap.  We must call count() first
-    // to avoid creating a null entry for blocks we haven't scanned.
-    if (CleanupHandlerMap.count(BB)) {
-      if (auto *Action = CleanupHandlerMap[BB]) {
-        Actions.insertCleanupHandler(Action);
-        DEBUG(dbgs() << "  Found cleanup code in block "
-                     << Action->getStartBlock()->getName() << "\n");
-        // FIXME: This cleanup might chain into another, and we need to
-        //        discover that.
-        return;
-      } else {
-        // Here we handle the case where the cleanup handler map contains a
-        // value for this block but the value is a nullptr.  This means that
-        // we have previously analyzed the block and determined that it did
-        // not contain any cleanup code.  Based on the earlier analysis, we
-        // know the block must end in either an unconditional branch, a
-        // resume or a conditional branch that is predicated on a comparison
-        // with a selector.  Either the resume or the selector dispatch
-        // would terminate the search for cleanup code, so the unconditional
-        // branch is the only case for which we might need to continue
-        // searching.
-        BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
-        if (SuccBB == BB || SuccBB == EndBB)
-          return;
-        BB = SuccBB;
-        continue;
-      }
-    }
-
-    // Create an entry in the cleanup handler map for this block.  Initially
-    // we create an entry that says this isn't a cleanup block.  If we find
-    // cleanup code, the caller will replace this entry.
-    CleanupHandlerMap[BB] = nullptr;
-
-    TerminatorInst *Terminator = BB->getTerminator();
-
-    // Landing pad blocks have extra instructions we need to accept.
-    LandingPadMap *LPadMap = nullptr;
-    if (BB->isLandingPad()) {
-      LandingPadInst *LPad = BB->getLandingPadInst();
-      LPadMap = &LPadMaps[LPad];
-      if (!LPadMap->isInitialized())
-        LPadMap->mapLandingPad(LPad);
-    }
-
-    // Look for the bare resume pattern:
-    //   %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
-    //   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
-    //   resume { i8*, i32 } %lpad.val2
-    if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
-      InsertValueInst *Insert1 = nullptr;
-      InsertValueInst *Insert2 = nullptr;
-      Value *ResumeVal = Resume->getOperand(0);
-      // If the resume value isn't a phi or landingpad value, it should be a
-      // series of insertions.  Identify them so we can avoid them when
-      // scanning for cleanups.
-      if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
-        Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
-        if (!Insert2)
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-        Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
-        if (!Insert1)
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      }
-      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
-                                IE = BB->end();
-           II != IE; ++II) {
-        Instruction *Inst = II;
-        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-          continue;
-        if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
-          continue;
-        if (!Inst->hasOneUse() ||
-            (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-        }
-      }
-      return;
-    }
-
-    BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
-    if (Branch && Branch->isConditional()) {
-      // Look for the selector dispatch.
-      //   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
-      //   %matches = icmp eq i32 %sel, %2
-      //   br i1 %matches, label %catch14, label %eh.resume
-      CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
-      if (!Compare || !Compare->isEquality())
-        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
-                                IE = BB->end();
-           II != IE; ++II) {
-        Instruction *Inst = II;
-        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-          continue;
-        if (Inst == Compare || Inst == Branch)
-          continue;
-        if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
-          continue;
-        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      }
-      // The selector dispatch block should always terminate our search.
-      assert(BB == EndBB);
-      return;
-    }
-
-    if (isAsynchronousEHPersonality(Personality)) {
-      // If this is a landingpad block, split the block at the first
-      // non-landing pad instruction.
-      Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
-      if (LPadMap) {
-        while (MaybeCall != BB->getTerminator() &&
-               LPadMap->isLandingPadSpecificInst(MaybeCall))
-          MaybeCall = MaybeCall->getNextNode();
-      }
-
-      // Look for outlined finally calls on x64, since those happen to match
-      // the prototype provided by the runtime.
-      if (TheTriple.getArch() == Triple::x86_64) {
-        if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
-          Function *Fin = FinallyCall.getCalledFunction();
-          assert(Fin && "outlined finally call should be direct");
-          auto *Action = new CleanupHandler(BB);
-          Action->setHandlerBlockOrFunc(Fin);
-          Actions.insertCleanupHandler(Action);
-          CleanupHandlerMap[BB] = Action;
-          DEBUG(dbgs() << "  Found frontend-outlined finally call to "
-                       << Fin->getName() << " in block "
-                       << Action->getStartBlock()->getName() << "\n");
-
-          // Split the block if there were more interesting instructions and
-          // look for finally calls in the normal successor block.
-          BasicBlock *SuccBB = BB;
-          if (FinallyCall.getInstruction() != BB->getTerminator() &&
-              FinallyCall.getInstruction()->getNextNode() !=
-                  BB->getTerminator()) {
-            SuccBB =
-                SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(),
-                           DT);
-          } else {
-            if (FinallyCall.isInvoke()) {
-              SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())
-                           ->getNormalDest();
-            } else {
-              SuccBB = BB->getUniqueSuccessor();
-              assert(SuccBB &&
-                     "splitOutlinedFinallyCalls didn't insert a branch");
-            }
-          }
-          BB = SuccBB;
-          if (BB == EndBB)
-            return;
-          continue;
-        }
-      }
-    }
-
-    // Anything else is either a catch block or interesting cleanup code.
-    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
-         II != IE; ++II) {
-      Instruction *Inst = II;
-      if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-        continue;
-      // Unconditional branches fall through to this loop.
-      if (Inst == Branch)
-        continue;
-      // If this is a catch block, there is no cleanup code to be found.
-      if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
-        return;
-      // If this is a nested landing pad, it may contain an endcatch call.
-      if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
-        return;
-      // Anything else makes this interesting cleanup code.
-      return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-    }
-
-    // Only unconditional branches in empty blocks should get this far.
-    assert(Branch && Branch->isUnconditional());
-    if (BB == EndBB)
-      return;
-    BB = Branch->getSuccessor(0);
-  }
-}
-
-// This is a public function, declared in WinEHFuncInfo.h and is also
-// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
-void llvm::parseEHActions(
-    const IntrinsicInst *II,
-    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
-  assert(II->getIntrinsicID() == Intrinsic::eh_actions &&
-         "attempted to parse non eh.actions intrinsic");
-  for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
-    uint64_t ActionKind =
-        cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
-    if (ActionKind == /*catch=*/1) {
-      auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
-      ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
-      int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
-      Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
-      I += 4;
-      auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
-                                          /*NextBB=*/nullptr);
-      CH->setHandlerBlockOrFunc(Handler);
-      CH->setExceptionVarIndex(EHObjIndexVal);
-      Actions.push_back(std::move(CH));
-    } else if (ActionKind == 0) {
-      Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
-      I += 2;
-      auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
-      CH->setHandlerBlockOrFunc(Handler);
-      Actions.push_back(std::move(CH));
-    } else {
-      llvm_unreachable("Expected either a catch or cleanup handler!");
-    }
-  }
-  std::reverse(Actions.begin(), Actions.end());
-}
-
-namespace {
-struct WinEHNumbering {
-  WinEHNumbering(WinEHFuncInfo &FuncInfo)
-      : FuncInfo(FuncInfo), CurrentBaseState(-1), NextState(0) {}
-
-  WinEHFuncInfo &FuncInfo;
-  int CurrentBaseState;
-  int NextState;
-
-  SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
-  SmallPtrSet<const Function *, 4> VisitedHandlers;
-
-  int currentEHNumber() const {
-    return HandlerStack.empty() ? CurrentBaseState
-                                : HandlerStack.back()->getEHState();
-  }
-
-  void createUnwindMapEntry(int ToState, ActionHandler *AH);
-  void createTryBlockMapEntry(int TryLow, int TryHigh,
-                              ArrayRef<CatchHandler *> Handlers);
-  void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
-                       ImmutableCallSite CS);
-  void popUnmatchedActions(int FirstMismatch);
-  void calculateStateNumbers(const Function &F);
-  void findActionRootLPads(const Function &F);
-};
-}
-
-void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
-  WinEHUnwindMapEntry UME;
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+                             const BasicBlock *BB) {
+  CxxUnwindMapEntry UME;
   UME.ToState = ToState;
-  if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
-    UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
-  else
-    UME.Cleanup = nullptr;
-  FuncInfo.UnwindMap.push_back(UME);
+  UME.Cleanup = BB;
+  FuncInfo.CxxUnwindMap.push_back(UME);
+  return FuncInfo.getLastStateNumber();
 }
 
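The table addUnwindMapEntry builds is a parent-pointer chain: each state records the state it transitions to on unwind, plus an optional cleanup to run on the way. The following self-contained sketch models that walk; the type and member names are illustrative stand-ins for CxxUnwindMapEntry and WinEHFuncInfo, not LLVM's own declarations.

    #include <cassert>
    #include <vector>

    // Illustrative stand-ins for CxxUnwindMapEntry / WinEHFuncInfo.
    struct UnwindEntry {
      int ToState;         // state to transition to when this state unwinds
      const void *Cleanup; // cleanup funclet entry block, or nullptr
    };

    struct UnwindMap {
      std::vector<UnwindEntry> Entries;

      // Mirrors addUnwindMapEntry: appending a row allocates the next state,
      // and the new state's number is simply the row index.
      int add(int ToState, const void *Cleanup) {
        Entries.push_back({ToState, Cleanup});
        return static_cast<int>(Entries.size()) - 1;
      }

      // Unwinding from State out to TargetState visits every cleanup between.
      void unwind(int State, int TargetState) const {
        while (State != TargetState) {
          assert(State >= 0 && "walked past the caller state");
          // ... run Entries[State].Cleanup here ...
          State = Entries[State].ToState;
        }
      }
    };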
-void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
-                                            ArrayRef<CatchHandler *> Handlers) {
-  // See if we already have an entry for this set of handlers.
-  // This is using iterators rather than a range-based for loop because
-  // if we find the entry we're looking for we'll need the iterator to
-  // erase it.
-  int NumHandlers = Handlers.size();
-  auto I = FuncInfo.TryBlockMap.begin();
-  auto E = FuncInfo.TryBlockMap.end();
-  for ( ; I != E; ++I) {
-    auto &Entry = *I;
-    if (Entry.HandlerArray.size() != (size_t)NumHandlers)
-      continue;
-    int N;
-    for (N = 0; N < NumHandlers; ++N) {
-      if (Entry.HandlerArray[N].Handler !=
-          Handlers[N]->getHandlerBlockOrFunc())
-        break; // breaks out of inner loop
-    }
-    // If all the handlers match, this is what we were looking for.
-    if (N == NumHandlers) {
-      break;
-    }
-  }
-
-  // If we found an existing entry for this set of handlers, extend the range
-  // but move the entry to the end of the map vector.  The order of entries
-  // in the map is critical to the way that the runtime finds handlers.
-  // FIXME: Depending on what has happened with block ordering, this may
-  //        incorrectly combine entries that should remain separate.
-  if (I != E) {
-    // Copy the existing entry.
-    WinEHTryBlockMapEntry Entry = *I;
-    Entry.TryLow = std::min(TryLow, Entry.TryLow);
-    Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
-    assert(Entry.TryLow <= Entry.TryHigh);
-    // Erase the old entry and add this one to the back.
-    FuncInfo.TryBlockMap.erase(I);
-    FuncInfo.TryBlockMap.push_back(Entry);
-    return;
-  }
-
-  // If we didn't find an entry, create a new one.
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+                                int TryHigh, int CatchHigh,
+                                ArrayRef<const CatchPadInst *> Handlers) {
   WinEHTryBlockMapEntry TBME;
   TBME.TryLow = TryLow;
   TBME.TryHigh = TryHigh;
+  TBME.CatchHigh = CatchHigh;
   assert(TBME.TryLow <= TBME.TryHigh);
-  for (CatchHandler *CH : Handlers) {
+  for (const CatchPadInst *CPI : Handlers) {
     WinEHHandlerType HT;
-    if (CH->getSelector()->isNullValue()) {
-      HT.Adjectives = 0x40;
+    Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+    if (TypeInfo->isNullValue())
       HT.TypeDescriptor = nullptr;
-    } else {
-      auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
-      // Selectors are always pointers to GlobalVariables with 'struct' type.
-      // The struct has two fields, adjectives and a type descriptor.
-      auto *CS = cast<ConstantStruct>(GV->getInitializer());
-      HT.Adjectives =
-          cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
-      HT.TypeDescriptor =
-          cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
-    }
-    HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
-    HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
+    else
+      HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+    HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+    HT.Handler = CPI->getParent();
+    if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
+      HT.CatchObj.Alloca = nullptr;
+    else
+      HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
     TBME.HandlerArray.push_back(HT);
   }
   FuncInfo.TryBlockMap.push_back(TBME);
 }
 
-static void print_name(const Value *V) {
-#ifndef NDEBUG
-  if (!V) {
-    DEBUG(dbgs() << "null");
-    return;
-  }
-
-  if (const auto *F = dyn_cast<Function>(V))
-    DEBUG(dbgs() << F->getName());
-  else
-    DEBUG(V->dump());
-#endif
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+  for (const User *U : CleanupPad->users())
+    if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+      return CRI->getUnwindDest();
+  return nullptr;
 }
 
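A WinEHTryBlockMapEntry describes two adjacent state intervals: [TryLow, TryHigh] covers the guarded try body, and (TryHigh, CatchHigh] covers the catch funclets themselves, which is what lets a rethrow from a catch unwind past its own try. A hedged sketch of the interval tests a consumer of this table would perform, using illustrative types rather than LLVM's:

    // Illustrative mirror of a WinEHTryBlockMapEntry's state intervals.
    struct TryBlockEntry {
      int TryLow, TryHigh, CatchHigh;
    };

    // A state raised inside the guarded region selects this try block.
    inline bool stateInTryBody(const TryBlockEntry &E, int State) {
      return State >= E.TryLow && State <= E.TryHigh;
    }

    // States in (TryHigh, CatchHigh] belong to this try's catch funclets; an
    // exception escaping one of those catches must not re-enter the same try.
    inline bool stateInCatchFunclets(const TryBlockEntry &E, int State) {
      return State > E.TryHigh && State <= E.CatchHigh;
    }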
-void WinEHNumbering::processCallSite(
-    MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
-    ImmutableCallSite CS) {
-  DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
-               << ") for: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-
-  DEBUG(dbgs() << "HandlerStack: \n");
-  for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
-    DEBUG(dbgs() << "  ");
-    print_name(HandlerStack[I]->getHandlerBlockOrFunc());
-    DEBUG(dbgs() << '\n');
-  }
-  DEBUG(dbgs() << "Actions: \n");
-  for (int I = 0, E = Actions.size(); I < E; ++I) {
-    DEBUG(dbgs() << "  ");
-    print_name(Actions[I]->getHandlerBlockOrFunc());
-    DEBUG(dbgs() << '\n');
-  }
-  int FirstMismatch = 0;
-  for (int E = std::min(HandlerStack.size(), Actions.size());
-       FirstMismatch < E; ++FirstMismatch) {
-    if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
-        Actions[FirstMismatch]->getHandlerBlockOrFunc())
-      break;
-  }
-
-  // Remove unmatched actions from the stack and process their EH states.
-  popUnmatchedActions(FirstMismatch);
-
-  DEBUG(dbgs() << "Pushing actions for CallSite: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-
-  bool LastActionWasCatch = false;
-  const LandingPadInst *LastRootLPad = nullptr;
-  for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
-    // We can reuse eh states when pushing two catches for the same invoke.
-    bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
-    auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
-    // Various conditions can lead to a handler being popped from the
-    // stack and re-pushed later.  That shouldn't create a new state.
-    // FIXME: Can code optimization lead to re-used handlers?
-    if (FuncInfo.HandlerEnclosedState.count(Handler)) {
-      // If we already assigned the state enclosed by this handler, re-use it.
-      Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+static void calculateStateNumbersForInvokes(const Function *Fn,
+                                            WinEHFuncInfo &FuncInfo) {
+  auto *F = const_cast<Function *>(Fn);
+  DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+  for (BasicBlock &BB : *F) {
+    auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+    if (!II)
       continue;
+
+    auto &BBColors = BlockColors[&BB];
+    assert(BBColors.size() == 1 &&
+           "multi-color BB not removed by preparation");
+    BasicBlock *FuncletEntryBB = BBColors.front();
+
+    BasicBlock *FuncletUnwindDest;
+    auto *FuncletPad =
+        dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+    assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+    if (!FuncletPad)
+      FuncletUnwindDest = nullptr;
+    else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+      FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+    else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+      FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+    else
+      llvm_unreachable("unexpected funclet pad!");
+
+    BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+    int BaseState = -1;
+    if (FuncletUnwindDest == InvokeUnwindDest) {
+      auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+      if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+        BaseState = BaseStateI->second;
     }
-    const LandingPadInst *RootLPad = FuncInfo.RootLPad[Handler];
-    if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
-      DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber()
-                   << "\n");
-      Actions[I]->setEHState(currentEHNumber());
+
+    if (BaseState != -1) {
+      FuncInfo.InvokeStateMap[II] = BaseState;
     } else {
-      DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
-      print_name(Actions[I]->getHandlerBlockOrFunc());
-      DEBUG(dbgs() << ") with EH state " << NextState << "\n");
-      createUnwindMapEntry(currentEHNumber(), Actions[I].get());
-      DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
-      Actions[I]->setEHState(NextState);
-      NextState++;
+      Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+      assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+      FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
     }
-    HandlerStack.push_back(std::move(Actions[I]));
-    LastActionWasCatch = CurrActionIsCatch;
-    LastRootLPad = RootLPad;
-  }
-
-  // This is used to defer numbering states for a handler until after the
-  // last time it appears in an invoke action list.
-  if (CS.isInvoke()) {
-    for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
-      auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
-      if (FuncInfo.LastInvoke[Handler] !=
-          cast<InvokeInst>(CS.getInstruction()))
-        continue;
-      FuncInfo.LastInvokeVisited[Handler] = true;
-      DEBUG(dbgs() << "Last invoke of ");
-      print_name(Handler);
-      DEBUG(dbgs() << " has been visited.\n");
-    }
-  }
-
-  DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-}
-
-void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
-  // Don't recurse while we are looping over the handler stack.  Instead,
-  // defer the numbering of the catch handlers until we are done popping.
-  SmallVector<CatchHandler *, 8> PoppedCatches;
-  for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
-    std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
-    if (isa<CatchHandler>(Handler.get()))
-      PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
-  }
-
-  int TryHigh = NextState - 1;
-  int LastTryLowIdx = 0;
-  for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
-    CatchHandler *CH = PoppedCatches[I];
-    DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
-    if (I + 1 == E ||
-        CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
-      int TryLow = CH->getEHState();
-      auto Handlers =
-          makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
-      DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
-      for (size_t J = 0; J < Handlers.size(); ++J) {
-        DEBUG(dbgs() << ", ");
-        print_name(Handlers[J]->getHandlerBlockOrFunc());
-      }
-      DEBUG(dbgs() << ")\n");
-      createTryBlockMapEntry(TryLow, TryHigh, Handlers);
-      LastTryLowIdx = I + 1;
-    }
-  }
-
-  for (CatchHandler *CH : PoppedCatches) {
-    if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
-      if (FuncInfo.LastInvokeVisited[F]) {
-        DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
-        print_name(F);
-        DEBUG(dbgs() << '\n');
-        FuncInfo.HandlerBaseState[F] = NextState;
-        DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
-                     << ", null)\n");
-        createUnwindMapEntry(currentEHNumber(), nullptr);
-        ++NextState;
-        calculateStateNumbers(*F);
-      } else {
-        DEBUG(dbgs() << "Deferring handling of ");
-        print_name(F);
-        DEBUG(dbgs() << " until last invoke visited.\n");
-      }
-    }
-    delete CH;
   }
 }
 
-void WinEHNumbering::calculateStateNumbers(const Function &F) {
-  auto I = VisitedHandlers.insert(&F);
-  if (!I.second)
-    return; // We've already visited this handler, don't renumber it.
-
-  int OldBaseState = CurrentBaseState;
-  if (FuncInfo.HandlerBaseState.count(&F)) {
-    CurrentBaseState = FuncInfo.HandlerBaseState[&F];
+// Given BB which ends in an unwind edge, return the EHPad that this BB
+// belongs to.  If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+                                                 Value *ParentPad) {
+  const TerminatorInst *TI = BB->getTerminator();
+  if (isa<InvokeInst>(TI))
+    return nullptr;
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+    if (CatchSwitch->getParentPad() != ParentPad)
+      return nullptr;
+    return BB;
   }
-
-  size_t SavedHandlerStackSize = HandlerStack.size();
-
-  DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
-  SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
-  for (const BasicBlock &BB : F) {
-    for (const Instruction &I : BB) {
-      const auto *CI = dyn_cast<CallInst>(&I);
-      if (!CI || CI->doesNotThrow())
-        continue;
-      processCallSite(None, CI);
-    }
-    const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
-    if (!II)
-      continue;
-    const LandingPadInst *LPI = II->getLandingPadInst();
-    auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
-    if (!ActionsCall)
-      continue;
-    parseEHActions(ActionsCall, ActionList);
-    if (ActionList.empty())
-      continue;
-    processCallSite(ActionList, II);
-    ActionList.clear();
-    FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
-    DEBUG(dbgs() << "Assigning state " << currentEHNumber()
-                 << " to landing pad at " << LPI->getParent()->getName()
-                 << '\n');
-  }
-
-  // Pop any actions that were pushed on the stack for this function.
-  popUnmatchedActions(SavedHandlerStackSize);
-
-  DEBUG(dbgs() << "Assigning max state " << NextState - 1 << " to "
-               << F.getName() << '\n');
-  FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
-
-  CurrentBaseState = OldBaseState;
+  assert(!TI->isEHPad() && "unexpected EHPad!");
+  auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+  if (CleanupPad->getParentPad() != ParentPad)
+    return nullptr;
+  return CleanupPad->getParent();
 }
 
-// This function follows the same basic traversal as calculateStateNumbers
-// but it is necessary to identify the root landing pad associated
-// with each action before we start assigning state numbers.
-void WinEHNumbering::findActionRootLPads(const Function &F) {
-  auto I = VisitedHandlers.insert(&F);
-  if (!I.second)
-    return; // We've already visited this handler, don't revisit it.
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+                                     const Instruction *FirstNonPHI,
+                                     int ParentState) {
+  const BasicBlock *BB = FirstNonPHI->getParent();
+  assert(BB->isEHPad() && "not a funclet!");
 
-  SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
-  for (const BasicBlock &BB : F) {
-    const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
-    if (!II)
-      continue;
-    const LandingPadInst *LPI = II->getLandingPadInst();
-    auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
-    if (!ActionsCall)
-      continue;
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+    assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+           "shouldn't revisit catch funclets!");
 
-    assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
-    parseEHActions(ActionsCall, ActionList);
-    if (ActionList.empty())
-      continue;
-    for (int I = 0, E = ActionList.size(); I < E; ++I) {
-      if (auto *Handler =
-              dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
-        FuncInfo.LastInvoke[Handler] = II;
-        // Don't replace the root landing pad if we previously saw this
-        // handler in a different function.
-        if (FuncInfo.RootLPad.count(Handler) &&
-            FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
-          continue;
-        DEBUG(dbgs() << "Setting root lpad for ");
-        print_name(Handler);
-        DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
-        FuncInfo.RootLPad[Handler] = LPI;
+    SmallVector<const CatchPadInst *, 2> Handlers;
+    for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+      auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+      Handlers.push_back(CatchPad);
+    }
+    int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+    FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CatchSwitch->getParentPad())))
+        calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 TryLow);
+    int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+    // catchpads are separate funclets in C++ EH due to the way rethrow works.
+    int TryHigh = CatchLow - 1;
+    for (const auto *CatchPad : Handlers) {
+      FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+      for (const User *U : CatchPad->users()) {
+        const auto *UserI = cast<Instruction>(U);
+        if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+          if (InnerCatchSwitch->getUnwindDest() ==
+              CatchSwitch->getUnwindDest())
+            calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+        if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+          if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+              CatchSwitch->getUnwindDest())
+            calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
       }
     }
-    // Walk the actions again and look for nested handlers.  This has to
-    // happen after all of the actions have been processed in the current
-    // function.
-    for (int I = 0, E = ActionList.size(); I < E; ++I)
-      if (auto *Handler =
-              dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
-        findActionRootLPads(*Handler);
-    ActionList.clear();
+    int CatchHigh = FuncInfo.getLastStateNumber();
+    addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+    DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+    DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
+    DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+                 << '\n');
+  } else {
+    auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+    // It's possible for a cleanup to be visited twice: it might have multiple
+    // cleanupret instructions.
+    if (FuncInfo.EHPadStateMap.count(CleanupPad))
+      return;
+
+    int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+    FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+                 << BB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB)) {
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CleanupPad->getParentPad()))) {
+        calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 CleanupState);
+      }
+    }
+    for (const User *U : CleanupPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (UserI->isEHPad())
+        report_fatal_error("Cleanup funclets for the MSVC++ personality "
+                           "cannot contain exceptional actions");
+    }
   }
 }
 
-void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn,
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+                        const Function *Filter, const BasicBlock *Handler) {
+  SEHUnwindMapEntry Entry;
+  Entry.ToState = ParentState;
+  Entry.IsFinally = false;
+  Entry.Filter = Filter;
+  Entry.Handler = Handler;
+  FuncInfo.SEHUnwindMap.push_back(Entry);
+  return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+                         const BasicBlock *Handler) {
+  SEHUnwindMapEntry Entry;
+  Entry.ToState = ParentState;
+  Entry.IsFinally = true;
+  Entry.Filter = nullptr;
+  Entry.Handler = Handler;
+  FuncInfo.SEHUnwindMap.push_back(Entry);
+  return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
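Both helpers above append one row to the SEH unwind map and return its index as the new state, so state numbers double as row numbers. A compact stand-alone model of that convention, with stand-in types rather than the real SEHUnwindMapEntry:

    #include <vector>

    // Stand-in for SEHUnwindMapEntry: one row per __except filter or
    // __finally block.
    struct SEHEntry {
      int ToState;              // enclosing state, or -1 = unwind to caller
      bool IsFinally;           // true for __finally, false for __except
      const void *FilterOrNull; // filter function for __except, else nullptr
      const void *Handler;      // entry block of the handler funclet
    };

    // Mirrors addSEHExcept/addSEHFinally: the new state is the appended
    // row's index, so the map's size fixes the next state number.
    inline int addEntry(std::vector<SEHEntry> &Map, const SEHEntry &E) {
      Map.push_back(E);
      return static_cast<int>(Map.size()) - 1;
    }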
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+                                     const Instruction *FirstNonPHI,
+                                     int ParentState) {
+  const BasicBlock *BB = FirstNonPHI->getParent();
+  assert(BB->isEHPad() && "not a funclet!");
+
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+    assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+           "shouldn't revisit catch funclets!");
+
+    // Extract the filter function and the __except basic block and create a
+    // state for them.
+    assert(CatchSwitch->getNumHandlers() == 1 &&
+           "SEH doesn't have multiple handlers per __try");
+    const auto *CatchPad =
+        cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+    const BasicBlock *CatchPadBB = CatchPad->getParent();
+    const Constant *FilterOrNull =
+        cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+    const Function *Filter = dyn_cast<Function>(FilterOrNull);
+    assert((Filter || FilterOrNull->isNullValue()) &&
+           "unexpected filter value");
+    int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+    // Everything in the __try block uses TryState as its parent state.
+    FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+    DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+                 << CatchPadBB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CatchSwitch->getParentPad())))
+        calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 TryState);
+
+    // Everything in the __except block unwinds to ParentState, just like code
+    // outside the __try.
+    for (const User *U : CatchPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+        if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+          calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+      if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+        if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+            CatchSwitch->getUnwindDest())
+          calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+    }
+  } else {
+    auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+    // It's possible for a cleanup to be visited twice: it might have multiple
+    // cleanupret instructions.
+    if (FuncInfo.EHPadStateMap.count(CleanupPad))
+      return;
+
+    int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+    FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+                 << BB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock =
+               getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+        calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 CleanupState);
+    for (const User *U : CleanupPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (UserI->isEHPad())
+        report_fatal_error("Cleanup funclets for the SEH personality cannot "
+                           "contain exceptional actions");
+    }
+  }
+}
+
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+    return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+           CatchSwitch->unwindsToCaller();
+  if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+    return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+           getCleanupRetUnwindDest(CleanupPad) == nullptr;
+  if (isa<CatchPadInst>(EHPad))
+    return false;
+  llvm_unreachable("unexpected EHPad!");
+}
+
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+                                    WinEHFuncInfo &FuncInfo) {
+  // Don't compute state numbers twice.
+  if (!FuncInfo.SEHUnwindMap.empty())
+    return;
+
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
                                          WinEHFuncInfo &FuncInfo) {
   // Return if it's already been done.
-  if (!FuncInfo.LandingPadStateMap.empty())
+  if (!FuncInfo.EHPadStateMap.empty())
     return;
 
-  WinEHNumbering Num(FuncInfo);
-  Num.findActionRootLPads(*ParentFn);
-  // The VisitedHandlers list is used by both findActionRootLPads and
-  // calculateStateNumbers, but both functions need to visit all handlers.
-  Num.VisitedHandlers.clear();
-  Num.calculateStateNumbers(*ParentFn);
-  // Pop everything on the handler stack.
-  // It may be necessary to call this more than once because a handler can
-  // be pushed on the stack as a result of clearing the stack.
-  while (!Num.HandlerStack.empty())
-    Num.processCallSite(None, ImmutableCallSite());
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
 }
+
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
+                           ClrHandlerType HandlerType, uint32_t TypeToken,
+                           const BasicBlock *Handler) {
+  ClrEHUnwindMapEntry Entry;
+  Entry.Parent = ParentState;
+  Entry.Handler = Handler;
+  Entry.HandlerType = HandlerType;
+  Entry.TypeToken = TypeToken;
+  FuncInfo.ClrEHUnwindMap.push_back(Entry);
+  return FuncInfo.ClrEHUnwindMap.size() - 1;
+}
+
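addClrEHHandler follows the same append-and-return-index convention, and the CoreCLR numbering below drives it from an explicit worklist of (pad, parent state) pairs instead of recursion. The same traversal shape in miniature, with a placeholder node type standing in for EH pads and a toy numbering rule (the real scheme differs):

    #include <tuple>
    #include <utility>
    #include <vector>

    struct Node {
      std::vector<Node *> Children; // stand-in for nested EH pads
    };

    // Assign each node a state derived from its parent's, without recursion.
    inline void numberNodes(Node *Root,
                            std::vector<std::pair<Node *, int>> &Out) {
      std::vector<std::pair<Node *, int>> Worklist;
      Worklist.emplace_back(Root, -1); // -1: sentinel "unwind to caller"
      while (!Worklist.empty()) {
        Node *N;
        int ParentState;
        std::tie(N, ParentState) = Worklist.back();
        Worklist.pop_back();
        int MyState = ParentState + 1; // toy numbering for illustration
        Out.emplace_back(N, MyState);
        for (Node *C : N->Children)
          Worklist.emplace_back(C, MyState); // children see parent's state
      }
    }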
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+                                      WinEHFuncInfo &FuncInfo) {
+  // Return if it's already been done.
+  if (!FuncInfo.EHPadStateMap.empty())
+    return;
+
+  SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
+
+  // Each pad needs to be able to refer to its parent, so scan the function
+  // looking for top-level handlers and seed the worklist with them.
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    if (BB.isLandingPad())
+      report_fatal_error("CoreCLR EH cannot use landingpads");
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    // Queue this with sentinel parent state -1 to mean unwind to caller.
+    Worklist.emplace_back(FirstNonPHI, -1);
+  }
+
+  while (!Worklist.empty()) {
+    const Instruction *Pad;
+    int ParentState;
+    std::tie(Pad, ParentState) = Worklist.pop_back_val();
+
+    Value *ParentPad;
+    int PredState;
+    if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+      // A cleanup can have multiple exits; don't re-process after the first.
+      if (FuncInfo.EHPadStateMap.count(Cleanup))
+        continue;
+      // The CoreCLR personality uses arity to distinguish faults from
+      // finallies.
+      const BasicBlock *PadBlock = Cleanup->getParent();
+      ClrHandlerType HandlerType =
+          (Cleanup->getNumOperands() ? ClrHandlerType::Fault
+                                     : ClrHandlerType::Finally);
+      int NewState =
+          addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
+      FuncInfo.EHPadStateMap[Cleanup] = NewState;
+      // Propagate the new state to all preds of the cleanup.
+      ParentPad = Cleanup->getParentPad();
+      PredState = NewState;
+    } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      SmallVector<const CatchPadInst *, 2> Handlers;
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+        Handlers.push_back(Catch);
+      }
+      FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
+      int NewState = ParentState;
+      for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
+           HandlerI != HandlerE; ++HandlerI) {
+        const CatchPadInst *Catch = *HandlerI;
+        const BasicBlock *PadBlock = Catch->getParent();
+        uint32_t TypeToken = static_cast<uint32_t>(
+            cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+        NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
+                                   TypeToken, PadBlock);
+        FuncInfo.EHPadStateMap[Catch] = NewState;
+      }
+      for (const auto *CatchPad : Handlers) {
+        for (const User *U : CatchPad->users()) {
+          const auto *UserI = cast<Instruction>(U);
+          if (UserI->isEHPad())
+            Worklist.emplace_back(UserI, ParentState);
+        }
+      }
+      PredState = NewState;
+      ParentPad = CatchSwitch->getParentPad();
+    } else {
+      llvm_unreachable("Unexpected EH pad");
+    }
+
+    // Queue all predecessors with the given state.
+    for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
+      if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
+        Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
+    }
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void WinEHPrepare::colorFunclets(Function &F) {
+  BlockColors = colorEHFunclets(F);
+
+  // Invert the map from BB to colors to color to BBs.
+  for (BasicBlock &BB : F) {
+    ColorVector &Colors = BlockColors[&BB];
+    for (BasicBlock *Color : Colors)
+      FuncletBlocks[Color].push_back(&BB);
+  }
+}
+
+void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
+                                               WinEHFuncInfo &FuncInfo) {
+  for (const BasicBlock &BB : *Fn) {
+    const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
+    if (!CatchRet)
+      continue;
+    // A 'catchret' returns to the outer scope's color.
+    Value *ParentPad = CatchRet->getParentPad();
+    const BasicBlock *Color;
+    if (isa<ConstantTokenNone>(ParentPad))
+      Color = &Fn->getEntryBlock();
+    else
+      Color = cast<Instruction>(ParentPad)->getParent();
+    // Record the catchret successor's funclet membership.
+    FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
+  }
+}
+
+void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+  // Strip PHI nodes off of EH pads.
+  SmallVector<PHINode *, 16> PHINodes;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+    BasicBlock *BB = &*FI++;
+    if (!BB->isEHPad())
+      continue;
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+      Instruction *I = &*BI++;
+      auto *PN = dyn_cast<PHINode>(I);
+      // Stop at the first non-PHI.
+      if (!PN)
+        break;
+
+      AllocaInst *SpillSlot = insertPHILoads(PN, F);
+      if (SpillSlot)
+        insertPHIStores(PN, SpillSlot);
+
+      PHINodes.push_back(PN);
+    }
+  }
+
+  for (auto *PN : PHINodes) {
+    // There may be lingering uses on other EH PHIs being removed.
+    PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    PN->eraseFromParent();
+  }
+}
+
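demotePHIsOnFunclets performs the classic register-to-memory demotion: a PHI that would straddle a funclet boundary becomes a stack slot that every predecessor stores to and every former use reloads from. A toy model of just the data flow, not the LLVM API:

    // Toy model of PHI demotion: the "spill slot" stands in for the alloca.
    // Every predecessor stores its incoming value before transferring
    // control, and the block that used to begin with the PHI reloads the
    // slot instead of reading a PHI result.
    struct DemotedPHI {
      int SpillSlot = 0;

      void storeFromPred(int IncomingValue) { SpillSlot = IncomingValue; }
      int reloadAtUse() const { return SpillSlot; }
    };

The real transformation differs only in mechanics: insertPHIStores places the stores, insertPHILoads places the reloads, and unsplittable predecessors (catchswitch blocks) are deferred to a worklist, as the functions further below show.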
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+  // We need to clone all blocks which belong to multiple funclets.  Values
+  // are remapped throughout the funclet to propagate both the new
+  // instructions *and* the new basic blocks themselves.
+  for (auto &Funclets : FuncletBlocks) {
+    BasicBlock *FuncletPadBB = Funclets.first;
+    std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+
+    std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+    ValueToValueMapTy VMap;
+    for (BasicBlock *BB : BlocksInFunclet) {
+      ColorVector &ColorsForBB = BlockColors[BB];
+      // We don't need to do anything if the block is monochromatic.
+      size_t NumColorsForBB = ColorsForBB.size();
+      if (NumColorsForBB == 1)
+        continue;
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Cloning block \'" << BB->getName()
+                             << "\' for funclet \'" << FuncletPadBB->getName()
+                             << "\'.\n");
+
+      // Create a new basic block and copy instructions into it!
+      BasicBlock *CBB =
+          CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+      // Insert the clone immediately after the original to ensure determinism
+      // and to keep the same relative ordering of any funclet's blocks.
+      CBB->insertInto(&F, BB->getNextNode());
+
+      // Add basic block mapping.
+      VMap[BB] = CBB;
+
+      // Record delta operations that we need to perform to our color
+      // mappings.
+      Orig2Clone.emplace_back(BB, CBB);
+    }
+
+    // If nothing was cloned, we're done cloning in this funclet.
+    if (Orig2Clone.empty())
+      continue;
+
+    // Update our color mappings to reflect that one block has lost a color
+    // and another has gained a color.
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+
+      BlocksInFunclet.push_back(NewBlock);
+      ColorVector &NewColors = BlockColors[NewBlock];
+      assert(NewColors.empty() && "A new block should only have one color!");
+      NewColors.push_back(FuncletPadBB);
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Assigned color \'"
+                             << FuncletPadBB->getName() << "\' to block \'"
+                             << NewBlock->getName() << "\'.\n");
+
+      BlocksInFunclet.erase(
+          std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(),
+                      OldBlock),
+          BlocksInFunclet.end());
+      ColorVector &OldColors = BlockColors[OldBlock];
+      OldColors.erase(
+          std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
+          OldColors.end());
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Removed color \'"
+                             << FuncletPadBB->getName() << "\' from block \'"
+                             << OldBlock->getName() << "\'.\n");
+    }
+
+    // Loop over all of the instructions in this funclet, fixing up operand
+    // references as we go.  This uses VMap to do all the hard work.
+    for (BasicBlock *BB : BlocksInFunclet)
+      // Loop over all instructions, fixing each one as we find it...
+      for (Instruction &I : *BB)
+        RemapInstruction(&I, VMap,
+                         RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+
+    auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+      unsigned NumPreds = PN->getNumIncomingValues();
+      for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+           ++PredIdx) {
+        BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+        ColorVector &IncomingColors = BlockColors[IncomingBlock];
+        bool BlockInFunclet = IncomingColors.size() == 1 &&
+                              IncomingColors.front() == FuncletPadBB;
+        if (IsForOldBlock != BlockInFunclet)
+          continue;
+        PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+        // Revisit the next entry.
+        --PredIdx;
+        --PredEnd;
+      }
+    };
+
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+      for (Instruction &OldI : *OldBlock) {
+        auto *OldPN = dyn_cast<PHINode>(&OldI);
+        if (!OldPN)
+          break;
+        UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
+      }
+      for (Instruction &NewI : *NewBlock) {
+        auto *NewPN = dyn_cast<PHINode>(&NewI);
+        if (!NewPN)
+          break;
+        UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
+      }
+    }
+
+    // Check to see if SuccBB has PHI nodes.  If so, we need to add entries
+    // to the PHI nodes for NewBB now.
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+      for (BasicBlock *SuccBB : successors(NewBlock)) {
+        for (Instruction &SuccI : *SuccBB) {
+          auto *SuccPN = dyn_cast<PHINode>(&SuccI);
+          if (!SuccPN)
+            break;
+
+          // Ok, we have a PHI node.  Figure out what the incoming value was
+          // for the OldBlock.
+          int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+          if (OldBlockIdx == -1)
+            break;
+          Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+
+          // Remap the value if necessary.
+          if (auto *Inst = dyn_cast<Instruction>(IV)) {
+            ValueToValueMapTy::iterator I = VMap.find(Inst);
+            if (I != VMap.end())
+              IV = I->second;
+          }
+
+          SuccPN->addIncoming(IV, NewBlock);
+        }
+      }
+    }
+
+    for (ValueToValueMapTy::value_type VT : VMap) {
+      // If there were values defined in BB that are used outside the funclet,
+      // then we now have to update all uses of the value to use either the
+      // original value, the cloned value, or some PHI derived value.  This
+      // can require arbitrary PHI insertion, which we are prepared to do;
+      // clean these up now.
+      SmallVector<Use *, 16> UsesToRename;
+
+      auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+      if (!OldI)
+        continue;
+      auto *NewI = cast<Instruction>(VT.second);
+      // Scan all uses of this instruction to see if it is used outside of its
+      // funclet, and if so, record them in UsesToRename.
+      for (Use &U : OldI->uses()) {
+        Instruction *UserI = cast<Instruction>(U.getUser());
+        BasicBlock *UserBB = UserI->getParent();
+        ColorVector &ColorsForUserBB = BlockColors[UserBB];
+        assert(!ColorsForUserBB.empty());
+        if (ColorsForUserBB.size() > 1 ||
+            *ColorsForUserBB.begin() != FuncletPadBB)
+          UsesToRename.push_back(&U);
+      }
+
+      // If there are no uses outside the block, we're done with this
+      // instruction.
+      if (UsesToRename.empty())
+        continue;
+
+      // We found a use of OldI outside of the funclet.  Rename all uses of
+      // OldI that are outside its funclet to be uses of the appropriate PHI
+      // node etc.
+      SSAUpdater SSAUpdate;
+      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
+
+      while (!UsesToRename.empty())
+        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
+    }
+  }
+}
+
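The SSAUpdater sequence above is the standard recipe for splitting one definition into two: seed the updater with both definitions, then let RewriteUseAfterInsertions place PHIs wherever the two reaching definitions meet. Distilled into a stand-alone helper as a sketch (same LLVM API calls as the loop above, shown out of context):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"

    using namespace llvm;

    // Given an original definition and its clone, rewrite the recorded uses
    // so each sees whichever definition reaches it; SSAUpdater inserts PHIs
    // where the two definitions meet.
    static void rewriteClonedDef(Instruction *OldI, Instruction *NewI,
                                 SmallVectorImpl<Use *> &UsesToRename) {
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
      while (!UsesToRename.empty())
        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
    }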
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+  // Remove implausible terminators and replace them with UnreachableInst.
+  for (auto &Funclet : FuncletBlocks) {
+    BasicBlock *FuncletPadBB = Funclet.first;
+    std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+    Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+    auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+    auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+    auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+    for (BasicBlock *BB : BlocksInFunclet) {
+      for (Instruction &I : *BB) {
+        CallSite CS(&I);
+        if (!CS)
+          continue;
+
+        Value *FuncletBundleOperand = nullptr;
+        if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+          FuncletBundleOperand = BU->Inputs.front();
+
+        if (FuncletBundleOperand == FuncletPad)
+          continue;
+
+        // Skip call sites which are nounwind intrinsics.
+        auto *CalledFn =
+            dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+        if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+          continue;
+
+        // This call site was not part of this funclet, remove it.
+        if (CS.isInvoke()) {
+          // Remove the unwind edge if it was an invoke.
+          removeUnwindEdge(BB);
+          // Get a pointer to the new call.
+          BasicBlock::iterator CallI =
+              std::prev(BB->getTerminator()->getIterator());
+          auto *CI = cast<CallInst>(&*CallI);
+          changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+        } else {
+          changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+        }
+
+        // There are no more instructions in the block (except for
+        // unreachable), so we are done.
+        break;
+      }
+
+      TerminatorInst *TI = BB->getTerminator();
+      // CatchPadInst and CleanupPadInst can't transfer control to a
+      // ReturnInst.
+      bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+      // The token consumed by a CatchReturnInst must match the funclet token.
+      bool IsUnreachableCatchret = false;
+      if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+        IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+      // The token consumed by a CleanupReturnInst must match the funclet
+      // token.
+      bool IsUnreachableCleanupret = false;
+      if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+        IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+      if (IsUnreachableRet || IsUnreachableCatchret ||
+          IsUnreachableCleanupret) {
+        changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+      } else if (isa<InvokeInst>(TI)) {
+        if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+          // Invokes within a cleanuppad for the MSVC++ personality never
+          // transfer control to their unwind edge: the personality will
+          // terminate the program.
+          removeUnwindEdge(BB);
+        }
+      }
+    }
+  }
+}
+
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+  // Clean up some of the mess we made by removing useless PHI nodes, trivial
+  // branches, etc.
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+    BasicBlock *BB = &*FI++;
+    SimplifyInstructionsInBlock(BB);
+    ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
+    MergeBlockIntoPredecessor(BB);
+  }
+
+  // We might have some unreachable blocks after cleaning up some impossible
+  // control flow.
+  removeUnreachableBlocks(F);
+}
+
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+  // Recolor the CFG to verify that all is well.
+  for (BasicBlock &BB : F) {
+    size_t NumColors = BlockColors[&BB].size();
+    assert(NumColors == 1 && "Expected monochromatic BB!");
+    if (NumColors == 0)
+      report_fatal_error("Uncolored BB!");
+    if (NumColors > 1)
+      report_fatal_error("Multicolor BB!");
+    if (!DisableDemotion) {
+      bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
+      assert(!EHPadHasPHI && "EH Pad still has a PHI!");
+      if (EHPadHasPHI)
+        report_fatal_error("EH Pad still has a PHI!");
+    }
+  }
+}
+
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+  // Remove unreachable blocks.  It is not valuable to assign them a color
+  // and their existence can trick us into thinking values are alive when
+  // they are not.
+  removeUnreachableBlocks(F);
+
+  // Determine which blocks are reachable from which funclet entries.
+  colorFunclets(F);
+
+  cloneCommonBlocks(F);
+
+  if (!DisableDemotion)
+    demotePHIsOnFunclets(F);
+
+  if (!DisableCleanups) {
+    removeImplausibleInstructions(F);
+
+    cleanupPreparedFunclets(F);
+  }
+
+  verifyPreparedFunclets(F);
+
+  BlockColors.clear();
+  FuncletBlocks.clear();
+
+  return true;
+}
+
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+  BasicBlock *PHIBlock = PN->getParent();
+  AllocaInst *SpillSlot = nullptr;
+  Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+  if (!isa<TerminatorInst>(EHPad)) {
+    // If the EHPad isn't a terminator, then we can insert a load in this
+    // block that will dominate all uses.
+    SpillSlot = new AllocaInst(PN->getType(), nullptr,
+                               Twine(PN->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+    Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+                            &*PHIBlock->getFirstInsertionPt());
+    PN->replaceAllUsesWith(V);
+    return SpillSlot;
+  }
+
+  // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+  // loads of the slot before every use.
+  DenseMap<BasicBlock *, Value *> Loads;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *UsingInst = cast<Instruction>(U.getUser());
+    if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+      // Use is on an EH pad phi.  Leave it alone; we'll insert loads and
+      // stores for it separately.
+      continue;
+    }
+    replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
+  }
+  return SpillSlot;
+}
+
+// TODO: Improve store placement.  Inserting at def is probably good, but
+// need to be careful not to introduce interfering stores (needs liveness
+// analysis).
+// TODO: Identify related phi nodes that can share spill slots, and share
+// them (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+                                   AllocaInst *SpillSlot) {
+  // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+  // stored to the spill slot by the end of the given Block.
+  SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
+
+  Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
+
+  while (!Worklist.empty()) {
+    BasicBlock *EHBlock;
+    Value *InVal;
+    std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+    PHINode *PN = dyn_cast<PHINode>(InVal);
+    if (PN && PN->getParent() == EHBlock) {
+      // The value is defined by another PHI we need to remove, with no room
+      // to insert a store after the PHI, so each predecessor needs to store
+      // its incoming value.
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+        Value *PredVal = PN->getIncomingValue(i);
+
+        // Undef can safely be skipped.
+        if (isa<UndefValue>(PredVal))
+          continue;
+
+        insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
+      }
+    } else {
+      // We need to store InVal, which dominates EHBlock, but can't put a
+      // store in EHBlock, so we need to put stores in each predecessor.
+      for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+        insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
+      }
+    }
+  }
+}
+
+void WinEHPrepare::insertPHIStore(
+    BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+    SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
+
+  if (PredBlock->isEHPad() &&
+      isa<CatchSwitchInst>(PredBlock->getFirstNonPHI())) {
+    // Pred is unsplittable, so we need to queue it on the worklist.
+    Worklist.push_back({PredBlock, PredVal});
+    return;
+  }
+
+  // Otherwise, insert the store at the end of the basic block.
+  new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+}
+
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U,
+                                      AllocaInst *&SpillSlot,
+                                      DenseMap<BasicBlock *, Value *> &Loads,
+                                      Function &F) {
+  // Lazily create the spill slot.
+  if (!SpillSlot)
+    SpillSlot = new AllocaInst(V->getType(), nullptr,
+                               Twine(V->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+
+  auto *UsingInst = cast<Instruction>(U.getUser());
+  if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+    // If this is a PHI node, we can't insert a load of the value before
+    // the use.  Instead insert the load in the predecessor block
+    // corresponding to the incoming value.
+    //
+    // Note that if there are multiple edges from a basic block to this
+    // PHI node, we cannot have multiple loads.  The problem is that
+    // the resulting PHI node will have multiple values (from each load)
+    // coming in from the same block, which is illegal SSA form.
+    // For this reason, we keep track of and reuse loads we insert.
+    BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+    if (auto *CatchRet =
+            dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+      // Putting a load above a catchret and use on the phi would still leave
+      // a cross-funclet def/use.  We need to split the edge, change the
+      // catchret to target the new block, and put the load there.
+      BasicBlock *PHIBlock = UsingInst->getParent();
+      BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+      // SplitEdge gives us:
+      //   IncomingBlock:
+      //     ...
+      //     br label %NewBlock
+      //   NewBlock:
+      //     catchret label %PHIBlock
+      // But we need:
+      //   IncomingBlock:
+      //     ...
+      //     catchret label %NewBlock
+      //   NewBlock:
+      //     br label %PHIBlock
+      // So move the terminators to each others' blocks and swap their
+      // successors.
+      BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+      Goto->removeFromParent();
+      CatchRet->removeFromParent();
+      IncomingBlock->getInstList().push_back(CatchRet);
+      NewBlock->getInstList().push_back(Goto);
+      Goto->setSuccessor(0, PHIBlock);
+      CatchRet->setSuccessor(NewBlock);
+      // Update the color mapping for the newly split edge.
+      ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+      BlockColors[NewBlock] = ColorsForPHIBlock;
+      for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+        FuncletBlocks[FuncletPad].push_back(NewBlock);
+      // Treat the new block as incoming for load insertion.
+      IncomingBlock = NewBlock;
+    }
+    Value *&Load = Loads[IncomingBlock];
+    // Insert the load into the predecessor block.
+    if (!Load)
+      Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                          /*Volatile=*/false, IncomingBlock->getTerminator());
+
+    U.set(Load);
+  } else {
+    // Reload right before the old use.
+    auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                              /*Volatile=*/false, UsingInst);
+    U.set(Load);
+  }
+}
+
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+                                      MCSymbol *InvokeBegin,
+                                      MCSymbol *InvokeEnd) {
+  assert(InvokeStateMap.count(II) &&
+         "should get invoke with precomputed state");
+  LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II],
+                                                InvokeEnd);
+}
+
+WinEHFuncInfo::WinEHFuncInfo() {}
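addIPToStateRange keys the table by the invoke's begin label and stores the precomputed state together with the end label, so a consumer can recover [begin, end) -> state ranges. A minimal stand-alone model of that map, with MCSymbol replaced by an opaque pointer for illustration:

    #include <map>
    #include <utility>

    using Label = const void *; // stand-in for MCSymbol *

    // Mirrors LabelToStateMap: begin label -> (EH state, end label).
    using IPToStateMap = std::map<Label, std::pair<int, Label>>;

    inline void addRange(IPToStateMap &Map, Label Begin, Label End,
                         int State) {
      Map[Begin] = std::make_pair(State, End);
    }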
+void FieldListRecordBuilder::writeMethod(uint16_t OverloadCount,
+                                         TypeIndex MethodList, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::Method);
+  Builder.writeUInt16(OverloadCount);
+  Builder.writeTypeIndex(MethodList);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(
+    MemberAccess Access, MethodKind Kind, MethodOptions Options, TypeIndex Type,
+    int32_t VTableSlotOffset, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  uint16_t Flags = static_cast<uint16_t>(Access);
+  Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+  Flags |= static_cast<uint16_t>(Options);
+
+  Builder.writeTypeRecordKind(TypeRecordKind::OneMethod);
+  Builder.writeUInt16(Flags);
+  Builder.writeTypeIndex(Type);
+  switch (Kind) {
+  case MethodKind::IntroducingVirtual:
+  case MethodKind::PureIntroducingVirtual:
+    assert(VTableSlotOffset >= 0);
+    Builder.writeInt32(VTableSlotOffset);
+    break;
+
+  default:
+    assert(VTableSlotOffset == -1);
+    break;
+  }
+
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(const MethodInfo &Method,
+                                            StringRef Name) {
+  writeOneMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+                 Method.getType(), Method.getVTableSlotOffset(), Name);
+}
+
+void FieldListRecordBuilder::writeNestedType(TypeIndex Type, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::NestedType);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeStaticMember(MemberAccess Access,
+                                               TypeIndex Type, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::StaticMember);
+  Builder.writeUInt16(static_cast<uint16_t>(Access));
+  Builder.writeTypeIndex(Type);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeIndirectVirtualBaseClass(
+    MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+    int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+  writeVirtualBaseClass(TypeRecordKind::IndirectVirtualBaseClass, Access, Type,
+                        VirtualBasePointerType, VirtualBasePointerOffset,
+                        SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+    MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+    int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+  writeVirtualBaseClass(TypeRecordKind::VirtualBaseClass, Access, Type,
+                        VirtualBasePointerType, VirtualBasePointerOffset,
+                        SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+    TypeRecordKind Kind, MemberAccess Access, TypeIndex Type,
+    TypeIndex VirtualBasePointerType, int64_t VirtualBasePointerOffset,
+    uint64_t SlotIndex) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(Kind);
+  Builder.writeUInt16(static_cast<uint16_t>(Access));
+  Builder.writeTypeIndex(Type);
+  Builder.writeTypeIndex(VirtualBasePointerType);
+  Builder.writeEncodedInteger(VirtualBasePointerOffset);
+  Builder.writeEncodedUnsignedInteger(SlotIndex);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeVirtualFunctionTablePointer(TypeIndex Type) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::VirtualFunctionTablePointer);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+
+  finishSubRecord();
+}
\ No newline at end of file
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/DebugInfo/CodeView/LLVMBuild.txt
similarity index 78%
rename from lib/Analysis/IPA/LLVMBuild.txt
rename to lib/DebugInfo/CodeView/LLVMBuild.txt
index 980e91809b55..4db23376fce4 100644
--- a/lib/Analysis/IPA/LLVMBuild.txt
+++ b/lib/DebugInfo/CodeView/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===;
+;===- ./lib/DebugInfo/CodeView/LLVMBuild.txt -------------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
@@ -17,7 +17,6 @@
 [component_0]
 type = Library
-name = IPA
-parent = Libraries
-library_name = ipa
-required_libraries = Analysis Core Support
+name = DebugInfoCodeView
+parent = DebugInfo
+required_libraries = Support
diff --git a/lib/DebugInfo/CodeView/Line.cpp b/lib/DebugInfo/CodeView/Line.cpp
new file mode 100644
index 000000000000..4cb766b5fd26
--- /dev/null
+++ b/lib/DebugInfo/CodeView/Line.cpp
@@ -0,0 +1,22 @@
+//===-- Line.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/Line.h"
+
+using namespace llvm;
+using namespace codeview;
+
+LineInfo::LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement) {
+  LineData = StartLine & StartLineMask;
+  uint32_t LineDelta = EndLine - StartLine;
+  LineData |= (LineDelta << EndLineDeltaShift) & EndLineDeltaMask;
+  if (IsStatement) {
+    LineData |= StatementFlag;
+  }
+}
diff --git a/lib/DebugInfo/CodeView/ListRecordBuilder.cpp b/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
new file mode 100644
index 000000000000..69c7e87330e6
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
@@ -0,0 +1,31 @@
+//===-- ListRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+ListRecordBuilder::ListRecordBuilder(TypeRecordKind Kind) : Builder(Kind) {}
+
+void ListRecordBuilder::finishSubRecord() {
+  // The builder starts at offset 2 in the actual CodeView buffer, so add an
+  // additional offset of 2 before computing the alignment.
+  uint32_t Remainder = (Builder.size() + 2) % 4;
+  if (Remainder != 0) {
+    for (int32_t PaddingBytesLeft = 4 - Remainder; PaddingBytesLeft > 0;
+         --PaddingBytesLeft) {
+      Builder.writeUInt8(0xf0 + PaddingBytesLeft);
+    }
+  }
+
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  assert(Builder.size() < 65536);
+}
diff --git a/lib/DebugInfo/CodeView/Makefile b/lib/DebugInfo/CodeView/Makefile
new file mode 100644
index 000000000000..70e2bd011429
--- /dev/null
+++ b/lib/DebugInfo/CodeView/Makefile
@@ -0,0 +1,14 @@
+##===- lib/DebugInfo/CodeView/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMDebugInfoCodeView
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
new file mode 100644
index 000000000000..9afce92eeb1d
--- /dev/null
+++ b/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
@@ -0,0 +1,35 @@
+//===-- MemoryTypeTableBuilder.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MemoryTypeTableBuilder::Record::Record(StringRef RData)
+    : Size(RData.size()), Data(new char[RData.size()]) {
+  memcpy(Data.get(), RData.data(), RData.size());
+}
+
+TypeIndex MemoryTypeTableBuilder::writeRecord(StringRef Data) {
+  auto I = HashedRecords.find(Data);
+  if (I != HashedRecords.end()) {
+    return I->second;
+  }
+
+  std::unique_ptr<Record> R(new Record(Data));
+
+  TypeIndex TI(static_cast<uint32_t>(Records.size()) +
+               TypeIndex::FirstNonSimpleIndex);
+  HashedRecords.insert(std::make_pair(StringRef(R->data(), R->size()), TI));
+  Records.push_back(std::move(R));
+
+  return TI;
+}
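Note the interning behavior of writeRecord(): records are keyed by their serialized bytes, so structurally identical types collapse to a single TypeIndex. A hedged sketch of the observable effect (Int32TI stands in for any existing type index, and the ModifierRecord constructor arguments are assumed from the accessors used by writeModifier()):

// Illustrative only, not part of the patch.
MemoryTypeTableBuilder Table;
ModifierRecord Const(Int32TI, ModifierOptions::Const); // assumed constructor
TypeIndex A = Table.writeModifier(Const); // serialized, hashed, stored
TypeIndex B = Table.writeModifier(Const); // same bytes, so same TypeIndex
assert(A == B); // indices at or above TypeIndex::FirstNonSimpleIndex (0x1000)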
diff --git a/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp b/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
new file mode 100644
index 000000000000..889302556b2d
--- /dev/null
+++ b/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
@@ -0,0 +1,49 @@
+//===-- MethodListRecordBuilder.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MethodListRecordBuilder::MethodListRecordBuilder()
+    : ListRecordBuilder(TypeRecordKind::MethodList) {}
+
+void MethodListRecordBuilder::writeMethod(MemberAccess Access, MethodKind Kind,
+                                          MethodOptions Options, TypeIndex Type,
+                                          int32_t VTableSlotOffset) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  uint16_t Flags = static_cast<uint16_t>(Access);
+  Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+  Flags |= static_cast<uint16_t>(Options);
+
+  Builder.writeUInt16(Flags);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+  switch (Kind) {
+  case MethodKind::IntroducingVirtual:
+  case MethodKind::PureIntroducingVirtual:
+    assert(VTableSlotOffset >= 0);
+    Builder.writeInt32(VTableSlotOffset);
+    break;
+
+  default:
+    assert(VTableSlotOffset == -1);
+    break;
+  }
+
+  // TODO: Fail if too big?
+}
+
+void MethodListRecordBuilder::writeMethod(const MethodInfo &Method) {
+  writeMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+              Method.getType(), Method.getVTableSlotOffset());
+}
diff --git a/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp b/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
new file mode 100644
index 000000000000..cbf464fd7668
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
@@ -0,0 +1,113 @@
+//===-- TypeRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+TypeRecordBuilder::TypeRecordBuilder(TypeRecordKind Kind) : Stream(Buffer),
+  Writer(Stream) {
+  writeTypeRecordKind(Kind);
+}
+
+StringRef TypeRecordBuilder::str() {
+  return StringRef(Buffer.data(), Buffer.size());
+}
+
+void TypeRecordBuilder::writeUInt8(uint8_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt16(int16_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt16(uint16_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt32(int32_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt32(uint32_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt64(int64_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt64(uint64_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeEncodedInteger(int64_t Value) {
+  if (Value >= 0) {
+    writeEncodedUnsignedInteger(static_cast<uint64_t>(Value));
+  } else {
+    writeEncodedSignedInteger(Value);
+  }
+}
+
+void TypeRecordBuilder::writeEncodedSignedInteger(int64_t Value) {
+  if (Value >= std::numeric_limits<int8_t>::min() &&
+      Value <= std::numeric_limits<int8_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::SByte));
+    writeInt16(static_cast<int16_t>(Value));
+  } else if (Value >= std::numeric_limits<int16_t>::min() &&
+             Value <= std::numeric_limits<int16_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int16));
+    writeInt16(static_cast<int16_t>(Value));
+  } else if (Value >= std::numeric_limits<int32_t>::min() &&
+             Value <= std::numeric_limits<int32_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int32));
+    writeInt32(static_cast<int32_t>(Value));
+  } else {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int64));
+    writeInt64(Value);
+  }
+}
+
+void TypeRecordBuilder::writeEncodedUnsignedInteger(uint64_t Value) {
+  if (Value < static_cast<uint16_t>(TypeRecordKind::SByte)) {
+    writeUInt16(static_cast<uint16_t>(Value));
+  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt16));
+    writeUInt16(static_cast<uint16_t>(Value));
+  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt32));
+    writeUInt32(static_cast<uint32_t>(Value));
+  } else {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt64));
+    writeUInt64(Value);
+  }
+}
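These helpers implement CodeView's variable-length numeric leaves: a value below 0x8000 is stored directly in the two-byte field, and anything larger gets a leaf-kind prefix followed by a fixed-width payload. Expected little-endian byte sequences, assuming the TypeRecordKind enumerators carry the usual LF_* numeric-leaf values (LF_USHORT = 0x8002, LF_ULONG = 0x8004):

// Illustrative only, not part of the patch.
TypeRecordBuilder B(TypeRecordKind::FieldList); // the kind itself: 2 bytes
B.writeEncodedUnsignedInteger(100);     // 64 00              (immediate)
B.writeEncodedUnsignedInteger(0x9000);  // 02 80 00 90        (UInt16 leaf)
B.writeEncodedUnsignedInteger(0x12345); // 04 80 45 23 01 00  (UInt32 leaf)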
+void TypeRecordBuilder::writeNullTerminatedString(const char *Value) {
+  assert(Value != nullptr);
+
+  size_t Length = strlen(Value);
+  Stream.write(Value, Length);
+  writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeNullTerminatedString(StringRef Value) {
+  Stream.write(Value.data(), Value.size());
+  writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeTypeIndex(TypeIndex TypeInd) {
+  writeUInt32(TypeInd.getIndex());
+}
+
+void TypeRecordBuilder::writeTypeRecordKind(TypeRecordKind Kind) {
+  writeUInt16(static_cast<uint16_t>(Kind));
+}
diff --git a/lib/DebugInfo/CodeView/TypeTableBuilder.cpp b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
new file mode 100644
index 000000000000..4af5dcaf7228
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
@@ -0,0 +1,217 @@
+//===-- TypeTableBuilder.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace codeview;
+
+namespace {
+
+const int PointerKindShift = 0;
+const int PointerModeShift = 5;
+const int PointerSizeShift = 13;
+
+const int ClassHfaKindShift = 11;
+const int ClassWindowsRTClassKindShift = 14;
+
+void writePointerBase(TypeRecordBuilder &Builder,
+                      const PointerRecordBase &Record) {
+  Builder.writeTypeIndex(Record.getReferentType());
+  uint32_t flags =
+      static_cast<uint32_t>(Record.getOptions()) |
+      (Record.getSize() << PointerSizeShift) |
+      (static_cast<uint32_t>(Record.getMode()) << PointerModeShift) |
+      (static_cast<uint32_t>(Record.getPointerKind()) << PointerKindShift);
+  Builder.writeUInt32(flags);
+}
+}
+
+TypeTableBuilder::TypeTableBuilder() {}
+
+TypeTableBuilder::~TypeTableBuilder() {}
+
+TypeIndex TypeTableBuilder::writeModifier(const ModifierRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Modifier);
+
+  Builder.writeTypeIndex(Record.getModifiedType());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeProcedure(const ProcedureRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Procedure);
+
+  Builder.writeTypeIndex(Record.getReturnType());
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+  Builder.writeUInt16(Record.getParameterCount());
+  Builder.writeTypeIndex(Record.getArgumentList());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeMemberFunction(const MemberFunctionRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::MemberFunction);
+
+  Builder.writeTypeIndex(Record.getReturnType());
+  Builder.writeTypeIndex(Record.getClassType());
+  Builder.writeTypeIndex(Record.getThisType());
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+  Builder.writeUInt16(Record.getParameterCount());
+  Builder.writeTypeIndex(Record.getArgumentList());
+  Builder.writeInt32(Record.getThisPointerAdjustment());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeArgumentList(const ArgumentListRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::ArgumentList);
+
+  Builder.writeUInt32(Record.getArgumentTypes().size());
+  for (TypeIndex TI : Record.getArgumentTypes()) {
+    Builder.writeTypeIndex(TI);
+  }
+
+  return writeRecord(Builder);
+}
+TypeIndex TypeTableBuilder::writePointer(const PointerRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+  writePointerBase(Builder, Record);
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writePointerToMember(const PointerToMemberRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+  writePointerBase(Builder, Record);
+
+  Builder.writeTypeIndex(Record.getContainingType());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getRepresentation()));
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeArray(const ArrayRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Array);
+
+  Builder.writeTypeIndex(Record.getElementType());
+  Builder.writeTypeIndex(Record.getIndexType());
+  Builder.writeEncodedUnsignedInteger(Record.getSize());
+  Builder.writeNullTerminatedString(Record.getName());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeAggregate(const AggregateRecord &Record) {
+  assert((Record.getKind() == TypeRecordKind::Structure) ||
+         (Record.getKind() == TypeRecordKind::Class) ||
+         (Record.getKind() == TypeRecordKind::Union));
+
+  TypeRecordBuilder Builder(Record.getKind());
+
+  Builder.writeUInt16(Record.getMemberCount());
+  uint16_t Flags =
+      static_cast<uint16_t>(Record.getOptions()) |
+      (static_cast<uint16_t>(Record.getHfa()) << ClassHfaKindShift) |
+      (static_cast<uint16_t>(Record.getWinRTKind())
+       << ClassWindowsRTClassKindShift);
+  Builder.writeUInt16(Flags);
+  Builder.writeTypeIndex(Record.getFieldList());
+  if (Record.getKind() != TypeRecordKind::Union) {
+    Builder.writeTypeIndex(Record.getDerivationList());
+    Builder.writeTypeIndex(Record.getVTableShape());
+  } else {
+    assert(Record.getDerivationList() == TypeIndex());
+    assert(Record.getVTableShape() == TypeIndex());
+  }
+  Builder.writeEncodedUnsignedInteger(Record.getSize());
+  Builder.writeNullTerminatedString(Record.getName());
+  if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+      ClassOptions::None) {
+    Builder.writeNullTerminatedString(Record.getUniqueName());
+  }
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeEnum(const EnumRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Enum);
+
+  Builder.writeUInt16(Record.getMemberCount());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+  Builder.writeTypeIndex(Record.getUnderlyingType());
+  Builder.writeTypeIndex(Record.getFieldList());
+  Builder.writeNullTerminatedString(Record.getName());
+  if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+      ClassOptions::None) {
+    Builder.writeNullTerminatedString(Record.getUniqueName());
+  }
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeBitField(const BitFieldRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::BitField);
+
+  Builder.writeTypeIndex(Record.getType());
+  Builder.writeUInt8(Record.getBitSize());
+  Builder.writeUInt8(Record.getBitOffset());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeVirtualTableShape(
+    const VirtualTableShapeRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::VirtualTableShape);
+
+  ArrayRef<VirtualTableSlotKind> Slots = Record.getSlots();
+
+  Builder.writeUInt16(Slots.size());
+  for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
+    uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
+    if ((SlotIndex + 1) < Slots.size()) {
+      Byte |= static_cast<uint8_t>(Slots[SlotIndex + 1]);
+    }
+    Builder.writeUInt8(Byte);
+  }
+
+  return writeRecord(Builder);
+}
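Slot kinds in a vtable shape are 4-bit values packed two per byte, high nibble first. A sketch of the inverse direction, illustrative only and relying on the same layout assumption as the loop above:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

// Recover the raw 4-bit slot kinds from packed vtable-shape bytes.
llvm::SmallVector<uint8_t, 8> unpackSlots(llvm::ArrayRef<uint8_t> Bytes,
                                          size_t NumSlots) {
  llvm::SmallVector<uint8_t, 8> Slots;
  for (size_t I = 0; I < NumSlots; ++I) {
    uint8_t Byte = Bytes[I / 2];
    Slots.push_back(I % 2 == 0 ? (Byte >> 4) : (Byte & 0xF));
  }
  return Slots;
}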
+TypeIndex TypeTableBuilder::writeRecord(TypeRecordBuilder &Builder) {
+  return writeRecord(Builder.str());
+}
+
+TypeIndex TypeTableBuilder::writeFieldList(FieldListRecordBuilder &FieldList) {
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  return writeRecord(FieldList.str());
+}
+
+TypeIndex
+TypeTableBuilder::writeMethodList(MethodListRecordBuilder &MethodList) {
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  return writeRecord(MethodList.str());
+}
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index d5f8a6f24eec..7104c5f10391 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -10,9 +10,11 @@ add_llvm_library(LLVMDebugInfoDWARF
   DWARFDebugInfoEntry.cpp
   DWARFDebugLine.cpp
   DWARFDebugLoc.cpp
+  DWARFDebugMacro.cpp
   DWARFDebugRangeList.cpp
   DWARFFormValue.cpp
   DWARFTypeUnit.cpp
+  DWARFUnitIndex.cpp
   DWARFUnit.cpp
   SyntaxHighlighting.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 96bcf15e0af0..a4195b75c47d 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -126,6 +127,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
     getDebugFrame()->dump(OS);
   }
 
+  if (DumpType == DIDT_All || DumpType == DIDT_Macro) {
+    OS << "\n.debug_macinfo contents:\n";
+    getDebugMacro()->dump(OS);
+  }
+
   uint32_t offset = 0;
   if (DumpType == DIDT_All || DumpType == DIDT_Aranges) {
     OS << "\n.debug_aranges contents:\n";
@@ -155,6 +161,16 @@
     }
   }
 
+  if (DumpType == DIDT_All || DumpType == DIDT_CUIndex) {
+    OS << "\n.debug_cu_index contents:\n";
+    getCUIndex().dump(OS);
+  }
+
+  if (DumpType == DIDT_All || DumpType == DIDT_TUIndex) {
+    OS << "\n.debug_tu_index contents:\n";
+    getTUIndex().dump(OS);
+  }
+
   if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) {
     OS << "\n.debug_line.dwo contents:\n";
     unsigned stmtOffset = 0;
@@ -250,6 +266,28 @@
         getStringSection(), isLittleEndian());
 }
 
+const DWARFUnitIndex &DWARFContext::getCUIndex() {
+  if (CUIndex)
+    return *CUIndex;
+
+  DataExtractor CUIndexData(getCUIndexSection(), isLittleEndian(), 0);
+
+  CUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
+  CUIndex->parse(CUIndexData);
+  return *CUIndex;
+}
+
+const DWARFUnitIndex &DWARFContext::getTUIndex() {
+  if (TUIndex)
+    return *TUIndex;
+
+  DataExtractor TUIndexData(getTUIndexSection(), isLittleEndian(), 0);
+
+  TUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_TYPES);
+  TUIndex->parse(TUIndexData);
+  return *TUIndex;
+}
+
 const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
   if (Abbrev)
     return Abbrev.get();
@@ -322,24 +360,37 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
   return DebugFrame.get();
 }
 
+const DWARFDebugMacro *DWARFContext::getDebugMacro() {
+  if (Macro)
+    return Macro.get();
+
+  DataExtractor MacinfoData(getMacinfoSection(), isLittleEndian(), 0);
+  Macro.reset(new DWARFDebugMacro());
+  Macro->parse(MacinfoData);
+  return Macro.get();
+}
+
 const DWARFLineTable *
 DWARFContext::getLineTableForUnit(DWARFUnit *U) {
   if (!Line)
     Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
+
   const auto *UnitDIE = U->getUnitDIE();
   if (UnitDIE == nullptr)
     return nullptr;
+
   unsigned stmtOffset =
       UnitDIE->getAttributeValueAsSectionOffset(U, DW_AT_stmt_list, -1U);
   if (stmtOffset == -1U)
     return nullptr; // No line table for this compile unit.
+  stmtOffset += U->getLineTableOffset();
   // See if the line table is cached.
   if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
     return lt;
 
   // We have to parse it first.
-  DataExtractor lineData(getLineSection().Data, isLittleEndian(),
+  DataExtractor lineData(U->getLineSection(), isLittleEndian(),
                          U->getAddressByteSize());
   return Line->getOrParseLineTable(lineData, stmtOffset);
 }
@@ -556,10 +607,11 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
       continue;
 
     StringRef data;
+    section_iterator RelocatedSection = Section.getRelocatedSection();
     // Try to obtain an already relocated version of this section.
    // Else use the unrelocated section from the object file. We'll have to
     // apply relocations ourselves later.
-    if (!L || !L->getLoadedSectionContents(name,data))
+    if (!L || !L->getLoadedSectionContents(*RelocatedSection,data))
       Section.getContents(data);
 
     name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
@@ -591,6 +643,7 @@
         .Case("debug_frame", &DebugFrameSection)
         .Case("debug_str", &StringSection)
         .Case("debug_ranges", &RangeSection)
+        .Case("debug_macinfo", &MacinfoSection)
         .Case("debug_pubnames", &PubNamesSection)
         .Case("debug_pubtypes", &PubTypesSection)
         .Case("debug_gnu_pubnames", &GnuPubNamesSection)
@@ -607,6 +660,8 @@
         .Case("apple_namespaces", &AppleNamespacesSection.Data)
         .Case("apple_namespac", &AppleNamespacesSection.Data)
         .Case("apple_objc", &AppleObjCSection.Data)
+        .Case("debug_cu_index", &CUIndexSection)
+        .Case("debug_tu_index", &TUIndexSection)
         // Any more debug info sections go here.
         .Default(nullptr);
     if (SectionData) {
@@ -623,7 +678,6 @@
       TypesDWOSections[Section].Data = data;
     }
 
-    section_iterator RelocatedSection = Section.getRelocatedSection();
     if (RelocatedSection == Obj.section_end())
       continue;
 
@@ -634,7 +688,15 @@
     // If the section we're relocating was relocated already by the JIT,
     // then we used the relocated version above, so we do not need to process
     // relocations for it now.
-    if (L && L->getLoadedSectionContents(RelSecName,RelSecData))
+    if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData))
+      continue;
+
+    // In Mach-o files, the relocations do not need to be applied if
+    // there is no load offset to apply. The value read at the
+    // relocation point already factors in the section address
+    // (actually applying the relocations will produce wrong results
+    // as the section address will be added twice).
+    if (!L && isa<MachOObjectFile>(&Obj))
      continue;
 
     RelSecName = RelSecName.substr(
@@ -685,13 +747,19 @@
       }
       SymAddr = *SymAddrOrErr;
       // Also remember what section this symbol is in for later
-      Sym->getSection(RSec);
+      RSec = *Sym->getSection();
     } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
       // MachO also has relocations that point to sections and
       // scattered relocations.
-      // FIXME: We are not handling scattered relocations, do we have to?
-      RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
-      SymAddr = RSec->getAddress();
+      auto RelocInfo = MObj->getRelocation(Reloc.getRawDataRefImpl());
+      if (MObj->isRelocationScattered(RelocInfo)) {
+        // FIXME: it's not clear how to correctly handle scattered
+        // relocations.
+        continue;
+      } else {
+        RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
+        SymAddr = RSec->getAddress();
+      }
     }
 
     // If we are given load addresses for the sections, we need to adjust:
@@ -699,12 +767,15 @@
     //   (Address of Section in File) +
     //   (Load Address of Section)
     if (L != nullptr && RSec != Obj.section_end()) {
-      // RSec is now either the section being targetted or the section
-      // containing the symbol being targetted. In either case,
+      // RSec is now either the section being targeted or the section
+      // containing the symbol being targeted. In either case,
       // we need to perform the same computation.
       StringRef SecName;
       RSec->getName(SecName);
-      SectionLoadAddress = L->getSectionLoadAddress(SecName);
+//      llvm::dbgs() << "Name: '" << SecName
+//                   << "', RSec: " << RSec->getRawDataRefImpl()
+//                   << ", Section: " << Section.getRawDataRefImpl() << "\n";
+      SectionLoadAddress = L->getSectionLoadAddress(*RSec);
       if (SectionLoadAddress != 0)
         SymAddr += SectionLoadAddress - RSec->getAddress();
     }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 5abbde4ac0fe..62d5e666aef9 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -139,7 +139,7 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
   std::string File;
   auto Color = syntax::Enumerator;
   if (attr == DW_AT_decl_file || attr == DW_AT_call_file) {
-    Color = syntax::String;
+    Color = syntax::String;
     if (const auto *LT = u->getContext().getLineTableForUnit(u))
       if (LT->getFileNameByIndex(
               formValue.getAsUnsignedConstant().getValue(),
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
new file mode 100644
index 000000000000..b825d4d25243
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -0,0 +1,103 @@
+//===-- DWARFDebugMacro.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SyntaxHighlighting.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace dwarf;
+using namespace syntax;
+
+void DWARFDebugMacro::dump(raw_ostream &OS) const {
+  unsigned IndLevel = 0;
+  for (const Entry &E : Macros) {
+    // There should not be DW_MACINFO_end_file when IndLevel is Zero. However,
+    // this check handles the case of corrupted ".debug_macinfo" section.
+    if (IndLevel > 0)
+      IndLevel -= (E.Type == DW_MACINFO_end_file);
+    // Print indentation.
+    for (unsigned I = 0; I < IndLevel; I++)
+      OS << "  ";
+    IndLevel += (E.Type == DW_MACINFO_start_file);
+
+    WithColor(OS, syntax::Macro).get() << MacinfoString(E.Type);
+    switch (E.Type) {
+    default:
+      // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+      break;
+    case DW_MACINFO_define:
+    case DW_MACINFO_undef:
+      OS << " - lineno: " << E.Line;
+      OS << " macro: " << E.MacroStr;
+      break;
+    case DW_MACINFO_start_file:
+      OS << " - lineno: " << E.Line;
+      OS << " filenum: " << E.File;
+      break;
+    case DW_MACINFO_end_file:
+      break;
+    case DW_MACINFO_vendor_ext:
+      OS << " - constant: " << E.ExtConstant;
+      OS << " string: " << E.ExtStr;
+      break;
+    }
+    OS << "\n";
+  }
+}
+
+void DWARFDebugMacro::parse(DataExtractor data) {
+  uint32_t Offset = 0;
+  while (data.isValidOffset(Offset)) {
+    // A macro list entry consists of:
+    Entry E;
+    // 1. Macinfo type
+    E.Type = data.getULEB128(&Offset);
+
+    if (E.Type == 0) {
+      // Reached end of ".debug_macinfo" section.
+      return;
+    }
+
+    switch (E.Type) {
+    default:
+      // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+      // Push the corrupted entry to the list and halt parsing.
+      E.Type = DW_MACINFO_invalid;
+      Macros.push_back(E);
+      return;
+    case DW_MACINFO_define:
+    case DW_MACINFO_undef:
+      // 2. Source line
+      E.Line = data.getULEB128(&Offset);
+      // 3. Macro string
+      E.MacroStr = data.getCStr(&Offset);
+      break;
+    case DW_MACINFO_start_file:
+      // 2. Source line
+      E.Line = data.getULEB128(&Offset);
+      // 3. Source file id
+      E.File = data.getULEB128(&Offset);
+      break;
+    case DW_MACINFO_end_file:
+      break;
+    case DW_MACINFO_vendor_ext:
+      // 2. Vendor extension constant
+      E.ExtConstant = data.getULEB128(&Offset);
+      // 3. Vendor extension string
+      E.ExtStr = data.getCStr(&Offset);
+      break;
+    }
+
+    Macros.push_back(E);
+  }
+}
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 53a676efaf3f..3dc58423df68 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <climits>
+#include <limits>
 using namespace llvm;
 using namespace dwarf;
 using namespace syntax;
@@ -110,7 +110,7 @@
 static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
 
 bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
   // First, check DWARF4 form classes.
-  if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() &&
+  if (Form < makeArrayRef(DWARF4FormClasses).size() &&
       DWARF4FormClasses[Form] == FC)
     return true;
   // Check more forms from DWARF4 and DWARF5 proposals.
@@ -261,6 +261,12 @@
 DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
 
 bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
                                uint32_t *offset_ptr, const DWARFUnit *cu) {
+  return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(),
+                   cu->getAddressByteSize());
+}
+bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
+                               uint32_t *offset_ptr, uint16_t Version,
+                               uint8_t AddrSize) {
   bool indirect = false;
   do {
     switch (form) {
@@ -295,10 +301,10 @@
 
     // Compile unit address sized values
     case DW_FORM_addr:
-      *offset_ptr += cu->getAddressByteSize();
+      *offset_ptr += AddrSize;
       return true;
     case DW_FORM_ref_addr:
-      *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      *offset_ptr += getRefAddrSize(AddrSize, Version);
       return true;
 
     // 0 byte values - implied from the form.
@@ -565,7 +571,7 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
 
 Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
   if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
-      (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval))
+      (Form == DW_FORM_udata && uint64_t(std::numeric_limits<int64_t>::max()) < Value.uval))
     return None;
   switch (Form) {
   case DW_FORM_data4:
@@ -584,6 +590,6 @@
 Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
   if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
     return None;
-  return ArrayRef<uint8_t>(Value.data, Value.uval);
+  return makeArrayRef(Value.data, Value.uval);
 }
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 348476d72b60..92ca2d4c3ff0 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -14,29 +14,37 @@
 #include "llvm/Support/Path.h"
 #include <cstdio>
 
-using namespace llvm;
+namespace llvm {
 using namespace dwarf;
 
 void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
   parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(),
             C.getStringSection(), StringRef(), C.getAddrSection(),
-            C.isLittleEndian());
+            C.getLineSection().Data, C.isLittleEndian());
 }
 
 void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
-                                    const DWARFSection &DWOSection) {
+                                    const DWARFSection &DWOSection,
+                                    DWARFUnitIndex *Index) {
   parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(),
             C.getStringDWOSection(), C.getStringOffsetDWOSection(),
-            C.getAddrSection(), C.isLittleEndian());
+            C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian());
 }
 
 DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
                      const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
-                     StringRef SOS, StringRef AOS, bool LE,
-                     const DWARFUnitSectionBase &UnitSection)
+                     StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+                     const DWARFUnitSectionBase &UnitSection,
+                     const DWARFUnitIndex::Entry *IndexEntry)
     : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
-      StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
-      isLittleEndian(LE), UnitSection(UnitSection) {
+      LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
+        if (IndexEntry)
+          if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+            return SOS.slice(C->Offset, C->Offset + C->Length);
+        return SOS;
+      }()),
+      AddrOffsetSection(AOS), isLittleEndian(LE), UnitSection(UnitSection),
+      IndexEntry(IndexEntry) {
   clear();
 }
@@ -69,6 +77,17 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
   Length = debug_info.getU32(offset_ptr);
   Version = debug_info.getU16(offset_ptr);
   uint64_t AbbrOffset = debug_info.getU32(offset_ptr);
+  if (IndexEntry) {
+    if (AbbrOffset)
+      return false;
+    auto *UnitContrib = IndexEntry->getOffset();
+    if (!UnitContrib || UnitContrib->Length != (Length + 4))
+      return false;
+    auto *AbbrEntry = IndexEntry->getOffset(DW_SECT_ABBREV);
+    if (!AbbrEntry)
+      return false;
+    AbbrOffset = AbbrEntry->Offset;
+  }
   AddrSize = debug_info.getU8(offset_ptr);
 
   bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
@@ -375,3 +394,12 @@
     return DWARFDebugInfoEntryInlinedChain();
   return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address);
 }
+
+const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
+                                        DWARFSectionKind Kind) {
+  if (Kind == DW_SECT_INFO)
+    return Context.getCUIndex();
+  assert(Kind == DW_SECT_TYPES);
+  return Context.getTUIndex();
+}
+}
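The unit index parsed in the next file is the DWP package lookup table. Its on-disk layout, reconstructed here for reference from the reads performed by parseImpl() below (a derived sketch, not a quotation from the DWARF spec):

// .debug_cu_index / .debug_tu_index layout as consumed by DWARFUnitIndex:
//
//   uint32_t Version, NumColumns, NumUnits, NumBuckets;  // 16-byte header
//   uint64_t Signatures[NumBuckets];  // hash table keyed by unit signature
//   uint32_t Indexes[NumBuckets];     // 1-based row -> unit, 0 means empty
//   uint32_t ColumnKinds[NumColumns]; // one DW_SECT_* kind per column
//   uint32_t Offsets[NumUnits][NumColumns];
//   uint32_t Sizes[NumUnits][NumColumns];
//
// The contribution of unit U to the section in column C is the half-open
// byte range [Offsets[U][C], Offsets[U][C] + Sizes[U][C]).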
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
new file mode 100644
index 000000000000..96b316957dfd
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -0,0 +1,168 @@
+//===-- DWARFUnitIndex.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+bool DWARFUnitIndex::Header::parse(DataExtractor IndexData,
+                                   uint32_t *OffsetPtr) {
+  if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16))
+    return false;
+  Version = IndexData.getU32(OffsetPtr);
+  NumColumns = IndexData.getU32(OffsetPtr);
+  NumUnits = IndexData.getU32(OffsetPtr);
+  NumBuckets = IndexData.getU32(OffsetPtr);
+  return Version <= 2;
+}
+
+void DWARFUnitIndex::Header::dump(raw_ostream &OS) const {
+  OS << format("version = %u slots = %u\n\n", Version, NumBuckets);
+}
+
+bool DWARFUnitIndex::parse(DataExtractor IndexData) {
+  bool b = parseImpl(IndexData);
+  if (!b) {
+    // Make sure we don't try to dump anything
+    Header.NumBuckets = 0;
+    // Release any partially initialized data.
+    ColumnKinds.reset();
+    Rows.reset();
+  }
+  return b;
+}
+
+bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
+  uint32_t Offset = 0;
+  if (!Header.parse(IndexData, &Offset))
+    return false;
+
+  if (!IndexData.isValidOffsetForDataOfSize(
+          Offset, Header.NumBuckets * (8 + 4) +
+                      (2 * Header.NumUnits + 1) * 4 * Header.NumColumns))
+    return false;
+
+  Rows = llvm::make_unique<Entry[]>(Header.NumBuckets);
+  auto Contribs =
+      llvm::make_unique<Entry::SectionContribution *[]>(Header.NumUnits);
+  ColumnKinds = llvm::make_unique<DWARFSectionKind[]>(Header.NumColumns);
+
+  // Read Hash Table of Signatures
+  for (unsigned i = 0; i != Header.NumBuckets; ++i)
+    Rows[i].Signature = IndexData.getU64(&Offset);
+
+  // Read Parallel Table of Indexes
+  for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+    auto Index = IndexData.getU32(&Offset);
+    if (!Index)
+      continue;
+    Rows[i].Index = this;
+    Rows[i].Contributions =
+        llvm::make_unique<Entry::SectionContribution[]>(Header.NumColumns);
+    Contribs[Index - 1] = Rows[i].Contributions.get();
+  }
+
+  // Read the Column Headers
+  for (unsigned i = 0; i != Header.NumColumns; ++i) {
+    ColumnKinds[i] = static_cast<DWARFSectionKind>(IndexData.getU32(&Offset));
+    if (ColumnKinds[i] == InfoColumnKind) {
+      if (InfoColumn != -1)
+        return false;
+      InfoColumn = i;
+    }
+  }
+
+  if (InfoColumn == -1)
+    return false;
+
+  // Read Table of Section Offsets
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned i = 0; i != Header.NumColumns; ++i)
+      Contrib[i].Offset = IndexData.getU32(&Offset);
+  }
+
+  // Read Table of Section Sizes
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned i = 0; i != Header.NumColumns; ++i)
+      Contrib[i].Length = IndexData.getU32(&Offset);
+  }
+
+  return true;
+}
+
+StringRef DWARFUnitIndex::getColumnHeader(DWARFSectionKind DS) {
+#define CASE(DS)                                                               \
+  case DW_SECT_##DS:                                                           \
+    return #DS;
+  switch (DS) {
+    CASE(INFO);
+    CASE(TYPES);
+    CASE(ABBREV);
+    CASE(LINE);
+    CASE(LOC);
+    CASE(STR_OFFSETS);
+    CASE(MACINFO);
+    CASE(MACRO);
+  }
+  llvm_unreachable("unknown DWARFSectionKind");
+}
+
+void DWARFUnitIndex::dump(raw_ostream &OS) const {
+  if (!Header.NumBuckets)
+    return;
+
+  Header.dump(OS);
+  OS << "Index Signature         ";
+  for (unsigned i = 0; i != Header.NumColumns; ++i)
+    OS << ' ' << left_justify(getColumnHeader(ColumnKinds[i]), 24);
+  OS << "\n----- ------------------";
+  for (unsigned i = 0; i != Header.NumColumns; ++i)
+    OS << " ------------------------";
+  OS << '\n';
+  for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+    auto &Row = Rows[i];
+    if (auto *Contribs = Row.Contributions.get()) {
+      OS << format("%5u 0x%016" PRIx64 " ", i + 1, Row.Signature);
+      for (unsigned i = 0; i != Header.NumColumns; ++i) {
+        auto &Contrib = Contribs[i];
+        OS << format("[0x%08x, 0x%08x) ", Contrib.Offset,
+                     Contrib.Offset + Contrib.Length);
+      }
+      OS << '\n';
+    }
+  }
+}
+
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const {
+  uint32_t i = 0;
+  for (; i != Index->Header.NumColumns; ++i)
+    if (Index->ColumnKinds[i] == Sec)
+      return &Contributions[i];
+  return nullptr;
+}
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset() const {
+  return &Contributions[Index->InfoColumn];
+}
+
+const DWARFUnitIndex::Entry *
+DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
+  for (uint32_t i = 0; i != Header.NumBuckets; ++i)
+    if (const auto &Contribs = Rows[i].Contributions)
+      if (Contribs[InfoColumn].Offset == Offset)
+        return &Rows[i];
+  return nullptr;
+}
+}
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
index a6b4c6549ca4..4f561d062b12 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
@@ -27,6 +27,7 @@ WithColor::WithColor(llvm::raw_ostream &OS, enum HighlightColor Type) : OS(OS) {
     case Tag:        OS.changeColor(llvm::raw_ostream::BLUE);    break;
     case Attribute:  OS.changeColor(llvm::raw_ostream::CYAN);    break;
     case Enumerator: OS.changeColor(llvm::raw_ostream::MAGENTA); break;
+    case Macro:      OS.changeColor(llvm::raw_ostream::RED);     break;
     }
   }
 }
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 946a31308aa1..16e68351d5e1 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -17,7 +17,7 @@ namespace dwarf {
 namespace syntax {
 
 // Symbolic names for various syntax elements.
-enum HighlightColor { Address, String, Tag, Attribute, Enumerator };
+enum HighlightColor { Address, String, Tag, Attribute, Enumerator, Macro };
 
 /// An RAII object that temporarily switches an output stream to a
 /// specific color.
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
index 7a8e8baec2c7..23a5a3db5628 100644
--- a/lib/DebugInfo/LLVMBuild.txt
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = DWARF PDB
+subdirectories = CodeView DWARF PDB Symbolize
 
 [component_0]
 type = Group
diff --git a/lib/DebugInfo/Makefile b/lib/DebugInfo/Makefile
index 27a5e1f0f496..6072af314416 100644
--- a/lib/DebugInfo/Makefile
+++ b/lib/DebugInfo/Makefile
@@ -10,6 +10,6 @@
 LEVEL = ../..
 include $(LEVEL)/Makefile.config
 
-PARALLEL_DIRS := DWARF PDB
+PARALLEL_DIRS := CodeView DWARF PDB Symbolize
 
-include $(LEVEL)/Makefile.common
\ No newline at end of file
+include $(LEVEL)/Makefile.common
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index 13201bbaa641..613407eb1346 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -31,7 +31,7 @@ PDB_ErrorCode llvm::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
 
 PDB_ErrorCode llvm::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
                                    std::unique_ptr<IPDBSession> &Session) {
-// Create the correct concrete instance type based on the value of Type.
+  // Create the correct concrete instance type based on the value of Type.
 #if HAVE_DIA_SDK
   return DIASession::createFromExe(Path, Session);
 #endif
diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp
index 83f27c7fa3d9..ca2ae6665ce8 100644
--- a/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/lib/DebugInfo/PDB/PDBContext.cpp
@@ -21,24 +21,11 @@ using namespace llvm;
 using namespace llvm::object;
 
 PDBContext::PDBContext(const COFFObjectFile &Object,
-                       std::unique_ptr<IPDBSession> PDBSession,
-                       bool RelativeAddress)
+                       std::unique_ptr<IPDBSession> PDBSession)
     : DIContext(CK_PDB), Session(std::move(PDBSession)) {
-  if (!RelativeAddress) {
-    uint64_t ImageBase = 0;
-    if (Object.is64()) {
-      const pe32plus_header *Header = nullptr;
-      Object.getPE32PlusHeader(Header);
-      if (Header)
-        ImageBase = Header->ImageBase;
-    } else {
-      const pe32_header *Header = nullptr;
-      Object.getPE32Header(Header);
-      if (Header)
-        ImageBase = static_cast<uint64_t>(Header->ImageBase);
-    }
-    Session->setLoadAddress(ImageBase);
-  }
+  ErrorOr<uint64_t> ImageBase = Object.getImageBase();
+  if (ImageBase)
+    Session->setLoadAddress(ImageBase.get());
 }
 
 void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType) {}
diff --git a/lib/DebugInfo/Symbolize/CMakeLists.txt b/lib/DebugInfo/Symbolize/CMakeLists.txt
new file mode 100644
index 000000000000..fe5c4bfc4321
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMSymbolize
+  DIPrinter.cpp
+  SymbolizableObjectFile.cpp
+  Symbolize.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/Symbolize
+  )
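The DIPrinter added below formats DILineInfo in an addr2line-compatible way. A hypothetical usage sketch (the constructor signature with defaulted printing options is an assumption, since DIPrinter.h is not part of this hunk):

// Illustrative only, not part of the patch.
DILineInfo Info;            // e.g. returned by LLVMSymbolizer::symbolizeCode()
Info.FunctionName = "main"; // sample values for illustration
Info.FileName = "/src/a.c";
Info.Line = 42;
Info.Column = 7;
DIPrinter Printer(llvm::outs()); // assumed: options default to printing names
Printer << Info;                 // emits "main\n/src/a.c:42:7\n"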
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
new file mode 100644
index 000000000000..c6bfbc07dcf3
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -0,0 +1,69 @@
+//===- lib/DebugInfo/Symbolize/DIPrinter.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DIPrinter class, which is responsible for printing
+// structures defined in DebugInfo/DIContext.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+
+#include "llvm/DebugInfo/DIContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+// By default, DILineInfo contains "<invalid>" for function/filename it
+// cannot fetch. We replace it to "??" to make our output closer to addr2line.
+static const char kDILineInfoBadString[] = "<invalid>";
+static const char kBadString[] = "??";
+
+void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
+  if (PrintFunctionNames) {
+    std::string FunctionName = Info.FunctionName;
+    if (FunctionName == kDILineInfoBadString)
+      FunctionName = kBadString;
+
+    StringRef Delimiter = (PrintPretty == true) ? " at " : "\n";
+    StringRef Prefix = (PrintPretty && Inlined) ? " (inlined by) " : "";
+    OS << Prefix << FunctionName << Delimiter;
+  }
+  std::string Filename = Info.FileName;
+  if (Filename == kDILineInfoBadString)
+    Filename = kBadString;
+  OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+}
+
+DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
+  printName(Info, false);
+  return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
+  uint32_t FramesNum = Info.getNumberOfFrames();
+  if (FramesNum == 0) {
+    printName(DILineInfo(), false);
+    return *this;
+  }
+  for (uint32_t i = 0; i < FramesNum; i++)
+    printName(Info.getFrame(i), i > 0);
+  return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
+  std::string Name = Global.Name;
+  if (Name == kDILineInfoBadString)
+    Name = kBadString;
+  OS << Name << "\n";
+  OS << Global.Start << " " << Global.Size << "\n";
+  return *this;
+}
+
+}
+}
diff --git a/lib/DebugInfo/Symbolize/LLVMBuild.txt b/lib/DebugInfo/Symbolize/LLVMBuild.txt
new file mode 100644
index 000000000000..f9ec6b32f6d9
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/DebugInfo/Symbolize/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Symbolize
+parent = DebugInfo
+required_libraries = DebugInfoDWARF DebugInfoPDB Object Support
diff --git a/lib/Analysis/IPA/Makefile b/lib/DebugInfo/Symbolize/Makefile
similarity index 74%
rename from lib/Analysis/IPA/Makefile
rename to lib/DebugInfo/Symbolize/Makefile
index b850c9ff7f44..17aac9396585 100644
--- a/lib/Analysis/IPA/Makefile
+++ b/lib/DebugInfo/Symbolize/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===##
+##===- lib/DebugInfo/Symbolize/Makefile --------------------*- Makefile -*-===##
 #
 # The LLVM Compiler Infrastructure
 #
@@ -8,8 +8,8 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../../..
-LIBRARYNAME = LLVMipa
-BUILD_ARCHIVE = 1
+LIBRARYNAME = LLVMSymbolize
+BUILD_ARCHIVE := 1
 
 include $(LEVEL)/Makefile.common
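One subtlety the next file handles is big-endian PowerPC64 ELF, where a function symbol points at a descriptor in the .opd section rather than at code; the first word of the descriptor is the real entry point. A standalone sketch of that indirection (illustrative only, mirroring the addSymbol() logic below):

#include "llvm/Support/DataExtractor.h"

// Given the .opd contents and its base address, map a descriptor address to
// the function's code address (the descriptor's first word).
uint64_t resolveOpdEntry(const llvm::DataExtractor &Opd, uint64_t OpdAddress,
                         uint64_t SymbolAddress) {
  uint32_t Offset = SymbolAddress - OpdAddress; // offset of the descriptor
  return Opd.getAddress(&Offset);               // first word = entry point
}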
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
new file mode 100644
index 000000000000..e31462459844
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -0,0 +1,254 @@
+//===-- SymbolizableObjectFile.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolizableObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+using namespace object;
+
+static DILineInfoSpecifier
+getDILineInfoSpecifier(FunctionNameKind FNKind) {
+  return DILineInfoSpecifier(
+      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
+}
+
+ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+SymbolizableObjectFile::create(object::ObjectFile *Obj,
+                               std::unique_ptr<DIContext> DICtx) {
+  std::unique_ptr<SymbolizableObjectFile> res(
+      new SymbolizableObjectFile(Obj, std::move(DICtx)));
+  std::unique_ptr<DataExtractor> OpdExtractor;
+  uint64_t OpdAddress = 0;
+  // Find the .opd (function descriptor) section if any, for big-endian
+  // PowerPC64 ELF.
+  if (Obj->getArch() == Triple::ppc64) {
+    for (section_iterator Section : Obj->sections()) {
+      StringRef Name;
+      StringRef Data;
+      if (auto EC = Section->getName(Name))
+        return EC;
+      if (Name == ".opd") {
+        if (auto EC = Section->getContents(Data))
+          return EC;
+        OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
+                                             Obj->getBytesInAddress()));
+        OpdAddress = Section->getAddress();
+        break;
+      }
+    }
+  }
+  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+      computeSymbolSizes(*Obj);
+  for (auto &P : Symbols)
+    res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
+
+  // If this is a COFF object and we didn't find any symbols, try the export
+  // table.
+  if (Symbols.empty()) {
+    if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
+      if (auto EC = res->addCoffExportSymbols(CoffObj))
+        return EC;
+  }
+  return std::move(res);
+}
+
+SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
+                                               std::unique_ptr<DIContext> DICtx)
+    : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
+
+namespace {
+struct OffsetNamePair {
+  uint32_t Offset;
+  StringRef Name;
+  bool operator<(const OffsetNamePair &R) const {
+    return Offset < R.Offset;
+  }
+};
+}
+
+std::error_code SymbolizableObjectFile::addCoffExportSymbols(
+    const COFFObjectFile *CoffObj) {
+  // Get all export names and offsets.
+  std::vector<OffsetNamePair> ExportSyms;
+  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
+    StringRef Name;
+    uint32_t Offset;
+    if (auto EC = Ref.getSymbolName(Name))
+      return EC;
+    if (auto EC = Ref.getExportRVA(Offset))
+      return EC;
+    ExportSyms.push_back(OffsetNamePair{Offset, Name});
+  }
+  if (ExportSyms.empty())
+    return std::error_code();
+
+  // Sort by ascending offset.
+  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
+
+  // Approximate the symbol sizes by assuming they run to the next symbol.
+  // FIXME: This assumes all exports are functions.
+  uint64_t ImageBase = CoffObj->getImageBase();
+  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
+    OffsetNamePair &Export = *I;
+    // FIXME: The last export has a one byte size now.
+    uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
+    uint64_t SymbolStart = ImageBase + Export.Offset;
+    uint64_t SymbolSize = NextOffset - Export.Offset;
+    SymbolDesc SD = {SymbolStart, SymbolSize};
+    Functions.insert(std::make_pair(SD, Export.Name));
+  }
+  return std::error_code();
+}
+std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
+                                                  uint64_t SymbolSize,
+                                                  DataExtractor *OpdExtractor,
+                                                  uint64_t OpdAddress) {
+  SymbolRef::Type SymbolType = Symbol.getType();
+  if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
+    return std::error_code();
+  ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
+  if (auto EC = SymbolAddressOrErr.getError())
+    return EC;
+  uint64_t SymbolAddress = *SymbolAddressOrErr;
+  if (OpdExtractor) {
+    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
+    // function descriptors. The first word of the descriptor is a pointer to
+    // the function's code.
+    // For the purposes of symbolization, pretend the symbol's address is that
+    // of the function's code, not the descriptor.
+    uint64_t OpdOffset = SymbolAddress - OpdAddress;
+    uint32_t OpdOffset32 = OpdOffset;
+    if (OpdOffset == OpdOffset32 &&
+        OpdExtractor->isValidOffsetForAddress(OpdOffset32))
+      SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+  }
+  ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+  if (auto EC = SymbolNameOrErr.getError())
+    return EC;
+  StringRef SymbolName = *SymbolNameOrErr;
+  // Mach-O symbol table names have leading underscore, skip it.
+  if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
+    SymbolName = SymbolName.drop_front();
+  // FIXME: If a function has an alias, there are two entries in the symbol
+  // table with the same address and size. Make sure we choose the correct one.
+  auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
+  SymbolDesc SD = { SymbolAddress, SymbolSize };
+  M.insert(std::make_pair(SD, SymbolName));
+  return std::error_code();
+}
+
+// Return true if this is a 32-bit x86 PE COFF module.
+bool SymbolizableObjectFile::isWin32Module() const {
+  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
+  if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
+    return CoffObject->getImageBase();
+  return 0;
+}
+
+bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
+                                                    uint64_t Address,
+                                                    std::string &Name,
+                                                    uint64_t &Addr,
+                                                    uint64_t &Size) const {
+  const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
+  if (SymbolMap.empty())
+    return false;
+  SymbolDesc SD = { Address, Address };
+  auto SymbolIterator = SymbolMap.upper_bound(SD);
+  if (SymbolIterator == SymbolMap.begin())
+    return false;
+  --SymbolIterator;
+  if (SymbolIterator->first.Size != 0 &&
+      SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
+    return false;
+  Name = SymbolIterator->second.str();
+  Addr = SymbolIterator->first.Addr;
+  Size = SymbolIterator->first.Size;
+  return true;
+}
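getNameFromSymbolTable() resolves an address with an ordered-map upper_bound: take the last symbol starting at or before the address, then check that the address falls inside its extent. A self-contained sketch of the same idiom (illustrative, not part of the patch):

#include <cstdint>
#include <map>
#include <string>

// Symbols keyed by start address; value = (name, size). Size 0 means "extends
// to the next symbol", matching the SymbolDesc convention above.
bool lookup(const std::map<uint64_t, std::pair<std::string, uint64_t>> &Syms,
            uint64_t Address, std::string &Name) {
  auto It = Syms.upper_bound(Address); // first symbol strictly after Address
  if (It == Syms.begin())
    return false;                      // Address precedes every symbol
  --It;                                // last symbol at or before Address
  uint64_t Start = It->first, Size = It->second.second;
  if (Size != 0 && Start + Size <= Address)
    return false;                      // Address lies past the symbol's end
  Name = It->second.first;
  return true;
}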
+  return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
+         isa<DWARFContext>(DebugInfoContext.get());
+}
+
+DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
+                                                 FunctionNameKind FNKind,
+                                                 bool UseSymbolTable) const {
+  DILineInfo LineInfo;
+  if (DebugInfoContext) {
+    LineInfo = DebugInfoContext->getLineInfoForAddress(
+        ModuleOffset, getDILineInfoSpecifier(FNKind));
+  }
+  // Override function name from symbol table if necessary.
+  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+    std::string FunctionName;
+    uint64_t Start, Size;
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+                               FunctionName, Start, Size)) {
+      LineInfo.FunctionName = FunctionName;
+    }
+  }
+  return LineInfo;
+}
+
+DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
+    uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
+  DIInliningInfo InlinedContext;
+
+  if (DebugInfoContext)
+    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+        ModuleOffset, getDILineInfoSpecifier(FNKind));
+  // Make sure there is at least one frame in context.
+  if (InlinedContext.getNumberOfFrames() == 0)
+    InlinedContext.addFrame(DILineInfo());
+
+  // Override the function name in lower frame with name from symbol table.
+  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+    std::string FunctionName;
+    uint64_t Start, Size;
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+                               FunctionName, Start, Size)) {
+      InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
+          ->FunctionName = FunctionName;
+    }
+  }
+
+  return InlinedContext;
+}
+
+DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
+  DIGlobal Res;
+  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
+                         Res.Size);
+  return Res;
+}
+
+} // namespace symbolize
+} // namespace llvm
+
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
new file mode 100644
index 000000000000..8583b6a36e63
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -0,0 +1,82 @@
+//===-- SymbolizableObjectFile.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include <map>
+
+namespace llvm {
+class DataExtractor;
+}
+
+namespace llvm {
+namespace symbolize {
+
+class SymbolizableObjectFile : public SymbolizableModule {
+public:
+  static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+  create(object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+
+  DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind,
+                           bool UseSymbolTable) const override;
+  DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+                                      FunctionNameKind FNKind,
+                                      bool UseSymbolTable) const override;
+  DIGlobal symbolizeData(uint64_t ModuleOffset) const override;
+
+  // Return true if this is a 32-bit x86 PE COFF module.
+  bool isWin32Module() const override;
+
+  // Returns the preferred base of the module, i.e. where the loader would place
+  // it in memory assuming there were no conflicts.
+  uint64_t getModulePreferredBase() const override;
+
+private:
+  bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind,
+                                     bool UseSymbolTable) const;
+
+  bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address,
+                              std::string &Name, uint64_t &Addr,
+                              uint64_t &Size) const;
+  // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
+  // (function descriptor) section and OpdExtractor refers to its contents.
+  std::error_code addSymbol(const object::SymbolRef &Symbol,
+                            uint64_t SymbolSize,
+                            DataExtractor *OpdExtractor = nullptr,
+                            uint64_t OpdAddress = 0);
+  std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
+
+  object::ObjectFile *Module;
+  std::unique_ptr<DIContext> DebugInfoContext;
+
+  struct SymbolDesc {
+    uint64_t Addr;
+    // If size is 0, assume that symbol occupies the whole memory range up to
+    // the following symbol.
+    uint64_t Size;
+    friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
+      return s1.Addr < s2.Addr;
+    }
+  };
+  std::map<SymbolDesc, StringRef> Functions;
+  std::map<SymbolDesc, StringRef> Objects;
+
+  SymbolizableObjectFile(object::ObjectFile *Obj,
+                         std::unique_ptr<DIContext> DICtx);
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
new file mode 100644
index 000000000000..3da1963bb791
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -0,0 +1,456 @@
+//===-- LLVMSymbolize.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+
+#include "SymbolizableObjectFile.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/DebugInfo/PDB/PDBContext.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <stdlib.h>
+
+#if defined(_MSC_VER)
+#include <Windows.h>
+#include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
+#endif
+
+namespace llvm {
+namespace symbolize {
+
+ErrorOr<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+                                                  uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of the
+  // object to the offset before we do the query. It's what DIContext expects.
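+  // For example (hypothetical numbers): with a preferred base of 0x400000, a
+  // relative offset of 0x1234 is looked up as the absolute address 0x401234.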
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
+                                            Opts.UseSymbolTable);
+  if (Opts.Demangle)
+    LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+  return LineInfo;
+}
+
+ErrorOr<DIInliningInfo>
+LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
+                                     uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of the
+  // object to the offset before we do the query. It's what DIContext expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
+      ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
+  if (Opts.Demangle) {
+    for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
+      auto *Frame = InlinedContext.getMutableFrame(i);
+      Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
+    }
+  }
+  return InlinedContext;
+}
+
+ErrorOr<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+                                                uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of
+  // the object to the offset before we do the query. It's what DIContext
+  // expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DIGlobal Global = Info->symbolizeData(ModuleOffset);
+  if (Opts.Demangle)
+    Global.Name = DemangleName(Global.Name, Info);
+  return Global;
+}
+
+void LLVMSymbolizer::flush() {
+  ObjectForUBPathAndArch.clear();
+  BinaryForPath.clear();
+  ObjectPairForPathArch.clear();
+  Modules.clear();
+}
+
+// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
+// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
+// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
+// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
+static
+std::string getDarwinDWARFResourceForPath(
+    const std::string &Path, const std::string &Basename) {
+  SmallString<16> ResourceName = StringRef(Path);
+  if (sys::path::extension(Path) != ".dSYM") {
+    ResourceName += ".dSYM";
+  }
+  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
+  sys::path::append(ResourceName, Basename);
+  return ResourceName.str();
+}
+
+static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileOrSTDIN(Path);
+  if (!MB)
+    return false;
+  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
+}
+
+static bool findDebugBinary(const std::string &OrigPath,
+                            const std::string &DebuglinkName, uint32_t CRCHash,
+                            std::string &Result) {
+  std::string OrigRealPath = OrigPath;
+#if defined(HAVE_REALPATH)
+  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
+    OrigRealPath = RP;
+    free(RP);
+  }
+#endif
+  SmallString<16> OrigDir(OrigRealPath);
+  llvm::sys::path::remove_filename(OrigDir);
+  SmallString<16> DebugPath = OrigDir;
+  // Try /path/to/original_binary/debuglink_name
+  llvm::sys::path::append(DebugPath, DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  // Try /path/to/original_binary/.debug/debuglink_name
+  DebugPath = OrigRealPath;
+  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
+  DebugPath = "/usr/lib/debug";
+  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
+                          DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  return false;
+}
+
+static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
+                                    uint32_t &CRCHash) {
+  if (!Obj)
+    return false;
+  for (const SectionRef &Section : Obj->sections()) {
+    StringRef Name;
+    Section.getName(Name);
+    Name = Name.substr(Name.find_first_not_of("._"));
+    if (Name == "gnu_debuglink") {
+      StringRef Data;
+      Section.getContents(Data);
+      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
+      uint32_t Offset = 0;
+      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
+        // 4-byte align the offset.
+        Offset = (Offset + 3) & ~0x3;
+        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
+          DebugName = DebugNameStr;
+          CRCHash = DE.getU32(&Offset);
+          return true;
+        }
+      }
+      break;
+    }
+  }
+  return false;
+}
+
+static
+bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
+                             const MachOObjectFile *Obj) {
+  ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
+  ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
+  if (dbg_uuid.empty() || bin_uuid.empty())
+    return false;
+  return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
+    const MachOObjectFile *MachExeObj, const std::string &ArchName) {
+  // On Darwin we may find DWARF in a separate object file in the
+  // resource directory.
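+  // For a hypothetical ExePath "/tmp/a.out", the default candidate below is
+  // "/tmp/a.out.dSYM/Contents/Resources/DWARF/a.out"; each Opts.DsymHints
+  // directory is expanded the same way, and a candidate is only accepted if
+  // its Mach-O UUID matches the binary's.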
+  std::vector<std::string> DsymPaths;
+  StringRef Filename = sys::path::filename(ExePath);
+  DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
+  for (const auto &Path : Opts.DsymHints) {
+    DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
+  }
+  for (const auto &Path : DsymPaths) {
+    auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
+    if (!DbgObjOrErr)
+      continue;
+    ObjectFile *DbgObj = DbgObjOrErr.get();
+    const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
+    if (!MachDbgObj)
+      continue;
+    if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
+      return DbgObj;
+  }
+  return nullptr;
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
+                                                  const ObjectFile *Obj,
+                                                  const std::string &ArchName) {
+  std::string DebuglinkName;
+  uint32_t CRCHash;
+  std::string DebugBinaryPath;
+  if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
+    return nullptr;
+  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
+    return nullptr;
+  auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
+  if (!DbgObjOrErr)
+    return nullptr;
+  return DbgObjOrErr.get();
+}
+
+ErrorOr<LLVMSymbolizer::ObjectPair>
+LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
+                                      const std::string &ArchName) {
+  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+  if (I != ObjectPairForPathArch.end())
+    return I->second;
+
+  auto ObjOrErr = getOrCreateObject(Path, ArchName);
+  if (auto EC = ObjOrErr.getError()) {
+    ObjectPairForPathArch.insert(
+        std::make_pair(std::make_pair(Path, ArchName), EC));
+    return EC;
+  }
+
+  ObjectFile *Obj = ObjOrErr.get();
+  assert(Obj != nullptr);
+  ObjectFile *DbgObj = nullptr;
+
+  if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
+    DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
+  if (!DbgObj)
+    DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
+  if (!DbgObj)
+    DbgObj = Obj;
+  ObjectPair Res = std::make_pair(Obj, DbgObj);
+  ObjectPairForPathArch.insert(
+      std::make_pair(std::make_pair(Path, ArchName), Res));
+  return Res;
+}
+
+ErrorOr<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+                                  const std::string &ArchName) {
+  const auto &I = BinaryForPath.find(Path);
+  Binary *Bin = nullptr;
+  if (I == BinaryForPath.end()) {
+    ErrorOr<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+    if (auto EC = BinOrErr.getError()) {
+      BinaryForPath.insert(std::make_pair(Path, EC));
+      return EC;
+    }
+    Bin = BinOrErr->getBinary();
+    BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
+  } else if (auto EC = I->second.getError()) {
+    return EC;
+  } else {
+    Bin = I->second->getBinary();
+  }
+
+  assert(Bin != nullptr);
+
+  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
+    const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
+    if (I != ObjectForUBPathAndArch.end()) {
+      if (auto EC = I->second.getError())
+        return EC;
+      return I->second->get();
+    }
+    ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
+        UB->getObjectForArch(ArchName);
+    if (auto EC = ObjOrErr.getError()) {
+      ObjectForUBPathAndArch.insert(
+          std::make_pair(std::make_pair(Path, ArchName), EC));
+      return EC;
+    }
+    ObjectFile *Res = ObjOrErr->get();
+    ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
+                                                 std::move(ObjOrErr.get())));
+    return Res;
+  }
+  if (Bin->isObject()) {
+    return cast<ObjectFile>(Bin);
+  }
+  return object_error::arch_not_found;
+}
+
+ErrorOr<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+  const auto &I = Modules.find(ModuleName);
+  if (I != Modules.end()) {
+    auto &InfoOrErr = I->second;
+    if (auto EC = InfoOrErr.getError())
+      return EC;
+    return InfoOrErr->get();
+  }
+  std::string BinaryName = ModuleName;
+  std::string ArchName = Opts.DefaultArch;
+  size_t ColonPos = ModuleName.find_last_of(':');
+  // Verify that the substring after the colon forms a valid arch name.
+  if (ColonPos != std::string::npos) {
+    std::string ArchStr = ModuleName.substr(ColonPos + 1);
+    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
+      BinaryName = ModuleName.substr(0, ColonPos);
+      ArchName = ArchStr;
+    }
+  }
+  auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
+  if (auto EC = ObjectsOrErr.getError()) {
+    // Failed to find valid object file.
+    Modules.insert(std::make_pair(ModuleName, EC));
+    return EC;
+  }
+  ObjectPair Objects = ObjectsOrErr.get();
+
+  std::unique_ptr<DIContext> Context;
+  if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
+    // If this is a COFF object, assume it contains PDB debug information. If
+    // we don't find any we will fall back to the DWARF case.
+    std::unique_ptr<IPDBSession> Session;
+    PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
+                                         Objects.first->getFileName(), Session);
+    if (Error == PDB_ErrorCode::Success) {
+      Context.reset(new PDBContext(*CoffObject, std::move(Session)));
+    }
+  }
+  if (!Context)
+    Context.reset(new DWARFContextInMemory(*Objects.second));
+  assert(Context);
+  auto InfoOrErr =
+      SymbolizableObjectFile::create(Objects.first, std::move(Context));
+  auto InsertResult =
+      Modules.insert(std::make_pair(ModuleName, std::move(InfoOrErr)));
+  assert(InsertResult.second);
+  if (auto EC = InsertResult.first->second.getError())
+    return EC;
+  return InsertResult.first->second->get();
+}
+
+// Undo these various manglings for Win32 extern "C" functions:
+// cdecl - _foo
+// stdcall - _foo@12
+// fastcall - @foo@12
+// vectorcall - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // Remove any '_' or '@' prefix.
+  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Remove any '@[0-9]+' suffix.
+  if (Front != '?') {
+    size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos &&
+        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+                    [](char C) { return C >= '0' && C <= '9'; })) {
+      SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // Remove any ending '@' for vectorcall.
+  if (SymbolName.endswith("@"))
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
+#if !defined(_MSC_VER)
+// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
+extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
+                                size_t *length, int *status);
+#endif
+
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         const SymbolizableModule *ModInfo) {
+#if !defined(_MSC_VER)
+  // We can spoil names of symbols with C linkage, so use a heuristic
+  // approach to check if the name should be demangled.
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
+#else
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
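+    // As a rough example, "?foo@@YAHH@Z" becomes "int foo(int)" once the
+    // UNDNAME_* flags below strip calling-convention and other specifiers.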
+ char DemangledName[1024] = {0}; + DWORD result = ::UnDecorateSymbolName( + Name.c_str(), DemangledName, 1023, + UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected + UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc + UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications + UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers + UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords + UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types + return (result == 0) ? Name : std::string(DemangledName); + } +#endif + if (ModInfo && ModInfo->isWin32Module()) + return std::string(demanglePE32ExternCFunc(Name)); + return Name; +} + +} // namespace symbolize +} // namespace llvm diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 67a1ca67e2f3..41c8da40346a 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -61,8 +61,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr M, void JITEventListener::anchor() {} -ExecutionEngine::ExecutionEngine(std::unique_ptr M) - : LazyFunctionCreator(nullptr) { +void ExecutionEngine::Init(std::unique_ptr M) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -79,6 +78,16 @@ ExecutionEngine::ExecutionEngine(std::unique_ptr M) Modules.push_back(std::move(M)); } +ExecutionEngine::ExecutionEngine(std::unique_ptr M) + : DL(M->getDataLayout()), LazyFunctionCreator(nullptr) { + Init(std::move(M)); +} + +ExecutionEngine::ExecutionEngine(DataLayout DL, std::unique_ptr M) + : DL(std::move(DL)), LazyFunctionCreator(nullptr) { + Init(std::move(M)); +} + ExecutionEngine::~ExecutionEngine() { clearAllGlobalMappings(); } @@ -86,7 +95,7 @@ ExecutionEngine::~ExecutionEngine() { namespace { /// \brief Helper class which uses a value handler to automatically deletes the /// memory block when the GlobalVariable is destroyed. -class GVMemoryBlock : public CallbackVH { +class GVMemoryBlock final : public CallbackVH { GVMemoryBlock(const GlobalVariable *GV) : CallbackVH(const_cast(GV)) {} @@ -115,7 +124,7 @@ public: } // anonymous namespace char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { - return GVMemoryBlock::Create(GV, *getDataLayout()); + return GVMemoryBlock::Create(GV, getDataLayout()); } void ExecutionEngine::addObjectFile(std::unique_ptr O) { @@ -187,7 +196,7 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { const DataLayout &DL = GV->getParent()->getDataLayout().isDefault() - ? *getDataLayout() + ? getDataLayout() : GV->getParent()->getDataLayout(); Mangler::getNameWithPrefix(FullName, GV->getName(), DL); @@ -228,11 +237,10 @@ void ExecutionEngine::clearAllGlobalMappings() { void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); - for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - EEState.RemoveMapping(getMangledName(FI)); - for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); - GI != GE; ++GI) - EEState.RemoveMapping(getMangledName(GI)); + for (Function &FI : *M) + EEState.RemoveMapping(getMangledName(&FI)); + for (GlobalVariable &GI : M->globals()) + EEState.RemoveMapping(getMangledName(&GI)); } uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, @@ -333,7 +341,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, const std::vector &InputArgv) { Values.clear(); // Free the old contents. 
Values.reserve(InputArgv.size()); - unsigned PtrSize = EE->getDataLayout()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout().getPointerSize(); Array = make_unique((InputArgv.size()+1)*PtrSize); DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array.get() << "\n"); @@ -408,7 +416,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { #ifndef NDEBUG /// isTargetNullPtr - Return whether the target pointer stored at Loc is null. static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { - unsigned PtrSize = EE->getDataLayout()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout().getPointerSize(); for (unsigned i = 0; i < PtrSize; ++i) if (*(i + (uint8_t*)Loc)) return false; @@ -621,8 +629,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::VectorTyID: // if the whole vector is 'undef' just reserve memory for the value. - const VectorType* VTy = dyn_cast(C->getType()); - const Type *ElemTy = VTy->getElementType(); + auto* VTy = dyn_cast(C->getType()); + Type *ElemTy = VTy->getElementType(); unsigned int elemNum = VTy->getNumElements(); Result.AggregateVal.resize(elemNum); if (ElemTy->isIntegerTy()) @@ -641,8 +649,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::GetElementPtr: { // Compute the index GenericValue Result = getConstantValue(Op0); - APInt Offset(DL->getPointerSizeInBits(), 0); - cast(CE)->accumulateConstantOffset(*DL, Offset); + APInt Offset(DL.getPointerSizeInBits(), 0); + cast(CE)->accumulateConstantOffset(DL, Offset); char* tmp = (char*) Result.PointerVal; Result = PTOGV(tmp + Offset.getSExtValue()); @@ -729,16 +737,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::PtrToInt: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = DL->getTypeSizeInBits(Op0->getType()); + uint32_t PtrWidth = DL.getTypeSizeInBits(Op0->getType()); assert(PtrWidth <= 64 && "Bad pointer width"); GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); - uint32_t IntWidth = DL->getTypeSizeInBits(CE->getType()); + uint32_t IntWidth = DL.getTypeSizeInBits(CE->getType()); GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth); return GV; } case Instruction::IntToPtr: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = DL->getTypeSizeInBits(CE->getType()); + uint32_t PtrWidth = DL.getTypeSizeInBits(CE->getType()); GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); @@ -860,8 +868,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FRem: - apfLHS.mod(APFloat(Sem, RHS.IntVal), - APFloat::rmNearestTiesToEven); + apfLHS.mod(APFloat(Sem, RHS.IntVal)); GV.IntVal = apfLHS.bitcastToAPInt(); break; } @@ -1040,7 +1047,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, GenericValue *Ptr, Type *Ty) { - const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); + const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty); switch (Ty->getTypeID()) { default: @@ -1080,7 +1087,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, break; } - if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian()) + if (sys::IsLittleEndianHost != getDataLayout().isLittleEndian()) // Host and target are different endian - reverse the stored bytes. 
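+    // E.g. (little-endian host, big-endian target): an i32 0x11223344 sits in
+    // host memory as 44 33 22 11; the reverse below yields 11 22 33 44.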
std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); } @@ -1117,7 +1124,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, Type *Ty) { - const unsigned LoadBytes = getDataLayout()->getTypeStoreSize(Ty); + const unsigned LoadBytes = getDataLayout().getTypeStoreSize(Ty); switch (Ty->getTypeID()) { case Type::IntegerTyID: @@ -1143,8 +1150,8 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, break; } case Type::VectorTyID: { - const VectorType *VT = cast(Ty); - const Type *ElemT = VT->getElementType(); + auto *VT = cast(Ty); + Type *ElemT = VT->getElementType(); const unsigned numElems = VT->getNumElements(); if (ElemT->isFloatTy()) { Result.AggregateVal.resize(numElems); @@ -1183,20 +1190,20 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { if (const ConstantVector *CP = dyn_cast(Init)) { unsigned ElementSize = - getDataLayout()->getTypeAllocSize(CP->getType()->getElementType()); + getDataLayout().getTypeAllocSize(CP->getType()->getElementType()); for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize); return; } if (isa(Init)) { - memset(Addr, 0, (size_t)getDataLayout()->getTypeAllocSize(Init->getType())); + memset(Addr, 0, (size_t)getDataLayout().getTypeAllocSize(Init->getType())); return; } if (const ConstantArray *CPA = dyn_cast(Init)) { unsigned ElementSize = - getDataLayout()->getTypeAllocSize(CPA->getType()->getElementType()); + getDataLayout().getTypeAllocSize(CPA->getType()->getElementType()); for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize); return; @@ -1204,7 +1211,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { if (const ConstantStruct *CPS = dyn_cast(Init)) { const StructLayout *SL = - getDataLayout()->getStructLayout(cast(CPS->getType())); + getDataLayout().getStructLayout(cast(CPS->getType())); for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i)); return; @@ -1349,7 +1356,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { InitializeMemory(GV->getInitializer(), GA); Type *ElTy = GV->getType()->getElementType(); - size_t GVSize = (size_t)getDataLayout()->getTypeAllocSize(ElTy); + size_t GVSize = (size_t)getDataLayout().getTypeAllocSize(ElTy); NumInitBytes += (unsigned)GVSize; ++NumGlobals; } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 55ab5af2b909..ff7c4dce0d5d 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -29,7 +29,7 @@ using namespace llvm; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) -inline LLVMTargetMachineRef wrap(const TargetMachine *P) { +static LLVMTargetMachineRef wrap(const TargetMachine *P) { return reinterpret_cast(const_cast(P)); } @@ -210,35 +210,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule( return 1; } -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { - /* The module provider is now actually a module. 
*/ - return LLVMCreateExecutionEngineForModule(OutEE, - reinterpret_cast(MP), - OutError); -} - -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { - /* The module provider is now actually a module. */ - return LLVMCreateInterpreterForModule(OutInterp, - reinterpret_cast(MP), - OutError); -} - -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError) { - /* The module provider is now actually a module. */ - return LLVMCreateJITCompilerForModule(OutJIT, - reinterpret_cast(MP), - OptLevel, OutError); -} - - void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { delete unwrap(EE); } @@ -282,11 +253,6 @@ void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ unwrap(EE)->addModule(std::unique_ptr(unwrap(M))); } -void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - /* The module provider is now actually a module. */ - LLVMAddModule(EE, reinterpret_cast(MP)); -} - LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, LLVMModuleRef *OutMod, char **OutError) { Module *Mod = unwrap(M); @@ -295,14 +261,6 @@ LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, return 0; } -LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError) { - /* The module provider is now actually a module. */ - return LLVMRemoveModule(EE, reinterpret_cast(MP), OutMod, - OutError); -} - LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, LLVMValueRef *OutFn) { if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { @@ -318,7 +276,7 @@ void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, } LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { - return wrap(unwrap(EE)->getDataLayout()); + return wrap(&unwrap(EE)->getDataLayout()); } LLVMTargetMachineRef diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index dbfa37e2b0da..1eb4f7d19342 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -593,7 +593,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, - const Type *Ty, const bool val) { + Type *Ty, const bool val) { GenericValue Dest; if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); @@ -788,7 +788,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { } static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, - GenericValue Src3, const Type *Ty) { + GenericValue Src3, Type *Ty) { GenericValue Dest; if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); @@ -805,7 +805,7 @@ static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, void Interpreter::visitSelectInst(SelectInst &I) { ExecutionContext &SF = ECStack.back(); - const Type * Ty = I.getOperand(0)->getType(); + Type * Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Src3 = getOperandValue(I.getOperand(2), SF); @@ -968,7 +968,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { unsigned NumElements = getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue(); - unsigned TypeSize = 
(size_t)TD.getTypeAllocSize(Ty); + unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty); // Avoid malloc-ing zero bytes, use max()... unsigned MemToAlloc = std::max(1U, NumElements * TypeSize); @@ -1000,7 +1000,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, for (; I != E; ++I) { if (StructType *STy = dyn_cast(*I)) { - const StructLayout *SLO = TD.getStructLayout(STy); + const StructLayout *SLO = getDataLayout().getStructLayout(STy); const ConstantInt *CPU = cast(I.getOperand()); unsigned Index = unsigned(CPU->getZExtValue()); @@ -1020,7 +1020,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, assert(BitWidth == 64 && "Invalid index type for getelementptr"); Idx = (int64_t)IdxGV.IntVal.getZExtValue(); } - Total += TD.getTypeAllocSize(ST->getElementType())*Idx; + Total += getDataLayout().getTypeAllocSize(ST->getElementType()) * Idx; } } @@ -1139,7 +1139,7 @@ void Interpreter::visitShl(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); @@ -1166,7 +1166,7 @@ void Interpreter::visitLShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); @@ -1193,7 +1193,7 @@ void Interpreter::visitAShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { size_t src1Size = Src1.AggregateVal.size(); @@ -1237,10 +1237,10 @@ GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->isVectorTy()) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal. 
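+    // E.g. sext of <4 x i16> to <4 x i32> sign-extends each of the four lanes
+    // independently in the loop below.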
@@ -1248,7 +1248,7 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, for (unsigned i = 0; i < size; i++) Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth); } else { - const IntegerType *DITy = cast(DstTy); + auto *DITy = cast(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.sext(DBitWidth); } @@ -1257,10 +1257,10 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->isVectorTy()) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); @@ -1269,7 +1269,7 @@ GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, for (unsigned i = 0; i < size; i++) Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth); } else { - const IntegerType *DITy = cast(DstTy); + auto *DITy = cast(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.zext(DBitWidth); } @@ -1327,8 +1327,8 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); - const Type *SrcVecTy = SrcTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); + Type *SrcVecTy = SrcTy->getScalarType(); uint32_t DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal. 
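+    // E.g. fptoui <2 x float> <1.5, 2.5> to <2 x i32> truncates each lane
+    // toward zero, yielding <1, 2>.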
@@ -1365,8 +1365,8 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); - const Type *SrcVecTy = SrcTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); + Type *SrcVecTy = SrcTy->getScalarType(); uint32_t DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal @@ -1401,7 +1401,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal Dest.AggregateVal.resize(size); @@ -1433,7 +1433,7 @@ GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal Dest.AggregateVal.resize(size); @@ -1477,7 +1477,7 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); - uint32_t PtrSize = TD.getPointerSizeInBits(); + uint32_t PtrSize = getDataLayout().getPointerSizeInBits(); if (PtrSize != Src.IntVal.getBitWidth()) Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize); @@ -1497,10 +1497,10 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, (DstTy->getTypeID() == Type::VectorTyID)) { // vector src bitcast to vector dst or vector src bitcast to scalar dst or // scalar src bitcast to vector dst - bool isLittleEndian = TD.isLittleEndian(); + bool isLittleEndian = getDataLayout().isLittleEndian(); GenericValue TempDst, TempSrc, SrcVec; - const Type *SrcElemTy; - const Type *DstElemTy; + Type *SrcElemTy; + Type *DstElemTy; unsigned SrcBitSize; unsigned DstBitSize; unsigned SrcNum; @@ -2091,7 +2091,7 @@ void Interpreter::callFunction(Function *F, ArrayRef ArgVals) { } // Get pointers to first LLVM BB & Instruction in function. - StackFrame.CurBB = F->begin(); + StackFrame.CurBB = &F->front(); StackFrame.CurInst = StackFrame.CurBB->begin(); // Run through the function arguments and initialize their values... @@ -2103,7 +2103,7 @@ void Interpreter::callFunction(Function *F, ArrayRef ArgVals) { unsigned i = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++i) - SetValue(AI, ArgVals[i], StackFrame); + SetValue(&*AI, ArgVals[i], StackFrame); // Handle varargs arguments... StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end()); @@ -2121,27 +2121,5 @@ void Interpreter::run() { DEBUG(dbgs() << "About to interpret: " << I); visit(I); // Dispatch to one of the visit* methods... -#if 0 - // This is not safe, as visiting the instruction could lower it and free I. 
-DEBUG( - if (!isa(I) && !isa(I) && - I.getType() != Type::VoidTy) { - dbgs() << " --> "; - const GenericValue &Val = SF.Values[&I]; - switch (I.getType()->getTypeID()) { - default: llvm_unreachable("Invalid GenericValue Type"); - case Type::VoidTyID: dbgs() << "void"; break; - case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break; - case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break; - case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal); - break; - case Type::IntegerTyID: - dbgs() << "i" << Val.IntVal.getBitWidth() << " " - << Val.IntVal.toStringUnsigned(10) - << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; - break; - } - }); -#endif } } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 9b44042d6144..441f0eb85721 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -178,7 +178,7 @@ static void *ffiValueFor(Type *Ty, const GenericValue &AV, } static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, - const DataLayout *TD, GenericValue &Result) { + const DataLayout &TD, GenericValue &Result) { ffi_cif cif; FunctionType *FTy = F->getFunctionType(); const unsigned NumArgs = F->arg_size(); @@ -198,7 +198,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, const unsigned ArgNo = A->getArgNo(); Type *ArgTy = FTy->getParamType(ArgNo); args[ArgNo] = ffiTypeFor(ArgTy); - ArgBytes += TD->getTypeStoreSize(ArgTy); + ArgBytes += TD.getTypeStoreSize(ArgTy); } SmallVector ArgData; @@ -210,7 +210,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, const unsigned ArgNo = A->getArgNo(); Type *ArgTy = FTy->getParamType(ArgNo); values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr); - ArgDataPtr += TD->getTypeStoreSize(ArgTy); + ArgDataPtr += TD.getTypeStoreSize(ArgTy); } Type *RetTy = FTy->getReturnType(); @@ -219,7 +219,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { SmallVector ret; if (RetTy->getTypeID() != Type::VoidTyID) - ret.resize(TD->getTypeStoreSize(RetTy)); + ret.resize(TD.getTypeStoreSize(RetTy)); ffi_call(&cif, Fn, ret.data(), values.data()); switch (RetTy->getTypeID()) { case Type::IntegerTyID: @@ -368,7 +368,7 @@ static GenericValue lle_X_sprintf(FunctionType *FT, case 'x': case 'X': if (HowLong >= 1) { if (HowLong == 1 && - TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 && + TheInterpreter->getDataLayout().getPointerSizeInBits() == 64 && sizeof(long) < sizeof(int64_t)) { // Make sure we use %lld with a 64 bit argument because we might be // compiling LLI on a 32 bit compiler. diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index f103c09659aa..bc7da2e4f6af 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -35,7 +35,7 @@ extern "C" void LLVMLinkInInterpreter() { } ExecutionEngine *Interpreter::create(std::unique_ptr M, std::string *ErrStr) { // Tell this Module to materialize everything and release the GVMaterializer. 
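+  // (A module freshly parsed from bitcode may still be lazily materialized;
+  // the interpreter walks function bodies directly, so force everything into
+  // memory up front.)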
- if (std::error_code EC = M->materializeAllPermanently()) { + if (std::error_code EC = M->materializeAll()) { if (ErrStr) *ErrStr = EC.message(); // We got an error, just return 0 @@ -49,16 +49,15 @@ ExecutionEngine *Interpreter::create(std::unique_ptr M, // Interpreter ctor - Initialize stuff // Interpreter::Interpreter(std::unique_ptr M) - : ExecutionEngine(std::move(M)), TD(Modules.back().get()) { + : ExecutionEngine(std::move(M)) { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); - setDataLayout(&TD); // Initialize the "backend" initializeExecutionEngine(); initializeExternalFunctions(); emitGlobals(); - IL = new IntrinsicLowering(TD); + IL = new IntrinsicLowering(getDataLayout()); } Interpreter::~Interpreter() { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index f97664181a87..2e5a867a200f 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -26,7 +26,6 @@ namespace llvm { class IntrinsicLowering; -struct FunctionInfo; template class generic_gep_type_iterator; class ConstantExpr; typedef generic_gep_type_iterator gep_type_iterator; @@ -95,7 +94,6 @@ struct ExecutionContext { // class Interpreter : public ExecutionEngine, public InstVisitor { GenericValue ExitValue; // The return value of the called function - DataLayout TD; IntrinsicLowering *IL; // The runtime stack of executing code. The top of the stack is the current diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index f6944eea2e78..6cbebe98e7c9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -65,12 +65,13 @@ MCJIT::createJIT(std::unique_ptr M, std::move(Resolver)); } -MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr tm, +MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr TM, std::shared_ptr MemMgr, std::shared_ptr Resolver) - : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr), - MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)), - Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) { + : ExecutionEngine(TM->createDataLayout(), std::move(M)), TM(std::move(TM)), + Ctx(nullptr), MemMgr(std::move(MemMgr)), + Resolver(*this, std::move(Resolver)), Dyld(*this->MemMgr, this->Resolver), + ObjCache(nullptr) { // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -85,7 +86,6 @@ MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr tm, Modules.clear(); OwnedModules.addModule(std::move(First)); - setDataLayout(TM->getDataLayout()); RegisterJITEventListener(JITEventListener::createGDBRegistrationListener()); } @@ -159,7 +159,6 @@ std::unique_ptr MCJIT::emitObject(Module *M) { // Initialize passes. 
PM.run(*M); // Flush the output buffer to get the generated code into memory - ObjStream.flush(); std::unique_ptr CompiledObjBuffer( new ObjectMemoryBuffer(std::move(ObjBufferSV))); @@ -193,7 +192,11 @@ void MCJIT::generateCodeForModule(Module *M) { if (ObjCache) ObjectToLoad = ObjCache->getObject(M); - M->setDataLayout(*TM->getDataLayout()); + if (M->getDataLayout().isDefault()) { + M->setDataLayout(getDataLayout()); + } else { + assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch"); + } // If the cache did not contain a suitable object, compile the object if (!ObjectToLoad) { @@ -265,7 +268,7 @@ void MCJIT::finalizeModule(Module *M) { RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) { SmallString<128> FullName; - Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout()); + Mangler::getNameWithPrefix(FullName, Name, getDataLayout()); if (void *Addr = getPointerToGlobalIfAvailable(FullName)) return RuntimeDyld::SymbolInfo(static_cast( @@ -315,10 +318,12 @@ RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name, object::Archive *A = OB.getBinary(); // Look for our symbols in each Archive object::Archive::child_iterator ChildIt = A->findSym(Name); + if (std::error_code EC = ChildIt->getError()) + report_fatal_error(EC.message()); if (ChildIt != A->child_end()) { // FIXME: Support nested archives? ErrorOr> ChildBinOrErr = - ChildIt->getAsBinary(); + (*ChildIt)->getAsBinary(); if (ChildBinOrErr.getError()) continue; std::unique_ptr &ChildBin = ChildBinOrErr.get(); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index a45173c2da8d..3c9d2fd50336 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -86,7 +86,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } ModulePtrSet::iterator end_added() { return AddedModules.end(); } iterator_range added() { - return iterator_range(begin_added(), end_added()); + return make_range(begin_added(), end_added()); } ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } @@ -223,12 +223,13 @@ public: /// FindFunctionNamed - Search all of the active modules to find the function that /// defines FnName. This is very slow operation and shouldn't be used for /// general code. - virtual Function *FindFunctionNamed(const char *FnName) override; + Function *FindFunctionNamed(const char *FnName) override; - /// FindGlobalVariableNamed - Search all of the active modules to find the global variable - /// that defines Name. This is very slow operation and shouldn't be used for - /// general code. - virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false) override; + /// FindGlobalVariableNamed - Search all of the active modules to find the + /// global variable that defines Name. This is very slow operation and + /// shouldn't be used for general code. + GlobalVariable *FindGlobalVariableNamed(const char *Name, + bool AllowInternal = false) override; /// Sets the object manager that MCJIT should use to avoid compilation. 
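+  /// An ObjectCache that returns a previously emitted object for a Module
+  /// lets MCJIT skip codegen entirely on later runs; passing nullptr disables
+  /// caching.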
void setObjectCache(ObjectCache *manager) override; @@ -335,6 +336,6 @@ protected: bool CheckFunctionsOnly); }; -} // End llvm namespace +} // end llvm namespace -#endif +#endif // LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index 99fe22c001da..a17f52e322e8 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMOrcJIT ExecutionUtils.cpp IndirectionUtils.cpp NullResolver.cpp + OrcCBindings.cpp + OrcCBindingsStack.cpp OrcMCJITReplacement.cpp OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index b439810ed330..34564e42b10f 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -19,6 +19,9 @@ namespace llvm { namespace orc { +void JITCompileCallbackManager::anchor() {} +void IndirectStubsManager::anchor() {} + Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { Constant *AddrIntVal = ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr); @@ -37,7 +40,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, return IP; } -void makeStub(Function &F, GlobalVariable &ImplPointer) { +void makeStub(Function &F, Value &ImplPointer) { assert(F.isDeclaration() && "Can't turn a definition into a stub."); assert(F.getParent() && "Function isn't in a module."); Module &M = *F.getParent(); @@ -61,9 +64,7 @@ class GlobalRenamer { public: static bool needsRenaming(const Value &New) { - if (!New.hasName() || New.getName().startswith("\01L")) - return true; - return false; + return !New.hasName() || New.getName().startswith("\01L"); } const std::string& getRename(const Value &Orig) { @@ -106,6 +107,9 @@ void makeAllSymbolsExternallyAccessible(Module &M) { for (auto &GV : M.globals()) raiseVisibilityOnValue(GV, Renamer); + + for (auto &A : M.aliases()) + raiseVisibilityOnValue(A, Renamer); } Function* cloneFunctionDecl(Module &Dst, const Function &F, @@ -121,7 +125,7 @@ Function* cloneFunctionDecl(Module &Dst, const Function &F, auto NewArgI = NewF->arg_begin(); for (auto ArgI = F.arg_begin(), ArgE = F.arg_end(); ArgI != ArgE; ++ArgI, ++NewArgI) - (*VMap)[ArgI] = NewArgI; + (*VMap)[&*ArgI] = &*NewArgI; } return NewF; @@ -177,5 +181,16 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, nullptr, Materializer)); } +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap) { + assert(OrigA.getAliasee() && "Original alias doesn't have an aliasee?"); + auto *NewA = GlobalAlias::create(OrigA.getValueType(), + OrigA.getType()->getPointerAddressSpace(), + OrigA.getLinkage(), OrigA.getName(), &Dst); + NewA->copyAttributesFrom(&OrigA); + VMap[&OrigA] = NewA; + return NewA; +} + } // End namespace orc. } // End namespace llvm. diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp new file mode 100644 index 000000000000..d2379cd441d5 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp @@ -0,0 +1,97 @@ +//===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OrcCBindingsStack.h" +#include "llvm-c/OrcBindings.h" + +using namespace llvm; + +LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) { + TargetMachine *TM2(unwrap(TM)); + + Triple T(TM2->getTargetTriple()); + + auto CompileCallbackMgr = OrcCBindingsStack::createCompileCallbackMgr(T); + auto IndirectStubsMgrBuilder = + OrcCBindingsStack::createIndirectStubsMgrBuilder(T); + + OrcCBindingsStack *JITStack = + new OrcCBindingsStack(*TM2, std::move(CompileCallbackMgr), + IndirectStubsMgrBuilder); + + return wrap(JITStack); +} + +void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName, + const char *SymbolName) { + OrcCBindingsStack &J = *unwrap(JITStack); + std::string Mangled = J.mangle(SymbolName); + *MangledName = new char[Mangled.size() + 1]; + strcpy(*MangledName, Mangled.c_str()); +} + +void LLVMOrcDisposeMangledSymbol(char *MangledName) { + delete[] MangledName; +} + +LLVMOrcTargetAddress +LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcLazyCompileCallbackFn Callback, + void *CallbackCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + return J.createLazyCompileCallback(Callback, CallbackCtx); +} + +void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress InitAddr) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.createIndirectStub(StubName, InitAddr); +} + +void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress NewAddr) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.setIndirectStubPointer(StubName, NewAddr); +} + +LLVMOrcModuleHandle +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + Module *M(unwrap(Mod)); + return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx); +} + +LLVMOrcModuleHandle +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + Module *M(unwrap(Mod)); + return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx); +} + +void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.removeModule(H); +} + +LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + const char *SymbolName) { + OrcCBindingsStack &J = *unwrap(JITStack); + auto Sym = J.findSymbol(SymbolName, true); + return Sym.getAddress(); +} + +void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { + delete unwrap(JITStack); +} diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp new file mode 100644 index 000000000000..e519c7f30920 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -0,0 +1,43 @@ +//===-------- OrcCBindingsStack.cpp - Orc JIT stack for C bindings --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OrcCBindingsStack.h" + +#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include +#include + +using namespace llvm; + +std::unique_ptr +OrcCBindingsStack::createCompileCallbackMgr(Triple T) { + switch (T.getArch()) { + default: return nullptr; + + case Triple::x86_64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return llvm::make_unique(0); + } + } +} + +OrcCBindingsStack::IndirectStubsManagerBuilder +OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) { + switch (T.getArch()) { + default: return nullptr; + + case Triple::x86_64: + return [](){ + return llvm::make_unique< + orc::LocalIndirectStubsManager>(); + }; + } +} diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h new file mode 100644 index 000000000000..2e17624ff474 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -0,0 +1,282 @@ +//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H +#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H + +#include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm-c/OrcBindings.h" + +namespace llvm { + +class OrcCBindingsStack; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) + +class OrcCBindingsStack { +public: + + typedef orc::JITCompileCallbackManager CompileCallbackMgr; + typedef orc::ObjectLinkingLayer<> ObjLayerT; + typedef orc::IRCompileLayer CompileLayerT; + typedef orc::CompileOnDemandLayer CODLayerT; + + typedef std::function()> + CallbackManagerBuilder; + + typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder; + +private: + + class GenericHandle { + public: + virtual ~GenericHandle() {} + virtual orc::JITSymbol findSymbolIn(const std::string &Name, + bool ExportedSymbolsOnly) = 0; + virtual void removeModule() = 0; + }; + + template + class GenericHandleImpl : public GenericHandle { + public: + GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) + : Layer(Layer), Handle(std::move(Handle)) {} + + orc::JITSymbol findSymbolIn(const std::string &Name, + bool ExportedSymbolsOnly) override { + return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly); + } + + void removeModule() override { + return Layer.removeModuleSet(Handle); + } + + private: + LayerT &Layer; + typename LayerT::ModuleSetHandleT Handle; + }; + + template + std::unique_ptr> + createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) { + return llvm::make_unique>(Layer, + std::move(Handle)); + } + +public: + + // We need a 'ModuleSetHandleT' to conform to the layer concept. 
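+  // Handles handed out through the C API are plain unsigned indices into the
+  // GenericHandles vector below; removeModule() recycles a slot by pushing
+  // its index onto FreeHandleIndexes for reuse by createHandle().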
+  typedef unsigned ModuleSetHandleT;
+
+  typedef unsigned ModuleHandleT;
+
+  static std::unique_ptr<CompileCallbackMgr> createCompileCallbackMgr(Triple T);
+  static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T);
+
+  OrcCBindingsStack(TargetMachine &TM,
+                    std::unique_ptr<CompileCallbackMgr> CCMgr,
+                    IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
+    : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)),
+      ObjectLayer(),
+      CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+      CODLayer(CompileLayer,
+               [](Function &F) { std::set<Function*> S; S.insert(&F); return S; },
+               *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+      IndirectStubsMgr(IndirectStubsMgrBuilder()),
+      CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
+
+  ~OrcCBindingsStack() {
+    // Run any destructors registered with __cxa_atexit.
+    CXXRuntimeOverrides.runDestructors();
+    // Run any IR destructors.
+    for (auto &DtorRunner : IRStaticDestructorRunners)
+      DtorRunner.runViaLayer(*this);
+  }
+
+  std::string mangle(StringRef Name) {
+    std::string MangledName;
+    {
+      raw_string_ostream MangledNameStream(MangledName);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
+    }
+    return MangledName;
+  }
+
+  template <typename PtrTy>
+  static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+    return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
+  }
+
+  orc::TargetAddress
+  createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
+                            void *CallbackCtx) {
+    auto CCInfo = CCMgr->getCompileCallback();
+    CCInfo.setCompileAction(
+      [=]() -> orc::TargetAddress {
+        return Callback(wrap(this), CallbackCtx);
+      });
+    return CCInfo.getAddress();
+  }
+
+  void createIndirectStub(StringRef StubName, orc::TargetAddress Addr) {
+    IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported);
+  }
+
+  void setIndirectStubPointer(StringRef Name, orc::TargetAddress Addr) {
+    IndirectStubsMgr->updatePointer(Name, Addr);
+  }
+
+  std::shared_ptr<RuntimeDyld::SymbolResolver>
+  createResolver(LLVMOrcSymbolResolverFn ExternalResolver,
+                 void *ExternalResolverCtx) {
+    auto Resolver = orc::createLambdaResolver(
+      [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) {
+        // Search order:
+        // 1. JIT'd symbols.
+        // 2. Runtime overrides.
+        // 3. External resolver (if present).
+
+        if (auto Sym = CODLayer.findSymbol(Name, true))
+          return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+                                         Sym.getFlags());
+        if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
+          return Sym;
+
+        if (ExternalResolver)
+          return RuntimeDyld::SymbolInfo(ExternalResolver(Name.c_str(),
+                                                          ExternalResolverCtx),
+                                         llvm::JITSymbolFlags::Exported);
+
+        return RuntimeDyld::SymbolInfo(nullptr);
+      },
+      [](const std::string &Name) {
+        return RuntimeDyld::SymbolInfo(nullptr);
+      }
+    );
+
+    return std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(Resolver));
+  }
+
+  template <typename LayerT>
+  ModuleHandleT addIRModule(LayerT &Layer,
+                            Module *M,
+                            std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+                            LLVMOrcSymbolResolverFn ExternalResolver,
+                            void *ExternalResolverCtx) {
+
+    // Attach a data-layout if one isn't already present.
+    if (M->getDataLayout().isDefault())
+      M->setDataLayout(DL);
+
+    // Record the static constructors and destructors. We have to do this before
+    // we hand over ownership of the module to the JIT.
+    std::vector<std::string> CtorNames, DtorNames;
+    for (auto Ctor : orc::getConstructors(*M))
+      CtorNames.push_back(mangle(Ctor.Func->getName()));
+    for (auto Dtor : orc::getDestructors(*M))
+      DtorNames.push_back(mangle(Dtor.Func->getName()));
+
+    // Create the resolver.
+    auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
+
+    // Add the module to the JIT.
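// For reference, the ExternalResolver threaded through createResolver above
// is a plain C callback. A hypothetical client implementation follows; the
// name host_log and the exact signature are assumptions based on
// LLVMOrcSymbolResolverFn as declared in llvm-c/OrcBindings.h:
//
// \code
//   extern "C" uint64_t MyResolver(const char *Name, void *Ctx) {
//     if (strcmp(Name, "host_log") == 0)
//       return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&host_log));
//     return 0; // Unknown symbol: resolution yields a null address.
//   }
// \endcode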
+    std::vector<Module*> S;
+    S.push_back(std::move(M));
+
+    auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr),
+                                 std::move(Resolver));
+    ModuleHandleT H = createHandle(Layer, LH);
+
+    // Run the static constructors, and save the static destructor runner for
+    // execution when the JIT is torn down.
+    orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), H);
+    CtorRunner.runViaLayer(*this);
+
+    IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H);
+
+    return H;
+  }
+
+  ModuleHandleT addIRModuleEager(Module* M,
+                                 LLVMOrcSymbolResolverFn ExternalResolver,
+                                 void *ExternalResolverCtx) {
+    return addIRModule(CompileLayer, std::move(M),
+                       llvm::make_unique<SectionMemoryManager>(),
+                       std::move(ExternalResolver), ExternalResolverCtx);
+  }
+
+  ModuleHandleT addIRModuleLazy(Module* M,
+                                LLVMOrcSymbolResolverFn ExternalResolver,
+                                void *ExternalResolverCtx) {
+    return addIRModule(CODLayer, std::move(M), nullptr,
+                       std::move(ExternalResolver), ExternalResolverCtx);
+  }
+
+  void removeModule(ModuleHandleT H) {
+    GenericHandles[H]->removeModule();
+    GenericHandles[H] = nullptr;
+    FreeHandleIndexes.push_back(H);
+  }
+
+  orc::JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+    if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
+      return Sym;
+    return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
+  }
+
+  orc::JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
+                              bool ExportedSymbolsOnly) {
+    return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly);
+  }
+
+private:
+
+  template <typename LayerT>
+  unsigned createHandle(LayerT &Layer,
+                        typename LayerT::ModuleSetHandleT Handle) {
+    unsigned NewHandle;
+    if (!FreeHandleIndexes.empty()) {
+      NewHandle = FreeHandleIndexes.back();
+      FreeHandleIndexes.pop_back();
+      GenericHandles[NewHandle] = createGenericHandle(Layer, std::move(Handle));
+      return NewHandle;
+    } else {
+      NewHandle = GenericHandles.size();
+      GenericHandles.push_back(createGenericHandle(Layer, std::move(Handle)));
+    }
+    return NewHandle;
+  }
+
+  DataLayout DL;
+  SectionMemoryManager CCMgrMemMgr;
+
+  std::unique_ptr<CompileCallbackMgr> CCMgr;
+  ObjLayerT ObjectLayer;
+  CompileLayerT CompileLayer;
+  CODLayerT CODLayer;
+
+  std::unique_ptr<orc::IndirectStubsManager> IndirectStubsMgr;
+
+  std::vector<std::unique_ptr<GenericHandle>> GenericHandles;
+  std::vector<unsigned> FreeHandleIndexes;
+
+  orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+  std::vector<orc::CtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 7dc5164c419a..38a27cff5b2f 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -137,25 +137,26 @@ public:
   }
 
   OrcMCJITReplacement(
-      std::shared_ptr<MCJITMemoryManager> MemMgr,
-      std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
-      std::unique_ptr<TargetMachine> TM)
-      : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
-        Resolver(*this), ClientResolver(std::move(ClientResolver)),
-        NotifyObjectLoaded(*this), NotifyFinalized(*this),
+      std::shared_ptr<MCJITMemoryManager> MemMgr,
+      std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+      std::unique_ptr<TargetMachine> TM)
+      : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
+        MemMgr(*this, std::move(MemMgr)), Resolver(*this),
+        ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
+        NotifyFinalized(*this),
         ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
         CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
-        LazyEmitLayer(CompileLayer) {
-    setDataLayout(this->TM->getDataLayout());
-  }
+        LazyEmitLayer(CompileLayer) {}
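// With the data layout now fixed at construction time (ExecutionEngine is
// handed TM->createDataLayout() above), a module added to the engine must
// either carry the default layout or one that matches. A sketch of the
// contract (hypothetical setup, not part of the patch):
//
// \code
//   auto M = llvm::make_unique<Module>("m", Ctx);
//   M->setDataLayout(TM->createDataLayout()); // OK: matches the engine.
//   // Any other non-default layout would trip the assert in addModule below.
//   EE->addModule(std::move(M));
// \endcode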
   void addModule(std::unique_ptr<Module> M) override {
 
     // If this module doesn't have a DataLayout attached then attach the
     // default.
-    if (M->getDataLayout().isDefault())
-      M->setDataLayout(*getDataLayout());
-
+    if (M->getDataLayout().isDefault()) {
+      M->setDataLayout(getDataLayout());
+    } else {
+      assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
+    }
     Modules.push_back(std::move(M));
     std::vector<Module *> Ms;
     Ms.push_back(&*Modules.back());
@@ -174,12 +175,7 @@ public:
     std::tie(Obj, Buf) = O.takeBinary();
     std::vector<std::unique_ptr<object::ObjectFile>> Objs;
     Objs.push_back(std::move(Obj));
-    auto H =
-      ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
-
-    std::vector<std::unique_ptr<MemoryBuffer>> Bufs;
-    Bufs.push_back(std::move(Buf));
-    ObjectLayer.takeOwnershipOfBuffers(H, std::move(Bufs));
+    ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
   }
 
   void addArchive(object::OwningBinary<object::Archive> A) override {
@@ -234,6 +230,10 @@ public:
     CompileLayer.setObjectCache(NewCache);
   }
 
+  void setProcessAllSections(bool ProcessAllSections) override {
+    ObjectLayer.setProcessAllSections(ProcessAllSections);
+  }
+
 private:
 
   RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
@@ -252,10 +252,12 @@ private:
       object::Archive *A = OB.getBinary();
       // Look for our symbols in each Archive
       object::Archive::child_iterator ChildIt = A->findSym(Name);
+      if (std::error_code EC = ChildIt->getError())
+        report_fatal_error(EC.message());
       if (ChildIt != A->child_end()) {
         // FIXME: Support nested archives?
         ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
-            ChildIt->getAsBinary();
+            (*ChildIt)->getAsBinary();
         if (ChildBinOrErr.getError())
           continue;
         std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
@@ -289,7 +291,7 @@ private:
              "Incorrect number of Infos for Objects.");
       for (unsigned I = 0; I < Objects.size(); ++I)
         M.MemMgr.notifyObjectLoaded(&M, *Objects[I]);
-    };
+    }
 
   private:
     OrcMCJITReplacement &M;
@@ -310,7 +312,7 @@ private:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
+      Mang.getNameWithPrefix(MangledNameStream, Name, getDataLayout());
     }
     return MangledName;
   }
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index 258868aa64f6..b931f10b9d78 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -1,137 +1,170 @@
+//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
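// The rewritten writeResolverCode below replaces the old inline-asm approach
// with a fixed machine-code template into which two absolute addresses are
// spliced at known offsets (the movabsq operand at ReentryFnAddrOffset and
// the baked-in callback-manager pointer at CallbackMgrAddrOffset). The splice
// is a plain byte copy, sketched here with an illustrative local buffer:
//
// \code
//   uint8_t Buf[sizeof(ResolverCode)];
//   memcpy(Buf, ResolverCode, sizeof(ResolverCode));
//   memcpy(Buf + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
//   memcpy(Buf + CallbackMgrAddrOffset, &CallbackMgr, sizeof(CallbackMgr));
// \endcode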
+// +//===----------------------------------------------------------------------===// + #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/Support/Process.h" #include -using namespace llvm::orc; - -namespace { - -uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, - TargetAddress CallbackID) { - return JCBM->executeCompileCallback(CallbackID); -} - -} - namespace llvm { namespace orc { -const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; +void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, + void *CallbackMgr) { -void OrcX86_64::insertResolverBlock( - Module &M, JITCompileCallbackManagerBase &JCBM) { + const uint8_t ResolverCode[] = { + // resolver_entry: + 0x55, // 0x00: pushq %rbp + 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp + 0x50, // 0x04: pushq %rax + 0x53, // 0x05: pushq %rbx + 0x51, // 0x06: pushq %rcx + 0x52, // 0x07: pushq %rdx + 0x56, // 0x08: pushq %rsi + 0x57, // 0x09: pushq %rdi + 0x41, 0x50, // 0x0a: pushq %r8 + 0x41, 0x51, // 0x0c: pushq %r9 + 0x41, 0x52, // 0x0e: pushq %r10 + 0x41, 0x53, // 0x10: pushq %r11 + 0x41, 0x54, // 0x12: pushq %r12 + 0x41, 0x55, // 0x14: pushq %r13 + 0x41, 0x56, // 0x16: pushq %r14 + 0x41, 0x57, // 0x18: pushq %r15 + 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 20, %rsp + 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) + 0x48, 0x8d, 0x3d, 0x43, 0x00, 0x00, 0x00, // 0x26: leaq 67(%rip), %rdi + 0x48, 0x8b, 0x3f, // 0x2d: movq (%rdi), %rdi + 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi + 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi + 0x48, 0xb8, // 0x38: movabsq $0, %rax - // Trampoline code-sequence length, used to get trampoline address from return - // address. - const unsigned X86_64_TrampolineLength = 6; + // 0x3a: JIT re-entry fn addr: + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // List of x86-64 GPRs to save. Note - RBP saved separately below. - std::array GPRs = {{ - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", - "r14", "r15" - }}; + 0xff, 0xd0, // 0x42: callq *%rax + 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp) + 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp) + 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 20, %rsp + 0x41, 0x5f, // 0x54: popq %r15 + 0x41, 0x5e, // 0x56: popq %r14 + 0x41, 0x5d, // 0x58: popq %r13 + 0x41, 0x5c, // 0x5a: popq %r12 + 0x41, 0x5b, // 0x5c: popq %r11 + 0x41, 0x5a, // 0x5e: popq %r10 + 0x41, 0x59, // 0x60: popq %r9 + 0x41, 0x58, // 0x62: popq %r8 + 0x5f, // 0x64: popq %rdi + 0x5e, // 0x65: popq %rsi + 0x5a, // 0x66: popq %rdx + 0x59, // 0x67: popq %rcx + 0x5b, // 0x68: popq %rbx + 0x58, // 0x69: popq %rax + 0x5d, // 0x6a: popq %rbp + 0xc3, // 0x6b: retq + 0x00, 0x00, 0x00, 0x00, // 0x6c: - // Address of the executeCompileCallback function. - uint64_t CallbackAddr = - static_cast( - reinterpret_cast(executeCompileCallback)); - - std::ostringstream AsmStream; - Triple TT(M.getTargetTriple()); - - // Switch to text section. - if (TT.getOS() == Triple::Darwin) - AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" - << ".align 4, 0x90\n"; - else - AsmStream << ".text\n" - << ".align 16, 0x90\n"; - - // Bake in a pointer to the callback manager immediately before the - // start of the resolver function. - AsmStream << "jit_callback_manager_addr:\n" - << " .quad " << &JCBM << "\n"; - - // Start the resolver function. 
- AsmStream << ResolverBlockName << ":\n" - << " pushq %rbp\n" - << " movq %rsp, %rbp\n"; - - // Store the GPRs. - for (const auto &GPR : GPRs) - AsmStream << " pushq %" << GPR << "\n"; - - // Store floating-point state with FXSAVE. - // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd - // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add - // an extra 64 bits of padding to the FXSave area. - unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0; - unsigned FXSaveSize = 512 + Padding; - AsmStream << " subq $" << FXSaveSize << ", %rsp\n" - << " fxsave64 (%rsp)\n" - - // Load callback manager address, compute trampoline address, call JIT. - << " lea jit_callback_manager_addr(%rip), %rdi\n" - << " movq (%rdi), %rdi\n" - << " movq 0x8(%rbp), %rsi\n" - << " subq $" << X86_64_TrampolineLength << ", %rsi\n" - << " movabsq $" << CallbackAddr << ", %rax\n" - << " callq *%rax\n" - - // Replace the return to the trampoline with the return address of the - // compiled function body. - << " movq %rax, 0x8(%rbp)\n" - - // Restore the floating point state. - << " fxrstor64 (%rsp)\n" - << " addq $" << FXSaveSize << ", %rsp\n"; - - for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend())) - AsmStream << " popq %" << GPR << "\n"; - - // Restore original RBP and return to compiled function body. - AsmStream << " popq %rbp\n" - << " retq\n"; - - M.appendModuleInlineAsm(AsmStream.str()); -} - -OrcX86_64::LabelNameFtor -OrcX86_64::insertCompileCallbackTrampolines(Module &M, - TargetAddress ResolverBlockAddr, - unsigned NumCalls, - unsigned StartIndex) { - const char *ResolverBlockPtrName = "Lorc_resolve_block_addr"; - - std::ostringstream AsmStream; - Triple TT(M.getTargetTriple()); - - if (TT.getOS() == Triple::Darwin) - AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" - << ".align 4, 0x90\n"; - else - AsmStream << ".text\n" - << ".align 16, 0x90\n"; - - AsmStream << ResolverBlockPtrName << ":\n" - << " .quad " << ResolverBlockAddr << "\n"; - - auto GetLabelName = - [=](unsigned I) { - std::ostringstream LabelStream; - LabelStream << "orc_jcc_" << (StartIndex + I); - return LabelStream.str(); + // 0x70: Callback mgr address. + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; - for (unsigned I = 0; I < NumCalls; ++I) - AsmStream << GetLabelName(I) << ":\n" - << " callq *" << ResolverBlockPtrName << "(%rip)\n"; + const unsigned ReentryFnAddrOffset = 0x3a; + const unsigned CallbackMgrAddrOffset = 0x70; + + memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); + memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, + sizeof(CallbackMgr)); +} - M.appendModuleInlineAsm(AsmStream.str()); +void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { - return GetLabelName; + unsigned OffsetToPtr = NumTrampolines * TrampolineSize; + + memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void*)); + + uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t CallIndirPCRel = 0xf1c40000000015ff; + + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) + Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); +} + +std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // jmpq *ptr1(%rip) + // .byte 0xC4 ; <- Invalid opcode padding. 
+ // .byte 0xF1 + // stub2: + // jmpq *ptr2(%rip) + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .quad 0x0 + // ptr2: + // .quad 0x0 + // + // ... + + const unsigned StubSize = IndirectStubsInfo::StubSize; + + // Emit at least MinStubs, rounded up to fill the pages allocated. + unsigned PageSize = sys::Process::getPageSize(); + unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; + unsigned NumStubs = (NumPages * PageSize) / StubSize; + + // Allocate memory for stubs and pointers in one call. + std::error_code EC; + auto StubsMem = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, + EC)); + + if (EC) + return EC; + + // Create separate MemoryBlocks representing the stubs and pointers. + sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); + sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + + NumPages * PageSize, + NumPages * PageSize); + + // Populate the stubs page stubs and mark it executable. + uint64_t *Stub = reinterpret_cast(StubsBlock.base()); + uint64_t PtrOffsetField = + static_cast(NumPages * PageSize - 6) << 16; + for (unsigned I = 0; I < NumStubs; ++I) + Stub[I] = 0xF1C40000000025ff | PtrOffsetField; + + if (auto EC = sys::Memory::protectMappedMemory(StubsBlock, + sys::Memory::MF_READ | + sys::Memory::MF_EXEC)) + return EC; + + // Initialize all pointers to point at FailureAddress. + void **Ptr = reinterpret_cast(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I) + Ptr[I] = InitialPtrVal; + + StubsInfo.NumStubs = NumStubs; + StubsInfo.StubsMem = std::move(StubsMem); + + return std::error_code(); } } // End namespace orc. diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 93287a3a4e71..a95f3bbe4179 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -41,20 +41,21 @@ void RuntimeDyldImpl::deregisterEHFrames() {} #ifndef NDEBUG static void dumpSectionMemory(const SectionEntry &S, StringRef State) { - dbgs() << "----- Contents of section " << S.Name << " " << State << " -----"; + dbgs() << "----- Contents of section " << S.getName() << " " << State + << " -----"; - if (S.Address == nullptr) { + if (S.getAddress() == nullptr) { dbgs() << "\n
\n"; return; } const unsigned ColsPerRow = 16; - uint8_t *DataAddr = S.Address; - uint64_t LoadAddr = S.LoadAddress; + uint8_t *DataAddr = S.getAddress(); + uint64_t LoadAddr = S.getLoadAddress(); unsigned StartPadding = LoadAddr & (ColsPerRow - 1); - unsigned BytesRemaining = S.Size; + unsigned BytesRemaining = S.getSize(); if (StartPadding) { dbgs() << "\n" << format("0x%016" PRIx64, @@ -82,30 +83,41 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { void RuntimeDyldImpl::resolveRelocations() { MutexGuard locked(lock); + // Print out the sections prior to relocation. + DEBUG( + for (int i = 0, e = Sections.size(); i != e; ++i) + dumpSectionMemory(Sections[i], "before relocations"); + ); + // First, resolve relocations associated with external symbols. resolveExternalSymbols(); - // Just iterate over the sections we have and resolve all the relocations - // in them. Gross overkill, but it gets the job done. - for (int i = 0, e = Sections.size(); i != e; ++i) { + // Iterate over all outstanding relocations + for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) { // The Section here (Sections[i]) refers to the section in which the // symbol for the relocation is located. The SectionID in the relocation // entry provides the section to which the relocation will be applied. - uint64_t Addr = Sections[i].LoadAddress; - DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t" + int Idx = it->first; + uint64_t Addr = Sections[Idx].getLoadAddress(); + DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t" << format("%p", (uintptr_t)Addr) << "\n"); - DEBUG(dumpSectionMemory(Sections[i], "before relocations")); - resolveRelocationList(Relocations[i], Addr); - DEBUG(dumpSectionMemory(Sections[i], "after relocations")); - Relocations.erase(i); + resolveRelocationList(it->second, Addr); } + Relocations.clear(); + + // Print out sections after relocation. + DEBUG( + for (int i = 0, e = Sections.size(); i != e; ++i) + dumpSectionMemory(Sections[i], "after relocations"); + ); + } void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) { MutexGuard locked(lock); for (unsigned i = 0, e = Sections.size(); i != e; ++i) { - if (Sections[i].Address == LocalAddress) { + if (Sections[i].getAddress() == LocalAddress) { reassignSectionAddress(i, TargetAddress); return; } @@ -122,14 +134,10 @@ static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec, return std::error_code(); } -std::pair +RuntimeDyldImpl::ObjSectionToIDMap RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { MutexGuard locked(lock); - // Grab the first Section ID. We'll use this later to construct the underlying - // range for the returned LoadedObjectInfo. - unsigned SectionsAddedBeginIdx = Sections.size(); - // Save information about our target Arch = (Triple::ArchType)Obj.getArch(); IsTargetLittleEndian = Obj.isLittleEndian(); @@ -155,39 +163,56 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { ++I) { uint32_t Flags = I->getFlags(); - bool IsCommon = Flags & SymbolRef::SF_Common; - if (IsCommon) + if (Flags & SymbolRef::SF_Common) CommonSymbols.push_back(*I); else { object::SymbolRef::Type SymType = I->getType(); - if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data || - SymType == object::SymbolRef::ST_Unknown) { + // Get symbol name. + ErrorOr NameOrErr = I->getName(); + Check(NameOrErr.getError()); + StringRef Name = *NameOrErr; + + // Compute JIT symbol flags. 
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; + if (Flags & SymbolRef::SF_Weak) + RTDyldSymFlags |= JITSymbolFlags::Weak; + if (Flags & SymbolRef::SF_Exported) + RTDyldSymFlags |= JITSymbolFlags::Exported; - ErrorOr NameOrErr = I->getName(); - Check(NameOrErr.getError()); - StringRef Name = *NameOrErr; - section_iterator SI = Obj.section_end(); - Check(I->getSection(SI)); + if (Flags & SymbolRef::SF_Absolute && + SymType != object::SymbolRef::ST_File) { + auto Addr = I->getAddress(); + Check(Addr.getError()); + uint64_t SectOffset = *Addr; + unsigned SectionID = AbsoluteSymbolSection; + + DEBUG(dbgs() << "\tType: " << SymType << " (absolute) Name: " << Name + << " SID: " << SectionID << " Offset: " + << format("%p", (uintptr_t)SectOffset) + << " flags: " << Flags << "\n"); + GlobalSymbolTable[Name] = + SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags); + } else if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data || + SymType == object::SymbolRef::ST_Unknown || + SymType == object::SymbolRef::ST_Other) { + + ErrorOr SIOrErr = I->getSection(); + Check(SIOrErr.getError()); + section_iterator SI = *SIOrErr; if (SI == Obj.section_end()) continue; + // Get symbol offset. uint64_t SectOffset; Check(getOffset(*I, *SI, SectOffset)); - StringRef SectionData; - Check(SI->getContents(SectionData)); bool IsCode = SI->isText(); - unsigned SectionID = - findOrEmitSection(Obj, *SI, IsCode, LocalSections); + unsigned SectionID = findOrEmitSection(Obj, *SI, IsCode, LocalSections); + DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << Flags << "\n"); - JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; - if (Flags & SymbolRef::SF_Weak) - RTDyldSymFlags |= JITSymbolFlags::Weak; - if (Flags & SymbolRef::SF_Exported) - RTDyldSymFlags |= JITSymbolFlags::Exported; GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags); } @@ -231,9 +256,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Give the subclasses a chance to tie-up any loose ends. finalizeLoad(Obj, LocalSections); - unsigned SectionsAddedEndIdx = Sections.size(); +// for (auto E : LocalSections) +// llvm::dbgs() << "Added: " << E.first.getRawDataRefImpl() << " -> " << E.second << "\n"; - return std::make_pair(SectionsAddedBeginIdx, SectionsAddedEndIdx); + return LocalSections; } // A helper method for computeTotalAllocSize. 
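// Since loadObjectImpl now returns the section map itself, the
// LoadedObjectInfo helpers can answer load-address queries per SectionRef
// instead of scanning a [BeginIdx, EndIdx) range by name. A sketch of the
// lookup this enables (registerWithDebugger is a hypothetical helper):
//
// \code
//   std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info = Dyld.loadObject(Obj);
//   for (const object::SectionRef &Sec : Obj.sections())
//     if (uint64_t LoadAddr = Info->getSectionLoadAddress(Sec))
//       registerWithDebugger(Sec, LoadAddr);
// \endcode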
@@ -406,10 +432,9 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj, if (!(RelSecI == Section)) continue; - for (const RelocationRef &Reloc : SI->relocations()) { - (void)Reloc; - StubBufSize += StubSize; - } + for (const RelocationRef &Reloc : SI->relocations()) + if (relocationNeedsStub(Reloc)) + StubBufSize += StubSize; } // Get section data size and alignment @@ -492,7 +517,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry("", Addr, CommonSize, 0)); + Sections.push_back( + SectionEntry("", Addr, CommonSize, CommonSize, 0)); memset(Addr, 0, CommonSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: " @@ -524,6 +550,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, Offset += Size; Addr += Size; } + + if (Checker) + Checker->registerSection(Obj.getFileName(), SectionID); } unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, @@ -556,12 +585,20 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, uint8_t *Addr; const char *pData = nullptr; - // In either case, set the location of the unrelocated section in memory, - // since we still process relocations for it even if we're not applying them. - Check(Section.getContents(data)); - // Virtual sections have no data in the object image, so leave pData = 0 - if (!IsVirtual) + // If this section contains any bits (i.e. isn't a virtual or bss section), + // grab a reference to them. + if (!IsVirtual && !IsZeroInit) { + // In either case, set the location of the unrelocated section in memory, + // since we still process relocations for it even if we're not applying them. + Check(Section.getContents(data)); pData = data.data(); + } + + // Code section alignment needs to be at least as high as stub alignment or + // padding calculations may by incorrect when the section is remapped to a + // higher alignment. + if (IsCode) + Alignment = std::max(Alignment, getStubAlignment()); // Some sections, such as debug info, don't need to be loaded for execution. // Leave those where they are. @@ -606,7 +643,8 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, << " Allocate: " << Allocate << "\n"); } - Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); + Sections.push_back( + SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData)); if (Checker) Checker->registerSection(Obj.getFileName(), SectionID); @@ -742,11 +780,11 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, // Addr is a uint64_t because we can't assume the pointer width // of the target is the same as that of the host. Just use a generic // "big enough" type. 
- DEBUG(dbgs() << "Reassigning address for section " - << SectionID << " (" << Sections[SectionID].Name << "): " - << format("0x%016" PRIx64, Sections[SectionID].LoadAddress) << " -> " - << format("0x%016" PRIx64, Addr) << "\n"); - Sections[SectionID].LoadAddress = Addr; + DEBUG(dbgs() << "Reassigning address for section " << SectionID << " (" + << Sections[SectionID].getName() << "): " + << format("0x%016" PRIx64, Sections[SectionID].getLoadAddress()) + << " -> " << format("0x%016" PRIx64, Addr) << "\n"); + Sections[SectionID].setLoadAddress(Addr); } void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, @@ -754,7 +792,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const RelocationEntry &RE = Relocs[i]; // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address == nullptr) + if (Sections[RE.SectionID].getAddress() == nullptr) continue; resolveRelocation(RE, Value); } @@ -818,10 +856,11 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // RuntimeDyld class implementation uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( - StringRef SectionName) const { - for (unsigned I = BeginIdx; I != EndIdx; ++I) - if (RTDyld.Sections[I].Name == SectionName) - return RTDyld.Sections[I].LoadAddress; + const object::SectionRef &Sec) const { + + auto I = ObjSecToIDMap.find(Sec); + if (I != ObjSecToIDMap.end()) + return RTDyld.Sections[I->second].getLoadAddress(); return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 1dacc1393f2c..e5fab929ea29 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldCOFF.h" +#include "Targets/RuntimeDyldCOFFI386.h" #include "Targets/RuntimeDyldCOFFX86_64.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" @@ -24,12 +25,11 @@ using namespace llvm::object; namespace { -class LoadedCOFFObjectInfo +class LoadedCOFFObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary getObjectForDebug(const ObjectFile &Obj) const override { @@ -48,6 +48,8 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); break; + case Triple::x86: + return make_unique(MemMgr, Resolver); case Triple::x86_64: return make_unique(MemMgr, Resolver); } @@ -55,10 +57,7 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, std::unique_ptr RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index ae199b720223..58ce88a68f23 100644 --- 
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -727,7 +727,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, } bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { - if (getRTDyld().getSymbolLocalAddress(Symbol)) + if (getRTDyld().getSymbol(Symbol)) return true; return !!getRTDyld().Resolver.findSymbol(Symbol); } @@ -799,11 +799,10 @@ std::pair RuntimeDyldCheckerImpl::getSectionAddr( unsigned SectionID = SectionInfo->SectionID; uint64_t Addr; if (IsInsideLoad) - Addr = - static_cast( - reinterpret_cast(getRTDyld().Sections[SectionID].Address)); + Addr = static_cast(reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress())); else - Addr = getRTDyld().Sections[SectionID].LoadAddress; + Addr = getRTDyld().Sections[SectionID].getLoadAddress(); return std::make_pair(Addr, std::string("")); } @@ -835,11 +834,11 @@ std::pair RuntimeDyldCheckerImpl::getStubAddrFor( uint64_t Addr; if (IsInsideLoad) { - uintptr_t SectionBase = - reinterpret_cast(getRTDyld().Sections[SectionID].Address); + uintptr_t SectionBase = reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress()); Addr = static_cast(SectionBase) + StubOffset; } else { - uint64_t SectionBase = getRTDyld().Sections[SectionID].LoadAddress; + uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress(); Addr = SectionBase + StubOffset; } @@ -855,16 +854,16 @@ RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const { const auto &SymInfo = pos->second; uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID()); return StringRef(reinterpret_cast(SectionAddr) + - SymInfo.getOffset(), - getRTDyld().Sections[SymInfo.getSectionID()].Size - - SymInfo.getOffset()); + SymInfo.getOffset(), + getRTDyld().Sections[SymInfo.getSectionID()].getSize() - + SymInfo.getOffset()); } void RuntimeDyldCheckerImpl::registerSection( StringRef FilePath, unsigned SectionID) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; } @@ -874,7 +873,7 @@ void RuntimeDyldCheckerImpl::registerStubMap( const RuntimeDyldImpl::StubMap &RTDyldStubs) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 3787950b3b08..e09b71af18a5 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -66,7 +66,6 @@ public: static inline bool classof(const ELFObjectFile *v) { return v->isDyldType(); } - }; @@ -104,12 +103,11 @@ void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, sym->st_value = static_cast(Addr); } -class LoadedELFObjectInfo +class LoadedELFObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary 
getObjectForDebug(const ObjectFile &Obj) const override; @@ -118,6 +116,7 @@ public: template std::unique_ptr> createRTDyldELFObject(MemoryBufferRef Buffer, + const ObjectFile &SourceObject, const LoadedELFObjectInfo &L, std::error_code &ec) { typedef typename ELFFile::Elf_Shdr Elf_Shdr; @@ -127,6 +126,7 @@ createRTDyldELFObject(MemoryBufferRef Buffer, llvm::make_unique>(Buffer, ec); // Iterate over all sections in the object. + auto SI = SourceObject.section_begin(); for (const auto &Sec : Obj->sections()) { StringRef SectionName; Sec.getName(SectionName); @@ -135,12 +135,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer, Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); - if (uint64_t SecLoadAddr = L.getSectionLoadAddress(SectionName)) { + if (uint64_t SecLoadAddr = L.getSectionLoadAddress(*SI)) { // This assumes that the address passed in matches the target address // bitness. The template-based type cast handles everything else. shdr->sh_addr = static_cast(SecLoadAddr); } } + ++SI; } return Obj; @@ -158,16 +159,20 @@ OwningBinary createELFDebugObject(const ObjectFile &Obj, std::unique_ptr DebugObj; if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) { typedef ELFType ELF32LE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) { typedef ELFType ELF32BE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) { typedef ELFType ELF64BE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) { typedef ELFType ELF64LE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else llvm_unreachable("Unexpected ELF format"); @@ -181,7 +186,7 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const { return createELFDebugObject(Obj, *this); } -} // namespace +} // anonymous namespace namespace llvm { @@ -193,9 +198,9 @@ RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } @@ -205,9 +210,9 @@ void RuntimeDyldELF::registerEHFrames() { void RuntimeDyldELF::deregisterEHFrames() { for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = RegisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = 
Sections[EHFrameSID].getSize(); MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } RegisteredEHFrameSections.clear(); @@ -215,10 +220,7 @@ void RuntimeDyldELF::deregisterEHFrames() { std::unique_ptr RuntimeDyldELF::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, @@ -230,9 +232,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation type not implemented yet!"); break; case ELF::R_X86_64_64: { - support::ulittle64_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_32: @@ -242,23 +245,34 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncatedAddr; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncatedAddr; DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + case ELF::R_X86_64_PC8: { + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + int64_t RealOffset = Value + Addend - FinalAddress; + assert(isInt<8>(RealOffset)); + int8_t TruncOffset = (RealOffset & 0xFF); + Section.getAddress()[Offset] = TruncOffset; break; } case ELF::R_X86_64_PC32: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<32>(RealOffset)); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncOffset; break; } case ELF::R_X86_64_PC64: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; - support::ulittle64_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } } @@ -269,13 +283,16 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, uint32_t Type, int32_t Addend) { switch (Type) { case ELF::R_386_32: { - support::ulittle32_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; break; } case ELF::R_386_PC32: { - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t FinalAddress = + Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; uint32_t RealOffset = Value + Addend - FinalAddress; - support::ulittle32_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } default: @@ -289,11 
+306,12 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint32_t *TargetPtr = reinterpret_cast(Section.Address + Offset); - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" << format("%llx", FinalAddress) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) @@ -305,7 +323,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, break; case ELF::R_AARCH64_ABS64: { uint64_t *TargetPtr = - reinterpret_cast(Section.Address + Offset); + reinterpret_cast(Section.getAddressWithOffset(Offset)); *TargetPtr = Value + Addend; break; } @@ -428,12 +446,13 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { // TODO: Add Thumb relocations. - uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset); - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; Value += Addend; DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " - << Section.Address + Offset + << Section.getAddressWithOffset(Offset) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); @@ -477,13 +496,14 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { - uint8_t *TargetPtr = Section.Address + Offset; + uint8_t *TargetPtr = Section.getAddressWithOffset(Offset); Value += Addend; DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: " - << Section.Address + Offset << " FinalAddress: " - << format("%p", Section.LoadAddress + Offset) << " Value: " - << format("%x", Value) << " Type: " << format("%x", Type) + << Section.getAddressWithOffset(Offset) << " FinalAddress: " + << format("%p", Section.getLoadAddressWithOffset(Offset)) + << " Value: " << format("%x", Value) + << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); uint32_t Insn = readBytesUnaligned(TargetPtr, 4); @@ -512,47 +532,47 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, writeBytesUnaligned(Insn, TargetPtr, 4); break; case ELF::R_MIPS_PC32: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4); break; } case ELF::R_MIPS_PC16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress) >> 2) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC19_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + 
uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfff80000; Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC21_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffe00000; Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC26_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfc000000; Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCHI16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCLO16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= (Value - FinalAddress) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); @@ -603,7 +623,8 @@ void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section, CalculatedValue, SymOffset, SectionID); } - applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType); + applyMIPS64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, + RelType); } int64_t @@ -613,13 +634,12 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, uint64_t SymOffset, SID SectionID) { DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" - << format("%llx", Section.LoadAddress + Offset) + << format("%llx", Section.getLoadAddressWithOffset(Offset)) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) - << " SymOffset: " << format("%x", SymOffset) - << "\n"); + << " SymOffset: " << format("%x", SymOffset) << "\n"); switch (Type) { default: @@ -672,35 +692,35 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, return Value + Addend - (GOTAddr + 0x7ff0); } case ELF::R_MIPS_PC16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0xffff; } case ELF::R_MIPS_PC32: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return Value + Addend - FinalAddress; } case ELF::R_MIPS_PC18_S3: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); - return ((Value + Addend - ((FinalAddress | 7) ^ 7)) >> 3) & 0x3ffff; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff; } case ELF::R_MIPS_PC19_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); - return ((Value + Addend - FinalAddress) >> 2) & 0x7ffff; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; } case ELF::R_MIPS_PC21_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); 
return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff; } case ELF::R_MIPS_PC26_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff; } case ELF::R_MIPS_PCHI16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff; } case ELF::R_MIPS_PCLO16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return (Value + Addend - FinalAddress) & 0xffff; } } @@ -769,7 +789,7 @@ void RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj, // relocation) without a .toc directive. In this case just use the // first section (which is usually the .odp) since the code won't // reference the .toc base directly. - Rel.SymbolName = NULL; + Rel.SymbolName = nullptr; Rel.SectionID = 0; // The TOC consists of sections .got, .toc, .tocbss, .plt in that @@ -842,8 +862,9 @@ void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, if (Rel.Addend != (int64_t)TargetSymbolOffset) continue; - section_iterator tsi(Obj.section_end()); - check(TargetSymbol->getSection(tsi)); + ErrorOr TSIOrErr = TargetSymbol->getSection(); + check(TSIOrErr.getError()); + section_iterator tsi = *TSIOrErr; bool IsCode = tsi->isText(); Rel.SectionID = findOrEmitSection(Obj, (*tsi), IsCode, LocalSections); Rel.Addend = (intptr_t)Addend; @@ -884,10 +905,30 @@ static inline uint16_t applyPPChighesta (uint64_t value) { return ((value + 0x8000) >> 48) & 0xffff; } +void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend) { + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_PPC_ADDR16_LO: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); + break; + case ELF::R_PPC_ADDR16_HI: + writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); + break; + case ELF::R_PPC_ADDR16_HA: + writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); + break; + } +} + void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -929,17 +970,17 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; case ELF::R_PPC64_REL16_LO: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPClo(Delta)); } break; case ELF::R_PPC64_REL16_HI: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPChi(Delta)); } break; case ELF::R_PPC64_REL16_HA: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; 
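// Worked example of the @ha/@lo split used here (values illustrative): for
// Delta = 0x12348765, applyPPClo yields 0x8765 and applyPPCha yields
// ((0x12348765 + 0x8000) >> 16) & 0xffff = 0x1235. The +0x8000 bias
// compensates for the low half being sign-extended when consumed, so that
// (ha << 16) + (int16_t)lo = 0x12350000 - 0x789B = 0x12348765 round-trips.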
writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; @@ -950,22 +991,22 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<24>(delta) != delta) + if (SignExtend32<26>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); // Generates a 'bl
<address>' instruction
       writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
     } break;
     case ELF::R_PPC64_REL32: {
-      uint64_t FinalAddress = (Section.LoadAddress + Offset);
+      uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
       int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
       if (SignExtend32<32>(delta) != delta)
         llvm_unreachable("Relocation R_PPC64_REL32 overflow");
       writeInt32BE(LocalAddress, delta);
     } break;
     case ELF::R_PPC64_REL64: {
-      uint64_t FinalAddress = (Section.LoadAddress + Offset);
+      uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
       uint64_t Delta = Value - FinalAddress + Addend;
       writeInt64BE(LocalAddress, Delta);
     } break;
@@ -978,27 +1019,27 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
 void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
                                               uint64_t Offset, uint64_t Value,
                                               uint32_t Type, int64_t Addend) {
-  uint8_t *LocalAddress = Section.Address + Offset;
+  uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
   switch (Type) {
   default:
     llvm_unreachable("Relocation type not implemented yet!");
     break;
   case ELF::R_390_PC16DBL:
   case ELF::R_390_PLT16DBL: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow");
     writeInt16BE(LocalAddress, Delta / 2);
     break;
   }
   case ELF::R_390_PC32DBL:
   case ELF::R_390_PLT32DBL: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow");
     writeInt32BE(LocalAddress, Delta / 2);
     break;
   }
   case ELF::R_390_PC32: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int32_t(Delta) == Delta && "R_390_PC32 overflow");
     writeInt32BE(LocalAddress, Delta);
     break;
@@ -1072,6 +1113,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
     else
       llvm_unreachable("Mips ABI not handled");
     break;
+  case Triple::ppc:
+    resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
+    break;
   case Triple::ppc64: // Fall through.
case Triple::ppc64le: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); @@ -1085,7 +1129,7 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, } void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const { - return (void*)(Sections[SectionID].ObjAddress + Offset); + return (void *)(Sections[SectionID].getObjAddress() + Offset); } void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value) { @@ -1096,6 +1140,29 @@ void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset addRelocationForSection(RE, Value.SectionID); } +uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType, + bool IsLocal) const { + switch (RelType) { + case ELF::R_MICROMIPS_GOT16: + if (IsLocal) + return ELF::R_MICROMIPS_LO16; + break; + case ELF::R_MICROMIPS_HI16: + return ELF::R_MICROMIPS_LO16; + case ELF::R_MIPS_GOT16: + if (IsLocal) + return ELF::R_MIPS_LO16; + break; + case ELF::R_MIPS_HI16: + return ELF::R_MIPS_LO16; + case ELF::R_MIPS_PCHI16: + return ELF::R_MIPS_PCLO16; + default: + break; + } + return ELF::R_MIPS_NONE; +} + relocation_iterator RuntimeDyldELF::processRelocationRef( unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) { @@ -1136,8 +1203,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously // and can be changed by another developers. Maybe best way is add // a new symbol type ST_Section to SymbolRef and use it. - section_iterator si(Obj.section_end()); - Symbol->getSection(si); + section_iterator si = *Symbol->getSection(); if (si == Obj.section_end()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); @@ -1178,24 +1244,28 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, + resolveRelocation(Section, Offset, + (uint64_t)Section.getAddressWithOffset(i->second), RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); - RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.Address, + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.getAddress(), ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); - RelocationEntry REmovk_g2(SectionID, StubTargetAddr - Section.Address + 4, + RelocationEntry REmovk_g2(SectionID, StubTargetAddr - + Section.getAddress() + 4, ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); - RelocationEntry REmovk_g1(SectionID, StubTargetAddr - Section.Address + 8, + RelocationEntry REmovk_g1(SectionID, StubTargetAddr - + Section.getAddress() + 8, ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); - RelocationEntry REmovk_g0(SectionID, - StubTargetAddr - Section.Address + 12, + RelocationEntry REmovk_g0(SectionID, StubTargetAddr - + Section.getAddress() + 12, ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); if (Value.SymbolName) { @@ -1210,9 +1280,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REmovk_g0, Value.SectionID); } resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + reinterpret_cast(Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else if (Arch == Triple::arm) { if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || @@ -1224,26 +1295,29 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + resolveRelocation( + Section, Offset, + reinterpret_cast(Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - ELF::R_ARM_ABS32, Value.Addend); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else { uint32_t *Placeholder = @@ -1282,15 +1356,16 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); // Creating Hi and Lo relocations for the filled stub instructions. - RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, - ELF::R_MIPS_HI16, Value.Addend); - RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, - ELF::R_MIPS_LO16, Value.Addend); + RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(SectionID, + StubTargetAddr - Section.getAddress() + 4, + ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); @@ -1301,21 +1376,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(RELo, Value.SectionID); } - RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset); + RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); addRelocationForSection(RE, SectionID); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } + } else if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) { + int64_t Addend = (Opcode & 0x0000ffff) << 16; + RelocationEntry RE(SectionID, Offset, RelType, Addend); + PendingRelocs.push_back(std::make_pair(Value, RE)); + } else if (RelType == ELF::R_MIPS_LO16 || RelType == ELF::R_MIPS_PCLO16) { + int64_t Addend = Value.Addend + SignExtend32<16>(Opcode & 0x0000ffff); + for (auto I = PendingRelocs.begin(); I != PendingRelocs.end();) { + const RelocationValueRef &MatchingValue = I->first; + RelocationEntry &Reloc = I->second; + if (MatchingValue == Value && + RelType == getMatchingLoRelocation(Reloc.RelType) && + SectionID == Reloc.SectionID) { + Reloc.Addend += Addend; + if (Value.SymbolName) + addRelocationForSymbol(Reloc, Value.SymbolName); + else + addRelocationForSection(Reloc, Value.SectionID); + I = PendingRelocs.erase(I); + } else + ++I; + } + RelocationEntry RE(SectionID, Offset, RelType, Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); } else { - // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16, - // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations. - if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) - Value.Addend += (Opcode & 0x0000ffff) << 16; - else if (RelType == ELF::R_MIPS_LO16) - Value.Addend += (Opcode & 0x0000ffff); - else if (RelType == ELF::R_MIPS_32) + if (RelType == ELF::R_MIPS_32) Value.Addend += Opcode; - else if (RelType == ELF::R_MIPS_PCLO16) - Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff)); else if (RelType == ELF::R_MIPS_PC16) Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2); else if (RelType == ELF::R_MIPS_PC19_S2) @@ -1353,7 +1446,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // an external symbol (Symbol::ST_Unknown) or if the target address // is not within the signed 24-bits branch address. 
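The *HI16/*LO16 pairing above exists because a MIPS address is materialized in two halves and the hardware sign-extends the low half, so the high half must be carry-adjusted before the two relocations can be patched independently. A minimal sketch of that split, with hypothetical helper names rather than anything from RuntimeDyld:

#include <cassert>
#include <cstdint>

static uint32_t hi16(uint32_t Value) {
  // Add 0x8000 so that the implicit sign-extension of the low 16 bits
  // is compensated: (hi16(V) << 16) + (int16_t)lo16(V) == V.
  return (Value + 0x8000) >> 16;
}

static uint32_t lo16(uint32_t Value) { return Value & 0xffff; }

int main() {
  uint32_t V = 0x1234ffff; // low half is negative as an int16_t
  uint32_t Reassembled =
      (hi16(V) << 16) + static_cast<int16_t>(lo16(V)); // carry cancels out
  assert(Reassembled == V);
  return 0;
}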
SectionEntry &Section = Sections[SectionID]; - uint8_t *Target = Section.Address + Offset; + uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; if (SymType != SymbolRef::ST_Unknown) { if (AbiVariant != 2) { @@ -1367,10 +1460,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( uint8_t SymOther = Symbol->getOther(); Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); } - uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend; + uint8_t *RelocTarget = + Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int32_t delta = static_cast(Target - RelocTarget); - // If it is within 24-bits branch range, just set the branch target - if (SignExtend32<24>(delta) == delta) { + // If it is within 26-bits branch range, just set the branch target + if (SignExtend32<26>(delta) == delta) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); @@ -1387,23 +1481,25 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it resolveRelocation(Section, Offset, - (uint64_t)Section.Address + i->second, RelType, 0); + reinterpret_cast( + Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset, - AbiVariant); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset()), + AbiVariant); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to // apply to the low part of the instructions, so we have to update // the offset according to the target endianness. - uint64_t StubRelocOffset = StubTargetAddr - Section.Address; + uint64_t StubRelocOffset = StubTargetAddr - Section.getAddress(); if (!IsTargetLittleEndian) StubRelocOffset += 2; @@ -1428,10 +1524,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), RelType, 0); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (SymType == SymbolRef::ST_Unknown) { // Restore the TOC for external calls @@ -1450,11 +1547,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // These relocations are supposed to subtract the TOC address from // the final value. This does not fit cleanly into the RuntimeDyld // scheme, since there may be *two* sections involved in determining - // the relocation value (the section of the symbol refered to by the + // the relocation value (the section of the symbol referred to by the // relocation, and the TOC section associated with the current module). 
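The widened range test earlier in this hunk reflects how R_PPC64_REL24 is encoded: the instruction stores a 24-bit word offset, which covers a 26-bit byte displacement once shifted left by two. A small sketch of that check, assuming LLVM-style SignExtend32 semantics; the helper names are illustrative only:

#include <cassert>
#include <cstdint>

template <unsigned B> static int32_t signExtend32(uint32_t X) {
  // Same shape as LLVM's SignExtend32: shift the B-bit field to the top,
  // then arithmetic-shift it back down.
  return int32_t(X << (32 - B)) >> (32 - B);
}

static bool fitsInRel24(int64_t ByteDelta) {
  // Instructions are 4-byte aligned, so the low two bits must be zero, and
  // the value must survive a round-trip through 26 signed bits.
  return (ByteDelta & 0x3) == 0 &&
         signExtend32<26>(static_cast<uint32_t>(ByteDelta)) == ByteDelta;
}

int main() {
  assert(fitsInRel24(0x01fffffc));  // just inside the +/-32 MiB window
  assert(!fitsInRel24(0x02000000)); // one step outside
  return 0;
}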
// // Fortunately, these relocations are currently only ever generated - // refering to symbols that themselves reside in the TOC, which means + // referring to symbols that themselves reside in the TOC, which means // that the two sections are actually the same. Thus they cancel out // and we can immediately resolve the relocation right now. switch (RelType) { @@ -1511,16 +1608,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; + StubAddress = uintptr_t(Section.getAddressWithOffset(i->second)); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -1531,7 +1629,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (RelType == ELF::R_390_GOTENT) @@ -1564,37 +1662,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; - DEBUG(dbgs() << " Stub function found\n"); + StubAddress = uintptr_t(Section.getAddress()) + i->second; + DEBUG(dbgs() << " Stub function found\n"); } else { - // Create a new stub function (equivalent to a PLT entry). - DEBUG(dbgs() << " Create a new stub function\n"); + // Create a new stub function (equivalent to a PLT entry). 
+ DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); - uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; - unsigned StubOffset = StubAddress - BaseAddress; - Stubs[Value] = StubOffset; - createStubFunction((uint8_t *)StubAddress); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); - // Bump our stub offset counter - Section.StubOffset = StubOffset + getMaxStubSize(); + // Bump our stub offset counter + Section.advanceStubOffset(getMaxStubSize()); - // Allocate a GOT Entry - uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + // Allocate a GOT Entry + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); - // The load of the GOT address has an addend of -4 - resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); + // The load of the GOT address has an addend of -4 + resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); - // Fill in the value of the symbol we're targeting into the GOT - addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64), - Value.SymbolName); + // Fill in the value of the symbol we're targeting into the GOT + addRelocationForSymbol( + computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64), + Value.SymbolName); } // Make the target call a call into the stub table. resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32, - Addend); + Addend); } else { RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend, Value.Offset); @@ -1670,7 +1770,7 @@ uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no) GOTSectionID = Sections.size(); // Reserve a section id. 
We'll allocate the section later // once we know the total size - Sections.push_back(SectionEntry(".got", 0, 0, 0)); + Sections.push_back(SectionEntry(".got", nullptr, 0, 0, 0)); } uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize(); CurrentGOTIndex += no; @@ -1693,6 +1793,10 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { + if (IsMipsO32ABI) + if (!PendingRelocs.empty()) + report_fatal_error("Can't find matching LO16 reloc"); + // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section @@ -1702,7 +1806,8 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for GOT!"); - Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0); + Sections[GOTSectionID] = + SectionEntry(".got", Addr, TotalSize, TotalSize, 0); if (Checker) Checker->registerSection(Obj.getFileName(), GOTSectionID); @@ -1746,4 +1851,23 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { return Obj.isELF(); } +bool RuntimeDyldELF::relocationNeedsStub(const RelocationRef &R) const { + if (Arch != Triple::x86_64) + return true; // Conservative answer + + switch (R.getType()) { + default: + return true; // Conservative answer + + case ELF::R_X86_64_GOTPCREL: + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: + case ELF::R_X86_64_64: + // We know that these relocation types won't need a stub function. This list + // can be extended as needed. + return false; + } +} + } // namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 1a2552deed95..041811d3e285 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -43,6 +43,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + void resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); @@ -120,6 +123,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // no particular advanced processing. void processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value); + // Return the matching *LO16 relocation (Mips specific) + uint32_t getMatchingLoRelocation(uint32_t RelType, + bool IsLocal = false) const; + // The tentative ID for the GOT section unsigned GOTSectionID; @@ -135,12 +142,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // A map to avoid duplicate got entries (Mips64 specific) StringMap<uint64_t> GOTSymbolOffsets; + // *HI16 relocations will be added for resolving when we find the matching + // *LO16 part. (Mips specific) + SmallVector<std::pair<RelocationValueRef, RelocationEntry>, 8> PendingRelocs; + // When a module is loaded we save the SectionID of the EH frame section // in a table until we receive a request to register all unregistered // EH frame sections with the memory manager.
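The allocateGOTEntries/finalizeLoad pair above is a bump allocator: it reserves a section ID up front, hands out offsets as index times entry size, and only materializes the backing memory once the total count is known. A stripped-down sketch of that bookkeeping (a hypothetical class, not the RuntimeDyldELF API):

#include <cassert>
#include <cstdint>

class GOTAllocator {
  uint64_t EntrySize;         // 8 on 64-bit targets
  uint64_t CurrentIndex = 0;

public:
  explicit GOTAllocator(uint64_t EntrySize) : EntrySize(EntrySize) {}

  // Reserve N consecutive entries; return the byte offset of the first.
  uint64_t allocate(unsigned N) {
    uint64_t StartOffset = CurrentIndex * EntrySize;
    CurrentIndex += N;
    return StartOffset;
  }

  // Total size to request from the memory manager at finalize time.
  uint64_t totalSize() const { return CurrentIndex * EntrySize; }
};

int main() {
  GOTAllocator GOT(8);
  assert(GOT.allocate(1) == 0);
  assert(GOT.allocate(2) == 8);
  assert(GOT.totalSize() == 24);
  return 0;
}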
SmallVector<SID, 2> UnregisteredEHFrameSections; SmallVector<SID, 2> RegisteredEHFrameSections; + bool relocationNeedsStub(const RelocationRef &R) const override; + public: RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, RuntimeDyld::SymbolResolver &Resolver); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index e085a9296e8d..dafd3c8793c3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -30,6 +30,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include <map> +#include <unordered_map> #include <system_error> using namespace llvm; @@ -50,7 +51,6 @@ class Twine; /// SectionEntry - represents a section emitted into memory by the dynamic /// linker. class SectionEntry { -public: /// Name - section name. std::string Name; @@ -70,15 +70,54 @@ public: /// relocations (like ARM). uintptr_t StubOffset; + /// The total amount of space allocated for this section. This includes the + /// section size and the maximum amount of space that the stubs can occupy. + size_t AllocationSize; + /// ObjAddress - address of the section in the in-memory object file. Used /// for calculating relocations in some object formats (like MachO). uintptr_t ObjAddress; +public: SectionEntry(StringRef name, uint8_t *address, size_t size, - uintptr_t objAddress) + size_t allocationSize, uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress(reinterpret_cast<uintptr_t>(address)), StubOffset(size), - ObjAddress(objAddress) {} + AllocationSize(allocationSize), ObjAddress(objAddress) { + // AllocationSize is used only in asserts, prevent an "unused private field" + // warning: + (void)AllocationSize; + } + + StringRef getName() const { return Name; } + + uint8_t *getAddress() const { return Address; } + + /// \brief Return the address of this section with an offset. + uint8_t *getAddressWithOffset(unsigned OffsetBytes) const { + assert(OffsetBytes <= AllocationSize && "Offset out of bounds!"); + return Address + OffsetBytes; + } + + size_t getSize() const { return Size; } + + uint64_t getLoadAddress() const { return LoadAddress; } + void setLoadAddress(uint64_t LA) { LoadAddress = LA; } + + /// \brief Return the load address of this section with an offset. + uint64_t getLoadAddressWithOffset(unsigned OffsetBytes) const { + assert(OffsetBytes <= AllocationSize && "Offset out of bounds!"); + return LoadAddress + OffsetBytes; + } + + uintptr_t getStubOffset() const { return StubOffset; } + + void advanceStubOffset(unsigned StubSize) { + StubOffset += StubSize; + assert(StubOffset <= AllocationSize && "Not enough space allocated!"); + } + + uintptr_t getObjAddress() const { return ObjAddress; } }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -188,6 +227,8 @@ class RuntimeDyldImpl { friend class RuntimeDyld::LoadedObjectInfo; friend class RuntimeDyldCheckerImpl; protected: + static const unsigned AbsoluteSymbolSection = ~0U; + // The MemoryManager to load objects into. RuntimeDyld::MemoryManager &MemMgr; @@ -224,7 +265,7 @@ protected: // Relocations to sections already loaded. Indexed by SectionID which is the // source of the address. The target where the address will be written is // SectionID/Offset in the relocation itself. - DenseMap<unsigned, RelocationList> Relocations; + std::unordered_map<unsigned, RelocationList> Relocations; // Relocations to external symbols that are not yet resolved.
Symbols are // external when they aren't found in the global symbol table of all loaded @@ -269,11 +310,11 @@ protected: } uint64_t getSectionLoadAddress(unsigned SectionID) const { - return Sections[SectionID].LoadAddress; + return Sections[SectionID].getLoadAddress(); } uint8_t *getSectionAddress(unsigned SectionID) const { - return (uint8_t *)Sections[SectionID].Address; + return Sections[SectionID].getAddress(); } void writeInt16BE(uint8_t *Addr, uint16_t Value) { @@ -378,7 +419,12 @@ protected: const SectionRef &Section); // \brief Implementation of the generic part of the loadObject algorithm. - std::pair loadObjectImpl(const object::ObjectFile &Obj); + ObjSectionToIDMap loadObjectImpl(const object::ObjectFile &Obj); + + // \brief Return true if the relocation R may require allocating a stub. + virtual bool relocationNeedsStub(const RelocationRef &R) const { + return true; // Conservative answer + } public: RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr, @@ -407,6 +453,9 @@ public: if (pos == GlobalSymbolTable.end()) return nullptr; const auto &SymInfo = pos->second; + // Absolute symbols do not have a local address. + if (SymInfo.getSectionID() == AbsoluteSymbolSection) + return nullptr; return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset(); } @@ -417,8 +466,10 @@ public: if (pos == GlobalSymbolTable.end()) return nullptr; const auto &SymEntry = pos->second; - uint64_t TargetAddr = - getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset(); + uint64_t SectionAddr = 0; + if (SymEntry.getSectionID() != AbsoluteSymbolSection) + SectionAddr = getSectionLoadAddress(SymEntry.getSectionID()); + uint64_t TargetAddr = SectionAddr + SymEntry.getOffset(); return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags()); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index c0741141757c..739e8d65dbf4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -26,12 +26,12 @@ using namespace llvm::object; namespace { -class LoadedMachOObjectInfo +class LoadedMachOObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, + ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary getObjectForDebug(const ObjectFile &Obj) const override { @@ -45,11 +45,47 @@ namespace llvm { int64_t RuntimeDyldMachO::memcpyAddend(const RelocationEntry &RE) const { unsigned NumBytes = 1 << RE.Size; - uint8_t *Src = Sections[RE.SectionID].Address + RE.Offset; + uint8_t *Src = Sections[RE.SectionID].getAddress() + RE.Offset; return static_cast(readBytesUnaligned(Src, NumBytes)); } +relocation_iterator RuntimeDyldMachO::processScatteredVANILLA( + unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + MachO::any_relocation_info RE = + Obj.getRelocation(RelI->getRawDataRefImpl()); + + SectionEntry &Section = Sections[SectionID]; + uint32_t RelocType = Obj.getAnyRelocationType(RE); + bool IsPCRel = Obj.getAnyRelocationPCRel(RE); + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = 
Section.getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); + + unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE); + section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr); + assert(TargetSI != Obj.section_end() && "Can't find section for symbol"); + uint64_t SectionBaseAddr = TargetSI->getAddress(); + SectionRef TargetSection = *TargetSI; + bool IsCode = TargetSection.isText(); + uint32_t TargetSectionID = + findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID); + + Addend -= SectionBaseAddr; + RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size); + + addRelocationForSection(R, TargetSectionID); + + return ++RelI; +} + + RelocationValueRef RuntimeDyldMachO::getRelocationValueRef( const ObjectFile &BaseTObj, const relocation_iterator &RI, const RelocationEntry &RE, ObjSectionToIDMap &ObjSectionToID) { @@ -99,8 +135,8 @@ void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value, void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE, uint64_t Value) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint8_t *LocalAddress = Section.getAddress() + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddress() + RE.Offset; dbgs() << "resolveRelocation Section: " << RE.SectionID << " LocalAddress: " << format("%p", LocalAddress) @@ -147,10 +183,9 @@ void RuntimeDyldMachO::populateIndirectSymbolPointersSection( "Pointers section does not contain a whole number of stubs?"); DEBUG(dbgs() << "Populating pointer table section " - << Sections[PTSectionID].Name - << ", Section ID " << PTSectionID << ", " - << NumPTEntries << " entries, " << PTEntrySize - << " bytes each:\n"); + << Sections[PTSectionID].getName() << ", Section ID " + << PTSectionID << ", " << NumPTEntries << " entries, " + << PTEntrySize << " bytes each:\n"); for (unsigned i = 0; i < NumPTEntries; ++i) { unsigned SymbolIndex = @@ -204,7 +239,7 @@ void RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj, } template -unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, +unsigned char *RuntimeDyldMachOCRTPBase::processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH) { typedef typename Impl::TargetPtrT TargetPtrT; @@ -213,7 +248,7 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, << ", Delta for EH: " << DeltaForEH << "\n"); uint32_t Length = readBytesUnaligned(P, 4); P += 4; - unsigned char *Ret = P + Length; + uint8_t *Ret = P + Length; uint32_t Offset = readBytesUnaligned(P, 4); if (Offset == 0) // is a CIE return Ret; @@ -240,9 +275,9 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = - static_cast(A->ObjAddress) - static_cast(B->ObjAddress); - int64_t MemDistance = A->LoadAddress - B->LoadAddress; + int64_t ObjDistance = static_cast(A->getObjAddress()) - + static_cast(B->getObjAddress()); + int64_t MemDistance = A->getLoadAddress() - B->getLoadAddress(); return ObjDistance - MemDistance; } @@ -265,14 +300,14 @@ void RuntimeDyldMachOCRTPBase::registerEHFrames() { if (ExceptTab) DeltaForEH = computeDelta(ExceptTab, EHFrame); - unsigned char *P = EHFrame->Address; - unsigned char *End = P + EHFrame->Size; + uint8_t *P = EHFrame->getAddress(); + uint8_t *End = P + EHFrame->getSize(); do { P = 
processFDE(P, DeltaForText, DeltaForEH); } while (P != End); - MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, - EHFrame->Size); + MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(), + EHFrame->getSize()); } UnregisteredEHFrameSections.clear(); } @@ -298,10 +333,7 @@ RuntimeDyldMachO::create(Triple::ArchType Arch, std::unique_ptr RuntimeDyldMachO::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 0d7364f78597..c8ae47b0db22 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -79,6 +79,12 @@ protected: return RelocationEntry(SectionID, Offset, RelType, 0, IsPCRel, Size); } + /// Process a scattered vanilla relocation. + relocation_iterator processScatteredVANILLA( + unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID); + /// Construct a RelocationValueRef representing the relocation target. /// For Symbols in known sections, this will return a RelocationValueRef /// representing a (SectionID, Offset) pair. @@ -140,7 +146,7 @@ private: Impl &impl() { return static_cast(*this); } const Impl &impl() const { return static_cast(*this); } - unsigned char *processFDE(unsigned char *P, int64_t DeltaForText, + unsigned char *processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH); public: diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h new file mode 100644 index 000000000000..fbfbb3285233 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -0,0 +1,201 @@ +//===--- RuntimeDyldCOFFI386.h --- COFF/X86_64 specific code ---*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF x86 support for MC-JIT runtime dynamic linker. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H + +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "../RuntimeDyldCOFF.h" + +#define DEBUG_TYPE "dyld" + +namespace llvm { + +class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { +public: + RuntimeDyldCOFFI386(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldCOFF(MM, Resolver) {} + + unsigned getMaxStubSize() override { + return 8; // 2-byte jmp instruction + 32-bit relative address + 2 byte pad + } + + unsigned getStubAlignment() override { return 1; } + + relocation_iterator processRelocationRef(unsigned SectionID, + relocation_iterator RelI, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + StubMap &Stubs) override { + auto Symbol = RelI->getSymbol(); + if (Symbol == Obj.symbol_end()) + report_fatal_error("Unknown symbol in relocation"); + + ErrorOr TargetNameOrErr = Symbol->getName(); + if (auto EC = TargetNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef TargetName = *TargetNameOrErr; + + auto Section = *Symbol->getSection(); + + uint64_t RelType = RelI->getType(); + uint64_t Offset = RelI->getOffset(); + +#if !defined(NDEBUG) + SmallString<32> RelTypeName; + RelI->getTypeName(RelTypeName); +#endif + DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset + << " RelType: " << RelTypeName << " TargetName: " << TargetName + << "\n"); + + unsigned TargetSectionID = -1; + if (Section == Obj.section_end()) { + RelocationEntry RE(SectionID, Offset, RelType, 0, -1, 0, 0, 0, false, 0); + addRelocationForSymbol(RE, TargetName); + } else { + TargetSectionID = + findOrEmitSection(Obj, *Section, Section->isText(), ObjSectionToID); + + switch (RelType) { + case COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is ignored. + break; + case COFF::IMAGE_REL_I386_DIR32: + case COFF::IMAGE_REL_I386_DIR32NB: + case COFF::IMAGE_REL_I386_REL32: { + RelocationEntry RE = + RelocationEntry(SectionID, Offset, RelType, 0, TargetSectionID, + getSymbolOffset(*Symbol), 0, 0, false, 0); + addRelocationForSection(RE, TargetSectionID); + break; + } + case COFF::IMAGE_REL_I386_SECTION: { + RelocationEntry RE = + RelocationEntry(TargetSectionID, Offset, RelType, 0); + addRelocationForSection(RE, TargetSectionID); + break; + } + case COFF::IMAGE_REL_I386_SECREL: { + RelocationEntry RE = RelocationEntry(SectionID, Offset, RelType, + getSymbolOffset(*Symbol)); + addRelocationForSection(RE, TargetSectionID); + break; + } + default: + llvm_unreachable("unsupported relocation type"); + } + + } + + return ++RelI; + } + + void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { + const auto Section = Sections[RE.SectionID]; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); + + switch (RE.RelType) { + case COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is ignored. + break; + case COFF::IMAGE_REL_I386_DIR32: { + // The target's 32-bit VA. + uint64_t Result = + RE.Sections.SectionA == static_cast(-1) + ? 
Value + : Sections[RE.Sections.SectionA].getLoadAddressWithOffset( + RE.Addend); + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_DIR32" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_DIR32NB: { + // The target's 32-bit RVA. + // NOTE: use Section[0].getLoadAddress() as an approximation of ImageBase + uint64_t Result = + Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend) - + Sections[0].getLoadAddress(); + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_DIR32NB" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_REL32: { + // 32-bit relative displacement to the target. + uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddress() - + Section.getLoadAddress() + RE.Addend - 4 - RE.Offset; + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_REL32" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_SECTION: + // 16-bit section index of the section that contains the target. + assert(static_cast(RE.SectionID) <= INT16_MAX && + "relocation overflow"); + assert(static_cast(RE.SectionID) >= INT16_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_SECTION Value: " << RE.SectionID + << '\n'); + writeBytesUnaligned(RE.SectionID, Target, 2); + break; + case COFF::IMAGE_REL_I386_SECREL: + // 32-bit offset of the target from the beginning of its section. + assert(static_cast(RE.Addend) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(RE.Addend) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_SECREL Value: " << RE.Addend + << '\n'); + writeBytesUnaligned(RE.Addend, Target, 2); + break; + default: + llvm_unreachable("unsupported relocation type"); + } + } + + void registerEHFrames() override {} + void deregisterEHFrames() override {} + + void finalizeLoad(const ObjectFile &Obj, + ObjSectionToIDMap &SectionMap) override {} +}; + +} + +#endif + diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 408227eb0f21..25f538d8f3da 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -62,7 +62,7 @@ public: // symbol in the target address space. 
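The IMAGE_REL_I386_REL32 case above stores the displacement from the end of the 4-byte fixup field to the target, which is why the computation subtracts an extra 4. A sketch of just that arithmetic, with illustrative names that are not part of the RuntimeDyld API:

#include <cassert>
#include <cstdint>

static uint32_t computeRel32(uint64_t TargetAddr, uint64_t FixupAddr) {
  // FixupAddr is where the 4-byte displacement itself is stored; the CPU
  // adds the displacement to the address *after* the field, hence the +4.
  return static_cast<uint32_t>(TargetAddr - (FixupAddr + 4));
}

int main() {
  // A call at 0x1000 whose displacement field starts at 0x1001,
  // targeting 0x2000: disp = 0x2000 - 0x1005.
  assert(computeRel32(0x2000, 0x1001) == 0xFFB);
  return 0;
}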
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *Target = Section.Address + RE.Offset; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { @@ -72,8 +72,7 @@ public: case COFF::IMAGE_REL_AMD64_REL32_3: case COFF::IMAGE_REL_AMD64_REL32_4: case COFF::IMAGE_REL_AMD64_REL32_5: { - uint32_t *TargetAddress = (uint32_t *)Target; - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); // Delta is the distance from the start of the reloc to the end of the // instruction with the reloc. uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32); @@ -81,7 +80,7 @@ public: uint64_t Result = Value + RE.Addend; assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow"); assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow"); - *TargetAddress = Result; + writeBytesUnaligned(Result, Target, 4); break; } @@ -92,14 +91,12 @@ public: // within a 32 bit offset from the base. // // For now we just set these to zero. - uint32_t *TargetAddress = (uint32_t *)Target; - *TargetAddress = 0; + writeBytesUnaligned(0, Target, 4); break; } case COFF::IMAGE_REL_AMD64_ADDR64: { - uint64_t *TargetAddress = (uint64_t *)Target; - *TargetAddress = Value + RE.Addend; + writeBytesUnaligned(Value + RE.Addend, Target, 8); break; } @@ -119,8 +116,7 @@ public: symbol_iterator Symbol = RelI->getSymbol(); if (Symbol == Obj.symbol_end()) report_fatal_error("Unknown symbol in relocation"); - section_iterator SecI(Obj.section_end()); - Symbol->getSection(SecI); + section_iterator SecI = *Symbol->getSection(); // If there is no section, this must be an external reference. const bool IsExtern = SecI == Obj.section_end(); @@ -129,7 +125,7 @@ public: uint64_t Offset = RelI->getOffset(); uint64_t Addend = 0; SectionEntry &Section = Sections[SectionID]; - uintptr_t ObjTarget = Section.ObjAddress + Offset; + uintptr_t ObjTarget = Section.getObjAddress() + Offset; switch (RelType) { @@ -140,14 +136,14 @@ public: case COFF::IMAGE_REL_AMD64_REL32_4: case COFF::IMAGE_REL_AMD64_REL32_5: case COFF::IMAGE_REL_AMD64_ADDR32NB: { - uint32_t *Displacement = (uint32_t *)ObjTarget; - Addend = *Displacement; + uint8_t *Displacement = (uint8_t *)ObjTarget; + Addend = readBytesUnaligned(Displacement, 4); break; } case COFF::IMAGE_REL_AMD64_ADDR64: { - uint64_t *Displacement = (uint64_t *)ObjTarget; - Addend = *Displacement; + uint8_t *Displacement = (uint8_t *)ObjTarget; + Addend = readBytesUnaligned(Displacement, 8); break; } @@ -182,9 +178,9 @@ public: unsigned getStubAlignment() override { return 1; } void registerEHFrames() override { for (auto const &EHFrameSID : UnregisteredEHFrameSections) { - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 7bf764114bae..dbca37747ce8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ 
b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -34,7 +34,7 @@ public: /// Extract the addend encoded in the instruction / memory location. int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); unsigned NumBytes = 1 << RE.Size; int64_t Addend = 0; // Verify that the relocation has the correct size and alignment. @@ -272,15 +272,14 @@ public: RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); RE.Addend = decodeAddend(RE); - RelocationValueRef Value( - getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); assert((ExplicitAddend == 0 || RE.Addend == 0) && "Relocation has "\ "ARM64_RELOC_ADDEND and embedded addend in the instruction."); - if (ExplicitAddend) { + if (ExplicitAddend) RE.Addend = ExplicitAddend; - Value.Offset = ExplicitAddend; - } + + RelocationValueRef Value( + getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) @@ -305,7 +304,7 @@ public: DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); MachO::RelocationInfoType RelType = static_cast(RE.RelType); @@ -325,7 +324,7 @@ public: case MachO::ARM64_RELOC_BRANCH26: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_BRANCH26 not supported"); // Check if branch is in range. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = Value - FinalAddress + RE.Addend; encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); break; @@ -334,7 +333,7 @@ public: case MachO::ARM64_RELOC_PAGE21: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_PAGE21 not supported"); // Adjust for PC-relative relocation and offset. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = ((Value + RE.Addend) & (-4096)) - (FinalAddress & (-4096)); encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); @@ -376,10 +375,10 @@ private: else { // FIXME: There must be a better way to do this then to check and fix the // alignment every time!!! 
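The ARM64_RELOC_PAGE21 case above computes what an ADRP instruction needs: the distance between the 4 KiB page of the target and the 4 KiB page of the PC, with the in-page bits masked off on both sides before subtracting. A sketch of the page arithmetic; the helper name is illustrative only:

#include <cassert>
#include <cstdint>

static int64_t page21Delta(uint64_t Target, uint64_t PC) {
  const uint64_t PageMask = ~uint64_t(4095); // align both down to 4 KiB
  return int64_t(Target & PageMask) - int64_t(PC & PageMask);
}

int main() {
  // Adjacent pages: the page delta is one full page even though the
  // byte delta is only 8.
  assert(page21Delta(0x2001004, 0x2000ffc) == 4096);
  // Same page: delta is zero regardless of the in-page offsets.
  assert(page21Delta(0x3000f00, 0x3000010) == 0);
  return 0;
}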
- uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); uintptr_t StubAddress = - (BaseAddress + Section.StubOffset + StubAlignment - 1) & + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -392,7 +391,7 @@ private: addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); Offset = static_cast(StubOffset); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, Offset, diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 0a24bb2f5eae..7731df09bd21 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -35,7 +35,7 @@ public: int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { default: @@ -64,8 +64,10 @@ public: if (RelType == MachO::ARM_RELOC_HALF_SECTDIFF) return processHALFSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); + else if (RelType == MachO::GENERIC_RELOC_VANILLA) + return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); else - return ++++RelI; + return ++RelI; } RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); @@ -92,12 +94,12 @@ public: void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). @@ -130,8 +132,8 @@ public: break; } case MachO::ARM_RELOC_HALF_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected HALFSECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -178,21 +180,21 @@ private: RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { // Create a new stub function. 
- Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry StubRE(RE.SectionID, StubTargetAddr - Section.Address, - MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, - 2); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry StubRE( + RE.SectionID, StubTargetAddr - Section.getAddress(), + MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, 2); if (Value.SymbolName) addRelocationForSymbol(StubRE, Value.SymbolName); else addRelocationForSection(StubRE, Value.SectionID); - Addr = Section.Address + Section.StubOffset; - Section.StubOffset += getMaxStubSize(); + Addr = Section.getAddressWithOffset(Section.getStubOffset()); + Section.advanceStubOffset(getMaxStubSize()); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, 0, RE.IsPCRel, RE.Size); @@ -221,7 +223,7 @@ private: uint32_t RelocType = MachO.getAnyRelocationType(RE); bool IsPCRel = MachO.getAnyRelocationPCRel(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); int64_t Immediate = readBytesUnaligned(LocalAddress, 4); // Copy the whole instruction out. Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index 569a078d7f3d..85059d70a3eb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -47,8 +47,7 @@ public: return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); else if (RelType == MachO::GENERIC_RELOC_VANILLA) - return processI386ScatteredVANILLA(SectionID, RelI, Obj, - ObjSectionToID); + return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); llvm_unreachable("Unhandled scattered relocation."); } @@ -84,10 +83,10 @@ public: DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation. 
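The PC-relative adjustment above accounts for the ARM pipeline: reading the PC yields the fixup address plus two instructions, 8 bytes in ARM state and 4 in Thumb. A sketch of the resulting displacement computation, under those assumptions and with a hypothetical helper, not the RuntimeDyldMachOARM API:

#include <cassert>
#include <cstdint>

static int64_t pcRelValue(uint64_t Target, uint64_t FixupAddr, bool IsThumb) {
  // The effective PC is two instructions past the fixup location.
  uint64_t EffectivePC = FixupAddr + (IsThumb ? 4 : 8);
  return int64_t(Target - EffectivePC);
}

int main() {
  // A branch at 0x1000 to 0x1010 encodes 8 in ARM state but 12 in Thumb.
  assert(pcRelValue(0x1010, 0x1000, /*IsThumb=*/false) == 8);
  assert(pcRelValue(0x1010, 0x1000, /*IsThumb=*/true) == 12);
  return 0;
}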
} @@ -99,8 +98,8 @@ public: break; case MachO::GENERIC_RELOC_SECTDIFF: case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected SECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -139,7 +138,7 @@ private: bool IsPCRel = Obj.getAnyRelocationPCRel(RE); unsigned Size = Obj.getAnyRelocationLength(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); unsigned NumBytes = 1 << Size; uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); @@ -183,41 +182,6 @@ private: return ++RelI; } - relocation_iterator processI386ScatteredVANILLA( - unsigned SectionID, relocation_iterator RelI, - const ObjectFile &BaseObjT, - RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) { - const MachOObjectFile &Obj = - static_cast(BaseObjT); - MachO::any_relocation_info RE = - Obj.getRelocation(RelI->getRawDataRefImpl()); - - SectionEntry &Section = Sections[SectionID]; - uint32_t RelocType = Obj.getAnyRelocationType(RE); - bool IsPCRel = Obj.getAnyRelocationPCRel(RE); - unsigned Size = Obj.getAnyRelocationLength(RE); - uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; - unsigned NumBytes = 1 << Size; - int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); - - unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE); - section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr); - assert(TargetSI != Obj.section_end() && "Can't find section for symbol"); - uint64_t SectionBaseAddr = TargetSI->getAddress(); - SectionRef TargetSection = *TargetSI; - bool IsCode = TargetSection.isText(); - uint32_t TargetSectionID = - findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID); - - Addend -= SectionBaseAddr; - RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size); - - addRelocationForSection(R, TargetSectionID); - - return ++RelI; - } - // Populate stubs in __jump_table section. 
void populateJumpTable(const MachOObjectFile &Obj, const SectionRef &JTSection, unsigned JTSectionID) { diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index dd56e72f9144..2242295bc1ee 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -39,6 +39,10 @@ public: static_cast(BaseObjT); MachO::any_relocation_info RelInfo = Obj.getRelocation(RelI->getRawDataRefImpl()); + uint32_t RelType = Obj.getAnyRelocationType(RelInfo); + + if (RelType == MachO::X86_64_RELOC_SUBTRACTOR) + return processSubtractRelocation(SectionID, RelI, Obj, ObjSectionToID); assert(!Obj.isRelocationScattered(RelInfo) && "Scattered relocations not supported on X86_64"); @@ -69,14 +73,14 @@ public: void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (RE.IsPCRel) { // FIXME: It seems this value needs to be adjusted by 4 for an effective // PC address. Is that expected? Only for branches, perhaps? - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; } @@ -91,9 +95,17 @@ public: case MachO::X86_64_RELOC_BRANCH: writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size); break; + case MachO::X86_64_RELOC_SUBTRACTOR: { + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); + assert((Value == SectionABase || Value == SectionBBase) && + "Unexpected SUBTRACTOR relocation value."); + Value = SectionABase - SectionBBase + RE.Addend; + writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size); + break; + } case MachO::X86_64_RELOC_GOT_LOAD: case MachO::X86_64_RELOC_GOT: - case MachO::X86_64_RELOC_SUBTRACTOR: case MachO::X86_64_RELOC_TLV: Error("Relocation type not implemented yet!"); } @@ -112,24 +124,65 @@ private: RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { - Stubs[Value] = Section.StubOffset; - uint8_t *GOTEntry = Section.Address + Section.StubOffset; - RelocationEntry GOTRE(RE.SectionID, Section.StubOffset, + Stubs[Value] = Section.getStubOffset(); + uint8_t *GOTEntry = Section.getAddressWithOffset(Section.getStubOffset()); + RelocationEntry GOTRE(RE.SectionID, Section.getStubOffset(), MachO::X86_64_RELOC_UNSIGNED, Value.Offset, false, 3); if (Value.SymbolName) addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset += 8; + Section.advanceStubOffset(8); Addr = GOTEntry; } RelocationEntry TargetRE(RE.SectionID, RE.Offset, MachO::X86_64_RELOC_UNSIGNED, RE.Addend, true, 2); resolveRelocation(TargetRE, (uint64_t)Addr); } + + relocation_iterator + processSubtractRelocation(unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + MachO::any_relocation_info RE = + 
Obj.getRelocation(RelI->getRawDataRefImpl()); + + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + + ErrorOr SubtrahendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = SubtrahendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto SubtrahendI = GlobalSymbolTable.find(*SubtrahendNameOrErr); + unsigned SectionBID = SubtrahendI->second.getSectionID(); + uint64_t SectionBOffset = SubtrahendI->second.getOffset(); + int64_t Addend = + SignExtend64(readBytesUnaligned(LocalAddress, NumBytes), NumBytes * 8); + + ++RelI; + ErrorOr MinuendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = MinuendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto MinuendI = GlobalSymbolTable.find(*MinuendNameOrErr); + unsigned SectionAID = MinuendI->second.getSectionID(); + uint64_t SectionAOffset = MinuendI->second.getOffset(); + + RelocationEntry R(SectionID, Offset, MachO::X86_64_RELOC_SUBTRACTOR, (uint64_t)Addend, + SectionAID, SectionAOffset, SectionBID, SectionBOffset, + false, Size); + + addRelocationForSection(R, SectionAID); + + return ++RelI; + } + }; } diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp index 59860844e939..e2f220862cf7 100644 --- a/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Process.h" namespace llvm { @@ -48,16 +49,27 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Look in the list of free memory regions and use a block there if one // is available. - for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) { - sys::MemoryBlock &MB = MemGroup.FreeMem[i]; - if (MB.size() >= RequiredSize) { - Addr = (uintptr_t)MB.base(); - uintptr_t EndOfBlock = Addr + MB.size(); + for (FreeMemBlock &FreeMB : MemGroup.FreeMem) { + if (FreeMB.Free.size() >= RequiredSize) { + Addr = (uintptr_t)FreeMB.Free.base(); + uintptr_t EndOfBlock = Addr + FreeMB.Free.size(); // Align the address. Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); - // Store cutted free memory block. 
- MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size), - EndOfBlock - Addr - Size); + + if (FreeMB.PendingPrefixIndex == (unsigned)-1) { + // The part of the block we're giving out to the user is now pending + MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size)); + + // Remember this pending block, such that future allocations can just + // modify it rather than creating a new one + FreeMB.PendingPrefixIndex = MemGroup.PendingMem.size() - 1; + } else { + sys::MemoryBlock &PendingMB = MemGroup.PendingMem[FreeMB.PendingPrefixIndex]; + PendingMB = sys::MemoryBlock(PendingMB.base(), Addr + Size - (uintptr_t)PendingMB.base()); + } + + // Remember how much free space is now left in this block + FreeMB.Free = sys::MemoryBlock((void *)(Addr + Size), EndOfBlock - Addr - Size); return (uint8_t*)Addr; } } @@ -85,6 +97,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Save this address as the basis for our next request MemGroup.Near = MB; + // Remember that we allocated this memory MemGroup.AllocatedMem.push_back(MB); Addr = (uintptr_t)MB.base(); uintptr_t EndOfBlock = Addr + MB.size(); @@ -92,11 +105,18 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Align the address. Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + // The part of the block we're giving out to the user is now pending + MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size)); + // The allocateMappedMemory may allocate much more memory than we need. In // this case, we store the unused memory as a free memory block. unsigned FreeSize = EndOfBlock-Addr-Size; - if (FreeSize > 16) - MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize)); + if (FreeSize > 16) { + FreeMemBlock FreeMB; + FreeMB.Free = sys::MemoryBlock((void*)(Addr + Size), FreeSize); + FreeMB.PendingPrefixIndex = (unsigned)-1; + MemGroup.FreeMem.push_back(FreeMB); + } // Return aligned address return (uint8_t*)Addr; @@ -107,9 +127,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) // FIXME: Should in-progress permissions be reverted if an error occurs? std::error_code ec; - // Don't allow free memory blocks to be used after setting protection flags. - CodeMem.FreeMem.clear(); - // Make code memory executable. 
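The free-block reuse above carves an aligned allocation out of a previously returned block, so the candidate address first has to be rounded up to the requested power-of-two alignment. The mask trick it relies on, as a standalone sketch with illustrative names:

#include <cassert>
#include <cstdint>

static uintptr_t alignUp(uintptr_t Addr, uintptr_t Alignment) {
  // Power-of-two alignment lets us mask instead of dividing.
  assert((Alignment & (Alignment - 1)) == 0 && "alignment must be 2^k");
  return (Addr + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);
  assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
  return 0;
}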
ec = applyMemoryGroupPermissions(CodeMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); @@ -143,36 +160,62 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) return false; } +static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) { + static const size_t PageSize = sys::Process::getPageSize(); + + size_t StartOverlap = + (PageSize - ((uintptr_t)M.base() % PageSize)) % PageSize; + + size_t TrimmedSize = M.size(); + TrimmedSize -= StartOverlap; + TrimmedSize -= TrimmedSize % PageSize; + + sys::MemoryBlock Trimmed((void *)((uintptr_t)M.base() + StartOverlap), TrimmedSize); + + assert(((uintptr_t)Trimmed.base() % PageSize) == 0); + assert((Trimmed.size() % PageSize) == 0); + assert(M.base() <= Trimmed.base() && Trimmed.size() <= M.size()); + + return Trimmed; +} + + std::error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, unsigned Permissions) { + for (sys::MemoryBlock &MB : MemGroup.PendingMem) + if (std::error_code EC = sys::Memory::protectMappedMemory(MB, Permissions)) + return EC; - for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) { - std::error_code ec; - ec = - sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], Permissions); - if (ec) { - return ec; - } + MemGroup.PendingMem.clear(); + + // Now go through free blocks and trim any of them that don't span the entire + // page because one of the pending blocks may have overlapped it. + for (FreeMemBlock &FreeMB : MemGroup.FreeMem) { + FreeMB.Free = trimBlockToPageSize(FreeMB.Free); + // We cleared the PendingMem list, so all these pointers are now invalid + FreeMB.PendingPrefixIndex = (unsigned)-1; } + // Remove all blocks which are now empty + MemGroup.FreeMem.erase( + std::remove_if(MemGroup.FreeMem.begin(), MemGroup.FreeMem.end(), + [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }), + MemGroup.FreeMem.end()); + return std::error_code(); } void SectionMemoryManager::invalidateInstructionCache() { - for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(), - CodeMem.AllocatedMem[i].size()); + for (sys::MemoryBlock &Block : CodeMem.PendingMem) + sys::Memory::InvalidateInstructionCache(Block.base(), Block.size()); } SectionMemoryManager::~SectionMemoryManager() { - for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]); - for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]); - for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]); + for (MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) { + for (sys::MemoryBlock &Block : Group->AllocatedMem) + sys::Memory::releaseMappedMemory(Block); + } } } // namespace llvm - diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 8b4d61905d00..d4d85041d218 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -17,10 +17,16 @@ if( LLVM_USE_SANITIZE_COVERAGE ) add_library(LLVMFuzzerNoMain STATIC $ ) + if( HAVE_LIBPTHREAD ) + target_link_libraries(LLVMFuzzerNoMain pthread) + endif() add_library(LLVMFuzzer STATIC FuzzerMain.cpp $ ) + if( HAVE_LIBPTHREAD ) + target_link_libraries(LLVMFuzzer pthread) + endif() if( LLVM_INCLUDE_TESTS ) add_subdirectory(test) diff --git a/lib/Fuzzer/FuzzerCrossOver.cpp b/lib/Fuzzer/FuzzerCrossOver.cpp index d93ce5cf4fb8..5203deaf9128 100644 --- 
a/lib/Fuzzer/FuzzerCrossOver.cpp +++ b/lib/Fuzzer/FuzzerCrossOver.cpp @@ -16,11 +16,11 @@ namespace fuzzer { // Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. -size_t CrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { +size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize) { assert(Size1 || Size2); - MaxOutSize = rand() % MaxOutSize + 1; + MaxOutSize = Rand(MaxOutSize) + 1; size_t OutPos = 0; size_t Pos1 = 0; size_t Pos2 = 0; @@ -34,7 +34,7 @@ size_t CrossOver(const uint8_t *Data1, size_t Size1, if (*InPos < InSize) { size_t InSizeLeft = InSize - *InPos; size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft); - size_t ExtraSize = rand() % MaxExtraSize + 1; + size_t ExtraSize = Rand(MaxExtraSize) + 1; memcpy(Out + OutPos, Data + *InPos, ExtraSize); OutPos += ExtraSize; (*InPos) += ExtraSize; diff --git a/lib/Fuzzer/FuzzerDFSan.h b/lib/Fuzzer/FuzzerDFSan.h new file mode 100644 index 000000000000..eb206ec61ce8 --- /dev/null +++ b/lib/Fuzzer/FuzzerDFSan.h @@ -0,0 +1,61 @@ +//===- FuzzerDFSan.h - Internal header for the Fuzzer -----------*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// DFSan interface. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DFSAN_H +#define LLVM_FUZZER_DFSAN_H + +#define LLVM_FUZZER_SUPPORTS_DFSAN 0 +#if defined(__has_include) +# if __has_include(<sanitizer/dfsan_interface.h>) +# if defined (__linux__) +# undef LLVM_FUZZER_SUPPORTS_DFSAN +# define LLVM_FUZZER_SUPPORTS_DFSAN 1 +# include <sanitizer/dfsan_interface.h> +# endif // __linux__ +# endif +#endif // defined(__has_include) +
+#if LLVM_FUZZER_SUPPORTS_DFSAN + +extern "C" { +__attribute__((weak)) +dfsan_label dfsan_create_label(const char *desc, void *userdata); +__attribute__((weak)) +void dfsan_set_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +void dfsan_add_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); +__attribute__((weak)) +dfsan_label dfsan_read_label(const void *addr, size_t size); +} // extern "C" + +namespace fuzzer { +static bool ReallyHaveDFSan() { + return &dfsan_create_label != nullptr; +} +} // namespace fuzzer +#else +// When compiling with a compiler which does not support dfsan, +// this code is still expected to build (but not necessarily work).
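// Editorial aside (not part of this patch): the weak declarations earlier in this header let
// libFuzzer link whether or not the DFSan runtime is present; taking the address of a weak
// symbol yields null when nothing defines it, which is exactly what ReallyHaveDFSan() tests.
// A tiny self-contained sketch of the idiom; optional_runtime_hook is a made-up symbol:
extern "C" __attribute__((weak)) void optional_runtime_hook();

static bool HaveHook() { return &optional_runtime_hook != nullptr; }

static void MaybeCallHook() {
  if (HaveHook())
    optional_runtime_hook(); // Safe: something provided the symbol.
}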
+typedef unsigned short dfsan_label; +struct dfsan_label_info { + dfsan_label l1, l2; + const char *desc; + void *userdata; +}; +namespace fuzzer { +static bool ReallyHaveDFSan() { return false; } +} // namespace fuzzer + +#endif + +#endif // LLVM_FUZZER_DFSAN_H diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 0ee08e1d1624..dc5f8babbfe6 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -32,35 +32,42 @@ struct FlagDescription { int Default; int *IntFlag; const char **StrFlag; + unsigned int *UIntFlag; }; struct { #define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; #define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING } Flags; -static FlagDescription FlagDescriptions [] { +static const FlagDescription FlagDescriptions [] { #define FUZZER_FLAG_INT(Name, Default, Description) \ - { #Name, Description, Default, &Flags.Name, nullptr}, + {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ + {#Name, Description, static_cast(Default), \ + nullptr, nullptr, &Flags.Name}, #define FUZZER_FLAG_STRING(Name, Description) \ - { #Name, Description, 0, nullptr, &Flags.Name }, + {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING }; static const size_t kNumFlags = sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]); -static std::vector inputs; -static const char *ProgName; +static std::vector *Inputs; +static std::string *ProgName; static void PrintHelp() { Printf("Usage: %s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", - ProgName); + ProgName->c_str()); Printf("\nFlags: (strictly in form -flag=value)\n"); size_t MaxFlagLen = 0; for (size_t F = 0; F < kNumFlags; F++) @@ -106,6 +113,12 @@ static bool ParseOneFlag(const char *Param) { if (Flags.verbosity >= 2) Printf("Flag: %s %d\n", Name, Val);; return true; + } else if (FlagDescriptions[F].UIntFlag) { + unsigned int Val = std::stoul(Str); + *FlagDescriptions[F].UIntFlag = Val; + if (Flags.verbosity >= 2) + Printf("Flag: %s %u\n", Name, Val); + return true; } else if (FlagDescriptions[F].StrFlag) { *FlagDescriptions[F].StrFlag = Str; if (Flags.verbosity >= 2) @@ -119,16 +132,20 @@ static bool ParseOneFlag(const char *Param) { } // We don't use any library to minimize dependencies. 
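// Editorial aside (not part of this patch): the comment above is the whole design, so a sketch
// of the strictly "-flag=value" scheme may help. This mirrors the FlagValue helper used
// elsewhere in this driver, but the body below is an illustration, not the patch's exact
// implementation.
#include <cstring>

// Returns a pointer to the value part if Param is "-Name=Value", else null.
static const char *FlagValueSketch(const char *Param, const char *Name) {
  if (Param[0] != '-')
    return nullptr;
  size_t Len = strlen(Name);
  if (strncmp(Param + 1, Name, Len) == 0 && Param[1 + Len] == '=')
    return Param + 1 + Len + 1; // Skip '-', the flag name, and '='.
  return nullptr;
}
// Usage: if (const char *V = FlagValueSketch(argv[i], "jobs")) ParseIt(V);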
-static void ParseFlags(int argc, char **argv) { +static void ParseFlags(const std::vector &Args) { for (size_t F = 0; F < kNumFlags; F++) { if (FlagDescriptions[F].IntFlag) *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].UIntFlag) + *FlagDescriptions[F].UIntFlag = + static_cast(FlagDescriptions[F].Default); if (FlagDescriptions[F].StrFlag) *FlagDescriptions[F].StrFlag = nullptr; } - for (int A = 1; A < argc; A++) { - if (ParseOneFlag(argv[A])) continue; - inputs.push_back(argv[A]); + Inputs = new std::vector; + for (size_t A = 1; A < Args.size(); A++) { + if (ParseOneFlag(Args[A].c_str())) continue; + Inputs->push_back(Args[A]); } } @@ -151,7 +168,7 @@ static void WorkerThread(const std::string &Cmd, std::atomic *Counter, std::string ToRun = Cmd + " > " + Log + " 2>&1\n"; if (Flags.verbosity) Printf("%s", ToRun.c_str()); - int ExitCode = system(ToRun.c_str()); + int ExitCode = ExecuteCommand(ToRun.c_str()); if (ExitCode != 0) *HasErrors = true; std::lock_guard Lock(Mu); @@ -161,15 +178,15 @@ static void WorkerThread(const std::string &Cmd, std::atomic *Counter, } } -static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, - int NumJobs) { +static int RunInMultipleProcesses(const std::vector &Args, + int NumWorkers, int NumJobs) { std::atomic Counter(0); std::atomic HasErrors(false); std::string Cmd; - for (int i = 0; i < argc; i++) { - if (FlagValue(argv[i], "jobs") || FlagValue(argv[i], "workers")) continue; - Cmd += argv[i]; - Cmd += " "; + for (auto &S : Args) { + if (FlagValue(S.c_str(), "jobs") || FlagValue(S.c_str(), "workers")) + continue; + Cmd += S + " "; } std::vector V; std::thread Pulse(PulseThread); @@ -181,36 +198,37 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, return HasErrors ? 
1 : 0; } -std::vector ReadTokensFile(const char *TokensFilePath) { - if (!TokensFilePath) return {}; - std::string TokensFileContents = FileToString(TokensFilePath); - std::istringstream ISS(TokensFileContents); - std::vector Res = {std::istream_iterator{ISS}, - std::istream_iterator{}}; - Res.push_back(" "); - Res.push_back("\t"); - Res.push_back("\n"); - return Res; -} - -int ApplyTokens(const Fuzzer &F, const char *InputFilePath) { +int RunOneTest(Fuzzer *F, const char *InputFilePath) { Unit U = FileToVector(InputFilePath); - auto T = F.SubstituteTokens(U); - T.push_back(0); - Printf("%s", T.data()); + Unit PreciseSizedU(U); + assert(PreciseSizedU.size() == PreciseSizedU.capacity()); + F->ExecuteCallback(PreciseSizedU); return 0; } int FuzzerDriver(int argc, char **argv, UserCallback Callback) { - SimpleUserSuppliedFuzzer SUSF(Callback); + FuzzerRandomLibc Rand(0); + SimpleUserSuppliedFuzzer SUSF(&Rand, Callback); return FuzzerDriver(argc, argv, SUSF); } int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { - using namespace fuzzer; + std::vector Args(argv, argv + argc); + return FuzzerDriver(Args, USF); +} - ProgName = argv[0]; - ParseFlags(argc, argv); +int FuzzerDriver(const std::vector &Args, UserCallback Callback) { + FuzzerRandomLibc Rand(0); + SimpleUserSuppliedFuzzer SUSF(&Rand, Callback); + return FuzzerDriver(Args, SUSF); +} + +int FuzzerDriver(const std::vector &Args, + UserSuppliedFuzzer &USF) { + using namespace fuzzer; + assert(!Args.empty()); + ProgName = new std::string(Args[0]); + ParseFlags(Args); if (Flags.help) { PrintHelp(); return 0; @@ -223,33 +241,70 @@ int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { } if (Flags.workers > 0 && Flags.jobs > 0) - return RunInMultipleProcesses(argc, argv, Flags.workers, Flags.jobs); + return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); Fuzzer::FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.UnitTimeoutSec = Flags.timeout; + Options.MaxTotalTimeSec = Flags.max_total_time; Options.DoCrossOver = Flags.cross_over; Options.MutateDepth = Flags.mutate_depth; Options.ExitOnFirst = Flags.exit_on_first; Options.UseCounters = Flags.use_counters; + Options.UseIndirCalls = Flags.use_indir_calls; Options.UseTraces = Flags.use_traces; - Options.UseFullCoverageSet = Flags.use_full_coverage_set; + Options.ShuffleAtStartUp = Flags.shuffle; Options.PreferSmallDuringInitialShuffle = Flags.prefer_small_during_initial_shuffle; - Options.Tokens = ReadTokensFile(Flags.tokens); Options.Reload = Flags.reload; + Options.OnlyASCII = Flags.only_ascii; + Options.TBMDepth = Flags.tbm_depth; + Options.TBMWidth = Flags.tbm_width; + Options.OutputCSV = Flags.output_csv; if (Flags.runs >= 0) Options.MaxNumberOfRuns = Flags.runs; - if (!inputs.empty()) - Options.OutputCorpus = inputs[0]; + if (!Inputs->empty()) + Options.OutputCorpus = (*Inputs)[0]; if (Flags.sync_command) Options.SyncCommand = Flags.sync_command; Options.SyncTimeout = Flags.sync_timeout; + Options.ReportSlowUnits = Flags.report_slow_units; + if (Flags.artifact_prefix) + Options.ArtifactPrefix = Flags.artifact_prefix; + if (Flags.exact_artifact_path) + Options.ExactArtifactPath = Flags.exact_artifact_path; + std::vector Dictionary; + if (Flags.dict) + if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary)) + return 1; + if (Flags.verbosity > 0 && !Dictionary.empty()) + Printf("Dictionary: %zd entries\n", Dictionary.size()); + Options.SaveArtifacts = !Flags.test_single_input; + Fuzzer F(USF, 
Options); - if (Flags.apply_tokens) - return ApplyTokens(F, Flags.apply_tokens); + for (auto &U: Dictionary) + USF.GetMD().AddWordToDictionary(U.data(), U.size()); + + // Timer + if (Flags.timeout > 0) + SetTimer(Flags.timeout / 2 + 1); + + if (Flags.test_single_input) { + RunOneTest(&F, Flags.test_single_input); + exit(0); + } + + if (Flags.save_minimized_corpus) { + Printf("The flag -save_minimized_corpus is deprecated; use -merge=1\n"); + exit(1); + } + + if (Flags.merge) { + F.Merge(*Inputs); + exit(0); + } unsigned Seed = Flags.seed; // Initialize Seed. @@ -257,35 +312,26 @@ int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { Seed = time(0) * 10000 + getpid(); if (Flags.verbosity) Printf("Seed: %u\n", Seed); - srand(Seed); - - // Timer - if (Flags.timeout > 0) - SetTimer(Flags.timeout / 2 + 1); - - if (Flags.verbosity >= 2) { - Printf("Tokens: {"); - for (auto &T : Options.Tokens) - Printf("%s,", T.c_str()); - Printf("}\n"); - } + USF.GetRand().ResetSeed(Seed); F.RereadOutputCorpus(); - for (auto &inp : inputs) + for (auto &inp : *Inputs) if (inp != Options.OutputCorpus) F.ReadDir(inp, nullptr); if (F.CorpusSize() == 0) F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input. F.ShuffleAndMinimize(); - if (Flags.save_minimized_corpus) - F.SaveCorpus(); - F.Loop(Flags.iterations < 0 ? INT_MAX : Flags.iterations); + if (Flags.drill) + F.Drill(); + else + F.Loop(); + if (Flags.verbosity) Printf("Done %d runs in %zd second(s)\n", F.getTotalNumberOfRuns(), F.secondsSinceProcessStartUp()); - return 0; + exit(0); // Don't let F destroy itself. } } // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 742f672e2012..c2b506c3c8aa 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -11,16 +11,14 @@ // portability and independence. //===----------------------------------------------------------------------===// FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") -FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG_INT(iterations, -1, - "Number of iterations of the fuzzer internal loop" - " (-1 for infinite iterations).") +FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") FUZZER_FLAG_INT(max_len, 64, "Maximum length of the test input.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") +FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") FUZZER_FLAG_INT( prefer_small_during_initial_shuffle, -1, "If 1, always prefer smaller inputs during the initial corpus shuffle." @@ -31,16 +29,15 @@ FUZZER_FLAG_INT( timeout, 1200, "Timeout in seconds (if positive). " "If one unit runs more than this number of seconds the process will abort.") +FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " + "time in seconds to run the fuzzer.") FUZZER_FLAG_INT(help, 0, "Print help.") -FUZZER_FLAG_INT( - save_minimized_corpus, 0, - "If 1, the minimized corpus is saved into the first input directory") +FUZZER_FLAG_INT(save_minimized_corpus, 0, "Deprecated. Use -merge=1") +FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " + "merged into the 1-st corpus. 
Only interesting units will be taken.") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") +FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters") FUZZER_FLAG_INT(use_traces, 0, "Experimental: use instruction traces") -FUZZER_FLAG_INT(use_full_coverage_set, 0, - "Experimental: Maximize the number of different full" - " coverage sets as opposed to maximizing the total coverage." - " This is potentially MUCH slower, but may discover more paths.") FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") @@ -49,12 +46,29 @@ FUZZER_FLAG_INT(workers, 0, " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload, 1, "Reload the main corpus periodically to get new units" - "discovered by other processes.") -FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to" - " fuzz a token based input language.") -FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes " - " with tokens and write the result to stdout.") + " discovered by other processes.") FUZZER_FLAG_STRING(sync_command, "Execute an external command " "\"<sync_command> <test_corpus>\" " "to synchronize the test corpus.") FUZZER_FLAG_INT(sync_timeout, 600, "Minimum timeout between syncs.") +FUZZER_FLAG_INT(report_slow_units, 10, + "Report slowest units if they run for more than this number of seconds.") +FUZZER_FLAG_INT(only_ascii, 0, + "If 1, generate only ASCII (isprint+isspace) inputs.") +FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.") +FUZZER_FLAG_INT(tbm_depth, 5, "Apply at most this number of consecutive " + "trace-based-mutations (tbm).") +FUZZER_FLAG_INT(tbm_width, 5, "Apply at most this number of independent " + "trace-based-mutations (tbm)") +FUZZER_FLAG_STRING(test_single_input, "Use specified file as test input.") +FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, " + "timeout, or slow inputs) as " + "$(artifact_prefix)file") +FUZZER_FLAG_STRING(exact_artifact_path, + "Write the single artifact on failure (crash, timeout) " + "as $(exact_artifact_path). This overrides -artifact_prefix " + "and will not use checksum in the file name. Do not " + "use the same path for several parallel processes.") +FUZZER_FLAG_INT(drill, 0, "Experimental: fuzz using a single unit as the seed " + "corpus, then merge with the initial corpus") +FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index 85703c81841c..043fad396d51 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -15,13 +15,15 @@ #include #include #include +#include #include namespace fuzzer { static long GetEpoch(const std::string &Path) { struct stat St; - if (stat(Path.c_str(), &St)) return 0; + if (stat(Path.c_str(), &St)) + return 0; // Can't stat, be conservative.
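// Editorial aside (not part of this patch): GetEpoch feeds the -reload logic. ListFilesInDir
// only rescans a directory when its mtime is newer than the cached epoch, so returning 0 on a
// failed stat means "pretend nothing changed": a transiently unreadable corpus directory never
// triggers a spurious reload. That is the conservative choice the comment above refers to.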
return St.st_mtime; } @@ -29,12 +31,15 @@ static std::vector<std::string> ListFilesInDir(const std::string &Dir, long *Epoch) { std::vector<std::string> V; if (Epoch) { - auto E = GetEpoch(Dir.c_str()); + auto E = GetEpoch(Dir); if (*Epoch >= E) return V; *Epoch = E; } DIR *D = opendir(Dir.c_str()); - if (!D) return V; + if (!D) { + Printf("No such directory: %s; exiting\n", Dir.c_str()); + exit(1); + } while (auto E = readdir(D)) { if (E->d_type == DT_REG || E->d_type == DT_LNK) V.push_back(E->d_name); @@ -45,6 +50,10 @@ static std::vector<std::string> ListFilesInDir(const std::string &Dir, Unit FileToVector(const std::string &Path) { std::ifstream T(Path); + if (!T) { + Printf("No such directory: %s; exiting\n", Path.c_str()); + exit(1); + } return Unit((std::istreambuf_iterator<char>(T)), std::istreambuf_iterator<char>()); } @@ -60,8 +69,11 @@ void CopyFileToErr(const std::string &Path) { } void WriteToFile(const Unit &U, const std::string &Path) { - std::ofstream OF(Path); - OF.write((const char*)U.data(), U.size()); + // Use raw C interface because this function may be called from a sig handler. + FILE *Out = fopen(Path.c_str(), "w"); + if (!Out) return; + fwrite(U.data(), sizeof(U[0]), U.size(), Out); + fclose(Out); } void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, @@ -79,11 +91,6 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + "/" + FileName; } -void PrintFileAsBase64(const std::string &Path) { - std::string Cmd = "base64 -w 0 < " + Path + "; echo"; - ExecuteCommand(Cmd); -} - void Printf(const char *Fmt, ...) { va_list ap; va_start(ap, Fmt); diff --git a/lib/Fuzzer/FuzzerInterface.cpp b/lib/Fuzzer/FuzzerInterface.cpp index dcd4e746013c..bcd726fc08e4 100644 --- a/lib/Fuzzer/FuzzerInterface.cpp +++ b/lib/Fuzzer/FuzzerInterface.cpp @@ -14,14 +14,17 @@ #include "FuzzerInternal.h" namespace fuzzer { -size_t UserSuppliedFuzzer::BasicMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return ::fuzzer::Mutate(Data, Size, MaxSize); -} -size_t UserSuppliedFuzzer::BasicCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { - return ::fuzzer::CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); + +void FuzzerRandomLibc::ResetSeed(unsigned int seed) { srand(seed); } + +size_t FuzzerRandomLibc::Rand() { return rand(); } + +UserSuppliedFuzzer::UserSuppliedFuzzer(FuzzerRandomBase *Rand) + : Rand(Rand), MD(*Rand) {} + +UserSuppliedFuzzer::~UserSuppliedFuzzer() { + if (OwnRand) + delete Rand; } } // namespace fuzzer. diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h index 3fd807afcfeb..65f1707ba922 100644 --- a/lib/Fuzzer/FuzzerInterface.h +++ b/lib/Fuzzer/FuzzerInterface.h @@ -18,10 +18,14 @@ #include #include +#include <string> +#include <vector> namespace fuzzer { +typedef std::vector<uint8_t> Unit; -typedef void (*UserCallback)(const uint8_t *Data, size_t Size); +/// Returns an int 0. Values other than zero are reserved for future use. +typedef int (*UserCallback)(const uint8_t *Data, size_t Size); /** Simple C-like interface with a single user-supplied callback. Usage: #\code #include "FuzzerInterface.h" -void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { DoStuffWithData(Data, Size); + return 0; } // Implement your own main() or use the one from FuzzerMain.cpp.
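// Editorial aside (not part of this patch): the smallest complete fuzz target under the
// int-returning callback convention shown above. The planted "bug" is contrived for
// illustration; link this against FuzzerMain.cpp or call FuzzerDriver from your own main().
#include <cstddef>
#include <cstdint>

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size >= 3 && Data[0] == 'F' && Data[1] == 'U' && Data[2] == 'Z')
    __builtin_trap(); // A planted crash for the fuzzer to find.
  return 0;           // Non-zero return values are reserved for future use.
}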
@@ -42,6 +47,79 @@ int main(int argc, char **argv) { */ int FuzzerDriver(int argc, char **argv, UserCallback Callback); +class FuzzerRandomBase { + public: + FuzzerRandomBase(){} + virtual ~FuzzerRandomBase(){}; + virtual void ResetSeed(unsigned int seed) = 0; + // Return a random number. + virtual size_t Rand() = 0; + // Return a random number in range [0,n). + size_t operator()(size_t n) { return n ? Rand() % n : 0; } + bool RandBool() { return Rand() % 2; } +}; + +class FuzzerRandomLibc : public FuzzerRandomBase { + public: + FuzzerRandomLibc(unsigned int seed) { ResetSeed(seed); } + void ResetSeed(unsigned int seed) override; + ~FuzzerRandomLibc() override {} + size_t Rand() override; +}; + +class MutationDispatcher { + public: + MutationDispatcher(FuzzerRandomBase &Rand); + ~MutationDispatcher(); + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Print the current sequence of mutations. + void PrintMutationSequence(); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing a byte. + size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the dictionary. + size_t Mutate_AddWordFromDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with some other element of the corpus. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the above mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToDictionary(const uint8_t *Word, size_t Size); + void SetCorpus(const std::vector<Unit> *Corpus); + + private: + FuzzerRandomBase &Rand; + struct Impl; + Impl *MDImpl; +}; + +// For backward compatibility only, deprecated. +static inline size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize, + FuzzerRandomBase &Rand) { + MutationDispatcher MD(Rand); + return MD.Mutate(Data, Size, MaxSize); +} + /** An abstract class that allows to use user-supplied mutators with libFuzzer. Usage: @@ -50,8 +128,9 @@ Usage: #include "FuzzerInterface.h" class MyFuzzer : public fuzzer::UserSuppliedFuzzer { public: + MyFuzzer(fuzzer::FuzzerRandomBase *Rand); // Must define the target function. - void TargetFunction(...) { ... } + int TargetFunction(...) { ...; return 0; } // Optionally define the mutator. size_t Mutate(...) { ... } // Optionally define the CrossOver method. @@ -66,33 +145,45 @@ int main(int argc, char **argv) { */ class UserSuppliedFuzzer { public: + UserSuppliedFuzzer(FuzzerRandomBase *Rand); /// Executes the target function on 'Size' bytes of 'Data'.
- virtual void TargetFunction(const uint8_t *Data, size_t Size) = 0; + virtual int TargetFunction(const uint8_t *Data, size_t Size) = 0; + virtual void StartMutationSequence() { MD.StartMutationSequence(); } + virtual void PrintMutationSequence() { MD.PrintMutationSequence(); } + virtual void SetCorpus(const std::vector *Corpus) { + MD.SetCorpus(Corpus); + } /// Mutates 'Size' bytes of data in 'Data' inplace into up to 'MaxSize' bytes, /// returns the new size of the data, which should be positive. virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return BasicMutate(Data, Size, MaxSize); + return MD.Mutate(Data, Size, MaxSize); } /// Crosses 'Data1' and 'Data2', writes up to 'MaxOutSize' bytes into Out, /// returns the number of bytes written, which should be positive. virtual size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, size_t Size2, uint8_t *Out, size_t MaxOutSize) { - return BasicCrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); + return MD.CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); } - virtual ~UserSuppliedFuzzer() {} + virtual ~UserSuppliedFuzzer(); - protected: - /// These can be called internally by Mutate and CrossOver. - size_t BasicMutate(uint8_t *Data, size_t Size, size_t MaxSize); - size_t BasicCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize); + FuzzerRandomBase &GetRand() { return *Rand; } + + MutationDispatcher &GetMD() { return MD; } + + private: + bool OwnRand = false; + FuzzerRandomBase *Rand; + MutationDispatcher MD; }; /// Runs the fuzzing with the UserSuppliedFuzzer. int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF); +/// More C++-ish interface. +int FuzzerDriver(const std::vector &Args, UserSuppliedFuzzer &USF); +int FuzzerDriver(const std::vector &Args, UserCallback Callback); + } // namespace fuzzer #endif // LLVM_FUZZER_INTERFACE_H diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index c387fe7a7c60..e96a4bc35fe2 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -8,6 +8,10 @@ //===----------------------------------------------------------------------===// // Define the main class fuzzer::Fuzzer and most functions. //===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_INTERNAL_H +#define LLVM_FUZZER_INTERNAL_H + #include #include #include @@ -20,7 +24,6 @@ #include "FuzzerInterface.h" namespace fuzzer { -typedef std::vector Unit; using namespace std::chrono; std::string FileToString(const std::string &Path); @@ -33,25 +36,36 @@ void CopyFileToErr(const std::string &Path); std::string DirPlusFile(const std::string &DirPath, const std::string &FileName); -size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - -size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - void Printf(const char *Fmt, ...); void Print(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); -void PrintFileAsBase64(const std::string &Path); -void ExecuteCommand(const std::string &Command); +std::string Base64(const Unit &U); +int ExecuteCommand(const std::string &Command); // Private copy of SHA1 implementation. static const int kSHA1NumBytes = 20; // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. 
void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); +// Changes U to contain only ASCII (isprint+isspace) characters. +// Returns true iff U has been changed. +bool ToASCII(Unit &U); +bool IsASCII(const Unit &U); + int NumberOfCpuCores(); +int GetPid(); + +// Dictionary. + +// Parses one dictionary entry. +// If successful, writes the entry to Unit and returns true, +// otherwise returns false. +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); +// Parses the dictionary file, fills Units, returns true iff all lines +// were parsed successfully. +bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); class Fuzzer { public: @@ -59,27 +73,42 @@ class Fuzzer { int Verbosity = 1; int MaxLen = 0; int UnitTimeoutSec = 300; + int MaxTotalTimeSec = 0; bool DoCrossOver = true; int MutateDepth = 5; bool ExitOnFirst = false; bool UseCounters = false; + bool UseIndirCalls = true; bool UseTraces = false; bool UseFullCoverageSet = false; bool Reload = true; + bool ShuffleAtStartUp = true; int PreferSmallDuringInitialShuffle = -1; size_t MaxNumberOfRuns = ULONG_MAX; int SyncTimeout = 600; + int ReportSlowUnits = 10; + bool OnlyASCII = false; + int TBMDepth = 10; + int TBMWidth = 10; std::string OutputCorpus; std::string SyncCommand; - std::vector<std::string> Tokens; + std::string ArtifactPrefix = "./"; + std::string ExactArtifactPath; + bool SaveArtifacts = true; + bool PrintNEW = true; // Print a status line when new units are found; + bool OutputCSV = false; }; Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); } - void Loop(size_t NumIterations); + size_t ChooseUnitIdxToMutate(); + const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; + void Loop(); + void Drill(); void ShuffleAndMinimize(); void InitializeTraceState(); size_t CorpusSize() const { return Corpus.size(); } void ReadDir(const std::string &Path, long *Epoch) { + Printf("Loading corpus: %s\n", Path.c_str()); ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch); } void RereadOutputCorpus(); @@ -95,25 +124,31 @@ class Fuzzer { static void StaticAlarmCallback(); - Unit SubstituteTokens(const Unit &U) const; + void ExecuteCallback(const Unit &U); + + // Merge Corpora[1:] into Corpora[0].
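// Editorial aside (not part of this patch): the Merge entry point declared just below
// implements the policy behind -merge=1. A self-contained sketch of that policy; MergeCorpora
// and its two callback parameters are illustrative stand-ins for Fuzzer::RunOne (which returns
// true iff the unit added new coverage) and WriteToOutputCorpus.
#include <cstdint>
#include <vector>

using UnitSketch = std::vector<uint8_t>;

static size_t MergeCorpora(const std::vector<UnitSketch> &Initial,
                           const std::vector<UnitSketch> &Extra,
                           bool (*RunOne)(const UnitSketch &),
                           void (*Keep)(const UnitSketch &)) {
  for (const UnitSketch &U : Initial)
    RunOne(U); // Warm-up: record the baseline coverage of the first corpus.
  size_t NumMerged = 0;
  for (const UnitSketch &U : Extra)
    if (RunOne(U)) { // Coverage still grew, so the unit is "interesting".
      Keep(U);
      ++NumMerged;
    }
  return NumMerged;
}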
+ void Merge(const std::vector &Corpora); private: void AlarmCallback(); - void ExecuteCallback(const Unit &U); - void MutateAndTestOne(Unit *U); - void ReportNewCoverage(size_t NewCoverage, const Unit &U); - size_t RunOne(const Unit &U); - void RunOneAndUpdateCorpus(const Unit &U); - size_t RunOneMaximizeTotalCoverage(const Unit &U); - size_t RunOneMaximizeFullCoverageSet(const Unit &U); - size_t RunOneMaximizeCoveragePairs(const Unit &U); + void MutateAndTestOne(); + void ReportNewCoverage(const Unit &U); + bool RunOne(const Unit &U); + void RunOneAndUpdateCorpus(Unit &U); void WriteToOutputCorpus(const Unit &U); - void WriteToCrash(const Unit &U, const char *Prefix); - void PrintStats(const char *Where, size_t Cov, const char *End = "\n"); - void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = ""); + void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); + void PrintStats(const char *Where, const char *End = "\n"); + void PrintStatusForNewUnit(const Unit &U); + void PrintUnitInASCII(const Unit &U, const char *PrintAfter = ""); void SyncCorpus(); + size_t RecordBlockCoverage(); + size_t RecordCallerCalleeCoverage(); + void PrepareCoverageBeforeRun(); + bool CheckCoverageAfterRun(); + + // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then // We apply these mutations one by one to the unit and run it again. @@ -131,10 +166,10 @@ class Fuzzer { Unit CurrentUnit; size_t TotalNumberOfRuns = 0; + size_t TotalNumberOfExecutedTraceBasedMutations = 0; std::vector Corpus; std::unordered_set UnitHashesAddedToCorpus; - std::unordered_set FullCoverageSets; // For UseCounters std::vector CounterBitmap; @@ -151,17 +186,23 @@ class Fuzzer { system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; long EpochOfLastReadOfOutputCorpus = 0; + size_t LastRecordedBlockCoverage = 0; + size_t LastRecordedCallerCalleeCoverage = 0; }; class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer { public: - SimpleUserSuppliedFuzzer(UserCallback Callback) : Callback(Callback) {} - virtual void TargetFunction(const uint8_t *Data, size_t Size) { + SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback) + : UserSuppliedFuzzer(Rand), Callback(Callback) {} + + virtual int TargetFunction(const uint8_t *Data, size_t Size) override { return Callback(Data, Size); } private: - UserCallback Callback; + UserCallback Callback = nullptr; }; }; // namespace fuzzer + +#endif // LLVM_FUZZER_INTERNAL_H diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 9ef47583cbb9..7ea82f4f15dd 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -10,10 +10,43 @@ //===----------------------------------------------------------------------===// #include "FuzzerInternal.h" -#include #include +#if defined(__has_include) +# if __has_include() +# include +# endif +#endif + +extern "C" { +// Re-declare some of the sanitizer functions as "weak" so that +// libFuzzer can be linked w/o the sanitizers and sanitizer-coverage +// (in which case it will complain at start-up time). 
+__attribute__((weak)) void __sanitizer_print_stack_trace(); +__attribute__((weak)) void __sanitizer_reset_coverage(); +__attribute__((weak)) size_t __sanitizer_get_total_unique_caller_callee_pairs(); +__attribute__((weak)) size_t __sanitizer_get_total_unique_coverage(); +__attribute__((weak)) +void __sanitizer_set_death_callback(void (*callback)(void)); +__attribute__((weak)) size_t __sanitizer_get_number_of_counters(); +__attribute__((weak)) +uintptr_t __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset); +} + namespace fuzzer { +static const size_t kMaxUnitSizeToPrint = 256; + +static void MissingWeakApiFunction(const char *FnName) { + Printf("ERROR: %s is not defined. Exiting.\n" + "Did you use -fsanitize-coverage=... to build your code?\n", FnName); + exit(1); +} + +#define CHECK_WEAK_API_FUNCTION(fn) \ + do { \ + if (!fn) \ + MissingWeakApiFunction(#fn); \ + } while (false) // Only one Fuzzer per process. static Fuzzer *F; @@ -27,17 +60,12 @@ Fuzzer::Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options) } void Fuzzer::SetDeathCallback() { + CHECK_WEAK_API_FUNCTION(__sanitizer_set_death_callback); __sanitizer_set_death_callback(StaticDeathCallback); } -void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) { - if (Options.Tokens.empty()) { - PrintASCII(U, PrintAfter); - } else { - auto T = SubstituteTokens(U); - T.push_back(0); - Printf("%s%s", T.data(), PrintAfter); - } +void Fuzzer::PrintUnitInASCII(const Unit &U, const char *PrintAfter) { + PrintASCII(U, PrintAfter); } void Fuzzer::StaticDeathCallback() { @@ -47,9 +75,11 @@ void Fuzzer::StaticDeathCallback() { void Fuzzer::DeathCallback() { Printf("DEATH:\n"); - Print(CurrentUnit, "\n"); - PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); - WriteToCrash(CurrentUnit, "crash-"); + if (CurrentUnit.size() <= kMaxUnitSizeToPrint) { + Print(CurrentUnit, "\n"); + PrintUnitInASCII(CurrentUnit, "\n"); + } + WriteUnitToFileWithPrefix(CurrentUnit, "crash-"); } void Fuzzer::StaticAlarmCallback() { @@ -68,19 +98,49 @@ void Fuzzer::AlarmCallback() { Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds); Printf(" and the timeout value is %d (use -timeout=N to change)\n", Options.UnitTimeoutSec); - Print(CurrentUnit, "\n"); - PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); - WriteToCrash(CurrentUnit, "timeout-"); + if (CurrentUnit.size() <= kMaxUnitSizeToPrint) { + Print(CurrentUnit, "\n"); + PrintUnitInASCII(CurrentUnit, "\n"); + } + WriteUnitToFileWithPrefix(CurrentUnit, "timeout-"); + Printf("==%d== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(), + Seconds); + if (__sanitizer_print_stack_trace) + __sanitizer_print_stack_trace(); + Printf("SUMMARY: libFuzzer: timeout\n"); exit(1); } } -void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) { - if (!Options.Verbosity) return; +void Fuzzer::PrintStats(const char *Where, const char *End) { size_t Seconds = secondsSinceProcessStartUp(); size_t ExecPerSec = (Seconds ? 
TotalNumberOfRuns / Seconds : 0); - Printf("#%zd\t%s cov %zd bits %zd units %zd exec/s %zd %s", TotalNumberOfRuns, - Where, Cov, TotalBits(), Corpus.size(), ExecPerSec, End); + + if (Options.OutputCSV) { + static bool csvHeaderPrinted = false; + if (!csvHeaderPrinted) { + csvHeaderPrinted = true; + Printf("runs,block_cov,bits,cc_cov,corpus,execs_per_sec,tbms,reason\n"); + } + Printf("%zd,%zd,%zd,%zd,%zd,%zd,%zd,%s\n", TotalNumberOfRuns, + LastRecordedBlockCoverage, TotalBits(), + LastRecordedCallerCalleeCoverage, Corpus.size(), ExecPerSec, + TotalNumberOfExecutedTraceBasedMutations, Where); + } + + if (!Options.Verbosity) + return; + Printf("#%zd\t%s", TotalNumberOfRuns, Where); + if (LastRecordedBlockCoverage) + Printf(" cov: %zd", LastRecordedBlockCoverage); + if (auto TB = TotalBits()) + Printf(" bits: %zd", TB); + if (LastRecordedCallerCalleeCoverage) + Printf(" indir: %zd", LastRecordedCallerCalleeCoverage); + Printf(" units: %zd exec/s: %zd", Corpus.size(), ExecPerSec); + if (TotalNumberOfExecutedTraceBasedMutations) + Printf(" tbm: %zd", TotalNumberOfExecutedTraceBasedMutations); + Printf("%s", End); } void Fuzzer::RereadOutputCorpus() { @@ -101,143 +161,127 @@ void Fuzzer::RereadOutputCorpus() { if (UnitHashesAddedToCorpus.insert(Hash(X)).second) { CurrentUnit.clear(); CurrentUnit.insert(CurrentUnit.begin(), X.begin(), X.end()); - size_t NewCoverage = RunOne(CurrentUnit); - if (NewCoverage) { + if (RunOne(CurrentUnit)) { Corpus.push_back(X); - if (Options.Verbosity >= 1) - PrintStats("RELOAD", NewCoverage); + PrintStats("RELOAD"); } } } } void Fuzzer::ShuffleAndMinimize() { - size_t MaxCov = 0; - bool PreferSmall = - (Options.PreferSmallDuringInitialShuffle == 1 || - (Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2)); + bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 || + (Options.PreferSmallDuringInitialShuffle == -1 && + USF.GetRand().RandBool())); if (Options.Verbosity) Printf("PreferSmall: %d\n", PreferSmall); - PrintStats("READ ", 0); + PrintStats("READ "); std::vector NewCorpus; - std::random_shuffle(Corpus.begin(), Corpus.end()); - if (PreferSmall) - std::stable_sort( - Corpus.begin(), Corpus.end(), - [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); + if (Options.ShuffleAtStartUp) { + std::random_shuffle(Corpus.begin(), Corpus.end(), USF.GetRand()); + if (PreferSmall) + std::stable_sort( + Corpus.begin(), Corpus.end(), + [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); + } Unit &U = CurrentUnit; for (const auto &C : Corpus) { for (size_t First = 0; First < 1; First++) { U.clear(); size_t Last = std::min(First + Options.MaxLen, C.size()); U.insert(U.begin(), C.begin() + First, C.begin() + Last); - size_t NewCoverage = RunOne(U); - if (NewCoverage) { - MaxCov = NewCoverage; + if (Options.OnlyASCII) + ToASCII(U); + if (RunOne(U)) { NewCorpus.push_back(U); if (Options.Verbosity >= 2) - Printf("NEW0: %zd L %zd\n", NewCoverage, U.size()); + Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size()); } } } Corpus = NewCorpus; for (auto &X : Corpus) UnitHashesAddedToCorpus.insert(Hash(X)); - PrintStats("INITED", MaxCov); + PrintStats("INITED"); } -size_t Fuzzer::RunOne(const Unit &U) { +bool Fuzzer::RunOne(const Unit &U) { UnitStartTime = system_clock::now(); TotalNumberOfRuns++; - size_t Res = 0; - if (Options.UseFullCoverageSet) - Res = RunOneMaximizeFullCoverageSet(U); - else - Res = RunOneMaximizeTotalCoverage(U); + + PrepareCoverageBeforeRun(); + ExecuteCallback(U); + bool Res = CheckCoverageAfterRun(); + 
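// Editorial aside (not part of this patch): the pulse check just below relies on the classic
// bit trick that N & (N - 1) clears the lowest set bit, so !(N & (N - 1)) is true exactly when
// N is 0 or a power of two. The "pulse " status line therefore appears at runs 1, 2, 4, 8, ...,
// exponentially less often as fuzzing proceeds. For example, 8 & 7 == 0 (print) while
// 12 & 11 == 8 (skip).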
auto UnitStopTime = system_clock::now(); auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime).count(); - if (TimeOfUnit > TimeOfLongestUnitInSeconds) { + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && + secondsSinceProcessStartUp() >= 2) + PrintStats("pulse "); + if (TimeOfUnit > TimeOfLongestUnitInSeconds && + TimeOfUnit >= Options.ReportSlowUnits) { TimeOfLongestUnitInSeconds = TimeOfUnit; - Printf("Longest unit: %zd s:\n", TimeOfLongestUnitInSeconds); - Print(U, "\n"); + Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); + WriteUnitToFileWithPrefix(U, "slow-unit-"); } return Res; } -void Fuzzer::RunOneAndUpdateCorpus(const Unit &U) { +void Fuzzer::RunOneAndUpdateCorpus(Unit &U) { if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) return; - ReportNewCoverage(RunOne(U), U); -} - -static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { - uintptr_t Res = 0; - for (uintptr_t i = 0; i < NumPCs; i++) { - Res = (Res + PCs[i]) * 7; - } - return Res; -} - -Unit Fuzzer::SubstituteTokens(const Unit &U) const { - Unit Res; - for (auto Idx : U) { - if (Idx < Options.Tokens.size()) { - std::string Token = Options.Tokens[Idx]; - Res.insert(Res.end(), Token.begin(), Token.end()); - } else { - Res.push_back(' '); - } - } - // FIXME: Apply DFSan labels. - return Res; + if (Options.OnlyASCII) + ToASCII(U); + if (RunOne(U)) + ReportNewCoverage(U); } void Fuzzer::ExecuteCallback(const Unit &U) { - if (Options.Tokens.empty()) { - USF.TargetFunction(U.data(), U.size()); - } else { - auto T = SubstituteTokens(U); - USF.TargetFunction(T.data(), T.size()); - } + const uint8_t *Data = U.data(); + uint8_t EmptyData; + if (!Data) + Data = &EmptyData; + int Res = USF.TargetFunction(Data, U.size()); + (void)Res; + assert(Res == 0); } -// Experimental. -// Fuly reset the current coverage state, run a single unit, -// compute a hash function from the full coverage set, -// return non-zero if the hash value is new. -// This produces tons of new units and as is it's only suitable for small tests, -// e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale. 
-size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { - __sanitizer_reset_coverage(); - ExecuteCallback(U); - uintptr_t *PCs; - uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs); - if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second) - return FullCoverageSets.size(); - return 0; +size_t Fuzzer::RecordBlockCoverage() { + CHECK_WEAK_API_FUNCTION(__sanitizer_get_total_unique_coverage); + return LastRecordedBlockCoverage = __sanitizer_get_total_unique_coverage(); } -size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { - size_t NumCounters = __sanitizer_get_number_of_counters(); +size_t Fuzzer::RecordCallerCalleeCoverage() { + if (!Options.UseIndirCalls) + return 0; + if (!__sanitizer_get_total_unique_caller_callee_pairs) + return 0; + return LastRecordedCallerCalleeCoverage = + __sanitizer_get_total_unique_caller_callee_pairs(); +} + +void Fuzzer::PrepareCoverageBeforeRun() { if (Options.UseCounters) { + size_t NumCounters = __sanitizer_get_number_of_counters(); CounterBitmap.resize(NumCounters); __sanitizer_update_counter_bitset_and_clear_counters(0); } - size_t OldCoverage = __sanitizer_get_total_unique_coverage(); - ExecuteCallback(U); - size_t NewCoverage = __sanitizer_get_total_unique_coverage(); + RecordBlockCoverage(); + RecordCallerCalleeCoverage(); +} + +bool Fuzzer::CheckCoverageAfterRun() { + size_t OldCoverage = LastRecordedBlockCoverage; + size_t NewCoverage = RecordBlockCoverage(); + size_t OldCallerCalleeCoverage = LastRecordedCallerCalleeCoverage; + size_t NewCallerCalleeCoverage = RecordCallerCalleeCoverage(); size_t NumNewBits = 0; if (Options.UseCounters) NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters( CounterBitmap.data()); - - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) - PrintStats("pulse ", NewCoverage); - - if (NewCoverage > OldCoverage || NumNewBits) - return NewCoverage; - return 0; + return NewCoverage > OldCoverage || + NewCallerCalleeCoverage > OldCallerCalleeCoverage || NumNewBits; } void Fuzzer::WriteToOutputCorpus(const Unit &U) { @@ -246,13 +290,20 @@ void Fuzzer::WriteToOutputCorpus(const Unit &U) { WriteToFile(U, Path); if (Options.Verbosity >= 2) Printf("Written to %s\n", Path.c_str()); + assert(!Options.OnlyASCII || IsASCII(U)); } -void Fuzzer::WriteToCrash(const Unit &U, const char *Prefix) { - std::string Path = Prefix + Hash(U); +void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { + if (!Options.SaveArtifacts) + return; + std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); + if (!Options.ExactArtifactPath.empty()) + Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. 
WriteToFile(U, Path); - Printf("CRASHED; file written to %s\nBase64: ", Path.c_str()); - PrintFileAsBase64(Path); + Printf("artifact_prefix='%s'; Test unit written to %s\n", + Options.ArtifactPrefix.c_str(), Path.c_str()); + if (U.size() <= kMaxUnitSizeToPrint) + Printf("Base64: %s\n", Base64(U).c_str()); } void Fuzzer::SaveCorpus() { @@ -264,70 +315,181 @@ void Fuzzer::SaveCorpus() { Options.OutputCorpus.c_str()); } -void Fuzzer::ReportNewCoverage(size_t NewCoverage, const Unit &U) { - if (!NewCoverage) return; - Corpus.push_back(U); - UnitHashesAddedToCorpus.insert(Hash(U)); - PrintStats("NEW ", NewCoverage, ""); +void Fuzzer::PrintStatusForNewUnit(const Unit &U) { + if (!Options.PrintNEW) + return; + PrintStats("NEW ", ""); if (Options.Verbosity) { - Printf(" L: %zd", U.size()); - if (U.size() < 30) { - Printf(" "); - PrintUnitInASCIIOrTokens(U, "\t"); - Print(U); - } + Printf(" L: %zd ", U.size()); + USF.PrintMutationSequence(); Printf("\n"); } +} + +void Fuzzer::ReportNewCoverage(const Unit &U) { + Corpus.push_back(U); + UnitHashesAddedToCorpus.insert(Hash(U)); + PrintStatusForNewUnit(U); WriteToOutputCorpus(U); if (Options.ExitOnFirst) exit(0); } -void Fuzzer::MutateAndTestOne(Unit *U) { +void Fuzzer::Merge(const std::vector &Corpora) { + if (Corpora.size() <= 1) { + Printf("Merge requires two or more corpus dirs\n"); + return; + } + auto InitialCorpusDir = Corpora[0]; + ReadDir(InitialCorpusDir, nullptr); + Printf("Merge: running the initial corpus '%s' of %d units\n", + InitialCorpusDir.c_str(), Corpus.size()); + for (auto &U : Corpus) + RunOne(U); + + std::vector ExtraCorpora(Corpora.begin() + 1, Corpora.end()); + + size_t NumTried = 0; + size_t NumMerged = 0; + for (auto &C : ExtraCorpora) { + Corpus.clear(); + ReadDir(C, nullptr); + Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(), + Corpus.size()); + for (auto &U : Corpus) { + NumTried++; + if (RunOne(U)) { + WriteToOutputCorpus(U); + NumMerged++; + } + } + } + Printf("Merge: written %zd out of %zd units\n", NumMerged, NumTried); +} + +void Fuzzer::MutateAndTestOne() { + auto &U = CurrentUnit; + USF.StartMutationSequence(); + + U = ChooseUnitToMutate(); + for (int i = 0; i < Options.MutateDepth; i++) { StartTraceRecording(); - size_t Size = U->size(); - U->resize(Options.MaxLen); - size_t NewSize = USF.Mutate(U->data(), Size, U->size()); + size_t Size = U.size(); + U.resize(Options.MaxLen); + size_t NewSize = USF.Mutate(U.data(), Size, U.size()); assert(NewSize > 0 && "Mutator returned empty unit"); assert(NewSize <= (size_t)Options.MaxLen && "Mutator return overisized unit"); - U->resize(NewSize); - RunOneAndUpdateCorpus(*U); + U.resize(NewSize); + RunOneAndUpdateCorpus(U); size_t NumTraceBasedMutations = StopTraceRecording(); - for (size_t j = 0; j < NumTraceBasedMutations; j++) { - ApplyTraceBasedMutation(j, U); - RunOneAndUpdateCorpus(*U); + size_t TBMWidth = + std::min((size_t)Options.TBMWidth, NumTraceBasedMutations); + size_t TBMDepth = + std::min((size_t)Options.TBMDepth, NumTraceBasedMutations); + Unit BackUp = U; + for (size_t w = 0; w < TBMWidth; w++) { + U = BackUp; + for (size_t d = 0; d < TBMDepth; d++) { + TotalNumberOfExecutedTraceBasedMutations++; + ApplyTraceBasedMutation(USF.GetRand()(NumTraceBasedMutations), &U); + RunOneAndUpdateCorpus(U); + } } } } -void Fuzzer::Loop(size_t NumIterations) { - for (size_t i = 1; i <= NumIterations; i++) { - for (size_t J1 = 0; J1 < Corpus.size(); J1++) { - SyncCorpus(); - RereadOutputCorpus(); - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - 
return; - // First, simply mutate the unit w/o doing crosses. - CurrentUnit = Corpus[J1]; - MutateAndTestOne(&CurrentUnit); - // Now, cross with others. - if (Options.DoCrossOver && !Corpus[J1].empty()) { - for (size_t J2 = 0; J2 < Corpus.size(); J2++) { - CurrentUnit.resize(Options.MaxLen); - size_t NewSize = USF.CrossOver( - Corpus[J1].data(), Corpus[J1].size(), Corpus[J2].data(), - Corpus[J2].size(), CurrentUnit.data(), CurrentUnit.size()); - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= (size_t)Options.MaxLen && - "CrossOver return overisized unit"); - CurrentUnit.resize(NewSize); - MutateAndTestOne(&CurrentUnit); - } - } +// Returns an index of a random unit from the corpus to mutate. +// Hypothesis: units added to the corpus last are more likely to be interesting. +// This function gives more weight to the more recent units. +size_t Fuzzer::ChooseUnitIdxToMutate() { + size_t N = Corpus.size(); + size_t Total = (N + 1) * N / 2; + size_t R = USF.GetRand()(Total); + size_t IdxBeg = 0, IdxEnd = N; + // Binary search. + while (IdxEnd - IdxBeg >= 2) { + size_t Idx = IdxBeg + (IdxEnd - IdxBeg) / 2; + if (R > (Idx + 1) * Idx / 2) + IdxBeg = Idx; + else + IdxEnd = Idx; + } + assert(IdxBeg < N); + return IdxBeg; +} + +// Experimental search heuristic: drilling. +// - Read, shuffle, execute and minimize the corpus. +// - Choose one random unit. +// - Reset the coverage. +// - Start fuzzing as if the chosen unit was the only element of the corpus. +// - When done, reset the coverage again. +// - Merge the newly created corpus into the original one. +void Fuzzer::Drill() { + // The corpus is already read, shuffled, and minimized. + assert(!Corpus.empty()); + Options.PrintNEW = false; // Don't print NEW status lines when drilling. + + Unit U = ChooseUnitToMutate(); + + CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage); + __sanitizer_reset_coverage(); + + std::vector<Unit> SavedCorpus; + SavedCorpus.swap(Corpus); + Corpus.push_back(U); + assert(Corpus.size() == 1); + RunOne(U); + PrintStats("DRILL "); + std::string SavedOutputCorpusPath; // Don't write new units while drilling. + SavedOutputCorpusPath.swap(Options.OutputCorpus); + Loop(); + + __sanitizer_reset_coverage(); + + PrintStats("REINIT"); + SavedOutputCorpusPath.swap(Options.OutputCorpus); + for (auto &U : SavedCorpus) + RunOne(U); + PrintStats("MERGE "); + Options.PrintNEW = true; + size_t NumMerged = 0; + for (auto &U : Corpus) { + if (RunOne(U)) { + PrintStatusForNewUnit(U); + NumMerged++; + WriteToOutputCorpus(U); } } + PrintStats("MERGED"); + if (NumMerged && Options.Verbosity) + Printf("Drilling discovered %zd new units\n", NumMerged); +} + +void Fuzzer::Loop() { + system_clock::time_point LastCorpusReload = system_clock::now(); + if (Options.DoCrossOver) + USF.SetCorpus(&Corpus); + while (true) { + SyncCorpus(); + auto Now = system_clock::now(); + if (duration_cast<seconds>(Now - LastCorpusReload).count()) { + RereadOutputCorpus(); + LastCorpusReload = Now; + } + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; + if (Options.MaxTotalTimeSec > 0 && + secondsSinceProcessStartUp() > + static_cast<size_t>(Options.MaxTotalTimeSec)) + break; + // Perform several mutations and runs.
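// Editorial aside (not part of this patch): ChooseUnitIdxToMutate above hands out
// N*(N+1)/2 equally likely "tickets", giving unit Idx roughly Idx + 1 of them, so the newest
// unit is mutated about N times as often as the oldest. The binary search just locates the
// unit whose cumulative ticket range around Idx*(Idx+1)/2 contains R. For example, with N = 4
// there are 10 tickets and the most recent unit holds 4 of them.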
+ MutateAndTestOne(); + } + + PrintStats("DONE ", "\n"); } void Fuzzer::SyncCorpus() { diff --git a/lib/Fuzzer/FuzzerMain.cpp b/lib/Fuzzer/FuzzerMain.cpp index c4dffb45d166..c5af5b059091 100644 --- a/lib/Fuzzer/FuzzerMain.cpp +++ b/lib/Fuzzer/FuzzerMain.cpp @@ -13,7 +13,7 @@ #include "FuzzerInternal.h" // This function should be defined by the user. -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); int main(int argc, char **argv) { return fuzzer::FuzzerDriver(argc, argv, LLVMFuzzerTestOneInput); diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp index f537fa90fd85..c3fa37a435d6 100644 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ b/lib/Fuzzer/FuzzerMutate.cpp @@ -13,10 +13,42 @@ #include "FuzzerInternal.h" +#include + namespace fuzzer { -static char FlipRandomBit(char X) { - int Bit = rand() % 8; +struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; +}; + +struct MutationDispatcher::Impl { + std::vector Dictionary; + std::vector Mutators; + std::vector CurrentMutatorSequence; + const std::vector *Corpus = nullptr; + + void Add(Mutator M) { Mutators.push_back(M); } + Impl() { + Add({&MutationDispatcher::Mutate_EraseByte, "EraseByte"}); + Add({&MutationDispatcher::Mutate_InsertByte, "InsertByte"}); + Add({&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}); + Add({&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}); + Add({&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}); + Add({&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}); + Add({&MutationDispatcher::Mutate_CrossOver, "CrossOver"}); + } + void AddWordToDictionary(const uint8_t *Word, size_t Size) { + if (Dictionary.empty()) { + Add({&MutationDispatcher::Mutate_AddWordFromDictionary, "AddFromDict"}); + } + Dictionary.push_back(Unit(Word, Word + Size)); + } + void SetCorpus(const std::vector *Corpus) { this->Corpus = Corpus; } +}; + +static char FlipRandomBit(char X, FuzzerRandomBase &Rand) { + int Bit = Rand(8); char Mask = 1 << Bit; char R; if (X & (1 << Bit)) @@ -27,45 +59,167 @@ static char FlipRandomBit(char X) { return R; } -static char RandCh() { - if (rand() % 2) return rand(); +static char RandCh(FuzzerRandomBase &Rand) { + if (Rand.RandBool()) return Rand(256); const char *Special = "!*'();:@&=+$,/?%#[]123ABCxyz-`~."; - return Special[rand() % (sizeof(Special) - 1)]; + return Special[Rand(sizeof(Special) - 1)]; +} + +size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + assert(Size); + size_t ShuffleAmount = Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. + size_t ShuffleStart = Rand(Size - ShuffleAmount); + assert(ShuffleStart + ShuffleAmount <= Size); + std::random_shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, + Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_EraseByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + assert(Size); + if (Size == 1) return 0; + size_t Idx = Rand(Size); + // Erase Data[Idx]. + memmove(Data + Idx, Data + Idx + 1, Size - Idx - 1); + return Size - 1; +} + +size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size == MaxSize) return 0; + size_t Idx = Rand(Size + 1); + // Insert new value at Data[Idx]. 
+ memmove(Data + Idx + 1, Data + Idx, Size - Idx); + Data[Idx] = RandCh(Rand); + return Size + 1; +} + +size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t Idx = Rand(Size); + Data[Idx] = RandCh(Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t Idx = Rand(Size); + Data[Idx] = FlipRandomBit(Data[Idx], Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromDictionary(uint8_t *Data, + size_t Size, + size_t MaxSize) { + auto &D = MDImpl->Dictionary; + assert(!D.empty()); + if (D.empty()) return 0; + const Unit &Word = D[Rand(D.size())]; + if (Size + Word.size() > MaxSize) return 0; + size_t Idx = Rand(Size + 1); + memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx); + memcpy(Data + Idx, Word.data(), Word.size()); + return Size + Word.size(); +} + +size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t B = Rand(Size); + while (B < Size && !isdigit(Data[B])) B++; + if (B == Size) return 0; + size_t E = B; + while (E < Size && isdigit(Data[E])) E++; + assert(B < E); + // now we have digits in [B, E). + // strtol and friends don't accept non-zero-terminated data, parse it manually. + uint64_t Val = Data[B] - '0'; + for (size_t i = B + 1; i < E; i++) + Val = Val * 10 + Data[i] - '0'; + + // Mutate the integer value. + switch(Rand(5)) { + case 0: Val++; break; + case 1: Val--; break; + case 2: Val /= 2; break; + case 3: Val *= 2; break; + case 4: Val = Rand(Val * Val); break; + default: assert(0); + } + // Just replace the bytes with the new ones, don't bother moving bytes. + for (size_t i = B; i < E; i++) { + size_t Idx = E + B - i - 1; + assert(Idx >= B && Idx < E); + Data[Idx] = (Val % 10) + '0'; + Val /= 10; + } + return Size; +} + +size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + auto Corpus = MDImpl->Corpus; + if (!Corpus || Corpus->size() < 2 || Size == 0) return 0; + size_t Idx = Rand(Corpus->size()); + const Unit &Other = (*Corpus)[Idx]; + if (Other.empty()) return 0; + Unit U(MaxSize); + size_t NewSize = + CrossOver(Data, Size, Other.data(), Other.size(), U.data(), U.size()); + assert(NewSize > 0 && "CrossOver returned empty unit"); + assert(NewSize <= MaxSize && "CrossOver returned oversized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +void MutationDispatcher::StartMutationSequence() { + MDImpl->CurrentMutatorSequence.clear(); +} + +void MutationDispatcher::PrintMutationSequence() { + Printf("MS: %zd ", MDImpl->CurrentMutatorSequence.size()); + for (auto M : MDImpl->CurrentMutatorSequence) + Printf("%s-", M.Name); } // Mutates Data in place, returns new size. -size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { +size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { assert(MaxSize > 0); assert(Size <= MaxSize); if (Size == 0) { for (size_t i = 0; i < MaxSize; i++) - Data[i] = RandCh(); + Data[i] = RandCh(Rand); return MaxSize; } assert(Size > 0); - size_t Idx = rand() % Size; - switch (rand() % 3) { - case 0: - if (Size > 1) { - // Erase Data[Idx]. - memmove(Data + Idx, Data + Idx + 1, Size - Idx - 1); - Size = Size - 1; + // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), + // in which case they will return 0. + // Try several times before returning un-mutated data.
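A note on Mutate_ChangeASCIIInteger above: the mutated value is written back right-to-left over the original digit span [B, E), so the unit keeps its length and surrounding bytes never move (excess high digits are simply dropped). A standalone sketch of that final step, not part of the patch:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  // Write Val in decimal over Data[B, E), least significant digit last,
  // mirroring the loop at the end of Mutate_ChangeASCIIInteger.
  static void WriteDecimal(uint8_t *Data, size_t B, size_t E, uint64_t Val) {
    for (size_t i = B; i < E; i++) {
      size_t Idx = E + B - i - 1;   // walk the span right to left
      Data[Idx] = (Val % 10) + '0';
      Val /= 10;
    }
  }

  int main() {
    uint8_t Buf[] = "port=08080;";
    WriteDecimal(Buf, 5, 10, 16160);  // the "Val *= 2" case: 8080 -> 16160
    printf("%s\n", Buf);              // prints "port=16160;"
  }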
+ for (int Iter = 0; Iter < 10; Iter++) { + size_t MutatorIdx = Rand(MDImpl->Mutators.size()); + auto M = MDImpl->Mutators[MutatorIdx]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); + if (NewSize) { + MDImpl->CurrentMutatorSequence.push_back(M); + return NewSize; } - [[clang::fallthrough]]; - case 1: - if (Size < MaxSize) { - // Insert new value at Data[Idx]. - memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(); - } - Data[Idx] = RandCh(); - break; - case 2: - Data[Idx] = FlipRandomBit(Data[Idx]); - break; } - assert(Size > 0); return Size; } +void MutationDispatcher::SetCorpus(const std::vector<Unit> *Corpus) { + MDImpl->SetCorpus(Corpus); +} + +void MutationDispatcher::AddWordToDictionary(const uint8_t *Word, size_t Size) { + MDImpl->AddWordToDictionary(Word, Size); +} + +MutationDispatcher::MutationDispatcher(FuzzerRandomBase &Rand) : Rand(Rand) { + MDImpl = new Impl; +} + +MutationDispatcher::~MutationDispatcher() { delete MDImpl; } + } // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp index b2e1e956dfcf..8204a2ddc7c8 100644 --- a/lib/Fuzzer/FuzzerTraceState.cpp +++ b/lib/Fuzzer/FuzzerTraceState.cpp @@ -67,37 +67,34 @@ clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp clang++ -O0 -std=c++11 -fsanitize-coverage=edge,trace-cmp \ -fsanitize=dataflow \ - test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o - ./a.out + test/SimpleCmpTest.cpp Fuzzer*.o + ./a.out -use_traces=1 ) */ +#include "FuzzerDFSan.h" #include "FuzzerInternal.h" -#include #include #include #include +#if !LLVM_FUZZER_SUPPORTS_DFSAN +// Stubs for dfsan for platforms where dfsan does not exist and weak +// functions don't work. extern "C" { -__attribute__((weak)) -dfsan_label dfsan_create_label(const char *desc, void *userdata); -__attribute__((weak)) -void dfsan_set_label(dfsan_label label, void *addr, size_t size); -__attribute__((weak)) -void dfsan_add_label(dfsan_label label, void *addr, size_t size); -__attribute__((weak)) -const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); -__attribute__((weak)) -dfsan_label dfsan_read_label(const void *addr, size_t size); +dfsan_label dfsan_create_label(const char *desc, void *userdata) { return 0; } +void dfsan_set_label(dfsan_label label, void *addr, size_t size) {} +void dfsan_add_label(dfsan_label label, void *addr, size_t size) {} +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { + return nullptr; +} +dfsan_label dfsan_read_label(const void *addr, size_t size) { return 0; } } // extern "C" +#endif // !LLVM_FUZZER_SUPPORTS_DFSAN namespace fuzzer { -static bool ReallyHaveDFSan() { - return &dfsan_create_label != nullptr; -} - // These values are copied from include/llvm/IR/InstrTypes.h. // We do not include the LLVM headers here to remain independent. // If these values ever change, an assertion in ComputeCmp will fail.
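Before moving on to FuzzerTraceState.cpp: the MutationDispatcher completed above is self-contained and can be driven outside the fuzzing loop, which is exactly how the unit tests later in this patch exercise it. A usage sketch (assumes the declarations from FuzzerInternal.h; not part of the patch):

  #include "FuzzerInternal.h"
  #include <cstdint>

  // Mutate a buffer in place a few times and report which mutators fired.
  void MutateBufferExample() {
    fuzzer::FuzzerRandomLibc Rand(0);   // libc rand()-based RNG, seed 0
    fuzzer::MutationDispatcher MD(Rand);
    uint8_t Buf[64] = {'a', 'b', 'c'};
    size_t Size = 3;
    MD.StartMutationSequence();
    for (int i = 0; i < 5; i++)
      Size = MD.Mutate(Buf, Size, sizeof(Buf));  // one random mutator per call
    MD.PrintMutationSequence();  // e.g. "MS: 5 InsertByte-ChangeBit-..."
  }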
@@ -138,7 +135,9 @@ static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, if (CmpSize == 4) return ComputeCmp<uint32_t>(CmpType, Arg1, Arg2); if (CmpSize == 2) return ComputeCmp<uint16_t>(CmpType, Arg1, Arg2); if (CmpSize == 1) return ComputeCmp<uint8_t>(CmpType, Arg1, Arg2); - assert(0 && "unsupported type size"); + // Other size, == + if (CmpType == ICMP_EQ) return Arg1 == Arg2; + // assert(0 && "unsupported cmp and type size combination"); return true; } @@ -180,8 +179,13 @@ class TraceState { void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, uint64_t Arg1, uint64_t Arg2, dfsan_label L1, dfsan_label L2); - void TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, - uint64_t Arg2); + void DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, uint64_t Val, + size_t NumCases, uint64_t *Cases, dfsan_label L); + void TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2); + + void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val, + size_t NumCases, uint64_t *Cases); int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData, size_t DataSize); @@ -191,9 +195,8 @@ class TraceState { Mutations.clear(); } - size_t StopTraceRecording() { + size_t StopTraceRecording(FuzzerRandomBase &Rand) { RecordingTraces = false; - std::random_shuffle(Mutations.begin(), Mutations.end()); return Mutations.size(); } @@ -207,7 +210,7 @@ class TraceState { } bool RecordingTraces = false; std::vector Mutations; - LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {}; + LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)]; const Fuzzer::FuzzingOptions &Options; const Unit &CurrentUnit; }; @@ -255,36 +258,64 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, if (Options.Verbosity >= 3) - Printf("DFSAN: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 %d MU %zd\n", + Printf("DFSanCmpCallback: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 " + "%d MU %zd\n", PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, Mutations.size()); } +void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, + uint64_t Val, size_t NumCases, + uint64_t *Cases, dfsan_label L) { + assert(ReallyHaveDFSan()); + if (!RecordingTraces) return; + if (!L) return; // Not actionable. + LabelRange LR = GetLabelRange(L); + size_t ValSize = ValSizeInBits / 8; + bool TryShort = IsTwoByteData(Val); + for (size_t i = 0; i < NumCases; i++) + TryShort &= IsTwoByteData(Cases[i]); + + for (size_t Pos = LR.Beg; Pos + ValSize <= LR.End; Pos++) + for (size_t i = 0; i < NumCases; i++) + Mutations.push_back({Pos, ValSize, Cases[i]}); + + if (TryShort) + for (size_t Pos = LR.Beg; Pos + 2 <= LR.End; Pos++) + for (size_t i = 0; i < NumCases; i++) + Mutations.push_back({Pos, 2, Cases[i]}); + + if (Options.Verbosity >= 3) + Printf("DFSanSwitchCallback: PC %lx Val %zd SZ %zd # %zd L %d: {%d, %d} " + "TryShort %d\n", + PC, Val, ValSize, NumCases, L, LR.Beg, LR.End, TryShort); +} + int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData, size_t DataSize) { int Res = 0; const uint8_t *Beg = CurrentUnit.data(); const uint8_t *End = Beg + CurrentUnit.size(); - for (const uint8_t *Cur = Beg; Cur < End; Cur += DataSize) { + for (const uint8_t *Cur = Beg; Cur < End; Cur++) { Cur = (uint8_t *)memmem(Cur, End - Cur, &PresentData, DataSize); if (!Cur) break; size_t Pos = Cur - Beg; assert(Pos < CurrentUnit.size()); + if (Mutations.size() > 100000U) return Res; // Just in case.
Mutations.push_back({Pos, DataSize, DesiredData}); Mutations.push_back({Pos, DataSize, DesiredData + 1}); Mutations.push_back({Pos, DataSize, DesiredData - 1}); - Cur += DataSize; Res++; } return Res; } -void TraceState::TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, - uint64_t Arg2) { +void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2) { if (!RecordingTraces) return; int Added = 0; if (Options.Verbosity >= 3) - Printf("TraceCmp: %zd %zd\n", Arg1, Arg2); + Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2); Added += TryToAddDesiredData(Arg1, Arg2, CmpSize); Added += TryToAddDesiredData(Arg2, Arg1, CmpSize); if (!Added && CmpSize == 4 && IsTwoByteData(Arg1) && IsTwoByteData(Arg2)) { @@ -293,16 +324,40 @@ void TraceState::TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, } } +void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, + uint64_t Val, size_t NumCases, + uint64_t *Cases) { + if (!RecordingTraces) return; + size_t ValSize = ValSizeInBits / 8; + bool TryShort = IsTwoByteData(Val); + for (size_t i = 0; i < NumCases; i++) + TryShort &= IsTwoByteData(Cases[i]); + + if (Options.Verbosity >= 3) + Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases, + TryShort); + + for (size_t i = 0; i < NumCases; i++) { + TryToAddDesiredData(Val, Cases[i], ValSize); + if (TryShort) + TryToAddDesiredData(Val, Cases[i], 2); + } + +} + static TraceState *TS; void Fuzzer::StartTraceRecording() { if (!TS) return; + if (ReallyHaveDFSan()) + for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) + dfsan_set_label(i + 1, &CurrentUnit[i], 1); TS->StartTraceRecording(); } size_t Fuzzer::StopTraceRecording() { if (!TS) return 0; - return TS->StopTraceRecording(); + return TS->StopTraceRecording(USF.GetRand()); } void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) { @@ -319,11 +374,19 @@ void Fuzzer::InitializeTraceState() { for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) { dfsan_label L = dfsan_create_label("input", (void*)(i + 1)); // We assume that no one else has called dfsan_create_label before.
- assert(L == i + 1); - dfsan_set_label(L, &CurrentUnit[i], 1); + if (L != i + 1) { + Printf("DFSan labels are not starting from 1, exiting\n"); + exit(1); + } } } +static size_t InternalStrnlen(const char *S, size_t MaxLen) { + size_t Len = 0; + for (; Len < MaxLen && S[Len]; Len++) {} + return Len; +} + } // namespace fuzzer using fuzzer::TS; @@ -340,6 +403,13 @@ void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, TS->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2); } +void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, + dfsan_label L1, dfsan_label L2) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + TS->DFSanSwitchCallback(PC, Cases[1], Val, Cases[0], Cases+2, L1); +} + void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label) { @@ -354,12 +424,96 @@ void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2); } +void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, + size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + n = std::min(n, fuzzer::InternalStrnlen(s1, n)); + n = std::min(n, fuzzer::InternalStrnlen(s2, n)); + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + dfsan_label L1 = dfsan_read_label(s1, n); + dfsan_label L2 = dfsan_read_label(s2, n); + TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2); +} + +void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2, + dfsan_label s1_label, dfsan_label s2_label) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = strlen(s1); + size_t Len2 = strlen(s2); + size_t N = std::min(Len1, Len2); + if (N <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(N, sizeof(S1))); + memcpy(&S2, s2, std::min(N, sizeof(S2))); + dfsan_label L1 = dfsan_read_label(s1, Len1); + dfsan_label L2 = dfsan_read_label(s2, Len2); + TS->DFSanCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2, L1, L2); +} + +void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, + const void *s2, size_t n) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2); +} + +void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, + const char *s2, size_t n) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = fuzzer::InternalStrnlen(s1, n); + size_t Len2 = fuzzer::InternalStrnlen(s2, n); + n = std::min(n, Len1); + n = std::min(n, Len2); + if (n <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes.
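Both trace_switch hooks above decode the compiler-emitted Cases table the same way: Cases[0] holds the number of case constants, Cases[1] the bit width of the switched value, and the constants themselves start at Cases[2]. A standalone sketch of that decoding (illustrative only, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Decode the Cases table passed to __sanitizer_cov_trace_switch:
  //   Cases[0]    = number of case constants
  //   Cases[1]    = bit width of the switched value
  //   Cases[2...] = the case constants
  void DemoDecodeSwitchTable(uint64_t Val, const uint64_t *Cases) {
    uint64_t NumCases = Cases[0];
    uint64_t ValSizeInBits = Cases[1];
    printf("switch on %u-bit value %llu with %llu cases\n",
           (unsigned)ValSizeInBits, (unsigned long long)Val,
           (unsigned long long)NumCases);
    for (uint64_t i = 0; i < NumCases; i++)
      printf("  case %llu\n", (unsigned long long)Cases[2 + i]);
  }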
+ memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2); +} + +void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, + const char *s2) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = strlen(s1); + size_t Len2 = strlen(s2); + size_t N = std::min(Len1, Len2); + if (N <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(N, sizeof(S1))); + memcpy(&S2, s2, std::min(N, sizeof(S2))); + TS->TraceCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2); +} + +__attribute__((visibility("default"))) void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, uint64_t Arg2) { if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); uint64_t CmpSize = (SizeAndType >> 32) / 8; uint64_t Type = (SizeAndType << 32) >> 32; - TS->TraceCmpCallback(CmpSize, Type, Arg1, Arg2); + TS->TraceCmpCallback(PC, CmpSize, Type, Arg1, Arg2); +} + +__attribute__((visibility("default"))) +void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + TS->TraceSwitchCallback(PC, Cases[1], Val, Cases[0], Cases + 2); } } // extern "C" diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index e381c0406321..6c1133fffd37 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -16,6 +16,7 @@ #include #include #include +#include <sstream> #include namespace fuzzer { @@ -51,7 +52,6 @@ static void AlarmHandler(int, siginfo_t *, void *) { void SetTimer(int Seconds) { struct itimerval T {{Seconds, 0}, {Seconds, 0}}; - Printf("SetTimer %d\n", Seconds); int Res = setitimer(ITIMER_REAL, &T, nullptr); assert(Res == 0); struct sigaction sigact; @@ -69,8 +69,131 @@ int NumberOfCpuCores() { return N; } -void ExecuteCommand(const std::string &Command) { - system(Command.c_str()); +int ExecuteCommand(const std::string &Command) { + return system(Command.c_str()); +} + +bool ToASCII(Unit &U) { + bool Changed = false; + for (auto &X : U) { + auto NewX = X; + NewX &= 127; + if (!isspace(NewX) && !isprint(NewX)) + NewX = ' '; + Changed |= NewX != X; + X = NewX; + } + return Changed; +} + +bool IsASCII(const Unit &U) { + for (auto X : U) + if (!(isprint(X) || isspace(X))) return false; + return true; +} + +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) { + U->clear(); + if (Str.empty()) return false; + size_t L = 0, R = Str.size() - 1; // We are parsing the range [L,R]. + // Skip spaces from both sides. + while (L < R && isspace(Str[L])) L++; + while (R > L && isspace(Str[R])) R--; + if (R - L < 2) return false; + // Check the closing " + if (Str[R] != '"') return false; + R--; + // Find the opening " + while (L < R && Str[L] != '"') L++; + if (L >= R) return false; + assert(Str[L] == '\"'); + L++; + assert(L <= R); + for (size_t Pos = L; Pos <= R; Pos++) { + uint8_t V = (uint8_t)Str[Pos]; + if (!isprint(V) && !isspace(V)) return false; + if (V == '\\') { + // Handle '\\' + if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) { + U->push_back(Str[Pos + 1]); + Pos++; + continue; + } + // Handle '\xAB' + if (Pos + 3 <= R && Str[Pos + 1] == 'x' + && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) { + char Hex[] = "0xAA"; + Hex[2] = Str[Pos + 2]; + Hex[3] = Str[Pos + 3]; + U->push_back(strtol(Hex, nullptr, 16)); + Pos += 3; + continue; + } + return false; // Invalid escape.
+ } else { + // Any other character. + U->push_back(V); + } + } + return true; +} + +bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units) { + if (Text.empty()) { + Printf("ParseDictionaryFile: file does not exist or is empty\n"); + return false; + } + std::istringstream ISS(Text); + Units->clear(); + Unit U; + int LineNo = 0; + std::string S; + while (std::getline(ISS, S, '\n')) { + LineNo++; + size_t Pos = 0; + while (Pos < S.size() && isspace(S[Pos])) Pos++; // Skip spaces. + if (Pos == S.size()) continue; // Empty line. + if (S[Pos] == '#') continue; // Comment line. + if (ParseOneDictionaryEntry(S, &U)) { + Units->push_back(U); + } else { + Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo, + S.c_str()); + return false; + } + } + return true; +} + +int GetPid() { return getpid(); } + + +std::string Base64(const Unit &U) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Res; + size_t i; + for (i = 0; i + 2 < U.size(); i += 3) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += Table[x & 63]; + } + if (i + 1 == U.size()) { + uint32_t x = (U[i] << 16); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += "=="; + } else if (i + 2 == U.size()) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += "="; + } + return Res; } } // namespace fuzzer diff --git a/lib/Fuzzer/cxx.dict b/lib/Fuzzer/cxx.dict new file mode 100644 index 000000000000..41350f47558b --- /dev/null +++ b/lib/Fuzzer/cxx.dict @@ -0,0 +1,122 @@ +"++" +"--" +"<<" +">>" +"+=" +"-=" +"*=" +"/=" +">>=" +"<<=" +"&=" +"|=" +"^=" +"%=" +"!=" +"&&" +"||" +"==" +">=" +"<=" +"->" +"alignas" +"alignof" +"and" +"and_eq" +"asm" +"auto" +"bitand" +"bitor" +"bool" +"break" +"case" +"catch" +"char" +"char16_t" +"char32_t" +"class" +"compl" +"concept" +"const" +"constexpr" +"const_cast" +"continue" +"decltype" +"default" +"delete" +"do" +"double" +"dynamic_cast" +"else" +"enum" +"explicit" +"export" +"extern" +"false" +"float" +"for" +"friend" +"goto" +"if" +"inline" +"int" +"long" +"mutable" +"namespace" +"new" +"noexcept" +"not" +"not_eq" +"nullptr" +"operator" +"or" +"or_eq" +"private" +"protected" +"public" +"register" +"reinterpret_cast" +"requires" +"return" +"short" +"signed" +"sizeof" +"static" +"static_assert" +"static_cast" +"struct" +"switch" +"template" +"this" +"thread_local" +"throw" +"true" +"try" +"typedef" +"typeid" +"typename" +"union" +"unsigned" +"using" +"virtual" +"void" +"volatile" +"wchar_t" +"while" +"xor" +"xor_eq" +"if" +"elif" +"else" +"endif" +"defined" +"ifdef" +"ifndef" +"define" +"undef" +"include" +"line" +"error" +"pragma" +"override" +"final" diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt deleted file mode 100644 index f3c4f80e1467..000000000000 --- a/lib/Fuzzer/cxx_fuzzer_tokens.txt +++ /dev/null @@ -1,218 +0,0 @@ -# -## -` -~ -! -@ -$ -% -^ -& -* -( -) -_ -- -_ -= -+ -{ -} -[ -] -| -\ -, -. -/ -?
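For reference, ParseOneDictionaryEntry above accepts AFL-compatible dictionary entries: an optional name before the opening quote, a double-quoted value, the escapes \\, \" and \xAB, and # comment lines. A hypothetical dictionary file in this format (in the same spirit as cxx.dict above and test/dict1.txt below) looks like:

  # comment lines and blank lines are ignored
  "abc"
  kw1="thread_local"
  hex="\xDE\xAD"
  quote="\""
  backslash="\\"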
-> -< -; -: -' -" -++ --- -<< ->> -+= --= -*= -/= ->>= -<<= -&= -|= -^= -%= -!= -&& -|| -== ->= -<= --> -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -alignas -alignof -and -and_eq -asm -auto -bitand -bitor -bool -break -case -catch -char -char16_t -char32_t -class -compl -concept -const -constexpr -const_cast -continue -decltype -default -delete -do -double -dynamic_cast -else -enum -explicit -export -extern -false -float -for -friend -goto -if -inline -int -long -mutable -namespace -new -noexcept -not -not_eq -nullptr -operator -or -or_eq -private -protected -public -register -reinterpret_cast -requires -return -short -signed -sizeof -static -static_assert -static_cast -struct -switch -template -this -thread_local -throw -true -try -typedef -typeid -typename -union -unsigned -using -virtual -void -volatile -wchar_t -while -xor -xor_eq -if -elif -else -endif -defined -ifdef -ifndef -define -undef -include -line -error -pragma -override -final diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index a9acec15d4d3..674fcc3c9f8c 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -5,26 +5,41 @@ set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=edge,indirect-calls") set(DFSanTests - DFSanMemcmpTest - DFSanSimpleCmpTest + MemcmpTest + SimpleCmpTest + StrcmpTest + StrncmpTest + SwitchTest ) set(Tests + CallerCalleeTest CounterTest - CxxTokensTest FourIndependentBranchesTest FullCoverageSetTest - InfiniteTest + MemcmpTest NullDerefTest + SimpleCmpTest + SimpleDictionaryTest + SimpleHashTest SimpleTest + StrcmpTest + StrncmpTest + SwitchTest TimeoutTest - ${DFSanTests} ) set(CustomMainTests UserSuppliedFuzzerTest ) +set(UninstrumentedTests + UninstrumentedTest + ) + +set(TraceBBTests + SimpleTest + ) set(TestBinaries) @@ -80,6 +95,17 @@ foreach(Test ${DFSanTests}) set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-DFSan) endforeach() +add_subdirectory(uninstrumented) + +foreach(Test ${UninstrumentedTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-Uninstrumented) +endforeach() + +add_subdirectory(trace-bb) + +foreach(Test ${TraceBBTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-TraceBB) +endforeach() set_target_properties(${TestBinaries} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} diff --git a/lib/Fuzzer/test/CallerCalleeTest.cpp b/lib/Fuzzer/test/CallerCalleeTest.cpp new file mode 100644 index 000000000000..150b2fc04058 --- /dev/null +++ b/lib/Fuzzer/test/CallerCalleeTest.cpp @@ -0,0 +1,56 @@ +// Simple test for a fuzzer. +// Try to find the target using the indirect caller-callee pairs. 
+#include +#include +#include +#include +#include + +typedef void (*F)(); +static F t[256]; + +void f34() { + std::cerr << "BINGO\n"; + exit(1); +} +void f23() { t[(unsigned)'d'] = f34;} +void f12() { t[(unsigned)'c'] = f23;} +void f01() { t[(unsigned)'b'] = f12;} +void f00() {} + +static F t0[256] = { + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 4) return 0; + // Spoof the counters. + for (int i = 0; i < 200; i++) { + f23(); + f12(); + f01(); + } + memcpy(t, t0, sizeof(t)); + t[(unsigned)'a'] = f01; + t[Data[0]](); + t[Data[1]](); + t[Data[2]](); + t[Data[3]](); + return 0; +} + diff --git a/lib/Fuzzer/test/CounterTest.cpp b/lib/Fuzzer/test/CounterTest.cpp index 29ddb02ebaea..b61f419c4991 100644 --- a/lib/Fuzzer/test/CounterTest.cpp +++ b/lib/Fuzzer/test/CounterTest.cpp @@ -2,7 +2,7 @@ // executed many times. #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int Num = 0; for (size_t i = 0; i < Size; i++) if (Data[i] == 'A' + i) @@ -11,4 +11,5 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp deleted file mode 100644 index 77d08b3d1055..000000000000 --- a/lib/Fuzzer/test/CxxTokensTest.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens. 
-#include -#include -#include -#include -#include - -static void Found() { - std::cout << "BINGO; Found the target, exiting\n"; - exit(1); -} - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - // looking for "thread_local unsigned A;" - if (Size < 24) return; - if (0 == memcmp(&Data[0], "thread_local", 12)) - if (Data[12] == ' ') - if (0 == memcmp(&Data[13], "unsigned", 8)) - if (Data[21] == ' ') - if (Data[22] == 'A') - if (Data[23] == ';') - Found(); -} - diff --git a/lib/Fuzzer/test/DFSanMemcmpTest.cpp b/lib/Fuzzer/test/DFSanMemcmpTest.cpp deleted file mode 100644 index 510a24398005..000000000000 --- a/lib/Fuzzer/test/DFSanMemcmpTest.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find a particular string. -#include -#include -#include -#include - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size >= 8 && memcmp(Data, "01234567", 8) == 0) { - fprintf(stderr, "BINGO\n"); - exit(1); - } -} diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp index e0b7509b8d65..6007dd4a027b 100644 --- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp +++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp @@ -4,7 +4,7 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; @@ -14,5 +14,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/FullCoverageSetTest.cpp b/lib/Fuzzer/test/FullCoverageSetTest.cpp index 2c6ff98db005..a868084a0cee 100644 --- a/lib/Fuzzer/test/FullCoverageSetTest.cpp +++ b/lib/Fuzzer/test/FullCoverageSetTest.cpp @@ -4,7 +4,7 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; @@ -16,5 +16,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 50f2f99760e5..8c0012708afc 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -2,6 +2,8 @@ #include "gtest/gtest.h" #include +using namespace fuzzer; + // For now, have LLVMFuzzerTestOneInput just to make it link. // Later we may want to make unittests that actually call LLVMFuzzerTestOneInput. 
extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { @@ -9,7 +11,8 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } TEST(Fuzzer, CrossOver) { - using namespace fuzzer; + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); Unit A({0, 1, 2}), B({5, 6, 7}); Unit C; Unit Expected[] = { @@ -52,8 +55,8 @@ TEST(Fuzzer, CrossOver) { std::set FoundUnits, ExpectedUnitsWitThisLength; for (int Iter = 0; Iter < 3000; Iter++) { C.resize(Len); - size_t NewSize = CrossOver(A.data(), A.size(), B.data(), B.size(), - C.data(), C.size()); + size_t NewSize = MD.CrossOver(A.data(), A.size(), B.data(), B.size(), + C.data(), C.size()); C.resize(NewSize); FoundUnits.insert(C); } @@ -71,3 +74,302 @@ TEST(Fuzzer, Hash) { U.push_back('d'); EXPECT_EQ("81fe8bfe87576c3ecb22426f8e57847382917acf", fuzzer::Hash(U)); } + +typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, + size_t MaxSize); + +void TestEraseByte(Mutator M, int NumIter) { + uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; + uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; + uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; + uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, sizeof(T), sizeof(T)); + if (NewSize == 7 && !memcmp(REM0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(REM1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(REM2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(REM3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(REM4, T, 7)) FoundMask |= 1 << 4; + if (NewSize == 7 && !memcmp(REM5, T, 7)) FoundMask |= 1 << 5; + if (NewSize == 7 && !memcmp(REM6, T, 7)) FoundMask |= 1 << 6; + if (NewSize == 7 && !memcmp(REM7, T, 7)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, EraseByte1) { + TestEraseByte(&MutationDispatcher::Mutate_EraseByte, 100); +} +TEST(FuzzerMutate, EraseByte2) { + TestEraseByte(&MutationDispatcher::Mutate, 1000); +} + +void TestInsertByte(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; + uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (MD.*M)(T, 7, 8); + if (NewSize == 8 && !memcmp(INS0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(INS1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(INS2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(INS3, T, 8)) FoundMask 
|= 1 << 3; + if (NewSize == 8 && !memcmp(INS4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, InsertByte1) { + TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); +} +TEST(FuzzerMutate, InsertByte2) { + TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeByte(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeByte1) { + TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); +} +TEST(FuzzerMutate, ChangeByte2) { + TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeBit(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeBit1) { + TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); +} +TEST(FuzzerMutate, 
ChangeBit2) { + TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); +} + +void TestShuffleBytes(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; + uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ShuffleBytes1) { + TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 15); +} +TEST(FuzzerMutate, ShuffleBytes2) { + TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 19); +} + +void TestAddWordFromDictionary(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; + MD.AddWordToDictionary(Word1, sizeof(Word1)); + MD.AddWordToDictionary(Word2, sizeof(Word2)); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; + uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; + uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; + uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; + uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; + uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; + uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22}; + size_t NewSize = (MD.*M)(T, 3, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 6 && !memcmp(CH4, T, 6)) FoundMask |= 1 << 4; + if (NewSize == 6 && !memcmp(CH5, T, 6)) FoundMask |= 1 << 5; + if (NewSize == 6 && !memcmp(CH6, T, 6)) FoundMask |= 1 << 6; + if (NewSize == 6 && !memcmp(CH7, T, 6)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, AddWordFromDictionary1) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate_AddWordFromDictionary, + 1 << 15); +} + +TEST(FuzzerMutate, AddWordFromDictionary2) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeASCIIInteger(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + + uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; + uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; + uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; + uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + size_t NewSize = (MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + else if (NewSize == 8 && 
!memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + else if (NewSize == 8) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ChangeASCIIInteger1) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, + 1 << 15); +} + +TEST(FuzzerMutate, ChangeASCIIInteger2) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); +} + + +TEST(FuzzerDictionary, ParseOneDictionaryEntry) { + Unit U; + EXPECT_FALSE(ParseOneDictionaryEntry("", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry("\t ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" zz\" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \"zz ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \"\" ", &U)); + EXPECT_TRUE(ParseOneDictionaryEntry("\"a\"", &U)); + EXPECT_EQ(U, Unit({'a'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"abc\"", &U)); + EXPECT_EQ(U, Unit({'a', 'b', 'c'})); + EXPECT_TRUE(ParseOneDictionaryEntry("abc=\"abc\"", &U)); + EXPECT_EQ(U, Unit({'a', 'b', 'c'})); + EXPECT_FALSE(ParseOneDictionaryEntry("\"\\\"", &U)); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\\\"", &U)); + EXPECT_EQ(U, Unit({'\\'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xAB\"", &U)); + EXPECT_EQ(U, Unit({0xAB})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xABz\\xDE\"", &U)); + EXPECT_EQ(U, Unit({0xAB, 'z', 0xDE})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"#\"", &U)); + EXPECT_EQ(U, Unit({'#'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\"\"", &U)); + EXPECT_EQ(U, Unit({'"'})); +} + +TEST(FuzzerDictionary, ParseDictionaryFile) { + std::vector<Unit> Units; + EXPECT_FALSE(ParseDictionaryFile("zzz\n", &Units)); + EXPECT_FALSE(ParseDictionaryFile("", &Units)); + EXPECT_TRUE(ParseDictionaryFile("\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile("#zzzz a b c d\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\naaa=\"aa\"", &Units)); + EXPECT_EQ(Units, std::vector<Unit>({Unit({'a', 'a'})})); + EXPECT_TRUE( + ParseDictionaryFile(" #zzzz\naaa=\"aa\"\n\nabc=\"abc\"", &Units)); + EXPECT_EQ(Units, + std::vector<Unit>({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})})); +} + +TEST(FuzzerUtil, Base64) { + EXPECT_EQ("", Base64({})); + EXPECT_EQ("YQ==", Base64({'a'})); + EXPECT_EQ("eA==", Base64({'x'})); + EXPECT_EQ("YWI=", Base64({'a', 'b'})); + EXPECT_EQ("eHk=", Base64({'x', 'y'})); + EXPECT_EQ("YWJj", Base64({'a', 'b', 'c'})); + EXPECT_EQ("eHl6", Base64({'x', 'y', 'z'})); + EXPECT_EQ("YWJjeA==", Base64({'a', 'b', 'c', 'x'})); + EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'})); + EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'})); +} diff --git a/lib/Fuzzer/test/InfiniteTest.cpp b/lib/Fuzzer/test/InfiniteTest.cpp deleted file mode 100644 index b6d174ffdc90..000000000000 --- a/lib/Fuzzer/test/InfiniteTest.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
-#include -#include -#include -#include - -static volatile int Sink; - -static volatile int One = 1; - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size > 0 && Data[0] == 'H') { - Sink = 1; - if (Size > 1 && Data[1] == 'i') { - Sink = 2; - if (Size > 2 && Data[2] == '!') { - Sink = 2; - while (One) - ; - } - } - } -} - diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp new file mode 100644 index 000000000000..47ce59e0d8f5 --- /dev/null +++ b/lib/Fuzzer/test/MemcmpTest.cpp @@ -0,0 +1,20 @@ +// Simple test for a fuzzer. The fuzzer must find a particular string. +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + // TODO: check other sizes. + if (Size >= 8 && memcmp(Data, "01234567", 8) == 0) { + if (Size >= 12 && memcmp(Data + 8, "ABCD", 4) == 0) { + if (Size >= 14 && memcmp(Data + 12, "XY", 2) == 0) { + if (Size >= 16 && memcmp(Data + 14, "KLM", 3) == 0) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + } + } + } + return 0; +} diff --git a/lib/Fuzzer/test/NullDerefTest.cpp b/lib/Fuzzer/test/NullDerefTest.cpp index 0cff6617a31d..200c56ccbbc9 100644 --- a/lib/Fuzzer/test/NullDerefTest.cpp +++ b/lib/Fuzzer/test/NullDerefTest.cpp @@ -7,7 +7,7 @@ static volatile int Sink; static volatile int *Null = 0; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -18,5 +18,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp similarity index 85% rename from lib/Fuzzer/test/DFSanSimpleCmpTest.cpp rename to lib/Fuzzer/test/SimpleCmpTest.cpp index ee378146dae1..8568c737efb1 100644 --- a/lib/Fuzzer/test/DFSanSimpleCmpTest.cpp +++ b/lib/Fuzzer/test/SimpleCmpTest.cpp @@ -4,8 +4,8 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 14) return; +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) return 0; uint64_t x = 0; int64_t y = 0; int z = 0; @@ -27,4 +27,5 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { Size, x, y, z, a); exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/SimpleDictionaryTest.cpp b/lib/Fuzzer/test/SimpleDictionaryTest.cpp new file mode 100644 index 000000000000..b9cb2f0270a3 --- /dev/null +++ b/lib/Fuzzer/test/SimpleDictionaryTest.cpp @@ -0,0 +1,26 @@ +// Simple test for a fuzzer. 
+// The fuzzer must find a string based on dictionary words: +// "Elvis" +// "Presley" +#include +#include +#include +#include +#include + +static volatile int Zero = 0; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + const char *Expected = "ElvisPresley"; + if (Size < strlen(Expected)) return 0; + size_t Match = 0; + for (size_t i = 0; Expected[i]; i++) + if (Expected[i] + Zero == Data[i]) + Match++; + if (Match == strlen(Expected)) { + std::cout << "BINGO; Found the target, exiting\n"; + exit(1); + } + return 0; +} + diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp new file mode 100644 index 000000000000..5bab3fa7f649 --- /dev/null +++ b/lib/Fuzzer/test/SimpleHashTest.cpp @@ -0,0 +1,37 @@ +// This test computes a checksum of the data (all but the last 4 bytes), +// and then compares the last 4 bytes with the computed value. +// A fuzzer with cmp traces is expected to defeat this check. +#include +#include +#include +#include + +// A modified jenkins_one_at_a_time_hash initialized by non-zero, +// so that simple_hash(0) != 0. See also +// https://en.wikipedia.org/wiki/Jenkins_hash_function +static uint32_t simple_hash(const uint8_t *Data, size_t Size) { + uint32_t Hash = 0x12039854; + for (uint32_t i = 0; i < Size; i++) { + Hash += Data[i]; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + Hash += (Hash << 3); + Hash ^= (Hash >> 11); + Hash += (Hash << 15); + return Hash; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) + return 0; + + uint32_t Hash = simple_hash(&Data[0], Size - 4); + uint32_t Want = reinterpret_cast<const uint32_t *>(&Data[Size - 4])[0]; + if (Hash != Want) + return 0; + fprintf(stderr, "BINGO; simple_hash defeated: %x == %x\n", (unsigned int)Hash, + (unsigned int)Want); + exit(1); + return 0; +} diff --git a/lib/Fuzzer/test/SimpleTest.cpp b/lib/Fuzzer/test/SimpleTest.cpp index a891635a7f14..04225a889f5d 100644 --- a/lib/Fuzzer/test/SimpleTest.cpp +++ b/lib/Fuzzer/test/SimpleTest.cpp @@ -1,4 +1,5 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include #include @@ -6,7 +7,8 @@ static volatile int Sink; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + assert(Data); if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -17,5 +19,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/StrcmpTest.cpp b/lib/Fuzzer/test/StrcmpTest.cpp new file mode 100644 index 000000000000..835819ae2f45 --- /dev/null +++ b/lib/Fuzzer/test/StrcmpTest.cpp @@ -0,0 +1,29 @@ +// Break through a series of strcmp.
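SimpleHashTest above is only solvable once the fuzzer can defeat the checksum, but a valid input is easy to construct by hand: append the hash of the payload as the last four bytes. A sketch (reusing the simple_hash function from the test; not part of the patch):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>
  #include <vector>

  // simple_hash as defined in SimpleHashTest.cpp above.
  static uint32_t simple_hash(const uint8_t *Data, size_t Size) {
    uint32_t Hash = 0x12039854;
    for (uint32_t i = 0; i < Size; i++) {
      Hash += Data[i];
      Hash += (Hash << 10);
      Hash ^= (Hash >> 6);
    }
    Hash += (Hash << 3);
    Hash ^= (Hash >> 11);
    Hash += (Hash << 15);
    return Hash;
  }

  int main() {
    std::vector<uint8_t> U(14, 'x');  // >= 14 bytes, as the test requires
    uint32_t H = simple_hash(U.data(), U.size() - 4);
    // Storing the raw uint32_t bytes matches the reinterpret_cast read
    // in the test, independent of endianness.
    memcpy(U.data() + U.size() - 4, &H, sizeof(H));
    printf("crafted input passes with hash %x\n", H);
  }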
+#include +#include +#include +#include +#include + +bool Eq(const uint8_t *Data, size_t Size, const char *Str) { + char Buff[1024]; + size_t Len = strlen(Str); + if (Size < Len) return false; + if (Len >= sizeof(Buff)) return false; + memcpy(Buff, (char*)Data, Len); + Buff[Len] = 0; + int res = strcmp(Buff, Str); + return res == 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Eq(Data, Size, "AAA") && + Size >= 3 && Eq(Data + 3, Size - 3, "BBBB") && + Size >= 7 && Eq(Data + 7, Size - 7, "CCCCCC") && + Size >= 14 && Data[13] == 42 + ) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + return 0; +} diff --git a/lib/Fuzzer/test/StrncmpTest.cpp b/lib/Fuzzer/test/StrncmpTest.cpp new file mode 100644 index 000000000000..55344d75e0b1 --- /dev/null +++ b/lib/Fuzzer/test/StrncmpTest.cpp @@ -0,0 +1,25 @@ +// Simple test for a fuzzer. The fuzzer must find a particular string. +#include +#include +#include +#include + +static volatile int sink; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + // TODO: check other sizes. + char *S = (char*)Data; + if (Size >= 8 && strncmp(S, "123", 8)) + sink = 1; + if (Size >= 8 && strncmp(S, "01234567", 8) == 0) { + if (Size >= 12 && strncmp(S + 8, "ABCD", 4) == 0) { + if (Size >= 14 && strncmp(S + 12, "XY", 2) == 0) { + if (Size >= 16 && strncmp(S + 14, "KLM", 3) == 0) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + } + } + } + return 0; +} diff --git a/lib/Fuzzer/test/SwitchTest.cpp b/lib/Fuzzer/test/SwitchTest.cpp new file mode 100644 index 000000000000..5de7fff74525 --- /dev/null +++ b/lib/Fuzzer/test/SwitchTest.cpp @@ -0,0 +1,55 @@ +// Simple test for a fuzzer. The fuzzer must find the interesting switch value. +#include +#include +#include +#include +#include + +static volatile int Sink; + +template <class T> +bool Switch(const uint8_t *Data, size_t Size) { + T X; + if (Size < sizeof(X)) return false; + memcpy(&X, Data, sizeof(X)); + switch (X) { + case 1: Sink = __LINE__; break; + case 101: Sink = __LINE__; break; + case 1001: Sink = __LINE__; break; + case 10001: Sink = __LINE__; break; + case 100001: Sink = __LINE__; break; + case 1000001: Sink = __LINE__; break; + case 10000001: Sink = __LINE__; break; + case 100000001: return true; + } + return false; +} + +bool ShortSwitch(const uint8_t *Data, size_t Size) { + short X; + if (Size < sizeof(short)) return false; + memcpy(&X, Data, sizeof(short)); + switch(X) { + case 42: Sink = __LINE__; break; + case 402: Sink = __LINE__; break; + case 4002: Sink = __LINE__; break; + case 5002: Sink = __LINE__; break; + case 7002: Sink = __LINE__; break; + case 9002: Sink = __LINE__; break; + case 14002: Sink = __LINE__; break; + case 21402: return true; + } + return false; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 4 && Switch<int>(Data, Size) && + Size >= 12 && Switch<uint64_t>(Data + 4, Size - 4) && + Size >= 14 && ShortSwitch(Data + 12, 2) + ) { + fprintf(stderr, "BINGO; Found the target, exiting\n"); + exit(1); + } + return 0; +} + diff --git a/lib/Fuzzer/test/TimeoutTest.cpp b/lib/Fuzzer/test/TimeoutTest.cpp index d541c058b648..71790ded95a2 100644 --- a/lib/Fuzzer/test/TimeoutTest.cpp +++ b/lib/Fuzzer/test/TimeoutTest.cpp @@ -6,7 +6,7 @@ static volatile int Sink; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -18,5 +18,6 @@ extern "C" void
LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/UninstrumentedTest.cpp b/lib/Fuzzer/test/UninstrumentedTest.cpp new file mode 100644 index 000000000000..c1730198d83f --- /dev/null +++ b/lib/Fuzzer/test/UninstrumentedTest.cpp @@ -0,0 +1,8 @@ +// This test should not be instrumented. +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + return 0; +} + diff --git a/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp b/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp index b46313dbafbf..59f83b57bfad 100644 --- a/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp +++ b/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp @@ -14,9 +14,11 @@ static const uint64_t kMagic = 8860221463604ULL; class MyFuzzer : public fuzzer::UserSuppliedFuzzer { public: - void TargetFunction(const uint8_t *Data, size_t Size) { - if (Size <= 10) return; - if (memcmp(Data, &kMagic, sizeof(kMagic))) return; + MyFuzzer(fuzzer::FuzzerRandomBase *Rand) + : fuzzer::UserSuppliedFuzzer(Rand) {} + int TargetFunction(const uint8_t *Data, size_t Size) { + if (Size <= 10) return 0; + if (memcmp(Data, &kMagic, sizeof(kMagic))) return 0; // It's hard to get here w/o advanced fuzzing techniques (e.g. cmp tracing). // So, we simply 'fix' the data in the custom mutator. if (Data[8] == 'H') { @@ -27,6 +29,7 @@ class MyFuzzer : public fuzzer::UserSuppliedFuzzer { } } } + return 0; } // Custom mutator. virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { @@ -35,13 +38,14 @@ class MyFuzzer : public fuzzer::UserSuppliedFuzzer { Size = sizeof(kMagic); // "Fix" the data, then mutate. memcpy(Data, &kMagic, std::min(MaxSize, sizeof(kMagic))); - return BasicMutate(Data + sizeof(kMagic), Size - sizeof(kMagic), - MaxSize - sizeof(kMagic)); + return fuzzer::UserSuppliedFuzzer::Mutate( + Data + sizeof(kMagic), Size - sizeof(kMagic), MaxSize - sizeof(kMagic)); } // No need to redefine CrossOver() here. 
}; int main(int argc, char **argv) { - MyFuzzer F; + fuzzer::FuzzerRandomLibc Rand(0); + MyFuzzer F(&Rand); fuzzer::FuzzerDriver(argc, argv, F); } diff --git a/lib/Fuzzer/test/dict1.txt b/lib/Fuzzer/test/dict1.txt new file mode 100644 index 000000000000..520d0cc7b7d8 --- /dev/null +++ b/lib/Fuzzer/test/dict1.txt @@ -0,0 +1,4 @@ +# Dictionary for SimpleDictionaryTest + +a="Elvis" +b="Presley" diff --git a/lib/Fuzzer/test/fuzzer-dfsan.test b/lib/Fuzzer/test/fuzzer-dfsan.test new file mode 100644 index 000000000000..982f143669d0 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-dfsan.test @@ -0,0 +1,22 @@ +CHECK1: BINGO +CHECK2: BINGO +CHECK3: BINGO +CHECK4: BINGO + +CHECK_DFSanCmpCallback: DFSanCmpCallback: PC +CHECK_DFSanSwitchCallback: DFSanSwitchCallback: PC + +RUN: not LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK1 +RUN: LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=100 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK2 +RUN: LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3 +RUN: LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3 +RUN: LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=100000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK4 +RUN: LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanSwitchCallback diff --git a/lib/Fuzzer/test/fuzzer-drill.test b/lib/Fuzzer/test/fuzzer-drill.test new file mode 100644 index 000000000000..b2fc1fecd276 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-drill.test @@ -0,0 +1,8 @@ +CHECK: BINGO +RUN: rm -rf FourIndependentBranchesTestCORPUS +RUN: mkdir FourIndependentBranchesTestCORPUS +RUN: LLVMFuzzer-FourIndependentBranchesTest -seed=1 -runs=100000 FourIndependentBranchesTestCORPUS +RUN: not LLVMFuzzer-FourIndependentBranchesTest -runs=100000 -drill=1 -jobs=200 FourIndependentBranchesTestCORPUS 2>&1 | FileCheck %s +RUN: rm -rf FourIndependentBranchesTestCORPUS + + diff --git a/lib/Fuzzer/test/fuzzer-timeout.test b/lib/Fuzzer/test/fuzzer-timeout.test new file mode 100644 index 000000000000..c3a9e8a3a9e0 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-timeout.test @@ -0,0 +1,13 @@ +RUN: not LLVMFuzzer-TimeoutTest -timeout=1 2>&1 | FileCheck %s --check-prefix=TimeoutTest +TimeoutTest: ALARM: working on the last Unit for +TimeoutTest: Test unit written to ./timeout- +TimeoutTest: == ERROR: libFuzzer: timeout after +TimeoutTest: #0 +TimeoutTest: #1 +TimeoutTest: #2 +TimeoutTest: SUMMARY: libFuzzer: timeout + +RUN: not LLVMFuzzer-TimeoutTest -timeout=1 -test_single_input=%S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInputTimeoutTest +SingleInputTimeoutTest: ALARM: working on the last Unit for +SingleInputTimeoutTest-NOT: Test unit written to ./timeout- + diff --git 
diff --git a/lib/Fuzzer/test/fuzzer-traces.test b/lib/Fuzzer/test/fuzzer-traces.test
new file mode 100644
index 000000000000..084cf30d6982
--- /dev/null
+++ b/lib/Fuzzer/test/fuzzer-traces.test
@@ -0,0 +1,19 @@
+CHECK: BINGO
+Done1000000: Done 1000000 runs in
+
+RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=1000001 2>&1 | FileCheck %s
+
+RUN: not LLVMFuzzer-MemcmpTest -use_traces=1 -seed=4294967295 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-StrncmpTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-StrncmpTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-StrcmpTest -use_traces=1 -seed=1 -runs=200000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-StrcmpTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SwitchTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index b8e672f0fec7..810410df6fc7 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -1,30 +1,32 @@
 CHECK: BINGO
+Done1000000: Done 1000000 runs in
 
-RUN: ./LLVMFuzzer-SimpleTest 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleTest 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-NullDerefTest -test_single_input=%S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInput
+SingleInput-NOT: Test unit written to ./crash-
 
-RUN: not ./LLVMFuzzer-InfiniteTest -timeout=2 2>&1 | FileCheck %s --check-prefix=InfiniteTest
-InfiniteTest: ALARM: working on the last Unit for
-InfiniteTest: CRASHED; file written to timeout
+RUN: LLVMFuzzer-SimpleCmpTest -max_total_time=1 2>&1 | FileCheck %s --check-prefix=MaxTotalTime
+MaxTotalTime: Done {{.*}} runs in {{.}} second(s)
 
-RUN: not ./LLVMFuzzer-TimeoutTest -timeout=5 2>&1 | FileCheck %s --check-prefix=TimeoutTest
-TimeoutTest: ALARM: working on the last Unit for
-TimeoutTest: CRASHED; file written to timeout
+RUN: not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=NullDerefTest
+NullDerefTest: Test unit written to ./crash-
+RUN: not LLVMFuzzer-NullDerefTest -artifact_prefix=ZZZ 2>&1 | FileCheck %s --check-prefix=NullDerefTestPrefix
+NullDerefTestPrefix: Test unit written to ZZZcrash-
+RUN: not LLVMFuzzer-NullDerefTest -artifact_prefix=ZZZ -exact_artifact_path=FOOBAR 2>&1 | FileCheck %s --check-prefix=NullDerefTestExactPath
+NullDerefTestExactPath: Test unit written to FOOBAR
 
-RUN: not ./LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=NullDerefTest
-NullDerefTest: CRASHED; file written to crash-
+#not LLVMFuzzer-FullCoverageSetTest -timeout=15 -seed=1 -mutate_depth=2 -use_full_coverage_set=1 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-FullCoverageSetTest -timeout=15 -seed=1 -mutate_depth=2 -use_full_coverage_set=1 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-FourIndependentBranchesTest -timeout=15 -seed=1 -use_full_coverage_set=1 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-CallerCalleeTest -cross_over=0 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-CallerCalleeTest -use_indir_calls=0 -cross_over=0 -max_len=6 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
-RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s
-RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-DFSanMemcmpTest-DFSan -use_traces=1 -seed=1 -runs=100 -timeout=5 2>&1 | FileCheck %s
-
-RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s
-
-RUN: not ./LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SimpleDictionaryTest -dict=%S/dict1.txt -seed=1 -runs=1000003 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleDictionaryTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
+UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
diff --git a/lib/Fuzzer/test/hi.txt b/lib/Fuzzer/test/hi.txt
new file mode 100644
index 000000000000..2f9031f0ec7b
--- /dev/null
+++ b/lib/Fuzzer/test/hi.txt
@@ -0,0 +1 @@
+Hi!
\ No newline at end of file
diff --git a/lib/Fuzzer/test/lit.cfg b/lib/Fuzzer/test/lit.cfg
index 834a16aefe73..2140a97668b3 100644
--- a/lib/Fuzzer/test/lit.cfg
+++ b/lib/Fuzzer/test/lit.cfg
@@ -5,10 +5,11 @@ config.test_format = lit.formats.ShTest(True)
 config.suffixes = ['.test']
 config.test_source_root = os.path.dirname(__file__)
 
-# Tweak PATH to include llvm tools dir.
+# Tweak PATH to include llvm tools dir and current exec dir.
 llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
 if (not llvm_tools_dir) or (not os.path.exists(llvm_tools_dir)):
   lit_config.fatal("Invalid llvm_tools_dir config attribute: %r" % llvm_tools_dir)
-path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
+path = os.path.pathsep.join((llvm_tools_dir, config.test_exec_root,
+                             config.environment['PATH']))
 config.environment['PATH'] = path
diff --git a/lib/Fuzzer/test/merge.test b/lib/Fuzzer/test/merge.test
new file mode 100644
index 000000000000..57ecc141bbfe
--- /dev/null
+++ b/lib/Fuzzer/test/merge.test
@@ -0,0 +1,29 @@
+CHECK: BINGO
+
+RUN: rm -rf %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T1 %tmp/T2
+RUN: echo F..... > %tmp/T1/1
+RUN: echo .U.... > %tmp/T1/2
+RUN: echo ..Z... > %tmp/T1/3
+
+# T1 has 3 elements, T2 is empty.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: Merge: running the initial corpus {{.*}} of 3 units
+CHECK1: Merge: written 0 out of 0 units
+
+RUN: echo ...Z.. > %tmp/T2/1
+RUN: echo ....E. > %tmp/T2/2
+RUN: echo .....R > %tmp/T2/3
+RUN: echo F..... > %tmp/T2/a
+RUN: echo .U.... > %tmp/T2/b
+RUN: echo ..Z... > %tmp/T2/c
+
+# T1 has 3 elements, T2 has 6 elements, only 3 are new.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: Merge: running the initial corpus {{.*}} of 3 units
+CHECK2: Merge: written 3 out of 6 units
+
+# Now, T1 has 6 units and T2 has no new interesting units.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: Merge: running the initial corpus {{.*}} of 6 units
+CHECK3: Merge: written 0 out of 6 units
diff --git a/lib/Fuzzer/test/trace-bb/CMakeLists.txt b/lib/Fuzzer/test/trace-bb/CMakeLists.txt
new file mode 100644
index 000000000000..99af019565b5
--- /dev/null
+++ b/lib/Fuzzer/test/trace-bb/CMakeLists.txt
@@ -0,0 +1,14 @@
+# These tests are not instrumented with coverage.
+
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -fsanitize-coverage=edge,trace-bb")
+
+foreach(Test ${TraceBBTests})
+  add_executable(LLVMFuzzer-${Test}-TraceBB
+    ../${Test}.cpp
+    )
+  target_link_libraries(LLVMFuzzer-${Test}-TraceBB
+    LLVMFuzzer
+    )
+endforeach()
+
diff --git a/lib/Fuzzer/test/uninstrumented/CMakeLists.txt b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
new file mode 100644
index 000000000000..443ba3716f66
--- /dev/null
+++ b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
@@ -0,0 +1,14 @@
+# These tests are not instrumented with coverage.
+
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all")
+
+foreach(Test ${UninstrumentedTests})
+  add_executable(LLVMFuzzer-${Test}-Uninstrumented
+    ../${Test}.cpp
+    )
+  target_link_libraries(LLVMFuzzer-${Test}-Uninstrumented
+    LLVMFuzzer
+    )
+endforeach()
+
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index b553f11018c7..185db47f07e5 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -102,17 +103,9 @@ static OrderMap orderModule(const Module *M) {
     orderValue(&A, OM);
   }
   for (const Function &F : *M) {
-    if (F.hasPrefixData())
-      if (!isa<GlobalValue>(F.getPrefixData()))
-        orderValue(F.getPrefixData(), OM);
-
-    if (F.hasPrologueData())
-      if (!isa<GlobalValue>(F.getPrologueData()))
-        orderValue(F.getPrologueData(), OM);
-
-    if (F.hasPersonalityFn())
-      if (!isa<GlobalValue>(F.getPersonalityFn()))
-        orderValue(F.getPersonalityFn(), OM);
+    for (const Use &U : F.operands())
+      if (!isa<GlobalValue>(U.get()))
+        orderValue(U.get(), OM);
 
     orderValue(&F, OM);
 
@@ -232,8 +225,7 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
   // We want to visit the functions backward now so we can list function-local
   // constants in the last Function they're used in.  Module-level constants
   // have already been visited above.
-  for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
-    const Function &F = *I;
+  for (const Function &F : make_range(M->rbegin(), M->rend())) {
     if (F.isDeclaration())
       continue;
     for (const BasicBlock &BB : F)
@@ -263,8 +255,8 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
   for (const GlobalAlias &A : M->aliases())
     predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
   for (const Function &F : *M)
-    if (F.hasPrefixData())
-      predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+    for (const Use &U : F.operands())
+      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
 
   return Stack;
 }
@@ -304,6 +296,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::AnyReg:        Out << "anyregcc"; break;
   case CallingConv::PreserveMost:  Out << "preserve_mostcc"; break;
   case CallingConv::PreserveAll:   Out << "preserve_allcc"; break;
+  case CallingConv::CXX_FAST_TLS:  Out << "cxx_fast_tlscc"; break;
   case CallingConv::GHC:           Out << "ghccc"; break;
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
@@ -320,6 +313,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::X86_64_Win64:  Out << "x86_64_win64cc"; break;
   case CallingConv::SPIR_FUNC:     Out << "spir_func"; break;
   case CallingConv::SPIR_KERNEL:   Out << "spir_kernel"; break;
+  case CallingConv::X86_INTR:      Out << "x86_intrcc"; break;
+  case CallingConv::HHVM:          Out << "hhvmcc"; break;
+  case CallingConv::HHVM_C:        Out << "hhvm_ccc"; break;
   }
 }
@@ -343,18 +339,8 @@ enum PrefixType {
   NoPrefix
 };
 
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) {
   assert(!Name.empty() && "Cannot get empty name!");
-  switch (Prefix) {
-  case NoPrefix: break;
-  case GlobalPrefix: OS << '@'; break;
-  case ComdatPrefix: OS << '$'; break;
-  case LabelPrefix: break;
-  case LocalPrefix: OS << '%'; break;
-  }
 
   // Scan the name to see if it needs quotes first.
   bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
@@ -386,9 +372,31 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
   OS << '"';
 }
 
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+  switch (Prefix) {
+  case NoPrefix:
+    break;
+  case GlobalPrefix:
+    OS << '@';
+    break;
+  case ComdatPrefix:
+    OS << '$';
+    break;
+  case LabelPrefix:
+    break;
+  case LocalPrefix:
+    OS << '%';
+    break;
+  }
+  printLLVMNameWithoutPrefix(OS, Name);
+}
+
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
 static void PrintLLVMName(raw_ostream &OS, const Value *V) {
   PrintLLVMName(OS, V->getName(),
                 isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
 }
@@ -456,6 +464,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   case Type::LabelTyID:     OS << "label"; return;
   case Type::MetadataTyID:  OS << "metadata"; return;
   case Type::X86_MMXTyID:   OS << "x86_mmx"; return;
+  case Type::TokenTyID:     OS << "token"; return;
   case Type::IntegerTyID:
     OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
     return;
@@ -691,8 +700,9 @@ void ModuleSlotTracker::incorporateFunction(const Function &F) {
   this->F = &F;
 }
 
-static SlotTracker *createSlotTracker(const Module *M) {
-  return new SlotTracker(M);
+int ModuleSlotTracker::getLocalSlot(const Value *V) {
+  assert(F && "No function incorporated");
+  return Machine->getLocalSlot(V);
 }
 
 static SlotTracker *createSlotTracker(const Value *V) {
@@ -802,7 +812,7 @@ void SlotTracker::processFunction() {
   for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
       AE = TheFunction->arg_end(); AI != AE; ++AI)
     if (!AI->hasName())
-      CreateFunctionSlot(AI);
+      CreateFunctionSlot(&*AI);
 
   ST_DEBUG("Inserting Instructions:\n");
 
@@ -1093,11 +1103,10 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     // the value back and get the same value.
     //
     bool ignored;
-    bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
     bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
     bool isInf = CFP->getValueAPF().isInfinity();
    bool isNaN = CFP->getValueAPF().isNaN();
-    if (!isHalf && !isInf && !isNaN) {
+    if (!isInf && !isNaN) {
       double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
                               CFP->getValueAPF().convertToFloat();
       SmallString<128> StrVal;
@@ -1123,15 +1132,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     // x86, so we must not use these types.
     static_assert(sizeof(double) == sizeof(uint64_t),
                   "assuming that double is 64 bits!");
-    char Buffer[40];
     APFloat apf = CFP->getValueAPF();
-    // Halves and floats are represented in ASCII IR as double, convert.
+    // Floats are represented in ASCII IR as double, convert.
     if (!isDouble)
      apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
                   &ignored);
-    Out << "0x" <<
-            utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
-                          Buffer+40);
+    Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true);
     return;
   }
 
@@ -1139,60 +1145,32 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
   // These appear as a magic letter identifying the type, then a
   // fixed number of hex digits.
   Out << "0x";
-  // Bit position, in the current word, of the next nibble to print.
-  int shiftcount;
-
+  APInt API = CFP->getValueAPF().bitcastToAPInt();
   if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
     Out << 'K';
-    // api needed to prevent premature destruction
-    APInt api = CFP->getValueAPF().bitcastToAPInt();
-    const uint64_t* p = api.getRawData();
-    uint64_t word = p[1];
-    shiftcount = 12;
-    int width = api.getBitWidth();
-    for (int j=0; j<width; j+=4, shiftcount-=4) {
-      unsigned int nibble = (word>>shiftcount) & 15;
-      if (nibble < 10)
-        Out << (unsigned char)(nibble + '0');
-      else
-        Out << (unsigned char)(nibble - 10 + 'A');
-      if (shiftcount == 0 && j+4 < width) {
-        word = *p;
-        shiftcount = 64;
-        if (width-j-4 < 64)
-          shiftcount = width-j-4;
-      }
-    }
+    Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
     return;
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
-    shiftcount = 60;
     Out << 'L';
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
-    shiftcount = 60;
     Out << 'M';
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
-    shiftcount = 12;
     Out << 'H';
+    Out << format_hex_no_prefix(API.getZExtValue(), 4,
+                                /*Upper=*/true);
   } else
     llvm_unreachable("Unsupported floating point type");
-  // api needed to prevent premature destruction
-  APInt api = CFP->getValueAPF().bitcastToAPInt();
-  const uint64_t* p = api.getRawData();
-  uint64_t word = *p;
-  int width = api.getBitWidth();
-  for (int j=0; j<width; j+=4, shiftcount-=4) {
-    unsigned int nibble = (word>>shiftcount) & 15;
-    if (nibble < 10)
-      Out << (unsigned char)(nibble + '0');
-    else
-      Out << (unsigned char)(nibble - 10 + 'A');
-    if (shiftcount == 0 && j+4 < width) {
-      word = *(++p);
-      shiftcount = 64;
-      if (width-j-4 < 64)
-        shiftcount = width-j-4;
-    }
-  }
   return;
 }
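The hand-rolled nibble loops above are replaced with the format_hex helpers from the llvm/Support/Format.h header this patch includes. A short sketch (not part of the patch) of the formatting behavior the new code relies on:

// Sketch, not from the patch: what the Format.h helpers produce.
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

void demo(llvm::raw_ostream &OS) {
  // format_hex prints a 0x prefix; Width pads the digits, Upper picks A-F.
  OS << llvm::format_hex(0x3ff0000000000000ULL, 0, /*Upper=*/true);
  // -> 0x3FF0000000000000
  // format_hex_no_prefix omits the 0x, so the 'K'/'L'/'M'/'H' magic letter
  // written by the caller stays directly in front of the digits.
  OS << llvm::format_hex_no_prefix(0x3c00ULL, 4, /*Upper=*/true);
  // -> 3C00
}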
@@ -1313,6 +1291,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     return;
   }
 
+  if (isa<ConstantTokenNone>(CV)) {
+    Out << "none";
+    return;
+  }
+
   if (isa<UndefValue>(CV)) {
     Out << "undef";
     return;
@@ -1326,10 +1309,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     Out << " (";
 
     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
-      TypePrinter.print(
-          cast<PointerType>(GEP->getPointerOperandType()->getScalarType())
-              ->getElementType(),
-          Out);
+      TypePrinter.print(GEP->getSourceElementType(), Out);
       Out << ", ";
     }
 
@@ -1409,6 +1389,7 @@ struct MDFieldPrinter {
       : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
   }
   void printTag(const DINode *N);
+  void printMacinfoType(const DIMacroNode *N);
   void printString(StringRef Name, StringRef Value,
                    bool ShouldSkipEmpty = true);
   void printMetadata(StringRef Name, const Metadata *MD,
@@ -1431,6 +1412,14 @@ void MDFieldPrinter::printTag(const DINode *N) {
     Out << N->getTag();
 }
 
+void MDFieldPrinter::printMacinfoType(const DIMacroNode *N) {
+  Out << FS << "type: ";
+  if (const char *Type = dwarf::MacinfoString(N->getMacinfoType()))
+    Out << Type;
+  else
+    Out << N->getMacinfoType();
+}
+
 void MDFieldPrinter::printString(StringRef Name, StringRef Value,
                                  bool ShouldSkipEmpty) {
   if (ShouldSkipEmpty && Value.empty())
@@ -1656,6 +1645,7 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Printer.printMetadata("subprograms", N->getRawSubprograms());
   Printer.printMetadata("globals", N->getRawGlobalVariables());
   Printer.printMetadata("imports", N->getRawImportedEntities());
+  Printer.printMetadata("macros", N->getRawMacros());
   Printer.printInt("dwoId", N->getDWOId());
   Out << ")";
 }
@@ -1680,7 +1670,6 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
   Printer.printInt("virtualIndex", N->getVirtualIndex());
   Printer.printDIFlags("flags", N->getFlags());
   Printer.printBool("isOptimized", N->isOptimized());
-  Printer.printMetadata("function", N->getRawFunction());
   Printer.printMetadata("templateParams", N->getRawTemplateParams());
   Printer.printMetadata("declaration", N->getRawDeclaration());
   Printer.printMetadata("variables", N->getRawVariables());
@@ -1725,6 +1714,29 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
   Out << ")";
 }
 
+static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
+                         TypePrinting *TypePrinter, SlotTracker *Machine,
+                         const Module *Context) {
+  Out << "!DIMacro(";
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printMacinfoType(N);
+  Printer.printInt("line", N->getLine());
+  Printer.printString("name", N->getName());
+  Printer.printString("value", N->getValue());
+  Out << ")";
+}
+
+static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
+                             TypePrinting *TypePrinter, SlotTracker *Machine,
+                             const Module *Context) {
+  Out << "!DIMacroFile(";
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printInt("line", N->getLine());
+  Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
+  Printer.printMetadata("nodes", N->getRawElements());
+  Out << ")";
+}
+
 static void writeDIModule(raw_ostream &Out, const DIModule *N,
                           TypePrinting *TypePrinter, SlotTracker *Machine,
                           const Module *Context) {
@@ -1789,11 +1801,8 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
                                  SlotTracker *Machine, const Module *Context) {
   Out << "!DILocalVariable(";
   MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
-  Printer.printTag(N);
   Printer.printString("name", N->getName());
-  Printer.printInt("arg", N->getArg(),
-                   /* ShouldSkipZero */
-                   N->getTag() == dwarf::DW_TAG_auto_variable);
+  Printer.printInt("arg", N->getArg());
   Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
   Printer.printMetadata("file", N->getRawFile());
   Printer.printInt("line", N->getLine());
@@ -1998,6 +2007,7 @@ class AssemblyWriter {
   TypePrinting TypePrinter;
   AssemblyAnnotationWriter *AnnotationWriter;
   SetVector<const Comdat *> Comdats;
+  bool IsForDebug;
   bool ShouldPreserveUseListOrder;
   UseListOrderStack UseListOrders;
   SmallVector<StringRef, 8> MDNames;
@@ -2005,12 +2015,7 @@ public:
   /// Construct an AssemblyWriter with an external SlotTracker
   AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
-                 AssemblyAnnotationWriter *AAW,
-                 bool ShouldPreserveUseListOrder = false);
-
-  /// Construct an AssemblyWriter with an internally allocated SlotTracker
-  AssemblyWriter(formatted_raw_ostream &o, const Module *M,
-                 AssemblyAnnotationWriter *AAW,
+                 AssemblyAnnotationWriter *AAW, bool IsForDebug,
                  bool ShouldPreserveUseListOrder = false);
 
   void printMDNodeBody(const MDNode *MD);
@@ -2020,6 +2025,7 @@ public:
   void writeOperand(const Value *Op, bool PrintType);
   void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+  void writeOperandBundles(ImmutableCallSite CS);
   void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
   void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
                           AtomicOrdering FailureOrdering,
@@ -2043,8 +2049,6 @@ public:
   void printUseLists(const Function *F);
 
 private:
-  void init();
-
   /// \brief Print out metadata attachments.
   void printMetadataAttachments(
       const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
@@ -2060,7 +2064,12 @@ private:
 };
 } // namespace
 
-void AssemblyWriter::init() {
+AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+                               const Module *M, AssemblyAnnotationWriter *AAW,
+                               bool IsForDebug, bool ShouldPreserveUseListOrder)
+    : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
+      IsForDebug(IsForDebug),
+      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
   if (!TheModule)
     return;
   TypePrinter.incorporateTypes(*TheModule);
@@ -2072,23 +2081,6 @@ void AssemblyWriter::init() {
       Comdats.insert(C);
 }
 
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
-                               const Module *M, AssemblyAnnotationWriter *AAW,
-                               bool ShouldPreserveUseListOrder)
-    : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
-      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
-  init();
-}
-
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
-                               AssemblyAnnotationWriter *AAW,
-                               bool ShouldPreserveUseListOrder)
-    : Out(o), TheModule(M), SlotTrackerStorage(createSlotTracker(M)),
-      Machine(*SlotTrackerStorage), AnnotationWriter(AAW),
-      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
-  init();
-}
-
 void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
   if (!Operand) {
     Out << "<null operand!>";
@@ -2170,6 +2162,43 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
   WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
 }
 
+void AssemblyWriter::writeOperandBundles(ImmutableCallSite CS) {
+  if (!CS.hasOperandBundles())
+    return;
+
+  Out << " [ ";
+
+  bool FirstBundle = true;
+  for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+    OperandBundleUse BU = CS.getOperandBundleAt(i);
+
+    if (!FirstBundle)
+      Out << ", ";
+    FirstBundle = false;
+
+    Out << '"';
+    PrintEscapedString(BU.getTagName(), Out);
+    Out << '"';
+
+    Out << '(';
+
+    bool FirstInput = true;
+    for (const auto &Input : BU.Inputs) {
+      if (!FirstInput)
+        Out << ", ";
+      FirstInput = false;
+
+      TypePrinter.print(Input->getType(), Out);
+      Out << " ";
+      WriteAsOperandInternal(Out, Input, &TypePrinter, &Machine, TheModule);
+    }
+
+    Out << ')';
+  }
+
+  Out << " ]";
+}
+
 void AssemblyWriter::printModule(const Module *M) {
   Machine.initialize();
 
@@ -2422,6 +2451,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
 
   Out << "alias ";
 
+  TypePrinter.print(GA->getValueType(), Out);
+
+  Out << ", ";
+
   const Constant *Aliasee = GA->getAliasee();
 
   if (!Aliasee) {
@@ -2536,28 +2569,26 @@ void AssemblyWriter::printFunction(const Function *F) {
   Machine.incorporateFunction(F);
 
   // Loop over the arguments, printing them...
-
-  unsigned Idx = 1;
-  if (!F->isDeclaration()) {
-    // If this isn't a declaration, print the argument names as well.
-    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-         I != E; ++I) {
+  if (F->isDeclaration() && !IsForDebug) {
+    // We're only interested in the type here - don't print argument names.
+    for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
       // Insert commas as we go... the first arg doesn't get a comma
-      if (I != F->arg_begin()) Out << ", ";
-      printArgument(I, Attrs, Idx);
-      Idx++;
+      if (I)
+        Out << ", ";
+      // Output type...
+      TypePrinter.print(FT->getParamType(I), Out);
+
+      if (Attrs.hasAttributes(I + 1))
+        Out << ' ' << Attrs.getAsString(I + 1);
     }
   } else {
-    // Otherwise, print the types from the function type.
-    for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+    // The arguments are meaningful here, print them in detail.
+    unsigned Idx = 1;
+    for (const Argument &Arg : F->args()) {
       // Insert commas as we go... the first arg doesn't get a comma
-      if (i) Out << ", ";
-
-      // Output type...
-      TypePrinter.print(FT->getParamType(i), Out);
-
-      if (Attrs.hasAttributes(i+1))
-        Out << ' ' << Attrs.getAsString(i+1);
+      if (Idx != 1)
+        Out << ", ";
+      printArgument(&Arg, Attrs, Idx++);
     }
   }
 
@@ -2604,7 +2635,7 @@ void AssemblyWriter::printFunction(const Function *F) {
     Out << " {";
     // Output all of the function's basic blocks.
     for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
-      printBasicBlock(I);
+      printBasicBlock(&*I);
 
     // Output the function's use-lists.
     printUseLists(F);
@@ -2738,6 +2769,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       Out << "musttail ";
     else if (CI->isTailCall())
       Out << "tail ";
+    else if (CI->isNoTailCall())
+      Out << "notail ";
   }
 
   // Print out the opcode...
@@ -2850,8 +2883,50 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(LPI->getClause(i), true);
     }
+  } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(&I)) {
+    Out << " within ";
+    writeOperand(CatchSwitch->getParentPad(), /*PrintType=*/false);
+    Out << " [";
+    unsigned Op = 0;
+    for (const BasicBlock *PadBB : CatchSwitch->handlers()) {
+      if (Op > 0)
+        Out << ", ";
+      writeOperand(PadBB, /*PrintType=*/true);
+      ++Op;
+    }
+    Out << "] unwind ";
+    if (const BasicBlock *UnwindDest = CatchSwitch->getUnwindDest())
+      writeOperand(UnwindDest, /*PrintType=*/true);
+    else
+      Out << "to caller";
+  } else if (const auto *FPI = dyn_cast<FuncletPadInst>(&I)) {
+    Out << " within ";
+    writeOperand(FPI->getParentPad(), /*PrintType=*/false);
+    Out << " [";
+    for (unsigned Op = 0, NumOps = FPI->getNumArgOperands(); Op < NumOps;
+         ++Op) {
+      if (Op > 0)
+        Out << ", ";
+      writeOperand(FPI->getArgOperand(Op), /*PrintType=*/true);
+    }
+    Out << ']';
   } else if (isa<ReturnInst>(I) && !Operand) {
     Out << " void";
+  } else if (const auto *CRI = dyn_cast<CatchReturnInst>(&I)) {
+    Out << " from ";
+    writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+    Out << " to ";
+    writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+  } else if (const auto *CRI = dyn_cast<CleanupReturnInst>(&I)) {
+    Out << " from ";
+    writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+    Out << " unwind ";
+    if (CRI->hasUnwindDest())
+      writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+    else
+      Out << "to caller";
   } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
     // Print the calling convention being used.
     if (CI->getCallingConv() != CallingConv::C) {
@@ -2892,6 +2967,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     Out << ')';
     if (PAL.hasAttributes(AttributeSet::FunctionIndex))
       Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+    writeOperandBundles(CI);
+
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
     Operand = II->getCalledValue();
     FunctionType *FTy = cast<FunctionType>(II->getFunctionType());
@@ -2926,6 +3004,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     if (PAL.hasAttributes(AttributeSet::FunctionIndex))
       Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
 
+    writeOperandBundles(II);
+
     Out << "\n          to ";
     writeOperand(II->getNormalDest(), true);
     Out << " unwind ";
@@ -3138,29 +3218,23 @@ void AssemblyWriter::printUseLists(const Function *F) {
 // External Interface declarations
 //===----------------------------------------------------------------------===//
 
-void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
-  SlotTracker SlotTable(this->getParent());
-  formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, this->getParent(), AAW);
-  W.printFunction(this);
-}
-
 void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
-                   bool ShouldPreserveUseListOrder) const {
+                   bool ShouldPreserveUseListOrder, bool IsForDebug) const {
   SlotTracker SlotTable(this);
   formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder);
+  AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
+                   ShouldPreserveUseListOrder);
   W.printModule(this);
 }
 
-void NamedMDNode::print(raw_ostream &ROS) const {
+void NamedMDNode::print(raw_ostream &ROS, bool IsForDebug) const {
   SlotTracker SlotTable(getParent());
   formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, getParent(), nullptr);
+  AssemblyWriter W(OS, SlotTable, getParent(), nullptr, IsForDebug);
   W.printNamedMDNode(this);
 }
 
-void Comdat::print(raw_ostream &ROS) const {
+void Comdat::print(raw_ostream &ROS, bool /*IsForDebug*/) const {
   PrintLLVMName(ROS, getName(), ComdatPrefix);
   ROS << " = comdat ";
 
@@ -3185,7 +3259,7 @@ void Comdat::print(raw_ostream &ROS) const {
   ROS << '\n';
 }
 
-void Type::print(raw_ostream &OS) const {
+void Type::print(raw_ostream &OS, bool /*IsForDebug*/) const {
   TypePrinting TP;
   TP.print(const_cast<Type*>(this), OS);
 
@@ -3208,7 +3282,7 @@ static bool isReferencingMDNode(const Instruction &I) {
   return false;
 }
 
-void Value::print(raw_ostream &ROS) const {
+void Value::print(raw_ostream &ROS, bool IsForDebug) const {
   bool ShouldInitializeAllMetadata = false;
   if (auto *I = dyn_cast<Instruction>(this))
     ShouldInitializeAllMetadata = isReferencingMDNode(*I);
@@ -3216,10 +3290,11 @@
     ShouldInitializeAllMetadata = true;
 
   ModuleSlotTracker MST(getModuleFromVal(this), ShouldInitializeAllMetadata);
-  print(ROS, MST);
+  print(ROS, MST, IsForDebug);
 }
 
-void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
+void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST,
+                  bool IsForDebug) const {
   formatted_raw_ostream OS(ROS);
   SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
   SlotTracker &SlotTable =
@@ -3231,14 +3306,14 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
   if (const Instruction *I = dyn_cast<Instruction>(this)) {
     incorporateFunction(I->getParent() ? I->getParent()->getParent() : nullptr);
-    AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
+    AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr, IsForDebug);
     W.printInstruction(*I);
   } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
     incorporateFunction(BB->getParent());
-    AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
+    AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr, IsForDebug);
     W.printBasicBlock(BB);
   } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
-    AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
+    AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr, IsForDebug);
     if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
       W.printGlobal(V);
    else if (const Function *F = dyn_cast<Function>(GV))
@@ -3261,7 +3336,7 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
 
 /// Print without a type, skipping the TypePrinting object.
 ///
-/// \return \c true iff printing was succesful.
+/// \return \c true iff printing was successful.
 static bool printWithoutType(const Value &V, raw_ostream &O,
                              SlotTracker *Machine, const Module *M) {
   if (V.hasName() || isa<GlobalValue>(V) ||
@@ -3340,41 +3415,45 @@ void Metadata::printAsOperand(raw_ostream &OS, ModuleSlotTracker &MST,
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ true);
 }
 
-void Metadata::print(raw_ostream &OS, const Module *M) const {
+void Metadata::print(raw_ostream &OS, const Module *M,
+                     bool /*IsForDebug*/) const {
   ModuleSlotTracker MST(M, isa<MDNode>(this));
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
 }
 
 void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
-                     const Module *M) const {
+                     const Module *M, bool /*IsForDebug*/) const {
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
 }
 
 // Value::dump - allow easy printing of Values from the debugger.
 LLVM_DUMP_METHOD
-void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Value::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
 
 // Type::dump - allow easy printing of Types from the debugger.
 LLVM_DUMP_METHOD
-void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Type::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
 
 // Module::dump() - Allow printing of Modules from the debugger.
 LLVM_DUMP_METHOD
-void Module::dump() const { print(dbgs(), nullptr); }
+void Module::dump() const {
+  print(dbgs(), nullptr,
+        /*ShouldPreserveUseListOrder=*/false, /*IsForDebug=*/true);
+}
 
 // \brief Allow printing of Comdats from the debugger.
 LLVM_DUMP_METHOD
-void Comdat::dump() const { print(dbgs()); }
+void Comdat::dump() const { print(dbgs(), /*IsForDebug=*/true); }
 
 // NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
 LLVM_DUMP_METHOD
-void NamedMDNode::dump() const { print(dbgs()); }
+void NamedMDNode::dump() const { print(dbgs(), /*IsForDebug=*/true); }
 
 LLVM_DUMP_METHOD
 void Metadata::dump() const { dump(nullptr); }
 
 LLVM_DUMP_METHOD
 void Metadata::dump(const Module *M) const {
-  print(dbgs(), M);
+  print(dbgs(), M, /*IsForDebug=*/true);
   dbgs() << '\n';
 }
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 6f338ae835fa..659f9568b7c6 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/Support/TrailingObjects.h"
 #include <string>
 
 namespace llvm {
@@ -141,13 +142,16 @@ public:
 /// \class
 /// \brief This class represents a group of attributes that apply to one
 /// element: function, return type, or parameter.
-class AttributeSetNode : public FoldingSetNode {
+class AttributeSetNode final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetNode, Attribute> {
+  friend TrailingObjects;
+
   unsigned NumAttrs; ///< Number of attributes in this node.
 
   AttributeSetNode(ArrayRef<Attribute> Attrs) : NumAttrs(Attrs.size()) {
     // There's memory after the node where we can store the entries in.
-    std::copy(Attrs.begin(), Attrs.end(),
-              reinterpret_cast<Attribute *>(this + 1));
+    std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
   }
 
   // AttributesSetNode is uniqued, these should not be publicly available.
@@ -170,7 +174,7 @@ public:
   std::string getAsString(bool InAttrGrp) const;
 
   typedef const Attribute *iterator;
-  iterator begin() const { return reinterpret_cast<iterator>(this + 1); }
+  iterator begin() const { return getTrailingObjects<Attribute>(); }
   iterator end() const { return begin() + NumAttrs; }
 
   void Profile(FoldingSetNodeID &ID) const {
@@ -181,27 +185,29 @@ public:
       AttrList[I].Profile(ID);
   }
 };
-static_assert(
-    AlignOf<AttributeSetNode>::Alignment >= AlignOf<Attribute>::Alignment,
-    "Alignment is insufficient for objects appended to AttributeSetNode");
+
+typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
 
 //===----------------------------------------------------------------------===//
 /// \class
 /// \brief This class represents a set of attributes that apply to the function,
 /// return type, and parameters.
-class AttributeSetImpl : public FoldingSetNode {
+class AttributeSetImpl final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetImpl, IndexAttrPair> {
   friend class AttributeSet;
-
-public:
-  typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
+  friend TrailingObjects;
 
 private:
   LLVMContext &Context;
   unsigned NumAttrs; ///< Number of entries in this set.
 
+  // Helper fn for TrailingObjects class.
+  size_t numTrailingObjects(OverloadToken<IndexAttrPair>) { return NumAttrs; }
+
   /// \brief Return a pointer to the IndexAttrPair for the specified slot.
   const IndexAttrPair *getNode(unsigned Slot) const {
-    return reinterpret_cast<const IndexAttrPair *>(this + 1) + Slot;
+    return getTrailingObjects<IndexAttrPair>() + Slot;
   }
 
   // AttributesSet is uniqued, these should not be publicly available.
@@ -222,8 +228,7 @@ public:
     }
 #endif
     // There's memory after the node where we can store the entries in.
-    std::copy(Attrs.begin(), Attrs.end(),
-              reinterpret_cast<IndexAttrPair *>(this + 1));
+    std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<IndexAttrPair>());
   }
 
   /// \brief Get the context that created this AttributeSetImpl.
@@ -273,10 +278,6 @@ public:
   void dump() const;
 };
-static_assert(
-    AlignOf<AttributeSetImpl>::Alignment >=
-        AlignOf<AttributeSetImpl::IndexAttrPair>::Alignment,
-    "Alignment is insufficient for objects appended to AttributeSetImpl");
 
 } // end llvm namespace
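Both classes now obtain their variable-length tails through llvm::TrailingObjects instead of pointer arithmetic on this + 1. A compressed sketch (a hypothetical Node class, not code from the patch) of the pattern; the base computes the tail's size and alignment, which is why the hand-written static_asserts above could be deleted:

// Sketch, not from the patch: the TrailingObjects pattern adopted above.
#include "llvm/Support/TrailingObjects.h"
#include <new>

class Node final : private llvm::TrailingObjects<Node, int> {
  friend TrailingObjects;

  unsigned NumInts;
  explicit Node(unsigned N) : NumInts(N) {}

public:
  static Node *create(unsigned N) {
    // totalSizeToAlloc<int>(N) covers sizeof(Node) plus N trailing ints,
    // suitably aligned; the entries live immediately after the object.
    void *Mem = ::operator new(totalSizeToAlloc<int>(N));
    return new (Mem) Node(N);
  }
  int *ints() { return getTrailingObjects<int>(); }
};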
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 546a98670a29..bcf7dc365ce5 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
 #include "AttributeImpl.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/STLExtras.h"
@@ -120,28 +121,28 @@ Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
   assert((isEnumAttribute() || isIntAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
-  return pImpl ? pImpl->getKindAsEnum() : None;
+  return pImpl->getKindAsEnum();
 }
 
 uint64_t Attribute::getValueAsInt() const {
   if (!pImpl) return 0;
   assert(isIntAttribute() &&
          "Expected the attribute to be an integer attribute!");
-  return pImpl ? pImpl->getValueAsInt() : 0;
+  return pImpl->getValueAsInt();
 }
 
 StringRef Attribute::getKindAsString() const {
   if (!pImpl) return StringRef();
   assert(isStringAttribute() &&
          "Invalid attribute type to get the kind as a string!");
-  return pImpl ? pImpl->getKindAsString() : StringRef();
+  return pImpl->getKindAsString();
 }
 
 StringRef Attribute::getValueAsString() const {
   if (!pImpl) return StringRef();
   assert(isStringAttribute() &&
          "Invalid attribute type to get the value as a string!");
-  return pImpl ? pImpl->getValueAsString() : StringRef();
+  return pImpl->getValueAsString();
 }
 
 bool Attribute::hasAttribute(AttrKind Kind) const {
@@ -198,6 +199,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
+  if (hasAttribute(Attribute::InaccessibleMemOnly))
+    return "inaccessiblememonly";
+  if (hasAttribute(Attribute::InaccessibleMemOrArgMemOnly))
+    return "inaccessiblemem_or_argmemonly";
   if (hasAttribute(Attribute::InAlloca))
     return "inalloca";
   if (hasAttribute(Attribute::InlineHint))
@@ -232,6 +237,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "noredzone";
   if (hasAttribute(Attribute::NoReturn))
     return "noreturn";
+  if (hasAttribute(Attribute::NoRecurse))
+    return "norecurse";
   if (hasAttribute(Attribute::NoUnwind))
     return "nounwind";
   if (hasAttribute(Attribute::OptimizeNone))
@@ -442,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
  case Attribute::JumpTable:       return 1ULL << 45;
   case Attribute::Convergent:      return 1ULL << 46;
   case Attribute::SafeStack:       return 1ULL << 47;
+  case Attribute::NoRecurse:       return 1ULL << 48;
+  case Attribute::InaccessibleMemOnly:         return 1ULL << 49;
+  case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50;
   case Attribute::Dereferenceable:
     llvm_unreachable("dereferenceable attribute not supported in raw format");
     break;
@@ -472,9 +482,8 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
   SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
   array_pod_sort(SortedAttrs.begin(), SortedAttrs.end());
 
-  for (SmallVectorImpl<Attribute>::iterator I = SortedAttrs.begin(),
-         E = SortedAttrs.end(); I != E; ++I)
-    I->Profile(ID);
+  for (Attribute Attr : SortedAttrs)
+    Attr.Profile(ID);
 
   void *InsertPoint;
   AttributeSetNode *PA =
@@ -484,8 +493,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
   // new one and insert it.
   if (!PA) {
     // Coallocate entries after the AttributeSetNode itself.
-    void *Mem = ::operator new(sizeof(AttributeSetNode) +
-                               sizeof(Attribute) * SortedAttrs.size());
+    void *Mem = ::operator new(totalSizeToAlloc<Attribute>(SortedAttrs.size()));
     PA = new (Mem) AttributeSetNode(SortedAttrs);
     pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
   }
@@ -617,9 +625,8 @@ AttributeSet::getImpl(LLVMContext &C,
   // create a new one and insert it.
   if (!PA) {
     // Coallocate entries after the AttributeSetImpl itself.
-    void *Mem = ::operator new(sizeof(AttributeSetImpl) +
-                               sizeof(std::pair<unsigned, AttributeSetNode*>) *
-                               Attrs.size());
+    void *Mem = ::operator new(
+        AttributeSetImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
     PA = new (Mem) AttributeSetImpl(C, Attrs);
     pImpl->AttrsLists.InsertNode(PA, InsertPoint);
   }
@@ -684,22 +691,26 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
     if (!B.contains(Kind))
       continue;
 
-    if (Kind == Attribute::Alignment)
-      Attrs.push_back(std::make_pair(Index, Attribute::
-                                     getWithAlignment(C, B.getAlignment())));
-    else if (Kind == Attribute::StackAlignment)
-      Attrs.push_back(std::make_pair(Index, Attribute::
-                              getWithStackAlignment(C, B.getStackAlignment())));
-    else if (Kind == Attribute::Dereferenceable)
-      Attrs.push_back(std::make_pair(Index,
-                                     Attribute::getWithDereferenceableBytes(C,
-                                       B.getDereferenceableBytes())));
-    else if (Kind == Attribute::DereferenceableOrNull)
-      Attrs.push_back(
-          std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes(
-                                    C, B.getDereferenceableOrNullBytes())));
-    else
-      Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
+    Attribute Attr;
+    switch (Kind) {
+    case Attribute::Alignment:
+      Attr = Attribute::getWithAlignment(C, B.getAlignment());
+      break;
+    case Attribute::StackAlignment:
+      Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+      break;
+    case Attribute::Dereferenceable:
+      Attr = Attribute::getWithDereferenceableBytes(
+          C, B.getDereferenceableBytes());
+      break;
+    case Attribute::DereferenceableOrNull:
+      Attr = Attribute::getWithDereferenceableOrNullBytes(
+          C, B.getDereferenceableOrNullBytes());
+      break;
+    default:
+      Attr = Attribute::get(C, Kind);
+    }
+    Attrs.push_back(std::make_pair(Index, Attr));
   }
 
   // Add target-dependent (string) attributes.
@@ -713,9 +724,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
 AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
                                ArrayRef<Attribute::AttrKind> Kind) {
   SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
-  for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
-         E = Kind.end(); I != E; ++I)
-    Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
+  for (Attribute::AttrKind K : Kind)
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, K)));
   return get(C, Attrs);
 }
 
@@ -736,9 +746,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
     if (!AS) continue;
     SmallVector<std::pair<unsigned, AttributeSetNode *>, 8>::iterator
       ANVI = AttrNodeVec.begin(), ANVE;
-    for (const AttributeSetImpl::IndexAttrPair
-             *AI = AS->getNode(0),
-             *AE = AS->getNode(AS->getNumAttributes());
+    for (const IndexAttrPair *AI = AS->getNode(0),
+                             *AE = AS->getNode(AS->getNumAttributes());
         AI != AE; ++AI) {
       ANVE = AttrNodeVec.end();
       while (ANVI != ANVE && ANVI->first <= AI->first)
@@ -770,6 +779,36 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
   return addAttributes(C, Index, AttributeSet::get(C, Index, B));
 }
 
+AttributeSet AttributeSet::addAttribute(LLVMContext &C,
+                                        ArrayRef<unsigned> Indices,
+                                        Attribute A) const {
+  unsigned I = 0, E = pImpl ? pImpl->getNumAttributes() : 0;
+  auto IdxI = Indices.begin(), IdxE = Indices.end();
+  SmallVector<AttributeSet, 4> AttrSet;
+
+  while (I != E && IdxI != IdxE) {
+    if (getSlotIndex(I) < *IdxI)
+      AttrSet.emplace_back(getSlotAttributes(I++));
+    else if (getSlotIndex(I) > *IdxI)
+      AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+    else {
+      AttrBuilder B(getSlotAttributes(I), *IdxI);
+      B.addAttribute(A);
+      AttrSet.emplace_back(AttributeSet::get(C, *IdxI, B));
+      ++I;
+      ++IdxI;
+    }
+  }
+
+  while (I != E)
+    AttrSet.emplace_back(getSlotAttributes(I++));
+
+  while (IdxI != IdxE)
+    AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+
+  return get(C, AttrSet);
+}
+
 AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
                                          AttributeSet Attrs) const {
   if (!pImpl) return Attrs;
@@ -955,17 +994,17 @@ AttributeSet AttributeSet::getFnAttributes() const {
 bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttribute(Kind) : false;
+  return ASN && ASN->hasAttribute(Kind);
 }
 
 bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttribute(Kind) : false;
+  return ASN && ASN->hasAttribute(Kind);
 }
 
 bool AttributeSet::hasAttributes(unsigned Index) const {
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttributes() : false;
+  return ASN && ASN->hasAttributes();
 }
 
 /// \brief Return true if the specified attribute is set for at least one
@@ -1111,6 +1150,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
 
 void AttrBuilder::clear() {
   Attrs.reset();
+  TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
 }
 
@@ -1177,23 +1217,10 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
   for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
-      Attribute::AttrKind Kind = I->getKindAsEnum();
-      Attrs[Kind] = false;
-
-      if (Kind == Attribute::Alignment)
-        Alignment = 0;
-      else if (Kind == Attribute::StackAlignment)
-        StackAlignment = 0;
-      else if (Kind == Attribute::Dereferenceable)
-        DerefBytes = 0;
-      else if (Kind == Attribute::DereferenceableOrNull)
-        DerefOrNullBytes = 0;
+      removeAttribute(Attr.getKindAsEnum());
     } else {
       assert(Attr.isStringAttribute() && "Invalid attribute type!");
-      std::map<std::string, std::string>::iterator
-        Iter = TargetDepAttrs.find(Attr.getKindAsString());
-      if (Iter != TargetDepAttrs.end())
-        TargetDepAttrs.erase(Iter);
+      removeAttribute(Attr.getKindAsString());
     }
   }
 
@@ -1322,8 +1349,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
 
   assert(Slot != ~0U && "Couldn't find the index!");
 
-  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
-       I != E; ++I) {
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
       if (Attrs[I->getKindAsEnum()])
@@ -1382,7 +1408,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
 //===----------------------------------------------------------------------===//
 
 /// \brief Which attributes cannot be applied to a type.
-AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
+AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) {
   AttrBuilder Incompatible;
 
   if (!Ty->isIntegerTy())
@@ -1406,3 +1432,80 @@ AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
 
   return Incompatible;
 }
+
+template <typename AttrClass>
+static bool isEqual(const Function &Caller, const Function &Callee) {
+  return Caller.getFnAttribute(AttrClass::getKind()) ==
+         Callee.getFnAttribute(AttrClass::getKind());
+}
+
+/// \brief Compute the logical AND of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to false if the callee's attribute
+/// is false.
+template <typename AttrClass>
+static void setAND(Function &Caller, const Function &Callee) {
+  if (AttrClass::isSet(Caller, AttrClass::getKind()) &&
+      !AttrClass::isSet(Callee, AttrClass::getKind()))
+    AttrClass::set(Caller, AttrClass::getKind(), false);
+}
+
+/// \brief Compute the logical OR of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to true if the callee's attribute
+/// is true.
+template <typename AttrClass>
+static void setOR(Function &Caller, const Function &Callee) {
+  if (!AttrClass::isSet(Caller, AttrClass::getKind()) &&
+      AttrClass::isSet(Callee, AttrClass::getKind()))
+    AttrClass::set(Caller, AttrClass::getKind(), true);
+}
+
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
+  // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+  // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+  // clutter to the IR.
+  AttrBuilder B;
+  B.addAttribute(Attribute::StackProtect)
+      .addAttribute(Attribute::StackProtectStrong)
+      .addAttribute(Attribute::StackProtectReq);
+  AttributeSet OldSSPAttr = AttributeSet::get(Caller.getContext(),
+                                              AttributeSet::FunctionIndex,
+                                              B);
+
+  if (Callee.hasFnAttribute(Attribute::SafeStack)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::SafeStack);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtectReq) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::StackProtectReq);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectReq)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::StackProtectStrong);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectReq) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectStrong))
+    Caller.addFnAttr(Attribute::StackProtect);
+}
+
+#define GET_ATTR_COMPAT_FUNC
+#include "AttributesCompatFunc.inc"
+
+bool AttributeFuncs::areInlineCompatible(const Function &Caller,
+                                         const Function &Callee) {
+  return hasCompatibleFnAttrs(Caller, Callee);
+}
+
+
+void AttributeFuncs::mergeAttributesForInlining(Function &Caller,
+                                                const Function &Callee) {
+  mergeFnAttrs(Caller, Callee);
+}
diff --git a/lib/IR/AttributesCompatFunc.td b/lib/IR/AttributesCompatFunc.td
new file mode 100644
index 000000000000..7c85b3da9ab6
--- /dev/null
+++ b/lib/IR/AttributesCompatFunc.td
@@ -0,0 +1 @@
+include "llvm/IR/Attributes.td"
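AttributesCompatFunc.inc is generated from the one-line .td file above; the isEqual/setAND/setOR templates expect small adapter classes from that generated code. A hypothetical sketch of the shape such an adapter could take (the generated names and layout are assumptions, not shown in this patch; only APIs already used above are called):

// Hypothetical adapter in the shape the templates above expect.
struct HypotheticalEnumAttr {
  static llvm::Attribute::AttrKind getKind() {
    return llvm::Attribute::NoImplicitFloat;  // example kind, an assumption
  }
  static bool isSet(const llvm::Function &Fn, llvm::Attribute::AttrKind Kind) {
    return Fn.hasFnAttribute(Kind);
  }
  static void set(llvm::Function &Fn, llvm::Attribute::AttrKind Kind, bool Val) {
    if (Val) {
      Fn.addFnAttr(Kind);
    } else {
      // Removal mirrors adjustCallerSSPLevel() above: build an AttrBuilder
      // and strip it from the function index.
      llvm::AttrBuilder B;
      B.addAttribute(Kind);
      Fn.removeAttributes(
          llvm::AttributeSet::FunctionIndex,
          llvm::AttributeSet::get(Fn.getContext(),
                                  llvm::AttributeSet::FunctionIndex, B));
    }
  }
};
// With such a class, setAND<HypotheticalEnumAttr>(Caller, Callee) clears the
// caller's attribute whenever the callee does not also carry it.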
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index f1c6ebd4846e..12c354c89b20 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Regex.h"
 #include <cstring>
 using namespace llvm;
 
@@ -92,8 +93,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                                         F->arg_begin()->getType());
       return true;
     }
+    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
+    if (vldRegex.match(Name)) {
+      auto fArgs = F->getFunctionType()->params();
+      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
+      // Can't use Intrinsic::getDeclaration here as the return types might
+      // then only be structurally equal.
+      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
+      NewFn = Function::Create(fType, F->getLinkage(),
+                               "llvm." + Name + ".p0i8", F->getParent());
+      return true;
+    }
+    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
+    if (vstRegex.match(Name)) {
+      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
+                                                Intrinsic::arm_neon_vst2,
+                                                Intrinsic::arm_neon_vst3,
+                                                Intrinsic::arm_neon_vst4};
+
+      static const Intrinsic::ID StoreLaneInts[] = {
+        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
+        Intrinsic::arm_neon_vst4lane
+      };
+
+      auto fArgs = F->getFunctionType()->params();
+      Type *Tys[] = {fArgs[0], fArgs[1]};
+      if (Name.find("lane") == StringRef::npos)
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          StoreInts[fArgs.size() - 3], Tys);
+      else
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          StoreLaneInts[fArgs.size() - 5], Tys);
+      return true;
+    }
     break;
   }
+
   case 'c': {
     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
       F->setName(Name + ".old");
@@ -129,7 +164,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name.startswith("x86.sse2.pcmpgt.") ||
         Name.startswith("x86.avx2.pcmpeq.") ||
         Name.startswith("x86.avx2.pcmpgt.") ||
+        Name.startswith("x86.avx2.vbroadcast") ||
+        Name.startswith("x86.avx2.pbroadcast") ||
         Name.startswith("x86.avx.vpermil.") ||
+        Name.startswith("x86.sse41.pmovsx") ||
         Name == "x86.avx.vinsertf128.pd.256" ||
         Name == "x86.avx.vinsertf128.ps.256" ||
         Name == "x86.avx.vinsertf128.si.256" ||
@@ -162,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name == "x86.avx2.pblendd.128" ||
         Name == "x86.avx2.pblendd.256" ||
         Name == "x86.avx2.vbroadcasti128" ||
+        Name == "x86.xop.vpcmov" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
       NewFn = nullptr;
       return true;
@@ -325,7 +364,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
   IRBuilder<> Builder(C);
-  Builder.SetInsertPoint(CI->getParent(), CI);
+  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
 
   assert(F && "Intrinsic call is not direct?");
 
@@ -351,7 +390,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Name == "llvm.x86.avx.movnt.ps.256" ||
         Name == "llvm.x86.avx.movnt.pd.256") {
       IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
+      Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
 
       Module *M = F->getParent();
       SmallVector<Metadata *, 1> Elts;
@@ -368,7 +407,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                                 "cast");
       StoreInst *SI = Builder.CreateStore(Arg1, BC);
       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
-      SI->setAlignment(16);
+      SI->setAlignment(32);
 
       // Remove intrinsic.
       CI->eraseFromParent();
@@ -419,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = Builder.CreateCall(VPCOM, {CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        Builder.getInt8(Imm)});
+    } else if (Name == "llvm.x86.xop.vpcmov") {
+      Value *Arg0 = CI->getArgOperand(0);
+      Value *Arg1 = CI->getArgOperand(1);
+      Value *Sel = CI->getArgOperand(2);
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+      Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
+      Value *NotSel = Builder.CreateXor(Sel, MinusOne);
+      Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
+      Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+      Rep = Builder.CreateOr(Sel0, Sel1);
     } else if (Name == "llvm.x86.sse42.crc32.64.8") {
       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                   Intrinsic::x86_sse42_crc32_32_8);
@@ -438,6 +487,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       for (unsigned I = 0; I < EltNum; ++I)
         Rep = Builder.CreateInsertElement(Rep, Load,
                                           ConstantInt::get(I32Ty, I));
+    } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
+      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
+      VectorType *DstTy = cast<VectorType>(CI->getType());
+      unsigned NumDstElts = DstTy->getNumElements();
+
+      // Extract a subvector of the first NumDstElts lanes and sign extend.
+      SmallVector<int, 16> ShuffleMask;
+      for (int i = 0; i != (int)NumDstElts; ++i)
+        ShuffleMask.push_back(i);
+
+      Value *SV = Builder.CreateShuffleVector(
+          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+      Rep = Builder.CreateSExt(SV, DstTy);
     } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
       // Replace vbroadcasts with a vector shuffle.
       Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
@@ -447,6 +509,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       const int Idxs[4] = { 0, 1, 0, 1 };
       Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                         Idxs);
+    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
+               Name.startswith("llvm.x86.avx2.vbroadcast")) {
+      // Replace vp?broadcasts with a vector shuffle.
+      Value *Op = CI->getArgOperand(0);
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
+      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
+                                        Constant::getNullValue(MaskTy));
     } else if (Name == "llvm.x86.sse2.psll.dq") {
       // 128-bit shift left specified in bits.
       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
@@ -517,10 +587,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
       Imm = Imm & 1;
-      
+
       // Extend the second operand into a vector that is twice as big.
       Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant *, 8> Idxs;
@@ -562,7 +632,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
@@ -627,6 +697,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   default:
     llvm_unreachable("Unknown function for CallInst upgrade.");
 
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+                                 CI->arg_operands().end());
+    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
+    CI->eraseFromParent();
+    return;
+  }
+
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
     assert(CI->getNumArgOperands() == 1 &&
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 0a0449434a7b..f61276fd436b 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
 #include <algorithm>
+
 using namespace llvm;
 
 ValueSymbolTable *BasicBlock::getValueSymbolTable() {
@@ -35,8 +36,7 @@ LLVMContext &BasicBlock::getContext() const {
 
 // Explicit instantiation of SymbolTableListTraits since some of the methods
 // are not in the public header file...
-template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
-
+template class llvm::SymbolTableListTraits<Instruction>;
 
 BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
                        BasicBlock *InsertBefore)
@@ -56,7 +56,7 @@ void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
   assert(!Parent && "Already has a parent");
 
   if (InsertBefore)
-    NewParent->getBasicBlockList().insert(InsertBefore, this);
+    NewParent->getBasicBlockList().insert(InsertBefore->getIterator(), this);
   else
     NewParent->getBasicBlockList().push_back(this);
 }
@@ -91,26 +91,26 @@ void BasicBlock::setParent(Function *parent) {
 }
 
 void BasicBlock::removeFromParent() {
-  getParent()->getBasicBlockList().remove(this);
+  getParent()->getBasicBlockList().remove(getIterator());
 }
 
 iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
-  return getParent()->getBasicBlockList().erase(this);
+  return getParent()->getBasicBlockList().erase(getIterator());
 }
 
 /// Unlink this basic block from its current function and
 /// insert it into the function that MovePos lives in, right before MovePos.
 void BasicBlock::moveBefore(BasicBlock *MovePos) {
-  MovePos->getParent()->getBasicBlockList().splice(MovePos,
-                       getParent()->getBasicBlockList(), this);
+  MovePos->getParent()->getBasicBlockList().splice(
+      MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator());
 }
 
 /// Unlink this basic block from its current function and
 /// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) { - Function::iterator I = MovePos; - MovePos->getParent()->getBasicBlockList().splice(++I, - getParent()->getBasicBlockList(), this); + MovePos->getParent()->getBasicBlockList().splice( + ++MovePos->getIterator(), getParent()->getBasicBlockList(), + getIterator()); } const Module *BasicBlock::getModule() const { @@ -196,8 +196,8 @@ BasicBlock::iterator BasicBlock::getFirstInsertionPt() { if (!FirstNonPHI) return end(); - iterator InsertPt = FirstNonPHI; - if (isa(InsertPt)) ++InsertPt; + iterator InsertPt = FirstNonPHI->getIterator(); + if (InsertPt->isEHPad()) ++InsertPt; return InsertPt; } @@ -245,12 +245,12 @@ BasicBlock *BasicBlock::getSingleSuccessor() { BasicBlock *BasicBlock::getUniqueSuccessor() { succ_iterator SI = succ_begin(this), E = succ_end(this); - if (SI == E) return NULL; // No successors + if (SI == E) return nullptr; // No successors BasicBlock *SuccBB = *SI; ++SI; for (;SI != E; ++SI) { if (*SI != SuccBB) - return NULL; + return nullptr; // The same successor appears multiple times in the successor list. // This is OK. } @@ -333,6 +333,17 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, } } +bool BasicBlock::canSplitPredecessors() const { + const Instruction *FirstNonPHI = getFirstNonPHI(); + if (isa(FirstNonPHI)) + return true; + // This is perhaps a little conservative because constructs like + // CleanupBlockInst are pretty easy to split. However, SplitBlockPredecessors + // cannot handle such things just yet. + if (FirstNonPHI->isEHPad()) + return false; + return true; +} /// This splits a basic block into two at the specified /// instruction. Note that all instructions BEFORE the specified iterator stay @@ -393,8 +404,7 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { // Cope with being called on a BasicBlock that doesn't have a terminator // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this. return; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = TI->getSuccessor(i); + for (BasicBlock *Succ : TI->successors()) { // N.B. Succ might not be a complete BasicBlock, so don't assume // that it ends with a non-phi instruction. for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) { diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index aabeaefc0c7a..40b4ec65e22b 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS AttributesCompatFunc.td) +tablegen(LLVM AttributesCompatFunc.inc -gen-attrs) +add_public_tablegen_target(AttributeCompatFuncTableGen) + add_llvm_library(LLVMCore AsmWriter.cpp Attributes.cpp @@ -32,13 +36,13 @@ add_llvm_library(LLVMCore MDBuilder.cpp Mangler.cpp Metadata.cpp - MetadataTracking.cpp Module.cpp Operator.cpp Pass.cpp PassManager.cpp PassRegistry.cpp Statepoint.cpp + FunctionInfo.cpp Type.cpp TypeFinder.cpp Use.cpp @@ -52,4 +56,12 @@ add_llvm_library(LLVMCore ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR ) +# PR24785: Workaround for hanging compilation. 
+if( MSVC_VERSION EQUAL 1800) + set_property( + SOURCE Function.cpp + PROPERTY COMPILE_FLAGS "/Og-" + ) +endif() + add_dependencies(LLVMCore intrinsics_gen) diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 46bb20e0d1b7..ce3fe03e2df7 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -83,7 +83,7 @@ foldConstantCastPair( assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type"); assert(CastInst::isCast(opc) && "Invalid cast opcode"); - // The the types and opcodes for the two Cast constant expressions + // The types and opcodes for the two Cast constant expressions Type *SrcTy = Op->getOperand(0)->getType(); Type *MidTy = Op->getType(); Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode()); @@ -109,7 +109,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (PointerType *PTy = dyn_cast(V->getType())) if (PointerType *DPTy = dyn_cast(DestTy)) if (PTy->getAddressSpace() == DPTy->getAddressSpace() - && DPTy->getElementType()->isSized()) { + && PTy->getElementType()->isSized()) { SmallVector IdxList; Value *Zero = Constant::getNullValue(Type::getInt32Ty(DPTy->getContext())); @@ -1187,7 +1187,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven); return ConstantFP::get(C1->getContext(), C3V); case Instruction::FRem: - (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven); + (void)C3V.mod(C2V); return ConstantFP::get(C1->getContext(), C3V); } } @@ -1277,9 +1277,9 @@ static bool isMaybeZeroSizedType(Type *Ty) { } /// IdxCompare - Compare the two constants as though they were getelementptr -/// indices. This allows coersion of the types to be the same thing. +/// indices. This allows coercion of the types to be the same thing. /// -/// If the two constants are the "same" (after coersion), return 0. If the +/// If the two constants are the "same" (after coercion), return 0. If the /// first is less than the second, return -1, if the second is less than the /// first, return 1. If the constants are not integral, return -2. /// @@ -1685,7 +1685,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Otherwise, for integer compare, pick the same value as the non-undef // operand, and fold it to true or false. if (isIntegerPredicate) - return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(Predicate)); // Choosing NaN for the undef will always make unordered comparison succeed // and ordered comparison fails. @@ -1869,7 +1869,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else { // Evaluate the relation between the two constants, per the predicate. int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) { + switch (evaluateICmpRelation(C1, C2, + CmpInst::isSigned((CmpInst::Predicate)pred))) { default: llvm_unreachable("Unknown relational!"); case ICmpInst::BAD_ICMP_PREDICATE: break; // Couldn't determine anything about these constants. @@ -1950,8 +1951,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If the left hand side is an extension, try eliminating it. 
if (ConstantExpr *CE1 = dyn_cast(C1)) { - if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) || - (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){ + if ((CE1->getOpcode() == Instruction::SExt && + ICmpInst::isSigned((ICmpInst::Predicate)pred)) || + (CE1->getOpcode() == Instruction::ZExt && + !ICmpInst::isSigned((ICmpInst::Predicate)pred))){ Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -1997,17 +2000,17 @@ static bool isInBoundsIndices(ArrayRef Idxs) { } /// \brief Test whether a given ConstantInt is in-range for a SequentialType. -static bool isIndexInRangeOfSequentialType(const SequentialType *STy, +static bool isIndexInRangeOfSequentialType(SequentialType *STy, const ConstantInt *CI) { - if (const PointerType *PTy = dyn_cast(STy)) - // Only handle pointers to sized types, not pointers to functions. - return PTy->getElementType()->isSized(); + // And indices are valid when indexing along a pointer + if (isa(STy)) + return true; uint64_t NumElements = 0; // Determine the number of elements in our sequential type. - if (const ArrayType *ATy = dyn_cast(STy)) + if (auto *ATy = dyn_cast(STy)) NumElements = ATy->getNumElements(); - else if (const VectorType *VTy = dyn_cast(STy)) + else if (auto *VTy = dyn_cast(STy)) NumElements = VTy->getNumElements(); assert((isa(STy) || NumElements > 0) && @@ -2178,7 +2181,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, // dimension. NewIdxs.resize(Idxs.size()); uint64_t NumElements = 0; - if (const ArrayType *ATy = dyn_cast(Ty)) + if (auto *ATy = dyn_cast(Ty)) NumElements = ATy->getNumElements(); else NumElements = cast(Ty)->getNumElements(); diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 91095cfe9eec..48f9b27a25ae 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -21,7 +21,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Instruction.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -125,6 +127,57 @@ ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred, .inverse(); } +ConstantRange ConstantRange::makeNoWrapRegion(Instruction::BinaryOps BinOp, + const APInt &C, + unsigned NoWrapKind) { + typedef OverflowingBinaryOperator OBO; + + // Computes the intersection of CR0 and CR1. It is different from + // intersectWith in that the ConstantRange returned will only contain elements + // in both CR0 and CR1 (i.e. SubsetIntersect(X, Y) is a *subset*, proper or + // not, of both X and Y). + auto SubsetIntersect = + [](const ConstantRange &CR0, const ConstantRange &CR1) { + return CR0.inverse().unionWith(CR1.inverse()).inverse(); + }; + + assert(BinOp >= Instruction::BinaryOpsBegin && + BinOp < Instruction::BinaryOpsEnd && "Binary operators only!"); + + assert((NoWrapKind == OBO::NoSignedWrap || + NoWrapKind == OBO::NoUnsignedWrap || + NoWrapKind == (OBO::NoUnsignedWrap | OBO::NoSignedWrap)) && + "NoWrapKind invalid!"); + + unsigned BitWidth = C.getBitWidth(); + if (BinOp != Instruction::Add) + // Conservative answer: empty set + return ConstantRange(BitWidth, false); + + if (C.isMinValue()) + // Full set: nothing signed / unsigned wraps when added to 0. 
+ return ConstantRange(BitWidth); + + ConstantRange Result(BitWidth); + + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = SubsetIntersect(Result, + ConstantRange(APInt::getNullValue(BitWidth), -C)); + + if (NoWrapKind & OBO::NoSignedWrap) { + if (C.isStrictlyPositive()) + Result = SubsetIntersect( + Result, ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMinValue(BitWidth) - C)); + else + Result = SubsetIntersect( + Result, ConstantRange(APInt::getSignedMinValue(BitWidth) - C, + APInt::getSignedMinValue(BitWidth))); + } + + return Result; +} + /// isFullSet - Return true if this set contains all of the elements possible /// for this data-type bool ConstantRange::isFullSet() const { diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 308e6bde3d14..0898bf645385 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -53,6 +53,11 @@ bool Constant::isNegativeZeroValue() const { if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) return true; + if (const ConstantVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + return true; + // We've already handled true FP case; any other FP vectors can't represent -0.0. if (getType()->isFPOrFPVectorTy()) return false; @@ -68,6 +73,17 @@ bool Constant::isZeroValue() const { if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero(); + // Equivalent for a vector of -0.0's. + if (const ConstantDataVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero()) + return true; + + if (const ConstantVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero()) + return true; + // Otherwise, just use +0.0. return isNullValue(); } @@ -81,8 +97,10 @@ bool Constant::isNullValue() const { if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero() && !CFP->isNegative(); - // constant zero is zero for aggregates and cpnull is null for pointers. - return isa(this) || isa(this); + // constant zero is zero for aggregates, cpnull is null for pointers, none for + // tokens. + return isa(this) || isa(this) || + isa(this); } bool Constant::isAllOnesValue() const { @@ -204,6 +222,8 @@ Constant *Constant::getNullValue(Type *Ty) { case Type::ArrayTyID: case Type::VectorTyID: return ConstantAggregateZero::get(Ty); + case Type::TokenTyID: + return ConstantTokenNone::get(Ty->getContext()); default: // Function, Label, or Opaque type? llvm_unreachable("Cannot create a null constant of that type!"); @@ -410,32 +430,13 @@ bool Constant::isConstantUsed() const { return false; } +bool Constant::needsRelocation() const { + if (isa(this)) + return true; // Global reference. - -/// getRelocationInfo - This method classifies the entry according to -/// whether or not it may generate a relocation entry. This must be -/// conservative, so if it might codegen to a relocatable entry, it should say -/// so. The return values are: -/// -/// NoRelocation: This constant pool entry is guaranteed to never have a -/// relocation applied to it (because it holds a simple constant like -/// '4'). -/// LocalRelocation: This entry has relocations, but the entries are -/// guaranteed to be resolvable by the static linker, so the dynamic -/// linker will never see them. -/// GlobalRelocations: This entry may have arbitrary relocations. -/// -/// FIXME: This really should not be in IR. 
-Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { - if (const GlobalValue *GV = dyn_cast(this)) { - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return LocalRelocation; // Local to this file/library. - return GlobalRelocations; // Global reference. - } - if (const BlockAddress *BA = dyn_cast(this)) - return BA->getFunction()->getRelocationInfo(); - + return BA->getFunction()->needsRelocation(); + // While raw uses of blockaddress need to be relocated, differences between // two of them don't when they are for labels in the same function. This is a // common idiom when creating a table for the indirect goto extension, so we @@ -444,20 +445,18 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast(CE->getOperand(1)); - if (LHS && RHS && - LHS->getOpcode() == Instruction::PtrToInt && + if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && RHS->getOpcode() == Instruction::PtrToInt && isa(LHS->getOperand(0)) && isa(RHS->getOperand(0)) && cast(LHS->getOperand(0))->getFunction() == - cast(RHS->getOperand(0))->getFunction()) - return NoRelocation; + cast(RHS->getOperand(0))->getFunction()) + return false; } - PossibleRelocationsTy Result = NoRelocation; + bool Result = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - Result = std::max(Result, - cast(getOperand(i))->getRelocationInfo()); + Result |= cast(getOperand(i))->needsRelocation(); return Result; } @@ -797,10 +796,10 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const { } unsigned ConstantAggregateZero::getNumElements() const { - const Type *Ty = getType(); - if (const auto *AT = dyn_cast(Ty)) + Type *Ty = getType(); + if (auto *AT = dyn_cast(Ty)) return AT->getNumElements(); - if (const auto *VT = dyn_cast(Ty)) + if (auto *VT = dyn_cast(Ty)) return VT->getNumElements(); return Ty->getStructNumElements(); } @@ -838,10 +837,10 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const { } unsigned UndefValue::getNumElements() const { - const Type *Ty = getType(); - if (const auto *AT = dyn_cast(Ty)) + Type *Ty = getType(); + if (auto *AT = dyn_cast(Ty)) return AT->getNumElements(); - if (const auto *VT = dyn_cast(Ty)) + if (auto *VT = dyn_cast(Ty)) return VT->getNumElements(); return Ty->getStructNumElements(); } @@ -858,6 +857,59 @@ static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) { return true; } +template +static Constant *getIntSequenceIfElementsMatch(ArrayRef V) { + assert(!V.empty() && "Cannot get empty int sequence."); + + SmallVector Elts; + for (Constant *C : V) + if (auto *CI = dyn_cast(C)) + Elts.push_back(CI->getZExtValue()); + else + return nullptr; + return SequentialTy::get(V[0]->getContext(), Elts); +} + +template +static Constant *getFPSequenceIfElementsMatch(ArrayRef V) { + assert(!V.empty() && "Cannot get empty FP sequence."); + + SmallVector Elts; + for (Constant *C : V) + if (auto *CFP = dyn_cast(C)) + Elts.push_back(CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + else + return nullptr; + return SequentialTy::getFP(V[0]->getContext(), Elts); +} + +template +static Constant *getSequenceIfElementsMatch(Constant *C, + ArrayRef V) { + // We speculatively build the elements here even if it turns out that there is + // a constantexpr or something else weird, since it is so uncommon for that to + // happen. 
+ if (ConstantInt *CI = dyn_cast(C)) { + if (CI->getType()->isIntegerTy(8)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(16)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(32)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(64)) + return getIntSequenceIfElementsMatch(V); + } else if (ConstantFP *CFP = dyn_cast(C)) { + if (CFP->getType()->isHalfTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isFloatTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isDoubleTy()) + return getFPSequenceIfElementsMatch(V); + } + + return nullptr; +} + ConstantArray::ConstantArray(ArrayType *T, ArrayRef V) : Constant(T, ConstantArrayVal, OperandTraits::op_end(this) - V.size(), @@ -875,6 +927,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef V) { return C; return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V); } + Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef V) { // Empty arrays are canonicalized to ConstantAggregateZero. if (V.empty()) @@ -897,74 +950,8 @@ Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef V) { // Check to see if all of the elements are ConstantFP or ConstantInt and if // the element type is compatible with ConstantDataVector. If so, use it. - if (ConstantDataSequential::isElementTypeCompatible(C->getType())) { - // We speculatively build the elements here even if it turns out that there - // is a constantexpr or something else weird in the array, since it is so - // uncommon for that to happen. - if (ConstantInt *CI = dyn_cast(C)) { - if (CI->getType()->isIntegerTy(8)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(16)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(32)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(64)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } - } - - if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::getFP(C->getContext(), Elts); - } else if (CFP->getType()->isDoubleTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::getFP(C->getContext(), Elts); - } - } - } 
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) + return getSequenceIfElementsMatch(C, V); // Otherwise, we really do want to create a ConstantArray. return nullptr; @@ -1060,6 +1047,7 @@ Constant *ConstantVector::get(ArrayRef V) { VectorType *Ty = VectorType::get(V.front()->getType(), V.size()); return Ty->getContext().pImpl->VectorConstants.getOrCreate(Ty, V); } + Constant *ConstantVector::getImpl(ArrayRef V) { assert(!V.empty() && "Vectors can't be empty"); VectorType *T = VectorType::get(V.front()->getType(), V.size()); @@ -1085,74 +1073,8 @@ Constant *ConstantVector::getImpl(ArrayRef V) { // Check to see if all of the elements are ConstantFP or ConstantInt and if // the element type is compatible with ConstantDataVector. If so, use it. - if (ConstantDataSequential::isElementTypeCompatible(C->getType())) { - // We speculatively build the elements here even if it turns out that there - // is a constantexpr or something else weird in the array, since it is so - // uncommon for that to happen. - if (ConstantInt *CI = dyn_cast(C)) { - if (CI->getType()->isIntegerTy(8)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(16)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(32)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(64)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } - } - - if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::getFP(C->getContext(), Elts); - } else if (CFP->getType()->isDoubleTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::getFP(C->getContext(), Elts); - } - } - } + if (ConstantDataSequential::isElementTypeCompatible(C->getType())) + return getSequenceIfElementsMatch(C, V); // Otherwise, the element type isn't compatible with ConstantDataVector, or // the operand list constants a ConstantExpr or something else strange. 
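// Both getImpl call sites above now funnel through getSequenceIfElementsMatch,
// which probes the first element's type once and runs a single templated
// packing loop in place of the four hand-copied loops deleted here. A
// standalone sketch of that dispatch-by-width pattern (illustrative types
// only, not LLVM API; one mismatching element aborts the whole attempt):
#include <cstdint>
#include <vector>

struct Elem { unsigned BitWidth; uint64_t Value; };

template <typename UIntTy>
static bool packAll(const std::vector<Elem> &V, std::vector<UIntTy> &Out) {
  for (const Elem &E : V) {
    if (E.BitWidth != sizeof(UIntTy) * 8)
      return false;                    // bail out; caller keeps the slow path
    Out.push_back(static_cast<UIntTy>(E.Value));
  }
  return true;
}

static bool packByWidth(const std::vector<Elem> &V) {
  if (V.empty())
    return false;
  switch (V[0].BitWidth) {             // probe one representative element
  case 8:  { std::vector<uint8_t>  Out; return packAll(V, Out); }
  case 16: { std::vector<uint16_t> Out; return packAll(V, Out); }
  case 32: { std::vector<uint32_t> Out; return packAll(V, Out); }
  case 64: { std::vector<uint64_t> Out; return packAll(V, Out); }
  default: return false;
  }
}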
@@ -1170,6 +1092,17 @@ Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { return get(Elts); } +ConstantTokenNone *ConstantTokenNone::get(LLVMContext &Context) { + LLVMContextImpl *pImpl = Context.pImpl; + if (!pImpl->TheNoneToken) + pImpl->TheNoneToken.reset(new ConstantTokenNone(Context)); + return pImpl->TheNoneToken.get(); +} + +/// Remove the constant from the constant table. +void ConstantTokenNone::destroyConstantImpl() { + llvm_unreachable("You can't ConstantTokenNone->destroyConstantImpl()!"); +} // Utility function for determining if a ConstantExpr is a CastOp or not. This // can't be inline because we don't want to #include Instruction.h into @@ -1221,8 +1154,7 @@ ArrayRef ConstantExpr::getIndices() const { } unsigned ConstantExpr::getPredicate() const { - assert(isCompare()); - return ((const CompareConstantExpr*)this)->predicate; + return cast(this)->predicate; } /// getWithOperandReplaced - Return a constant expression identical to this @@ -1245,7 +1177,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { /// operands replaced with the specified values. The specified array must /// have the same number of operands as our current one. Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, - bool OnlyIfReduced) const { + bool OnlyIfReduced, Type *SrcTy) const { assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); // If no operands changed return self. @@ -1283,10 +1215,13 @@ Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy); - case Instruction::GetElementPtr: - return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1), - cast(this)->isInBounds(), - OnlyIfReducedTy); + case Instruction::GetElementPtr: { + auto *GEPO = cast(this); + assert(SrcTy || (Ops[0]->getType() == getOperand(0)->getType())); + return ConstantExpr::getGetElementPtr( + SrcTy ? SrcTy : GEPO->getSourceElementType(), Ops[0], Ops.slice(1), + GEPO->isInBounds(), OnlyIfReducedTy); + } case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1], @@ -2430,9 +2365,9 @@ StringRef ConstantDataSequential::getRawDataValues() const { /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. -bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) { - if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; - if (const IntegerType *IT = dyn_cast(Ty)) { +bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) { + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; + if (auto *IT = dyn_cast(Ty)) { switch (IT->getBitWidth()) { case 8: case 16: @@ -2587,7 +2522,7 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { /// object. 
Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { - Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size()); + Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); return getImpl(StringRef(const_cast(Data), Elts.size() * 2), Ty); } @@ -2703,6 +2638,11 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { } if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType()->isHalfTy()) { + SmallVector Elts( + NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + return getFP(V->getContext(), Elts); + } if (CFP->getType()->isFloatTy()) { SmallVector Elts( NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); @@ -2748,6 +2688,10 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { switch (getElementType()->getTypeID()) { default: llvm_unreachable("Accessor can only be used when element is float/double!"); + case Type::HalfTyID: { + auto EltVal = *reinterpret_cast(EltPtr); + return APFloat(APFloat::IEEEhalf, APInt(16, EltVal)); + } case Type::FloatTyID: { auto EltVal = *reinterpret_cast(EltPtr); return APFloat(APFloat::IEEEsingle, APInt(32, EltVal)); @@ -2782,7 +2726,8 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { /// Note that this has to compute a new constant to return, so it isn't as /// efficient as getElementAsInteger/Float/Double. Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { - if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) + if (getElementType()->isHalfTy() || getElementType()->isFloatTy() || + getElementType()->isDoubleTy()) return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); @@ -2872,6 +2817,11 @@ Value *ConstantFP::handleOperandChangeImpl(Value *From, Value *To, Use *U) { llvm_unreachable("Unsupported class for handleOperandChange()!"); } +Value *ConstantTokenNone::handleOperandChangeImpl(Value *From, Value *To, + Use *U) { + llvm_unreachable("Unsupported class for handleOperandChange()!"); +} + Value *UndefValue::handleOperandChangeImpl(Value *From, Value *To, Use *U) { llvm_unreachable("Unsupported class for handleOperandChange()!"); } @@ -3070,7 +3020,7 @@ Instruction *ConstantExpr::getAsInstruction() { case Instruction::ICmp: case Instruction::FCmp: return CmpInst::Create((Instruction::OtherOps)getOpcode(), - getPredicate(), Ops[0], Ops[1]); + (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]); default: assert(getNumOperands() == 2 && "Must be binary operator?"); diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index f3ddcd78d265..13fcbd2ece10 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -179,6 +179,13 @@ public: /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::ExtractValue; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; /// InsertValueConstantExpr - This class is private to @@ -205,6 +212,13 @@ public: /// Transparently provide more efficient getOperand methods. 
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::InsertValue; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; /// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is @@ -235,6 +249,13 @@ public: Type *getSourceElementType() const; /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::GetElementPtr; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; // CompareConstantExpr - This class is private to Constants.cpp, and is used @@ -257,6 +278,14 @@ public: } /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::ICmp || + CE->getOpcode() == Instruction::FCmp; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; template <> @@ -373,41 +402,45 @@ template struct ConstantAggrKeyType { struct InlineAsmKeyType { StringRef AsmString; StringRef Constraints; + FunctionType *FTy; bool HasSideEffects; bool IsAlignStack; InlineAsm::AsmDialect AsmDialect; InlineAsmKeyType(StringRef AsmString, StringRef Constraints, - bool HasSideEffects, bool IsAlignStack, + FunctionType *FTy, bool HasSideEffects, bool IsAlignStack, InlineAsm::AsmDialect AsmDialect) - : AsmString(AsmString), Constraints(Constraints), + : AsmString(AsmString), Constraints(Constraints), FTy(FTy), HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack), AsmDialect(AsmDialect) {} InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl &) : AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()), - HasSideEffects(Asm->hasSideEffects()), + FTy(Asm->getFunctionType()), HasSideEffects(Asm->hasSideEffects()), IsAlignStack(Asm->isAlignStack()), AsmDialect(Asm->getDialect()) {} bool operator==(const InlineAsmKeyType &X) const { return HasSideEffects == X.HasSideEffects && IsAlignStack == X.IsAlignStack && AsmDialect == X.AsmDialect && - AsmString == X.AsmString && Constraints == X.Constraints; + AsmString == X.AsmString && Constraints == X.Constraints && + FTy == X.FTy; } bool operator==(const InlineAsm *Asm) const { return HasSideEffects == Asm->hasSideEffects() && IsAlignStack == Asm->isAlignStack() && AsmDialect == Asm->getDialect() && AsmString == Asm->getAsmString() && - Constraints == Asm->getConstraintString(); + Constraints == Asm->getConstraintString() && + FTy == Asm->getFunctionType(); } unsigned getHash() const { return hash_combine(AsmString, Constraints, HasSideEffects, IsAlignStack, - AsmDialect); + AsmDialect, FTy); } typedef ConstantInfo::TypeClass TypeClass; InlineAsm *create(TypeClass *Ty) const { - return new InlineAsm(Ty, AsmString, Constraints, HasSideEffects, + assert(PointerType::getUnqual(FTy) == Ty); + return new InlineAsm(FTy, AsmString, Constraints, HasSideEffects, IsAlignStack, AsmDialect); } }; diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 0eb88a967575..7f39c8085a69 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -262,6 +262,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMVectorTypeKind; case Type::X86_MMXTyID: return LLVMX86_MMXTypeKind; + case Type::TokenTyID: + return LLVMTokenTypeKind; } llvm_unreachable("Unhandled TypeID."); } @@ -366,6 +368,9 @@ LLVMTypeRef 
LLVMPPCFP128TypeInContext(LLVMContextRef C) { LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C)); } +LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C) { + return (LLVMTypeRef) Type::getTokenTy(*unwrap(C)); +} LLVMTypeRef LLVMHalfType(void) { return LLVMHalfTypeInContext(LLVMGetGlobalContext()); @@ -1528,7 +1533,7 @@ LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) { Module::global_iterator I = Mod->global_begin(); if (I == Mod->global_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { @@ -1536,23 +1541,23 @@ LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { Module::global_iterator I = Mod->global_end(); if (I == Mod->global_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); - Module::global_iterator I = GV; + Module::global_iterator I(GV); if (++I == GV->getParent()->global_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); - Module::global_iterator I = GV; + Module::global_iterator I(GV); if (I == GV->getParent()->global_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMDeleteGlobal(LLVMValueRef GlobalVar) { @@ -1639,7 +1644,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name) { auto *PTy = cast(unwrap(Ty)); - return wrap(GlobalAlias::create(PTy, GlobalValue::ExternalLinkage, Name, + return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + GlobalValue::ExternalLinkage, Name, unwrap(Aliasee), unwrap(M))); } @@ -1660,7 +1666,7 @@ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) { Module::iterator I = Mod->begin(); if (I == Mod->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { @@ -1668,23 +1674,23 @@ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { Module::iterator I = Mod->end(); if (I == Mod->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); - Module::iterator I = Func; + Module::iterator I(Func); if (++I == Func->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); - Module::iterator I = Func; + Module::iterator I(Func); if (I == Func->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMDeleteFunction(LLVMValueRef Fn) { @@ -1779,14 +1785,14 @@ void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) { Function *Fn = unwrap(FnRef); for (Function::arg_iterator I = Fn->arg_begin(), E = Fn->arg_end(); I != E; I++) - *ParamRefs++ = wrap(I); + *ParamRefs++ = wrap(&*I); } LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) { Function::arg_iterator AI = unwrap(FnRef)->arg_begin(); while (index --> 0) AI++; - return wrap(AI); + return wrap(&*AI); } LLVMValueRef LLVMGetParamParent(LLVMValueRef V) { @@ -1798,7 +1804,7 @@ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) { Function::arg_iterator I = Func->arg_begin(); if (I == Func->arg_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { @@ 
-1806,23 +1812,23 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { Function::arg_iterator I = Func->arg_end(); if (I == Func->arg_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); - Function::arg_iterator I = A; + Function::arg_iterator I(A); if (++I == A->getParent()->arg_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); - Function::arg_iterator I = A; + Function::arg_iterator I(A); if (I == A->getParent()->arg_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) { @@ -1880,7 +1886,7 @@ unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) { void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){ Function *Fn = unwrap(FnRef); for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++) - *BasicBlocksRefs++ = wrap(I); + *BasicBlocksRefs++ = wrap(&*I); } LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) { @@ -1892,7 +1898,7 @@ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) { Function::iterator I = Func->begin(); if (I == Func->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { @@ -1900,23 +1906,23 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { Function::iterator I = Func->end(); if (I == Func->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); - Function::iterator I = Block; + Function::iterator I(Block); if (++I == Block->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); - Function::iterator I = Block; + Function::iterator I(Block); if (I == Block->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, @@ -1968,7 +1974,7 @@ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) { BasicBlock::iterator I = Block->begin(); if (I == Block->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) { @@ -1976,23 +1982,23 @@ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) { BasicBlock::iterator I = Block->end(); if (I == Block->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); - BasicBlock::iterator I = Instr; + BasicBlock::iterator I(Instr); if (++I == Instr->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); - BasicBlock::iterator I = Instr; + BasicBlock::iterator I(Instr); if (I == Instr->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMInstructionEraseFromParent(LLVMValueRef Inst) { @@ -2160,12 +2166,12 @@ void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, LLVMValueRef Instr) { BasicBlock *BB = unwrap(Block); Instruction *I = Instr? 
unwrap(Instr) : (Instruction*) BB->end(); - unwrap(Builder)->SetInsertPoint(BB, I); + unwrap(Builder)->SetInsertPoint(BB, I->getIterator()); } void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) { Instruction *I = unwrap(Instr); - unwrap(Builder)->SetInsertPoint(I->getParent(), I); + unwrap(Builder)->SetInsertPoint(I->getParent(), I->getIterator()); } void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) { @@ -2489,7 +2495,6 @@ LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) { CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock()))); } - LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal, const char *Name) { return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name)); @@ -2515,6 +2520,21 @@ static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) { llvm_unreachable("Invalid LLVMAtomicOrdering value!"); } +static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { + switch (Ordering) { + case NotAtomic: return LLVMAtomicOrderingNotAtomic; + case Unordered: return LLVMAtomicOrderingUnordered; + case Monotonic: return LLVMAtomicOrderingMonotonic; + case Acquire: return LLVMAtomicOrderingAcquire; + case Release: return LLVMAtomicOrderingRelease; + case AcquireRelease: return LLVMAtomicOrderingAcquireRelease; + case SequentiallyConsistent: + return LLVMAtomicOrderingSequentiallyConsistent; + } + + llvm_unreachable("Invalid AtomicOrdering value!"); +} + LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( @@ -2567,6 +2587,25 @@ void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { return cast(P)->setVolatile(isVolatile); } +LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { + Value *P = unwrap(MemAccessInst); + AtomicOrdering O; + if (LoadInst *LI = dyn_cast(P)) + O = LI->getOrdering(); + else + O = cast(P)->getOrdering(); + return mapToLLVMOrdering(O); +} + +void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) { + Value *P = unwrap(MemAccessInst); + AtomicOrdering O = mapFromLLVMOrdering(Ordering); + + if (LoadInst *LI = dyn_cast(P)) + return LI->setOrdering(O); + return cast(P)->setOrdering(O); +} + /*--.. Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2a90e70af1a3..b7841fe2b85c 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -148,7 +148,7 @@ DICompileUnit *DIBuilder::createCompileUnit( CUNode = DICompileUnit::getDistinct( VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer, isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr, - nullptr, nullptr, nullptr, nullptr, DWOId); + nullptr, nullptr, nullptr, nullptr, nullptr, DWOId); // Create a named metadata so that it is easier to find cu in a module. 
// Note that we only generate this when the caller wants to actually @@ -255,10 +255,12 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy, DITypeRef::get(Base)); } -DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy) { +DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy, + uint64_t SizeInBits, + uint64_t AlignInBits) { assert(RTy && "Unable to create reference type"); return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - DITypeRef::get(RTy), 0, 0, 0, 0); + DITypeRef::get(RTy), SizeInBits, AlignInBits, 0, 0); } DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name, @@ -429,12 +431,23 @@ DICompositeType *DIBuilder::createUnionType( return R; } -DISubroutineType *DIBuilder::createSubroutineType(DIFile *File, - DITypeRefArray ParameterTypes, +DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes, unsigned Flags) { return DISubroutineType::get(VMContext, Flags, ParameterTypes); } +DICompositeType *DIBuilder::createExternalTypeRef(unsigned Tag, DIFile *File, + StringRef UniqueIdentifier) { + assert(!UniqueIdentifier.empty() && "external type ref without uid"); + auto *CTy = + DICompositeType::get(VMContext, Tag, "", nullptr, 0, nullptr, nullptr, 0, + 0, 0, DINode::FlagExternalTypeRef, nullptr, 0, + nullptr, nullptr, UniqueIdentifier); + // Types with unique IDs need to be in the type map. + retainType(CTy); + return CTy; +} + DICompositeType *DIBuilder::createEnumerationType( DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements, @@ -590,18 +603,20 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( .release(); } -DILocalVariable *DIBuilder::createLocalVariable( - unsigned Tag, DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, - DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { +static DILocalVariable *createLocalVariable( + LLVMContext &VMContext, + DenseMap> &PreservedVariables, + DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? DIScope *Context = getNonCompileUnitScope(Scope); - auto *Node = DILocalVariable::get( - VMContext, Tag, cast_or_null(Context), Name, File, LineNo, - DITypeRef::get(Ty), ArgNo, Flags); + auto *Node = + DILocalVariable::get(VMContext, cast_or_null(Context), Name, + File, LineNo, DITypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { // The optimizer may remove local variables. 
If there is an interest // to preserve variable info in such situation then stash it in a @@ -613,6 +628,23 @@ DILocalVariable *DIBuilder::createLocalVariable( return Node; } +DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name, + DIFile *File, unsigned LineNo, + DIType *Ty, bool AlwaysPreserve, + unsigned Flags) { + return createLocalVariable(VMContext, PreservedVariables, Scope, Name, + /* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve, + Flags); +} + +DILocalVariable *DIBuilder::createParameterVariable( + DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) { + assert(ArgNo && "Expected non-zero argument number for parameter"); + return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo, + File, LineNo, Ty, AlwaysPreserve, Flags); +} + DIExpression *DIBuilder::createExpression(ArrayRef Addr) { return DIExpression::get(VMContext, Addr); } @@ -629,36 +661,37 @@ DIExpression *DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, return DIExpression::get(VMContext, Addr); } -DISubprogram *DIBuilder::createFunction(DIScopeRef Context, StringRef Name, - StringRef LinkageName, DIFile *File, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParams, MDNode *Decl) { +DISubprogram *DIBuilder::createFunction( + DIScopeRef Context, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, + DITemplateParameterArray TParams, DISubprogram *Decl) { // dragonegg does not generate identifier for types, so using an empty map // to resolve the context should be fine. DITypeIdentifierMap EmptyMap; return createFunction(Context.resolve(EmptyMap), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, - Flags, isOptimized, Fn, TParams, Decl); + Flags, isOptimized, TParams, Decl); } -DISubprogram *DIBuilder::createFunction(DIScope *Context, StringRef Name, - StringRef LinkageName, DIFile *File, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParams, MDNode *Decl) { - assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && - "function types should be subroutines"); - auto *Node = DISubprogram::get( - VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name, - LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, - nullptr, 0, 0, Flags, isOptimized, Fn, cast_or_null(TParams), - cast_or_null(Decl), - MDTuple::getTemporary(VMContext, None).release()); +template +static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... 
Args) { + if (IsDistinct) + return DISubprogram::getDistinct(std::forward(Args)...); + return DISubprogram::get(std::forward(Args)...); +} + +DISubprogram *DIBuilder::createFunction( + DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, + DITemplateParameterArray TParams, DISubprogram *Decl) { + auto *Node = + getSubprogram(/* IsDistinct = */ isDefinition, VMContext, + DIScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, + ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl, + MDTuple::getTemporary(VMContext, None).release()); if (isDefinition) AllSubprograms.push_back(Node); @@ -670,12 +703,11 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, - Function *Fn, MDNode *TParams, MDNode *Decl) { + DITemplateParameterArray TParams, DISubprogram *Decl) { return DISubprogram::getTemporary( VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, nullptr, 0, 0, Flags, isOptimized, Fn, - cast_or_null(TParams), cast_or_null(Decl), + ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl, nullptr) .release(); } @@ -685,18 +717,16 @@ DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned VK, unsigned VIndex, DIType *VTableHolder, unsigned Flags, - bool isOptimized, Function *Fn, MDNode *TParam) { - assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && - "function types should be subroutines"); + bool isOptimized, DITemplateParameterArray TParams) { assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); // FIXME: Do we want to use different scope/lines? - auto *SP = DISubprogram::get( - VMContext, DIScopeRef::get(cast(Context)), Name, LinkageName, F, - LineNo, Ty, isLocalToUnit, isDefinition, LineNo, - DITypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn, - cast_or_null(TParam), nullptr, nullptr); + auto *SP = getSubprogram( + /* IsDistinct = */ isDefinition, VMContext, + DIScopeRef::get(cast(Context)), Name, LinkageName, F, LineNo, Ty, + isLocalToUnit, isDefinition, LineNo, DITypeRef::get(VTableHolder), VK, + VIndex, Flags, isOptimized, TParams, nullptr, nullptr); if (isDefinition) AllSubprograms.push_back(SP); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 4d867efe1b3d..5468f47bbfe6 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -41,6 +41,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; + IsPadded = false; NumElements = ST->getNumElements(); // Loop over each of the elements, placing them in memory. @@ -49,8 +50,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. 
- if ((StructSize & (TyAlign-1)) != 0) + if ((StructSize & (TyAlign-1)) != 0) { + IsPadded = true; StructSize = RoundUpToAlignment(StructSize, TyAlign); + } // Keep track of maximum alignment constraint. StructAlignment = std::max(TyAlign, StructAlignment); @@ -64,8 +67,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. - if ((StructSize & (StructAlignment-1)) != 0) + if ((StructSize & (StructAlignment-1)) != 0) { + IsPadded = true; StructSize = RoundUpToAlignment(StructSize, StructAlignment); + } } @@ -461,8 +466,8 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; // The best match so far depends on what we're looking for. - if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { + if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 9646d1aa4d76..a2443becdd00 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -56,21 +56,6 @@ DISubprogram *llvm::getDISubprogram(const Function *F) { return nullptr; } -DICompositeTypeBase *llvm::getDICompositeType(DIType *T) { - if (auto *C = dyn_cast_or_null(T)) - return C; - - if (auto *D = dyn_cast_or_null(T)) { - // This function is currently used by dragonegg and dragonegg does - // not generate identifier for types, so using an empty map to resolve - // DerivedFrom should be fine. - DITypeIdentifierMap EmptyMap; - return getDICompositeType(D->getBaseType().resolve(EmptyMap)); - } - - return nullptr; -} - DITypeIdentifierMap llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) { DITypeIdentifierMap Map; @@ -164,20 +149,22 @@ void DebugInfoFinder::processType(DIType *DT) { if (!addType(DT)) return; processScope(DT->getScope().resolve(TypeIdentifierMap)); - if (auto *DCT = dyn_cast(DT)) { + if (auto *ST = dyn_cast(DT)) { + for (DITypeRef Ref : ST->getTypeArray()) + processType(Ref.resolve(TypeIdentifierMap)); + return; + } + if (auto *DCT = dyn_cast(DT)) { processType(DCT->getBaseType().resolve(TypeIdentifierMap)); - if (auto *ST = dyn_cast(DCT)) { - for (DITypeRef Ref : ST->getTypeArray()) - processType(Ref.resolve(TypeIdentifierMap)); - return; - } for (Metadata *D : DCT->getElements()) { if (auto *T = dyn_cast(D)) processType(T); else if (auto *SP = dyn_cast(D)) processSubprogram(SP); } - } else if (auto *DDT = dyn_cast(DT)) { + return; + } + if (auto *DDT = dyn_cast(DT)) { processType(DDT->getBaseType().resolve(TypeIdentifierMap)); } } @@ -313,6 +300,10 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { bool llvm::stripDebugInfo(Function &F) { bool Changed = false; + if (F.getSubprogram()) { + Changed = true; + F.setSubprogram(nullptr); + } for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (I.getDebugLoc()) { @@ -349,7 +340,7 @@ bool llvm::StripDebugInfo(Module &M) { for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), NME = M.named_metadata_end(); NMI != NME;) { - NamedMDNode *NMD = NMI; + NamedMDNode *NMD = &*NMI; ++NMI; if (NMD->getName().startswith("llvm.dbg.")) { NMD->eraseFromParent(); @@ -372,21 +363,3 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { return 
Val->getZExtValue(); return 0; } - -DenseMap -llvm::makeSubprogramMap(const Module &M) { - DenseMap R; - - NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) - return R; - - for (MDNode *N : CU_Nodes->operands()) { - auto *CUNode = cast(N); - for (auto *SP : CUNode->getSubprograms()) { - if (Function *F = SP->getFunction()) - R.insert(std::make_pair(F, SP)); - } - } - return R; -} diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 5e017488c1fb..58e0abdd577c 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -295,8 +295,7 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, StorageType Storage, bool ShouldCreate) { DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, TypeArray)); - Metadata *Ops[] = {nullptr, nullptr, nullptr, nullptr, - TypeArray, nullptr, nullptr, nullptr}; + Metadata *Ops[] = {nullptr, nullptr, nullptr, TypeArray}; DEFINE_GETIMPL_STORE(DISubroutineType, (Flags), Ops); } @@ -316,22 +315,20 @@ DICompileUnit *DICompileUnit::getImpl( unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, StorageType Storage, bool ShouldCreate) { + assert(Storage != Uniqued && "Cannot unique DICompileUnit"); assert(isCanonical(Producer) && "Expected canonical MDString"); assert(isCanonical(Flags) && "Expected canonical MDString"); assert(isCanonical(SplitDebugFilename) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP( - DICompileUnit, - (SourceLanguage, File, getString(Producer), IsOptimized, getString(Flags), - RuntimeVersion, getString(SplitDebugFilename), EmissionKind, EnumTypes, - RetainedTypes, Subprograms, GlobalVariables, ImportedEntities, DWOId)); + Metadata *Ops[] = {File, Producer, Flags, SplitDebugFilename, EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities}; - DEFINE_GETIMPL_STORE( - DICompileUnit, - (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId), Ops); + ImportedEntities, Macros}; + return storeImpl(new (ArrayRef(Ops).size()) DICompileUnit( + Context, Storage, SourceLanguage, IsOptimized, + RuntimeVersion, EmissionKind, DWOId, Ops), + Storage); } DISubprogram *DILocalScope::getSubprogram() const { @@ -345,34 +342,28 @@ DISubprogram *DISubprogram::getImpl( MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, - unsigned Flags, bool IsOptimized, Metadata *Function, - Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate) { + unsigned Flags, bool IsOptimized, Metadata *TemplateParams, + Metadata *Declaration, Metadata *Variables, StorageType Storage, + bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); assert(isCanonical(LinkageName) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DISubprogram, (Scope, getString(Name), getString(LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, - IsOptimized, Function, TemplateParams, Declaration, - Variables)); - Metadata *Ops[] = {File, Scope, Name, Name, - LinkageName, Type, ContainingType, Function, - TemplateParams, Declaration, Variables}; + IsOptimized, 
TemplateParams, Declaration, Variables)); + Metadata *Ops[] = {File, Scope, Name, Name, + LinkageName, Type, ContainingType, TemplateParams, + Declaration, Variables}; DEFINE_GETIMPL_STORE(DISubprogram, (Line, ScopeLine, Virtuality, VirtualIndex, Flags, IsLocalToUnit, IsDefinition, IsOptimized), Ops); } -Function *DISubprogram::getFunction() const { - // FIXME: Should this be looking through bitcasts? - return dyn_cast_or_null(getFunctionConstant()); -} - bool DISubprogram::describes(const Function *F) const { assert(F && "Invalid function"); - if (F == getFunction()) + if (F->getSubprogram() == this) return true; StringRef Name = getLinkageName(); if (Name.empty()) @@ -380,15 +371,13 @@ bool DISubprogram::describes(const Function *F) const { return F->getName() == Name; } -void DISubprogram::replaceFunction(Function *F) { - replaceFunction(F ? ConstantAsMetadata::get(F) - : static_cast(nullptr)); -} - DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *File, unsigned Line, unsigned Column, StorageType Storage, bool ShouldCreate) { + // Fixup column. + adjustColumn(Column); + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(DILexicalBlock, (Scope, File, Line, Column)); Metadata *Ops[] = {File, Scope}; @@ -467,21 +456,21 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Ops); } -DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, unsigned Tag, - Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, - Metadata *Type, unsigned Arg, - unsigned Flags, StorageType Storage, +DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, + MDString *Name, Metadata *File, + unsigned Line, Metadata *Type, + unsigned Arg, unsigned Flags, + StorageType Storage, bool ShouldCreate) { // 64K ought to be enough for any frontend. assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits"); assert(Scope && "Expected scope"); assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Tag, Scope, getString(Name), File, - Line, Type, Arg, Flags)); + DEFINE_GETIMPL_LOOKUP(DILocalVariable, + (Scope, getString(Name), File, Line, Type, Arg, Flags)); Metadata *Ops[] = {Scope, Name, File, Type}; - DEFINE_GETIMPL_STORE(DILocalVariable, (Tag, Line, Arg, Flags), Ops); + DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags), Ops); } DIExpression *DIExpression::getImpl(LLVMContext &Context, @@ -496,6 +485,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_bit_piece: return 3; case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: return 2; default: return 1; @@ -516,6 +506,7 @@ bool DIExpression::isValid() const { // Piece expressions must be at the end. 
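As an illustrative aside (not from this patch): the DW_OP_minus case added above slots into DIExpression's fixed operand-size table, where each opcode is followed by a known number of literal operands; that is what lets isValid() and the element iterator step from opcode to opcode. A minimal standalone sketch of that cursor, with the DWARF opcode values written out:

#include <cstdint>
#include <vector>

enum : uint64_t {
  OpDeref = 0x06,    // DW_OP_deref: opcode only
  OpMinus = 0x1c,    // DW_OP_minus: opcode + one literal operand (as used here)
  OpPlus = 0x22,     // DW_OP_plus: opcode + one literal operand (as used here)
  OpBitPiece = 0x9d  // DW_OP_bit_piece: opcode + offset + size
};

static unsigned opSize(uint64_t Op) {
  switch (Op) {
  case OpBitPiece: return 3;
  case OpPlus:
  case OpMinus:    return 2;
  default:         return 1;
  }
}

// Returns false if some opcode's operands would run off the end of the stream.
static bool wellFormed(const std::vector<uint64_t> &Expr) {
  for (size_t I = 0; I < Expr.size();) {
    unsigned N = opSize(Expr[I]);
    if (I + N > Expr.size())
      return false;
    I += N;
  }
  return true;
}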
return I->get() + I->getSize() == E->get(); case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: case dwarf::DW_OP_deref: break; } @@ -566,3 +557,24 @@ DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag, Metadata *Ops[] = {Scope, Entity, Name}; DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops); } + +DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, MDString *Name, MDString *Value, + StorageType Storage, bool ShouldCreate) { + assert(isCanonical(Name) && "Expected canonical MDString"); + DEFINE_GETIMPL_LOOKUP(DIMacro, + (MIType, Line, getString(Name), getString(Value))); + Metadata *Ops[] = { Name, Value }; + DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops); +} + +DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, + Metadata *Elements, StorageType Storage, + bool ShouldCreate) { + DEFINE_GETIMPL_LOOKUP(DIMacroFile, + (MIType, Line, File, Elements)); + Metadata *Ops[] = { File, Elements }; + DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops); +} + diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index b8f77eda15a6..6426f76bbaa6 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -49,7 +49,7 @@ struct PassRemarksOpt { "' in -pass-remarks: " + RegexError, false); } - }; + } }; static PassRemarksOpt PassRemarksOptLoc; @@ -91,6 +91,8 @@ int llvm::getNextAvailablePluginDiagnosticKind() { return ++PluginKindID; } +const char *DiagnosticInfo::AlwaysPrint = ""; + DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I, const Twine &MsgStr, DiagnosticSeverity Severity) @@ -121,9 +123,17 @@ void DiagnosticInfoDebugMetadataVersion::print(DiagnosticPrinter &DP) const { } void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { - if (getFileName() && getLineNum() > 0) - DP << getFileName() << ":" << getLineNum() << ": "; - else if (getFileName()) + if (!FileName.empty()) { + DP << getFileName(); + if (LineNum > 0) + DP << ":" << getLineNum(); + DP << ": "; + } + DP << getMsg(); +} + +void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const { + if (getFileName()) DP << getFileName() << ": "; DP << getMsg(); } @@ -166,8 +176,9 @@ bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const { } bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const { - return PassRemarksAnalysisOptLoc.Pattern && - PassRemarksAnalysisOptLoc.Pattern->match(getPassName()); + return getPassName() == DiagnosticInfo::AlwaysPrint || + (PassRemarksAnalysisOptLoc.Pattern && + PassRemarksAnalysisOptLoc.Pattern->match(getPassName())); } void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const { @@ -196,6 +207,24 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx, DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg)); } +void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(PassName, Fn, + DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisAliasing(PassName, Fn, + DLoc, Msg)); +} + bool DiagnosticInfoOptimizationFailure::isEnabled() const { // Only print warnings. 
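As an aside, AlwaysPrint above is an empty string used as a sentinel: isEnabled() compares the pass-name pointer against the sentinel's address, so only remarks that explicitly pass DiagnosticInfo::AlwaysPrint bypass the -pass-remarks-analysis regex, while an ordinary empty pass name does not. A toy illustration of the idiom (names hypothetical, not from this patch):

static const char *const AlwaysPrintSentinel = "";

// Pointer equality, not strcmp: only the sentinel object itself matches.
static bool isRemarkEnabled(const char *PassName,
                            bool (*matchesFilter)(const char *)) {
  if (PassName == AlwaysPrintSentinel)
    return true;
  return matchesFilter != nullptr && matchesFilter(PassName);
}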
return getSeverity() == DS_Warning; diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index b6a8bbcbe5fa..b9d4fb7de881 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -91,10 +91,10 @@ bool DominatorTree::dominates(const Instruction *Def, if (Def == User) return false; - // The value defined by an invoke dominates an instruction only if - // it dominates every instruction in UseBB. - // A PHI is dominated only if the instruction dominates every possible use - // in the UseBB. + // The value defined by an invoke dominates an instruction only if it + // dominates every instruction in UseBB. + // A PHI is dominated only if the instruction dominates every possible use in + // the UseBB. if (isa(Def) || isa(User)) return dominates(Def, UseBB); @@ -126,15 +126,15 @@ bool DominatorTree::dominates(const Instruction *Def, if (DefBB == UseBB) return false; - const InvokeInst *II = dyn_cast(Def); - if (!II) - return dominates(DefBB, UseBB); - // Invoke results are only usable in the normal destination, not in the // exceptional destination. - BasicBlock *NormalDest = II->getNormalDest(); - BasicBlockEdge E(DefBB, NormalDest); - return dominates(E, UseBB); + if (const auto *II = dyn_cast(Def)) { + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, UseBB); + } + + return dominates(DefBB, UseBB); } bool DominatorTree::dominates(const BasicBlockEdge &BBE, @@ -142,7 +142,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, // Assert that we have a single edge. We could handle them by simply // returning false, but since isSingleEdge is linear on the number of // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge()); + assert(BBE.isSingleEdge() && + "This function is not efficient in handling multiple edges"); // If the BB the edge ends in doesn't dominate the use BB, then the // edge also doesn't. @@ -192,7 +193,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { // Assert that we have a single edge. We could handle them by simply // returning false, but since isSingleEdge is linear on the number of // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge()); + assert(BBE.isSingleEdge() && + "This function is not efficient in handling multiple edges"); Instruction *UserInst = cast(U.getUser()); // A PHI in the end of the edge is dominated by it. @@ -232,8 +234,8 @@ bool DominatorTree::dominates(const Instruction *Def, const Use &U) const { if (!isReachableFromEntry(DefBB)) return false; - // Invoke instructions define their return values on the edges - // to their normal successors, so we have to handle them specially. + // Invoke instructions define their return values on the edges to their normal + // successors, so we have to handle them specially. // Among other things, this means they don't dominate anything in // their own block, except possibly a phi, so we don't need to // walk the block in any case. diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index b50ad1262c69..cfb40b19c733 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -35,8 +35,8 @@ using namespace llvm; // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... 
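The restructured check above keeps the invariant that an invoke's result is only defined on the edge to its normal destination, so def-use dominance for invokes reduces to edge dominance. A compilable skeleton of just that dispatch, with stand-in types and stubbed-out dominance queries (an illustration, not the real DominatorTree API):

struct BB {};
struct Edge { BB *From; BB *To; };

struct DomTreeStub {
  bool dominates(BB *, BB *) const { return false; }         // block query (stub)
  bool dominates(const Edge &, BB *) const { return false; } // edge query (stub)
};

// Pass NormalDest only when the definition is an invoke.
static bool defDominatesUse(const DomTreeStub &DT, BB *DefBB, BB *UseBB,
                            BB *NormalDest) {
  if (NormalDest)
    return DT.dominates(Edge{DefBB, NormalDest}, UseBB);
  return DT.dominates(DefBB, UseBB);
}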
-template class llvm::SymbolTableListTraits; -template class llvm::SymbolTableListTraits; +template class llvm::SymbolTableListTraits; +template class llvm::SymbolTableListTraits; //===----------------------------------------------------------------------===// // Argument Implementation @@ -235,11 +235,11 @@ Type *Function::getReturnType() const { } void Function::removeFromParent() { - getParent()->getFunctionList().remove(this); + getParent()->getFunctionList().remove(getIterator()); } void Function::eraseFromParent() { - getParent()->getFunctionList().erase(this); + getParent()->getFunctionList().erase(getIterator()); } //===----------------------------------------------------------------------===// @@ -248,7 +248,7 @@ void Function::eraseFromParent() { Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) - : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, + : GlobalObject(Ty, Value::FunctionVal, OperandTraits::op_begin(this), 0, Linkage, name), Ty(Ty) { assert(FunctionType::isValidReturnType(getReturnType()) && @@ -279,9 +279,6 @@ Function::~Function() { // Remove the function from the on-the-side GC table. clearGC(); - - // FIXME: needed by operator delete - setFunctionNumOperands(1); } void Function::BuildLazyArguments() const { @@ -328,14 +325,15 @@ void Function::dropAllReferences() { while (!BasicBlocks.empty()) BasicBlocks.begin()->eraseFromParent(); - // Prefix and prologue data are stored in a side table. - setPrefixData(nullptr); - setPrologueData(nullptr); + // Drop uses of any optional data (real or placeholder). + if (getNumOperands()) { + User::dropAllReferences(); + setNumHungOffUseOperands(0); + setValueSubclassData(getSubclassDataFromValue() & ~0xe); + } // Metadata is stored in a side-table. clearMetadata(); - - setPersonalityFn(nullptr); } void Function::addAttribute(unsigned i, Attribute::AttrKind attr) { @@ -411,30 +409,26 @@ void Function::clearGC() { } } -/// copyAttributesFrom - copy all additional attributes (those not needed to -/// create a Function) from the Function Src to this one. +/// Copy all additional attributes (those not needed to create a Function) from +/// the Function Src to this one. void Function::copyAttributesFrom(const GlobalValue *Src) { - assert(isa(Src) && "Expected a Function!"); GlobalObject::copyAttributesFrom(Src); - const Function *SrcF = cast(Src); + const Function *SrcF = dyn_cast(Src); + if (!SrcF) + return; + setCallingConv(SrcF->getCallingConv()); setAttributes(SrcF->getAttributes()); if (SrcF->hasGC()) setGC(SrcF->getGC()); else clearGC(); - if (SrcF->hasPrefixData()) - setPrefixData(SrcF->getPrefixData()); - else - setPrefixData(nullptr); - if (SrcF->hasPrologueData()) - setPrologueData(SrcF->getPrologueData()); - else - setPrologueData(nullptr); if (SrcF->hasPersonalityFn()) setPersonalityFn(SrcF->getPersonalityFn()); - else - setPersonalityFn(nullptr); + if (SrcF->hasPrefixData()) + setPrefixData(SrcF->getPrefixData()); + if (SrcF->hasPrologueData()) + setPrologueData(SrcF->getPrologueData()); } /// \brief This does the actual lookup of an intrinsic ID which @@ -492,7 +486,10 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "vararg"; // Ensure nested function types are distinguishable. 
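The copyAttributesFrom rewrite above now degrades gracefully: the base class copies what it owns, and each layer copies its own fields only when the source actually is that type (dyn_cast instead of cast plus an assert). A toy version of the shape, using dynamic_cast as a stand-in for LLVM's dyn_cast (illustrative types, not from this patch):

struct GlobalValueToy {
  int Visibility = 0;
  virtual ~GlobalValueToy() = default;
};
struct GlobalObjectToy : GlobalValueToy {
  unsigned Alignment = 0;
};

static void copyAttributes(GlobalObjectToy &Dst, const GlobalValueToy &Src) {
  Dst.Visibility = Src.Visibility; // base-class attributes: always copied
  if (const auto *GO = dynamic_cast<const GlobalObjectToy *>(&Src))
    Dst.Alignment = GO->Alignment; // derived attributes: only on type match
}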
Result += "f"; - } else if (Ty) + } else if (isa(Ty)) + Result += "v" + utostr(Ty->getVectorNumElements()) + + getMangledTypeStr(Ty->getVectorElementType()); + else if (Ty) Result += EVT::getEVT(Ty).getEVTString(); return Result; } @@ -541,22 +538,25 @@ enum IIT_Info { // Values from 16+ are only encodable with the inefficient encoding. IIT_V64 = 16, IIT_MMX = 17, - IIT_METADATA = 18, - IIT_EMPTYSTRUCT = 19, - IIT_STRUCT2 = 20, - IIT_STRUCT3 = 21, - IIT_STRUCT4 = 22, - IIT_STRUCT5 = 23, - IIT_EXTEND_ARG = 24, - IIT_TRUNC_ARG = 25, - IIT_ANYPTR = 26, - IIT_V1 = 27, - IIT_VARARG = 28, - IIT_HALF_VEC_ARG = 29, - IIT_SAME_VEC_WIDTH_ARG = 30, - IIT_PTR_TO_ARG = 31, - IIT_VEC_OF_PTRS_TO_ELT = 32, - IIT_I128 = 33 + IIT_TOKEN = 18, + IIT_METADATA = 19, + IIT_EMPTYSTRUCT = 20, + IIT_STRUCT2 = 21, + IIT_STRUCT3 = 22, + IIT_STRUCT4 = 23, + IIT_STRUCT5 = 24, + IIT_EXTEND_ARG = 25, + IIT_TRUNC_ARG = 26, + IIT_ANYPTR = 27, + IIT_V1 = 28, + IIT_VARARG = 29, + IIT_HALF_VEC_ARG = 30, + IIT_SAME_VEC_WIDTH_ARG = 31, + IIT_PTR_TO_ARG = 32, + IIT_VEC_OF_PTRS_TO_ELT = 33, + IIT_I128 = 34, + IIT_V512 = 35, + IIT_V1024 = 36 }; @@ -576,6 +576,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_MMX: OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0)); return; + case IIT_TOKEN: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0)); + return; case IIT_METADATA: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0)); return; @@ -634,6 +637,14 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64)); DecodeIITType(NextElt, Infos, OutputTable); return; + case IIT_V512: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 512)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V1024: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1024)); + DecodeIITType(NextElt, Infos, OutputTable); + return; case IIT_PTR: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); DecodeIITType(NextElt, Infos, OutputTable); @@ -751,6 +762,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::Void: return Type::getVoidTy(Context); case IITDescriptor::VarArg: return Type::getVoidTy(Context); case IITDescriptor::MMX: return Type::getX86_MMXTy(Context); + case IITDescriptor::Token: return Type::getTokenTy(Context); case IITDescriptor::Metadata: return Type::getMetadataTy(Context); case IITDescriptor::Half: return Type::getHalfTy(Context); case IITDescriptor::Float: return Type::getFloatTy(Context); @@ -924,62 +936,68 @@ bool Function::callsFunctionThatReturnsTwice() const { return false; } +Constant *Function::getPersonalityFn() const { + assert(hasPersonalityFn() && getNumOperands()); + return cast(Op<0>()); +} + +void Function::setPersonalityFn(Constant *Fn) { + setHungoffOperand<0>(Fn); + setValueSubclassDataBit(3, Fn != nullptr); +} + Constant *Function::getPrefixData() const { - assert(hasPrefixData()); - const LLVMContextImpl::PrefixDataMapTy &PDMap = - getContext().pImpl->PrefixDataMap; - assert(PDMap.find(this) != PDMap.end()); - return cast(PDMap.find(this)->second->getReturnValue()); + assert(hasPrefixData() && getNumOperands()); + return cast(Op<1>()); } void Function::setPrefixData(Constant *PrefixData) { - if (!PrefixData && !hasPrefixData()) - return; - - unsigned SCData = getSubclassDataFromValue(); - LLVMContextImpl::PrefixDataMapTy &PDMap = getContext().pImpl->PrefixDataMap; - ReturnInst *&PDHolder = 
PDMap[this]; - if (PrefixData) { - if (PDHolder) - PDHolder->setOperand(0, PrefixData); - else - PDHolder = ReturnInst::Create(getContext(), PrefixData); - SCData |= (1<<1); - } else { - delete PDHolder; - PDMap.erase(this); - SCData &= ~(1<<1); - } - setValueSubclassData(SCData); + setHungoffOperand<1>(PrefixData); + setValueSubclassDataBit(1, PrefixData != nullptr); } Constant *Function::getPrologueData() const { - assert(hasPrologueData()); - const LLVMContextImpl::PrologueDataMapTy &SOMap = - getContext().pImpl->PrologueDataMap; - assert(SOMap.find(this) != SOMap.end()); - return cast(SOMap.find(this)->second->getReturnValue()); + assert(hasPrologueData() && getNumOperands()); + return cast(Op<2>()); } void Function::setPrologueData(Constant *PrologueData) { - if (!PrologueData && !hasPrologueData()) + setHungoffOperand<2>(PrologueData); + setValueSubclassDataBit(2, PrologueData != nullptr); +} + +void Function::allocHungoffUselist() { + // If we've already allocated a uselist, stop here. + if (getNumOperands()) return; - unsigned PDData = getSubclassDataFromValue(); - LLVMContextImpl::PrologueDataMapTy &PDMap = getContext().pImpl->PrologueDataMap; - ReturnInst *&PDHolder = PDMap[this]; - if (PrologueData) { - if (PDHolder) - PDHolder->setOperand(0, PrologueData); - else - PDHolder = ReturnInst::Create(getContext(), PrologueData); - PDData |= (1<<2); - } else { - delete PDHolder; - PDMap.erase(this); - PDData &= ~(1<<2); + allocHungoffUses(3, /*IsPhi=*/ false); + setNumHungOffUseOperands(3); + + // Initialize the uselist with placeholder operands to allow traversal. + auto *CPN = ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0)); + Op<0>().set(CPN); + Op<1>().set(CPN); + Op<2>().set(CPN); +} + +template +void Function::setHungoffOperand(Constant *C) { + if (C) { + allocHungoffUselist(); + Op().set(C); + } else if (getNumOperands()) { + Op().set( + ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0))); } - setValueSubclassData(PDData); +} + +void Function::setValueSubclassDataBit(unsigned Bit, bool On) { + assert(Bit < 16 && "SubclassData contains only 16 bits"); + if (On) + setValueSubclassData(getSubclassDataFromValue() | (1 << Bit)); + else + setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } void Function::setEntryCount(uint64_t Count) { @@ -997,22 +1015,3 @@ Optional Function::getEntryCount() const { } return None; } - -void Function::setPersonalityFn(Constant *C) { - if (!C) { - if (hasPersonalityFn()) { - // Note, the num operands is used to compute the offset of the operand, so - // the order here matters. Clearing the operand then clearing the num - // operands ensures we have the correct offset to the operand. - Op<0>().set(nullptr); - setFunctionNumOperands(0); - } - } else { - // Note, the num operands is used to compute the offset of the operand, so - // the order here matters. We need to set num operands to 1 first so that - // we get the correct offset to the first operand when we set it. - if (!hasPersonalityFn()) - setFunctionNumOperands(1); - Op<0>().set(C); - } -} diff --git a/lib/IR/FunctionInfo.cpp b/lib/IR/FunctionInfo.cpp new file mode 100644 index 000000000000..17a67bcf0472 --- /dev/null +++ b/lib/IR/FunctionInfo.cpp @@ -0,0 +1,67 @@ +//===-- FunctionInfo.cpp - Function Info Index ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
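As an aside, setValueSubclassDataBit above packs the has-personality/prefix/prologue flags into the value's 16-bit subclass-data word, one bit per optional payload. The helper in isolation, as a standalone sketch:

#include <cassert>
#include <cstdint>

class SubclassBits {
  uint16_t Bits = 0; // mirrors the 16-bit value subclass data
public:
  void setBit(unsigned Bit, bool On) {
    assert(Bit < 16 && "SubclassData contains only 16 bits");
    if (On)
      Bits = uint16_t(Bits | (1u << Bit));
    else
      Bits = uint16_t(Bits & ~(1u << Bit));
  }
  bool testBit(unsigned Bit) const { return (Bits >> Bit) & 1u; }
};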
+// +//===----------------------------------------------------------------------===// +// +// This file implements the function info index and summary classes for the +// IR library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/FunctionInfo.h" +#include "llvm/ADT/StringMap.h" +using namespace llvm; + +// Create the combined function index/summary from multiple +// per-module instances. +void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other, + uint64_t NextModuleId) { + + StringRef ModPath; + for (auto &OtherFuncInfoLists : *Other) { + std::string FuncName = OtherFuncInfoLists.getKey(); + FunctionInfoList &List = OtherFuncInfoLists.second; + + // Assert that the func info list only has one entry, since we shouldn't + // have duplicate names within a single per-module index. + assert(List.size() == 1); + std::unique_ptr<FunctionInfo> Info = std::move(List.front()); + + // Skip if there was no function summary section. + if (!Info->functionSummary()) + continue; + + // Add the module path string ref for this module if we haven't already + // saved a reference to it. + if (ModPath.empty()) + ModPath = + addModulePath(Info->functionSummary()->modulePath(), NextModuleId); + else + assert(ModPath == Info->functionSummary()->modulePath() && + "Each module in the combined map should have a unique ID"); + + // Note the module path string ref was copied above and is still owned by + // the original per-module index. Reset it to the new module path + // string reference owned by the combined index. + Info->functionSummary()->setModulePath(ModPath); + + // If it is a local function, rename it. + if (Info->functionSummary()->isLocalFunction()) { + // Any local functions are virtually renamed when being added to the + // combined index map, to disambiguate from other functions with + // the same name. The symbol table created for the combined index + // file should contain the renamed symbols. + FuncName = + FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId); + } + + // Add new function info to existing list. There may be duplicates when + // combining FunctionMap entries, due to COMDAT functions. Any local + // functions were virtually renamed above.
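The renaming step in mergeFrom above keeps merged local symbols distinct by appending a per-module id to the name. A plausible standalone sketch of such a helper; the exact ".llvm." separator is an assumption for illustration, not taken from this patch:

#include <cstdint>
#include <string>

// Hypothetical: promote a module-local name to a combined-index-unique name.
static std::string globalNameForLocal(const std::string &Name,
                                      uint64_t ModuleId) {
  return Name + ".llvm." + std::to_string(ModuleId);
}

// e.g. globalNameForLocal("helper", 3) yields "helper.llvm.3"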
+ addFunctionInfo(FuncName, std::move(Info)); + } +} diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 6ed589131725..35b8157751b6 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -448,7 +448,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) { namespace { struct formatBranchInfo { - formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total) + formatBranchInfo(const GCOV::Options &Options, uint64_t Count, uint64_t Total) : Options(Options), Count(Count), Total(Total) {} void print(raw_ostream &OS) const { @@ -460,7 +460,7 @@ struct formatBranchInfo { OS << "taken " << branchDiv(Count, Total) << "%"; } - const GCOVOptions &Options; + const GCOV::Options &Options; uint64_t Count; uint64_t Total; }; diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 1d0282677bf7..6159f93faf89 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -32,15 +32,9 @@ bool GlobalValue::isMaterializable() const { return F->isMaterializable(); return false; } -bool GlobalValue::isDematerializable() const { - return getParent() && getParent()->isDematerializable(this); -} std::error_code GlobalValue::materialize() { return getParent()->materialize(this); } -void GlobalValue::dematerialize() { - getParent()->dematerialize(this); -} /// Override destroyConstantImpl to make sure it doesn't get called on /// GlobalValue's because they shouldn't be treated like other constants. @@ -97,10 +91,11 @@ void GlobalObject::setGlobalObjectSubClassData(unsigned Val) { } void GlobalObject::copyAttributesFrom(const GlobalValue *Src) { - const auto *GV = cast(Src); - GlobalValue::copyAttributesFrom(GV); - setAlignment(GV->getAlignment()); - setSection(GV->getSection()); + GlobalValue::copyAttributesFrom(Src); + if (const auto *GV = dyn_cast(Src)) { + setAlignment(GV->getAlignment()); + setSection(GV->getSection()); + } } const char *GlobalValue::getSection() const { @@ -147,9 +142,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + : GlobalObject(Ty, Value::GlobalVariableVal, OperandTraits::op_begin(this), - InitVal != nullptr, Link, Name), + InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { setThreadLocalMode(TLMode); @@ -165,9 +160,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, const Twine &Name, GlobalVariable *Before, ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + : GlobalObject(Ty, Value::GlobalVariableVal, OperandTraits::op_begin(this), - InitVal != nullptr, Link, Name), + InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { setThreadLocalMode(TLMode); @@ -178,7 +173,7 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, } if (Before) - Before->getParent()->getGlobalList().insert(Before, this); + Before->getParent()->getGlobalList().insert(Before->getIterator(), this); else M.getGlobalList().push_back(this); } @@ -188,11 +183,11 @@ void GlobalVariable::setParent(Module *parent) { } void GlobalVariable::removeFromParent() { - getParent()->getGlobalList().remove(this); + getParent()->getGlobalList().remove(getIterator()); } void 
GlobalVariable::eraseFromParent() { - getParent()->getGlobalList().erase(this); + getParent()->getGlobalList().erase(getIterator()); } void GlobalVariable::setInitializer(Constant *InitVal) { @@ -216,14 +211,14 @@ void GlobalVariable::setInitializer(Constant *InitVal) { } } -/// copyAttributesFrom - copy all additional attributes (those not needed to -/// create a GlobalVariable) from the GlobalVariable Src to this one. +/// Copy all additional attributes (those not needed to create a GlobalVariable) +/// from the GlobalVariable Src to this one. void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { - assert(isa(Src) && "Expected a GlobalVariable!"); GlobalObject::copyAttributesFrom(Src); - const GlobalVariable *SrcVar = cast(Src); - setThreadLocalMode(SrcVar->getThreadLocalMode()); - setExternallyInitialized(SrcVar->isExternallyInitialized()); + if (const GlobalVariable *SrcVar = dyn_cast(Src)) { + setThreadLocalMode(SrcVar->getThreadLocalMode()); + setExternallyInitialized(SrcVar->isExternallyInitialized()); + } } @@ -231,35 +226,40 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { // GlobalAlias Implementation //===----------------------------------------------------------------------===// -GlobalAlias::GlobalAlias(PointerType *Ty, LinkageTypes Link, const Twine &Name, - Constant *Aliasee, Module *ParentModule) - : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) { +GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, + const Twine &Name, Constant *Aliasee, + Module *ParentModule) + : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name, + AddressSpace) { Op<0>() = Aliasee; if (ParentModule) ParentModule->getAliasList().push_back(this); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Link, - const Twine &Name, Constant *Aliasee, - Module *ParentModule) { - return new GlobalAlias(Ty, Link, Name, Aliasee, ParentModule); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Link, const Twine &Name, + Constant *Aliasee, Module *ParentModule) { + return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Module *Parent) { - return create(Ty, Linkage, Name, nullptr, Parent); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Module *Parent) { + return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, GlobalValue *Aliasee) { - return create(Ty, Linkage, Name, Aliasee, Aliasee->getParent()); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + GlobalValue *Aliasee) { + return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent()); } GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name, GlobalValue *Aliasee) { PointerType *PTy = Aliasee->getType(); - return create(PTy, Link, Name, Aliasee); + return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name, + Aliasee); } GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) { @@ -271,11 +271,11 @@ void GlobalAlias::setParent(Module *parent) { } void GlobalAlias::removeFromParent() { - getParent()->getAliasList().remove(this); + getParent()->getAliasList().remove(getIterator()); } void GlobalAlias::eraseFromParent() { - 
getParent()->getAliasList().erase(this); + getParent()->getAliasList().erase(getIterator()); } void GlobalAlias::setAliasee(Constant *Aliasee) { diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index bddb278dee79..447412936335 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -247,18 +247,21 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id, return createCallHelper(TheFn, Ops, this, Name); } +template static std::vector getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, ArrayRef CallArgs, - ArrayRef DeoptArgs, ArrayRef GCArgs) { + Value *ActualCallee, uint32_t Flags, ArrayRef CallArgs, + ArrayRef TransitionArgs, ArrayRef DeoptArgs, + ArrayRef GCArgs) { std::vector Args; Args.push_back(B.getInt64(ID)); Args.push_back(B.getInt32(NumPatchBytes)); Args.push_back(ActualCallee); Args.push_back(B.getInt32(CallArgs.size())); - Args.push_back(B.getInt32((unsigned)StatepointFlags::None)); + Args.push_back(B.getInt32(Flags)); Args.insert(Args.end(), CallArgs.begin(), CallArgs.end()); - Args.push_back(B.getInt32(0 /* no transition args */)); + Args.push_back(B.getInt32(TransitionArgs.size())); + Args.insert(Args.end(), TransitionArgs.begin(), TransitionArgs.end()); Args.push_back(B.getInt32(DeoptArgs.size())); Args.insert(Args.end(), DeoptArgs.begin(), DeoptArgs.end()); Args.insert(Args.end(), GCArgs.begin(), GCArgs.end()); @@ -266,36 +269,78 @@ getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes, return Args; } -CallInst *IRBuilderBase::CreateGCStatepointCall( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, - ArrayRef CallArgs, ArrayRef DeoptArgs, - ArrayRef GCArgs, const Twine &Name) { +template +static CallInst *CreateGCStatepointCallCommon( + IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, + Value *ActualCallee, uint32_t Flags, ArrayRef CallArgs, + ArrayRef TransitionArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { // Extract out the type of the callee. 
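As an aside, getStatepointArgs above flattens the statepoint into one operand vector: fixed header fields first, then each variable-length section preceded by its own count, with the GC arguments trailing uncounted. The same layout rehearsed over plain integers, purely for illustration:

#include <cstdint>
#include <vector>

static std::vector<uint64_t>
buildStatepointOperands(uint64_t ID, uint32_t NumPatchBytes, uint64_t Callee,
                        uint32_t Flags, const std::vector<uint64_t> &CallArgs,
                        const std::vector<uint64_t> &TransitionArgs,
                        const std::vector<uint64_t> &DeoptArgs,
                        const std::vector<uint64_t> &GCArgs) {
  std::vector<uint64_t> Ops = {ID, NumPatchBytes, Callee,
                               (uint64_t)CallArgs.size(), Flags};
  Ops.insert(Ops.end(), CallArgs.begin(), CallArgs.end());
  Ops.push_back(TransitionArgs.size());
  Ops.insert(Ops.end(), TransitionArgs.begin(), TransitionArgs.end());
  Ops.push_back(DeoptArgs.size());
  Ops.insert(Ops.end(), DeoptArgs.begin(), DeoptArgs.end());
  Ops.insert(Ops.end(), GCArgs.begin(), GCArgs.end()); // no count: trailing
  return Ops;
}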
PointerType *FuncPtrType = cast(ActualCallee->getType()); assert(isa(FuncPtrType->getElementType()) && "actual callee must be a callable value"); - Module *M = BB->getParent()->getParent(); + Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) Type *ArgTypes[] = { FuncPtrType }; Function *FnStatepoint = Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint, ArgTypes); - std::vector Args = getStatepointArgs( - *this, ID, NumPatchBytes, ActualCallee, CallArgs, DeoptArgs, GCArgs); - return createCallHelper(FnStatepoint, Args, this, Name); + std::vector Args = + getStatepointArgs(*Builder, ID, NumPatchBytes, ActualCallee, Flags, + CallArgs, TransitionArgs, DeoptArgs, GCArgs); + return createCallHelper(FnStatepoint, Args, Builder, Name); +} + +CallInst *IRBuilderBase::CreateGCStatepointCall( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, + ArrayRef CallArgs, ArrayRef DeoptArgs, + ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), + CallArgs, None /* No Transition Args */, DeoptArgs, GCArgs, Name); +} + +CallInst *IRBuilderBase::CreateGCStatepointCall( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, + ArrayRef CallArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs, + DeoptArgs, GCArgs, Name); } CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, ArrayRef CallArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - std::vector VCallArgs; - for (auto &U : CallArgs) - VCallArgs.push_back(U.get()); - return CreateGCStatepointCall(ID, NumPatchBytes, ActualCallee, VCallArgs, - DeoptArgs, GCArgs, Name); + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), + CallArgs, None, DeoptArgs, GCArgs, Name); +} + +template +static InvokeInst *CreateGCStatepointInvokeCommon( + IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, + Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, + uint32_t Flags, ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + // Extract out the type of the callee. + PointerType *FuncPtrType = cast(ActualInvokee->getType()); + assert(isa(FuncPtrType->getElementType()) && + "actual callee must be a callable value"); + + Module *M = Builder->GetInsertBlock()->getParent()->getParent(); + // Fill in the one generic type'd argument (the function is also vararg) + Function *FnStatepoint = Intrinsic::getDeclaration( + M, Intrinsic::experimental_gc_statepoint, {FuncPtrType}); + + std::vector Args = + getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee, Flags, + InvokeArgs, TransitionArgs, DeoptArgs, GCArgs); + return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, Builder, + Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( @@ -303,32 +348,30 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - // Extract out the type of the callee. 
- PointerType *FuncPtrType = cast(ActualInvokee->getType()); - assert(isa(FuncPtrType->getElementType()) && - "actual callee must be a callable value"); + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, + uint32_t(StatepointFlags::None), InvokeArgs, None /* No Transition Args*/, + DeoptArgs, GCArgs, Name); +} - Module *M = BB->getParent()->getParent(); - // Fill in the one generic type'd argument (the function is also vararg) - Function *FnStatepoint = Intrinsic::getDeclaration( - M, Intrinsic::experimental_gc_statepoint, {FuncPtrType}); - - std::vector Args = getStatepointArgs( - *this, ID, NumPatchBytes, ActualInvokee, InvokeArgs, DeoptArgs, GCArgs); - return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, this, - Name); +InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, + ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags, + InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - std::vector VCallArgs; - for (auto &U : InvokeArgs) - VCallArgs.push_back(U.get()); - return CreateGCStatepointInvoke(ID, NumPatchBytes, ActualInvokee, NormalDest, - UnwindDest, VCallArgs, DeoptArgs, GCArgs, - Name); + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, + uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs, + Name); } CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint, diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index aa9e0272ad10..15d3b830b8fc 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -24,23 +24,22 @@ using namespace llvm; InlineAsm::~InlineAsm() { } - -InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString, +InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) { - InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack, - asmDialect); - LLVMContextImpl *pImpl = Ty->getContext().pImpl; - return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key); + InlineAsmKeyType Key(AsmString, Constraints, FTy, hasSideEffects, + isAlignStack, asmDialect); + LLVMContextImpl *pImpl = FTy->getContext().pImpl; + return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(FTy), Key); } -InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString, +InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString, const std::string &constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) - : Value(Ty, Value::InlineAsmVal), - AsmString(asmString), Constraints(constraints), - HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), - Dialect(asmDialect) { + : Value(PointerType::getUnqual(FTy), Value::InlineAsmVal), + AsmString(asmString), Constraints(constraints), FTy(FTy), + HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), + Dialect(asmDialect) { // Do various checks on the constraint string and type. 
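The constructor change above stores the FunctionType on the InlineAsm value itself instead of re-deriving it from the pointee type of its pointer type, decoupling getFunctionType() from the pointer's element type. The shape of that change in miniature (toy types, not the real classes):

struct FunctionTypeToy {};

struct InlineAsmToy {
  FunctionTypeToy *FTy; // cached at construction
  explicit InlineAsmToy(FunctionTypeToy *T) : FTy(T) {}
  // Before: recovered via the pointer type's element type; after: a field read.
  FunctionTypeToy *getFunctionType() const { return FTy; }
};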
assert(Verify(getFunctionType(), constraints) && @@ -53,7 +52,7 @@ void InlineAsm::destroyConstant() { } FunctionType *InlineAsm::getFunctionType() const { - return cast(getType()->getElementType()); + return FTy; } ///Default constructor. @@ -160,6 +159,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // If Operand N already has a matching input, reject this. An output // can't be constrained to the same value as multiple inputs. if (isMultipleAlternative) { + if (multipleAlternativeIndex >= + ConstraintsSoFar[N].multipleAlternatives.size()) + return true; InlineAsm::SubConstraintInfo &scInfo = ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex]; if (scInfo.MatchingInput != -1) @@ -291,4 +293,3 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->getNumParams() != NumInputs) return false; return true; } - diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index c57ba16cf6ca..a0bd2c9698e8 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -28,7 +28,7 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, if (InsertBefore) { BasicBlock *BB = InsertBefore->getParent(); assert(BB && "Instruction to insert before is not in a basic block!"); - BB->getInstList().insert(InsertBefore, this); + BB->getInstList().insert(InsertBefore->getIterator(), this); } } @@ -62,33 +62,39 @@ Module *Instruction::getModule() { return getParent()->getModule(); } +Function *Instruction::getFunction() { return getParent()->getParent(); } + +const Function *Instruction::getFunction() const { + return getParent()->getParent(); +} void Instruction::removeFromParent() { - getParent()->getInstList().remove(this); + getParent()->getInstList().remove(getIterator()); } iplist::iterator Instruction::eraseFromParent() { - return getParent()->getInstList().erase(this); + return getParent()->getInstList().erase(getIterator()); } /// insertBefore - Insert an unlinked instructions into a basic block /// immediately before the specified instruction. void Instruction::insertBefore(Instruction *InsertPos) { - InsertPos->getParent()->getInstList().insert(InsertPos, this); + InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this); } /// insertAfter - Insert an unlinked instructions into a basic block /// immediately after the specified instruction. void Instruction::insertAfter(Instruction *InsertPos) { - InsertPos->getParent()->getInstList().insertAfter(InsertPos, this); + InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(), + this); } /// moveBefore - Unlink this instruction from its current basic block and /// insert it into the basic block that MovePos lives in, right before /// MovePos. void Instruction::moveBefore(Instruction *MovePos) { - MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(), - this); + MovePos->getParent()->getInstList().splice( + MovePos->getIterator(), getParent()->getInstList(), getIterator()); } /// Set or clear the unsafe-algebra flag on this instruction, which must be an @@ -196,6 +202,10 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Invoke: return "invoke"; case Resume: return "resume"; case Unreachable: return "unreachable"; + case CleanupRet: return "cleanupret"; + case CatchRet: return "catchret"; + case CatchPad: return "catchpad"; + case CatchSwitch: return "catchswitch"; // Standard binary operators... 
case Add: return "add"; @@ -256,6 +266,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case ExtractValue: return "extractvalue"; case InsertValue: return "insertvalue"; case LandingPad: return "landingpad"; + case CleanupPad: return "cleanuppad"; default: return " "; } @@ -285,11 +296,12 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, if (const CallInst *CI = dyn_cast(I1)) return CI->isTailCall() == cast(I2)->isTailCall() && CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == cast(I2)->getAttributes(); + CI->getAttributes() == cast(I2)->getAttributes() && + CI->hasIdenticalOperandBundleSchema(*cast(I2)); if (const InvokeInst *CI = dyn_cast(I1)) return CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == - cast(I2)->getAttributes(); + CI->getAttributes() == cast(I2)->getAttributes() && + CI->hasIdenticalOperandBundleSchema(*cast(I2)); if (const InsertValueInst *IVI = dyn_cast(I1)) return IVI->getIndices() == cast(I2)->getIndices(); if (const ExtractValueInst *EVI = dyn_cast(I1)) @@ -407,6 +419,8 @@ bool Instruction::mayReadFromMemory() const { case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: + case Instruction::CatchPad: + case Instruction::CatchRet: return true; case Instruction::Call: return !cast(this)->doesNotAccessMemory(); @@ -427,6 +441,8 @@ bool Instruction::mayWriteToMemory() const { case Instruction::VAArg: case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: + case Instruction::CatchPad: + case Instruction::CatchRet: return true; case Instruction::Call: return !cast(this)->onlyReadsMemory(); @@ -455,6 +471,10 @@ bool Instruction::isAtomic() const { bool Instruction::mayThrow() const { if (const CallInst *CI = dyn_cast(this)) return !CI->doesNotThrow(); + if (const auto *CRI = dyn_cast(this)) + return CRI->unwindsToCaller(); + if (const auto *CatchSwitch = dyn_cast(this)) + return CatchSwitch->unwindsToCaller(); return isa(this); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 86c921aeda8a..4ae2fd522b52 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -62,7 +62,10 @@ UnaryInstruction::~UnaryInstruction() { const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { if (Op1->getType() != Op2->getType()) return "both values to select must have same type"; - + + if (Op1->getType()->isTokenTy()) + return "select values cannot have token type"; + if (VectorType *VT = dyn_cast(Op0->getType())) { // Vector select. 
if (VT->getElementType() != Type::getInt1Ty(Op0->getContext())) @@ -84,6 +87,8 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { // PHINode Class //===----------------------------------------------------------------------===// +void PHINode::anchor() {} + PHINode::PHINode(const PHINode &PN) : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()), ReservedSpace(PN.getNumOperands()) { @@ -223,9 +228,10 @@ CallInst::~CallInst() { } void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, - const Twine &NameStr) { + ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; - assert(getNumOperands() == Args.size() + 1 && "NumOperands not set up?"); + assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 && + "NumOperands not set up?"); Op<-1>() = Func; #ifndef NDEBUG @@ -240,6 +246,11 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, #endif std::copy(Args.begin(), Args.end(), op_begin()); + + auto It = populateBundleOperandInfos(Bundles, Args.size()); + (void)It; + assert(It + 1 == op_end() && "Should add up!"); + setName(NameStr); } @@ -281,11 +292,26 @@ CallInst::CallInst(const CallInst &CI) AttributeList(CI.AttributeList), FTy(CI.FTy) { setTailCallKind(CI.getTailCallKind()); setCallingConv(CI.getCallingConv()); - + std::copy(CI.op_begin(), CI.op_end(), op_begin()); + std::copy(CI.bundle_op_info_begin(), CI.bundle_op_info_end(), + bundle_op_info_begin()); SubclassOptionalData = CI.SubclassOptionalData; } +CallInst *CallInst::Create(CallInst *CI, ArrayRef OpB, + Instruction *InsertPt) { + std::vector Args(CI->arg_begin(), CI->arg_end()); + + auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(), + InsertPt); + NewCI->setTailCallKind(CI->getTailCallKind()); + NewCI->setCallingConv(CI->getCallingConv()); + NewCI->SubclassOptionalData = CI->SubclassOptionalData; + NewCI->setAttributes(CI->getAttributes()); + return NewCI; +} + void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); PAL = PAL.addAttribute(getContext(), i, attr); @@ -320,6 +346,8 @@ void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { } bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { + assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + if (AttributeList.hasAttribute(i, A)) return true; if (const Function *F = getCalledFunction()) @@ -327,6 +355,25 @@ bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { return false; } +bool CallInst::dataOperandHasImpliedAttr(unsigned i, + Attribute::AttrKind A) const { + + // There are getNumOperands() - 1 data operands. The last operand is the + // callee. + assert(i < getNumOperands() && "Data operand index out of bounds!"); + + // The attribute A can either be directly specified, if the operand in + // question is a call argument; or be indirectly implied by the kind of its + // containing operand bundle, if the operand is a bundle operand. 
+ + if (i < (getNumArgOperands() + 1)) + return paramHasAttr(i, A); + + assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && + "Must be either a call argument or an operand bundle!"); + return bundleOperandHasAttr(i - 1, A); +} + /// IsConstantOne - Return true only if val is constant int 1 static bool IsConstantOne(Value *val) { assert(val && "IsConstantOne does not work with nullptr val"); @@ -496,10 +543,12 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) { void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; - assert(getNumOperands() == 3 + Args.size() && "NumOperands not set up?"); + assert(getNumOperands() == 3 + Args.size() + CountBundleInputs(Bundles) && + "NumOperands not set up?"); Op<-3>() = Fn; Op<-2>() = IfNormal; Op<-1>() = IfException; @@ -516,6 +565,11 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, #endif std::copy(Args.begin(), Args.end(), op_begin()); + + auto It = populateBundleOperandInfos(Bundles, Args.size()); + (void)It; + assert(It + 3 == op_end() && "Should add up!"); + setName(NameStr); } @@ -527,9 +581,24 @@ InvokeInst::InvokeInst(const InvokeInst &II) AttributeList(II.AttributeList), FTy(II.FTy) { setCallingConv(II.getCallingConv()); std::copy(II.op_begin(), II.op_end(), op_begin()); + std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(), + bundle_op_info_begin()); SubclassOptionalData = II.SubclassOptionalData; } +InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, + Instruction *InsertPt) { + std::vector Args(II->arg_begin(), II->arg_end()); + + auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(), + II->getUnwindDest(), Args, OpB, + II->getName(), InsertPt); + NewII->setCallingConv(II->getCallingConv()); + NewII->SubclassOptionalData = II->SubclassOptionalData; + NewII->setAttributes(II->getAttributes()); + return NewII; +} + BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } @@ -543,12 +612,20 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const { if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) return true; + + // Operand bundles override attributes on the called function, but don't + // override attributes directly present on the invoke instruction. + if (isFnAttrDisallowedByOpBundle(A)) + return false; + if (const Function *F = getCalledFunction()) return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); return false; } bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { + assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + if (AttributeList.hasAttribute(i, A)) return true; if (const Function *F = getCalledFunction()) @@ -556,6 +633,24 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { return false; } +bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, + Attribute::AttrKind A) const { + // There are getNumOperands() - 3 data operands. The last three operands are + // the callee and the two successor basic blocks. + assert(i < (getNumOperands() - 2) && "Data operand index out of bounds!"); + + // The attribute A can either be directly specified, if the operand in + // question is an invoke argument; or be indirectly implied by the kind of its + // containing operand bundle, if the operand is a bundle operand. 
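As an aside, dataOperandHasImpliedAttr above splits one flat index space across two regions: indexes up to the argument count are call arguments, and everything past that (before the callee and successors) is an operand-bundle operand. The dispatch isolated as a sketch, with the counts passed in explicitly:

#include <cassert>

static bool dataOperandHasAttr(unsigned i, unsigned NumArgs,
                               unsigned NumBundleOperands,
                               bool (*paramHasAttr)(unsigned),
                               bool (*bundleOperandHasAttr)(unsigned)) {
  assert(i < 1 + NumArgs + NumBundleOperands && "index out of bounds");
  if (i < NumArgs + 1)
    return paramHasAttr(i);            // ordinary call argument
  return bundleOperandHasAttr(i - 1);  // operand-bundle operand
}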
+ + if (i < (getNumArgOperands() + 1)) + return paramHasAttr(i, A); + + assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && + "Must be either an invoke argument or an operand bundle!"); + return bundleOperandHasAttr(i - 1, A); +} + void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); PAL = PAL.addAttribute(getContext(), i, attr); @@ -670,6 +765,223 @@ BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { llvm_unreachable("ResumeInst has no successors!"); } +//===----------------------------------------------------------------------===// +// CleanupReturnInst Implementation +//===----------------------------------------------------------------------===// + +CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI) + : TerminatorInst(CRI.getType(), Instruction::CleanupRet, + OperandTraits::op_end(this) - + CRI.getNumOperands(), + CRI.getNumOperands()) { + setInstructionSubclassData(CRI.getSubclassDataFromInstruction()); + Op<0>() = CRI.Op<0>(); + if (CRI.hasUnwindDest()) + Op<1>() = CRI.Op<1>(); +} + +void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) { + if (UnwindBB) + setInstructionSubclassData(getSubclassDataFromInstruction() | 1); + + Op<0>() = CleanupPad; + if (UnwindBB) + Op<1>() = UnwindBB; +} + +CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, + unsigned Values, Instruction *InsertBefore) + : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()), + Instruction::CleanupRet, + OperandTraits::op_end(this) - Values, + Values, InsertBefore) { + init(CleanupPad, UnwindBB); +} + +CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, + unsigned Values, BasicBlock *InsertAtEnd) + : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()), + Instruction::CleanupRet, + OperandTraits::op_end(this) - Values, + Values, InsertAtEnd) { + init(CleanupPad, UnwindBB); +} + +BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const { + assert(Idx == 0); + return getUnwindDest(); +} +unsigned CleanupReturnInst::getNumSuccessorsV() const { + return getNumSuccessors(); +} +void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { + assert(Idx == 0); + setUnwindDest(B); +} + +//===----------------------------------------------------------------------===// +// CatchReturnInst Implementation +//===----------------------------------------------------------------------===// +void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) { + Op<0>() = CatchPad; + Op<1>() = BB; +} + +CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI) + : TerminatorInst(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2) { + Op<0>() = CRI.Op<0>(); + Op<1>() = CRI.Op<1>(); +} + +CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, + Instruction *InsertBefore) + : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2, + InsertBefore) { + init(CatchPad, BB); +} + +CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, + BasicBlock *InsertAtEnd) + : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2, + InsertAtEnd) { + init(CatchPad, BB); +} + +BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + return getSuccessor(); +} +unsigned CatchReturnInst::getNumSuccessorsV() const { + 
  return getNumSuccessors();
+}
+void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
+  assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+  setSuccessor(B);
+}
+
+//===----------------------------------------------------------------------===//
+// CatchSwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+                                 unsigned NumReservedValues,
+                                 const Twine &NameStr,
+                                 Instruction *InsertBefore)
+    : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+                     InsertBefore) {
+  if (UnwindDest)
+    ++NumReservedValues;
+  init(ParentPad, UnwindDest, NumReservedValues + 1);
+  setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+                                 unsigned NumReservedValues,
+                                 const Twine &NameStr, BasicBlock *InsertAtEnd)
+    : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+                     InsertAtEnd) {
+  if (UnwindDest)
+    ++NumReservedValues;
+  init(ParentPad, UnwindDest, NumReservedValues + 1);
+  setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI)
+    : TerminatorInst(CSI.getType(), Instruction::CatchSwitch, nullptr,
+                     CSI.getNumOperands()) {
+  init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands());
+  setNumHungOffUseOperands(ReservedSpace);
+  Use *OL = getOperandList();
+  const Use *InOL = CSI.getOperandList();
+  for (unsigned I = 1, E = ReservedSpace; I != E; ++I)
+    OL[I] = InOL[I];
+}
+
+void CatchSwitchInst::init(Value *ParentPad, BasicBlock *UnwindDest,
+                           unsigned NumReservedValues) {
+  assert(ParentPad && NumReservedValues);
+
+  ReservedSpace = NumReservedValues;
+  setNumHungOffUseOperands(UnwindDest ? 2 : 1);
+  allocHungoffUses(ReservedSpace);
+
+  Op<0>() = ParentPad;
+  if (UnwindDest) {
+    setInstructionSubclassData(getSubclassDataFromInstruction() | 1);
+    setUnwindDest(UnwindDest);
+  }
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation; the reserved space roughly doubles each time.
+void CatchSwitchInst::growOperands(unsigned Size) { + unsigned NumOperands = getNumOperands(); + assert(NumOperands >= 1); + if (ReservedSpace >= NumOperands + Size) + return; + ReservedSpace = (NumOperands + Size / 2) * 2; + growHungoffUses(ReservedSpace); +} + +void CatchSwitchInst::addHandler(BasicBlock *Handler) { + unsigned OpNo = getNumOperands(); + growOperands(1); + assert(OpNo < ReservedSpace && "Growing didn't work!"); + setNumHungOffUseOperands(getNumOperands() + 1); + getOperandList()[OpNo] = Handler; +} + +BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { + return getSuccessor(idx); +} +unsigned CatchSwitchInst::getNumSuccessorsV() const { + return getNumSuccessors(); +} +void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { + setSuccessor(idx, B); +} + +//===----------------------------------------------------------------------===// +// FuncletPadInst Implementation +//===----------------------------------------------------------------------===// +void FuncletPadInst::init(Value *ParentPad, ArrayRef Args, + const Twine &NameStr) { + assert(getNumOperands() == 1 + Args.size() && "NumOperands not set up?"); + std::copy(Args.begin(), Args.end(), op_begin()); + setParentPad(ParentPad); + setName(NameStr); +} + +FuncletPadInst::FuncletPadInst(const FuncletPadInst &FPI) + : Instruction(FPI.getType(), FPI.getOpcode(), + OperandTraits::op_end(this) - + FPI.getNumOperands(), + FPI.getNumOperands()) { + std::copy(FPI.op_begin(), FPI.op_end(), op_begin()); + setParentPad(FPI.getParentPad()); +} + +FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore) + : Instruction(ParentPad->getType(), Op, + OperandTraits::op_end(this) - Values, Values, + InsertBefore) { + init(ParentPad, Args, NameStr); +} + +FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, BasicBlock *InsertAtEnd) + : Instruction(ParentPad->getType(), Op, + OperandTraits::op_end(this) - Values, Values, + InsertAtEnd) { + init(ParentPad, Args, NameStr); +} + //===----------------------------------------------------------------------===// // UnreachableInst Implementation //===----------------------------------------------------------------------===// @@ -1193,6 +1505,8 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, // GetElementPtrInst Implementation //===----------------------------------------------------------------------===// +void GetElementPtrInst::anchor() {} + void GetElementPtrInst::init(Value *Ptr, ArrayRef IdxList, const Twine &Name) { assert(getNumOperands() == 1 + IdxList.size() && @@ -2029,7 +2343,7 @@ bool CastInst::isNoopCast(const DataLayout &DL) const { /// * %S = secondOpcode MidTy %F to DstTy /// The function returns a resultOpcode so these two casts can be replaced with: /// * %Replacement = resultOpcode %SrcTy %x to DstTy -/// If no such cast is permited, the function returns 0. +/// If no such cast is permitted, the function returns 0. unsigned CastInst::isEliminableCastPair( Instruction::CastOps firstOp, Instruction::CastOps secondOp, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, @@ -2037,7 +2351,7 @@ unsigned CastInst::isEliminableCastPair( // Define the 144 possibilities for these two cast instructions. The values // in this matrix determine what to do in a given situation and select the // case in the switch below. 
The rows correspond to firstOp, the columns - // correspond to secondOp. In looking at the table below, keep in mind + // correspond to secondOp. In looking at the table below, keep in mind // the following cast properties: // // Size Compare Source Destination @@ -2087,17 +2401,19 @@ unsigned CastInst::isEliminableCastPair( { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+ }; + // TODO: This logic could be encoded into the table above and handled in the + // switch below. // If either of the casts are a bitcast from scalar to vector, disallow the - // merging. However, bitcast of A->B->A are allowed. - bool isFirstBitcast = (firstOp == Instruction::BitCast); - bool isSecondBitcast = (secondOp == Instruction::BitCast); - bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast); + // merging. However, any pair of bitcasts are allowed. + bool IsFirstBitcast = (firstOp == Instruction::BitCast); + bool IsSecondBitcast = (secondOp == Instruction::BitCast); + bool AreBothBitcasts = IsFirstBitcast && IsSecondBitcast; - // Check if any of the bitcasts convert scalars<->vectors. - if ((isFirstBitcast && isa(SrcTy) != isa(MidTy)) || - (isSecondBitcast && isa(MidTy) != isa(DstTy))) - // Unless we are bitcasing to the original type, disallow optimizations. - if (!chainedBitcast) return 0; + // Check if any of the casts convert scalars <-> vectors. + if ((IsFirstBitcast && isa(SrcTy) != isa(MidTy)) || + (IsSecondBitcast && isa(MidTy) != isa(DstTy))) + if (!AreBothBitcasts) + return 0; int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin] [secondOp-Instruction::CastOpsBegin]; @@ -2966,9 +3282,8 @@ AddrSpaceCastInst::AddrSpaceCastInst( void CmpInst::anchor() {} -CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, - Value *LHS, Value *RHS, const Twine &Name, - Instruction *InsertBefore) +CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, + Value *RHS, const Twine &Name, Instruction *InsertBefore) : Instruction(ty, op, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2979,9 +3294,8 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, setName(Name); } -CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, - Value *LHS, Value *RHS, const Twine &Name, - BasicBlock *InsertAtEnd) +CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, + Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(ty, op, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2993,8 +3307,7 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, } CmpInst * -CmpInst::Create(OtherOps Op, unsigned short predicate, - Value *S1, Value *S2, +CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore) { if (Op == Instruction::ICmp) { if (InsertBefore) @@ -3014,7 +3327,7 @@ CmpInst::Create(OtherOps Op, unsigned short predicate, } CmpInst * -CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2, +CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, BasicBlock *InsertAtEnd) { if (Op == Instruction::ICmp) { return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate), @@ -3077,6 +3390,8 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { } } +void ICmpInst::anchor() {} + ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); @@ -3196,7 +3511,24 @@ 
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) { } } -bool CmpInst::isUnsigned(unsigned short predicate) { +CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) { + assert(CmpInst::isUnsigned(pred) && "Call only with signed predicates!"); + + switch (pred) { + default: + llvm_unreachable("Unknown predicate!"); + case CmpInst::ICMP_ULT: + return CmpInst::ICMP_SLT; + case CmpInst::ICMP_ULE: + return CmpInst::ICMP_SLE; + case CmpInst::ICMP_UGT: + return CmpInst::ICMP_SGT; + case CmpInst::ICMP_UGE: + return CmpInst::ICMP_SGE; + } +} + +bool CmpInst::isUnsigned(Predicate predicate) { switch (predicate) { default: return false; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT: @@ -3204,7 +3536,7 @@ bool CmpInst::isUnsigned(unsigned short predicate) { } } -bool CmpInst::isSigned(unsigned short predicate) { +bool CmpInst::isSigned(Predicate predicate) { switch (predicate) { default: return false; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT: @@ -3212,7 +3544,7 @@ bool CmpInst::isSigned(unsigned short predicate) { } } -bool CmpInst::isOrdered(unsigned short predicate) { +bool CmpInst::isOrdered(Predicate predicate) { switch (predicate) { default: return false; case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT: @@ -3221,7 +3553,7 @@ bool CmpInst::isOrdered(unsigned short predicate) { } } -bool CmpInst::isUnordered(unsigned short predicate) { +bool CmpInst::isUnordered(Predicate predicate) { switch (predicate) { default: return false; case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT: @@ -3230,7 +3562,7 @@ bool CmpInst::isUnordered(unsigned short predicate) { } } -bool CmpInst::isTrueWhenEqual(unsigned short predicate) { +bool CmpInst::isTrueWhenEqual(Predicate predicate) { switch(predicate) { default: return false; case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE: @@ -3238,7 +3570,7 @@ bool CmpInst::isTrueWhenEqual(unsigned short predicate) { } } -bool CmpInst::isFalseWhenEqual(unsigned short predicate) { +bool CmpInst::isFalseWhenEqual(Predicate predicate) { switch(predicate) { case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT: case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true; @@ -3569,6 +3901,10 @@ AddrSpaceCastInst *AddrSpaceCastInst::cloneImpl() const { } CallInst *CallInst::cloneImpl() const { + if (hasOperandBundles()) { + unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); + return new(getNumOperands(), DescriptorBytes) CallInst(*this); + } return new(getNumOperands()) CallInst(*this); } @@ -3613,11 +3949,31 @@ IndirectBrInst *IndirectBrInst::cloneImpl() const { } InvokeInst *InvokeInst::cloneImpl() const { + if (hasOperandBundles()) { + unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); + return new(getNumOperands(), DescriptorBytes) InvokeInst(*this); + } return new(getNumOperands()) InvokeInst(*this); } ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); } +CleanupReturnInst *CleanupReturnInst::cloneImpl() const { + return new (getNumOperands()) CleanupReturnInst(*this); +} + +CatchReturnInst *CatchReturnInst::cloneImpl() const { + return new (getNumOperands()) CatchReturnInst(*this); +} + +CatchSwitchInst *CatchSwitchInst::cloneImpl() const { + return new CatchSwitchInst(*this); +} + +FuncletPadInst *FuncletPadInst::cloneImpl() const { + return new (getNumOperands()) FuncletPadInst(*this); +} + UnreachableInst 
*UnreachableInst::cloneImpl() const { LLVMContext &Context = getContext(); return new UnreachableInst(Context); diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 6d799e4b9650..8848bcb71477 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -104,6 +104,39 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { assert(DereferenceableOrNullID == MD_dereferenceable_or_null && "dereferenceable_or_null kind id drifted"); (void)DereferenceableOrNullID; + + // Create the 'make.implicit' metadata kind. + unsigned MakeImplicitID = getMDKindID("make.implicit"); + assert(MakeImplicitID == MD_make_implicit && + "make.implicit kind id drifted"); + (void)MakeImplicitID; + + // Create the 'unpredictable' metadata kind. + unsigned UnpredictableID = getMDKindID("unpredictable"); + assert(UnpredictableID == MD_unpredictable && + "unpredictable kind id drifted"); + (void)UnpredictableID; + + // Create the 'invariant.group' metadata kind. + unsigned InvariantGroupId = getMDKindID("invariant.group"); + assert(InvariantGroupId == MD_invariant_group && + "invariant.group kind id drifted"); + (void)InvariantGroupId; + + // Create the 'align' metadata kind. + unsigned AlignID = getMDKindID("align"); + assert(AlignID == MD_align && "align kind id drifted"); + (void)AlignID; + + auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt"); + assert(DeoptEntry->second == LLVMContext::OB_deopt && + "deopt operand bundle id drifted!"); + (void)DeoptEntry; + + auto *FuncletEntry = pImpl->getOrInsertBundleTag("funclet"); + assert(FuncletEntry->second == LLVMContext::OB_funclet && + "funclet operand bundle id drifted!"); + (void)FuncletEntry; } LLVMContext::~LLVMContext() { delete pImpl; } @@ -193,6 +226,11 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) { if (!cast(DI).isEnabled()) return false; break; + case llvm::DK_OptimizationRemarkAnalysisFPCommute: + if (!cast(DI) + .isEnabled()) + return false; + break; default: break; } @@ -250,7 +288,7 @@ unsigned LLVMContext::getMDKindID(StringRef Name) const { .first->second; } -/// getHandlerNames - Populate client supplied smallvector using custome +/// getHandlerNames - Populate client-supplied smallvector using custom /// metadata name and ID. void LLVMContext::getMDKindNames(SmallVectorImpl &Names) const { Names.resize(pImpl->CustomMDKindNames.size()); @@ -258,3 +296,11 @@ void LLVMContext::getMDKindNames(SmallVectorImpl &Names) const { E = pImpl->CustomMDKindNames.end(); I != E; ++I) Names[I->second] = I->first(); } + +void LLVMContext::getOperandBundleTags(SmallVectorImpl &Tags) const { + pImpl->getOperandBundleTags(Tags); +} + +uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { + return pImpl->getOperandBundleTagID(Tag); +} diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 1e2080770fcd..5239b4f7d84a 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -27,6 +27,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) FloatTy(C, Type::FloatTyID), DoubleTy(C, Type::DoubleTyID), MetadataTy(C, Type::MetadataTyID), + TokenTy(C, Type::TokenTyID), X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), PPC_FP128Ty(C, Type::PPC_FP128TyID), @@ -78,7 +79,7 @@ LLVMContextImpl::~LLVMContextImpl() { // unnecessary RAUW when nodes are still unresolved. 
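// A minimal usage sketch for the fixed metadata kind IDs registered above,
// assuming an existing Instruction *I: clients can use the LLVMContext::MD_*
// enumerators directly instead of re-interning the string on every lookup.
//
//   LLVMContext &Ctx = I->getContext();
//   assert(Ctx.getMDKindID("unpredictable") == LLVMContext::MD_unpredictable);
//   I->setMetadata(LLVMContext::MD_unpredictable, MDNode::get(Ctx, None));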
for (auto *I : DistinctMDNodes) I->dropAllReferences(); -#define HANDLE_MDNODE_LEAF(CLASS) \ +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (auto *I : CLASS##s) \ I->dropAllReferences(); #include "llvm/IR/Metadata.def" @@ -92,8 +93,8 @@ LLVMContextImpl::~LLVMContextImpl() { // Destroy MDNodes. for (MDNode *I : DistinctMDNodes) I->deleteAsSubclass(); -#define HANDLE_MDNODE_LEAF(CLASS) \ - for (CLASS *I : CLASS##s) \ +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ + for (CLASS * I : CLASS##s) \ delete I; #include "llvm/IR/Metadata.def" @@ -218,6 +219,23 @@ unsigned MDNodeOpsKey::calculateHash(ArrayRef Ops) { return hash_combine_range(Ops.begin(), Ops.end()); } +StringMapEntry *LLVMContextImpl::getOrInsertBundleTag(StringRef Tag) { + uint32_t NewIdx = BundleTagCache.size(); + return &*(BundleTagCache.insert(std::make_pair(Tag, NewIdx)).first); +} + +void LLVMContextImpl::getOperandBundleTags(SmallVectorImpl &Tags) const { + Tags.resize(BundleTagCache.size()); + for (const auto &T : BundleTagCache) + Tags[T.second] = T.first(); +} + +uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { + auto I = BundleTagCache.find(Tag); + assert(I != BundleTagCache.end() && "Unknown tag!"); + return I->second; +} + // ConstantsContext anchors void UnaryConstantExpr::anchor() { } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index cbbf11e334c4..a24114d0a0ae 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -458,67 +458,6 @@ template <> struct MDNodeKeyImpl { unsigned getHashValue() const { return hash_combine(Filename, Directory); } }; -template <> struct MDNodeKeyImpl { - unsigned SourceLanguage; - Metadata *File; - StringRef Producer; - bool IsOptimized; - StringRef Flags; - unsigned RuntimeVersion; - StringRef SplitDebugFilename; - unsigned EmissionKind; - Metadata *EnumTypes; - Metadata *RetainedTypes; - Metadata *Subprograms; - Metadata *GlobalVariables; - Metadata *ImportedEntities; - uint64_t DWOId; - - MDNodeKeyImpl(unsigned SourceLanguage, Metadata *File, StringRef Producer, - bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, - StringRef SplitDebugFilename, unsigned EmissionKind, - Metadata *EnumTypes, Metadata *RetainedTypes, - Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId) - : SourceLanguage(SourceLanguage), File(File), Producer(Producer), - IsOptimized(IsOptimized), Flags(Flags), RuntimeVersion(RuntimeVersion), - SplitDebugFilename(SplitDebugFilename), EmissionKind(EmissionKind), - EnumTypes(EnumTypes), RetainedTypes(RetainedTypes), - Subprograms(Subprograms), GlobalVariables(GlobalVariables), - ImportedEntities(ImportedEntities), DWOId(DWOId) {} - MDNodeKeyImpl(const DICompileUnit *N) - : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()), - Producer(N->getProducer()), IsOptimized(N->isOptimized()), - Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()), - SplitDebugFilename(N->getSplitDebugFilename()), - EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()), - RetainedTypes(N->getRawRetainedTypes()), - Subprograms(N->getRawSubprograms()), - GlobalVariables(N->getRawGlobalVariables()), - ImportedEntities(N->getRawImportedEntities()), DWOId(N->getDWOId()) {} - - bool isKeyOf(const DICompileUnit *RHS) const { - return SourceLanguage == RHS->getSourceLanguage() && - File == RHS->getRawFile() && Producer == RHS->getProducer() && - IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() && - RuntimeVersion == RHS->getRuntimeVersion() && - 
SplitDebugFilename == RHS->getSplitDebugFilename() && - EmissionKind == RHS->getEmissionKind() && - EnumTypes == RHS->getRawEnumTypes() && - RetainedTypes == RHS->getRawRetainedTypes() && - Subprograms == RHS->getRawSubprograms() && - GlobalVariables == RHS->getRawGlobalVariables() && - ImportedEntities == RHS->getRawImportedEntities() && - DWOId == RHS->getDWOId(); - } - unsigned getHashValue() const { - return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags, - RuntimeVersion, SplitDebugFilename, EmissionKind, - EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities, DWOId); - } -}; - template <> struct MDNodeKeyImpl { Metadata *Scope; StringRef Name; @@ -534,7 +473,6 @@ template <> struct MDNodeKeyImpl { unsigned VirtualIndex; unsigned Flags; bool IsOptimized; - Metadata *Function; Metadata *TemplateParams; Metadata *Declaration; Metadata *Variables; @@ -544,15 +482,15 @@ template <> struct MDNodeKeyImpl { bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Metadata *Function, Metadata *TemplateParams, - Metadata *Declaration, Metadata *Variables) + Metadata *TemplateParams, Metadata *Declaration, + Metadata *Variables) : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), ScopeLine(ScopeLine), ContainingType(ContainingType), Virtuality(Virtuality), VirtualIndex(VirtualIndex), Flags(Flags), IsOptimized(IsOptimized), - Function(Function), TemplateParams(TemplateParams), - Declaration(Declaration), Variables(Variables) {} + TemplateParams(TemplateParams), Declaration(Declaration), + Variables(Variables) {} MDNodeKeyImpl(const DISubprogram *N) : Scope(N->getRawScope()), Name(N->getName()), LinkageName(N->getLinkageName()), File(N->getRawFile()), @@ -561,7 +499,6 @@ template <> struct MDNodeKeyImpl { ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()), Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), - Function(N->getRawFunction()), TemplateParams(N->getRawTemplateParams()), Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} @@ -576,7 +513,6 @@ template <> struct MDNodeKeyImpl { Virtuality == RHS->getVirtuality() && VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() && - Function == RHS->getRawFunction() && TemplateParams == RHS->getRawTemplateParams() && Declaration == RHS->getRawDeclaration() && Variables == RHS->getRawVariables(); @@ -584,7 +520,7 @@ template <> struct MDNodeKeyImpl { unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, - Virtuality, VirtualIndex, Flags, IsOptimized, Function, + Virtuality, VirtualIndex, Flags, IsOptimized, TemplateParams, Declaration, Variables); } }; @@ -759,7 +695,6 @@ template <> struct MDNodeKeyImpl { }; template <> struct MDNodeKeyImpl { - unsigned Tag; Metadata *Scope; StringRef Name; Metadata *File; @@ -768,23 +703,23 @@ template <> struct MDNodeKeyImpl { unsigned Arg; unsigned Flags; - MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File, - unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags) - : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), - Arg(Arg), Flags(Flags) {} + 
MDNodeKeyImpl(Metadata *Scope, StringRef Name, Metadata *File, unsigned Line,
+                Metadata *Type, unsigned Arg, unsigned Flags)
+      : Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
+        Flags(Flags) {}
   MDNodeKeyImpl(const DILocalVariable *N)
-      : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()),
-        File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()),
-        Arg(N->getArg()), Flags(N->getFlags()) {}
+      : Scope(N->getRawScope()), Name(N->getName()), File(N->getRawFile()),
+        Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
+        Flags(N->getFlags()) {}

   bool isKeyOf(const DILocalVariable *RHS) const {
-    return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
-           Name == RHS->getName() && File == RHS->getRawFile() &&
-           Line == RHS->getLine() && Type == RHS->getRawType() &&
-           Arg == RHS->getArg() && Flags == RHS->getFlags();
+    return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+           File == RHS->getRawFile() && Line == RHS->getLine() &&
+           Type == RHS->getRawType() && Arg == RHS->getArg() &&
+           Flags == RHS->getFlags();
   }
   unsigned getHashValue() const {
-    return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags);
+    return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
   }
 };

@@ -857,6 +792,49 @@ template <> struct MDNodeKeyImpl {
   }
 };

+template <> struct MDNodeKeyImpl<DIMacro> {
+  unsigned MIType;
+  unsigned Line;
+  StringRef Name;
+  StringRef Value;
+
+  MDNodeKeyImpl(unsigned MIType, unsigned Line, StringRef Name, StringRef Value)
+      : MIType(MIType), Line(Line), Name(Name), Value(Value) {}
+  MDNodeKeyImpl(const DIMacro *N)
+      : MIType(N->getMacinfoType()), Line(N->getLine()), Name(N->getName()),
+        Value(N->getValue()) {}
+
+  bool isKeyOf(const DIMacro *RHS) const {
+    return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+           Name == RHS->getName() && Value == RHS->getValue();
+  }
+  unsigned getHashValue() const {
+    return hash_combine(MIType, Line, Name, Value);
+  }
+};
+
+template <> struct MDNodeKeyImpl<DIMacroFile> {
+  unsigned MIType;
+  unsigned Line;
+  Metadata *File;
+  Metadata *Elements;
+
+  MDNodeKeyImpl(unsigned MIType, unsigned Line, Metadata *File,
+                Metadata *Elements)
+      : MIType(MIType), Line(Line), File(File), Elements(Elements) {}
+  MDNodeKeyImpl(const DIMacroFile *N)
+      : MIType(N->getMacinfoType()), Line(N->getLine()), File(N->getRawFile()),
+        Elements(N->getRawElements()) {}
+
+  bool isKeyOf(const DIMacroFile *RHS) const {
+    return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+           File == RHS->getRawFile() && Elements == RHS->getRawElements();
+  }
+  unsigned getHashValue() const {
+    return hash_combine(MIType, Line, File, Elements);
+  }
+};
+
 /// \brief DenseMapInfo for MDNode subclasses.
 template <class NodeTy> struct MDNodeInfo {
   typedef MDNodeKeyImpl<NodeTy> KeyTy;
@@ -953,7 +931,8 @@ public:
   DenseMap<Value *, ValueName *> ValueNames;
-#define HANDLE_MDNODE_LEAF(CLASS) DenseSet<CLASS *, CLASS##Info> CLASS##s;
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS)                                    \
+  DenseSet<CLASS *, CLASS##Info> CLASS##s;
 #include "llvm/IR/Metadata.def"

   // MDNodes may be uniqued or not uniqued.  When they're not uniqued, they
@@ -988,8 +967,10 @@ public:
   ConstantInt *TheTrueVal;
   ConstantInt *TheFalseVal;

+  std::unique_ptr<ConstantTokenNone> TheNoneToken;
+
   // Basic type instances.
-  Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
+  Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
   Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
   IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
@@ -1033,20 +1014,19 @@ public:
   /// instructions in different blocks at the same location.
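// A sketch of how the key structs above are consumed (mirroring getUniqued()
// in lib/IR/MetadataImpl.h): a key is built from candidate operands, hashed
// via getHashValue(), and compared against stored nodes via isKeyOf().
//
//   MDNodeKeyImpl<DIMacro> Key(MIType, Line, Name, Value);
//   auto It = Store.find_as(Key); // Store is a DenseSet<DIMacro *, DIMacroInfo>
//   DIMacro *Existing = It == Store.end() ? nullptr : *It;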
DenseMap, unsigned> DiscriminatorTable; - /// \brief Mapping from a function to its prefix data, which is stored as the - /// operand of an unparented ReturnInst so that the prefix data has a Use. - typedef DenseMap PrefixDataMapTy; - PrefixDataMapTy PrefixDataMap; - - /// \brief Mapping from a function to its prologue data, which is stored as - /// the operand of an unparented ReturnInst so that the prologue data has a - /// Use. - typedef DenseMap PrologueDataMapTy; - PrologueDataMapTy PrologueDataMap; - int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); + /// \brief A set of interned tags for operand bundles. The StringMap maps + /// bundle tags to their IDs. + /// + /// \see LLVMContext::getOperandBundleTagID + StringMap BundleTagCache; + + StringMapEntry *getOrInsertBundleTag(StringRef Tag); + void getOperandBundleTags(SmallVectorImpl &Tags) const; + uint32_t getOperandBundleTagID(StringRef Tag) const; + LLVMContextImpl(LLVMContext &C); ~LLVMContextImpl(); diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 27d98a279fe2..f2e0c7d32c02 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -569,13 +569,33 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { AnalysisUsage *AnUsage = nullptr; - DenseMap::iterator DMI = AnUsageMap.find(P); + auto DMI = AnUsageMap.find(P); if (DMI != AnUsageMap.end()) AnUsage = DMI->second; else { - AnUsage = new AnalysisUsage(); - P->getAnalysisUsage(*AnUsage); - AnUsageMap[P] = AnUsage; + // Look up the analysis usage from the pass instance (different instances + // of the same pass can produce different results), but unique the + // resulting object to reduce memory usage. This helps to greatly reduce + // memory usage when we have many instances of only a few pass types + // (e.g. instcombine, simplifycfg, etc...) which tend to share a fixed set + // of dependencies. + AnalysisUsage AU; + P->getAnalysisUsage(AU); + + AUFoldingSetNode* Node = nullptr; + FoldingSetNodeID ID; + AUFoldingSetNode::Profile(ID, AU); + void *IP = nullptr; + if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) + Node = N; + else { + Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); + UniqueAnalysisUsages.InsertNode(Node, IP); + } + assert(Node && "cached analysis usage must be non null"); + + AnUsageMap[P] = &Node->AU; + AnUsage = &Node->AU;; } return AnUsage; } @@ -686,6 +706,10 @@ void PMTopLevelManager::schedulePass(Pass *P) { /// passes and all pass managers. If desired pass is not found /// then return NULL. Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { + // For immutable passes we have a direct mapping from ID to pass, so check + // that first. + if (Pass *P = ImmutablePassMap.lookup(AID)) + return P; // Check pass managers for (PMDataManager *PassManager : PassManagers) @@ -697,24 +721,6 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { if (Pass *P = IndirectPassManager->findAnalysisPass(AID, false)) return P; - // Check the immutable passes. Iterate in reverse order so that we find - // the most recently registered passes first. 
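// A sketch of the behavior that replaces the reverse scan deleted below:
// once addImmutablePass() has populated ImmutablePassMap, both the concrete
// pass ID and every analysis-group interface ID resolve in a single lookup.
//
//   if (Pass *P = ImmutablePassMap.lookup(AID))
//     return P; // covers the pass itself and its implemented interfaces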
- for (auto I = ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; - ++I) { - AnalysisID PI = (*I)->getPassID(); - if (PI == AID) - return *I; - - // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = findAnalysisPassInfo(PI); - assert(PassInf && "Expected all immutable passes to be initialized"); - const std::vector &ImmPI = - PassInf->getInterfacesImplemented(); - for (const PassInfo *PI : ImmPI) - if (PI->getTypeInfo() == AID) - return *I; - } - return nullptr; } @@ -729,6 +735,24 @@ const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const { return PI; } +void PMTopLevelManager::addImmutablePass(ImmutablePass *P) { + P->initializePass(); + ImmutablePasses.push_back(P); + + // Add this pass to the map from its analysis ID. We clobber any prior runs + // of the pass in the map so that the last one added is the one found when + // doing lookups. + AnalysisID AID = P->getPassID(); + ImmutablePassMap[AID] = P; + + // Also add any interfaces implemented by the immutable pass to the map for + // fast lookup. + const PassInfo *PassInf = findAnalysisPassInfo(AID); + assert(PassInf && "Expected all immutable passes to be initialized"); + for (const PassInfo *ImmPI : PassInf->getInterfacesImplemented()) + ImmutablePassMap[ImmPI->getTypeInfo()] = P; +} + // Print passes managed by this top level manager. void PMTopLevelManager::dumpPasses() const { @@ -780,15 +804,8 @@ void PMTopLevelManager::initializeAllAnalysisInfo() { for (DenseMap::iterator DMI = LastUser.begin(), DME = LastUser.end(); DMI != DME; ++DMI) { - DenseMap >::iterator InvDMI = - InversedLastUser.find(DMI->second); - if (InvDMI != InversedLastUser.end()) { - SmallPtrSet &L = InvDMI->second; - L.insert(DMI->first); - } else { - SmallPtrSet L; L.insert(DMI->first); - InversedLastUser[DMI->second] = L; - } + SmallPtrSet &L = InversedLastUser[DMI->second]; + L.insert(DMI->first); } } @@ -801,10 +818,6 @@ PMTopLevelManager::~PMTopLevelManager() { for (SmallVectorImpl::iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) delete *I; - - for (DenseMap::iterator DMI = AnUsageMap.begin(), - DME = AnUsageMap.end(); DMI != DME; ++DMI) - delete DMI->second; } //===----------------------------------------------------------------------===// @@ -989,31 +1002,28 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // At the moment, this pass is the last user of all required passes. SmallVector LastUses; - SmallVector RequiredPasses; + SmallVector UsedPasses; SmallVector ReqAnalysisNotAvailable; unsigned PDepth = this->getDepth(); - collectRequiredAnalysis(RequiredPasses, - ReqAnalysisNotAvailable, P); - for (SmallVectorImpl::iterator I = RequiredPasses.begin(), - E = RequiredPasses.end(); I != E; ++I) { - Pass *PRequired = *I; + collectRequiredAndUsedAnalyses(UsedPasses, ReqAnalysisNotAvailable, P); + for (Pass *PUsed : UsedPasses) { unsigned RDepth = 0; - assert(PRequired->getResolver() && "Analysis Resolver is not set"); - PMDataManager &DM = PRequired->getResolver()->getPMDataManager(); + assert(PUsed->getResolver() && "Analysis Resolver is not set"); + PMDataManager &DM = PUsed->getResolver()->getPMDataManager(); RDepth = DM.getDepth(); if (PDepth == RDepth) - LastUses.push_back(PRequired); + LastUses.push_back(PUsed); else if (PDepth > RDepth) { // Let the parent claim responsibility of last use - TransferLastUses.push_back(PRequired); + TransferLastUses.push_back(PUsed); // Keep track of higher level analysis used by this manager. 
- HigherLevelAnalysis.push_back(PRequired); + HigherLevelAnalysis.push_back(PUsed); } else - llvm_unreachable("Unable to accommodate Required Pass"); + llvm_unreachable("Unable to accommodate Used Pass"); } // Set P as P's last user until someone starts using P. @@ -1030,10 +1040,8 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { } // Now, take care of required analyses that are not available. - for (SmallVectorImpl::iterator - I = ReqAnalysisNotAvailable.begin(), - E = ReqAnalysisNotAvailable.end() ;I != E; ++I) { - const PassInfo *PI = TPM->findAnalysisPassInfo(*I); + for (AnalysisID ID : ReqAnalysisNotAvailable) { + const PassInfo *PI = TPM->findAnalysisPassInfo(ID); Pass *AnalysisPass = PI->createPass(); this->addLowerLevelRequiredPass(P, AnalysisPass); } @@ -1048,30 +1056,29 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { } -/// Populate RP with analysis pass that are required by +/// Populate UP with analysis pass that are used or required by /// pass P and are available. Populate RP_NotAvail with analysis /// pass that are required by pass P but are not available. -void PMDataManager::collectRequiredAnalysis(SmallVectorImpl &RP, - SmallVectorImpl &RP_NotAvail, - Pass *P) { +void PMDataManager::collectRequiredAndUsedAnalyses( + SmallVectorImpl &UP, SmallVectorImpl &RP_NotAvail, + Pass *P) { AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator - I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); - else - RP_NotAvail.push_back(*I); - } - const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); - for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), - E = IDs.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); + for (const auto &UsedID : AnUsage->getUsedSet()) + if (Pass *AnalysisPass = findAnalysisPass(UsedID, true)) + UP.push_back(AnalysisPass); + + for (const auto &RequiredID : AnUsage->getRequiredSet()) + if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true)) + UP.push_back(AnalysisPass); else - RP_NotAvail.push_back(*I); - } + RP_NotAvail.push_back(RequiredID); + + for (const auto &RequiredID : AnUsage->getRequiredTransitiveSet()) + if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true)) + UP.push_back(AnalysisPass); + else + RP_NotAvail.push_back(RequiredID); } // All Required analyses should be available to the pass as it runs! 
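// A minimal sketch (with a hypothetical MyPass) of where the used and
// required sets consumed above come from:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTreeWrapperPass>();            // must be run
//     AU.addUsedIfAvailable<TargetLibraryInfoWrapperPass>(); // optional
//   }
//
// Required analyses that are unavailable land in RP_NotAvail and get
// scheduled as lower-level required passes; used-only analyses are merely
// recorded so last-user bookkeeping keeps them alive long enough.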
Here
@@ -1206,6 +1213,15 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
   dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
 }

+void PMDataManager::dumpUsedSet(const Pass *P) const {
+  if (PassDebugging < Details)
+    return;
+
+  AnalysisUsage analysisUsage;
+  P->getAnalysisUsage(analysisUsage);
+  dumpAnalysisUsage("Used", P, analysisUsage.getUsedSet());
+}
+
 void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
                                       const AnalysisUsage::VectorType &Set) const {
   assert(PassDebugging >= Details);
@@ -1310,6 +1326,7 @@ bool BBPassManager::runOnFunction(Function &F) {
         dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, I->getName());
       dumpPreservedSet(BP);
+      dumpUsedSet(BP);

       verifyPreservedAnalysis(BP);
       removeNotPreservedAnalysis(BP);
@@ -1524,6 +1541,7 @@ bool FPPassManager::runOnFunction(Function &F) {
     if (LocalChanged)
       dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
     dumpPreservedSet(FP);
+    dumpUsedSet(FP);

     verifyPreservedAnalysis(FP);
     removeNotPreservedAnalysis(FP);
@@ -1601,6 +1619,7 @@ MPPassManager::runOnModule(Module &M) {
         dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
       dumpPreservedSet(MP);
+      dumpUsedSet(MP);

       verifyPreservedAnalysis(MP);
       removeNotPreservedAnalysis(MP);
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index b4c5ca7c6a12..4ce3ea2e9c04 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -36,8 +36,7 @@ MDNode *MDBuilder::createFPMath(float Accuracy) {

 MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight,
                                        uint32_t FalseWeight) {
-  uint32_t Weights[] = {TrueWeight, FalseWeight};
-  return createBranchWeights(Weights);
+  return createBranchWeights({TrueWeight, FalseWeight});
 }

 MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
@@ -53,14 +52,15 @@ MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
   return MDNode::get(Context, Vals);
 }

+MDNode *MDBuilder::createUnpredictable() {
+  return MDNode::get(Context, None);
+}
+
 MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
-  SmallVector<Metadata *, 2> Vals(2);
-  Vals[0] = createString("function_entry_count");
-  Type *Int64Ty = Type::getInt64Ty(Context);
-  Vals[1] = createConstant(ConstantInt::get(Int64Ty, Count));
-
-  return MDNode::get(Context, Vals);
+  Type *Int64Ty = Type::getInt64Ty(Context);
+  return MDNode::get(Context,
+                     {createString("function_entry_count"),
+                      createConstant(ConstantInt::get(Int64Ty, Count))});
 }

 MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
@@ -76,8 +76,7 @@ MDNode *MDBuilder::createRange(Constant *Lo, Constant *Hi) {
     return nullptr;

   // Return the range [Lo, Hi).
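// A usage sketch, assuming an LLVMContext Ctx and a LoadInst *LI: range
// metadata built here is half-open, so [0, 10) admits the values 0..9.
//
//   MDBuilder MDB(Ctx);
//   LI->setMetadata(LLVMContext::MD_range,
//                   MDB.createRange(APInt(32, 0), APInt(32, 10)));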
- Metadata *Range[2] = {createConstant(Lo), createConstant(Hi)}; - return MDNode::get(Context, Range); + return MDNode::get(Context, {createConstant(Lo), createConstant(Hi)}); } MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { @@ -112,12 +111,10 @@ MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent, bool isConstant) { if (isConstant) { Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1); - Metadata *Ops[3] = {createString(Name), Parent, createConstant(Flags)}; - return MDNode::get(Context, Ops); - } else { - Metadata *Ops[2] = {createString(Name), Parent}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, + {createString(Name), Parent, createConstant(Flags)}); } + return MDNode::get(Context, {createString(Name), Parent}); } MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) { @@ -125,8 +122,7 @@ MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) { } MDNode *MDBuilder::createAliasScope(StringRef Name, MDNode *Domain) { - Metadata *Ops[2] = {createString(Name), Domain}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, {createString(Name), Domain}); } /// \brief Return metadata for a tbaa.struct node with the given @@ -161,23 +157,19 @@ MDNode *MDBuilder::createTBAAStructTypeNode( MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent, uint64_t Offset) { ConstantInt *Off = ConstantInt::get(Type::getInt64Ty(Context), Offset); - Metadata *Ops[3] = {createString(Name), Parent, createConstant(Off)}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, + {createString(Name), Parent, createConstant(Off)}); } /// \brief Return metadata for a TBAA tag node with the given /// base type, access type and offset relative to the base type. MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, uint64_t Offset, bool IsConstant) { - Type *Int64 = Type::getInt64Ty(Context); + IntegerType *Int64 = Type::getInt64Ty(Context); + ConstantInt *Off = ConstantInt::get(Int64, Offset); if (IsConstant) { - Metadata *Ops[4] = {BaseType, AccessType, - createConstant(ConstantInt::get(Int64, Offset)), - createConstant(ConstantInt::get(Int64, 1))}; - return MDNode::get(Context, Ops); - } else { - Metadata *Ops[3] = {BaseType, AccessType, - createConstant(ConstantInt::get(Int64, Offset))}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, {BaseType, AccessType, createConstant(Off), + createConstant(ConstantInt::get(Int64, 1))}); } + return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)}); } diff --git a/lib/IR/Makefile b/lib/IR/Makefile index cc403f38dd8e..329cd6636e94 100644 --- a/lib/IR/Makefile +++ b/lib/IR/Makefile @@ -10,14 +10,20 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMCore BUILD_ARCHIVE = 1 -BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen +BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen \ + $(PROJ_OBJ_ROOT)/include/llvm/IR/Attributes.inc \ + $(PROJ_OBJ_ROOT)/lib/IR/AttributesCompatFunc.inc include $(LEVEL)/Makefile.common GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen +ATTRINCFILE:=$(PROJ_OBJ_ROOT)/include/llvm/IR/Attributes.inc +ATTRCOMPATFUNCINCFILE:=$(PROJ_OBJ_ROOT)/lib/IR/AttributesCompatFunc.inc INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics.td INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics*.td) +ATTRIBUTESTD := $(PROJ_SRC_ROOT)/include/llvm/IR/Attributes.td +ATTRCOMPATFUNCTD := $(PROJ_SRC_ROOT)/lib/IR/AttributesCompatFunc.td $(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(LLVM_TBLGEN) $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td @@ -28,6 +34,28 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \ changed significantly. ) +$(ObjDir)/Attributes.inc.tmp: $(ObjDir)/.dir $(ATTRIBUTESTD) $(LLVM_TBLGEN) + $(Echo) Building Attributes.inc.tmp from $(ATTRIBUTESTD) + $(Verb) $(LLVMTableGen) $(call SYSPATH, $(ATTRIBUTESTD)) -o $(call SYSPATH, $@) -gen-attrs + +$(ATTRINCFILE): $(ObjDir)/Attributes.inc.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir + $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \ + $(EchoCmd) Updated Attributes.inc because Attributes.inc.tmp \ + changed significantly. ) + +$(ObjDir)/AttributesCompatFunc.inc.tmp: $(ObjDir)/.dir $(ATTRCOMPATFUNCTD) $(LLVM_TBLGEN) + $(Echo) Building AttributesCompatFunc.inc.tmp from $(ATTRCOMPATFUNCTD) + $(Verb) $(LLVMTableGen) $(call SYSPATH, $(ATTRCOMPATFUNCTD)) -o $(call SYSPATH, $@) -gen-attrs + +$(ATTRCOMPATFUNCINCFILE): $(ObjDir)/AttributesCompatFunc.inc.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir + $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \ + $(EchoCmd) Updated AttributesCompatFunc.inc because AttributesCompatFunc.inc.tmp \ + changed significantly. 
) + install-local:: $(GENFILE) $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen + +install-local:: $(ATTRINCFILE) + $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/IR/Attributes.inc + $(Verb) $(DataInstall) $(ATTRINCFILE) $(DESTDIR)$(PROJ_includedir)/llvm/IR/Attributes.inc diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 1abcf0d18c91..ab1ba5e2035b 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -120,6 +120,38 @@ void MetadataAsValue::untrack() { MetadataTracking::untrack(MD); } +bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) { + assert(Ref && "Expected live reference"); + assert((Owner || *static_cast(Ref) == &MD) && + "Reference without owner must be direct"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) { + R->addRef(Ref, Owner); + return true; + } + return false; +} + +void MetadataTracking::untrack(void *Ref, Metadata &MD) { + assert(Ref && "Expected live reference"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) + R->dropRef(Ref); +} + +bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) { + assert(Ref && "Expected live reference"); + assert(New && "Expected live reference"); + assert(Ref != New && "Expected change"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) { + R->moveRef(Ref, New, MD); + return true; + } + return false; +} + +bool MetadataTracking::isReplaceable(const Metadata &MD) { + return ReplaceableMetadataImpl::get(const_cast(MD)); +} + void ReplaceableMetadataImpl::addRef(void *Ref, OwnerTy Owner) { bool WasInserted = UseMap.insert(std::make_pair(Ref, std::make_pair(Owner, NextIndex))) @@ -239,6 +271,12 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) { } } +ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) { + if (auto *N = dyn_cast(&MD)) + return N->Context.getReplaceableUses(); + return dyn_cast(&MD); +} + static Function *getLocalFunction(Value *V) { assert(V && "Expected value"); if (auto *A = dyn_cast(V)) @@ -517,7 +555,7 @@ void MDNode::decrementUnresolvedOperandCount() { resolve(); } -void MDNode::resolveCycles() { +void MDNode::resolveCycles(bool MDMaterialized) { if (isResolved()) return; @@ -530,6 +568,8 @@ void MDNode::resolveCycles() { if (!N) continue; + if (N->isTemporary() && !MDMaterialized) + continue; assert(!N->isTemporary() && "Expected all forward declarations to be resolved"); if (!N->isResolved()) @@ -545,6 +585,18 @@ static bool hasSelfReference(MDNode *N) { } MDNode *MDNode::replaceWithPermanentImpl() { + switch (getMetadataID()) { + default: + // If this type isn't uniquable, replace with a distinct node. + return replaceWithDistinctImpl(); + +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ + case CLASS##Kind: \ + break; +#include "llvm/IR/Metadata.def" + } + + // Even if this type is uniquable, self-references have to be distinct. if (hasSelfReference(this)) return replaceWithDistinctImpl(); return replaceWithUniquedImpl(); @@ -671,8 +723,8 @@ MDNode *MDNode::uniquify() { // Try to insert into uniquing store. 
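// A sketch of the forward-reference workflow that the uniquable/distinct
// split above supports, assuming Ctx and an already-built MDNode *Real:
//
//   TempMDTuple Temp = MDTuple::getTemporary(Ctx, None); // placeholder
//   Metadata *Ops[] = {Temp.get()};
//   MDNode *N = MDTuple::get(Ctx, Ops); // refers to the temporary
//   Temp->replaceAllUsesWith(Real);     // patches N's operand in place
//   N->resolveCycles();                 // N can now become fully resolved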
switch (getMetadataID()) { default: - llvm_unreachable("Invalid subclass of MDNode"); -#define HANDLE_MDNODE_LEAF(CLASS) \ + llvm_unreachable("Invalid or non-uniquable subclass of MDNode"); +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: { \ CLASS *SubclassThis = cast(this); \ std::integral_constant::value> \ @@ -687,8 +739,8 @@ MDNode *MDNode::uniquify() { void MDNode::eraseFromStore() { switch (getMetadataID()) { default: - llvm_unreachable("Invalid subclass of MDNode"); -#define HANDLE_MDNODE_LEAF(CLASS) \ + llvm_unreachable("Invalid or non-uniquable subclass of MDNode"); +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: \ getContext().pImpl->CLASS##s.erase(cast(this)); \ break; @@ -941,6 +993,17 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { return MDNode::get(A->getContext(), MDs); } +MDNode *MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) { + if (!A || !B) + return nullptr; + + ConstantInt *AVal = mdconst::extract(A->getOperand(0)); + ConstantInt *BVal = mdconst::extract(B->getOperand(0)); + if (AVal->getZExtValue() < BVal->getZExtValue()) + return A; + return B; +} + //===----------------------------------------------------------------------===// // NamedMDNode implementation. // @@ -1045,14 +1108,10 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const { return getMetadataImpl(getContext().getMDKindID(Kind)); } -void Instruction::dropUnknownMetadata(ArrayRef KnownIDs) { +void Instruction::dropUnknownNonDebugMetadata(ArrayRef KnownIDs) { SmallSet KnownSet; KnownSet.insert(KnownIDs.begin(), KnownIDs.end()); - // Drop debug if needed - if (KnownSet.erase(LLVMContext::MD_dbg)) - DbgLoc = DebugLoc(); - if (!hasMetadataHashEntry()) return; // Nothing to remove! @@ -1077,7 +1136,7 @@ void Instruction::dropUnknownMetadata(ArrayRef KnownIDs) { } } -/// setMetadata - Set the metadata of of the specified kind to the specified +/// setMetadata - Set the metadata of the specified kind to the specified /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { @@ -1251,3 +1310,11 @@ void Function::clearMetadata() { getContext().pImpl->FunctionMetadata.erase(this); setHasMetadataHashEntry(false); } + +void Function::setSubprogram(DISubprogram *SP) { + setMetadata(LLVMContext::MD_dbg, SP); +} + +DISubprogram *Function::getSubprogram() const { + return cast_or_null(getMetadata(LLVMContext::MD_dbg)); +} diff --git a/lib/IR/MetadataImpl.h b/lib/IR/MetadataImpl.h index 662a50eb1bdc..b9137460bd20 100644 --- a/lib/IR/MetadataImpl.h +++ b/lib/IR/MetadataImpl.h @@ -26,6 +26,19 @@ static T *getUniqued(DenseSet &Store, return I == Store.end() ? nullptr : *I; } +template T *MDNode::storeImpl(T *N, StorageType Storage) { + switch (Storage) { + case Uniqued: + llvm_unreachable("Cannot unique without a uniquing-store"); + case Distinct: + N->storeDistinctInContext(); + break; + case Temporary: + break; + } + return N; +} + template T *MDNode::storeImpl(T *N, StorageType Storage, StoreT &Store) { switch (Storage) { diff --git a/lib/IR/MetadataTracking.cpp b/lib/IR/MetadataTracking.cpp deleted file mode 100644 index 47f0b9366d7d..000000000000 --- a/lib/IR/MetadataTracking.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===- MetadataTracking.cpp - Implement metadata tracking -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Metadata tracking.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/MetadataTracking.h"
-#include "llvm/IR/Metadata.h"
-
-using namespace llvm;
-
-ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) {
-  if (auto *N = dyn_cast<MDNode>(&MD))
-    return N->Context.getReplaceableUses();
-  return dyn_cast<ValueAsMetadata>(&MD);
-}
-
-bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) {
-  assert(Ref && "Expected live reference");
-  assert((Owner || *static_cast<Metadata **>(Ref) == &MD) &&
-         "Reference without owner must be direct");
-  if (auto *R = ReplaceableMetadataImpl::get(MD)) {
-    R->addRef(Ref, Owner);
-    return true;
-  }
-  return false;
-}
-
-void MetadataTracking::untrack(void *Ref, Metadata &MD) {
-  assert(Ref && "Expected live reference");
-  if (auto *R = ReplaceableMetadataImpl::get(MD))
-    R->dropRef(Ref);
-}
-
-bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) {
-  assert(Ref && "Expected live reference");
-  assert(New && "Expected live reference");
-  assert(Ref != New && "Expected change");
-  if (auto *R = ReplaceableMetadataImpl::get(MD)) {
-    R->moveRef(Ref, New, MD);
-    return true;
-  }
-  return false;
-}
-
-bool MetadataTracking::isReplaceable(const Metadata &MD) {
-  return ReplaceableMetadataImpl::get(const_cast<Metadata &>(MD));
-}
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 043f74e12da3..ac578d6dba0f 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -29,6 +29,7 @@
 #include <algorithm>
 #include <cstdarg>
 #include <cstdlib>
+
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -37,9 +38,9 @@ using namespace llvm;
 // Explicit instantiations of SymbolTableListTraits since some of the methods
 // are not in the public header file.
-template class llvm::SymbolTableListTraits<Function, Module>;
-template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
-template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+template class llvm::SymbolTableListTraits<Function>;
+template class llvm::SymbolTableListTraits<GlobalVariable>;
+template class llvm::SymbolTableListTraits<GlobalAlias>;
 
 //===----------------------------------------------------------------------===//
 // Primitive Module methods.
@@ -81,7 +82,6 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const {
   return new RandomNumberGenerator(Salt);
 }
 
-
 /// getNamedValue - Return the first global value in the module with
 /// the specified name, of arbitrary type. This method returns null
 /// if a global with the specified name is not found.
@@ -102,6 +102,9 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
   return Context.getMDKindNames(Result);
 }
 
+void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
+  return Context.getOperandBundleTags(Result);
+}
 
 //===----------------------------------------------------------------------===//
 // Methods for easy access to the functions in the module.
@@ -274,7 +277,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
 /// delete it.
 void Module::eraseNamedMetadata(NamedMDNode *NMD) {
   static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
-  NamedMDList.erase(NMD);
+  NamedMDList.erase(NMD->getIterator());
 }
 
 bool Module::isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB) {
@@ -376,17 +379,11 @@ const DataLayout &Module::getDataLayout() const { return DL; }
 //
 void Module::setMaterializer(GVMaterializer *GVM) {
   assert(!Materializer &&
-         "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+         "Module already has a GVMaterializer. Call materializeAll"
          " to clear it out before setting another one.");
   Materializer.reset(GVM);
 }
 
-bool Module::isDematerializable(const GlobalValue *GV) const {
-  if (Materializer)
-    return Materializer->isDematerializable(GV);
-  return false;
-}
-
 std::error_code Module::materialize(GlobalValue *GV) {
   if (!Materializer)
     return std::error_code();
@@ -394,23 +391,11 @@ std::error_code Module::materialize(GlobalValue *GV) {
   return Materializer->materialize(GV);
 }
 
-void Module::dematerialize(GlobalValue *GV) {
-  if (Materializer)
-    return Materializer->dematerialize(GV);
-}
-
 std::error_code Module::materializeAll() {
   if (!Materializer)
     return std::error_code();
-  return Materializer->materializeModule(this);
-}
-
-std::error_code Module::materializeAllPermanently() {
-  if (std::error_code EC = materializeAll())
-    return EC;
-
-  Materializer.reset();
-  return std::error_code();
+  std::unique_ptr<GVMaterializer> M = std::move(Materializer);
+  return M->materializeModule();
 }
 
 std::error_code Module::materializeMetadata() {
@@ -458,7 +443,14 @@ void Module::dropAllReferences() {
 unsigned Module::getDwarfVersion() const {
   auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version"));
   if (!Val)
-    return dwarf::DWARF_VERSION;
+    return 0;
+  return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
+
+unsigned Module::getCodeViewFlag() const {
+  auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("CodeView"));
+  if (!Val)
+    return 0;
   return cast<ConstantInt>(Val->getValue())->getZExtValue();
 }
 
@@ -471,7 +463,7 @@ Comdat *Module::getOrInsertComdat(StringRef Name) {
 
 PICLevel::Level Module::getPICLevel() const {
   auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("PIC Level"));
-  if (Val == NULL)
+  if (!Val)
     return PICLevel::Default;
 
   return static_cast<PICLevel::Level>(
@@ -481,3 +473,15 @@ PICLevel::Level Module::getPICLevel() const {
 void Module::setPICLevel(PICLevel::Level PL) {
   addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL);
 }
+
+void Module::setMaximumFunctionCount(uint64_t Count) {
+  addModuleFlag(ModFlagBehavior::Error, "MaxFunctionCount", Count);
+}
+
+Optional<uint64_t> Module::getMaximumFunctionCount() {
+  auto *Val =
+      cast_or_null<ConstantAsMetadata>(getModuleFlag("MaxFunctionCount"));
+  if (!Val)
+    return None;
+  return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 83ee611cc375..d45c1883ef9e 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -67,10 +67,7 @@ bool llvm::isGCResult(const ImmutableCallSite &CS) {
 bool llvm::isGCResult(const Value *inst) {
   if (const CallInst *call = dyn_cast<CallInst>(inst)) {
     if (Function *F = call->getCalledFunction()) {
-      return (F->getIntrinsicID() == Intrinsic::experimental_gc_result_int ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result_float ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result);
+      return F->getIntrinsicID() == Intrinsic::experimental_gc_result;
     }
   }
   return false;
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index a18f98261abc..50573d8d688a 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -24,77 +24,73 @@ namespace llvm {
 /// setSymTabObject - This is called when (f.e.) the parent of a basic block
 /// changes. This requires us to remove all the instruction symtab entries from
 /// the current function and reinsert them into the new function.
-template <typename ValueSubClass, typename ItemParentClass>
-template <typename TPtr>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::setSymTabObject(TPtr *Dest, TPtr Src) {
+template <typename ValueSubClass>
+template <typename TPtr>
+void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
+                                                           TPtr Src) {
   // Get the old symtab and value list before doing the assignment.
-  ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+  ValueSymbolTable *OldST = getSymTab(getListOwner());
 
   // Do it.
   *Dest = Src;
 
   // Get the new SymTab object.
-  ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+  ValueSymbolTable *NewST = getSymTab(getListOwner());
 
   // If there is nothing to do, quick exit.
   if (OldST == NewST) return;
 
   // Move all the elements from the old symtab to the new one.
-  iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+  ListTy &ItemList = getList(getListOwner());
   if (ItemList.empty()) return;
 
   if (OldST) {
     // Remove all entries from the previous symtab.
-    for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
-         I != ItemList.end(); ++I)
+    for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
       if (I->hasName())
         OldST->removeValueName(I->getValueName());
   }
 
   if (NewST) {
     // Add all of the items to the new symtab.
-    for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
-         I != ItemList.end(); ++I)
+    for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
       if (I->hasName())
-        NewST->reinsertValue(I);
+        NewST->reinsertValue(&*I);
   }
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::addNodeToList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::addNodeToList(ValueSubClass *V) {
   assert(!V->getParent() && "Value already in a container!!");
   ItemParentClass *Owner = getListOwner();
   V->setParent(Owner);
   if (V->hasName())
-    if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+    if (ValueSymbolTable *ST = getSymTab(Owner))
       ST->reinsertValue(V);
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::removeNodeFromList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
+    ValueSubClass *V) {
   V->setParent(nullptr);
   if (V->hasName())
-    if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+    if (ValueSymbolTable *ST = getSymTab(getListOwner()))
      ST->removeValueName(V->getValueName());
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
-                        ilist_iterator<ValueSubClass> first,
-                        ilist_iterator<ValueSubClass> last) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
+    SymbolTableListTraits &L2, ilist_iterator<ValueSubClass> first,
+    ilist_iterator<ValueSubClass> last) {
   // We only have to do work here if transferring instructions between BBs
   ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
   if (NewIP == OldIP) return;  // No work to do at all...
 
   // We only have to update symbol table entries if we are transferring the
   // instructions to a different symtab object...
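A minimal standalone sketch of the reparenting pattern implemented above, using a hypothetical name-keyed SymTab rather than the real ValueSymbolTable types:

// Hypothetical illustration, not LLVM code: when an item's parent (and
// therefore its symbol table) changes, its entry must move tables.
#include <map>
#include <string>

struct SymTab { std::map<std::string, void *> Entries; };

inline void reparent(SymTab *OldST, SymTab *NewST, const std::string &Name,
                     void *Item) {
  if (OldST == NewST)
    return;                      // Same table: nothing to update.
  if (OldST)
    OldST->Entries.erase(Name);  // Remove the entry from the previous symtab.
  if (NewST)
    NewST->Entries[Name] = Item; // Reinsert it under the new parent.
}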
- ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP); - ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP); + ValueSymbolTable *NewST = getSymTab(NewIP); + ValueSymbolTable *OldST = getSymTab(OldIP); if (NewST != OldST) { for (; first != last; ++first) { ValueSubClass &V = *first; diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index a9ca80034ca7..4c1baf52a58f 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -35,6 +35,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { case LabelTyID : return getLabelTy(C); case MetadataTyID : return getMetadataTy(C); case X86_MMXTyID : return getX86_MMXTy(C); + case TokenTyID : return getTokenTy(C); default: return nullptr; } @@ -42,16 +43,10 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { /// getScalarType - If this is a vector type, return the element type, /// otherwise return this. -Type *Type::getScalarType() { - if (VectorType *VTy = dyn_cast(this)) +Type *Type::getScalarType() const { + if (auto *VTy = dyn_cast(this)) return VTy->getElementType(); - return this; -} - -const Type *Type::getScalarType() const { - if (const VectorType *VTy = dyn_cast(this)) - return VTy->getElementType(); - return this; + return const_cast(this); } /// isIntegerTy - Return true if this is an IntegerType of the specified width. @@ -74,8 +69,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { // Vector -> Vector conversions are always lossless if the two vector types // have the same size, otherwise not. Also, 64-bit vector types can be // converted to x86mmx. - if (const VectorType *thisPTy = dyn_cast(this)) { - if (const VectorType *thatPTy = dyn_cast(Ty)) + if (auto *thisPTy = dyn_cast(this)) { + if (auto *thatPTy = dyn_cast(Ty)) return thisPTy->getBitWidth() == thatPTy->getBitWidth(); if (Ty->getTypeID() == Type::X86_MMXTyID && thisPTy->getBitWidth() == 64) @@ -83,7 +78,7 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { } if (this->getTypeID() == Type::X86_MMXTyID) - if (const VectorType *thatPTy = dyn_cast(Ty)) + if (auto *thatPTy = dyn_cast(Ty)) if (thatPTy->getBitWidth() == 64) return true; @@ -91,8 +86,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { // remaining and ptr->ptr. Just select the lossless conversions. Everything // else is not lossless. Conservatively assume we can't losslessly convert // between pointers with different address spaces. - if (const PointerType *PTy = dyn_cast(this)) { - if (const PointerType *OtherPTy = dyn_cast(Ty)) + if (auto *PTy = dyn_cast(this)) { + if (auto *OtherPTy = dyn_cast(Ty)) return PTy->getAddressSpace() == OtherPTy->getAddressSpace(); return false; } @@ -100,14 +95,12 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { } bool Type::isEmptyTy() const { - const ArrayType *ATy = dyn_cast(this); - if (ATy) { + if (auto *ATy = dyn_cast(this)) { unsigned NumElements = ATy->getNumElements(); return NumElements == 0 || ATy->getElementType()->isEmptyTy(); } - const StructType *STy = dyn_cast(this); - if (STy) { + if (auto *STy = dyn_cast(this)) { unsigned NumElements = STy->getNumElements(); for (unsigned i = 0; i < NumElements; ++i) if (!STy->getElementType(i)->isEmptyTy()) @@ -144,7 +137,7 @@ unsigned Type::getScalarSizeInBits() const { /// is only valid on floating point types. If the FP type does not /// have a stable mantissa (e.g. ppc long double), this method returns -1. 
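For reference, a usage sketch of the mantissa-width query documented above. The expected constants (11/24/53 for half/float/double, -1 for ppc_fp128) follow from the IEEE formats plus the doc comment, and are stated here as assumptions about this revision:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

void checkMantissaWidths(llvm::LLVMContext &Ctx) {
  assert(llvm::Type::getHalfTy(Ctx)->getFPMantissaWidth() == 11);
  assert(llvm::Type::getFloatTy(Ctx)->getFPMantissaWidth() == 24);
  assert(llvm::Type::getDoubleTy(Ctx)->getFPMantissaWidth() == 53);
  // ppc long double has no stable mantissa width, so the query returns -1.
  assert(llvm::Type::getPPC_FP128Ty(Ctx)->getFPMantissaWidth() == -1);
}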
int Type::getFPMantissaWidth() const { - if (const VectorType *VTy = dyn_cast(this)) + if (auto *VTy = dyn_cast(this)) return VTy->getElementType()->getFPMantissaWidth(); assert(isFloatingPointTy() && "Not a floating point type!"); if (getTypeID() == HalfTyID) return 11; @@ -159,65 +152,16 @@ int Type::getFPMantissaWidth() const { /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. Since asking for /// their size is relatively uncommon, move this operation out of line. -bool Type::isSizedDerivedType(SmallPtrSetImpl *Visited) const { - if (const ArrayType *ATy = dyn_cast(this)) +bool Type::isSizedDerivedType(SmallPtrSetImpl *Visited) const { + if (auto *ATy = dyn_cast(this)) return ATy->getElementType()->isSized(Visited); - if (const VectorType *VTy = dyn_cast(this)) + if (auto *VTy = dyn_cast(this)) return VTy->getElementType()->isSized(Visited); return cast(this)->isSized(Visited); } -//===----------------------------------------------------------------------===// -// Subclass Helper Methods -//===----------------------------------------------------------------------===// - -unsigned Type::getIntegerBitWidth() const { - return cast(this)->getBitWidth(); -} - -bool Type::isFunctionVarArg() const { - return cast(this)->isVarArg(); -} - -Type *Type::getFunctionParamType(unsigned i) const { - return cast(this)->getParamType(i); -} - -unsigned Type::getFunctionNumParams() const { - return cast(this)->getNumParams(); -} - -StringRef Type::getStructName() const { - return cast(this)->getName(); -} - -unsigned Type::getStructNumElements() const { - return cast(this)->getNumElements(); -} - -Type *Type::getStructElementType(unsigned N) const { - return cast(this)->getElementType(N); -} - -Type *Type::getSequentialElementType() const { - return cast(this)->getElementType(); -} - -uint64_t Type::getArrayNumElements() const { - return cast(this)->getNumElements(); -} - -unsigned Type::getVectorNumElements() const { - return cast(this)->getNumElements(); -} - -unsigned Type::getPointerAddressSpace() const { - return cast(getScalarType())->getAddressSpace(); -} - - //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -228,6 +172,7 @@ Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; } Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; } Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; } Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; } +Type *Type::getTokenTy(LLVMContext &C) { return &C.pImpl->TokenTy; } Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; } Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; } Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } @@ -345,7 +290,7 @@ FunctionType::FunctionType(Type *Result, ArrayRef Params, assert(isValidReturnType(Result) && "invalid return type for function"); setSubclassData(IsVarArgs); - SubTys[0] = const_cast(Result); + SubTys[0] = Result; for (unsigned i = 0, e = Params.size(); i != e; ++i) { assert(isValidArgumentType(Params[i]) && @@ -428,12 +373,14 @@ void StructType::setBody(ArrayRef Elements, bool isPacked) { if (isPacked) setSubclassData(getSubclassData() | SCDB_Packed); - unsigned NumElements = Elements.size(); - Type **Elts = getContext().pImpl->TypeAllocator.Allocate(NumElements); - 
memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements); - - ContainedTys = Elts; - NumContainedTys = NumElements; + NumContainedTys = Elements.size(); + + if (Elements.empty()) { + ContainedTys = nullptr; + return; + } + + ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data(); } void StructType::setName(StringRef Name) { @@ -470,7 +417,6 @@ void StructType::setName(StringRef Name) { do { TempStr.resize(NameSize + 1); - TmpStream.resync(); TmpStream << getContext().pImpl->NamedStructTypesUniqueID++; IterBool = getContext().pImpl->NamedStructTypes.insert( @@ -556,13 +502,13 @@ StructType *StructType::create(StringRef Name, Type *type, ...) { return Ret; } -bool StructType::isSized(SmallPtrSetImpl *Visited) const { +bool StructType::isSized(SmallPtrSetImpl *Visited) const { if ((getSubclassData() & SCDB_IsSized) != 0) return true; if (isOpaque()) return false; - if (Visited && !Visited->insert(this).second) + if (Visited && !Visited->insert(const_cast(this)).second) return false; // Okay, our struct is sized if all of the elements are, but if one of the @@ -602,22 +548,19 @@ void StructType::setBody(Type *type, ...) { bool StructType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && + !ElemTy->isTokenTy(); } /// isLayoutIdentical - Return true if this is layout identical to the /// specified struct. bool StructType::isLayoutIdentical(StructType *Other) const { if (this == Other) return true; - - if (isPacked() != Other->isPacked() || - getNumElements() != Other->getNumElements()) + + if (isPacked() != Other->isPacked()) return false; - if (!getNumElements()) - return true; - - return std::equal(element_begin(), element_end(), Other->element_begin()); + return elements() == Other->elements(); } /// getTypeByName - Return the type with the specified name, or null if there @@ -631,8 +574,8 @@ StructType *Module::getTypeByName(StringRef Name) const { // CompositeType Implementation //===----------------------------------------------------------------------===// -Type *CompositeType::getTypeAtIndex(const Value *V) { - if (StructType *STy = dyn_cast(this)) { +Type *CompositeType::getTypeAtIndex(const Value *V) const { + if (auto *STy = dyn_cast(this)) { unsigned Idx = (unsigned)cast(V)->getUniqueInteger().getZExtValue(); assert(indexValid(Idx) && "Invalid structure index!"); @@ -641,16 +584,18 @@ Type *CompositeType::getTypeAtIndex(const Value *V) { return cast(this)->getElementType(); } -Type *CompositeType::getTypeAtIndex(unsigned Idx) { - if (StructType *STy = dyn_cast(this)) { + +Type *CompositeType::getTypeAtIndex(unsigned Idx) const{ + if (auto *STy = dyn_cast(this)) { assert(indexValid(Idx) && "Invalid structure index!"); return STy->getElementType(Idx); } - + return cast(this)->getElementType(); } + bool CompositeType::indexValid(const Value *V) const { - if (const StructType *STy = dyn_cast(this)) { + if (auto *STy = dyn_cast(this)) { // Structure indexes require (vectors of) 32-bit integer constants. In the // vector case all of the indices must be equal. if (!V->getType()->getScalarType()->isIntegerTy(32)) @@ -667,7 +612,7 @@ bool CompositeType::indexValid(const Value *V) const { } bool CompositeType::indexValid(unsigned Idx) const { - if (const StructType *STy = dyn_cast(this)) + if (auto *STy = dyn_cast(this)) return Idx < STy->getNumElements(); // Sequential types can be indexed by any integer. 
return true; @@ -683,10 +628,9 @@ ArrayType::ArrayType(Type *ElType, uint64_t NumEl) NumElements = NumEl; } -ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { - Type *ElementType = const_cast(elementType); +ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) { assert(isValidElementType(ElementType) && "Invalid type for array element!"); - + LLVMContextImpl *pImpl = ElementType->getContext().pImpl; ArrayType *&Entry = pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)]; @@ -698,7 +642,8 @@ ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { bool ArrayType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && + !ElemTy->isTokenTy(); } //===----------------------------------------------------------------------===// @@ -710,8 +655,7 @@ VectorType::VectorType(Type *ElType, unsigned NumEl) NumElements = NumEl; } -VectorType *VectorType::get(Type *elementType, unsigned NumElements) { - Type *ElementType = const_cast(elementType); +VectorType *VectorType::get(Type *ElementType, unsigned NumElements) { assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); assert(isValidElementType(ElementType) && "Element type of a VectorType must " "be an integer, floating point, or " @@ -761,13 +705,13 @@ PointerType::PointerType(Type *E, unsigned AddrSpace) assert(oldNCT == NumContainedTys && "bitfield written out of bounds?"); } -PointerType *Type::getPointerTo(unsigned addrs) { - return PointerType::get(this, addrs); +PointerType *Type::getPointerTo(unsigned addrs) const { + return PointerType::get(const_cast(this), addrs); } bool PointerType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy(); + !ElemTy->isMetadataTy() && !ElemTy->isTokenTy(); } bool PointerType::isLoadableOrStorableType(Type *ElemTy) { diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp index 7accc5bef535..b5bdab0865b6 100644 --- a/lib/IR/TypeFinder.cpp +++ b/lib/IR/TypeFinder.cpp @@ -44,19 +44,13 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { incorporateType(FI->getType()); - if (FI->hasPrefixData()) - incorporateValue(FI->getPrefixData()); - - if (FI->hasPrologueData()) - incorporateValue(FI->getPrologueData()); - - if (FI->hasPersonalityFn()) - incorporateValue(FI->getPersonalityFn()); + for (const Use &U : FI->operands()) + incorporateValue(U.get()); // First incorporate the arguments. 
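A hedged usage sketch of the walker being modified here, assuming a populated llvm::Module `M`:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/raw_ostream.h"

// List every named struct type a module transitively references.
void dumpNamedStructs(llvm::Module &M) {
  llvm::TypeFinder Finder;
  Finder.run(M, /*onlyNamed=*/true); // Skip literal (unnamed) structs.
  for (llvm::StructType *ST : Finder)
    llvm::errs() << ST->getName() << "\n";
}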
for (Function::const_arg_iterator AI = FI->arg_begin(), AE = FI->arg_end(); AI != AE; ++AI) - incorporateValue(AI); + incorporateValue(&*AI); for (Function::const_iterator BB = FI->begin(), E = FI->end(); BB != E;++BB) @@ -85,7 +79,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; ++I) { - const NamedMDNode *NMD = I; + const NamedMDNode *NMD = &*I; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) incorporateMDNode(NMD->getOperand(i)); } diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp index 522722d701ba..a75abe6938c9 100644 --- a/lib/IR/User.cpp +++ b/lib/IR/User.cpp @@ -87,22 +87,70 @@ void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) { Use::zap(OldOps, OldOps + OldNumUses, true); } + +// This is a private struct used by `User` to track the co-allocated descriptor +// section. +struct DescriptorInfo { + intptr_t SizeInBytes; +}; + +ArrayRef User::getDescriptor() const { + auto MutableARef = const_cast(this)->getDescriptor(); + return {MutableARef.begin(), MutableARef.end()}; +} + +MutableArrayRef User::getDescriptor() { + assert(HasDescriptor && "Don't call otherwise!"); + assert(!HasHungOffUses && "Invariant!"); + + auto *DI = reinterpret_cast(getIntrusiveOperands()) - 1; + assert(DI->SizeInBytes != 0 && "Should not have had a descriptor otherwise!"); + + return MutableArrayRef( + reinterpret_cast(DI) - DI->SizeInBytes, DI->SizeInBytes); +} + //===----------------------------------------------------------------------===// // User operator new Implementations //===----------------------------------------------------------------------===// -void *User::operator new(size_t Size, unsigned Us) { +void *User::allocateFixedOperandUser(size_t Size, unsigned Us, + unsigned DescBytes) { assert(Us < (1u << NumUserOperandsBits) && "Too many operands"); - void *Storage = ::operator new(Size + sizeof(Use) * Us); - Use *Start = static_cast(Storage); + + static_assert(sizeof(DescriptorInfo) % sizeof(void *) == 0, "Required below"); + + unsigned DescBytesToAllocate = + DescBytes == 0 ? 0 : (DescBytes + sizeof(DescriptorInfo)); + assert(DescBytesToAllocate % sizeof(void *) == 0 && + "We need this to satisfy alignment constraints for Uses"); + + uint8_t *Storage = static_cast( + ::operator new(Size + sizeof(Use) * Us + DescBytesToAllocate)); + Use *Start = reinterpret_cast(Storage + DescBytesToAllocate); Use *End = Start + Us; User *Obj = reinterpret_cast(End); Obj->NumUserOperands = Us; Obj->HasHungOffUses = false; + Obj->HasDescriptor = DescBytes != 0; Use::initTags(Start, End); + + if (DescBytes != 0) { + auto *DescInfo = reinterpret_cast(Storage + DescBytes); + DescInfo->SizeInBytes = DescBytes; + } + return Obj; } +void *User::operator new(size_t Size, unsigned Us) { + return allocateFixedOperandUser(Size, Us, 0); +} + +void *User::operator new(size_t Size, unsigned Us, unsigned DescBytes) { + return allocateFixedOperandUser(Size, Us, DescBytes); +} + void *User::operator new(size_t Size) { // Allocate space for a single Use* void *Storage = ::operator new(Size + sizeof(Use *)); @@ -110,6 +158,7 @@ void *User::operator new(size_t Size) { User *Obj = reinterpret_cast(HungOffOperandList + 1); Obj->NumUserOperands = 0; Obj->HasHungOffUses = true; + Obj->HasDescriptor = false; *HungOffOperandList = nullptr; return Obj; } @@ -123,11 +172,20 @@ void User::operator delete(void *Usr) { // use a Use[] allocated prior to the user. 
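The co-allocated memory layout that the operator new and operator delete overloads above must agree on, sketched as a comment (descriptor variant; inferred from the code, not normative):

// [ descriptor bytes ][ DescriptorInfo ][ Use #0 ... Use #N-1 ][ User object ]
// ^ Storage (what ::operator new returned)                     ^ this
//
// operator delete therefore walks back from the User through the Use array
// and the DescriptorInfo header to recover the original Storage pointer
// before freeing it.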
User *Obj = static_cast(Usr); if (Obj->HasHungOffUses) { + assert(!Obj->HasDescriptor && "not supported!"); + Use **HungOffOperandList = static_cast(Usr) - 1; // drop the hung off uses. Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands, /* Delete */ true); ::operator delete(HungOffOperandList); + } else if (Obj->HasDescriptor) { + Use *UseBegin = static_cast(Usr) - Obj->NumUserOperands; + Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false); + + auto *DI = reinterpret_cast(UseBegin) - 1; + uint8_t *Storage = reinterpret_cast(DI) - DI->SizeInBytes; + ::operator delete(Storage); } else { Use *Storage = static_cast(Usr) - Obj->NumUserOperands; Use::zap(Storage, Storage + Obj->NumUserOperands, diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index f554d590284f..eb9deb6a07e1 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -314,6 +314,16 @@ void Value::takeName(Value *V) { } #ifndef NDEBUG +void Value::assertModuleIsMaterialized() const { + const GlobalValue *GV = dyn_cast(this); + if (!GV) + return; + const Module *M = GV->getParent(); + if (!M) + return; + assert(M->isMaterialized()); +} + static bool contains(SmallPtrSetImpl &Cache, ConstantExpr *Expr, Constant *C) { if (!Cache.insert(Expr).second) @@ -490,8 +500,7 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, return V; Offset = GEPOffset; V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast || - Operator::getOpcode(V) == Instruction::AddrSpaceCast) { + } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { V = GA->getAliasee(); diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index e10142de8232..deb6e7573e72 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -32,6 +32,24 @@ ValueSymbolTable::~ValueSymbolTable() { #endif } +ValueName *ValueSymbolTable::makeUniqueName(Value *V, + SmallString<256> &UniqueName) { + unsigned BaseSize = UniqueName.size(); + while (1) { + // Trim any suffix off and append the next number. + UniqueName.resize(BaseSize); + raw_svector_ostream S(UniqueName); + if (isa(V)) + S << "."; + S << ++LastUnique; + + // Try insert the vmap entry with this suffix. + auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); + if (IterBool.second) + return &*IterBool.first; + } +} + // Insert a value into the symbol table with the specified name... // void ValueSymbolTable::reinsertValue(Value* V) { @@ -49,21 +67,8 @@ void ValueSymbolTable::reinsertValue(Value* V) { // The name is too already used, just free it so we can allocate a new name. V->getValueName()->Destroy(); - unsigned BaseSize = UniqueName.size(); - while (1) { - // Trim any suffix off and append the next number. - UniqueName.resize(BaseSize); - raw_svector_ostream(UniqueName) << "." << ++LastUnique; - - // Try insert the vmap entry with this suffix. - auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); - if (IterBool.second) { - // Newly inserted name. Success! - V->setValueName(&*IterBool.first); - //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); - return; - } - } + ValueName *VN = makeUniqueName(V, UniqueName); + V->setValueName(VN); } void ValueSymbolTable::removeValueName(ValueName *V) { @@ -86,20 +91,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // Otherwise, there is a naming conflict. Rename this value. 
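The suffix-probing loop that makeUniqueName() factors out can be sketched in isolation like this (hypothetical standalone code, not the LLVM implementation):

#include <map>
#include <string>

inline std::string makeUnique(std::map<std::string, void *> &Table,
                              const std::string &Base, void *V) {
  unsigned LastUnique = 0;
  std::string Candidate = Base;
  // Trim any previous suffix off and append the next number until the
  // insertion finally succeeds.
  while (!Table.emplace(Candidate, V).second)
    Candidate = Base + "." + std::to_string(++LastUnique);
  return Candidate;
}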
SmallString<256> UniqueName(Name.begin(), Name.end()); - - while (1) { - // Trim any suffix off and append the next number. - UniqueName.resize(Name.size()); - raw_svector_ostream(UniqueName) << ++LastUnique; - - // Try insert the vmap entry with this suffix. - auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); - if (IterBool.second) { - // DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << - // "\n"); - return &*IterBool.first; - } - } + return makeUniqueName(V, UniqueName); } diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index d95de3989df0..f2932302af2a 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -19,6 +19,11 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +EVT EVT::changeExtendedTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + return getIntegerVT(Context, getSizeInBits()); +} + EVT EVT::changeExtendedVectorElementTypeToInteger() const { LLVMContext &Context = LLVMTy->getContext(); EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits()); @@ -83,6 +88,10 @@ bool EVT::isExtended1024BitVector() const { return isExtendedVector() && getExtendedSizeInBits() == 1024; } +bool EVT::isExtended2048BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 2048; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast(LLVMTy)->getElementType()); @@ -134,6 +143,8 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v512i1: return "v512i1"; + case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; case MVT::v2i8: return "v2i8"; case MVT::v4i8: return "v4i8"; @@ -141,22 +152,29 @@ std::string EVT::getEVTString() const { case MVT::v16i8: return "v16i8"; case MVT::v32i8: return "v32i8"; case MVT::v64i8: return "v64i8"; + case MVT::v128i8: return "v128i8"; + case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; case MVT::v32i16: return "v32i16"; + case MVT::v64i16: return "v64i16"; + case MVT::v128i16: return "v128i16"; case MVT::v1i32: return "v1i32"; case MVT::v2i32: return "v2i32"; case MVT::v4i32: return "v4i32"; case MVT::v8i32: return "v8i32"; case MVT::v16i32: return "v16i32"; + case MVT::v32i32: return "v32i32"; + case MVT::v64i32: return "v64i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; case MVT::v16i64: return "v16i64"; + case MVT::v32i64: return "v32i64"; case MVT::v1i128: return "v1i128"; case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; @@ -203,6 +221,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); + case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); @@ -210,22 +230,29 @@ Type 
*EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16); case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32); case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64); + case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128); + case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32); + case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64); + case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1); case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); + case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32); + case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); + case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 2a0a4ff393ed..81c87e4759b7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -39,8 +39,7 @@ // only by the unwind edge of an invoke instruction. // * A landingpad instruction must be the first non-PHI instruction in the // block. -// * All landingpad instructions must use the same personality function with -// the same function. +// * Landingpad instructions must be in a function with a personality function. // * All other things that are tested by asserts spread about the code... // //===----------------------------------------------------------------------===// @@ -92,6 +91,16 @@ struct VerifierSupport { : OS(OS), M(nullptr), Broken(false) {} private: + template void Write(const ilist_iterator &I) { + Write(&*I); + } + + void Write(const Module *M) { + if (!M) + return; + OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; + } + void Write(const Value *V) { if (!V) return; @@ -184,6 +193,9 @@ class Verifier : public InstVisitor, VerifierSupport { /// \brief Track unresolved string-based type references. SmallDenseMap UnresolvedTypeRefs; + /// \brief The result type for a landingpad. + Type *LandingPadResultTy; + /// \brief Whether we've seen a call to @llvm.localescape in this function /// already. 
bool SawFrameEscape; @@ -192,9 +204,15 @@ class Verifier : public InstVisitor, VerifierSupport { /// given function and the largest index passed to llvm.localrecover. DenseMap> FrameEscapeInfo; + /// Cache of constants visited in search of ConstantExprs. + SmallPtrSet ConstantExprVisited; + + void checkAtomicMemAccessSize(const Module *M, Type *Ty, + const Instruction *I); public: explicit Verifier(raw_ostream &OS) - : VerifierSupport(OS), Context(nullptr), SawFrameEscape(false) {} + : VerifierSupport(OS), Context(nullptr), LandingPadResultTy(nullptr), + SawFrameEscape(false) {} bool verify(const Function &F) { M = F.getParent(); @@ -228,6 +246,7 @@ public: // FIXME: We strip const here because the inst visitor strips const. visit(const_cast(F)); InstsInThisBlock.clear(); + LandingPadResultTy = nullptr; SawFrameEscape = false; return !Broken; @@ -297,12 +316,12 @@ private: void visitFunction(const Function &F); void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty); + void visitDereferenceableMetadata(Instruction& I, MDNode* MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); #include "llvm/IR/Metadata.def" void visitDIScope(const DIScope &N); - void visitDIDerivedTypeBase(const DIDerivedTypeBase &N); void visitDIVariable(const DIVariable &N); void visitDILexicalBlockBase(const DILexicalBlockBase &N); void visitDITemplateParameter(const DITemplateParameter &N); @@ -379,7 +398,13 @@ private: void visitAllocaInst(AllocaInst &AI); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitEHPadPredecessors(Instruction &I); void visitLandingPadInst(LandingPadInst &LPI); + void visitCatchPadInst(CatchPadInst &CPI); + void visitCatchReturnInst(CatchReturnInst &CatchReturn); + void visitCleanupPadInst(CleanupPadInst &CPI); + void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch); + void visitCleanupReturnInst(CleanupReturnInst &CRI); void VerifyCallSite(CallSite CS); void verifyMustTailCall(CallInst &CI); @@ -399,7 +424,8 @@ private: void VerifyFunctionMetadata( const SmallVector, 4> MDs); - void VerifyConstantExprBitcastType(const ConstantExpr *CE); + void visitConstantExprsRecursively(const Constant *EntryC); + void visitConstantExpr(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); void verifyFrameRecoverIndices(); @@ -524,25 +550,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { } // Walk any aggregate initializers looking for bitcasts between address spaces - SmallPtrSet Visited; - SmallVector WorkStack; - WorkStack.push_back(cast(GV.getInitializer())); - - while (!WorkStack.empty()) { - const Value *V = WorkStack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - if (const User *U = dyn_cast(V)) { - WorkStack.append(U->op_begin(), U->op_end()); - } - - if (const ConstantExpr *CE = dyn_cast(V)) { - VerifyConstantExprBitcastType(CE); - if (Broken) - return; - } - } + visitConstantExprsRecursively(GV.getInitializer()); visitGlobalValue(GV); } @@ -556,7 +564,8 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &GA, const Constant &C) { if (const auto *GV = dyn_cast(&C)) { - Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA); + Assert(!GV->isDeclarationForLinker(), "Alias must point to a definition", + &GA); if (const auto *GA2 = 
dyn_cast(GV)) { Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); @@ -571,7 +580,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, } if (const auto *CE = dyn_cast(&C)) - VerifyConstantExprBitcastType(CE); + visitConstantExprsRecursively(CE); for (const Use &U : C.operands()) { Value *V = &*U; @@ -779,39 +788,10 @@ void Verifier::visitDIBasicType(const DIBasicType &N) { "invalid tag", &N); } -void Verifier::visitDIDerivedTypeBase(const DIDerivedTypeBase &N) { +void Verifier::visitDIDerivedType(const DIDerivedType &N) { // Common scope checks. visitDIScope(N); - Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); - Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, - N.getBaseType()); - - // FIXME: Sink this into the subclass verifies. - if (!N.getFile() || N.getFile()->getFilename().empty()) { - // Check whether the filename is allowed to be empty. - uint16_t Tag = N.getTag(); - Assert( - Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_pointer_type || - Tag == dwarf::DW_TAG_ptr_to_member_type || - Tag == dwarf::DW_TAG_reference_type || - Tag == dwarf::DW_TAG_rvalue_reference_type || - Tag == dwarf::DW_TAG_restrict_type || - Tag == dwarf::DW_TAG_array_type || - Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_subroutine_type || - Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef, - "derived/composite type requires a filename", &N, N.getFile()); - } -} - -void Verifier::visitDIDerivedType(const DIDerivedType &N) { - // Common derived type checks. - visitDIDerivedTypeBase(N); - Assert(N.getTag() == dwarf::DW_TAG_typedef || N.getTag() == dwarf::DW_TAG_pointer_type || N.getTag() == dwarf::DW_TAG_ptr_to_member_type || @@ -828,6 +808,10 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N, N.getExtraData()); } + + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); } static bool hasConflictingReferenceFlags(unsigned Flags) { @@ -845,27 +829,34 @@ void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) { } void Verifier::visitDICompositeType(const DICompositeType &N) { - // Common derived type checks. - visitDIDerivedTypeBase(N); + // Common scope checks. 
+ visitDIScope(N); Assert(N.getTag() == dwarf::DW_TAG_array_type || N.getTag() == dwarf::DW_TAG_structure_type || N.getTag() == dwarf::DW_TAG_union_type || N.getTag() == dwarf::DW_TAG_enumeration_type || - N.getTag() == dwarf::DW_TAG_subroutine_type || N.getTag() == dwarf::DW_TAG_class_type, "invalid tag", &N); + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); + Assert(!N.getRawElements() || isa(N.getRawElements()), "invalid composite elements", &N, N.getRawElements()); Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N, N.getRawVTableHolder()); - Assert(!N.getRawElements() || isa(N.getRawElements()), - "invalid composite elements", &N, N.getRawElements()); Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); if (auto *Params = N.getRawTemplateParams()) visitTemplateParams(N, *Params); + + if (N.getTag() == dwarf::DW_TAG_class_type || + N.getTag() == dwarf::DW_TAG_union_type) { + Assert(N.getFile() && !N.getFile()->getFilename().empty(), + "class/union requires a filename", &N, N.getFile()); + } } void Verifier::visitDISubroutineType(const DISubroutineType &N) { @@ -885,6 +876,7 @@ void Verifier::visitDIFile(const DIFile &N) { } void Verifier::visitDICompileUnit(const DICompileUnit &N) { + Assert(N.isDistinct(), "compile units must be distinct", &N); Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); // Don't bother verifying the compilation directory or producer string @@ -928,6 +920,12 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) { Op); } } + if (auto *Array = N.getRawMacros()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getMacros()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } } void Verifier::visitDISubprogram(const DISubprogram &N) { @@ -937,13 +935,6 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { Assert(isa(T), "invalid subroutine type", &N, T); Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N, N.getRawContainingType()); - if (auto *RawF = N.getRawFunction()) { - auto *FMD = dyn_cast(RawF); - auto *F = FMD ? FMD->getValue() : nullptr; - auto *FT = F ? dyn_cast(F->getType()) : nullptr; - Assert(F && FT && isa(FT->getElementType()), - "invalid function", &N, F, FT); - } if (auto *Params = N.getRawTemplateParams()) visitTemplateParams(N, *Params); if (auto *S = N.getRawDeclaration()) { @@ -961,40 +952,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); - auto *F = N.getFunction(); - if (!F) - return; - - // Check that all !dbg attachments lead to back to N (or, at least, another - // subprogram that describes the same function). - // - // FIXME: Check this incrementally while visiting !dbg attachments. - // FIXME: Only check when N is the canonical subprogram for F. - SmallPtrSet Seen; - for (auto &BB : *F) - for (auto &I : BB) { - // Be careful about using DILocation here since we might be dealing with - // broken code (this is the Verifier after all). - DILocation *DL = - dyn_cast_or_null(I.getDebugLoc().getAsMDNode()); - if (!DL) - continue; - if (!Seen.insert(DL).second) - continue; - - DILocalScope *Scope = DL->getInlinedAtScope(); - if (Scope && !Seen.insert(Scope).second) - continue; - - DISubprogram *SP = Scope ? 
Scope->getSubprogram() : nullptr; - if (SP && !Seen.insert(SP).second) - continue; - - // FIXME: Once N is canonical, check "SP == &N". - Assert(SP->describes(F), - "!dbg attachment points at wrong subprogram for function", &N, F, - &I, DL, Scope, SP); - } + if (N.isDefinition()) + Assert(N.isDistinct(), "subprogram definitions must be distinct", &N); } void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) { @@ -1020,6 +979,27 @@ void Verifier::visitDINamespace(const DINamespace &N) { Assert(isa(S), "invalid scope ref", &N, S); } +void Verifier::visitDIMacro(const DIMacro &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_define || + N.getMacinfoType() == dwarf::DW_MACINFO_undef, + "invalid macinfo type", &N); + Assert(!N.getName().empty(), "anonymous macro", &N); +} + +void Verifier::visitDIMacroFile(const DIMacroFile &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_start_file, + "invalid macinfo type", &N); + if (auto *F = N.getRawFile()) + Assert(isa(F), "invalid file", &N, F); + + if (auto *Array = N.getRawElements()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getElements()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } +} + void Verifier::visitDIModule(const DIModule &N) { Assert(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N); Assert(!N.getName().empty(), "anonymous module", &N); @@ -1075,9 +1055,7 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { // Checks common to all variables. visitDIVariable(N); - Assert(N.getTag() == dwarf::DW_TAG_auto_variable || - N.getTag() == dwarf::DW_TAG_arg_variable, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); Assert(N.getRawScope() && isa(N.getRawScope()), "local variable requires a valid scope", &N, N.getRawScope()); } @@ -1274,7 +1252,10 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::OptimizeNone || I->getKindAsEnum() == Attribute::JumpTable || I->getKindAsEnum() == Attribute::Convergent || - I->getKindAsEnum() == Attribute::ArgMemOnly) { + I->getKindAsEnum() == Attribute::ArgMemOnly || + I->getKindAsEnum() == Attribute::NoRecurse || + I->getKindAsEnum() == Attribute::InaccessibleMemOnly || + I->getKindAsEnum() == Attribute::InaccessibleMemOrArgMemOnly) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); @@ -1365,7 +1346,7 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, V); if (PointerType *PTy = dyn_cast(Ty)) { - SmallPtrSet Visited; + SmallPtrSet Visited; if (!PTy->getElementType()->isSized(&Visited)) { Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && !Attrs.hasAttribute(Idx, Attribute::InAlloca), @@ -1444,6 +1425,18 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)), "Attributes 'readnone and readonly' are incompatible!", V); + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOrArgMemOnly)), + "Attributes 'readnone and inaccessiblemem_or_argmemonly' are incompatible!", V); + + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOnly)), + "Attributes 'readnone and inaccessiblememonly' are incompatible!", V); + Assert( 
!(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) && Attrs.hasAttribute(AttributeSet::FunctionIndex, @@ -1501,7 +1494,35 @@ void Verifier::VerifyFunctionMetadata( } } -void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) { +void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { + if (!ConstantExprVisited.insert(EntryC).second) + return; + + SmallVector Stack; + Stack.push_back(EntryC); + + while (!Stack.empty()) { + const Constant *C = Stack.pop_back_val(); + + // Check this constant expression. + if (const auto *CE = dyn_cast(C)) + visitConstantExpr(CE); + + // Visit all sub-expressions. + for (const Use &U : C->operands()) { + const auto *OpC = dyn_cast(U); + if (!OpC) + continue; + if (isa(OpC)) + continue; // Global values get visited separately. + if (!ConstantExprVisited.insert(OpC).second) + continue; + Stack.push_back(OpC); + } + } +} + +void Verifier::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() != Instruction::BitCast) return; @@ -1554,17 +1575,11 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { &CI); const Value *Target = CS.getArgument(2); - const PointerType *PT = dyn_cast(Target->getType()); + auto *PT = dyn_cast(Target->getType()); Assert(PT && PT->getElementType()->isFunctionTy(), "gc.statepoint callee must be of function pointer type", &CI, Target); FunctionType *TargetFuncType = cast(PT->getElementType()); - if (NumPatchBytes) - Assert(isa(Target->stripPointerCasts()), - "gc.statepoint must have null as call target if number of patchable " - "bytes is non zero", - &CI); - const Value *NumCallArgsV = CS.getArgument(3); Assert(isa(NumCallArgsV), "gc.statepoint number of arguments to underlying call " @@ -1743,17 +1758,33 @@ void Verifier::visitFunction(const Function &F) { FT->getParamType(i)); Assert(I->getType()->isFirstClassType(), "Function arguments must have first-class types!", I); - if (!isLLVMdotName) + if (!isLLVMdotName) { Assert(!I->getType()->isMetadataTy(), "Function takes metadata but isn't an intrinsic", I, &F); + Assert(!I->getType()->isTokenTy(), + "Function takes token but isn't an intrinsic", I, &F); + } } + if (!isLLVMdotName) + Assert(!F.getReturnType()->isTokenTy(), + "Functions returns a token but isn't an intrinsic", &F); + // Get the function metadata attachments. SmallVector, 4> MDs; F.getAllMetadata(MDs); assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync"); VerifyFunctionMetadata(MDs); + // Check validity of the personality function + if (F.hasPersonalityFn()) { + auto *Per = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + if (Per) + Assert(Per->getParent() == F.getParent(), + "Referencing personality function in another module!", + &F, F.getParent(), Per, Per->getParent()); + } + if (F.isMaterializable()) { // Function has a body somewhere we can't see. Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F, @@ -1782,13 +1813,27 @@ void Verifier::visitFunction(const Function &F) { } // Visit metadata attachments. - for (const auto &I : MDs) + for (const auto &I : MDs) { + // Verify that the attachment is legal. + switch (I.first) { + default: + break; + case LLVMContext::MD_dbg: + Assert(isa(I.second), + "function !dbg attachment must be a subprogram", &F, I.second); + break; + } + + // Verify the metadata itself. visitMDNode(*I.second); + } } // If this function is actually an intrinsic, verify that it is only used in // direct call/invokes, never having its "address taken". 
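A sketch of that address-taken check in isolation, assuming `F` comes from a fully materialized module (otherwise not all uses are visible):

#include "llvm/IR/Function.h"

bool intrinsicUsesAreValid(const llvm::Function &F) {
  if (!F.getIntrinsicID())
    return true;                 // Not an intrinsic: nothing to check.
  const llvm::User *U = nullptr;
  return !F.hasAddressTaken(&U); // On failure, U names the offending user.
}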
- if (F.getIntrinsicID()) { + // Only do this if the module is materialized, otherwise we don't have all the + // uses. + if (F.getIntrinsicID() && F.getParent()->isMaterialized()) { const User *U; if (F.hasAddressTaken(&U)) Assert(0, "Invalid user of intrinsic instruction!", U); @@ -1798,6 +1843,44 @@ void Verifier::visitFunction(const Function &F) { (F.isDeclaration() && F.hasExternalLinkage()) || F.hasAvailableExternallyLinkage(), "Function is marked as dllimport, but not external.", &F); + + auto *N = F.getSubprogram(); + if (!N) + return; + + // Check that all !dbg attachments lead to back to N (or, at least, another + // subprogram that describes the same function). + // + // FIXME: Check this incrementally while visiting !dbg attachments. + // FIXME: Only check when N is the canonical subprogram for F. + SmallPtrSet Seen; + for (auto &BB : F) + for (auto &I : BB) { + // Be careful about using DILocation here since we might be dealing with + // broken code (this is the Verifier after all). + DILocation *DL = + dyn_cast_or_null(I.getDebugLoc().getAsMDNode()); + if (!DL) + continue; + if (!Seen.insert(DL).second) + continue; + + DILocalScope *Scope = DL->getInlinedAtScope(); + if (Scope && !Seen.insert(Scope).second) + continue; + + DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr; + + // Scope and SP could be the same MDNode and we don't want to skip + // validation in that case + if (SP && ((Scope != SP) && !Seen.insert(SP).second)) + continue; + + // FIXME: Once N is canonical, check "SP == &N". + Assert(SP->describes(&F), + "!dbg attachment points at wrong subprogram for function", N, &F, + &I, DL, Scope, SP); + } } // verifyBasicBlock - Verify that a basic block is well formed... @@ -2194,6 +2277,9 @@ void Verifier::visitPHINode(PHINode &PN) { isa(--BasicBlock::iterator(&PN)), "PHI nodes not grouped at top of basic block!", &PN, PN.getParent()); + // Check that a PHI doesn't yield a Token. + Assert(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!"); + // Check that all of the values of the PHI node have the same type as the // result, and that the incoming blocks are really basic blocks. for (Value *IncValue : PN.incoming_values()) { @@ -2296,16 +2382,44 @@ void Verifier::VerifyCallSite(CallSite CS) { // Verify that there's no metadata unless it's a direct call to an intrinsic. if (CS.getCalledFunction() == nullptr || !CS.getCalledFunction()->getName().startswith("llvm.")) { - for (FunctionType::param_iterator PI = FTy->param_begin(), - PE = FTy->param_end(); PI != PE; ++PI) - Assert(!(*PI)->isMetadataTy(), + for (Type *ParamTy : FTy->params()) { + Assert(!ParamTy->isMetadataTy(), "Function has metadata parameter but isn't an intrinsic", I); + Assert(!ParamTy->isTokenTy(), + "Function has token parameter but isn't an intrinsic", I); + } } + // Verify that indirect calls don't return tokens. + if (CS.getCalledFunction() == nullptr) + Assert(!FTy->getReturnType()->isTokenTy(), + "Return type cannot be token for indirect call!"); + if (Function *F = CS.getCalledFunction()) if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) visitIntrinsicCallSite(ID, CS); + // Verify that a callsite has at most one "deopt" and one "funclet" operand + // bundle. 
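An illustrative query against the same operand-bundle API, assuming `CI` is a llvm::CallInst; the verifier code below rejects call sites where this count exceeds one:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

unsigned countDeoptBundles(const llvm::CallInst &CI) {
  unsigned N = 0;
  for (unsigned i = 0, e = CI.getNumOperandBundles(); i != e; ++i)
    if (CI.getOperandBundleAt(i).getTagID() == llvm::LLVMContext::OB_deopt)
      ++N; // Count only bundles tagged "deopt".
  return N;
}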
+ bool FoundDeoptBundle = false, FoundFuncletBundle = false; + for (unsigned i = 0, e = CS.getNumOperandBundles(); i < e; ++i) { + OperandBundleUse BU = CS.getOperandBundleAt(i); + uint32_t Tag = BU.getTagID(); + if (Tag == LLVMContext::OB_deopt) { + Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", I); + FoundDeoptBundle = true; + } + if (Tag == LLVMContext::OB_funclet) { + Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", I); + FoundFuncletBundle = true; + Assert(BU.Inputs.size() == 1, + "Expected exactly one funclet bundle operand", I); + Assert(isa(BU.Inputs.front()), + "Funclet bundle operands should correspond to a FuncletPadInst", + I); + } + } + visitInstruction(*I); } @@ -2406,10 +2520,12 @@ void Verifier::visitCallInst(CallInst &CI) { void Verifier::visitInvokeInst(InvokeInst &II) { VerifyCallSite(&II); - // Verify that there is a landingpad instruction as the first non-PHI - // instruction of the 'unwind' destination. - Assert(II.getUnwindDest()->isLandingPad(), - "The unwind destination does not have a landingpad instruction!", &II); + // Verify that the first non-PHI instruction of the unwind destination is an + // exception handling instruction. + Assert( + II.getUnwindDest()->isEHPad(), + "The unwind destination does not have an exception handling instruction!", + &II); visitTerminatorInst(II); } @@ -2622,6 +2738,14 @@ void Verifier::visitRangeMetadata(Instruction& I, } } +void Verifier::checkAtomicMemAccessSize(const Module *M, Type *Ty, + const Instruction *I) { + unsigned Size = M->getDataLayout().getTypeSizeInBits(Ty); + Assert(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I); + Assert(!(Size & (Size - 1)), + "atomic memory access' operand must have a power-of-two size", Ty, I); +} + void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast(LI.getOperand(0)->getType()); Assert(PTy, "Load operand must be a pointer.", &LI); @@ -2633,14 +2757,12 @@ void Verifier::visitLoadInst(LoadInst &LI) { "Load cannot have Release ordering", &LI); Assert(LI.getAlignment() != 0, "Atomic load must specify explicit alignment", &LI); - if (!ElTy->isPointerTy()) { - Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!", - &LI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomic load operand must be power-of-two byte-sized integer", &LI, - ElTy); - } + Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() || + ElTy->isFloatingPointTy(), + "atomic load operand must have integer, pointer, or floating point " + "type!", + ElTy, &LI); + checkAtomicMemAccessSize(M, ElTy, &LI); } else { Assert(LI.getSynchScope() == CrossThread, "Non-atomic load cannot have SynchronizationScope specified", &LI); @@ -2662,14 +2784,12 @@ void Verifier::visitStoreInst(StoreInst &SI) { "Store cannot have Acquire ordering", &SI); Assert(SI.getAlignment() != 0, "Atomic store must specify explicit alignment", &SI); - if (!ElTy->isPointerTy()) { - Assert(ElTy->isIntegerTy(), - "atomic store operand must have integer type!", &SI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomic store operand must be power-of-two byte-sized integer", - &SI, ElTy); - } + Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() || + ElTy->isFloatingPointTy(), + "atomic store operand must have integer, pointer, or floating point " + "type!", + ElTy, &SI); + checkAtomicMemAccessSize(M, ElTy, &SI); } else { Assert(SI.getSynchScope() == CrossThread, "Non-atomic 
store cannot have SynchronizationScope specified", &SI); @@ -2678,7 +2798,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { } void Verifier::visitAllocaInst(AllocaInst &AI) { - SmallPtrSet Visited; + SmallPtrSet Visited; PointerType *PTy = AI.getType(); Assert(PTy->getAddressSpace() == 0, "Allocation instruction pointer not in the generic address space!", @@ -2716,9 +2836,7 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { Type *ElTy = PTy->getElementType(); Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy); + checkAtomicMemAccessSize(M, ElTy, &CXI); Assert(ElTy == CXI.getOperand(1)->getType(), "Expected value type does not match pointer operand type!", &CXI, ElTy); @@ -2737,10 +2855,7 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { Type *ElTy = PTy->getElementType(); Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!", &RMWI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomicrmw operand must be power-of-two byte-sized integer", &RMWI, - ElTy); + checkAtomicMemAccessSize(M, ElTy, &RMWI); Assert(ElTy == RMWI.getOperand(1)->getType(), "Argument value type does not match pointer operand type!", &RMWI, ElTy); @@ -2777,23 +2892,62 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) { visitInstruction(IVI); } -void Verifier::visitLandingPadInst(LandingPadInst &LPI) { - BasicBlock *BB = LPI.getParent(); +void Verifier::visitEHPadPredecessors(Instruction &I) { + assert(I.isEHPad()); + BasicBlock *BB = I.getParent(); + Function *F = BB->getParent(); + + Assert(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I); + + if (auto *LPI = dyn_cast(&I)) { + // The landingpad instruction defines its parent as a landing pad block. The + // landing pad block may be branched to only by the unwind edge of an + // invoke. + for (BasicBlock *PredBB : predecessors(BB)) { + const auto *II = dyn_cast(PredBB->getTerminator()); + Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, + "Block containing LandingPadInst must be jumped to " + "only by the unwind edge of an invoke.", + LPI); + } + return; + } + if (auto *CPI = dyn_cast(&I)) { + if (!pred_empty(BB)) + Assert(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(), + "Block containg CatchPadInst must be jumped to " + "only by its catchswitch.", + CPI); + return; + } + + for (BasicBlock *PredBB : predecessors(BB)) { + TerminatorInst *TI = PredBB->getTerminator(); + if (auto *II = dyn_cast(TI)) { + Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB, + "EH pad must be jumped to via an unwind edge", &I, II); + } else if (!isa(TI) && !isa(TI)) { + Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI); + } + } +} + +void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // The landingpad instruction is ill-formed if it doesn't have any clauses and // isn't a cleanup. Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(), "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); - // The landingpad instruction defines its parent as a landing pad block. The - // landing pad block may be branched to only by the unwind edge of an invoke. 
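The landing-pad predecessor rule restated as a standalone predicate (a sketch, assuming `BB` is a landing pad block):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"

bool hasOnlyInvokeUnwindPreds(llvm::BasicBlock *BB) {
  for (llvm::BasicBlock *Pred : llvm::predecessors(BB)) {
    auto *II = llvm::dyn_cast<llvm::InvokeInst>(Pred->getTerminator());
    // Every predecessor must reach BB via an invoke's unwind edge only.
    if (!II || II->getUnwindDest() != BB || II->getNormalDest() == BB)
      return false;
  }
  return true;
}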
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - const InvokeInst *II = dyn_cast((*I)->getTerminator()); - Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, - "Block containing LandingPadInst must be jumped to " - "only by the unwind edge of an invoke.", + visitEHPadPredecessors(LPI); + + if (!LandingPadResultTy) + LandingPadResultTy = LPI.getType(); + else + Assert(LandingPadResultTy == LPI.getType(), + "The landingpad instruction should have a consistent result type " + "inside a function.", &LPI); - } Function *F = LPI.getParent()->getParent(); Assert(F->hasPersonalityFn(), @@ -2820,6 +2974,132 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { visitInstruction(LPI); } +void Verifier::visitCatchPadInst(CatchPadInst &CPI) { + visitEHPadPredecessors(CPI); + + BasicBlock *BB = CPI.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CatchPadInst needs to be in a function with a personality.", &CPI); + + Assert(isa(CPI.getParentPad()), + "CatchPadInst needs to be directly nested in a CatchSwitchInst.", + CPI.getParentPad()); + + // The catchpad instruction must be the first non-PHI instruction in the + // block. + Assert(BB->getFirstNonPHI() == &CPI, + "CatchPadInst not the first non-PHI instruction in the block.", &CPI); + + visitInstruction(CPI); +} + +void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) { + Assert(isa(CatchReturn.getOperand(0)), + "CatchReturnInst needs to be provided a CatchPad", &CatchReturn, + CatchReturn.getOperand(0)); + + visitTerminatorInst(CatchReturn); +} + +void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { + visitEHPadPredecessors(CPI); + + BasicBlock *BB = CPI.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CleanupPadInst needs to be in a function with a personality.", &CPI); + + // The cleanuppad instruction must be the first non-PHI instruction in the + // block. + Assert(BB->getFirstNonPHI() == &CPI, + "CleanupPadInst not the first non-PHI instruction in the block.", + &CPI); + + auto *ParentPad = CPI.getParentPad(); + Assert(isa(ParentPad) || isa(ParentPad) || + isa(ParentPad) || isa(ParentPad), + "CleanupPadInst has an invalid parent.", &CPI); + + User *FirstUser = nullptr; + BasicBlock *FirstUnwindDest = nullptr; + for (User *U : CPI.users()) { + BasicBlock *UnwindDest; + if (CleanupReturnInst *CRI = dyn_cast(U)) { + UnwindDest = CRI->getUnwindDest(); + } else if (isa(U) || isa(U)) { + continue; + } else if (CallSite(U)) { + continue; + } else { + Assert(false, "bogus cleanuppad use", &CPI); + } + + if (!FirstUser) { + FirstUser = U; + FirstUnwindDest = UnwindDest; + } else { + Assert( + UnwindDest == FirstUnwindDest, + "cleanupret instructions from the same cleanuppad must have the same " + "unwind destination", + FirstUser, U); + } + } + + visitInstruction(CPI); +} + +void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { + visitEHPadPredecessors(CatchSwitch); + + BasicBlock *BB = CatchSwitch.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CatchSwitchInst needs to be in a function with a personality.", + &CatchSwitch); + + // The catchswitch instruction must be the first non-PHI instruction in the + // block. 
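The "first non-PHI" requirement, as a small standalone predicate (sketch; assumes `BB` is the block holding the EH pad):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

bool ehPadLeadsItsBlock(llvm::BasicBlock *BB) {
  // Only PHI nodes may precede an EH pad in its block, so the pad itself
  // must be the first non-PHI instruction.
  const llvm::Instruction *I = BB->getFirstNonPHI();
  return I && I->isEHPad();
}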
+ Assert(BB->getFirstNonPHI() == &CatchSwitch, + "CatchSwitchInst not the first non-PHI instruction in the block.", + &CatchSwitch); + + if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) { + Instruction *I = UnwindDest->getFirstNonPHI(); + Assert(I->isEHPad() && !isa(I), + "CatchSwitchInst must unwind to an EH block which is not a " + "landingpad.", + &CatchSwitch); + } + + auto *ParentPad = CatchSwitch.getParentPad(); + Assert(isa(ParentPad) || isa(ParentPad) || + isa(ParentPad) || isa(ParentPad), + "CatchSwitchInst has an invalid parent.", ParentPad); + + visitTerminatorInst(CatchSwitch); +} + +void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) { + Assert(isa(CRI.getOperand(0)), + "CleanupReturnInst needs to be provided a CleanupPad", &CRI, + CRI.getOperand(0)); + + if (BasicBlock *UnwindDest = CRI.getUnwindDest()) { + Instruction *I = UnwindDest->getFirstNonPHI(); + Assert(I->isEHPad() && !isa(I), + "CleanupReturnInst must unwind to an EH block which is not a " + "landingpad.", + &CRI); + } + + visitTerminatorInst(CRI); +} + void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { Instruction *Op = cast(I.getOperand(i)); // If we have an invalid invoke, don't try to compute the dominance. @@ -2835,6 +3115,19 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { "Instruction does not dominate all uses!", Op, &I); } +void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { + Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null " "apply only to pointer types", &I); + Assert(isa(I), + "dereferenceable, dereferenceable_or_null apply only to load" + " instructions, use attributes for calls or invokes", &I); + Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null " "take one operand!", &I); + ConstantInt *CI = mdconst::dyn_extract(MD->getOperand(0)); + Assert(CI && CI->getType()->isIntegerTy(64), "dereferenceable, " "dereferenceable_or_null metadata value must be an i64!", &I); +} + /// verifyInstruction - Verify that an instruction is well formed.
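For contrast with the consumer-side checks in visitDereferenceableMetadata above, this is roughly what a well-formed producer looks like: a single i64 operand attached to a load of pointer type (markDereferenceable is an illustrative helper):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cstdint>
using namespace llvm;

static void markDereferenceable(LoadInst &LI, uint64_t Bytes) {
  LLVMContext &Ctx = LI.getContext();
  // Exactly one operand, and it must be an i64 -- the shape the verifier demands.
  Metadata *Ops[] = {ConstantAsMetadata::get(
      ConstantInt::get(Type::getInt64Ty(Ctx), Bytes))};
  LI.setMetadata(LLVMContext::MD_dereferenceable, MDNode::get(Ctx, Ops));
}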
/// void Verifier::visitInstruction(Instruction &I) { @@ -2903,7 +3196,7 @@ void Verifier::visitInstruction(Instruction &I) { " donothing or patchpoint", &I); Assert(F->getParent() == M, "Referencing function in another module!", - &I); + &I, M, F, F->getParent()); } else if (BasicBlock *OpBB = dyn_cast(I.getOperand(i))) { Assert(OpBB->getParent() == BB->getParent(), "Referring to a basic block in another function!", &I); @@ -2911,7 +3204,7 @@ Assert(OpArg->getParent() == BB->getParent(), "Referring to an argument in another function!", &I); } else if (GlobalValue *GV = dyn_cast(I.getOperand(i))) { - Assert(GV->getParent() == M, "Referencing global in another module!", &I); + Assert(GV->getParent() == M, "Referencing global in another module!", &I, M, GV, GV->getParent()); } else if (isa(I.getOperand(i))) { verifyDominatesUse(I, i); } else if (isa(I.getOperand(i))) { @@ -2922,22 +3215,7 @@ if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an // illegal bitcast (inttoptr ) - SmallVector Stack; - SmallPtrSet Visited; - Stack.push_back(CE); - - while (!Stack.empty()) { - const ConstantExpr *V = Stack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - VerifyConstantExprBitcastType(V); - - for (unsigned I = 0, N = V->getNumOperands(); I != N; ++I) { - if (ConstantExpr *Op = dyn_cast(V->getOperand(I))) - Stack.push_back(Op); - } - } + visitConstantExprsRecursively(CE); } } } @@ -2971,6 +3249,28 @@ void Verifier::visitInstruction(Instruction &I) { &I); } + if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable)) + visitDereferenceableMetadata(I, MD); + + if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null)) + visitDereferenceableMetadata(I, MD); + + if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) { + Assert(I.getType()->isPointerTy(), "align applies only to pointer types", + &I); + Assert(isa(I), "align applies only to load instructions, " + "use attributes for calls or invokes", &I); + Assert(AlignMD->getNumOperands() == 1, "align takes one operand!", &I); + ConstantInt *CI = mdconst::dyn_extract(AlignMD->getOperand(0)); + Assert(CI && CI->getType()->isIntegerTy(64), + "align metadata value must be an i64!", &I); + uint64_t Align = CI->getZExtValue(); + Assert(isPowerOf2_64(Align), + "align metadata value must be a power of 2!", &I); + Assert(Align <= Value::MaximumAlignment, + "alignment is larger than implementation defined limit", &I); + } + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { Assert(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N); @@ -2998,6 +3298,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, case IITDescriptor::Void: return !Ty->isVoidTy(); case IITDescriptor::VarArg: return true; case IITDescriptor::MMX: return !Ty->isX86_MMXTy(); + case IITDescriptor::Token: return !Ty->isTokenTy(); case IITDescriptor::Metadata: return !Ty->isMetadataTy(); case IITDescriptor::Half: return !Ty->isHalfTy(); case IITDescriptor::Float: return !Ty->isFloatTy(); @@ -3321,9 +3622,6 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { VerifyStatepoint(CS); break; - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { Assert(CS.getParent()->getParent()->hasGC(), "Enclosing function does not use GC.", CS); @@ -3339,9
+3637,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Assert that result type matches wrapped callee. const Value *Target = StatepointCS.getArgument(2); - const PointerType *PT = cast(Target->getType()); - const FunctionType *TargetFuncType = - cast(PT->getElementType()); + auto *PT = cast(Target->getType()); + auto *TargetFuncType = cast(PT->getElementType()); Assert(CS.getType() == TargetFuncType->getReturnType(), "gc.result result type does not match wrapped callee", CS); break; @@ -3352,19 +3649,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Check that this relocate is correctly tied to the statepoint // This is the case for relocate on the unwinding path of an invoke statepoint - if (ExtractValueInst *ExtractValue = - dyn_cast(CS.getArgOperand(0))) { - Assert(isa(ExtractValue->getAggregateOperand()), - "gc relocate on unwind path incorrectly linked to the statepoint", - CS); + if (LandingPadInst *LandingPad = + dyn_cast(CS.getArgOperand(0))) { const BasicBlock *InvokeBB = - ExtractValue->getParent()->getUniquePredecessor(); + LandingPad->getParent()->getUniquePredecessor(); // Landingpad relocates should have only one predecessor with invoke // statepoint terminator Assert(InvokeBB, "safepoints should have unique landingpads", - ExtractValue->getParent()); + LandingPad->getParent()); Assert(InvokeBB->getTerminator(), "safepoint block should be well formed", InvokeBB); Assert(isStatepoint(InvokeBB->getTerminator()), @@ -3448,6 +3742,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } + case Intrinsic::eh_exceptioncode: + case Intrinsic::eh_exceptionpointer: { + Assert(isa(CS.getArgOperand(0)), + "eh.exceptionpointer argument must be a catchpad", CS); + break; + } }; } @@ -3598,7 +3898,7 @@ void Verifier::verifyTypeRefs() { for (auto *CU : CUs->operands()) if (auto Ts = cast(CU)->getRetainedTypes()) for (DIType *Op : Ts) - if (auto *T = dyn_cast(Op)) + if (auto *T = dyn_cast_or_null(Op)) if (auto *S = T->getRawIdentifier()) { UnresolvedTypeRefs.erase(S); TypeRefs.insert(std::make_pair(S, T)); diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index 43fee65db7f5..9b243fc571d0 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -31,11 +31,11 @@ static const char *const TimeIRParsingName = "Parse IR"; static std::unique_ptr getLazyIRModule(std::unique_ptr Buffer, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, bool ShouldLazyLoadMetadata) { if (isBitcode((const unsigned char *)Buffer->getBufferStart(), (const unsigned char *)Buffer->getBufferEnd())) { - ErrorOr> ModuleOrErr = - getLazyBitcodeModule(std::move(Buffer), Context); + ErrorOr> ModuleOrErr = getLazyBitcodeModule( + std::move(Buffer), Context, ShouldLazyLoadMetadata); if (std::error_code EC = ModuleOrErr.getError()) { Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, EC.message()); @@ -49,7 +49,8 @@ getLazyIRModule(std::unique_ptr Buffer, SMDiagnostic &Err, std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, + bool ShouldLazyLoadMetadata) { ErrorOr> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { @@ -58,7 +59,8 @@ std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, return nullptr; } - return getLazyIRModule(std::move(FileOrErr.get()), Err,
Context); + return getLazyIRModule(std::move(FileOrErr.get()), Err, Context, + ShouldLazyLoadMetadata); } std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt index ea79d5e6a83a..7a0ad50fb94c 100644 --- a/lib/LTO/LLVMBuild.txt +++ b/lib/LTO/LLVMBuild.txt @@ -25,7 +25,6 @@ required_libraries = BitWriter CodeGen Core - IPA IPO InstCombine Linker diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 25ae4ac76e3c..6baaaa4b1395 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/ParallelCG.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" @@ -63,47 +64,15 @@ const char* LTOCodeGenerator::getVersionString() { #endif } -static void handleLTODiagnostic(const DiagnosticInfo &DI) { - DiagnosticPrinterRawOStream DP(errs()); - DI.print(DP); - errs() << "\n"; -} - -LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), - handleLTODiagnostic) { +LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), + TheLinker(new Linker(*MergedModule)) { initializeLTOPasses(); } -LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr Context) - : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) { - initializeLTOPasses(); -} +LTOCodeGenerator::~LTOCodeGenerator() {} -void LTOCodeGenerator::destroyMergedModule() { - if (OwnedModule) { - assert(IRLinker.getModule() == &OwnedModule->getModule() && - "The linker's module should be the same as the owned module"); - delete OwnedModule; - OwnedModule = nullptr; - } else if (IRLinker.getModule()) - IRLinker.deleteModule(); -} - -LTOCodeGenerator::~LTOCodeGenerator() { - destroyMergedModule(); - - delete TargetMach; - TargetMach = nullptr; - - for (std::vector::iterator I = CodegenOptions.begin(), - E = CodegenOptions.end(); - I != E; ++I) - free(*I); -} - -// Initialize LTO passes. Please keep this funciton in sync with +// Initialize LTO passes. Please keep this function in sync with // PassManagerBuilder::populateLTOPassManager(), and make sure all LTO // passes are initialized. 
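A usage sketch for the widened IRReader entry point above; passing true defers metadata materialization along with function bodies (the file name is illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<Module> loadForSymbolScan(LLVMContext &Ctx) {
  SMDiagnostic Err;
  return getLazyIRFileModule("input.bc", Err, Ctx,
                             /*ShouldLazyLoadMetadata=*/true);
}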
void LTOCodeGenerator::initializeLTOPasses() { @@ -120,11 +89,11 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeGlobalDCEPass(R); initializeArgPromotionPass(R); initializeJumpThreadingPass(R); - initializeSROAPass(R); + initializeSROALegacyPassPass(R); initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); initializeFunctionAttrsPass(R); - initializeGlobalsModRefPass(R); + initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); initializeGVNPass(R); @@ -133,41 +102,39 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeCFGSimplifyPassPass(R); } -bool LTOCodeGenerator::addModule(LTOModule *mod) { - assert(&mod->getModule().getContext() == &Context && +bool LTOCodeGenerator::addModule(LTOModule *Mod) { + assert(&Mod->getModule().getContext() == &Context && "Expected module in same context"); - bool ret = IRLinker.linkInModule(&mod->getModule()); + bool ret = TheLinker->linkInModule(Mod->takeModule()); - const std::vector &undefs = mod->getAsmUndefinedRefs(); + const std::vector &undefs = Mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) AsmUndefinedRefs[undefs[i]] = 1; return !ret; } -void LTOCodeGenerator::setModule(LTOModule *Mod) { +void LTOCodeGenerator::setModule(std::unique_ptr Mod) { assert(&Mod->getModule().getContext() == &Context && "Expected module in same context"); - // Delete the old merged module. - destroyMergedModule(); AsmUndefinedRefs.clear(); - OwnedModule = Mod; - IRLinker.setModule(&Mod->getModule()); + MergedModule = Mod->takeModule(); + TheLinker = make_unique(*MergedModule); const std::vector &Undefs = Mod->getAsmUndefinedRefs(); for (int I = 0, E = Undefs.size(); I != E; ++I) AsmUndefinedRefs[Undefs[I]] = 1; } -void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - Options = options; +void LTOCodeGenerator::setTargetOptions(TargetOptions Options) { + this->Options = Options; } -void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { - switch (debug) { +void LTOCodeGenerator::setDebugInfo(lto_debug_model Debug) { + switch (Debug) { case LTO_DEBUG_MODEL_NONE: EmitDwarfDebugInfo = false; return; @@ -179,176 +146,8 @@ void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { llvm_unreachable("Unknown debug format!"); } -void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) { - switch (model) { - case LTO_CODEGEN_PIC_MODEL_STATIC: - case LTO_CODEGEN_PIC_MODEL_DYNAMIC: - case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: - case LTO_CODEGEN_PIC_MODEL_DEFAULT: - CodeModel = model; - return; - } - llvm_unreachable("Unknown PIC model!"); -} - -bool LTOCodeGenerator::writeMergedModules(const char *path, - std::string &errMsg) { - if (!determineTarget(errMsg)) - return false; - - // mark which symbols can not be internalized - applyScopeRestrictions(); - - // create output file - std::error_code EC; - tool_output_file Out(path, EC, sys::fs::F_None); - if (EC) { - errMsg = "could not open bitcode file for writing: "; - errMsg += path; - return false; - } - - // write bitcode to it - WriteBitcodeToFile(IRLinker.getModule(), Out.os(), ShouldEmbedUselists); - Out.os().close(); - - if (Out.os().has_error()) { - errMsg = "could not write bitcode file: "; - errMsg += path; - Out.os().clear_error(); - return false; - } - - Out.keep(); - return true; -} - -bool LTOCodeGenerator::compileOptimizedToFile(const char **name, - std::string &errMsg) { - // make unique temp .o file to put generated object file - SmallString<128> Filename; - int FD; - std::error_code EC = - 
sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename); - if (EC) { - errMsg = EC.message(); - return false; - } - - // generate object file - tool_output_file objFile(Filename.c_str(), FD); - - bool genResult = compileOptimized(objFile.os(), errMsg); - objFile.os().close(); - if (objFile.os().has_error()) { - objFile.os().clear_error(); - sys::fs::remove(Twine(Filename)); - return false; - } - - objFile.keep(); - if (!genResult) { - sys::fs::remove(Twine(Filename)); - return false; - } - - NativeObjectPath = Filename.c_str(); - *name = NativeObjectPath.c_str(); - return true; -} - -std::unique_ptr -LTOCodeGenerator::compileOptimized(std::string &errMsg) { - const char *name; - if (!compileOptimizedToFile(&name, errMsg)) - return nullptr; - - // read .o file into memory buffer - ErrorOr> BufferOrErr = - MemoryBuffer::getFile(name, -1, false); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - sys::fs::remove(NativeObjectPath); - return nullptr; - } - - // remove temp files - sys::fs::remove(NativeObjectPath); - - return std::move(*BufferOrErr); -} - - -bool LTOCodeGenerator::compile_to_file(const char **name, - bool disableInline, - bool disableGVNLoadPRE, - bool disableVectorization, - std::string &errMsg) { - if (!optimize(disableInline, disableGVNLoadPRE, - disableVectorization, errMsg)) - return false; - - return compileOptimizedToFile(name, errMsg); -} - -std::unique_ptr -LTOCodeGenerator::compile(bool disableInline, bool disableGVNLoadPRE, - bool disableVectorization, std::string &errMsg) { - if (!optimize(disableInline, disableGVNLoadPRE, - disableVectorization, errMsg)) - return nullptr; - - return compileOptimized(errMsg); -} - -bool LTOCodeGenerator::determineTarget(std::string &errMsg) { - if (TargetMach) - return true; - - std::string TripleStr = IRLinker.getModule()->getTargetTriple(); - if (TripleStr.empty()) - TripleStr = sys::getDefaultTargetTriple(); - llvm::Triple Triple(TripleStr); - - // create target machine from info for merged modules - const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); - if (!march) - return false; - - // The relocation model is actually a static member of TargetMachine and - // needs to be set before the TargetMachine is instantiated. - Reloc::Model RelocModel = Reloc::Default; - switch (CodeModel) { - case LTO_CODEGEN_PIC_MODEL_STATIC: - RelocModel = Reloc::Static; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC: - RelocModel = Reloc::PIC_; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: - RelocModel = Reloc::DynamicNoPIC; - break; - case LTO_CODEGEN_PIC_MODEL_DEFAULT: - // RelocModel is already the default, so leave it that way. - break; - } - - // Construct LTOModule, hand over ownership of module and target. Use MAttr as - // the default set of features. - SubtargetFeatures Features(MAttr); - Features.getDefaultSubtargetFeatures(Triple); - std::string FeatureStr = Features.getString(); - // Set a default CPU for Darwin triples. 
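The removed compileOptimizedToFile above (and its replacement later in this hunk) both follow the same temp-file protocol: create a unique file, stream into it, keep it on success, remove it on failure. The pattern in isolation (emitToTempObject is an illustrative helper):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;

static bool emitToTempObject(ArrayRef<char> Bytes, SmallString<128> &Path) {
  int FD;
  if (sys::fs::createTemporaryFile("lto-llvm", "o", FD, Path))
    return false;
  tool_output_file Out(Path.c_str(), FD);
  Out.os().write(Bytes.data(), Bytes.size());
  Out.os().close();
  if (Out.os().has_error()) {
    Out.os().clear_error();
    sys::fs::remove(Twine(Path)); // failed write: clean up the temp file
    return false;
  }
  Out.keep(); // success: keep the file past the destructor
  return true;
}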
- if (MCpu.empty() && Triple.isOSDarwin()) { - if (Triple.getArch() == llvm::Triple::x86_64) - MCpu = "core2"; - else if (Triple.getArch() == llvm::Triple::x86) - MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) - MCpu = "cyclone"; - } - - CodeGenOpt::Level CGOptLevel; +void LTOCodeGenerator::setOptLevel(unsigned Level) { + OptLevel = Level; switch (OptLevel) { case 0: CGOptLevel = CodeGenOpt::None; @@ -363,10 +162,157 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { CGOptLevel = CodeGenOpt::Aggressive; break; } +} - TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options, - RelocModel, CodeModel::Default, - CGOptLevel); +bool LTOCodeGenerator::writeMergedModules(const char *Path) { + if (!determineTarget()) + return false; + + // mark which symbols can not be internalized + applyScopeRestrictions(); + + // create output file + std::error_code EC; + tool_output_file Out(Path, EC, sys::fs::F_None); + if (EC) { + std::string ErrMsg = "could not open bitcode file for writing: "; + ErrMsg += Path; + emitError(ErrMsg); + return false; + } + + // write bitcode to it + WriteBitcodeToFile(MergedModule.get(), Out.os(), ShouldEmbedUselists); + Out.os().close(); + + if (Out.os().has_error()) { + std::string ErrMsg = "could not write bitcode file: "; + ErrMsg += Path; + emitError(ErrMsg); + Out.os().clear_error(); + return false; + } + + Out.keep(); + return true; +} + +bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { + // make unique temp output file to put generated code + SmallString<128> Filename; + int FD; + + const char *Extension = + (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o"); + + std::error_code EC = + sys::fs::createTemporaryFile("lto-llvm", Extension, FD, Filename); + if (EC) { + emitError(EC.message()); + return false; + } + + // generate object file + tool_output_file objFile(Filename.c_str(), FD); + + bool genResult = compileOptimized(&objFile.os()); + objFile.os().close(); + if (objFile.os().has_error()) { + objFile.os().clear_error(); + sys::fs::remove(Twine(Filename)); + return false; + } + + objFile.keep(); + if (!genResult) { + sys::fs::remove(Twine(Filename)); + return false; + } + + NativeObjectPath = Filename.c_str(); + *Name = NativeObjectPath.c_str(); + return true; +} + +std::unique_ptr +LTOCodeGenerator::compileOptimized() { + const char *name; + if (!compileOptimizedToFile(&name)) + return nullptr; + + // read .o file into memory buffer + ErrorOr> BufferOrErr = + MemoryBuffer::getFile(name, -1, false); + if (std::error_code EC = BufferOrErr.getError()) { + emitError(EC.message()); + sys::fs::remove(NativeObjectPath); + return nullptr; + } + + // remove temp files + sys::fs::remove(NativeObjectPath); + + return std::move(*BufferOrErr); +} + +bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify, + bool DisableInline, + bool DisableGVNLoadPRE, + bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return false; + + return compileOptimizedToFile(Name); +} + +std::unique_ptr +LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline, + bool DisableGVNLoadPRE, bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return nullptr; + + return compileOptimized(); +} + +bool LTOCodeGenerator::determineTarget() { + if (TargetMach) + return true; + + std::string TripleStr = MergedModule->getTargetTriple(); + if (TripleStr.empty()) { + 
TripleStr = sys::getDefaultTargetTriple(); + MergedModule->setTargetTriple(TripleStr); + } + llvm::Triple Triple(TripleStr); + + // create target machine from info for merged modules + std::string ErrMsg; + const Target *march = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!march) { + emitError(ErrMsg); + return false; + } + + // Construct LTOModule, hand over ownership of module and target. Use MAttr as + // the default set of features. + SubtargetFeatures Features(MAttr); + Features.getDefaultSubtargetFeatures(Triple); + FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. + if (MCpu.empty() && Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + MCpu = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + MCpu = "yonah"; + else if (Triple.getArch() == llvm::Triple::aarch64) + MCpu = "cyclone"; + } + + TargetMach.reset(march->createTargetMachine(TripleStr, MCpu, FeatureStr, + Options, RelocModel, + CodeModel::Default, CGOptLevel)); return true; } @@ -453,7 +399,6 @@ static void accumulateAndSortLibcalls(std::vector &Libcalls, void LTOCodeGenerator::applyScopeRestrictions() { if (ScopeRestrictionsDone || !ShouldInternalize) return; - Module *mergedModule = IRLinker.getModule(); // Start off with a verification pass. legacy::PassManager passes; @@ -467,20 +412,17 @@ void LTOCodeGenerator::applyScopeRestrictions() { TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple())); TargetLibraryInfo TLI(TLII); - accumulateAndSortLibcalls(Libcalls, TLI, *mergedModule, *TargetMach); + accumulateAndSortLibcalls(Libcalls, TLI, *MergedModule, *TargetMach); - for (Module::iterator f = mergedModule->begin(), - e = mergedModule->end(); f != e; ++f) - applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler); - for (Module::global_iterator v = mergedModule->global_begin(), - e = mergedModule->global_end(); v != e; ++v) - applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler); - for (Module::alias_iterator a = mergedModule->alias_begin(), - e = mergedModule->alias_end(); a != e; ++a) - applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (Function &f : *MergedModule) + applyRestriction(f, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (GlobalVariable &v : MergedModule->globals()) + applyRestriction(v, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (GlobalAlias &a : MergedModule->aliases()) + applyRestriction(a, Libcalls, MustPreserveList, AsmUsed, Mangler); GlobalVariable *LLVMCompilerUsed = - mergedModule->getGlobalVariable("llvm.compiler.used"); + MergedModule->getGlobalVariable("llvm.compiler.used"); findUsedValues(LLVMCompilerUsed, AsmUsed); if (LLVMCompilerUsed) LLVMCompilerUsed->eraseFromParent(); @@ -495,7 +437,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size()); LLVMCompilerUsed = - new llvm::GlobalVariable(*mergedModule, ATy, false, + new llvm::GlobalVariable(*MergedModule, ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, asmUsed2), "llvm.compiler.used"); @@ -506,21 +448,18 @@ void LTOCodeGenerator::applyScopeRestrictions() { passes.add(createInternalizePass(MustPreserveList)); // apply scope restrictions - passes.run(*mergedModule); + passes.run(*MergedModule); ScopeRestrictionsDone = true; } /// Optimize merged modules using various IPO passes -bool LTOCodeGenerator::optimize(bool DisableInline, +bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, bool 
DisableGVNLoadPRE, - bool DisableVectorization, - std::string &errMsg) { - if (!this->determineTarget(errMsg)) + bool DisableVectorization) { + if (!this->determineTarget()) return false; - Module *mergedModule = IRLinker.getModule(); - // Mark which symbols can not be internalized this->applyScopeRestrictions(); @@ -528,7 +467,7 @@ bool LTOCodeGenerator::optimize(bool DisableInline, legacy::PassManager passes; // Add an appropriate DataLayout instance for this module... - mergedModule->setDataLayout(*TargetMach->getDataLayout()); + MergedModule->setDataLayout(TargetMach->createDataLayout()); passes.add( createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis())); @@ -542,60 +481,57 @@ bool LTOCodeGenerator::optimize(bool DisableInline, PMB.Inliner = createFunctionInliningPass(); PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); PMB.OptLevel = OptLevel; - PMB.VerifyInput = true; - PMB.VerifyOutput = true; + PMB.VerifyInput = !DisableVerify; + PMB.VerifyOutput = !DisableVerify; PMB.populateLTOPassManager(passes); // Run our queue of passes all at once now, efficiently. - passes.run(*mergedModule); + passes.run(*MergedModule); return true; } -bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, - std::string &errMsg) { - if (!this->determineTarget(errMsg)) +bool LTOCodeGenerator::compileOptimized(ArrayRef Out) { + if (!this->determineTarget()) return false; - Module *mergedModule = IRLinker.getModule(); - - legacy::PassManager codeGenPasses; + legacy::PassManager preCodeGenPasses; // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. - codeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.run(*MergedModule); - if (TargetMach->addPassesToEmitFile(codeGenPasses, out, - TargetMachine::CGFT_ObjectFile)) { - errMsg = "target file type not supported"; - return false; - } - - // Run the code generator, and write assembly file - codeGenPasses.run(*mergedModule); + // Do code generation. We need to preserve the module in case the client calls + // writeMergedModules() after compilation, but we only need to allow this at + // parallelism level 1. This is achieved by having splitCodeGen return the + // original module at parallelism level 1 which we then assign back to + // MergedModule. + MergedModule = + splitCodeGen(std::move(MergedModule), Out, MCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, CGOptLevel, FileType); return true; } /// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging /// LTO problems. -void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) { - for (std::pair o = getToken(options); - !o.first.empty(); o = getToken(o.second)) { - // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add - // that. 
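The compileOptimized overload above hands code generation to splitCodeGen, one raw_pwrite_stream per codegen partition; with a single stream the module comes back unsplit, which is what lets MergedModule survive for a later writeMergedModules call. A minimal caller sketch (the two-stream wrapper is illustrative):

#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<Module>
codegenTwoWay(std::unique_ptr<Module> M, raw_pwrite_stream &OS0,
              raw_pwrite_stream &OS1, StringRef CPU, StringRef Features,
              const TargetOptions &Opts) {
  raw_pwrite_stream *OSs[] = {&OS0, &OS1}; // two streams => two partitions
  return splitCodeGen(std::move(M), OSs, CPU, Features, Opts, Reloc::Default,
                      CodeModel::Default, CodeGenOpt::Default,
                      TargetMachine::CGFT_ObjectFile);
}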
- if (CodegenOptions.empty()) - CodegenOptions.push_back(strdup("libLLVMLTO")); - CodegenOptions.push_back(strdup(o.first.str().c_str())); - } +void LTOCodeGenerator::setCodeGenDebugOptions(const char *Options) { + for (std::pair o = getToken(Options); !o.first.empty(); + o = getToken(o.second)) + CodegenOptions.push_back(o.first); } void LTOCodeGenerator::parseCodeGenDebugOptions() { // if options were requested, set them - if (!CodegenOptions.empty()) - cl::ParseCommandLineOptions(CodegenOptions.size(), - const_cast(&CodegenOptions[0])); + if (!CodegenOptions.empty()) { + // ParseCommandLineOptions() expects argv[0] to be program name. + std::vector CodegenArgv(1, "libLLVMLTO"); + for (std::string &Arg : CodegenOptions) + CodegenArgv.push_back(Arg.c_str()); + cl::ParseCommandLineOptions(CodegenArgv.size(), CodegenArgv.data()); + } } void LTOCodeGenerator::DiagnosticHandler(const DiagnosticInfo &DI, @@ -645,3 +581,20 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this, /* RespectFilters */ true); } + +namespace { +class LTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + LTODiagnosticInfo(const Twine &DiagMsg, DiagnosticSeverity Severity=DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +void LTOCodeGenerator::emitError(const std::string &ErrMsg) { + if (DiagHandler) + (*DiagHandler)(LTO_DS_ERROR, ErrMsg.c_str(), DiagContext); + else + Context.diagnose(LTODiagnosticInfo(ErrMsg)); +} diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 53ed4175f8e3..409b94902332 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -91,106 +91,97 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, return StringRef(Triple).startswith(TriplePrefix); } -LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, - std::string &errMsg) { +std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); + if (!BCOrErr) + return ""; + LLVMContext Context; + return getBitcodeProducerString(*BCOrErr, Context); +} + +ErrorOr> +LTOModule::createFromFile(LLVMContext &Context, const char *path, + TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(path); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg) { - return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); +ErrorOr> +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options) { + return createFromOpenFileSlice(Context, fd, path, size, 0, options); } -LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, + const char *path, size_t map_size, + off_t offset, TargetOptions options) { ErrorOr> BufferOrErr = 
MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path) { - return createInContext(mem, length, options, errMsg, path, - &getGlobalContext()); +ErrorOr> +LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, + size_t length, TargetOptions options, + StringRef path) { + return createInContext(mem, length, options, path, &Context); } -LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, - StringRef path) { - return createInContext(mem, length, options, errMsg, path, nullptr); +ErrorOr> +LTOModule::createInLocalContext(const void *mem, size_t length, + TargetOptions options, StringRef path) { + return createInContext(mem, length, options, path, nullptr); } -LTOModule *LTOModule::createInContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path, - LLVMContext *Context) { +ErrorOr> +LTOModule::createInContext(const void *mem, size_t length, + TargetOptions options, StringRef path, + LLVMContext *Context) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); - return makeLTOModule(Buffer, options, errMsg, Context); + return makeLTOModule(Buffer, options, Context); } -static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, - LLVMContext &Context, - bool ShouldBeLazy, - std::string &ErrMsg) { +static ErrorOr> +parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldBeLazy) { // Find the buffer. ErrorOr MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) { - ErrMsg = EC.message(); - return nullptr; - } - - std::function DiagnosticHandler = - [&ErrMsg](const DiagnosticInfo &DI) { - raw_string_ostream Stream(ErrMsg); - DiagnosticPrinterRawOStream DP(Stream); - DI.print(DP); - }; + if (std::error_code EC = MBOrErr.getError()) + return EC; if (!ShouldBeLazy) { // Parse the full file. - ErrorOr> M = - parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler); - if (!M) - return nullptr; + ErrorOr> M = parseBitcodeFile(*MBOrErr, Context); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } // Parse lazily. 
std::unique_ptr LightweightBuf = MemoryBuffer::getMemBuffer(*MBOrErr, false); - ErrorOr> M = - getLazyBitcodeModule(std::move(LightweightBuf), Context, - DiagnosticHandler, true /*ShouldLazyLoadMetadata*/); - if (!M) - return nullptr; + ErrorOr> M = getLazyBitcodeModule( + std::move(LightweightBuf), Context, true /*ShouldLazyLoadMetadata*/); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } -LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, - TargetOptions options, std::string &errMsg, - LLVMContext *Context) { +ErrorOr> +LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context) { std::unique_ptr OwnedContext; if (!Context) { OwnedContext = llvm::make_unique(); @@ -199,11 +190,12 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, // If we own a context, we know this is being used only for symbol // extraction, not linking. Be lazy in that case. - std::unique_ptr M = parseBitcodeFileImpl( - Buffer, *Context, - /* ShouldBeLazy */ static_cast(OwnedContext), errMsg); - if (!M) - return nullptr; + ErrorOr> MOrErr = + parseBitcodeFileImpl(Buffer, *Context, + /* ShouldBeLazy */ static_cast(OwnedContext)); + if (std::error_code EC = MOrErr.getError()) + return EC; + std::unique_ptr &M = *MOrErr; std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) @@ -211,9 +203,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, llvm::Triple Triple(TripleStr); // find machine architecture for this module + std::string errMsg; const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) - return nullptr; + return std::unique_ptr(nullptr); // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; @@ -232,25 +225,21 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options); - M->setDataLayout(*target->getDataLayout()); + M->setDataLayout(target->createDataLayout()); std::unique_ptr IRObj( new object::IRObjectFile(Buffer, std::move(M))); - LTOModule *Ret; + std::unique_ptr Ret; if (OwnedContext) - Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext)); + Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext))); else - Ret = new LTOModule(std::move(IRObj), target); - - if (Ret->parseSymbols(errMsg)) { - delete Ret; - return nullptr; - } + Ret.reset(new LTOModule(std::move(IRObj), target)); + Ret->parseSymbols(); Ret->parseMetadata(); - return Ret; + return std::move(Ret); } /// Create a MemoryBuffer from a memory range with an optional name. @@ -583,9 +572,7 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, info.symbol = decl; } -/// parseSymbols - Parse the symbols from the module and model-level ASM and add -/// them to either the defined or undefined lists. 
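Callers of the refactored factories above now unwrap an ErrorOr instead of checking a string out-parameter. A sketch of the new idiom (the path and wrapper are illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<LTOModule> loadLTOModule(LLVMContext &Ctx,
                                                TargetOptions Opts) {
  ErrorOr<std::unique_ptr<LTOModule>> MOrErr =
      LTOModule::createFromFile(Ctx, "input.o", Opts);
  if (std::error_code EC = MOrErr.getError()) {
    errs() << "cannot create LTO module: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*MOrErr);
}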
-bool LTOModule::parseSymbols(std::string &errMsg) { +void LTOModule::parseSymbols() { for (auto &Sym : IRFile->symbols()) { const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); uint32_t Flags = Sym.getFlags(); @@ -640,8 +627,6 @@ bool LTOModule::parseSymbols(std::string &errMsg) { NameAndAttributes info = u->getValue(); _symbols.push_back(info); } - - return false; } /// parseMetadata - Parse metadata from the module diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp index b33a22ff0cf8..3ae543460745 100644 --- a/lib/LibDriver/LibDriver.cpp +++ b/lib/LibDriver/LibDriver.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Defines an interface to a lib.exe-compatible driver that also understands -// bitcode files. Used by llvm-lib and lld-link2 /lib. +// bitcode files. Used by llvm-lib and lld-link /lib. // //===----------------------------------------------------------------------===// @@ -51,7 +51,7 @@ static const llvm::opt::OptTable::Info infoTable[] = { class LibOptTable : public llvm::opt::OptTable { public: - LibOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {} + LibOptTable() : OptTable(infoTable, true) {} }; } @@ -102,7 +102,7 @@ static Optional findInputFile(StringRef File, int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { SmallVector NewArgs(ArgsArr.begin(), ArgsArr.end()); BumpPtrAllocator Alloc; - BumpPtrStringSaver Saver(Alloc); + StringSaver Saver(Alloc); cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs); ArgsArr = NewArgs; @@ -135,14 +135,13 @@ int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { llvm::errs() << Arg->getValue() << ": no such file or directory\n"; return 1; } - Members.emplace_back(Saver.save(*Path), - llvm::sys::path::filename(Arg->getValue())); + Members.emplace_back(Saver.save(*Path)); } std::pair Result = llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, /*WriteSymtab=*/true, object::Archive::K_GNU, - /*Deterministic*/ true); + /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin)); if (Result.second) { if (Result.first.empty()) diff --git a/lib/LibDriver/Options.td b/lib/LibDriver/Options.td index 0aa1affbebc9..5a56ef7468d4 100644 --- a/lib/LibDriver/Options.td +++ b/lib/LibDriver/Options.td @@ -12,6 +12,8 @@ class P : def libpath: P<"libpath", "Object file search path">; def out : P<"out", "Path to file to write output">; +def llvmlibthin : F<"llvmlibthin">; + //============================================================================== // The flags below do nothing. They are defined only for lib.exe compatibility. //============================================================================== diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt index f9d8e0925ae3..8916fb3f7251 100644 --- a/lib/Linker/CMakeLists.txt +++ b/lib/Linker/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMLinker + IRMover.cpp LinkModules.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp new file mode 100644 index 000000000000..fa6e37517fc4 --- /dev/null +++ b/lib/Linker/IRMover.cpp @@ -0,0 +1,1657 @@ +//===- lib/Linker/IRMover.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker/IRMover.h" +#include "LinkDiagnosticInfo.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Transforms/Utils/Cloning.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// TypeMap implementation. +//===----------------------------------------------------------------------===// + +namespace { +class TypeMapTy : public ValueMapTypeRemapper { + /// This is a mapping from a source type to a destination type to use. + DenseMap MappedTypes; + + /// When checking to see if two subgraphs are isomorphic, we speculatively + /// add types to MappedTypes, but keep track of them here in case we need to + /// roll back. + SmallVector SpeculativeTypes; + + SmallVector SpeculativeDstOpaqueTypes; + + /// This is a list of non-opaque structs in the source module that are mapped + /// to an opaque struct in the destination module. + SmallVector SrcDefinitionsToResolve; + + /// This is the set of opaque types in the destination module that are + /// getting a body from the source module. + SmallPtrSet DstResolvedOpaqueTypes; + +public: + TypeMapTy(IRMover::IdentifiedStructTypeSet &DstStructTypesSet) + : DstStructTypesSet(DstStructTypesSet) {} + + IRMover::IdentifiedStructTypeSet &DstStructTypesSet; + /// Indicate that the specified type in the destination module is conceptually + /// equivalent to the specified type in the source module. + void addTypeMapping(Type *DstTy, Type *SrcTy); + + /// Produce a body for an opaque type in the dest module from a type + /// definition in the source module. + void linkDefinedTypeBodies(); + + /// Return the mapped type to use for the specified input type from the + /// source module. + Type *get(Type *SrcTy); + Type *get(Type *SrcTy, SmallPtrSet &Visited); + + void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); + + FunctionType *get(FunctionType *T) { + return cast(get((Type *)T)); + } + +private: + Type *remapType(Type *SrcTy) override { return get(SrcTy); } + + bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); +}; +} + +void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { + assert(SpeculativeTypes.empty()); + assert(SpeculativeDstOpaqueTypes.empty()); + + // Check to see if these types are recursively isomorphic and establish a + // mapping between them if so. + if (!areTypesIsomorphic(DstTy, SrcTy)) { + // Oops, they aren't isomorphic. Just discard this request by rolling out + // any speculative mappings we've established. + for (Type *Ty : SpeculativeTypes) + MappedTypes.erase(Ty); + + SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() - + SpeculativeDstOpaqueTypes.size()); + for (StructType *Ty : SpeculativeDstOpaqueTypes) + DstResolvedOpaqueTypes.erase(Ty); + } else { + for (Type *Ty : SpeculativeTypes) + if (auto *STy = dyn_cast(Ty)) + if (STy->hasName()) + STy->setName(""); + } + SpeculativeTypes.clear(); + SpeculativeDstOpaqueTypes.clear(); +} + +/// Recursively walk this pair of types, returning true if they are isomorphic, +/// false if they are not. +bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { + // Two types with differing kinds are clearly not isomorphic.
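The recursion that follows is easier to see on a toy representation first: memoize Src-to-Dst decisions, answer from the memo on revisits, and recurse over contained types; the caller rolls the memo back if the walk fails. A standalone sketch (ToyType is invented for illustration):

#include <cstddef>
#include <unordered_map>
#include <vector>

struct ToyType {
  int Kind;                     // analogous to Type::getTypeID()
  std::vector<ToyType *> Elems; // analogous to contained types
};

static bool isomorphic(ToyType *Dst, ToyType *Src,
                       std::unordered_map<ToyType *, ToyType *> &Mapped) {
  if (Dst->Kind != Src->Kind)
    return false;
  auto It = Mapped.find(Src);
  if (It != Mapped.end())
    return It->second == Dst; // already mapped: the mapping must agree
  if (Src->Elems.size() != Dst->Elems.size())
    return false;
  Mapped[Src] = Dst; // speculative entry; caller erases it on failure
  for (size_t I = 0; I != Src->Elems.size(); ++I)
    if (!isomorphic(Dst->Elems[I], Src->Elems[I], Mapped))
      return false;
  return true;
}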
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) + return false; + + // If we have an entry in the MappedTypes table, then we have our answer. + Type *&Entry = MappedTypes[SrcTy]; + if (Entry) + return Entry == DstTy; + + // Two identical types are clearly isomorphic. Remember this + // non-speculatively. + if (DstTy == SrcTy) { + Entry = DstTy; + return true; + } + + // Okay, we have two types with identical kinds that we haven't seen before. + + // If this is an opaque struct type, special case it. + if (StructType *SSTy = dyn_cast(SrcTy)) { + // Mapping an opaque type to any struct, just keep the dest struct. + if (SSTy->isOpaque()) { + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + return true; + } + + // Mapping a non-opaque source type to an opaque dest. If this is the first + // type that we're mapping onto this destination type then we succeed. Keep + // the dest, but fill it in later. If this is the second (different) type + // that we're trying to map onto the same opaque type then we fail. + if (cast(DstTy)->isOpaque()) { + // We can only map one source type onto the opaque destination type. + if (!DstResolvedOpaqueTypes.insert(cast(DstTy)).second) + return false; + SrcDefinitionsToResolve.push_back(SSTy); + SpeculativeTypes.push_back(SrcTy); + SpeculativeDstOpaqueTypes.push_back(cast(DstTy)); + Entry = DstTy; + return true; + } + } + + // If the number of subtypes disagree between the two types, then we fail. + if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) + return false; + + // Fail if any of the extra properties (e.g. array size) of the type disagree. + if (isa(DstTy)) + return false; // bitwidth disagrees. + if (PointerType *PT = dyn_cast(DstTy)) { + if (PT->getAddressSpace() != cast(SrcTy)->getAddressSpace()) + return false; + + } else if (FunctionType *FT = dyn_cast(DstTy)) { + if (FT->isVarArg() != cast(SrcTy)->isVarArg()) + return false; + } else if (StructType *DSTy = dyn_cast(DstTy)) { + StructType *SSTy = cast(SrcTy); + if (DSTy->isLiteral() != SSTy->isLiteral() || + DSTy->isPacked() != SSTy->isPacked()) + return false; + } else if (ArrayType *DATy = dyn_cast(DstTy)) { + if (DATy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } else if (VectorType *DVTy = dyn_cast(DstTy)) { + if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } + + // Otherwise, we speculate that these two types will line up and recursively + // check the subelements. + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + + for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I) + if (!areTypesIsomorphic(DstTy->getContainedType(I), + SrcTy->getContainedType(I))) + return false; + + // If everything seems to have lined up, then everything is great. + return true; +} + +void TypeMapTy::linkDefinedTypeBodies() { + SmallVector Elements; + for (StructType *SrcSTy : SrcDefinitionsToResolve) { + StructType *DstSTy = cast(MappedTypes[SrcSTy]); + assert(DstSTy->isOpaque()); + + // Map the body of the source type over to a new body for the dest type. + Elements.resize(SrcSTy->getNumElements()); + for (unsigned I = 0, E = Elements.size(); I != E; ++I) + Elements[I] = get(SrcSTy->getElementType(I)); + + DstSTy->setBody(Elements, SrcSTy->isPacked()); + DstStructTypesSet.switchToNonOpaque(DstSTy); + } + SrcDefinitionsToResolve.clear(); + DstResolvedOpaqueTypes.clear(); +} + +void TypeMapTy::finishType(StructType *DTy, StructType *STy, + ArrayRef ETypes) { + DTy->setBody(ETypes, STy->isPacked()); + + // Steal STy's name. 
+ if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); + } + + DstStructTypesSet.addNonOpaque(DTy); +} + +Type *TypeMapTy::get(Type *Ty) { + SmallPtrSet Visited; + return get(Ty, Visited); +} + +Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { + // If we already have an entry for this type, return it. + Type **Entry = &MappedTypes[Ty]; + if (*Entry) + return *Entry; + + // These are types that LLVM itself will unique. + bool IsUniqued = !isa(Ty) || cast(Ty)->isLiteral(); + +#ifndef NDEBUG + if (!IsUniqued) { + for (auto &Pair : MappedTypes) { + assert(!(Pair.first != Ty && Pair.second == Ty) && + "mapping to a source type"); + } + } +#endif + + if (!IsUniqued && !Visited.insert(cast(Ty)).second) { + StructType *DTy = StructType::create(Ty->getContext()); + return *Entry = DTy; + } + + // If this is not a recursive type, then just map all of the elements and + // then rebuild the type from inside out. + SmallVector ElementTypes; + + // If there are no element types to map, then the type is itself. This is + // true for the anonymous {} struct, things like 'float', integers, etc. + if (Ty->getNumContainedTypes() == 0 && IsUniqued) + return *Entry = Ty; + + // Remap all of the elements, keeping track of whether any of them change. + bool AnyChange = false; + ElementTypes.resize(Ty->getNumContainedTypes()); + for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { + ElementTypes[I] = get(Ty->getContainedType(I), Visited); + AnyChange |= ElementTypes[I] != Ty->getContainedType(I); + } + + // If we found our type while recursively processing stuff, just use it. + Entry = &MappedTypes[Ty]; + if (*Entry) { + if (auto *DTy = dyn_cast(*Entry)) { + if (DTy->isOpaque()) { + auto *STy = cast(Ty); + finishType(DTy, STy, ElementTypes); + } + } + return *Entry; + } + + // If all of the element types mapped directly over and the type is not + // a named struct, then the type is usable as-is. + if (!AnyChange && IsUniqued) + return *Entry = Ty; + + // Otherwise, rebuild a modified type. + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unknown derived type to remap"); + case Type::ArrayTyID: + return *Entry = ArrayType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::VectorTyID: + return *Entry = VectorType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::PointerTyID: + return *Entry = PointerType::get(ElementTypes[0], + cast(Ty)->getAddressSpace()); + case Type::FunctionTyID: + return *Entry = FunctionType::get(ElementTypes[0], + makeArrayRef(ElementTypes).slice(1), + cast(Ty)->isVarArg()); + case Type::StructTyID: { + auto *STy = cast(Ty); + bool IsPacked = STy->isPacked(); + if (IsUniqued) + return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); + + // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque()) { + DstStructTypesSet.addOpaque(STy); + return *Entry = Ty; + } + + if (StructType *OldT = + DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) { + STy->setName(""); + return *Entry = OldT; + } + + if (!AnyChange) { + DstStructTypesSet.addNonOpaque(STy); + return *Entry = Ty; + } + + StructType *DTy = StructType::create(Ty->getContext()); + finishType(DTy, STy, ElementTypes); + return *Entry = DTy; + } + } +} + +LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity, + const Twine &Msg) + : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {} +void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; } + +//===----------------------------------------------------------------------===// +// IRLinker implementation. +//===----------------------------------------------------------------------===// + +namespace { +class IRLinker; + +/// Creates prototypes for functions that are lazily linked on the fly. This +/// speeds up linking for modules with many lazily linked functions of which +/// few get used. +class GlobalValueMaterializer final : public ValueMaterializer { + IRLinker *TheIRLinker; + +public: + GlobalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; + Metadata *mapTemporaryMetadata(Metadata *MD) override; + void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) override; + bool isMetadataNeeded(Metadata *MD) override; +}; + +class LocalValueMaterializer final : public ValueMaterializer { + IRLinker *TheIRLinker; + +public: + LocalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; + Metadata *mapTemporaryMetadata(Metadata *MD) override; + void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) override; + bool isMetadataNeeded(Metadata *MD) override; +}; + +/// This is responsible for keeping track of the state used for moving data +/// from SrcM to DstM. +class IRLinker { + Module &DstM; + Module &SrcM; + + std::function AddLazyFor; + + TypeMapTy TypeMap; + GlobalValueMaterializer GValMaterializer; + LocalValueMaterializer LValMaterializer; + + /// Mapping of values from what they used to be in Src, to what they are now + /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead + /// due to the use of Value handles which the Linker doesn't actually need, + /// but this allows us to reuse the ValueMapper code. + ValueToValueMapTy ValueMap; + ValueToValueMapTy AliasValueMap; + + DenseSet ValuesToLink; + std::vector Worklist; + + void maybeAdd(GlobalValue *GV) { + if (ValuesToLink.insert(GV).second) + Worklist.push_back(GV); + } + + /// Set to true when all global value body linking is complete (including + /// lazy linking). Used to prevent metadata linking from creating new + /// references. + bool DoneLinkingBodies = false; + + bool HasError = false; + + /// Flag indicating that we are just linking metadata (after function + /// importing). + bool IsMetadataLinkingPostpass; + + /// Flags to pass to value mapper invocations. + RemapFlags ValueMapperFlags = RF_MoveDistinctMDs; + + /// Association between metadata values created during bitcode parsing and + /// the value id.
Used to correlate temporary metadata created during + /// function importing with the final metadata parsed during the subsequent + /// metadata linking postpass. + DenseMap MetadataToIDs; + + /// Association between metadata value id and temporary metadata that + /// remains unmapped after function importing. Saved during function + /// importing and consumed during the metadata linking postpass. + DenseMap *ValIDToTempMDMap; + + /// Set of subprogram metadata that does not need to be linked into the + /// destination module, because the functions were not imported directly + /// or via an inlined body in an imported function. + SmallPtrSet UnneededSubprograms; + + /// Handles cloning of global values from the source module into + /// the destination module, including setting the attributes and visibility. + GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, bool ForDefinition); + + /// Helper method for setting a message and returning an error code. + bool emitError(const Twine &Message) { + SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message)); + HasError = true; + return true; + } + + void emitWarning(const Twine &Message) { + SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Warning, Message)); + } + + /// Check whether we should be linking metadata from the source module. + bool shouldLinkMetadata() { + // ValIDToTempMDMap will be non-null when we are importing or otherwise want + // to link metadata lazily, and then when linking the metadata. + // We only want to return true for the former case. + return ValIDToTempMDMap == nullptr || IsMetadataLinkingPostpass; + } + + /// Given a global in the source module, return the global in the + /// destination module that is being linked to, if any. + GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { + // If the source has no name it can't link. If it has local linkage, + // there is no name match-up going on. + if (!SrcGV->hasName() || SrcGV->hasLocalLinkage()) + return nullptr; + + // Otherwise see if we have a match in the destination module's symtab. + GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); + if (!DGV) + return nullptr; + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV->hasLocalLinkage()) + return nullptr; + + // Otherwise, we do in fact link to the destination global. + return DGV; + } + + void computeTypeMapping(); + + Constant *linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV); + + bool shouldLink(GlobalValue *DGV, GlobalValue &SGV); + Constant *linkGlobalValueProto(GlobalValue *GV, bool ForAlias); + + bool linkModuleFlagsMetadata(); + + void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src); + bool linkFunctionBody(Function &Dst, Function &Src); + void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); + bool linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); + + /// Functions that take care of cloning a specific global value type + /// into the destination module. + GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); + Function *copyFunctionProto(const Function *SF); + GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); + + void linkNamedMDNodes(); + + /// Populate the UnneededSubprograms set with the DISubprogram metadata + /// from the source module that we don't need to link into the dest module, + /// because the functions were not imported directly or via an inlined body + /// in an imported function.
+  void findNeededSubprograms(ValueToValueMapTy &ValueMap);
+
+  /// The value mapper leaves nulls in the list of subprograms for any
+  /// subprogram in the UnneededSubprograms map. Strip those out after
+  /// metadata linking.
+  void stripNullSubprograms();
+
+public:
+  IRLinker(Module &DstM, IRMover::IdentifiedStructTypeSet &Set, Module &SrcM,
+           ArrayRef<GlobalValue *> ValuesToLink,
+           std::function<void(GlobalValue &GV, IRMover::ValueAdder Add)>
+               AddLazyFor,
+           DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+           bool IsMetadataLinkingPostpass = false)
+      : DstM(DstM), SrcM(SrcM), AddLazyFor(AddLazyFor), TypeMap(Set),
+        GValMaterializer(this), LValMaterializer(this),
+        IsMetadataLinkingPostpass(IsMetadataLinkingPostpass),
+        ValIDToTempMDMap(ValIDToTempMDMap) {
+    for (GlobalValue *GV : ValuesToLink)
+      maybeAdd(GV);
+
+    // If appropriate, tell the value mapper that it can expect to see
+    // temporary metadata.
+    if (!shouldLinkMetadata())
+      ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
+  }
+
+  bool run();
+  Value *materializeDeclFor(Value *V, bool ForAlias);
+  void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
+
+  /// Save the mapping between the given temporary metadata and its metadata
+  /// value id. Used to support metadata linking as a postpass for function
+  /// importing.
+  Metadata *mapTemporaryMetadata(Metadata *MD);
+
+  /// Replace any temporary metadata saved for the source metadata's id with
+  /// the new non-temporary metadata. Used when linking metadata as a postpass
+  /// for function importing.
+  void replaceTemporaryMetadata(const Metadata *OrigMD, Metadata *NewMD);
+
+  /// Indicates whether we need to map the given metadata into the destination
+  /// module. Used to prevent linking of metadata only needed by functions not
+  /// linked into the dest module.
+  bool isMetadataNeeded(Metadata *MD);
+};
+}
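+
+// Illustrative sketch, not part of the upstream patch: a client that wants
+// purely eager linking can drive IRLinker through IRMover with a no-op lazy
+// callback. Here Vals stands for a caller-chosen ArrayRef<GlobalValue *> of
+// definitions from SrcM (hypothetical):
+//
+//   IRMover Mover(DstM);
+//   bool Failed = Mover.move(SrcM, Vals,
+//                            [](GlobalValue &, IRMover::ValueAdder) {});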
+
+/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
+/// table. This is good for all clients except for us. Go through the trouble
+/// to force this back.
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+  // If the global doesn't force its name or if it already has the right name,
+  // there is nothing for us to do.
+  if (GV->hasLocalLinkage() || GV->getName() == Name)
+    return;
+
+  Module *M = GV->getParent();
+
+  // If there is a conflict, rename the conflict.
+  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+    GV->takeName(ConflictGV);
+    ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+  } else {
+    GV->setName(Name); // Force the name back
+  }
+}
+
+Value *GlobalValueMaterializer::materializeDeclFor(Value *V) {
+  return TheIRLinker->materializeDeclFor(V, false);
+}
+
+void GlobalValueMaterializer::materializeInitFor(GlobalValue *New,
+                                                 GlobalValue *Old) {
+  TheIRLinker->materializeInitFor(New, Old, false);
+}
+
+Metadata *GlobalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+  return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void GlobalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                                       Metadata *NewMD) {
+  TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool GlobalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+  return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *LocalValueMaterializer::materializeDeclFor(Value *V) {
+  return TheIRLinker->materializeDeclFor(V, true);
+}
+
+void LocalValueMaterializer::materializeInitFor(GlobalValue *New,
+                                                GlobalValue *Old) {
+  TheIRLinker->materializeInitFor(New, Old, true);
+}
+
+Metadata *LocalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+  return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void LocalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                                      Metadata *NewMD) {
+  TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool LocalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+  return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *IRLinker::materializeDeclFor(Value *V, bool ForAlias) {
+  auto *SGV = dyn_cast<GlobalValue>(V);
+  if (!SGV)
+    return nullptr;
+
+  return linkGlobalValueProto(SGV, ForAlias);
+}
+
+void IRLinker::materializeInitFor(GlobalValue *New, GlobalValue *Old,
+                                  bool ForAlias) {
+  // If we already created the body, just return.
+  if (auto *F = dyn_cast<Function>(New)) {
+    if (!F->isDeclaration())
+      return;
+  } else if (auto *V = dyn_cast<GlobalVariable>(New)) {
+    if (V->hasInitializer())
+      return;
+  } else {
+    auto *A = cast<GlobalAlias>(New);
+    if (A->getAliasee())
+      return;
+  }
+
+  if (ForAlias || shouldLink(New, *Old))
+    linkGlobalValueBody(*New, *Old);
+}
+
+Metadata *IRLinker::mapTemporaryMetadata(Metadata *MD) {
+  if (!ValIDToTempMDMap)
+    return nullptr;
+  // If this temporary metadata has a value id recorded during function
+  // parsing, record that in the ValIDToTempMDMap if one was provided.
+  if (MetadataToIDs.count(MD)) {
+    unsigned Idx = MetadataToIDs[MD];
+    // Check if we created a temp MD when importing a different function from
+    // this module. If so, reuse the same temporary metadata; otherwise
+    // add this temporary metadata to the map.
+    if (!ValIDToTempMDMap->count(Idx)) {
+      MDNode *Node = cast<MDNode>(MD);
+      assert(Node->isTemporary());
+      (*ValIDToTempMDMap)[Idx] = Node;
+    }
+    return (*ValIDToTempMDMap)[Idx];
+  }
+  return nullptr;
+}
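+
+// Illustrative flow, not part of the upstream patch: during function
+// importing, a !dbg attachment can reference a DISubprogram that has not
+// been linked yet. mapTemporaryMetadata() above parks a temporary MDNode
+// for its value id; the metadata linking postpass later resolves it via
+// replaceTemporaryMetadata() below, RAUW'ing and deleting the temporary.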
+
+void IRLinker::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                        Metadata *NewMD) {
+  if (!ValIDToTempMDMap)
+    return;
+#ifndef NDEBUG
+  auto *N = dyn_cast_or_null<MDNode>(NewMD);
+  assert(!N || !N->isTemporary());
+#endif
+  // If a mapping between metadata value ids and temporary metadata
+  // created during function importing was provided, and the source
+  // metadata has a value id recorded during metadata parsing, replace
+  // the temporary metadata with the final mapped metadata now.
+  if (MetadataToIDs.count(OrigMD)) {
+    unsigned Idx = MetadataToIDs[OrigMD];
+    // Nothing to do if we didn't need to create a temporary metadata during
+    // function importing.
+    if (!ValIDToTempMDMap->count(Idx))
+      return;
+    MDNode *TempMD = (*ValIDToTempMDMap)[Idx];
+    TempMD->replaceAllUsesWith(NewMD);
+    MDNode::deleteTemporary(TempMD);
+    ValIDToTempMDMap->erase(Idx);
+  }
+}
+
+bool IRLinker::isMetadataNeeded(Metadata *MD) {
+  // Currently only DISubprogram metadata is marked as being unneeded.
+  if (UnneededSubprograms.empty())
+    return true;
+  MDNode *Node = dyn_cast<MDNode>(MD);
+  if (!Node)
+    return true;
+  DISubprogram *SP = getDISubprogram(Node);
+  if (!SP)
+    return true;
+  return !UnneededSubprograms.count(SP);
+}
+
+/// Loop through the global variables in the src module and merge them into the
+/// dest module.
+GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
+  // No linking to be performed or linking from the source: simply create an
+  // identical version of the symbol over in the dest module... the
+  // initializer will be filled in later by LinkGlobalInits.
+  GlobalVariable *NewDGV =
+      new GlobalVariable(DstM, TypeMap.get(SGVar->getType()->getElementType()),
+                         SGVar->isConstant(), GlobalValue::ExternalLinkage,
+                         /*init*/ nullptr, SGVar->getName(),
+                         /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
+                         SGVar->getType()->getAddressSpace());
+  NewDGV->setAlignment(SGVar->getAlignment());
+  return NewDGV;
+}
+
+/// Link the function in the source module into the destination module if
+/// needed, setting up mapping information.
+Function *IRLinker::copyFunctionProto(const Function *SF) {
+  // If there is no linkage to be performed or we are linking from the source,
+  // bring SF over.
+  return Function::Create(TypeMap.get(SF->getFunctionType()),
+                          GlobalValue::ExternalLinkage, SF->getName(), &DstM);
+}
+
+/// Set up prototypes for any aliases that come over from the source module.
+GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) {
+  // If there is no linkage to be performed or we're linking from the source,
+  // bring over SGA.
+  auto *Ty = TypeMap.get(SGA->getValueType());
+  return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(),
+                             GlobalValue::ExternalLinkage, SGA->getName(),
+                             &DstM);
+}
+
+GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
+                                            bool ForDefinition) {
+  GlobalValue *NewGV;
+  if (auto *SGVar = dyn_cast<GlobalVariable>(SGV)) {
+    NewGV = copyGlobalVariableProto(SGVar);
+  } else if (auto *SF = dyn_cast<Function>(SGV)) {
+    NewGV = copyFunctionProto(SF);
+  } else {
+    if (ForDefinition)
+      NewGV = copyGlobalAliasProto(cast<GlobalAlias>(SGV));
+    else
+      NewGV = new GlobalVariable(
+          DstM, TypeMap.get(SGV->getType()->getElementType()),
+          /*isConstant*/ false, GlobalValue::ExternalLinkage,
+          /*init*/ nullptr, SGV->getName(),
+          /*insertbefore*/ nullptr, SGV->getThreadLocalMode(),
+          SGV->getType()->getAddressSpace());
+  }
+
+  if (ForDefinition)
+    NewGV->setLinkage(SGV->getLinkage());
+  else if (SGV->hasExternalWeakLinkage() || SGV->hasWeakLinkage() ||
+           SGV->hasLinkOnceLinkage())
+    NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+  NewGV->copyAttributesFrom(SGV);
+  return NewGV;
+}
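+
+// Illustrative example, not part of the upstream patch: when a weak
+// definition is copied over only as a declaration, copyGlobalValueProto()
+// above downgrades it to extern_weak, e.g.
+//
+//   source module:       @g = weak global i32 0
+//   copied declaration:  @g = extern_weak global i32
+//
+// so the destination keeps tolerating a missing definition.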
+
+/// Loop over all of the linked values to compute type mappings. For example,
+/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct
+/// types 'Foo' but one got renamed when the module was loaded into the same
+/// LLVMContext.
+void IRLinker::computeTypeMapping() {
+  for (GlobalValue &SGV : SrcM.globals()) {
+    GlobalValue *DGV = getLinkedToGlobal(&SGV);
+    if (!DGV)
+      continue;
+
+    if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) {
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+      continue;
+    }
+
+    // Unify the element type of appending arrays.
+    ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+    ArrayType *SAT = cast<ArrayType>(SGV.getType()->getElementType());
+    TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+  }
+
+  for (GlobalValue &SGV : SrcM)
+    if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+  for (GlobalValue &SGV : SrcM.aliases())
+    if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+  // Incorporate types by name, scanning all the types in the source module.
+  // At this point, the destination module may have a type "%foo = { i32 }" for
+  // example. When the source module got loaded into the same LLVMContext, if
+  // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+  std::vector<StructType *> Types = SrcM.getIdentifiedStructTypes();
+  for (StructType *ST : Types) {
+    if (!ST->hasName())
+      continue;
+
+    // Check to see if there is a dot in the name followed by a digit.
+    size_t DotPos = ST->getName().rfind('.');
+    if (DotPos == 0 || DotPos == StringRef::npos ||
+        ST->getName().back() == '.' ||
+        !isdigit(static_cast<unsigned char>(ST->getName()[DotPos + 1])))
+      continue;
+
+    // Check to see if the destination module has a struct with the prefix
+    // name.
+    StructType *DST = DstM.getTypeByName(ST->getName().substr(0, DotPos));
+    if (!DST)
+      continue;
+
+    // Don't use it if this actually came from the source module. They're in
+    // the same LLVMContext after all. Also don't use it unless the type is
+    // actually used in the destination module. This can happen in situations
+    // like this:
+    //
+    //      Module A                         Module B
+    //      --------                         --------
+    //   %Z = type { %A }                %B = type { %C.1 }
+    //   %A = type { %B.1, [7 x i8] }    %C.1 = type { i8* }
+    //   %B.1 = type { %C }              %A.2 = type { %B.3, [5 x i8] }
+    //   %C = type { i8* }               %B.3 = type { %C.1 }
+    //
+    // When we link Module B with Module A, the '%B' in Module B is
+    // used. However, that would then use '%C.1'. But when we process '%C.1',
+    // we prefer to take the '%C' version. So we are then left with both
+    // '%C.1' and '%C' being used for the same types. This leads to some
+    // variables using one type and some using the other.
+    if (TypeMap.DstStructTypesSet.hasType(DST))
+      TypeMap.addTypeMapping(DST, ST);
+  }
+
+  // Now that we have discovered all of the type equivalences, get a body for
+  // any 'opaque' types in the dest module that are now resolved.
+  TypeMap.linkDefinedTypeBodies();
+}
+
+static void getArrayElements(const Constant *C,
+                             SmallVectorImpl<Constant *> &Dest) {
+  unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+  for (unsigned i = 0; i != NumElements; ++i)
+    Dest.push_back(C->getAggregateElement(i));
+}
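+
+// Illustrative example, not part of the upstream patch: appending linkage
+// concatenates array initializers, so linking
+//
+//   @llvm.used = appending global [1 x i8*] [i8* @a]   ; module A
+//   @llvm.used = appending global [1 x i8*] [i8* @b]   ; module B
+//
+// yields @llvm.used = appending global [2 x i8*] [i8* @a, i8* @b].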
+
+/// If there were any appending global variables, link them together now.
+/// Returns nullptr on error.
+Constant *IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+                                          const GlobalVariable *SrcGV) {
+  Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()))
+                    ->getElementType();
+
+  StringRef Name = SrcGV->getName();
+  bool IsNewStructor = false;
+  bool IsOldStructor = false;
+  if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") {
+    if (cast<StructType>(EltTy)->getNumElements() == 3)
+      IsNewStructor = true;
+    else
+      IsOldStructor = true;
+  }
+
+  PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
+  if (IsOldStructor) {
+    auto &ST = *cast<StructType>(EltTy);
+    Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+    EltTy = StructType::get(SrcGV->getContext(), Tys, false);
+  }
+
+  if (DstGV) {
+    ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+
+    if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) {
+      emitError(
+          "Linking globals named '" + SrcGV->getName() +
+          "': can only link appending global with another appending global!");
+      return nullptr;
+    }
+
+    // Check to see that the two arrays agree on type.
+    if (EltTy != DstTy->getElementType()) {
+      emitError("Appending variables with different element types!");
+      return nullptr;
+    }
+    if (DstGV->isConstant() != SrcGV->isConstant()) {
+      emitError("Appending variables linked with different const'ness!");
+      return nullptr;
+    }
+
+    if (DstGV->getAlignment() != SrcGV->getAlignment()) {
+      emitError(
+          "Appending variables with different alignment need to be linked!");
+      return nullptr;
+    }
+
+    if (DstGV->getVisibility() != SrcGV->getVisibility()) {
+      emitError(
+          "Appending variables with different visibility need to be linked!");
+      return nullptr;
+    }
+
+    if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) {
+      emitError(
+          "Appending variables with different unnamed_addr need to be linked!");
+      return nullptr;
+    }
+
+    if (StringRef(DstGV->getSection()) != SrcGV->getSection()) {
+      emitError(
+          "Appending variables with different section name need to be linked!");
+      return nullptr;
+    }
+  }
+
+  SmallVector<Constant *, 16> DstElements;
+  if (DstGV)
+    getArrayElements(DstGV->getInitializer(), DstElements);
+
+  SmallVector<Constant *, 16> SrcElements;
+  getArrayElements(SrcGV->getInitializer(), SrcElements);
+
+  if (IsNewStructor)
+    SrcElements.erase(
+        std::remove_if(SrcElements.begin(), SrcElements.end(),
+                       [this](Constant *E) {
+                         auto *Key = dyn_cast<GlobalValue>(
+                             E->getAggregateElement(2)->stripPointerCasts());
+                         if (!Key)
+                           return false;
+                         GlobalValue *DGV = getLinkedToGlobal(Key);
+                         return !shouldLink(DGV, *Key);
+                       }),
+        SrcElements.end());
+  uint64_t NewSize = DstElements.size() + SrcElements.size();
+  ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+  // Create the new global variable.
+  GlobalVariable *NG = new GlobalVariable(
+      DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
+      /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
+      SrcGV->getType()->getAddressSpace());
+
+  NG->copyAttributesFrom(SrcGV);
+  forceRenaming(NG, SrcGV->getName());
+
+  Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+  // Stop recursion.
+  ValueMap[SrcGV] = Ret;
+
+  for (auto *V : SrcElements) {
+    Constant *NewV;
+    if (IsOldStructor) {
+      auto *S = cast<ConstantStruct>(V);
+      auto *E1 = MapValue(S->getOperand(0), ValueMap, ValueMapperFlags,
+                          &TypeMap, &GValMaterializer);
+      auto *E2 = MapValue(S->getOperand(1), ValueMap, ValueMapperFlags,
+                          &TypeMap, &GValMaterializer);
+      Value *Null = Constant::getNullValue(VoidPtrTy);
+      NewV =
+          ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+    } else {
+      NewV =
+          MapValue(V, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+    }
+    DstElements.push_back(NewV);
+  }
+
+  NG->setInitializer(ConstantArray::get(NewType, DstElements));
+
+  // Replace any uses of the two global variables with uses of the new
+  // global.
+  if (DstGV) {
+    DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+    DstGV->eraseFromParent();
+  }
+
+  return Ret;
+}
+
+static bool useExistingDest(GlobalValue &SGV, GlobalValue *DGV,
+                            bool ShouldLink) {
+  if (!DGV)
+    return false;
+
+  if (SGV.isDeclaration())
+    return true;
+
+  if (DGV->isDeclarationForLinker() && !SGV.isDeclarationForLinker())
+    return false;
+
+  if (ShouldLink)
+    return false;
+
+  return true;
+}
+
+bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
+  // Already imported all the values. Just map to the Dest value
+  // in case it is referenced in the metadata.
+  if (IsMetadataLinkingPostpass) {
+    assert(!ValuesToLink.count(&SGV) &&
+           "Source value unexpectedly requested for link during metadata link");
+    return false;
+  }
+
+  if (ValuesToLink.count(&SGV))
+    return true;
+
+  if (SGV.hasLocalLinkage())
+    return true;
+
+  if (DGV && !DGV->isDeclaration())
+    return false;
+
+  if (SGV.hasAvailableExternallyLinkage())
+    return true;
+
+  if (DoneLinkingBodies)
+    return false;
+
+  AddLazyFor(SGV, [this](GlobalValue &GV) { maybeAdd(&GV); });
+  return ValuesToLink.count(&SGV);
+}
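+
+// Illustrative sketch, not part of the upstream patch: the AddLazyFor
+// callback consulted by shouldLink() above typically pulls in every member
+// of a referenced value's comdat group. LazyComdatMembers is a hypothetical
+// map maintained by the client:
+//
+//   auto AddLazyFor = [&](GlobalValue &GV, IRMover::ValueAdder Add) {
+//     if (const Comdat *SC = GV.getComdat())
+//       for (GlobalValue *Member : LazyComdatMembers[SC])
+//         Add(*Member);
+//   };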
+
+Constant *IRLinker::linkGlobalValueProto(GlobalValue *SGV, bool ForAlias) {
+  GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+  bool ShouldLink = shouldLink(DGV, *SGV);
+
+  // The value may already have been mapped and just be missing from the map.
+  if (ShouldLink) {
+    auto I = ValueMap.find(SGV);
+    if (I != ValueMap.end())
+      return cast<Constant>(I->second);
+
+    I = AliasValueMap.find(SGV);
+    if (I != AliasValueMap.end())
+      return cast<Constant>(I->second);
+  }
+
+  DGV = nullptr;
+  if (ShouldLink || !ForAlias)
+    DGV = getLinkedToGlobal(SGV);
+
+  // Handle the ultra special appending linkage case first.
+  assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage());
+  if (SGV->hasAppendingLinkage())
+    return linkAppendingVarProto(cast_or_null<GlobalVariable>(DGV),
+                                 cast<GlobalVariable>(SGV));
+
+  GlobalValue *NewGV;
+  if (useExistingDest(*SGV, DGV, ShouldLink)) {
+    NewGV = DGV;
+  } else {
+    // If we are done linking global value bodies (i.e. we are performing
+    // metadata linking), don't link in the global value due to this
+    // reference, simply map it to null.
+    if (DoneLinkingBodies)
+      return nullptr;
+
+    NewGV = copyGlobalValueProto(SGV, ShouldLink);
+    if (!ForAlias)
+      forceRenaming(NewGV, SGV->getName());
+  }
+  if (ShouldLink || ForAlias) {
+    if (const Comdat *SC = SGV->getComdat()) {
+      if (auto *GO = dyn_cast<GlobalObject>(NewGV)) {
+        Comdat *DC = DstM.getOrInsertComdat(SC->getName());
+        DC->setSelectionKind(SC->getSelectionKind());
+        GO->setComdat(DC);
+      }
+    }
+  }
+
+  if (!ShouldLink && ForAlias)
+    NewGV->setLinkage(GlobalValue::InternalLinkage);
+
+  Constant *C = NewGV;
+  if (DGV)
+    C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
+
+  if (DGV && NewGV != DGV) {
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
+    DGV->eraseFromParent();
+  }
+
+  return C;
+}
+
+/// Update the initializers in the Dest module now that all globals that may be
+/// referenced are in Dest.
+void IRLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) {
+  // Figure out what the initializer looks like in the dest module.
+  Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, ValueMapperFlags,
+                              &TypeMap, &GValMaterializer));
+}
+
+/// Copy the source function over into the dest function and fix up references
+/// to values. At this point we know that Dest is an external function, and
+/// that Src is not.
+bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
+  assert(Dst.isDeclaration() && !Src.isDeclaration());
+
+  // Materialize if needed.
+  if (std::error_code EC = Src.materialize())
+    return emitError(EC.message());
+
+  if (!shouldLinkMetadata())
+    // This is only supported for lazy links. Do after materialization of
+    // a function and before remapping metadata on instructions below
+    // in RemapInstruction, as the saved mapping is used to handle
+    // the temporary metadata hanging off instructions.
+    SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
+
+  // Link in the prefix data.
+  if (Src.hasPrefixData())
+    Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, ValueMapperFlags,
+                               &TypeMap, &GValMaterializer));
+
+  // Link in the prologue data.
+  if (Src.hasPrologueData())
+    Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap,
+                                 ValueMapperFlags, &TypeMap,
+                                 &GValMaterializer));
+
+  // Link in the personality function.
+  if (Src.hasPersonalityFn())
+    Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap,
+                                  ValueMapperFlags, &TypeMap,
+                                  &GValMaterializer));
+
+  // Go through and convert function arguments over, remembering the mapping.
+  Function::arg_iterator DI = Dst.arg_begin();
+  for (Argument &Arg : Src.args()) {
+    DI->setName(Arg.getName()); // Copy the name over.
+
+    // Add a mapping to our mapping.
+    ValueMap[&Arg] = &*DI;
+    ++DI;
+  }
+
+  // Copy over the metadata attachments.
+  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+  Src.getAllMetadata(MDs);
+  for (const auto &I : MDs)
+    Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, ValueMapperFlags,
+                                         &TypeMap, &GValMaterializer));
+
+  // Splice the body of the source function into the dest function.
+  Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList());
+
+  // At this point, all of the instructions and values of the function are now
+  // copied over. The only problem is that they are still referencing values in
+  // the Source function as operands. Loop through all of the operands of the
+  // functions and patch them up to point to the local versions.
+  for (BasicBlock &BB : Dst)
+    for (Instruction &I : BB)
+      RemapInstruction(&I, ValueMap,
+                       RF_IgnoreMissingEntries | ValueMapperFlags, &TypeMap,
+                       &GValMaterializer);
+
+  // There is no need to map the arguments anymore.
+  for (Argument &Arg : Src.args())
+    ValueMap.erase(&Arg);
+
+  return false;
+}
+
+void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) {
+  Constant *Aliasee = Src.getAliasee();
+  Constant *Val = MapValue(Aliasee, AliasValueMap, ValueMapperFlags, &TypeMap,
+                           &LValMaterializer);
+  Dst.setAliasee(Val);
+}
+
+bool IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
+  if (auto *F = dyn_cast<Function>(&Src))
+    return linkFunctionBody(cast<Function>(Dst), *F);
+  if (auto *GVar = dyn_cast<GlobalVariable>(&Src)) {
+    linkGlobalInit(cast<GlobalVariable>(Dst), *GVar);
+    return false;
+  }
+  linkAliasBody(cast<GlobalAlias>(Dst), cast<GlobalAlias>(Src));
+  return false;
+}
+
+void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
+  // Track unneeded nodes to make it simpler to handle the case
+  // where we are checking if an already-mapped SP is needed.
+  NamedMDNode *CompileUnits = SrcM.getNamedMetadata("llvm.dbg.cu");
+  if (!CompileUnits)
+    return;
+  for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+    auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+    assert(CU && "Expected valid compile unit");
+    for (auto *Op : CU->getSubprograms()) {
+      // Unless we were doing function importing and deferred metadata linking,
+      // any needed SPs should have been mapped as they would be reached
+      // from the function linked in (either on the function itself for linked
+      // function bodies, or from DILocation on inlined instructions).
+      assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) &&
+             "DISubprogram shouldn't be mapped yet");
+      if (!ValueMap.MD()[Op])
+        UnneededSubprograms.insert(Op);
+    }
+  }
+  if (!IsMetadataLinkingPostpass)
+    return;
+  // In the case of metadata linking as a postpass (e.g. for function
+  // importing), see which DISubprogram MD from the source has an associated
+  // temporary metadata node, which means the SP was needed by an imported
+  // function.
+  for (auto MDI : MetadataToIDs) {
+    const MDNode *Node = dyn_cast<MDNode>(MDI.first);
+    if (!Node)
+      continue;
+    DISubprogram *SP = getDISubprogram(Node);
+    if (!SP || !ValIDToTempMDMap->count(MDI.second))
+      continue;
+    UnneededSubprograms.erase(SP);
+  }
+}
+
+// Squash null subprograms from compile unit subprogram lists.
+void IRLinker::stripNullSubprograms() {
+  NamedMDNode *CompileUnits = DstM.getNamedMetadata("llvm.dbg.cu");
+  if (!CompileUnits)
+    return;
+  for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+    auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+    assert(CU && "Expected valid compile unit");
+
+    SmallVector<Metadata *, 16> NewSPs;
+    NewSPs.reserve(CU->getSubprograms().size());
+    bool FoundNull = false;
+    for (DISubprogram *SP : CU->getSubprograms()) {
+      if (!SP) {
+        FoundNull = true;
+        continue;
+      }
+      NewSPs.push_back(SP);
+    }
+    if (FoundNull)
+      CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
+  }
+}
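+
+// Illustrative effect, not part of the upstream patch: if only f() was
+// imported, a compile unit whose subprogram list was
+//   !{!DISubprogram(name: "f"), !DISubprogram(name: "g")}
+// has g's entry mapped to null (isMetadataNeeded() rejects it) and is then
+// compacted by stripNullSubprograms() to !{!DISubprogram(name: "f")}.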
+
+/// Insert all of the named MDNodes in Src into the Dest module.
+void IRLinker::linkNamedMDNodes() {
+  findNeededSubprograms(ValueMap);
+  const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+  for (const NamedMDNode &NMD : SrcM.named_metadata()) {
+    // Don't link module flags here. Do them separately.
+    if (&NMD == SrcModFlags)
+      continue;
+    NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName());
+    // Add Src elements into Dest node.
+    for (const MDNode *op : NMD.operands())
+      DestNMD->addOperand(MapMetadata(
+          op, ValueMap, ValueMapperFlags | RF_NullMapMissingGlobalValues,
+          &TypeMap, &GValMaterializer));
+  }
+  stripNullSubprograms();
+}
+
+/// Merge the linker flags in Src into the Dest module.
+bool IRLinker::linkModuleFlagsMetadata() {
+  // If the source module has no module flags, we are done.
+  const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+  if (!SrcModFlags)
+    return false;
+
+  // If the destination module doesn't have module flags yet, then just copy
+  // over the source module's flags.
+  NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
+  if (DstModFlags->getNumOperands() == 0) {
+    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+      DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+    return false;
+  }
+
+  // First build a map of the existing module flags and requirements.
+  DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
+  SmallSetVector<MDNode *, 16> Requirements;
+  for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
+    MDNode *Op = DstModFlags->getOperand(I);
+    ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
+    MDString *ID = cast<MDString>(Op->getOperand(1));
+
+    if (Behavior->getZExtValue() == Module::Require) {
+      Requirements.insert(cast<MDNode>(Op->getOperand(2)));
+    } else {
+      Flags[ID] = std::make_pair(Op, I);
+    }
+  }
+
+  // Merge in the flags from the source module, and also collect its set of
+  // requirements.
+  for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
+    MDNode *SrcOp = SrcModFlags->getOperand(I);
+    ConstantInt *SrcBehavior =
+        mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
+    MDString *ID = cast<MDString>(SrcOp->getOperand(1));
+    MDNode *DstOp;
+    unsigned DstIndex;
+    std::tie(DstOp, DstIndex) = Flags.lookup(ID);
+    unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+
+    // If this is a requirement, add it and continue.
+    if (SrcBehaviorValue == Module::Require) {
+      // If the destination module does not already have this requirement, add
+      // it.
+      if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
+        DstModFlags->addOperand(SrcOp);
+      }
+      continue;
+    }
+
+    // If there is no existing flag with this ID, just add it.
+    if (!DstOp) {
+      Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
+      DstModFlags->addOperand(SrcOp);
+      continue;
+    }
+
+    // Otherwise, perform a merge.
+    ConstantInt *DstBehavior =
+        mdconst::extract<ConstantInt>(DstOp->getOperand(0));
+    unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+
+    // If either flag has override behavior, handle it first.
+    if (DstBehaviorValue == Module::Override) {
+      // Diagnose inconsistent flags which both have override behavior.
+      if (SrcBehaviorValue == Module::Override &&
+          SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitError("linking module flags '" + ID->getString() +
+                  "': IDs have conflicting override values");
+      }
+      continue;
+    } else if (SrcBehaviorValue == Module::Override) {
+      // Update the destination flag to that of the source.
+      DstModFlags->setOperand(DstIndex, SrcOp);
+      Flags[ID].first = SrcOp;
+      continue;
+    }
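+
+    // Illustrative examples, not part of the upstream patch (flag names are
+    // hypothetical):
+    //   !{i32 1, !"flag", i32 2}  - Error(1): both modules must agree
+    //   !{i32 5, !"list", !{...}} - Append(5): operand lists concatenate
+    //   !{i32 6, !"set",  !{...}} - AppendUnique(6): duplicates are dropped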
+
+    // Diagnose inconsistent merge behavior types.
+    if (SrcBehaviorValue != DstBehaviorValue) {
+      emitError("linking module flags '" + ID->getString() +
+                "': IDs have conflicting behaviors");
+      continue;
+    }
+
+    auto replaceDstValue = [&](MDNode *New) {
+      Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
+      MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+      DstModFlags->setOperand(DstIndex, Flag);
+      Flags[ID].first = Flag;
+    };
+
+    // Perform the merge for standard behavior types.
+    switch (SrcBehaviorValue) {
+    case Module::Require:
+    case Module::Override:
+      llvm_unreachable("not possible");
+    case Module::Error: {
+      // Emit an error if the values differ.
+      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitError("linking module flags '" + ID->getString() +
+                  "': IDs have conflicting values");
+      }
+      continue;
+    }
+    case Module::Warning: {
+      // Emit a warning if the values differ.
+      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitWarning("linking module flags '" + ID->getString() +
+                    "': IDs have conflicting values");
+      }
+      continue;
+    }
+    case Module::Append: {
+      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+      SmallVector<Metadata *, 8> MDs;
+      MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
+      MDs.append(DstValue->op_begin(), DstValue->op_end());
+      MDs.append(SrcValue->op_begin(), SrcValue->op_end());
+
+      replaceDstValue(MDNode::get(DstM.getContext(), MDs));
+      break;
+    }
+    case Module::AppendUnique: {
+      SmallSetVector<Metadata *, 16> Elts;
+      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+      Elts.insert(DstValue->op_begin(), DstValue->op_end());
+      Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
+
+      replaceDstValue(MDNode::get(DstM.getContext(),
+                                  makeArrayRef(Elts.begin(), Elts.end())));
+      break;
+    }
+    }
+  }
+
+  // Check all of the requirements.
+  for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+    MDNode *Requirement = Requirements[I];
+    MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+    Metadata *ReqValue = Requirement->getOperand(1);
+
+    MDNode *Op = Flags[Flag].first;
+    if (!Op || Op->getOperand(2) != ReqValue) {
+      emitError("linking module flags '" + Flag->getString() +
+                "': does not have the required value");
+      continue;
+    }
+  }
+
+  return HasError;
+}
+
+// This function returns true if the triples match.
+static bool triplesMatch(const Triple &T0, const Triple &T1) {
+  // If vendor is apple, ignore the version number.
+  if (T0.getVendor() == Triple::Apple)
+    return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() &&
+           T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS();
+
+  return T0 == T1;
+}
+
+// This function returns the merged triple.
+static std::string mergeTriples(const Triple &SrcTriple,
+                                const Triple &DstTriple) {
+  // If vendor is apple, pick the triple with the larger version number.
+  if (SrcTriple.getVendor() == Triple::Apple)
+    if (DstTriple.isOSVersionLT(SrcTriple))
+      return SrcTriple.str();
+
+  return DstTriple.str();
+}
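+
+// Illustrative example, not part of the upstream patch: under the Apple
+// special case above, linking a module with triple
+// "x86_64-apple-macosx10.9.0" into one with "x86_64-apple-macosx10.11.0"
+// triggers no mismatch warning (triplesMatch ignores the version) and
+// mergeTriples keeps the destination triple, whose OS version is larger.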
+
+bool IRLinker::run() {
+  // Inherit the target data from the source module if the destination module
+  // doesn't have one already.
+  if (DstM.getDataLayout().isDefault())
+    DstM.setDataLayout(SrcM.getDataLayout());
+
+  if (SrcM.getDataLayout() != DstM.getDataLayout()) {
+    emitWarning("Linking two modules of different data layouts: '" +
+                SrcM.getModuleIdentifier() + "' is '" +
+                SrcM.getDataLayoutStr() + "' whereas '" +
+                DstM.getModuleIdentifier() + "' is '" +
+                DstM.getDataLayoutStr() + "'\n");
+  }
+
+  // Copy the target triple from the source to dest if the dest's is empty.
+  if (DstM.getTargetTriple().empty() && !SrcM.getTargetTriple().empty())
+    DstM.setTargetTriple(SrcM.getTargetTriple());
+
+  Triple SrcTriple(SrcM.getTargetTriple()), DstTriple(DstM.getTargetTriple());
+
+  if (!SrcM.getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
+    emitWarning("Linking two modules of different target triples: '" +
+                SrcM.getModuleIdentifier() + "' is '" + SrcM.getTargetTriple() +
+                "' whereas '" + DstM.getModuleIdentifier() + "' is '" +
+                DstM.getTargetTriple() + "'\n");
+
+  DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
+
+  // Append the module inline asm string.
+  if (!SrcM.getModuleInlineAsm().empty()) {
+    if (DstM.getModuleInlineAsm().empty())
+      DstM.setModuleInlineAsm(SrcM.getModuleInlineAsm());
+    else
+      DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" +
+                              SrcM.getModuleInlineAsm());
+  }
+
+  // Loop over all of the linked values to compute type mappings.
+  computeTypeMapping();
+
+  std::reverse(Worklist.begin(), Worklist.end());
+  while (!Worklist.empty()) {
+    GlobalValue *GV = Worklist.back();
+    Worklist.pop_back();
+
+    // Already mapped.
+    if (ValueMap.find(GV) != ValueMap.end() ||
+        AliasValueMap.find(GV) != AliasValueMap.end())
+      continue;
+
+    assert(!GV->isDeclaration());
+    MapValue(GV, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+    if (HasError)
+      return true;
+  }
+
+  // Note that we are done linking global value bodies. This prevents
+  // metadata linking from creating new references.
+  DoneLinkingBodies = true;
+
+  // Remap all of the named MDNodes in Src into the DstM module. We do this
+  // after linking GlobalValues so that MDNodes that reference GlobalValues
+  // are properly remapped.
+  if (shouldLinkMetadata()) {
+    // Even if just linking metadata we should link decls above in case
+    // any are referenced by metadata. IRLinker::shouldLink ensures that
+    // we don't actually link anything from source.
+    if (IsMetadataLinkingPostpass) {
+      // Ensure metadata is materialized before saving the metadata list.
+      if (SrcM.getMaterializer()->materializeMetadata())
+        return true;
+      SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
+    }
+
+    linkNamedMDNodes();
+
+    if (IsMetadataLinkingPostpass) {
+      // Handle anything left in the ValIDToTempMDMap, such as metadata nodes
+      // not reached by the dbg.cu NamedMD (i.e. only reached from
+      // instructions).
+      // Walk the MetadataToIDs once to find the set of new (imported) MD
+      // that still has corresponding temporary metadata, and invoke metadata
+      // mapping on each one.
+      for (auto MDI : MetadataToIDs) {
+        if (!ValIDToTempMDMap->count(MDI.second))
+          continue;
+        MapMetadata(MDI.first, ValueMap, ValueMapperFlags, &TypeMap,
+                    &GValMaterializer);
+      }
+      assert(ValIDToTempMDMap->empty());
+    }
+
+    // Merge the module flags into the DstM module.
+    if (linkModuleFlagsMetadata())
+      return true;
+  }
+
+  return false;
+}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
+    : ETypes(E), IsPacked(P) {}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
+    : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
+  if (IsPacked != That.IsPacked)
+    return false;
+  if (ETypes != That.ETypes)
+    return false;
+  return true;
+}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
+  return !this->operator==(That);
+}
+
+StructType *IRMover::StructTypeKeyInfo::getEmptyKey() {
+  return DenseMapInfo<StructType *>::getEmptyKey();
+}
+
+StructType *IRMover::StructTypeKeyInfo::getTombstoneKey() {
+  return DenseMapInfo<StructType *>::getTombstoneKey();
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
+  return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
+                      Key.IsPacked);
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const StructType *ST) {
+  return getHashValue(KeyTy(ST));
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
+                                         const StructType *RHS) {
+  if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+    return false;
+  return LHS == KeyTy(RHS);
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const StructType *LHS,
+                                         const StructType *RHS) {
+  if (RHS == getEmptyKey())
+    return LHS == getEmptyKey();
+
+  if (RHS == getTombstoneKey())
+    return LHS == getTombstoneKey();
+
+  return KeyTy(LHS) == KeyTy(RHS);
+}
+
+void IRMover::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
+  assert(!Ty->isOpaque());
+  NonOpaqueStructTypes.insert(Ty);
+}
+
+void IRMover::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
+  assert(!Ty->isOpaque());
+  NonOpaqueStructTypes.insert(Ty);
+  bool Removed = OpaqueStructTypes.erase(Ty);
+  (void)Removed;
+  assert(Removed);
+}
+
+void IRMover::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
+  assert(Ty->isOpaque());
+  OpaqueStructTypes.insert(Ty);
+}
+
+StructType *
+IRMover::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
+                                                bool IsPacked) {
+  IRMover::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
+  auto I = NonOpaqueStructTypes.find_as(Key);
+  if (I == NonOpaqueStructTypes.end())
+    return nullptr;
+  return *I;
+}
+
+bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
+  if (Ty->isOpaque())
+    return OpaqueStructTypes.count(Ty);
+  auto I = NonOpaqueStructTypes.find(Ty);
+  if (I == NonOpaqueStructTypes.end())
+    return false;
+  return *I == Ty;
+}
+
+IRMover::IRMover(Module &M) : Composite(M) {
+  TypeFinder StructTypes;
+  StructTypes.run(M, true);
+  for (StructType *Ty : StructTypes) {
+    if (Ty->isOpaque())
+      IdentifiedStructTypes.addOpaque(Ty);
+    else
+      IdentifiedStructTypes.addNonOpaque(Ty);
+  }
+}
+
+bool IRMover::move(
+    Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+    std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+    DenseMap<unsigned, MDNode *> *ValIDToTempMDMap,
+    bool IsMetadataLinkingPostpass) {
+  IRLinker TheIRLinker(Composite, IdentifiedStructTypes, Src, ValuesToLink,
+                       AddLazyFor, ValIDToTempMDMap, IsMetadataLinkingPostpass);
+  bool RetCode = TheIRLinker.run();
+  Composite.dropTriviallyDeadConstantArrays();
+  return RetCode;
+}
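+
+// Illustrative sketch, not part of the upstream patch: the identified-struct
+// bookkeeping above keys non-opaque structs by element types and packedness,
+// so structurally identical named structs unify:
+//
+//   LLVMContext Ctx;
+//   Type *I32 = Type::getInt32Ty(Ctx);
+//   IRMover::IdentifiedStructTypeSet Set;
+//   Set.addNonOpaque(StructType::create(Ctx, {I32, I32}, "pair"));
+//   // Finds "pair" again for any non-packed {i32, i32} key:
+//   StructType *ST = Set.findNonOpaque({I32, I32}, /*IsPacked=*/false);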
diff --git a/lib/Linker/LinkDiagnosticInfo.h b/lib/Linker/LinkDiagnosticInfo.h
new file mode 100644
index 000000000000..d91f19c69aac
--- /dev/null
+++ b/lib/Linker/LinkDiagnosticInfo.h
@@ -0,0 +1,25 @@
+//===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+#define LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+
+namespace llvm {
+class LinkDiagnosticInfo : public DiagnosticInfo {
+  const Twine &Msg;
+
+public:
+  LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
+  void print(DiagnosticPrinter &DP) const override;
+};
+}
+
+#endif
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index f0906809ee48..9de3be412d75 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -12,447 +12,72 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker/Linker.h"
+#include "LinkDiagnosticInfo.h"
 #include "llvm-c/Linker.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include <cctype>
-#include <tuple>
 using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TypeMap implementation.
-//===----------------------------------------------------------------------===//
-
 namespace {
-class TypeMapTy : public ValueMapTypeRemapper {
-  /// This is a mapping from a source type to a destination type to use.
-  DenseMap<Type *, Type *> MappedTypes;
-
-  /// When checking to see if two subgraphs are isomorphic, we speculatively
-  /// add types to MappedTypes, but keep track of them here in case we need to
-  /// roll back.
-  SmallVector<Type *, 16> SpeculativeTypes;
-
-  SmallVector<StructType *, 16> SpeculativeDstOpaqueTypes;
-
-  /// This is a list of non-opaque structs in the source module that are mapped
-  /// to an opaque struct in the destination module.
-  SmallVector<StructType *, 16> SrcDefinitionsToResolve;
-
-  /// This is the set of opaque types in the destination module that are
-  /// getting a body from the source module.
-  SmallPtrSet<StructType *, 16> DstResolvedOpaqueTypes;
-
-public:
-  TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet)
-      : DstStructTypesSet(DstStructTypesSet) {}
-
-  Linker::IdentifiedStructTypeSet &DstStructTypesSet;
-  /// Indicate that the specified type in the destination module is conceptually
-  /// equivalent to the specified type in the source module.
-  void addTypeMapping(Type *DstTy, Type *SrcTy);
-
-  /// Produce a body for an opaque type in the dest module from a type
-  /// definition in the source module.
-  void linkDefinedTypeBodies();
-
-  /// Return the mapped type to use for the specified input type from the
-  /// source module.
-  Type *get(Type *SrcTy);
-  Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
-
-  void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
-
-  FunctionType *get(FunctionType *T) {
-    return cast<FunctionType>(get((Type *)T));
-  }
-
-  /// Dump out the type map for debugging purposes.
-  void dump() const {
-    for (auto &Pair : MappedTypes) {
-      dbgs() << "TypeMap: ";
-      Pair.first->print(dbgs());
-      dbgs() << " => ";
-      Pair.second->print(dbgs());
-      dbgs() << '\n';
-    }
-  }
-
-private:
-  Type *remapType(Type *SrcTy) override { return get(SrcTy); }
-
-  bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
-};
-}
-
-void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
-  assert(SpeculativeTypes.empty());
-  assert(SpeculativeDstOpaqueTypes.empty());
-
-  // Check to see if these types are recursively isomorphic and establish a
-  // mapping between them if so.
-  if (!areTypesIsomorphic(DstTy, SrcTy)) {
-    // Oops, they aren't isomorphic. Just discard this request by rolling out
-    // any speculative mappings we've established.
-    for (Type *Ty : SpeculativeTypes)
-      MappedTypes.erase(Ty);
-
-    SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() -
-                                   SpeculativeDstOpaqueTypes.size());
-    for (StructType *Ty : SpeculativeDstOpaqueTypes)
-      DstResolvedOpaqueTypes.erase(Ty);
-  } else {
-    for (Type *Ty : SpeculativeTypes)
-      if (auto *STy = dyn_cast<StructType>(Ty))
-        if (STy->hasName())
-          STy->setName("");
-  }
-  SpeculativeTypes.clear();
-  SpeculativeDstOpaqueTypes.clear();
-}
-
-/// Recursively walk this pair of types, returning true if they are isomorphic,
-/// false if they are not.
-bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
-  // Two types with differing kinds are clearly not isomorphic.
-  if (DstTy->getTypeID() != SrcTy->getTypeID())
-    return false;
-
-  // If we have an entry in the MappedTypes table, then we have our answer.
-  Type *&Entry = MappedTypes[SrcTy];
-  if (Entry)
-    return Entry == DstTy;
-
-  // Two identical types are clearly isomorphic. Remember this
-  // non-speculatively.
-  if (DstTy == SrcTy) {
-    Entry = DstTy;
-    return true;
-  }
-
-  // Okay, we have two types with identical kinds that we haven't seen before.
-
-  // If this is an opaque struct type, special case it.
-  if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
-    // Mapping an opaque type to any struct, just keep the dest struct.
-    if (SSTy->isOpaque()) {
-      Entry = DstTy;
-      SpeculativeTypes.push_back(SrcTy);
-      return true;
-    }
-
-    // Mapping a non-opaque source type to an opaque dest. If this is the first
-    // type that we're mapping onto this destination type then we succeed. Keep
-    // the dest, but fill it in later. If this is the second (different) type
-    // that we're trying to map onto the same opaque type then we fail.
-    if (cast<StructType>(DstTy)->isOpaque()) {
-      // We can only map one source type onto the opaque destination type.
-      if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)).second)
-        return false;
-      SrcDefinitionsToResolve.push_back(SSTy);
-      SpeculativeTypes.push_back(SrcTy);
-      SpeculativeDstOpaqueTypes.push_back(cast<StructType>(DstTy));
-      Entry = DstTy;
-      return true;
-    }
-  }
-
-  // If the number of subtypes disagree between the two types, then we fail.
-  if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
-    return false;
-
-  // Fail if any of the extra properties (e.g. array size) of the type disagree.
-  if (isa<IntegerType>(DstTy))
-    return false; // bitwidth disagrees.
-  if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
-    if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
-      return false;
-
-  } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
-    if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
-      return false;
-  } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
-    StructType *SSTy = cast<StructType>(SrcTy);
-    if (DSTy->isLiteral() != SSTy->isLiteral() ||
-        DSTy->isPacked() != SSTy->isPacked())
-      return false;
-  } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
-    if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
-      return false;
-  } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
-    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
-      return false;
-  }
-
-  // Otherwise, we speculate that these two types will line up and recursively
-  // check the subelements.
-  Entry = DstTy;
-  SpeculativeTypes.push_back(SrcTy);
-
-  for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I)
-    if (!areTypesIsomorphic(DstTy->getContainedType(I),
-                            SrcTy->getContainedType(I)))
-      return false;
-
-  // If everything seems to have lined up, then everything is great.
-  return true;
-}
-
-void TypeMapTy::linkDefinedTypeBodies() {
-  SmallVector<Type *, 16> Elements;
-  for (StructType *SrcSTy : SrcDefinitionsToResolve) {
-    StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
-    assert(DstSTy->isOpaque());
-
-    // Map the body of the source type over to a new body for the dest type.
-    Elements.resize(SrcSTy->getNumElements());
-    for (unsigned I = 0, E = Elements.size(); I != E; ++I)
-      Elements[I] = get(SrcSTy->getElementType(I));
-
-    DstSTy->setBody(Elements, SrcSTy->isPacked());
-    DstStructTypesSet.switchToNonOpaque(DstSTy);
-  }
-  SrcDefinitionsToResolve.clear();
-  DstResolvedOpaqueTypes.clear();
-}
-
-void TypeMapTy::finishType(StructType *DTy, StructType *STy,
-                           ArrayRef<Type *> ETypes) {
-  DTy->setBody(ETypes, STy->isPacked());
-
-  // Steal STy's name.
-  if (STy->hasName()) {
-    SmallString<16> TmpName = STy->getName();
-    STy->setName("");
-    DTy->setName(TmpName);
-  }
-
-  DstStructTypesSet.addNonOpaque(DTy);
-}
-
-Type *TypeMapTy::get(Type *Ty) {
-  SmallPtrSet<StructType *, 8> Visited;
-  return get(Ty, Visited);
-}
-
-Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
-  // If we already have an entry for this type, return it.
-  Type **Entry = &MappedTypes[Ty];
-  if (*Entry)
-    return *Entry;
-
-  // These are types that LLVM itself will unique.
-  bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
-
-#ifndef NDEBUG
-  if (!IsUniqued) {
-    for (auto &Pair : MappedTypes) {
-      assert(!(Pair.first != Ty && Pair.second == Ty) &&
-             "mapping to a source type");
-    }
-  }
-#endif
-
-  if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
-    StructType *DTy = StructType::create(Ty->getContext());
-    return *Entry = DTy;
-  }
-
-  // If this is not a recursive type, then just map all of the elements and
-  // then rebuild the type from inside out.
-  SmallVector<Type *, 4> ElementTypes;
-
-  // If there are no element types to map, then the type is itself. This is
-  // true for the anonymous {} struct, things like 'float', integers, etc.
-  if (Ty->getNumContainedTypes() == 0 && IsUniqued)
-    return *Entry = Ty;
-
-  // Remap all of the elements, keeping track of whether any of them change.
-  bool AnyChange = false;
-  ElementTypes.resize(Ty->getNumContainedTypes());
-  for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
-    ElementTypes[I] = get(Ty->getContainedType(I), Visited);
-    AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
-  }
-
-  // If we found our type while recursively processing stuff, just use it.
-  Entry = &MappedTypes[Ty];
-  if (*Entry) {
-    if (auto *DTy = dyn_cast<StructType>(*Entry)) {
-      if (DTy->isOpaque()) {
-        auto *STy = cast<StructType>(Ty);
-        finishType(DTy, STy, ElementTypes);
-      }
-    }
-    return *Entry;
-  }
-
-  // If all of the element types mapped directly over and the type is not
-  // a named struct, then the type is usable as-is.
-  if (!AnyChange && IsUniqued)
-    return *Entry = Ty;
-
-  // Otherwise, rebuild a modified type.
-  switch (Ty->getTypeID()) {
-  default:
-    llvm_unreachable("unknown derived type to remap");
-  case Type::ArrayTyID:
-    return *Entry = ArrayType::get(ElementTypes[0],
-                                   cast<ArrayType>(Ty)->getNumElements());
-  case Type::VectorTyID:
-    return *Entry = VectorType::get(ElementTypes[0],
-                                    cast<VectorType>(Ty)->getNumElements());
-  case Type::PointerTyID:
-    return *Entry = PointerType::get(ElementTypes[0],
-                                     cast<PointerType>(Ty)->getAddressSpace());
-  case Type::FunctionTyID:
-    return *Entry = FunctionType::get(ElementTypes[0],
-                                      makeArrayRef(ElementTypes).slice(1),
-                                      cast<FunctionType>(Ty)->isVarArg());
-  case Type::StructTyID: {
-    auto *STy = cast<StructType>(Ty);
-    bool IsPacked = STy->isPacked();
-    if (IsUniqued)
-      return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
-
-    // If the type is opaque, we can just use it directly.
-    if (STy->isOpaque()) {
-      DstStructTypesSet.addOpaque(STy);
-      return *Entry = Ty;
-    }
-
-    if (StructType *OldT =
-            DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
-      STy->setName("");
-      return *Entry = OldT;
-    }
-
-    if (!AnyChange) {
-      DstStructTypesSet.addNonOpaque(STy);
-      return *Entry = Ty;
-    }
-
-    StructType *DTy = StructType::create(Ty->getContext());
-    finishType(DTy, STy, ElementTypes);
-    return *Entry = DTy;
-  }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// ModuleLinker implementation.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ModuleLinker;
-
-/// Creates prototypes for functions that are lazily linked on the fly. This
-/// speeds up linking for modules with many lazily linked functions of which
-/// few get used.
-class ValueMaterializerTy : public ValueMaterializer {
-  TypeMapTy &TypeMap;
-  Module *DstM;
-  std::vector<GlobalValue *> &LazilyLinkGlobalValues;
-
-public:
-  ValueMaterializerTy(TypeMapTy &TypeMap, Module *DstM,
-                      std::vector<GlobalValue *> &LazilyLinkGlobalValues)
-      : ValueMaterializer(), TypeMap(TypeMap), DstM(DstM),
-        LazilyLinkGlobalValues(LazilyLinkGlobalValues) {}
-
-  Value *materializeValueFor(Value *V) override;
-};
-
-class LinkDiagnosticInfo : public DiagnosticInfo {
-  const Twine &Msg;
-
-public:
-  LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
-  void print(DiagnosticPrinter &DP) const override;
-};
-LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity,
-                                       const Twine &Msg)
-    : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {}
-void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
 
 /// This is an implementation class for the LinkModules function, which is the
 /// entrypoint for this file.
 class ModuleLinker {
-  Module *DstM, *SrcM;
+  IRMover &Mover;
+  Module &SrcM;
 
-  TypeMapTy TypeMap;
-  ValueMaterializerTy ValMaterializer;
-
-  /// Mapping of values from what they used to be in Src, to what they are now
-  /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
-  /// due to the use of Value handles which the Linker doesn't actually need,
-  /// but this allows us to reuse the ValueMapper code.
-  ValueToValueMapTy ValueMap;
-
-  struct AppendingVarInfo {
-    GlobalVariable *NewGV;   // New aggregate global in dest module.
-    const Constant *DstInit; // Old initializer from dest module.
-    const Constant *SrcInit; // Old initializer from src module.
-  };
-
-  std::vector<AppendingVarInfo> AppendingVars;
-
-  // Set of items not to link in from source.
-  SmallPtrSet<const GlobalValue *, 16> DoNotLinkFromSource;
-
-  // Vector of GlobalValues to lazily link in.
-  std::vector<GlobalValue *> LazilyLinkGlobalValues;
-
-  /// Functions that have replaced other functions.
-  SmallPtrSet<const Function *, 16> OverridingFunctions;
-
-  DiagnosticHandlerFunction DiagnosticHandler;
+  SetVector<GlobalValue *> ValuesToLink;
+  StringSet<> Internalize;
 
   /// For symbol clashes, prefer those from Src.
-  bool OverrideFromSrc;
+  unsigned Flags;
 
-public:
-  ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
-               DiagnosticHandlerFunction DiagnosticHandler,
-               bool OverrideFromSrc)
-      : DstM(dstM), SrcM(srcM), TypeMap(Set),
-        ValMaterializer(TypeMap, DstM, LazilyLinkGlobalValues),
-        DiagnosticHandler(DiagnosticHandler), OverrideFromSrc(OverrideFromSrc) {
+  /// Function index passed into ModuleLinker for use in function
+  /// importing/exporting handling.
+  const FunctionInfoIndex *ImportIndex;
+
+  /// Functions to import from source module, all other functions are
+  /// imported as declarations instead of definitions.
+  DenseSet<const GlobalValue *> *FunctionsToImport;
+
+  /// Set to true if the given FunctionInfoIndex contains any functions
+  /// from this source module, in which case we must conservatively assume
+  /// that any of its functions may be imported into another module
+  /// as part of a different backend compilation process.
+  bool HasExportedFunctions = false;
+
+  /// Association between metadata value id and temporary metadata that
+  /// remains unmapped after function importing. Saved during function
+  /// importing and consumed during the metadata linking postpass.
+  DenseMap<unsigned, MDNode *> *ValIDToTempMDMap;
+
+  /// Used as the callback for lazy linking.
+  /// The mover has just hit GV and we have to decide if it, and other members
+  /// of the same comdat, should be linked. Every member to be linked is passed
+  /// to Add.
+  void addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add);
+
+  bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
+  bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
+  bool shouldInternalizeLinkedSymbols() {
+    return Flags & Linker::InternalizeLinkedSymbols;
   }
 
-  bool run();
+  /// Check if we should promote the given local value to global scope.
+  bool doPromoteLocalToGlobal(const GlobalValue *SGV);
 
-private:
   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
                             const GlobalValue &Src);
 
-  /// Helper method for setting a message and returning an error code.
+  /// Should we have mover and linker error diag info?
   bool emitError(const Twine &Message) {
-    DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message));
+    SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
     return true;
   }
 
-  void emitWarning(const Twine &Message) {
-    DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message));
-  }
-
-  bool getComdatLeader(Module *M, StringRef ComdatName,
+  bool getComdatLeader(Module &M, StringRef ComdatName,
                        const GlobalVariable *&GVar);
   bool computeResultingSelectionKind(StringRef ComdatName,
                                      Comdat::SelectionKind Src,
@@ -463,17 +88,20 @@ private:
       ComdatsChosen;
   bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
                       bool &LinkFromSrc);
+  // Keep track of the global value members of each comdat in source.
+  DenseMap<const Comdat *, std::vector<GlobalValue *>> ComdatMembers;
 
   /// Given a global in the source module, return the global in the
   /// destination module that is being linked to, if any.
   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
+    Module &DstM = Mover.getModule();
     // If the source has no name it can't link. If it has local linkage,
     // there is no name match-up going on.
-    if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+    if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV)))
       return nullptr;
 
     // Otherwise see if we have a match in the destination module's symtab.
-    GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+    GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV));
     if (!DGV)
       return nullptr;
@@ -486,139 +114,237 @@ private:
     return DGV;
   }
 
-  void computeTypeMapping();
+  bool linkIfNeeded(GlobalValue &GV);
 
-  void upgradeMismatchedGlobalArray(StringRef Name);
-  void upgradeMismatchedGlobals();
+  /// Helper methods to check if we are importing from or potentially
+  /// exporting from the current source module.
+  bool isPerformingImport() const { return FunctionsToImport != nullptr; }
+  bool isModuleExporting() const { return HasExportedFunctions; }
 
-  bool linkAppendingVarProto(GlobalVariable *DstGV,
-                             const GlobalVariable *SrcGV);
+  /// If we are importing from the source module, checks if we should
+  /// import SGV as a definition, otherwise import as a declaration.
+  bool doImportAsDefinition(const GlobalValue *SGV);
 
-  bool linkGlobalValueProto(GlobalValue *GV);
-  bool linkModuleFlagsMetadata();
+  /// Get the name for SGV that should be used in the linked destination
+  /// module. Specifically, this handles the case where we need to rename
+  /// a local that is being promoted to global scope.
+  std::string getName(const GlobalValue *SGV);
 
-  void linkAppendingVarInit(const AppendingVarInfo &AVI);
+  /// Process globals so that they can be used in ThinLTO. This includes
+  /// promoting local variables so that they can be referenced externally by
+  /// thin lto imported globals and converting strong external globals to
+  /// available_externally.
+  void processGlobalsForThinLTO();
+  void processGlobalForThinLTO(GlobalValue &GV);
 
-  void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src);
-  bool linkFunctionBody(Function &Dst, Function &Src);
-  void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
-  bool linkGlobalValueBody(GlobalValue &Src);
+  /// Get the new linkage for SGV that should be used in the linked destination
+  /// module. Specifically, for ThinLTO importing or exporting it may need
+  /// to be adjusted.
+  GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
 
-  void linkNamedMDNodes();
-  void stripReplacedSubprograms();
+public:
+  ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
+               const FunctionInfoIndex *Index = nullptr,
+               DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+               DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
+      : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
+        FunctionsToImport(FunctionsToImport),
+        ValIDToTempMDMap(ValIDToTempMDMap) {
+    assert((ImportIndex || !FunctionsToImport) &&
+           "Expect a FunctionInfoIndex when importing");
+    // If we have a FunctionInfoIndex but no function to import,
+    // then this is the primary module being compiled in a ThinLTO
+    // backend compilation, and we need to see if it has functions that
+    // may be exported to another backend compilation.
-/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
-/// table. This is good for all clients except for us. Go through the trouble
-/// to force this back.
-static void forceRenaming(GlobalValue *GV, StringRef Name) {
-  // If the global doesn't force its name or if it already has the right name,
-  // there is nothing for us to do.
-  if (GV->hasLocalLinkage() || GV->getName() == Name)
-    return;
-
-  Module *M = GV->getParent();
-
-  // If there is a conflict, rename the conflict.
-  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
-    GV->takeName(ConflictGV);
-    ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
-    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
-  } else {
-    GV->setName(Name); // Force the name back
+bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
+  if (!isPerformingImport())
+    return false;
+  auto *GA = dyn_cast<GlobalAlias>(SGV);
+  if (GA) {
+    if (GA->hasWeakAnyLinkage())
+      return false;
+    const GlobalObject *GO = GA->getBaseObject();
+    if (!GO->hasLinkOnceODRLinkage())
+      return false;
+    return doImportAsDefinition(GO);
   }
-}
-
-/// Copy additional attributes (those not needed to construct a GlobalValue)
-/// from the SrcGV to the DestGV.
-static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
-  DestGV->copyAttributesFrom(SrcGV);
-  forceRenaming(DestGV, SrcGV->getName());
-}
-
-static bool isLessConstraining(GlobalValue::VisibilityTypes a,
-                               GlobalValue::VisibilityTypes b) {
-  if (a == GlobalValue::HiddenVisibility)
-    return false;
-  if (b == GlobalValue::HiddenVisibility)
+  // Always import GlobalVariable definitions, except for the special
+  // case of WeakAny, which is imported as an ExternalWeak declaration
+  // (see comments in ModuleLinker::getLinkage). The linkage changes
+  // described in ModuleLinker::getLinkage ensure the correct behavior (e.g.
+  // global variables with external linkage are transformed to
+  // available_externally definitions, which are ultimately turned into
+  // declarations after the EliminateAvailableExternally pass).
+  if (isa<GlobalVariable>(SGV) && !SGV->isDeclaration() &&
+      !SGV->hasWeakAnyLinkage())
     return true;
-  if (a == GlobalValue::ProtectedVisibility)
-    return false;
-  if (b == GlobalValue::ProtectedVisibility)
+  // Only import the function if it was requested for importing.
+  auto *SF = dyn_cast<Function>(SGV);
+  if (SF && FunctionsToImport->count(SF))
    return true;
+  // Otherwise no.
   return false;
 }

-/// Loop through the global variables in the src module and merge them into the
-/// dest module.
-static GlobalVariable *copyGlobalVariableProto(TypeMapTy &TypeMap, Module &DstM,
-                                               const GlobalVariable *SGVar) {
-  // No linking to be performed or linking from the source: simply create an
-  // identical version of the symbol over in the dest module... the
-  // initializer will be filled in later by LinkGlobalInits.
- GlobalVariable *NewDGV = new GlobalVariable( - DstM, TypeMap.get(SGVar->getType()->getElementType()), - SGVar->isConstant(), SGVar->getLinkage(), /*init*/ nullptr, - SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), - SGVar->getType()->getAddressSpace()); +bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { + assert(SGV->hasLocalLinkage()); + // Both the imported references and the original local variable must + // be promoted. + if (!isPerformingImport() && !isModuleExporting()) + return false; - return NewDGV; + // Local const variables never need to be promoted unless they are address + // taken. The imported uses can simply use the clone created in this module. + // For now we are conservative in determining which variables are not + // address taken by checking the unnamed addr flag. To be more aggressive, + // the address taken information must be checked earlier during parsing + // of the module and recorded in the function index for use when importing + // from that module. + auto *GVar = dyn_cast(SGV); + if (GVar && GVar->isConstant() && GVar->hasUnnamedAddr()) + return false; + + // Eventually we only need to promote functions in the exporting module that + // are referenced by a potentially exported function (i.e. one that is in the + // function index). + return true; } -/// Link the function in the source module into the destination module if -/// needed, setting up mapping information. -static Function *copyFunctionProto(TypeMapTy &TypeMap, Module &DstM, - const Function *SF) { - // If there is no linkage to be performed or we are linking from the source, - // bring SF over. - return Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(), - SF->getName(), &DstM); +std::string ModuleLinker::getName(const GlobalValue *SGV) { + // For locals that must be promoted to global scope, ensure that + // the promoted name uniquely identifies the copy in the original module, + // using the ID assigned during combined index creation. When importing, + // we rename all locals (not just those that are promoted) in order to + // avoid naming conflicts between locals imported from different modules. + if (SGV->hasLocalLinkage() && + (doPromoteLocalToGlobal(SGV) || isPerformingImport())) + return FunctionInfoIndex::getGlobalNameForLocal( + SGV->getName(), + ImportIndex->getModuleId(SGV->getParent()->getModuleIdentifier())); + return SGV->getName(); } -/// Set up prototypes for any aliases that come over from the source module. -static GlobalAlias *copyGlobalAliasProto(TypeMapTy &TypeMap, Module &DstM, - const GlobalAlias *SGA) { - // If there is no linkage to be performed or we're linking from the source, - // bring over SGA. 
-  auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
-  return GlobalAlias::create(PTy, SGA->getLinkage(), SGA->getName(), &DstM);
-}
-
-static GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, Module &DstM,
-                                         const GlobalValue *SGV) {
-  GlobalValue *NewGV;
-  if (auto *SGVar = dyn_cast<GlobalVariable>(SGV))
-    NewGV = copyGlobalVariableProto(TypeMap, DstM, SGVar);
-  else if (auto *SF = dyn_cast<Function>(SGV))
-    NewGV = copyFunctionProto(TypeMap, DstM, SF);
-  else
-    NewGV = copyGlobalAliasProto(TypeMap, DstM, cast<GlobalAlias>(SGV));
-  copyGVAttributes(NewGV, SGV);
-  return NewGV;
-}
-
-Value *ValueMaterializerTy::materializeValueFor(Value *V) {
-  auto *SGV = dyn_cast<GlobalValue>(V);
-  if (!SGV)
-    return nullptr;
-
-  GlobalValue *DGV = copyGlobalValueProto(TypeMap, *DstM, SGV);
-
-  if (Comdat *SC = SGV->getComdat()) {
-    if (auto *DGO = dyn_cast<GlobalObject>(DGV)) {
-      Comdat *DC = DstM->getOrInsertComdat(SC->getName());
-      DGO->setComdat(DC);
-    }
+GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
+  // Any local variable that is referenced by an exported function needs
+  // to be promoted to global scope. Since we don't currently know which
+  // functions reference which local variables/functions, we must treat
+  // all as potentially exported if this module is exporting anything.
+  if (isModuleExporting()) {
+    if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV))
+      return GlobalValue::ExternalLinkage;
+    return SGV->getLinkage();
   }

-  LazilyLinkGlobalValues.push_back(SGV);
-  return DGV;
+  // Otherwise, if we aren't importing, no linkage change is needed.
+  if (!isPerformingImport())
+    return SGV->getLinkage();
+
+  switch (SGV->getLinkage()) {
+  case GlobalValue::ExternalLinkage:
+    // External definitions are converted to available_externally
+    // definitions upon import, so that they are available for inlining
+    // and/or optimization, but are turned into declarations later
+    // during the EliminateAvailableExternally pass.
+    if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+      return GlobalValue::AvailableExternallyLinkage;
+    // An imported external declaration stays external.
+    return SGV->getLinkage();
+
+  case GlobalValue::AvailableExternallyLinkage:
+    // An imported available_externally definition converts
+    // to external if imported as a declaration.
+    if (!doImportAsDefinition(SGV))
+      return GlobalValue::ExternalLinkage;
+    // An imported available_externally declaration stays that way.
+    return SGV->getLinkage();
+
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+    // These both stay the same when importing the definition.
+    // The ThinLTO pass will eventually force-import their definitions.
+    return SGV->getLinkage();
+
+  case GlobalValue::WeakAnyLinkage:
+    // Can't import weak_any definitions correctly, or we might change the
+    // program semantics, since the linker will pick the first weak_any
+    // definition and importing would change the order they are seen by the
+    // linker. The module linking caller needs to enforce this.
+    assert(!doImportAsDefinition(SGV));
+    // If imported as a declaration, it becomes external_weak.
+    return GlobalValue::ExternalWeakLinkage;
+
+  case GlobalValue::WeakODRLinkage:
+    // For weak_odr linkage, there is a guarantee that all copies will be
+    // equivalent, so the issue described above for weak_any does not exist,
+    // and the definition can be imported. It can be treated similarly
+    // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !dyn_cast(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + + case GlobalValue::AppendingLinkage: + // It would be incorrect to import an appending linkage variable, + // since it would cause global constructors/destructors to be + // executed multiple times. This should have already been handled + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage here + // as this helper is called more widely in getLinkedToGlobal. + return GlobalValue::AppendingLinkage; + + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + // If we are promoting the local to global scope, it is handled + // similarly to a normal externally visible global. + if (doPromoteLocalToGlobal(SGV)) { + if (doImportAsDefinition(SGV) && !dyn_cast(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + } + // A non-promoted imported local definition stays local. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::ExternalWeakLinkage: + // External weak doesn't apply to definitions, must be a declaration. + assert(!doImportAsDefinition(SGV)); + // Linkage stays external_weak. + return SGV->getLinkage(); + + case GlobalValue::CommonLinkage: + // Linkage stays common on definitions. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + } + + llvm_unreachable("unknown linkage type"); } -bool ModuleLinker::getComdatLeader(Module *M, StringRef ComdatName, +static GlobalValue::VisibilityTypes +getMinVisibility(GlobalValue::VisibilityTypes A, + GlobalValue::VisibilityTypes B) { + if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility) + return GlobalValue::HiddenVisibility; + if (A == GlobalValue::ProtectedVisibility || + B == GlobalValue::ProtectedVisibility) + return GlobalValue::ProtectedVisibility; + return GlobalValue::DefaultVisibility; +} + +bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar) { - const GlobalValue *GVal = M->getNamedValue(ComdatName); + const GlobalValue *GVal = M.getNamedValue(ComdatName); if (const auto *GA = dyn_cast_or_null(GVal)) { GVal = GA->getBaseObject(); if (!GVal) @@ -641,6 +367,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, Comdat::SelectionKind Dst, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); // The ability to mix Comdat::SelectionKind::Any with // Comdat::SelectionKind::Largest is a behavior that comes from COFF. 
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || @@ -677,8 +404,8 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, getComdatLeader(SrcM, ComdatName, SrcGV)) return true; - const DataLayout &DstDL = DstM->getDataLayout(); - const DataLayout &SrcDL = SrcM->getDataLayout(); + const DataLayout &DstDL = DstM.getDataLayout(); + const DataLayout &SrcDL = SrcM.getDataLayout(); uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType()); uint64_t SrcSize = @@ -708,9 +435,10 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, bool ModuleLinker::getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); Comdat::SelectionKind SSK = SrcC->getSelectionKind(); StringRef ComdatName = SrcC->getName(); - Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable(); + Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName); if (DstCI == ComdatSymTab.end()) { @@ -729,14 +457,17 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC, bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src) { + // Should we unconditionally use the Src? - if (OverrideFromSrc) { + if (shouldOverrideFromSrc()) { LinkFromSrc = true; return false; } // We always have to add Src if it has appending linkage. if (Src.hasAppendingLinkage()) { + // Should have prevented importing for appending linkage in linkIfNeeded. + assert(!isPerformingImport()); LinkFromSrc = true; return false; } @@ -744,6 +475,28 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, bool SrcIsDeclaration = Src.isDeclarationForLinker(); bool DestIsDeclaration = Dest.isDeclarationForLinker(); + if (isPerformingImport()) { + if (isa(&Src)) { + // For functions, LinkFromSrc iff this is a function requested + // for importing. For variables, decide below normally. + LinkFromSrc = FunctionsToImport->count(&Src); + return false; + } + + // Check if this is an alias with an already existing definition + // in Dest, which must have come from a prior importing pass from + // the same Src module. Unlike imported function and variable + // definitions, which are imported as available_externally and are + // not definitions for the linker, that is not a valid linkage for + // imported aliases which must be definitions. Simply use the existing + // Dest copy. + if (isa(&Src) && !DestIsDeclaration) { + assert(isa(&Dest)); + LinkFromSrc = false; + return false; + } + } + if (SrcIsDeclaration) { // If Src is external or if both Src & Dest are external.. Just link the // external globals, we aren't adding anything. @@ -753,7 +506,12 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, return false; } // If the Dest is weak, use the source linkage. - LinkFromSrc = Dest.hasExternalWeakLinkage(); + if (Dest.hasExternalWeakLinkage()) { + LinkFromSrc = true; + return false; + } + // Link an available_externally over a declaration. + LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration(); return false; } @@ -808,730 +566,117 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, "': symbol multiply defined!"); } -/// Loop over all of the linked values to compute type mappings. For example, -/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct -/// types 'Foo' but one got renamed when the module was loaded into the same -/// LLVMContext. 
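A subtle change in shouldLinkFromSource above: an available_externally body in Src is a declaration for the linker but not a plain declaration, so it now wins over a bare declaration in Dest. Expected outcomes for a few simple cases, ignoring dllimport, comdats, and weak-vs-weak resolution (a rough sketch inferred from the hunks above, not an exhaustive table):

    // Src                          Dest                         LinkFromSrc
    // plain declaration            definition                   false
    // available_externally def     plain declaration            true  (new)
    // plain declaration            external_weak declaration    true
    // definition                   plain declaration            true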
-void ModuleLinker::computeTypeMapping() { - for (GlobalValue &SGV : SrcM->globals()) { - GlobalValue *DGV = getLinkedToGlobal(&SGV); - if (!DGV) - continue; +bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { + GlobalValue *DGV = getLinkedToGlobal(&GV); - if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) { - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - continue; + if (shouldLinkOnlyNeeded() && !(DGV && DGV->isDeclaration())) + return false; + + if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) { + auto *DGVar = dyn_cast(DGV); + auto *SGVar = dyn_cast(&GV); + if (DGVar && SGVar) { + if (DGVar->isDeclaration() && SGVar->isDeclaration() && + (!DGVar->isConstant() || !SGVar->isConstant())) { + DGVar->setConstant(false); + SGVar->setConstant(false); + } + if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { + unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment()); + SGVar->setAlignment(Align); + DGVar->setAlignment(Align); + } } - // Unify the element type of appending arrays. - ArrayType *DAT = cast(DGV->getType()->getElementType()); - ArrayType *SAT = cast(SGV.getType()->getElementType()); - TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType()); + GlobalValue::VisibilityTypes Visibility = + getMinVisibility(DGV->getVisibility(), GV.getVisibility()); + DGV->setVisibility(Visibility); + GV.setVisibility(Visibility); + + bool HasUnnamedAddr = GV.hasUnnamedAddr() && DGV->hasUnnamedAddr(); + DGV->setUnnamedAddr(HasUnnamedAddr); + GV.setUnnamedAddr(HasUnnamedAddr); } - for (GlobalValue &SGV : *SrcM) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } + // Don't want to append to global_ctors list, for example, when we + // are importing for ThinLTO, otherwise the global ctors and dtors + // get executed multiple times for local variables (the latter causing + // double frees). + if (GV.hasAppendingLinkage() && isPerformingImport()) + return false; - for (GlobalValue &SGV : SrcM->aliases()) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } + if (isPerformingImport() && !doImportAsDefinition(&GV)) + return false; - // Incorporate types by name, scanning all the types in the source module. - // At this point, the destination module may have a type "%foo = { i32 }" for - // example. When the source module got loaded into the same LLVMContext, if - // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - std::vector Types = SrcM->getIdentifiedStructTypes(); - for (StructType *ST : Types) { - if (!ST->hasName()) - continue; + if (!DGV && !shouldOverrideFromSrc() && + (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() || + GV.hasAvailableExternallyLinkage())) + return false; - // Check to see if there is a dot in the name followed by a digit. - size_t DotPos = ST->getName().rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || - ST->getName().back() == '.' || - !isdigit(static_cast(ST->getName()[DotPos + 1]))) - continue; + if (GV.isDeclaration()) + return false; - // Check to see if the destination module has a struct with the prefix name. - StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)); - if (!DST) - continue; - - // Don't use it if this actually came from the source module. They're in - // the same LLVMContext after all. Also don't use it unless the type is - // actually used in the destination module. 
This can happen in situations - // like this: - // - // Module A Module B - // -------- -------- - // %Z = type { %A } %B = type { %C.1 } - // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* } - // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] } - // %C = type { i8* } %B.3 = type { %C.1 } - // - // When we link Module B with Module A, the '%B' in Module B is - // used. However, that would then use '%C.1'. But when we process '%C.1', - // we prefer to take the '%C' version. So we are then left with both - // '%C.1' and '%C' being used for the same types. This leads to some - // variables using one type and some using the other. - if (TypeMap.DstStructTypesSet.hasType(DST)) - TypeMap.addTypeMapping(DST, ST); - } - - // Now that we have discovered all of the type equivalences, get a body for - // any 'opaque' types in the dest module that are now resolved. - TypeMap.linkDefinedTypeBodies(); -} - -static void upgradeGlobalArray(GlobalVariable *GV) { - ArrayType *ATy = cast(GV->getType()->getElementType()); - StructType *OldTy = cast(ATy->getElementType()); - assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements"); - - // Get the upgraded 3 element type. - PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo(); - Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1), - VoidPtrTy}; - StructType *NewTy = StructType::get(GV->getContext(), Tys, false); - - // Build new constants with a null third field filled in. - Constant *OldInitC = GV->getInitializer(); - ConstantArray *OldInit = dyn_cast(OldInitC); - if (!OldInit && !isa(OldInitC)) - // Invalid initializer; give up. - return; - std::vector Initializers; - if (OldInit && OldInit->getNumOperands()) { - Value *Null = Constant::getNullValue(VoidPtrTy); - for (Use &U : OldInit->operands()) { - ConstantStruct *Init = cast(U.get()); - Initializers.push_back(ConstantStruct::get( - NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr)); - } - } - assert(Initializers.size() == ATy->getNumElements() && - "Failed to copy all array elements"); - - // Replace the old GV with a new one. - ATy = ArrayType::get(NewTy, Initializers.size()); - Constant *NewInit = ConstantArray::get(ATy, Initializers); - GlobalVariable *NewGV = new GlobalVariable( - *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "", - GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), - GV->isExternallyInitialized()); - NewGV->copyAttributesFrom(GV); - NewGV->takeName(GV); - assert(GV->use_empty() && "program cannot use initializer list"); - GV->eraseFromParent(); -} - -void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) { - // Look for the global arrays. - auto *DstGV = dyn_cast_or_null(DstM->getNamedValue(Name)); - if (!DstGV) - return; - auto *SrcGV = dyn_cast_or_null(SrcM->getNamedValue(Name)); - if (!SrcGV) - return; - - // Check if the types already match. - auto *DstTy = cast(DstGV->getType()->getElementType()); - auto *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - if (DstTy == SrcTy) - return; - - // Grab the element types. We can only upgrade an array of a two-field - // struct. Only bother if the other one has three-fields. 
- auto *DstEltTy = cast(DstTy->getElementType()); - auto *SrcEltTy = cast(SrcTy->getElementType()); - if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) { - upgradeGlobalArray(DstGV); - return; - } - if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2) - upgradeGlobalArray(SrcGV); - - // We can't upgrade any other differences. -} - -void ModuleLinker::upgradeMismatchedGlobals() { - upgradeMismatchedGlobalArray("llvm.global_ctors"); - upgradeMismatchedGlobalArray("llvm.global_dtors"); -} - -/// If there were any appending global variables, link them together now. -/// Return true on error. -bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV) { - - if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) - return emitError("Linking globals named '" + SrcGV->getName() + - "': can only link appending global with another appending global!"); - - ArrayType *DstTy = cast(DstGV->getType()->getElementType()); - ArrayType *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - Type *EltTy = DstTy->getElementType(); - - // Check to see that they two arrays agree on type. - if (EltTy != SrcTy->getElementType()) - return emitError("Appending variables with different element types!"); - if (DstGV->isConstant() != SrcGV->isConstant()) - return emitError("Appending variables linked with different const'ness!"); - - if (DstGV->getAlignment() != SrcGV->getAlignment()) - return emitError( - "Appending variables with different alignment need to be linked!"); - - if (DstGV->getVisibility() != SrcGV->getVisibility()) - return emitError( - "Appending variables with different visibility need to be linked!"); - - if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) - return emitError( - "Appending variables with different unnamed_addr need to be linked!"); - - if (StringRef(DstGV->getSection()) != SrcGV->getSection()) - return emitError( - "Appending variables with different section name need to be linked!"); - - uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements(); - ArrayType *NewType = ArrayType::get(EltTy, NewSize); - - // Create the new global variable. - GlobalVariable *NG = - new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), - DstGV->getLinkage(), /*init*/nullptr, /*name*/"", DstGV, - DstGV->getThreadLocalMode(), - DstGV->getType()->getAddressSpace()); - - // Propagate alignment, visibility and section info. - copyGVAttributes(NG, DstGV); - - AppendingVarInfo AVI; - AVI.NewGV = NG; - AVI.DstInit = DstGV->getInitializer(); - AVI.SrcInit = SrcGV->getInitializer(); - AppendingVars.push_back(AVI); - - // Replace any uses of the two global variables with uses of the new - // global. - ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); - - DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); - DstGV->eraseFromParent(); - - // Track the source variable so we don't try to link it. - DoNotLinkFromSource.insert(SrcGV); - - return false; -} - -bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { - GlobalValue *DGV = getLinkedToGlobal(SGV); - - // Handle the ultra special appending linkage case first. 
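The removed linkAppendingVarProto above only concatenates once every other property (constness, alignment, visibility, unnamed_addr, section) agrees; the merged global is simply a wider array of the shared element type. The size arithmetic in isolation, with hypothetical element counts and EltTy as in the removed code:

    // Two appending copies of llvm.global_ctors, with 3 and 2 entries:
    //   [3 x EltTy] in Dest, [2 x EltTy] in Src.
    uint64_t NewSize = 3 + 2;
    // The destination gets a single [5 x EltTy] array holding both halves.
    ArrayType *NewType = ArrayType::get(EltTy, NewSize);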
- if (DGV && DGV->hasAppendingLinkage()) - return linkAppendingVarProto(cast(DGV), - cast(SGV)); - - bool LinkFromSrc = true; - Comdat *C = nullptr; - GlobalValue::VisibilityTypes Visibility = SGV->getVisibility(); - bool HasUnnamedAddr = SGV->hasUnnamedAddr(); - - if (const Comdat *SC = SGV->getComdat()) { + if (const Comdat *SC = GV.getComdat()) { + bool LinkFromSrc; Comdat::SelectionKind SK; std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; - C = DstM->getOrInsertComdat(SC->getName()); - C->setSelectionKind(SK); - } else if (DGV) { - if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV)) - return true; - } - - if (!LinkFromSrc) { - // Track the source global so that we don't attempt to copy it over when - // processing global initializers. - DoNotLinkFromSource.insert(SGV); - - if (DGV) - // Make sure to remember this mapping. - ValueMap[SGV] = - ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType())); - } - - if (DGV) { - Visibility = isLessConstraining(Visibility, DGV->getVisibility()) - ? DGV->getVisibility() - : Visibility; - HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); - } - - if (!LinkFromSrc && !DGV) - return false; - - GlobalValue *NewGV; - if (!LinkFromSrc) { - NewGV = DGV; - } else { - // If the GV is to be lazily linked, don't create it just yet. - // The ValueMaterializerTy will deal with creating it if it's used. - if (!DGV && !OverrideFromSrc && - (SGV->hasLocalLinkage() || SGV->hasLinkOnceLinkage() || - SGV->hasAvailableExternallyLinkage())) { - DoNotLinkFromSource.insert(SGV); - return false; - } - - NewGV = copyGlobalValueProto(TypeMap, *DstM, SGV); - - if (DGV && isa(DGV)) - if (auto *NewF = dyn_cast(NewGV)) - OverridingFunctions.insert(NewF); - } - - NewGV->setUnnamedAddr(HasUnnamedAddr); - NewGV->setVisibility(Visibility); - - if (auto *NewGO = dyn_cast(NewGV)) { - if (C) - NewGO->setComdat(C); - - if (DGV && DGV->hasCommonLinkage() && SGV->hasCommonLinkage()) - NewGO->setAlignment(std::max(DGV->getAlignment(), SGV->getAlignment())); - } - - if (auto *NewGVar = dyn_cast(NewGV)) { - auto *DGVar = dyn_cast_or_null(DGV); - auto *SGVar = dyn_cast(SGV); - if (DGVar && SGVar && DGVar->isDeclaration() && SGVar->isDeclaration() && - (!DGVar->isConstant() || !SGVar->isConstant())) - NewGVar->setConstant(false); - } - - // Make sure to remember this mapping. - if (NewGV != DGV) { - if (DGV) { - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); - DGV->eraseFromParent(); - } - ValueMap[SGV] = NewGV; - } - - return false; -} - -static void getArrayElements(const Constant *C, - SmallVectorImpl &Dest) { - unsigned NumElements = cast(C->getType())->getNumElements(); - - for (unsigned i = 0; i != NumElements; ++i) - Dest.push_back(C->getAggregateElement(i)); -} - -void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { - // Merge the initializer. 
- SmallVector DstElements; - getArrayElements(AVI.DstInit, DstElements); - - SmallVector SrcElements; - getArrayElements(AVI.SrcInit, SrcElements); - - ArrayType *NewType = cast(AVI.NewGV->getType()->getElementType()); - - StringRef Name = AVI.NewGV->getName(); - bool IsNewStructor = - (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") && - cast(NewType->getElementType())->getNumElements() == 3; - - for (auto *V : SrcElements) { - if (IsNewStructor) { - Constant *Key = V->getAggregateElement(2); - if (DoNotLinkFromSource.count(Key)) - continue; - } - DstElements.push_back( - MapValue(V, ValueMap, RF_None, &TypeMap, &ValMaterializer)); - } - if (IsNewStructor) { - NewType = ArrayType::get(NewType->getElementType(), DstElements.size()); - AVI.NewGV->mutateType(PointerType::get(NewType, 0)); - } - - AVI.NewGV->setInitializer(ConstantArray::get(NewType, DstElements)); -} - -/// Update the initializers in the Dest module now that all globals that may be -/// referenced are in Dest. -void ModuleLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) { - // Figure out what the initializer looks like in the dest module. - Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, RF_None, &TypeMap, - &ValMaterializer)); -} - -/// Copy the source function over into the dest function and fix up references -/// to values. At this point we know that Dest is an external function, and -/// that Src is not. -bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) { - assert(Dst.isDeclaration() && !Src.isDeclaration()); - - // Materialize if needed. - if (std::error_code EC = Src.materialize()) - return emitError(EC.message()); - - // Link in the prefix data. - if (Src.hasPrefixData()) - Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, RF_None, &TypeMap, - &ValMaterializer)); - - // Link in the prologue data. - if (Src.hasPrologueData()) - Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, RF_None, - &TypeMap, &ValMaterializer)); - - // Link in the personality function. - if (Src.hasPersonalityFn()) - Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, RF_None, - &TypeMap, &ValMaterializer)); - - // Go through and convert function arguments over, remembering the mapping. - Function::arg_iterator DI = Dst.arg_begin(); - for (Argument &Arg : Src.args()) { - DI->setName(Arg.getName()); // Copy the name over. - - // Add a mapping to our mapping. - ValueMap[&Arg] = DI; - ++DI; - } - - // Copy over the metadata attachments. - SmallVector, 8> MDs; - Src.getAllMetadata(MDs); - for (const auto &I : MDs) - Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_None, &TypeMap, - &ValMaterializer)); - - // Splice the body of the source function into the dest function. - Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList()); - - // At this point, all of the instructions and values of the function are now - // copied over. The only problem is that they are still referencing values in - // the Source function as operands. Loop through all of the operands of the - // functions and patch them up to point to the local versions. - for (BasicBlock &BB : Dst) - for (Instruction &I : BB) - RemapInstruction(&I, ValueMap, RF_IgnoreMissingEntries, &TypeMap, - &ValMaterializer); - - // There is no need to map the arguments anymore. 
- for (Argument &Arg : Src.args()) - ValueMap.erase(&Arg); - - Src.dematerialize(); - return false; -} - -void ModuleLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) { - Constant *Aliasee = Src.getAliasee(); - Constant *Val = - MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer); - Dst.setAliasee(Val); -} - -bool ModuleLinker::linkGlobalValueBody(GlobalValue &Src) { - Value *Dst = ValueMap[&Src]; - assert(Dst); - if (auto *F = dyn_cast(&Src)) - return linkFunctionBody(cast(*Dst), *F); - if (auto *GVar = dyn_cast(&Src)) { - linkGlobalInit(cast(*Dst), *GVar); + if (LinkFromSrc) + ValuesToLink.insert(&GV); return false; } - linkAliasBody(cast(*Dst), cast(Src)); + + bool LinkFromSrc = true; + if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV)) + return true; + if (LinkFromSrc) + ValuesToLink.insert(&GV); return false; } -/// Insert all of the named MDNodes in Src into the Dest module. -void ModuleLinker::linkNamedMDNodes() { - const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); - for (const NamedMDNode &NMD : SrcM->named_metadata()) { - // Don't link module flags here. Do them separately. - if (&NMD == SrcModFlags) - continue; - NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(NMD.getName()); - // Add Src elements into Dest node. - for (const MDNode *op : NMD.operands()) - DestNMD->addOperand( - MapMetadata(op, ValueMap, RF_None, &TypeMap, &ValMaterializer)); - } -} - -/// Drop DISubprograms that have been superseded. -/// -/// FIXME: this creates an asymmetric result: we strip functions from losing -/// subprograms in DstM, but leave losing subprograms in SrcM. -/// TODO: Remove this logic once the backend can correctly determine canonical -/// subprograms. -void ModuleLinker::stripReplacedSubprograms() { - // Avoid quadratic runtime by returning early when there's nothing to do. - if (OverridingFunctions.empty()) +void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) { + // Add these to the internalize list + if (!GV.hasLinkOnceLinkage()) return; - // Move the functions now, so the set gets cleared even on early returns. - auto Functions = std::move(OverridingFunctions); - OverridingFunctions.clear(); + if (shouldInternalizeLinkedSymbols()) + Internalize.insert(GV.getName()); + Add(GV); - // Drop functions from subprograms if they've been overridden by the new - // compile unit. - NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu"); - if (!CompileUnits) + const Comdat *SC = GV.getComdat(); + if (!SC) return; - for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { - auto *CU = cast(CompileUnits->getOperand(I)); - assert(CU && "Expected valid compile unit"); - - for (DISubprogram *SP : CU->getSubprograms()) { - if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction())) - continue; - - // Prevent DebugInfoFinder from tagging this as the canonical subprogram, - // since the canonical one is in the incoming module. - SP->replaceFunction(nullptr); - } + for (GlobalValue *GV2 : ComdatMembers[SC]) { + if (!GV2->hasLocalLinkage() && shouldInternalizeLinkedSymbols()) + Internalize.insert(GV2->getName()); + Add(*GV2); } } -/// Merge the linker flags in Src into the Dest module. -bool ModuleLinker::linkModuleFlagsMetadata() { - // If the source module has no module flags, we are done. - const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); - if (!SrcModFlags) return false; - - // If the destination module doesn't have module flags yet, then just copy - // over the source module's flags. 
- NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata(); - if (DstModFlags->getNumOperands() == 0) { - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) - DstModFlags->addOperand(SrcModFlags->getOperand(I)); - - return false; +void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { + if (GV.hasLocalLinkage() && + (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { + GV.setName(getName(&GV)); + GV.setLinkage(getLinkage(&GV)); + if (!GV.hasLocalLinkage()) + GV.setVisibility(GlobalValue::HiddenVisibility); + if (isModuleExporting()) + ValuesToLink.insert(&GV); + return; } - - // First build a map of the existing module flags and requirements. - DenseMap> Flags; - SmallSetVector Requirements; - for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { - MDNode *Op = DstModFlags->getOperand(I); - ConstantInt *Behavior = mdconst::extract(Op->getOperand(0)); - MDString *ID = cast(Op->getOperand(1)); - - if (Behavior->getZExtValue() == Module::Require) { - Requirements.insert(cast(Op->getOperand(2))); - } else { - Flags[ID] = std::make_pair(Op, I); - } - } - - // Merge in the flags from the source module, and also collect its set of - // requirements. - bool HasErr = false; - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { - MDNode *SrcOp = SrcModFlags->getOperand(I); - ConstantInt *SrcBehavior = - mdconst::extract(SrcOp->getOperand(0)); - MDString *ID = cast(SrcOp->getOperand(1)); - MDNode *DstOp; - unsigned DstIndex; - std::tie(DstOp, DstIndex) = Flags.lookup(ID); - unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); - - // If this is a requirement, add it and continue. - if (SrcBehaviorValue == Module::Require) { - // If the destination module does not already have this requirement, add - // it. - if (Requirements.insert(cast(SrcOp->getOperand(2)))) { - DstModFlags->addOperand(SrcOp); - } - continue; - } - - // If there is no existing flag with this ID, just add it. - if (!DstOp) { - Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); - DstModFlags->addOperand(SrcOp); - continue; - } - - // Otherwise, perform a merge. - ConstantInt *DstBehavior = - mdconst::extract(DstOp->getOperand(0)); - unsigned DstBehaviorValue = DstBehavior->getZExtValue(); - - // If either flag has override behavior, handle it first. - if (DstBehaviorValue == Module::Override) { - // Diagnose inconsistent flags which both have override behavior. - if (SrcBehaviorValue == Module::Override && - SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting override values"); - } - continue; - } else if (SrcBehaviorValue == Module::Override) { - // Update the destination flag to that of the source. - DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; - continue; - } - - // Diagnose inconsistent merge behavior types. - if (SrcBehaviorValue != DstBehaviorValue) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors"); - continue; - } - - auto replaceDstValue = [&](MDNode *New) { - Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; - MDNode *Flag = MDNode::get(DstM->getContext(), FlagOps); - DstModFlags->setOperand(DstIndex, Flag); - Flags[ID].first = Flag; - }; - - // Perform the merge for standard behavior types. 
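For reference, the behaviors being merged here are the ones producers attach via the long-standing Module::addModuleFlag API. A minimal sketch of a conflict that trips the Error case handled just below; "wchar_size" is an arbitrary flag name chosen for illustration:

    // Both modules pin the same flag with Error behavior but different values.
    ModuleA.addModuleFlag(Module::Error, "wchar_size", 4);
    ModuleB.addModuleFlag(Module::Error, "wchar_size", 2);
    // Linking the two then diagnoses:
    //   linking module flags 'wchar_size': IDs have conflicting values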
- switch (SrcBehaviorValue) { - case Module::Require: - case Module::Override: llvm_unreachable("not possible"); - case Module::Error: { - // Emit an error if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Warning: { - // Emit a warning if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - emitWarning("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Append: { - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - SmallVector MDs; - MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); - MDs.append(DstValue->op_begin(), DstValue->op_end()); - MDs.append(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM->getContext(), MDs)); - break; - } - case Module::AppendUnique: { - SmallSetVector Elts; - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - Elts.insert(DstValue->op_begin(), DstValue->op_end()); - Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM->getContext(), - makeArrayRef(Elts.begin(), Elts.end()))); - break; - } - } - } - - // Check all of the requirements. - for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { - MDNode *Requirement = Requirements[I]; - MDString *Flag = cast(Requirement->getOperand(0)); - Metadata *ReqValue = Requirement->getOperand(1); - - MDNode *Op = Flags[Flag].first; - if (!Op || Op->getOperand(2) != ReqValue) { - HasErr |= emitError("linking module flags '" + Flag->getString() + - "': does not have the required value"); - continue; - } - } - - return HasErr; + GV.setLinkage(getLinkage(&GV)); } -// This function returns true if the triples match. -static bool triplesMatch(const Triple &T0, const Triple &T1) { - // If vendor is apple, ignore the version number. - if (T0.getVendor() == Triple::Apple) - return T0.getArch() == T1.getArch() && - T0.getSubArch() == T1.getSubArch() && - T0.getVendor() == T1.getVendor() && - T0.getOS() == T1.getOS(); - - return T0 == T1; -} - -// This function returns the merged triple. -static std::string mergeTriples(const Triple &SrcTriple, const Triple &DstTriple) { - // If vendor is apple, pick the triple with the larger version number. - if (SrcTriple.getVendor() == Triple::Apple) - if (DstTriple.isOSVersionLT(SrcTriple)) - return SrcTriple.str(); - - return DstTriple.str(); +void ModuleLinker::processGlobalsForThinLTO() { + for (GlobalVariable &GV : SrcM.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : SrcM) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : SrcM.aliases()) + processGlobalForThinLTO(GA); } bool ModuleLinker::run() { - assert(DstM && "Null destination module"); - assert(SrcM && "Null source module"); - - // Inherit the target data from the source module if the destination module - // doesn't have one already. 
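The triplesMatch and mergeTriples helpers being removed above give Apple triples special treatment: the OS version is ignored when matching, and the merge keeps the higher version. A small illustration of how they behaved, with made-up version numbers:

    Triple A("x86_64-apple-macosx10.9.0");
    Triple B("x86_64-apple-macosx10.11.0");
    // Same arch, vendor, and OS, so the version difference is not a mismatch.
    assert(triplesMatch(A, B));
    // The merged triple keeps the newer OS version.
    assert(mergeTriples(A, B) == "x86_64-apple-macosx10.11.0");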
- if (DstM->getDataLayout().isDefault()) - DstM->setDataLayout(SrcM->getDataLayout()); - - if (SrcM->getDataLayout() != DstM->getDataLayout()) { - emitWarning("Linking two modules of different data layouts: '" + - SrcM->getModuleIdentifier() + "' is '" + - SrcM->getDataLayoutStr() + "' whereas '" + - DstM->getModuleIdentifier() + "' is '" + - DstM->getDataLayoutStr() + "'\n"); - } - - // Copy the target triple from the source to dest if the dest's is empty. - if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty()) - DstM->setTargetTriple(SrcM->getTargetTriple()); - - Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM->getTargetTriple()); - - if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) - emitWarning("Linking two modules of different target triples: " + - SrcM->getModuleIdentifier() + "' is '" + - SrcM->getTargetTriple() + "' whereas '" + - DstM->getModuleIdentifier() + "' is '" + - DstM->getTargetTriple() + "'\n"); - - DstM->setTargetTriple(mergeTriples(SrcTriple, DstTriple)); - - // Append the module inline asm string. - if (!SrcM->getModuleInlineAsm().empty()) { - if (DstM->getModuleInlineAsm().empty()) - DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm()); - else - DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+ - SrcM->getModuleInlineAsm()); - } - - // Loop over all of the linked values to compute type mappings. - computeTypeMapping(); - - ComdatsChosen.clear(); - for (const auto &SMEC : SrcM->getComdatSymbolTable()) { + for (const auto &SMEC : SrcM.getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); if (ComdatsChosen.count(&C)) continue; @@ -1542,233 +687,88 @@ bool ModuleLinker::run() { ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); } - // Upgrade mismatched global arrays. - upgradeMismatchedGlobals(); + for (GlobalVariable &GV : SrcM.globals()) + if (const Comdat *SC = GV.getComdat()) + ComdatMembers[SC].push_back(&GV); + + for (Function &SF : SrcM) + if (const Comdat *SC = SF.getComdat()) + ComdatMembers[SC].push_back(&SF); + + for (GlobalAlias &GA : SrcM.aliases()) + if (const Comdat *SC = GA.getComdat()) + ComdatMembers[SC].push_back(&GA); // Insert all of the globals in src into the DstM module... without linking // initializers (which could refer to functions not yet mapped over). - for (GlobalVariable &GV : SrcM->globals()) - if (linkGlobalValueProto(&GV)) + for (GlobalVariable &GV : SrcM.globals()) + if (linkIfNeeded(GV)) return true; - // Link the functions together between the two modules, without doing function - // bodies... this just adds external function prototypes to the DstM - // function... We do this so that when we begin processing function bodies, - // all of the global values that may be referenced are available in our - // ValueMap. - for (Function &F :*SrcM) - if (linkGlobalValueProto(&F)) + for (Function &SF : SrcM) + if (linkIfNeeded(SF)) return true; - // If there were any aliases, link them now. 
- for (GlobalAlias &GA : SrcM->aliases()) - if (linkGlobalValueProto(&GA)) + for (GlobalAlias &GA : SrcM.aliases()) + if (linkIfNeeded(GA)) return true; - for (const AppendingVarInfo &AppendingVar : AppendingVars) - linkAppendingVarInit(AppendingVar); + processGlobalsForThinLTO(); - for (const auto &Entry : DstM->getComdatSymbolTable()) { - const Comdat &C = Entry.getValue(); - if (C.getSelectionKind() == Comdat::Any) + for (unsigned I = 0; I < ValuesToLink.size(); ++I) { + GlobalValue *GV = ValuesToLink[I]; + const Comdat *SC = GV->getComdat(); + if (!SC) continue; - const GlobalValue *GV = SrcM->getNamedValue(C.getName()); - if (GV) - MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer); + for (GlobalValue *GV2 : ComdatMembers[SC]) + ValuesToLink.insert(GV2); } - // Strip replaced subprograms before mapping any metadata -- so that we're - // not changing metadata from the source module (note that - // linkGlobalValueBody() eventually calls RemapInstruction() and therefore - // MapMetadata()) -- but after linking global value protocols -- so that - // OverridingFunctions has been built. - stripReplacedSubprograms(); - - // Link in the function bodies that are defined in the source module into - // DstM. - for (Function &SF : *SrcM) { - // Skip if no body (function is external). - if (SF.isDeclaration()) - continue; - - // Skip if not linking from source. - if (DoNotLinkFromSource.count(&SF)) - continue; - - if (linkGlobalValueBody(SF)) - return true; + if (shouldInternalizeLinkedSymbols()) { + for (GlobalValue *GV : ValuesToLink) + Internalize.insert(GV->getName()); } - // Resolve all uses of aliases with aliasees. - for (GlobalAlias &Src : SrcM->aliases()) { - if (DoNotLinkFromSource.count(&Src)) - continue; - linkGlobalValueBody(Src); - } - - // Remap all of the named MDNodes in Src into the DstM module. We do this - // after linking GlobalValues so that MDNodes that reference GlobalValues - // are properly remapped. - linkNamedMDNodes(); - - // Merge the module flags into the DstM module. - if (linkModuleFlagsMetadata()) + if (Mover.move(SrcM, ValuesToLink.getArrayRef(), + [this](GlobalValue &GV, IRMover::ValueAdder Add) { + addLazyFor(GV, Add); + }, + ValIDToTempMDMap, false)) return true; - - // Update the initializers in the DstM module now that all globals that may - // be referenced are in DstM. - for (GlobalVariable &Src : SrcM->globals()) { - // Only process initialized GV's or ones not already in dest. - if (!Src.hasInitializer() || DoNotLinkFromSource.count(&Src)) - continue; - linkGlobalValueBody(Src); - } - - // Process vector of lazily linked in functions. 
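The ValuesToLink walk in run() above iterates by index rather than by iterator for a reason: insert() during the loop may grow the SetVector's underlying vector, which would invalidate iterators, while indices stay valid and newly appended members are still visited. The pattern reduced to its essentials; membersOfSameComdat is a hypothetical stand-in for the ComdatMembers lookup:

    // Worklist that may grow while being processed.
    SetVector<GlobalValue *> Worklist;
    for (unsigned I = 0; I < Worklist.size(); ++I) {
      GlobalValue *GV = Worklist[I];
      for (GlobalValue *Member : membersOfSameComdat(GV))
        Worklist.insert(Member); // may append; visited on a later iteration
    }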
- while (!LazilyLinkGlobalValues.empty()) { - GlobalValue *SGV = LazilyLinkGlobalValues.back(); - LazilyLinkGlobalValues.pop_back(); - - assert(!SGV->isDeclaration() && "users should not pass down decls"); - if (linkGlobalValueBody(*SGV)) - return true; + Module &DstM = Mover.getModule(); + for (auto &P : Internalize) { + GlobalValue *GV = DstM.getNamedValue(P.first()); + GV->setLinkage(GlobalValue::InternalLinkage); } return false; } -Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef E, bool P) - : ETypes(E), IsPacked(P) {} +Linker::Linker(Module &M) : Mover(M) {} -Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST) - : ETypes(ST->elements()), IsPacked(ST->isPacked()) {} - -bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const { - if (IsPacked != That.IsPacked) - return false; - if (ETypes != That.ETypes) - return false; - return true; +bool Linker::linkInModule(std::unique_ptr Src, unsigned Flags, + const FunctionInfoIndex *Index, + DenseSet *FunctionsToImport, + DenseMap *ValIDToTempMDMap) { + ModuleLinker ModLinker(Mover, *Src, Flags, Index, FunctionsToImport, + ValIDToTempMDMap); + return ModLinker.run(); } -bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const { - return !this->operator==(That); +bool Linker::linkInModuleForCAPI(Module &Src) { + ModuleLinker ModLinker(Mover, Src, 0, nullptr, nullptr); + return ModLinker.run(); } -StructType *Linker::StructTypeKeyInfo::getEmptyKey() { - return DenseMapInfo::getEmptyKey(); -} - -StructType *Linker::StructTypeKeyInfo::getTombstoneKey() { - return DenseMapInfo::getTombstoneKey(); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) { - return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), - Key.IsPacked); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) { - return getHashValue(KeyTy(ST)); -} - -bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS, - const StructType *RHS) { - if (RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - return LHS == KeyTy(RHS); -} - -bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS, - const StructType *RHS) { - if (RHS == getEmptyKey()) - return LHS == getEmptyKey(); - - if (RHS == getTombstoneKey()) - return LHS == getTombstoneKey(); - - return KeyTy(LHS) == KeyTy(RHS); -} - -void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); -} - -void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); - bool Removed = OpaqueStructTypes.erase(Ty); - (void)Removed; - assert(Removed); -} - -void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { - assert(Ty->isOpaque()); - OpaqueStructTypes.insert(Ty); -} - -StructType * -Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef ETypes, - bool IsPacked) { - Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked); - auto I = NonOpaqueStructTypes.find_as(Key); - if (I == NonOpaqueStructTypes.end()) - return nullptr; - return *I; -} - -bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) { - if (Ty->isOpaque()) - return OpaqueStructTypes.count(Ty); - auto I = NonOpaqueStructTypes.find(Ty); - if (I == NonOpaqueStructTypes.end()) - return false; - return *I == Ty; -} - -void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { - this->Composite = M; - this->DiagnosticHandler = DiagnosticHandler; - - TypeFinder StructTypes; - 
StructTypes.run(*M, true); - for (StructType *Ty : StructTypes) { - if (Ty->isOpaque()) - IdentifiedStructTypes.addOpaque(Ty); - else - IdentifiedStructTypes.addNonOpaque(Ty); - } -} - -Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { - init(M, DiagnosticHandler); -} - -Linker::Linker(Module *M) { - init(M, [this](const DiagnosticInfo &DI) { - Composite->getContext().diagnose(DI); - }); -} - -Linker::~Linker() { -} - -void Linker::deleteModule() { - delete Composite; - Composite = nullptr; -} - -bool Linker::linkInModule(Module *Src, bool OverrideSymbols) { - ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, - DiagnosticHandler, OverrideSymbols); - bool RetCode = TheLinker.run(); - Composite->dropTriviallyDeadConstantArrays(); - return RetCode; -} - -void Linker::setModule(Module *Dst) { - init(Dst, DiagnosticHandler); +bool Linker::linkInMetadata(Module &Src, + DenseMap *ValIDToTempMDMap) { + SetVector ValuesToLink; + if (Mover.move( + Src, ValuesToLink.getArrayRef(), + [this](GlobalValue &GV, IRMover::ValueAdder Add) { assert(false); }, + ValIDToTempMDMap, true)) + return true; + return false; } //===----------------------------------------------------------------------===// @@ -1780,34 +780,58 @@ void Linker::setModule(Module *Dst) { /// true is returned and ErrorMsg (if not null) is set to indicate the problem. /// Upon failure, the Dest module could be in a modified state, and shouldn't be /// relied on to be consistent. -bool Linker::LinkModules(Module *Dest, Module *Src, - DiagnosticHandlerFunction DiagnosticHandler) { - Linker L(Dest, DiagnosticHandler); - return L.linkInModule(Src); +bool Linker::linkModules(Module &Dest, std::unique_ptr Src, + unsigned Flags) { + Linker L(Dest); + return L.linkInModule(std::move(Src), Flags); } -bool Linker::LinkModules(Module *Dest, Module *Src) { - Linker L(Dest); - return L.linkInModule(Src); +std::unique_ptr +llvm::renameModuleForThinLTO(std::unique_ptr M, + const FunctionInfoIndex *Index) { + std::unique_ptr RenamedModule( + new llvm::Module(M->getModuleIdentifier(), M->getContext())); + Linker L(*RenamedModule.get()); + if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index)) + return nullptr; + return RenamedModule; } //===----------------------------------------------------------------------===// // C API. 
 //===----------------------------------------------------------------------===//

+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+  auto *Message = reinterpret_cast<std::string *>(C);
+  raw_string_ostream Stream(*Message);
+  DiagnosticPrinterRawOStream DP(Stream);
+  DI.print(DP);
+}
+
 LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
                          LLVMLinkerMode Unused, char **OutMessages) {
   Module *D = unwrap(Dest);
+  LLVMContext &Ctx = D->getContext();
+
+  LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+      Ctx.getDiagnosticHandler();
+  void *OldDiagnosticContext = Ctx.getDiagnosticContext();
   std::string Message;
-  raw_string_ostream Stream(Message);
-  DiagnosticPrinterRawOStream DP(Stream);
+  Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);

-  LLVMBool Result = Linker::LinkModules(
-      D, unwrap(Src), [&](const DiagnosticInfo &DI) { DI.print(DP); });
+  Linker L(*D);
+  Module *M = unwrap(Src);
+  LLVMBool Result = L.linkInModuleForCAPI(*M);

-  if (OutMessages && Result) {
-    Stream.flush();
+  Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
+
+  if (OutMessages && Result)
     *OutMessages = strdup(Message.c_str());
-  }
   return Result;
 }
+
+LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
+  Module *D = unwrap(Dest);
+  std::unique_ptr<Module> M(unwrap(Src));
+  return Linker::linkModules(*D, std::move(M));
+}
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 6554d6a9e60e..8c015644d8ad 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
   MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
   MCExpr.cpp
+  MCFragment.cpp
   MCInst.cpp
   MCInstPrinter.cpp
   MCInstrAnalysis.cpp
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index f7649fba6e89..9643b7594682 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -29,17 +29,17 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
        I != E; ++I) {
     Streamer.EmitCodeAlignment(I->Size); // align naturally
     Streamer.EmitLabel(I->Label);
-    Streamer.EmitValue(I->Value, I->Size);
+    Streamer.EmitValue(I->Value, I->Size, I->Loc);
   }
   Streamer.EmitDataRegion(MCDR_DataRegionEnd);
   Entries.clear();
 }

 const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
-                                     unsigned Size) {
+                                     unsigned Size, SMLoc Loc) {
   MCSymbol *CPEntryLabel = Context.createTempSymbol();

-  Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
+  Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
   return MCSymbolRefExpr::create(CPEntryLabel, Context);
 }

@@ -90,8 +90,8 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {

 const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
                                                const MCExpr *Expr,
-                                               unsigned Size) {
+                                               unsigned Size, SMLoc Loc) {
   MCSection *Section = Streamer.getCurrentSection().first;
   return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
-                                                   Size);
+                                                   Size, Loc);
 }
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index e925bc272dc8..e6552beefd01 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
 #include <vector>
 using namespace llvm;

@@ -106,7 +107,9 @@ class ELFObjectWriter : public MCObjectWriter {
   /// @name Symbol Table Data
   /// @{

-  StringTableBuilder StrTabBuilder;
+  BumpPtrAllocator Alloc;
+  StringSaver VersionSymSaver{Alloc};
+  StringTableBuilder StrTabBuilder{StringTableBuilder::ELF};

   /// @}

@@ -157,9 +160,9 @@ class ELFObjectWriter : public MCObjectWriter {

   template <typename T> void write(T Val) {
     if (IsLittleEndian)
-      support::endian::Writer<support::little>(OS).write(Val);
+      support::endian::Writer<support::little>(getStream()).write(Val);
     else
-      support::endian::Writer<support::big>(OS).write(Val);
+      support::endian::Writer<support::big>(getStream()).write(Val);
   }

   void writeHeader(const MCAssembler &Asm);
@@ -232,7 +235,7 @@ class ELFObjectWriter : public MCObjectWriter {
 }

 void ELFObjectWriter::align(unsigned Alignment) {
-  uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
+  uint64_t Padding = OffsetToAlignment(getStream().tell(), Alignment);
   WriteZeros(Padding);
 }

@@ -447,9 +450,6 @@ void ELFObjectWriter::writeSymbol(SymbolTableWriter &Writer,
                                   uint32_t StringIndex, ELFSymbolData &MSD,
                                   const MCAsmLayout &Layout) {
   const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
-  assert((!Symbol.getFragment() ||
-          (Symbol.getFragment()->getParent() == &Symbol.getSection())) &&
-         "The symbol's section doesn't match the fragment's symbol");
   const MCSymbolELF *Base =
       cast_or_null<MCSymbolELF>(Layout.getBaseSymbol(Symbol));

@@ -630,28 +630,36 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
     // In general, ELF has no relocations for -B. It can only represent (A + C)
     // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
     // replace B to implement it: (A - R - K + C)
-    if (IsPCRel)
-      Asm.getContext().reportFatalError(
+    if (IsPCRel) {
+      Asm.getContext().reportError(
           Fixup.getLoc(),
           "No relocation available to represent this relative expression");
+      return;
+    }

     const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol());

-    if (SymB.isUndefined())
-      Asm.getContext().reportFatalError(
+    if (SymB.isUndefined()) {
+      Asm.getContext().reportError(
           Fixup.getLoc(),
           Twine("symbol '") + SymB.getName() +
               "' can not be undefined in a subtraction expression");
+      return;
+    }

     assert(!SymB.isAbsolute() && "Should have been folded");
     const MCSection &SecB = SymB.getSection();
-    if (&SecB != &FixupSection)
-      Asm.getContext().reportFatalError(
+    if (&SecB != &FixupSection) {
+      Asm.getContext().reportError(
           Fixup.getLoc(), "Cannot represent a difference across sections");
+      return;
+    }

-    if (::isWeak(SymB))
-      Asm.getContext().reportFatalError(
+    if (::isWeak(SymB)) {
+      Asm.getContext().reportError(
           Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
+      return;
+    }

     uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
     uint64_t K = SymBOffset - FixupOffset;
@@ -764,7 +772,7 @@ void ELFObjectWriter::computeSymbolTable(
   SymbolTableIndex = addToSectionTable(SymtabSection);

   align(SymtabSection->getAlignment());
-  uint64_t SecStart = OS.tell();
+  uint64_t SecStart = getStream().tell();

   // The first entry is the undefined symbol entry.
   Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
@@ -784,8 +792,10 @@ void ELFObjectWriter::computeSymbolTable(
         Renames.count(&Symbol)))
       continue;

-    if (Symbol.isTemporary() && Symbol.isUndefined())
-      Ctx.reportFatalError(SMLoc(), "Undefined temporary");
+    if (Symbol.isTemporary() && Symbol.isUndefined()) {
+      Ctx.reportError(SMLoc(), "Undefined temporary symbol");
+      continue;
+    }

     ELFSymbolData MSD;
     MSD.Symbol = cast<MCSymbolELF>(&Symbol);
@@ -850,13 +860,15 @@ void ELFObjectWriter::computeSymbolTable(
         Buf += Name.substr(0, Pos);
         unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
         Buf += Name.substr(Pos + Skip);
-        Name = Buf;
+        Name = VersionSymSaver.save(Buf.c_str());
       }
     }

     // Sections have their own string table
-    if (Symbol.getType() != ELF::STT_SECTION)
-      MSD.Name = StrTabBuilder.add(Name);
+    if (Symbol.getType() != ELF::STT_SECTION) {
+      MSD.Name = Name;
+      StrTabBuilder.add(Name);
+    }

     if (Local)
       LocalSymbolData.push_back(MSD);
@@ -878,7 +890,7 @@ void ELFObjectWriter::computeSymbolTable(
   for (const std::string &Name : FileNames)
     StrTabBuilder.add(Name);

-  StrTabBuilder.finalize(StringTableBuilder::ELF);
+  StrTabBuilder.finalize();

   for (const std::string &Name : FileNames)
     Writer.writeSymbol(StrTabBuilder.getOffset(Name),
@@ -911,7 +923,7 @@ void ELFObjectWriter::computeSymbolTable(
     assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL);
   }

-  uint64_t SecEnd = OS.tell();
+  uint64_t SecEnd = getStream().tell();
   SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);

   ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
@@ -921,12 +933,12 @@ void ELFObjectWriter::computeSymbolTable(
   }
   assert(SymtabShndxSectionIndex != 0);

-  SecStart = OS.tell();
+  SecStart = getStream().tell();
   const MCSectionELF *SymtabShndxSection =
       SectionTable[SymtabShndxSectionIndex - 1];
   for (uint32_t Index : ShndxIndexes)
     write(Index);
-  SecEnd = OS.tell();
+  SecEnd = getStream().tell();
   SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
 }

@@ -957,31 +969,6 @@ ELFObjectWriter::createRelocationSection(MCContext &Ctx,
   return RelaSection;
 }

-static SmallVector<char, 128>
-getUncompressedData(const MCAsmLayout &Layout,
-                    const MCSection::FragmentListType &Fragments) {
-  SmallVector<char, 128> UncompressedData;
-  for (const MCFragment &F : Fragments) {
-    const SmallVectorImpl<char> *Contents;
-    switch (F.getKind()) {
-    case MCFragment::FT_Data:
-      Contents = &cast<MCDataFragment>(F).getContents();
-      break;
-    case MCFragment::FT_Dwarf:
-      Contents = &cast<MCDwarfLineAddrFragment>(F).getContents();
-      break;
-    case MCFragment::FT_DwarfFrame:
-      Contents = &cast<MCDwarfCallFrameFragment>(F).getContents();
-      break;
-    default:
-      llvm_unreachable(
-          "Not expecting any other fragment types in a debug_* section");
-    }
-    UncompressedData.append(Contents->begin(), Contents->end());
-  }
-  return UncompressedData;
-}
-
 // Include the debug info compression header:
 // "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
 // useful for consumers to preallocate a buffer to decompress into.
@@ -1016,27 +1003,29 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     return;
   }

-  // Gather the uncompressed data from all the fragments.
-  const MCSection::FragmentListType &Fragments = Section.getFragmentList();
-  SmallVector<char, 128> UncompressedData =
-      getUncompressedData(Layout, Fragments);
+  SmallVector<char, 128> UncompressedData;
+  raw_svector_ostream VecOS(UncompressedData);
+  raw_pwrite_stream &OldStream = getStream();
+  setStream(VecOS);
+  Asm.writeSectionData(&Section, Layout);
+  setStream(OldStream);

   SmallVector<char, 128> CompressedContents;
   zlib::Status Success = zlib::compress(
       StringRef(UncompressedData.data(), UncompressedData.size()),
       CompressedContents);
   if (Success != zlib::StatusOK) {
-    Asm.writeSectionData(&Section, Layout);
+    getStream() << UncompressedData;
     return;
   }

   if (!prependCompressionHeader(UncompressedData.size(), CompressedContents)) {
-    Asm.writeSectionData(&Section, Layout);
+    getStream() << UncompressedData;
     return;
   }
   Asm.getContext().renameELFSection(&Section,
                                     (".z" + SectionName.drop_front(1)).str());
-  OS << CompressedContents;
+  getStream() << CompressedContents;
 }

 void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
@@ -1061,8 +1050,13 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
                                        const MCSectionELF &Sec) {
   std::vector<ELFRelocationEntry> &Relocs = Relocations[&Sec];

-  // Sort the relocation entries. Most targets just sort by Offset, but some
-  // (e.g., MIPS) have additional constraints.
+  // We record relocations by pushing to the end of a vector. Reverse the vector
+  // to get the relocations in the order they were created.
+  // In most cases that is not important, but it can be for special sections
+  // (.eh_frame) or specific relocations (TLS optimizations on SystemZ).
+  std::reverse(Relocs.begin(), Relocs.end());
+
+  // Sort the relocation entries. MIPS needs this.
   TargetObjectWriter->sortRelocs(Asm, Relocs);

   for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
@@ -1100,7 +1094,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,

 const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) {
   const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1];
-  OS << StrTabBuilder.data();
+  getStream() << StrTabBuilder.data();
   return StrtabSection;
 }

@@ -1209,12 +1203,12 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
     align(Section.getAlignment());

     // Remember the offset into the file for this section.
- uint64_t SecStart = OS.tell(); + uint64_t SecStart = getStream().tell(); writeRelocations(Asm, *RelSection->getAssociatedSection()); - uint64_t SecEnd = OS.tell(); + uint64_t SecEnd = getStream().tell(); SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd); } { - uint64_t SecStart = OS.tell(); + uint64_t SecStart = getStream().tell(); const MCSectionELF *Sec = createStringTable(Ctx); - uint64_t SecEnd = OS.tell(); + uint64_t SecEnd = getStream().tell(); SectionOffsets[Sec] = std::make_pair(SecStart, SecEnd); } uint64_t NaturalAlignment = is64Bit() ? 8 : 4; align(NaturalAlignment); - const unsigned SectionHeaderOffset = OS.tell(); + const unsigned SectionHeaderOffset = getStream().tell(); // ... then the section header table ... writeSectionHeader(Layout, SectionIndexMap, SectionOffsets); @@ -1301,19 +1295,19 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm, uint64_t Val = SectionHeaderOffset; if (sys::IsLittleEndianHost != IsLittleEndian) sys::swapByteOrder(Val); - OS.pwrite(reinterpret_cast(&Val), sizeof(Val), - offsetof(ELF::Elf64_Ehdr, e_shoff)); + getStream().pwrite(reinterpret_cast(&Val), sizeof(Val), + offsetof(ELF::Elf64_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf64_Ehdr, e_shnum); } else { uint32_t Val = SectionHeaderOffset; if (sys::IsLittleEndianHost != IsLittleEndian) sys::swapByteOrder(Val); - OS.pwrite(reinterpret_cast(&Val), sizeof(Val), - offsetof(ELF::Elf32_Ehdr, e_shoff)); + getStream().pwrite(reinterpret_cast(&Val), sizeof(Val), + offsetof(ELF::Elf32_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf32_Ehdr, e_shnum); } - OS.pwrite(reinterpret_cast(&NumSections), sizeof(NumSections), - NumSectionsOffset); + getStream().pwrite(reinterpret_cast(&NumSections), + sizeof(NumSections), NumSectionsOffset); } bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 36c65b7bcd49..fcf139b72537 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -16,6 +16,10 @@ MCAsmBackend::MCAsmBackend() : HasDataInCodeSupport(false) {} MCAsmBackend::~MCAsmBackend() {} +bool MCAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const { + return false; +} + const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { {"FK_Data_1", 0, 8, 0}, diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 100dc7c3dc60..36e10b3c6a07 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -157,3 +157,9 @@ bool MCAsmInfo::isValidUnquotedName(StringRef Name) const { return true; } + +bool MCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { + // FIXME: Does .section .bss/.data/.text work everywhere?? + return SectionName == ".text" || SectionName == ".data" || + (SectionName == ".bss" && !usesELFSectionDirectiveForBSS()); +} diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 97fc76a9adb1..5b9dd2009f8b 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -37,8 +37,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { UseIntegratedAssembler = true; - // FIXME: For now keep the previous behavior, AShr. Need to double-check - // other COFF-targeting assemblers and change this if necessary. + // At least MSVC inline-asm does AShr. 
UseLogicalShr = false; } diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index bb90ff2c350a..ae9486d3db4d 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -93,9 +93,4 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { UseIntegratedAssembler = true; SetDirectiveSuppressesReloc = true; - - // FIXME: For now keep the previous behavior, AShr, matching the previous - // behavior of as(1) (both -q and -Q: resp. LLVM and gas v1.38). - // If/when this changes, the AArch64 Darwin special case can go away. - UseLogicalShr = false; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 227c937e8d1b..c99ce7752b30 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===// +//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -29,9 +29,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include + using namespace llvm; namespace { @@ -78,6 +80,9 @@ public: } EmitCommentsAndEOL(); } + + void EmitSyntaxDirective() override; + void EmitCommentsAndEOL(); /// isVerboseAsm - Return true if this streamer supports verbose assembly at @@ -160,7 +165,7 @@ public: void EmitBytes(StringRef Data) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitIntValue(uint64_t Value, unsigned Size) override; void EmitULEB128Value(const MCExpr *Value) override; @@ -181,7 +186,7 @@ public: void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit = 0) override; - bool EmitValueToOffset(const MCExpr *Offset, + void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) override; void EmitFileDirective(StringRef Filename) override; @@ -207,6 +212,8 @@ public: void EmitCFISameValue(int64_t Register) override; void EmitCFIRelOffset(int64_t Register, int64_t Offset) override; void EmitCFIAdjustCfaOffset(int64_t Adjustment) override; + void EmitCFIEscape(StringRef Values) override; + void EmitCFIGnuArgsSize(int64_t Size) override; void EmitCFISignalFrame() override; void EmitCFIUndefined(int64_t Register) override; void EmitCFIRegister(int64_t Register1, int64_t Register2) override; @@ -233,6 +240,9 @@ public: void EmitBundleLock(bool AlignToEnd) override; void EmitBundleUnlock() override; + bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) override; + /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. @@ -250,15 +260,9 @@ public: void MCAsmStreamer::AddComment(const Twine &T) { if (!IsVerboseAsm) return; - // Make sure that CommentStream is flushed. - CommentStream.flush(); - T.toVector(CommentToEmit); // Each comment goes on its own line. CommentToEmit.push_back('\n'); - - // Tell the comment stream that the vector changed underneath it. 
- CommentStream.resync(); } void MCAsmStreamer::EmitCommentsAndEOL() { @@ -267,7 +271,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() { return; } - CommentStream.flush(); StringRef Comments = CommentToEmit; assert(Comments.back() == '\n' && @@ -282,8 +285,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() { } while (!Comments.empty()); CommentToEmit.clear(); - // Tell the comment stream that the vector changed underneath it. - CommentStream.resync(); } static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { @@ -372,6 +373,8 @@ void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) { void MCAsmStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor, unsigned Update) { switch (Kind) { + case MCVM_WatchOSVersionMin: OS << "\t.watchos_version_min"; break; + case MCVM_TvOSVersionMin: OS << "\t.tvos_version_min"; break; case MCVM_IOSVersionMin: OS << "\t.ios_version_min"; break; case MCVM_OSXVersionMin: OS << "\t.macosx_version_min"; break; } @@ -480,6 +483,14 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { EmitEOL(); } +void MCAsmStreamer::EmitSyntaxDirective() { + if (MAI->getAssemblerDialect() == 1) + OS << "\t.intel_syntax noprefix\n"; + // FIXME: Currently emit unprefix'ed registers. + // The intel_syntax directive has one optional argument + // with may have a value of prefix or noprefix. +} + void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { OS << "\t.def\t "; Symbol->print(OS, MAI); @@ -531,9 +542,6 @@ void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) { void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - // Common symbols do not belong to any actual section. - AssignSection(Symbol, nullptr); - OS << "\t.comm\t"; Symbol->print(OS, MAI); OS << ',' << Size; @@ -553,9 +561,6 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, /// @param Size - The size of the common symbol. void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlign) { - // Common symbols do not belong to any actual section. - AssignSection(Symbol, nullptr); - OS << "\t.lcomm\t"; Symbol->print(OS, MAI); OS << ',' << Size; @@ -579,7 +584,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { if (Symbol) - AssignSection(Symbol, Section); + AssignFragment(Symbol, &Section->getDummyFragment()); // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; @@ -603,7 +608,7 @@ void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, // e.g. _a. void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - AssignSection(Symbol, Section); + AssignFragment(Symbol, &Section->getDummyFragment()); assert(Symbol && "Symbol shouldn't be NULL!"); // Instead of using the Section we'll just use the shortcut. 
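The flush()/resync() deletions in AddComment and EmitCommentsAndEOL above track the change that made raw_svector_ostream unbuffered: the stream now writes straight into the SmallString that backs it, so the buffer and the vector can no longer get out of sync. A minimal sketch of the resulting pattern, using only the public raw_ostream API:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

// Builds a comment string in place. With the unbuffered raw_svector_ostream,
// Out is always current; there is no separate buffer to flush or resync.
void buildComment(llvm::SmallString<128> &Out) {
  llvm::raw_svector_ostream OS(Out);
  OS << "comment " << 42; // lands in Out immediately
}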
@@ -654,7 +659,6 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) { OS << '"'; } - void MCAsmStreamer::EmitBytes(StringRef Data) { assert(getCurrentSection().first && "Cannot emit contents before setting section!"); @@ -685,7 +689,7 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) { } void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { assert(Size <= 8 && "Invalid size"); assert(getCurrentSection().first && "Cannot emit contents before setting section!"); @@ -776,7 +780,6 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { EmitEOL(); } - /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. This implements directives such as '.space'. void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { @@ -856,17 +859,15 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, 1, MaxBytesToEmit); } -bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, +void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. OS << ".org "; Offset->print(OS, MAI); OS << ", " << (unsigned)Value; EmitEOL(); - return false; } - void MCAsmStreamer::EmitFileDirective(StringRef Filename) { assert(MAI->hasSingleParameterDotFile()); OS << "\t.file\t"; @@ -1014,6 +1015,32 @@ void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { EmitEOL(); } +static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) { + OS << "\t.cfi_escape "; + if (!Values.empty()) { + size_t e = Values.size() - 1; + for (size_t i = 0; i < e; ++i) + OS << format("0x%02x", uint8_t(Values[i])) << ", "; + OS << format("0x%02x", uint8_t(Values[e])); + } +} + +void MCAsmStreamer::EmitCFIEscape(StringRef Values) { + MCStreamer::EmitCFIEscape(Values); + PrintCFIEscape(OS, Values); + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIGnuArgsSize(int64_t Size) { + MCStreamer::EmitCFIGnuArgsSize(Size); + + uint8_t Buffer[16] = { dwarf::DW_CFA_GNU_args_size }; + unsigned Len = encodeULEB128(Size, Buffer + 1) + 1; + + PrintCFIEscape(OS, StringRef((const char *)&Buffer[0], Len)); + EmitEOL(); +} + void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { MCStreamer::EmitCFIDefCfaRegister(Register); OS << "\t.cfi_def_cfa_register "; @@ -1203,7 +1230,7 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProlog(void) { +void MCAsmStreamer::EmitWinCFIEndProlog() { MCStreamer::EmitWinCFIEndProlog(); OS << "\t.seh_endprologue"; @@ -1217,7 +1244,6 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, SmallVector Fixups; raw_svector_ostream VecOS(Code); Emitter->encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // If we are showing fixups, create symbolic markers in the encoded // representation. We do this by making a per-bit map to the fixup item index, @@ -1334,6 +1360,19 @@ void MCAsmStreamer::EmitBundleUnlock() { EmitEOL(); } +bool MCAsmStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc) { + OS << "\t.reloc "; + Offset.print(OS, MAI); + OS << ", " << Name; + if (Expr) { + OS << ", "; + Expr->print(OS, MAI); + } + EmitEOL(); + return false; +} + /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. 
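The new EmitCFIGnuArgsSize above builds its `.cfi_escape` payload by writing the DW_CFA_GNU_args_size opcode followed by the size as an unsigned LEB128 value. As a reference for that byte format, here is a minimal standalone sketch of unsigned LEB128 encoding; encodeULEB128Sketch is a hypothetical name, the in-tree routine is llvm::encodeULEB128 from llvm/Support/LEB128.h.

#include <cstdint>
#include <vector>

// Each output byte carries 7 payload bits, low bits first; the high bit
// is set on every byte except the last to mark continuation.
static std::vector<uint8_t> encodeULEB128Sketch(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f; // low 7 bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out; // e.g. 624485 encodes as {0xE5, 0x8E, 0x26}
}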
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index f53b589e1aea..15e82fa49388 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -64,272 +64,11 @@ STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); /* *** */ -MCAsmLayout::MCAsmLayout(MCAssembler &Asm) - : Assembler(Asm), LastValidFragment() - { - // Compute the section layout order. Virtual sections must go last. - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - if (!it->isVirtualSection()) - SectionOrder.push_back(&*it); - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - if (it->isVirtualSection()) - SectionOrder.push_back(&*it); -} - -bool MCAsmLayout::isFragmentValid(const MCFragment *F) const { - const MCSection *Sec = F->getParent(); - const MCFragment *LastValid = LastValidFragment.lookup(Sec); - if (!LastValid) - return false; - assert(LastValid->getParent() == Sec); - return F->getLayoutOrder() <= LastValid->getLayoutOrder(); -} - -void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) { - // If this fragment wasn't already valid, we don't need to do anything. - if (!isFragmentValid(F)) - return; - - // Otherwise, reset the last valid fragment to the previous fragment - // (if this is the first fragment, it will be NULL). - LastValidFragment[F->getParent()] = F->getPrevNode(); -} - -void MCAsmLayout::ensureValid(const MCFragment *F) const { - MCSection *Sec = F->getParent(); - MCFragment *Cur = LastValidFragment[Sec]; - if (!Cur) - Cur = Sec->begin(); - else - Cur = Cur->getNextNode(); - - // Advance the layout position until the fragment is valid. - while (!isFragmentValid(F)) { - assert(Cur && "Layout bookkeeping error"); - const_cast(this)->layoutFragment(Cur); - Cur = Cur->getNextNode(); - } -} - -uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { - ensureValid(F); - assert(F->Offset != ~UINT64_C(0) && "Address not set!"); - return F->Offset; -} - -// Simple getSymbolOffset helper for the non-varibale case. -static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S, - bool ReportError, uint64_t &Val) { - if (!S.getFragment()) { - if (ReportError) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - S.getName() + "'"); - return false; - } - Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset(); - return true; -} - -static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S, - bool ReportError, uint64_t &Val) { - if (!S.isVariable()) - return getLabelOffset(Layout, S, ReportError, Val); - - // If SD is a variable, evaluate it. 
- MCValue Target; - if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) - report_fatal_error("unable to evaluate offset for variable '" + - S.getName() + "'"); - - uint64_t Offset = Target.getConstant(); - - const MCSymbolRefExpr *A = Target.getSymA(); - if (A) { - uint64_t ValA; - if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA)) - return false; - Offset += ValA; - } - - const MCSymbolRefExpr *B = Target.getSymB(); - if (B) { - uint64_t ValB; - if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB)) - return false; - Offset -= ValB; - } - - Val = Offset; - return true; -} - -bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const { - return getSymbolOffsetImpl(*this, S, false, Val); -} - -uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const { - uint64_t Val; - getSymbolOffsetImpl(*this, S, true, Val); - return Val; -} - -const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { - if (!Symbol.isVariable()) - return &Symbol; - - const MCExpr *Expr = Symbol.getVariableValue(); - MCValue Value; - if (!Expr->evaluateAsValue(Value, *this)) - llvm_unreachable("Invalid Expression"); - - const MCSymbolRefExpr *RefB = Value.getSymB(); - if (RefB) - Assembler.getContext().reportFatalError( - SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() + - "' could not be evaluated in a subtraction expression"); - - const MCSymbolRefExpr *A = Value.getSymA(); - if (!A) - return nullptr; - - const MCSymbol &ASym = A->getSymbol(); - const MCAssembler &Asm = getAssembler(); - if (ASym.isCommon()) { - // FIXME: we should probably add a SMLoc to MCExpr. - Asm.getContext().reportFatalError(SMLoc(), - "Common symbol " + ASym.getName() + - " cannot be used in assignment expr"); - } - - return &ASym; -} - -uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const { - // The size is the last fragment's end offset. - const MCFragment &F = Sec->getFragmentList().back(); - return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F); -} - -uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const { - // Virtual sections have no file size. - if (Sec->isVirtualSection()) - return 0; - - // Otherwise, the file size is the same as the address space size. - return getSectionAddressSize(Sec); -} - -uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, - const MCFragment *F, - uint64_t FOffset, uint64_t FSize) { - uint64_t BundleSize = Assembler.getBundleAlignSize(); - assert(BundleSize > 0 && - "computeBundlePadding should only be called if bundling is enabled"); - uint64_t BundleMask = BundleSize - 1; - uint64_t OffsetInBundle = FOffset & BundleMask; - uint64_t EndOfFragment = OffsetInBundle + FSize; - - // There are two kinds of bundling restrictions: - // - // 1) For alignToBundleEnd(), add padding to ensure that the fragment will - // *end* on a bundle boundary. - // 2) Otherwise, check if the fragment would cross a bundle boundary. If it - // would, add padding until the end of the bundle so that the fragment - // will start in a new one. - if (F->alignToBundleEnd()) { - // Three possibilities here: - // - // A) The fragment just happens to end at a bundle boundary, so we're good. - // B) The fragment ends before the current bundle boundary: pad it just - // enough to reach the boundary. - // C) The fragment ends after the current bundle boundary: pad it until it - // reaches the end of the next bundle boundary. 
- // - // Note: this code could be made shorter with some modulo trickery, but it's - // intentionally kept in its more explicit form for simplicity. - if (EndOfFragment == BundleSize) - return 0; - else if (EndOfFragment < BundleSize) - return BundleSize - EndOfFragment; - else { // EndOfFragment > BundleSize - return 2 * BundleSize - EndOfFragment; - } - } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize) - return BundleSize - OffsetInBundle; - else - return 0; -} - -/* *** */ - -void ilist_node_traits::deleteNode(MCFragment *V) { - V->destroy(); -} - -MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false), - AlignToBundleEnd(false), BundlePadding(0) { -} - -MCFragment::~MCFragment() { } - -MCFragment::MCFragment(FragmentType Kind, bool HasInstructions, - uint8_t BundlePadding, MCSection *Parent) - : Kind(Kind), HasInstructions(HasInstructions), AlignToBundleEnd(false), - BundlePadding(BundlePadding), Parent(Parent), Atom(nullptr), - Offset(~UINT64_C(0)) { - if (Parent) - Parent->getFragmentList().push_back(this); -} - -void MCFragment::destroy() { - // First check if we are the sentinal. - if (Kind == FragmentType(~0)) { - delete this; - return; - } - - switch (Kind) { - case FT_Align: - delete cast(this); - return; - case FT_Data: - delete cast(this); - return; - case FT_CompactEncodedInst: - delete cast(this); - return; - case FT_Fill: - delete cast(this); - return; - case FT_Relaxable: - delete cast(this); - return; - case FT_Org: - delete cast(this); - return; - case FT_Dwarf: - delete cast(this); - return; - case FT_DwarfFrame: - delete cast(this); - return; - case FT_LEB: - delete cast(this); - return; - case FT_SafeSEH: - delete cast(this); - return; - } -} - -/* *** */ - MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, - MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, - raw_ostream &OS_) + MCCodeEmitter &Emitter_, MCObjectWriter &Writer_) : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_), - OS(OS_), BundleAlignSize(0), RelaxAll(false), - SubsectionsViaSymbols(false), ELFHeaderEFlags(0) { + BundleAlignSize(0), RelaxAll(false), SubsectionsViaSymbols(false), + IncrementalLinkerCompatible(false), ELFHeaderEFlags(0) { VersionMinInfo.Major = 0; // Major version == 0 for "none specified" } @@ -347,6 +86,7 @@ void MCAssembler::reset() { BundleAlignSize = 0; RelaxAll = false; SubsectionsViaSymbols = false; + IncrementalLinkerCompatible = false; ELFHeaderEFlags = 0; LOHContainer.reset(); VersionMinInfo.Major = 0; @@ -358,6 +98,14 @@ void MCAssembler::reset() { getLOHContainer().reset(); } +bool MCAssembler::registerSection(MCSection &Section) { + if (Section.isRegistered()) + return false; + Sections.push_back(&Section); + Section.setIsRegistered(true); + return true; +} + bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const { if (ThumbFuncs.count(Symbol)) return true; @@ -404,7 +152,7 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const { return &S; // Absolute and undefined symbols have no defining atom. - if (!S.getFragment()) + if (!S.isInSection()) return nullptr; // Non-linker visible symbols in sections which can't be atomized have no @@ -426,8 +174,13 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. 
const MCExpr *Expr = Fixup.getValue(); - if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) - getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression"); + if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) { + getContext().reportError(Fixup.getLoc(), "expected relocatable expression"); + // Claim to have completely evaluated the fixup, to prevent any further + // processing from being done. + Value = 0; + return true; + } bool IsPCRel = Backend.getFixupKindInfo( Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; @@ -523,12 +276,19 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Org: { const MCOrgFragment &OF = cast(F); - int64_t TargetLocation; - if (!OF.getOffset().evaluateAsAbsolute(TargetLocation, Layout)) + MCValue Value; + if (!OF.getOffset().evaluateAsValue(Value, Layout)) report_fatal_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. uint64_t FragmentOffset = Layout.getFragmentOffset(&OF); + int64_t TargetLocation = Value.getConstant(); + if (const MCSymbolRefExpr *A = Value.getSymA()) { + uint64_t Val; + if (!Layout.getSymbolOffset(A->getSymbol(), Val)) + report_fatal_error("expected absolute expression"); + TargetLocation += Val; + } int64_t Size = TargetLocation - FragmentOffset; if (Size < 0 || Size >= 0x40000000) report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + @@ -540,6 +300,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return cast(F).getContents().size(); case MCFragment::FT_DwarfFrame: return cast(F).getContents().size(); + case MCFragment::FT_Dummy: + llvm_unreachable("Should not have been added"); } llvm_unreachable("invalid fragment kind"); @@ -773,6 +535,8 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, OW->writeBytes(CF.getContents()); break; } + case MCFragment::FT_Dummy: + llvm_unreachable("Should not have been added"); } assert(OW->getStream().tell() - Start == FragmentSize && @@ -786,15 +550,14 @@ void MCAssembler::writeSectionData(const MCSection *Sec, assert(Layout.getSectionFileSize(Sec) == 0 && "Invalid size for section!"); // Check that contents are only things legal inside a virtual section. - for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie; - ++it) { - switch (it->getKind()) { + for (const MCFragment &F : *Sec) { + switch (F.getKind()) { default: llvm_unreachable("Invalid fragment in virtual section!"); case MCFragment::FT_Data: { // Check that we aren't trying to write a non-zero contents (or fixups) // into a virtual section. This is to support clients which use standard // directives to fill the contents of virtual sections. - const MCDataFragment &DF = cast(*it); + const MCDataFragment &DF = cast(F); assert(DF.fixup_begin() == DF.fixup_end() && "Cannot have fixups in virtual section!"); for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) @@ -810,13 +573,13 @@ void MCAssembler::writeSectionData(const MCSection *Sec, case MCFragment::FT_Align: // Check that we aren't trying to write a non-zero value into a virtual // section. 
- assert((cast(it)->getValueSize() == 0 || - cast(it)->getValue() == 0) && + assert((cast(F).getValueSize() == 0 || + cast(F).getValue() == 0) && "Invalid align in virtual section!"); break; case MCFragment::FT_Fill: - assert((cast(it)->getValueSize() == 0 || - cast(it)->getValue() == 0) && + assert((cast(F).getValueSize() == 0 || + cast(F).getValue() == 0) && "Invalid fill in virtual section!"); break; } @@ -828,9 +591,8 @@ void MCAssembler::writeSectionData(const MCSection *Sec, uint64_t Start = getWriter().getStream().tell(); (void)Start; - for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie; - ++it) - writeFragment(*this, Layout, *it); + for (const MCFragment &F : *Sec) + writeFragment(*this, Layout, F); assert(getWriter().getStream().tell() - Start == Layout.getSectionAddressSize(Sec)); @@ -854,23 +616,20 @@ std::pair MCAssembler::handleFixup(const MCAsmLayout &Layout, return std::make_pair(FixedValue, IsPCRel); } -void MCAssembler::Finish() { +void MCAssembler::layout(MCAsmLayout &Layout) { DEBUG_WITH_TYPE("mc-dump", { llvm::errs() << "assembler backend - pre-layout\n--\n"; dump(); }); - // Create the layout object. - MCAsmLayout Layout(*this); - // Create dummy fragments and assign section ordinals. unsigned SectionIndex = 0; - for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSection &Sec : *this) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. - if (it->getFragmentList().empty()) - new MCDataFragment(&*it); + if (Sec.getFragmentList().empty()) + new MCDataFragment(&Sec); - it->setOrdinal(SectionIndex++); + Sec.setOrdinal(SectionIndex++); } // Assign layout order indices to sections and fragments. @@ -879,9 +638,8 @@ void MCAssembler::Finish() { Sec->setLayoutOrder(i); unsigned FragmentIndex = 0; - for (MCSection::iterator iFrag = Sec->begin(), iFragEnd = Sec->end(); - iFrag != iFragEnd; ++iFrag) - iFrag->setLayoutOrder(FragmentIndex++); + for (MCFragment &Frag : *Sec) + Frag.setLayoutOrder(FragmentIndex++); } // Layout until everything fits. @@ -899,17 +657,14 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - final-layout\n--\n"; dump(); }); - uint64_t StartOffset = OS.tell(); - // Allow the object writer a chance to perform post-layout binding (for // example, to set the index fields in the symbol data). getWriter().executePostLayoutBinding(*this, Layout); // Evaluate and apply the fixups, generating relocation entries as necessary. - for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { - for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; - ++it2) { - MCEncodedFragment *F = dyn_cast(it2); + for (MCSection &Sec : *this) { + for (MCFragment &Frag : Sec) { + MCEncodedFragment *F = dyn_cast(&Frag); // Data and relaxable fragments both have fixups. So only process // those here. // FIXME: Is there a better way to do this? MCEncodedFragmentWithFixups @@ -935,6 +690,15 @@ void MCAssembler::Finish() { } } } +} + +void MCAssembler::Finish() { + // Create the layout object. + MCAsmLayout Layout(*this); + layout(Layout); + + raw_ostream &OS = getWriter().getStream(); + uint64_t StartOffset = OS.tell(); // Write the object file. 
getWriter().writeObject(*this, Layout); @@ -960,9 +724,8 @@ bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F, if (!getBackend().mayNeedRelaxation(F->getInst())) return false; - for (MCRelaxableFragment::const_fixup_iterator it = F->fixup_begin(), - ie = F->fixup_end(); it != ie; ++it) - if (fixupNeedsRelaxation(*it, F, Layout)) + for (const MCFixup &Fixup : F->getFixups()) + if (fixupNeedsRelaxation(Fixup, F, Layout)) return true; return false; @@ -991,7 +754,6 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, SmallString<256> Code; raw_svector_ostream VecOS(Code); getEmitter().encodeInstruction(Relaxed, VecOS, Fixups, F.getSubtargetInfo()); - VecOS.flush(); // Update the fragment. F.setInst(Relaxed); @@ -1014,7 +776,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { encodeSLEB128(Value, OSE); else encodeULEB128(Value, OSE); - OSE.flush(); return OldSize != LF.getContents().size(); } @@ -1031,8 +792,8 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, SmallString<8> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OSE); - OSE.flush(); + MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, + AddrDelta, OSE); return OldSize != Data.size(); } @@ -1048,7 +809,6 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, Data.clear(); raw_svector_ostream OSE(Data); MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); - OSE.flush(); return OldSize != Data.size(); } @@ -1085,7 +845,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) { break; } if (RelaxedFrag && !FirstRelaxedFragment) - FirstRelaxedFragment = I; + FirstRelaxedFragment = &*I; } if (FirstRelaxedFragment) { Layout.invalidateFragmentsFrom(FirstRelaxedFragment); @@ -1113,158 +873,3 @@ void MCAssembler::finishLayout(MCAsmLayout &Layout) { Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin()); } } - -// Debugging methods - -namespace llvm { - -raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { - OS << ""; - return OS; -} - -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MCFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<"; - switch (getKind()) { - case MCFragment::FT_Align: OS << "MCAlignFragment"; break; - case MCFragment::FT_Data: OS << "MCDataFragment"; break; - case MCFragment::FT_CompactEncodedInst: - OS << "MCCompactEncodedInstFragment"; break; - case MCFragment::FT_Fill: OS << "MCFillFragment"; break; - case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break; - case MCFragment::FT_Org: OS << "MCOrgFragment"; break; - case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; - case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; - case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; - case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break; - } - - OS << "(getBundlePadding()) << ">"; - - switch (getKind()) { - case MCFragment::FT_Align: { - const MCAlignFragment *AF = cast(this); - if (AF->hasEmitNops()) - OS << " (emit nops)"; - OS << "\n "; - OS << " Alignment:" << AF->getAlignment() - << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() - << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; - break; - } - case MCFragment::FT_Data: { - const MCDataFragment *DF = cast(this); - OS << "\n "; - OS << " Contents:["; - const SmallVectorImpl &Contents = DF->getContents(); - for (unsigned i = 0, e = Contents.size(); i != 
e; ++i) { - if (i) OS << ","; - OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); - } - OS << "] (" << Contents.size() << " bytes)"; - - if (DF->fixup_begin() != DF->fixup_end()) { - OS << ",\n "; - OS << " Fixups:["; - for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), - ie = DF->fixup_end(); it != ie; ++it) { - if (it != DF->fixup_begin()) OS << ",\n "; - OS << *it; - } - OS << "]"; - } - break; - } - case MCFragment::FT_CompactEncodedInst: { - const MCCompactEncodedInstFragment *CEIF = - cast(this); - OS << "\n "; - OS << " Contents:["; - const SmallVectorImpl &Contents = CEIF->getContents(); - for (unsigned i = 0, e = Contents.size(); i != e; ++i) { - if (i) OS << ","; - OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); - } - OS << "] (" << Contents.size() << " bytes)"; - break; - } - case MCFragment::FT_Fill: { - const MCFillFragment *FF = cast(this); - OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() - << " Size:" << FF->getSize(); - break; - } - case MCFragment::FT_Relaxable: { - const MCRelaxableFragment *F = cast(this); - OS << "\n "; - OS << " Inst:"; - F->getInst().dump_pretty(OS); - break; - } - case MCFragment::FT_Org: { - const MCOrgFragment *OF = cast(this); - OS << "\n "; - OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); - break; - } - case MCFragment::FT_Dwarf: { - const MCDwarfLineAddrFragment *OF = cast(this); - OS << "\n "; - OS << " AddrDelta:" << OF->getAddrDelta() - << " LineDelta:" << OF->getLineDelta(); - break; - } - case MCFragment::FT_DwarfFrame: { - const MCDwarfCallFrameFragment *CF = cast(this); - OS << "\n "; - OS << " AddrDelta:" << CF->getAddrDelta(); - break; - } - case MCFragment::FT_LEB: { - const MCLEBFragment *LF = cast(this); - OS << "\n "; - OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); - break; - } - case MCFragment::FT_SafeSEH: { - const MCSafeSEHFragment *F = cast(this); - OS << "\n "; - OS << " Sym:" << F->getSymbol(); - break; - } - } - OS << ">"; -} - -void MCAssembler::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "dump(); - } - OS << "],\n"; - OS << " Symbols:["; - - for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { - if (it != symbol_begin()) OS << ",\n "; - OS << "("; - it->dump(); - OS << ", Index:" << it->getIndex() << ", "; - OS << ")"; - } - OS << "]>\n"; -} -#endif diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index a85796cfbad9..b5ad518d0330 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbolMachO.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -41,7 +42,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4), AllowTemporaryLabels(true), DwarfCompileUnitID(0), - AutoReset(DoAutoReset) { + AutoReset(DoAutoReset), HadError(false) { std::error_code EC = llvm::sys::fs::current_path(CompilationDir); if (EC) @@ -62,9 +63,6 @@ MCContext::~MCContext() { // NOTE: The symbols are all allocated out of a bump pointer allocator, // we don't need to free them here. - - // If the stream for the .secure_log_unique directive was created free it. 
- delete (raw_ostream *)SecureLog; } //===----------------------------------------------------------------------===// @@ -73,13 +71,11 @@ MCContext::~MCContext() { void MCContext::reset() { // Call the destructors so the fragments are freed - for (auto &I : ELFUniquingMap) - I.second->~MCSectionELF(); - for (auto &I : COFFUniquingMap) - I.second->~MCSectionCOFF(); - for (auto &I : MachOUniquingMap) - I.second->~MCSectionMachO(); + COFFAllocator.DestroyAll(); + ELFAllocator.DestroyAll(); + MachOAllocator.DestroyAll(); + MCSubtargetAllocator.DestroyAll(); UsedNames.clear(); Symbols.clear(); SectionSymbols.clear(); @@ -103,6 +99,8 @@ void MCContext::reset() { DwarfLocSeen = false; GenDwarfForAssembly = false; GenDwarfFileNumber = 0; + + HadError = false; } //===----------------------------------------------------------------------===// @@ -294,8 +292,8 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section, Begin = createTempSymbol(BeginSymName, false); // Otherwise, return a new section. - return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, - Reserved2, Kind, Begin); + return Entry = new (MachOAllocator.Allocate()) MCSectionMachO( + Segment, Section, TypeAndAttributes, Reserved2, Kind, Begin); } void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) { @@ -322,7 +320,7 @@ MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type, bool Inserted; std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true)); - return new (*this) + return new (ELFAllocator.Allocate()) MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(), EntrySize, Group, true, nullptr, Associated); } @@ -367,15 +365,15 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - MCSectionELF *Result = - new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, - GroupSym, UniqueID, Begin, Associated); + MCSectionELF *Result = new (ELFAllocator.Allocate()) + MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID, + Begin, Associated); Entry.second = Result; return Result; } MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) { - MCSectionELF *Result = new (*this) + MCSectionELF *Result = new (ELFAllocator.Allocate()) MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, Group, ~0, nullptr, nullptr); return Result; @@ -404,7 +402,7 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, Begin = createTempSymbol(BeginSymName, false); StringRef CachedName = Iter->first.SectionName; - MCSectionCOFF *Result = new (*this) MCSectionCOFF( + MCSectionCOFF *Result = new (COFFAllocator.Allocate()) MCSectionCOFF( CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin); Iter->second = Result; @@ -441,6 +439,10 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec, COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); } +MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) { + return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI); +} + //===----------------------------------------------------------------------===// // Dwarf Management //===----------------------------------------------------------------------===// @@ -472,14 +474,24 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { [&](MCSection *Sec) { return !MCOS.mayHaveInstructions(*Sec); }); } -void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const 
{ - // If we have a source manager and a location, use it. Otherwise just - // use the generic report_fatal_error(). - if (!SrcMgr || Loc == SMLoc()) +//===----------------------------------------------------------------------===// +// Error Reporting +//===----------------------------------------------------------------------===// + +void MCContext::reportError(SMLoc Loc, const Twine &Msg) { + HadError = true; + + // If we have a source manager use it. Otherwise just use the generic + // report_fatal_error(). + if (!SrcMgr) report_fatal_error(Msg, false); // Use the source manager to print the message. SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg); +} + +void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) { + reportError(Loc, Msg); // If we reached here, we are failing ungracefully. Run the interrupt handlers // to make sure any special cleanups get done, in particular that we remove diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 716d76a79fe3..82063fb74696 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -125,7 +125,6 @@ void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ static void emitComments(LLVMDisasmContext *DC, formatted_raw_ostream &FormattedOS) { // Flush the stream before taking its content. - DC->CommentStream.flush(); StringRef Comments = DC->CommentsToEmit.str(); // Get the default information for printing a comment. const MCAsmInfo *MAI = DC->getAsmInfo(); @@ -147,7 +146,6 @@ static void emitComments(LLVMDisasmContext *DC, // Tell the comment stream that the vector changed underneath it. DC->CommentsToEmit.clear(); - DC->CommentStream.resync(); } /// \brief Gets latency information for \p Inst from the itinerary @@ -261,7 +259,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, return 0; case MCDisassembler::Success: { - Annotations.flush(); StringRef AnnotationsStr = Annotations.str(); SmallVector InsnStr; @@ -273,7 +270,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, emitLatency(DC, Inst); emitComments(DC, FormattedOS); - OS.flush(); assert(OutStringSize != 0 && "Output buffer cannot be zero size"); size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c84c4865f51e..a99ac4eca59e 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -27,27 +27,9 @@ #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; -// Given a special op, return the address skip amount (in units of -// DWARF2_LINE_MIN_INSN_LENGTH. -#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE) - -// The maximum address skip amount that can be encoded with a special op. -#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) - -// First special line opcode - leave room for the standard opcodes. -// Note: If you want to change this, you'll have to update the -// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). -#define DWARF2_LINE_OPCODE_BASE 13 - -// Minimum line offset in a special line info. opcode. This value -// was chosen to give a reasonable range of values. -#define DWARF2_LINE_BASE -5 - -// Range of line offsets in a special line info. opcode. 
-#define DWARF2_LINE_RANGE 14 - static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) { unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment(); if (MinInsnLength == 1) @@ -197,7 +179,8 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, // // This emits the Dwarf file and the line tables. // -void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) { +void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS, + MCDwarfLineTableParams Params) { MCContext &context = MCOS->getContext(); auto &LineTables = context.getMCDwarfLineTables(); @@ -212,14 +195,17 @@ void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) { // Handle the rest of the Compile Units. for (const auto &CUIDTablePair : LineTables) - CUIDTablePair.second.EmitCU(MCOS); + CUIDTablePair.second.EmitCU(MCOS, Params); } -void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS) const { - MCOS.EmitLabel(Header.Emit(&MCOS, None).second); +void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, + MCDwarfLineTableParams Params) const { + MCOS.EmitLabel(Header.Emit(&MCOS, Params, None).second); } -std::pair MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) const { +std::pair +MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, + MCDwarfLineTableParams Params) const { static const char StandardOpcodeLengths[] = { 0, // length of DW_LNS_copy 1, // length of DW_LNS_advance_pc @@ -234,9 +220,10 @@ std::pair MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) 0, // length of DW_LNS_set_epilogue_begin 1 // DW_LNS_set_isa }; - assert(array_lengthof(StandardOpcodeLengths) == - (DWARF2_LINE_OPCODE_BASE - 1)); - return Emit(MCOS, StandardOpcodeLengths); + assert(array_lengthof(StandardOpcodeLengths) >= + (Params.DWARF2LineOpcodeBase - 1U)); + return Emit(MCOS, Params, makeArrayRef(StandardOpcodeLengths, + Params.DWARF2LineOpcodeBase - 1)); } static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) { @@ -256,9 +243,8 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) { } std::pair -MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, +MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, ArrayRef StandardOpcodeLengths) const { - MCContext &context = MCOS->getContext(); // Create a symbol at the beginning of the line table. @@ -293,8 +279,8 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, // Parameters of the state machine, are next. MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1); MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1); - MCOS->EmitIntValue(DWARF2_LINE_BASE, 1); - MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1); + MCOS->EmitIntValue(Params.DWARF2LineBase, 1); + MCOS->EmitIntValue(Params.DWARF2LineRange, 1); MCOS->EmitIntValue(StandardOpcodeLengths.size() + 1, 1); // Standard opcode lengths @@ -329,8 +315,9 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, return std::make_pair(LineStartSym, LineEndSym); } -void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const { - MCSymbol *LineEndSym = Header.Emit(MCOS).second; +void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS, + MCDwarfLineTableParams Params) const { + MCSymbol *LineEndSym = Header.Emit(MCOS, Params).second; // Put out the line tables. for (const auto &LineSec : MCLineSections.getMCLineEntries()) @@ -416,21 +403,31 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, } /// Utility function to emit the encoding to a streamer. 
-void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta, - uint64_t AddrDelta) { +void MCDwarfLineAddr::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta) { MCContext &Context = MCOS->getContext(); SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OS); + MCDwarfLineAddr::Encode(Context, Params, LineDelta, AddrDelta, OS); MCOS->EmitBytes(OS.str()); } +/// Given a special op, return the address skip amount (in units of +/// DWARF2_LINE_MIN_INSN_LENGTH). +static uint64_t SpecialAddr(MCDwarfLineTableParams Params, uint64_t op) { + return (op - Params.DWARF2LineOpcodeBase) / Params.DWARF2LineRange; +} + /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. -void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, - uint64_t AddrDelta, raw_ostream &OS) { +void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta, + raw_ostream &OS) { uint64_t Temp, Opcode; bool NeedCopy = false; + // The maximum address skip amount that can be encoded with a special op. + uint64_t MaxSpecialAddrDelta = SpecialAddr(Params, 255); + // Scale the address delta by the minimum instruction length. AddrDelta = ScaleAddrDelta(Context, AddrDelta); @@ -438,7 +435,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the // end_sequence to emit the matrix entry. if (LineDelta == INT64_MAX) { - if (AddrDelta == MAX_SPECIAL_ADDR_DELTA) + if (AddrDelta == MaxSpecialAddrDelta) OS << char(dwarf::DW_LNS_const_add_pc); else if (AddrDelta) { OS << char(dwarf::DW_LNS_advance_pc); @@ -451,16 +448,16 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, } // Bias the line delta by the base. - Temp = LineDelta - DWARF2_LINE_BASE; + Temp = LineDelta - Params.DWARF2LineBase; // If the line increment is out of range of a special opcode, we must encode // it with DW_LNS_advance_line. - if (Temp >= DWARF2_LINE_RANGE) { + if (Temp >= Params.DWARF2LineRange) { OS << char(dwarf::DW_LNS_advance_line); encodeSLEB128(LineDelta, OS); LineDelta = 0; - Temp = 0 - DWARF2_LINE_BASE; + Temp = 0 - Params.DWARF2LineBase; NeedCopy = true; } @@ -471,19 +468,19 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, } // Bias the opcode by the special opcode base. - Temp += DWARF2_LINE_OPCODE_BASE; + Temp += Params.DWARF2LineOpcodeBase; // Avoid overflow when addr_delta is large. - if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) { + if (AddrDelta < 256 + MaxSpecialAddrDelta) { // Try using a special opcode. - Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE; + Opcode = Temp + AddrDelta * Params.DWARF2LineRange; if (Opcode <= 255) { OS << char(Opcode); return; } // Try using DW_LNS_const_add_pc followed by special op. 
- Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE; + Opcode = Temp + (AddrDelta - MaxSpecialAddrDelta) * Params.DWARF2LineRange; if (Opcode <= 255) { OS << char(dwarf::DW_LNS_const_add_pc); OS << char(Opcode); @@ -517,10 +514,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(1); MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); - EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1 && - MCOS->getContext().getDwarfVersion() >= 3) { - EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, + context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4); + if (context.getGenDwarfSectionSyms().size() > 1 && + context.getDwarfVersion() >= 3) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, + context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4); } else { EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); @@ -845,7 +846,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; - MCSymbol *RangesSectionSymbol = NULL; + MCSymbol *RangesSectionSymbol = nullptr; // Create end symbols for each section, and remove empty sections MCOS->getContext().finalizeDwarfSections(*MCOS); @@ -998,38 +999,29 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, } namespace { - class FrameEmitterImpl { - int CFAOffset; - int InitialCFAOffset; - bool IsEH; - const MCSymbol *SectionStart; - public: - FrameEmitterImpl(bool isEH) - : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) { - } +class FrameEmitterImpl { + int CFAOffset = 0; + int InitialCFAOffset = 0; + bool IsEH; + MCObjectStreamer &Streamer; - void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } +public: + FrameEmitterImpl(bool IsEH, MCObjectStreamer &Streamer) + : IsEH(IsEH), Streamer(Streamer) {} - /// Emit the unwind information in a compact way. - void EmitCompactUnwind(MCObjectStreamer &streamer, - const MCDwarfFrameInfo &frame); + /// Emit the unwind information in a compact way. 
@@ -998,38 +999,29 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
 }
 
 namespace {
-  class FrameEmitterImpl {
-    int CFAOffset;
-    int InitialCFAOffset;
-    bool IsEH;
-    const MCSymbol *SectionStart;
-  public:
-    FrameEmitterImpl(bool isEH)
-        : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) {
-    }
+class FrameEmitterImpl {
+  int CFAOffset = 0;
+  int InitialCFAOffset = 0;
+  bool IsEH;
+  MCObjectStreamer &Streamer;
 
-    void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
+public:
+  FrameEmitterImpl(bool IsEH, MCObjectStreamer &Streamer)
+      : IsEH(IsEH), Streamer(Streamer) {}
 
-    /// Emit the unwind information in a compact way.
-    void EmitCompactUnwind(MCObjectStreamer &streamer,
-                           const MCDwarfFrameInfo &frame);
+  /// Emit the unwind information in a compact way.
+  void EmitCompactUnwind(const MCDwarfFrameInfo &frame);
 
-    const MCSymbol &EmitCIE(MCObjectStreamer &streamer,
-                            const MCSymbol *personality,
-                            unsigned personalityEncoding,
-                            const MCSymbol *lsda,
-                            bool IsSignalFrame,
-                            unsigned lsdaEncoding,
-                            bool IsSimple);
-    MCSymbol *EmitFDE(MCObjectStreamer &streamer,
-                      const MCSymbol &cieStart,
-                      const MCDwarfFrameInfo &frame);
-    void EmitCFIInstructions(MCObjectStreamer &streamer,
-                             ArrayRef<MCCFIInstruction> Instrs,
-                             MCSymbol *BaseLabel);
-    void EmitCFIInstruction(MCObjectStreamer &Streamer,
-                            const MCCFIInstruction &Instr);
-  };
+  const MCSymbol &EmitCIE(const MCSymbol *personality,
+                          unsigned personalityEncoding, const MCSymbol *lsda,
+                          bool IsSignalFrame, unsigned lsdaEncoding,
+                          bool IsSimple);
+  void EmitFDE(const MCSymbol &cieStart, const MCDwarfFrameInfo &frame,
+               bool LastInSection, const MCSymbol &SectionStart);
+  void EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
+                           MCSymbol *BaseLabel);
+  void EmitCFIInstruction(const MCCFIInstruction &Instr);
+};
 
 } // end anonymous namespace
 
@@ -1037,8 +1029,7 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) {
   Streamer.EmitIntValue(Encoding, 1);
 }
 
-void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
-                                          const MCCFIInstruction &Instr) {
+void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
   int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
   auto *MRI = Streamer.getContext().getRegisterInfo();
 
@@ -1150,6 +1141,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
     Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
     return;
   }
+  case MCCFIInstruction::OpGnuArgsSize: {
+    Streamer.EmitIntValue(dwarf::DW_CFA_GNU_args_size, 1);
+    Streamer.EmitULEB128IntValue(Instr.getOffset());
+    return;
+  }
   case MCCFIInstruction::OpEscape:
     Streamer.EmitBytes(Instr.getValues());
     return;
@@ -1158,8 +1154,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
 }
 
 /// Emit frame instructions to describe the layout of the frame.
-void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
-                                           ArrayRef<MCCFIInstruction> Instrs,
+void FrameEmitterImpl::EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
                                            MCSymbol *BaseLabel) {
   for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
     const MCCFIInstruction &Instr = Instrs[i];
@@ -1171,18 +1166,17 @@ void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
     if (BaseLabel && Label) {
       MCSymbol *ThisSym = Label;
       if (ThisSym != BaseLabel) {
-        streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+        Streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
         BaseLabel = ThisSym;
       }
     }
 
-    EmitCFIInstruction(streamer, Instr);
+    EmitCFIInstruction(Instr);
   }
 }
 
 /// Emit the unwind information in a compact way.
-void FrameEmitterImpl::EmitCompactUnwind(MCObjectStreamer &Streamer,
-                                         const MCDwarfFrameInfo &Frame) {
+void FrameEmitterImpl::EmitCompactUnwind(const MCDwarfFrameInfo &Frame) {
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
 
@@ -1254,39 +1248,39 @@ static unsigned getCIEVersion(bool IsEH, unsigned DwarfVersion) {
   case 3:
     return 3;
   case 4:
+  case 5:
     return 4;
   }
   llvm_unreachable("Unknown version");
 }
 
-const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
-                                          const MCSymbol *personality,
+const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality,
                                           unsigned personalityEncoding,
                                           const MCSymbol *lsda,
                                           bool IsSignalFrame,
                                           unsigned lsdaEncoding,
                                           bool IsSimple) {
-  MCContext &context = streamer.getContext();
+  MCContext &context = Streamer.getContext();
   const MCRegisterInfo *MRI = context.getRegisterInfo();
   const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
 
   MCSymbol *sectionStart = context.createTempSymbol();
-  streamer.EmitLabel(sectionStart);
+  Streamer.EmitLabel(sectionStart);
 
   MCSymbol *sectionEnd = context.createTempSymbol();
 
   // Length
-  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
-                                               *sectionEnd, 4);
-  emitAbsValue(streamer, Length, 4);
+  const MCExpr *Length =
+      MakeStartMinusEndExpr(Streamer, *sectionStart, *sectionEnd, 4);
+  emitAbsValue(Streamer, Length, 4);
 
   // CIE ID
   unsigned CIE_ID = IsEH ? 0 : -1;
-  streamer.EmitIntValue(CIE_ID, 4);
+  Streamer.EmitIntValue(CIE_ID, 4);
 
   // Version
   uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
-  streamer.EmitIntValue(CIEVersion, 1);
+  Streamer.EmitIntValue(CIEVersion, 1);
 
   // Augmentation String
   SmallString<8> Augmentation;
@@ -1299,31 +1293,31 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
       Augmentation += "R";
     if (IsSignalFrame)
       Augmentation += "S";
-    streamer.EmitBytes(Augmentation);
+    Streamer.EmitBytes(Augmentation);
   }
-  streamer.EmitIntValue(0, 1);
+  Streamer.EmitIntValue(0, 1);
 
   if (CIEVersion >= 4) {
     // Address Size
-    streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
+    Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
 
     // Segment Descriptor Size
-    streamer.EmitIntValue(0, 1);
+    Streamer.EmitIntValue(0, 1);
   }
 
   // Code Alignment Factor
-  streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
+  Streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
 
   // Data Alignment Factor
-  streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+  Streamer.EmitSLEB128IntValue(getDataAlignmentFactor(Streamer));
 
   // Return Address Register
   if (CIEVersion == 1) {
     assert(MRI->getRARegister() <= 255 &&
            "DWARF 2 encodes return_address_register in one byte");
-    streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
+    Streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
   } else {
-    streamer.EmitULEB128IntValue(
+    Streamer.EmitULEB128IntValue(
         MRI->getDwarfRegNum(MRI->getRARegister(), IsEH));
   }
 
@@ -1335,28 +1329,28 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
     // Personality Encoding
     augmentationLength += 1;
     // Personality
-    augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+    augmentationLength += getSizeForEncoding(Streamer, personalityEncoding);
   }
   if (lsda)
     augmentationLength += 1;
   // Encoding of the FDE pointers
   augmentationLength += 1;
 
-  streamer.EmitULEB128IntValue(augmentationLength);
+  Streamer.EmitULEB128IntValue(augmentationLength);
 
   // Augmentation Data (optional)
   if (personality) {
     // Personality Encoding
-    emitEncodingByte(streamer, personalityEncoding);
+    emitEncodingByte(Streamer, personalityEncoding);
     // Personality
-    EmitPersonality(streamer, *personality, personalityEncoding);
+    EmitPersonality(Streamer, *personality, personalityEncoding);
   }
 
   if (lsda)
-    emitEncodingByte(streamer, lsdaEncoding);
+    emitEncodingByte(Streamer, lsdaEncoding);
 
   // Encoding of the FDE pointers
-  emitEncodingByte(streamer, MOFI->getFDEEncoding());
+  emitEncodingByte(Streamer, MOFI->getFDEEncoding());
   }
 
   // Initial Instructions
@@ -1365,22 +1359,23 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
   if (!IsSimple) {
     const std::vector<MCCFIInstruction> &Instructions =
         MAI->getInitialFrameState();
-    EmitCFIInstructions(streamer, Instructions, nullptr);
+    EmitCFIInstructions(Instructions, nullptr);
   }
   InitialCFAOffset = CFAOffset;
 
   // Padding
-  streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
+  Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
 
-  streamer.EmitLabel(sectionEnd);
+  Streamer.EmitLabel(sectionEnd);
   return *sectionStart;
 }
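EmitCIE above assembles the .eh_frame augmentation string one letter at a time. A minimal sketch of that ordering, assuming the conventional meaning of the letters ('z' announces the augmentation-length field; 'P', 'L', 'R', 'S' announce personality, LSDA encoding, FDE pointer encoding, and signal frames); buildAugmentation is an invented helper, not LLVM API:

    #include <iostream>
    #include <string>

    // Illustrative only: mirrors the order in which EmitCIE appends
    // augmentation letters for an EH CIE.
    std::string buildAugmentation(bool HasPersonality, bool HasLsda,
                                  bool IsSignalFrame) {
      std::string Aug = "z"; // augmentation data length follows
      if (HasPersonality)
        Aug += "P";
      if (HasLsda)
        Aug += "L";
      Aug += "R"; // FDE pointer encoding is always announced
      if (IsSignalFrame)
        Aug += "S";
      return Aug;
    }

    int main() {
      std::cout << buildAugmentation(true, true, false) << "\n"; // "zPLR"
    }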
-MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
-                                    const MCSymbol &cieStart,
-                                    const MCDwarfFrameInfo &frame) {
-  MCContext &context = streamer.getContext();
+void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
+                               const MCDwarfFrameInfo &frame,
+                               bool LastInSection,
+                               const MCSymbol &SectionStart) {
+  MCContext &context = Streamer.getContext();
   MCSymbol *fdeStart = context.createTempSymbol();
   MCSymbol *fdeEnd = context.createTempSymbol();
   const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
@@ -1388,107 +1383,103 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
   CFAOffset = InitialCFAOffset;
 
   // Length
-  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
-  emitAbsValue(streamer, Length, 4);
+  const MCExpr *Length = MakeStartMinusEndExpr(Streamer, *fdeStart, *fdeEnd, 0);
+  emitAbsValue(Streamer, Length, 4);
 
-  streamer.EmitLabel(fdeStart);
+  Streamer.EmitLabel(fdeStart);
 
   // CIE Pointer
   const MCAsmInfo *asmInfo = context.getAsmInfo();
   if (IsEH) {
-    const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
-                                                 0);
-    emitAbsValue(streamer, offset, 4);
+    const MCExpr *offset =
+        MakeStartMinusEndExpr(Streamer, cieStart, *fdeStart, 0);
+    emitAbsValue(Streamer, offset, 4);
   } else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) {
-    const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
-                                                 cieStart, 0);
-    emitAbsValue(streamer, offset, 4);
+    const MCExpr *offset =
+        MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0);
+    emitAbsValue(Streamer, offset, 4);
   } else {
-    streamer.EmitSymbolValue(&cieStart, 4);
+    Streamer.EmitSymbolValue(&cieStart, 4);
   }
 
   // PC Begin
   unsigned PCEncoding =
       IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr;
-  unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
-  emitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH);
+  unsigned PCSize = getSizeForEncoding(Streamer, PCEncoding);
+  emitFDESymbol(Streamer, *frame.Begin, PCEncoding, IsEH);
 
   // PC Range
-  const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
-                                              *frame.End, 0);
-  emitAbsValue(streamer, Range, PCSize);
+  const MCExpr *Range =
+      MakeStartMinusEndExpr(Streamer, *frame.Begin, *frame.End, 0);
+  emitAbsValue(Streamer, Range, PCSize);
 
   if (IsEH) {
     // Augmentation Data Length
     unsigned augmentationLength = 0;
 
     if (frame.Lsda)
-      augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+      augmentationLength += getSizeForEncoding(Streamer, frame.LsdaEncoding);
 
-    streamer.EmitULEB128IntValue(augmentationLength);
+    Streamer.EmitULEB128IntValue(augmentationLength);
 
     // Augmentation Data
     if (frame.Lsda)
-      emitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true);
+      emitFDESymbol(Streamer, *frame.Lsda, frame.LsdaEncoding, true);
   }
 
   // Call Frame Instructions
-  EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+  EmitCFIInstructions(frame.Instructions, frame.Begin);
 
   // Padding
-  streamer.EmitValueToAlignment(PCSize);
+  // The size of a .eh_frame section has to be a multiple of the alignment
+  // since a null CIE is interpreted as the end. Old systems overaligned
+  // .eh_frame, so we do too and account for it in the last FDE.
+  unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize;
+  Streamer.EmitValueToAlignment(Align);
 
-  return fdeEnd;
+  Streamer.EmitLabel(fdeEnd);
 }
 
 namespace {
-  struct CIEKey {
-    static const CIEKey getEmptyKey() {
-      return CIEKey(nullptr, 0, -1, false, false);
-    }
-    static const CIEKey getTombstoneKey() {
-      return CIEKey(nullptr, -1, 0, false, false);
-    }
+struct CIEKey {
+  static const CIEKey getEmptyKey() {
+    return CIEKey(nullptr, 0, -1, false, false);
+  }
+  static const CIEKey getTombstoneKey() {
+    return CIEKey(nullptr, -1, 0, false, false);
+  }
 
-    CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_,
-           unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_)
-        : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
-          LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_),
-          IsSimple(IsSimple_) {}
-    const MCSymbol *Personality;
-    unsigned PersonalityEncoding;
-    unsigned LsdaEncoding;
-    bool IsSignalFrame;
-    bool IsSimple;
-  };
-}
+  CIEKey(const MCSymbol *Personality, unsigned PersonalityEncoding,
+         unsigned LsdaEncoding, bool IsSignalFrame, bool IsSimple)
+      : Personality(Personality), PersonalityEncoding(PersonalityEncoding),
+        LsdaEncoding(LsdaEncoding), IsSignalFrame(IsSignalFrame),
+        IsSimple(IsSimple) {}
+  const MCSymbol *Personality;
+  unsigned PersonalityEncoding;
+  unsigned LsdaEncoding;
+  bool IsSignalFrame;
+  bool IsSimple;
+};
+} // anonymous namespace
 
 namespace llvm {
-  template <>
-  struct DenseMapInfo<CIEKey> {
-    static CIEKey getEmptyKey() {
-      return CIEKey::getEmptyKey();
-    }
-    static CIEKey getTombstoneKey() {
-      return CIEKey::getTombstoneKey();
-    }
-    static unsigned getHashValue(const CIEKey &Key) {
-      return static_cast<unsigned>(hash_combine(Key.Personality,
-                                                Key.PersonalityEncoding,
-                                                Key.LsdaEncoding,
-                                                Key.IsSignalFrame,
-                                                Key.IsSimple));
-    }
-    static bool isEqual(const CIEKey &LHS,
-                        const CIEKey &RHS) {
-      return LHS.Personality == RHS.Personality &&
-             LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
-             LHS.LsdaEncoding == RHS.LsdaEncoding &&
-             LHS.IsSignalFrame == RHS.IsSignalFrame &&
-             LHS.IsSimple == RHS.IsSimple;
-    }
-  };
-}
+template <> struct DenseMapInfo<CIEKey> {
+  static CIEKey getEmptyKey() { return CIEKey::getEmptyKey(); }
+  static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
+  static unsigned getHashValue(const CIEKey &Key) {
+    return static_cast<unsigned>(
+        hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
+                     Key.IsSignalFrame, Key.IsSimple));
+  }
+  static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
+    return LHS.Personality == RHS.Personality &&
+           LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+           LHS.LsdaEncoding == RHS.LsdaEncoding &&
+           LHS.IsSignalFrame == RHS.IsSignalFrame &&
+           LHS.IsSimple == RHS.IsSimple;
  }
+};
+} // namespace llvm
 
 void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
                                bool IsEH) {
@@ -1496,7 +1487,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
-  FrameEmitterImpl Emitter(IsEH);
+  FrameEmitterImpl Emitter(IsEH, Streamer);
   ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos();
 
   // Emit the compact unwind info if available.
@@ -1514,7 +1505,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
       NeedsEHFrameSection |=
         Frame.CompactUnwindEncoding ==
           MOFI->getCompactUnwindDwarfEHFrameOnly();
-      Emitter.EmitCompactUnwind(Streamer, Frame);
+      Emitter.EmitCompactUnwind(Frame);
     }
   }
 
@@ -1527,23 +1518,15 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
   Streamer.SwitchSection(&Section);
   MCSymbol *SectionStart = Context.createTempSymbol();
   Streamer.EmitLabel(SectionStart);
-  Emitter.setSectionStart(SectionStart);
 
-  MCSymbol *FDEEnd = nullptr;
   DenseMap<CIEKey, const MCSymbol *> CIEStarts;
 
   const MCSymbol *DummyDebugKey = nullptr;
-  NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
-  for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
-    const MCDwarfFrameInfo &Frame = FrameArray[i];
-
-    // Emit the label from the previous iteration
-    if (FDEEnd) {
-      Streamer.EmitLabel(FDEEnd);
-      FDEEnd = nullptr;
-    }
-
-    if (!NeedsEHFrameSection && Frame.CompactUnwindEncoding !=
+  bool CanOmitDwarf = MOFI->getOmitDwarfIfHaveCompactUnwind();
+  for (auto I = FrameArray.begin(), E = FrameArray.end(); I != E;) {
+    const MCDwarfFrameInfo &Frame = *I;
+    ++I;
+    if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
           MOFI->getCompactUnwindDwarfEHFrameOnly())
       // Don't generate an EH frame if we don't need one. I.e., it's taken care
       // of by the compact unwind encoding.
       continue;
 
     CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
               Frame.LsdaEncoding, Frame.IsSignalFrame, Frame.IsSimple);
     const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
     if (!CIEStart)
-      CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
-                                  Frame.PersonalityEncoding, Frame.Lsda,
-                                  Frame.IsSignalFrame,
-                                  Frame.LsdaEncoding,
-                                  Frame.IsSimple);
+      CIEStart = &Emitter.EmitCIE(Frame.Personality, Frame.PersonalityEncoding,
+                                  Frame.Lsda, Frame.IsSignalFrame,
+                                  Frame.LsdaEncoding, Frame.IsSimple);
 
-    FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+    Emitter.EmitFDE(*CIEStart, Frame, I == E, *SectionStart);
   }
-
-  Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
-  if (FDEEnd)
-    Streamer.EmitLabel(FDEEnd);
 }
 
 void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index bc0ba85a8ff6..de645cac7370 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -29,23 +29,7 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
   return false;
 }
 
-// ELF doesn't require relocations to be in any order. We sort by the Offset,
-// just to match gnu as for easier comparison. The use type is an arbitrary way
-// of making the sort deterministic.
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
-  const ELFRelocationEntry &A = *AP;
-  const ELFRelocationEntry &B = *BP;
-  if (A.Offset != B.Offset)
-    return B.Offset - A.Offset;
-  if (B.Type != A.Type)
-    return A.Type - B.Type;
-  //llvm_unreachable("ELFRelocs might be unstable!");
-  return 0;
-}
-
-
 void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
                                          std::vector<ELFRelocationEntry> &Relocs) {
-  array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
 }
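The rewritten MCDwarfFrameEmitter::Emit loop above deduplicates CIEs through a DenseMap keyed on CIEKey, so every FDE with the same personality/LSDA/signal-frame configuration reuses one CIE. A portable sketch of the same dedup pattern, using std::map and std::tuple in place of llvm::DenseMap and the DenseMapInfo<CIEKey> specialization; all names here are invented for illustration:

    #include <iostream>
    #include <map>
    #include <string>
    #include <tuple>
    #include <vector>

    // Hypothetical frame record; only the fields that select a CIE.
    struct Frame {
      std::string Personality;
      unsigned PersonalityEncoding = 0;
      unsigned LsdaEncoding = 0;
      bool IsSignalFrame = false;
      bool IsSimple = false;
    };

    using CIEKey = std::tuple<std::string, unsigned, unsigned, bool, bool>;

    int main() {
      std::vector<Frame> Frames = {
          {"__gxx_personality_v0", 0x9b, 0x1b, false, false},
          {"__gxx_personality_v0", 0x9b, 0x1b, false, false},
          {"", 0, 0xff, false, false},
      };

      std::map<CIEKey, int> CIEStarts; // key -> id of already-emitted CIE
      int NextCIE = 0;
      for (const Frame &F : Frames) {
        CIEKey Key{F.Personality, F.PersonalityEncoding, F.LsdaEncoding,
                   F.IsSignalFrame, F.IsSimple};
        auto It = CIEStarts.find(Key);
        if (It == CIEStarts.end())
          It = CIEStarts.emplace(Key, NextCIE++).first; // "emit" a new CIE
        std::cout << "FDE uses CIE #" << It->second << "\n"; // 0, 0, 1
      }
    }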
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index fe9ac21e17fc..06d161bccab4 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -68,7 +68,6 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
     EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
 
     Assembler.writeFragmentPadding(*EF, FSize, OW);
-    VecOS.flush();
     delete OW;
 
     DF->getContents().append(Code.begin(), Code.end());
@@ -87,20 +86,10 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
 }
 
 void MCELFStreamer::InitSections(bool NoExecStack) {
-  // This emulates the same behavior of GNU as. This makes it easier
-  // to compare the output as the major sections are in the same order.
   MCContext &Ctx = getContext();
   SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
   EmitCodeAlignment(4);
 
-  SwitchSection(Ctx.getObjectFileInfo()->getDataSection());
-  EmitCodeAlignment(4);
-
-  SwitchSection(Ctx.getObjectFileInfo()->getBSSSection());
-  EmitCodeAlignment(4);
-
-  SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
-
   if (NoExecStack)
     SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
 }
@@ -112,7 +101,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *S) {
   MCObjectStreamer::EmitLabel(Symbol);
 
   const MCSectionELF &Section =
-      static_cast<const MCSectionELF &>(Symbol->getSection());
+      static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
   if (Section.getFlags() & ELF::SHF_TLS)
     Symbol->setType(ELF::STT_TLS);
 }
@@ -134,7 +123,7 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   llvm_unreachable("invalid assembler flag!");
 }
 
-// If bundle aligment is used and there are any instructions in the section, it
+// If bundle alignment is used and there are any instructions in the section, it
 // needs to be aligned to at least the bundle size.
 static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
                                            MCSection *Section) {
@@ -312,13 +301,20 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
     Symbol->setType(ELF::STT_OBJECT);
 
   if (Symbol->getBinding() == ELF::STB_LOCAL) {
-    MCSection *Section = getAssembler().getContext().getELFSection(
+    MCSection &Section = *getAssembler().getContext().getELFSection(
         ".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+    MCSectionSubPair P = getCurrentSection();
+    SwitchSection(&Section);
 
-    AssignSection(Symbol, Section);
+    EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+    EmitLabel(Symbol);
+    EmitZeros(Size);
 
-    struct LocalCommon L = {Symbol, Size, ByteAlignment};
-    LocalCommons.push_back(L);
+    // Update the maximum alignment of the section if necessary.
+    if (ByteAlignment > Section.getAlignment())
+      Section.setAlignment(ByteAlignment);
+
+    SwitchSection(P.first, P.second);
   } else {
     if(Symbol->declareCommon(Size, ByteAlignment))
       report_fatal_error("Symbol: " + Symbol->getName() +
@@ -344,7 +340,7 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
 }
 
 void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                  const SMLoc &Loc) {
+                                  SMLoc Loc) {
   if (isBundleLocked())
     report_fatal_error("Emitting values inside a locked bundle is forbidden");
   fixSymbolsInTLSFixups(Value);
@@ -480,7 +476,6 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
   SmallString<256> Code;
   raw_svector_ostream VecOS(Code);
   Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
-  VecOS.flush();
 
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
     fixSymbolsInTLSFixups(Fixups[i].getValue());
@@ -603,7 +598,7 @@ void MCELFStreamer::EmitBundleUnlock() {
     report_fatal_error("Empty bundle-locked group is forbidden");
 
   // When the -mc-relax-all flag is used, we emit instructions to fragments
-  // stored on a stack. When the bundle unlock is emited, we pop a fragment
+  // stored on a stack. When the bundle unlock is emitted, we pop a fragment
   // from the stack a merge it to the one below.
   if (getAssembler().getRelaxAll()) {
     assert(!BundleGroups.empty() && "There are no bundle groups");
@@ -625,29 +620,6 @@ void MCELFStreamer::EmitBundleUnlock() {
   Sec.setBundleLockState(MCSection::NotBundleLocked);
 }
 
-void MCELFStreamer::Flush() {
-  for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
-                                                e = LocalCommons.end();
-       i != e; ++i) {
-    const MCSymbol &Symbol = *i->Symbol;
-    uint64_t Size = i->Size;
-    unsigned ByteAlignment = i->ByteAlignment;
-    MCSection &Section = Symbol.getSection();
-
-    getAssembler().registerSection(Section);
-    new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section);
-
-    MCFragment *F = new MCFillFragment(0, 0, Size, &Section);
-    Symbol.setFragment(F);
-
-    // Update the maximum alignment of the section if necessary.
-    if (ByteAlignment > Section.getAlignment())
-      Section.setAlignment(ByteAlignment);
-  }
-
-  LocalCommons.clear();
-}
-
 void MCELFStreamer::FinishImpl() {
   // Ensure the last section gets aligned if necessary.
   MCSection *CurSection = getCurrentSectionOnly();
@@ -655,8 +627,6 @@ void MCELFStreamer::FinishImpl() {
 
   EmitFrames(nullptr);
 
-  Flush();
-
   this->MCObjectStreamer::FinishImpl();
 }
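With the LocalCommons list gone, EmitCommonSymbol above now materializes STB_LOCAL commons immediately: switch to .bss, align, define the label, reserve zeros, and raise the section alignment to the maximum seen. A small model of that bookkeeping, assuming power-of-two alignments; Bss and defineLocalCommon are invented names:

    #include <cstdint>
    #include <iostream>

    // Illustrative model of the new local-common path. Size tracks the end
    // offset of .bss; Alignment tracks the section's alignment.
    struct Bss {
      uint64_t Size = 0;
      unsigned Alignment = 1;
    };

    // ByteAlignment must be a power of two for the rounding mask below.
    uint64_t defineLocalCommon(Bss &S, uint64_t SymSize,
                               unsigned ByteAlignment) {
      // EmitValueToAlignment: round the offset up to the requested alignment.
      S.Size = (S.Size + ByteAlignment - 1) & ~uint64_t(ByteAlignment - 1);
      uint64_t SymbolOffset = S.Size; // EmitLabel defines the symbol here
      S.Size += SymSize;              // EmitZeros reserves the storage
      if (ByteAlignment > S.Alignment) // keep the maximum, as in the patch
        S.Alignment = ByteAlignment;
      return SymbolOffset;
    }

    int main() {
      Bss S;
      std::cout << defineLocalCommon(S, 3, 4) << "\n";  // 0
      std::cout << defineLocalCommon(S, 8, 16) << "\n"; // 16
      std::cout << S.Alignment << "\n";                 // 16
    }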
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index a30ceecc952b..0f26b38c29d7 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -43,7 +43,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
     const MCSymbol &Sym = SRE.getSymbol();
     // Parenthesize names that start with $ so that they don't look like
     // absolute names.
-    bool UseParens = Sym.getName()[0] == '$';
+    bool UseParens = Sym.getName().size() && Sym.getName()[0] == '$';
     if (UseParens) {
       OS << '(';
       Sym.print(OS, MAI);
@@ -202,6 +202,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_SIZE: return "SIZE";
   case VK_WEAKREF: return "WEAKREF";
   case VK_ARM_NONE: return "none";
+  case VK_ARM_GOT_PREL: return "GOT_PREL";
   case VK_ARM_TARGET1: return "target1";
   case VK_ARM_TARGET2: return "target2";
   case VK_ARM_PREL31: return "prel31";
@@ -311,7 +312,6 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("got", VK_GOT)
     .Case("gotoff", VK_GOTOFF)
     .Case("gotpcrel", VK_GOTPCREL)
-    .Case("got_prel", VK_GOTPCREL)
     .Case("gottpoff", VK_GOTTPOFF)
     .Case("indntpoff", VK_INDNTPOFF)
     .Case("ntpoff", VK_NTPOFF)
@@ -382,7 +382,15 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
     .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
     .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
+    .Case("gdgot", VK_Hexagon_GD_GOT)
+    .Case("gdplt", VK_Hexagon_GD_PLT)
+    .Case("iegot", VK_Hexagon_IE_GOT)
+    .Case("ie", VK_Hexagon_IE)
+    .Case("ldgot", VK_Hexagon_LD_GOT)
+    .Case("ldplt", VK_Hexagon_LD_PLT)
+    .Case("pcrel", VK_Hexagon_PCREL)
     .Case("none", VK_ARM_NONE)
+    .Case("got_prel", VK_ARM_GOT_PREL)
     .Case("target1", VK_ARM_TARGET1)
     .Case("target2", VK_ARM_TARGET2)
     .Case("prel31", VK_ARM_PREL31)
@@ -477,7 +485,8 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
     return;
 
-  if (SA.getFragment() == SB.getFragment()) {
+  if (SA.getFragment() == SB.getFragment() && !SA.isVariable() &&
+      !SB.isVariable()) {
     Addend += (SA.getOffset() - SB.getOffset());
 
     // Pointers to Thumb symbols need to have their low-bit set to allow
@@ -583,11 +592,6 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
   const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
   const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
 
-  // If we have a negated symbol, then we must have also have a non-negated
-  // symbol in order to encode the expression.
-  if (B && !A)
-    return false;
-
   Res = MCValue::get(A, B, Result_Cst);
   return true;
 }
@@ -606,7 +610,7 @@ bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
                                    true);
 }
 
-static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+static bool canExpand(const MCSymbol &Sym, bool InSet) {
   const MCExpr *Expr = Sym.getVariableValue();
   const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
   if (Inner) {
@@ -616,9 +620,7 @@ static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
   if (InSet)
     return true;
-  if (!Asm)
-    return false;
-  return !Asm->getWriter().isWeak(Sym);
+  return !Sym.isInSection();
 }
 
 bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
@@ -643,7 +645,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
 
     // Evaluate recursively if this is a variable.
     if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
-        canExpand(Sym, Asm, InSet)) {
+        canExpand(Sym, InSet)) {
       bool IsMachO = SRE->hasSubsectionsViaSymbols();
       if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
               Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
@@ -739,7 +741,17 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
       case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
       case MCBinaryExpr::Add:  Result = LHS + RHS; break;
       case MCBinaryExpr::And:  Result = LHS & RHS; break;
-      case MCBinaryExpr::Div:  Result = LHS / RHS; break;
+      case MCBinaryExpr::Div:
+        // Handle division by zero. gas just emits a warning and keeps going,
+        // we try to be stricter.
+        // FIXME: Currently the caller of this function has no way to understand
+        // we're bailing out because of 'division by zero'. Therefore, it will
+        // emit a 'expected relocatable expression' error. It would be nice to
+        // change this code to emit a better diagnostic.
+        if (RHS == 0)
+          return false;
+        Result = LHS / RHS;
+        break;
       case MCBinaryExpr::EQ:   Result = LHS == RHS; break;
       case MCBinaryExpr::GT:   Result = LHS > RHS; break;
       case MCBinaryExpr::GTE:  Result = LHS >= RHS; break;
@@ -765,45 +777,41 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
   llvm_unreachable("Invalid assembly expression kind!");
 }
 
-MCSection *MCExpr::findAssociatedSection() const {
+MCFragment *MCExpr::findAssociatedFragment() const {
   switch (getKind()) {
   case Target:
     // We never look through target specific expressions.
-    return cast<MCTargetExpr>(this)->findAssociatedSection();
+    return cast<MCTargetExpr>(this)->findAssociatedFragment();
 
   case Constant:
-    return MCSymbol::AbsolutePseudoSection;
+    return MCSymbol::AbsolutePseudoFragment;
 
   case SymbolRef: {
     const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
     const MCSymbol &Sym = SRE->getSymbol();
-
-    if (Sym.isDefined())
-      return &Sym.getSection();
-
-    return nullptr;
+    return Sym.getFragment();
   }
 
   case Unary:
-    return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedSection();
+    return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedFragment();
 
   case Binary: {
     const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
-    MCSection *LHS_S = BE->getLHS()->findAssociatedSection();
-    MCSection *RHS_S = BE->getRHS()->findAssociatedSection();
+    MCFragment *LHS_F = BE->getLHS()->findAssociatedFragment();
+    MCFragment *RHS_F = BE->getRHS()->findAssociatedFragment();
 
-    // If either section is absolute, return the other.
-    if (LHS_S == MCSymbol::AbsolutePseudoSection)
-      return RHS_S;
-    if (RHS_S == MCSymbol::AbsolutePseudoSection)
-      return LHS_S;
+    // If either is absolute, return the other.
+    if (LHS_F == MCSymbol::AbsolutePseudoFragment)
+      return RHS_F;
+    if (RHS_F == MCSymbol::AbsolutePseudoFragment)
+      return LHS_F;
 
     // Not always correct, but probably the best we can do without more context.
     if (BE->getOpcode() == MCBinaryExpr::Sub)
-      return MCSymbol::AbsolutePseudoSection;
+      return MCSymbol::AbsolutePseudoFragment;
 
-    // Otherwise, return the first non-null section.
-    return LHS_S ? LHS_S : RHS_S;
+    // Otherwise, return the first non-null fragment.
+    return LHS_F ? LHS_F : RHS_F;
   }
   }
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
new file mode 100644
index 000000000000..efdb7049203a
--- /dev/null
+++ b/lib/MC/MCFragment.cpp
@@ -0,0 +1,458 @@
+//===- lib/MC/MCFragment.cpp - Assembler Fragment Implementation ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFragment.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include
+using namespace llvm;
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+    : Assembler(Asm), LastValidFragment() {
+  // Compute the section layout order. Virtual sections must go last.
+  for (MCSection &Sec : Asm)
+    if (!Sec.isVirtualSection())
+      SectionOrder.push_back(&Sec);
+  for (MCSection &Sec : Asm)
+    if (Sec.isVirtualSection())
+      SectionOrder.push_back(&Sec);
+}
+
+bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
+  const MCSection *Sec = F->getParent();
+  const MCFragment *LastValid = LastValidFragment.lookup(Sec);
+  if (!LastValid)
+    return false;
+  assert(LastValid->getParent() == Sec);
+  return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
+  // If this fragment wasn't already valid, we don't need to do anything.
+  if (!isFragmentValid(F))
+    return;
+
+  // Otherwise, reset the last valid fragment to the previous fragment
+  // (if this is the first fragment, it will be NULL).
+  LastValidFragment[F->getParent()] = F->getPrevNode();
+}
+
+void MCAsmLayout::ensureValid(const MCFragment *F) const {
+  MCSection *Sec = F->getParent();
+  MCSection::iterator I;
+  if (MCFragment *Cur = LastValidFragment[Sec])
+    I = ++MCSection::iterator(Cur);
+  else
+    I = Sec->begin();
+
+  // Advance the layout position until the fragment is valid.
+  while (!isFragmentValid(F)) {
+    assert(I != Sec->end() && "Layout bookkeeping error");
+    const_cast<MCAsmLayout *>(this)->layoutFragment(&*I);
+    ++I;
+  }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+  ensureValid(F);
+  assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+  return F->Offset;
+}
+
+// Simple getSymbolOffset helper for the non-variable case.
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
+                           bool ReportError, uint64_t &Val) {
+  if (!S.getFragment()) {
+    if (ReportError)
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         S.getName() + "'");
+    return false;
+  }
+  Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset();
+  return true;
+}
+
+static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
+                                bool ReportError, uint64_t &Val) {
+  if (!S.isVariable())
+    return getLabelOffset(Layout, S, ReportError, Val);
+
+  // If SD is a variable, evaluate it.
+  MCValue Target;
+  if (!S.getVariableValue()->evaluateAsValue(Target, Layout))
+    report_fatal_error("unable to evaluate offset for variable '" +
+                       S.getName() + "'");
+
+  uint64_t Offset = Target.getConstant();
+
+  const MCSymbolRefExpr *A = Target.getSymA();
+  if (A) {
+    uint64_t ValA;
+    if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
+      return false;
+    Offset += ValA;
+  }
+
+  const MCSymbolRefExpr *B = Target.getSymB();
+  if (B) {
+    uint64_t ValB;
+    if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
+      return false;
+    Offset -= ValB;
+  }
+
+  Val = Offset;
+  return true;
+}
+
+bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const {
+  return getSymbolOffsetImpl(*this, S, false, Val);
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const {
+  uint64_t Val;
+  getSymbolOffsetImpl(*this, S, true, Val);
+  return Val;
+}
+
+const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
+  if (!Symbol.isVariable())
+    return &Symbol;
+
+  const MCExpr *Expr = Symbol.getVariableValue();
+  MCValue Value;
+  if (!Expr->evaluateAsValue(Value, *this)) {
+    Assembler.getContext().reportError(
+        SMLoc(), "expression could not be evaluated");
+    return nullptr;
+  }
+
+  const MCSymbolRefExpr *RefB = Value.getSymB();
+  if (RefB) {
+    Assembler.getContext().reportError(
+        SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
+                     "' could not be evaluated in a subtraction expression");
+    return nullptr;
+  }
+
+  const MCSymbolRefExpr *A = Value.getSymA();
+  if (!A)
+    return nullptr;
+
+  const MCSymbol &ASym = A->getSymbol();
+  const MCAssembler &Asm = getAssembler();
+  if (ASym.isCommon()) {
+    // FIXME: we should probably add a SMLoc to MCExpr.
+    Asm.getContext().reportError(SMLoc(),
+                                 "Common symbol '" + ASym.getName() +
+                                     "' cannot be used in assignment expr");
+    return nullptr;
+  }
+
+  return &ASym;
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
+  // The size is the last fragment's end offset.
+  const MCFragment &F = Sec->getFragmentList().back();
+  return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const {
+  // Virtual sections have no file size.
+  if (Sec->isVirtualSection())
+    return 0;
+
+  // Otherwise, the file size is the same as the address space size.
+  return getSectionAddressSize(Sec);
+}
+
+uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
+                                    const MCFragment *F,
+                                    uint64_t FOffset, uint64_t FSize) {
+  uint64_t BundleSize = Assembler.getBundleAlignSize();
+  assert(BundleSize > 0 &&
+         "computeBundlePadding should only be called if bundling is enabled");
+  uint64_t BundleMask = BundleSize - 1;
+  uint64_t OffsetInBundle = FOffset & BundleMask;
+  uint64_t EndOfFragment = OffsetInBundle + FSize;
+
+  // There are two kinds of bundling restrictions:
+  //
+  // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
+  //    *end* on a bundle boundary.
+  // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
+  //    would, add padding until the end of the bundle so that the fragment
+  //    will start in a new one.
+  if (F->alignToBundleEnd()) {
+    // Three possibilities here:
+    //
+    // A) The fragment just happens to end at a bundle boundary, so we're good.
+    // B) The fragment ends before the current bundle boundary: pad it just
+    //    enough to reach the boundary.
+    // C) The fragment ends after the current bundle boundary: pad it until it
+    //    reaches the end of the next bundle boundary.
+    //
+    // Note: this code could be made shorter with some modulo trickery, but it's
+    // intentionally kept in its more explicit form for simplicity.
+    if (EndOfFragment == BundleSize)
+      return 0;
+    else if (EndOfFragment < BundleSize)
+      return BundleSize - EndOfFragment;
+    else { // EndOfFragment > BundleSize
+      return 2 * BundleSize - EndOfFragment;
+    }
+  } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
+    return BundleSize - OffsetInBundle;
+  else
+    return 0;
+}
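computeBundlePadding above distinguishes three align-to-bundle-end cases plus the boundary-crossing case. The following standalone restatement reproduces the same arithmetic so the cases can be unit-checked; bundlePadding is an illustrative rename, and BundleSize is assumed to be a power of two, as in the assembler:

    #include <cassert>
    #include <cstdint>

    // Standalone restatement of computeBundlePadding for checking the cases.
    uint64_t bundlePadding(uint64_t BundleSize, bool AlignToBundleEnd,
                           uint64_t FOffset, uint64_t FSize) {
      uint64_t OffsetInBundle = FOffset & (BundleSize - 1);
      uint64_t EndOfFragment = OffsetInBundle + FSize;
      if (AlignToBundleEnd) {
        if (EndOfFragment == BundleSize)
          return 0;                            // A: already ends on a boundary
        if (EndOfFragment < BundleSize)
          return BundleSize - EndOfFragment;   // B: pad up to the boundary
        return 2 * BundleSize - EndOfFragment; // C: pad into the next bundle
      }
      if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
        return BundleSize - OffsetInBundle;    // would cross: start a new bundle
      return 0;
    }

    int main() {
      assert(bundlePadding(16, true, 0, 16) == 0);   // case A
      assert(bundlePadding(16, true, 0, 10) == 6);   // case B
      assert(bundlePadding(16, true, 12, 10) == 10); // case C: 32 - 22
      assert(bundlePadding(16, false, 12, 10) == 4); // crossing -> pad to 16
      return 0;
    }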
+ if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + + if (DF->fixup_begin() != DF->fixup_end()) { + OS << ",\n "; + OS << " Fixups:["; + for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), + ie = DF->fixup_end(); it != ie; ++it) { + if (it != DF->fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + break; + } + case MCFragment::FT_CompactEncodedInst: { + const MCCompactEncodedInstFragment *CEIF = + cast(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl &Contents = CEIF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + break; + } + case MCFragment::FT_Fill: { + const MCFillFragment *FF = cast(this); + OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() + << " Size:" << FF->getSize(); + break; + } + case MCFragment::FT_Relaxable: { + const MCRelaxableFragment *F = cast(this); + OS << "\n "; + OS << " Inst:"; + F->getInst().dump_pretty(OS); + break; + } + case MCFragment::FT_Org: { + const MCOrgFragment *OF = cast(this); + OS << "\n "; + OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + break; + } + case MCFragment::FT_Dwarf: { + const MCDwarfLineAddrFragment *OF = cast(this); + OS << "\n "; + OS << " AddrDelta:" << OF->getAddrDelta() + << " LineDelta:" << OF->getLineDelta(); + break; + } + case MCFragment::FT_DwarfFrame: { + const MCDwarfCallFrameFragment *CF = cast(this); + OS << "\n "; + OS << " AddrDelta:" << CF->getAddrDelta(); + break; + } + case MCFragment::FT_LEB: { + const MCLEBFragment *LF = cast(this); + OS << "\n "; + OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); + break; + } + case MCFragment::FT_SafeSEH: { + const MCSafeSEHFragment *F = cast(this); + OS << "\n "; + OS << " Sym:" << F->getSymbol(); + break; + } + case MCFragment::FT_Dummy: + break; + } + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + OS << "("; + it->dump(); + OS << ", Index:" << it->getIndex() << ", "; + OS << ")"; + } + OS << "]>\n"; +} +#endif diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 7ef69be66df6..5f829aeb339c 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -23,6 +23,8 @@ void MCOperand::print(raw_ostream &OS) const { OS << "Reg:" << getReg(); else if (isImm()) OS << "Imm:" << getImm(); + else if (isFPImm()) + OS << "FPImm:" << getFPImm(); else if (isExpr()) { OS << "Expr:(" << *getExpr() << ")"; } else if (isInst()) { diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index 5be2fa1b30b6..ee55f3eff3ac 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -53,7 +53,7 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI, bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI) const { - if (const uint16_t *ImpDefs = ImplicitDefs) + if (const MCPhysReg *ImpDefs = ImplicitDefs) for (; *ImpDefs; ++ImpDefs) if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs))) return true; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 53cd1317a3d7..21f7571eec4a 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ 
b/lib/MC/MCMachOStreamer.cpp @@ -60,6 +60,7 @@ public: /// state management void reset() override { + CreatedADWARFSection = false; HasSectionLabel.clear(); MCObjectStreamer::reset(); } @@ -180,8 +181,6 @@ void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - // isSymbolLinkerVisible uses the section. - AssignSection(Symbol, getCurrentSection().first); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) @@ -384,8 +383,6 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - AssignSection(Symbol, nullptr); - getAssembler().registerSymbol(*Symbol); Symbol->setExternal(true); Symbol->setCommon(Size, ByteAlignment); @@ -417,8 +414,6 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); - AssignSection(Symbol, Section); - MCFragment *F = new MCFillFragment(0, 0, Size, Section); Symbol->setFragment(F); @@ -443,12 +438,11 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst, SmallString<256> Code; raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // Add the fixups and data. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); - DF->getFixups().push_back(Fixups[i]); + for (MCFixup &Fixup : Fixups) { + Fixup.setOffset(Fixup.getOffset() + DF->getContents().size()); + DF->getFixups().push_back(Fixup); } DF->getContents().append(Code.begin(), Code.end()); } @@ -463,7 +457,8 @@ void MCMachOStreamer::FinishImpl() { // defining symbols. DenseMap DefiningSymbolMap; for (const MCSymbol &Symbol : getAssembler().symbols()) { - if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.getFragment()) { + if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.isInSection() && + !Symbol.isVariable()) { // An atom defining symbol should never be internal to a fragment. assert(Symbol.getOffset() == 0 && "Invalid offset in atom defining symbol!"); @@ -473,14 +468,12 @@ void MCMachOStreamer::FinishImpl() { // Set the fragment atom associations by tracking the last seen atom defining // symbol. - for (MCAssembler::iterator it = getAssembler().begin(), - ie = getAssembler().end(); it != ie; ++it) { + for (MCSection &Sec : getAssembler()) { const MCSymbol *CurrentAtom = nullptr; - for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; - ++it2) { - if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(it2)) + for (MCFragment &Frag : Sec) { + if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag)) CurrentAtom = Symbol; - it2->setAtom(CurrentAtom); + Frag.setAtom(CurrentAtom); } } @@ -493,6 +486,26 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, DWARFMustBeAtTheEnd, LabelSections); + const Triple &TT = Context.getObjectFileInfo()->getTargetTriple(); + if (TT.isOSDarwin()) { + unsigned Major, Minor, Update; + TT.getOSVersion(Major, Minor, Update); + // If there is a version specified, Major will be non-zero. 
+ if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else { + assert(TT.isiOS() && "Must only be iOS platform left"); + VersionType = MCVM_IOSVersionMin; + } + S->EmitVersionMin(VersionType, Major, Minor, Update); + } + } if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 576827a72d56..028f2e955b21 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -16,6 +16,8 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/Support/COFF.h" + using namespace llvm; static bool useCompactUnwind(const Triple &T) { @@ -27,6 +29,10 @@ static bool useCompactUnwind(const Triple &T) { if (T.getArch() == Triple::aarch64) return true; + // armv7k always has it. + if (T.isWatchOS()) + return true; + // Use it on newer version of OS X. if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) return true; @@ -43,9 +49,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { // MachO SupportsWeakOmittedEHFrame = false; + EHFrameSection = Ctx->getMachOSection( + "__TEXT", "__eh_frame", + MachO::S_COALESCED | MachO::S_ATTR_NO_TOC | + MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + if (T.isOSDarwin() && T.getArch() == Triple::aarch64) SupportsCompactUnwindWithoutEHFrame = true; + if (T.isWatchOS()) + OmitDwarfIfHaveCompactUnwind = true; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel; @@ -61,16 +76,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { MachO::S_ATTR_PURE_INSTRUCTIONS, SectionKind::getText()); DataSection // .data - = Ctx->getMachOSection("__DATA", "__data", 0, - SectionKind::getDataRel()); + = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getData()); // BSSSection might not be expected initialized on msvc. BSSSection = nullptr; TLSDataSection // .tdata - = Ctx->getMachOSection("__DATA", "__thread_data", - MachO::S_THREAD_LOCAL_REGULAR, - SectionKind::getDataRel()); + = Ctx->getMachOSection("__DATA", "__thread_data", + MachO::S_THREAD_LOCAL_REGULAR, + SectionKind::getData()); TLSBSSSection // .tbss = Ctx->getMachOSection("__DATA", "__thread_bss", MachO::S_THREAD_LOCAL_ZEROFILL, @@ -78,14 +92,13 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { // TODO: Verify datarel below. 
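createMachOStreamer above now emits a version-min load command chosen from the target triple. A sketch of just that selection order (watchOS, then tvOS, then OS X, with iOS as the remaining case); the Platform struct and selectVersionMin stand in for llvm::Triple and are invented for this example:

    #include <cstdio>

    // Mirrors MCVersionMinType from the patch; values are illustrative.
    enum VersionMinType { WatchOSVersionMin, TvOSVersionMin, OSXVersionMin,
                          IOSVersionMin };

    // Invented stand-in for the triple queries used above.
    struct Platform {
      bool IsWatchOS, IsTvOS, IsMacOSX; // anything else is treated as iOS
    };

    VersionMinType selectVersionMin(const Platform &P) {
      if (P.IsWatchOS) return WatchOSVersionMin;
      if (P.IsTvOS)    return TvOSVersionMin;
      if (P.IsMacOSX)  return OSXVersionMin;
      return IOSVersionMin; // "Must only be iOS platform left"
    }

    int main() {
      Platform Watch{true, false, false};
      std::printf("%d\n", selectVersionMin(Watch)); // 0 == WatchOSVersionMin
    }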
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 576827a72d56..028f2e955b21 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -16,6 +16,8 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/COFF.h"
+
 using namespace llvm;
 
 static bool useCompactUnwind(const Triple &T) {
@@ -27,6 +29,10 @@ static bool useCompactUnwind(const Triple &T) {
   if (T.getArch() == Triple::aarch64)
     return true;
 
+  // armv7k always has it.
+  if (T.isWatchOS())
+    return true;
+
   // Use it on newer version of OS X.
   if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
     return true;
@@ -43,9 +49,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
   // MachO
   SupportsWeakOmittedEHFrame = false;
 
+  EHFrameSection = Ctx->getMachOSection(
+      "__TEXT", "__eh_frame",
+      MachO::S_COALESCED | MachO::S_ATTR_NO_TOC |
+          MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
+      SectionKind::getReadOnly());
+
   if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
     SupportsCompactUnwindWithoutEHFrame = true;
 
+  if (T.isWatchOS())
+    OmitDwarfIfHaveCompactUnwind = true;
+
   PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
     | dwarf::DW_EH_PE_sdata4;
   LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
@@ -61,16 +76,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            MachO::S_ATTR_PURE_INSTRUCTIONS,
                            SectionKind::getText());
   DataSection // .data
-    = Ctx->getMachOSection("__DATA", "__data", 0,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getData());
 
   // BSSSection might not be expected initialized on msvc.
   BSSSection = nullptr;
 
   TLSDataSection // .tdata
-    = Ctx->getMachOSection("__DATA", "__thread_data",
-                           MachO::S_THREAD_LOCAL_REGULAR,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__thread_data",
+                             MachO::S_THREAD_LOCAL_REGULAR,
+                             SectionKind::getData());
   TLSBSSSection // .tbss
     = Ctx->getMachOSection("__DATA", "__thread_bss",
                            MachO::S_THREAD_LOCAL_ZEROFILL,
@@ -78,14 +92,13 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
 
   // TODO: Verify datarel below.
   TLSTLVSection // .tlv
-    = Ctx->getMachOSection("__DATA", "__thread_vars",
-                           MachO::S_THREAD_LOCAL_VARIABLES,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__thread_vars",
+                             MachO::S_THREAD_LOCAL_VARIABLES,
+                             SectionKind::getData());
 
-  TLSThreadInitSection
-    = Ctx->getMachOSection("__DATA", "__thread_init",
-                          MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
-                          SectionKind::getDataRel());
+  TLSThreadInitSection = Ctx->getMachOSection(
+      "__DATA", "__thread_init", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+      SectionKind::getData());
 
   CStringSection // .cstring
     = Ctx->getMachOSection("__TEXT", "__cstring",
@@ -112,22 +125,35 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
     = Ctx->getMachOSection("__TEXT", "__const", 0,
                            SectionKind::getReadOnly());
 
-  TextCoalSection
-    = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
-                           MachO::S_COALESCED |
-                           MachO::S_ATTR_PURE_INSTRUCTIONS,
-                           SectionKind::getText());
-  ConstTextCoalSection
-    = Ctx->getMachOSection("__TEXT", "__const_coal",
-                           MachO::S_COALESCED,
-                           SectionKind::getReadOnly());
+  // If the target is not powerpc, map the coal sections to the non-coal
+  // sections.
+  //
+  // "__TEXT/__textcoal_nt" => section "__TEXT/__text"
+  // "__TEXT/__const_coal"  => section "__TEXT/__const"
+  // "__DATA/__datacoal_nt" => section "__DATA/__data"
+  Triple::ArchType ArchTy = T.getArch();
+
+  if (ArchTy == Triple::ppc || ArchTy == Triple::ppc64) {
+    TextCoalSection
+      = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
+                             MachO::S_COALESCED |
+                             MachO::S_ATTR_PURE_INSTRUCTIONS,
+                             SectionKind::getText());
+    ConstTextCoalSection
+      = Ctx->getMachOSection("__TEXT", "__const_coal",
+                             MachO::S_COALESCED,
+                             SectionKind::getReadOnly());
+    DataCoalSection = Ctx->getMachOSection(
+        "__DATA", "__datacoal_nt", MachO::S_COALESCED, SectionKind::getData());
+  } else {
+    TextCoalSection = TextSection;
+    ConstTextCoalSection = ReadOnlySection;
+    DataCoalSection = DataSection;
+  }
 
   ConstDataSection // .const_data
     = Ctx->getMachOSection("__DATA", "__const", 0,
                            SectionKind::getReadOnlyWithRel());
-  DataCoalSection
-    = Ctx->getMachOSection("__DATA","__datacoal_nt",
-                           MachO::S_COALESCED,
-                           SectionKind::getDataRel());
   DataCommonSection
     = Ctx->getMachOSection("__DATA","__common",
                            MachO::S_ZEROFILL,
@@ -147,21 +173,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            SectionKind::getMetadata());
 
   if (RelocM == Reloc::Static) {
-    StaticCtorSection
-      = Ctx->getMachOSection("__TEXT", "__constructor", 0,
-                             SectionKind::getDataRel());
-    StaticDtorSection
-      = Ctx->getMachOSection("__TEXT", "__destructor", 0,
-                             SectionKind::getDataRel());
+    StaticCtorSection = Ctx->getMachOSection("__TEXT", "__constructor", 0,
+                                             SectionKind::getData());
+    StaticDtorSection = Ctx->getMachOSection("__TEXT", "__destructor", 0,
+                                             SectionKind::getData());
   } else {
-    StaticCtorSection
-      = Ctx->getMachOSection("__DATA", "__mod_init_func",
-                             MachO::S_MOD_INIT_FUNC_POINTERS,
-                             SectionKind::getDataRel());
-    StaticDtorSection
-      = Ctx->getMachOSection("__DATA", "__mod_term_func",
-                             MachO::S_MOD_TERM_FUNC_POINTERS,
-                             SectionKind::getDataRel());
+    StaticCtorSection = Ctx->getMachOSection("__DATA", "__mod_init_func",
+                                             MachO::S_MOD_INIT_FUNC_POINTERS,
+                                             SectionKind::getData());
+    StaticDtorSection = Ctx->getMachOSection("__DATA", "__mod_term_func",
+                                             MachO::S_MOD_TERM_FUNC_POINTERS,
+                                             SectionKind::getData());
   }
 
   // Exception Handling.
@@ -176,9 +198,11 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            SectionKind::getReadOnly());
 
   if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
-    CompactUnwindDwarfEHFrameOnly = 0x04000000;
+    CompactUnwindDwarfEHFrameOnly = 0x04000000;  // UNWIND_X86_64_MODE_DWARF
   else if (T.getArch() == Triple::aarch64)
-    CompactUnwindDwarfEHFrameOnly = 0x03000000;
+    CompactUnwindDwarfEHFrameOnly = 0x03000000;  // UNWIND_ARM64_MODE_DWARF
+  else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
+    CompactUnwindDwarfEHFrameOnly = 0x04000000;  // UNWIND_ARM_MODE_DWARF
   }
 
   // Debug Information.
@@ -235,6 +259,12 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
   DwarfDebugInlineSection =
       Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
                            SectionKind::getMetadata());
+  DwarfCUIndexSection =
+      Ctx->getMachOSection("__DWARF", "__debug_cu_index", MachO::S_ATTR_DEBUG,
+                           SectionKind::getMetadata());
+  DwarfTUIndexSection =
+      Ctx->getMachOSection("__DWARF", "__debug_tu_index", MachO::S_ATTR_DEBUG,
+                           SectionKind::getMetadata());
   StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
                                          0, SectionKind::getMetadata());
 
@@ -258,7 +288,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
       ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8
                                      : dwarf::DW_EH_PE_sdata4);
-    break;
   default:
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -391,17 +420,15 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
     break;
   }
 
+  unsigned EHSectionType = T.getArch() == Triple::x86_64
+                               ? ELF::SHT_X86_64_UNWIND
+                               : ELF::SHT_PROGBITS;
+
   // Solaris requires different flags for .eh_frame to seemingly every other
   // platform.
-  EHSectionType = ELF::SHT_PROGBITS;
-  EHSectionFlags = ELF::SHF_ALLOC;
-  if (T.isOSSolaris()) {
-    if (T.getArch() == Triple::x86_64)
-      EHSectionType = ELF::SHT_X86_64_UNWIND;
-    else
-      EHSectionFlags |= ELF::SHF_WRITE;
-  }
-
+  unsigned EHSectionFlags = ELF::SHF_ALLOC;
+  if (T.isOSSolaris() && T.getArch() != Triple::x86_64)
+    EHSectionFlags |= ELF::SHF_WRITE;
 
   // ELF
   BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
@@ -423,18 +450,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
   TLSBSSSection = Ctx->getELFSection(
       ".tbss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE);
 
-  DataRelSection = Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
-                                      ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
-  DataRelLocalSection = Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
-                                           ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
   DataRelROSection = Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
                                         ELF::SHF_ALLOC | ELF::SHF_WRITE);
 
-  DataRelROLocalSection = Ctx->getELFSection(
-      ".data.rel.ro.local", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
   MergeableConst4Section =
       Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
                          ELF::SHF_ALLOC | ELF::SHF_MERGE, 4, "");
@@ -519,14 +537,28 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
   DwarfAddrSection =
       Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
 
+  // DWP Sections
+  DwarfCUIndexSection =
+      Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0);
+  DwarfTUIndexSection =
+      Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0);
+
   StackMapSection =
       Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
 
   FaultMapSection =
       Ctx->getELFSection(".llvm_faultmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+  EHFrameSection =
+      Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
 }
 
 void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
+  EHFrameSection = Ctx->getCOFFSection(
+      ".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+      SectionKind::getData());
+
   bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
 
   CommDirectiveSupportsAlignment = true;
@@ -545,7 +577,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   DataSection = Ctx->getCOFFSection(
       ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
-      SectionKind::getDataRel());
+      SectionKind::getData());
   ReadOnlySection = Ctx->getCOFFSection(
       ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
       SectionKind::getReadOnly());
@@ -563,21 +595,20 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
     StaticCtorSection = Ctx->getCOFFSection(
         ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
-        SectionKind::getDataRel());
+        SectionKind::getData());
     StaticDtorSection = Ctx->getCOFFSection(
         ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
-        SectionKind::getDataRel());
+        SectionKind::getData());
   }
 
   // FIXME: We're emitting LSDA info into a readonly section on COFF, even
   // though it contains relocatable pointers.  In PIC mode, this is probably a
   // big runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
-  assert(T.isOSWindows() && "Windows is the only supported COFF target");
   if (T.getArch() == Triple::x86_64) {
     // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
-    LSDASection = 0;
+    LSDASection = nullptr;
   } else {
     LSDASection = Ctx->getCOFFSection(".gcc_except_table",
                                       COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -693,6 +724,16 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
                           COFF::IMAGE_SCN_MEM_DISCARDABLE |
                               COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                               COFF::IMAGE_SCN_MEM_READ,
                           SectionKind::getMetadata(), "addr_sec");
+  DwarfCUIndexSection = Ctx->getCOFFSection(
+      ".debug_cu_index",
+      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+          COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getMetadata());
+  DwarfTUIndexSection = Ctx->getCOFFSection(
+      ".debug_tu_index",
+      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+          COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getMetadata());
   DwarfAccelNamesSection = Ctx->getCOFFSection(
       ".apple_names",
      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -720,11 +761,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   PDataSection = Ctx->getCOFFSection(
       ".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
-      SectionKind::getDataRel());
+      SectionKind::getData());
 
   XDataSection = Ctx->getCOFFSection(
       ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
-      SectionKind::getDataRel());
+      SectionKind::getData());
 
   SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO,
                                       SectionKind::getMetadata());
@@ -732,12 +773,12 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   TLSDataSection = Ctx->getCOFFSection(
       ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
-      SectionKind::getDataRel());
-
+      SectionKind::getData());
+
   StackMapSection = Ctx->getCOFFSection(".llvm_stackmaps",
                                         COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                                             COFF::IMAGE_SCN_MEM_READ,
-                                       SectionKind::getReadOnly());
+                                        SectionKind::getReadOnly());
 }
 
 void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
@@ -752,6 +793,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
   CommDirectiveSupportsAlignment = true;
   SupportsWeakOmittedEHFrame = true;
   SupportsCompactUnwindWithoutEHFrame = false;
+  OmitDwarfIfHaveCompactUnwind = false;
 
   PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
       dwarf::DW_EH_PE_absptr;
@@ -767,25 +809,26 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
 
   TT = TheTriple;
 
-  Triple::ArchType Arch = TT.getArch();
-  // FIXME: Checking for Arch here to filter out bogus triples such as
-  // cellspu-apple-darwin. Perhaps we should fix in Triple?
-  if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
-       Arch == Triple::arm || Arch == Triple::thumb ||
-       Arch == Triple::aarch64 ||
-       Arch == Triple::ppc || Arch == Triple::ppc64 ||
-       Arch == Triple::UnknownArch) &&
-      TT.isOSBinFormatMachO()) {
+  switch (TT.getObjectFormat()) {
+  case Triple::MachO:
     Env = IsMachO;
     initMachOMCObjectFileInfo(TT);
-  } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
-              Arch == Triple::arm || Arch == Triple::thumb) &&
-             (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
+    break;
+  case Triple::COFF:
+    if (!TT.isOSWindows())
+      report_fatal_error(
+          "Cannot initialize MC for non-Windows COFF object files.");
+
     Env = IsCOFF;
     initCOFFMCObjectFileInfo(TT);
-  } else {
+    break;
+  case Triple::ELF:
     Env = IsELF;
     initELFMCObjectFileInfo(TT);
+    break;
+  case Triple::UnknownObjectFormat:
+    report_fatal_error("Cannot initialize MC for unknown object file format.");
+    break;
   }
 }
 
@@ -799,24 +842,3 @@ MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
   return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
                             0, utostr(Hash));
 }
-
-void MCObjectFileInfo::InitEHFrameSection() {
-  if (Env == IsMachO)
-    EHFrameSection =
-      Ctx->getMachOSection("__TEXT", "__eh_frame",
-                           MachO::S_COALESCED |
-                           MachO::S_ATTR_NO_TOC |
-                           MachO::S_ATTR_STRIP_STATIC_SYMS |
-                           MachO::S_ATTR_LIVE_SUPPORT,
-                           SectionKind::getReadOnly());
-  else if (Env == IsELF)
-    EHFrameSection =
-        Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
-  else
-    EHFrameSection =
-      Ctx->getCOFFSection(".eh_frame",
-                          COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                          COFF::IMAGE_SCN_MEM_READ |
-                          COFF::IMAGE_SCN_MEM_WRITE,
-                          SectionKind::getDataRel());
-}
return; + if (!F) { + F = new MCDataFragment(); + MCSection *CurSection = getCurrentSectionOnly(); + CurSection->getFragmentList().insert(CurInsertionPoint, F); + F->setParent(CurSection); } + for (MCSymbol *Sym : PendingLabels) { + Sym->setFragment(F); + Sym->setOffset(FOffset); + } + PendingLabels.clear(); } void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) { // If not assigned to the same (valid) fragment, fallback. - if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment()) { + if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() || + Hi->isVariable() || Lo->isVariable()) { MCStreamer::emitAbsoluteSymbolDiff(Hi, Lo, Size); return; } @@ -93,7 +94,7 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionOnly() && "No current section!"); if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin()) - return std::prev(CurInsertionPoint); + return &*std::prev(CurInsertionPoint); return nullptr; } @@ -121,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { } void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { MCStreamer::EmitValueImpl(Value, Size, Loc); MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); @@ -155,7 +156,6 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { MCStreamer::EmitLabel(Symbol); getAssembler().registerSymbol(*Symbol); - assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!"); // If there is a current fragment, mark the symbol as pointing into it. // Otherwise queue the label and set its fragment pointer when we emit the @@ -276,7 +276,6 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst, raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, IF->getFixups(), STI); - VecOS.flush(); IF->getContents().append(Code.begin(), Code.end()); } @@ -321,8 +320,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A, return AddrDelta; } -static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta, - const MCSymbol *Label, int PointerSize) { +static void emitDwarfSetLineAddr(MCObjectStreamer &OS, + MCDwarfLineTableParams Params, + int64_t LineDelta, const MCSymbol *Label, + int PointerSize) { // emit the sequence to set the address OS.EmitIntValue(dwarf::DW_LNS_extended_op, 1); OS.EmitULEB128IntValue(PointerSize + 1); @@ -330,7 +331,7 @@ static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta, OS.EmitSymbolValue(Label, PointerSize); // emit the sequence for the LineDelta (from 1) and a zero address delta. 
- MCDwarfLineAddr::Emit(&OS, LineDelta, 0); + MCDwarfLineAddr::Emit(&OS, Params, LineDelta, 0); } void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, @@ -338,13 +339,15 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *Label, unsigned PointerSize) { if (!LastLabel) { - emitDwarfSetLineAddr(*this, LineDelta, Label, PointerSize); + emitDwarfSetLineAddr(*this, Assembler->getDWARFLinetableParams(), LineDelta, + Label, PointerSize); return; } const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel); int64_t Res; if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) { - MCDwarfLineAddr::Emit(this, LineDelta, Res); + MCDwarfLineAddr::Emit(this, Assembler->getDWARFLinetableParams(), LineDelta, + Res); return; } insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta)); @@ -388,26 +391,9 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment, cast(getCurrentFragment())->setEmitNops(true); } -bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, +void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) { - int64_t Res; - if (Offset->evaluateAsAbsolute(Res, getAssembler())) { - insert(new MCOrgFragment(*Offset, Value)); - return false; - } - - MCSymbol *CurrentPos = getContext().createTempSymbol(); - EmitLabel(CurrentPos); - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - const MCExpr *Ref = - MCSymbolRefExpr::create(CurrentPos, Variant, getContext()); - const MCExpr *Delta = - MCBinaryExpr::create(MCBinaryExpr::Sub, Offset, Ref, getContext()); - - if (!Delta->evaluateAsAbsolute(Res, getAssembler())) - return true; - EmitFill(Res, Value); - return false; + insert(new MCOrgFragment(*Offset, Value)); } // Associate GPRel32 fixup with data and resize data area @@ -430,19 +416,31 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) { DF->getContents().resize(DF->getContents().size() + 8, 0); } -void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { - // FIXME: A MCFillFragment would be more memory efficient but MCExpr has - // problems evaluating expressions across multiple fragments. +bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) { + int64_t OffsetValue; + if (!Offset.evaluateAsAbsolute(OffsetValue)) + llvm_unreachable("Offset is not absolute"); + MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); - DF->getContents().append(NumBytes, FillValue); + + MCFixupKind Kind; + if (!Assembler->getBackend().getFixupKind(Name, Kind)) + return true; + + if (Expr == nullptr) + Expr = + MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext()); + DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc)); + return false; } -void MCObjectStreamer::EmitZeros(uint64_t NumBytes) { +void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { const MCSection *Sec = getCurrentSection().first; assert(Sec && "need a section"); unsigned ItemSize = Sec->isVirtualSection() ? 0 : 1; - insert(new MCFillFragment(0, ItemSize, NumBytes)); + insert(new MCFillFragment(FillValue, ItemSize, NumBytes)); } void MCObjectStreamer::FinishImpl() { @@ -451,7 +449,7 @@ void MCObjectStreamer::FinishImpl() { MCGenDwarfInfo::Emit(this); // Dump out the dwarf file & directory tables and line tables. 
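  // Note: the MCDwarfLineTableParams argument threaded through these Emit
  // calls carries the DWARF line-table special-opcode parameters (opcode
  // base, line base, line range), letting targets override the previously
  // hard-coded defaults.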
- MCDwarfLineTable::Emit(this); + MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams()); flushPendingLabels(nullptr); getAssembler().Finish(); diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 347903408737..e84f74ae81d6 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -33,8 +33,14 @@ bool MCObjectWriter::isSymbolRefDifferenceFullyResolved( if (!SA.getFragment() || !SB.getFragment()) return false; - return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(), - InSet, false); + return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, SB, InSet); +} + +bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B, + bool InSet) const { + return isSymbolRefDifferenceFullyResolvedImpl(Asm, A, *B.getFragment(), InSet, + false); } bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index b983d9995f4d..36c192026856 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -436,7 +436,8 @@ StringRef AsmLexer::LexUntilEndOfLine() { return StringRef(TokStart, CurPtr-TokStart); } -const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { +size_t AsmLexer::peekTokens(MutableArrayRef Buf, + bool ShouldSkipSpace) { const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; bool SavedAtStartOfLine = isAtStartOfLine; @@ -446,7 +447,16 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { SMLoc SavedErrLoc = getErrLoc(); SkipSpace = ShouldSkipSpace; - AsmToken Token = LexToken(); + + size_t ReadCount; + for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { + AsmToken Token = LexToken(); + + Buf[ReadCount] = Token; + + if (Token.is(AsmToken::Eof)) + break; + } SetError(SavedErrLoc, SavedErr); @@ -455,7 +465,7 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { CurPtr = SavedCurPtr; TokStart = SavedTokStart; - return Token; + return ReadCount; } bool AsmLexer::isAtStartOfComment(const char *Ptr) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 04d141389c92..646cbb43cae8 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -33,6 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -251,14 +252,14 @@ private: bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); void eatToEndOfLine(); - bool parseCppHashLineFilenameComment(const SMLoc &L); + bool parseCppHashLineFilenameComment(SMLoc L); void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, ArrayRef Parameters); bool expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, bool EnableAtPseudoVariable, - const SMLoc &L); + SMLoc L); /// \brief Are macros enabled in the parser? 
bool areMacrosEnabled() {return MacrosEnabledFlag;} @@ -342,6 +343,7 @@ private: enum DirectiveKind { DK_NO_DIRECTIVE, // Placeholder DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT, + DK_RELOC, DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_OCTA, DK_SINGLE, DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW, DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR, @@ -374,6 +376,7 @@ private: // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool parseDirectiveOctaValue(); // ".octa" bool parseDirectiveRealValue(const fltSemantics &); // ".single", ... @@ -553,6 +556,8 @@ void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef Ranges) { } bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef Ranges) { + if(getTargetParser().getTargetOptions().MCNoWarn) + return false; if (getTargetParser().getTargetOptions().MCFatalWarnings) return Error(L, Msg, Ranges); printMessage(L, SourceMgr::DK_Warning, Msg, Ranges); @@ -679,11 +684,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // so conservatively exclude them. Only do this if we're finalizing, though, // as otherwise we won't necessarilly have seen everything yet. if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) { - const MCContext::SymbolTable &Symbols = getContext().getSymbols(); - for (MCContext::SymbolTable::const_iterator i = Symbols.begin(), - e = Symbols.end(); - i != e; ++i) { - MCSymbol *Sym = i->getValue(); + for (const auto &TableEntry : getContext().getSymbols()) { + MCSymbol *Sym = TableEntry.getValue(); // Variable symbols may not be marked as defined, so check those // explicitly. If we know it's a variable, we have a definition for // the purposes of this check. @@ -691,9 +693,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // FIXME: We would really like to refer back to where the symbol was // first referenced for a source location. We need to add something // to track that. Currently, we just point to the end of the file. - printMessage( - getLexer().getLoc(), SourceMgr::DK_Error, - "assembler local symbol '" + Sym->getName() + "' not defined"); + return Error(getLexer().getLoc(), "assembler local symbol '" + + Sym->getName() + "' not defined"); } } @@ -702,7 +703,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (!HadError && !NoFinalize) Out.Finish(); - return HadError; + return HadError || getContext().hadError(); } void AsmParser::checkForValidSection() { @@ -865,11 +866,12 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. 
- if (Sym->isVariable() && isa(Sym->getVariableValue())) { + if (Sym->isVariable() && + isa(Sym->getVariableValue(/*SetUsed*/ false))) { if (Variant) return Error(EndLoc, "unexpected modifier on variable reference"); - Res = Sym->getVariableValue(); + Res = Sym->getVariableValue(/*SetUsed*/ false); return false; } @@ -1102,8 +1104,9 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) { return false; } -unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, - MCBinaryExpr::Opcode &Kind) { +static unsigned getDarwinBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind, + bool ShouldUseLogicalShr) { switch (K) { default: return 0; // not a binop. @@ -1155,7 +1158,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::Shl; return 4; case AsmToken::GreaterGreater: - Kind = MAI.shouldUseLogicalShr() ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; + Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; return 4; // High Intermediate Precedence: +, - @@ -1179,6 +1182,89 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, } } +static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind, + bool ShouldUseLogicalShr) { + switch (K) { + default: + return 0; // not a binop. + + // Lowest Precedence: &&, || + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; + return 2; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; + return 1; + + // Low Precedence: ==, !=, <>, <, <=, >, >= + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; + return 3; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; + return 3; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; + return 3; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; + return 3; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; + return 3; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; + return 3; + + // Low Intermediate Precedence: +, - + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; + return 4; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; + return 4; + + // High Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; + return 5; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; + return 5; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; + return 5; + + // Highest Precedence: *, /, %, <<, >> + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; + return 6; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; + return 6; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; + return 6; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 6; + case AsmToken::GreaterGreater: + Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; + return 6; + } +} + +unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { + bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr(); + return IsDarwin ? getDarwinBinOpPrecedence(K, Kind, ShouldUseLogicalShr) + : getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr); +} + /// \brief Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, @@ -1251,6 +1337,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // Treat '.' as a valid identifier in this context. 
Lex(); IDVal = "."; + } else if (Lexer.is(AsmToken::LCurly)) { + // Treat '{' as a valid identifier in this context. + Lex(); + IDVal = "{"; + + } else if (Lexer.is(AsmToken::RCurly)) { + // Treat '}' as a valid identifier in this context. + Lex(); + IDVal = "}"; } else if (parseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); @@ -1313,6 +1408,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // See what kind of statement we have. switch (Lexer.getKind()) { case AsmToken::Colon: { + if (!getTargetParser().isLabel(ID)) + break; checkForValidSection(); // identifier ':' -> Label. @@ -1334,8 +1431,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); assert(RewrittenLabel.size() && "We should have an internal name here."); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Label, IDLoc, - IDVal.size(), RewrittenLabel)); + Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), + RewrittenLabel); IDVal = RewrittenLabel; } Sym = getContext().getOrCreateSymbol(IDVal); @@ -1371,6 +1468,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } case AsmToken::Equal: + if (!getTargetParser().equalIsAsmAssignment()) + break; // identifier '=' ... -> assignment statement Lex(); @@ -1599,6 +1698,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveError(IDLoc, true); case DK_WARNING: return parseDirectiveWarning(IDLoc); + case DK_RELOC: + return parseDirectiveReloc(IDLoc); } return Error(IDLoc, "unknown directive"); @@ -1613,12 +1714,14 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) return parseDirectiveMSAlign(IDLoc, Info); + if (ParsingInlineAsm && (IDVal == "even")) + Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4); checkForValidSection(); // Canonicalize the opcode to lower case. std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc, + bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID, Info.ParsedOperands); Info.ParseError = HadError; @@ -1703,7 +1806,7 @@ void AsmParser::eatToEndOfLine() { /// parseCppHashLineFilenameComment as this: /// ::= # number "filename" /// or just as a full line comment if it doesn't have a number and a string. -bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) { +bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) { Lex(); // Eat the hash token. if (getLexer().isNot(AsmToken::Integer)) { @@ -1743,7 +1846,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { raw_ostream &OS = errs(); const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); - const SMLoc &DiagLoc = Diag.getLoc(); + SMLoc DiagLoc = Diag.getLoc(); unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); unsigned CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); @@ -1802,7 +1905,7 @@ static bool isIdentifierChar(char c) { bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, - bool EnableAtPseudoVariable, const SMLoc &L) { + bool EnableAtPseudoVariable, SMLoc L) { unsigned NParameters = Parameters.size(); bool HasVararg = NParameters ? 
Parameters.back().Vararg : false; if ((!IsDarwin || NParameters != 0) && NParameters != A.size()) @@ -1858,10 +1961,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, break; // Otherwise substitute with the token values, with spaces eliminated. - for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); - it != ie; ++it) - OS << it->getString(); + for (const AsmToken &Token : A[Index]) + OS << Token.getString(); break; } } @@ -1897,15 +1998,13 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, } } else { bool VarargParameter = HasVararg && Index == (NParameters - 1); - for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); - it != ie; ++it) + for (const AsmToken &Token : A[Index]) // We expect no quotes around the string's contents when // parsing for varargs. - if (it->getKind() != AsmToken::String || VarargParameter) - OS << it->getString(); + if (Token.getKind() != AsmToken::String || VarargParameter) + OS << Token.getString(); else - OS << it->getStringContents(); + OS << Token.getStringContents(); Pos += 1 + Argument.size(); } @@ -2371,6 +2470,51 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { return false; } +/// parseDirectiveReloc +/// ::= .reloc expression , identifier [ , expression ] +bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { + const MCExpr *Offset; + const MCExpr *Expr = nullptr; + + SMLoc OffsetLoc = Lexer.getTok().getLoc(); + if (parseExpression(Offset)) + return true; + + // We can only deal with constant expressions at the moment. + int64_t OffsetValue; + if (!Offset->evaluateAsAbsolute(OffsetValue)) + return Error(OffsetLoc, "expression is not a constant value"); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma"); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected relocation name"); + SMLoc NameLoc = Lexer.getTok().getLoc(); + StringRef Name = Lexer.getTok().getIdentifier(); + Lexer.Lex(); + + if (Lexer.is(AsmToken::Comma)) { + Lexer.Lex(); + SMLoc ExprLoc = Lexer.getLoc(); + if (parseExpression(Expr)) + return true; + + MCValue Value; + if (!Expr->evaluateAsRelocatable(Value, nullptr, nullptr)) + return Error(ExprLoc, "expression must be relocatable"); + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in .reloc directive"); + + if (getStreamer().EmitRelocDirective(*Offset, Name, Expr, DirectiveLoc)) + return Error(NameLoc, "unknown relocation name"); + + return false; +} + /// parseDirectiveValue /// ::= (.byte | .short | ... ) [ expression (, expression)* ] bool AsmParser::parseDirectiveValue(unsigned Size) { @@ -2617,7 +2761,6 @@ bool AsmParser::parseDirectiveOrg() { checkForValidSection(); const MCExpr *Offset; - SMLoc Loc = getTok().getLoc(); if (parseExpression(Offset)) return true; @@ -2636,13 +2779,7 @@ bool AsmParser::parseDirectiveOrg() { } Lex(); - - // Only limited forms of relocatable expressions are accepted here, it - // has to be relative to the current section. The streamer will return - // 'true' if the expression wasn't evaluatable. 
- if (getStreamer().EmitValueToOffset(Offset, FillExpr)) - return Error(Loc, "expected assembly-time absolute expression"); - + getStreamer().emitValueToOffset(Offset, FillExpr); return false; } @@ -2703,7 +2840,11 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { Alignment = 1ULL << Alignment; } else { - // Reject alignments that aren't a power of two, for gas compatibility. + // Reject alignments that aren't either a power of two or zero, + // for gas compatibility. Alignment of zero is silently rounded + // up to one. + if (Alignment == 0) + Alignment = 1; if (!isPowerOf2_64(Alignment)) Error(AlignmentLoc, "alignment must be a power of 2"); } @@ -4269,6 +4410,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".err"] = DK_ERR; DirectiveKindMap[".error"] = DK_ERROR; DirectiveKindMap[".warning"] = DK_WARNING; + DirectiveKindMap[".reloc"] = DK_RELOC; } MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -4405,10 +4547,10 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) { SmallString<256> Buf; raw_svector_ostream OS(Buf); - for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { + for (const MCAsmMacroArgument &Arg : A) { // Note that the AtPseudoVariable is enabled for instantiations of .irp. // This is undocumented, but GAS seems to support it. - if (expandMacro(OS, M->Body, Parameter, *i, true, getTok().getLoc())) + if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc())) return true; } @@ -4488,10 +4630,10 @@ bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, if (!MCE) return Error(ExprLoc, "unexpected expression in _emit"); uint64_t IntValue = MCE->getValue(); - if (!isUIntN(8, IntValue) && !isIntN(8, IntValue)) + if (!isUInt<8>(IntValue) && !isInt<8>(IntValue)) return Error(ExprLoc, "literal value out of range for directive"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len)); + Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len); return false; } @@ -4507,8 +4649,7 @@ bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { if (!isPowerOf2_64(IntValue)) return Error(ExprLoc, "literal value not a power of two greater then zero"); - Info.AsmRewrites->push_back( - AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue))); + Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue)); return false; } @@ -4604,18 +4745,18 @@ bool AsmParser::parseMSInlineAsm( OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand.needAddressOf()); OutputConstraints.push_back(("=" + Operand.getConstraint()).str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); + AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size()); } else { InputDecls.push_back(OpDecl); InputDeclsAddressOf.push_back(Operand.needAddressOf()); InputConstraints.push_back(Operand.getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); + AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size()); } } // Consider implicit defs to be clobbers. Think of cpuid and push. - ArrayRef ImpDefs(Desc.getImplicitDefs(), - Desc.getNumImplicitDefs()); + ArrayRef ImpDefs(Desc.getImplicitDefs(), + Desc.getNumImplicitDefs()); ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end()); } @@ -4710,14 +4851,23 @@ bool AsmParser::parseMSInlineAsm( OS << ".byte"; break; case AOK_Align: { - unsigned Val = AR.Val; - OS << ".align " << Val; + // MS alignment directives are measured in bytes. 
If the native assembler + // measures alignment in bytes, we can pass it straight through. + OS << ".align"; + if (getContext().getAsmInfo()->getAlignmentIsInBytes()) + break; - // Skip the original immediate. + // Alignment is in log2 form, so print that instead and skip the original + // immediate. + unsigned Val = AR.Val; + OS << ' ' << Val; assert(Val < 10 && "Expected alignment less then 2^10."); AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4; break; } + case AOK_EVEN: + OS << ".even"; + break; case AOK_DotOperator: // Insert the dot if the user omitted it. OS.flush(); @@ -4803,7 +4953,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, // FIXME: Diagnose assignment to protected identifier (e.g., register name). if (isSymbolUsedInExpression(Sym, Value)) return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'"); - else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) + else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() && + !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. else if (Sym->isVariable() && !Sym->isUsed() && allow_redef) ; // Allow redefinitions of variables that haven't yet been used. @@ -4815,15 +4966,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, return Parser.Error(EqualLoc, "invalid reassignment of non-absolute variable '" + Name + "'"); - - // Don't count these checks as uses. - Sym->setUsed(false); } else if (Name == ".") { - if (Parser.getStreamer().EmitValueToOffset(Value, 0)) { - Parser.Error(EqualLoc, "expected absolute expression"); - Parser.eatToEndOfStatement(); - return true; - } + Parser.getStreamer().emitValueToOffset(Value, 0); return false; } else Sym = Parser.getContext().getOrCreateSymbol(Name); diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index f09bce005d6a..a4b2b195f710 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -98,11 +98,10 @@ class COFFAsmParser : public MCAsmParserExtension { SectionKind::getText()); } bool ParseSectionDirectiveData(StringRef, SMLoc) { - return ParseSectionSwitch(".data", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA - | COFF::IMAGE_SCN_MEM_READ - | COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); + return ParseSectionSwitch(".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); } bool ParseSectionDirectiveBSS(StringRef, SMLoc) { return ParseSectionSwitch(".bss", @@ -153,7 +152,7 @@ static SectionKind computeSectionKind(unsigned Flags) { if (Flags & COFF::IMAGE_SCN_MEM_READ && (Flags & COFF::IMAGE_SCN_MEM_WRITE) == 0) return SectionKind::getReadOnly(); - return SectionKind::getDataRel(); + return SectionKind::getData(); } bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) { diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index dc664e8a8f61..73e068a34391 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -8,10 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include 
"llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCSectionMachO.h" @@ -38,6 +41,8 @@ class DarwinAsmParser : public MCAsmParserExtension { unsigned TAA = 0, unsigned ImplicitAlign = 0, unsigned StubSize = 0); + SMLoc LastVersionMinDirective; + public: DarwinAsmParser() {} @@ -164,9 +169,14 @@ public: addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveTLV>(".tlv"); addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveIdent>(".ident"); + addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( + ".watchos_version_min"); + addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".tvos_version_min"); addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".ios_version_min"); addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( ".macosx_version_min"); + + LastVersionMinDirective = SMLoc(); } bool parseDirectiveDesc(StringRef, SMLoc); @@ -381,9 +391,8 @@ bool DarwinAsmParser::parseSectionSwitch(const char *Segment, // FIXME: Arch specific. bool isText = TAA & MachO::S_ATTR_PURE_INSTRUCTIONS; getStreamer().SwitchSection(getContext().getMachOSection( - Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() : SectionKind::getData())); // Set the implicit alignment, if any. // @@ -579,12 +588,34 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) { if (!ErrorStr.empty()) return Error(Loc, ErrorStr.c_str()); + // Issue a warning if the target is not powerpc and Section is a *coal* section. + Triple TT = getParser().getContext().getObjectFileInfo()->getTargetTriple(); + Triple::ArchType ArchTy = TT.getArch(); + + if (ArchTy != Triple::ppc && ArchTy != Triple::ppc64) { + StringRef NonCoalSection = StringSwitch(Section) + .Case("__textcoal_nt", "__text") + .Case("__const_coal", "__const") + .Case("__datacoal_nt", "__data") + .Default(Section); + + if (!Section.equals(NonCoalSection)) { + StringRef SectionVal(Loc.getPointer()); + size_t B = SectionVal.find(',') + 1, E = SectionVal.find(',', B); + SMLoc BLoc = SMLoc::getFromPointer(SectionVal.data() + B); + SMLoc ELoc = SMLoc::getFromPointer(SectionVal.data() + E); + getParser().Warning(Loc, "section \"" + Section + "\" is deprecated", + SMRange(BLoc, ELoc)); + getParser().Note(Loc, "change section name to \"" + NonCoalSection + + "\"", SMRange(BLoc, ELoc)); + } + } + // FIXME: Arch specific. bool isText = Segment == "__TEXT"; // FIXME: Hack. getStreamer().SwitchSection(getContext().getMachOSection( - Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() : SectionKind::getData())); return false; } @@ -636,17 +667,16 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { "environment variable unset."); // Open the secure log file if we haven't already. - raw_ostream *OS = getContext().getSecureLog(); + raw_fd_ostream *OS = getContext().getSecureLog(); if (!OS) { std::error_code EC; - OS = new raw_fd_ostream(SecureLogFile, EC, - sys::fs::F_Append | sys::fs::F_Text); - if (EC) { - delete OS; + auto NewOS = llvm::make_unique( + SecureLogFile, EC, sys::fs::F_Append | sys::fs::F_Text); + if (EC) return Error(IDLoc, Twine("can't open secure log file: ") + SecureLogFile + " (" + EC.message() + ")"); - } - getContext().setSecureLog(OS); + OS = NewOS.get(); + getContext().setSecureLog(std::move(NewOS)); } // Write the message. 
@@ -867,9 +897,11 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) { /// parseVersionMin /// ::= .ios_version_min major,minor[,update] /// ::= .macosx_version_min major,minor[,update] -bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) { +bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc) { int64_t Major = 0, Minor = 0, Update = 0; int Kind = StringSwitch(Directive) + .Case(".watchos_version_min", MCVM_WatchOSVersionMin) + .Case(".tvos_version_min", MCVM_TvOSVersionMin) .Case(".ios_version_min", MCVM_IOSVersionMin) .Case(".macosx_version_min", MCVM_OSXVersionMin); // Get the major version number. @@ -902,6 +934,24 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) { Lex(); } + const Triple &T = getContext().getObjectFileInfo()->getTargetTriple(); + Triple::OSType ExpectedOS = Triple::UnknownOS; + switch ((MCVersionMinType)Kind) { + case MCVM_WatchOSVersionMin: ExpectedOS = Triple::WatchOS; break; + case MCVM_TvOSVersionMin: ExpectedOS = Triple::TvOS; break; + case MCVM_IOSVersionMin: ExpectedOS = Triple::IOS; break; + case MCVM_OSXVersionMin: ExpectedOS = Triple::MacOSX; break; + } + if (T.getOS() != ExpectedOS) + Warning(Loc, Directive + " should only be used for " + + Triple::getOSTypeName(ExpectedOS) + " targets"); + + if (LastVersionMinDirective.isValid()) { + Warning(Loc, "overriding previous version_min directive"); + Note(LastVersionMinDirective, "previous definition is here"); + } + LastVersionMinDirective = Loc; + // We've parsed a correct version specifier, so send it to the streamer. getStreamer().EmitVersionMin((MCVersionMinType)Kind, Major, Minor, Update); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 5f8a6039afd0..6cbcdec5e275 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -51,8 +51,6 @@ public: &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); - addDirectiveHandler< - &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); @@ -81,8 +79,8 @@ public: // the best way for us to get access to it? 
bool ParseSectionDirectiveData(StringRef, SMLoc) { return ParseSectionSwitch(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getData()); } bool ParseSectionDirectiveText(StringRef, SMLoc) { return ParseSectionSwitch(".text", ELF::SHT_PROGBITS, @@ -113,9 +111,8 @@ public: } bool ParseSectionDirectiveDataRel(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getDataRel()); + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getData()); } bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS, @@ -123,17 +120,10 @@ public: ELF::SHF_WRITE, SectionKind::getReadOnlyWithRel()); } - bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) { - return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - } bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) { return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getDataRel()); + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getData()); } bool ParseDirectivePushSection(StringRef, SMLoc); bool ParseDirectivePopSection(StringRef, SMLoc); diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index 795cc85ef547..e891bd2c6240 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -12,8 +12,8 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), - TokStart(nullptr), SkipSpace(true) { +MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true) { + CurTok.emplace_back(AsmToken::Error, StringRef()); } MCAsmLexer::~MCAsmLexer() { diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 60a3a3b59a3d..4e4b47805cd8 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,13 +7,26 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCTargetAsmParser.h" using namespace llvm; -MCTargetAsmParser::MCTargetAsmParser() - : AvailableFeatures(0), ParsingInlineAsm(false) +MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions, + const MCSubtargetInfo &STI) + : AvailableFeatures(0), ParsingInlineAsm(false), MCOptions(MCOptions), + STI(&STI) { } MCTargetAsmParser::~MCTargetAsmParser() { } + +MCSubtargetInfo &MCTargetAsmParser::copySTI() { + MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI()); + STI = &STICopy; + return STICopy; +} + +const MCSubtargetInfo &MCTargetAsmParser::getSTI() const { + return *STI; +} diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 9152f2b42a48..dbd544a44ce3 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -21,7 +21,7 @@ using namespace llvm; MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin) : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false), - IsRegistered(false), Variant(V), Kind(K) {} + IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {} MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { if (!End) @@ -72,7 +72,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) { if (MI == SubsectionFragmentMap.end()) IP = end(); else - IP = MI->second; + IP = MI->second->getIterator(); if (!ExactMatch && 
Subsection != 0) { // The GNU as documentation claims that subsections have an alignment of 4, // although this appears not to be the case. diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index ce0b4f5fb411..b8373f40b8be 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index b4448d79a2d5..5a0bb7fe986f 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -27,12 +27,7 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, if (isUnique()) return false; - // FIXME: Does .section .bss/.data/.text work everywhere?? - if (Name == ".text" || Name == ".data" || - (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) - return true; - - return false; + return MAI.shouldOmitSectionDirective(Name); } static void printName(raw_ostream &OS, StringRef Name) { @@ -138,6 +133,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << "note"; else if (Type == ELF::SHT_PROGBITS) OS << "progbits"; + else if (Type == ELF::SHT_X86_64_UNWIND) + OS << "unwind"; if (EntrySize) { assert(Flags & ELF::SHF_MERGE); diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index c9f15914e4b1..879c6e5ff932 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -177,7 +177,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. TAAParsed = false; SmallVector SplitSpec; - Spec.split(SplitSpec, ","); + Spec.split(SplitSpec, ','); // Remove leading and trailing whitespace. auto GetEmptyOrTrim = [&SplitSpec](size_t Idx) -> StringRef { return SplitSpec.size() > Idx ? SplitSpec[Idx].trim() : StringRef(); @@ -235,7 +235,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. // The attribute list is a '+' separated list of attributes. SmallVector SectionAttrs; - Attrs.split(SectionAttrs, "+", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + Attrs.split(SectionAttrs, '+', /*MaxSplit=*/-1, /*KeepEmpty=*/false); for (StringRef &SectionAttr : SectionAttrs) { auto AttrDescriptorI = std::find_if( diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 7fbbbd95b560..836b40544642 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -107,8 +107,7 @@ void MCStreamer::EmitSLEB128IntValue(int64_t Value) { EmitBytes(OSE.str()); } -void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { +void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc) { EmitValueImpl(Value, Size, Loc); } @@ -189,11 +188,9 @@ void MCStreamer::InitSections(bool NoExecStack) { SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } -void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) { - if (Section) - Symbol->setSection(*Section); - else - Symbol->setUndefined(); +void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) { + assert(Fragment); + Symbol->setFragment(Fragment); // As we emit symbols into a section, track the order so that they can // be sorted upon later. Zero is reserved to mean 'unemitted'. 
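Several hunks in this series replace the old reinterpret_cast of the integer 1
into a pseudo-section pointer with the address of a static dummy fragment. A
sketch of that sentinel idiom, using simplified stand-in types (Fragment and
Symbol here are illustrative, not the real MC classes):

    struct Fragment {};

    // Only the address of this object is ever used; it is never dereferenced.
    static Fragment AbsolutePseudoFragment;

    struct Symbol {
      Fragment *Frag = nullptr;

      void setAbsolute() { Frag = &AbsolutePseudoFragment; }
      bool isAbsolute() const { return Frag == &AbsolutePseudoFragment; }
      bool isDefined() const { return Frag != nullptr; }
    };

Compared with casting an integer constant, the sentinel stays type-safe and
its address cannot collide with a real allocation.
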
@@ -203,7 +200,8 @@ void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) { void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); - AssignSection(Symbol, getCurrentSection().first); + assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!"); + Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment()); MCTargetStreamer *TS = getTargetStreamer(); if (TS) @@ -361,6 +359,14 @@ void MCStreamer::EmitCFIEscape(StringRef Values) { CurFrame->Instructions.push_back(Instruction); } +void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) { + MCSymbol *Label = EmitCFICommon(); + MCCFIInstruction Instruction = + MCCFIInstruction::createGnuArgsSize(Label, Size); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + CurFrame->Instructions.push_back(Instruction); +} + void MCStreamer::EmitCFISignalFrame() { EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); @@ -467,6 +473,8 @@ void MCStreamer::EmitWinEHHandlerData() { report_fatal_error("Chained unwind areas can't have handlers!"); } +void MCStreamer::EmitSyntaxDirective() {} + void MCStreamer::EmitWinCFIPushReg(unsigned Register) { EnsureValidWinFrameInfo(); @@ -679,8 +687,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, void MCStreamer::ChangeSection(MCSection *, const MCExpr *) {} void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} void MCStreamer::EmitBytes(StringRef Data) {} -void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { +void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) { visitUsedExpr(*Value); } void MCStreamer::EmitULEB128Value(const MCExpr *Value) {} @@ -690,9 +697,7 @@ void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned MaxBytesToEmit) {} void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) {} -bool MCStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - return false; -} +void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) {} void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {} void MCStreamer::EmitBundleLock(bool AlignToEnd) {} void MCStreamer::FinishImpl() {} diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 9210cf544b16..dc864d3a17f8 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -32,8 +32,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); } -void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) { - FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures); +void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) { + FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); } MCSubtargetInfo::MCSubtargetInfo( @@ -77,13 +77,12 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) { const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); - unsigned NumProcs = ProcDesc.size(); -#ifndef NDEBUG - for (size_t i = 1; i < NumProcs; i++) { - assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 && - "Processor machine model table is not sorted"); - } -#endif + size_t NumProcs = ProcDesc.size(); + assert(std::is_sorted(ProcSchedModels, 
ProcSchedModels+NumProcs, + [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { + return strcmp(LHS.Key, RHS.Key) < 0; + }) && + "Processor machine model table is not sorted"); // Find entry const SubtargetInfoKV *Found = diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 125380a9d140..ab3b8eb68322 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -16,8 +16,11 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// Sentinel value for the absolute pseudo section. -MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast(1); +// Only the address of this fragment is ever actually used. +static MCDummyFragment SentinelFragment(nullptr); + +// Sentinel value for the absolute pseudo fragment. +MCFragment *MCSymbol::AbsolutePseudoFragment = &SentinelFragment; void *MCSymbol::operator new(size_t s, const StringMapEntry *Name, MCContext &Ctx) { diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 1258d9e29f2e..465622715526 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,9 +14,10 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), - MCFatalWarnings(false), MCSaveTempLabels(false), - MCUseDwarfDirectory(false), ShowMCEncoding(false), ShowMCInst(false), - AsmVerbose(false), DwarfVersion(0), ABIName() {} + MCFatalWarnings(false), MCNoWarn(false), MCSaveTempLabels(false), + MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false), + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0), ABIName() {} StringRef MCTargetOptions::getABIName() const { return ABIName; diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp index d5d9eadf39a0..83af203c7acb 100644 --- a/lib/MC/MCWinEH.cpp +++ b/lib/MC/MCWinEH.cpp @@ -49,10 +49,10 @@ static MCSection *getUnwindInfoSection(StringRef SecName, if (CodeSecName.startswith(".text$")) CodeSecName = CodeSecName.substr(6); - return Context.getCOFFSection( - (SecName + Twine('$') + CodeSecName).str(), - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getDataRel()); + return Context.getCOFFSection((SecName + Twine('$') + CodeSecName).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getData()); } } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 8ce6127e3866..324385fa132a 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -78,7 +78,6 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, dyn_cast(S.getVariableValue())) return C->getValue(); - MCValue Target; if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) report_fatal_error("unable to evaluate offset for variable '" + @@ -117,7 +116,8 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, return OffsetToAlignment(EndAddr, NextSec.getAlignment()); } -void MachObjectWriter::writeHeader(unsigned NumLoadCommands, +void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, + unsigned NumLoadCommands, unsigned LoadCommandsSize, bool SubsectionsViaSymbols) { uint32_t Flags = 0; @@ -128,7 +128,7 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands, // struct mach_header (28 bytes) or // struct mach_header_64 (32 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(is64Bit() ? 
MachO::MH_MAGIC_64 : MachO::MH_MAGIC); @@ -136,29 +136,30 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands, write32(TargetObjectWriter->getCPUType()); write32(TargetObjectWriter->getCPUSubtype()); - write32(MachO::MH_OBJECT); + write32(Type); write32(NumLoadCommands); write32(LoadCommandsSize); write32(Flags); if (is64Bit()) write32(0); // reserved - assert(OS.tell() - Start == - (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); + assert( + getStream().tell() - Start == + (is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header))); } /// writeSegmentLoadCommand - Write a segment load command. /// /// \param NumSections The number of sections in this segment. /// \param SectionDataSize The total size of the sections. -void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { +void MachObjectWriter::writeSegmentLoadCommand( + StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize, + uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt, + uint32_t InitProt) { // struct segment_command (56 bytes) or // struct segment_command_64 (72 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; unsigned SegmentLoadCommandSize = @@ -169,31 +170,32 @@ void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections, NumSections * (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); - writeBytes("", 16); + assert(Name.size() <= 16); + writeBytes(Name, 16); if (is64Bit()) { - write64(0); // vmaddr + write64(VMAddr); // vmaddr write64(VMSize); // vmsize write64(SectionDataStartOffset); // file offset write64(SectionDataSize); // file size } else { - write32(0); // vmaddr + write32(VMAddr); // vmaddr write32(VMSize); // vmsize write32(SectionDataStartOffset); // file offset write32(SectionDataSize); // file size } // maxprot - write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(MaxProt); // initprot - write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(InitProt); write32(NumSections); write32(0); // flags - assert(OS.tell() - Start == SegmentLoadCommandSize); + assert(getStream().tell() - Start == SegmentLoadCommandSize); } -void MachObjectWriter::writeSection(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCSection &Sec, uint64_t FileOffset, +void MachObjectWriter::writeSection(const MCAsmLayout &Layout, + const MCSection &Sec, uint64_t VMAddr, + uint64_t FileOffset, unsigned Flags, uint64_t RelocationsStart, unsigned NumRelocations) { uint64_t SectionSize = Layout.getSectionAddressSize(&Sec); @@ -208,24 +210,20 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, // struct section (68 bytes) or // struct section_64 (80 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; writeBytes(Section.getSectionName(), 16); writeBytes(Section.getSegmentName(), 16); if (is64Bit()) { - write64(getSectionAddress(&Sec)); // address + write64(VMAddr); // address write64(SectionSize); // size } else { - write32(getSectionAddress(&Sec)); // address + write32(VMAddr); // address write32(SectionSize); // size } write32(FileOffset); - unsigned Flags = Section.getTypeAndAttributes(); - if (Section.hasInstructions()) - Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; - assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!"); 
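  // Mach-O section headers store alignment as a power-of-two exponent,
  // hence the preceding power-of-two assert and the Log2_32 conversion
  // written out below.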
write32(Log2_32(Section.getAlignment())); write32(NumRelocations ? RelocationsStart : 0); @@ -236,8 +234,8 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, if (is64Bit()) write32(0); // reserved3 - assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) : - sizeof(MachO::section))); + assert(getStream().tell() - Start == + (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); } void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, @@ -246,7 +244,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t StringTableSize) { // struct symtab_command (24 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_SYMTAB); @@ -256,7 +254,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, write32(StringTableOffset); write32(StringTableSize); - assert(OS.tell() - Start == sizeof(MachO::symtab_command)); + assert(getStream().tell() - Start == sizeof(MachO::symtab_command)); } void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, @@ -269,7 +267,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, uint32_t NumIndirectSymbols) { // struct dysymtab_command (80 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_DYSYMTAB); @@ -293,7 +291,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, write32(0); // locreloff write32(0); // nlocrel - assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); + assert(getStream().tell() - Start == sizeof(MachO::dysymtab_command)); } MachObjectWriter::MachSymbolData * @@ -389,7 +387,7 @@ void MachObjectWriter::writeNlist(MachSymbolData &MSD, void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset, uint32_t DataSize) { - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(Type); @@ -397,7 +395,7 @@ void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, write32(DataOffset); write32(DataSize); - assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); + assert(getStream().tell() - Start == sizeof(MachO::linkedit_data_command)); } static unsigned ComputeLinkerOptionsLoadCommandSize( @@ -413,7 +411,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( const std::vector &Options) { unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_LINKER_OPTION); @@ -429,7 +427,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( // Pad to a multiple of the pointer size. writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 
8 : 4)); - assert(OS.tell() - Start == Size); + assert(getStream().tell() - Start == Size); } void MachObjectWriter::recordRelocation(MCAssembler &Asm, @@ -458,9 +456,9 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_SYMBOL_STUBS) { - MCSymbol &Symbol = *it->Symbol; - report_fatal_error("indirect symbol '" + Symbol.getName() + - "' not in a symbol pointer or stub section"); + MCSymbol &Symbol = *it->Symbol; + report_fatal_error("indirect symbol '" + Symbol.getName() + + "' not in a symbol pointer or stub section"); } } @@ -522,7 +520,7 @@ void MachObjectWriter::computeSymbolTable( StringTable.add(Symbol.getName()); } - StringTable.finalize(StringTableBuilder::MachO); + StringTable.finalize(); // Build the symbol arrays but only for non-local symbols. // @@ -627,6 +625,18 @@ void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm, bindIndirectSymbols(Asm); } +bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B, + bool InSet) const { + // FIXME: We don't handle things like + // foo = . + // creating atoms. + if (A.isVariable() || B.isVariable()) + return false; + return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B, + InSet); +} + bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const { @@ -746,7 +756,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, ++NumLoadCommands; LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); } - + // Compute the total size of the section data, as well as its file size and vm // size. uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : @@ -776,18 +786,25 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, SectionDataFileSize += SectionDataPadding; // Write the prolog, starting with the header and load command... - writeHeader(NumLoadCommands, LoadCommandsSize, + writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize, Asm.getSubsectionsViaSymbols()); - writeSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); + uint32_t Prot = + MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; + writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart, + SectionDataSize, Prot, Prot); // ... and then the section headers. uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (const MCSection &Sec : Asm) { + for (const MCSection &Section : Asm) { + const auto &Sec = cast(Section); std::vector &Relocs = Relocations[&Sec]; unsigned NumRelocs = Relocs.size(); uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); - writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs); + unsigned Flags = Sec.getTypeAndAttributes(); + if (Sec.hasInstructions()) + Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; + writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags, + RelocTableEnd, NumRelocs); RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); } @@ -798,8 +815,22 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, assert(VersionInfo.Major < 65536 && "unencodable major target version"); uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | (VersionInfo.Major << 16); - write32(VersionInfo.Kind == MCVM_OSXVersionMin ? 
MachO::LC_VERSION_MIN_MACOSX : - MachO::LC_VERSION_MIN_IPHONEOS); + MachO::LoadCommandType LCType; + switch (VersionInfo.Kind) { + case MCVM_OSXVersionMin: + LCType = MachO::LC_VERSION_MIN_MACOSX; + break; + case MCVM_IOSVersionMin: + LCType = MachO::LC_VERSION_MIN_IPHONEOS; + break; + case MCVM_TvOSVersionMin: + LCType = MachO::LC_VERSION_MIN_TVOS; + break; + case MCVM_WatchOSVersionMin: + LCType = MachO::LC_VERSION_MIN_WATCHOS; + break; + } + write32(LCType); write32(sizeof(MachO::version_min_command)); write32(EncodedVersion); write32(0); // reserved. @@ -901,12 +932,12 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, // Write out the loh commands, if there is one. if (LOHSize) { #ifndef NDEBUG - unsigned Start = OS.tell(); + unsigned Start = getStream().tell(); #endif Asm.getLOHContainer().emit(*this, Layout); // Pad to a multiple of the pointer size. writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); - assert(OS.tell() - Start == LOHSize); + assert(getStream().tell() - Start == LOHSize); } // Write the symbol table data, if used. @@ -942,7 +973,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, writeNlist(Entry, Layout); // Write the string table. - OS << StringTable.data(); + getStream() << StringTable.data(); } } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 9de9363611e6..80e552287b3d 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -8,35 +8,71 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/StringTableBuilder.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" +#include <vector> + using namespace llvm; -static bool compareBySuffix(StringRef a, StringRef b) { - size_t sizeA = a.size(); - size_t sizeB = b.size(); - size_t len = std::min(sizeA, sizeB); - for (size_t i = 0; i < len; ++i) { - char ca = a[sizeA - i - 1]; - char cb = b[sizeB - i - 1]; - if (ca != cb) - return ca > cb; - } - return sizeA > sizeB; +StringTableBuilder::StringTableBuilder(Kind K) : K(K) {} + +typedef std::pair<StringRef, size_t> StringPair; + +// Returns the character at Pos from end of a string. +static int charTailAt(StringPair *P, size_t Pos) { + StringRef S = P->first; + if (Pos >= S.size()) + return -1; + return (unsigned char)S[S.size() - Pos - 1]; } -void StringTableBuilder::finalize(Kind kind) { - SmallVector<StringRef, 8> Strings; +// Three-way radix quicksort. This is much faster than std::sort with strcmp +// because it does not compare characters that we already know the same. +static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) { +tailcall: + if (End - Begin <= 1) + return; + + // Partition items. Items in [Begin, P) are greater than the pivot, + // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot. + int Pivot = charTailAt(*Begin, Pos); + StringPair **P = Begin; + StringPair **Q = End; + for (StringPair **R = Begin + 1; R < Q;) { + int C = charTailAt(*R, Pos); + if (C > Pivot) + std::swap(*P++, *R++); + else if (C < Pivot) + std::swap(*--Q, *R); + else + R++; + } + + multikey_qsort(Begin, P, Pos); + multikey_qsort(Q, End, Pos); + if (Pivot != -1) { + // qsort(P, Q, Pos + 1), but with tail call optimization.
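// At this point one partition pass is complete: [Begin, P) compares greater
// than the pivot, [P, Q) equal to it, and [Q, End) less than it, always at
// position Pos from the end of the string. The two unequal ranges recurse at
// the same Pos; the equal range moves on to Pos + 1, and the goto below
// reuses the current frame, so a run of strings sharing a long suffix costs
// constant stack instead of one frame per character. A pivot of -1 means the
// strings in the middle range have no characters left to compare, so no
// deeper pass is needed.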
+ Begin = P; + End = Q; + ++Pos; + goto tailcall; + } +} + +void StringTableBuilder::finalize() { + std::vector<std::pair<StringRef, size_t> *> Strings; Strings.reserve(StringIndexMap.size()); + for (std::pair<StringRef, size_t> &P : StringIndexMap) + Strings.push_back(&P); - for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i) - Strings.push_back(i->getKey()); + if (!Strings.empty()) + multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); - std::sort(Strings.begin(), Strings.end(), compareBySuffix); - - switch (kind) { + switch (K) { + case RAW: + break; case ELF: case MachO: // Start the table with a NUL byte. @@ -49,22 +85,25 @@ void StringTableBuilder::finalize(Kind kind) { } StringRef Previous; - for (StringRef s : Strings) { - if (kind == WinCOFF) - assert(s.size() > COFF::NameSize && "Short string in COFF string table!"); + for (std::pair<StringRef, size_t> *P : Strings) { + StringRef S = P->first; + if (K == WinCOFF) + assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); - if (Previous.endswith(s)) { - StringIndexMap[s] = StringTable.size() - 1 - s.size(); + if (Previous.endswith(S)) { + P->second = StringTable.size() - S.size() - (K != RAW); continue; } - StringIndexMap[s] = StringTable.size(); - StringTable += s; - StringTable += '\x00'; - Previous = s; + P->second = StringTable.size(); + StringTable += S; + if (K != RAW) + StringTable += '\x00'; + Previous = S; } - switch (kind) { + switch (K) { + case RAW: case ELF: break; case MachO: @@ -75,14 +114,31 @@ void StringTableBuilder::finalize(Kind kind) { case WinCOFF: // Write the table size in the first word. assert(StringTable.size() <= std::numeric_limits<uint32_t>::max()); - uint32_t size = static_cast<uint32_t>(StringTable.size()); + uint32_t Size = static_cast<uint32_t>(StringTable.size()); support::endian::write<uint32_t, support::little, support::unaligned>( - StringTable.data(), size); + StringTable.data(), Size); break; } + + Size = StringTable.size(); } void StringTableBuilder::clear() { StringTable.clear(); StringIndexMap.clear(); } + +size_t StringTableBuilder::getOffset(StringRef S) const { + assert(isFinalized()); + auto I = StringIndexMap.find(S); + assert(I != StringIndexMap.end() && "String is not in table!"); + return I->second; +} + +size_t StringTableBuilder::add(StringRef S) { + assert(!isFinalized()); + auto P = StringIndexMap.insert(std::make_pair(S, Size)); + if (P.second) + Size += S.size() + (K != RAW); + return P.first->second; +} diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 76574e987cb1..b642f17f0e79 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -56,7 +56,7 @@ static inline bool isEnabled(StringRef Feature) { /// static void Split(std::vector<std::string> &V, StringRef S) { SmallVector<StringRef, 3> Tmp; - S.split(Tmp, ",", -1, false /* KeepEmpty */); + S.split(Tmp, ',', -1, false /* KeepEmpty */); V.assign(Tmp.begin(), Tmp.end()); } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 56ef1c7a2735..a3820906b76b 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -32,8 +33,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/JamCRC.h" #include "llvm/Support/TimeValue.h" #include <cstdio> +#include <ctime> using namespace llvm; @@ -76,8 +79,6 @@ public: COFFSymbol(StringRef name); void 
set_name_offset(uint32_t Offset); - bool should_keep() const; - int64_t getIndex() const { return Index; } void setIndex(int Value) { Index = Value; @@ -125,7 +126,7 @@ public: COFF::header Header; sections Sections; symbols Symbols; - StringTableBuilder Strings; + StringTableBuilder Strings{StringTableBuilder::WinCOFF}; // Maps used during object file creation. section_map SectionMap; @@ -160,8 +161,6 @@ public: void SetSymbolName(COFFSymbol &S); void SetSectionName(COFFSection &S); - bool ExportSymbol(const MCSymbol &Symbol, MCAssembler &Asm); - bool IsPhysicalSection(COFFSection *S); // Entity writing methods. @@ -215,38 +214,6 @@ void COFFSymbol::set_name_offset(uint32_t Offset) { write_uint32_le(Data.Name + 4, Offset); } -/// logic to decide if the symbol should be reported in the symbol table -bool COFFSymbol::should_keep() const { - // no section means its external, keep it - if (!Section) - return true; - - // if it has relocations pointing at it, keep it - if (Relocations > 0) { - assert(Section->Number != -1 && "Sections with relocations must be real!"); - return true; - } - - // if this is a safeseh handler, keep it - if (MC && (cast(MC)->isSafeSEH())) - return true; - - // if the section its in is being droped, drop it - if (Section->Number == -1) - return false; - - // if it is the section symbol, keep it - if (Section->Symbol == this) - return true; - - // if its temporary, drop it - if (MC && MC->isTemporary()) - return false; - - // otherwise, keep it - return true; -} - //------------------------------------------------------------------------------ // Section class implementation @@ -392,7 +359,6 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler, const MCAsmLayout &Layout) { COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); - SymbolMap[&Symbol] = coff_symbol; if (cast(Symbol).isWeakExternal()) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; @@ -515,25 +481,6 @@ void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) { std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size()); } -bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol, - MCAssembler &Asm) { - // This doesn't seem to be right. Strings referred to from the .data section - // need symbols so they can be linked to code in the .text section right? - - // return Asm.isSymbolLinkerVisible(Symbol); - - // Non-temporary labels should always be visible to the linker. - if (!Symbol.isTemporary()) - return true; - - // Temporary variable symbols are invisible. - if (Symbol.isVariable()) - return false; - - // Absolute temporary labels are never visible. - return !Symbol.isAbsolute(); -} - bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) { return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0; @@ -663,7 +610,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, defineSection(static_cast(Section)); for (const MCSymbol &Symbol : Asm.symbols()) - if (ExportSymbol(Symbol, Asm)) + if (!Symbol.isTemporary()) DefineSymbol(Symbol, Asm, Layout); } @@ -674,7 +621,8 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize // away any relocations to functions. 
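// Background for the check below: a COFF symbol type packs its base type in
// the low 4 bits and its derived ("complex") type in the next 4, so with
// COFF::SCT_COMPLEX_TYPE_SHIFT == 4 a function symbol stores
// IMAGE_SYM_DTYPE_FUNCTION (2) << 4 == 0x20, and shifting right by the same
// amount recovers the DTYPE for comparison.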
uint16_t Type = cast(SymA).getType(); - if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) + if (Asm.isIncrementalLinkerCompatible() && + (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); @@ -702,41 +650,49 @@ void WinCOFFObjectWriter::recordRelocation( const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); - const MCSymbol &Symbol = Target.getSymA()->getSymbol(); - const MCSymbol &A = Symbol; - if (!A.isRegistered()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + const MCSymbol &A = Target.getSymA()->getSymbol(); + if (!A.isRegistered()) { + Asm.getContext().reportError(Fixup.getLoc(), Twine("symbol '") + A.getName() + "' can not be undefined"); + return; + } + if (A.isTemporary() && A.isUndefined()) { + Asm.getContext().reportError(Fixup.getLoc(), + Twine("assembler label '") + A.getName() + + "' can not be undefined"); + return; + } MCSection *Section = Fragment->getParent(); // Mark this symbol as requiring an entry in the symbol table. assert(SectionMap.find(Section) != SectionMap.end() && "Section must already have been defined in executePostLayoutBinding!"); - assert(SymbolMap.find(&A) != SymbolMap.end() && - "Symbol must already have been defined in executePostLayoutBinding!"); COFFSection *coff_section = SectionMap[Section]; - COFFSymbol *coff_symbol = SymbolMap[&A]; const MCSymbolRefExpr *SymB = Target.getSymB(); bool CrossSection = false; if (SymB) { const MCSymbol *B = &SymB->getSymbol(); - if (!B->getFragment()) - Asm.getContext().reportFatalError( + if (!B->getFragment()) { + Asm.getContext().reportError( Fixup.getLoc(), Twine("symbol '") + B->getName() + "' can not be undefined in a subtraction expression"); + return; + } - if (!A.getFragment()) - Asm.getContext().reportFatalError( + if (!A.getFragment()) { + Asm.getContext().reportError( Fixup.getLoc(), - Twine("symbol '") + Symbol.getName() + + Twine("symbol '") + A.getName() + "' can not be undefined in a subtraction expression"); + return; + } - CrossSection = &Symbol.getSection() != &B->getSection(); + CrossSection = &A.getSection() != &B->getSection(); // Offset of the symbol in the section int64_t OffsetOfB = Layout.getSymbolOffset(*B); @@ -765,12 +721,19 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations. - if (coff_symbol->MC->isTemporary() || CrossSection) { - Reloc.Symb = coff_symbol->Section->Symbol; - FixedValue += Layout.getFragmentOffset(coff_symbol->MC->getFragment()) + - coff_symbol->MC->getOffset(); - } else - Reloc.Symb = coff_symbol; + if (A.isTemporary() || CrossSection) { + MCSection *TargetSection = &A.getSection(); + assert( + SectionMap.find(TargetSection) != SectionMap.end() && + "Section must already have been defined in executePostLayoutBinding!"); + Reloc.Symb = SectionMap[TargetSection]->Symbol; + FixedValue += Layout.getSymbolOffset(A); + } else { + assert( + SymbolMap.find(&A) != SymbolMap.end() && + "Symbol must already have been defined in executePostLayoutBinding!"); + Reloc.Symb = SymbolMap[&A]; + } ++Reloc.Symb->Relocations; @@ -884,14 +847,10 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, // Update section number & offset for symbols that have them. 
if (Symbol->Section) Symbol->Data.SectionNumber = Symbol->Section->Number; - if (Symbol->should_keep()) { - Symbol->setIndex(Header.NumberOfSymbols++); - // Update auxiliary symbol info. - Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); - Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; - } else { - Symbol->setIndex(-1); - } + Symbol->setIndex(Header.NumberOfSymbols++); + // Update auxiliary symbol info. + Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); + Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; } // Build string table. @@ -899,16 +858,15 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, if (S->Name.size() > COFF::NameSize) Strings.add(S->Name); for (const auto &S : Symbols) - if (S->should_keep() && S->Name.size() > COFF::NameSize) + if (S->Name.size() > COFF::NameSize) Strings.add(S->Name); - Strings.finalize(StringTableBuilder::WinCOFF); + Strings.finalize(); // Set names. for (const auto &S : Sections) SetSectionName(*S); for (auto &S : Symbols) - if (S->should_keep()) - SetSymbolName(*S); + SetSymbolName(*S); // Fixup weak external references. for (auto &Symbol : Symbols) { @@ -948,7 +906,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, // Assign file offsets to COFF object file structures. - unsigned offset = 0; + unsigned offset = getInitialOffset(); if (UseBigObj) offset += COFF::Header32Size; @@ -1011,8 +969,23 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Header.PointerToSymbolTable = offset; + // FIXME: Remove the #else branch and make the #if branch unconditional once + // LLVM's self host configuration is aware of /Brepro. +#if (ENABLE_TIMESTAMPS == 1) + // MS LINK expects to be able to use this timestamp to implement their + // /INCREMENTAL feature. + if (Asm.isIncrementalLinkerCompatible()) { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + Now = UINT32_MAX; + Header.TimeDateStamp = Now; + } else { + Header.TimeDateStamp = 0; + } +#else // We want a deterministic output. It looks like GNU as also writes 0 in here. Header.TimeDateStamp = 0; +#endif // Write it all to disk... WriteFileHeader(Header); @@ -1029,6 +1002,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, } } + SmallVector SectionContents; for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(), je = Asm.end(); (i != ie) && (j != je); ++i, ++j) { @@ -1037,20 +1011,47 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, continue; if ((*i)->Header.PointerToRawData != 0) { - assert(OS.tell() <= (*i)->Header.PointerToRawData && + assert(getStream().tell() <= (*i)->Header.PointerToRawData && "Section::PointerToRawData is insane!"); - unsigned SectionDataPadding = (*i)->Header.PointerToRawData - OS.tell(); + unsigned SectionDataPadding = + (*i)->Header.PointerToRawData - getStream().tell(); assert(SectionDataPadding < 4 && "Should only need at most three bytes of padding!"); WriteZeros(SectionDataPadding); + // Save the contents of the section to a temporary buffer, we need this + // to CRC the data before we dump it into the object file. + SectionContents.clear(); + raw_svector_ostream VecOS(SectionContents); + raw_pwrite_stream &OldStream = getStream(); + // Redirect the output stream to our buffer. + setStream(VecOS); + // Fill our buffer with the section data. Asm.writeSectionData(&*j, Layout); + // Reset the stream back to what it was before. 
+ setStream(OldStream); + + // Calculate our CRC with an initial value of '0', this is not how + // JamCRC is specified but it aligns with the expected output. + JamCRC JC(/*Init=*/0x00000000U); + JC.update(SectionContents); + + // Write the section contents to the object file. + getStream() << SectionContents; + + // Update the section definition auxiliary symbol to record the CRC. + COFFSection *Sec = SectionMap[&*j]; + COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux; + assert(AuxSyms.size() == 1 && + AuxSyms[0].AuxType == ATSectionDefinition); + AuxSymbol &SecDef = AuxSyms[0]; + SecDef.Aux.SectionDefinition.CheckSum = JC.getCRC(); } if ((*i)->Relocations.size() > 0) { - assert(OS.tell() == (*i)->Header.PointerToRelocations && + assert(getStream().tell() == (*i)->Header.PointerToRelocations && "Section::PointerToRelocations is insane!"); if ((*i)->Relocations.size() >= 0xffff) { @@ -1071,14 +1072,14 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, } } - assert(OS.tell() == Header.PointerToSymbolTable && + assert(getStream().tell() == Header.PointerToSymbolTable && "Header::PointerToSymbolTable is insane!"); for (auto &Symbol : Symbols) if (Symbol->getIndex() != -1) WriteSymbol(*Symbol); - OS.write(Strings.data().data(), Strings.data().size()); + getStream().write(Strings.data().data(), Strings.data().size()); } MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 36dd691f07b8..a38b1a41a9b0 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -49,7 +49,6 @@ void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, SmallString<256> Code; raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // Add the fixups and data. 
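The section-checksum step above is self-contained enough to show in isolation. A minimal sketch of producing a section-definition CheckSum the same way the writer does, seeding JamCRC with 0 rather than its default all-ones value (the helper name is illustrative, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/JamCRC.h"

// Sketch: CRC the raw bytes of a section as the COFF writer does above.
static uint32_t checksumSectionContents(llvm::ArrayRef<char> Contents) {
  llvm::JamCRC JC(/*Init=*/0x00000000U); // writer seeds with 0, not ~0
  JC.update(Contents);
  return JC.getCRC();
}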
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { @@ -123,29 +122,37 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { "Got non-COFF section in the COFF backend!"); if (CurSymbol) - FatalError("starting a new symbol definition without completing the " - "previous one"); + Error("starting a new symbol definition without completing the " + "previous one"); CurSymbol = Symbol; } void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { - if (!CurSymbol) - FatalError("storage class specified outside of symbol definition"); + if (!CurSymbol) { + Error("storage class specified outside of symbol definition"); + return; + } - if (StorageClass & ~COFF::SSC_Invalid) - FatalError("storage class value '" + Twine(StorageClass) + + if (StorageClass & ~COFF::SSC_Invalid) { + Error("storage class value '" + Twine(StorageClass) + "' out of range"); + return; + } getAssembler().registerSymbol(*CurSymbol); cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass); } void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { - if (!CurSymbol) - FatalError("symbol type specified outside of a symbol definition"); + if (!CurSymbol) { + Error("symbol type specified outside of a symbol definition"); + return; + } - if (Type & ~0xffff) - FatalError("type value '" + Twine(Type) + "' out of range"); + if (Type & ~0xffff) { + Error("type value '" + Twine(Type) + "' out of range"); + return; + } getAssembler().registerSymbol(*CurSymbol); cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type); @@ -153,7 +160,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { void MCWinCOFFStreamer::EndCOFFSymbolDef() { if (!CurSymbol) - FatalError("ending symbol definition without starting one"); + Error("ending symbol definition without starting one"); CurSymbol = nullptr; } @@ -215,8 +222,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, Size = std::max(Size, static_cast<uint64_t>(ByteAlignment)); } - AssignSection(Symbol, nullptr); - getAssembler().registerSymbol(*Symbol); Symbol->setExternal(true); Symbol->setCommon(Size, ByteAlignment); @@ -228,7 +233,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, OS << " -aligncomm:\"" << Symbol->getName() << "\"," << Log2_32_Ceil(ByteAlignment); - OS.flush(); PushSection(); SwitchSection(MFI->getDrectveSection()); @@ -249,8 +253,6 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, getAssembler().registerSymbol(*Symbol); Symbol->setExternal(false); - AssignSection(Symbol, Section); - if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, ByteAlignment, Section); @@ -287,9 +289,8 @@ void MCWinCOFFStreamer::FinishImpl() { MCObjectStreamer::FinishImpl(); } -LLVM_ATTRIBUTE_NORETURN -void MCWinCOFFStreamer::FatalError(const Twine &Msg) const { - getContext().reportFatalError(SMLoc(), Msg); +void MCWinCOFFStreamer::Error(const Twine &Msg) const { + getContext().reportError(SMLoc(), Msg); } } diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index d4821196a6cf..99b0650c8b7e 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -43,10 +43,10 @@ StringRef ArchiveMemberHeader::getName() const { return llvm::StringRef(Name, end); } -uint32_t ArchiveMemberHeader::getSize() const { +ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const { uint32_t Ret; if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) - llvm_unreachable("Size is not a decimal number."); + return object_error::parse_failed; // Size is not a decimal number. 
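The ErrorOr<uint32_t> return on getSize above sets the pattern for the rest of this change: callers unwrap the value or propagate the failure rather than tripping llvm_unreachable on malformed input. A hypothetical caller, for illustration only:

#include "llvm/Object/Archive.h"
#include "llvm/Support/ErrorOr.h"

// Sketch of the unwrap-or-propagate idiom the accessors below rely on.
static llvm::ErrorOr<uint64_t>
memberDataSize(const llvm::object::ArchiveMemberHeader &H) {
  llvm::ErrorOr<uint32_t> Size = H.getSize();
  if (std::error_code EC = Size.getError())
    return EC; // a bad size field is now a parse error, not a crash
  return uint64_t(Size.get());
}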
return Ret; } @@ -82,22 +82,30 @@ unsigned ArchiveMemberHeader::getGID() const { return Ret; } -Archive::Child::Child(const Archive *Parent, const char *Start) +Archive::Child::Child(const Archive *Parent, StringRef Data, + uint16_t StartOfFile) + : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {} + +Archive::Child::Child(const Archive *Parent, const char *Start, + std::error_code *EC) : Parent(Parent) { if (!Start) return; - const ArchiveMemberHeader *Header = - reinterpret_cast<const ArchiveMemberHeader *>(Start); uint64_t Size = sizeof(ArchiveMemberHeader); - if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//") - Size += Header->getSize(); Data = StringRef(Start, Size); + if (!isThinMember()) { + ErrorOr<uint64_t> MemberSize = getRawSize(); + if ((*EC = MemberSize.getError())) + return; + Size += MemberSize.get(); + Data = StringRef(Start, Size); + } // Setup StartOfFile and PaddingBytes. StartOfFile = sizeof(ArchiveMemberHeader); // Don't include attached name. - StringRef Name = Header->getName(); + StringRef Name = getRawName(); if (Name.startswith("#1/")) { uint64_t NameSize; if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) @@ -106,25 +114,40 @@ Archive::Child::Child(const Archive *Parent, const char *Start) } } -uint64_t Archive::Child::getSize() const { - if (Parent->IsThin) - return getHeader()->getSize(); +ErrorOr<uint64_t> Archive::Child::getSize() const { + if (Parent->IsThin) { + ErrorOr<uint32_t> Size = getHeader()->getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return Size.get(); + } return Data.size() - StartOfFile; } -uint64_t Archive::Child::getRawSize() const { - return getHeader()->getSize(); +ErrorOr<uint64_t> Archive::Child::getRawSize() const { + ErrorOr<uint32_t> Size = getHeader()->getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return Size.get(); +} + +bool Archive::Child::isThinMember() const { + StringRef Name = getHeader()->getName(); + return Parent->IsThin && Name != "/" && Name != "//"; } ErrorOr<StringRef> Archive::Child::getBuffer() const { - if (!Parent->IsThin) - return StringRef(Data.data() + StartOfFile, getSize()); + if (!isThinMember()) { + ErrorOr<uint64_t> Size = getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return StringRef(Data.data() + StartOfFile, Size.get()); + } ErrorOr<StringRef> Name = getName(); if (std::error_code EC = Name.getError()) return EC; - SmallString<128> FullName = - Parent->getMemoryBufferRef().getBufferIdentifier(); - sys::path::remove_filename(FullName); + SmallString<128> FullName = sys::path::parent_path( + Parent->getMemoryBufferRef().getBufferIdentifier()); sys::path::append(FullName, *Name); ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); if (std::error_code EC = Buf.getError()) @@ -133,7 +156,7 @@ ErrorOr<StringRef> Archive::Child::getBuffer() const { return Parent->ThinBuffers.back()->getBuffer(); } -Archive::Child Archive::Child::getNext() const { +ErrorOr<Child> Archive::Child::getNext() const { size_t SpaceToSkip = Data.size(); // If it's odd, add 1 to make it even. if (SpaceToSkip & 1) @@ -141,11 +164,19 @@ Archive::Child Archive::Child::getNext() const { const char *NextLoc = Data.data() + SpaceToSkip; - // Check to see if this is past the end of the archive. - if (NextLoc >= Parent->Data.getBufferEnd()) - return Child(Parent, nullptr); + // Check to see if this is at the end of the archive. + if (NextLoc == Parent->Data.getBufferEnd()) + return Child(Parent, nullptr, nullptr); - return Child(Parent, NextLoc); + // Check to see if this is past the end of the archive. 
+ if (NextLoc > Parent->Data.getBufferEnd()) + return object_error::parse_failed; + + std::error_code EC; + Child Ret(Parent, NextLoc, &EC); + if (EC) + return EC; + return Ret; } uint64_t Archive::Child::getChildOffset() const { @@ -168,17 +199,11 @@ ErrorOr Archive::Child::getName() const { std::size_t offset; if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) llvm_unreachable("Long name offset is not an integer"); - const char *addr = Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader) - + offset; + // Verify it. - if (Parent->StringTable == Parent->child_end() - || addr < (Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader)) - || addr > (Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader) - + Parent->StringTable->getSize())) + if (offset >= Parent->StringTable.size()) return object_error::parse_failed; + const char *addr = Parent->StringTable.begin() + offset; // GNU long file names end with a "/\n". if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { @@ -227,9 +252,13 @@ ErrorOr> Archive::create(MemoryBufferRef Source) { return std::move(Ret); } +void Archive::setFirstRegular(const Child &C) { + FirstRegularData = C.Data; + FirstRegularStartOfFile = C.StartOfFile; +} + Archive::Archive(MemoryBufferRef Source, std::error_code &ec) - : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()), - StringTable(child_end()), FirstRegular(child_end()) { + : Binary(Binary::ID_Archive, Source) { StringRef Buffer = Data.getBuffer(); // Check for sufficient magic. if (Buffer.startswith(ThinMagic)) { @@ -242,15 +271,26 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) } // Get the special members. - child_iterator i = child_begin(false); - child_iterator e = child_end(); + child_iterator I = child_begin(false); + if ((ec = I->getError())) + return; + child_iterator E = child_end(); - if (i == e) { + if (I == E) { ec = std::error_code(); return; } + const Child *C = &**I; - StringRef Name = i->getRawName(); + auto Increment = [&]() { + ++I; + if ((ec = I->getError())) + return true; + C = &**I; + return false; + }; + + StringRef Name = C->getRawName(); // Below is the pattern that is used to figure out the archive format // GNU archive format @@ -273,9 +313,13 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) if (Name == "__.SYMDEF") { Format = K_BSD; - SymbolTable = i; - ++i; - FirstRegular = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); + if (Increment()) + return; + setFirstRegular(*C); + ec = std::error_code(); return; } @@ -283,16 +327,19 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) if (Name.startswith("#1/")) { Format = K_BSD; // We know this is BSD, so getName will work since there is no string table. - ErrorOr NameOrErr = i->getName(); + ErrorOr NameOrErr = C->getName(); ec = NameOrErr.getError(); if (ec) return; Name = NameOrErr.get(); if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { - SymbolTable = i; - ++i; + // We know that the symbol table is not an external file, so we just + // assert there is no error. 
+ SymbolTable = *C->getBuffer(); + if (Increment()) + return; } - FirstRegular = i; + setFirstRegular(*C); return; } @@ -303,30 +350,36 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) bool has64SymTable = false; if (Name == "/" || Name == "/SYM64/") { - SymbolTable = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); if (Name == "/SYM64/") has64SymTable = true; - ++i; - if (i == e) { + if (Increment()) + return; + if (I == E) { ec = std::error_code(); return; } - Name = i->getRawName(); + Name = C->getRawName(); } if (Name == "//") { Format = has64SymTable ? K_MIPS64 : K_GNU; - StringTable = i; - ++i; - FirstRegular = i; + // The string table is never an external member, so we just assert on the + // ErrorOr. + StringTable = *C->getBuffer(); + if (Increment()) + return; + setFirstRegular(*C); ec = std::error_code(); return; } if (Name[0] != '/') { Format = has64SymTable ? K_MIPS64 : K_GNU; - FirstRegular = i; + setFirstRegular(*C); ec = std::error_code(); return; } @@ -337,23 +390,30 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) } Format = K_COFF; - SymbolTable = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); - ++i; - if (i == e) { - FirstRegular = i; + if (Increment()) + return; + + if (I == E) { + setFirstRegular(*C); ec = std::error_code(); return; } - Name = i->getRawName(); + Name = C->getRawName(); if (Name == "//") { - StringTable = i; - ++i; + // The string table is never an external member, so we just assert on the + // ErrorOr. + StringTable = *C->getBuffer(); + if (Increment()) + return; } - FirstRegular = i; + setFirstRegular(*C); ec = std::error_code(); } @@ -362,22 +422,25 @@ Archive::child_iterator Archive::child_begin(bool SkipInternal) const { return child_end(); if (SkipInternal) - return FirstRegular; + return Child(this, FirstRegularData, FirstRegularStartOfFile); const char *Loc = Data.getBufferStart() + strlen(Magic); - Child c(this, Loc); - return c; + std::error_code EC; + Child c(this, Loc, &EC); + if (EC) + return child_iterator(EC); + return child_iterator(c); } Archive::child_iterator Archive::child_end() const { - return Child(this, nullptr); + return Child(this, nullptr, nullptr); } StringRef Archive::Symbol::getName() const { return Parent->getSymbolTable().begin() + StringIndex; } -ErrorOr Archive::Symbol::getMember() const { +ErrorOr Archive::Symbol::getMember() const { const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; if (Parent->kind() == K_MIPS64) @@ -422,8 +485,11 @@ ErrorOr Archive::Symbol::getMember() const { } const char *Loc = Parent->getData().begin() + Offset; - child_iterator Iter(Child(Parent, Loc)); - return Iter; + std::error_code EC; + Child C(Parent, Loc, &EC); + if (EC) + return EC; + return C; } Archive::Symbol Archive::Symbol::getNext() const { @@ -506,12 +572,12 @@ Archive::symbol_iterator Archive::symbol_begin() const { } Archive::symbol_iterator Archive::symbol_end() const { - if (!hasSymbolTable()) - return symbol_iterator(Symbol(this, 0, 0)); return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); } uint32_t Archive::getNumberOfSymbols() const { + if (!hasSymbolTable()) + return 0; const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); @@ -542,6 +608,4 @@ Archive::child_iterator Archive::findSym(StringRef name) const { return child_end(); } -bool 
Archive::hasSymbolTable() const { - return SymbolTable != child_end(); -} +bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index a40901c924ea..c7343fdc171d 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -34,32 +34,32 @@ using namespace llvm; -NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I, +NewArchiveIterator::NewArchiveIterator(const object::Archive::Child &OldMember, StringRef Name) - : IsNewMember(false), Name(Name), OldI(I) {} + : IsNewMember(false), Name(Name), OldMember(OldMember) {} -NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name) - : IsNewMember(true), Name(Name), NewFilename(NewFilename) {} +NewArchiveIterator::NewArchiveIterator(StringRef FileName) + : IsNewMember(true), Name(FileName), OldMember(nullptr, nullptr, nullptr) {} StringRef NewArchiveIterator::getName() const { return Name; } bool NewArchiveIterator::isNewMember() const { return IsNewMember; } -object::Archive::child_iterator NewArchiveIterator::getOld() const { +const object::Archive::Child &NewArchiveIterator::getOld() const { assert(!IsNewMember); - return OldI; + return OldMember; } StringRef NewArchiveIterator::getNew() const { assert(IsNewMember); - return NewFilename; + return Name; } llvm::ErrorOr NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const { assert(IsNewMember); int NewFD; - if (auto EC = sys::fs::openFileForRead(NewFilename, NewFD)) + if (auto EC = sys::fs::openFileForRead(Name, NewFD)) return EC; assert(NewFD != -1); @@ -77,7 +77,7 @@ NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const { template static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, - bool MayTruncate = false) { + bool MayTruncate = false) { uint64_t OldPos = OS.tell(); OS << Data; unsigned SizeSoFar = OS.tell() - OldPos; @@ -135,30 +135,56 @@ static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, Out.write(uint8_t(0)); } +static bool useStringTable(bool Thin, StringRef Name) { + return Thin || Name.size() >= 16; +} + static void -printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, StringRef Name, std::vector::iterator &StringMapIndexIter, const sys::TimeValue &ModTime, unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { if (Kind == object::Archive::K_BSD) return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); - if (Name.size() < 16) + if (!useStringTable(Thin, Name)) return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); Out << '/'; printWithSpacePadding(Out, *StringMapIndexIter++, 15); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } -static void writeStringTable(raw_fd_ostream &Out, +// Compute the relative path from From to To. 
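// Worked examples (illustrative, POSIX-style separators):
//   computeRelativePath("out/lib.a", "src/foo.o")     == "../src/foo.o"
//   computeRelativePath("lib/a/lib.a", "lib/b/foo.o") == "../b/foo.o"
//   computeRelativePath("lib.a", "/abs/foo.o")        == "/abs/foo.o"
// (an absolute path on either side is returned unchanged)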
+static std::string computeRelativePath(StringRef From, StringRef To) { + if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) + return To; + + StringRef DirFrom = sys::path::parent_path(From); + auto FromI = sys::path::begin(DirFrom); + auto ToI = sys::path::begin(To); + while (*FromI == *ToI) { + ++FromI; + ++ToI; + } + + SmallString<128> Relative; + for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) + sys::path::append(Relative, ".."); + + for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) + sys::path::append(Relative, *ToI); + + return Relative.str(); +} + +static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, ArrayRef<NewArchiveIterator> Members, - std::vector<unsigned> &StringMapIndexes) { + std::vector<unsigned> &StringMapIndexes, + bool Thin) { unsigned StartOffset = 0; - for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), - E = Members.end(); - I != E; ++I) { - StringRef Name = I->getName(); - if (Name.size() < 16) + for (const NewArchiveIterator &I : Members) { + StringRef Name = sys::path::filename(I.getName()); + if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { printWithSpacePadding(Out, "//", 58); @@ -166,7 +192,13 @@ static void writeStringTable(raw_fd_ostream &Out, StartOffset = Out.tell(); } StringMapIndexes.push_back(Out.tell() - StartOffset); - Out << Name << "/\n"; + + if (Thin) + Out << computeRelativePath(ArcName, I.getName()); + else + Out << Name; + + Out << "/\n"; } if (StartOffset == 0) return; @@ -268,9 +300,11 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, return BodyStartOffset + 4; } -std::pair<StringRef, std::error_code> llvm::writeArchive( - StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) { +std::pair<StringRef, std::error_code> +llvm::writeArchive(StringRef ArcName, + std::vector<NewArchiveIterator> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -279,7 +313,10 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( tool_output_file Output(TmpArchive, TmpArchiveFD); raw_fd_ostream &Out = Output.os(); - Out << "!<arch>\n"; + if (Thin) + Out << "!<thin>\n"; + else + Out << "!<arch>\n"; std::vector<unsigned> MemberOffsetRefs; @@ -309,9 +346,11 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( Buffers.push_back(std::move(MemberBufferOrErr.get())); MemberRef = Buffers.back()->getMemBufferRef(); } else { - object::Archive::child_iterator OldMember = Member.getOld(); + const object::Archive::Child &OldMember = Member.getOld(); + assert((!Thin || OldMember.getParent()->isThin()) && + "Thin archives cannot refers to member of other archives"); ErrorOr<MemoryBufferRef> MemberBufferOrErr = - OldMember->getMemoryBufferRef(); + OldMember.getMemoryBufferRef(); if (auto EC = MemberBufferOrErr.getError()) return std::make_pair("", EC); MemberRef = MemberBufferOrErr.get(); @@ -330,7 +369,7 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( std::vector<unsigned> StringMapIndexes; if (Kind != object::Archive::K_BSD) - writeStringTable(Out, NewMembers, StringMapIndexes); + writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); unsigned MemberNum = 0; unsigned NewMemberNum = 0; @@ -358,26 +397,32 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( GID = Status.getGroup(); Perms = Status.permissions(); } else { - object::Archive::child_iterator OldMember = I.getOld(); - ModTime = OldMember->getLastModified(); - UID = OldMember->getUID(); - GID = OldMember->getGID(); - Perms = OldMember->getAccessMode(); + const object::Archive::Child &OldMember = I.getOld(); + ModTime = 
OldMember.getLastModified(); + UID = OldMember.getUID(); + GID = OldMember.getGID(); + Perms = OldMember.getAccessMode(); } if (I.isNewMember()) { StringRef FileName = I.getNew(); const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++]; - printMemberHeader(Out, Kind, sys::path::filename(FileName), + printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName), StringMapIndexIter, ModTime, UID, GID, Perms, Status.getSize()); } else { - object::Archive::child_iterator OldMember = I.getOld(); - printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime, - UID, GID, Perms, OldMember->getSize()); + const object::Archive::Child &OldMember = I.getOld(); + ErrorOr<uint64_t> Size = OldMember.getSize(); + if (std::error_code EC = Size.getError()) + return std::make_pair("", EC); + StringRef FileName = I.getName(); + printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName), + StringMapIndexIter, ModTime, UID, GID, Perms, + Size.get()); } - Out << File.getBuffer(); + if (!Thin) + Out << File.getBuffer(); if (Out.tell() % 2) Out << '\n'; diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 8f10143fccc3..2ac2ee51dc23 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMObject RecordStreamer.cpp SymbolicFile.cpp SymbolSize.cpp + FunctionIndexObjectFile.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index bcca9839b475..1f2111759a0e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -171,6 +171,11 @@ ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { if (std::error_code EC = getSection(SectionNumber, Section)) return EC; Result += Section->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. 
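// E.g. with a PE32+ image based at 0x140000000 and a symbol in a section at
// RVA 0x1000, the result is now the VA 0x140001000 rather than the bare RVA;
// getSectionAddress below gets the same adjustment.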
+ Result += getImageBase(); + return Result; } @@ -178,10 +183,10 @@ SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); int32_t SectionNumber = Symb.getSectionNumber(); + if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) + return SymbolRef::ST_Function; if (Symb.isAnyUndefined()) return SymbolRef::ST_Unknown; - if (Symb.isFunctionDefinition()) - return SymbolRef::ST_Function; if (Symb.isCommon()) return SymbolRef::ST_Data; if (Symb.isFileRecord()) @@ -230,21 +235,17 @@ uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { return Symb.getValue(); } -std::error_code -COFFObjectFile::getSymbolSection(DataRefImpl Ref, - section_iterator &Result) const { +ErrorOr +COFFObjectFile::getSymbolSection(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); - if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) { - Result = section_end(); - } else { - const coff_section *Sec = nullptr; - if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) - return EC; - DataRefImpl Ref; - Ref.p = reinterpret_cast(Sec); - Result = section_iterator(SectionRef(Ref, this)); - } - return std::error_code(); + if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) + return section_end(); + const coff_section *Sec = nullptr; + if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) + return EC; + DataRefImpl Ret; + Ret.p = reinterpret_cast(Sec); + return section_iterator(SectionRef(Ret, this)); } unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const { @@ -266,7 +267,12 @@ std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref, uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); - return Sec->VirtualAddress; + uint64_t Result = Sec->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + return Result; } uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { @@ -412,10 +418,18 @@ std::error_code COFFObjectFile::initSymbolTablePtr() { return std::error_code(); } +uint64_t COFFObjectFile::getImageBase() const { + if (PE32Header) + return PE32Header->ImageBase; + else if (PE32PlusHeader) + return PE32PlusHeader->ImageBase; + // This actually comes up in practice. + return 0; +} + // Returns the file offset for the given VA. std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { - uint64_t ImageBase = PE32Header ? 
(uint64_t)PE32Header->ImageBase - : (uint64_t)PE32PlusHeader->ImageBase; + uint64_t ImageBase = getImageBase(); uint64_t Rva = Addr - ImageBase; assert(Rva <= UINT32_MAX); return getRvaPtr((uint32_t)Rva, Res); @@ -744,6 +758,8 @@ StringRef COFFObjectFile::getFileFormatName() const { return "COFF-x86-64"; case COFF::IMAGE_FILE_MACHINE_ARMNT: return "COFF-ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "COFF-ARM64"; default: return "COFF-"; } @@ -757,6 +773,8 @@ unsigned COFFObjectFile::getArch() const { return Triple::x86_64; case COFF::IMAGE_FILE_MACHINE_ARMNT: return Triple::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return Triple::aarch64; default: return Triple::UnknownArch; } diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp index 9a24b531da9e..4c1fca19bf1b 100644 --- a/lib/Object/COFFYAML.cpp +++ b/lib/Object/COFFYAML.cpp @@ -56,6 +56,7 @@ void ScalarEnumerationTraits::enumeration( ECase(IMAGE_FILE_MACHINE_AMD64); ECase(IMAGE_FILE_MACHINE_ARM); ECase(IMAGE_FILE_MACHINE_ARMNT); + ECase(IMAGE_FILE_MACHINE_ARM64); ECase(IMAGE_FILE_MACHINE_EBC); ECase(IMAGE_FILE_MACHINE_I386); ECase(IMAGE_FILE_MACHINE_IA64); @@ -210,6 +211,7 @@ void ScalarBitSetTraits::bitset( void ScalarBitSetTraits::bitset( IO &IO, COFF::SectionCharacteristics &Value) { + BCase(IMAGE_SCN_TYPE_NOLOAD); BCase(IMAGE_SCN_TYPE_NO_PAD); BCase(IMAGE_SCN_CNT_CODE); BCase(IMAGE_SCN_CNT_INITIALIZED_DATA); diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 398e9e412994..62c27cc427a6 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -26,6 +26,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { } break; case ELF::EM_386: + case ELF::EM_IAMCU: switch (Type) { #include "llvm/Support/ELFRelocs/i386.def" default: diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index 72c232c32870..4a4b2276f46b 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -193,6 +193,7 @@ ScalarEnumerationTraits::enumeration(IO &IO, ECase(EM_VIDEOCORE5) ECase(EM_78KOR) ECase(EM_56800EX) + ECase(EM_AMDGPU) #undef ECase } @@ -316,6 +317,25 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_HEXAGON_ISA_V4) BCase(EF_HEXAGON_ISA_V5) break; + case ELF::EM_AVR: + BCase(EF_AVR_ARCH_AVR1) + BCase(EF_AVR_ARCH_AVR2) + BCase(EF_AVR_ARCH_AVR25) + BCase(EF_AVR_ARCH_AVR3) + BCase(EF_AVR_ARCH_AVR31) + BCase(EF_AVR_ARCH_AVR35) + BCase(EF_AVR_ARCH_AVR4) + BCase(EF_AVR_ARCH_AVR51) + BCase(EF_AVR_ARCH_AVR6) + BCase(EF_AVR_ARCH_AVRTINY) + BCase(EF_AVR_ARCH_XMEGA1) + BCase(EF_AVR_ARCH_XMEGA2) + BCase(EF_AVR_ARCH_XMEGA3) + BCase(EF_AVR_ARCH_XMEGA4) + BCase(EF_AVR_ARCH_XMEGA5) + BCase(EF_AVR_ARCH_XMEGA6) + BCase(EF_AVR_ARCH_XMEGA7) + break; default: llvm_unreachable("Unsupported architecture"); } @@ -382,6 +402,7 @@ void ScalarEnumerationTraits::enumeration( void ScalarBitSetTraits::bitset(IO &IO, ELFYAML::ELF_SHF &Value) { + const auto *Object = static_cast(IO.getContext()); #define BCase(X) IO.bitSetCase(Value, #X, ELF::X); BCase(SHF_WRITE) BCase(SHF_ALLOC) @@ -394,6 +415,17 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(SHF_OS_NONCONFORMING) BCase(SHF_GROUP) BCase(SHF_TLS) + switch(Object->Header.Machine) { + case ELF::EM_AMDGPU: + BCase(SHF_AMDGPU_HSA_GLOBAL) + BCase(SHF_AMDGPU_HSA_READONLY) + BCase(SHF_AMDGPU_HSA_CODE) + BCase(SHF_AMDGPU_HSA_AGENT) + break; + default: + // Nothing to do. 
+ break; + } #undef BCase } @@ -466,6 +498,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration( #include "llvm/Support/ELFRelocs/Hexagon.def" break; case ELF::EM_386: + case ELF::EM_IAMCU: #include "llvm/Support/ELFRelocs/i386.def" break; case ELF::EM_AARCH64: diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp index 7ca2f12f0924..7ecc3a19af9d 100644 --- a/lib/Object/Error.cpp +++ b/lib/Object/Error.cpp @@ -47,6 +47,8 @@ std::string _object_error_category::message(int EV) const { return "Invalid section index"; case object_error::bitcode_section_not_found: return "Bitcode section not found in object file"; + case object_error::elf_invalid_dynamic_table_size: + return "Invalid dynamic table size"; case object_error::macho_small_load_command: return "Mach-O load command with size < 8 bytes"; case object_error::macho_load_segment_too_many_sections: diff --git a/lib/Object/FunctionIndexObjectFile.cpp b/lib/Object/FunctionIndexObjectFile.cpp new file mode 100644 index 000000000000..fe111de1a9c8 --- /dev/null +++ b/lib/Object/FunctionIndexObjectFile.cpp @@ -0,0 +1,143 @@ +//===- FunctionIndexObjectFile.cpp - Function index file implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the FunctionIndexObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/FunctionInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +FunctionIndexObjectFile::FunctionIndexObjectFile( + MemoryBufferRef Object, std::unique_ptr<FunctionInfoIndex> I) + : SymbolicFile(Binary::ID_FunctionIndex, Object), Index(std::move(I)) {} + +FunctionIndexObjectFile::~FunctionIndexObjectFile() {} + +std::unique_ptr<FunctionInfoIndex> FunctionIndexObjectFile::takeIndex() { + return std::move(Index); +} + +ErrorOr<MemoryBufferRef> +FunctionIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + StringRef SecName; + if (std::error_code EC = Sec.getName(SecName)) + return EC; + if (SecName == ".llvmbc") { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr<MemoryBufferRef> +FunctionIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + ErrorOr<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return ObjFile.getError(); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +// Looks for function index in the given memory buffer. +// returns true if found, else false. 
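For a sense of how the entry point exported by this new file is meant to be driven, here is a hypothetical consumer of getFunctionIndexForFile (defined near the bottom of the file); the handler body and function name are placeholders:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FunctionInfo.h"
#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical driver for the function-index reading API added here.
static void dumpFunctionIndex(llvm::StringRef Path) {
  auto DiagHandler = [](const llvm::DiagnosticInfo &DI) { /* report DI */ };
  llvm::ErrorOr<std::unique_ptr<llvm::FunctionInfoIndex>> IndexOrErr =
      llvm::getFunctionIndexForFile(Path, DiagHandler);
  if (std::error_code EC = IndexOrErr.getError()) {
    llvm::errs() << Path << ": " << EC.message() << "\n";
    return;
  }
  llvm::FunctionInfoIndex &Index = **IndexOrErr;
  (void)Index; // ... inspect the parsed index here.
}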
+bool FunctionIndexObjectFile::hasFunctionSummaryInMemBuffer( + MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler) { + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return false; + + return hasFunctionSummary(BCOrErr.get(), DiagnosticHandler); +} + +// Parse function index in the given memory buffer. +// Return new FunctionIndexObjectFile instance containing parsed +// function summary/index. +ErrorOr> +FunctionIndexObjectFile::create(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy) { + std::unique_ptr Index; + + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return BCOrErr.getError(); + + ErrorOr> IOrErr = getFunctionInfoIndex( + BCOrErr.get(), DiagnosticHandler, IsLazy); + + if (std::error_code EC = IOrErr.getError()) + return EC; + + Index = std::move(IOrErr.get()); + + return llvm::make_unique(Object, std::move(Index)); +} + +// Parse the function summary information for function with the +// given name out of the given buffer. Parsed information is +// stored on the index object saved in this object. +std::error_code FunctionIndexObjectFile::findFunctionSummaryInMemBuffer( + MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: { + return readFunctionSummary(Object, DiagnosticHandler, FunctionName, + std::move(Index)); + } + default: + return object_error::invalid_file_type; + } +} + +// Parse the function index out of an IR file and return the function +// index object if found, or nullptr if not. +ErrorOr> +llvm::getFunctionIndexForFile(StringRef Path, + DiagnosticHandlerFunction DiagnosticHandler) { + ErrorOr> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + std::error_code EC = FileOrErr.getError(); + if (EC) + return EC; + MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); + ErrorOr> ObjOrErr = + object::FunctionIndexObjectFile::create(BufferRef, DiagnosticHandler); + EC = ObjOrErr.getError(); + if (EC) + return EC; + + object::FunctionIndexObjectFile &Obj = **ObjOrErr; + return Obj.takeIndex(); +} diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index 9f5132e9062c..c35c413b3c3b 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -219,6 +219,12 @@ uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { uint32_t Res = BasicSymbolRef::SF_None; if (GV->isDeclarationForLinker()) Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } if (GV->hasPrivateLinkage()) Res |= BasicSymbolRef::SF_FormatSpecific; if (!GV->hasLocalLinkage()) @@ -303,7 +309,7 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object, MemoryBuffer::getMemBuffer(BCOrErr.get(), false)); ErrorOr> MOrErr = - getLazyBitcodeModule(std::move(Buff), Context, nullptr, + getLazyBitcodeModule(std::move(Buff), Context, /*ShouldLazyLoadMetadata*/ true); if (std::error_code EC = MOrErr.getError()) return EC; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 05900630c75c..d1f79b225ee4 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -278,7 +278,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, 
return; }
      LinkOptHintsLoadCmd = Load.Ptr;
-    } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
+    } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
               Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
      // Multiple dyldinfo load commands
      if (DyldInfoLoadCmd) {
@@ -401,6 +401,9 @@ SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { case MachO::N_UNDF : return SymbolRef::ST_Unknown; case MachO::N_SECT :
+      section_iterator Sec = *getSymbolSection(Symb);
+      if (Sec->isData() || Sec->isBSS())
+        return SymbolRef::ST_Data;
      return SymbolRef::ST_Function;
  }
  return SymbolRef::ST_Other;
@@ -445,22 +448,18 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { return Result; }
-std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
-                                                  section_iterator &Res) const {
+ErrorOr<section_iterator>
+MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
  MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
  uint8_t index = Entry.n_sect;
-  if (index == 0) {
-    Res = section_end();
-  } else {
-    DataRefImpl DRI;
-    DRI.d.a = index - 1;
-    if (DRI.d.a >= Sections.size())
-      report_fatal_error("getSymbolSection: Invalid section index.");
-    Res = section_iterator(SectionRef(DRI, this));
-  }
-
-  return std::error_code();
+  if (index == 0)
+    return section_end();
+  DataRefImpl DRI;
+  DRI.d.a = index - 1;
+  if (DRI.d.a >= Sections.size())
+    report_fatal_error("getSymbolSection: Invalid section index.");
+  return section_iterator(SectionRef(DRI, this));
}

unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
@@ -487,9 +486,32 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { }

uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
-  if (is64Bit())
-    return getSection64(Sec).size;
-  return getSection(Sec).size;
+  // In the case of a malformed Mach-O file, where the section offset is past
+  // the end of the file or some part of the section size is past the end of
+  // the file, return a size of zero or a size that covers the rest of the
+  // file but does not extend past the end of the file.
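+  // Worked example (editor's sketch, illustrative values): in a 0x1000-byte
+  // file, a section claiming offset 0xF00 and size 0x400 is clamped to
+  // 0x1000 - 0xF00 = 0x100 bytes, and a section whose offset 0x1200 lies
+  // entirely past EOF gets size 0. Zero-fill section types are exempt below
+  // because they occupy no bytes in the file.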
+ uint32_t SectOffset, SectType; + uint64_t SectSize; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } else { + MachO::section Sect = getSection(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } + if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL) + return SectSize; + uint64_t FileSize = getData().size(); + if (SectOffset > FileSize) + return 0; + if (FileSize - SectOffset < SectSize) + return FileSize - SectOffset; + return SectSize; } std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, @@ -1136,8 +1158,7 @@ Triple MachOObjectFile::getThumbArch(uint32_t CPUType, uint32_t CPUSubType, } Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType, - const char **McpuDefault, - Triple *ThumbTriple) { + const char **McpuDefault, Triple *ThumbTriple) { Triple T = MachOObjectFile::getArch(CPUType, CPUSubType, McpuDefault); *ThumbTriple = MachOObjectFile::getThumbArch(CPUType, CPUSubType, McpuDefault); @@ -1212,8 +1233,8 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef T) - : Trie(T), Malformed(false), Done(false) { } +ExportEntry::ExportEntry(ArrayRef T) + : Trie(T), Malformed(false), Done(false) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -1226,7 +1247,7 @@ void ExportEntry::moveToEnd() { } bool ExportEntry::operator==(const ExportEntry &Other) const { - // Common case, one at end, other iterating from begin. + // Common case, one at end, other iterating from begin. if (Done || Other.Done) return (Done == Other.Done); // Not equal if different stack sizes. @@ -1240,7 +1261,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const { if (Stack[i].Start != Other.Stack[i].Start) return false; } - return true; + return true; } uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { @@ -1281,11 +1302,10 @@ uint32_t ExportEntry::nodeOffset() const { return Stack.back().Start - Trie.begin(); } -ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) { -} +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), + ImportName(nullptr), ChildCount(0), NextChildIndex(0), + ParentStringLength(0), IsExportNode(false) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -1302,7 +1322,7 @@ void ExportEntry::pushNode(uint64_t offset) { } else { State.Address = readULEB128(State.Current); if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) - State.Other = readULEB128(State.Current); + State.Other = readULEB128(State.Current); } } State.ChildCount = *Children; @@ -1339,7 +1359,7 @@ void ExportEntry::pushDownUntilBottom() { // // There is one "export" node for each exported symbol. But because some // symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export -// node may have child nodes too. +// node may have child nodes too. 
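// For instance (editor's sketch), exporting _dup and _dup2 gives a trie of
// the shape:
//
//   (root) --"_dup"--> [export node: _dup] --"2"--> [export node: _dup2]
//
// where the node for _dup is both an export node and an interior node with
// one child edge labeled "2".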
// // The algorithm for moveNext() is to keep moving down the leftmost unvisited // child until hitting a node with no children (which is an export node or @@ -1372,7 +1392,7 @@ void ExportEntry::moveNext() { Done = true; } -iterator_range +iterator_range MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Start(Trie); if (Trie.size() == 0) @@ -1383,15 +1403,13 @@ MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Finish(Trie); Finish.moveToEnd(); - return iterator_range(export_iterator(Start), - export_iterator(Finish)); + return make_range(export_iterator(Start), export_iterator(Finish)); } iterator_range MachOObjectFile::exports() const { return exports(getDyldInfoExportsTrie()); } - MachORebaseEntry::MachORebaseEntry(ArrayRef Bytes, bool is64Bit) : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), @@ -1555,17 +1573,14 @@ MachOObjectFile::rebaseTable(ArrayRef Opcodes, bool is64) { MachORebaseEntry Finish(Opcodes, is64); Finish.moveToEnd(); - return iterator_range(rebase_iterator(Start), - rebase_iterator(Finish)); + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } iterator_range MachOObjectFile::rebaseTable() const { return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); } - -MachOBindEntry::MachOBindEntry(ArrayRef Bytes, bool is64Bit, - Kind BK) +MachOBindEntry::MachOBindEntry(ArrayRef Bytes, bool is64Bit, Kind BK) : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), PointerSize(is64Bit ? 8 : 4), @@ -1769,7 +1784,6 @@ int64_t MachOBindEntry::readSLEB128() { return Result; } - uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } @@ -1810,8 +1824,7 @@ MachOObjectFile::bindTable(ArrayRef Opcodes, bool is64, MachOBindEntry Finish(Opcodes, is64, BKind); Finish.moveToEnd(); - return iterator_range(bind_iterator(Start), - bind_iterator(Finish)); + return make_range(bind_iterator(Start), bind_iterator(Finish)); } iterator_range MachOObjectFile::bindTable() const { @@ -1841,8 +1854,7 @@ MachOObjectFile::end_load_commands() const { iterator_range MachOObjectFile::load_commands() const { - return iterator_range(begin_load_commands(), - end_load_commands()); + return make_range(begin_load_commands(), end_load_commands()); } StringRef @@ -2207,66 +2219,66 @@ MachOObjectFile::getLinkOptHintsLoadCommand() const { } ArrayRef MachOObjectFile::getDyldInfoRebaseOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.rebase_off)); - return ArrayRef(Ptr, DyldInfo.rebase_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.rebase_off)); + return makeArrayRef(Ptr, DyldInfo.rebase_size); } ArrayRef MachOObjectFile::getDyldInfoBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.bind_off)); - return ArrayRef(Ptr, DyldInfo.bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + 
reinterpret_cast(getPtr(this, DyldInfo.bind_off)); + return makeArrayRef(Ptr, DyldInfo.bind_size); } ArrayRef MachOObjectFile::getDyldInfoWeakBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.weak_bind_off)); - return ArrayRef(Ptr, DyldInfo.weak_bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.weak_bind_off)); + return makeArrayRef(Ptr, DyldInfo.weak_bind_size); } ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.lazy_bind_off)); - return ArrayRef(Ptr, DyldInfo.lazy_bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.lazy_bind_off)); + return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); } ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.export_off)); - return ArrayRef(Ptr, DyldInfo.export_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.export_off)); + return makeArrayRef(Ptr, DyldInfo.export_size); } ArrayRef MachOObjectFile::getUuid() const { if (!UuidLoadCmd) - return ArrayRef(); + return None; // Returning a pointer is fine as uuid doesn't need endian swapping. 
const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); - return ArrayRef(reinterpret_cast(Ptr), 16); + return makeArrayRef(reinterpret_cast(Ptr), 16); } StringRef MachOObjectFile::getStringTableData() const { @@ -2315,4 +2327,3 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) { return EC; return std::move(Ret); } - diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index 1d0e69e4622d..a1c83b9b7f86 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -69,14 +69,14 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch( ErrorOr> MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { - if (Parent) { - StringRef ParentData = Parent->getData(); - StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - StringRef ObjectName = Parent->getFileName(); - MemoryBufferRef ObjBuffer(ObjectData, ObjectName); - return ObjectFile::createMachOObjectFile(ObjBuffer); - } - return object_error::parse_failed; + if (!Parent) + return object_error::parse_failed; + + StringRef ParentData = Parent->getData(); + StringRef ObjectData = ParentData.substr(Header.offset, Header.size); + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return ObjectFile::createMachOObjectFile(ObjBuffer); } ErrorOr> diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 5c4b7a67b2ad..b44c1a16fd08 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -98,8 +98,10 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, LLVMSymbolIteratorRef Sym) { - if (std::error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect))) + ErrorOr SecOrErr = (*unwrap(Sym))->getSection(); + if (std::error_code ec = SecOrErr.getError()) report_fatal_error(ec.message()); + *unwrap(Sect) = *SecOrErr; } // ObjectFile Symbol iterators diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index f82edae89bc6..d12dc411361c 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -29,10 +29,10 @@ ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) : SymbolicFile(Type, Source) {} bool SectionRef::containsSymbol(SymbolRef S) const { - section_iterator SymSec = getObject()->section_end(); - if (S.getSection(SymSec)) + ErrorOr SymSec = S.getSection(); + if (!SymSec) return false; - return *this == *SymSec; + return *this == **SymSec; } uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 854e68e40f4d..bf79dfb8da62 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" @@ -54,9 +56,10 @@ ErrorOr> SymbolicFile::createSymbolicFile( case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: case sys::fs::file_magic::macho_dsym_companion: case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: return ObjectFile::createObjectFile(Object, Type); + case sys::fs::file_magic::coff_import_library: + return std::unique_ptr(new COFFImportFile(Object)); case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::macho_object: 
case sys::fs::file_magic::coff_object: { diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index ac000736c1f3..c3de2d1a4965 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -13,6 +13,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::opt; @@ -43,23 +44,25 @@ Arg::~Arg() { } } -void Arg::dump() const { - llvm::errs() << "<"; +void Arg::print(raw_ostream& O) const { + O << "<"; - llvm::errs() << " Opt:"; - Opt.dump(); + O << " Opt:"; + Opt.print(O); - llvm::errs() << " Index:" << Index; + O << " Index:" << Index; - llvm::errs() << " Values: ["; + O << " Values: ["; for (unsigned i = 0, e = Values.size(); i != e; ++i) { - if (i) llvm::errs() << ", "; - llvm::errs() << "'" << Values[i] << "'"; + if (i) O << ", "; + O << "'" << Values[i] << "'"; } - llvm::errs() << "]>\n"; + O << "]>\n"; } +LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } + std::string Arg::getAsString(const ArgList &Args) const { SmallString<256> Res; llvm::raw_svector_ostream OS(Res); diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a74ead6b3588..0826ef873195 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/Option.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -258,6 +259,21 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, } } +void ArgList::AddAllArgs(ArgStringList &Output, + ArrayRef Ids) const { + for (const Arg *Arg : Args) { + for (OptSpecifier Id : Ids) { + if (Arg->getOption().matches(Id)) { + Arg->claim(); + Arg->render(*this, Output); + break; + } + } + } +} + +/// This 3-opt variant of AddAllArgs could be eliminated in favor of one +/// that accepts a single specifier, given the above which accepts any number. void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { for (auto Arg: filtered(Id0, Id1, Id2)) { @@ -313,6 +329,15 @@ const char *ArgList::GetOrMakeJoinedArgString(unsigned Index, return MakeArgString(LHS + RHS); } +void ArgList::print(raw_ostream &O) const { + for (Arg *A : *this) { + O << "* "; + A->print(O); + } +} + +LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } + // void InputArgList::releaseMemory() { diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index e83536f2b572..09d4cebb83d0 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -84,11 +84,9 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) { OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos, - bool IgnoreCase) - : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos), - IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0), - FirstSearchableIndex(0) { +OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) + : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0), + TheUnknownOptionID(0), FirstSearchableIndex(0) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. 
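The new ArrayRef-based AddAllArgs overload above claims and renders every
argument that matches any of the given specifiers, preserving command-line
order. A minimal sketch of a caller (OPT_L and OPT_l are hypothetical option
IDs, not defined in this patch; real drivers take them from their generated
options table):

    #include "llvm/Option/ArgList.h"
    using namespace llvm::opt;

    // Hypothetical option IDs, stand-ins for tablegen'd values.
    enum HypotheticalID : unsigned { OPT_L = 1, OPT_l = 2 };

    static void forwardSearchPaths(const ArgList &Args,
                                   ArgStringList &CmdArgs) {
      // Claims each matching argument and appends its rendered form in the
      // order the arguments appeared on the command line.
      Args.AddAllArgs(CmdArgs, {OPT_L, OPT_l});
    }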
@@ -199,8 +197,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, if (isInput(PrefixesUnion, Str)) return new Arg(getOption(TheInputOptionID), Str, Index++, Str); - const Info *Start = OptionInfos + FirstSearchableIndex; - const Info *End = OptionInfos + getNumOptions(); + const Info *Start = OptionInfos.begin() + FirstSearchableIndex; + const Info *End = OptionInfos.end(); StringRef Name = StringRef(Str).ltrim(PrefixChars); // Search for the first next option which could be a prefix. diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index 221414d79e77..ebf05aab764b 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -35,10 +36,10 @@ Option::Option(const OptTable::Info *info, const OptTable *owner) } } -void Option::dump() const { - llvm::errs() << "<"; +void Option::print(raw_ostream &O) const { + O << "<"; switch (getKind()) { -#define P(N) case N: llvm::errs() << #N; break +#define P(N) case N: O << #N; break P(GroupClass); P(InputClass); P(UnknownClass); @@ -54,33 +55,35 @@ void Option::dump() const { } if (Info->Prefixes) { - llvm::errs() << " Prefixes:["; - for (const char * const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) { - llvm::errs() << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", "); + O << " Prefixes:["; + for (const char *const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) { + O << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", "); } - llvm::errs() << ']'; + O << ']'; } - llvm::errs() << " Name:\"" << getName() << '"'; + O << " Name:\"" << getName() << '"'; const Option Group = getGroup(); if (Group.isValid()) { - llvm::errs() << " Group:"; - Group.dump(); + O << " Group:"; + Group.print(O); } const Option Alias = getAlias(); if (Alias.isValid()) { - llvm::errs() << " Alias:"; - Alias.dump(); + O << " Alias:"; + Alias.print(O); } if (getKind() == MultiArgClass) - llvm::errs() << " NumArgs:" << getNumArgs(); + O << " NumArgs:" << getNumArgs(); - llvm::errs() << ">\n"; + O << ">\n"; } +void Option::dump() const { print(dbgs()); } + bool Option::matches(OptSpecifier Opt) const { // Aliases are never considered in matching, look through them. 
const Option Alias = getAlias(); diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt index 3063fe3e5da5..a752f42dcedd 100644 --- a/lib/Passes/LLVMBuild.txt +++ b/lib/Passes/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Passes parent = Libraries -required_libraries = Analysis Core IPA IPO InstCombine Scalar Support TransformUtils Vectorize +required_libraries = Analysis Core IPO InstCombine Scalar Support TransformUtils Vectorize diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index ba7132050e9b..8ba81f72a717 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -28,9 +29,14 @@ #include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" using namespace llvm; diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index d768a3ad1d2a..241a78927c77 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -27,10 +27,13 @@ MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) +MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("print", PrintModulePass(dbgs())) MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs())) +MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("verify", VerifierPass()) #undef MODULE_PASS @@ -54,6 +57,7 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) +FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("targetir", TM ? 
TM->getTargetIRAnalysis() : TargetIRAnalysis()) @@ -62,6 +66,7 @@ FUNCTION_ANALYSIS("targetir", #ifndef FUNCTION_PASS #define FUNCTION_PASS(NAME, CREATE_PASS) #endif +FUNCTION_PASS("adce", ADCEPass()) FUNCTION_PASS("early-cse", EarlyCSEPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass()) @@ -71,7 +76,9 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) +FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) +FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) #undef FUNCTION_PASS diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp index cf04fea8491d..55c0fb4792ef 100644 --- a/lib/ProfileData/CoverageMapping.cpp +++ b/lib/ProfileData/CoverageMapping.cpp @@ -181,18 +181,6 @@ void FunctionRecordIterator::skipOtherFiles() { *this = FunctionRecordIterator(); } -/// Get the function name from the record, removing the filename prefix if -/// necessary. -static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) { - StringRef FunctionName = Record.FunctionName; - if (Record.Filenames.empty()) - return FunctionName; - StringRef Filename = sys::path::filename(Record.Filenames[0]); - if (FunctionName.startswith(Filename)) - FunctionName = FunctionName.drop_front(Filename.size() + 1); - return FunctionName; -} - ErrorOr> CoverageMapping::load(CoverageMappingReader &CoverageReader, IndexedInstrProfReader &ProfileReader) { @@ -216,7 +204,11 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader, assert(!Record.MappingRegions.empty() && "Function has no regions"); - FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames); + StringRef OrigFuncName = Record.FunctionName; + if (!Record.Filenames.empty()) + OrigFuncName = + getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]); + FunctionRecord Function(OrigFuncName, Record.Filenames); for (const auto &Region : Record.MappingRegions) { ErrorOr ExecutionCount = Ctx.evaluate(Region.Count); if (!ExecutionCount) diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp index 334a3f51ec9e..a0f82a0d4ede 100644 --- a/lib/ProfileData/CoverageMappingReader.cpp +++ b/lib/ProfileData/CoverageMappingReader.cpp @@ -290,36 +290,25 @@ std::error_code RawCoverageMappingReader::read() { return std::error_code(); } -namespace { +std::error_code InstrProfSymtab::create(SectionRef &Section) { + if (auto Err = Section.getContents(Data)) + return Err; + Address = Section.getAddress(); + return std::error_code(); +} -/// \brief A helper structure to access the data from a section -/// in an object file. 
-struct SectionData { - StringRef Data; - uint64_t Address; - - std::error_code load(SectionRef &Section) { - if (auto Err = Section.getContents(Data)) - return Err; - Address = Section.getAddress(); - return std::error_code(); - } - - std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) { - if (Pointer < Address) - return coveragemap_error::malformed; - auto Offset = Pointer - Address; - if (Offset + Size > Data.size()) - return coveragemap_error::malformed; - Result = Data.substr(Pointer - Address, Size); - return std::error_code(); - } -}; +StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) { + if (Pointer < Address) + return StringRef(); + auto Offset = Pointer - Address; + if (Offset + Size > Data.size()) + return StringRef(); + return Data.substr(Pointer - Address, Size); } template -std::error_code readCoverageMappingData( - SectionData &ProfileNames, StringRef Data, +static std::error_code readCoverageMappingData( + InstrProfSymtab &ProfileNames, StringRef Data, std::vector &Records, std::vector &Filenames) { using namespace support; @@ -343,7 +332,7 @@ std::error_code readCoverageMappingData( // Skip past the function records, saving the start and end for later. const char *FunBuf = Buf; - Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t)); + Buf += NRecords * sizeof(coverage::CovMapFunctionRecord); const char *FunEnd = Buf; // Get the filenames. @@ -366,12 +355,15 @@ std::error_code readCoverageMappingData( // before reading the next map. Buf += alignmentAdjustment(Buf, 8); - while (FunBuf < FunEnd) { + auto CFR = + reinterpret_cast *>(FunBuf); + while ((const char *)CFR < FunEnd) { // Read the function information - T NamePtr = endian::readNext(FunBuf); - uint32_t NameSize = endian::readNext(FunBuf); - uint32_t DataSize = endian::readNext(FunBuf); - uint64_t FuncHash = endian::readNext(FunBuf); + T NamePtr = endian::byte_swap(CFR->NamePtr); + uint32_t NameSize = endian::byte_swap(CFR->NameSize); + uint32_t DataSize = endian::byte_swap(CFR->DataSize); + uint64_t FuncHash = endian::byte_swap(CFR->FuncHash); + CFR++; // Now use that to read the coverage data. if (CovBuf + DataSize > CovEnd) @@ -386,9 +378,9 @@ std::error_code readCoverageMappingData( continue; // Finally, grab the name and create a record. 
- StringRef FuncName; - if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName)) - return EC; + StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); + if (NameSize && FuncName.empty()) + return coveragemap_error::malformed; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); @@ -401,7 +393,7 @@ std::error_code readCoverageMappingData( static const char *TestingFormatMagic = "llvmcovmtestdata"; static std::error_code loadTestingFormat(StringRef Data, - SectionData &ProfileNames, + InstrProfSymtab &ProfileNames, StringRef &CoverageMapping, uint8_t &BytesInAddress, support::endianness &Endian) { @@ -420,14 +412,14 @@ static std::error_code loadTestingFormat(StringRef Data, if (Data.size() < 1) return coveragemap_error::truncated; N = 0; - ProfileNames.Address = + uint64_t Address = decodeULEB128(reinterpret_cast(Data.data()), &N); if (N > Data.size()) return coveragemap_error::malformed; Data = Data.substr(N); if (Data.size() < ProfileNamesSize) return coveragemap_error::malformed; - ProfileNames.Data = Data.substr(0, ProfileNamesSize); + ProfileNames.create(Data.substr(0, ProfileNamesSize), Address); CoverageMapping = Data.substr(ProfileNamesSize); return std::error_code(); } @@ -443,12 +435,10 @@ static ErrorOr lookupSection(ObjectFile &OF, StringRef Name) { return coveragemap_error::no_data_found; } -static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, - SectionData &ProfileNames, - StringRef &CoverageMapping, - uint8_t &BytesInAddress, - support::endianness &Endian, - StringRef Arch) { +static std::error_code +loadBinaryFormat(MemoryBufferRef ObjectBuffer, InstrProfSymtab &ProfileNames, + StringRef &CoverageMapping, uint8_t &BytesInAddress, + support::endianness &Endian, StringRef Arch) { auto BinOrErr = object::createBinary(ObjectBuffer); if (std::error_code EC = BinOrErr.getError()) return EC; @@ -477,17 +467,18 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, : support::endianness::big; // Look for the sections that we are interested in. - auto NamesSection = lookupSection(*OF, "__llvm_prf_names"); + auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false)); if (auto EC = NamesSection.getError()) return EC; - auto CoverageSection = lookupSection(*OF, "__llvm_covmap"); + auto CoverageSection = + lookupSection(*OF, getInstrProfCoverageSectionName(false)); if (auto EC = CoverageSection.getError()) return EC; // Get the contents of the given sections. if (std::error_code EC = CoverageSection->getContents(CoverageMapping)) return EC; - if (std::error_code EC = ProfileNames.load(*NamesSection)) + if (std::error_code EC = ProfileNames.create(*NamesSection)) return EC; return std::error_code(); @@ -498,33 +489,33 @@ BinaryCoverageReader::create(std::unique_ptr &ObjectBuffer, StringRef Arch) { std::unique_ptr Reader(new BinaryCoverageReader()); - SectionData Profile; + InstrProfSymtab ProfileNames; StringRef Coverage; uint8_t BytesInAddress; support::endianness Endian; std::error_code EC; if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) // This is a special format used for testing. 
- EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage, + EC = loadTestingFormat(ObjectBuffer->getBuffer(), ProfileNames, Coverage, BytesInAddress, Endian); else - EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage, - BytesInAddress, Endian, Arch); + EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), ProfileNames, + Coverage, BytesInAddress, Endian, Arch); if (EC) return EC; if (BytesInAddress == 4 && Endian == support::endianness::little) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 4 && Endian == support::endianness::big) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::little) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::big) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else return coveragemap_error::malformed; if (EC) diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 92822a71402f..f5acd23129dc 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -28,28 +32,32 @@ class InstrProfErrorCategoryType : public std::error_category { return "Success"; case instrprof_error::eof: return "End of File"; + case instrprof_error::unrecognized_format: + return "Unrecognized instrumentation profile encoding format"; case instrprof_error::bad_magic: - return "Invalid profile data (bad magic)"; + return "Invalid instrumentation profile data (bad magic)"; case instrprof_error::bad_header: - return "Invalid profile data (file header is corrupt)"; + return "Invalid instrumentation profile data (file header is corrupt)"; case instrprof_error::unsupported_version: - return "Unsupported profiling format version"; + return "Unsupported instrumentation profile format version"; case instrprof_error::unsupported_hash_type: - return "Unsupported profiling hash"; + return "Unsupported instrumentation profile hash type"; case instrprof_error::too_large: return "Too much profile data"; case instrprof_error::truncated: return "Truncated profile data"; case instrprof_error::malformed: - return "Malformed profile data"; + return "Malformed instrumentation profile data"; case instrprof_error::unknown_function: return "No profile data available for function"; case instrprof_error::hash_mismatch: - return "Function hash mismatch"; + return "Function control flow change detected (hash mismatch)"; case instrprof_error::count_mismatch: - return "Function count mismatch"; + return "Function basic block count change detected (counter mismatch)"; case instrprof_error::counter_overflow: return "Counter overflow"; + case instrprof_error::value_site_count_mismatch: + 
return "Function value site count change detected (counter mismatch)"; } llvm_unreachable("A value of instrprof_error has no message."); } @@ -61,3 +69,415 @@ static ManagedStatic ErrorCategory; const std::error_category &llvm::instrprof_category() { return *ErrorCategory; } + +namespace llvm { + +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { + + // Function names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (RawFuncName[0] == '\1') + RawFuncName = RawFuncName.substr(1); + + std::string FuncName = RawFuncName; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. + if (FileName.empty()) + FuncName = FuncName.insert(0, ":"); + else + FuncName = FuncName.insert(0, FileName.str() + ":"); + } + return FuncName; +} + +std::string getPGOFuncName(const Function &F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); +} + +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { + if (FileName.empty()) + return PGOFuncName; + // Drop the file name including ':'. See also getPGOFuncName. + if (PGOFuncName.startswith(FileName)) + PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); + return PGOFuncName; +} + +// \p FuncName is the string used as profile lookup key for the function. A +// symbol is created to hold the name. Return the legalized symbol name. +static std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage) { + std::string VarName = getInstrProfNameVarPrefix(); + VarName += FuncName; + + if (!GlobalValue::isLocalLinkage(Linkage)) + return VarName; + + // Now fix up illegal chars in local VarName that may upset the assembler. + const char *InvalidChars = "-:<>\"'"; + size_t found = VarName.find_first_of(InvalidChars); + while (found != std::string::npos) { + VarName[found] = '_'; + found = VarName.find_first_of(InvalidChars, found + 1); + } + return VarName; +} + +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName) { + + // We generally want to match the function's linkage, but available_externally + // and extern_weak both have the wrong semantics, and anything that doesn't + // need to link across compilation units doesn't need to be visible at all. + if (Linkage == GlobalValue::ExternalWeakLinkage) + Linkage = GlobalValue::LinkOnceAnyLinkage; + else if (Linkage == GlobalValue::AvailableExternallyLinkage) + Linkage = GlobalValue::LinkOnceODRLinkage; + else if (Linkage == GlobalValue::InternalLinkage || + Linkage == GlobalValue::ExternalLinkage) + Linkage = GlobalValue::PrivateLinkage; + + auto *Value = ConstantDataArray::getString(M.getContext(), FuncName, false); + auto FuncNameVar = + new GlobalVariable(M, Value->getType(), true, Linkage, Value, + getPGOFuncNameVarName(FuncName, Linkage)); + + // Hide the symbol so that we correctly get a copy for each executable. 
+ if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); + + return FuncNameVar; +} + +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) { + return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName); +} + +instrprof_error +InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto I = ValueData.begin(); + auto IE = ValueData.end(); + instrprof_error Result = instrprof_error::success; + for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; + ++J) { + while (I != IE && I->Value < J->Value) + ++I; + if (I != IE && I->Value == J->Value) { + uint64_t JCount = J->Count; + bool Overflowed; + if (Weight > 1) { + JCount = SaturatingMultiply(JCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + ++I; + continue; + } + ValueData.insert(I, *J); + } + return Result; +} + +// Merge Value Profile data from Src record to this record for ValueKind. +// Scale merged value counts by \p Weight. +instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, + InstrProfRecord &Src, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); + if (ThisNumValueSites != OtherNumValueSites) + return instrprof_error::value_site_count_mismatch; + std::vector &ThisSiteRecords = + getValueSitesForKind(ValueKind); + std::vector &OtherSiteRecords = + Src.getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + return Result; +} + +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, + uint64_t Weight) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. + if (Counts.size() != Other.Counts.size()) + return instrprof_error::count_mismatch; + + instrprof_error Result = instrprof_error::success; + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + bool Overflowed; + uint64_t OtherCount = Other.Counts[I]; + if (Weight > 1) { + OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, mergeValueProfData(Kind, Other, Weight)); + + return Result; +} + +// Map indirect call target name hash to name string. 
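+// For example (editor's sketch): with a ValueMap sorted by hash, say
+//   { (MD5Hash("callee1"), "callee1"), (MD5Hash("callee2"), "callee2") },
+// remapValue(MD5Hash("callee2"), IPVK_IndirectCallTarget, &Map) locates the
+// entry with std::lower_bound and returns its mapped value, the pointer to
+// "callee2" cast to uint64_t; without a ValueMap the input value is
+// returned unchanged.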
+uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *ValueMap) { + if (!ValueMap) + return Value; + switch (ValueKind) { + case IPVK_IndirectCallTarget: { + auto Result = + std::lower_bound(ValueMap->begin(), ValueMap->end(), Value, + [](const std::pair &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != ValueMap->end()) + Value = (uint64_t)Result->second; + break; + } + } + return Value; +} + +void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap) { + for (uint32_t I = 0; I < N; I++) { + VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); + } + std::vector &ValueSites = + getValueSitesForKind(ValueKind); + if (N == 0) + ValueSites.push_back(InstrProfValueSiteRecord()); + else + ValueSites.emplace_back(VData, VData + N); +} + +#define INSTR_PROF_COMMON_API_IMPL +#include "llvm/ProfileData/InstrProfData.inc" + +/*! + * \brief ValueProfRecordClosure Interface implementation for InstrProfRecord + * class. These C wrappers are used as adaptors so that C++ code can be + * invoked as callbacks. + */ +uint32_t getNumValueKindsInstrProf(const void *Record) { + return reinterpret_cast(Record)->getNumValueKinds(); +} + +uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast(Record) + ->getNumValueSites(VKind); +} + +uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast(Record) + ->getNumValueData(VKind); +} + +uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, + uint32_t S) { + return reinterpret_cast(R) + ->getNumValueDataForSite(VK, S); +} + +void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, + uint32_t K, uint32_t S, + uint64_t (*Mapper)(uint32_t, uint64_t)) { + return reinterpret_cast(R)->getValueForSite( + Dst, K, S, Mapper); +} + +ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { + ValueProfData *VD = + (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData()); + memset(VD, 0, TotalSizeInBytes); + return VD; +} + +static ValueProfRecordClosure InstrProfRecordClosure = { + 0, + getNumValueKindsInstrProf, + getNumValueSitesInstrProf, + getNumValueDataInstrProf, + getNumValueDataForSiteInstrProf, + 0, + getValueForSiteInstrProf, + allocValueProfDataInstrProf}; + +// Wrapper implementation using the closure mechanism. +uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + return getValueProfDataSize(&InstrProfRecordClosure); +} + +// Wrapper implementation using the closure mechanism. +std::unique_ptr +ValueProfData::serializeFrom(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + + std::unique_ptr VPD( + serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr)); + return VPD; +} + +void ValueProfRecord::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + Record.reserveSites(Kind, NumValueSites); + + InstrProfValueData *ValueData = getValueProfRecordValueData(this); + for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) { + uint8_t ValueDataCount = this->SiteCountArray[VSite]; + Record.addValueData(Kind, VSite, ValueData, ValueDataCount, VMap); + ValueData += ValueDataCount; + } +} + +// For writing/serializing, Old is the host endianness, and New is +// byte order intended on disk. 
For reading/deserialization, Old
+// is the on-disk source endianness, and New is the host endianness.
+void ValueProfRecord::swapBytes(support::endianness Old,
+                                support::endianness New) {
+  using namespace support;
+  if (Old == New)
+    return;
+
+  if (getHostEndianness() != Old) {
+    sys::swapByteOrder(NumValueSites);
+    sys::swapByteOrder(Kind);
+  }
+  uint32_t ND = getValueProfRecordNumValueData(this);
+  InstrProfValueData *VD = getValueProfRecordValueData(this);
+
+  // No need to swap byte array: SiteCountArray.
+  for (uint32_t I = 0; I < ND; I++) {
+    sys::swapByteOrder(VD[I].Value);
+    sys::swapByteOrder(VD[I].Count);
+  }
+  if (getHostEndianness() == Old) {
+    sys::swapByteOrder(NumValueSites);
+    sys::swapByteOrder(Kind);
+  }
+}
+
+void ValueProfData::deserializeTo(InstrProfRecord &Record,
+                                  InstrProfRecord::ValueMapType *VMap) {
+  if (NumValueKinds == 0)
+    return;
+
+  ValueProfRecord *VR = getFirstValueProfRecord(this);
+  for (uint32_t K = 0; K < NumValueKinds; K++) {
+    VR->deserializeTo(Record, VMap);
+    VR = getValueProfRecordNext(VR);
+  }
+}
+
+template <class T>
+static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
+  using namespace support;
+  if (Orig == little)
+    return endian::readNext<T, little, unaligned>(D);
+  else
+    return endian::readNext<T, big, unaligned>(D);
+}
+
+static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
+  return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
+                                            ValueProfData());
+}
+
+instrprof_error ValueProfData::checkIntegrity() {
+  if (NumValueKinds > IPVK_Last + 1)
+    return instrprof_error::malformed;
+  // Total size needs to be a multiple of quadword size.
+  if (TotalSize % sizeof(uint64_t))
+    return instrprof_error::malformed;
+
+  ValueProfRecord *VR = getFirstValueProfRecord(this);
+  for (uint32_t K = 0; K < this->NumValueKinds; K++) {
+    if (VR->Kind > IPVK_Last)
+      return instrprof_error::malformed;
+    VR = getValueProfRecordNext(VR);
+    if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
+      return instrprof_error::malformed;
+  }
+  return instrprof_error::success;
+}
+
+ErrorOr<std::unique_ptr<ValueProfData>>
+ValueProfData::getValueProfData(const unsigned char *D,
+                                const unsigned char *const BufferEnd,
+                                support::endianness Endianness) {
+  using namespace support;
+  if (D + sizeof(ValueProfData) > BufferEnd)
+    return instrprof_error::truncated;
+
+  const unsigned char *Header = D;
+  uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness);
+  if (D + TotalSize > BufferEnd)
+    return instrprof_error::too_large;
+
+  std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize);
+  memcpy(VPD.get(), D, TotalSize);
+  // Byte swap.
+ VPD->swapBytesToHost(Endianness); + + instrprof_error EC = VPD->checkIntegrity(); + if (EC != instrprof_error::success) + return EC; + + return std::move(VPD); +} + +void ValueProfData::swapBytesToHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + sys::swapByteOrder(TotalSize); + sys::swapByteOrder(NumValueKinds); + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->swapBytes(Endianness, getHostEndianness()); + VR = getValueProfRecordNext(VR); + } +} + +void ValueProfData::swapBytesFromHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + ValueProfRecord *NVR = getValueProfRecordNext(VR); + VR->swapBytes(getHostEndianness(), Endianness); + VR = NVR; + } + sys::swapByteOrder(TotalSize); + sys::swapByteOrder(NumValueKinds); +} + +} diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h deleted file mode 100644 index ebca7b22fbfb..000000000000 --- a/lib/ProfileData/InstrProfIndexed.h +++ /dev/null @@ -1,56 +0,0 @@ -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared header for the instrumented profile data reader and writer. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H - -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" - -namespace llvm { - -namespace IndexedInstrProf { -enum class HashT : uint32_t { - MD5, - - Last = MD5 -}; - -static inline uint64_t MD5Hash(StringRef Str) { - MD5 Hash; - Hash.update(Str); - llvm::MD5::MD5Result Result; - Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. 
- using namespace llvm::support; - return endian::read(Result); -} - -static inline uint64_t ComputeHash(HashT Type, StringRef K) { - switch (Type) { - case HashT::MD5: - return IndexedInstrProf::MD5Hash(K); - } - llvm_unreachable("Unhandled hash type"); -} - -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 2; -const HashT HashType = HashT::MD5; -} - -} // end namespace llvm - -#endif diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 8a529a000c53..5e83456822fd 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfReader.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/STLExtras.h" #include @@ -55,8 +54,10 @@ InstrProfReader::create(std::unique_ptr Buffer) { Result.reset(new RawInstrProfReader64(std::move(Buffer))); else if (RawInstrProfReader32::hasFormat(*Buffer)) Result.reset(new RawInstrProfReader32(std::move(Buffer))); - else + else if (TextInstrProfReader::hasFormat(*Buffer)) Result.reset(new TextInstrProfReader(std::move(Buffer))); + else + return instrprof_error::unrecognized_format; // Initialize the reader and return the result. if (std::error_code EC = initializeReader(*Result)) @@ -98,16 +99,98 @@ void InstrProfIterator::Increment() { *this = InstrProfIterator(); } +bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { + // Verify that this really looks like plain ASCII text by checking a + // 'reasonable' number of characters (up to profile magic size). + size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); + StringRef buffer = Buffer.getBufferStart(); + return count == 0 || + std::all_of(buffer.begin(), buffer.begin() + count, + [](char c) { return ::isprint(c) || ::isspace(c); }); +} + +std::error_code TextInstrProfReader::readHeader() { + Symtab.reset(new InstrProfSymtab()); + return success(); +} + +std::error_code +TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { + +#define CHECK_LINE_END(Line) \ + if (Line.is_at_end()) \ + return error(instrprof_error::truncated); +#define READ_NUM(Str, Dst) \ + if ((Str).getAsInteger(10, (Dst))) \ + return error(instrprof_error::malformed); +#define VP_READ_ADVANCE(Val) \ + CHECK_LINE_END(Line); \ + uint32_t Val; \ + READ_NUM((*Line), (Val)); \ + Line++; + + if (Line.is_at_end()) + return success(); + + uint32_t NumValueKinds; + if (Line->getAsInteger(10, NumValueKinds)) { + // No value profile data + return success(); + } + if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) + return error(instrprof_error::malformed); + Line++; + + for (uint32_t VK = 0; VK < NumValueKinds; VK++) { + VP_READ_ADVANCE(ValueKind); + if (ValueKind > IPVK_Last) + return error(instrprof_error::malformed); + VP_READ_ADVANCE(NumValueSites); + if (!NumValueSites) + continue; + + Record.reserveSites(VK, NumValueSites); + for (uint32_t S = 0; S < NumValueSites; S++) { + VP_READ_ADVANCE(NumValueData); + + std::vector CurrentValues; + for (uint32_t V = 0; V < NumValueData; V++) { + CHECK_LINE_END(Line); + std::pair VD = Line->split(':'); + uint64_t TakenCount, Value; + if (VK == IPVK_IndirectCallTarget) { + Symtab->addFuncName(VD.first); + Value = IndexedInstrProf::ComputeHash(VD.first); + } else { + READ_NUM(VD.first, Value); + } + READ_NUM(VD.second, TakenCount); + CurrentValues.push_back({Value, TakenCount}); + Line++; + } + Record.addValueData(VK, S, 
CurrentValues.data(), NumValueData, nullptr); + } + } + return success(); + +#undef CHECK_LINE_END +#undef READ_NUM +#undef VP_READ_ADVANCE +} + std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Skip empty lines and comments. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; // If we hit EOF while looking for a name, we're done. - if (Line.is_at_end()) + if (Line.is_at_end()) { + Symtab->finalizeSymtab(); return error(instrprof_error::eof); + } // Read the function name. Record.Name = *Line++; + Symtab->addFuncName(Record.Name); // Read the function hash. if (Line.is_at_end()) @@ -136,57 +219,35 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { Record.Counts.push_back(Count); } + // Check if value profile data exists and read it if so. + if (std::error_code EC = readValueProfileData(Record)) + return EC; + + // This is needed to avoid two pass parsing because llvm-profdata + // does dumping while reading. + Symtab->finalizeSymtab(); return success(); } -template -static uint64_t getRawMagic(); - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('r') << 8 | - uint64_t(129); -} - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('R') << 8 | - uint64_t(129); -} - template bool RawInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < sizeof(uint64_t)) return false; uint64_t Magic = *reinterpret_cast(DataBuffer.getBufferStart()); - return getRawMagic() == Magic || - sys::getSwappedBytes(getRawMagic()) == Magic; + return RawInstrProf::getMagic() == Magic || + sys::getSwappedBytes(RawInstrProf::getMagic()) == Magic; } template std::error_code RawInstrProfReader::readHeader() { if (!hasFormat(*DataBuffer)) return error(instrprof_error::bad_magic); - if (DataBuffer->getBufferSize() < sizeof(RawHeader)) + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) return error(instrprof_error::bad_header); - auto *Header = - reinterpret_cast(DataBuffer->getBufferStart()); - ShouldSwapBytes = Header->Magic != getRawMagic(); + auto *Header = reinterpret_cast( + DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic(); return readHeader(*Header); } @@ -202,29 +263,38 @@ RawInstrProfReader::readNextHeader(const char *CurrentPos) { return instrprof_error::eof; // If there isn't enough space for another header, this is probably just // garbage at the end of the file. - if (CurrentPos + sizeof(RawHeader) > End) + if (CurrentPos + sizeof(RawInstrProf::Header) > End) return instrprof_error::malformed; // The writer ensures each profile is padded to start at an aligned address. if (reinterpret_cast(CurrentPos) % alignOf()) return instrprof_error::malformed; // The magic should have the same byte order as in the previous header. uint64_t Magic = *reinterpret_cast(CurrentPos); - if (Magic != swap(getRawMagic())) + if (Magic != swap(RawInstrProf::getMagic())) return instrprof_error::bad_magic; // There's another profile to read, so we need to process the header. 
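// File shape handled here (editor's sketch): raw profiles emitted by
// several processes may be concatenated into one file, each chunk starting
// with the same magic at an 8-byte-aligned offset:
//
//   [Header][Data...][Counters...][Names...][pad to 8][ValueData...]
//   [Header][Data...][Counters...][Names...][pad to 8][ValueData...] ...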
- auto *Header = reinterpret_cast(CurrentPos); + auto *Header = reinterpret_cast(CurrentPos); return readHeader(*Header); } -static uint64_t getRawVersion() { - return 1; +template +void RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { + for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { + StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); + Symtab.addFuncName(FunctionName); + const IntPtrT FPtr = swap(I->FunctionPointer); + if (!FPtr) + continue; + Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName)); + } + Symtab.finalizeSymtab(); } template std::error_code -RawInstrProfReader::readHeader(const RawHeader &Header) { - if (swap(Header.Version) != getRawVersion()) +RawInstrProfReader::readHeader(const RawInstrProf::Header &Header) { + if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); CountersDelta = swap(Header.CountersDelta); @@ -232,50 +302,69 @@ RawInstrProfReader::readHeader(const RawHeader &Header) { auto DataSize = swap(Header.DataSize); auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); + auto ValueDataSize = swap(Header.ValueDataSize); + ValueKindLast = swap(Header.ValueKindLast); - ptrdiff_t DataOffset = sizeof(RawHeader); - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData); + auto PaddingSize = getNumPaddingBytes(NamesSize); + + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); + ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; - size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; + ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; + size_t ProfileSize = ValueDataOffset + ValueDataSize; auto *Start = reinterpret_cast(&Header); if (Start + ProfileSize > DataBuffer->getBufferEnd()) return error(instrprof_error::bad_header); - Data = reinterpret_cast(Start + DataOffset); + Data = reinterpret_cast *>( + Start + DataOffset); DataEnd = Data + DataSize; CountersStart = reinterpret_cast(Start + CountersOffset); NamesStart = Start + NamesOffset; + ValueDataStart = reinterpret_cast(Start + ValueDataOffset); ProfileEnd = Start + ProfileSize; + std::unique_ptr NewSymtab = make_unique(); + createSymtab(*NewSymtab.get()); + Symtab = std::move(NewSymtab); return success(); } template -std::error_code -RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { - if (Data == DataEnd) - if (std::error_code EC = readNextHeader(ProfileEnd)) - return EC; +std::error_code RawInstrProfReader::readName(InstrProfRecord &Record) { + Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize)); + if (Record.Name.data() < NamesStart || + Record.Name.data() + Record.Name.size() > + reinterpret_cast(ValueDataStart)) + return error(instrprof_error::malformed); + return success(); +} - // Get the raw data. 
- StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); +template +std::error_code RawInstrProfReader::readFuncHash( + InstrProfRecord &Record) { + Record.Hash = swap(Data->FuncHash); + return success(); +} + +template +std::error_code RawInstrProfReader::readRawCounts( + InstrProfRecord &Record) { uint32_t NumCounters = swap(Data->NumCounters); + IntPtrT CounterPtr = Data->CounterPtr; if (NumCounters == 0) return error(instrprof_error::malformed); - auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); + + auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); + auto *NamesStartAsCounter = reinterpret_cast(NamesStart); // Check bounds. - auto *NamesStartAsCounter = reinterpret_cast(NamesStart); - if (RawName.data() < NamesStart || - RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || - RawCounts.data() < CountersStart || + if (RawCounts.data() < CountersStart || RawCounts.data() + RawCounts.size() > NamesStartAsCounter) return error(instrprof_error::malformed); - // Store the data in Record, byte-swapping as necessary. - Record.Hash = swap(Data->FuncHash); - Record.Name = RawName; if (ShouldSwapBytes) { Record.Counts.clear(); Record.Counts.reserve(RawCounts.size()); @@ -284,8 +373,61 @@ RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { } else Record.Counts = RawCounts; + return success(); +} + +template +std::error_code +RawInstrProfReader::readValueProfilingData(InstrProfRecord &Record) { + + Record.clearValueData(); + CurValueDataSize = 0; + // Need to match the logic in value profile dumper code in compiler-rt: + uint32_t NumValueKinds = 0; + for (uint32_t I = 0; I < IPVK_Last + 1; I++) + NumValueKinds += (Data->NumValueSites[I] != 0); + + if (!NumValueKinds) + return success(); + + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(ValueDataStart, + (const unsigned char *)ProfileEnd, + getDataEndianness()); + + if (VDataPtrOrErr.getError()) + return VDataPtrOrErr.getError(); + + VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap()); + CurValueDataSize = VDataPtrOrErr.get()->getSize(); + return success(); +} + +template +std::error_code +RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { + if (atEnd()) + if (std::error_code EC = readNextHeader(ProfileEnd)) + return EC; + + // Read name and set it in Record. + if (std::error_code EC = readName(Record)) + return EC; + + // Read FuncHash and set it in Record. + if (std::error_code EC = readFuncHash(Record)) + return EC; + + // Read raw counts and set Record. + if (std::error_code EC = readRawCounts(Record)) + return EC; + + // Read value data and set Record. + if (std::error_code EC = readValueProfilingData(Record)) + return EC; + // Iterate. - ++Data; + advanceData(); return success(); } @@ -302,52 +444,112 @@ InstrProfLookupTrait::ComputeHash(StringRef K) { typedef InstrProfLookupTrait::data_type data_type; typedef InstrProfLookupTrait::offset_type offset_type; +bool InstrProfLookupTrait::readValueProfilingData( + const unsigned char *&D, const unsigned char *const End) { + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); + + if (VDataPtrOrErr.getError()) + return false; + + VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); + D += VDataPtrOrErr.get()->TotalSize; + + return true; +} + data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, offset_type N) { - // Check if the data is corrupt. If so, don't try to read it.
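readRawCounts() above trusts nothing in the memory-mapped buffer: the counter region must lie entirely between the start of the counters section and the start of the names section before it is copied out, byte-swapping if the writer used the other endianness. A reduced sketch of the same check, with swap64 standing in for the reader's swap() helper:

    #include <cstdint>
    #include <vector>

    static uint64_t swap64(uint64_t V) {
      uint64_t R = 0;
      for (int I = 0; I < 8; ++I) { R = (R << 8) | (V & 0xff); V >>= 8; }
      return R;
    }

    static bool readCounts(const uint64_t *CountersStart,
                           const uint64_t *NamesStartAsCounter,
                           const uint64_t *RawCounts, uint32_t NumCounters,
                           bool ShouldSwap, std::vector<uint64_t> &Out) {
      if (NumCounters == 0)
        return false; // malformed: every function has at least one counter
      if (RawCounts < CountersStart ||
          RawCounts + NumCounters > NamesStartAsCounter)
        return false; // counters would spill outside their section
      Out.assign(RawCounts, RawCounts + NumCounters);
      if (ShouldSwap)
        for (uint64_t &C : Out)
          C = swap64(C); // writer used the opposite byte order
      return true;
    }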
if (N % sizeof(uint64_t)) return data_type(); DataBuffer.clear(); - uint64_t NumCounts; - uint64_t NumEntries = N / sizeof(uint64_t); std::vector CounterBuffer; - for (uint64_t I = 0; I < NumEntries; I += NumCounts) { - using namespace support; - // The function hash comes first. + + using namespace support; + const unsigned char *End = D + N; + while (D < End) { + // Read hash. + if (D + sizeof(uint64_t) >= End) + return data_type(); uint64_t Hash = endian::readNext(D); - if (++I >= NumEntries) - return data_type(); - - // In v1, we have at least one count. - // Later, we have the number of counts. - NumCounts = (1 == FormatVersion) - ? NumEntries - I - : endian::readNext(D); - if (1 != FormatVersion) - ++I; - - // If we have more counts than data, this is bogus. - if (I + NumCounts > NumEntries) + // Initialize number of counters for FormatVersion == 1. + uint64_t CountsSize = N / sizeof(uint64_t) - 1; + // If format version is different then read the number of counters. + if (FormatVersion != 1) { + if (D + sizeof(uint64_t) > End) + return data_type(); + CountsSize = endian::readNext(D); + } + // Read counter values. + if (D + CountsSize * sizeof(uint64_t) > End) return data_type(); CounterBuffer.clear(); - for (unsigned J = 0; J < NumCounts; ++J) + CounterBuffer.reserve(CountsSize); + for (uint64_t J = 0; J < CountsSize; ++J) CounterBuffer.push_back(endian::readNext(D)); - DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer))); + DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); + + // Read value profiling data. + if (FormatVersion > 2 && !readValueProfilingData(D, End)) { + DataBuffer.clear(); + return data_type(); + } } return DataBuffer; } +template +std::error_code InstrProfReaderIndex::getRecords( + StringRef FuncName, ArrayRef &Data) { + auto Iter = HashTable->find(FuncName); + if (Iter == HashTable->end()) + return instrprof_error::unknown_function; + + Data = (*Iter); + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template +std::error_code InstrProfReaderIndex::getRecords( + ArrayRef &Data) { + if (atEnd()) + return instrprof_error::eof; + + Data = *RecordIterator; + + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template +InstrProfReaderIndex::InstrProfReaderIndex( + const unsigned char *Buckets, const unsigned char *const Payload, + const unsigned char *const Base, IndexedInstrProf::HashT HashType, + uint64_t Version) { + FormatVersion = Version; + HashTable.reset(HashTableImpl::Create( + Buckets, Payload, Base, + typename HashTableImpl::InfoType(HashType, Version))); + RecordIterator = HashTable->data_begin(); +} + bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < 8) return false; using namespace support; uint64_t Magic = endian::read(DataBuffer.getBufferStart()); + // Verify that it's magical. return Magic == IndexedInstrProf::Magic; } @@ -360,71 +562,91 @@ std::error_code IndexedInstrProfReader::readHeader() { using namespace support; + auto *Header = reinterpret_cast(Cur); + Cur += sizeof(IndexedInstrProf::Header); + // Check the magic number. - uint64_t Magic = endian::readNext(Cur); + uint64_t Magic = endian::byte_swap(Header->Magic); if (Magic != IndexedInstrProf::Magic) return error(instrprof_error::bad_magic); // Read the version. 
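The ReadData() rewrite above walks the per-key byte region record by record: a little-endian u64 hash, then a u64 counter count (implied by the remaining size in format version 1), then the counters, with every read bounds-checked so corrupt data yields an empty result instead of running off the buffer. A simplified sketch, assuming version 1 stores a single record per key (ParsedRecord and readU64LE are illustrative names):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct ParsedRecord { uint64_t Hash; std::vector<uint64_t> Counts; };

    // Read one little-endian u64, advancing D; false on truncation.
    static bool readU64LE(const unsigned char *&D, const unsigned char *End,
                          uint64_t &V) {
      if (End - D < 8) return false;
      V = 0;
      for (int I = 7; I >= 0; --I) V = (V << 8) | D[I];
      D += 8;
      return true;
    }

    static bool parseRecords(const unsigned char *D, const unsigned char *End,
                             unsigned FormatVersion, uint64_t TotalWords,
                             std::vector<ParsedRecord> &Out) {
      while (D < End) {
        ParsedRecord R;
        if (!readU64LE(D, End, R.Hash)) return false;
        uint64_t NumCounts = TotalWords - 1; // version 1: all remaining words
        if (FormatVersion != 1 && !readU64LE(D, End, NumCounts))
          return false;                      // later versions: explicit count
        for (uint64_t I = 0; I < NumCounts; ++I) {
          uint64_t C;
          if (!readU64LE(D, End, C)) return false;
          R.Counts.push_back(C);
        }
        Out.push_back(std::move(R));
      }
      return true;
    }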
- FormatVersion = endian::readNext(Cur); + uint64_t FormatVersion = endian::byte_swap(Header->Version); if (FormatVersion > IndexedInstrProf::Version) return error(instrprof_error::unsupported_version); // Read the maximal function count. - MaxFunctionCount = endian::readNext(Cur); + MaxFunctionCount = + endian::byte_swap(Header->MaxFunctionCount); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( - endian::readNext(Cur)); + endian::byte_swap(Header->HashType)); if (HashType > IndexedInstrProf::HashT::Last) return error(instrprof_error::unsupported_hash_type); - uint64_t HashOffset = endian::readNext(Cur); + + uint64_t HashOffset = endian::byte_swap(Header->HashOffset); // The rest of the file is an on disk hash table. - Index.reset(InstrProfReaderIndex::Create( - Start + HashOffset, Cur, Start, - InstrProfLookupTrait(HashType, FormatVersion))); - // Set up our iterator for readNextRecord. - RecordIterator = Index->data_begin(); - + InstrProfReaderIndexBase *IndexPtr = nullptr; + IndexPtr = new InstrProfReaderIndex( + Start + HashOffset, Cur, Start, HashType, FormatVersion); + Index.reset(IndexPtr); return success(); } -std::error_code IndexedInstrProfReader::getFunctionCounts( - StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { - auto Iter = Index->find(FuncName); - if (Iter == Index->end()) - return error(instrprof_error::unknown_function); +InstrProfSymtab &IndexedInstrProfReader::getSymtab() { + if (Symtab.get()) + return *Symtab.get(); + std::unique_ptr NewSymtab = make_unique(); + Index->populateSymtab(*NewSymtab.get()); + + Symtab = std::move(NewSymtab); + return *Symtab.get(); +} + +ErrorOr +IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash) { + ArrayRef Data; + std::error_code EC = Index->getRecords(FuncName, Data); + if (EC != instrprof_error::success) + return EC; // Found it. Look for counters with the right hash. - ArrayRef Data = (*Iter); - if (Data.empty()) - return error(instrprof_error::malformed); - for (unsigned I = 0, E = Data.size(); I < E; ++I) { // Check for a match and fill the vector if there is one. if (Data[I].Hash == FuncHash) { - Counts = Data[I].Counts; - return success(); + return std::move(Data[I]); } } return error(instrprof_error::hash_mismatch); } std::error_code -IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { - // Are we out of records? 
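getInstrProfRecord() above resolves a query in two steps: look the name up in the on-disk hash table, then scan that name's records for one with the matching structural hash, so a stale hash reports hash_mismatch rather than unknown_function. A sketch of that shape with a std::map standing in for the on-disk table:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    enum class Lookup { Success, UnknownFunction, HashMismatch };

    struct Record { uint64_t Hash; std::vector<uint64_t> Counts; };

    static Lookup
    findRecord(const std::map<std::string, std::vector<Record>> &Index,
               const std::string &Name, uint64_t Hash, const Record *&Out) {
      auto It = Index.find(Name);
      if (It == Index.end())
        return Lookup::UnknownFunction; // name never profiled
      for (const Record &R : It->second)
        if (R.Hash == Hash) { Out = &R; return Lookup::Success; }
      return Lookup::HashMismatch; // name known, but from a different build
    }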
- if (RecordIterator == Index->data_end()) - return error(instrprof_error::eof); +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, + std::vector &Counts) { + ErrorOr Record = getInstrProfRecord(FuncName, FuncHash); + if (std::error_code EC = Record.getError()) + return EC; - if ((*RecordIterator).empty()) - return error(instrprof_error::malformed); + Counts = Record.get().Counts; + return success(); +} +std::error_code IndexedInstrProfReader::readNextRecord( + InstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef Data = (*RecordIterator); + + ArrayRef Data; + + std::error_code EC = Index->getRecords(Data); + if (EC != instrprof_error::success) + return error(EC); + Record = Data[RecordIndex++]; if (RecordIndex >= Data.size()) { - ++RecordIterator; + Index->advanceToNextKey(); RecordIndex = 0; } return success(); diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 2188543ed61c..9bb03e1e77a3 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -13,27 +13,29 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/OnDiskHashTable.h" +#include using namespace llvm; namespace { +static support::endianness ValueProfDataEndianness = support::little; + class InstrProfRecordTrait { public: typedef StringRef key_type; typedef StringRef key_type_ref; - typedef const InstrProfWriter::CounterData *const data_type; - typedef const InstrProfWriter::CounterData *const data_type_ref; + typedef const InstrProfWriter::ProfilingData *const data_type; + typedef const InstrProfWriter::ProfilingData *const data_type_ref; typedef uint64_t hash_value_type; typedef uint64_t offset_type; static hash_value_type ComputeHash(key_type_ref K) { - return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + return IndexedInstrProf::ComputeHash(K); } static std::pair @@ -45,8 +47,15 @@ public: LE.write(N); offset_type M = 0; - for (const auto &Counts : *V) - M += (2 + Counts.second.size()) * sizeof(uint64_t); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; + M += sizeof(uint64_t); // The function hash + M += sizeof(uint64_t); // The size of the Counts vector + M += ProfRecord.Counts.size() * sizeof(uint64_t); + + // Value data + M += ValueProfData::getSize(ProfileData.second); + } LE.write(M); return std::make_pair(N, M); @@ -60,50 +69,68 @@ public: offset_type) { using namespace llvm::support; endian::Writer LE(Out); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; - for (const auto &Counts : *V) { - LE.write(Counts.first); - LE.write(Counts.second.size()); - for (uint64_t I : Counts.second) + LE.write(ProfileData.first); // Function hash + LE.write(ProfRecord.Counts.size()); + for (uint64_t I : ProfRecord.Counts) LE.write(I); + + // Write value data + std::unique_ptr VDataPtr = + ValueProfData::serializeFrom(ProfileData.second); + uint32_t S = VDataPtr->getSize(); + VDataPtr->swapBytesFromHost(ValueProfDataEndianness); + Out.write((const char *)VDataPtr.get(), S); } } }; } -std::error_code -InstrProfWriter::addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef Counters) { - auto &CounterData = FunctionData[FunctionName]; +// Internal interface for testing purpose 
only. +void InstrProfWriter::setValueProfDataEndianness( + support::endianness Endianness) { + ValueProfDataEndianness = Endianness; +} - auto Where = CounterData.find(FunctionHash); - if (Where == CounterData.end()) { +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, + uint64_t Weight) { + auto &ProfileDataMap = FunctionData[I.Name]; + + bool NewFunc; + ProfilingData::iterator Where; + std::tie(Where, NewFunc) = + ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); + InstrProfRecord &Dest = Where->second; + + instrprof_error Result; + if (NewFunc) { // We've never seen a function with this name and hash, add it. - CounterData[FunctionHash] = Counters; - // We keep track of the max function count as we go for simplicity. - if (Counters[0] > MaxFunctionCount) - MaxFunctionCount = Counters[0]; - return instrprof_error::success; + Dest = std::move(I); + // Fix up the name to avoid dangling reference. + Dest.Name = FunctionData.find(Dest.Name)->getKey(); + Result = instrprof_error::success; + if (Weight > 1) { + for (auto &Count : Dest.Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, &Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + } + } else { + // We're updating a function we've seen before. + Result = Dest.merge(I, Weight); } - // We're updating a function we've seen before. - auto &FoundCounters = Where->second; - // If the number of counters doesn't match we either have bad data or a hash - // collision. - if (FoundCounters.size() != Counters.size()) - return instrprof_error::count_mismatch; - - for (size_t I = 0, E = Counters.size(); I < E; ++I) { - if (FoundCounters[I] + Counters[I] < FoundCounters[I]) - return instrprof_error::counter_overflow; - FoundCounters[I] += Counters[I]; - } // We keep track of the max function count as we go for simplicity. - if (FoundCounters[0] > MaxFunctionCount) - MaxFunctionCount = FoundCounters[0]; + // Update this statistic no matter the result of the merge. + if (Dest.Counts[0] > MaxFunctionCount) + MaxFunctionCount = Dest.Counts[0]; - return instrprof_error::success; + return Result; } std::pair InstrProfWriter::writeImpl(raw_ostream &OS) { @@ -117,13 +144,23 @@ std::pair InstrProfWriter::writeImpl(raw_ostream &OS) { endian::Writer LE(OS); // Write the header. - LE.write(IndexedInstrProf::Magic); - LE.write(IndexedInstrProf::Version); - LE.write(MaxFunctionCount); - LE.write(static_cast(IndexedInstrProf::HashType)); + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; + Header.Version = IndexedInstrProf::Version; + Header.MaxFunctionCount = MaxFunctionCount; + Header.HashType = static_cast(IndexedInstrProf::HashType); + Header.HashOffset = 0; + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); + + // Only write out all the fields except 'HashOffset'. We need + // to remember the offset of that field to allow back patching + // later. + for (int I = 0; I < N - 1; I++) + LE.write(reinterpret_cast(&Header)[I]); // Save a space to write the hash table start location. uint64_t HashTableStartLoc = OS.tell(); + // Reserve the space for HashOffset field. LE.write(0); // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS); @@ -138,9 +175,65 @@ void InstrProfWriter::write(raw_fd_ostream &OS) { // Go back and fill in the hash table start. using namespace support; OS.seek(TableStart.first); + // Now patch the HashOffset field previously reserved.
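The writeImpl()/write() pair above writes every header field except HashOffset, remembers that field's stream position, and patches in the real table offset once the hash table has been emitted and its start is known. A self-contained sketch of the reserve-then-back-patch pattern using std::fstream (bytes are written host-endian here, unlike the writer's endian-aware output; file name and payload are illustrative):

    #include <cstdint>
    #include <fstream>

    int main() {
      std::fstream OS("profile.bin",
                      std::ios::out | std::ios::binary | std::ios::trunc);
      // Reserve space: a zero placeholder where the offset will live.
      uint64_t Placeholder = 0;
      std::fstream::pos_type PatchLoc = OS.tellp();
      OS.write(reinterpret_cast<const char *>(&Placeholder), 8);
      // Emit the variable-sized content; record where the table starts.
      uint64_t TableStart = static_cast<uint64_t>(OS.tellp());
      OS.write("table-bytes", 11); // stand-in for the real hash table
      // Back-patch: seek to the reserved slot, overwrite, restore position.
      std::fstream::pos_type EndPos = OS.tellp();
      OS.seekp(PatchLoc);
      OS.write(reinterpret_cast<const char *>(&TableStart), 8);
      OS.seekp(EndPos);
    }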
endian::Writer(OS).write(TableStart.second); } +static const char *ValueProfKindStr[] = { +#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, + InstrProfSymtab &Symtab, + raw_fd_ostream &OS) { + OS << Func.Name << "\n"; + OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; + OS << "# Counter Values:\n"; + for (uint64_t Count : Func.Counts) + OS << Count << "\n"; + + uint32_t NumValueKinds = Func.getNumValueKinds(); + if (!NumValueKinds) { + OS << "\n"; + return; + } + + OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n"; + for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) { + uint32_t NS = Func.getNumValueSites(VK); + if (!NS) + continue; + OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n"; + OS << "# NumValueSites:\n" << NS << "\n"; + for (uint32_t S = 0; S < NS; S++) { + uint32_t ND = Func.getNumValueDataForSite(VK, S); + OS << ND << "\n"; + std::unique_ptr VD = Func.getValueForSite(VK, S); + for (uint32_t I = 0; I < ND; I++) { + if (VK == IPVK_IndirectCallTarget) + OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; + else + OS << VD[I].Value << ":" << VD[I].Count << "\n"; + } + } + } + + OS << "\n"; +} + +void InstrProfWriter::writeText(raw_fd_ostream &OS) { + InstrProfSymtab Symtab; + for (const auto &I : FunctionData) + Symtab.addFuncName(I.getKey()); + Symtab.finalizeSymtab(); + + for (const auto &I : FunctionData) + for (const auto &Func : I.getValue()) + writeRecordInText(Func.second, Symtab, OS); +} + std::unique_ptr InstrProfWriter::writeBuffer() { std::string Data; llvm::raw_string_ostream OS(Data); diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp index 920c48a24640..9ded757f2b28 100644 --- a/lib/ProfileData/SampleProf.cpp +++ b/lib/ProfileData/SampleProf.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +using namespace llvm::sampleprof; using namespace llvm; namespace { @@ -27,17 +28,25 @@ class SampleProfErrorCategoryType : public std::error_category { case sampleprof_error::success: return "Success"; case sampleprof_error::bad_magic: - return "Invalid file format (bad magic)"; + return "Invalid sample profile data (bad magic)"; case sampleprof_error::unsupported_version: - return "Unsupported format version"; + return "Unsupported sample profile format version"; case sampleprof_error::too_large: return "Too much profile data"; case sampleprof_error::truncated: return "Truncated profile data"; case sampleprof_error::malformed: - return "Malformed profile data"; + return "Malformed sample profile data"; case sampleprof_error::unrecognized_format: - return "Unrecognized profile encoding format"; + return "Unrecognized sample profile encoding format"; + case sampleprof_error::unsupported_writing_format: + return "Profile encoding format unsupported for writing operations"; + case sampleprof_error::truncated_name_table: + return "Truncated function name table"; + case sampleprof_error::not_implemented: + return "Unimplemented feature"; + case sampleprof_error::counter_overflow: + return "Counter overflow"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -49,3 +58,92 @@ static ManagedStatic ErrorCategory; const std::error_category &llvm::sampleprof_category() { return *ErrorCategory; } + +void LineLocation::print(raw_ostream &OS) const { + OS << 
LineOffset; + if (Discriminator > 0) + OS << "." << Discriminator; +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const LineLocation &Loc) { + Loc.print(OS); + return OS; +} + +void LineLocation::dump() const { print(dbgs()); } + +void CallsiteLocation::print(raw_ostream &OS) const { + LineLocation::print(OS); + OS << ": inlined callee: " << CalleeName; +} + +void CallsiteLocation::dump() const { print(dbgs()); } + +inline raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const CallsiteLocation &Loc) { + Loc.print(OS); + return OS; +} + +/// \brief Print the sample record to the stream \p OS indented by \p Indent. +void SampleRecord::print(raw_ostream &OS, unsigned Indent) const { + OS << NumSamples; + if (hasCalls()) { + OS << ", calls:"; + for (const auto &I : getCallTargets()) + OS << " " << I.first() << ":" << I.second; + } + OS << "\n"; +} + +void SampleRecord::dump() const { print(dbgs(), 0); } + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const SampleRecord &Sample) { + Sample.print(OS, 0); + return OS; +} + +/// \brief Print the samples collected for a function on stream \p OS. +void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { + OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() + << " sampled lines\n"; + + OS.indent(Indent); + if (BodySamples.size() > 0) { + OS << "Samples collected in the function's body {\n"; + SampleSorter SortedBodySamples(BodySamples); + for (const auto &SI : SortedBodySamples.get()) { + OS.indent(Indent + 2); + OS << SI->first << ": " << SI->second; + } + OS.indent(Indent); + OS << "}\n"; + } else { + OS << "No samples collected in the function's body\n"; + } + + OS.indent(Indent); + if (CallsiteSamples.size() > 0) { + OS << "Samples collected in inlined callsites {\n"; + SampleSorter SortedCallsiteSamples( + CallsiteSamples); + for (const auto &CS : SortedCallsiteSamples.get()) { + OS.indent(Indent + 2); + OS << CS->first << ": "; + CS->second.print(OS, Indent + 4); + } + OS << "}\n"; + } else { + OS << "No inlined callsites in this function\n"; + } +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const FunctionSamples &FS) { + FS.print(OS); + return OS; +} + +void FunctionSamples::dump(void) const { print(dbgs(), 0); } diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index b39bfd6e2ecd..93cd87bb82f8 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -8,133 +8,37 @@ //===----------------------------------------------------------------------===// // // This file implements the class that reads LLVM sample profiles. It -// supports two file formats: text and binary. The textual representation -// is useful for debugging and testing purposes. The binary representation -// is more compact, resulting in smaller file sizes. However, they can -// both be used interchangeably. +// supports three file formats: text, binary and gcov. // -// NOTE: If you are making changes to the file format, please remember -// to document them in the Clang documentation at -// tools/clang/docs/UsersManual.rst. +// The textual representation is useful for debugging and testing purposes. The +// binary representation is more compact, resulting in smaller file sizes. // -// Text format -// ----------- +// The gcov encoding is the one generated by GCC's AutoFDO profile creation +// tool (https://github.com/google/autofdo) // -// Sample profiles are written as ASCII text. 
The file is divided into -// sections, which correspond to each of the functions executed at runtime. -// Each section has the following format -// -// function1:total_samples:total_head_samples -// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] -// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] -// ... -// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] -// -// The file may contain blank lines between sections and within a -// section. However, the spacing within a single line is fixed. Additional -// spaces will result in an error while reading the file. -// -// Function names must be mangled in order for the profile loader to -// match them in the current translation unit. The two numbers in the -// function header specify how many total samples were accumulated in the -// function (first number), and the total number of samples accumulated -// in the prologue of the function (second number). This head sample -// count provides an indicator of how frequently the function is invoked. -// -// Each sampled line may contain several items. Some are optional (marked -// below): -// -// a. Source line offset. This number represents the line number -// in the function where the sample was collected. The line number is -// always relative to the line where symbol of the function is -// defined. So, if the function has its header at line 280, the offset -// 13 is at line 293 in the file. -// -// Note that this offset should never be a negative number. This could -// happen in cases like macros. The debug machinery will register the -// line number at the point of macro expansion. So, if the macro was -// expanded in a line before the start of the function, the profile -// converter should emit a 0 as the offset (this means that the optimizers -// will not be able to associate a meaningful weight to the instructions -// in the macro). -// -// b. [OPTIONAL] Discriminator. This is used if the sampled program -// was compiled with DWARF discriminator support -// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). -// DWARF discriminators are unsigned integer values that allow the -// compiler to distinguish between multiple execution paths on the -// same source line location. -// -// For example, consider the line of code ``if (cond) foo(); else bar();``. -// If the predicate ``cond`` is true 80% of the time, then the edge -// into function ``foo`` should be considered to be taken most of the -// time. But both calls to ``foo`` and ``bar`` are at the same source -// line, so a sample count at that line is not sufficient. The -// compiler needs to know which part of that line is taken more -// frequently. -// -// This is what discriminators provide. In this case, the calls to -// ``foo`` and ``bar`` will be at the same line, but will have -// different discriminator values. This allows the compiler to correctly -// set edge weights into ``foo`` and ``bar``. -// -// c. Number of samples. This is an integer quantity representing the -// number of samples collected by the profiler at this source -// location. -// -// d. [OPTIONAL] Potential call targets and samples. If present, this -// line contains a call instruction. This models both direct and -// number of samples. For example, -// -// 130: 7 foo:3 bar:2 baz:7 -// -// The above means that at relative line offset 130 there is a call -// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, -// with ``baz()`` being the relatively more frequently called target. 
+// All three encodings can be used interchangeably as an input sample profile. // //===----------------------------------------------------------------------===// #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" using namespace llvm::sampleprof; using namespace llvm; -/// \brief Print the samples collected for a function on stream \p OS. -/// -/// \param OS Stream to emit the output to. -void FunctionSamples::print(raw_ostream &OS) { - OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() - << " sampled lines\n"; - for (const auto &SI : BodySamples) { - LineLocation Loc = SI.first; - const SampleRecord &Sample = SI.second; - OS << "\tline offset: " << Loc.LineOffset - << ", discriminator: " << Loc.Discriminator - << ", number of samples: " << Sample.getSamples(); - if (Sample.hasCalls()) { - OS << ", calls:"; - for (const auto &I : Sample.getCallTargets()) - OS << " " << I.first() << ":" << I.second; - } - OS << "\n"; - } - OS << "\n"; -} - /// \brief Dump the function profile for \p FName. /// /// \param FName Name of the function to print. /// \param OS Stream to emit the output to. void SampleProfileReader::dumpFunctionProfile(StringRef FName, raw_ostream &OS) { - OS << "Function: " << FName << ": "; - Profiles[FName].print(OS); + OS << "Function: " << FName << ": " << Profiles[FName]; } /// \brief Dump all the function profiles found on stream \p OS. @@ -143,6 +47,102 @@ void SampleProfileReader::dump(raw_ostream &OS) { dumpFunctionProfile(I.getKey(), OS); } +/// \brief Parse \p Input as function head. +/// +/// Parse one line of \p Input, and update function name in \p FName, +/// function's total sample count in \p NumSamples, function's entry +/// count in \p NumHeadSamples. +/// +/// \returns true if parsing is successful. +static bool ParseHead(const StringRef &Input, StringRef &FName, + uint64_t &NumSamples, uint64_t &NumHeadSamples) { + if (Input[0] == ' ') + return false; + size_t n2 = Input.rfind(':'); + size_t n1 = Input.rfind(':', n2 - 1); + FName = Input.substr(0, n1); + if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) + return false; + if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) + return false; + return true; +} + + +/// \brief Returns true if line offset \p L is legal (only has 16 bits). +static bool isOffsetLegal(unsigned L) { + return (L & 0xffff) == L; +} + +/// \brief Parse \p Input as line sample. +/// +/// \param Input input line. +/// \param IsCallsite true if the line represents an inlined callsite. +/// \param Depth the depth of the inline stack. +/// \param NumSamples total samples of the line/inlined callsite. +/// \param LineOffset line offset to the start of the function. +/// \param Discriminator discriminator of the line. +/// \param TargetCountMap map from indirect call target to count. +/// +/// returns true if parsing is successful. 
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, + uint64_t &NumSamples, uint32_t &LineOffset, + uint32_t &Discriminator, StringRef &CalleeName, + DenseMap &TargetCountMap) { + for (Depth = 0; Input[Depth] == ' '; Depth++) + ; + if (Depth == 0) + return false; + + size_t n1 = Input.find(':'); + StringRef Loc = Input.substr(Depth, n1 - Depth); + size_t n2 = Loc.find('.'); + if (n2 == StringRef::npos) { + if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) + return false; + Discriminator = 0; + } else { + if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) + return false; + if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) + return false; + } + + StringRef Rest = Input.substr(n1 + 2); + if (Rest[0] >= '0' && Rest[0] <= '9') { + IsCallsite = false; + size_t n3 = Rest.find(' '); + if (n3 == StringRef::npos) { + if (Rest.getAsInteger(10, NumSamples)) + return false; + } else { + if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) + return false; + } + while (n3 != StringRef::npos) { + n3 += Rest.substr(n3).find_first_not_of(' '); + Rest = Rest.substr(n3); + n3 = Rest.find(' '); + StringRef pair = Rest; + if (n3 != StringRef::npos) { + pair = Rest.substr(0, n3); + } + size_t n4 = pair.find(':'); + uint64_t count; + if (pair.substr(n4 + 1).getAsInteger(10, count)) + return false; + TargetCountMap[pair.substr(0, n4)] = count; + } + } else { + IsCallsite = true; + size_t n3 = Rest.find_last_of(':'); + CalleeName = Rest.substr(0, n3); + if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) + return false; + } + return true; +} + /// \brief Load samples from a text file. /// /// See the documentation at the top of the file for an explanation of @@ -151,14 +151,13 @@ void SampleProfileReader::dump(raw_ostream &OS) { /// \returns true if the file was loaded successfully, false otherwise. std::error_code SampleProfileReaderText::read() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); + sampleprof_error Result = sampleprof_error::success; - // Read the profile of each function. Since each function may be - // mentioned more than once, and we are collecting flat profiles, - // accumulate samples as we parse them. - Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); - Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); - Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); - while (!LineIt.is_at_eof()) { + InlineCallStack InlineStack; + + for (; !LineIt.is_at_eof(); ++LineIt) { + if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') + continue; // Read the header of each function. // // Note that for function identifiers we are actually expecting @@ -171,63 +170,74 @@ std::error_code SampleProfileReaderText::read() { // // The only requirement we place on the identifier, then, is that it // should not begin with a number. - SmallVector Matches; - if (!HeadRE.match(*LineIt, &Matches)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM:NUM', found " + *LineIt); - return sampleprof_error::malformed; - } - assert(Matches.size() == 4); - StringRef FName = Matches[1]; - unsigned NumSamples, NumHeadSamples; - Matches[2].getAsInteger(10, NumSamples); - Matches[3].getAsInteger(10, NumHeadSamples); - Profiles[FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FName]; - FProfile.addTotalSamples(NumSamples); - FProfile.addHeadSamples(NumHeadSamples); - ++LineIt; - - // Now read the body. The body of the function ends when we reach - // EOF or when we see the start of the next function. 
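ParseLine() above replaces the old regex matching with hand-rolled scanning of "offset[.discriminator]: samples [target:count ...]" lines, where the number of leading spaces encodes the inline-stack depth. A reduced sketch of the body-line case only (callsite lines are omitted, and std::stoul/std::stoull throw on malformed numbers where the real parser returns false):

    #include <cstdint>
    #include <map>
    #include <string>

    static bool parseBodyLine(const std::string &In, uint32_t &Depth,
                              uint32_t &LineOffset, uint32_t &Discriminator,
                              uint64_t &Samples,
                              std::map<std::string, uint64_t> &Targets) {
      Depth = 0;
      while (Depth < In.size() && In[Depth] == ' ') ++Depth;
      size_t Colon = In.find(':', Depth);
      if (Depth == 0 || Colon == std::string::npos)
        return false; // body lines are always indented
      std::string Loc = In.substr(Depth, Colon - Depth);
      size_t Dot = Loc.find('.');
      LineOffset = std::stoul(Loc.substr(0, Dot));
      Discriminator =
          Dot == std::string::npos ? 0 : std::stoul(Loc.substr(Dot + 1));
      size_t Pos = In.find_first_not_of(' ', Colon + 1);
      size_t Space = In.find(' ', Pos);
      Samples = std::stoull(In.substr(Pos, Space - Pos));
      // Optional "target:count" pairs follow the sample count.
      while (Space != std::string::npos) {
        Pos = In.find_first_not_of(' ', Space);
        if (Pos == std::string::npos) break;
        Space = In.find(' ', Pos);
        std::string Pair = In.substr(Pos, Space - Pos);
        size_t C = Pair.find(':');
        if (C == std::string::npos) return false;
        Targets[Pair.substr(0, C)] = std::stoull(Pair.substr(C + 1));
      }
      return true;
    }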
- while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { - if (!LineSampleRE.match(*LineIt, &Matches)) { - reportParseError( - LineIt.line_number(), - "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { + reportError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); return sampleprof_error::malformed; } - assert(Matches.size() == 5); - unsigned LineOffset, NumSamples, Discriminator = 0; - Matches[1].getAsInteger(10, LineOffset); - if (Matches[2] != "") - Matches[2].getAsInteger(10, Discriminator); - Matches[3].getAsInteger(10, NumSamples); - - // If there are function calls in this line, generate a call sample - // entry for each call. - std::string CallsLine(Matches[4]); - while (CallsLine != "") { - SmallVector CallSample; - if (!CallSampleRE.match(CallsLine, &CallSample)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM', found " + CallsLine); - return sampleprof_error::malformed; - } - StringRef CalledFunction = CallSample[1]; - unsigned CalledFunctionSamples; - CallSample[2].getAsInteger(10, CalledFunctionSamples); - FProfile.addCalledTargetSamples(LineOffset, Discriminator, - CalledFunction, CalledFunctionSamples); - CallsLine = CallSampleRE.sub("", CallsLine); + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + MergeResult(Result, FProfile.addTotalSamples(NumSamples)); + MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); + InlineStack.clear(); + InlineStack.push_back(&FProfile); + } else { + uint64_t NumSamples; + StringRef FName; + DenseMap TargetCountMap; + bool IsCallsite; + uint32_t Depth, LineOffset, Discriminator; + if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset, + Discriminator, FName, TargetCountMap)) { + reportError(LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + + *LineIt); + return sampleprof_error::malformed; + } + if (IsCallsite) { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, FName)); + MergeResult(Result, FSamples.addTotalSamples(NumSamples)); + InlineStack.push_back(&FSamples); + } else { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FProfile = *InlineStack.back(); + for (const auto &name_count : TargetCountMap) { + MergeResult(Result, FProfile.addCalledTargetSamples( + LineOffset, Discriminator, name_count.first, + name_count.second)); + } + MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, + NumSamples)); } - - FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); - ++LineIt; } } - return sampleprof_error::success; + return Result; +} + +bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { + bool result = false; + + // Check that the first non-comment line is a valid function header. 
+ line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); + if (!LineIt.is_at_eof()) { + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); + } + } + + return result; } template ErrorOr SampleProfileReaderBinary::readNumber() { @@ -243,7 +253,7 @@ template ErrorOr SampleProfileReaderBinary::readNumber() { EC = sampleprof_error::success; if (EC) { - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -256,7 +266,7 @@ ErrorOr SampleProfileReaderBinary::readString() { StringRef Str(reinterpret_cast(Data)); if (Data + Str.size() + 1 > End) { EC = sampleprof_error::truncated; - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -264,62 +274,109 @@ ErrorOr SampleProfileReaderBinary::readString() { return Str; } +ErrorOr SampleProfileReaderBinary::readStringFromTable() { + std::error_code EC; + auto Idx = readNumber(); + if (std::error_code EC = Idx.getError()) + return EC; + if (*Idx >= NameTable.size()) + return sampleprof_error::truncated_name_table; + return NameTable[*Idx]; +} + +std::error_code +SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { + auto NumSamples = readNumber(); + if (std::error_code EC = NumSamples.getError()) + return EC; + FProfile.addTotalSamples(*NumSamples); + + // Read the samples in the body. + auto NumRecords = readNumber(); + if (std::error_code EC = NumRecords.getError()) + return EC; + + for (uint32_t I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + if (!isOffsetLegal(*LineOffset)) { + return std::error_code(); + } + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto NumSamples = readNumber(); + if (std::error_code EC = NumSamples.getError()) + return EC; + + auto NumCalls = readNumber(); + if (std::error_code EC = NumCalls.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readStringFromTable()); + if (std::error_code EC = CalledFunction.getError()) + return EC; + + auto CalledFunctionSamples = readNumber(); + if (std::error_code EC = CalledFunctionSamples.getError()) + return EC; + + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } + + // Read all the samples for inlined function calls. 
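readNumber() in the binary sample reader decodes unsigned LEB128: seven payload bits per byte, with the continuation bit set on every byte except the last. A sketch of the decoder (the check against overflowing 64 bits is simplified relative to a production decoder):

    #include <cstdint>

    static bool decodeULEB128(const unsigned char *&P,
                              const unsigned char *End, uint64_t &Value) {
      Value = 0;
      unsigned Shift = 0;
      while (P != End) {
        unsigned char Byte = *P++;
        if (Shift >= 64)
          return false; // too many continuation bytes for 64 bits
        Value |= uint64_t(Byte & 0x7f) << Shift;
        if ((Byte & 0x80) == 0)
          return true; // continuation bit clear: this was the final byte
        Shift += 7;
      }
      return false; // ran off the end of the buffer: truncated
    }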
+ auto NumCallsites = readNumber(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + CallsiteLocation(*LineOffset, *Discriminator, *FName)); + if (std::error_code EC = readProfile(CalleeProfile)) + return EC; + } + + return sampleprof_error::success; +} + std::error_code SampleProfileReaderBinary::read() { while (!at_eof()) { - auto FName(readString()); + auto NumHeadSamples = readNumber(); + if (std::error_code EC = NumHeadSamples.getError()) + return EC; + + auto FName(readStringFromTable()); if (std::error_code EC = FName.getError()) return EC; Profiles[*FName] = FunctionSamples(); FunctionSamples &FProfile = Profiles[*FName]; - auto Val = readNumber(); - if (std::error_code EC = Val.getError()) + FProfile.addHeadSamples(*NumHeadSamples); + + if (std::error_code EC = readProfile(FProfile)) return EC; - FProfile.addTotalSamples(*Val); - - Val = readNumber(); - if (std::error_code EC = Val.getError()) - return EC; - FProfile.addHeadSamples(*Val); - - // Read the samples in the body. - auto NumRecords = readNumber(); - if (std::error_code EC = NumRecords.getError()) - return EC; - for (unsigned I = 0; I < *NumRecords; ++I) { - auto LineOffset = readNumber(); - if (std::error_code EC = LineOffset.getError()) - return EC; - - auto Discriminator = readNumber(); - if (std::error_code EC = Discriminator.getError()) - return EC; - - auto NumSamples = readNumber(); - if (std::error_code EC = NumSamples.getError()) - return EC; - - auto NumCalls = readNumber(); - if (std::error_code EC = NumCalls.getError()) - return EC; - - for (unsigned J = 0; J < *NumCalls; ++J) { - auto CalledFunction(readString()); - if (std::error_code EC = CalledFunction.getError()) - return EC; - - auto CalledFunctionSamples = readNumber(); - if (std::error_code EC = CalledFunctionSamples.getError()) - return EC; - - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, - *CalledFunction, - *CalledFunctionSamples); - } - - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); - } } return sampleprof_error::success; @@ -343,6 +400,18 @@ std::error_code SampleProfileReaderBinary::readHeader() { else if (*Version != SPVersion()) return sampleprof_error::unsupported_version; + // Read the name table. 
+ auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + NameTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto Name(readString()); + if (std::error_code EC = Name.getError()) + return EC; + NameTable.push_back(*Name); + } + return sampleprof_error::success; } @@ -353,6 +422,249 @@ bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { return Magic == SPMagic(); } +std::error_code SampleProfileReaderGCC::skipNextWord() { + uint32_t dummy; + if (!GcovBuffer.readInt(dummy)) + return sampleprof_error::truncated; + return sampleprof_error::success; +} + +template ErrorOr SampleProfileReaderGCC::readNumber() { + if (sizeof(T) <= sizeof(uint32_t)) { + uint32_t Val; + if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits::max()) + return static_cast(Val); + } else if (sizeof(T) <= sizeof(uint64_t)) { + uint64_t Val; + if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits::max()) + return static_cast(Val); + } + + std::error_code EC = sampleprof_error::malformed; + reportError(0, EC.message()); + return EC; +} + +ErrorOr SampleProfileReaderGCC::readString() { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + return Str; +} + +std::error_code SampleProfileReaderGCC::readHeader() { + // Read the magic identifier. + if (!GcovBuffer.readGCDAFormat()) + return sampleprof_error::unrecognized_format; + + // Read the version number. Note - the GCC reader does not validate this + // version, but the profile creator generates v704. + GCOV::GCOVVersion version; + if (!GcovBuffer.readGCOVVersion(version)) + return sampleprof_error::unrecognized_format; + + if (version != GCOV::V704) + return sampleprof_error::unsupported_version; + + // Skip the empty integer. 
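The gcov reader's readNumber() template above dispatches on the width of the requested result type, reading a 32- or 64-bit stream word and range-checking it before the narrowing cast. A sketch of the same dispatch, with GcovLikeBuffer as a hypothetical stand-in for GCOVBuffer:

    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Hypothetical byte-span reader; gcov words are little-endian, and a
    // little-endian host is assumed for the memcpy shortcut.
    struct GcovLikeBuffer {
      const unsigned char *P, *End;
      bool readInt(uint32_t &V) {
        if (End - P < 4) return false;
        std::memcpy(&V, P, 4); P += 4; return true;
      }
      bool readInt64(uint64_t &V) {
        if (End - P < 8) return false;
        std::memcpy(&V, P, 8); P += 8; return true;
      }
    };

    // Width-dispatching read with a range check before narrowing.
    template <typename T> bool readNumber(GcovLikeBuffer &B, T &Out) {
      if (sizeof(T) <= sizeof(uint32_t)) {
        uint32_t V;
        if (!B.readInt(V) || V > std::numeric_limits<T>::max()) return false;
        Out = static_cast<T>(V);
      } else {
        uint64_t V;
        if (!B.readInt64(V) || V > std::numeric_limits<T>::max()) return false;
        Out = static_cast<T>(V);
      }
      return true; // false would mean a truncated or out-of-range value
    }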
+ if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { + uint32_t Tag; + if (!GcovBuffer.readInt(Tag)) + return sampleprof_error::truncated; + + if (Tag != Expected) + return sampleprof_error::malformed; + + if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readNameTable() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) + return EC; + + uint32_t Size; + if (!GcovBuffer.readInt(Size)) + return sampleprof_error::truncated; + + for (uint32_t I = 0; I < Size; ++I) { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + Names.push_back(Str); + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readFunctionProfiles() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) + return EC; + + uint32_t NumFunctions; + if (!GcovBuffer.readInt(NumFunctions)) + return sampleprof_error::truncated; + + InlineCallStack Stack; + for (uint32_t I = 0; I < NumFunctions; ++I) + if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readOneFunctionProfile( + const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { + uint64_t HeadCount = 0; + if (InlineStack.size() == 0) + if (!GcovBuffer.readInt64(HeadCount)) + return sampleprof_error::truncated; + + uint32_t NameIdx; + if (!GcovBuffer.readInt(NameIdx)) + return sampleprof_error::truncated; + + StringRef Name(Names[NameIdx]); + + uint32_t NumPosCounts; + if (!GcovBuffer.readInt(NumPosCounts)) + return sampleprof_error::truncated; + + uint32_t NumCallsites; + if (!GcovBuffer.readInt(NumCallsites)) + return sampleprof_error::truncated; + + FunctionSamples *FProfile = nullptr; + if (InlineStack.size() == 0) { + // If this is a top function that we have already processed, do not + // update its profile again. This happens in the presence of + // function aliases. Since these aliases share the same function + // body, there will be identical replicated profiles for the + // original function. In this case, we simply do not bother updating + // the profile of the original function. + FProfile = &Profiles[Name]; + FProfile->addHeadSamples(HeadCount); + if (FProfile->getTotalSamples() > 0) + Update = false; + } else { + // Otherwise, we are reading an inlined instance. The top of the + // inline stack contains the profile of the caller. Insert this + // callee in the caller's CallsiteMap. + FunctionSamples *CallerProfile = InlineStack.front(); + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + FProfile = &CallerProfile->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, Name)); + } + + for (uint32_t I = 0; I < NumPosCounts; ++I) { + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + + uint32_t NumTargets; + if (!GcovBuffer.readInt(NumTargets)) + return sampleprof_error::truncated; + + uint64_t Count; + if (!GcovBuffer.readInt64(Count)) + return sampleprof_error::truncated; + + // The line location is encoded in the offset as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator.
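As the comments above note, a callsite location in this encoding packs the line offset into the high 16 bits of a single 32-bit word and the DWARF discriminator into the low 16 bits, which is also why the text reader's isOffsetLegal() rejects offsets wider than 16 bits. A sketch of the pack/unpack pair:

    #include <cassert>
    #include <cstdint>

    static uint32_t packLocation(uint32_t LineOffset, uint32_t Discriminator) {
      assert(LineOffset <= 0xffffu && Discriminator <= 0xffffu);
      return (LineOffset << 16) | Discriminator;
    }

    static void unpackLocation(uint32_t Offset, uint32_t &LineOffset,
                               uint32_t &Discriminator) {
      LineOffset = Offset >> 16;       // high 16 bits
      Discriminator = Offset & 0xffff; // low 16 bits
    }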
+ uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (Update) { + // Walk up the inline stack, adding the samples on this line to + // the total sample count of the callers in the chain. + for (auto CallerProfile : NewStack) + CallerProfile->addTotalSamples(Count); + + // Update the body samples for the current profile. + FProfile->addBodySamples(LineOffset, Discriminator, Count); + } + + // Process the list of functions called at an indirect call site. + // These are all the targets that a function pointer (or virtual + // function) resolved at runtime. + for (uint32_t J = 0; J < NumTargets; J++) { + uint32_t HistVal; + if (!GcovBuffer.readInt(HistVal)) + return sampleprof_error::truncated; + + if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) + return sampleprof_error::malformed; + + uint64_t TargetIdx; + if (!GcovBuffer.readInt64(TargetIdx)) + return sampleprof_error::truncated; + StringRef TargetName(Names[TargetIdx]); + + uint64_t TargetCount; + if (!GcovBuffer.readInt64(TargetCount)) + return sampleprof_error::truncated; + + if (Update) { + FunctionSamples &TargetProfile = Profiles[TargetName]; + TargetProfile.addCalledTargetSamples(LineOffset, Discriminator, + TargetName, TargetCount); + } + } + } + + // Process all the inlined callers into the current function. These + // are all the callsites that were inlined into this function. + for (uint32_t I = 0; I < NumCallsites; I++) { + // The offset is encoded as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) + return EC; + } + + return sampleprof_error::success; +} + +/// \brief Read a GCC AutoFDO profile. +/// +/// This format is generated by the Linux Perf conversion tool at +/// https://github.com/google/autofdo. +std::error_code SampleProfileReaderGCC::read() { + // Read the string table. + if (std::error_code EC = readNameTable()) + return EC; + + // Read the source profile. + if (std::error_code EC = readFunctionProfiles()) + return EC; + + return sampleprof_error::success; +} + +bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { + StringRef Magic(reinterpret_cast(Buffer.getBufferStart())); + return Magic == "adcg*704"; +} + /// \brief Prepare a memory buffer for the contents of \p Filename. /// /// \returns an error code indicating the status of the buffer. @@ -364,7 +676,7 @@ setupMemoryBuffer(std::string Filename) { auto Buffer = std::move(BufferOrErr.get()); // Sanity check the file. - if (Buffer->getBufferSize() > std::numeric_limits::max()) + if (Buffer->getBufferSize() > std::numeric_limits::max()) return sampleprof_error::too_large; return std::move(Buffer); @@ -384,13 +696,29 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; + return create(BufferOrError.get(), C); +} - auto Buffer = std::move(BufferOrError.get()); +/// \brief Create a sample profile reader based on the format of the input data. 
+/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr> +SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { std::unique_ptr Reader; - if (SampleProfileReaderBinary::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); + if (SampleProfileReaderBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderBinary(std::move(B), C)); + else if (SampleProfileReaderGCC::hasFormat(*B)) + Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); + else if (SampleProfileReaderText::hasFormat(*B)) + Reader.reset(new SampleProfileReaderText(std::move(B), C)); else - Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + return sampleprof_error::unrecognized_format; if (std::error_code EC = Reader->readHeader()) return EC; diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index c95267ad976b..51feee5ad7d1 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -30,16 +30,27 @@ using namespace llvm::sampleprof; using namespace llvm; /// \brief Write samples to a text file. -bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { - if (S.empty()) - return true; +/// +/// Note: it may be tempting to implement this in terms of +/// FunctionSamples::print(). Please don't. The dump functionality is intended +/// for debugging and has no specified form. +/// +/// The format used here is more structured and deliberate because +/// it needs to be parsed by the SampleProfileReaderText class. +std::error_code SampleProfileWriterText::write(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; - OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples() - << "\n"; + OS << FName << ":" << S.getTotalSamples(); + if (Indent == 0) + OS << ":" << S.getHeadSamples(); + OS << "\n"; - for (const auto &I : S.getBodySamples()) { - LineLocation Loc = I.first; - const SampleRecord &Sample = I.second; + SampleSorter SortedSamples(S.getBodySamples()); + for (const auto &I : SortedSamples.get()) { + LineLocation Loc = I->first; + const SampleRecord &Sample = I->second; + OS.indent(Indent + 1); if (Loc.Discriminator == 0) OS << Loc.LineOffset << ": "; else @@ -52,32 +63,89 @@ bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { OS << "\n"; } - return true; + SampleSorter SortedCallsiteSamples( + S.getCallsiteSamples()); + Indent += 1; + for (const auto &I : SortedCallsiteSamples.get()) { + CallsiteLocation Loc = I->first; + const FunctionSamples &CalleeSamples = I->second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." 
<< Loc.Discriminator << ": "; + if (std::error_code EC = write(Loc.CalleeName, CalleeSamples)) + return EC; + } + Indent -= 1; + + return sampleprof_error::success; } -SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F, - std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_None) { - if (EC) - return; +std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { + const auto &ret = NameTable.find(FName); + if (ret == NameTable.end()) + return sampleprof_error::truncated_name_table; + encodeULEB128(ret->second, *OutputStream); + return sampleprof_error::success; +} - // Write the file header. +void SampleProfileWriterBinary::addName(StringRef FName) { + auto NextIdx = NameTable.size(); + NameTable.insert(std::make_pair(FName, NextIdx)); +} + +void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { + // Add all the names in indirect call targets. + for (const auto &I : S.getBodySamples()) { + const SampleRecord &Sample = I.second; + for (const auto &J : Sample.getCallTargets()) + addName(J.first()); + } + + // Recursively add all the names for inlined callsites. + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + addName(Loc.CalleeName); + addNames(CalleeSamples); + } +} + +std::error_code SampleProfileWriterBinary::writeHeader( + const StringMap &ProfileMap) { + auto &OS = *OutputStream; + + // Write file magic identifier. encodeULEB128(SPMagic(), OS); encodeULEB128(SPVersion(), OS); + + // Generate the name table for all the functions referenced in the profile. + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + + // Write out the name table. + encodeULEB128(NameTable.size(), OS); + for (auto N : NameTable) { + OS << N.first; + encodeULEB128(0, OS); + } + + return sampleprof_error::success; } -/// \brief Write samples to a binary file. -/// -/// \returns true if the samples were written successfully, false otherwise. -bool SampleProfileWriterBinary::write(StringRef FName, - const FunctionSamples &S) { - if (S.empty()) - return true; +std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + if (std::error_code EC = writeNameIdx(FName)) + return EC; - OS << FName; - encodeULEB128(0, OS); encodeULEB128(S.getTotalSamples(), OS); - encodeULEB128(S.getHeadSamples(), OS); + + // Emit all the body samples. encodeULEB128(S.getBodySamples().size(), OS); for (const auto &I : S.getBodySamples()) { LineLocation Loc = I.first; @@ -87,18 +155,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, encodeULEB128(Sample.getSamples(), OS); encodeULEB128(Sample.getCallTargets().size(), OS); for (const auto &J : Sample.getCallTargets()) { - std::string Callee = J.first(); - unsigned CalleeSamples = J.second; - OS << Callee; - encodeULEB128(0, OS); + StringRef Callee = J.first(); + uint64_t CalleeSamples = J.second; + if (std::error_code EC = writeNameIdx(Callee)) + return EC; encodeULEB128(CalleeSamples, OS); } } - return true; + // Recursively emit all the callsite samples. 
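writeBody() above emits every integer as unsigned LEB128 and refers to function names by their index in the name table written up front, instead of repeating inline strings. A sketch of the encoder, the inverse of the decoder the reader uses:

    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t Value,
                              std::vector<unsigned char> &Out) {
      do {
        unsigned char Byte = Value & 0x7f; // low seven payload bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // set the continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }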
+ encodeULEB128(S.getCallsiteSamples().size(), OS); + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(Loc.CalleeName, CalleeSamples)) + return EC; + } + + return sampleprof_error::success; } -/// \brief Create a sample profile writer based on the specified format. +/// \brief Write samples of a top-level function to a binary file. +/// +/// \returns an error code indicating the status of the samples written. +std::error_code SampleProfileWriterBinary::write(StringRef FName, + const FunctionSamples &S) { + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(FName, S); +} + +/// \brief Create a sample profile file writer based on the specified format. /// /// \param Filename The file to create. /// @@ -110,12 +198,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, const FunctionSamples &S) { ErrorOr> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; + std::unique_ptr OS; + if (Format == SPF_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + else + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + if (EC) + return EC; + + return create(OS, Format); } + +/// \brief Create a sample profile stream writer based on the specified format. +/// +/// \param OS The output stream to store the profile data to. +/// +/// \param Writer The writer to instantiate according to the specified format. +/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr> +SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format) { + std::error_code EC; std::unique_ptr Writer; if (Format == SPF_Binary) - Writer.reset(new SampleProfileWriterBinary(Filename, EC)); + Writer.reset(new SampleProfileWriterBinary(OS)); else if (Format == SPF_Text) - Writer.reset(new SampleProfileWriterText(Filename, EC)); + Writer.reset(new SampleProfileWriterText(OS)); + else if (Format == SPF_GCC) + EC = sampleprof_error::unsupported_writing_format; else EC = sampleprof_error::unrecognized_format; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 5d31225396d4..19b8221b60cb 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -767,6 +767,15 @@ APFloat::isLargest() const { && isSignificandAllOnes(); } +bool +APFloat::isInteger() const { + // This could be made more efficient; I'm going for obviously correct.
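APFloat::isInteger() above takes the route its comment announces: reject non-finite values, truncate a copy toward zero, and compare it with the original. The same structure for plain doubles:

    #include <cmath>

    static bool isIntegerValued(double X) {
      if (!std::isfinite(X))
        return false;            // NaN and infinities are not integers
      return std::trunc(X) == X; // round toward zero, then compare
    }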
+ if (!isFinite()) return false; + APFloat truncated = *this; + truncated.roundToIntegral(rmTowardZero); + return compare(truncated) == cmpEqual; +} + bool APFloat::bitwiseIsEqual(const APFloat &rhs) const { if (this == &rhs) @@ -777,18 +786,12 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { return false; if (category==fcZero || category==fcInfinity) return true; - else if (isFiniteNonZero() && exponent!=rhs.exponent) + + if (isFiniteNonZero() && exponent != rhs.exponent) return false; - else { - int i= partCount(); - const integerPart* p=significandParts(); - const integerPart* q=rhs.significandParts(); - for (; i>0; i--, p++, q++) { - if (*p != *q) - return false; - } - return true; - } + + return std::equal(significandParts(), significandParts() + partCount(), + rhs.significandParts()); } APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { @@ -847,6 +850,21 @@ APFloat::semanticsPrecision(const fltSemantics &semantics) { return semantics.precision; } +APFloat::ExponentType +APFloat::semanticsMaxExponent(const fltSemantics &semantics) +{ + return semantics.maxExponent; +} +APFloat::ExponentType +APFloat::semanticsMinExponent(const fltSemantics &semantics) +{ + return semantics.minExponent; +} +unsigned int +APFloat::semanticsSizeInBits(const fltSemantics &semantics) +{ + return semantics.sizeInBits; +} const integerPart * APFloat::significandParts() const @@ -1762,7 +1780,7 @@ APFloat::remainder(const APFloat &rhs) /* Normalized llvm frem (C fmod). This is not currently correct in all cases. */ APFloat::opStatus -APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +APFloat::mod(const APFloat &rhs) { opStatus fs; fs = modSpecials(rhs); @@ -1787,10 +1805,10 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) rmNearestTiesToEven); assert(fs==opOK); // should always work - fs = V.multiply(rhs, rounding_mode); + fs = V.multiply(rhs, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // should not overflow or underflow - fs = subtract(V, rounding_mode); + fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // likewise if (isZero()) diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index 6f7e341904b9..e7f3e1764c52 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -11,37 +11,35 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; -BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { +BlockFrequency &BlockFrequency::operator*=(BranchProbability Prob) { Frequency = Prob.scale(Frequency); return *this; } -const BlockFrequency -BlockFrequency::operator*(const BranchProbability &Prob) const { +BlockFrequency BlockFrequency::operator*(BranchProbability Prob) const { BlockFrequency Freq(Frequency); Freq *= Prob; return Freq; } -BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { +BlockFrequency &BlockFrequency::operator/=(BranchProbability Prob) { Frequency = Prob.scaleByInverse(Frequency); return *this; } -BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const { +BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { BlockFrequency Freq(Frequency); Freq /= Prob; return Freq; } -BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { +BlockFrequency 
&BlockFrequency::operator+=(BlockFrequency Freq) { uint64_t Before = Freq.Frequency; Frequency += Freq.Frequency; @@ -52,11 +50,25 @@ BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { return *this; } -const BlockFrequency -BlockFrequency::operator+(const BlockFrequency &Prob) const { - BlockFrequency Freq(Frequency); - Freq += Prob; - return Freq; +BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq += Freq; + return NewFreq; +} + +BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) { + // If underflow, set frequency to 0. + if (Frequency <= Freq.Frequency) + Frequency = 0; + else + Frequency -= Freq.Frequency; + return *this; +} + +BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq -= Freq; + return NewFreq; } BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp index 65878d6e3025..771d02c0aa3c 100644 --- a/lib/Support/BranchProbability.cpp +++ b/lib/Support/BranchProbability.cpp @@ -15,17 +15,58 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; +const uint32_t BranchProbability::D; + raw_ostream &BranchProbability::print(raw_ostream &OS) const { - return OS << N << " / " << D << " = " - << format("%g%%", ((double)N / D) * 100.0); + if (isUnknown()) + return OS << "?%"; + + // Get a percentage rounded to two decimal digits. This avoids + // implementation-defined rounding inside printf. + double Percent = rint(((double)N / D) * 100.0 * 100.0) / 100.0; + return OS << format("0x%08" PRIx32 " / 0x%08" PRIx32 " = %.2f%%", N, D, + Percent); } void BranchProbability::dump() const { print(dbgs()) << '\n'; } +BranchProbability::BranchProbability(uint32_t Numerator, uint32_t Denominator) { + assert(Denominator > 0 && "Denominator cannot be 0!"); + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + if (Denominator == D) + N = Numerator; + else { + uint64_t Prob64 = + (Numerator * static_cast(D) + Denominator / 2) / Denominator; + N = static_cast(Prob64); + } +} + +BranchProbability +BranchProbability::getBranchProbability(uint64_t Numerator, + uint64_t Denominator) { + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + // Scale down Denominator to fit in a 32-bit integer. + int Scale = 0; + while (Denominator > UINT32_MAX) { + Denominator >>= 1; + Scale++; + } + return BranchProbability(Numerator >> Scale, Denominator); +} + +// If ConstD is not zero, then replace D by ConstD so that division and modulo +// operations by D can be optimized, in case this function is not inlined by the +// compiler. +template static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { + if (ConstD > 0) + D = ConstD; + assert(D && "divide by 0"); // Fast path for multiplying by 1.0. 
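// Two arithmetic patterns from the changes above, sketched standalone.
// First, BlockFrequency::operator-= clamps instead of wrapping: frequencies
// are unsigned, so underflow must saturate to zero.
#include <cassert>
#include <cstdint>

static uint64_t saturatingSub(uint64_t A, uint64_t B) {
  return A <= B ? 0 : A - B;
}

// Second, the BranchProbability constructor maps N/Denom onto the fixed
// denominator D, rounding to nearest via (N * D + Denom/2) / Denom, with
// the multiply widened to 64 bits so it cannot overflow. D = 1u << 31 is
// an assumption here, matching the fixed-point denominator this revision
// appears to use.
static uint32_t rescaleToFixedDenominator(uint32_t N, uint32_t Denom) {
  const uint64_t D = 1u << 31;
  assert(Denom > 0 && N <= Denom && "probability must lie in [0, 1]");
  return static_cast<uint32_t>(uint64_t(N) * D + Denom / 2 > 0
                                   ? (uint64_t(N) * D + Denom / 2) / Denom
                                   : 0);
}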
@@ -65,9 +106,9 @@ static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { } uint64_t BranchProbability::scale(uint64_t Num) const { - return ::scale(Num, N, D); + return ::scale(Num, N, D); } uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { - return ::scale(Num, D, N); + return ::scale<0>(Num, D, N); } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index a8a4df51661f..75b3e89f9167 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -1,7 +1,8 @@ set(system_libs) if( NOT MSVC ) if( MINGW ) - set(system_libs ${system_libs} psapi shell32 ole32) + # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. + set(system_libs ${system_libs} psapi shell32 ole32 uuid) elseif( CMAKE_HOST_UNIX ) if( HAVE_LIBRT ) set(system_libs ${system_libs} rt) @@ -59,6 +60,7 @@ add_llvm_library(LLVMSupport IntEqClasses.cpp IntervalMap.cpp IntrusiveRefCntPtr.cpp + JamCRC.cpp LEB128.cpp LineIterator.cpp Locale.cpp @@ -87,6 +89,7 @@ add_llvm_library(LLVMSupport StringRef.cpp SystemUtils.cpp TargetParser.cpp + ThreadPool.cpp Timer.cpp ToolOutputFile.cpp Triple.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 17fba95ebb2b..fdcdb03706de 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -120,7 +120,7 @@ public: void addOption(Option *O) { bool HadErrors = false; - if (O->ArgStr[0]) { + if (O->hasArgStr()) { // Add argument to the argument map! if (!OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) { errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr @@ -151,12 +151,12 @@ public: } void removeOption(Option *O) { - SmallVector OptionNames; + SmallVector OptionNames; O->getExtraOptionNames(OptionNames); - if (O->ArgStr[0]) + if (O->hasArgStr()) OptionNames.push_back(O->ArgStr); for (auto Name : OptionNames) - OptionsMap.erase(StringRef(Name)); + OptionsMap.erase(Name); if (O->getFormattingFlag() == cl::Positional) for (auto Opt = PositionalOpts.begin(); Opt != PositionalOpts.end(); @@ -182,13 +182,13 @@ public: nullptr != ConsumeAfterOpt); } - void updateArgStr(Option *O, const char *NewName) { + void updateArgStr(Option *O, StringRef NewName) { if (!OptionsMap.insert(std::make_pair(NewName, O)).second) { errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr << "' registered more than once!\n"; report_fatal_error("inconsistency in registered CommandLine options"); } - OptionsMap.erase(StringRef(O->ArgStr)); + OptionsMap.erase(O->ArgStr); } void printOptionValues(); @@ -227,7 +227,7 @@ void Option::addArgument() { void Option::removeArgument() { GlobalParser->removeOption(this); } -void Option::setArgStr(const char *S) { +void Option::setArgStr(StringRef S) { if (FullyInitialized) GlobalParser->updateArgStr(this, S); ArgStr = S; @@ -296,24 +296,23 @@ static Option *LookupNearestOption(StringRef Arg, ie = OptionsMap.end(); it != ie; ++it) { Option *O = it->second; - SmallVector OptionNames; + SmallVector OptionNames; O->getExtraOptionNames(OptionNames); - if (O->ArgStr[0]) + if (O->hasArgStr()) OptionNames.push_back(O->ArgStr); bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed; StringRef Flag = PermitValue ? 
LHS : Arg; - for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { - StringRef Name = OptionNames[i]; + for (auto Name : OptionNames) { unsigned Distance = StringRef(Name).edit_distance( Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); if (!Best || Distance < BestDistance) { Best = O; BestDistance = Distance; if (RHS.empty() || !PermitValue) - NearestString = OptionNames[i]; + NearestString = Name; else - NearestString = (Twine(OptionNames[i]) + "=" + RHS).str(); + NearestString = (Twine(Name) + "=" + RHS).str(); } } } @@ -346,10 +345,7 @@ static bool CommaSeparateAndAddOccurrence(Option *Handler, unsigned pos, Value = Val; } - if (Handler->addOccurrence(pos, ArgName, Value, MultiArg)) - return true; - - return false; + return Handler->addOccurrence(pos, ArgName, Value, MultiArg); } /// ProvideOption - For Value, this differentiates between an empty value ("") @@ -799,7 +795,7 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, // telling us. SmallVector newArgv; BumpPtrAllocator A; - BumpPtrStringSaver Saver(A); + StringSaver Saver(A); newArgv.push_back(Saver.save(progName)); // Parse the value of the environment variable into a "command line" @@ -822,7 +818,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc, // Expand response files. SmallVector newArgv(argv, argv + argc); BumpPtrAllocator A; - BumpPtrStringSaver Saver(A); + StringSaver Saver(A); ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv); argv = &newArgv[0]; argc = static_cast(newArgv.size()); @@ -859,7 +855,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc, "error - this positional option will never be matched, " "because it does not Require a value, and a " "cl::ConsumeAfter option is active!"); - } else if (UnboundedFound && !Opt->ArgStr[0]) { + } else if (UnboundedFound && !Opt->hasArgStr()) { // This option does not "require" a value... Make sure this option is // not specified after an option that eats all extra arguments, or this // one will never get any! @@ -1144,8 +1140,8 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value, // getValueStr - Get the value description string, using "DefaultMsg" if nothing // has been specified yet. // -static const char *getValueStr(const Option &O, const char *DefaultMsg) { - if (O.ValueStr[0] == 0) +static StringRef getValueStr(const Option &O, StringRef DefaultMsg) { + if (O.ValueStr.empty()) return DefaultMsg; return O.ValueStr; } @@ -1155,7 +1151,7 @@ static const char *getValueStr(const Option &O, const char *DefaultMsg) { // // Return the width of the option tag for printing... -size_t alias::getOptionWidth() const { return std::strlen(ArgStr) + 6; } +size_t alias::getOptionWidth() const { return ArgStr.size() + 6; } static void printHelpStr(StringRef HelpStr, size_t Indent, size_t FirstLineIndentedBy) { @@ -1170,7 +1166,7 @@ static void printHelpStr(StringRef HelpStr, size_t Indent, // Print out the option for the alias. void alias::printOptionInfo(size_t GlobalWidth) const { outs() << " -" << ArgStr; - printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6); + printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6); } //===----------------------------------------------------------------------===// @@ -1182,9 +1178,9 @@ void alias::printOptionInfo(size_t GlobalWidth) const { // Return the width of the option tag for printing... 
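// LookupNearestOption above ranks candidates by StringRef::edit_distance.
// For illustration, the classic single-row Levenshtein computation that
// measure is based on (a sketch, not the in-tree implementation, which
// additionally supports an early-exit MaxEditDistance):
#include <algorithm>
#include <string>
#include <vector>

static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (unsigned J = 0; J <= B.size(); ++J)
    Row[J] = J;
  for (unsigned I = 1; I <= A.size(); ++I) {
    unsigned Diag = Row[0]; // distance(A[0..I-1), B[0..J-1))
    Row[0] = I;
    for (unsigned J = 1; J <= B.size(); ++J) {
      unsigned Up = Row[J]; // distance(A[0..I-1), B[0..J))
      unsigned Cost = (A[I - 1] == B[J - 1]) ? 0 : 1;
      Row[J] = std::min({Up + 1,         // delete from A
                         Row[J - 1] + 1, // insert into A
                         Diag + Cost});  // substitute
      Diag = Up;
    }
  }
  return Row[B.size()];
}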
size_t basic_parser_impl::getOptionWidth(const Option &O) const { - size_t Len = std::strlen(O.ArgStr); + size_t Len = O.ArgStr.size(); if (const char *ValName = getValueName()) - Len += std::strlen(getValueStr(O, ValName)) + 3; + Len += getValueStr(O, ValName).size() + 3; return Len + 6; } @@ -1205,7 +1201,7 @@ void basic_parser_impl::printOptionInfo(const Option &O, void basic_parser_impl::printOptionName(const Option &O, size_t GlobalWidth) const { outs() << " -" << O.ArgStr; - outs().indent(GlobalWidth - std::strlen(O.ArgStr)); + outs().indent(GlobalWidth - O.ArgStr.size()); } // parser implementation @@ -1319,7 +1315,7 @@ unsigned generic_parser_base::findOption(const char *Name) { // Return the width of the option tag for printing... size_t generic_parser_base::getOptionWidth(const Option &O) const { if (O.hasArgStr()) { - size_t Size = std::strlen(O.ArgStr) + 6; + size_t Size = O.ArgStr.size() + 6; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) Size = std::max(Size, std::strlen(getOption(i)) + 8); return Size; @@ -1338,7 +1334,7 @@ void generic_parser_base::printOptionInfo(const Option &O, size_t GlobalWidth) const { if (O.hasArgStr()) { outs() << " -" << O.ArgStr; - printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6); + printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6); for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t NumSpaces = GlobalWidth - strlen(getOption(i)) - 8; @@ -1346,7 +1342,7 @@ void generic_parser_base::printOptionInfo(const Option &O, outs().indent(NumSpaces) << " - " << getDescription(i) << '\n'; } } else { - if (O.HelpStr[0]) + if (!O.HelpStr.empty()) outs() << " " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { const char *Option = getOption(i); @@ -1365,7 +1361,7 @@ void generic_parser_base::printGenericOptionDiff( const Option &O, const GenericOptionValue &Value, const GenericOptionValue &Default, size_t GlobalWidth) const { outs() << " -" << O.ArgStr; - outs().indent(GlobalWidth - std::strlen(O.ArgStr)); + outs().indent(GlobalWidth - O.ArgStr.size()); unsigned NumOpts = getNumOptions(); for (unsigned i = 0; i != NumOpts; ++i) { @@ -1508,7 +1504,7 @@ public: outs() << "USAGE: " << GlobalParser->ProgramName << " [options]"; for (auto Opt : GlobalParser->PositionalOpts) { - if (Opt->ArgStr[0]) + if (Opt->hasArgStr()) outs() << " --" << Opt->ArgStr; outs() << " " << Opt->HelpStr; } diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index aba0f1ddeee8..3f4ef9da48f1 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -24,6 +24,12 @@ static ManagedStatic< sys::ThreadLocal > CurrentContext; struct CrashRecoveryContextImpl { + // When threads are disabled, this links up all active + // CrashRecoveryContextImpls. When threads are enabled there's one thread + // per CrashRecoveryContext and CurrentContext is a thread-local, so only one + // CrashRecoveryContextImpl is active per thread and this is always null. 
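// The single-threaded bookkeeping described in the comment above, reduced
// to its core: active crash-recovery contexts form an intrusive stack
// threaded through Next, and a pointer (a plain global here, a thread-local
// in-tree) tracks the innermost one. A sketch under those assumptions:
struct ContextImpl {
  static const ContextImpl *Current;
  const ContextImpl *Next;
  ContextImpl() : Next(Current) { Current = this; } // push on construction
  ~ContextImpl() { Current = Next; }                // pop on destruction
};
const ContextImpl *ContextImpl::Current = nullptr;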
+ const CrashRecoveryContextImpl *Next; + CrashRecoveryContext *CRC; std::string Backtrace; ::jmp_buf JumpBuffer; @@ -34,21 +40,26 @@ public: CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), Failed(false), SwitchedThread(false) { + Next = CurrentContext->get(); CurrentContext->set(this); } ~CrashRecoveryContextImpl() { if (!SwitchedThread) - CurrentContext->erase(); + CurrentContext->set(Next); } /// \brief Called when the separate crash-recovery thread was finished, to /// indicate that we don't need to clear the thread-local CurrentContext. - void setSwitchedThread() { SwitchedThread = true; } + void setSwitchedThread() { +#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 + SwitchedThread = true; +#endif + } void HandleCrash() { // Eliminate the current context entry, to avoid re-entering in case the // cleanup code crashes. - CurrentContext->erase(); + CurrentContext->set(Next); assert(!Failed && "Crash recovery context already failed!"); Failed = true; @@ -65,7 +76,7 @@ public: static ManagedStatic gCrashRecoveryContextMutex; static bool gCrashRecoveryEnabled = false; -static ManagedStatic > +static ManagedStatic> tlIsRecoveringFromCrash; CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} @@ -73,7 +84,8 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { // Reclaim registered resources. CrashRecoveryContextCleanup *i = head; - tlIsRecoveringFromCrash->set(head); + const CrashRecoveryContext *PC = tlIsRecoveringFromCrash->get(); + tlIsRecoveringFromCrash->set(this); while (i) { CrashRecoveryContextCleanup *tmp = i; i = tmp->next; @@ -81,7 +93,7 @@ CrashRecoveryContext::~CrashRecoveryContext() { tmp->recoverResources(); delete tmp; } - tlIsRecoveringFromCrash->erase(); + tlIsRecoveringFromCrash->set(PC); CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; @@ -232,7 +244,7 @@ void CrashRecoveryContext::Disable() { static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; -static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]); +static const unsigned NumSignals = array_lengthof(Signals); static struct sigaction PrevActions[NumSignals]; static void CrashRecoverySignalHandler(int Signal) { diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 13a41557a8d2..7d7225671737 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -177,6 +177,23 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size"; case DW_AT_lo_user: return "DW_AT_lo_user"; case DW_AT_hi_user: return "DW_AT_hi_user"; + case DW_AT_BORLAND_property_read: return "DW_AT_BORLAND_property_read"; + case DW_AT_BORLAND_property_write: return "DW_AT_BORLAND_property_write"; + case DW_AT_BORLAND_property_implements: return "DW_AT_BORLAND_property_implements"; + case DW_AT_BORLAND_property_index: return "DW_AT_BORLAND_property_index"; + case DW_AT_BORLAND_property_default: return "DW_AT_BORLAND_property_default"; + case DW_AT_BORLAND_Delphi_unit: return "DW_AT_BORLAND_Delphi_unit"; + case DW_AT_BORLAND_Delphi_class: return "DW_AT_BORLAND_Delphi_class"; + case DW_AT_BORLAND_Delphi_record: return "DW_AT_BORLAND_Delphi_record"; + case DW_AT_BORLAND_Delphi_metaclass: return "DW_AT_BORLAND_Delphi_metaclass"; + case DW_AT_BORLAND_Delphi_constructor: return "DW_AT_BORLAND_Delphi_constructor"; + case DW_AT_BORLAND_Delphi_destructor: return "DW_AT_BORLAND_Delphi_destructor"; + case 
DW_AT_BORLAND_Delphi_anonymous_method: return "DW_AT_BORLAND_Delphi_anonymous_method"; + case DW_AT_BORLAND_Delphi_interface: return "DW_AT_BORLAND_Delphi_interface"; + case DW_AT_BORLAND_Delphi_ABI: return "DW_AT_BORLAND_Delphi_ABI"; + case DW_AT_BORLAND_Delphi_return: return "DW_AT_BORLAND_Delphi_return"; + case DW_AT_BORLAND_Delphi_frameptr: return "DW_AT_BORLAND_Delphi_frameptr"; + case DW_AT_BORLAND_closure: return "DW_AT_BORLAND_closure"; case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized"; case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; @@ -201,6 +218,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base"; case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames"; case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes"; + case DW_AT_GNU_discriminator: return "DW_AT_GNU_discriminator"; } return nullptr; } @@ -373,6 +391,14 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) { case DW_CC_nocall: return "DW_CC_nocall"; case DW_CC_lo_user: return "DW_CC_lo_user"; case DW_CC_hi_user: return "DW_CC_hi_user"; + case DW_CC_GNU_borland_fastcall_i386: return "DW_CC_GNU_borland_fastcall_i386"; + case DW_CC_BORLAND_safecall: return "DW_CC_BORLAND_safecall"; + case DW_CC_BORLAND_stdcall: return "DW_CC_BORLAND_stdcall"; + case DW_CC_BORLAND_pascal: return "DW_CC_BORLAND_pascal"; + case DW_CC_BORLAND_msfastcall: return "DW_CC_BORLAND_msfastcall"; + case DW_CC_BORLAND_msreturn: return "DW_CC_BORLAND_msreturn"; + case DW_CC_BORLAND_thiscall: return "DW_CC_BORLAND_thiscall"; + case DW_CC_BORLAND_fastcall: return "DW_CC_BORLAND_fastcall"; } return nullptr; } @@ -442,10 +468,21 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; + case DW_MACINFO_invalid: return "DW_MACINFO_invalid"; } return nullptr; } +unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) { + return StringSwitch(MacinfoString) + .Case("DW_MACINFO_define", DW_MACINFO_define) + .Case("DW_MACINFO_undef", DW_MACINFO_undef) + .Case("DW_MACINFO_start_file", DW_MACINFO_start_file) + .Case("DW_MACINFO_end_file", DW_MACINFO_end_file) + .Case("DW_MACINFO_vendor_ext", DW_MACINFO_vendor_ext) + .Default(DW_MACINFO_invalid); +} + const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { case DW_CFA_nop: return "DW_CFA_nop"; diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index a25e21ae043e..2808bd34af06 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ErrorHandling.h" -#include "llvm-c/Core.h" +#include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 307ff09afedc..651e679f2cb5 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Signals.h" #include #if !defined(_MSC_VER) && !defined(__MINGW32__) @@ -34,10 +35,8 @@ FileOutputBuffer::~FileOutputBuffer() { sys::fs::remove(Twine(TempPath)); } -std::error_code 
-FileOutputBuffer::create(StringRef FilePath, size_t Size, - std::unique_ptr &Result, - unsigned Flags) { +ErrorOr> +FileOutputBuffer::create(StringRef FilePath, size_t Size, unsigned Flags) { // If file already exists, it must be a regular file (to be mappable). sys::fs::file_status Stat; std::error_code EC = sys::fs::status(FilePath, Stat); @@ -76,6 +75,8 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, if (EC) return EC; + sys::RemoveFileOnSignal(TempFilePath); + #ifndef LLVM_ON_WIN32 // On Windows, CreateFileMapping (the mmap function on Windows) // automatically extends the underlying file. We don't need to @@ -95,10 +96,9 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, if (Ret) return std::error_code(errno, std::generic_category()); - Result.reset( + std::unique_ptr Buf( new FileOutputBuffer(std::move(MappedFile), FilePath, TempFilePath)); - - return std::error_code(); + return std::move(Buf); } std::error_code FileOutputBuffer::commit() { @@ -107,6 +107,8 @@ std::error_code FileOutputBuffer::commit() { // Rename file to final name. - return sys::fs::rename(Twine(TempPath), Twine(FinalPath)); + std::error_code EC = sys::fs::rename(Twine(TempPath), Twine(FinalPath)); + sys::DontRemoveFileOnSignal(TempPath); + return EC; } } // namespace diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index b8538ffe1f9f..bb0ec2defef9 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -232,9 +232,29 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { Buckets = AllocateBuckets(NumBuckets); NumNodes = 0; } + +FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) + : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { + Arg.Buckets = nullptr; + Arg.NumBuckets = 0; + Arg.NumNodes = 0; +} + +FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { + free(Buckets); // This may be null if the set is in a moved-from state. + Buckets = RHS.Buckets; + NumBuckets = RHS.NumBuckets; + NumNodes = RHS.NumNodes; + RHS.Buckets = nullptr; + RHS.NumBuckets = 0; + RHS.NumNodes = 0; + return *this; +} + FoldingSetImpl::~FoldingSetImpl() { free(Buckets); } + void FoldingSetImpl::clear() { // Set all but the last bucket to null pointers. 
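// FileOutputBuffer's commit protocol, sketched with the standard library:
// write into a temporary file, then rename it over the destination so
// readers never observe a half-written file. The fixed ".tmp" suffix is an
// assumption; the real code uses createUniqueFile plus the signal-time
// cleanup (RemoveFileOnSignal / DontRemoveFileOnSignal) added above.
#include <cstdio>
#include <fstream>
#include <string>

static bool writeAtomically(const std::string &Final, const std::string &Data) {
  std::string Temp = Final + ".tmp";
  {
    std::ofstream Out(Temp, std::ios::binary | std::ios::trunc);
    if (!Out.write(Data.data(), static_cast<std::streamsize>(Data.size())))
      return false;
  } // close (and flush) before renaming
  if (std::rename(Temp.c_str(), Final.c_str()) != 0) {
    std::remove(Temp.c_str()); // don't leave the temporary behind
    return false;
  }
  return true;
}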
memset(Buckets, 0, NumBuckets*sizeof(void*)); diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index a9b022041468..d0e1d50e8ccb 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -103,7 +103,7 @@ struct GraphSession { bool TryFindProgram(StringRef Names, std::string &ProgramPath) { raw_string_ostream Log(LogBuffer); SmallVector parts; - Names.split(parts, "|"); + Names.split(parts, '|'); for (auto Name : parts) { if (ErrorOr P = sys::findProgramByName(Name)) { ProgramPath = *P; @@ -189,61 +189,87 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg); } - enum PSViewerKind { PSV_None, PSV_OSXOpen, PSV_XDGOpen, PSV_Ghostview }; - PSViewerKind PSViewer = PSV_None; + enum ViewerKind { + VK_None, + VK_OSXOpen, + VK_XDGOpen, + VK_Ghostview, + VK_CmdStart + }; + ViewerKind Viewer = VK_None; #ifdef __APPLE__ - if (!PSViewer && S.TryFindProgram("open", ViewerPath)) - PSViewer = PSV_OSXOpen; + if (!Viewer && S.TryFindProgram("open", ViewerPath)) + Viewer = VK_OSXOpen; +#endif + if (!Viewer && S.TryFindProgram("gv", ViewerPath)) + Viewer = VK_Ghostview; + if (!Viewer && S.TryFindProgram("xdg-open", ViewerPath)) + Viewer = VK_XDGOpen; +#ifdef LLVM_ON_WIN32 + if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) { + Viewer = VK_CmdStart; + } #endif - if (!PSViewer && S.TryFindProgram("gv", ViewerPath)) - PSViewer = PSV_Ghostview; - if (!PSViewer && S.TryFindProgram("xdg-open", ViewerPath)) - PSViewer = PSV_XDGOpen; - // PostScript graph generator + PostScript viewer + // PostScript or PDF graph generator + PostScript/PDF viewer std::string GeneratorPath; - if (PSViewer && + if (Viewer && (S.TryFindProgram(getProgramName(program), GeneratorPath) || S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) { - std::string PSFilename = Filename + ".ps"; + std::string OutputFilename = + Filename + (Viewer == VK_CmdStart ? ".pdf" : ".ps"); std::vector args; args.push_back(GeneratorPath.c_str()); - args.push_back("-Tps"); + if (Viewer == VK_CmdStart) + args.push_back("-Tpdf"); + else + args.push_back("-Tps"); args.push_back("-Nfontname=Courier"); args.push_back("-Gsize=7.5,10"); args.push_back(Filename.c_str()); args.push_back("-o"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); args.push_back(nullptr); errs() << "Running '" << GeneratorPath << "' program... "; - if (ExecGraphViewer(GeneratorPath, args, Filename, wait, ErrMsg)) + if (ExecGraphViewer(GeneratorPath, args, Filename, true, ErrMsg)) return true; + // The lifetime of StartArg must include the call of ExecGraphViewer + // because the args are passed as vector of char*. + std::string StartArg; + args.clear(); args.push_back(ViewerPath.c_str()); - switch (PSViewer) { - case PSV_OSXOpen: + switch (Viewer) { + case VK_OSXOpen: args.push_back("-W"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_XDGOpen: + case VK_XDGOpen: wait = false; - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_Ghostview: + case VK_Ghostview: args.push_back("--spartan"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_None: + case VK_CmdStart: + args.push_back("/S"); + args.push_back("/C"); + StartArg = + (StringRef("start ") + (wait ? 
"/WAIT " : "") + OutputFilename).str(); + args.push_back(StartArg.c_str()); + break; + case VK_None: llvm_unreachable("Invalid viewer"); } args.push_back(nullptr); ErrMsg.clear(); - return ExecGraphViewer(ViewerPath, args, PSFilename, wait, ErrMsg); + return ExecGraphViewer(ViewerPath, args, OutputFilename, wait, ErrMsg); } // dotty diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 1bd1fe2bea0e..c0f9e0744b5e 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -368,8 +368,14 @@ StringRef sys::getHostCPUName() { // Broadwell: case 61: + case 71: return "broadwell"; + // Skylake: + case 78: + case 94: + return "skylake"; + case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield @@ -381,6 +387,8 @@ StringRef sys::getHostCPUName() { case 55: case 74: case 77: + case 90: + case 93: return "silvermont"; default: // Unknown family 6 CPU, try to guess. @@ -689,7 +697,7 @@ StringRef sys::getHostCPUName() { if (Lines[I].startswith("features")) { size_t Pos = Lines[I].find(":"); if (Pos != StringRef::npos) { - Lines[I].drop_front(Pos + 1).split(CPUFeatures, " "); + Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); break; } } @@ -766,14 +774,17 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. - bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && - !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); - Features["avx"] = HasAVX; - Features["fma"] = HasAVX && (ECX >> 12) & 1; - Features["f16c"] = HasAVX && (ECX >> 29) & 1; + bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && + !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + Features["avx"] = HasAVXSave; + Features["fma"] = HasAVXSave && (ECX >> 12) & 1; + Features["f16c"] = HasAVXSave && (ECX >> 29) & 1; + + // Only enable XSAVE if OS has enabled support for saving YMM state. + Features["xsave"] = HasAVXSave && (ECX >> 26) & 1; // AVX512 requires additional context to be saved by the OS. - bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); unsigned MaxExtLevel; GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -783,15 +794,15 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); - Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1); - Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1); + Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); bool HasLeaf7 = MaxLevel >= 7 && !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); // AVX2 is only supported if we have the OS save support from AVX. 
- Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1; + Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); @@ -801,6 +812,8 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); + // Enable protection keys + Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; @@ -811,6 +824,14 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + bool HasLeafD = MaxLevel >= 0xd && + !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); + + // Only enable XSAVE if OS has enabled support for saving YMM state. + Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1); + Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1); + Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1); + return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) @@ -832,7 +853,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // Look for the CPU features. for (unsigned I = 0, E = Lines.size(); I != E; ++I) if (Lines[I].startswith("Features")) { - Lines[I].split(CPUFeatures, " "); + Lines[I].split(CPUFeatures, ' '); break; } diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp new file mode 100644 index 000000000000..bc21c917d55b --- /dev/null +++ b/lib/Support/JamCRC.cpp @@ -0,0 +1,96 @@ +//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of JamCRC. +// +//===----------------------------------------------------------------------===// +// +// The implementation technique is the one mentioned in: +// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table +// look-up. Commun. 
ACM 31, 8 (August 1988) +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/JamCRC.h" + +using namespace llvm; + +static const uint32_t CRCTable[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +void JamCRC::update(ArrayRef Data) { + for (char Byte : Data) { + int TableIdx = (CRC ^ Byte) & 0xff; + CRC = CRCTable[TableIdx] ^ 
(CRC >> 8); + } +} diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp index d5cb72b5db3a..53bc0e36d830 100644 --- a/lib/Support/Locale.cpp +++ b/lib/Support/Locale.cpp @@ -1,3 +1,4 @@ +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Unicode.h" diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index b8fb2841e525..9868207b14ff 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index d09ef3a4c0bc..faee10bb07cf 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -162,13 +162,14 @@ MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { } ErrorOr> -MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize) { +MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, + bool RequiresNullTerminator) { SmallString<256> NameBuf; StringRef NameRef = Filename.toStringRef(NameBuf); if (NameRef == "-") return getSTDIN(); - return getFile(Filename, FileSize); + return getFile(Filename, FileSize, RequiresNullTerminator); } ErrorOr> diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index cf467381db8c..4952f59fc24d 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -455,17 +455,15 @@ void append(SmallVectorImpl &path, const Twine &a, if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); - for (SmallVectorImpl::const_iterator i = components.begin(), - e = components.end(); - i != e; ++i) { + for (auto &component : components) { bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]); - bool component_has_sep = !i->empty() && is_separator((*i)[0]); - bool is_root_name = has_root_name(*i); + bool component_has_sep = !component.empty() && is_separator(component[0]); + bool is_root_name = has_root_name(component); if (path_has_sep) { // Strip separators from beginning of component. - size_t loc = i->find_first_not_of(separators); - StringRef c = i->substr(loc); + size_t loc = component.find_first_not_of(separators); + StringRef c = component.substr(loc); // Append it. path.append(c.begin(), c.end()); @@ -477,7 +475,7 @@ void append(SmallVectorImpl &path, const Twine &a, path.push_back(preferred_separator); } - path.append(i->begin(), i->end()); + path.append(component.begin(), component.end()); } } @@ -661,8 +659,51 @@ bool is_absolute(const Twine &path) { return rootDir && rootName; } -bool is_relative(const Twine &path) { - return !is_absolute(path); +bool is_relative(const Twine &path) { return !is_absolute(path); } + +StringRef remove_leading_dotslash(StringRef Path) { + // Remove leading "./" (or ".//" or "././" etc.) + while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1])) { + Path = Path.substr(2); + while (Path.size() > 0 && is_separator(Path[0])) + Path = Path.substr(1); + } + return Path; +} + +static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) { + SmallVector components; + + // Skip the root path, then look for traversal in the components. 
+ StringRef rel = path::relative_path(path); + for (StringRef C : llvm::make_range(path::begin(rel), path::end(rel))) { + if (C == ".") + continue; + if (remove_dot_dot) { + if (C == "..") { + if (!components.empty()) + components.pop_back(); + continue; + } + } + components.push_back(C); + } + + SmallString<256> buffer = path::root_path(path); + for (StringRef C : components) + path::append(buffer, C); + return buffer; +} + +bool remove_dots(SmallVectorImpl &path, bool remove_dot_dot) { + StringRef p(path.data(), path.size()); + + SmallString<256> result = remove_dots(p, remove_dot_dot); + if (result == path) + return false; + + path.swap(result); + return true; } } // end namespace path @@ -732,7 +773,9 @@ std::error_code createUniqueDirectory(const Twine &Prefix, true, 0, FS_Dir); } -std::error_code make_absolute(SmallVectorImpl &path) { +static std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path, + bool use_current_directory) { StringRef p(path.data(), path.size()); bool rootDirectory = path::has_root_directory(p), @@ -748,7 +791,9 @@ std::error_code make_absolute(SmallVectorImpl &path) { // All of the following conditions will need the current directory. SmallString<128> current_dir; - if (std::error_code ec = current_path(current_dir)) + if (use_current_directory) + current_directory.toVector(current_dir); + else if (std::error_code ec = current_path(current_dir)) return ec; // Relative path. Prepend the current directory. @@ -785,12 +830,22 @@ std::error_code make_absolute(SmallVectorImpl &path) { "occurred above!"); } -std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { +std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path) { + return make_absolute(current_directory, path, true); +} + +std::error_code make_absolute(SmallVectorImpl &path) { + return make_absolute(Twine(), path, false); +} + +std::error_code create_directories(const Twine &Path, bool IgnoreExisting, + perms Perms) { SmallString<128> PathStorage; StringRef P = Path.toStringRef(PathStorage); // Be optimistic and try to create the directory - std::error_code EC = create_directory(P, IgnoreExisting); + std::error_code EC = create_directory(P, IgnoreExisting, Perms); // If we succeeded, or had any error other than the parent not existing, just // return it. 
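// The traversal remove_dots performs above, transcribed for plain
// '/'-separated relative paths: "." components are dropped and ".."
// cancels the most recent real component. A sketch under those
// assumptions; the in-tree version also carries the root path through.
#include <sstream>
#include <string>
#include <vector>

static std::string removeDotsSketch(const std::string &Path) {
  std::vector<std::string> Components;
  std::istringstream In(Path);
  std::string C;
  while (std::getline(In, C, '/')) {
    if (C.empty() || C == ".")
      continue; // skip empty and "." components
    if (C == "..") {
      if (!Components.empty())
        Components.pop_back(); // ".." cancels the previous component
      continue;
    }
    Components.push_back(C);
  }
  std::string Out;
  for (const std::string &Part : Components) {
    if (!Out.empty())
      Out += '/';
    Out += Part;
  }
  return Out; // e.g. "a/./b/../c" -> "a/c"
}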
if (EC != errc::no_such_file_or_directory) return EC; @@ -802,10 +857,10 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { if (Parent.empty()) return EC; - if ((EC = create_directories(Parent))) + if ((EC = create_directories(Parent, IgnoreExisting, Perms))) return EC; - return create_directory(P, IgnoreExisting); + return create_directory(P, IgnoreExisting, Perms); } std::error_code copy_file(const Twine &From, const Twine &To) { @@ -889,8 +944,7 @@ std::error_code is_other(const Twine &Path, bool &Result) { } void directory_entry::replace_filename(const Twine &filename, file_status st) { - SmallString<128> path(Path.begin(), Path.end()); - path::remove_filename(path); + SmallString<128> path = path::parent_path(Path); path::append(path, filename); Path = path.str(); Status = st; @@ -940,7 +994,8 @@ file_magic identify_magic(StringRef Magic) { break; case '!': if (Magic.size() >= 8) - if (memcmp(Magic.data(),"!<arch>\n",8) == 0) + if (memcmp(Magic.data(), "!<arch>\n", 8) == 0 || + memcmp(Magic.data(), "!<thin>\n", 8) == 0) return file_magic::archive; break; @@ -1074,3 +1129,20 @@ std::error_code directory_entry::status(file_status &result) const { #if defined(LLVM_ON_WIN32) #include "Windows/Path.inc" #endif + +namespace llvm { +namespace sys { +namespace path { + +bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1, + const Twine &Path2, const Twine &Path3) { + if (getUserCacheDir(Result)) { + append(Result, Path1, Path2, Path3); + return true; + } + return false; +} + +} // end namespace path +} // end namespace sys +} // end namespace llvm diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index f9f8cab9d933..05b3e31644bd 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/PrettyStackTrace.h" -#include "llvm-c/Core.h" +#include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Compiler.h" @@ -154,6 +154,20 @@ void llvm::EnablePrettyStackTrace() { #endif } +const void* llvm::SavePrettyStackState() { +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) + return PrettyStackTraceHead; +#else + return nullptr; +#endif +} + +void llvm::RestorePrettyStackState(const void* Top) { +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) + PrettyStackTraceHead = (const PrettyStackTraceEntry*)Top; +#endif +} + void LLVMEnablePrettyStackTrace() { EnablePrettyStackTrace(); } diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index a11789372d93..3dc6b7c99d01 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -12,8 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Signals.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> namespace llvm { using namespace sys; @@ -23,6 +36,131 @@ using namespace sys; //=== independent code.
//===----------------------------------------------------------------------===// +static ManagedStatic>> + CallBacksToRun; +void sys::RunSignalHandlers() { + if (!CallBacksToRun.isConstructed()) + return; + for (auto &I : *CallBacksToRun) + I.first(I.second); + CallBacksToRun->clear(); +} +} + +using namespace llvm; + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool); + +/// Format a pointer value as hexadecimal. Zero pad it out so its always the +/// same width. +static FormattedNumber format_ptr(void *PC) { + // Each byte is two hex digits plus 2 for the 0x prefix. + unsigned PtrWidth = 2 + 2 * sizeof(void *); + return format_hex((uint64_t)PC, PtrWidth); +} + +static bool printSymbolizedStackTrace(void **StackTrace, int Depth, + llvm::raw_ostream &OS) + LLVM_ATTRIBUTE_USED; + +/// Helper that launches llvm-symbolizer and symbolizes a backtrace. +static bool printSymbolizedStackTrace(void **StackTrace, int Depth, + llvm::raw_ostream &OS) { + // FIXME: Subtract necessary number from StackTrace entries to turn return addresses + // into actual instruction addresses. + // Use llvm-symbolizer tool to symbolize the stack traces. + ErrorOr LLVMSymbolizerPathOrErr = + sys::findProgramByName("llvm-symbolizer"); + if (!LLVMSymbolizerPathOrErr) + return false; + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + // We don't know argv0 or the address of main() at this point, but try + // to guess it anyway (it's possible on some platforms). + std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); + if (MainExecutableName.empty() || + MainExecutableName.find("llvm-symbolizer") != std::string::npos) + return false; + + BumpPtrAllocator Allocator; + StringSaver StrPool(Allocator); + std::vector Modules(Depth, nullptr); + std::vector Offsets(Depth, 0); + if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), + MainExecutableName.c_str(), StrPool)) + return false; + int InputFD; + SmallString<32> InputFile, OutputFile; + sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); + sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); + FileRemover InputRemover(InputFile.c_str()); + FileRemover OutputRemover(OutputFile.c_str()); + + { + raw_fd_ostream Input(InputFD, true); + for (int i = 0; i < Depth; i++) { + if (Modules[i]) + Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; + } + } + + StringRef InputFileStr(InputFile); + StringRef OutputFileStr(OutputFile); + StringRef StderrFileStr; + const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr, + &StderrFileStr}; + const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", +#ifdef LLVM_ON_WIN32 + // Pass --relative-address on Windows so that we don't + // have to add ImageBase from PE file. + // FIXME: Make this the default for llvm-symbolizer. + "--relative-address", +#endif + "--demangle", nullptr}; + int RunResult = + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects); + if (RunResult != 0) + return false; + + // This report format is based on the sanitizer stack trace printer. See + // sanitizer_stacktrace_printer.cc in compiler-rt. 
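// printSymbolizedStackTrace feeds llvm-symbolizer "module offset" pairs
// produced by the platform-specific findModulesAndOffsets. On platforms
// with dladdr, one way to derive such a pair from a PC value looks like
// this (a sketch, not the in-tree code; link with -ldl on Linux):
#include <dlfcn.h>
#include <cstdint>

static bool moduleAndOffset(void *PC, const char *&Module, intptr_t &Offset) {
  Dl_info Info;
  if (!dladdr(PC, &Info) || !Info.dli_fname)
    return false;          // address not in any loaded module
  Module = Info.dli_fname; // path of the containing object
  Offset = reinterpret_cast<intptr_t>(PC) -
           reinterpret_cast<intptr_t>(Info.dli_fbase);
  return true;
}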
+ auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return false; + StringRef Output = OutputBuf.get()->getBuffer(); + SmallVector Lines; + Output.split(Lines, "\n"); + auto CurLine = Lines.begin(); + int frame_no = 0; + for (int i = 0; i < Depth; i++) { + if (!Modules[i]) { + OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << '\n'; + continue; + } + // Read pairs of lines (function name and file/line info) until we + // encounter empty line. + for (;;) { + if (CurLine == Lines.end()) + return false; + StringRef FunctionName = *CurLine++; + if (FunctionName.empty()) + break; + OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << ' '; + if (!FunctionName.startswith("??")) + OS << FunctionName << ' '; + if (CurLine == Lines.end()) + return false; + StringRef FileLineInfo = *CurLine++; + if (!FileLineInfo.startswith("??")) + OS << FileLineInfo; + else + OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; + OS << "\n"; + } + } + return true; } // Include the platform-specific parts of this class. diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 56c3b0f5659f..e49d1cbe0637 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" @@ -33,9 +34,6 @@ #include using namespace llvm; -// CreateInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } - /// -stats - Command line option to cause transformations to emit stats about /// what they did. /// @@ -144,20 +142,18 @@ void llvm::PrintStatistics() { if (Stats.Stats.empty()) return; // Get the stream to write to. - raw_ostream &OutStream = *CreateInfoOutputFile(); - PrintStatistics(OutStream); - delete &OutStream; // Close the file. + std::unique_ptr OutStream = CreateInfoOutputFile(); + PrintStatistics(*OutStream); + #else // Check if the -stats option is set instead of checking // !Stats.Stats.empty(). In release builds, Statistics operators // do nothing, so stats are never Registered. if (Enabled) { // Get the stream to write to. - raw_ostream &OutStream = *CreateInfoOutputFile(); - OutStream << "Statistics are disabled. " - << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; - OutStream.flush(); - delete &OutStream; // Close the file. + std::unique_ptr OutStream = CreateInfoOutputFile(); + (*OutStream) << "Statistics are disabled. " + << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; } #endif } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ddece087a9e7..7ecff2964c51 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -140,37 +140,44 @@ std::string StringRef::upper() const { /// \return - The index of the first occurrence of \arg Str, or npos if not /// found. 
size_t StringRef::find(StringRef Str, size_t From) const { - size_t N = Str.size(); - if (N > Length) + if (From > Length) return npos; + const char *Needle = Str.data(); + size_t N = Str.size(); + if (N == 0) + return From; + + size_t Size = Length - From; + if (Size < N) + return npos; + + const char *Start = Data + From; + const char *Stop = Start + (Size - N + 1); + // For short haystacks or unsupported needles fall back to the naive algorithm - if (Length < 16 || N > 255 || N == 0) { - for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i) - if (substr(i, N).equals(Str)) - return i; + if (Size < 16 || N > 255) { + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; + ++Start; + } while (Start < Stop); return npos; } - if (From >= Length) - return npos; - // Build the bad char heuristic table, with uint8_t to reduce cache thrashing. uint8_t BadCharSkip[256]; std::memset(BadCharSkip, N, 256); for (unsigned i = 0; i != N-1; ++i) BadCharSkip[(uint8_t)Str[i]] = N-1-i; - unsigned Len = Length-From, Pos = From; - while (Len >= N) { - if (substr(Pos, N).equals(Str)) // See if this is the correct substring. - return Pos; + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; // Otherwise skip the appropriate number of bytes. - uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]]; - Len -= Skip; - Pos += Skip; - } + Start += BadCharSkip[(uint8_t)Start[N-1]]; + } while (Start < Stop); return npos; } @@ -274,24 +281,56 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars, } void StringRef::split(SmallVectorImpl &A, - StringRef Separators, int MaxSplit, + StringRef Separator, int MaxSplit, bool KeepEmpty) const { - StringRef rest = *this; + StringRef S = *this; - // rest.data() is used to distinguish cases like "a," that splits into - // "a" + "" and "a" that splits into "a" + 0. - for (int splits = 0; - rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit); - ++splits) { - std::pair p = rest.split(Separators); + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; - if (KeepEmpty || p.first.size() != 0) - A.push_back(p.first); - rest = p.second; + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + Separator.size(), npos); } - // If we have a tail left, add it. - if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty)) - A.push_back(rest); + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + +void StringRef::split(SmallVectorImpl &A, char Separator, + int MaxSplit, bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + 1, npos); + } + + // Push the tail. 
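// The rewritten find is Boyer-Moore-Horspool: memcmp at each candidate
// position, and on a mismatch skip by the distance from the window's last
// byte to that byte's final occurrence in the needle. The same algorithm
// as a standalone sketch (npos spelled as size_t(-1)):
#include <cstdint>
#include <cstring>

static size_t findHorspool(const char *Hay, size_t HayLen,
                           const char *Needle, size_t N) {
  if (N == 0 || N > HayLen || N > 255)
    return size_t(-1); // the real code falls back to a naive scan
  uint8_t Skip[256];
  std::memset(Skip, static_cast<int>(N), sizeof(Skip));
  for (size_t I = 0; I + 1 < N; ++I) // every needle byte except the last
    Skip[(uint8_t)Needle[I]] = static_cast<uint8_t>(N - 1 - I);
  const char *Start = Hay;
  const char *Stop = Hay + (HayLen - N + 1);
  do {
    if (std::memcmp(Start, Needle, N) == 0)
      return static_cast<size_t>(Start - Hay);
    Start += Skip[(uint8_t)Start[N - 1]];
  } while (Start < Stop);
  return size_t(-1);
}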
+ if (KeepEmpty || !S.empty()) + A.push_back(S); } //===----------------------------------------------------------------------===// diff --git a/lib/Support/StringSaver.cpp b/lib/Support/StringSaver.cpp index d6b84e53dccd..bbc1fd276266 100644 --- a/lib/Support/StringSaver.cpp +++ b/lib/Support/StringSaver.cpp @@ -11,7 +11,7 @@ using namespace llvm; -const char *StringSaver::saveImpl(StringRef S) { +const char *StringSaver::save(StringRef S) { char *P = Alloc.Allocate(S.size() + 1); memcpy(P, S.data(), S.size()); P[S.size()] = '\0'; diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index 4d4c041b8435..337532edbbc6 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -16,9 +16,11 @@ #include "llvm/Support/TargetParser.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include using namespace llvm; +using namespace ARM; namespace { @@ -26,36 +28,19 @@ namespace { // features they correspond to (use getFPUFeatures). // FIXME: TableGen this. // The entries must appear in the order listed in ARM::FPUKind for correct indexing -struct { - const char * Name; +static const struct { + const char *NameCStr; + size_t NameLength; ARM::FPUKind ID; ARM::FPUVersion FPUVersion; ARM::NeonSupportLevel NeonSupport; ARM::FPURestriction Restriction; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } FPUNames[] = { - { "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, - { "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, - { "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, - { "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, - { "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None}, - { "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None}, - { "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16}, - { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16}, - { "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16}, - { "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16}, - { "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None}, - { "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16}, - { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16}, - { "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16}, - { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16}, - { "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None}, - { "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None}, - { "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None}, - { "crypto-neon-fp-armv8", - ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None}, - { "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \ + { NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION }, +#include "llvm/Support/ARMTargetParser.def" }; // List of canonical arch names (use getArchSynonym). @@ -66,165 +51,79 @@ struct { // of the triples and are not conforming with their official names. 
// Check to see if the expectation should be changed. // FIXME: TableGen this. -struct { - const char *Name; +static const struct { + const char *NameCStr; + size_t NameLength; + const char *CPUAttrCStr; + size_t CPUAttrLength; + const char *SubArchCStr; + size_t SubArchLength; + unsigned DefaultFPU; + unsigned ArchBaseExtensions; ARM::ArchKind ID; - const char *CPUAttr; // CPU class in build attributes. - const char *SubArch; // Sub-Arch name. ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes. + + StringRef getName() const { return StringRef(NameCStr, NameLength); } + + // CPU class in build attributes. + StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); } + + // Sub-Arch name. + StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); } } ARCHNames[] = { - { "invalid", ARM::AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv2", ARM::AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv2a", ARM::AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv3", ARM::AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv3m", ARM::AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv4", ARM::AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4 }, - { "armv4t", ARM::AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T }, - { "armv5t", ARM::AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, - { "armv5te", ARM::AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5tej", ARM::AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ }, - { "armv6", ARM::AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6 }, - { "armv6k", ARM::AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K }, - { "armv6t2", ARM::AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2 }, - { "armv6z", ARM::AK_ARMV6Z, "6Z", "v6z", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", "v6zk", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6-m", ARM::AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M }, - { "armv6s-m", ARM::AK_ARMV6SM, "6S-M", "v6sm", ARMBuildAttrs::CPUArch::v6S_M }, - { "armv7-a", ARM::AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-r", ARM::AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-m", ARM::AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7 }, - { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M }, - { "armv8-a", ARM::AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8 }, - { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8 }, - // Non-standard Arch names. 
- { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE }, - { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE }, - { "xscale", ARM::AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5", ARM::AK_ARMV5, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, - { "armv5e", ARM::AK_ARMV5E, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, - { "armv6j", ARM::AK_ARMV6J, "6J", "v6", ARMBuildAttrs::CPUArch::v6 }, - { "armv6hl", ARM::AK_ARMV6HL, "6-M", "v6hl", ARMBuildAttrs::CPUArch::v6_M }, - { "armv7", ARM::AK_ARMV7, "7", "v7", ARMBuildAttrs::CPUArch::v7 }, - { "armv7l", ARM::AK_ARMV7L, "7-L", "v7l", ARMBuildAttrs::CPUArch::v7 }, - { "armv7hl", ARM::AK_ARMV7HL, "7-L", "v7hl", ARMBuildAttrs::CPUArch::v7 }, - { "armv7s", ARM::AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7 } +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \ + {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \ + sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ID, ARCH_ATTR}, +#include "llvm/Support/ARMTargetParser.def" }; + // List of Arch Extension names. // FIXME: TableGen this. -struct { - const char *Name; - ARM::ArchExtKind ID; +static const struct { + const char *NameCStr; + size_t NameLength; + unsigned ID; + const char *Feature; + const char *NegFeature; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } ARCHExtNames[] = { - { "invalid", ARM::AEK_INVALID }, - { "crc", ARM::AEK_CRC }, - { "crypto", ARM::AEK_CRYPTO }, - { "fp", ARM::AEK_FP }, - { "idiv", ARM::AEK_HWDIV }, - { "mp", ARM::AEK_MP }, - { "simd", ARM::AEK_SIMD }, - { "sec", ARM::AEK_SEC }, - { "virt", ARM::AEK_VIRT }, - { "os", ARM::AEK_OS }, - { "iwmmxt", ARM::AEK_IWMMXT }, - { "iwmmxt2", ARM::AEK_IWMMXT2 }, - { "maverick", ARM::AEK_MAVERICK }, - { "xscale", ARM::AEK_XSCALE } +#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \ + { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE }, +#include "llvm/Support/ARMTargetParser.def" }; + +// List of HWDiv names (use getHWDivSynonym) and which architectural +// features they correspond to (use getHWDivFeatures). +// FIXME: TableGen this. +static const struct { + const char *NameCStr; + size_t NameLength; + unsigned ID; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +} HWDivNames[] = { +#define ARM_HW_DIV_NAME(NAME, ID) { NAME, sizeof(NAME) - 1, ID }, +#include "llvm/Support/ARMTargetParser.def" +}; + // List of CPU names and their arches. // The same CPU can have multiple arches and can be default on multiple arches. // When finding the Arch for a CPU, first-found prevails. Sort them accordingly. // When this becomes table-generated, we'd probably need two tables. // FIXME: TableGen this. -struct { - const char *Name; +static const struct { + const char *NameCStr; + size_t NameLength; ARM::ArchKind ArchID; - bool Default; + bool Default; // is $Name the default CPU for $ArchID ? 
+ unsigned DefaultExtensions; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } CPUNames[] = { - { "arm2", ARM::AK_ARMV2, true }, - { "arm3", ARM::AK_ARMV2A, true }, - { "arm6", ARM::AK_ARMV3, true }, - { "arm7m", ARM::AK_ARMV3M, true }, - { "arm8", ARM::AK_ARMV4, false }, - { "arm810", ARM::AK_ARMV4, false }, - { "strongarm", ARM::AK_ARMV4, true }, - { "strongarm110", ARM::AK_ARMV4, false }, - { "strongarm1100", ARM::AK_ARMV4, false }, - { "strongarm1110", ARM::AK_ARMV4, false }, - { "arm7tdmi", ARM::AK_ARMV4T, true }, - { "arm7tdmi-s", ARM::AK_ARMV4T, false }, - { "arm710t", ARM::AK_ARMV4T, false }, - { "arm720t", ARM::AK_ARMV4T, false }, - { "arm9", ARM::AK_ARMV4T, false }, - { "arm9tdmi", ARM::AK_ARMV4T, false }, - { "arm920", ARM::AK_ARMV4T, false }, - { "arm920t", ARM::AK_ARMV4T, false }, - { "arm922t", ARM::AK_ARMV4T, false }, - { "arm9312", ARM::AK_ARMV4T, false }, - { "arm940t", ARM::AK_ARMV4T, false }, - { "ep9312", ARM::AK_ARMV4T, false }, - { "arm10tdmi", ARM::AK_ARMV5T, true }, - { "arm1020t", ARM::AK_ARMV5T, false }, - { "arm9e", ARM::AK_ARMV5TE, false }, - { "arm946e-s", ARM::AK_ARMV5TE, false }, - { "arm966e-s", ARM::AK_ARMV5TE, false }, - { "arm968e-s", ARM::AK_ARMV5TE, false }, - { "arm10e", ARM::AK_ARMV5TE, false }, - { "arm1020e", ARM::AK_ARMV5TE, false }, - { "arm1022e", ARM::AK_ARMV5TE, true }, - { "iwmmxt", ARM::AK_ARMV5TE, false }, - { "xscale", ARM::AK_ARMV5TE, false }, - { "arm926ej-s", ARM::AK_ARMV5TEJ, true }, - { "arm1136jf-s", ARM::AK_ARMV6, true }, - { "arm1176j-s", ARM::AK_ARMV6K, false }, - { "arm1176jz-s", ARM::AK_ARMV6K, false }, - { "mpcore", ARM::AK_ARMV6K, false }, - { "mpcorenovfp", ARM::AK_ARMV6K, false }, - { "arm1176jzf-s", ARM::AK_ARMV6K, true }, - { "arm1176jzf-s", ARM::AK_ARMV6Z, true }, - { "arm1176jzf-s", ARM::AK_ARMV6ZK, true }, - { "arm1156t2-s", ARM::AK_ARMV6T2, true }, - { "arm1156t2f-s", ARM::AK_ARMV6T2, false }, - { "cortex-m0", ARM::AK_ARMV6M, true }, - { "cortex-m0plus", ARM::AK_ARMV6M, false }, - { "cortex-m1", ARM::AK_ARMV6M, false }, - { "sc000", ARM::AK_ARMV6M, false }, - { "cortex-a5", ARM::AK_ARMV7A, false }, - { "cortex-a7", ARM::AK_ARMV7A, false }, - { "cortex-a8", ARM::AK_ARMV7A, true }, - { "cortex-a9", ARM::AK_ARMV7A, false }, - { "cortex-a12", ARM::AK_ARMV7A, false }, - { "cortex-a15", ARM::AK_ARMV7A, false }, - { "cortex-a17", ARM::AK_ARMV7A, false }, - { "krait", ARM::AK_ARMV7A, false }, - { "cortex-r4", ARM::AK_ARMV7R, true }, - { "cortex-r4f", ARM::AK_ARMV7R, false }, - { "cortex-r5", ARM::AK_ARMV7R, false }, - { "cortex-r7", ARM::AK_ARMV7R, false }, - { "sc300", ARM::AK_ARMV7M, false }, - { "cortex-m3", ARM::AK_ARMV7M, true }, - { "cortex-m4", ARM::AK_ARMV7EM, true }, - { "cortex-m7", ARM::AK_ARMV7EM, false }, - { "cortex-a53", ARM::AK_ARMV8A, true }, - { "cortex-a57", ARM::AK_ARMV8A, false }, - { "cortex-a72", ARM::AK_ARMV8A, false }, - { "cyclone", ARM::AK_ARMV8A, false }, - { "generic", ARM::AK_ARMV8_1A, true }, - // Non-standard Arch names. 
- { "iwmmxt", ARM::AK_IWMMXT, true }, - { "xscale", ARM::AK_XSCALE, true }, - { "arm10tdmi", ARM::AK_ARMV5, true }, - { "arm1022e", ARM::AK_ARMV5E, true }, - { "arm1136j-s", ARM::AK_ARMV6J, true }, - { "arm1136jz-s", ARM::AK_ARMV6J, false }, - { "cortex-m0", ARM::AK_ARMV6SM, true }, - { "arm1176jzf-s", ARM::AK_ARMV6HL, true }, - { "cortex-a8", ARM::AK_ARMV7, true }, - { "cortex-a8", ARM::AK_ARMV7L, true }, - { "cortex-a8", ARM::AK_ARMV7HL, true }, - { "cortex-m4", ARM::AK_ARMV7EM, true }, - { "swift", ARM::AK_ARMV7S, true }, - // Invalid CPU - { "invalid", ARM::AK_INVALID, true } +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + { NAME, sizeof(NAME) - 1, ID, IS_DEFAULT, DEFAULT_EXT }, +#include "llvm/Support/ARMTargetParser.def" }; } // namespace @@ -233,33 +132,93 @@ struct { // Information by ID // ======================================================= // -const char *ARMTargetParser::getFPUName(unsigned FPUKind) { +StringRef llvm::ARM::getFPUName(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) - return nullptr; - return FPUNames[FPUKind].Name; + return StringRef(); + return FPUNames[FPUKind].getName(); } -unsigned ARMTargetParser::getFPUVersion(unsigned FPUKind) { +unsigned llvm::ARM::getFPUVersion(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].FPUVersion; } -unsigned ARMTargetParser::getFPUNeonSupportLevel(unsigned FPUKind) { +unsigned llvm::ARM::getFPUNeonSupportLevel(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].NeonSupport; } -unsigned ARMTargetParser::getFPURestriction(unsigned FPUKind) { +unsigned llvm::ARM::getFPURestriction(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].Restriction; } -bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, +unsigned llvm::ARM::getDefaultFPU(StringRef CPU, unsigned ArchKind) { + if (CPU == "generic") + return ARCHNames[ArchKind].DefaultFPU; + + return StringSwitch(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, DEFAULT_FPU) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::FK_INVALID); +} + +unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, unsigned ArchKind) { + if (CPU == "generic") + return ARCHNames[ArchKind].ArchBaseExtensions; + + return StringSwitch(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, ARCHNames[ID].ArchBaseExtensions | DEFAULT_EXT) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::AEK_INVALID); +} + +bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind, + std::vector &Features) { + + if (HWDivKind == ARM::AEK_INVALID) + return false; + + if (HWDivKind & ARM::AEK_HWDIVARM) + Features.push_back("+hwdiv-arm"); + else + Features.push_back("-hwdiv-arm"); + + if (HWDivKind & ARM::AEK_HWDIV) + Features.push_back("+hwdiv"); + else + Features.push_back("-hwdiv"); + + return true; +} + +bool llvm::ARM::getExtensionFeatures(unsigned Extensions, std::vector &Features) { + if (Extensions == ARM::AEK_INVALID) + return false; + + if (Extensions & ARM::AEK_CRC) + Features.push_back("+crc"); + else + Features.push_back("-crc"); + + if (Extensions & ARM::AEK_DSP) + Features.push_back("+dsp"); + else + Features.push_back("-dsp"); + + return getHWDivFeatures(Extensions, Features); +} + +bool llvm::ARM::getFPUFeatures(unsigned FPUKind, + std::vector &Features) { + if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID) return false; @@ -323,6 +282,7 @@ bool ARMTargetParser::getFPUFeatures(unsigned 
FPUKind, // crypto includes neon, so we handle this similarly to FPU version. switch (FPUNames[FPUKind].NeonSupport) { case ARM::NS_Crypto: + Features.push_back("+neon"); Features.push_back("+crypto"); break; case ARM::NS_Neon: @@ -338,88 +298,127 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, return true; } -const char *ARMTargetParser::getArchName(unsigned ArchKind) { +StringRef llvm::ARM::getArchName(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].Name; + return StringRef(); + return ARCHNames[ArchKind].getName(); } -const char *ARMTargetParser::getCPUAttr(unsigned ArchKind) { +StringRef llvm::ARM::getCPUAttr(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].CPUAttr; + return StringRef(); + return ARCHNames[ArchKind].getCPUAttr(); } -const char *ARMTargetParser::getSubArch(unsigned ArchKind) { +StringRef llvm::ARM::getSubArch(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].SubArch; + return StringRef(); + return ARCHNames[ArchKind].getSubArch(); } -unsigned ARMTargetParser::getArchAttr(unsigned ArchKind) { +unsigned llvm::ARM::getArchAttr(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) return ARMBuildAttrs::CPUArch::Pre_v4; return ARCHNames[ArchKind].ArchAttr; } -const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) { - if (ArchExtKind >= ARM::AEK_LAST) - return nullptr; - return ARCHExtNames[ArchExtKind].Name; +StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) { + for (const auto AE : ARCHExtNames) { + if (ArchExtKind == AE.ID) + return AE.getName(); + } + return StringRef(); } -const char *ARMTargetParser::getDefaultCPU(StringRef Arch) { +const char *llvm::ARM::getArchExtFeature(StringRef ArchExt) { + if (ArchExt.startswith("no")) { + StringRef ArchExtBase(ArchExt.substr(2)); + for (const auto AE : ARCHExtNames) { + if (AE.NegFeature && ArchExtBase == AE.getName()) + return AE.NegFeature; + } + } + for (const auto AE : ARCHExtNames) { + if (AE.Feature && ArchExt == AE.getName()) + return AE.Feature; + } + + return nullptr; +} + +StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) { + for (const auto D : HWDivNames) { + if (HWDivKind == D.ID) + return D.getName(); + } + return StringRef(); +} + +StringRef llvm::ARM::getDefaultCPU(StringRef Arch) { unsigned AK = parseArch(Arch); if (AK == ARM::AK_INVALID) - return nullptr; + return StringRef(); // Look for multiple AKs to find the default for pair AK+Name. for (const auto CPU : CPUNames) { if (CPU.ArchID == AK && CPU.Default) - return CPU.Name; + return CPU.getName(); } - return nullptr; + + // If we can't find a default then target the architecture instead + return "generic"; } // ======================================================= // // Parsers // ======================================================= // -StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) { - return StringSwitch(FPU) - .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported - .Case("vfp2", "vfpv2") - .Case("vfp3", "vfpv3") - .Case("vfp4", "vfpv4") - .Case("vfp3-d16", "vfpv3-d16") - .Case("vfp4-d16", "vfpv4-d16") - .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") - .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") - .Case("fp5-sp-d16", "fpv5-sp-d16") - .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") - // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. 
- .Case("neon-vfpv3", "neon") - .Default(FPU); +static StringRef getHWDivSynonym(StringRef HWDiv) { + return StringSwitch(HWDiv) + .Case("thumb,arm", "arm,thumb") + .Default(HWDiv); } -StringRef ARMTargetParser::getArchSynonym(StringRef Arch) { +static StringRef getFPUSynonym(StringRef FPU) { + return StringSwitch(FPU) + .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported + .Case("vfp2", "vfpv2") + .Case("vfp3", "vfpv3") + .Case("vfp4", "vfpv4") + .Case("vfp3-d16", "vfpv3-d16") + .Case("vfp4-d16", "vfpv4-d16") + .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") + .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") + .Case("fp5-sp-d16", "fpv5-sp-d16") + .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") + // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. + .Case("neon-vfpv3", "neon") + .Default(FPU); +} + +static StringRef getArchSynonym(StringRef Arch) { return StringSwitch(Arch) - .Case("v6sm", "v6s-m") - .Case("v6m", "v6-m") - .Case("v7a", "v7-a") - .Case("v7r", "v7-r") - .Case("v7m", "v7-m") - .Case("v7em", "v7e-m") - .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") - .Case("v8.1a", "v8.1-a") - .Default(Arch); + .Case("v5", "v5t") + .Case("v5e", "v5te") + .Case("v6j", "v6") + .Case("v6hl", "v6k") + .Cases("v6m", "v6sm", "v6s-m", "v6-m") + .Cases("v6z", "v6zk", "v6kz") + .Cases("v7", "v7a", "v7hl", "v7l", "v7-a") + .Case("v7r", "v7-r") + .Case("v7m", "v7-m") + .Case("v7em", "v7e-m") + .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") + .Case("v8.1a", "v8.1-a") + .Case("v8.2a", "v8.2-a") + .Default(Arch); } // MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but // (iwmmxt|xscale)(eb)? is also permitted. If the former, return // "v.+", if the latter, return unmodified string, minus 'eb'. // If invalid, return empty string. -StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { +StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) { size_t offset = StringRef::npos; StringRef A = Arch; StringRef Error = ""; @@ -436,7 +435,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { // AArch64 uses "_be", not "eb" suffix. if (A.find("eb") != StringRef::npos) return Error; - if (A.substr(offset,3) == "_be") + if (A.substr(offset, 3) == "_be") offset += 3; } @@ -456,7 +455,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { // Only match non-marketing names if (offset != StringRef::npos) { - // Must start with 'vN'. + // Must start with 'vN'. if (A[0] != 'v' || !std::isdigit(A[1])) return Error; // Can't have an extra 'eb'. @@ -468,56 +467,64 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { return A; } -unsigned ARMTargetParser::parseFPU(StringRef FPU) { +unsigned llvm::ARM::parseHWDiv(StringRef HWDiv) { + StringRef Syn = getHWDivSynonym(HWDiv); + for (const auto D : HWDivNames) { + if (Syn == D.getName()) + return D.ID; + } + return ARM::AEK_INVALID; +} + +unsigned llvm::ARM::parseFPU(StringRef FPU) { StringRef Syn = getFPUSynonym(FPU); for (const auto F : FPUNames) { - if (Syn == F.Name) + if (Syn == F.getName()) return F.ID; } return ARM::FK_INVALID; } // Allows partial match, ex. "v7a" matches "armv7a". 
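// [Editor's note, not part of the upstream patch] The endswith() comparison
// in parseArch() below is what implements this partial match: assuming the
// tables above, "v7a" is first mapped to "v7-a" by getArchSynonym() and then
// matches the tail of "armv7-a". A hedged usage sketch:
//
//   unsigned AK = llvm::ARM::parseArch("armv7a"); // expected: ARM::AK_ARMV7A
//   assert(AK != ARM::AK_INVALID);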
-unsigned ARMTargetParser::parseArch(StringRef Arch) {
+unsigned llvm::ARM::parseArch(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
   StringRef Syn = getArchSynonym(Arch);
   for (const auto A : ARCHNames) {
-    if (StringRef(A.Name).endswith(Syn))
+    if (A.getName().endswith(Syn))
       return A.ID;
   }
   return ARM::AK_INVALID;
 }

-unsigned ARMTargetParser::parseArchExt(StringRef ArchExt) {
+unsigned llvm::ARM::parseArchExt(StringRef ArchExt) {
   for (const auto A : ARCHExtNames) {
-    if (ArchExt == A.Name)
+    if (ArchExt == A.getName())
       return A.ID;
   }
   return ARM::AEK_INVALID;
 }

-unsigned ARMTargetParser::parseCPUArch(StringRef CPU) {
+unsigned llvm::ARM::parseCPUArch(StringRef CPU) {
   for (const auto C : CPUNames) {
-    if (CPU == C.Name)
+    if (CPU == C.getName())
       return C.ArchID;
   }
   return ARM::AK_INVALID;
 }

 // ARM, Thumb, AArch64
-unsigned ARMTargetParser::parseArchISA(StringRef Arch) {
+unsigned llvm::ARM::parseArchISA(StringRef Arch) {
   return StringSwitch<unsigned>(Arch)
       .StartsWith("aarch64", ARM::IK_AARCH64)
-      .StartsWith("arm64", ARM::IK_AARCH64)
-      .StartsWith("thumb", ARM::IK_THUMB)
-      .StartsWith("arm", ARM::IK_ARM)
+      .StartsWith("arm64", ARM::IK_AARCH64)
+      .StartsWith("thumb", ARM::IK_THUMB)
+      .StartsWith("arm", ARM::IK_ARM)
       .Default(ARM::EK_INVALID);
 }

 // Little/Big endian
-unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
-  if (Arch.startswith("armeb") ||
-      Arch.startswith("thumbeb") ||
+unsigned llvm::ARM::parseArchEndian(StringRef Arch) {
+  if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
       Arch.startswith("aarch64_be"))
     return ARM::EK_BIG;

@@ -535,29 +542,29 @@ unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
 }

 // Profile A/R/M
-unsigned ARMTargetParser::parseArchProfile(StringRef Arch) {
+unsigned llvm::ARM::parseArchProfile(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
-  switch(parseArch(Arch)) {
+  switch (parseArch(Arch)) {
   case ARM::AK_ARMV6M:
   case ARM::AK_ARMV7M:
-  case ARM::AK_ARMV6SM:
   case ARM::AK_ARMV7EM:
     return ARM::PK_M;
   case ARM::AK_ARMV7R:
     return ARM::PK_R;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
+  case ARM::AK_ARMV7K:
   case ARM::AK_ARMV8A:
   case ARM::AK_ARMV8_1A:
+  case ARM::AK_ARMV8_2A:
     return ARM::PK_A;
   }
   return ARM::PK_INVALID;
 }

 // Version number (ex. v7 = 7).
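// [Editor's note, not part of the upstream patch] A quick sketch of the
// mapping implemented below, assuming the cases shown:
// parseArchVersion("armv5te") yields 5, parseArchVersion("armv8.1a") yields 8,
// and anything unrecognized falls through to 0.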
-unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
+unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
-  switch(parseArch(Arch)) {
+  switch (parseArch(Arch)) {
   case ARM::AK_ARMV2:
   case ARM::AK_ARMV2A:
     return 2;
@@ -567,36 +574,29 @@ unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
   case ARM::AK_ARMV4:
   case ARM::AK_ARMV4T:
     return 4;
-  case ARM::AK_ARMV5:
   case ARM::AK_ARMV5T:
   case ARM::AK_ARMV5TE:
   case ARM::AK_IWMMXT:
   case ARM::AK_IWMMXT2:
   case ARM::AK_XSCALE:
-  case ARM::AK_ARMV5E:
   case ARM::AK_ARMV5TEJ:
     return 5;
   case ARM::AK_ARMV6:
-  case ARM::AK_ARMV6J:
   case ARM::AK_ARMV6K:
   case ARM::AK_ARMV6T2:
-  case ARM::AK_ARMV6Z:
-  case ARM::AK_ARMV6ZK:
+  case ARM::AK_ARMV6KZ:
   case ARM::AK_ARMV6M:
-  case ARM::AK_ARMV6SM:
-  case ARM::AK_ARMV6HL:
     return 6;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
   case ARM::AK_ARMV7R:
   case ARM::AK_ARMV7M:
-  case ARM::AK_ARMV7L:
-  case ARM::AK_ARMV7HL:
   case ARM::AK_ARMV7S:
   case ARM::AK_ARMV7EM:
+  case ARM::AK_ARMV7K:
     return 7;
   case ARM::AK_ARMV8A:
   case ARM::AK_ARMV8_1A:
+  case ARM::AK_ARMV8_2A:
     return 8;
   }
   return 0;
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp
new file mode 100644
index 000000000000..d4dcb2ee96df
--- /dev/null
+++ b/lib/Support/ThreadPool.cpp
@@ -0,0 +1,155 @@
+//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ThreadPool.h"
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#if LLVM_ENABLE_THREADS
+
+// Default to std::thread::hardware_concurrency
+ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
+
+ThreadPool::ThreadPool(unsigned ThreadCount)
+    : ActiveThreads(0), EnableFlag(true) {
+  // Create ThreadCount threads that will loop forever, wait on QueueCondition
+  // for tasks to be queued or the Pool to be destroyed.
+  Threads.reserve(ThreadCount);
+  for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
+    Threads.emplace_back([&] {
+      while (true) {
+        PackagedTaskTy Task;
+        {
+          std::unique_lock<std::mutex> LockGuard(QueueLock);
+          // Wait for tasks to be pushed in the queue
+          QueueCondition.wait(LockGuard,
+                              [&] { return !EnableFlag || !Tasks.empty(); });
+          // Exit condition
+          if (!EnableFlag && Tasks.empty())
+            return;
+          // Yeah, we have a task, grab it and release the lock on the queue
+
+          // We first need to signal that we are active before popping the queue
+          // in order for wait() to properly detect that even if the queue is
+          // empty, there is still a task in flight.
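// [Editor's note, not part of the upstream patch] The block below bumps
// ActiveThreads while briefly taking CompletionLock; since wait() checks
// "Tasks.empty() && !ActiveThreads" under that same lock, a waiter can never
// observe the queue empty while a just-popped task is still unaccounted for.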
+          {
+            ++ActiveThreads;
+            std::unique_lock<std::mutex> LockGuard(CompletionLock);
+          }
+          Task = std::move(Tasks.front());
+          Tasks.pop();
+        }
+        // Run the task we just grabbed
+#ifndef _MSC_VER
+        Task();
+#else
+        Task(/* unused */ false);
+#endif
+
+        {
+          // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
+          std::unique_lock<std::mutex> LockGuard(CompletionLock);
+          --ActiveThreads;
+        }
+
+        // Notify task completion, in case someone waits on ThreadPool::wait()
+        CompletionCondition.notify_all();
+      }
+    });
+  }
+}
+
+void ThreadPool::wait() {
+  // Wait for all threads to complete and the queue to be empty
+  std::unique_lock<std::mutex> LockGuard(CompletionLock);
+  CompletionCondition.wait(LockGuard,
+                           [&] { return Tasks.empty() && !ActiveThreads; });
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+  /// Wrap the Task in a packaged_task to return a future object.
+  PackagedTaskTy PackagedTask(std::move(Task));
+  auto Future = PackagedTask.get_future();
+  {
+    // Lock the queue and push the new task
+    std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+    // Don't allow enqueueing after disabling the pool
+    assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+
+    Tasks.push(std::move(PackagedTask));
+  }
+  QueueCondition.notify_one();
+  return Future.share();
+}
+
+// The destructor joins all threads, waiting for completion.
+ThreadPool::~ThreadPool() {
+  {
+    std::unique_lock<std::mutex> LockGuard(QueueLock);
+    EnableFlag = false;
+  }
+  QueueCondition.notify_all();
+  for (auto &Worker : Threads)
+    Worker.join();
+}
+
+#else // LLVM_ENABLE_THREADS Disabled
+
+ThreadPool::ThreadPool() : ThreadPool(0) {}
+
+// No threads are launched, issue a warning if ThreadCount is not 0
+ThreadPool::ThreadPool(unsigned ThreadCount)
+    : ActiveThreads(0) {
+  if (ThreadCount) {
+    errs() << "Warning: request a ThreadPool with " << ThreadCount
+           << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
+  }
+}
+
+void ThreadPool::wait() {
+  // Sequential implementation running the tasks
+  while (!Tasks.empty()) {
+    auto Task = std::move(Tasks.front());
+    Tasks.pop();
+#ifndef _MSC_VER
+    Task();
+#else
+    Task(/* unused */ false);
+#endif
+  }
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+#ifndef _MSC_VER
+  // Get a Future with launch::deferred execution using std::async
+  auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+  // Wrap the future so that both ThreadPool::wait() can operate and the
+  // returned future can be sync'ed on.
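// [Editor's note, not part of the upstream patch] std::launch::deferred runs
// the task only when someone calls get()/wait() on the future; queueing the
// wrapper below lets the sequential wait() above force that evaluation by
// invoking Future.get() for each queued entry.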
+  PackagedTaskTy PackagedTask([Future]() { Future.get(); });
+#else
+  auto Future = std::async(std::launch::deferred, std::move(Task), false).share();
+  PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; });
+#endif
+  Tasks.push(std::move(PackagedTask));
+  return Future;
+}
+
+ThreadPool::~ThreadPool() {
+  wait();
+}
+
+#endif
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index 136b93eceefa..94a4c011693c 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Config/config.h"

 namespace llvm {
+
 using namespace sys;

 const TimeValue::SecondsType
@@ -22,8 +23,7 @@ const TimeValue::SecondsType
 const TimeValue::SecondsType
   TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;

-void
-TimeValue::normalize( void ) {
+void TimeValue::normalize() {
   if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
     do {
       seconds_++;
@@ -45,7 +45,7 @@ TimeValue::normalize( void ) {
   }
 }

-}
+} // namespace llvm

 /// Include the platform-specific portion of TimeValue class
 #ifdef LLVM_ON_UNIX
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index d7b65155d6ef..414f559f8f0e 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Support/Timer.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -22,9 +23,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
 // getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
 // of constructor/destructor ordering being unspecified by C++.  Basically the
 // problem is that a Statistic object gets destroyed, which ends up calling
@@ -52,28 +50,27 @@ namespace {
                    cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
 }

-// CreateInfoOutputFile - Return a file stream to print our output on.
-raw_ostream *llvm::CreateInfoOutputFile() {
+// Return a file stream to print our output on.
+std::unique_ptr<raw_ostream> llvm::CreateInfoOutputFile() {
   const std::string &OutputFilename = getLibSupportInfoOutputFilename();
   if (OutputFilename.empty())
-    return new raw_fd_ostream(2, false); // stderr.
+    return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
   if (OutputFilename == "-")
-    return new raw_fd_ostream(1, false); // stdout.
-
+    return llvm::make_unique<raw_fd_ostream>(1, false); // stdout.
+
   // Append mode is used because the info output file is opened and closed
   // each time -stats or -time-passes wants to print output to it. To
   // compensate for this, the test-suite Makefiles have code to delete the
   // info output file before running commands which write to it.
   std::error_code EC;
-  raw_ostream *Result = new raw_fd_ostream(OutputFilename, EC,
-                                           sys::fs::F_Append | sys::fs::F_Text);
+  auto Result = llvm::make_unique<raw_fd_ostream>(
+      OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text);
   if (!EC)
     return Result;
-
+
   errs() << "Error opening info-output-file '"
          << OutputFilename << " for appending!\n";
-  delete Result;
-  return new raw_fd_ostream(2, false); // stderr.
+  return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
 }

@@ -99,17 +96,13 @@ static TimerGroup *getDefaultTimerGroup() {
 //===----------------------------------------------------------------------===//

 void Timer::init(StringRef N) {
-  assert(!TG && "Timer already initialized");
-  Name.assign(N.begin(), N.end());
-  Started = false;
-  TG = getDefaultTimerGroup();
-  TG->addTimer(*this);
+  init(N, *getDefaultTimerGroup());
 }

 void Timer::init(StringRef N, TimerGroup &tg) {
   assert(!TG && "Timer already initialized");
   Name.assign(N.begin(), N.end());
-  Started = false;
+  Running = Triggered = false;
   TG = &tg;
   TG->addTimer(*this);
 }
@@ -142,25 +135,22 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
   return Result;
 }

-static ManagedStatic<std::vector<Timer*> > ActiveTimers;
-
 void Timer::startTimer() {
-  Started = true;
-  ActiveTimers->push_back(this);
-  Time -= TimeRecord::getCurrentTime(true);
+  assert(!Running && "Cannot start a running timer");
+  Running = Triggered = true;
+  StartTime = TimeRecord::getCurrentTime(true);
 }

 void Timer::stopTimer() {
+  assert(Running && "Cannot stop a paused timer");
+  Running = false;
   Time += TimeRecord::getCurrentTime(false);
+  Time -= StartTime;
+}

-  if (ActiveTimers->back() == this) {
-    ActiveTimers->pop_back();
-  } else {
-    std::vector<Timer*>::iterator I =
-      std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
-    assert(I != ActiveTimers->end() && "stop but no startTimer?");
-    ActiveTimers->erase(I);
-  }
+void Timer::clear() {
+  Running = Triggered = false;
+  Time = StartTime = TimeRecord();
 }

 static void printVal(double Val, double Total, raw_ostream &OS) {
@@ -278,8 +268,8 @@ void TimerGroup::removeTimer(Timer &T) {
   sys::SmartScopedLock<true> L(*TimerLock);

   // If the timer was started, move its data to TimersToPrint.
-  if (T.Started)
-    TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+  if (T.hasTriggered())
+    TimersToPrint.emplace_back(T.Time, T.Name);

   T.TG = nullptr;

@@ -292,10 +282,9 @@ void TimerGroup::removeTimer(Timer &T) {
   // them were started.
   if (FirstTimer || TimersToPrint.empty())
     return;
-
-  raw_ostream *OutStream = CreateInfoOutputFile();
+
+  std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
   PrintQueuedTimers(*OutStream);
-  delete OutStream;   // Close the file.
 }

 void TimerGroup::addTimer(Timer &T) {
@@ -314,8 +303,8 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
   std::sort(TimersToPrint.begin(), TimersToPrint.end());

   TimeRecord Total;
-  for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
-    Total += TimersToPrint[i].first;
+  for (auto &RecordNamePair : TimersToPrint)
+    Total += RecordNamePair.first;

   // Print out timing header.
   OS << "===" << std::string(73, '-') << "===\n";
@@ -365,12 +354,11 @@ void TimerGroup::print(raw_ostream &OS) {
   // See if any of our timers were started, if so add them to TimersToPrint and
   // reset them.
   for (Timer *T = FirstTimer; T; T = T->Next) {
-    if (!T->Started) continue;
-    TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+    if (!T->hasTriggered()) continue;
+    TimersToPrint.emplace_back(T->Time, T->Name);

     // Clear out the time.
-    T->Started = 0;
-    T->Time = TimeRecord();
+    T->clear();
   }

   // If any timers were started, print the group.
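[Editor's note, not part of the upstream patch] Statistic.cpp and Timer.cpp
above now share the unique_ptr-returning CreateInfoOutputFile(), so the old
"delete &OutStream" idiom disappears. A minimal sketch of the new call
pattern, assuming only the declarations visible in these hunks:

    {
      std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
      *OutStream << "some report\n";
    } // the stream is flushed and closed when the unique_ptr goes out of scope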
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c6646fb101b7..3bb1116007ed 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -25,6 +25,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case aarch64_be:  return "aarch64_be";
   case arm:         return "arm";
   case armeb:       return "armeb";
+  case avr:         return "avr";
   case bpfel:       return "bpfel";
   case bpfeb:       return "bpfeb";
   case hexagon:     return "hexagon";
@@ -80,6 +81,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case thumb:
   case thumbeb:     return "arm";

+  case avr:         return "avr";
+
   case ppc64:
   case ppc64le:
   case ppc:         return "ppc";
@@ -124,8 +127,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case spir64:      return "spir";
   case kalimba:     return "kalimba";
   case shave:       return "shave";
-  case wasm32:      return "wasm32";
-  case wasm64:      return "wasm64";
+  case wasm32:
+  case wasm64:      return "wasm";
   }
 }

@@ -144,6 +147,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
   case MipsTechnologies: return "mti";
   case NVIDIA: return "nvidia";
   case CSR: return "csr";
+  case Myriad: return "myriad";
   }

   llvm_unreachable("Invalid VendorType!");
@@ -177,6 +181,9 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case NVCL: return "nvcl";
   case AMDHSA: return "amdhsa";
   case PS4: return "ps4";
+  case ELFIAMCU: return "elfiamcu";
+  case TvOS: return "tvos";
+  case WatchOS: return "watchos";
   }

   llvm_unreachable("Invalid OSType");
@@ -196,6 +203,8 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case MSVC: return "msvc";
   case Itanium: return "itanium";
   case Cygnus: return "cygnus";
+  case AMDOpenCL: return "amdopencl";
+  case CoreCLR: return "coreclr";
   }

   llvm_unreachable("Invalid EnvironmentType!");
@@ -224,6 +233,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
     .Case("arm", arm)
     .Case("armeb", armeb)
+    .Case("avr", avr)
     .StartsWith("bpf", BPFArch)
     .Case("mips", mips)
     .Case("mipsel", mipsel)
@@ -265,8 +275,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
 }

 static Triple::ArchType parseARMArch(StringRef ArchName) {
-  unsigned ISA = ARMTargetParser::parseArchISA(ArchName);
-  unsigned ENDIAN = ARMTargetParser::parseArchEndian(ArchName);
+  unsigned ISA = ARM::parseArchISA(ArchName);
+  unsigned ENDIAN = ARM::parseArchEndian(ArchName);

   Triple::ArchType arch = Triple::UnknownArch;
   switch (ENDIAN) {
@@ -300,7 +310,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
     }
   }

-  ArchName = ARMTargetParser::getCanonicalArchName(ArchName);
+  ArchName = ARM::getCanonicalArchName(ArchName);
   if (ArchName.empty())
     return Triple::UnknownArch;

@@ -310,8 +320,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
     return Triple::UnknownArch;

   // Thumb only for v6m
-  unsigned Profile = ARMTargetParser::parseArchProfile(ArchName);
-  unsigned Version = ARMTargetParser::parseArchVersion(ArchName);
+  unsigned Profile = ARM::parseArchProfile(ArchName);
+  unsigned Version = ARM::parseArchVersion(ArchName);
   if (Profile == ARM::PK_M && Version == 6) {
     if (ENDIAN == ARM::EK_BIG)
       return Triple::thumbeb;
@@ -323,10 +333,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
 }

 static Triple::ArchType parseArch(StringRef ArchName) {
-  Triple::ArchType ARMArch(parseARMArch(ArchName));
-  Triple::ArchType BPFArch(parseBPFArch(ArchName));
-
-  return StringSwitch<Triple::ArchType>(ArchName)
+  auto AT = StringSwitch<Triple::ArchType>(ArchName)
     .Cases("i386", "i486", "i586", "i686", Triple::x86)
     // FIXME: Do we need to support these?
     .Cases("i786", "i886", "i986", Triple::x86)
@@ -336,9 +343,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("powerpc64le", Triple::ppc64le)
     .Case("xscale", Triple::arm)
     .Case("xscaleeb", Triple::armeb)
-    .StartsWith("arm", ARMArch)
-    .StartsWith("thumb", ARMArch)
-    .StartsWith("aarch64", ARMArch)
+    .Case("aarch64", Triple::aarch64)
+    .Case("aarch64_be", Triple::aarch64_be)
+    .Case("arm64", Triple::aarch64)
+    .Case("arm", Triple::arm)
+    .Case("armeb", Triple::armeb)
+    .Case("thumb", Triple::thumb)
+    .Case("thumbeb", Triple::thumbeb)
+    .Case("avr", Triple::avr)
     .Case("msp430", Triple::msp430)
     .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
     .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -346,7 +358,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("mips64el", Triple::mips64el)
     .Case("r600", Triple::r600)
     .Case("amdgcn", Triple::amdgcn)
-    .StartsWith("bpf", BPFArch)
     .Case("hexagon", Triple::hexagon)
     .Case("s390x", Triple::systemz)
     .Case("sparc", Triple::sparc)
@@ -369,6 +380,18 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("wasm32", Triple::wasm32)
     .Case("wasm64", Triple::wasm64)
     .Default(Triple::UnknownArch);
+
+  // Some architectures require special parsing logic just to compute the
+  // ArchType result.
+  if (AT == Triple::UnknownArch) {
+    if (ArchName.startswith("arm") || ArchName.startswith("thumb") ||
+        ArchName.startswith("aarch64"))
+      return parseARMArch(ArchName);
+    if (ArchName.startswith("bpf"))
+      return parseBPFArch(ArchName);
+  }
+
+  return AT;
 }

 static Triple::VendorType parseVendor(StringRef VendorName) {
@@ -384,6 +407,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
     .Case("mti", Triple::MipsTechnologies)
     .Case("nvidia", Triple::NVIDIA)
     .Case("csr", Triple::CSR)
+    .Case("myriad", Triple::Myriad)
     .Default(Triple::UnknownVendor);
 }

@@ -414,6 +438,9 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("nvcl", Triple::NVCL)
     .StartsWith("amdhsa", Triple::AMDHSA)
     .StartsWith("ps4", Triple::PS4)
+    .StartsWith("elfiamcu", Triple::ELFIAMCU)
+    .StartsWith("tvos", Triple::TvOS)
+    .StartsWith("watchos", Triple::WatchOS)
     .Default(Triple::UnknownOS);
 }

@@ -430,6 +457,8 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
     .StartsWith("msvc", Triple::MSVC)
     .StartsWith("itanium", Triple::Itanium)
     .StartsWith("cygnus", Triple::Cygnus)
+    .StartsWith("amdopencl", Triple::AMDOpenCL)
+    .StartsWith("coreclr", Triple::CoreCLR)
     .Default(Triple::UnknownEnvironment);
 }

@@ -442,7 +471,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
 }

 static Triple::SubArchType parseSubArch(StringRef SubArchName) {
-  StringRef ARMSubArch = ARMTargetParser::getCanonicalArchName(SubArchName);
+  StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);

   // For now, this is the small part. Early return.
   if (ARMSubArch.empty())
@@ -453,14 +482,12 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     .Default(Triple::NoSubArch);

   // ARM sub arch.
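// [Editor's note, not part of the upstream patch] With the synonym table
// rewrite earlier in this patch, a spelling such as "armv6zk" now
// canonicalizes to "v6zk", maps through the "v6kz" synonym, and lands on
// ARM::AK_ARMV6KZ, which the switch below folds into Triple::ARMSubArch_v6k
// (a sketch of one path, not exhaustive).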
-  switch(ARMTargetParser::parseArch(ARMSubArch)) {
+  switch(ARM::parseArch(ARMSubArch)) {
   case ARM::AK_ARMV4:
     return Triple::NoSubArch;
   case ARM::AK_ARMV4T:
     return Triple::ARMSubArch_v4t;
-  case ARM::AK_ARMV5:
   case ARM::AK_ARMV5T:
-  case ARM::AK_ARMV5E:
     return Triple::ARMSubArch_v5;
   case ARM::AK_ARMV5TE:
   case ARM::AK_IWMMXT:
@@ -469,24 +496,19 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
   case ARM::AK_ARMV5TEJ:
     return Triple::ARMSubArch_v5te;
   case ARM::AK_ARMV6:
-  case ARM::AK_ARMV6J:
-  case ARM::AK_ARMV6Z:
     return Triple::ARMSubArch_v6;
   case ARM::AK_ARMV6K:
-  case ARM::AK_ARMV6ZK:
-  case ARM::AK_ARMV6HL:
+  case ARM::AK_ARMV6KZ:
     return Triple::ARMSubArch_v6k;
   case ARM::AK_ARMV6T2:
     return Triple::ARMSubArch_v6t2;
   case ARM::AK_ARMV6M:
-  case ARM::AK_ARMV6SM:
     return Triple::ARMSubArch_v6m;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
   case ARM::AK_ARMV7R:
-  case ARM::AK_ARMV7L:
-  case ARM::AK_ARMV7HL:
     return Triple::ARMSubArch_v7;
+  case ARM::AK_ARMV7K:
+    return Triple::ARMSubArch_v7k;
   case ARM::AK_ARMV7M:
     return Triple::ARMSubArch_v7m;
   case ARM::AK_ARMV7S:
@@ -497,6 +519,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     return Triple::ARMSubArch_v8;
   case ARM::AK_ARMV8_1A:
     return Triple::ARMSubArch_v8_1a;
+  case ARM::AK_ARMV8_2A:
+    return Triple::ARMSubArch_v8_2a;
   default:
     return Triple::NoSubArch;
   }
@@ -514,20 +538,53 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {

 static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   switch (T.getArch()) {
-  default:
-    break;
+  case Triple::UnknownArch:
+  case Triple::aarch64:
+  case Triple::arm:
+  case Triple::thumb:
+  case Triple::x86:
+  case Triple::x86_64:
+    if (T.isOSDarwin())
+      return Triple::MachO;
+    else if (T.isOSWindows())
+      return Triple::COFF;
+    return Triple::ELF;
+
+  case Triple::aarch64_be:
+  case Triple::amdgcn:
+  case Triple::amdil:
+  case Triple::amdil64:
+  case Triple::armeb:
+  case Triple::avr:
+  case Triple::bpfeb:
+  case Triple::bpfel:
   case Triple::hexagon:
+  case Triple::hsail:
+  case Triple::hsail64:
+  case Triple::kalimba:
+  case Triple::le32:
+  case Triple::le64:
   case Triple::mips:
-  case Triple::mipsel:
   case Triple::mips64:
   case Triple::mips64el:
-  case Triple::r600:
-  case Triple::amdgcn:
-  case Triple::sparc:
-  case Triple::sparcv9:
-  case Triple::systemz:
-  case Triple::xcore:
+  case Triple::mipsel:
+  case Triple::msp430:
+  case Triple::nvptx:
+  case Triple::nvptx64:
   case Triple::ppc64le:
+  case Triple::r600:
+  case Triple::shave:
+  case Triple::sparc:
+  case Triple::sparcel:
+  case Triple::sparcv9:
+  case Triple::spir:
+  case Triple::spir64:
+  case Triple::systemz:
+  case Triple::tce:
+  case Triple::thumbeb:
+  case Triple::wasm32:
+  case Triple::wasm64:
+  case Triple::xcore:
     return Triple::ELF;

   case Triple::ppc:
@@ -536,12 +593,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
       return Triple::MachO;
     return Triple::ELF;
   }
-
-  if (T.isOSDarwin())
-    return Triple::MachO;
-  else if (T.isOSWindows())
-    return Triple::COFF;
-  return Triple::ELF;
+  llvm_unreachable("unknown architecture");
 }

 /// \brief Construct a triple from the string representation provided.
@@ -549,14 +601,27 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
 /// This stores the string representation and parses the various pieces into
 /// enum members.
 Triple::Triple(const Twine &Str)
-    : Data(Str.str()),
-      Arch(parseArch(getArchName())),
-      SubArch(parseSubArch(getArchName())),
-      Vendor(parseVendor(getVendorName())),
-      OS(parseOS(getOSName())),
-      Environment(parseEnvironment(getEnvironmentName())),
-      ObjectFormat(parseFormat(getEnvironmentName())) {
-  if (ObjectFormat == Triple::UnknownObjectFormat)
+    : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch),
+      Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment),
+      ObjectFormat(UnknownObjectFormat) {
+  // Do minimal parsing by hand here.
+  SmallVector<StringRef, 4> Components;
+  StringRef(Data).split(Components, '-', /*MaxSplit*/ 3);
+  if (Components.size() > 0) {
+    Arch = parseArch(Components[0]);
+    SubArch = parseSubArch(Components[0]);
+    if (Components.size() > 1) {
+      Vendor = parseVendor(Components[1]);
+      if (Components.size() > 2) {
+        OS = parseOS(Components[2]);
+        if (Components.size() > 3) {
+          Environment = parseEnvironment(Components[3]);
+          ObjectFormat = parseFormat(Components[3]);
+        }
+      }
+    }
+  }
+  if (ObjectFormat == UnknownObjectFormat)
     ObjectFormat = getDefaultFormat(*this);
 }

@@ -601,7 +666,7 @@ std::string Triple::normalize(StringRef Str) {

   // Parse into components.
   SmallVector<StringRef, 4> Components;
-  Str.split(Components, "-");
+  Str.split(Components, '-');

   // If the first component corresponds to a known architecture, preferentially
   // use it for the architecture.  If the second component corresponds to a
@@ -889,6 +954,8 @@ bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
       return false;
     break;
   case IOS:
+  case TvOS:
+  case WatchOS:
     // Ignore the version from the triple.  This is only handled because the
     // the clang driver combines OS X and IOS support into a common Darwin
     // toolchain that wants to know the OS X version number even when targeting
@@ -916,11 +983,38 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
     Micro = 0;
     break;
   case IOS:
+  case TvOS:
     getOSVersion(Major, Minor, Micro);
     // Default to 5.0 (or 7.0 for arm64).
     if (Major == 0)
       Major = (getArch() == aarch64) ? 7 : 5;
     break;
+  case WatchOS:
+    llvm_unreachable("conflicting triple info");
+  }
+}
+
+void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor,
+                               unsigned &Micro) const {
+  switch (getOS()) {
+  default: llvm_unreachable("unexpected OS for Darwin triple");
+  case Darwin:
+  case MacOSX:
+    // Ignore the version from the triple.  This is only handled because the
+    // the clang driver combines OS X and IOS support into a common Darwin
+    // toolchain that wants to know the iOS version number even when targeting
+    // OS X.
+    Major = 2;
+    Minor = 0;
+    Micro = 0;
+    break;
+  case WatchOS:
+    getOSVersion(Major, Minor, Micro);
+    if (Major == 0)
+      Major = 2;
+    break;
+  case IOS:
+    llvm_unreachable("conflicting triple info");
   }
 }

@@ -993,6 +1087,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::UnknownArch:
     return 0;

+  case llvm::Triple::avr:
   case llvm::Triple::msp430:
     return 16;

@@ -1062,6 +1157,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::aarch64:
   case Triple::aarch64_be:
   case Triple::amdgcn:
+  case Triple::avr:
   case Triple::bpfel:
   case Triple::bpfeb:
   case Triple::msp430:
@@ -1116,6 +1212,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::UnknownArch:
   case Triple::arm:
   case Triple::armeb:
+  case Triple::avr:
   case Triple::hexagon:
   case Triple::kalimba:
   case Triple::msp430:
@@ -1172,6 +1269,7 @@ Triple Triple::getBigEndianArchVariant() const {
   case Triple::amdgcn:
   case Triple::amdil64:
   case Triple::amdil:
+  case Triple::avr:
   case Triple::hexagon:
   case Triple::hsail64:
   case Triple::hsail:
@@ -1244,6 +1342,7 @@ Triple Triple::getLittleEndianArchVariant() const {
   case Triple::amdil64:
   case Triple::amdil:
   case Triple::arm:
+  case Triple::avr:
   case Triple::bpfel:
   case Triple::hexagon:
   case Triple::hsail64:
@@ -1281,10 +1380,10 @@ Triple Triple::getLittleEndianArchVariant() const {
   return T;
 }

-const char *Triple::getARMCPUForArch(StringRef MArch) const {
+StringRef Triple::getARMCPUForArch(StringRef MArch) const {
   if (MArch.empty())
     MArch = getArchName();
-  MArch = ARMTargetParser::getCanonicalArchName(MArch);
+  MArch = ARM::getCanonicalArchName(MArch);

   // Some defaults are forced.
   switch (getOS()) {
@@ -1296,15 +1395,21 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
   case llvm::Triple::Win32:
     // FIXME: this is invalid for WindowsCE
     return "cortex-a9";
+  case llvm::Triple::MacOSX:
+  case llvm::Triple::IOS:
+  case llvm::Triple::WatchOS:
+    if (MArch == "v7k")
+      return "cortex-a7";
+    break;
   default:
     break;
   }

   if (MArch.empty())
-    return nullptr;
+    return StringRef();

-  const char *CPU = ARMTargetParser::getDefaultCPU(MArch);
-  if (CPU)
+  StringRef CPU = ARM::getDefaultCPU(MArch);
+  if (!CPU.empty())
     return CPU;

   // If no specific architecture version is requested, return the minimum CPU
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index c421ee84c2b7..d70319168b84 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -50,9 +50,8 @@ int getPosixProtectionFlags(unsigned Flags) {
     return PROT_READ | PROT_WRITE;
   case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_EXEC;
-  case llvm::sys::Memory::MF_READ |
-       llvm::sys::Memory::MF_WRITE |
-       llvm::sys::Memory::MF_EXEC:
+  case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE |
+      llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_WRITE | PROT_EXEC;
   case llvm::sys::Memory::MF_EXEC:
 #if defined(__FreeBSD__)
@@ -153,6 +152,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) {

 std::error_code
 Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
+  static const size_t PageSize = Process::getPageSize();
   if (M.Address == nullptr || M.Size == 0)
     return std::error_code();

@@ -161,7 +161,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {

   int Protect = getPosixProtectionFlags(Flags);

-  int Result = ::mprotect(M.Address, M.Size, Protect);
+  int Result = ::mprotect((void*)((uintptr_t)M.Address & ~(PageSize-1)), PageSize*((M.Size+PageSize-1)/PageSize), Protect);
   if (Result != 0)
     return std::error_code(errno, std::generic_category());

@@ -181,7 +181,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
                     std::string *ErrMsg) {
   if (NumBytes == 0) return MemoryBlock();

-  size_t PageSize = Process::getPageSize();
+  static const size_t PageSize = Process::getPageSize();
   size_t NumPages = (NumBytes+PageSize-1)/PageSize;

   int fd = -1;
@@ -265,15 +265,12 @@ bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
 }

 bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
-#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
   if (M.Address == 0 || M.Size == 0) return false;
   Memory::InvalidateInstructionCache(M.Address, M.Size);
+#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
     (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
   return KERN_SUCCESS == kr;
-#elif defined(__arm__) || defined(__aarch64__)
-  Memory::InvalidateInstructionCache(M.Address, M.Size);
-  return true;
 #else
   return true;
 #endif
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 973d010dcac1..d85c37ab3bfa 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -75,12 +75,12 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
   char fullpath[PATH_MAX];

   snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
-  if (realpath(fullpath, ret) == NULL)
-    return (1);
+  if (!realpath(fullpath, ret))
+    return 1;
   if (stat(fullpath, &sb) != 0)
-    return (1);
+    return 1;

-  return (0);
+  return 0;
 }

 static char *
@@ -91,34 +91,34 @@ getprogpath(char ret[PATH_MAX], const char *bin)

   /* First approach: absolute path. */
   if (bin[0] == '/') {
     if (test_dir(ret, "/", bin) == 0)
-      return (ret);
-    return (NULL);
+      return ret;
+    return nullptr;
   }

   /* Second approach: relative path. */
-  if (strchr(bin, '/') != NULL) {
+  if (strchr(bin, '/')) {
     char cwd[PATH_MAX];
-    if (getcwd(cwd, PATH_MAX) == NULL)
-      return (NULL);
+    if (!getcwd(cwd, PATH_MAX))
+      return nullptr;
     if (test_dir(ret, cwd, bin) == 0)
-      return (ret);
-    return (NULL);
+      return ret;
+    return nullptr;
   }

   /* Third approach: $PATH */
-  if ((pv = getenv("PATH")) == NULL)
-    return (NULL);
+  if ((pv = getenv("PATH")) == nullptr)
+    return nullptr;
   s = pv = strdup(pv);
-  if (pv == NULL)
-    return (NULL);
-  while ((t = strsep(&s, ":")) != NULL) {
+  if (!pv)
+    return nullptr;
+  while ((t = strsep(&s, ":")) != nullptr) {
     if (test_dir(ret, t, bin) == 0) {
       free(pv);
-      return (ret);
+      return ret;
     }
   }
   free(pv);
-  return (NULL);
+  return nullptr;
 }
 #endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__

@@ -153,8 +153,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
       return std::string(exe_path, len);
   } else {
     // Fall back to the classical detection.
-    if (getprogpath(exe_path, argv0) != NULL)
-      return exe_path;
+    if (getprogpath(exe_path, argv0))
+      return exe_path;
   }
 #elif defined(HAVE_DLFCN_H)
   // Use dladdr to get executable path if available.
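// [Editor's note, not part of the upstream patch] The getprogpath() changes
// above are purely stylistic (nullptr and unparenthesized returns); the
// detection order is unchanged: absolute path, then a path relative to the
// current directory, then each $PATH entry via strsep(). Hypothetical call,
// mirroring the fallback in getMainExecutable():
//
//   char exe_path[PATH_MAX];
//   if (getprogpath(exe_path, argv0)) // returns ret on success, else nullptr
//     return exe_path;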
@@ -219,11 +219,12 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
   return std::error_code();
 }

-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+                                 perms Perms) {
   SmallString<128> path_storage;
   StringRef p = path.toNullTerminatedStringRef(path_storage);

-  if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+  if (::mkdir(p.begin(), Perms) == -1) {
     if (errno != EEXIST || !IgnoreExisting)
       return std::error_code(errno, std::generic_category());
   }
@@ -324,6 +325,10 @@ std::error_code access(const Twine &Path, AccessMode Mode) {
   return std::error_code();
 }

+bool can_execute(const Twine &Path) {
+  return !access(Path, AccessMode::Execute);
+}
+
 bool equivalent(file_status A, file_status B) {
   assert(status_known(A) && status_known(B));
   return A.fs_st_dev == B.fs_st_dev &&
@@ -555,6 +560,54 @@ bool home_directory(SmallVectorImpl<char> &result) {
   return false;
 }

+static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
+  #if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
+  // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+  // macros defined in <unistd.h> on darwin >= 9
+  int ConfName = TempDir ? _CS_DARWIN_USER_TEMP_DIR
+                         : _CS_DARWIN_USER_CACHE_DIR;
+  size_t ConfLen = confstr(ConfName, nullptr, 0);
+  if (ConfLen > 0) {
+    do {
+      Result.resize(ConfLen);
+      ConfLen = confstr(ConfName, Result.data(), Result.size());
+    } while (ConfLen > 0 && ConfLen != Result.size());
+
+    if (ConfLen > 0) {
+      assert(Result.back() == 0);
+      Result.pop_back();
+      return true;
+    }
+
+    Result.clear();
+  }
+  #endif
+  return false;
+}
+
+static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+  // First try using XDS_CACHE_HOME env variable,
+  // as specified in XDG Base Directory Specification at
+  // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+  if (const char *XdsCacheDir = std::getenv("XDS_CACHE_HOME")) {
+    Result.clear();
+    Result.append(XdsCacheDir, XdsCacheDir + strlen(XdsCacheDir));
+    return true;
+  }
+
+  // Try Darwin configuration query
+  if (getDarwinConfDir(false, Result))
+    return true;
+
+  // Use "$HOME/.cache" if $HOME is available
+  if (home_directory(Result)) {
+    append(Result, ".cache");
+    return true;
+  }
+
+  return false;
+}
+
 static const char *getEnvTempDir() {
   // Check whether the temporary directory is specified by an environment
   // variable.
@@ -589,27 +642,8 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
     }
   }

-#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
-  // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
-  // macros defined in <unistd.h> on darwin >= 9
-  int ConfName = ErasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
-                               : _CS_DARWIN_USER_CACHE_DIR;
-  size_t ConfLen = confstr(ConfName, nullptr, 0);
-  if (ConfLen > 0) {
-    do {
-      Result.resize(ConfLen);
-      ConfLen = confstr(ConfName, Result.data(), Result.size());
-    } while (ConfLen > 0 && ConfLen != Result.size());
-
-    if (ConfLen > 0) {
-      assert(Result.back() == 0);
-      Result.pop_back();
-      return;
-    }
-
-    Result.clear();
-  }
-#endif
+  if (getDarwinConfDir(ErasedOnReboot, Result))
+    return;

   const char *RequestedDir = getDefaultTempDir(ErasedOnReboot);
   Result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index df13bd221739..27083eeb072d 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -430,13 +430,18 @@ const char *Process::ResetColor() {
 #if !defined(HAVE_DECL_ARC4RANDOM) || !HAVE_DECL_ARC4RANDOM
 static unsigned GetRandomNumberSeed() {
   // Attempt to get the initial seed from /dev/urandom, if possible.
-  if (FILE *RandomSource = ::fopen("/dev/urandom", "r")) {
+  int urandomFD = open("/dev/urandom", O_RDONLY);
+
+  if (urandomFD != -1) {
     unsigned seed;
-    int count = ::fread((void *)&seed, sizeof(seed), 1, RandomSource);
-    ::fclose(RandomSource);
+    // Don't use a buffered read to avoid reading more data
+    // from /dev/urandom than we need.
+    int count = read(urandomFD, (void *)&seed, sizeof(seed));
+
+    close(urandomFD);

     // Return the seed if the read was successful.
-    if (count == 1)
+    if (count == sizeof(seed))
       return seed;
   }
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 8947b62e4dc2..a8d1fe3c07d0 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -323,7 +323,6 @@ namespace llvm {

 ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
                       bool WaitUntilTerminates, std::string *ErrMsg) {
-#ifdef HAVE_SYS_WAIT_H
   struct sigaction Act, Old;
   assert(PI.Pid && "invalid pid to wait on, process not started?");

@@ -417,12 +416,6 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
     // signal during execution as opposed to failing to execute.
     WaitResult.ReturnCode = -2;
   }
-#else
-  if (ErrMsg)
-    *ErrMsg = "Program::Wait is not implemented on this platform yet!";
-  ProcessInfo WaitResult;
-  WaitResult.ReturnCode = -2;
-#endif
   return WaitResult;
 }

diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index bfe2a3a380ed..061cdb3da216 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -17,7 +17,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Program.h"
@@ -25,7 +24,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <string>
-#include <vector>
 #if HAVE_EXECINFO_H
 # include <execinfo.h>         // For backtrace().
 #endif
@@ -58,8 +56,6 @@ static ManagedStatic<sys::SmartMutex<true> > SignalsMutex;
 static void (*InterruptFunction)() = nullptr;

 static ManagedStatic<std::vector<std::string>> FilesToRemove;
-static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
-    CallBacksToRun;

 // IntSigs - Signals that represent requested termination. There's no bug
 // or failure, or if there is, it's not our direct responsibility. For whatever
@@ -90,12 +86,11 @@ static unsigned NumRegisteredSignals = 0;
 static struct {
   struct sigaction SA;
   int SigNo;
-} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+} RegisteredSignalInfo[array_lengthof(IntSigs) + array_lengthof(KillSigs)];

 static void RegisterHandler(int Signal) {
-  assert(NumRegisteredSignals <
-         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+  assert(NumRegisteredSignals < array_lengthof(RegisteredSignalInfo) &&
          "Out of space for signal handlers!");

   struct sigaction NewHandler;
@@ -117,7 +112,7 @@ static void RegisterHandlers() {
   // during handling an actual signal because you can't safely call new in a
   // signal handler.
   *SignalsMutex;
-  
+
   // If the handlers are already registered, we're done.
   if (NumRegisteredSignals != 0) return;

@@ -148,9 +143,6 @@ static void RemoveFilesToRemove() {
   // memory.
   std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
   for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i) {
-    // We rely on a std::string implementation for which repeated calls to
-    // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
-    // strings to try to ensure this is safe.
     const char *path = FilesToRemoveRef[i].c_str();

     // Get the status so we can determine if it's a file or directory. If we
@@ -164,7 +156,7 @@ static void RemoveFilesToRemove() {
     // super-user permissions.
     if (!S_ISREG(buf.st_mode))
       continue;
-  
+
     // Otherwise, remove the file. We ignore any errors here as there is nothing
     // else we can do.
     unlink(path);
@@ -205,11 +197,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
   }

   // Otherwise if it is a fault (like SEGV) run any handler.
-  if (CallBacksToRun.isConstructed()) {
-    auto &CallBacksToRunRef = *CallBacksToRun;
-    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
-      CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
-  }
+  llvm::sys::RunSignalHandlers();

 #ifdef __s390__
   // On S/390, certain signals are delivered with PSW Address pointing to
@@ -239,21 +227,7 @@ bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
                                    std::string* ErrMsg) {
   {
     sys::SmartScopedLock<true> Guard(*SignalsMutex);
-    std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
-    std::string *OldPtr =
-        FilesToRemoveRef.empty() ? nullptr : &FilesToRemoveRef[0];
-    FilesToRemoveRef.push_back(Filename);
-
-    // We want to call 'c_str()' on every std::string in this vector so that if
-    // the underlying implementation requires a re-allocation, it happens here
-    // rather than inside of the signal handler. If we see the vector grow, we
-    // have to call it on every entry. If it remains in place, we only need to
-    // call it on the latest one.
-    if (OldPtr == &FilesToRemoveRef[0])
-      FilesToRemoveRef.back().c_str();
-    else
-      for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i)
-        FilesToRemoveRef[i].c_str();
+    FilesToRemove->push_back(Filename);
   }

   RegisterHandlers();
@@ -268,13 +242,6 @@ void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
   std::vector<std::string>::iterator I = FilesToRemove->end();
   if (RI != FilesToRemove->rend())
     I = FilesToRemove->erase(RI.base()-1);
-
-  // We need to call c_str() on every element which would have been moved by
-  // the erase. These elements, in a C++98 implementation where c_str()
-  // requires a reallocation on the first call may have had the call to c_str()
-  // made on insertion become invalid by being copied down an element.
- for (std::vector::iterator E = FilesToRemove->end(); I != E; ++I) - I->c_str(); } /// AddSignalHandler - Add a function to be called when a signal is delivered @@ -285,10 +252,9 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { RegisterHandlers(); } -#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) - -#if HAVE_LINK_H && (defined(__linux__) || defined(__FreeBSD__) || \ - defined(__FreeBSD_kernel__) || defined(__NetBSD__)) +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && HAVE_LINK_H && \ + (defined(__linux__) || defined(__FreeBSD__) || \ + defined(__FreeBSD_kernel__) || defined(__NetBSD__)) struct DlIteratePhdrData { void **StackTrace; int depth; @@ -321,108 +287,27 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { return 0; } +/// If this is an ELF platform, we can find all loaded modules and their virtual +/// addresses with dl_iterate_phdr. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, - const char *MainExecutableName) { + const char *MainExecutableName, + StringSaver &StrPool) { DlIteratePhdrData data = {StackTrace, Depth, true, Modules, Offsets, MainExecutableName}; dl_iterate_phdr(dl_iterate_phdr_cb, &data); return true; } #else +/// This platform does not have dl_iterate_phdr, so we do not yet know how to +/// find all loaded DSOs. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, - const char *MainExecutableName) { + const char *MainExecutableName, + StringSaver &StrPool) { return false; } -#endif - -static bool printSymbolizedStackTrace(void **StackTrace, int Depth, - llvm::raw_ostream &OS) { - // FIXME: Subtract necessary number from StackTrace entries to turn return addresses - // into actual instruction addresses. - // Use llvm-symbolizer tool to symbolize the stack traces. - ErrorOr LLVMSymbolizerPathOrErr = - sys::findProgramByName("llvm-symbolizer"); - if (!LLVMSymbolizerPathOrErr) - return false; - const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; - // We don't know argv0 or the address of main() at this point, but try - // to guess it anyway (it's possible on some platforms). 
- std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); - if (MainExecutableName.empty() || - MainExecutableName.find("llvm-symbolizer") != std::string::npos) - return false; - - std::vector Modules(Depth, nullptr); - std::vector Offsets(Depth, 0); - if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), - MainExecutableName.c_str())) - return false; - int InputFD; - SmallString<32> InputFile, OutputFile; - sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); - sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); - FileRemover InputRemover(InputFile.c_str()); - FileRemover OutputRemover(OutputFile.c_str()); - - { - raw_fd_ostream Input(InputFD, true); - for (int i = 0; i < Depth; i++) { - if (Modules[i]) - Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; - } - } - - StringRef InputFileStr(InputFile); - StringRef OutputFileStr(OutputFile); - StringRef StderrFileStr; - const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr, - &StderrFileStr}; - const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", - "--demangle", nullptr}; - int RunResult = - sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects); - if (RunResult != 0) - return false; - - auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return false; - StringRef Output = OutputBuf.get()->getBuffer(); - SmallVector Lines; - Output.split(Lines, "\n"); - auto CurLine = Lines.begin(); - int frame_no = 0; - for (int i = 0; i < Depth; i++) { - if (!Modules[i]) { - OS << format("#%d %p\n", frame_no++, StackTrace[i]); - continue; - } - // Read pairs of lines (function name and file/line info) until we - // encounter empty line. - for (;;) { - if (CurLine == Lines.end()) - return false; - StringRef FunctionName = *CurLine++; - if (FunctionName.empty()) - break; - OS << format("#%d %p ", frame_no++, StackTrace[i]); - if (!FunctionName.startswith("??")) - OS << format("%s ", FunctionName.str().c_str()); - if (CurLine == Lines.end()) - return false; - StringRef FileLineInfo = *CurLine++; - if (!FileLineInfo.startswith("??")) - OS << format("%s", FileLineInfo.str().c_str()); - else - OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]); - OS << "\n"; - } - } - return true; -} -#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) +#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && ... // PrintStackTrace - In the case of a program crash or fault, print out a stack // trace so that the user has an indication of why and where we died. diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h index e16a226a8eaf..871e612f6c16 100644 --- a/lib/Support/Unix/Unix.h +++ b/lib/Support/Unix/Unix.h @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef HAVE_UNISTD_H #include @@ -43,22 +44,10 @@ #endif #include -#ifdef HAVE_SYS_WAIT_H -# include -#endif - #ifdef HAVE_DLFCN_H # include #endif -#ifndef WEXITSTATUS -# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) -#endif - -#ifndef WIFEXITED -# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) -#endif - /// This function builds an error message into \p ErrMsg using the \p prefix /// string and the Unix error number given by \p errnum. If errnum is -1, the /// default then the value of errno is used. 
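The doc comment above states the Unix MakeErrMsg contract: combine a caller-supplied prefix with the text for a Unix error number, defaulting to errno when -1 is passed. A hedged sketch of that pattern, with illustrative names rather than the LLVM helper itself:

#include <cerrno>
#include <cstring>
#include <string>

// Build "prefix: strerror(errnum)" into *ErrMsg; -1 means "use errno".
// Returns true so callers can write "return makeErrorMessage(...)" on failure.
static bool makeErrorMessage(std::string *ErrMsg, const std::string &Prefix,
                             int ErrNum = -1) {
  if (!ErrMsg)
    return true; // Caller did not ask for a message.
  if (ErrNum == -1)
    ErrNum = errno;
  // Note: strerror() is not thread-safe; production code would prefer
  // strerror_r() where available.
  *ErrMsg = Prefix + ": " + std::strerror(ErrNum);
  return true;
}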
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index facf8d927ecd..8d852a67c075 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/Valgrind.h" #include "llvm/Config/config.h" +#include #if HAVE_VALGRIND_VALGRIND_H #include @@ -52,23 +53,3 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { } #endif // !HAVE_VALGRIND_VALGRIND_H - -// These functions require no implementation, tsan just looks at the arguments -// they're called with. However, they are required to be weak as some other -// application or library may already be providing these definitions for the -// same reason we are. -extern "C" { -LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line, - const volatile void *cv); -void AnnotateHappensAfter(const char *file, int line, const volatile void *cv) { -} -LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv); -void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv) {} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesBegin(const char *file, int line) {} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line) {} -} - diff --git a/lib/Support/Windows/COM.inc b/lib/Support/Windows/COM.inc index 0c50d6f74ea3..54f3ecf28ec2 100644 --- a/lib/Support/Windows/COM.inc +++ b/lib/Support/Windows/COM.inc @@ -1,4 +1,4 @@ -//===- llvm/Support/Windows/COM.inc - Windows COM Implementation *- C++ -*-===// +//==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index d38f19749118..17418b015c75 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -76,14 +76,14 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, SmallVector filenameUnicode; if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { SetLastError(ec.value()); - MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); + MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16"); return DynamicLibrary(); } HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); if (a_handle == 0) { - MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); + MakeErrMsg(errMsg, std::string(filename) + ": Can't open"); return DynamicLibrary(); } diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc index 4b2ff2e2d324..7eab9ff3afd2 100644 --- a/lib/Support/Windows/Memory.inc +++ b/lib/Support/Windows/Memory.inc @@ -192,14 +192,14 @@ static DWORD getProtection(const void *addr) { bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) { if (!setRangeWritable(M.Address, M.Size)) { - return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: "); + return MakeErrMsg(ErrMsg, "Cannot set memory to writeable"); } return true; } bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) { if (!setRangeExecutable(M.Address, M.Size)) { - return MakeErrMsg(ErrMsg, "Cannot set memory to executable: "); + return MakeErrMsg(ErrMsg, "Cannot set memory to executable"); } return true; } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 72da7c5fec32..4e4841231eff 100644 --- a/lib/Support/Windows/Path.inc 
+++ b/lib/Support/Windows/Path.inc
@@ -182,7 +182,8 @@ std::error_code current_path(SmallVectorImpl<char> &result) { return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); }
-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+                                 perms Perms) {
 SmallVector<wchar_t, 128> path_utf16;
 if (std::error_code ec = widenPath(path, path_utf16))
@@ -252,17 +253,34 @@ std::error_code rename(const Twine &from, const Twine &to) { return ec; std::error_code ec = std::error_code();
+
+ // Retry while we see ERROR_ACCESS_DENIED.
+ // System scanners (e.g. an indexer) might open the source file when it is
+ // written and closed.
+ for (int i = 0; i < 2000; i++) {
+ // Try ReplaceFile first, as it is able to associate a new data stream with
+ // the destination even if the destination file is currently open.
+ if (::ReplaceFileW(wide_to.begin(), wide_from.begin(), NULL, 0, NULL, NULL))
+ return std::error_code();
+
+ // We get ERROR_FILE_NOT_FOUND if the destination file is missing.
+ // MoveFileEx can handle this case.
+ DWORD ReplaceError = ::GetLastError();
+ ec = mapWindowsError(ReplaceError);
+ if (ReplaceError != ERROR_ACCESS_DENIED &&
+ ReplaceError != ERROR_FILE_NOT_FOUND &&
+ ReplaceError != ERROR_SHARING_VIOLATION)
+ break;
+
 if (::MoveFileExW(wide_from.begin(), wide_to.begin(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
 return std::error_code();
- DWORD LastError = ::GetLastError();
- ec = mapWindowsError(LastError);
- if (LastError != ERROR_ACCESS_DENIED)
- break;
- // Retry MoveFile() at ACCESS_DENIED.
- // System scanners (eg. indexer) might open the source file when
- // It is written and closed.
+
+ DWORD MoveError = ::GetLastError();
+ ec = mapWindowsError(MoveError);
+ if (MoveError != ERROR_ACCESS_DENIED)
+ break;
+
 ::Sleep(1);
 }
@@ -301,6 +319,11 @@ std::error_code access(const Twine &Path, AccessMode Mode) { return std::error_code(); }
+bool can_execute(const Twine &Path) {
+ return !access(Path, AccessMode::Execute) ||
+ !access(Path + ".exe", AccessMode::Execute);
+}
+
 bool equivalent(file_status A, file_status B) { assert(status_known(A) && status_known(B)); return A.FileIndexHigh == B.FileIndexHigh &&
@@ -325,10 +348,12 @@ std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
 static bool isReservedName(StringRef path) {
 // This list of reserved names comes from MSDN, at:
 // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
- static const char *sReservedNames[] = { "nul", "con", "prn", "aux",
- "com1", "com2", "com3", "com4", "com5", "com6",
- "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
- "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" };
+ static const char *const sReservedNames[] = { "nul", "con", "prn", "aux",
+ "com1", "com2", "com3", "com4",
+ "com5", "com6", "com7", "com8",
+ "com9", "lpt1", "lpt2", "lpt3",
+ "lpt4", "lpt5", "lpt6", "lpt7",
+ "lpt8", "lpt9" };
 // First, check to see if this is a device namespace, which always
 // starts with \\.\, since device namespaces are not legal file paths.
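For illustration, the rule that isReservedName() enforces can be sketched standalone: the DOS device names match case-insensitively, while device-namespace paths (starting with \\.\) are exempt. The names below are hypothetical, not the patch's code, and the real function operates on an llvm::StringRef rather than std::string:

#include <algorithm>
#include <cctype>
#include <string>

static bool isReservedDeviceName(std::string Path) {
  // Device namespace paths (\\.\...) are never treated as reserved names.
  if (Path.compare(0, 4, "\\\\.\\") == 0)
    return false;
  // Windows reserves these stems case-insensitively (CON, con, Con, ...).
  std::transform(Path.begin(), Path.end(), Path.begin(),
                 [](unsigned char C) { return std::tolower(C); });
  static const char *const Reserved[] = {
      "nul", "con", "prn", "aux",
      "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
      "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"};
  for (const char *Name : Reserved)
    if (Path == Name)
      return true;
  return false;
}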
@@ -643,9 +668,10 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD) { if (std::error_code EC = widenPath(Name, PathUTF16)) return EC;
- HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
- FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+ HANDLE H =
+ ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 if (H == INVALID_HANDLE_VALUE) {
 DWORD LastError = ::GetLastError();
 std::error_code EC = mapWindowsError(LastError);
@@ -728,30 +754,31 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, } // end namespace fs
 namespace path {
-
-bool home_directory(SmallVectorImpl<char> &result) {
- wchar_t Path[MAX_PATH];
- if (::SHGetFolderPathW(0, CSIDL_APPDATA | CSIDL_FLAG_CREATE, 0,
- /*SHGFP_TYPE_CURRENT*/0, Path) != S_OK)
+static bool getKnownFolderPath(KNOWNFOLDERID folderId,
+ SmallVectorImpl<char> &result) {
+ wchar_t *path = nullptr;
+ if (::SHGetKnownFolderPath(folderId, KF_FLAG_CREATE, nullptr, &path) != S_OK)
 return false;
- if (UTF16ToUTF8(Path, ::wcslen(Path), result))
- return false;
-
- return true;
+ bool ok = !UTF16ToUTF8(path, ::wcslen(path), result);
+ ::CoTaskMemFree(path);
+ return ok;
}
-static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) {
- SmallVector<wchar_t, 128> NameUTF16;
- if (windows::UTF8ToUTF16(Var, NameUTF16))
- return false;
+bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+ return getKnownFolderPath(FOLDERID_LocalAppData, Result);
+}
+bool home_directory(SmallVectorImpl<char> &result) {
+ return getKnownFolderPath(FOLDERID_Profile, result);
+}
+
+static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl<char> &Res) {
 SmallVector<wchar_t, 1024> Buf;
 size_t Size = 1024;
 do {
 Buf.reserve(Size);
- Size = GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity());
+ Size = GetEnvironmentVariableW(Var, Buf.data(), Buf.capacity());
 if (Size == 0)
 return false;
@@ -759,14 +786,12 @@ static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) { } while (Size > Buf.capacity()); Buf.set_size(Size);
- if (windows::UTF16ToUTF8(Buf.data(), Size, Res))
- return false;
- return true;
+ return !windows::UTF16ToUTF8(Buf.data(), Size, Res);
}
 static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) {
- const char *EnvironmentVariables[] = {"TMP", "TEMP", "USERPROFILE"};
- for (const char *Env : EnvironmentVariables) {
+ const wchar_t *EnvironmentVariables[] = {L"TMP", L"TEMP", L"USERPROFILE"};
+ for (auto *Env : EnvironmentVariables) {
 if (getTempDirEnvVar(Env, Res))
 return true;
 }
@@ -777,13 +802,19 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) { (void)ErasedOnReboot; Result.clear();
- // Check whether the temporary directory is specified by an environment
- // variable.
- if (getTempDirEnvVar(Result))
+ // Check whether the temporary directory is specified by an environment var.
+ // This matches GetTempPath logic to some degree. GetTempPath is not used
+ // directly as it cannot handle env vars longer than 130 chars on Windows 7
+ // (fixed on Windows 8).
+ if (getTempDirEnvVar(Result)) {
+ assert(!Result.empty() && "Unexpected empty path");
+ native(Result); // Some Unix-like shells use Unix path separator in $TMP.
+ fs::make_absolute(Result); // Make it absolute if not already.
 return;
+ }
 // Fall back to a system default.
- const char *DefaultResult = "C:\\TEMP";
+ const char *DefaultResult = "C:\\Temp";
 Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
} // end namespace path
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 8164956d1511..dae35a88132b 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -417,16 +417,23 @@ const char *Process::ResetColor() { return 0; }
+// Include GetLastError() in a fatal error message.
+static void ReportLastErrorFatal(const char *Msg) {
+ std::string ErrMsg;
+ MakeErrMsg(&ErrMsg, Msg);
+ report_fatal_error(ErrMsg);
+}
+
 unsigned Process::GetRandomNumber() {
 HCRYPTPROV HCPC;
 if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
- report_fatal_error("Could not acquire a cryptographic context");
+ ReportLastErrorFatal("Could not acquire a cryptographic context");
 ScopedCryptContext CryptoProvider(HCPC);
 unsigned Ret;
 if (!::CryptGenRandom(CryptoProvider, sizeof(Ret), reinterpret_cast<BYTE *>(&Ret)))
- report_fatal_error("Could not generate a random number");
+ ReportLastErrorFatal("Could not generate a random number");
 return Ret;
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index c29d8729b1de..d4e14ddc6518 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -75,8 +75,15 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name, do { U16Result.reserve(Len);
- Len = ::SearchPathW(Path, c_str(U16Name),
- U16Ext.empty() ? nullptr : c_str(U16Ext),
+ // Let's attach the extension manually. That is needed for files
+ // with a dot in the name, like aaa.bbb. SearchPathW will not add the
+ // extension from its argument to such files because it thinks they
+ // already have one.
+ SmallVector<wchar_t, MAX_PATH> U16NameExt;
+ if (std::error_code EC =
+ windows::UTF8ToUTF16(Twine(Name + Ext).str(), U16NameExt))
+ return EC;
+
+ Len = ::SearchPathW(Path, c_str(U16NameExt),
 nullptr, U16Result.capacity(), U16Result.data(), nullptr);
 } while (Len > U16Result.capacity());
@@ -132,7 +139,7 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { FILE_ATTRIBUTE_NORMAL, NULL); if (h == INVALID_HANDLE_VALUE) {
 MakeErrMsg(ErrMsg, fname + ": Can't open file for " +
- (fd ? "input: " : "output: "));
+ (fd ? "input" : "output"));
 }
 return h;
@@ -251,6 +258,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, return false; }
+ // can_execute may succeed by looking at Program + ".exe". CreateProcessW
+ // will implicitly add the .exe if we provide a command line without an
+ // executable path, but since we use an explicit executable, we have to add
+ // ".exe" ourselves.
+ SmallString<64> ProgramStorage;
+ if (!sys::fs::exists(Program))
+ Program = Twine(Program + ".exe").toStringRef(ProgramStorage);
+
 // Windows wants a command line, not an array of args, to pass to the new
 // process. We have to concatenate them all, while quoting the args that
 // have embedded spaces (or are empty).
@@ -416,7 +431,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, if (SecondsToWait) { if (!TerminateProcess(PI.ProcessHandle, 1)) { if (ErrMsg)
- MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program");
 // -2 indicates a crash or timeout as opposed to failure to execute.
WaitResult.ReturnCode = -2; @@ -441,7 +456,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, if (!rc) { SetLastError(err); if (ErrMsg) - MakeErrMsg(ErrMsg, "Failed getting status for program."); + MakeErrMsg(ErrMsg, "Failed getting status for program"); // -2 indicates a crash or timeout as opposed to failure to execute. WaitResult.ReturnCode = -2; diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 5c8c23978ac9..d109a66d7035 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -14,7 +14,6 @@ #include #include #include -#include #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -136,6 +135,10 @@ typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64, PDWORD, PIMAGEHLP_LINE64); static fpSymGetLineFromAddr64 fSymGetLineFromAddr64; +typedef BOOL(WINAPI *fpSymGetModuleInfo64)(HANDLE hProcess, DWORD64 dwAddr, + PIMAGEHLP_MODULE64 ModuleInfo); +static fpSymGetModuleInfo64 fSymGetModuleInfo64; + typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); static fpSymFunctionTableAccess64 fSymFunctionTableAccess64; @@ -145,6 +148,9 @@ static fpSymSetOptions fSymSetOptions; typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL); static fpSymInitialize fSymInitialize; +typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); +static fpEnumerateLoadedModules fEnumerateLoadedModules; + static bool load64BitDebugHelp(void) { HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); if (hLib) { @@ -156,14 +162,20 @@ static bool load64BitDebugHelp(void) { ::GetProcAddress(hLib, "SymGetSymFromAddr64"); fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64) ::GetProcAddress(hLib, "SymGetLineFromAddr64"); + fSymGetModuleInfo64 = (fpSymGetModuleInfo64) + ::GetProcAddress(hLib, "SymGetModuleInfo64"); fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64) ::GetProcAddress(hLib, "SymFunctionTableAccess64"); fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions"); fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize"); + fEnumerateLoadedModules = (fpEnumerateLoadedModules) + ::GetProcAddress(hLib, "EnumerateLoadedModules64"); } return fStackWalk64 && fSymInitialize && fSymSetOptions; } +using namespace llvm; + // Forward declare. static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep); static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); @@ -172,7 +184,6 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); static void (*InterruptFunction)() = 0; static std::vector *FilesToRemove = NULL; -static std::vector > *CallBacksToRun = 0; static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; @@ -183,23 +194,106 @@ static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; static CRITICAL_SECTION CriticalSection; static bool CriticalSectionInitialized = false; +enum { +#if defined(_M_X64) + NativeMachineType = IMAGE_FILE_MACHINE_AMD64 +#else + NativeMachineType = IMAGE_FILE_MACHINE_I386 +#endif +}; + +static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS, + HANDLE hProcess, HANDLE hThread, + STACKFRAME64 &StackFrameOrig, + CONTEXT *ContextOrig) { + // StackWalk64 modifies the incoming stack frame and context, so copy them. + STACKFRAME64 StackFrame = StackFrameOrig; + + // Copy the register context so that we don't modify it while we unwind. 
We + // could use InitializeContext + CopyContext, but that's only required to get + // at AVX registers, which typically aren't needed by StackWalk64. Reduce the + // flag set to indicate that there's less data. + CONTEXT Context = *ContextOrig; + Context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + static void *StackTrace[256]; + size_t Depth = 0; + while (fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + &Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { + if (StackFrame.AddrFrame.Offset == 0) + break; + StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset; + if (Depth >= array_lengthof(StackTrace)) + break; + } + + return printSymbolizedStackTrace(&StackTrace[0], Depth, OS); +} + +namespace { +struct FindModuleData { + void **StackTrace; + int Depth; + const char **Modules; + intptr_t *Offsets; + StringSaver *StrPool; +}; +} + +static BOOL CALLBACK findModuleCallback(WIN32_ELMCB_PCSTR ModuleName, + DWORD64 ModuleBase, ULONG ModuleSize, + void *VoidData) { + FindModuleData *Data = (FindModuleData*)VoidData; + intptr_t Beg = ModuleBase; + intptr_t End = Beg + ModuleSize; + for (int I = 0; I < Data->Depth; I++) { + if (Data->Modules[I]) + continue; + intptr_t Addr = (intptr_t)Data->StackTrace[I]; + if (Beg <= Addr && Addr < End) { + Data->Modules[I] = Data->StrPool->save(ModuleName); + Data->Offsets[I] = Addr - Beg; + } + } + return TRUE; +} + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + if (!fEnumerateLoadedModules) + return false; + FindModuleData Data; + Data.StackTrace = StackTrace; + Data.Depth = Depth; + Data.Modules = Modules; + Data.Offsets = Offsets; + Data.StrPool = &StrPool; + fEnumerateLoadedModules(GetCurrentProcess(), findModuleCallback, &Data); + return true; +} + static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, HANDLE hThread, STACKFRAME64 &StackFrame, CONTEXT *Context) { - DWORD machineType; -#if defined(_M_X64) - machineType = IMAGE_FILE_MACHINE_AMD64; -#else - machineType = IMAGE_FILE_MACHINE_I386; -#endif - // Initialize the symbol handler. fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); fSymInitialize(hProcess, NULL, TRUE); + // Try llvm-symbolizer first. llvm-symbolizer knows how to deal with both PDBs + // and DWARF, so it should do a good job regardless of what debug info or + // linker is in use. + if (printStackTraceWithLLVMSymbolizer(OS, hProcess, hThread, StackFrame, + Context)) { + return; + } + while (true) { - if (!fStackWalk64(machineType, hProcess, hThread, &StackFrame, Context, 0, - fSymFunctionTableAccess64, fSymGetModuleBase64, 0)) { + if (!fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { break; } @@ -311,10 +405,7 @@ static void RegisterHandler() { // If we cannot load up the APIs (which would be unexpected as they should // exist on every version of Windows we support), we will bail out since // there would be nothing to report. 
- if (!load64BitDebugHelp()) { - assert(false && "These APIs should always be available"); - return; - } + assert(load64BitDebugHelp() && "These APIs should always be available"); if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); @@ -404,7 +495,6 @@ extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); #endif void llvm::sys::PrintStackTrace(raw_ostream &OS) { - STACKFRAME64 StackFrame = {}; CONTEXT Context = {}; ::RtlCaptureContext(&Context); @@ -436,8 +526,6 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { - if (CallBacksToRun == 0) - CallBacksToRun = new std::vector >(); CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); RegisterHandler(); LeaveCriticalSection(&CriticalSection); @@ -454,17 +542,12 @@ static void Cleanup() { CleanupExecuted = true; // FIXME: open files cannot be deleted. - if (FilesToRemove != NULL) while (!FilesToRemove->empty()) { llvm::sys::fs::remove(FilesToRemove->back()); FilesToRemove->pop_back(); } - - if (CallBacksToRun) - for (auto &I : *CallBacksToRun) - I.first(I.second); - + llvm::sys::RunSignalHandlers(); LeaveCriticalSection(&CriticalSection); } diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index 5bb0b8d2d788..34d961b148d1 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -26,12 +26,13 @@ #undef _WIN32_WINNT #undef _WIN32_IE -// Require at least Windows XP(5.1) API. -#define _WIN32_WINNT 0x0501 -#define _WIN32_IE 0x0600 // MinGW at it again. +// Require at least Windows 7 API. +#define _WIN32_WINNT 0x0601 +#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed. 
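The bump to _WIN32_WINNT 0x0601 above is what lets the Path.inc changes call SHGetKnownFolderPath, which only exists in the Vista-and-later API surface. A hypothetical compile-time guard for that assumption, not part of the patch, could look like:

#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 // Windows 7 baseline, matching the header above.
#endif
#if _WIN32_WINNT < 0x0600
#error "This code assumes SHGetKnownFolderPath (Vista or newer)."
#endif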
#define WIN32_LEAN_AND_MEAN #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" // Get build system configuration settings @@ -47,13 +48,16 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) return true; char *buffer = NULL; + DWORD LastError = GetLastError(); DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM, - NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, LastError, 0, (LPSTR)&buffer, 1, NULL); if (R) - *ErrMsg = prefix + buffer; + *ErrMsg = prefix + ": " + buffer; else - *ErrMsg = prefix + "Unknown error"; + *ErrMsg = prefix + ": Unknown error"; + *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")"; LocalFree(buffer); return R != 0; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index d55da5ef1e4a..c4384cafff62 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -801,7 +801,7 @@ Token &Scanner::peekNext() { removeStaleSimpleKeyCandidates(); SimpleKey SK; - SK.Tok = TokenQueue.front(); + SK.Tok = TokenQueue.begin(); if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) == SimpleKeys.end()) break; @@ -962,10 +962,8 @@ void Scanner::skip(uint32_t Distance) { bool Scanner::isBlankOrBreak(StringRef::iterator Position) { if (Position == End) return false; - if ( *Position == ' ' || *Position == '\t' - || *Position == '\r' || *Position == '\n') - return true; - return false; + return *Position == ' ' || *Position == '\t' || *Position == '\r' || + *Position == '\n'; } bool Scanner::consumeLineBreakIfPresent() { @@ -1163,7 +1161,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) { TokenQueue.push_back(T); // [ and { may begin a simple key. - saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); + saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); // And may also be followed by a simple key. IsSimpleKeyAllowed = true; @@ -1326,7 +1324,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1404,7 +1402,7 @@ bool Scanner::scanPlainScalar() { TokenQueue.push_back(T); // Plain scalars can be simple keys. - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1439,7 +1437,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) { TokenQueue.push_back(T); // Alias and anchors can be simple keys. - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1669,7 +1667,7 @@ bool Scanner::scanTag() { TokenQueue.push_back(T); // Tags can be simple keys. 
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 6b59a16514b1..2aa6e9b74683 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -332,17 +332,12 @@ std::unique_ptr Input::createHNodes(Node *N) { StringRef KeyStr = SN->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage - unsigned Len = StringStorage.size(); - char *Buf = StringAllocator.Allocate(Len); - memcpy(Buf, &StringStorage[0], Len); - KeyStr = StringRef(Buf, Len); + KeyStr = StringStorage.str().copy(StringAllocator); } return llvm::make_unique(N, KeyStr); } else if (BlockScalarNode *BSN = dyn_cast(N)) { - StringRef Value = BSN->getValue(); - char *Buf = StringAllocator.Allocate(Value.size()); - memcpy(Buf, Value.data(), Value.size()); - return llvm::make_unique(N, StringRef(Buf, Value.size())); + StringRef ValueCopy = BSN->getValue().copy(StringAllocator); + return llvm::make_unique(N, ValueCopy); } else if (SequenceNode *SQ = dyn_cast(N)) { auto SQHNode = llvm::make_unique(N); for (Node &SN : *SQ) { @@ -365,10 +360,7 @@ std::unique_ptr Input::createHNodes(Node *N) { StringRef KeyStr = KeyScalar->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage - unsigned Len = StringStorage.size(); - char *Buf = StringAllocator.Allocate(Len); - memcpy(Buf, &StringStorage[0], Len); - KeyStr = StringRef(Buf, Len); + KeyStr = StringStorage.str().copy(StringAllocator); } auto ValueHNode = this->createHNodes(KVN.getValue()); if (EC) diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 42f830bbf0fa..57c7ac32f559 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -517,7 +517,7 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, /// closes the file when the stream is destroyed. raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose), - Error(false), UseAtomicWrites(false) { + Error(false) { if (FD < 0 ) { ShouldClose = false; return; @@ -568,21 +568,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { pos += Size; do { - ssize_t ret; - - // Check whether we should attempt to use atomic writes. - if (LLVM_LIKELY(!UseAtomicWrites)) { - ret = ::write(FD, Ptr, Size); - } else { - // Use ::writev() where available. -#if defined(HAVE_WRITEV) - const void *Addr = static_cast(Ptr); - struct iovec IOV = {const_cast(Addr), Size }; - ret = ::writev(FD, &IOV, 1); -#else - ret = ::write(FD, Ptr, Size); -#endif - } + ssize_t ret = ::write(FD, Ptr, Size); if (ret < 0) { // If it's a recoverable error, swallow it and retry the write. @@ -755,72 +741,15 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { // raw_svector_ostream //===----------------------------------------------------------------------===// -// The raw_svector_ostream implementation uses the SmallVector itself as the -// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is -// always pointing past the end of the vector, but within the vector -// capacity. This allows raw_ostream to write directly into the correct place, -// and we only need to set the vector size when the data is flushed. 
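The removed comment above documented the old raw_svector_ostream trick of aiming the stream buffer into the SmallVector's spare capacity; the replacement below instead appends on every write and reports the vector size as the position. A minimal sketch of the simplified design, using a plain std::vector stand-in rather than the LLVM class:

#include <cstddef>
#include <cstring>
#include <vector>

class VectorStream {
  std::vector<char> &Buf; // Storage is owned by the caller.

public:
  explicit VectorStream(std::vector<char> &B) : Buf(B) {}
  // Unbuffered write: append directly, no separate stream buffer to sync.
  void write(const char *Ptr, size_t Size) {
    Buf.insert(Buf.end(), Ptr, Ptr + Size);
  }
  // Positioned overwrite; caller guarantees Offset + Size <= Buf.size().
  void pwrite(const char *Ptr, size_t Size, size_t Offset) {
    std::memcpy(Buf.data() + Offset, Ptr, Size);
  }
  size_t current_pos() const { return Buf.size(); } // Just the vector size.
};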
+uint64_t raw_svector_ostream::current_pos() const { return OS.size(); } -raw_svector_ostream::raw_svector_ostream(SmallVectorImpl &O, unsigned) - : OS(O) {} - -raw_svector_ostream::raw_svector_ostream(SmallVectorImpl &O) : OS(O) { - init(); -} - -void raw_svector_ostream::init() { - // Set up the initial external buffer. We make sure that the buffer has at - // least 128 bytes free; raw_ostream itself only requires 64, but we want to - // make sure that we don't grow the buffer unnecessarily on destruction (when - // the data is flushed). See the FIXME below. - OS.reserve(OS.size() + 128); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -raw_svector_ostream::~raw_svector_ostream() { - // FIXME: Prevent resizing during this flush(). - flush(); +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Ptr + Size); } void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) { - flush(); - memcpy(OS.begin() + Offset, Ptr, Size); -} - -/// resync - This is called when the SmallVector we're appending to is changed -/// outside of the raw_svector_ostream's control. It is only safe to do this -/// if the raw_svector_ostream has previously been flushed. -void raw_svector_ostream::resync() { - assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector"); - - if (OS.capacity() - OS.size() < 64) - OS.reserve(OS.capacity() * 2); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - if (Ptr == OS.end()) { - // Grow the buffer to include the scratch area without copying. - size_t NewSize = OS.size() + Size; - assert(NewSize <= OS.capacity() && "Invalid write_impl() call!"); - OS.set_size(NewSize); - } else { - assert(!GetNumBytesInBuffer()); - OS.append(Ptr, Ptr + Size); - } - - OS.reserve(OS.size() + 64); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -uint64_t raw_svector_ostream::current_pos() const { - return OS.size(); -} - -StringRef raw_svector_ostream::str() { - flush(); - return StringRef(OS.begin(), OS.size()); + memcpy(OS.data() + Offset, Ptr, Size); } //===----------------------------------------------------------------------===// diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c9a31b64cfd3..87a3422b32ab 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -673,6 +673,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { PrintFatalError(CurRec->getLoc(), "Undefined reference:'" + Name + "'\n"); } + + if (isa(getType())) { + if (BitsInit *BI = dyn_cast(LHS)) { + if (Init *NewInit = BI->convertInitializerTo(IntRecTy::get())) + return NewInit; + break; + } + } } break; } @@ -1633,7 +1641,7 @@ void Record::dump() const { errs() << *this; } raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { OS << R.getNameInitAsString(); - const std::vector &TArgs = R.getTemplateArgs(); + ArrayRef TArgs = R.getTemplateArgs(); if (!TArgs.empty()) { OS << "<"; bool NeedComma = false; diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index 07c538159dcb..f56b17acbfba 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -196,7 +196,7 @@ struct SequenceOp : public SetTheory::Operator { if (IntInit *II = dyn_cast(Expr->arg_begin()[2])) To = II->getValue(); else - PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString()); + PrintFatalError(Loc, "To must be an integer: " + Expr->getAsString()); if (To < 0 || To >= (1 << 30)) 
PrintFatalError(Loc, "To out of range"); diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 5c36fda2e1ca..e5f6f165d13f 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -152,7 +152,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { if (AddValue(CurRec, SubClass.RefRange.Start, Val)) return true; - const std::vector &TArgs = SC->getTemplateArgs(); + ArrayRef TArgs = SC->getTemplateArgs(); // Ensure that an appropriate number of template arguments are specified. if (TArgs.size() < SubClass.TemplateArgs.size()) @@ -228,7 +228,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, CurMC->DefPrototypes.push_back(std::move(NewDef)); } - const std::vector &SMCTArgs = SMC->Rec.getTemplateArgs(); + ArrayRef SMCTArgs = SMC->Rec.getTemplateArgs(); // Ensure that an appropriate number of template arguments are // specified. @@ -1641,7 +1641,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, RecTy *ItemType = EltTy; unsigned int ArgN = 0; if (ArgsRec && !EltTy) { - const std::vector &TArgs = ArgsRec->getTemplateArgs(); + ArrayRef TArgs = ArgsRec->getTemplateArgs(); if (TArgs.empty()) { TokError("template argument provided to non-template class"); return std::vector(); @@ -1662,7 +1662,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, Lex.Lex(); // Eat the comma if (ArgsRec && !EltTy) { - const std::vector &TArgs = ArgsRec->getTemplateArgs(); + ArrayRef TArgs = ArgsRec->getTemplateArgs(); if (ArgN >= TArgs.size()) { TokError("too many template arguments"); return std::vector(); @@ -2313,13 +2313,11 @@ bool TGParser::ParseMultiClass() { return false; } -Record *TGParser:: -InstantiateMulticlassDef(MultiClass &MC, - Record *DefProto, - Init *&DefmPrefix, - SMRange DefmPrefixRange, - const std::vector &TArgs, - std::vector &TemplateVals) { +Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, + Init *&DefmPrefix, + SMRange DefmPrefixRange, + ArrayRef TArgs, + std::vector &TemplateVals) { // We need to preserve DefProto so it can be reused for later // instantiations, so create a new Record to inherit from it. @@ -2437,11 +2435,9 @@ InstantiateMulticlassDef(MultiClass &MC, return CurRec.release(); } -bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, - Record *CurRec, - SMLoc DefmPrefixLoc, - SMLoc SubClassLoc, - const std::vector &TArgs, +bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec, + SMLoc DefmPrefixLoc, SMLoc SubClassLoc, + ArrayRef TArgs, std::vector &TemplateVals, bool DeleteArgs) { // Loop over all of the template arguments, setting them to the specified @@ -2540,7 +2536,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { std::vector &TemplateVals = Ref.TemplateArgs; // Verify that the correct number of template arguments were specified. - const std::vector &TArgs = MC->Rec.getTemplateArgs(); + ArrayRef TArgs = MC->Rec.getTemplateArgs(); if (TArgs.size() < TemplateVals.size()) return Error(SubClassLoc, "more template args specified than multiclass expects"); diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index d69d1f4572f9..8b41134d4ff1 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -135,17 +135,13 @@ private: // Parser methods. 
bool ParseObject(MultiClass *MC); bool ParseClass(); bool ParseMultiClass(); - Record *InstantiateMulticlassDef(MultiClass &MC, - Record *DefProto, - Init *&DefmPrefix, - SMRange DefmPrefixRange, - const std::vector &TArgs, + Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, + Init *&DefmPrefix, SMRange DefmPrefixRange, + ArrayRef TArgs, std::vector &TemplateVals); - bool ResolveMulticlassDefArgs(MultiClass &MC, - Record *DefProto, - SMLoc DefmPrefixLoc, - SMLoc SubClassLoc, - const std::vector &TArgs, + bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto, + SMLoc DefmPrefixLoc, SMLoc SubClassLoc, + ArrayRef TArgs, std::vector &TemplateVals, bool DeleteArgs); bool ResolveMulticlassDef(MultiClass &MC, diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 9a7d6c884db5..0bff9b592c15 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,6 +32,15 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -40,6 +49,15 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true", + "Reserve X18, making it unavailable " + "as a GPR">; + //===----------------------------------------------------------------------===// // Architectures. 
//
@@ -47,6 +65,9 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [FeatureCRC]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions", [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -70,19 +91,29 @@ include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td"
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
 "Cortex-A53 ARM processors", [FeatureFPARMv8,
 FeatureNEON,
 FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
 "Cortex-A57 ARM processors", [FeatureFPARMv8,
 FeatureNEON,
 FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", "Cyclone",
@@ -90,12 +121,16 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", "Cyclone", FeatureNEON, FeatureCrypto, FeatureCRC,
+ FeaturePerfMon,
 FeatureZCRegMove,
 FeatureZCZeroing]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
+// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
@@ -109,11 +144,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def GenericAsmParserVariant : AsmParserVariant {
 int Variant = 0;
 string Name = "generic";
+ string BreakCharacters = ".";
}
def AppleAsmParserVariant : AsmParserVariant {
 int Variant = 1;
 string Name = "apple-neon";
+ string BreakCharacters = ".";
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index d7ef3f4ef653..d215d9e831c0 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -122,7 +122,7 @@ AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) { static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { // Get the previous machine basic block in the function.
- MachineFunction::iterator MBBI = *MBB;
+ MachineFunction::iterator MBBI(MBB);
 // Can't go off top of function.
 if (MBBI == MBB->getParent()->begin())
@@ -131,7 +131,7 @@ static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB, MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond;
- MachineBasicBlock *PrevBB = std::prev(MBBI);
+ MachineBasicBlock *PrevBB = &*std::prev(MBBI);
 for (MachineBasicBlock *S : MBB->predecessors())
 if (S == PrevBB && !TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond) && !TBB && !FBB)
@@ -151,10 +151,9 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB, // If there is no non-pseudo in the current block, loop back around and try // the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) { - for (auto I = FMBB->rbegin(), E = FMBB->rend(); I != E; ++I) { - if (!I->isPseudo()) - return &*I; - } + for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend())) + if (!I.isPseudo()) + return &I; } // There was no previous non-pseudo in the fallen through blocks @@ -217,8 +216,8 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) { ++Idx; } - DEBUG(dbgs() << "Scan complete, "<< Sequences.size() - << " occurences of pattern found.\n"); + DEBUG(dbgs() << "Scan complete, " << Sequences.size() + << " occurrences of pattern found.\n"); // Then update the basic block, inserting nops between the detected sequences. for (auto &MI : Sequences) { diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 9d6dbd641a16..79a84ad8c6c5 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -593,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, if (Change) { Substs[MO.getReg()] = Reg; MO.setReg(Reg); - MRI->setPhysRegUsed(Reg); Changed = true; } diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp index 716e1a37b1f7..3afcdfb8b930 100644 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -57,6 +57,8 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden, " the other."), cl::init(true)); +#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion" + //===----------------------------------------------------------------------===// // AArch64AddressTypePromotion //===----------------------------------------------------------------------===// @@ -76,7 +78,7 @@ public: } const char *getPassName() const override { - return "AArch64 Address Type Promotion"; + return AARCH64_TYPE_PROMO_NAME; } /// Iterate over the functions and promote the computation of interesting @@ -143,10 +145,10 @@ private: char AArch64AddressTypePromotion::ID = 0; INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion", - "AArch64 Type Promotion Pass", false, false) + AARCH64_TYPE_PROMO_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion", - "AArch64 Type Promotion Pass", false, false) + AARCH64_TYPE_PROMO_NAME, false, false) FunctionPass *llvm::createAArch64AddressTypePromotionPass() { return new AArch64AddressTypePromotion(); diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 18d21fd38618..1644d71d2821 100644 --- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -61,6 +61,12 @@ STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); +namespace llvm { +void initializeAArch64AdvSIMDScalarPass(PassRegistry &); +} + +#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization" + namespace { class AArch64AdvSIMDScalar : public MachineFunctionPass { MachineRegisterInfo *MRI; @@ -82,12 +88,14 @@ private: public: static char ID; // Pass identification, replacement for typeid. 
- explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {} + explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) { + initializeAArch64AdvSIMDScalarPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &F) override; const char *getPassName() const override { - return "AdvSIMD Scalar Operation Optimization"; + return AARCH64_ADVSIMD_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -98,6 +106,9 @@ public: char AArch64AdvSIMDScalar::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(AArch64AdvSIMDScalar, "aarch64-simd-scalar", + AARCH64_ADVSIMD_NAME, false, false) + static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (SubReg) @@ -381,7 +392,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { // Just check things on a one-block-at-a-time basis. for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) - if (processMachineBasicBlock(I)) + if (processMachineBasicBlock(&*I)) Changed = true; return Changed; } diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index d973234dd86a..a614f555a4e9 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -45,6 +45,12 @@ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), STATISTIC(NumSplit, "Number of basic blocks split"); STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); +namespace llvm { +void initializeAArch64BranchRelaxationPass(PassRegistry &); +} + +#define AARCH64_BR_RELAX_NAME "AArch64 branch relaxation pass" + namespace { class AArch64BranchRelaxation : public MachineFunctionPass { /// BasicBlockInfo - Information about the offset and size of a single @@ -93,17 +99,22 @@ class AArch64BranchRelaxation : public MachineFunctionPass { public: static char ID; - AArch64BranchRelaxation() : MachineFunctionPass(ID) {} + AArch64BranchRelaxation() : MachineFunctionPass(ID) { + initializeAArch64BranchRelaxationPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "AArch64 branch relaxation pass"; + return AARCH64_BR_RELAX_NAME; } }; char AArch64BranchRelaxation::ID = 0; } +INITIALIZE_PASS(AArch64BranchRelaxation, "aarch64-branch-relax", + AARCH64_BR_RELAX_NAME, false, false) + /// verify - check BBOffsets, BBSizes, alignment of islands void AArch64BranchRelaxation::verify() { #ifndef NDEBUG @@ -131,14 +142,14 @@ void AArch64BranchRelaxation::dumpBBs() { /// into the block immediately after it. static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; + MachineFunction::iterator MBBI(MBB); // Can't fall off end of function. - MachineBasicBlock *NextBB = std::next(MBBI); + auto NextBB = std::next(MBBI); if (NextBB == MBB->getParent()->end()) return false; for (MachineBasicBlock *S : MBB->successors()) - if (S == NextBB) + if (S == &*NextBB) return true; return false; @@ -216,9 +227,7 @@ AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; - ++MBBI; - MF->insert(MBBI, NewBB); + MF->insert(++OrigBB->getIterator(), NewBB); // Splice the instructions starting with MI over to NewBB. 
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); @@ -421,7 +430,7 @@ bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MBB->replaceSuccessor(FBB, NewBB); NewBB->addSuccessor(FBB); } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*std::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << ", invert condition and change dest. to BB#" diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 1e2d1c3b93bd..bc44bc5f2461 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,30 +25,28 @@ namespace { using namespace llvm; -static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7}; -static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, - AArch64::H3, AArch64::H4, AArch64::H5, - AArch64::H6, AArch64::H7}; -static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, - AArch64::S3, AArch64::S4, AArch64::S5, - AArch64::S6, AArch64::S7}; -static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, - AArch64::D3, AArch64::D4, AArch64::D5, - AArch64::D6, AArch64::D7}; -static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, - AArch64::Q3, AArch64::Q4, AArch64::Q5, - AArch64::Q6, AArch64::Q7}; +static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; +static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, + AArch64::H3, AArch64::H4, AArch64::H5, + AArch64::H6, AArch64::H7}; +static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, + AArch64::S3, AArch64::S4, AArch64::S5, + AArch64::S6, AArch64::S7}; +static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, + AArch64::D3, AArch64::D4, AArch64::D5, + AArch64::D6, AArch64::D7}; +static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, + AArch64::Q3, AArch64::Q4, AArch64::Q5, + AArch64::Q6, AArch64::Q7}; static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, unsigned SlotAlign) { unsigned Size = LocVT.getSizeInBits() / 8; - unsigned StackAlign = State.getMachineFunction() - .getTarget() - .getDataLayout() - ->getStackAlignment(); + unsigned StackAlign = + State.getMachineFunction().getDataLayout().getStackAlignment(); unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign); for (auto &It : PendingMembers) { @@ -88,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. - ArrayRef RegList; + ArrayRef RegList; if (LocVT.SimpleTy == MVT::i64) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 815ebef177d8..388d64ec4e99 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -16,7 +16,7 @@ class CCIfAlign : CCIf; /// CCIfBigEndian - Match only if we're in big endian mode. 
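The BBHasFallthrough and splitBlockBeforeInstr changes above reflect ilist iterators no longer converting implicitly from node pointers: the code now constructs MachineFunction::iterator(MBB) or calls getIterator(), and turns an iterator back into a pointer with &*. A sketch of that round trip on a plain std::list, where the pointer-to-iterator step has to be a linear search (LLVM's intrusive list does it in O(1)):

#include <iterator>
#include <list>

std::list<int>::iterator toIterator(std::list<int> &L, int *Node) {
  // ilist offers Node->getIterator() for this; std::list has no
  // equivalent, so the stand-in scans.
  for (auto It = L.begin(), E = L.end(); It != E; ++It)
    if (&*It == Node)
      return It;
  return L.end();
}

int *nextNode(std::list<int> &L, int *Node) {
  auto It = std::next(toIterator(L, Node)); // explicit iterator arithmetic
  return It == L.end() ? nullptr : &*It;    // back to a pointer, as in &*NextBB
}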
class CCIfBigEndian : - CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>; + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention @@ -279,6 +279,23 @@ def CSR_AArch64_TLS_Darwin FP, (sequence "Q%u", 0, 31))>; +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_AArch64_CXX_TLS_Darwin_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_AArch64_CXX_TLS_Darwin_ViaCopy + : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>; + // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. def CSR_AArch64_TLS_ELF diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 06ff9af37fd7..9310ac4a44a2 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -117,10 +117,10 @@ struct LDTLSCleanup : public MachineFunctionPass { *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); // Insert a copy from X0 to TLSBaseAddrReg for later. - MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(AArch64::X0); + MachineInstr *Copy = + BuildMI(*I->getParent(), ++I->getIterator(), I->getDebugLoc(), + TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) + .addReg(AArch64::X0); return Copy; } diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index efdb2e33a36e..78c239b11ef3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -168,6 +168,8 @@ namespace llvm { void initializeAArch64CollectLOHPass(PassRegistry &); } +#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" + namespace { struct AArch64CollectLOH : public MachineFunctionPass { static char ID; @@ -178,7 +180,7 @@ struct AArch64CollectLOH : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "AArch64 Collect Linker Optimization Hint (LOH)"; + return AARCH64_COLLECT_LOH_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -220,12 +222,10 @@ typedef SmallVector MapIdToReg; char AArch64CollectLOH::ID = 0; INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", - "AArch64 Collect Linker Optimization Hint (LOH)", false, - false) + AARCH64_COLLECT_LOH_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", - "AArch64 Collect Linker Optimization Hint (LOH)", false, - false) + AARCH64_COLLECT_LOH_NAME, false, false) /// Given a couple (MBB, reg) get the corresponding set of instruction from /// the given "sets". 
@@ -353,9 +353,17 @@ static void initReachingDef(const MachineFunction &MF, for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - assert(ItRegId != RegToId.end() && - "Sub-register of an " - "involved register, not recorded as involved!"); + // If this alias has not been recorded, then it is not interesting + // for the current analysis. + // We can end up in this situation because of tuple registers. + // E.g., Let say we are interested in S1. When we register + // S1, we will also register its aliases and in particular + // the tuple Q1_Q2. + // Now, when we encounter Q1_Q2, we will look through its aliases + // and will find that S2 is not registered. + if (ItRegId == RegToId.end()) + continue; + BBKillSet.set(ItRegId->second); BBGen[ItRegId->second] = &MI; } @@ -523,6 +531,8 @@ static bool isCandidateStore(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; + case AArch64::STRBBui: + case AArch64::STRHHui: case AArch64::STRBui: case AArch64::STRHui: case AArch64::STRWui: @@ -884,7 +894,8 @@ static void computeOthers(const InstrToInstrs &UseToDefs, bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); // If the chain is three instructions long and ldr is the second element, // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT) + if (L2 && !IsL2Add && + !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) continue; SmallVector Args; MCLOHType Kind; @@ -1000,7 +1011,8 @@ static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, DEBUG(dbgs() << "** Collect Involved Register\n"); for (const auto &MBB : MF) { for (const MachineInstr &MI : MBB) { - if (!canDefBePartOfLOH(&MI)) + if (!canDefBePartOfLOH(&MI) && + !isCandidateLoad(&MI) && !isCandidateStore(&MI)) continue; // Process defs diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index b9e41c61defe..fc27bfee73d1 100644 --- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -59,6 +59,7 @@ //===----------------------------------------------------------------------===// #include "AArch64.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -153,13 +154,20 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare( case AArch64::SUBSXri: // cmn is an alias for adds with a dead destination register. 
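The computeOthers change above swaps an equality test on getTargetFlags() for a mask test: target flags can carry several bits at once, so equality against one flag value breaks as soon as a second bit is set. A tiny self-checking illustration; the flag values are invented, not the real AArch64II encoding:

#include <cstdint>

enum : uint8_t { MO_GOT = 1u << 0, MO_NC = 1u << 1 }; // illustrative bits

bool loadsFromGOT(uint8_t TargetFlags) {
  // Wrong:  TargetFlags == MO_GOT   (false once MO_NC is also set)
  // Right:
  return (TargetFlags & MO_GOT) != 0;
}

static_assert(((MO_GOT | MO_NC) & MO_GOT) != 0,
              "mask test still sees the flag in a combination");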
case AArch64::ADDSWri: - case AArch64::ADDSXri: - if (MRI->use_empty(I->getOperand(0).getReg())) - return I; - - DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n'); - return nullptr; - + case AArch64::ADDSXri: { + unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm()); + if (!I->getOperand(2).isImm()) { + DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n'); + return nullptr; + } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) { + DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n'); + return nullptr; + } else if (!MRI->use_empty(I->getOperand(0).getReg())) { + DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n'); + return nullptr; + } + return I; + } // Prevent false positive case like: // cmp w19, #0 // cinc w0, w19, gt diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2b0c92fe02d5..df1320fbd4c9 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -353,7 +353,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { MIOperands::PhysRegInfo PRI = MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); - if (PRI.Reads) { + if (PRI.Read) { // The ccmp doesn't produce exactly the same flags as the original // compare, so reject the transform if there are uses of the flags // besides the terminators. @@ -362,7 +362,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { return nullptr; } - if (PRI.Clobbers) { + if (PRI.Defined || PRI.Clobbered) { DEBUG(dbgs() << "Not convertible compare: " << *I); ++NumUnknNZCVDefs; return nullptr; @@ -567,8 +567,8 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB); - CmpBB->removeSuccessor(Tail); + Head->removeSuccessor(CmpBB, true); + CmpBB->removeSuccessor(Tail, true); Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->RemoveBranch(*Head); @@ -786,13 +786,13 @@ void AArch64ConditionalCompares::updateDomTree( // convert() removes CmpBB which was previously dominated by Head. // CmpBB children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + for (MachineBasicBlock *RemovedMBB : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB); assert(Node != HeadNode && "Cannot erase the head node"); assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); while (Node->getNumChildren()) DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); - DomTree->eraseNode(Removed[i]); + DomTree->eraseNode(RemovedMBB); } } @@ -801,8 +801,8 @@ void AArch64ConditionalCompares::updateLoops(ArrayRef Removed) { if (!Loops) return; - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); + for (MachineBasicBlock *RemovedMBB : Removed) + Loops->removeBlock(RemovedMBB); } /// Invalidate MachineTraceMetrics before if-conversion. 
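The new findSuitableCompare guards bail out when the compare immediate is symbolic or sits at the top of the encodable range: ADDS/SUBS immediates are 12 bits with an optional LSL #12, and this pass later nudges the immediate, so a value at the boundary could become unencodable. A standalone version of that acceptance test, a sketch rather than the in-tree helper:

#include <cstdint>

// True when a cmp immediate (already split into value and shift, the shift
// being 0 or 12 on AArch64) leaves room for a +/-1 adjustment.
bool cmpImmediateSafeToAdjust(int64_t Imm, unsigned ShiftAmt) {
  if (Imm < 0)
    return false;                   // negative/symbolic: rejected above too
  return (Imm << ShiftAmt) < 0xfff; // strictly below 2^12 - 1 keeps slack
}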
@@ -899,7 +899,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { Loops = getAnalysisIfAvailable(); Traces = &getAnalysis(); MinInstr = nullptr; - MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); + MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; CmpConv.runOnMachineFunction(MF); diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 74fc167433f6..576cf4a74167 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -26,6 +26,12 @@ using namespace llvm; STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); +namespace llvm { +void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry &); +} + +#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions" + namespace { class AArch64DeadRegisterDefinitions : public MachineFunctionPass { private: @@ -35,11 +41,14 @@ private: bool usesFrameIndex(const MachineInstr &MI); public: static char ID; // Pass identification, replacement for typeid. - explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} + explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) { + initializeAArch64DeadRegisterDefinitionsPass( + *PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &F) override; - const char *getPassName() const override { return "Dead register definitions"; } + const char *getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -49,6 +58,9 @@ public: char AArch64DeadRegisterDefinitions::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs", + AARCH64_DEAD_REG_DEF_NAME, false, false) + bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( unsigned Reg, const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index c2470f747a38..d24e42a93763 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -22,18 +22,26 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; +namespace llvm { +void initializeAArch64ExpandPseudoPass(PassRegistry &); +} + +#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" + namespace { class AArch64ExpandPseudo : public MachineFunctionPass { public: static char ID; - AArch64ExpandPseudo() : MachineFunctionPass(ID) {} + AArch64ExpandPseudo() : MachineFunctionPass(ID) { + initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry()); + } const AArch64InstrInfo *TII; bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "AArch64 pseudo instruction expansion pass"; + return AARCH64_EXPAND_PSEUDO_NAME; } private: @@ -45,6 +53,9 @@ private: char AArch64ExpandPseudo::ID = 0; } +INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", + AARCH64_EXPAND_PSEUDO_NAME, false, false) + /// \brief Transfer implicit operands on the pseudo instruction to the /// instructions created from the expansion. 
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 072819836bb3..0ac4b39b0357 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -523,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) U = C; } - if (const PointerType *Ty = dyn_cast(Obj->getType())) + if (auto *Ty = dyn_cast(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Cannot encode an offset register and an immediate offset in the same // instruction. Fold the immediate offset into the load/store instruction and - // emit an additonal add to take care of the offset register. + // emit an additional add to take care of the offset register. if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) RegisterOffsetNeedsLowering = true; @@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size // and alignment should be based on the VT. MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // Now add the rest of the operands. MIB.addFrameIndex(FI).addImm(Offset); } else { @@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, } // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, - AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, SetFlags, - WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } } } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + // Don't deal with undefined shifts. 
+ if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrs, AArch64::SUBXrs }, { AArch64::ADDWrs, AArch64::ADDXrs } }, @@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + if (ShiftImm >= 4) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrx, AArch64::SUBXrx }, { AArch64::ADDWrx, AArch64::ADDXrx } }, @@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, return ResultReg; // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); @@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, { AArch64::ORRWrs, AArch64::ORRXrs }, { AArch64::EORWrs, AArch64::EORXrs } }; + + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + const TargetRegisterClass *RC; unsigned Opc; switch (RetVT.SimpleTy) { @@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { MIB.addImm(TestBit); MIB.addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - fastEmitBranch(FBB, DbgLoc); - + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2257,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2289,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. 
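The emitAddSub_rs and emitLogicalOp_rs helpers above now return 0 for shift amounts at or beyond the register width (undefined in the IR sense and unencodable in AArch64's shifted-register forms), and their callers were reworked from "return the fold" to "try the fold, fall back if it failed". A sketch of that control-flow pattern with invented helpers and register numbers:

#include <cstdint>

// Returns a result register, or 0 when the shifted-register fold is not
// possible; mirrors the new ShiftImm >= RetVT.getSizeInBits() guard.
unsigned tryEmitShiftedForm(unsigned BitWidth, uint64_t ShiftImm) {
  if (ShiftImm >= BitWidth)
    return 0;
  return 42; // pretend: virtual register holding the folded result
}

unsigned emitOp(unsigned BitWidth, uint64_t ShiftImm) {
  if (unsigned ResultReg = tryEmitShiftedForm(BitWidth, ShiftImm))
    return ResultReg; // fold succeeded
  return 7;           // pretend: generic register-register fallback
}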
- CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2317,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const auto *CI = dyn_cast(BI->getCondition())) { @@ -2371,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); - // Obtain the branch weight and add the target to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - Target->getBasicBlock()); - FuncInfo.MBB->addSuccessor(Target, BranchWeight); + // Obtain the branch probability and add the target to the successor list. + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + BI->getParent(), Target->getBasicBlock()); + FuncInfo.MBB->addSuccessor(Target, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. 
- uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2406,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. + unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2447,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); + for (auto *Succ : BI->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); return true; } @@ -2456,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { bool AArch64FastISel::selectCmp(const Instruction *I) { const CmpInst *CI = cast(I); + // Vectors of i1 are weird: bail out. + if (CI->getType()->isVectorTy()) + return false; + // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); unsigned ResultReg = 0; @@ -2954,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, .addImm(NumBytes); // Process the args. 
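The rewritten selectBranch drops the compare-against-zero and branches on bit 0 of the i1-in-i32 condition directly with TBNZ, flipping to TBZ with swapped targets when the true block is the layout successor. A scalar model of the two opcodes' taken-branch condition:

#include <cstdint>

bool tbnzTaken(uint32_t Reg, unsigned Bit) { return (Reg >> Bit) & 1; }
bool tbzTaken(uint32_t Reg, unsigned Bit) { return !((Reg >> Bit) & 1); }

// The pass always tests Bit == 0 here, since fast-isel materializes an i1
// as the low bit of a 32-bit register.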
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (CCValAssign &VA : ArgLocs) { const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; @@ -3018,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), - MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; @@ -3318,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -3684,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (F.isVarArg()) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector RetRegs; @@ -3763,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned RetReg : RetRegs) + MIB.addReg(RetReg, RegState::Implicit); return true; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index a76473f7e539..11ae8005370d 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -72,9 +72,9 @@ // // For most functions, some of the frame areas are empty. For those functions, // it may not be necessary to set up fp or bp: -// * A base pointer is definitly needed when there are both VLAs and local +// * A base pointer is definitely needed when there are both VLAs and local // variables with more-than-default alignment requirements. -// * A frame pointer is definitly needed when there are local variables with +// * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // // In some cases when a base pointer is not strictly needed, it is generated @@ -216,11 +216,11 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( if (CSI.empty()) return; - const DataLayout *TD = MF.getTarget().getDataLayout(); + const DataLayout &TD = MF.getDataLayout(); bool HasFP = hasFP(MF); // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(0); + int stackGrowth = -TD.getPointerSize(0); // Calculate offsets. int64_t saveAreaOffset = (HasFP ? 
2 : 1) * stackGrowth; @@ -280,14 +280,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const AArch64RegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); - DebugLoc DL = MBB.findDebugLoc(MBBI); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -354,7 +357,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (NumBytes && NeedsRealignment) { // Use the first callee-saved register as a scratch register. scratchSPReg = AArch64::X9; - MF.getRegInfo().setPhysRegUsed(scratchSPReg); } // If we're a leaf function, try using the red zone. @@ -400,8 +402,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } if (needsFrameMoves) { - const DataLayout *TD = MF.getTarget().getDataLayout(); - const int StackGrowth = -TD->getPointerSize(0); + const DataLayout &TD = MF.getDataLayout(); + const int StackGrowth = -TD.getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // @@ -513,33 +515,33 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { return false; } -static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { +/// Checks whether the given instruction restores callee save registers +/// and if so returns how many. 
+static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) { unsigned RtIdx = 0; - if (MI->getOpcode() == AArch64::LDPXpost || - MI->getOpcode() == AArch64::LDPDpost) + switch (MI.getOpcode()) { + case AArch64::LDPXpost: + case AArch64::LDPDpost: RtIdx = 1; - - if (MI->getOpcode() == AArch64::LDPXpost || - MI->getOpcode() == AArch64::LDPDpost || - MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) || - MI->getOperand(RtIdx + 2).getReg() != AArch64::SP) - return false; - return true; + // FALLTHROUGH + case AArch64::LDPXi: + case AArch64::LDPDi: + if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) || + !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) || + MI.getOperand(RtIdx + 2).getReg() != AArch64::SP) + return 0; + return 2; } - - return false; + return 0; } void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64InstrInfo *TII = - static_cast(MF.getSubtarget().getInstrInfo()); - const AArch64RegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { @@ -585,7 +587,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // ---------------------| --- | // | | | | // | CalleeSavedReg | | | - // | (NumRestores * 16) | | | + // | (NumRestores * 8) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) @@ -606,17 +608,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - if (LastPopI != MBB.begin()) { - do { - ++NumRestores; - --LastPopI; - } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); - if (!isCSRestore(LastPopI, CSRegs)) { + MachineBasicBlock::iterator Begin = MBB.begin(); + while (LastPopI != Begin) { + --LastPopI; + unsigned Restores = getNumCSRestores(*LastPopI, CSRegs); + NumRestores += Restores; + if (Restores == 0) { ++LastPopI; - --NumRestores; + break; } } - NumBytes -= NumRestores * 16; + NumBytes -= NumRestores * 8; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { @@ -634,15 +636,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. 
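getNumCSRestores above counts restored registers (two per LDP) where the old isCSRestore counted instructions, so the epilogue's stack math changes from NumRestores * 16 to NumRestores * 8: one 8-byte slot per callee-saved X or D register. A worked check that the two accountings agree:

int restoredBytes(int NumLdpInstructions) {
  int NumRestoredRegs = 2 * NumLdpInstructions; // each LDP pops a pair
  return NumRestoredRegs * 8;                   // 8 bytes per register
}
// restoredBytes(3) == 48, the same as the old 3 instructions * 16 bytes.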
-int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); + -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -739,9 +733,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - for (unsigned i = 0; i < Count; i += 2) { unsigned idx = Count - i - 2; unsigned Reg1 = CSI[idx].getReg(); @@ -911,7 +902,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); + DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Check pairs of consecutive callee-saved registers. diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 731f031ff855..427afdf4acbf 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -37,7 +37,6 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; int resolveFrameIndexReference(const MachineFunction &MF, int FI, @@ -61,6 +60,11 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + + /// Returns true if the target will correctly handle shrink wrapping. + bool enableShrinkWrapping(const MachineFunction &MF) const override { + return true; + } }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 772e894f4f0a..6c868880bcac 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -34,7 +34,6 @@ using namespace llvm; namespace { class AArch64DAGToDAGISel : public SelectionDAGISel { - AArch64TargetMachine &TM; /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. 
@@ -45,7 +44,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { public: explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {} const char *getPassName() const override { @@ -53,9 +52,7 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); + ForCodeSize = MF.getFunction()->optForSize(); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -79,6 +76,21 @@ public: bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { return SelectShiftedRegister(N, true, Reg, Shift); } + bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 1, Base, OffImm); + } + bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 2, Base, OffImm); + } + bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 4, Base, OffImm); + } + bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 8, Base, OffImm); + } + bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 16, Base, OffImm); + } bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 1, Base, OffImm); } @@ -153,8 +165,7 @@ public: SDNode *SelectBitfieldExtractOp(SDNode *N); SDNode *SelectBitfieldInsertOp(SDNode *N); - - SDNode *SelectLIBM(SDNode *N); + SDNode *SelectBitfieldInsertInZeroOp(SDNode *N); SDNode *SelectReadRegister(SDNode *N); SDNode *SelectWriteRegister(SDNode *N); @@ -165,6 +176,8 @@ public: private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, SDValue &Shift); + bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, @@ -422,7 +435,7 @@ static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { return true; } -// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a +// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a // high lane extract. static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, SDValue &LaneOp, int &LaneIdx) { @@ -572,7 +585,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, } // AArch64 mandates that the RHS of the operation must use the smallest - // register classs that could contain the size being extended from. Thus, + // register class that could contain the size being extended from. Thus, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. @@ -587,7 +600,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, /// need to create a real ADD instruction from it anyway and there's no point in /// folding it into the mem op. 
Theoretically, it shouldn't matter, but there's /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding -/// leads to duplaicated ADRP instructions. +/// leads to duplicated ADRP instructions. static bool isWorthFoldingADDlow(SDValue N) { for (auto Use : N->uses()) { if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && @@ -604,6 +617,51 @@ static bool isWorthFoldingADDlow(SDValue N) { return true; } +/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit +/// immediate" address. The "Size" argument is the size in bytes of the memory +/// reference, which determines the scale. +bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { + SDLoc dl(N); + const DataLayout &DL = CurDAG->getDataLayout(); + const TargetLowering *TLI = getTargetLowering(); + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; + } + + // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed + // selected here doesn't support labels/immediates, only base+offset. + + if (CurDAG->isBaseWithConstantOffset(N)) { + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) && + RHSC < (0x40 << Scale)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + } + OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); + return true; + } + } + } + + // Base only. The address will be materialized into a register before + // the memory is accessed. + // add x0, Xbase, #offset + // stp x1, x2, [x0] + Base = N; + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; +} + /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. @@ -867,7 +925,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, if (isa(RHS)) { int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); - // Skip the immediate can be seleced by load/store addressing mode. + // Skip the immediate can be selected by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also // checked by using -ImmOff). if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || @@ -1034,6 +1092,8 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { // it into an i64. DstVT = MVT::i32; } + } else if (VT == MVT::f16) { + Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? 
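SelectAddrModeIndexed7S above accepts a base plus a signed 7-bit immediate scaled by the access size: the byte offset must be a multiple of Size and fall in [-64*Size, 63*Size], the range LDP/STP-class instructions encode. A standalone version of that acceptance test (a sketch; Log2_32 is inlined for the power-of-two sizes used here):

#include <cstdint>

bool fitsIndexed7S(int64_t Offset, unsigned Size) {
  unsigned Scale = 0;
  while ((1u << Scale) < Size)
    ++Scale;                            // Log2_32(Size) for Size in {1,2,4,8,16}
  int64_t Lim = int64_t(0x40) << Scale; // 64 slots on each side
  return Offset % int64_t(Size) == 0 && Offset >= -Lim && Offset < Lim;
}
// For Size == 8 this accepts -512, -504, ..., 496, 504, matching the
// RHSC >= -(0x40 << Scale) && RHSC < (0x40 << Scale) bounds above.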
AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { @@ -1222,8 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue SuperReg = SDValue(Ld, 0); EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, - AArch64::qsub3 }; + static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) @@ -1275,8 +1335,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); } else { EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, - AArch64::qsub3 }; + static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); @@ -1420,7 +1480,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, // The resulting code will be at least as good as the original one // plus it may expose more opportunities for bitfield insert pattern. // FIXME: Currently we limit this to the bigger pattern, because - // some optimizations expect AND and not UBFM + // some optimizations expect AND and not UBFM. Opd0 = N->getOperand(0); } else return false; @@ -1852,6 +1912,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { /// Does this tree qualify as an attempt to move a bitfield into position, /// essentially "(and (shl VAL, N), Mask)". static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, + bool BiggerPattern, SDValue &Src, int &ShiftAmount, int &MaskWidth) { EVT VT = Op.getValueType(); @@ -1874,6 +1935,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, Op = Op.getOperand(0); } + // Don't match if the SHL has more than one use, since then we'll end up + // generating SHL+UBFIZ instead of just keeping SHL+AND. + if (!BiggerPattern && !Op.hasOneUse()) + return false; + uint64_t ShlImm; if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) return false; @@ -1887,7 +1953,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, // BFI encompasses sufficiently many nodes that it's worth inserting an extra // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL - // amount. + // amount. BiggerPattern is true when this pattern is being matched for BFI, + // BiggerPattern is false when this pattern is being matched for UBFIZ, in + // which case it is not profitable to insert an extra shift. 
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern) + return false; Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); return true; @@ -1904,7 +1974,8 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, SDValue &Src, unsigned &ImmR, - unsigned &ImmS, SelectionDAG *CurDAG) { + unsigned &ImmS, const APInt &UsefulBits, + SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); // Set Opc @@ -1918,23 +1989,30 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // Because of simplify-demanded-bits in DAGCombine, involved masks may not // have the expected shape. Try to undo that. - APInt UsefulBits; - getUsefulBits(SDValue(N, 0), UsefulBits); unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); - // OR is commutative, check both possibilities (does llvm provide a - // way to do that directely, e.g., via code matcher?) - SDValue OrOpd1Val = N->getOperand(1); - SDNode *OrOpd0 = N->getOperand(0).getNode(); - SDNode *OrOpd1 = N->getOperand(1).getNode(); - for (int i = 0; i < 2; - ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { + // OR is commutative, check all combinations of operand order and values of + // BiggerPattern, i.e. + // Opd0, Opd1, BiggerPattern=false + // Opd1, Opd0, BiggerPattern=false + // Opd0, Opd1, BiggerPattern=true + // Opd1, Opd0, BiggerPattern=true + // Several of these combinations may match, so check with BiggerPattern=false + // first since that will produce better results by matching more instructions + // and/or inserting fewer extra instructions. + for (int I = 0; I < 4; ++I) { + + bool BiggerPattern = I / 2; + SDNode *OrOpd0 = N->getOperand(I % 2).getNode(); + SDValue OrOpd1Val = N->getOperand((I + 1) % 2); + SDNode *OrOpd1 = OrOpd1Val.getNode(); + unsigned BFXOpc; int DstLSB, Width; if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, - NumberOfIgnoredLowBits, true)) { + NumberOfIgnoredLowBits, BiggerPattern)) { // Check that the returned opcode is compatible with the pattern, // i.e., same type and zero extended (U and not S) if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || @@ -1952,8 +2030,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // If the mask on the insertee is correct, we have a BFXIL operation. We // can share the ImmR and ImmS values from the already-computed UBFM. - } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, - DstLSB, Width)) { + } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), + BiggerPattern, + Src, DstLSB, Width)) { ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); ImmS = Width - 1; } else @@ -2003,11 +2082,18 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { unsigned Opc; unsigned LSB, MSB; SDValue Opd0, Opd1; + EVT VT = N->getValueType(0); + APInt NUsefulBits; + getUsefulBits(SDValue(N, 0), NUsefulBits); - if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) + // If all bits are not useful, just return UNDEF. 
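The rewritten isBitfieldInsertOpFromOr tries all four combinations of operand order and BiggerPattern in one indexed loop, ordered so the cheaper BiggerPattern=false matches win ties. The index decoding, spelled out as a runnable enumeration:

#include <cstdio>

int main() {
  for (int I = 0; I < 4; ++I) {
    bool BiggerPattern = I / 2; // 0, 0, 1, 1
    int Insertee = I % 2;       // operand 0, 1, 0, 1
    std::printf("try operand %d as insertee, BiggerPattern=%d\n", Insertee,
                int(BiggerPattern));
  }
}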
+ if (!NUsefulBits) + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT); + + if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits, + CurDAG)) return nullptr; - EVT VT = N->getValueType(0); SDLoc dl(N); SDValue Ops[] = { Opd0, Opd1, @@ -2016,58 +2102,37 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } -SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { +/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the +/// equivalent of a left shift by a constant amount followed by an and masking +/// out a contiguous set of bits. +SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) { + if (N->getOpcode() != ISD::AND) + return nullptr; + EVT VT = N->getValueType(0); - unsigned Variant; unsigned Opc; - unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; + if (VT == MVT::i32) + Opc = AArch64::UBFMWri; + else if (VT == MVT::i64) + Opc = AArch64::UBFMXri; + else + return nullptr; - if (VT == MVT::f32) { - Variant = 0; - } else if (VT == MVT::f64) { - Variant = 1; - } else - return nullptr; // Unrecognized argument type. Fall back on default codegen. + SDValue Op0; + int DstLSB, Width; + if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, + Op0, DstLSB, Width)) + return nullptr; - // Pick the FRINTX variant needed to set the flags. - unsigned FRINTXOpc = FRINTXOpcs[Variant]; + // ImmR is the rotate right amount. + unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); + // ImmS is the most significant bit of the source to be moved. + unsigned ImmS = Width - 1; - switch (N->getOpcode()) { - default: - return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. - case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; - Opc = FRINTPOpcs[Variant]; - break; - } - case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; - Opc = FRINTMOpcs[Variant]; - break; - } - case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; - Opc = FRINTZOpcs[Variant]; - break; - } - case ISD::FROUND: { - unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; - Opc = FRINTAOpcs[Variant]; - break; - } - } - - SDLoc dl(N); - SDValue In = N->getOperand(0); - SmallVector Ops; - Ops.push_back(In); - - if (!TM.Options.UnsafeFPMath) { - SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); - Ops.push_back(SDValue(FRINTX, 1)); - } - - return CurDAG->getMachineNode(Opc, dl, VT, Ops); + SDLoc DL(N); + SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), + CurDAG->getTargetConstant(ImmS, DL, VT)}; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } bool @@ -2119,7 +2184,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, // into a single value to be used in the MRS/MSR instruction. 
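SelectBitfieldInsertInZeroOp encodes (x << LSB) & Mask as a UBFM with immr = (BitWidth - LSB) % BitWidth and imms = Width - 1, the UBFIZ alias. A self-contained check of that arithmetic for the 32-bit case, assuming a proper insert (LSB > 0, LSB + Width <= 32):

#include <cassert>
#include <cstdint>

uint32_t ubfiz32(uint32_t Src, unsigned LSB, unsigned Width) {
  assert(LSB > 0 && LSB + Width <= 32 && "sketch assumes a proper insert");
  unsigned ImmR = (32 - LSB) % 32;                 // rotate-right amount
  unsigned ImmS = Width - 1;                       // top source bit moved
  uint32_t Field = Src & ((1u << (ImmS + 1)) - 1); // low Width bits of Src
  // UBFM rotates the kept field right by ImmR; under the assert this equals
  // a plain left shift by LSB, since the wrapped-around bits are all zero.
  return (Field << (32 - ImmR)) | (Field >> ImmR);
}
// ubfiz32(0xAB, 8, 8) == 0xAB00, i.e. (0xAB << 8) & 0xFF00.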
static int getIntOperandFromRegisterString(StringRef RegString) { SmallVector Fields; - RegString.split(Fields, ":"); + RegString.split(Fields, ':'); if (Fields.size() == 1) return -1; @@ -2206,7 +2271,15 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); uint64_t Immed = cast(N->getOperand(2))->getZExtValue(); - return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other, + unsigned State; + if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) { + assert(Immed < 2 && "Bad imm"); + State = AArch64::MSRpstateImm1; + } else { + assert(Immed < 16 && "Bad imm"); + State = AArch64::MSRpstateImm4; + } + return CurDAG->getMachineNode(State, DL, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); @@ -2279,6 +2352,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case ISD::SRA: if (SDNode *I = SelectBitfieldExtractOp(Node)) return I; + if (SDNode *I = SelectBitfieldInsertInZeroOp(Node)) + return I; break; case ISD::OR: @@ -2802,6 +2877,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { break; } } + break; } case AArch64ISD::LD2post: { if (VT == MVT::v8i8) @@ -3214,14 +3290,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); break; } - - case ISD::FCEIL: - case ISD::FFLOOR: - case ISD::FTRUNC: - case ISD::FROUND: - if (SDNode *I = SelectLIBM(Node)) - return I; - break; } // Select the default instruction diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 3e8f46cf1ecd..9f5beff12100 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -40,23 +40,6 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); -namespace { -enum AlignMode { - StrictAlign, - NoStrictAlign -}; -} - -static cl::opt -Align(cl::desc("Load/store alignment support"), - cl::Hidden, cl::init(NoStrictAlign), - cl::values( - clEnumValN(StrictAlign, "aarch64-strict-align", - "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "aarch64-no-strict-align", - "Allow unaligned memory accesses"), - clEnumValEnd)); - // Place holder until extr generation is tested fully. static cl::opt EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, @@ -76,6 +59,9 @@ cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +/// Value type used for condition codes. +static const MVT MVT_CC = MVT::i32; + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -210,11 +196,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - // Exception handling. - // FIXME: These are guesses. Has this been defined yet? - setExceptionPointerRegister(AArch64::X0); - setExceptionSelectorRegister(AArch64::X1); - // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); @@ -234,6 +215,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // AArch64 lacks both left-rotate and popcount instructions. 
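SelectWriteRegister above picks between two MSR (immediate) encodings: most PSTATE fields take a 4-bit immediate, while PAN and UAO take a single bit, hence the Immed < 2 versus Immed < 16 asserts. A sketch of that width choice; the field tags are stand-ins for the AArch64PState values:

enum PStateField { DAIFSet, SPSel, PAN, UAO }; // illustrative subset

unsigned immediateBitsFor(PStateField F) {
  return (F == PAN || F == UAO) ? 1u : 4u; // 1-bit vs 4-bit encodings
}

bool immediateFits(PStateField F, unsigned long long Imm) {
  return Imm < (1ull << immediateBitsFor(F));
}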
setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + } // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); @@ -252,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); @@ -315,6 +304,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f16, Promote); setOperationAction(ISD::FMINNUM, MVT::f16, Promote); setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINNAN, MVT::f16, Promote); + setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -403,10 +394,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FTRUNC, Ty, Legal); setOperationAction(ISD::FROUND, Ty, Legal); + setOperationAction(ISD::FMINNUM, Ty, Legal); + setOperationAction(ISD::FMAXNUM, Ty, Legal); + setOperationAction(ISD::FMINNAN, Ty, Legal); + setOperationAction(ISD::FMAXNAN, Ty, Legal); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. + // This requires the Performance Monitors extension. + if (Subtarget->hasPerfMon()) + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + if (Subtarget->isTargetMachO()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret to avoid memory @@ -456,12 +456,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setIndexedLoadAction(im, MVT::i64, Legal); setIndexedLoadAction(im, MVT::f64, Legal); setIndexedLoadAction(im, MVT::f32, Legal); + setIndexedLoadAction(im, MVT::f16, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i64, Legal); setIndexedStoreAction(im, MVT::f64, Legal); setIndexedStoreAction(im, MVT::f32, Legal); + setIndexedStoreAction(im, MVT::f16, Legal); } // Trap. 
@@ -479,6 +481,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::ANY_EXTEND); @@ -487,16 +493,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::STORE); + if (Subtarget->supportsAddressTopByteIgnored()) + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::VSELECT); - setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; @@ -512,10 +520,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setMinFunctionAlignment(2); - RequireStrictAlign = (Align == StrictAlign); - setHasExtractBitsInsn(true); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: @@ -646,6 +654,9 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); + + // But we do support custom-lowering for FCOPYSIGN. + setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); @@ -686,6 +697,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT.getSimpleVT(), Legal); + // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!). + if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16) + for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN, + ISD::FMINNUM, ISD::FMAXNUM}) + setOperationAction(Opcode, VT.getSimpleVT(), Legal); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -730,7 +747,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( break; } case ISD::INTRINSIC_W_CHAIN: { - ConstantSDNode *CN = cast(Op->getOperand(1)); + ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; @@ -780,6 +797,34 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, return MVT::i64; } +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + bool *Fast) const { + if (Subtarget->requiresStrictAlign()) + return false; + + // FIXME: This is mostly true for Cyclone, but not necessarily others. + if (Fast) { + // FIXME: Define an attribute for slow unaligned accesses instead of + // relying on the CPU type as a proxy. + // On Cyclone, unaligned 128-bit stores are slow. 
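+    // For instance (illustrative only): a 16-byte store with alignment 4 on
+    // Cyclone reports *Fast == false, while the same store with the
+    // clang-vector-extension alignment hint of 1 or 2 reports *Fast == true.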
+ *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 || + // See comments in performSTORECombine() for more details about + // these conditions. + + // Code that uses clang vector extensions can mark that it + // wants unaligned accesses to be treated as fast by + // underspecifying alignment to be 1 or 2. + Align <= 2 || + + // Disregard v2i64. Memcpy lowering produces those and splitting + // them regresses performance on micro-benchmarks and olden/bh. + VT == MVT::v2i64; + } + return true; +} + FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { @@ -809,9 +854,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; + case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; + case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; - case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; - case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; case AArch64ISD::DUP: return "AArch64ISD::DUP"; case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; @@ -931,8 +977,7 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; + MachineFunction::iterator It = ++MBB->getIterator(); unsigned DestReg = MI->getOperand(0).getReg(); unsigned IfTrueReg = MI->getOperand(1).getReg(); @@ -1141,8 +1186,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (RHS.getOpcode() == ISD::SUB && isa(RHS.getOperand(0)) && - cast(RHS.getOperand(0))->getZExtValue() == 0 && + if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags @@ -1156,8 +1200,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // the absence of information about op2. Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); - } else if (LHS.getOpcode() == ISD::AND && isa(RHS) && - cast(RHS)->getZExtValue() == 0 && + } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one @@ -1167,14 +1210,230 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } +/// \defgroup AArch64CCMP CMP;CCMP matching +/// +/// These functions deal with the formation of CMP;CCMP;... sequences. +/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of +/// a comparison. They set the NZCV flags to a predefined value if their +/// predicate is false. 
This allows us to express arbitrary conjunctions, for +/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))" +/// expressed as: +/// cmp A +/// ccmp B, inv(CB), CA +/// check for CB flags +/// +/// In general we can create code for arbitrary "... (and (and A B) C)" +/// sequences. We can also implement some "or" expressions, because "(or A B)" +/// is equivalent to "not (and (not A) (not B))" and we can implement some +/// negation operations: +/// We can negate the results of a single comparison by inverting the flags +/// used when the predicate fails and inverting the flags tested in the next +/// instruction; we can also negate the results of the whole previous +/// conditional compare sequence by inverting the flags tested in the next +/// instruction. However, there is no way to negate the result of a partial +/// sequence. +/// +/// Therefore on encountering an "or" expression we can negate the subtree on +/// one side and have to be able to push the negate to the leaves of the subtree +/// on the other side (see also the comments in code). As a complete example: +/// "or (or (setCA (cmp A)) (setCB (cmp B))) +/// (and (setCC (cmp C)) (setCD (cmp D)))" +/// is transformed to +/// "not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) +/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))" +/// and implemented as: +/// cmp C +/// ccmp D, inv(CD), CC +/// ccmp A, CA, inv(CD) +/// ccmp B, CB, inv(CA) +/// check for CB flags +/// A counterexample is "or (and A B) (and C D)" which cannot be implemented +/// by conditional compare sequences. +/// @{ + +/// Create a conditional comparison; use CCMP, CCMN or FCCMP as appropriate. +static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue CCOp, + SDValue Condition, unsigned NZCV, + SDLoc DL, SelectionDAG &DAG) { + unsigned Opcode = 0; + if (LHS.getValueType().isFloatingPoint()) + Opcode = AArch64ISD::FCCMP; + else if (RHS.getOpcode() == ISD::SUB) { + SDValue SubOp0 = RHS.getOperand(0); + if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // See emitComparison() on why we can only do this for SETEQ and SETNE. + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } + } + if (Opcode == 0) + Opcode = AArch64ISD::CCMP; + + SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); +} + +/// Returns true if @p Val is a tree of AND/OR/SETCC operations. +/// CanPushNegate is set to true if we can push a negate operation through +/// the tree in a way that we are left with AND operations and negate operations +/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to +/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be +/// brought into such a form. +static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, + unsigned Depth = 0) { + if (!Val.hasOneUse()) + return false; + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + CanPushNegate = true; + return true; + } + // Protect against stack overflow.
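+  // The fixed cap below bounds the recursion: e.g. an "(and (and (and ...)))"
+  // chain nested more than 15 levels deep is rejected here and left to
+  // generic lowering instead.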
+ if (Depth > 15) + return false; + if (Opcode == ISD::AND || Opcode == ISD::OR) { + SDValue O0 = Val->getOperand(0); + SDValue O1 = Val->getOperand(1); + bool CanPushNegateL; + if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1)) + return false; + bool CanPushNegateR; + if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1)) + return false; + // We cannot push a negate through an AND operation (it would become an OR); + // however, we can change a (not (or x y)) to (and (not x) (not y)) if we can + // push the negate through the x/y subtrees. + CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR; + return true; + } + return false; +} + +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/FCCMP ops. See @ref AArch64CCMP. +/// Tries to transform the given i1 producing node @p Val to a series of compare +/// and conditional compare operations. @returns an NZCV flags producing node +/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if +/// the transformation was not possible. +/// On recursive invocations @p PushNegate may be set to true to have negation +/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate +/// for the comparisons in the current subtree; @p Depth limits the search +/// depth to avoid stack overflow. +static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC, bool PushNegate = false, + SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL, + unsigned Depth = 0) { + // We're at a tree leaf; produce a conditional comparison operation. + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); + bool isInteger = LHS.getValueType().isInteger(); + if (PushNegate) + CC = getSetCCInverse(CC, isInteger); + SDLoc DL(Val); + // Determine OutCC and handle FP special case. + if (isInteger) { + OutCC = changeIntCCToAArch64CC(CC); + } else { + assert(LHS.getValueType().isFloatingPoint()); + AArch64CC::CondCode ExtraCC; + changeFPCCToAArch64CC(CC, OutCC, ExtraCC); + // Surprisingly, some floating point conditions can't be tested with a + // single condition code. Construct an additional comparison in this case. + // See comment below on how we deal with OR conditions. + if (ExtraCC != AArch64CC::AL) { + SDValue ExtraCmp; + if (!CCOp.getNode()) + ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); + else { + SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); + // Note that we want the inverse of ExtraCC, so NZCV is not inverted. + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, + NZCV, DL, DAG); + } + CCOp = ExtraCmp; + Predicate = AArch64CC::getInvertedCondCode(ExtraCC); + OutCC = AArch64CC::getInvertedCondCode(OutCC); + } + } + + // Produce a normal comparison if we are first in the chain. + if (!CCOp.getNode()) + return emitComparison(LHS, RHS, CC, DL, DAG); + // Otherwise produce a ccmp.
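+  // As an illustrative sketch (hypothetical registers), lowering
+  // "(and (setcc lt (cmp a, b)) (setcc gt (cmp c, d)))" comes out roughly as:
+  //   cmp  w2, w3
+  //   ccmp w0, w1, #<nzcv failing lt>, gt
+  //   cset w4, lt
+  // The right-hand compare is emitted first, and the NZCV immediate is
+  // chosen so the final test fails whenever the predicated compare is skipped.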
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); + return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + DAG); + } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse()) + return SDValue(); + + assert((Opcode == ISD::OR || !PushNegate) + && "Can only push negate through OR operation"); + + // Check if both sides can be transformed. + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + bool CanPushNegateL; + if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1)) + return SDValue(); + bool CanPushNegateR; + if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1)) + return SDValue(); + + // Do we need to negate our operands? + bool NegateOperands = Opcode == ISD::OR; + // We can negate the results of all previous operations by inverting the + // predicate flags, giving us a free negation for one side. For the other side + // we need to be able to push the negation to the leaves of the tree. + if (NegateOperands) { + if (!CanPushNegateL && !CanPushNegateR) + return SDValue(); + // Order the side where we can push the negate through to LHS. + if (!CanPushNegateL && CanPushNegateR) + std::swap(LHS, RHS); + } else { + bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; + bool NeedsNegOutR = RHS->getOpcode() == ISD::OR; + if (NeedsNegOutL && NeedsNegOutR) + return SDValue(); + // Order the side where we need to negate the output flags to RHS so it + // gets emitted first. + if (NeedsNegOutL) + std::swap(LHS, RHS); + } + + // Emit RHS. If we want to negate the tree we only need to push a negate + // through if we are already in a PushNegate case; otherwise we can negate + // the "flags to test" afterwards. + AArch64CC::CondCode RHSCC; + SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate, + CCOp, Predicate, Depth+1); + if (NegateOperands && !PushNegate) + RHSCC = AArch64CC::getInvertedCondCode(RHSCC); + // Emit LHS. We must push the negate through if we need to negate it. + SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands, + CmpR, RHSCC, Depth+1); + // If we transformed an OR to an AND then we have to negate the result + // (or absorb a PushNegate resulting in a double negation). + if (Opcode == ISD::OR && !PushNegate) + OutCC = AArch64CC::getInvertedCondCode(OutCC); + return CmpL; +} + +/// @} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { - SDValue Cmp; - AArch64CC::CondCode AArch64CC; if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); @@ -1229,47 +1488,56 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } - // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. - // For the i8 operand, the largest immediate is 255, so this can be easily - // encoded in the compare instruction. For the i16 operand, however, the - // largest immediate cannot be encoded in the compare. - // Therefore, use a sign extending load and cmn to avoid materializing the -1 - // constant. For example, - // movz w1, #65535 - // ldrh w0, [x0, #0] - // cmp w0, w1 - // > - // ldrsh w0, [x0, #0] - // cmn w0, #1 - // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
The checks are in place to ensure - // both the LHS and RHS are truely zero extended and to make sure the - // transformation is profitable. + SDValue Cmp; + AArch64CC::CondCode AArch64CC; if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS)) { - if ((cast(RHS)->getZExtValue() >> 16 == 0) && - isa(LHS)) { - if (cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && - cast(LHS)->getMemoryVT() == MVT::i16 && - LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast(RHS)->getZExtValue(); - if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { - SDValue SExt = - DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, - DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, - DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); - return Cmp; - } + const ConstantSDNode *RHSC = cast(RHS); + + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the + // -1 constant. For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to + // ensure both the LHS and RHS are truly zero extended and to make sure the + // transformation is profitable. + if ((RHSC->getZExtValue() >> 16 == 0) && isa(LHS) && + cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + } + } + + if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { + if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) { + if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) + AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); } } } - Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); + + if (!Cmp) { + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + } + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); return Cmp; } @@ -1391,8 +1659,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, - SDLoc(Op)).first; + return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; } static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { @@ -1571,8 +1838,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. 
SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; + return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, + SDLoc(Op)).first; } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -1581,6 +1848,16 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); + unsigned NumElts = InVT.getVectorNumElements(); + + // f16 vectors are promoted to f32 before a conversion. + if (InVT.getVectorElementType() == MVT::f16) { + MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); + } if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); @@ -1628,8 +1905,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, - SDLoc(Op)).first; + return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -1931,6 +2207,31 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } +SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + SDLoc dl(Op); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::aarch64_thread_pointer: { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); + } + case Intrinsic::aarch64_neon_smax: + return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_umax: + return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_smin: + return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_umin: + return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } +} + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2032,14 +2333,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerFSINCOS(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); } } -/// getFunctionAlignment - Return the Log2 alignment of this function. 
-unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const { - return 2; -} - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -2214,9 +2512,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments( break; } - ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - MemVT, false, false, false, 0); + ArgValue = DAG.getExtLoad( + ExtType, DL, VA.getLocVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MemVT, false, false, false, 0); InVals.push_back(ArgValue); } @@ -2289,9 +2588,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), false, false, 0); + SDValue Store = DAG.getStore( + Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8), false, + false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); @@ -2318,9 +2618,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), false, false, 0); + SDValue Store = DAG.getStore( + Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16), + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(16, DL, PtrVT)); @@ -2453,8 +2754,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - if (!ArgLocs[i].isRegLoc()) + for (const CCValAssign &ArgLoc : ArgLocs) + if (!ArgLoc.isRegLoc()) return false; } @@ -2758,7 +3059,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); DstAddr = DAG.getFrameIndex(FI, PtrVT); - DstInfo = MachinePointerInfo::getFixedStack(FI); + DstInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Make sure any stack arguments overlapping with where we're storing // are loaded before this eventual operation. Otherwise they'll be @@ -2768,7 +3070,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(LocMemOffset); + DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(), + LocMemOffset); } if (Outs[i].Flags.isByVal()) { @@ -2802,9 +3105,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
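+  // InFlag threads glue through the CopyToReg sequence below so the scheduler
+  // cannot separate the argument copies from the call they feed.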
SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); + for (auto &RegToPass : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, + RegToPass.second, InFlag); InFlag = Chain.getValue(1); } @@ -2860,9 +3163,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add argument registers to the end of the list so that they are known live // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); + for (auto &RegToPass : RegsToPass) + Ops.push_back(DAG.getRegister(RegToPass.first, + RegToPass.second.getValueType())); // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; @@ -2968,6 +3271,19 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + if (AArch64::GPR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else if (AArch64::FPR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } RetOps[0] = Chain; // Update chain. @@ -3010,11 +3326,12 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC; SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags); SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, - /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); + SDValue GlobalAddr = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + /*isVolatile=*/false, + /*isNonTemporal=*/true, + /*isInvariant=*/true, 8); if (GN->getOffset() != 0) return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr, DAG.getConstant(GN->getOffset(), DL, PtrVT)); @@ -3087,8 +3404,9 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = - DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(), - false, true, true, 8); + DAG.getLoad(MVT::i64, DL, Chain, DescAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, + true, true, 8); Chain = FuncTLVGet.getValue(1); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -3160,6 +3478,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; @@ -3277,8 +3599,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. 
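+  // A typical input looks like (illustrative IR):
+  //   %r  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  //   %ov = extractvalue { i32, i1 } %r, 1
+  //   br i1 %ov, label %overflow, label %cont
+  // so LHS is the i1 overflow result (ResNo == 1) compared against 1.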
unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isa(RHS) && - cast(RHS)->isOne() && + if (LHS.getResNo() == 1 && isOneConstant(RHS) && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && @@ -3392,17 +3713,11 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SDValue In1 = Op.getOperand(0); SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); - if (SrcVT != VT) { - if (SrcVT == MVT::f32 && VT == MVT::f64) - In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); - else if (SrcVT == MVT::f64 && VT == MVT::f32) - In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, - DAG.getIntPtrConstant(0, DL)); - else - // FIXME: Src type is different, bail out for now. Can VT really be a - // vector type? - return SDValue(); - } + + if (SrcVT.bitsLT(VT)) + In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); + else if (SrcVT.bitsGT(VT)) + In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); EVT VecVT; EVT EltVT; @@ -3410,7 +3725,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SDValue VecVal1, VecVal2; if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { EltVT = MVT::i32; - VecVT = MVT::v4i32; + VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32); EltMask = 0x80000000ULL; if (!VT.isVector()) { @@ -3571,32 +3886,6 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } } -/// A SELECT_CC operation is really some kind of max or min if both values being -/// compared are, in some sense, equal to the results in either case. However, -/// it is permissible to compare f32 values and produce directly extended f64 -/// values. -/// -/// Extending the comparison operands would also be allowed, but is less likely -/// to happen in practice since their use is right here. Note that truncate -/// operations would *not* be semantically equivalent. -static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { - if (Cmp == Result) - return (Cmp.getValueType() == MVT::f32 || - Cmp.getValueType() == MVT::f64); - - ConstantFPSDNode *CCmp = dyn_cast(Cmp); - ConstantFPSDNode *CResult = dyn_cast(Result); - if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && - Result.getValueType() == MVT::f64) { - bool Lossy; - APFloat CmpVal = CCmp->getValueAPF(); - CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); - return CResult->getValueAPF().bitwiseIsEqual(CmpVal); - } - - return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; -} - SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, SDLoc dl, @@ -3614,7 +3903,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } } - // Handle integers first. + // Also handle f16, for which we need to do a f32 comparison. + if (LHS.getValueType() == MVT::f16) { + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); + RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); + } + + // Next, handle integers. if (LHS.getValueType().isInteger()) { assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); @@ -3637,9 +3932,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } else if (TVal.getOpcode() == ISD::XOR) { // If TVal is a NOT we want to swap TVal and FVal so that we can match // with a CSINV rather than a CSEL. 
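+    // CSINV Wd, Wn, Wm, cond yields Wn when cond holds and ~Wm otherwise, so
+    // swapping the arms and inverting the condition moves the NOT onto the
+    // operand CSINV inverts for free.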
- ConstantSDNode *CVal = dyn_cast(TVal.getOperand(1)); - - if (CVal && CVal->isAllOnesValue()) { + if (isAllOnesConstant(TVal.getOperand(1))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); @@ -3647,9 +3940,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } else if (TVal.getOpcode() == ISD::SUB) { // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so // that we can match with a CSNEG rather than a CSEL. - ConstantSDNode *CVal = dyn_cast(TVal.getOperand(0)); - - if (CVal && CVal->isNullValue()) { + if (isNullConstant(TVal.getOperand(0))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); @@ -4109,46 +4400,57 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + + // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which + // is "undef". We wanted 0, so CSEL it directly. + SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), + ISD::SETEQ, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); + HiBitsForLo = + DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), + HiBitsForLo, CCVal, Cmp); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue LoForNormalShift = + DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo); - SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Lo = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, + dl, DAG); + CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, + LoForNormalShift, CCVal, Cmp); // AArch64 shifts larger than the register width are wrapped rather than // clamped, so we can't just emit "hi >> x". - SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue TrueValHi = Opc == ISD::SRA - ? DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, dl, - MVT::i64)) - : DAG.getConstant(0, dl, VT); - SDValue Hi = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); + SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue HiForBigShift = + Opc == ISD::SRA + ? 
DAG.getNode(Opc, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, MVT::i64)) + : DAG.getConstant(0, dl, VT); + SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, + HiForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } + /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -4156,31 +4458,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + + // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which + // is "undef". We wanted 0, so CSEL it directly. + SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), + ISD::SETEQ, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); + LoBitsForHi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), + LoBitsForHi, CCVal, Cmp); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue HiForNormalShift = + DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); - SDValue Hi = - DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); + Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, + dl, DAG); + CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, + HiForNormalShift, CCVal, Cmp); // AArch64 shifts of larger than register sizes are wrapped rather than // clamped, so we can't just emit "lo << a" if a is too big. - SDValue TrueValLo = DAG.getConstant(0, dl, VT); - SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Lo = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + SDValue LoForBigShift = DAG.getConstant(0, dl, VT); + SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, + LoForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -4362,8 +4674,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint( // Validate and return a target constant for them if we can. case 'z': { // 'z' maps to xzr or wzr so it needs an input of 0. 
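+    // Hypothetical usage sketch: asm volatile("str %x0, [%1]" :: "z"(0LL), "r"(p));
+    // lets the backend encode xzr directly as the store source.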
- ConstantSDNode *C = dyn_cast(Op); - if (!C || C->getZExtValue() != 0) + if (!isNullConstant(Op)) return; if (Op.getValueType() == MVT::i64) @@ -5653,11 +5964,10 @@ static SDValue NormalizeBuildVector(SDValue Op, return Op; SmallVector Ops; - for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) { - SDValue Lane = Op.getOperand(I); - if (Lane.getOpcode() == ISD::Constant) { + for (SDValue Lane : Op->ops()) { + if (auto *CstLane = dyn_cast(Lane)) { APInt LowBits(EltTy.getSizeInBits(), - cast(Lane)->getZExtValue()); + CstLane->getZExtValue()); Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); } Ops.push_back(Lane); @@ -5997,8 +6307,7 @@ FailedModImm: // Empirical tests suggest this is rarely worth it for vectors of length <= 2. if (NumElts >= 4) { - SDValue shuffle = ReconstructShuffle(Op, DAG); - if (shuffle != SDValue()) + if (SDValue shuffle = ReconstructShuffle(Op, DAG)) return shuffle; } @@ -6017,7 +6326,10 @@ FailedModImm: // a) Avoid a RMW dependency on the full vector register, and // b) Allow the register coalescer to fold away the copy if the // value is already in an S or D register. - if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { + // Do not do this for UNDEF/LOAD nodes because we have better patterns + // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR. + if (Op0.getOpcode() != ISD::UNDEF && Op0.getOpcode() != ISD::LOAD && + (ElemSize == 32 || ElemSize == 64)) { unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; MachineSDNode *N = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, @@ -6123,24 +6435,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, unsigned Val = Cst->getZExtValue(); unsigned Size = Op.getValueType().getSizeInBits(); - if (Val == 0) { - switch (Size) { - case 8: - return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 16: - return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 32: - return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(), - Op.getOperand(0)); - case 64: - return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(), - Op.getOperand(0)); - default: - llvm_unreachable("Unexpected vector type in extract_subvector!"); - } - } + + // This will get lowered to an appropriate EXTRACT_SUBREG in ISel. + if (Val == 0) + return Op; + // If this is extracting the upper 64-bits of a 128-bit vector, we match // that directly. if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) @@ -6213,26 +6512,20 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// 0 <= Value <= ElementBits for a long left shift. static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (!getVShiftImm(Op, ElementBits, Cnt)) return false; return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); } /// isVShiftRImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift right operation. For a shift opcode, the value -/// is positive, but for an intrinsic the value count must be negative. The -/// absolute value must be in the range: -/// 1 <= |Value| <= ElementBits for a right shift; or -/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, - int64_t &Cnt) { +/// operand of a vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits for a right shift. +static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (!getVShiftImm(Op, ElementBits, Cnt)) return false; - if (isIntrinsic) - Cnt = -Cnt; return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); } @@ -6261,8 +6554,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SRA: case ISD::SRL: // Right shift immediate - if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && - Cnt < EltSize) { + if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) { unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), @@ -6451,7 +6743,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6477,7 +6769,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6720,10 +7012,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad( const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeSizeInBits(VecTy); - // Skip illegal vector types. - if (VecSize != 64 && VecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -6806,10 +7098,10 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - // Skip illegal vector types. - if (SubVecSize != 64 && SubVecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) return false; Value *Op0 = SVI->getOperand(0); @@ -7228,8 +7520,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { // First try to optimize away the conversion when it's conditionally from // a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG); - if (Res != SDValue()) + if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG)) return Res; EVT VT = N->getValueType(0); @@ -7242,7 +7533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, // If the result of an integer load is only used by an integer-to-float // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. - // This eliminates an "integer-to-vector-move UOP and improve throughput. + // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. @@ -7265,6 +7556,134 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Fold a floating-point multiply by power of two into floating-point to +/// fixed-point conversion. +static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + if (!Subtarget->hasNEON()) + return SDValue(); + + SDValue Op = N->getOperand(0); + if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL) + return SDValue(); + + SDValue ConstVec = Op->getOperand(1); + if (!isa(ConstVec)) + return SDValue(); + + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + uint32_t FloatBits = FloatTy.getSizeInBits(); + if (FloatBits != 32 && FloatBits != 64) + return SDValue(); + + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + uint32_t IntBits = IntTy.getSizeInBits(); + if (IntBits != 16 && IntBits != 32 && IntBits != 64) + return SDValue(); + + // Avoid conversions where iN is larger than the float (e.g., float -> i64). + if (IntBits > FloatBits) + return SDValue(); + + BitVector UndefElements; + BuildVectorSDNode *BV = cast(ConstVec); + int32_t Bits = IntBits == 64 ? 64 : 32; + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1); + if (C == -1 || C == 0 || C > Bits) + return SDValue(); + + MVT ResTy; + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + switch (NumLanes) { + default: + return SDValue(); + case 2: + ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; + break; + case 4: + ResTy = MVT::v4i32; + break; + } + + SDLoc DL(N); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs + : Intrinsic::aarch64_neon_vcvtfp2fxu; + SDValue FixConv = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, + DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), + Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); + // We can handle smaller integers by generating an extra trunc. + if (IntBits < FloatBits) + FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); + + return FixConv; +} + +/// Fold a floating-point divide by power of two into fixed-point to +/// floating-point conversion. 
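+/// For example (sketch): (fdiv (sint_to_fp v2i32:$a), splat(8.0)) becomes the
+/// fixed-point conversion (int_aarch64_neon_vcvtfxs2fp $a, #3), folding the
+/// divide by 2^3 into the instruction's fractional-bits operand.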
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + if (!Subtarget->hasNEON()) + return SDValue(); + + SDValue Op = N->getOperand(0); + unsigned Opc = Op->getOpcode(); + if (!Op.getValueType().isVector() || + (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP)) + return SDValue(); + + SDValue ConstVec = N->getOperand(1); + if (!isa(ConstVec)) + return SDValue(); + + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + int32_t IntBits = IntTy.getSizeInBits(); + if (IntBits != 16 && IntBits != 32 && IntBits != 64) + return SDValue(); + + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + int32_t FloatBits = FloatTy.getSizeInBits(); + if (FloatBits != 32 && FloatBits != 64) + return SDValue(); + + // Avoid conversions where iN is larger than the float (e.g., i64 -> float). + if (IntBits > FloatBits) + return SDValue(); + + BitVector UndefElements; + BuildVectorSDNode *BV = cast(ConstVec); + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); + if (C == -1 || C == 0 || C > FloatBits) + return SDValue(); + + MVT ResTy; + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + switch (NumLanes) { + default: + return SDValue(); + case 2: + ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; + break; + case 4: + ResTy = MVT::v4i32; + break; + } + + SDLoc DL(N); + SDValue ConvInput = Op.getOperand(0); + bool IsSigned = Opc == ISD::SINT_TO_FP; + if (IntBits < FloatBits) + ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, + ResTy, ConvInput); + + unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp + : Intrinsic::aarch64_neon_vcvtfxu2fp; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput, + DAG.getConstant(C, DL, MVT::i32)); +} + /// An EXTR instruction is made up of two shifts, ORed together. This helper /// searches for and classifies those shifts. 
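+/// For example (sketch, i64): (or (shl x, 48), (srl y, 16)) is EXTR x, y, #16,
+/// i.e. the low 64 bits of the 128-bit concatenation x:y shifted right by 16.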
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, @@ -7964,7 +8383,6 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_vcvtfxs2fp: case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); - break; case Intrinsic::aarch64_neon_saddv: return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG); case Intrinsic::aarch64_neon_uaddv: @@ -7978,10 +8396,16 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_umaxv: return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG); case Intrinsic::aarch64_neon_fmax: - return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), + return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fmin: - return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0), + return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_fmaxnm: + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_fminnm: + return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_smull: case Intrinsic::aarch64_neon_umull: @@ -8141,7 +8565,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { unsigned Alignment = std::min(OrigAlignment, EltOffset); // Create scalar stores. This is at least as good as the code sequence for a - // split unaligned store wich is a dup.s, ext.b, and two stores. + // split unaligned store which is a dup.s, ext.b, and two stores. // Most of the time the three stores should be replaced by store pair // instructions (stp). SDLoc DL(St); @@ -8162,10 +8586,9 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { return NewST1; } -static SDValue performSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, - const AArch64Subtarget *Subtarget) { +static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -8173,15 +8596,17 @@ static SDValue performSTORECombine(SDNode *N, if (S->isVolatile()) return SDValue(); + // FIXME: The logic for deciding if an unaligned store should be split should + // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be + // a call to that function here. + // Cyclone has bad performance on unaligned 16B stores when crossing line and // page boundaries. We want to split such stores. if (!Subtarget->isCyclone()) return SDValue(); - // Don't split at Oz. - MachineFunction &MF = DAG.getMachineFunction(); - bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); - if (IsMinSize) + // Don't split at -Oz. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); SDValue StVal = S->getValue(); @@ -8204,8 +8629,7 @@ static SDValue performSTORECombine(SDNode *N, // If we get a splat of a scalar convert this vector store to a store of // scalars. They will be merged into store pairs thereby removing two // instructions. 
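+  // E.g. (sketch): a v4i32 splat store becomes four 32-bit str instructions at
+  // offsets 0, 4, 8 and 12, which the pairing pass then folds into two stp's.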
-  SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
-  if (ReplacedSplat != SDValue())
+  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S))
     return ReplacedSplat;
 
   SDLoc DL(S);
@@ -8326,6 +8750,299 @@ static SDValue performPostLD1Combine(SDNode *N,
   return SDValue();
 }
 
+/// Simplify \p Addr given that the top byte of it is ignored by HW during
+/// address translation.
+static bool performTBISimplification(SDValue Addr,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     SelectionDAG &DAG) {
+  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
+  APInt KnownZero, KnownOne;
+  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+                                        DCI.isBeforeLegalizeOps());
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+    DCI.CommitTargetLoweringOpt(TLO);
+    return true;
+  }
+  return false;
+}
+
+static SDValue performSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG,
+                                   const AArch64Subtarget *Subtarget) {
+  SDValue Split = split16BStores(N, DCI, DAG, Subtarget);
+  if (Split.getNode())
+    return Split;
+
+  if (Subtarget->supportsAddressTopByteIgnored() &&
+      performTBISimplification(N->getOperand(2), DCI, DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
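As a standalone illustration (not part of the patch) of what "top byte ignored" buys here: only bits [55:0] of an address take part in translation, so SimplifyDemandedBits is free to rewrite whatever feeds bits [63:56].

#include <cassert>
#include <cstdint>

// Model of the demanded-bits mask above: APInt::getLowBitsSet(64, 56).
static uint64_t demandedAddressBits(uint64_t Addr) {
  return Addr & ((UINT64_C(1) << 56) - 1); // keep bits [55:0]
}

int main() {
  uint64_t Plain  = UINT64_C(0x0000123456789ABC);
  uint64_t Tagged = UINT64_C(0xAB00123456789ABC); // tag in bits [63:56]
  assert(demandedAddressBits(Tagged) == demandedAddressBits(Plain));
  return 0;
}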
+/// This function handles the log2-shuffle pattern produced by the
+/// LoopVectorizer for the across vector reduction. It consists of
+/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
+/// are reduced, where s is an induction variable from 0 to
+/// log2(NumVectorElements).
+static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
+                                                     unsigned Op,
+                                                     SelectionDAG &DAG) {
+  EVT VTy = OpV->getOperand(0).getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  int NumVecElts = VTy.getVectorNumElements();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (NumVecElts != 4)
+      return SDValue();
+  } else {
+    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+      return SDValue();
+  }
+
+  int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
+  SDValue PreOp = OpV;
+  // Iterate over each step of the across vector reduction.
+  for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
+    SDValue CurOp = PreOp.getOperand(0);
+    SDValue Shuffle = PreOp.getOperand(1);
+    if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
+      // Try to swap the 1st and 2nd operand as add and min/max instructions
+      // are commutative.
+      CurOp = PreOp.getOperand(1);
+      Shuffle = PreOp.getOperand(0);
+      if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
+        return SDValue();
+    }
+
+    // Check if the input vector is fed by the operator we want to handle,
+    // except the last step; the very first input vector is not necessarily
+    // the same operator we are handling.
+    if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
+      return SDValue();
+
+    // Check if it forms one step of the across vector reduction.
+    // E.g.,
+    //   %cur = add %1, %0
+    //   %shuffle = vector_shuffle %cur, <2, 3, u, u>
+    //   %pre = add %cur, %shuffle
+    if (Shuffle.getOperand(0) != CurOp)
+      return SDValue();
+
+    int NumMaskElts = 1 << CurStep;
+    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
+    // Check mask values in each step.
+    // We expect the shuffle mask in each step follows a specific pattern
+    // denoted here by the <M, U, U, ...> form, where M is a sequence of
+    // integers starting from NumMaskElts, increasing by 1, and the number of
+    // integers in M should be NumMaskElts. U is a sequence of UNDEFs and the
+    // number of undefs in U should be NumVecElts - NumMaskElts.
+    // E.g., for <8 x i16>, mask values in each step should be:
+    //   step 0 : <1,u,u,u,u,u,u,u>
+    //   step 1 : <2,3,u,u,u,u,u,u>
+    //   step 2 : <4,5,6,7,u,u,u,u>
+    for (int i = 0; i < NumVecElts; ++i)
+      if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
+          (i >= NumMaskElts && !(Mask[i] < 0)))
+        return SDValue();
+
+    PreOp = CurOp;
+  }
+  unsigned Opcode;
+  bool IsIntrinsic = false;
+
+  switch (Op) {
+  default:
+    llvm_unreachable("Unexpected operator for across vector reduction");
+  case ISD::ADD:
+    Opcode = AArch64ISD::UADDV;
+    break;
+  case ISD::SMAX:
+    Opcode = AArch64ISD::SMAXV;
+    break;
+  case ISD::UMAX:
+    Opcode = AArch64ISD::UMAXV;
+    break;
+  case ISD::SMIN:
+    Opcode = AArch64ISD::SMINV;
+    break;
+  case ISD::UMIN:
+    Opcode = AArch64ISD::UMINV;
+    break;
+  case ISD::FMAXNUM:
+    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+    IsIntrinsic = true;
+    break;
+  case ISD::FMINNUM:
+    Opcode = Intrinsic::aarch64_neon_fminnmv;
+    IsIntrinsic = true;
+    break;
+  }
+  SDLoc DL(N);
+
+  return IsIntrinsic
+             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+             : DAG.getNode(
+                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+                   DAG.getConstant(0, DL, MVT::i64));
+}
+
+/// Target-specific DAG combine for the across vector min/max reductions.
+/// This function specifically handles the final clean-up step of the vector
+/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which narrows down and finds the final min/max value from all
+/// elements of the vector.
+/// For example, for a <16 x i8> vector :
+///   %svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
+///   %smax0 = smax %0, %svn0
+///   %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %smax1 = smax %smax0, %svn1
+///   %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %smax2 = smax %smax1, %svn2
+///   %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %sc = setcc %smax2, %svn3, gt
+///   %n0 = extract_vector_elt %sc, #0
+///   %n1 = extract_vector_elt %smax2, #0
+///   %n2 = extract_vector_elt %smax2, #1
+///   %result = select %n0, %n1, %n2
+/// becomes :
+///   %1 = smaxv %0
+///   %result = extract_vector_elt %1, 0
+static SDValue
+performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
+                                        const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+
+  // Check if the SELECT merges up the final result of the min/max
+  // from a vector.
+  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  // Expect N0 is fed by SETCC.
+  SDValue SetCC = N0.getOperand(0);
+  EVT SetCCVT = SetCC.getValueType();
+  if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
+      SetCCVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  SDValue VectorOp = SetCC.getOperand(0);
+  unsigned Op = VectorOp->getOpcode();
+  // Check if the input vector is fed by the operator we want to handle.
+  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
+    return SDValue();
+
+  EVT VTy = VectorOp.getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
+  EVT EltTy = VTy.getVectorElementType();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (EltTy != MVT::f32)
+      return SDValue();
+  } else {
+    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+      return SDValue();
+  }
+
+  // Check if extracting from the same vector.
+  // For example,
+  //   %sc = setcc %vector, %svn1, gt
+  //   %n0 = extract_vector_elt %sc, #0
+  //   %n1 = extract_vector_elt %vector, #0
+  //   %n2 = extract_vector_elt %vector, #1
+  if (!(VectorOp == IfTrue->getOperand(0) &&
+        VectorOp == IfFalse->getOperand(0)))
+    return SDValue();
+
+  // Check if the condition code is matched with the operator type.
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
+      (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
+      (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
+      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+       CC != ISD::SETGE) ||
+      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+       CC != ISD::SETLE))
+    return SDValue();
+
+  // Expect to check only lane 0 from the vector SETCC.
+  if (!isNullConstant(N0.getOperand(1)))
+    return SDValue();
+
+  // Expect to extract the true value from lane 0.
+  if (!isNullConstant(IfTrue.getOperand(1)))
+    return SDValue();
+
+  // Expect to extract the false value from lane 1.
+  if (!isOneConstant(IfFalse.getOperand(1)))
+    return SDValue();
+
+  return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
+}
+
+/// Target-specific DAG combine for the across vector add reduction.
+/// This function specifically handles the final clean-up step of the vector
+/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which adds all elements of a vector together.
+/// For example, for a <4 x i32> vector :
+///   %1 = vector_shuffle %0, <2,3,u,u>
+///   %2 = add %0, %1
+///   %3 = vector_shuffle %2, <1,u,u,u>
+///   %4 = add %2, %3
+///   %result = extract_vector_elt %4, 0
+/// becomes :
+///   %0 = uaddv %0
+///   %result = extract_vector_elt %0, 0
+static SDValue
+performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Check if the input vector is fed by the ADD.
+  if (N0->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // The vector extract idx must be constant zero because we only expect the
+  // final result of the reduction to be placed in lane 0.
+  if (!isNullConstant(N1))
+    return SDValue();
+
+  EVT VTy = N0.getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  EVT EltTy = VTy.getVectorElementType();
+  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+    return SDValue();
+
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
+  return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
+}
+
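A scalar model (illustrative only, not part of the patch) of the log2-shuffle pattern that both reduction combines hand off to tryMatchAcrossLaneShuffleForReduction: each step combines the vector with a copy of itself shifted down by half the remaining width, leaving the across-vector result in lane 0.

#include <cassert>

int main() {
  int V[4] = {1, 2, 3, 4}; // stands in for a <4 x i32> input
  // First shuffle <2,3,u,u> + add, then shuffle <1,u,u,u> + add.
  for (int Off = 2; Off >= 1; Off /= 2)
    for (int i = 0; i + Off < 4; ++i)
      V[i] += V[i + Off];
  assert(V[0] == 10); // lane 0 now holds the uaddv-style total
  return 0;
}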
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
 static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -8751,10 +9468,10 @@ static SDValue performBRCONDCombine(SDNode *N,
   if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
     return SDValue();
 
-  if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+  if (isNullConstant(LHS))
     std::swap(LHS, RHS);
 
-  if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
+  if (!isNullConstant(RHS))
     return SDValue();
 
   if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
@@ -8868,75 +9585,6 @@ static SDValue performSelectCombine(SDNode *N,
   return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
 }
 
-/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match FMIN/FMAX patterns.
-static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
-  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
-  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
-  // vmax/vmin return NaN if either operand is a NaN;
-  // only do the transformation when it matches that behavior.
-
-  SDValue CondLHS = N->getOperand(0);
-  SDValue CondRHS = N->getOperand(1);
-  SDValue LHS = N->getOperand(2);
-  SDValue RHS = N->getOperand(3);
-  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
-  unsigned Opcode;
-  bool IsReversed;
-  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
-      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
-    IsReversed = false; // x CC y ? x : y
-  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
-             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
-    IsReversed = true; // x CC y ? y : x
-  } else {
-    return SDValue();
-  }
-
-  bool IsUnordered = false, IsOrEqual;
-  switch (CC) {
-  default:
-    return SDValue();
-  case ISD::SETULT:
-  case ISD::SETULE:
-    IsUnordered = true;
-  case ISD::SETOLT:
-  case ISD::SETOLE:
-  case ISD::SETLT:
-  case ISD::SETLE:
-    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
-    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
-    break;
-
-  case ISD::SETUGT:
-  case ISD::SETUGE:
-    IsUnordered = true;
-  case ISD::SETOGT:
-  case ISD::SETOGE:
-  case ISD::SETGT:
-  case ISD::SETGE:
-    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
-    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
-    break;
-  }
-
-  // If LHS is NaN, an ordered comparison will be false and the result will be
-  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
-  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
-  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
-    return SDValue();
-
-  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
-  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
-  // used for unsafe math or if one of the operands is known to be nonzero.
-  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
-      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
-    return SDValue();
-
-  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
-}
-
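Background for the deletion above (a sketch, not part of the patch): the removed combine produced AArch64 FMIN/FMAX, which, like the ISD::FMINNAN/FMAXNAN nodes now emitted for the intrinsics, propagate NaN operands, whereas fminnum-style ops return the non-NaN operand. That difference is why the combine needed its NaN guards.

#include <cassert>
#include <cmath>
#include <limits>

// NaN-propagating minimum, matching FMINNAN-style semantics.
static double minPropagateNaN(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A < B ? A : B;
}

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(minPropagateNaN(NaN, 1.0))); // FMINNAN-style: NaN wins
  assert(std::fmin(NaN, 1.0) == 1.0);            // FMINNUM-style: number wins
  return 0;
}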
 /// Get rid of unnecessary NVCASTs (that don't change the type).
 static SDValue performNVCASTCombine(SDNode *N) {
   if (N->getValueType(0) == N->getOperand(0).getValueType())
@@ -8961,6 +9609,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     return performIntToFpCombine(N, DAG, Subtarget);
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return performFpToIntCombine(N, DAG, Subtarget);
+  case ISD::FDIV:
+    return performFDivCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
@@ -8973,12 +9626,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performBitcastCombine(N, DCI, DAG);
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
-  case ISD::SELECT:
-    return performSelectCombine(N, DCI);
+  case ISD::SELECT: {
+    SDValue RV = performSelectCombine(N, DCI);
+    if (!RV.getNode())
+      RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
+    return RV;
+  }
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
-  case ISD::SELECT_CC:
-    return performSelectCCCombine(N, DCI.DAG);
+  case ISD::LOAD:
+    if (performTBISimplification(N->getOperand(1), DCI, DAG))
+      return SDValue(N, 0);
+    break;
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:
@@ -8991,6 +9650,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performNVCASTCombine(N);
   case ISD::INSERT_VECTOR_ELT:
     return performPostLD1Combine(N, DCI, true);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -9157,6 +9818,20 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
 }
 
+static void ReplaceReductionResults(SDNode *N,
+                                    SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG, unsigned InterOp,
+                                    unsigned AcrossOp) {
+  EVT LoVT, HiVT;
+  SDValue Lo, Hi;
+  SDLoc dl(N);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
+  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
+  Results.push_back(SplitVal);
+}
+
 void AArch64TargetLowering::ReplaceNodeResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
@@ -9165,6 +9840,24 @@ void AArch64TargetLowering::ReplaceNodeResults(
   case ISD::BITCAST:
     ReplaceBITCASTResults(N, Results, DAG);
     return;
+  case AArch64ISD::SADDV:
+    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
+    return;
+  case AArch64ISD::UADDV:
+    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
+    return;
+  case AArch64ISD::SMINV:
+    ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
+    return;
+  case AArch64ISD::UMINV:
+    ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
+    return;
+  case AArch64ISD::SMAXV:
+    ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
+    return;
+  case AArch64ISD::UMAXV:
+    ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
+    return;
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -9177,10 +9870,10 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
   return true;
 }
 
-bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   // reciprocal if there are three or more FDIVs.
-  return NumUsers > 2;
+  return 3;
 }
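A scalar sketch (illustrative only) of the threshold just returned: with three or more divisions by one divisor, a single reciprocal plus multiplies is the cheaper shape. The divisor here is a power of two, so the results are bit-exact.

#include <cassert>

int main() {
  double A = 1.0, B = 3.0, C = 5.0, D = 8.0; // three FDIVs by the same D
  double Recip = 1.0 / D;                    // one division...
  assert(A * Recip == A / D);                // ...then three multiplies
  assert(B * Recip == B / D);
  assert(C * Recip == C / D);
  return 0;
}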
 
 TargetLoweringBase::LegalizeTypeAction
@@ -9206,20 +9899,21 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 // Loads and stores less than 128-bits are already atomic; ones above that
 // are doomed anyway, so defer to the default libcall and blame the OS when
 // things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return Size == 128;
+  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
-                     : AtomicRMWExpansionKind::None;
+  return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
-bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
   return true;
 }
 
@@ -9258,6 +9952,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
       cast<PointerType>(Addr->getType())->getElementType());
 }
 
+void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+    IRBuilder<> &Builder) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Builder.CreateCall(
+      llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+}
+
 Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
                                                    Value *Val, Value *Addr,
                                                    AtomicOrdering Ord) const {
@@ -9294,3 +9995,70 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
     Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
   return Ty->isArrayTy();
 }
+
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+                                                            EVT) const {
+  return false;
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+  if (!Subtarget->isTargetAndroid())
+    return TargetLowering::getSafeStackPointerLocation(IRB);
+
+  // Android provides a fixed TLS slot for the SafeStack pointer. See the
+  // definition of TLS_SLOT_SAFESTACK in
+  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+  const unsigned TlsOffset = 0x48;
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  Function *ThreadPointerFunc =
+      Intrinsic::getDeclaration(M, Intrinsic::aarch64_thread_pointer);
+  return IRB.CreatePointerCast(
+      IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+      Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+}
+
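The 0x48 above is simply a TLS slot index scaled by the pointer size; a tiny check (illustrative only, with the slot number taken as an assumption from bionic's TLS layout rather than from this patch):

#include <cassert>

int main() {
  const unsigned TlsSlotSafeStack = 9; // assumed TLS_SLOT_SAFESTACK index
  const unsigned PointerBytes = 8;     // AArch64 pointer size in bytes
  assert(TlsSlotSafeStack * PointerBytes == 0x48);
  return 0;
}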
+void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  // Update IsSplitCSR in AArch64FunctionInfo.
+  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
+  AFI->setIsSplitCSR(true);
+}
+
+void AArch64TargetLowering::insertCopiesSplitCSR(
+    MachineBasicBlock *Entry,
+    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (AArch64::GPR64RegClass.contains(*I))
+      RC = &AArch64::GPR64RegClass;
+    else if (AArch64::FPR64RegClass.contains(*I))
+      RC = &AArch64::FPR64RegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions, it works
+    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+    // nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+               Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+            NewVR)
+        .addReg(*I);
+
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+              *I)
+          .addReg(NewVR);
+  }
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index c73ce1e54b3e..e99616c94068 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 
+#include "AArch64.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/CallingConv.h"
@@ -58,13 +59,14 @@ enum NodeType : unsigned {
   SBCS,
   ANDS,
 
+  // Conditional compares. Operands: left,right,falsecc,cc,flags
+  CCMP,
+  CCMN,
+  FCCMP,
+
   // Floating point comparison
   FCMP,
 
-  // Floating point max and min instructions.
-  FMAX,
-  FMIN,
-
   // Scalar extract
   EXTR,
 
@@ -217,8 +219,6 @@ class AArch64Subtarget;
 class AArch64TargetMachine;
 
 class AArch64TargetLowering : public TargetLowering {
-  bool RequireStrictAlign;
-
 public:
   explicit AArch64TargetLowering(const TargetMachine &TM,
                                  const AArch64Subtarget &STI);
@@ -226,46 +226,35 @@ public:
   /// Selects the correct CCAssignFn for a given CallingConvention value.
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
 
-  /// computeKnownBitsForTargetNode - Determine which of the bits specified in
-  /// Mask are known to be either zero or one and return them in the
-  /// KnownZero/KnownOne bitsets.
+  /// Determine which of the bits specified in Mask are known to be either zero
+  /// or one and return them in the KnownZero/KnownOne bitsets.
   void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
                                      APInt &KnownOne, const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
 
   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
 
-  /// allowsMisalignedMemoryAccesses - Returns true if the target allows
-  /// unaligned memory accesses of the specified type.
+  /// Returns true if the target allows unaligned memory accesses of the
+  /// specified type.
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
                                       unsigned Align = 1,
-                                      bool *Fast = nullptr) const override {
-    if (RequireStrictAlign)
-      return false;
-    // FIXME: True for Cyclone, but not necessary others.
-    if (Fast)
-      *Fast = true;
-    return true;
-  }
+                                      bool *Fast = nullptr) const override;
 
-  /// LowerOperation - Provide custom lowering hooks for some operations.
+  /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
-  /// getFunctionAlignment - Return the Log2 alignment of this function.
-  unsigned getFunctionAlignment(const Function *F) const;
-
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
     // Addrspacecasts are always noops.
     return true;
   }
 
-  /// createFastISel - This method returns a target specific FastISel object,
-  /// or null if the target does not support "fast" ISel.
+  /// This method returns a target specific FastISel object, or null if the
+  /// target does not support "fast" ISel.
   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                            const TargetLibraryInfo *libInfo) const override;
 
@@ -273,11 +262,11 @@ public:
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
 
-  /// isShuffleMaskLegal - Return true if the given shuffle mask can be
-  /// codegen'd directly, or if it should be stack expanded.
+  /// Return true if the given shuffle mask can be codegen'd directly, or if it
+  /// should be stack expanded.
   bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
 
-  /// getSetCCResultType - Return the ISD::SETCC ValueType
+  /// Return the ISD::SETCC ValueType.
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
 
@@ -322,8 +311,8 @@ public:
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                           MachineFunction &MF) const override;
 
-  /// isLegalAddressingMode - Return true if the addressing mode represented
-  /// by AM is legal for this target, for a load/store of the specified type.
+  /// Return true if the addressing mode represented by AM is legal for this
+  /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS) const override;
 
@@ -335,10 +324,9 @@ public:
   int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                            unsigned AS) const override;
 
-  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-  /// expanded to FMAs when this method returns true, otherwise fmuladd is
-  /// expanded to fmul + fadd.
+  /// Return true if an FMA operation is faster than a pair of fmul and fadd
+  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
@@ -351,25 +339,65 @@ public:
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
 
-  bool hasLoadLinkedStoreConditional() const override;
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                               Value *Addr, AtomicOrdering Ord) const override;
 
-  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  TargetLoweringBase::AtomicRMWExpansionKind
+  TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
+  bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(EVT VT) const override;
 
+  /// If the target has a standard location for the unsafe stack pointer,
+  /// returns the address of that location. Otherwise, returns nullptr.
+  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
+  /// If a physical register, this returns the register that receives the
+  /// exception address on entry to an EH pad.
+  unsigned
+  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+    // FIXME: This is a guess. Has this been defined yet?
+    return AArch64::X0;
+  }
+
+  /// If a physical register, this returns the register that receives the
+  /// exception typeid on entry to a landing pad.
+  unsigned
+  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+    // FIXME: This is a guess. Has this been defined yet?
+    return AArch64::X1;
+  }
+
+  bool isCheapToSpeculateCttz() const override {
+    return true;
+  }
+
+  bool isCheapToSpeculateCtlz() const override {
+    return true;
+  }
+
+  bool supportSplitCSR(MachineFunction *MF) const override {
+    return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+           MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+  }
+  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+  void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
 private:
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
-  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+  /// Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget; @@ -392,6 +420,8 @@ private: SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool isEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, @@ -470,7 +500,7 @@ private: SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; unsigned getRegisterByName(const char* RegName, EVT VT, @@ -516,6 +546,8 @@ private: bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 3f2e772a90c4..6ac2175e5035 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -248,6 +248,12 @@ def simm7s16 : Operand { let PrintMethod = "printImmScale<16>"; } +def am_indexed7s8 : ComplexPattern; +def am_indexed7s16 : ComplexPattern; +def am_indexed7s32 : ComplexPattern; +def am_indexed7s64 : ComplexPattern; +def am_indexed7s128 : ComplexPattern; + class AsmImmRange : AsmOperandClass { let Name = "Imm" # Low # "_" # High; let DiagnosticType = "InvalidImm" # Low # "_" # High; @@ -346,9 +352,11 @@ class fixedpoint_i64 let ParserMatchClass = Imm1_64Operand; } +def fixedpoint_f16_i32 : fixedpoint_i32; def fixedpoint_f32_i32 : fixedpoint_i32; def fixedpoint_f64_i32 : fixedpoint_i32; +def fixedpoint_f16_i64 : fixedpoint_i64; def fixedpoint_f32_i64 : fixedpoint_i64; def fixedpoint_f64_i64 : fixedpoint_i64; @@ -402,6 +410,7 @@ def vecshiftR64Narrow : Operand, ImmLeaf; def Imm0_7Operand : AsmImmRange<0, 7>; def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; @@ -525,6 +534,20 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// imm0_1 predicate - True if the immediate is in the range [0,1] +def imm0_1 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, ImmLeaf, ImmLeaf; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -690,6 +715,17 @@ class arith_extended_reg32to64 : Operand, } // Floating-point immediate. 
+def fpimm16 : Operand, + PatLeaf<(f16 fpimm), [{ + return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXFormgetValueAPF(); + uint32_t enc = AArch64_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} def fpimm32 : Operand, PatLeaf<(f32 fpimm), [{ return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; @@ -822,7 +858,7 @@ class RtSystemI // model patterns with sufficiently fine granularity let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in class HintI - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "", + : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", [(int_aarch64_hint imm0_127:$imm)]>, Sched<[WriteHint]> { bits <7> imm; @@ -875,6 +911,25 @@ def msr_sysreg_op : Operand { let PrintMethod = "printMSRSystemRegister"; } +def PSBHintOperand : AsmOperandClass { + let Name = "PSBHint"; + let ParserMethod = "tryParsePSBHint"; +} +def psbhint_op : Operand { + let ParserMatchClass = PSBHintOperand; + let PrintMethod = "printPSBHintOp"; + let MCOperandPredicate = [{ + // Check, if operand is valid, to fix exhaustive aliasing in disassembly. + // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. + if (!MCOp.isImm()) + return false; + bool ValidNamed; + (void)AArch64PSBHint::PSBHintMapper().toString(MCOp.getImm(), + STI.getFeatureBits(), ValidNamed); + return ValidNamed; + }]; +} + class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { bits<16> systemreg; @@ -890,19 +945,19 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), let Inst{20-5} = systemreg; } -def SystemPStateFieldOperand : AsmOperandClass { - let Name = "SystemPStateField"; +def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_15"; let ParserMethod = "tryParseSysReg"; } -def pstatefield_op : Operand { - let ParserMatchClass = SystemPStateFieldOperand; +def pstatefield4_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_15Operand; let PrintMethod = "printSystemPStateField"; } let Defs = [NZCV] in -class MSRpstateI - : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm), - "msr", "\t$pstate_field, $imm">, +class MSRpstateImm0_15 + : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm), + "msr", "\t$pstatefield, $imm">, Sched<[WriteSys]> { bits<6> pstatefield; bits<4> imm; @@ -913,6 +968,37 @@ class MSRpstateI let Inst{7-5} = pstatefield{2-0}; let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. 
+ let hasCompleteDecoder = 0; +} + +def SystemPStateFieldWithImm0_1Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_1"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield1_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_1Operand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateImm0_1 + : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm), + "msr", "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bit imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-9} = 0b0100000; + let Inst{8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; } // SYS and SYSL generic system instructions. @@ -1341,7 +1427,7 @@ multiclass Shift shift_type, string asm, SDNode OpNode> { } class ShiftAlias - : InstAlias; class BaseMulAccum opc, RegisterClass multype, @@ -1407,13 +1493,13 @@ class MulHi opc, string asm, SDNode OpNode> } class MulAccumWAlias - : InstAlias; class MulAccumXAlias - : InstAlias; class WideMulAccumAlias - : InstAlias; class BaseCRC32 sz, bit C, RegisterClass StreamReg, @@ -1643,7 +1729,7 @@ class BaseAddSubEReg64 - : InstAlias; @@ -1701,10 +1787,10 @@ multiclass AddSub sub Rd, Rn, imm - def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, addsub_shifted_imm64_neg:$imm), 0>; @@ -1776,43 +1862,43 @@ multiclass AddSubS subs Rd, Rn, imm - def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, addsub_shifted_imm64_neg:$imm), 0>; // Compare aliases - def : InstAlias(NAME#"Wri") + def : InstAlias(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; - def : InstAlias(NAME#"Xri") + def : InstAlias(NAME#"Xri") XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx") + def : InstAlias(NAME#"Xrx") XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm - def : InstAlias(NAME#"Wri") + def : InstAlias(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME#"Xri") + def : InstAlias(NAME#"Xri") XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; // Compare shorthands - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, 0), 5>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, 0), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; // Register/register aliases with no shift when SP is not used. 
@@ -1998,7 +2084,7 @@ class BaseLogicalSReg opc, bit N, RegisterClass regtype, // Aliases for register+register logical instructions. class LogicalRegAlias - : InstAlias; multiclass LogicalImm opc, string mnemonic, SDNode OpNode, @@ -2017,10 +2103,10 @@ multiclass LogicalImm opc, string mnemonic, SDNode OpNode, let Inst{31} = 1; } - def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -2039,10 +2125,10 @@ multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, } } // end Defs = [NZCV] - def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -2105,9 +2191,12 @@ multiclass LogicalRegS opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2127,19 +2216,13 @@ class BaseCondSetFlagsImm let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm { - def Wi : BaseCondSetFlagsImm { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2159,11 +2242,19 @@ class BaseCondSetFlagsReg let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg { - def Wr : BaseCondSetFlagsReg { +multiclass CondComparison { + // immediate operand variants + def Wi : BaseCondComparisonImm { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg { + def Xi : BaseCondComparisonImm { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg { let Inst{31} = 1; } } @@ -2328,7 +2419,7 @@ multiclass LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2340,7 +2431,7 @@ multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2508,7 +2599,7 @@ class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, } class ROInstAlias - : InstAlias; multiclass Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, @@ -2934,7 +3025,7 @@ multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2946,7 +3037,7 @@ multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, 
GPR64sp:$Rn, 0)>; } @@ -2958,7 +3049,7 @@ multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, asm, pat>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; } @@ -2993,7 +3084,7 @@ multiclass LoadUnprivileged sz, bit V, bits<2> opc, (ins GPR64sp:$Rn, simm9:$offset), asm>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -3005,7 +3096,7 @@ multiclass StoreUnprivileged sz, bit V, bits<2> opc, asm>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -3136,7 +3227,7 @@ multiclass LoadPairOffset opc, bit V, RegisterClass regtype, (ins GPR64sp:$Rn, indextype:$offset), asm>, Sched<[WriteLD, WriteLDHi]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3151,7 +3242,7 @@ multiclass StorePairOffset opc, bit V, RegisterClass regtype, asm>, Sched<[WriteSTP]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3230,8 +3321,8 @@ class LoadPairPostIdx opc, bit V, RegisterClass regtype, let mayStore = 1, mayLoad = 0 in class StorePairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, Sched<[WriteAdr, WriteSTP]>; @@ -3477,6 +3568,20 @@ class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Unscaled half-precision to 32-bit + def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled half-precision to 64-bit + def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Unscaled single-precision to 32-bit def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { @@ -3504,6 +3609,25 @@ multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Scaled half-precision to 32-bit + def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, + fixedpoint_f16_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + // Scaled half-precision to 64-bit + def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, + fixedpoint_f16_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Scaled single-precision to 32-bit def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, fixedpoint_f32_i32, asm, @@ -3553,7 +3677,7 @@ class BaseIntegerToFP Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b00001; let Inst{16} = isUnsigned; let Inst{15-10} = scale; @@ -3570,7 +3694,7 @@ class BaseIntegerToFPUnscaled Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b10001; let Inst{16} = isUnsigned; let Inst{15-10} = 0b000000; @@ -3580,33 +3704,55 @@ class BaseIntegerToFPUnscaled { // Unscaled + def UWHri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + 
let Predicates = [HasFullFP16]; + } + def UWSri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UWDri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def UXHri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def UXSri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UXDri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } // Scaled + def SWHri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + def SWSri: BaseIntegerToFP { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag let scale{5} = 1; } @@ -3615,16 +3761,25 @@ multiclass IntegerToFP { (fdiv (node GPR32:$Rn), fixedpoint_f64_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag let scale{5} = 1; } + def SXHri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def SXSri: BaseIntegerToFP { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def SXDri: BaseIntegerToFP { (fdiv (node GPR64:$Rn), fixedpoint_f64_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } } @@ -3654,7 +3809,7 @@ class BaseUnscaledConversion rmode, bits<3> opcode, Sched<[WriteFCopy]> { bits<5> Rd; bits<5> Rn; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21} = 1; let Inst{20-19} = rmode; let Inst{18-16} = opcode; @@ -3704,26 +3859,49 @@ class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, } - multiclass UnscaledConversion { + def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { let 
Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, @@ -3796,7 +3974,7 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, Sched<[WriteF]> { bits<5> Rd; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21-19} = 0b100; let Inst{18-15} = opcode; let Inst{14-10} = 0b10000; @@ -3806,12 +3984,17 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, multiclass SingleOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hr : BaseSingleOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Sr : BaseSingleOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Dr : BaseSingleOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3828,7 +4011,7 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, bits<5> Rd; bits<5> Rn; bits<5> Rm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = opcode; @@ -3839,28 +4022,41 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, multiclass TwoOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3878,7 +4074,7 @@ class BaseThreeOperandFPData Rn; bits<5> Rm; bits<5> Ra; - let Inst{31-23} = 0b000111110; + let Inst{31-24} = 0b00011111; let Inst{21} = isNegated; let Inst{20-16} = Rm; let Inst{15} = isSub; @@ -3889,16 +4085,23 @@ class BaseThreeOperandFPData { + def Hrrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srrr : BaseThreeOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drrr : BaseThreeOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3913,7 +4116,7 @@ class BaseOneOperandFPComparison, Sched<[WriteFCmp]> { bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{15-10} = 0b001000; @@ -3932,7 +4135,7 @@ class BaseTwoOperandFPComparison { bits<5> Rm; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-10} = 0b001000; @@ -3944,24 +4147,36 @@ class BaseTwoOperandFPComparison { let Defs = [NZCV] in { + def Hrr : 
BaseTwoOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Hri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Sri : BaseOneOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drr : BaseTwoOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } def Dri : BaseOneOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Defs = [NZCV] } @@ -3971,17 +4186,20 @@ multiclass FPComparison - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -3991,16 +4209,24 @@ class BaseFPCondComparison { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison { - let Inst{22} = 0; +multiclass FPCondComparison { + def Hrr : BaseFPCondComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; } - def Drr : BaseFPCondComparison { - let Inst{22} = 1; + def Srr : BaseFPCondComparison { + let Inst{23-22} = 0b00; + } + + def Drr : BaseFPCondComparison { + let Inst{23-22} = 0b01; } - } // Defs = [NZCV], Uses = [NZCV] } //--- @@ -4019,7 +4245,7 @@ class BaseFPCondSelect bits<5> Rm; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -4030,12 +4256,17 @@ class BaseFPCondSelect multiclass FPCondSelect { let Uses = [NZCV] in { + def Hrrr : BaseFPCondSelect { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srrr : BaseFPCondSelect { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drrr : BaseFPCondSelect { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Uses = [NZCV] } @@ -4050,7 +4281,7 @@ class BaseFPMoveImmediate Sched<[WriteFImm]> { bits<5> Rd; bits<8> imm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-13} = imm; let Inst{12-5} = 0b10000000; @@ -4058,12 +4289,17 @@ class BaseFPMoveImmediate } multiclass FPMoveImmediate { + def Hi : BaseFPMoveImmediate { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Si : BaseFPMoveImmediate { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Di : BaseFPMoveImmediate { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } } // end of 'let Predicates = [HasFPARMv8]' @@ -4079,7 +4315,7 @@ let Predicates = [HasNEON] in { //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, +class BaseSIMDThreeSameVector size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -4093,8 +4329,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4103,7 +4338,7 @@ class 
BaseSIMDThreeSameVector size, bits<5> opcode, } let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, +class BaseSIMDThreeSameVectorTied size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, @@ -4117,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4129,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, // All operand sizes distinguished in the encoding. multiclass SIMDThreeSameVector opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; } @@ -4155,49 +4389,49 @@ multiclass SIMDThreeSameVector opc, string asm, // As above, but D sized elements unsupported. 
multiclass SIMDThreeSameVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; } multiclass SIMDThreeSameVectorBHSTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; @@ -4206,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied opc, string asm, // As above, but only B sized elements supported. multiclass SIMDThreeSameVectorB opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; } -// As above, but only S and D sized floating point elements supported. -multiclass SIMDThreeSameVectorFP opc, +// As above, but only floating point elements supported. 
+multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPCmp opc, +multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPTied opc, +multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 
V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; @@ -4262,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied opc, // As above, but D and B sized elements unsupported. multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; } @@ -4279,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS opc, string asm, // Logical three vector ops share opcode bits, and only use B sized elements. multiclass SIMDLogicalThreeVector size, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; @@ -4303,11 +4563,11 @@ multiclass SIMDLogicalThreeVector size, string asm, multiclass SIMDLogicalThreeVectorTied size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), @@ -4347,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied size, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, @@ -4360,7 +4620,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4369,8 +4631,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list pattern> : I<(outs regtype:$dst), 
(ins regtype:$Rd, regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, @@ -4382,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4392,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, // Supports B, H, and S element sizes. multiclass SIMDTwoVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4450,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS { // Supports all element sizes. 
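[Editor's note, not part of the patch] The half-precision vector forms added to the three-same multiclasses above become reachable from C once the FullFP16 feature is enabled. A hedged sketch using ACLE NEON intrinsics; it assumes a toolchain that accepts -march=armv8.2-a+fp16 and exposes the _f16 intrinsics in <arm_neon.h>, and the exact instruction selected is up to the backend:

    #include <arm_neon.h>

    float16x8_t add_fp16(float16x8_t a, float16x8_t b) {
      // Expected to select the new ".8h" three-same form (FADD v.8h).
      return vaddq_f16(a, b);
    }
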
multiclass SIMDLongTwoVector opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } multiclass SIMDLongTwoVectorTied opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v4i32 V128:$Rn)))]>; @@ -4501,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied opc, string asm, // Supports all element sizes, except 1xD. 
multiclass SIMDTwoVectorBHSDTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; } multiclass SIMDTwoVectorBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4553,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD opc, string asm, // Supports only B element sizes. 
multiclass SIMDTwoVectorB size, bits<5> opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; @@ -4565,16 +4830,16 @@ multiclass SIMDTwoVectorB size, bits<5> opc, string asm, // Supports only B and H element sizes. multiclass SIMDTwoVectorBH opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; } @@ -4583,13 +4848,21 @@ multiclass SIMDTwoVectorBH opc, string asm, // as an extra opcode bit. multiclass SIMDTwoVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } @@ -4597,10 +4870,10 @@ multiclass SIMDTwoVectorFP opc, string asm, // Supports only S element size. 
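[Editor's note, not part of the patch] The two-same-vector multiclasses above, and the FP-to-int/int-to-FP converts just below, gain the same half-precision variants. A sketch under the same toolchain assumptions as the previous example:

    #include <arm_neon.h>

    float16x8_t abs_fp16(float16x8_t v) { return vabsq_f16(v); }      // FABS .8h
    int16x8_t   cvt_fp16(float16x8_t v) { return vcvtq_s16_f16(v); }  // FCVTZS .8h
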
multiclass SIMDTwoVectorS opc, string asm, SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4608,26 +4881,42 @@ multiclass SIMDTwoVectorS opc, string asm, multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4706,10 +4995,10 @@ multiclass SIMDMixedTwoVector opc, string asm, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; } -class BaseSIMDCmpTwoVector size, bits<5> opcode, - RegisterOperand regtype, - string asm, string kind, string zero, - ValueType dty, ValueType sty, SDNode OpNode> +class BaseSIMDCmpTwoVector size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", @@ -4722,7 +5011,9 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = 
Rn; @@ -4732,54 +5023,74 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, // Comparisons support all element sizes, except 1xD. multiclass SIMDCmpTwoVector opc, string asm, SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, asm, ".8b", "0", v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, asm, ".16b", "0", v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, asm, ".4h", "0", v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, asm, ".8h", "0", v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, asm, ".2s", "0", v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, asm, ".4s", "0", v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, asm, ".2d", "0", v2i64, v2i64, OpNode>; } -// FP Comparisons support only S and D element sizes. +// FP Comparisons support only S and D element sizes (and H for v8.2a). multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, asm, ".2s", "0.0", v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, asm, ".4s", "0.0", v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; } @@ -5325,7 +5636,7 @@ multiclass SIMDZipVectoropc, string asm, //---------------------------------------------------------------------------- let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, +class BaseSIMDThreeScalar size, bits<5> opcode, RegisterClass regtype, string asm, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -5337,8 +5648,7 @@ class BaseSIMDThreeScalar size, bits<5> opcode, let Inst{31-30} = 0b01; let Inst{29} = U; let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; + let 
Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -5369,17 +5679,17 @@ class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarBHSD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; @@ -5389,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD opc, string asm, multiclass SIMDThreeScalarHS opc, string asm, SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarHSTied opc, string asm, @@ -5404,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied opc, string asm, asm, []>; } -multiclass SIMDThreeScalarSD opc, string asm, +multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; } -multiclass SIMDThreeScalarFPCmp opc, string asm, +multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), @@ -5482,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS opc, string asm, //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<5> opcode, +class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, RegisterClass regtype2, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, @@ -5494,7 +5812,9 @@ class BaseSIMDTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5523,7 +5843,7 @@ class BaseSIMDTwoScalarTied size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<5> opcode, +class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, string asm, string zero> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn, #" # zero, "", []>, @@ -5534,7 +5854,9 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5556,21 +5878,28 @@ class SIMDInexactCvtTwoScalar opcode, 
string asm> multiclass SIMDCmpTwoScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; def : Pat<(v1i64 (OpNode FPR64:$Rn)), (!cast(NAME # v1i64rz) FPR64:$Rn)>; } -multiclass SIMDCmpTwoScalarSD opc, string asm, +multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar; + } - def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; - def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5578,35 +5907,42 @@ multiclass SIMDCmpTwoScalarSD opc, string asm, multiclass SIMDTwoScalarD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), (!cast(NAME # "v1i64") FPR64:$Rn)>; } -multiclass SIMDTwoScalarSD opc, string asm> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; +multiclass SIMDFPTwoScalar opc, string asm> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } } -multiclass SIMDTwoScalarCVTSD opc, string asm, +multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar; + } } multiclass SIMDTwoScalarBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), @@ -5633,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied opc, string asm, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in multiclass SIMDTwoScalarMixedBHS opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } //---------------------------------------------------------------------------- @@ -5668,10 +6004,14 @@ multiclass SIMDPairwiseScalarD opc, string asm> { asm, ".2d">; } -multiclass SIMDPairwiseScalarSD opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar; } @@ -5727,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { asm, ".4s", []>; } -multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, +multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + 
asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, asm, ".4s", [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; @@ -5925,7 +6273,7 @@ class SIMDInsMainMovAlias : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", + # "|" # size #"\t$dst$idx, $src$idx2}", (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; @@ -6215,7 +6563,7 @@ multiclass SIMDScalarCPY { // AdvSIMD modified immediate instructions //---------------------------------------------------------------------------- -class BaseSIMDModifiedImm pattern> : I, @@ -6227,16 +6575,17 @@ class BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6284,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied b15_b12, class BaseSIMDModifiedImmVectorShiftHalf b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6349,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied hw_cmode, class SIMDModifiedImmMoveMSL cmode, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<1> shift; @@ -6357,18 +6706,18 @@ class SIMDModifiedImmMoveMSL cmode, let Inst{12} = shift; } -class SIMDModifiedImmVectorNoShift cmode, +class SIMDModifiedImmVectorNoShift cmode, RegisterOperand vectype, Operand imm_type, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { let Inst{15-12} = cmode; } class SIMDModifiedImmScalarNoShift cmode, string asm, list pattern> - : BaseSIMDModifiedImm { let Inst{15-12} = cmode; let DecoderMethod = "DecodeModImmInstruction"; @@ -6438,8 +6787,36 @@ class BaseSIMDIndexedTied size, bits<4> opc, let Inst{4-0} = Rd; } -multiclass SIMDFPIndexedSD opc, string asm, - SDPatternOperator OpNode> { +multiclass SIMDFPIndexed opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, @@ -6476,6 +6853,21 @@ multiclass SIMDFPIndexedSD opc, string asm, let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v1i32_indexed : 
BaseSIMDIndexed<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, asm, ".s", "", "", ".s", @@ -6501,7 +6893,7 @@ multiclass SIMDFPIndexedSD opc, string asm, } } -multiclass SIMDFPIndexedSDTiedPatterns { +multiclass SIMDFPIndexedTiedPatterns { // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (AArch64duplane32 (v4f32 V128:$Rm), @@ -6553,7 +6945,28 @@ multiclass SIMDFPIndexedSDTiedPatterns { V128:$Rm, VectorIndexD:$idx)>; } -multiclass SIMDFPIndexedSDTied opc, string asm> { +multiclass SIMDFPIndexedTied opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, asm, ".2s", ".2s", ".2s", ".s", []> { @@ -6580,6 +6993,16 @@ multiclass SIMDFPIndexedSDTied opc, string asm> { let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, @@ -7117,7 +7540,13 @@ class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, } -multiclass SIMDScalarRShiftSD opc, string asm> { +multiclass SIMDFPScalarRShift opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] def s : BaseSIMDScalarShift { let Inst{20-16} = imm{4-0}; @@ -7297,6 +7726,23 @@ class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, multiclass SIMDVectorRShiftSD opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -7322,8 +7768,26 @@ multiclass SIMDVectorRShiftSD opc, string asm, } } -multiclass SIMDVectorRShiftSDToFP opc, string asm, +multiclass SIMDVectorRShiftToFP opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, 
{0,0,1,?,?,?,?},
+                                        V128, V128, vecshiftR16,
+                                        asm, ".8h", ".8h",
+      [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
                                         V64, V64, vecshiftR32,
                                         asm, ".2s", ".2s",
@@ -8604,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in {
 class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
                                     RegisterOperand regtype, string asm,
                                     string kind, list<dag> pattern>
-  : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+  : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind,
     pattern> {
-  let Inst{21}=0;
 }
 multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
                                              SDPatternOperator Accum> {
@@ -9041,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">;
 def : TokenAlias<".4S", ".4s">;
 def : TokenAlias<".2D", ".2d">;
 def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
 def : TokenAlias<".B", ".b">;
 def : TokenAlias<".H", ".h">;
 def : TokenAlias<".S", ".s">;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index c0b3f2c60916..3ef3c8b840cb 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64InstrInfo.h"
-#include "AArch64MachineCombinerPattern.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -533,6 +532,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                              CC);
 }
 
+/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
+static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
+  uint64_t Imm = MI->getOperand(1).getImm();
+  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+  uint64_t Encoding;
+  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
+}
+
 // FIXME: this implementation should be micro-architecture dependent, so a
 // micro-architecture target hook should be introduced here in future.
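[Editor's note, not part of the patch] A note on the bit trick in canBeExpandedToORR above: the double shift zero-extends the low BitSize bits of the immediate before handing it to the logical-immediate encoder. A standalone model; the helper name is illustrative:

    #include <cassert>
    #include <cstdint>

    // Keep only the low BitSize bits of Imm, zero-extended. Both shifts are
    // well defined for BitSize in {32, 64}, since the amount stays below 64.
    uint64_t truncToBitSize(uint64_t Imm, unsigned BitSize) {
      return Imm << (64 - BitSize) >> (64 - BitSize);
    }

    int main() {
      assert(truncToBitSize(0xffffffff00ff00ffULL, 32) == 0x0000000000ff00ffULL);
      assert(truncToBitSize(0xffffffff00ff00ffULL, 64) == 0xffffffff00ff00ffULL);
      return 0;
    }
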
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { @@ -573,6 +580,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { case AArch64::ORRWrr: case AArch64::ORRXrr: return true; + // If MOVi32imm or MOVi64imm can be expanded into ORRWri or + // ORRXri, it is as cheap as MOV + case AArch64::MOVi32imm: + return canBeExpandedToORR(MI, 32); + case AArch64::MOVi64imm: + return canBeExpandedToORR(MI, 64); } llvm_unreachable("Unknown opcode to check as cheap as a move!"); @@ -1379,42 +1392,34 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( Width = 1; Scale = 1; break; - case AArch64::LDRXui: - case AArch64::STRXui: - Scale = Width = 8; - break; - case AArch64::LDRWui: - case AArch64::STRWui: - Scale = Width = 4; - break; - case AArch64::LDRBui: - case AArch64::STRBui: - Scale = Width = 1; - break; - case AArch64::LDRHui: - case AArch64::STRHui: - Scale = Width = 2; - break; - case AArch64::LDRSui: - case AArch64::STRSui: - Scale = Width = 4; - break; - case AArch64::LDRDui: - case AArch64::STRDui: - Scale = Width = 8; - break; case AArch64::LDRQui: case AArch64::STRQui: Scale = Width = 16; break; - case AArch64::LDRBBui: - case AArch64::STRBBui: - Scale = Width = 1; + case AArch64::LDRXui: + case AArch64::LDRDui: + case AArch64::STRXui: + case AArch64::STRDui: + Scale = Width = 8; break; + case AArch64::LDRWui: + case AArch64::LDRSui: + case AArch64::STRWui: + case AArch64::STRSui: + Scale = Width = 4; + break; + case AArch64::LDRHui: case AArch64::LDRHHui: + case AArch64::STRHui: case AArch64::STRHHui: Scale = Width = 2; break; + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::STRBui: + case AArch64::STRBBui: + Scale = Width = 1; + break; }; BaseReg = LdSt->getOperand(1).getReg(); @@ -1445,23 +1450,43 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. - if (Second->getOpcode() != AArch64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case AArch64::SUBSWri: - case AArch64::ADDSWri: - case AArch64::ANDSWri: - case AArch64::SUBSXri: - case AArch64::ADDSXri: - case AArch64::ANDSXri: - return true; + if (Subtarget.isCyclone()) { + // Cyclone can fuse CMN, CMP, TST followed by Bcc. + unsigned SecondOpcode = Second->getOpcode(); + if (SecondOpcode == AArch64::Bcc) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: + return true; + } + } + // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ. 
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || + SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::ANDWri: + case AArch64::ANDXri: + case AArch64::EORWri: + case AArch64::EORXri: + case AArch64::ORRWri: + case AArch64::ORRXri: + case AArch64::SUBWri: + case AArch64::SUBXri: + return true; + } + } } + return false; } MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( @@ -1814,7 +1839,7 @@ void AArch64InstrInfo::storeRegToStackSlot( MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; @@ -1911,7 +1936,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); @@ -2226,11 +2251,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPDi: case AArch64::STPXi: case AArch64::STPDi: + case AArch64::LDNPXi: + case AArch64::LDNPDi: + case AArch64::STNPXi: + case AArch64::STNPDi: + ImmIdx = 3; IsSigned = true; Scale = 8; break; case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::LDNPQi: + case AArch64::STNPQi: + ImmIdx = 3; IsSigned = true; Scale = 16; break; @@ -2238,6 +2271,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPSi: case AArch64::STPWi: case AArch64::STPSi: + case AArch64::LDNPWi: + case AArch64::LDNPSi: + case AArch64::STNPWi: + case AArch64::STNPSi: + ImmIdx = 3; IsSigned = true; Scale = 4; break; @@ -2457,7 +2495,7 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, bool AArch64InstrInfo::getMachineCombinerPatterns( MachineInstr &Root, - SmallVectorImpl &Patterns) const { + SmallVectorImpl &Patterns) const { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -2485,76 +2523,76 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( "ADDWrr does not have register operands"); if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); Found = true; } break; case AArch64::ADDXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); Found = true; } break; 
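[Editor's note, not part of the patch] Stepping back to the shouldScheduleAdjacent change completed above: it now recognizes two Cyclone fusion shapes, a flag-setting compare followed by Bcc, and an immediate-form ALU op followed by CBZ/CBNZ. A hedged C++ illustration of source that tends to produce such pairs; the exact codegen is not guaranteed by the patch:

    // At -O2 on AArch64, the first test typically lowers to a SUBSWri/cmp
    // feeding a Bcc (first fusion shape), and the second to an ANDWri/tst
    // feeding a conditional or compare-and-branch (second fusion shape).
    int classify(unsigned x) {
      if (x == 1)           // subs/cmp ... ; b.eq
        return 1;
      if ((x & 0xff) == 0)  // and/tst ... ; cbz / b.eq
        return 2;
      return 0;
    }
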
case AArch64::SUBWrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); Found = true; } break; case AArch64::SUBXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); Found = true; } break; case AArch64::ADDWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); Found = true; } break; case AArch64::ADDXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); Found = true; } break; case AArch64::SUBWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); Found = true; } break; case AArch64::SUBXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); Found = true; } break; @@ -2661,7 +2699,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, /// this function generates the instructions that could replace the /// original code sequence void AArch64InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { @@ -2677,13 +2715,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( default: // signal error. 
break; - case MachineCombinerPattern::MC_MULADDW_OP1: - case MachineCombinerPattern::MC_MULADDX_OP1: + case MachineCombinerPattern::MULADDW_OP1: + case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 // ADD R,I,C // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) { + if (Pattern == MachineCombinerPattern::MULADDW_OP1) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2692,13 +2730,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDW_OP2: - case MachineCombinerPattern::MC_MULADDX_OP2: + case MachineCombinerPattern::MULADDW_OP2: + case MachineCombinerPattern::MULADDX_OP2: // MUL I=A,B,0 // ADD R,C,I // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) { + if (Pattern == MachineCombinerPattern::MULADDW_OP2) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2707,8 +2745,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDWI_OP1: - case MachineCombinerPattern::MC_MULADDXI_OP1: { + case MachineCombinerPattern::MULADDWI_OP1: + case MachineCombinerPattern::MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm // ==> ORR V, ZR, Imm @@ -2716,7 +2754,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { + if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -2751,8 +2789,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::MC_MULSUBW_OP1: - case MachineCombinerPattern::MC_MULSUBX_OP1: { + case MachineCombinerPattern::MULSUBW_OP1: + case MachineCombinerPattern::MULSUBX_OP1: { // MUL I=A,B,0 // SUB R,I, C // ==> SUB V, 0, C @@ -2760,7 +2798,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *SubRC; unsigned SubOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { SubOpc = AArch64::SUBWrr; SubRC = &AArch64::GPR32spRegClass; ZeroReg = AArch64::WZR; @@ -2784,13 +2822,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); break; } - case MachineCombinerPattern::MC_MULSUBW_OP2: - case MachineCombinerPattern::MC_MULSUBX_OP2: + case MachineCombinerPattern::MULSUBW_OP2: + case MachineCombinerPattern::MULSUBX_OP2: // MUL I=A,B,0 // SUB R,C,I // ==> MSUB R,A,B,C (computes C - A*B) // --- Create(MSUB); - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { Opc = AArch64::MSUBWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2799,8 +2837,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULSUBWI_OP1: - case MachineCombinerPattern::MC_MULSUBXI_OP1: { + case MachineCombinerPattern::MULSUBWI_OP1: + case MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 // SUB R,I, Imm // ==> ORR V, ZR, -Imm @@ -2808,7 +2846,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, 
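[Editor's note, not part of the patch] The MULADDWI_OP1 case being built above rewrites a multiply feeding an add-immediate into a register-only madd by first materializing the immediate with ORR from the zero register (the combiner only fires when the immediate encodes as a logical immediate). A scalar model of why the rewrite is exact; function names are illustrative:

    #include <cstdint>

    // Before: mul I, A, B ; add R, I, #Imm
    uint32_t before(uint32_t A, uint32_t B, uint32_t Imm) {
      uint32_t I = A * B;
      return I + Imm;
    }

    // After: orr V, wzr, #Imm ; madd R, A, B, V
    // Both compute A*B + Imm in 32-bit modular arithmetic, so the rewrite is
    // exact; it pays one extra ORR but exposes the MADD accumulator form.
    uint32_t after(uint32_t A, uint32_t B, uint32_t Imm) {
      uint32_t V = 0u | Imm; // ORRWri with WZR materializes the immediate
      return A * B + V;      // MADDWrrr: Rd = Rn * Rm + Ra
    }
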
ZeroReg;
-  if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
     OrrOpc = AArch64::ORRWri;
     OrrRC = &AArch64::GPR32spRegClass;
     BitSize = 32;
@@ -2944,3 +2982,34 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
   MI->eraseFromParent();
   return true;
 }
+
+std::pair<unsigned, unsigned>
+AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  const unsigned Mask = AArch64II::MO_FRAGMENT;
+  return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace AArch64II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_PAGE, "aarch64-page"},
+      {MO_PAGEOFF, "aarch64-pageoff"},
+      {MO_G3, "aarch64-g3"},
+      {MO_G2, "aarch64-g2"},
+      {MO_G1, "aarch64-g1"},
+      {MO_G0, "aarch64-g0"},
+      {MO_HI12, "aarch64-hi12"}};
+  return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+  using namespace AArch64II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_GOT, "aarch64-got"},
+      {MO_NC, "aarch64-nc"},
+      {MO_TLS, "aarch64-tls"},
+      {MO_CONSTPOOL, "aarch64-constant-pool"}};
+  return makeArrayRef(TargetFlags);
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 68c2a2882580..ae02822a32e6 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -167,13 +167,13 @@ public:
   /// for an instruction chain ending in <Root>. All potential patterns are
   /// listed in the <Patterns> array.
   bool getMachineCombinerPatterns(MachineInstr &Root,
-                  SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns)
+                  SmallVectorImpl<MachineCombinerPattern> &Patterns)
       const override;
   /// When getMachineCombinerPatterns() finds patterns, this function generates
   /// the instructions that could replace the original code sequence
   void genAlternativeCodeSequence(
-      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+      MachineInstr &Root, MachineCombinerPattern Pattern,
       SmallVectorImpl<MachineInstr *> &InsInstrs,
       SmallVectorImpl<MachineInstr *> &DelInstrs,
       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -181,6 +181,14 @@ public:
   bool useMachineCombiner() const override;
 
   bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableBitmaskMachineOperandTargetFlags() const override;
+
 private:
   void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
                              MachineBasicBlock *TBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index fa1a46acba84..d02bc9ff394d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -16,6 +16,8 @@
 //
 def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                  AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
 def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
 def HasNEON          : Predicate<"Subtarget->hasNEON()">,
@@ -24,6 +26,12 @@ def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                  AssemblerPredicate<"FeatureCrypto", "crypto">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
+def HasPerfMon       : Predicate<"Subtarget->hasPerfMon()">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16",
"fullfp16">; +def HasSPE : Predicate<"Subtarget->hasSPE()">, + AssemblerPredicate<"FeatureSPE", "spe">; + def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -66,6 +74,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -160,13 +182,14 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; -def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; -def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; - def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; @@ -361,6 +384,9 @@ def : InstAlias<"wfi", (HINT 0b011)>; def : InstAlias<"sev", (HINT 0b100)>; def : InstAlias<"sevl", (HINT 0b101)>; +// v8.2a Statistical Profiling extension +def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; + // As far as LLVM is concerned this writes to the system's exclusive monitors. let mayLoad = 1, mayStore = 1 in def CLREX : CRmSystemI; @@ -383,12 +409,17 @@ def : InstAlias<"isb", (ISB 0xf)>; def MRS : MRSI; def MSR : MSRI; -def MSRpstate: MSRpstateI; +def MSRpstateImm1 : MSRpstateImm0_1; +def MSRpstateImm4 : MSRpstateImm0_15; // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. def : Pat<(AArch64threadpointer), (MRS 0xde82)>; +// The cycle counter PMC register is PMCCNTR_EL0. +let Predicates = [HasPerfMon] in +def : Pat<(readcyclecounter), (MRS 0xdce8)>; + // Generic system instructions def SYSxt : SystemXtI<0, "sys">; def SYSLxt : SystemLXtI<1, "sysl">; @@ -595,10 +626,12 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; +let AddedComplexity = 1 in { def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +} // Because of the immediate format for add/sub-imm instructions, the // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 
@@ -823,7 +856,7 @@ defm AND : LogicalReg<0b00, 0, "and", and>;
 defm BIC : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 defm EON : LogicalReg<0b10, 1, "eon",
-                      BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+                      BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
 defm EOR : LogicalReg<0b10, 0, "eor", xor>;
 defm ORN : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
@@ -1020,13 +1053,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
 def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
 
 //===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
 //===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
 
 //===----------------------------------------------------------------------===//
 // Conditional select instructions.
@@ -2421,6 +2451,26 @@ defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvt
 defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
 }
 
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil,  "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil,  "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, frnd,   "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, frnd,   "FCVTAU">;
+
 //===----------------------------------------------------------------------===//
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
@@ -2466,14 +2516,7 @@ defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
 def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
           (FRINTNDr FPR64:$Rn)>;
 
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-//
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in { defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -} - defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; let SchedRW = [WriteFDiv] in { @@ -2488,23 +2531,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; } defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; -def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; //===----------------------------------------------------------------------===// @@ -2556,7 +2599,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. @@ -2589,6 +2632,40 @@ defm FMOV : FPMoveImmediate<"fmov">; // Advanced SIMD two vector instructions. //===----------------------------------------------------------------------===// +defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", + int_aarch64_neon_uabd>; +// Match UABDL in log2-shuffle patterns. 
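
The log2-shuffle patterns that follow match the branch-free absolute-value expansion applied to a widened difference. The underlying scalar identity, as a sketch of mine rather than code from the patch:

```cpp
// With s = d >> 15 (arithmetic shift, so s is 0 or -1), (d + s) ^ s == |d|.
// Applied per lane to d = zext(a) - zext(b), this is exactly the
// ashr/add/sub/xor DAG the patterns below recognize and replace with uabdl.
#include <cstdint>

uint16_t absdiff_u8(uint8_t a, uint8_t b) {
  int16_t d = (int16_t)(a - b);   // zext + sub, as in the matched DAG
  int16_t s = (int16_t)(d >> 15); // sign mask: 0 for d >= 0, -1 for d < 0
  return (uint16_t)((d + s) ^ s); // |a - b|, what uabdl computes per lane
}
```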
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), + (v4i32 (add (sub (zext (v4i16 V64:$opA)), + (zext (v4i16 V64:$opB))), + (AArch64vashr v4i32:$src, (i32 31))))), + (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; +def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), + (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)), + (zext (extract_high_v8i16 V128:$opB))), + (AArch64vashr v4i32:$src, (i32 31))))), + (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; +def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), + (v2i64 (add (sub (zext (v2i32 V64:$opA)), + (zext (v2i32 V64:$opB))), + (AArch64vashr v2i64:$src, (i32 63))))), + (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; +def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), + (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)), + (zext (extract_high_v4i32 V128:$opB))), + (AArch64vashr v2i64:$src, (i32 63))))), + (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; + defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))), (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))), @@ -2780,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>; +defm FADD : 
SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. -defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", +defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; // The following def pats catch the case where the LHS of an FMA is negated. @@ -2816,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", @@ -2833,9 +2910,9 @@ defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; defm SQRDMULH : 
SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; @@ -2852,9 +2929,9 @@ defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>; +defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; -defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; @@ -2879,54 +2956,6 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; -def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)), - (SMINv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)), - (SMINv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)), - (SMINv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)), - (SMINv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)), - (SMINv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)), - (SMINv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)), - (SMAXv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)), - (SMAXv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)), - (SMAXv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)), - (SMAXv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)), - (SMAXv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)), - (SMAXv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)), - (UMINv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)), - (UMINv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)), - (UMINv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)), - (UMINv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)), - (UMINv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)), - (UMINv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)), - (UMAXv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)), - (UMAXv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)), - (UMAXv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)), - (UMAXv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)), - (UMAXv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)), - (UMAXv4i32 V128:$Rn, V128:$Rm)>; def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; @@ -3052,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # "|cmlt.2d\t$dst, $src1, $src2}", (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, 
$src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # "|fcmle.2s\t$dst, $src1, $src2}", (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3062,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # "|fcmle.2d\t$dst, $src1, $src2}", (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # "|fcmlt.2s\t$dst, $src1, $src2}", (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3072,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # "|fcmlt.2d\t$dst, $src1, $src2}", (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # "|facle.2s\t$dst, $src1, $src2}", (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3082,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # "|facle.2d\t$dst, $src1, $src2}", (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # "|faclt.2s\t$dst, $src1, $src2}", (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3103,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", 
AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -3198,35 +3259,35 @@ defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; -defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; +defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; +defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; +defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; +defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; +defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; +defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; +defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; +defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; -defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; -defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; +defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; +defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", 
int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", int_aarch64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; @@ -3390,8 +3451,6 @@ defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", int_aarch64_neon_uabd>; -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_aarch64_neon_uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", @@ -3449,8 +3508,8 @@ defm : Neon_mulacc_widen_patterns< // Patterns for 64-bit pmull def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), - (vector_extract (v2i64 V128:$Rm), (i64 1))), +def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), + (extractelt (v2i64 V128:$Rm), (i64 1))), (PMULLv2i64 V128:$Rn, V128:$Rm)>; // CodeGen patterns for addhn and subhn instructions, which can actually be @@ -3593,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">; //---------------------------------------------------------------------------- defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), @@ -3713,12 +3772,12 @@ defm : DUPWithTruncPats; multiclass DUPWithTrunci64Pats { - def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), imm:$idx))))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), - imm:$idx))))), + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), + imm:$idx))))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3747,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), + VectorIndexB:$idx)))), i8), + (i64 (SMOVvi8to64 V128:$Rn, 
VectorIndexB:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), + VectorIndexH:$idx)))), i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; + // Extracting i8 or i16 elements will have the zero-extend transformed to // an 'and' mask by type legalization since neither i8 nor i16 are legal types // for AArch64. Match these patterns here since UMOV already zeroes out the high @@ -3784,6 +3850,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (i64 FPR64:$Rn), dsub))>; +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), @@ -3949,10 +4020,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. 
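
For orientation, the across-lanes definitions above (FMAXNMV and friends) reduce every lane of a vector to a single scalar. A small usage illustration with the standard ACLE intrinsics from arm_neon.h; the snippet is mine, not part of the patch:

```cpp
// One across-lanes instruction folds a whole vector to a scalar result in
// the low lane, which is what the SIMDFPAcrossLanes multiclasses select.
#include <arm_neon.h>

float max_across(float32x4_t v) { return vmaxnmvq_f32(v); } // fmaxnmv s0, v0.4s
float min_across(float32x4_t v) { return vminnmvq_f32(v); } // fminnmv s0, v0.4s
```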
@@ -4199,15 +4270,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d", [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s", [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] // AdvSIMD MOVI @@ -4235,7 +4314,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 in the pattern let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d", [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; @@ -4296,10 +4375,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; @@ -4340,8 +4419,8 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", //---------------------------------------------------------------------------- let hasSideEffects = 0 in { - defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; + defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; + defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; } // NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the @@ -4349,18 +4428,18 @@ let hasSideEffects = 0 in { // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). 
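
To spell out the combinatorics behind the note above before the pattern instantiations that follow: a fused multiply-add whose product has one negated factor can be written four ways, all of which are the same value, so each distinct DAG shape needs its own FMLS pattern. A standalone check of that algebra (my sketch, assuming only IEEE fma semantics):

```cpp
// All four spellings of c - a*b as a single fused op; the products here are
// exact, so the equalities hold bit-for-bit.
#include <cmath>
#include <cassert>

int main() {
  double a = 3.0, b = 4.0, c = 5.0;
  assert(std::fma(-a, b, c) == std::fma(b, -a, c)); // commuted product
  assert(std::fma(-a, b, c) == std::fma(a, -b, c)); // (-x)*y == x*(-y)
  assert(std::fma(-a, b, c) == std::fma(-b, a, c));
  assert(std::fma(-a, b, c) == c - a * b);
  return 0;
}
```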
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
            TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
 
 multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
@@ -4424,7 +4503,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
                          (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                          V128:$Rm, VectorIndexS:$idx)>;
   def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
-                         (vector_extract (v2f32 (fneg V64:$Rm)),
+                         (vector_extract (v4f32 (insert_subvector undef,
+                                                 (v2f32 (fneg V64:$Rm)),
+                                                 (i32 0))),
                                          VectorIndexS:$idx))),
             (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
@@ -4442,8 +4523,8 @@ defm : FMLSIndexedAfterNegPatterns<
 defm : FMLSIndexedAfterNegPatterns<
        TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
 
-defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
-defm FMUL  : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
+defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
 
 def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
           (FMULv2i32_indexed V64:$Rn,
@@ -4497,10 +4578,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
 //----------------------------------------------------------------------------
 // AdvSIMD scalar shift instructions
 //----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF  : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF  : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
 // Codegen patterns for the above. We don't put these directly on the
 // instructions because TableGen's type inference can't handle the truth.
 // Having the same base pattern for fp <--> int totally freaks it out.
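
On the scalar-shift converts just renamed: the immediate operand makes these fixed-point conversions. In plain C terms, scvtf with #fbits divides by 2^fbits after converting, and fcvtzs with #fbits multiplies by 2^fbits before truncating. A semantics sketch of mine, not code from the patch:

```cpp
// Reference semantics for the scaled conversions encoded by
// SIMDFPScalarRShift; the #fbits immediate is the power of two.
#include <cstdint>
#include <cmath>

double scvtf_fixed(int64_t x, unsigned fbits) {
  return (double)x / std::exp2((double)fbits);    // scvtf d0, x0, #fbits
}
int64_t fcvtzs_fixed(double x, unsigned fbits) {
  return (int64_t)(x * std::exp2((double)fbits)); // fcvtzs x0, d0, #fbits
}
```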
@@ -4573,7 +4654,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", //---------------------------------------------------------------------------- defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", int_aarch64_neon_rshrn>; @@ -4608,7 +4689,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; @@ -5133,10 +5214,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; def : Pat<(i64 (anyext GPR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. +// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and +// then assert the extension has happened. def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; + (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; // To sign extend, we use a signed bitfield move instruction (SBFM) on the // containing super-reg. @@ -5801,6 +5882,21 @@ def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; } +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; + def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), @@ -5852,6 +5948,45 @@ def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +// Patterns for nontemporal/no-allocate stores. +// We have to resort to tricks to turn a single-input store into a store pair, +// because there is no single-input nontemporal store, only STNP. 
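
Before the patterns themselves, a rough picture of the trick the comment describes: the 128-bit value is split into its two 64-bit halves and stored with one STNP, which carries the no-allocate hint. The C below only illustrates the data movement; the actual lowering keeps both halves in FP registers:

```cpp
// Conceptual expansion of a 128-bit nontemporal store to [x0]:
//   d0 = low half of q0   (EXTRACT_SUBREG ..., dsub)
//   d1 = lane 1 of q0     (CPYi64 ..., (i64 1))
//   stnp d0, d1, [x0]     (STNPDi: pair store with nontemporal hint)
#include <cstring>

void nt_store_128(const unsigned char (&v)[16], unsigned char *p) {
  std::memcpy(p, v, 8);         // first register of the pair: low half
  std::memcpy(p + 8, v + 8, 8); // second register of the pair: high half
}
```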
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+  Pat<(nontemporalstore (VT FPR128:$Rt),
+        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+              (CPYi64 FPR128:$Rt, (i64 1)),
+              GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
+class NTStore64Pat<ValueType VT> :
+  Pat<(nontemporalstore (VT FPR64:$Rt),
+        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+              (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+              GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
+def : Pat<(nontemporalstore GPR64:$Rt,
+            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
+                  GPR64sp:$Rn, simm7s4:$offset)>;
+} // AddedComplexity=15
+} // Predicates = [IsLE]
+
 // Tail call return handling. These are all compiler pseudo-instructions,
 // so no encoding information or anything like that.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 82f77a77ab5e..566aa2c9a9ba 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -41,54 +41,85 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
+STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
+STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
 
 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                    cl::init(20), cl::Hidden);
 
-// Place holder while testing unscaled load/store combining
-static cl::opt<bool> EnableAArch64UnscaledMemOp(
-    "aarch64-unscaled-mem-op", cl::Hidden,
-    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
+namespace llvm {
+void initializeAArch64LoadStoreOptPass(PassRegistry &);
+}
+
+#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
 
 namespace {
+
+typedef struct LdStPairFlags {
+  // If a matching instruction is found, MergeForward is set to true if the
+  // merge is to remove the first instruction and replace the second with
+  // a pair-wise insn, and false if the reverse is true.
+  bool MergeForward;
+
+  // SExtIdx gives the index of the result of the load pair that must be
+  // extended. The value of SExtIdx assumes that the paired load produces the
+  // value in this order: (I, returned iterator), i.e., -1 means no value has
+  // to be extended, 0 means I, and 1 means the returned iterator.
+ int SExtIdx; + + LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} + + void setMergeForward(bool V = true) { MergeForward = V; } + bool getMergeForward() const { return MergeForward; } + + void setSExtIdx(int V) { SExtIdx = V; } + int getSExtIdx() const { return SExtIdx; } + +} LdStPairFlags; + struct AArch64LoadStoreOpt : public MachineFunctionPass { static char ID; - AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} + AArch64LoadStoreOpt() : MachineFunctionPass(ID) { + initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); + } const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; + const AArch64Subtarget *Subtarget; // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, MergeForward is set to true if the - // merge is to remove the first instruction and replace the second with - // a pair-wise insn, and false if the reverse is true. - // \p SExtIdx[out] gives the index of the result of the load pair that - // must be extended. The value of SExtIdx assumes that the paired load - // produces the value in this order: (I, returned iterator), i.e., - // -1 means no value has to be extended, 0 means I, and 1 means the - // returned iterator. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, int &SExtIdx, + LdStPairFlags &Flags, unsigned Limit); + + // Scan the instructions looking for a store that writes to the address from + // which the current load instruction reads. Return true if one is found. + bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, + MachineBasicBlock::iterator &StoreI); + // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). - // \p SExtIdx index of the result that must be extended for a paired load. - // -1 means none, 0 means I, and 1 means Paired. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool MergeForward, - int SExtIdx); + MachineBasicBlock::iterator Paired, + const LdStPairFlags &Flags); + + // Promote the load that reads directly from the address stored to. + MachineBasicBlock::iterator + promoteLoadFromStore(MachineBasicBlock::iterator LoadI, + MachineBasicBlock::iterator StoreI); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan forwards. MachineBasicBlock::iterator findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, - int Value); + int UnscaledOffset); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -96,97 +127,177 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { MachineBasicBlock::iterator findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); - // Merge a pre-index base register update into a ld/st instruction. 
- MachineBasicBlock::iterator - mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); + // Find an instruction that updates the base register of the ld/st + // instruction. + bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI, + unsigned BaseReg, int Offset); - // Merge a post-index base register update into a ld/st instruction. + // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator - mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); + mergeUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, bool IsPreIdx); - bool optimizeBlock(MachineBasicBlock &MBB); + // Find and merge foldable ldr/str instructions. + bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); + + // Find and promote load instructions which read directly from store. + bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); + + // Check if converting two narrow loads into a single wider load with + // bitfield extracts could be enabled. + bool enableNarrowLdMerge(MachineFunction &Fn); + + bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "AArch64 load / store optimization pass"; + return AARCH64_LOAD_STORE_OPT_NAME; } - -private: - int getMemSize(MachineInstr *MemMI); }; char AArch64LoadStoreOpt::ID = 0; } // namespace -static bool isUnscaledLdst(unsigned Opc) { +INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", + AARCH64_LOAD_STORE_OPT_NAME, false, false) + +static bool isUnscaledLdSt(unsigned Opc) { switch (Opc) { default: return false; case AArch64::STURSi: - return true; case AArch64::STURDi: - return true; case AArch64::STURQi: - return true; + case AArch64::STURBBi: + case AArch64::STURHHi: case AArch64::STURWi: - return true; case AArch64::STURXi: - return true; case AArch64::LDURSi: - return true; case AArch64::LDURDi: - return true; case AArch64::LDURQi: - return true; case AArch64::LDURWi: - return true; case AArch64::LDURXi: - return true; case AArch64::LDURSWi: + case AArch64::LDURHHi: + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + case AArch64::LDURSHWi: return true; } } -// Size in bytes of the data moved by an unscaled load or store -int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { - switch (MemMI->getOpcode()) { +static bool isUnscaledLdSt(MachineInstr *MI) { + return isUnscaledLdSt(MI->getOpcode()); +} + +static unsigned getBitExtrOpcode(MachineInstr *MI) { + switch (MI->getOpcode()) { default: - llvm_unreachable("Opcode has unknown size!"); - case AArch64::STRSui: - case AArch64::STURSi: - return 4; - case AArch64::STRDui: - case AArch64::STURDi: - return 8; - case AArch64::STRQui: - case AArch64::STURQi: - return 16; - case AArch64::STRWui: - case AArch64::STURWi: - return 4; - case AArch64::STRXui: - case AArch64::STURXi: - return 8; + llvm_unreachable("Unexpected opcode."); + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRHHui: + case AArch64::LDURHHi: + return AArch64::UBFMWri; + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + return AArch64::SBFMWri; + } +} + +static bool isNarrowStore(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: + return true; + } +} + +static bool 
isNarrowStore(MachineInstr *MI) { + return isNarrowStore(MI->getOpcode()); +} + +static bool isNarrowLoad(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + return true; + } +} + +static bool isNarrowLoad(MachineInstr *MI) { + return isNarrowLoad(MI->getOpcode()); +} + +// Scaling factor for unscaled load or store. +static int getMemScale(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + llvm_unreachable("Opcode has unknown scale!"); + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + case AArch64::STRBBui: + case AArch64::STURBBi: + return 1; + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + case AArch64::STRHHui: + case AArch64::STURHHi: + return 2; case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRSWui: + case AArch64::LDURSWi: + case AArch64::LDRWui: + case AArch64::LDURWi: + case AArch64::STRSui: + case AArch64::STURSi: + case AArch64::STRWui: + case AArch64::STURWi: + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPWi: + case AArch64::STPSi: + case AArch64::STPWi: return 4; case AArch64::LDRDui: case AArch64::LDURDi: + case AArch64::LDRXui: + case AArch64::LDURXi: + case AArch64::STRDui: + case AArch64::STURDi: + case AArch64::STRXui: + case AArch64::STURXi: + case AArch64::LDPDi: + case AArch64::LDPXi: + case AArch64::STPDi: + case AArch64::STPXi: return 8; case AArch64::LDRQui: case AArch64::LDURQi: + case AArch64::STRQui: + case AArch64::STURQi: + case AArch64::LDPQi: + case AArch64::STPQi: return 16; - case AArch64::LDRWui: - case AArch64::LDURWi: - return 4; - case AArch64::LDRXui: - case AArch64::LDURXi: - return 8; - case AArch64::LDRSWui: - case AArch64::LDURSWi: - return 4; } } @@ -203,6 +314,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURDi: case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: case AArch64::STRWui: case AArch64::STURWi: case AArch64::STRXui: @@ -219,11 +334,23 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURSi: case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRBBui: + case AArch64::LDURBBi: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; + case AArch64::LDRSBWui: + return AArch64::LDRBBui; + case AArch64::LDRSHWui: + return AArch64::LDRHHui; + case AArch64::LDURSBWi: + return AArch64::LDURBBi; + case AArch64::LDURSHWi: + return AArch64::LDURHHi; } } @@ -240,6 +367,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::STRQui: case AArch64::STURQi: return AArch64::STPQi; + case AArch64::STRBBui: + return AArch64::STRHHui; + case AArch64::STRHHui: + return AArch64::STRWui; + case AArch64::STURBBi: + return AArch64::STURHHi; + case AArch64::STURHHi: + return AArch64::STURWi; case AArch64::STRWui: case AArch64::STURWi: return AArch64::STPWi; @@ -264,6 +399,48 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; + case AArch64::LDRHHui: + case AArch64::LDRSHWui: + return AArch64::LDRWui; + case AArch64::LDURHHi: + case AArch64::LDURSHWi: + return 
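
The getMemScale/getBitExtrOpcode helpers above serve the pass's new narrow-load merge: two adjacent narrow loads become one wider load plus bitfield extracts. A source-level picture of the payoff follows; the assembly in the comments is illustrative, and the register numbers are hypothetical:

```cpp
// Before the pass (little-endian):   After merging:
//   ldrh w8, [x0]                      ldr  w8, [x0]
//   ldrh w9, [x0, #2]                  ubfx w9, w8, #16, #16  // UBFMWri
//                                      and  w8, w8, #0xffff   // low half
#include <cstdint>

uint32_t sum_adjacent_halves(const uint16_t *p) {
  return (uint32_t)p[0] + (uint32_t)p[1];
}
```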
AArch64::LDURWi; + case AArch64::LDRBBui: + case AArch64::LDRSBWui: + return AArch64::LDRHHui; + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + return AArch64::LDURHHi; + } +} + +static unsigned isMatchingStore(MachineInstr *LoadInst, + MachineInstr *StoreInst) { + unsigned LdOpc = LoadInst->getOpcode(); + unsigned StOpc = StoreInst->getOpcode(); + switch (LdOpc) { + default: + llvm_unreachable("Unsupported load instruction!"); + case AArch64::LDRBBui: + return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || + StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; + case AArch64::LDURBBi: + return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || + StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; + case AArch64::LDRHHui: + return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || + StOpc == AArch64::STRXui; + case AArch64::LDURHHi: + return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || + StOpc == AArch64::STURXi; + case AArch64::LDRWui: + return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; + case AArch64::LDURWi: + return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; + case AArch64::LDRXui: + return StOpc == AArch64::STRXui; + case AArch64::LDURXi: + return StOpc == AArch64::STURXi; } } @@ -277,6 +454,10 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::STRDpre; case AArch64::STRQui: return AArch64::STRQpre; + case AArch64::STRBBui: + return AArch64::STRBBpre; + case AArch64::STRHHui: + return AArch64::STRHHpre; case AArch64::STRWui: return AArch64::STRWpre; case AArch64::STRXui: @@ -287,12 +468,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::LDRDpre; case AArch64::LDRQui: return AArch64::LDRQpre; + case AArch64::LDRBBui: + return AArch64::LDRBBpre; + case AArch64::LDRHHui: + return AArch64::LDRHHpre; case AArch64::LDRWui: return AArch64::LDRWpre; case AArch64::LDRXui: return AArch64::LDRXpre; case AArch64::LDRSWui: return AArch64::LDRSWpre; + case AArch64::LDPSi: + return AArch64::LDPSpre; + case AArch64::LDPSWi: + return AArch64::LDPSWpre; + case AArch64::LDPDi: + return AArch64::LDPDpre; + case AArch64::LDPQi: + return AArch64::LDPQpre; + case AArch64::LDPWi: + return AArch64::LDPWpre; + case AArch64::LDPXi: + return AArch64::LDPXpre; + case AArch64::STPSi: + return AArch64::STPSpre; + case AArch64::STPDi: + return AArch64::STPDpre; + case AArch64::STPQi: + return AArch64::STPQpre; + case AArch64::STPWi: + return AArch64::STPWpre; + case AArch64::STPXi: + return AArch64::STPXpre; } } @@ -306,6 +513,10 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::STRDpost; case AArch64::STRQui: return AArch64::STRQpost; + case AArch64::STRBBui: + return AArch64::STRBBpost; + case AArch64::STRHHui: + return AArch64::STRHHpost; case AArch64::STRWui: return AArch64::STRWpost; case AArch64::STRXui: @@ -316,19 +527,111 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::LDRDpost; case AArch64::LDRQui: return AArch64::LDRQpost; + case AArch64::LDRBBui: + return AArch64::LDRBBpost; + case AArch64::LDRHHui: + return AArch64::LDRHHpost; case AArch64::LDRWui: return AArch64::LDRWpost; case AArch64::LDRXui: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; + case AArch64::LDPSi: + return AArch64::LDPSpost; + case AArch64::LDPSWi: + return AArch64::LDPSWpost; + case AArch64::LDPDi: + return AArch64::LDPDpost; + case AArch64::LDPQi: + return AArch64::LDPQpost; + case AArch64::LDPWi: + return AArch64::LDPWpost; + case AArch64::LDPXi: + 
return AArch64::LDPXpost; + case AArch64::STPSi: + return AArch64::STPSpost; + case AArch64::STPDi: + return AArch64::STPDpost; + case AArch64::STPQi: + return AArch64::STPQpost; + case AArch64::STPWi: + return AArch64::STPWpost; + case AArch64::STPXi: + return AArch64::STPXpost; } } +static bool isPairedLdSt(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return false; + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPWi: + case AArch64::STPXi: + return true; + } +} + +static const MachineOperand &getLdStRegOp(const MachineInstr *MI, + unsigned PairedRegOp = 0) { + assert(PairedRegOp < 2 && "Unexpected register operand idx."); + unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; + return MI->getOperand(Idx); +} + +static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) { + unsigned Idx = isPairedLdSt(MI) ? 2 : 1; + return MI->getOperand(Idx); +} + +static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) { + unsigned Idx = isPairedLdSt(MI) ? 3 : 2; + return MI->getOperand(Idx); +} + +static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, + MachineInstr *StoreInst) { + assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); + int LoadSize = getMemScale(LoadInst); + int StoreSize = getMemScale(StoreInst); + int UnscaledStOffset = isUnscaledLdSt(StoreInst) + ? getLdStOffsetOp(StoreInst).getImm() + : getLdStOffsetOp(StoreInst).getImm() * StoreSize; + int UnscaledLdOffset = isUnscaledLdSt(LoadInst) + ? getLdStOffsetOp(LoadInst).getImm() + : getLdStOffsetOp(LoadInst).getImm() * LoadSize; + return (UnscaledStOffset <= UnscaledLdOffset) && + (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); +} + +// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI. +static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, + MachineInstr *Op1) { + assert(MI->memoperands_empty() && "expected a new machineinstr"); + size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + + (Op1->memoperands_end() - Op1->memoperands_begin()); + + MachineFunction *MF = MI->getParent()->getParent(); + MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); + MachineSDNode::mmo_iterator MemEnd = + std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); + MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); + MI->setMemRefs(MemBegin, MemEnd); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool MergeForward, int SExtIdx) { + const LdStPairFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -338,25 +641,26 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (NextI == Paired) ++NextI; + int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); - bool IsUnscaled = isUnscaledLdst(Opc); - int OffsetStride = - IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; + bool IsUnscaled = isUnscaledLdSt(Opc); + int OffsetStride = IsUnscaled ? 
getMemScale(I) : 1; + bool MergeForward = Flags.getMergeForward(); unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. - MachineOperand &BaseRegOp = - MergeForward ? Paired->getOperand(1) : I->getOperand(1); + const MachineOperand &BaseRegOp = + MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; - if (I->getOperand(2).getImm() == - Paired->getOperand(2).getImm() + OffsetStride) { + if (getLdStOffsetOp(I).getImm() == + getLdStOffsetOp(Paired).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; // Here we swapped the assumption made for SExtIdx. @@ -368,18 +672,135 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, RtMI = I; Rt2MI = Paired; } - // Handle Unscaled - int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableAArch64UnscaledMemOp) - OffsetImm /= OffsetStride; + + int OffsetImm = getLdStOffsetOp(RtMI).getImm(); + + if (isNarrowLoad(Opc)) { + // Change the scaled offset from small to large type. + if (!IsUnscaled) { + assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); + OffsetImm /= 2; + } + MachineInstr *RtNewDest = MergeForward ? I : Paired; + // When merging small (< 32 bit) loads for big-endian targets, the order of + // the component parts gets swapped. + if (!Subtarget->isLittleEndian()) + std::swap(RtMI, Rt2MI); + // Construct the new load instruction. + MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; + NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(RtNewDest)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + + // Copy MachineMemOperands from the original loads. + concatenateMemOperands(NewMemMI, I, Paired); + + DEBUG( + dbgs() + << "Creating the new load and extract. Replacing instructions:\n "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(Paired->print(dbgs())); + DEBUG(dbgs() << " with instructions:\n "); + DEBUG((NewMemMI)->print(dbgs())); + + int Width = getMemScale(I) == 1 ? 8 : 16; + int LSBLow = 0; + int LSBHigh = Width; + int ImmsLow = LSBLow + Width - 1; + int ImmsHigh = LSBHigh + Width - 1; + MachineInstr *ExtDestMI = MergeForward ? Paired : I; + if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { + // Create the bitfield extract for high bits. + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(Rt2MI))) + .addOperand(getLdStRegOp(Rt2MI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBHigh) + .addImm(ImmsHigh); + // Create the bitfield extract for low bits. + if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { + // For unsigned, prefer to use AND for low bits. 
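The preference noted in the comment above has a simple justification: an unsigned bitfield extract whose field starts at bit 0 computes exactly a mask of the low bits, so an ANDWri and a UBFX at lsb 0 are interchangeable there. A minimal sketch of the equivalence (illustrative C++ written for this note, not code from the patch; extractUnsigned stands in for what UBFMWri produces):

    #include <cassert>
    #include <cstdint>

    // What an unsigned bitfield extract (ubfx dst, src, #Lsb, #Width) computes.
    static uint32_t extractUnsigned(uint32_t Src, unsigned Lsb, unsigned Width) {
      return (Src >> Lsb) & ((1u << Width) - 1);
    }

    int main() {
      uint32_t Src = 0xBEEF1234;
      // Low halfword: ubfx at lsb 0 and "and dst, src, #0xffff" agree ...
      assert(extractUnsigned(Src, 0, 16) == (Src & 0xffff));
      // ... while the high halfword genuinely needs the shift (lsb 16).
      assert(extractUnsigned(Src, 16, 16) == 0xBEEFu);
      return 0;
    }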
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::ANDWri)) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(ImmsLow); + } else { + BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(RtMI))) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBLow) + .addImm(ImmsLow); + } + } else { + // Create the bitfield extract for low bits. + if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { + // For unsigned, prefer to use AND for low bits. + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::ANDWri)) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(ImmsLow); + } else { + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(RtMI))) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBLow) + .addImm(ImmsLow); + } + + // Create the bitfield extract for high bits. + BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(Rt2MI))) + .addOperand(getLdStRegOp(Rt2MI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBHigh) + .addImm(ImmsHigh); + } + DEBUG(dbgs() << " "); + DEBUG((BitExtMI1)->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG((BitExtMI2)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions. + I->eraseFromParent(); + Paired->eraseFromParent(); + return NextI; + } // Construct the new instruction. - MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(RtMI->getOperand(0)) - .addOperand(Rt2MI->getOperand(0)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); + MachineInstrBuilder MIB; + if (isNarrowStore(Opc)) { + // Change the scaled offset from small to large type. + if (!IsUnscaled) { + assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); + OffsetImm /= 2; + } + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(I)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + // Copy MachineMemOperands from the original stores. 
+ concatenateMemOperands(MIB, I, Paired); + } else { + // Handle Unscaled + if (IsUnscaled) + OffsetImm /= OffsetStride; + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(RtMI)) + .addOperand(getLdStRegOp(Rt2MI)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + } + (void)MIB; // FIXME: Do we need/want to copy the mem operands from the source @@ -439,13 +860,112 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, return NextI; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, + MachineBasicBlock::iterator StoreI) { + MachineBasicBlock::iterator NextI = LoadI; + ++NextI; + + int LoadSize = getMemScale(LoadI); + int StoreSize = getMemScale(StoreI); + unsigned LdRt = getLdStRegOp(LoadI).getReg(); + unsigned StRt = getLdStRegOp(StoreI).getReg(); + bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); + + assert((IsStoreXReg || + TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && + "Unexpected RegClass"); + + MachineInstr *BitExtMI; + if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { + // Remove the load if the destination register of the load is the same as + // the register holding the stored value. + if (StRt == LdRt && LoadSize == 8) { + DEBUG(dbgs() << "Remove load instruction:\n "); + DEBUG(LoadI->print(dbgs())); + DEBUG(dbgs() << "\n"); + LoadI->eraseFromParent(); + return NextI; + } + // Replace the load with a mov if the load and store are the same size. + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) + .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) + .addReg(StRt) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + // FIXME: Currently we disable this transformation in big-endian targets as + // performance and correctness are verified only in little-endian. + if (!Subtarget->isLittleEndian()) + return NextI; + bool IsUnscaled = isUnscaledLdSt(LoadI); + assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match"); + assert(LoadSize <= StoreSize && "Invalid load size"); + int UnscaledLdOffset = IsUnscaled + ? getLdStOffsetOp(LoadI).getImm() + : getLdStOffsetOp(LoadI).getImm() * LoadSize; + int UnscaledStOffset = IsUnscaled + ? getLdStOffsetOp(StoreI).getImm() + : getLdStOffsetOp(StoreI).getImm() * StoreSize; + int Width = LoadSize * 8; + int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); + int Imms = Immr + Width - 1; + unsigned DestReg = IsStoreXReg + ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, + &AArch64::GPR64RegClass) + : LdRt; + + assert((UnscaledLdOffset >= UnscaledStOffset && + (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && + "Invalid offset"); + + Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); + Imms = Immr + Width - 1; + if (UnscaledLdOffset == UnscaledStOffset) { + uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N + | ((Immr) << 6) // immr + | ((Imms) << 0) // imms + ; + + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), + DestReg) + .addReg(StRt) + .addImm(AndMaskEncoded); + } else { + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ?
AArch64::UBFMXri : AArch64::UBFMWri), + DestReg) + .addReg(StRt) + .addImm(Immr) + .addImm(Imms); + } + } + + DEBUG(dbgs() << "Promoting load by replacing :\n "); + DEBUG(StoreI->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(LoadI->print(dbgs())); + DEBUG(dbgs() << " with instructions:\n "); + DEBUG(StoreI->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG((BitExtMI)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions. + LoadI->eraseFromParent(); + return NextI; +} + /// trackRegDefsUses - Remember what registers the specified instruction uses /// and modifies. -static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, +static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, BitVector &UsedRegs, const TargetRegisterInfo *TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) ModifiedRegs.setBitsNotInMask(MO.getRegMask()); @@ -464,16 +984,12 @@ static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, } static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { - if (!IsUnscaled && (Offset > 63 || Offset < -64)) - return false; - if (IsUnscaled) { - // Convert the byte-offset used by unscaled into an "element" offset used - // by the scaled pair load/store instructions. - int ElemOffset = Offset / OffsetStride; - if (ElemOffset > 63 || ElemOffset < -64) - return false; - } - return true; + // Convert the byte-offset used by unscaled into an "element" offset used + // by the scaled pair load/store instructions. + if (IsUnscaled) + Offset /= OffsetStride; + + return Offset <= 63 && Offset >= -64; } // Do alignment, specialized to power of 2 and for signed ints, @@ -507,12 +1023,65 @@ static bool mayAlias(MachineInstr *MIa, return false; } +bool AArch64LoadStoreOpt::findMatchingStore( + MachineBasicBlock::iterator I, unsigned Limit, + MachineBasicBlock::iterator &StoreI) { + MachineBasicBlock::iterator E = I->getParent()->begin(); + MachineBasicBlock::iterator MBBI = I; + MachineInstr *FirstMI = I; + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + + // Track which registers have been modified and used between the first insn + // and the second insn. + BitVector ModifiedRegs, UsedRegs; + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + + for (unsigned Count = 0; MBBI != E && Count < Limit;) { + --MBBI; + MachineInstr *MI = MBBI; + // Skip DBG_VALUE instructions. Otherwise debug info can affect the + // optimization by changing how far we scan. + if (MI->isDebugValue()) + continue; + // Now that we know this is a real instruction, count it. + ++Count; + + // If the load instruction reads directly from the address to which the + // store instruction writes and the stored value is not modified, we can + // promote the load. Since we do not handle stores with pre-/post-index, + // it's unnecessary to check if BaseReg is modified by the store itself. + if (MI->mayStore() && isMatchingStore(FirstMI, MI) && + BaseReg == getLdStBaseOp(MI).getReg() && + isLdOffsetInRangeOfSt(FirstMI, MI) && + !ModifiedRegs[getLdStRegOp(MI).getReg()]) { + StoreI = MBBI; + return true; + } + + if (MI->isCall()) + return false; + + // Update modified / uses register lists. + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + + // Otherwise, if the base register is modified, we have no match, so + // return early. 
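The scan above accepts a store only when isLdOffsetInRangeOfSt holds, and promoteLoadFromStore then derives the extract position from the same normalized byte offsets. A self-contained model of both computations (illustrative C++, not the pass's API; little-endian layout is assumed, as in the patch):

    #include <cassert>

    // The load's byte range must lie inside the store's byte range
    // (the shape of isLdOffsetInRangeOfSt, offsets already in bytes).
    static bool loadCoveredByStore(int LdOff, int LdSize, int StOff, int StSize) {
      return StOff <= LdOff && LdOff + LdSize <= StOff + StSize;
    }

    // Bit position of the loaded bytes inside the stored value on a
    // little-endian target: the Immr/lsb fed to the UBFM above.
    static int extractLsb(int LdOff, int StOff) { return 8 * (LdOff - StOff); }

    int main() {
      // str w1, [x0, #4] writes bytes [4, 8); ldrh w2, [x0, #6] reads [6, 8).
      assert(loadCoveredByStore(/*LdOff=*/6, /*LdSize=*/2, /*StOff=*/4, /*StSize=*/4));
      // The halfword sits 16 bits up, so the load becomes lsr w2, w1, #16
      // (a UBFM with immr = 16 and imms = 16 + 16 - 1 = 31).
      assert(extractLsb(6, 4) == 16);
      // A 2-byte load at offset 7 would read past the store and is rejected.
      assert(!loadCoveredByStore(7, 2, 4, 4));
      return 0;
    }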
+ if (ModifiedRegs[BaseReg]) + return false; + + // If we encounter a store aliased with the load, return early. + if (MI->mayStore() && mayAlias(FirstMI, MI, TII)) + return false; + } + return false; +} + /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, int &SExtIdx, - unsigned Limit) { + LdStPairFlags &Flags, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -520,21 +1089,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, unsigned Opc = FirstMI->getOpcode(); bool MayLoad = FirstMI->mayLoad(); - bool IsUnscaled = isUnscaledLdst(Opc); - unsigned Reg = FirstMI->getOperand(0).getReg(); - unsigned BaseReg = FirstMI->getOperand(1).getReg(); - int Offset = FirstMI->getOperand(2).getImm(); + bool IsUnscaled = isUnscaledLdSt(FirstMI); + unsigned Reg = getLdStRegOp(FirstMI).getReg(); + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + int Offset = getLdStOffsetOp(FirstMI).getImm(); + bool IsNarrowStore = isNarrowStore(Opc); + + // For narrow stores, find only the case where the stored value is WZR. + if (IsNarrowStore && Reg != AArch64::WZR) + return E; // Early exit if the first instruction modifies the base register. // e.g., ldr x0, [x0] - // Early exit if the offset if not possible to match. (6 bits of positive - // range, plus allow an extra one in case we find a later insn that matches - // with Offset-1 if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; - int OffsetStride = - IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; - if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) + + // Early exit if the offset is not possible to match. (6 bits of positive + // range, plus allow an extra one in case we find a later insn that matches + // with Offset-1) + int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; + if (!(isNarrowLoad(Opc) || IsNarrowStore) && + !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; // Track which registers have been modified and used between the first insn @@ -557,18 +1132,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, ++Count; bool CanMergeOpc = Opc == MI->getOpcode(); - SExtIdx = -1; + Flags.setSExtIdx(-1); if (!CanMergeOpc) { bool IsValidLdStrOpc; unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); - if (!IsValidLdStrOpc) - continue; + assert(IsValidLdStrOpc && + "Given Opc should be a Load or Store with an immediate"); // Opc will be the first instruction in the pair. - SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0; + Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); } - if (CanMergeOpc && MI->getOperand(2).isImm()) { + if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { + assert(MI->mayLoadOrStore() && "Expected memory operation."); // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just @@ -579,8 +1155,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Pairwise instructions have a 7-bit signed offset field. Single insns // have a 12-bit unsigned offset field.
To be a valid combine, the // final offset must be in range. - unsigned MIBaseReg = MI->getOperand(1).getReg(); - int MIOffset = MI->getOperand(2).getImm(); + unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); + int MIOffset = getLdStOffsetOp(MI).getImm(); if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || (Offset + OffsetStride == MIOffset))) { int MinOffset = Offset < MIOffset ? Offset : MIOffset; @@ -591,30 +1167,43 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, return E; // If the resultant immediate offset of merging these instructions // is out of range for a pairwise instruction, bail and keep looking. - bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); - if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { + bool MIIsUnscaled = isUnscaledLdSt(MI); + bool IsNarrowLoad = isNarrowLoad(MI->getOpcode()); + if (!IsNarrowLoad && + !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) - MemInsns.push_back(MI); + MemInsns.push_back(MI); continue; } - // If the alignment requirements of the paired (scaled) instruction - // can't express the offset of the unscaled input, bail and keep - // looking. - if (IsUnscaled && EnableAArch64UnscaledMemOp && - (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) + + if (IsNarrowLoad || IsNarrowStore) { + // If the alignment requirements of the scaled wide load/store + // instruction can't express the offset of the scaled narrow + // input, bail and keep looking. + if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); - continue; + continue; + } + } else { + // If the alignment requirements of the paired (scaled) instruction + // can't express the offset of the unscaled input, bail and keep + // looking. + if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + MemInsns.push_back(MI); + continue; + } } // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - if (MayLoad && Reg == MI->getOperand(0).getReg()) { + // For narrow stores, allow only when the stored value is the same + // (i.e., WZR). + if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || + (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) - MemInsns.push_back(MI); + MemInsns.push_back(MI); continue; } @@ -622,10 +1211,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. 
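The inBoundsForPair checks in this loop encode a single constraint: pair instructions carry a 7-bit signed offset in units of the access size, so unscaled byte offsets are divided by the stride first. A minimal model with concrete values (illustrative C++, not code from the patch):

    #include <cassert>

    // Pair forms take a 7-bit signed *element* offset. Unscaled (LDUR/STUR)
    // candidates carry byte offsets, so normalize those first; scaled
    // candidates already encode element offsets (stride 1 here).
    static bool inBoundsForPairModel(bool IsUnscaled, int Offset, int OffsetStride) {
      if (IsUnscaled)
        Offset /= OffsetStride;
      return Offset <= 63 && Offset >= -64;
    }

    int main() {
      // ldr x0, [x2, #504]: scaled immediate 63 (504 / 8), still pairable.
      assert(inBoundsForPairModel(/*IsUnscaled=*/false, 63, /*OffsetStride=*/1));
      // ldur x0, [x2, #520]: 520 / 8 == 65 elements, out of ldp range.
      assert(!inBoundsForPairModel(/*IsUnscaled=*/true, 520, /*OffsetStride=*/8));
      return 0;
    }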
- if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) && + if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && + !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && !mayAlias(MI, MemInsns, TII)) { - MergeForward = false; + Flags.setMergeForward(false); return MBBI; } @@ -633,11 +1222,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !(FirstMI->mayLoad() && - UsedRegs[FirstMI->getOperand(0).getReg()]) && + if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && + !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && !mayAlias(FirstMI, MemInsns, TII)) { - MergeForward = true; + Flags.setMergeForward(true); return MBBI; } // Unable to combine these instructions due to interference in between. @@ -666,8 +1254,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, } MachineBasicBlock::iterator -AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { +AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, + bool IsPreIdx) { assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); @@ -680,20 +1269,36 @@ AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into pre-indexed load / store"); + "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; - unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(Update->getOperand(0)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); + unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) + : getPostIndexedOpcode(I->getOpcode()); + MachineInstrBuilder MIB; + if (!isPairedLdSt(I)) { + // Non-paired instruction. + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(getLdStRegOp(Update)) + .addOperand(getLdStRegOp(I)) + .addOperand(getLdStBaseOp(I)) + .addImm(Value); + } else { + // Paired instruction. 
+ int Scale = getMemScale(I); + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(getLdStRegOp(Update)) + .addOperand(getLdStRegOp(I, 0)) + .addOperand(getLdStRegOp(I, 1)) + .addOperand(getLdStBaseOp(I)) + .addImm(Value / Scale); + } (void)MIB; - DEBUG(dbgs() << "Creating pre-indexed load/store."); + if (IsPreIdx) + DEBUG(dbgs() << "Creating pre-indexed load/store."); + else + DEBUG(dbgs() << "Creating post-indexed load/store."); DEBUG(dbgs() << " Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); @@ -709,51 +1314,9 @@ AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( - MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == AArch64::ADDXri || - Update->getOpcode() == AArch64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == AArch64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(Update->getOperand(0)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating post-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, - int Offset) { +bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI, + MachineInstr *MI, + unsigned BaseReg, int Offset) { switch (MI->getOpcode()) { default: break; @@ -769,44 +1332,65 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, // Watch out for 1 << 12 shifted value. if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) break; - // If the instruction has the base register as source and dest and the - // immediate will fit in a signed 9-bit integer, then we have a match. - if (MI->getOperand(0).getReg() == BaseReg && - MI->getOperand(1).getReg() == BaseReg && - MI->getOperand(2).getImm() <= 255 && - MI->getOperand(2).getImm() >= -256) { - // If we have a non-zero Offset, we check that it matches the amount - // we're adding to the register. - if (!Offset || Offset == MI->getOperand(2).getImm()) - return true; + + // The update instruction source and destination register must be the + // same as the load/store base register. 
+ if (MI->getOperand(0).getReg() != BaseReg || + MI->getOperand(1).getReg() != BaseReg) + break; + + bool IsPairedInsn = isPairedLdSt(MemMI); + int UpdateOffset = MI->getOperand(2).getImm(); + // For non-paired load/store instructions, the immediate must fit in a + // signed 9-bit integer. + if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) + break; + + // For paired load/store instructions, the immediate must be a multiple of + // the scaling factor. The scaled offset must also fit into a signed 7-bit + // integer. + if (IsPairedInsn) { + int Scale = getMemScale(MemMI); + if (UpdateOffset % Scale != 0) + break; + + int ScaledOffset = UpdateOffset / Scale; + if (ScaledOffset > 64 || ScaledOffset < -64) + break; } + + // If we have a non-zero Offset, we check that it matches the amount + // we're adding to the register. + if (!Offset || Offset == MI->getOperand(2).getImm()) + return true; break; } return false; } MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( - MachineBasicBlock::iterator I, unsigned Limit, int Value) { + MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm() * - TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); - // If the base register overlaps the destination register, we can't + // Scan forward looking for post-index opportunities. Updating instructions + // can't be formed if the memory instruction doesn't have the offset we're + // looking for. + if (MIUnscaledOffset != UnscaledOffset) + return E; + + // If the base register overlaps a destination register, we can't // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Scan forward looking for post-index opportunities. - // Updating instructions can't be formed if the memory insn already - // has an offset other than the value we're looking for. - if (Offset != Value) - return E; + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. @@ -825,7 +1409,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( ++Count; // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, Value)) + if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset)) return MBBI; // Update the status of what the instruction clobbered and used. 
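The hunks on either side of this point rely on the same immediate arithmetic, so it is worth spelling out: the add/sub being folded always carries a byte offset; non-paired writeback forms accept a signed 9-bit byte offset, while paired forms accept a signed 7-bit offset in units of the access size. A self-contained sketch of that check (the range constants mirror the patch; illustrative C++, not LLVM code):

    #include <cassert>

    // Can "add/sub Xn, Xn, #UpdateOffset" fold into a load/store writeback?
    static bool updateOffsetFoldable(bool IsPaired, int UpdateOffset, int Scale) {
      if (!IsPaired)
        return UpdateOffset <= 255 && UpdateOffset >= -256; // signed 9-bit bytes
      if (UpdateOffset % Scale != 0) // paired immediates are scaled
        return false;
      int ScaledOffset = UpdateOffset / Scale;
      return ScaledOffset <= 64 && ScaledOffset >= -64;
    }

    int main() {
      // ldp x0, x1, [x2]; add x2, x2, #32: foldable, the MI immediate is 32 / 8.
      assert(updateOffsetFoldable(/*IsPaired=*/true, 32, /*Scale=*/8));
      // add x2, x2, #20 is not a multiple of 8, so no paired writeback form.
      assert(!updateOffsetFoldable(true, 20, 8));
      // ldr x0, [x2]; add x2, x2, #200 fits the 9-bit range for ldr post-index.
      assert(updateOffsetFoldable(/*IsPaired=*/false, 200, /*Scale=*/8));
      return 0;
    }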
@@ -845,21 +1429,22 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm(); - unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + int Offset = getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. if (MBBI == B || Offset != 0) return E; - // If the base register overlaps the destination register, we can't + // If the base register overlaps a destination register, we can't // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. @@ -878,7 +1463,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( ++Count; // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) + if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) return MBBI; // Update the status of what the instruction clobbered and used. @@ -892,17 +1477,101 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } -bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { +bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + // If this is a volatile load, don't mess with it. + if (MI->hasOrderedMemoryRef()) + return false; + + // Make sure this is a reg+imm. + // FIXME: It is possible to extend it to handle reg+reg cases. + if (!getLdStOffsetOp(MI).isImm()) + return false; + + // Look backward up to ScanLimit instructions. + MachineBasicBlock::iterator StoreI; + if (findMatchingStore(MBBI, ScanLimit, StoreI)) { + ++NumLoadsFromStoresPromoted; + // Promote the load. Keeping the iterator straight is a + // pain, so we let the merge routine tell us what the next instruction + // is after it's done mucking about. + MBBI = promoteLoadFromStore(MBBI, StoreI); + return true; + } + return false; +} + +bool AArch64LoadStoreOpt::tryToMergeLdStInst( + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + MachineBasicBlock::iterator E = MI->getParent()->end(); + // If this is a volatile load/store, don't mess with it. + if (MI->hasOrderedMemoryRef()) + return false; + + // Make sure this is a reg+imm (as opposed to an address reloc). + if (!getLdStOffsetOp(MI).isImm()) + return false; + + // Check if this load/store has a hint to avoid pair formation. + // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. + if (TII->isLdStPairSuppressed(MI)) + return false; + + // Look ahead up to ScanLimit instructions for a pairable instruction. 
+ LdStPairFlags Flags; + MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit); + if (Paired != E) { + if (isNarrowLoad(MI)) { + ++NumNarrowLoadsPromoted; + } else if (isNarrowStore(MI)) { + ++NumZeroStoresPromoted; + } else { + ++NumPairCreated; + if (isUnscaledLdSt(MI)) + ++NumUnscaledPairCreated; + } + + // Merge the loads into a pair. Keeping the iterator straight is a + // pain, so we let the merge routine tell us what the next instruction + // is after it's done mucking about. + MBBI = mergePairedInsns(MBBI, Paired, Flags); + return true; + } + return false; +} + +bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, + bool enableNarrowLdOpt) { bool Modified = false; - // Two tranformations to do here: - // 1) Find loads and stores that can be merged into a single load or store + // Four transformations to do here: + // 1) Find loads that directly read from stores and promote them by + // replacing with mov instructions. If the store is wider than the load, + // the load will be replaced with a bitfield extract. + // e.g., + // str w1, [x0, #4] + // ldrh w2, [x0, #6] + // ; becomes + // str w1, [x0, #4] + // lsr w2, w1, #16 + // 2) Find narrow loads that can be converted into a single wider load + // with bitfield extract instructions. + // e.g., + // ldrh w0, [x2] + // ldrh w1, [x2, #2] + // ; becomes + // ldr w0, [x2] + // ubfx w1, w0, #16, #16 + // and w0, w0, #ffff + // 3) Find loads and stores that can be merged into a single load or store // pair instruction. // e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] - // 2) Find base register updates that can be merged into the load or store + // 4) Find base register updates that can be merged into the load or store // as a base-reg writeback. // e.g., // ldr x0, [x2] @@ -918,6 +1587,69 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; + // Scaled instructions. + case AArch64::LDRBBui: + case AArch64::LDRHHui: + case AArch64::LDRWui: + case AArch64::LDRXui: + // Unscaled instructions. + case AArch64::LDURBBi: + case AArch64::LDURHHi: + case AArch64::LDURWi: + case AArch64::LDURXi: { + if (tryToPromoteLoadFromStore(MBBI)) { + Modified = true; + break; + } + ++MBBI; + break; + } + // FIXME: Do the other instructions. + } + } + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + enableNarrowLdOpt && MBBI != E;) { + MachineInstr *MI = MBBI; + switch (MI->getOpcode()) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + // Scaled instructions. + case AArch64::LDRBBui: + case AArch64::LDRHHui: + case AArch64::LDRSBWui: + case AArch64::LDRSHWui: + case AArch64::STRBBui: + case AArch64::STRHHui: + // Unscaled instructions. + case AArch64::LDURBBi: + case AArch64::LDURHHi: + case AArch64::LDURSBWi: + case AArch64::LDURSHWi: + case AArch64::STURBBi: + case AArch64::STURHHi: { + if (tryToMergeLdStInst(MBBI)) { + Modified = true; + break; + } + ++MBBI; + break; + } + // FIXME: Do the other instructions. + } + } + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + MachineInstr *MI = MBBI; + switch (MI->getOpcode()) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + // Scaled instructions.
case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: @@ -929,7 +1661,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDRXui: case AArch64::LDRWui: case AArch64::LDRSWui: - // do the unscaled versions as well + // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: @@ -941,37 +1673,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDURWi: case AArch64::LDURXi: case AArch64::LDURSWi: { - // If this is a volatile load/store, don't mess with it. - if (MI->hasOrderedMemoryRef()) { - ++MBBI; - break; - } - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. - if (TII->isLdStPairSuppressed(MI)) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a pairable instruction. - bool MergeForward = false; - int SExtIdx = -1; - MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); - if (Paired != E) { - // Merge the loads into a pair. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); - + if (tryToMergeLdStInst(MBBI)) { Modified = true; - ++NumPairCreated; - if (isUnscaledLdst(MI->getOpcode())) - ++NumUnscaledPairCreated; break; } ++MBBI; @@ -992,17 +1695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; + // Scaled instructions. case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: case AArch64::STRXui: case AArch64::STRWui: + case AArch64::STRHHui: + case AArch64::STRBBui: case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: - // do the unscaled versions as well + case AArch64::LDRHHui: + case AArch64::LDRBBui: + // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: @@ -1012,25 +1720,41 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: - case AArch64::LDURXi: { + case AArch64::LDURXi: + // Paired instructions. + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPWi: + case AArch64::STPXi: { // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { + if (!getLdStOffsetOp(MI).isImm()) { ++MBBI; break; } - // Look ahead up to ScanLimit instructions for a mergable instruction. + // Look forward to try to form a post-index instruction. For example, + // ldr x0, [x20] + // add x20, x20, #32 + // merged into: + // ldr x0, [x20], #32 MachineBasicBlock::iterator Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePostIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); Modified = true; ++NumPostFolded; break; } // Don't know how to handle pre/post-index versions, so move to the next // instruction. 
- if (isUnscaledLdst(Opc)) { + if (isUnscaledLdSt(Opc)) { ++MBBI; break; } @@ -1043,28 +1767,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; } + // The immediate in the load/store is scaled by the size of the memory + // operation. The immediate in the add we're looking for, + // however, is not, so adjust here. + int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); // Look forward to try to find a pre-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: // ldr x1, [x0, #64]! - - // The immediate in the load/store is scaled by the size of the register - // being loaded. The immediate in the add we're looking for, - // however, is not, so adjust here. - int Value = MI->getOperand(2).getImm() * - TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) - ->getSize(); - Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); + Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; @@ -1081,13 +1802,24 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { return Modified; } +bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) { + bool ProfitableArch = Subtarget->isCortexA57(); + // FIXME: The benefit from converting narrow loads into a wider load could be + // microarchitectural as it assumes that a single load with two bitfield + // extracts is cheaper than two narrow loads. Currently, this conversion is + // enabled only in cortex-a57 on which performance benefits were verified. + return ProfitableArch && !Subtarget->requiresStrictAlign(); +} + bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo()); - TRI = Fn.getSubtarget().getRegisterInfo(); + Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); + TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); + TRI = Subtarget->getRegisterInfo(); bool Modified = false; + bool enableNarrowLdOpt = enableNarrowLdMerge(Fn); for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB); + Modified |= optimizeBlock(MBB, enableNarrowLdOpt); return Modified; } @@ -1095,8 +1827,8 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep // loads and stores near one another? -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. +/// createAArch64LoadStoreOptimizationPass - returns an instance of the +/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { return new AArch64LoadStoreOpt(); } diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 580427ab3cc1..2b4cdf1083be 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -207,9 +207,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; - if (lowerOperand(MI->getOperand(i), MCOp)) + if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } } diff --git a/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/lib/Target/AArch64/AArch64MachineCombinerPattern.h deleted file mode 100644 index 4164b3364559..000000000000 --- a/lib/Target/AArch64/AArch64MachineCombinerPattern.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- AArch64MachineCombinerPattern.h -===// -//===- AArch64 instruction pattern supported by combiner -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines instruction pattern supported by combiner -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H -#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H - -namespace llvm { - -/// Enumeration of instruction pattern supported by machine combiner -/// -/// -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - MC_NONE = 0, - MC_MULADDW_OP1 = 1, - MC_MULADDW_OP2 = 2, - MC_MULSUBW_OP1 = 3, - MC_MULSUBW_OP2 = 4, - MC_MULADDWI_OP1 = 5, - MC_MULSUBWI_OP1 = 6, - MC_MULADDX_OP1 = 7, - MC_MULADDX_OP2 = 8, - MC_MULSUBX_OP1 = 9, - MC_MULSUBX_OP2 = 10, - MC_MULADDXI_OP1 = 11, - MC_MULSUBXI_OP1 = 12 -}; -} // end namespace MachineCombinerPattern -} // end namespace llvm - -#endif diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 536a8d0f97a0..318f83953505 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=// +//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -42,7 +42,7 @@ class AArch64FunctionInfo : public MachineFunctionInfo { unsigned ArgumentStackToRestore; /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). + /// determineCalleeSaves(). bool HasStackFrame; /// \brief Amount of stack frame size, not including callee-saved registers. @@ -72,16 +72,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo { /// registers. unsigned VarArgsFPRSize; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. 
+ bool IsSplitCSR; + public: AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {} + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), + IsSplitCSR(false) {} explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) { + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), + IsSplitCSR(false) { (void)MF; } @@ -96,6 +102,9 @@ public: bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } unsigned getLocalStackSize() const { return LocalStackSize; } diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp index e1b93bf07c89..79c09d9f058d 100644 --- a/lib/Target/AArch64/AArch64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -489,7 +489,7 @@ bool AArch64PromoteConstant::insertDefinitions( for (const auto &IPI : InsertPts) { // Create the load of the global variable. - IRBuilder<> Builder(IPI.first->getParent(), IPI.first); + IRBuilder<> Builder(IPI.first); LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV); DEBUG(dbgs() << "**********\n"); DEBUG(dbgs() << "New def: "); @@ -540,7 +540,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) { bool LocalChange = false; SmallPtrSet AlreadyChecked; - for (Instruction &I : inst_range(&F)) { + for (Instruction &I : instructions(&F)) { // Traverse the operand, looking for constant vectors. Replace them by a // load of a global variable of constant vector type. for (Value *Op : I.operand_values()) { diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 841af55f7a65..32b4888f2f64 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "AArch64RegisterInfo.h" #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" @@ -34,10 +35,6 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" -static cl::opt -ReserveX18("aarch64-reserve-x18", cl::Hidden, - cl::desc("Reserve X18, making it unavailable as GPR")); - AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} @@ -50,10 +47,23 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + return MF->getInfo()->isSplitCSR() ? 
+ CSR_AArch64_CXX_TLS_Darwin_PE_SaveList : + CSR_AArch64_CXX_TLS_Darwin_SaveList; else return CSR_AArch64_AAPCS_SaveList; } +const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( + const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getInfo()->isSplitCSR()) + return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; + return nullptr; +} + const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -62,6 +72,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; + if (CC == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_RegMask; else return CSR_AArch64_AAPCS_RegMask; } @@ -104,7 +116,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::W29); } - if (TT.isOSDarwin() || ReserveX18) { + if (MF.getSubtarget().isX18Reserved()) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -131,7 +143,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return TT.isOSDarwin() || ReserveX18; + return MF.getSubtarget().isX18Reserved(); case AArch64::FP: case AArch64::W29: return TFI->hasFP(MF) || TT.isOSDarwin(); @@ -186,29 +198,6 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } -bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { - - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - return true; -} - -// FIXME: share this with other backends with identical implementation? -bool -AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64FrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = - ((MFI->getMaxAlignment() > StackAlign) || - F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); @@ -424,10 +413,11 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64RegClassID: case AArch64::GPR32commonRegClassID: case AArch64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP - - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register - - hasBasePointer(MF); // X19 + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP + - MF.getSubtarget() + .isX18Reserved() // X18 reserved as platform register + - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: case AArch64::FPR32RegClassID: diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 8c379d926108..f33f788fd437 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -35,6 +35,8 @@ public: /// Code Generation virtual methods... 
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; @@ -93,9 +95,6 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - // Base pointer (stack realignment) support. - bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b2efca023372..a8c8b176efa9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64", // The lower 16 vector registers. Some instructions can only take registers // in this range. def FPR128_lo : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (trunc FPR128, 16)>; // Pairs, triples, and quads of 64-bit vector registers. diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 486efd6ce3a2..f6ee8cf47a6a 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -31,6 +31,11 @@ static cl::opt<bool> EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); +// If the OS supports TBI, use this flag to enable it. +static cl::opt<bool> +UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " + "an address is ignored"), cl::init(false), cl::Hidden); + AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { // Determine default and user-specified characteristics @@ -46,9 +51,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), - HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), - IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), + HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false), + HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian), + CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), TLInfo(TM, *this) {} @@ -113,12 +120,30 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // bi-directional scheduling. 253.perlbmk. Policy.OnlyTopDown = false; Policy.OnlyBottomUp = false; + // Enabling or disabling the latency heuristic is a close call: it seems to + // help nearly no benchmark on out-of-order architectures; on the other hand + // it regresses register pressure on a few benchmarks.
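// Illustrative note (not part of the patch): the net effect of the lines
// below is that every AArch64 core keeps the scheduler's default latency
// heuristic except Cyclone, where it is switched off.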
+ if (isCyclone()) + Policy.DisableLatencyHeuristic = true; } bool AArch64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } +bool AArch64Subtarget::supportsAddressTopByteIgnored() const { + if (!UseAddressTopByteIgnored) + return false; + + if (TargetTriple.isiOS()) { + unsigned Major, Minor, Micro; + TargetTriple.getiOSVersion(Major, Minor, Micro); + return Major >= 8; + } + + return false; +} + std::unique_ptr AArch64Subtarget::getCustomPBQPConstraints() const { if (!isCortexA57()) diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 6bb069423060..1b8b9b27719c 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -33,17 +33,21 @@ class Triple; class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: - enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone}; + enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone}; /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; bool HasV8_1aOps; + bool HasV8_2aOps; bool HasFPARMv8; bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasPerfMon; + bool HasFullFP16; + bool HasSPE; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove; @@ -51,6 +55,12 @@ protected: // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing; + // StrictAlign - Disallow unaligned memory accesses. + bool StrictAlign; + + // ReserveX18 - X18 is not available as a general purpose register. + bool ReserveX18; + bool IsLittle; /// CPUString - String name of used CPU. @@ -92,19 +102,30 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { - return isCortexA53() || isCortexA57(); + return isGeneric() || isCortexA53() || isCortexA57(); } bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool requiresStrictAlign() const { return StrictAlign; } + + bool isX18Reserved() const { return ReserveX18; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + /// CPU has TBI (top byte of addresses is ignored during HW address + /// translation) and OS enables it. 
+ bool supportsAddressTopByteIgnored() const; + + bool hasPerfMon() const { return HasPerfMon; } + bool hasFullFP16() const { return HasFullFP16; } + bool hasSPE() const { return HasSPE; } bool isLittleEndian() const { return IsLittle; } @@ -112,11 +133,13 @@ public: bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isGeneric() const { return CPUString == "generic"; } bool isCyclone() const { return CPUString == "cyclone"; } bool isCortexA57() const { return CPUString == "cortex-a57"; } bool isCortexA53() const { return CPUString == "cortex-a53"; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index db6e244337a7..c52c5544fc7e 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -203,7 +203,7 @@ public: } // namespace TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(AArch64TTIImpl(this, F)); }); } diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e085cca35f1c..9af0e6444789 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { +int AArch64TTIImpl::getIntImmCost(int64_t Val) { // Check if the immediate can be encoded within an instruction. if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; @@ -37,7 +37,7 @@ unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { } /// \brief Calculate the cost of materializing the given constant. -unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -51,18 +51,18 @@ unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Split the constant into 64-bit chunks and calculate the cost for each // chunk. - unsigned Cost = 0; + int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialize the constant.
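// Illustrative walk-through (hypothetical value, not part of the patch): for
// an i128 constant equal to 0x1234 shifted left by 64, the loop above visits
// two 64-bit chunks; the low chunk 0 is free and the high chunk 0x1234 needs
// a single MOVZ under the surrounding move-counting heuristic, so Cost == 1.
// When every chunk is free (e.g. Imm == 0), the clamp below still charges one
// instruction: even a "free" constant takes one mov to materialize.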
- return std::max(1U, Cost); + return std::max(1, Cost); } -unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -118,17 +118,17 @@ unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, } if (Idx == ImmIdx) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast<unsigned>(TTI::TCC_Free) + ? static_cast<int>(TTI::TCC_Free) : Cost; } return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -147,10 +147,10 @@ unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: if (Idx == 1) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast<unsigned>(TTI::TCC_Free) + ? static_cast<int>(TTI::TCC_Free) : Cost; } break; @@ -176,8 +176,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) { +int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -187,7 +186,31 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); - static const TypeConversionCostTblEntry<MVT::SimpleValueType> ConversionTbl[] = { + static const TypeConversionCostTblEntry + ConversionTbl[] = { + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + + // The number of shll instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -210,6 +233,16 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + // Complex: to v8f32 + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + + // Complex: to v16f32 + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + // Complex: to v2f64 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, @@ -250,22 +283,21 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; - int Idx = ConvertCostTableLookup( - ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return ConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -281,15 +313,15 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } // All other insert/extracts cost this much. - return 2; + return 3; } -unsigned AArch64TTIImpl::getArithmeticInstrCost( +int AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -300,10 +332,9 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties may not be the same as those of the previous // operation; conservatively assume OP_None.
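// Illustrative (assuming a legal scalar type; not from the patch): each of
// the four expansion steps named in the comment above typically costs 1, so
// a signed divide by a power of two is charged roughly 4 rather than the far
// larger cost of a real division.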
- unsigned Cost = - getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -331,7 +362,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( } } -unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -346,19 +377,20 @@ unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return 1; } -unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); - // We don't lower vector selects well that are wider than the register width. + // We don't lower some vector selects well that are wider than the register + // width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. - const unsigned AmortizationCost = 20; - static const TypeConversionCostTblEntry<MVT::SimpleValueType> + const int AmortizationCost = 20; + static const TypeConversionCostTblEntry VectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost }, + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } @@ -367,20 +399,18 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = - ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return VectorSelectTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD, + SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT())) + return Entry->Cost; } } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); +int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, unsigned AddressSpace) { + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isIntegerTy(64)) { // Unaligned stores are extremely inefficient. We don't split all // unaligned accesses in practice on inlined memcpy code. // We make v2i64 stores expensive so that we will only vectorize if there // are 6 other instructions getting vectorized.
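// Illustrative arithmetic (values implied by the code below, no extra data):
// a legal v2i64 store has LT.first == 1, so an unaligned one is charged
// 1 * 2 * 6 == 12, versus roughly 2 for the pair of scalar i64 stores it
// replaces; vectorizing then only looks profitable when enough surrounding
// instructions also vectorize, which is the amortization described above.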
- unsigned AmortizationCost = 6; + int AmortizationCost = 6; return LT.first * 2 * AmortizationCost; } @@ -407,16 +437,18 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( - unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace) { +int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, + unsigned Alignment, + unsigned AddressSpace) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa(VecTy) && "Expect a vector type"); if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) @@ -427,8 +459,8 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( Alignment, AddressSpace); } -unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { - unsigned Cost = 0; +int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { + int Cost = 0; for (auto *I : Tys) { if (!I->isVectorTy()) continue; @@ -506,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_ld4: Info.ReadMem = true; Info.WriteMem = false; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(0); break; @@ -515,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_st4: Info.ReadMem = false; Info.WriteMem = true; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); break; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 444d3ccc15e1..ec58c4fe309f 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -48,7 +48,7 @@ class AArch64TTIImpl : public BasicTTIImplBase { }; public: - explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F) + explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -63,12 +63,11 @@ public: /// @{ using BaseT::getIntImmCost; - unsigned getIntImmCost(int64_t Val); - unsigned getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCost(int64_t Val); + int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); /// @} @@ -76,6 +75,8 @@ public: /// \name Vector TTI Implementations /// @{ + bool enableInterleavedAccessVectorization() { return true; } + unsigned getNumberOfRegisters(bool Vector) { if (Vector) { if (ST->hasNEON()) @@ -96,25 +97,25 @@ public: unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 
Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getAddressComputationCost(Type *Ty, bool IsComplex); + int getAddressComputationCost(Type *Ty, bool IsComplex); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); - unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys); + int getCostOfKeepingLiveOverCall(ArrayRef Tys); void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); @@ -123,11 +124,9 @@ public: bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace); + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 38e8b4d9a938..394c8e78581f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -43,7 +43,6 @@ class AArch64Operand; class AArch64AsmParser : public MCTargetAsmParser { private: StringRef Mnemonic; ///< Instruction mnemonic. - MCSubtargetInfo &STI; // Map of register aliases registers via the .req directive. StringMap > RegisterReqs; @@ -101,6 +100,7 @@ private: OperandMatchResultTy tryParseSysReg(OperandVector &Operands); OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); + OperandMatchResultTy tryParsePSBHint(OperandVector &Operands); OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); OperandMatchResultTy tryParseFPImm(OperandVector &Operands); @@ -115,16 +115,16 @@ public: #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI) { + : MCTargetAsmParser(Options, STI) { MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) new AArch64TargetStreamer(S); // Initialize the set of available features. 
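// Illustrative cross-reference (not part of the patch): the feature bits
// cached here are what the parser's later predicates consult, e.g.
// getSTI().getFeatureBits()[AArch64::HasV8_2aOps] gates the v8.2-A-only
// "dc cvap" and "at s1e1rp"/"at s1e1wp" aliases parsed further down.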
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, @@ -160,7 +160,8 @@ private: k_Prefetch, k_ShiftExtend, k_FPImm, - k_Barrier + k_Barrier, + k_PSBHint, } Kind; SMLoc StartLoc, EndLoc; @@ -228,6 +229,12 @@ private: unsigned Length; }; + struct PSBHintOp { + unsigned Val; + const char *Data; + unsigned Length; + }; + struct ShiftExtendOp { AArch64_AM::ShiftExtendType Type; unsigned Amount; @@ -251,6 +258,7 @@ private: struct SysRegOp SysReg; struct SysCRImmOp SysCRImm; struct PrefetchOp Prefetch; + struct PSBHintOp PSBHint; struct ShiftExtendOp ShiftExtend; }; @@ -302,6 +310,9 @@ public: case k_Prefetch: Prefetch = o.Prefetch; break; + case k_PSBHint: + PSBHint = o.PSBHint; + break; case k_ShiftExtend: ShiftExtend = o.ShiftExtend; break; @@ -393,6 +404,16 @@ public: return Prefetch.Val; } + unsigned getPSBHint() const { + assert(Kind == k_PSBHint && "Invalid access!"); + return PSBHint.Val; + } + + StringRef getPSBHintName() const { + assert(Kind == k_PSBHint && "Invalid access!"); + return StringRef(PSBHint.Data, PSBHint.Length); + } + StringRef getPrefetchName() const { assert(Kind == k_Prefetch && "Invalid access!"); return StringRef(Prefetch.Data, Prefetch.Length); @@ -497,6 +518,15 @@ public: return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000; } + bool isImm0_1() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 2); + } bool isImm0_7() const { if (!isImm()) return false; @@ -876,12 +906,15 @@ public: } bool isMSRSystemRegister() const { if (!isSysReg()) return false; - return SysReg.MSRReg != -1U; } - bool isSystemPStateField() const { + bool isSystemPStateFieldWithImm0_1() const { if (!isSysReg()) return false; - + return (SysReg.PStateField == AArch64PState::PAN || + SysReg.PStateField == AArch64PState::UAO); + } + bool isSystemPStateFieldWithImm0_15() const { + if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false; return SysReg.PStateField != -1U; } bool isReg() const override { return Kind == k_Register && !Reg.isVector; } @@ -950,6 +983,7 @@ public: } bool isSysCR() const { return Kind == k_SysCR; } bool isPrefetch() const { return Kind == k_Prefetch; } + bool isPSBHint() const { return Kind == k_PSBHint; } bool isShiftExtend() const { return Kind == k_ShiftExtend; } bool isShifter() const { if (!isShiftExtend()) @@ -1175,8 +1209,10 @@ public: template void addVectorList64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1, - AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 }; + static const unsigned FirstRegs[] = { AArch64::D0, + AArch64::D0_D1, + AArch64::D0_D1_D2, + AArch64::D0_D1_D2_D3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( @@ -1186,8 +1222,10 @@ public: template void addVectorList128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1, - AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 }; + static const unsigned FirstRegs[] = { AArch64::Q0, + AArch64::Q0_Q1, + AArch64::Q0_Q1_Q2, + AArch64::Q0_Q1_Q2_Q3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( @@ -1304,6 +1342,12 @@ public: Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16)); 
} + void addImm0_1Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast(getImm()); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); + } + void addImm0_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast(getImm()); @@ -1491,7 +1535,13 @@ public: Inst.addOperand(MCOperand::createImm(SysReg.MSRReg)); } - void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const { + void addSystemPStateFieldWithImm0_1Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(SysReg.PStateField)); + } + + void addSystemPStateFieldWithImm0_15Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(SysReg.PStateField)); @@ -1507,6 +1557,11 @@ public: Inst.addOperand(MCOperand::createImm(getPrefetch())); } + void addPSBHintOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getPSBHint())); + } + void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = @@ -1703,6 +1758,19 @@ public: return Op; } + static std::unique_ptr CreatePSBHint(unsigned Val, + StringRef Str, + SMLoc S, + MCContext &Ctx) { + auto Op = make_unique(k_PSBHint, Ctx); + Op->PSBHint.Val = Val; + Op->PSBHint.Data = Str.data(); + Op->PSBHint.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static std::unique_ptr CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { @@ -1776,6 +1844,10 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << ""; break; } + case k_PSBHint: { + OS << getPSBHintName(); + break; + } case k_ShiftExtend: { OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #" << getShiftExtendAmount(); @@ -1849,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) { .Case(".h", true) .Case(".s", true) .Case(".d", true) + // Needed for fp16 scalar pairwise reductions + .Case(".2h", true) .Default(false); } @@ -2016,7 +2090,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); StringRef Name = - Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid); Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, S, getContext())); return MatchOperand_Success; @@ -2030,7 +2104,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); unsigned prfop = - Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; @@ -2042,6 +2116,32 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_Success; } +/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + SMLoc S = getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) { + TokError("invalid operand for instruction"); + return 
MatchOperand_ParseFail; + } + + bool Valid; + auto Mapper = AArch64PSBHint::PSBHintMapper(); + unsigned psbhint = + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); + if (!Valid) { + TokError("invalid operand for instruction"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat identifier token. + Operands.push_back(AArch64Operand::CreatePSBHint(psbhint, Tok.getString(), + S, getContext())); + return MatchOperand_Success; +} + /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. AArch64AsmParser::OperandMatchResultTy @@ -2439,6 +2539,13 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, } else if (!Op.compare_lower("cisw")) { // SYS #0, C7, C14, #2 SYS_ALIAS(0, 7, 14, 2); + } else if (!Op.compare_lower("cvap")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #3, C7, C12, #1 + SYS_ALIAS(3, 7, 12, 1); + } else { + return TokError("DC CVAP requires ARMv8.2a"); + } } else { return TokError("invalid operand for DC instruction"); } @@ -2479,6 +2586,20 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, } else if (!Op.compare_lower("s12e0w")) { // SYS #4, C7, C8, #7 SYS_ALIAS(4, 7, 8, 7); + } else if (!Op.compare_lower("s1e1rp")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #0, C7, C9, #0 + SYS_ALIAS(0, 7, 9, 0); + } else { + return TokError("AT S1E1RP requires ARMv8.2a"); + } + } else if (!Op.compare_lower("s1e1wp")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #0, C7, C9, #1 + SYS_ALIAS(0, 7, 9, 1); + } else { + return TokError("AT S1E1WP requires ARMv8.2a"); + } } else { return TokError("invalid operand for AT instruction"); } @@ -2644,7 +2765,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); StringRef Name = - Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid); Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, ExprLoc, getContext())); return MatchOperand_Success; @@ -2658,7 +2779,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); unsigned Opt = - Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2687,20 +2808,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { bool IsKnown; auto MRSMapper = AArch64SysReg::MRSMapper(); - uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(), - IsKnown); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (MRSReg != -1U) && "register should be -1 if and only if it's unknown"); auto MSRMapper = AArch64SysReg::MSRMapper(); - uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), - IsKnown); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); auto PStateMapper = AArch64PState::PStateMapper(); uint32_t PStateField = - PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); + PStateMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (PStateField != -1U) && 
"register should be -1 if and only if it's unknown"); @@ -3151,7 +3273,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, if (Operands.size() < 2 || !static_cast(*Operands[1]).isReg()) - return true; + return Error(Loc, "Only valid when first operand is register"); bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( @@ -3183,7 +3305,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, } // If it is a label or an imm that cannot fit in a movz, put it into CP. const MCExpr *CPLoc = - getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4); + getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4, Loc); Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx)); return false; } @@ -3601,6 +3723,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { return Error(Loc, "index must be a multiple of 8 in range [0, 32760]."); case Match_InvalidMemoryIndexed16: return Error(Loc, "index must be a multiple of 16 in range [0, 65520]."); + case Match_InvalidImm0_1: + return Error(Loc, "immediate must be an integer in range [0, 1]."); case Match_InvalidImm0_7: return Error(Loc, "immediate must be an integer in range [0, 7]."); case Match_InvalidImm0_15: @@ -3912,7 +4036,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &ImmOp = static_cast(*Operands[2]); if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) { unsigned zreg = - AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( + !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains( RegOp.getReg()) ? AArch64::WZR : AArch64::XZR; @@ -3929,10 +4053,27 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // If that fails, try against the alternate table containing long-form NEON: // "fadd v0.2s, v1.2s, v2.2s" - if (MatchResult != Match_Success) + if (MatchResult != Match_Success) { + // But first, save the short-form match result: we can use it in case the + // long-form match also fails. + auto ShortFormNEONErrorInfo = ErrorInfo; + auto ShortFormNEONMatchResult = MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); + // Now, both matches failed, and the long-form match failed on the mnemonic + // suffix token operand. The short-form match failure is probably more + // relevant: use it instead. 
+ if (MatchResult == Match_InvalidOperand && ErrorInfo == 1 && + Operands.size() > 1 && ((AArch64Operand &)*Operands[1]).isToken() && + ((AArch64Operand &)*Operands[1]).isTokenSuffix()) { + MatchResult = ShortFormNEONMatchResult; + ErrorInfo = ShortFormNEONErrorInfo; + } + } + + switch (MatchResult) { case Match_Success: { // Perform range checking and other semantic validations @@ -3944,7 +4085,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; } case Match_MissingFeature: { @@ -3966,6 +4107,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return showMatchError(IDLoc, MatchResult); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -4011,6 +4153,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: case Match_InvalidMemoryIndexedSImm9: + case Match_InvalidImm0_1: case Match_InvalidImm0_7: case Match_InvalidImm0_15: case Match_InvalidImm0_31: @@ -4083,7 +4226,7 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size); + getParser().getStreamer().EmitValue(Value, Size, L); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -4155,7 +4298,7 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { Inst.setOpcode(AArch64::TLSDESCCALL); Inst.addOperand(MCOperand::createExpr(Expr)); - getParser().getStreamer().EmitInstruction(Inst, STI); + getParser().getStreamer().EmitInstruction(Inst, getSTI()); return false; } diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index db9fb0e775df..f1f968e73123 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1516,6 +1516,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, uint64_t pstate_field = (op1 << 3) | op2; + if ((pstate_field == AArch64PState::PAN || + pstate_field == AArch64PState::UAO) && crm > 1) + return Fail; + Inst.addOperand(MCOperand::createImm(pstate_field)); Inst.addOperand(MCOperand::createImm(crm)); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 7f56c2cf6bb8..d8a810824370 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -55,7 +56,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, unsigned Opcode = MI->getOpcode(); if (Opcode == AArch64::SYSxt) - if (printSysAlias(MI, O)) { + if (printSysAlias(MI, STI, O)) { printAnnotation(O, Annot); return; } @@ -269,7 +270,7 @@ struct LdStNInstrDesc { int NaturalOffset; }; -static LdStNInstrDesc LdStNInstInfo[] = { +static const LdStNInstrDesc LdStNInstInfo[] = { { AArch64::LD1i8, "ld1", ".b", 1, true, 0 }, { AArch64::LD1i16, "ld1", ".h", 1, true, 0 }, { AArch64::LD1i32, 
"ld1", ".s", 1, true, 0 }, @@ -612,7 +613,7 @@ static LdStNInstrDesc LdStNInstInfo[] = { { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, }; -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { +static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { unsigned Idx; for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) if (LdStNInstInfo[Idx].Opcode == Opcode) @@ -641,7 +642,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { + if (const LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; // Now onto the operands: first a vector list with possible lane @@ -674,7 +675,9 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, AArch64InstPrinter::printInst(MI, O, Annot, STI); } -bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { +bool AArch64InstPrinter::printSysAlias(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { #ifndef NDEBUG unsigned Opcode = MI->getOpcode(); assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!"); @@ -729,6 +732,11 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { if (Op1Val == 3 && Op2Val == 1) Asm = "dc\tcvau"; break; + case 12: + if (Op1Val == 3 && Op2Val == 1 && + (STI.getFeatureBits()[AArch64::HasV8_2aOps])) + Asm = "dc\tcvap"; + break; case 14: if (Op1Val == 3 && Op2Val == 1) Asm = "dc\tcivac"; @@ -773,6 +781,21 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { break; } break; + case 9: + switch (Op1Val) { + default: + break; + case 0: + if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) { + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e1rp"; break; + case 1: Asm = "at\ts1e1wp"; break; + } + } + break; + } } } else if (CnVal == 8) { // TLBI aliases @@ -1122,6 +1145,19 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, O << '#' << prfop; } +void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned psbhintop = MI->getOperand(OpNum).getImm(); + bool Valid; + StringRef Name = + AArch64PSBHint::PSBHintMapper().toString(psbhintop, STI.getFeatureBits(), Valid); + if (Valid) + O << Name; + else + O << '#' << psbhintop; +} + void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 15dee978e229..ea68d9848b42 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -15,14 +15,10 @@ #define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H #include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, @@ -48,7 +44,8 @@ public: unsigned AltIdx = AArch64::NoRegAltName); protected: - bool printSysAlias(const MCInst *MI, raw_ostream &O); + bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); // Operand printers void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); @@ 
-122,6 +119,9 @@ protected: void printPrefetchOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printPSBHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index ed24343a6f2a..648b1dfc8c5e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -364,6 +364,32 @@ static inline float getFPImmFloat(unsigned Imm) { return FPUnion.F; } +/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit +/// floating-point value. If the value cannot be represented as an 8-bit +/// floating-point value, then return -1. +static inline int getFP16Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 + int32_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x3f) + return -1; + Mantissa >>= 6; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; +} + +static inline int getFP16Imm(const APFloat &FPImm) { + return getFP16Imm(FPImm.bitcastToAPInt()); +} + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 16d53569b231..d26604f5765d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -128,10 +128,9 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); + MCELFStreamer::EmitValueImpl(Value, Size, Loc); } private: diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 921c4b94a729..fbce26e1d9a1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -48,10 +48,6 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { UseDataRegionDirectives = true; ExceptionsType = ExceptionHandling::DwarfCFI; - - // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use - // LShr instead of AShr. 
- UseLogicalShr = true; } const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 28703419514a..a540f49866a9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -85,13 +85,13 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } -MCSection *AArch64MCExpr::findAssociatedSection() const { +MCFragment *AArch64MCExpr::findAssociatedFragment() const { llvm_unreachable("FIXME: what goes here?"); } bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout, - const MCFixup *Fixup) const { + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) return false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index 1165314e4105..db36a65564ce 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -149,11 +149,10 @@ public: void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override; + MCFragment *findAssociatedFragment() const override; - bool evaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout, - const MCFixup *Fixup) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; @@ -162,7 +161,6 @@ public: } static bool classof(const AArch64MCExpr *) { return true; } - }; } // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 741b273073e4..61c96f1d93c1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -90,9 +90,11 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( Log2Size = llvm::Log2_32(4); // This encompasses the relocation for the whole 21-bit value. switch (Sym->getKind()) { - default: - Asm.getContext().reportFatalError(Fixup.getLoc(), - "ADR/ADRP relocations must be GOT relative"); + default: { + Asm.getContext().reportError(Fixup.getLoc(), + "ADR/ADRP relocations must be GOT relative"); + return false; + } case MCSymbolRefExpr::VK_PAGE: RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); return true; @@ -170,25 +172,25 @@ void AArch64MachObjectWriter::recordRelocation( // assembler local symbols. If we got here, that's not what we have, // so complain loudly. if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "conditional branch requires assembler-local" - " label. '" + - Target.getSymA()->getSymbol().getName() + - "' is external."); + Asm.getContext().reportError(Fixup.getLoc(), + "conditional branch requires assembler-local" + " label. '" + + Target.getSymA()->getSymbol().getName() + + "' is external."); return; } // 14-bit branch relocations should only target internal labels, and so // should never get here. 
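// Recurring pattern in this hunk: reportFatalError, which aborted the whole
// assembly, becomes reportError plus an early return, so the MachO writer
// records a diagnostic and recovers instead of crashing on the first bad
// relocation.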
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "Invalid relocation on conditional branch!"); + Asm.getContext().reportError(Fixup.getLoc(), + "Invalid relocation on conditional branch!"); return; } if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, - Asm)) { - Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); + Asm)) { + Asm.getContext().reportError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); return; } @@ -200,8 +202,9 @@ void AArch64MachObjectWriter::recordRelocation( Type = MachO::ARM64_RELOC_UNSIGNED; if (IsPCRel) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "PC relative absolute relocation!"); + Asm.getContext().reportError(Fixup.getLoc(), + "PC relative absolute relocation!"); + return; // FIXME: x86_64 sets the type to a branch reloc here. Should we do // something similar? @@ -229,16 +232,20 @@ void AArch64MachObjectWriter::recordRelocation( Writer->addRelocation(A_Base, Fragment->getParent(), MRE); return; } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { // Otherwise, neither symbol can be modified. - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation of modified symbol"); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. - if (IsPCRel) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported pc-relative relocation of " - "difference"); + if (IsPCRel) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported pc-relative relocation of " + "difference"); + return; + } // AArch64 always uses external relocations. If there is no symbol to use as // a base address (a local symbol with no preceding non-local symbol), @@ -246,20 +253,26 @@ void AArch64MachObjectWriter::recordRelocation( // // FIXME: We should probably just synthesize an external symbol and use // that. - if (!A_Base) - Asm.getContext().reportFatalError( + if (!A_Base) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + A->getName() + "'. Must have non-local symbol earlier in section."); - if (!B_Base) - Asm.getContext().reportFatalError( + return; + } + if (!B_Base) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + B->getName() + "'. Must have non-local symbol earlier in section."); + return; + } - if (A_Base == B_Base && A_Base) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation with identical base"); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - (!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress( @@ -309,10 +322,12 @@ void AArch64MachObjectWriter::recordRelocation( // we need to preserve and merge with the new Target? How about // the FixedValue? 
if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout, - &Fixup)) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unable to resolve variable '" + - Symbol->getName() + "'"); + &Fixup)) { + Asm.getContext().reportError(Fixup.getLoc(), + "unable to resolve variable '" + + Symbol->getName() + "'"); + return; + } return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); } @@ -337,11 +352,13 @@ void AArch64MachObjectWriter::recordRelocation( Value += Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base); } else if (Symbol->isInSection()) { - if (!CanUseLocalRelocation) - Asm.getContext().reportFatalError( + if (!CanUseLocalRelocation) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + Symbol->getName() + "'. Must have non-local symbol earlier in section."); + return; + } // Adjust the relocation to be section-relative. // The index is the section ordinal (1-based). const MCSection &Sec = Symbol->getSection(); @@ -361,9 +378,10 @@ void AArch64MachObjectWriter::recordRelocation( return; } } - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation of variable '" + Symbol->getName() + "'"); + return; } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 52b000d15b8d..3e86a42d5be6 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -26,8 +26,9 @@ AArch64TargetStreamer::~AArch64TargetStreamer() {} // The constant pool handling is shared by all AArch64TargetStreamer // implementations. const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr, - unsigned Size) { - return ConstantPools->addEntry(Streamer, Expr, Size); + unsigned Size, + SMLoc Loc) { + return ConstantPools->addEntry(Streamer, Expr, Size, Loc); } void AArch64TargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index fcc0d053f6e2..51432830f795 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -24,7 +24,7 @@ public: /// Callback used to implement the ldr= pseudo. /// Add a new entry to the constant pool for the current section and return an /// MCExpr that can be used to refer to the constant pool location. - const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size); + const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size, SMLoc Loc); /// Callback used to implement the .ltorg directive. /// Emit contents of constant pool for the current section.
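The SMLoc threaded through addConstantPoolEntry above exists so that diagnostics about ldr= literals can point at the operand that created the pool entry. A minimal sketch of a caller, assuming a hypothetical emitLdrEqPseudo helper (the register-class test and emission calls mirror ones used elsewhere in this patch; the helper itself is not part of the change):

static void emitLdrEqPseudo(AArch64TargetStreamer &TS, MCStreamer &Out,
                            const MCSubtargetInfo &STI, unsigned Reg,
                            const MCExpr *Value, SMLoc Loc) {
  // Pool slots are 8 bytes for X registers and 4 for W registers; Loc rides
  // along so a later failure is reported at the "ldr reg, =value" operand
  // rather than at pool-emission time.
  bool IsX =
      AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(Reg);
  const MCExpr *PoolRef = TS.addConstantPoolEntry(Value, IsX ? 8 : 4, Loc);
  MCInst Ldr;
  Ldr.setOpcode(IsX ? AArch64::LDRXl : AArch64::LDRWl); // literal loads
  Ldr.addOperand(MCOperand::createReg(Reg));
  Ldr.addOperand(MCOperand::createExpr(PoolRef));
  Out.EmitInstruction(Ldr, STI);
}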
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index ee85b65bf39a..78f5289ec26d 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -146,11 +146,22 @@ const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings // v8.1a "Privileged Access Never" extension-specific PStates {"pan", PAN, {AArch64::HasV8_1aOps}}, + + // v8.2a + {"uao", UAO, {AArch64::HasV8_2aOps}}, }; AArch64PState::PStateMapper::PStateMapper() : AArch64NamedImmMapper(PStateMappings, 0) {} +const AArch64NamedImmMapper::Mapping AArch64PSBHint::PSBHintMapper::PSBHintMappings[] = { + // v8.2a "Statistical Profiling" extension-specific PSB operand + {"csync", CSync, {AArch64::FeatureSPE}}, +}; + +AArch64PSBHint::PSBHintMapper::PSBHintMapper() + : AArch64NamedImmMapper(PSBHintMappings, 0) {} + const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"mdccsr_el0", MDCCSR_EL0, {}}, {"dbgdtrrx_el0", DBGDTRRX_EL0, {}}, @@ -192,6 +203,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}}, {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}}, {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}}, + {"id_aa64mmfr2_el1", ID_A64MMFR2_EL1, {AArch64::HasV8_2aOps}}, {"mvfr0_el1", MVFR0_EL1, {}}, {"mvfr1_el1", MVFR1_EL1, {}}, {"mvfr2_el1", MVFR2_EL1, {}}, @@ -275,9 +287,6 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { {"icc_sgi1r_el1", ICC_SGI1R_EL1, {}}, {"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}}, {"icc_sgi0r_el1", ICC_SGI0R_EL1, {}}, - - // v8.1a "Privileged Access Never" extension-specific system registers - {"pan", PAN, {AArch64::HasV8_1aOps}}, }; AArch64SysReg::MSRMapper::MSRMapper() { @@ -804,6 +813,24 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings {"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}}, {"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}}, {"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}}, + + // v8.2a registers + {"uao", UAO, {AArch64::HasV8_2aOps}}, + + // v8.2a "Statistical Profiling extension" registers + {"pmblimitr_el1", PMBLIMITR_EL1, {AArch64::FeatureSPE}}, + {"pmbptr_el1", PMBPTR_EL1, {AArch64::FeatureSPE}}, + {"pmbsr_el1", PMBSR_EL1, {AArch64::FeatureSPE}}, + {"pmbidr_el1", PMBIDR_EL1, {AArch64::FeatureSPE}}, + {"pmscr_el2", PMSCR_EL2, {AArch64::FeatureSPE}}, + {"pmscr_el12", PMSCR_EL12, {AArch64::FeatureSPE}}, + {"pmscr_el1", PMSCR_EL1, {AArch64::FeatureSPE}}, + {"pmsicr_el1", PMSICR_EL1, {AArch64::FeatureSPE}}, + {"pmsirr_el1", PMSIRR_EL1, {AArch64::FeatureSPE}}, + {"pmsfcr_el1", PMSFCR_EL1, {AArch64::FeatureSPE}}, + {"pmsevfr_el1", PMSEVFR_EL1, {AArch64::FeatureSPE}}, + {"pmslatfr_el1", PMSLATFR_EL1, {AArch64::FeatureSPE}}, + {"pmsidr_el1", PMSIDR_EL1, {AArch64::FeatureSPE}}, }; uint32_t diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7e42f8e3601e..f649cb9b8a8d 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -337,7 +337,9 @@ namespace AArch64AT { S12E1R = 0x63c4, // 01 100 0111 1000 100 S12E1W = 0x63c5, // 01 100 0111 1000 101 S12E0R = 0x63c6, // 01 100 0111 1000 110 - S12E0W = 0x63c7 // 01 100 0111 1000 111 + S12E0W = 0x63c7, // 01 100 0111 1000 111 + S1E1RP = 0x43c8, // 01 000 0111 1001 000 + S1E1WP = 0x43c9 // 01 000 0111 1001 001 }; struct ATMapper : AArch64NamedImmMapper { @@ -463,6 +465,9 @@ namespace 
AArch64PState { // v8.1a "Privileged Access Never" extension-specific PStates PAN = 0x04, + + // v8.2a "User Access Override" extension-specific PStates + UAO = 0x03 }; struct PStateMapper : AArch64NamedImmMapper { @@ -473,6 +478,21 @@ namespace AArch64PState { } +namespace AArch64PSBHint { + enum PSBHintValues { + Invalid = -1, + // v8.2a "Statistical Profiling" extension-specific PSB operands + CSync = 0x11, // psb csync = hint #0x11 + }; + + struct PSBHintMapper : AArch64NamedImmMapper { + const static Mapping PSBHintMappings[]; + + PSBHintMapper(); + }; + +} + namespace AArch64SE { enum ShiftExtSpecifiers { Invalid = -1, @@ -594,6 +614,7 @@ namespace AArch64SysReg { ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + ID_A64MMFR2_EL1 = 0xc03a, // 11 000 0000 0111 010 MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 @@ -1190,6 +1211,24 @@ namespace AArch64SysReg { SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + // v8.2a registers + UAO = 0xc214, // 11 000 0100 0010 100 + + // v8.2a "Statistical Profiling extension" registers + PMBLIMITR_EL1 = 0xc4d0, // 11 000 1001 1010 000 + PMBPTR_EL1 = 0xc4d1, // 11 000 1001 1010 001 + PMBSR_EL1 = 0xc4d3, // 11 000 1001 1010 011 + PMBIDR_EL1 = 0xc4d7, // 11 000 1001 1010 111 + PMSCR_EL2 = 0xe4c8, // 11 100 1001 1001 000 + PMSCR_EL12 = 0xecc8, // 11 101 1001 1001 000 + PMSCR_EL1 = 0xc4c8, // 11 000 1001 1001 000 + PMSICR_EL1 = 0xc4ca, // 11 000 1001 1001 010 + PMSIRR_EL1 = 0xc4cb, // 11 000 1001 1001 011 + PMSFCR_EL1 = 0xc4cc, // 11 000 1001 1001 100 + PMSEVFR_EL1 = 0xc4cd, // 11 000 1001 1001 101 + PMSLATFR_EL1 = 0xc4ce, // 11 000 1001 1001 110 + PMSIDR_EL1 = 0xc4cf, // 11 000 1001 1001 111 + // Cyclone specific system registers CPM_IOACC_CTL_EL3 = 0xff90, }; diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 0a05d25189b0..8c3cb567fc7e 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -44,15 +44,21 @@ FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); -FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); +FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); -FunctionPass *createSIPrepareScratchRegs(); + +ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); +void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); +extern char &AMDGPUAnnotateKernelFeaturesID; void initializeSIFoldOperandsPass(PassRegistry &); extern char &SIFoldOperandsID; +void initializeSIFixSGPRCopiesPass(PassRegistry &); +extern char &SIFixSGPRCopiesID; + void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; @@ -64,6 +70,8 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); ModulePass *createAMDGPUAlwaysInlinePass(); +ModulePass *createAMDGPUOpenCLImageTypeLoweringPass(); +FunctionPass *createAMDGPUAnnotateUniformValues(); void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&); extern char 
&SIFixControlFlowLiveIntervalsID; @@ -71,6 +79,8 @@ extern char &SIFixControlFlowLiveIntervalsID; void initializeSIFixSGPRLiveRangesPass(PassRegistry&); extern char &SIFixSGPRLiveRangesID; +void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); +extern char &AMDGPUAnnotateUniformValuesPassID; extern Target TheAMDGPUTarget; extern Target TheGCNTarget; @@ -85,8 +95,6 @@ enum TargetIndex { }; } -#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel" - } // End namespace llvm namespace ShaderType { diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 68b50504ee44..d4af8d2e48d1 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -108,6 +108,11 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-fol "true", "Force using DS instruction immediate offsets on SI">; +def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", + "FlatForGlobal", + "true", + "Force to generate flat instruction for global">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", @@ -272,9 +277,14 @@ def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" >, AssemblerPredicate<"FeatureGCN1Encoding">; +def isVI : Predicate < + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGCN3Encoding">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; + Predicate VIAssemblerPredicate = isVI; list AssemblerPredicates = []; Predicate AssemblerPredicate = TruePredicate; list OtherPredicates = []; diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp new file mode 100644 index 000000000000..378183927242 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -0,0 +1,126 @@ +//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This pass adds target attributes to functions which use intrinsics +/// which will impact calling convention lowering. 
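Stepping back to the AArch64BaseInfo hunks above: the new PState, PSB hint, and system-register entries all follow the same table-driven operand-mapper shape, pairing a spelled name with its encoding and the subtarget features required for the name to be valid. A self-contained approximation of that lookup; the Mapping struct and fromString are my names, not the AArch64NamedImmMapper API:

#include <cstdint>
#include <iostream>
#include <string>

// Feature bits standing in for AArch64::HasV8_2aOps / FeatureSPE.
enum Feature : unsigned { None = 0, V8_2a = 1u << 0, SPE = 1u << 1 };

struct Mapping {
  const char *Name;  // assembly spelling, e.g. "csync"
  uint32_t Value;    // immediate encoding, e.g. 0x11
  unsigned Requires; // features that must all be enabled
};

// Mirrors the shape of the PSB table added above: "psb csync" = hint #0x11.
static const Mapping PSBHints[] = {
  {"csync", 0x11, SPE},
};

// Returns true and sets Value if Name is known and available on this target.
static bool fromString(const std::string &Name, unsigned Enabled,
                       uint32_t &Value) {
  for (const Mapping &M : PSBHints) {
    if (Name == M.Name && (Enabled & M.Requires) == M.Requires) {
      Value = M.Value;
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t V = 0;
  bool Ok = fromString("csync", SPE, V);
  std::cout << Ok << ' ' << V << '\n';               // 1 17
  std::cout << fromString("csync", None, V) << '\n'; // 0: SPE not enabled
}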
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#define DEBUG_TYPE "amdgpu-annotate-kernel-features" + +using namespace llvm; + +namespace { + +class AMDGPUAnnotateKernelFeatures : public ModulePass { +private: + void addAttrToCallers(Function *Intrin, StringRef AttrName); + bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); + +public: + static char ID; + + AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { } + bool runOnModule(Module &M) override; + const char *getPassName() const override { + return "AMDGPU Annotate Kernel Features"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } +}; + +} + +char AMDGPUAnnotateKernelFeatures::ID = 0; + +char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; + + +INITIALIZE_PASS_BEGIN(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, + "Add AMDGPU function attributes", false, false) +INITIALIZE_PASS_END(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, + "Add AMDGPU function attributes", false, false) + + +void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, + StringRef AttrName) { + SmallPtrSet<Function *, 4> SeenFuncs; + + for (User *U : Intrin->users()) { + // CallInst is the only valid user for an intrinsic. + CallInst *CI = cast<CallInst>(U); + + Function *CallingFunction = CI->getParent()->getParent(); + if (SeenFuncs.insert(CallingFunction).second) + CallingFunction->addFnAttr(AttrName); + } +} + +bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( + Module &M, + ArrayRef<StringRef[2]> IntrinsicToAttr) { + bool Changed = false; + + for (const StringRef *Arr : IntrinsicToAttr) { + if (Function *Fn = M.getFunction(Arr[0])) { + addAttrToCallers(Fn, Arr[1]); + Changed = true; + } + } + + return Changed; +} + +bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { + Triple TT(M.getTargetTriple()); + + static const StringRef IntrinsicToAttr[][2] = { + // .x omitted + { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, + { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, + + // .x omitted + { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, + { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } + + }; + + static const StringRef HSAIntrinsicToAttr[][2] = { + { "llvm.r600.read.local.size.x", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.local.size.y", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.local.size.z", "amdgpu-dispatch-ptr" }, + + { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }, + { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" } + }; + + // TODO: Intrinsics that require queue ptr. + + // We do not need to note the x workitem or workgroup id because they are + // always initialized.
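The core trick of the pass taking shape here is that it never scans instructions: it looks up each intrinsic declaration by name and walks that declaration's use list, tagging each distinct calling function with the matching attribute exactly once. A toy rendering of that use-list-driven annotation, with all names invented for illustration:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy module: intrinsic name -> functions that call it (its "use list").
using ToyModule = std::map<std::string, std::vector<std::string>>;

// Tag every distinct caller of Intrin with AttrName, like addAttrToCallers.
static void addAttrToCallers(
    const ToyModule &M, const std::string &Intrin,
    const std::string &AttrName,
    std::map<std::string, std::set<std::string>> &Attrs) {
  auto It = M.find(Intrin);
  if (It == M.end())
    return; // Intrinsic never declared: nothing to annotate.
  std::set<std::string> Seen; // Mirrors the SeenFuncs set above.
  for (const std::string &Caller : It->second)
    if (Seen.insert(Caller).second)
      Attrs[Caller].insert(AttrName);
}

int main() {
  ToyModule M = {{"llvm.r600.read.tgid.y", {"kernel_a", "kernel_a", "kernel_b"}}};
  std::map<std::string, std::set<std::string>> Attrs;
  addAttrToCallers(M, "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y", Attrs);
  for (auto &KV : Attrs)
    for (auto &A : KV.second)
      std::cout << KV.first << ": " << A << '\n'; // each caller tagged once
}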
+ + bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); + if (TT.getOS() == Triple::AMDHSA) + Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); + + return Changed; +} + +ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { + return new AMDGPUAnnotateKernelFeatures(); +} diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp new file mode 100644 index 000000000000..dfddc345f286 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -0,0 +1,84 @@ +//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass adds amdgpu.uniform metadata to IR values so this information +/// can be used during instruction selection. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h" +#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "amdgpu-annotate-uniform" + +using namespace llvm; + +namespace { + +class AMDGPUAnnotateUniformValues : public FunctionPass, + public InstVisitor { + DivergenceAnalysis *DA; + +public: + static char ID; + AMDGPUAnnotateUniformValues() : + FunctionPass(ID) { } + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + const char *getPassName() const override { return "AMDGPU Annotate Uniform Values"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } + + void visitLoadInst(LoadInst &I); + +}; + +} // End anonymous namespace + +INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, + "Add AMDGPU uniform metadata", false, false) +INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, + "Add AMDGPU uniform metadata", false, false) + +char AMDGPUAnnotateUniformValues::ID = 0; + +void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { + Value *Ptr = I.getPointerOperand(); + if (!DA->isUniform(Ptr)) + return; + + if (Instruction *PtrI = dyn_cast(Ptr)) + PtrI->setMetadata("amdgpu.uniform", MDNode::get(I.getContext(), {})); + +} + +bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { + return false; +} + +bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { + DA = &getAnalysis(); + visit(F); + + return true; +} + +FunctionPass * +llvm::createAMDGPUAnnotateUniformValues() { + return new AMDGPUAnnotateUniformValues(); +} diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0a5309b16ee5..ba71dc05a8fc 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -100,14 +100,63 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { } } -void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { +void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { + const SIMachineFunctionInfo *MFI = MF->getInfo(); + const AMDGPUSubtarget &STM = MF->getSubtarget(); + if (MFI->isKernel() && STM.isAmdHsaOS()) { + AMDGPUTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); + 
TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(), + ELF::STT_AMDGPU_HSA_KERNEL); + } - // This label is used to mark the end of the .text section. - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - OutStreamer->SwitchSection(TLOF.getTextSection()); - MCSymbol *EndOfTextLabel = - OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - OutStreamer->EmitLabel(EndOfTextLabel); + AsmPrinter::EmitFunctionEntryLabel(); +} + +static bool isModuleLinkage(const GlobalValue *GV) { + switch (GV->getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::CommonLinkage: + return true; + case GlobalValue::ExternalLinkage: + return false; + default: llvm_unreachable("unknown linkage type"); + } +} + +void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) { + AsmPrinter::EmitGlobalVariable(GV); + return; + } + + if (GV->isDeclaration() || GV->getLinkage() == GlobalValue::PrivateLinkage) { + AsmPrinter::EmitGlobalVariable(GV); + return; + } + + // Group segment variables aren't emitted in HSA. + if (AMDGPU::isGroupSegment(GV)) + return; + + AMDGPUTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); + if (isModuleLinkage(GV)) { + TS->EmitAMDGPUHsaModuleScopeGlobal(GV->getName()); + } else { + TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName()); + } + + const DataLayout &DL = getDataLayout(); + OutStreamer->PushSection(); + OutStreamer->SwitchSection( + getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); + MCSymbol *GVSym = getSymbol(GV); + const Constant *C = GV->getInitializer(); + OutStreamer->EmitLabel(GVSym); + EmitGlobalConstant(DL, C); + OutStreamer->PopSection(); } bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { @@ -125,8 +174,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const AMDGPUSubtarget &STM = MF.getSubtarget(); SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + getSIProgramInfo(KernelInfo, MF); if (!STM.isAmdHsaOS()) { - getSIProgramInfo(KernelInfo, MF); EmitProgramInfoSI(MF, KernelInfo); } // Emit directives @@ -165,6 +214,23 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { false); OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize), false); + + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + + Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + + Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " + + Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " + + Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + + Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)), + false); + } else { R600MachineFunctionInfo *MFI = MF.getInfo(); OutStreamer->emitRawComment( @@ -278,27 +344,30 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, unsigned width = 0; bool isSGPR = false; - if (!MO.isReg()) { + if (!MO.isReg()) continue; - } - unsigned reg = MO.getReg(); - if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || - reg == AMDGPU::VCC_HI) { - VCCUsed = true; - continue; - } else if (reg == AMDGPU::FLAT_SCR || - reg == AMDGPU::FLAT_SCR_LO || - reg == AMDGPU::FLAT_SCR_HI) { - FlatUsed = true; - continue; - } 
+ unsigned reg = MO.getReg(); switch (reg) { - default: break; - case AMDGPU::SCC: case AMDGPU::EXEC: + case AMDGPU::SCC: case AMDGPU::M0: continue; + + case AMDGPU::VCC: + case AMDGPU::VCC_LO: + case AMDGPU::VCC_HI: + VCCUsed = true; + continue; + + case AMDGPU::FLAT_SCR: + case AMDGPU::FLAT_SCR_LO: + case AMDGPU::FLAT_SCR_HI: + FlatUsed = true; + continue; + + default: + break; } if (AMDGPU::SReg_32RegClass.contains(reg)) { @@ -348,11 +417,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed) + if (VCCUsed || FlatUsed) MaxSGPR += 2; - if (FlatUsed) + if (FlatUsed) { MaxSGPR += 2; + // 2 additional for VI+. + if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + MaxSGPR += 2; + } // We found the maximum register index. They start at 0, so add one to get the // number of registers. @@ -368,6 +441,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; } + if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.emitError("too many user SGPRs used"); + } + ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode @@ -419,18 +497,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | S_00B848_PRIV(ProgInfo.Priv) | S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) | - S_00B848_IEEE_MODE(ProgInfo.DebugMode) | + S_00B848_DEBUG_MODE(ProgInfo.DebugMode) | S_00B848_IEEE_MODE(ProgInfo.IEEEMode); + // 0 = X, 1 = XY, 2 = XYZ + unsigned TIDIGCompCnt = 0; + if (MFI->hasWorkItemIDZ()) + TIDIGCompCnt = 2; + else if (MFI->hasWorkItemIDY()) + TIDIGCompCnt = 1; + ProgInfo.ComputePGMRSrc2 = S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) | - S_00B84C_USER_SGPR(MFI->NumUserSGPRs) | - S_00B84C_TGID_X_EN(1) | - S_00B84C_TGID_Y_EN(1) | - S_00B84C_TGID_Z_EN(1) | - S_00B84C_TG_SIZE_EN(1) | - S_00B84C_TIDIG_COMP_CNT(2) | - S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks); + S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | + S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) | + S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) | + S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) | + S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) | + S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) | + S_00B84C_EXCP_EN_MSB(0) | + S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) | + S_00B84C_EXCP_EN(0); } static unsigned getRsrcReg(unsigned ShaderType) { @@ -491,14 +578,53 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.compute_pgm_resource_registers = KernelInfo.ComputePGMRSrc1 | (KernelInfo.ComputePGMRSrc2 << 32); - header.code_properties = - AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | - AMD_CODE_PROPERTY_IS_PTR64; + header.code_properties = AMD_CODE_PROPERTY_IS_PTR64; + + if (MFI->hasPrivateSegmentBuffer()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; + } + + if (MFI->hasDispatchPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + + if (MFI->hasQueuePtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + + if (MFI->hasKernargSegmentPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + + if (MFI->hasDispatchID()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + + if (MFI->hasFlatScratchInit()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; 
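The code_properties block above is a conditional bit-OR: start from an unconditional base value and set one flag per enabled SGPR input. A compact model of building such a packed descriptor word; the PROP_* flags are placeholders, not the real amd_kernel_code_t constants:

#include <cstdint>
#include <cstdio>

// Placeholder flag bits modeled on the AMD_CODE_PROPERTY_* style.
enum : uint32_t {
  PROP_IS_PTR64          = 1u << 0,
  PROP_SGPR_DISPATCH_PTR = 1u << 1,
  PROP_SGPR_QUEUE_PTR    = 1u << 2,
  PROP_SGPR_KERNARG_PTR  = 1u << 3,
};

struct KernelInputs {
  bool HasDispatchPtr, HasQueuePtr, HasKernargPtr;
};

static uint32_t buildCodeProperties(const KernelInputs &MFI) {
  uint32_t Props = PROP_IS_PTR64; // unconditional base, as in the hunk
  if (MFI.HasDispatchPtr)
    Props |= PROP_SGPR_DISPATCH_PTR;
  if (MFI.HasQueuePtr)
    Props |= PROP_SGPR_QUEUE_PTR;
  if (MFI.HasKernargPtr)
    Props |= PROP_SGPR_KERNARG_PTR;
  return Props;
}

int main() {
  KernelInputs In{true, false, true};
  std::printf("0x%x\n", buildCodeProperties(In)); // 0xb: ptr64|dispatch|kernarg
}

Since the mask is built with OR, setting a flag twice, as the dispatch-pointer check does in the hunk above, is redundant but harmless.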
+ + // TODO: Private segment size + + if (MFI->hasGridWorkgroupCountX()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; + } + + if (MFI->hasGridWorkgroupCountY()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; + } + + if (MFI->hasGridWorkgroupCountZ()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; + } + + if (MFI->hasDispatchPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; - + header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; + header.workgroup_group_segment_byte_size = KernelInfo.LDSSize; AMDGPUTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 345af9b85e15..817cbfc0c0eb 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -99,7 +99,9 @@ public: void EmitFunctionBodyStart() override; - void EmitEndOfAsmFile(Module &M) override; + void EmitFunctionEntryLabel() override; + + void EmitGlobalVariable(const GlobalVariable *GV) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, diff --git a/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp new file mode 100644 index 000000000000..2f6b3022dd6e --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp @@ -0,0 +1,26 @@ +//===-- AMDGPUDiagnosticInfoUnsupported.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUDiagnosticInfoUnsupported.h" + +using namespace llvm; + +DiagnosticInfoUnsupported::DiagnosticInfoUnsupported( + const Function &Fn, + const Twine &Desc, + DiagnosticSeverity Severity) + : DiagnosticInfo(getKindID(), Severity), + Description(Desc), + Fn(Fn) { } + +int DiagnosticInfoUnsupported::KindID = 0; + +void DiagnosticInfoUnsupported::print(DiagnosticPrinter &DP) const { + DP << "unsupported " << getDescription() << " in " << Fn.getName(); +} diff --git a/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h new file mode 100644 index 000000000000..0fd37e1ede6b --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h @@ -0,0 +1,48 @@ +//===-- AMDGPUDiagnosticInfoUnsupported.h - Error reporting -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" + +namespace llvm { + +/// Diagnostic information for unimplemented or unsupported feature reporting. 
+class DiagnosticInfoUnsupported : public DiagnosticInfo { +private: + const Twine &Description; + const Function &Fn; + + static int KindID; + + static int getKindID() { + if (KindID == 0) + KindID = llvm::getNextAvailablePluginDiagnosticKind(); + return KindID; + } + +public: + DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, + DiagnosticSeverity Severity = DS_Error); + + const Function &getFunction() const { return Fn; } + const Twine &getDescription() const { return Description; } + + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == getKindID(); + } +}; + +} + +#endif diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index 8175786fb9b1..4d84d281d998 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -71,9 +71,15 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const { } /// \returns The number of registers allocated for \p FI. -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Fill in FrameReg output argument. + FrameReg = RI->getFrameRegister(MF); + // Start the offset at 2 so we don't overwrite work group information. // XXX: We should only do this when the shader actually uses this // information. diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 9f31be1af794..257a3da40589 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -8,14 +8,12 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief Interface to describe a layout of a stack frame on a AMDIL target -/// machine. +/// \brief Interface to describe a layout of a stack frame on an AMDGPU target. // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H -#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -34,7 +32,8 @@ public: /// \returns The number of 32-bit sub-registers that are used when storing /// values to the stack. unsigned getStackWidth(const MachineFunction &MF) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 64c54ccb31ff..b33040b4d06a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -11,6 +11,8 @@ /// \brief Defines an instruction selector for the AMDGPU target. 
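The header just shown relies on a small reusable pattern: a lazily assigned, process-unique kind ID from getNextAvailablePluginDiagnosticKind(), which lets classof recognize the custom diagnostic without a central registry. A standalone rendering of that ID scheme; DiagBase and Unsupported are illustrative stand-ins for the LLVM classes:

#include <iostream>
#include <string>
#include <utility>

// Stand-in for llvm::getNextAvailablePluginDiagnosticKind().
static int getNextAvailableKind() {
  static int Next = 1000; // IDs beyond the built-in diagnostic kinds
  return Next++;
}

struct DiagBase {
  int Kind;
  explicit DiagBase(int K) : Kind(K) {}
  virtual ~DiagBase() = default;
};

struct Unsupported : DiagBase {
  std::string Desc;
  // First use allocates the kind; every later use reuses the same ID.
  static int getKindID() {
    static int KindID = getNextAvailableKind();
    return KindID;
  }
  explicit Unsupported(std::string D)
      : DiagBase(getKindID()), Desc(std::move(D)) {}
  static bool classof(const DiagBase *DI) { return DI->Kind == getKindID(); }
};

int main() {
  Unsupported D("dynamic alloca");
  DiagBase *Base = &D;
  std::cout << Unsupported::classof(Base) << '\n'; // 1: recognized by kind
}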
// //===----------------------------------------------------------------------===// + +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPURegisterInfo.h" @@ -20,9 +22,9 @@ #include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" @@ -40,12 +42,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const AMDGPUSubtarget *Subtarget; + public: AMDGPUDAGToDAGISel(TargetMachine &TM); virtual ~AMDGPUDAGToDAGISel(); bool runOnMachineFunction(MachineFunction &MF) override; SDNode *Select(SDNode *N) override; const char *getPassName() const override; + void PreprocessISelDAG() override; void PostprocessISelDAG() override; private: @@ -91,7 +95,7 @@ private: bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, SDValue &Offset1) const; - void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, + bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, SDValue &TFE) const; @@ -108,6 +112,16 @@ private: SDValue &TFE) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &GLC) const; + bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, + bool &Imm) const; + bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, + bool &Imm) const; + bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; + bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; + bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; SDNode *SelectAddrSpaceCast(SDNode *N); bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; @@ -273,6 +287,23 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { return N; } +static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { + switch (NumVectorElts) { + case 1: + return AMDGPU::SReg_32RegClassID; + case 2: + return AMDGPU::SReg_64RegClassID; + case 4: + return AMDGPU::SReg_128RegClassID; + case 8: + return AMDGPU::SReg_256RegClassID; + case 16: + return AMDGPU::SReg_512RegClassID; + } + + llvm_unreachable("invalid vector size"); +} + SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -306,38 +337,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { EVT EltVT = VT.getVectorElementType(); assert(EltVT.bitsEq(MVT::i32)); if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - bool UseVReg = true; - for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); - U != E; ++U) { - if 
(!U->isMachineOpcode()) { - continue; - } - const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); - if (!RC) { - continue; - } - if (static_cast(TRI)->isSGPRClass(RC)) { - UseVReg = false; - } - } - switch(NumVectorElts) { - case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID : - AMDGPU::SReg_32RegClassID; - break; - case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : - AMDGPU::SReg_64RegClassID; - break; - case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : - AMDGPU::SReg_128RegClassID; - break; - case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : - AMDGPU::SReg_256RegClassID; - break; - case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : - AMDGPU::SReg_512RegClassID; - break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); - } + RegClassID = selectSGPRVectorRegClassID(NumVectorElts); } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions @@ -455,98 +455,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, N->getValueType(0), Ops); } - - case ISD::LOAD: { - LoadSDNode *LD = cast(N); - SDLoc SL(N); - EVT VT = N->getValueType(0); - - if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) { - N = glueCopyToM0(N); - break; - } - - // To simplify the TableGen patters, we replace all i64 loads with - // v2i32 loads. Alternatively, we could promote i64 loads to v2i32 - // during DAG legalization, however, so places (ExpandUnalignedLoad) - // in the DAG legalizer assume that if i64 is legal, so doing this - // promotion early can cause problems. - - SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(), - LD->getBasePtr(), LD->getMemOperand()); - SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, - MVT::i64, NewLoad); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1)); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast); - SDNode *Load = glueCopyToM0(NewLoad.getNode()); - SelectCode(Load); - N = BitCast.getNode(); - break; - } - + case ISD::LOAD: case ISD::STORE: { - // Handle i64 stores here for the same reason mentioned above for loads. - StoreSDNode *ST = cast(N); - SDValue Value = ST->getValue(); - if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) { - - SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N), - MVT::v2i32, Value); - SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue, - ST->getBasePtr(), ST->getMemOperand()); - - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore); - - if (NewValue.getOpcode() == ISD::BITCAST) { - Select(NewStore.getNode()); - return SelectCode(NewValue.getNode()); - } - - // getNode() may fold the bitcast if its input was another bitcast. If that - // happens we should only select the new store. 
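The deleted block above shows what the refactor buys: instead of scanning every use to pick between VGPR and SGPR classes, the new selectSGPRVectorRegClassID helper is a pure table lookup from element count to scalar register class. A minimal sketch of that mapping, with invented class ID values:

#include <cstdio>
#include <cstdlib>

// Invented stand-ins for AMDGPU::SReg_*RegClassID.
enum RegClassID { SReg32 = 0, SReg64, SReg128, SReg256, SReg512 };

static RegClassID selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:  return SReg32; // one 32-bit element per scalar register
  case 2:  return SReg64;
  case 4:  return SReg128;
  case 8:  return SReg256;
  case 16: return SReg512;
  }
  // Mirrors llvm_unreachable: only these element counts are legal here.
  std::abort();
}

int main() {
  std::printf("%d\n", selectSGPRVectorRegClassID(4)); // 2 (the 128-bit class)
}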
- N = NewStore.getNode(); - } - N = glueCopyToM0(N); break; } - case AMDGPUISD::REGISTER_LOAD: { - if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - break; - SDValue Addr, Offset; - - SDLoc DL(N); - SelectADDRIndirect(N->getOperand(1), Addr, Offset); - const SDValue Ops[] = { - Addr, - Offset, - CurDAG->getTargetConstant(0, DL, MVT::i32), - N->getOperand(0), - }; - return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL, - CurDAG->getVTList(MVT::i32, MVT::i64, - MVT::Other), - Ops); - } - case AMDGPUISD::REGISTER_STORE: { - if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - break; - SDValue Addr, Offset; - SelectADDRIndirect(N->getOperand(2), Addr, Offset); - SDLoc DL(N); - const SDValue Ops[] = { - N->getOperand(1), - Addr, - Offset, - CurDAG->getTargetConstant(0, DL, MVT::i32), - N->getOperand(0), - }; - return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL, - CurDAG->getVTList(MVT::Other), - Ops); - } - case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -575,7 +489,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), N->getOperand(0), OffsetVal, WidthVal); - } case AMDGPUISD::DIV_SCALE: { return SelectDIV_SCALE(N); @@ -601,7 +514,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } - bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { assert(AS != 0 && "Use checkPrivateAddress instead."); if (!Ptr) @@ -681,7 +593,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { if (checkPrivateAddress(N->getMemOperand())) { if (MMO) { const PseudoSourceValue *PSV = MMO->getPseudoValue(); - if (PSV && PSV == PseudoSourceValue::getConstantPool()) { + if (PSV && PSV->isConstantPool()) { return true; } } @@ -847,7 +759,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod + // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, + // omod SDValue Ops[8]; SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); @@ -883,15 +796,39 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, Offset = N1; return true; } - } + } else if (Addr.getOpcode() == ISD::SUB) { + // sub C, x -> add (sub 0, x), C + if (const ConstantSDNode *C = dyn_cast(Addr.getOperand(0))) { + int64_t ByteOffset = C->getSExtValue(); + if (isUInt<16>(ByteOffset)) { + SDLoc DL(Addr); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); - SDLoc DL(Addr); + // XXX - This is kind of hacky. Create a dummy sub node so we can check + // the known bits in isDSOffsetLegal. We need to emit the selected node + // here, so this is thrown away. + SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + if (isDSOffsetLegal(Sub, ByteOffset, 16)) { + MachineSDNode *MachineSub + = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + Base = SDValue(MachineSub, 0); + Offset = Addr.getOperand(0); + return true; + } + } + } + } else if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { + // If we have a constant address, prefer to put the constant into the + // offset. 
This can save moves to load the constant address since multiple + // operations can share the zero base address register, and enables merging + // into read2 / write2 instructions. + + SDLoc DL(Addr); - // If we have a constant address, prefer to put the constant into the - // offset. This can save moves to load the constant address since multiple - // operations can share the zero base address register, and enables merging - // into read2 / write2 instructions. - if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { if (isUInt<16>(CAddr->getZExtValue())) { SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, @@ -904,10 +841,11 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, // default case Base = Addr; - Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); return true; } +// TODO: If offset is too big, put low 16-bit into offset. bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, SDValue &Offset0, SDValue &Offset1) const { @@ -926,9 +864,35 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); return true; } - } + } else if (Addr.getOpcode() == ISD::SUB) { + // sub C, x -> add (sub 0, x), C + if (const ConstantSDNode *C = dyn_cast(Addr.getOperand(0))) { + unsigned DWordOffset0 = C->getZExtValue() / 4; + unsigned DWordOffset1 = DWordOffset0 + 1; - if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { + if (isUInt<8>(DWordOffset0)) { + SDLoc DL(Addr); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); + + // XXX - This is kind of hacky. Create a dummy sub node so we can check + // the known bits in isDSOffsetLegal. We need to emit the selected node + // here, so this is thrown away. + SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { + MachineSDNode *MachineSub + = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + Base = SDValue(MachineSub, 0); + Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); + Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); + return true; + } + } + } + } else if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { unsigned DWordOffset0 = CAddr->getZExtValue() / 4; unsigned DWordOffset1 = DWordOffset0 + 1; assert(4 * DWordOffset0 == CAddr->getZExtValue()); @@ -956,12 +920,16 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { return isUInt<12>(Imm->getZExtValue()); } -void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, +bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, SDValue &TFE) const { + // Subtarget prefers to use flat instruction + if (Subtarget->useFlatForGlobal()) + return false; + SDLoc DL(Addr); GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -994,14 +962,14 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, if (isLegalMUBUFImmOffset(C1)) { Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); - return; + return true; } else if (isUInt<32>(C1->getZExtValue())) { // Illegal offset, store it in soffset. 
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 0); - return; + return true; } } @@ -1013,7 +981,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, Ptr = N0; VAddr = N1; Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); - return; + return true; } // default case -> offset @@ -1021,6 +989,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, Ptr = Addr; Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); + return true; } bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, @@ -1033,8 +1002,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return false; - SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE); + if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, + GLC, SLC, TFE)) + return false; ConstantSDNode *C = cast(Addr64); if (C->getSExtValue()) { @@ -1052,8 +1022,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, - SDValue &Offset, - SDValue &SLC) const { + SDValue &Offset, + SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); SDValue GLC, TFE; @@ -1066,36 +1036,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, SDLoc DL(Addr); MachineFunction &MF = CurDAG->getMachineFunction(); - const SIRegisterInfo *TRI = - static_cast(Subtarget->getRegisterInfo()); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const SITargetLowering& Lowering = - *static_cast(getTargetLowering()); + const SIMachineFunctionInfo *Info = MF.getInfo(); - unsigned ScratchOffsetReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); - Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, - ScratchOffsetReg, MVT::i32); - SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); - SDValue ScratchRsrcDword0 = - SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); - - SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); - SDValue ScratchRsrcDword1 = - SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); - - const SDValue RsrcOps[] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), - ScratchRsrcDword0, - CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), - ScratchRsrcDword1, - CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32), - }; - SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, - MVT::v2i32, RsrcOps), 0); - Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); - SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); + Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); + SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); // (add n0, c1) if (CurDAG->isBaseWithConstantOffset(Addr)) { @@ -1126,8 +1070,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, const SIInstrInfo *TII = static_cast(Subtarget->getInstrInfo()); - SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE); + if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, + GLC, 
SLC, TFE)) + return false; if (!cast(Offen)->getSExtValue() && !cast(Idxen)->getSExtValue() && @@ -1153,18 +1098,134 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); } +/// +/// \param EncodedOffset This is the immediate value that will be encoded +/// directly into the instruction. On SI/CI the \p EncodedOffset +/// will be in units of dwords and on VI+ it will be units of bytes. +static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, + int64_t EncodedOffset) { + return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? + isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, + SDValue &Offset, bool &Imm) const { + + // FIXME: Handle non-constant offsets. + ConstantSDNode *C = dyn_cast(ByteOffsetNode); + if (!C) + return false; + + SDLoc SL(ByteOffsetNode); + AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); + int64_t ByteOffset = C->getSExtValue(); + int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? + ByteOffset >> 2 : ByteOffset; + + if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + Imm = true; + return true; + } + + if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) + return false; + + if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { + // 32-bit Immediates are supported on Sea Islands. + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + } else { + SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); + Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, + C32Bit), 0); + } + Imm = false; + return true; +} + +bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, + SDValue &Offset, bool &Imm) const { + + SDLoc SL(Addr); + if (CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + + if (SelectSMRDOffset(N1, Offset, Imm)) { + SBase = N0; + return true; + } + } + SBase = Addr; + Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + Imm = true; + return true; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + bool Imm; + return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + + if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) + return false; + + bool Imm; + if (!SelectSMRD(Addr, SBase, Offset, Imm)) + return false; + + return !Imm && isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + bool Imm; + return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && + !isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, + SDValue &Offset) const { + bool Imm; + return SelectSMRDOffset(Addr, Offset, Imm) && Imm; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, + SDValue &Offset) const { + if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) + return false; + + bool Imm; + if (!SelectSMRDOffset(Addr, Offset, Imm)) + return false; + + return !Imm && isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, + SDValue &Offset) const { + bool Imm; + return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && + !isa(Offset); +} + // FIXME: This is incorrect and only enough to be able 
to compile. SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); SDLoc DL(N); + const MachineFunction &MF = CurDAG->getMachineFunction(); + DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(), + "addrspacecast not implemented"); + CurDAG->getContext()->diagnose(NotImplemented); + assert(Subtarget->hasFlatAddressSpace() && "addrspacecast only supported with flat address space!"); - assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) && - "Cannot cast address space to / from constant address!"); - assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && "Can only cast to / from flat address space!"); @@ -1190,7 +1251,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); } - if (DestSize > SrcSize) { assert(SrcSize == 32 && DestSize == 64); @@ -1371,6 +1431,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +void AMDGPUDAGToDAGISel::PreprocessISelDAG() { + bool Modified = false; + + // XXX - Other targets seem to be able to do this without a worklist. + SmallVector<LoadSDNode *, 8> LoadsToReplace; + SmallVector<StoreSDNode *, 8> StoresToReplace; + + for (SDNode &Node : CurDAG->allnodes()) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) { + EVT VT = LD->getValueType(0); + if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) + continue; + + // To simplify the TableGen patterns, we replace all i64 loads with v2i32 + // loads. Alternatively, we could promote i64 loads to v2i32 during DAG + // legalization, however, some places (ExpandUnalignedLoad) in the DAG + // legalizer assume that if i64 is legal, so doing this promotion early + // can cause problems. + LoadsToReplace.push_back(LD); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) { + // Handle i64 stores here for the same reason mentioned above for loads. + SDValue Value = ST->getValue(); + if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore()) + continue; + StoresToReplace.push_back(ST); + } + } + + for (LoadSDNode *LD : LoadsToReplace) { + SDLoc SL(LD); + + SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(), + LD->getBasePtr(), LD->getMemOperand()); + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, + MVT::i64, NewLoad); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast); + Modified = true; + } + + for (StoreSDNode *ST : StoresToReplace) { + SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST), + MVT::v2i32, ST->getValue()); + const SDValue StoreOps[] = { + ST->getChain(), + NewValue, + ST->getBasePtr(), + ST->getOffset() + }; + + CurDAG->UpdateNodeOperands(ST, StoreOps); + Modified = true; + } + + // XXX - Is this necessary?
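Backing up to the SMRD helpers a few hunks above, the generation split deserves a worked example: before Volcanic Islands the hardware immediate is an 8-bit dword count, while VI and later take a 20-bit byte offset, so the same byte offset encodes differently and may be legal on one target but not the other. A small model of that decision; the two function names follow the hunk, everything else is illustrative:

#include <cstdint>
#include <cstdio>

enum Generation { SEA_ISLANDS, VOLCANIC_ISLANDS };

static bool isUIntN(unsigned N, int64_t V) {
  return V >= 0 && (V >> N) == 0;
}

// Pre-VI: 8-bit dword-granular immediate. VI+: 20-bit byte immediate.
static bool isLegalSMRDImmOffset(Generation Gen, int64_t EncodedOffset) {
  return Gen < VOLCANIC_ISLANDS ? isUIntN(8, EncodedOffset)
                                : isUIntN(20, EncodedOffset);
}

static int64_t encodeSMRDOffset(Generation Gen, int64_t ByteOffset) {
  return Gen < VOLCANIC_ISLANDS ? ByteOffset >> 2 : ByteOffset;
}

int main() {
  const int64_t Byte = 1024; // a 1 KiB byte offset
  const Generation Gens[] = {SEA_ISLANDS, VOLCANIC_ISLANDS};
  for (Generation G : Gens) {
    int64_t Enc = encodeSMRDOffset(G, Byte);
    std::printf("gen=%d encoded=%lld legal=%d\n", (int)G,
                (long long)Enc, isLegalSMRDImmOffset(G, Enc));
  }
}

Running this shows the 1024-byte offset encoding to 256 dwords, which overflows the 8-bit field on SI/CI (so the hunk falls back to a 32-bit literal or an S_MOV_B32), while the same offset fits comfortably in VI's 20-bit byte immediate.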
+ if (Modified) + CurDAG->RemoveDeadNodes(); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast(getTargetLowering()); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3a65f3b56146..222f63161be5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPURegisterInfo.h" @@ -27,50 +28,9 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/DiagnosticPrinter.h" using namespace llvm; -namespace { - -/// Diagnostic information for unimplemented or unsupported feature reporting. -class DiagnosticInfoUnsupported : public DiagnosticInfo { -private: - const Twine &Description; - const Function &Fn; - - static int KindID; - - static int getKindID() { - if (KindID == 0) - KindID = llvm::getNextAvailablePluginDiagnosticKind(); - return KindID; - } - -public: - DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, - DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(getKindID(), Severity), - Description(Desc), - Fn(Fn) { } - - const Function &getFunction() const { return Fn; } - const Twine &getDescription() const { return Description; } - - void print(DiagnosticPrinter &DP) const override { - DP << "unsupported " << getDescription() << " in " << Fn.getName(); - } - - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == getKindID(); - } -}; - -int DiagnosticInfoUnsupported::KindID = 0; -} - - static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -113,6 +73,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); + // This is totally unsupported, just custom lower to produce an error. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + // We need to custom lower some of the intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -352,7 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Custom); - setOperationAction(ISD::UDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::ADDC, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); @@ -429,12 +392,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setSelectIsExpensive(false); PredictableSelectIsExpensive = false; - // There are no integer divide instructions, and these expand to a pretty - // large sequence of instructions. - setIntDivIsCheap(false); - setPow2SDivIsCheap(false); setFsqrtIsCheap(true); + // We want to find all load dependencies for long chains of stores to enable + // merging into very wide vectors. The problem is with vectors with > 4 + // elements. MergeConsecutiveStores will attempt to merge these because x8/x16 + // vectors are a legal type, even though we have to split the loads + // usually. 
When we can more precisely specify load legality per address + // space, we should be able to make FindBetterChain/MergeConsecutiveStores + // smarter so that they can figure out what to do in 2 iterations without all + // N > 4 stores on the same chain. + GatherAllAliasesMaxDepth = 16; + // FIXME: Need to really handle these. MaxStoresPerMemcpy = 4096; MaxStoresPerMemmove = 4096; @@ -534,6 +503,18 @@ bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, return true; } +bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { + // There are few operations which truly have vector input operands. Any vector + // operation is going to involve operations on each component, and a + // build_vector will be a copy per element, so it always makes sense to use a + // build_vector input in place of the extracted element to avoid a copy into a + // super register. + // + // We should probably only do this if all users are extracts only, but this + // should be the common case. + return true; +} + bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { // Truncate is just accessing a subregister. return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0); @@ -617,6 +598,15 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, return SDValue(); } +SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + const Function &Fn = *DAG.getMachineFunction().getFunction(); + + DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca"); + DAG.getContext()->diagnose(NoDynamicAlloca); + return SDValue(); +} + SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -643,6 +633,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; } @@ -892,7 +883,9 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, FrameIndexSDNode *FIN = cast(Op); unsigned FrameIndex = FIN->getIndex(); - unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + unsigned IgnoredFrameReg; + unsigned Offset = + TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), Op.getValueType()); } @@ -1043,9 +1036,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_brev: - return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); - case Intrinsic::AMDGPU_class: return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -1057,6 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name. return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_brev: // Legacy name + return DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(1)); } } @@ -1077,6 +1069,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); + // TODO: Should this propagate fast-math-flags? 
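The LowerIntrinsicLRP body that follows builds linear interpolation as lrp(a, b, c) = a*b + (1 - a)*c, which is why the first node constructed is the 1 - a subtraction that the new fast-math TODO sits next to. A plain-float sketch of that expansion, assuming this decomposition is the intended semantics of the legacy lrp intrinsic:

#include <cstdio>

// Assumed decomposition: blends b toward c as a goes from 1 down to 0.
static float lrp(float A, float B, float C) {
  float OneSubA = 1.0f - A;   // the FSUB node built first in the hunk
  return A * B + OneSubA * C; // the subsequent multiply/accumulate nodes
}

int main() {
  std::printf("%g\n", lrp(0.25f, 8.0f, 4.0f)); // 0.25*8 + 0.75*4 = 5
}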
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(1.0f, DL, MVT::f32), Op.getOperand(1)); @@ -1167,45 +1160,6 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } -// FIXME: Remove this when combines added to DAGCombiner. -SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const { - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); - - ISD::CondCode CCOpcode = cast(CC)->get(); - switch (CCOpcode) { - case ISD::SETULE: - case ISD::SETULT: { - unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETLE: - case ISD::SETLT: { - unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETGT: - case ISD::SETGE: { - unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETUGE: - case ISD::SETUGT: { - unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - default: - return SDValue(); - } -} - SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast(Op); @@ -1260,7 +1214,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, EVT PtrVT = BasePtr.getValueType(); EVT MemVT = Load->getMemoryVT(); SDLoc SL(Op); - MachinePointerInfo SrcValue(Load->getMemOperand()->getValue()); + + const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); EVT LoVT, HiVT; EVT LoMemVT, HiMemVT; @@ -1269,23 +1224,27 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT); std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT); + + unsigned Size = LoMemVT.getStoreSize(); + unsigned BaseAlign = Load->getAlignment(); + unsigned HiAlign = MinAlign(BaseAlign, Size); + SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, Load->isVolatile(), Load->isNonTemporal(), - Load->isInvariant(), Load->getAlignment()); + Load->isInvariant(), BaseAlign); SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(LoMemVT.getStoreSize(), SL, - PtrVT)); + DAG.getConstant(Size, SL, PtrVT)); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, Load->isVolatile(), Load->isNonTemporal(), - Load->isInvariant(), Load->getAlignment()); + Load->isInvariant(), HiAlign); SDValue Ops[] = { DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad), @@ -1415,7 +1374,11 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, DAG.getConstant(LoMemVT.getStoreSize(), SL, PtrVT)); - MachinePointerInfo SrcValue(Store->getMemOperand()->getValue()); + const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); + unsigned BaseAlign = Store->getAlignment(); + unsigned Size = LoMemVT.getStoreSize(); + unsigned HiAlign = MinAlign(BaseAlign, Size); + SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, @@ -1423,15 +1386,15 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, LoMemVT, Store->isNonTemporal(), Store->isVolatile(), - Store->getAlignment()); + BaseAlign); SDValue HiStore = DAG.getTruncStore(Chain, SL, Hi, HiPtr, - 
SrcValue.getWithOffset(LoMemVT.getStoreSize()), + SrcValue.getWithOffset(Size), HiMemVT, Store->isNonTemporal(), Store->isVolatile(), - Store->getAlignment()); + HiAlign); return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); } @@ -1529,7 +1492,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Store->getValue().getValueType().isVector()) { - return ScalarizeVectorStore(Op, DAG); + return SplitVectorStore(Op, DAG); } EVT MemVT = Store->getMemoryVT(); @@ -1630,6 +1593,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool // float fb = (float)ib; SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); + // TODO: Should this propagate fast-math-flags? // float fq = native_divide(fa, fb); SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); @@ -1940,6 +1904,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); + // TODO: Should this propagate fast-math-flags? + SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); @@ -1968,6 +1934,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } @@ -2045,6 +2012,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); + // TODO: Should this propagate fast-math-flags? + SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); @@ -2074,6 +2043,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X); + // TODO: Should this propagate fast-math-flags? + SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T); SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff); @@ -2184,6 +2155,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } @@ -2206,7 +2178,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); - + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); } @@ -2231,6 +2203,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, DAG.getConstant(1, DL, MVT::i32)); SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); + // TODO: Should this propagate fast-math-flags? 
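+  // (The recombination below computes (float)x as
+  //  (float)(uint32_t)x + (float)(x >> 32) * 4294967296.0f.)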
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); @@ -2257,7 +2230,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, MVT::f64); SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL, MVT::f64); - + // TODO: Should this propagate fast-math-flags? SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0); SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul); @@ -2511,12 +2484,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, if (VT == MVT::f32) return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); - - // TODO: Implement min / max Evergreen instructions. - if (VT == MVT::i32 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } } break; @@ -2652,20 +2619,14 @@ bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->isExactlyValue(1.0); } - if (ConstantSDNode *C = dyn_cast(Op)) { - return C->isAllOnesValue(); - } - return false; + return isAllOnesConstant(Op); } bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->getValueAPF().isZero(); } - if (ConstantSDNode *C = dyn_cast(Op)) { - return C->isNullValue(); - } - return false; + return isNullConstant(Op); } SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, @@ -2738,7 +2699,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) - NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) @@ -2893,8 +2853,7 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( return 1; unsigned SignBits = 32 - Width->getZExtValue() + 1; - ConstantSDNode *Offset = dyn_cast(Op.getOperand(1)); - if (!Offset || !Offset->isNullValue()) + if (!isNullConstant(Op.getOperand(1))) return SignBits; // TODO: Could probably figure something out with non-0 offsets. diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 478b2035fd75..7314cc050ba5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -138,6 +138,7 @@ public: bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; @@ -149,6 +150,9 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void ReplaceNodeResults(SDNode * N, @@ -165,14 +169,6 @@ public: SDValue False, SDValue CC, DAGCombinerInfo &DCI) const; - SDValue CombineIMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const; const char* getTargetNodeName(unsigned Opcode) const override; @@ -216,7 +212,7 @@ public: /// \brief Helper function that returns the byte offset of the given /// type of implicit parameter. 
-  unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+  uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
                                       const ImplicitParameter Param) const;
 };

@@ -267,7 +263,6 @@ enum NodeType : unsigned {
   BFE_I32, // Extract range of bits with sign extension to 32-bits.
   BFI, // (src0 & src1) | (~src0 & src2)
   BFM, // Insert a range of bits into a 32-bit word.
-  BREV, // Reverse bits.
   MUL_U24,
   MUL_I24,
   MAD_U24,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 15a3d543a68c..a266e711af5b 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -164,11 +164,6 @@ MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
   // TODO: Implement this function
   return nullptr;
 }
-bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-                                           ArrayRef<unsigned> Ops) const {
-  // TODO: Implement this function
-  return false;
-}
 bool AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF,
                                           MachineInstr *MI,
                                           unsigned Reg, bool UnfoldLoad,
@@ -312,7 +307,9 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
     return -1;
   }

-  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+  unsigned IgnoredFrameReg;
+  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
+      MF, -1, IgnoredFrameReg);

   return getIndirectIndexBegin(MF) + Offset;
 }
@@ -367,3 +364,14 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {

   return MCOp;
 }
+
+ArrayRef<std::pair<int, const char *>>
+AMDGPUInstrInfo::getSerializableTargetIndices() const {
+  static const std::pair<int, const char *> TargetIndices[] = {
+      {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
+  return makeArrayRef(TargetIndices);
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 86d3962b3856..53e8b23b3d62 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -103,8 +103,6 @@ public:
   /// read or write or -1 if indirect addressing is not used by this program.
   int getIndirectIndexEnd(const MachineFunction &MF) const;

-  bool canFoldMemoryOperand(const MachineInstr *MI,
-                            ArrayRef<unsigned> Ops) const override;
   bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                            unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                            SmallVectorImpl<MachineInstr *> &NewMIs) const override;
@@ -147,6 +145,9 @@ public:
     return get(pseudoToMCOpcode(Opcode));
   }

+  ArrayRef<std::pair<int, const char *>>
+  getSerializableTargetIndices() const override;
+
   //===---------------------------------------------------------------------===//
   // Pure virtual funtions to be implemented by sub-classes.
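The getSerializableTargetIndices() hook added above is what lets MIR serialization print and re-parse target-index machine operands by name rather than by raw integer. A minimal consumer-side sketch, assuming a hypothetical lookup helper (the table entries are the ones registered above; only ArrayRef itself is a real API here):

#include "llvm/ADT/ArrayRef.h"
#include <utility>

// Hypothetical helper: map a target-index operand back to the name it is
// serialized under, e.g. "target-index(amdgpu-constdata-start)" in MIR.
static const char *
lookupTargetIndexName(llvm::ArrayRef<std::pair<int, const char *>> Table,
                      int Index) {
  for (const auto &Entry : Table)
    if (Entry.first == Index) // Entry.second is the serialized name
      return Entry.second;
  return nullptr;             // not a serializable index
}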
//===---------------------------------------------------------------------===// @@ -195,6 +196,7 @@ public: }; namespace AMDGPU { + LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); } // End namespace AMDGPU diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index b413897d9d23..70e589c28429 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -191,8 +191,6 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; -def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; - // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 72cab39277c6..11f6139deddd 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -514,7 +514,7 @@ class POW_Common class Extract_Element : Pat< - (sub_type (vector_extract vec_type:$src, sub_idx)), + (sub_type (extractelt vec_type:$src, sub_idx)), (EXTRACT_SUBREG $src, sub_reg) >; @@ -522,7 +522,7 @@ class Extract_Element : Pat < - (vector_insert vec_type:$vec, elem_type:$elem, sub_idx), + (insertelt vec_type:$vec, elem_type:$elem, sub_idx), (INSERT_SUBREG $vec, $elem, sub_reg) >; diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td index ab489cd2a4ab..1de3546485b1 100644 --- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -69,8 +69,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; - def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; + def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>; } // Legacy names for compatibility. 
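The IntrConvergent change above surfaces in the IR as the convergent attribute on the barrier declarations, which tells generic optimizers that the set of threads executing the call must not be changed. A sketch of the kind of guard this enables, assuming only that CallInst::isConvergent() is the attribute-level query (the helper itself is illustrative):

#include "llvm/IR/Instructions.h"

// Illustrative guard: a convergent call such as a workgroup barrier must not
// be sunk into, hoisted out of, or duplicated across divergent control flow,
// so a transform checks the attribute before moving the call.
static bool safeToMoveAcrossDivergentBranch(const llvm::CallInst &CI) {
  return !CI.isConvergent();
}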
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 20831460b933..dfc652f31da5 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -61,7 +61,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_Register: - MCOp = MCOperand::createReg(MO.getReg()); + MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST)); break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( @@ -73,13 +73,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } - case MachineOperand::MO_TargetIndex: { - assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); - MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - MCOp = MCOperand::createExpr(Expr); - break; - } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); @@ -104,10 +97,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { #endif if (MI->isBundle()) { const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator I = MI; - ++I; - while (I != MBB->end() && I->isInsideBundle()) { - EmitInstruction(I); + MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); + while (I != MBB->instr_end() && I->isInsideBundle()) { + EmitInstruction(&*I); ++I; } } else { @@ -136,8 +128,6 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter(); InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups, MF->getSubtarget()); - CodeStream.flush(); - HexLines.resize(HexLines.size() + 1); std::string &HexLine = HexLines.back(); raw_string_ostream HexStream(HexLine); diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 21c7da663234..54137177e4c0 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -1,11 +1,10 @@ #include "AMDGPUMachineFunction.h" #include "AMDGPU.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" using namespace llvm; -static const char *const ShaderTypeAttribute = "ShaderType"; - // Pin the vtable to this file. void AMDGPUMachineFunction::anchor() {} @@ -13,13 +12,9 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), ShaderType(ShaderType::COMPUTE), LDSSize(0), + ABIArgOffset(0), ScratchSize(0), IsKernel(true) { - Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute); - if (A.isStringAttribute()) { - StringRef Str = A.getValueAsString(); - if (Str.getAsInteger(0, ShaderType)) - llvm_unreachable("Can't parse shader type!"); - } + ShaderType = AMDGPU::getShaderType(*MF.getFunction()); } diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index f5e4694e76f6..46fcee874887 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -37,6 +37,11 @@ public: return ShaderType; } + bool isKernel() const { + // FIXME: Assume everything is a kernel until function calls are supported. 
+ return true; + } + unsigned ScratchSize; bool IsKernel; }; diff --git a/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp new file mode 100644 index 000000000000..554bf1da81f5 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp @@ -0,0 +1,373 @@ +//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass resolves calls to OpenCL image attribute, image resource ID and +/// sampler resource ID getter functions. +/// +/// Image attributes (size and format) are expected to be passed to the kernel +/// as kernel arguments immediately following the image argument itself, +/// therefore this pass adds image size and format arguments to the kernel +/// functions in the module. The kernel functions with image arguments are +/// re-created using the new signature. The new arguments are added to the +/// kernel metadata with kernel_arg_type set to "image_size" or "image_format". +/// Note: this pass may invalidate pointers to functions. +/// +/// Resource IDs of read-only images, write-only images and samplers are +/// defined to be their index among the kernel arguments of the same +/// type and access qualifier. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { + +StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size"; +StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format"; +StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id"; +StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id"; + +StringRef ImageSizeArgMDType = "__llvm_image_size"; +StringRef ImageFormatArgMDType = "__llvm_image_format"; + +StringRef KernelsMDNodeName = "opencl.kernels"; +StringRef KernelArgMDNodeNames[] = { + "kernel_arg_addr_space", + "kernel_arg_access_qual", + "kernel_arg_type", + "kernel_arg_base_type", + "kernel_arg_type_qual"}; +const unsigned NumKernelArgMDNodes = 5; + +typedef SmallVector MDVector; +struct KernelArgMD { + MDVector ArgVector[NumKernelArgMDNodes]; +}; + +} // end anonymous namespace + +static inline bool +IsImageType(StringRef TypeString) { + return TypeString == "image2d_t" || TypeString == "image3d_t"; +} + +static inline bool +IsSamplerType(StringRef TypeString) { + return TypeString == "sampler_t"; +} + +static Function * +GetFunctionFromMDNode(MDNode *Node) { + if (!Node) + return nullptr; + + size_t NumOps = Node->getNumOperands(); + if (NumOps != NumKernelArgMDNodes + 1) + return nullptr; + + auto F = mdconst::dyn_extract(Node->getOperand(0)); + if (!F) + return nullptr; + + // Sanity checks. 
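+  // The layout checked below: each !opencl.kernels entry is the kernel
+  // function followed by one node per kernel_arg_* key, and every such node
+  // carries its name string plus one operand per formal argument.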
+  size_t ExpectNumArgNodeOps = F->arg_size() + 1;
+  for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
+    MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
+    if (ArgNode->getNumOperands() != ExpectNumArgNodeOps)
+      return nullptr;
+    if (!ArgNode->getOperand(0))
+      return nullptr;
+
+    // FIXME: It should be possible to do image lowering when some metadata
+    // args missing or not in the expected order.
+    MDString *StringNode = dyn_cast<MDString>(ArgNode->getOperand(0));
+    if (!StringNode || StringNode->getString() != KernelArgMDNodeNames[i])
+      return nullptr;
+  }
+
+  return F;
+}
+
+static StringRef
+AccessQualFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+  MDNode *ArgAQNode = cast<MDNode>(KernelMDNode->getOperand(2));
+  return cast<MDString>(ArgAQNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static StringRef
+ArgTypeFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+  MDNode *ArgTypeNode = cast<MDNode>(KernelMDNode->getOperand(3));
+  return cast<MDString>(ArgTypeNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static MDVector
+GetArgMD(MDNode *KernelMDNode, unsigned OpIdx) {
+  MDVector Res;
+  for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+    MDNode *Node = cast<MDNode>(KernelMDNode->getOperand(i + 1));
+    Res.push_back(Node->getOperand(OpIdx));
+  }
+  return Res;
+}
+
+static void
+PushArgMD(KernelArgMD &MD, const MDVector &V) {
+  assert(V.size() == NumKernelArgMDNodes);
+  for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+    MD.ArgVector[i].push_back(V[i]);
+  }
+}
+
+namespace {
+
+class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
+  static char ID;
+
+  LLVMContext *Context;
+  Type *Int32Type;
+  Type *ImageSizeType;
+  Type *ImageFormatType;
+  SmallVector<Instruction *, 4> InstsToErase;
+
+  bool replaceImageUses(Argument &ImageArg, uint32_t ResourceID,
+                        Argument &ImageSizeArg,
+                        Argument &ImageFormatArg) {
+    bool Modified = false;
+
+    for (auto &Use : ImageArg.uses()) {
+      auto Inst = dyn_cast<CallInst>(Use.getUser());
+      if (!Inst) {
+        continue;
+      }
+
+      Function *F = Inst->getCalledFunction();
+      if (!F)
+        continue;
+
+      Value *Replacement = nullptr;
+      StringRef Name = F->getName();
+      if (Name.startswith(GetImageResourceIDFunc)) {
+        Replacement = ConstantInt::get(Int32Type, ResourceID);
+      } else if (Name.startswith(GetImageSizeFunc)) {
+        Replacement = &ImageSizeArg;
+      } else if (Name.startswith(GetImageFormatFunc)) {
+        Replacement = &ImageFormatArg;
+      } else {
+        continue;
+      }
+
+      Inst->replaceAllUsesWith(Replacement);
+      InstsToErase.push_back(Inst);
+      Modified = true;
+    }
+
+    return Modified;
+  }
+
+  bool replaceSamplerUses(Argument &SamplerArg, uint32_t ResourceID) {
+    bool Modified = false;
+
+    for (const auto &Use : SamplerArg.uses()) {
+      auto Inst = dyn_cast<CallInst>(Use.getUser());
+      if (!Inst) {
+        continue;
+      }
+
+      Function *F = Inst->getCalledFunction();
+      if (!F)
+        continue;
+
+      Value *Replacement = nullptr;
+      StringRef Name = F->getName();
+      if (Name == GetSamplerResourceIDFunc) {
+        Replacement = ConstantInt::get(Int32Type, ResourceID);
+      } else {
+        continue;
+      }
+
+      Inst->replaceAllUsesWith(Replacement);
+      InstsToErase.push_back(Inst);
+      Modified = true;
+    }
+
+    return Modified;
+  }
+
+  bool replaceImageAndSamplerUses(Function *F, MDNode *KernelMDNode) {
+    uint32_t NumReadOnlyImageArgs = 0;
+    uint32_t NumWriteOnlyImageArgs = 0;
+    uint32_t NumSamplerArgs = 0;
+
+    bool Modified = false;
+    InstsToErase.clear();
+    for (auto ArgI = F->arg_begin(); ArgI != F->arg_end(); ++ArgI) {
+      Argument &Arg = *ArgI;
+      StringRef Type = ArgTypeFromMD(KernelMDNode, Arg.getArgNo());
+
+      // Handle image types.
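+      // (Read-only and write-only images are numbered independently, and the
+      //  size/format arguments consumed here are the implicit parameters that
+      //  addImplicitArgs() appends directly after each image argument.)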
+ if (IsImageType(Type)) { + StringRef AccessQual = AccessQualFromMD(KernelMDNode, Arg.getArgNo()); + uint32_t ResourceID; + if (AccessQual == "read_only") { + ResourceID = NumReadOnlyImageArgs++; + } else if (AccessQual == "write_only") { + ResourceID = NumWriteOnlyImageArgs++; + } else { + llvm_unreachable("Wrong image access qualifier."); + } + + Argument &SizeArg = *(++ArgI); + Argument &FormatArg = *(++ArgI); + Modified |= replaceImageUses(Arg, ResourceID, SizeArg, FormatArg); + + // Handle sampler type. + } else if (IsSamplerType(Type)) { + uint32_t ResourceID = NumSamplerArgs++; + Modified |= replaceSamplerUses(Arg, ResourceID); + } + } + for (unsigned i = 0; i < InstsToErase.size(); ++i) { + InstsToErase[i]->eraseFromParent(); + } + + return Modified; + } + + std::tuple + addImplicitArgs(Function *F, MDNode *KernelMDNode) { + bool Modified = false; + + FunctionType *FT = F->getFunctionType(); + SmallVector ArgTypes; + + // Metadata operands for new MDNode. + KernelArgMD NewArgMDs; + PushArgMD(NewArgMDs, GetArgMD(KernelMDNode, 0)); + + // Add implicit arguments to the signature. + for (unsigned i = 0; i < FT->getNumParams(); ++i) { + ArgTypes.push_back(FT->getParamType(i)); + MDVector ArgMD = GetArgMD(KernelMDNode, i + 1); + PushArgMD(NewArgMDs, ArgMD); + + if (!IsImageType(ArgTypeFromMD(KernelMDNode, i))) + continue; + + // Add size implicit argument. + ArgTypes.push_back(ImageSizeType); + ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageSizeArgMDType); + PushArgMD(NewArgMDs, ArgMD); + + // Add format implicit argument. + ArgTypes.push_back(ImageFormatType); + ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageFormatArgMDType); + PushArgMD(NewArgMDs, ArgMD); + + Modified = true; + } + if (!Modified) { + return std::make_tuple(nullptr, nullptr); + } + + // Create function with new signature and clone the old body into it. + auto NewFT = FunctionType::get(FT->getReturnType(), ArgTypes, false); + auto NewF = Function::Create(NewFT, F->getLinkage(), F->getName()); + ValueToValueMapTy VMap; + auto NewFArgIt = NewF->arg_begin(); + for (auto &Arg: F->args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + if (IsImageType(ArgTypeFromMD(KernelMDNode, Arg.getArgNo()))) { + (NewFArgIt++)->setName(Twine("__size_") + ArgName); + (NewFArgIt++)->setName(Twine("__format_") + ArgName); + } + } + SmallVector Returns; + CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns); + + // Build new MDNode. + SmallVector KernelMDArgs; + KernelMDArgs.push_back(ConstantAsMetadata::get(NewF)); + for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) + KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i])); + MDNode *NewMDNode = MDNode::get(*Context, KernelMDArgs); + + return std::make_tuple(NewF, NewMDNode); + } + + bool transformKernels(Module &M) { + NamedMDNode *KernelsMDNode = M.getNamedMetadata(KernelsMDNodeName); + if (!KernelsMDNode) + return false; + + bool Modified = false; + for (unsigned i = 0; i < KernelsMDNode->getNumOperands(); ++i) { + MDNode *KernelMDNode = KernelsMDNode->getOperand(i); + Function *F = GetFunctionFromMDNode(KernelMDNode); + if (!F) + continue; + + Function *NewF; + MDNode *NewMDNode; + std::tie(NewF, NewMDNode) = addImplicitArgs(F, KernelMDNode); + if (NewF) { + // Replace old function and metadata with new ones. 
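+        // (F is erased before NewF is inserted so NewF can keep the original
+        //  name; the metadata operand is updated in place so the kernel entry
+        //  keeps its index.)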
+ F->eraseFromParent(); + M.getFunctionList().push_back(NewF); + M.getOrInsertFunction(NewF->getName(), NewF->getFunctionType(), + NewF->getAttributes()); + KernelsMDNode->setOperand(i, NewMDNode); + + F = NewF; + KernelMDNode = NewMDNode; + Modified = true; + } + + Modified |= replaceImageAndSamplerUses(F, KernelMDNode); + } + + return Modified; + } + + public: + AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) override { + Context = &M.getContext(); + Int32Type = Type::getInt32Ty(M.getContext()); + ImageSizeType = ArrayType::get(Int32Type, 3); + ImageFormatType = ArrayType::get(Int32Type, 2); + + return transformKernels(M); + } + + const char *getPassName() const override { + return "AMDGPU OpenCL Image Type Pass"; + } +}; + +char AMDGPUOpenCLImageTypeLoweringPass::ID = 0; + +} // end anonymous namespace + +ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() { + return new AMDGPUOpenCLImageTypeLoweringPass(); +} diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 57b7a73bf56c..87d50d587059 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -54,7 +54,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) { bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { - const FunctionType *FTy = F.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); LocalMemAvailable = ST.getLocalMemorySize(); @@ -63,7 +63,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // possible these arguments require the entire local memory space, so // we cannot use local memory in the pass. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { - const Type *ParamTy = FTy->getParamType(i); + Type *ParamTy = FTy->getParamType(i); if (ParamTy->isPointerTy() && ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { LocalMemAvailable = 0; @@ -77,7 +77,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // Check how much local memory is being used by global objects for (Module::global_iterator I = Mod->global_begin(), E = Mod->global_end(); I != E; ++I) { - GlobalVariable *GV = I; + GlobalVariable *GV = &*I; PointerType *GVTy = GV->getType(); if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) continue; @@ -101,7 +101,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { return false; } -static VectorType *arrayTypeToVecType(const Type *ArrayTy) { +static VectorType *arrayTypeToVecType(Type *ArrayTy) { return VectorType::get(ArrayTy->getArrayElementType(), ArrayTy->getArrayNumElements()); } @@ -276,6 +276,9 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { } void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { + if (!I.isStaticAlloca()) + return; + IRBuilder<> Builder(&I); // First try to replace the alloca with a vector diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h index cfd800bdc703..0344834328f6 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -37,10 +37,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { assert(!"Unimplemented"); return BitVector(); } - virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { - assert(!"Unimplemented"); return nullptr; - } - virtual unsigned getHWRegIndex(unsigned Reg) const { assert(!"Unimplemented"); return 0; } diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 
5f32a65c9338..44e0c47877a9 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -16,6 +16,7 @@ #include "R600ISelLowering.h" #include "R600InstrInfo.h" #include "R600MachineScheduler.h" +#include "SIFrameLowering.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -44,6 +45,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // disable it. SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); + if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. + FullFS += "+flat-for-global,"; FullFS += FS; if (GPU == "" && TT.getArch() == Triple::amdgcn) @@ -67,26 +70,36 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DumpCode(false), R600ALUInst(false), HasVertexCache(false), TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), - CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), - EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), - EnableUnsafeDSOffsetFolding(false), + CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), + EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), + EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), - FrameLowering(TargetFrameLowering::StackGrowsUp, - 64 * 16, // Maximum stack alignment (long16) - 0), + FrameLowering(nullptr), InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { initializeSubtargetDependencies(TT, GPU, FS); + const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) + if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { InstrInfo.reset(new R600InstrInfo(*this)); TLInfo.reset(new R600TargetLowering(TM, *this)); + + // FIXME: Should have R600 specific FrameLowering + FrameLowering.reset(new AMDGPUFrameLowering( + TargetFrameLowering::StackGrowsUp, + MaxStackAlign, + 0)); } else { InstrInfo.reset(new SIInstrInfo(*this)); TLInfo.reset(new SITargetLowering(TM, *this)); + FrameLowering.reset(new SIFrameLowering( + TargetFrameLowering::StackGrowsUp, + MaxStackAlign, + 0)); } } diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 735f01dfa7c5..9c7bb88f8f4a 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1,4 +1,4 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// +//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -12,17 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H -#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H + #include "AMDGPU.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUIntrinsicInfo.h" +#include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" -#include "R600ISelLowering.h" -#include "AMDKernelCodeT.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ 
-72,6 +70,7 @@ private: bool FastFMAF32; bool CaymanISA; bool FlatAddressSpace; + bool FlatForGlobal; bool EnableIRStructurizer; bool EnablePromoteAlloca; bool EnableIfCvt; @@ -88,10 +87,10 @@ private: bool CIInsts; bool FeatureDisable; int LDSBankCount; - unsigned IsaVersion; + unsigned IsaVersion; bool EnableHugeScratchBuffer; - AMDGPUFrameLowering FrameLowering; + std::unique_ptr FrameLowering; std::unique_ptr TLInfo; std::unique_ptr InstrInfo; InstrItineraryData InstrItins; @@ -104,7 +103,7 @@ public: StringRef GPU, StringRef FS); const AMDGPUFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return FrameLowering.get(); } const AMDGPUInstrInfo *getInstrInfo() const override { return InstrInfo.get(); @@ -161,6 +160,10 @@ public: return FlatAddressSpace; } + bool useFlatForGlobal() const { + return FlatForGlobal; + } + bool hasBFE() const { return (getGeneration() >= EVERGREEN); } @@ -305,6 +308,9 @@ public: return isAmdHsaOS() ? 0 : 36; } + unsigned getMaxNumUserSGPRs() const { + return 16; + } }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2297b52b423c..22f85b3e663c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetMachine.h" +#include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "AMDGPUTargetTransformInfo.h" #include "R600ISelLowering.h" @@ -41,6 +42,23 @@ extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(TheAMDGPUTarget); RegisterTargetMachine Y(TheGCNTarget); + + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeSILowerI1CopiesPass(*PR); + initializeSIFixSGPRCopiesPass(*PR); + initializeSIFoldOperandsPass(*PR); + initializeSIFixSGPRLiveRangesPass(*PR); + initializeSIFixControlFlowLiveIntervalsPass(*PR); + initializeSILoadStoreOptimizerPass(*PR); + initializeAMDGPUAnnotateKernelFeaturesPass(*PR); + initializeAMDGPUAnnotateUniformValuesPass(*PR); +} + +static std::unique_ptr createTLOF(const Triple &TT) { + if (TT.getOS() == Triple::AMDHSA) + return make_unique(); + + return make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { @@ -72,15 +90,13 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OptLevel) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, OptLevel), - TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this), + TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } -AMDGPUTargetMachine::~AMDGPUTargetMachine() { - delete TLOF; -} +AMDGPUTargetMachine::~AMDGPUTargetMachine() { } //===----------------------------------------------------------------------===// // R600 Target Machine (R600 -> Cayman) @@ -110,7 +126,13 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + + // Exceptions and StackMaps are not supported, so these passes will never do + // anything. 
+ disablePass(&StackMapLivenessID); + disablePass(&FuncletLayoutID); + } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM(); @@ -126,8 +148,9 @@ public: void addIRPasses() override; void addCodeGenPrepare() override; - virtual bool addPreISel() override; - virtual bool addInstSelector() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addGCPasses() override; }; class R600PassConfig : public AMDGPUPassConfig { @@ -147,6 +170,8 @@ public: : AMDGPUPassConfig(TM, PM) { } bool addPreISel() override; bool addInstSelector() override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -156,7 +181,7 @@ public: } // End of anonymous namespace TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo( AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); }); @@ -172,6 +197,10 @@ void AMDGPUPassConfig::addIRPasses() { // functions, then we will generate code for the first function // without ever running any passes on the second. addPass(createBarrierNoopPass()); + + // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. + addPass(createAMDGPUOpenCLImageTypeLoweringPass()); + TargetPassConfig::addIRPasses(); } @@ -198,6 +227,11 @@ bool AMDGPUPassConfig::addInstSelector() { return false; } +bool AMDGPUPassConfig::addGCPasses() { + // Do nothing. GC is not supported. + return false; +} + //===----------------------------------------------------------------------===// // R600 Pass Setup //===----------------------------------------------------------------------===// @@ -238,16 +272,23 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); + + // FIXME: We need to run a pass to propagate the attributes when calls are + // supported. + addPass(&AMDGPUAnnotateKernelFeaturesID); + addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createSIAnnotateControlFlowPass()); + addPass(createAMDGPUAnnotateUniformValues()); + return false; } bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(createSILowerI1CopiesPass()); - addPass(createSIFixSGPRCopiesPass(*TM)); + addPass(&SIFixSGPRCopiesID); addPass(createSIFoldOperandsPass()); return false; } @@ -259,7 +300,6 @@ void GCNPassConfig::addPreRegAlloc() { // earlier passes might recompute live intervals. // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass if (getOptLevel() > CodeGenOpt::None) { - initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); } @@ -269,16 +309,27 @@ void GCNPassConfig::addPreRegAlloc() { // This should be run after scheduling, but before register allocation. It // also need extra copies to the address operand to be eliminated. 
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID); } addPass(createSIShrinkInstructionsPass(), false); - addPass(createSIFixSGPRLiveRangesPass(), false); +} + +void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(&SIFixSGPRLiveRangesID); + TargetPassConfig::addFastRegAlloc(RegAllocPass); +} + +void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // We want to run this after LiveVariables is computed to avoid computing them + // twice. + // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure + // that needs to be fixed. + insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false); + TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); } void GCNPassConfig::addPostRegAlloc() { - addPass(createSIPrepareScratchRegs(), false); addPass(createSIShrinkInstructionsPass(), false); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 14792e347a7a..236e3f824030 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -32,7 +32,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { private: protected: - TargetLoweringObjectFile *TLOF; + std::unique_ptr TLOF; AMDGPUSubtarget Subtarget; AMDGPUIntrinsicInfo IntrinsicInfo; @@ -52,7 +52,7 @@ public: TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF; + return TLOF.get(); } }; diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp new file mode 100644 index 000000000000..e050f21091ba --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -0,0 +1,87 @@ +//===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetObjectFile.h" +#include "AMDGPU.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Object File +//===----------------------------------------------------------------------===// + +MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const { + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV)) + return TextSection; + + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); +} + +//===----------------------------------------------------------------------===// +// HSA Object File +//===----------------------------------------------------------------------===// + + +void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM){ + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + TextSection = AMDGPU::getHSATextSection(Ctx); + + DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx); + DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx); + + RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx); +} + +bool AMDGPUHSATargetObjectFile::isAgentAllocationSection( + const char *SectionName) const { + return cast(DataGlobalAgentSection) + ->getSectionName() + .equals(SectionName); +} + +bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const { + // Read-only segments can only have agent allocation. + return AMDGPU::isReadOnlySegment(GV) || + (AMDGPU::isGlobalSegment(GV) && GV->hasSection() && + isAgentAllocationSection(GV->getSection())); +} + +bool AMDGPUHSATargetObjectFile::isProgramAllocation( + const GlobalValue *GV) const { + // The default for global segments is program allocation. + return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV); +} + +MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const { + if (Kind.isText() && !GV->hasComdat()) + return getTextSection(); + + if (AMDGPU::isGlobalSegment(GV)) { + if (isAgentAllocation(GV)) + return DataGlobalAgentSection; + + if (isProgramAllocation(GV)) + return DataGlobalProgramSection; + } + + return AMDGPUTargetObjectFile::SelectSectionForGlobal(GV, Kind, Mang, TM); +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h new file mode 100644 index 000000000000..921341ebb897 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -0,0 +1,51 @@ +//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the AMDGPU-specific subclass of +/// TargetLoweringObjectFile. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF { + public: + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile { +private: + MCSection *DataGlobalAgentSection; + MCSection *DataGlobalProgramSection; + MCSection *RodataReadonlyAgentSection; + + bool isAgentAllocationSection(const char *SectionName) const; + bool isAgentAllocation(const GlobalValue *GV) const; + bool isProgramAllocation(const GlobalValue *GV) const; + +public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 6dacc742b129..54a003d6a9cf 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -74,9 +74,109 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } -unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; } +unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { + return Vector ? 0 : 32; +} unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Semi-arbitrary large amount. return 64; } + +unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) { + // XXX - For some reason this isn't called for switch. + switch (Opcode) { + case Instruction::Br: + case Instruction::Ret: + return 10; + default: + return BaseT::getCFInstrCost(Opcode); + } +} + +int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) { + switch (Opcode) { + case Instruction::ExtractElement: + // Dynamic indexing isn't free and is best avoided. + return Index == ~0u ? 2 : 0; + default: + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + } +} + +static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII, + const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + default: + return false; + case Intrinsic::not_intrinsic: + // This means we have an intrinsic that isn't defined in + // IntrinsicsAMDGPU.td + break; + + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_mbcnt_hi: + case Intrinsic::amdgcn_mbcnt_lo: + case Intrinsic::r600_read_tidig_x: + case Intrinsic::r600_read_tidig_y: + case Intrinsic::r600_read_tidig_z: + return true; + } + + StringRef Name = I->getCalledFunction()->getName(); + switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) { + default: + return false; + case AMDGPUIntrinsic::SI_tid: + case AMDGPUIntrinsic::SI_fs_interp: + return true; + } +} + +static bool isArgPassedInSGPR(const Argument *A) { + const Function *F = A->getParent(); + unsigned ShaderType = AMDGPU::getShaderType(*F); + + // Arguments to compute shaders are never a source of divergence. 
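+  // (Kernel arguments are fetched with scalar loads from the kernarg
+  //  segment, so every lane of a wavefront sees the same value.)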
+  if (ShaderType == ShaderType::COMPUTE)
+    return true;
+
+  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+  if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
+      F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+    return true;
+
+  // Everything else is in VGPRs.
+  return false;
+}
+
+///
+/// \returns true if the result of the value could potentially be
+/// different across workitems in a wavefront.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return !isArgPassedInSGPR(A);
+
+  // Loads from the private address space are divergent, because threads
+  // can execute the load instruction with the same inputs and get different
+  // results.
+  //
+  // All other loads are not divergent, because if threads issue loads with the
+  // same arguments, they will always get the same result.
+  if (const LoadInst *Load = dyn_cast<LoadInst>(V))
+    return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    const TargetMachine &TM = getTLI()->getTargetMachine();
+    return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
+  }
+
+  // Assume all function calls are a source of divergence.
+  if (isa<CallInst>(V) || isa<InvokeInst>(V))
+    return true;
+
+  return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index dee0a69d1e68..976afb03443b 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -60,6 +60,11 @@ public:
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
   unsigned getMaxInterleaveFactor(unsigned VF);
+
+  unsigned getCFInstrCost(unsigned Opcode);
+
+  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+  bool isSourceOfDivergence(const Value *V) const;
 };

 } // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index d918ac3a5b3b..917efd149e00 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -185,7 +185,7 @@ protected:
   MachinePostDominatorTree *PDT;
   MachineLoopInfo *MLI;
   const R600InstrInfo *TII;
-  const AMDGPURegisterInfo *TRI;
+  const R600RegisterInfo *TRI;

   // PRINT FUNCTIONS
   /// Print the ordered Blocks.
@@ -881,7 +881,7 @@ bool AMDGPUCFGStructurizer::run() {
   } //while, "one iteration" over the function.

   MachineBasicBlock *EntryMBB =
-      GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+      &*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
   if (EntryMBB->succ_size() == 0) {
     Finish = true;
     DEBUG(
@@ -904,7 +904,7 @@ bool AMDGPUCFGStructurizer::run() {
   } while (!Finish && MakeProgress);

   // Misc wrap up to maintain the consistency of the Function representation.
-  wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+  wrapup(&*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));

   // Detach retired Block, release memory.
for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); @@ -1164,7 +1164,7 @@ int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, for (SmallVectorImpl::iterator It = ContMBB.begin(), E = ContMBB.end(); It != E; ++It) { - (*It)->removeSuccessor(LoopHeader); + (*It)->removeSuccessor(LoopHeader, true); } numLoopcontPatternMatch += NumCont; @@ -1353,7 +1353,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // If MigrateTrue is true, then TrueBB is the block being "branched into" // and if MigrateFalse is true, then FalseBB is the block being // "branched into" - // + // // Here is the pseudo code for how I think the optimization should work: // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. @@ -1372,7 +1372,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // the late machine optimization passes, however if we implement // bool TargetRegisterInfo::requiresRegisterScavenging( // const MachineFunction &MF) - // and have it return true, liveness will be tracked correctly + // and have it return true, liveness will be tracked correctly // by generic optimization passes. We will also need to make sure that // all of our target-specific passes that run after regalloc and before // the CFGStructurizer track liveness and we will need to modify this pass @@ -1487,7 +1487,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, ); DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); - DstMBB->removeSuccessor(SrcMBB); + DstMBB->removeSuccessor(SrcMBB, true); cloneSuccessorList(DstMBB, SrcMBB); removeSuccessor(SrcMBB); @@ -1537,9 +1537,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, if (TrueMBB) { MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); - MBB->removeSuccessor(TrueMBB); + MBB->removeSuccessor(TrueMBB, true); if (LandMBB && TrueMBB->succ_size()!=0) - TrueMBB->removeSuccessor(LandMBB); + TrueMBB->removeSuccessor(LandMBB, true); retireBlock(TrueMBB); MLI->removeBlock(TrueMBB); } @@ -1548,9 +1548,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, insertInstrBefore(I, AMDGPU::ELSE); MBB->splice(I, FalseMBB, FalseMBB->begin(), FalseMBB->end()); - MBB->removeSuccessor(FalseMBB); + MBB->removeSuccessor(FalseMBB, true); if (LandMBB && FalseMBB->succ_size() != 0) - FalseMBB->removeSuccessor(LandMBB); + FalseMBB->removeSuccessor(LandMBB, true); retireBlock(FalseMBB); MLI->removeBlock(FalseMBB); } @@ -1570,8 +1570,7 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); - DstBlk->addSuccessor(LandMBB); - DstBlk->removeSuccessor(DstBlk); + DstBlk->replaceSuccessor(DstBlk, LandMBB); } @@ -1592,7 +1591,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, //now branchInst can be erase safely BranchMI->eraseFromParent(); //now take care of successors, retire blocks - ExitingMBB->removeSuccessor(LandMBB); + ExitingMBB->removeSuccessor(LandMBB, true); } void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, @@ -1666,8 +1665,7 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); //srcBlk, oldBlk, newBlk - PredMBB->removeSuccessor(MBB); - 
PredMBB->addSuccessor(CloneMBB);
+  PredMBB->replaceSuccessor(MBB, CloneMBB);
 
   // add all successor to cloneBlk
   cloneSuccessorList(CloneMBB, MBB);
@@ -1695,10 +1693,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
     );
     SpliceEnd = SrcMBB->end();
   } else {
-    DEBUG(
-      dbgs() << "migrateInstruction see branch instr\n" ;
-      BranchMI->dump();
-    );
+    DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
     SpliceEnd = BranchMI;
   }
   DEBUG(
@@ -1711,7 +1706,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
 
   DEBUG(
     dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
-      << "srcSize = " << SrcMBB->size() << "\n";
+           << "srcSize = " << SrcMBB->size() << '\n';
   );
 }
@@ -1743,7 +1738,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
   while ((BranchMI = getLoopendBlockBranchInstr(MBB))
           && isUncondBranch(BranchMI)) {
-    DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+    DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
     BranchMI->eraseFromParent();
   }
 }
@@ -1759,10 +1754,10 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
   assert(BranchMI && isCondBranch(BranchMI));
 
-  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
   BranchMI->eraseFromParent();
   SHOWNEWBLK(MBB1, "Removing redundant successor");
-  MBB->removeSuccessor(MBB1);
+  MBB->removeSuccessor(MBB1, true);
 }
 
 void AMDGPUCFGStructurizer::addDummyExitBlock(
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2018983bc306..d9f753f40133 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,7 +28,9 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
@@ -83,6 +85,7 @@ public:
     unsigned RegNo;
     int Modifiers;
     const MCRegisterInfo *TRI;
+    const MCSubtargetInfo *STI;
     bool IsForcedVOP3;
   };
 
@@ -102,7 +105,7 @@ public:
   }
 
   void addRegOperands(MCInst &Inst, unsigned N) const {
-    Inst.addOperand(MCOperand::createReg(getReg()));
+    Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
   }
 
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
@@ -215,6 +218,10 @@ public:
            (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
   }
 
+  bool isSCSrc64() const {
+    return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+  }
+
   bool isVCSrc32() const {
     return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
   }
@@ -251,7 +258,22 @@ public:
     return EndLoc;
   }
 
-  void print(raw_ostream &OS) const override { }
+  void print(raw_ostream &OS) const override {
+    switch (Kind) {
+    case Register:
+      OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
+      break;
+    case Immediate:
+      OS << getImm();
+      break;
+    case Token:
+      OS << '\'' << getToken() << '\'';
+      break;
+    case Expression:
+      OS << "<expr " << *Expr << '>';
+      break;
+    }
+  }
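// A note on the MCSubtargetInfo now carried by register operands (an
// illustrative aside, not part of the patch): the same symbolic register can
// resolve to different hardware registers per subtarget, so resolution is
// deferred until the operand is added to an MCInst. AMDGPU::getMCReg() in
// AMDGPUBaseInfo is the real entry point used by addRegOperands() above; the
// wrapper name below is assumed:
//
//   unsigned resolveForSubtarget(unsigned Reg,
//                                const llvm::MCSubtargetInfo &STI) {
//     // e.g. flat_scratch resolves to a CI- or VI-specific SGPR pair
//     // depending on the subtarget the operand was parsed under.
//     return llvm::AMDGPU::getMCReg(Reg, STI);
//   }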
 
   static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
                                                   enum ImmTy Type = ImmTyNone,
@@ -278,10 +300,12 @@ public:
   static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
                                                   SMLoc E,
                                                   const MCRegisterInfo *TRI,
+                                                  const MCSubtargetInfo *STI,
                                                   bool ForceVOP3) {
     auto Op = llvm::make_unique<AMDGPUOperand>(Register);
     Op->Reg.RegNo = RegNo;
     Op->Reg.TRI = TRI;
+    Op->Reg.STI = STI;
     Op->Reg.Modifiers = -1;
     Op->Reg.IsForcedVOP3 = ForceVOP3;
     Op->StartLoc = S;
@@ -301,14 +325,32 @@ public:
   bool isDSOffset01() const;
   bool isSWaitCnt() const;
   bool isMubufOffset() const;
+  bool isSMRDOffset() const;
+  bool isSMRDLiteralOffset() const;
 };
 
 class AMDGPUAsmParser : public MCTargetAsmParser {
-  MCSubtargetInfo &STI;
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
 
   unsigned ForcedEncodingSize;
+
+  bool isSI() const {
+    return AMDGPU::isSI(getSTI());
+  }
+
+  bool isCI() const {
+    return AMDGPU::isCI(getSTI());
+  }
+
+  bool isVI() const {
+    return AMDGPU::isVI(getSTI());
+  }
+
+  bool hasSGPR102_SGPR103() const {
+    return !isVI();
+  }
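// A sketch of how these predicates get used further down in this file
// (subtargetHasRegister() below is the real consumer; this snippet is a
// simplified restatement of that logic, not the patch's code):
//
//   bool acceptsRegister(unsigned RegNo) const {
//     if (RegNo == AMDGPU::FLAT_SCR && isSI())
//       return false;               // SI has no flat scratch registers.
//     if (RegNo == AMDGPU::SGPR102_SGPR103 && !hasSGPR102_SGPR103())
//       return false;               // VI exposes two fewer SGPRs.
//     return true;
//   }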
+
   /// @name Auto-generated Match Functions
   /// {
 
@@ -323,20 +365,34 @@ private:
   bool ParseDirectiveHSACodeObjectISA();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
+  bool ParseSectionDirectiveHSAText();
+  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+  bool ParseDirectiveAMDGPUHsaKernel();
+  bool ParseDirectiveAMDGPUHsaModuleGlobal();
+  bool ParseDirectiveAMDGPUHsaProgramGlobal();
+  bool ParseSectionDirectiveHSADataGlobalAgent();
+  bool ParseSectionDirectiveHSADataGlobalProgram();
+  bool ParseSectionDirectiveHSARodataReadonlyAgent();
 
 public:
-  AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+public:
+  enum AMDGPUMatchResultTy {
+    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
+  };
+
+  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII,
                   const MCTargetOptions &Options)
-      : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
-        ForcedEncodingSize(0){
+      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
+        ForcedEncodingSize(0) {
+    MCAsmParserExtension::Initialize(Parser);
 
-    if (STI.getFeatureBits().none()) {
+    if (getSTI().getFeatureBits().none()) {
       // Set default features.
-      STI.ToggleFeature("SOUTHERN_ISLANDS");
+      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
     }
 
-    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
   }
 
   AMDGPUTargetStreamer &getTargetStreamer() {
@@ -420,10 +476,10 @@ struct OptionalOperand {
 
 }
 
-static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+static int getRegClass(bool IsVgpr, unsigned RegWidth) {
   if (IsVgpr) {
     switch (RegWidth) {
-      default: llvm_unreachable("Unknown register width");
+      default: return -1;
       case 1: return AMDGPU::VGPR_32RegClassID;
       case 2: return AMDGPU::VReg_64RegClassID;
       case 3: return AMDGPU::VReg_96RegClassID;
@@ -434,7 +490,7 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
   }
 
   switch (RegWidth) {
-    default: llvm_unreachable("Unknown register width");
+    default: return -1;
     case 1: return AMDGPU::SGPR_32RegClassID;
     case 2: return AMDGPU::SGPR_64RegClassID;
     case 4: return AMDGPU::SReg_128RegClassID;
@@ -443,16 +499,16 @@
   }
 }
 
-static unsigned getRegForName(const StringRef &RegName) {
+static unsigned getRegForName(StringRef RegName) {
 
   return StringSwitch<unsigned>(RegName)
     .Case("exec", AMDGPU::EXEC)
     .Case("vcc", AMDGPU::VCC)
-    .Case("flat_scr", AMDGPU::FLAT_SCR)
+    .Case("flat_scratch", AMDGPU::FLAT_SCR)
     .Case("m0", AMDGPU::M0)
     .Case("scc", AMDGPU::SCC)
-    .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
-    .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
@@ -464,12 +520,14 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
   const AsmToken Tok = Parser.getTok();
   StartLoc = Tok.getLoc();
   EndLoc = Tok.getEndLoc();
-  const StringRef &RegName = Tok.getString();
+  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+  StringRef RegName = Tok.getString();
   RegNo = getRegForName(RegName);
 
   if (RegNo) {
     Parser.Lex();
-    return false;
+    return !subtargetHasRegister(*TRI, RegNo);
   }
 
   // Match vgprs and sgprs
@@ -514,16 +572,24 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
       RegIndexInClass = RegLo;
     } else {
       // SGPR registers are aligned. Max alignment is 4 dwords.
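// Worked example of the alignment rule implemented just below (the register
// ranges are illustrative): a 2-dword SGPR range must start on an even
// register, and a 4-dword-or-wider range on a multiple of four.
//
//   s[4:5]  -> RegWidth = 2, Size = min(2, 4) = 2, RegLo = 4,
//              4 % 2 == 0, RegIndexInClass = 4 / 2 = 2 in SGPR_64.
//   s[3:4]  -> RegLo = 3, 3 % 2 != 0, rejected as misaligned.
//   s[8:11] -> RegWidth = 4, Size = 4, RegIndexInClass = 8 / 4 = 2.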
- RegIndexInClass = RegLo / std::min(RegWidth, 4u); + unsigned Size = std::min(RegWidth, 4u); + if (RegLo % Size != 0) + return true; + + RegIndexInClass = RegLo / Size; } } - const MCRegisterInfo *TRC = getContext().getRegisterInfo(); - unsigned RC = getRegClass(IsVgpr, RegWidth); - if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs()) + int RCID = getRegClass(IsVgpr, RegWidth); + if (RCID == -1) return true; - RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass); - return false; + + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegIndexInClass >= RC.getNumRegs()) + return true; + + RegNo = RC.getRegister(RegIndexInClass); + return !subtargetHasRegister(*TRI, RegNo); } unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { @@ -534,6 +600,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) return Match_InvalidOperand; + if ((TSFlags & SIInstrFlags::VOP3) && + (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && + getForcedEncodingSize() != 64) + return Match_PreferE32; + return Match_Success; } @@ -549,7 +620,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, default: break; case Match_Success: Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: return Error(IDLoc, "instruction not supported on this GPU"); @@ -592,6 +663,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } return Error(ErrorLoc, "invalid operand for instruction"); } + case Match_PreferE32: + return Error(IDLoc, "internal error: instruction without _e64 suffix " + "should be encoded as e32"); } llvm_unreachable("Implement any new match types added!"); } @@ -640,7 +714,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. 
if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits()); + AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, Isa.Stepping, "AMD", "AMDGPU"); @@ -852,7 +926,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits()); while (true) { @@ -882,6 +956,64 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } +bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSATextSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef KernelName = Parser.getTok().getString(); + + getTargetStreamer().EmitAMDGPUSymbolType(KernelName, + ELF::STT_AMDGPU_HSA_KERNEL); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef GlobalName = Parser.getTok().getIdentifier(); + + getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef GlobalName = Parser.getTok().getIdentifier(); + + getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSADataGlobalAgentSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSADataGlobalProgramSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSARodataReadonlyAgentSection(getContext())); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -894,6 +1026,55 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); + if (IDVal == ".hsatext" || IDVal == ".text") + return ParseSectionDirectiveHSAText(); + + if (IDVal == ".amdgpu_hsa_kernel") + return ParseDirectiveAMDGPUHsaKernel(); + + if (IDVal == ".amdgpu_hsa_module_global") + return ParseDirectiveAMDGPUHsaModuleGlobal(); + + if (IDVal == ".amdgpu_hsa_program_global") + return ParseDirectiveAMDGPUHsaProgramGlobal(); + + if (IDVal == ".hsadata_global_agent") + return ParseSectionDirectiveHSADataGlobalAgent(); + + if (IDVal == ".hsadata_global_program") + return ParseSectionDirectiveHSADataGlobalProgram(); + + if (IDVal == ".hsarodata_readonly_agent") + return ParseSectionDirectiveHSARodataReadonlyAgent(); + + return true; +} + +bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, + unsigned RegNo) const { + if (isCI()) + return true; + + if (isSI()) { + // No flat_scr + switch (RegNo) { + case AMDGPU::FLAT_SCR: + case AMDGPU::FLAT_SCR_LO: + 
case AMDGPU::FLAT_SCR_HI: + return false; + default: + return true; + } + } + + // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that + // SI/CI have. + for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return false; + } + return true; } @@ -943,13 +1124,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - APInt IntVal32(32, IntVal); - if (IntVal32.getSExtValue() != IntVal) { + if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) { Error(S, "invalid immediate: only 32-bit values are legal"); return MatchOperand_ParseFail; } - IntVal = IntVal32.getSExtValue(); if (Negate) IntVal *= -1; Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); @@ -1002,7 +1181,7 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { Operands.push_back(AMDGPUOperand::CreateReg( - RegNo, S, E, getContext().getRegisterInfo(), + RegNo, S, E, getContext().getRegisterInfo(), &getSTI(), isForcedVOP3())); if (HasModifiers || Modifiers) { @@ -1570,6 +1749,23 @@ AMDGPUAsmParser::parseR128(OperandVector &Operands) { return parseNamedBit("r128", Operands); } +//===----------------------------------------------------------------------===// +// smrd +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isSMRDOffset() const { + + // FIXME: Support 20-bit offsets on VI. We need to to pass subtarget + // information here. + return isImm() && isUInt<8>(getImm()); +} + +bool AMDGPUOperand::isSMRDLiteralOffset() const { + // 32-bit literals are only supported on CI and we only want to use them + // when the offset is > 8-bits. + return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -1653,8 +1849,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { } void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); - unsigned i = 2; + + unsigned i = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + if (Desc.getNumDefs() > 0) { + ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1); + } std::map OptionalIdx; diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index 2f5fdbe92078..88a090d3df35 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -8,6 +8,22 @@ //===----------------------------------------------------------------------===// // Instruction definitions for CI and newer. 
//===----------------------------------------------------------------------===// +// Remaining instructions: +// S_CBRANCH_CDBGUSER +// S_CBRANCH_CDBGSYS +// S_CBRANCH_CDBGSYS_OR_USER +// S_CBRANCH_CDBGSYS_AND_USER +// DS_NOP +// DS_GWS_SEMA_RELEASE_ALL +// DS_WRAP_RTN_B32 +// DS_CNDXCHG32_RTN_B64 +// DS_WRITE_B96 +// DS_WRITE_B128 +// DS_CONDXCHG32_RTN_B128 +// DS_READ_B96 +// DS_READ_B128 +// BUFFER_LOAD_DWORDX3 +// BUFFER_STORE_DWORDX3 def isCIVI : Predicate < @@ -23,6 +39,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; let SubtargetPredicate = isCIVI in { +let SchedRW = [WriteDoubleAdd] in { defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64", VOP_F64_F64, ftrunc >; @@ -35,82 +52,218 @@ defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64", defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64", VOP_F64_F64, frint >; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32", VOP_F32_F32 >; defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32", VOP_F32_F32 >; +} // End SchedRW = [WriteQuarterRate32] + +//===----------------------------------------------------------------------===// +// VOP3 Instructions +//===----------------------------------------------------------------------===// + +defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", + VOP_I32_I32_I32 +>; +defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", + VOP_I32_I32_I32 +>; +defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", + VOP_I32_I32_I32 +>; + +let isCommutable = 1 in { +defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", + VOP_I64_I32_I32_I64 +>; + +// XXX - Does this set VCC? +defm V_MAD_I64_I32 : VOP3Inst , "v_mad_i64_i32", + VOP_I64_I32_I32_I64 +>; +} // End isCommutable = 1 + + +//===----------------------------------------------------------------------===// +// DS Instructions +//===----------------------------------------------------------------------===// +defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; + +// DS_CONDXCHG32_RTN_B64 +// DS_CONDXCHG32_RTN_B128 + +//===----------------------------------------------------------------------===// +// SMRD Instructions +//===----------------------------------------------------------------------===// + +defm S_DCACHE_INV_VOL : SMRD_Inval , + "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>; + +//===----------------------------------------------------------------------===// +// MUBUF Instructions +//===----------------------------------------------------------------------===// + +defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate , + "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol +>; //===----------------------------------------------------------------------===// // Flat Instructions //===----------------------------------------------------------------------===// -def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>; -def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>; -def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>; -def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>; -def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>; -def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>; -def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>; -def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>; -def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>; -def FLAT_STORE_SHORT 
: FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>; -def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>; -def FLAT_STORE_DWORDX2 : FLAT_Store_Helper < - 0x1d, "flat_store_dwordx2", VReg_64 +defm FLAT_LOAD_UBYTE : FLAT_Load_Helper < + flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32 >; -def FLAT_STORE_DWORDX4 : FLAT_Store_Helper < - 0x1e, "flat_store_dwordx4", VReg_128 +defm FLAT_LOAD_SBYTE : FLAT_Load_Helper < + flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32 >; -def FLAT_STORE_DWORDX3 : FLAT_Store_Helper < - 0x1f, "flat_store_dwordx3", VReg_96 +defm FLAT_LOAD_USHORT : FLAT_Load_Helper < + flat<0xa, 0x12>, "flat_load_ushort", VGPR_32 +>; +defm FLAT_LOAD_SSHORT : FLAT_Load_Helper < + flat<0xb, 0x13>, "flat_load_sshort", VGPR_32> +; +defm FLAT_LOAD_DWORD : FLAT_Load_Helper < + flat<0xc, 0x14>, "flat_load_dword", VGPR_32 +>; +defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper < + flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64 +>; +defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper < + flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128 +>; +defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper < + flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96 +>; +defm FLAT_STORE_BYTE : FLAT_Store_Helper < + flat<0x18>, "flat_store_byte", VGPR_32 +>; +defm FLAT_STORE_SHORT : FLAT_Store_Helper < + flat <0x1a>, "flat_store_short", VGPR_32 +>; +defm FLAT_STORE_DWORD : FLAT_Store_Helper < + flat<0x1c>, "flat_store_dword", VGPR_32 +>; +defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper < + flat<0x1d>, "flat_store_dwordx2", VReg_64 +>; +defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper < + flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128 +>; +defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper < + flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96 +>; +defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC < + flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32 >; -defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>; defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC < - 0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64 + flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64 >; -defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>; -defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>; -defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>; -defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>; -defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>; -defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>; -defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>; -defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>; -defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>; -defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>; -defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>; -defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>; -defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < - 0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64 +defm FLAT_ATOMIC_ADD : FLAT_ATOMIC < + flat<0x32, 0x42>, "flat_atomic_add", VGPR_32 +>; +defm FLAT_ATOMIC_SUB : FLAT_ATOMIC < + flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32 +>; +defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC < + flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32 +>; +defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC < + flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32 +>; +defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC < + flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32 +>; +defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC < + flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32 +>; +defm 
FLAT_ATOMIC_AND : FLAT_ATOMIC < + flat<0x39, 0x48>, "flat_atomic_and", VGPR_32 +>; +defm FLAT_ATOMIC_OR : FLAT_ATOMIC < + flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32 +>; +defm FLAT_ATOMIC_XOR : FLAT_ATOMIC < + flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32 +>; +defm FLAT_ATOMIC_INC : FLAT_ATOMIC < + flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32 +>; +defm FLAT_ATOMIC_DEC : FLAT_ATOMIC < + flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32 +>; +defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC < + flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64 >; -defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>; -defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>; -defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC < - 0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128 + flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128 >; -defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>; -defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>; -defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>; -defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>; -defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>; -defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>; -defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>; -defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>; -defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>; -defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>; -defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>; -defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>; -defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < - 0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128 +defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC < + flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64 +>; +defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC < + flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64 +>; +defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC < + flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64 +>; +defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC < + flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64 +>; +defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC < + flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64 +>; +defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC < + flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64 +>; +defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC < + flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64 +>; +defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC < + flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64 +>; +defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC < + flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64 +>; +defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC < + flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64 +>; +defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC < + flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64 >; -defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>; -defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>; } // End SubtargetPredicate = isCIVI +// CI Only flat instructions + +let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst in { + +defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < + flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64 +>; +defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC < + flat<0x3f>, "flat_atomic_fmin", VGPR_32 +>; +defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC < + 
flat<0x40>, "flat_atomic_fmax", VGPR_32 +>; +defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < + flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128 +>; +defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC < + flat<0x5f>, "flat_atomic_fmin_x2", VReg_64 +>; +defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < + flat<0x60>, "flat_atomic_fmax_x2", VReg_64 +>; + +} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -147,3 +300,80 @@ def : FLATStore_Pattern ; } // End HasFlatAddressSpace predicate +let Predicates = [isCI] in { + +// Convert (x - floor(x)) to fract(x) +def : Pat < + (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), + (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), + (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +// Convert (x + (-floor(x))) to fract(x) +def : Pat < + (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), + (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +} // End Predicates = [isCI] + + +//===----------------------------------------------------------------------===// +// Patterns to generate flat for global +//===----------------------------------------------------------------------===// + +def useFlatForGlobal : Predicate < + "Subtarget->useFlatForGlobal() || " + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">; + +let Predicates = [useFlatForGlobal] in { + +// 1. Offset as 20bit DWORD immediate +def : Pat < + (SIload_constant v4i32:$sbase, IMM20bit:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) +>; + +// Patterns for global loads with no offset +class FlatLoadPat : Pat < + (vt (node i64:$addr)), + (inst $addr, 0, 0, 0) +>; + +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; + +class FlatStorePat : Pat < + (node vt:$data, i64:$addr), + (inst $data, $addr, 0, 0, 0) +>; + +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; + +class FlatAtomicPat : Pat < + (vt (node i64:$addr, vt:$data)), + (inst $addr, $data, 0, 0) +>; + +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; + +} // End Predicates = [useFlatForGlobal] diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 9460bf6b9338..30bb0e0adde8 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -15,12 +15,17 @@ add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen AMDILCFGStructurizer.cpp AMDGPUAlwaysInlinePass.cpp + AMDGPUAnnotateKernelFeatures.cpp + AMDGPUAnnotateUniformValues.cpp AMDGPUAsmPrinter.cpp + AMDGPUDiagnosticInfoUnsupported.cpp AMDGPUFrameLowering.cpp + AMDGPUTargetObjectFile.cpp AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp + AMDGPUOpenCLImageTypeLoweringPass.cpp AMDGPUSubtarget.cpp AMDGPUTargetMachine.cpp AMDGPUTargetTransformInfo.cpp @@ -45,6 +50,7 @@ add_llvm_target(AMDGPUCodeGen SIFixSGPRCopies.cpp SIFixSGPRLiveRanges.cpp SIFoldOperands.cpp + SIFrameLowering.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp @@ 
-52,7 +58,6 @@ add_llvm_target(AMDGPUCodeGen SILowerControlFlow.cpp SILowerI1Copies.cpp SIMachineFunctionInfo.cpp - SIPrepareScratchRegs.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp SITypeRewriter.cpp diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td index ba4df82a6d37..a6c3785c815b 100644 --- a/lib/Target/AMDGPU/CaymanInstructions.td +++ b/lib/Target/AMDGPU/CaymanInstructions.td @@ -82,6 +82,10 @@ def RAT_STORE_DWORD32 : RAT_STORE_DWORD ; def RAT_STORE_DWORD64 : RAT_STORE_DWORD ; def RAT_STORE_DWORD128 : RAT_STORE_DWORD ; +def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> { + let eop = 0; // This bit is not used on Cayman. +} + class VTX_READ_cm buffer_id, dag outs, list pattern> : VTX_WORD0_cm, VTX_READ { diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 7adcd46fe196..779a14e95d22 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -40,6 +40,15 @@ class CF_MEM_RAT rat_inst, bits<4> rat_id, dag ins, string name, : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, "MEM_RAT "#name, pattern>; +class CF_MEM_RAT_STORE_TYPED has_eop> + : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr, + i32imm:$rat_id, InstFlag:$eop), + "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr" + #!if(has_eop, ", $eop", ""), + [(int_r600_rat_store_typed R600_Reg128:$rw_gpr, + R600_Reg128:$index_gpr, + (i32 imm:$rat_id))]>; + def RAT_MSKOR : CF_MEM_RAT <0x11, 0, (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), "MSKOR $rw_gpr.XW, $index_gpr", @@ -105,6 +114,8 @@ def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>; + } // End usesCustomInserter = 1 class VTX_READ_eg buffer_id, dag outs, list pattern> diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index e811d5cff221..a187de88f639 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -283,8 +284,13 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { O << "4.0"; else if (Imm == DoubleToBits(-4.0)) O << "-4.0"; - else - llvm_unreachable("64-bit literal constants not supported"); + else { + assert(isUInt<32>(Imm)); + + // In rare situations, we will have a 32-bit literal in a 64-bit + // operand. This is technically allowed for the encoding of s_mov_b64. 
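// Illustrative case for the branch below (the instruction is an assumed
// example, not taken from the patch): s_mov_b64 may encode its 64-bit
// operand with a single 32-bit literal dword, e.g.
//
//   s_mov_b64 s[0:1], 0x12345678   // literal fits in 32 bits
//
// so instead of treating every non-inline 64-bit immediate as unreachable,
// the printer emits the hex form of the 32-bit value.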
+      O << formatHex(static_cast<uint32_t>(Imm));
+  }
 }
 
 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -592,11 +598,11 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
       } else {
         unsigned Stream = (SImm16 >> 8) & 0x3;
         if (Op == 1)
-	    O << "cut";
+          O << "cut";
         else if (Op == 2)
-	    O << "emit";
+          O << "emit";
         else if (Op == 3)
-	    O << "emit-cut";
+          O << "emit-cut";
         O << " stream " << Stream;
       }
       O << "), [m0] ";
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 14fb511e9232..90541d86132d 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -13,9 +13,7 @@
 #ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
 #define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 4434d9b119c6..60e8c8f3d303 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -99,14 +99,22 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
 
   case AMDGPU::fixup_si_rodata: {
     uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
-    *Dst = Value;
-    break;
-  }
-
-  case AMDGPU::fixup_si_end_of_text: {
-    uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
-    // The value points to the last instruction in the text section, so we
-    // need to add 4 bytes to get to the start of the constants.
+    // We emit constant data at the end of the text section and generate its
+    // address using the following code sequence:
+    // s_getpc_b64 s[0:1]
+    // s_add_u32 s0, s0, $symbol
+    // s_addc_u32 s1, s1, 0
+    //
+    // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+    // the fixup replaces $symbol with a literal constant, which is a
+    // pc-relative offset from the encoding of the $symbol operand to the
+    // constant data.
+    //
+    // What we want here is an offset from the start of the s_add_u32
+    // instruction to the constant data, but since the encoding of $symbol
+    // starts 4 bytes after the start of the add instruction, we end up
+    // with an offset that is 4 bytes too small. This requires us to
+    // add 4 to the fixup value before applying it.
     *Dst = Value + 4;
     break;
   }
@@ -136,8 +144,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
   const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
     // name                   offset bits  flags
     { "fixup_si_sopp_br",     0,     16,   MCFixupKindInfo::FKF_IsPCRel },
-    { "fixup_si_rodata",      0,     32,   0 },
-    { "fixup_si_end_of_text", 0,     32,   MCFixupKindInfo::FKF_IsPCRel }
+    { "fixup_si_rodata",      0,     32,   MCFixupKindInfo::FKF_IsPCRel }
   };
 
   if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
new file mode 100644
index 000000000000..9ff9fe794d2b
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -0,0 +1,26 @@
+//===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUELFStreamer.h" +#include "Utils/AMDGPUBaseInfo.h" + +using namespace llvm; + +void AMDGPUELFStreamer::InitSections(bool NoExecStack) { + // Start with the .hsatext section by default. + SwitchSection(AMDGPU::getHSATextSection(getContext())); +} + +MCELFStreamer *llvm::createAMDGPUELFStreamer(MCContext &Context, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return new AMDGPUELFStreamer(Context, MAB, OS, Emitter); +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h new file mode 100644 index 000000000000..488d7e74d741 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -0,0 +1,40 @@ +//===-------- AMDGPUELFStreamer.h - ELF Object Output ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a custom MCELFStreamer which allows us to insert some hooks before +// emitting data into an actual object file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCSubtargetInfo; + +class AMDGPUELFStreamer : public MCELFStreamer { +public: + AMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter) + : MCELFStreamer(Context, MAB, OS, Emitter) { } + + virtual void InitSections(bool NoExecStac) override; +}; + +MCELFStreamer *createAMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); +} // namespace llvm. 
+ +#endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h index 01021d67ffd9..59a9178082f6 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -21,9 +21,6 @@ enum Fixups { /// fixup for global addresses with constant initializers fixup_si_rodata, - /// fixup for offset from instruction to end of text section - fixup_si_end_of_text, - // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 028a86dfc7ad..68b1d1ae83cc 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -22,13 +22,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { InlineAsmEnd = ";#ASMEND"; //===--- Data Emission Directives -------------------------------------===// - ZeroDirective = ".zero"; - AsciiDirective = ".ascii\t"; - AscizDirective = ".asciz\t"; - Data8bitsDirective = ".byte\t"; - Data16bitsDirective = ".short\t"; - Data32bitsDirective = ".long\t"; - Data64bitsDirective = ".quad\t"; SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; @@ -41,3 +34,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { //===--- Dwarf Emission Directives -----------------------------------===// SupportsDebugInformation = true; } + +bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { + return SectionName == ".hsatext" || SectionName == ".hsadata_global_agent" || + SectionName == ".hsadata_global_program" || + SectionName == ".hsarodata_readonly_agent" || + MCAsmInfo::shouldOmitSectionDirective(SectionName); +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h index a5bac51e356f..a546961705d7 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -21,12 +21,13 @@ class Triple; // If you need to create another MCAsmInfo class, which inherits from MCAsmInfo, // you will need to make sure your new class sets PrivateGlobalPrefix to -// a prefix that won't appeary in a fuction name. The default value +// a prefix that won't appear in a function name. The default value // for PrivateGlobalPrefix is 'L', so it will consider any function starting // with 'L' as a local symbol. 
class AMDGPUMCAsmInfo : public MCAsmInfoELF { public: explicit AMDGPUMCAsmInfo(const Triple &TT); + bool shouldOmitSectionDirective(StringRef SectionName) const override; }; } // namespace llvm #endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index c709741f3777..f70409470276 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "AMDGPUELFStreamer.h" #include "AMDGPUMCAsmInfo.h" #include "AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" @@ -85,6 +86,15 @@ static MCTargetStreamer * createAMDGPUObjectTargetStreamer( return new AMDGPUTargetELFStreamer(S); } +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + if (T.getOS() == Triple::AMDHSA) + return createAMDGPUELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + + return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll); +} + extern "C" void LLVMInitializeAMDGPUTargetMC() { for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) { RegisterMCAsmInfo X(*T); @@ -95,6 +105,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() { TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo); TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend); + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); } // R600 specific registration diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 09e6cb1f1ffc..b91134d2ee9b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -13,6 +13,7 @@ #include "AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" @@ -220,6 +221,26 @@ AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { } +void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, + unsigned Type) { + switch (Type) { + default: llvm_unreachable("Invalid AMDGPU symbol type"); + case ELF::STT_AMDGPU_HSA_KERNEL: + OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; + break; + } +} + +void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( + StringRef GlobalName) { + OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; +} + +void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( + StringRef GlobalName) { + OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -291,7 +312,35 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { MCStreamer &OS = getStreamer(); OS.PushSection(); - OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection()); + // The MCObjectFileInfo that is available to the assembler is a generic + // implementation and not AMDGPUHSATargetObjectFile, so we can't use + // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection. 
+  OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
   OS.PopSection();
 }
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+                                                   unsigned Type) {
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(SymbolName));
+  Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+    StringRef GlobalName) {
+
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(GlobalName));
+  Symbol->setType(ELF::STT_OBJECT);
+  Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+    StringRef GlobalName) {
+
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(GlobalName));
+  Symbol->setType(ELF::STT_OBJECT);
+  Symbol->setBinding(ELF::STB_GLOBAL);
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index d37677c6b863..83bb728f541c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -7,6 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+
 #include "AMDKernelCodeT.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -27,6 +30,12 @@ public:
                                             StringRef ArchName) = 0;
 
   virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+
+  virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+
+  virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
+
+  virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 };
 
 class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -41,6 +50,12 @@ public:
                                     StringRef ArchName) override;
 
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+  void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 };
 
 class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -72,6 +87,12 @@ public:
 
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
 
+  void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 };
 
 }
+
+#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 8306a051ff98..c823ee7e0080 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -2,6 +2,7 @@
 add_llvm_library(LLVMAMDGPUDesc
   AMDGPUAsmBackend.cpp
   AMDGPUELFObjectWriter.cpp
+  AMDGPUELFStreamer.cpp
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
diff --git a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
index 4217bb362975..aa9a02198d04 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@ type = Library
 name = AMDGPUDesc
 parent = AMDGPU
-required_libraries = MC AMDGPUAsmPrinter AMDGPUInfo Support
+required_libraries = MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support
 add_to_library_groups = AMDGPU
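// A short usage sketch (illustrative symbol names, not from the patch) of
// how the two global directives differ once they reach the ELF streamer
// implementations above:
//
//   .amdgpu_hsa_module_global lds_counter   // -> STT_OBJECT, STB_LOCAL
//   .amdgpu_hsa_program_global global_flag  // -> STT_OBJECT, STB_GLOBAL
//
// Module-scope globals are only visible within their module, so they get
// local binding; program-scope globals are meant to be linked across
// modules, so they get global binding.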
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index e683498d52a5..3c1142dd664b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -37,7 +37,6 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
   const MCRegisterInfo &MRI;
 
 public:
-
   R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
     : MCII(mcii), MRI(mri) { }
 
@@ -50,8 +49,8 @@ public:
   uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;
-private:
 
+private:
   void EmitByte(unsigned int byte, raw_ostream &OS) const;
 
   void Emit(uint32_t value, raw_ostream &OS) const;
@@ -59,7 +58,6 @@ private:
 
   unsigned getHWRegChan(unsigned reg) const;
   unsigned getHWReg(unsigned regNo) const;
-
 };
 
 } // End anonymous namespace
@@ -83,7 +81,7 @@ enum FCInstr {
 
 MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                              const MCRegisterInfo &MRI,
-                                              MCContext &Ctx) {
+                                             MCContext &Ctx) {
   return new R600MCCodeEmitter(MCII, MRI);
 }
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 65a0eeba2b16..9eb3dadbc5e2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -36,7 +36,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
   void operator=(const SIMCCodeEmitter &) = delete;
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
-  MCContext &Ctx;
 
   /// \brief Can this operand also contain immediate values?
   bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -47,7 +46,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   MCContext &ctx)
-    : MCII(mcii), MRI(mri), Ctx(ctx) { }
+    : MCII(mcii), MRI(mri) { }
 
   ~SIMCCodeEmitter() override {}
 
@@ -250,17 +249,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
 
   if (MO.isExpr()) {
     const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
-    MCFixupKind Kind;
-    const MCSymbol *Sym =
-        Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
-    if (&Expr->getSymbol() == Sym) {
-      // Add the offset to the beginning of the constant values.
-      Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
-    } else {
-      // This is used for constant data stored in .rodata.
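// Worked example of the pc-relative rodata fixup this path now always emits
// (the offsets are illustrative). Given the sequence described in
// AMDGPUAsmBackend.cpp above:
//
//   s_getpc_b64 s[0:1]          // pc := address of the following s_add_u32
//   s_add_u32   s0, s0, $sym    // $sym's literal begins 4 bytes in
//   s_addc_u32  s1, s1, 0
//
// if the constant data sits N bytes past the start of the s_add_u32, the
// resolved pc-relative distance measured from the literal is N - 4, so the
// backend's `*Dst = Value + 4` restores the intended offset N.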
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; - } + MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc())); } diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index d9a0723bedc9..a1584a224cbd 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -142,3 +142,7 @@ def : ProcessorModel<"carrizo", SIQuarterSpeedModel, def : ProcessorModel<"fiji", SIQuarterSpeedModel, [FeatureVolcanicIslands, FeatureISAVersion8_0_1] >; + +def : ProcessorModel<"stoney", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureISAVersion8_0_1] +>; diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index c8f37f61fc16..bd80bb211b4f 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -405,8 +405,8 @@ private: if (MO.isReg() && MO.isInternalRead()) MO.setIsInternalRead(false); } - getLiteral(BI, Literals); - ClauseContent.push_back(BI); + getLiteral(&*BI, Literals); + ClauseContent.push_back(&*BI); } I = BI; DeleteMI->eraseFromParent(); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 4e4d554f0ee7..124a9c6e0f56 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -190,6 +190,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setSchedulingPreference(Sched::Source); } +static inline bool isEOP(MachineBasicBlock::iterator I) { + return std::next(I)->getOpcode() == AMDGPU::RETURN; +} + MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineFunction * MF = BB->getParent(); @@ -276,12 +280,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addImm(EOP); // Set End of program bit + .addImm(isEOP(I)); // Set End of program bit + break; + } + case AMDGPU::RAT_STORE_TYPED_eg: { + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(2)) + .addImm(isEOP(I)); // Set End of program bit break; } @@ -539,7 +549,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } } } - bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; + bool EOP = isEOP(I); if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; @@ -946,6 +956,8 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); SDLoc DL(Op); + + // TODO: Should this propagate fast-math-flags? 
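// A plain-C sketch of the fract-based range reduction the DAG below builds
// (the constant is assumed from the surrounding lowering; 0.15915494309f
// is 1/(2*pi)). The argument is converted into units of full periods,
// biased by half a period, and only the fractional part is kept for the
// hardware trig opcodes:
//
//   #include <math.h>
//   static float reduceToPeriod(float X) {
//     float Turns = X * 0.15915494309f + 0.5f; // periods, biased by 0.5
//     return Turns - floorf(Turns);            // FRACT: result in [0, 1)
//   }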
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FADD, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, @@ -1936,6 +1948,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, Arg->getOperand(0).getOperand(Element)); } } + break; } case ISD::SELECT_CC: { diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 855fa9fe45b2..8b6eea17130b 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -922,7 +922,7 @@ bool R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const{ + BranchProbability Probability) const{ return true; } @@ -933,14 +933,14 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return true; } bool R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) + BranchProbability Probability) const { return true; } diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index dee4c2b9ae31..e7251c31107b 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -174,18 +174,18 @@ namespace llvm { bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override ; + BranchProbability Probability) const override ; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool DefinesPredicate(MachineInstr *MI, std::vector &Pred) const override; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 7beed092b3f7..33ef6a4e19ea 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -1655,7 +1655,7 @@ def : InsertVerticalPat ; // ISel Patterns //===----------------------------------------------------------------------===// -// CND*_INT Pattterns for f32 True / False values +// CND*_INT Patterns for f32 True / False values class CND_INT_f32 : Pat < (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 0c06ccc736d0..5efb3b9fc20e 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -318,7 +318,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { MRI = &(Fn.getRegInfo()); for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - MachineBasicBlock *MB = MBB; + MachineBasicBlock *MB = &*MBB; PreviousRegSeq.clear(); PreviousRegSeqByReg.clear(); PreviousRegSeqByUndefCount.clear(); diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index deee5bc39974..21269613a305 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -81,11 +81,11 @@ private: int 
LastDstChan = -1; do { bool isTrans = false; - int BISlot = getSlot(BI); + int BISlot = getSlot(&*BI); if (LastDstChan >= BISlot) isTrans = true; LastDstChan = BISlot; - if (TII->isPredicated(BI)) + if (TII->isPredicated(&*BI)) continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) @@ -95,7 +95,7 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); - if (isTrans || TII->isTransOnly(BI)) { + if (isTrans || TII->isTransOnly(&*BI)) { Result[Dst] = AMDGPU::PS; continue; } @@ -149,7 +149,7 @@ private: public: // Ctor. R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI) - : VLIWPacketizerList(MF, MLI, true), + : VLIWPacketizerList(MF, MLI, nullptr), TII(static_cast( MF.getSubtarget().getInstrInfo())), TRI(TII->getRegisterInfo()) { @@ -162,14 +162,14 @@ public: } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override { + bool ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock *MBB) override { return false; } // isSoloInstruction - return true if instruction MI can not be packetized // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override { + bool isSoloInstruction(const MachineInstr *MI) override { if (TII->isVector(*MI)) return true; if (!TII->isALUInstr(MI->getOpcode())) @@ -375,7 +375,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn)) + if (TII->isSchedulingBoundary(&*std::prev(I), &*MBB, Fn)) break; } I = MBB->begin(); @@ -392,7 +392,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { continue; } - Packetizer.PacketizeMIs(MBB, I, RegionEnd); + Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); RegionEnd = I; } } diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h index 9713e600a721..4f8a129ce4a6 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/lib/Target/AMDGPU/R600RegisterInfo.h @@ -35,7 +35,7 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { /// \brief get the register class of the specified type to use in the /// CFGStructurizer - const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; + const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const; const RegClassWeight & getRegClassWeight(const TargetRegisterClass *RC) const override; diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index ccfbf1bf19ed..fa4d24a2f25a 100644 --- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -312,11 +312,10 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end()) Preds.push_back(*PI); } - BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT, - LI, false); + BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false); } - CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt()); + CallInst::Create(EndCf, popSaved(), "", &*BB->getFirstInsertionPt()); } /// \brief Annotate the control flow with intrinsics so the backend can diff --git a/lib/Target/AMDGPU/SIDefines.h 
b/lib/Target/AMDGPU/SIDefines.h index 4c3263911c40..7f79dd34f3ba 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -37,7 +37,8 @@ enum { MIMG = 1 << 18, FLAT = 1 << 19, WQM = 1 << 20, - VGPRSpill = 1 << 21 + VGPRSpill = 1 << 21, + VOPAsmPrefer32Bit = 1 << 22 }; } diff --git a/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp index 5fe8d19426dd..636750dcfba2 100644 --- a/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp +++ b/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp @@ -16,15 +16,9 @@ #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 23502b45905c..96e37c566240 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -82,22 +82,10 @@ using namespace llvm; namespace { class SIFixSGPRCopies : public MachineFunctionPass { - -private: - static char ID; - const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const; - const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const; - bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI) const; - public: - SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } + static char ID; + + SIFixSGPRCopies() : MachineFunctionPass(ID) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -105,14 +93,23 @@ public: return "SI Fix SGPR copies"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace +INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE, + "SI Fix SGPR copies", false, false) + char SIFixSGPRCopies::ID = 0; -FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { - return new SIFixSGPRCopies(tm); +char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID; + +FunctionPass *llvm::createSIFixSGPRCopiesPass() { + return new SIFixSGPRCopies(); } static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { @@ -128,77 +125,115 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { return false; } -/// This functions walks the use list of Reg until it finds an Instruction -/// that isn't a COPY returns the register class of that instruction. -/// \return The register defined by the first non-COPY instruction. -const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const { - - const TargetRegisterClass *RC - = TargetRegisterInfo::isVirtualRegister(Reg) ? 
- MRI.getRegClass(Reg) : - TRI->getPhysRegClass(Reg); - - RC = TRI->getSubRegClass(RC, SubReg); - for (MachineRegisterInfo::use_instr_iterator - I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) { - switch (I->getOpcode()) { - case AMDGPU::COPY: - RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, - I->getOperand(0).getReg(), - I->getOperand(0).getSubReg())); - break; - } - } - - return RC; -} - -const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); - return TRI->getSubRegClass(RC, SubReg); - } - MachineInstr *Def = MRI.getVRegDef(Reg); - if (Def->getOpcode() != AMDGPU::COPY) { - return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); - } - - return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), - Def->getOperand(1).getSubReg()); -} - -bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI) const { - +static std::pair +getCopyRegClasses(const MachineInstr &Copy, + const SIRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { unsigned DstReg = Copy.getOperand(0).getReg(); unsigned SrcReg = Copy.getOperand(1).getReg(); - unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) { - // If the destination register is a physical register there isn't really - // much we can do to fix this. + const TargetRegisterClass *SrcRC = + TargetRegisterInfo::isVirtualRegister(SrcReg) ? + MRI.getRegClass(SrcReg) : + TRI.getPhysRegClass(SrcReg); + + // We don't really care about the subregister here. + // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); + + const TargetRegisterClass *DstRC = + TargetRegisterInfo::isVirtualRegister(DstReg) ? + MRI.getRegClass(DstReg) : + TRI.getPhysRegClass(DstReg); + + return std::make_pair(SrcRC, DstRC); +} + +static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { + return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC); +} + +static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { + return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); +} + +// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. +// +// SGPRx = ... +// SGPRy = REG_SEQUENCE SGPRx, sub0 ... +// VGPRz = COPY SGPRy +// +// ==> +// +// VGPRx = COPY SGPRx +// VGPRz = REG_SEQUENCE VGPRx, sub0 +// +// This exposes immediate folding opportunities when materializing 64-bit +// immediates. +static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII, + MachineRegisterInfo &MRI) { + assert(MI.isRegSequence()); + + unsigned DstReg = MI.getOperand(0).getReg(); + if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) return false; + + if (!MRI.hasOneUse(DstReg)) + return false; + + MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg); + if (!CopyUse.isCopy()) + return false; + + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); + + if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) + return false; + + // TODO: Could have multiple extracts? 
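The fold below rewrites each input of the REG_SEQUENCE in turn. As a standalone sketch of the operand layout it walks (a plain vector of operand values stands in for the MachineInstr operand list, which is an assumption of this sketch): operand 0 is the def, and the inputs arrive as (value register, subregister index) pairs, hence the stride of two.

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Sketch: collect the (value, subreg index) input pairs of a
    // REG_SEQUENCE-like operand list. Index 0 holds the destination.
    static std::vector<std::pair<unsigned, unsigned>>
    regSequenceInputs(const std::vector<unsigned> &Ops) {
      std::vector<std::pair<unsigned, unsigned>> Pairs;
      for (size_t I = 1; I + 1 < Ops.size(); I += 2)
        Pairs.push_back({Ops[I], Ops[I + 1]});
      return Pairs;
    }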
+  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
+  if (SubReg != AMDGPU::NoSubRegister)
+    return false;
+
+  MRI.setRegClass(DstReg, DstRC);
+
+  // SGPRx = ...
+  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+  // VGPRz = COPY SGPRy
+
+  // =>
+  // VGPRx = COPY SGPRx
+  // VGPRz = REG_SEQUENCE VGPRx, sub0
+
+  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+
+  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+    unsigned SrcReg = MI.getOperand(I).getReg();
+    unsigned SrcSubReg = MI.getOperand(I).getSubReg();
+
+    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+    assert(TRI->isSGPRClass(SrcRC) &&
+           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
+
+    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
+    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
+
+    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+
+    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
+      .addOperand(MI.getOperand(I));
+
+    MI.getOperand(I).setReg(TmpReg);
+  }
-  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
-
-  const TargetRegisterClass *SrcRC;
-
-  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-      MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
-    return false;
-
-  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
-  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+  CopyUse.eraseFromParent();
+  return true;
 }

 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
@@ -207,40 +242,38 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
     static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  SmallVector<MachineInstr *, 16> Worklist;
+
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                   BI != BE; ++BI) {

     MachineBasicBlock &MBB = *BI;
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-                                      I != E; ++I) {
+         I != E; ++I) {
       MachineInstr &MI = *I;
-      if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
-        DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
-        DEBUG(MI.print(dbgs()));
-        TII->moveToVALU(MI);
-
-      }

       switch (MI.getOpcode()) {
-      default: continue;
+      default:
+        continue;
+      case AMDGPU::COPY: {
+        // If the destination register is a physical register there isn't really
+        // much we can do to fix this.
+ if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) + continue; + + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); + if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { + DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI); + TII->moveToVALU(MI); + } + + break; + } case AMDGPU::PHI: { DEBUG(dbgs() << "Fixing PHI: " << MI); - - for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { - const MachineOperand &Op = MI.getOperand(i); - unsigned Reg = Op.getReg(); - const TargetRegisterClass *RC - = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg()); - - MRI.constrainRegClass(Op.getReg(), RC); - } unsigned Reg = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, - MI.getOperand(0).getSubReg()); - if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) { - MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass); - } - if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) break; @@ -310,8 +343,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } case AMDGPU::REG_SEQUENCE: { if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || - !hasVGPROperands(MI, TRI)) + !hasVGPROperands(MI, TRI)) { + foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); continue; + } DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); diff --git a/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp b/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp index 0c54446b0fb1..8bda283f0fca 100644 --- a/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp @@ -7,9 +7,8 @@ // //===----------------------------------------------------------------------===// // -/// \file -/// SALU instructions ignore control flow, so we need to modify the live ranges -/// of the registers they define in some cases. +/// \file SALU instructions ignore the execution mask, so we need to modify the +/// live ranges of the registers they define in some cases. /// /// The main case we need to handle is when a def is used in one side of a /// branch and not another. For example: @@ -42,13 +41,15 @@ /// ENDIF /// %use /// -/// Adding this use will make the def live thoughout the IF branch, which is +/// Adding this use will make the def live throughout the IF branch, which is /// what we want. 
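The transform described above can be modeled with a toy CFG: when a register is live into exactly one of a branch's two successors, pin it with an artificial use at their nearest common post-dominator so both paths keep it live. Types and the liveness representation here are hypothetical; the real pass queries LiveVariables and MachinePostDominatorTree and emits an implicit SGPR_USE.

    #include <algorithm>
    #include <vector>

    struct Block {
      std::vector<unsigned> LiveIn;     // registers live into the block
      std::vector<unsigned> PinnedUses; // artificial uses added by the fix
    };

    static bool isLiveIn(const Block &B, unsigned Reg) {
      return std::find(B.LiveIn.begin(), B.LiveIn.end(), Reg) != B.LiveIn.end();
    }

    // NCD is assumed to be the nearest common post-dominator of SuccA/SuccB.
    static void pinExecIgnoringDef(unsigned Reg, const Block &SuccA,
                                   const Block &SuccB, Block &NCD) {
      bool InA = isLiveIn(SuccA, Reg), InB = isLiveIn(SuccB, Reg);
      if (InA == InB)
        return; // live into both or neither: no conflict to fix
      NCD.PinnedUses.push_back(Reg); // extend the def across both paths
    }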
#include "AMDGPU.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -79,9 +80,13 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -90,7 +95,7 @@ public: INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) @@ -108,40 +113,48 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const SIRegisterInfo *TRI = static_cast( MF.getSubtarget().getRegisterInfo()); - LiveIntervals *LIS = &getAnalysis(); - MachinePostDominatorTree *PDT = &getAnalysis(); - std::vector> SGPRLiveRanges; + bool MadeChange = false; - // First pass, collect all live intervals for SGPRs - for (const MachineBasicBlock &MBB : MF) { - for (const MachineInstr &MI : MBB) { + MachinePostDominatorTree *PDT = &getAnalysis(); + SmallVector SGPRLiveRanges; + + LiveVariables *LV = &getAnalysis(); + MachineBasicBlock *Entry = &MF.front(); + + // Use a depth first order so that in SSA, we encounter all defs before + // uses. Once the defs of the block have been found, attempt to insert + // SGPR_USE instructions in successor blocks if required. + for (MachineBasicBlock *MBB : depth_first(Entry)) { + for (const MachineInstr &MI : *MBB) { for (const MachineOperand &MO : MI.defs()) { - if (MO.isImplicit()) - continue; + // We should never see a live out def of a physical register, so we also + // do not need to worry about implicit_defs(). unsigned Def = MO.getReg(); if (TargetRegisterInfo::isVirtualRegister(Def)) { - if (TRI->isSGPRClass(MRI.getRegClass(Def))) - SGPRLiveRanges.push_back( - std::make_pair(Def, &LIS->getInterval(Def))); - } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) { - SGPRLiveRanges.push_back( - std::make_pair(Def, &LIS->getRegUnit(Def))); + if (TRI->isSGPRClass(MRI.getRegClass(Def))) { + // Only consider defs that are live outs. We don't care about def / + // use within the same block. + + // LiveVariables does not consider registers that are only used in a + // phi in a sucessor block as live out, unlike LiveIntervals. + // + // This is OK because SIFixSGPRCopies replaced any SGPR phis with + // VGPRs. + if (LV->isLiveOut(Def, *MBB)) + SGPRLiveRanges.push_back(Def); + } } } } - } - // Second pass fix the intervals - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; - if (MBB.succ_size() < 2) + if (MBB->succ_size() < 2) continue; - // We have structured control flow, so number of succesors should be two. - assert(MBB.succ_size() == 2); - MachineBasicBlock *SuccA = *MBB.succ_begin(); - MachineBasicBlock *SuccB = *(++MBB.succ_begin()); + // We have structured control flow, so the number of successors should be + // two. 
+ assert(MBB->succ_size() == 2); + MachineBasicBlock *SuccA = *MBB->succ_begin(); + MachineBasicBlock *SuccB = *(++MBB->succ_begin()); MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB); if (!NCD) @@ -156,37 +169,51 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(), *(++NCD->succ_begin())); } - assert(SuccA && SuccB); - for (std::pair RegLR : SGPRLiveRanges) { - unsigned Reg = RegLR.first; - LiveRange *LR = RegLR.second; - // FIXME: We could be smarter here. If the register is Live-In to - // one block, but the other doesn't have any SGPR defs, then there - // won't be a conflict. Also, if the branch decision is based on - // a value in an SGPR, then there will be no conflict. - bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA); - bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB); + for (unsigned Reg : SGPRLiveRanges) { + // FIXME: We could be smarter here. If the register is Live-In to one + // block, but the other doesn't have any SGPR defs, then there won't be a + // conflict. Also, if the branch condition is uniform then there will be + // no conflict. + bool LiveInToA = LV->isLiveIn(Reg, *SuccA); + bool LiveInToB = LV->isLiveIn(Reg, *SuccB); - if ((!LiveInToA && !LiveInToB) || - (LiveInToA && LiveInToB)) + if (!LiveInToA && !LiveInToB) { + DEBUG(dbgs() << PrintReg(Reg, TRI, 0) + << " is live into neither successor\n"); continue; + } + + if (LiveInToA && LiveInToB) { + DEBUG(dbgs() << PrintReg(Reg, TRI, 0) + << " is live into both successors\n"); + continue; + } // This interval is live in to one successor, but not the other, so // we need to update its range so it is live in to both. - DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR << - " BB#" << SuccA->getNumber() << ", BB#" << - SuccB->getNumber() << - " with NCD = " << NCD->getNumber() << '\n'); + DEBUG(dbgs() << "Possible SGPR conflict detected for " + << PrintReg(Reg, TRI, 0) + << " BB#" << SuccA->getNumber() + << ", BB#" << SuccB->getNumber() + << " with NCD = BB#" << NCD->getNumber() << '\n'); + + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Not expecting to extend live range of physreg"); // FIXME: Need to figure out how to update LiveRange here so this pass // will be able to preserve LiveInterval analysis. - BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), - TII->get(AMDGPU::SGPR_USE)) - .addReg(Reg, RegState::Implicit); - DEBUG(NCD->getFirstNonPHI()->dump()); + MachineInstr *NCDSGPRUse = + BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), + TII->get(AMDGPU::SGPR_USE)) + .addReg(Reg, RegState::Implicit); + + MadeChange = true; + LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse); + + DEBUG(NCDSGPRUse->dump()); } } - return false; + return MadeChange; } diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index c2887255cc11..02a39307e74e 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -45,6 +45,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -164,8 +165,8 @@ static bool tryAddToFoldList(std::vector &FoldList, // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. 
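For the commute attempt below, the operand indices are now seeded with TargetInstrInfo::CommuteAnyOperandIndex so the target may pick the pair itself. A condensed sketch of that calling pattern, assuming the TargetInstrInfo interface of this revision:

    // Sketch: ask the target for a commutable operand pair (either index
    // may be pre-pinned by the caller), commute that exact pair in place,
    // and only then re-test operand legality for the fold.
    static bool tryCommuteForFold(const TargetInstrInfo *TII, MachineInstr *MI) {
      unsigned Idx0 = TargetInstrInfo::CommuteAnyOperandIndex;
      unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (!TII->findCommutedOpIndices(MI, Idx0, Idx1))
        return false;
      return TII->commuteInstruction(MI, /*NewMI=*/false, Idx0, Idx1) != nullptr;
    }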
- unsigned CommuteIdx0; - unsigned CommuteIdx1; + unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex; + unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1); if (CanCommute) { @@ -175,7 +176,16 @@ static bool tryAddToFoldList(std::vector &FoldList, OpNo = CommuteIdx0; } - if (!CanCommute || !TII->commuteInstruction(MI)) + // One of operands might be an Imm operand, and OpNo may refer to it after + // the call of commuteInstruction() below. Such situations are avoided + // here explicitly as OpNo must be a register operand to be a candidate + // for memory folding. + if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() || + !MI->getOperand(CommuteIdx1).isReg())) + return false; + + if (!CanCommute || + !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1)) return false; if (!TII->isOperandLegal(MI, OpNo, OpToFold)) @@ -186,6 +196,110 @@ static bool tryAddToFoldList(std::vector &FoldList, return true; } +static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, + unsigned UseOpIdx, + std::vector &FoldList, + SmallVectorImpl &CopiesToReplace, + const SIInstrInfo *TII, const SIRegisterInfo &TRI, + MachineRegisterInfo &MRI) { + const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); + + // FIXME: Fold operands with subregs. + if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || + UseOp.isImplicit())) { + return; + } + + bool FoldingImm = OpToFold.isImm(); + APInt Imm; + + if (FoldingImm) { + unsigned UseReg = UseOp.getReg(); + const TargetRegisterClass *UseRC + = TargetRegisterInfo::isVirtualRegister(UseReg) ? + MRI.getRegClass(UseReg) : + TRI.getPhysRegClass(UseReg); + + Imm = APInt(64, OpToFold.getImm()); + + const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode()); + const TargetRegisterClass *FoldRC = + TRI.getRegClass(FoldDesc.OpInfo[0].RegClass); + + // Split 64-bit constants into 32-bits for folding. + if (FoldRC->getSize() == 8 && UseOp.getSubReg()) { + if (UseRC->getSize() != 8) + return; + + if (UseOp.getSubReg() == AMDGPU::sub0) { + Imm = Imm.getLoBits(32); + } else { + assert(UseOp.getSubReg() == AMDGPU::sub1); + Imm = Imm.getHiBits(32); + } + } + + // In order to fold immediates into copies, we need to change the + // copy to a MOV. + if (UseMI->getOpcode() == AMDGPU::COPY) { + unsigned DestReg = UseMI->getOperand(0).getReg(); + const TargetRegisterClass *DestRC + = TargetRegisterInfo::isVirtualRegister(DestReg) ? + MRI.getRegClass(DestReg) : + TRI.getPhysRegClass(DestReg); + + unsigned MovOp = TII->getMovOpcode(DestRC); + if (MovOp == AMDGPU::COPY) + return; + + UseMI->setDesc(TII->get(MovOp)); + CopiesToReplace.push_back(UseMI); + } + } + + // Special case for REG_SEQUENCE: We can't fold literals into + // REG_SEQUENCE instructions, so we have to fold them into the + // uses of REG_SEQUENCE. + if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) { + unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); + unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); + + for (MachineRegisterInfo::use_iterator + RSUse = MRI.use_begin(RegSeqDstReg), + RSE = MRI.use_end(); RSUse != RSE; ++RSUse) { + + MachineInstr *RSUseMI = RSUse->getParent(); + if (RSUse->getSubReg() != RegSeqDstSubReg) + continue; + + foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList, + CopiesToReplace, TII, TRI, MRI); + } + return; + } + + const MCInstrDesc &UseDesc = UseMI->getDesc(); + + // Don't fold into target independent nodes. 
Target independent opcodes + // don't have defined register classes. + if (UseDesc.isVariadic() || + UseDesc.OpInfo[UseOpIdx].RegClass == -1) + return; + + if (FoldingImm) { + MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); + tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII); + return; + } + + tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); + + // FIXME: We could try to change the instruction from 64-bit to 32-bit + // to enable more folding opportunites. The shrink operands pass + // already does this. + return; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -226,88 +340,36 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { OpToFold.getSubReg())) continue; + + // We need mutate the operands of new mov instructions to add implicit + // uses of EXEC, but adding them invalidates the use_iterator, so defer + // this. + SmallVector CopiesToReplace; + std::vector FoldList; for (MachineRegisterInfo::use_iterator Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end(); Use != E; ++Use) { MachineInstr *UseMI = Use->getParent(); - const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo()); - // FIXME: Fold operands with subregs. - if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || - UseOp.isImplicit())) { - continue; - } - - APInt Imm; - - if (FoldingImm) { - unsigned UseReg = UseOp.getReg(); - const TargetRegisterClass *UseRC - = TargetRegisterInfo::isVirtualRegister(UseReg) ? - MRI.getRegClass(UseReg) : - TRI.getPhysRegClass(UseReg); - - Imm = APInt(64, OpToFold.getImm()); - - // Split 64-bit constants into 32-bits for folding. - if (UseOp.getSubReg()) { - if (UseRC->getSize() != 8) - continue; - - if (UseOp.getSubReg() == AMDGPU::sub0) { - Imm = Imm.getLoBits(32); - } else { - assert(UseOp.getSubReg() == AMDGPU::sub1); - Imm = Imm.getHiBits(32); - } - } - - // In order to fold immediates into copies, we need to change the - // copy to a MOV. - if (UseMI->getOpcode() == AMDGPU::COPY) { - unsigned DestReg = UseMI->getOperand(0).getReg(); - const TargetRegisterClass *DestRC - = TargetRegisterInfo::isVirtualRegister(DestReg) ? - MRI.getRegClass(DestReg) : - TRI.getPhysRegClass(DestReg); - - unsigned MovOp = TII->getMovOpcode(DestRC); - if (MovOp == AMDGPU::COPY) - continue; - - UseMI->setDesc(TII->get(MovOp)); - } - } - - const MCInstrDesc &UseDesc = UseMI->getDesc(); - - // Don't fold into target independent nodes. Target independent opcodes - // don't have defined register classes. - if (UseDesc.isVariadic() || - UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1) - continue; - - if (FoldingImm) { - MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); - tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII); - continue; - } - - tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII); - - // FIXME: We could try to change the instruction from 64-bit to 32-bit - // to enable more folding opportunites. The shrink operands pass - // already does this. + foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList, + CopiesToReplace, TII, TRI, MRI); } + // Make sure we add EXEC uses to any new v_mov instructions created. + for (MachineInstr *Copy : CopiesToReplace) + Copy->addImplicitDefUseOperands(MF); + for (FoldCandidate &Fold : FoldList) { if (updateOperand(Fold, TRI)) { // Clear kill flags. 
if (!Fold.isImm()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); - Fold.OpToFold->setIsKill(false); + // FIXME: Probably shouldn't bother trying to fold if not an + // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR + // copies. + MRI.clearKillFlags(Fold.OpToFold->getReg()); } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << Fold.UseOpNo << " of " << *Fold.UseMI << '\n'); diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp new file mode 100644 index 000000000000..6b3c81c3af74 --- /dev/null +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -0,0 +1,243 @@ +//===----------------------- SIFrameLowering.cpp --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +#include "SIFrameLowering.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" + +using namespace llvm; + + +static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo, + const MachineFrameInfo *FrameInfo) { + if (!FuncInfo->hasSpilledSGPRs()) + return false; + + if (FuncInfo->hasSpilledVGPRs()) + return false; + + for (int I = FrameInfo->getObjectIndexBegin(), + E = FrameInfo->getObjectIndexEnd(); I != E; ++I) { + if (!FrameInfo->isSpillSlotObjectIndex(I)) + return false; + } + + return true; +} + +static ArrayRef getAllSGPR128() { + return makeArrayRef(AMDGPU::SReg_128RegClass.begin(), + AMDGPU::SReg_128RegClass.getNumRegs()); +} + +static ArrayRef getAllSGPRs() { + return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), + AMDGPU::SGPR_32RegClass.getNumRegs()); +} + +void SIFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + if (!MF.getFrameInfo()->hasStackObjects()) + return; + + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + + SIMachineFunctionInfo *MFI = MF.getInfo(); + + // If we only have SGPR spills, we won't actually be using scratch memory + // since these spill to VGPRs. + // + // FIXME: We should be cleaning up these unused SGPR spill frame indices + // somewhere. + if (hasOnlySGPRSpills(MFI, MF.getFrameInfo())) + return; + + const SIInstrInfo *TII = + static_cast(MF.getSubtarget().getInstrInfo()); + const SIRegisterInfo *TRI = &TII->getRegisterInfo(); + const AMDGPUSubtarget &ST = MF.getSubtarget(); + + // We need to insert initialization of the scratch resource descriptor. + unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); + assert(ScratchRsrcReg != AMDGPU::NoRegister); + + unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); + assert(ScratchWaveOffsetReg != AMDGPU::NoRegister); + + unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + + unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; + if (ST.isAmdHsaOS()) { + PreloadedPrivateBufferReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + } + + // If we reserved the original input registers, we don't need to copy to the + // reserved registers. + if (ScratchRsrcReg == PreloadedPrivateBufferReg) { + // We should always reserve these 5 registers at the same time. 
+  assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
+         "scratch wave offset and private segment buffer inconsistent");
+    return;
+  }
+
+
+  // We added live-ins during argument lowering, but since they were not used
+  // they were deleted. We're adding the uses now, so add them back.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+
+  if (ST.isAmdHsaOS()) {
+    MRI.addLiveIn(PreloadedPrivateBufferReg);
+    MBB.addLiveIn(PreloadedPrivateBufferReg);
+  }
+
+  // We reserved the last registers for this. Shift it down to the end of those
+  // which were actually used.
+  //
+  // FIXME: It might be safer to use a pseudoregister before replacement.
+
+  // FIXME: We should be able to eliminate unused input registers. We only
+  // cannot do this for the resources required for scratch access. For now we
+  // skip over user SGPRs and may leave unused holes.
+
+  // We find the resource first because it has an alignment requirement.
+  if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+    // Skip the last 2 elements because the last one is reserved for VCC, and
+    // this is the 2nd to last element already.
+    for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+      // Pick the first unallocated one. Make sure we don't clobber the other
+      // reserved input we needed.
+      if (!MRI.isPhysRegUsed(Reg)) {
+        assert(MRI.isAllocatable(Reg));
+        MRI.replaceRegWith(ScratchRsrcReg, Reg);
+        ScratchRsrcReg = Reg;
+        MFI->setScratchRSrcReg(ScratchRsrcReg);
+        break;
+      }
+    }
+  }
+
+  if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    // Skip the last 2 elements because the last one is reserved for VCC, and
+    // this is the 2nd to last element already.
+    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+    for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+      // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+      // scratch descriptor, since we haven't added its uses yet.
+      if (!MRI.isPhysRegUsed(Reg)) {
+        assert(MRI.isAllocatable(Reg) &&
+               !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+        ScratchWaveOffsetReg = Reg;
+        MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+        break;
+      }
+    }
+  }
+
+
+  assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
+
+  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+  MachineBasicBlock::iterator I = MBB.begin();
+  DebugLoc DL;
+
+  if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+    // Make sure we emit the copy for the offset first. We may have chosen to copy
+    // the buffer resource into a register that aliases the input offset register.
+    BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
+      .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+  }
+
+  if (ST.isAmdHsaOS()) {
+    // Insert copies from argument register.
+ assert( + !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) && + !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg)); + + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3); + + unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1); + unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3); + + const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64); + + BuildMI(MBB, I, DL, SMovB64, Rsrc01) + .addReg(Lo, RegState::Kill); + BuildMI(MBB, I, DL, SMovB64, Rsrc23) + .addReg(Hi, RegState::Kill); + } else { + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + // Use relocations to get the pointer, and setup the other bits manually. + uint64_t Rsrc23 = TII->getScratchRsrcWords23(); + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc2) + .addImm(Rsrc23 & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc3) + .addImm(Rsrc23 >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } + + // Make the register selected live throughout the function. + for (MachineBasicBlock &OtherBB : MF) { + if (&OtherBB == &MBB) + continue; + + OtherBB.addLiveIn(ScratchRsrcReg); + OtherBB.addLiveIn(ScratchWaveOffsetReg); + } +} + +void SIFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + if (!MFI->hasStackObjects()) + return; + + bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects(); + + assert((RS || !MayNeedScavengingEmergencySlot) && + "RegScavenger required if spilling"); + + if (MayNeedScavengingEmergencySlot) { + int ScavengeFI = MFI->CreateSpillStackObject( + AMDGPU::SGPR_32RegClass.getSize(), + AMDGPU::SGPR_32RegClass.getAlignment()); + RS->addScavengingFrameIndex(ScavengeFI); + } +} diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h new file mode 100644 index 000000000000..a9152fd8b2aa --- /dev/null +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -0,0 +1,34 @@ +//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H + +#include "AMDGPUFrameLowering.h" + +namespace llvm { + +class SIFrameLowering final : public AMDGPUFrameLowering { +public: + SIFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) : + AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + ~SIFrameLowering() override {} + + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS = nullptr) const override; +}; + +} + +#endif diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index c2db9ff537e9..0e043cb47da7 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -20,6 +20,7 @@ #include "SIISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" @@ -51,6 +52,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); @@ -103,6 +107,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::v4i1, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); @@ -155,13 +160,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); + + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); + + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); + + setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); @@ -173,9 +195,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::SELECT, MVT::i1, Promote); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand); + + + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. 
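The 64-bit vector types registered above get their operations routed through v4i32 further down in this hunk via the promote-and-retag pattern. A hedged sketch of that pattern for a hypothetical target lowering (the class and method names are invented):

    // Sketch: mark each op Promote and record the carrier type; the
    // legalizer then bitcasts v2i64/v2f64 operands to v4i32 and back.
    void HypotheticalTargetLowering::promoteVec64Ops() {
      for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) {
        for (unsigned Op : {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT,
                            ISD::INSERT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR}) {
          setOperationAction(Op, Vec64, Promote);
          AddPromotedToType(Op, Vec64, MVT::v4i32);
        }
      }
    }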
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) { + for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch(Op) { case ISD::LOAD: @@ -186,6 +213,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, case ISD::INSERT_VECTOR_ELT: case ISD::INSERT_SUBVECTOR: case ISD::EXTRACT_SUBVECTOR: + case ISD::SCALAR_TO_VECTOR: break; case ISD::CONCAT_VECTORS: setOperationAction(Op, VT, Custom); @@ -197,6 +225,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, } } + // Most operations are naturally 32-bit vector operations. We only support + // load and store of i64 vectors, so promote v2i64 vector operations to v4i32. + for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32); + + setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32); + + setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32); + } + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); @@ -261,6 +305,41 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); } +bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const { + // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and + // additionally can do r + r + i with addr64. 32-bit has more addressing + // mode options. Depending on the resource constant, it can also do + // (i64 r0) + (i32 r1) * (i14 i). + // + // Private arrays end up using a scratch buffer most of the time, so also + // assume those use MUBUF instructions. Scratch loads / stores are currently + // implemented as mubuf instructions with offen bit set, so slightly + // different than the normal addr64. + if (!isUInt<12>(AM.BaseOffs)) + return false; + + // FIXME: Since we can split immediate into soffset and immediate offset, + // would it make sense to allow any immediate? + + switch (AM.Scale) { + case 0: // r + i or just i, depending on HasBaseReg. + return true; + case 1: + return true; // We have r + r or r + i. + case 2: + if (AM.HasBaseReg) { + // Reject 2 * r + r. + return false; + } + + // Allow 2 * r as r + r + // Or 2 * r + i is allowed as r + r + i. + return true; + default: // Don't allow n * r + return false; + } +} + bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { @@ -269,7 +348,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::GLOBAL_ADDRESS: { if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. @@ -282,51 +361,51 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, // because it has never been validated. return isLegalFlatAddressingMode(AM); } - // fall-through - case AMDGPUAS::PRIVATE_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? 
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { - // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and - // additionally can do r + r + i with addr64. 32-bit has more addressing - // mode options. Depending on the resource constant, it can also do - // (i64 r0) + (i32 r1) * (i14 i). - // - // SMRD instructions have an 8-bit, dword offset. - // - // Assume nonunifom access, since the address space isn't enough to know - // what instruction we will use, and since we don't know if this is a load - // or store and scalar stores are only available on VI. - // - // We also know if we are doing an extload, we can't do a scalar load. - // - // Private arrays end up using a scratch buffer most of the time, so also - // assume those use MUBUF instructions. Scratch loads / stores are currently - // implemented as mubuf instructions with offen bit set, so slightly - // different than the normal addr64. - if (!isUInt<12>(AM.BaseOffs)) - return false; - // FIXME: Since we can split immediate into soffset and immediate offset, - // would it make sense to allow any immediate? - - switch (AM.Scale) { - case 0: // r + i or just i, depending on HasBaseReg. - return true; - case 1: - return true; // We have r + r or r + i. - case 2: - if (AM.HasBaseReg) { - // Reject 2 * r + r. - return false; - } - - // Allow 2 * r as r + r - // Or 2 * r + i is allowed as r + r + i. - return true; - default: // Don't allow n * r - return false; - } + return isLegalMUBUFAddressingMode(AM); } + case AMDGPUAS::CONSTANT_ADDRESS: { + // If the offset isn't a multiple of 4, it probably isn't going to be + // correctly aligned. + if (AM.BaseOffs % 4 != 0) + return isLegalMUBUFAddressingMode(AM); + + // There are no SMRD extloads, so if we have to do a small type access we + // will use a MUBUF load. + // FIXME?: We also need to do this if unaligned, but we don't know the + // alignment here. + if (DL.getTypeStoreSize(Ty) < 4) + return isLegalMUBUFAddressingMode(AM); + + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + // SMRD instructions have an 8-bit, dword offset on SI. + if (!isUInt<8>(AM.BaseOffs / 4)) + return false; + } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) { + // On CI+, this can also be a 32-bit literal constant offset. If it fits + // in 8-bits, it can use a smaller encoding. + if (!isUInt<32>(AM.BaseOffs / 4)) + return false; + } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // On VI, these use the SMEM format and the offset is 20-bit in bytes. + if (!isUInt<20>(AM.BaseOffs)) + return false; + } else + llvm_unreachable("unhandled generation"); + + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. + return true; + + if (AM.Scale == 1 && AM.HasBaseReg) + return true; + + return false; + } + + case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + return isLegalMUBUFAddressingMode(AM); + case AMDGPUAS::LOCAL_ADDRESS: case AMDGPUAS::REGION_ADDRESS: { // Basic, single offset DS instructions allow a 16-bit unsigned immediate @@ -374,7 +453,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. - return Align % 4 == 0; + bool AlignedBy4 = (Align % 4 == 0); + if (IsFast) + *IsFast = AlignedBy4; + return AlignedBy4; } // Smaller than dword value must be aligned. 
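The constant-address case added above encodes three SMRD immediate ranges, one per generation. The same decision table as a standalone sketch (the generation enum and helper are illustrative, not the backend's types):

    #include <cstdint>

    enum class Gen { SI, CI, VI };

    // Sketch: SI takes an 8-bit dword offset, CI allows a 32-bit literal
    // dword offset, and VI uses a 20-bit byte offset. Negative offsets are
    // rejected by the unsigned comparisons, matching isUInt<N>.
    static bool isLegalSMRDImmOffset(Gen G, int64_t ByteOffs) {
      switch (G) {
      case Gen::SI: return (uint64_t)(ByteOffs / 4) < (1u << 8);
      case Gen::CI: return (uint64_t)(ByteOffs / 4) <= UINT32_MAX;
      case Gen::VI: return (uint64_t)ByteOffs < (1u << 20);
      }
      return false;
    }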
@@ -411,6 +493,32 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, return MVT::Other; } +static bool isFlatGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS; +} + +bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); +} + + +bool SITargetLowering::isMemOpUniform(const SDNode *N) const { + const MemSDNode *MemNode = cast(N); + const Value *Ptr = MemNode->getMemOperand()->getValue(); + + // UndefValue means this is a load of a kernel input. These are uniform. + // Sometimes LDS instructions have constant pointers + if (isa(Ptr) || isa(Ptr) || isa(Ptr) || + isa(Ptr)) + return true; + + const Instruction *I = dyn_cast_or_null(Ptr); + return I && I->getMetadata("amdgpu.uniform"); +} + TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(EVT VT) const { if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) @@ -426,12 +534,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } -static EVT toIntegerVT(EVT VT) { - if (VT.isVector()) - return VT.changeVectorElementTypeToInteger(); - return MVT::getIntegerVT(VT.getSizeInBits()); -} - SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -439,7 +541,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, MachineFunction &MF = DAG.getMachineFunction(); const SIRegisterInfo *TRI = static_cast(Subtarget->getRegisterInfo()); - unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR); + unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -455,30 +557,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, unsigned Align = DL.getABITypeAlignment(Ty); - if (VT != MemVT && VT.isFloatingPoint()) { - // Do an integer load and convert. - // FIXME: This is mostly because load legalization after type legalization - // doesn't handle FP extloads. - assert(VT.getScalarType() == MVT::f32 && - MemVT.getScalarType() == MVT::f16); - - EVT IVT = toIntegerVT(VT); - EVT MemIVT = toIntegerVT(MemVT); - SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, - IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, - false, // isVolatile - true, // isNonTemporal - true, // isInvariant - Align); // Alignment - SDValue Ops[] = { - DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), - Load.getValue(1) - }; - - return DAG.getMergeValues(Ops, SL); - } - ISD::LoadExtType ExtTy = Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (MemVT.isFloatingPoint()) + ExtTy = ISD::EXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile @@ -497,8 +579,16 @@ SDValue SITargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); FunctionType *FType = MF.getFunction()->getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo(); + const AMDGPUSubtarget &ST = MF.getSubtarget(); - assert(CallConv == CallingConv::C); + if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) { + const Function *Fn = MF.getFunction(); + DiagnosticInfoUnsupported NoGraphicsHSA(*Fn, "non-compute shaders with HSA"); + DAG.getContext()->diagnose(NoGraphicsHSA); + return SDValue(); + } + + // FIXME: We currently assume all calling conventions are kernels. SmallVector Splits; BitVector Skipped(Ins.size()); @@ -513,7 +603,7 @@ SDValue SITargetLowering::LowerFormalArguments( assert((PSInputNum <= 15) && "Too many PS inputs!"); if (!Arg.Used) { - // We can savely skip PS inputs + // We can safely skip PS inputs Skipped.set(i); ++PSInputNum; continue; @@ -530,7 +620,7 @@ SDValue SITargetLowering::LowerFormalArguments( // We REALLY want the ORIGINAL number of vertex elements here, e.g. a // three or five element vertex only needs three or five registers, - // NOT four or eigth. + // NOT four or eight. Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); @@ -556,41 +646,30 @@ SDValue SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(AMDGPU::VGPR1); } - // The pointer to the list of arguments is stored in SGPR0, SGPR1 - // The pointer to the scratch buffer is stored in SGPR2, SGPR3 - if (Info->getShaderType() == ShaderType::COMPUTE) { - if (Subtarget->isAmdHsaOS()) - Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers. - else - Info->NumUserSGPRs = 4; - - unsigned InputPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR); - unsigned InputPtrRegLo = - TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0); - unsigned InputPtrRegHi = - TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1); - - unsigned ScratchPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); - unsigned ScratchPtrRegLo = - TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0); - unsigned ScratchPtrRegHi = - TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1); - - CCInfo.AllocateReg(InputPtrRegLo); - CCInfo.AllocateReg(InputPtrRegHi); - CCInfo.AllocateReg(ScratchPtrRegLo); - CCInfo.AllocateReg(ScratchPtrRegHi); - MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass); - MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass); - } - if (Info->getShaderType() == ShaderType::COMPUTE) { getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins, Splits); } + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
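Each preloaded input wired up below follows the same two steps: declare the physical register as a function live-in, then mark it allocated so the calling-convention state skips it for later arguments. Condensed into one helper (the helper itself is hypothetical):

    // Sketch: register one preloaded user SGPR. addLiveIn exposes the
    // physreg to the function body; AllocateReg reserves it in CCState so
    // no later formal argument is assigned the same register.
    static void wirePreloadedInput(MachineFunction &MF, CCState &CCInfo,
                                   unsigned PhysReg,
                                   const TargetRegisterClass *RC) {
      MF.addLiveIn(PhysReg, RC);
      CCInfo.AllocateReg(PhysReg);
    }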
+  if (Info->hasPrivateSegmentBuffer()) {
+    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+    CCInfo.AllocateReg(PrivateSegmentBufferReg);
+  }
+
+  if (Info->hasDispatchPtr()) {
+    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(DispatchPtrReg);
+  }
+
+  if (Info->hasKernargSegmentPtr()) {
+    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+    MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(InputPtrReg);
+  }
+
   AnalyzeFormalArguments(CCInfo, Splits);

   SmallVector<SDValue, 16> Chains;
@@ -617,7 +696,7 @@ SDValue SITargetLowering::LowerFormalArguments(
                                    Offset, Ins[i].Flags.isSExt());
     Chains.push_back(Arg.getValue(1));

-    const PointerType *ParamTy =
+    auto *ParamTy =
       dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
     if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
         ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
@@ -678,10 +757,113 @@ SDValue SITargetLowering::LowerFormalArguments(
     InVals.push_back(Val);
   }

-  if (Info->getShaderType() != ShaderType::COMPUTE) {
-    unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
-        AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
-    Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+  // these from the dispatch pointer.
+
+  // Start adding system SGPRs.
+  if (Info->hasWorkGroupIDX()) {
+    unsigned Reg = Info->addWorkGroupIDX();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  } else
+    llvm_unreachable("work group id x is always enabled");
+
+  if (Info->hasWorkGroupIDY()) {
+    unsigned Reg = Info->addWorkGroupIDY();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasWorkGroupIDZ()) {
+    unsigned Reg = Info->addWorkGroupIDZ();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasWorkGroupInfo()) {
+    unsigned Reg = Info->addWorkGroupInfo();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasPrivateSegmentWaveByteOffset()) {
+    // Scratch wave offset passed in system SGPR.
+    unsigned PrivateSegmentWaveByteOffsetReg
+      = Info->addPrivateSegmentWaveByteOffset();
+
+    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+  }
+
+  // Now that we've figured out where the scratch register inputs are, see if
+  // we should reserve the arguments and use them directly.
+
+  bool HasStackObjects = MF.getFrameInfo()->hasStackObjects();
+
+  if (ST.isAmdHsaOS()) {
+    // TODO: Assume we will spill without optimizations.
+    if (HasStackObjects) {
+      // If we have stack objects, we unquestionably need the private buffer
+      // resource. For the HSA ABI, this will be the first 4 user SGPR
+      // inputs. We can reserve those and use them directly.
+ + unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + Info->setScratchRSrcReg(PrivateSegmentBufferReg); + + unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg); + } else { + unsigned ReservedBufferReg + = TRI->reservedPrivateSegmentBufferReg(MF); + unsigned ReservedOffsetReg + = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + + // We tentatively reserve the last registers (skipping the last two + // which may contain VCC). After register allocation, we'll replace + // these with the ones immediately after those which were really + // allocated. In the prologue copies will be inserted from the argument + // to these reserved registers. + Info->setScratchRSrcReg(ReservedBufferReg); + Info->setScratchWaveOffsetReg(ReservedOffsetReg); + } + } else { + unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF); + + // Without HSA, relocations are used for the scratch pointer and the + // buffer resource setup is always inserted in the prologue. Scratch wave + // offset is still in an input SGPR. + Info->setScratchRSrcReg(ReservedBufferReg); + + if (HasStackObjects) { + unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg); + } else { + unsigned ReservedOffsetReg + = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + Info->setScratchWaveOffsetReg(ReservedOffsetReg); + } + } + + if (Info->hasWorkItemIDX()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } else + llvm_unreachable("workitem id x should always be enabled"); + + if (Info->hasWorkItemIDY()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + + if (Info->hasWorkItemIDZ()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); } if (Chains.empty()) @@ -693,27 +875,11 @@ SDValue SITargetLowering::LowerFormalArguments( MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - MachineBasicBlock::iterator I = *MI; - const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); - switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::SI_RegisterStorePseudo: { - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - MachineInstrBuilder MIB = - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), - Reg); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - MIB.addOperand(MI->getOperand(i)); - - MI->eraseFromParent(); - break; - } } return BB; } @@ -944,20 +1110,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, const GlobalValue *GV = GSD->getGlobal(); MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); - SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); - - SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - 
DAG.getConstant(0, DL, MVT::i32)); - SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - DAG.getConstant(1, DL, MVT::i32)); - - SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrLo, GA); - SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrHi, DAG.getConstant(0, DL, MVT::i32), - SDValue(Lo.getNode(), 1)); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); + return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA); } SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, @@ -977,6 +1131,18 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, // a glue result. } +SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG, + SDValue Op, + MVT VT, + unsigned Offset) const { + SDLoc SL(Op); + SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL, + DAG.getEntryNode(), Offset, false); + // The local size values will have the hi 16-bits as zero. + return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param, + DAG.getValueType(VT)); +} + SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -988,7 +1154,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc DL(Op); unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); + // TODO: Should this propagate fast-math-flags? + switch (IntrinsicID) { + case Intrinsic::amdgcn_dispatch_ptr: + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, + TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); + case Intrinsic::r600_read_ngroups_x: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), SI::KernelInputOffsets::NGROUPS_X, false); @@ -1008,37 +1180,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), SI::KernelInputOffsets::GLOBAL_SIZE_Z, false); case Intrinsic::r600_read_local_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_X, false); + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_X); case Intrinsic::r600_read_local_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_Y, false); + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_Y); case Intrinsic::r600_read_local_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_Z, false); - + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_Z); case Intrinsic::AMDGPU_read_workdim: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - getImplicitParameterOffset(MFI, GRID_DIM), false); - + // Really only 2 bits. 
+ return lowerImplicitZextParam(DAG, Op, MVT::i8, + getImplicitParameterOffset(MFI, GRID_DIM)); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT); case Intrinsic::r600_read_tgid_y: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT); case Intrinsic::r600_read_tgid_z: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT); case Intrinsic::r600_read_tidig_x: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT); case Intrinsic::r600_read_tidig_y: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), VT); case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT); case AMDGPUIntrinsic::SI_load_const: { SDValue Ops[] = { Op.getOperand(1), @@ -1077,6 +1248,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getConstant(2, DL, MVT::i32), // P0 Op.getOperand(1), Op.getOperand(2), Glue); } + case AMDGPUIntrinsic::SI_packf16: + if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef()) + return DAG.getUNDEF(MVT::i32); + return Op; case AMDGPUIntrinsic::SI_fs_interp: { SDValue IJ = Op.getOperand(4); SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ, @@ -1092,6 +1267,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J, Op.getOperand(1), Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_interp_p1: { + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); + SDValue Glue = M0.getValue(1); + return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Glue); + } + case Intrinsic::amdgcn_interp_p2: { + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); + SDValue Glue = SDValue(M0.getNode(), 1); + return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), + Glue); + } default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } @@ -1152,16 +1340,29 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { "Custom lowering for non-i32 vectors hasn't been implemented."); unsigned NumElements = Op.getValueType().getVectorNumElements(); assert(NumElements != 2 && "v2 loads are supported for all address spaces."); + switch (Load->getAddressSpace()) { default: break; + case AMDGPUAS::CONSTANT_ADDRESS: + if (isMemOpUniform(Load)) + break; + // Non-uniform loads will be selected to MUBUF instructions, so they + // have the same legalization requirements as global and private + // loads. + // + // Fall-through case AMDGPUAS::GLOBAL_ADDRESS: case AMDGPUAS::PRIVATE_ADDRESS: + if (NumElements >= 8) + return SplitVectorLoad(Op, DAG); + + // v4 loads are supported for private and global memory.
if (NumElements <= 4) break; // fall-through case AMDGPUAS::LOCAL_ADDRESS: - return ScalarizeVectorLoad(Op, DAG); + // If properly aligned, if we split we might be able to use ds_read_b64. + return SplitVectorLoad(Op, DAG); } } @@ -1236,8 +1437,10 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const { if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags); } return SDValue(); @@ -1274,6 +1477,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + // TODO: Should this propagate fast-math-flags? + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); @@ -1379,7 +1584,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return Ret; if (VT.isVector() && VT.getVectorNumElements() >= 8) - return ScalarizeVectorStore(Op, DAG); + return SplitVectorStore(Op, DAG); if (VT == MVT::i1) return DAG.getTruncStore(Store->getChain(), DL, @@ -1393,6 +1598,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); + // TODO: Should this propagate fast-math-flags? SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, DAG.getConstantFP(0.5/M_PI, DL, @@ -2125,9 +2331,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, static_cast(Subtarget->getInstrInfo()); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - TII->legalizeOperands(MI); - if (TII->isMIMG(MI->getOpcode())) { + if (TII->isVOP3(MI->getOpcode())) { + // Make sure constant bus requirements are respected. + TII->legalizeOperandsVOP3(MRI, MI); + return; + } + + if (TII->isMIMG(*MI)) { unsigned VReg = MI->getOperand(0).getReg(); unsigned Writemask = MI->getOperand(1).getImm(); unsigned BitsSet = 0; @@ -2169,53 +2380,38 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const { const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); -#if 1 - // XXX - Workaround for moveToVALU not handling different register class - // inserts for REG_SEQUENCE. + static_cast(Subtarget->getInstrInfo()); - // Build the half of the subregister with the constants. - const SDValue Ops0[] = { - DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32), - buildSMovImm32(DAG, DL, 0), - DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), - buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), - DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32) - }; + // Build the half of the subregister with the constants before building the + // full 128-bit register. If we are building multiple resource descriptors, + // this will allow CSEing of the 2-component register. 
+ const SDValue Ops0[] = { + DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32), + buildSMovImm32(DAG, DL, 0), + DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), + buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), + DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32) + }; - SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, - MVT::v2i32, Ops0), 0); + SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::v2i32, Ops0), 0); - // Combine the constants and the pointer. - const SDValue Ops1[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), - Ptr, - DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), - SubRegHi, - DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32) - }; + // Combine the constants and the pointer. + const SDValue Ops1[] = { + DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + Ptr, + DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), + SubRegHi, + DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32) + }; - return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1); -#else - const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32), - Ptr, - DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32), - buildSMovImm32(DAG, DL, 0), - DAG.getTargetConstant(AMDGPU::sub2, MVT::i32), - buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32), - DAG.getTargetConstant(AMDGPU::sub3, MVT::i32) - }; - - return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); - -#endif + return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1); } /// \brief Return a resource descriptor with the 'Add TID' bit enabled -/// The TID (Thread ID) is multipled by the stride value (bits [61:48] -/// of the resource descriptor) to create an offset, which is added to the -/// resource ponter. +/// The TID (Thread ID) is multiplied by the stride value (bits [61:48] +/// of the resource descriptor) to create an offset, which is added to +/// the resource pointer. 
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr, @@ -2248,15 +2444,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); } -MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, - SDLoc DL, - SDValue Ptr) const { - const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); - - return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23()); -} - SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const { @@ -2274,13 +2461,41 @@ std::pair SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - if (Constraint == "r") { - switch(VT.SimpleTy) { - default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); - case MVT::i64: - return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); - case MVT::i32: + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 's': + case 'r': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::SReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::SReg_256RegClass); + } + + case 'v': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: + return std::make_pair(0U, &AMDGPU::VGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::VReg_64RegClass); + case 96: + return std::make_pair(0U, &AMDGPU::VReg_96RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::VReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::VReg_256RegClass); + case 512: + return std::make_pair(0U, &AMDGPU::VReg_512RegClass); + } } } @@ -2301,3 +2516,16 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } + +SITargetLowering::ConstraintType +SITargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 's': + case 'v': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index d84c32ec0092..e2f8cb19d6be 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -28,6 +28,9 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; + SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, + MVT VT, unsigned Offset) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; @@ -57,6 +60,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; bool isLegalFlatAddressingMode(const AddrMode &AM) const; + bool isLegalMUBUFAddressingMode(const AddrMode &AM) const; public: SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI); @@ -76,6 +80,9 @@ public: bool MemcpyStrSrc, MachineFunction &MF) const override; + bool isMemOpUniform(const SDNode *N) const; + bool 
isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; @@ -112,13 +119,10 @@ public: SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const; - MachineSDNode *buildScratchRSRC(SelectionDAG &DAG, - SDLoc DL, - SDValue Ptr) const; - std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const; }; diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 90a37f174682..821aada526c7 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -91,7 +91,8 @@ private: bool isOpRelevant(MachineOperand &Op); /// \brief Get register interval an operand affects. - RegInterval getRegInterval(MachineOperand &Op); + RegInterval getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const; /// \brief Handle instructions async components void pushInstruction(MachineBasicBlock &MBB, @@ -121,9 +122,13 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "SI insert wait instructions"; + return "SI insert wait instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace @@ -138,9 +143,8 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { } Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - Counters Result; + uint64_t TSFlags = MI.getDesc().TSFlags; + Counters Result = { { 0, 0, 0 } }; Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); @@ -151,15 +155,22 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may use larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - if (TII->isSMRD(MI.getOpcode())) { + if (TII->isSMRD(MI)) { - MachineOperand &Op = MI.getOperand(0); - assert(Op.isReg() && "First LGKM operand must be a register!"); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + if (MI.getNumOperands() != 0) { + assert(MI.getOperand(0).isReg() && + "First LGKM operand must be a register!"); + // XXX - What if this is a write into a super register? + const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); + unsigned Size = RC->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + } else { + // s_dcache_inv etc. do not have a destination register. Assume we + // want a wait on these. + // XXX - What is the right value? + Result.Named.LGKM = 1; + } } else { // DS Result.Named.LGKM = 1; @@ -173,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { } bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { - // Constants are always irrelevant - if (!Op.isReg()) + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) return false; // Defines are always relevant @@ -196,7 +206,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { // operand comes before the value operand and it may have // multiple data operands.
- if (TII->isDS(MI.getOpcode())) { + if (TII->isDS(MI)) { MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data); if (Data && Op.isIdenticalTo(*Data)) return true; @@ -224,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { return false; } -RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { - - if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) - return std::make_pair(0, 0); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - +RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const { + unsigned Size = RC->getSize(); assert(Size >= 4); RegInterval Result; - Result.first = TRI->getEncodingValue(Reg); + Result.first = TRI->getEncodingValue(Reg.getReg()); Result.second = Result.first + Size / 4; return Result; @@ -246,10 +251,13 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Get the hardware counter increments and sum them up Counters Increment = getHwCounts(*I); + Counters Limit = ZeroCounts; unsigned Sum = 0; for (unsigned i = 0; i < 3; ++i) { LastIssued.Array[i] += Increment.Array[i]; + if (Increment.Array[i]) + Limit.Array[i] = LastIssued.Array[i]; Sum += Increment.Array[i]; } @@ -261,7 +269,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, if (MBB.getParent()->getSubtarget().getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM + // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM // or SMEM clause, respectively. // // The temporary workaround is to break the clauses with S_NOP. @@ -270,7 +278,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // and destination registers don't overlap, e.g. this is illegal: // r0 = load r2 // r2 = load r0 - if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) || + if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) || (LastOpcodeType == VMEM && Increment.Named.VM)) { // Insert a NOP to break the clause. BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) @@ -278,7 +286,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, LastInstWritesM0 = false; } - if (TII->isSMRD(I->getOpcode())) + if (TII->isSMRD(*I)) LastOpcodeType = SMEM; else if (Increment.Named.VM) LastOpcodeType = VMEM; @@ -290,21 +298,21 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &Op = I->getOperand(i); if (!isOpRelevant(Op)) continue; - RegInterval Interval = getRegInterval(Op); + const TargetRegisterClass *RC = TII->getOpRegClass(*I, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { // Remember which registers we define if (Op.isDef()) - DefinedRegs[j] = LastIssued; + DefinedRegs[j] = Limit; // and which one we are using if (Op.isUse()) - UsedRegs[j] = LastIssued; + UsedRegs[j] = Limit; } } } @@ -390,12 +398,18 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (MI.getOpcode() == AMDGPU::S_SENDMSG) return LastIssued; - // For each register affected by this - // instruction increase the result sequence + // For each register affected by this instruction increase the result + // sequence. + // + // TODO: We could probably just look at explicit operands if we removed VCC / + // EXEC from SMRD dest reg classes. 
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &Op = MI.getOperand(i); - RegInterval Interval = getRegInterval(Op); + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) + continue; + + const TargetRegisterClass *RC = TII->getOpRegClass(MI, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { if (Op.isDef()) { diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 211666a9bdbc..0e883f64caa3 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,10 @@ class InstSI pattern> : field bits<1> WQM = 0; field bits<1> VGPRSpill = 0; + // This bit tells the assembler to use the 32-bit encoding in case it + // is unable to infer the encoding from the operands. + field bits<1> VOPAsmPrefer32Bit = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -68,10 +72,8 @@ class InstSI pattern> : let TSFlags{19} = FLAT; let TSFlags{20} = WQM; let TSFlags{21} = VGPRSpill; + let TSFlags{22} = VOPAsmPrefer32Bit; - // Most instructions require adjustments after selection to satisfy - // operand requirements. - let hasPostISelHook = 1; let SchedRW = [Write32Bit]; } @@ -86,7 +88,6 @@ class Enc64 { } class VOPDstOperand : RegisterOperand ; -def VOPDstVCC : VOPDstOperand ; let Uses = [EXEC] in { @@ -101,11 +102,11 @@ class VOPAnyCommon pattern> : } class VOPCCommon pattern> : - VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> { + VOPAnyCommon <(outs), ins, asm, pattern> { - let DisableEncoding = "$dst"; let VOPC = 1; let Size = 4; + let Defs = [VCC]; } class VOP1Common pattern> : @@ -138,6 +139,11 @@ class VOP3Common pattern> : let isCodeGenOnly = 0; int Size = 8; + + // Because SGPRs may be allowed if there are multiple operands, we + // need a post-isel hook to insert copies in order to avoid + // violating constant bus requirements. + let hasPostISelHook = 1; } } // End Uses = [EXEC] @@ -222,6 +228,20 @@ class SMRDe op, bits<1> imm> : Enc32 { let Inst{31-27} = 0x18; //encoding } +class SMRD_IMMe_ci op> : Enc64 { + bits<7> sdst; + bits<7> sbase; + bits<32> offset; + + let Inst{7-0} = 0xff; + let Inst{8} = 0; + let Inst{14-9} = sbase{6-1}; + let Inst{21-15} = sdst; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + let Inst{63-32} = offset; +} + let SchedRW = [WriteSALU] in { class SOP1 pattern> : InstSI { @@ -249,13 +269,13 @@ class SOP2 pattern> : class SOPC op, dag outs, dag ins, string asm, list pattern> : InstSI, SOPCe { - let DisableEncoding = "$dst"; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let SALU = 1; let SOPC = 1; let isCodeGenOnly = 0; + let Defs = [SCC]; let UseNamedOperandTable = 1; } @@ -598,15 +618,13 @@ class VINTRPCommon pattern> : // Vector I/O operations //===----------------------------------------------------------------------===// -let Uses = [EXEC] in { - class DS pattern> : InstSI { let LGKM_CNT = 1; let DS = 1; let UseNamedOperandTable = 1; - let Uses = [M0]; + let Uses = [M0, EXEC]; // Most instruction load and store data, so set this as the default. 
let mayLoad = 1; @@ -623,6 +641,7 @@ class MUBUF pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MUBUF = 1; + let Uses = [EXEC]; let hasSideEffects = 0; let UseNamedOperandTable = 1; @@ -636,6 +655,7 @@ class MTBUF pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; + let Uses = [EXEC]; let hasSideEffects = 0; let UseNamedOperandTable = 1; @@ -665,9 +685,7 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MIMG = 1; + let Uses = [EXEC]; let hasSideEffects = 0; // XXX ???? } - - -} // End Uses = [EXEC] diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index cfd2c42d1aef..a08a5a8fed36 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -82,6 +82,7 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, switch (MI->getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_B64_PSEUDO: return true; default: return false; @@ -204,7 +205,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, unsigned &Offset, const TargetRegisterInfo *TRI) const { unsigned Opc = LdSt->getOpcode(); - if (isDS(Opc)) { + + if (isDS(*LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(*LdSt, AMDGPU::OpName::offset); if (OffsetImm) { @@ -254,7 +256,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return false; } - if (isMUBUF(Opc) || isMTBUF(Opc)) { + if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) { if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) return false; @@ -270,7 +272,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return true; } - if (isSMRD(Opc)) { + if (isSMRD(*LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(*LdSt, AMDGPU::OpName::offset); if (!OffsetImm) @@ -289,20 +291,18 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt, unsigned NumLoads) const { - unsigned Opc0 = FirstLdSt->getOpcode(); - unsigned Opc1 = SecondLdSt->getOpcode(); - // TODO: This needs finer tuning if (NumLoads > 4) return false; - if (isDS(Opc0) && isDS(Opc1)) + if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) return true; - if (isSMRD(Opc0) && isSMRD(Opc1)) + if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) return true; - if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) + if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) && + (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt))) return true; return false; @@ -323,28 +323,45 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + }; + + static const int16_t Sub0_15_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, + AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, + AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, }; static const int16_t Sub0_7[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + }; + + static const int16_t Sub0_7_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, 
AMDGPU::sub6_sub7, }; static const int16_t Sub0_3[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + }; + + static const int16_t Sub0_3_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, }; static const int16_t Sub0_2[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, }; static const int16_t Sub0_1[] = { - AMDGPU::sub0, AMDGPU::sub1, 0 + AMDGPU::sub0, AMDGPU::sub1, }; unsigned Opcode; - const int16_t *SubIndices; + ArrayRef SubIndices; + bool Forward; if (AMDGPU::SReg_32RegClass.contains(DestReg)) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); @@ -360,7 +377,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) + BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) .addImm(0) .addReg(SrcReg, getKillRegState(KillSrc)); } @@ -375,18 +392,18 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_3; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_3_64; } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_7; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_7_64; } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_15; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_15_64; } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || @@ -428,13 +445,27 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Can't copy register!"); } - while (unsigned SubIdx = *SubIndices++) { + if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) + Forward = true; + else + Forward = false; + + for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { + unsigned SubIdx; + if (Forward) + SubIdx = SubIndices[Idx]; + else + SubIdx = SubIndices[SubIndices.size() - Idx - 1]; + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)); - Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc)); + Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); - if (*SubIndices) + if (Idx == SubIndices.size() - 1) + Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit); + + if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); } } @@ -471,6 +502,40 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { return AMDGPU::COPY; } +static unsigned getSGPRSpillSaveOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_S32_SAVE; + case 8: + return AMDGPU::SI_SPILL_S64_SAVE; + case 16: + return AMDGPU::SI_SPILL_S128_SAVE; + case 32: + return AMDGPU::SI_SPILL_S256_SAVE; + case 64: + return AMDGPU::SI_SPILL_S512_SAVE; + default: + llvm_unreachable("unknown register size"); + } +} + +static unsigned getVGPRSpillSaveOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_V32_SAVE; + case 8: + return AMDGPU::SI_SPILL_V64_SAVE; + case 16: + return AMDGPU::SI_SPILL_V128_SAVE; + case 32: + return AMDGPU::SI_SPILL_V256_SAVE; + case 64: + return AMDGPU::SI_SPILL_V512_SAVE; + default: + 
llvm_unreachable("unknown register size"); + } +} + void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, @@ -481,47 +546,83 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - int Opcode = -1; + + unsigned Size = FrameInfo->getObjectSize(FrameIndex); + unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, FrameIndex); + MachineMemOperand *MMO + = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + Size, Align); if (RI.isSGPRClass(RC)) { + MFI->setHasSpilledSGPRs(); + // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling // SGPRs. - switch (RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; - case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; - case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; - case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; - case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; - } - } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { - MFI->setHasSpilledVGPRs(); + unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode)) + .addReg(SrcReg) // src + .addFrameIndex(FrameIndex) // frame_idx + .addMemOperand(MMO); - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break; - case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break; - case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break; - case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break; - case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break; - case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break; - } + return; } - if (Opcode != -1) { - FrameInfo->setObjectAlignment(FrameIndex, 4); - BuildMI(MBB, MI, DL, get(Opcode)) - .addReg(SrcReg) - .addFrameIndex(FrameIndex) - // Place-holder registers, these will be filled in by - // SIPrepareScratchRegs. 
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else { + if (!ST.isVGPRSpillingEnabled(MFI)) { LLVMContext &Ctx = MF->getFunction()->getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" " spill register"); BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) - .addReg(SrcReg); + .addReg(SrcReg); + + return; + } + + assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); + + unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); + MFI->setHasSpilledVGPRs(); + BuildMI(MBB, MI, DL, get(Opcode)) + .addReg(SrcReg) // src + .addFrameIndex(FrameIndex) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addMemOperand(MMO); +} + +static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_S32_RESTORE; + case 8: + return AMDGPU::SI_SPILL_S64_RESTORE; + case 16: + return AMDGPU::SI_SPILL_S128_RESTORE; + case 32: + return AMDGPU::SI_SPILL_S256_RESTORE; + case 64: + return AMDGPU::SI_SPILL_S512_RESTORE; + default: + llvm_unreachable("unknown register size"); + } +} + +static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_V32_RESTORE; + case 8: + return AMDGPU::SI_SPILL_V64_RESTORE; + case 16: + return AMDGPU::SI_SPILL_V128_RESTORE; + case 32: + return AMDGPU::SI_SPILL_V256_RESTORE; + case 64: + return AMDGPU::SI_SPILL_V512_RESTORE; + default: + llvm_unreachable("unknown register size"); } } @@ -534,42 +635,43 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - int Opcode = -1; + unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); + unsigned Size = FrameInfo->getObjectSize(FrameIndex); - if (RI.isSGPRClass(RC)){ - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; - case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; - case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; - case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; - case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; - } - } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; - case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; - case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break; - case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break; - case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break; - case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break; - } + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, FrameIndex); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, Size, Align); + + if (RI.isSGPRClass(RC)) { + // FIXME: Maybe this should not include a memoperand because it will be + // lowered to non-memory instructions. + unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addFrameIndex(FrameIndex) // frame_idx + .addMemOperand(MMO); + + return; } - if (Opcode != -1) { - FrameInfo->setObjectAlignment(FrameIndex, 4); - BuildMI(MBB, MI, DL, get(Opcode), DestReg) - .addFrameIndex(FrameIndex) - // Place-holder registers, these will be filled in by - // SIPrepareScratchRegs. 
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - - } else { + if (!ST.isVGPRSpillingEnabled(MFI)) { LLVMContext &Ctx = MF->getFunction()->getContext(); Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" " restore register"); BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); + + return; } + + assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); + + unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addFrameIndex(FrameIndex) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addMemOperand(MMO); } /// \param @Offset Offset in bytes of the FrameIndex being spilled @@ -601,17 +703,21 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, if (MFI->getShaderType() == ShaderType::COMPUTE && WorkGroupSize > WavefrontSize) { - unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X); - unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y); - unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z); + unsigned TIDIGXReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); + unsigned TIDIGYReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); + unsigned TIDIGZReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); unsigned InputPtrReg = - TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR); + TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) Entry.addLiveIn(Reg); } RS->enterBasicBlock(&Entry); + // FIXME: Can we scavenge an SReg_64 and access the subregs? unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) @@ -667,8 +773,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, return TmpReg; } -void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, - int Count) const { +void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI, + int Count) const { while (Count > 0) { int Arg; if (Count >= 8) @@ -687,26 +793,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); - case AMDGPU::SI_CONSTDATA_PTR: { - unsigned Reg = MI->getOperand(0).getReg(); - unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); - unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); - - BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); - - // Add 32-bit offset from this instruction to the start of the constant data. - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) - .addReg(RegLo) - .addTargetIndex(AMDGPU::TI_CONSTDATA_START) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) - .addReg(RegHi) - .addImm(0) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) - .addReg(AMDGPU::SCC, RegState::Implicit); - MI->eraseFromParent(); - break; - } case AMDGPU::SGPR_USE: // This is just a placeholder for register allocation. 
MI->eraseFromParent(); @@ -760,49 +846,90 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MI->eraseFromParent(); break; } + + case AMDGPU::SI_CONSTDATA_PTR: { + const SIRegisterInfo *TRI = + static_cast(ST.getRegisterInfo()); + MachineFunction &MF = *MBB.getParent(); + unsigned Reg = MI->getOperand(0).getReg(); + unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); + unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); + + // Create a bundle so these instructions won't be re-ordered by the + // post-RA scheduler. + MIBundleBuilder Bundler(MBB, MI); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); + + // Add 32-bit offset from this instruction to the start of the + // constant data. + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) + .addReg(RegLo) + .addOperand(MI->getOperand(1))); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) + .addReg(RegHi) + .addImm(0)); + + llvm::finalizeBundle(MBB, Bundler.begin()); + + MI->eraseFromParent(); + break; + } } return true; } -MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI) const { - - if (MI->getNumOperands() < 3) - return nullptr; - +/// Commutes the operands in the given instruction. +/// The commutable operands are specified by their indices OpIdx0 and OpIdx1. +/// +/// Do not call this method for a non-commutable instruction or for +/// non-commutable pair of operand indices OpIdx0 and OpIdx1. +/// Even though the instruction is commutable, the method may still +/// fail to commute the operands, null pointer is returned in such cases. +MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx0, + unsigned OpIdx1) const { int CommutedOpcode = commuteOpcode(*MI); if (CommutedOpcode == -1) return nullptr; int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src0); - assert(Src0Idx != -1 && "Should always have src0 operand"); - MachineOperand &Src0 = MI->getOperand(Src0Idx); if (!Src0.isReg()) return nullptr; int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1); - if (Src1Idx == -1) + + if ((OpIdx0 != static_cast(Src0Idx) || + OpIdx1 != static_cast(Src1Idx)) && + (OpIdx0 != static_cast(Src1Idx) || + OpIdx1 != static_cast(Src0Idx))) return nullptr; MachineOperand &Src1 = MI->getOperand(Src1Idx); - // Make sure it's legal to commute operands for VOP2. - if (isVOP2(MI->getOpcode()) && - (!isOperandLegal(MI, Src0Idx, &Src1) || - !isOperandLegal(MI, Src1Idx, &Src0))) { - return nullptr; + + if (isVOP2(*MI)) { + const MCInstrDesc &InstrDesc = MI->getDesc(); + // For VOP2 instructions, any operand type is valid to use for src0. Make + // sure we can use the src1 as src0. + // + // We could be stricter here and only allow commuting if there is a reason + // to do so. i.e. if both operands are VGPRs there is no real benefit, + // although MachineCSE attempts to find matches by commuting. + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) + return nullptr; } if (!Src1.isReg()) { // Allow commuting instructions with Imm operands. if (NewMI || !Src1.isImm() || - (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { + (!isVOP2(*MI) && !isVOP3(*MI))) { return nullptr; } - // Be sure to copy the source modifiers to the right place. 
if (MachineOperand *Src0Mods = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { @@ -832,7 +959,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, Src1.ChangeToRegister(Reg, false); Src1.setSubReg(SubReg); } else { - MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); } if (MI) @@ -845,8 +972,8 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const { + unsigned &SrcOpIdx0, + unsigned &SrcOpIdx1) const { const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -857,7 +984,8 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, return false; // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on - // immediate. + // immediate. Also, immediate src0 operand is not handled in + // SIInstrInfo::commuteInstruction(); if (!MI->getOperand(Src0Idx).isReg()) return false; @@ -865,18 +993,22 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, if (Src1Idx == -1) return false; - if (!MI->getOperand(Src1Idx).isReg()) + MachineOperand &Src1 = MI->getOperand(Src1Idx); + if (Src1.isImm()) { + // SIInstrInfo::commuteInstruction() does support commuting the immediate + // operand src1 in 2 and 3 operand instructions. + if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode())) + return false; + } else if (Src1.isReg()) { + // If any source modifiers are set, the generic instruction commuting won't + // understand how to copy the source modifiers. + if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || + hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) + return false; + } else return false; - // If any source modifiers are set, the generic instruction commuting won't - // understand how to copy the source modifiers. - if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || - hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) - return false; - - SrcOpIdx1 = Src0Idx; - SrcOpIdx2 = Src1Idx; - return true; + return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); } MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, @@ -898,11 +1030,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const { } } -bool -SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { - return RC != &AMDGPU::EXECRegRegClass; -} - static void removeModOperands(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, @@ -984,9 +1111,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); } - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::src2)); - // ChangingToImmediate adds Src2 back to the instruction. 
Src2->ChangeToImmediate(Imm); removeModOperands(*UseMI); @@ -1045,18 +1169,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return false; } -bool -SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA) const { - switch(MI->getOpcode()) { - default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA); - case AMDGPU::S_MOV_B32: - case AMDGPU::S_MOV_B64: - case AMDGPU::V_MOV_B32_e32: - return MI->getOperand(1).isImm(); - } -} - static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; @@ -1088,9 +1200,6 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, AliasAnalysis *AA) const { - unsigned Opc0 = MIa->getOpcode(); - unsigned Opc1 = MIb->getOpcode(); - assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && "MIa must load from or modify a memory location"); assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && @@ -1105,32 +1214,32 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, // TODO: Should we check the address space from the MachineMemOperand? That // would allow us to distinguish objects we know don't alias based on the - // underlying addres space, even if it was lowered to a different one, + // underlying address space, even if it was lowered to a different one, // e.g. private accesses lowered to use MUBUF instructions on a scratch // buffer. - if (isDS(Opc0)) { - if (isDS(Opc1)) + if (isDS(*MIa)) { + if (isDS(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1); + return !isFLAT(*MIb); } - if (isMUBUF(Opc0) || isMTBUF(Opc0)) { - if (isMUBUF(Opc1) || isMTBUF(Opc1)) + if (isMUBUF(*MIa) || isMTBUF(*MIa)) { + if (isMUBUF(*MIb) || isMTBUF(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1) && !isSMRD(Opc1); + return !isFLAT(*MIb) && !isSMRD(*MIb); } - if (isSMRD(Opc0)) { - if (isSMRD(Opc1)) + if (isSMRD(*MIa)) { + if (isSMRD(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0); + return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa); } - if (isFLAT(Opc0)) { - if (isFLAT(Opc1)) + if (isFLAT(*MIa)) { + if (isFLAT(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return false; @@ -1319,6 +1428,26 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, return false; } +static unsigned findImplicitSGPRRead(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.implicit_operands()) { + // We only care about reads. + if (MO.isDef()) + continue; + + switch (MO.getReg()) { + case AMDGPU::VCC: + case AMDGPU::M0: + case AMDGPU::FLAT_SCR: + return MO.getReg(); + + default: + break; + } + } + + return AMDGPU::NoRegister; +} + bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const { uint16_t Opcode = MI->getOpcode(); @@ -1335,7 +1464,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, return false; } - // Make sure the register classes are correct + // Make sure the register classes are correct. for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isFPImm()) { ErrInfo = "FPImm Machine Operands are not supported. 
ISel should bitcast " @@ -1392,14 +1521,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Verify VOP* - if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { + if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; unsigned ConstantBusCount = 0; - unsigned SGPRUsed = AMDGPU::NoRegister; + unsigned SGPRUsed = findImplicitSGPRRead(*MI); + if (SGPRUsed != AMDGPU::NoRegister) + ++ConstantBusCount; + for (int OpIdx : OpIndices) { if (OpIdx == -1) break; @@ -1435,6 +1567,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, } } + // Make sure we aren't losing exec uses in the td files. This mostly requires + // being careful when using let Uses to try to add other use registers. + if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) { + const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC); + if (!Exec || !Exec->isImplicit()) { + ErrInfo = "VALU instruction does not implicitly read exec mask"; + return false; + } + } + return true; } @@ -1483,11 +1625,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; case AMDGPU::S_LOAD_DWORD_IMM: - case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; + case AMDGPU::S_LOAD_DWORD_SGPR: + case AMDGPU::S_LOAD_DWORD_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; case AMDGPU::S_LOAD_DWORDX2_IMM: - case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; + case AMDGPU::S_LOAD_DWORDX2_SGPR: + case AMDGPU::S_LOAD_DWORDX2_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; case AMDGPU::S_LOAD_DWORDX4_IMM: - case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; + case AMDGPU::S_LOAD_DWORDX4_SGPR: + case AMDGPU::S_LOAD_DWORDX4_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; @@ -1562,17 +1710,21 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, unsigned SubIdx, const TargetRegisterClass *SubRC) const { - assert(SuperReg.isReg()); - - unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); unsigned SubReg = MRI.createVirtualRegister(SubRC); + if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { + BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) + .addReg(SuperReg.getReg(), 0, SubIdx); + return SubReg; + } + // Just in case the super register is itself a sub-register, copy it to a new // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. 
- MachineBasicBlock *MBB = MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); + unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); @@ -1605,36 +1757,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm( return MachineOperand::CreateReg(SubReg, false); } -unsigned SIInstrInfo::split64BitImm(SmallVectorImpl &Worklist, - MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - const TargetRegisterClass *RC, - const MachineOperand &Op) const { - MachineBasicBlock *MBB = MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned Dst = MRI.createVirtualRegister(RC); - - MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), - LoDst) - .addImm(Op.getImm() & 0xFFFFFFFF); - MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), - HiDst) - .addImm(Op.getImm() >> 32); - - BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst) - .addReg(LoDst) - .addImm(AMDGPU::sub0) - .addReg(HiDst) - .addImm(AMDGPU::sub1); - - Worklist.push_back(Lo); - Worklist.push_back(Hi); - - return Dst; -} - // Change the order of operands from (0, 1, 2) to (0, 2, 1) void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { assert(Inst->getNumExplicitOperands() == 3); @@ -1643,6 +1765,41 @@ void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { Inst->addOperand(Op1); } +bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const { + if (!MO.isReg()) + return false; + + unsigned Reg = MO.getReg(); + const TargetRegisterClass *RC = + TargetRegisterInfo::isVirtualRegister(Reg) ? + MRI.getRegClass(Reg) : + RI.getPhysRegClass(Reg); + + // In order to be legal, the common sub-class must be equal to the + // class of the current operand. For example: + // + // v_mov_b32 s0 ; Operand defined as vsrc_32 + // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL + // + // s_sendmsg 0, s0 ; Operand defined as m0reg + // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL + + return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; +} + +bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const { + if (MO.isReg()) + return isLegalRegOperand(MRI, OpInfo, MO); + + // Handle non-register types that are treated like immediates. + assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); + return true; +} + bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, const MachineOperand *MO) const { const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -1653,7 +1810,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (!MO) MO = &MI->getOperand(OpIdx); - if (isVALU(InstDesc.Opcode) && + if (isVALU(*MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) { unsigned SGPRUsed = MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister; @@ -1670,21 +1827,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (MO->isReg()) { assert(DefinedRC); - const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? 
- MRI.getRegClass(MO->getReg()) : - RI.getPhysRegClass(MO->getReg()); - - // In order to be legal, the common sub-class must be equal to the - // class of the current operand. For example: - // - // v_mov_b32 s0 ; Operand defined as vsrc_32 - // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL - // - // s_sendmsg 0, s0 ; Operand defined as m0reg - // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL - - return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; + return isLegalRegOperand(MRI, OpInfo, *MO); } @@ -1699,81 +1842,143 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, return isImmOperandLegal(MI, OpIdx, *MO); } -void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); +void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, + MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &InstrDesc = get(Opc); - int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src0); - int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src1); - int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src2); + int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); + MachineOperand &Src1 = MI->getOperand(Src1Idx); - // Legalize VOP2 - if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { - // Legalize src0 - if (!isOperandLegal(MI, Src0Idx)) + // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 + // we need to only have one constant bus use. + // + // Note we do not need to worry about literal constants here. They are + // disabled for the operand type for instructions because they will always + // violate the one constant bus use rule. + bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister; + if (HasImplicitSGPR) { + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + MachineOperand &Src0 = MI->getOperand(Src0Idx); + + if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) legalizeOpWithMove(MI, Src0Idx); + } - // Legalize src1 - if (isOperandLegal(MI, Src1Idx)) - return; - - // Usually src0 of VOP2 instructions allow more types of inputs - // than src1, so try to commute the instruction to decrease our - // chances of having to insert a MOV instruction to legalize src1. - if (MI->isCommutable()) { - if (commuteInstruction(MI)) - // If we are successful in commuting, then we know MI is legal, so - // we are done. - return; - } + // VOP2 src0 instructions support all operand types, so we don't need to check + // their legality. If src1 is already legal, we don't need to do anything. + if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) + return; + // We do not use commuteInstruction here because it is too aggressive and will + // commute if it is possible. We only want to commute here if it improves + // legality. This can be called a fairly large number of times so don't waste + // compile time pointlessly swapping and checking legality again. + if (HasImplicitSGPR || !MI->isCommutable()) { legalizeOpWithMove(MI, Src1Idx); return; } - // XXX - Do any VOP3 instructions read VCC? - // Legalize VOP3 - if (isVOP3(MI->getOpcode())) { - int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + MachineOperand &Src0 = MI->getOperand(Src0Idx); - // Find the one SGPR operand we are allowed to use. 
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); + // If src0 can be used as src1, commuting will make the operands legal. + // Otherwise we have to give up and insert a move. + // + // TODO: Other immediate-like operand kinds could be commuted if there was a + // MachineOperand::ChangeTo* for them. + if ((!Src1.isImm() && !Src1.isReg()) || + !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { + legalizeOpWithMove(MI, Src1Idx); + return; + } - for (unsigned i = 0; i < 3; ++i) { - int Idx = VOP3Idx[i]; - if (Idx == -1) - break; - MachineOperand &MO = MI->getOperand(Idx); + int CommutedOpc = commuteOpcode(*MI); + if (CommutedOpc == -1) { + legalizeOpWithMove(MI, Src1Idx); + return; + } - if (MO.isReg()) { - if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) - continue; // VGPRs are legal + MI->setDesc(get(CommutedOpc)); - assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction"); + unsigned Src0Reg = Src0.getReg(); + unsigned Src0SubReg = Src0.getSubReg(); + bool Src0Kill = Src0.isKill(); - if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { - SGPRReg = MO.getReg(); - // We can use one SGPR in each VOP3 instruction. - continue; - } - } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { - // If it is not a register and not a literal constant, then it must be - // an inline constant which is always legal. - continue; - } - // If we make it this far, then the operand is not legal and we must - // legalize it. - legalizeOpWithMove(MI, Idx); + if (Src1.isImm()) + Src0.ChangeToImmediate(Src1.getImm()); + else if (Src1.isReg()) { + Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); + Src0.setSubReg(Src1.getSubReg()); + } else + llvm_unreachable("Should only have register or immediate operands"); + + Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); + Src1.setSubReg(Src0SubReg); +} + +// Legalize VOP3 operands. Because all operand types are supported for any +// operand, and since literal constants are not allowed and should never be +// seen, we only need to worry about inserting copies if we use multiple SGPR +// operands. +void SIInstrInfo::legalizeOperandsVOP3( + MachineRegisterInfo &MRI, + MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + int VOP3Idx[3] = { + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) + }; + + // Find the one SGPR operand we are allowed to use. + unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); + + for (unsigned i = 0; i < 3; ++i) { + int Idx = VOP3Idx[i]; + if (Idx == -1) + break; + MachineOperand &MO = MI->getOperand(Idx); + + // We should never see a VOP3 instruction with an illegal immediate operand. + if (!MO.isReg()) + continue; + + if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) + continue; // VGPRs are legal + + if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { + SGPRReg = MO.getReg(); + // We can use one SGPR in each VOP3 instruction. + continue; } + + // If we make it this far, then the operand is not legal and we must + // legalize it. 
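Both legalizeOperandsVOP2 and legalizeOperandsVOP3 above enforce the same hardware restriction: a VALU instruction may read at most one unique SGPR through the constant bus (inline constants aside), so any further SGPR operand must be copied to a VGPR. A standalone sketch of that counting rule (editor's illustration with an invented operand encoding, not LLVM API; negative values stand for VGPRs, non-negative values for SGPR ids):

    #include <vector>

    // Returns the indices of operands that must be moved to a VGPR so that
    // at most one *distinct* SGPR remains readable through the constant bus.
    std::vector<int> operandsToLegalize(const std::vector<int> &Ops) {
      std::vector<int> MustMove;
      int UsedSGPR = -1; // no SGPR seen yet
      for (int i = 0, e = (int)Ops.size(); i != e; ++i) {
        int Op = Ops[i];
        if (Op < 0)
          continue; // VGPRs are always legal
        if (UsedSGPR == -1 || UsedSGPR == Op) {
          UsedSGPR = Op; // first SGPR, or a reuse of the same one
          continue;
        }
        MustMove.push_back(i); // a second distinct SGPR: needs a copy
      }
      return MustMove;
    }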
+ legalizeOpWithMove(MI, Idx); + } +} + +void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + // Legalize VOP2 + if (isVOP2(*MI)) { + legalizeOperandsVOP2(MRI, MI); + return; + } + + // Legalize VOP3 + if (isVOP3(*MI)) { + legalizeOperandsVOP3(MRI, MI); + return; } // Legalize REG_SEQUENCE and PHI // The register class of the operands must be the same type as the register // class of the output. - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE || - MI->getOpcode() == AMDGPU::PHI) { + if (MI->getOpcode() == AMDGPU::PHI) { const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { if (!MI->getOperand(i).isReg() || @@ -1802,28 +2007,55 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } // Update all the operands so they have the same type. - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { - if (!MI->getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; unsigned DstReg = MRI.createVirtualRegister(RC); - MachineBasicBlock *InsertBB; - MachineBasicBlock::iterator Insert; - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { - InsertBB = MI->getParent(); - Insert = MI; - } else { - // MI is a PHI instruction. - InsertBB = MI->getOperand(i + 1).getMBB(); - Insert = InsertBB->getFirstTerminator(); - } - BuildMI(*InsertBB, Insert, MI->getDebugLoc(), - get(AMDGPU::COPY), DstReg) - .addOperand(MI->getOperand(i)); - MI->getOperand(i).setReg(DstReg); + + // MI is a PHI instruction. + MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB(); + MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); + + BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); + Op.setReg(DstReg); } } + // REG_SEQUENCE doesn't really require operand legalization, but if one has a + // VGPR dest type and SGPR sources, insert copies so all operands are + // VGPRs. This seems to help operand folding / the register coalescer. + if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0); + if (RI.hasVGPRs(DstRC)) { + // Update all the operands so they are VGPR register classes. These may + // not be the same register class because REG_SEQUENCE supports mixing + // subregister index types e.g. sub0_sub1 + sub2 + sub3 + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + continue; + + const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); + const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); + if (VRC == OpRC) + continue; + + unsigned DstReg = MRI.createVirtualRegister(VRC); + + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); + + Op.setReg(DstReg); + Op.setIsKill(); + } + } + + return; + } + // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) { @@ -1858,15 +2090,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } MachineBasicBlock &MBB = *MI->getParent(); + // Extract the ptr from the resource descriptor. 
- - // SRsrcPtrLo = srsrc:sub0 - unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, - &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass); - - // SRsrcPtrHi = srsrc:sub1 - unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, - &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass); + unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, + &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); @@ -1891,80 +2118,112 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), - NewSRsrc) - .addReg(Zero64) - .addImm(AMDGPU::sub0_sub1) - .addReg(SRsrcFormatLo) - .addImm(AMDGPU::sub2) - .addReg(SRsrcFormatHi) - .addImm(AMDGPU::sub3); + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) + .addReg(Zero64) + .addImm(AMDGPU::sub0_sub1) + .addReg(SRsrcFormatLo) + .addImm(AMDGPU::sub2) + .addReg(SRsrcFormatHi) + .addImm(AMDGPU::sub3); MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); - unsigned NewVAddrLo; - unsigned NewVAddrHi; if (VAddr) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. - NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), - NewVAddrLo) - .addReg(SRsrcPtrLo) - .addReg(VAddr->getReg(), 0, AMDGPU::sub0) - .addReg(AMDGPU::VCC, RegState::ImplicitDefine); + // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 + DebugLoc DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) + .addReg(SRsrcPtr, 0, AMDGPU::sub0) + .addReg(VAddr->getReg(), 0, AMDGPU::sub0); - // NewVaddrHi = SRsrcPtrHi + VAddr:sub1 - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32), - NewVAddrHi) - .addReg(SRsrcPtrHi) - .addReg(VAddr->getReg(), 0, AMDGPU::sub1) - .addReg(AMDGPU::VCC, RegState::ImplicitDefine) - .addReg(AMDGPU::VCC, RegState::Implicit); + // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1 + BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi) + .addReg(SRsrcPtr, 0, AMDGPU::sub1) + .addReg(VAddr->getReg(), 0, AMDGPU::sub1); + // NewVaddr = {NewVaddrHi, NewVaddrLo} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) + .addReg(NewVAddrLo) + .addImm(AMDGPU::sub0) + .addReg(NewVAddrHi) + .addImm(AMDGPU::sub1); } else { // This instruction is the _OFFSET variant, so we need to convert it to // ADDR64. + assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() + < AMDGPUSubtarget::VOLCANIC_ISLANDS && + "FIXME: Need to emit flat atomics here"); + MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); - - // Create the new instruction. 
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); - MachineInstr *Addr64 = - BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) - .addOperand(*VData) - .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. - // This will be replaced later - // with the new value of vaddr. - .addOperand(*SRsrc) - .addOperand(*SOffset) - .addOperand(*Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // tfe + + // Atomics with return have an additional tied operand and are + // missing some of the special bits. + MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in); + MachineInstr *Addr64; + + if (!VDataIn) { + // Regular buffer load / store. + MachineInstrBuilder MIB + = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. + .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset); + + // Atomics do not have this operand. + if (const MachineOperand *GLC + = getNamedOperand(*MI, AMDGPU::OpName::glc)) { + MIB.addImm(GLC->getImm()); + } + + MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)); + + if (const MachineOperand *TFE + = getNamedOperand(*MI, AMDGPU::OpName::tfe)) { + MIB.addImm(TFE->getImm()); + } + + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + Addr64 = MIB; + } else { + // Atomics with return. + Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addOperand(*VDataIn) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. + .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset) + .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + } MI->removeFromParent(); MI = Addr64; - NewVAddrLo = SRsrcPtrLo; - NewVAddrHi = SRsrcPtrHi; + // NewVaddr = {NewVaddrHi, NewVaddrLo} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) + .addReg(SRsrcPtr, 0, AMDGPU::sub0) + .addImm(AMDGPU::sub0) + .addReg(SRsrcPtr, 0, AMDGPU::sub1) + .addImm(AMDGPU::sub1); + VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); } - // NewVaddr = {NewVaddrHi, NewVaddrLo} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), - NewVAddr) - .addReg(NewVAddrLo) - .addImm(AMDGPU::sub0) - .addReg(NewVAddrHi) - .addImm(AMDGPU::sub1); - - // Update the instruction to use NewVaddr VAddr->setReg(NewVAddr); // Update the instruction to use NewSRsrc @@ -2028,53 +2287,64 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI, .addOperand(*SOff); unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR) - .addOperand(*SOff) - .addImm(HalfSize); - Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp)) + .addReg(SOff->getReg(), 0, SOff->getSubReg()) + .addImm(HalfSize); + Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) .addReg(SBase->getReg(), getKillRegState(IsKill), SBase->getSubReg()) .addReg(OffsetSGPR); } unsigned SubLo, SubHi; + const TargetRegisterClass *NewDstRC; switch (HalfSize) { case 4: SubLo = AMDGPU::sub0; SubHi = AMDGPU::sub1; + NewDstRC = &AMDGPU::VReg_64RegClass; break; case 8: SubLo = AMDGPU::sub0_sub1; SubHi = AMDGPU::sub2_sub3; + NewDstRC = &AMDGPU::VReg_128RegClass; break; case 16: SubLo = AMDGPU::sub0_sub1_sub2_sub3; SubHi = AMDGPU::sub4_sub5_sub6_sub7; + NewDstRC = 
&AMDGPU::VReg_256RegClass; break; case 32: SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15; + NewDstRC = &AMDGPU::VReg_512RegClass; break; default: llvm_unreachable("Unhandled HalfSize"); } - BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE)) - .addOperand(MI->getOperand(0)) - .addReg(RegLo) - .addImm(SubLo) - .addReg(RegHi) - .addImm(SubHi); + unsigned OldDst = MI->getOperand(0).getReg(); + unsigned NewDst = MRI.createVirtualRegister(NewDstRC); + + MRI.replaceRegWith(OldDst, NewDst); + + BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDst) + .addReg(RegLo) + .addImm(SubLo) + .addReg(RegHi) + .addImm(SubHi); } -void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const { +void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, + MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const { MachineBasicBlock *MBB = MI->getParent(); - switch (MI->getOpcode()) { - case AMDGPU::S_LOAD_DWORD_IMM: - case AMDGPU::S_LOAD_DWORD_SGPR: - case AMDGPU::S_LOAD_DWORDX2_IMM: - case AMDGPU::S_LOAD_DWORDX2_SGPR: - case AMDGPU::S_LOAD_DWORDX4_IMM: - case AMDGPU::S_LOAD_DWORDX4_SGPR: { + int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + assert(DstIdx != -1); + unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass; + switch(RI.getRegClass(DstRCID)->getSize()) { + case 4: + case 8: + case 16: { unsigned NewOpcode = getVALUOp(*MI); unsigned RegOffset; unsigned ImmOffset; @@ -2118,53 +2388,55 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) .addImm(RsrcDataFormat >> 32); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) - .addReg(DWord0) - .addImm(AMDGPU::sub0) - .addReg(DWord1) - .addImm(AMDGPU::sub1) - .addReg(DWord2) - .addImm(AMDGPU::sub2) - .addReg(DWord3) - .addImm(AMDGPU::sub3); - MI->setDesc(get(NewOpcode)); - if (MI->getOperand(2).isReg()) { - MI->getOperand(2).setReg(SRsrc); - } else { - MI->getOperand(2).ChangeToRegister(SRsrc, false); - } - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe + .addReg(DWord0) + .addImm(AMDGPU::sub0) + .addReg(DWord1) + .addImm(AMDGPU::sub1) + .addReg(DWord2) + .addImm(AMDGPU::sub2) + .addReg(DWord3) + .addImm(AMDGPU::sub3); - const TargetRegisterClass *NewDstRC = - RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); - - unsigned DstReg = MI->getOperand(0).getReg(); + const MCInstrDesc &NewInstDesc = get(NewOpcode); + const TargetRegisterClass *NewDstRC + = RI.getRegClass(NewInstDesc.OpInfo[0].RegClass); unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); + unsigned DstReg = MI->getOperand(0).getReg(); MRI.replaceRegWith(DstReg, NewDstReg); + + MachineInstr *NewInst = + BuildMI(*MBB, MI, MI->getDebugLoc(), NewInstDesc, NewDstReg) + .addOperand(MI->getOperand(1)) // sbase + .addReg(SRsrc) + .addImm(0) + .addImm(ImmOffset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MI->eraseFromParent(); + + legalizeOperands(NewInst); + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); break; } - case AMDGPU::S_LOAD_DWORDX8_IMM: - case 
AMDGPU::S_LOAD_DWORDX8_SGPR: { + case 32: { MachineInstr *Lo, *Hi; splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM, AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi); MI->eraseFromParent(); - moveSMRDToVALU(Lo, MRI); - moveSMRDToVALU(Hi, MRI); + moveSMRDToVALU(Lo, MRI, Worklist); + moveSMRDToVALU(Hi, MRI, Worklist); break; } - case AMDGPU::S_LOAD_DWORDX16_IMM: - case AMDGPU::S_LOAD_DWORDX16_SGPR: { + case 64: { MachineInstr *Lo, *Hi; splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM, AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi); MI->eraseFromParent(); - moveSMRDToVALU(Lo, MRI); - moveSMRDToVALU(Hi, MRI); + moveSMRDToVALU(Lo, MRI, Worklist); + moveSMRDToVALU(Hi, MRI, Worklist); break; } } @@ -2185,51 +2457,28 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // Handle some special cases switch (Opcode) { default: - if (isSMRD(Inst->getOpcode())) { - moveSMRDToVALU(Inst, MRI); + if (isSMRD(*Inst)) { + moveSMRDToVALU(Inst, MRI, Worklist); + continue; } break; - case AMDGPU::S_MOV_B64: { - DebugLoc DL = Inst->getDebugLoc(); - - // If the source operand is a register we can replace this with a - // copy. - if (Inst->getOperand(1).isReg()) { - MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY)) - .addOperand(Inst->getOperand(0)) - .addOperand(Inst->getOperand(1)); - Worklist.push_back(Copy); - } else { - // Otherwise, we need to split this into two movs, because there is - // no 64-bit VALU move instruction. - unsigned Reg = Inst->getOperand(0).getReg(); - unsigned Dst = split64BitImm(Worklist, - Inst, - MRI, - MRI.getRegClass(Reg), - Inst->getOperand(1)); - MRI.replaceRegWith(Reg, Dst); - } - Inst->eraseFromParent(); - continue; - } case AMDGPU::S_AND_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_OR_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_XOR_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_NOT_B64: - splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); + splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); Inst->eraseFromParent(); continue; @@ -2281,6 +2530,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } break; + case AMDGPU::S_ABS_I32: + lowerScalarAbs(Worklist, Inst); + Inst->eraseFromParent(); + continue; + case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); @@ -2319,7 +2573,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->addOperand(MachineOperand::CreateImm(0)); } - addDescImplicitUseDef(NewDesc, Inst); + Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent()); if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { const MachineOperand &OffsetWidthOp = Inst->getOperand(2); @@ -2337,27 +2591,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } // Update the destination register class. 
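moveToVALU above is worklist-driven: once one instruction's result is moved to a VGPR, every user that cannot read a VGPR has to move as well, and then that user's users, until the set closes. The propagation, reduced to a plain graph walk (editor's sketch over an abstract def-use graph; all names here are invented, not the patch's API):

    #include <functional>
    #include <set>
    #include <vector>

    // Users[i] lists the instructions that read instruction i's result;
    // CanReadVGPR says whether a user tolerates a VGPR input unchanged.
    void moveToVALU(unsigned Root,
                    const std::vector<std::vector<unsigned>> &Users,
                    const std::function<bool(unsigned)> &CanReadVGPR,
                    std::set<unsigned> &MovedToVALU) {
      std::vector<unsigned> Worklist{Root};
      while (!Worklist.empty()) {
        unsigned Inst = Worklist.back();
        Worklist.pop_back();
        if (!MovedToVALU.insert(Inst).second)
          continue; // already moved
        for (unsigned Use : Users[Inst])
          if (!CanReadVGPR(Use))
            Worklist.push_back(Use); // this user must move too
      }
    }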
- - const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); - - switch (Opcode) { - // For target instructions, getOpRegClass just returns the virtual - // register class associated with the operand, so we need to find an - // equivalent VGPR register class in order to move the instruction to the - // VALU. - case AMDGPU::COPY: - case AMDGPU::PHI: - case AMDGPU::REG_SEQUENCE: - case AMDGPU::INSERT_SUBREG: - if (RI.hasVGPRs(NewDstRC)) - continue; - NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); - if (!NewDstRC) - continue; - break; - default: - break; - } + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); + if (!NewDstRC) + continue; unsigned DstReg = Inst->getOperand(0).getReg(); unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); @@ -2366,13 +2602,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // Legalize the operands legalizeOperands(Inst); - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), - E = MRI.use_end(); I != E; ++I) { - MachineInstr &UseMI = *I->getParent(); - if (!canReadVGPR(UseMI, I.getOperandNo())) { - Worklist.push_back(&UseMI); - } - } + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } } @@ -2390,6 +2620,30 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { return &AMDGPU::VGPR_32RegClass; } +void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, + MachineInstr *Inst) const { + MachineBasicBlock &MBB = *Inst->getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineBasicBlock::iterator MII = Inst; + DebugLoc DL = Inst->getDebugLoc(); + + MachineOperand &Dest = Inst->getOperand(0); + MachineOperand &Src = Inst->getOperand(1); + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) + .addImm(0) + .addReg(Src.getReg()); + + BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) + .addReg(Src.getReg()) + .addReg(TmpReg); + + MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); +} + void SIInstrInfo::splitScalar64BitUnaryOp( SmallVectorImpl &Worklist, MachineInstr *Inst, @@ -2414,20 +2668,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp( AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); + const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(DestRC); - MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) + unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + BuildMI(MBB, MII, DL, InstDesc, DestSub0) .addOperand(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); - MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) + unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + BuildMI(MBB, MII, DL, InstDesc, DestSub1) .addOperand(SrcReg0Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(DestRC); + unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -2436,10 +2691,11 @@ void 
SIInstrInfo::splitScalar64BitUnaryOp( MRI.replaceRegWith(Dest.getReg(), FullDestReg); - // Try to legalize the operands in case we need to swap the order to keep it - // valid. - Worklist.push_back(LoHalf); - Worklist.push_back(HiHalf); + // We don't need to legalizeOperands here because for a single operand, src0 + // will support any kind of input. + + // Move all users of this moved value. + addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBinaryOp( @@ -2474,9 +2730,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp( AMDGPU::sub0, Src1SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); + const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(DestRC); + unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) .addOperand(SrcReg0Sub0) .addOperand(SrcReg1Sub0); @@ -2486,12 +2743,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp( MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); + unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) .addOperand(SrcReg0Sub1) .addOperand(SrcReg1Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(DestRC); + unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -2502,8 +2759,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp( // Try to legalize the operands in case we need to swap the order to keep it // valid. - Worklist.push_back(LoHalf); - Worklist.push_back(HiHalf); + legalizeOperands(LoHalf); + legalizeOperands(HiHalf); + + // Move all users of this moved value. + addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, @@ -2532,18 +2792,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); - MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) + BuildMI(MBB, MII, DL, InstDesc, MidReg) .addOperand(SrcRegSub0) .addImm(0); - MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) + BuildMI(MBB, MII, DL, InstDesc, ResultReg) .addOperand(SrcRegSub1) .addReg(MidReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); - Worklist.push_back(First); - Worklist.push_back(Second); + // We don't need to legalize operands here. src0 for either instruction can be + // an SGPR, and the second input is unused or determined here. 
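The splitScalar64Bit* helpers above work because the two halves are independent: bitwise operations have no cross-half carries, and a 64-bit popcount is the sum of two 32-bit popcounts, with the second V_BCNT_U32_B32 (D = bcnt(S0) + S1) taking the first result as its add-in. A quick standalone check of both identities (editor's illustration; __builtin_popcount assumes GCC or Clang):

    #include <cstdint>

    // S_AND_B64 split into two V_AND_B32 halves, reassembled as REG_SEQUENCE.
    uint64_t and64ViaHalves(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) & uint32_t(B);             // sub0 half
      uint32_t Hi = uint32_t(A >> 32) & uint32_t(B >> 32); // sub1 half
      return (uint64_t(Hi) << 32) | Lo;
    }

    // 64-bit bit count as a chain of two V_BCNT_U32_B32 operations.
    uint32_t bcnt64ViaHalves(uint64_t V) {
      uint32_t Mid = __builtin_popcount(uint32_t(V)) + 0; // add-in: 0
      return __builtin_popcount(uint32_t(V >> 32)) + Mid; // add-in: Mid
    }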
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, @@ -2587,6 +2848,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return; } @@ -2605,33 +2867,53 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, - MachineInstr *Inst) const { - // Add the implict and explicit register definitions. - if (NewDesc.ImplicitUses) { - for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { - unsigned Reg = NewDesc.ImplicitUses[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); - } - } - - if (NewDesc.ImplicitDefs) { - for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { - unsigned Reg = NewDesc.ImplicitDefs[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); +void SIInstrInfo::addUsersToMoveToVALUWorklist( + unsigned DstReg, + MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), + E = MRI.use_end(); I != E; ++I) { + MachineInstr &UseMI = *I->getParent(); + if (!canReadVGPR(UseMI, I.getOperandNo())) { + Worklist.push_back(&UseMI); } } } +const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( + const MachineInstr &Inst) const { + const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); + + switch (Inst.getOpcode()) { + // For target instructions, getOpRegClass just returns the virtual register + // class associated with the operand, so we need to find an equivalent VGPR + // register class in order to move the instruction to the VALU. + case AMDGPU::COPY: + case AMDGPU::PHI: + case AMDGPU::REG_SEQUENCE: + case AMDGPU::INSERT_SUBREG: + if (RI.hasVGPRs(NewDstRC)) + return nullptr; + + NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); + if (!NewDstRC) + return nullptr; + return NewDstRC; + default: + return NewDstRC; + } +} + +// Find the one SGPR operand we are allowed to use. unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const { - const MCInstrDesc &Desc = get(MI->getOpcode()); + const MCInstrDesc &Desc = MI->getDesc(); // Find the one SGPR operand we are allowed to use. - unsigned SGPRReg = AMDGPU::NoRegister; - + // // First we need to consider the instruction's operand requirements before // legalizing. Some operands are required to be SGPRs, such as implicit uses // of VCC, but we are still bound by the constant bus requirement to only use @@ -2639,17 +2921,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, // // If the operand's class is an SGPR, we can never move it. - for (const MachineOperand &MO : MI->implicit_operands()) { - // We only care about reads. 
- if (MO.isDef()) - continue; - - if (MO.getReg() == AMDGPU::VCC) - return AMDGPU::VCC; - - if (MO.getReg() == AMDGPU::FLAT_SCR) - return AMDGPU::FLAT_SCR; - } + unsigned SGPRReg = findImplicitSGPRRead(*MI); + if (SGPRReg != AMDGPU::NoRegister) + return SGPRReg; unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -2660,16 +2934,23 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, break; const MachineOperand &MO = MI->getOperand(Idx); - if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) - SGPRReg = MO.getReg(); + if (!MO.isReg()) + continue; - if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) - UsedSGPRs[i] = MO.getReg(); + // Is this operand statically required to be an SGPR based on the operand + // constraints? + const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); + bool IsRequiredSGPR = RI.isSGPRClass(OpRC); + if (IsRequiredSGPR) + return MO.getReg(); + + // If this could be a VGPR or an SGPR, check the dynamic register class. + unsigned Reg = MO.getReg(); + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RI.isSGPRClass(RegRC)) + UsedSGPRs[i] = Reg; } - if (SGPRReg != AMDGPU::NoRegister) - return SGPRReg; - // We don't have a required SGPR operand, so we have a bit more freedom in // selecting operands to move. @@ -2680,6 +2961,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, // V_FMA_F32 v0, s0, s0, s0 -> No moves // V_FMA_F32 v0, s0, s1, s0 -> Move s1 + // TODO: If some of the operands are 64-bit SGPRs and some 32, we should + // prefer those. + if (UsedSGPRs[0] != AMDGPU::NoRegister) { if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[0]; @@ -2720,7 +3004,7 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead( unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( getIndirectIndexBegin(*MBB->getParent())); - return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC_V1)) .addOperand(I->getOperand(0)) .addOperand(I->getOperand(1)) .addReg(IndirectBaseReg) diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 5053786a39f5..307ef67ed263 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -39,14 +39,11 @@ private: unsigned SubIdx, const TargetRegisterClass *SubRC) const; - unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist, - MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - const TargetRegisterClass *RC, - const MachineOperand &Op) const; - void swapOperands(MachineBasicBlock::iterator Inst) const; + void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const; + void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr *Inst, unsigned Opcode) const; @@ -58,13 +55,24 @@ private: void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr *Inst) const; - void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; + void addUsersToMoveToVALUWorklist( + unsigned Reg, MachineRegisterInfo &MRI, + SmallVectorImpl<MachineInstr *> &Worklist) const; + + const TargetRegisterClass * + getDestEquivalentVGPRClass(const MachineInstr &Inst) const; bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa, MachineInstr *MIb) const; unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const; +protected: + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx0, + unsigned OpIdx1) const override; + public: 
explicit SIInstrInfo(const AMDGPUSubtarget &st); @@ -117,17 +125,14 @@ public: // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned. unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; + + LLVM_READONLY int commuteOpcode(const MachineInstr &MI) const; - MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const override; bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; - bool isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA = nullptr) const; - bool areMemAccessesTriviallyDisjoint( MachineInstr *MIa, MachineInstr *MIb, AliasAnalysis *AA = nullptr) const override; @@ -137,8 +142,6 @@ public: unsigned DstReg, unsigned SrcReg) const override; bool isMov(unsigned Opcode) const override; - bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; - bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned Reg, MachineRegisterInfo *MRI) const final; @@ -148,78 +151,154 @@ public: MachineBasicBlock::iterator &MI, LiveVariables *LV) const override; + static bool isSALU(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SALU; + } + bool isSALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } + static bool isVALU(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VALU; + } + bool isVALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VALU; } + static bool isSOP1(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOP1; + } + bool isSOP1(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP1; } + static bool isSOP2(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOP2; + } + bool isSOP2(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP2; } + static bool isSOPC(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPC; + } + bool isSOPC(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPC; } + static bool isSOPK(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPK; + } + bool isSOPK(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPK; } + static bool isSOPP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPP; + } + bool isSOPP(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPP; } + static bool isVOP1(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP1; + } + bool isVOP1(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP1; } + static bool isVOP2(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP2; + } + bool isVOP2(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP2; } + static bool isVOP3(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP3; + } + bool isVOP3(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP3; } + static bool isVOPC(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOPC; + } + bool isVOPC(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOPC; } + static bool isMUBUF(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; + } + bool isMUBUF(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MUBUF; } + static bool isMTBUF(const MachineInstr &MI) { + return MI.getDesc().TSFlags & 
SIInstrFlags::MTBUF; + } + bool isMTBUF(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MTBUF; } + static bool isSMRD(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SMRD; + } + bool isSMRD(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SMRD; } + static bool isDS(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::DS; + } + bool isDS(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::DS; } + static bool isMIMG(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::MIMG; + } + bool isMIMG(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MIMG; } + static bool isFLAT(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::FLAT; + } + bool isFLAT(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FLAT; } + static bool isWQM(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::WQM; + } + bool isWQM(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::WQM; } + static bool isVGPRSpill(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; + } + bool isVGPRSpill(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; } @@ -302,6 +381,26 @@ public: bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx, const MachineOperand *MO = nullptr) const; + /// \brief Check if \p MO would be a valid operand for the given operand + /// definition \p OpInfo. Note this does not attempt to validate constant bus + /// restrictions (e.g. literal constant usage). + bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const; + + /// \brief Check if \p MO (a register operand) is a legal register for the + /// given operand description. + bool isLegalRegOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const; + + /// \brief Legalize operands in \p MI by either commuting it or inserting a + /// copy of src1. + void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr *MI) const; + + /// \brief Fix operands in \p MI to satisfy constant bus requirements. + void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const; + /// \brief Legalize all operands in this instruction. This function may /// create new instruction and insert them before \p MI. void legalizeOperands(MachineInstr *MI) const; @@ -312,7 +411,8 @@ public: unsigned HalfImmOp, unsigned HalfSGPROp, MachineInstr *&Lo, MachineInstr *&Hi) const; - void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const; + void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const; /// \brief Replace this instruction's opcode with the equivalent VALU /// opcode. This function will also move the users of \p MI to the @@ -341,29 +441,49 @@ public: void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, unsigned SavReg, unsigned IndexReg) const; - void insertNOPs(MachineBasicBlock::iterator MI, int Count) const; + void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const; /// \brief Returns the operand named \p Op. If \p MI does not have an /// operand named \c Op, this function returns nullptr. 
+ LLVM_READONLY MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; + LLVM_READONLY const MachineOperand *getNamedOperand(const MachineInstr &MI, unsigned OpName) const { return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); } + /// Get required immediate operand + int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { + int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); + return MI.getOperand(Idx).getImm(); + } + uint64_t getDefaultRsrcDataFormat() const; uint64_t getScratchRsrcWords23() const; }; namespace AMDGPU { - + LLVM_READONLY int getVOPe64(uint16_t Opcode); + + LLVM_READONLY int getVOPe32(uint16_t Opcode); + + LLVM_READONLY int getCommuteRev(uint16_t Opcode); + + LLVM_READONLY int getCommuteOrig(uint16_t Opcode); + + LLVM_READONLY int getAddr64Inst(uint16_t Opcode); + + LLVM_READONLY int getAtomicRetOp(uint16_t Opcode); + + LLVM_READONLY int getAtomicNoRetOp(uint16_t Opcode); const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 8d8110bca4c5..10f2adde4867 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// def isCI : Predicate<"Subtarget->getGeneration() " ">= AMDGPUSubtarget::SEA_ISLANDS">; -def isVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, - AssemblerPredicate<"FeatureGCN3Encoding">; +def isCIOnly : Predicate<"Subtarget->getGeneration() ==" + "AMDGPUSubtarget::SEA_ISLANDS">, + AssemblerPredicate <"FeatureSeaIslands">; def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; @@ -69,6 +69,15 @@ class sopk <bits<5> si, bits<5> vi = si> { field bits<5> VI = vi; } +// Specify an SMRD opcode for SI and SMEM opcode for VI + +// FIXME: This should really be bits<5> si, Tablegen crashes if +// parameter default value is other parameter with different bit size +class smrd <bits<8> si, bits<8> vi = si> { + field bits<5> SI = si{4-0}; + field bits<8> VI = vi; +} + // Except for the NONE field, this must be kept in sync with the SISubtarget enum // in AMDGPUInstrInfo.cpp def SISubtarget { @@ -121,9 +130,20 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; def SIconstdata_ptr : SDNode< - "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]> + "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>, + SDTCisVT<0, i64>]> >; +def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(cast<LoadSDNode>(N)) || + isConstantLoad(cast<LoadSDNode>(N), -1); +}]>; + +def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(cast<LoadSDNode>(N), -1) && + static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N); +}]>; + //===----------------------------------------------------------------------===// // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1 // to be glued to the memory instructions. 
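The two PatFrags just added encode the load-selection split: smrd_load matches only constant-address loads that the target lowering proves uniform across the wave (isMemOpUniform), while global loads and divergent constant loads stay on the MUBUF path. The decision, reduced to a standalone predicate (editor's sketch; the type and field names are invented):

    enum class LoadUnit { SMRD, MUBUF };

    struct LoadInfo {
      bool IsConstantAddrSpace; // constant vs. global address space
      bool IsUniform;           // same address in every lane of the wave?
    };

    // Mirrors the predicates above: smrd_load requires a uniform constant
    // load; mubuf_load accepts global loads and any constant load.
    LoadUnit selectLoadUnit(const LoadInfo &L) {
      if (L.IsConstantAddrSpace && L.IsUniform)
        return LoadUnit::SMRD;
      return LoadUnit::MUBUF;
    }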
@@ -328,9 +348,9 @@ class SGPRImm : PatLeaf(Subtarget->getRegisterInfo()); for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); U != E; ++U) { - if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) { + const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); + if (RC && SIRI->isSGPRClass(RC)) return true; - } } return false; }]>; @@ -354,6 +374,8 @@ def sopp_brtarget : Operand { let ParserMatchClass = SoppBrTarget; } +def const_ga : Operand; + include "SIInstrFormats.td" include "VIInstrFormats.td" @@ -393,7 +415,7 @@ def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; class GLCBaseMatchClass : AsmOperandClass { let Name = "GLC"#parser; let PredicateMethod = "isImm"; - let ParserMethod = parser; + let ParserMethod = parser; let RenderMethod = "addImmOperands"; } @@ -436,6 +458,17 @@ def ClampMatchClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class SMRDOffsetBaseMatchClass : AsmOperandClass { + let Name = "SMRDOffset"#predicate; + let PredicateMethod = predicate; + let RenderMethod = "addImmOperands"; +} + +def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">; +def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass < + "isSMRDLiteralOffset" +>; + let OperandType = "OPERAND_IMMEDIATE" in { def offen : Operand { @@ -510,6 +543,16 @@ def ClampMod : Operand { let ParserMatchClass = ClampMatchClass; } +def smrd_offset : Operand { + let PrintMethod = "printU32ImmOperand"; + let ParserMatchClass = SMRDOffsetMatchClass; +} + +def smrd_literal_offset : Operand { + let PrintMethod = "printU32ImmOperand"; + let ParserMatchClass = SMRDLiteralOffsetMatchClass; +} + } // End OperandType = "OPERAND_IMMEDIATE" def VOPDstS64 : VOPDstOperand ; @@ -528,6 +571,13 @@ def MUBUFScratch : ComplexPattern; def MUBUFOffset : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; +def SMRDImm : ComplexPattern; +def SMRDImm32 : ComplexPattern; +def SMRDSgpr : ComplexPattern; +def SMRDBufferImm : ComplexPattern; +def SMRDBufferImm32 : ComplexPattern; +def SMRDBufferSgpr : ComplexPattern; + def VOP3Mods0 : ComplexPattern; def VOP3NoMods0 : ComplexPattern; def VOP3Mods0Clamp : ComplexPattern; @@ -717,19 +767,6 @@ class SOP2_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass SOP2_SELECT_32 pattern> { - def "" : SOP2_Pseudo ; - - def _si : SOP2_Real_si ; - - def _vi : SOP2_Real_vi ; -} - multiclass SOP2_m pattern> { @@ -758,8 +795,10 @@ multiclass SOP2_64_32 pattern> : SOP2_m < class SOPC_Helper op, RegisterOperand rc, ValueType vt, string opName, PatLeaf cond> : SOPC < - op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1), - opName#" $src0, $src1", []>; + op, (outs), (ins rc:$src0, rc:$src1), + opName#" $src0, $src1", []> { + let Defs = [SCC]; +} class SOPC_32 op, string opName, PatLeaf cond = COND_NULL> : SOPC_Helper; @@ -812,15 +851,20 @@ multiclass SOPK_32 pattern> { } multiclass SOPK_SCC pattern> { - def "" : SOPK_Pseudo ; + def "" : SOPK_Pseudo { + let Defs = [SCC]; + } - let DisableEncoding = "$dst" in { - def _si : SOPK_Real_si ; - def _vi : SOPK_Real_vi ; + def _si : SOPK_Real_si { + let Defs = [SCC]; + } + + def _vi : SOPK_Real_vi { + let Defs = [SCC]; } } @@ -868,35 +912,68 @@ class SMRD_Real_si op, string opName, bit imm, dag outs, dag ins, } class SMRD_Real_vi op, string opName, bit imm, dag outs, dag ins, - string asm> : - SMRD , + string asm, list pattern = []> : + SMRD , SMEMe_vi , SIMCInstr { let AssemblerPredicates = [isVI]; } -multiclass SMRD_m op, string opName, bit imm, dag outs, dag ins, 
+multiclass SMRD_m pattern> { def "" : SMRD_Pseudo ; - def _si : SMRD_Real_si ; + def _si : SMRD_Real_si ; // glc is only applicable to scalar stores, which are not yet // implemented. let glc = 0 in { - def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>; + def _vi : SMRD_Real_vi ; } } -multiclass SMRD_Helper op, string opName, RegisterClass baseClass, +multiclass SMRD_Inval { + let hasSideEffects = 1, mayStore = 1 in { + def "" : SMRD_Pseudo ; + + let sbase = 0, offset = 0 in { + let sdst = 0 in { + def _si : SMRD_Real_si ; + } + + let glc = 0, sdata = 0 in { + def _vi : SMRD_Real_vi ; + } + } + } +} + +class SMEM_Inval op, string opName, SDPatternOperator node> : + SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = 1; + let sbase = 0; + let sdata = 0; + let glc = 0; + let offset = 0; +} + +multiclass SMRD_Helper { defm _IMM : SMRD_m < op, opName#"_IMM", 1, (outs dstClass:$dst), - (ins baseClass:$sbase, u32imm:$offset), + (ins baseClass:$sbase, smrd_offset:$offset), opName#" $dst, $sbase, $offset", [] >; + def _IMM_ci : SMRD < + (outs dstClass:$dst), (ins baseClass:$sbase, smrd_literal_offset:$offset), + opName#" $dst, $sbase, $offset", []>, SMRD_IMMe_ci { + let AssemblerPredicates = [isCIOnly]; + } + defm _SGPR : SMRD_m < op, opName#"_SGPR", 0, (outs dstClass:$dst), (ins baseClass:$sbase, SReg_32:$soff), @@ -922,11 +999,12 @@ def InputModsNoDefault : Operand { let ParserMatchClass = InputModsMatchClass; } -class getNumSrcArgs { +class getNumSrcArgs { int ret = - !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 + !if (!eq(Src0.Value, untyped.Value), 0, + !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2 - 3)); // VOP3 + 3))); // VOP3 } // Returns the register class to use for the destination of VOP[123C] @@ -934,28 +1012,37 @@ class getNumSrcArgs { class getVALUDstForVT { RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, !if(!eq(VT.Size, 64), VOPDstOperand, - VOPDstOperand)); // else VT == i1 + !if(!eq(VT.Size, 16), VOPDstOperand, + VOPDstOperand))); // else VT == i1 } // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { - RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64); + RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32); } // Returns the register class to use for source 1 of VOP[12C] for the // given VT. class getVOPSrc1ForVT { - RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64); + RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32); } // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64); + RegisterOperand ret = + !if(!eq(VT.Size, 64), + VCSrc_64, + !if(!eq(VT.Value, i1.Value), + SCSrc_64, + VCSrc_32 + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. +// XXX - do f16 instructions? 
class hasModifiers { bit ret = !if(!eq(SrcVT.Value, f32.Value), 1, !if(!eq(SrcVT.Value, f64.Value), 1, 0)); @@ -1009,17 +1096,20 @@ class getIns64 { +class getAsm32 { + string dst = "$dst"; + string src0 = ", $src0"; string src1 = ", $src1"; string src2 = ", $src2"; - string ret = "$dst, $src0"# - !if(!eq(NumSrcArgs, 1), "", src1)# - !if(!eq(NumSrcArgs, 3), src2, ""); + string ret = !if(HasDst, dst, "") # + !if(!eq(NumSrcArgs, 1), src0, "") # + !if(!eq(NumSrcArgs, 2), src0#src1, "") # + !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. -class getAsm64 { +class getAsm64 { string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); string src1 = !if(!eq(NumSrcArgs, 1), "", !if(!eq(NumSrcArgs, 2), " $src1_modifiers", @@ -1027,11 +1117,10 @@ class getAsm64 { string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); string ret = !if(!eq(HasModifiers, 0), - getAsm32.ret, + getAsm32.ret, "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } - class VOPProfile _ArgVT> { field list ArgVT = _ArgVT; @@ -1047,29 +1136,38 @@ class VOPProfile _ArgVT> { field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; - field int NumSrcArgs = getNumSrcArgs.ret; + field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); + field bit HasDst32 = HasDst; + field int NumSrcArgs = getNumSrcArgs.ret; field bit HasModifiers = hasModifiers.ret; - field dag Outs = (outs DstRC:$dst); + field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs)); + + // VOP3b instructions are a special case with a second explicit + // output. This is manually overridden for them. + field dag Outs32 = Outs; + field dag Outs64 = Outs; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64.ret; - field string Asm32 = getAsm32.ret; - field string Asm64 = getAsm64.ret; + field string Asm32 = getAsm32.ret; + field string Asm64 = getAsm64.ret; } // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order // for the instruction patterns to work. -def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>; -def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>; -def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>; +def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; +def VOP_F16_I16 : VOPProfile <[f16, i32, untyped, untyped]>; +def VOP_I16_F16 : VOPProfile <[i32, f16, untyped, untyped]>; -def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>; -def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>; +def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; +def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>; +def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; + def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; @@ -1087,25 +1185,76 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> { + +// Write out to vcc or arbitrary SGPR. 
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { + let Asm32 = "$dst, vcc, $src0, $src1"; + let Asm64 = "$dst, $sdst, $src0, $src1"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); +} + +// Write out to vcc or arbitrary SGPR and read in from vcc or +// arbitrary SGPR. +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { + // We use VCSrc_32 to exclude literal constants, even though the + // encoding normally allows them since the implicit VCC use means + // using one would always violate the constant bus + // restriction. SGPRs are still allowed because it should + // technically be possible to use VCC again as src0. let Src0RC32 = VCSrc_32; + let Asm32 = "$dst, vcc, $src0, $src1, vcc"; + let Asm64 = "$dst, $sdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } -def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> { +class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod"; +} + +def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +// VOPC instructions are a special case because for the 32-bit +// encoding, we want to display the implicit vcc write as if it were +// an explicit $dst. +class VOPC_Profile : VOPProfile <[i1, vt0, vt1, untyped]> { + let Asm32 = "vcc, $src0, $src1"; + // The destination for 32-bit encoding is implicit. 
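// Editorial sketch: the VCSrc_32 comment above refers to the SI/VI "constant
// bus": a VALU instruction may read at most one SGPR-or-literal value per
// issue. Because the VOP2b carry-in form implicitly reads VCC (an SGPR pair),
// any literal operand would be a second constant-bus read, so the operand
// class forbids literals up front. Simplified legality check (the operand
// model is hypothetical; the real rule also counts repeated reads of the
// same SGPR only once, which is why reusing VCC as src0 stays legal):
#include <cassert>

struct Operand { bool isLiteral; bool isSGPR; };

bool fitsConstantBus(const Operand *ops, int n, bool implicitVCCRead) {
  int uses = implicitVCCRead ? 1 : 0; // the VCC read occupies the bus slot
  for (int i = 0; i < n; ++i)
    if (ops[i].isLiteral || ops[i].isSGPR)
      ++uses;
  return uses <= 1;
}

int main() {
  Operand lit{true, false}, vgpr{false, false};
  Operand ops1[] = {lit, vgpr};
  assert(!fitsConstantBus(ops1, 2, /*implicitVCCRead=*/true)); // literal + VCC
  Operand ops2[] = {vgpr, vgpr};
  assert(fitsConstantBus(ops2, 2, true)); // only the implicit VCC read
}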
+ let HasDst32 = 0; +} + +class VOPC_Class_Profile : VOPC_Profile { let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$dst, $src0_modifiers, $src1"; } -def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> { - let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - let Asm64 = "$dst, $src0_modifiers, $src1"; -} +def VOPC_I1_F32_F32 : VOPC_Profile; +def VOPC_I1_F64_F64 : VOPC_Profile; +def VOPC_I1_I32_I32 : VOPC_Profile; +def VOPC_I1_I64_I64 : VOPC_Profile; + +def VOPC_I1_F32_I32 : VOPC_Class_Profile; +def VOPC_I1_F64_I32 : VOPC_Class_Profile; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2); + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2); let Asm64 = "$dst, $src0, $src1, $src2"; } @@ -1119,13 +1268,60 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, HasModifiers>.ret; - let Asm32 = getAsm32<2>.ret; - let Asm64 = getAsm64<2, HasModifiers>.ret; + let Asm32 = getAsm32<1, 2>.ret; + let Asm64 = getAsm64<1, 2, HasModifiers>.ret; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; +class SIInstAlias : + InstAlias , PredicateControl { + + field bit isCompare; + field bit isCommutable; + + let ResultInst = + !if (p.HasDst32, + !if (!eq(p.NumSrcArgs, 0), + // 1 dst, 0 src + (inst p.DstRC:$dst), + !if (!eq(p.NumSrcArgs, 1), + // 1 dst, 1 src + (inst p.DstRC:$dst, p.Src0RC32:$src0), + !if (!eq(p.NumSrcArgs, 2), + // 1 dst, 2 src + (inst p.DstRC:$dst, p.Src0RC32:$src0, p.Src1RC32:$src1), + // else - unreachable + (inst)))), + // else + !if (!eq(p.NumSrcArgs, 2), + // 0 dst, 2 src + (inst p.Src0RC32:$src0, p.Src1RC32:$src1), + !if (!eq(p.NumSrcArgs, 1), + // 0 dst, 1 src + (inst p.Src0RC32:$src1), + // else + // 0 dst, 0 src + (inst)))); +} + +class SIInstAliasSI : + SIInstAlias (op_name#"_e32_si"), p> { + let AssemblerPredicate = SIAssemblerPredicate; +} + +class SIInstAliasVI : + SIInstAlias (op_name#"_e32_vi"), p> { + let AssemblerPredicates = [isVI]; +} + +multiclass SIInstAliasBuilder { + + def : SIInstAliasSI ; + + def : SIInstAliasVI ; +} class VOP { string OpName = opName; @@ -1165,20 +1361,22 @@ class VOP1_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass VOP1_m pattern, - string opName> { - def "" : VOP1_Pseudo ; +multiclass VOP1_m pattern, + string asm = opName#p.Asm32> { + def "" : VOP1_Pseudo ; - def _si : VOP1_Real_si ; + def _si : VOP1_Real_si ; + + def _vi : VOP1_Real_vi ; - def _vi : VOP1_Real_vi ; } -multiclass VOP1SI_m pattern, - string opName> { - def "" : VOP1_Pseudo ; +multiclass VOP1SI_m pattern, + string asm = opName#p.Asm32> { - def _si : VOP1_Real_si ; + def "" : VOP1_Pseudo ; + + def _si : VOP1_Real_si ; } class VOP2_Pseudo pattern, string opName> : @@ -1202,22 +1400,24 @@ class VOP2_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass VOP2SI_m pattern, - string opName, string revOp> { - def "" : VOP2_Pseudo , +multiclass VOP2SI_m pattern, + string revOp> { + + def "" : VOP2_Pseudo , VOP2_REV; - def _si : VOP2_Real_si ; + def _si : 
VOP2_Real_si ; } -multiclass VOP2_m pattern, - string opName, string revOp> { - def "" : VOP2_Pseudo , +multiclass VOP2_m pattern, + string revOp> { + + def "" : VOP2_Pseudo , VOP2_REV; - def _si : VOP2_Real_si ; + def _si : VOP2_Real_si ; - def _vi : VOP2_Real_vi ; + def _vi : VOP2_Real_vi ; } @@ -1250,6 +1450,9 @@ class VOP3_Pseudo pattern, string opName> : MnemonicAlias { let isPseudo = 1; let isCodeGenOnly = 1; + + field bit vdst; + field bit src0; } class VOP3_Real_si op, dag outs, dag ins, string asm, string opName> : @@ -1295,22 +1498,6 @@ multiclass VOP3_m pattern, HasMods>; } -// VOP3_m without source modifiers -multiclass VOP3_m_nomods pattern, - string opName, int NumSrcArgs, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - let src0_modifiers = 0, - src1_modifiers = 0, - src2_modifiers = 0, - clamp = 0, - omod = 0 in { - def _si : VOP3_Real_si ; - def _vi : VOP3_Real_vi ; - } -} - multiclass VOP3_1_m pattern, string opName, bit HasMods = 1> { @@ -1335,7 +1522,7 @@ multiclass VOP3SI_1_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { + bit HasMods = 1> { def "" : VOP3_Pseudo , VOP2_REV; @@ -1349,7 +1536,7 @@ multiclass VOP3_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { + bit HasMods = 1> { def "" : VOP3_Pseudo , VOP2_REV; @@ -1360,54 +1547,41 @@ multiclass VOP3SI_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { - def "" : VOP3_Pseudo , - VOP2_REV; - - // The VOP2 variant puts the carry out into VCC, the VOP3 variant - // can write it into any SGPR. We currently don't use the carry out, - // so for now hardcode it to VCC as well. - let sdst = SIOperand.VCC, Defs = [VCC] in { - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 0, HasMods>; - } // End sdst = SIOperand.VCC, Defs = [VCC] -} - -multiclass VOP3b_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { +// Two operand VOP3b instruction that may have a 3rd SGPR bool operand +// instead of an implicit VCC as in the VOP2b format. 
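// Editorial sketch: the removed comment above notes that the VOP2 variant
// puts the carry out into VCC while the VOP3 variant can write it into any
// SGPR. Per-lane arithmetic of a 64-bit add built from V_ADD_I32 (carry-out)
// and V_ADDC_U32 (carry-in and carry-out), scalar view of one lane:
#include <cassert>
#include <cstdint>

// v_add_i32: sum plus carry-out.
uint32_t add_co(uint32_t a, uint32_t b, bool &carryOut) {
  uint32_t s = a + b;
  carryOut = s < a; // unsigned wraparound signals the carry
  return s;
}

// v_addc_u32: sum plus carry-in, with carry-out.
uint32_t addc(uint32_t a, uint32_t b, bool carryIn, bool &carryOut) {
  uint64_t s = (uint64_t)a + b + (carryIn ? 1 : 0);
  carryOut = s > UINT32_MAX;
  return (uint32_t)s;
}

int main() {
  uint64_t x = 0x00000001ffffffffULL, y = 1;
  bool c;
  uint32_t lo = add_co((uint32_t)x, (uint32_t)y, c);
  uint32_t hi = addc((uint32_t)(x >> 32), (uint32_t)(y >> 32), c, c);
  assert((((uint64_t)hi << 32) | lo) == x + y);
}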
+multiclass VOP3b_2_3_m pattern, string opName, string revOp, + bit HasMods = 1, bit useSrc2Input = 0> { def "" : VOP3_Pseudo ; - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSrc2Input, HasMods>; def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSrc2Input, HasMods>; } multiclass VOP3_C_m pattern, string opName, - bit HasMods, bit defExec, string revOp> { + bit HasMods, bit defExec, + string revOp, list sched> { def "" : VOP3_Pseudo , - VOP2_REV; + VOP2_REV { + let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; + } def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } def _vi : VOP3_Real_vi , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } } @@ -1432,32 +1606,28 @@ multiclass VOP2SI_3VI_m pat32, - dag ins64, string asm64, list pat64, - bit HasMods> { +multiclass VOP1_Helper pat32, + list pat64> { - defm _e32 : VOP1_m ; + defm _e32 : VOP1_m ; - defm _e64 : VOP3_1_m ; + defm _e64 : VOP3_1_m ; } multiclass VOP1Inst : VOP1_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0))]), - P.HasModifiers + [(set P.DstVT:$dst, (node P.Src0VT:$src0))]) >; multiclass VOP1InstSI { - defm _e32 : VOP1SI_m ; + defm _e32 : VOP1SI_m ; defm _e64 : VOP3SI_1_m ; } -multiclass VOP2_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { - defm _e32 : VOP2_m ; +multiclass VOP2_Helper pat32, + list pat64, string revOp> { - defm _e64 : VOP3_2_m ; + defm _e32 : VOP2_m ; + + defm _e64 : VOP3_2_m ; } multiclass VOP2Inst : VOP2_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp >; multiclass VOP2InstSI { - defm _e32 : VOP2SI_m ; + + defm _e32 : VOP2SI_m ; defm _e64 : VOP3SI_2_m ; } -multiclass VOP2b_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { +multiclass VOP2b_Helper pat32, list pat64, + string revOp, bit useSGPRInput> { - defm _e32 : VOP2_m ; + let SchedRW = [Write32Bit, WriteSALU] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { + defm _e32 : VOP2_m ; + } - defm _e64 : VOP3b_2_m ; + defm _e64 : VOP3b_2_3_m ; + } } multiclass VOP2bInst : VOP2b_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp, !eq(P.NumSrcArgs, 3) >; // A VOP2 instruction that is VOP3-only on VI. 
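// Editorial sketch: the VOP1/VOP2 helper patterns above wrap each source in
// VOP3Mods (per-source abs/neg modifiers) and thread $clamp/$omod through
// VOP3Mods0. The semantics below are this note's assumptions for
// illustration, not a statement of the patch: abs/neg apply before the op,
// omod scales the result (none, x2, x4, x0.5), clamp saturates to [0, 1]:
#include <cassert>
#include <cmath>

float applySrcMods(float v, bool abs, bool neg) {
  if (abs) v = std::fabs(v);
  return neg ? -v : v;
}

float applyOutMods(float v, int omod, bool clamp) {
  static const float scale[4] = {1.0f, 2.0f, 4.0f, 0.5f}; // assumed encoding
  v *= scale[omod & 3];
  if (clamp) v = std::fmin(std::fmax(v, 0.0f), 1.0f);
  return v;
}

int main() {
  // e.g. a hypothetical v_mul_f32_e64 with |src0|, -src1, omod=1, clamp:
  float r = applyOutMods(applySrcMods(-3.0f, true, false) *
                         applySrcMods(2.0f, false, true), 1, true);
  assert(r == 0.0f); // 3 * -2 = -6, x2 = -12, clamped into [0, 1]
}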
-multiclass VOP2_VI3_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { - defm _e32 : VOP2SI_m ; +multiclass VOP2_VI3_Helper pat32, list pat64, string revOp> { - defm _e64 : VOP3_2_m ; + defm _e32 : VOP2SI_m ; + + defm _e64 : VOP3_2_m ; } multiclass VOP2_VI3_Inst : VOP2_VI3_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp >; multiclass VOP2MADK pattern = []> { @@ -1583,64 +1747,75 @@ let isCodeGenOnly = 0 in { } // End isCodeGenOnly = 0 } -class VOPC_Pseudo pattern, string opName> : +class VOPC_Pseudo pattern, string opName> : VOPCCommon , VOP , - SIMCInstr, - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; } -multiclass VOPC_m pattern, - string opName, bit DefExec, string revOpName = ""> { - def "" : VOPC_Pseudo ; - - def _si : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [EXEC], []); - let hasSideEffects = DefExec; - let AssemblerPredicates = [isSICI]; +multiclass VOPC_m pattern, + string opName, bit DefExec, VOPProfile p, + list sched, + string revOpName = "", string asm = opName#"_e32 "#op_asm, + string alias_asm = opName#" "#op_asm> { + def "" : VOPC_Pseudo { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = sched; } - def _vi : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [EXEC], []); - let hasSideEffects = DefExec; - let AssemblerPredicates = [isVI]; - } + let AssemblerPredicates = [isSICI] in { + def _si : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } + + } // End AssemblerPredicates = [isSICI] + + let AssemblerPredicates = [isVI] in { + def _vi : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } + + } // End AssemblerPredicates = [isVI] + + defm : SIInstAliasBuilder; } -multiclass VOPC_Helper pat32, - dag out64, dag ins64, string asm64, list pat64, - bit HasMods, bit DefExec, string revOp> { - defm _e32 : VOPC_m ; +multiclass VOPC_Helper pat32, + list pat64, bit DefExec, string revOp, + VOPProfile p, list sched> { + defm _e32 : VOPC_m ; - defm _e64 : VOP3_C_m ; + defm _e64 : VOP3_C_m ; } // Special case for class instructions which only have modifiers on // the 1st source operand. 
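// Editorial sketch: VOPC_m above models the compare result as an implicit
// def — always VCC, plus EXEC for the _CMPX variants (DefExec). One result
// bit is produced per lane of a 64-lane wave; inactive lanes contribute 0:
#include <cassert>
#include <cstdint>

uint64_t v_cmp_lt_f32(const float *a, const float *b, uint64_t &exec,
                      bool defExec) {
  uint64_t mask = 0;
  for (int lane = 0; lane < 64; ++lane)
    if (((exec >> lane) & 1) && a[lane] < b[lane])
      mask |= 1ULL << lane;
  if (defExec)       // v_cmpx_*: the result also becomes the new EXEC
    exec = mask;
  return mask;       // written to VCC
}

int main() {
  float a[64] = {}, b[64] = {};
  a[7] = -1.0f; b[7] = 1.0f;          // only lane 7 compares true
  uint64_t exec = ~0ULL;
  assert(v_cmp_lt_f32(a, b, exec, /*defExec=*/true) == (1ULL << 7));
  assert(exec == (1ULL << 7));
}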
-multiclass VOPC_Class_Helper pat32, - dag out64, dag ins64, string asm64, list pat64, - bit HasMods, bit DefExec, string revOp> { - defm _e32 : VOPC_m ; +multiclass VOPC_Class_Helper pat32, + list pat64, bit DefExec, string revOp, + VOPProfile p, list sched> { + defm _e32 : VOPC_m ; - defm _e64 : VOP3_C_m , + defm _e64 : VOP3_C_m , VOP3DisableModFields<1, 0, 0>; } multiclass VOPCInst : VOPC_Helper < - op, opName, - P.Ins32, P.Asm32, [], - (outs VOPDstS64:$dst), P.Ins64, P.Asm64, + bit DefExec = 0, + list sched = [Write32Bit]> : + VOPC_Helper < + op, opName, [], !if(P.HasModifiers, [(set i1:$dst, (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, @@ -1648,51 +1823,51 @@ multiclass VOPCInst ; multiclass VOPCClassInst : VOPC_Class_Helper < - op, opName, - P.Ins32, P.Asm32, [], - (outs VOPDstS64:$dst), P.Ins64, P.Asm64, + bit DefExec = 0, + list sched> : VOPC_Class_Helper < + op, opName, [], !if(P.HasModifiers, [(set i1:$dst, (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))], [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]), - P.HasModifiers, DefExec, opName + DefExec, opName, P, sched >; multiclass VOPC_F32 : - VOPCInst ; + VOPCInst ; multiclass VOPC_F64 : - VOPCInst ; + VOPCInst ; multiclass VOPC_I32 : - VOPCInst ; + VOPCInst ; multiclass VOPC_I64 : - VOPCInst ; + VOPCInst ; multiclass VOPCX sched, string revOp = ""> - : VOPCInst ; + : VOPCInst ; multiclass VOPCX_F32 : - VOPCX ; + VOPCX ; multiclass VOPCX_F64 : - VOPCX ; + VOPCX ; multiclass VOPCX_I32 : - VOPCX ; + VOPCX ; multiclass VOPCX_I64 : - VOPCX ; + VOPCX ; multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods> : VOP3_m < @@ -1700,16 +1875,16 @@ multiclass VOP3_Helper ; multiclass VOPC_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPC_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOP3Inst : VOP3_Helper < @@ -1761,25 +1936,13 @@ multiclass VOP3_VCC_Inst ; -multiclass VOP3b_Helper pattern> : - VOP3b_3_m < - op, (outs vrc:$vdst, SReg_64:$sdst), - (ins InputModsNoDefault:$src0_modifiers, arc:$src0, - InputModsNoDefault:$src1_modifiers, arc:$src1, - InputModsNoDefault:$src2_modifiers, arc:$src2, - ClampMod:$clamp, omod:$omod), - opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern, - opName, opName, 1, 1 +multiclass VOP3bInst pattern = []> : + VOP3b_2_3_m < + op, P.Outs64, P.Ins64, + opName#" "#P.Asm64, pattern, + opName, "", 1, 1 >; -multiclass VOP3b_64 pattern> : - VOP3b_Helper ; - -multiclass VOP3b_32 pattern> : - VOP3b_Helper ; - - class Vop3ModPat : Pat< (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), @@ -1925,12 +2088,14 @@ multiclass DS_1A1D_RET op, string opName, RegisterClass rc, dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds), string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> { - def "" : DS_Pseudo , - AtomicNoRet; + let hasPostISelHook = 1 in { + def "" : DS_Pseudo , + AtomicNoRet; - let data1 = 0 in { - def _si : DS_Off16_Real_si ; - def _vi : DS_Off16_Real_vi ; + let data1 = 0 in { + def _si : DS_Off16_Real_si ; + def _vi : DS_Off16_Real_vi ; + } } } @@ -1939,11 +2104,13 @@ multiclass DS_1A2D_RET_m op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), string asm = opName#" $vdst, $addr, $data0, 
$data1"#"$offset"#"$gds"> { - def "" : DS_Pseudo , - AtomicNoRet; + let hasPostISelHook = 1 in { + def "" : DS_Pseudo , + AtomicNoRet; - def _si : DS_Off16_Real_si ; - def _vi : DS_Off16_Real_vi ; + def _si : DS_Off16_Real_si ; + def _vi : DS_Off16_Real_vi ; + } } multiclass DS_1A2D_RET op, string asm, RegisterClass rc, @@ -2214,7 +2381,7 @@ multiclass MUBUF_Atomic ; @@ -2233,7 +2400,7 @@ multiclass MUBUF_Atomic { @@ -2368,47 +2537,121 @@ multiclass MUBUF_Store_Helper op, string asm, RegisterClass regClass> : - FLAT { - let data = 0; - let mayLoad = 1; +// For cache invalidation instructions. +multiclass MUBUF_Invalidate { + let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in { + def "" : MUBUF_Pseudo ; + + // Set everything to 0. + let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0, + vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si ; + } + + def _vi : MUBUF_Real_vi ; + } + } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" } -class FLAT_Store_Helper op, string name, RegisterClass vdataClass> : - FLAT { +//===----------------------------------------------------------------------===// +// FLAT classes +//===----------------------------------------------------------------------===// - let mayLoad = 0; - let mayStore = 1; - - // Encoding - let vdst = 0; +class flat ci, bits<7> vi = ci> { + field bits<7> CI = ci; + field bits<7> VI = vi; } -multiclass FLAT_ATOMIC op, string name, RegisterClass vdst_rc, - RegisterClass data_rc = vdst_rc> { +class FLAT_Pseudo pattern> : + FLAT <0, outs, ins, "", pattern>, + SIMCInstr { + let isPseudo = 1; + let isCodeGenOnly = 1; +} - let mayLoad = 1, mayStore = 1 in { - def "" : FLAT , - AtomicNoRet { - let glc = 0; - let vdst = 0; - } +class FLAT_Real_ci op, string opName, dag outs, dag ins, string asm> : + FLAT , + SIMCInstr { + let AssemblerPredicate = isCIOnly; +} - def _RTN : FLAT , - AtomicNoRet { - let glc = 1; - } +class FLAT_Real_vi op, string opName, dag outs, dag ins, string asm> : + FLAT , + SIMCInstr { + let AssemblerPredicate = VIAssemblerPredicate; +} + +multiclass FLAT_AtomicRet_m pattern> { + def "" : FLAT_Pseudo , + AtomicNoRet ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; +} + +multiclass FLAT_Load_Helper { + + let data = 0, mayLoad = 1 in { + + def "" : FLAT_Pseudo ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } +} + +multiclass FLAT_Store_Helper { + + let mayLoad = 0, mayStore = 1, vdst = 0 in { + + def "" : FLAT_Pseudo ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } +} + +multiclass FLAT_ATOMIC { + + let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in { + def "" : FLAT_Pseudo , + AtomicNoRet ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } + + let glc = 1, hasPostISelHook = 1 in { + defm _RTN : FLAT_AtomicRet_m ; } } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index e0eeea9034b3..6f653c70aca0 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -30,7 +30,9 @@ def isGCN : Predicate<"Subtarget->getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " - "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; + "== AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; @@ 
-62,36 +64,38 @@ let mayLoad = 1 in { // We are using the SGPR_32 and not the SReg_32 register class for 32-bit // SMRD instructions, because the SGPR_32 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. -defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>; -defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>; -defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>; +defm S_LOAD_DWORD : SMRD_Helper , "s_load_dword", SReg_64, SGPR_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper , "s_load_dwordx2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper , "s_load_dwordx4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper , "s_load_dwordx8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper , "s_load_dwordx16", SReg_64, SReg_512>; defm S_BUFFER_LOAD_DWORD : SMRD_Helper < - 0x08, "s_buffer_load_dword", SReg_128, SGPR_32 + smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32 >; defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < - 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64 + smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64 >; defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < - 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128 + smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128 >; defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < - 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256 + smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256 >; defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < - 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512 + smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; } // mayLoad = 1 //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>; + +defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", + int_amdgcn_s_dcache_inv>; //===----------------------------------------------------------------------===// // SOP1 Instructions @@ -123,7 +127,7 @@ let Defs = [SCC] in { defm S_BREV_B32 : SOP1_32 , "s_brev_b32", - [(set i32:$dst, (AMDGPUbrev i32:$src0))] + [(set i32:$dst, (bitreverse i32:$src0))] >; defm S_BREV_B64 : SOP1_64 , "s_brev_b64", []>; @@ -183,10 +187,14 @@ defm S_XNOR_SAVEEXEC_B64 : SOP1_64 , "s_xnor_saveexec_b64", []> defm S_QUADMASK_B32 : SOP1_32 , "s_quadmask_b32", []>; defm S_QUADMASK_B64 : SOP1_64 , "s_quadmask_b64", []>; + +let Uses = [M0] in { defm S_MOVRELS_B32 : SOP1_32 , "s_movrels_b32", []>; defm S_MOVRELS_B64 : SOP1_64 , "s_movrels_b64", []>; defm S_MOVRELD_B32 : SOP1_32 , "s_movreld_b32", []>; defm S_MOVRELD_B64 : SOP1_64 , "s_movreld_b64", []>; +} // End Uses = [M0] + defm S_CBRANCH_JOIN : SOP1_1 , "s_cbranch_join", []>; defm S_MOV_REGRD_B32 : SOP1_32 , "s_mov_regrd_b32", []>; let Defs = [SCC] in { @@ -354,7 +362,7 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">; // SOPK Instructions //===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { +let isReMaterializable = 1, isMoveImm = 1 in { defm S_MOVK_I32 : SOPK_32 , "s_movk_i32", []>; } // End isReMaterializable = 1 let Uses = [SCC] in { @@ -438,36 +446,38 @@ def S_BRANCH : SOPP < let isBarrier = 1; } -let DisableEncoding = "$scc" in { +let Uses = [SCC] in { def S_CBRANCH_SCC0 : SOPP < - 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc), + 0x00000004, (ins sopp_brtarget:$simm16), "s_cbranch_scc0 $simm16" 
>; def S_CBRANCH_SCC1 : SOPP < - 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc), + 0x00000005, (ins sopp_brtarget:$simm16), "s_cbranch_scc1 $simm16" >; -} // End DisableEncoding = "$scc" +} // End Uses = [SCC] +let Uses = [VCC] in { def S_CBRANCH_VCCZ : SOPP < - 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000006, (ins sopp_brtarget:$simm16), "s_cbranch_vccz $simm16" >; def S_CBRANCH_VCCNZ : SOPP < - 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000007, (ins sopp_brtarget:$simm16), "s_cbranch_vccnz $simm16" >; +} // End Uses = [VCC] -let DisableEncoding = "$exec" in { +let Uses = [EXEC] in { def S_CBRANCH_EXECZ : SOPP < - 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec), + 0x00000008, (ins sopp_brtarget:$simm16), "s_cbranch_execz $simm16" >; def S_CBRANCH_EXECNZ : SOPP < - 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec), + 0x00000009, (ins sopp_brtarget:$simm16), "s_cbranch_execnz $simm16" >; -} // End DisableEncoding = "$exec" +} // End Uses = [EXEC] } // End isBranch = 1 @@ -477,11 +487,11 @@ let hasSideEffects = 1 in { def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", [(int_AMDGPU_barrier_local)] > { + let SchedRW = [WriteBarrier]; let simm16 = 0; - let isBarrier = 1; - let hasCtrlDep = 1; let mayLoad = 1; let mayStore = 1; + let isConvergent = 1; } def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">; @@ -805,9 +815,6 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; -let SubtargetPredicate = isCI in { -defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; -} // End isCI defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>; let mayStore = 0 in { defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>; @@ -905,11 +912,6 @@ defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">; defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">; defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">; -//let SubtargetPredicate = isCI in { -// DS_CONDXCHG32_RTN_B64 -// DS_CONDXCHG32_RTN_B128 -//} // End isCI - //===----------------------------------------------------------------------===// // MUBUF Instructions //===----------------------------------------------------------------------===// @@ -951,13 +953,13 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global >; defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < - mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load + mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < - mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load + mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < - mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load + mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load >; defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < @@ -1034,9 +1036,12 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < //def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 , "buffer_atomic_fcmpswap_x2", []>; // isn't on VI //def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 , "buffer_atomic_fmin_x2", []>; // isn't on VI //def 
BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 , "buffer_atomic_fmax_x2", []>; // isn't on VI -//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 , "buffer_wbinvl1_sc", []>; // isn't on CI & VI -//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 , "buffer_wbinvl1_vol", []>; // isn't on SI -//def BUFFER_WBINVL1 : MUBUF_WBINVL1 , "buffer_wbinvl1", []>; + +let SubtargetPredicate = isSI in { +defm BUFFER_WBINVL1_SC : MUBUF_Invalidate , "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI +} + +defm BUFFER_WBINVL1 : MUBUF_Invalidate , "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1155,8 +1160,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" // VOP1 Instructions //===----------------------------------------------------------------------===// -let vdst = 0, src0 = 0 in { -defm V_NOP : VOP1_m , (outs), (ins), "v_nop", [], "v_nop">; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>; } let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { @@ -1292,7 +1297,9 @@ defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", VOP_F64_F64, fsqrt >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] + +let SchedRW = [WriteQuarterRate32] in { defm V_SIN_F32 : VOP1Inst , "v_sin_f32", VOP_F32_F32, AMDGPUsin @@ -1300,6 +1307,9 @@ defm V_SIN_F32 : VOP1Inst , "v_sin_f32", defm V_COS_F32 : VOP1Inst , "v_cos_f32", VOP_F32_F32, AMDGPUcos >; + +} // End SchedRW = [WriteQuarterRate32] + defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>; defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>; @@ -1308,24 +1318,33 @@ defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>; defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64", VOP_I32_F64 >; + +let SchedRW = [WriteDoubleAdd] in { defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64", VOP_F64_F64 >; -defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", VOP_F64_F64>; + +defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", + VOP_F64_F64 +>; +} // End SchedRW = [WriteDoubleAdd] + + defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32", VOP_I32_F32 >; defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", VOP_F32_F32 >; -let vdst = 0, src0 = 0 in { -defm V_CLREXCP : VOP1_m , (outs), (ins), "v_clrexcp", [], - "v_clrexcp" ->; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NONE>; } + +let Uses = [M0, EXEC] in { defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_I32_I32>; defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_I32>; defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_I32_I32>; +} // End Uses = [M0, EXEC] // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -1343,7 +1362,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; -} // End let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -1360,7 +1379,7 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", // VINTRP Instructions //===----------------------------------------------------------------------===// -let Uses = [M0] in { +let Uses = [M0, EXEC] in { // FIXME: Specify SchedRW for VINTRP insturctions. 
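// Editorial sketch: S_BREV_B32 earlier in this patch is retargeted from the
// custom AMDGPUbrev node to the generic ISD 'bitreverse', and V_BFREV_B32
// above is its VALU counterpart. What that operation computes, as plain C:
#include <cassert>
#include <cstdint>

uint32_t brev32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (x & 1); // emit bits in reverse order
    x >>= 1;
  }
  return r;
}

int main() {
  assert(brev32(0x00000001u) == 0x80000000u);
  assert(brev32(0x80000001u) == 0x80000001u); // palindromic pattern
}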
@@ -1405,16 +1424,14 @@ defm V_INTERP_MOV_F32 : VINTRP_m < [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan), (i32 imm:$attr)))]>; -} // End Uses = [M0] +} // End Uses = [M0, EXEC] //===----------------------------------------------------------------------===// // VOP2 Instructions //===----------------------------------------------------------------------===// multiclass V_CNDMASK { - defm _e32 : VOP2_m < - op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [], - name, name>; + defm _e32 : VOP2_m ; defm _e64 : VOP3_m < op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64, @@ -1500,34 +1517,32 @@ let isCommutable = 1 in { defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">; } // End isCommutable = 1 -let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC +let isCommutable = 1 in { // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, // but the VI instructions behave the same as the SI versions. defm V_ADD_I32 : VOP2bInst , "v_add_i32", - VOP_I32_I32_I32, add + VOP2b_I32_I1_I32_I32 >; -defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP_I32_I32_I32>; +defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP2b_I32_I1_I32_I32>; defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32", - VOP_I32_I32_I32, null_frag, "v_sub_i32" + VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32" >; -let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2bInst , "v_addc_u32", - VOP_I32_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBB_U32 : VOP2bInst , "v_subb_u32", - VOP_I32_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32", - VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32" + VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32" >; -} // End Uses = [VCC] -} // End isCommutable = 1, Defs = [VCC] +} // End isCommutable = 1 defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, @@ -1575,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst , "v_bcnt_u32_b32", VOP_I32_I32_I32 >; defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_lo >; defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_hi >; defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp @@ -1704,15 +1719,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst , "v_add_f64", @@ -1735,7 +1750,7 @@ defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -} // let SchedRW = [WriteDouble] +} // let SchedRW = [WriteDoubleAdd] let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { @@ -1756,16 +1771,21 @@ defm V_MUL_HI_I32 : VOP3Inst , "v_mul_hi_i32", } // isCommutable = 1, SchedRW = [WriteQuarterRate32] let SchedRW = [WriteFloatFMA, WriteSALU] in { -defm V_DIV_SCALE_F32 : VOP3b_32 , "v_div_scale_f32", []>; +defm V_DIV_SCALE_F32 : VOP3bInst , "v_div_scale_f32", + VOP3b_F32_I1_F32_F32_F32 +>; } let SchedRW = [WriteDouble, WriteSALU] in { // Double precision 
division pre-scale. -defm V_DIV_SCALE_F64 : VOP3b_64 , "v_div_scale_f64", []>; +defm V_DIV_SCALE_F64 : VOP3bInst , "v_div_scale_f64", + VOP3b_F64_I1_F64_F64_F64 +>; } // let SchedRW = [WriteDouble] -let isCommutable = 1, Uses = [VCC] in { +let isCommutable = 1, Uses = [VCC, EXEC] in { +let SchedRW = [WriteFloatFMA] in { // v_div_fmas_f32: // result = src0 * src1 + src2 // if (vcc) @@ -1774,6 +1794,7 @@ let isCommutable = 1, Uses = [VCC] in { defm V_DIV_FMAS_F32 : VOP3_VCC_Inst , "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +} let SchedRW = [WriteDouble] in { // v_div_fmas_f64: @@ -1786,7 +1807,7 @@ defm V_DIV_FMAS_F64 : VOP3_VCC_Inst , "v_div_fmas_f64", >; } // End SchedRW = [WriteDouble] -} // End isCommutable = 1 +} // End isCommutable = 1, Uses = [VCC, EXEC] //def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; @@ -1835,13 +1856,13 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", [] >; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>; } // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0 -let hasSideEffects = 1 in { +let hasSideEffects = 1, SALU = 1 in { def SGPR_USE : InstSI <(outs),(ins), "", []>; } @@ -1921,39 +1942,9 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { -//defm SI_ : RegisterLoadStore ; - -let UseNamedOperandTable = 1 in { - -def SI_RegisterLoad : InstSI < +class SI_INDIRECT_SRC : InstSI < (outs VGPR_32:$dst, SReg_64:$temp), - (ins FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterLoad = 1; - let mayLoad = 1; -} - -class SIRegStore : InstSI < - outs, - (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterStore = 1; - let mayStore = 1; -} - -let usesCustomInserter = 1 in { -def SI_RegisterStorePseudo : SIRegStore<(outs)>; -} // End usesCustomInserter = 1 -def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; - - -} // End UseNamedOperandTable = 1 - -def SI_INDIRECT_SRC : InstSI < - (outs VGPR_32:$dst, SReg_64:$temp), - (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + (ins rc:$src, VSrc_32:$idx, i32imm:$off), "si_indirect_src $dst, $temp, $src, $idx, $off", [] >; @@ -1967,6 +1958,13 @@ class SI_INDIRECT_DST : InstSI < let Constraints = "$src = $dst"; } +// TODO: We can support indirect SGPR access. 
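// Editorial sketch: the v_div_fmas comment above is truncated in this copy
// after "if (vcc)". The remainder below is this note's assumption, based on
// the instruction's role in the division sequence alongside v_div_scale:
// when VCC is set, the fma result is additionally scaled by 2^32. Treat the
// scale factor as hypothetical, not as the patch's statement:
#include <cassert>
#include <cmath>

float div_fmas_f32(float a, float b, float c, bool vcc) {
  float r = std::fma(a, b, c);        // result = src0 * src1 + src2
  return vcc ? std::ldexp(r, 32) : r; // assumed: if (vcc) result *= 2^32
}

int main() {
  assert(div_fmas_f32(2.0f, 3.0f, 1.0f, false) == 7.0f);
  assert(div_fmas_f32(2.0f, 3.0f, 1.0f, true) == std::ldexp(7.0f, 32));
}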
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC; + def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST; @@ -1977,19 +1975,24 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; multiclass SI_SPILL_SGPR { - let UseNamedOperandTable = 1 in { + let UseNamedOperandTable = 1, Uses = [EXEC] in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, - SReg_32:$scratch_offset), + (ins sgpr_class:$src, i32imm:$frame_idx), "", [] - >; + > { + let mayStore = 1; + let mayLoad = 0; + } def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx), "", [] - >; + > { + let mayStore = 0; + let mayLoad = 1; + } } // End UseNamedOperandTable = 1 } @@ -2003,19 +2006,25 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; multiclass SI_SPILL_VGPR { - let UseNamedOperandTable = 1, VGPRSpill = 1 in { + let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in { def _SAVE : InstSI < (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] - >; + > { + let mayStore = 1; + let mayLoad = 0; + } def _RESTORE : InstSI < (outs vgpr_class:$dst), (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] - >; + > { + let mayStore = 0; + let mayLoad = 1; + } } // End UseNamedOperandTable = 1, VGPRSpill = 1 } @@ -2030,9 +2039,11 @@ let Defs = [SCC] in { def SI_CONSTDATA_PTR : InstSI < (outs SReg_64:$dst), - (ins), - "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] ->; + (ins const_ga:$ptr), + "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))] +> { + let SALU = 1; +} } // End Defs = [SCC] @@ -2072,84 +2083,63 @@ def : Pat < // SMRD Patterns //===----------------------------------------------------------------------===// -multiclass SMRD_Pattern { +multiclass SMRD_Pattern { - // 1. SI-CI: Offset as 8bit DWORD immediate + // 1. IMM offset def : Pat < - (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), - (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) + (smrd_load (SMRDImm i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM") $sbase, $offset)) >; - // 2. Offset loaded in an 32bit SGPR + // 2. SGPR offset def : Pat < - (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), - (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) + (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_SGPR") $sbase, $offset)) >; - // 3. No offset at all def : Pat < - (constant_load i64:$sbase), - (vt (Instr_IMM $sbase, 0)) - >; + (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM_ci") $sbase, $offset)) + > { + let Predicates = [isCIOnly]; + } } -multiclass SMRD_Pattern_vi { +// Global and constant loads can be selected to either MUBUF or SMRD +// instructions, but SMRD instructions are faster so we want the instruction +// selector to prefer those. +let AddedComplexity = 100 in { - // 1. 
VI: Offset as 20bit immediate in bytes - def : Pat < - (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))), - (vt (Instr_IMM $sbase, (as_i32imm $offset))) - >; +defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>; +defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; - // 2. Offset loaded in an 32bit SGPR - def : Pat < - (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), - (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) - >; - - // 3. No offset at all - def : Pat < - (constant_load i64:$sbase), - (vt (Instr_IMM $sbase, 0)) - >; -} - -let Predicates = [isSICI] in { -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -} // End Predicates = [isSICI] - -let Predicates = [isVI] in { -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -} // End Predicates = [isVI] - -let Predicates = [isSICI] in { - -// 1. Offset as 8bit DWORD immediate +// 1. Offset as an immediate def : Pat < - (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) + (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), + (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset) >; -} // End Predicates = [isSICI] - // 2. Offset loaded in an 32bit SGPR def : Pat < - (SIload_constant v4i32:$sbase, imm:$offset), - (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) + (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), + (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset) >; +let Predicates = [isCI] in { + +def : Pat < + (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), + (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset) +>; + +} // End Predicates = [isCI] + +} // End let AddedComplexity = 10000 + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -2161,6 +2151,11 @@ def : Pat < (S_MOV_B32 0), sub1)) >; +def : Pat < + (i32 (smax i32:$x, (i32 (ineg i32:$x)))), + (S_ABS_I32 $x) +>; + //===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// @@ -2488,6 +2483,11 @@ def : Pat < /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; + foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < i32, v2i32, Index, !cast(sub#Index) @@ -2568,11 +2568,25 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + + + + def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -2601,10 +2615,9 @@ def : Pat < // Prevent expanding both fneg and fabs. 
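// Editorial sketch: the SMRDImm / SMRDImm32 / SMRDSgpr patterns above split
// a (base + offset) scalar-load address by what the encoding can hold, and
// AddedComplexity = 100 makes the selector prefer these SMRD forms over the
// slower MUBUF alternatives, as the comment above explains. The widths below
// follow the IMM8bitDWORD / IMM20bit predicates visible in the removed code
// (8-bit dword-scaled on SI/CI, 32-bit CI literal, 20-bit bytes on VI);
// treat them as assumptions read off this patch, not a spec quote:
#include <cassert>
#include <cstdint>

enum class Gen { SI, CI, VI };
enum class OffsetForm { Imm, Imm32, Sgpr };

OffsetForm selectSMRDOffset(Gen gen, uint64_t byteOff) {
  if (gen == Gen::VI)
    return byteOff < (1u << 20) ? OffsetForm::Imm : OffsetForm::Sgpr;
  // SI/CI: the immediate is a dword count, 8 bits wide.
  if (byteOff % 4 == 0 && (byteOff / 4) < (1u << 8))
    return OffsetForm::Imm;
  if (gen == Gen::CI && byteOff <= UINT32_MAX)
    return OffsetForm::Imm32;          // selects the _IMM_ci variant
  return OffsetForm::Sgpr;             // materialize the offset in an SGPR
}

int main() {
  assert(selectSMRDOffset(Gen::SI, 1020) == OffsetForm::Imm);
  assert(selectSMRDOffset(Gen::SI, 4096) == OffsetForm::Sgpr);
  assert(selectSMRDOffset(Gen::CI, 4096) == OffsetForm::Imm32);
}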
-// FIXME: Should use S_OR_B32 def : Pat < (fneg (fabs f32:$src)), - (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ + (S_OR_B32 $src, 0x80000000) /* Set sign bit */ >; // FIXME: Should use S_OR_B32 @@ -2836,10 +2849,6 @@ class DSAtomicRetPat : Pat < // -1. For the non-rtn variants, the manual says it does // DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max // will always do the increment so I'm assuming it's the same. -// -// We also load this -1 with s_mov_b32 / s_mov_b64 even though this -// needs to be a VGPR. The SGPR copy pass will fix this, and it's -// easier since there is no v_mov_b64. class DSAtomicIncRetPat : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)), @@ -2855,9 +2864,9 @@ class DSAtomicCmpXChg : Pat < // 32-bit atomics. def : DSAtomicIncRetPat; + V_MOV_B32_e32, si_atomic_load_add_local>; def : DSAtomicIncRetPat; + V_MOV_B32_e32, si_atomic_load_sub_local>; def : DSAtomicRetPat; def : DSAtomicRetPat; @@ -2874,9 +2883,9 @@ def : DSAtomicCmpXChg; // 64-bit atomics. def : DSAtomicIncRetPat; + V_MOV_B64_PSEUDO, si_atomic_load_add_local>; def : DSAtomicIncRetPat; + V_MOV_B64_PSEUDO, si_atomic_load_sub_local>; def : DSAtomicRetPat; def : DSAtomicRetPat; @@ -3019,90 +3028,46 @@ def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; -let SubtargetPredicate = isCI in { - -defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", - VOP_I32_I32_I32 ->; -defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", - VOP_I32_I32_I32 ->; -defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", - VOP_I32_I32_I32 ->; - -let isCommutable = 1 in { -defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", - VOP_I64_I32_I32_I64 ->; - -// XXX - Does this set VCC? -defm V_MAD_I64_I32 : VOP3Inst , "v_mad_i64_i32", - VOP_I64_I32_I32_I64 ->; -} // End isCommutable = 1 - -// Remaining instructions: -// FLAT_* -// S_CBRANCH_CDBGUSER -// S_CBRANCH_CDBGSYS -// S_CBRANCH_CDBGSYS_OR_USER -// S_CBRANCH_CDBGSYS_AND_USER -// S_DCACHE_INV_VOL -// DS_NOP -// DS_GWS_SEMA_RELEASE_ALL -// DS_WRAP_RTN_B32 -// DS_CNDXCHG32_RTN_B64 -// DS_WRITE_B96 -// DS_WRITE_B128 -// DS_CONDXCHG32_RTN_B128 -// DS_READ_B96 -// DS_READ_B128 -// BUFFER_LOAD_DWORDX3 -// BUFFER_STORE_DWORDX3 - -} // End isCI - /********** ====================== **********/ /********** Indirect adressing **********/ /********** ====================== **********/ -multiclass SI_INDIRECT_Pattern { +multiclass SI_INDIRECT_Pattern { // 1. Extract with offset def : Pat< - (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))), - (SI_INDIRECT_SRC $vec, $idx, imm:$off) + (eltvt (extractelt vt:$vec, (add i32:$idx, imm:$off))), + (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off) >; // 2. Extract without offset def : Pat< - (eltvt (vector_extract vt:$vec, i32:$idx)), - (SI_INDIRECT_SRC $vec, $idx, 0) + (eltvt (extractelt vt:$vec, i32:$idx)), + (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0) >; // 3. Insert with offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), - (IndDst $vec, $idx, imm:$off, $val) + (insertelt vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), + (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val) >; // 4. 
Insert without offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, i32:$idx), - (IndDst $vec, $idx, 0, $val) + (insertelt vt:$vec, eltvt:$val, i32:$idx), + (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val) >; } -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; //===----------------------------------------------------------------------===// // Conversion Patterns @@ -3215,12 +3180,12 @@ def : Pat < def : Pat < (i1 (trunc i32:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1) >; def : Pat < (i1 (trunc i64:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), (EXTRACT_SUBREG $a, sub0)), 1) >; @@ -3301,24 +3266,6 @@ def : Pat < } // End Predicates = [isSI] -let Predicates = [isCI] in { - -// Convert (x - floor(x)) to fract(x) -def : Pat < - (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), - (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), - (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -// Convert (x + (-floor(x))) to fract(x) -def : Pat < - (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), - (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), - (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -} // End Predicates = [isCI] - //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index c319b32111fe..126f6245dfc0 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -103,6 +103,10 @@ public: return "SI Lower control flow instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace @@ -140,8 +144,7 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { DebugLoc DL = From.getDebugLoc(); BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addOperand(To) - .addReg(AMDGPU::EXEC); + .addOperand(To); } void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { @@ -159,8 +162,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { // If the exec mask is non-zero, skip the next two instructions BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); + .addImm(3); // Exec mask is zero: Export to NULL target... 
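// Editorial sketch: the DSAtomicIncRetPat comment above quotes the manual's
// ds_inc rule, DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and observes that
// passing D0 = UINT_MAX makes it behave exactly like an atomic add of 1,
// since the reset to 0 then coincides with unsigned wraparound:
#include <cassert>
#include <cstdint>

uint32_t ds_inc(uint32_t mem, uint32_t d0) {
  return mem >= d0 ? 0 : mem + 1; // the manual's formula
}

int main() {
  for (uint32_t v : {0u, 1u, 41u, 0xfffffffeu, 0xffffffffu})
    assert(ds_inc(v, 0xffffffffu) == v + 1); // v+1 wraps to 0 at UINT_MAX
}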
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) @@ -269,8 +271,7 @@ void SILowerControlFlowPass::Loop(MachineInstr &MI) { .addReg(Src); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addOperand(MI.getOperand(1)) - .addReg(AMDGPU::EXEC); + .addOperand(MI.getOperand(1)); MI.eraseFromParent(); } @@ -316,7 +317,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { .addImm(0); } } else { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32)) .addImm(0) .addOperand(Op); } @@ -362,9 +363,9 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int .addReg(AMDGPU::VCC_LO); // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32)) + .addReg(AMDGPU::M0) + .addReg(Idx); // Update EXEC, save the original EXEC value to VCC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) @@ -385,8 +386,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + .addImm(-7); // Restore EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) @@ -438,7 +438,6 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) .addReg(Reg) - .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); LoadM0(MI, MovRel, Off); @@ -460,7 +459,6 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) .addReg(Reg, RegState::Define) .addReg(Val) - .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); LoadM0(MI, MovRel, Off); @@ -486,11 +484,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode())) + if (TII->isWQM(MI) || TII->isDS(MI)) NeedWQM = true; // Flat uses m0 in case it needs to access LDS. 
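// Editorial sketch of the loop LoadM0 builds above (v_readfirstlane, compare
// against every lane's index, s_and_saveexec, run the movrel, loop with
// s_cbranch_execnz, then restore EXEC): each iteration handles the subset of
// lanes sharing one index value, so a divergent index is processed in
// uniform chunks. Scalar emulation of an indirect-source read:
#include <cassert>
#include <cstdint>

void waterfall(const uint32_t *idx, const uint32_t *vec, uint32_t *out,
               uint64_t exec) {
  while (exec) {
    int first = __builtin_ctzll(exec);   // v_readfirstlane_b32 (lowest lane)
    uint32_t m0 = idx[first];            // uniform candidate index
    uint64_t vcc = 0;                    // v_cmp_eq_u32 m0, idx
    for (int l = 0; l < 64; ++l)
      if (((exec >> l) & 1) && idx[l] == m0)
        vcc |= 1ULL << l;
    for (int l = 0; l < 64; ++l)         // movrel executes for these lanes
      if ((vcc >> l) & 1)
        out[l] = vec[m0];
    exec &= ~vcc; // retire the handled lanes; the real code then loops on
  }               // s_cbranch_execnz and restores the original EXEC after.
}

int main() {
  uint32_t idx[64], out[64] = {}, vec[4] = {10, 11, 12, 13};
  for (int l = 0; l < 64; ++l) idx[l] = l % 4; // divergent index
  waterfall(idx, vec, out, ~0ULL);
  assert(out[0] == 10 && out[5] == 11);
}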
- if (TII->isFLAT(MI.getOpcode())) + if (TII->isFLAT(MI)) NeedFlat = true; switch (MI.getOpcode()) { @@ -541,7 +539,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Branch(MI); break; - case AMDGPU::SI_INDIRECT_SRC: + case AMDGPU::SI_INDIRECT_SRC_V1: + case AMDGPU::SI_INDIRECT_SRC_V2: + case AMDGPU::SI_INDIRECT_SRC_V4: + case AMDGPU::SI_INDIRECT_SRC_V8: + case AMDGPU::SI_INDIRECT_SRC_V16: IndirectSrc(MI); break; diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 67421e231d8d..a2fa5fd93aad 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -48,6 +48,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 587ea63d6796..935aad427198 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,10 +29,114 @@ void SIMachineFunctionInfo::anchor() {} SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), TIDReg(AMDGPU::NoRegister), - HasSpilledVGPRs(false), + ScratchRSrcReg(AMDGPU::NoRegister), + ScratchWaveOffsetReg(AMDGPU::NoRegister), + PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), + DispatchPtrUserSGPR(AMDGPU::NoRegister), + QueuePtrUserSGPR(AMDGPU::NoRegister), + KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), + DispatchIDUserSGPR(AMDGPU::NoRegister), + FlatScratchInitUserSGPR(AMDGPU::NoRegister), + PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), + WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), + WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), + WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), + WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), + PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), + LDSWaveSpillSize(0), PSInputAddr(0), NumUserSGPRs(0), - LDSWaveSpillSize(0) { } + NumSystemSGPRs(0), + HasSpilledSGPRs(false), + HasSpilledVGPRs(false), + PrivateSegmentBuffer(false), + DispatchPtr(false), + QueuePtr(false), + DispatchID(false), + KernargSegmentPtr(false), + FlatScratchInit(false), + GridWorkgroupCountX(false), + GridWorkgroupCountY(false), + GridWorkgroupCountZ(false), + WorkGroupIDX(true), + WorkGroupIDY(false), + WorkGroupIDZ(false), + WorkGroupInfo(false), + PrivateSegmentWaveByteOffset(false), + WorkItemIDX(true), + WorkItemIDY(false), + WorkItemIDZ(false) { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + const Function *F = MF.getFunction(); + + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); + + if (getShaderType() == ShaderType::COMPUTE) + KernargSegmentPtr = true; + + if (F->hasFnAttribute("amdgpu-work-group-id-y")) + WorkGroupIDY = true; + + if (F->hasFnAttribute("amdgpu-work-group-id-z")) + WorkGroupIDZ = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-y")) + WorkItemIDY = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-z")) + WorkItemIDZ = true; + + bool MaySpill = ST.isVGPRSpillingEnabled(this); + bool HasStackObjects = FrameInfo->hasStackObjects(); + + if (HasStackObjects || MaySpill) + PrivateSegmentWaveByteOffset = true; + + if (ST.isAmdHsaOS()) { + if (HasStackObjects || MaySpill) + PrivateSegmentBuffer = true; + + if 
(F->hasFnAttribute("amdgpu-dispatch-ptr")) + DispatchPtr = true; + } + + // X, XY, and XYZ are the only supported combinations, so make sure Y is + // enabled if Z is. + if (WorkItemIDZ) + WorkItemIDY = true; +} + +unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( + const SIRegisterInfo &TRI) { + PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + NumUserSGPRs += 4; + return PrivateSegmentBufferUserSGPR; +} + +unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { + DispatchPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return DispatchPtrUserSGPR; +} + +unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { + QueuePtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return QueuePtrUserSGPR; +} + +unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { + KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return KernargSegmentPtrUserSGPR; +} SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( MachineFunction *MF, @@ -53,7 +157,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); LaneVGPRs[LaneVGPRIdx] = LaneVGPR; - MRI.setPhysRegUsed(LaneVGPR); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 667da4c8af61..9c528d63bd0e 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -26,13 +26,83 @@ class MachineRegisterInfo; /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public AMDGPUMachineFunction { + // FIXME: This should be removed and getPreloadedValue moved here. + friend struct SIRegisterInfo; void anchor() override; unsigned TIDReg; - bool HasSpilledVGPRs; + + // Registers that may be reserved for spilling purposes. These may be the same + // as the input registers. + unsigned ScratchRSrcReg; + unsigned ScratchWaveOffsetReg; + + // Input registers setup for the HSA ABI. + // User SGPRs in allocation order. + unsigned PrivateSegmentBufferUserSGPR; + unsigned DispatchPtrUserSGPR; + unsigned QueuePtrUserSGPR; + unsigned KernargSegmentPtrUserSGPR; + unsigned DispatchIDUserSGPR; + unsigned FlatScratchInitUserSGPR; + unsigned PrivateSegmentSizeUserSGPR; + unsigned GridWorkGroupCountXUserSGPR; + unsigned GridWorkGroupCountYUserSGPR; + unsigned GridWorkGroupCountZUserSGPR; + + // System SGPRs in allocation order. + unsigned WorkGroupIDXSystemSGPR; + unsigned WorkGroupIDYSystemSGPR; + unsigned WorkGroupIDZSystemSGPR; + unsigned WorkGroupInfoSystemSGPR; + unsigned PrivateSegmentWaveByteOffsetSystemSGPR; public: + // FIXME: Make private + unsigned LDSWaveSpillSize; + unsigned PSInputAddr; + std::map LaneVGPRs; + unsigned ScratchOffsetReg; + unsigned NumUserSGPRs; + unsigned NumSystemSGPRs; +private: + bool HasSpilledSGPRs; + bool HasSpilledVGPRs; + + // Feature bits required for inputs passed in user SGPRs. 
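// Editorial sketch: the add*() functions above act as a bump allocator over
// the user-SGPR sequence: each preloaded input is assigned the next free
// SGPR and NumUserSGPRs advances by the value's width in dwords (4 for the
// 128-bit private segment buffer, 2 for 64-bit pointers). Reduced model:
#include <cassert>

struct UserSGPRAlloc {
  unsigned numUserSGPRs = 0;
  unsigned add(unsigned dwords) { // returns the first SGPR of the group
    unsigned first = numUserSGPRs;
    numUserSGPRs += dwords;
    return first;
  }
};

int main() {
  UserSGPRAlloc a;
  assert(a.add(4) == 0); // private segment buffer -> s[0:3]
  assert(a.add(2) == 4); // dispatch ptr           -> s[4:5]
  assert(a.add(2) == 6); // queue ptr              -> s[6:7]
  assert(a.numUserSGPRs == 8);
}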
+ bool PrivateSegmentBuffer : 1; + bool DispatchPtr : 1; + bool QueuePtr : 1; + bool DispatchID : 1; + bool KernargSegmentPtr : 1; + bool FlatScratchInit : 1; + bool GridWorkgroupCountX : 1; + bool GridWorkgroupCountY : 1; + bool GridWorkgroupCountZ : 1; + + // Feature bits required for inputs passed in system SGPRs. + bool WorkGroupIDX : 1; // Always initialized. + bool WorkGroupIDY : 1; + bool WorkGroupIDZ : 1; + bool WorkGroupInfo : 1; + bool PrivateSegmentWaveByteOffset : 1; + + bool WorkItemIDX : 1; // Always initialized. + bool WorkItemIDY : 1; + bool WorkItemIDZ : 1; + + + MCPhysReg getNextUserSGPR() const { + assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); + return AMDGPU::SGPR0 + NumUserSGPRs; + } + + MCPhysReg getNextSystemSGPR() const { + return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; + } + +public: struct SpilledReg { unsigned VGPR; int Lane; @@ -46,16 +116,162 @@ public: SIMachineFunctionInfo(const MachineFunction &MF); SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex, unsigned SubIdx); - unsigned PSInputAddr; - unsigned NumUserSGPRs; - std::map LaneVGPRs; - unsigned LDSWaveSpillSize; - unsigned ScratchOffsetReg; bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; unsigned getTIDReg() const { return TIDReg; }; void setTIDReg(unsigned Reg) { TIDReg = Reg; } - bool hasSpilledVGPRs() const { return HasSpilledVGPRs; } - void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; } + + // Add user SGPRs. + unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); + unsigned addDispatchPtr(const SIRegisterInfo &TRI); + unsigned addQueuePtr(const SIRegisterInfo &TRI); + unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); + + // Add system SGPRs. + unsigned addWorkGroupIDX() { + WorkGroupIDXSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDXSystemSGPR; + } + + unsigned addWorkGroupIDY() { + WorkGroupIDYSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDYSystemSGPR; + } + + unsigned addWorkGroupIDZ() { + WorkGroupIDZSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDZSystemSGPR; + } + + unsigned addWorkGroupInfo() { + WorkGroupInfoSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupInfoSystemSGPR; + } + + unsigned addPrivateSegmentWaveByteOffset() { + PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return PrivateSegmentWaveByteOffsetSystemSGPR; + } + + bool hasPrivateSegmentBuffer() const { + return PrivateSegmentBuffer; + } + + bool hasDispatchPtr() const { + return DispatchPtr; + } + + bool hasQueuePtr() const { + return QueuePtr; + } + + bool hasDispatchID() const { + return DispatchID; + } + + bool hasKernargSegmentPtr() const { + return KernargSegmentPtr; + } + + bool hasFlatScratchInit() const { + return FlatScratchInit; + } + + bool hasGridWorkgroupCountX() const { + return GridWorkgroupCountX; + } + + bool hasGridWorkgroupCountY() const { + return GridWorkgroupCountY; + } + + bool hasGridWorkgroupCountZ() const { + return GridWorkgroupCountZ; + } + + bool hasWorkGroupIDX() const { + return WorkGroupIDX; + } + + bool hasWorkGroupIDY() const { + return WorkGroupIDY; + } + + bool hasWorkGroupIDZ() const { + return WorkGroupIDZ; + } + + bool hasWorkGroupInfo() const { + return WorkGroupInfo; + } + + bool hasPrivateSegmentWaveByteOffset() const { + return PrivateSegmentWaveByteOffset; + } + + bool hasWorkItemIDX() const { + return WorkItemIDX; + } + + 
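As a brief aside on the bookkeeping above: the add* helpers hand out user SGPRs first, starting at SGPR0, and getNextSystemSGPR then appends system SGPRs after all user SGPRs, which is why getNextUserSGPR asserts that no system SGPR has been placed yet. A minimal standalone sketch of that invariant (hypothetical illustration, not code from this patch):

#include <cassert>
#include <cstdio>

// Toy model of the allocation scheme; register numbers are offsets
// from a hypothetical SGPR0, mirroring getNextUserSGPR/getNextSystemSGPR.
struct SGPRAllocModel {
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  // Like addPrivateSegmentBuffer(): a 128-bit resource takes 4 user SGPRs.
  unsigned addUserQuad() {
    assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
    unsigned First = NumUserSGPRs;
    NumUserSGPRs += 4;
    return First;
  }

  // Like addWorkGroupIDX(): one system SGPR after all user SGPRs.
  unsigned addSystemSGPR() {
    return NumUserSGPRs + NumSystemSGPRs++;
  }
};

int main() {
  SGPRAllocModel M;
  unsigned Rsrc = M.addUserQuad();    // occupies s[0:3]
  unsigned TgidX = M.addSystemSGPR(); // lands in s4
  std::printf("rsrc at s%u..s%u, workgroup id x in s%u\n",
              Rsrc, Rsrc + 3, TgidX);
  return 0;
}

Adding a user SGPR after a system SGPR would shift every system SGPR already handed out, which is exactly the ordering bug the assert guards against.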
bool hasWorkItemIDY() const { + return WorkItemIDY; + } + + bool hasWorkItemIDZ() const { + return WorkItemIDZ; + } + + unsigned getNumUserSGPRs() const { + return NumUserSGPRs; + } + + unsigned getNumPreloadedSGPRs() const { + return NumUserSGPRs + NumSystemSGPRs; + } + + unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { + return PrivateSegmentWaveByteOffsetSystemSGPR; + } + + /// \brief Returns the physical register reserved for use as the resource + /// descriptor for scratch accesses. + unsigned getScratchRSrcReg() const { + return ScratchRSrcReg; + } + + void setScratchRSrcReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + ScratchRSrcReg = Reg; + } + + unsigned getScratchWaveOffsetReg() const { + return ScratchWaveOffsetReg; + } + + void setScratchWaveOffsetReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + ScratchWaveOffsetReg = Reg; + } + + bool hasSpilledSGPRs() const { + return HasSpilledSGPRs; + } + + void setHasSpilledSGPRs(bool Spill = true) { + HasSpilledSGPRs = Spill; + } + + bool hasSpilledVGPRs() const { + return HasSpilledVGPRs; + } + + void setHasSpilledVGPRs(bool Spill = true) { + HasSpilledVGPRs = Spill; + } unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp deleted file mode 100644 index 2cd600df2268..000000000000 --- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ /dev/null @@ -1,193 +0,0 @@ -//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// This pass loads scratch pointer and scratch offset into a register or a -/// frame index which can be used anywhere in the program. These values will -/// be used for spilling VGPRs. 
-/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" - -using namespace llvm; - -namespace { - -class SIPrepareScratchRegs : public MachineFunctionPass { - -private: - static char ID; - -public: - SIPrepareScratchRegs() : MachineFunctionPass(ID) { } - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "SI prepare scratch registers"; - } - -}; - -} // End anonymous namespace - -char SIPrepareScratchRegs::ID = 0; - -FunctionPass *llvm::createSIPrepareScratchRegs() { - return new SIPrepareScratchRegs(); -} - -bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { - SIMachineFunctionInfo *MFI = MF.getInfo(); - const SIInstrInfo *TII = - static_cast(MF.getSubtarget().getInstrInfo()); - const SIRegisterInfo *TRI = &TII->getRegisterInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - MachineBasicBlock *Entry = MF.begin(); - MachineBasicBlock::iterator I = Entry->begin(); - DebugLoc DL = I->getDebugLoc(); - - // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to - // run this pass. - if (!MFI->hasSpilledVGPRs()) - return false; - - unsigned ScratchPtrPreloadReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); - unsigned ScratchOffsetPreloadReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); - - if (!Entry->isLiveIn(ScratchPtrPreloadReg)) - Entry->addLiveIn(ScratchPtrPreloadReg); - - if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) - Entry->addLiveIn(ScratchOffsetPreloadReg); - - // Load the scratch offset. - unsigned ScratchOffsetReg = - TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); - int ScratchOffsetFI = -1; - - if (ScratchOffsetReg != AMDGPU::NoRegister) { - // Found an SGPR to use - MRI.setPhysRegUsed(ScratchOffsetReg); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg) - .addReg(ScratchOffsetPreloadReg); - } else { - // No SGPR is available, we must spill. - ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE)) - .addReg(ScratchOffsetPreloadReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } - - - // Now that we have the scratch pointer and offset values, we need to - // add them to all the SI_SPILL_V* instructions. - - RegScavenger RS; - unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); - RS.addScavengingFrameIndex(ScratchRsrcFI); - - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - // Add the scratch offset reg as a live-in so that the register scavenger - // doesn't re-use it. 
- if (!MBB.isLiveIn(ScratchOffsetReg) && - ScratchOffsetReg != AMDGPU::NoRegister) - MBB.addLiveIn(ScratchOffsetReg); - RS.enterBasicBlock(&MBB); - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr &MI = *I; - RS.forward(I); - DebugLoc DL = MI.getDebugLoc(); - if (!TII->isVGPRSpill(MI.getOpcode())) - continue; - - // Scratch resource - unsigned ScratchRsrcReg = - RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - - uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc23 & 0xffffffff) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc23 >> 32) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - // Scratch Offset - if (ScratchOffsetReg == AMDGPU::NoRegister) { - ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), - ScratchOffsetReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else if (!MBB.isLiveIn(ScratchOffsetReg)) { - MBB.addLiveIn(ScratchOffsetReg); - } - - if (ScratchRsrcReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchRsrcReg = AMDGPU::SGPR0; - ScratchOffsetReg = AMDGPU::SGPR0; - } - MI.getOperand(2).setReg(ScratchRsrcReg); - MI.getOperand(2).setIsKill(true); - MI.getOperand(2).setIsUndef(false); - MI.getOperand(3).setReg(ScratchOffsetReg); - MI.getOperand(3).setIsUndef(false); - MI.getOperand(3).setIsKill(false); - MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); - } - } - return true; -} diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index e9e8412e263d..3cdffef05583 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "SIRegisterInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -33,6 +32,40 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co Reserved.set(*R); } +unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( + const MachineFunction &MF) const { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + if (ST.hasSGPRInitBug()) { + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); + return getMatchingSuperReg(BaseReg, AMDGPU::sub0, 
&AMDGPU::SReg_128RegClass); + } + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the + // next sgpr128 down. + return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; + } + + return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99; +} + +unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + if (ST.hasSGPRInitBug()) { + unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); + } + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } + + return AMDGPU::SGPR95; +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); @@ -42,13 +75,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); - // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs - reserveRegisterTuples(Reserved, AMDGPU::VGPR254); - reserveRegisterTuples(Reserved, AMDGPU::VGPR255); + // Reserve the last 2 registers so we will always have at least 2 more that + // will physically contain VCC. + reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103); + + const AMDGPUSubtarget &ST = MF.getSubtarget(); + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation + // for VCC/FLAT_SCR. + reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); + reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. - if (MF.getSubtarget().hasSGPRInitBug()) { + if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). // Assume XNACK_MASK is unused. @@ -60,34 +102,57 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + const SIMachineFunctionInfo *MFI = MF.getInfo(); + + unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); + if (ScratchWaveOffsetReg != AMDGPU::NoRegister) { + // Reserve 1 SGPR for scratch wave offset in case we need to spill. + reserveRegisterTuples(Reserved, ScratchWaveOffsetReg); + } + + unsigned ScratchRSrcReg = MFI->getScratchRSrcReg(); + if (ScratchRSrcReg != AMDGPU::NoRegister) { + // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need + // to spill. + // TODO: May need to reserve a VGPR if doing LDS spilling. + reserveRegisterTuples(Reserved, ScratchRSrcReg); + assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); + } + return Reserved; } unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { - const AMDGPUSubtarget &STI = MF.getSubtarget(); // FIXME: We should adjust the max number of waves based on LDS size. 
unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(), STI.getMaxWavesPerCU()); unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU()); + unsigned VSLimit = SGPRLimit + VGPRLimit; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; - unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1); unsigned Limit; - if (isSGPRClass(*I)) { + if (isPseudoRegClass(RC)) { + // FIXME: This is a hack. We should never be considering the pressure of + // these since no virtual register should ever have this class. + Limit = VSLimit; + } else if (isSGPRClass(RC)) { Limit = SGPRLimit / NumSubRegs; } else { Limit = VGPRLimit / NumSubRegs; } - const int *Sets = getRegClassPressureSets(*I); + const int *Sets = getRegClassPressureSets(RC); assert(Sets); for (unsigned i = 0; Sets[i] != -1; ++i) { - if (Sets[i] == (int)Idx) + if (Sets[i] == (int)Idx) return Limit; } } @@ -174,17 +239,17 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned SubReg = NumSubRegs > 1 ? getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) : Value; - bool IsKill = (i == e - 1); BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) - .addReg(SubReg, getDefRegState(IsLoad)) - .addReg(ScratchRsrcReg, getKillRegState(IsKill)) - .addReg(SOffset) - .addImm(Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0) // tfe - .addReg(Value, RegState::Implicit | getDefRegState(IsLoad)); + .addReg(SubReg, getDefRegState(IsLoad)) + .addReg(ScratchRsrcReg) + .addReg(SOffset) + .addImm(Offset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .addReg(Value, RegState::Implicit | getDefRegState(IsLoad)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); } } @@ -228,6 +293,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addReg(SubReg) .addImm(Spill.Lane); + // FIXME: Since this spills to another register instead of an actual + // frame index, we should delete the frame index when all references to + // it are fixed. } MI->eraseFromParent(); break; @@ -263,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // TODO: only do this when it is needed switch (MF->getSubtarget().getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: - // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI - TII->insertNOPs(MI, 3); + // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states + // ("S_NOP 3") on SI + TII->insertWaitStates(MI, 4); break; case AMDGPUSubtarget::SEA_ISLANDS: break; default: // VOLCANIC_ISLANDS and later - // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI - // and later. This also applies to VALUs which write VCC, but we're - // unlikely to see VMEM use VCC. - TII->insertNOPs(MI, 4); + // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states + // ("S_NOP 4") on VI and later. This also applies to VALUs which write + // VCC, but we're unlikely to see VMEM use VCC. + TII->insertWaitStates(MI, 5); } MI->eraseFromParent(); @@ -322,22 +391,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } -const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( - MVT VT) const { - switch(VT.SimpleTy) { - default: - case MVT::i32: return &AMDGPU::VGPR_32RegClass; - } -} - unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } +// FIXME: This is very slow. 
It might be worth creating a map from physreg to
+// register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   assert(!TargetRegisterInfo::isVirtualRegister(Reg));

-  static const TargetRegisterClass *BaseClasses[] = {
+  static const TargetRegisterClass *const BaseClasses[] = {
     &AMDGPU::VGPR_32RegClass,
     &AMDGPU::SReg_32RegClass,
     &AMDGPU::VReg_64RegClass,
@@ -359,33 +422,45 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   return nullptr;
 }

+// TODO: It might be helpful to have some target specific flags in
+// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
-  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+  switch (RC->getSize()) {
+  case 4:
+    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
+  case 8:
+    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
+  case 12:
+    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
+  case 16:
+    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+  case 32:
+    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
+  case 64:
+    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
 }

 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                          const TargetRegisterClass *SRC) const {
-  if (hasVGPRs(SRC)) {
-    return SRC;
-  } else if (SRC == &AMDGPU::SCCRegRegClass) {
-    return &AMDGPU::VCCRegRegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
-    return &AMDGPU::VGPR_32RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
-    return &AMDGPU::VReg_64RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
-    return &AMDGPU::VReg_128RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
-    return &AMDGPU::VReg_256RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
-    return &AMDGPU::VReg_512RegClass;
-  }
-  return nullptr;
+  switch (SRC->getSize()) {
+  case 4:
+    return &AMDGPU::VGPR_32RegClass;
+  case 8:
+    return &AMDGPU::VReg_64RegClass;
+  case 12:
+    return &AMDGPU::VReg_96RegClass;
+  case 16:
+    return &AMDGPU::VReg_128RegClass;
+  case 32:
+    return &AMDGPU::VReg_256RegClass;
+  case 64:
+    return &AMDGPU::VReg_512RegClass;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
 }

 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -402,6 +477,30 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
   }
 }

+bool SIRegisterInfo::shouldRewriteCopySrc(
+  const TargetRegisterClass *DefRC,
+  unsigned DefSubReg,
+  const TargetRegisterClass *SrcRC,
+  unsigned SrcSubReg) const {
+  // We want to prefer the smallest register class possible, so we don't want to
+  // stop and rewrite on anything that looks like a subregister
+  // extract. Operations mostly don't care about the super register class, so we
+  // only want to stop on the most basic of copies between the same register
+  // class.
+  //
+  // e.g. if we have something like
+  // vreg0 = ...
+  // vreg1 = ...
+  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
+  // vreg3 = COPY vreg2, sub0
+  //
+  // We want to look through the COPY to find:
+  //  => vreg3 = COPY vreg0
+
+  // Plain copy.
+  return getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
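To make the predicate above concrete, here is a toy standalone model (an illustrative sketch, not the LLVM API; class names and widths are assumptions): treat a register class as a set of registers, so two classes have a common subclass exactly when the sets intersect. A plain copy stays within overlapping classes and is rewritten; a subregister extract crosses into a disjoint, wider class and is left alone.

#include <bitset>
#include <cstdio>

// A register class modeled as a set of 16 hypothetical registers.
using RegSet = std::bitset<16>;

// Stand-in for getCommonSubClass(...) != nullptr: the classes intersect.
static bool hasCommonSubClass(const RegSet &A, const RegSet &B) {
  return (A & B).any();
}

// Mirrors the shape of shouldRewriteCopySrc above: rewrite plain copies,
// skip anything that looks like a subregister extract.
static bool shouldRewriteCopySrcModel(const RegSet &DefRC, const RegSet &SrcRC) {
  return hasCommonSubClass(DefRC, SrcRC);
}

int main() {
  RegSet VGPR32("0000000000001111");  // hypothetical 32-bit class
  RegSet VReg64("1111000000000000");  // hypothetical disjoint 64-bit class
  std::printf("plain copy: %d, subreg extract: %d\n",
              shouldRewriteCopySrcModel(VGPR32, VGPR32),
              shouldRewriteCopySrcModel(VGPR32, VReg64));
  return 0;
}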
+
 unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                           const TargetRegisterClass *SubRC,
                                           unsigned Channel) const {
@@ -462,30 +561,47 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
   return OpType == AMDGPU::OPERAND_REG_INLINE_C;
 }

+// FIXME: Most of these are flexible with HSA and we don't need to reserve them
+// as input registers if unused. Whether the dispatch ptr is necessary should be
+// easy to detect from used intrinsics. Scratch setup is harder to know.
 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                            enum PreloadedValue Value) const {

   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  (void)ST;
   switch (Value) {
-  case SIRegisterInfo::TGID_X:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
-  case SIRegisterInfo::TGID_Y:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
-  case SIRegisterInfo::TGID_Z:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
-  case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
-    if (MFI->getShaderType() != ShaderType::COMPUTE)
-      return MFI->ScratchOffsetReg;
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
-  case SIRegisterInfo::SCRATCH_PTR:
-    return AMDGPU::SGPR2_SGPR3;
-  case SIRegisterInfo::INPUT_PTR:
-    return AMDGPU::SGPR0_SGPR1;
-  case SIRegisterInfo::TIDIG_X:
+  case SIRegisterInfo::WORKGROUP_ID_X:
+    assert(MFI->hasWorkGroupIDX());
+    return MFI->WorkGroupIDXSystemSGPR;
+  case SIRegisterInfo::WORKGROUP_ID_Y:
+    assert(MFI->hasWorkGroupIDY());
+    return MFI->WorkGroupIDYSystemSGPR;
+  case SIRegisterInfo::WORKGROUP_ID_Z:
+    assert(MFI->hasWorkGroupIDZ());
+    return MFI->WorkGroupIDZSystemSGPR;
+  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
+    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
+  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
+    assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
+    assert(MFI->hasPrivateSegmentBuffer());
+    return MFI->PrivateSegmentBufferUserSGPR;
+  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
+    assert(MFI->hasKernargSegmentPtr());
+    return MFI->KernargSegmentPtrUserSGPR;
+  case SIRegisterInfo::DISPATCH_PTR:
+    assert(MFI->hasDispatchPtr());
+    return MFI->DispatchPtrUserSGPR;
+  case SIRegisterInfo::QUEUE_PTR:
+    llvm_unreachable("not implemented");
+  case SIRegisterInfo::WORKITEM_ID_X:
+    assert(MFI->hasWorkItemIDX());
     return AMDGPU::VGPR0;
-  case SIRegisterInfo::TIDIG_Y:
+  case SIRegisterInfo::WORKITEM_ID_Y:
+    assert(MFI->hasWorkItemIDY());
    return AMDGPU::VGPR1;
-  case SIRegisterInfo::TIDIG_Z:
+  case SIRegisterInfo::WORKITEM_ID_Z:
+    assert(MFI->hasWorkItemIDZ());
    return AMDGPU::VGPR2;
   }
   llvm_unreachable("unexpected preloaded value type");
@@ -496,12 +612,9 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 // AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const { - - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - if (!MRI.isPhysRegUsed(*I)) - return *I; - } + for (unsigned Reg : *RC) + if (!MRI.isPhysRegUsed(Reg)) + return Reg; return AMDGPU::NoRegister; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 7da6de282c11..1795237c2140 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -18,6 +18,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" namespace llvm { @@ -29,6 +30,15 @@ private: public: SIRegisterInfo(); + /// Return the end register initially reserved for the scratch buffer in case + /// spilling is needed. + unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; + + /// Return the end register initially reserved for the scratch wave offset in + /// case spilling is needed. + unsigned reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; unsigned getRegPressureSetLimit(const MachineFunction &MF, @@ -40,10 +50,6 @@ public: unsigned FIOperandNum, RegScavenger *RS) const override; - /// \brief get the register class of the specified type to use in the - /// CFGStructurizer - const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; - unsigned getHWRegIndex(unsigned Reg) const override; /// \brief Return the 'base' register class for this register. @@ -52,23 +58,30 @@ public: /// \returns true if this class contains only SGPR registers bool isSGPRClass(const TargetRegisterClass *RC) const { - if (!RC) - return false; - return !hasVGPRs(RC); } /// \returns true if this class ID contains only SGPR registers bool isSGPRClassID(unsigned RCID) const { - if (static_cast(RCID) == -1) - return false; - return isSGPRClass(getRegClass(RCID)); } + bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return isSGPRClass(MRI.getRegClass(Reg)); + return getPhysRegClass(Reg); + } + /// \returns true if this class contains VGPR registers. bool hasVGPRs(const TargetRegisterClass *RC) const; + /// returns true if this is a pseudoregister class combination of VGPRs and + /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on + /// them. + static bool isPseudoRegClass(const TargetRegisterClass *RC) { + return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass; + } + /// \returns A VGPR reg class with the same width as \p SRC const TargetRegisterClass *getEquivalentVGPRClass( const TargetRegisterClass *SRC) const; @@ -79,6 +92,11 @@ public: const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const; + bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const override; + /// \p Channel This is the register channel (e.g. a value from 0-16), not the /// SubReg index. /// \returns The sub-register of Reg that is in Channel. @@ -91,19 +109,25 @@ public: /// \returns True if operands defined with this operand type can accept /// an inline constant. i.e. An integer value in the range (-16, 64) or - /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. 
+  /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
   bool opCanUseInlineConstant(unsigned OpType) const;

   enum PreloadedValue {
-    TGID_X,
-    TGID_Y,
-    TGID_Z,
-    SCRATCH_WAVE_OFFSET,
-    SCRATCH_PTR,
-    INPUT_PTR,
-    TIDIG_X,
-    TIDIG_Y,
-    TIDIG_Z
+    // SGPRS:
+    PRIVATE_SEGMENT_BUFFER = 0,
+    DISPATCH_PTR = 1,
+    QUEUE_PTR = 2,
+    KERNARG_SEGMENT_PTR = 3,
+    WORKGROUP_ID_X = 10,
+    WORKGROUP_ID_Y = 11,
+    WORKGROUP_ID_Z = 12,
+    PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+
+    // VGPRS:
+    FIRST_VGPR_VALUE = 15,
+    WORKITEM_ID_X = FIRST_VGPR_VALUE,
+    WORKITEM_ID_Y = 16,
+    WORKITEM_ID_Z = 17
   };

   /// \brief Returns the physical register that \p Value is stored in.
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 2a9017fa2a98..bfaf93709d8c 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,10 +10,13 @@
 //===----------------------------------------------------------------------===//
 // Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
-
-class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+  DwarfRegNum<[!cast<int>(HWEncoding)]> {
   let Namespace = "AMDGPU";
-  let HWEncoding = encoding;
+
+  // This is not yet the complete register encoding. An additional
+  // bit is set for VGPRs.
+  let HWEncoding = regIdx;
 }

 // Special Registers
@@ -21,7 +24,8 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
 def VCC_HI : SIReg<"vcc_hi", 107>;

 // VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+          DwarfRegAlias<VCC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
   let HWEncoding = 106;
@@ -30,7 +34,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;

-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+           DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
   let HWEncoding = 126;
@@ -39,18 +44,29 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
 def SCC : SIReg<"scc", 253>;
 def M0 : SIReg <"m0", 124>;

-def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
-def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
-
-// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
-  let Namespace = "AMDGPU";
-  let SubRegIndices = [sub0, sub1];
-  let HWEncoding = 104;
+multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
+  def _ci : SIReg<n, ci_e>;
+  def _vi : SIReg<n, vi_e>;
+  def "" : SIReg<"", 0>;
 }

+class FlatReg <Register lo, Register hi, bits<16> encoding> :
+    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+    DwarfRegAlias<lo> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = encoding;
+}
+
+defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
+defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
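For orientation on the flat scratch change just above (a hedged sketch, not part of the patch): the multiclass stamps out three variants of each half — a CI register encoded 104/105, a VI register encoded 102/103, and an encoding-less pseudo that getMCReg in AMDGPUBaseInfo.cpp (further down in this patch) resolves once the subtarget is known; SI predates flat instructions, which is why getMCReg asserts !isSI. A minimal model of that per-generation selection, under those assumed encodings:

#include <cstdio>

enum class Gen { SeaIslands, VolcanicIslands };

// Encodings taken from the TableGen defs above: CI places
// flat_scratch_lo/hi at 104/105, VI at 102/103.
static unsigned flatScratchLoEnc(Gen G) {
  return G == Gen::SeaIslands ? 104 : 102;
}

static unsigned flatScratchHiEnc(Gen G) {
  return G == Gen::SeaIslands ? 105 : 103;
}

int main() {
  std::printf("CI: lo=%u hi=%u; VI: lo=%u hi=%u\n",
              flatScratchLoEnc(Gen::SeaIslands),
              flatScratchHiEnc(Gen::SeaIslands),
              flatScratchLoEnc(Gen::VolcanicIslands),
              flatScratchHiEnc(Gen::VolcanicIslands));
  return 0;
}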
+
+def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
+def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
+def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
+
 // SGPR registers
-foreach Index = 0-101 in {
+foreach Index = 0-103 in {
   def SGPR#Index : SIReg <"SGPR"#Index, Index>;
 }
@@ -65,25 +81,27 @@ foreach Index = 0-255 in {
 // Groupings using register classes and tuples
 //===----------------------------------------------------------------------===//

+// TODO: Do we need to set DwarfRegAlias on register tuples?
+
 // SGPR 32-bit registers
 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
-                            (add (sequence "SGPR%u", 0, 101))>;
+                            (add (sequence "SGPR%u", 0, 103))>;

 // SGPR 64-bit registers
 def SGPR_64Regs : RegisterTuples<[sub0, sub1],
-                            [(add (decimate (trunc SGPR_32, 101), 2)),
+                            [(add (decimate SGPR_32, 2)),
                              (add (decimate (shl SGPR_32, 1), 2))]>;

 // SGPR 128-bit registers
 def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
-                              [(add (decimate (trunc SGPR_32, 99), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4))]>;

 // SGPR 256-bit registers
 def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
-                              [(add (decimate (trunc SGPR_32, 95), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4)),
@@ -95,7 +113,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
 // SGPR 512-bit registers
 def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
-                              [(add (decimate (trunc SGPR_32, 87), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4)),
@@ -174,44 +192,57 @@ class RegImmMatcher <string name> : AsmOperandClass {
   let RenderMethod = "addRegOrImmOperands";
 }

-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
-  let CopyCost = -1; // Theoretically it is possible to read from SCC,
-                     // but it should never be necessary.
-}
-
-def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
   (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
 >;

-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
-  (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+  (add SGPR_64, VCC, EXEC, FLAT_SCR)
 >;

-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
-
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
-
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
-
-// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
-  let Size = 96;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
+  // Requires 2 s_mov_b64 to copy
+  let CopyCost = 2;
 }

-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
+  // Requires 4 s_mov_b64 to copy
+  let CopyCost = 4;
+}

-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+  // Requires 8 s_mov_b64 to copy
+  let CopyCost = 8;
+}

-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+  // Requires 2 v_mov_b32 to copy
+  let CopyCost = 2;
+}
+
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
+  let Size = 96;
+
+  // Requires 3 v_mov_b32 to copy
+  let CopyCost = 3;
+}
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+  // Requires 4 v_mov_b32 to copy
+  let CopyCost = 4;
+}
+
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
+  let CopyCost = 8;
+}
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+  let CopyCost = 16;
+}

 def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
   let Size = 32;
@@ -253,7 +284,9 @@ def SCSrc_32 : RegInlineOperand {
 def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;

-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+  let CopyCost = 2;
+}

 def VSrc_32 : RegisterOperand<VS_32> {
   let OperandNamespace = "AMDGPU";
@@ -282,3 +315,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
   let OperandType =
"OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"SCSrc64">; +} diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td index 9b1f676020bf..cd77e519abb2 100644 --- a/lib/Target/AMDGPU/SISchedule.td +++ b/lib/Target/AMDGPU/SISchedule.td @@ -17,16 +17,28 @@ def WriteLDS : SchedWrite; def WriteSALU : SchedWrite; def WriteSMEM : SchedWrite; def WriteVMEM : SchedWrite; +def WriteBarrier : SchedWrite; // Vector ALU instructions def Write32Bit : SchedWrite; def WriteQuarterRate32 : SchedWrite; +def WriteFullOrQuarterRate32 : SchedWrite; def WriteFloatFMA : SchedWrite; -def WriteDouble : SchedWrite; +// Slow quarter rate f64 instruction. +def WriteDouble : SchedWrite; + +// half rate f64 instruction (same as v_add_f64) def WriteDoubleAdd : SchedWrite; +// Half rate 64-bit instructions. +def Write64Bit : SchedWrite; + +// FIXME: Should there be a class for instructions which are VALU +// instructions and have VALU rates, but write to the SALU (i.e. VOPC +// instructions) + def SIFullSpeedModel : SchedMachineModel; def SIQuarterSpeedModel : SchedMachineModel; @@ -53,7 +65,7 @@ class HWVALUWriteRes : // The latency numbers are taken from AMD Accelerated Parallel Processing -// guide. They may not be acurate. +// guide. They may not be accurate. // The latency values are 1 / (operations / cycle) / 4. multiclass SICommonWriteRes { @@ -64,8 +76,10 @@ multiclass SICommonWriteRes { def : HWWriteRes; def : HWWriteRes; // XXX: Guessed ??? def : HWWriteRes; // 300 - 600 + def : HWWriteRes; // XXX: Guessed ??? def : HWVALUWriteRes; + def : HWVALUWriteRes; def : HWVALUWriteRes; } diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 5d00bdd6a9bb..4f0913fe62f2 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -141,8 +141,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, if (!MRI.isSSA()) return; - assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) || - TII->isVOPC(MI.getOpcode())); + assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); const SIRegisterInfo &TRI = TII->getRegisterInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -187,6 +186,21 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, } +// Copy MachineOperand with all flags except setting it as implicit. +static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) { + assert(!Orig.isImplicit()); + return MachineOperand::CreateReg(Orig.getReg(), + Orig.isDef(), + true, + Orig.isKill(), + Orig.isDead(), + Orig.isUndef(), + Orig.isEarlyClobber(), + Orig.getSubReg(), + Orig.isDebug(), + Orig.isInternalRead()); +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -236,14 +250,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (TII->isVOPC(Op32)) { unsigned DstReg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - // VOPC instructions can only write to the VCC register. We can't - // force them to use VCC here, because the register allocator has - // trouble with sequences like this, which cause the allocator to run - // out of registers if vreg0 and vreg1 belong to the VCCReg register - // class: - // vreg0 = VOPC; - // vreg1 = VOPC; - // S_AND_B64 vreg0, vreg1 + // VOPC instructions can only write to the VCC register. 
We can't
+          // force them to use VCC here, because this is only one register and
+          // cannot deal with sequences which would require multiple copies of
+          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
+          //
+          // So, instead of forcing the instruction to write to VCC, we provide
+          // a hint to the register allocator to use VCC and then we will run
@@ -272,13 +282,22 @@
       }

       // We can shrink this instruction
-      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+      DEBUG(dbgs() << "Shrinking " << MI);

       MachineInstrBuilder Inst32 =
           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

-      // dst
-      Inst32.addOperand(MI.getOperand(0));
+      // Add the dst operand if the 32-bit encoding also has an explicit $dst.
+      // For VOPC instructions, this is replaced by an implicit def of vcc.
+      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
+      if (Op32DstIdx != -1) {
+        // dst
+        Inst32.addOperand(MI.getOperand(0));
+      } else {
+        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+               "Unexpected case");
+      }
+
       Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
@@ -288,9 +307,19 @@
         Inst32.addOperand(*Src1);

       const MachineOperand *Src2 =
-          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
-      if (Src2)
-        Inst32.addOperand(*Src2);
+          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+      if (Src2) {
+        int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+        if (Op32Src2Idx != -1) {
+          Inst32.addOperand(*Src2);
+        } else {
+          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+          // replaced with an implicit read of vcc.
+          assert(Src2->getReg() == AMDGPU::VCC &&
+                 "Unexpected missing register operand");
+          Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
+        }
+      }

       ++NumInstructionsShrunk;
       MI.eraseFromParent();
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
index 591ce857cc7d..dbdc76b917f3 100644
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -22,6 +22,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -61,14 +62,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
 }

 bool SITypeRewriter::runOnFunction(Function &F) {
-  Attribute A = F.getFnAttribute("ShaderType");
-
-  unsigned ShaderType = ShaderType::COMPUTE;
-  if (A.isStringAttribute()) {
-    StringRef Str = A.getValueAsString();
-    Str.getAsInteger(0, ShaderType);
-  }
-  if (ShaderType == ShaderType::COMPUTE)
+  if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
     return false;

   visit(F);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b76b4007003f..add415e215cf 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,12 +7,23 @@
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"

 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
 #undef GET_SUBTARGETINFO_ENUM

+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc" +#undef GET_REGINFO_ENUM + namespace llvm { namespace AMDGPU { @@ -56,5 +67,91 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } +MCSection *getHSATextSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_EXECINSTR | + ELF::SHF_AMDGPU_HSA_AGENT | + ELF::SHF_AMDGPU_HSA_CODE); +} + +MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_AMDGPU_HSA_GLOBAL | + ELF::SHF_AMDGPU_HSA_AGENT); +} + +MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_AMDGPU_HSA_GLOBAL); +} + +MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | + ELF::SHF_AMDGPU_HSA_AGENT); +} + +bool isGroupSegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +} + +bool isGlobalSegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +} + +bool isReadOnlySegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +} + +static const char ShaderTypeAttribute[] = "ShaderType"; + +unsigned getShaderType(const Function &F) { + Attribute A = F.getFnAttribute(ShaderTypeAttribute); + unsigned ShaderType = ShaderType::COMPUTE; + + if (A.isStringAttribute()) { + StringRef Str = A.getValueAsString(); + if (Str.getAsInteger(0, ShaderType)) { + LLVMContext &Ctx = F.getContext(); + Ctx.emitError("can't parse shader type"); + } + } + return ShaderType; +} + +bool isSI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; +} + +bool isCI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; +} + +bool isVI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; +} + +unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { + + switch(Reg) { + default: break; + case AMDGPU::FLAT_SCR: + assert(!isSI(STI)); + return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi; + + case AMDGPU::FLAT_SCR_LO: + assert(!isSI(STI)); + return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi; + + case AMDGPU::FLAT_SCR_HI: + assert(!isSI(STI)); + return isCI(STI) ? 
AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi; + } + return Reg; +} + } // End namespace AMDGPU } // End namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f57028cc5bfd..19419a29f5e0 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -15,6 +15,11 @@ namespace llvm { class FeatureBitset; +class Function; +class GlobalValue; +class MCContext; +class MCSection; +class MCSubtargetInfo; namespace AMDGPU { @@ -27,6 +32,27 @@ struct IsaVersion { IsaVersion getIsaVersion(const FeatureBitset &Features); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); +MCSection *getHSATextSection(MCContext &Ctx); + +MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); + +MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); + +MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); + +bool isGroupSegment(const GlobalValue *GV); +bool isGlobalSegment(const GlobalValue *GV); +bool isReadOnlySegment(const GlobalValue *GV); + +unsigned getShaderType(const Function &F); + +bool isSI(const MCSubtargetInfo &STI); +bool isCI(const MCSubtargetInfo &STI); +bool isVI(const MCSubtargetInfo &STI); + +/// If \p Reg is a pseudo reg, return the correct hardware register given +/// \p STI otherwise return \p Reg. +unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); } // end namespace AMDGPU } // end namespace llvm diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt index dec5360e3bc7..2453bc546b99 100644 --- a/lib/Target/AMDGPU/Utils/LLVMBuild.txt +++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AMDGPUUtils parent = AMDGPU -required_libraries = Support +required_libraries = Core MC Support add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td index aca46732adb9..20a026a822e2 100644 --- a/lib/Target/AMDGPU/VIInstructions.td +++ b/lib/Target/AMDGPU/VIInstructions.td @@ -73,8 +73,8 @@ defm V_MIN_I16 : VOP2Inst , "v_min_i16", VOP_I16_I16_I16>; } // End isCommutable = 1 defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>; -// Aliases to simplify matching of floating-pint instructions that are VOP2 on -// SI and VOP3 on VI. +// Aliases to simplify matching of floating-point instructions that +// are VOP2 on SI and VOP3 on VI. class SI2_VI3Alias : InstAlias < name#" $dst, $src0, $src1", @@ -89,60 +89,15 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; +//===----------------------------------------------------------------------===// +// SMEM Instructions +//===----------------------------------------------------------------------===// + +def S_DCACHE_WB : SMEM_Inval <0x21, + "s_dcache_wb", int_amdgcn_s_dcache_wb>; + +def S_DCACHE_WB_VOL : SMEM_Inval <0x23, + "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI -//===----------------------------------------------------------------------===// -// SMEM Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [isVI] in { - -// 1. 
Offset as 20bit DWORD immediate -def : Pat < - (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) ->; - -// Patterns for global loads with no offset -class FlatLoadPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 0, 0, 0) ->; - -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; - -class FlatStorePat : Pat < - (node vt:$data, i64:$addr), - (inst $data, $addr, 0, 0, 0) ->; - -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; - -class FlatAtomicPat : Pat < - (vt (node i64:$addr, vt:$data)), - (inst $addr, $data, 0, 0) ->; - -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; - - -} // End Predicates = [isVI] diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 9550a3a3cad1..cd7540e52410 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,7 +35,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); -FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ef609a66d032..a44dc830a673 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -16,6 +16,17 @@ include "llvm/Target/Target.td" +//===----------------------------------------------------------------------===// +// ARM Helper classes. +// + +class ProcNoItin Features> + : Processor; + +class Architecture features > + : SubtargetFeature; + //===----------------------------------------------------------------------===// // ARM Subtarget state. // @@ -51,8 +62,11 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", "Enable ARMv8 FP", [FeatureVFP4]>; +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision floating point", + [FeatureFPARMv8]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict VFP3 to 16 double registers">; + "Restrict FP to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", @@ -119,9 +133,9 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", "Has return address stack">; -/// Some M architectures don't have the DSP extension (v7E-M vs. v7M) -def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", - "Supports v7 DSP instructions in Thumb2">; +/// DSP extension. +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in ARM and/or Thumb2">; // Multiprocessing extension. 
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
@@ -150,11 +164,28 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
 def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
                                        "NaCl trap">;

+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+                                          "StrictAlign", "true",
+                                          "Disallow all unaligned memory "
+                                          "access">;
+
 def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
                                         "Generate calls via indirect call "
                                         "instructions">;

-// ARM ISAs.
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+                                        "Reserve R9, making it unavailable as "
+                                        "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+                                     "Don't use movt/movw pairs for 32-bit "
+                                     "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAs.
+//
+
 def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
                                  "Support ARM v4T instructions">;
 def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
@@ -180,302 +211,444 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
                                 [HasV6T2Ops, FeaturePerfMon]>;
 def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
                                 "Support ARM v8 instructions",
-                                [HasV7Ops, FeatureVirtualization,
-                                 FeatureMP]>;
+                                [HasV7Ops]>;
 def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
                                    "Support ARM v8.1a instructions",
-                                   [HasV8Ops, FeatureAClass, FeatureCRC]>;
+                                   [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+                                   "Support ARM v8.2a instructions",
+                                   [HasV8_1aOps]>;
+

 //===----------------------------------------------------------------------===//
-// ARM Processors supported.
+// ARM Processor subtarget features.
+//
+
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+                              "Cortex-A5 ARM processors", []>;
+def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
+                              "Cortex-A7 ARM processors", []>;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+                              "Cortex-A8 ARM processors", []>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+                              "Cortex-A9 ARM processors", []>;
+def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
+                               "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+                               "Cortex-A15 ARM processors", []>;
+def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
+                               "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+                               "Cortex-A35 ARM processors", []>;
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+                               "Cortex-A53 ARM processors", []>;
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+                               "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+                               "Cortex-A72 ARM processors", []>;
+
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+                                 "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+                                 "Swift ARM processors", []>;
+
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+                              "Cortex-R4 ARM processors", []>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+                              "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+                              "Cortex-R7 ARM processors", []>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM schedules.
// include "ARMSchedule.td" -// ARM processor families. -def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", - "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone, FeatureMP]>; -def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", - "Cortex-A7 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureVFP4, FeatureMP, - FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureVirtualization]>; -def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", - "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone]>; -def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", - "Cortex-A9 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureTrustZone]>; -def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", - "Swift ARM processors", - [FeatureNEONForFP, FeatureT2XtPk, - FeatureVFP4, FeatureMP, FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, FeatureTrustZone]>; -def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", - "Cortex-A12 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureVFP4, - FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureTrustZone]>; + +//===----------------------------------------------------------------------===// +// ARM architectures +// + +def ARMv2 : Architecture<"armv2", "ARMv2", []>; + +def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; + +def ARMv3 : Architecture<"armv3", "ARMv3", []>; + +def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; + +def ARMv4 : Architecture<"armv4", "ARMv4", []>; + +def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; + +def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; + +def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; + +def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; + +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>; + +def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, + FeatureDSP]>; + +def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; + +def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, + FeatureTrustZone]>; + +def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, + FeatureNoARM, + FeatureDB, + FeatureMClass]>; + +def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, + FeatureNoARM, + FeatureDB, + FeatureMClass]>; + +def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureAClass]>; + +def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, + FeatureDB, + FeatureDSP, + FeatureHWDiv, + FeatureRClass]>; + +def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + FeatureMClass]>; + +def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + FeatureMClass, + FeatureDSP, + FeatureT2XtPk]>; + +def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, + FeatureAClass, + FeatureDB, + 
FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +// Aliases +def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; +def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; +def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; +def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; +def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; +def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; -// FIXME: It has not been determined if A15 has these features. -def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", - "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureVFP4, - FeatureMP, FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureTrustZone, FeatureVirtualization]>; +//===----------------------------------------------------------------------===// +// ARM processors +// -def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", - "Cortex-A17 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureVFP4, - FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureTrustZone]>; +// Dummy CPU, used to target architectures +def : ProcNoItin<"generic", []>; -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", - [FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"arm8", [ARMv4]>; +def : ProcNoItin<"arm810", [ARMv4]>; +def : ProcNoItin<"strongarm", [ARMv4]>; +def : ProcNoItin<"strongarm110", [ARMv4]>; +def : ProcNoItin<"strongarm1100", [ARMv4]>; +def : ProcNoItin<"strongarm1110", [ARMv4]>; -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", - [FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"arm7tdmi", [ARMv4t]>; +def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; +def : ProcNoItin<"arm710t", [ARMv4t]>; +def : ProcNoItin<"arm720t", [ARMv4t]>; +def : ProcNoItin<"arm9", [ARMv4t]>; +def : ProcNoItin<"arm9tdmi", [ARMv4t]>; +def : ProcNoItin<"arm920", [ARMv4t]>; +def : ProcNoItin<"arm920t", [ARMv4t]>; +def : ProcNoItin<"arm922t", [ARMv4t]>; +def : ProcNoItin<"arm940t", [ARMv4t]>; +def : ProcNoItin<"ep9312", [ARMv4t]>; -def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", - [FeatureHWDiv, - FeatureAvoidPartialCPSR, - FeatureDSPThumb2, FeatureT2XtPk, - HasV7Ops, FeatureDB, FeatureHasRAS, - FeatureRClass]>; +def : ProcNoItin<"arm10tdmi", [ARMv5t]>; +def : ProcNoItin<"arm1020t", [ARMv5t]>; -def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", - "Cortex-R5 ARM processors", - [FeatureSlowFPBrcc, - FeatureHWDiv, FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR, - FeatureT2XtPk]>; +def : ProcNoItin<"arm9e", [ARMv5te]>; +def : ProcNoItin<"arm926ej-s", [ARMv5te]>; +def : ProcNoItin<"arm946e-s", [ARMv5te]>; +def : ProcNoItin<"arm966e-s", [ARMv5te]>; +def : ProcNoItin<"arm968e-s", [ARMv5te]>; +def : ProcNoItin<"arm10e", [ARMv5te]>; +def : ProcNoItin<"arm1020e", [ARMv5te]>; +def : ProcNoItin<"arm1022e", [ARMv5te]>; +def : ProcNoItin<"xscale", [ARMv5te]>; +def : ProcNoItin<"iwmmxt", [ARMv5te]>; -// FIXME: 
krait has currently the same features as A9 -// plus VFP4 and hardware division features. -def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureTrustZone, - FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM]>; +def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; +def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; -class ProcNoItin Features> - : Processor; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V4 Processors. -def : ProcNoItin<"generic", []>; -def : ProcNoItin<"arm8", []>; -def : ProcNoItin<"arm810", []>; -def : ProcNoItin<"strongarm", []>; -def : ProcNoItin<"strongarm110", []>; -def : ProcNoItin<"strongarm1100", []>; -def : ProcNoItin<"strongarm1110", []>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; +def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V4T Processors. -def : ProcNoItin<"arm7tdmi", [HasV4TOps]>; -def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>; -def : ProcNoItin<"arm710t", [HasV4TOps]>; -def : ProcNoItin<"arm720t", [HasV4TOps]>; -def : ProcNoItin<"arm9", [HasV4TOps]>; -def : ProcNoItin<"arm9tdmi", [HasV4TOps]>; -def : ProcNoItin<"arm920", [HasV4TOps]>; -def : ProcNoItin<"arm920t", [HasV4TOps]>; -def : ProcNoItin<"arm922t", [HasV4TOps]>; -def : ProcNoItin<"arm940t", [HasV4TOps]>; -def : ProcNoItin<"ep9312", [HasV4TOps]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V5T Processors. -def : ProcNoItin<"arm10tdmi", [HasV5TOps]>; -def : ProcNoItin<"arm1020t", [HasV5TOps]>; - -// V5TE Processors. -def : ProcNoItin<"arm9e", [HasV5TEOps]>; -def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>; -def : ProcNoItin<"arm946e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm966e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm968e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm10e", [HasV5TEOps]>; -def : ProcNoItin<"arm1020e", [HasV5TEOps]>; -def : ProcNoItin<"arm1022e", [HasV5TEOps]>; -def : ProcNoItin<"xscale", [HasV5TEOps]>; -def : ProcNoItin<"iwmmxt", [HasV5TEOps]>; - -// V6 Processors. -def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -// V6M Processors. -def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; - -// V6K Processors. 
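The shape of this ARM.td rewrite is easiest to see side by side: the removed defs above baked ISA-level features into every CPU, while the new scheme keeps those on an Architecture record and leaves only micro-architectural tuning on the CPU. A minimal TableGen sketch of the idiom, using hypothetical names (FeatureFoo, MyArch and mycpu are placeholders, not defs from this patch; a real Architecture also needs a matching ARMArch enumerator in the subtarget):

// Hypothetical illustration only; not part of the patch.
def FeatureFoo : SubtargetFeature<"foo", "HasFoo", "true",
                                  "Enable the foo extension">;

// The Architecture record carries everything the ISA revision implies...
def MyArch : Architecture<"myarch", "MyArch", [HasV7Ops, FeatureDB,
                                               FeatureFoo]>;

// ...so a CPU states only its own tuning deltas on top of it.
def : ProcessorModel<"mycpu", CortexA8Model, [MyArch,
                                              FeatureHasRAS,
                                              FeatureT2XtPk]>;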
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, - FeatureHasSlowFPVMLx]>; -def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>; -def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -// V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, - FeatureDSPThumb2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, - FeatureHasSlowFPVMLx, - FeatureDSPThumb2]>; - -// V7a Processors. // FIXME: A5 has currently the same Schedule model as A8 -def : ProcessorModel<"cortex-a5", CortexA8Model, - [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, - FeatureVFP4, FeatureDSPThumb2, - FeatureHasRAS, FeatureAClass]>; -def : ProcessorModel<"cortex-a7", CortexA8Model, - [ProcA7, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; -def : ProcessorModel<"cortex-a8", CortexA8Model, - [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; -def : ProcessorModel<"cortex-a9", CortexA9Model, - [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, FeatureMP, - FeatureAClass]>; +def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureMP, + FeatureVFP4]>; + +def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureMP, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk]>; + +def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, + FeatureHasRAS, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureMP]>; // FIXME: A12 has currently the same Schedule model as A9 -def : ProcessorModel<"cortex-a12", CortexA9Model, - [ProcA12, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, + FeatureHasRAS, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureMP]>; -// FIXME: A15 has currently the same ProcessorModel as A9. -def : ProcessorModel<"cortex-a15", CortexA9Model, - [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; +// FIXME: A15 has currently the same Schedule model as A9. 
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + FeatureHasRAS, + FeatureTrustZone, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; // FIXME: A17 has currently the same Schedule model as A9 -def : ProcessorModel<"cortex-a17", CortexA9Model, - [ProcA17, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, + FeatureHasRAS, + FeatureTrustZone, + FeatureMP, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; // FIXME: krait has currently the same Schedule model as A9 -def : ProcessorModel<"krait", CortexA9Model, - [ProcKrait, HasV7Ops, - FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; +// FIXME: krait has currently the same features as A9 plus VFP4 and hardware +// division features. +def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, + FeatureHasRAS, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM]>; + +def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, + FeatureHasRAS, + FeatureNEONForFP, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx]>; // FIXME: R4 has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r4", CortexA8Model, - [ProcR4]>; +def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRAS, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R4F has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r4f", CortexA8Model, - [ProcR4, - FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVFP3, FeatureD16]>; +def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRAS, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVFP3, + FeatureD16, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R5 has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r5", CortexA8Model, - [ProcR5, HasV7Ops, FeatureDB, - FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS, - FeatureD16, FeatureRClass]>; +def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, + FeatureHasRAS, + FeatureVFP3, + FeatureD16, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. -def : ProcessorModel<"cortex-r7", CortexA8Model, - [ProcR5, HasV7Ops, FeatureDB, - FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS, FeatureVFPOnlySP, - FeatureD16, FeatureMP, FeatureRClass]>; +def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, + FeatureHasRAS, + FeatureVFP3, + FeatureVFPOnlySP, + FeatureD16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; -// V7M Processors. -def : ProcNoItin<"cortex-m3", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureMClass]>; -def : ProcNoItin<"sc300", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureMClass]>; +def : ProcNoItin<"cortex-m3", [ARMv7m]>; +def : ProcNoItin<"sc300", [ARMv7m]>; -// V7EM Processors. 
-def : ProcNoItin<"cortex-m4", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureVFP4, - FeatureVFPOnlySP, FeatureD16, - FeatureMClass]>; -def : ProcNoItin<"cortex-m7", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureFPARMv8, - FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m4", [ARMv7em, + FeatureVFP4, + FeatureVFPOnlySP, + FeatureD16]>; + +def : ProcNoItin<"cortex-m7", [ARMv7em, + FeatureFPARMv8, + FeatureD16]>; -// Swift uArch Processors. -def : ProcessorModel<"swift", SwiftModel, - [ProcSwift, HasV7Ops, FeatureNEON, - FeatureDB, FeatureDSPThumb2, - FeatureHasRAS, FeatureAClass]>; +def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; -// V8 Processors -def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; -def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; -// FIXME: Cortex-A72 is currently modelled as an Cortex-A57. -def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; +def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; // Cyclone is very similar to swift -def : ProcessorModel<"cyclone", SwiftModel, - [ProcSwift, HasV8Ops, HasV7Ops, - FeatureCrypto, FeatureFPARMv8, - FeatureDB,FeatureDSPThumb2, - FeatureHasRAS, FeatureZCZeroing]>; +def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, + FeatureHasRAS, + FeatureNEONForFP, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureCrypto, + FeatureZCZeroing]>; + //===----------------------------------------------------------------------===// // Register File Description @@ -504,8 +677,15 @@ def ARMAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def ARMAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "ARM"; + string BreakCharacters = "."; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParserVariants = [ARMAsmParserVariant]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 738ddedccdac..206db9619a2f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -60,7 +60,7 @@ using namespace llvm; ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr), - InConstantPool(false) {} + InConstantPool(false), OptimizationGoals(-1) {} void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end @@ -80,8 +80,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->EmitLabel(CurrentFnSym); } -void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); +void 
ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) { + uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast(CV->stripPointerCasts()); @@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); SetupMachineFunction(MF); + const Function* F = MF.getFunction(); + const TargetMachine& TM = MF.getTarget(); + + // Calculate this function's optimization goal. + unsigned OptimizationGoal; + if (F->hasFnAttribute(Attribute::OptimizeNone)) + // For best debugging illusion, speed and small size sacrificed + OptimizationGoal = 6; + else if (F->optForMinSize()) + // Aggressively for small size, speed and debug illusion sacrificed + OptimizationGoal = 4; + else if (F->optForSize()) + // For small size, but speed and debugging illusion preserved + OptimizationGoal = 3; + else if (TM.getOptLevel() == CodeGenOpt::Aggressive) + // Aggressively for speed, small size and debug illusion sacrificed + OptimizationGoal = 2; + else if (TM.getOptLevel() > CodeGenOpt::None) + // For speed, but small size and good debug illusion preserved + OptimizationGoal = 1; + else // TM.getOptLevel() == CodeGenOpt::None + // For good debugging, but speed and small size preserved + OptimizationGoal = 5; + + // Combine a new optimization goal with existing ones. + if (OptimizationGoals == -1) // uninitialized goals + OptimizationGoals = OptimizationGoal; + else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals + OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = MF.getFunction()->hasInternalLinkage(); + bool Internal = F->hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -198,22 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel(unsigned uid) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid; return OutContext.getOrCreateSymbol(Name); } - -MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getDataLayout(); - SmallString<60> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" - << getFunctionNumber(); - return OutContext.getOrCreateSymbol(Name); -} - bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { @@ -515,6 +535,17 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. 
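The optimization-goal bookkeeping added in runOnMachineFunction above reduces to a small merge rule across the module: -1 means no function has been seen yet, the first function seeds the value, and any later disagreement collapses it to 0, which makes the attribute emission below skip the tag. A standalone sketch of that rule (illustration, not the patch's code; the helper name is invented):

#include <cassert>
#include <iostream>

// Merge one function's goal (1..6) into the running module-wide value,
// where -1 = uninitialized and 0 = conflicting goals.
static int mergeOptimizationGoal(int Goals, unsigned Goal) {
  assert(Goal >= 1 && Goal <= 6 && "per-function goals are 1..6");
  if (Goals == -1)
    return static_cast<int>(Goal);   // first function seeds the value
  if (Goals != static_cast<int>(Goal))
    return 0;                        // disagreement: emit no specific goal
  return Goals;
}

int main() {
  int Goals = -1;
  Goals = mergeOptimizationGoal(Goals, 1); // an -O2 function
  Goals = mergeOptimizationGoal(Goals, 1); // another -O2 function: still 1
  Goals = mergeOptimizationGoal(Goals, 4); // a minsize function: conflict
  std::cout << Goals << '\n';              // prints 0, so the tag is skipped
}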
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // The last attribute to be emitted is ABI_optimization_goals + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + if (OptimizationGoals > 0 && + (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI())) + ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); + OptimizationGoals = -1; + + ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -532,7 +563,7 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, if (Subtarget->hasV8Ops()) return ARMBuildAttrs::v8; else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasThumb2DSP()) + if (Subtarget->isMClass() && Subtarget->hasDSP()) return ARMBuildAttrs::v7E_M; return ARMBuildAttrs::v7; } else if (Subtarget->hasV6T2Ops()) @@ -587,7 +618,7 @@ void ARMAsmPrinter::emitAttributes() { // We consider krait as a "cortex-a9" + hwdiv CPU // Enable hwdiv through ".arch_extension idiv" if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV); + ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); } else ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); } @@ -807,8 +838,6 @@ void ARMAsmPrinter::emitAttributes() { else if (STI.hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowVirtualization); - - ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -828,8 +857,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF; case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF; - case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT; - case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF; + case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL; } llvm_unreachable("Invalid ARMCPModifier!"); } @@ -875,8 +903,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getDataLayout(); - int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout &DL = getDataLayout(); + int Size = DL.getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast(MCPV); @@ -909,10 +937,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OutContext); if (ACPV->getPCAdjustment()) { - MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - ACPV->getLabelId(), - OutContext); + MCSymbol *PCLabel = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + ACPV->getLabelId(), OutContext); const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext); PCRelExpr = MCBinaryExpr::createAdd(PCRelExpr, @@ -1136,6 +1163,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { Offset = 0; break; case ARM::ADDri: + case ARM::t2ADDri: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: @@ -1198,7 +1226,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // If we just ended a constant pool, mark it as such. 
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1355,9 +1383,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(2).getImm(), OutContext); + MCSymbol *LabelSym = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = @@ -1388,9 +1416,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(3).getImm(), OutContext); + MCSymbol *LabelSym = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = @@ -1414,10 +1442,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr) @@ -1436,10 +1463,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr) @@ -1468,10 +1494,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // a PC-relative address at the ldr instruction. // Emit the label. 
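These hunks all lower variations of one PIC idiom, worth spelling out once: the constant pool entry (or movw/movt pair) holds GV - (LPC + PCAdj), and the add of pc emitted at label LPC reconstitutes the address of GV at run time. PCAdj is 8 in ARM state and 4 in Thumb because of how far the architectural PC reads ahead, which matches the 8-versus-4 selection in the hunks above. A toy model of the arithmetic (illustration only, not patch code):

#include <cassert>
#include <cstdint>

// Value the compiler stores for a PIC reference to GV, resolved by the
// pc-relative add emitted at address LabelAddr.
static uint32_t picEntry(uint32_t GV, uint32_t LabelAddr, bool Thumb) {
  const uint32_t PCAdj = Thumb ? 4 : 8; // architectural PC read-ahead
  return GV - (LabelAddr + PCAdj);
}

int main() {
  const uint32_t GV = 0x20000, Label = 0x8000;
  // At run time: pc as read at the label, plus the stored entry, equals GV.
  assert(picEntry(GV, Label, /*Thumb=*/false) + (Label + 8) == GV);
}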
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the load unsigned Opcode; @@ -1519,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MCPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else - EmitGlobalConstant(MCPE.Val.ConstVal); + EmitGlobalConstant(DL, MCPE.Val.ConstVal); return; } case ARM::JUMPTABLE_ADDRS: @@ -1653,12 +1678,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // adds $val, #7 // str $val, [$src, #4] // movs r0, #0 - // b 1f + // b LSJLJEH // movs r0, #1 - // 1: + // LSJLJEH: unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); - MCSymbol *Label = GetARMSJLJEHLabel(); + MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 3d251213f5bf..ed7be2de51ca 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// labels used for ARMv4t thumb code to make register indirect calls. SmallVector, 4> ThumbIndirectPads; + /// OptimizationGoals - Maintain a combined optimization goal for all + /// functions in a module: one of Tag_ABI_optimization_goals values, + /// -1 if uninitialized, 0 if conflicting goals + int OptimizationGoals; + public: explicit ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); @@ -84,7 +89,7 @@ public: void EmitFunctionEntryLabel() override; void EmitStartOfAsmFile(Module &M) override; void EmitEndOfAsmFile(Module &M) override; - void EmitXXStructor(const Constant *CV) override; + void EmitXXStructor(const DataLayout &DL, const Constant *CV) override; // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); @@ -119,8 +124,6 @@ private: MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const; - MCSymbol *GetARMSJLJEHLabel() const; - MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags); public: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9f43e732bd73..49f328852667 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -97,7 +97,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) Subtarget(STI) { for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) - assert(false && "Duplicated entries?"); + llvm_unreachable("Duplicated entries?"); MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); } @@ -440,7 +440,7 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { if (MI->isBundle()) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { int PIdx = I->findFirstPredOperandIdx(); @@ -518,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, static bool isCPSRDefined(const MachineInstr *MI) { for (const auto &MO : MI->operands()) - if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } @@ -647,7 +647,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { assert(!I->isBundle() && "No nested bundle!"); @@ -853,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -1043,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -1224,6 +1219,60 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } +/// \brief Expands MEMCPY to either 
LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+  bool isThumb1 = Subtarget.isThumb1Only();
+  bool isThumb2 = Subtarget.isThumb2();
+  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+  MachineInstr *MI = MBBI;
+  DebugLoc dl = MI->getDebugLoc();
+  MachineBasicBlock *BB = MI->getParent();
+
+  MachineInstrBuilder LDM, STM;
+  if (isThumb1 || !MI->getOperand(1).isDead()) {
+    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+                                                 : isThumb1 ? ARM::tLDMIA_UPD
+                                                            : ARM::LDMIA_UPD))
+              .addOperand(MI->getOperand(1));
+  } else {
+    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+  }
+
+  if (isThumb1 || !MI->getOperand(0).isDead()) {
+    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+                                                 : isThumb1 ? ARM::tSTMIA_UPD
+                                                            : ARM::STMIA_UPD))
+              .addOperand(MI->getOperand(0));
+  } else {
+    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+  }
+
+  AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+  AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+  // Sort the scratch registers into ascending order.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  llvm::SmallVector<unsigned, 6> ScratchRegs;
+  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+    ScratchRegs.push_back(MI->getOperand(I).getReg());
+  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+            [&TRI](const unsigned &Reg1,
+                   const unsigned &Reg2) -> bool {
+              return TRI.getEncodingValue(Reg1) <
+                     TRI.getEncodingValue(Reg2);
+            });
+
+  for (const auto &Reg : ScratchRegs) {
+    LDM.addReg(Reg, RegState::Define);
+    STM.addReg(Reg, RegState::Kill);
+  }
+
+  BB->erase(MBBI);
+}
+
+
 bool
 ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
@@ -1237,6 +1286,11 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     return true;
   }
 
+  if (MI->getOpcode() == ARM::MEMCPY) {
+    expandMEMCPY(MI);
+    return true;
+  }
+
   // This hook gets to expand COPY instructions before they become
   // copyPhysReg() calls. Look for VMOVS instructions that can legally be
   // widened to VMOVD. We prefer the VMOVD when possible because it may be
@@ -1325,9 +1379,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
   // instructions, so that's probably OK, but is PIC always correct when
   // we get here?
   if (ACPV->isGlobalValue())
-    NewCPV = ARMConstantPoolConstant::
-      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
-             ARMCP::CPValue, 4);
+    NewCPV = ARMConstantPoolConstant::Create(
+        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
   else if (ACPV->isExtSymbol())
     NewCPV = ARMConstantPoolSymbol::
       Create(MF.getFunction()->getContext(),
@@ -1645,16 +1699,14 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
 bool ARMBaseInstrInfo::
 isProfitableToIfCvt(MachineBasicBlock &MBB,
                     unsigned NumCycles, unsigned ExtraPredCycles,
-                    const BranchProbability &Probability) const {
+                    BranchProbability Probability) const {
   if (!NumCycles)
     return false;
 
   // If we are optimizing for size, see if the branch in the predecessor can be
   // lowered to cbn?z by the constant island lowering pass, and return false if
   // so. This results in a shorter instruction sequence.
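One non-obvious detail in the expandMEMCPY expansion above is the std::sort by encoding value: LDM/STM encode their register list as a bitmask and transfer registers in ascending encoding order, so the scratch operands are appended lowest-encoding first to keep the MachineInstr's operand order consistent with what the hardware actually does. A toy illustration (not patch code; assumes the A32 GPR encodings 0..15):

#include <cstdint>
#include <iostream>

// Build the 16-bit register-list field of an LDM/STM from GPR encodings.
static uint16_t regListMask(const unsigned *Encodings, unsigned N) {
  uint16_t Mask = 0;
  for (unsigned I = 0; I != N; ++I)
    Mask |= uint16_t(1u << Encodings[I]); // bit r set => register r in list
  return Mask;
}

int main() {
  unsigned Regs[] = {3, 1, 2}; // operand order never reaches the encoding...
  std::cout << std::hex << regListMask(Regs, 3) << '\n'; // prints e either way
  // ...which is why the expansion canonicalizes operands to ascending order.
}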
- const Function *F = MBB.getParent()->getFunction(); - if (F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize)) { + if (MBB.getParent()->getFunction()->optForSize()) { MachineBasicBlock *Pred = *MBB.pred_begin(); if (!Pred->empty()) { MachineInstr *LastMI = &*Pred->rbegin(); @@ -1677,12 +1729,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } // Attempt to estimate the relative costs of predication versus branching. - unsigned UnpredCost = Probability.getNumerator() * NumCycles; - UnpredCost /= Probability.getDenominator(); - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling NumCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); + UnpredCost += ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (NumCycles + ExtraPredCycles) <= UnpredCost; + return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; } bool ARMBaseInstrInfo:: @@ -1690,23 +1744,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned TCycles, unsigned TExtra, MachineBasicBlock &FMBB, unsigned FCycles, unsigned FExtra, - const BranchProbability &Probability) const { + BranchProbability Probability) const { if (!TCycles || !FCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - unsigned TUnpredCost = Probability.getNumerator() * TCycles; - TUnpredCost /= Probability.getDenominator(); - - uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); - unsigned FUnpredCost = Comp * FCycles; - FUnpredCost /= Probability.getDenominator(); - + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling TCycles/FCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = + Probability.getCompl().scale(FCycles * ScalingUpFactor); unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; + return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; } bool @@ -1744,9 +1797,10 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } -/// commuteInstruction - Handle commutable instructions. -MachineInstr * -ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { switch (MI->getOpcode()) { case ARM::MOVCCr: case ARM::t2MOVCCr: { @@ -1756,7 +1810,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) return nullptr; - MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); if (!MI) return nullptr; // After swapping the MOVCC operands, also invert the condition. 
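The ScalingUpFactor rewrite above is easiest to see with numbers: with the old integer math, NumCycles * Numerator / Denominator truncates to zero for small cycle counts, so different probabilities became indistinguishable; scaling both sides of the comparison by 1024 first keeps about ten fractional bits. A toy reproduction (simplified stand-in for llvm::BranchProbability, which rounds slightly differently):

#include <cstdint>
#include <iostream>

struct Prob {
  uint32_t N, D; // numerator / denominator
  uint64_t scale(uint64_t X) const { return X * N / D; }
};

int main() {
  const Prob P{1, 3};      // branch taken one time in three
  const unsigned Cycles = 2, Factor = 1024;
  std::cout << P.scale(Cycles) << '\n';          // 0: signal truncated away
  std::cout << P.scale(Cycles * Factor) << '\n'; // 682: usable in comparisons
}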
@@ -1765,7 +1819,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } } - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } /// Identify instructions that can be folded into a MOVCC instruction, and @@ -1975,21 +2029,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI, - MachineInstr *MI) { - for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true); - Subreg.isValid(); ++Subreg) - if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) != - MachineBasicBlock::LQR_Dead) - return true; - return false; -} bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + if (!MF.getFunction()->optForMinSize()) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2058,11 +2103,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, // registers live within the function we might clobber a return value // register; the other way a register can be live here is if it's // callee-saved. - // TODO: Currently, computeRegisterLiveness() does not report "live" if a - // sub reg is live. When computeRegisterLiveness() works for sub reg, it - // can replace isAnySubRegLive(). if (isCalleeSavedRegister(CurReg, CSRegs) || - isAnySubRegLive(CurReg, TRI, MI)) { + MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != + MachineBasicBlock::LQR_Dead) { // VFP pops don't allow holes in the register list, so any skip is fatal // for our transformation. GPR pops do, so we should just keep looking. if (IsVFPPushPop) @@ -3381,7 +3424,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, assert(Idx != -1 && "Cannot find bundled definition!"); DefIdx = Idx; - return II; + return &*II; } static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, @@ -3389,7 +3432,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, unsigned &UseIdx, unsigned &Dist) { Dist = 0; - MachineBasicBlock::const_instr_iterator II = MI; ++II; + MachineBasicBlock::const_instr_iterator II = ++MI->getIterator(); assert(II->isInsideBundle() && "Empty bundle?"); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); @@ -3410,7 +3453,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, } UseIdx = Idx; - return II; + return &*II; } /// Return the number of cycles to add to (or subtract from) the static @@ -3652,6 +3695,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); + // FIXME: Use Function::optForSize(). if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } @@ -3931,11 +3975,11 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // other passes may query the latency of a bundled instruction. 
if (MI->isBundle()) { unsigned Latency = 0; - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { if (I->getOpcode() != ARM::t2IT) - Latency += getInstrLatency(ItinData, I, PredCost); + Latency += getInstrLatency(ItinData, &*I, PredCost); } return Latency; } @@ -4054,8 +4098,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); MIB.addReg(Reg, RegState::Kill).addImm(0); unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; - MachineMemOperand *MMO = MBB.getParent()-> - getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4); MIB.addMemOperand(MMO); AddDefaultPred(MIB); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b4706e348933..d80c49494c77 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -86,6 +86,18 @@ protected: RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override; + /// Commutes the operands in the given instruction. + /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. + /// + /// Do not call this method for a non-commutable instruction or for + /// non-commutable pair of operand indices OpIdx1 and OpIdx2. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; @@ -188,9 +200,6 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const override; - MachineInstr *commuteInstruction(MachineInstr*, - bool=false) const override; - const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const; @@ -224,15 +233,15 @@ public: bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const override { + BranchProbability Probability) const override { return NumCycles == 1; } @@ -343,6 +352,8 @@ private: virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, Reloc::Model RM) const = 0; + void expandMEMCPY(MachineBasicBlock::iterator) const; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
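The ARMBaseInstrInfo.h hunk above is part of a wider API migration: the public commuteInstruction() entry point now dispatches to a protected commuteInstructionImpl() hook that is told exactly which operand pair to swap, so targets override the hook rather than the entry point (compare the MOVCC handling in the .cpp hunk earlier). A minimal sketch of that shape, with stand-in types rather than LLVM's real signatures:

// Simplified stand-in types; the point is the override pattern, not the
// exact LLVM interfaces.
struct MachineInstrSketch {};

struct TargetInstrInfoSketch {
  virtual ~TargetInstrInfoSketch() = default;
  // Base implementation performs the generic operand swap; returning
  // nullptr means "could not commute".
  virtual MachineInstrSketch *commuteInstructionImpl(MachineInstrSketch *MI,
                                                     bool NewMI,
                                                     unsigned OpIdx1,
                                                     unsigned OpIdx2) {
    (void)NewMI; (void)OpIdx1; (void)OpIdx2;
    return MI;
  }
};

struct ARMLikeInstrInfo : TargetInstrInfoSketch {
  MachineInstrSketch *commuteInstructionImpl(MachineInstrSketch *MI,
                                             bool NewMI, unsigned OpIdx1,
                                             unsigned OpIdx2) override {
    // Target-specific fixups (e.g. inverting a MOVCC predicate) happen
    // here, before delegating the actual swap to the base class.
    return TargetInstrInfoSketch::commuteInstructionImpl(MI, NewMI, OpIdx1,
                                                         OpIdx2);
  }
};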
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e7d5be7753e4..419717c85a79 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -225,7 +225,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const { + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); std::pair Hint = MRI.getRegAllocationHint(VirtReg); @@ -338,7 +339,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + if (!TargetRegisterInfo::canRealignStack(MF)) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -355,18 +356,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return MRI->canReserveReg(BasePtr); } -bool ARMBaseRegisterInfo:: -needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index fdc1ef9432c8..cea8b80c7821 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -94,7 +94,7 @@ public: const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i32 first argument if the calling convention @@ -126,15 +126,15 @@ public: ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const override; + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const override; void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const override; bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index d687568d7eb9..a731d00883a1 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } -static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; +static const MCPhysReg 
RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; -static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; -static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; +static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA @@ -199,9 +199,11 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. - unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U); + auto &DL = State.getMachineFunction().getDataLayout(); + unsigned StackAlign = DL.getStackAlignment(); + unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); - ArrayRef RegList; + ArrayRef RegList; switch (LocVT.SimpleTy) { case MVT::i32: { RegList = RRegList; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 27cf06b995a0..233516415149 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCIfAlign<"16", + CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>, CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> ]>; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index f4ec8c67c977..e89757c19ecc 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -342,7 +342,7 @@ void ARMConstantIslands::verify() { #ifndef NDEBUG for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; + MachineBasicBlock *MBB = &*MBBI; unsigned MBBId = MBB->getNumber(); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } @@ -542,7 +542,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // identity mapping of CPI's to CPE's. const std::vector &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getTarget().getDataLayout(); + const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -589,6 +589,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement( MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; for (MachineBasicBlock &MBB : *MF) { auto MI = MBB.getLastNonDebugInstr(); + if (MI == MBB.end()) + continue; unsigned JTOpcode; switch (MI->getOpcode()) { @@ -639,12 +641,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement( /// into the block immediately after it. 
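Stepping back to the calling-convention hunks above: CC_ARM_AAPCS_Custom_Aggregate implements the AAPCS rule that a homogeneous floating-point aggregate (HFA/HVA) is either assigned a contiguous block of argument registers or passed entirely on the stack. A toy first-fit version of that search (illustration only, not the patch's implementation):

#include <cstdio>

// First register of a free contiguous block of length NeedLen, or -1 if
// none exists, in which case the whole aggregate goes on the stack.
static int findContiguousRegs(const bool Free[], unsigned NumRegs,
                              unsigned NeedLen) {
  for (unsigned Start = 0; Start + NeedLen <= NumRegs; ++Start) {
    bool Ok = true;
    for (unsigned I = 0; I != NeedLen && Ok; ++I)
      Ok = Free[Start + I];
    if (Ok)
      return static_cast<int>(Start);
  }
  return -1;
}

int main() {
  bool Free[8] = {false, true, true, false, true, true, true, false};
  // A three-member HFA fits only at registers 4..6 here; the shorter free
  // run at 1..2 is skipped because the block must hold the aggregate whole.
  std::printf("%d\n", findContiguousRegs(Free, 8, 3)); // prints 4
}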
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; + MachineFunction::iterator MBBI = MBB->getIterator(); // Can't fall off end of function. if (std::next(MBBI) == MBB->getParent()->end()) return false; - MachineBasicBlock *NextBB = std::next(MBBI); + MachineBasicBlock *NextBB = &*std::next(MBBI); if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) return false; @@ -722,15 +724,15 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); + for (MachineBasicBlock &MBB : *MF) + computeBlockSize(&MBB); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(&MF->front()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -968,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MachineFunction::iterator MBBI = ++OrigBB->getIterator(); MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. @@ -1088,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, unsigned CPELogAlign = getCPELogAlign(U.CPEMI); unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); unsigned NextBlockOffset, NextBlockAlignment; - MachineFunction::const_iterator NextBlock = Water; + MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); NextBlockAlignment = 0; @@ -1350,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); - NewMBB = std::next(MachineFunction::iterator(UserMBB)); + NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, // but if the preceding conditional branch is out of range, the targets @@ -1503,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { NewWaterList.insert(NewIsland); // The new CPE goes before the following block (NewMBB). - NewMBB = std::next(MachineFunction::iterator(WaterBB)); - + NewMBB = &*++WaterBB->getIterator(); } else { // No water found. DEBUG(dbgs() << "No water found\n"); @@ -1515,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. 
-    MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+    MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
     IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
     if (IP != WaterList.end())
       NewWaterList.erase(WaterBB);
@@ -1532,7 +1533,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
       WaterList.erase(IP);
 
   // Okay, we know we can put an island before NewMBB now, do it!
-  MF->insert(NewMBB, NewIsland);
+  MF->insert(NewMBB->getIterator(), NewIsland);
 
   // Update internal data structures to account for the newly inserted MBB.
   updateForInsertedWaterBlock(NewIsland);
@@ -1553,7 +1554,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
   // Increase the size of the island block to account for the new entry.
   BBInfo[NewIsland->getNumber()].Size += Size;
-  adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
+  adjustBBOffsetsAfter(&*--NewIsland->getIterator());
 
   // Finally, change the CPI in the instruction operand to be ID.
   for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1732,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
     MBB->back().eraseFromParent();
     // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
   }
-  MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+  MachineBasicBlock *NextBB = &*++MBB->getIterator();
 
   DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
                << " also invert condition and change dest. to BB#"
@@ -2058,9 +2059,9 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
 /// \brief Returns whether CPEMI is the first instruction in the block
 /// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
 /// we can switch the first register to PC and usually remove the address
-/// calculation that preceeded it.
+/// calculation that preceded it.
 static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
-  MachineFunction::iterator MBB = JTMI->getParent();
+  MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
   MachineFunction *MF = MBB->getParent();
   ++MBB;
 
@@ -2235,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   SmallVector<MachineOperand, 4> Cond;
   SmallVector<MachineOperand, 4> CondPrior;
-  MachineFunction::iterator BBi = BB;
+  MachineFunction::iterator BBi = BB->getIterator();
   MachineFunction::iterator OldPrior = std::prev(BBi);
 
   // If the block terminator isn't analyzable, don't try to move the block
@@ -2258,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
   // Create a new MBB for the code after the jump BB.
   MachineBasicBlock *NewBB =
     MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
-  MachineFunction::iterator MBBI = JTBB; ++MBBI;
+  MachineFunction::iterator MBBI = ++JTBB->getIterator();
   MF->insert(MBBI, NewBB);
 
   // Add an unconditional branch from NewBB to BB.
@@ -2273,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
 
   // Update the CFG.
   NewBB->addSuccessor(BB);
-  JTBB->removeSuccessor(BB);
-  JTBB->addSuccessor(NewBB);
+  JTBB->replaceSuccessor(BB, NewBB);
 
   ++NumJTInserted;
   return NewBB;
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7d41c69f08b8..c9849b2605ea 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -52,8 +52,7 @@ const char *ARMConstantPoolValue::getModifierText() const {
     // strings if that's legal.
   case ARMCP::no_modifier: return "none";
   case ARMCP::TLSGD:       return "tlsgd";
-  case ARMCP::GOT:         return "GOT";
-  case ARMCP::GOTOFF:      return "GOTOFF";
+  case ARMCP::GOT_PREL:    return "GOT_PREL";
   case ARMCP::GOTTPOFF:    return "gottpoff";
   case ARMCP::TPOFF:       return "tpoff";
   }
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e239a9e..6b18a4e52878 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -39,8 +39,7 @@ namespace ARMCP {
   enum ARMCPModifier {
     no_modifier,
     TLSGD,
-    GOT,
-    GOTOFF,
+    GOT_PREL,
     GOTTPOFF,
     TPOFF
   };
@@ -103,8 +102,6 @@ public:
   bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
   bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
 
-  unsigned getRelocationInfo() const override { return 2; }
-
   int getExistingMachineCPValue(MachineConstantPool *CP,
                                 unsigned Alignment) override;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50758dc..56f3498e1204 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -330,22 +330,19 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
 /// load or store pseudo instruction.
 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
-  const unsigned NumEntries = array_lengthof(NEONLdStTable);
-
 #ifndef NDEBUG
   // Make sure the table is sorted.
   static bool TableChecked = false;
   if (!TableChecked) {
-    for (unsigned i = 0; i != NumEntries-1; ++i)
-      assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
-             "NEONLdStTable is not sorted!");
+    assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+           "NEONLdStTable is not sorted!");
     TableChecked = true;
   }
 #endif
 
-  const NEONLdStTableEntry *I =
-    std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
-  if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+  auto I = std::lower_bound(std::begin(NEONLdStTable),
+                            std::end(NEONLdStTable), Opcode);
+  if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
     return I;
   return nullptr;
 }
@@ -734,7 +731,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   HI16.addImm(Pred).addReg(PredReg);
 
   if (RequiresBundling)
-    finalizeBundle(MBB, &*LO16, &*MBBI);
+    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
 
   TransferImpOps(MI, LO16, HI16);
   MI.eraseFromParent();
@@ -747,6 +744,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   switch (Opcode) {
   default:
     return false;
+
+  case ARM::TCRETURNdi:
+  case ARM::TCRETURNri: {
+    MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+    assert(MBBI->isReturn() &&
+           "Can only insert epilog into returning blocks");
+    unsigned RetOpcode = MBBI->getOpcode();
+    DebugLoc dl = MBBI->getDebugLoc();
+    const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+        MBB.getParent()->getSubtarget().getInstrInfo());
+
+    // Tail call return: adjust the stack pointer and jump to callee.
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+    // Jump to label or value in register.
+    if (RetOpcode == ARM::TCRETURNdi) {
+      unsigned TCOpcode =
+          STI->isThumb()
+              ? (STI->isTargetMachO() ?
+                     ARM::tTAILJMPd : ARM::tTAILJMPdND)
+              : ARM::TAILJMPd;
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+      if (JumpTarget.isGlobal())
+        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                             JumpTarget.getTargetFlags());
+      else {
+        assert(JumpTarget.isSymbol());
+        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                              JumpTarget.getTargetFlags());
+      }
+
+      // Add the default predicate in Thumb mode.
+      if (STI->isThumb())
+        MIB.addImm(ARMCC::AL).addReg(0);
+    } else if (RetOpcode == ARM::TCRETURNri) {
+      BuildMI(MBB, MBBI, dl,
+              TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+          .addReg(JumpTarget.getReg(), RegState::Kill);
+    }
+
+    MachineInstr *NewMI = std::prev(MBBI);
+    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+      NewMI->addOperand(MBBI->getOperand(i));
+
+    // Delete the pseudo instruction TCRETURN.
+    MBB.erase(MBBI);
+    MBBI = NewMI;
+    return true;
+  }
   case ARM::VMOVScc:
   case ARM::VMOVDcc: {
     unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index fdd0763ea608..9bdf823c85bd 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -922,12 +922,9 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
   if (Addr.BaseType == Address::FrameIndexBase) {
     int FI = Addr.Base.FI;
     int Offset = Addr.Offset;
-    MachineMemOperand *MMO =
-      FuncInfo.MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(FI, Offset),
-        Flags,
-        MFI.getObjectSize(FI),
-        MFI.getObjectAlignment(FI));
+    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 
     // Now add the rest of the operands.
     MIB.addFrameIndex(FI);
@@ -1278,8 +1275,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
       .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
-      fastEmitBranch(FBB, DbgLoc);
-      FuncInfo.MBB->addSuccessor(TBB);
+      finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1303,8 +1299,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
 
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
       .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
-      fastEmitBranch(FBB, DbgLoc);
-      FuncInfo.MBB->addSuccessor(TBB);
+      finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
   } else if (const ConstantInt *CI =
@@ -1341,8 +1336,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
     unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
     .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
-    fastEmitBranch(FBB, DbgLoc);
-    FuncInfo.MBB->addSuccessor(TBB);
+    finishCondBranch(BI->getParent(), TBB, FBB);
     return true;
   }
 
@@ -1355,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
                   TII.get(Opc)).addReg(AddrReg));
 
   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
-  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
-    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+  for (const BasicBlock *SuccBB : IB->successors())
+    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
 
   return true;
 }
@@ -1860,8 +1854,9 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
     else
       return (Return ?
 RetCC_ARM_AAPCS: CC_ARM_AAPCS);
-  } else
-    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  } else {
+    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  }
   case CallingConv::ARM_AAPCS_VFP:
     if (!isVarArg)
       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
@@ -2944,48 +2939,51 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                      unsigned Align, MVT VT) {
-  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
-  ARMConstantPoolConstant *CPV =
-    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
-  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+  bool UseGOT_PREL =
+      !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
 
-  unsigned Opc;
-  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
-  // Load value.
-  if (isThumb2) {
-    DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                            TII.get(ARM::t2LDRpci), DestReg1)
-                    .addConstantPoolIndex(Idx));
-    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
-  } else {
-    // The extra immediate is for addrmode2.
-    DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                            DbgLoc, TII.get(ARM::LDRcp), DestReg1)
-                    .addConstantPoolIndex(Idx).addImm(0));
-    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
-  }
+  LLVMContext *Context = &MF->getFunction()->getContext();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+      /*AddCurrentAddress=*/UseGOT_PREL);
 
-  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
-  if (GlobalBaseReg == 0) {
-    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
-    AFI->setGlobalBaseReg(GlobalBaseReg);
-  }
+  unsigned ConstAlign =
+      MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
 
-  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
-  DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
-  DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
-  GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
-  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                                    DbgLoc, TII.get(Opc), DestReg2)
-                            .addReg(DestReg1)
-                            .addReg(GlobalBaseReg);
-  if (!UseGOTOFF)
+  unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+          .addConstantPoolIndex(Idx);
+  if (Opc == ARM::LDRcp)
     MIB.addImm(0);
-  AddOptionalDefs(MIB);
+  AddDefaultPred(MIB);
 
-  return DestReg2;
+  // Fix the address by adding pc.
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ?
+        ARM::PICLDR
+        : ARM::PICADD;
+  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+            .addReg(TempReg)
+            .addImm(ARMPCLabelIndex);
+  if (!Subtarget->isThumb())
+    AddDefaultPred(MIB);
+
+  if (UseGOT_PREL && Subtarget->isThumb()) {
+    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(ARM::t2LDRi12), NewDestReg)
+              .addReg(DestReg)
+              .addImm(0);
+    DestReg = NewDestReg;
+    AddOptionalDefs(MIB);
+  }
+  return DestReg;
 }
 
 bool ARMFastISel::fastLowerArguments() {
@@ -3038,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() {
   }
 
-  static const uint16_t GPRArgRegs[] = {
+  static const MCPhysReg GPRArgRegs[] = {
     ARM::R0, ARM::R1, ARM::R2, ARM::R3
   };
 
@@ -3055,7 +3053,7 @@ bool ARMFastISel::fastLowerArguments() {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY),
             ResultReg).addReg(DstReg, getKillRegState(true));
-    updateValueMap(I, ResultReg);
+    updateValueMap(&*I, ResultReg);
   }
 
   return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 6744000afe2b..c5990bb7d1fb 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
   // iOS requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetIOS())
+  if (STI.isTargetIOS() || STI.isTargetWatchOS())
     return true;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -288,7 +289,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
-  assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo  *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -305,7 +305,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc dl;
+
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
 
   // Determine the sizes of each callee-save spill areas and record which frame
@@ -489,7 +493,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 
   if (NumBytes) {
     // Adjust SP after all the callee-save spills.
-    if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+        tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
       DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
     else {
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -689,60 +694,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     AFI->setShouldRestoreSPFromFP(true);
 }
 
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc dl = MBBI->getDebugLoc();
-  const ARMBaseInstrInfo &TII =
-      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
-  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
-    return;
-
-  // Tail call return: adjust the stack pointer and jump to callee.
-  MBBI = MBB.getLastNonDebugInstr();
-  MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-  // Jump to label or value in register.
-  if (RetOpcode == ARM::TCRETURNdi) {
-    unsigned TCOpcode = STI.isThumb() ?
-         (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
-         ARM::TAILJMPd;
-    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
-    if (JumpTarget.isGlobal())
-      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                           JumpTarget.getTargetFlags());
-    else {
-      assert(JumpTarget.isSymbol());
-      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
-                            JumpTarget.getTargetFlags());
-    }
-
-    // Add the default predicate in Thumb mode.
-    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
-  } else if (RetOpcode == ARM::TCRETURNri) {
-    BuildMI(MBB, MBBI, dl,
-            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
-      addReg(JumpTarget.getReg(), RegState::Kill);
-  }
-
-  MachineInstr *NewMI = std::prev(MBBI);
-  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-    NewMI->addOperand(MBBI->getOperand(i));
-
-  // Delete the pseudo instruction TCRETURN.
-  MBB.erase(MBBI);
-  MBBI = NewMI;
-}
-
 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +711,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
-  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
-    fixTCReturn(MF, MBB);
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     return;
-  }
+
+  // First put ourselves on the first (from top) terminator instructions.
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc dl = MBBI != MBB.end() ?
+      MBBI->getDebugLoc() : DebugLoc();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +795,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
 
-  fixTCReturn(MF, MBB);
-
   if (ArgRegsSaveSize)
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
 }
@@ -932,12 +885,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
   return Offset;
 }
 
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-                                          int FI) const {
-  unsigned FrameReg;
-  return getFrameIndexReference(MF, FI, FrameReg);
-}
-
 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     const std::vector<CalleeSavedInfo> &CSI,
@@ -950,7 +897,6 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
 
   SmallVector<std::pair<unsigned, bool>, 4> Regs;
   unsigned i = CSI.size();
@@ -1008,7 +954,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     // Put any subsequent vpush instructions before this one: they will refer to
     // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
-    --MI;
+    if (MI != MBB.begin())
+      --MI;
   }
 }
 
@@ -1022,12 +969,20 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned RetOpcode = MI->getOpcode();
-  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
-                     RetOpcode == ARM::TCRETURNri);
-  bool isInterrupt =
-      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+  DebugLoc DL;
+  bool isTailCall = false;
+  bool isInterrupt = false;
+  bool isTrap = false;
+  if (MBB.end() != MI) {
+    DL = MI->getDebugLoc();
+    unsigned RetOpcode = MI->getOpcode();
+    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+    isInterrupt =
+        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+    isTrap =
+        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+        RetOpcode == ARM::tTRAP;
+  }
 
   SmallVector<unsigned, 4> Regs;
   unsigned i = CSI.size();
@@ -1043,11 +998,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       continue;
 
     if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
-        STI.hasV5TOps()) {
-      Reg = ARM::PC;
-      LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+        !isTrap && STI.hasV5TOps()) {
+      if (MBB.succ_empty()) {
+        Reg = ARM::PC;
+        DeleteRet = true;
+        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+      } else
+        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
       // Fold the return instruction into the LDM.
-      DeleteRet = true;
     }
 
     // If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1026,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                        .addReg(ARM::SP));
       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
         MIB.addReg(Regs[i], getDefRegState(true));
-      if (DeleteRet) {
+      if (DeleteRet && MI != MBB.end()) {
        MIB.copyImplicitOps(&*MI);
         MI->eraseFromParent();
       }
@@ -1095,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     // Put any subsequent vpop instructions after this one: they will refer to
     // higher register numbers so need to be popped afterwards.
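
// [Editorial aside, not part of the imported patch] The emitPopInst change in
// this hunk only folds the return into the pop (loading PC directly) when the
// block has no successors; otherwise LR is reloaded normally. A hedged,
// runnable sketch of that predicate, with all inputs passed explicitly
// (parameter names below are illustrative, not LLVM's):
//
//   #include <iostream>
//
//   static bool canPopReturnIntoPC(bool RegIsLR, bool IsTailCall,
//                                  bool IsInterrupt, bool IsTrap,
//                                  bool HasV5TOps, bool SuccEmpty) {
//     // Mirrors the guard above: LR, not a tail-call/interrupt/trap return,
//     // ARMv5T or later, and the block must actually end the function.
//     return RegIsLR && !IsTailCall && !IsInterrupt && !IsTrap && HasV5TOps &&
//            SuccEmpty;
//   }
//
//   int main() {
//     std::cout << canPopReturnIntoPC(true, false, false, false, true, true)
//               << canPopReturnIntoPC(true, false, false, false, true, false)
//               << '\n'; // prints 10: fold only when the block is terminal
//   }
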
-    ++MI;
+    if (MI != MBB.end())
+      ++MI;
   }
 }
 
@@ -1109,7 +1068,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                     const TargetRegisterInfo *TRI) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
@@ -1118,7 +1077,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
   // slot offsets can be wrong. The offset for d8 will always be correct.
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned DNum = CSI[i].getReg() - ARM::D8;
-    if (DNum >= 8)
+    if (DNum > NumAlignedDPRCS2Regs - 1)
       continue;
     int FI = CSI[i].getFrameIdx();
     // The even-numbered registers will be 16-byte aligned, the odd-numbered
@@ -1269,7 +1228,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                       const TargetRegisterInfo *TRI) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 
   // Find the frame index assigned to d8.
@@ -1654,13 +1613,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // FIXME: We could add logic to be more precise about negative offsets
   //        and which instructions will need a scratch register for them. Is it
   //        worth the effort and added fragility?
-  bool BigStack =
-    (RS &&
-     (MFI->estimateStackSize(MF) +
-      ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
-      estimateRSStackSizeLimit(MF, this)))
-    || MFI->hasVarSizedObjects()
-    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+  bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+                              ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+                          estimateRSStackSizeLimit(MF, this))) ||
+                  MFI->hasVarSizedObjects() ||
+                  (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
 
   bool ExtraCSSpill = false;
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
@@ -1698,8 +1655,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
       for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
         unsigned Reg = UnspilledCS1GPRs[i];
-        // Don't spill high register if the function is thumb
+        // Don't spill high register if the function is thumb.  In the case of
+        // Windows on ARM, accept R11 (frame pointer)
         if (!AFI->isThumbFunction() ||
+            (STI.isTargetWindows() && Reg == ARM::R11) ||
             isARMLowRegister(Reg) || Reg == ARM::LR) {
           SavedRegs.set(Reg);
           if (!MRI.isReserved(Reg))
@@ -1784,8 +1743,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = getStackAlignment();
-      Amount = (Amount+Align-1)/Align*Align;
+      Amount = alignSPAdjust(Amount);
 
       ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
       assert(!AFI->isThumb1OnlyFunction() &&
@@ -1885,7 +1843,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   if (!ST->isTargetAndroid() && !ST->isTargetLinux())
     report_fatal_error("Segmented stacks not supported on this platform.");
 
-  assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   MCContext &Context = MMI.getContext();
@@ -1913,21 +1870,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
   MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
 
-  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
-       e = PrologueMBB.livein_end();
-       i != e; ++i) {
-    AllocMBB->addLiveIn(*i);
-    GetMBB->addLiveIn(*i);
-    McrMBB->addLiveIn(*i);
-    PrevStackMBB->addLiveIn(*i);
-    PostStackMBB->addLiveIn(*i);
+  // Grab everything that reaches PrologueMBB to update their liveness as well.
+  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+  SmallVector<MachineBasicBlock *, 8> WalkList;
+  WalkList.push_back(&PrologueMBB);
+
+  do {
+    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+      if (BeforePrologueRegion.insert(PredBB).second)
+        WalkList.push_back(PredBB);
+    }
+  } while (!WalkList.empty());
+
+  // The order in that list is important.
+  // The blocks will all be inserted before PrologueMBB using that order.
+  // Therefore the block that should appear first in the CFG should appear
+  // first in the list.
+  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+                                      PostStackMBB};
+
+  for (MachineBasicBlock *B : AddedBlocks)
+    BeforePrologueRegion.insert(B);
+
+  for (const auto &LI : PrologueMBB.liveins()) {
+    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+      PredBB->addLiveIn(LI);
   }
 
-  MF.push_front(PostStackMBB);
-  MF.push_front(AllocMBB);
-  MF.push_front(GetMBB);
-  MF.push_front(McrMBB);
-  MF.push_front(PrevStackMBB);
+  // Remove the newly added blocks from the list, since we know
+  // we do not have to do the following updates for them.
+  for (MachineBasicBlock *B : AddedBlocks) {
+    BeforePrologueRegion.erase(B);
+    MF.insert(PrologueMBB.getIterator(), B);
+  }
+
+  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+    // Make sure the LiveIns are still sorted and unique.
+    MBB->sortUniqueLiveIns();
+    // Replace the edges to PrologueMBB by edges to the sequences
+    // we are about to add.
+    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+  }
 
   // The required stack size that is aligned to ARM constant criterion.
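
// [Editorial aside, not part of the imported patch] The segmented-stacks hunk
// above collects every block that can reach PrologueMBB with a simple
// worklist walk over predecessors. A runnable sketch of the same traversal
// over a toy predecessor map (the graph shape here is invented):
//
//   #include <iostream>
//   #include <map>
//   #include <set>
//   #include <vector>
//
//   int main() {
//     // Predecessor lists: block id -> blocks that branch to it.
//     std::map<int, std::vector<int>> Preds = {{0, {}}, {1, {0}}, {2, {0, 1}}};
//     int PrologueMBB = 2;
//     std::set<int> BeforePrologueRegion;
//     std::vector<int> WalkList = {PrologueMBB};
//     do {
//       int Cur = WalkList.back();
//       WalkList.pop_back();
//       for (int Pred : Preds[Cur])
//         if (BeforePrologueRegion.insert(Pred).second) // first visit only
//           WalkList.push_back(Pred);
//     } while (!WalkList.empty());
//     for (int B : BeforePrologueRegion)
//       std::cout << B << ' ';                          // prints: 0 1
//   }
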
   AlignedStackSize = alignToARMConstant(StackSize);
@@ -1991,7 +1975,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
     ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
         MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
     MachineConstantPool *MCP = MF.getConstantPool();
-    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
 
     // ldr SR0, [pc, offset(STACK_LIMIT)]
     AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 6fdc5eff5e47..66f4dfb6ef52 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -31,8 +31,6 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
-  void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
@@ -52,7 +50,6 @@ public:
                                 unsigned &FrameReg) const override;
   int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
                                  unsigned &FrameReg, int SPAdj) const;
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
 
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
@@ -60,6 +57,11 @@ public:
   void adjustForSegmentedStacks(MachineFunction &MF,
                                 MachineBasicBlock &MBB) const override;
 
+  /// Returns true if the target will correctly handle shrink wrapping.
+  bool enableShrinkWrapping(const MachineFunction &MF) const override {
+    return true;
+  }
+
 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b110628a0a86..024244092a34 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -160,11 +160,6 @@ public:
 
   // Thumb Addressing Modes:
   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
-                             unsigned Scale);
-  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                 SDValue &OffImm);
   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -176,8 +171,6 @@ public:
   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 
   // Thumb 2 Addressing Modes:
-  bool SelectT2ShifterOperandReg(SDValue N,
-                                 SDValue &BaseReg, SDValue &Opc);
   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                             SDValue &OffImm);
@@ -278,6 +271,22 @@ private:
   // Get the alignment operand for a NEON VLD or VST instruction.
   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                         bool is64BitVector);
+
+  /// Returns the number of instructions required to materialize the given
+  /// constant in a register, or 3 if a literal pool load is needed.
+  unsigned ConstantMaterializationCost(unsigned Val) const;
+
+  /// Checks if N is a multiplication by a constant where we can extract out a
+  /// power of two from the constant so that it can be used in a shift, but only
+  /// if it simplifies the materialization of the constant. Returns true if it
+  /// is, and assigns to PowerOfTwo the power of two that should be extracted
+  /// out and to NewMulConst the new constant to be multiplied by.
+  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+  /// Replace N with M in CurDAG, in a way that also ensures that M gets
+  /// selected when N would have been selected.
+  void replaceDAGValue(const SDValue &N, SDValue M);
 };
 }
 
@@ -334,7 +343,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
   bool isThumb2 = Subtarget->isThumb();
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
-    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
+    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 
     if (N->getOpcode() != ISD::ADD)
       continue;
@@ -388,7 +397,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
     SDValue CPTmp1;
     SDValue CPTmp2;
     if (isThumb2) {
-      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
         continue;
     } else {
       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
@@ -471,6 +480,61 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 }
 
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+  if (Subtarget->isThumb()) {
+    if (Val <= 255) return 1;                               // MOV
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+    if (~Val <= 255) return 2;                              // MOV + MVN
+    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
+  } else {
+    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
+    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
+  }
+  if (Subtarget->useMovt(*MF)) return 2;                    // MOVW + MOVT
+  return 3;                                                 // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+                                             unsigned MaxShift,
+                                             unsigned &PowerOfTwo,
+                                             SDValue &NewMulConst) const {
+  assert(N.getOpcode() == ISD::MUL);
+  assert(MaxShift > 0);
+
+  // If the multiply is used in more than one place then changing the constant
+  // will make other uses incorrect, so don't.
+  if (!N.hasOneUse()) return false;
+  // Check if the multiply is by a constant
+  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!MulConst) return false;
+  // If the constant is used in more than one place then modifying it will mean
+  // we need to materialize two constants instead of one, which is a bad idea.
+  if (!MulConst->hasOneUse()) return false;
+  unsigned MulConstVal = MulConst->getZExtValue();
+  if (MulConstVal == 0) return false;
+
+  // Find the largest power of 2 that MulConstVal is a multiple of
+  PowerOfTwo = MaxShift;
+  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+    --PowerOfTwo;
+    if (PowerOfTwo == 0) return false;
+  }
+
+  // Only optimise if the new cost is better
+  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+  return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+  CurDAG->ReplaceAllUsesWith(N, M);
+}
+
 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &Opc,
@@ -478,6 +542,24 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
   if (DisableShifterOp)
     return false;
 
+  // If N is a multiply-by-constant and it's profitable to extract a shift and
+  // use it in a shifted operand do so.
+  if (N.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+      BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+                                               N.getOperand(0), NewMulConst)
+                                   .getNode()),
+                        0);
+      replaceDAGValue(N.getOperand(1), NewMulConst);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+                                                          PowerOfTwo),
+                                      SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 
   // Don't match base register only case. That is matched to a separate
@@ -662,6 +744,18 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     }
   }
 
+  // If Offset is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand do so.
+  if (Offset.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(Offset.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+      ShOpcVal = ARM_AM::lsl;
+    }
+  }
+
   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                   SDLoc(N), MVT::i32);
   return true;
@@ -1085,78 +1179,14 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
   return true;
 }
 
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
-                                       SDValue &Offset, unsigned Scale) {
-  if (Scale == 4) {
-    SDValue TmpBase, TmpOffImm;
-    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
-      return false;  // We want to select tLDRspi / tSTRspi instead.
-
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
-      return false;  // We want to select tLDRpci instead.
-  }
-
-  if (!CurDAG->isBaseWithConstantOffset(N))
-    return false;
-
-  // Thumb does not have [sp, r] address mode.
-  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
-  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
-  if ((LHSR && LHSR->getReg() == ARM::SP) ||
-      (RHSR && RHSR->getReg() == ARM::SP))
-    return false;
-
-  // FIXME: Why do we explicitly check for a match here and then return false?
-  // Presumably to allow something else to match, but shouldn't this be
-  // documented?
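
// [Editorial aside, not part of the imported patch] The new helpers above
// peel the largest power of two out of a multiply constant when the smaller
// residue is cheaper to materialize. A standalone sketch with a deliberately
// simplified Thumb-style cost model (the real one also consults subtarget
// features like MOVW availability):
//
//   #include <cstdio>
//
//   static unsigned Cost(unsigned Val) {       // crude stand-in cost model
//     if (Val <= 255) return 1;                // MOV
//     if (Val <= 0xffff) return 1;             // MOVW
//     if (~Val <= 255) return 2;               // MOV + MVN
//     return 3;                                // literal pool load
//   }
//
//   static bool extractShift(unsigned MulConst, unsigned MaxShift,
//                            unsigned &PowerOfTwo, unsigned &NewMulConst) {
//     if (MulConst == 0) return false;
//     PowerOfTwo = MaxShift;
//     while (MulConst % (1u << PowerOfTwo) != 0) // largest 2^k factor
//       if (--PowerOfTwo == 0) return false;
//     NewMulConst = MulConst >> PowerOfTwo;
//     return Cost(NewMulConst) < Cost(MulConst); // only if it actually helps
//   }
//
//   int main() {
//     unsigned Pow, Residue;
//     if (extractShift(0xff000u, 31, Pow, Residue)) // 0xff << 12
//       std::printf("mul by %#x, then lsl #%u\n", Residue, Pow);
//   }
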
-  int RHSC;
-  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
-    return false;
-
-  Base = N.getOperand(0);
-  Offset = N.getOperand(1);
-  return true;
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 1);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 2);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 4);
-}
-
 bool
 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                           SDValue &Base, SDValue &OffImm) {
-  if (Scale == 4) {
-    SDValue TmpBase, TmpOffImm;
-    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
-      return false;  // We want to select tLDRspi / tSTRspi instead.
-
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
-      return false;  // We want to select tLDRpci instead.
-  }
-
   if (!CurDAG->isBaseWithConstantOffset(N)) {
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+    if (N.getOpcode() == ISD::ADD) {
+      return false; // We want to select register offset instead
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
       Base = N.getOperand(0);
     } else {
       Base = N;
@@ -1166,23 +1196,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
     return true;
   }
 
-  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
-  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
-  if ((LHSR && LHSR->getReg() == ARM::SP) ||
-      (RHSR && RHSR->getReg() == ARM::SP)) {
-    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
-    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
-    unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
-    unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
-
-    // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
-    if (LHSC != 0 || RHSC != 0) return false;
-
-    Base = N;
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
-    return true;
-  }
-
   // If the RHS is + imm5 * scale, fold into addr mode.
   int RHSC;
   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
@@ -1191,9 +1204,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
     return true;
   }
 
-  Base = N.getOperand(0);
-  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
-  return true;
+  // Offset is too large, so use register offset instead.
+  return false;
 }
 
 bool
@@ -1263,28 +1275,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
 
 //===----------------------------------------------------------------------===//
 
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
-                                                SDValue &Opc) {
-  if (DisableShifterOp)
-    return false;
-
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    ShImmVal = RHS->getZExtValue() & 31;
-    Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
-    return true;
-  }
-
-  return false;
-}
-
 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                             SDValue &Base, SDValue &OffImm) {
   // Match simple R + imm12 operands.
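
// [Editorial aside, not part of the imported patch] The Thumb addressing-mode
// hunks above fold a constant offset only when it is Scale-aligned and the
// scaled value fits in imm5 (0..31); anything else now falls back to a
// register offset. A runnable sketch of that range test (the exact signature
// of isScaledConstantInRange is paraphrased here):
//
//   #include <cassert>
//
//   static bool scaledConstantInRange(int Offset, int Scale, int Low,
//                                     int High, int &RHSC) {
//     if (Offset % Scale != 0) return false; // must be a multiple of Scale
//     RHSC = Offset / Scale;
//     return RHSC >= Low && RHSC < High;     // half-open range, assumed
//   }
//
//   int main() {
//     int RHSC;
//     assert(scaledConstantInRange(124, 4, 0, 32, RHSC) && RHSC == 31);
//     assert(!scaledConstantInRange(128, 4, 0, 32, RHSC)); // too large
//     assert(!scaledConstantInRange(2, 4, 0, 32, RHSC));   // misaligned
//     return 0;
//   }
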
@@ -1425,6 +1415,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
     }
   }
 
+  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand do so.
+  if (OffReg.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+    }
+  }
+
   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
 
   return true;
@@ -2503,25 +2504,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::Constant: {
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
-    bool UseCP = true;
-    if (Subtarget->useMovt(*MF))
-      // Thumb2-aware targets have the MOVT instruction, so all immediates can
-      // be done with MOV + MOVT, at worst.
-      UseCP = false;
-    else {
-      if (Subtarget->isThumb()) {
-        UseCP = (Val > 255 &&                                  // MOV
-                 ~Val > 255 &&                                 // MOV + MVN
-                 !ARM_AM::isThumbImmShiftedVal(Val) &&         // MOV + LSL
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-      } else
-        UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&             // MOV
-                 ARM_AM::getSOImmVal(~Val) == -1 &&            // MVN
-                 !ARM_AM::isSOImmTwoPartVal(Val) &&            // two instrs.
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-    }
-
-    if (UseCP) {
+    // If we can't materialize the constant we need to use a literal pool
+    if (ConstantMaterializationCost(Val) > 2) {
       SDValue CPIdx = CurDAG->getTargetConstantPool(
           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
           TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -3376,7 +3360,7 @@ static void getIntOperandsFromRegisterString(StringRef RegString,
                                              SelectionDAG *CurDAG, SDLoc DL,
                                              std::vector<SDValue> &Ops) {
   SmallVector<StringRef, 5> Fields;
-  RegString.split(Fields, ":");
+  RegString.split(Fields, ':');
 
   if (Fields.size() > 1) {
     bool AllIntFields = true;
@@ -3461,9 +3445,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
 // The flags here are common to those allowed for apsr in the A class cores and
 // those allowed for the special registers in the M class cores. Returns a
 // value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags) {
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
   if (Flags.empty())
-    return 0x3;
+    return 0x2 | (int)hasDSP;
 
   return StringSwitch<int>(Flags)
          .Case("g", 0x1)
@@ -3492,7 +3476,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
   }
 
   // We know we are now handling a write so need to get the mask for the flags.
-  int Mask = getMClassFlagsMask(Flags);
+  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
 
   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
   // shouldn't have flags present.
@@ -3501,7 +3485,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
 
   // The _g and _nzcvqg versions are only valid if the DSP extension is
   // available.
-  if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+  if (!Subtarget->hasDSP() && (Mask & 0x1))
     return -1;
 
   // The register was valid so need to put the mask in the correct place
@@ -3523,7 +3507,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
   // The flags permitted for apsr are the same flags that are allowed in
   // M class registers. We get the flag value and then shift the flags into
   // the correct place to combine with the mask.
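
// [Editorial aside, not part of the imported patch] getMClassFlagsMask,
// changed above, now derives the default MSR write mask from the DSP feature
// instead of hard-coding 0x3: bit 1 is always written, bit 0 only when the
// DSP extension exists. A tiny runnable sketch of that mask computation
// (std::string stands in for StringRef; the flag spellings follow the hunk):
//
//   #include <cassert>
//   #include <string>
//
//   static int flagsMask(const std::string &Flags, bool HasDSP) {
//     if (Flags.empty())
//       return 0x2 | (int)HasDSP; // default: "nzcvq", plus "g" iff DSP
//     if (Flags == "g")      return 0x1;
//     if (Flags == "nzcvq")  return 0x2;
//     if (Flags == "nzcvqg") return 0x3;
//     return -1;                  // invalid flag suffix
//   }
//
//   int main() {
//     assert(flagsMask("", false) == 0x2);
//     assert(flagsMask("", true) == 0x3);
//     assert(flagsMask("bogus", true) == -1);
//     return 0;
//   }
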
-  Mask = getMClassFlagsMask(Flags);
+  Mask = getMClassFlagsMask(Flags, true);
   if (Mask == -1)
     return -1;
 
   return Mask << 2;
@@ -3742,7 +3726,7 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
   }
 
   SmallVector<StringRef, 2> Fields;
-  StringRef(SpecialReg).split(Fields, "_", 1, false);
+  StringRef(SpecialReg).split(Fields, '_', 1, false);
   std::string Reg = Fields[0].str();
   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
 
@@ -3943,6 +3927,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
     // be an immediate and not a memory constraint.
     // Fallthrough.
   case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_o:
   case InlineAsm::Constraint_Q:
   case InlineAsm::Constraint_Um:
   case InlineAsm::Constraint_Un:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8cc06df71633..9cfb06b00c4b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -142,6 +142,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
   setOperationAction(ISD::SREM, VT, Expand);
   setOperationAction(ISD::UREM, VT, Expand);
   setOperationAction(ISD::FREM, VT, Expand);
+
+  if (!VT.isFloatingPoint() &&
+      VT != MVT::v2i64 && VT != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT, Legal);
 }
 
 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -166,77 +171,78 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   // Uses VFP for Thumb libfuncs if available.
   if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
      Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
-    // Single-precision floating-point arithmetic.
-    setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
-    setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
-    setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
-    setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+    static const struct {
+      const RTLIB::Libcall Op;
+      const char * const Name;
+      const ISD::CondCode Cond;
+    } LibraryCalls[] = {
+      // Single-precision floating-point arithmetic.
+      { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
 
-    // Double-precision floating-point arithmetic.
-    setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
-    setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
-    setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
-    setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+      // Double-precision floating-point arithmetic.
+      { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
 
-    // Single-precision comparisons.
-    setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
-    setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
-    setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
-    setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
-    setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
-    setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
-    setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
-    setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
+      // Single-precision comparisons.
+      { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+      { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+      { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+      { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+      { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+      { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+      { RTLIB::UO_F32,  "__unordsf2vfp", ISD::SETNE },
+      { RTLIB::O_F32,   "__unordsf2vfp", ISD::SETEQ },
 
-    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
-    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
+      // Double-precision comparisons.
+      { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+      { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+      { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+      { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+      { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+      { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+      { RTLIB::UO_F64,  "__unorddf2vfp", ISD::SETNE },
+      { RTLIB::O_F64,   "__unorddf2vfp", ISD::SETEQ },
 
-    // Double-precision comparisons.
-    setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
-    setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
-    setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
-    setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
-    setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
-    setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
-    setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
-    setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
+      // Floating-point to integer conversions.
+      // i64 conversions are done via library routines even when generating VFP
+      // instructions, so use the same ones.
+      { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
 
-    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
-    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
+      // Conversions between floating types.
+      { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+      { RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp", ISD::SETCC_INVALID },
 
-    // Floating-point to integer conversions.
-    // i64 conversions are done via library routines even when generating VFP
-    // instructions, so use the same ones.
-    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
-    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
-    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
-    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+      // Integer to floating-point conversions.
+      // i64 conversions are done via library routines even when generating VFP
+      // instructions, so use the same ones.
+      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+      // e.g., __floatunsidf vs. __floatunssidfvfp.
+      { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+      { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+      { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+      { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+    };
 
-    // Conversions between floating types.
-    setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
-    setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
+    for (const auto &LC : LibraryCalls) {
+      setLibcallName(LC.Op, LC.Name);
+      if (LC.Cond != ISD::SETCC_INVALID)
+        setCmpLibcallCC(LC.Op, LC.Cond);
+    }
+  }
 
-    // Integer to floating-point conversions.
-    // i64 conversions are done via library routines even when generating VFP
-    // instructions, so use the same ones.
-    // FIXME: There appears to be some naming inconsistency in ARM libgcc:
-    // e.g., __floatunsidf vs. __floatunssidfvfp.
-    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
-    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
-    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
-    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+  // Set the correct calling convention for ARMv7k WatchOS. It's just
+  // AAPCS_VFP for functions as simple as libcalls.
+  if (Subtarget->isTargetWatchOS()) {
+    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+      setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
   }
 }
 
@@ -245,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setLibcallName(RTLIB::SRL_I128, nullptr);
   setLibcallName(RTLIB::SRA_I128, nullptr);
 
-  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
-      !Subtarget->isTargetWindows()) {
+  // RTLIB
+  if (Subtarget->isAAPCS_ABI() &&
+      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+       Subtarget->isTargetAndroid())) {
     static const struct {
       const RTLIB::Libcall Op;
       const char * const Name;
@@ -334,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
-      // Memory operations
-      // RTABI chapter 4.3.4
-      { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
     };
 
     for (const auto &LC : LibraryCalls) {
@@ -348,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       if (LC.Cond != ISD::SETCC_INVALID)
         setCmpLibcallCC(LC.Op, LC.Cond);
     }
+
+    // EABI dependent RTLIB
+    if (TM.Options.EABIVersion == EABI::EABI4 ||
+        TM.Options.EABIVersion == EABI::EABI5) {
+      static const struct {
+        const RTLIB::Libcall Op;
+        const char *const Name;
+        const CallingConv::ID CC;
+        const ISD::CondCode Cond;
+      } MemOpsLibraryCalls[] = {
+        // Memory operations
+        // RTABI chapter 4.3.4
+        { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      };
+
+      for (const auto &LC : MemOpsLibraryCalls) {
+        setLibcallName(LC.Op, LC.Name);
+        setLibcallCallingConv(LC.Op, LC.CC);
+        if (LC.Cond != ISD::SETCC_INVALID)
+          setCmpLibcallCC(LC.Op, LC.Cond);
+      }
+    }
   }
 
   if (Subtarget->isTargetWindows())
 {
@@ -364,6 +390,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::SDIV_I32, "__rt_sdiv",   CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::UDIV_I32, "__rt_udiv",   CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
     };
 
     for (const auto &LC : LibraryCalls) {
@@ -373,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
 
   // Use divmod compiler-rt calls for iOS 5.0 and later.
-  if (Subtarget->getTargetTriple().isiOS() &&
-      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+  if (Subtarget->isTargetWatchOS() ||
+      (Subtarget->isTargetIOS() &&
+       !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
   }
@@ -392,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
   }
 
+  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+  // a __gnu_ prefix (which is the default).
+  if (Subtarget->isTargetAEABI()) {
+    setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+    setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+    setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+  }
+
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
   else
@@ -579,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
-  setTargetDAGCombine(ISD::SELECT_CC);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -605,7 +643,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::ADDC);
 
   if (Subtarget->isFPOnlySP()) {
-    // When targetting a floating-point unit with only single-precision
+    // When targeting a floating-point unit with only single-precision
     // operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
     // loads and stores are provided by the hardware.
@@ -689,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
   }
   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
-      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
     setOperationAction(ISD::MULHS, MVT::i32, Expand);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -706,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SUBE, MVT::i32, Custom);
   }
 
+  if (!Subtarget->isThumb1Only())
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
   // ARM does not have ROTL.
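
// [Editorial aside, not part of the imported patch] Several hunks above
// replace long runs of setLibcallName calls with static tables iterated in a
// loop. A self-contained sketch of that table-driven registration pattern
// (the enum, names, and the map standing in for the lowering tables are all
// invented for illustration):
//
//   #include <cstdio>
//   #include <map>
//   #include <string>
//
//   enum Libcall { MEMCPY, MEMMOVE, MEMSET };
//
//   int main() {
//     static const struct {
//       Libcall Op;
//       const char *Name;
//     } LibraryCalls[] = {
//         {MEMCPY, "__aeabi_memcpy"},
//         {MEMMOVE, "__aeabi_memmove"},
//         {MEMSET, "__aeabi_memset"},
//     };
//     std::map<Libcall, std::string> Names; // stands in for lowering tables
//     for (const auto &LC : LibraryCalls)   // one loop instead of N calls
//       Names[LC.Op] = LC.Name;
//     std::printf("%s\n", Names[MEMSET].c_str()); // __aeabi_memset
//   }
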
- setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) @@ -717,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + // @llvm.readcyclecounter requires the Performance Monitors extension. + // Default to the 0 expansion on unsupported platforms. + // FIXME: Technically there are older ARM CPUs that have + // implementation-specific ways of obtaining this information. + if (Subtarget->hasPerfMon()) + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) @@ -726,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { // These are expanded into libcalls if the cpu doesn't have HW divider. - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, LibCall); + setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) - if (Subtarget->isTargetAEABI()) { + if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) { + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); @@ -762,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); @@ -776,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetMachO()) { - // Non-MachO platforms may return values in these registers via the - // personality function. - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); - } - if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else @@ -849,11 +893,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // We want to custom lower some of our intrinsics. 
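// ----------------------------------------------------------------------
// Illustrative sketch: the "Register based DivRem" block above maps
// SREM/UREM onto __aeabi_idivmod/__aeabi_uidivmod, which under the RTABI
// return quotient and remainder together in the register pair {r0, r1}.
// A portable model of that contract, with a struct return standing in for
// the register pair; the type and function below are hypothetical, not
// the runtime's actual definitions.
struct IDivMod {
  int Quot; // comes back in r0
  int Rem;  // comes back in r1
};

IDivMod aeabi_idivmod_model(int Numerator, int Denominator) {
  // Division by zero is left undefined here, as in C.
  return {Numerator / Denominator, Numerator % Denominator};
}
// One call can thus serve a matching SDIV/SREM pair, which is why the
// lowering above prefers the divmod entry points on AEABI and Android
// targets.
// ----------------------------------------------------------------------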
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget->isTargetDarwin()) { - setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); - setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); - } setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -912,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().isiOS()) { + if (Subtarget->isTargetWatchOS()) { + setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); + setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); + } + if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -928,6 +976,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + if (!Subtarget->isFPOnlySP()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); @@ -935,8 +990,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); } } + + if (Subtarget->hasNEON()) { + // vmin and vmax aren't available in a scalar form, so we use + // a NEON instruction with an undef lane instead. + setOperationAction(ISD::FMINNAN, MVT::f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); + } + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -959,11 +1028,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 
4 : 2; + MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. @@ -1054,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1069,7 +1136,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; - case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; @@ -1082,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; @@ -1133,14 +1202,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; - case ARMISD::FMAX: return "ARMISD::FMAX"; - case ARMISD::FMIN: return "ARMISD::FMIN"; - case ARMISD::VMAXNM: return "ARMISD::VMAX"; - case ARMISD::VMINNM: return "ARMISD::VMIN"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; + case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -1449,9 +1515,10 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); - return DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); + return DAG.getStore( + Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, @@ -1734,9 +1801,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); @@ -1748,9 +1816,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, 
DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -1768,7 +1837,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), false, false, true, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1781,7 +1851,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1804,9 +1875,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { @@ -1821,7 +1893,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1831,8 +1902,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRAS() && - // Emit regular call when code size is the priority - !HasMinSizeAttr) + // Emit regular call when code size is the priority + !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -2014,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; + assert(Subtarget->supportsTailCall()); + // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. @@ -2033,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: - // emitEpilogue is not ready for them. 
Thumb tail calls also use t2B, as - // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation - // support in the assembler and linker to be used. This would need to be - // fixed to fully support tail calls in Thumb1. - // - // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take - // LR. This means if we need to reload LR, it takes an extra instructions, - // which outweighs the value of the tail call; but here we don't know yet - // whether LR is going to be used. Probably the right approach is to - // generate the tail call here and turn it back into CALL/RET in - // emitEpilogue if LR is used. - - // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, - // but we need to make sure there are enough registers; the only valid - // registers are the 4 used for parameters. We don't currently do this - // case. - if (Subtarget->isThumb1Only()) - return false; - // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls @@ -2400,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; - return !Subtarget->isThumb1Only(); + return true; } // Trying to write a 64 bit value so need to split into two 32 bit values first, @@ -2467,9 +2520,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = + DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); @@ -2491,9 +2545,10 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Argument = + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2543,17 +2598,19 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + 
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2561,9 +2618,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -2577,6 +2635,8 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2597,22 +2657,31 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, - UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + bool UseGOT_PREL = + !(GV->hasHiddenVisibility() || GV->hasLocalLinkage()); + + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc dl(Op); + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( + GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, + UseGOT_PREL ? 
ARMCP::GOT_PREL : ARMCP::no_modifier, + /*AddCurrentAddress=*/UseGOT_PREL); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); SDValue Chain = Result.getValue(1); - SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); - Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); - if (!UseGOTOFF) + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + if (UseGOT_PREL) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(), + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); return Result; } @@ -2628,9 +2697,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + return DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } } @@ -2654,7 +2724,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } @@ -2680,32 +2751,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, TargetFlags)); if (GV->hasDLLImportStorageClass()) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } -SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && - "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc dl(Op); - unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; - ARMConstantPoolValue *CPV = - ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, PCAdj); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); -} - SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -2722,6 +2772,13 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); } +SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, + Op.getOperand(0)); +} + SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -2732,7 +2789,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::arm_rbit: { assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); + return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2752,10 +2809,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2770,6 +2827,36 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vminnm: + case Intrinsic::arm_neon_vmaxnm: { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) + ? ISD::FMINNUM : ISD::FMAXNUM; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::arm_neon_vminu: + case Intrinsic::arm_neon_vmaxu: { + if (Op.getValueType().isFloatingPoint()) + return SDValue(); + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) + ? ISD::UMIN : ISD::UMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::arm_neon_vmins: + case Intrinsic::arm_neon_vmaxs: { + // v{min,max}s is overloaded between signed integers and floats. + if (!Op.getValueType().isFloatingPoint()) { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) + ? ISD::SMIN : ISD::SMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) + ? 
ISD::FMINNAN : ISD::FMAXNAN; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -2870,9 +2957,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad( + MVT::i32, dl, Root, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -3056,9 +3144,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad( + MVT::f64, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -3139,9 +3228,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); - int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, - CurByValIndex, VA.getLocMemOffset(), - Flags.getByValSize()); + int FrameIndex = StoreByValRegs( + CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, + VA.getLocMemOffset(), Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { @@ -3151,9 +3240,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad( + VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0)); } lastInsIndex = index; } @@ -3188,13 +3278,9 @@ static bool isFloatingPointZero(SDValue Op) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); - if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { - SDValue MoveOp = BitcastOp->getOperand(0); - if (MoveOp->getOpcode() == ISD::TargetConstant && - cast(MoveOp)->getZExtValue() == 0) { - return true; - } - } + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(BitcastOp->getOperand(0))) + return true; } return false; } @@ -3559,113 +3645,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can use VMAXNM/VMINNM for a compare followed by a select with the - // same operands, as follows: - // c = fcmp [?gt, ?ge, ?lt, ?le] a, b - // select c, a, b - // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. 
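// ----------------------------------------------------------------------
// Illustrative sketch of why the combine removed below needed its NaN and
// signed-zero checks before folding "c = fcmp gt a, b; select c, a, b"
// into VMAXNM. In scalar form: IEEE 754-2008 maxNum returns the number
// when exactly one operand is NaN, whereas the naive select yields b --
// possibly the NaN -- whenever the compare is false; and (+0 > -0) is
// false, so the select cannot tell the two zeros apart either, which the
// constant-zero sign checks below guarded against. The model function is
// hypothetical, written only to show the semantics.
#include <cmath>

float maxnum_model(float A, float B) {
  if (std::isnan(A)) return B; // maxNum: a NaN loses to a number...
  if (std::isnan(B)) return A; // ...but "A > B ? A : B" would yield B here
  return A > B ? A : B;
}
// ----------------------------------------------------------------------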
- bool swapSides = false; - if (!getTargetMachine().Options.NoNaNsFPMath) { - // transformability may depend on which way around we compare - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETOLT: - case ISD::SETOLE: - // the non-NaN should be RHS - swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS); - break; - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: - case ISD::SETULE: - // the non-NaN should be LHS - swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS); - break; - } - } - swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal); - if (swapSides) { - CC = ISD::getSetCCSwappedOperands(CC); - std::swap(LHS, RHS); - } - if (LHS == TrueVal && RHS == FalseVal) { - bool canTransform = true; - // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here - if (!getTargetMachine().Options.UnsafeFPMath && - !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { - const ConstantFPSDNode *Zero; - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETUGT: - case ISD::SETGT: - // RHS must not be -0 - canTransform = (Zero = dyn_cast(RHS)) && - !Zero->isNegative(); - break; - case ISD::SETOGE: - case ISD::SETUGE: - case ISD::SETGE: - // LHS must not be -0 - canTransform = (Zero = dyn_cast(LHS)) && - !Zero->isNegative(); - break; - case ISD::SETOLT: - case ISD::SETULT: - case ISD::SETLT: - // RHS must not be +0 - canTransform = (Zero = dyn_cast(RHS)) && - Zero->isNegative(); - break; - case ISD::SETOLE: - case ISD::SETULE: - case ISD::SETLE: - // LHS must not be +0 - canTransform = (Zero = dyn_cast(LHS)) && - Zero->isNegative(); - break; - } - } - if (canTransform) { - // Note: If one of the elements in a pair is a number and the other - // element is NaN, the corresponding result element is the number. - // This is consistent with the IEEE 754-2008 standard. - // Therefore, a > b ? 
a : b <=> vmax(a,b), if b is constant and a is NaN - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETOGE: - if (!DAG.isKnownNeverNaN(RHS)) - break; - return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); - case ISD::SETUGT: - case ISD::SETUGE: - if (!DAG.isKnownNeverNaN(LHS)) - break; - case ISD::SETGT: - case ISD::SETGE: - return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); - case ISD::SETOLT: - case ISD::SETOLE: - if (!DAG.isKnownNeverNaN(RHS)) - break; - return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); - case ISD::SETULT: - case ISD::SETULE: - if (!DAG.isKnownNeverNaN(LHS)) - break; - case ISD::SETLT: - case ISD::SETLE: - return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); - } - } - } - bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); @@ -3890,16 +3869,18 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { Addr, Op.getOperand(2), JTI); } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), - false, false, false, 0); + Addr = + DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + false, false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { - Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), - false, false, false, 0); + Addr = + DAG.getLoad(PTy, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + false, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } @@ -3936,7 +3917,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } @@ -3988,7 +3969,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } @@ -4153,6 +4134,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, Results.push_back(Read.getOperand(0)); } +/// \p BC is a bitcast that is about to be turned into a VMOVDRR. +/// When \p DstVT, the destination type of \p BC, is on the vector +/// register bank and the source of bitcast, \p Op, operates on the same bank, +/// it might be possible to combine them, such that everything stays on the +/// vector register bank. +/// \p return The node that would replace \p BT, if the combine +/// is possible. +static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, + SelectionDAG &DAG) { + SDValue Op = BC->getOperand(0); + EVT DstVT = BC->getValueType(0); + + // The only vector instruction that can produce a scalar (remember, + // since the bitcast was about to be turned into VMOVDRR, the source + // type is i64) from a vector is EXTRACT_VECTOR_ELT. + // Moreover, we can do this combine only if there is one use. 
+ // Finally, if the destination type is not a vector, there is not + // much point in forcing everything on the vector bank. + if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !Op.hasOneUse()) + return SDValue(); + + // If the index is not constant, we will introduce an additional + // multiply that will stick. + // Give up in that case. + ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!Index) + return SDValue(); + unsigned DstNumElt = DstVT.getVectorNumElements(); + + // Compute the new index. + const APInt &APIntIndex = Index->getAPIntValue(); + APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); + NewIndex *= APIntIndex; + // Check if the new constant index fits into i32. + if (NewIndex.getBitWidth() > 32) + return SDValue(); + + // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> + // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) + SDLoc dl(Op); + SDValue ExtractSrc = Op.getOperand(0); + EVT VecVT = EVT::getVectorVT( + *DAG.getContext(), DstVT.getScalarType(), + ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); + SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, + DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -4172,6 +4203,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { + // Do not force values to GPRs (this is what VMOVDRR does for the inputs) + // if we can combine the bitcast with its source. + if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) + return Val; + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -4383,7 +4419,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, if (!ST->hasV6T2Ops()) return SDValue(); - SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); + SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0)); return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } @@ -4544,8 +4580,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. - if (!isa<ConstantSDNode>(N->getOperand(1)) || - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) + if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. @@ -5036,18 +5071,56 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) { return VT == MVT::v8i8 && M.size() == 8; } +// Checks whether the shuffle mask represents a vector transpose (VTRN) by +// checking that pairs of elements in the shuffle mask represent the same index +// in each vector, incrementing the expected index by 2 at each step. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g} +// v2={e,f,g,h} +// WhichResult gives the offset for each element in the mask based on which +// of the two results it belongs to.
+// +// The transpose can be represented either as: +// result1 = shufflevector v1, v2, result1_shuffle_mask +// result2 = shufflevector v1, v2, result2_shuffle_mask +// where v1/v2 and the shuffle masks have the same number of elements +// (here WhichResult (see below) indicates which result is being checked) +// +// or as: +// results = shufflevector v1, v2, shuffle_mask +// where both results are returned in one vector and the shuffle mask has twice +// as many elements as v1/v2 (here WhichResult will always be 0 if true); here we +// want to check the low half and high half of the shuffle mask as if it were +// the other case. static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || - (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + // If the mask is twice as long as the input vector then we need to check the + // upper and lower parts of the mask with a matching value for WhichResult. + // FIXME: A mask with only even values will be rejected in case the first + // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only + // M[0] is used to determine WhichResult. + for (unsigned i = 0; i < M.size(); i += NumElts) { + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) + return false; + } } + + if (M.size() == NumElts*2) + WhichResult = 0; + return true; } @@ -5060,28 +5133,55 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || - (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) + return false; + } } + + if (M.size() == NumElts*2) + WhichResult = 0; + return true; } +// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking +// that the mask elements are either all even and in steps of size 2 or all odd +// and in steps of size 2. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g} +// v2={e,f,g,h} +// Requires similar checks to that of isVTRNMask with +// respect to how results are returned. static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ?
0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - if (M[i] < 0) continue; // ignore UNDEF indices - if ((unsigned) M[i] != 2 * i + WhichResult) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; ++j) { + if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) + return false; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5097,18 +5197,27 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ if (EltSz == 64) return false; - unsigned Half = VT.getVectorNumElements() / 2; - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned j = 0; j != 2; ++j) { - unsigned Idx = WhichResult; - for (unsigned i = 0; i != Half; ++i) { - int MIdx = M[i + j * Half]; - if (MIdx >= 0 && (unsigned) MIdx != Idx) - return false; - Idx += 2; + unsigned NumElts = VT.getVectorNumElements(); + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + unsigned Half = NumElts / 2; + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += Half) { + unsigned Idx = WhichResult; + for (unsigned k = 0; k < Half; ++k) { + int MIdx = M[i + j + k]; + if (MIdx >= 0 && (unsigned) MIdx != Idx) + return false; + Idx += 2; + } } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5116,21 +5225,37 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +// Checks whether the shuffle mask represents a vector zip (VZIP) by checking +// that pairs of elements of the shufflemask represent the same index in each +// vector incrementing sequentially through the vectors. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f} +// v2={e,f,g,h} +// Requires similar checks to that of isVTRNMask with respect to how results +// are returned. static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != Idx) || - (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) - return false; - Idx += 1; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts)) + return false; + Idx += 1; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5147,15 +5272,23 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ?
0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != Idx) || - (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) - return false; - Idx += 1; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx)) + return false; + Idx += 1; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5329,16 +5462,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. + ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(Value->getOperand(1))) { + (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { - ConstantSDNode *constIndex; - constIndex = dyn_cast(Value->getOperand(1)); - assert(constIndex && "The index is not a constant!"); unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, @@ -5437,14 +5568,35 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); - SmallVector SourceVecs; - SmallVector MinElts; - SmallVector MaxElts; + struct ShuffleSourceInfo { + SDValue Vec; + unsigned MinElt; + unsigned MaxElt; + // We may insert some combination of BITCASTs and VEXT nodes to force Vec to + // be compatible with the shuffle we intend to construct. As a result + // ShuffleVec will be some sliding window into the original Vec. + SDValue ShuffleVec; + + // Code should guarantee that element i in Vec starts at element "WindowBase + // + i * WindowScale in ShuffleVec". + int WindowBase; + int WindowScale; + + bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } + ShuffleSourceInfo(SDValue Vec) + : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), + WindowScale(1) {} + }; + + // First gather all vectors used as an immediate source for this BUILD_VECTOR + // node. + SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) @@ -5453,127 +5605,166 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. 
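// ----------------------------------------------------------------------
// Illustrative sketch of the rewrite this function performs; the IR below
// is hand-written, schematic pseudo-IR. A BUILD_VECTOR whose operands are
// extractelements, e.g.
//   build_vector (extractelt %v1, 0), (extractelt %v1, 1),
//                (extractelt %v2, 0), (extractelt %v2, 1)
// becomes
//   shufflevector %v1, %v2, <0, 1, 4, 5>
// where mask lanes 0..NumElts-1 address the first source and lanes
// NumElts..2*NumElts-1 the second. The lane-numbering convention in one
// line (helper name is hypothetical):
constexpr int shuffleLane(int Source, int Lane, int NumElts) {
  return Source * NumElts + Lane; // lane 1 of %v2 in a v4f32 pair: 1*4+1 == 5
}
// ----------------------------------------------------------------------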
return SDValue(); - } else if (V.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) { - // This code doesn't know how to handle shuffles where the vector - // element types do not match (this happens because type legalization - // promotes the return type of EXTRACT_VECTOR_ELT). - // FIXME: It might be appropriate to extend this code to handle - // mismatched types. + } else if (!isa(V.getOperand(1))) { + // Furthermore, shuffles require a constant mask, whereas extractelts + // accept variable indices. return SDValue(); } - // Record this extraction against the appropriate vector if possible... + // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - // If the element number isn't a constant, we can't effectively - // analyze what's going on. - if (!isa(V.getOperand(1))) - return SDValue(); - unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); - bool FoundSource = false; - for (unsigned j = 0; j < SourceVecs.size(); ++j) { - if (SourceVecs[j] == SourceVec) { - if (MinElts[j] > EltNo) - MinElts[j] = EltNo; - if (MaxElts[j] < EltNo) - MaxElts[j] = EltNo; - FoundSource = true; - break; - } - } + auto Source = std::find(Sources.begin(), Sources.end(), SourceVec); + if (Source == Sources.end()) + Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); - // Or record a new source if not... - if (!FoundSource) { - SourceVecs.push_back(SourceVec); - MinElts.push_back(EltNo); - MaxElts.push_back(EltNo); - } + // Update the minimum and maximum lane number seen. + unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); + Source->MinElt = std::min(Source->MinElt, EltNo); + Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors - // involved. - if (SourceVecs.size() > 2) + // are involved. + if (Sources.size() > 2) return SDValue(); - SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; - int VEXTOffsets[2] = {0, 0}; + // Find out the smallest element size among result and two sources, and use + // it as element size to build the shuffle_vector. + EVT SmallestEltTy = VT.getVectorElementType(); + for (auto &Source : Sources) { + EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); + if (SrcEltTy.bitsLT(SmallestEltTy)) + SmallestEltTy = SrcEltTy; + } + unsigned ResMultiplier = + VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits(); + NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); + EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); - // This loop extracts the usage patterns of the source vectors - // and prepares appropriate SDValues for a shuffle if possible. - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { - // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; - VEXTOffsets[i] = 0; + // If the source vector is too wide or too narrow, we may nevertheless be able + // to construct a compatible shuffle either by concatenating it with UNDEF or + // extracting a suitable range of elements. + for (auto &Src : Sources) { + EVT SrcVT = Src.ShuffleVec.getValueType(); + + if (SrcVT.getSizeInBits() == VT.getSizeInBits()) + continue; + + // This stage of the search produces a source with the same element type as + // the original, but with a total width matching the BUILD_VECTOR output. 
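// ----------------------------------------------------------------------
// Illustrative sketch of the width-matching step below, worked for a
// v4f32 result. A too-narrow v2f32 source is padded for free,
//   v4f32 = concat_vectors(v2f32 %src, v2f32 undef)
// while a too-wide v8f32 source donates a v4f32 window: one of its halves
// when the used lanes fit there, otherwise a VEXT starting at MinElt.
// WindowBase then records how the original lane numbers shift into that
// window (helper name is hypothetical):
constexpr int windowedLane(int EltNo, int WindowBase) {
  return EltNo + WindowBase; // VEXT case: WindowBase == -MinElt, so the
                             // lowest used lane lands on window lane 0
}
// ----------------------------------------------------------------------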
+ EVT EltVT = SrcVT.getVectorElementType(); + unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); + EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); + + if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { + if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + // We can pad out the smaller vector for free, so if it's part of a + // shuffle... + Src.ShuffleVec = + DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, + DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { - // It probably isn't worth padding out a smaller vector just to - // break it down again in a shuffle. - return SDValue(); } - // Since only 64-bit and 128-bit vectors are legal on ARM and - // we've eliminated the other cases... - assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && - "unexpected vector sizes in ReconstructShuffle"); + if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits()) + return SDValue(); - if (MaxElts[i] - MinElts[i] >= NumElts) { + if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } - if (MinElts[i] >= NumElts) { + if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(NumElts, dl)); - } else if (MaxElts[i] < NumElts) { + Src.ShuffleVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(NumSrcElts, dl, MVT::i32)); + Src.WindowBase = -NumSrcElts; + } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(0, dl)); + Src.ShuffleVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(0, dl, MVT::i32)); } else { // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(0, dl)); - SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(NumElts, dl)); - ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, - DAG.getConstant(VEXTOffsets[i], dl, - MVT::i32)); + SDValue VEXTSrc1 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(0, dl, MVT::i32)); + SDValue VEXTSrc2 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(NumSrcElts, dl, MVT::i32)); + + Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1, + VEXTSrc2, + DAG.getConstant(Src.MinElt, dl, MVT::i32)); + Src.WindowBase = -Src.MinElt; } } - SmallVector Mask; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); + // Another possible incompatibility occurs from the vector element types. We + // can fix this by bitcasting the source vectors to the same type we intend + // for the shuffle. 
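// ----------------------------------------------------------------------
// Illustrative sketch of the bitcast step below, in numbers: if the
// shuffle is built in i8 lanes and one source is v2i64, that source is
// bitcast to v16i8 and its WindowScale becomes 64/8 = 8, so source
// element 1 covers shuffle lanes 8..15. The mask loop further down
// computes exactly this product (helper name is hypothetical):
constexpr int extractBase(int EltNo, int WindowScale, int WindowBase) {
  return EltNo * WindowScale + WindowBase; // mirrors "ExtractBase" below
}
// ----------------------------------------------------------------------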
+ for (auto &Src : Sources) { + EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); + if (SrcEltTy == SmallestEltTy) continue; - } + assert(ShuffleVT.getVectorElementType() == SmallestEltTy); + Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); + Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); + Src.WindowBase *= Src.WindowScale; + } - SDValue ExtractVec = Entry.getOperand(0); - int ExtractElt = cast(Op.getOperand(i) - .getOperand(1))->getSExtValue(); - if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); - } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); - } + // Final sanity check before we try to actually produce a shuffle. + DEBUG( + for (auto Src : Sources) + assert(Src.ShuffleVec.getValueType() == ShuffleVT); + ); + + // The stars all align, our next step is to produce the mask for the shuffle. + SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); + int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + SDValue Entry = Op.getOperand(i); + if (Entry.getOpcode() == ISD::UNDEF) + continue; + + auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0)); + int EltNo = cast(Entry.getOperand(1))->getSExtValue(); + + // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit + // trunc. So only std::min(SrcBits, DestBits) actually get defined in this + // segment. + EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); + int BitsDefined = std::min(OrigEltTy.getSizeInBits(), + VT.getVectorElementType().getSizeInBits()); + int LanesDefined = BitsDefined / BitsPerShuffleLane; + + // This source is expected to fill ResMultiplier lanes of the final shuffle, + // starting at the appropriate offset. + int *LaneMask = &Mask[i * ResMultiplier]; + + int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; + ExtractBase += NumElts * (Src - Sources.begin()); + for (int j = 0; j < LanesDefined; ++j) + LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], - &Mask[0]); + if (!isShuffleMaskLegal(Mask, ShuffleVT)) + return SDValue(); - return SDValue(); + // We can't handle more than two sources. This should have already + // been checked before this point. + assert(Sources.size() <= 2 && "Too many sources!"); + + SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; + for (unsigned i = 0; i < Sources.size(); ++i) + ShuffleOps[i] = Sources[i].ShuffleVec; + + SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], + ShuffleOps[1], &Mask[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } /// isShuffleMaskLegal - Targets can use this to indicate that they only @@ -6235,6 +6426,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? + // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -6265,6 +6458,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? + SDValue N2; // Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -6337,6 +6532,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); @@ -6445,45 +6641,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); - - // Create stack object for sret. + Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); - const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); - const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; + bool ShouldUseSRet = Subtarget->isAPCS_ABI(); + SDValue SRet; + if (ShouldUseSRet) { + // Create stack object for sret. + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); + + ArgListEntry Entry; + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + RetTy = Type::getVoidTy(*DAG.getContext()); + } + ArgListEntry Entry; - - Entry.Node = SRet; - Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; - Args.push_back(Entry); - Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - const char *LibcallName = (ArgVT == MVT::f64) - ? "__sincos_stret" : "__sincosf_stret"; + const char *LibcallName = + (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; + RTLIB::Libcall LC = + (ArgVT == MVT::f64) ? 
RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, - std::move(Args), 0) - .setDiscardResult(); - + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CC, RetTy, Callee, std::move(Args), 0) + .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); + if (!ShouldUseSRet) + return CallResult.first; + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo(), false, false, false, 0); @@ -6498,6 +6705,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { LoadSin.getValue(0), LoadCos.getValue(0)); } +SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, + bool Signed, + SDValue &Chain) const { + EVT VT = Op.getValueType(); + assert((VT == MVT::i32 || VT == MVT::i64) && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + const auto &DL = DAG.getDataLayout(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + const char *Name = nullptr; + if (Signed) + Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64"; + else + Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64"; + + SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL)); + + ARMTargetLowering::ArgListTy Args; + + for (auto AI : {1, 0}) { + ArgListEntry Arg; + Arg.Node = Op.getOperand(AI); + Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Arg); + } + + CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()), + ES, std::move(Args), 0); + + return LowerCallTo(CLI).first; +} + +SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + assert(Op.getValueType() == MVT::i32 && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, + DAG.getEntryNode(), Op.getOperand(1)); + + return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); +} + +void ARMTargetLowering::ExpandDIV_Windows( + SDValue Op, SelectionDAG &DAG, bool Signed, + SmallVectorImpl &Results) const { + const auto &DL = DAG.getDataLayout(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + assert(Op.getValueType() == MVT::i64 && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(1, dl, MVT::i32)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi); + + SDValue DBZCHK = + DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or); + + SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); + + SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result); + SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result, + DAG.getConstant(32, dl, TLI.getPointerTy(DL))); + Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper); + + Results.push_back(Lower); + Results.push_back(Upper); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { // Monotonic load/store is legal for all targets if (cast(Op)->getOrdering() <= Monotonic) @@ -6513,36 +6799,22 @@ static void 
ReplaceREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc DL(N); - SDValue Cycles32, OutChain; + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, , c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getConstant(15, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(9, DL, MVT::i32), + DAG.getConstant(13, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32) + }; - if (Subtarget->hasPerfMon()) { - // Under Power Management extensions, the cycle-count is: - // mrc p15, #0, , c9, c13, #0 - SDValue Ops[] = { N->getOperand(0), // Chain - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(9, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32) - }; - - Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), Ops); - OutChain = Cycles32.getValue(1); - } else { - // Intrinsic is defined to return 0 on unsupported platforms. Technically - // there are older ARM CPUs that have implementation-specific ways of - // obtaining this information (FIXME!). - Cycles32 = DAG.getConstant(0, DL, MVT::i32); - OutChain = DAG.getEntryNode(); - } - - - SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, - Cycles32, DAG.getConstant(0, DL, MVT::i32)); - Results.push_back(Cycles64); - Results.push_back(OutChain); + SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32, + DAG.getConstant(0, DL, MVT::i32))); + Results.push_back(Cycles32.getValue(1)); } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -6576,15 +6848,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::SREM: return LowerREM(Op.getNode(), DAG); + case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); @@ -6622,13 +6896,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Don't know how to custom lower this!"); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ARMISD::WIN__DBZCHK: return SDValue(); } } /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
-                                           SmallVectorImpl<SDValue>&Results,
+                                           SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
   SDValue Res;
   switch (N->getOpcode()) {
@@ -6644,9 +6919,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::SRA:
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
+  case ISD::SREM:
+  case ISD::UREM:
+    Res = LowerREM(N, DAG);
+    break;
   case ISD::READCYCLECOUNTER:
     ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
     return;
+  case ISD::UDIV:
+  case ISD::SDIV:
+    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+    return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+                             Results);
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -6683,12 +6967,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,

   // Grab constant pool and fixed stack memory operands.
   MachineMemOperand *CPMMO =
-    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
-                             MachineMemOperand::MOLoad, 4, 4);
+      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+                               MachineMemOperand::MOLoad, 4, 4);

   MachineMemOperand *FIMMOSt =
-    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                             MachineMemOperand::MOStore, 4, 4);
+      MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+                               MachineMemOperand::MOStore, 4, 4);

   // Load the address of the dispatch MBB into the jump buffer.
   if (isThumb2) {
@@ -6792,7 +7076,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   MachineModuleInfo &MMI = MF->getMMI();
   for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
        ++BB) {
-    if (!BB->isLandingPad()) continue;
+    if (!BB->isEHPad()) continue;

     // FIXME: We should assert that the EH_LABEL is the first MI in the landing
     // pad.
@@ -6807,7 +7091,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
         for (SmallVectorImpl<unsigned>::iterator
                CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
              CSI != CSE; ++CSI) {
-          CallSiteNumToLPad[*CSI].push_back(BB);
+          CallSiteNumToLPad[*CSI].push_back(&*BB);
           MaxCSNum = std::max(MaxCSNum, *CSI);
         }
         break;
@@ -6840,7 +7124,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,

   // Shove the dispatch's address into the return slot in the function context.
   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
-  DispatchBB->setIsLandingPad();
+  DispatchBB->setIsEHPad();

   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
   unsigned trap_opcode;
@@ -6864,10 +7148,9 @@
   // context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); - MachineMemOperand *FIMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad | - MachineMemOperand::MOVolatile, 4, 4); + MachineMemOperand *FIMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); MachineInstrBuilder MIB; MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); @@ -6982,9 +7265,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3)); - MachineMemOperand *JTMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), - MachineMemOperand::MOLoad, 4, 4); + MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) @@ -7066,9 +7348,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI)); - MachineMemOperand *JTMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), - MachineMemOperand::MOLoad, 4, 4); + MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred( BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) @@ -7109,13 +7390,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, BB->succ_end()); while (!Successors.empty()) { MachineBasicBlock *SMBB = Successors.pop_back_val(); - if (SMBB->isLandingPad()) { + if (SMBB->isEHPad()) { BB->removeSuccessor(SMBB); MBBLPads.push_back(SMBB); } } - BB->addSuccessor(DispatchBB); + BB->addSuccessor(DispatchBB, BranchProbability::getZero()); + BB->normalizeSuccProbs(); // Find the invoke call and mark all of the callee-saved registers as // 'implicit defined' so that they're spilled. This prevents code from @@ -7157,7 +7439,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, // landing pad now. for (SmallVectorImpl::iterator I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) - (*I)->setIsLandingPad(false); + (*I)->setIsEHPad(false); // The instruction is gone now. MI->eraseFromParent(); @@ -7280,8 +7562,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Otherwise, we will generate unrolled scalar copies. 
const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned src = MI->getOperand(1).getReg(); @@ -7573,6 +7854,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, return MBB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + + MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(); + MF->push_back(ContBB); + ContBB->splice(ContBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + MBB->addSuccessor(ContBB); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + MF->push_back(TrapBB); + BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249); + MBB->addSuccessor(TrapBB); + + BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ)) + .addReg(MI->getOperand(0).getReg()) + .addMBB(TrapBB); + + MI->eraseFromParent(); + return ContBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -7643,8 +7950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -7741,6 +8047,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: + return BB; + + case ARM::Int_eh_sjlj_setup_dispatch: EmitSjLjDispatchBlock(MI, BB); return BB; @@ -7759,8 +8068,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) // SinkBB: V1 = PHI(V2, V3) const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator BBI = BB; - ++BBI; + MachineFunction::iterator BBI = ++BB->getIterator(); MachineFunction *Fn = BB->getParent(); MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); @@ -7824,11 +8132,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitStructByval(MI, BB); case ARM::WIN__CHKSTK: return EmitLowered__chkstk(MI, BB); + case ARM::WIN__DBZCHK: + return EmitLowered__dbzchk(MI, BB); + } +} + +/// \brief Attaches vregs to MEMCPY that it will use as scratch registers +/// when it is expanded into LDM/STM. This is done as a post-isel lowering +/// instead of as a custom inserter because we need the use list from the SDNode. +static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, + MachineInstr *MI, const SDNode *Node) { + bool isThumb1 = Subtarget->isThumb1Only(); + + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = MI->getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstrBuilder MIB(*MF, MI); + + // If the new dst/src is unused mark it as dead. + if (!Node->hasAnyUseOfValue(0)) { + MI->getOperand(0).setIsDead(true); + } + if (!Node->hasAnyUseOfValue(1)) { + MI->getOperand(1).setIsDead(true); + } + + // The MEMCPY both defines and kills the scratch registers. 
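Editor's note (not part of the patch): the loop that follows attaches one scratch register per word moved in an LDM/STM iteration. A toy model of the idea, under the assumption that MemcpyPseudo and attachScratch are purely illustrative and not LLVM API:

    #include <vector>

    // A pseudo that later expands to LDM/STM needs N scratch registers, but
    // their contents are meaningless once the pseudo is gone. Modelling each
    // as a def that is immediately dead lets the register allocator pick
    // registers without extending any live range past the instruction.
    struct MemcpyPseudo {
      std::vector<unsigned> DeadScratchDefs;
    };

    void attachScratch(MemcpyPseudo &MI, unsigned NumWords, unsigned &NextVReg) {
      for (unsigned I = 0; I != NumWords; ++I)
        MI.DeadScratchDefs.push_back(NextVReg++); // born and killed here
    }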
+  for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+    unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+                                                         : &ARM::GPRRegClass);
+    MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
+  }
+}

 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                       SDNode *Node) const {
+  if (MI->getOpcode() == ARM::MEMCPY) {
+    attachMEMCPYScratchRegs(Subtarget, MI, Node);
+    return;
+  }
+
   const MCInstrDesc *MCID = &MI->getDesc();
   // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
   // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7898,10 +8241,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,

 // Helper function that checks if N is a null or all ones constant.
 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
-  if (!C)
-    return false;
-  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
 }

 // Return true if N is conditionally 0 or all ones.
@@ -8723,12 +9063,88 @@ static SDValue PerformXORCombine(SDNode *N,
   return SDValue();
 }

-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn)
+// and return it, and fill in FromMask and ToMask with (consecutive) bits
+// in "from" to be extracted and their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+  assert(N->getOpcode() == ARMISD::BFI);
+
+  SDValue From = N->getOperand(1);
+  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(),
+                                  ToMask.countPopulation());
+
+  // If the Base came from a SHR #C, we can deduce that it is really testing bit
+  // #C in the base of the SHR.
+  if (From->getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(From->getOperand(1))) {
+    APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+    assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+    FromMask <<= Shift.getLimitedValue(31);
+    From = From->getOperand(0);
+  }
+
+  return From;
+}
+
+// If A and B contain one contiguous set of bits, does A | B == A . B?
+//
+// Neither A nor B must be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+  unsigned LastActiveBitInA = A.countTrailingZeros();
+  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+  return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can
+  // combine with, if one exists.
+  APInt ToMask, FromMask;
+  SDValue From = ParseBFI(N, ToMask, FromMask);
+  SDValue To = N->getOperand(0);
+
+  // Now check for a compatible BFI to merge with. We can pass through BFIs that
+  // aren't compatible, but not if they set the same bit in their destination as
+  // we do (or that of any BFI we're going to combine with).
+  SDValue V = To;
+  APInt CombinedToMask = ToMask;
+  while (V.getOpcode() == ARMISD::BFI) {
+    APInt NewToMask, NewFromMask;
+    SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+    if (NewFrom != From) {
+      // This BFI has a different base. Keep going.
+      CombinedToMask |= NewToMask;
+      V = V.getOperand(0);
+      continue;
+    }
+
+    // Do the written bits conflict with any we've seen so far?
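Editor's note (not part of the patch): the conflict check continues below. For orientation, a plain-integer model of the mask recovery ParseBFI performs; the helper name and fixed 32-bit width are assumptions for the sketch, and the SRL adjustment is left out:

    #include <bitset>
    #include <cstdint>

    // BFI's third operand is an inverted mask: its zero bits mark the field
    // written in Rd. The bits taken from Rn are the same number of low bits.
    void parseBFIMasks(uint32_t InvMask, uint32_t &ToMask, uint32_t &FromMask) {
      ToMask = ~InvMask;
      unsigned Width = std::bitset<32>(ToMask).count();
      FromMask = Width >= 32 ? ~0u : ((1u << Width) - 1);
    }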
+ if ((NewToMask & CombinedToMask).getBoolValue()) + // Conflicting bits - bail out because going further is unsafe. + return SDValue(); + + // Are the new bits contiguous when combined with the old bits? + if (BitsProperlyConcatenate(ToMask, NewToMask) && + BitsProperlyConcatenate(FromMask, NewFromMask)) + return V; + if (BitsProperlyConcatenate(NewToMask, ToMask) && + BitsProperlyConcatenate(NewFromMask, FromMask)) + return V; + + // We've seen a write to some bits, so track it. + CombinedToMask |= NewToMask; + // Keep going... + V = V.getOperand(0); + } + + return SDValue(); +} + static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); if (N1.getOpcode() == ISD::AND) { + // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff + // the bits being cleared by the AND are not demanded by the BFI. ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); @@ -8744,6 +9160,38 @@ static SDValue PerformBFICombine(SDNode *N, return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); + } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) { + // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes. + // Keep track of any consecutive bits set that all come from the same base + // value. We can combine these together into a single BFI. + SDValue CombineBFI = FindBFIToCombineWith(N); + if (CombineBFI == SDValue()) + return SDValue(); + + // We've found a BFI. + APInt ToMask1, FromMask1; + SDValue From1 = ParseBFI(N, ToMask1, FromMask1); + + APInt ToMask2, FromMask2; + SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); + assert(From1 == From2); + (void)From2; + + // First, unlink CombineBFI. + DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0)); + // Then create a new BFI, combining the two together. + APInt NewFromMask = FromMask1 | FromMask2; + APInt NewToMask = ToMask1 | ToMask2; + + EVT VT = N->getValueType(0); + SDLoc dl(N); + + if (NewFromMask[0] == 0) + From1 = DCI.DAG.getNode( + ISD::SRL, dl, VT, From1, + DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); + return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1, + DCI.DAG.getConstant(~NewToMask, dl, VT)); } return SDValue(); } @@ -9521,32 +9969,6 @@ static SDValue PerformSTORECombine(SDNode *N, return SDValue(); } -// isConstVecPow2 - Return true if each vector element is a power of 2, all -// elements are the same constant, C, and Log2(C) ranges from 1 to 32. -static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) -{ - integerPart cN; - integerPart c0 = 0; - for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); - I != E; I++) { - ConstantFPSDNode *C = dyn_cast(ConstVec.getOperand(I)); - if (!C) - return false; - - bool isExact; - APFloat APF = C->getValueAPF(); - if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) - != APFloat::opOK || !isExact) - return false; - - c0 = (I == 0) ? cN : c0; - if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) - return false; - } - C = c0; - return true; -} - /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) /// can replace combinations of VMUL and VCVT (floating-point to integer) /// when the VMUL has a constant operand that is a power of 2. 
@@ -9556,30 +9978,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
 /// vcvt.s32.f32 d16, d16
 /// becomes:
 /// vcvt.s32.f32 d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue Op = N->getOperand(0);
-
-  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
-      Op.getOpcode() != ISD::FMUL)
+  if (!Subtarget->hasNEON())
     return SDValue();

-  uint64_t C;
-  SDValue N0 = Op->getOperand(0);
-  SDValue ConstVec = Op->getOperand(1);
-  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+  SDValue Op = N->getOperand(0);
+  if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+    return SDValue();

-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  SDValue ConstVec = Op->getOperand(1);
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();

   MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t IntBits = IntTy.getSizeInBits();
   unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
-      NumLanes > 4) {
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from f32 to i32. We can handle
     // smaller integers by generating an extra truncate, but larger ones would
     // be lossy. We also can't handle more than 4 lanes, since these instructions
@@ -9587,16 +10004,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
     return SDValue();
   }

+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
     Intrinsic::arm_neon_vcvtfp2fxu;
-  SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
-                                NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
-                                DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                                N0,
-                                DAG.getConstant(Log2_64(C), dl, MVT::i32));
+  SDValue FixConv = DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+      DAG.getConstant(C, dl, MVT::i32));

-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);

   return FixConv;
@@ -9611,38 +10034,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
 /// vdiv.f32 d16, d17, d16
 /// becomes:
 /// vcvt.f32.s32 d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
   SDValue Op = N->getOperand(0);
   unsigned OpOpcode = Op.getNode()->getOpcode();
-
-  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+  if (!N->getValueType(0).isVector() ||
       (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
     return SDValue();

-  uint64_t C;
   SDValue ConstVec = N->getOperand(1);
-  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();

   MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+  uint32_t IntBits = IntTy.getSizeInBits();
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from i32 to f32. We can handle
     // smaller integers by generating an extra extend, but larger ones would
-    // be lossy.
+    // be lossy. We also can't handle more than 4 lanes, since these instructions
+    // only support v2i32/v4i32 types.
     return SDValue();
   }

+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
   SDValue ConvInput = Op.getOperand(0);
-  unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                             dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
                             ConvInput);
@@ -9652,7 +10081,7 @@ static SDValue PerformVDIVCombine(SDNode *N,

   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
                      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                     ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+                     ConvInput, DAG.getConstant(C, dl, MVT::i32));
 }

 /// Getvshiftimm - Check if this is a valid build_vector for the immediate
@@ -9680,7 +10109,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
 /// 0 <= Value <= ElementBits for a long left shift.
 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
   assert(VT.isVector() && "vector shift count is not a vector type");
-  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
   if (! getVShiftImm(Op, ElementBits, Cnt))
     return false;
   return (Cnt >= 0 && (isLong ?
Cnt-1 : Cnt) < ElementBits); @@ -9695,12 +10124,16 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; - if (isIntrinsic) + if (!isIntrinsic) + return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); + if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) { Cnt = -Cnt; - return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); + return true; + } + return false; } /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. @@ -9939,89 +10372,123 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC -/// to match f32 max/min patterns to use NEON vmax/vmin instructions. -static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, - const ARMSubtarget *ST) { - // If the target supports NEON, try to use vmax/vmin instructions for f32 - // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, - // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is - // a NaN; only do the transformation when it matches that behavior. +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, + APInt &KnownOne) { + if (Op.getOpcode() == ARMISD::BFI) { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - // For now only do this when using NEON for FP operations; if using VFP, it - // is not obvious that the benefit outweighs the cost of switching to the - // NEON pipeline. - if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || - N->getValueType(0) != MVT::f32) + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast(Op.getOperand(2)); + APInt Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } + if (Op.getOpcode() == ARMISD::CMOV) { + APInt KZ2(KnownZero.getBitWidth(), 0); + APInt KO2(KnownOne.getBitWidth(), 0); + computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); + computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); + + KnownZero &= KZ2; + KnownOne &= KO2; + return; + } + return DAG.computeKnownBits(Op, KnownZero, KnownOne); +} + +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { + // If we have a CMOV, OR and AND combination such as: + // if (x & CN) + // y |= CM; + // + // And: + // * CN is a single bit; + // * All bits covered by CM are known zero in y + // + // Then we can convert this into a sequence of BFI instructions. This will + // always be a win if CM is a single bit, will always be no worse than the + // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is + // three bits (due to the extra IT instruction). + + SDValue Op0 = CMOV->getOperand(0); + SDValue Op1 = CMOV->getOperand(1); + auto CCNode = cast(CMOV->getOperand(2)); + auto CC = CCNode->getAPIntValue().getLimitedValue(); + SDValue CmpZ = CMOV->getOperand(4); + + // The compare must be against zero. 
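Editor's note (not part of the patch): the zero-compare and operand checks resume immediately below. For reference, the C-level shape this combine looks for, with illustrative constants:

    // CN = 0x4 is a single bit and CM = 0x30 covers bits known to be zero in
    // y, so the DAG is CMOV(y, y | CM, NE, CMPZ(x & CN)) and the combine can
    // rewrite it as popcount(CM) BFI instructions that insert bit 2 of x into
    // bits 4 and 5 of y.
    unsigned orIfBitSet(unsigned x, unsigned y) {
      if (x & 0x4)
        y |= 0x30;
      return y;
    }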
+ if (!isNullConstant(CmpZ->getOperand(1))) return SDValue(); - SDValue CondLHS = N->getOperand(0); - SDValue CondRHS = N->getOperand(1); - SDValue LHS = N->getOperand(2); - SDValue RHS = N->getOperand(3); - ISD::CondCode CC = cast(N->getOperand(4))->get(); + assert(CmpZ->getOpcode() == ARMISD::CMPZ); + SDValue And = CmpZ->getOperand(0); + if (And->getOpcode() != ISD::AND) + return SDValue(); + ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + return SDValue(); + SDValue X = And->getOperand(0); - unsigned Opcode = 0; - bool IsReversed; - if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { - IsReversed = false; // x CC y ? x : y - } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { - IsReversed = true ; // x CC y ? y : x + if (CC == ARMCC::EQ) { + // We're performing an "equal to zero" compare. Swap the operands so we + // canonicalize on a "not equal to zero" compare. + std::swap(Op0, Op1); } else { + assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); + } + + if (Op1->getOpcode() != ISD::OR) return SDValue(); + + ConstantSDNode *OrC = dyn_cast(Op1->getOperand(1)); + if (!OrC) + return SDValue(); + SDValue Y = Op1->getOperand(0); + + if (Op0 != Y) + return SDValue(); + + // Now, is it profitable to continue? + APInt OrCI = OrC->getAPIntValue(); + unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; + if (OrCI.countPopulation() > Heuristic) + return SDValue(); + + // Lastly, can we determine that the bits defined by OrCI + // are zero in Y? + APInt KnownZero, KnownOne; + computeKnownBits(DAG, Y, KnownZero, KnownOne); + if ((OrCI & KnownZero) != OrCI) + return SDValue(); + + // OK, we can do the combine. + SDValue V = Y; + SDLoc dl(X); + EVT VT = X.getValueType(); + unsigned BitInX = AndC->getAPIntValue().logBase2(); + + if (BitInX != 0) { + // We must shift X first. + X = DAG.getNode(ISD::SRL, dl, VT, X, + DAG.getConstant(BitInX, dl, VT)); } - bool IsUnordered; - switch (CC) { - default: break; - case ISD::SETOLT: - case ISD::SETOLE: - case ISD::SETLT: - case ISD::SETLE: - case ISD::SETULT: - case ISD::SETULE: - // If LHS is NaN, an ordered comparison will be false and the result will - // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS - // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. - IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); - if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) - break; - // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin - // will return -0, so vmin can only be used for unsafe math or if one of - // the operands is known to be nonzero. - if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !DAG.getTarget().Options.UnsafeFPMath && - !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) - break; - Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; - break; - - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - // If LHS is NaN, an ordered comparison will be false and the result will - // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS - // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. - IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); - if (!DAG.isKnownNeverNaN(IsUnordered ? 
RHS : LHS)) - break; - // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax - // will return +0, so vmax can only be used for unsafe math or if one of - // the operands is known to be nonzero. - if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !DAG.getTarget().Options.UnsafeFPMath && - !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) - break; - Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; - break; + for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); + BitInY < NumActiveBits; ++BitInY) { + if (OrCI[BitInY] == 0) + continue; + APInt Mask(VT.getSizeInBits(), 0); + Mask.setBit(BitInY); + V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, + // Confusingly, the operand is an *inverted* mask. + DAG.getConstant(~Mask, dl, VT)); } - if (!Opcode) - return SDValue(); - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); + return V; } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. @@ -10042,6 +10509,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + // BFI is only available on V6T2+. + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { + SDValue R = PerformCMOVToBFICombine(N, DAG); + if (R) + return R; + } + // Simplify // mov r1, r0 // cmp r1, x @@ -10108,8 +10582,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); - case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); + case ISD::FP_TO_UINT: + return PerformVCVTCombine(N, DCI.DAG, Subtarget); + case ISD::FDIV: + return PerformVDIVCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: @@ -10117,7 +10593,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); - case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD2DUP: @@ -11043,8 +11518,48 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +static RTLIB::Libcall getDivRemLibcall( + const SDNode *N, MVT::SimpleValueType SVT) { + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && + "Unhandled Opcode in getDivRemLibcall"); + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; + RTLIB::Libcall LC; + switch (SVT) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + return LC; +} + +static TargetLowering::ArgListTy getDivRemArgList( + const SDNode *N, LLVMContext *Context) { + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && + "Unhandled Opcode in getDivRemArgList"); + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*Context); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + return Args; +} + SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) && + "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); @@ -11052,28 +11567,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - RTLIB::Libcall LC; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
-  }
-
+  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+                                       VT.getSimpleVT().SimpleTy);
   SDValue InChain = DAG.getEntryNode();

-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Op->getOperand(i).getValueType();
-    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op->getOperand(i);
-    Entry.Ty = ArgTy;
-    Entry.isSExt = isSigned;
-    Entry.isZExt = !isSigned;
-    Args.push_back(Entry);
-  }
+  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+                                                    DAG.getContext());

   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                          getPointerTy(DAG.getDataLayout()));
@@ -11090,6 +11589,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   return CallInfo.first;
 }

+// Lowers REM using divmod helpers
+// see RTABI section 4.2/4.3
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+  // Build return types (div and rem)
+  std::vector<Type*> RetTyParams;
+  Type *RetTyElement;
+
+  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:  RetTyElement = Type::getInt8Ty(*DAG.getContext());  break;
+  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+  }
+
+  RetTyParams.push_back(RetTyElement);
+  RetTyParams.push_back(RetTyElement);
+  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+  Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+                                                                  SimpleTy);
+  SDValue InChain = DAG.getEntryNode();
+  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+  bool isSigned = N->getOpcode() == ISD::SREM;
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy(DAG.getDataLayout()));
+
+  // Lower call
+  CallLoweringInfo CLI(DAG);
+  CLI.setChain(InChain)
+     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+     .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // Return second (rem) result operand (first contains div)
+  SDNode *ResNode = CallResult.first.getNode();
+  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+  return ResNode->getOperand(1);
+}
+
 SDValue
 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11124,8 +11664,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());

   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+                     SDLoc(Op)).first;
 }

 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11137,8 +11677,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());

   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC,
Op.getValueType(), SrcVal, /*isSigned*/ false, + SDLoc(Op)).first; } bool @@ -11186,7 +11726,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11212,7 +11752,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11295,8 +11835,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } - Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -11392,19 +11930,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that // guarantee, see DDI0406C ARM architecture reference manual, // sections A8.8.72-74 LDRD) -bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { +TargetLowering::AtomicExpansionKind +ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return (Size == 64) && !Subtarget->isMClass(); + return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles -TargetLoweringBase::AtomicRMWExpansionKind +TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); return (Size <= (Subtarget->isMClass() ? 32U : 64U)) - ? AtomicRMWExpansionKind::LLSC - : AtomicRMWExpansionKind::None; + ? AtomicExpansionKind::LLSC + : AtomicExpansionKind::None; +} + +bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( + AtomicCmpXchgInst *AI) const { + return true; } // This has so far only been implemented for MachO. @@ -11419,7 +11964,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, return false; // Floating point values and vector values map to the same register file. - // Therefore, althought we could do a store extract of a vector type, this is + // Therefore, although we could do a store extract of a vector type, this is // better to leave at float as we have more freedom in the addressing mode for // those. 
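Editor's note (not part of the patch; the float/vector check of canCombineStoreAndExtract resumes below): the atomic hunks a few lines above encode a simple size rule. A minimal restatement, where the enum and function are editor-invented stand-ins for the TargetLowering hooks:

    // ldrex/strex reach 32 bits on all ARM targets and 64 bits off M-profile;
    // loads within the limit become LL-only expansions, RMW ops become full
    // LL/SC loops, and anything larger stays a library call.
    enum class Expansion { None, LLOnly, LLSC };

    Expansion classifyAtomic(unsigned Bits, bool IsMClass, bool IsLoad) {
      if (IsLoad)
        return (Bits == 64 && !IsMClass) ? Expansion::LLOnly : Expansion::None;
      return Bits <= (IsMClass ? 32u : 64u) ? Expansion::LLSC : Expansion::None;
    }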
if (VectorTy->isFPOrFPVectorTy()) @@ -11441,6 +11986,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, return false; } +bool ARMTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget->hasV6T2Ops(); +} + +bool ARMTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget->hasV6T2Ops(); +} + Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -11477,6 +12030,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, cast(Addr->getType())->getElementType()); } +void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( + IRBuilder<> &Builder) const { + if (!Subtarget->hasV7Ops()) + return; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); +} + Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { @@ -11534,12 +12095,12 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't - // support i64/f64 element). - if ((VecSize != 64 && VecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vldN doesn't support i64/f64 elements). + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -11552,9 +12113,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy); - IRBuilder<> Builder(LI); SmallVector Ops; @@ -11562,6 +12120,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); Ops.push_back(Builder.getInt32(LI->getAlignment())); + Type *Tys[] = { VecTy, Int8Ptr }; + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); // Replace uses of each shufflevector with the corresponding vector loaded @@ -11624,12 +12185,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip illegal sub vector types and vector types of i64/f64 element (vstN - // doesn't support i64/f64 element). - if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vstN doesn't support i64/f64 elements). 
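Editor's note (not part of the patch): the NEON guard continues below. For orientation, a scalar model of the factor-2 case that these vldN/vstN interleaved-access hooks cover; the function name is illustrative only:

    #include <cstdint>

    // vld2 performs this de-interleave in a single instruction (even lanes to
    // A, odd lanes to B); vst2 is the inverse interleave on the store side.
    void deinterleave2(const uint32_t *Src, uint32_t *A, uint32_t *B,
                       unsigned NumPairs) {
      for (unsigned I = 0; I != NumPairs; ++I) {
        A[I] = Src[2 * I];
        B[I] = Src[2 * I + 1];
      }
    }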
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || + EltIs64Bits) return false; Value *Op0 = SVI->getOperand(0); @@ -11650,17 +12212,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SubVecTy = VectorType::get(IntTy, NumSubElts); } - static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, - Intrinsic::arm_neon_vst3, - Intrinsic::arm_neon_vst4}; - Function *VstNFunc = Intrinsic::getDeclaration( - SI->getModule(), StoreInts[Factor - 2], SubVecTy); - + static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, + Intrinsic::arm_neon_vst3, + Intrinsic::arm_neon_vst4}; SmallVector Ops; Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); + Type *Tys[] = { Int8Ptr, SubVecTy }; + Function *VstNFunc = Intrinsic::getDeclaration( + SI->getModule(), StoreInts[Factor - 2], Tys); + // Split the shufflevector operands into sub vectors for the new vstN call. for (unsigned i = 0; i < Factor; i++) Ops.push_back(Builder.CreateShuffleVector( @@ -11681,14 +12244,14 @@ enum HABaseType { static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members) { - if (const StructType *ST = dyn_cast(Ty)) { + if (auto *ST = dyn_cast(Ty)) { for (unsigned i = 0; i < ST->getNumElements(); ++i) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) return false; Members += SubMembers; } - } else if (const ArrayType *AT = dyn_cast(Ty)) { + } else if (auto *AT = dyn_cast(Ty)) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) return false; @@ -11703,7 +12266,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, return false; Members = 1; Base = HA_DOUBLE; - } else if (const VectorType *VT = dyn_cast(Ty)) { + } else if (auto *VT = dyn_cast(Ty)) { Members = 1; switch (Base) { case HA_FLOAT: @@ -11747,3 +12310,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } + +unsigned ARMTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; +} + +unsigned ARMTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index efc9020c193a..b764624f1492 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -63,8 +63,6 @@ namespace llvm { BCC_i64, - RBIT, // ARM bitreverse instruction - SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. @@ -79,6 +77,7 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. TC_RETURN, // Tail call return pseudo. @@ -91,6 +90,7 @@ namespace llvm { PRELOAD, // Preload WIN__CHKSTK, // Windows' __chkstk call to do stack probing. 
+ WIN__DBZCHK, // Windows' divide by zero check VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. @@ -172,12 +172,6 @@ namespace llvm { // BUILD_VECTOR for this purpose. BUILD_VECTOR, - // Floating-point max and min: - FMAX, - FMIN, - VMAXNM, - VMINNM, - // Bit-field insert BFI, @@ -189,6 +183,10 @@ namespace llvm { // Vector bitwise select VBSL, + // Pseudo-instruction representing a memory copy using ldm/stm + // instructions. + MEMCPY, + // Vector load N-element structure to all lanes: VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD3DUP, @@ -260,6 +258,7 @@ namespace llvm { SDNode *Node) const override; SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; + SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; @@ -348,6 +347,8 @@ namespace llvm { getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; + else if (ConstraintCode == "o") + return InlineAsm::Constraint_o; else if (ConstraintCode.size() == 2) { if (ConstraintCode[0] == 'U') { switch(ConstraintCode[1]) { @@ -420,13 +421,24 @@ namespace llvm { bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; - bool hasLoadLinkedStoreConditional() const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+    unsigned
+    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
     Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
     Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                           AtomicOrdering Ord) const override;
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                                 AtomicOrdering Ord) const override;

+    void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
     Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                   bool IsStore, bool IsLoad) const override;
     Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -441,16 +453,21 @@ namespace llvm {
     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                unsigned Factor) const override;

-    bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    TargetLoweringBase::AtomicRMWExpansionKind
+    TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

     bool useLoadStackGuardNode() const override;

     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                    unsigned &Cost) const override;

+    bool isCheapToSpeculateCttz() const override;
+    bool isCheapToSpeculateCtlz() const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -496,6 +513,7 @@ namespace llvm {
                             ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -508,7 +526,6 @@ namespace llvm {
     SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                  TLSModel::Model model) const;
-    SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -526,6 +543,12 @@ namespace llvm {
                             const ARMSubtarget *ST) const;
     SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+    void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+                           SmallVectorImpl<SDValue> &Results) const;
+    SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+                                   SDValue &Chain) const;
+    SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -635,6 +658,8 @@ namespace llvm {

     MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
                                            MachineBasicBlock *MBB) const;
+    MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const;
   };

   enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be30991..cf973d68085f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -51,7 +51,8 @@ void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
 unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
   switch (Opc) {
-  default: break;
+  default:
+    break;
   case ARM::LDR_PRE_IMM:
   case ARM::LDR_PRE_REG:
   case ARM::LDR_POST_IMM:
@@ -124,82 +125,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
           .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
   unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
   MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
-      MachinePointerInfo::getGOT(), Flag, 4, 4);
+      MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
   MIB.addMemOperand(MMO);
   MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
   MIB.addReg(Reg, RegState::Kill).addImm(0);
   MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
   AddDefaultPred(MIB);
 }
-
-namespace {
-  /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
-  /// global base register for ARM ELF.
-  struct ARMCGBR : public MachineFunctionPass {
-    static char ID;
-    ARMCGBR() : MachineFunctionPass(ID) {}
-
-    bool runOnMachineFunction(MachineFunction &MF) override {
-      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-      if (AFI->getGlobalBaseReg() == 0)
-        return false;
-      const ARMSubtarget &STI =
-          static_cast<const ARMSubtarget &>(MF.getSubtarget());
-      // Don't do this for Thumb1.
-      if (STI.isThumb1Only())
-        return false;
-
-      const TargetMachine &TM = MF.getTarget();
-      if (TM.getRelocationModel() != Reloc::PIC_)
-        return false;
-
-      LLVMContext *Context = &MF.getFunction()->getContext();
-      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      unsigned PCAdj = STI.isThumb() ? 4 : 8;
-      ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
-          *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
-
-      unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
-          Type::getInt32PtrTy(*Context));
-      unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
-
-      MachineBasicBlock &FirstMBB = MF.front();
-      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
-      unsigned TempReg =
-          MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
-      unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
-      const TargetInstrInfo &TII = *STI.getInstrInfo();
-      MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
-                                        TII.get(Opc), TempReg)
-                                    .addConstantPoolIndex(Idx);
-      if (Opc == ARM::LDRcp)
-        MIB.addImm(0);
-      AddDefaultPred(MIB);
-
-      // Fix the GOT address by adding pc.
-      unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
-      Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
-      MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
-                .addReg(TempReg)
-                .addImm(ARMPCLabelIndex);
-      if (Opc == ARM::PICADD)
-        AddDefaultPred(MIB);
-
-      return true;
-    }
-
-    const char *getPassName() const override {
-      return "ARM PIC Global Base Reg Initialization";
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-}
-
-char ARMCGBR::ID = 0;
-FunctionPass*
-llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 61c45af26fe1..b9de83bfe6dc 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -59,6 +59,7 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;

 def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

@@ -70,8 +71,11 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;

-def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
-def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+                                         SDTCisVT<4, i32>]>;

 def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
                                             [SDTCisSameAs<0, 2>,
@@ -163,21 +167,23 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
                                SDT_ARMEH_SJLJ_Longjmp,
                                [SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+                                      SDT_ARMEH_SJLJ_SetupDispatch,
+                                      [SDNPHasChain, SDNPSideEffect]>;

 def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                               [SDNPHasChain, SDNPSideEffect]>;
 def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

-def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-
 def ARMtcret         : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
                         [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;

 def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;

-def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
-def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+                         SDNPMayStore, SDNPMayLoad]>;

 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
@@ -209,6 +215,8 @@ def PreV8        : Predicate<"!Subtarget->hasV8Ops()">,
                                  AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
 def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                  AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
 def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
 def HasVFP2          : Predicate<"Subtarget->hasVFP2()">,
                                  AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -228,7 +236,9 @@ def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
 def HasFP16          : Predicate<"Subtarget->hasFP16()">,
-                                 AssemblerPredicate<"FeatureFP16","half-float">;
+                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
 def HasDivide        : Predicate<"Subtarget->hasDivide()">,
                                  AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
 def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -236,9 +246,8 @@ def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
 def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
                                  AssemblerPredicate<"FeatureT2XtPk",
                                                     "pack/extract">;
-def HasThumb2DSP     : Predicate<"Subtarget->hasThumb2DSP()">,
-                                 AssemblerPredicate<"FeatureDSPThumb2",
-                                                    "thumb2-dsp">;
+def HasDSP           : Predicate<"Subtarget->hasDSP()">,
+                                 AssemblerPredicate<"FeatureDSP", "dsp">;
 def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
                                  AssemblerPredicate<"FeatureDB",
                                                     "data-barriers">;
@@ -2322,6 +2331,7 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
   let Inst{23-4} = 0b01100000000000000111;
   let Inst{3-0} = opt;
 }
+def : MnemonicAlias<"smi", "smc">;

 // Supervisor Call (Software Interrupt)
 let isCall = 1, Uses = [SP] in {
@@ -3671,10 +3681,10 @@ def USAT16 : AI<(outs GPRnopc:$Rd),
   let Inst{3-0} = Rn;
 }

-def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
-               (SSAT imm:$pos, GPRnopc:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
-               (USAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+               (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+               (USAT imm0_31:$pos, GPRnopc:$a, 0)>;

 //===----------------------------------------------------------------------===//
 //  Bitwise Instructions.
@@ -4186,7 +4196,7 @@ def CLZ  : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),

 def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
                     IIC_iUNAr, "rbit", "\t$Rd, $Rm",
-                    [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+                    [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
            Requires<[IsARM, HasV6T2]>,
            Sched<[WriteALU]>;
@@ -4578,6 +4588,19 @@ let usesCustomInserter = 1 in {
                              [(ARMcopystructbyval GPR:$dst, GPR:$src,
                                                   imm:$size, imm:$alignment)]>;
 }

+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+    // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+    // Copies N registers worth of memory from address %src to address %dst
+    // and returns the incremented addresses.  N scratch register will
+    // be attached for the copy to use.
+  def MEMCPY : PseudoInst<
+    (outs GPR:$newdst, GPR:$newsrc),
+    (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+    NoItinerary,
+    [(set GPR:$newdst, GPR:$newsrc,
+          (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
+
 def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
@@ -4705,7 +4728,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),

 def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", [(int_arm_clrex)]>,
-            Requires<[IsARM, HasV7]>  {
+            Requires<[IsARM, HasV6K]>  {
   let Inst{31-0} = 0b11110101011111111111000000011111;
 }
@@ -5242,6 +5265,12 @@ def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
 let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
   def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;

+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+                         [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+  def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+                               [(win__dbzchk GPR:$divisor)]>;
+
 //===----------------------------------------------------------------------===//
 // TLS Instructions
 //
@@ -5301,6 +5330,10 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
                              Requires<[IsARM]>;
 }

+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+                                            [(ARMeh_sjlj_setup_dispatch)]>;
+
 // eh.sjlj.dispatchsetup pseudo-instruction.
 // This pseudo is used for both ARM and Thumb. Any differences are handled when
 // the pseudo is expanded (which happens before any passes that need the
@@ -5622,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
            (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
 // Same for AND <--> BIC
 def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
-                   (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+                   (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
-                   (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+                   (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
-                   (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+                   (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
-                   (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+                   (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;

 // Likewise, "add Rd, mod_imm_neg" -> sub
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f035d6150ec0..7020ffb41b64 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -587,11 +587,6 @@ def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
 def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
 def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

-def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
-                                         SDTCisSameAs<0, 2>]>;
-def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
-def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
-
 def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
   unsigned EltBits = 0;
@@ -2465,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
         [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

 // Same as above, but not predicated.
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -3255,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }

   // 128-bit vector types.
   def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3270,6 +3272,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }
 }
@@ -4110,6 +4119,12 @@ def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                      v2f32, v2f32, fadd, 1>;
 def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                      v4f32, v4f32, fadd, 1>;
+def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+                     v4f16, v4f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+                     v8f16, v8f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 // VADDL    : Vector Add Long (Q = D + D)
 defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vaddl", "s", add, sext, 1>;
@@ -4165,10 +4180,21 @@ def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                      v2f32, v2f32, fmul, 1>;
 def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                      v4f32, v4f32, fmul, 1>;
+def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+                     v4f16, v4f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+                     v8f16, v8f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
 def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
 def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                        v2f32, fmul>;
+def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+                         v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;

 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4277,6 +4303,12 @@ def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
 def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                           v4f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+                          v4f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+                          v8f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
 def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4285,6 +4317,12 @@ def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                             v2f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
 def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                             v4f32, v2f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+                              v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+                              v8f16, v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4495,6 +4533,12 @@ def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
 def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                           v4f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+                          v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+                          v8f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4503,6 +4547,12 @@ def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                             v2f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
 def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                             v4f32, v2f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+                              v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+                              v8f16, v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4570,6 +4620,13 @@ def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                        v4f32, fmul_su, fadd_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+                       v4f16, fmul, fadd>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+                       v8f16, fmul, fadd>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

 //   Fused Vector Multiply Subtract (floating-point)
 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4578,6 +4635,12 @@ def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                        v2f32, fmul_su, fsub_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                        v4f32, fmul_su, fsub_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+                       v4f16, fmul, fsub>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+                       v8f16, fmul, fsub>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

 // Match @llvm.fma.* intrinsics
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4602,6 +4665,12 @@ def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                      v2f32, v2f32, fsub, 0>;
 def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                      v4f32, v4f32, fsub, 0>;
+def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+                     v4f16, v4f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+                     v8f16, v8f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
 // VSUBL    : Vector Subtract Long (Q = D - D)
 defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vsubl", "s", sub, sext, 0>;
@@ -4646,6 +4715,12 @@ def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      NEONvceq, 1>;
+def  VCEQhd   : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCEQhq   : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in
 defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4660,6 +4735,12 @@ def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      NEONvcge, 0>;
+def  VCGEhd   : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGEhq   : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4677,6 +4758,12 @@ def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      NEONvcgt, 0>;
+def  VCGThd   : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGThq   : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4686,36 +4773,68 @@ defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
 }

 // VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def  VACGEfd  : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                         "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def  VACGEfq  : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                         "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def  VACGEhd  : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
-def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                         "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                         "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+                        "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VTST     : Vector Test Bits
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}

 // Vector Bitwise Operations.
@@ -5007,6 +5126,12 @@ def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                         "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
 def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                         "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VABDL    : Vector Absolute Difference Long (Q = | D - D |)
 defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5014,6 +5139,29 @@ defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "s", int_arm_neon_vabds, zext, 1>;
 defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

+def abd_shr :
+    PatFrag<(ops node:$in1, node:$in2, node:$shift),
+            (NEONvshrs (sub (zext node:$in1),
+                            (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+               (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+                                                   (zext (v8i8 DPR:$opB))),
+                                          (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+               (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+                                (zext (v4i16 DPR:$opB))),
+                           (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+                                                   (zext (v2i32 DPR:$opB))),
+                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
 // VABA     : Vector Absolute Difference and Accumulate
 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                              "vaba", "s", int_arm_neon_vabds, add>;
@@ -5031,53 +5179,85 @@ defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,

 // VMAX     : Vector Maximum
 defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmax", "s", int_arm_neon_vmaxs, 1>;
+                           "vmax", "s", smax, 1>;
 defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmax", "u", int_arm_neon_vmaxu, 1>;
+                           "vmax", "u", umax, 1>;
 def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                         "vmax", "f32",
-                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+                        v2f32, v2f32, fmaxnan, 1>;
 def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmax", "f32",
-                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+                        v4f32, v4f32, fmaxnan, 1>;
+def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmax", "f16",
+                        v4f16, v4f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmax", "f16",
+                        v8f16, v8f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VMAXNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+  def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+                            v2f32, v2f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+  def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+                            v4f32, v4f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v4f16, v4f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v8f16, v8f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }

 // VMIN     : Vector Minimum
 defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmin", "s", int_arm_neon_vmins, 1>;
+                           "vmin", "s", smin, 1>;
 defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmin", "u", int_arm_neon_vminu, 1>;
+                           "vmin", "u", umin, 1>;
 def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                         "vmin", "f32",
-                        v2f32, v2f32, int_arm_neon_vmins, 1>;
+                        v2f32, v2f32, fminnan, 1>;
 def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmin", "f32",
-                        v4f32, v4f32, int_arm_neon_vmins, 1>;
+                        v4f32, v4f32, fminnan, 1>;
+def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmin", "f16",
+                        v4f16, v4f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmin", "f16",
+                        v8f16, v8f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VMINNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+  def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v2f32, v2f32, int_arm_neon_vminnm, 1>,
+                            v2f32, v2f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+  def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v4f32, v4f32, int_arm_neon_vminnm, 1>,
+                            v4f32, v4f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v4f16, v4f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v8f16, v8f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }

 // Vector Pairwise Operations.
@@ -5095,6 +5275,10 @@ def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
 def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                         IIC_VPBIND, "vpadd", "f32",
                         v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+                        IIC_VPBIND, "vpadd", "f16",
+                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VPADDL   : Vector Pairwise Add Long
 defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5123,6 +5307,9 @@ def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
 def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+                       "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+               Requires<[HasNEON, HasFullFP16]>;

 // VPMIN    : Vector Pairwise Minimum
 def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5139,6 +5326,9 @@ def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
 def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+                       "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+               Requires<[HasNEON, HasFullFP16]>;

 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.

@@ -5155,6 +5345,14 @@ def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAD, "vrecpe", "f32",
                         v2f32, v2f32, int_arm_neon_vrecpe>;
 def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAQ, "vrecpe", "f32",
                         v4f32, v4f32, int_arm_neon_vrecpe>;
+def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAD, "vrecpe", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAQ, "vrecpe", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRECPS   : Vector Reciprocal Step
 def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5163,6 +5361,14 @@ def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrecps", "f32",
                         v2f32, v2f32, int_arm_neon_vrecps, 1>;
 def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrecps", "f32",
                         v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrecps", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrecps", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRSQRTE  : Vector Reciprocal Square Root Estimate
 def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5177,6 +5383,14 @@ def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                        IIC_VUNAD, "vrsqrte", "f16",
+                        v4f16, v4f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                        IIC_VUNAQ, "vrsqrte", "f16",
+                        v8f16, v8f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRSQRTS  : Vector Reciprocal Square Root Step
 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5185,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrsqrts", "f16",
+                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrsqrts", "f16",
+                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // Vector Shifts.
@@ -5336,6 +5558,14 @@ def  VABSfd  : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
 def  VABSfq  : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
+def  VABShd  : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                    "vabs", "f16",
+                    v4f16, v4f16, fabs>,
+               Requires<[HasNEON, HasFullFP16]>;
+def  VABShq  : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                    "vabs", "f16",
+                    v8f16, v8f16, fabs>,
+               Requires<[HasNEON, HasFullFP16]>;

 def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
                (v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5398,6 +5628,16 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+                   "vneg", "f16", "$Vd, $Vm", "",
+                   [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+               Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+                   "vneg", "f16", "$Vd, $Vm", "",
+                   [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+               Requires<[HasNEON, HasFullFP16]>;

 def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5868,18 +6108,56 @@ def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
 def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                      v4f32, v4i32, uint_to_fp>;

+def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v4i16, v4f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v4i16, v4f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v4f16, v4i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v4f16, v4i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
+def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v8i16, v8f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v8i16, v8f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v8f16, v8i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v8f16, v8i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
 // VCVT{A, N, P, M}
 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                     SDPatternOperator IntU> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
-                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
-                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
-                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
-    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
-                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
+    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
+    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s16.f16", v4i16, v4f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s16.f16", v8i16, v8f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u16.f16", v4i16, v4f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u16.f16", v8i16, v8f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
   }
 }
@@ -5898,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }

 let DecoderMethod = "DecodeVCVTQ" in {
@@ -5909,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }

 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5929,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                     (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
 // VCVT     : Vector Convert Between Half-Precision and Single-Precision.
 def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6182,22 +6498,40 @@ def  VTBX4Pseudo
 // VRINT    : Vector Rounding
 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
-                      !strconcat("vrint", op), "f32",
-                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
-    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
-                      !strconcat("vrint", op), "f32",
-                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
+    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v4f16, v4f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v8f16, v8f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
   }

   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
-                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
-                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+  }
 }

 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -6343,8 +6677,8 @@ def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
       Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
 def : N2VSPat<fabs, VABSfd>;
 def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPat<NEONfmax, VMAXfd>;
-def : N3VSPat<NEONfmin, VMINfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
@@ -7704,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                     (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                     (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                     (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7719,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                     (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                     (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
 // D-register versions.
@@ -7736,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                     (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                     (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; @@ -7751,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", + (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 40414da3ca81..df6f24306354 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br, // Load Store Instructions. // +// PC-relative loads need to be matched first as constant pool accesses need to +// always be PC-relative. We do this using AddedComplexity, as the pattern is +// simpler than the patterns of the other load instructions. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in +def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +// SP-relative loads should be matched before standard immediate-offset loads as +// it means we avoid having to move SP to another register. +let canFoldAsLoad = 1 in +def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // Loads: reg/reg and reg/imm5 let canFoldAsLoad = 1, isReMaterializable = 1 in multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; + // Immediate-offset loads should be matched before register-offset loads as + // when the offset is a constant it's simpler to first check if it fits in the + // immediate offset field then fall back to register-offset if it doesn't. def i : // reg/imm5 T1pILdStEncodeImm; + // Register-offset loads are matched last. 
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
 }

 // Stores: reg/reg and reg/imm5
 multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
                               AddrMode am, InstrItinClass itin_r,
                               InstrItinClass itin_i, string asm,
                               PatFrag opnode> {
-  def r : // reg/reg
-    T1pILdStEncode<reg_opc,
-                   (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
-                   am, itin_r, asm, "\t$Rt, $addr",
-                   [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
   def i : // reg/imm5
     T1pILdStEncodeImm<imm_opc, 0 /* Store */,
                       (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
                       am, itin_i, asm, "\t$Rt, $addr",
                       [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
 }

 // A8.6.57 & A8.6.60
-defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
                                 t_addrmode_is4, AddrModeT1_4,
                                 IIC_iLoad_r, IIC_iLoad_i, "ldr",
                                 UnOpFrag<(load node:$Src)>>;

 // A8.6.64 & A8.6.61
-defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
                                 t_addrmode_is1, AddrModeT1_1,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
                                 UnOpFrag<(zextloadi8 node:$Src)>>;

 // A8.6.76 & A8.6.73
-defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
                                 t_addrmode_is2, AddrModeT1_2,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
                                 UnOpFrag<(zextloadi16 node:$Src)>>;
@@ -659,47 +691,6 @@ def tLDRSH :  // A8.6.84
                  "ldrsh", "\t$Rt, $addr",
                  [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;

-let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
-                    "ldr", "\t$Rt, $addr",
-                    [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
-              T1LdStSP<{1,?,?}> {
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
-                    "ldr", "\t$Rt, $addr",
-                    [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
-              T1Encoding<{0,1,0,0,1,?}> {
-  // A6.2 & A8.6.59
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
-// A8.6.194 & A8.6.192
-defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
-                                t_addrmode_is4, AddrModeT1_4,
-                                IIC_iStore_r, IIC_iStore_i, "str",
-                                BinOpFrag<(store node:$LHS, node:$RHS)>>;
-
-// A8.6.197 & A8.6.195
-defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
-                                t_addrmode_is1, AddrModeT1_1,
-                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
-                                BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-
-// A8.6.207 & A8.6.205
-defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
-                                t_addrmode_is2, AddrModeT1_2,
-                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
-                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-
 def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
                     "str", "\t$Rt, $addr",
@@ -711,6 +702,25 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
   let Inst{7-0} = addr;
 }

+// A8.6.194 & A8.6.192
+defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
+                                t_addrmode_is4, AddrModeT1_4,
+                                IIC_iStore_r, IIC_iStore_i, "str",
+                                BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
+                                t_addrmode_is1, AddrModeT1_1,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+                                BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
+                                t_addrmode_is2, AddrModeT1_2,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //

@@ -730,6 +740,7 @@ def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
 // Writeback version is just a pseudo, as there's no encoding difference.
 // Writeback happens iff the base register is not in the destination register
 // list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
 def tLDMIA_UPD :
     InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
                  "$Rn = $wb", IIC_iLoad_mu>,
@@ -1328,16 +1339,16 @@ def : T1Pat<(subc   tGPR:$lhs, tGPR:$rhs),
             (tSUBrr tGPR:$lhs, tGPR:$rhs)>;

 // Bswap 16 with load/store
-def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
-            (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
 def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
             (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
-def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
-                           t_addrmode_rrs2:$addr),
-            (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+            (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
 def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
                            t_addrmode_is2:$addr),
             (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+                           t_addrmode_rr:$addr),
+            (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;

 // ConstantPool
 def : T1Pat<(ARMWrapper tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
@@ -1372,10 +1383,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
       Requires<[IsThumb, HasV5T]>;

 // zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
-            (tLDRBr t_addrmode_rrs1:$addr)>;
 def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
             (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+            (tLDRBr t_addrmode_rr:$addr)>;

 // extload from the stack -> word load from the stack, as it avoids having to
 // materialize the base in a separate register. This only works when a word
@@ -1389,61 +1400,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr),  (tLDRspi t_addrmode_sp:$addr)>,
       Requires<[IsThumb, IsThumb1Only, IsLE]>;

 // extload -> zextload
-def : T1Pat<(extloadi1  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi1  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi8  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi8  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_is2:$addr),  (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_rr:$addr),   (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_rr:$addr),   (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr),  (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr),   (tLDRHr t_addrmode_rr:$addr)>;

 // If it's impossible to use [r,r] address mode for sextload, select to
 // ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tSXTB (tLDRBi t_addrmode_is1:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>, +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tSXTB (tLDRBr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tSXTH (tLDRHi t_addrmode_is2:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>, +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tSXTH (tLDRHr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>; def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>; +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>; def : T1Pat<(atomic_load_8 t_addrmode_is1:$src), (tLDRBi t_addrmode_is1:$src)>; -def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src), - (tLDRBr t_addrmode_rrs1:$src)>; +def : T1Pat<(atomic_load_8 t_addrmode_rr:$src), + (tLDRBr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_16 t_addrmode_is2:$src), (tLDRHi t_addrmode_is2:$src)>; -def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src), - (tLDRHr t_addrmode_rrs2:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_rr:$src), + (tLDRHr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_32 t_addrmode_is4:$src), (tLDRi t_addrmode_is4:$src)>; -def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src), - (tLDRr t_addrmode_rrs4:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_rr:$src), + (tLDRr t_addrmode_rr:$src)>; def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val), (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>; -def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val), - (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>; +def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val), (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>; -def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val), - (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val), (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>; -def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val), - (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>; // Large immediate handling. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index aba8a7b10fd9..d460d33fa0a3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -43,7 +43,7 @@ def t2_shift_imm : Operand { // Shifted operands. No register controlled shifts for Thumb2. // Note: We do not support rrx shifted operands yet. 
def t2_so_reg : Operand, // reg imm - ComplexPattern { let EncoderMethod = "getT2SORegOpValue"; let PrintMethod = "printT2SOOperand"; @@ -1554,19 +1554,21 @@ def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>; def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants -// For disassembly only. +let mayLoad = 1 in def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { let DecoderMethod = "DecodeT2LDRDPreInstruction"; } +let mayLoad = 1 in def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", "$addr.base = $wb", []>; +let mayStore = 1 in def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", @@ -1574,6 +1576,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), let DecoderMethod = "DecodeT2STRDPreInstruction"; } +let mayStore = 1 in def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, t2am_imm8s4_offset:$imm), @@ -2100,7 +2103,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; let Inst{23} = 0b1; @@ -2117,7 +2120,7 @@ class T2I_pam op22_20, bits<4> op7_4, string opc, dag iops = (ins rGPR:$Rn, rGPR:$Rm), string asm = "\t$Rd, $Rn, $Rm"> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -2215,13 +2218,13 @@ class T2FourReg_mac op22_20, bits<4> op7_4, dag oops, def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. class T2SatI, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; @@ -2278,7 +2281,7 @@ def t2USAT: T2SatI< def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-22} = 0b1111001110; let Inst{20} = 0; let Inst{15} = 0; @@ -2288,8 +2291,8 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), let Inst{5-4} = 0b00; } -def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
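// (Note on the saturate patterns above: SSAT encodes its saturation position
//  as 1-32 and USAT as 0-31, so tightening the int_arm_ssat/int_arm_usat
//  patterns from a bare 'imm' to imm1_32/imm0_31 stops instruction selection
//  from accepting out-of-range positions.)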
@@ -2605,7 +2608,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; } // hasSideEffects // Rounding variants of the below included for disassembly only @@ -2614,7 +2617,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2624,7 +2627,7 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2636,7 +2639,7 @@ def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2646,7 +2649,7 @@ def t2SMMLA : T2FourReg< def t2SMMLAR: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2657,7 +2660,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2667,7 +2670,7 @@ def t2SMMLS: T2FourReg< def t2SMMLSR:T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2679,7 +2682,7 @@ multiclass T2I_smul { !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2692,7 +2695,7 @@ multiclass T2I_smul { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2705,7 +2708,7 @@ multiclass T2I_smul { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2718,7 +2721,7 @@ multiclass T2I_smul { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 
16)), (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2730,7 +2733,7 @@ multiclass T2I_smul { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2742,7 +2745,7 @@ multiclass T2I_smul { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2760,7 +2763,7 @@ multiclass T2I_smla { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2773,7 +2776,7 @@ multiclass T2I_smla { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2786,7 +2789,7 @@ multiclass T2I_smla { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2799,7 +2802,7 @@ multiclass T2I_smla { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2811,7 +2814,7 @@ multiclass T2I_smla { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2823,7 +2826,7 @@ multiclass T2I_smla { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2839,79 +2842,79 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALTB : T2FourReg_mac<1, 0b100, 
0b1010, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD def t2SMUAD: T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUADX:T2ThreeReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUSD: T2ThreeReg_mac< 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUSDX:T2ThreeReg_mac< 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMLAD : T2FourReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLADX : T2FourReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; //===----------------------------------------------------------------------===// // Division Instructions. 
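// (Context for the HasThumb2DSP -> HasDSP change running through this file: a
//  plausible sketch of the renamed predicate in ARM.td, which this diff does
//  not show, is
//    def HasDSP : Predicate<"Subtarget->hasDSP()">,
//                 AssemblerPredicate<"FeatureDSP", "dsp">;
//  i.e. the DSP multiplies are now gated on a generic DSP feature instead of
//  a Thumb2-specific one.)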
@@ -2961,7 +2964,7 @@ def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rbit", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>, + [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>, Sched<[WriteALU]>; def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e83f8c850632..050cd1a445ad 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -20,7 +20,6 @@ def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; - //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -93,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", - [(set SPR:$Sd, (load addrmode5:$addr))]> { + [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -107,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", - [(store SPR:$Sd, addrmode5:$addr)]> { + [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -393,8 +392,8 @@ multiclass vmaxmin_inst { } } -defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>; -defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>; +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), @@ -541,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // FIXME: Verify encoding after integrated assembler is working. 
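// (Note: the Requires<[HasFP16]> guards added below gate the half-precision
//  vcvtb/vcvtt conversions on the FP16 extension; targets without it have no
//  f16<->f32 conversion encodings, so these forms must not be selected or
//  assembled there.)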
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
@@ -922,6 +925,22 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
   let isRegSequence = 1;
 }
 
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a loss
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+      Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+      Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+      Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+      Requires<[IsThumb2]>;
+
 let hasSideEffects = 0 in
 def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                        (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
@@ -1003,7 +1022,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(f64 (sint_to_fp GPR:$a)),
                (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-  def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+  def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                (VSITOD (VLDRS addrmode5:$a))>;
 }
 
@@ -1021,7 +1040,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
 def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
                    (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                    (VSITOS (VLDRS addrmode5:$a))>;
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1035,7 +1054,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(f64 (uint_to_fp GPR:$a)),
               (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-  def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+  def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
               (VUITOD (VLDRS addrmode5:$a))>;
 }
 
@@ -1053,7 +1072,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
 def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
                    (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                    (VUITOS (VLDRS addrmode5:$a))>;
 
 // FP -> Int:
@@ -1106,7 +1125,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
 
-  def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+  def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
               (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
 }
 
@@ -1124,7 +1143,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
 
-def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+                   addrmode5:$ptr),
                    (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1138,7 +1158,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
 
-  def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+  def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
               (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
 }
 
@@ -1156,7 +1176,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
 
-def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+                   addrmode5:$ptr),
                    (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 265b86f75f1d..725b8383c961 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,17 +60,24 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
 
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
 namespace {
   /// Post- register allocation pass that combines load / store instructions to
   /// form ldm / stm instructions.
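  /// (Illustrative: two adjacent word loads such as "ldr r1, [r0]" and
  /// "ldr r2, [r0, #4]" can be rewritten as the single "ldm r0, {r1, r2}".)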
 struct ARMLoadStoreOpt : public MachineFunctionPass {
   static char ID;
-  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+  ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+    initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+  }
 
   const MachineFunction *MF;
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
-  const MachineRegisterInfo *MRI;
   const ARMSubtarget *STI;
   const TargetLowering *TL;
   ARMFunctionInfo *AFI;
@@ -84,7 +91,7 @@ namespace {
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
   const char *getPassName() const override {
-    return "ARM load / store optimization pass";
+    return ARM_LOAD_STORE_OPT_NAME;
   }
 
 private:
@@ -118,6 +125,7 @@ namespace {
   };
   SpecificBumpPtrAllocator Allocator;
   SmallVector Candidates;
+  SmallVector MergeBaseCandidates;
 
   void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                           MachineBasicBlock::const_iterator Before);
@@ -140,12 +148,16 @@ namespace {
                     MachineBasicBlock::iterator &MBBI);
   bool MergeBaseUpdateLoadStore(MachineInstr *MI);
   bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
   bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
   bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+  bool CombineMovBx(MachineBasicBlock &MBB);
 };
 char ARMLoadStoreOpt::ID = 0;
 }
 
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
 static bool definesCPSR(const MachineInstr *MI) {
   for (const auto &MO : MI->operands()) {
     if (!MO.isReg())
@@ -619,9 +631,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
     unsigned NewBase;
     if (isi32Load(Opcode)) {
-      // If it is a load, then just use one of the destination register to
-      // use as the new base.
+      // If it is a load, then just use one of the destination registers
+      // as the new base; it will no longer be writeback in Thumb1.
       NewBase = Regs[NumRegs-1].first;
+      Writeback = false;
     } else {
       // Find a free register that we can use as scratch register.
       moveLiveRegsBefore(MBB, InsertBefore);
@@ -725,9 +738,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
   MachineInstrBuilder MIB;
   if (Writeback) {
-    if (Opcode == ARM::tLDMIA)
+    assert(isThumb1 && "expected Writeback only in Thumb1");
+    if (Opcode == ARM::tLDMIA) {
+      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
       // Update tLDMIA with writeback if necessary.
       Opcode = ARM::tLDMIA_UPD;
+    }
 
     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
 
@@ -784,6 +800,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   SmallVector ImpDefs;
   DenseSet<unsigned> KilledRegs;
+  DenseSet<unsigned> UsedRegs;
 
   // Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) { const MachineOperand &MO = getLoadStoreRegOp(*MI); @@ -792,6 +809,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { if (IsKill) KilledRegs.insert(Reg); Regs.push_back(std::make_pair(Reg, IsKill)); + UsedRegs.insert(Reg); if (IsLoad) { // Collect any implicit defs of super-registers, after merging we can't @@ -881,7 +899,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { for (MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.isKill()) continue; - if (KilledRegs.count(MO.getReg())) + if (UsedRegs.count(MO.getReg())) MO.setIsKill(false); } } @@ -995,76 +1013,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { } while (SIndex < EIndex); } -static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tSUBi8: - case ARM::t2SUBri: - case ARM::SUBri: - CheckCPSRDef = true; - break; - case ARM::tSUBspi: - break; - } - - // Make sure the offset fits in 8 bits. - if (Bytes == 0 || (Limit && Bytes >= Limit)) - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || - MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tADDi8: - case ARM::t2ADDri: - case ARM::ADDri: - CheckCPSRDef = true; - break; - case ARM::tADDspi: - break; - } - - if (Bytes == 0 || (Limit && Bytes >= Limit)) - // Make sure the offset fits in 8 bits. - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tADDspi || - MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode) { switch (Opc) { @@ -1132,6 +1080,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } +/// Check if the given instruction increments or decrements a register and +/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags +/// generated by the instruction are possibly read as well. 
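+/// (Illustrative: for an ARM 'add r1, r1, #4' (ADDri) with Reg = r1 this
+/// returns +4, while a Thumb 'sub sp, #16' (tSUBspi, whose immediate is
+/// scaled by 4) returns -16; 0 means "no usable increment/decrement".)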
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+                                  ARMCC::CondCodes Pred, unsigned PredReg) {
+  bool CheckCPSRDef;
+  int Scale;
+  switch (MI.getOpcode()) {
+  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
+  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
+  case ARM::t2SUBri:
+  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
+  case ARM::t2ADDri:
+  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
+  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
+  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+  default: return 0;
+  }
+
+  unsigned MIPredReg;
+  if (MI.getOperand(0).getReg() != Reg ||
+      MI.getOperand(1).getReg() != Reg ||
+      getInstrPredicate(&MI, MIPredReg) != Pred ||
+      MIPredReg != PredReg)
+    return 0;
+
+  if (CheckCPSRDef && definesCPSR(&MI))
+    return 0;
+  return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+  Offset = 0;
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  if (MBBI == BeginMBBI)
+    return EndMBBI;
+
+  // Skip debug values.
+  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+    --PrevMBBI;
+
+  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+  return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+  Offset = 0;
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+  // Skip debug values.
+  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+    ++NextMBBI;
+  if (NextMBBI == EndMBBI)
+    return EndMBBI;
+
+  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+  return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
 /// Fold preceding/trailing inc/dec of base register into the
 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
 ///
@@ -1151,7 +1168,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
   const MachineOperand &BaseOP = MI->getOperand(0);
   unsigned Base = BaseOP.getReg();
   bool BaseKill = BaseOP.isKill();
-  unsigned Bytes = getLSMultipleTransferSize(MI);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   unsigned Opcode = MI->getOpcode();
@@ -1163,49 +1179,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
     if (MI->getOperand(i).getReg() == Base)
       return false;
 
-  bool DoMerge = false;
-  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
-  // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI); MachineBasicBlock &MBB = *MI->getParent(); - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); MachineBasicBlock::iterator MBBI(MI); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::da; - DoMerge = true; - } - if (DoMerge) - MBB.erase(PrevMBBI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); + ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); + if (Mode == ARM_AM::ia && Offset == -Bytes) { + Mode = ARM_AM::db; + } else if (Mode == ARM_AM::ib && Offset == -Bytes) { + Mode = ARM_AM::da; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) + return false; } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - if (DoMerge) - MBB.erase(NextMBBI); - } - - if (!DoMerge) - return false; + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) @@ -1283,7 +1274,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { unsigned Base = getLoadStoreBaseOp(*MI).getReg(); bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || @@ -1295,7 +1285,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) return false; - bool isLd = isLoadSingle(Opcode); // Can't do the merge if the destination register is the same as the would-be // writeback register. if (MI->getOperand(0).getReg() == Base) @@ -1303,55 +1292,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - bool DoMerge = false; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - unsigned NewOpc = 0; - // AM2 - 12 bits, thumb2 - 8 bits. - unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); - - // Try merging with the previous instruction. 
+  int Bytes = getLSMultipleTransferSize(MI);
   MachineBasicBlock &MBB = *MI->getParent();
-  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
   MachineBasicBlock::iterator MBBI(MI);
-  if (MBBI != BeginMBBI) {
-    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
-    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
-      --PrevMBBI;
-    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
-      DoMerge = true;
-      AddSub = ARM_AM::sub;
-    } else if (!isAM5 &&
-               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
-      DoMerge = true;
-    }
-    if (DoMerge) {
-      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
-      MBB.erase(PrevMBBI);
-    }
+  int Offset;
+  MachineBasicBlock::iterator MergeInstr
+    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+  unsigned NewOpc;
+  if (!isAM5 && Offset == Bytes) {
+    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+  } else if (Offset == -Bytes) {
+    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+  } else {
+    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+    if (Offset == Bytes) {
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+    } else if (!isAM5 && Offset == -Bytes) {
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+    } else
+      return false;
   }
+  MBB.erase(MergeInstr);
 
-  // Try merging with the next instruction.
-  MachineBasicBlock::iterator EndMBBI = MBB.end();
-  if (!DoMerge && MBBI != EndMBBI) {
-    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
-    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
-      ++NextMBBI;
-    if (!isAM5 &&
-        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
-      DoMerge = true;
-      AddSub = ARM_AM::sub;
-    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
-      DoMerge = true;
-    }
-    if (DoMerge) {
-      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
-      MBB.erase(NextMBBI);
-    }
-  }
-
-  if (!DoMerge)
-    return false;
+  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+  bool isLd = isLoadSingle(Opcode);
 
   if (isAM5) {
     // VLDM[SD]_UPD, VSTM[SD]_UPD
     // (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -1368,18 +1333,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
     if (isAM2) {
       // LDR_PRE, LDR_POST
       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
-        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
           .addReg(Base, RegState::Define)
           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
       } else {
-        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
           .addReg(Base, RegState::Define)
-          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
       }
     } else {
-      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
       // t2LDR_PRE, t2LDR_POST
       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
         .addReg(Base, RegState::Define)
@@ -1391,13 +1354,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
     // the vestigial zero-reg offset register. When that's fixed, this clause
     // can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) @@ -1409,46 +1371,75 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { return true; } +bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && + "Must have t2STRDi8 or t2LDRDi8"); + if (MI.getOperand(3).getImm() != 0) + return false; + + // Behaviour for writeback is undefined if base register is the same as one + // of the others. + const MachineOperand &BaseOp = MI.getOperand(2); + unsigned Base = BaseOp.getReg(); + const MachineOperand &Reg0Op = MI.getOperand(0); + const MachineOperand &Reg1Op = MI.getOperand(1); + if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) + return false; + + unsigned PredReg; + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + MachineBasicBlock::iterator MBBI(MI); + MachineBasicBlock &MBB = *MI.getParent(); + int Offset; + MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, + PredReg, Offset); + unsigned NewOpc; + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; + } else + return false; + } + MBB.erase(MergeInstr); + + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); + if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { + MIB.addOperand(Reg0Op).addOperand(Reg1Op) + .addReg(BaseOp.getReg(), RegState::Define); + } else { + assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); + MIB.addReg(BaseOp.getReg(), RegState::Define) + .addOperand(Reg0Op).addOperand(Reg1Op); + } + MIB.addReg(BaseOp.getReg(), RegState::Kill) + .addImm(Offset).addImm(Pred).addReg(PredReg); + assert(TII->get(Opcode).getNumOperands() == 6 && + TII->get(NewOpc).getNumOperands() == 7 && + "Unexpected number of operands in Opcode specification."); + + // Transfer implicit operands. + for (const MachineOperand &MO : MI.implicit_operands()) + MIB.addOperand(MO); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MBB.erase(MBBI); + return true; +} + /// Returns true if instruction is a memory operation that this pass is capable /// of operating on. -static bool isMemoryOp(const MachineInstr *MI) { - // When no memory operands are present, conservatively assume unaligned, - // volatile, unfoldable. - if (!MI->hasOneMemOperand()) - return false; - - const MachineMemOperand *MMO = *MI->memoperands_begin(); - - // Don't touch volatile memory accesses - we may be changing their order. - if (MMO->isVolatile()) - return false; - - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is - // not. 
-  if (MMO->getAlignment() < 4)
-    return false;
-
-  // str could probably be eliminated entirely, but for now we just want
-  // to avoid making a mess of it.
-  // FIXME: Use str as a wildcard to enable better stm folding.
-  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
-      MI->getOperand(0).isUndef())
-    return false;
-
-  // Likewise don't mess with references to undefined addresses.
-  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
-      MI->getOperand(1).isUndef())
-    return false;
-
-  unsigned Opcode = MI->getOpcode();
+static bool isMemoryOp(const MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
-  default: break;
   case ARM::VLDRS:
   case ARM::VSTRS:
-    return MI->getOperand(1).isReg();
   case ARM::VLDRD:
   case ARM::VSTRD:
-    return MI->getOperand(1).isReg();
   case ARM::LDRi12:
   case ARM::STRi12:
   case ARM::tLDRi:
@@ -1459,9 +1450,40 @@ static bool isMemoryOp(const MachineInstr &MI) {
   case ARM::t2LDRi12:
   case ARM::t2STRi8:
   case ARM::t2STRi12:
-    return MI->getOperand(1).isReg();
+    break;
+  default:
+    return false;
   }
-  return false;
+  if (!MI.getOperand(1).isReg())
+    return false;
+
+  // When no memory operands are present, conservatively assume unaligned,
+  // volatile, unfoldable.
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+  // Don't touch volatile memory accesses - we may be changing their order.
+  if (MMO.isVolatile())
+    return false;
+
+  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+  // not.
+  if (MMO.getAlignment() < 4)
+    return false;
+
+  // str could probably be eliminated entirely, but for now we just want
+  // to avoid making a mess of it.
+  // FIXME: Use str as a wildcard to enable better stm folding.
+  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+    return false;
+
+  // Likewise don't mess with references to undefined addresses.
+  if (MI.getOperand(1).isUndef())
+    return false;
+
+  return true;
 }
 
 static void InsertLDR_STR(MachineBasicBlock &MBB,
@@ -1616,6 +1638,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   ARMCC::CondCodes CurrPred = ARMCC::AL;
   unsigned Position = 0;
   assert(Candidates.size() == 0);
+  assert(MergeBaseCandidates.size() == 0);
   LiveRegsValid = false;
 
   for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@@ -1626,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       continue;
     ++Position;
 
-    if (isMemoryOp(MBBI)) {
+    if (isMemoryOp(*MBBI)) {
       unsigned Opcode = MBBI->getOpcode();
       const MachineOperand &MO = MBBI->getOperand(0);
       unsigned Reg = MO.getReg();
@@ -1694,8 +1717,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       MBBI = I;
       --Position;
       // Fallthrough to look into existing chain.
-    } else if (MBBI->isDebugValue())
+    } else if (MBBI->isDebugValue()) {
       continue;
+    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+               MBBI->getOpcode() == ARM::t2STRDi8) {
+      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+      // remember them because we may still be able to merge add/sub into them.
+      MergeBaseCandidates.push_back(MBBI);
+    }
+
     // If we are here then the chain is broken; extract candidates for a merge.
     if (MemOps.size() > 0) {
@@ -1726,7 +1756,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       if (Merged) {
         Changed = true;
         unsigned Opcode = Merged->getOpcode();
-        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+          MergeBaseUpdateLSDouble(*Merged);
+        else
           MergeBaseUpdateLSMultiple(Merged);
       } else {
         for (MachineInstr *MI : Candidate->Instrs) {
@@ -1741,6 +1773,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
     }
   }
   Candidates.clear();
+  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+  for (MachineInstr *MI : MergeBaseCandidates)
+    MergeBaseUpdateLSDouble(*MI);
+  MergeBaseCandidates.clear();
 
   return Changed;
 }
@@ -1765,7 +1801,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
       (MBBI->getOpcode() == ARM::BX_RET ||
        MBBI->getOpcode() == ARM::tBX_RET ||
        MBBI->getOpcode() == ARM::MOVPCLR)) {
-    MachineInstr *PrevMI = std::prev(MBBI);
+    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+    // Ignore any DBG_VALUE instructions.
+    while (PrevI->isDebugValue() && PrevI != MBB.begin())
+      --PrevI;
+    MachineInstr *PrevMI = PrevI;
     unsigned Opcode = PrevMI->getOpcode();
     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1786,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   return false;
 }
 
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+      MBBI->getOpcode() != ARM::tBX_RET)
+    return false;
+
+  MachineBasicBlock::iterator Prev = MBBI;
+  --Prev;
+  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+    return false;
+
+  for (auto Use : Prev->uses())
+    if (Use.isKill()) {
+      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+                     .addReg(Use.getReg(), RegState::Kill))
+                     .copyImplicitOps(&*MBBI);
+      MBB.erase(MBBI);
+      MBB.erase(Prev);
+      return true;
+    }
+
+  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   MF = &Fn;
   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1793,7 +1857,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
-  MRI = &Fn.getRegInfo();
+  RegClassInfoValid = false;
   isThumb2 = AFI->isThumb2Function();
   isThumb1 = AFI->isThumbFunction() && !isThumb2;
 
@@ -1805,18 +1869,29 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
       Modified |= LoadStoreMultipleOpti(MBB);
     if (STI->hasV5TOps())
       Modified |= MergeReturnIntoLDM(MBB);
+    if (isThumb1)
+      Modified |= CombineMovBx(MBB);
   }
 
   Allocator.DestroyAll();
   return Modified;
 }
 
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+  "ARM pre- register allocation load / store optimization pass"
+
 namespace {
   /// Pre- register allocation pass that moves loads / stores from consecutive
   /// locations close together to make it more likely they will be combined later.
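  /// (Illustrative: hoisting "ldr r1, [r0]" next to "ldr r2, [r0, #4]" gives
  /// the post-RA pass above a chance to turn the pair into ldrd or ldm.)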
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { + initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); + } const DataLayout *TD; const TargetInstrInfo *TII; @@ -1828,7 +1903,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM pre- register allocation load / store optimization pass"; + return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } private: @@ -1847,8 +1922,11 @@ namespace { char ARMPreAllocLoadStoreOpt::ID = 0; } +INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", + ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) + bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); + TD = &Fn.getDataLayout(); STI = &static_cast(Fn.getSubtarget()); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); @@ -1856,9 +1934,8 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { MF = &Fn; bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) - Modified |= RescheduleLoadStoreInstrs(MFI); + for (MachineBasicBlock &MFI : Fn) + Modified |= RescheduleLoadStoreInstrs(&MFI); return Modified; } @@ -2187,7 +2264,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!MI->isDebugValue()) MI2LocMap[MI] = ++Loc; - if (!isMemoryOp(MI)) + if (!isMemoryOp(*MI)) continue; unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) != ARMCC::AL) @@ -2275,3 +2352,4 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { return new ARMPreAllocLoadStoreOpt(); return new ARMLoadStoreOpt(); } + diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index f5250ff83f0b..ac0330fbcb34 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===// +//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===// // // The LLVM Compiler Infrastructure // @@ -20,5 +20,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), - GlobalBaseReg(0) {} + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 14dd9ef333af..d6447978ef2c 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -52,7 +52,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ReturnRegsCount; /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). + /// determineCalleeSaves(). bool HasStackFrame; /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by @@ -110,11 +110,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. 
DenseMap CPEClones; - /// GlobalBaseReg - keeps track of the virtual register initialized for - /// use as the global base register. This is used for PIC in some PIC - /// relocation models. - unsigned GlobalBaseReg; - /// ArgumentStackSize - amount of bytes on stack consumed by the arguments /// being passed on the stack unsigned ArgumentStackSize; @@ -133,7 +128,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + VarArgsFrameIndex(0), HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -204,9 +199,6 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } - unsigned getGlobalBaseReg() const { return GlobalBaseReg; } - void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } - void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) llvm_unreachable("Duplicate entries!"); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 45cc9ea91f37..02cbfb1fa9f1 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -266,12 +266,19 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { } // Scalar single precision floating point register class.. -// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to -// avoid partial-write dependencies on D registers (S registers are -// renamed as portions of D registers). -def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate - (sequence "S%u", 0, 31), 2), - (sequence "S%u", 0, 31))>; +// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack +// to avoid partial-write dependencies on D or Q (depending on platform) +// registers (S registers are renamed as portions of D/Q registers). +def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> { + let AltOrders = [(add (decimate SPR, 2), SPR), + (add (decimate SPR, 4), + (decimate SPR, 2), + (decimate (rotl SPR, 1), 4), + (decimate (rotl SPR, 1), 2))]; + let AltOrderSelect = [{ + return 1 + MF.getSubtarget().useStride4VFPs(MF); + }]; +} // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations @@ -281,25 +288,29 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; // class. // ARM requires only word alignment for double. It's more performant if it // is double-word alignment though. -def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (sequence "D%u", 0, 31)> { - // Allocate non-VFP2 registers D16-D31 first. - let AltOrders = [(rotl DPR, 16)]; - let AltOrderSelect = [{ return 1; }]; + // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on + // Darwin platforms. + let AltOrders = [(rotl DPR, 16), + (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))]; + let AltOrderSelect = [{ + return 1 + MF.getSubtarget().useStride4VFPs(MF); + }]; } // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). 
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (trunc DPR, 16)>; // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations -def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (trunc DPR, 8)>; // Generic 128-bit vector register class. -def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, +def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (sequence "Q%u", 0, 15)> { // Allocate non-VFP2 aliases Q8-Q15 first. let AltOrders = [(rotl QPR, 8)]; diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index b03d5ff44c6e..3ad7730228e5 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -37,1050 +37,13 @@ def SW_FDIV : FuncUnit; // FIXME: Add preload instruction when it is documented. // FIXME: Model non-pipelined nature of FP div / sqrt unit. -def SwiftItineraries : ProcessorItineraries< - [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ - // - // Move instructions, unconditional - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_LS]>], - [5]>, - // - // MVN instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - // - // No operand cycles - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>]>, - // - // Binary Instructions that produce a result - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1]>, - // - // Bitwise Instructions that produce a result - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1]>, - // - // Unary Instructions that produce a result - - // CLZ, RBIT, etc. 
- InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - - // BFC, BFI, UBFX, SBFX - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1]>, - - // - // Zero and sign extension instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1, 1]>, - // - // Compare instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - // - // Test instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - // - // Move instructions, conditional - // FIXME: Correctly model the extra input dep on the destination. - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2]>, - - // Integer multiply pipeline - // - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0], 3>, - InstrStage<1, [SW_ALU0]>], - [5, 5, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 6, 1, 1]>, - // - // Integer divide - InstrItinData, - InstrStage<1, [SW_ALU0], 0>, - InstrStage<14, [SW_IDIV]>], - [14, 1, 1]>, - - // Integer load pipeline - // FIXME: The timings are some rough approximations - // - // Immediate offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Register offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3, 4, 1, 1]>, - // - // Scaled register offset - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [5, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [5, 1, 1]>, - // - // Immediate offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, 
SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - // - // Register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3, 4, 1, 1]>, - // - // Scaled register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 3, 1, 1]>, - // - // Load multiple, def is the 5th operand. - // FIXME: This assumes 3 to 4 registers. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1, 3], [], -1>, // dynamic uops - - // - // Load multiple + update, defs are the 1st and 5th operands. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1, 3], [], -1>, // dynamic uops - // - // Load multiple plus branch - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1, 3], [], -1>, // dynamic uops - // - // Pop, def is the 3rd operand. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 3], [], -1>, // dynamic uops - // - // Pop + branch, def is the 3rd operand. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 3], [], -1>, // dynamic uops - - // - // iLoadi + iALUr for t2LDRpci_pic. 
- InstrItinData, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [4, 1]>, - - // Integer store pipeline - /// - // Immediate offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // Register offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Scaled register offset - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Immediate offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - // - // Scaled register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>], - [3, 1, 1, 1]>, - // - // Store multiple - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [], [], -1>, // dynamic uops - // - // Store multiple + update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [2], [], -1>, // dynamic uops - - // - // Preload - InstrItinData], [1, 1]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData]>, - - // FP Special Register to Integer Register File Move - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - // - // Single-precision FP Unary - // - // Most floating-point moves get issued on ALU0. 
- InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Double-precision FP Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - - // - // Single-precision FP Compare - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [1, 1]>, - // - // Double-precision FP Compare - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [1, 1]>, - // - // Single to Double FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Double to Single FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - - // - // Single to Half FP Convert - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 4>, - InstrStage<1, [SW_ALU1]>], - [6, 1]>, - // - // Half to Single FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - - // - // Single-Precision FP to Integer Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Single-precision FP ALU - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [6, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [12, 1, 1]>, - // - // Single-precision Fused FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-precision Fused FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [12, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<15, [SW_FDIV]>], - [17, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<30, [SW_FDIV]>], - [32, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<15, [SW_FDIV]>], - [17, 1]>, - // - // Double-precision FP SQRT - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<30, [SW_FDIV]>], - [32, 1, 1]>, - - // - // Integer to Single-precision Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - // - // Integer to Double-precision Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // Single-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - // - // Double-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Single-precision FP Load - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // Double-precision FP Load - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // FP Load Multiple - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 4], [], -1>, // dynamic uops - // - // FP Load Multiple + update - // FIXME: Assumes a single Q register. 
- InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1, 4], [], -1>, // dynamic uops - // - // Single-precision FP Store - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // Double-precision FP Store - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // FP Store Multiple - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1], [], -1>, // dynamic uops - // - // FP Store Multiple + update - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1], [], -1>, // dynamic uops - // NEON - // - // Double-register Integer Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Quad-register Integer Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Double-register Integer Q-Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Double-register Integer Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Subtract - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Subtract - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Shift - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register Integer Subtract (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Subtract (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // Double-register Integer Count - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Count - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - // - // Quad-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // Double-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - - // - // Double-register Integer Multiply (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-register Integer Multiply-Accumulate 
(.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - - // - // Move - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Move Immediate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2]>, - // - // Double-register Permute Move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Quad-register Permute Move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Integer to Single-precision Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - // - // Integer to Double-precision Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - // - // Double-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Integer to Lane Move - // FIXME: I think this is correct, but it is not clear from the tuning guide. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - - // - // Vector narrow move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Double-register FP Unary - // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, - // and they issue on a different pipeline. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Quad-register FP Unary - // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, - // and they issue on a different pipeline. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Double-register FP Binary - // FIXME: We're using this itin for many instructions. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // VPADD, etc. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register FP VMUL - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register FP Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register FP VMUL - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Fused FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register FusedF P Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Reciprical Step - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register Reciprical Step - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Permute - // FIXME: The latencies are unclear from the documentation. 
- InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - // - // Quad-register Permute - // FIXME: The latencies are unclear from the documentation. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - // - // Quad-register Permute (3 cycle issue on A9) - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - - // - // Double-register VEXT - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - // - // VTB - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [4, 1, 3, 3]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [6, 1, 3, 5, 5]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [8, 1, 3, 5, 7, 7]>, - // - // VTBX - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [4, 1, 3, 3]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [6, 1, 3, 5, 5]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [8, 1, 3, 5, 7, 7]> -]>; - -// ===---------------------------------------------------------------------===// -// This following definitions describe the simple machine model which -// will replace itineraries. - // Swift machine model for scheduling and other instruction cost heuristics. def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. - - let Itineraries = SwiftItineraries; + let CompleteModel = 0; // FIXME: Remove if all instructions are covered. } // Swift predicates. 
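The deleted SwiftItineraries block above is not replaced line for line: the per-operand machine model plus the InstRW overrides below take over, and until they cover every instruction the model stays marked incomplete (CompleteModel = 0). To make the remaining top-level SwiftModel numbers concrete, here is a back-of-envelope cycle estimate; the formula is purely illustrative and is not how the MachineScheduler computes anything.

    #include <cstdio>

    int main() {
      // Values from SwiftModel above.
      const int IssueWidth = 3;         // micro-ops dispatched per cycle
      const int LoadLatency = 3;        // default load-to-use latency
      const int MispredictPenalty = 14; // cycles lost to a mispredict

      // A hypothetical block: 10 micro-ops, two loads whose results are
      // needed immediately, and one mispredicted branch.
      int MicroOps = 10, ExposedLoads = 2, Mispredicts = 1;
      int Cycles = (MicroOps + IssueWidth - 1) / IssueWidth // dispatch bound
                   + ExposedLoads * (LoadLatency - 1)       // exposed stalls
                   + Mispredicts * MispredictPenalty;
      printf("estimated cycles: %d\n", Cycles); // 4 + 4 + 14 = 22
    }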
@@ -1558,6 +521,13 @@ let SchedModel = SwiftModel in { (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", "PUSH", "tPUSH")>; + // LDRLIT pseudo instructions, they expand to LDR + PICADD + def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU], + (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>; + // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR + def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle], + (instregex "LDRLIT_ga_pcrel_ldr")>; + // 4.2.26 Branch def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 2; } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 6cafbbb9f8eb..6fded9c8ab73 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -160,41 +160,39 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, unsigned VTSize = 4; unsigned i = 0; // Emit a maximum of 4 loads in Thumb1 since we have fewer registers - const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6; + const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; SDValue TFOps[6]; SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, dl, MVT::i32)), - SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(TFOps, i)); + // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to + // VLDM/VSTM and make this code emit it when appropriate. This would reduce + // pressure on the general purpose registers. However this seems harder to map + // onto the register allocator's view of the world. - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, dl, MVT::i32)), - DstPtrInfo.getWithOffset(DstOff), - isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(TFOps, i)); + // The number of MEMCPY pseudo-instructions to emit. We use up to + // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm + // later on. This is a lower bound on the number of MEMCPY operations we must + // emit. + unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; - EmittedNumMemOps += i; + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); + + for (unsigned I = 0; I != NumMEMCPYs; ++I) { + // Evenly distribute registers among MEMCPY operations to reduce register + // pressure. 
+    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+                      DAG.getConstant(NumRegs, dl, MVT::i32));
+    Src = Dst.getValue(1);
+    Chain = Dst.getValue(2);
+
+    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
+
+    EmittedNumMemOps = NextEmittedNumMemOps;
   }
 
   if (BytesLeft == 0)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 002c3e9b6291..bb6ae28065bd 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -39,38 +40,10 @@ using namespace llvm;
 #define GET_SUBTARGETINFO_CTOR
 #include "ARMGenSubtargetInfo.inc"
 
-static cl::opt<bool>
-ReserveR9("arm-reserve-r9", cl::Hidden,
-          cl::desc("Reserve R9, making it unavailable as GPR"));
-
-static cl::opt<bool>
-ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
-
 static cl::opt<bool>
 UseFusedMulOps("arm-use-mulops",
                cl::init(true), cl::Hidden);
 
-namespace {
-enum AlignMode {
-  DefaultAlign,
-  StrictAlign,
-  NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
-      cl::Hidden, cl::init(DefaultAlign),
-      cl::values(
-          clEnumValN(DefaultAlign, "arm-default-align",
-                     "Generate unaligned accesses only on hardware/OS "
-                     "combinations that are known to support them"),
-          clEnumValN(StrictAlign, "arm-strict-align",
-                     "Disallow all unaligned memory accesses"),
-          clEnumValN(NoStrictAlign, "arm-no-strict-align",
-                     "Allow unaligned memory accesses"),
-          clEnumValEnd));
-
 enum ITMode {
   DefaultIT,
   RestrictedIT,
@@ -88,6 +61,12 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
              "Allow IT blocks based on ARMv7"),
   clEnumValEnd));
 
+/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+              cl::init(false), cl::Hidden);
+
 /// initializeSubtargetDependencies - Initializes using a CPU and feature string
 /// so that we can use initializer lists for subtarget initialization.
 ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -110,8 +89,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
                            const ARMBaseTargetMachine &TM, bool IsLittle)
     : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
-      ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
-      TargetTriple(TT), Options(TM.Options), TM(TM),
+      ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+      IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
      FrameLowering(initializeFrameLowering(CPU, FS)),
      // At this point initializeSubtargetDependencies has been called so
      // we can query directly.
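The arithmetic in the ARMSelectionDAGInfo hunk above deserves a worked example: the lowering now emits ceil(NumMemOps / MaxLoadsInLDM) MEMCPY pseudo-ops and recomputes the boundary as NumMemOps * (I + 1) / NumMEMCPYs so the register counts come out balanced. A standalone rerun of that calculation:

    #include <cstdio>

    int main() {
      const unsigned MaxLoadsInLDM = 6; // 4 on Thumb1, as above
      unsigned NumMemOps = 7;           // e.g. a 28-byte copy with VTSize 4

      // Same rounding-up division as the lowering code.
      unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
      unsigned EmittedNumMemOps = 0;
      for (unsigned I = 0; I != NumMEMCPYs; ++I) {
        unsigned Next = NumMemOps * (I + 1) / NumMEMCPYs;
        printf("MEMCPY %u covers %u registers\n", I, Next - EmittedNumMemOps);
        EmittedNumMemOps = Next;
      }
      // Prints 3 and 4 rather than the 6 and 1 a greedy split would give,
      // which is exactly the register-pressure point the comment makes.
    }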
@@ -133,6 +112,7 @@ void ARMSubtarget::initializeEnvironment() { HasV7Ops = false; HasV8Ops = false; HasV8_1aOps = false; + HasV8_2aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -147,10 +127,11 @@ void ARMSubtarget::initializeEnvironment() { UseSoftFloat = false; HasThumb2 = false; NoARM = false; - IsR9Reserved = ReserveR9; - UseMovt = false; + ReserveR9 = false; + NoMovt = false; SupportsTailCall = false; HasFP16 = false; + HasFullFP16 = false; HasD16 = false; HasHardwareDivide = false; HasHardwareDivideInARM = false; @@ -168,20 +149,36 @@ void ARMSubtarget::initializeEnvironment() { HasCrypto = false; HasCRC = false; HasZeroCycleZeroing = false; - AllowsUnalignedMem = false; - Thumb2DSP = false; + StrictAlign = false; + HasDSP = false; UseNaClTrap = false; GenLongCalls = false; UnsafeFPMath = false; + + // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this + // directly from it, but we can try to make sure they're consistent when both + // available. + UseSjLjEH = isTargetDarwin() && !isTargetWatchOS(); + assert((!TM.getMCAsmInfo() || + (TM.getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::SjLj) == UseSjLjEH) && + "inconsistent sjlj choice between CodeGen and MC"); } void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { - if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s")) - // Default to the Swift CPU when targeting armv7s/thumbv7s. - CPUString = "swift"; - else - CPUString = "generic"; + CPUString = "generic"; + + if (isTargetDarwin()) { + StringRef ArchName = TargetTriple.getArchName(); + if (ArchName.endswith("v7s")) + // Default to the Swift CPU when targeting armv7s/thumbv7s. + CPUString = "swift"; + else if (ArchName.endswith("v7k")) + // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. + // ARMv7k does not use SjLj exception handling. + CPUString = "cortex-a7"; + } } // Insert the architecture feature derived from the target triple into the @@ -212,44 +209,31 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetNaCl()) + if (isTargetNaCl() || isAAPCS16_ABI()) stackAlignment = 16; - UseMovt = hasV6T2Ops() && ArmUseMOVT; + // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: + // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as + // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation + // support in the assembler and linker to be used. This would need to be + // fixed to fully support tail calls in Thumb1. + // + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instructions, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. - if (isTargetMachO()) { - IsR9Reserved = ReserveR9 || !HasV6Ops; - SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); - } else { - IsR9Reserved = ReserveR9; - SupportsTailCall = !isThumb1Only(); - } + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. 
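To keep the ABI-dependent stack alignment defaults from this hunk straight, the decision in isolation looks like the sketch below; the function is an illustrative stand-in, not code from LLVM.

    // APCS keeps the 4-byte default, AAPCS raises it to 8, and NaCl and
    // AAPCS16 (the new watchOS ABI) require 16.
    unsigned defaultStackAlignment(bool IsAAPCS, bool IsNaCl, bool IsAAPCS16) {
      unsigned Alignment = 4; // APCS default
      if (IsAAPCS)
        Alignment = 8;
      if (IsNaCl || IsAAPCS16)
        Alignment = 16;
      return Alignment;
    }

The deletions that follow remove the old arm-strict-align command-line machinery; unaligned-access support becomes the StrictAlign subtarget flag, queried through allowsUnalignedMem() further down.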
- if (Align == DefaultAlign) { - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - } else { - AllowsUnalignedMem = !(Align == StrictAlign); - } + SupportsTailCall = !isThumb1Only(); - // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) - if (isV6M()) - AllowsUnalignedMem = false; + if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0)) + SupportsTailCall = false; switch (IT) { case DefaultIT: @@ -276,8 +260,14 @@ bool ARMSubtarget::isAPCS_ABI() const { } bool ARMSubtarget::isAAPCS_ABI() const { assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS; + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || + TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } +bool ARMSubtarget::isAAPCS16_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; +} + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool @@ -321,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); + return isTargetWatchOS() || + (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); +} + +bool ARMSubtarget::enableMachineScheduler() const { + // Enable the MachineScheduler before register allocation for out-of-order + // architectures where we do not use the PostRA scheduler anymore (for now + // restricted to swift). + return getSchedModel().isOutOfOrder() && isSwift(); } // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { + // No need for PostRA scheduling on out of order CPUs (for now restricted to + // swift). + if (getSchedModel().isOutOfOrder() && isSwift()) + return false; return (!isThumb() || hasThumb2()); } @@ -333,15 +335,30 @@ bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } +bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { + // For general targets, the prologue can grow when VFPs are allocated with + // stride 4 (more vpush instructions). But WatchOS uses a compact unwind + // format which it's more important to get right. + return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize()); +} + bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of // range otherwise. 
- return UseMovt && (isTargetWindows() || - !MF.getFunction()->hasFnAttribute(Attribute::MinSize)); + return !NoMovt && hasV6T2Ops() && + (isTargetWindows() || !MF.getFunction()->optForMinSize()); } bool ARMSubtarget::useFastISel() const { + // Enable fast-isel for any target, for testing only. + if (ForceFastISel) + return true; + + // Limit fast-isel to the targets that are or have been tested. + if (!hasV6Ops()) + return false; + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. return TM.Options.EnableFastISel && ((isTargetMachO() && !isThumb1Only()) || diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index dd101df9b63d..a8b28018f1b2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -43,11 +43,17 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait, + CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53, + CortexA57, CortexA72, Krait, Swift }; enum ARMProcClassEnum { None, AClass, RClass, MClass }; + enum ARMArchEnum { + ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, + ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, + ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a + }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; @@ -55,6 +61,9 @@ protected: /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. ARMProcClassEnum ARMProcClass; + /// ARMArch - ARM architecture + ARMArchEnum ARMArch; + /// HasV4TOps, HasV5TOps, HasV5TEOps, /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. @@ -68,6 +77,7 @@ protected: bool HasV7Ops; bool HasV8Ops; bool HasV8_1aOps; + bool HasV8_2aOps; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. @@ -109,22 +119,24 @@ protected: /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; - /// IsR9Reserved - True if R9 is a not available as general purpose register. - bool IsR9Reserved; + /// ReserveR9 - True if R9 is not available as a general purpose register. + bool ReserveR9; - /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit - /// imms (including global addresses). - bool UseMovt; + /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of + /// 32-bit imms (including global addresses). + bool NoMovt; /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall; - /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF - /// only so far) + /// HasFP16 - True if subtarget supports half-precision FP conversions bool HasFP16; + /// HasFullFP16 - True if subtarget supports half-precision FP operations + bool HasFullFP16; + /// HasD16 - True if subtarget is limited to 16 double precision /// FP registers for VFPv3. bool HasD16; @@ -190,18 +202,18 @@ protected: /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; - /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory + /// StrictAlign - If true, the subtarget disallows unaligned memory /// accesses for some types. 
For details, see /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool AllowsUnalignedMem; + bool StrictAlign; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT /// blocks to conform to ARMv8 rule. bool RestrictIT; - /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith - /// and such) instructions in Thumb2 code. - bool Thumb2DSP; + /// HasDSP - If true, the subtarget supports the DSP (saturating arith + /// and such) instructions. + bool HasDSP; /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; @@ -212,6 +224,9 @@ protected: /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath; + /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). + bool UseSjLjEH; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -297,6 +312,7 @@ public: bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } @@ -343,17 +359,20 @@ public: bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } - bool hasThumb2DSP() const { return Thumb2DSP; } + bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } + bool useSjLjEH() const { return UseSjLjEH; } bool genLongCalls() const { return GenLongCalls; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } + bool hasFullFP16() const { return HasFullFP16; } const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } + bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } @@ -375,6 +394,11 @@ public: TargetTriple.getEnvironment() == Triple::EABIHF) && !isTargetDarwin() && !isTargetWindows(); } + bool isTargetGNUAEABI() const { + return (TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIHF) && + !isTargetDarwin() && !isTargetWindows(); + } // ARM Targets that support EHABI exception handling standard // Darwin uses SjLj. Other targets might need more checks. 
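The new isTargetGNUAEABI() predicate above pairs with the ARMTargetMachine change further down, which defaults Options.EABIVersion to EABI::GNU on gnueabi/gnueabihf triples and to EABI::EABI5 everywhere else. In miniature, with stand-in enums for the Triple and EABI types:

    enum class Env { GNUEABI, GNUEABIHF, EABI, EABIHF, Android, Other };
    enum class EABIVersion { EABI5, GNU };

    // Mirrors the predicate above: GNU EABI environments, excluding Darwin
    // and Windows, get the GNU flavour; everything else defaults to EABI5.
    EABIVersion defaultEABIFor(Env E, bool IsDarwin, bool IsWindows) {
      bool IsGNUAEABI = (E == Env::GNUEABI || E == Env::GNUEABIHF) &&
                        !IsDarwin && !IsWindows;
      return IsGNUAEABI ? EABIVersion::GNU : EABIVersion::EABI5;
    }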
@@ -383,7 +407,7 @@ public:
            TargetTriple.getEnvironment() == Triple::GNUEABI ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
            TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
-           TargetTriple.getEnvironment() == Triple::Android) &&
+           isTargetAndroid()) &&
           !isTargetDarwin() && !isTargetWindows();
   }
 
@@ -391,14 +415,13 @@ public:
     // FIXME: this is invalid for WindowsCE
     return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
-           isTargetWindows();
-  }
-  bool isTargetAndroid() const {
-    return TargetTriple.getEnvironment() == Triple::Android;
+           isTargetWindows() || isAAPCS16_ABI();
   }
+  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 
   bool isAPCS_ABI() const;
   bool isAAPCS_ABI() const;
+  bool isAAPCS16_ABI() const;
 
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
@@ -409,17 +432,17 @@ public:
   bool isRClass() const { return ARMProcClass == RClass; }
   bool isAClass() const { return ARMProcClass == AClass; }
 
-  bool isV6M() const {
-    return isThumb1Only() && isMClass();
+  bool isR9Reserved() const {
+    return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
   }
 
-  bool isR9Reserved() const { return IsR9Reserved; }
+  bool useStride4VFPs(const MachineFunction &MF) const;
 
   bool useMovt(const MachineFunction &MF) const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
 
-  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+  bool allowsUnalignedMem() const { return !StrictAlign; }
 
   bool restrictIT() const { return RestrictIT; }
 
@@ -433,6 +456,9 @@ public:
   /// compiler runtime or math libraries.
   bool hasSinCos() const;
 
+  /// Returns true if machine scheduler should be enabled.
+  bool enableMachineScheduler() const override;
+
   /// True for some subtargets at > -O0.
   bool enablePostRAScheduler() const override;
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 93495d66ae70..fca1901dc57c 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 static ARMBaseTargetMachine::ARMABI
 computeTargetABI(const Triple &TT, StringRef CPU,
                  const TargetOptions &Options) {
-  if (Options.MCOptions.getABIName().startswith("aapcs"))
+  if (Options.MCOptions.getABIName() == "aapcs16")
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+  else if (Options.MCOptions.getABIName().startswith("aapcs"))
     return ARMBaseTargetMachine::ARM_ABI_AAPCS;
   else if (Options.MCOptions.getABIName().startswith("apcs"))
     return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
         (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
         CPU.startswith("cortex-m")) {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+    } else if (TT.isWatchOS()) {
+      TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
     } else {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
     }
@@ -106,7 +110,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
       if (TT.isOSNetBSD())
         TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
       else
-        TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+        TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
       break;
     }
   }
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // to 64. We always try to give them natural alignment.
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) Ret += "-v64:32:64-v128:32:128"; - else + else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-v128:64:128"; // Try to align aggregates to 32 bits (the default is 64 bits, which has no @@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit // aligned everywhere else. - if (TT.isOSNaCl()) + if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-S128"; else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) Ret += "-S64"; @@ -184,6 +188,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, if (Options.FloatABIType == FloatABI::Default) this->Options.FloatABIType = Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; + + // Default to triple-appropriate EABI + if (Options.EABIVersion == EABI::Default || + Options.EABIVersion == EABI::Unknown) { + if (Subtarget.isTargetGNUAEABI()) + this->Options.EABIVersion = EABI::GNU; + else + this->Options.EABIVersion = EABI::EABI5; + } } ARMBaseTargetMachine::~ARMBaseTargetMachine() {} @@ -225,12 +238,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { } TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); }); + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(ARMTTIImpl(this, F)); + }); } - -void ARMTargetMachine::anchor() { } +void ARMTargetMachine::anchor() {} ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -244,7 +257,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, "support ARM mode execution!"); } -void ARMLETargetMachine::anchor() { } +void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -253,7 +266,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ARMBETargetMachine::anchor() { } +void ARMBETargetMachine::anchor() {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -262,7 +275,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} -void ThumbTargetMachine::anchor() { } +void ThumbTargetMachine::anchor() {} ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -273,7 +286,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, initAsmInfo(); } -void ThumbLETargetMachine::anchor() { } +void ThumbLETargetMachine::anchor() {} ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -282,7 +295,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ThumbBETargetMachine::anchor() { } +void ThumbBETargetMachine::anchor() {} ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -348,7 +361,13 @@ bool ARMPassConfig::addPreISel() { // tricky when doing code gen per function. 
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && (EnableGlobalMerge == cl::BOU_UNSET); - addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize)); + // Merging of extern globals is enabled by default on non-Mach-O as we + // expect it to be generally either beneficial or harmless. On Mach-O it + // is disabled as we emit the .subsections_via_symbols directive which + // means that merging extern globals is not safe. + bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); + addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize, + MergeExternalByDefault)); } return false; @@ -356,9 +375,6 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); - - if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel) - addPass(createARMGlobalBaseRegPass()); return false; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 8c98e082ce9a..8ad1f3dc2c34 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -26,7 +26,8 @@ public: enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, - ARM_ABI_AAPCS // ARM EABI + ARM_ABI_AAPCS, // ARM EABI + ARM_ABI_AAPCS16 } TargetABI; protected: diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2f194cf7ae06..c1520119ef21 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,7 +15,7 @@ using namespace llvm; #define DEBUG_TYPE "armtti" -unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); @@ -47,12 +47,12 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 3; } -unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); // Single to/from double precision conversions. - static const CostTblEntry NEONFltDblTbl[] = { + static const CostTblEntry NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. { ISD::FP_ROUND, MVT::v2f64, 2 }, { ISD::FP_EXTEND, MVT::v2f32, 2 }, @@ -61,10 +61,9 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); - if (Idx != -1) - return LT.first * NEONFltDblTbl[Idx].Cost; + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) + return LT.first * Entry->Cost; } EVT SrcTy = TLI->getValueType(DL, Src); @@ -76,8 +75,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. 
- static const TypeConversionCostTblEntry - NEONVectorConversionTbl[] = { + static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, @@ -153,15 +151,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isVector() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONVectorConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar float to integer conversions. - static const TypeConversionCostTblEntry - NEONFloatConversionTbl[] = { + static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = { { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, @@ -184,15 +181,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } }; if (SrcTy.isFloatingPoint() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONFloatConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar integer to float conversions. - static const TypeConversionCostTblEntry - NEONIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, @@ -216,15 +212,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isInteger() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONIntegerConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl, + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar integer conversion costs. - static const TypeConversionCostTblEntry - ARMIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, @@ -236,17 +231,17 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isInteger()) { - int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return ARMIntegerConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { +int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) { // Penalize inserting into an D-subregister. We end up with a three times // lower estimated throughput on swift. 
if (ST->isSwift() && @@ -255,28 +250,30 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->getScalarSizeInBits() <= 32) return 3; - // Cross-class copies are expensive on many microarchitectures, - // so assume they are expensive by default. if ((Opcode == Instruction::InsertElement || - Opcode == Instruction::ExtractElement) && - ValTy->getVectorElementType()->isIntegerTy()) - return 3; + Opcode == Instruction::ExtractElement)) { + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. + if (ValTy->getVectorElementType()->isIntegerTy()) + return 3; + + // Even if it's not a cross class copy, this likely leads to mixing + // of NEON and VFP code and should therefore be penalized. + if (ValTy->isVectorTy() && + ValTy->getScalarSizeInBits() <= 32) + return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); + } return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { // Lowering of some vector selects is currently far from perfect. - static const TypeConversionCostTblEntry - NEONVectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } @@ -285,21 +282,20 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, - SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return NEONVectorSelectTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, + SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT())) + return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -314,7 +310,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return 1; } -unsigned ARMTTIImpl::getFPOpCost(Type *Ty) { +int ARMTTIImpl::getFPOpCost(Type *Ty) { // Use similar logic that's in ARMISelLowering: // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access // to VFP.
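The getVectorInstrCost hunk above restructures the insert/extract special case: integer lane moves keep their flat cross-class-copy cost of 3, and a new branch floors the cost of narrow floating-point lane moves at 2 because they mix NEON and VFP code. A hedged restatement of that rule with illustrative scalar parameters (not the real TTI types; BaseCost stands in for the BaseT result):

// Illustrative sketch of the new insert/extract cost rule.
static unsigned vectorLaneMoveCost(bool IsInsertOrExtract, bool EltIsInteger,
                                   unsigned EltSizeInBits, unsigned BaseCost) {
  if (IsInsertOrExtract) {
    if (EltIsInteger)
      return 3;                           // GPR<->NEON cross-class copy
    if (EltSizeInBits <= 32)
      return BaseCost > 2 ? BaseCost : 2; // NEON/VFP mixing penalty floor
  }
  return BaseCost;                        // everything else: base estimate
}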
@@ -333,14 +329,14 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Expensive; } -unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) { +int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { // We only handle costs of reverse and alternate shuffles for now. if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - static const CostTblEntry NEONShuffleTbl[] = { + static const CostTblEntry NEONShuffleTbl[] = { // Reverse shuffle costs one instruction if we are shuffling within a // double word (vrev) or two if we shuffle a quad word (vrev, vext). {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, @@ -353,16 +349,16 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; - return LT.first * NEONShuffleTbl[Idx].Cost; + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } if (Kind == TTI::SK_Alternate) { - static const CostTblEntry NEONAltShuffleTbl[] = { + static const CostTblEntry NEONAltShuffleTbl[] = { // Alt shuffle cost table for ARM. Cost is the number of instructions // required to create the shuffled vector. @@ -379,27 +375,26 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - int Idx = - CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONAltShuffleTbl[Idx].Cost; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTIImpl::getArithmeticInstrCost( +int ARMTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; - static const CostTblEntry CostTbl[] = { + static const CostTblEntry CostTbl[] = { // Division. // These costs are somewhat random. Choose a cost of 20 to indicate that // vectorizing division (added function call) is going to be very expensive. @@ -440,16 +435,12 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( // Multiplication.
}; - int Idx = -1; - if (ST->hasNEON()) - Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second); + if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) + return LT.first * Entry->Cost; - if (Idx != -1) - return LT.first * CostTbl[Idx].Cost; - - unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. @@ -465,10 +456,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( return Cost; } -unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); +int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { @@ -479,21 +469,21 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) { +int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, + unsigned Alignment, + unsigned AddressSpace) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. 
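The comment above states the vldN/vstN legality rule that the check immediately following enforces; note the hunk deliberately switches to getTypeSizeInBits rather than alloc size, so padding cannot skew the 64/128-bit test. A standalone sketch of the whole predicate under those assumptions, with hypothetical names:

// Sketch: can an interleaved access of NumElts elements, each EltSizeInBits
// wide and de-interleaved by Factor, be lowered to vldN/vstN?
static bool isLegalNEONInterleave(unsigned Factor, unsigned MaxFactor,
                                  unsigned NumElts, unsigned EltSizeInBits) {
  if (Factor < 2 || Factor > MaxFactor)
    return false;                  // only vld2/vld3/vld4 exist
  if (EltSizeInBits == 64)
    return false;                  // no i64/f64 element forms
  if (NumElts % Factor != 0)
    return false;                  // sub-vectors must split evenly
  unsigned SubVecSizeInBits = (NumElts / Factor) * EltSizeInBits;
  return SubVecSizeInBits == 64 || SubVecSizeInBits == 128; // D or Q register
}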
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 84f256f73722..7d8d2381c983 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -41,7 +41,7 @@ class ARMTTIImpl : public BasicTTIImplBase { const ARMTargetLowering *getTLI() const { return TLI; } public: - explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) + explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -52,11 +52,13 @@ public: : BaseT(std::move(static_cast(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} + bool enableInterleavedAccessVectorization() { return true; } + /// \name Scalar TTI Implementations /// @{ using BaseT::getIntImmCost; - unsigned getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty); /// @} @@ -92,34 +94,31 @@ public: return 1; } - unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp); + int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getAddressComputationCost(Type *Val, bool IsComplex); + int getAddressComputationCost(Type *Val, bool IsComplex); - unsigned getFPOpCost(Type *Ty); + int getFPOpCost(Type *Ty); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace); + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf6b8929f311..c69a741244cf 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -129,7 +129,6 @@ public: }; class ARMAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; @@ -247,48 +246,49 @@ class ARMAsmParser : public MCTargetAsmParser { OperandVector &Operands); bool isThumb() const { // FIXME: Can tablegen auto-generate this? 
- return STI.getFeatureBits()[ARM::ModeThumb]; + return getSTI().getFeatureBits()[ARM::ModeThumb]; } bool isThumbOne() const { - return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2]; + return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2]; } bool isThumbTwo() const { - return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2]; + return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2]; } bool hasThumb() const { - return STI.getFeatureBits()[ARM::HasV4TOps]; + return getSTI().getFeatureBits()[ARM::HasV4TOps]; } bool hasV6Ops() const { - return STI.getFeatureBits()[ARM::HasV6Ops]; + return getSTI().getFeatureBits()[ARM::HasV6Ops]; } bool hasV6MOps() const { - return STI.getFeatureBits()[ARM::HasV6MOps]; + return getSTI().getFeatureBits()[ARM::HasV6MOps]; } bool hasV7Ops() const { - return STI.getFeatureBits()[ARM::HasV7Ops]; + return getSTI().getFeatureBits()[ARM::HasV7Ops]; } bool hasV8Ops() const { - return STI.getFeatureBits()[ARM::HasV8Ops]; + return getSTI().getFeatureBits()[ARM::HasV8Ops]; } bool hasARM() const { - return !STI.getFeatureBits()[ARM::FeatureNoARM]; + return !getSTI().getFeatureBits()[ARM::FeatureNoARM]; } - bool hasThumb2DSP() const { - return STI.getFeatureBits()[ARM::FeatureDSPThumb2]; + bool hasDSP() const { + return getSTI().getFeatureBits()[ARM::FeatureDSP]; } bool hasD16() const { - return STI.getFeatureBits()[ARM::FeatureD16]; + return getSTI().getFeatureBits()[ARM::FeatureD16]; } bool hasV8_1aOps() const { - return STI.getFeatureBits()[ARM::HasV8_1aOps]; + return getSTI().getFeatureBits()[ARM::HasV8_1aOps]; } void SwitchMode() { + MCSubtargetInfo &STI = copySTI(); uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } bool isMClass() const { - return STI.getFeatureBits()[ARM::FeatureMClass]; + return getSTI().getFeatureBits()[ARM::FeatureMClass]; } /// @name Auto-generated Match Functions @@ -343,14 +343,15 @@ public: Match_RequiresNotITBlock, Match_RequiresV6, Match_RequiresThumb2, + Match_RequiresV8, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "ARMGenAsmMatcher.inc" }; - ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : STI(STI), MII(MII), UC(Parser) { + : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) { MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. 
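The asm-parser hunks above and below drop the parser's own MCSubtargetInfo member: feature reads now go through getSTI(), and anything that mutates features (the ARM/Thumb mode switch here, and the .arch/.cpu/.fpu/.arch_extension directives later in this file) first clones the shared subtarget via copySTI(). A minimal sketch of that copy-on-write discipline with stand-in types (not the real MCTargetAsmParser API):

#include <memory>

struct SubtargetInfo { unsigned long long FeatureBits = 0; };

class AsmParserBase {
  const SubtargetInfo *Shared;         // owned by the caller, never written
  std::unique_ptr<SubtargetInfo> Copy; // lazily created private copy
public:
  explicit AsmParserBase(const SubtargetInfo &STI) : Shared(&STI) {}

  // All feature queries read through here.
  const SubtargetInfo &getSTI() const { return Copy ? *Copy : *Shared; }

  // Clone-on-first-write: feature-toggling directives call this first, so
  // the shared subtarget stays pristine for other users of the target.
  SubtargetInfo &copySTI() {
    if (!Copy)
      Copy = std::make_unique<SubtargetInfo>(*Shared);
    return *Copy;
  }
};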
@@ -564,87 +565,6 @@ class ARMOperand : public MCParsedAsmOperand { public: ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} - ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { - Kind = o.Kind; - StartLoc = o.StartLoc; - EndLoc = o.EndLoc; - switch (Kind) { - case k_CondCode: - CC = o.CC; - break; - case k_ITCondMask: - ITMask = o.ITMask; - break; - case k_Token: - Tok = o.Tok; - break; - case k_CCOut: - case k_Register: - Reg = o.Reg; - break; - case k_RegisterList: - case k_DPRRegisterList: - case k_SPRRegisterList: - Registers = o.Registers; - break; - case k_VectorList: - case k_VectorListAllLanes: - case k_VectorListIndexed: - VectorList = o.VectorList; - break; - case k_CoprocNum: - case k_CoprocReg: - Cop = o.Cop; - break; - case k_CoprocOption: - CoprocOption = o.CoprocOption; - break; - case k_Immediate: - Imm = o.Imm; - break; - case k_MemBarrierOpt: - MBOpt = o.MBOpt; - break; - case k_InstSyncBarrierOpt: - ISBOpt = o.ISBOpt; - case k_Memory: - Memory = o.Memory; - break; - case k_PostIndexRegister: - PostIdxReg = o.PostIdxReg; - break; - case k_MSRMask: - MMask = o.MMask; - break; - case k_BankedReg: - BankedReg = o.BankedReg; - break; - case k_ProcIFlags: - IFlags = o.IFlags; - break; - case k_ShifterImmediate: - ShifterImm = o.ShifterImm; - break; - case k_ShiftedRegister: - RegShiftedReg = o.RegShiftedReg; - break; - case k_ShiftedImmediate: - RegShiftedImm = o.RegShiftedImm; - break; - case k_RotateImmediate: - RotImm = o.RotImm; - break; - case k_ModifiedImmediate: - ModImm = o.ModImm; - break; - case k_BitfieldDescriptor: - Bitfield = o.Bitfield; - break; - case k_VectorIndex: - VectorIndex = o.VectorIndex; - break; - } - } /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } @@ -4054,7 +3974,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { if (FlagsVal == ~0U) return MatchOperand_NoMatch; - if (!hasThumb2DSP() && (FlagsVal & 0x400)) + if (!hasDSP() && (FlagsVal & 0x400)) // The _g and _nzcvqg versions are only valid if the DSP extension is // available. return MatchOperand_NoMatch; @@ -5202,6 +5122,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // FALLTHROUGH } case AsmToken::Colon: { + S = Parser.getTok().getLoc(); // ":lower16:" and ":upper16:" expression prefixes // FIXME: Check it's an expression prefix, // e.g. (FOO - :lower16:BAR) isn't legal. @@ -5220,8 +5141,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return false; } case AsmToken::Equal: { + S = Parser.getTok().getLoc(); if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. 
ldr r0, =val) - return Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return Error(S, "unexpected token in operand"); Parser.Lex(); // Eat '=' const MCExpr *SubExprVal; @@ -5229,7 +5151,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal); + const MCExpr *CPLoc = + getTargetStreamer().addConstantPoolEntry(SubExprVal, S); Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E)); return false; } @@ -5682,9 +5605,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic, // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON unsigned RegIdx = 3; if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && - static_cast(*Operands[2]).getToken() == ".f32") { + (static_cast(*Operands[2]).getToken() == ".f32" || + static_cast(*Operands[2]).getToken() == ".f16")) { if (static_cast(*Operands[3]).isToken() && - static_cast(*Operands[3]).getToken() == ".f32") + (static_cast(*Operands[3]).getToken() == ".f32" || + static_cast(*Operands[3]).getToken() == ".f16")) RegIdx = 4; if (static_cast(*Operands[RegIdx]).isReg() && @@ -8610,18 +8535,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR && inITBlock()) return Match_RequiresNotITBlock; + } else if (isThumbOne()) { + // Some high-register supporting Thumb1 encodings only allow both registers + // to be from r0-r7 when in Thumb2. + if (Opc == ARM::tADDhirr && !hasV6MOps() && + isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) + return Match_RequiresThumb2; + // Others only require ARMv6 or later. + else if (Opc == ARM::tMOVr && !hasV6Ops() && + isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg())) + return Match_RequiresV6; } - // Some high-register supporting Thumb1 encodings only allow both registers - // to be from r0-r7 when in Thumb2. - else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() && - isARMLowRegister(Inst.getOperand(1).getReg()) && - isARMLowRegister(Inst.getOperand(2).getReg())) - return Match_RequiresThumb2; - // Others only require ARMv6 or later. 
- else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() && - isARMLowRegister(Inst.getOperand(0).getReg()) && - isARMLowRegister(Inst.getOperand(1).getReg())) - return Match_RequiresV6; + + for (unsigned I = 0; I < MCID.NumOperands; ++I) + if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) { + // rGPRRegClass excludes PC, and also excluded SP before ARMv8 + if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops()) + return Match_RequiresV8; + else if (Inst.getOperand(I).getReg() == ARM::PC) + return Match_InvalidOperand; + } + return Match_Success; } @@ -8680,7 +8616,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: { assert(ErrorInfo && "Unknown missing feature!"); @@ -8720,6 +8656,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_RequiresV8: + return Error(IDLoc, "instruction variant requires ARMv8 or later"); case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; @@ -8868,7 +8806,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { return false; } - getParser().getStreamer().EmitValue(Value, Size); + getParser().getStreamer().EmitValue(Value, Size, L); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -9098,7 +9036,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { bool ARMAsmParser::parseDirectiveArch(SMLoc L) { StringRef Arch = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARMTargetParser::parseArch(Arch); + unsigned ID = ARM::parseArch(Arch); if (ID == ARM::AK_INVALID) { Error(L, "Unknown arch name"); @@ -9106,7 +9044,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { } Triple T; - STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); + MCSubtargetInfo &STI = copySTI(); + STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str()); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); getTargetStreamer().emitArch(ID); @@ -9233,12 +9172,13 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { // FIXME: This is using table-gen data, but should be moved to // ARMTargetParser once that is table-gen'd. 
- if (!STI.isCPUStringValid(CPU)) { + if (!getSTI().isCPUStringValid(CPU)) { Error(L, "Unknown CPU name"); return false; } - STI.setDefaultFeatures(CPU); + MCSubtargetInfo &STI = copySTI(); + STI.setDefaultFeatures(CPU, ""); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; @@ -9249,13 +9189,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { SMLoc FPUNameLoc = getTok().getLoc(); StringRef FPU = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARMTargetParser::parseFPU(FPU); + unsigned ID = ARM::parseFPU(FPU); std::vector Features; - if (!ARMTargetParser::getFPUFeatures(ID, Features)) { + if (!ARM::getFPUFeatures(ID, Features)) { Error(FPUNameLoc, "Unknown FPU name"); return false; } + MCSubtargetInfo &STI = copySTI(); for (auto Feature : Features) STI.ApplyFeatureFlag(Feature); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -9895,7 +9836,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { SMLoc ArchLoc = Parser.getTok().getLoc(); getLexer().Lex(); - unsigned ID = ARMTargetParser::parseArch(Arch); + unsigned ID = ARM::parseArch(Arch); if (ID == ARM::AK_INVALID) { Error(ArchLoc, "unknown architecture '" + Arch + "'"); @@ -9976,22 +9917,22 @@ extern "C" void LLVMInitializeARMAsmParser() { // when we start to table-generate them, and we can use the ARM // flags below, that were generated by table-gen. static const struct { - const ARM::ArchExtKind Kind; - const unsigned ArchCheck; + const unsigned Kind; + const uint64_t ArchCheck; const FeatureBitset Features; } Extensions[] = { { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} }, { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass, + { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, - // FIXME: Also available in ARMv6-K - { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} }, + { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} }, + { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} }, // FIXME: Unsupported extensions. { ARM::AEK_OS, Feature_None, {} }, { ARM::AEK_IWMMXT, Feature_None, {} }, @@ -10020,7 +9961,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { EnableFeature = false; Name = Name.substr(2); } - unsigned FeatureKind = ARMTargetParser::parseArchExt(Name); + unsigned FeatureKind = ARM::parseArchExt(Name); if (FeatureKind == ARM::AEK_INVALID) Error(ExtLoc, "unknown architectural extension: " + Name); @@ -10037,6 +9978,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { return false; } + MCSubtargetInfo &STI = copySTI(); FeatureBitset ToggleFeatures = EnableFeature ? 
(~STI.getFeatureBits() & Extension.Features) : ( STI.getFeatureBits() & Extension.Features); @@ -10078,6 +10020,10 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, "expression value must be representable in 32 bits"); } break; + case MCK_rGPR: + if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP) + return Match_Success; + break; case MCK_GPRPair: if (Op.isReg() && MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg())) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 097ec04e7052..e63defed2288 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -59,7 +59,7 @@ namespace { } // Called when decoding an IT instruction. Sets the IT state for the following - // instructions that for the IT block. Firstcond and Mask correspond to the + // instructions that for the IT block. Firstcond and Mask correspond to the // fields in the IT instruction encoding. void setITState(char Firstcond, char Mask) { // (3 - the number of trailing zeros) is the number of then / else. @@ -459,21 +459,18 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. - MI.clear(); Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -485,7 +482,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -497,7 +493,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -509,7 +504,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -517,7 +511,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -525,7 +518,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -718,7 +710,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this, STI); if (Result) { @@ -729,7 +720,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -763,7 +753,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint32_t Insn32 = (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | 
(Bytes[0] << 16); - MI.clear(); Result = decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -774,7 +763,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -784,7 +772,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { - MI.clear(); Result = decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -794,7 +781,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - MI.clear(); Result = decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -803,7 +789,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { - MI.clear(); Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -814,7 +799,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) { - MI.clear(); uint32_t NEONLdStInsn = Insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; @@ -828,7 +812,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 24, 4) == 0xF) { - MI.clear(); uint32_t NEONDataInsn = Insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 @@ -841,7 +824,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); uint32_t NEONCryptoInsn = Insn32; NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 @@ -853,7 +835,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); uint32_t NEONv8Insn = Insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, @@ -864,7 +845,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -902,7 +882,7 @@ static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - + if (RegNo == 15) S = MCDisassembler::SoftFail; @@ -986,8 +966,13 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - if (RegNo == 13 || RegNo == 15) + + const FeatureBitset &featureBits = + ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); + + if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15) S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); return S; } @@ -1147,7 +1132,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, unsigned imm = fieldFromInstruction(Val, 7, 5); // Register-immediate - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, 
DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; ARM_AM::ShiftOpc Shift = ARM_AM::lsl; @@ -1658,7 +1643,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::STRD_POST: if (P == 0 && W == 1) S = MCDisassembler::SoftFail; - + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) S = MCDisassembler::SoftFail; if (type && Rm == 15) @@ -4131,7 +4116,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, // indicates the move for the GE{3:0} bits, the mask{0} bit can be set // only if the processor includes the DSP extension. if (Mask == 0 || (Mask != 2 && ValLow > 3) || - (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1))) + (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1))) S = MCDisassembler::SoftFail; } } @@ -5065,6 +5050,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { + const FeatureBitset &featureBits = + ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); + bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); @@ -5075,10 +5064,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; - // VMOVv2f32 is ambiguous with these decodings. - if (!(imm & 0x38) && cmode == 0xF) { - if (op == 1) return MCDisassembler::Fail; - Inst.setOpcode(ARM::VMOVv2f32); + // If the top 3 bits of imm are clear, this is a VMOV (immediate) + if (!(imm & 0x38)) { + if (cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; + Inst.setOpcode(ARM::VMOVv2f32); + } + if (hasFullFP16) { + if (cmode == 0xE) { + if (op == 1) { + Inst.setOpcode(ARM::VMOVv1i64); + } else { + Inst.setOpcode(ARM::VMOVv8i8); + } + } + if (cmode == 0xD) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv2i32); + } else { + Inst.setOpcode(ARM::VMOVv2i32); + } + } + if (cmode == 0xC) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv2i32); + } else { + Inst.setOpcode(ARM::VMOVv2i32); + } + } + } return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } @@ -5095,6 +5109,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { + const FeatureBitset &featureBits = + ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); + bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); @@ -5105,10 +5123,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; - // VMOVv4f32 is ambiguous with these decodings. 
- if (!(imm & 0x38) && cmode == 0xF) { - if (op == 1) return MCDisassembler::Fail; - Inst.setOpcode(ARM::VMOVv4f32); + // If the top 3 bits of imm are clear, this is a VMOV (immediate) + if (!(imm & 0x38)) { + if (cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; + Inst.setOpcode(ARM::VMOVv4f32); + } + if (hasFullFP16) { + if (cmode == 0xE) { + if (op == 1) { + Inst.setOpcode(ARM::VMOVv2i64); + } else { + Inst.setOpcode(ARM::VMOVv16i8); + } + } + if (cmode == 0xD) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv4i32); + } else { + Inst.setOpcode(ARM::VMOVv4i32); + } + } + if (cmode == 0xC) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv4i32); + } else { + Inst.setOpcode(ARM::VMOVv4i32); + } + } + } return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } @@ -5132,7 +5175,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, unsigned Rm = fieldFromInstruction(Val, 0, 4); Rm |= (fieldFromInstruction(Val, 23, 1) << 4); unsigned Cond = fieldFromInstruction(Val, 28, 4); - + if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt) S = MCDisassembler::SoftFail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 0bff52141da5..33fc85af9b19 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -804,7 +805,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, unsigned Opcode = MI->getOpcode(); // For writes, handle extended mask bits if the DSP extension is present. - if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) { + if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) { switch (SYSm) { case 0x400: O << "apsr_g"; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3927c9f8bfd3..52f7115f0558 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -15,12 +15,9 @@ #define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class ARMInstPrinter : public MCInstPrinter { public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 111463588565..fa52c9354c17 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,13 +25,17 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MachO.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -180,9 +184,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) 
const { +const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup, + uint64_t Value) const { switch ((unsigned)Fixup.getKind()) { case ARM::fixup_arm_thumb_br: { // Relaxing tB to t2B. tB has a signed 12-bit displacement with the @@ -192,7 +195,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, // // Relax if the value is too big for a (signed) i8. int64_t Offset = int64_t(Value) - 4; - return Offset > 2046 || Offset < -2048; + if (Offset > 2046 || Offset < -2048) + return "out of range pc-relative fixup value"; + break; } case ARM::fixup_arm_thumb_bcc: { // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the @@ -202,23 +207,40 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, // // Relax if the value is too big for a (signed) i8. int64_t Offset = int64_t(Value) - 4; - return Offset > 254 || Offset < -256; + if (Offset > 254 || Offset < -256) + return "out of range pc-relative fixup value"; + break; } case ARM::fixup_thumb_adr_pcrel_10: case ARM::fixup_arm_thumb_cp: { // If the immediate is negative, greater than 1020, or not a multiple // of four, the wide version of the instruction must be used. int64_t Offset = int64_t(Value) - 4; - return Offset > 1020 || Offset < 0 || Offset & 3; + if (Offset & 3) + return "misaligned pc-relative fixup value"; + else if (Offset > 1020 || Offset < 0) + return "out of range pc-relative fixup value"; + break; } - case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cb: { // If we have a Thumb CBZ or CBNZ instruction and its target is the next // instruction, it is actually out of range for the instruction. // It will be changed to a NOP. int64_t Offset = (Value & ~1); - return Offset == 2; + if (Offset == 2) + return "will be converted to nop"; + break; } - llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); + default: + llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!"); + } + return nullptr; +} + +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + return reasonForFixupRelaxation(Fixup, Value); }
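The refactor above turns the relaxation predicate into reasonForFixupRelaxation(), which returns nullptr when the narrow encoding fits and a human-readable reason otherwise; fixupNeedsRelaxation() just tests the pointer, and adjustFixupValue() further down can reuse the same string as an error message on targets without Thumb2. A stripped-down sketch of the pattern for the tB case only (illustrative, not the full switch):

#include <cstdint>

// nullptr: the narrow tB encoding can hold Value; otherwise a diagnostic
// explaining why relaxation to t2B (or an error) is required.
static const char *reasonForThumbBrRelaxation(uint64_t Value) {
  int64_t Offset = int64_t(Value) - 4;  // Thumb PC reads ahead by 4
  if (Offset > 2046 || Offset < -2048)  // signed 12-bit displacement
    return "out of range pc-relative fixup value";
  return nullptr;
}

static bool thumbBrNeedsRelaxation(uint64_t Value) {
  return reasonForThumbBrRelaxation(Value) != nullptr; // bool view of the reason
}

void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { @@ -317,9 +339,10 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx, - bool IsLittleEndian) { +unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + bool IsPCRel, MCContext *Ctx, + bool IsLittleEndian, + bool IsResolved) const { unsigned Kind = Fixup.getKind(); switch (Kind) { default: @@ -372,8 +395,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; isAdd = false; } - if (Ctx && Value >= 4096) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 4096) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value |= isAdd << 23; // Same addressing mode as fixup_arm_pcrel_10, @@ -383,8 +408,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } - case ARM::fixup_thumb_adr_pcrel_10: - return ((Value - 4) >> 2) & 0xff; case ARM::fixup_arm_adr_pcrel_12: { // ARM PC-relative values are offset by 8.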
Value -= 8; @@ -393,8 +416,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; opc = 2; // 0b0010 } - if (Ctx && ARM_AM::getSOImmVal(Value) == -1) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && ARM_AM::getSOImmVal(Value) == -1) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } // Encode the immediate and shift the opcode into place. return ARM_AM::getSOImmVal(Value) | (opc << 21); } @@ -517,21 +542,44 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, ((uint16_t)imm10LBits) << 1); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } + case ARM::fixup_thumb_adr_pcrel_10: case ARM::fixup_arm_thumb_cp: - // Offset by 4, and don't encode the low two bits. Two bytes of that - // 'off by 4' is implicitly handled by the half-word ordering of the - // Thumb encoding, so we only need to adjust by 2 here. - return ((Value - 2) >> 2) & 0xff; + // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we + // could have an error on our hands. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } + // Offset by 4, and don't encode the low two bits. + return ((Value - 4) >> 2) & 0xff; case ARM::fixup_arm_thumb_cb: { // Offset by 4 and don't encode the lower bit, which is always 0. + // FIXME: diagnose if no Thumb2 uint32_t Binary = (Value - 4) >> 1; return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3); } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } return ((Value - 4) >> 1) & 0x7ff; case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } return ((Value - 4) >> 1) & 0xff; case ARM::fixup_arm_pcrel_10_unscaled: { Value = Value - 8; // ARM fixups offset by an additional word and don't @@ -542,8 +590,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. - if (Ctx && Value >= 256) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value = (Value & 0xf) | ((Value & 0xf0) << 4); return Value | (isAdd << 23); } @@ -561,8 +611,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } // These values don't encode the low two bits since they're always zero. 
Value >>= 2; - if (Ctx && Value >= 256) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value |= isAdd << 23; // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords @@ -582,6 +634,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, const MCValue &Target, uint64_t &Value, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); + const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; // Some fixups to thumb function symbols need the low bit (thumb bit) // twiddled. if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && @@ -590,18 +643,21 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { - if (A) { - const MCSymbol &Sym = A->getSymbol(); - if (Asm.isThumbFunc(&Sym)) + if (Sym) { + if (Asm.isThumbFunc(Sym)) Value |= 1; } } - // For Thumb1 BL instruction, it is possible to be a long jump between - // the basic blocks of the same function. Thus, we would like to resolve - // the offset when the destination has the same MCFragment. - if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { - const MCSymbol &Sym = A->getSymbol(); - IsResolved = (Sym.getFragment() == DF); + if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + assert(Sym && "How did we resolve this?"); + + // If the symbol is external the linker will handle it. + // FIXME: Should we handle it as an optimization? + + // If the symbol is out of range, produce a relocation and hope the + // linker can handle it. GNU AS produces an error in this case. + if (Sym->isExternal() || Value >= 0x400004) + IsResolved = false; } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination @@ -616,7 +672,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the instruction. This allows adjustFixupValue() to issue a diagnostic // if the value is invalid. (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), - IsLittleEndian); + IsLittleEndian, IsResolved); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -719,7 +775,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); + Value = + adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true); if (!Value) return; // Doesn't change encoding. @@ -743,6 +800,249 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } +namespace CU { + +/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings { + UNWIND_ARM_MODE_MASK = 0x0F000000, + UNWIND_ARM_MODE_FRAME = 0x01000000, + UNWIND_ARM_MODE_FRAME_D = 0x02000000, + UNWIND_ARM_MODE_DWARF = 0x04000000, + + UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000, + + UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001, + UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002, + UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004, + + UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008, + UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010, + UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020, + UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040, + UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080, + + UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00, + + UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF +}; + +} // end CU namespace + +/// Generate compact unwind encoding for the function based on the CFI +/// instructions. If the CFI instructions describe a frame that cannot be +/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which +/// tells the runtime to fall back and unwind using DWARF. +uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( + ArrayRef Instrs) const { + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n"); + // Only armv7k uses CFI based unwinding. + if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K) + return 0; + // No .cfi directives means no frame. + if (Instrs.empty()) + return 0; + // Start off assuming CFA is at SP+0. + int CFARegister = ARM::SP; + int CFARegisterOffset = 0; + // Mark savable registers as initially unsaved. + DenseMap RegOffsets; + int FloatRegCount = 0; + // Process each .cfi directive and build up compact unwind info. + for (size_t i = 0, e = Instrs.size(); i != e; ++i) { + int Reg; + const MCCFIInstruction &Inst = Instrs[i]; + switch (Inst.getOperation()) { + case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa + CFARegisterOffset = -Inst.getOffset(); + CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + break; + case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset + CFARegisterOffset = -Inst.getOffset(); + break; + case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register + CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + break; + case MCCFIInstruction::OpOffset: // DW_CFA_offset + Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + RegOffsets[Reg] = Inst.getOffset(); + else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { + RegOffsets[Reg] = Inst.getOffset(); + ++FloatRegCount; + } else { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << ".cfi_offset on unknown register=" + << Inst.getRegister() << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + break; + case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc + // Ignore + break; + default: + // Directive not convertible to compact unwind, bail out. + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() + << "CFI directive not compatible with compact " + "unwind encoding, opcode=" << Inst.getOperation() + << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + break; + } + } + + // If no frame set up, return no unwind info. + if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0)) + return 0; +
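Everything below packs into a single 32-bit compact unwind word: the mode nibble, a two-bit var-args stack adjust in bits 23:22, one flag per saved GPR, and, in frame-D mode, a saved-D-register count in bits 11:8. An illustrative packing helper under those assumptions (constants mirror the CU enum above; the real code builds the word incrementally):

#include <cstdint>

// Returns false whenever the frame cannot be encoded compactly, in which
// case the caller falls back to CU::UNWIND_ARM_MODE_DWARF.
static bool packFrameEncoding(uint32_t GPRPushBits, int StackAdjust,
                              int FloatRegCount, uint32_t &Encoding) {
  Encoding = 0x01000000;                       // UNWIND_ARM_MODE_FRAME
  if (StackAdjust < 0 || StackAdjust > 12 || (StackAdjust & 3))
    return false;                              // only 0, 4, 8, 12 fit
  Encoding |= uint32_t(StackAdjust / 4) << 22; // 0..3 into bits 23:22
  Encoding |= GPRPushBits;                     // FIRST/SECOND_PUSH_* flags
  if (FloatRegCount > 0) {
    if (FloatRegCount > 4)
      return false;                            // count field tops out at 4
    Encoding = (Encoding & ~0x0F000000u) | 0x02000000; // switch to FRAME_D
    Encoding |= uint32_t(FloatRegCount - 1) << 8;      // D-reg count field
  }
  return true;
}

+ // Verify standard frame (lr/r7) was used.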
+ if (CFARegister != ARM::R7) { + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is " + << CFARegister + << " instead of r7\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + int StackAdjust = CFARegisterOffset - 8; + if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() + << "LR not saved as standard frame, StackAdjust=" + << StackAdjust + << ", CFARegisterOffset=" << CFARegisterOffset + << ", lr save at offset=" << RegOffsets[14] << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << "r7 not saved as standard frame\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME; + + // If var-args are used, there may be a stack adjust required. + switch (StackAdjust) { + case 0: + break; + case 4: + CompactUnwindEncoding |= 0x00400000; + break; + case 8: + CompactUnwindEncoding |= 0x00800000; + break; + case 12: + CompactUnwindEncoding |= 0x00C00000; + break; + default: + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() + << ".cfi_def_cfa stack adjust (" + << StackAdjust << ") out of range\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + + // If r6 is saved, it must be right below r7. + static struct { + unsigned Reg; + unsigned Encoding; + } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6}, + {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5}, + {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4}, + {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12}, + {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11}, + {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10}, + {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9}, + {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}}; + + int CurOffset = -8 - StackAdjust; + for (auto CSReg : GPRCSRegs) { + auto Offset = RegOffsets.find(CSReg.Reg); + if (Offset == RegOffsets.end()) + continue; + + int RegOffset = Offset->second; + if (RegOffset != CurOffset - 4) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at " + << RegOffset << " but only supported at " + << CurOffset << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + CompactUnwindEncoding |= CSReg.Encoding; + CurOffset -= 4; + } + + // If no floats saved, we are done. + if (FloatRegCount == 0) + return CompactUnwindEncoding; + + // Switch mode to include D register saving. + CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK; + CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D; + + // FIXME: supporting more than 4 saved D-registers compactly would be trivial, + // but needs coordination with the linker and libunwind. + if (FloatRegCount > 4) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << "unsupported number of D registers saved (" + << FloatRegCount << ")\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + + // Floating point registers must either be saved sequentially, or we defer to + // DWARF. No gaps allowed here so check that each saved d-register is + // precisely where it should be. 
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 }; + for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) { + auto Offset = RegOffsets.find(FPRCSRegs[Idx]); + if (Offset == RegOffsets.end()) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << FloatRegCount << " D-regs saved, but " + << MRI.getName(FPRCSRegs[Idx]) + << " not saved\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } else if (Offset->second != CurOffset - 8) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << FloatRegCount << " D-regs saved, but " + << MRI.getName(FPRCSRegs[Idx]) + << " saved at " << Offset->second + << ", expected at " << CurOffset - 8 + << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + CurOffset -= 8; + } + + return CompactUnwindEncoding | ((FloatRegCount - 1) << 8); +} + +static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) { + unsigned AK = ARM::parseArch(Arch); + switch (AK) { + default: + return MachO::CPU_SUBTYPE_ARM_V7; + case ARM::AK_ARMV4T: + return MachO::CPU_SUBTYPE_ARM_V4T; + case ARM::AK_ARMV5T: + case ARM::AK_ARMV5TE: + case ARM::AK_ARMV5TEJ: + return MachO::CPU_SUBTYPE_ARM_V5; + case ARM::AK_ARMV6: + case ARM::AK_ARMV6K: + return MachO::CPU_SUBTYPE_ARM_V6; + case ARM::AK_ARMV7A: + return MachO::CPU_SUBTYPE_ARM_V7; + case ARM::AK_ARMV7S: + return MachO::CPU_SUBTYPE_ARM_V7S; + case ARM::AK_ARMV7K: + return MachO::CPU_SUBTYPE_ARM_V7K; + case ARM::AK_ARMV6M: + return MachO::CPU_SUBTYPE_ARM_V6M; + case ARM::AK_ARMV7M: + return MachO::CPU_SUBTYPE_ARM_V7M; + case ARM::AK_ARMV7EM: + return MachO::CPU_SUBTYPE_ARM_V7EM; + } +} + MCAsmBackend *llvm::createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, StringRef CPU, @@ -751,19 +1051,8 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, default: llvm_unreachable("unsupported object format"); case Triple::MachO: { - MachO::CPUSubTypeARM CS = - StringSwitch(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); - - return new ARMAsmBackendDarwin(T, TheTriple, CS); + MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName()); + return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 6b4abd5898eb..28a62132a419 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -45,6 +45,10 @@ public: const MCValue &Target, uint64_t &Value, bool &IsResolved) override; + unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, + MCContext *Ctx, bool IsLittleEndian, + bool IsResolved) const; + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; @@ -52,6 +56,9 @@ public: bool mayNeedRelaxation(const MCInst &Inst) const override; + const char *reasonForFixupRelaxation(const MCFixup &Fixup, + uint64_t Value) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, 
uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index a6206e3d9585..995dd0fe08ee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -16,11 +16,12 @@ using namespace llvm; namespace { class ARMAsmBackendDarwin : public ARMAsmBackend { + const MCRegisterInfo &MRI; public: const MachO::CPUSubTypeARM Subtype; ARMAsmBackendDarwin(const Target &T, const Triple &TT, - MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) { HasDataInCodeSupport = true; } @@ -28,6 +29,9 @@ public: return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } + + uint32_t generateCompactUnwindEncoding( + ArrayRef<MCCFIInstruction> Instrs) const override; }; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 804d3534096a..52eba8be288f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -95,7 +95,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; - case MCSymbolRefExpr::VK_GOTPCREL: + case MCSymbolRefExpr::VK_ARM_GOT_PREL: Type = ELF::R_ARM_GOT_PREL; break; } @@ -192,7 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; - case MCSymbolRefExpr::VK_GOTPCREL: + case MCSymbolRefExpr::VK_ARM_GOT_PREL: Type = ELF::R_ARM_GOT_PREL; break; case MCSymbolRefExpr::VK_ARM_TARGET1: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index d17fdb95dbdf..6084f22c8470 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, - StringRef StrinValue) override; + StringRef StringValue) override; void emitArch(unsigned Arch) override; void emitArchExtension(unsigned ArchExt) override; void emitObjectArch(unsigned Arch) override; @@ -195,16 +195,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, OS << "\n"; } void ARMTargetAsmStreamer::emitArch(unsigned Arch) { - OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n"; + OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { - OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n"; + OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { - OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n'; + OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { - OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n"; + OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n"; } void ARMTargetAsmStreamer::finishAttributeSection() {
} @@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, class ARMTargetELFStreamer : public ARMTargetStreamer { private: // This structure holds all attributes, accounting for - // their string/numeric value, so we can later emmit them + // their string/numeric value, so we can later emit them // in declaration order, keeping all in the same vector struct AttributeItem { enum { @@ -254,7 +254,7 @@ private: } Type; unsigned Tag; unsigned IntValue; - StringRef StringValue; + std::string StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { // The conformance tag must be emitted first when serialised @@ -507,14 +507,15 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) - if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) - getContext().reportFatalError(Loc, "relocated expression must be 32-bit"); + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) { + getContext().reportError(Loc, "relocated expression must be 32-bit"); + return; + } EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); + MCELFStreamer::EmitValueImpl(Value, Size, Loc); } void EmitAssemblerFlag(MCAssemblerFlag Flag) override { @@ -684,16 +685,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; setAttributeItem(CPU_name, - ARMTargetParser::getCPUAttr(Arch), + ARM::getCPUAttr(Arch), false); if (EmittedArch == ARM::AK_INVALID) setAttributeItem(CPU_arch, - ARMTargetParser::getArchAttr(Arch), + ARM::getArchAttr(Arch), false); else setAttributeItem(CPU_arch, - ARMTargetParser::getArchAttr(EmittedArch), + ARM::getArchAttr(EmittedArch), false); switch (Arch) { @@ -702,7 +703,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV3: case ARM::AK_ARMV3M: case ARM::AK_ARMV4: - case ARM::AK_ARMV5: setAttributeItem(ARM_ISA_use, Allowed, false); break; @@ -710,7 +710,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV5T: case ARM::AK_ARMV5TE: case ARM::AK_ARMV6: - case ARM::AK_ARMV6J: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); break; @@ -721,8 +720,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; case ARM::AK_ARMV6K: - case ARM::AK_ARMV6Z: - case ARM::AK_ARMV6ZK: + case ARM::AK_ARMV6KZ: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); setAttributeItem(Virtualization_use, AllowTZ, false); @@ -732,10 +730,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(THUMB_ISA_use, Allowed, false); break; - case ARM::AK_ARMV7: - setAttributeItem(THUMB_ISA_use, AllowThumb32, false); - break; - case ARM::AK_ARMV7A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); @@ -755,6 +749,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV8A: case ARM::AK_ARMV8_1A: + case ARM::AK_ARMV8_2A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -1084,19 +1079,14 @@ inline void
ARMELFStreamer::SwitchToEHSection(const char *Prefix, } inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) { - SwitchToEHSection(".ARM.extab", - ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, - SectionKind::getDataRel(), - FnStart); + SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC, + SectionKind::getData(), FnStart); } inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { - SwitchToEHSection(".ARM.exidx", - ELF::SHT_ARM_EXIDX, + SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX, ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, - SectionKind::getDataRel(), - FnStart); + SectionKind::getData(), FnStart); } void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { MCDataFragment *Frag = getOrCreateDataFragment(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1ac08159bd3d..bda37f6616a8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -33,7 +33,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::SjLj; + ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS() + ? ExceptionHandling::SjLj + : ExceptionHandling::DwarfCFI; UseIntegratedAssembler = true; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 99a5fff5ec27..5e548162bec6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -19,34 +19,37 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); - }; +public: + explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); +}; - class ARMELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit ARMELFMCAsmInfo(const Triple &TT); +class ARMELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; - void setUseIntegratedAssembler(bool Value) override; - }; +public: + explicit ARMELFMCAsmInfo(const Triple &TT); - class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor() override; - public: - explicit ARMCOFFMCAsmInfoMicrosoft(); - }; + void setUseIntegratedAssembler(bool Value) override; +}; - class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { - void anchor() override; - public: - explicit ARMCOFFMCAsmInfoGNU(); - }; +class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + void anchor() override; + +public: + explicit ARMCOFFMCAsmInfoMicrosoft(); +}; + +class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { + void anchor() override; + +public: + explicit ARMCOFFMCAsmInfoGNU(); +}; } // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 9146d4def75a..75dde8008fca 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -63,8 +63,8 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS ARMMCExprs at the moment. 
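One change that recurs across the streamer and Mach-O writer hunks in this patch is the move from MCContext::reportFatalError to MCContext::reportError plus an early return, so malformed input produces a diagnostic instead of killing the assembler outright. A minimal sketch of the shape, with Ctx, Loc and Sym standing in for the MCContext, SMLoc and MCSymbol available at each call site:

    if (!Sym->getFragment()) {
      Ctx.reportError(Loc, "symbol '" + Sym->getName() +
                               "' can not be undefined in a subtraction expression");
      return; // error recorded; assembly continues and fails cleanly at the end
    }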
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 21c9fc1e58b2..8c8c249addb5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -134,101 +135,11 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { bool isThumb = TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb; - bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; - switch (TT.getSubArch()) { - default: - llvm_unreachable("invalid sub-architecture for ARM"); - case Triple::ARMSubArch_v8: - if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8"; - break; - case Triple::ARMSubArch_v8_1a: - if (NoCPU) - // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a - ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8.1a"; - break; - case Triple::ARMSubArch_v7m: - isThumb = true; - if (NoCPU) - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7em: - if (NoCPU) - // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7s: - if (NoCPU) - // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS - // Swift - ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7: - // v7 CPUs have lots of different feature sets. If no CPU is specified, - // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return - // the "minimum" feature set and use CPU string to figure out the exact - // features. - if (NoCPU) - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. 
- ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v6t2: - ARMArchFeature = "+v6t2"; - break; - case Triple::ARMSubArch_v6k: - ARMArchFeature = "+v6k"; - break; - case Triple::ARMSubArch_v6m: - isThumb = true; - if (NoCPU) - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6m,+noarm,+mclass"; - else - ARMArchFeature = "+v6"; - break; - case Triple::ARMSubArch_v6: - ARMArchFeature = "+v6"; - break; - case Triple::ARMSubArch_v5te: - ARMArchFeature = "+v5te"; - break; - case Triple::ARMSubArch_v5: - ARMArchFeature = "+v5t"; - break; - case Triple::ARMSubArch_v4t: - ARMArchFeature = "+v4t"; - break; - case Triple::NoSubArch: - break; - } + + unsigned ArchID = ARM::parseArch(TT.getArchName()); + if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic")) + ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str(); if (isThumb) { if (ARMArchFeature.empty()) diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index fd30623d79af..c2bbc8e828c4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -86,7 +86,8 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, // object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible); /// Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 95d7ea7c04a3..cfd504e533af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -150,10 +150,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!A->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); + return; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint32_t Value2 = 0; @@ -163,10 +165,12 @@ if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!SB->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); + return; + } // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_HALF_SECTDIFF; @@ -251,10 +255,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!A->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); + return; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -265,10 +271,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!SB->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); + return; + } // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_SECTDIFF; @@ -346,13 +354,15 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; unsigned RelocType = MachO::ARM_RELOC_VANILLA; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's // expected to always be resolvable at assembly time and not have any // relocations needed. - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation on symbol"); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation on symbol"); + return; + } // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index b680db5c3a78..dad50f2834ee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -27,8 +27,8 @@ ARMTargetStreamer::~ARMTargetStreamer() {} // The constant pool handling is shared by all ARMTargetStreamer // implementations. 
-const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { - return ConstantPools->addEntry(Streamer, Expr, 4); +const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) { + return ConstantPools->addEntry(Streamer, Expr, 4, Loc); } void ARMTargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index b993b1be4847..83fa084e60c7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -37,11 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } } -MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); +MCStreamer *llvm::createARMWinCOFFStreamer( + MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) { + auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); + return S; } diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 090a003424a4..5acb2d46f3e7 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -711,7 +711,6 @@ this in a target-independent way: we should probably fold that (when using "undefined at zero" semantics) to set the "defined at zero" bit and have the code generator expand out the right code. - //===---------------------------------------------------------------------===// Clean up the test/MC/ARM files to have more robust register choices. diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 3b4358b5d9bf..93e0ac4aa320 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -13,6 +13,7 @@ #include "Thumb1FrameLowering.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -84,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -100,7 +100,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; @@ -168,8 +172,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas.
@@ -232,11 +234,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } - // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInBlock += + MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); @@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = @@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - std::prev(MBBI)->getOpcode() == ARM::tPOP) { + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); @@ -388,66 +385,189 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - bool IsV4PopReturn = false; - for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + if (needPopSpecialFixUp(MF)) { + bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); + (void)Done; + assert(Done && "Emission of the special fixup failed!?"); + } +} + +bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + if (!needPopSpecialFixUp(*MBB.getParent())) + return true; + + MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); + return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); +} + +bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { + ARMFunctionInfo *AFI = + const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); + if (AFI->getArgRegsSaveSize()) + return true; + + // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. + for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) - IsV4PopReturn = true; - IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); + return true; - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value.
So instead - // we have to emit: - // POP {r3} - // ADD sp, #offset - // BX r3 - // If this would clobber a return value, then generate this sequence instead: - // MOV ip, r3 - // POP {r3} - // ADD sp, #offset - // MOV lr, r3 - // MOV r3, ip - // BX lr - if (ArgRegsSaveSize || IsV4PopReturn) { - // Get the last instruction, tBX_RET - MBBI = MBB.getLastNonDebugInstr(); - assert (MBBI->getOpcode() == ARM::tBX_RET); - DebugLoc dl = MBBI->getDebugLoc(); + return false; +} - if (AFI->getReturnRegsCount() <= 3) { - // Epilogue: pop saved LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); +bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, + bool DoIt) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ThumbRegisterInfo *RegInfo = + static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(ARM::R3, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R3, RegState::Define) - .addReg(ARM::R12, RegState::Kill)); - // Keep the tBX_RET instruction + // If MBBI is a return instruction, or is a tPOP followed by a return + // instruction in the successor BB, we may be able to directly restore + // LR in the PC. + // This is only possible with v5T ops (v4T can't change the Thumb bit via + // a POP PC instruction), and only if we do not need to emit any SP update. + // Otherwise, we need a temporary register to pop the value + // and copy that value into LR. + auto MBBI = MBB.getFirstTerminator(); + bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; + if (CanRestoreDirectly) { + if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) + CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET); + else { + auto MBBI_prev = MBBI; + MBBI_prev--; + assert(MBBI_prev->getOpcode() == ARM::tPOP); + assert(MBB.succ_size() == 1); + if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) + MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. + else + CanRestoreDirectly = false; } } + + if (CanRestoreDirectly) { + if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) + return true; + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); + // Copy implicit ops and popped registers, if any. + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef())) + MIB.addOperand(MO); + MIB.addReg(ARM::PC, RegState::Define); + // Erase the old instruction (tBX_RET or tPOP). + MBB.erase(MBBI); + return true; + } + + // Look for a temporary register to use. + // First, compute the liveness information.
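+ // (The backward walk below computes the liveness right before MBBI, so + // registers that are still live across the return, such as R0 carrying a + // return value, are kept out of the candidate set.)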
+ LivePhysRegs UsedRegs(STI.getRegisterInfo()); + UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); + // The semantics of pristines changed recently and now + // the callee-saved registers that are touched in the function + // are not part of the pristines set anymore. + // Add those callee-saved now. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + UsedRegs.addReg(CSRegs[i]); + + DebugLoc dl = DebugLoc(); + if (MBBI != MBB.end()) { + dl = MBBI->getDebugLoc(); + auto InstUpToMBBI = MBB.end(); + while (InstUpToMBBI != MBBI) + // The pre-decrement is on purpose here. + // We want to have the liveness right before MBBI. + UsedRegs.stepBackward(*--InstUpToMBBI); + } + + // Look for a register that can be directly used in the POP. + unsigned PopReg = 0; + // And some temporary register, just in case. + unsigned TemporaryReg = 0; + BitVector PopFriendly = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); + // Rebuild the GPRs from the high registers because they are removed + // from the GPR reg class for thumb1. + BitVector GPRsNoLRSP = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + GPRsNoLRSP |= PopFriendly; + GPRsNoLRSP.reset(ARM::LR); + GPRsNoLRSP.reset(ARM::SP); + GPRsNoLRSP.reset(ARM::PC); + for (int Register = GPRsNoLRSP.find_first(); Register != -1; + Register = GPRsNoLRSP.find_next(Register)) { + if (!UsedRegs.contains(Register)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Register)) { + PopReg = Register; + TemporaryReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TemporaryReg = Register; + } + } + + if (!DoIt && !PopReg && !TemporaryReg) + return false; + + assert((PopReg || TemporaryReg) && "Cannot get LR"); + + if (TemporaryReg) { + assert(!PopReg && "Unnecessary MOV is about to be inserted"); + PopReg = PopFriendly.find_first(); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { + // We couldn't use the direct restoration above, so + // perform the opposite conversion: tPOP_RET to tPOP. + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + bool Popped = false; + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::PC) { + MIB.addOperand(MO); + if (!MO.isImplicit()) + Popped = true; + } + // Is there anything left to pop? + if (!Popped) + MBB.erase(MIB.getInstr()); + // Erase the old instruction.
+ MBB.erase(MBBI); + MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); + } + + assert(PopReg && "Do not know how to get LR"); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(PopReg, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + + if (TemporaryReg) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill)); + + return true; } bool Thumb1FrameLowering:: @@ -461,8 +581,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; const TargetInstrInfo &TII = *STI.getInstrInfo(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); AddDefaultPred(MIB); for (unsigned i = CSI.size(); i != 0; --i) { @@ -501,31 +619,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); - bool NumRegs = false; + bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) + if (MBB.succ_empty()) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) + continue; + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(&*MI); + MI = MBB.erase(MI); + } else + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. continue; - // ARMv4T requires BX, see emitEpilogue - if (STI.hasV4TOps() && !STI.hasV5TOps()) - continue; - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB.copyImplicitOps(&*MI); - MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); - NumRegs = true; + NeedsPop = true; } // It's illegal to emit pop instruction without operands. - if (NumRegs) + if (NeedsPop) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index 31d57325ebd6..812f9830824d 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -45,6 +45,42 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + + /// Check whether or not the given \p MBB can be used as an epilogue + /// for the target. + /// The epilogue will be inserted before the first terminator of that block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + +private: + /// Check if the frame lowering of \p MF needs a special fixup + /// code sequence for the epilogue. + /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore + /// to LR, and we can't pop the value directly to the PC when + /// we need to update the SP after popping the value.
So instead + /// we have to emit: + /// POP {r3} + /// ADD sp, #offset + /// BX r3 + /// If this would clobber a return value, then generate this sequence instead: + /// MOV ip, r3 + /// POP {r3} + /// ADD sp, #offset + /// MOV lr, r3 + /// MOV r3, ip + /// BX lr + bool needPopSpecialFixUp(const MachineFunction &MF) const; + + /// Emit the special fixup code sequence for the epilogue. + /// \see needPopSpecialFixUp for more details. + /// \p DoIt tells this method whether or not to actually insert + /// the code sequence in \p MBB. I.e., when \p DoIt is false, + /// \p MBB is left untouched. + /// \returns For \p DoIt == true: True when the emission succeeded, + /// false otherwise. For \p DoIt == false: True when the emission + /// would have been possible, false otherwise. + bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 216e776932be..530e1d33839a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -84,11 +84,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); @@ -112,11 +110,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 68736bc1decd..bf0498dfda69 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -256,8 +256,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle.
- MachineBasicBlock::instr_iterator LI = LastITMI; - finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI)); + finalizeBundle(MBB, InsertPos.getInstrIterator(), + ++LastITMI->getIterator()); Modified = true; ++NumITs; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index dc74f4e38ff8..4da769f23280 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -131,11 +131,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || @@ -171,11 +169,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index d9ab824995c1..bcd0e5751258 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -125,7 +125,10 @@ namespace { { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, - // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent + // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent. + // tSTMIA_UPD is a change in semantics which can only be used if the base + // register is killed. This difference is correctly handled elsewhere. + { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } }; @@ -210,12 +213,12 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) { for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) - assert(false && "Duplicated entries?"); + llvm_unreachable("Duplicated entries?"); } } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) + for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; @@ -435,6 +438,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } + case ARM::t2STMIA: { + // If the base register is killed, we don't care what its value is after the + // instruction, so we can use an updating STMIA.
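+ // For example (illustrative): "stm r0, {r1, r2}" with r0 killed can be + // narrowed to the 16-bit "stm r0!, {r1, r2}"; the write-back clobbers r0, + // but its value is dead afterwards anyway.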
+ if (!MI->getOperand(0).isKill()) + return false; + + break; + } + case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) @@ -492,6 +503,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); + + // tSTMIA_UPD takes a defining register operand. We've already checked that + // the register is killed, so mark it as dead here. + if (Entry.WideOpc == ARM::t2STMIA) + MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); + if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -633,10 +650,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; - if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && - STI->avoidMOVsShifterOperand()) + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we - // are optimizing / minimizing for size. + // are optimizing for size. return false; unsigned Reg0 = MI->getOperand(0).getReg(); @@ -660,11 +676,13 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. - unsigned CommOpIdx1, CommOpIdx2; + unsigned CommOpIdx1 = 1; + unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || - CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) + MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; - MachineInstr *CommutedMI = TII->commuteInstruction(MI); + MachineInstr *CommutedMI = + TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2); if (!CommutedMI) return false; } @@ -750,10 +768,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; - if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && - STI->avoidMOVsShifterOperand()) + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we - // are optimizing / minimizing for size. + // are optimizing for size. return false; unsigned Limit = ~0U; @@ -1012,9 +1029,9 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo()); - // Optimizing / minimizing size? - OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); - MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); + // Optimizing / minimizing size? Minimizing size implies optimizing for size. + OptimizeSize = MF.getFunction()->optForSize(); + MinimizeSize = MF.getFunction()->optForMinSize(); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/lib/Target/AVR/AVR.td b/lib/Target/AVR/AVR.td new file mode 100644 index 000000000000..9e80717cd680 --- /dev/null +++ b/lib/Target/AVR/AVR.td @@ -0,0 +1,563 @@ +//===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===---------------------------------------------------------------------===// +// This is the top level entry point for the AVR target. +//===---------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===---------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===---------------------------------------------------------------------===// +// AVR Subtarget Features. +//===---------------------------------------------------------------------===// + +// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details +// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD. +// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`. +// avr2 (with SRAM) adds the rest of the variants. +// :TODO: s/AVRTiny/Tiny + + +// A feature set aggregates features, grouping them. We don't want to create a +// new member in AVRSubtarget (to store a value) for each set because we do not +// care if the set is supported, only the subfeatures inside the set. We fix +// this by simply setting the same dummy member for all feature sets, which is +// then ignored. +class FeatureSet<string name, string desc, list<SubtargetFeature> i> + : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>; + +// A family of microcontrollers, defining a set of supported features. +class Family<string name, list<SubtargetFeature> i> + : FeatureSet<name, !strconcat("The device is a part of the ", + !strconcat(name, " family")), i>; + +// The device has SRAM, and supports the bare minimum of +// SRAM-relevant instructions. +// +// These are: +// LD - all 9 variants +// ST - all 9 variants +// LDD - two variants for Y and Z +// STD - two variants for Y and Z +// `LDS Rd, K` +// `STS k, Rr` +// `PUSH`/`POP` +def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true", + "The device has random access memory">; + +// The device supports the `JMP k` and `CALL k` instructions. +def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true", + "The device supports the `JMP` and " + "`CALL` instructions">; + + +// The device supports the indirect branches `IJMP` and `ICALL`. +def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL", + "true", + "The device supports `IJMP`/`ICALL` " + "instructions">; + +// The device supports the extended indirect branches `EIJMP` and `EICALL`. +def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL", + "true", "The device supports the " + "`EIJMP`/`EICALL` instructions">; + +// The device supports the 16-bit `ADIW Rd, K` and `SBIW Rd, K` instructions. +def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW", + "true", "Enable 16-bit register-immediate " + "addition and subtraction instructions">; + +// The device has an 8-bit stack pointer (SP) register. +def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack", + "true", "The device has an 8-bit " + "stack pointer">; + +// The device supports the 16-bit GPR pair MOVW instruction. +def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true", + "The device supports the 16-bit MOVW " + "instruction">; + +// The device supports the `LPM` instruction, with implied destination being r0. +def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true", + "The device supports the `LPM` instruction">; + +// The device supports the `LPM Rd, Z[+]` instruction. +def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true", + "The device supports the `LPM Rd, Z[+]` " + "instruction">; + +// The device supports the `ELPM` instruction.
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true", + "The device supports the ELPM instruction">; + +// The device supports the `ELPM Rd, Z[+]` instructions. +def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true", + "The device supports the `ELPM Rd, Z[+]` " + "instructions">; + +// The device supports the `SPM` instruction. +def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true", + "The device supports the `SPM` instruction">; + +// The device supports the `SPM Z+` instruction. +def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true", + "The device supports the `SPM Z+` " + "instruction">; + +// The device supports the `DES k` instruction. +def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true", + "The device supports the `DES k` encryption " + "instruction">; + +// The device supports the Read-Write-Modify instructions +// XCH, LAS, LAC, and LAT. +def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true", + "The device supports the read-write-modify " + "instructions: XCH, LAS, LAC, LAT">; + +// The device supports the `[F]MUL[S][U]` family of instructions. +def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication", + "true", "The device supports the " + "multiplication instructions">; + +// The device supports the `BREAK` instruction. +def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true", + "The device supports the `BREAK` debugging " + "instruction">; + +// The device has instruction encodings specific to the Tiny core. +def FeatureTinyEncoding : SubtargetFeature<"tinyencoding", + "m_hasTinyEncoding", "true", + "The device has Tiny core specific " + "instruction encodings">; + +class ELFArch<string name> : SubtargetFeature<"", "ELFArch", + !strconcat("ELF::",name), "">; + +// ELF e_flags architecture values +def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">; +def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">; +def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">; +def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">; +def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">; +def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">; +def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">; +def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">; +def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">; +def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">; +def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; +def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">; +def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">; +def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">; +def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">; +def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">; +def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">; +def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">; + +//===---------------------------------------------------------------------===// +// AVR Families +//===---------------------------------------------------------------------===// + +// The device has at least the bare minimum that **every** single AVR +// device should have.
+def FamilyAVR0 : Family<"avr0", []>; + +def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>; + +def FamilyAVR2 : Family<"avr2", + [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW, + FeatureSRAM]>; + +def FamilyAVR25 : Family<"avr25", + [FamilyAVR2, FeatureMOVW, FeatureLPMX, + FeatureSPM, FeatureBREAK]>; + +def FamilyAVR3 : Family<"avr3", + [FamilyAVR2, FeatureJMPCALL]>; + +def FamilyAVR31 : Family<"avr31", + [FamilyAVR3, FeatureELPM]>; + +def FamilyAVR35 : Family<"avr35", + [FamilyAVR3, FeatureMOVW, FeatureLPMX, + FeatureSPM, FeatureBREAK]>; + +def FamilyAVR4 : Family<"avr4", + [FamilyAVR2, FeatureMultiplication, + FeatureMOVW, FeatureLPMX, FeatureSPM, + FeatureBREAK]>; + +def FamilyAVR5 : Family<"avr5", + [FamilyAVR3, FeatureMultiplication, + FeatureMOVW, FeatureLPMX, FeatureSPM, + FeatureBREAK]>; + +def FamilyAVR51 : Family<"avr51", + [FamilyAVR5, FeatureELPM, FeatureELPMX]>; + +def FamilyAVR6 : Family<"avr6", + [FamilyAVR51]>; + +def FamilyAVRTiny : Family<"avrtiny", + [FamilyAVR0, FeatureBREAK, FeatureSRAM, + FeatureTinyEncoding]>; + +def FamilyXMEGA : Family<"xmega", + [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX, + FeatureDES]>; + +def FamilyXMEGAU : Family<"xmegau", + [FamilyXMEGA, FeatureRMW]>; + +def FeatureSetSpecial : FeatureSet<"special", + "Enable use of the entire instruction " + "set - used for debugging", + [FeatureSRAM, FeatureJMPCALL, + FeatureIJMPCALL, FeatureEIJMPCALL, + FeatureADDSUBIW, FeatureMOVW, + FeatureLPM, FeatureLPMX, FeatureELPM, + FeatureELPMX, FeatureSPM, FeatureSPMX, + FeatureDES, FeatureRMW, + FeatureMultiplication, FeatureBREAK]>; + +//===---------------------------------------------------------------------===// +// AVR microcontrollers supported. +//===---------------------------------------------------------------------===// + +class Device<string Name, Family Fam, ELFArch Arch, + list<SubtargetFeature> ExtraFeatures = []> + : Processor<Name, NoItineraries, + !listconcat([Fam, Arch], ExtraFeatures)>; + +// Generic MCUs +// Note that several versions of GCC have strange ELF architecture +// settings for backwards compatibility - see `gas/config/tc-avr.c` +// in AVR binutils. We do not replicate this.
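+// The optional ExtraFeatures argument of Device layers device-specific +// features on top of a family; for example, at86rf401 below is an avr2 core +// that additionally implements MOVW and `LPM Rd, Z[+]`.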
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>; +def : Device<"avr2", FamilyAVR2, ELFArchAVR2>; +def : Device<"avr25", FamilyAVR25, ELFArchAVR25>; +def : Device<"avr3", FamilyAVR3, ELFArchAVR3>; +def : Device<"avr31", FamilyAVR31, ELFArchAVR31>; +def : Device<"avr35", FamilyAVR35, ELFArchAVR35>; +def : Device<"avr4", FamilyAVR4, ELFArchAVR4>; +def : Device<"avr5", FamilyAVR5, ELFArchAVR5>; +def : Device<"avr51", FamilyAVR51, ELFArchAVR51>; +def : Device<"avr6", FamilyAVR6, ELFArchAVR6>; +def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>; +def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>; +def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; +def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; +def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>; + +// Specific MCUs +def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; +def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>; +def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>; +def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>; +def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>; +def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, + [FeatureMOVW, FeatureLPMX]>; +def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>; +def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>; +def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>; +def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>; +def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>; +def : 
Device<"at43usb320", FamilyAVR31, ELFArchAVR31>; +def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>; +def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>; +def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>; +def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong +def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega161", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega163", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>; +def : 
Device<"atmega325p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>; +def : Device<"at94k", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>; +def : Device<"m3000", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128a", 
FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>; +def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>; +def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>; +def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>; +def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>; +def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>; +def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>; +def : 
Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>; + +//===---------------------------------------------------------------------===// +// Register File Description +//===---------------------------------------------------------------------===// + +include "AVRRegisterInfo.td" + +//===---------------------------------------------------------------------===// +// Instruction Descriptions +//===---------------------------------------------------------------------===// + +//include "AVRInstrInfo.td" + +//def AVRInstrInfo : InstrInfo; + +//===---------------------------------------------------------------------===// +// Calling Conventions +//===---------------------------------------------------------------------===// + +include "AVRCallingConv.td" + +//===---------------------------------------------------------------------===// +// Assembly Printers +//===---------------------------------------------------------------------===// + +// def AVRAsmWriter : AsmWriter { +// string AsmWriterClassName = "InstPrinter"; +// bit isMCAsmWriter = 1; +// } + +//===---------------------------------------------------------------------===// +// Assembly Parsers +//===---------------------------------------------------------------------===// + +// def AVRAsmParser : AsmParser { +// let ShouldEmitMatchRegisterName = 1; +// let ShouldEmitMatchRegisterAltName = 1; +// } + +// def AVRAsmParserVariant : AsmParserVariant { +// int Variant = 0; +// +// // Recognize hard coded registers. +// string RegisterPrefix = "$"; +// } + +//===---------------------------------------------------------------------===// +// Target Declaration +//===---------------------------------------------------------------------===// + +def AVR : Target { +// let InstructionSet = AVRInstrInfo; +// let AssemblyWriters = [AVRAsmWriter]; +// +// let AssemblyParsers = [AVRAsmParser]; +// let AssemblyParserVariants = [AVRAsmParserVariant]; +} + diff --git a/lib/Target/AVR/AVRCallingConv.td b/lib/Target/AVR/AVRCallingConv.td new file mode 100644 index 000000000000..d8cb3fe84022 --- /dev/null +++ b/lib/Target/AVR/AVRCallingConv.td @@ -0,0 +1,65 @@ +//===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for AVR architecture. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AVR Return Value Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_AVR : CallingConv +<[ + // i8 is returned in R24. + CCIfType<[i8], CCAssignToReg<[R24]>>, + + // i16 are returned in R25:R24, R23:R22, R21:R20 and R19:R18. + CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>> +]>; + +// Special return value calling convention for runtime functions. 
+def RetCC_AVR_RT : CallingConv
+<[
+  CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+  CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AVR Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// The calling conventions are implemented in custom C++ code.
+
+// Calling convention for variadic functions.
+def ArgCC_AVR_Vararg : CallingConv
+<[
+  // i16 values are always passed on the stack with an alignment of 1.
+  CCAssignToStack<2, 1>
+]>;
+
+// Special argument calling convention for
+// multiplication runtime functions.
+def ArgCC_AVR_RT_MUL : CallingConv
+<[
+  CCIfType<[i16], CCAssignToReg<[R27R26,R19R18]>>
+]>;
+
+// Special argument calling convention for
+// division runtime functions.
+def ArgCC_AVR_RT_DIV : CallingConv
+<[
+  CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
+  CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
+def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
diff --git a/lib/Target/AVR/AVRConfig.h b/lib/Target/AVR/AVRConfig.h
new file mode 100644
index 000000000000..65588bc50840
--- /dev/null
+++ b/lib/Target/AVR/AVRConfig.h
@@ -0,0 +1,15 @@
+//===-- AVRConfig.h - AVR Backend Configuration Header ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_CONFIG_H
+#define LLVM_AVR_CONFIG_H
+
+#define LLVM_AVR_GCC_COMPAT
+
+#endif // LLVM_AVR_CONFIG_H
diff --git a/lib/Target/AVR/AVRMachineFunctionInfo.h b/lib/Target/AVR/AVRMachineFunctionInfo.h
new file mode 100644
index 000000000000..6571d5d3e603
--- /dev/null
+++ b/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -0,0 +1,73 @@
+//===-- AVRMachineFunctionInfo.h - AVR machine function info ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AVR-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MACHINE_FUNCTION_INFO_H
+#define LLVM_AVR_MACHINE_FUNCTION_INFO_H
+
+#include "AVRConfig.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/**
+ * Contains AVR-specific information for each MachineFunction.
+ */
+class AVRMachineFunctionInfo : public MachineFunctionInfo {
+  /// Indicates if a register has been spilled by the register
+  /// allocator.
+  bool HasSpills;
+
+  /// Indicates if there are any fixed size allocas present.
+  /// Note that if there are only variable sized allocas this is set to false.
+  bool HasAllocas;
+
+  /// Indicates if arguments passed using the stack are being
+  /// used inside the function.
+  bool HasStackArgs;
+
+  /// Size of the callee-saved register portion of the
+  /// stack frame in bytes.
+  unsigned CalleeSavedFrameSize;
+
+  /// FrameIndex for start of varargs area.
+  int VarArgsFrameIndex;
+
+public:
+  AVRMachineFunctionInfo()
+      : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+        CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+  explicit AVRMachineFunctionInfo(MachineFunction &MF)
+      : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+        CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+  bool getHasSpills() const { return HasSpills; }
+  void setHasSpills(bool B) { HasSpills = B; }
+
+  bool getHasAllocas() const { return HasAllocas; }
+  void setHasAllocas(bool B) { HasAllocas = B; }
+
+  bool getHasStackArgs() const { return HasStackArgs; }
+  void setHasStackArgs(bool B) { HasStackArgs = B; }
+
+  unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+  void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; }
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+};
+
+} // end llvm namespace
+
+#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td
new file mode 100644
index 000000000000..32650fc66751
--- /dev/null
+++ b/lib/Target/AVR/AVRRegisterInfo.td
@@ -0,0 +1,216 @@
+//===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the AVR register file
+//===----------------------------------------------------------------------===//
+
+// 8-bit General purpose register definition.
+class AVRReg<bits<16> num,
+             string name,
+             list<Register> subregs = [],
+             list<string> altNames = []>
+  : RegisterWithSubRegs<name, subregs>
+{
+  field bits<16> Num = num;
+
+  let HWEncoding = num;
+  let Namespace = "AVR";
+  let SubRegs = subregs;
+  let AltNames = altNames;
+}
+
+// Subregister indices.
+let Namespace = "AVR" in +{ + def sub_lo : SubRegIndex<8>; + def sub_hi : SubRegIndex<8, 8>; +} + +let Namespace = "AVR" in { + def ptr : RegAltNameIndex; +} + + +//===----------------------------------------------------------------------===// +// 8-bit general purpose registers +//===----------------------------------------------------------------------===// + +def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>; +def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>; +def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>; +def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>; +def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>; +def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>; +def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>; +def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>; +def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>; +def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>; +def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>; +def R13 : AVRReg<13, "r13">, DwarfRegNum<[13]>; +def R14 : AVRReg<14, "r14">, DwarfRegNum<[14]>; +def R15 : AVRReg<15, "r15">, DwarfRegNum<[15]>; +def R16 : AVRReg<16, "r16">, DwarfRegNum<[16]>; +def R17 : AVRReg<17, "r17">, DwarfRegNum<[17]>; +def R18 : AVRReg<18, "r18">, DwarfRegNum<[18]>; +def R19 : AVRReg<19, "r19">, DwarfRegNum<[19]>; +def R20 : AVRReg<20, "r20">, DwarfRegNum<[20]>; +def R21 : AVRReg<21, "r21">, DwarfRegNum<[21]>; +def R22 : AVRReg<22, "r22">, DwarfRegNum<[22]>; +def R23 : AVRReg<23, "r23">, DwarfRegNum<[23]>; +def R24 : AVRReg<24, "r24">, DwarfRegNum<[24]>; +def R25 : AVRReg<25, "r25">, DwarfRegNum<[25]>; +def R26 : AVRReg<26, "r26">, DwarfRegNum<[26]>; +def R27 : AVRReg<27, "r27">, DwarfRegNum<[27]>; +def R28 : AVRReg<28, "r28">, DwarfRegNum<[28]>; +def R29 : AVRReg<29, "r29">, DwarfRegNum<[29]>; +def R30 : AVRReg<30, "r30">, DwarfRegNum<[30]>; +def R31 : AVRReg<31, "r31">, DwarfRegNum<[31]>; +def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>; +def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>; + +let SubRegIndices = [sub_lo, sub_hi], +CoveredBySubRegs = 1 in +{ + // 16 bit GPR pairs. + def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>; + + // The pointer registers (X,Y,Z) are a special case because they + // are printed as a `high:low` pair when a DREG is expected, + // but printed using `X`, `Y`, `Z` when a pointer register is expected. 
+ let RegAltNameIndices = [ptr] in { + def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>; + def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>; + def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>; + } + def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>; + def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>; + def R21R20 : AVRReg<20, "r21:r20", [R20, R21]>, DwarfRegNum<[20]>; + def R19R18 : AVRReg<18, "r19:r18", [R18, R19]>, DwarfRegNum<[18]>; + def R17R16 : AVRReg<16, "r17:r16", [R16, R17]>, DwarfRegNum<[16]>; + def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>; + def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>; + def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>; + def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>; + def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>; + def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>; + def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>; + def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +//:TODO: use proper set instructions instead of using always "add" + +// Main 8-bit register class. +def GPR8 : RegisterClass<"AVR", [i8], 8, + ( + // Return value and argument registers. + add R24, R25, R18, R19, R20, R21, R22, R23, + // Scratch registers. + R30, R31, R26, R27, + // Callee saved registers. + R28, R29, R17, R16, R15, R14, R13, R12, R11, R10, + R9, R8, R7, R6, R5, R4, R3, R2, R0, R1 + )>; + +// Simple lower registers r0..r15 +def GPR8lo : RegisterClass<"AVR", [i8], 8, + ( + add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1 + )>; + +// 8-bit register class for instructions which take immediates. +def LD8 : RegisterClass<"AVR", [i8], 8, + ( + // Return value and arguments. + add R24, R25, R18, R19, R20, R21, R22, R23, + // Scratch registers. + R30, R31, R26, R27, + // Callee saved registers. + R28, R29, R17, R16 + )>; + +// Simple lower registers r16..r23 +def LD8lo : RegisterClass<"AVR", [i8], 8, + ( + add R23, R22, R21, R20, R19, R18, R17, R16 + )>; + +// Main 16-bit pair register class. +def DREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28, R17R16, R15R14, R13R12, R11R10, + R9R8, R7R6, R5R4, R3R2, R1R0 + )>; + +// 16-bit register class for immediate instructions. +def DLDREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28, R17R16 + )>; + +// 16-bit register class for the adiw/sbiw instructions. +def IWREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28 + )>; + +// 16-bit register class for the ld and st instructions. +// AKA X,Y, and Z +def PTRREGS : RegisterClass<"AVR", [i16], 8, + ( + add R27R26, // X + R29R28, // Y + R31R30 // Z + ), ptr>; + +// 16-bit register class for the ldd and std instructions. +// AKA Y and Z. 
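Before the displacement-capable subset is defined below, a quick illustration of the pairing scheme these classes encode: pair rN+1:rN covers the even register as sub_lo and the odd register as sub_hi, and only X (r27:r26), Y (r29:r28) and Z (r31:r30) can act as pointers for ld/st. The sketch is illustrative only and not part of the patch; pairName, isPointerPair and splitWord are made-up names.

// Illustrative only -- models the register pairing used by DREGS/PTRREGS.
#include <cassert>
#include <cstdint>
#include <string>

std::string pairName(unsigned PairBase) {
  assert(PairBase % 2 == 0 && PairBase <= 30 && "pairs start on even regs");
  return "r" + std::to_string(PairBase + 1) + ":r" + std::to_string(PairBase);
}

// Only the top three pairs are valid pointer operands.
bool isPointerPair(unsigned PairBase) {
  return PairBase == 26 || PairBase == 28 || PairBase == 30; // X, Y, Z
}

// Splitting a 16-bit value across a pair mirrors sub_lo/sub_hi above.
void splitWord(uint16_t V, uint8_t &Lo, uint8_t &Hi) {
  Lo = V & 0xff;        // even register (sub_lo)
  Hi = (V >> 8) & 0xff; // odd register (sub_hi)
}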
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, + ( + add R31R30, R29R28 + ), ptr>; + +// We have a bunch of instructions with an explicit Z register argument. We +// model this using a register class containing only the Z register. +// :TODO: Rename to 'ZREG'. +def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>; + +// Register class used for the stack read pseudo instruction. +def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>; + +//:TODO: if we remove this we get an error in tablegen +//:TODO: this is just a hack, remove it once add16 works! +// Status register. +def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>; +def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> +{ + let CopyCost = -1; // Don't allow copying of status registers +} + diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp new file mode 100644 index 000000000000..a91dce8a63f4 --- /dev/null +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -0,0 +1,4 @@ + +extern "C" void LLVMInitializeAVRTarget() { + +} diff --git a/lib/Target/AVR/CMakeLists.txt b/lib/Target/AVR/CMakeLists.txt new file mode 100644 index 000000000000..22b30ef35855 --- /dev/null +++ b/lib/Target/AVR/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_TARGET_DEFINITIONS AVR.td) + +tablegen(LLVM AVRGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AVRGenCallingConv.inc -gen-callingconv) +add_public_tablegen_target(AVRCommonTableGen) + +add_llvm_target(AVRCodeGen + AVRTargetMachine.cpp + ) + +add_dependencies(LLVMAVRCodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) + diff --git a/lib/Target/AVR/LLVMBuild.txt b/lib/Target/AVR/LLVMBuild.txt new file mode 100644 index 000000000000..386c594b20ec --- /dev/null +++ b/lib/Target/AVR/LLVMBuild.txt @@ -0,0 +1,33 @@ +;===- ./lib/Target/AVR/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = TargetInfo + +[component_0] +type = TargetGroup +name = AVR +parent = Target +has_asmprinter = 0 +has_asmparser = 0 + +[component_1] +type = Library +name = AVRCodeGen +parent = AVR +required_libraries = AsmPrinter CodeGen Core MC AVRInfo SelectionDAG Support Target +add_to_library_groups = AVR diff --git a/lib/Target/AVR/Makefile b/lib/Target/AVR/Makefile new file mode 100644 index 000000000000..c91b6f5c0ae9 --- /dev/null +++ b/lib/Target/AVR/Makefile @@ -0,0 +1,19 @@ +##===- lib/Target/AVR/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAVRCodeGen +TARGET = AVR + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AVRGenRegisterInfo.inc
+
+DIRS = TargetInfo
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
new file mode 100644
index 000000000000..c0e0d20029c2
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+Target TheAVRTarget;
+}
+
+extern "C" void LLVMInitializeAVRTargetInfo() {
+  llvm::RegisterTarget<llvm::Triple::avr> X(
+      llvm::TheAVRTarget, "avr", "Atmel AVR Microcontroller");
+}
+
+// FIXME: Temporary stub - this function must be defined for linking
+// to succeed. Remove once this function is properly implemented.
+extern "C" void LLVMInitializeAVRTargetMC() {
+}
diff --git a/lib/Target/AVR/TargetInfo/CMakeLists.txt b/lib/Target/AVR/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..f27090037702
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAVRInfo
+  AVRTargetInfo.cpp
+)
+
diff --git a/lib/Target/AVR/TargetInfo/LLVMBuild.txt b/lib/Target/AVR/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..bc6e0ad2ee19
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AVR/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AVRInfo
+parent = AVR
+required_libraries = MC Support
+add_to_library_groups = AVR
\ No newline at end of file
diff --git a/lib/Target/AVR/TargetInfo/Makefile b/lib/Target/AVR/TargetInfo/Makefile
new file mode 100644
index 000000000000..92b483dd028b
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AVR/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAVRInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/BPF/BPF.td b/lib/Target/BPF/BPF.td index a4ce90af0439..8493b0fd1e4a 100644 --- a/lib/Target/BPF/BPF.td +++ b/lib/Target/BPF/BPF.td @@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter { bit isMCAsmWriter = 1; } +def BPFAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "BPF"; + string BreakCharacters = "."; +} + def BPF : Target { let InstructionSet = BPFInstrInfo; let AssemblyWriters = [BPFInstPrinter]; + let AssemblyParserVariants = [BPFAsmParserVariant]; } diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 73418283d9bf..6a5b37e153d8 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -547,8 +547,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // ThisMBB: // ... diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index adcaff686933..4276d0858c2e 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -17,8 +17,6 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { -class MCOperand; - class BPFInstPrinter : public MCInstPrinter { public: BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 36f99262ed70..8c358cab62e8 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -68,16 +68,23 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); - return; - } - assert(Fixup.getKind() == FK_PCRel_2); - Value = (uint16_t)((Value - 8) / 8); - if (IsLittleEndian) { - Data[Fixup.getOffset() + 2] = Value & 0xFF; - Data[Fixup.getOffset() + 3] = Value >> 8; + } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { + unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8; + + for (unsigned i = 0; i != Size; ++i) { + unsigned Idx = IsLittleEndian ? i : Size - i; + Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8)); + } } else { - Data[Fixup.getOffset() + 2] = Value >> 8; - Data[Fixup.getOffset() + 3] = Value & 0xFF; + assert(Fixup.getKind() == FK_PCRel_2); + Value = (uint16_t)((Value - 8) / 8); + if (IsLittleEndian) { + Data[Fixup.getOffset() + 2] = Value & 0xFF; + Data[Fixup.getOffset() + 3] = Value >> 8; + } else { + Data[Fixup.getOffset() + 2] = Value >> 8; + Data[Fixup.getOffset() + 3] = Value & 0xFF; + } } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 05ba6183e322..87cdd5eb9dad 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -44,6 +44,10 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_X86_64_64; case FK_SecRel_4: return ELF::R_X86_64_PC32; + case FK_Data_8: + return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64; + case FK_Data_4: + return IsPCRel ? 
ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
   }
 }
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index d63bbf49294e..1f440fe87871 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -34,6 +34,8 @@ public:
     UsesELFSectionDirectiveForBSS = true;
     HasSingleParameterDotFile = false;
     HasDotTypeDotSizeDirective = false;
+
+    SupportsDebugInformation = true;
   }
 };
 }
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 272688edb8a1..5ea6551ebc9c 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -551,7 +551,8 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
 void CppWriter::printType(Type* Ty) {
   // We don't print definitions for primitive types
   if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() ||
-      Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy())
+      Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy() ||
+      Ty->isTokenTy())
     return;
 
   // If we already defined this type, we don't need to define it again.
@@ -1355,23 +1356,18 @@ void CppWriter::printInstruction(const Instruction *I,
   }
   case Instruction::GetElementPtr: {
     const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
-    if (gep->getNumOperands() <= 2) {
-      Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0];
-      if (gep->getNumOperands() == 2)
-        Out << ", " << opNames[1];
-    } else {
-      Out << "std::vector<Value*> " << iName << "_indices;";
-      nl(Out);
-      for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
-        Out << iName << "_indices.push_back("
-            << opNames[i] << ");";
-        nl(Out);
+    Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+        << getCppName(gep->getSourceElementType()) << ", " << opNames[0] << ", {";
+    in();
+    for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+      if (i != 1) {
+        Out << ", ";
       }
-      Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0] << ", " << iName << "_indices";
+      nl(Out);
+      Out << opNames[i];
     }
-    Out << ", \"";
+    out();
+    nl(Out) << "}, \"";
     printEscapedString(gep->getName());
     Out << "\", " << bbname << ");";
     break;
@@ -1803,13 +1799,12 @@ void CppWriter::printFunctionBody(const Function *F) {
           << "->arg_begin();";
       nl(Out);
     }
-    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-         AI != AE; ++AI) {
-      Out << "Value* " << getCppName(AI) << " = args++;";
+    for (const Argument &AI : F->args()) {
+      Out << "Value* " << getCppName(&AI) << " = args++;";
       nl(Out);
-      if (AI->hasName()) {
-        Out << getCppName(AI) << "->setName(\"";
-        printEscapedString(AI->getName());
+      if (AI.hasName()) {
+        Out << getCppName(&AI) << "->setName(\"";
+        printEscapedString(AI.getName());
         Out << "\");";
         nl(Out);
       }
@@ -1818,29 +1813,25 @@
 
   // Create all the basic blocks
   nl(Out);
-  for (Function::const_iterator BI = F->begin(), BE = F->end();
-       BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
+  for (const BasicBlock &BI : *F) {
+    std::string bbname(getCppName(&BI));
     Out << "BasicBlock* " << bbname <<
         " = BasicBlock::Create(mod->getContext(), \"";
-    if (BI->hasName())
-      printEscapedString(BI->getName());
-    Out << "\"," << getCppName(BI->getParent()) << ",0);";
+    if (BI.hasName())
+      printEscapedString(BI.getName());
+    Out << "\"," << getCppName(BI.getParent()) << ",0);";
     nl(Out);
   }
 
   // Output all of its basic blocks... for the function
-  for (Function::const_iterator BI = F->begin(), BE = F->end();
-       BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
-    nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+  for (const BasicBlock &BI : *F) {
+    std::string bbname(getCppName(&BI));
+    nl(Out) << "// Block " << BI.getName() << " (" << bbname << ")";
     nl(Out);
 
     // Output all of the instructions in the basic block...
-    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
-         I != E; ++I) {
-      printInstruction(I,bbname);
-    }
+    for (const Instruction &I : BI)
+      printInstruction(&I, bbname);
   }
 
   // Loop over the ForwardRefs and resolve them now that all instructions
@@ -1883,7 +1874,7 @@ void CppWriter::printInline(const std::string& fname,
   printFunctionUses(F);
   printFunctionBody(F);
   is_inline = false;
-  Out << "return " << getCppName(F->begin()) << ";";
+  Out << "return " << getCppName(&F->front()) << ";";
   nl(Out) << "}";
   nl(Out);
 }
@@ -1896,17 +1887,14 @@ void CppWriter::printModuleBody() {
   // Functions can call each other and global variables can reference them so
   // define all the functions first before emitting their function bodies.
   nl(Out) << "// Function Declarations"; nl(Out);
-  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-       I != E; ++I)
-    printFunctionHead(I);
+  for (const Function &I : *TheModule)
+    printFunctionHead(&I);
 
   // Process the global variable declarations. We can't initialize them until
   // after the constants are printed so just print a header for each global
   nl(Out) << "// Global Variable Declarations\n"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableHead(I);
-  }
+  for (const GlobalVariable &I : TheModule->globals())
+    printVariableHead(&I);
 
   // Print out all the constant definitions. Constants don't recurse except
   // through GlobalValues. All GlobalValues have been declared at this point
@@ -1918,21 +1906,18 @@
   // been emitted. These definitions just couple the gvars with their constant
   // initializers.
   nl(Out) << "// Global Variable Definitions"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableBody(I);
-  }
+  for (const GlobalVariable &I : TheModule->globals())
+    printVariableBody(&I);
 
   // Finally, we can safely put out all of the function bodies.
   nl(Out) << "// Function Definitions"; nl(Out);
-  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-       I != E; ++I) {
-    if (!I->isDeclaration()) {
-      nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+  for (const Function &I : *TheModule) {
+    if (!I.isDeclaration()) {
+      nl(Out) << "// Function: " << I.getName() << " (" << getCppName(&I)
               << ")";
       nl(Out) << "{";
       nl(Out,1);
-      printFunctionBody(I);
+      printFunctionBody(&I);
       nl(Out,-1) << "}";
       nl(Out);
     }
diff --git a/lib/Target/Hexagon/AsmParser/CMakeLists.txt b/lib/Target/Hexagon/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..bbfa92d59628
--- /dev/null
+++ b/lib/Target/Hexagon/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonAsmParser
+  HexagonAsmParser.cpp
+  )
+
+add_dependencies( LLVMHexagonAsmParser HexagonCommonTableGen )
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 000000000000..a8622a96527c
--- /dev/null
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
+//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcasmparser"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCExpr.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableFutureRegs("mfuture-regs",
+                                      cl::desc("Enable future registers"));
+
+static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
+cl::desc("Warn for missing parenthesis around predicate registers"),
+cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
+cl::desc("Error for missing parenthesis around predicate registers"),
+cl::init(false));
+static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
+cl::desc("Warn for mismatching a signed and unsigned value"),
+cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
+cl::desc("Warn for register names that aren't contiguous"),
+cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
+cl::desc("Error for register names that aren't contiguous"),
+cl::init(false));
+
+
+namespace {
+struct HexagonOperand;
+
+class HexagonAsmParser : public MCTargetAsmParser {
+
+  HexagonTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+    return static_cast<HexagonTargetStreamer &>(TS);
+  }
+
+  MCAsmParser &Parser;
+  MCAssembler *Assembler;
+  MCInstrInfo const &MCII;
+  MCInst MCB;
+  bool InBrackets;
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAssembler *getAssembler() const { return Assembler; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  bool equalIsAsmAssignment() override { return false; }
+  bool isLabel(AsmToken &Token) override;
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+  bool ParseDirectiveFalign(unsigned Size, SMLoc L);
+
+  virtual bool ParseRegister(unsigned &RegNo,
+                             SMLoc &StartLoc,
+                             SMLoc &EndLoc) override;
+  bool ParseDirectiveSubsection(SMLoc L);
+  bool ParseDirectiveValue(unsigned Size, SMLoc L);
+  bool ParseDirectiveComm(bool IsLocal, SMLoc L);
+  bool RegisterMatchesArch(unsigned MatchNum) const;
+
+  bool matchBundleOptions();
+  bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
+  bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
+  void canonicalizeImmediates(MCInst &MCI);
+  bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
+                           OperandVector &InstOperands, uint64_t &ErrorInfo,
+                           bool MatchingInlineAsm, bool &MustExtend);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+
+  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+  void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+  int processInstruction(MCInst &Inst, OperandVector const &Operands,
+                         SMLoc IDLoc, bool &MustExtend);
+
+  // Check if we have an assembler and, if so, set the ELF e_header flags.
+  void chksetELFHeaderEFlags(unsigned flags) {
+    if (getAssembler())
+      getAssembler()->setELFHeaderEFlags(flags);
+  }
+
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "HexagonGenAsmMatcher.inc"
+
+  /// }
+
+public:
+  HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+                   const MCInstrInfo &MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(Options, _STI), Parser(_Parser),
+        MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+    MCAsmParserExtension::Initialize(_Parser);
+
+    Assembler = nullptr;
+    // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+    if (!Parser.getStreamer().hasRawTextSupport()) {
+      MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+      Assembler = &MES->getAssembler();
+    }
+  }
+
+  bool mustExtend(OperandVector &Operands);
+  bool splitIdentifier(OperandVector &Operands);
+  bool parseOperand(OperandVector &Operands);
+  bool parseInstruction(OperandVector &Operands);
+  bool implicitExpressionLocation(OperandVector &Operands);
+  bool parseExpressionOrOperand(OperandVector &Operands);
+  bool parseExpression(MCExpr const *& Expr);
+  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                SMLoc NameLoc, OperandVector &Operands) override
+  {
+    llvm_unreachable("Unimplemented");
+  }
+  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                AsmToken ID, OperandVector &Operands) override;
+
+  virtual bool ParseDirective(AsmToken DirectiveID) override;
+};
+
+/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
+/// instruction.
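The struct that this comment introduces is a classic discriminated union: a Kind tag records which union member is live, and every accessor asserts on the tag before reading the data. As a minimal standalone sketch of the pattern (illustrative only, not part of the patch; all names below are made up):

// Illustrative only -- the tag-plus-union shape used by HexagonOperand below.
#include <cassert>

struct ToyOperand {
  enum KindTy { Token, Immediate, Register } Kind;

  union {
    const char *Tok; // live when Kind == Token
    long long Imm;   // live when Kind == Immediate
    unsigned RegNum; // live when Kind == Register
  };

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return RegNum;
  }

  static ToyOperand makeReg(unsigned R) {
    ToyOperand Op;
    Op.Kind = Register; // set the tag before the data is ever read
    Op.RegNum = R;
    return Op;
  }
};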
+struct HexagonOperand : public MCParsedAsmOperand { + enum KindTy { Token, Immediate, Register } Kind; + + SMLoc StartLoc, EndLoc; + + struct TokTy { + const char *Data; + unsigned Length; + }; + + struct RegTy { + unsigned RegNum; + }; + + struct ImmTy { + const MCExpr *Val; + bool MustExtend; + }; + + struct InstTy { + OperandVector *SubInsts; + }; + + union { + struct TokTy Tok; + struct RegTy Reg; + struct ImmTy Imm; + }; + + HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + +public: + HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case Register: + Reg = o.Reg; + break; + case Immediate: + Imm = o.Imm; + break; + case Token: + Tok = o.Tok; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate; } + bool isMem() const { llvm_unreachable("No isMem"); } + bool isReg() const { return Kind == Register; } + + bool CheckImmRange(int immBits, int zeroBits, bool isSigned, + bool isRelocatable, bool Extendable) const { + if (Kind == Immediate) { + const MCExpr *myMCExpr = getImm(); + if (Imm.MustExtend && !Extendable) + return false; + int64_t Res; + if (myMCExpr->evaluateAsAbsolute(Res)) { + int bits = immBits + zeroBits; + // Field bit range is zerobits + bits + // zeroBits must be 0 + if (Res & ((1 << zeroBits) - 1)) + return false; + if (isSigned) { + if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1))) + return true; + } else { + if (bits == 64) + return true; + if (Res >= 0) + return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false; + else { + const int64_t high_bit_set = 1ULL << 63; + const uint64_t mask = (high_bit_set >> (63 - bits)); + return (((uint64_t)Res & mask) == mask) ? 
true : false; + } + } + } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable) + return true; + else if (myMCExpr->getKind() == MCExpr::Binary || + myMCExpr->getKind() == MCExpr::Unary) + return true; + } + return false; + } + + bool isf32Ext() const { return false; } + bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } + bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } + bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } + bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); } + bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); } + bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); } + bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); } + bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); } + bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); } + + bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); } + bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); } + bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); } + bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); } + bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); } + bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); } + bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); } + bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); } + bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); } + bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); } + bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); } + bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); } + bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); } + bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); } + bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); } + bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); } + bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); } + bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); } + + bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); } + + bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } + bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } + bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } + bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } + bool iss8Ext() const { 
return CheckImmRange(8 + 26, 0, true, true, true); } + bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } + bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } + bool iss11_0Ext() const { + return CheckImmRange(11 + 26, 0, true, true, true); + } + bool iss11_1Ext() const { + return CheckImmRange(11 + 26, 1, true, true, true); + } + bool iss11_2Ext() const { + return CheckImmRange(11 + 26, 2, true, true, true); + } + bool iss11_3Ext() const { + return CheckImmRange(11 + 26, 3, true, true, true); + } + + bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } + bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } + bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } + bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } + bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } + bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); } + bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } + bool isu32MustExt() const { return isImm() && Imm.MustExtend; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createExpr(getImm())); + } + + void addSignedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCExpr const *Expr = getImm(); + int64_t Value; + if (!Expr->evaluateAsAbsolute(Value)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + int64_t Extended = SignExtend64 (Value, 32); + if ((Extended < 0) == (Value < 0)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + // Flip bit 33 to signal signed unsigned mismatch + Extended ^= 0x100000000; + Inst.addOperand(MCOperand::createImm(Extended)); + } + + void addf32ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds32ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8Imm64Operands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_0ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_1ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_2ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu64ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu26_6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + 
void addu16ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu11_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu5ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu4ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addm6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addn8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds16ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds12ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds10ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds9ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_0ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_1ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_2ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_3ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu6ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ExtOperands(MCInst &Inst, unsigned N) const { + 
addImmOperands(Inst, N);
+  }
+  void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+  void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+  void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+
+  void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+  }
+
+  void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+  }
+
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  virtual void print(raw_ostream &OS) const;
+
+  static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
+    HexagonOperand *Op = new HexagonOperand(Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+
+  static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
+                                                   SMLoc E) {
+    HexagonOperand *Op = new HexagonOperand(Register);
+    Op->Reg.RegNum = RegNum;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+
+  static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
+                                                   SMLoc E) {
+    HexagonOperand *Op = new HexagonOperand(Immediate);
+    Op->Imm.Val = Val;
+    Op->Imm.MustExtend = false;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+};
+
+} // end anonymous namespace.
+
+void HexagonOperand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case Immediate:
+    getImm()->print(OS, nullptr);
+    break;
+  case Register:
+    OS << "";
+    break;
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  }
+}
+
+/// @name Auto-generated Match Functions
+static unsigned MatchRegisterName(StringRef Name);
+
+bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
+  DEBUG(dbgs() << "Bundle:");
+  DEBUG(MCB.dump_pretty(dbgs()));
+  DEBUG(dbgs() << "--\n");
+
+  // Check the bundle for errors.
+  const MCRegisterInfo *RI = getContext().getRegisterInfo();
+  HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+
+  bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
+                                                        getContext(), MCB,
+                                                        &Check);
+
+  while (Check.getNextErrInfo()) {
+    unsigned Reg = Check.getErrRegister();
+    Twine R(RI->getName(Reg));
+
+    uint64_t Err = Check.getError();
+    if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
+      if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
+        Error(IDLoc,
+              "unconditional branch cannot precede another branch in packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
+          HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
+        Error(IDLoc, "register `" + R +
+                         "' used with `.new' "
+                         "but not validly modified in the same packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
+        Error(IDLoc, "register `" + R + "' modified more than once");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
+        Error(IDLoc, "cannot write to read-only register `" + R + "'");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
+        Error(IDLoc, "loop-setup and some branch instructions "
+                     "cannot be in the same packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
+        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+        Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+                         "cannot contain instructions that modify register " +
+                         "`" + R + "'");
+      }
+
+      if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
+        Error(IDLoc,
+              "instruction cannot appear in packet with other instructions");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
+        Error(IDLoc, "too many slots used in packet");
+
+      if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
+        uint64_t Erm = Check.getShuffleError();
+
+        if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
+          Error(IDLoc, "invalid instruction packet");
+        else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
+          Error(IDLoc, "invalid instruction packet: too many stores");
+        else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
+          Error(IDLoc, "invalid instruction packet: too many loads");
+        else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
+          Error(IDLoc, "too many branches in packet");
+        else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
+          Error(IDLoc, "invalid instruction packet: out of slots");
+        else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
+          Error(IDLoc, "invalid instruction packet: slot error");
+        else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
+          Error(IDLoc, "v60 packet violation");
+        else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
+          Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+        else
+          Error(IDLoc, "unknown error in instruction packet");
+      }
+    }
+
+    unsigned Warn = Check.getWarning();
+    if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
+      if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
+        Warning(IDLoc, "register `" + R + "' used with `.cur' "
+                       "but not used in the same packet");
+      else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
+        Warning(IDLoc, "register `" + R + "' used with `.tmp' "
+                       "but not used in the same packet");
+    }
+  }
+
+  if (CheckOk) {
+    MCB.setLoc(IDLoc);
+    if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
+      assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
+      assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
+      // Empty packets are valid yet aren't emitted.
+      return false;
+    }
+    Out.EmitInstruction(MCB, getSTI());
+  } else {
+    // If compounding and duplexing didn't reduce the packet to four or
+    // fewer instructions, we have a packet that is too big.
+    if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+      Error(IDLoc, "invalid instruction packet: out of slots");
+      return true; // Error
+    }
+  }
+
+  return false; // No error
+}
+
+bool HexagonAsmParser::matchBundleOptions() {
+  MCAsmParser &Parser = getParser();
+  MCAsmLexer &Lexer = getLexer();
+  while (true) {
+    if (!Parser.getTok().is(AsmToken::Colon))
+      return false;
+    Lexer.Lex();
+    StringRef Option = Parser.getTok().getString();
+    if (Option.compare_lower("endloop0") == 0)
+      HexagonMCInstrInfo::setInnerLoop(MCB);
+    else if (Option.compare_lower("endloop1") == 0)
+      HexagonMCInstrInfo::setOuterLoop(MCB);
+    else if (Option.compare_lower("mem_noshuf") == 0)
+      HexagonMCInstrInfo::setMemReorderDisabled(MCB);
+    else if (Option.compare_lower("mem_shuf") == 0)
+      HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
+    else
+      return true;
+    Lexer.Lex();
+  }
+}
+
+// For instruction aliases, immediates are generated as plain integers rather
+// than as MCConstantExpr; convert them to a uniform MCExpr representation.
+// Also check for signed/unsigned mismatches and warn.
+void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
+  MCInst NewInst;
+  NewInst.setOpcode(MCI.getOpcode());
+  for (MCOperand &I : MCI)
+    if (I.isImm()) {
+      int64_t Value(I.getImm());
+      if ((Value & 0x100000000) != (Value & 0x80000000)) {
+        // Detect bit 33 flipped with respect to bit 32 and signal a warning.
+        Value ^= 0x100000000;
+        if (WarnSignedMismatch)
+          Warning(MCI.getLoc(), "Signed/Unsigned mismatch");
+      }
+      NewInst.addOperand(MCOperand::createExpr(
+          MCConstantExpr::create(Value, getContext())));
+    }
+    else
+      NewInst.addOperand(I);
+  MCI = NewInst;
+}
+
+bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
+                                           OperandVector &InstOperands,
+                                           uint64_t &ErrorInfo,
+                                           bool MatchingInlineAsm,
+                                           bool &MustExtend) {
+  // Perform matching with the tablegen asmmatcher-generated function.
+  int result =
+      MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
+  if (result == Match_Success) {
+    MCI.setLoc(IDLoc);
+    MustExtend = mustExtend(InstOperands);
+    canonicalizeImmediates(MCI);
+    result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);
+
+    DEBUG(dbgs() << "Insn:");
+    DEBUG(MCI.dump_pretty(dbgs()));
+    DEBUG(dbgs() << "\n\n");
+
+    MCI.setLoc(IDLoc);
+  }
+
+  // Create an instruction operand for the bundle instruction.
+  // TODO: Break this into a separate function; the code here is less readable.
+  // Think about how to get an instruction error to report correctly;
+  // SMLoc will return the "{".
+  switch (result) {
+  default:
+    break;
+  case Match_Success:
+    return false;
+  case Match_MissingFeature:
+    return Error(IDLoc, "invalid instruction");
+  case Match_MnemonicFail:
+    return Error(IDLoc, "unrecognized instruction");
+  case Match_InvalidOperand:
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= InstOperands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc =
+          (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
+              ->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
+    }
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  llvm_unreachable("Implement any new match types added!");
+}
+
+bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
+  unsigned Count = 0;
+  for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
+    if (i->isImm())
+      if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
+        ++Count;
+  // Multiple extenders should have been filtered by iss9Ext et al.
+  assert(Count < 2 && "Multiple extenders");
+  return Count == 1;
+}
+
+bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                               OperandVector &Operands,
+                                               MCStreamer &Out,
+                                               uint64_t &ErrorInfo,
+                                               bool MatchingInlineAsm) {
+  if (!InBrackets) {
+    MCB.clear();
+    MCB.addOperand(MCOperand::createImm(0));
+  }
+  HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
+  if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
+    assert(Operands.size() == 1 && "Brackets should be by themselves");
+    if (InBrackets) {
+      getParser().Error(IDLoc, "Already in a packet");
+      return true;
+    }
+    InBrackets = true;
+    return false;
+  }
+  if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
+    assert(Operands.size() == 1 && "Brackets should be by themselves");
+    if (!InBrackets) {
+      getParser().Error(IDLoc, "Not in a packet");
+      return true;
+    }
+    InBrackets = false;
+    if (matchBundleOptions())
+      return true;
+    return finishBundle(IDLoc, Out);
+  }
+  MCInst *SubInst = new (getParser().getContext()) MCInst;
+  bool MustExtend = false;
+  if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
+                          MatchingInlineAsm, MustExtend))
+    return true;
+  HexagonMCInstrInfo::extendIfNeeded(
+      getParser().getContext(), MCII, MCB, *SubInst,
+      HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
+  MCB.addOperand(MCOperand::createInst(SubInst));
+  if (!InBrackets)
+    return finishBundle(IDLoc, Out);
+  return false;
+}
+
+/// ParseDirective parses the Hexagon-specific directives.
+bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
+    return ParseDirectiveValue(4, DirectiveID.getLoc());
+  if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
+      IDVal.lower() == ".half")
+    return ParseDirectiveValue(2, DirectiveID.getLoc());
+  if (IDVal.lower() == ".falign")
+    return ParseDirectiveFalign(256, DirectiveID.getLoc());
+  if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
+    return ParseDirectiveComm(true, DirectiveID.getLoc());
+  if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
+    return ParseDirectiveComm(false, DirectiveID.getLoc());
+  if (IDVal.lower() == ".subsection")
+    return ParseDirectiveSubsection(DirectiveID.getLoc());
+
+  return true;
+}
+bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
+  const MCExpr *Subsection = nullptr;
+  int64_t Res;
+
+  assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
+         "Invalid subsection directive");
+  getParser().parseExpression(Subsection);
+
+  if (!Subsection->evaluateAsAbsolute(Res))
+    return Error(L, "Cannot evaluate subsection number");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
+  // negative subsections together and in the same order, but at the opposite
+  // end of the section. Only assembly code created by legacy hexagon-gcc
+  // used negative subsections.
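+  // For example (input chosen purely for illustration): ".subsection -1"
+  // gives Res == -1, which the remapping below turns into 8192 + (-1) == 8191,
+  // the highest in-range subsection; ".subsection -8192" maps to 0.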
+  if ((Res < 0) && (Res > -8193))
+    Subsection = MCConstantExpr::create(8192 + Res, this->getContext());
+
+  getStreamer().SubSection(Subsection);
+  return false;
+}
+
+/// ::= .falign [expression]
+bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
+
+  int64_t MaxBytesToFill = 15;
+
+  // If there is an argument...
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    const MCExpr *Value;
+    SMLoc ExprLoc = L;
+
+    // Make sure we have a number (parseExpression returns false on success).
+    if (!getParser().parseExpression(Value)) {
+      // Make sure this is a number that is in range.
+      const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+      uint64_t IntValue = MCE->getValue();
+      if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
+        return Error(ExprLoc, "literal value out of range (256) for falign");
+      MaxBytesToFill = IntValue;
+      Lex();
+    } else {
+      return Error(ExprLoc, "not a valid expression for falign directive");
+    }
+  }
+
+  getTargetStreamer().emitFAlign(16, MaxBytesToFill);
+  Lex();
+
+  return false;
+}
+
+/// ::= .word [ expression (, expression)* ]
+bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+    for (;;) {
+      const MCExpr *Value;
+      SMLoc ExprLoc = L;
+      if (getParser().parseExpression(Value))
+        return true;
+
+      // Special case constant expressions to match code generator.
+      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+        assert(Size <= 8 && "Invalid size");
+        uint64_t IntValue = MCE->getValue();
+        if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+          return Error(ExprLoc, "literal value out of range for directive");
+        getStreamer().EmitIntValue(IntValue, Size);
+      } else
+        getStreamer().EmitValue(Value, Size);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return TokError("unexpected token in directive");
+      Lex();
+    }
+  }
+
+  Lex();
+  return false;
+}
+
+// This is largely a copy of AsmParser's ParseDirectiveComm, extended to
+// accept a third argument, AccessAlignment, which indicates the smallest
+// memory access made to the symbol, expressed in bytes. If no
+// AccessAlignment is specified, it defaults to the alignment value.
+// Hexagon's .lcomm:
+//   .lcomm Symbol, Length, Alignment, AccessAlignment
+bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
+  // FIXME: need a better way to detect the AsmStreamer (upstream removed
+  // getKind()).
+  if (getStreamer().hasRawTextSupport())
+    return true; // Only object file output requires special treatment.
+
+  StringRef Name;
+  if (getParser().parseIdentifier(Name))
+    return TokError("expected identifier in directive");
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Size;
+  SMLoc SizeLoc = getLexer().getLoc();
+  if (getParser().parseAbsoluteExpression(Size))
+    return true;
+
+  int64_t ByteAlignment = 1;
+  SMLoc ByteAlignmentLoc;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    ByteAlignmentLoc = getLexer().getLoc();
+    if (getParser().parseAbsoluteExpression(ByteAlignment))
+      return true;
+    if (!isPowerOf2_64(ByteAlignment))
+      return Error(ByteAlignmentLoc, "alignment must be a power of 2");
+  }
+
+  int64_t AccessAlignment = 0;
+  if (getLexer().is(AsmToken::Comma)) {
+    // The optional access argument specifies the size of the smallest memory
+    // access to be made to the symbol, expressed in bytes.
+    SMLoc AccessAlignmentLoc;
+    Lex();
+    AccessAlignmentLoc = getLexer().getLoc();
+    if (getParser().parseAbsoluteExpression(AccessAlignment))
+      return true;
+
+    if (!isPowerOf2_64(AccessAlignment))
+      return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+  Lex();
+
+  // NOTE: a zero size for .comm should create an undefined symbol,
+  // but a zero size for .lcomm creates a bss symbol of size zero.
+  if (Size < 0)
+    return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+                          "be less than zero");
+
+  // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+  // may internally end up wanting an alignment in bytes.
+  // FIXME: Diagnose overflow.
+  if (ByteAlignment < 0)
+    return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+                                   "alignment, can't be less than zero");
+
+  if (!Sym->isUndefined())
+    return Error(Loc, "invalid symbol redefinition");
+
+  HexagonMCELFStreamer &HexagonELFStreamer =
+      static_cast<HexagonMCELFStreamer &>(getStreamer());
+  if (IsLocal) {
+    HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
+                                                      AccessAlignment);
+    return false;
+  }
+
+  HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
+                                               AccessAlignment);
+  return false;
+}
+
+// Validate a register against the architecture.
+bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+  return true;
+}
+
+// extern "C" void LLVMInitializeHexagonAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() {
+  RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+}
+
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_REGISTER_MATCHER
+#include "HexagonGenAsmMatcher.inc"
+
+namespace {
+bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+  if (Index >= Operands.size())
+    return false;
+  MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
+  if (!Operand.isToken())
+    return false;
+  return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
+}
+bool previousIsLoop(OperandVector &Operands, size_t Index) {
+  return previousEqual(Operands, Index, "loop0") ||
+         previousEqual(Operands, Index, "loop1") ||
+         previousEqual(Operands, Index, "sp1loop0") ||
+         previousEqual(Operands, Index, "sp2loop0") ||
+         previousEqual(Operands, Index, "sp3loop0");
+}
+}
+
+bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
+  AsmToken const &Token = getParser().getTok();
+  StringRef String = Token.getString();
+  SMLoc Loc = Token.getLoc();
+  getLexer().Lex();
+  do {
+    std::pair<StringRef, StringRef> HeadTail = String.split('.');
+    if (!HeadTail.first.empty())
+      Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc));
+    if (!HeadTail.second.empty())
+      Operands.push_back(HexagonOperand::CreateToken(
+          String.substr(HeadTail.first.size(), 1), Loc));
+    String = HeadTail.second;
+  } while (!String.empty());
+  return false;
+}
+
+bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
+  unsigned Register;
+  SMLoc Begin;
+  SMLoc End;
+  MCAsmLexer &Lexer = getLexer();
+  if (!ParseRegister(Register, Begin, End)) {
+    if (!ErrorMissingParenthesis)
+      switch (Register) {
+      default:
+        break;
+      case Hexagon::P0:
+      case Hexagon::P1:
+      case Hexagon::P2:
+      case Hexagon::P3:
+        if (previousEqual(Operands, 0, "if")) {
+          if (WarnMissingParenthesis)
+            Warning(Begin, "Missing parenthesis around predicate register");
+          static char const *LParen = "(";
+          static char const *RParen = ")";
+          Operands.push_back(HexagonOperand::CreateToken(LParen, Begin));
+          Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+          AsmToken MaybeDotNew = Lexer.getTok();
+          if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+              MaybeDotNew.getString().equals_lower(".new"))
+            splitIdentifier(Operands);
+          Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+          return false;
+        }
+        if (previousEqual(Operands, 0, "!") &&
+            previousEqual(Operands, 1, "if")) {
+          if (WarnMissingParenthesis)
+            Warning(Begin, "Missing parenthesis around predicate register");
+          static char const *LParen = "(";
+          static char const *RParen = ")";
+          Operands.insert(Operands.end() - 1,
+                          HexagonOperand::CreateToken(LParen, Begin));
+          Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+          AsmToken MaybeDotNew = Lexer.getTok();
+          if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+              MaybeDotNew.getString().equals_lower(".new"))
+            splitIdentifier(Operands);
+          Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+          return false;
+        }
+        break;
+      }
+    Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+    return false;
+  }
+  return splitIdentifier(Operands);
+}
+
+bool HexagonAsmParser::isLabel(AsmToken &Token) {
+  MCAsmLexer &Lexer = getLexer();
+  AsmToken const &Second = Lexer.getTok();
+  AsmToken Third = Lexer.peekTok();
+  StringRef String = Token.getString();
+  if (Token.is(AsmToken::TokenKind::LCurly) ||
+      Token.is(AsmToken::TokenKind::RCurly))
+    return false;
+  if (!Token.is(AsmToken::TokenKind::Identifier))
+    return true;
+  if (!MatchRegisterName(String.lower()))
+    return true;
+  (void)Second;
+  assert(Second.is(AsmToken::Colon));
+  StringRef Raw(String.data(), Third.getString().data() - String.data() +
+                                   Third.getString().size());
+  std::string Collapsed = Raw;
+  Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+                  Collapsed.end());
+  StringRef Whole = Collapsed;
+  std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
+  if (!MatchRegisterName(DotSplit.first.lower()))
+    return true;
+  return false;
+}
+
+bool HexagonAsmParser::handleNoncontigiousRegister(bool Contiguous,
+                                                   SMLoc &Loc) {
+  if (!Contiguous && ErrorNoncontigiousRegister) {
+    Error(Loc, "Register name is not contiguous");
+    return true;
+  }
+  if (!Contiguous && WarnNoncontigiousRegister)
+    Warning(Loc, "Register name is not contiguous");
+  return false;
+}
+
+bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                     SMLoc &EndLoc) {
+  MCAsmLexer &Lexer = getLexer();
+  StartLoc = getLexer().getLoc();
+  SmallVector<AsmToken, 5> Lookahead;
+  StringRef RawString(Lexer.getTok().getString().data(), 0);
+  bool Again = Lexer.is(AsmToken::Identifier);
+  bool NeededWorkaround = false;
+  while (Again) {
+    AsmToken const &Token = Lexer.getTok();
+    RawString = StringRef(RawString.data(),
+                          Token.getString().data() - RawString.data() +
+                              Token.getString().size());
+    Lookahead.push_back(Token);
+    Lexer.Lex();
+    bool Contiguous = Lexer.getTok().getString().data() ==
+                      Lookahead.back().getString().data() +
+                          Lookahead.back().getString().size();
+    bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) ||
+                Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) ||
+                Lexer.is(AsmToken::Colon);
+    bool Workaround = Lexer.is(AsmToken::Colon) ||
+                      Lookahead.back().is(AsmToken::Colon);
+    Again = (Contiguous && Type) || (Workaround && Type);
+    NeededWorkaround = NeededWorkaround || (Again && !(Contiguous && Type));
+  }
+  std::string Collapsed = RawString;
+  Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+                  Collapsed.end());
+  StringRef FullString = Collapsed;
+  std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
+  unsigned DotReg = MatchRegisterName(DotSplit.first.lower());
+  if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
+    if (DotSplit.second.empty()) {
+      RegNo = DotReg;
+      EndLoc = Lexer.getLoc();
+      if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+        return true;
+      return false;
+    } else {
+      RegNo = DotReg;
+      size_t First = RawString.find('.');
+      StringRef DotString(RawString.data() + First, RawString.size() - First);
+      Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
+      EndLoc = Lexer.getLoc();
+      if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+        return true;
+      return false;
+    }
+  }
+  std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
+  unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower());
+  if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(ColonReg)) {
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+    RegNo = ColonReg;
+    EndLoc = Lexer.getLoc();
+    if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+      return true;
+    return false;
+  }
+  while (!Lookahead.empty()) {
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+  }
+  return true;
+}
+
+bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
+  if (previousEqual(Operands, 0, "call"))
+    return true;
+  if (previousEqual(Operands, 0, "jump"))
+    if (!getLexer().getTok().is(AsmToken::Colon))
+      return
true; + if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1)) + return true; + if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") && + (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t"))) + return true; + return false; +} + +bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { + llvm::SmallVector Tokens; + MCAsmLexer &Lexer = getLexer(); + bool Done = false; + static char const * Comma = ","; + do { + Tokens.emplace_back (Lexer.getTok()); + Lexer.Lex(); + switch (Tokens.back().getKind()) + { + case AsmToken::TokenKind::Hash: + if (Tokens.size () > 1) + if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + Tokens.insert(Tokens.end() - 2, + AsmToken(AsmToken::TokenKind::Comma, Comma)); + Done = true; + } + break; + case AsmToken::TokenKind::RCurly: + case AsmToken::TokenKind::EndOfStatement: + case AsmToken::TokenKind::Eof: + Done = true; + break; + default: + break; + } + } while (!Done); + while (!Tokens.empty()) { + Lexer.UnLex(Tokens.back()); + Tokens.pop_back(); + } + return getParser().parseExpression(Expr); +} + +bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { + if (implicitExpressionLocation(Operands)) { + MCAsmParser &Parser = getParser(); + SMLoc Loc = Parser.getLexer().getLoc(); + std::unique_ptr Expr = + HexagonOperand::CreateImm(nullptr, Loc, Loc); + MCExpr const *& Val = Expr->Imm.Val; + Operands.push_back(std::move(Expr)); + return parseExpression(Val); + } + return parseOperand(Operands); +} + +/// Parse an instruction. +bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + AsmToken const &Token = Parser.getTok(); + switch (Token.getKind()) { + case AsmToken::EndOfStatement: { + Lexer.Lex(); + return false; + } + case AsmToken::LCurly: { + if (!Operands.empty()) + return true; + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + return false; + } + case AsmToken::RCurly: { + if (Operands.empty()) { + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + } + return false; + } + case AsmToken::Comma: { + Lexer.Lex(); + continue; + } + case AsmToken::EqualEqual: + case AsmToken::ExclaimEqual: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + case AsmToken::LessEqual: + case AsmToken::LessLess: { + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(0, 1), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(1, 1), Token.getLoc())); + Lexer.Lex(); + continue; + } + case AsmToken::Hash: { + bool MustNotExtend = false; + bool ImplicitExpression = implicitExpressionLocation(Operands); + std::unique_ptr Expr = HexagonOperand::CreateImm( + nullptr, Lexer.getLoc(), Lexer.getLoc()); + if (!ImplicitExpression) + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + bool MustExtend = false; + bool HiOnly = false; + bool LoOnly = false; + if (Lexer.is(AsmToken::Hash)) { + Lexer.Lex(); + MustExtend = true; + } else if (ImplicitExpression) + MustNotExtend = true; + AsmToken const &Token = Parser.getTok(); + if (Token.is(AsmToken::Identifier)) { + StringRef String = Token.getString(); + AsmToken IDToken = Token; + if (String.lower() == "hi") { + HiOnly = true; + } else if (String.lower() == "lo") { + LoOnly = true; + } + if (HiOnly || LoOnly) { + AsmToken 
LParen = Lexer.peekTok(); + if (!LParen.is(AsmToken::LParen)) { + HiOnly = false; + LoOnly = false; + } else { + Lexer.Lex(); + } + } + } + if (parseExpression(Expr->Imm.Val)) + return true; + int64_t Value; + MCContext &Context = Parser.getContext(); + assert(Expr->Imm.Val != nullptr); + if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + if (HiOnly) + Expr->Imm.Val = MCBinaryExpr::createLShr( + Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + if (HiOnly || LoOnly) + Expr->Imm.Val = MCBinaryExpr::createAnd( + Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + } + if (MustNotExtend) + Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); + Expr->Imm.MustExtend = MustExtend; + Operands.push_back(std::move(Expr)); + continue; + } + default: + break; + } + if (parseExpressionOrOperand(Operands)) + return true; + } +} + +bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + AsmToken ID, + OperandVector &Operands) { + getLexer().UnLex(ID); + return parseInstruction(Operands); +} + +namespace { +MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { + MCInst TmpInst; + TmpInst.setOpcode(opCode); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(MO1); + TmpInst.addOperand(MO2); + + return TmpInst; +} +} + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + HexagonOperand *Op = static_cast(&AsmOp); + + switch (Kind) { + case MCK_0: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0 + ? Match_Success + : Match_InvalidOperand; + } + case MCK_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1 + ? Match_Success + : Match_InvalidOperand; + } + case MCK__MINUS_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 + ? 
Match_Success + : Match_InvalidOperand; + } + } + if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { + StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); + if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind) + return Match_Success; + if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind) + return Match_Success; + } + + DEBUG(dbgs() << "Unmatched Operand:"); + DEBUG(Op->dump()); + DEBUG(dbgs() << "\n"); + + return Match_InvalidOperand; +} + +void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { + std::string errStr; + raw_string_ostream ES(errStr); + ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; + if (Max >= 0) + ES << "0-" << Max; + else + ES << Max << "-" << (-Max - 1); + Error(IDLoc, ES.str().c_str()); +} + +int HexagonAsmParser::processInstruction(MCInst &Inst, + OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend) { + MCContext &Context = getParser().getContext(); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + std::string r = "r"; + std::string v = "v"; + std::string Colon = ":"; + + bool is32bit = false; // used to distinguish between CONST32 and CONST64 + switch (Inst.getOpcode()) { + default: + break; + + case Hexagon::M4_mpyrr_addr: + case Hexagon::S4_addi_asl_ri: + case Hexagon::S4_addi_lsr_ri: + case Hexagon::S4_andi_asl_ri: + case Hexagon::S4_andi_lsr_ri: + case Hexagon::S4_ori_asl_ri: + case Hexagon::S4_ori_lsr_ri: + case Hexagon::S4_or_andix: + case Hexagon::S4_subi_asl_ri: + case Hexagon::S4_subi_lsr_ri: { + MCOperand &Ry = Inst.getOperand(0); + MCOperand &src = Inst.getOperand(2); + if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg())) + return Match_InvalidOperand; + break; + } + + case Hexagon::C2_cmpgei: { + MCOperand &MO = Inst.getOperand(2); + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgti); + break; + } + + case Hexagon::C2_cmpgeui: { + MCOperand &MO = Inst.getOperand(2); + int64_t Value; + bool Success = MO.getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success && "Assured by matcher"); + if (Value == 0) { + MCInst TmpInst; + MCOperand &Pd = Inst.getOperand(0); + MCOperand &Rt = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::C2_cmpeq); + TmpInst.addOperand(Pd); + TmpInst.addOperand(Rt); + TmpInst.addOperand(Rt); + Inst = TmpInst; + } else { + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgtui); + } + break; + } + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0i: { + MCOperand &MO = Inst.getOperand(0); + // Loop has different opcodes for extended vs not extended, but we should + // not use the other opcode as it is a legacy artifact of TD files. + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + // if the operand can fit within a 7:2 field + if (Value < (1 << 8) && Value >= -(1 << 8)) { + SMLoc myLoc = Operands[2]->getStartLoc(); + // # is left in startLoc in the case of ## + // If '##' found then force extension. + if (*myLoc.getPointer() == '#') { + MustExtend = true; + break; + } + } else { + // If immediate and out of 7:2 range. 
+ MustExtend = true; + } + } + break; + } + + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode(Hexagon::A2_combinew); + break; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + break; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + break; + } + + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + is32bit = true; + // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast(&Parser.getStreamer()); + MCOperand &MO_1 = Inst.getOperand(1); + MCOperand &MO_0 = Inst.getOperand(0); + + // push section onto section stack + MES->PushSection(); + + std::string myCharStr; + MCSectionELF *mySection; + + // check if this as an immediate or a symbol + int64_t Value; + bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value); + if (Absolute) { + // Create a new section - one for each constant + // Some or all of the zeros are replaced with the given immediate. 
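+      // For instance (immediate chosen only to illustrate the naming
+      // scheme): a 32-bit constant 0x2A gives myImmStr == "2A", so the
+      // section is named ".gnu.linkonce.l4.CONST_0000002A"; skipping the
+      // first 16 characters below yields ".CONST_0000002A" as the symbol
+      // name.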
+ if (is32bit) { + std::string myImmStr = utohexstr(static_cast(Value)); + myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } else { + std::string myImmStr = utohexstr(Value); + myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } + + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else if (MO_1.isExpr()) { + // .lita - for expressions + myCharStr = ".lita"; + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->SwitchSection(mySection); + unsigned byteSize = is32bit ? 4 : 8; + getStreamer().EmitCodeAlignment(byteSize, byteSize); + + MCSymbol *Sym; + + // for symbols, get rid of prepended ".gnu.linkonce.lx." + + // emit symbol if needed + if (Absolute) { + Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16)); + if (Sym->isUndefined()) { + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); + getStreamer().EmitIntValue(Value, byteSize); + } + } else if (MO_1.isExpr()) { + const char *StringStart = 0; + const char *StringEnd = 0; + if (*Operands[4]->getStartLoc().getPointer() == '#') { + StringStart = Operands[5]->getStartLoc().getPointer(); + StringEnd = Operands[6]->getStartLoc().getPointer(); + } else { // no pound + StringStart = Operands[4]->getStartLoc().getPointer(); + StringEnd = Operands[5]->getStartLoc().getPointer(); + } + + unsigned size = StringEnd - StringStart; + std::string DotConst = ".CONST_"; + Sym = getContext().getOrCreateSymbol(DotConst + + StringRef(StringStart, size)); + + if (Sym->isUndefined()) { + // case where symbol is not yet defined: emit symbol + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Local); + getStreamer().EmitValue(MO_1.getExpr(), 4); + } + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->PopSection(); + + if (Sym) { + MCInst TmpInst; + if (is32bit) // 32 bit + TmpInst.setOpcode(Hexagon::L2_loadrigp); + else // 64 bit + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + + TmpInst.addOperand(MO_0); + TmpInst.addOperand( + MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext()))); + Inst = TmpInst; + } + } + break; + + // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)" + case Hexagon::A2_tfrpi: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; + MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); + break; + } + + // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)" + case Hexagon::TFRI64_V4: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + unsigned long long u64 = Value; + signed int s8 = (u64 >> 32) & 0xFFFFFFFF; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(s8, Context))); // upper 32 + MCOperand imm2(MCOperand::createExpr( + MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); + } else { + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(0, Context))); // upper 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); + } + break; + } + + // Handle $Rdd = combine(##imm, #imm)" + case Hexagon::TFRI64_V2_ext: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + MCOperand &MO2 = Inst.getOperand(2); + int64_t Value; + if (MO2.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2); + break; + } + + // Handle $Rdd = combine(#imm, ##imm)" + case Hexagon::A4_combineii: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + int64_t Value; + if (MO1.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + MCOperand &MO2 = Inst.getOperand(2); + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2); + break; + } + + case Hexagon::S2_tableidxb_goodsyntax: { + Inst.setOpcode(Hexagon::S2_tableidxb); + break; + } + + case Hexagon::S2_tableidxh_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxh); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxw_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxw); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxd_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxd); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + 
TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::M2_mpyui: { + Inst.setOpcode(Hexagon::M2_mpyi); + break; + } + case Hexagon::M2_mpysmi: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (!MustExtend) { + if (Value < 0 && Value > -256) { + Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else if (Value < 256 && Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } else { + if (Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + MCInst TmpInst; + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rd = $Rs + TmpInst.setOpcode(Hexagon::A2_tfr); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + } + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. 
+ std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); + } + break; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + Inst.setOpcode(Hexagon::A4_boundscheck_hi); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // raw:lo + Inst.setOpcode(Hexagon::A4_boundscheck_lo); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::A2_addsp: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi + Inst.setOpcode(Hexagon::A2_addsph); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped raw:lo + Inst.setOpcode(Hexagon::A2_addspl); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_acc_s1: { + MCInst TmpInst; + MCOperand &Rxx = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(2); + MCOperand &Rt = Inst.getOperand(3); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + // Registers are in different positions + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rss); + TmpInst.addOperand(Rt); + Inst = TmpInst; + break; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + std::string Name = + r + llvm::utostr_32(RegNum) + 
Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped rnd:sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) + Inst.setOpcode(Hexagon::S2_vsathub); + else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + } + break; + } + + case Hexagon::S5_vasrhrnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_vasrhrnd); + } + break; + } + + case Hexagon::A2_not: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::A2_subri); + TmpInst.addOperand(Rd); + TmpInst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(Rs); + Inst = TmpInst; + break; + } + } // switch + + return Match_Success; +} diff --git a/lib/Target/Hexagon/AsmParser/LLVMBuild.txt b/lib/Target/Hexagon/AsmParser/LLVMBuild.txt new file mode 100644 index 000000000000..fdd875b61906 --- /dev/null +++ b/lib/Target/Hexagon/AsmParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Hexagon/AsmParser/LLVMBuild.txt --------------*- Conf -*-===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HexagonAsmParser +parent = Hexagon +required_libraries = MC MCParser Support HexagonDesc HexagonInfo +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/AsmParser/Makefile b/lib/Target/Hexagon/AsmParser/Makefile new file mode 100644 index 000000000000..0aa0b4140c3e --- /dev/null +++ b/lib/Target/Hexagon/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Hexagon/AsmParser/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMHexagonAsmParser + +# Hack: we need to include 'main' Hexagon target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index cb7e633fb82f..ea96eb0ee10a 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -868,7 +868,7 @@ void BT::visitNonBranch(const MachineInstr *MI) { continue; bool Changed = false; - if (!Eval || !ResMap.has(RD.Reg)) { + if (!Eval || ResMap.count(RD.Reg) == 0) { // Set to "ref" (aka "bottom"). uint16_t DefBW = ME.getRegBitWidth(RD); RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); @@ -951,11 +951,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) { // be processed. for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { const MachineBasicBlock *SB = *I; - if (SB->isLandingPad()) + if (SB->isEHPad()) Targets.insert(SB); } if (FallsThrough) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); if (Next != MF.end()) Targets.insert(&*Next); @@ -1005,7 +1005,7 @@ void BT::put(RegisterRef RR, const RegisterCell &RC) { // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { - assert(Map.has(OldRR.Reg) && "OldRR not present in map"); + assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map"); BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); uint16_t OMB = OM.first(), OME = OM.last(); @@ -1104,9 +1104,9 @@ void BT::run() { } // If block end has been reached, add the fall-through edge to the queue. 
if (It == End) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); - if (Next != MF.end()) { + if (Next != MF.end() && B.isSuccessor(&*Next)) { int ThisN = B.getNumber(); int NextN = Next->getNumber(); FlowQ.push(CFGEdge(ThisN, NextN)); } diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index ed002a794d66..959c8318fd60 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -36,9 +36,7 @@ struct BitTracker { typedef SetVector<const MachineBasicBlock *> BranchTargetList; - struct CellMapType : public std::map<unsigned, RegisterCell> { - bool has(unsigned Reg) const; - }; + typedef std::map<unsigned, RegisterCell> CellMapType; BitTracker(const MachineEvaluator &E, MachineFunction &F); ~BitTracker(); @@ -79,7 +77,6 @@ private: // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} - BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {} bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); @@ -146,7 +143,6 @@ struct BitTracker::BitValue { BitValue(ValueType T = Top) : Type(T) {} BitValue(bool B) : Type(B ? One : Zero) {} - BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {} BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} bool operator== (const BitValue &V) const { @@ -279,11 +275,6 @@ struct BitTracker::RegisterCell { return !operator==(RC); } - const RegisterCell &operator=(const RegisterCell &RC) { - Bits = RC.Bits; - return *this; - } - // Generate a "ref" cell for the corresponding register. In the resulting // cell each bit will be described as being the same as the corresponding // bit in register Reg (i.e. the cell is "defined" by register Reg). @@ -344,11 +335,6 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { return RC; } - -inline bool BitTracker::CellMapType::has(unsigned Reg) const { - return find(Reg) != end(); -} - // A class to evaluate target's instructions and update the cell maps. // This is used internally by the bit tracker.
A target that wants to // utilize this should implement the evaluation functions (noted below) diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 7ab2f0ba01df..181e4e3aa85d 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_TARGET_DEFINITIONS Hexagon.td) +tablegen(LLVM HexagonGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) @@ -14,16 +15,19 @@ add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen BitTracker.cpp HexagonAsmPrinter.cpp + HexagonBitSimplify.cpp HexagonBitTracker.cpp HexagonCFGOptimizer.cpp HexagonCommonGEP.cpp HexagonCopyToCombine.cpp + HexagonEarlyIfConv.cpp HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp HexagonGenExtract.cpp HexagonGenInsert.cpp + HexagonGenMux.cpp HexagonGenPredicate.cpp HexagonHardwareLoops.cpp HexagonInstrInfo.cpp @@ -33,17 +37,21 @@ add_llvm_target(HexagonCodeGen HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonNewValueJump.cpp + HexagonOptimizeSZextends.cpp HexagonPeephole.cpp HexagonRegisterInfo.cpp - HexagonRemoveSZExtArgs.cpp HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp + HexagonSplitDouble.cpp + HexagonStoreWidening.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp + HexagonTargetTransformInfo.cpp HexagonVLIWPacketizer.cpp ) +add_subdirectory(AsmParser) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(Disassembler) diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 9cc1e944d359..4a9c3413cb29 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -7,42 +7,45 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-disassembler" + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" - -#include "llvm/MC/MCContext.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" -#include +#include "llvm/Support/TargetRegistry.h" #include using namespace llvm; using namespace Hexagon; -#define DEBUG_TYPE "hexagon-disassembler" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; +typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { /// \brief Hexagon disassembler for all Hexagon platforms. 
class HexagonDisassembler : public MCDisassembler { public: + std::unique_ptr const MCII; std::unique_ptr CurrentBundle; - HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {} + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, ArrayRef Bytes, uint64_t Address, @@ -52,23 +55,57 @@ public: ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; + + void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; + void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; }; } -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, +// Forward declare these because the auto-generated code will reference them. +// Definitions are further down. + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - void const *Decoder); + const void *Decoder); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn); +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder); static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, raw_ostream &os); -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst); +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t Address, const void *Decoder); static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, @@ -95,129 +132,19 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); - -static const uint16_t IntRegDecoderTable[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, - Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, - Hexagon::R25, 
Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, - Hexagon::R30, Hexagon::R31}; - -static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, - Hexagon::P2, Hexagon::P3}; - -static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - const uint16_t Table[], size_t Size) { - if (RegNo < Size) { - Inst.addOperand(MCOperand::createReg(Table[RegNo])); - return MCDisassembler::Success; - } else - return MCDisassembler::Fail; -} - -static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - unsigned Register = IntRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t CtrlRegDecoderTable[] = { - Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, - Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, - Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, - Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH}; - - if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - static const uint16_t CtrlReg64DecoderTable[] = { - Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, - Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister, - Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8, - Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister, - Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC, - Hexagon::NoRegister}; - - if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlReg64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - unsigned Register = 0; - switch (RegNo) { - case 0: - Register = Hexagon::M0; - break; - case 1: - Register = Hexagon::M1; - break; - default: - return MCDisassembler::Fail; - } - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t DoubleRegDecoderTable[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, - Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, - Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, - Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - - return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable, - sizeof(DoubleRegDecoderTable))); -} - -static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 3) - return MCDisassembler::Fail; - - unsigned Register = PredRegDecoderTable[RegNo]; - 
Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); #include "HexagonGenDisassemblerTables.inc" -static MCDisassembler *createHexagonDisassembler(Target const &T, - MCSubtargetInfo const &STI, +static MCDisassembler *createHexagonDisassembler(const Target &T, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new HexagonDisassembler(STI, Ctx); + return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeHexagonDisassembler() { @@ -235,8 +162,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size = 0; *CurrentBundle = &MI; - MI.setOpcode(Hexagon::BUNDLE); - MI.addOperand(MCOperand::createImm(0)); + MI = HexagonMCInstrInfo::createBundle(); while (Result == Success && Complete == false) { if (Bytes.size() < HEXAGON_INSTR_SIZE) return MCDisassembler::Fail; @@ -246,7 +172,21 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size += HEXAGON_INSTR_SIZE; Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); } - return Result; + if(Result == MCDisassembler::Fail) + return Result; + HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + if(!Checker.check()) + return MCDisassembler::Fail; + return MCDisassembler::Success; +} + +namespace { +HexagonDisassembler const &disassembler(void const *Decoder) { + return *static_cast<HexagonDisassembler const *>(Decoder); +} +MCContext &contextFromDecoder(void const *Decoder) { + return disassembler(Decoder).getContext(); +} } DecodeStatus HexagonDisassembler::getSingleInstruction( @@ -255,8 +195,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( assert(Bytes.size() >= HEXAGON_INSTR_SIZE); uint32_t Instruction = - llvm::support::endian::read(Bytes.data()); + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); if ((Instruction & HexagonII::INST_PARSE_MASK) == @@ -360,8 +299,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( MILow->setOpcode(opLow); MCInst *MIHigh = new (getContext()) MCInst; MIHigh->setOpcode(opHigh); - AddSubinstOperands(MILow, opLow, instLow); - AddSubinstOperands(MIHigh, opHigh, instHigh); + addSubinstOperands(MILow, opLow, instLow); + addSubinstOperands(MIHigh, opHigh, instHigh); // see ConvertToSubInst() in // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -378,102 +317,774 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( // Calling the auto-generated decoder function. Result = decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + + // If a "standard" insn isn't found, check special cases. + if (MCDisassembler::Success != Result || + MI.getOpcode() == Hexagon::A4_ext) { + Result = decodeImmext(MI, Instruction, this); + if (MCDisassembler::Success != Result) { + Result = decodeSpecial(MI, Instruction); + } + } else { + // If the instruction is a compound instruction, register values will + // follow the duplex model, so the register values in the MCInst are + // incorrect. If the instruction is a compound, loop through the + // operands and change registers appropriately.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == + HexagonII::TypeCOMPOUND) { + for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { + if (i->isReg()) { + unsigned reg = i->getReg() - Hexagon::R0; + i->setReg(getRegFromSubinstEncoding(reg)); + } + } + } + } + } + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { + unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); + MCOperand &MCO = MI.getOperand(OpIndex); + assert(MCO.isReg() && "New value consumers must be registers"); + unsigned Register = + getContext().getRegisterInfo()->getEncodingValue(MCO.getReg()); + if ((Register & 0x6) == 0) + // HexagonPRM 10.11 Bit 1-2 == 0 is reserved + return MCDisassembler::Fail; + unsigned Lookback = (Register & 0x6) >> 1; + unsigned Offset = 1; + bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.end() - 1; + for (auto n = Instructions.begin() - 1;; --i, ++Offset) { + if (i == n) + // Couldn't find producer + return MCDisassembler::Fail; + if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + // Skip scalars when calculating distances for vectors + ++Lookback; + if (HexagonMCInstrInfo::isImmext(*i->getInst())) + ++Lookback; + if (Offset == Lookback) + break; + } + auto const &Inst = *i->getInst(); + bool SubregBit = (Register & 0x1) != 0; + if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + // If subreg bit is set we're selecting the second produced newvalue + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; + else if (SubregBit) + // Subreg bit should not be set for non-doublevector newvalue producers + return MCDisassembler::Fail; + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else + return MCDisassembler::Fail; + } + + adjustExtendedInstructions(MI, MCB); + MCInst const *Extender = + HexagonMCInstrInfo::extenderForIndex(MCB, + HexagonMCInstrInfo::bundleSize(MCB)); + if(Extender != nullptr) { + MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? + *MI.getOperand(1).getInst() : MI; + if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) && + !HexagonMCInstrInfo::isExtended(*MCII, Inst)) + return MCDisassembler::Fail; + } return Result; } +void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, + MCInst const &MCB) const { + if (!HexagonMCInstrInfo::hasExtenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB))) { + unsigned opcode; + // This code is used by the disassembler to disambiguate between GP + // relative and absolute addressing instructions since they both have + // the same encoding bits. However, an absolute addressing instruction must + // follow an immediate extender. The disassembler always selects absolute + // addressing instructions first and uses this code to change them into + // GP relative instructions in the absence of the corresponding immediate + // extender.
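+    // A purely illustrative pairing (hypothetical packets, not from this
+    // patch):
+    //   { immext(#0x12340)
+    //     memw(##0x12345) = r0 }   // extender present: keep the _abs form
+    //   { memw(gp + #20) = r0 }    // no extender: use the _gp form
+    // Both stores use the same instruction-word encoding; only the immext
+    // word in front distinguishes them.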
+ switch (MCI.getOpcode()) { + case Hexagon::S2_storerbabs: + opcode = Hexagon::S2_storerbgp; + break; + case Hexagon::S2_storerhabs: + opcode = Hexagon::S2_storerhgp; + break; + case Hexagon::S2_storerfabs: + opcode = Hexagon::S2_storerfgp; + break; + case Hexagon::S2_storeriabs: + opcode = Hexagon::S2_storerigp; + break; + case Hexagon::S2_storerbnewabs: + opcode = Hexagon::S2_storerbnewgp; + break; + case Hexagon::S2_storerhnewabs: + opcode = Hexagon::S2_storerhnewgp; + break; + case Hexagon::S2_storerinewabs: + opcode = Hexagon::S2_storerinewgp; + break; + case Hexagon::S2_storerdabs: + opcode = Hexagon::S2_storerdgp; + break; + case Hexagon::L4_loadrb_abs: + opcode = Hexagon::L2_loadrbgp; + break; + case Hexagon::L4_loadrub_abs: + opcode = Hexagon::L2_loadrubgp; + break; + case Hexagon::L4_loadrh_abs: + opcode = Hexagon::L2_loadrhgp; + break; + case Hexagon::L4_loadruh_abs: + opcode = Hexagon::L2_loadruhgp; + break; + case Hexagon::L4_loadri_abs: + opcode = Hexagon::L2_loadrigp; + break; + case Hexagon::L4_loadrd_abs: + opcode = Hexagon::L2_loadrdgp; + break; + default: + opcode = MCI.getOpcode(); + } + MCI.setOpcode(opcode); + } +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + ArrayRef Table) { + if (RegNo < Table.size()) { + Inst.addOperand(MCOperand::createReg(Table[RegNo])); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + static const MCPhysReg IntRegDecoderTable[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; + + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); +} + +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecRegDecoderTable[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19, + Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24, + Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, + Hexagon::V30, Hexagon::V31}; + + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; + + return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable); 
+} + +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecDblRegDecoderTable[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, + Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, + Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + + return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable)); +} + +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; + + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); +} + +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; + + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); +} + +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC + }; + + if (RegNo >= array_lengthof(CtrlRegDecoderTable)) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= array_lengthof(CtrlReg64DecoderTable)) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +namespace { +uint32_t fullValue(MCInstrInfo const &MCII, + MCInst &MCB, + MCInst &MI, + int64_t Value) { + MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB)); + if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + return Value; + unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + uint32_t Lower6 = static_cast(Value >> Alignment) & 0x3f; + int64_t Bits; + bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); + 
assert(Success);(void)Success; + uint32_t Upper26 = static_cast(Bits); + uint32_t Operand = Upper26 | Lower6; + return Operand; +} +template +void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp)); + int64_t Extended = SignExtend64<32>(FullValue); + HexagonMCInstrInfo::addConstant(MI, Extended, + Disassembler.getContext()); +} +} + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, tmp); + assert(FullValue >= 0 && "Negative in unsigned decoder"); + HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); + return MCDisassembler::Success; +} + static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<16>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<16>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<12>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<11>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<11>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder)); return MCDisassembler::Success; } static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<13>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<13>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<14>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<14>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<10>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<10>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<8>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<8>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<4>(tmp); - 
MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<4>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<5>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<5>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<7>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<7>(MI, tmp, Decoder); return MCDisassembler::Success; } +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<19>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +// custom decoder for various jump/call immediates +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); + // r13_2 is not extendable, so if there are no extent bits, it's r13_2 + if (Bits == 0) + Bits = 15; + uint32_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp, Bits)); + int64_t Extended = SignExtend64<32>(FullValue) + Address; + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, + 0, 4)) + HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); + return MCDisassembler::Success; +} + +// Addressing mode dependent load store opcode map. +// - If an insn is preceded by an extender the address is absolute. +// - memw(##symbol) = r0 +// - If an insn is not preceded by an extender the address is GP relative. +// - memw(gp + #symbol) = r0 +// Please note that the instructions must be ordered in the descending order +// of their opcode. 
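+// As an illustration of why the ordering matters: the scan in decodeSpecial()
+// below keeps the first row whose fixed bits are all present in the word,
+// i.e. it tests (insn & bits) == bits. Any word that matches
+// S2_storerinewabs (0x48a01000) also passes that test for S2_storeriabs
+// (0x48800000), so the more specific new-value pattern must be tried first.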
+// HexagonII::INST_ICLASS_ST +static const unsigned int StoreConditionalOpcodeData[][2] = { + {S4_pstorerdfnew_abs, 0xafc02084}, + {S4_pstorerdtnew_abs, 0xafc02080}, + {S4_pstorerdf_abs, 0xafc00084}, + {S4_pstorerdt_abs, 0xafc00080}, + {S4_pstorerinewfnew_abs, 0xafa03084}, + {S4_pstorerinewtnew_abs, 0xafa03080}, + {S4_pstorerhnewfnew_abs, 0xafa02884}, + {S4_pstorerhnewtnew_abs, 0xafa02880}, + {S4_pstorerbnewfnew_abs, 0xafa02084}, + {S4_pstorerbnewtnew_abs, 0xafa02080}, + {S4_pstorerinewf_abs, 0xafa01084}, + {S4_pstorerinewt_abs, 0xafa01080}, + {S4_pstorerhnewf_abs, 0xafa00884}, + {S4_pstorerhnewt_abs, 0xafa00880}, + {S4_pstorerbnewf_abs, 0xafa00084}, + {S4_pstorerbnewt_abs, 0xafa00080}, + {S4_pstorerifnew_abs, 0xaf802084}, + {S4_pstoreritnew_abs, 0xaf802080}, + {S4_pstorerif_abs, 0xaf800084}, + {S4_pstorerit_abs, 0xaf800080}, + {S4_pstorerhfnew_abs, 0xaf402084}, + {S4_pstorerhtnew_abs, 0xaf402080}, + {S4_pstorerhf_abs, 0xaf400084}, + {S4_pstorerht_abs, 0xaf400080}, + {S4_pstorerbfnew_abs, 0xaf002084}, + {S4_pstorerbtnew_abs, 0xaf002080}, + {S4_pstorerbf_abs, 0xaf000084}, + {S4_pstorerbt_abs, 0xaf000080}}; +// HexagonII::INST_ICLASS_LD + +// HexagonII::INST_ICLASS_LD_ST_2 +static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, + {L4_loadri_abs, 0x49800000}, + {L4_loadruh_abs, 0x49600000}, + {L4_loadrh_abs, 0x49400000}, + {L4_loadrub_abs, 0x49200000}, + {L4_loadrb_abs, 0x49000000}, + {S2_storerdabs, 0x48c00000}, + {S2_storerinewabs, 0x48a01000}, + {S2_storerhnewabs, 0x48a00800}, + {S2_storerbnewabs, 0x48a00000}, + {S2_storeriabs, 0x48800000}, + {S2_storerfabs, 0x48600000}, + {S2_storerhabs, 0x48400000}, + {S2_storerbabs, 0x48000000}}; +static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); +static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { + + unsigned MachineOpcode = 0; + unsigned LLVMOpcode = 0; + + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) { + for (size_t i = 0; i < NumCondS; ++i) { + if ((insn & StoreConditionalOpcodeData[i][1]) == + StoreConditionalOpcodeData[i][1]) { + MachineOpcode = StoreConditionalOpcodeData[i][1]; + LLVMOpcode = StoreConditionalOpcodeData[i][0]; + break; + } + } + } + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) { + for (size_t i = 0; i < NumLS; ++i) { + if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) { + MachineOpcode = LoadStoreOpcodeData[i][1]; + LLVMOpcode = LoadStoreOpcodeData[i][0]; + break; + } + } + } + + if (MachineOpcode) { + unsigned Value = 0; + unsigned shift = 0; + MI.setOpcode(LLVMOpcode); + // Remove the parse bits from the insn. 
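+    // (The parse bits, bits 15:14 of every instruction word, encode packet
+    // structure rather than operand fields, so they are masked off before
+    // the field extractions below.)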
+ insn &= ~HexagonII::INST_PARSE_MASK; + + switch (LLVMOpcode) { + default: + return MCDisassembler::Fail; + break; + + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdfnew_abs: + case Hexagon::S4_pstorerdtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbnewf_abs: + case Hexagon::S4_pstorerbnewt_abs: + case Hexagon::S4_pstorerbnewfnew_abs: + case Hexagon::S4_pstorerbnewtnew_abs: + case Hexagon::S4_pstorerhnewf_abs: + case Hexagon::S4_pstorerhnewt_abs: + case Hexagon::S4_pstorerhnewfnew_abs: + case Hexagon::S4_pstorerhnewtnew_abs: + case Hexagon::S4_pstorerinewf_abs: + case Hexagon::S4_pstorerinewt_abs: + case Hexagon::S4_pstorerinewfnew_abs: + case Hexagon::S4_pstorerinewtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbfnew_abs: + case Hexagon::S4_pstorerbtnew_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhfnew_abs: + case Hexagon::S4_pstorerhtnew_abs: + case Hexagon::S4_pstorerif_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerifnew_abs: + case Hexagon::S4_pstoreritnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::L4_ploadrdf_abs: + case Hexagon::L4_ploadrdt_abs: + case Hexagon::L4_ploadrdfnew_abs: + case Hexagon::L4_ploadrdtnew_abs: { + // op: Rdd + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = ((insn >> 9) & UINT64_C(3)); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = ((insn >> 15) & UINT64_C(62)); + Value |= ((insn >> 8) & UINT64_C(1)); + MI.addOperand(MCOperand::createImm(Value)); + break; + } + + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrht_abs: + case Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadrif_abs: + case Hexagon::L4_ploadrit_abs: + case Hexagon::L4_ploadrifnew_abs: + case Hexagon::L4_ploadritnew_abs: + // op: Rd + Value = insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = (insn >> 9) & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 
+ Value = (insn >> 15) & UINT64_C(62); + Value |= (insn >> 8) & UINT64_C(1); + MI.addOperand(MCOperand::createImm(Value)); + break; + + // op: g16_2 + case (Hexagon::L4_loadri_abs): + ++shift; + // op: g16_1 + case Hexagon::L4_loadrh_abs: + case Hexagon::L4_loadruh_abs: + ++shift; + // op: g16_0 + case Hexagon::L4_loadrb_abs: + case Hexagon::L4_loadrub_abs: { + // op: Rd + Value |= insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << shift)); + break; + } + + case Hexagon::L4_loadrd_abs: { + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << 3)); + break; + } + + case Hexagon::S2_storerdabs: { + // op: g16_3 + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << 3)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storerinewabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhnewabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbnewabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storeriabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhabs: + case Hexagon::S2_storerfabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + } + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder) { + + // Instruction Class for a constant extender: bits 31:28 = 0x0000 + if ((~insn & 0xf0000000) == 0xf0000000) { + unsigned Value; + // 27:16 High 12 bits of 26-bit extender. + Value = (insn & 0x0fff0000) << 4; + // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6); + MI.setOpcode(Hexagon::A4_ext); + HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder)); + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + // These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td enum subInstBinaryValues { V4_SA1_addi_BITS = 0x0000, @@ -731,6 +1342,8 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { return Hexagon::R0 + encoded_reg; else if (encoded_reg < 16) return Hexagon::R0 + encoded_reg + 8; + + // patently false value return Hexagon::NoRegister; } @@ -739,10 +1352,13 @@ static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { return Hexagon::D0 + encoded_dreg; else if (encoded_dreg < 8) return Hexagon::D0 + encoded_dreg + 4; + + // patently false value return Hexagon::NoRegister; } -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { +void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, + unsigned inst) const { int64_t operand; MCOperand Op; switch (opcode) { @@ -762,8 +1378,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_allocframe: // u 8-4{5_3} operand = ((inst & 0x1f0) >> 4) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadri_io: // Rd 3-0, Rs 7-4, u 11-8{4_2} @@ -774,8 +1389,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 6; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadrub_io: // Rd 3-0, Rs 7-4, u 11-8 @@ -786,8 +1400,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrb_io: // Rd 3-0, Rs 7-4, u 10-8 @@ -798,8 +1411,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x700) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrh_io: case Hexagon::V4_SL2_loadruh_io: @@ -811,8 +1423,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrd_sp: // Rdd 2-0, u 7-3{5_3} @@ -820,8 +1431,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x0f8) >> 3) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadri_sp: // Rd 3-0, u 8-4{5_2} @@ -829,8 +1439,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - 
MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addi: // Rx 3-0 (x2), s7 10-4 @@ -839,8 +1448,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { MI->addOperand(Op); MI->addOperand(Op); operand = SignExtend64<7>((inst & 0x7f0) >> 4); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addrx: // Rx 3-0 (x2), Rs 7-4 @@ -873,8 +1481,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x3f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_seti: // Rd 3-0, u 9-4 @@ -882,8 +1489,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x3f0) >> 4; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_clrf: case Hexagon::V4_SA1_clrfnew: @@ -901,8 +1507,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0x3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combine0i: case Hexagon::V4_SA1_combine1i: @@ -913,8 +1518,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x060) >> 5; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combinerz: case Hexagon::V4_SA1_combinezr: @@ -932,8 +1536,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -944,8 +1547,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0xf00) >> 8) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -957,8 +1559,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0xf; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_storewi0: case Hexagon::V4_SS2_storewi1: @@ -967,25 +1568,23 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_stored_sp: // s 
8-3{6_3}, Rtt 2-0 operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); MI->addOperand(Op); + break; case Hexagon::V4_SS2_storeh_io: // Rs 7-4, u 10-8{3_1}, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -993,8 +1592,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_storew_sp: // u 8-4{5_2}, Rd 3-0 operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); diff --git a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt index 43bace75a852..6c251020f818 100644 --- a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt +++ b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonDisassembler parent = Hexagon -required_libraries = HexagonDesc HexagonInfo MCDisassembler Support +required_libraries = HexagonDesc HexagonInfo MC MCDisassembler Support add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index d360be2aa5b2..ed7d9578902e 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -47,15 +47,8 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class MachineInstr; - class MCInst; - class MCInstrInfo; - class HexagonAsmPrinter; class HexagonTargetMachine; - void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, - HexagonAsmPrinter &AP); - /// \brief Creates a Hexagon-specific Target Transformation Info pass. ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 53a687c337ec..1189cfd488ee 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -24,14 +24,32 @@ include "llvm/Target/Target.td" // Hexagon Architectures def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; + +// Hexagon ISA Extensions +def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", + "true", "Hexagon HVX instructions">; +def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", + "true", "Hexagon HVX Double instructions">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasV5T : Predicate<"HST->hasV5TOps()">; -def NoV5T : Predicate<"!HST->hasV5TOps()">; -def UseMEMOP : Predicate<"HST->useMemOps()">; -def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def HasV55T : Predicate<"HST->hasV55TOps()">, + AssemblerPredicate<"ArchV55">; +def HasV60T : Predicate<"HST->hasV60TOps()">, + AssemblerPredicate<"ArchV60">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVXDbl">; +def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; + +def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVX">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -53,6 +71,7 @@ class NewValueRel: PredNewRel; // NewValueRel - Filter class used to relate load/store instructions having // different addressing modes with each other. class AddrModeRel: NewValueRel; +class IntrinsicsRel; //===----------------------------------------------------------------------===// // Generate mapping table to relate non-predicate instructions with their @@ -62,7 +81,7 @@ class AddrModeRel: NewValueRel; def getPredOpcode : InstrMapping { let FilterClass = "PredRel"; // Instructions with the same BaseOpcode and isNVStore values form a row. - let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"]; + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"]; // Instructions with the same predicate sense form a column. let ColFields = ["PredSense"]; // The key column is the unpredicated instructions. 
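As a brief aside for readers new to InstrMapping: TableGen turns each of these relation maps into a C++ lookup function, emitted under GET_INSTRMAP_INFO in the generated HexagonGenInstrInfo.inc. A minimal usage sketch, assuming the generated signature int Hexagon::getPredOpcode(uint16_t Opcode, enum PredSense inPredSense) and the generated PredSense_true enumerator (check the .inc output for the exact names), given a MachineInstr *MI and the target's TII:

    // Swap an unpredicated instruction for its predicated-true relative,
    // if the relation table defines one; a negative result means no mapping.
    int PredOpc = Hexagon::getPredOpcode(MI->getOpcode(), Hexagon::PredSense_true);
    if (PredOpc >= 0)
      MI->setDesc(TII->get(PredOpc));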
@@ -77,7 +96,7 @@ def getPredOpcode : InstrMapping { // def getFalsePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; @@ -89,7 +108,7 @@ def getFalsePredOpcode : InstrMapping { // def getTruePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -125,7 +144,7 @@ def getPredOldOpcode : InstrMapping { // def getNewValueOpcode : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -137,16 +156,16 @@ def getNewValueOpcode : InstrMapping { // def getNonNVStore : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; } -def getBasedWithImmOffset : InstrMapping { +def getBaseWithImmOffset : InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", - "isMEMri", "isFloat"]; + "isFloat"]; let ColFields = ["addrMode"]; let KeyCol = ["Absolute"]; let ValueCols = [["BaseImmOffset"]]; @@ -168,6 +187,37 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def notTakenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def takenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +def getRealHWInstr : InstrMapping { + let FilterClass = "IntrinsicsRel"; + let RowFields = ["BaseOpcode"]; + let ColFields = ["InstrType"]; + let KeyCol = ["Pseudo"]; + let ValueCols = [["Pseudo"], ["Real"]]; +} //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// @@ -192,12 +242,22 @@ def : Proc<"hexagonv4", HexagonModelV4, [ArchV4]>; def : Proc<"hexagonv5", HexagonModelV4, [ArchV4, ArchV5]>; +def : Proc<"hexagonv55", HexagonModelV55, + [ArchV4, ArchV5, ArchV55]>; +def : Proc<"hexagonv60", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing 
//===----------------------------------------------------------------------===//

+def HexagonAsmParserVariant : AsmParserVariant {
+  int Variant = 0;
+  string TokenizingCharacters = "#()=:.<>!+*";
+}
+
 def Hexagon : Target {
   // Pull in Instruction Info:
   let InstructionSet = HexagonInstrInfo;
+  let AssemblyParserVariants = [HexagonAsmParserVariant];
 }
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 05728d2b627e..e213089687e8 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -40,11 +40,13 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -56,12 +58,27 @@
 
 using namespace llvm;
 
+namespace llvm {
+  void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+                        MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
 #define DEBUG_TYPE "asm-printer"
 
 static cl::opt<bool> AlignCalls(
          "hexagon-align-calls", cl::Hidden, cl::init(true),
          cl::desc("Insert falign after call instruction for Hexagon target"));
 
+// Given a scalar register return its pair.
+inline static unsigned getHexagonRegisterPair(unsigned Reg,
+      const MCRegisterInfo *RI) {
+  assert(Hexagon::IntRegsRegClass.contains(Reg));
+  MCSuperRegIterator SR(Reg, RI, false);
+  unsigned Pair = *SR;
+  assert(Hexagon::DoubleRegsRegClass.contains(Pair));
+  return Pair;
+}
+
 HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
                                      std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
@@ -102,9 +119,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 //
 bool HexagonAsmPrinter::
 isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
-  if (MBB->hasAddressTaken()) {
+  if (MBB->hasAddressTaken())
     return false;
-  }
   return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
 }
 
@@ -117,7 +133,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                         raw_ostream &OS) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
     default:
@@ -173,45 +190,407 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
+MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+                    MCStreamer &OutStreamer,
+                    const MCOperand &Imm, int AlignSize) {
+  MCSymbol *Sym;
+  int64_t Value;
+  if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
+    StringRef sectionPrefix;
+    std::string ImmString;
+    StringRef Name;
+    if (AlignSize == 8) {
+      Name = ".CONST_0000000000000000";
+      sectionPrefix = ".gnu.linkonce.l8";
+      ImmString = utohexstr(Value);
+    } else {
+      Name = ".CONST_00000000";
+      sectionPrefix = ".gnu.linkonce.l4";
+      ImmString = utohexstr(static_cast<uint32_t>(Value));
+    }
+
+    std::string symbolName =   // Yes, leading zeros are kept.
+        Name.drop_back(ImmString.size()).str() + ImmString;
+    std::string sectionName = sectionPrefix.str() + symbolName;
+
+    MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+        sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+    OutStreamer.SwitchSection(Section);
+
+    Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
+    if (Sym->isUndefined()) {
+      OutStreamer.EmitLabel(Sym);
+      OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+      OutStreamer.EmitIntValue(Value, AlignSize);
+      OutStreamer.EmitCodeAlignment(AlignSize);
+    }
+  } else {
+    assert(Imm.isExpr() && "Expected expression and found none");
+    const MachineOperand &MO = MI.getOperand(1);
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+    MCSymbol *MOSymbol = nullptr;
+    if (MO.isGlobal())
+      MOSymbol = AP.getSymbol(MO.getGlobal());
+    else if (MO.isCPI())
+      MOSymbol = AP.GetCPISymbol(MO.getIndex());
+    else if (MO.isJTI())
+      MOSymbol = AP.GetJTISymbol(MO.getIndex());
+    else
+      llvm_unreachable("Unknown operand type!");
+
+    StringRef SymbolName = MOSymbol->getName();
+    std::string LitaName = ".CONST_" + SymbolName.str();
+
+    MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+        ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+    OutStreamer.SwitchSection(Section);
+    Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
+    if (Sym->isUndefined()) {
+      OutStreamer.EmitLabel(Sym);
+      OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
+      OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
+      OutStreamer.EmitCodeAlignment(AlignSize);
+    }
+  }
+  return Sym;
+}
+
+void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
+                                                  const MachineInstr &MI) {
+  MCInst &MappedInst = static_cast<MCInst &>(Inst);
+  const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+
+  switch (Inst.getOpcode()) {
+  default: return;
+
+  // "$dst = CONST64(#$src1)",
+  case Hexagon::CONST64_Float_Real:
+  case Hexagon::CONST64_Int_Real:
+    if (!OutStreamer->hasRawTextSupport()) {
+      const MCOperand &Imm = MappedInst.getOperand(1);
+      MCSectionSubPair Current = OutStreamer->getCurrentSection();
+
+      MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+
+      OutStreamer->SwitchSection(Current.first, Current.second);
+      MCInst TmpInst;
+      MCOperand &Reg = MappedInst.getOperand(0);
+      TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+      TmpInst.addOperand(Reg);
+      TmpInst.addOperand(MCOperand::createExpr(
+          MCSymbolRefExpr::create(Sym, OutContext)));
+      MappedInst = TmpInst;
+
+    }
+    break;
+  case Hexagon::CONST32:
+  case Hexagon::CONST32_Float_Real:
+  case Hexagon::CONST32_Int_Real:
+  case Hexagon::FCONST32_nsdata:
+    if (!OutStreamer->hasRawTextSupport()) {
+      MCOperand &Imm = MappedInst.getOperand(1);
+      MCSectionSubPair Current = OutStreamer->getCurrentSection();
+      MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+      OutStreamer->SwitchSection(Current.first, Current.second);
+      MCInst TmpInst;
+      MCOperand &Reg = MappedInst.getOperand(0);
+      TmpInst.setOpcode(Hexagon::L2_loadrigp);
+      TmpInst.addOperand(Reg);
+      TmpInst.addOperand(MCOperand::createExpr(
+          MCSymbolRefExpr::create(Sym, OutContext)));
+      MappedInst = TmpInst;
+    }
+    break;
+
+  // C2_pxfer_map maps to the C2_or instruction. It is possible to use C2_or
+  // during instruction selection itself, but that results in suboptimal code.
+  // A reduced sketch of this kind of remapping follows below.
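// A minimal standalone sketch of the remapping pattern HexagonProcessInstruction
// uses: the MCInst is rewritten in place, reusing its operands. For the
// predicate transfer above, "Pd = Ps" becomes "Pd = or(Ps, Ps)" by swapping
// the opcode and appending the source operand once more. The opcode argument
// is a placeholder (e.g. Hexagon::C2_or), not a real encoding.
#include "llvm/MC/MCInst.h"

static void lowerPxferToOr(llvm::MCInst &Inst, unsigned OrOpcode) {
  // Expects "Pd = Ps": operand 0 is Pd, operand 1 is Ps.
  llvm::MCOperand Ps = Inst.getOperand(1);
  Inst.setOpcode(OrOpcode);  // e.g. Hexagon::C2_or
  Inst.addOperand(Ps);       // operands become (Pd, Ps, Ps)
}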
+ case Hexagon::C2_pxfer_map: { + MCOperand &Ps = Inst.getOperand(1); + MappedInst.setOpcode(Hexagon::C2_or); + MappedInst.addOperand(Ps); + return; + } + + // Vector reduce complex multiply by scalar, Rt & 1 map to :hi else :lo + // The insn is mapped from the 4 operand to the 3 operand raw form taking + // 3 register pairs. + case Hexagon::M2_vrcmpys_acc_s1: { + MCOperand &Rt = Inst.getOperand(3); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + assert (Rs.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rs.getReg()); + if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + MappedInst.setOpcode(Hexagon::A4_boundscheck_hi); + else // raw:lo + MappedInst.setOpcode(Hexagon::A4_boundscheck_lo); + Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); + return; + } + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &MO = MappedInst.getOperand(2); + int64_t Imm; + MCExpr const *Expr = MO.getExpr(); + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::S2_vsathub); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::S5_vasrhrnd_goodsyntax: + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &MO2 = MappedInst.getOperand(2); + MCExpr const *Expr = MO2.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(MappedInst.getOperand(0)); + MCOperand &MO1 = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + // Add a new operand for the second register in the pair. 
+ TmpInst.addOperand(MCOperand::createReg(High)); + TmpInst.addOperand(MCOperand::createReg(Low)); + MappedInst = TmpInst; + return; + } + + if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax) + TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd); + else + TmpInst.setOpcode(Hexagon::S5_vasrhrnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &MO = Inst.getOperand(2); + MCExpr const *Expr = MO.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_tfr); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::TFRI_f: + MappedInst.setOpcode(Hexagon::A2_tfrsi); + return; + case Hexagon::TFRI_cPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveit); + return; + case Hexagon::TFRI_cNotPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveif); + return; + case Hexagon::MUX_ri_f: + MappedInst.setOpcode(Hexagon::C2_muxri); + return; + case Hexagon::MUX_ir_f: + MappedInst.setOpcode(Hexagon::C2_muxir); + return; + + // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" + case Hexagon::A2_tfrpi: { + MCInst TmpInst; + MCOperand &Rdd = MappedInst.getOperand(0); + MCOperand &MO = MappedInst.getOperand(1); + + TmpInst.setOpcode(Hexagon::A2_combineii); + TmpInst.addOperand(Rdd); + int64_t Imm; + bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); + if (Success && Imm < 0) { + const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); + TmpInst.addOperand(MCOperand::createExpr(MOne)); + } else { + const MCExpr *Zero = MCConstantExpr::create(0, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Zero)); + } + TmpInst.addOperand(MO); + MappedInst = TmpInst; + return; + } + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode(Hexagon::A2_combinew); + return; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? 
Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + return; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + return; + } + + case Hexagon::M2_mpysmi: { + MCOperand &Imm = MappedInst.getOperand(2); + MCExpr const *Expr = Imm.getExpr(); + int64_t Value; + bool Success = Expr->evaluateAsAbsolute(Value); + assert(Success);(void)Success; + if (Value < 0 && Value > -256) { + MappedInst.setOpcode(Hexagon::M2_mpysin); + Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); + } + else + MappedInst.setOpcode(Hexagon::M2_mpysip); + return; + } + + case Hexagon::A2_addsp: { + MCOperand &Rt = Inst.getOperand(1); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::A2_addsph); + else + MappedInst.setOpcode(Hexagon::A2_addspl); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::HEXAGON_V6_vd0_pseudo: + case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + MCInst TmpInst; + assert (Inst.getOperand(0).isReg() && + "Expected register and none was found"); + + TmpInst.setOpcode(Hexagon::V6_vxor); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } + + } +} + /// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst MCB; - MCB.setOpcode(Hexagon::BUNDLE); - MCB.addOperand(MCOperand::createImm(0)); + MCInst MCB = HexagonMCInstrInfo::createBundle(); + const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); if (MI->isBundle()) { const MachineBasicBlock* MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator MII = MI; + MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); unsigned IgnoreCount = 0; - for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) { + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) if (MII->getOpcode() == TargetOpcode::DBG_VALUE || MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) ++IgnoreCount; - else { - HexagonLowerToMC(MII, MCB, *this); - } - } + else + HexagonLowerToMC(MCII, &*MII, MCB, *this); } - else { - HexagonLowerToMC(MI, MCB, *this); - HexagonMCInstrInfo::padEndloop(MCB); - } - // Examine the packet and try to find instructions that can be converted - // to compounds. - HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(), - OutStreamer->getContext(), MCB); - // Examine the packet and convert pairs of instructions to duplex - // instructions when possible. 
-  SmallVector<DuplexCandidate, 8> possibleDuplexes;
-  possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(
-      *Subtarget->getInstrInfo(), MCB);
-  HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget,
-                   OutStreamer->getContext(), MCB, possibleDuplexes);
-  EmitToStreamer(*OutStreamer, MCB);
+  else
+    HexagonLowerToMC(MCII, MI, MCB, *this);
+
+  bool Ok = HexagonMCInstrInfo::canonicalizePacket(
+      MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
+  assert(Ok);
+  (void)Ok;
+  if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
+    return;
+  OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
 }
 
 extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 792fc8b7af3a..a78d97e28427 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -42,6 +42,10 @@ namespace llvm {
 
     void EmitInstruction(const MachineInstr *MI) override;
 
+    void HexagonProcessInstruction(MCInst &Inst,
+                                   const MachineInstr &MBB);
+
+
     void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                          unsigned AsmVariant, const char *ExtraCode,
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
new file mode 100644
index 000000000000..77907b054d54
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -0,0 +1,2778 @@
+//===--- HexagonBitSimplify.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexbit"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
+  FunctionPass *createHexagonBitSimplify();
+}
+
+namespace {
+  // Set of virtual registers, based on BitVector. (A sketch of the
+  // vreg-to-index mapping it relies on follows below.)
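// A minimal sketch of the translation the RegisterSet below depends on:
// virtual register numbers are large opaque values, so they are mapped to
// dense BitVector indices and back. The assertions only document the
// round-trip property; this is illustration, not part of the pass.
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>

static void vregIndexRoundTrip(unsigned VReg) {
  using llvm::TargetRegisterInfo;
  assert(TargetRegisterInfo::isVirtualRegister(VReg));
  unsigned Idx = TargetRegisterInfo::virtReg2Index(VReg);  // dense bit index
  assert(TargetRegisterInfo::index2VirtReg(Idx) == VReg);  // and back again
  (void)Idx;
}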
+  struct RegisterSet : private BitVector {
+    RegisterSet() : BitVector() {}
+    explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+    RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+    using BitVector::clear;
+    using BitVector::count;
+
+    unsigned find_first() const {
+      int First = BitVector::find_first();
+      if (First < 0)
+        return 0;
+      return x2v(First);
+    }
+
+    unsigned find_next(unsigned Prev) const {
+      int Next = BitVector::find_next(v2x(Prev));
+      if (Next < 0)
+        return 0;
+      return x2v(Next);
+    }
+
+    RegisterSet &insert(unsigned R) {
+      unsigned Idx = v2x(R);
+      ensure(Idx);
+      return static_cast<RegisterSet&>(BitVector::set(Idx));
+    }
+    RegisterSet &remove(unsigned R) {
+      unsigned Idx = v2x(R);
+      if (Idx >= size())
+        return *this;
+      return static_cast<RegisterSet&>(BitVector::reset(Idx));
+    }
+
+    RegisterSet &insert(const RegisterSet &Rs) {
+      return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+    }
+    RegisterSet &remove(const RegisterSet &Rs) {
+      return static_cast<RegisterSet&>(BitVector::reset(Rs));
+    }
+
+    reference operator[](unsigned R) {
+      unsigned Idx = v2x(R);
+      ensure(Idx);
+      return BitVector::operator[](Idx);
+    }
+    bool operator[](unsigned R) const {
+      unsigned Idx = v2x(R);
+      assert(Idx < size());
+      return BitVector::operator[](Idx);
+    }
+    bool has(unsigned R) const {
+      unsigned Idx = v2x(R);
+      if (Idx >= size())
+        return false;
+      return BitVector::test(Idx);
+    }
+
+    bool empty() const {
+      return !BitVector::any();
+    }
+    bool includes(const RegisterSet &Rs) const {
+      // A.BitVector::test(B)  <=>  A-B != {}
+      return !Rs.BitVector::test(*this);
+    }
+    bool intersects(const RegisterSet &Rs) const {
+      return BitVector::anyCommon(Rs);
+    }
+
+  private:
+    void ensure(unsigned Idx) {
+      if (size() <= Idx)
+        resize(std::max(Idx+1, 32U));
+    }
+    static inline unsigned v2x(unsigned v) {
+      return TargetRegisterInfo::virtReg2Index(v);
+    }
+    static inline unsigned x2v(unsigned x) {
+      return TargetRegisterInfo::index2VirtReg(x);
+    }
+  };
+
+
+  struct PrintRegSet {
+    PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+      : RS(S), TRI(RI) {}
+    friend raw_ostream &operator<< (raw_ostream &OS,
+          const PrintRegSet &P);
+  private:
+    const RegisterSet &RS;
+    const TargetRegisterInfo *TRI;
+  };
+
+  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
+    LLVM_ATTRIBUTE_UNUSED;
+  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+    OS << '{';
+    for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+      OS << ' ' << PrintReg(R, P.TRI);
+    OS << " }";
+    return OS;
+  }
+}
+
+
+namespace {
+  class Transformation;
+
+  class HexagonBitSimplify : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
+      initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
+    }
+    virtual const char *getPassName() const {
+      return "Hexagon bit simplification";
+    }
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
+    static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
+    static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
+        const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
+    static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W);
+    static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W);
+    static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W, uint64_t &U);
+    static bool replaceReg(unsigned OldR, unsigned NewR,
+        MachineRegisterInfo &MRI);
+    static bool getSubregMask(const BitTracker::RegisterRef &RR,
+        unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
+    static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
+        unsigned NewSR, MachineRegisterInfo &MRI);
+    static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
+        unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+    static bool parseRegSequence(const MachineInstr &I,
+        BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+
+    static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+        uint16_t Begin);
+    static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
+        uint16_t Begin, const HexagonInstrInfo &HII);
+
+    static const TargetRegisterClass *getFinalVRegClass(
+        const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
+    static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
+        const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
+
+  private:
+    MachineDominatorTree *MDT;
+
+    bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+  };
+
+  char HexagonBitSimplify::ID = 0;
+  typedef HexagonBitSimplify HBS;
+
+
+  // The purpose of this class is to provide a common facility to traverse
+  // the function top-down or bottom-up via the dominator tree, and keep
+  // track of the available registers.
+  class Transformation {
+  public:
+    bool TopDown;
+    Transformation(bool TD) : TopDown(TD) {}
+    virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
+    virtual ~Transformation() {}
+  };
+}
+
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+      "Hexagon bit simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+      "Hexagon bit simplification", false, false)
+
+
+bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
+      RegisterSet &AVs) {
+  MachineDomTreeNode *N = MDT->getNode(&B);
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  bool Changed = false;
+
+  if (T.TopDown)
+    Changed = T.processBlock(B, AVs);
+
+  RegisterSet Defs;
+  for (auto &I : B)
+    getInstrDefs(I, Defs);
+  RegisterSet NewAVs = AVs;
+  NewAVs.insert(Defs);
+
+  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+    MachineBasicBlock *SB = (*I)->getBlock();
+    Changed |= visitBlock(*SB, T, NewAVs);
+  }
+  if (!T.TopDown)
+    Changed |= T.processBlock(B, AVs);
+
+  return Changed;
+}
+
+//
+// Utility functions:
+//
+void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
+      RegisterSet &Defs) {
+  for (auto &Op : MI.operands()) {
+    if (!Op.isReg() || !Op.isDef())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Defs.insert(R);
+  }
+}
+
+void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
+      RegisterSet &Uses) {
+  for (auto &Op : MI.operands()) {
+    if (!Op.isReg() || !Op.isUse())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Uses.insert(R);
+  }
+}
+
+// Check if all the bits in range [B, E) in both cells are equal.
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
+      uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
+      uint16_t W) {
+  for (uint16_t i = 0; i < W; ++i) {
+    // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
+ if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) + return false; + // Same for RC2[i]. + if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) + return false; + if (RC1[B1+i] != RC2[B2+i]) + return false; + } + return true; +} + + +bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].num()) + return false; + return true; +} + + +bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].is(0)) + return false; + return true; +} + + +bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W, uint64_t &U) { + assert(B < RC.width() && B+W <= RC.width()); + int64_t T = 0; + for (uint16_t i = B+W; i > B; --i) { + const BitTracker::BitValue &BV = RC[i-1]; + T <<= 1; + if (BV.is(1)) + T |= 1; + else if (!BV.is(0)) + return false; + } + U = T; + return true; +} + + +bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (I->getSubReg() != OldSR) + continue; + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +// For a register ref (pair Reg:Sub), set Begin to the position of the LSB +// of Sub in Reg, and set Width to the size of Sub in bits. Return true, +// if this succeeded, otherwise return false. +bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + assert(RR.Sub == 0); + Begin = 0; + Width = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + if (RR.Sub == 0) { + Begin = 0; + Width = 64; + return true; + } + assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); + Width = 32; + Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32); + return true; + } + return false; +} + + +// For a REG_SEQUENCE, set SL to the low subregister and SH to the high +// subregister. 
+bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); + unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); + assert(Sub1 != Sub2); + if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + SL = I.getOperand(1); + SH = I.getOperand(3); + return true; + } + if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + SH = I.getOperand(1); + SL = I.getOperand(3); + return true; + } + return false; +} + + +// All stores (except 64-bit stores) take a 32-bit register as the source +// of the value to be stored. If the instruction stores into a location +// that is shorter than 32 bits, some bits of the source register are not +// used. For each store instruction, calculate the set of used bits in +// the source register, and set appropriate bits in Bits. Return true if +// the bits are calculated, false otherwise. +bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin) { + using namespace Hexagon; + + switch (Opc) { + // Store byte + case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 + case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new + case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new + case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 + case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S4_storerb_ap: // memb(Re32=#U6)=Rt32 + case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new + case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 + case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new + case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 + case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new + case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 + case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new + case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 + case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new + case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 + case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 + case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewf_rr: // if (!Pv4) 
memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerbgp: // memb(gp+#u16:0)=Rt32 + case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new + case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 + case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 + case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new + case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new + Bits.set(Begin, Begin+8); + return true; + + // Store low half + case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 + case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new + case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new + case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 + case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S4_storerh_ap: // memh(Re32=#U6)=Rt32 + case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new + case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 + case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new + case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 + case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new + case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 + case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new + case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 + case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new + case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 + case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerhgp: // memh(gp+#u16:1)=Rt32 + case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new + case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 + case S4_pstorerhf_abs: // if 
(!Pv4) memh(#u6)=Rt32 + case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new + case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new + Bits.set(Begin, Begin+16); + return true; + + // Store high half + case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 + case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 + case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 + case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 + case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 + case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 + case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 + case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 + case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 + case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 + case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 + case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 + Bits.set(Begin+16, Begin+32); + return true; + } + + return false; +} + + +// For an instruction with opcode Opc, calculate the set of bits that it +// uses in a register in operand OpN. This only calculates the set of used +// bits for cases where it does not depend on any operands (as is the case +// in shifts, for example). For concrete instructions from a program, the +// operand may be a subregister of a larger register, while Bits would +// correspond to the larger register in its entirety. Because of that, +// the parameter Begin can be used to indicate which bit of Bits should be +// considered the LSB of of the operand. +bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, + BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { + using namespace Hexagon; + + const MCInstrDesc &D = HII.get(Opc); + if (D.mayStore()) { + if (OpN == D.getNumOperands()-1) + return getUsedBitsInStore(Opc, Bits, Begin); + return false; + } + + switch (Opc) { + // One register source. Used bits: R1[0-7]. + case A2_sxtb: + case A2_zxtb: + case A4_cmpbeqi: + case A4_cmpbgti: + case A4_cmpbgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // One register source. Used bits: R1[0-15]. + case A2_aslh: + case A2_sxth: + case A2_zxth: + case A4_cmpheqi: + case A4_cmphgti: + case A4_cmphgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // One register source. Used bits: R1[16-31]. 
+ case A2_asrh: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources. Used bits: R1[0-7], R2[0-7]. + case A4_cmpbeq: + case A4_cmpbgt: + case A4_cmpbgtu: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[0-15]. + case A4_cmpheq: + case A4_cmphgt: + case A4_cmphgtu: + case A2_addh_h16_ll: + case A2_addh_h16_sat_ll: + case A2_addh_l16_ll: + case A2_addh_l16_sat_ll: + case A2_combine_ll: + case A2_subh_h16_ll: + case A2_subh_h16_sat_ll: + case A2_subh_l16_ll: + case A2_subh_l16_sat_ll: + case M2_mpy_acc_ll_s0: + case M2_mpy_acc_ll_s1: + case M2_mpy_acc_sat_ll_s0: + case M2_mpy_acc_sat_ll_s1: + case M2_mpy_ll_s0: + case M2_mpy_ll_s1: + case M2_mpy_nac_ll_s0: + case M2_mpy_nac_ll_s1: + case M2_mpy_nac_sat_ll_s0: + case M2_mpy_nac_sat_ll_s1: + case M2_mpy_rnd_ll_s0: + case M2_mpy_rnd_ll_s1: + case M2_mpy_sat_ll_s0: + case M2_mpy_sat_ll_s1: + case M2_mpy_sat_rnd_ll_s0: + case M2_mpy_sat_rnd_ll_s1: + case M2_mpyd_acc_ll_s0: + case M2_mpyd_acc_ll_s1: + case M2_mpyd_ll_s0: + case M2_mpyd_ll_s1: + case M2_mpyd_nac_ll_s0: + case M2_mpyd_nac_ll_s1: + case M2_mpyd_rnd_ll_s0: + case M2_mpyd_rnd_ll_s1: + case M2_mpyu_acc_ll_s0: + case M2_mpyu_acc_ll_s1: + case M2_mpyu_ll_s0: + case M2_mpyu_ll_s1: + case M2_mpyu_nac_ll_s0: + case M2_mpyu_nac_ll_s1: + case M2_mpyud_acc_ll_s0: + case M2_mpyud_acc_ll_s1: + case M2_mpyud_ll_s0: + case M2_mpyud_ll_s1: + case M2_mpyud_nac_ll_s0: + case M2_mpyud_nac_ll_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[16-31]. + case A2_addh_h16_lh: + case A2_addh_h16_sat_lh: + case A2_combine_lh: + case A2_subh_h16_lh: + case A2_subh_h16_sat_lh: + case M2_mpy_acc_lh_s0: + case M2_mpy_acc_lh_s1: + case M2_mpy_acc_sat_lh_s0: + case M2_mpy_acc_sat_lh_s1: + case M2_mpy_lh_s0: + case M2_mpy_lh_s1: + case M2_mpy_nac_lh_s0: + case M2_mpy_nac_lh_s1: + case M2_mpy_nac_sat_lh_s0: + case M2_mpy_nac_sat_lh_s1: + case M2_mpy_rnd_lh_s0: + case M2_mpy_rnd_lh_s1: + case M2_mpy_sat_lh_s0: + case M2_mpy_sat_lh_s1: + case M2_mpy_sat_rnd_lh_s0: + case M2_mpy_sat_rnd_lh_s1: + case M2_mpyd_acc_lh_s0: + case M2_mpyd_acc_lh_s1: + case M2_mpyd_lh_s0: + case M2_mpyd_lh_s1: + case M2_mpyd_nac_lh_s0: + case M2_mpyd_nac_lh_s1: + case M2_mpyd_rnd_lh_s0: + case M2_mpyd_rnd_lh_s1: + case M2_mpyu_acc_lh_s0: + case M2_mpyu_acc_lh_s1: + case M2_mpyu_lh_s0: + case M2_mpyu_lh_s1: + case M2_mpyu_nac_lh_s0: + case M2_mpyu_nac_lh_s1: + case M2_mpyud_acc_lh_s0: + case M2_mpyud_acc_lh_s1: + case M2_mpyud_lh_s0: + case M2_mpyud_lh_s1: + case M2_mpyud_nac_lh_s0: + case M2_mpyud_nac_lh_s1: + // These four are actually LH. + case A2_addh_l16_hl: + case A2_addh_l16_sat_hl: + case A2_subh_l16_hl: + case A2_subh_l16_sat_hl: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + if (OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[0-15]. 
+ case A2_addh_h16_hl: + case A2_addh_h16_sat_hl: + case A2_combine_hl: + case A2_subh_h16_hl: + case A2_subh_h16_sat_hl: + case M2_mpy_acc_hl_s0: + case M2_mpy_acc_hl_s1: + case M2_mpy_acc_sat_hl_s0: + case M2_mpy_acc_sat_hl_s1: + case M2_mpy_hl_s0: + case M2_mpy_hl_s1: + case M2_mpy_nac_hl_s0: + case M2_mpy_nac_hl_s1: + case M2_mpy_nac_sat_hl_s0: + case M2_mpy_nac_sat_hl_s1: + case M2_mpy_rnd_hl_s0: + case M2_mpy_rnd_hl_s1: + case M2_mpy_sat_hl_s0: + case M2_mpy_sat_hl_s1: + case M2_mpy_sat_rnd_hl_s0: + case M2_mpy_sat_rnd_hl_s1: + case M2_mpyd_acc_hl_s0: + case M2_mpyd_acc_hl_s1: + case M2_mpyd_hl_s0: + case M2_mpyd_hl_s1: + case M2_mpyd_nac_hl_s0: + case M2_mpyd_nac_hl_s1: + case M2_mpyd_rnd_hl_s0: + case M2_mpyd_rnd_hl_s1: + case M2_mpyu_acc_hl_s0: + case M2_mpyu_acc_hl_s1: + case M2_mpyu_hl_s0: + case M2_mpyu_hl_s1: + case M2_mpyu_nac_hl_s0: + case M2_mpyu_nac_hl_s1: + case M2_mpyud_acc_hl_s0: + case M2_mpyud_acc_hl_s1: + case M2_mpyud_hl_s0: + case M2_mpyud_hl_s1: + case M2_mpyud_nac_hl_s0: + case M2_mpyud_nac_hl_s1: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + if (OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[16-31]. + case A2_addh_h16_hh: + case A2_addh_h16_sat_hh: + case A2_combine_hh: + case A2_subh_h16_hh: + case A2_subh_h16_sat_hh: + case M2_mpy_acc_hh_s0: + case M2_mpy_acc_hh_s1: + case M2_mpy_acc_sat_hh_s0: + case M2_mpy_acc_sat_hh_s1: + case M2_mpy_hh_s0: + case M2_mpy_hh_s1: + case M2_mpy_nac_hh_s0: + case M2_mpy_nac_hh_s1: + case M2_mpy_nac_sat_hh_s0: + case M2_mpy_nac_sat_hh_s1: + case M2_mpy_rnd_hh_s0: + case M2_mpy_rnd_hh_s1: + case M2_mpy_sat_hh_s0: + case M2_mpy_sat_hh_s1: + case M2_mpy_sat_rnd_hh_s0: + case M2_mpy_sat_rnd_hh_s1: + case M2_mpyd_acc_hh_s0: + case M2_mpyd_acc_hh_s1: + case M2_mpyd_hh_s0: + case M2_mpyd_hh_s1: + case M2_mpyd_nac_hh_s0: + case M2_mpyd_nac_hh_s1: + case M2_mpyd_rnd_hh_s0: + case M2_mpyd_rnd_hh_s1: + case M2_mpyu_acc_hh_s0: + case M2_mpyu_acc_hh_s1: + case M2_mpyu_hh_s0: + case M2_mpyu_hh_s1: + case M2_mpyu_nac_hh_s0: + case M2_mpyu_nac_hh_s1: + case M2_mpyud_acc_hh_s0: + case M2_mpyud_acc_hh_s1: + case M2_mpyud_hh_s0: + case M2_mpyud_hh_s1: + case M2_mpyud_nac_hh_s0: + case M2_mpyud_nac_hh_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + } + + return false; +} + + +// Calculate the register class that matches Reg:Sub. For example, if +// vreg1 is a double register, then vreg1:subreg_hireg would match "int" +// register class. +const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return nullptr; + auto *RC = MRI.getRegClass(RR.Reg); + if (RR.Sub == 0) + return RC; + + auto VerifySR = [] (unsigned Sub) -> void { + assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + }; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::IntRegsRegClass; + } + return nullptr; +} + + +// Check if RD could be replaced with RS at any possible use of RD. +// For example a predicate register cannot be replaced with a integer +// register, but a 64-bit register with a subregister can be replaced +// with a 32-bit register. 
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
+      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
+      !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+    return false;
+  // Return false if one (or both) classes are nullptr.
+  auto *DRC = getFinalVRegClass(RD, MRI);
+  if (!DRC)
+    return false;
+
+  return DRC == getFinalVRegClass(RS, MRI);
+}
+
+
+//
+// Dead code elimination
+//
+namespace {
+  class DeadCodeElimination {
+  public:
+    DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
+      : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+        MDT(mdt), MRI(mf.getRegInfo()) {}
+
+    bool run() {
+      return runOnNode(MDT.getRootNode());
+    }
+
+  private:
+    bool isDead(unsigned R) const;
+    bool runOnNode(MachineDomTreeNode *N);
+
+    MachineFunction &MF;
+    const HexagonInstrInfo &HII;
+    MachineDominatorTree &MDT;
+    MachineRegisterInfo &MRI;
+  };
+}
+
+
+bool DeadCodeElimination::isDead(unsigned R) const {
+  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+    MachineInstr *UseI = I->getParent();
+    if (UseI->isDebugValue())
+      continue;
+    if (UseI->isPHI()) {
+      assert(!UseI->getOperand(0).getSubReg());
+      unsigned DR = UseI->getOperand(0).getReg();
+      if (DR == R)
+        continue;
+    }
+    return false;
+  }
+  return true;
+}
+
+
+bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
+  bool Changed = false;
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+    Changed |= runOnNode(*I);
+
+  MachineBasicBlock *B = N->getBlock();
+  std::vector<MachineInstr*> Instrs;
+  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+    Instrs.push_back(&*I);
+
+  for (auto MI : Instrs) {
+    unsigned Opc = MI->getOpcode();
+    // Do not touch lifetime markers. This is why the target-independent DCE
+    // cannot be used.
+    if (Opc == TargetOpcode::LIFETIME_START ||
+        Opc == TargetOpcode::LIFETIME_END)
+      continue;
+    bool Store = false;
+    if (MI->isInlineAsm())
+      continue;
+    // Delete PHIs if possible.
+    if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
+      continue;
+
+    bool AllDead = true;
+    SmallVector<unsigned,2> Regs;
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg() || !Op.isDef())
+        continue;
+      unsigned R = Op.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+        AllDead = false;
+        break;
+      }
+      Regs.push_back(R);
+    }
+    if (!AllDead)
+      continue;
+
+    B->erase(MI);
+    for (unsigned i = 0, n = Regs.size(); i != n; ++i)
+      MRI.markUsesInDebugValueAsUndef(Regs[i]);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+
+//
+// Eliminate redundant instructions
+//
+// This transformation will identify instructions where the output register
+// is the same as one of its input registers. This only works on instructions
+// that define a single register (unlike post-increment loads, for example).
+// The equality check is actually more detailed: the code calculates which
+// bits of the output are used, and only compares these bits with the input
+// registers.
+// If the output matches an input, the instruction is replaced with COPY.
+// The copies will be removed by another transformation.
+// (A standalone illustration of this used-bits check follows below.)
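// A self-contained illustration of the equality check described above,
// assuming a 32-bit value: if every *used* bit of the result equals the
// corresponding bit of an input, the defining instruction can be replaced by
// a copy. Here the consumer only reads the low 16 bits, so a definition that
// differs from S only in the high half is redundant with respect to S.
#include <cassert>
#include <cstdint>

static uint32_t setHighHalf(uint32_t S, uint32_t H) {
  return (S & 0xFFFFu) | (H << 16);          // differs from S only in bits 16-31
}

int main() {
  uint32_t S = 0x12345678, R = setHighHalf(S, 0xABCD);
  uint32_t UsedMask = 0xFFFFu;               // consumer uses only bits 0-15
  assert((R & UsedMask) == (S & UsedMask));  // so R may be replaced by a copy of S
}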
+namespace { + class RedundantInstrElimination : public Transformation { + public: + RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool computeUsedBits(unsigned Reg, BitVector &Bits); + bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, + uint16_t Begin); + bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the instruction is a lossy shift left, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN, Width; + switch (Opc) { + case S2_asl_i_p: + ImN = 2; + RegN = 1; + Width = 64; + break; + case S2_asl_i_p_acc: + case S2_asl_i_p_and: + case S2_asl_i_p_nac: + case S2_asl_i_p_or: + case S2_asl_i_p_xacc: + ImN = 3; + RegN = 2; + Width = 64; + break; + case S2_asl_i_r: + ImN = 2; + RegN = 1; + Width = 32; + break; + case S2_addasl_rrri: + case S4_andi_asl_ri: + case S4_ori_asl_ri: + case S4_addi_asl_ri: + case S4_subi_asl_ri: + case S2_asl_i_r_acc: + case S2_asl_i_r_and: + case S2_asl_i_r_nac: + case S2_asl_i_r_or: + case S2_asl_i_r_sat: + case S2_asl_i_r_xacc: + ImN = 3; + RegN = 2; + Width = 32; + break; + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + if (S == 0) + return false; + LostB = Width-S; + LostE = Width; + return true; +} + + +// Check if the instruction is a lossy shift right, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN; + switch (Opc) { + case S2_asr_i_p: + case S2_lsr_i_p: + ImN = 2; + RegN = 1; + break; + case S2_asr_i_p_acc: + case S2_asr_i_p_and: + case S2_asr_i_p_nac: + case S2_asr_i_p_or: + case S2_lsr_i_p_acc: + case S2_lsr_i_p_and: + case S2_lsr_i_p_nac: + case S2_lsr_i_p_or: + case S2_lsr_i_p_xacc: + ImN = 3; + RegN = 2; + break; + case S2_asr_i_r: + case S2_lsr_i_r: + ImN = 2; + RegN = 1; + break; + case S4_andi_lsr_ri: + case S4_ori_lsr_ri: + case S4_addi_lsr_ri: + case S4_subi_lsr_ri: + case S2_asr_i_r_acc: + case S2_asr_i_r_and: + case S2_asr_i_r_nac: + case S2_asr_i_r_or: + case S2_lsr_i_r_acc: + case S2_lsr_i_r_and: + case S2_lsr_i_r_nac: + case S2_lsr_i_r_or: + case S2_lsr_i_r_xacc: + ImN = 3; + RegN = 2; + break; + + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + LostB = 0; + LostE = S; + return true; +} + + +// Calculate the bit vector that corresponds to the used bits of register Reg. 
+// The vector Bits has the same size as the size of Reg in bits. If the cal-
+// culation fails (i.e. the used bits are unknown), it returns false. Other-
+// wise, it returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+  BitVector Used(Bits.size());
+  RegisterSet Visited;
+  std::vector<unsigned> Pending;
+  Pending.push_back(Reg);
+
+  for (unsigned i = 0; i < Pending.size(); ++i) {
+    unsigned R = Pending[i];
+    if (Visited.has(R))
+      continue;
+    Visited.insert(R);
+    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+      BitTracker::RegisterRef UR = *I;
+      unsigned B, W;
+      if (!HBS::getSubregMask(UR, B, W, MRI))
+        return false;
+      MachineInstr &UseI = *I->getParent();
+      if (UseI.isPHI() || UseI.isCopy()) {
+        unsigned DefR = UseI.getOperand(0).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(DefR))
+          return false;
+        Pending.push_back(DefR);
+      } else {
+        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+          return false;
+      }
+    }
+  }
+  Bits |= Used;
+  return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the lowest-significant bit
+// of the register used in operand OpN. For example, in instruction:
+//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// the operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the lowest-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+      unsigned OpN, BitVector &Bits, uint16_t Begin) {
+  unsigned Opc = MI.getOpcode();
+  BitVector T(Bits.size());
+  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+  // Even if we don't have bits yet, we could still provide some information
+  // if the instruction is a lossy shift: the lost bits will be marked as
+  // not used.
+  unsigned LB, LE;
+  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+    assert(MI.getOperand(OpN).isReg());
+    BitTracker::RegisterRef RR = MI.getOperand(OpN);
+    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+    uint16_t Width = RC->getSize()*8;
+
+    if (!GotBits)
+      T.set(Begin, Begin+Width);
+    assert(LB <= LE && LB < Width && LE <= Width);
+    T.reset(Begin+LB, Begin+LE);
+    GotBits = true;
+  }
+  if (GotBits)
+    Bits |= T;
+  return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical. (A standalone
+// illustration of the lossy-shift bookkeeping above follows.)
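// A standalone sketch of the lossy-shift reasoning used by computeUsedBits:
// for "R << S" on a Width-bit register, bits [Width-S, Width) of the input
// never reach the result, so they are recorded as unused. This mirrors the
// isLossyShiftLeft bookkeeping with plain arrays instead of BitVector.
#include <cassert>

static void markUsedForShiftLeft(bool *Bits, unsigned Width, unsigned S) {
  for (unsigned i = 0; i < Width; ++i)
    Bits[i] = i < Width - S;        // the range [Width-S, Width) is lost
}

int main() {
  bool Used[32];
  markUsedForShiftLeft(Used, 32, 10);
  assert(Used[21] && !Used[22]);    // bits 22..31 of the input are dead
}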
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, + BitTracker::RegisterRef RS) { + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + + unsigned DB, DW; + if (!HBS::getSubregMask(RD, DB, DW, MRI)) + return false; + unsigned SB, SW; + if (!HBS::getSubregMask(RS, SB, SW, MRI)) + return false; + if (SW != DW) + return false; + + BitVector Used(DC.width()); + if (!computeUsedBits(RD.Reg, Used)) + return false; + + for (unsigned i = 0; i != DW; ++i) + if (Used[i+DB] && DC[DB+i] != SC[SB+i]) + return false; + return true; +} + + +bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, + const RegisterSet&) { + bool Changed = false; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { + NextI = std::next(I); + MachineInstr *MI = &*I; + + if (MI->getOpcode() == TargetOpcode::COPY) + continue; + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + continue; + unsigned NumD = MI->getDesc().getNumDefs(); + if (NumD != 1) + continue; + + BitTracker::RegisterRef RD = MI->getOperand(0); + if (!BT.has(RD.Reg)) + continue; + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + + // Find a source operand that is equal to the result. + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + if (!HBS::isTransparentCopy(RD, RS, MRI)) + continue; + + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) + continue; + + // If found, replace the instruction with a COPY. + DebugLoc DL = MI->getDebugLoc(); + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), SC); + Changed = true; + break; + } + } + + return Changed; +} + + +// +// Const generation +// +// Recognize instructions that produce constant values known at compile-time. +// Replace them with register definitions that load these constants directly. 
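// A sketch of the constant-splitting logic a genTfrConst-style helper can use
// for 64-bit values, under the assumption stated in the code that follows: a
// constant is emitted as a combine of two halves when either 32-bit half fits
// in a signed 8-bit immediate, and falls back to a CONST64 load otherwise.
// The printed mnemonics are illustrative only.
#include <cstdint>
#include <cstdio>

static bool fitsInt8(int32_t V) { return V >= -128 && V <= 127; }

int main() {
  int64_t C = 0x0000000500000003LL;
  int32_t Lo = int32_t(C), Hi = int32_t(C >> 32);
  if (fitsInt8(Lo) || fitsInt8(Hi))
    std::printf("combineii(#%d, #%d)\n", Hi, Lo);   // one packetizable pair
  else
    std::printf("CONST64(#%lld)\n", (long long)C);  // constant-pool load
}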
+namespace { + class ConstGeneration : public Transformation { + public: + ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isTfrConst(const MachineInstr *MI) const; + bool isConst(unsigned R, int64_t &V) const; + unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + +bool ConstGeneration::isConst(unsigned R, int64_t &C) const { + if (!BT.has(R)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(R); + int64_t T = 0; + for (unsigned i = RC.width(); i > 0; --i) { + const BitTracker::BitValue &V = RC[i-1]; + T <<= 1; + if (V.is(1)) + T |= 1; + else if (!V.is(0)) + return false; + } + C = T; + return true; +} + + +bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::TFR_PdTrue: + case Hexagon::TFR_PdFalse: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return true; + } + return false; +} + + +// Generate a transfer-immediate instruction that is appropriate for the +// register class and the actual value being transferred. +unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { + unsigned Reg = MRI.createVirtualRegister(RC); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) + .addImm(int32_t(C)); + return Reg; + } + + if (RC == &Hexagon::DoubleRegsRegClass) { + if (isInt<8>(C)) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) + .addImm(C); + return Reg; + } + + unsigned Lo = Lo_32(C), Hi = Hi_32(C); + if (isInt<8>(Lo) || isInt<8>(Hi)) { + unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii + : Hexagon::A4_combineii; + BuildMI(B, At, DL, HII.get(Opc), Reg) + .addImm(int32_t(Hi)) + .addImm(int32_t(Lo)); + return Reg; + } + + BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + .addImm(C); + return Reg; + } + + if (RC == &Hexagon::PredRegsRegClass) { + unsigned Opc; + if (C == 0) + Opc = Hexagon::TFR_PdFalse; + else if ((C & 0xFF) == 0xFF) + Opc = Hexagon::TFR_PdTrue; + else + return 0; + BuildMI(B, At, DL, HII.get(Opc), Reg); + return Reg; + } + + return 0; +} + + +bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I) { + if (isTfrConst(I)) + continue; + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DR)) + continue; + int64_t C; + if (isConst(DR, C)) { + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); + if (ImmReg) { + HBS::replaceReg(DR, ImmReg, MRI); + BT.put(ImmReg, BT.lookup(DR)); + Changed = true; + } + } + } + return Changed; +} + + +// +// Copy generation +// +// Identify pairs of available registers which hold identical values. 
+// In such cases, only one of them needs to be calculated, the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+// Eliminate register copies RD = RS, by replacing the uses of RD with
+// uses of RS.
+namespace {
+  class CopyGeneration : public Transformation {
+  public:
+    CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+        MachineRegisterInfo &mri)
+      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+  private:
+    bool findMatch(const BitTracker::RegisterRef &Inp,
+        BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+    const HexagonInstrInfo &HII;
+    MachineRegisterInfo &MRI;
+    BitTracker &BT;
+  };
+
+  class CopyPropagation : public Transformation {
+  public:
+    CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+        : Transformation(false), MRI(mri) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+    static bool isCopyReg(unsigned Opc);
+  private:
+    bool propagateRegCopy(MachineInstr &MI);
+
+    MachineRegisterInfo &MRI;
+  };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+      BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+  if (!BT.has(Inp.Reg))
+    return false;
+  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+  unsigned B, W;
+  if (!HBS::getSubregMask(Inp, B, W, MRI))
+    return false;
+
+  for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+    if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+      continue;
+    const BitTracker::RegisterCell &RC = BT.lookup(R);
+    unsigned RW = RC.width();
+    if (W == RW) {
+      if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+        continue;
+      if (!HBS::isEqual(InpRC, B, RC, 0, W))
+        continue;
+      Out.Reg = R;
+      Out.Sub = 0;
+      return true;
+    }
+    // Check if there is a super-register whose part (accessed via a
+    // subregister) is equal to the input.
+    // Only do double registers for now.
+    if (W*2 != RW)
+      continue;
+    if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+      continue;
+
+    if (HBS::isEqual(InpRC, B, RC, 0, W))
+      Out.Sub = Hexagon::subreg_loreg;
+    else if (HBS::isEqual(InpRC, B, RC, W, W))
+      Out.Sub = Hexagon::subreg_hireg;
+    else
+      continue;
+    Out.Reg = R;
+    return true;
+  }
+  return false;
+}
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+      const RegisterSet &AVs) {
+  RegisterSet AVB(AVs);
+  bool Changed = false;
+  RegisterSet Defs;
+
+  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+       ++I, AVB.insert(Defs)) {
+    NextI = std::next(I);
+    Defs.clear();
+    HBS::getInstrDefs(*I, Defs);
+
+    unsigned Opc = I->getOpcode();
+    if (CopyPropagation::isCopyReg(Opc))
+      continue;
+
+    for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+      BitTracker::RegisterRef MR;
+      if (!findMatch(R, MR, AVB))
+        continue;
+      DebugLoc DL = I->getDebugLoc();
+      auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+      unsigned NewR = MRI.createVirtualRegister(FRC);
+      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+      BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+        .addReg(MR.Reg, 0, MR.Sub);
+      BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+    }
+  }
+
+  return Changed;
+}
+
+
+bool CopyPropagation::isCopyReg(unsigned Opc) {
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case TargetOpcode::REG_SEQUENCE:
+    case Hexagon::A2_tfr:
+    case Hexagon::A2_tfrp:
+    case Hexagon::A2_combinew:
+    case Hexagon::A4_combineir:
+    case Hexagon::A4_combineri:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
+
+bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
+  bool Changed = false;
+  unsigned Opc = MI.getOpcode();
+  BitTracker::RegisterRef RD = MI.getOperand(0);
+  assert(MI.getOperand(0).getSubReg() == 0);
+
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case Hexagon::A2_tfr:
+    case Hexagon::A2_tfrp: {
+      BitTracker::RegisterRef RS = MI.getOperand(1);
+      if (!HBS::isTransparentCopy(RD, RS, MRI))
+        break;
+      if (RS.Sub != 0)
+        Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
+      else
+        Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
+      break;
+    }
+    case TargetOpcode::REG_SEQUENCE: {
+      BitTracker::RegisterRef SL, SH;
+      if (HBS::parseRegSequence(MI, SL, SH)) {
+        Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+                                         SL.Reg, SL.Sub, MRI);
+        Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+                                          SH.Reg, SH.Sub, MRI);
+      }
+      break;
+    }
+    case Hexagon::A2_combinew: {
+      BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
+      Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+                                       RL.Reg, RL.Sub, MRI);
+      Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+                                        RH.Reg, RH.Sub, MRI);
+      break;
+    }
+    case Hexagon::A4_combineir:
+    case Hexagon::A4_combineri: {
+      unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
+      unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
+                                                    : Hexagon::subreg_hireg;
+      BitTracker::RegisterRef RS = MI.getOperand(SrcX);
+      Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
+      break;
+    }
+  }
+  return Changed;
+}
+
+
+bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+  std::vector<MachineInstr*> Instrs;
+  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
+    Instrs.push_back(&*I);
+
+  bool Changed = false;
+  for (auto I : Instrs) {
+    unsigned Opc = I->getOpcode();
+    if (!CopyPropagation::isCopyReg(Opc))
+      continue;
+    Changed |= propagateRegCopy(*I);
+  }
+
+  return Changed;
+}
+
+
+//
+// Bit simplification
+//
+// Recognize patterns that can be simplified and replace them with the
+// simpler forms. This is by no means complete.
+namespace {
+  class BitSimplification : public Transformation {
+  public:
+    BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
+        MachineRegisterInfo &mri)
+      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+  private:
+    struct RegHalf : public BitTracker::RegisterRef {
+      bool Low;  // Low/High halfword.
+ }; + + bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, + unsigned B, RegHalf &RH); + + bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, + BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); + unsigned getCombineOpcode(bool HLow, bool LLow); + + bool genStoreUpperHalf(MachineInstr *MI); + bool genStoreImmediate(MachineInstr *MI); + bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the bits [B..B+16) in register cell RC form a valid halfword, +// i.e. [0..16), [16..32), etc. of some register. If so, return true and +// set the information about the found register in RH. +bool BitSimplification::matchHalf(unsigned SelfR, + const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { + // XXX This could be searching in the set of available registers, in case + // the match is not exact. + + // Match 16-bit chunks, where the RC[B..B+15] references exactly one + // register and all the bits B..B+15 match between RC and the register. + // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, + // and RC = { [0]:0 [1-15]:v1[1-15]... }. + bool Low = false; + unsigned I = B; + while (I < B+16 && RC[I].num()) + I++; + if (I == B+16) + return false; + + unsigned Reg = RC[I].RefI.Reg; + unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. + if (P < I-B) + return false; + unsigned Pos = P - (I-B); + + if (Reg == 0 || Reg == SelfR) // Don't match "self". + return false; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!BT.has(Reg)) + return false; + + const BitTracker::RegisterCell &SC = BT.lookup(Reg); + if (Pos+16 > SC.width()) + return false; + + for (unsigned i = 0; i < 16; ++i) { + const BitTracker::BitValue &RV = RC[i+B]; + if (RV.Type == BitTracker::BitValue::Ref) { + if (RV.RefI.Reg != Reg) + return false; + if (RV.RefI.Pos != i+Pos) + return false; + continue; + } + if (RC[i+B] != SC[i+Pos]) + return false; + } + + unsigned Sub = 0; + switch (Pos) { + case 0: + Sub = Hexagon::subreg_loreg; + Low = true; + break; + case 16: + Sub = Hexagon::subreg_loreg; + Low = false; + break; + case 32: + Sub = Hexagon::subreg_hireg; + Low = true; + break; + case 48: + Sub = Hexagon::subreg_hireg; + Low = false; + break; + default: + return false; + } + + RH.Reg = Reg; + RH.Sub = Sub; + RH.Low = Low; + // If the subregister is not valid with the register, set it to 0. + if (!HBS::getFinalVRegClass(RH, MRI)) + RH.Sub = 0; + + return true; +} + + +// Check if RC matches the pattern of a S2_packhl. If so, return true and +// set the inputs Rs and Rt. 
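+// For reference, a sketch of the S2_packhl semantics that the matcher
+// verifies bit-by-bit, written as plain C++ (illustration only, not part
+// of the pass):
+//
+//   uint64_t packhl(uint32_t Rs, uint32_t Rt) {
+//     uint32_t Hi = (Rs & 0xFFFF0000) | (Rt >> 16);       // Rs.H : Rt.H
+//     uint32_t Lo = (Rs << 16) | (Rt & 0x0000FFFF);       // Rs.L : Rt.L
+//     return ((uint64_t)Hi << 32) | Lo;
+//   }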
+bool BitSimplification::matchPackhl(unsigned SelfR,
+      const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+      BitTracker::RegisterRef &Rt) {
+  RegHalf L1, H1, L2, H2;
+
+  if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
+    return false;
+  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+    return false;
+
+  // Rs = H1.L1, Rt = H2.L2
+  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+    return false;
+  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+    return false;
+
+  Rs = H1;
+  Rt = H2;
+  return true;
+}
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+  return HLow ? LLow ? Hexagon::A2_combine_ll
+                     : Hexagon::A2_combine_lh
+              : LLow ? Hexagon::A2_combine_hl
+                     : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc != Hexagon::S2_storerh_io)
+    return false;
+
+  MachineOperand &ValOp = MI->getOperand(2);
+  BitTracker::RegisterRef RS = ValOp;
+  if (!BT.has(RS.Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+  RegHalf H;
+  if (!matchHalf(0, RC, 0, H))
+    return false;
+  if (H.Low)
+    return false;
+  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+  ValOp.setReg(H.Reg);
+  ValOp.setSubReg(H.Sub);
+  return true;
+}
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  unsigned Align = 0;
+  switch (Opc) {
+    case Hexagon::S2_storeri_io:
+      Align++;
+    case Hexagon::S2_storerh_io:
+      Align++;
+    case Hexagon::S2_storerb_io:
+      break;
+    default:
+      return false;
+  }
+
+  // Avoid stores to frame-indices (due to an unknown offset).
+  if (!MI->getOperand(0).isReg())
+    return false;
+  MachineOperand &OffOp = MI->getOperand(1);
+  if (!OffOp.isImm())
+    return false;
+
+  int64_t Off = OffOp.getImm();
+  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+    return false;
+
+  BitTracker::RegisterRef RS = MI->getOperand(2);
+  if (!BT.has(RS.Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+  uint64_t U;
+  if (!HBS::getConst(RC, 0, RC.width(), U))
+    return false;
+
+  // Only consider 8-bit values to avoid constant-extenders.
+  int V;
+  switch (Opc) {
+    case Hexagon::S2_storerb_io:
+      V = int8_t(U);
+      break;
+    case Hexagon::S2_storerh_io:
+      V = int16_t(U);
+      break;
+    case Hexagon::S2_storeri_io:
+      V = int32_t(U);
+      break;
+  }
+  if (!isInt<8>(V))
+    return false;
+
+  MI->RemoveOperand(2);
+  switch (Opc) {
+    case Hexagon::S2_storerb_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+      break;
+    case Hexagon::S2_storerh_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+      break;
+    case Hexagon::S2_storeri_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+      break;
+  }
+  MI->addOperand(MachineOperand::CreateImm(V));
+  return true;
+}
+
+
+// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
+bool BitSimplification::genPackhl(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == Hexagon::S2_packhl)
+    return false;
+  BitTracker::RegisterRef Rs, Rt;
+  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+    .addReg(Rs.Reg, 0, Rs.Sub)
+    .addReg(Rt.Reg, 0, Rt.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L;
+  // Check for halfword in low 16 bits, zeros elsewhere.
+  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Prefer zxth, since zxth can go in any slot, while extractu can only
+  // go in slots 2 and 3.
+  unsigned NewR = 0;
+  if (L.Low && Opc != Hexagon::A2_zxth) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+      .addReg(L.Reg, 0, L.Sub);
+  } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+      .addReg(L.Reg, 0, L.Sub)
+      .addImm(16)
+      .addImm(16);
+  }
+  if (NewR == 0)
+    return false;
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L, H;
+  // Check for combine h/l
+  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+    return false;
+  // Do nothing if this is just a reg copy.
+  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  unsigned COpc = getCombineOpcode(H.Low, L.Low);
+  if (COpc == Opc)
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+  BuildMI(B, MI, DL, HII.get(COpc), NewR)
+    .addReg(H.Reg, 0, H.Sub)
+    .addReg(L.Reg, 0, L.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI resets the high bits of a register and keeps the lower ones,
+// replace it with zero-extend byte/half, and-immediate, or extractu,
+// as appropriate.
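+// For illustration (the width W below comes from the bit-tracker cell,
+// and vregN/vregS are made-up registers), the replacement generated is:
+//   W ==  8:   vregN = A2_zxtb vregS
+//   W == 16:   vregN = A2_zxth vregS
+//   W <  10:   vregN = A2_andir vregS, (1<<W)-1
+//   otherwise: vregN = S2_extractu vregS, W, 0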
+bool BitSimplification::genExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + case Hexagon::S2_extractu: + return false; + } + if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { + int32_t Imm = MI->getOperand(2).getImm(); + if (isInt<10>(Imm)) + return false; + } + + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + return false; + unsigned W = RC.width(); + while (W > 0 && RC[W-1].is(0)) + W--; + if (W == 0 || W == RC.width()) + return false; + unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb + : (W == 16) ? Hexagon::A2_zxth + : (W < 10) ? Hexagon::A2_andir + : Hexagon::S2_extractu; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) + continue; + + unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + .addReg(RS.Reg, 0, RS.Sub); + if (NewOpc == Hexagon::A2_andir) + MIB.addImm((1 << W) - 1); + else if (NewOpc == Hexagon::S2_extractu) + MIB.addImm(W).addImm(0); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + return true; + } + return false; +} + + +// Check for tstbit simplification opportunity, where the bit being checked +// can be tracked back to another register. For example: +// vreg2 = S2_lsr_i_r vreg1, 5 +// vreg3 = S2_tstbit_i vreg2, 0 +// => +// vreg3 = S2_tstbit_i vreg1, 5 +bool BitSimplification::simplifyTstbit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_tstbit_i) + return false; + + unsigned BN = MI->getOperand(2).getImm(); + BitTracker::RegisterRef RS = MI->getOperand(1); + unsigned F, W; + DebugLoc DL = MI->getDebugLoc(); + if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) + return false; + MachineBasicBlock &B = *MI->getParent(); + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + const BitTracker::BitValue &V = SC[F+BN]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { + const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); + // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is + // a double register, need to use a subregister and adjust bit + // number. + unsigned P = UINT_MAX; + BitTracker::RegisterRef RR(V.RefI.Reg, 0); + if (TC == &Hexagon::DoubleRegsRegClass) { + P = V.RefI.Pos; + RR.Sub = Hexagon::subreg_loreg; + if (P >= 32) { + P -= 32; + RR.Sub = Hexagon::subreg_hireg; + } + } else if (TC == &Hexagon::IntRegsRegClass) { + P = V.RefI.Pos; + } + if (P != UINT_MAX) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + .addReg(RR.Reg, 0, RR.Sub) + .addImm(P); + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(NewR, RC); + return true; + } + } else if (V.is(0) || V.is(1)) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+    BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+    HBS::replaceReg(RD.Reg, NewR, MRI);
+    return true;
+  }
+
+  return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+      const RegisterSet &AVs) {
+  bool Changed = false;
+  RegisterSet AVB = AVs;
+  RegisterSet Defs;
+
+  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+    MachineInstr *MI = &*I;
+    Defs.clear();
+    HBS::getInstrDefs(*MI, Defs);
+
+    unsigned Opc = MI->getOpcode();
+    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+      continue;
+
+    if (MI->mayStore()) {
+      bool T = genStoreUpperHalf(MI);
+      T = T || genStoreImmediate(MI);
+      Changed |= T;
+      continue;
+    }
+
+    if (Defs.count() != 1)
+      continue;
+    const MachineOperand &Op0 = MI->getOperand(0);
+    if (!Op0.isReg() || !Op0.isDef())
+      continue;
+    BitTracker::RegisterRef RD = Op0;
+    if (!BT.has(RD.Reg))
+      continue;
+    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+      bool T = genPackhl(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+
+    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+      bool T = genExtractHalf(MI, RD, RC);
+      T = T || genCombineHalf(MI, RD, RC);
+      T = T || genExtractLow(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+
+    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+      bool T = simplifyTstbit(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+  }
+  return Changed;
+}
+
+
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  auto &HRI = *HST.getRegisterInfo();
+  auto &HII = *HST.getInstrInfo();
+
+  MDT = &getAnalysis<MachineDominatorTree>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  bool Changed;
+
+  Changed = DeadCodeElimination(MF, *MDT).run();
+
+  const HexagonEvaluator HE(HRI, MRI, HII, MF);
+  BitTracker BT(HE, MF);
+  DEBUG(BT.trace(true));
+  BT.run();
+
+  MachineBasicBlock &Entry = MF.front();
+
+  RegisterSet AIG;  // Available registers for IG.
+  ConstGeneration ImmG(BT, HII, MRI);
+  Changed |= visitBlock(Entry, ImmG, AIG);
+
+  RegisterSet ARE;  // Available registers for RIE.
+  RedundantInstrElimination RIE(BT, HII, MRI);
+  Changed |= visitBlock(Entry, RIE, ARE);
+
+  RegisterSet ACG;  // Available registers for CG.
+  CopyGeneration CopyG(BT, HII, MRI);
+  Changed |= visitBlock(Entry, CopyG, ACG);
+
+  RegisterSet ACP;  // Available registers for CP.
+  CopyPropagation CopyP(HRI, MRI);
+  Changed |= visitBlock(Entry, CopyP, ACP);
+
+  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+  BT.run();
+  RegisterSet ABS;  // Available registers for BS.
+  BitSimplification BitS(BT, HII, MRI);
+  Changed |= visitBlock(Entry, BitS, ABS);
+
+  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+  if (Changed) {
+    for (auto &B : MF)
+      for (auto &I : B)
+        I.clearKillInfo();
+    DeadCodeElimination(MF, *MDT).run();
+  }
+  return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+// before the entry of the loop, and the matching code is such that it can
+// be simplified. This pass relies on the bit simplification above and only
+// prepares code in a way that can be handled by the bit simplification.
+//
+// This is the motivating testcase (and explanation):
+//
+// {
+//   loop0(.LBB0_2, r1)          // %for.body.preheader
+//   r5:4 = memd(r0++#8)
+// }
+// {
+//   r3 = lsr(r4, #16)
+//   r7:6 = combine(r5, r5)
+// }
+// {
+//   r3 = insert(r5, #16, #16)
+//   r7:6 = vlsrw(r7:6, #16)
+// }
+// .LBB0_2:
+// {
+//   memh(r2+#4) = r5
+//   memh(r2+#6) = r6            # R6 is really R5.H
+// }
+// {
+//   r2 = add(r2, #8)
+//   memh(r2+#0) = r4
+//   memh(r2+#2) = r3            # R3 is really R4.H
+// }
+// {
+//   r5:4 = memd(r0++#8)
+// }
+// {                             # "Shuffling" code that sets up R3 and R6
+//   r3 = lsr(r4, #16)           # so that their halves can be stored in the
+//   r7:6 = combine(r5, r5)      # next iteration. This could be folded into
+// }                             # the stores if the code was at the beginning
+// {                             # of the loop iteration. Since the same code
+//   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
+//   r7:6 = vlsrw(r7:6, #16)     # there.
+// }:endloop0
+//
+//
+// The outcome:
+//
+// {
+//   loop0(.LBB0_2, r1)
+//   r5:4 = memd(r0++#8)
+// }
+// .LBB0_2:
+// {
+//   memh(r2+#4) = r5
+//   memh(r2+#6) = r5.h
+// }
+// {
+//   r2 = add(r2, #8)
+//   memh(r2+#0) = r4
+//   memh(r2+#2) = r4.h
+// }
+// {
+//   r5:4 = memd(r0++#8)
+// }:endloop0
+
+namespace llvm {
+  FunctionPass *createHexagonLoopRescheduling();
+  void initializeHexagonLoopReschedulingPass(PassRegistry&);
+}
+
+namespace {
+  class HexagonLoopRescheduling : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonLoopRescheduling() : MachineFunctionPass(ID),
+        HII(0), HRI(0), MRI(0), BTP(0) {
+      initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+  private:
+    const HexagonInstrInfo *HII;
+    const HexagonRegisterInfo *HRI;
+    MachineRegisterInfo *MRI;
+    BitTracker *BTP;
+
+    struct LoopCand {
+      LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
+          MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
+      MachineBasicBlock *LB, *PB, *EB;
+    };
+    typedef std::vector<MachineInstr*> InstrList;
+    struct InstrGroup {
+      BitTracker::RegisterRef Inp, Out;
+      InstrList Ins;
+    };
+    struct PhiInfo {
+      PhiInfo(MachineInstr &P, MachineBasicBlock &B);
+      unsigned DefR;
+      BitTracker::RegisterRef LR, PR;
+      MachineBasicBlock *LB, *PB;
+    };
+
+    static unsigned getDefReg(const MachineInstr *MI);
+    bool isConst(unsigned Reg) const;
+    bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
+    bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
+    bool isShuffleOf(unsigned OutR, unsigned InpR) const;
+    bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
+        unsigned &InpR2) const;
+    void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
+        MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
+    bool processLoop(LoopCand &C);
+  };
+}
+
+char HexagonLoopRescheduling::ID = 0;
+
+INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
+  "Hexagon Loop Rescheduling", false, false)
+
+
+HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
+      MachineBasicBlock &B) {
+  DefR = HexagonLoopRescheduling::getDefReg(&P);
+  LB = &B;
+  PB = nullptr;
+  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
+    const MachineOperand &OpB = P.getOperand(i+1);
+    if (OpB.getMBB() == &B) {
+      LR = P.getOperand(i);
+      continue;
+    }
+    PB = OpB.getMBB();
+    PR = P.getOperand(i);
+  }
+}
+
+
+unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
+  RegisterSet Defs;
+  HBS::getInstrDefs(*MI, Defs);
+  if (Defs.count() != 1)
+    return 0;
+  return Defs.find_first();
+}
+
+
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
+  if (!BTP->has(Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
+  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
+    const BitTracker::BitValue &V = RC[i];
+    if (!V.is(0) && !V.is(1))
+      return false;
+  }
+  return true;
+}
+
+
+bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
+      unsigned DefR) const {
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case Hexagon::S2_lsr_i_r:
+    case Hexagon::S2_asr_i_r:
+    case Hexagon::S2_asl_i_r:
+    case Hexagon::S2_lsr_i_p:
+    case Hexagon::S2_asr_i_p:
+    case Hexagon::S2_asl_i_p:
+    case Hexagon::S2_insert:
+    case Hexagon::A2_or:
+    case Hexagon::A2_orp:
+    case Hexagon::A2_and:
+    case Hexagon::A2_andp:
+    case Hexagon::A2_combinew:
+    case Hexagon::A4_combineri:
+    case Hexagon::A4_combineir:
+    case Hexagon::A2_combineii:
+    case Hexagon::A4_combineii:
+    case Hexagon::A2_combine_ll:
+    case Hexagon::A2_combine_lh:
+    case Hexagon::A2_combine_hl:
+    case Hexagon::A2_combine_hh:
+      return true;
+  }
+  return false;
+}
+
+
+bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
+      unsigned InpR) const {
+  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+    if (!Op.isReg())
+      continue;
+    if (Op.getReg() == InpR)
+      return i == n-1;
+  }
+  return false;
+}
+
+
+bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
+  if (!BTP->has(OutR) || !BTP->has(InpR))
+    return false;
+  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
+  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
+    const BitTracker::BitValue &V = OutC[i];
+    if (V.Type != BitTracker::BitValue::Ref)
+      continue;
+    if (V.RefI.Reg != InpR)
+      return false;
+  }
+  return true;
+}
+
+
+bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
+      unsigned OutR2, unsigned &InpR2) const {
+  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
+    return false;
+  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
+  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
+  unsigned W = OutC1.width();
+  unsigned MatchR = 0;
+  if (W != OutC2.width())
+    return false;
+  for (unsigned i = 0; i < W; ++i) {
+    const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
+    if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
+      return false;
+    if (V1.Type != BitTracker::BitValue::Ref)
+      continue;
+    if (V1.RefI.Pos != V2.RefI.Pos)
+      return false;
+    if (V1.RefI.Reg != InpR1)
+      return false;
+    if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
+      return false;
+    if (!MatchR)
+      MatchR = V2.RefI.Reg;
+    else if (V2.RefI.Reg != MatchR)
+      return false;
+  }
+  InpR2 = MatchR;
+  return true;
+}
+
+
+void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
+      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
+      unsigned NewPredR) {
+  DenseMap<unsigned,unsigned> RegMap;
+
+  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
+  unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
+    .addReg(NewPredR)
+    .addMBB(&PB)
+    .addReg(G.Inp.Reg)
+    .addMBB(&LB);
+  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
+
+  for (unsigned i = G.Ins.size(); i > 0; --i) {
+    const MachineInstr *SI = G.Ins[i-1];
+    unsigned DR = getDefReg(SI);
+    const TargetRegisterClass *RC = MRI->getRegClass(DR);
+    unsigned NewDR = MRI->createVirtualRegister(RC);
+    DebugLoc DL = SI->getDebugLoc();
+
+    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
+    for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
+      const MachineOperand &Op = SI->getOperand(j);
+      if (!Op.isReg()) {
+        MIB.addOperand(Op);
+        continue;
+      }
+      if (!Op.isUse())
+        continue;
+      unsigned UseR = RegMap[Op.getReg()];
+      MIB.addReg(UseR, 0, Op.getSubReg());
+    }
+    RegMap.insert(std::make_pair(DR, NewDR));
+  }
+
+  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
+}
+
+
+bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
+  DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
+  std::vector<PhiInfo> Phis;
+  for (auto &I : *C.LB) {
+    if (!I.isPHI())
+      break;
+    unsigned PR = getDefReg(&I);
+    if (isConst(PR))
+      continue;
+    bool BadUse = false, GoodUse = false;
+    for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
+      MachineInstr *UseI = UI->getParent();
+      if (UseI->getParent() != C.LB) {
+        BadUse = true;
+        break;
+      }
+      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
+        GoodUse = true;
+    }
+    if (BadUse || !GoodUse)
+      continue;
+
+    Phis.push_back(PhiInfo(I, *C.LB));
+  }
+
+  DEBUG({
+    dbgs() << "Phis: {";
+    for (auto &I : Phis) {
+      dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
+             << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
+             << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
+             << I.LB->getNumber() << ')';
+    }
+    dbgs() << " }\n";
+  });
+
+  if (Phis.empty())
+    return false;
+
+  bool Changed = false;
+  InstrList ShufIns;
+
+  // Go backwards in the block: for each bit shuffling instruction, check
+  // if that instruction could potentially be moved to the front of the loop:
+  // the output of the instruction cannot be used in a non-shuffling
+  // instruction in this loop.
+  for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
+    if (I->isTerminator())
+      continue;
+    if (I->isPHI())
+      break;
+
+    RegisterSet Defs;
+    HBS::getInstrDefs(*I, Defs);
+    if (Defs.count() != 1)
+      continue;
+    unsigned DefR = Defs.find_first();
+    if (!TargetRegisterInfo::isVirtualRegister(DefR))
+      continue;
+    if (!isBitShuffle(&*I, DefR))
+      continue;
+
+    bool BadUse = false;
+    for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
+      MachineInstr *UseI = UI->getParent();
+      if (UseI->getParent() == C.LB) {
+        if (UseI->isPHI()) {
+          // If the use is in a phi node in this loop, then it should be
+          // the value corresponding to the back edge.
+          unsigned Idx = UI.getOperandNo();
+          if (UseI->getOperand(Idx+1).getMBB() != C.LB)
+            BadUse = true;
+        } else {
+          auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+          if (F == ShufIns.end())
+            BadUse = true;
+        }
+      } else {
+        // There is a use outside of the loop, but there is no epilog block
+        // suitable for a copy-out.
+        if (C.EB == nullptr)
+          BadUse = true;
+      }
+      if (BadUse)
+        break;
+    }
+
+    if (BadUse)
+      continue;
+    ShufIns.push_back(&*I);
+  }
+
+  // Partition the list of shuffling instructions into instruction groups,
+  // where each group has to be moved as a whole (i.e. a group is a chain of
+  // dependent instructions). A group produces a single live output register,
+  // which is meant to be the input of the loop phi node (although this is
+  // not checked here yet). It also uses a single register as its input,
+  // which is some value produced in the loop body. After moving the group
+  // to the beginning of the loop, that input register would need to be
+  // the loop-carried register (through a phi node) instead of the (currently
+  // loop-carried) output register.
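+  // For example, in the motivating testcase at the top of this pass, the
+  // group { r3 = lsr(r4, #16); r3 = insert(r5, #16, #16) } produces the
+  // single output r3 and, because r4 and r5 are halves of one double
+  // register, uses the single input r5:4.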
+  typedef std::vector<InstrGroup> InstrGroupList;
+  InstrGroupList Groups;
+
+  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
+    MachineInstr *SI = ShufIns[i];
+    if (SI == nullptr)
+      continue;
+
+    InstrGroup G;
+    G.Ins.push_back(SI);
+    G.Out.Reg = getDefReg(SI);
+    RegisterSet Inputs;
+    HBS::getInstrUses(*SI, Inputs);
+
+    for (unsigned j = i+1; j < n; ++j) {
+      MachineInstr *MI = ShufIns[j];
+      if (MI == nullptr)
+        continue;
+      RegisterSet Defs;
+      HBS::getInstrDefs(*MI, Defs);
+      // If this instruction does not define any pending inputs, skip it.
+      if (!Defs.intersects(Inputs))
+        continue;
+      // Otherwise, add it to the current group and remove the inputs that
+      // are defined by MI.
+      G.Ins.push_back(MI);
+      Inputs.remove(Defs);
+      // Then add all registers used by MI.
+      HBS::getInstrUses(*MI, Inputs);
+      ShufIns[j] = nullptr;
+    }
+
+    // Only add a group if it requires at most one register.
+    if (Inputs.count() > 1)
+      continue;
+    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+      return G.Out.Reg == P.LR.Reg;
+    };
+    if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+      continue;
+
+    G.Inp.Reg = Inputs.find_first();
+    Groups.push_back(G);
+  }
+
+  DEBUG({
+    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+      InstrGroup &G = Groups[i];
+      dbgs() << "Group[" << i << "] inp: "
+             << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
+             << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
+      for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
+        dbgs() << "  " << *G.Ins[j];
+    }
+  });
+
+  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+    InstrGroup &G = Groups[i];
+    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
+      continue;
+    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+      return G.Out.Reg == P.LR.Reg;
+    };
+    auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+    if (F == Phis.end())
+      continue;
+    unsigned PredR = 0;
+    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
+      const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
+      unsigned Opc = DefPredR->getOpcode();
+      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
+        continue;
+      if (!DefPredR->getOperand(1).isImm())
+        continue;
+      if (DefPredR->getOperand(1).getImm() != 0)
+        continue;
+      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
+      if (RC != MRI->getRegClass(F->PR.Reg)) {
+        PredR = MRI->createVirtualRegister(RC);
+        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
+                                                          : Hexagon::A2_tfrpi;
+        auto T = C.PB->getFirstTerminator();
+        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
+        BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+          .addImm(0);
+      } else {
+        PredR = F->PR.Reg;
+      }
+    }
+    assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  HII = HST.getInstrInfo();
+  HRI = HST.getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+  BitTracker BT(HE, MF);
+  DEBUG(BT.trace(true));
+  BT.run();
+  BTP = &BT;
+
+  std::vector<LoopCand> Cand;
+
+  for (auto &B : MF) {
+    if (B.pred_size() != 2 || B.succ_size() != 2)
+      continue;
+    MachineBasicBlock *PB = nullptr;
+    bool IsLoop = false;
+    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+      if (*PI != &B)
+        PB = *PI;
+      else
+        IsLoop = true;
+    }
+    if (!IsLoop)
+      continue;
+
+    MachineBasicBlock *EB = nullptr;
+    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+      if (*SI == &B)
+        continue;
+      // Set EB to the epilog block, if it has only one predecessor (i.e. the
+      // edge from B to EB is non-critical).
+      if ((*SI)->pred_size() == 1)
+        EB = *SI;
+      break;
+    }
+
+    Cand.push_back(LoopCand(&B, PB, EB));
+  }
+
+  bool Changed = false;
+  for (auto &C : Cand)
+    Changed |= processLoop(C);
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+  return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+  return new HexagonBitSimplify();
+}
+
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 021e58a1d08a..d5848dc45a3b 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -84,6 +84,8 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
   uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
   switch (ID) {
     case DoubleRegsRegClassID:
+    case VecDblRegsRegClassID:
+    case VecDblRegs128BRegClassID:
       return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
                                    : BT::BitMask(RW, 2*RW-1);
    default:
@@ -95,30 +97,29 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
   llvm_unreachable("Unexpected register/subregister");
 }
 
-
 namespace {
-  struct RegisterRefs : public std::vector<BT::RegisterRef> {
-    typedef std::vector<BT::RegisterRef> Base;
-    RegisterRefs(const MachineInstr *MI);
-    const BT::RegisterRef &operator[](unsigned n) const {
-      // The main purpose of this operator is to assert with bad argument.
-      assert(n < size());
-      return Base::operator[](n);
-    }
-  };
+class RegisterRefs {
+  std::vector<BT::RegisterRef> Vector;
 
-  RegisterRefs::RegisterRefs(const MachineInstr *MI)
-    : Base(MI->getNumOperands()) {
-    for (unsigned i = 0, n = size(); i < n; ++i) {
+public:
+  RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+    for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
       const MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg())
-        at(i) = BT::RegisterRef(MO);
+        Vector[i] = BT::RegisterRef(MO);
       // For indices that don't correspond to registers, the entry will
       // remain constructed via the default constructor.
     }
   }
-}
+  size_t size() const { return Vector.size(); }
+  const BT::RegisterRef &operator[](unsigned n) const {
+    // The main purpose of this operator is to assert with bad argument.
+    assert(n < Vector.size());
+    return Vector[n];
+  }
+};
+}
 
 bool HexagonEvaluator::evaluate(const MachineInstr *MI,
                                 const CellMapType &Inputs,
                                 CellMapType &Outputs) const {
@@ -189,7 +190,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI,
     return true;
   };
   // Get the cell corresponding to the N-th operand.
-  auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+  auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W)
       -> BT::RegisterCell {
     const MachineOperand &Op = MI->getOperand(N);
     if (Op.isImm())
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 3753b745657b..efafdd007289 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -102,7 +102,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
   // Loop over all of the basic blocks.
   for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
        MBBb != MBBe; ++MBBb) {
-    MachineBasicBlock* MBB = MBBb;
+    MachineBasicBlock *MBB = &*MBBb;
 
     // Traverse the basic block.
     MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
@@ -186,13 +186,11 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
 
         if (case1 || case2) {
           InvertAndChangeJumpTarget(MI, UncondTarget);
-          MBB->removeSuccessor(JumpAroundTarget);
-          MBB->addSuccessor(UncondTarget);
+          MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
 
           // Remove the unconditional branch in LayoutSucc.
           LayoutSucc->erase(LayoutSucc->begin());
-          LayoutSucc->removeSuccessor(UncondTarget);
-          LayoutSucc->addSuccessor(JumpAroundTarget);
+          LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget);
 
           // This code performs the conversion for case 2, which moves
           //  the block to the fall-thru case (BB3 in the code above).
@@ -210,16 +208,15 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
           // The live-in to LayoutSucc is now all values live-in to
           // JumpAroundTarget.
           //
-          std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
-                                           LayoutSucc->livein_end());
-          std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
-                                          JumpAroundTarget->livein_end());
-          for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
-            LayoutSucc->removeLiveIn(OrigLiveIn[i]);
-          }
-          for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
-            LayoutSucc->addLiveIn(NewLiveIn[i]);
-          }
+          std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn(
+              LayoutSucc->livein_begin(), LayoutSucc->livein_end());
+          std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn(
+              JumpAroundTarget->livein_begin(),
+              JumpAroundTarget->livein_end());
+          for (const auto &OrigLI : OrigLiveIn)
+            LayoutSucc->removeLiveIn(OrigLI.PhysReg);
+          for (const auto &NewLI : NewLiveIn)
+            LayoutSucc->addLiveIn(NewLI);
         }
       }
     }
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f5fac156527..931db6687bf8 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -59,30 +59,23 @@ namespace {
 
   // Numbering map for gep nodes. Used to keep track of ordering for
   // gep nodes.
-  struct NodeNumbering : public std::map<const GepNode*,unsigned> {
-  };
-
-  struct NodeOrdering : public NodeNumbering {
+  struct NodeOrdering {
     NodeOrdering() : LastNum(0) {}
-#ifdef _MSC_VER
-    void special_insert_for_special_msvc(const GepNode *N)
-#else
-    using NodeNumbering::insert;
-    void insert(const GepNode* N)
-#endif
-    {
-      insert(std::make_pair(N, ++LastNum));
-    }
-    bool operator() (const GepNode* N1, const GepNode *N2) const {
-      const_iterator F1 = find(N1), F2 = find(N2);
-      assert(F1 != end() && F2 != end());
+
+    void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
+    void clear() { Map.clear(); }
+
+    bool operator()(const GepNode *N1, const GepNode *N2) const {
+      auto F1 = Map.find(N1), F2 = Map.find(N2);
+      assert(F1 != Map.end() && F2 != Map.end());
       return F1->second < F2->second;
     }
+
+  private:
+    std::map<const GepNode *, unsigned> Map;
     unsigned LastNum;
   };
 
-
   class HexagonCommonGEP : public FunctionPass {
   public:
     static char ID;
@@ -360,11 +353,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
       Us.insert(&UI.getUse());
   }
   Nodes.push_back(N);
-#ifdef _MSC_VER
-  NodeOrder.special_insert_for_special_msvc(N);
-#else
   NodeOrder.insert(N);
-#endif
 
   // Skip the first index operand, since we only handle 0. This dereferences
   // the pointer operand.
@@ -379,11 +368,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
     Nx->PTy = PtrTy;
     Nx->Idx = Op;
     Nodes.push_back(Nx);
-#ifdef _MSC_VER
-    NodeOrder.special_insert_for_special_msvc(Nx);
-#else
    NodeOrder.insert(Nx);
-#endif
     PN = Nx;
 
     PtrTy = next_type(PtrTy, Op);
@@ -404,7 +389,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
 void HexagonCommonGEP::collect() {
   // Establish depth-first traversal order of the dominator tree.
   ValueVect BO;
-  getBlockTraversalOrder(Fn->begin(), BO);
+  getBlockTraversalOrder(&Fn->front(), BO);
 
   // The creation of gep nodes requires DT-traversal. When processing a GEP
   // instruction that uses another GEP instruction as the base pointer, the
@@ -737,7 +722,7 @@ namespace {
       Instruction *In = cast<Instruction>(V);
       if (In->getParent() != B)
         continue;
-      BasicBlock::iterator It = In;
+      BasicBlock::iterator It = In->getIterator();
       if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
         FirstUse = It;
@@ -1135,7 +1120,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
     ArrayRef<Value*> A(IdxList, IdxC);
     Type *InpTy = Input->getType();
     Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
-    NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At);
+    NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
     DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
     Input = NewInst;
   } while (nax <= Num);
@@ -1213,7 +1198,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
     Last = Child;
   } while (true);
 
-  BasicBlock::iterator InsertAt = LastB->getTerminator();
+  BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator();
   if (LastUsed || LastCN > 0) {
     ValueVect Urs;
     getAllUsersForNode(Root, Urs, NCM);
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
new file mode 100644
index 000000000000..ee0c318ffb5d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -0,0 +1,1063 @@
+//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a Hexagon-specific if-conversion pass that runs on the
+// SSA form.
+// In SSA it is not straightforward to represent instructions that
+// conditionally define registers, since a conditionally-defined register
+// may only be used under the same condition on which the definition was
+// based. To avoid complications of this nature, this patch will only
+// generate predicated stores, and speculate other instructions from the
+// "if-converted" block.
+// The code will recognize CFG patterns where a block with a conditional
+// branch "splits" into a "true block" and a "false block". Either of these
+// could be omitted (in case of a triangle, for example).
+// If after conversion of the side block(s) the CFG allows it, the
+// resulting blocks may be merged. If the "join" block contained PHI nodes,
+// they will be replaced with MUX (or MUX-like) instructions to maintain
+// the semantics of the PHI.
+//
+// Example:
+//
+//         %vreg40 = L2_loadrub_io %vreg39, 1
+//         %vreg41 = S2_tstbit_i %vreg40, 0
+//         J2_jumpt %vreg41, <BB#5>, %PC
+//         J2_jump <BB#4>, %PC
+//     Successors according to CFG: BB#4(62) BB#5(62)
+//
+// BB#4: derived from LLVM BB %if.then
+//     Predecessors according to CFG: BB#3
+//         %vreg11 = A2_addp %vreg6, %vreg10
+//         S2_storerd_io %vreg32, 16, %vreg11
+//     Successors according to CFG: BB#5
+//
+// BB#5: derived from LLVM BB %if.end
+//     Predecessors according to CFG: BB#3 BB#4
+//         %vreg12 = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+//         %vreg13 = A2_addp %vreg7, %vreg12
+//         %vreg42 = C2_cmpeqi %vreg9, 10
+//         J2_jumpf %vreg42, <BB#3>, %PC
+//         J2_jump <BB#6>, %PC
+//     Successors according to CFG: BB#6(4) BB#3(124)
+//
+// would become:
+//
+//         %vreg40 = L2_loadrub_io %vreg39, 1
+//         %vreg41 = S2_tstbit_i %vreg40, 0
+// spec->  %vreg11 = A2_addp %vreg6, %vreg10
+// pred->  S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+//         %vreg46 = MUX64_rr %vreg41, %vreg6, %vreg11
+//         %vreg13 = A2_addp %vreg7, %vreg46
+//         %vreg42 = C2_cmpeqi %vreg9, 10
+//         J2_jumpf %vreg42, <BB#3>, %PC
+//         J2_jump <BB#6>, %PC
+//     Successors according to CFG: BB#6 BB#3
+
+#define DEBUG_TYPE "hexagon-eif"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "HexagonTargetMachine.h"
+
+#include
+#include
+#include
+
+using namespace llvm;
+
+namespace llvm {
+  FunctionPass *createHexagonEarlyIfConversion();
+  void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
+}
+
+namespace {
+  cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
+    cl::init(false), cl::desc("Enable branch probability info"));
+  cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
+    cl::desc("Size limit in Hexagon early if-conversion"));
+
+  struct PrintMB {
+    PrintMB(const MachineBasicBlock *B) : MB(B) {}
+    const MachineBasicBlock *MB;
+  };
+  raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
+    if (!P.MB)
+      return OS << "<none>";
+    return OS << '#' << P.MB->getNumber();
+  }
+
+  struct FlowPattern {
+    FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+    FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
+        MachineBasicBlock *FB, MachineBasicBlock *JB)
+      : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
+
+    MachineBasicBlock *SplitB;
+    MachineBasicBlock *TrueB, *FalseB, *JoinB;
+    unsigned PredR;
+  };
+  struct PrintFP {
+    PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
+      : FP(P), TRI(T) {}
+    const FlowPattern &FP;
+    const TargetRegisterInfo &TRI;
+    friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
+  };
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+  raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
+    OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
+       << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
+       << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
+       << PrintMB(P.FP.FalseB)
+       << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
+    return OS;
+  }
+
+  class HexagonEarlyIfConversion : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonEarlyIfConversion() : MachineFunctionPass(ID),
+        TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+      initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
+    }
+    const char *getPassName() const override {
+      return "Hexagon early if conversion";
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<MachineBranchProbabilityInfo>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+  private:
+    typedef DenseSet<MachineBasicBlock*> BlockSetType;
+
+    bool isPreheader(const MachineBasicBlock *B) const;
+    bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
+        FlowPattern &FP);
+    bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
+    bool visitLoop(MachineLoop *L);
+
+    bool hasEHLabel(const MachineBasicBlock *B) const;
+    bool hasUncondBranch(const MachineBasicBlock *B) const;
+    bool isValidCandidate(const MachineBasicBlock *B) const;
+    bool usesUndefVReg(const MachineInstr *MI) const;
+    bool isValid(const FlowPattern &FP) const;
+    unsigned countPredicateDefs(const MachineBasicBlock *B) const;
+    unsigned computePhiCost(MachineBasicBlock *B) const;
+    bool isProfitable(const FlowPattern &FP) const;
+    bool isPredicableStore(const MachineInstr *MI) const;
+    bool isSafeToSpeculate(const MachineInstr *MI) const;
+
+    unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
+    void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
+        MachineInstr *MI, unsigned PredR, bool IfTrue);
+    void predicateBlockNB(MachineBasicBlock *ToB,
+        MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+        unsigned PredR, bool IfTrue);
+
+    void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
+    void convert(const FlowPattern &FP);
+
+    void removeBlock(MachineBasicBlock *B);
+    void eliminatePhis(MachineBasicBlock *B);
+    void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
+    void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
+    void simplifyFlowGraph(const FlowPattern &FP);
+
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineFunction *MFN;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *MDT;
+    MachineLoopInfo *MLI;
+    BlockSetType Deleted;
+    const MachineBranchProbabilityInfo *MBPI;
+  };
+
+  char HexagonEarlyIfConversion::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
+  "Hexagon early if conversion", false, false)
+
+bool
HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { + if (B->succ_size() != 1) + return false; + MachineBasicBlock *SB = *B->succ_begin(); + MachineLoop *L = MLI->getLoopFor(SB); + return L && SB == L->getHeader(); +} + + +bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, + MachineLoop *L, FlowPattern &FP) { + DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + + // Interested only in conditional branches, no .new, no new-value, etc. + // Check the terminators directly, it's easier than handling all responses + // from AnalyzeBranch. + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); + if (T1I == B->end()) + return false; + unsigned Opc = T1I->getOpcode(); + if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) + return false; + unsigned PredR = T1I->getOperand(0).getReg(); + + // Get the layout successor, or 0 if B does not have one. + MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + + MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); + MachineBasicBlock::const_iterator T2I = std::next(T1I); + // The second terminator should be an unconditional branch. + assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); + MachineBasicBlock *T2B = (T2I == B->end()) ? NextB + : T2I->getOperand(0).getMBB(); + if (T1B == T2B) { + // XXX merge if T1B == NextB, or convert branch to unconditional. + // mark as diamond with both sides equal? + return false; + } + // Loop could be null for both. + if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) + return false; + + // Record the true/false blocks in such a way that "true" means "if (PredR)", + // and "false" means "if (!PredR)". + if (Opc == Hexagon::J2_jumpt) + TB = T1B, FB = T2B; + else + TB = T2B, FB = T1B; + + if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) + return false; + + // Detect triangle first. In case of a triangle, one of the blocks TB/FB + // can fall through into the other, in other words, it will be executed + // in both cases. We only want to predicate the block that is executed + // conditionally. + unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); + unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); + + // A block is predicable if it has one predecessor (it must be B), and + // it has a single successor. In fact, the block has to end either with + // an unconditional branch (which can be predicated), or with a fall- + // through. + bool TOk = (TNP == 1) && (TNS == 1); + bool FOk = (FNP == 1) && (FNS == 1); + + // If neither is predicable, there is nothing interesting. + if (!TOk && !FOk) + return false; + + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; + MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; + MachineBasicBlock *JB = 0; + + if (TOk) { + if (FOk) { + if (TSB == FSB) + JB = TSB; + // Diamond: "if (P) then TB; else FB;". + } else { + // TOk && !FOk + if (TSB == FB) { + JB = FB; + FB = 0; + } + } + } else { + // !TOk && FOk (at least one must be true by now). + if (FSB == TB) { + JB = TB; + TB = 0; + } + } + // Don't try to predicate loop preheaders. + if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { + DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. 
Skipping.\n"); + return false; + } + + FP = FlowPattern(B, PredR, TB, FB, JB); + DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + return true; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// contains EH_LABEL. +bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// that a block can never fall-through. +bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) + const { + if (!B) + return true; + if (B->isEHPad() || B->hasAddressTaken()) + return false; + if (B->succ_size() == 0) + return false; + + for (auto &MI : *B) { + if (MI.isDebugValue()) + continue; + if (MI.isConditionalBranch()) + return false; + unsigned Opc = MI.getOpcode(); + bool IsJMP = (Opc == Hexagon::J2_jump); + if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) + return false; + // Look for predicate registers defined by this instruction. It's ok + // to speculate such an instruction, but the predicate register cannot + // be used outside of this block (or else it won't be possible to + // update the use of it after predication). PHI uses will be updated + // to use a result of a MUX, and a MUX cannot be created for predicate + // registers. + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) + continue; + for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) + if (U->getParent()->isPHI()) + return false; + } + } + return true; +} + + +bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isUse()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + const MachineInstr *DefI = MRI->getVRegDef(R); + // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. + assert(DefI && "Expecting a reaching def in MRI"); + if (DefI->isImplicitDef()) + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { + if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition + return false; + if (FP.TrueB && !isValidCandidate(FP.TrueB)) + return false; + if (FP.FalseB && !isValidCandidate(FP.FalseB)) + return false; + // Check the PHIs in the join block. If any of them use a register + // that is defined as IMPLICIT_DEF, do not convert this. This can + // legitimately happen if one side of the split never executes, but + // the compiler is unable to prove it. That side may then seem to + // provide an "undef" value to the join block, however it will never + // execute at run-time. If we convert this case, the "undef" will + // be used in a MUX instruction, and that may seem like actually + // using an undefined value to other optimizations. This could lead + // to trouble further down the optimization stream, cause assertions + // to fail, etc. 
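+  // Illustrative sketch (register names hypothetical, not from the
+  // original source):
+  //   TrueB:  %v1 = IMPLICIT_DEF       ; TrueB never runs, but that is
+  //                                    ; not provable
+  //   JoinB:  %v  = PHI %v0, <SplitB>, %v1, <TrueB>
+  // would become, after conversion,
+  //   SplitB: %v = C2_mux %p, %v1, %v0
+  // turning the undef %v1 into an operand that appears to be used
+  // unconditionally.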
+  if (FP.JoinB) {
+    const MachineBasicBlock &B = *FP.JoinB;
+    for (auto &MI : B) {
+      if (!MI.isPHI())
+        break;
+      if (usesUndefVReg(&MI))
+        return false;
+      unsigned DefR = MI.getOperand(0).getReg();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      if (RC == &Hexagon::PredRegsRegClass)
+        return false;
+    }
+  }
+  return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+  assert(B->pred_size() <= 2);
+  if (B->pred_size() < 2)
+    return 0;
+
+  unsigned Cost = 0;
+  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+  for (I = B->begin(); I != E; ++I) {
+    const MachineOperand &RO1 = I->getOperand(1);
+    const MachineOperand &RO3 = I->getOperand(3);
+    assert(RO1.isReg() && RO3.isReg());
+    // Must have a MUX if the phi uses a subregister.
+    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+      Cost++;
+      continue;
+    }
+    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+    if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+      Cost++;
+  }
+  return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+      const MachineBasicBlock *B) const {
+  unsigned PredDefs = 0;
+  for (auto &MI : *B) {
+    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned R = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R))
+        continue;
+      if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+        PredDefs++;
+    }
+  }
+  return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+  if (FP.TrueB && FP.FalseB) {
+
+    // Do not if-convert if the branch is one-sided.
+    if (MBPI) {
+      BranchProbability Prob(9, 10);
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+        return false;
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+        return false;
+    }
+
+    // If both sides are predicable, convert them if they join, and the
+    // join block has no other predecessors.
+    MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+    MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+    if (TSB != FSB)
+      return false;
+    if (TSB->pred_size() != 2)
+      return false;
+  }
+
+  // Calculate the total size of the predicated blocks.
+  // Assume instruction counts without branches to be the approximation of
+  // the code size. If the predicated blocks are smaller than a packet size,
+  // approximate the spare room in the packet that could be filled with the
+  // predicated/speculated instructions.
+  unsigned TS = 0, FS = 0, Spare = 0;
+  if (FP.TrueB) {
+    TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+    if (TS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-TS;
+  }
+  if (FP.FalseB) {
+    FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+    if (FS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-FS;
+  }
+  unsigned TotalIn = TS+FS;
+  DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+               << TotalIn << ", spare room: " << Spare << "\n");
+  if (TotalIn >= SizeLimit+Spare)
+    return false;
+
+  // Count the number of PHI nodes that will need to be updated (converted
+  // to MUX). Those can be later converted to predicated instructions, so
+  // they aren't always adding extra cost.
+  // KLUDGE: Also, count the number of predicate register definitions in
+  // each block. The scheduler may increase the pressure of these and cause
+  // expensive spills (e.g. bitmnp01).
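+  // For illustration (numbers hypothetical): a diamond whose join block
+  // has two PHIs, each with at least one non-predicable input, contributes
+  // TotalPh = 2 below, and the conversion is then rejected when
+  // TotalIn + TotalPh >= SizeLimit + Spare.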
+  unsigned TotalPh = 0;
+  unsigned PredDefs = countPredicateDefs(FP.SplitB);
+  if (FP.JoinB) {
+    TotalPh = computePhiCost(FP.JoinB);
+    PredDefs += countPredicateDefs(FP.JoinB);
+  } else {
+    if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+      MachineBasicBlock *SB = *FP.TrueB->succ_begin();
+      TotalPh += computePhiCost(SB);
+      PredDefs += countPredicateDefs(SB);
+    }
+    if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+      MachineBasicBlock *SB = *FP.FalseB->succ_begin();
+      TotalPh += computePhiCost(SB);
+      PredDefs += countPredicateDefs(SB);
+    }
+  }
+  DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+               << TotalPh << "\n");
+  if (TotalIn+TotalPh >= SizeLimit+Spare)
+    return false;
+
+  DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+  if (PredDefs > 4)
+    return false;
+
+  return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
+      MachineLoop *L) {
+  bool Changed = false;
+
+  // Visit all dominated blocks from the same loop first, then process B.
+  MachineDomTreeNode *N = MDT->getNode(B);
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  // We will change CFG/DT during this traversal, so take precautions to
+  // avoid problems related to invalidated iterators. In fact, processing
+  // a child C of B cannot cause another child to be removed, but it can
+  // cause a new child to be added (which was a child of C before C itself
+  // was removed). That new child, however, would have been processed
+  // prior to processing B, so there is no need to process it again.
+  // Simply keep a list of children of B, and traverse that list.
+  typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+  DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+  for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+    MachineBasicBlock *SB = (*I)->getBlock();
+    if (!Deleted.count(SB))
+      Changed |= visitBlock(SB, L);
+  }
+  // When walking down the dominator tree, we want to traverse through
+  // blocks from nested (other) loops, because they can dominate blocks
+  // that are in L. Skip the non-L blocks only after the tree traversal.
+  if (MLI->getLoopFor(B) != L)
+    return Changed;
+
+  FlowPattern FP;
+  if (!matchFlowPattern(B, L, FP))
+    return Changed;
+
+  if (!isValid(FP)) {
+    DEBUG(dbgs() << "Conversion is not valid\n");
+    return Changed;
+  }
+  if (!isProfitable(FP)) {
+    DEBUG(dbgs() << "Conversion is not profitable\n");
+    return Changed;
+  }
+
+  convert(FP);
+  simplifyFlowGraph(FP);
+  return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
+  MachineBasicBlock *HB = L ? L->getHeader() : 0;
+  DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+           : dbgs() << "Visiting function") << "\n");
+  bool Changed = false;
+  if (L) {
+    for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+      Changed |= visitLoop(*I);
+  }
+
+  MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
+  Changed |= visitBlock(L ? HB : EntryB, L);
+  return Changed;
+}
+
+
+bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
+      const {
+  // Exclude post-increment stores. Those return a value, so we cannot
+  // predicate them.
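+  // E.g. a post-increment form such as S2_storerb_pi (opcode named for
+  // illustration only) also defines the updated address register, so it
+  // has no direct predicated counterpart in the table below.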
+ unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + switch (Opc) { + // Store byte: + case S2_storerb_io: case S4_storerb_rr: + case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: + // Store halfword: + case S2_storerh_io: case S4_storerh_rr: + case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: + // Store upper halfword: + case S2_storerf_io: case S4_storerf_rr: + case S2_storerfabs: case S2_storerfgp: + // Store word: + case S2_storeri_io: case S4_storeri_rr: + case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: + // Store doubleword: + case S2_storerd_io: case S4_storerd_rr: + case S2_storerdabs: case S2_storerdgp: + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) + const { + if (MI->mayLoad() || MI->mayStore()) + return false; + if (MI->isCall() || MI->isBarrier() || MI->isBranch()) + return false; + if (MI->hasUnmodeledSideEffects()) + return false; + + return true; +} + + +unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, + bool IfTrue) const { + // Exclude post-increment stores. + using namespace Hexagon; + switch (Opc) { + case S2_storerb_io: + return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; + case S4_storerb_rr: + return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; + case S2_storerbabs: + case S2_storerbgp: + return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; + case S4_storeirb_io: + return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; + case S2_storerh_io: + return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; + case S4_storerh_rr: + return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; + case S2_storerhabs: + case S2_storerhgp: + return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; + case S2_storerf_io: + return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; + case S4_storerf_rr: + return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; + case S2_storerfabs: + case S2_storerfgp: + return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; + case S4_storeirh_io: + return IfTrue ? S4_storeirht_io : S4_storeirhf_io; + case S2_storeri_io: + return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; + case S4_storeri_rr: + return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; + case S2_storeriabs: + case S2_storerigp: + return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; + case S4_storeiri_io: + return IfTrue ? S4_storeirit_io : S4_storeirif_io; + case S2_storerd_io: + return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; + case S4_storerd_rr: + return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; + case S2_storerdabs: + case S2_storerdgp: + return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; + } + llvm_unreachable("Unexpected opcode"); + return 0; +} + + +void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineInstr *MI, + unsigned PredR, bool IfTrue) { + DebugLoc DL; + if (At != ToB->end()) + DL = At->getDebugLoc(); + else if (!ToB->empty()) + DL = ToB->back().getDebugLoc(); + + unsigned Opc = MI->getOpcode(); + + if (isPredicableStore(MI)) { + unsigned COpc = getCondStoreOpcode(Opc, IfTrue); + assert(COpc); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) + .addReg(PredR); + for (MIOperands MO(MI); MO.isValid(); ++MO) + MIB.addOperand(*MO); + + // Set memory references. 
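+    // (Copying the memory operands below preserves the original store's
+    // alias and volatility information on the predicated form, so later
+    // passes do not have to treat it conservatively.)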
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MIB.setMemRefs(MMOBegin, MMOEnd); + + MI->eraseFromParent(); + return; + } + + if (Opc == Hexagon::J2_jump) { + MachineBasicBlock *TB = MI->getOperand(0).getMBB(); + const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + : Hexagon::J2_jumpf); + BuildMI(*ToB, At, DL, D) + .addReg(PredR) + .addMBB(TB); + MI->eraseFromParent(); + return; + } + + // Print the offending instruction unconditionally as we are about to + // abort. + dbgs() << *MI; + llvm_unreachable("Unexpected instruction"); +} + + +// Predicate/speculate non-branch instructions from FromB into block ToB. +// Leave the branches alone, they will be handled later. Btw, at this point +// FromB should have at most one branch, and it should be unconditional. +void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue) { + DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + MachineBasicBlock::iterator End = FromB->getFirstTerminator(); + MachineBasicBlock::iterator I, NextI; + + for (I = FromB->begin(); I != End; I = NextI) { + assert(!I->isPHI()); + NextI = std::next(I); + if (isSafeToSpeculate(&*I)) + ToB->splice(At, FromB, I); + else + predicateInstr(ToB, At, &*I, PredR, IfTrue); + } +} + + +void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, + const FlowPattern &FP) { + // Visit all PHI nodes in the WhereB block and generate MUX instructions + // in the split block. Update the PHI nodes with the values of the MUX. + auto NonPHI = WhereB->getFirstNonPHI(); + for (auto I = WhereB->begin(); I != NonPHI; ++I) { + MachineInstr *PN = &*I; + // Registers and subregisters corresponding to TrueB, FalseB and SplitB. + unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); + if (BO.getMBB() == FP.SplitB) + SR = RO.getReg(), SSR = RO.getSubReg(); + else if (BO.getMBB() == FP.TrueB) + TR = RO.getReg(), TSR = RO.getSubReg(); + else if (BO.getMBB() == FP.FalseB) + FR = RO.getReg(), FSR = RO.getSubReg(); + else + continue; + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + if (TR == 0) + TR = SR, TSR = SSR; + else if (FR == 0) + FR = SR, FSR = SSR; + assert(TR && FR); + + using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) + : TII->get(MUX64_rr); + + MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); + DebugLoc DL; + if (MuxAt != FP.SplitB->end()) + DL = MuxAt->getDebugLoc(); + unsigned MuxR = MRI->createVirtualRegister(RC); + BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) + .addReg(FP.PredR) + .addReg(TR, 0, TSR) + .addReg(FR, 0, FSR); + + PN->addOperand(MachineOperand::CreateReg(MuxR, false)); + PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); + } +} + + +void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { + MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); + assert(OldTI != FP.SplitB->end()); + DebugLoc DL = OldTI->getDebugLoc(); + + if (FP.TrueB) { + TSB = *FP.TrueB->succ_begin(); + predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); + } + if (FP.FalseB) { + FSB = *FP.FalseB->succ_begin(); + MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); + predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); + } + + // Regenerate new terminators in the split block and update the successors. + // First, remember any information that may be needed later and remove the + // existing terminators/successors from the split block. + MachineBasicBlock *SSB = 0; + FP.SplitB->erase(OldTI, FP.SplitB->end()); + while (FP.SplitB->succ_size() > 0) { + MachineBasicBlock *T = *FP.SplitB->succ_begin(); + // It's possible that the split block had a successor that is not a pre- + // dicated block. This could only happen if there was only one block to + // be predicated. Example: + // split_b: + // if (p) jump true_b + // jump unrelated2_b + // unrelated1_b: + // ... + // unrelated2_b: ; can have other predecessors, so it's not "false_b" + // jump other_b + // true_b: ; only reachable from split_b, can be predicated + // ... + // + // Find this successor (SSB) if it exists. + if (T != FP.TrueB && T != FP.FalseB) { + assert(!SSB); + SSB = T; + } + FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); + } + + // Insert new branches and update the successors of the split block. This + // may create unconditional branches to the layout successor, etc., but + // that will be cleaned up later. For now, make sure that correct code is + // generated. + if (FP.JoinB) { + assert(!SSB || SSB == FP.JoinB); + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(FP.JoinB); + FP.SplitB->addSuccessor(FP.JoinB); + } else { + bool HasBranch = false; + if (TSB) { + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + .addReg(FP.PredR) + .addMBB(TSB); + FP.SplitB->addSuccessor(TSB); + HasBranch = true; + } + if (FSB) { + const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) + : TII->get(Hexagon::J2_jumpf); + MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); + if (!HasBranch) + MIB.addReg(FP.PredR); + MIB.addMBB(FSB); + FP.SplitB->addSuccessor(FSB); + } + if (SSB) { + // This cannot happen if both TSB and FSB are set. [TF]SB are the + // successor blocks of the TrueB and FalseB (or null of the TrueB + // or FalseB block is null). SSB is the potential successor block + // of the SplitB that is neither TrueB nor FalseB. + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(SSB); + FP.SplitB->addSuccessor(SSB); + } + } + + // What is left to do is to update the PHI nodes that could have entries + // referring to predicated blocks. 
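+  // Illustrative sketch (register names hypothetical): a join-block PHI
+  //   %v = PHI %t, <TrueB>, %f, <FalseB>
+  // is rewritten by updatePhiNodes into
+  //   SplitB: %m = C2_mux %p, %t, %f
+  //   JoinB:  %v = PHI %m, <SplitB>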
+  if (FP.JoinB) {
+    updatePhiNodes(FP.JoinB, FP);
+  } else {
+    if (TSB)
+      updatePhiNodes(TSB, FP);
+    if (FSB)
+      updatePhiNodes(FSB, FP);
+    // Nothing to update in SSB, since SSB's predecessors haven't changed.
+  }
+}
+
+
+void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
+  DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+
+  // Transfer the immediate dominator information from B to its descendants.
+  MachineDomTreeNode *N = MDT->getNode(B);
+  MachineDomTreeNode *IDN = N->getIDom();
+  if (IDN) {
+    MachineBasicBlock *IDB = IDN->getBlock();
+    typedef GraphTraits<MachineDomTreeNode*> GTN;
+    typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+    DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+    for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+      MachineBasicBlock *SB = (*I)->getBlock();
+      MDT->changeImmediateDominator(SB, IDB);
+    }
+  }
+
+  while (B->succ_size() > 0)
+    B->removeSuccessor(B->succ_begin());
+
+  for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
+    (*I)->removeSuccessor(B, true);
+
+  Deleted.insert(B);
+  MDT->eraseNode(B);
+  MFN->erase(B->getIterator());
+}
+
+
+void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
+  DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+  MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
+  for (I = B->begin(); I != NonPHI; I = NextI) {
+    NextI = std::next(I);
+    MachineInstr *PN = &*I;
+    assert(PN->getNumOperands() == 3 && "Invalid phi node");
+    MachineOperand &UO = PN->getOperand(1);
+    unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
+    unsigned DefR = PN->getOperand(0).getReg();
+    unsigned NewR = UseR;
+    if (UseSR) {
+      // MRI.replaceVregUsesWith does not allow updating the subregister,
+      // so instead of doing the use-iteration here, create a copy into a
+      // "non-subregistered" register.
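+      // Sketch of the rewrite performed below (names hypothetical):
+      //   %new = COPY %use:subreg
+      // after which replaceRegWith redirects every use of the PHI's
+      // destination register to %new.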
+      DebugLoc DL = PN->getDebugLoc();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      NewR = MRI->createVirtualRegister(RC);
+      NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+                 .addReg(UseR, 0, UseSR);
+    }
+    MRI->replaceRegWith(DefR, NewR);
+    B->erase(I);
+  }
+}
+
+
+void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
+      MachineBasicBlock *NewB) {
+  for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
+    MachineBasicBlock *SB = *I;
+    MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
+    for (P = SB->begin(); P != N; ++P) {
+      MachineInstr *PN = &*P;
+      for (MIOperands MO(PN); MO.isValid(); ++MO)
+        if (MO->isMBB() && MO->getMBB() == OldB)
+          MO->setMBB(NewB);
+    }
+  }
+}
+
+
+void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
+      MachineBasicBlock *SuccB) {
+  DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+               << PrintMB(SuccB) << "\n");
+  bool TermOk = hasUncondBranch(SuccB);
+  eliminatePhis(SuccB);
+  TII->RemoveBranch(*PredB);
+  PredB->removeSuccessor(SuccB);
+  PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+  MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
+  for (I = SuccB->succ_begin(); I != E; ++I)
+    PredB->addSuccessor(*I);
+  PredB->normalizeSuccProbs();
+  replacePhiEdges(SuccB, PredB);
+  removeBlock(SuccB);
+  if (!TermOk)
+    PredB->updateTerminator();
+}
+
+
+void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+  if (FP.TrueB)
+    removeBlock(FP.TrueB);
+  if (FP.FalseB)
+    removeBlock(FP.FalseB);
+
+  FP.SplitB->updateTerminator();
+  if (FP.SplitB->succ_size() != 1)
+    return;
+
+  MachineBasicBlock *SB = *FP.SplitB->succ_begin();
+  if (SB->pred_size() != 1)
+    return;
+
+  // By now, the split block has only one successor (SB), and SB has only
+  // one predecessor. We can try to merge them. We will need to update ter-
+  // minators in FP.Split+SB, and that requires working AnalyzeBranch, which
+  // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
+  // with an unconditional branch, we won't need to touch the terminators.
+  if (!hasEHLabel(SB) || hasUncondBranch(SB))
+    mergeBlocks(FP.SplitB, SB);
+}
+
+
+bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
+  auto &ST = MF.getSubtarget();
+  TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
+  MFN = &MF;
+  MRI = &MF.getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+    nullptr;
+
+  Deleted.clear();
+  bool Changed = false;
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+    Changed |= visitLoop(*I);
+  Changed |= visitLoop(0);
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+FunctionPass *llvm::createHexagonEarlyIfConversion() {
+  return new HexagonEarlyIfConversion();
+}
+
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index e4c8d8f7b28c..6e2dbc06b124 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -74,7 +74,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
   // Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 21a8996b1159..7a52a1c9eaec 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -147,6 +147,48 @@ static cl::opt ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " "shrink-wraps")); +static cl::opt UseAllocframe("use-allocframe", cl::init(true), + cl::Hidden, cl::desc("Use allocframe more conservatively")); + + +namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); + FunctionPass *createHexagonCallFrameInformation(); +} + +namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { + public: + static char ID; + HexagonCallFrameInformation() : MachineFunctionPass(ID) { + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonCallFrameInformationPass(PR); + } + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char HexagonCallFrameInformation::ID = 0; +} + +bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { + auto &HFI = *MF.getSubtarget().getFrameLowering(); + bool NeedCFI = MF.getMMI().hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + if (!NeedCFI) + return false; + HFI.insertCFIInstructions(MF); + return true; +} + +INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", + "Hexagon call frame information", false, false) + +FunctionPass *llvm::createHexagonCallFrameInformation() { + return new HexagonCallFrameInformation(); +} + + namespace { /// Map a register pair Reg to the subregister that has the greater "number", /// i.e. D3 (aka R7:6) will be mapped to R7, etc. @@ -370,11 +412,11 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, insertEpilogueInBlock(*EpilogB); } else { for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertCSRRestoresInBlock(B, CSI, HRI); for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertEpilogueInBlock(B); } } @@ -383,10 +425,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - auto &HTM = static_cast(MF.getTarget()); - auto &HST = static_cast(MF.getSubtarget()); + auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); DebugLoc dl; @@ -405,10 +444,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { bool AlignStack = (MaxAlign > getStackAlignment()); - // Check if frame moves are needed for EH. - bool needsFrameMoves = MMI.hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); - // Get the number of bytes to allocate from the FrameInfo. 
unsigned NumBytes = MFI->getStackSize(); unsigned SP = HRI.getStackRegister(); @@ -424,14 +459,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MI->eraseFromParent(); } - // - // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. Think - // that this shouldn't be required, but doing so now because gcc does and - // gdb can't break at the start of the function without it. Will remove if - // this turns out to be a gdb bug. - // - bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None); - if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF)) + if (!hasFP(MF)) return; // Check for overflow. @@ -469,92 +497,11 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { .addReg(SP) .addImm(-int64_t(MaxAlign)); } - - if (needsFrameMoves) { - std::vector Instructions = MMI.getFrameInstructions(); - MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); - - // Advance CFA. DW_CFA_def_cfa - unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); - unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); - - // CFA = FP + 8 - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R31 (return addr) = CFA - #4 - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwRAReg, -4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R30 (frame ptr) = CFA - #8) - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - unsigned int regsToMove[] = { - Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, - Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, - Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, - Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, - Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10, - Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister - }; - - const std::vector &CSI = MFI->getCalleeSavedInfo(); - - for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) { - for (unsigned I = 0, E = CSI.size(); I < E; ++I) { - if (CSI[I].getReg() == regsToMove[i]) { - // Subtract 8 to make room for R30 and R31, which are added above. - int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8; - - if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) { - unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - DwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Split the double regs into subregs, and generate appropriate - // cfi_offsets. - // The only reason, we are split double regs is, llvm-mc does not - // understand paired registers for cfi_offset. 
- // Eg .cfi_offset r1:0, -64 - unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI); - unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false); - unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); - unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); - unsigned HiCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - HiDwarfReg, Offset+4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(HiCFIIndex); - unsigned LoCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - LoDwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(LoCFIIndex); - } - break; - } - } // for CSI.size() - } // for regsToMove - } // needsFrameMoves } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); - // - // Only insert deallocframe if we need to. Also at -O0. See comment - // in insertPrologueInBlock above. - // - if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None) + if (!hasFP(MF)) return; auto &HST = static_cast(MF.getSubtarget()); @@ -630,12 +577,172 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { } +namespace { + bool IsAllocFrame(MachineBasicBlock::const_iterator It) { + if (!It->isBundle()) + return It->getOpcode() == Hexagon::S2_allocframe; + auto End = It->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); + while (++I != End && I->isBundled()) + if (I->getOpcode() == Hexagon::S2_allocframe) + return true; + return false; + } + + MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { + for (auto &I : B) + if (IsAllocFrame(I)) + return I; + return B.end(); + } +} + + +void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { + for (auto &B : MF) { + auto AF = FindAllocFrame(B); + if (AF == B.end()) + continue; + insertCFIInstructionsAt(B, ++AF); + } +} + + +void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + auto &HST = MF.getSubtarget(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // If CFI instructions have debug information attached, something goes + // wrong with the final assembly generation: the prolog_end is placed + // in a wrong location. + DebugLoc DL; + const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); + + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + if (hasFP(MF)) { + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // Define CFA via an offset from the value of FP. + // + // -8 -4 0 (SP) + // --+----+----+--------------------- + // | FP | LR | increasing addresses --> + // --+----+----+--------------------- + // | +-- Old SP (before allocframe) + // +-- New FP (after allocframe) + // + // MCCFIInstruction::createDefCfa subtracts the offset from the register. + // MCCFIInstruction::createOffset takes the offset without sign change. 
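+    // The three CFI instructions built below correspond to the directives
+    // (sketch, assuming r30 is FP and r31 is LR on Hexagon):
+    //   .cfi_def_cfa r30, 8    // CFA = FP + 8
+    //   .cfi_offset r31, -4    // saved LR at CFA-4
+    //   .cfi_offset r30, -8    // saved FP at CFA-8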
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(DefCfa)); + // R31 (return addr) = CFA - 4 + auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR31)); + // R30 (frame ptr) = CFA - 8 + auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR30)); + } + + static unsigned int RegsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, + Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, + Hexagon::NoRegister + }; + + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { + unsigned Reg = RegsToMove[i]; + auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { + return C.getReg() == Reg; + }; + auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + if (F == CSI.end()) + continue; + + // Subtract 8 to make room for R30 and R31, which are added above. + unsigned FrameReg; + int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8; + + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) { + unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); + auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffReg)); + } else { + // Split the double regs into subregs, and generate appropriate + // cfi_offsets. + // The only reason, we are split double regs is, llvm-mc does not + // understand paired registers for cfi_offset. + // Eg .cfi_offset r1:0, -64 + + unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg); + unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg); + unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); + unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); + auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, + Offset+4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffHi)); + auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffLo)); + } + } +} + + bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo(); - return MFI->hasCalls() || MFI->getStackSize() > 0 || - FuncInfo->hasClobberLR(); + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + + bool HasFixed = MFI.getNumFixedObjects(); + bool HasPrealloc = const_cast(MFI) + .getLocalFrameObjectCount(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool HasAlloca = MFI.hasVarSizedObjects(); + + // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think + // that this shouldn't be required, but doing so now because gcc does and + // gdb can't break at the start of the function without it. Will remove if + // this turns out to be a gdb bug. + // + if (MF.getTarget().getOptLevel() == CodeGenOpt::None) + return true; + + // By default we want to use SP (since it's always there). FP requires + // some setup (i.e. ALLOCFRAME). 
+ // Fixed and preallocated objects need FP if the distance from them to + // the SP is unknown (as is with alloca or aligna). + if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + return true; + + if (MFI.getStackSize() > 0) { + if (UseAllocframe) + return true; + } + + if (MFI.hasCalls() || + MF.getInfo()->hasClobberLR()) + return true; + + return false; } @@ -718,9 +825,89 @@ static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, } -int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - return MF.getFrameInfo()->getObjectOffset(FI); +int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + + // Large parts of this code are shared with HRI::eliminateFrameIndex. + int Offset = MFI.getObjectOffset(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + + unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + unsigned FrameSize = MFI.getStackSize(); + + bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). + // Use FP at -O0, except when there are objects with extra alignment. + // That additional alignment requirement may cause a pad to be inserted, + // which will make it impossible to use FP to access objects located + // past the pad. + if (NoOpt && !HasExtraAlign) + UseFP = true; + if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { + // Fixed and preallocated objects will be located before any padding + // so FP must be used to access them. + UseFP |= (HasAlloca || HasExtraAlign); + } else { + if (HasAlloca) { + if (HasExtraAlign) + UseAP = true; + else + UseFP = true; + } + } + + // If FP was picked, then there had better be FP. + bool HasFP = hasFP(MF); + assert((HasFP || !UseFP) && "This function must have frame pointer"); + + // Having FP implies allocframe. Allocframe will store extra 8 bytes: + // FP/LR. If the base register is used to access an object across these + // 8 bytes, then the offset will need to be adjusted by 8. + // + // After allocframe: + // HexagonISelLowering adds 8 to ---+ + // the offsets of all stack-based | + // arguments (*) | + // | + // getObjectOffset < 0 0 8 getObjectOffset >= 8 + // ------------------------+-----+------------------------> increasing + // |FP/LR| addresses + // -----------------+------+-----+------------------------> + // | | + // SP/AP point --+ +-- FP points here (**) + // somewhere on + // this side of FP/LR + // + // (*) See LowerFormalArguments. The FP/LR is assumed to be present. + // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. + + // The lowering assumes that FP/LR is present, and so the offsets of + // the formal arguments start at 8. If FP/LR is not there we need to + // reduce the offset by 8. + if (Offset > 0 && !HasFP) + Offset -= 8; + + if (UseFP) + FrameReg = FP; + else if (UseAP) + FrameReg = AP; + else + FrameReg = SP; + + // Calculate the actual offset in the instruction. If there is no FP + // (in other words, no allocframe), then SP will not be adjusted (i.e. + // there will be no SP -= FrameSize), so the frame size should not be + // added to the calculated offset. 
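+  // Worked example (hypothetical numbers): FrameSize = 24 and
+  // getObjectOffset(FI) = -16 give RealOffset = 24 + (-16) = 8 when the
+  // frame pointer exists but the access is SP-based (!UseFP && !UseAP).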
+ int RealOffset = Offset; + if (!UseFP && !UseAP && HasFP) + RealOffset = FrameSize+Offset; + return RealOffset; } @@ -731,7 +918,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.begin(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget().getInstrInfo(); if (useSpillFunction(MF, CSI)) { unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); @@ -739,7 +926,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstr *SaveRegsCall = - BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) .addExternalSymbol(SpillFun); // Add callee-saved registers as use. addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); @@ -757,7 +944,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); int FI = CSI[i].getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); + HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); if (IsKill) MBB.addLiveIn(Reg); } @@ -772,7 +959,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget().getInstrInfo(); if (useRestoreFunction(MF, CSI)) { bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); @@ -787,14 +974,14 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, if (HasTC) { unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; - DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); } else { // The block has a return. MachineBasicBlock::iterator It = MBB.getFirstTerminator(); assert(It->isReturn() && std::next(It) == MBB.end()); unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; - DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); // Transfer the function live-out registers. DeallocCall->copyImplicitOps(MF, It); @@ -807,7 +994,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); int FI = CSI[i].getFrameIdx(); - TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); + HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } return true; } @@ -832,9 +1019,9 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( // via AP, which may not be available at the particular place in the program. MachineFrameInfo *MFI = MF.getFrameInfo(); bool HasAlloca = MFI->hasVarSizedObjects(); - bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment()); + bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); - if (!HasAlloca || !HasAligna) + if (!HasAlloca || !NeedsAlign) return; unsigned LFS = MFI->getLocalFrameSize(); @@ -864,13 +1051,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, // Check for an unused caller-saved register. 
for ( ; *CallerSavedRegs; ++CallerSavedRegs) { MCPhysReg FreeReg = *CallerSavedRegs; - if (MRI.isPhysRegUsed(FreeReg)) + if (!MRI.reg_nodbg_empty(FreeReg)) continue; // Check aliased register usage. bool IsCurrentRegUsed = false; for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (MRI.isPhysRegUsed(*AI)) { + if (!MRI.reg_nodbg_empty(*AI)) { IsCurrentRegUsed = true; break; } @@ -896,7 +1083,7 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. MachineBasicBlock::iterator NextII; for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); @@ -1210,7 +1397,8 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { } -MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { +const MachineInstr *HexagonFrameLowering::getAlignaInstr( + const MachineFunction &MF) const { for (auto &B : MF) for (auto &I : B) if (I.getOpcode() == Hexagon::ALIGNA) @@ -1219,6 +1407,7 @@ MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { } +// FIXME: Use Function::optForSize(). inline static bool isOptSize(const MachineFunction &MF) { AttributeSet AF = MF.getFunction()->getAttributes(); return AF.hasAttribute(AttributeSet::FunctionIndex, @@ -1226,8 +1415,7 @@ inline static bool isOptSize(const MachineFunction &MF) { } inline static bool isMinSize(const MachineFunction &MF) { - AttributeSet AF = MF.getFunction()->getAttributes(); - return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + return MF.getFunction()->optForMinSize(); } @@ -1289,4 +1477,3 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, : SpillFuncThreshold; return Threshold < NumCSI; } - diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index d39ee2c77195..683b303d43ea 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -51,7 +51,8 @@ public: bool targetHandlesStackFrameRounding() const override { return true; } - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; bool hasFP(const MachineFunction &MF) const override; const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) @@ -73,7 +74,9 @@ public: const override; bool needsAligna(const MachineFunction &MF) const; - MachineInstr *getAlignaInstr(MachineFunction &MF) const; + const MachineInstr *getAlignaInstr(const MachineFunction &MF) const; + + void insertCFIInstructions(MachineFunction &MF) const; private: typedef std::vector CSIVect; @@ -86,6 +89,8 @@ private: const HexagonRegisterInfo &HRI) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const; + void insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const; void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; @@ -94,7 +99,7 @@ private: void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const; - bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const; + bool 
shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; }; diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index 4d32208bd5aa..f26e2ff764d7 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -195,7 +195,7 @@ bool HexagonGenExtract::convert(Instruction *In) { return false; } - IRBuilder<> IRB(BB, In); + IRBuilder<> IRB(In); Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu : Intrinsic::hexagon_S2_extractup; Module *Mod = BB->getParent()->getParent(); diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 096da949e77b..64a2b6cec18a 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -77,9 +77,8 @@ namespace { namespace { // Set of virtual registers, based on BitVector. struct RegisterSet : private BitVector { - RegisterSet() : BitVector() {} + RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} - RegisterSet(const RegisterSet &RS) : BitVector(RS) {} using BitVector::clear; @@ -1496,7 +1495,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { // version of DCE that preserves lifetime markers. Without it, merging // of stack objects can fail to recognize and merge disjoint objects // leading to unnecessary stack growth. - Changed |= removeDeadCode(MDT->getRootNode()); + Changed = removeDeadCode(MDT->getRootNode()); const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); BitTracker BTLoc(HE, MF); @@ -1534,7 +1533,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); @@ -1547,7 +1546,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("selection", "hexinsert", TimingDetail); @@ -1572,13 +1571,15 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, n = Out.size(); i < n; ++i) IFMap.erase(Out[i]); } + if (IFMap.empty()) + return Changed; { NamedRegionTimer _T("generation", "hexinsert", TimingDetail); - Changed = generateInserts(); + generateInserts(); } - return Changed; + return true; } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp new file mode 100644 index 000000000000..c059d566709e --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -0,0 +1,319 @@ +//===--- HexagonGenMux.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// During instruction selection, MUX instructions are generated for +// conditional assignments. Since such assignments often present an +// opportunity to predicate instructions, HexagonExpandCondsets +// expands MUXes into pairs of conditional transfers, and then proceeds +// with predication of the producers/consumers of the registers involved. +// This happens after exiting from the SSA form, but before the machine +// instruction scheduler. 
After the scheduler and after the register +// allocation there can be cases of pairs of conditional transfers +// resulting from a MUX where neither of them was further predicated. If +// these transfers are now placed far enough from the instruction defining +// the predicate register, they cannot use the .new form. In such cases it +// is better to collapse them back to a single MUX instruction. + +#define DEBUG_TYPE "hexmux" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonGenMux(); + void initializeHexagonGenMuxPass(PassRegistry& Registry); +} + +namespace { + class HexagonGenMux : public MachineFunctionPass { + public: + static char ID; + HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon generate mux instructions"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + struct CondsetInfo { + unsigned PredR; + unsigned TrueX, FalseX; + CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + }; + struct DefUseInfo { + BitVector Defs, Uses; + DefUseInfo() : Defs(), Uses() {} + DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} + }; + struct MuxInfo { + MachineBasicBlock::iterator At; + unsigned DefR, PredR; + MachineOperand *SrcT, *SrcF; + MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, + MachineOperand *TOp, MachineOperand *FOp, + MachineInstr *D1, MachineInstr *D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), + Def2(D2) {} + }; + typedef DenseMap InstrIndexMap; + typedef DenseMap DefUseInfoMap; + typedef SmallVector MuxInfoList; + + bool isRegPair(unsigned Reg) const { + return Hexagon::DoubleRegsRegClass.contains(Reg); + } + void getSubRegs(unsigned Reg, BitVector &SRs) const; + void expandReg(unsigned Reg, BitVector &Set) const; + void getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const; + void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM); + bool isCondTransfer(unsigned Opc) const; + unsigned getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const; + bool genMuxInBlock(MachineBasicBlock &B); + }; + + char HexagonGenMux::ID = 0; +} + +INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", + "Hexagon generate mux instructions", false, false) + + +void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { + for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) + SRs[*I] = true; +} + + +void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { + if (isRegPair(Reg)) + getSubRegs(Reg, Set); + else + Set[Reg] = true; +} + + +void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const { + // First, get the implicit defs and uses for this instruction. 
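+  // (For example, a call's implicit defs are recorded here so that the
+  // scan in genMuxInBlock can tell when the predicate or a source
+  // register is clobbered between the two conditional transfers.)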
+ unsigned Opc = MI->getOpcode(); + const MCInstrDesc &D = HII->get(Opc); + if (const MCPhysReg *R = D.ImplicitDefs) + while (*R) + expandReg(*R++, Defs); + if (const MCPhysReg *R = D.ImplicitUses) + while (*R) + expandReg(*R++, Uses); + + // Look over all operands, and collect explicit defs and uses. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + if (!Mo->isReg() || Mo->isImplicit()) + continue; + unsigned R = Mo->getReg(); + BitVector &Set = Mo->isDef() ? Defs : Uses; + expandReg(R, Set); + } +} + + +void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM) { + unsigned Index = 0; + unsigned NR = HRI->getNumRegs(); + BitVector Defs(NR), Uses(NR); + + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + I2X.insert(std::make_pair(MI, Index)); + Defs.reset(); + Uses.reset(); + getDefsUses(MI, Defs, Uses); + DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses))); + Index++; + } +} + + +bool HexagonGenMux::isCondTransfer(unsigned Opc) const { + switch (Opc) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + return true; + } + return false; +} + + +unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const { + bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); + if (IsReg1) + return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir; + if (IsReg2) + return Hexagon::C2_muxri; + + // Neither is a register. The first source is extendable, but the second + // is not (s8). + if (Src2.isImm() && isInt<8>(Src2.getImm())) + return Hexagon::C2_muxii; + + return 0; +} + + +bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { + bool Changed = false; + InstrIndexMap I2X; + DefUseInfoMap DUM; + buildMaps(B, I2X, DUM); + + typedef DenseMap CondsetMap; + CondsetMap CM; + MuxInfoList ML; + + MachineBasicBlock::iterator NextI, End = B.end(); + for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + if (!isCondTransfer(Opc)) + continue; + unsigned DR = MI->getOperand(0).getReg(); + if (isRegPair(DR)) + continue; + + unsigned PR = MI->getOperand(1).getReg(); + unsigned Idx = I2X.lookup(MI); + CondsetMap::iterator F = CM.find(DR); + bool IfTrue = HII->isPredicatedTrue(Opc); + + // If there is no record of a conditional transfer for this register, + // or the predicate register differs, create a new record for it. + if (F != CM.end() && F->second.PredR != PR) { + CM.erase(F); + F = CM.end(); + } + if (F == CM.end()) { + auto It = CM.insert(std::make_pair(DR, CondsetInfo())); + F = It.first; + F->second.PredR = PR; + } + CondsetInfo &CI = F->second; + if (IfTrue) + CI.TrueX = Idx; + else + CI.FalseX = Idx; + if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + continue; + + // There is now a complete definition of DR, i.e. we have the predicate + // register, the definition if-true, and definition if-false. + + // First, check if both definitions are far enough from the definition + // of the predicate register. + unsigned MinX = std::min(CI.TrueX, CI.FalseX); + unsigned MaxX = std::max(CI.TrueX, CI.FalseX); + unsigned SearchX = (MaxX > 4) ? 
+    bool NearDef = false;
+    for (unsigned X = SearchX; X < MaxX; ++X) {
+      const DefUseInfo &DU = DUM.lookup(X);
+      if (!DU.Defs[PR])
+        continue;
+      NearDef = true;
+      break;
+    }
+    if (NearDef)
+      continue;
+
+    // The predicate register is not defined in the last few instructions.
+    // Check if the conversion to MUX is possible (either "up", i.e. at the
+    // place of the earlier partial definition, or "down", where the later
+    // definition is located). Examine all defs and uses between these two
+    // definitions.
+    // SR1, SR2 - source registers from the first and the second definition.
+    MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
+    std::advance(It1, MinX);
+    std::advance(It2, MaxX);
+    MachineInstr *Def1 = It1, *Def2 = It2;
+    MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
+    unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
+    unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+    bool Failure = false, CanUp = true, CanDown = true;
+    for (unsigned X = MinX+1; X < MaxX; X++) {
+      const DefUseInfo &DU = DUM.lookup(X);
+      if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
+        Failure = true;
+        break;
+      }
+      if (CanDown && DU.Defs[SR1])
+        CanDown = false;
+      if (CanUp && DU.Defs[SR2])
+        CanUp = false;
+    }
+    if (Failure || (!CanUp && !CanDown))
+      continue;
+
+    MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
+    MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
+    // Prefer "down", since this will move the MUX farther away from the
+    // predicate definition.
+    MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+    ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
+  }
+
+  for (unsigned I = 0, N = ML.size(); I < N; ++I) {
+    MuxInfo &MX = ML[I];
+    MachineBasicBlock &B = *MX.At->getParent();
+    DebugLoc DL = MX.At->getDebugLoc();
+    unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
+    if (!MxOpc)
+      continue;
+    BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+        .addReg(MX.PredR)
+        .addOperand(*MX.SrcT)
+        .addOperand(*MX.SrcF);
+    B.erase(MX.Def1);
+    B.erase(MX.Def2);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) {
+  HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+  HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+  bool Changed = false;
+  for (auto &I : MF)
+    Changed |= genMuxInBlock(I);
+  return Changed;
+}
+
+FunctionPass *llvm::createHexagonGenMux() {
+  return new HexagonGenMux();
+}
+
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 6905c4f6d125..d9675b5173d2 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -250,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
   unsigned NewPR = MRI->createVirtualRegister(PredRC);
 
   // For convertible instructions, do not modify them, so that they can
-  // be coverted later.  Generate a copy from Reg to NewPR.
+  // be converted later.  Generate a copy from Reg to NewPR.
   if (isConvertibleToPredForm(DefI)) {
     MachineBasicBlock::iterator DefIt = DefI;
     BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 53b6bf617e8f..d20a809d6c09 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -727,9 +727,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
   // Phis that may feed into the loop.
   LoopFeederMap LoopFeederPhi;
 
-  // Check if the inital value may be zero and can be decremented in the first
+  // Check if the initial value may be zero and can be decremented in the first
   // iteration. If the value is zero, the endloop instruction will not decrement
-  // the loop counter, so we shoudn't generate a hardware loop in this case.
+  // the loop counter, so we shouldn't generate a hardware loop in this case.
   if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
                                   LoopFeederPhi))
     return nullptr;
@@ -1288,14 +1288,14 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
   typedef MachineBasicBlock::instr_iterator instr_iterator;
 
   // Check if things are in order to begin with.
-  for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
     if (&*I == CmpI)
       return true;
 
   // Out of order.
   unsigned PredR = CmpI->getOperand(0).getReg();
   bool FoundBump = false;
-  instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
+  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
     MachineInstr *In = &*I;
     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
@@ -1307,9 +1307,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
     }
 
     if (In == BumpI) {
-      instr_iterator After = BumpI;
-      instr_iterator From = CmpI;
-      BB->splice(std::next(After), BB, From);
+      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
       FoundBump = true;
       break;
     }
@@ -1440,7 +1438,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
   if (Comparison::isSigned(Cmp))
     return false;
 
-  // Check if there is a comparison of the inital value. If the initial value
+  // Check if there is a comparison of the initial value. If the initial value
   // is greater than or not equal to another value, then assume this is a
   // range check.
   if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
@@ -1850,7 +1848,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
   }
 
   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
-  MF->insert(Header, NewPH);
+  MF->insert(Header->getIterator(), NewPH);
 
   if (Header->pred_size() > 2) {
     // Ensure that the header has only two predecessors: the preheader and
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9123057e60d1..a0da945e7572 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -50,16 +50,21 @@ namespace {
 class HexagonDAGToDAGISel : public SelectionDAGISel {
   const HexagonTargetMachine& HTM;
   const HexagonSubtarget *HST;
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
 public:
   explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
                                CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(tm, OptLevel), HTM(tm) {
+      : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+        HRI(nullptr) {
    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
     // Reset the subtarget each time through.
+    HST = &MF.getSubtarget<HexagonSubtarget>();
+    HII = HST->getInstrInfo();
+    HRI = HST->getRegisterInfo();
     SelectionDAGISel::runOnMachineFunction(MF);
     return true;
   }
@@ -104,7 +109,6 @@ public:
   SDNode *SelectConstantFP(SDNode *N);
   SDNode *SelectAdd(SDNode *N);
   SDNode *SelectBitOp(SDNode *N);
-  bool isConstExtProfitable(SDNode *N) const;
 
   // XformMskToBitPosU5Imm - Returns the bit position which
   // the single bit 32 bit mask represents.
@@ -139,8 +143,8 @@ public:
   // type i32 where the negative literal is transformed into a positive literal
   // for use in -= memops.
   inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
-    assert( (Imm >= -31 && Imm <= -1)  && "Constant out of range for Memops");
-    return CurDAG->getTargetConstant( - Imm, DL, MVT::i32);
+    assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+    return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
   }
 
   // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
@@ -203,11 +207,10 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
 
 
 // Intrinsics that return a a predicate.
-static unsigned doesIntrinsicReturnPredicate(unsigned ID)
-{
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
   switch (ID) {
     default:
-      return 0;
+      return false;
     case Intrinsic::hexagon_C2_cmpeq:
     case Intrinsic::hexagon_C2_cmpgt:
     case Intrinsic::hexagon_C2_cmpgtu:
@@ -244,7 +247,7 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
     case Intrinsic::hexagon_C2_tfrrp:
     case Intrinsic::hexagon_S2_tstbit_i:
     case Intrinsic::hexagon_S2_tstbit_r:
-      return 1;
+      return true;
   }
 }
 
@@ -258,8 +261,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
   SDNode *OffsetNode = Offset.getNode();
   int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
 
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
                                               MVT::Other, Base, TargetConst,
@@ -312,8 +314,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
   SDNode *OffsetNode = Offset.getNode();
   int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
 
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
     SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -378,29 +379,46 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
   // loads.
   ISD::LoadExtType ExtType = LD->getExtensionType();
   bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+  bool HasVecOffset = false;
 
   // Figure out the opcode.
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
   if (LoadedVT == MVT::i64) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = Hexagon::L2_loadrd_pi;
     else
       Opcode = Hexagon::L2_loadrd_io;
   } else if (LoadedVT == MVT::i32) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
      Opcode = Hexagon::L2_loadri_pi;
     else
       Opcode = Hexagon::L2_loadri_io;
   } else if (LoadedVT == MVT::i16) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
     else
       Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
   } else if (LoadedVT == MVT::i8) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
     else
       Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+  } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
+             LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
+    HasVecOffset = true;
+    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+      Opcode = Hexagon::V6_vL32b_pi;
+    }
+    else
+      Opcode = Hexagon::V6_vL32b_ai;
+  // 128B
+  } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
+             LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
+    HasVecOffset = true;
+    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+      Opcode = Hexagon::V6_vL32b_pi_128B;
+    }
+    else
+      Opcode = Hexagon::V6_vL32b_ai_128B;
   } else
     llvm_unreachable("unknown memory type");
 
@@ -411,7 +429,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
   if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
     return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
 
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
                                             LD->getValueType(0),
@@ -420,15 +438,25 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
     MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
     MemOp[0] = LD->getMemOperand();
     cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
-    const SDValue Froms[] = { SDValue(LD, 0),
-                              SDValue(LD, 1),
-                              SDValue(LD, 2)
-                            };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Result, 1),
-                              SDValue(Result, 2)
-                            };
-    ReplaceUses(Froms, Tos, 3);
+    if (HasVecOffset) {
+      const SDValue Froms[] = { SDValue(LD, 0),
+                                SDValue(LD, 2)
+                              };
+      const SDValue Tos[]   = { SDValue(Result, 0),
+                                SDValue(Result, 2)
+                              };
+      ReplaceUses(Froms, Tos, 2);
+    } else {
+      const SDValue Froms[] = { SDValue(LD, 0),
+                                SDValue(LD, 1),
+                                SDValue(LD, 2)
+                              };
+      const SDValue Tos[]   = { SDValue(Result, 0),
+                                SDValue(Result, 1),
+                                SDValue(Result, 2)
+                              };
+      ReplaceUses(Froms, Tos, 3);
+    }
     return Result;
   } else {
     SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
@@ -487,8 +515,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
 
   // Offset value must be within representable range
   // and must have correct alignment properties.
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(StoredVT, Val)) {
+  if (HII->isValidAutoIncImm(StoredVT, Val)) {
     unsigned Opcode = 0;
 
     // Figure out the post inc version of opcode.
@@ -496,7 +523,15 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
     else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
     else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
     else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
-    else llvm_unreachable("unknown memory type");
+    else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+      Opcode = Hexagon::V6_vS32b_pi;
+    }
+    // 128B
+    else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+      Opcode = Hexagon::V6_vS32b_pi_128B;
+    } else llvm_unreachable("unknown memory type");
 
     if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
       assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
@@ -530,6 +565,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
   else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
   else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
   else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+  else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+           StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+    Opcode = Hexagon::V6_vS32b_ai;
+  // 128B
+  else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+           StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+    Opcode = Hexagon::V6_vS32b_ai_128B;
   else llvm_unreachable("unknown memory type");
 
   // Build regular store.
@@ -1113,14 +1155,12 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   }
 
   if (Opc == ISD::AND) {
-    if (((ValueVT == MVT::i32) &&
-         (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
-        ((ValueVT == MVT::i64) &&
-         (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
-      // If it's simple AND, do the normal op.
-      return SelectCode(N);
-    else
+    // Check if this is a bit-clearing AND, if not select code the usual way.
+    if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+        (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
       Val = ~Val;
+    else
+      return SelectCode(N);
   }
 
   // If OR or AND is being fed by shl, srl and, sra don't do this change,
@@ -1128,7 +1168,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   // Traverse the DAG to see if there is shl, srl and sra.
   if (Opc == ISD::OR || Opc == ISD::AND) {
     switch (N->getOperand(0)->getOpcode()) {
-      default: break;
+      default:
+        break;
      case ISD::SRA:
      case ISD::SRL:
      case ISD::SHL:
@@ -1137,23 +1178,24 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   }
 
   // Make sure it's power of 2.
-  unsigned bitpos = 0;
+  unsigned BitPos = 0;
   if (Opc != ISD::FABS && Opc != ISD::FNEG) {
-    if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
-        ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+    if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+        (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
       return SelectCode(N);
 
     // Get the bit position.
-    bitpos = countTrailingZeros(uint64_t(Val));
+    BitPos = countTrailingZeros(uint64_t(Val));
   } else {
     // For fabs and fneg, it's always the 31st bit.
-    bitpos = 31;
+    BitPos = 31;
   }
 
   unsigned BitOpc = 0;
   // Set the right opcode for bitwise operations.
-  switch(Opc) {
-    default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+  switch (Opc) {
+    default:
+      llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
     case ISD::AND:
     case ISD::FABS:
       BitOpc = Hexagon::S2_clrbit_i;
       break;
@@ -1169,7 +1211,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   SDNode *Result;
   // Get the right SDVal for the opcode.
-  SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+  SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
 
   if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
     Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
@@ -1198,7 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
                                       MVT::i32, SDValue(Reg, 0));
 
     // Clear/set/toggle hi or lo registers depending on the bit position.
-    if (SubValueVT != MVT::f32 && bitpos < 32) {
+    if (SubValueVT != MVT::f32 && BitPos < 32) {
       SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
                                                SubregLO, SDVal);
       const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
@@ -1207,7 +1249,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
                                         dl, ValueVT, Ops);
     } else {
       if (Opc != ISD::FABS && Opc != ISD::FNEG)
-        SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+        SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
       SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
                                                SubregHI, SDVal);
       const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
@@ -1328,25 +1370,12 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
   return false;
 }
 
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
-  unsigned UseCount = 0;
-  unsigned CallCount = 0;
-  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
-    // Ignore call instructions.
-    if (I->getOpcode() == ISD::CopyToReg)
-      ++CallCount;
-    UseCount++;
-  }
-
-  return (UseCount <= 1) || (CallCount > 1);
-
-}
 
 void HexagonDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG &DAG = *CurDAG;
   std::vector<SDNode*> Nodes;
-  for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
-    Nodes.push_back(I);
+  for (SDNode &Node : DAG.allnodes())
+    Nodes.push_back(&Node);
 
   // Simplify: (or (select c x 0) z)  ->  (select c (or x z) z)
   //           (or (select c 0 y) z)  ->  (select c z (or y z))
@@ -1397,11 +1426,10 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
     return;
 
   MachineFrameInfo *MFI = MF->getFrameInfo();
-  MachineBasicBlock *EntryBB = MF->begin();
+  MachineBasicBlock *EntryBB = &MF->front();
   unsigned AR = FuncInfo->CreateReg(MVT::i32);
   unsigned MaxA = MFI->getMaxAlignment();
-  auto &HII = *HST.getInstrInfo();
-  BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
       .addImm(MaxA);
   MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
 }
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index c739afb70c15..01670902e2b0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "hexagon-lowering"
 
-static cl::opt<bool>
-EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
+  cl::init(true), cl::Hidden,
   cl::desc("Control jump table emission on Hexagon target"));
 
 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
@@ -98,6 +98,9 @@ public:
 }
 
 // Implement calling convention for Hexagon.
+
+static bool IsHvxVectorType(MVT ty);
+
 static bool
 CC_Hexagon(unsigned ValNo, MVT ValVT,
            MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -113,6 +116,11 @@ CC_Hexagon64(unsigned ValNo, MVT ValVT,
              MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State);
 
+static bool
+CC_HexagonVector(unsigned ValNo, MVT ValVT,
+                 MVT LocVT, CCValAssign::LocInfo LocInfo,
+                 ISD::ArgFlagsTy ArgFlags, CCState &State);
+
 static bool
 RetCC_Hexagon(unsigned ValNo, MVT ValVT,
               MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -128,6 +136,11 @@ RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
                 MVT LocVT, CCValAssign::LocInfo LocInfo,
                 ISD::ArgFlagsTy ArgFlags, CCState &State);
 
+static bool
+RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+                    MVT LocVT, CCValAssign::LocInfo LocInfo,
+                    ISD::ArgFlagsTy ArgFlags, CCState &State);
+
 static bool
 CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
                    MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -169,15 +182,43 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
     State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
     return false;
   }
+  if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
+      LocVT == MVT::v16i8) {
+    ofst = State.AllocateStack(16, 16);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
+      LocVT == MVT::v32i8) {
+    ofst = State.AllocateStack(32, 32);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+      LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
+    ofst = State.AllocateStack(64, 64);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+      LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
+    ofst = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+      LocVT == MVT::v256i8) {
+    ofst = State.AllocateStack(256, 256);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  llvm_unreachable(nullptr);
 }
 
-static bool
-CC_Hexagon (unsigned ValNo, MVT ValVT,
-            MVT LocVT, CCValAssign::LocInfo LocInfo,
-            ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
+      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
   if (ArgFlags.isByVal()) {
     // Passed on stack.
     unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
@@ -213,6 +254,17 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
     return false;
   }
 
+  if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
+    unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+
+  if (IsHvxVectorType(LocVT)) {
+    if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+      return false;
+  }
+
   return true;  // CC didn't match.
 }
@@ -260,10 +312,82 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
   return false;
 }
 
+static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
+                             MVT LocVT, CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
+                                       Hexagon::V2, Hexagon::V3,
+                                       Hexagon::V4, Hexagon::V5,
+                                       Hexagon::V6, Hexagon::V7,
+                                       Hexagon::V8, Hexagon::V9,
+                                       Hexagon::V10, Hexagon::V11,
+                                       Hexagon::V12, Hexagon::V13,
+                                       Hexagon::V14, Hexagon::V15};
+  static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
+                                       Hexagon::W2, Hexagon::W3,
+                                       Hexagon::W4, Hexagon::W5,
+                                       Hexagon::W6, Hexagon::W7};
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  if ((UseHVX && !UseHVXDbl) &&
+      (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+       LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
+    if (unsigned Reg = State.AllocateReg(VecLstS)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(64, 64);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  if ((UseHVX && !UseHVXDbl) &&
+      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+       LocVT == MVT::v128i8)) {
+    if (unsigned Reg = State.AllocateReg(VecLstD)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  // 128B Mode
+  if ((UseHVX && UseHVXDbl) &&
+      (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+       LocVT == MVT::v256i8)) {
+    if (unsigned Reg = State.AllocateReg(VecLstD)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(256, 256);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  if ((UseHVX && UseHVXDbl) &&
+      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+       LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
+    if (unsigned Reg = State.AllocateReg(VecLstS)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  return true;
+}
+
 static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
                           MVT LocVT, CCValAssign::LocInfo LocInfo,
                           ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
 
   if (LocVT == MVT::i1 ||
       LocVT == MVT::i8 ||
@@ -282,8 +406,24 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
   } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
     LocVT = MVT::i64;
     LocInfo = CCValAssign::BCvt;
+  } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
+             LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
+             LocVT == MVT::v512i1) {
+    LocVT = MVT::v16i32;
+    ValVT = MVT::v16i32;
+    LocInfo = CCValAssign::Full;
+  } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
+             LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
+             (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
+    LocVT = MVT::v32i32;
+    ValVT = MVT::v32i32;
+    LocInfo = CCValAssign::Full;
+  } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
+             LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
+    LocVT = MVT::v64i32;
+    ValVT = MVT::v64i32;
+    LocInfo = CCValAssign::Full;
   }
-
   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
     if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
       return false;
@@ -293,7 +433,10 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
     if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
       return false;
   }
-
+  if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
+    if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+      return false;
+  }
   return true;  // CC didn't match.
 }
@@ -328,6 +471,52 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
   return false;
 }
 
+static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+                                MVT LocVT, CCValAssign::LocInfo LocInfo,
+                                ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  unsigned OffSiz = 64;
+  if (LocVT == MVT::v16i32) {
+    if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  } else if (LocVT == MVT::v32i32) {
+    unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
+    if (unsigned Reg = State.AllocateReg(Req)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    OffSiz = 128;
+  } else if (LocVT == MVT::v64i32) {
+    if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    OffSiz = 256;
+  }
+
+  unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return false;
+}
+
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+  if (VT != PromotedLdStVT) {
+    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+    AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+                      PromotedLdStVT.getSimpleVT());
+
+    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+    AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+                      PromotedLdStVT.getSimpleVT());
+  }
+}
+
 SDValue
 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -351,6 +540,15 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        MachinePointerInfo(), MachinePointerInfo());
 }
 
+static bool IsHvxVectorType(MVT ty) {
+  return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
+          ty == MVT::v64i8 ||
+          ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
+          ty == MVT::v128i8 ||
+          ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
+          ty == MVT::v256i8 ||
+          ty == MVT::v512i1 || ty == MVT::v1024i1);
+}
 
 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
 // passed by value, the function prototype is modified to return void and
@@ -463,19 +661,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Check for varargs.
   int NumNamedVarArgParams = -1;
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
-  {
-    const Function* CalleeFn = nullptr;
-    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
-    if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
-    {
+  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    const GlobalValue *GV = GAN->getGlobal();
+    Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+    if (const Function* F = dyn_cast<Function>(GV)) {
       // If a function has zero args and is a vararg function, that's
       // disallowed so it must be an undeclared function.  Do not assume
       // varargs if the callee is undefined.
-      if (CalleeFn->isVarArg() &&
-          CalleeFn->getFunctionType()->getNumParams() != 0) {
-        NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
-      }
+      if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+        NumNamedVarArgParams = F->getFunctionType()->getNumParams();
     }
   }
@@ -519,11 +713,16 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
 
+  bool NeedsArgAlign = false;
+  unsigned LargestAlignSeen = 0;
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    // Record if we need > 8 byte alignment on an argument.
+    bool ArgAlign = IsHvxVectorType(VA.getValVT());
+    NeedsArgAlign |= ArgAlign;
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -549,13 +748,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                         StackPtr.getValueType());
       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+      if (ArgAlign)
+        LargestAlignSeen = std::max(LargestAlignSeen,
+                                    VA.getLocVT().getStoreSizeInBits() >> 3);
       if (Flags.isByVal()) {
         // The argument is a struct passed by value. According to LLVM, "Arg"
         // is is pointer.
         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                         Flags, DAG, dl));
       } else {
-        MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+            DAG.getMachineFunction(), LocMemOffset);
         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
                                  false, 0);
         MemOpChains.push_back(S);
@@ -569,6 +772,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   }
 
+  if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+    DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+    MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+    // V6 vectors passed by value have 64 or 128 byte alignment depending
+    // on whether we are 64 byte vector mode or 128 byte.
+    bool UseHVXDbl = Subtarget.useHVXDblOps();
+    assert(Subtarget.useHVXOps());
+    const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+    LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+    MFI->ensureMaxAlignment(LargestAlignSeen);
+  }
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
   if (!MemOpChains.empty())
@@ -613,12 +827,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
-  if (flag_aligned_memcpy) {
-    const char *MemcpyName =
-      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
-    Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT);
-    flag_aligned_memcpy = false;
-  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
   } else if (ExternalSymbolSDNode *S =
              dyn_cast<ExternalSymbolSDNode>(Callee)) {
@@ -668,7 +877,19 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
   if (Ptr->getOpcode() != ISD::ADD)
     return false;
 
-  if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+  auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  bool ValidHVXDblType =
+    (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+                              VT == MVT::v64i16 || VT == MVT::v128i8);
+  bool ValidHVXType =
+    UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+                             VT == MVT::v32i16 || VT == MVT::v64i8);
+
+  if (ValidHVXDblType || ValidHVXType ||
+      VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
     isInc = (Ptr->getOpcode() == ISD::ADD);
     Base = Ptr->getOperand(0);
     Offset = Ptr->getOperand(1);
@@ -679,23 +900,6 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
   return false;
 }
 
-// TODO: Put this function along with the other isS* functions in
-// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonOperands.td.
-static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS4 predicate - True if the immediate fits in a 4-bit sign extended.
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  int64_t m = 0;
-  if (ShiftAmount > 0) {
-    m = v % ShiftAmount;
-    v = v >> ShiftAmount;
-  }
-  return (v <= 7) && (v >= -8) && (m == 0);
-}
-
 /// getPostIndexedAddressParts - returns true by value, base pointer and
 /// offset pointer and addressing mode by reference if this node can be
 /// combined with a load / store to form a post-indexed load / store.
@@ -724,18 +928,20 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   bool isInc = false;
   bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                         isInc, DAG);
-  // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
-  int ShiftAmount = VT.getSizeInBits() / 16;
-  if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
-    AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
-    return true;
+  if (isLegal) {
+    auto &HII = *Subtarget.getInstrInfo();
+    int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+    if (HII.isValidAutoIncImm(VT, OffsetVal)) {
+      AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+      return true;
+    }
   }
 
   return false;
 }
 
-SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
-                                              SelectionDAG &DAG) const {
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   MachineFunction &MF = DAG.getMachineFunction();
   auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -784,47 +990,6 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
   return Op;
 }
 
-
-//
-// Taken from the XCore backend.
-//
-SDValue HexagonTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Chain = Op.getOperand(0);
-  SDValue Table = Op.getOperand(1);
-  SDValue Index = Op.getOperand(2);
-  SDLoc dl(Op);
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
-  unsigned JTI = JT->getIndex();
-  MachineFunction &MF = DAG.getMachineFunction();
-  const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
-
-  // Mark all jump table targets as address taken.
-  const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
-  const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
-  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = JTBBs[i];
-    MBB->setHasAddressTaken();
-    // This line is needed to set the hasAddressTaken flag on the BasicBlock
-    // object.
-    BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
-  }
-
-  SDValue JumpTableBase = DAG.getNode(
-      HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
-  SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
-                                   DAG.getConstant(2, dl, MVT::i32));
-  SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
-                                  ShiftIndex);
-  SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
-                                   MachinePointerInfo(), false, false, false,
-                                   0);
-  return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
-}
-
-
 SDValue
 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -850,7 +1015,10 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
-  return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+  if (Op.getNode()->getHasDebugValue())
+    DAG.TransferDbgValues(Op, AA);
+  return AA;
 }
 
 SDValue
@@ -882,7 +1050,8 @@ const {
   // equal to) 8 bytes. If not, no address will be passed into callee and
   // callee return the result direclty through R0/R1.
-  SmallVector<SDValue, 8> MemOps;
+  SmallVector<SDValue, 8> MemOps;
+  bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -908,6 +1077,42 @@ const {
           RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
         RegInfo.addLiveIn(VA.getLocReg(), VReg);
         InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+      // Single Vector
+      } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+                  RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (UseHVX && UseHVXDbl &&
+                 ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+                   RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+      // Double Vector
+      } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+                  RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (UseHVX && UseHVXDbl &&
+                 ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+                   RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+        assert(0 && "need to support VecPred regs");
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
       } else {
         assert (0);
       }
@@ -1056,8 +1261,8 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
-      const {
+SDValue
+HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue PredOp = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
   EVT OpVT = Op1.getValueType();
@@ -1163,16 +1368,33 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue
 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   EVT ValTy = Op.getValueType();
-  SDLoc dl(Op);
-  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  SDValue Res;
-  if (CP->isMachineConstantPoolEntry())
-    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
-                                    CP->getAlignment());
+  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
+  unsigned Align = CPN->getAlignment();
+  Reloc::Model RM = HTM.getRelocationModel();
+  unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0;
+
+  SDValue T;
+  if (CPN->isMachineConstantPoolEntry())
+    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
   else
-    Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
-                                    CP->getAlignment());
-  return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
+    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+  if (RM == Reloc::PIC_)
+    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
+  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
+}
+
+SDValue
+HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
+  Reloc::Model RM = HTM.getRelocationModel();
+  if (RM == Reloc::PIC_) {
+    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
+  }
+
+  SDValue T = DAG.getTargetJumpTable(Idx, VT);
+  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
 }
 
 SDValue
@@ -1219,52 +1441,70 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   return FrameAddr;
 }
 
-SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
-                                                 SelectionDAG& DAG) const {
+SDValue
+HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
   SDLoc dl(Op);
   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
 }
 
-SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
-                                                  SelectionDAG &DAG) const {
-  SDValue Result;
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+SDValue
+HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
+  auto *GAN = cast<GlobalAddressSDNode>(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+  auto *GV = GAN->getGlobal();
+  int64_t Offset = GAN->getOffset();
 
-  const HexagonTargetObjectFile *TLOF =
-      static_cast<const HexagonTargetObjectFile *>(
-          getTargetMachine().getObjFileLowering());
-  if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
-    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result);
+  auto &HLOF = *HTM.getObjFileLowering();
+  Reloc::Model RM = HTM.getRelocationModel();
+
+  if (RM == Reloc::Static) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+    if (HLOF.IsGlobalInSmallSection(GV, HTM))
+      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
+    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
   }
 
-  return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result);
+  bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
+                  (GV->hasLocalLinkage() && !isa<Function>(GV));
+  if (UsePCRel) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
+                                            HexagonII::MO_PCREL);
+    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
+  }
+
+  // Use GOT index.
+  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
+  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
+  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
 }
 
 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
-void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
-  if (VT != PromotedLdStVT) {
-    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
-    AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
-                      PromotedLdStVT.getSimpleVT());
-
-    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
-    AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
-                      PromotedLdStVT.getSimpleVT());
-  }
-}
-
 SDValue
 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  SDValue BA_SD =  DAG.getTargetBlockAddress(BA, MVT::i32);
   SDLoc dl(Op);
-  return DAG.getNode(HexagonISD::CONST32_GP, dl,
-                     getPointerTy(DAG.getDataLayout()), BA_SD);
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  Reloc::Model RM = HTM.getRelocationModel();
+  if (RM == Reloc::Static) {
+    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
+    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
+  }
+
+  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
+  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
+}
+
+SDValue
+HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
+      const {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
+                                               HexagonII::MO_PCREL);
+  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1272,18 +1512,19 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
 //===----------------------------------------------------------------------===//
 
 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
-                                             const HexagonSubtarget &STI)
+                                             const HexagonSubtarget &ST)
     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
-      Subtarget(STI) {
+      Subtarget(ST) {
   bool IsV4 = !Subtarget.hasV5TOps();
   auto &HRI = *Subtarget.getRegisterInfo();
+  bool UseHVX = Subtarget.useHVXOps();
+  bool UseHVXSgl = Subtarget.useHVXSglOps();
+  bool UseHVXDbl = Subtarget.useHVXDblOps();
 
   setPrefLoopAlignment(4);
   setPrefFunctionAlignment(4);
   setMinFunctionAlignment(2);
   setInsertFencesForAtomic(false);
-  setExceptionPointerRegister(Hexagon::R0);
-  setExceptionSelectorRegister(Hexagon::R1);
   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
 
   if (EnableHexSDNodeSched)
@@ -1320,6 +1561,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
   }
 
+  if (Subtarget.hasV60TOps()) {
+    if (Subtarget.useHVXSglOps()) {
+      addRegisterClass(MVT::v64i8,  &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v8i64,  &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
+    } else if (Subtarget.useHVXDblOps()) {
+      addRegisterClass(MVT::v128i8,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v64i16,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v32i32,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v16i64,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v256i8,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v64i32,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v32i64,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
+    }
+
+  }
+
   //
   // Handling of scalar operations.
   //
@@ -1336,10 +1602,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
 
   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
   setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
   // Custom legalize GlobalAddress nodes into CONST32.
@@ -1361,11 +1629,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
 
   if (EmitJumpTables)
-    setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+    setMinimumJumpTableEntries(2);
   else
-    setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  // Increase jump tables cutover to 5, was 4.
-  setMinimumJumpTableEntries(MinimumJumpTables);
+    setMinimumJumpTableEntries(MinimumJumpTables);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
   // Hexagon has instructions for add/sub with carry. The problem with
   // modeling these instructions is that they produce 2 results: Rdd and Px.
@@ -1420,9 +1687,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::MULHS, MVT::i64, Expand);
 
   for (unsigned IntExpOp :
-      {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
-       ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
-       ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+       { ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
+         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
+         ISD::BSWAP,     ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+         ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
     setOperationAction(IntExpOp, MVT::i32, Expand);
     setOperationAction(IntExpOp, MVT::i64, Expand);
   }
@@ -1475,7 +1743,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 
   // Set the action for vector operations to "expand", then override it with
   // either "custom" or "legal" for specific cases.
-  static unsigned VectExpOps[] = {
+  static const unsigned VectExpOps[] = {
     // Integer arithmetic:
     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,    ISD::UDIV,
     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
@@ -1539,7 +1807,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
-
+  if (UseHVX) {
+    if (UseHVXSgl) {
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+    } else if (UseHVXDbl) {
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+    } else {
+      llvm_unreachable("Unrecognized HVX mode");
+    }
+  }
   // Subtarget-specific operation actions.
   //
   if (Subtarget.hasV5TOps()) {
@@ -1586,7 +1868,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 
   for (ISD::CondCode FPExpCCV4 :
        {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
-        ISD::SETUO, ISD::SETO}) {
+        ISD::SETUO,  ISD::SETO}) {
     setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
     setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
   }
@@ -1599,6 +1881,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
   }
 
+  if (UseHVXDbl) {
+    for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+      setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+      setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+    }
+  }
+
   computeRegisterProperties(&HRI);
 
   //
@@ -1720,7 +2009,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
-  case HexagonISD::BR_JT:         return "HexagonISD::BR_JT";
   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
   case HexagonISD::CALLv3nr:      return "HexagonISD::CALLv3nr";
   case HexagonISD::CALLv3:        return "HexagonISD::CALLv3";
@@ -1737,7 +2025,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::INSERTRP:      return "HexagonISD::INSERTRP";
   case HexagonISD::JT:            return "HexagonISD::JT";
   case HexagonISD::PACKHL:        return "HexagonISD::PACKHL";
-  case HexagonISD::PIC_ADD:       return "HexagonISD::PIC_ADD";
   case HexagonISD::POPCOUNT:      return "HexagonISD::POPCOUNT";
   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
   case HexagonISD::SHUFFEB:       return "HexagonISD::SHUFFEB";
@@ -1754,6 +2041,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::VCMPWEQ:       return "HexagonISD::VCMPWEQ";
   case HexagonISD::VCMPWGT:       return "HexagonISD::VCMPWGT";
   case HexagonISD::VCMPWGTU:      return "HexagonISD::VCMPWGTU";
+  case HexagonISD::VCOMBINE:      return "HexagonISD::VCOMBINE";
   case HexagonISD::VSHLH:         return "HexagonISD::VSHLH";
   case HexagonISD::VSHLW:         return "HexagonISD::VSHLW";
   case HexagonISD::VSPLATB:       return "HexagonISD::VSPLTB";
@@ -1923,8 +2211,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   unsigned Size = VT.getSizeInBits();
 
-  // A vector larger than 64 bits cannot be represented in Hexagon.
-  // Expand will split the vector.
+  // Only handle vectors of 64 bits or shorter.
   if (Size > 64)
     return SDValue();
 
@@ -2058,58 +2345,61 @@ SDValue
 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
+  bool UseHVX = Subtarget.useHVXOps();
   EVT VT = Op.getValueType();
   unsigned NElts = Op.getNumOperands();
-  SDValue Vec = Op.getOperand(0);
-  EVT VecVT = Vec.getValueType();
-  SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
-  SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
-                                DAG.getConstant(32, dl, MVT::i64));
-  SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
+  SDValue Vec0 = Op.getOperand(0);
+  EVT VecVT = Vec0.getValueType();
+  unsigned Width = VecVT.getSizeInBits();
 
-  ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
-  ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+  if (NElts == 2) {
+    MVT ST = VecVT.getSimpleVT();
+    // We are trying to concat two v2i16 to a single v4i16, or two v4i8
+    // into a single v8i8.
+    if (ST == MVT::v2i16 || ST == MVT::v4i8)
+      return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
 
-  if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
-    if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
-      // We are trying to concat two v2i16 to a single v4i16.
-      SDValue Vec0 = Op.getOperand(1);
-      SDValue Combined  = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
-      return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+    if (UseHVX) {
+      assert((Width == 64*8 && Subtarget.useHVXSglOps()) ||
+             (Width == 128*8 && Subtarget.useHVXDblOps()));
+      SDValue Vec1 = Op.getOperand(1);
+      MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
+      MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
+      SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
+      SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
+      SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
+      return DAG.getNode(ISD::BITCAST, dl, VT, VC);
     }
   }
 
-  if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
-    if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
-      // We are trying to concat two v4i8 to a single v8i8.
-      SDValue Vec0 = Op.getOperand(1);
-      SDValue Combined  = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
-      return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
-    }
-  }
+  if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
+    return SDValue();
+
+  SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
+  SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
+  SDValue W = DAG.getConstant(Width, dl, MVT::i64);
+  // Create the "width" part of the argument to insert_rp/insertp_rp.
+  SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
+  SDValue V = C0;
 
   for (unsigned i = 0, e = NElts; i != e; ++i) {
-    unsigned OpIdx = NElts - i - 1;
-    SDValue Operand = Op.getOperand(OpIdx);
+    unsigned N = NElts-i-1;
+    SDValue OpN = Op.getOperand(N);
 
-    if (VT.getSizeInBits() == 64 &&
-        Operand.getValueType().getSizeInBits() == 32) {
+    if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
       SDValue C = DAG.getConstant(0, dl, MVT::i32);
-      Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+      OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
     }
-
-    SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
-    SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
-    SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
-    const SDValue Ops[] = {ConstVal, Operand, Combined};
-
+    SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
+    SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
     if (VT.getSizeInBits() == 32)
-      ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
     else
-      ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
   }
 
-  return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+  return DAG.getNode(ISD::BITCAST, dl, VT, V);
 }
 
 SDValue
@@ -2301,6 +2591,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::SHL:
     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
+    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
       // Frame & Return address. Currently unimplemented.
     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
@@ -2308,8 +2599,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
+    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
     case ISD::VASTART:              return LowerVASTART(Op, DAG);
-    case ISD::BR_JT:                return LowerBR_JT(Op, DAG);
       // Custom lower some vector loads.
     case ISD::LOAD:                 return LowerLOAD(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
@@ -2321,6 +2612,16 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
+/// Returns relocation base for the given PIC jumptable.
+SDValue +HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + int Idx = cast(Table)->getIndex(); + EVT VT = Table.getValueType(); + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); +} + MachineBasicBlock * HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) @@ -2343,6 +2644,8 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, std::pair HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -2358,6 +2661,42 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( case MVT::f64: return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); } + case 'q': // q0-q3 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } + case 'v': // V0-V31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + else + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } + default: llvm_unreachable("Unknown asm register class"); } @@ -2397,6 +2736,14 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; } +/// Return true if folding a constant offset with the given GlobalAddress is +/// legal. It is frequently not legal in PIC relocation models. +bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) + const { + return HTM.getRelocationModel() == Reloc::Static; +} + + /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the @@ -2428,8 +2775,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( // *************************************************************************** // If this is a tail call via a function pointer, then don't do it! 
- if (!(dyn_cast(Callee)) - && !(dyn_cast(Callee))) { + if (!(isa(Callee)) && + !(isa(Callee))) { return false; } @@ -2467,6 +2814,41 @@ bool llvm::isPositiveHalfWord(SDNode *N) { } } +std::pair +HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RRC = nullptr; + + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(TRI, VT); + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + RRC = &Hexagon::VectorRegsRegClass; + break; + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && + Subtarget.useHVXDblOps()) + RRC = &Hexagon::VectorRegs128BRegClass; + else + RRC = &Hexagon::VecDblRegsRegClass; + break; + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + RRC = &Hexagon::VecDblRegs128BRegClass; + break; + } + return std::make_pair(RRC, Cost); +} + Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { BasicBlock *BB = Builder.GetInsertBlock(); @@ -2498,13 +2880,15 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, return Ext; } -bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64; + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // Do not expand loads and stores that don't exceed 64 bits. return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; } - diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 2642abffaddd..bf378b922220 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -35,16 +35,14 @@ bool isPositiveHalfWord(SDNode *N); ALLOCA, ARGEXTEND, - PIC_ADD, - AT_GOT, - AT_PCREL, + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. CALLv3, // A V3+ call instruction. CALLv3nr, // A V3+ call instruction that doesn't return. CALLR, RET_FLAG, // Return with a flag operand. - BR_JT, // Branch through jump table. BARRIER, // Memory barrier. JT, // Jump table. CP, // Constant pool. 
@@ -80,6 +78,7 @@ bool isPositiveHalfWord(SDNode *N); INSERTRP, EXTRACTU, EXTRACTURP, + VCOMBINE, TC_RETURN, EH_RETURN, DCFETCH, @@ -127,7 +126,6 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; @@ -137,6 +135,7 @@ bool isPositiveHalfWord(SDNode *N); SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; @@ -163,8 +162,23 @@ bool isPositiveHalfWord(SDNode *N); MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override { if (!VT.isVector()) @@ -200,6 +214,10 @@ bool isPositiveHalfWord(SDNode *N); /// TODO: Handle pre/postinc as well. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -208,20 +226,26 @@ bool isPositiveHalfWord(SDNode *N); /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + // Handling of atomic RMW instructions. 
- bool hasLoadLinkedStoreConditional() const override { - return true; - } Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) - const override { - return AtomicRMWExpansionKind::LLSC; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; } + + protected: + std::pair + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonInstrAlias.td b/lib/Target/Hexagon/HexagonInstrAlias.td new file mode 100644 index 000000000000..5a1a69b40d4d --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrAlias.td @@ -0,0 +1,462 @@ +//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Hexagon Instruction Mappings +//===----------------------------------------------------------------------===// + + +def : InstAlias<"memb({GP}+#$addr) = $Nt.new", + (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.new", + (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt.new", + (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memb({GP}+#$addr) = $Nt", + (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt", + (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.h", + (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt", + (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memd({GP}+#$addr) = $Nt", + (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>; + +def : InstAlias<"$Nt = memb({GP}+#$addr)", + (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memub({GP}+#$addr)", + (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memh({GP}+#$addr)", + (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memuh({GP}+#$addr)", + (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memw({GP}+#$addr)", + (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>; +def : InstAlias<"$Nt = memd({GP}+#$addr)", + (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>; + +// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt +def : InstAlias<"memb($Rs) = $Rt", + (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt", + (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.h", + (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt", + (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = $Rt.new", + (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.new", + (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt.new", + (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; 
+ +def : InstAlias<"memb($Rs) = #$S8", + (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memh($Rs) = #$S8", + (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memw($Rs) = #$S8", + (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memd($Rs) = $Rtt", + (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"memb($Rs) = setbit(#$U5)", + (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = setbit(#$U5)", + (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = setbit(#$U5)", + (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memb($Rs) = clrbit(#$U5)", + (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = clrbit(#$U5)", + (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = clrbit(#$U5)", + (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) +def : InstAlias<"$Rd = memb($Rs)", + (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memub($Rs)", + (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memh($Rs)", + (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memuh($Rs)", + (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memw($Rs)", + (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memd($Rs)", + (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memubh($Rs)", + (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memubh($Rs)", + (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = membh($Rs)", + (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = membh($Rs)", + (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memb_fifo($Rs)", + (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memh_fifo($Rs)", + (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X) +// to: if ($Pt) $Rd = memXX($Rs) +def : InstAlias<"if ($Pt) $Rd = memb($Rs)", + (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memub($Rs)", + (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memh($Rs)", + (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memuh($Rs)", + (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memw($Rs)", + (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rdd = memd($Rs)", + (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt +// to: if ($Pt) memXX($Rs) = $Rt +def : InstAlias<"if ($Pt) memb($Rs) = $Rt", + (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt", + (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h", + (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt", + (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memd($Rs) = $Rtt", + (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, 
DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + + +// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X) +// to: if (!$Pt) $Rd = memXX($Rs) +def : InstAlias<"if (!$Pt) $Rd = memb($Rs)", + (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memub($Rs)", + (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memh($Rs)", + (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)", + (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memw($Rs)", + (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)", + (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt +// to: if (!$Pt) memXX($Rs) = $Rt +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt", + (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt", + (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h", + (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt", + (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt", + (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = #$S6", + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = #$S6", + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = #$S6", + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", + (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def 
: InstAlias<"if ($Pt.new) memw($Rs) = #$S6", + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = #$S6", + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= +// to: memXX($Rs) |= $Rt +def : InstAlias<"memb($Rs) &= $Rt", + (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) |= $Rt", + (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += $Rt", + (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= $Rt", + (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += #$U5", + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= #$U5", + (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) &= $Rt", + (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) |= $Rt", + (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += $Rt", + (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= $Rt", + (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += #$U5", + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= #$U5", + (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) &= $Rt", + (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) |= $Rt", + (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += $Rt", + (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= $Rt", + (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += #$U5", + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= #$U5", + (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +// +// Alias of: if ($Pv.new) memX($Rs) = $Rt +// to: if (p3.new) memX(r17 + #0) = $Rt +def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt", + (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt", + (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt", + (S4_pstoreritnew_io PredRegs:$Pv, 
IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdtnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt", + (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt", + (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt", + (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdfnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +// +// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ... +// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0) +def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)", + (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)", + (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)", + (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)", + (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)", + (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)", + (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)", + (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)", + (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"dcfetch($Rs)", + (Y2_dcfetchbo IntRegs:$Rs, 0), 0>; + +// Alias of some insn mappings, others must be handled by the parser +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + +// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs) +def : InstAlias<"$Rd = neg($Rs)", + (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>; + +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +def : InstAlias<"$Pd = $Ps", + (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>; + +def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)", + (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>; + +def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)", + (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"$Rd = mpyui($Rs,$Rt)", + (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>; + +// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a) +def : 
InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + diff --git a/lib/Target/Hexagon/HexagonInstrEnc.td b/lib/Target/Hexagon/HexagonInstrEnc.td new file mode 100644 index 000000000000..280832fd167f --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrEnc.td @@ -0,0 +1,1019 @@ +class Enc_COPROC_VX_3op_v opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src2}; + let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst}; +} + +class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>; +class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>; +class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>; +class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>; +class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>; +class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>; +class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>; +class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>; +class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>; +class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>; +class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>; +class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>; +class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>; +class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>; +class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>; +class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>; +class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>; +class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>; +class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>; +class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>; +class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>; +class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>; +class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>; +class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>; +class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>; +class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>; +class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>; +class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>; +class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>; +class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>; +class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>; +class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>; +class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>; +class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>; +class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>; +class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>; +class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>; +class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>; +class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>; +class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>; +class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>; +class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>; +class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>; +class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>; +class 
V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>; +class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>; +class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>; +class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>; +class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>; +class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>; +class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>; +class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>; +class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>; +class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>; +class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>; +class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>; +class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>; +class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>; +class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>; +class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>; +class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>; +class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>; +class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>; +class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>; +class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>; +class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>; +class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>; +class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>; +class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>; +class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>; +class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>; +class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>; +class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>; +class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>; +class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>; +class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>; +class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>; +class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>; +class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>; +class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>; +class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>; +class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>; +class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>; +class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>; +class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>; +class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>; +class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>; +class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>; +class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>; +class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>; +class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>; +class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>; +class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>; +class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>; +class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>; +class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>; +class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>; +class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>; +class V6_vsubhsat_enc : 
Enc_COPROC_VX_3op_v<0b000111000110010>; +class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>; +class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>; +class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>; +class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>; +class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>; +class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>; +class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>; +class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>; +class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>; +class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>; +class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>; +class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>; +class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>; +class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>; +class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>; +class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>; +class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>; +class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>; +class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>; +class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>; +class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>; +class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>; +class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>; +class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>; +class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>; +class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>; +class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>; +class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>; +class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>; +class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>; +class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>; +class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>; +class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>; +class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>; +class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>; +class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>; +class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>; +class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>; +class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>; +class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>; +class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>; +class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>; +class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>; +class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>; +class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>; +class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>; +class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>; +class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>; +class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>; +class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>; +class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>; +class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>; +class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>; +class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>; +class 
V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>; +class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>; +class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>; +class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>; +class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>; +class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>; +class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>; +class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>; +class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>; +class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>; +class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>; +class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>; +class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>; +class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>; +class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>; +class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>; +class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>; +class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>; +class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>; +class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>; +class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>; +class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>; +class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>; +class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>; +class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>; +class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>; +class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>; +class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>; +class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>; +class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>; +class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>; +class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>; + +class Enc_COPROC_VX_cmp opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} }; + let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} }; +} + +class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>; +class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>; +class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>; +class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>; +class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>; +class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>; +class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>; +class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>; +class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>; +class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>; +class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>; +class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>; +class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>; +class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>; +class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>; +class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>; +class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>; +class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>; +class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>; +class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>; +class V6_veqb_xor_enc : 
Enc_COPROC_VX_cmp<0b1001001100000>; +class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>; +class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>; +class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>; +class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>; +class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>; +class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>; +class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>; +class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>; +class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>; +class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>; +class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>; +class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>; +class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>; +class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>; +class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>; +class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>; +class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>; + +class Enc_COPROC_VX_p2op opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} }; + let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>; +class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>; +class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>; +class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>; +class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>; +class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>; +class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>; +class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>; +class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>; +class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>; +class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>; +class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>; + +class Enc_COPROC_VX_2op opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { 0b00011110000000, opc{5-4} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>; +class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>; +class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>; +class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>; +class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>; +class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>; +class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>; +class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>; +class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>; +class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>; +class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>; +class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>; +class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>; +class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>; +class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>; +class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>; +class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>; +class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>; +class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>; +class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>; +class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>; +class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>; +class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>; +class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>; +class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>; + +class Enc_COPROC_VMEM_vL32_b_ai opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + + 
let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>; +class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>; +class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>; +class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>; +class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>; +class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>; +class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_ai_128B opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>; +class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>; +class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>; +class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>; +class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>; +class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>; +class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>; + +class Enc_COPROC_VMEM_vS32_b_ai_64B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_ai_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>; +class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>; +class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>; + +class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>; +class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>; +class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>; +class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>; +class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ai opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; 
+ bits<5> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_pred_ai_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>; +class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>; +class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>; +class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>; +class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>; +class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>; +class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>; +class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>; +class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>; +class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>; + +class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>; +class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>; +class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>; +class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>; +class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>; +class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>; +class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>; +class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>; +class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>; +class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>; +class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>; +class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>; +class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>; +class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>; +class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>; +class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>; + +// TODO: Change script to generate dst, src1, src2 instead of 
+// dst, dst2, src1. +class Enc_COPROC_VMEM_vL32_b_pi opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>; +class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>; +class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>; +class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>; +class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>; +class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>; +class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_pi_128B opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>; +class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>; +class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>; +class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>; +class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>; +class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>; +class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>; + + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. +class Enc_COPROC_VMEM_vS32_b_pi opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>; +class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>; +class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>; + +class Enc_COPROC_VMEM_vS32_b_pi_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>; +class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>; +class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>; + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. 
+class Enc_COPROC_VMEM_vS32b_n_ew_pi opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>; +class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>; +class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. +class Enc_COPROC_VMEM_vS32_b_pred_pi opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>; +class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>; +class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>; +class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>; +class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>; +class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>; +class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>; +class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>; +class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>; +class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. 
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>; +class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>; +class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>; +class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>; +class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>; +class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>; +class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>; +class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>; +class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>; +class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>; +class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>; +class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>; +class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>; +class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>; +class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>; +class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>; + +class Enc_LD_load_m opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>; +class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>; +class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>; +class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>; +class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>; +class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>; +class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>; + +class Enc_COPROC_VMEM_vS32_b_ppu opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<5> src3; + + let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>; +class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>; +class 
+class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
+  bits<5> src1;
+  bits<1> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
+  let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
+}
+
+class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
+class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> src2;
+  bits<1> src3;
+  bits<5> src4;
+
+  let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
+  let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
+class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
+class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
+class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
+class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
+class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
+class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
+class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
+class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
+class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> src2;
+  bits<1> src3;
+  bits<3> src4;
+
+  let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
+  let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
+class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
+class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
+class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
+
+
+class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<1> src3;
+
+  let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
+  let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
+}
+
+class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
+class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
+class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
+class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
+class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
+class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
+
+class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<2> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
+  let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
+}
+
+class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
+class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
+
+class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
+  bits<5> src1;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { 0b00011001111, src3{4-0} };
+  let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
+}
+
+class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
+class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
+
+
+class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> dst;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
+  let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
+class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
+
+class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> dst;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
+  let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
+}
+
+class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
+class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
+
+class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
+  let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
+class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
+class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
+class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
+class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
+class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
+class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
+class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
+class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
+class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
+class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
+class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
+class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
+class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
+class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
+
+class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
+  let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
+}
+
+class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
+class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
+class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
+class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
+class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
+class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
+class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
+class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
+
+class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
+  bits<2> dst;
+  bits<2> src1;
+  bits<2> src2;
+
+  let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
+  let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
+}
+
+class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
+class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
+class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
+class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
+class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
+
+class V6_pred_not_enc : OpcodeHexagon {
+  bits<2> dst;
+  bits<2> src1;
+
+  let Inst{31-16} = { 0b0001111000000011 };
+  let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
+}
+
+class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<2> src1;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
+  let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
+class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
+
+class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{15-5}, src1{4-0} };
+  let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
+}
+
+class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
+class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
+class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
+
+
+class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
+  bits<2> dst;
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
+  let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
+}
+
+class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
+class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
+
+class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<6> src2;
+
+  let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
+  let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
+class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
+class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
+class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
+class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
+class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
+
+class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { opc{14-4}, src1{4-0} };
+  let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
+class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
+class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
+class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
+class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
+class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
+class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
+class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
+
+class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
+
+  let Inst{31-16} = { opc{24-10}, 0 };
+  let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
+}
+
+class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
+class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
+class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
+class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
+
+class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
+  let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
+}
+
+class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
+class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
+
+class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
+  bits<5> src1;
+
+  let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
+  let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
+}
+
+class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
+class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
+
+class A5_ACS_enc : OpcodeHexagon {
+  bits<5> dst1;
+  bits<2> dst2;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b11101010101, src1{4-0} };
+  let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
+
+class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<2> src3;
+
+  let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
+  let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
+}
+
+class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
+class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
+class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
+
+class V6_vhistq_enc : OpcodeHexagon {
+  bits<2> src1;
+
+  let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
+  let Inst{13-0} = { 0b10000010000000 };
+}
+
+// TODO: Change script to generate dst1 instead of dst.
+class A6_vminub_RdP_enc : OpcodeHexagon {
+  bits<5> dst1;
+  bits<2> dst2;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b11101010111, src2{4-0} };
+  let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 44bab292f32c..3c5ec1701dc2 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -34,6 +34,8 @@ class SubTarget<bits<6> value> {
 
 def HasAnySubT : SubTarget<0x3f>;  // 111111
 def HasV5SubT : SubTarget<0x3e>;   // 111110
+def HasV55SubT : SubTarget<0x3c>;  // 111100
+def HasV60SubT : SubTarget<0x38>;  // 111000
 
 // Addressing modes for load/store instructions
 class AddrModeType<bits<3> value> {
@@ -57,6 +59,8 @@ def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
 def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
 def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
 def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv)
+def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
 
 
 //===----------------------------------------------------------------------===//
@@ -167,14 +171,23 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
   bits<1> isFP = 0;
   let TSFlags {48} = isFP; // Floating-point.
 
+  bits<1> hasNewValue2 = 0;
+  let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+  bits<3> opNewValue2 = 0;
+  let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+
+  bits<1> isAccumulator = 0;
+  let TSFlags{54} = isAccumulator;
+
   // Fields used for relation models.
+  bit isNonTemporal = 0;
+  string isNT = ""; // set to "true" for non-temporal vector stores.
   string BaseOpcode = "";
   string CextOpcode = "";
   string PredSense = "";
   string PNewValue = "";
   string NValueST = "";     // Set to "true" for new-value stores.
   string InputType = "";    // Input is "imm" or "reg" type.
-  string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
   string isFloat = "false"; // Set to "true" for the floating-point load/store.
   string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
 
@@ -182,6 +195,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
                              "");
   let PNewValue = !if(isPredicatedNew, "new", "");
   let NValueST = !if(isNVStore, "true", "false");
+  let isNT = !if(isNonTemporal, "true", "false");
 
   // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
 }
@@ -217,6 +231,11 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
               string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
 
+let mayLoad = 1 in
+class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+
 // ST Instruction Class in V2/V3 can take SLOT0 only.
 // ST Instruction Class in V4 can take SLOT0 & SLOT1.
 // Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -234,6 +253,12 @@ class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
               string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
 
+// Same as ST0Inst but doesn't derive from OpcodeHexagon.
+let mayStore = 1 in
+class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
 // ST Instruction Class in V2/V3 can take SLOT0 only.
 // ST Instruction Class in V4 can take SLOT0 & SLOT1.
 // Definition of the instruction class CHANGED from V2/V3 to V4.
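As the comment in the hunk above says, these TSFlags bit positions must stay in sync with MCTargetDesc/HexagonBaseInfo.h. A hedged C++ sketch of the consumer side, using the bit positions from the patch; the Pos/Mask constant names follow the HexagonII convention but are written out here only for illustration:

#include <cstdint>

// Illustrative mirror of the new TSFlags fields added above; the positions
// come from the patch, the names are assumptions in the Pos/Mask style.
namespace HexagonII {
  enum {
    hasNewValue2Pos = 50,  hasNewValue2Mask = 0x1,
    opNewValue2Pos  = 51,  opNewValue2Mask  = 0x7,  // bits 53-51
    isAccumulatorPos = 54, isAccumulatorMask = 0x1,
  };
}

static bool hasNewValue2(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::hasNewValue2Pos) & HexagonII::hasNewValue2Mask;
}

static unsigned opNewValue2(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::opNewValue2Pos) & HexagonII::opNewValue2Mask;
}

static bool isAccumulator(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::isAccumulatorPos) & HexagonII::isAccumulatorMask;
}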
@@ -277,6 +302,11 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
     OpcodeHexagon;
 
+// Same as above but doesn't derive from OpcodeHexagon
+class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
 // M Instruction Class in V2/V3.
 // XTYPE Instruction Class in V4.
 // Definition of the instruction class NOT CHANGED.
@@ -294,6 +324,10 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
     OpcodeHexagon;
 
+class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
 // S Instruction Class in V2/V3.
 // XTYPE Instruction Class in V4.
 // Definition of the instruction class NOT CHANGED.
@@ -402,3 +436,13 @@ include "HexagonInstrFormatsV4.td"
 //===----------------------------------------------------------------------===//
 // V4 Instruction Format Definitions +
 //===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index db83ef6bc474..2d1dea526eed 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -21,8 +21,6 @@ def TypeMEMOP : IType<9>;
 def TypeNV : IType<10>;
 def TypeDUPLEX : IType<11>;
 def TypeCOMPOUND : IType<12>;
-def TypeAG_VX : IType<28>;
-def TypeAG_VM : IType<29>;
 def TypePREFIX : IType<30>;
 
 // Duplex Instruction Class Declaration
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
new file mode 100644
index 000000000000..f3d43dec733e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -0,0 +1,238 @@
+//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+//    *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VA>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VA_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LATE>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VX>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VX_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VX_DV_SLOT2>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VX_DV_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VP_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VP_VS_EARLY>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VP_VS_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr,
+                                    list<dag> pattern = [], string cstr = "",
+                                    InstrItinClass itin = CVI_VP_VS_LONG_EARLY>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VS>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VINLANESAT>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VS>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
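The TypeCVI_* IType values above partition the new HVX instructions by the hardware resource they occupy, and per the comment they must match BaseInfo.h. A minimal C++ mirror, illustrating the kind of range check a packetizer-style consumer might perform; the enum values are from the patch, the helper name is an assumption:

// Illustrative only: IType values mirrored as a C++ enum so a consumer can
// ask whether an instruction occupies an HVX memory (VM_*) resource.
enum IType {
  TypeCVI_VA = 13, TypeCVI_VA_DV = 14, TypeCVI_VX = 15, TypeCVI_VX_DV = 16,
  TypeCVI_VP = 17, TypeCVI_VP_VS = 18, TypeCVI_VS = 19,
  TypeCVI_VINLANESAT = 20, TypeCVI_VM_LD = 21, TypeCVI_VM_TMP_LD = 22,
  TypeCVI_VM_CUR_LD = 23, TypeCVI_VM_VP_LDU = 24, TypeCVI_VM_ST = 25,
  TypeCVI_VM_NEW_ST = 26, TypeCVI_VM_STU = 27, TypeCVI_HIST = 28
};

static bool isCVIMemType(IType T) {
  return T >= TypeCVI_VM_LD && T <= TypeCVI_VM_STU; // the contiguous VM_* range
}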
+class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VM_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VM_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_TMP_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_TMP_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_CUR_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_VP_LDU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_VP_LDU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_NEW_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_NEW_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr,
+                          list<dag> pattern = [], string cstr = "",
+                          InstrItinClass itin = CVI_VM_STU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VM_STU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
+                        list<dag> pattern = [], string cstr = "",
+                        InstrItinClass itin = CVI_HIST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+}
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
+                       list<dag> pattern = [], string cstr = "",
+                       InstrItinClass itin = CVI_VA>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr,
+                          list<dag> pattern = [], string cstr = "",
+                          InstrItinClass itin = CVI_VX_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_HIST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+    Requires<[HasV60T, UseHVX]>;
+}
+
+
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 3cb082349b41..eb3590cb1076 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,9 +23,11 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 using namespace llvm;
 
@@ -36,9 +38,41 @@ using namespace llvm;
 #include "HexagonGenInstrInfo.inc"
 #include "HexagonGenDFAPacketizer.inc"
 
+using namespace llvm;
+
+cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
+  cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
+                            "packetization boundary."));
+
+static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
+  cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
+
+static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable schedule adjustment for new value stores."));
+
+static cl::opt<bool> EnableTimingClassLatency(
+  "enable-timing-class-latency", cl::Hidden, cl::init(false),
+  cl::desc("Enable timing class latency"));
+
+static cl::opt<bool> EnableALUForwarding(
+  "enable-alu-forwarding", cl::Hidden, cl::init(true),
+  cl::desc("Enable vec alu forwarding"));
+
+static cl::opt<bool> EnableACCForwarding(
+  "enable-acc-forwarding", cl::Hidden, cl::init(true),
+  cl::desc("Enable vec acc forwarding"));
+
+static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
+  cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+
 ///
 /// Constants for Hexagon instructions.
 ///
+const int Hexagon_MEMV_OFFSET_MAX_128B = 2047;  // #s7
+const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7
+const int Hexagon_MEMV_OFFSET_MAX = 1023;       // #s6
+const int Hexagon_MEMV_OFFSET_MIN = -1024;      // #s6
 const int Hexagon_MEMW_OFFSET_MAX = 4095;
 const int Hexagon_MEMW_OFFSET_MIN = -4096;
 const int Hexagon_MEMD_OFFSET_MAX = 8191;
@@ -57,71 +91,49 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14;
 const int Hexagon_MEMH_AUTOINC_MIN = -16;
 const int Hexagon_MEMB_AUTOINC_MAX = 7;
 const int Hexagon_MEMB_AUTOINC_MIN = -8;
+const int Hexagon_MEMV_AUTOINC_MAX = 192;
+const int Hexagon_MEMV_AUTOINC_MIN = -256;
+const int Hexagon_MEMV_AUTOINC_MAX_128B = 384;
+const int Hexagon_MEMV_AUTOINC_MIN_128B = -512;
 
 // Pin the vtable to this file.
 void HexagonInstrInfo::anchor() {}
 
 HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
     : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
-      RI(), Subtarget(ST) {}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                               int &FrameIndex) const {
+      RI() {}
 
-  switch (MI->getOpcode()) {
-  default: break;
-  case Hexagon::L2_loadri_io:
-  case Hexagon::L2_loadrd_io:
-  case Hexagon::L2_loadrh_io:
-  case Hexagon::L2_loadrb_io:
-  case Hexagon::L2_loadrub_io:
-    if (MI->getOperand(2).isFI() &&
-        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
-      FrameIndex = MI->getOperand(2).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-  return 0;
+static bool isIntRegForSubInst(unsigned Reg) {
+  return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+         (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
 }
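The new Hexagon_MEMV_* constants above bound the byte offsets and post-increments legal for HVX vector loads and stores (64-byte vectors, doubled for 128B mode). A hedged C++ sketch of the kind of range check they support; the helper name is an assumption, and the requirement that the value be a multiple of the vector size is inferred from the encodings being expressed in vector units:

// Illustrative only: legal byte post-increment window for an HVX vector
// memory access, per the constants above.
static bool isValidVecAutoIncImm(int Off, bool Is128B) {
  const int Min = Is128B ? -512 : -256;  // Hexagon_MEMV_AUTOINC_MIN[_128B]
  const int Max = Is128B ?  384 :  192;  // Hexagon_MEMV_AUTOINC_MAX[_128B]
  const int VecSize = Is128B ? 128 : 64;
  return Off >= Min && Off <= Max && (Off % VecSize) == 0;
}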
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                              int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case Hexagon::S2_storeri_io:
-  case Hexagon::S2_storerd_io:
-  case Hexagon::S2_storerh_io:
-  case Hexagon::S2_storerb_io:
-    if (MI->getOperand(2).isFI() &&
-        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
-      FrameIndex = MI->getOperand(0).getIndex();
-      return MI->getOperand(2).getReg();
-    }
-    break;
-  }
-  return 0;
+static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
+  return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
+         isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
 }
 
-// Find the hardware loop instruction used to set-up the specified loop.
-// On Hexagon, we have two instructions used to set-up the hardware loop
-// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
-// to indicate the end of a loop.
-static MachineInstr *
-findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
-              SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+
+/// Calculate number of instructions excluding the debug instructions.
+static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
+                              MachineBasicBlock::const_instr_iterator MIE) {
+  unsigned Count = 0;
+  for (; MIB != MIE; ++MIB) {
+    if (!MIB->isDebugValue())
+      ++Count;
+  }
+  return Count;
+}
+
+
+/// Find the hardware loop instruction used to set-up the specified loop.
+/// On Hexagon, we have two instructions used to set-up the hardware loop
+/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+/// to indicate the end of a loop.
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+      SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
   int LOOPi;
   int LOOPr;
   if (EndLoopOp == Hexagon::ENDLOOP0) {
@@ -157,100 +169,108 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
   return 0;
 }
 
-unsigned HexagonInstrInfo::InsertBranch(
-    MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
-  Opcode_t BOpc = Hexagon::J2_jump;
-  Opcode_t BccOpc = Hexagon::J2_jumpt;
+/// Gather register def/uses from MI.
+/// This treats possible (predicated) defs as actually happening ones
+/// (conservatively).
+static inline void parseOperands(const MachineInstr *MI,
+      SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
+  Defs.clear();
+  Uses.clear();
 
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
 
-  // Check if ReverseBranchCondition has asked to reverse this branch
-  // If we want to reverse the branch an odd number of times, we want
-  // J2_jumpf.
-  if (!Cond.empty() && Cond[0].isImm())
-    BccOpc = Cond[0].getImm();
+    if (!MO.isReg())
+      continue;
 
-  if (!FBB) {
-    if (Cond.empty()) {
-      // Due to a bug in TailMerging/CFG Optimization, we need to add a
-      // special case handling of a predicated jump followed by an
-      // unconditional jump. If not, Tail Merging and CFG Optimization go
-      // into an infinite loop.
-      MachineBasicBlock *NewTBB, *NewFBB;
-      SmallVector<MachineOperand, 4> Cond;
-      MachineInstr *Term = MBB.getFirstTerminator();
-      if (Term != MBB.end() && isPredicated(Term) &&
-          !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
-        MachineBasicBlock *NextBB =
-            std::next(MachineFunction::iterator(&MBB));
-        if (NewTBB == NextBB) {
-          ReverseBranchCondition(Cond);
-          RemoveBranch(MBB);
-          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
-        }
-      }
-      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
-    } else if (isEndLoopN(Cond[0].getImm())) {
-      int EndLoopOp = Cond[0].getImm();
-      assert(Cond[1].isMBB());
-      // Since we're adding an ENDLOOP, there better be a LOOP instruction.
-      // Check for it, and change the BB target if needed.
-      SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
-      MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
-      assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
-      Loop->getOperand(0).setMBB(TBB);
-      // Add the ENDLOOP after the finding the LOOP0.
-      BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
-    } else if (isNewValueJump(Cond[0].getImm())) {
-      assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
-      // New value jump
-      // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
-      // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
-      unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
-      DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
-      if (Cond[2].isReg()) {
-        unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
-        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
-          addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
-      } else if(Cond[2].isImm()) {
-        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
-          addImm(Cond[2].getImm()).addMBB(TBB);
-      } else
-        llvm_unreachable("Invalid condition for branching");
-    } else {
-      assert((Cond.size() == 2) && "Malformed cond vector");
-      const MachineOperand &RO = Cond[1];
-      unsigned Flags = getUndefRegState(RO.isUndef());
-      BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+
+    if (MO.isUse())
+      Uses.push_back(MO.getReg());
+
+    if (MO.isDef())
+      Defs.push_back(MO.getReg());
+  }
+}
+
+
+// Position dependent, so check twice for swap.
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+  switch (Ga) {
+  case HexagonII::HSIG_None:
+  default:
+    return false;
+  case HexagonII::HSIG_L1:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_L2:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_S1:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_S2:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+            Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_A:
+    return (Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_Compound:
+    return (Gb == HexagonII::HSIG_Compound);
+  }
+  return false;
+}
+
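The duplex grouping table above is deliberately asymmetric, which is what the "check twice for swap" comment means: an L2 sub-instruction may pair with an L1 in this slot order, but not the reverse. A hypothetical usage sketch, assuming the function and the HexagonII::HSIG_* enum from this patch are in scope:

// Illustrative only: why callers probe isDuplexPairMatch in both orders.
//   isDuplexPairMatch(HexagonII::HSIG_L2, HexagonII::HSIG_L1) -> true
//   isDuplexPairMatch(HexagonII::HSIG_L1, HexagonII::HSIG_L2) -> false
static bool canFormDuplex(unsigned Ga, unsigned Gb) {
  return isDuplexPairMatch(Ga, Gb) || isDuplexPairMatch(Gb, Ga);
}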
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::L2_loadri_io:
+  case Hexagon::L2_loadrd_io:
+  case Hexagon::L2_loadrh_io:
+  case Hexagon::L2_loadrb_io:
+  case Hexagon::L2_loadrub_io:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(2).getIndex();
+      return MI->getOperand(0).getReg();
     }
-    return 1;
+    break;
   }
-  assert((!Cond.empty()) &&
-         "Cond. cannot be empty when multiple branchings are required");
-  assert((!isNewValueJump(Cond[0].getImm())) &&
-         "NV-jump cannot be inserted with another branch");
-  // Special case for hardware loops. The condition is a basic block.
-  if (isEndLoopN(Cond[0].getImm())) {
-    int EndLoopOp = Cond[0].getImm();
-    assert(Cond[1].isMBB());
-    // Since we're adding an ENDLOOP, there better be a LOOP instruction.
-    // Check for it, and change the BB target if needed.
-    SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
-    MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
-    assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
-    Loop->getOperand(0).setMBB(TBB);
-    // Add the ENDLOOP after the finding the LOOP0.
-    BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
-  } else {
-    const MachineOperand &RO = Cond[1];
-    unsigned Flags = getUndefRegState(RO.isUndef());
-    BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
-  }
-  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+  return 0;
+}
 
-  return 2;
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::S2_storeri_io:
+  case Hexagon::S2_storerd_io:
+  case Hexagon::S2_storerh_io:
+  case Hexagon::S2_storerb_io:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+    break;
+  }
+  return 0;
 }
 
 
@@ -269,9 +289,6 @@ unsigned HexagonInstrInfo::InsertBranch(
 /// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
 /// Cond[1] = R
 /// Cond[2] = Imm
-/// @note Related function is \fn findInstrPredicate which fills in
-/// Cond. vector when a predicated instruction is passed to it.
-/// We follow same protocol in that case too.
 ///
 bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
@@ -314,7 +331,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
-  
+
   bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
                      I->getOperand(0).isMBB();
   // Delete the J2_jump if it's equivalent to a fall-through.
@@ -327,17 +344,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
-  if (!isUnpredicatedTerminator(I))
+  if (!isUnpredicatedTerminator(&*I))
     return false;
 
   // Get the last instruction in the block.
-  MachineInstr *LastInst = I;
+  MachineInstr *LastInst = &*I;
   MachineInstr *SecondLastInst = nullptr;
   // Find one more terminator if present.
-  do {
-    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+  for (;;) {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
       if (!SecondLastInst)
-        SecondLastInst = I;
+        SecondLastInst = &*I;
       else
         // This is a third branch.
         return true;
@@ -345,7 +362,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     if (I == MBB.instr_begin())
       break;
     --I;
-  } while(I);
+  }
 
   int LastOpcode = LastInst->getOpcode();
   int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
@@ -418,7 +435,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   // executed, so remove it.
   if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
     TBB = SecondLastInst->getOperand(0).getMBB();
-    I = LastInst;
+    I = LastInst->getIterator();
     if (AllowModify)
       I->eraseFromParent();
     return false;
@@ -438,6 +455,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
+
 unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
   MachineBasicBlock::iterator I = MBB.end();
@@ -458,13 +476,818 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return Count;
 }
 
+
+unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+      MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+      ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+  unsigned BOpc = Hexagon::J2_jump;
+  unsigned BccOpc = Hexagon::J2_jumpt;
+  assert(validateBranchCond(Cond) && "Invalid branching condition");
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  // Check if ReverseBranchCondition has asked to reverse this branch
+  // If we want to reverse the branch an odd number of times, we want
+  // J2_jumpf.
+  if (!Cond.empty() && Cond[0].isImm())
+    BccOpc = Cond[0].getImm();
+
+  if (!FBB) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add a
+      // special case handling of a predicated jump followed by an
+      // unconditional jump. If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
+      MachineBasicBlock *NewTBB, *NewFBB;
+      SmallVector<MachineOperand, 4> Cond;
+      MachineInstr *Term = MBB.getFirstTerminator();
+      if (Term != MBB.end() && isPredicated(Term) &&
+          !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+        MachineBasicBlock *NextBB = &*++MBB.getIterator();
+        if (NewTBB == NextBB) {
+          ReverseBranchCondition(Cond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
+        }
+      }
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else if (isEndLoopN(Cond[0].getImm())) {
+      int EndLoopOp = Cond[0].getImm();
+      assert(Cond[1].isMBB());
+      // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+      // Check for it, and change the BB target if needed.
+      SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+      MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+      assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+      Loop->getOperand(0).setMBB(TBB);
+      // Add the ENDLOOP after finding the LOOP0.
+      BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+    } else if (isNewValueJump(Cond[0].getImm())) {
+      assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+      // New value jump
+      // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+      // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+      unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+      DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+      if (Cond[2].isReg()) {
+        unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+      } else if(Cond[2].isImm()) {
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addImm(Cond[2].getImm()).addMBB(TBB);
+      } else
+        llvm_unreachable("Invalid condition for branching");
+    } else {
+      assert((Cond.size() == 2) && "Malformed cond vector");
+      const MachineOperand &RO = Cond[1];
+      unsigned Flags = getUndefRegState(RO.isUndef());
+      BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+    }
+    return 1;
+  }
+  assert((!Cond.empty()) &&
+         "Cond. cannot be empty when multiple branchings are required");
+  assert((!isNewValueJump(Cond[0].getImm())) &&
+         "NV-jump cannot be inserted with another branch");
+  // Special case for hardware loops. The condition is a basic block.
+  if (isEndLoopN(Cond[0].getImm())) {
+    int EndLoopOp = Cond[0].getImm();
+    assert(Cond[1].isMBB());
+    // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+    // Check for it, and change the BB target if needed.
+    SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+    MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+    assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+    Loop->getOperand(0).setMBB(TBB);
+    // Add the ENDLOOP after finding the LOOP0.
+    BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+  } else {
+    const MachineOperand &RO = Cond[1];
+    unsigned Flags = getUndefRegState(RO.isUndef());
+    BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+  }
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+  return 2;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+      unsigned NumCycles, unsigned ExtraPredCycles,
+      BranchProbability Probability) const {
+  return nonDbgBBSize(&MBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+      unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
+      unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
+      const {
+  return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+      unsigned NumInstrs, BranchProbability Probability) const {
+  return NumInstrs <= 4;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg,
+      unsigned SrcReg, bool KillSrc) const {
+  auto &HRI = getRegisterInfo();
+  if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+    // Map Pd = Ps to Pd = or(Ps, Ps).
+    BuildMI(MBB, I, DL, get(Hexagon::C2_or),
+            DestReg).addReg(SrcReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+      Hexagon::IntRegsRegClass.contains(SrcReg)) {
+    // We can have an overlap between single and double reg: r1:0 = r0.
+    if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+      // r1:0 = r0
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    } else {
+      // r1:0 = r1 or no overlap.
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg,
+              Hexagon::subreg_loreg))).addReg(SrcReg);
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    }
+    return;
+  }
+  if (Hexagon::CtrRegsRegClass.contains(DestReg) &&
+      Hexagon::IntRegsRegClass.contains(SrcReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+      Hexagon::IntRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+      Hexagon::PredRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+      Hexagon::IntRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+             getKillRegState(KillSrc)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+             getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg).
+      addReg(SrcReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(SrcReg) &&
+      Hexagon::VectorRegsRegClass.contains(DestReg)) {
+    llvm_unreachable("Unimplemented pred to vec");
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(DestReg) &&
+      Hexagon::VectorRegsRegClass.contains(SrcReg)) {
+    llvm_unreachable("Unimplemented vec to pred");
+    return;
+  }
+  if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+            HRI.getSubReg(DestReg, Hexagon::subreg_hireg)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+             getKillRegState(KillSrc));
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+            HRI.getSubReg(DestReg, Hexagon::subreg_loreg)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+             getKillRegState(KillSrc));
+    return;
+  }
+
+#ifndef NDEBUG
+  // Show the invalid registers to ease debugging.
+  dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber()
+         << ": " << PrintReg(DestReg, &HRI)
+         << " = " << PrintReg(SrcReg, &HRI) << '\n';
+#endif
+  llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+      const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), Align);
+
+  if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Unimplemented");
+  }
+}
+
+
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, unsigned DestReg, int FI,
+      const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), Align);
+  if (RC == &Hexagon::IntRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::DoubleRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::PredRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Can't store this register to stack slot");
+  }
+}
+
+
+/// expandPostRAPseudo - This function is called for all pseudo instructions
+/// that remain after register allocation. Many pseudo instructions are
+/// created to help register allocation. This is the place to convert them
+/// into real instructions. The target can edit MI in place, or it can insert
+/// new instructions and erase MI. The function should return true if
+/// anything was changed.
+bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI)
+      const {
+  const HexagonRegisterInfo &HRI = getRegisterInfo();
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned Opc = MI->getOpcode();
+  const unsigned VecOffset = 1;
+  bool Is128B = false;
+
+  switch (Opc) {
+  case Hexagon::ALIGNA:
+    BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
+        .addReg(HRI.getFrameRegister())
+        .addImm(-MI->getOperand(1).getImm());
+    MBB.erase(MI);
+    return true;
+  case Hexagon::HEXAGON_V6_vassignp_128B:
+  case Hexagon::HEXAGON_V6_vassignp: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    if (SrcReg != DstReg)
+      copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::HEXAGON_V6_lo_128B:
+  case Hexagon::HEXAGON_V6_lo: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+    copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    MRI.clearKillFlags(SrcSubLo);
+    return true;
+  }
+  case Hexagon::HEXAGON_V6_hi_128B:
+  case Hexagon::HEXAGON_V6_hi: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+    copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    MRI.clearKillFlags(SrcSubHi);
+    return true;
+  }
+  case Hexagon::STrivv_indexed_128B:
+    Is128B = true;
+  case Hexagon::STrivv_indexed: {
+    unsigned SrcReg = MI->getOperand(2).getReg();
+    unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+    unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+    unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
+                              : Hexagon::V6_vS32b_ai;
+    unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+    MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd))
+        .addOperand(MI->getOperand(0))
+        .addImm(MI->getOperand(1).getImm())
+        .addReg(SrcSubLo)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MI1New->getOperand(0).setIsKill(false);
+    BuildMI(MBB, MI, DL, get(NewOpcd))
+        .addOperand(MI->getOperand(0))
+        // The Vectors are indexed in multiples of vector size.
+        .addImm(MI->getOperand(1).getImm()+Offset)
+        .addReg(SrcSubHi)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::LDrivv_pseudo_V6_128B:
+  case Hexagon::LDrivv_indexed_128B:
+    Is128B = true;
+  case Hexagon::LDrivv_pseudo_V6:
+  case Hexagon::LDrivv_indexed: {
+    unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
+                              : Hexagon::V6_vL32b_ai;
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+    MachineInstr *MI1New =
+        BuildMI(MBB, MI, DL, get(NewOpcd),
+                HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+            .addOperand(MI->getOperand(1))
+            .addImm(MI->getOperand(2).getImm());
+    MI1New->getOperand(1).setIsKill(false);
+    BuildMI(MBB, MI, DL, get(NewOpcd),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+        .addOperand(MI->getOperand(1))
+        // The Vectors are indexed in multiples of vector size.
+        .addImm(MI->getOperand(2).getImm() + Offset)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::LDriv_pseudo_V6_128B:
+    Is128B = true;
+  case Hexagon::LDriv_pseudo_V6: {
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+                             : Hexagon::V6_vL32b_ai;
+    int32_t Off = MI->getOperand(2).getImm();
+    int32_t Idx = Off;
+    BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+      .addOperand(MI->getOperand(1))
+      .addImm(Idx)
+      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::STriv_pseudo_V6_128B:
+    Is128B = true;
+  case Hexagon::STriv_pseudo_V6: {
+    unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+                             : Hexagon::V6_vS32b_ai;
+    int32_t Off = MI->getOperand(1).getImm();
+    int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6);
+    BuildMI(MBB, MI, DL, get(NewOpc))
+      .addOperand(MI->getOperand(0))
+      .addImm(Idx)
+      .addOperand(MI->getOperand(2))
+      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TFR_PdTrue: {
+    unsigned Reg = MI->getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+      .addReg(Reg, RegState::Undef)
+      .addReg(Reg, RegState::Undef);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TFR_PdFalse: {
+    unsigned Reg = MI->getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+      .addReg(Reg, RegState::Undef)
+      .addReg(Reg, RegState::Undef);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::VMULW: {
+    // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Src1Reg = MI->getOperand(1).getReg();
+    unsigned Src2Reg = MI->getOperand(2).getReg();
+    unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+    unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+    unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+    unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+      .addReg(Src2SubHi);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+            HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+      .addReg(Src2SubLo);
+    MBB.erase(MI);
+    MRI.clearKillFlags(Src1SubHi);
+    MRI.clearKillFlags(Src1SubLo);
+    MRI.clearKillFlags(Src2SubHi);
+    MRI.clearKillFlags(Src2SubLo);
+    return true;
+  }
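The VMULW expansion above lowers a 64-bit "vector of two i32" multiply into two independent M2_mpyi scalar multiplies on the halves of the register pairs. Its element-wise semantics, as a hedged C++ sketch (the function name is illustrative):

#include <cstdint>

// Semantics sketch of the VMULW expansion: index 1 models the high half
// (subreg_hireg), index 0 the low half (subreg_loreg); each 32-bit lane is
// multiplied independently.
static void vmulw(int32_t dst[2], const int32_t s1[2], const int32_t s2[2]) {
  dst[1] = s1[1] * s2[1]; // M2_mpyi on the high halves
  dst[0] = s1[0] * s2[0]; // M2_mpyi on the low halves
}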
+  case Hexagon::VMULW_ACC: {
+    // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Src1Reg = MI->getOperand(1).getReg();
+    unsigned Src2Reg = MI->getOperand(2).getReg();
+    unsigned Src3Reg = MI->getOperand(3).getReg();
+    unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+    unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+    unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+    unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+    unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+    unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+      .addReg(Src2SubHi).addReg(Src3SubHi);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+            HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+      .addReg(Src2SubLo).addReg(Src3SubLo);
+    MBB.erase(MI);
+    MRI.clearKillFlags(Src1SubHi);
+    MRI.clearKillFlags(Src1SubLo);
+    MRI.clearKillFlags(Src2SubHi);
+    MRI.clearKillFlags(Src2SubLo);
+    MRI.clearKillFlags(Src3SubHi);
+    MRI.clearKillFlags(Src3SubLo);
+    return true;
+  }
+  case Hexagon::MUX64_rr: {
+    const MachineOperand &Op0 = MI->getOperand(0);
+    const MachineOperand &Op1 = MI->getOperand(1);
+    const MachineOperand &Op2 = MI->getOperand(2);
+    const MachineOperand &Op3 = MI->getOperand(3);
+    unsigned Rd = Op0.getReg();
+    unsigned Pu = Op1.getReg();
+    unsigned Rs = Op2.getReg();
+    unsigned Rt = Op3.getReg();
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned K1 = getKillRegState(Op1.isKill());
+    unsigned K2 = getKillRegState(Op2.isKill());
+    unsigned K3 = getKillRegState(Op3.isKill());
+    if (Rd != Rs)
+      BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
+        .addReg(Pu, (Rd == Rt) ? K1 : 0)
+        .addReg(Rs, K2);
+    if (Rd != Rt)
+      BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
+        .addReg(Pu, K1)
+        .addReg(Rt, K3);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TCRETURNi:
+    MI->setDesc(get(Hexagon::J2_jump));
+    return true;
+  case Hexagon::TCRETURNr:
+    MI->setDesc(get(Hexagon::J2_jumpr));
+    return true;
+  case Hexagon::TFRI_f:
+  case Hexagon::TFRI_cPt_f:
+  case Hexagon::TFRI_cNotPt_f: {
+    unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
+    APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF();
+    APInt IVal = FVal.bitcastToAPInt();
+    MI->RemoveOperand(Opx);
+    unsigned NewOpc = (Opc == Hexagon::TFRI_f)     ? Hexagon::A2_tfrsi :
+                      (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
+                                                     Hexagon::C2_cmoveif;
+    MI->setDesc(get(NewOpc));
+    MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+    return true;
+  }
+  }
+
+  return false;
+}
+
+
+// We indicate that we want to reverse the branch by
+// inserting the reversed branching opcode.
+bool HexagonInstrInfo::ReverseBranchCondition(
+      SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.empty())
+    return true;
+  assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+  unsigned opcode = Cond[0].getImm();
+  //unsigned temp;
+  assert(get(opcode).isBranch() && "Should be a branching condition.");
+  if (isEndLoopN(opcode))
+    return true;
+  unsigned NewOpcode = getInvertedPredicatedOpcode(opcode);
+  Cond[0].setImm(NewOpcode);
+  return false;
+}
+
+
+void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator MI) const {
+  DebugLoc DL;
+  BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
+}
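ReverseBranchCondition above flips a branch by storing the inverted opcode back into Cond[0] via getInvertedPredicatedOpcode. A hypothetical illustration of the kind of opcode pairing that function relies on; only the J2_jumpt/J2_jumpf pair is grounded in this patch (see the InsertBranch comment), so the sketch refuses anything else:

// Illustrative only: inverted-opcode pairing in the style the code above uses.
static unsigned invertJumpOpcode(unsigned Opc) {
  switch (Opc) {
  case Hexagon::J2_jumpt: return Hexagon::J2_jumpf; // if (p) jump -> if (!p)
  case Hexagon::J2_jumpf: return Hexagon::J2_jumpt; // if (!p) jump -> if (p)
  default: llvm_unreachable("opcode not handled in this sketch");
  }
}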
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
+// Note: New-value stores are not included here as in the current
+// implementation, we don't need to check their predicate sense.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI,
+      ArrayRef<MachineOperand> Cond) const {
+  if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
+      isEndLoopN(Cond[0].getImm())) {
+    DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+    return false;
+  }
+  int Opc = MI->getOpcode();
+  assert(isPredicable(MI) && "Expected predicable instruction");
+  bool invertJump = predOpcodeHasNot(Cond);
+
+  // We have to predicate MI "in place", i.e. after this function returns,
+  // MI will need to be transformed into a predicated form. To avoid com-
+  // plicated manipulations with the operands (handling tied operands,
+  // etc.), build a new temporary instruction, then overwrite MI with it.
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned PredOpc = getCondOpcode(Opc, invertJump);
+  MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+  unsigned NOp = 0, NumOps = MI->getNumOperands();
+  while (NOp < NumOps) {
+    MachineOperand &Op = MI->getOperand(NOp);
+    if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+      break;
+    T.addOperand(Op);
+    NOp++;
+  }
+
+  unsigned PredReg, PredRegPos, PredRegFlags;
+  bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+  (void)GotPredReg;
+  assert(GotPredReg);
+  T.addReg(PredReg, PredRegFlags);
+  while (NOp < NumOps)
+    T.addOperand(MI->getOperand(NOp++));
+
+  MI->setDesc(get(PredOpc));
+  while (unsigned n = MI->getNumOperands())
+    MI->RemoveOperand(n-1);
+  for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+    MI->addOperand(T->getOperand(i));
+
+  MachineBasicBlock::instr_iterator TI = T->getIterator();
+  B.erase(TI);
+
+  MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+  MRI.clearKillFlags(PredReg);
+  return true;
+}
+
+
+bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+      ArrayRef<MachineOperand> Pred2) const {
+  // TODO: Fix this
+  return false;
+}
+
+
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+      std::vector<MachineOperand> &Pred) const {
+  auto &HRI = getRegisterInfo();
+  for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+    MachineOperand MO = MI->getOperand(oper);
+    if (MO.isReg() && MO.isDef()) {
+      const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
+      if (RC == &Hexagon::PredRegsRegClass) {
+        Pred.push_back(MO);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+  bool isPred = MI->getDesc().isPredicable();
+
+  if (!isPred)
+    return false;
+
+  const int Opc = MI->getOpcode();
+  int NumOperands = MI->getNumOperands();
+
+  // Keep a flag for up to 4 operands in the instructions, to indicate if
+  // that operand has been constant extended.
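+  // Extension state is only tracked for the first four operands; of these,
+  // only the flags for operands 1 and 2 are consulted below (in the
+  // S4_storeir*_io cases).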
+ bool OpCExtended[4]; + if (NumOperands > 4) + NumOperands = 4; + + for (int i = 0; i < NumOperands; i++) + OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); + + switch(Opc) { + case Hexagon::A2_tfrsi: + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || + isInt<12>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerd_io: + return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: + return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: + return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: + return isUInt<6>(MI->getOperand(1).getImm()); + + case Hexagon::L2_loadrd_io: + return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadri_io: + return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + return isUInt<6>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrd_pi: + return isShiftedInt<4,3>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadri_pi: + return isShiftedInt<4,2>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadruh_pi: + return isShiftedInt<4,1>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrub_pi: + return isInt<4>(MI->getOperand(3).getImm()); + + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) && + (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); + + case Hexagon::A2_addi: + return isInt<8>(MI->getOperand(2).getImm()); + + case Hexagon::A2_aslh: + case Hexagon::A2_asrh: + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + return true; + } + + return true; +} + + +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Throwing call is a boundary. + if (MI->isCall()) { + // If any of the block's successors is a landing pad, this could be a + // throwing call. + for (auto I : MBB->successors()) + if (I->isEHPad()) + return true; + } + + // Don't mess around with no return calls. + if (MI->getOpcode() == Hexagon::CALLv3nr) + return true; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isPosition()) + return true; + + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + return false; +} + + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. 
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+/// Hexagon counts the number of ##'s and adjusts for that many
+/// constant extenders.
+unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
+      const MCAsmInfo &MAI) const {
+  StringRef AStr(Str);
+  // Count the number of instructions in the asm.
+  bool atInsnStart = true;
+  unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
+      atInsnStart = true;
+    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+      Length += MAI.getMaxInstLength();
+      atInsnStart = false;
+    }
+    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+                               strlen(MAI.getCommentString())) == 0)
+      atInsnStart = false;
+  }
+
+  // Add to size number of constant extenders seen * 4.
+  StringRef Occ("##");
+  Length += AStr.count(Occ)*4;
+  return Length;
+}
+
+
+ScheduleHazardRecognizer*
+HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
+      const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+
 /// \brief For a comparison instruction, return the source registers in
 /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
 /// compares against in CmpValue. Return true if the comparison instruction
 /// can be analyzed.
 bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
-                                      unsigned &SrcReg, unsigned &SrcReg2,
-                                      int &Mask, int &Value) const {
+      unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const {
   unsigned Opc = MI->getOpcode();
   // Set mask and the first source register.
@@ -548,247 +1371,66 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
 }
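(Editorial aside: the sketch below is a minimal, standalone rendition of the counting scheme that getInlineAsmLength above implements. The helper name is hypothetical; it assumes 4-byte instruction words and omits the separator-string and comment handling of the real code.)

    #include <cctype>
    #include <cstring>

    // Every instruction is charged the maximum length (4 bytes on Hexagon);
    // each "##" marker then adds 4 more bytes for a constant extender.
    unsigned estimateInlineAsmLength(const char *Str) {
      const unsigned MaxInstLength = 4;
      bool AtInsnStart = true;
      unsigned Length = 0;
      for (const char *P = Str; *P; ++P) {
        if (*P == '\n')
          AtInsnStart = true;               // Newlines separate instructions.
        if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*P))) {
          Length += MaxInstLength;          // First solid character of an insn.
          AtInsnStart = false;
        }
      }
      // Count non-overlapping "##" occurrences, one extender word apiece.
      for (const char *P = std::strstr(Str, "##"); P;
           P = std::strstr(P + 2, "##"))
        Length += 4;
      return Length;
    }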
-    addReg(SrcReg, getKillRegState(KillSrc));
-    return;
-  }
-  if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
-      Hexagon::PredRegsRegClass.contains(DestReg)) {
-    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
-      addReg(SrcReg, getKillRegState(KillSrc));
-    return;
-  }
-
-  llvm_unreachable("Unimplemented");
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+      const MachineInstr *MI, unsigned *PredCost) const {
+  return getInstrTimingClassLatency(ItinData, MI);
 }
-void HexagonInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC,
-                    const TargetRegisterInfo *TRI) const {
-
-  DebugLoc DL = MBB.findDebugLoc(I);
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);
-
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-          MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
-          MachineMemOperand::MOStore,
-          MFI.getObjectSize(FI),
-          Align);
-
-  if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else {
-    llvm_unreachable("Unimplemented");
-  }
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+      const TargetSubtargetInfo &STI) const {
+  const InstrItineraryData *II = STI.getInstrItineraryData();
+  return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
 }
-void HexagonInstrInfo::storeRegToAddr(
-                                 MachineFunction &MF, unsigned SrcReg,
-                                 bool isKill,
-                                 SmallVectorImpl<MachineOperand> &Addr,
-                                 const TargetRegisterClass *RC,
-                                 SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
-  llvm_unreachable("Unimplemented");
-}
+// Inspired by this pair:
+//   %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+//   S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+      MachineInstr *MIb, AliasAnalysis *AA) const {
+  int OffsetA = 0, OffsetB = 0;
+  unsigned SizeA = 0, SizeB = 0;
+  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+    return false;
-void HexagonInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                     unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC,
-                     const TargetRegisterInfo *TRI) const {
-  DebugLoc DL = MBB.findDebugLoc(I);
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);
+  // Instructions that are pure loads, not loads and stores like memops, are
+  // not dependent.
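+  // (For the motivating pair above, the reasoning below proves disjointness:
+  // the store covers [132, 136) and the load covers [136, 140) off the same
+  // base register, and 132 + 4 <= 136.)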
+  if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+    return true;
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-          MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
-          MachineMemOperand::MOLoad,
-          MFI.getObjectSize(FI),
-          Align);
-  if (RC == &Hexagon::IntRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else if (RC == &Hexagon::DoubleRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else if (RC == &Hexagon::PredRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else {
-    llvm_unreachable("Can't store this register to stack slot");
-  }
-}
+  // Get base, offset, and access size in MIa.
+  unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+  if (!BaseRegA || !SizeA)
+    return false;
+  // Get base, offset, and access size in MIb.
+  unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+  if (!BaseRegB || !SizeB)
+    return false;
-void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
-                                       SmallVectorImpl<MachineOperand> &Addr,
-                                       const TargetRegisterClass *RC,
-                                       SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  llvm_unreachable("Unimplemented");
-}
-bool
-HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
-  const HexagonRegisterInfo &TRI = getRegisterInfo();
-  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
-  MachineBasicBlock &MBB = *MI->getParent();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned Opc = MI->getOpcode();
+  if (BaseRegA != BaseRegB)
+    return false;
-  switch (Opc) {
-  case Hexagon::ALIGNA:
-    BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
-        .addReg(TRI.getFrameRegister())
-        .addImm(-MI->getOperand(1).getImm());
-    MBB.erase(MI);
-    return true;
-  case Hexagon::TFR_PdTrue: {
-    unsigned Reg = MI->getOperand(0).getReg();
-    BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
-    MBB.erase(MI);
-    return true;
-  }
-  case Hexagon::TFR_PdFalse: {
-    unsigned Reg = MI->getOperand(0).getReg();
-    BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
-    MBB.erase(MI);
-    return true;
-  }
-  case Hexagon::VMULW: {
-    // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
-    unsigned DstReg = MI->getOperand(0).getReg();
-    unsigned Src1Reg = MI->getOperand(1).getReg();
-    unsigned Src2Reg = MI->getOperand(2).getReg();
-    unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
-    unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
-    unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
-    unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
-            TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
-        .addReg(Src2SubHi);
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
-            TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
-        .addReg(Src2SubLo);
-    MBB.erase(MI);
-    MRI.clearKillFlags(Src1SubHi);
-    MRI.clearKillFlags(Src1SubLo);
-    MRI.clearKillFlags(Src2SubHi);
-    MRI.clearKillFlags(Src2SubLo);
-    return true;
-  }
-  case Hexagon::VMULW_ACC: {
-    // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
- unsigned DstReg = MI->getOperand(0).getReg(); - unsigned Src1Reg = MI->getOperand(1).getReg(); - unsigned Src2Reg = MI->getOperand(2).getReg(); - unsigned Src3Reg = MI->getOperand(3).getReg(); - unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); - unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); - unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) - .addReg(Src2SubHi).addReg(Src3SubHi); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) - .addReg(Src2SubLo).addReg(Src3SubLo); - MBB.erase(MI); - MRI.clearKillFlags(Src1SubHi); - MRI.clearKillFlags(Src1SubLo); - MRI.clearKillFlags(Src2SubHi); - MRI.clearKillFlags(Src2SubLo); - MRI.clearKillFlags(Src3SubHi); - MRI.clearKillFlags(Src3SubLo); - return true; - } - case Hexagon::TCRETURNi: - MI->setDesc(get(Hexagon::J2_jump)); - return true; - case Hexagon::TCRETURNr: - MI->setDesc(get(Hexagon::J2_jumpr)); - return true; + // This is a mem access with the same base register and known offsets from it. + // Reason about it. + if (OffsetA > OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB); + return (SizeB <= offDiff); + } else if (OffsetA < OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA); + return (SizeA <= offDiff); } return false; } -MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl( - MachineFunction &MF, MachineInstr *MI, ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, int FI) const { - // Hexagon_TODO: Implement. - return nullptr; -} unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; if (VT == MVT::i1) { TRC = &Hexagon::PredRegsRegClass; @@ -800,629 +1442,74 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { llvm_unreachable("Cannot handle this register class"); } - unsigned NewReg = RegInfo.createVirtualRegister(TRC); + unsigned NewReg = MRI.createVirtualRegister(TRC); return NewReg; } -bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - const MCInstrDesc &MID = MI->getDesc(); - const uint64_t F = MID.TSFlags; - if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) - return true; - // TODO: This is largely obsolete now. Will need to be removed - // in consecutive patches. - switch(MI->getOpcode()) { - // TFR_FI Remains a special case. - case Hexagon::TFR_FI: - return true; - default: - return false; - } - return false; +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::AbsoluteSet); } -// This returns true in two cases: -// - The OP code itself indicates that this is an extended instruction. -// - One of MOs has been marked with HMOTF_ConstExtended flag. -bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { - // First check if this is permanently extended op code. 
+ +bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; - if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) - return true; - // Use MO operand flags to determine if one of MI's operands - // has HMOTF_ConstExtended flag set. - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) - return true; - } - return false; + return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } -bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const { - return MI->getDesc().isBranch(); -} -bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { - if (isNewValueJump(MI)) - return true; +bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (isNewValueStore(MI)) + if (!(isTC1(MI)) + && !(QII->isTC2Early(MI)) + && !(MI->getDesc().mayLoad()) + && !(MI->getDesc().mayStore()) + && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(QII->isMemOp(MI)) + && !(MI->isBranch()) + && !(MI->isReturn()) + && !MI->isCall()) return true; return false; } -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} -bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} - -bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { - return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; -} - -bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { - bool isPred = MI->getDesc().isPredicable(); - - if (!isPred) - return false; - - const int Opc = MI->getOpcode(); - - switch(Opc) { - case Hexagon::A2_tfrsi: - return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerd_io: - return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storeri_io: - case Hexagon::S2_storerinew_io: - return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerh_io: - case Hexagon::S2_storerhnew_io: - return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerb_io: - case Hexagon::S2_storerbnew_io: - return isUInt<6>(MI->getOperand(1).getImm()); - - case Hexagon::L2_loadrd_io: - return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadri_io: - return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: - return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrb_io: - case Hexagon::L2_loadrub_io: - return isUInt<6>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrd_pi: - return isShiftedInt<4,3>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadri_pi: - return isShiftedInt<4,2>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrh_pi: - case Hexagon::L2_loadruh_pi: - return isShiftedInt<4,1>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrb_pi: - case Hexagon::L2_loadrub_pi: - return isInt<4>(MI->getOperand(3).getImm()); - - case Hexagon::S4_storeirb_io: - 
case Hexagon::S4_storeirh_io:
-  case Hexagon::S4_storeiri_io:
-    return (isUInt<6>(MI->getOperand(1).getImm()) &&
-            isInt<6>(MI->getOperand(2).getImm()));
-
-  case Hexagon::A2_addi:
-    return isInt<8>(MI->getOperand(2).getImm());
-
-  case Hexagon::A2_aslh:
-  case Hexagon::A2_asrh:
-  case Hexagon::A2_sxtb:
-  case Hexagon::A2_sxth:
-  case Hexagon::A2_zxtb:
-  case Hexagon::A2_zxth:
-    return true;
-  }
-
-  return true;
-}
-
-// This function performs the following inversions:
-//
-//   cPt    --->  cNotPt
-//   cNotPt --->  cPt
-//
-unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
-  int InvPredOpcode;
-  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
-                                        : Hexagon::getTruePredOpcode(Opc);
-  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
-    return InvPredOpcode;
-
-  switch(Opc) {
-  default: llvm_unreachable("Unexpected predicated instruction");
-  case Hexagon::C2_ccombinewt:
-    return Hexagon::C2_ccombinewf;
-  case Hexagon::C2_ccombinewf:
-    return Hexagon::C2_ccombinewt;
-
-  // Dealloc_return.
-  case Hexagon::L4_return_t:
-    return Hexagon::L4_return_f;
-  case Hexagon::L4_return_f:
-    return Hexagon::L4_return_t;
-  }
-}
-
-// New Value Store instructions.
-bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
-  const uint64_t F = MI->getDesc().TSFlags;
-
-  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
-}
-
-bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
-  const uint64_t F = get(Opcode).TSFlags;
-
-  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
-}
-
-int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
-  enum Hexagon::PredSense inPredSense;
-  inPredSense = invertPredicate ? Hexagon::PredSense_false :
-                                  Hexagon::PredSense_true;
-  int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
-  if (CondOpcode >= 0) // Valid Conditional opcode/instruction
-    return CondOpcode;
-
-  // This switch case will be removed once all the instructions have been
-  // modified to use relation maps.
-  switch(Opc) {
-  case Hexagon::TFRI_f:
-    return !invertPredicate ? Hexagon::TFRI_cPt_f :
-                              Hexagon::TFRI_cNotPt_f;
-  case Hexagon::A2_combinew:
-    return !invertPredicate ? Hexagon::C2_ccombinewt :
-                              Hexagon::C2_ccombinewf;
-
-  // DEALLOC_RETURN.
-  case Hexagon::L4_return:
-    return !invertPredicate ? Hexagon::L4_return_t:
-                              Hexagon::L4_return_f;
-  }
-  llvm_unreachable("Unexpected predicable instruction");
+// Return true if the instruction is a compound branch instruction.
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
+  return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
 }
-bool HexagonInstrInfo::
-PredicateInstruction(MachineInstr *MI,
-                     ArrayRef<MachineOperand> Cond) const {
-  if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
-    DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
-    return false;
-  }
-  int Opc = MI->getOpcode();
-  assert(isPredicable(MI) && "Expected predicable instruction");
-  bool invertJump = predOpcodeHasNot(Cond);
-
-  // We have to predicate MI "in place", i.e. after this function returns,
-  // MI will need to be transformed into a predicated form. To avoid com-
-  // plicated manipulations with the operands (handling tied operands,
-  // etc.), build a new temporary instruction, then overwrite MI with it.
- - MachineBasicBlock &B = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned PredOpc = getCondOpcode(Opc, invertJump); - MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); - unsigned NOp = 0, NumOps = MI->getNumOperands(); - while (NOp < NumOps) { - MachineOperand &Op = MI->getOperand(NOp); - if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) - break; - T.addOperand(Op); - NOp++; - } - - unsigned PredReg, PredRegPos, PredRegFlags; - bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); - (void)GotPredReg; - assert(GotPredReg); - T.addReg(PredReg, PredRegFlags); - while (NOp < NumOps) - T.addOperand(MI->getOperand(NOp++)); - - MI->setDesc(get(PredOpc)); - while (unsigned n = MI->getNumOperands()) - MI->RemoveOperand(n-1); - for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) - MI->addOperand(T->getOperand(i)); - - MachineBasicBlock::instr_iterator TI = &*T; - B.erase(TI); - - MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); - MRI.clearKillFlags(PredReg); - - return true; +bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { + return (MI->isBranch() && isPredicated(MI)) || + isConditionalTransfer(MI) || + isConditionalALU32(MI) || + isConditionalLoad(MI) || + // Predicated stores which don't have a .new on any operands. + (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); } -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const { - return true; -} - - -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, - unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, - unsigned ExtraFCycles, - const BranchProbability &Probability) const { - return true; -} - -// Returns true if an instruction is predicated irrespective of the predicate -// sense. For example, all of the following will return true. -// if (p0) R1 = add(R2, R3) -// if (!p0) R1 = add(R2, R3) -// if (p0.new) R1 = add(R2, R3) -// if (!p0.new) R1 = add(R2, R3) -bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); -} - -bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); -} - -bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - assert(isPredicated(MI)); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); -} - -bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - // Make sure that the instruction is predicated. 
- assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); -} - -bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - assert(isPredicated(MI)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); -} - -bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - assert(isPredicated(Opcode)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); -} - -// Returns true, if a ST insn can be promoted to a new-value store. -bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return ((F >> HexagonII::mayNVStorePos) & - HexagonII::mayNVStoreMask); -} - -bool -HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const { - for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { - MachineOperand MO = MI->getOperand(oper); - if (MO.isReg() && MO.isDef()) { - const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); - if (RC == &Hexagon::PredRegsRegClass) { - Pred.push_back(MO); - return true; - } - } - } - return false; -} - - -bool -HexagonInstrInfo:: -SubsumesPredicate(ArrayRef Pred1, - ArrayRef Pred2) const { - // TODO: Fix this - return false; -} - - -// -// We indicate that we want to reverse the branch by -// inserting the reversed branching opcode. -// -bool HexagonInstrInfo::ReverseBranchCondition( - SmallVectorImpl &Cond) const { - if (Cond.empty()) - return true; - assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); - Opcode_t opcode = Cond[0].getImm(); - //unsigned temp; - assert(get(opcode).isBranch() && "Should be a branching condition."); - if (isEndLoopN(opcode)) - return true; - Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode); - Cond[0].setImm(NewOpcode); - return false; -} - - -bool HexagonInstrInfo:: -isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, - const BranchProbability &Probability) const { - return (NumInstrs <= 4); -} - -bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { switch (MI->getOpcode()) { - default: return false; - case Hexagon::L4_return: - case Hexagon::L4_return_t: - case Hexagon::L4_return_f: - case Hexagon::L4_return_tnew_pnt: - case Hexagon::L4_return_fnew_pnt: - case Hexagon::L4_return_tnew_pt: - case Hexagon::L4_return_fnew_pt: - return true; - } -} - - -bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, - bool Extend) const { - // This function is to check whether the "Offset" is in the correct range of - // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is - // inserted to calculate the final address. Due to this reason, the function - // assumes that the "Offset" has correct alignment. - // We used to assert if the offset was not properly aligned, however, - // there are cases where a misaligned pointer recast can cause this - // problem, and we need to allow for it. The front end warns of such - // misaligns with respect to load size. 
- - switch (Opcode) { - case Hexagon::J2_loop0i: - case Hexagon::J2_loop1i: - return isUInt<10>(Offset); - } - - if (Extend) - return true; - - switch (Opcode) { - case Hexagon::L2_loadri_io: - case Hexagon::S2_storeri_io: - return (Offset >= Hexagon_MEMW_OFFSET_MIN) && - (Offset <= Hexagon_MEMW_OFFSET_MAX); - - case Hexagon::L2_loadrd_io: - case Hexagon::S2_storerd_io: - return (Offset >= Hexagon_MEMD_OFFSET_MIN) && - (Offset <= Hexagon_MEMD_OFFSET_MAX); - - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: - case Hexagon::S2_storerh_io: - return (Offset >= Hexagon_MEMH_OFFSET_MIN) && - (Offset <= Hexagon_MEMH_OFFSET_MAX); - - case Hexagon::L2_loadrb_io: - case Hexagon::S2_storerb_io: - case Hexagon::L2_loadrub_io: - return (Offset >= Hexagon_MEMB_OFFSET_MIN) && - (Offset <= Hexagon_MEMB_OFFSET_MAX); - - case Hexagon::A2_addi: - return (Offset >= Hexagon_ADDI_OFFSET_MIN) && - (Offset <= Hexagon_ADDI_OFFSET_MAX); - - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: - return (0 <= Offset && Offset <= 255); - - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: - return (0 <= Offset && Offset <= 127); - - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: - return (0 <= Offset && Offset <= 63); - - // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of - // any size. Later pass knows how to handle it. - case Hexagon::STriw_pred: - case Hexagon::LDriw_pred: - return true; - - case Hexagon::TFR_FI: - case Hexagon::TFR_FIA: - case Hexagon::INLINEASM: - return true; - } - - llvm_unreachable("No offset range is defined for this opcode. " - "Please define it in the above switch statement!"); -} - - -// -// Check if the Offset is a valid auto-inc imm by Load/Store Type. 
-// -bool HexagonInstrInfo:: -isValidAutoIncImm(const EVT VT, const int Offset) const { - - if (VT == MVT::i64) { - return (Offset >= Hexagon_MEMD_AUTOINC_MIN && - Offset <= Hexagon_MEMD_AUTOINC_MAX && - (Offset & 0x7) == 0); - } - if (VT == MVT::i32) { - return (Offset >= Hexagon_MEMW_AUTOINC_MIN && - Offset <= Hexagon_MEMW_AUTOINC_MAX && - (Offset & 0x3) == 0); - } - if (VT == MVT::i16) { - return (Offset >= Hexagon_MEMH_AUTOINC_MIN && - Offset <= Hexagon_MEMH_AUTOINC_MAX && - (Offset & 0x1) == 0); - } - if (VT == MVT::i8) { - return (Offset >= Hexagon_MEMB_AUTOINC_MIN && - Offset <= Hexagon_MEMB_AUTOINC_MAX); - } - llvm_unreachable("Not an auto-inc opc!"); -} - - -bool HexagonInstrInfo:: -isMemOp(const MachineInstr *MI) const { -// return MI->getDesc().mayLoad() && MI->getDesc().mayStore(); - - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: - case Hexagon::L4_ior_memopb_io: - case Hexagon::L4_ior_memoph_io: - case Hexagon::L4_ior_memopw_io: - case Hexagon::L4_iand_memopb_io: - case Hexagon::L4_iand_memoph_io: - case Hexagon::L4_iand_memopw_io: - return true; - } - return false; -} - - -bool HexagonInstrInfo:: -isSpillPredRegOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::STriw_pred : - case Hexagon::LDriw_pred : - return true; - } -} - -bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtui: - return true; - } -} - -bool HexagonInstrInfo:: -isConditionalTransfer (const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::A2_tfrt: - case Hexagon::A2_tfrf: - case Hexagon::C2_cmoveit: - case Hexagon::C2_cmoveif: - case Hexagon::A2_tfrtnew: - case Hexagon::A2_tfrfnew: - case Hexagon::C2_cmovenewit: - case Hexagon::C2_cmovenewif: - return true; - } -} - -bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { - default: return false; case Hexagon::A2_paddf: case Hexagon::A2_paddfnew: + case Hexagon::A2_paddif: + case Hexagon::A2_paddifnew: + case Hexagon::A2_paddit: + case Hexagon::A2_padditnew: case Hexagon::A2_paddt: case Hexagon::A2_paddtnew: case Hexagon::A2_pandf: case Hexagon::A2_pandfnew: case Hexagon::A2_pandt: case Hexagon::A2_pandtnew: - case Hexagon::A4_paslhf: - case Hexagon::A4_paslhfnew: - case Hexagon::A4_paslht: - case Hexagon::A4_paslhtnew: - case Hexagon::A4_pasrhf: - case Hexagon::A4_pasrhfnew: - case Hexagon::A4_pasrht: - case Hexagon::A4_pasrhtnew: case Hexagon::A2_porf: case Hexagon::A2_porfnew: case Hexagon::A2_port: @@ -1435,14 +1522,22 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { case Hexagon::A2_pxorfnew: case Hexagon::A2_pxort: case 
Hexagon::A2_pxortnew:
-  case Hexagon::A4_psxthf:
-  case Hexagon::A4_psxthfnew:
-  case Hexagon::A4_psxtht:
-  case Hexagon::A4_psxthtnew:
+  case Hexagon::A4_paslhf:
+  case Hexagon::A4_paslhfnew:
+  case Hexagon::A4_paslht:
+  case Hexagon::A4_paslhtnew:
+  case Hexagon::A4_pasrhf:
+  case Hexagon::A4_pasrhfnew:
+  case Hexagon::A4_pasrht:
+  case Hexagon::A4_pasrhtnew:
   case Hexagon::A4_psxtbf:
   case Hexagon::A4_psxtbfnew:
   case Hexagon::A4_psxtbt:
   case Hexagon::A4_psxtbtnew:
+  case Hexagon::A4_psxthf:
+  case Hexagon::A4_psxthfnew:
+  case Hexagon::A4_psxtht:
+  case Hexagon::A4_psxthtnew:
   case Hexagon::A4_pzxtbf:
   case Hexagon::A4_pzxtbfnew:
   case Hexagon::A4_pzxtbt:
@@ -1451,97 +1546,32 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
   case Hexagon::A4_pzxthfnew:
   case Hexagon::A4_pzxtht:
   case Hexagon::A4_pzxthtnew:
-  case Hexagon::A2_paddit:
-  case Hexagon::A2_paddif:
-  case Hexagon::C2_ccombinewt:
   case Hexagon::C2_ccombinewf:
+  case Hexagon::C2_ccombinewt:
     return true;
   }
+  return false;
 }
-bool HexagonInstrInfo::
-isConditionalLoad (const MachineInstr* MI) const {
-  switch (MI->getOpcode())
-  {
-    default: return false;
-    case Hexagon::L2_ploadrdt_io :
-    case Hexagon::L2_ploadrdf_io:
-    case Hexagon::L2_ploadrit_io:
-    case Hexagon::L2_ploadrif_io:
-    case Hexagon::L2_ploadrht_io:
-    case Hexagon::L2_ploadrhf_io:
-    case Hexagon::L2_ploadrbt_io:
-    case Hexagon::L2_ploadrbf_io:
-    case Hexagon::L2_ploadruht_io:
-    case Hexagon::L2_ploadruhf_io:
-    case Hexagon::L2_ploadrubt_io:
-    case Hexagon::L2_ploadrubf_io:
-    case Hexagon::L2_ploadrdt_pi:
-    case Hexagon::L2_ploadrdf_pi:
-    case Hexagon::L2_ploadrit_pi:
-    case Hexagon::L2_ploadrif_pi:
-    case Hexagon::L2_ploadrht_pi:
-    case Hexagon::L2_ploadrhf_pi:
-    case Hexagon::L2_ploadrbt_pi:
-    case Hexagon::L2_ploadrbf_pi:
-    case Hexagon::L2_ploadruht_pi:
-    case Hexagon::L2_ploadruhf_pi:
-    case Hexagon::L2_ploadrubt_pi:
-    case Hexagon::L2_ploadrubf_pi:
-    case Hexagon::L4_ploadrdt_rr:
-    case Hexagon::L4_ploadrdf_rr:
-    case Hexagon::L4_ploadrbt_rr:
-    case Hexagon::L4_ploadrbf_rr:
-    case Hexagon::L4_ploadrubt_rr:
-    case Hexagon::L4_ploadrubf_rr:
-    case Hexagon::L4_ploadrht_rr:
-    case Hexagon::L4_ploadrhf_rr:
-    case Hexagon::L4_ploadruht_rr:
-    case Hexagon::L4_ploadruhf_rr:
-    case Hexagon::L4_ploadrit_rr:
-    case Hexagon::L4_ploadrif_rr:
-      return true;
-  }
+
+// FIXME - Function name and its functionality don't match.
+// It should be renamed to hasPredNewOpcode()
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
+  if (!MI->getDesc().mayLoad() || !isPredicated(MI))
+    return false;
+
+  int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+  // Instruction with valid predicated-new opcode can be promoted to .new.
+  return PNewOpcode >= 0;
 }
+
 // Returns true if an instruction is a conditional store.
 //
 // Note: It doesn't include conditional new-value stores as they can't be
 // converted to .new predicate.
-//
-//          p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
-//                ^           ^
-//               /             \ (not OK. it will cause new-value store to be
-//              /               X conditional on p0.new while R2 producer is
-//             /                 \ on p0)
-//            /                   \
-//   p.new store                   p.old NV store
-// [if(p0.new)memw(R0+#0)=R2]  [if(p0)memw(R0+#0)=R2.new]
-//            ^                  ^
-//             \                /
-//              \              /
-//               \            /
-//                p.old store
-//            [if (p0)memw(R0+#0)=R2]
-//
-// The above diagram shows the steps involved in the conversion of a predicated
-// store instruction to its .new predicated new-value form.
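+// For example, "if (p0) memw(R0+#0) = R1" can be promoted to
+// "if (p0.new) memw(R0+#0) = R1", while the new-value store
+// "if (p0) memw(R0+#0) = R1.new" has to keep its .old predicate.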
-// -// The following set of instructions further explains the scenario where -// conditional new-value store becomes invalid when promoted to .new predicate -// form. -// -// { 1) if (p0) r0 = add(r1, r2) -// 2) p0 = cmp.eq(r3, #0) } -// -// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with -// the first two instructions because in instr 1, r0 is conditional on old value -// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which -// is not valid for new-value stores. -bool HexagonInstrInfo:: -isConditionalStore (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { +bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { + switch (MI->getOpcode()) { default: return false; case Hexagon::S4_storeirbt_io: case Hexagon::S4_storeirbf_io: @@ -1587,199 +1617,36 @@ isConditionalStore (const MachineInstr* MI) const { // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded // from the "Conditional Store" list. Because a predicated new value store - // would NOT be promoted to a double dot new store. See diagram below: + // would NOT be promoted to a double dot new store. // This function returns yes for those stores that are predicated but not // yet promoted to predicate dot new instructions. - // - // +---------------------+ - // /-----| if (p0) memw(..)=r0 |---------\~ - // || +---------------------+ || - // promote || /\ /\ || promote - // || /||\ /||\ || - // \||/ demote || \||/ - // \/ || || \/ - // +-------------------------+ || +-------------------------+ - // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | - // +-------------------------+ || +-------------------------+ - // || || || - // || demote \||/ - // promote || \/ NOT possible - // || || /\~ - // \||/ || /||\~ - // \/ || || - // +-----------------------------+ - // | if (p0.new) memw(..)=r0.new | - // +-----------------------------+ - // Double Dot New Store - // } } -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - if (isNewValue(MI) && isBranch(MI)) - return true; +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: + return true; + + default: + return false; + } return false; } -bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const { - return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); -} - -bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { - return (getAddrMode(MI) == HexagonII::PostInc); -} - -// Returns true, if any one of the operands is a dot new -// insn, whether it is predicated dot new or register dot new. -bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { - return (isNewValueInst(MI) || - (isPredicated(MI) && isPredicatedNew(MI))); -} - -// Returns the most basic instruction for the .new predicated instructions and -// new-value stores. 
-// For example, all of the following instructions will be converted back to the -// same instruction: -// 1) if (p0.new) memw(R0+#0) = R1.new ---> -// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 -// 3) if (p0.new) memw(R0+#0) = R1 ---> -// - -int HexagonInstrInfo::GetDotOldOp(const int opc) const { - int NewOp = opc; - if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form - NewOp = Hexagon::getPredOldOpcode(NewOp); - assert(NewOp >= 0 && - "Couldn't change predicate new instruction to its old form."); - } - - if (isNewValueStore(NewOp)) { // Convert into non-new-value format - NewOp = Hexagon::getNonNVStore(NewOp); - assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); - } - return NewOp; -} - -// Return the new value instruction for a given store. -int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { - int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); - if (NVOpcode >= 0) // Valid new-value store instruction. - return NVOpcode; - - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown .new type"); - case Hexagon::S4_storerb_ur: - return Hexagon::S4_storerbnew_ur; - - case Hexagon::S4_storerh_ur: - return Hexagon::S4_storerhnew_ur; - - case Hexagon::S4_storeri_ur: - return Hexagon::S4_storerinew_ur; - - case Hexagon::S2_storerb_pci: - return Hexagon::S2_storerb_pci; - - case Hexagon::S2_storeri_pci: - return Hexagon::S2_storeri_pci; - - case Hexagon::S2_storerh_pci: - return Hexagon::S2_storerh_pci; - - case Hexagon::S2_storerd_pci: - return Hexagon::S2_storerd_pci; - - case Hexagon::S2_storerf_pci: - return Hexagon::S2_storerf_pci; - } - return 0; -} - -// Return .new predicate version for an instruction. -int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const { - - int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); - if (NewOpcode >= 0) // Valid predicate new instruction - return NewOpcode; - - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown .new type"); - // Condtional Jumps - case Hexagon::J2_jumpt: - case Hexagon::J2_jumpf: - return getDotNewPredJumpOp(MI, MBPI); - - case Hexagon::J2_jumprt: - return Hexagon::J2_jumptnewpt; - - case Hexagon::J2_jumprf: - return Hexagon::J2_jumprfnewpt; - - case Hexagon::JMPrett: - return Hexagon::J2_jumprtnewpt; - - case Hexagon::JMPretf: - return Hexagon::J2_jumprfnewpt; - - - // Conditional combine - case Hexagon::C2_ccombinewt: - return Hexagon::C2_ccombinewnewt; - case Hexagon::C2_ccombinewf: - return Hexagon::C2_ccombinewnewf; - } -} - - -unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask); -} - -/// immediateExtend - Changes the instruction in place to one using an immediate -/// extender. -void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { - assert((isExtendable(MI)||isConstExtended(MI)) && - "Instruction must be extendable"); - // Find which operand is extendable. - short ExtOpNum = getCExtOpNum(MI); - MachineOperand &MO = MI->getOperand(ExtOpNum); - // This needs to be something we understand. - assert((MO.isMBB() || MO.isImm()) && - "Branch with unknown extendable field type"); - // Mark given operand as extended. 
-  MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
-}
-DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
-    const TargetSubtargetInfo &STI) const {
-  const InstrItineraryData *II = STI.getInstrItineraryData();
-  return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
-}
-bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
-                                            const MachineBasicBlock *MBB,
-                                            const MachineFunction &MF) const {
-  // Debug info is never a scheduling boundary. It's necessary to be explicit
-  // due to the special treatment of IT instructions below, otherwise a
-  // dbg_value followed by an IT will result in the IT instruction being
-  // considered a scheduling hazard, which is wrong. It should be the actual
-  // instruction preceding the dbg_value instruction(s), just like it is
-  // when debug info is not present.
-  if (MI->isDebugValue())
-    return false;
-
-  // Terminators and labels can't be scheduled around.
-  if (MI->getDesc().isTerminator() || MI->isPosition() || MI->isInlineAsm())
-    return true;
-
-  return false;
-}
+// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
+// isFPImm and later getFPImm as well.
 bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   const uint64_t F = MI->getDesc().TSFlags;
   unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
@@ -1791,6 +1658,9 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   if (!isExtendable)
     return false;
+  if (MI->isCall())
+    return false;
+
   short ExtOpNum = getCExtOpNum(MI);
   const MachineOperand &MO = MI->getOperand(ExtOpNum);
   // Use MO operand flags to determine if MO
@@ -1823,40 +1693,1425 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   return (ImmValue < MinValue || ImmValue > MaxValue);
 }
-// Return the number of bytes required to encode the instruction.
-// Hexagon instructions are fixed length, 4 bytes, unless they
-// use a constant extender, which requires another 4 bytes.
-// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
-  if (MI->isDebugValue() || MI->isPosition())
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::L4_return :
+  case Hexagon::L4_return_t :
+  case Hexagon::L4_return_f :
+  case Hexagon::L4_return_tnew_pnt :
+  case Hexagon::L4_return_fnew_pnt :
+  case Hexagon::L4_return_tnew_pt :
+  case Hexagon::L4_return_fnew_pt :
+    return true;
+  }
+  return false;
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
+      const MachineInstr *ConsMI) const {
+  const MCInstrDesc &ProdMCID = ProdMI->getDesc();
+  if (!ProdMCID.getNumDefs())
+    return false;
+
+  auto &HRI = getRegisterInfo();
+
+  SmallVector<unsigned, 4> DefsA;
+  SmallVector<unsigned, 4> DefsB;
+  SmallVector<unsigned, 8> UsesA;
+  SmallVector<unsigned, 8> UsesB;
+
+  parseOperands(ProdMI, DefsA, UsesA);
+  parseOperands(ConsMI, DefsB, UsesB);
+
+  for (auto &RegA : DefsA)
+    for (auto &RegB : UsesB) {
+      // True data dependency.
+      if (RegA == RegB)
+        return true;
+
+      if (Hexagon::DoubleRegsRegClass.contains(RegA))
+        for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
+          if (RegB == *SubRegs)
+            return true;
+
+      if (Hexagon::DoubleRegsRegClass.contains(RegB))
+        for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
+          if (RegA == *SubRegs)
+            return true;
+    }
+
+  return false;
+}
+
+
+// Returns true if the instruction is already a .cur.
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::V6_vL32b_cur_pi:
+  case Hexagon::V6_vL32b_cur_ai:
+  case Hexagon::V6_vL32b_cur_pi_128B:
+  case Hexagon::V6_vL32b_cur_ai_128B:
+    return true;
+  }
+  return false;
+}
+
+
+// Returns true, if any one of the operands is a dot new
+// insn, whether it is predicated dot new or register dot new.
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+  if (isNewValueInst(MI) ||
+      (isPredicated(MI) && isPredicatedNew(MI)))
+    return true;
+
+  return false;
+}
+
+
+/// Symmetrical. See if these two instructions are fit for duplex pair.
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
+      const MachineInstr *MIb) const {
+  HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
+  HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
+  return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
+
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
+  if (!MI)
+    return false;
+
+  if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+    return true;
+
+  // Multiply
+  unsigned SchedClass = MI->getDesc().getSchedClass();
+  if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
+    return true;
+  return false;
+}
+
+
+bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
+  return (Opcode == Hexagon::ENDLOOP0 ||
+          Opcode == Hexagon::ENDLOOP1);
+}
+
+
+bool HexagonInstrInfo::isExpr(unsigned OpType) const {
+  switch(OpType) {
+  case MachineOperand::MO_MachineBasicBlock:
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_JumpTableIndex:
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_BlockAddress:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+  const MCInstrDesc &MID = MI->getDesc();
+  const uint64_t F = MID.TSFlags;
+  if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+    return true;
+
+  // TODO: This is largely obsolete now. Will need to be removed
+  // in consecutive patches.
+  switch(MI->getOpcode()) {
+  // TFR_FI remains a special case.
+  case Hexagon::TFR_FI:
+    return true;
+  default:
+    return false;
+  }
+  return false;
+}
+
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of MOs has been marked with HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+  // First check if this is permanently extended op code.
+  const uint64_t F = MI->getDesc().TSFlags;
+  if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+    return true;
+  // Use MO operand flags to determine if one of MI's operands
+  // has HMOTF_ConstExtended flag set.
+  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+       E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+      return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+  unsigned Opcode = MI->getOpcode();
+  const uint64_t F = get(Opcode).TSFlags;
+  return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
+
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+      const MachineInstr *J) const {
+  if (!isV60VectorInstruction(I))
+    return false;
+  if (!I->mayLoad() && !I->mayStore())
+    return false;
+  return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+}
+
+
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::J2_callr :
+  case Hexagon::J2_callrf :
+  case Hexagon::J2_callrt :
+    return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::L4_return :
+  case Hexagon::L4_return_t :
+  case Hexagon::L4_return_f :
+  case Hexagon::L4_return_fnew_pnt :
+  case Hexagon::L4_return_fnew_pt :
+  case Hexagon::L4_return_tnew_pnt :
+  case Hexagon::L4_return_tnew_pt :
+    return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::J2_jumpr :
+  case Hexagon::J2_jumprt :
+  case Hexagon::J2_jumprf :
+  case Hexagon::J2_jumprtnewpt :
+  case Hexagon::J2_jumprfnewpt :
+  case Hexagon::J2_jumprtnew :
+  case Hexagon::J2_jumprfnew :
+    return true;
+  }
+  return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use an absolute estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+      unsigned offset) const {
+  // This selection of jump instructions matches what AnalyzeBranch can
+  // parse, plus NVJ.
+  if (isNewValueJump(MI)) // r9:2
+    return isInt<11>(offset);
+
+  switch (MI->getOpcode()) {
+  // Still missing Jump to address condition on register value.
+  default:
+    return false;
+  case Hexagon::J2_jump: // bits<24> dst; // r22:2
+  case Hexagon::J2_call:
+  case Hexagon::CALLv3nr:
+    return isInt<24>(offset);
+  case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+  case Hexagon::J2_jumpf:
+  case Hexagon::J2_jumptnew:
+  case Hexagon::J2_jumptnewpt:
+  case Hexagon::J2_jumpfnew:
+  case Hexagon::J2_jumpfnewpt:
+  case Hexagon::J2_callt:
+  case Hexagon::J2_callf:
+    return isInt<17>(offset);
+  case Hexagon::J2_loop0i:
+  case Hexagon::J2_loop0iext:
+  case Hexagon::J2_loop0r:
+  case Hexagon::J2_loop0rext:
+  case Hexagon::J2_loop1i:
+  case Hexagon::J2_loop1iext:
+  case Hexagon::J2_loop1r:
+  case Hexagon::J2_loop1rext:
+    return isInt<9>(offset);
+  // TODO: Add all the compound branches here. Can we do this in Relation model?
+  case Hexagon::J4_cmpeqi_tp0_jump_nt:
+  case Hexagon::J4_cmpeqi_tp1_jump_nt:
+    return isInt<11>(offset);
+  }
+}
+
+
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+      const MachineInstr *ESMI) const {
+  if (!LRMI || !ESMI)
+    return false;
+
+  bool isLate = isLateResultInstr(LRMI);
+  bool isEarly = isEarlySourceInstr(ESMI);
+
+  DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+  DEBUG(LRMI->dump());
+  DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
"-ES " : " -- ")); + DEBUG(ESMI->dump()); + + if (isLate && isEarly) { + DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); + return true; + } + + return false; +} + + +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + switch (MI->getOpcode()) { + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::PHI: + return false; + default: + break; + } + + unsigned SchedClass = MI->getDesc().getSchedClass(); + + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + case Hexagon::Sched::V2LDST_tc_ld_SLOT01: + case Hexagon::Sched::V2LDST_tc_st_SLOT0: + case Hexagon::Sched::V2LDST_tc_st_SLOT01: + case Hexagon::Sched::V4LDST_tc_ld_SLOT01: + case Hexagon::Sched::V4LDST_tc_st_SLOT0: + case Hexagon::Sched::V4LDST_tc_st_SLOT01: + return false; + } + return true; +} + + +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply + // resource, but all operands can be received late like an ALU instruction. + return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; +} + + +bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return Opcode == Hexagon::J2_loop0i || + Opcode == Hexagon::J2_loop0r || + Opcode == Hexagon::J2_loop0iext || + Opcode == Hexagon::J2_loop0rext || + Opcode == Hexagon::J2_loop1i || + Opcode == Hexagon::J2_loop1r || + Opcode == Hexagon::J2_loop1iext || + Opcode == Hexagon::J2_loop1rext; +} + + +bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: + return true; + } + return false; +} + + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + return isNewValueJump(MI) || isNewValueStore(MI); +} + + +bool 
HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + return isNewValue(MI) && MI->isBranch(); +} + + +bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +// Returns true if a particular operand is extendable for an instruction. +bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned OperandNum) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + + +bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} + + +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + assert(isPredicated(MI)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(isPredicated(Opcode)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; +} + + +bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(get(Opcode).isBranch() && + (isPredicatedNew(Opcode) || isNewValue(Opcode))); + return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; +} + + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; +} + + +bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; +} + + +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + default: + return false; + } +} + + +// Returns true when SU has a timing class TC1. 
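+// TC1/TC2/TC2early/TC4x below bucket instructions by their scheduling class.
+// A hedged, illustrative sketch of a client that maps these buckets to an
+// approximate latency; bucketLatency is a hypothetical helper and the cycle
+// counts only reflect the rough intent of the tc_* names:
+//
+//   unsigned bucketLatency(const HexagonInstrInfo &HII,
+//                          const MachineInstr *MI) {
+//     if (HII.isTC1(MI))
+//       return 1;                      // tc_1: single-cycle classes
+//     if (HII.isTC2(MI) || HII.isTC2Early(MI))
+//       return 2;                      // tc_2/tc_2early classes
+//     if (HII.isTC4x(MI))
+//       return 4;                      // tc_3or4x multiply classes
+//     return 1;                        // default assumption
+//   }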
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + //case Hexagon::Sched::M_tc_1_SLOT23: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::CR_tc_2_SLOT3: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU64_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT3: + case Hexagon::Sched::J_tc_2early_SLOT0123: + case Hexagon::Sched::J_tc_2early_SLOT2: + case Hexagon::Sched::J_tc_2early_SLOT23: + case Hexagon::Sched::S_2op_tc_2early_SLOT23: + case Hexagon::Sched::S_3op_tc_2early_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { + if (!MI) + return false; + + unsigned SchedClass = MI->getDesc().getSchedClass(); + return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; +} + + +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { + if (!MI) + return false; + + const uint64_t V = getType(MI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; +} + + +// Check if the Offset is a valid auto-inc imm by Load/Store Type. +// +bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { + if (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN && + Offset <= Hexagon_MEMV_AUTOINC_MAX && + (Offset & 0x3f) == 0); + } + // 128B + if (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B && + Offset <= Hexagon_MEMV_AUTOINC_MAX_128B && + (Offset & 0x7f) == 0); + } + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + llvm_unreachable("Not an auto-inc opc!"); +} + + +bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, + bool Extend) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". 
If "Offset" is not in the correct range, "A2_addi" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. + + switch (Opcode) { + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: + case Hexagon::STrivv_indexed: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vS32Ub_ai: + return (Offset >= Hexagon_MEMV_OFFSET_MIN) && + (Offset <= Hexagon_MEMV_OFFSET_MAX); + + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + case Hexagon::STrivv_indexed_128B: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::V6_vS32Ub_ai_128B: + return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && + (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + + case Hexagon::J2_loop0i: + case Hexagon::J2_loop1i: + return isUInt<10>(Offset); + } + + if (Extend) + return true; + + switch (Opcode) { + case Hexagon::L2_loadri_io: + case Hexagon::S2_storeri_io: + return (Offset >= Hexagon_MEMW_OFFSET_MIN) && + (Offset <= Hexagon_MEMW_OFFSET_MAX); + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return (Offset >= Hexagon_MEMD_OFFSET_MIN) && + (Offset <= Hexagon_MEMD_OFFSET_MAX); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::S2_storerh_io: + return (Offset >= Hexagon_MEMH_OFFSET_MIN) && + (Offset <= Hexagon_MEMH_OFFSET_MAX); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + case Hexagon::S2_storerb_io: + return (Offset >= Hexagon_MEMB_OFFSET_MIN) && + (Offset <= Hexagon_MEMB_OFFSET_MAX); + + case Hexagon::A2_addi: + return (Offset >= Hexagon_ADDI_OFFSET_MIN) && + (Offset <= Hexagon_ADDI_OFFSET_MAX); + + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + return (0 <= Offset && Offset <= 255); + + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + return (0 <= Offset && Offset <= 127); + + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + return (0 <= Offset && Offset <= 63); + + // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // any size. Later pass knows how to handle it. 
+ case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: + return true; + + case Hexagon::TFR_FI: + case Hexagon::TFR_FIA: + case Hexagon::INLINEASM: + return true; + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); + + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + return isShiftedUInt<6,3>(Offset); + } // switch + + llvm_unreachable("No offset range is defined for this opcode. " + "Please define it in the above switch statement!"); +} + + +bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { + return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +} + + +bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { + if (!MI) + return false; + const uint64_t F = get(MI->getOpcode()).TSFlags; + const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); + return + V == HexagonII::TypeCVI_VA || + V == HexagonII::TypeCVI_VA_DV; +} + + +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) + return true; + + if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI))) + return true; + + if (mayBeNewStore(ConsMI)) + return true; + + return false; +} + + +/// \brief Can these instructions execute at the same time in a bundle. +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const { + if (DisableNVSchedule) + return false; + if (mayBeNewStore(Second)) { + // Make sure the definition of the first instruction is the value being + // stored. + const MachineOperand &Stored = + Second->getOperand(Second->getNumOperands() - 1); + if (!Stored.isReg()) + return false; + for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First->getOperand(i); + if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) + return true; + } + } + return false; +} + + +bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. +bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. 
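+  // If there is no direct register form, the switch below walks the
+  // addressing mode through TableGen relation maps, in this order:
+  //   Absolute       -> base + immediate offset (getBaseWithImmOffset)
+  //   BaseImmOffset  -> base + register offset  (getBaseWithRegOffset)
+  //   BaseLongOffset -> register shl form       (getRegShlForm)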
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retrieve non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + case HexagonII::BaseLongOffset: + NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; + return true; + } + return false; +} + + +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), + Hexagon::InstrType_Pseudo) >= 0; +} + + +bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +// Returns true, if a LD insn can be promoted to a cur load. +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { + auto &HST = MI->getParent()->getParent()->getSubtarget(); + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && + HST.hasV60TOps(); +} + + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + // There is no stall when ProdMI is not a V60 vector. + if (!isV60VectorInstruction(ProdMI)) + return false; + + // There is no stall when ProdMI and ConsMI are not dependent. + if (!isDependent(ProdMI, ConsMI)) + return false; + + // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI + // are scheduled in consecutive packets. + if (isVecUsableNextPacket(ProdMI, ConsMI)) + return false; + + return true; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator BII) const { + // There is no stall when I is not a V60 vector. + if (!isV60VectorInstruction(MI)) + return false; + + MachineBasicBlock::const_instr_iterator MII = BII; + MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); + + if (!(*MII).isBundle()) { + const MachineInstr *J = &*MII; + if (!isV60VectorInstruction(J)) + return false; + else if (isVecUsableNextPacket(J, MI)) + return false; + return true; + } + + for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { + const MachineInstr *J = &*MII; + if (producesStall(J, MI)) + return true; + } + return false; +} + + +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, + unsigned PredReg) const { + for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) + return false; // Predicate register must be explicitly defined. 
+ } + + // Hexagon Programmer's Reference says that decbin, memw_locked, and + // memd_locked cannot be used as .new as well, + // but we don't seem to have these instructions defined. + return MI->getOpcode() != Hexagon::A4_tlbmatch; +} + + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnew) || + (Opcode == Hexagon::J2_jumpfnew) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt); +} + + +bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + + +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; +} + + +// Returns the base register in a memory access (load/store). The offset is +// returned in Offset and the access size is returned in AccessSize. +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, + int &Offset, unsigned &AccessSize) const { + // Return if it is not a base+offset type instruction or a MemOp. + if (getAddrMode(MI) != HexagonII::BaseImmOffset && + getAddrMode(MI) != HexagonII::BaseLongOffset && + !isMemOp(MI) && !isPostIncrement(MI)) return 0; - unsigned Size = MI->getDesc().getSize(); - if (!Size) - // Assume the default insn size in case it cannot be determined - // for whatever reason. - Size = HEXAGON_INSTR_SIZE; + // Since it is a memory access instruction, getMemAccessSize() should never + // return 0. + assert (getMemAccessSize(MI) && + "BaseImmOffset or BaseLongOffset or MemOp without accessSize"); - if (isConstExtended(MI) || isExtended(MI)) - Size += HEXAGON_INSTR_SIZE; + // Return Values of getMemAccessSize() are + // 0 - Checked in the assert above. + // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these. + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + AccessSize = (1U << (getMemAccessSize(MI) - 1)); - return Size; + unsigned basePos = 0, offsetPos = 0; + if (!getBaseAndOffsetPosition(MI, basePos, offsetPos)) + return 0; + + // Post increment updates its EA after the mem access, + // so we need to treat its offset as zero. + if (isPostIncrement(MI)) + Offset = 0; + else { + Offset = MI->getOperand(offsetPos).getImm(); + } + + return MI->getOperand(basePos).getReg(); +} + + +/// Return the position of the base and offset operands for this instruction. +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, + unsigned &BasePos, unsigned &OffsetPos) const { + // Deal with memops first. + if (isMemOp(MI)) { + assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Bad Memop."); + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayStore()) { + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayLoad()) { + BasePos = 1; + OffsetPos = 2; + } else + return false; + + if (isPredicated(MI)) { + BasePos++; + OffsetPos++; + } + if (isPostIncrement(MI)) { + BasePos++; + OffsetPos++; + } + + if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + return false; + + return true; +} + + +// Inserts branching instructions in reverse order of their occurence. +// e.g. 
jump_t t1 (i1)
+//        jump t2   (i2)
+// Jumpers = {i2, i1}
+SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
+      MachineBasicBlock& MBB) const {
+  SmallVector<MachineInstr*, 2> Jumpers;
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::instr_iterator I = MBB.instr_end();
+  if (I == MBB.instr_begin())
+    return Jumpers;
+
+  // A basic block may look like this:
+  //
+  //  [   insn
+  //     EH_LABEL
+  //      insn
+  //      insn
+  //      insn
+  //     EH_LABEL
+  //      insn  ]
+  //
+  // It has two successors but does not have a terminator, and we don't
+  // know how to handle such a block.
+  do {
+    --I;
+    if (I->isEHLabel())
+      return Jumpers;
+  } while (I != MBB.instr_begin());
+
+  I = MBB.instr_end();
+  --I;
+
+  while (I->isDebugValue()) {
+    if (I == MBB.instr_begin())
+      return Jumpers;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(&*I))
+    return Jumpers;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  Jumpers.push_back(LastInst);
+  MachineInstr *SecondLastInst = nullptr;
+  // Find one more terminator if present.
+  do {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+      if (!SecondLastInst) {
+        SecondLastInst = &*I;
+        Jumpers.push_back(SecondLastInst);
+      } else // This is a third branch.
+        return Jumpers;
+    }
+    if (I == MBB.instr_begin())
+      break;
+    --I;
+  } while (true);
+  return Jumpers;
+}
+
+
+// Returns the operand index for the constant-extended instruction.
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
+}
+
+// See if the instruction could potentially be a compound candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
+      const MachineInstr *MI) const {
+  unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
+  switch (MI->getOpcode()) {
+  default:
+    return HexagonII::HCG_None;
+  //
+  // Compound pairs.
+  // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+  // "Rd16=#U6 ; jump #r9:2"
+  // "Rd16=Rs16 ; jump #r9:2"
+  //
+  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgtu:
+    DstReg = MI->getOperand(0).getReg();
+    Src1Reg = MI->getOperand(1).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+        (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::C2_cmpeqi:
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgtui:
+    // P0 = cmp.eq(Rs,#u2)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+        (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+        ((isUInt<5>(MI->getOperand(2).getImm())) ||
+         (MI->getOperand(2).getImm() == -1)))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfr:
+    // Rd = Rs
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfrsi:
+    // Rd = #u6
+    // Do not test for the #u6 size since the constant is getting extended
+    // regardless, and a compound could still be formed.
+ DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI->getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(Src1Reg) && + (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} + + +// Returns -1 when there is no opcode found. +unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const { + assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); + assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); + if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || + (GB->getOpcode() != Hexagon::J2_jumptnew)) + return -1; + unsigned DestReg = GA->getOperand(0).getReg(); + if (!GB->readsRegister(DestReg)) + return -1; + if (DestReg == Hexagon::P0) + return Hexagon::J4_cmpeqi_tp0_jump_nt; + if (DestReg == Hexagon::P1) + return Hexagon::J4_cmpeqi_tp1_jump_nt; + return -1; +} + + +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. + switch(Opc) { + case Hexagon::TFRI_f: + return !invertPredicate ? Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; + } + + llvm_unreachable("Unexpected predicable instruction"); +} + + +// Return the cur value instruction for a given store. +int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_pi: + return Hexagon::V6_vL32b_cur_pi; + case Hexagon::V6_vL32b_ai: + return Hexagon::V6_vL32b_cur_ai; + //128B + case Hexagon::V6_vL32b_pi_128B: + return Hexagon::V6_vL32b_cur_pi_128B; + case Hexagon::V6_vL32b_ai_128B: + return Hexagon::V6_vL32b_cur_ai_128B; + } + return 0; +} + + + +// The diagram below shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. +// +// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] +// ^ ^ +// / \ (not OK. it will cause new-value store to be +// / X conditional on p0.new while R2 producer is +// / \ on p0) +// / \. 
+// p.new store p.old NV store +// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new] +// ^ ^ +// \ / +// \ / +// \ / +// p.old store +// [if (p0)memw(R0+#0)=R2] +// +// +// The following set of instructions further explains the scenario where +// conditional new-value store becomes invalid when promoted to .new predicate +// form. +// +// { 1) if (p0) r0 = add(r1, r2) +// 2) p0 = cmp.eq(r3, #0) } +// +// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with +// the first two instructions because in instr 1, r0 is conditional on old value +// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which +// is not valid for new-value stores. +// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded +// from the "Conditional Store" list. Because a predicated new value store +// would NOT be promoted to a double dot new store. See diagram below: +// This function returns yes for those stores that are predicated but not +// yet promoted to predicate dot new instructions. +// +// +---------------------+ +// /-----| if (p0) memw(..)=r0 |---------\~ +// || +---------------------+ || +// promote || /\ /\ || promote +// || /||\ /||\ || +// \||/ demote || \||/ +// \/ || || \/ +// +-------------------------+ || +-------------------------+ +// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | +// +-------------------------+ || +-------------------------+ +// || || || +// || demote \||/ +// promote || \/ NOT possible +// || || /\~ +// \||/ || /||\~ +// \/ || || +// +-----------------------------+ +// | if (p0.new) memw(..)=r0.new | +// +-----------------------------+ +// Double Dot New Store +// +// Returns the most basic instruction for the .new predicated instructions and +// new-value stores. +// For example, all of the following instructions will be converted back to the +// same instruction: +// 1) if (p0.new) memw(R0+#0) = R1.new ---> +// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 +// 3) if (p0.new) memw(R0+#0) = R1 ---> +// +// To understand the translation of instruction 1 to its original form, consider +// a packet with 3 instructions. +// { p0 = cmp.eq(R0,R1) +// if (p0.new) R2 = add(R3, R4) +// R5 = add (R3, R1) +// } +// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet +// +// This instruction can be part of the previous packet only if both p0 and R2 +// are promoted to .new values. This promotion happens in steps, first +// predicate register is promoted to .new and in the next iteration R2 is +// promoted. Therefore, in case of dependence check failure (due to R5) during +// next iteration, it should be converted back to its most basic form. + + +// Return the new value instruction for a given store. +int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { + int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); + if (NVOpcode >= 0) // Valid new-value store instruction. 
+ return NVOpcode; + + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .new type"); + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; + + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; + + case Hexagon::V6_vS32b_ai: + return Hexagon::V6_vS32b_new_ai; + + case Hexagon::V6_vS32b_pi: + return Hexagon::V6_vS32b_new_pi; + + // 128B + case Hexagon::V6_vS32b_ai_128B: + return Hexagon::V6_vS32b_new_ai_128B; + + case Hexagon::V6_vS32b_pi_128B: + return Hexagon::V6_vS32b_new_pi_128B; + } + return 0; } // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. -int -HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, - const - MachineBranchProbabilityInfo *MBPI) const { - +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { // We assume that block can have at most two successors. bool taken = false; - MachineBasicBlock *Src = MI->getParent(); - MachineOperand *BrTarget = &MI->getOperand(1); - MachineBasicBlock *Dst = BrTarget->getMBB(); + const MachineBasicBlock *Src = MI->getParent(); + const MachineOperand *BrTarget = &MI->getOperand(1); + const MachineBasicBlock *Dst = BrTarget->getMBB(); const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); if (Prediction >= BranchProbability(1,2)) @@ -1872,35 +3127,452 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, llvm_unreachable("Unexpected jump instruction."); } } -// Returns true if a particular operand is extendable for an instruction. -bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) - == OperandNum; + +// Return .new predicate version for an instruction. +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + if (NewOpcode >= 0) // Valid predicate new instruction + return NewOpcode; + + switch (MI->getOpcode()) { + // Condtional Jumps + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + return getDotNewPredJumpOp(MI, MBPI); + + default: + assert(0 && "Unknown .new type"); + } + return 0; } -// Returns Operand Index for the constant extended instruction. 
-unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); + +int HexagonInstrInfo::getDotOldOp(const int opc) const { + int NewOp = opc; + if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form + NewOp = Hexagon::getPredOldOpcode(NewOp); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); + } + + if (isNewValueStore(NewOp)) { // Convert into non-new-value format + NewOp = Hexagon::getNonNVStore(NewOp); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); + } + return NewOp; } -// Returns the min value that doesn't need to be extended. -int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - unsigned isSigned = (F >> HexagonII::ExtentSignedPos) - & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) - & HexagonII::ExtentBitsMask; - if (isSigned) // if value is signed - return -1U << (bits - 1); - else +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + auto &HRI = getRegisterInfo(); + + switch (MI->getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + // Special case this one from Group L2. + // Rd = memw(r29+#u5:2) + if (isIntRegForSubInst(DstReg)) { + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + // Rd = memw(Rs+#u4:2) + if (isIntRegForSubInst(SrcReg) && + (MI->getOperand(2).isImm() && + isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + return HexagonII::HSIG_L1; + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L1; + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. 
+ // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isUInt<3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + // dealloc_return is not documented in Hexagon Manual, but marked + // with A_SUBINSN attribute in iset_v4classic.py. + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::L4_return: + case Hexagon::L2_deallocframe: + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + case Hexagon::JMPret : + // jumpr r31 + // Actual form JMPR %PC, %R31, %R0. + DstReg = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) + return HexagonII::HSIG_L2; + break; + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt : + case Hexagon::JMPrettnew : + case Hexagon::JMPretfnew : + DstReg = MI->getOperand(1).getReg(); + SrcReg = MI->getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((Hexagon::PredRegsRegClass.contains(SrcReg) && + (Hexagon::P0 == SrcReg)) && + (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) + return HexagonII::HSIG_L2; + break; + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + // [if ([!]p0[.new])] dealloc_return + SrcReg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) + return HexagonII::HSIG_L2; + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+ // memw(r29+#u5:2) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::IntRegsRegClass.contains(Src1Reg) && + isIntRegForSubInst(Src2Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedUInt<5,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + // memw(Rs+#u4:2) = Rt + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<3,1>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(Src2Reg, HRI) && + Hexagon::IntRegsRegClass.contains(Src1Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedInt<6,3>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm()) && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S2_allocframe: + if (MI->getOperand(0).isImm() && + isShiftedUInt<5,3>(MI->getOperand(0).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() && + isShiftedUInt<6,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rx = add(Rx,#s7) + if ((DstReg == SrcReg) && MI->getOperand(2).isImm() && + isInt<7>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + 
((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + isIntRegForSubInst(Src2Reg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_andir: + // Same as zxtb. + // Rd16=and(Rs16,#255) + // Rd16=and(Rs16,#1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == 255))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. + // Rd = #-1 + DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16 = C2_cmovenewit %P0, 0, %R16; + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && + Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg && + MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MI->getOperand(0).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) || + (MI->getOperand(1).isGlobal() && + isUInt<2>(MI->getOperand(1).getOffset()))) && + ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).isGlobal() && + isUInt<2>(MI->getOperand(2).getOffset())))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) || + (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) || + (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + 
DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + } + + return HexagonII::HSIG_None; +} + + +short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real); +} + + +// Return first non-debug instruction in the basic block. +MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) + const { + for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { + MachineInstr *MI = &*MII; + if (MI->isDebugValue()) + continue; + return MI; + } + return nullptr; +} + + +unsigned HexagonInstrInfo::getInstrTimingClassLatency( + const InstrItineraryData *ItinData, const MachineInstr *MI) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return getInstrLatency(ItinData, MI); + + // Get the latency embedded in the itinerary. If we're not using timing class + // latencies or if we using BSB scheduling, then restrict the maximum latency + // to 1 (that is, either 0 or 1). + if (MI->isTransient()) return 0; + unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass()); + if (!EnableTimingClassLatency || + MI->getParent()->getParent()->getSubtarget(). + useBSBScheduling()) + if (Latency > 1) + Latency = 1; + return Latency; } + +// inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::getInvertedPredSense( + SmallVectorImpl &Cond) const { + if (Cond.empty()) + return false; + unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm()); + Cond[0].setImm(Opc); + return true; +} + + +unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + int InvPredOpcode; + InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) + : Hexagon::getTruePredOpcode(Opc); + if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. + return InvPredOpcode; + + llvm_unreachable("Unexpected predicated instruction"); +} + + // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; @@ -1915,44 +3587,30 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { return ~(-1U << bits); } -// Returns true if an instruction can be converted into a non-extended -// equivalent instruction. -bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const { - short NonExtOpcode; - // Check if the instruction has a register form that uses register in place - // of the extended operand, if so return that as the non-extended form. - if (Hexagon::getRegForm(MI->getOpcode()) >= 0) - return true; - - if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { - // Check addressing mode and retrieve non-ext equivalent instruction. - - switch (getAddrMode(MI)) { - case HexagonII::Absolute : - // Load/store with absolute addressing mode can be converted into - // base+offset mode. - NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode()); - break; - case HexagonII::BaseImmOffset : - // Load/store with base+offset addressing mode can be converted into - // base+register offset addressing mode. However left shift operand should - // be set to 0. 
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); - break; - default: - return false; - } - if (NonExtOpcode < 0) - return false; - return true; - } - return false; +unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } -// Returns opcode of the non-extended equivalent instruction. -short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { +// Returns the min value that doesn't need to be extended. +int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + + +// Returns opcode of the non-extended equivalent instruction. +short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const { // Check if the instruction has a register form that uses register in place // of the extended operand, if so return that as the non-extended form. short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode()); @@ -1963,9 +3621,12 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { // Check addressing mode and retrieve non-ext equivalent instruction. switch (getAddrMode(MI)) { case HexagonII::Absolute : - return Hexagon::getBasedWithImmOffset(MI->getOpcode()); + return Hexagon::getBaseWithImmOffset(MI->getOpcode()); case HexagonII::BaseImmOffset : return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + case HexagonII::BaseLongOffset: + return Hexagon::getRegShlForm(MI->getOpcode()); + default: return -1; } @@ -1973,29 +3634,9 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { return -1; } -bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt) || - (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf); -} - -bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { - if (Cond.empty() || !isPredicated(Cond[0].getImm())) - return false; - return !isPredicatedTrue(Cond[0].getImm()); -} - -bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const { - return (Opcode == Hexagon::ENDLOOP0 || - Opcode == Hexagon::ENDLOOP1); -} bool HexagonInstrInfo::getPredReg(ArrayRef Cond, - unsigned &PredReg, unsigned &PredRegPos, - unsigned &PredRegFlags) const { + unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) return false; assert(Cond.size() == 2); @@ -2014,3 +3655,174 @@ bool HexagonInstrInfo::getPredReg(ArrayRef Cond, return true; } + +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +} + + +short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { + return Hexagon::getRegForm(MI->getOpcode()); +} + + +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. 
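+// For example (illustrative values): a plain "r0 = add(r1, #4)" encodes in
+// one 4-byte word, while a constant-extended "r0 = add(r1, #0x12345678)"
+// needs an extender word as well, for 4 + 4 = 8 bytes.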
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + // Try and compute number of instructions in asm. + if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + // Disassemble the AsmStr and approximate number of instructions. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + Size = getInlineAsmLength(AsmStr, *MAI); + } + + return Size; +} + + +uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::TypePos) & HexagonII::TypeMask; +} + + +unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { + const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); + const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + + return IS.getUnits(); +} + + +unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; +} + + +// Calculate size of the basic block without debug instructions. +unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { + return nonDbgMICount(BB->instr_begin(), BB->instr_end()); +} + + +unsigned HexagonInstrInfo::nonDbgBundleSize( + MachineBasicBlock::const_iterator BundleHead) const { + assert(BundleHead->isBundle() && "Not a bundle header"); + auto MII = BundleHead.getInstrIterator(); + // Skip the bundle header. + return nonDbgMICount(++MII, getBundleEnd(BundleHead)); +} + + +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} + + +bool HexagonInstrInfo::invertAndChangeJumpTarget( + MachineInstr* MI, MachineBasicBlock* NewTarget) const { + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" + << NewTarget->getNumber(); MI->dump();); + assert(MI->isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); + int TargetPos = MI->getNumOperands() - 1; + // In general branch target is the last operand, + // but some implicit defs added at the end might change it. 
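+  // Scan backwards until the basic-block operand is found; for a conditional
+  // jump the operand order is typically (predicate, target MBB), but implicit
+  // operands appended later can push the MBB away from the last slot.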
+ while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + --TargetPos; + assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); + MI->getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { + NewOpcode = reversePrediction(NewOpcode); + } + MI->setDesc(get(NewOpcode)); + return true; +} + + +void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { + /* +++ The code below is used to generate a complete set of Hexagon instructions +++ */ + MachineFunction::iterator A = MF.begin(); + MachineBasicBlock &B = *A; + MachineBasicBlock::iterator I = B.begin(); + MachineInstr *MI = &*I; + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewMI; + + for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; + insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { + NewMI = BuildMI(B, MI, DL, get(insn)); + DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << + " Class: " << NewMI->getDesc().getSchedClass()); + NewMI->eraseFromParent(); + } + /* --- The code above is used to generate a complete set of Hexagon instructions --- */ +} + + +// Inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); + MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); + return true; +} + + +// Reverse the branch prediction. +unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { + int PredRevOpcode = -1; + if (isPredictedTaken(Opcode)) + PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode); + else + PredRevOpcode = Hexagon::takenBranchPrediction(Opcode); + assert(PredRevOpcode > 0); + return PredRevOpcode; +} + + +// TODO: Add more rigorous validation. +bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) + const { + return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); +} + diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index d0b8a4631c1d..9530d9f2aa0d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,4 +1,3 @@ - //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,23 +27,18 @@ namespace llvm { struct EVT; class HexagonSubtarget; + class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; - const HexagonSubtarget &Subtarget; public: - typedef unsigned Opcode_t; - explicit HexagonInstrInfo(HexagonSubtarget &ST); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). + /// TargetInstrInfo overrides. /// - const HexagonRegisterInfo &getRegisterInfo() const { return RI; } - /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0.
This predicate must return 0 if the instruction has @@ -52,7 +46,7 @@ public: unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - /// isStoreToStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the stored stack slot. If /// not, return 0. This predicate must return 0 if the instruction has @@ -60,50 +54,118 @@ public: unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ), + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that RemoveBranch and InsertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; + /// Remove the branching code at the end of the specified MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + /// Insert branch code into the end of the specified MachineBasicBlock. + /// The operands to this method are the same as those + /// returned by AnalyzeBranch. This is only invoked in cases where + /// AnalyzeBranch returns success. It returns the number of instructions + /// inserted. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled.
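+  ///
+  /// A typical (illustrative) round trip through these three hooks, assuming
+  /// a TargetInstrInfo pointer TII and a DebugLoc DL are in scope:
+  ///   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  ///   SmallVector<MachineOperand, 4> Cond;
+  ///   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
+  ///     TII->RemoveBranch(MBB);
+  ///     TII->InsertBranch(MBB, TBB, FBB, Cond, DL); // re-emit, possibly edited
+  ///   }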
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, DebugLoc DL) const override; - bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const override; + /// Return true if it's profitable to predicate + /// instructions with accumulated instruction latency of "NumCycles" + /// of the specified basic block, where the probability of the instructions + /// being executed is given by Probability. + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + /// Second variant of isProfitableToIfCvt. This one + /// checks for the case where two basic blocks from the true and false paths + /// of an if-then-else (diamond) are predicated on mutually exclusive + /// predicates, where the probability of the true path being taken is given + /// by Probability. + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + BranchProbability Probability) const override; + + /// Return true if it's profitable for the if-converter to duplicate + /// instructions of specified accumulated instruction latencies in the + /// specified MBB to enable if-conversion. + /// The probability of the instructions being executed is given by + /// Probability. + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction.
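+  ///
+  /// An illustrative spill/reload pair through this hook and
+  /// storeRegToStackSlot above (Reg, FI, RC, TRI assumed in scope):
+  ///   TII->storeRegToStackSlot(MBB, MI, Reg, /*isKill=*/true, FI, RC, TRI);
+  ///   TII->loadRegFromStackSlot(MBB, MI, Reg, FI, RC, TRI);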
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - /// expandPostRAPseudo - This function is called for all pseudo instructions + /// This function is called for all pseudo instructions /// that remain after register allocation. Many pseudo instructions are /// created to help register allocation. This is the place to convert them /// into real instructions. The target can edit MI in place, or it can insert @@ -111,122 +173,228 @@ public: /// anything was changed. bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, - int FrameIndex) const override; + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + bool ReverseBranchCondition(SmallVectorImpl &Cond) + const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, - MachineInstr *LoadMI) const override { - return nullptr; - } + /// Insert a noop into the instruction stream at the specified point. + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; - unsigned createVR(MachineFunction* MF, MVT VT) const; + /// Returns true if the instruction is already predicated. + bool isPredicated(const MachineInstr *MI) const override; - bool isBranch(const MachineInstr *MI) const; - bool isPredicable(MachineInstr *MI) const override; + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. bool PredicateInstruction(MachineInstr *MI, ArrayRef Cond) const override; - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; - - bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const override; - - bool isPredicated(const MachineInstr *MI) const override; - bool isPredicated(unsigned Opcode) const; - bool isPredicatedTrue(const MachineInstr *MI) const; - bool isPredicatedTrue(unsigned Opcode) const; - bool isPredicatedNew(const MachineInstr *MI) const; - bool isPredicatedNew(unsigned Opcode) const; - bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const override; + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. bool SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const override; - bool - ReverseBranchCondition(SmallVectorImpl &Cond) const override; + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. 
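+  /// On Hexagon, for example, a compare such as "p0 = cmp.gt(r0, r1)" defines
+  /// the predicate register p0 (an illustrative case, not an exhaustive list).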
+ bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const override; - bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const override; - - DFAPacketizer * - CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + bool isPredicable(MachineInstr *MI) const override; + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; - bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; - bool isValidAutoIncImm(const EVT VT, const int Offset) const; - bool isMemOp(const MachineInstr *MI) const; - bool isSpillPredRegOp(const MachineInstr *MI) const; - bool isU6_3Immediate(const int value) const; - bool isU6_2Immediate(const int value) const; - bool isU6_1Immediate(const int value) const; - bool isU6_0Immediate(const int value) const; - bool isS4_3Immediate(const int value) const; - bool isS4_2Immediate(const int value) const; - bool isS4_1Immediate(const int value) const; - bool isS4_0Immediate(const int value) const; - bool isS12_Immediate(const int value) const; - bool isU6_Immediate(const int value) const; - bool isS8_Immediate(const int value) const; - bool isS6_Immediate(const int value) const; - bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const; - bool isConditionalTransfer(const MachineInstr* MI) const; + /// Measure the specified inline asm to determine an approximation of its + /// length. + unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const override; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const override; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if it has two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const override; + + /// Create a machine-specific model for scheduling. + DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + + // Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if that is the case + // and false otherwise. + bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) + const override; + + + /// HexagonInstrInfo specifics.
+ /// + + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + bool isAbsoluteSet(const MachineInstr* MI) const; + bool isAccumulator(const MachineInstr *MI) const; + bool isComplex(const MachineInstr *MI) const; + bool isCompoundBranchInstr(const MachineInstr *MI) const; + bool isCondInst(const MachineInstr *MI) const; bool isConditionalALU32 (const MachineInstr* MI) const; - bool isConditionalLoad (const MachineInstr* MI) const; + bool isConditionalLoad(const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; - bool isNewValueInst(const MachineInstr* MI) const; - bool isNewValue(const MachineInstr* MI) const; - bool isNewValue(Opcode_t Opcode) const; - bool isDotNewInst(const MachineInstr* MI) const; - int GetDotOldOp(const int opc) const; - int GetDotNewOp(const MachineInstr* MI) const; - int GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const; - bool mayBeNewStore(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConstExtended(const MachineInstr *MI) const; bool isDeallocRet(const MachineInstr *MI) const; - unsigned getInvertedPredicatedOpcode(const int Opc) const; + bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool isDotCurInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; + bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isEndLoopN(unsigned Opcode) const; + bool isExpr(unsigned OpType) const; bool isExtendable(const MachineInstr* MI) const; bool isExtended(const MachineInstr* MI) const; - bool isPostIncrement(const MachineInstr* MI) const; + bool isFloat(const MachineInstr *MI) const; + bool isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const; + bool isIndirectCall(const MachineInstr *MI) const; + bool isIndirectL4Return(const MachineInstr *MI) const; + bool isJumpR(const MachineInstr *MI) const; + bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const; + bool isLateResultInstr(const MachineInstr *MI) const; + bool isLateSourceInstr(const MachineInstr *MI) const; + bool isLoopN(const MachineInstr *MI) const; + bool isMemOp(const MachineInstr *MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool isNewValue(unsigned Opcode) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(unsigned Opcode) const; bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(unsigned Opcode) const; - bool isNewValueJump(const MachineInstr* MI) const; - bool isNewValueJump(Opcode_t Opcode) const; - bool isNewValueJumpCandidate(const MachineInstr *MI) const; + bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicated(unsigned Opcode) const; + bool isPredicateLate(unsigned Opcode) const; + bool isPredictedTaken(unsigned Opcode) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSolo(const 
MachineInstr* MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTC1(const MachineInstr *MI) const; + bool isTC2(const MachineInstr *MI) const; + bool isTC2Early(const MachineInstr *MI) const; + bool isTC4x(const MachineInstr *MI) const; + bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; + bool isVecAcc(const MachineInstr *MI) const; + bool isVecALU(const MachineInstr *MI) const; + bool isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + + + bool canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const; + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasNonExtEquivalent(const MachineInstr *MI) const; + bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool mayBeCurLoad(const MachineInstr* MI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator MII) const; + bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool PredOpcodeHasJMP_c(unsigned Opcode) const; + bool predOpcodeHasNot(ArrayRef Cond) const; + + + unsigned getAddrMode(const MachineInstr* MI) const; + unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, + unsigned &AccessSize) const; + bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos) const; + SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; + unsigned getCExtOpNum(const MachineInstr *MI) const; + HexagonII::CompoundGroup + getCompoundCandidateGroup(const MachineInstr *MI) const; + unsigned getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const; + int getCondOpcode(int Opc, bool sense) const; + int getDotCurOp(const MachineInstr* MI) const; + int getDotNewOp(const MachineInstr* MI) const; + int getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotOldOp(const int opc) const; + HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI) + const; + short getEquivalentHWInstr(const MachineInstr *MI) const; + MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; + unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI) const; + bool getInvertedPredSense(SmallVectorImpl &Cond) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; + int getMaxValue(const MachineInstr *MI) const; + unsigned getMemAccessSize(const MachineInstr* MI) const; + int getMinValue(const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; + bool getPredReg(ArrayRef Cond, unsigned &PredReg, + unsigned &PredRegPos, unsigned &PredRegFlags) const; + short getPseudoInstrPair(const MachineInstr *MI) const; + short getRegForm(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; + uint64_t getType(const MachineInstr* MI) const; + unsigned getUnits(const MachineInstr* MI) const; + unsigned getValidSubTargets(const unsigned Opcode) const; + + + /// getInstrTimingClassLatency - Compute the instruction latency of a given + /// instruction using 
Timing Class information, if available. + unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; + unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; void immediateExtend(MachineInstr *MI) const; - bool isConstExtended(const MachineInstr *MI) const; - unsigned getSize(const MachineInstr *MI) const; - int getDotNewPredJumpOp(MachineInstr *MI, - const MachineBranchProbabilityInfo *MBPI) const; - unsigned getAddrMode(const MachineInstr* MI) const; - bool isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const; - unsigned short getCExtOpNum(const MachineInstr *MI) const; - int getMinValue(const MachineInstr *MI) const; - int getMaxValue(const MachineInstr *MI) const; - bool NonExtEquivalentExists (const MachineInstr *MI) const; - short getNonExtOpcode(const MachineInstr *MI) const; - bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; - bool predOpcodeHasNot(ArrayRef Cond) const; - bool isEndLoopN(Opcode_t Opcode) const; - bool getPredReg(ArrayRef Cond, unsigned &PredReg, - unsigned &PredRegPos, unsigned &PredRegFlags) const; - int getCondOpcode(int Opc, bool sense) const; - + bool invertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) const; + void genAllInsnTimingClasses(MachineFunction &MF) const; + bool reversePredSense(MachineInstr* MI) const; + unsigned reversePrediction(unsigned Opcode) const; + bool validateBranchCond(const ArrayRef &Cond) const; }; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 3b32c10ed5b0..5cfeba720d90 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -13,7 +13,7 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" - +include "HexagonInstrEnc.td" // Pattern fragment that combines the value type and the register class // into a single parameter. 
// The pat frags in the definitions below need to have a named register, @@ -1426,9 +1426,6 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; - class CondStr { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } @@ -1606,8 +1603,6 @@ def EH_RETURN_JMPR : T_JMPr; def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; -def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)), - (J2_jumpr IntRegs:$dst)>; def: Pat<(brind (i32 IntRegs:$dst)), (J2_jumpr IntRegs:$dst)>; @@ -2825,7 +2820,7 @@ let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), + (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2859,7 +2854,7 @@ class T_MType_acc_pat2 def : T_MType_acc_pat2 ; def : T_MType_acc_pat1 ; -def : T_MType_acc_pat1 ; +def : T_MType_acc_pat1 ; def : T_MType_acc_pat2 ; //===----------------------------------------------------------------------===// @@ -3303,7 +3298,8 @@ class T_store_pi MajOp, bit isHalf, bit isPredNot, bit isPredNew > + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew> : STInst <(outs IntRegs:$_dst_), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -3341,7 +3337,8 @@ class T_pstore_pi ; //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. //===----------------------------------------------------------------------===// -let isNVStorable = 1 in class T_store_pr MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst <(outs IntRegs:$_dst_), @@ -3416,6 +3412,9 @@ class T_store_pr MajOp, bits<5> src3; let accessSize = AccessSz; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1)); + let IClass = 0b1010; let Inst{27-24} = 0b1101; @@ -3430,12 +3429,11 @@ def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; - def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in class T_store_io MajOp, bit isH = 0> + bits<3> MajOp, bit isH = 0> : STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, @@ -3455,6 +3453,8 @@ class T_store_io MajOp, bit isH = 0> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { @@ -3665,7 +3668,7 @@ def S2_allocframe: ST0Inst < // S2_storer[bhwdf]_pci: Store byte/half/word/double. 
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pci MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> @@ -3679,6 +3682,8 @@ class T_store_pci Mu; bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -3696,15 +3701,15 @@ class T_store_pci ; + ByteAccess>; def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, - HalfWordAccess>; + HalfWordAccess>; def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, - HalfWordAccess, "Rt.h">; + HalfWordAccess, "Rt.h">; def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, - WordAccess>; + WordAccess>; def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, - DoubleWordAccess>; + DoubleWordAccess>; let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in class T_storenew_pci ; //===----------------------------------------------------------------------===// // Circular stores with auto-increment register //===----------------------------------------------------------------------===// -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pcr MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> : STInst <(outs IntRegs:$_dst_), @@ -3775,6 +3780,8 @@ class T_store_pcr MajOp, bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -5783,8 +5790,20 @@ include "HexagonInstrInfoV5.td" // V5 Instructions - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// V60 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instructions - +//===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // ALU32/64/Vector + //===----------------------------------------------------------------------===/// include "HexagonInstrInfoVector.td" + +include "HexagonInstrAlias.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 65b0f4974367..87d6b359f5fb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -684,7 +684,7 @@ def: Pat<(i64 (zext (i32 IntRegs:$src1))), // Template class for store instructions with Absolute set addressing mode. //===----------------------------------------------------------------------===// let isExtended = 1, opExtendable = 1, opExtentBits = 6, - addrMode = AbsoluteSet, isNVStorable = 1 in + addrMode = AbsoluteSet in class T_ST_absset MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs IntRegs:$dst), @@ -696,6 +696,9 @@ class T_ST_absset MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs), @@ -766,6 +769,10 @@ class T_StoreAbsReg MajOp, bit isH> bits<2> u2; bits<5> Rt; + // Store upper-half and store doubleword cannot be NV. 
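+  // (Illustrative rationale:) a new-value store forwards a single 32-bit
+  // register produced in the same packet, so the 64-bit "memd" store and the
+  // upper-half ".h" store have no new-value form.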
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + let IClass = 0b0011; let Inst{27-24} = 0b1011; @@ -888,6 +898,8 @@ class T_pstore_rr MajOp, let isPredicatedFalse = isNot; let isPredicatedNew = isPredNew; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); let IClass = 0b0011; @@ -1826,43 +1838,22 @@ def: LogLogNot_pat; // below are needed to support code generation for PIC //===----------------------------------------------------------------------===// -def SDT_HexagonPICAdd +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_HexagonGOTAdd - : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; -def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; -def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternal>; -def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalJT>; -def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalBA>; - -// PIC: Map from a block address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from the computation to generate a GOT pointer to a PC-relative add -def: Pat<(Hexagonpic_add texternalsym:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from a jump table address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from a GOT-relative symbol reference to a load -def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), - (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; - -// PIC: Map from a static symbol reference to a PC-relative add -def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), - (C4_addipc u32ImmPred:$src1)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; //===----------------------------------------------------------------------===// // CR - @@ -1903,7 +1894,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], + (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -3290,27 +3281,33 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in + def 
RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; } // Function call that saves registers. let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; } //===----------------------------------------------------------------------===// // Template class for non-predicated store instructions with // GP-Relative or absolute addressing. //===----------------------------------------------------------------------===// -let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in +let hasSideEffects = 0, isPredicable = 1 in class T_StoreAbsGP MajOp, Operand AddrOp, bit isAbs, bit isHalf> - : STInst<(outs), (ins AddrOp:$addr, RC:$src), - mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""), + bits<2>MajOp, bit isAbs, bit isHalf> + : STInst<(outs), (ins ImmOp:$addr, RC:$src), + mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""), [], "", V2LDST_tc_st_SLOT01> { bits<19> addr; bits<5> src; @@ -3321,6 +3318,9 @@ class T_StoreAbsGP MajOp, bit isHalf, bit isNot, bit isNew> - : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2), + : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), [], "", ST_tc_st_SLOT01>, AddrModeRel { @@ -3351,6 +3350,8 @@ class T_StoreAbs_Pred MajOp, let isPredicatedNew = isNew; let isPredicatedFalse = isNot; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); let IClass = 0b1010; @@ -3371,7 +3372,7 @@ class T_StoreAbs MajOp, bit isHalf> - : T_StoreAbsGP , + : T_StoreAbsGP , AddrModeRel { string ImmOpStr = !cast(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3538,7 +3539,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; let isAsmParserOnly = 1 in class T_StoreGP MajOp, bit isHalf = 0> - : T_StoreAbsGP { + : T_StoreAbsGP { // Set BaseOpcode same as absolute addressing instructions so that // non-predicated GP-Rel instructions can be related to the predicated // Absolute instructions.
@@ -3553,7 +3554,7 @@ multiclass ST_GP ; + 0, isHalf>; // New-value store def NAME#newgp : T_StoreAbsGP_NV ; } @@ -3615,9 +3616,9 @@ let AddedComplexity = 100 in { //===----------------------------------------------------------------------===// let isPredicable = 1, hasSideEffects = 0 in class T_LoadAbsGP MajOp, Operand AddrOp, bit isAbs> - : LDInst <(outs RC:$dst), (ins AddrOp:$addr), - "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)", + bits<3> MajOp> + : LDInst <(outs RC:$dst), (ins ImmOp:$addr), + "$dst = "#mnemonic# "(#$addr)", [], "", V2LDST_tc_ld_SLOT01> { bits<5> dst; bits<19> addr; @@ -3642,7 +3643,7 @@ class T_LoadAbsGP MajOp> - : T_LoadAbsGP , AddrModeRel { + : T_LoadAbsGP , AddrModeRel { string ImmOpStr = !cast(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3660,10 +3661,11 @@ class T_LoadAbs MajOp, bit isPredNot, bit isPredNew> - : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr), + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { bits<5> dst; @@ -3737,7 +3739,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; let isAsmParserOnly = 1 in class T_LoadGP MajOp> - : T_LoadAbsGP , PredNewRel { + : T_LoadAbsGP , PredNewRel { let BaseOpcode = BaseOp#_abs; } @@ -3841,26 +3843,6 @@ let AddedComplexity = 100 in { def: Stoream_pat; } -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; - -// Transfer global address into a register -let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, -isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in -def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), - "$dst = #$src1", - [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>; - -// Transfer a block address into a register -def : Pat<(HexagonCONST32_GP tblockaddress:$src1), - (TFRI_V4 tblockaddress:$src1)>; - -let AddedComplexity = 50 in -def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), - (TFRI_V4 tglobaladdr:$src1)>; - // i8/i16/i32 -> i64 loads // We need a complexity of 120 here to override preceding handling of // zextload. diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 337f4ea2184a..823961fb6e6f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -98,21 +98,21 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), // HexagonInstrInfo.td patterns. 
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, isPseudo = 1 in def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), "$dst = #$src1", [(set F32:$dst, fpimm:$src1)]>, Requires<[HasV5T]>; -let isExtended = 1, opExtendable = 2, isPredicated = 1, - hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in +let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, + validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if ($src1) $dst = #$src2", []>, Requires<[HasV5T]>; -let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1, - isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, + hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if (!$src1) $dst = #$src2", []>, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td new file mode 100644 index 000000000000..897ada081534 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -0,0 +1,2241 @@ +//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Vector store +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +{ + class VSTInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> + : InstHexagon, OpcodeHexagon; + +} + +// Vector load +let Predicates = [HasV60T, UseHVX] in +let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in + class V6_LDInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_LD, + IType type = TypeCVI_VM_LD> + : InstHexagon; + +let Predicates = [HasV60T, UseHVX] in +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +class V6_STInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> +: InstHexagon; + +//===----------------------------------------------------------------------===// +// Vector loads with base + immediate offset +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vload_ai + : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2), + asmStr>; + +let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in +class T_vload_ai_128B + : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2), + asmStr>; + +let isCVLoadable = 1, hasNewValue = 1 in { + def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_enc; + def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_enc; + // 128B + def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_128B_enc; + def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in { + def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_enc; + def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_128B_enc; +} + +let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1, + hasNewValue = 1 in { + def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_enc; + def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_enc; + // 128B + def V6_vL32b_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_128B_enc; + def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_128B_enc; +} + + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in { + def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_enc; + def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">, + V6_vL32b_nt_tmp_ai_enc; + // 128B + def V6_vL32b_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_128B_enc; + def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_nt_tmp_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vstore_ai + : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + 
mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_ai_64B + : T_vstore_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_ai_128B + : T_vstore_ai ; + +let isNVStorable = 1 in { + def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_ai_enc; + def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_ai_128B_enc; +} + +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_enc; + def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_enc; + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_128B_enc; +} +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, + Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in +class T_vstore_new_ai + : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_ai_64B + : T_vstore_new_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_ai_128B + : T_vstore_new_ai ; + +def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc; +def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">, + V6_vS32b_new_ai_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_enc; + def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1 in +class T_vstore_pred_ai + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) " + #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_pred_ai_64B + : T_vstore_pred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_ai_128B + : T_vstore_pred_ai ; + +let isNVStorable = 1 in { + def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_enc; + def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_enc; + // 128B + def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_128B_enc; + def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_enc; + def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>, + 
V6_vS32b_nt_npred_ai_enc; + // 128B + def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_128B_enc; + def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_enc; + def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_enc; + // 128B + def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_128B_enc; + def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset in +class T_vstore_qpred_ai + : V6_STInst <(outs), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4"> { + let isPredicatedFalse = isPredNot; +} + +let accessSize = Vector64Access in +class T_vstore_qpred_ai_64B + : T_vstore_qpred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_ai_128B + : T_vstore_qpred_ai ; + +def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc; +def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>, + V6_vS32b_nqpred_ai_enc; +def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>, + V6_vS32b_nt_qpred_ai_enc; +def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>, + V6_vS32b_nt_nqpred_ai_enc; +// 128B +def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc; +def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>, + V6_vS32b_nqpred_ai_128B_enc; +def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>, + V6_vS32b_nt_qpred_ai_128B_enc; +def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>, + V6_vS32b_nt_nqpred_ai_128B_enc; + + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3, + isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in +class T_vstore_new_pred_ai + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_ai_64B + : T_vstore_new_pred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_ai_128B + : T_vstore_new_pred_ai ; + + +def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">, + V6_vS32b_new_pred_ai_enc; +def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_enc; +// 128B +def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">, + V6_vS32b_new_pred_ai_128B_enc; +def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_128B_enc; +let isNonTemporal = 1 in { + def 
V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_enc; + def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_enc; + // 128B + def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_128B_enc; + def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, hasNewValue = 1 in +class T_vload_pi + : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2), asmStr, [], + "$src1 = $_dst_">; + +let accessSize = Vector64Access in +class T_vload_pi_64B + : T_vload_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vload_pi_128B + : T_vload_pi ; + +let isCVLoadable = 1 in { + def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_enc; + def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_enc; + // 128B + def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_128B_enc; + def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in { + def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_enc; + // 128B + def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_128B_enc; +} + +let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in { + def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_enc; + def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_enc; + // 128B + def V6_vL32b_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_128B_enc; + def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_128B_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_enc; + def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_enc; + //128B + def V6_vL32b_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_128B_enc; + def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], + "$src1 = $_dst_">, NewValueRel; + +let accessSize = Vector64Access in +class T_vstore_pi_64B + : T_vstore_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pi_128B + : T_vstore_pi ; + +let isNVStorable = 1 in { + def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; + def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pi_128B_enc; +} + +let isNVStorable = 1 , isNonTemporal = 1 in { + def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_enc; + def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_128B_enc; +} + + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_enc; + def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment unconditional .new vector stores with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, isNVStore = 1 in +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, + opNewValue = 3, isNVStore = 1 in +class T_vstore_new_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [], + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pi_64B + : T_vstore_new_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pi_128B + : T_vstore_new_pi ; + + +def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">, + V6_vS32b_new_pi_enc; +def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">, + V6_vS32b_new_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_enc; + def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional vector stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, addrMode = PostInc in +class T_vstore_pred_pi + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_pred_pi_64B + : T_vstore_pred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_pi_128B + : T_vstore_pred_pi ; + +let isNVStorable = 1 in { + def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_enc; + def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_enc; + // 128B + def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_128B_enc; + def V6_vS32b_npred_pi_128B : 
T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_enc; + def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_enc; + // 128B + def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_128B_enc; + def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_enc; + def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_enc; + // 128B + def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_128B_enc; + def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_qpred_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">; + +let accessSize = Vector64Access in +class T_vstore_qpred_pi_64B + : T_vstore_qpred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_pi_128B + : T_vstore_qpred_pi ; + +def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc; +def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc; +// 128B +def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B, + V6_vS32b_qpred_pi_128B_enc; +def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>, + V6_vS32b_nqpred_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>, + V6_vS32b_nt_qpred_pi_enc; + def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>, + V6_vS32b_nt_nqpred_pi_enc; + // 128B + def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>, + V6_vS32b_nt_qpred_pi_128B_enc; + def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>, + V6_vS32b_nt_nqpred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with immediate offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in +class T_vstore_new_pred_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_"> , NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_pi_64B + : T_vstore_new_pred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_pi_128B + : T_vstore_new_pred_pi ; + +def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">, + 
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+    isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
+class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
+                            bit isPredNot, bit isNT>
+  : V6_STInst <(outs IntRegs:$_dst_),
+               (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+               "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+                 #!if(isNT, ":nt", "")#" = $src4.new", [],
+               "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+  let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+  : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0,
+                                 bit isNT = 0>
+  : T_vstore_new_pred_pi <baseOp#"_128B", s3_7Imm, VectorRegs128B,
+                          isPredNot, isNT>;
+
+def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
+                           V6_vS32b_new_pred_pi_enc;
+def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
+                            V6_vS32b_new_npred_pi_enc;
+// 128B
+def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
+                                V6_vS32b_new_pred_pi_128B_enc;
+def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
+                                 V6_vS32b_new_npred_pi_128B_enc;
+let isNonTemporal = 1 in {
+  def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
+                                V6_vS32b_nt_new_pred_pi_enc;
+  def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
+                                 V6_vS32b_nt_new_npred_pi_enc;
+  // 128B
+  def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
+                                     <"vS32b_pi", 0, 1>,
+                                     V6_vS32b_nt_new_pred_pi_128B_enc;
+  def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
+                                      <"vS32b_pi", 1, 1>,
+                                      V6_vS32b_nt_new_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with register offset
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_vload_ppu <string asmStr>
+  : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
+               (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
+               "$src1 = $_dst_">, NewValueRel;
+
+let isCVLoadable = 1 in {
+  def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
+                     V6_vL32b_ppu_enc;
+  def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
+                        V6_vL32b_nt_ppu_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
+def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
+                    V6_vL32Ub_ppu_enc;
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+  def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
+                         V6_vL32b_cur_ppu_enc;
+  def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
+                            V6_vL32b_nt_cur_ppu_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+  def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
+                         V6_vL32b_tmp_ppu_enc;
+  def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
+                            V6_vL32b_nt_tmp_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+               (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+               mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+               "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+                     V6_vS32b_ppu_enc;
+  let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+  def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+                        V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
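+
+// In the "ppu" forms the base register is post-incremented by a modifier
+// register (ModRegs, i.e. M0/M1) rather than by an immediate.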
BaseOpcode = "vS32b_ppu" in +def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc; + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in +def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_vstore_pred_ppu + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc; + def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc; +} + +let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>, + V6_vS32b_nt_pred_ppu_enc; + def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>, + V6_vS32b_nt_npred_ppu_enc; +} + +let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, + Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">, + V6_vS32Ub_pred_ppu_enc; + def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>, + V6_vS32Ub_npred_ppu_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with register offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +class T_vstore_qpred_ppu + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel; + +def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc; +def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc; +def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>, + V6_vS32b_nt_qpred_ppu_enc; +def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>, + V6_vS32b_nt_nqpred_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, isNVStore = 1 in +class T_vstore_new_pred_ppu + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu, + V6_vS32b_new_pred_ppu_enc; + def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>, + V6_vS32b_new_npred_ppu_enc; +} + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { +def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>, + V6_vS32b_nt_new_pred_ppu_enc; +def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, + V6_vS32b_nt_new_npred_ppu_enc; +} + +let isPseudo = 1, validSubTargets = HasV60SubT 
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template <string mnemonic, Operand ImmOp, RegisterClass RC>:
+  VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+          mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+      Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+      Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+            (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+            (STrivv_indexed_128B IntRegs:$addr, #0,
+                                 (VTDbl VecDblRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : STrivv_pats <v16i64, v32i64>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v128i8, v256i8>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned stores
+  def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+            (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Aligned stores
+  def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+            (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IFF into vector store.
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTSgl VectorRegs:$src1),
+                   (add IntRegs:$src2, s4_6ImmPred:$offset)),
+            (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+                         (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // Fold Add R+IFF into vector store 128B.
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTDbl VectorRegs128B:$src1),
+                   (add IntRegs:$src2, s4_7ImmPred:$offset)),
+            (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+                              (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : vS32b_ai_pats <v64i8,  v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64,  v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template <string mnemonic, Operand ImmOp, RegisterClass RC>
+  : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+               "$dst="#mnemonic#"($addr+#$off)",
+               []>,
+    Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (LDrivv_indexed IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v16i64, v32i64>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v128i8, v256i8>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned loads
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (V6_vL32b_ai IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Load
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IFF into vector load.
+  let AddedComplexity = 10 in
+  def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+            (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+        Requires<[UseHVXDbl]>;
+
+  let AddedComplexity = 10 in
+  def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+            (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+        Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8,  v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64,  v16i64>;
+
+// Store vector predicate pseudo.
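+// These never reach the assembler: the ".error \"should not emit\"" asm
+// string below triggers an assembler error if one ever survives to emission.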
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector predicate pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+    opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store vector pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriv_pseudo_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def STriv_pseudo_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STrivv_pseudo_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def STrivv_pseudo_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +class VSELInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VA_DV, + IType type = TypeCVI_VA_DV> + : InstHexagon; + +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +} + +def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval), SETEQ)), + (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), + (i32 IntRegs:$rhs))), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval)))>; + + +let hasNewValue = 1 in +class T_vmpy + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + asmString >; + +multiclass T_vmpy { + def NAME : T_vmpy ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_vmpy (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_vmpy_VV : + T_vmpy ; + +multiclass T_vmpy_WW : + T_vmpy ; + +multiclass T_vmpy_VW : + T_vmpy ; + +multiclass T_vmpy_WV : + T_vmpy ; + +defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc; +defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc; +defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc; +defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc; +defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc; +defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc; +defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc; +defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc; +defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc; +defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc; +defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc; + +let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in +defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc; + +defm V6_vdmpybus_dv : + T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc; +defm V6_vdmpyhsusat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc; +defm V6_vdmpyhsuisat : + T_vmpy_VW <"$dst.w = 
vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc; +defm V6_vdmpyhsat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc; +defm V6_vdmpyhisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc; +defm V6_vdmpyhb_dv : + T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc; +defm V6_vmpyhss : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc; +defm V6_vmpyhsrs : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in +defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc; + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc; +defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc; +defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc; +defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc; +defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc; +defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc; +defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc; +defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc; +defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc; +defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc; +} + +let hasNewValue = 1 in +class T_HVX_alu + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >{ + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_alu { + def NAME : T_HVX_alu ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_alu_VV : + T_HVX_alu ; + +multiclass T_HVX_alu_WW : + T_HVX_alu ; + +multiclass T_HVX_alu_WV : + T_HVX_alu ; + + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vrmpyubv : + T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc; +defm V6_vrmpybv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc; +defm V6_vrmpybusv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc; +defm V6_vabsdiffub : + T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc; +defm V6_vabsdiffh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc; +defm V6_vabsdiffuh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc; +defm V6_vabsdiffw : + T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc; +} + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhvsat : + T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc; +defm V6_vmpyhvsrs : + T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc; +defm V6_vmpyih : + T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc; +} + +defm V6_vand : + T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc; +defm V6_vor : + T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc; +defm V6_vxor : + T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc; +defm V6_vaddw : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc; +defm V6_vaddubsat : + T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc; +defm V6_vadduhsat : 
+ T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc; +defm V6_vaddhsat : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc; +defm V6_vaddwsat : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc; +defm V6_vsubb : + T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc; +defm V6_vsubh : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc; +defm V6_vsubw : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc; +defm V6_vsububsat : + T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc; +defm V6_vsubuhsat : + T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc; +defm V6_vsubhsat : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc; +defm V6_vsubwsat : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc; +defm V6_vavgub : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc; +defm V6_vavguh : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc; +defm V6_vavgh : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc; +defm V6_vavgw : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc; +defm V6_vnavgub : + T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc; +defm V6_vnavgh : + T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc; +defm V6_vnavgw : + T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc; +defm V6_vavgubrnd : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc; +defm V6_vavguhrnd : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc; +defm V6_vavghrnd : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc; +defm V6_vavgwrnd : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc; + +defm V6_vmpybv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc; +defm V6_vmpyubv : + T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc; +defm V6_vmpybusv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc; +defm V6_vmpyhv : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc; +defm V6_vmpyuhv : + T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc; +defm V6_vmpyhus : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc; +defm V6_vaddubh : + T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc; +defm V6_vadduhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc; +defm V6_vaddhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc; +defm V6_vsububh : + T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc; +defm V6_vsubuhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc; +defm V6_vsubhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc; + +defm V6_vaddb_dv : + T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc; +defm V6_vaddh_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc; +defm V6_vaddw_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc; +defm V6_vaddubsat_dv : + T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc; +defm V6_vadduhsat_dv : + T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc; +defm V6_vaddhsat_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc; +defm V6_vaddwsat_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, 
V6_vaddwsat_dv_enc; +defm V6_vsubb_dv : + T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc; +defm V6_vsubh_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc; +defm V6_vsubw_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc; +defm V6_vsububsat_dv : + T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc; +defm V6_vsubuhsat_dv : + T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc; +defm V6_vsubhsat_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc; +defm V6_vsubwsat_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc; + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in { +defm V6_vmpabusv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc; +defm V6_vmpabuuv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc; +} + +let isAccumulator = 1, hasNewValue = 1 in +class T_HVX_vmpyacc + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_vmpyacc_both { + def NAME : T_HVX_vmpyacc ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc (RCout#"128B"), + !cast(RCin1#"128B"), + !cast(RCin2# + !if(!eq (!cast(RCin2), "IntRegs"), "", "128B"))>; +} + +multiclass T_HVX_vmpyacc_VVR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_VWR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WVR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WWR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_VVV : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WVV : + T_HVX_vmpyacc_both ; + + +defm V6_vtmpyb_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">, + V6_vtmpyb_acc_enc; +defm V6_vtmpybus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">, + V6_vtmpybus_acc_enc; +defm V6_vtmpyhb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">, + V6_vtmpyhb_acc_enc; +defm V6_vdmpyhb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_acc_enc; +defm V6_vrmpyub_acc : + T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyub_acc_enc; +defm V6_vrmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybus_acc_enc; +defm V6_vdmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_acc_enc; +defm V6_vdmpybus_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_dv_acc_enc; +defm V6_vdmpyhsuisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">, + V6_vdmpyhsuisat_acc_enc; +defm V6_vdmpyhisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhisat_acc_enc; +defm V6_vdmpyhb_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_dv_acc_enc; +defm V6_vmpybus_acc : + T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybus_acc_enc; +defm V6_vmpabus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">, + V6_vmpabus_acc_enc; +defm V6_vmpahb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">, + V6_vmpahb_acc_enc; +defm V6_vmpyhsat_acc : + T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">, + V6_vmpyhsat_acc_enc; +defm V6_vmpyuh_acc : + T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuh_acc_enc; +defm V6_vmpyiwb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">, + 
V6_vmpyiwb_acc_enc; +defm V6_vdsaduh_acc : + T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">, + V6_vdsaduh_acc_enc; +defm V6_vmpyihb_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">, + V6_vmpyihb_acc_enc; +defm V6_vmpyub_acc : + T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyub_acc_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhsusat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">, + V6_vdmpyhsusat_acc_enc; +defm V6_vdmpyhsat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhsat_acc_enc; +defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR + <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vaslw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc; +defm V6_vasrw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc; +} + +defm V6_vdmpyhvsat_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhvsat_acc_enc; +defm V6_vmpybusv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybusv_acc_enc; +defm V6_vmpybv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc; +defm V6_vmpyhus_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc; +defm V6_vmpyhv_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc; +defm V6_vmpyiewh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">, + V6_vmpyiewh_acc_enc; +defm V6_vmpyiewuh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">, + V6_vmpyiewuh_acc_enc; +defm V6_vmpyih_acc : + T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc; +defm V6_vmpyowh_rnd_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">, + V6_vmpyowh_rnd_sacc_enc; +defm V6_vmpyowh_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">, + V6_vmpyowh_sacc_enc; +defm V6_vmpyubv_acc : + T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyubv_acc_enc; +defm V6_vmpyuhv_acc : + T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuhv_acc_enc; +defm V6_vrmpybusv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybusv_acc_enc; +defm V6_vrmpybv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc; +defm V6_vrmpyubv_acc : + T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyubv_acc_enc; + + +class T_HVX_vcmp + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_vcmp { + def NAME : T_HVX_vcmp ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp ; +} + +defm V6_veqb_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc; +defm V6_veqh_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc; +defm V6_veqw_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc; +defm V6_vgtb_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc; +defm V6_vgth_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc; +defm V6_vgtw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc; +defm V6_vgtub_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc; +defm V6_vgtuh_and : + T_HVX_vcmp <"$dst &= 
vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc; +defm V6_vgtuw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc; +defm V6_veqb_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc; +defm V6_veqh_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc; +defm V6_veqw_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc; +defm V6_vgtb_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc; +defm V6_vgth_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc; +defm V6_vgtw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc; +defm V6_vgtub_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc; +defm V6_vgtuh_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc; +defm V6_vgtuw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc; +defm V6_veqb_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc; +defm V6_veqh_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc; +defm V6_veqw_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc; +defm V6_vgtb_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc; +defm V6_vgth_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc; +defm V6_vgtw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc; +defm V6_vgtub_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc; +defm V6_vgtuh_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc; +defm V6_vgtuw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc; + +defm V6_vminub : + T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc; +defm V6_vminuh : + T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc; +defm V6_vminh : + T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc; +defm V6_vminw : + T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc; +defm V6_vmaxub : + T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc; +defm V6_vmaxuh : + T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc; +defm V6_vmaxh : + T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc; +defm V6_vmaxw : + T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc; +defm V6_vshuffeb : + T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc; +defm V6_vshuffob : + T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc; +defm V6_vshufeh : + T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc; +defm V6_vshufoh : + T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vmpyowh_rnd : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">, + V6_vmpyowh_rnd_enc; +defm V6_vmpyiewuh : + T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc; +defm V6_vmpyewuh : + T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc; +defm V6_vmpyowh : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc; +defm V6_vmpyiowh : + T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc; +} +let Itinerary = CVI_VX, Type = TypeCVI_VX in +defm V6_vmpyieoh : + T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in { +defm V6_vshufoeh : + T_HVX_alu_WV <"$dst.h = 
vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc; +defm V6_vshufoeb : + T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc; +} + +let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +defm V6_vcombine : + T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { +defm V6_vsathub : + T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; +defm V6_vsatwh : + T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vroundwh : + T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc; +defm V6_vroundwuh : + T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc; +defm V6_vroundhb : + T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc; +defm V6_vroundhub : + T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc; +defm V6_vasrwv : + T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc; +defm V6_vlsrwv : + T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc; +defm V6_vlsrhv : + T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc; +defm V6_vasrhv : + T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc; +defm V6_vaslwv : + T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc; +defm V6_vaslhv : + T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc; +} + +defm V6_vaddb : + T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc; +defm V6_vaddh : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdelta : + T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc; +defm V6_vrdelta : + T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc; +defm V6_vdealb4w : + T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc; +defm V6_vpackeb : + T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc; +defm V6_vpackeh : + T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc; +defm V6_vpackhub_sat : + T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc; +defm V6_vpackhb_sat : + T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc; +defm V6_vpackwuh_sat : + T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc; +defm V6_vpackwh_sat : + T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc; +defm V6_vpackob : + T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc; +defm V6_vpackoh : + T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_HVX_condALU + : CVI_VA_Resource1 <(outs RC2:$dst), + (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_condALU { + def NAME : T_HVX_condALU ; + let isCodeGenOnly = 1 in + def NAME#_128B : 
T_HVX_condALU ; +} + +defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">, + V6_vaddbq_enc; +defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">, + V6_vaddhq_enc; +defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">, + V6_vaddwq_enc; +defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">, + V6_vsubbq_enc; +defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">, + V6_vsubhq_enc; +defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">, + V6_vsubwq_enc; +defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">, + V6_vaddbnq_enc; +defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">, + V6_vaddhnq_enc; +defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">, + V6_vaddwnq_enc; +defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">, + V6_vsubbnq_enc; +defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">, + V6_vsubhnq_enc; +defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">, + V6_vsubwnq_enc; + +let hasNewValue = 1 in +class T_HVX_alu_2op + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1), + asmString >{ + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_alu_2op { + def NAME : T_HVX_alu_2op ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu_2op (RCout#"128B"), + !cast(RCin#"128B")>; +} + +let hasNewValue = 1 in +multiclass T_HVX_alu_2op_VV : + T_HVX_alu_2op ; + +multiclass T_HVX_alu_2op_WV : + T_HVX_alu_2op ; + + +defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">, + V6_vabsh_enc; +defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">, + V6_vabsw_enc; +defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">, + V6_vabsh_sat_enc; +defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">, + V6_vabsw_sat_enc; +defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">, + V6_vnot_enc; +defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">, + V6_vassign_enc; + +defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">, + V6_vzb_enc; +defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">, + V6_vzh_enc; +defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">, + V6_vsb_enc; +defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">, + V6_vsh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">, + V6_vdealh_enc; +defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">, + V6_vdealb_enc; +defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">, + V6_vshuffh_enc; +defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">, + V6_vshuffb_enc; +} + +let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in { +defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">, + V6_vunpackub_enc; +defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">, + V6_vunpackuh_enc; +defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">, + V6_vunpackb_enc; +defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">, + V6_vunpackh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">, + V6_vcl0w_enc; +defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">, + V6_vcl0h_enc; +defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">, + V6_vnormamtw_enc; +defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">, + V6_vnormamth_enc; +defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">, + 
V6_vpopcounth_enc; +} + +let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, + Type = TypeCVI_VX_DV in +class T_HVX_vmpyacc2 + : CVI_VA_Resource1 <(outs RC:$dst), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString, [], "$dst = $_src_" > ; + + +multiclass T_HVX_vmpyacc2 { + def NAME : T_HVX_vmpyacc2 ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc2 ; +} + +defm V6_vrmpybusi_acc : + T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">, + V6_vrmpybusi_acc_enc; +defm V6_vrsadubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">, + V6_vrsadubi_acc_enc; +defm V6_vrmpyubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">, + V6_vrmpyubi_acc_enc; + + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in +class T_HVX_vmpy2 + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString>; + + +multiclass T_HVX_vmpy2 { + def NAME : T_HVX_vmpy2 ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpy2 ; +} + +defm V6_vrmpybusi : + T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc; +defm V6_vrsadubi : + T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc; +defm V6_vrmpyubi : + T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc; + + +let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS, + hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in +class T_HVX_perm + : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_), + (ins RC:$src1, RC:$src2, IntRegs:$src3), + asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >; + +multiclass T_HVX_perm { + def NAME : T_HVX_perm ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_perm ; +} + +let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in { + defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc; + defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc; +} + +// Conditional vector move. +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_cmov + : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_cmov { + def NAME : T_HVX_cmov ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_cmov ; +} + +defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc; +defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc; + +// Conditional vector combine. 
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_ccombine + : CVI_VA_Resource1 < (outs RCout:$dst), + (ins PredRegs:$src1, RCin:$src2, RCin:$src3), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_ccombine { + def NAME : T_HVX_ccombine ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_ccombine ; +} + +defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc; +defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc; + +let hasNewValue = 1 in +class T_HVX_shift + : CVI_VX_DV_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString >; + +multiclass T_HVX_shift { + def NAME : T_HVX_shift ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_shift (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_shift_VV : + T_HVX_shift ; + +multiclass T_HVX_shift_WV : + T_HVX_shift ; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in { +defm V6_valignb : + T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc; +defm V6_vlalignb : + T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrwh : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc; +defm V6_vasrwhsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwhsat_enc; +defm V6_vasrwhrndsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">, + V6_vasrwhrndsat_enc; +defm V6_vasrwuhsat : + T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwuhsat_enc; +defm V6_vasrhubsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">, + V6_vasrhubsat_enc; +defm V6_vasrhubrndsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhubrndsat_enc; +defm V6_vasrhbrndsat : + T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhbrndsat_enc; +} + +// Assembler mapped -- alias? 
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { +defm V6_vshuffvdd : + T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc; +defm V6_vdealvdd : + T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc; +} + +let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in +class T_HVX_unpack + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_unpack { + def NAME : T_HVX_unpack ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_unpack ; +} + +defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc; +defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_valign + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + asmString>; + +multiclass T_HVX_valign { + def NAME : T_HVX_valign ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_valign ; +} + +defm V6_valignbi : + T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc; +defm V6_vlalignbi : + T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +class T_HVX_predAlu + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2), + asmString>; + +multiclass T_HVX_predAlu { + def NAME : T_HVX_predAlu ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_predAlu ; +} + +defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc; +defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc; +defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc; +defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc; +defm V6_pred_and_n : + T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_prednot + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1), + "$dst = not($src1)">, V6_pred_not_enc; + +def V6_pred_not : T_HVX_prednot ; +let isCodeGenOnly = 1 in +def V6_pred_not_128B : T_HVX_prednot ; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_vcmp2 + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >; + +multiclass T_HVX_vcmp2 { + def NAME : T_HVX_vcmp2 ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp2 ; +} + +defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc; +defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc; +defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc; +defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc; +defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc; +defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc; +defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc; +defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc; +defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc; + +let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt_acc + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc; + +def V6_vandqrt_acc : 
T_V6_vandqrt_acc ; +let isCodeGenOnly = 1 in +def V6_vandqrt_acc_128B : T_V6_vandqrt_acc ; + +let isAccumulator = 1 in +class T_V6_vandvrt_acc + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc; + +def V6_vandvrt_acc : T_V6_vandvrt_acc ; +let isCodeGenOnly = 1 in +def V6_vandvrt_acc_128B : T_V6_vandvrt_acc ; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)" >, V6_vandqrt_enc; + +def V6_vandqrt : T_V6_vandqrt ; +let isCodeGenOnly = 1 in +def V6_vandqrt_128B : T_V6_vandqrt ; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_lvsplatw + : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsplat($src1)" >, V6_lvsplatw_enc; + +def V6_lvsplatw : T_V6_lvsplatw ; +let isCodeGenOnly = 1 in +def V6_lvsplatw_128B : T_V6_lvsplatw ; + + +let hasNewValue = 1 in +class T_V6_vinsertwr + : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1), + "$dst.w = vinsert($src1)", [], "$dst = $_src_">, + V6_vinsertwr_enc; + +def V6_vinsertwr : T_V6_vinsertwr ; +let isCodeGenOnly = 1 in +def V6_vinsertwr_128B : T_V6_vinsertwr ; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in +class T_V6_pred_scalar2 + : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsetq($src1)">, V6_pred_scalar2_enc; + +def V6_pred_scalar2 : T_V6_pred_scalar2 ; +let isCodeGenOnly = 1 in +def V6_pred_scalar2_128B : T_V6_pred_scalar2 ; + +class T_V6_vandvrt + : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)">, V6_vandvrt_enc; + +def V6_vandvrt : T_V6_vandvrt ; +let isCodeGenOnly = 1 in +def V6_vandvrt_128B : T_V6_vandvrt ; + +let validSubTargets = HasV60SubT in +class T_HVX_rol + : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; + +class T_HVX_rol_R + : T_HVX_rol ; +class T_HVX_rol_P + : T_HVX_rol ; + +def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; +let hasNewValue = 1, opNewValue = 0 in +def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc; + +let validSubTargets = HasV60SubT in +class T_HVX_rol_acc + : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2), + asmString, [], "$dst = $_src_" >; + +class T_HVX_rol_acc_P + : T_HVX_rol_acc ; + +class T_HVX_rol_acc_R + : T_HVX_rol_acc ; + +def S6_rol_i_p_nac : + T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; +def S6_rol_i_p_acc : + T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc; +def S6_rol_i_p_and : + T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc; +def S6_rol_i_p_or : + T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc; +def S6_rol_i_p_xacc : + T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc; + +let hasNewValue = 1, opNewValue = 0 in { +def S6_rol_i_r_nac : + T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc; +def S6_rol_i_r_acc : + T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc; +def S6_rol_i_r_and : + T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc; +def S6_rol_i_r_or : + T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc; +def S6_rol_i_r_xacc : + T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc; +} + +let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in +class T_V6_extractw + : LD1Inst <(outs 
IntRegs:$dst), (ins RC:$src1, IntRegs:$src2), + "$dst = vextract($src1,$src2)">, V6_extractw_enc; + +def V6_extractw : T_V6_extractw ; +let isCodeGenOnly = 1 in +def V6_extractw_128B : T_V6_extractw ; + +let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in +class T_sys0op + : ST1Inst <(outs), (ins), asmString>; + +let isSolo = 1, validSubTargets = HasV55SubT in { +def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc; +def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc; +def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc; +} + +class T_sys1op + : ST1Inst <(outs), (ins RC:$src1), asmString>; + +class T_sys1op_R : T_sys1op ; +class T_sys1op_P : T_sys1op ; + +let isSoloAX = 1, validSubTargets = HasV55SubT in +def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc; + +let isSolo = 1, validSubTargets = HasV60SubT in { +def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc; +def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc; +} + +let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1, + validSubTargets = HasV55SubT in +def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = l2locka($src1)">, Y5_l2locka_enc; + +// not defined on etc side. why? +// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc; + +let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1, + hasSideEffects = 0, +validSubTargets = HasV55SubT in +def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2), + (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2), + "$dst1,$dst2 = vacsh($src1,$src2)", [], + "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_alu2 + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>; + +multiclass T_HVX_alu2 { + def NAME : T_HVX_alu2 ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu2 (RC#"128B"), + VecPredRegs128B, VectorRegs128B>; +} + +multiclass T_HVX_alu2_V : + T_HVX_alu2 ; + +multiclass T_HVX_alu2_W : + T_HVX_alu2 ; + +defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1, + hasSideEffects = 0 in +defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc; + +class T_HVX_vlutb + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>; + +multiclass T_HVX_vlutb { + def NAME : T_HVX_vlutb ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_V : + T_HVX_vlutb ; + +multiclass T_HVX_vlutb_W : + T_HVX_vlutb ; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in +class T_HVX_vlutb_acc + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_vlutb_acc { + def NAME : T_HVX_vlutb_acc ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb_acc(RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_acc_V : + T_HVX_vlutb_acc ; + +multiclass T_HVX_vlutb_acc_W : + T_HVX_vlutb_acc ; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in +defm V6_vlutvvb: + T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, 
hasNewValue = 1 in +defm V6_vlutvwh: + T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc; + +let hasNewValue = 1 in { + defm V6_vlutvvb_oracc: + T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">, + V6_vlutvvb_oracc_enc; + defm V6_vlutvwh_oracc: + T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">, + V6_vlutvwh_oracc_enc; +} + +// It's a fake instruction and should not be defined? +def S2_cabacencbin + : SInst2<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3), + "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc; + +// Vhist instructions +def V6_vhistq + : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1), + "vhist($src1)">, V6_vhistq_enc; + +def V6_vhist + : CVI_HIST_Resource1 <(outs), (ins), + "vhist" >, V6_vhist_enc; diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td index f4fb946d5bad..96dd5315b87f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -35,6 +35,34 @@ multiclass bitconvert_64 { (a DoubleRegs:$src)>; } +multiclass bitconvert_vec { + def : Pat <(b (bitconvert (a VectorRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VectorRegs:$src)>; +} + +multiclass bitconvert_dblvec { + def : Pat <(b (bitconvert (a VecDblRegs:$src))), + (b VecDblRegs:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs:$src))), + (a VecDblRegs:$src)>; +} + +multiclass bitconvert_predvec { + def : Pat <(b (bitconvert (a VecPredRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VecPredRegs:$src)>; +} + +multiclass bitconvert_dblvec128B { + def : Pat <(b (bitconvert (a VecDblRegs128B:$src))), + (b VecDblRegs128B:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), + (a VecDblRegs128B:$src)>; +} + // Bit convert vector types. defm : bitconvert_32; defm : bitconvert_32; @@ -47,6 +75,21 @@ defm : bitconvert_64; defm : bitconvert_64; defm : bitconvert_64; +defm : bitconvert_vec; +defm : bitconvert_vec; +defm : bitconvert_vec; + +defm : bitconvert_dblvec; +defm : bitconvert_dblvec; +defm : bitconvert_dblvec; + +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; + +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. 
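The bitconvert multiclasses above record that same-width HVX types live in the same register file, so a bitcast between, say, v16i32 and v32i16 selects to nothing: the source register is reused as-is. As a host-side sketch of the semantics those Pat records encode (plain C++, no LLVM dependency; the 64-byte width matching a single-mode HVX register is an assumption of this sketch):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  // One 64-byte HVX register viewed as v16i32 and as v32i16: the bitconvert
  // patterns above say this reinterpretation needs no instruction at all.
  std::array<int32_t, 16> AsWords{};
  AsWords[0] = 0x00020001;               // two halfwords packed in one word

  std::array<int16_t, 32> AsHalves;
  static_assert(sizeof(AsWords) == sizeof(AsHalves), "same register width");
  std::memcpy(AsHalves.data(), AsWords.data(), sizeof(AsWords)); // the "cast"

  // On a little-endian host the low halfword comes first.
  std::cout << AsHalves[0] << " " << AsHalves[1] << "\n"; // prints: 1 2
  return 0;
}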
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 1d0d015f798b..b207aaf392f4 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -691,15 +691,15 @@ def: T_RR_pat; def: T_RR_pat; def: T_RR_pat; -def: T_II_pat; +def: T_II_pat; def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; // Mux -def : T_QRI_pat; -def : T_QIR_pat; -def : T_QII_pat; +def : T_QRI_pat; +def : T_QIR_pat; +def : T_QII_pat; // Shift halfword def : T_R_pat; @@ -720,17 +720,17 @@ def : T_RR_pat; def : T_RR_pat; def : T_RR_pat; -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; +def : T_RI_pat; +def : T_RI_pat; +def : T_RI_pat; -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s8ExtPred:$src2)))>; + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; // The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), @@ -1289,3 +1289,5 @@ def: T_stc_pat; include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" +include "HexagonIntrinsicsV60.td" + diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td new file mode 100644 index 000000000000..24a3e4d36de9 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -0,0 +1,836 @@ +//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { +def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; + +def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; +} +let isPseudo = 1 in +def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=vassignp_W($src1)", + [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=vassignp_W_128B($src1)", + [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B + VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; + +let AddedComplexity = 100 in { +def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_loreg)) >, + Requires<[UseHVXDbl]>; + +def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_hireg)) >, + Requires<[UseHVXDbl]>; +} + +def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))), + (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))), + (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))), + (v64i8 (V6_vandqrt(v512i1 
VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))), + (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +let AddedComplexity = 140 in { +def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai IntRegs:$addr, 0, + (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (load (i32 IntRegs:$addr))), + (v512i1 (V6_vandvrt + (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai_128B IntRegs:$addr, 0, + (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (load (i32 IntRegs:$addr))), + (v1024i1 (V6_vandvrt_128B + (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; +} + +multiclass T_R_pat { + def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>, + Requires<[UseHVXSgl]>; + def: Pat<(!cast(IntID#"_128B") IntRegs:$src1), + (!cast(MI#"_128B") IntRegs:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_V_pat { + def: Pat<(IntID VectorRegs:$src1), + (MI VectorRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1), + (!cast(MI#"_128B") VectorRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_Q_pat { + def: Pat<(IntID VecPredRegs:$src1), + (MI VecPredRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1), + (!cast(MI#"_128B") VecPredRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WR_pat { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2), + (MI VecDblRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2), + (!cast(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VR_pat { + def: Pat<(IntID 
VectorRegs:$src1, IntRegs:$src2), + (MI VectorRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2), + (!cast(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WV_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2), + (MI VecDblRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WW_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2), + (MI VecDblRegs:$src1, VecDblRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VV_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2), + (MI VectorRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QR_pat { + def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2), + (MI VecPredRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QQ_pat { + def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2), + (MI VecPredRegs:$src1, VecPredRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWR_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVR_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVR_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VWR_pat { + def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVV_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, 
VectorRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVV_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QVV_pat { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VQR_pat { + def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + + +multiclass T_QVR_pat { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVI_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WRI_pat { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3), + (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWRI_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVVR_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + 
VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVVR_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +defm : T_WR_pat; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; + +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_VWR_pat ; +defm : T_VWR_pat ; + +defm : T_WVR_pat ; +defm : T_WVR_pat ; +defm : T_WVR_pat ; +defm : T_WVR_pat ; + +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; + +defm : T_VVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; + +// Compare instructions +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; 
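Each T_*_pat multiclass above turns one defm into two selection patterns: the base intrinsic/instruction pair, guarded by UseHVXSgl, and a 128-byte pair obtained by gluing the "_128B" suffix onto both names with !cast, guarded by UseHVXDbl. A minimal C++ sketch of that suffix-keyed pairing (the registry and the example names are illustrative, not actual TableGen output):

#include <iostream>
#include <map>
#include <string>

// Toy registry: intrinsic name -> instruction chosen to implement it.
static std::map<std::string, std::string> Patterns;

// Mirrors a T_*_pat multiclass: one call registers both HVX modes.
static void defmPat(const std::string &IntID, const std::string &MI) {
  Patterns[IntID] = MI;                     // 64-byte mode (UseHVXSgl)
  Patterns[IntID + "_128B"] = MI + "_128B"; // 128-byte mode (UseHVXDbl)
}

int main() {
  defmPat("int_hexagon_V6_vaddw", "V6_vaddw"); // illustrative pair
  for (const auto &P : Patterns)
    std::cout << P.first << " -> " << P.second << "\n";
  return 0;
}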
+defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; + +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; + +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; + +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; + +defm : T_WRI_pat ; +defm : T_WRI_pat ; +defm : T_WRI_pat ; + +defm : T_WWRI_pat ; +defm : T_WWRI_pat ; +defm : T_WWRI_pat ; + +// assembler mapped. +//defm : T_V_pat ; +// not present earlier.. 
need to add intrinsic +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_WV_pat ; +defm : T_WV_pat ; +defm : T_VVI_pat ; +defm : T_VVI_pat ; + +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_Q_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; + +defm : T_VQR_pat ; +defm : T_QVR_pat ; +defm : T_QR_pat ; +defm : T_R_pat ; +defm : T_R_pat ; +defm : T_VR_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVVR_pat ; +defm : T_WVVR_pat ; + +defm : T_QVR_pat ; +def : T_PI_pat ; +def : T_RI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; + +defm : T_VR_pat ; +defm : T_VR_pat ; + +def : T_PPQ_pat ; + +def: Pat<(v64i16 (trunc v64i32:$Vdd)), + (v64i16 (V6_vpackwh_sat_128B + (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), + (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + Requires<[UseHVXDbl]>; + + diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 75189b696ea2..624c0f6cf49d 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -26,39 +26,71 @@ using namespace llvm; -static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, - HexagonAsmPrinter& Printer) { +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + HexagonAsmPrinter &Printer) { MCContext &MC = Printer.OutContext; const MCExpr *ME; - ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC); + // Populate the relocation type based on Hexagon target flags + // set on an operand + MCSymbolRefExpr::VariantKind RelocationType; + switch (MO.getTargetFlags()) { + default: + RelocationType = MCSymbolRefExpr::VK_None; + break; + case HexagonII::MO_PCREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL; + break; + case HexagonII::MO_GOT: + RelocationType = MCSymbolRefExpr::VK_GOT; + break; + case HexagonII::MO_LO16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16; + break; + case HexagonII::MO_HI16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16; + break; + case HexagonII::MO_GPREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; + break; + } + + ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); if (!MO.isJTI() && MO.getOffset()) ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); - return (MCOperand::createExpr(ME)); + return MCOperand::createExpr(ME); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, - HexagonAsmPrinter& AP) { - if(MI->getOpcode() == Hexagon::ENDLOOP0){ +void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP) { + if (MI->getOpcode() == Hexagon::ENDLOOP0) { HexagonMCInstrInfo::setInnerLoop(MCB); return; } - if(MI->getOpcode() == Hexagon::ENDLOOP1){ + if (MI->getOpcode() == Hexagon::ENDLOOP1) { 
HexagonMCInstrInfo::setOuterLoop(MCB); return; } - MCInst* MCI = new (AP.OutContext) MCInst; + MCInst *MCI = new (AP.OutContext) MCInst; MCI->setOpcode(MI->getOpcode()); assert(MCI->getOpcode() == static_cast(MI->getOpcode()) && "MCI opcode should have been set on construction"); + bool MustExtend = false; for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCO; + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + MustExtend = true; switch (MO.getType()) { default: @@ -73,11 +105,14 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, APFloat Val = MO.getFPImm()->getValueAPF(); // FP immediates are used only when setting GPRs, so they may be dealt // with like regular immediates from this point on. - MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext)); break; } case MachineOperand::MO_Immediate: - MCO = MCOperand::createImm(MO.getImm()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(MO.getImm(), AP.OutContext)); break; case MachineOperand::MO_MachineBasicBlock: MCO = MCOperand::createExpr @@ -104,5 +139,8 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, MCI->addOperand(MCO); } + AP.HexagonProcessInstruction(*MCI, *MI); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, + MustExtend); MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 35f732cd6207..7a52d6874c33 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -179,7 +179,11 @@ void VLIWMachineScheduler::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + if (!checkSchedLimit()) break; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 707bfdbb6ab6..20c4ab112b5f 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -92,6 +92,7 @@ namespace { /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; + bool isNewValueJumpCandidate(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -280,9 +281,9 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return true; } -// Given a compare operator, return a matching New Value Jump -// compare operator. Make sure that MI here is included in -// HexagonInstrInfo.cpp::isNewValueJumpCandidate + +// Given a compare operator, return a matching New Value Jump compare operator. +// Make sure that MI here is included in isNewValueJumpCandidate. static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, bool secondRegNewified, MachineBasicBlock *jmpTarget, @@ -341,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t : Hexagon::J4_cmpgtui_t_jumpnv_nt; + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? 
Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + default: llvm_unreachable("Could not find matching New Value Jump instruction."); } @@ -348,6 +367,26 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return 0; } +bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) + const { + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; + } +} + + bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" @@ -372,7 +411,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); @@ -468,7 +507,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { MI->getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 - if (QII->isNewValueJumpCandidate(MI)) { + if (isNewValueJumpCandidate(MI)) { assert((MI->getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); @@ -591,8 +630,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DebugLoc dl = MI->getDebugLoc(); MachineInstr *NewMI; - assert((QII->isNewValueJumpCandidate(cmpInstr)) && - "This compare is not a New Value Jump candidate."); + assert((isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index 2bece8f42f53..fbd29cd4d6d1 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -1,4 +1,4 @@ -//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===// +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,59 +7,114 @@ // //===----------------------------------------------------------------------===// +def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } +def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } +def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } +def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } - +def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } +def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } +def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } +def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } 
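These AsmOperandClass definitions give every Hexagon immediate flavor a named class that the generated asm matcher can test a parsed operand against; the checks themselves reduce to signed/unsigned range and alignment predicates of the isShiftedUInt<N,S> shape used throughout this file. A hedged C++ sketch of what such a matcher check boils down to (the class names mirror the defs above, but the dispatch table is invented for illustration):

#include <cstdint>
#include <iostream>
#include <string>

// True if v is 2^S-aligned and its shifted value fits in an N-bit unsigned
// field -- the shape of check behind classes like u6Imm, u6_2Imm, u16_3Imm.
static bool isShiftedUIntNS(uint64_t v, unsigned N, unsigned S) {
  return (v & ((1ULL << S) - 1)) == 0 && (v >> S) < (1ULL << N);
}

static bool matchImmClass(const std::string &Cls, uint64_t v) {
  if (Cls == "u6Imm")    return isShiftedUIntNS(v, 6, 0);
  if (Cls == "u6_2Imm")  return isShiftedUIntNS(v, 6, 2);
  if (Cls == "u16_3Imm") return isShiftedUIntNS(v, 16, 3);
  return false; // unknown class in this toy matcher
}

int main() {
  std::cout << matchImmClass("u6_2Imm", 60) << "\n"; // 1: 4-aligned, 15 < 64
  std::cout << matchImmClass("u6_2Imm", 62) << "\n"; // 0: not 4-byte aligned
  return 0;
}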
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } +def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } +def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; } +def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } +def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } +def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } +def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } +def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } +def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } +def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } +def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } +def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; } +def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } +def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } +def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } +def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } +def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } +def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } +def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } +def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } +def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } // Immediate operands. -let PrintMethod = "printImmOperand" in { - def s32Imm : Operand; - def s8Imm : Operand; - def s8Imm64 : Operand; - def s6Imm : Operand; +let OperandType = "OPERAND_IMMEDIATE", + DecoderMethod = "unsignedImmDecoder" in { + def s32Imm : Operand { let ParserMatchClass = s32ImmOperand; + let DecoderMethod = "s32ImmDecoder"; } + def s8Imm : Operand { let ParserMatchClass = s8ImmOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s8Imm64 : Operand { let ParserMatchClass = s8Imm64Operand; + let DecoderMethod = "s8ImmDecoder"; } + def s6Imm : Operand { let ParserMatchClass = s6ImmOperand; + let DecoderMethod = "s6_0ImmDecoder"; } def s6_3Imm : Operand; - def s4Imm : Operand; - def s4_0Imm : Operand { let DecoderMethod = "s4_0ImmDecoder"; } - def s4_1Imm : Operand { let DecoderMethod = "s4_1ImmDecoder"; } - def s4_2Imm : Operand { let DecoderMethod = "s4_2ImmDecoder"; } - def s4_3Imm : Operand { let DecoderMethod = "s4_3ImmDecoder"; } - def u64Imm : Operand; - def u32Imm : Operand; - def u26_6Imm : Operand; - def u16Imm : Operand; - def u16_0Imm : Operand; - def u16_1Imm : Operand; - def u16_2Imm : Operand; - def u16_3Imm : Operand; - def u11_3Imm : Operand; - def u10Imm : Operand; - def u9Imm : Operand; - def u8Imm : Operand; - def u7Imm : Operand; - def u6Imm : Operand; - def u6_0Imm : Operand; - def u6_1Imm : Operand; - def u6_2Imm : Operand; - def u6_3Imm : Operand; - def u5Imm : Operand; + def s4Imm : Operand { let ParserMatchClass = s4ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_0Imm : Operand { let ParserMatchClass = s4_0ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_1Imm : Operand { let ParserMatchClass = s4_1ImmOperand; + let DecoderMethod = "s4_1ImmDecoder"; } + def s4_2Imm : Operand { let ParserMatchClass = s4_2ImmOperand; + let DecoderMethod = "s4_2ImmDecoder"; } + def s4_3Imm : Operand { let ParserMatchClass = s4_3ImmOperand; + let DecoderMethod = "s4_3ImmDecoder"; } + def u64Imm : Operand { let ParserMatchClass = u64ImmOperand; } + def u32Imm : Operand { let ParserMatchClass = u32ImmOperand; } + def u26_6Imm : Operand { let ParserMatchClass = u26_6ImmOperand; } + def u16Imm : 
Operand { let ParserMatchClass = u16ImmOperand; } + def u16_0Imm : Operand { let ParserMatchClass = u16_0ImmOperand; } + def u16_1Imm : Operand { let ParserMatchClass = u16_1ImmOperand; } + def u16_2Imm : Operand { let ParserMatchClass = u16_2ImmOperand; } + def u16_3Imm : Operand { let ParserMatchClass = u16_3ImmOperand; } + def u11_3Imm : Operand { let ParserMatchClass = u11_3ImmOperand; } + def u10Imm : Operand { let ParserMatchClass = u10ImmOperand; } + def u9Imm : Operand { let ParserMatchClass = u9ImmOperand; } + def u8Imm : Operand { let ParserMatchClass = u8ImmOperand; } + def u7Imm : Operand { let ParserMatchClass = u7ImmOperand; } + def u6Imm : Operand { let ParserMatchClass = u6ImmOperand; } + def u6_0Imm : Operand { let ParserMatchClass = u6_0ImmOperand; } + def u6_1Imm : Operand { let ParserMatchClass = u6_1ImmOperand; } + def u6_2Imm : Operand { let ParserMatchClass = u6_2ImmOperand; } + def u6_3Imm : Operand { let ParserMatchClass = u6_3ImmOperand; } + def u5Imm : Operand { let ParserMatchClass = u5ImmOperand; } + def u5_0Imm : Operand; + def u5_1Imm : Operand; def u5_2Imm : Operand; def u5_3Imm : Operand; - def u4Imm : Operand; + def u4Imm : Operand { let ParserMatchClass = u4ImmOperand; } def u4_0Imm : Operand; + def u4_1Imm : Operand; def u4_2Imm : Operand; - def u3Imm : Operand; + def u4_3Imm : Operand; + def u3Imm : Operand { let ParserMatchClass = u3ImmOperand; } def u3_0Imm : Operand; def u3_1Imm : Operand; - def u2Imm : Operand; - def u1Imm : Operand; - def n8Imm : Operand; - def m6Imm : Operand; + def u3_2Imm : Operand; + def u3_3Imm : Operand; + def u2Imm : Operand { let ParserMatchClass = u2ImmOperand; } + def u1Imm : Operand { let ParserMatchClass = u1ImmOperand; } + def n8Imm : Operand { let ParserMatchClass = n8ImmOperand; } } -let PrintMethod = "printNOneImmOperand" in -def nOneImm : Operand; +let OperandType = "OPERAND_IMMEDIATE" in { + def s4_6Imm : Operand { let ParserMatchClass = s4_6ImmOperand; + let PrintMethod = "prints4_6ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s4_7Imm : Operand { let PrintMethod = "prints4_7ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s3_6Imm : Operand { let ParserMatchClass = s3_6ImmOperand; + let PrintMethod = "prints3_6ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} + def s3_7Imm : Operand { let PrintMethod = "prints3_7ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} +} // // Immediate predicates @@ -81,32 +136,12 @@ def s31_1ImmPred : PatLeaf<(i32 imm), [{ def s30_2ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); + return isShiftedInt<30,2>(v); }]>; def s29_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); -}]>; - -def s22_10ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<22,10>(v); -}]>; - -def s8_24ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<8,24>(v); -}]>; - -def s16_16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<16,16>(v); -}]>; - -def s26_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<26,6>(v); + return isShiftedInt<29,3>(v); }]>; def s16ImmPred : PatLeaf<(i32 imm), [{ @@ -114,16 +149,6 @@ def s16ImmPred : PatLeaf<(i32 imm), [{ return isInt<16>(v); }]>; -def s13ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<13>(v); -}]>; - -def 
s12ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<12>(v); -}]>; - def s11_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<11>(v); @@ -149,16 +174,6 @@ def s10ImmPred : PatLeaf<(i32 imm), [{ return isInt<10>(v); }]>; -def s9ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<9>(v); -}]>; - -def m9ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<9>(v) && (v != -256); -}]>; - def s8ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); @@ -194,7 +209,6 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4,3>(v); }]>; - def u64ImmPred : PatLeaf<(i64 imm), [{ // Adding "N ||" to suppress gcc unused warning. return (N || true); @@ -230,26 +244,31 @@ def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26,6>(v); }]>; -def u16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<16>(v); -}]>; - -def u16_s8ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<16,8>(v); -}]>; - def u16_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; +def u16_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,1>(v); +}]>; + +def u16_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,2>(v); +}]>; + def u11_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; +def u10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<10>(v); +}]>; + def u9ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); @@ -321,6 +340,11 @@ def u1ImmPred : PatLeaf<(i1 imm), [{ return isUInt<1>(v); }]>; +def u1ImmPred32 : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + def m5BImmPred : PatLeaf<(i32 imm), [{ // m5BImmPred predicate - True if the (char) number is in range -1 .. -31 // and will fit in a 5 bit field when made positive, for use in memops. @@ -379,7 +403,7 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr5ImmPred : PatLeaf<(i32 imm), [{ - // SetClr5ImmPred predicate - True if the immediate is in range 0..31. + // True if the immediate is in range 0..31. int32_t v = (int32_t)N->getSExtValue(); return (v >= 0 && v <= 31); }]>; @@ -404,14 +428,13 @@ def Clr4ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr4ImmPred : PatLeaf<(i32 imm), [{ - // SetClr4ImmPred predicate - True if the immediate is in the range 0..15. + // True if the immediate is in the range 0..15. int16_t v = (int16_t)N->getSExtValue(); return (v >= 0 && v <= 15); }]>; def Set3ImmPred : PatLeaf<(i32 imm), [{ - // Set3ImmPred predicate - True if the number is in the series of values: - // [ 2^0, 2^1, ... 2^7 ]. + // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ]. // For use in setbit immediate. uint8_t v = (int8_t)N->getSExtValue(); // Constrain to 8 bits, and then check for single bit. @@ -419,9 +442,7 @@ def Set3ImmPred : PatLeaf<(i32 imm), [{ }]>; def Clr3ImmPred : PatLeaf<(i32 imm), [{ - // Clr3ImmPred predicate - True if the number is in the series of - // bit negated values: - // [ 2^0, 2^1, ... 2^7 ]. + // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. 
// For use in setbit and clrbit immediate. uint8_t v = ~ (int8_t)N->getSExtValue(); // Constrain to 8 bits, and then check for single bit. @@ -429,76 +450,109 @@ def Clr3ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr3ImmPred : PatLeaf<(i32 imm), [{ - // SetClr3ImmPred predicate - True if the immediate is in the range 0..7. + // True if the immediate is in the range 0..7. int8_t v = (int8_t)N->getSExtValue(); return (v >= 0 && v <= 7); }]>; // Extendable immediate operands. +def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } +def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } +def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } +def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } +def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } +def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } +def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } +def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } +def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } +def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } +def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } +def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } +def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } +def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } +def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } +def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } +def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } +def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } +def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } +def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } -let PrintMethod = "printExtOperand" in { - def f32Ext : Operand; - def s16Ext : Operand { let DecoderMethod = "s16ImmDecoder"; } - def s12Ext : Operand { let DecoderMethod = "s12ImmDecoder"; } - def s11_0Ext : Operand { let DecoderMethod = "s11_0ImmDecoder"; } - def s11_1Ext : Operand { let DecoderMethod = "s11_1ImmDecoder"; } - def s11_2Ext : Operand { let DecoderMethod = "s11_2ImmDecoder"; } - def s11_3Ext : Operand { let DecoderMethod = "s11_3ImmDecoder"; } - def s10Ext : Operand { let DecoderMethod = "s10ImmDecoder"; } - def s9Ext : Operand { let DecoderMethod = "s90ImmDecoder"; } - def s8Ext : Operand { let DecoderMethod = "s8ImmDecoder"; } - def s7Ext : Operand; - def s6Ext : Operand { let DecoderMethod = "s6_0ImmDecoder"; } - def u6Ext : Operand; - def u7Ext : Operand; - def u8Ext : Operand; - def u9Ext : Operand; - def u10Ext : Operand; - def u6_0Ext : Operand; - def u6_1Ext : Operand; - def u6_2Ext : Operand; - def u6_3Ext : Operand; + + +let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", + DecoderMethod = "unsignedImmDecoder" in { + def f32Ext : Operand { let ParserMatchClass = f32ExtOperand; } + def s16Ext : Operand { let ParserMatchClass = s16ExtOperand; + let DecoderMethod = "s16ImmDecoder"; } + def s12Ext : Operand { let ParserMatchClass = s12ExtOperand; + let DecoderMethod = "s12ImmDecoder"; } + def s11_0Ext : Operand { let ParserMatchClass = s11_0ExtOperand; + let DecoderMethod = "s11_0ImmDecoder"; } + def s11_1Ext : Operand { let ParserMatchClass = s11_1ExtOperand; + let DecoderMethod = "s11_1ImmDecoder"; } + def s11_2Ext : Operand { let ParserMatchClass = s11_2ExtOperand; + let DecoderMethod = "s11_2ImmDecoder"; } + def 
s11_3Ext : Operand { let ParserMatchClass = s11_3ExtOperand; + let DecoderMethod = "s11_3ImmDecoder"; } + def s10Ext : Operand { let ParserMatchClass = s10ExtOperand; + let DecoderMethod = "s10ImmDecoder"; } + def s9Ext : Operand { let ParserMatchClass = s9ExtOperand; + let DecoderMethod = "s90ImmDecoder"; } + def s8Ext : Operand { let ParserMatchClass = s8ExtOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s7Ext : Operand { let ParserMatchClass = s7ExtOperand; } + def s6Ext : Operand { let ParserMatchClass = s6ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u6Ext : Operand { let ParserMatchClass = u6ExtOperand; } + def u7Ext : Operand { let ParserMatchClass = u7ExtOperand; } + def u8Ext : Operand { let ParserMatchClass = u8ExtOperand; } + def u9Ext : Operand { let ParserMatchClass = u9ExtOperand; } + def u10Ext : Operand { let ParserMatchClass = u10ExtOperand; } + def u6_0Ext : Operand { let ParserMatchClass = u6_0ExtOperand; } + def u6_1Ext : Operand { let ParserMatchClass = u6_1ExtOperand; } + def u6_2Ext : Operand { let ParserMatchClass = u6_2ExtOperand; } + def u6_3Ext : Operand { let ParserMatchClass = u6_3ExtOperand; } + def u32MustExt : Operand { let ParserMatchClass = u32MustExtOperand; } } -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<10>(v)) - return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); +def s4_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<4,7>(v); + return false; }]>; -def s8ExtPred : PatLeaf<(i32 imm), [{ +def s3_7ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<3,7>(v); + return false; }]>; -def u8ExtPred : PatLeaf<(i32 imm), [{ +def s4_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<4,6>(v); + return false; }]>; -def u9ExtPred : PatLeaf<(i32 imm), [{ +def s3_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 64-byte aligned. 
+ return isShiftedInt<3,6>(v); + return false; }]>; @@ -523,21 +577,21 @@ let PrintMethod = "printGlobalOperand" in { let PrintMethod = "printJumpTable" in def jumptablebase : Operand; -def brtarget : Operand; -def brtargetExt : Operand { - let PrintMethod = "printExtBrtarget"; +def brtarget : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def brtargetExt : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def calltarget : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; } -def calltarget : Operand; def bblabel : Operand; -def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">; - -def symbolHi32 : Operand { - let PrintMethod = "printSymbolHi"; -} -def symbolLo32 : Operand { - let PrintMethod = "printSymbolLo"; -} +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; // Return true if for a 32 to 64-bit sign-extended load. def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ diff --git a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp new file mode 100644 index 000000000000..1723771550c9 --- /dev/null +++ b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -0,0 +1,150 @@ +//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" + +#include "Hexagon.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonOptimizeSZextends(); + void initializeHexagonOptimizeSZextendsPass(PassRegistry&); +} + +namespace { + struct HexagonOptimizeSZextends : public FunctionPass { + public: + static char ID; + HexagonOptimizeSZextends() : FunctionPass(ID) { + initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + return "Remove sign extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } + + bool intrinsicAlreadySextended(Intrinsic::ID IntID); + }; +} + +char HexagonOptimizeSZextends::ID = 0; + +INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs", + "Remove Sign and Zero Extends for Args", false, false) + +bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) { + switch(IntID) { + case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll: + return true; + default: + break; + } + return false; +} + +bool HexagonOptimizeSZextends::runOnFunction(Function &F) { + unsigned Idx = 1; + // Try to optimize sign extends in formal parameters. It's relying on + // callee already sign extending the values. I'm not sure if our ABI + // requires callee to sign extend though. 
+ for (auto &Arg : F.args()) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { + if (!isa(Arg.getType())) { + for (auto UI = Arg.use_begin(); UI != Arg.use_end();) { + if (isa(*UI)) { + Instruction* Use = cast(*UI); + SExtInst* SI = new SExtInst(&Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = &F.getEntryBlock().front(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + ++Idx; + } + + // Try to remove redundant sext operations on Hexagon. The hardware + // already sign extends many 16 bit intrinsic operations to 32 bits. + // For example: + // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y) + // %sext233 = shl i32 %34, 16 + // %conv52 = ashr exact i32 %sext233, 16 + for (auto &B : F) { + for (auto &I : B) { + // Look for arithmetic shift right by 16. + BinaryOperator *Ashr = dyn_cast(&I); + if (!(Ashr && Ashr->getOpcode() == Instruction::AShr)) + continue; + Value *AshrOp1 = Ashr->getOperand(1); + ConstantInt *C = dyn_cast(AshrOp1); + // Right shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Ashr comes from logical shift left. + Instruction *Shl = dyn_cast(Ashr->getOperand(0)); + if (!(Shl && Shl->getOpcode() == Instruction::Shl)) + continue; + Value *Intr = Shl->getOperand(0); + Value *ShlOp1 = Shl->getOperand(1); + C = dyn_cast(ShlOp1); + // Left shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Shl comes from an intrinsic. + if (IntrinsicInst *I = dyn_cast(Intr)) { + if (!intrinsicAlreadySextended(I->getIntrinsicID())) + continue; + // All is well. Replace all uses of AShr with I. + for (auto UI = Ashr->user_begin(), UE = Ashr->user_end(); + UI != UE; ++UI) { + const Use &TheUse = UI.getUse(); + if (Instruction *J = dyn_cast(TheUse.getUser())) { + J->replaceUsesOfWith(Ashr, I); + } + } + } + } + } + + return true; +} + + +FunctionPass *llvm::createHexagonOptimizeSZextends() { + return new HexagonOptimizeSZextends(); +} diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 93dcbe233b25..e68ff85b1da6 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -124,7 +124,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); @@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = - std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); + std::make_pair(*&SrcReg, Hexagon::subreg_hireg); } // Look for P=NOT(P). 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f6bb4a045438..61c0589fb5bf 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -66,6 +66,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { switch (HST.getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CallerSavedRegsV4; } llvm_unreachable( @@ -84,6 +86,8 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { switch (MF->getSubtarget().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CalleeSavedRegsV3; } llvm_unreachable("Callee saved registers requested for unknown architecture " @@ -98,7 +102,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R29); Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); - Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::PC); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); @@ -116,62 +120,21 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - MachineBasicBlock &MB = *MI.getParent(); MachineFunction &MF = *MB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - auto &HST = static_cast(MF.getSubtarget()); + auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HFI = *HST.getFrameLowering(); + unsigned BP = 0; int FI = MI.getOperand(FIOp).getIndex(); - int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm(); - bool HasAlloca = MFI.hasVarSizedObjects(); - bool HasAlign = needsStackRealignment(MF); - - // XXX: Fixed objects cannot be accessed through SP if there are aligned - // objects in the local frame, or if there are dynamically allocated objects. - // In such cases, there has to be FP available. - if (!HFI.hasFP(MF)) { - assert(!HasAlloca && !HasAlign && "This function must have frame pointer"); - // We will not reserve space on the stack for the lr and fp registers. - Offset -= 8; - } - - unsigned SP = getStackRegister(), FP = getFrameRegister(); - unsigned AP = 0; - if (MachineInstr *AI = HFI.getAlignaInstr(MF)) - AP = AI->getOperand(0).getReg(); - unsigned FrameSize = MFI.getStackSize(); - - // Special handling of dbg_value instructions and INLINEASM. - if (MI.isDebugValue() || MI.isInlineAsm()) { - MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/); - MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize); - return; - } - - bool UseFP = false, UseAP = false; // Default: use SP. - if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { - UseFP = HasAlloca || HasAlign; - } else { - if (HasAlloca) { - if (HasAlign) - UseAP = true; - else - UseFP = true; - } - } + // Select the base pointer (BP) and calculate the actual offset from BP + // to the beginning of the object at index FI. + int Offset = HFI.getFrameIndexReference(MF, FI, BP); + // Add the offset from the instruction. + int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); unsigned Opc = MI.getOpcode(); - bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset); - bool ValidFP = HII.isValidOffset(Opc, Offset); - - // Calculate the actual offset in the instruction. 
- int64_t RealOffset = Offset; - if (!UseFP && !UseAP) - RealOffset = FrameSize+Offset; - switch (Opc) { case Hexagon::TFR_FIA: MI.setDesc(HII.get(Hexagon::A2_addi)); @@ -184,20 +147,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, break; } - unsigned BP = 0; - bool Valid = false; - if (UseFP) { - BP = FP; - Valid = ValidFP; - } else if (UseAP) { - BP = AP; - Valid = ValidFP; - } else { - BP = SP; - Valid = ValidSP; - } - - if (Valid) { + if (HII.isValidOffset(Opc, RealOffset)) { MI.getOperand(FIOp).ChangeToRegister(BP, false); MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); return; @@ -223,8 +173,8 @@ unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const HexagonFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) - return Hexagon::R30; - return Hexagon::R29; + return getFrameRegister(); + return getStackRegister(); } @@ -238,17 +188,9 @@ unsigned HexagonRegisterInfo::getStackRegister() const { } -bool -HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const HexagonFrameLowering *TFI = getFrameLowering(MF); - return TFI->hasFP(MF); -} - - -bool -HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getMaxAlignment() > 8; +bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) + const { + return MF.getSubtarget().getFrameLowering()->hasFP(MF); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 7edefee93993..db7e0f27815d 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -63,8 +63,6 @@ public: return true; } - bool needsStackRealignment(const MachineFunction &MF) const override; - /// Returns true if the frame pointer is valid. 
bool useFPForScavengingIndex(const MachineFunction &MF) const override; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index edf1c251ac77..81629dc6d47f 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -53,6 +53,12 @@ let Namespace = "Hexagon" in { let Num = num; } + + // Rq - vector predicate registers + class Rq num, string n> : Register { + let HWEncoding{2-0} = num; + } + // Rc - control registers class Rc num, string n, list alt = [], list alias = []> : @@ -131,20 +137,21 @@ let Namespace = "Hexagon" in { def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, DwarfRegNum<[71]>; - def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>; - def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; - def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> { + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { let SubRegIndices = [subreg_overflow]; let SubRegs = [USR_OVF]; } - def PC : Rc<9, "pc">, DwarfRegNum<[75]>; - def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>; - def GP : Rc<11, "gp">, DwarfRegNum<[77]>; - def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>; - def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>; - def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>; - def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>; + def PC : Rc<9, "pc">, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; } // Control registers pairs. @@ -158,6 +165,36 @@ let Namespace = "Hexagon" in { def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; } + foreach i = 0-31 in { + def V#i : Ri, DwarfRegNum<[!add(i, 99)]>; + } + + // Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + } + + // Vector Predicate registers. + def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>; + def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; + def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; + def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + // Register classes. 
// // FIXME: the register order should be defined in terms of the preferred @@ -169,10 +206,34 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, R10, R11, R29, R30, R31)> { } +// Registers are listed in reverse order for allocation preference reasons. +def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, + (add R7, R6, R5, R4, R3, R2, R1, R0)> ; + def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; +def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "W%u", 0, 15))>; + +def VectorRegs128B : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs128B : RegisterClass<"Hexagon", + [v256i8,v128i16,v64i32,v32i64], 2048, + (add (sequence "W%u", 0, 15))>; + +def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512, + (add (sequence "Q%u", 0, 3))>; + +def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024, + (add (sequence "Q%u", 0, 3))>; def PredRegs : RegisterClass<"Hexagon", [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp deleted file mode 100644 index 7069ad36e21a..000000000000 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends // -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Pass that removes sign extends for function parameters. 
These parameters -// are already sign extended by the caller per Hexagon's ABI -// -//===----------------------------------------------------------------------===// - -#include "Hexagon.h" -#include "HexagonTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; - -namespace llvm { - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); - void initializeHexagonRemoveExtendArgsPass(PassRegistry&); -} - -namespace { - struct HexagonRemoveExtendArgs : public FunctionPass { - public: - static char ID; - HexagonRemoveExtendArgs() : FunctionPass(ID) { - initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - - const char *getPassName() const override { - return "Remove sign extends"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - FunctionPass::getAnalysisUsage(AU); - } - }; -} - -char HexagonRemoveExtendArgs::ID = 0; - -INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs", - "Remove Sign and Zero Extends for Args", false, false) - -bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { - unsigned Idx = 1; - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; - ++AI, ++Idx) { - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { - Argument* Arg = AI; - if (!isa(Arg->getType())) { - for (auto UI = Arg->user_begin(); UI != Arg->user_end();) { - if (isa(*UI)) { - Instruction* I = cast(*UI); - SExtInst* SI = new SExtInst(Arg, I->getType()); - assert (EVT::getEVT(SI->getType()) == - (EVT::getEVT(I->getType()))); - ++UI; - I->replaceAllUsesWith(SI); - Instruction* First = F.getEntryBlock().begin(); - SI->insertBefore(First); - I->eraseFromParent(); - } else { - ++UI; - } - } - } - } - } - return true; -} - - - -FunctionPass* -llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) { - return new HexagonRemoveExtendArgs(); -} diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index 528cafc2bfea..6e4987b7e4e3 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -13,6 +13,12 @@ include "HexagonScheduleV4.td" +// V55 Machine Info + +include "HexagonScheduleV55.td" + //===----------------------------------------------------------------------===// -// V4 Machine Info - +// V60 Machine Info - //===----------------------------------------------------------------------===// + +include "HexagonScheduleV60.td" + diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index a7d2d4724d0b..67af147b25b3 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -35,10 +35,11 @@ def SLOT_ENDLOOP: FuncUnit; // Itinerary classes. def PSEUDO : InstrItinClass; -def PSEUDOM : InstrItinClass; +def PSEUDOM : InstrItinClass; // ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
def DUPLEX : InstrItinClass; def PREFIX : InstrItinClass; +def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass; def COMPOUND : InstrItinClass; def ALU32_2op_tc_1_SLOT0123 : InstrItinClass; @@ -58,6 +59,7 @@ def CR_tc_2early_SLOT3 : InstrItinClass; def CR_tc_3x_SLOT23 : InstrItinClass; def CR_tc_3x_SLOT3 : InstrItinClass; def J_tc_2early_SLOT23 : InstrItinClass; +def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass; def J_tc_2early_SLOT2 : InstrItinClass; def LD_tc_ld_SLOT01 : InstrItinClass; def LD_tc_ld_SLOT0 : InstrItinClass; @@ -91,6 +93,7 @@ def V4LDST_tc_st_SLOT0 : InstrItinClass; def V4LDST_tc_st_SLOT01 : InstrItinClass; def J_tc_2early_SLOT0123 : InstrItinClass; def EXTENDER_tc_1_SLOT0123 : InstrItinClass; +def S_3op_tc_3stall_SLOT23 : InstrItinClass; def HexagonItinerariesV4 : diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td new file mode 100644 index 000000000000..d9ad25d4cd5a --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV55.td @@ -0,0 +1,170 @@ +//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine. +// This file describes that machine information. + +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| + +def CJ_tc_1_SLOT23 : InstrItinClass; +def CJ_tc_2early_SLOT23 : InstrItinClass; +def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass; +def COPROC_VX_vtc_long_SLOT23 : InstrItinClass; +def COPROC_VX_vtc_SLOT23 : InstrItinClass; +def J_tc_3stall_SLOT2 : InstrItinClass; +def MAPPING_tc_1_SLOT0123 : InstrItinClass; +def M_tc_3stall_SLOT23 : InstrItinClass; +def SUBINSN_tc_1_SLOT01 : InstrItinClass; +def SUBINSN_tc_2early_SLOT0 : InstrItinClass; +def SUBINSN_tc_2early_SLOT01 : InstrItinClass; +def SUBINSN_tc_3stall_SLOT0 : InstrItinClass; +def SUBINSN_tc_ld_SLOT0 : InstrItinClass; +def SUBINSN_tc_ld_SLOT01 : InstrItinClass; +def SUBINSN_tc_st_SLOT01 : InstrItinClass; + +def HexagonItinerariesV55 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ + // ALU32 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // ALU64 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // CR -> System + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // JR + InstrItinData]>, + InstrItinData]>, + + // Extender + InstrItinData]>, + + // Load + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // M + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, +
InstrItinData]>, + InstrItinData]>, + + // Store + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Subinsn + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // S + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // New Value Compare Jump + InstrItinData]>, + + // Mem ops + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Endloop + InstrItinData]>, + + // Vector + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Misc + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]> + + ]>; + +def HexagonModelV55 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV55; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V55 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td new file mode 100644 index 000000000000..2ccff8242a47 --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV60.td @@ -0,0 +1,310 @@ +//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". +def CVI_ST : FuncUnit; +def CVI_XLANE : FuncUnit; +def CVI_SHIFT : FuncUnit; +def CVI_MPY0 : FuncUnit; +def CVI_MPY1 : FuncUnit; +def CVI_LD : FuncUnit; + +// Combined functional units. +def CVI_XLSHF : FuncUnit; +def CVI_MPY01 : FuncUnit; +def CVI_ALL : FuncUnit; + +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData, + ComboFuncData, + ComboFuncData + ]>; + +// Note: When adding additional vector scheduling classes, add the +// corresponding methods to the class HexagonInstrInfo. +def CVI_VA : InstrItinClass; +def CVI_VA_DV : InstrItinClass; +def CVI_VX_LONG : InstrItinClass; +def CVI_VX_LATE : InstrItinClass; +def CVI_VX : InstrItinClass; +def CVI_VX_DV_LONG : InstrItinClass; +def CVI_VX_DV : InstrItinClass; +def CVI_VX_DV_SLOT2 : InstrItinClass; +def CVI_VP : InstrItinClass; +def CVI_VP_LONG : InstrItinClass; +def CVI_VP_VS_EARLY : InstrItinClass; +def CVI_VP_VS_LONG_EARLY : InstrItinClass; +def CVI_VP_VS_LONG : InstrItinClass; +def CVI_VP_VS : InstrItinClass; +def CVI_VP_DV : InstrItinClass; +def CVI_VS : InstrItinClass; +def CVI_VINLANESAT : InstrItinClass; +def CVI_VM_LD : InstrItinClass; +def CVI_VM_TMP_LD : InstrItinClass; +def CVI_VM_CUR_LD : InstrItinClass; +def CVI_VM_VP_LDU : InstrItinClass; +def CVI_VM_ST : InstrItinClass; +def CVI_VM_NEW_ST : InstrItinClass; +def CVI_VM_STU : InstrItinClass; +def CVI_HIST : InstrItinClass; +def CVI_VA_EXT : InstrItinClass; + +// There are four SLOTS (four parallel pipelines) in the Hexagon V60 machine. +// This file describes that machine information.
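As context for how the itinerary classes listed above are consumed: schedulers query them through the generated InstrItineraryData tables. A minimal sketch against the stock llvm/MC/MCInstrItineraries.h interface (illustrative only; ItinClassID stands for the TableGen-generated enum value of a class such as CVI_VA):

#include "llvm/MC/MCInstrItineraries.h"

// Sum the cycles of every stage recorded for one itinerary class.
static unsigned totalStageCycles(const llvm::InstrItineraryData &IID,
                                 unsigned ItinClassID) {
  unsigned Cycles = 0;
  for (const llvm::InstrStage *S = IID.beginStage(ItinClassID),
                              *E = IID.endStage(ItinClassID);
       S != E; ++S)
    Cycles += S->getCycles();
  return Cycles;
}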
+// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| +// +// +// In addition to using the above SLOTS, there are also six vector pipelines +// in the CVI co-processor in the Hexagon V60 machine. +// +// |=========| |=========| |=========| |=========| |=========| |=========| +// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST | +// ==== |=========| |=========| |=========| |=========| |=========| |=========| +// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | | +// S2-3 | | | CVI_VX | | CVI_VX | | | | | | | +// S0-3 | | | | | | | | | CVI_VP | | | +// S0-3 | | | | | | | CVI_VS | | | | | +// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | | +// S0-1 |(C*TMP_LD) | | | | | | | | | | +// S01 |(C*_LDU) | | | | | | | | C*_LDU | | | +// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) | +// S0 | | | | | | | | | | |(C*TMP_ST) +// S01 | | | | | | | | | VSTU | |(C*_STU) | +// |=========| |=========| |=========| |=========| |=========| |=========| +// |=====================| |=====================| +// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT| +// |=====================| |=====================| +// S0-3 | CVI_VA_DV | | CVI_VA_DV | +// S0-3 | | | CVI_VP_DV | +// S2-3 | CVI_VX_DV | | | +// |=====================| |=====================| +// |=====================================================================| +// S0-3 | CVI_HIST Histogram | +// S0123| CVI_VA_EXT Extract | +// |=====================================================================| + +def HexagonItinerariesV60 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ + // ALU32 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // ALU64 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // CR -> System + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // JR + InstrItinData]>, + InstrItinData]>, + + // Extender + InstrItinData]>, + + // Load + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // M + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Store + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Subinsn + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // S + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. 
+ InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // New Value Compare Jump + InstrItinData]>, + + // Mem ops + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Endloop + InstrItinData]>, + + // Vector + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Duplex and Compound + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + // Misc + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]>, + + // Latest CVI spec definitions. + InstrItinData, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_LD]>]>, + InstrItinData, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_ST]>]>, + InstrItinData, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_ALL]>]> + ]>; + +def HexagonModelV60 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV60; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V60 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 276cc69eed0f..239dbda8f27b 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" -bool llvm::flag_aligned_memcpy; - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - flag_aligned_memcpy = false; - if ((Align & 0x3) == 0) { - ConstantSDNode *ConstantSize = dyn_cast(Size); - if (ConstantSize) { - uint64_t SizeVal = ConstantSize->getZExtValue(); - if ((SizeVal > 32) && ((SizeVal % 8) == 0)) - flag_aligned_memcpy = true; - } - } + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); - return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. + // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index d3eb56f4ba0f..10fe606985dd 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -81,7 +81,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block MachineBasicBlock::iterator MII = MBB->begin(); MachineBasicBlock::iterator MIE = MBB->end (); diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp new file mode 100644 index 000000000000..d4e95b0d0210 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -0,0 +1,1209 @@ +//===--- HexagonSplitDouble.cpp 
-------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsdr" + +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include +#include +#include + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); +} + +namespace { + static cl::opt MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), + cl::desc("Maximum number of split partitions")); + static cl::opt MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), + cl::desc("Do not split loads or stores")); + + class HexagonSplitDoubleRegs : public MachineFunctionPass { + public: + static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), + TII(nullptr) { + initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon Split Double Registers"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + static const TargetRegisterClass *const DoubleRC; + + const HexagonRegisterInfo *TRI; + const HexagonInstrInfo *TII; + const MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + typedef std::set USet; + typedef std::map UUSetMap; + typedef std::pair UUPair; + typedef std::map UUPairMap; + typedef std::map LoopRegMap; + + bool isInduction(unsigned Reg, LoopRegMap &IRM) const; + bool isVolatileInstr(const MachineInstr *MI) const; + bool isFixedInstr(const MachineInstr *MI) const; + void partitionRegisters(UUSetMap &P2Rs); + int32_t profit(const MachineInstr *MI) const; + bool isProfitable(const USet &Part, LoopRegMap &IRM) const; + + void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); + void collectIndRegs(LoopRegMap &IRM); + + void createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR); + void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); + void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); + void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); + void splitExt(MachineInstr *MI, const UUPairMap &PairMap); + void splitShift(MachineInstr *MI, const UUPairMap &PairMap); + void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); + bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); + void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); + void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); + bool splitPartition(const USet &Part); + + static int Counter; + static void dump_partition(raw_ostream&, const USet&, + const TargetRegisterInfo&); + }; + char HexagonSplitDoubleRegs::ID; + int HexagonSplitDoubleRegs::Counter = 0; + const TargetRegisterClass *const 
HexagonSplitDoubleRegs::DoubleRC + = &Hexagon::DoubleRegsRegClass; +} + +INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", + "Hexagon Split Double Registers", false, false) + + +static inline uint32_t getRegState(const MachineOperand &R) { + assert(R.isReg()); + return getDefRegState(R.isDef()) | + getImplRegState(R.isImplicit()) | + getKillRegState(R.isKill()) | + getDeadRegState(R.isDead()) | + getUndefRegState(R.isUndef()) | + getInternalReadRegState(R.isInternalRead()) | + (R.isDebug() ? RegState::Debug : 0); +} + + +void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, + const USet &Part, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (auto I : Part) + dbgs() << ' ' << PrintReg(I, &TRI); + dbgs() << " }"; +} + + +bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { + for (auto I : IRM) { + const USet &Rs = I.second; + if (Rs.find(Reg) != Rs.end()) + return true; + } + return false; +} + + +bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { + for (auto &I : MI->memoperands()) + if (I->isVolatile()) + return true; + return false; +} + + +bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { + if (MI->mayLoad() || MI->mayStore()) + if (MemRefsFixed || isVolatileInstr(MI)) + return true; + if (MI->isDebugValue()) + return false; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + return true; + + case TargetOpcode::PHI: + case TargetOpcode::COPY: + break; + + case Hexagon::L2_loadrd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(1).isReg()) + break; + return true; + case Hexagon::S2_storerd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(0).isReg()) + break; + return true; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + + case Hexagon::A2_tfrpi: + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: + case Hexagon::CONST64_Int_Real: + + case Hexagon::A2_sxtw: + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + case Hexagon::S2_asl_i_p_or: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + break; + } + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + } + return false; +} + + +void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { + typedef std::map UUMap; + typedef std::vector UVect; + + unsigned NumRegs = MRI->getNumVirtRegs(); + BitVector DoubleRegs(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + unsigned R = TargetRegisterInfo::index2VirtReg(i); + if (MRI->getRegClass(R) == DoubleRC) + DoubleRegs.set(i); + } + + BitVector FixedRegs(NumRegs); + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + MachineInstr *DefI = MRI->getVRegDef(R); + // In some cases a register may exist, but never be defined or used. + // It should never appear anywhere, but mark it as "fixed", just to be + // safe. 
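The partitioning below is, at heart, a connected-components walk over the association map built from shared uses. The same worklist technique in miniature, on plain STL containers (a hypothetical stand-alone helper, not part of the pass; the map is assumed symmetric, as AssocMap is made symmetric below):

#include <map>
#include <set>
#include <vector>

// Flood-fill partition numbers over an undirected association graph,
// growing a worklist in place the same way partitionRegisters does.
static std::map<unsigned, unsigned>
numberPartitions(const std::map<unsigned, std::set<unsigned>> &Assoc) {
  std::map<unsigned, unsigned> Part;
  unsigned NextP = 1;
  for (const auto &Seed : Assoc) {
    if (Part.count(Seed.first))
      continue;
    std::vector<unsigned> WorkQ{Seed.first};
    for (unsigned i = 0; i < WorkQ.size(); ++i) { // WorkQ grows as we scan
      unsigned R = WorkQ[i];
      if (!Part.emplace(R, NextP).second)
        continue; // already numbered
      auto It = Assoc.find(R);
      if (It != Assoc.end())
        WorkQ.insert(WorkQ.end(), It->second.begin(), It->second.end());
    }
    ++NextP;
  }
  return Part;
}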
+ if (!DefI || isFixedInstr(DefI)) + FixedRegs.set(x); + } + + UUSetMap AssocMap; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + if (FixedRegs[x]) + continue; + unsigned R = TargetRegisterInfo::index2VirtReg(x); + DEBUG(dbgs() << PrintReg(R, TRI) << " ~~"); + USet &Asc = AssocMap[R]; + for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); + U != Z; ++U) { + MachineOperand &Op = *U; + MachineInstr *UseI = Op.getParent(); + if (isFixedInstr(UseI)) + continue; + for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { + MachineOperand &MO = UseI->getOperand(i); + // Skip non-registers or registers with subregisters. + if (&MO == &Op || !MO.isReg() || MO.getSubReg()) + continue; + unsigned T = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(T)) { + FixedRegs.set(x); + continue; + } + if (MRI->getRegClass(T) != DoubleRC) + continue; + unsigned u = TargetRegisterInfo::virtReg2Index(T); + if (FixedRegs[u]) + continue; + DEBUG(dbgs() << ' ' << PrintReg(T, TRI)); + Asc.insert(T); + // Make it symmetric. + AssocMap[T].insert(R); + } + } + DEBUG(dbgs() << '\n'); + } + + UUMap R2P; + unsigned NextP = 1; + USet Visited; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + if (Visited.count(R)) + continue; + // Create a new partition for R. + unsigned ThisP = FixedRegs[x] ? 0 : NextP++; + UVect WorkQ; + WorkQ.push_back(R); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + unsigned T = WorkQ[i]; + if (Visited.count(T)) + continue; + R2P[T] = ThisP; + Visited.insert(T); + // Add all registers associated with T. + USet &Asc = AssocMap[T]; + for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) + WorkQ.push_back(*J); + } + } + + for (auto I : R2P) + P2Rs[I.second].insert(I.first); +} + + +static inline int32_t profitImm(unsigned Lo, unsigned Hi) { + int32_t P = 0; + bool LoZ1 = false, HiZ1 = false; + if (Lo == 0 || Lo == 0xFFFFFFFF) + P += 10, LoZ1 = true; + if (Hi == 0 || Hi == 0xFFFFFFFF) + P += 10, HiZ1 = true; + if (!LoZ1 && !HiZ1 && Lo == Hi) + P += 3; + return P; +} + + +int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { + unsigned ImmX = 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::PHI: + for (const auto &Op : MI->operands()) + if (!Op.getSubReg()) + return 0; + return 10; + case TargetOpcode::COPY: + if (MI->getOperand(1).getSubReg() != 0) + return 10; + return 0; + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return -1; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + return 2; + + case Hexagon::A2_tfrpi: + case Hexagon::CONST64_Int_Real: { + uint64_t D = MI->getOperand(1).getImm(); + unsigned Lo = D & 0xFFFFFFFFULL; + unsigned Hi = D >> 32; + return profitImm(Lo, Hi); + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + return profitImm(MI->getOperand(1).getImm(), + MI->getOperand(2).getImm()); + case Hexagon::A4_combineri: + ImmX++; + case Hexagon::A4_combineir: { + ImmX++; + int64_t V = MI->getOperand(ImmX).getImm(); + if (V == 0 || V == -1) + return 10; + // Fall through into A2_combinew. 
+ } + case Hexagon::A2_combinew: + return 2; + + case Hexagon::A2_sxtw: + return 3; + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + return 1; + + case Hexagon::S2_asl_i_p_or: { + unsigned S = MI->getOperand(3).getImm(); + if (S == 0 || S == 32) + return 10; + return -1; + } + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + unsigned S = MI->getOperand(2).getImm(); + if (S == 0 || S == 32) + return 10; + if (S == 16) + return 5; + if (S == 48) + return 7; + return -10; + } + + return 0; +} + + +bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) + const { + unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; + int32_t TotalP = 0; + + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + int32_t P = profit(DefI); + if (P == INT_MIN) + return false; + TotalP += P; + // Reduce the profitability of splitting induction registers. + if (isInduction(DR, IRM)) + TotalP -= 30; + + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) { + MachineInstr *UseI = U->getParent(); + if (isFixedInstr(UseI)) { + FixedNum++; + // Calculate the cost of generating REG_SEQUENCE instructions. + for (auto &Op : UseI->operands()) { + if (Op.isReg() && Part.count(Op.getReg())) + if (Op.getSubReg()) + TotalP -= 2; + } + continue; + } + // If a register from this partition is used in a fixed instruction, + // and there is also a register in this partition that is used in + // a loop phi node, then decrease the splitting profit as this can + // confuse the modulo scheduler. + if (UseI->isPHI()) { + const MachineBasicBlock *PB = UseI->getParent(); + const MachineLoop *L = MLI->getLoopFor(PB); + if (L && L->getHeader() == PB) + LoopPhiNum++; + } + // Splittable instruction. + SplitNum++; + int32_t P = profit(UseI); + if (P == INT_MIN) + return false; + TotalP += P; + } + } + + if (FixedNum > 0 && LoopPhiNum > 0) + TotalP -= 20*LoopPhiNum; + + DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + return TotalP > 0; +} + + +void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, + USet &Rs) { + const MachineBasicBlock *HB = L->getHeader(); + const MachineBasicBlock *LB = L->getLoopLatch(); + if (!HB || !LB) + return; + + // Examine the latch branch. Expect it to be a conditional branch to + // the header (either "br-cond header" or "br-cond exit; br header"). + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TmpLB = const_cast(LB); + SmallVector Cond; + bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + // Only analyzable conditional branches. HII::AnalyzeBranch will put + // the branch opcode as the first element of Cond, and the predicate + // operand as the second. + if (BadLB || Cond.size() != 2) + return; + // Only simple jump-conditional (with or without negation). + if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) + return; + // Must go to the header. + if (TB != HB && FB != HB) + return; + assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + // Expect a predicate register. + unsigned PR = Cond[1].getReg(); + assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); + + // Get the registers on which the loop controlling compare instruction + // depends. 
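To make the profitImm accounting used above concrete, here is a stand-alone mirror with a few spot checks; the weights are the ones from the pass, but the harness itself is purely illustrative:

#include <cassert>
#include <cstdint>

// Halves that are all-zero or all-one are cheap to rematerialize (+10
// each); equal non-trivial halves earn a small bonus (+3).
static int profitImmMirror(uint32_t Lo, uint32_t Hi) {
  int P = 0;
  bool LoZ1 = (Lo == 0 || Lo == 0xFFFFFFFF);
  bool HiZ1 = (Hi == 0 || Hi == 0xFFFFFFFF);
  if (LoZ1) P += 10;
  if (HiZ1) P += 10;
  if (!LoZ1 && !HiZ1 && Lo == Hi) P += 3;
  return P;
}

int main() {
  assert(profitImmMirror(0, 0) == 20);                   // i64 0
  assert(profitImmMirror(0xFFFFFFFF, 0xFFFFFFFF) == 20); // i64 -1
  assert(profitImmMirror(0x12345678, 0x12345678) == 3);  // repeated halves
  assert(profitImmMirror(0x12345678, 0) == 10);          // one trivial half
  return 0;
}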
+ unsigned CmpR1 = 0, CmpR2 = 0; + const MachineInstr *CmpI = MRI->getVRegDef(PR); + while (CmpI->getOpcode() == Hexagon::C2_not) + CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); + + int Mask = 0, Val = 0; + bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + if (!OkCI) + return; + // Eliminate non-double input registers. + if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) + CmpR1 = 0; + if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) + CmpR2 = 0; + if (!CmpR1 && !CmpR2) + return; + + // Now examine the top of the loop: the phi nodes that could poten- + // tially define loop induction registers. The registers defined by + // such a phi node would be used in a 64-bit add, which then would + // be used in the loop compare instruction. + + // Get the set of all double registers defined by phi nodes in the + // loop header. + typedef std::vector UVect; + UVect DP; + for (auto &MI : *HB) { + if (!MI.isPHI()) + break; + const MachineOperand &MD = MI.getOperand(0); + unsigned R = MD.getReg(); + if (MRI->getRegClass(R) == DoubleRC) + DP.push_back(R); + } + if (DP.empty()) + return; + + auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { + for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); + I != E; ++I) { + const MachineInstr *UseI = I->getParent(); + if (UseI->getOpcode() != Hexagon::A2_addp) + continue; + // Get the output from the add. If it is one of the inputs to the + // loop-controlling compare instruction, then R is likely an induc- + // tion register. + unsigned T = UseI->getOperand(0).getReg(); + if (T == CmpR1 || T == CmpR2) + return false; + } + return true; + }; + UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + Rs.insert(DP.begin(), End); + Rs.insert(CmpR1); + Rs.insert(CmpR2); + + DEBUG({ + dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dump_partition(dbgs(), Rs, *TRI); + dbgs() << '\n'; + }); +} + + +void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { + typedef std::vector LoopVector; + LoopVector WorkQ; + + for (auto I : *MLI) + WorkQ.push_back(I); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + for (auto I : *WorkQ[i]) + WorkQ.push_back(I); + } + + USet Rs; + for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { + MachineLoop *L = WorkQ[i]; + Rs.clear(); + collectIndRegsForLoop(L, Rs); + if (!Rs.empty()) + IRM.insert(std::make_pair(L, Rs)); + } +} + + +void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) { + NewI->addOperand(Op); + continue; + } + // For register operands, set the subregister. + unsigned R = Op.getReg(); + unsigned SR = Op.getSubReg(); + bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isKill = Op.isKill(); + if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { + isKill = false; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) { + SR = SubR; + } else { + const UUPair &P = F->second; + R = (SubR == Hexagon::subreg_loreg) ? 
P.first : P.second; + SR = 0; + } + } + auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, + Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), + Op.isInternalRead()); + NewI->addOperand(CO); + } +} + + +void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, + const UUPairMap &PairMap) { + bool Load = MI->mayLoad(); + unsigned OrigOpc = MI->getOpcode(); + bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || + OrigOpc == Hexagon::S2_storerd_pi); + MachineInstr *LowI, *HighI; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + // Index of the base-address-register operand. + unsigned AdrX = PostInc ? (Load ? 2 : 1) + : (Load ? 1 : 0); + MachineOperand &AdrOp = MI->getOperand(AdrX); + unsigned RSA = getRegState(AdrOp); + MachineOperand &ValOp = Load ? MI->getOperand(0) + : (PostInc ? MI->getOperand(3) + : MI->getOperand(2)); + UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); + assert(F != PairMap.end()); + + if (Load) { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4); + } else { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off) + .addReg(P.first); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4) + .addReg(P.second); + } + + if (PostInc) { + // Create the increment of the address register. + int64_t Inc = Load ? MI->getOperand(3).getImm() + : MI->getOperand(2).getImm(); + MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); + const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); + unsigned NewR = MRI->createVirtualRegister(RC); + assert(!UpdOp.getSubReg() && "Def operand with subreg"); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) + .addReg(AdrOp.getReg(), RSA) + .addImm(Inc); + MRI->replaceRegWith(UpdOp.getReg(), NewR); + // The original instruction will be deleted later. + } + + // Generate a new pair of memory-operands. + MachineFunction &MF = *B.getParent(); + for (auto &MO : MI->memoperands()) { + const MachinePointerInfo &Ptr = MO->getPointerInfo(); + unsigned F = MO->getFlags(); + int A = MO->getAlignment(); + + auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); + LowI->addMemOperand(MF, Tmp1); + auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); + HighI->addMemOperand(MF, Tmp2); + } +} + + +void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isImm()); + uint64_t V = Op1.getImm(); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + // The operand to A2_tfrsi can only have 32 significant bits. 
Immediate + // values in MachineOperand are stored as 64-bit integers, and so the + // value -1 may be represented either as 64-bit -1, or 4294967295. Both + // will have the 32 higher bits truncated in the end, but -1 will remain + // as -1, while the latter may appear to be a large unsigned value + // requiring a constant extender. The casting to int32_t will select the + // former representation. (The same reasoning applies to all 32-bit + // values.) + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(int32_t(V & 0xFFFFFFFFULL)); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(int32_t(V >> 32)); +} + + +void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + if (Op1.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(Op1.getImm()); + } else if (Op1.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) + .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); + + if (Op2.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(Op2.getImm()); + } else if (Op2.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); +} + + +void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned RS = getRegState(Op1); + + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); + BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) + .addReg(Op1.getReg(), RS, Op1.getSubReg()) + .addImm(31); +} + + +void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); + int64_t Sh64 = Op2.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + unsigned Opc = MI->getOpcode(); + bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); + bool Left = !Right; + bool Signed = (Opc == S2_asr_i_p); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS = getRegState(Op1); + unsigned ShiftOpc = Left ? S2_asl_i_r + : (Signed ? S2_asr_i_r : S2_lsr_i_r); + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + if (S == 0) { + // No shift, subregister copy. 
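The int32_t narrowing that splitImmediate depends on (described above) can be spot-checked in isolation; this assumes the two's-complement conversion behavior of the hosts LLVM builds on:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t V = 0xFFFFFFFFFFFFFFFFULL;      // an i64 -1 stored as a u64 imm
  int32_t Lo = int32_t(V & 0xFFFFFFFFULL); // low half
  int32_t Hi = int32_t(V >> 32);           // high half
  assert(Lo == -1 && Hi == -1); // both halves read back as -1, not 2^32-1
  return 0;
}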
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) + .addReg(Op1.getReg(), RS, HiSR); + } else if (S < 32) { + const TargetRegisterClass *IntRC = &IntRegsRegClass; + unsigned TmpR = MRI->createVirtualRegister(IntRC); + // Expansion: + // Shift left: DR = shl R, #s + // LoR = shl R.lo, #s + // TmpR = extractu R.lo, #s, #32-s + // HiR = or (TmpR, asl(R.hi, #s)) + // Shift right: DR = shr R, #s + // HiR = shr R.hi, #s + // TmpR = shr R.lo, #s + // LoR = insert TmpR, R.hi, #s, #32-s + + // Shift left: + // LoR = shl R.lo, #s + // Shift right: + // TmpR = shr R.lo, #s + + // Make a special case for A2_aslh and A2_asrh (they are predicable as + // opposed to S2_asl_i_r/S2_asr_i_r). + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S); + + if (Left) { + // TmpR = extractu R.lo, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + // HiR = or (TmpR, asl(R.hi, #s)) + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S); + } else { + // HiR = shr R.hi, #s + BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) + .addImm(S); + // LoR = insert TmpR, R.hi, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_insert), LoR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S) + .addImm(32-S); + } + } else if (S == 32) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); + if (!Signed) + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) + .addImm(0); + else // Must be right shift. + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + } else if (S < 64) { + S -= 32; + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) + .addImm(S); + + if (Signed) + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + else + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? 
LoR : HiR)) + .addImm(0); + } +} + + +void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); + assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); + int64_t Sh64 = Op3.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS1 = getRegState(Op1); + unsigned RS2 = getRegState(Op2); + const TargetRegisterClass *IntRC = &IntRegsRegClass; + + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + // Op0 = S2_asl_i_p_or Op1, Op2, Op3 + // means: Op0 = or (Op1, asl(Op2, Op3)) + + // Expansion of + // DR = or (R1, asl(R2, #s)) + // + // LoR = or (R1.lo, asl(R2.lo, #s)) + // Tmp1 = extractu R2.lo, #s, #32-s + // Tmp2 = or R1.hi, Tmp1 + // HiR = or (Tmp2, asl(R2.hi, #s)) + + if (S == 0) { + // DR = or (R1, asl(R2, #0)) + // -> or (R1, R2) + // i.e. LoR = or R1.lo, R2.lo + // HiR = or R1.hi, R2.hi + BuildMI(B, MI, DL, TII->get(A2_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, HiSR); + } else if (S < 32) { + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S); + unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(TmpR1); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR2) + .addReg(Op2.getReg(), RS2, HiSR) + .addImm(S); + } else if (S == 32) { + // DR = or (R1, asl(R2, #32)) + // -> or R1, R2.lo + // LoR = R1.lo + // HiR = or R1.hi, R2.lo + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR); + } else if (S < 64) { + // DR = or (R1, asl(R2, #s)) + // + // LoR = R1:lo + // HiR = or (R1:hi, asl(R2:lo, #s-32)) + S -= 32; + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR) + .addImm(S); + } +} + + +bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, + const UUPairMap &PairMap) { + DEBUG(dbgs() << "Splitting: " << *MI); + bool Split = false; + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + + switch (Opc) { + case TargetOpcode::PHI: + case TargetOpcode::COPY: { + unsigned DstR = MI->getOperand(0).getReg(); + if (MRI->getRegClass(DstR) == DoubleRC) { + createHalfInstr(Opc, MI, PairMap, subreg_loreg); + createHalfInstr(Opc, MI, PairMap, subreg_hireg); + Split = true; + } + break; + } + case A2_andp: + createHalfInstr(A2_and, MI, PairMap, 
subreg_loreg); + createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_orp: + createHalfInstr(A2_or, MI, PairMap, subreg_loreg); + createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_xorp: + createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); + createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + Split = true; + break; + + case L2_loadrd_io: + case L2_loadrd_pi: + case S2_storerd_io: + case S2_storerd_pi: + splitMemRef(MI, PairMap); + Split = true; + break; + + case A2_tfrpi: + case CONST64_Int_Real: + splitImmediate(MI, PairMap); + Split = true; + break; + + case A2_combineii: + case A4_combineir: + case A4_combineii: + case A4_combineri: + case A2_combinew: + splitCombine(MI, PairMap); + Split = true; + break; + + case A2_sxtw: + splitExt(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p: + case S2_asr_i_p: + case S2_lsr_i_p: + splitShift(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p_or: + splitAslOr(MI, PairMap); + Split = true; + break; + + default: + llvm_unreachable("Instruction not splitable"); + return false; + } + + return Split; +} + + +void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, + const UUPairMap &PairMap) { + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) + continue; + unsigned R = Op.getReg(); + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &P = F->second; + switch (Op.getSubReg()) { + case Hexagon::subreg_loreg: + Op.setReg(P.first); + break; + case Hexagon::subreg_hireg: + Op.setReg(P.second); + break; + } + Op.setSubReg(0); + } +} + + +void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) + continue; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &Pr = F->second; + unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) + .addReg(Pr.first) + .addImm(Hexagon::subreg_loreg) + .addReg(Pr.second) + .addImm(Hexagon::subreg_hireg); + Op.setReg(NewDR); + } +} + + +bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + typedef std::set MISet; + bool Changed = false; + + DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); + dbgs() << '\n'); + + UUPairMap PairMap; + + MISet SplitIns; + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + SplitIns.insert(DefI); + + // Collect all instructions, including fixed ones. We won't split them, + // but we need to visit them again to insert the REG_SEQUENCE instructions. 
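// Sketch, not pass code: the bookkeeping splitPartition sets up just below.
// Every 64-bit register DR gets a (LoR, HiR) pair of 32-bit registers;
// Reg/RegPairMap/recordPair are illustrative names.
#include <map>
#include <utility>
typedef unsigned Reg;
typedef std::map<Reg, std::pair<Reg, Reg> > RegPairMap;
inline void recordPair(RegPairMap &PM, Reg DR, Reg LoR, Reg HiR) {
  PM.insert(std::make_pair(DR, std::make_pair(LoR, HiR)));  // read by splitInstr
}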
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + SplitIns.insert(U->getParent()); + + unsigned LoR = MRI->createVirtualRegister(IntRC); + unsigned HiR = MRI->createVirtualRegister(IntRC); + DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> " + << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n'); + PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); + } + + MISet Erase; + for (auto MI : SplitIns) { + if (isFixedInstr(MI)) { + collapseRegPairs(MI, PairMap); + } else { + bool Done = splitInstr(MI, PairMap); + if (Done) + Erase.insert(MI); + Changed |= Done; + } + } + + for (unsigned DR : Part) { + // Before erasing "double" instructions, revisit all uses of the double + // registers in this partition, and replace all subregister uses with + // the corresponding single registers. + MISet Uses; + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + Uses.insert(U->getParent()); + for (auto M : Uses) + replaceSubregUses(M, PairMap); + } + + for (auto MI : Erase) { + MachineBasicBlock *B = MI->getParent(); + B->erase(MI); + } + + return Changed; +} + + +bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Splitting double registers in function: " + << MF.getName() << '\n'); + + auto &ST = MF.getSubtarget<HexagonSubtarget>(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + + UUSetMap P2Rs; + LoopRegMap IRM; + + collectIndRegs(IRM); + partitionRegisters(P2Rs); + + DEBUG({ + dbgs() << "Register partitioning: (partition #0 is fixed)\n"; + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + dbgs() << '#' << I->first << " -> "; + dump_partition(dbgs(), I->second, *TRI); + dbgs() << '\n'; + } + }); + + bool Changed = false; + int Limit = MaxHSDR; + + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + if (I->first == 0) + continue; + if (Limit >= 0 && Counter >= Limit) + break; + USet &Part = I->second; + DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n'); + if (!isProfitable(Part, IRM)) + continue; + Counter++; + Changed |= splitPartition(Part); + } + + return Changed; +} + +FunctionPass *llvm::createHexagonSplitDoubleRegs() { + return new HexagonSplitDoubleRegs(); +} diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp new file mode 100644 index 000000000000..b5339ff4c0dc --- /dev/null +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -0,0 +1,616 @@ +//===--- HexagonStoreWidening.cpp------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Replace sequences of "narrow" stores to adjacent memory locations with +// fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure".
Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-widen-stores" + +#include "HexagonTargetMachine.h" + +#include "llvm/PassSupport.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <algorithm> + + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonStoreWidening(); + void initializeHexagonStoreWideningPass(PassRegistry&); +} + +namespace { + struct HexagonStoreWidening : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + + public: + static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Store Widening"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static bool handledStoreType(const MachineInstr *MI); + + private: + static const int MaxWideSize = 4; + + typedef std::vector<MachineInstr*> InstrGroup; + typedef std::vector<InstrGroup> InstrGroupList; + + bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); + bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); + void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &Group); + void createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processStoreGroup(InstrGroup &Group); + bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceStores(InstrGroup &OG, InstrGroup &NG); + bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); + }; + +} // namespace + + +namespace { + +// Some local helper functions...
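// Sketch, not pass code: the little-endian identity the widening relies on.
// Two adjacent byte stores of V0 (offset 0) and V1 (offset 1) have the same
// effect as one halfword store of (V0 | V1 << 8); packBytesLE is an
// illustrative name.
#include <cstdint>
inline uint16_t packBytesLE(uint8_t V0, uint8_t V1) {
  return uint16_t(V0) | uint16_t(V1) << 8;  // e.g. 0x12, 0x34 -> 0x3412
}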
+unsigned getBaseAddressRegister(const MachineInstr *MI) { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isReg() && "Expecting register operand"); + return MO.getReg(); +} + +int64_t getStoreOffset(const MachineInstr *MI) { + unsigned OpC = MI->getOpcode(); + assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); + + switch (OpC) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: { + const MachineOperand &MO = MI->getOperand(1); + assert(MO.isImm() && "Expecting immediate offset"); + return MO.getImm(); + } + } + dbgs() << *MI; + llvm_unreachable("Store offset calculation missing for a handled opcode"); + return 0; +} + +const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { + assert(!MI->memoperands_empty() && "Expecting memory operands"); + return **MI->memoperands_begin(); +} + +} // namespace + + +char HexagonStoreWidening::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) + + +// Filtering function: any stores whose opcodes are not "approved" by +// this function will not be subjected to widening. +inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { + // For now, only handle stores of immediate values. + // Also, reject stores to stack slots. + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + // Base address must be a register. (Implement FI later.) + return MI->getOperand(0).isReg(); + default: + return false; + } +} + + +// Check if the machine memory operand MMO is aliased with any of the +// stores in the store group Stores. +bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, + const MachineMemOperand &MMO) { + if (!MMO.getValue()) + return true; + + MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); + + for (auto SI : Stores) { + const MachineMemOperand &SMO = getStoreTarget(SI); + if (!SMO.getValue()) + return true; + + MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); + if (AA->alias(L, SL)) + return true; + } + + return false; +} + + +// Check if the machine instruction MI accesses any storage aliased with +// any store in the group Stores. +bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, + const MachineInstr *MI) { + for (auto &I : MI->memoperands()) + if (instrAliased(Stores, *I)) + return true; + return false; +} + + +// Inspect a machine basic block, and generate store groups out of stores +// encountered in the block. +// +// A store group is a group of stores that use the same base register, +// and which can be reordered within that group without altering the +// semantics of the program. A single store group could be widened as +// a whole, if there existed a single store instruction with the same +// semantics as the entire group. In many cases, a single store group +// may need more than one wide store. +void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups) { + InstrGroup AllInsns; + + // Copy all instruction pointers from the basic block to a temporary + // list. This will allow operating on the list, and modifying its + // elements without affecting the basic block.
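// Sketch, not pass code: the claim-and-null idiom used below. Instruction
// pointers are copied out of the block, and entries are nulled as a group
// claims them, so later scans skip them without touching the block itself.
// claimAll/T are illustrative names (T stands in for MachineInstr).
#include <vector>
template <typename T> void claimAll(std::vector<T *> &Work) {
  for (T *&P : Work)
    if (P /* && claimed by the current group */)
      P = nullptr;  // marks the entry as consumed
}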
+ for (auto &I : MBB) + AllInsns.push_back(&I); + + // Traverse all instructions in the AllInsns list, and if we encounter + // a store, then try to create a store group starting at that instruction + // i.e. a sequence of independent stores that can be widened. + for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { + MachineInstr *MI = *I; + // Skip null pointers (processed instructions). + if (!MI || !handledStoreType(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createStoreGroup(MI, I+1, E, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + + +// Create a single store group. The stores need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being stored into. +void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, + InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { + assert(handledStoreType(BaseStore) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseStore); + InstrGroup Other; + + Group.push_back(BaseStore); + + for (auto I = Begin; I != End; ++I) { + MachineInstr *MI = *I; + if (!MI) + continue; + + if (handledStoreType(MI)) { + // If this store instruction is aliased with anything already in the + // group, terminate the group now. + if (instrAliased(Group, getStoreTarget(MI))) + return; + // If this store is aliased to any of the memory instructions we have + // seen so far (that are not a part of this group), terminate the group. + if (instrAliased(Other, getStoreTarget(MI))) + return; + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + Group.push_back(MI); + *I = 0; + continue; + } + } + + // Assume calls are aliased to everything. + if (MI->isCall() || MI->hasUnmodeledSideEffects()) + return; + + if (MI->mayLoad() || MI->mayStore()) { + if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) + return; + Other.push_back(MI); + } + } // for +} + + +// Check if store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledStoreType(S1) || !handledStoreType(S2)) + return false; + + const MachineMemOperand &S1MO = getStoreTarget(S1); + + // Currently only handling immediate stores. + int Off1 = S1->getOperand(1).getImm(); + int Off2 = S2->getOperand(1).getImm(); + + return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) + : int(Off1+S1MO.getSize()) == Off2; +} + + +/// Given a sequence of adjacent stores, and a maximum size of a single wide +/// store, pick a group of stores that can be replaced by a single store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. 
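// Sketch, not pass code: the offset test selectStores applies below. A
// power-of-two-sized store can begin a wider (2*Size) store only if the low
// bits of its offset are clear; (2*Size - 1) is exactly that bit mask.
// canStartWiderStore is an illustrative name.
#include <cstdint>
inline bool canStartWiderStore(uint32_t Size, int64_t Offset) {
  return ((2 * Size - 1) & Offset) == 0;
  // canStartWiderStore(1, 0) -> true  (byte at 0 may become a halfword)
  // canStartWiderStore(1, 1) -> false (byte at 1 cannot start a halfword)
}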
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, + unsigned MaxSize) { + assert(Begin != End && "No instructions to analyze"); + assert(OG.empty() && "Old group not empty on entry"); + + if (std::distance(Begin, End) <= 1) + return false; + + MachineInstr *FirstMI = *Begin; + assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); + const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); + unsigned Alignment = FirstMMO.getAlignment(); + unsigned SizeAccum = FirstMMO.getSize(); + unsigned FirstOffset = getStoreOffset(FirstMI); + + // The initial value of SizeAccum should always be a power of 2. + assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); + + // If the size of the first store equals or exceeds the limit, do nothing. + if (SizeAccum >= MaxSize) + return false; + + // If the size of the first store is greater than or equal to the alignment + // of the address stored to, then the store cannot be made any wider. + if (SizeAccum >= Alignment) + return false; + + // The offset of a store will put restrictions on how wide the store can be. + // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. + // If the first store already exhausts the offset limits, quit. Test this + // by checking if the next wider size would exceed the limit. + if ((2*SizeAccum-1) & FirstOffset) + return false; + + OG.push_back(FirstMI); + MachineInstr *S1 = FirstMI, *S2 = *(Begin+1); + InstrGroup::iterator I = Begin+1; + + // Pow2Num will be the largest number of elements in OG such that the sum + // of sizes of stores 0...Pow2Num-1 will be a power of 2. + unsigned Pow2Num = 1; + unsigned Pow2Size = SizeAccum; + + // Be greedy: keep accumulating stores as long as they are to adjacent + // memory locations, and as long as the total number of bytes stored + // does not exceed the limit (MaxSize). + // Keep track of when the total size covered is a power of 2, since + // this is a size a single store can cover. + while (I != End) { + S2 = *I; + // Stores are sorted, so if S1 and S2 are not adjacent, there won't be + // any other store to fill the "hole". + if (!storesAreAdjacent(S1, S2)) + break; + + unsigned S2Size = getStoreTarget(S2).getSize(); + if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) + break; + + OG.push_back(S2); + SizeAccum += S2Size; + if (isPowerOf2_32(SizeAccum)) { + Pow2Num = OG.size(); + Pow2Size = SizeAccum; + } + if ((2*Pow2Size-1) & FirstOffset) + break; + + S1 = S2; + ++I; + } + + // The stores don't add up to anything that can be widened. Clean up. + if (Pow2Num <= 1) { + OG.clear(); + return false; + } + + // Only leave the stores being widened. + OG.resize(Pow2Num); + TotalSize = Pow2Size; + return true; +} + + +/// Given an "old group" OG of stores, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for store-immediate. +bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + // XXX Current limitations: + // - only expect stores of immediate values in OG, + // - only handle a TotalSize of up to 4. + + if (TotalSize > 4) + return false; + + unsigned Acc = 0; // Value accumulator. + unsigned Shift = 0; + + for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { + MachineInstr *MI = *I; + const MachineMemOperand &MMO = getStoreTarget(MI); + MachineOperand &SO = MI->getOperand(2); // Source.
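// Sketch, not pass code: the accumulation loop here, standalone. Packs a
// list of (immediate, size-in-bytes) stores, lowest address first, into one
// wider value; packImms is an illustrative name.
#include <cstdint>
#include <utility>
#include <vector>
inline uint32_t packImms(const std::vector<std::pair<int64_t, unsigned> > &St) {
  uint32_t Acc = 0, Shift = 0;
  for (unsigned I = 0; I != St.size(); ++I) {
    unsigned NBits = St[I].second * 8;
    uint32_t Mask = 0xFFFFFFFFU >> (32 - NBits);
    Acc |= (uint32_t(St[I].first) & Mask) << Shift;  // {0x12,1},{0x34,1} -> 0x3412
    Shift += NBits;
  }
  return Acc;
}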
+ assert(SO.isImm() && "Expecting an immediate operand"); + + unsigned NBits = MMO.getSize()*8; + unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); + unsigned Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + } + + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getStoreTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlignment(), + OldM.getAAInfo()); + + if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : + (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addImm(Val); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) + .addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : + (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addReg(VReg, RegState::Kill); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } + + return true; +} + + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { + DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. The best one is right before + // the first store in the OG, but in the order in which the stores occur + // in the program list. Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + SmallPtrSet InstrSet; + for (auto I : OG) + InstrSet.insert(I); + + // Traverse the block, until we hit an instruction from OG. + for (auto &I : *MBB) { + if (InstrSet.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); + + bool AtBBStart = false; + + // InsertAt points at the first instruction that will be removed. 
We need + // to move it out of the way, so it remains valid after removing all the + // old stores, and so we are able to recover it back to the proper insertion + // position. + if (InsertAt != MBB->begin()) + --InsertAt; + else + AtBBStart = true; + + for (auto I : OG) + I->eraseFromParent(); + + if (!AtBBStart) + ++InsertAt; + else + InsertAt = MBB->begin(); + + for (auto I : NG) + MBB->insert(InsertAt, I); + + return true; +} + + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && + createWideStores(OG, NG, CollectedSize) && + replaceStores(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); + I += OG.size()-1; + + Changed = true; + } + + return Changed; +} + + +// Process a single basic block: create the store groups, and replace them +// with the widened stores, if possible. Processing of each basic block +// is independent from processing of any other basic block. This +// transformation could be stopped after having processed any basic block +// without any ill effects (other than not having performed widening in the +// unprocessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + createStoreGroups(MBB, SGs); + + auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { + return getStoreOffset(A) < getStoreOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Store group with fewer than 2 elements"); + std::sort(G.begin(), G.end(), Less); + + Changed |= processStoreGroup(G); + } + + return Changed; +} + + +bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + MF = &MFn; + auto &ST = MFn.getSubtarget<HexagonSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MFn.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool Changed = false; + + for (auto &B : MFn) + Changed |= processBasicBlock(B); + + return Changed; +} + + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index cd482b3e3af1..aa0efd4f65e0 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -16,6 +16,8 @@ #include "HexagonRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include <map> + using namespace llvm; #define DEBUG_TYPE "hexagon-subtarget" @@ -24,49 +26,65 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" -static cl::opt<bool> -EnableV3("enable-hexagon-v3", cl::Hidden, - cl::desc("Enable Hexagon V3 instructions.")); +static cl::opt<bool> EnableMemOps("enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); -static cl::opt<bool> -EnableMemOps( - "enable-hexagon-memops", -
cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), - cl::desc( - "Generate V4 MEMOP in code generation for Hexagon target")); +static cl::opt DisableMemOps("disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); -static cl::opt -DisableMemOps( - "disable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), - cl::desc( - "Do not generate V4 MEMOP in code generation for Hexagon target")); +static cl::opt EnableIEEERndNear("enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); -static cl::opt -EnableIEEERndNear( - "enable-hexagon-ieee-rnd-near", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Generate non-chopped conversion from fp to int.")); +static cl::opt EnableBSBSched("enable-bsb-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt EnableHexagonHVXDouble("enable-hexagon-hvx-double", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Double Vector eXtensions")); + +static cl::opt EnableHexagonHVX("enable-hexagon-hvx", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector eXtensions")); static cl::opt DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + +void HexagonSubtarget::initializeEnvironment() { + UseMemOps = false; + ModeIEEERndNear = false; + UseBSBScheduling = false; +} HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - // If the programmer has not specified a Hexagon version, default to -mv4. - if (CPUString.empty()) - CPUString = "hexagonv4"; + CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); - if (CPUString == "hexagonv4") { - HexagonArchVersion = V4; - } else if (CPUString == "hexagonv5") { - HexagonArchVersion = V5; - } else { + static std::map CpuTable { + { "hexagonv4", V4 }, + { "hexagonv5", V5 }, + { "hexagonv55", V55 }, + { "hexagonv60", V60 }, + }; + + auto foundIt = CpuTable.find(CPUString); + if (foundIt != CpuTable.end()) + HexagonArchVersion = foundIt->second; + else llvm_unreachable("Unrecognized Hexagon processor version"); - } + UseHVXOps = false; + UseHVXDblOps = false; ParseSubtargetFeatures(CPUString, FS); + + if (EnableHexagonHVX.getPosition()) + UseHVXOps = EnableHexagonHVX; + if (EnableHexagonHVXDouble.getPosition()) + UseHVXDblOps = EnableHexagonHVXDouble; + return *this; } @@ -76,6 +94,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering() { + initializeEnvironment(); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); @@ -91,6 +111,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, ModeIEEERndNear = true; else ModeIEEERndNear = false; + + UseBSBScheduling = hasV60TOps() && EnableBSBSched; } // Pin the vtable to this file. 
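// Sketch, not pass code: the CPU-string lookup above, standalone. ArchEnum
// and lookupArch are illustrative names; the subtarget code itself hits
// llvm_unreachable where this sketch returns false.
#include <map>
#include <string>
enum ArchEnum { V4, V5, V55, V60 };
inline bool lookupArch(const std::string &CPU, ArchEnum &Out) {
  static const std::map<std::string, ArchEnum> Table = {
      {"hexagonv4", V4}, {"hexagonv5", V5},
      {"hexagonv55", V55}, {"hexagonv60", V60}};
  std::map<std::string, ArchEnum>::const_iterator It = Table.find(CPU);
  if (It == Table.end())
    return false;  // unrecognized Hexagon processor version
  Out = It->second;
  return true;
}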
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 34cdad786f82..c7ae139c4346 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -34,15 +34,19 @@ namespace llvm { class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); - bool UseMemOps; + bool UseMemOps, UseHVXOps, UseHVXDblOps; bool ModeIEEERndNear; public: enum HexagonArchEnum { - V4, V5 + V4, V5, V55, V60 }; HexagonArchEnum HexagonArchVersion; + /// True if the target should use Back-Skip-Back scheduling. This is the + /// default for V60. + bool UseBSBScheduling; + private: std::string CPUString; HexagonInstrInfo InstrInfo; @@ -50,6 +54,7 @@ private: HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; InstrItineraryData InstrItins; + void initializeEnvironment(); public: HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, @@ -84,7 +89,16 @@ public: bool useMemOps() const { return UseMemOps; } bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } + bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } + bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } + bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useHVXOps() const { return UseHVXOps; } + bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } + bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + + bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; // Always use the TargetLowering default scheduler. 
// FIXME: This will use the vliw scheduler which is probably just hurting @@ -98,7 +112,7 @@ public: return Hexagon_SMALL_DATA_THRESHOLD; } const HexagonArchEnum &getHexagonArchVersion() const { - return HexagonArchVersion; + return HexagonArchVersion; } }; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index b50442969a29..9dccd696c989 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -16,12 +16,12 @@ #include "HexagonISelLowering.h" #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" +#include "HexagonTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -33,10 +33,16 @@ static cl::opt DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); +static cl::opt DisableStoreWidening("disable-store-widen", + cl::Hidden, cl::init(false), cl::desc("Disable store widening")); + static cl::opt EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); +static cl::opt EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + static cl::opt EnableGenInsert("hexagon-insert", cl::init(true), cl::Hidden, cl::desc("Generate \"insert\" instructions")); @@ -46,10 +52,22 @@ static cl::opt EnableCommGEP("hexagon-commgep", cl::init(true), static cl::opt EnableGenExtract("hexagon-extract", cl::init(true), cl::Hidden, cl::desc("Generate \"extract\" instructions")); +static cl::opt EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, + cl::desc("Enable converting conditional transfers into MUX instructions")); + static cl::opt EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); +static cl::opt DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, + cl::desc("Disable splitting double registers")); + +static cl::opt EnableBitSimplify("hexagon-bit", cl::init(true), + cl::Hidden, cl::desc("Bit simplification")); + +static cl::opt EnableLoopResched("hexagon-loop-resched", cl::init(true), + cl::Hidden, cl::desc("Loop rescheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. 
In particular, it seems that it is not possible to get @@ -72,23 +90,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { + FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonSplitDoubleRegs(); + FunctionPass *createHexagonStoreWidening(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -101,13 +126,46 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS, - Options, RM, CM, OL), - TLOF(make_unique()), - Subtarget(TT, CPU, FS, *this) { - initAsmInfo(); + : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" + "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" + "n16:32", TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique()) { + initAsmInfo(); } +const HexagonSubtarget * +HexagonTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); + }); +} + + HexagonTargetMachine::~HexagonTargetMachine() {} namespace { @@ -166,7 +224,7 @@ bool HexagonPassConfig::addInstSelector() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonRemoveExtendArgs(TM)); + addPass(createHexagonOptimizeSZextends()); addPass(createHexagonISelDag(TM, getOptLevel())); @@ -174,19 +232,33 @@ bool HexagonPassConfig::addInstSelector() { // Create logical operations on predicate registers. 
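// Sketch, not pass code: the per-function subtarget caching pattern used by
// getSubtargetImpl above. One subtarget is built per distinct CPU+feature
// string and then reused; Subtarget/getOrCreate are illustrative names.
#include <map>
#include <memory>
#include <string>
struct Subtarget { std::string CPU, FS; };
inline Subtarget *getOrCreate(
    std::map<std::string, std::unique_ptr<Subtarget> > &Cache,
    const std::string &CPU, const std::string &FS) {
  std::unique_ptr<Subtarget> &Slot = Cache[CPU + FS];
  if (!Slot)
    Slot.reset(new Subtarget{CPU, FS});  // built once, then reused
  return Slot.get();
}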
if (EnableGenPred) addPass(createHexagonGenPredicate(), false); + // Rotate loops to expose bit-simplification opportunities. + if (EnableLoopResched) + addPass(createHexagonLoopRescheduling(), false); + // Split double registers. + if (!DisableHSDR) + addPass(createHexagonSplitDoubleRegs()); + // Bit simplification. + if (EnableBitSimplify) + addPass(createHexagonBitSimplify(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); if (EnableGenInsert) addPass(createHexagonGenInsert(), false); + if (EnableEarlyIf) + addPass(createHexagonEarlyIfConversion(), false); } return false; } void HexagonPassConfig::addPreRegAlloc() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + if (!DisableStoreWidening) + addPass(createHexagonStoreWidening(), false); if (!DisableHardwareLoops) addPass(createHexagonHardwareLoops(), false); + } } void HexagonPassConfig::addPostRegAlloc() { @@ -215,6 +287,13 @@ void HexagonPassConfig::addPreEmitPass() { if (!NoOpt) { if (!DisableHardwareLoops) addPass(createHexagonFixupHwLoops(), false); + // Generate MUX from pairs of conditional transfers. + if (EnableGenMux) + addPass(createHexagonGenMux(), false); + addPass(createHexagonPacketizer(), false); } + + // Add CFI instructions if necessary. + addPass(createHexagonCallFrameInformation(), false); } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 115eadb98c33..968814b3ea32 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -16,6 +16,7 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -24,7 +25,7 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - HexagonSubtarget Subtarget; + mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; public: HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -32,20 +33,18 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - const HexagonSubtarget *getSubtargetImpl(const Function &) const override { - return &Subtarget; - } + const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; + static unsigned getModuleMatchQuality(const Module &M); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; - TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF.get(); + HexagonTargetObjectFile *getObjFileLowering() const override { + return static_cast<HexagonTargetObjectFile*>(TLOF.get()); } }; -extern bool flag_aligned_memcpy; - } // end namespace llvm #endif diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 4ea0e0d11998..ccca62021f5b 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -73,9 +73,10 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (!GVA) return false; - if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { + if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); } return false; @@ -89,7 +90,7 @@ HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; - if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; // Otherwise, we work the same as ELF. diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp new file mode 100644 index 000000000000..a05443eb83b8 --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagontti" + +TargetTransformInfo::PopcntSupportKind +HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { + // Return Fast Hardware support as every input < 64 bits will be promoted + // to 64 bits. + return TargetTransformInfo::PSK_FastHardware; +} + +// The Hexagon target can unroll loops with run-time trip counts. +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Runtime = UP.Partial = true; +} + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { + return vector ? 0 : 32; +} diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h new file mode 100644 index 000000000000..71ae17a19e5f --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
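// Sketch, not pass code: what the three TTI hooks above report, mocked
// standalone (MockHexagonTTI is an illustrative name). Popcount is fast
// because sub-64-bit inputs are promoted to 64 bits; unrolling permits
// runtime trip counts; 32 scalar and no vector registers are advertised.
struct MockHexagonTTI {
  bool FastPopcnt() const { return true; }
  bool RuntimeUnroll() const { return true; }
  int NumRegisters(bool Vector) const { return Vector ? 0 : 32; }
};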
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HexagonTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const HexagonSubtarget *ST; + const HexagonTargetLowering *TLI; + + const HexagonSubtarget *getST() const { return ST; } + const HexagonTargetLowering *getTLI() const { return TLI; } + +public: + explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + HexagonTTIImpl(const HexagonTTIImpl &Arg) + : BaseT(static_cast(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + HexagonTTIImpl(HexagonTTIImpl &&Arg) + : BaseT(std::move(static_cast(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + // The Hexagon target can unroll loops with run-time trip counts. + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool vector) const; + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index b91a3f6f8c6c..81850548bb6e 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,35 +16,19 @@ // prune the dependence. 
// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/DFAPacketizer.h" -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include #include @@ -52,9 +36,22 @@ using namespace llvm; #define DEBUG_TYPE "packets" +static cl::opt DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + static cl::opt PacketizeVolatiles("hexagon-packetize-volatiles", - cl::ZeroOrMore, cl::Hidden, cl::init(true), - cl::desc("Allow non-solo packetization of volatile memory references")); + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt ScheduleInlineAsm; namespace llvm { FunctionPass *createHexagonPacketizer(); @@ -64,7 +61,6 @@ namespace llvm { namespace { class HexagonPacketizer : public MachineFunctionPass { - public: static char ID; HexagonPacketizer() : MachineFunctionPass(ID) { @@ -73,103 +69,25 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { return "Hexagon Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; }; + char HexagonPacketizer::ID = 0; - - class HexagonPacketizerList : public VLIWPacketizerList { - - private: - - // Has the instruction been promoted to a dot-new instruction. - bool PromotedToDotNew; - - // Has the instruction been glued to allocframe. - bool GlueAllocframeStore; - - // Has the feeder instruction been glued to new value jump. - bool GlueToNewValueJump; - - // Check if there is a dependence between some instruction already in this - // packet and this instruction. 
- bool Dependence; - - // Only check for dependence if there are resources available to - // schedule this instruction. - bool FoundSequentialDependence; - - /// \brief A handle to the branch probability pass. - const MachineBranchProbabilityInfo *MBPI; - - // Track MIs with ignored dependece. - std::vector IgnoreDepMIs; - - public: - // Ctor. - HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI); - - // initPacketizerState - initialize some internal flags. - void initPacketizerState() override; - - // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override; - - // isSoloInstruction - return true if instruction MI can not be packetized - // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override; - - // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ - // together. - bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; - - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI - // and SUJ. - bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; - private: - bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); - bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map &MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass *RC); - bool - CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map &MIToSUnit, - MachineBasicBlock::iterator &MII); - bool CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map &MIToSUnit); - bool DemoteToDotOld(MachineInstr *MI); - bool ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map &MIToSUnit); - bool RestrictingDepExistInPacket(MachineInstr *, unsigned, - const std::map &); - bool isNewifiable(MachineInstr* MI); - bool isCondInst(MachineInstr* MI); - bool tryAllocateResourcesForConstExt(MachineInstr* MI); - bool canReserveResourcesForConstExt(MachineInstr *MI); - void reserveResourcesForConstExt(MachineInstr* MI); - bool isNewValueInst(MachineInstr* MI); - }; } INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", @@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) -// HexagonPacketizerList Ctor. 
-HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI) - : VLIWPacketizerList(MF, MLI, true) { - this->MBPI = MBPI; +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget().getInstrInfo(); + HRI = MF.getSubtarget().getRegisterInfo(); } -bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis(); - const MachineBranchProbabilityInfo *MBPI = - &getAnalysis(); +// Check if FirstI modifies a register that SecondI reads. +static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget().getInstrInfo(); + HRI = MF.getSubtarget().getRegisterInfo(); + auto &MLI = getAnalysis(); + auto *AA = &getAnalysis().getAAResults(); + auto *MBPI = &getAnalysis(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MBPI); + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { // dependence between Insn 0 and Insn 2. 
   // packetization
   //
-  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB) {
-    MachineBasicBlock::iterator End = MBB->end();
-    MachineBasicBlock::iterator MI = MBB->begin();
+  for (auto &MB : MF) {
+    auto End = MB.end();
+    auto MI = MB.begin();
     while (MI != End) {
+      auto NextI = std::next(MI);
       if (MI->isKill()) {
-        MachineBasicBlock::iterator DeleteMI = MI;
-        ++MI;
-        MBB->erase(DeleteMI);
-        End = MBB->end();
-        continue;
+        MB.erase(MI);
+        End = MB.end();
       }
-      ++MI;
+      MI = NextI;
     }
   }

   // Loop over all of the basic blocks.
-  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB) {
-    // Find scheduling regions and schedule / packetize each region.
-    unsigned RemainingCount = MBB->size();
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin();) {
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingCount) {
-        if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
-          break;
-      }
-      I = MBB->begin();
+  for (auto &MB : MF) {
+    auto Begin = MB.begin(), End = MB.end();
+    while (Begin != End) {
+      // Find the first non-boundary starting from the end of the last
+      // scheduling region.
+      MachineBasicBlock::iterator RB = Begin;
+      while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+        ++RB;
+      // Find the first boundary starting from the beginning of the new
+      // region.
+      MachineBasicBlock::iterator RE = RB;
+      while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+        ++RE;
+      // Add the scheduling boundary if it's not block end.
+      if (RE != End)
+        ++RE;
+      // If RB == End, then RE == End.
+      if (RB != End)
+        Packetizer.PacketizeMIs(&MB, RB, RE);

-      // Skip empty scheduling regions.
-      if (I == RegionEnd) {
-        RegionEnd = std::prev(RegionEnd);
-        --RemainingCount;
-        continue;
-      }
-      // Skip regions with one instruction.
-      if (I == std::prev(RegionEnd)) {
-        RegionEnd = std::prev(RegionEnd);
-        continue;
-      }
-
-      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
-      RegionEnd = I;
+      Begin = RE;
     }
   }

+  Packetizer.unpacketizeSoloInstrs(MF);
   return true;
 }

-static bool IsIndirectCall(MachineInstr* MI) {
-  return MI->getOpcode() == Hexagon::J2_callr;
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+  if (!tryAllocateResourcesForConstExt(true))
+    llvm_unreachable("Resources not available");
 }

-// Reserve resources for constant extender. Trigure an assertion if
-// reservation fail.
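The rewritten driver loop above scans forward in two steps: skip past leading scheduling boundaries, then extend the region up to and including the next boundary, and hand [RB, RE) to PacketizeMIs. A minimal standalone model of that two-cursor scan, with plain integers standing in for MachineInstrs and a hypothetical forEachRegion driver (illustrative names, not LLVM API):

    #include <cstdio>
    #include <vector>

    // Model of the forward region scan: skip boundaries, then take
    // instructions up to and including the next boundary. A value of 1
    // marks a scheduling boundary.
    template <typename Pred, typename Fn>
    void forEachRegion(const std::vector<int> &Block, Pred IsBoundary,
                       Fn Packetize) {
      std::size_t Begin = 0, End = Block.size();
      while (Begin != End) {
        std::size_t RB = Begin;                 // first non-boundary
        while (RB != End && IsBoundary(Block[RB]))
          ++RB;
        std::size_t RE = RB;                    // next boundary after RB
        while (RE != End && !IsBoundary(Block[RE]))
          ++RE;
        if (RE != End)
          ++RE;                                 // boundary joins the region
        if (RB != End)
          Packetize(RB, RE);                    // "packetize" [RB, RE)
        Begin = RE;
      }
    }

    int main() {
      std::vector<int> B = {1, 0, 0, 1, 0, 0};
      forEachRegion(B, [](int I) { return I == 1; },
                    [](std::size_t L, std::size_t R) {
                      std::printf("[%zu,%zu)\n", L, R);
                    });
    }

On the sample block this prints [1,4) and [4,6): each region absorbs its trailing boundary, which mirrors the `if (RE != End) ++RE;` step in the patch.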
-void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - llvm_unreachable("can not reserve resources for constant extender."); - } - return; +bool HexagonPacketizerList::canReserveResourcesForConstExt() { + return tryAllocateResourcesForConstExt(false); } -bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && - "Should only be called for constant extended instructions"); - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); - MF.DeleteMachineInstr(PseudoMI); - return CanReserve; +// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, +// return true, otherwise, return false. +bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; } -// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return -// true, otherwise, return false. -bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) return true; - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - return false; - } -} - -bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, - SDep::Kind DepType, - unsigned DepReg) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - - // Check for lr dependence - if (DepReg == QRI->getRARegister()) { - return true; - } - - if (QII->isDeallocRet(MI)) { - if (DepReg == QRI->getFrameRegister() || - DepReg == QRI->getStackRegister()) + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - } - // Check if this is a predicate dependence - const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) { + // Check if this is a predicate dependence. 
+ const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) return true; - } - // - // Lastly check for an operand used in an indirect call - // If we had an attribute for checking if an instruction is an indirect call, - // then we could have avoided this relatively brittle implementation of - // IsIndirectCall() - // - // Assumes that the first operand of the CALLr is the function address - // - if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + // Assumes that the first operand of the CALLr is the function address. + if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { MachineOperand MO = MI->getOperand(0); - if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) { + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) return true; - } } return false; } -static bool IsRegDependence(const SDep::Kind DepType) { - return (DepType == SDep::Data || DepType == SDep::Anti || - DepType == SDep::Output); +static bool isRegDependence(const SDep::Kind DepType) { + return DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output; } -static bool IsDirectJump(MachineInstr* MI) { - return (MI->getOpcode() == Hexagon::J2_jump); +static bool isDirectJump(const MachineInstr* MI) { + return MI->getOpcode() == Hexagon::J2_jump; } -static bool IsSchedBarrier(MachineInstr* MI) { +static bool isSchedBarrier(const MachineInstr* MI) { switch (MI->getOpcode()) { case Hexagon::Y2_barrier: return true; @@ -374,76 +304,127 @@ static bool IsSchedBarrier(MachineInstr* MI) { return false; } -static bool IsControlFlow(MachineInstr* MI) { +static bool isControlFlow(const MachineInstr* MI) { return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); } -static bool IsLoopN(MachineInstr *MI) { - return (MI->getOpcode() == Hexagon::J2_loop0i || - MI->getOpcode() == Hexagon::J2_loop0r); -} -/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a -/// callee-saved register. -static bool DoesModifyCalleeSavedReg(MachineInstr *MI, +/// Returns true if the instruction modifies a callee-saved register. +static bool doesModifyCalleeSavedReg(const MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const MCPhysReg *CSR = - TRI->getCalleeSavedRegs(MI->getParent()->getParent()); - *CSR; ++CSR) { - unsigned CalleeSavedReg = *CSR; - if (MI->modifiesRegister(CalleeSavedReg, TRI)) + const MachineFunction &MF = *MI->getParent()->getParent(); + for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + if (MI->modifiesRegister(*CSR, TRI)) return true; - } return false; } -// Returns true if an instruction can be promoted to .new predicate -// or new-value store. -bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - return isCondInst(MI) || QII->mayBeNewStore(MI); +// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) +// Returns true if an instruction can be promoted to .new predicate or +// new-value store. +bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { + return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); } -bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const MCInstrDesc& TID = MI->getDesc(); - // bug 5670: until that is fixed, - // this portion is disabled. 
-  if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
-      || QII->isConditionalTransfer(MI)
-      || QII->isConditionalALU32(MI)
-      || QII->isConditionalLoad(MI)
-      || QII->isConditionalStore(MI)) {
-    return true;
-  }
-  return false;
-}
-
-
-// Promote an instructiont to its .new form.
-// At this time, we have already made a call to CanPromoteToDotNew
-// and made sure that it can *indeed* be promoted.
-bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
-    SDep::Kind DepType, MachineBasicBlock::iterator &MII,
-    const TargetRegisterClass* RC) {
-
-  assert (DepType == SDep::Data);
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
-  int NewOpcode;
-  if (RC == &Hexagon::PredRegsRegClass)
-    NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
-  else
-    NewOpcode = QII->GetDotNewOp(MI);
-  MI->setDesc(QII->get(NewOpcode));
-
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass* RC) {
+  assert(DepType == SDep::Data);
+  int CurOpcode = HII->getDotCurOp(MI);
+  MI->setDesc(HII->get(CurOpcode));
   return true;
 }

-bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
-  MI->setDesc(QII->get(NewOpcode));
+void HexagonPacketizerList::cleanUpDotCur() {
+  MachineInstr *MI = NULL;
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+    if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+      MI = BI;
+      continue;
+    }
+    if (MI) {
+      for (auto &MO : BI->operands())
+        if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
+          return;
+    }
+  }
+  if (!MI)
+    return;
+  // We did not find a use of the CUR, so de-cur it.
+  MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+  DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
+}
+
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass *RC) {
+  if (!HII->isV60VectorInstruction(MI))
+    return false;
+  if (!HII->isV60VectorInstruction(MII))
+    return false;
+
+  // Already a dot new instruction.
+  if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+    return false;
+
+  if (!HII->mayBeCurLoad(MI))
+    return false;
+
+  // The "cur value" cannot come from inline asm.
+  if (PacketSU->getInstr()->isInlineAsm())
+    return false;
+
+  // Make sure candidate instruction uses cur.
+  DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+        MI->dump();
+        dbgs() << "in packet\n";);
+  MachineInstr *MJ = MII;
+  DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+  unsigned DestReg = MI->getOperand(0).getReg();
+  bool FoundMatch = false;
+  for (auto &MO : MJ->operands())
+    if (MO.isReg() && MO.getReg() == DestReg)
+      FoundMatch = true;
+  if (!FoundMatch)
+    return false;
+
+  // Check for existing uses of a vector register within the packet which
+  // would be affected by converting a vector load into .cur format.
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "packet has "; BI->dump(););
+    if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+  // We can convert the opcode into a .cur.
+ return true; +} + +// Promote an instruction to its .new form. At this time, we have already +// made a call to canPromoteToDotNew and made sure that it can *indeed* be +// promoted. +bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); + int NewOpcode; + if (RC == &Hexagon::PredRegsRegClass) + NewOpcode = HII->getDotNewPredOp(MI, MBPI); + else + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); return true; } @@ -455,175 +436,173 @@ enum PredicateKind { /// Returns true if an instruction is predicated on p0 and false if it's /// predicated on !p0. -static PredicateKind getPredicateSense(MachineInstr* MI, - const HexagonInstrInfo *QII) { - if (!QII->isPredicated(MI)) +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) return PK_Unknown; - - if (QII->isPredicatedTrue(MI)) + if (HII->isPredicatedTrue(MI)) return PK_True; - return PK_False; } -static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, - const HexagonInstrInfo *QII) { - assert(QII->isPostIncrement(MI) && "Not a post increment operation."); +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); #ifndef NDEBUG // Post Increment means duplicates. Use dense map to find duplicates in the // list. Caution: Densemap initializes with the minimum of 64 buckets, // whereas there are at most 5 operands in the post increment. - DenseMap DefRegsSet; - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isDef()) { - DefRegsSet[MI->getOperand(opNum).getReg()] = 1; - } + DenseSet DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isUse()) { - if (DefRegsSet[MI->getOperand(opNum).getReg()]) { - return MI->getOperand(opNum); - } - } + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; #else - if (MI->getDesc().mayLoad()) { + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); // The 2nd operand is always the post increment operand in load. - assert(MI->getOperand(1).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(1)); + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; } if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); // The 1st operand is always the post increment operand in store. - assert(MI->getOperand(0).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(0)); + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; } #endif // we should never come here. llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); } -// get the value being stored -static MachineOperand& GetStoreValueOperand(MachineInstr *MI) { +// Get the value being stored. 
+static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { // value being stored is always the last operand. - return (MI->getOperand(MI->getNumOperands()-1)); + return MI->getOperand(MI->getNumOperands()-1); } -// can be new value store? +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; +} + +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? // Following restrictions are to be respected in convert a store into // a new value store. // 1. If an instruction uses auto-increment, its address register cannot // be a new-value register. Arch Spec 5.4.2.1 -// 2. If an instruction uses absolute-set addressing mode, -// its address register cannot be a new-value register. -// Arch Spec 5.4.2.1.TODO: This is not enabled as -// as absolute-set address mode patters are not implemented. +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. // 3. If an instruction produces a 64-bit result, its registers cannot be used // as new-value registers. Arch Spec 5.4.2.2. -// 4. If the instruction that sets a new-value register is conditional, then +// 4. If the instruction that sets the new-value register is conditional, then // the instruction that uses the new-value register must also be conditional, // and both must always have their predicates evaluate identically. // Arch Spec 5.4.2.3. -// 5. There is an implied restriction of a packet can not have another store, -// if there is a new value store in the packet. Corollary, if there is +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is // already a store in a packet, there can not be a new value store. // Arch Spec: 3.4.4.2 -bool HexagonPacketizerList::CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map &MIToSUnit) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { // Make sure we are looking at the store, that can be promoted. - if (!QII->mayBeNewStore(MI)) + if (!HII->mayBeNewStore(MI)) return false; - // Make sure there is dependency and can be new value'ed - if (GetStoreValueOperand(MI).isReg() && - GetStoreValueOperand(MI).getReg() != DepReg) + // Make sure there is dependency and can be new value'd. + const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) return false; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); const MCInstrDesc& MCID = PacketMI->getDesc(); - // first operand is always the result - const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF); - - // if there is already an store in the packet, no can do new value store - // Arch Spec 3.4.4.2. 
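The comment block above enumerates the architectural constraints (Arch Spec 5.4.2.x and 3.4.4.2) that gate promotion of a store to new-value form. A condensed standalone restatement of that checklist follows; every field of InstrFacts is a hypothetical stand-in for a HexagonInstrInfo query, so treat this as a sketch of the rule set, not the patch's implementation:

    #include <vector>

    // Hypothetical per-instruction facts; stand-ins for HII queries.
    struct InstrFacts {
      bool UsesAutoIncrement;    // address uses post-increment
      bool UsesAbsoluteSet;      // address uses absolute-set mode
      bool Produces64Bit;        // producer writes a 64-bit register pair
      bool IsConditional;        // instruction is predicated
      bool SameSenseAsProducer;  // predicate sense matches the producer's
      bool IsStore;
    };

    // Mirrors constraints 1-5 listed in the comment above.
    bool mayFormNewValueStore(const InstrFacts &Producer,
                              const InstrFacts &Store,
                              const std::vector<InstrFacts> &Packet) {
      if (Store.UsesAutoIncrement || Store.UsesAbsoluteSet)  // rules 1, 2
        return false;
      if (Producer.Produces64Bit)                            // rule 3
        return false;
      if (Producer.IsConditional &&                          // rule 4
          (!Store.IsConditional || !Store.SameSenseAsProducer))
        return false;
      for (const InstrFacts &I : Packet)                     // rule 5
        if (I.IsStore)
          return false;
      return true;
    }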
- for (std::vector::iterator VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - if (PacketSU->getInstr()->getDesc().mayStore() || - // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, - // then we don't need this - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe) - return false; - } - - if (PacketRC == &Hexagon::DoubleRegsRegClass) { - // new value store constraint: double regs can not feed into new value store - // arch spec section: 5.4.2.2 + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; } // Make sure it's NOT the post increment register that we are going to // new value. - if (QII->isPostIncrement(MI) && - MI->getDesc().mayStore() && - GetPostIncrementOperand(MI, QII).getReg() == DepReg) { + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { return false; } - if (QII->isPostIncrement(PacketMI) && - PacketMI->getDesc().mayLoad() && - GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) { - // if source is post_inc, or absolute-set addressing, - // it can not feed into new value store - // r3 = memw(r2++#4) - // memw(r30 + #-1404) = r2.new -> can not be new value store - // arch spec section: 5.4.2.1 + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. return false; } + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + // If the source that feeds the store is predicated, new value store must // also be predicated. - if (QII->isPredicated(PacketMI)) { - if (!QII->isPredicated(MI)) + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) return false; // Check to make sure that they both will have their predicates - // evaluate identically + // evaluate identically. unsigned predRegNumSrc = 0; unsigned predRegNumDst = 0; const TargetRegisterClass* predRegClass = nullptr; - // Get predicate register used in the source instruction - for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { - if ( PacketMI->getOperand(opNum).isReg()) - predRegNumSrc = PacketMI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in the source instruction. 
+ for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated PacketMI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); - // Get predicate register used in new-value store instruction - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - if ( MI->getOperand(opNum).isReg()) - predRegNumDst = MI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated MI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); // New-value register producer and user (store) need to satisfy these // constraints: @@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // should also be .new predicated and if producer is not .new predicated // then store should not be .new predicated. // 3) Both new-value register producer and user should have same predicate - // sense, i.e, either both should be negated or both should be none negated. - - if (( predRegNumDst != predRegNumSrc) || - QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || - getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) { + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) return false; - } } // Make sure that other than the new-value register no other store instruction @@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // including PacketMI. Howerver, we need to perform the check for the // remaining instructions in the packet. - std::vector::iterator VI; - std::vector::iterator VE; unsigned StartCheck = 0; - for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *TempSU = MIToSUnit.find(*VI)->second; + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; MachineInstr* TempMI = TempSU->getInstr(); // Following condition is true for all the instructions until PacketMI is // reached (StartCheck is set to 0 before the for loop). // StartCheck flag is 1 for all the instructions after PacketMI. - if (TempMI != PacketMI && !StartCheck) // start processing only after - continue; // encountering PacketMI + if (TempMI != PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. StartCheck = 1; - if (TempMI == PacketMI) // We don't want to check PacketMI for dependence + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. 
       continue;
-    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
-                                               QRI))
+    for (auto &MO : MI->operands())
+      if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
         return false;
-    }
   }

   // Make sure that for non-POST_INC stores:
   // 1. The only use of reg is DepReg and no other registers.
   //    This handles V4 base+index registers.
   //    The following store can not be dot new.
-  //    Eg. r0 = add(r0, #3)a
+  //    Eg. r0 = add(r0, #3)
   //    memw(r1+r0<<#2) = r0
-  if (!QII->isPostIncrement(MI) &&
-      GetStoreValueOperand(MI).isReg() &&
-      GetStoreValueOperand(MI).getReg() == DepReg) {
-    for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          MI->getOperand(opNum).getReg() == DepReg) {
+  if (!HII->isPostIncrement(MI)) {
+    for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+      const MachineOperand &MO = MI->getOperand(opNum);
+      if (MO.isReg() && MO.getReg() == DepReg)
         return false;
-      }
-    }
-    // 2. If data definition is because of implicit definition of the register,
-    //    do not newify the store. Eg.
-    //    %R9 = ZXTH %R12, %D6, %R12
-    //    STrih_indexed %R8, 2, %R12; mem:ST2[%scevgep343]
-    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
-      if (PacketMI->getOperand(opNum).isReg() &&
-          PacketMI->getOperand(opNum).getReg() == DepReg &&
-          PacketMI->getOperand(opNum).isDef() &&
-          PacketMI->getOperand(opNum).isImplicit()) {
-        return false;
-      }
     }
   }

+  // If data definition is because of implicit definition of the register,
+  // do not newify the store. Eg.
+  // %R9 = ZXTH %R12, %D6, %R12
+  // S2_storerh_io %R8, 2, %R12; mem:ST2[%scevgep343]
+  for (auto &MO : PacketMI->operands()) {
+    if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+      continue;
+    unsigned R = MO.getReg();
+    if (R == DepReg || HRI->isSuperRegister(DepReg, R))
+      return false;
+  }
+
+  // Handle imp-use of super reg case. There is a target independent side
+  // change that should prevent this situation but I am handling it for
+  // just-in-case. For example, we cannot newify R2 in the following case:
+  // %R3 = A2_tfrsi 0;
+  // S2_storeri_io %R0, 0, %R2, %D1;
+  for (auto &MO : MI->operands()) {
+    if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
+      return false;
+  }
+
   // Can be dot new store.
   return true;
 }

-// can this MI to promoted to either
-// new value store or new value jump
-bool HexagonPacketizerList::CanPromoteToNewValue(
-    MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
-    const std::map<MachineInstr*, SUnit*> &MIToSUnit,
-    MachineBasicBlock::iterator &MII) {
-
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  if (!QII->mayBeNewStore(MI))
+// Can this MI be promoted to either a new value store or a new value jump.
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg,
+      MachineBasicBlock::iterator &MII) {
+  if (!HII->mayBeNewStore(MI))
     return false;

-  MachineInstr *PacketMI = PacketSU->getInstr();
-  // Check to see the store can be new value'ed.
-  if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+  MachineInstr *PacketMI = PacketSU->getInstr();
+  if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
     return true;

   // Check to see the compare/jump can be new value'ed.
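Both tryAllocateResourcesForConstExt earlier in the patch and canPromoteToDotNew in the next hunk rely on the same trial-reservation idiom: build a throwaway instruction, ask the resource tracker whether it would fit, and delete it again. A toy standalone model of that idiom, with a hypothetical ToyTracker in place of the DFA-based tracker (assumed names, not LLVM API):

    #include <cstdio>

    // Toy resource tracker; 4 slots per packet, standing in for the DFA.
    struct ToyTracker {
      int FreeSlots = 4;
      bool canReserve(int Slots) const { return Slots <= FreeSlots; }
      void reserve(int Slots) { FreeSlots -= Slots; }
    };

    // Mirrors the Reserve flag of tryAllocateResourcesForConstExt: probe
    // first, commit only on request, so "can we?" and "do it" share a path.
    bool tryReserve(ToyTracker &RT, int Slots, bool Commit) {
      bool Avail = RT.canReserve(Slots);
      if (Commit && Avail)
        RT.reserve(Slots);
      return Avail;
    }

    int main() {
      ToyTracker RT;
      std::printf("probe: %d\n", tryReserve(RT, 1, false)); // no state change
      std::printf("commit: %d\n", tryReserve(RT, 1, true)); // consumes a slot
    }

The single boolean parameter is the design point: the patch folds the old canReserveResourcesForConstExt/reserveResourcesForConstExt pair into one helper so the probe and the commit cannot drift apart.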
@@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
   return false;
 }

+static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
+  for (auto &MO : I->operands())
+    if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+      return true;
+  return false;
+}
+
 // Check to see if an instruction can be dot new
 // There are three kinds.
 // 1. dot new on predicate - V2/V3/V4
 // 2. dot new on stores NV/ST - V4
 // 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew(
-    MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
-    const std::map<MachineInstr*, SUnit*> &MIToSUnit,
-    MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass* RC) {
   // Already a dot new instruction.
-  if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
+  if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
     return false;

   if (!isNewifiable(MI))
     return false;

-  // predicate .new
-  if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
-      return true;
-  else if (RC != &Hexagon::PredRegsRegClass &&
-           !QII->mayBeNewStore(MI)) // MI is not a new-value store
+  const MachineInstr *PI = PacketSU->getInstr();
+
+  // The "new value" cannot come from inline asm.
+  if (PI->isInlineAsm())
     return false;
-  else {
-    // Create a dot new machine instruction to see if resources can be
-    // allocated. If not, bail out now.
-    int NewOpcode = QII->GetDotNewOp(MI);
-    const MCInstrDesc &desc = QII->get(NewOpcode);
-    DebugLoc dl;
-    MachineInstr *NewMI =
-                    MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
-    bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
-    MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
-    if (!ResourcesAvailable)
-      return false;
+  // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
+  // sense.
+  if (PI->isImplicitDef())
+    return false;
+
+  // If dependency is through an implicitly defined register, we should not
+  // newify the use.
+  if (isImplicitDependency(PI, DepReg))
+    return false;
+
+  const MCInstrDesc& MCID = PI->getDesc();
+  const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+  if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
+    return false;
+
+  // predicate .new
+  // bug 5670: until that is fixed
+  // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+  if (RC == &Hexagon::PredRegsRegClass)
+    if (HII->isCondInst(MI) || MI->isReturn())
+      return HII->predCanBeUsedAsDotNew(PI, DepReg);
+
+  if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
+    return false;
+
+  // Create a dot new machine instruction to see if resources can be
+  // allocated. If not, bail out now.
+  int NewOpcode = HII->getDotNewOp(MI);
+  const MCInstrDesc &D = HII->get(NewOpcode);
+  MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
+  bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+  MF.DeleteMachineInstr(NewMI);
+  if (!ResourcesAvailable)
+    return false;

-    // new value store only
-    // new new value jump generated as a passes
-    if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
-      return false;
-    }
-  }
+  // New Value Store only. New Value Jump generated as a separate pass.
+ if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) + return false; - // new value store only - // new new value jump generated as a passes - if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) { - return false; - } - } return true; } -// Go through the packet instructions and search for anti dependency -// between them and DepReg from MI -// Consider this case: +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: // Trying to add // a) %R1 = TFRI_cdNotPt %P3, 2 // to this packet: // { -// b) %P0 = OR_pp %P3, %P0 -// c) %P3 = TFR_PdRs %R23 -// d) %R1 = TFRI_cdnPt %P3, 4 +// b) %P0 = C2_or %P3, %P0 +// c) %P3 = C2_tfrrp %R23 +// d) %R1 = C2_cmovenewit %P3, 4 // } // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form -// Anti Dep between c) and b) is irrelevant for this case -bool HexagonPacketizerList::RestrictingDepExistInPacket( - MachineInstr *MI, unsigned DepReg, - const std::map &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { SUnit *PacketSUDep = MIToSUnit.find(MI)->second; - for (std::vector::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - + for (auto I : CurrentPacketMIs) { // We only care for dependencies to predicated instructions - if(!QII->isPredicated(*VIN)) continue; + if (!HII->isPredicated(I)) + continue; // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + SUnit *PacketSU = MIToSUnit.find(I)->second; - // Look at dependencies between current members of the packet - // and predicate defining instruction MI. - // Make sure that dependency is on the exact register - // we care about. + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. if (PacketSU->isSucc(PacketSUDep)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) && - (PacketSU->Succs[i].getKind() == SDep::Anti) && - (PacketSU->Succs[i].getReg() == DepReg)) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) return true; - } } } } @@ -831,159 +821,308 @@ static unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { /// We use the following rule: The first predicate register that is a use is /// the predicate register of a predicated instruction. - assert(QII->isPredicated(MI) && "Must be predicated instruction"); - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &Op = *OI; + for (auto &Op : MI->operands()) { if (Op.isReg() && Op.getReg() && Op.isUse() && Hexagon::PredRegsRegClass.contains(Op.getReg())) return Op.getReg(); } llvm_unreachable("Unknown instruction operand layout"); - return 0; } // Given two predicated instructions, this function detects whether -// the predicates are complements -bool HexagonPacketizerList::ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - +// the predicates are complements. 
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, + MachineInstr *MI2) { // If we don't know the predicate sense of the instructions bail out early, we // need it later. - if (getPredicateSense(MI1, QII) == PK_Unknown || - getPredicateSense(MI2, QII) == PK_Unknown) + if (getPredicateSense(MI1, HII) == PK_Unknown || + getPredicateSense(MI2, HII) == PK_Unknown) return false; - // Scheduling unit for candidate - SUnit *SU = MIToSUnit.find(MI1)->second; + // Scheduling unit for candidate. + SUnit *SU = MIToSUnit[MI1]; // One corner case deals with the following scenario: // Trying to add - // a) %R24 = TFR_cPt %P0, %R25 + // a) %R24 = A2_tfrt %P0, %R25 // to this packet: - // // { - // b) %R25 = TFR_cNotPt %P0, %R24 - // c) %P0 = CMPEQri %R26, 1 + // b) %R25 = A2_tfrf %P0, %R24 + // c) %P0 = C2_cmpeqi %R26, 1 // } // - // On general check a) and b) are complements, but - // presence of c) will convert a) to .new form, and - // then it is not a complement - // We attempt to detect it by analyzing existing - // dependencies in the packet + // On general check a) and b) are complements, but presence of c) will + // convert a) to .new form, and then it is not a complement. + // We attempt to detect it by analyzing existing dependencies in the packet. // Analyze relationships between all existing members of the packet. - // Look for Anti dependecy on the same predicate reg - // as used in the candidate - for (std::vector::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - - // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + // Look for Anti dependecy on the same predicate reg as used in the + // candidate. + for (auto I : CurrentPacketMIs) { + // Scheduling Unit for current insn in the packet. + SUnit *PacketSU = MIToSUnit.find(I)->second; // If this instruction in the packet is succeeded by the candidate... if (PacketSU->isSucc(SU)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - // The corner case exist when there is true data - // dependency between candidate and one of current - // packet members, this dep is on predicate reg, and - // there already exist anti dep on the same pred in + auto Dep = PacketSU->Succs[i]; + // The corner case exist when there is true data dependency between + // candidate and one of current packet members, this dep is on + // predicate reg, and there already exist anti dep on the same pred in // the packet. - if (PacketSU->Succs[i].getSUnit() == SU && - PacketSU->Succs[i].getKind() == SDep::Data && - Hexagon::PredRegsRegClass.contains( - PacketSU->Succs[i].getReg()) && - // Here I know that *VIN is predicate setting instruction - // with true data dep to candidate on the register - // we care about - c) in the above example. - // Now I need to see if there is an anti dependency - // from c) to any other instruction in the - // same packet on the pred reg of interest - RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(), - MIToSUnit)) { - return false; + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. 
+ if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; } } } } - // If the above case does not apply, check regular - // complement condition. - // Check that the predicate register is the same and - // that the predicate sense is different - // We also need to differentiate .old vs. .new: - // !p0 is not complimentary to p0.new - unsigned PReg1 = getPredicatedRegister(MI1, QII); - unsigned PReg2 = getPredicatedRegister(MI2, QII); - return ((PReg1 == PReg2) && - Hexagon::PredRegsRegClass.contains(PReg1) && - Hexagon::PredRegsRegClass.contains(PReg2) && - (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && - (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); } -// initPacketizerState - Initialize packetizer flags +// Initialize packetizer flags. void HexagonPacketizerList::initPacketizerState() { - Dependence = false; PromotedToDotNew = false; GlueToNewValueJump = false; GlueAllocframeStore = false; FoundSequentialDependence = false; - - return; } -// ignorePseudoInstruction - Ignore bundling of pseudo instructions. -bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { if (MI->isDebugValue()) return true; if (MI->isCFIInstruction()) return false; - // We must print out inline assembly + // We must print out inline assembly. if (MI->isInlineAsm()) return false; - // We check if MI has any functional units mapped to it. - // If it doesn't, we ignore the instruction. + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. const MCInstrDesc& TID = MI->getDesc(); - unsigned SchedClass = TID.getSchedClass(); - const InstrStage* IS = - ResourceTracker->getInstrItins()->beginStage(SchedClass); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); unsigned FuncUnits = IS->getUnits(); return !FuncUnits; } -// isSoloInstruction: - Returns true for instructions that must be -// scheduled in their own packet. -bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) { +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { if (MI->isEHLabel() || MI->isCFIInstruction()) return true; - if (MI->isInlineAsm()) + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) return true; // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. 
// They must not be grouped with other instructions in a packet. - if (IsSchedBarrier(MI)) + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) return true; return false; } -// isLegalToPacketizeTogether: + +// Quick check if instructions MI and MJ cannot coexist in the same packet. +// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; + + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); + + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. + return false; +} + + +// Full, symmetric check. +bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, + const MachineInstr *MJ) { + return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); +} + +void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { + for (auto &B : MF) { + MachineBasicBlock::iterator BundleIt; + MachineBasicBlock::instr_iterator NextI; + for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { + NextI = std::next(I); + MachineInstr *MI = &*I; + if (MI->isBundle()) + BundleIt = I; + if (!MI->isInsideBundle()) + continue; + + // Decide on where to insert the instruction that we are pulling out. + // Debug instructions always go before the bundle, but the placement of + // INLINE_ASM depends on potential dependencies. By default, try to + // put it before the bundle, but if the asm writes to a register that + // other instructions in the bundle read, then we need to place it + // after the bundle (to preserve the bundle semantics). + bool InsertBeforeBundle; + if (MI->isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + else if (MI->isDebugValue()) + InsertBeforeBundle = true; + else + continue; + + BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); + } + } +} + +// Check if a given instruction is of class "system". +static bool isSystemInstr(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::Y2_barrier: + case Hexagon::Y2_dcfetchbo: + return true; + } + return false; +} + +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, + const MachineInstr *J) { + // The dependence graph may not include edges between dead definitions, + // so without extra checks, we could end up packetizing two instruction + // defining the same (dead) register. 
+ if (I->isCall() || J->isCall()) + return false; + if (HII->isPredicated(I) || HII->isPredicated(J)) + return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; + } + + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + + // dealloc_return cannot appear in the same packet as a conditional or + // unconditional jump. + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; + } + + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} + // SUI is the current instruction that is out side of the current packet. // SUJ is the current instruction inside the current packet against which that // SUI will be packetized. @@ -992,115 +1131,52 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *J = SUJ->getInstr(); assert(I && J && "Unable to packetize null instruction!"); - const MCInstrDesc &MCIDI = I->getDesc(); - const MCInstrDesc &MCIDJ = J->getDesc(); + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); MachineBasicBlock::iterator II = I; - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - // Inline asm cannot go in the packet. 
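hasDeadDependence above guards against pairing two instructions whose dead definitions of the same register would silently conflict, since the dependence graph may omit edges between dead defs. The core of that test is a per-register bit mask intersection; a self-contained sketch of just that shape (names and the register count are illustrative; the real code also exempts calls, predicated instructions, and USR_OVF):

    #include <bitset>

    // Illustrative register universe; stands in for Hexagon::NUM_TARGET_REGS.
    constexpr std::size_t NumRegs = 64;

    // One bit per register that an instruction dead-defines.
    using DeadDefMask = std::bitset<NumRegs>;

    // A register dead-defined by both instructions is an output dependence
    // that the scheduler's dependence graph may not record.
    bool haveDeadDefOverlap(const DeadDefMask &I, const DeadDefMask &J) {
      return (I & J).any();
    }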
- if (I->getOpcode() == Hexagon::INLINEASM) - llvm_unreachable("Should not meet inline asm here!"); + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); - if (isSoloInstruction(I)) - llvm_unreachable("Should not meet solo instr here!"); - - // A save callee-save register function call can only be in a packet - // with instructions that don't write to the callee-save registers. - if ((QII->isSaveCalleeSavedRegsCall(I) && - DoesModifyCalleeSavedReg(J, QRI)) || - (QII->isSaveCalleeSavedRegsCall(J) && - DoesModifyCalleeSavedReg(I, QRI))) { - Dependence = true; + if (cannotCoexist(I, J)) return false; - } - // Two control flow instructions cannot go in the same packet. - if (IsControlFlow(I) && IsControlFlow(J)) { - Dependence = true; + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) return false; - } - // A LoopN instruction cannot appear in the same packet as a jump or call. - if (IsLoopN(I) && - (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) { - Dependence = true; + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) return false; - } - if (IsLoopN(J) && - (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) { - Dependence = true; - return false; - } - - // dealloc_return cannot appear in the same packet as a conditional or - // unconditional jump. - if (QII->isDeallocRet(I) && - (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) { - Dependence = true; - return false; - } - - - // V4 allows dual store. But does not allow second store, if the - // first store is not in SLOT0. New value store, new value jump, - // dealloc_return and memop always take SLOT0. - // Arch spec 3.4.4.2 - if (MCIDI.mayStore() && MCIDJ.mayStore() && - (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { - Dependence = true; - return false; - } - - if ((QII->isMemOp(J) && MCIDI.mayStore()) - || (MCIDJ.mayStore() && QII->isMemOp(I)) - || (QII->isMemOp(J) && QII->isMemOp(I))) { - Dependence = true; - return false; - } - - //if dealloc_return - if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { - Dependence = true; - return false; - } // If an instruction feeds new value jump, glue it. 
   MachineBasicBlock::iterator NextMII = I;
   ++NextMII;
-  if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+  if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
     MachineInstr *NextMI = NextMII;
     bool secondRegMatch = false;
-    bool maintainNewValueJump = false;
+    const MachineOperand &NOp0 = NextMI->getOperand(0);
+    const MachineOperand &NOp1 = NextMI->getOperand(1);

-    if (NextMI->getOperand(1).isReg() &&
-        I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+    if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
       secondRegMatch = true;
-      maintainNewValueJump = true;
-    }
-
-    if (!secondRegMatch &&
-        I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
-      maintainNewValueJump = true;
-    }
-
-    for (std::vector<MachineInstr*>::iterator
-         VI = CurrentPacketMIs.begin(),
-         VE = CurrentPacketMIs.end();
-         (VI != VE && maintainNewValueJump); ++VI) {
-      SUnit *PacketSU = MIToSUnit.find(*VI)->second;
-
-      // NVJ can not be part of the dual jump - Arch Spec: section 7.8
-      if (PacketSU->getInstr()->getDesc().isCall()) {
+    for (auto I : CurrentPacketMIs) {
+      SUnit *PacketSU = MIToSUnit.find(I)->second;
+      MachineInstr *PI = PacketSU->getInstr();
+      // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
+      if (PI->isCall()) {
         Dependence = true;
         break;
       }
-      // Validate
+      // Validate:
       // 1. Packet does not have a store in it.
       // 2. If the first operand of the nvj is newified, and the second
       //    operand is also a reg, it (second reg) is not defined in
@@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
       // 3. If the second operand of the nvj is newified, (which means
       //    first operand is also a reg), first reg is not defined in
       //    the same packet.
-      if (PacketSU->getInstr()->getDesc().mayStore() ||
-          PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
-          // Check #2.
-          (!secondRegMatch && NextMI->getOperand(1).isReg() &&
-           PacketSU->getInstr()->modifiesRegister(
-               NextMI->getOperand(1).getReg(), QRI)) ||
-          // Check #3.
-          (secondRegMatch &&
-           PacketSU->getInstr()->modifiesRegister(
-               NextMI->getOperand(0).getReg(), QRI))) {
+      if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+          HII->isLoopN(PI)) {
+        Dependence = true;
+        break;
+      }
+      // Check #2/#3.
+      const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+      if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
         Dependence = true;
         break;
       }
     }
-    if (!Dependence)
-      GlueToNewValueJump = true;
-    else
+
+    if (Dependence)
       return false;
+    GlueToNewValueJump = true;
   }

-  if (SUJ->isSucc(SUI)) {
-    for (unsigned i = 0;
-         (i < SUJ->Succs.size()) && !FoundSequentialDependence;
-         ++i) {
+  // There is no dependency between a prolog instruction and its successor.
+  if (!SUJ->isSucc(SUI))
+    return true;

-      if (SUJ->Succs[i].getSUnit() != SUI) {
+  for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+    if (FoundSequentialDependence)
+      break;
+
+    if (SUJ->Succs[i].getSUnit() != SUI)
+      continue;
+
+    SDep::Kind DepType = SUJ->Succs[i].getKind();
+    // For direct calls:
+    // Ignore register dependences for call instructions for packetization
+    // purposes except for those due to r31 and predicate registers.
+    //
+    // For indirect calls:
+    // Same as direct calls + check for true dependences to the register
+    // used in the indirect call.
+    //
+    // We completely ignore Order dependences for call instructions.
+    //
+    // For returns:
+    // Ignore register dependences for return instructions like jumpr,
+    // dealloc return unless we have dependencies on the explicit uses
+    // of the registers used by jumpr (like r31) or dealloc return
+    // (like r29 or r30).
+    //
+    // TODO: Currently, jumpr is handling only return of r31. So, the
+    // following logic (specifically isCallDependent) is working fine.
+    // We need to enable jumpr for register other than r31 and then,
+    // we need to rework the last part, where it handles indirect call
+    // of that (isCallDependent) function. Bug 6216 is opened for this.
+    unsigned DepReg = 0;
+    const TargetRegisterClass *RC = nullptr;
+    if (DepType == SDep::Data) {
+      DepReg = SUJ->Succs[i].getReg();
+      RC = HRI->getMinimalPhysRegClass(DepReg);
+    }
+
+    if (I->isCall() || I->isReturn()) {
+      if (!isRegDependence(DepType))
+        continue;
+      if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+        continue;
+    }
+
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+        if (promoteToDotCur(J, DepType, II, RC))
+          continue;
+    }
+
+    // Data dependence ok if we have load.cur.
+    if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+      if (HII->isV60VectorInstruction(I))
+        continue;
+    }
+
+    // For instructions that can be promoted to dot-new, try to promote.
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+        if (promoteToDotNew(I, DepType, II, RC)) {
+          PromotedToDotNew = true;
+          continue;
+        }
+      }
+      if (HII->isNewValueJump(I))
+        continue;
+    }
-      SDep::Kind DepType = SUJ->Succs[i].getKind();
-
-      // For direct calls:
-      // Ignore register dependences for call instructions for
-      // packetization purposes except for those due to r31 and
-      // predicate registers.
+    // For predicated instructions, if the predicates are complements then
+    // there can be no dependence.
+    if (HII->isPredicated(I) && HII->isPredicated(J) &&
+        arePredicatesComplements(I, J)) {
+      // Not always safe to do this translation.
+      // DAG Builder attempts to reduce dependence edges using transitive
+      // nature of dependencies. Here is an example:
       //
-      // For indirect calls:
-      // Same as direct calls + check for true dependences to the register
-      // used in the indirect call.
+      // r0 = tfr_pt ... (1)
+      // r0 = tfr_pf ... (2)
+      // r0 = tfr_pt ... (3)
       //
-      // We completely ignore Order dependences for call instructions
-      //
-      // For returns:
-      // Ignore register dependences for return instructions like jumpr,
-      // dealloc return unless we have dependencies on the explicit uses
-      // of the registers used by jumpr (like r31) or dealloc return
-      // (like r29 or r30).
-      //
-      // TODO: Currently, jumpr is handling only return of r31. So, the
-      // following logic (specificaly IsCallDependent) is working fine.
-      // We need to enable jumpr for register other than r31 and then,
-      // we need to rework the last part, where it handles indirect call
-      // of that (IsCallDependent) function. Bug 6216 is opened for this.
-      //
-      unsigned DepReg = 0;
-      const TargetRegisterClass* RC = nullptr;
-      if (DepType == SDep::Data) {
-        DepReg = SUJ->Succs[i].getReg();
-        RC = QRI->getMinimalPhysRegClass(DepReg);
-      }
-      if ((MCIDI.isCall() || MCIDI.isReturn()) &&
-          (!IsRegDependence(DepType) ||
-           !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
-        /* do nothing */
+      // There will be an output dependence between (1)->(2) and (2)->(3).
+      // However, there is no dependence edge between (1)->(3). This results
+      // in all 3 instructions going in the same packet.
We ignore dependce + // only once to avoid this situation. + auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + if (Itr != IgnoreDepMIs.end()) { + Dependence = true; + return false; } + IgnoreDepMIs.push_back(I); + continue; + } - // For instructions that can be promoted to dot-new, try to promote. - else if ((DepType == SDep::Data) && - CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) && - PromoteToDotNew(I, DepType, II, RC)) { - PromotedToDotNew = true; - /* do nothing */ - } + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions. + if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + DepType == SDep::Order) + continue; - else if ((DepType == SDep::Data) && - (QII->isNewValueJump(I))) { - /* do nothing */ - } + // Ignore all dependences for jumps except for true and output + // dependences. + if (I->isConditionalBranch() && DepType != SDep::Data && + DepType != SDep::Output) + continue; - // For predicated instructions, if the predicates are complements - // then there can be no dependence. - else if (QII->isPredicated(I) && - QII->isPredicated(J) && - ArePredicatesComplements(I, J, MIToSUnit)) { - /* do nothing */ + // Ignore output dependences due to superregs. We can write to two + // different subregisters of R1:0 for instance in the same cycle. - } - else if (IsDirectJump(I) && - !MCIDJ.isBranch() && - !MCIDJ.isCall() && - (DepType == SDep::Order)) { - // Ignore Order dependences between unconditional direct branches - // and non-control-flow instructions - /* do nothing */ - } - else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) && - (DepType != SDep::Output)) { - // Ignore all dependences for jumps except for true and output - // dependences - /* do nothing */ - } + // If neither I nor J defines DepReg, then this is a superfluous output + // dependence. The dependence must be of the form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions with + // DepReg = D0. + // We want to ignore these dependences. Ideally, the dependence + // constructor should annotate such dependences. We can then avoid this + // relatively expensive check. + // + if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); - // Ignore output dependences due to superregs. We can - // write to two different subregisters of R1:0 for instance - // in the same cycle - // + // Check if I and J really defines DepReg. + if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + continue; + FoundSequentialDependence = true; + break; + } - // - // Let the - // If neither I nor J defines DepReg, then this is a - // superfluous output dependence. The dependence must be of the - // form: - // R0 = ... - // R1 = ... - // and there is an output dependence between the two instructions - // with - // DepReg = D0 - // We want to ignore these dependences. - // Ideally, the dependence constructor should annotate such - // dependences. We can then avoid this relatively expensive check. - // - else if (DepType == SDep::Output) { - // DepReg is the register that's responsible for the dependence. - unsigned DepReg = SUJ->Succs[i].getReg(); - - // Check if I and J really defines DepReg. - if (I->definesRegister(DepReg) || - J->definesRegister(DepReg)) { + // For Order dependences: + // 1. On V4 or later, volatile loads/stores can be packetized together, + // unless other rules prevent is. + // 2. 
Store followed by a load is not allowed. + // 3. Store followed by a store is only valid on V4 or later. + // 4. Load followed by any memory operation is allowed. + if (DepType == SDep::Order) { + if (!PacketizeVolatiles) { + bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + if (OrdRefs) { FoundSequentialDependence = true; break; } } - - // We ignore Order dependences for - // 1. Two loads unless they are volatile. - // 2. Two stores in V4 unless they are volatile. - else if ((DepType == SDep::Order) && - !I->hasOrderedMemoryRef() && - !J->hasOrderedMemoryRef()) { - if (MCIDI.mayStore() && MCIDJ.mayStore()) { - /* do nothing */ - } - // store followed by store-- not OK on V2 - // store followed by load -- not OK on all (OK if addresses - // are not aliased) - // load followed by store -- OK on all - // load followed by load -- OK on all - else if ( !MCIDJ.mayStore()) { - /* do nothing */ - } - else { + // J is first, I is second. + bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); + bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + if (StoreJ) { + // Two stores are only allowed on V4+. Load following store is never + // allowed. + if (LoadI) { FoundSequentialDependence = true; break; } - } - - // For V4, special case ALLOCFRAME. Even though there is dependency - // between ALLOCFRAME and subsequent store, allow it to be - // packetized in a same packet. This implies that the store is using - // caller's SP. Hence, offset needs to be updated accordingly. - else if (DepType == SDep::Data - && J->getOpcode() == Hexagon::S2_allocframe - && (I->getOpcode() == Hexagon::S2_storerd_io - || I->getOpcode() == Hexagon::S2_storeri_io - || I->getOpcode() == Hexagon::S2_storerb_io) - && I->getOperand(0).getReg() == QRI->getStackRegister() - && QII->isValidOffset(I->getOpcode(), - I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE))) - { - GlueAllocframeStore = true; - // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e - // caller's SP. Therefore, we need to recalculate offset according - // to this change. - I->getOperand(1).setImm(I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE)); - } - - // - // Skip over anti-dependences. Two instructions that are - // anti-dependent can share a packet - // - else if (DepType != SDep::Anti) { + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. FoundSequentialDependence = true; break; } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; } - if (FoundSequentialDependence) { - Dependence = true; - return false; + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. 
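To make the comment above concrete: the glued store's immediate is rebased by subtracting the new frame size plus the LR/FP save area, since the store still executes against the caller's SP. A minimal sketch (the 8-byte LR/FP constant is an assumption of this demo, standing in for HEXAGON_LRFP_SIZE):

#include <cstdint>
#include <iostream>

constexpr int64_t LRFP_SIZE = 8; // LR/FP pair saved by allocframe

// Offset the store must use when packetized with allocframe.
int64_t rebaseOffset(int64_t Imm, int64_t FrameSize) {
  return Imm - (FrameSize + LRFP_SIZE);
}

int main() {
  // memw(r29 + #16) with a 24-byte frame becomes an offset of -16
  // relative to the caller's SP: 16 - (24 + 8).
  std::cout << rebaseOffset(16, 24) << '\n'; // prints -16
}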
+ if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; } } - return true; -} - -// isLegalToPruneDependencies -bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); - - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - - if (Dependence) { - - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old. - if (PromotedToDotNew) { - DemoteToDotOld(I); - } - - // Check if the instruction (must be a store) was glued with an Allocframe - // instruction. If so, restore its offset to its original value, i.e. use - // curent SP instead of caller's SP. - if (GlueAllocframeStore) { - I->getOperand(1).setImm(I->getOperand(1).getImm() + - FrameSize + HEXAGON_LRFP_SIZE); - } - + if (FoundSequentialDependence) { + Dependence = true; return false; } + return true; } +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + if (cannotCoexist(I, J)) + return false; + + if (!Dependence) + return true; + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); + } + return false; +} + + MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) { - - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock *MBB = MI->getParent(); - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - - if (GlueToNewValueJump) { - - ++MII; - MachineInstr *nvjMI = MII; - assert(ResourceTracker->canReserveResources(MI)); - ResourceTracker->reserveResources(MI); - if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && - !tryAllocateResourcesForConstExt(MI)) { - endPacket(MBB, MI); - ResourceTracker->reserveResources(MI); - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(MI); - // Reserve resources for new value jump constant extender. 
- assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(nvjMI); - assert(ResourceTracker->canReserveResources(nvjMI) && - "Ensure that there is a slot"); - - } else if ( // Extended instruction takes two slots in the packet. - // Try reserve and allocate 4-byte in the current packet first. - (QII->isExtended(nvjMI) - && (!tryAllocateResourcesForConstExt(nvjMI) - || !ResourceTracker->canReserveResources(nvjMI))) - || // For non-extended instruction, no need to allocate extra 4 bytes. - (!QII->isExtended(nvjMI) && - !ResourceTracker->canReserveResources(nvjMI))) - { - endPacket(MBB, MI); - // A new and empty packet starts. - // We are sure that the resources requirements can be satisfied. - // Therefore, do not need to call "canReserveResources" anymore. - ResourceTracker->reserveResources(MI); - if (QII->isExtended(nvjMI)) - reserveResourcesForConstExt(nvjMI); - } - // Here, we are sure that "reserveResources" would succeed. - ResourceTracker->reserveResources(nvjMI); - CurrentPacketMIs.push_back(MI); - CurrentPacketMIs.push_back(nvjMI); - } else { - if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) - && ( !tryAllocateResourcesForConstExt(MI) - || !ResourceTracker->canReserveResources(MI))) - { - endPacket(MBB, MI); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old - if (PromotedToDotNew) { - DemoteToDotOld(MI); - } - reserveResourcesForConstExt(MI); - } - // In case that "MI" is not an extended insn, - // the resource availability has already been checked. - ResourceTracker->reserveResources(MI); - CurrentPacketMIs.push_back(MI); + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); } return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. 
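A minimal model of the reserve-or-restart policy described above (hypothetical slot counts, not the real DFA resource tracker): try to fit both glued instructions, and if the current packet cannot hold them, end it and place them in a fresh one, where reservation is guaranteed to succeed:

#include <cassert>

struct SlotTracker {
  int Free = 4; // 4 slots per packet
  bool canReserve(int N) const { return Free >= N; }
  void reserve(int N) { assert(canReserve(N)); Free -= N; }
  void endPacket() { Free = 4; } // start an empty packet
};

void placeGluedPair(SlotTracker &T, int SlotsMI, int SlotsNvjMI) {
  if (!T.canReserve(SlotsMI + SlotsNvjMI))
    T.endPacket();
  T.reserve(SlotsMI); // cannot fail in an empty packet
  T.reserve(SlotsNvjMI);
}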
+ ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); + + if (!Good) { + endPacket(MBB, MI); + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); + } + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); + } + } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); + return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; } +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; +} + +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. + for (auto J : CurrentPacketMIs) + if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + return false; + + // Check for stall between I and instructions in the previous packet. 
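The stall test that follows boils down to: does the candidate instruction read a value produced in the immediately preceding packet? A toy version over plain integers (the real code additionally consults latency queries such as isVecUsableNextPacket and canExecuteInBundle):

#include <vector>

struct ToyMI {
  int Def;               // register defined (-1 if none)
  std::vector<int> Uses; // registers read
};

bool readsFromPacket(const ToyMI &I, const std::vector<ToyMI> &PrevPacket) {
  for (const ToyMI &P : PrevPacket)
    for (int U : I.Uses)
      if (P.Def >= 0 && U == P.Def)
        return true; // back-to-back producer/consumer: potential stall
  return false;
}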
+  if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+    for (auto J : OldPacketMIs) {
+      if (HII->isV60VectorInstruction(J))
+        continue;
+      if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+        continue;
+      if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 000000000000..960cf6ca5bbc
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+  // Vector of instructions assigned to the packet that has just been created.
+  std::vector<MachineInstr*> OldPacketMIs;
+
+  // Has the instruction been promoted to a dot-new instruction.
+  bool PromotedToDotNew;
+
+  // Has the instruction been glued to allocframe.
+  bool GlueAllocframeStore;
+
+  // Has the feeder instruction been glued to new value jump.
+  bool GlueToNewValueJump;
+
+  // Check if there is a dependence between some instruction already in this
+  // packet and this instruction.
+  bool Dependence;
+
+  // Only check for dependence if there are resources available to
+  // schedule this instruction.
+  bool FoundSequentialDependence;
+
+  // Track MIs with ignored dependence.
+  std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+  /// \brief A handle to the branch probability pass.
+  const MachineBranchProbabilityInfo *MBPI;
+  const MachineLoopInfo *MLI;
+
+private:
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
+
+public:
+  // Ctor.
+  HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        AliasAnalysis *AA,
+                        const MachineBranchProbabilityInfo *MBPI);
+
+  // initPacketizerState - Initialize some internal flags.
+  void initPacketizerState() override;
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(const MachineInstr *MI,
+                               const MachineBasicBlock *MBB) override;
+
+  // isSoloInstruction - Return true if instruction MI cannot be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(const MachineInstr *MI) override;
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; + bool shouldAddToPacket(const MachineInstr *MI) override; + + void unpacketizeSoloInstrs(MachineFunction &MF); + +protected: + bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + unsigned DepReg); + bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + void cleanUpDotCur(); + + bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII); + bool canPromoteToNewValueStore(const MachineInstr* MI, + const MachineInstr* PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr* MI); + bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool restrictingDepExistInPacket(MachineInstr*, unsigned); + bool isNewifiable(const MachineInstr *MI); + bool isCurifiable(MachineInstr* MI); + bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + inline bool isPromotedToDotNew() const { + return PromotedToDotNew; + } + bool tryAllocateResourcesForConstExt(bool Reserve); + bool canReserveResourcesForConstExt(); + void reserveResourcesForConstExt(); + bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); + bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); + bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); + bool producesStall(const MachineInstr *MI); +}; +} // namespace llvm +#endif // HEXAGONVLIWPACKETIZER_H + diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 9d288af0214a..4088cedafd84 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Disassembler MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -33,6 +33,7 @@ required_libraries = AsmPrinter CodeGen Core + HexagonAsmParser HexagonDesc HexagonInfo MC diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 5403b106cbbe..2c5d0dab2848 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -3,10 +3,12 @@ add_llvm_library(LLVMHexagonDesc HexagonELFObjectWriter.cpp HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp + HexagonMCChecker.cpp HexagonMCCodeEmitter.cpp HexagonMCCompound.cpp HexagonMCDuplexInfo.cpp HexagonMCELFStreamer.cpp + HexagonMCExpr.cpp HexagonMCInstrInfo.cpp HexagonMCShuffler.cpp HexagonMCTargetDesc.cpp diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 99ea2fabf867..b73af8249cb5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ 
b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -13,7 +13,9 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -33,14 +35,28 @@ class HexagonAsmBackend : public MCAsmBackend {
   mutable uint64_t relaxedCnt;
   std::unique_ptr<MCInstrInfo> MCII;
   std::unique_ptr<MCInst *> RelaxTarget;
+  MCInst * Extender;

 public:
   HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
-    OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){}
+    OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+    Extender(nullptr) {}

   MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createHexagonELFObjectWriter(OS, OSABI, CPU);
   }

+  void setExtender(MCContext &Context) const {
+    if (Extender == nullptr)
+      const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+  }
+
+  MCInst *takeExtender() const {
+    assert(Extender != nullptr);
+    MCInst * Result = Extender;
+    const_cast<HexagonAsmBackend *>(this)->Extender = nullptr;
+    return Result;
+  }
+
   unsigned getNumFixupKinds() const override {
     return Hexagon::NumTargetFixupKinds;
   }
@@ -222,6 +238,7 @@ public:
         if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
           ++relaxedCnt;
           *RelaxTarget = &MCI;
+          setExtender(Layout.getAssembler().getContext());
           return true;
         } else {
           return false;
@@ -262,6 +279,7 @@ public:
       if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
         ++relaxedCnt;
         *RelaxTarget = &MCI;
+        setExtender(Layout.getAssembler().getContext());
         return true;
       }
     }
@@ -276,9 +294,35 @@ public:
     llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
   }

-  void relaxInstruction(MCInst const & /*Inst*/,
-                        MCInst & /*Res*/) const override {
-    llvm_unreachable("relaxInstruction() unimplemented");
+  void relaxInstruction(MCInst const & Inst,
+                        MCInst & Res) const override {
+    assert(HexagonMCInstrInfo::isBundle(Inst) &&
+           "Hexagon relaxInstruction only works on bundles");
+
+    Res = HexagonMCInstrInfo::createBundle();
+    // Copy the results into the bundle.
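Relaxation is triggered when a fixup value no longer fits the instruction's extendable-operand range: signed fields span [-2^(B-1), 2^(B-1)-1] and unsigned fields [0, 2^B-1], with B the extent bits. A sketch of that fit test (the function name is illustrative, not from this patch):

#include <cstdint>

bool fitsWithoutExtender(int64_t V, unsigned Bits, bool Signed) {
  if (Signed)
    return V >= -(INT64_C(1) << (Bits - 1)) && V < (INT64_C(1) << (Bits - 1));
  return V >= 0 && V < (INT64_C(1) << Bits);
}
// fitsWithoutExtender(100, 8, true) -> true; fitsWithoutExtender(200, 8, true)
// -> false, so an immediate extender word must be inserted and the bundle
// grows, which is exactly what relaxInstruction does below.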
+    bool Update = false;
+    for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+      MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst());
+
+      // If an immediate extender is needed, add it in.
+      if (*RelaxTarget == &CrntHMI) {
+        Update = true;
+        assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) &&
+               "No room to insert extender for relaxation");
+
+        MCInst *HMIx = takeExtender();
+        *HMIx = HexagonMCInstrInfo::deriveExtender(
+            *MCII, CrntHMI,
+            HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI));
+        Res.addOperand(MCOperand::createInst(HMIx));
+        *RelaxTarget = nullptr;
+      }
+      // Now copy over the original instruction (the one we may have
+      // extended).
+      Res.addOperand(MCOperand::createInst(I.getInst()));
+    }
+    (void)Update;
+    assert(Update && "Didn't find relaxation target");
   }

   bool writeNopData(uint64_t Count,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f4d162ccf6a8..47a6f8636276 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -44,6 +44,25 @@ namespace HexagonII {
     TypeMEMOP = 9,
     TypeNV = 10,
     TypeDUPLEX = 11,
+    TypeCOMPOUND = 12,
+    TypeCVI_FIRST = 13,
+    TypeCVI_VA = TypeCVI_FIRST,
+    TypeCVI_VA_DV = 14,
+    TypeCVI_VX = 15,
+    TypeCVI_VX_DV = 16,
+    TypeCVI_VP = 17,
+    TypeCVI_VP_VS = 18,
+    TypeCVI_VS = 19,
+    TypeCVI_VINLANESAT = 20,
+    TypeCVI_VM_LD = 21,
+    TypeCVI_VM_TMP_LD = 22,
+    TypeCVI_VM_CUR_LD = 23,
+    TypeCVI_VM_VP_LDU = 24,
+    TypeCVI_VM_ST = 25,
+    TypeCVI_VM_NEW_ST = 26,
+    TypeCVI_VM_STU = 27,
+    TypeCVI_HIST = 28,
+    TypeCVI_LAST = TypeCVI_HIST,
     TypePREFIX = 30, // Such as extenders.
     TypeENDLOOP = 31 // Such as end of a HW loop.
   };
@@ -71,12 +90,16 @@ namespace HexagonII {
     PostInc = 6 // Post increment addressing mode
   };

+  // MemAccessSize is represented as 1+log2(N) where N is size in bytes.
   enum class MemAccessSize {
     NoMemAccess = 0, // Not a memory access instruction.
     ByteAccess = 1, // Byte access instruction (memb).
     HalfWordAccess = 2, // Half word access instruction (memh).
     WordAccess = 3, // Word access instruction (memw).
-    DoubleWordAccess = 4 // Double word access instruction (memd)
+    DoubleWordAccess = 4, // Double word access instruction (memd)
+    // 5, // We do not have a 16 byte vector access.
+    Vector64Access = 7, // 64 Byte vector access instruction (vmem).
+    Vector128Access = 8 // 128 Byte vector access instruction (vmem).
   };

   // MCInstrDesc TSFlags
@@ -156,7 +179,7 @@
     AddrModeMask = 0x7,
     // Access size for load/store instructions.
     MemAccessSizePos = 43,
-    MemAccesSizeMask = 0x7,
+    MemAccesSizeMask = 0xf,

     // Branch predicted taken.
     TakenPos = 47,
@@ -164,7 +187,23 @@
     // Floating-point instructions.
     FPPos = 48,
-    FPMask = 0x1
+    FPMask = 0x1,
+
+    // New-Value producer-2 instructions.
+    hasNewValuePos2 = 50,
+    hasNewValueMask2 = 0x1,
+
+    // Which operand consumes or produces a new value.
+    NewValueOpPos2 = 51,
+    NewValueOpMask2 = 0x7,
+
+    // Accumulator instructions.
+ AccumulatorPos = 54, + AccumulatorMask = 0x1, + + // Complex XU, prevent xu competition by prefering slot3 + PrefersSlot3Pos = 55, + PrefersSlot3Mask = 0x1, }; // *** The code above must match HexagonInstrFormat*.td *** // @@ -219,6 +258,26 @@ namespace HexagonII { INST_PARSE_EXTENDER = 0x00000000 }; + enum InstIClassBits : unsigned { + INST_ICLASS_MASK = 0xf0000000, + INST_ICLASS_EXTENDER = 0x00000000, + INST_ICLASS_J_1 = 0x10000000, + INST_ICLASS_J_2 = 0x20000000, + INST_ICLASS_LD_ST_1 = 0x30000000, + INST_ICLASS_LD_ST_2 = 0x40000000, + INST_ICLASS_J_3 = 0x50000000, + INST_ICLASS_CR = 0x60000000, + INST_ICLASS_ALU32_1 = 0x70000000, + INST_ICLASS_XTYPE_1 = 0x80000000, + INST_ICLASS_LD = 0x90000000, + INST_ICLASS_ST = 0xa0000000, + INST_ICLASS_ALU32_2 = 0xb0000000, + INST_ICLASS_XTYPE_2 = 0xc0000000, + INST_ICLASS_XTYPE_3 = 0xd0000000, + INST_ICLASS_XTYPE_4 = 0xe0000000, + INST_ICLASS_ALU32_3 = 0xf0000000 + }; + } // End namespace HexagonII. } // End namespace llvm. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 36f81465eef6..06ccec532211 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "HexagonAsmPrinter.h" -#include "Hexagon.h" #include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -28,104 +28,33 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) - : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} - -void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot, - MCSubtargetInfo const &STI) { - assert(HexagonMCInstrInfo::isBundle(*MI)); - assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); - std::string Buffer; - { - raw_string_ostream TempStream(Buffer); - RawPrinter->printInst(MI, TempStream, "", STI); - } - StringRef Contents(Buffer); - auto PacketBundle = Contents.rsplit('\n'); - auto HeadTail = PacketBundle.first.split('\n'); - auto Preamble = "\t{\n\t\t"; - auto Separator = ""; - while(!HeadTail.first.empty()) { - O << Separator; - StringRef Inst; - auto Duplex = HeadTail.first.split('\v'); - if(!Duplex.second.empty()){ - O << Duplex.first << "\n"; - Inst = Duplex.second; - } - else - Inst = Duplex.first; - O << Preamble; - O << Inst; - HeadTail = HeadTail.second.split('\n'); - Preamble = ""; - Separator = "\n\t\t"; - } - O << "\n\t}" << PacketBundle.second; -} - -void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { - RawPrinter->printRegName(O, RegNo); -} - -// Return the minimum value that a constant extendable operand can have -// without being extended. -static int getMinValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return -1U << (bits - 1); - - return 0; -} - -// Return the maximum value that a constant extendable operand can have -// without being extended. 
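Two small computations implied by the fields above, written out as a sketch: TSFlags properties are read as (TSFlags >> Pos) & Mask (the same pattern the removed range helpers below rely on), and a MemAccessSize code decodes back to a byte count of 1 << (code - 1):

#include <cstdint>

uint64_t getField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

// 1 -> byte, 2 -> half word, 3 -> word, 4 -> double word,
// 7 -> 64-byte vector, 8 -> 128-byte vector; 0 means no memory access.
unsigned accessSizeInBytes(unsigned Code) {
  return Code == 0 ? 0 : 1u << (Code - 1);
}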
-static int getMaxValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return ~(-1U << (bits - 1)); - - return ~(-1U << bits); -} - -// Return true if the instruction must be extended. -static bool isExtended(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; -} - -// Currently just used in an assert statement -static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED; -// Return true if the instruction may be extended based on the operand value. -static bool isExtendable(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) { } StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); +void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << getRegName(RegNo); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); } void HexagonInstPrinter::setExtender(MCInst const &MCI) { HasExtender = HexagonMCInstrInfo::isImmext(MCI); } -void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, - StringRef Annot, - MCSubtargetInfo const &STI) { +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { assert(HexagonMCInstrInfo::isBundle(*MI)); assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + assert(HexagonMCInstrInfo::bundleSize(*MI) > 0); HasExtender = false; for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { MCInst const &MCI = *I.getInst(); @@ -157,145 +86,148 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, } } -void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - - if (MO.isReg()) { - printRegName(O, MO.getReg()); - } else if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - printImmOperand(MI, OpNo, O); - } else { - llvm_unreachable("Unknown operand"); - } -} - -void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - - if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - O << MI->getOperand(OpNo).getImm(); - } else { - llvm_unreachable("Unknown operand"); - } -} - -void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo && + (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))) O << "#"; - } else if (MO.isImm()) { - int ImmValue = MO.getImm(); - if (ImmValue < getMinValue(MII.TSFlags) || - ImmValue > getMaxValue(MII.TSFlags)) - O << 
"#"; + MCOperand const &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isExpr()) { + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) + O << formatImm(Value); + else + O << *MO.getExpr(); + } else { + llvm_unreachable("Unknown operand"); } +} + +void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, - unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI, + unsigned OpNo, + raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -1; } -void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); - - printRegName(O, MO0.getReg()); - O << " + #" << MO1.getImm(); +void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<9>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); } -void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); - - printRegName(O, MO0.getReg()); - O << ", #" << MO1.getImm(); +void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); } -void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<11>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); - printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && 
"Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { // Branches can take an immediate operand. This is used by the branch // selection pass to print $+8, an eight byte displacement from the PC. llvm_unreachable("Unknown branch operand."); } -void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, bool hi) const { - assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); + MCOperand const &MO = MI->getOperand(OpNo); - O << '#' << (hi ? "HI" : "LO") << "(#"; - printOperand(MI, OpNo, O); + O << '#' << (hi ? "HI" : "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + printOperand(MI, OpNo, O); + assert("Unknown symbol operand"); + } O << ')'; } -void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { - O << "##"; +void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + MCOperand const &MO = MI->getOperand(OpNo); + assert (MO.isExpr()); + MCExpr const &Expr = *MO.getExpr(); + int64_t Value; + if (Expr.evaluateAsAbsolute(Value)) + O << format("0x%" PRIx64, Value); + else { + if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)) + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo) + O << "##"; + O << Expr; } - printOperand(MI, OpNo, O); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 534ac237d635..5f421184b20a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// // -// This class prints an Hexagon MCInst to a .s file. 
// //===----------------------------------------------------------------------===// @@ -15,17 +14,8 @@ #define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrInfo.h" namespace llvm { -class HexagonAsmInstPrinter : public MCInstPrinter { -public: - HexagonAsmInstPrinter(MCInstPrinter *RawPrinter); - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - MCSubtargetInfo const &STI) override; - void printRegName(raw_ostream &O, unsigned RegNo) const override; - std::unique_ptr RawPrinter; -}; /// Prints bundles as a newline separated list of individual instructions /// Duplexes are separated by a vertical tab \v character /// A trailing line includes bundle properties such as endloop0/1 @@ -33,68 +23,69 @@ public: /// r0 = add(r1, r2) /// r0 = #0 \v jump 0x0 /// :endloop0 :endloop1 - class HexagonInstPrinter : public MCInstPrinter { - public: - explicit HexagonInstPrinter(MCAsmInfo const &MAI, - MCInstrInfo const &MII, - MCRegisterInfo const &MRI) - : MCInstPrinter(MAI, MII, MRI), MII(MII) {} +class HexagonInstPrinter : public MCInstPrinter { +public: + explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII, + MCRegisterInfo const &MRI); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(MCInst const *MI, raw_ostream &O); - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - const MCSubtargetInfo &STI) override; - virtual StringRef getOpcodeName(unsigned Opcode) const; - void printInstruction(const MCInst *MI, raw_ostream &O); - void printRegName(raw_ostream &OS, unsigned RegNo) const override; - static const char *getRegisterName(unsigned RegNo); + StringRef getRegName(unsigned RegNo) const; + static char const *getRegisterName(unsigned RegNo); + void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - - void printConstantPool(const MCInst *MI, unsigned OpNo, + void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void 
printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printGlobalOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; - void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, true); } - void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, false); } + void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; - const MCInstrInfo &getMII() const { - return MII; - } + void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, true); + } + void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, false); + } - protected: - void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) - const; + MCAsmInfo const &getMAI() const { return MAI; } + MCInstrInfo const &getMII() const { return MII; } - private: - const MCInstrInfo &MII; +protected: + void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, + bool hi) const; - bool HasExtender; - void setExtender(MCInst const &MCI); - }; +private: + MCInstrInfo const &MII; + + bool HasExtender; + void setExtender(MCInst const &MCI); +}; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index dc0706994786..a8456b4ead9c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -18,13 +18,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class HexagonMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit HexagonMCAsmInfo(const Triple &TT); - }; +class HexagonMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit HexagonMCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp new file mode 100644 index 000000000000..46b7b41fec3b --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -0,0 +1,581 @@ +//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCChecker.h"
+
+#include "HexagonBaseInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
+  cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+
+const HexagonMCChecker::PredSense
+  HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+
+void HexagonMCChecker::init() {
+  // Initialize read-only registers set.
+  ReadOnly.insert(Hexagon::PC);
+
+  // Figure out the loop-registers definitions.
+  if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+    Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0?
+    Defs[Hexagon::LC0].insert(Unconditional);
+  }
+  if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+    Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA1?
+    Defs[Hexagon::LC1].insert(Unconditional);
+  }
+
+  if (HexagonMCInstrInfo::isBundle(MCB))
+    // Unfurl a bundle.
+    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+      init(*I.getInst());
+    }
+  else
+    init(MCB);
+}
+
+void HexagonMCChecker::init(MCInst const &MCI) {
+  const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+  unsigned PredReg = Hexagon::NoRegister;
+  bool isTrue = false;
+
+  // Get used registers.
+  for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+    if (MCI.getOperand(i).isReg()) {
+      unsigned R = MCI.getOperand(i).getReg();
+
+      if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+        // Note a used predicate register.
+        PredReg = R;
+        isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+        // Note use of new predicate register.
+        if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+          NewPreds.insert(PredReg);
+      }
+      else
+        // Note register use. Super-registers are not tracked directly,
+        // but their components are.
+        for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+             SRI.isValid();
+             ++SRI)
+          if (!MCSubRegIterator(*SRI, &RI).isValid())
+            // Skip super-registers used indirectly.
+            Uses.insert(*SRI);
+    }
+
+  // Get implicit register definitions.
+  if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
+    for (; *ImpDef; ++ImpDef) {
+      unsigned R = *ImpDef;
+
+      if (Hexagon::R31 != R && MCID.isCall())
+        // Any register other than the LR and the PC is actually a volatile one
+        // as defined by the ABI, not modified implicitly by the call insn.
+        continue;
+      if (Hexagon::PC == R)
+        // Branches are the only insns that can change the PC;
+        // otherwise it is a read-only register.
+        continue;
+
+      if (Hexagon::USR_OVF == R)
+        // Many insns change the USR implicitly, but only one or another flag.
+        // The instruction table models the USR.OVF flag, which can be
+        // implicitly modified more than once, but cannot be modified in the
+        // same packet with an instruction that modifies it explicitly. Deal
+        // with such situations individually.
+        SoftDefs.insert(R);
+      else if (isPredicateRegister(R) &&
+               HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
+        // Include implicit late predicates.
+ LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } + + // Figure out explicit register definitions. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { + unsigned R = MCI.getOperand(i).getReg(), + S = Hexagon::NoRegister; + + // Note register definitions, direct ones as well as indirect side-effects. + // Super-registers are not tracked directly, but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) { + if (MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers defined indirectly. + continue; + + if (R == *SRI) { + if (S == R) + // Avoid scoring the defined register multiple times. + continue; + else + // Note that the defined register has already been scored. + S = R; + } + + if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI) + // P3:0 is a special case, since multiple predicate register definitions + // in a packet is allowed as the equivalent of their logical "and". + // Only an explicit definition of P3:0 is noted as such; if a + // side-effect, then note as a soft definition. + SoftDefs.insert(*SRI); + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + // Some insns produce predicates too late to be used in the same packet. + LatePreds.insert(*SRI); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) + // Current loads should be used in the same packet. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + // Temporary loads should be used in the same packet, but don't commit + // results, so it should be disregarded if another insn changes the same + // register. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + TmpDefs.insert(*SRI); + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and + // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) + Uses.insert(*SRI); + else + Defs[*SRI].insert(PredSense(PredReg, isTrue)); + } + } + + // Figure out register definitions that produce new values. + if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { + unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (HexagonMCInstrInfo::isCompound(MCII, MCI)) + compoundRegisterMap(R); // Compound insns have a limited register range. + + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // No super-registers defined indirectly. + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + + // For fairly unique 2-dot-new producers, example: + // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. 
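The NewDefs/NewUses bookkeeping built here reduces to a packet-local membership test: a .new consumer is valid only if some instruction in the same packet publishes that register as a new value, and a vdeal/vshuff-style producer publishes two. A toy model, not the checker's real data structures:

#include <cassert>
#include <set>

struct PacketModel {
  std::set<unsigned> NewDefs; // registers produced as .new in this packet
  bool validNewValueUse(unsigned Reg) const { return NewDefs.count(Reg) != 0; }
};

int main() {
  PacketModel P;
  P.NewDefs.insert(1); // V1.new
  P.NewDefs.insert(9); // V9.new (second result of a vdeal-style producer)
  assert(P.validNewValueUse(9));
  assert(!P.validNewValueUse(2));
}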
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { + unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); + + for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + } + } + + // Figure out definitions of new predicate registers. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned P = MCI.getOperand(i).getReg(); + + if (isPredicateRegister(P)) + NewPreds.insert(P); + } + + // Figure out uses of new values. + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { + unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (!MCSubRegIterator(N, &RI).isValid()) { + // Super-registers cannot use new values. + if (MCID.isBranch()) + NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV); + else + NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + } + } +} + +HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, + MCRegisterInfo const &ri) + : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), + bLoadErrInfo(false) { + init(); +} + +bool HexagonMCChecker::check() { + bool chkB = checkBranches(); + bool chkP = checkPredicates(); + bool chkNV = checkNewValues(); + bool chkR = checkRegisters(); + bool chkS = checkSolo(); + bool chkSh = checkShuffle(); + bool chkSl = checkSlots(); + bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + + return chk; +} + +bool HexagonMCChecker::checkSlots() + +{ + unsigned slotsUsed = 0; + for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { + MCInst const& MCI = *HMI.getInst(); + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + + if (slotsUsed > HEXAGON_PACKET_SIZE) { + HexagonMCErrInfo errInfo; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); + addErrInfo(errInfo); + return false; + } + return true; +} + +// Check legal use of branches. +bool HexagonMCChecker::checkBranches() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB)) { + bool hasConditional = false; + unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0, + NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; + + for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; + i < MCB.size(); ++i) { + MCInst const &MCI = *MCB.begin()[i].getInst(); + + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { + ++Branches; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() && + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + ++NewIndirectBranches; + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) + ++NewValueBranches; + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { + hasConditional = true; + Conditional = i; // Record the position of the conditional branch. + } else { + Unconditional = i; // Record the position of the unconditional branch. 
+      }
+    }
+    if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
+        HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
+      ++Returns;
+  }
+
+  if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
+    if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+        HexagonMCInstrInfo::isOuterLoop(MCB)) {
+      // Error out if there's any branch in a loop-end packet.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
+      addErrInfo(errInfo);
+      return false;
+    }
+  if (Branches > 1)
+    if (!hasConditional || Conditional > Unconditional) {
+      // Error out if there is more than one unconditional branch or
+      // the conditional branch appears after the unconditional one.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Check legal use of predicate registers.
+bool HexagonMCChecker::checkPredicates() {
+  HexagonMCErrInfo errInfo;
+  // Check for proper use of new predicate registers.
+  for (const auto& I : NewPreds) {
+    unsigned P = I;
+
+    if (!Defs.count(P) || LatePreds.count(P)) {
+      // Error out if the new predicate register is not defined,
+      // or defined "late"
+      // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  // Check for proper use of auto-anded predicate registers.
+  for (const auto& I : LatePreds) {
+    unsigned P = I;
+
+    if (LatePreds.count(P) > 1 || Defs.count(P)) {
+      // Error out if a predicate register is defined "late" multiple times or
+      // is both defined late and regularly defined
+      // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }").
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Check legal use of new values.
+bool HexagonMCChecker::checkNewValues() {
+  HexagonMCErrInfo errInfo;
+  memset(&errInfo, 0, sizeof(errInfo));
+  for (auto& I : NewUses) {
+    unsigned R = I.first;
+    NewSense &US = I.second;
+
+    if (!hasValidNewValueDef(US, NewDefs[R])) {
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
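Both predicate rules just shown reduce to set-membership tests. A standalone sketch with plain std::multiset standing in for the checker's state (illustrative names, not the patch's API):

    #include <set>

    struct PredState {
      std::multiset<unsigned> Defs;      // predicates written normally
      std::multiset<unsigned> LatePreds; // produced too late to forward
      std::multiset<unsigned> NewPreds;  // consumed as .new in this packet

      // A .new use needs an in-packet producer that is not late.
      bool newUseOk(unsigned P) const {
        return Defs.count(P) != 0 && LatePreds.count(P) == 0;
      }
      // A late producer must be the only writer of its predicate.
      bool lateDefOk(unsigned P) const {
        return LatePreds.count(P) <= 1 && Defs.count(P) == 0;
      }
    };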
+
+// Check for legal register uses and definitions.
+bool HexagonMCChecker::checkRegisters() {
+  HexagonMCErrInfo errInfo;
+  // Check for proper register definitions.
+  for (const auto& I : Defs) {
+    unsigned R = I.first;
+
+    if (ReadOnly.count(R)) {
+      // Error out for definitions of read-only registers.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (isLoopRegister(R) && Defs.count(R) > 1 &&
+        (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+         HexagonMCInstrInfo::isOuterLoop(MCB))) {
+      // Error out for definitions of loop registers at the end of a loop.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (SoftDefs.count(R)) {
+      // Error out for explicit changes to registers also weakly defined
+      // (e.g., "{ usr = r0; r0 = sfadd(...) }").
+      unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
+      unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+      // Check for multiple register definitions.
+      PredSet &PM = Defs[R];
+
+      // Check for multiple unconditional register definitions.
+      if (PM.count(Unconditional)) {
+        // Error out on an unconditional change when there are any other
+        // changes, conditional or not.
+        unsigned UsrR = Hexagon::USR;
+        unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+        addErrInfo(errInfo);
+        return false;
+      }
+      // Check for multiple conditional register definitions.
+      for (const auto& J : PM) {
+        PredSense P = J;
+
+        // Check for multiple uses of the same condition.
+        if (PM.count(P) > 1) {
+          // Error out on conditional changes based on the same predicate
+          // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
+          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+          addErrInfo(errInfo);
+          return false;
+        }
+        // Check for the use of the complementary condition.
+        P.second = !P.second;
+        if (PM.count(P) && PM.size() > 2) {
+          // Error out on more than two conditional changes based on the same
+          // predicate
+          // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =...; if (!p0) r0 =... }").
+          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+          addErrInfo(errInfo);
+          return false;
+        }
+      }
+    }
+  }
+
+  // Check for use of current definitions.
+  for (const auto& I : CurDefs) {
+    unsigned R = I;
+
+    if (!Uses.count(R)) {
+      // Warn on an unused current definition.
+      errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
+      addErrInfo(errInfo);
+      return true;
+    }
+  }
+
+  // Check for use of temporary definitions.
+  for (const auto& I : TmpDefs) {
+    unsigned R = I;
+
+    if (!Uses.count(R)) {
+      // Special case for vhist.
+      bool vHistFound = false;
+      for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+        if (llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
+          vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
+          break;
+        }
+      }
+      // Warn on an unused temporary definition.
+      if (vHistFound == false) {
+        errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
+        addErrInfo(errInfo);
+        return true;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Check for legal use of solo insns.
+bool HexagonMCChecker::checkSolo() {
+  HexagonMCErrInfo errInfo;
+  if (HexagonMCInstrInfo::isBundle(MCB) &&
+      HexagonMCInstrInfo::bundleSize(MCB) > 1) {
+    for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+      if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
+        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
+        addErrInfo(errInfo);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
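The register-clash rules in checkRegisters() above condense to this: one write is always fine, and several writes are legal only as a single if(p)/if(!p) pair on the same predicate. A plain-C++ sketch of that decision (an assumed simplification of the logic above, not the patch's code):

    #include <set>
    #include <utility>

    using PredSense = std::pair<unsigned, bool>; // (predicate register, sense)
    static const PredSense Unconditional{0, false};

    // Is the multiset of senses under which one register is written legal?
    static bool defsOk(const std::multiset<PredSense> &PM) {
      if (PM.size() < 2)
        return true;
      if (PM.count(Unconditional))
        return false;           // an unconditional write tolerates no company
      for (PredSense P : PM) {
        if (PM.count(P) > 1)
          return false;         // same condition used twice
        P.second = !P.second;
        if (PM.count(P) && PM.size() > 2)
          return false;         // complementary pair plus an extra write
      }
      return true;              // exactly one if(p)/if(!p) pair
    }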
+
+bool HexagonMCChecker::checkShuffle() {
+  HexagonMCErrInfo errInfo;
+  // Branch info is lost when duplexing. The unduplexed insns must be
+  // checked and only branch errors matter for this case.
+  HexagonMCShuffler MCS(MCII, STI, MCB);
+  if (!MCS.check()) {
+    if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+      errInfo.setShuffleError(MCS.getError());
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+  HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+  if (!MCSDX.check()) {
+    errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+    errInfo.setShuffleError(MCSDX.getError());
+    addErrInfo(errInfo);
+    return false;
+  }
+  return true;
+}
+
+void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+  switch (Register) {
+  default:
+    break;
+  case Hexagon::R15:
+    Register = Hexagon::R23;
+    break;
+  case Hexagon::R14:
+    Register = Hexagon::R22;
+    break;
+  case Hexagon::R13:
+    Register = Hexagon::R21;
+    break;
+  case Hexagon::R12:
+    Register = Hexagon::R20;
+    break;
+  case Hexagon::R11:
+    Register = Hexagon::R19;
+    break;
+  case Hexagon::R10:
+    Register = Hexagon::R18;
+    break;
+  case Hexagon::R9:
+    Register = Hexagon::R17;
+    break;
+  case Hexagon::R8:
+    Register = Hexagon::R16;
+    break;
+  }
+}
+
+bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
+                                           const NewSenseList &Defs) const {
+  bool Strict = !RelaxNVChecks;
+
+  for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+    const NewSense &Def = Defs[i];
+    // NVJ cannot use a new FP value [7.6.1]
+    if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0))
+      continue;
+    // If the definition was not predicated, then it does not matter if
+    // the use is.
+    if (Def.PredReg == 0)
+      return true;
+    // With the strict checks, both the definition and the use must be
+    // predicated on the same register and condition.
+    if (Strict) {
+      if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond)
+        return true;
+    } else {
+      // With the relaxed checks, if the definition was predicated, the only
+      // detectable violation is a use predicated on the opposing condition;
+      // otherwise, it's fine.
+      if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond)
+        return true;
+    }
+  }
+  return false;
+}
+
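hasValidNewValueDef() boils down to one predicate-compatibility test per producer. A standalone mirror of that decision in plain C++ (the Sense type and function names are invented for illustration):

    #include <vector>

    struct Sense { unsigned PredReg; bool Cond; };

    // May a .new use with sense U forward from any producer in Defs?
    // Strict mode demands the same predicate and the same sense; relaxed
    // mode only rejects the provably opposite sense of the same predicate.
    static bool mayForward(const Sense &U, const std::vector<Sense> &Defs,
                           bool Strict) {
      for (const Sense &D : Defs) {
        if (D.PredReg == 0)
          return true;                 // unpredicated producer: always ok
        if (Strict ? (D.PredReg == U.PredReg && D.Cond == U.Cond)
                   : (D.PredReg != U.PredReg || D.Cond == U.Cond))
          return true;
      }
      return false;                    // no compatible producer in the packet
    }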
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
new file mode 100644
index 000000000000..5fc0bdeaccbb
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -0,0 +1,218 @@
+//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCHECKER_H
+#define HEXAGONMCCHECKER_H
+
+#include <map>
+#include <set>
+#include <queue>
+#include "MCTargetDesc/HexagonMCShuffler.h"
+
+using namespace llvm;
+
+namespace llvm {
+class MCOperandInfo;
+
+typedef struct {
+  unsigned Error, Warning, ShuffleError;
+  unsigned Register;
+} ErrInfo_T;
+
+class HexagonMCErrInfo {
+public:
+  enum {
+    CHECK_SUCCESS = 0,
+    // Errors.
+    CHECK_ERROR_BRANCHES = 0x00001,
+    CHECK_ERROR_NEWP = 0x00002,
+    CHECK_ERROR_NEWV = 0x00004,
+    CHECK_ERROR_REGISTERS = 0x00008,
+    CHECK_ERROR_READONLY = 0x00010,
+    CHECK_ERROR_LOOP = 0x00020,
+    CHECK_ERROR_ENDLOOP = 0x00040,
+    CHECK_ERROR_SOLO = 0x00080,
+    CHECK_ERROR_SHUFFLE = 0x00100,
+    CHECK_ERROR_NOSLOTS = 0x00200,
+    CHECK_ERROR_UNKNOWN = 0x00400,
+    // Warnings.
+    CHECK_WARN_CURRENT = 0x10000,
+    CHECK_WARN_TEMPORARY = 0x20000
+  };
+  ErrInfo_T s;
+
+  void reset() {
+    s.Error = CHECK_SUCCESS;
+    s.Warning = CHECK_SUCCESS;
+    s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
+    s.Register = Hexagon::NoRegister;
+  };
+  HexagonMCErrInfo() {
+    reset();
+  };
+
+  void setError(unsigned e, unsigned r = Hexagon::NoRegister)
+    { s.Error = e; s.Register = r; };
+  void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
+    { s.Warning = w; s.Register = r; };
+  void setShuffleError(unsigned e) { s.ShuffleError = e; };
+};
+
+/// Check for a valid bundle.
+class HexagonMCChecker {
+  /// Insn bundle.
+  MCInst& MCB;
+  MCInst& MCBDX;
+  const MCRegisterInfo& RI;
+  MCInstrInfo const &MCII;
+  MCSubtargetInfo const &STI;
+  bool bLoadErrInfo;
+
+  /// Set of definitions: register #, if predicated, if predicated true.
+  typedef std::pair<unsigned, bool> PredSense;
+  static const PredSense Unconditional;
+  typedef std::multiset<PredSense> PredSet;
+  typedef std::multiset<PredSense>::iterator PredSetIterator;
+
+  typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator;
+  llvm::DenseMap<unsigned, PredSet> Defs;
+
+  /// Information about how a new-value register is defined or used:
+  ///   PredReg = predicate register, 0 if use/def not predicated,
+  ///   Cond = true/false for if(PredReg)/if(!PredReg) respectively,
+  ///   IsFloat = true if definition produces a floating point value
+  ///     (not valid for uses),
+  ///   IsNVJ = true if the use is a new-value branch (not valid for
+  ///     definitions).
+  struct NewSense {
+    unsigned PredReg;
+    bool IsFloat, IsNVJ, Cond;
+    // The special-case "constructors":
+    static NewSense Jmp(bool isNVJ) {
+      NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
+                      /*Cond=*/ false };
+      return NS;
+    }
+    static NewSense Use(unsigned PR, bool True) {
+      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
+                      /*Cond=*/ True };
+      return NS;
+    }
+    static NewSense Def(unsigned PR, bool True, bool Float) {
+      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
+                      /*Cond=*/ True };
+      return NS;
+    }
+  };
+  /// Set of definitions that produce new register:
+  typedef llvm::SmallVector<NewSense, 2> NewSenseList;
+  typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
+  llvm::DenseMap<unsigned, NewSenseList> NewDefs;
+
+  /// Set of weak definitions whose clashes should be enforced selectively.
+  typedef std::set<unsigned>::iterator SoftDefsIterator;
+  std::set<unsigned> SoftDefs;
+
+  /// Set of current definitions committed to the register file.
+  typedef std::set<unsigned>::iterator CurDefsIterator;
+  std::set<unsigned> CurDefs;
+
+  /// Set of temporary definitions not committed to the register file.
+  typedef std::set<unsigned>::iterator TmpDefsIterator;
+  std::set<unsigned> TmpDefs;
+
+  /// Set of new predicates used.
+  typedef std::set<unsigned>::iterator NewPredsIterator;
+  std::set<unsigned> NewPreds;
+
+  /// Set of predicates defined late.
+  typedef std::multiset<unsigned>::iterator LatePredsIterator;
+  std::multiset<unsigned> LatePreds;
+
+  /// Set of uses.
+  typedef std::set<unsigned>::iterator UsesIterator;
+  std::set<unsigned> Uses;
+
+  /// Set of new values used: new register, if new-value jump.
+  typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator;
+  llvm::DenseMap<unsigned, NewSense> NewUses;
+
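Note that the CHECK_* codes are disjoint single bits, with errors in the low 16 bits and warnings above them. The checker stores Error and Warning in separate fields, but the disjoint positions would let a caller accumulate or mask several at once. A small sketch (the mask values are copied from the enum above; the surrounding code is invented):

    #include <cstdio>

    enum : unsigned {
      ERROR_NOSLOTS = 0x00200, // an error bit, below bit 16
      WARN_CURRENT  = 0x10000, // a warning bit, at or above bit 16
    };

    int main() {
      unsigned Status = 0;
      Status |= ERROR_NOSLOTS; // record an error
      Status |= WARN_CURRENT;  // warnings never collide with error bits
      std::printf("errors=%#x warnings=%#x\n",
                  Status & 0xffffu, Status & ~0xffffu);
      return 0;
    }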
+  /// Pre-defined set of read-only registers.
+  typedef std::set<unsigned>::iterator ReadOnlyIterator;
+  std::set<unsigned> ReadOnly;
+
+  std::queue<ErrInfo_T> ErrInfoQ;
+  HexagonMCErrInfo CrntErrInfo;
+
+  void getErrInfo() {
+    if (bLoadErrInfo == true) {
+      if (ErrInfoQ.empty()) {
+        CrntErrInfo.reset();
+      } else {
+        CrntErrInfo.s = ErrInfoQ.front();
+        ErrInfoQ.pop();
+      }
+    }
+    bLoadErrInfo = false;
+  }
+
+  void init();
+  void init(MCInst const&);
+
+  // Checks performed.
+  bool checkBranches();
+  bool checkPredicates();
+  bool checkNewValues();
+  bool checkRegisters();
+  bool checkSolo();
+  bool checkShuffle();
+  bool checkSlots();
+
+  static void compoundRegisterMap(unsigned&);
+
+  bool isPredicateRegister(unsigned R) const {
+    return (Hexagon::P0 == R || Hexagon::P1 == R ||
+            Hexagon::P2 == R || Hexagon::P3 == R);
+  };
+  bool isLoopRegister(unsigned R) const {
+    return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
+            Hexagon::SA1 == R || Hexagon::LC1 == R);
+  };
+
+  bool hasValidNewValueDef(const NewSense &Use,
+                           const NewSenseList &Defs) const;
+
+  public:
+  explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
+                            const MCRegisterInfo& ri);
+
+  bool check();
+
+  /// Add a new error/warning.
+  void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
+
+  /// Return the error code for the last operation in the insn bundle.
+  unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
+  unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
+  unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
+  unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
+  bool getNextErrInfo() {
+    bLoadErrInfo = true;
+    return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
+  }
+};
+
+}
+
+#endif // HEXAGONMCCHECKER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 9fc4e2aeaba6..c2c6275e7e8d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -96,6 +96,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
   assert(!HexagonMCInstrInfo::isBundle(HMB));
   uint64_t Binary;
 
+  // Compound instructions are limited to using registers 0-7 and 16-23,
+  // and here we map 16-23 to 8-15 so they can be correctly encoded.
+  static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
+                               Hexagon::R11, Hexagon::R12, Hexagon::R13,
+                               Hexagon::R14, Hexagon::R15};
+
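The error-reporting API that just ended is a lazily drained FIFO: getNextErrInfo() arms a pending load, and the getters then observe the freshly popped entry. A standalone sketch of that consumption pattern (plain C++, invented names):

    #include <cstdio>
    #include <queue>

    struct ErrInfo { unsigned Error, Register; };

    struct ErrQueue {
      std::queue<ErrInfo> Q;
      ErrInfo Current{};
      bool LoadPending = false;

      bool next() {              // analogous to getNextErrInfo()
        LoadPending = true;
        if (Q.empty())
          return false;
        load();
        return true;
      }
      unsigned error() { load(); return Current.Error; }

    private:
      void load() {              // analogous to the lazy getErrInfo()
        if (!LoadPending)
          return;
        if (!Q.empty()) { Current = Q.front(); Q.pop(); }
        LoadPending = false;
      }
    };

    int main() {
      ErrQueue EQ;
      EQ.Q.push({0x200, 0});
      while (EQ.next())
        std::printf("error %#x\n", EQ.error());
      return 0;
    }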
   // Pseudo instructions don't get encoded and shouldn't be here
   // in the first place!
   assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
@@ -104,6 +110,16 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
          " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
          "\n");
 
+  if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
+    for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
+      if (HMB.getOperand(i).isReg()) {
+        unsigned Reg =
+            MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
+        if ((Reg <= 23) && (Reg >= 16))
+          HMB.getOperand(i).setReg(RegMap[Reg - 16]);
+      }
+  }
+
   if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
     // Calculate the new value distance to the associated producer
     MCOperand &MCO =
@@ -318,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
   // The only relocs left should be GP relative:
   default:
     if (MCID.mayStore() || MCID.mayLoad()) {
-      for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+      for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
            ++ImpUses) {
         if (*ImpUses == Hexagon::GP) {
           switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
@@ -389,10 +405,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
     return cast<MCConstantExpr>(ME)->getValue();
   }
   if (MK == MCExpr::Binary) {
-    unsigned Res;
-    Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
-    Res +=
-        getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+    getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+    getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
     return 0;
   }
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 886f8db3bc63..d194bea3d8dc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -115,8 +115,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
     SrcReg = MI.getOperand(1).getReg();
     if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
         HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
-        MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) ||
-                                     (MI.getOperand(2).getImm() == -1)))
+        (HexagonMCInstrInfo::inRange<5>(MI, 2) ||
+         HexagonMCInstrInfo::minConstant(MI, 2) == -1))
       return HexagonII::HCG_A;
     break;
   case Hexagon::A2_tfr:
@@ -134,8 +134,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
       return false;
     // Rd = #u6
     DstReg = MI.getOperand(0).getReg();
-    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 &&
-        MI.getOperand(1).getImm() >= 0 &&
+    if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
+        HexagonMCInstrInfo::minConstant(MI, 1) >= 0 &&
         HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
       return HexagonII::HCG_A;
     break;
@@ -145,9 +145,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
     DstReg = MI.getOperand(0).getReg();
     Src1Reg = MI.getOperand(1).getReg();
     if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
-        MI.getOperand(2).isImm() &&
         HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
-        (MI.getOperand(2).getImm() == 0))
+        HexagonMCInstrInfo::minConstant(MI, 2) == 0)
       return HexagonII::HCG_A;
     break;
   // The fact that .new form is used pretty much guarantees
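The encoder above squeezes the legal compound registers r0-r7 and r16-r23 into a 4-bit field by re-encoding the upper group as 8-15, which is what the RegMap table implements via register handles. A minimal sketch of the same remapping with plain integers:

    #include <cassert>

    // Map an architectural register number legal in a compound (0-7 or 16-23)
    // to its 4-bit encoding slot: 0-7 stay put, 16-23 become 8-15.
    static unsigned compoundEncoding(unsigned Reg) {
      assert((Reg <= 7) || (Reg >= 16 && Reg <= 23));
      return Reg <= 7 ? Reg : Reg - 8;
    }
    // compoundEncoding(16) == 8, compoundEncoding(23) == 15.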
@@ -206,6 +205,8 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   MCInst *CompoundInsn = 0;
   unsigned compoundOpcode;
   MCOperand Rs, Rt;
+  int64_t Value;
+  bool Success;
 
   switch (L.getOpcode()) {
   default:
@@ -277,7 +278,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   case Hexagon::C2_cmpeqi:
     DEBUG(dbgs() << "CX: C2_cmpeqi\n");
-    if (L.getOperand(2).getImm() == -1)
+    Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    (void)Success;
+    assert(Success);
+    if (Value == -1)
       compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)];
     else
       compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
@@ -286,14 +290,17 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
     CompoundInsn = new (Context) MCInst;
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
-    if (L.getOperand(2).getImm() != -1)
+    if (Value != -1)
       CompoundInsn->addOperand(L.getOperand(2));
     CompoundInsn->addOperand(R.getOperand(1));
     break;
 
   case Hexagon::C2_cmpgti:
     DEBUG(dbgs() << "CX: C2_cmpgti\n");
-    if (L.getOperand(2).getImm() == -1)
+    Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    (void)Success;
+    assert(Success);
+    if (Value == -1)
       compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)];
     else
       compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
@@ -302,7 +309,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
     CompoundInsn = new (Context) MCInst;
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
-    if (L.getOperand(2).getImm() != -1)
+    if (Value != -1)
       CompoundInsn->addOperand(L.getOperand(2));
     CompoundInsn->addOperand(R.getOperand(1));
     break;
@@ -404,7 +411,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
 /// additional slot.
 void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
                                      MCContext &Context, MCInst &MCI) {
-  assert(MCI.getOpcode() == Hexagon::BUNDLE &&
+  assert(HexagonMCInstrInfo::isBundle(MCI) &&
          "Non-Bundle where Bundle expected");
 
   // By definition a compound must have 2 insn.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 7e9247cef6ad..e6194f61a6ba 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -26,7 +26,7 @@ using namespace Hexagon;
 #define DEBUG_TYPE "hexagon-mcduplex-info"
 
 // pair table of subInstructions with opcodes
-static std::pair<unsigned, unsigned> opcodeData[] = {
+static const std::pair<unsigned, unsigned> opcodeData[] = {
     std::make_pair((unsigned)V4_SA1_addi, 0),
     std::make_pair((unsigned)V4_SA1_addrx, 6144),
     std::make_pair((unsigned)V4_SA1_addsp, 3072),
@@ -81,8 +81,7 @@ static std::pair<unsigned, unsigned> opcodeData[] = {
     std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
 
 static std::map<unsigned, unsigned>
-    subinstOpcodeMap(opcodeData,
-                     opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0]));
+    subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData));
 
 bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
   switch (Ga) {
@@ -195,15 +194,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
     // Special case this one from Group L2.
// Rd = memw(r29+#u5:2) if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && + Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) { return HexagonII::HSIG_L2; } // Rd = memw(Rs+#u4:2) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) { + inRange<4, 2>(MCI, 2)) { return HexagonII::HSIG_L1; } } @@ -214,7 +211,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 2)) { return HexagonII::HSIG_L1; } break; @@ -235,8 +232,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) { + inRange<3, 1>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -246,7 +242,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) { + inRange<3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -256,8 +252,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) { + inRange<5, 3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -326,15 +321,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntReg(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) { + Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) { return HexagonII::HSIG_S2; } // memw(Rs+#u4:2) = Rt if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) { + inRange<4, 2>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -344,7 +337,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) { + inRange<4>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -363,8 +356,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) { + inRange<3, 1>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ 
-374,8 +366,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && - MCI.getOperand(1).isImm() && - isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) { + inSRange<6, 3>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ -383,9 +374,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memw(Rs+#u4:2) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; @@ -393,16 +382,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memb(Rs+#u4) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; case Hexagon::S2_allocframe: - if (MCI.getOperand(0).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) { + if (inRange<5, 3>(MCI, 0)) return HexagonII::HSIG_S2; - } break; // // Group A: @@ -428,8 +414,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { // Rd = add(r29,#u6:2) if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) { + inRange<6, 2>(MCI, 2)) { return HexagonII::HSIG_A; } // Rx = add(Rx,#s7) @@ -439,8 +424,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rd = add(Rs,#1) // Rd = add(Rs,#-1) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == -1))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) { return HexagonII::HSIG_A; } } @@ -460,8 +444,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == 255))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) { return HexagonII::HSIG_A; } break; @@ -491,8 +474,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { DstReg = MCI.getOperand(0).getReg(); // Rd PredReg = MCI.getOperand(1).getReg(); // P0 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && - Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() && - MCI.getOperand(2).getImm() == 0) { + Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -502,7 +484,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (Hexagon::P0 == DstReg && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) { + inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -511,10 +493,7 @@ unsigned 
HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rdd = combine(#u2,#U2) DstReg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && - // TODO: Handle Globals/Symbols - (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) && - ((MCI.getOperand(2).isImm() && - isUInt<2>(MCI.getOperand(2).getImm())))) { + inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -524,7 +503,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) { + minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -534,7 +513,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) { + minConstant(MCI, 1) == 0) { return HexagonII::HSIG_A; } break; @@ -556,19 +535,17 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { } bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { - unsigned DstReg, SrcReg; - switch (potentialDuplex.getOpcode()) { case Hexagon::A2_addi: // testing for case of: Rx = add(Rx,#s7) DstReg = potentialDuplex.getOperand(0).getReg(); SrcReg = potentialDuplex.getOperand(1).getReg(); if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(2).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value)) return true; - if (potentialDuplex.getOperand(2).isImm() && - !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm()))) + if (!isShiftedInt<7, 0>(Value)) return true; } break; @@ -576,15 +553,14 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { DstReg = potentialDuplex.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(1).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value)) return true; // Check for case of Rd = #-1. - if (potentialDuplex.getOperand(1).isImm() && - (potentialDuplex.getOperand(1).getImm() == -1)) + if (Value == -1) return false; // Check for case of Rd = #u6. 
- if (potentialDuplex.getOperand(1).isImm() && - !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm())) + if (!isShiftedUInt<6, 0>(Value)) return true; } break; @@ -712,19 +688,23 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { MCInst Result; + bool Absolute; + int64_t Value; switch (Inst.getOpcode()) { default: // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; llvm_unreachable("Unimplemented subinstruction \n"); break; case Hexagon::A2_addi: - if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_inc); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; } // 1,2 SUBInst $Rd = add($Rs, #1) - else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) { + else if (Value == -1) { Result.setOpcode(Hexagon::V4_SA1_dec); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -754,7 +734,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 0); break; // 1 SUBInst allocframe(#$u5_3) case Hexagon::A2_andir: - if (Inst.getOperand(2).getImm() == 255) { + if (minConstant(Inst, 2) == 255) { Result.setOpcode(Hexagon::V4_SA1_zxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -772,26 +752,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) case Hexagon::A4_combineii: case Hexagon::A2_combineii: - if (Inst.getOperand(1).getImm() == 1) { + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_combine1i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) } - - if (Inst.getOperand(1).getImm() == 3) { + if (Value == 3) { Result.setOpcode(Hexagon::V4_SA1_combine3i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) } - if (Inst.getOperand(1).getImm() == 0) { + if (Value == 0) { Result.setOpcode(Hexagon::V4_SA1_combine0i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) } - if (Inst.getOperand(1).getImm() == 2) { + if (Value == 2) { Result.setOpcode(Hexagon::V4_SA1_combine2i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); @@ -894,12 +875,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) } case Hexagon::S4_storeirb_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storebi0); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 - } else if (Inst.getOperand(2).getImm() == 1) { + } else if (Value == 1) { Result.setOpcode(Hexagon::V4_SS2_storebi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -923,12 +906,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 2); break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt case Hexagon::S4_storeiri_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storewi0); addOps(Result, Inst, 0); addOps(Result, 
Inst, 1);
       break; //  3 1,2 SUBInst memw($Rs + #$u4_2)=#0
-    } else if (Inst.getOperand(2).getImm() == 1) {
+    } else if (Value == 1) {
       Result.setOpcode(Hexagon::V4_SS2_storewi1);
       addOps(Result, Inst, 0);
       addOps(Result, Inst, 1);
@@ -983,7 +968,8 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
     addOps(Result, Inst, 0);
     break; //  2 SUBInst if (p0) $Rd = #0
   case Hexagon::A2_tfrsi:
-    if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+    Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+    if (Absolute && Value == -1) {
       Result.setOpcode(Hexagon::V4_SA1_setin1);
       addOps(Result, Inst, 0);
       break; //  2 1 SUBInst $Rd = #-1
@@ -1044,6 +1030,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
                  << "\n");
     bisReversable = false;
   }
+  if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
+    bisReversable = false;
 
   // Try in order.
   if (isOrderedDuplexPair(
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index bf51c3515e95..eaa3550d07f6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,9 +37,7 @@ static cl::opt<unsigned>
 
 void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
                                            const MCSubtargetInfo &STI) {
-  MCInst HMI;
-  HMI.setOpcode(Hexagon::BUNDLE);
-  HMI.addOperand(MCOperand::createImm(0));
+  MCInst HMI = HexagonMCInstrInfo::createBundle();
   MCInst *MCB;
 
   if (MCK.getOpcode() != Hexagon::BUNDLE) {
@@ -50,7 +48,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
 
   // Examines packet and pad the packet, if needed, when an
   // end-loop is in the bundle.
-  HexagonMCInstrInfo::padEndloop(*MCB);
+  HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
   HexagonMCShuffle(*MCII, STI, *MCB);
 
   assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
@@ -60,9 +58,9 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
     if (Extended) {
       if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
         MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
-        HexagonMCInstrInfo::clampExtended(*MCII, *SubInst);
+        HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
       } else {
-        HexagonMCInstrInfo::clampExtended(*MCII, *MCI);
+        HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
       }
       Extended = false;
     } else {
@@ -114,7 +112,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
   MCSection *Section = getAssembler().getContext().getELFSection(
       SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
   SwitchSection(Section);
-  AssignSection(Symbol, Section);
+  AssignFragment(Symbol, getCurrentFragment());
 
   MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
   SwitchSection(CrntSection);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
new file mode 100644
index 000000000000..fc6262657514
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-mcexpr" + +HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, + MCContext &Ctx) { + return new (Ctx) HexagonNoExtendOperand(Expr); +} + +bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( + MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { + return Expr->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} + +MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { + return Expr->findAssociatedFragment(); +} + +void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } + +bool HexagonNoExtendOperand::classof(MCExpr const *E) { + return E->getKind() == MCExpr::Target; +} + +HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) + : Expr(Expr) {} + +void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + Expr->print(OS, MAI); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h new file mode 100644 index 000000000000..60f180fb2bc4 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -0,0 +1,35 @@ +//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCInst;
+class HexagonNoExtendOperand : public MCTargetExpr {
+public:
+  static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx);
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+                                 const MCFixup *Fixup) const override;
+  void visitUsedExpr(MCStreamer &Streamer) const override;
+  MCFragment *findAssociatedFragment() const override;
+  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+  static bool classof(MCExpr const *E);
+  MCExpr const *getExpr() const;
+
+private:
+  HexagonNoExtendOperand(MCExpr const *Expr);
+  MCExpr const *Expr;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
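HexagonNoExtendOperand carries no state beyond the wrapped expression; it is a marker type that forwards every query, so later passes can recognize it via classof and otherwise treat the operand normally. A standalone sketch of the same marker-wrapper pattern (plain C++, no MC types):

    #include <memory>

    struct Expr {
      virtual ~Expr() = default;
      virtual long evaluate() const = 0;
    };

    struct Const : Expr {
      long V;
      explicit Const(long V) : V(V) {}
      long evaluate() const override { return V; }
    };

    // Marker wrapper: no data of its own, forwards evaluation; its only
    // purpose is to be recognizable (here via dynamic_cast) by later passes.
    struct NoExtend : Expr {
      std::unique_ptr<Expr> Inner;
      explicit NoExtend(std::unique_ptr<Expr> E) : Inner(std::move(E)) {}
      long evaluate() const override { return Inner->evaluate(); }
    };

    static bool mustNotExtend(const Expr &E) {
      return dynamic_cast<const NoExtend *>(&E) != nullptr;
    }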
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 48b15f85a783..e6842076db2a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -15,17 +15,37 @@
 #include "Hexagon.h"
 #include "HexagonBaseInfo.h"
+#include "HexagonMCChecker.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
 namespace llvm {
+void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
+                                     MCContext &Context) {
+  MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
+}
+
+void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
+                                          MCInstrInfo const &MCII, MCInst &MCB,
+                                          MCInst const &MCI) {
+  assert(HexagonMCInstrInfo::isBundle(MCB));
+  MCOperand const &exOp =
+      MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+
+  // Create the extender.
+  MCInst *XMCI =
+      new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp));
+
+  MCB.addOperand(MCOperand::createInst(XMCI));
+}
+
 iterator_range<MCInst::const_iterator>
 HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
   assert(isBundle(MCI));
-  return iterator_range<MCInst::const_iterator>(
-      MCI.begin() + bundleInstructionsOffset, MCI.end());
+  return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
 }
 
 size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
@@ -35,7 +55,40 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
     return (1);
 }
 
-void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+                                            MCSubtargetInfo const &STI,
+                                            MCContext &Context, MCInst &MCB,
+                                            HexagonMCChecker *Check) {
+  // Examine the packet and convert pairs of instructions to compound
+  // instructions when possible.
+  if (!HexagonDisableCompound)
+    HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
+  // Check the bundle for errors.
+  bool CheckOk = Check ? Check->check() : true;
+  if (!CheckOk)
+    return false;
+  HexagonMCShuffle(MCII, STI, MCB);
+  // Examine the packet and convert pairs of instructions to duplex
+  // instructions when possible.
+  MCInst InstBundlePreDuplex = MCInst(MCB);
+  if (!HexagonDisableDuplex) {
+    SmallVector<DuplexCandidate, 8> possibleDuplexes;
+    possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+    HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+  }
+  // Examine the packet and pad it, if needed, when an
+  // end-loop is in the bundle.
+  HexagonMCInstrInfo::padEndloop(Context, MCB);
+  // If compounding and duplexing didn't reduce the size to
+  // 4 or fewer, we have a packet that is too big.
+  if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+    return false;
+  HexagonMCShuffle(MCII, STI, MCB);
+  return true;
+}
+
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
+                                       MCContext &Context, MCInst &MCI) {
   assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
          HexagonMCInstrInfo::isExtended(MCII, MCI));
   MCOperand &exOp =
@@ -43,13 +96,20 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
   // If the extended value is a constant, then use it for the extended and
   // for the extender instructions, masking off the lower 6 bits and
   // including the assumed bits.
-  if (exOp.isImm()) {
+  int64_t Value;
+  if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
     unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
-    int64_t Bits = exOp.getImm();
-    exOp.setImm((Bits & 0x3f) << Shift);
+    exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context));
   }
 }
 
+MCInst HexagonMCInstrInfo::createBundle() {
+  MCInst Result;
+  Result.setOpcode(Hexagon::BUNDLE);
+  Result.addOperand(MCOperand::createImm(0));
+  return Result;
+}
+
 MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
                                          MCInst const &inst0,
                                          MCInst const &inst1) {
@@ -64,6 +124,27 @@ MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
   return duplexInst;
 }
 
+MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
+                                          MCInst const &Inst,
+                                          MCOperand const &MO) {
+  assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
+         HexagonMCInstrInfo::isExtended(MCII, Inst));
+
+  MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+  MCInst XMI;
+  XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
+                 HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
+                    ? Hexagon::A4_ext_b
+                    : Hexagon::A4_ext);
+  if (MO.isImm())
+    XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
+  else if (MO.isExpr())
+    XMI.addOperand(MCOperand::createExpr(MO.getExpr()));
+  else
+    llvm_unreachable("invalid extendable operand");
+  return XMI;
+}
+
 MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
                                                    size_t Index) {
   assert(Index <= bundleSize(MCB));
@@ -76,6 +157,13 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
   return nullptr;
 }
 
+void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context,
+                                        MCInstrInfo const &MCII, MCInst &MCB,
+                                        MCInst const &MCI, bool MustExtend) {
+  if (isConstExtended(MCII, MCI) || MustExtend)
+    addConstExtender(Context, MCII, MCB, MCI);
+}
+
 HexagonII::MemAccessSize
 HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -186,6 +274,25 @@ MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
   return (MCO);
 }
 
+/// Return the new value or the newly produced value.
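canonicalizePacket() fixes the order of the packet transforms: compound, check, shuffle, duplex, pad, size check, shuffle again, with any failure aborting the whole pass. A sketch of that staged-pipeline shape (the types and stage names are invented; only the ordering mirrors the code above):

    #include <functional>
    #include <vector>

    struct Packet { int Size = 0; bool Ok = true; };

    // Each stage may shrink or reorder the packet; a false return aborts,
    // mirroring how a failed check or an oversized packet stops the pass.
    static bool canonicalize(Packet &P,
        const std::vector<std::function<bool(Packet &)>> &Stages) {
      for (const auto &Stage : Stages)
        if (!Stage(P))
          return false;
      return true;
    }

    // Usage, in the fixed order used above (hypothetical stage functions):
    // canonicalize(P, {tryCompound, check, shuffle, tryDuplex, padEndloop,
    //                  fitsInFourSlots, shuffle});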
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2); +} + +MCOperand const & +HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -242,6 +349,13 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); } +/// Return whether the insn produces a second value. +bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2); +} + MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { assert(isBundle(MCB)); assert(Index < HEXAGON_PACKET_SIZE); @@ -261,6 +375,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); +} + bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); @@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI) { if (HexagonMCInstrInfo::isExtended(MCII, MCI)) return true; - - if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + // Branch insns are handled as necessary by relaxation. + if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch())) + return false; + // Otherwise loop instructions and other CR insts are handled by relaxation + else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && + (MCI.getOpcode() != Hexagon::C4_addipc)) + return false; + else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) return false; - short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI); - int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); - int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); - MCOperand const &MO = MCI.getOperand(ExtOpNum); + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant @@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. 
-  if (MO.isExpr())
+  assert(!MO.isImm());
+  int64_t Value;
+  if (!MO.getExpr()->evaluateAsAbsolute(Value))
     return true;
-
-  // If the extendable operand is not 'Immediate' type, the instruction should
-  // have 'isExtended' flag set.
-  assert(MO.isImm() && "Extendable operand must be Immediate type");
-
-  int ImmValue = MO.getImm();
-  return (ImmValue < MinValue || ImmValue > MaxValue);
+  int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+  int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+  return (MinValue > Value || Value > MaxValue);
 }
 
 bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
@@ -374,6 +498,19 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
   return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
 }
 
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
 bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
                                           MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -394,6 +531,18 @@ bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
   return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
 }
 
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
 bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
   return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
@@ -405,7 +554,28 @@ bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
   return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
 }
 
-void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+  if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+      (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+    return true;
+  return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+  auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+                  << 8;
+  if (MCI.size() <= Index)
+    return Sentinal;
+  MCOperand const &MCO = MCI.getOperand(Index);
+  if (!MCO.isExpr())
+    return Sentinal;
+  int64_t Value;
+  if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+    return Sentinal;
+  return Value;
+}
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
   MCInst Nop;
   Nop.setOpcode(Hexagon::A2_nop);
   assert(isBundle(MCB));
@@ -413,7 +583,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
       (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
       ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
       (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
-    MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+    MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
 }
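minConstant() signals "no usable constant" with a sentinel rather than a separate error path: the sentinel is about 2^40, so it can never pass any of the narrow isUInt/isShiftedUInt range checks applied to subinstruction immediates. A small sketch of why that works (plain C++, assuming only widths below 40 bits are ever queried):

    #include <cstdint>
    #include <limits>

    static int64_t sentinel() {
      return static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) << 8;
    }
    template <unsigned N> static bool fitsUInt(int64_t V) {
      return V >= 0 && (static_cast<uint64_t>(V) >> N) == 0;
    }
    // fitsUInt<6>(sentinel()) == false, and likewise for every width up to
    // 39 bits, so a missing or non-constant operand simply never matches a
    // subinstruction form; no extra "absent" flag is needed.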
 bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
@@ -456,6 +626,20 @@ void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
   Operand.setImm(Operand.getImm() | innerLoopMask);
 }
 
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+  assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+  assert(isMemStoreReorderEnabled(MCI));
+}
+
 void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
   assert(isBundle(MCI));
   MCOperand &Operand = MCI.getOperand(0);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 32d61a4a7be5..0237b2884a3b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -14,9 +14,11 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 
+#include "HexagonMCExpr.h"
 #include "llvm/MC/MCInst.h"
 
 namespace llvm {
+class HexagonMCChecker;
 class MCContext;
 class MCInstrDesc;
 class MCInstrInfo;
@@ -39,20 +41,47 @@ int64_t const innerLoopMask = 1 << innerLoopOffset;
 size_t const outerLoopOffset = 1;
 int64_t const outerLoopMask = 1 << outerLoopOffset;
 
+// Disable reordering of memory loads/stores in this packet;
+// by default, loads may be re-ordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// Allow re-ordering of memory stores; by default, stores cannot be re-ordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
+
 size_t const bundleInstructionsOffset = 1;
 
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                      MCInst const &MCI);
+
 // Returns an iterator range of instructions in this bundle
 iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
 
 // Returns the number of instructions in the bundle
 size_t bundleSize(MCInst const &MCI);
 
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                        MCContext &Context, MCInst &MCB,
+                        HexagonMCChecker *Checker);
+
 // Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                    MCInst const &MCI, bool MustExtend);
 
 // Create a duplex instruction given the two subinsts
 MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
                      MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+                      MCOperand const &MO);
 
 // Convert this instruction in to a duplex subinst
 MCInst deriveSubInst(MCInst const &Inst);
@@ -108,6 +137,9 @@ unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
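The first operand of a BUNDLE is a plain flag word, with the inner-loop, outer-loop, and two memory-reorder bits above occupying bits 0-3. A standalone sketch of reading and setting such a word (the Bundle type is invented; the mask values are copied from the declarations above):

    #include <cstdint>

    enum : int64_t {
      InnerLoopMask              = 1 << 0,
      OuterLoopMask              = 1 << 1,
      MemReorderDisabledMask     = 1 << 2,
      MemStoreReorderEnabledMask = 1 << 3,
    };

    struct Bundle { int64_t Flags = 0; };

    static void setMemReorderDisabled(Bundle &B) {
      B.Flags |= MemReorderDisabledMask;
    }
    static bool isMemReorderDisabled(const Bundle &B) {
      return (B.Flags & MemReorderDisabledMask) != 0;
    }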
consumes or produces a new value.
 MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+                                     MCInst const &MCI);
 
 int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -125,6 +157,7 @@ bool hasImmExt(MCInst const &MCI);
 
 // Return whether the instruction is a legal new-value producer.
 bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the instruction at Index
 MCInst const &instruction(MCInst const &MCB, size_t Index);
@@ -134,10 +167,24 @@ bool isBundle(MCInst const &MCI);
 
 // Return whether the insn is an actual insn.
 bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the duplex iclass given the two duplex classes
 unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
 
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+  return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+  return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+  return isUInt<N>(minConstant(MCI, Index));
+}
+
 // Return whether the instruction needs to be constant extended.
 bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -173,6 +220,8 @@ bool isIntReg(unsigned Reg);
 
 // Is this register suitable for use in a duplex subinst
 bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
 
 // Return whether the insn is a new-value consumer.
 bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -191,6 +240,8 @@ bool isOuterLoop(MCInst const &MCI);
 
 // Return whether this instruction is predicated
 bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return whether the predicate sense is true
 bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -209,9 +260,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
 
 /// Return whether the insn can be packaged only with an A-type insn in slot #1.
 bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCInst &MCI);
+void padEndloop(MCContext &Context, MCInst &MCI);
 
 bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -220,6 +272,8 @@ void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
 
 // Marks a bundle as endloop0
 void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
 
 // Marks a bundle as endloop1
 void setOuterLoop(MCInst &MCI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 53305d85fd80..9a292577a8f3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "HexagonGenRegisterInfo.inc"
 
+cl::opt<bool> llvm::HexagonDisableCompound
+  ("mno-compound",
+   cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+  ("mno-pairing",
+   cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+  if (CPU.empty())
+    CPU = "hexagonv60";
+  return CPU;
+}
+
 MCInstrInfo *llvm::createHexagonMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitHexagonMCInstrInfo(X);
@@ -54,6 +68,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
 
 static MCSubtargetInfo *
 createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+  CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
   return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
 }
 
@@ -76,28 +91,23 @@ public:
     StringRef Contents(Buffer);
     auto PacketBundle = Contents.rsplit('\n');
     auto HeadTail = PacketBundle.first.split('\n');
-    auto Preamble = "\t{\n\t\t";
-    auto Separator = "";
-    while(!HeadTail.first.empty()) {
-      OS << Separator;
-      StringRef Inst;
+    StringRef Separator = "\n";
+    StringRef Indent = "\t\t";
+    OS << "\t{\n";
+    while (!HeadTail.first.empty()) {
+      StringRef InstTxt;
       auto Duplex = HeadTail.first.split('\v');
-      if(!Duplex.second.empty()){
-        OS << Duplex.first << "\n";
-        Inst = Duplex.second;
+      if (!Duplex.second.empty()) {
+        OS << Indent << Duplex.first << Separator;
+        InstTxt = Duplex.second;
+      } else if (!HeadTail.first.trim().startswith("immext")) {
+        InstTxt = Duplex.first;
       }
-      else {
-        if(!HeadTail.first.startswith("immext"))
-          Inst = Duplex.first;
-      }
-      OS << Preamble;
-      OS << Inst;
+      if (!InstTxt.empty())
+        OS << Indent << InstTxt << Separator;
       HeadTail = HeadTail.second.split('\n');
-      Preamble = "";
-      Separator = "\n\t\t";
     }
-    if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
-      OS << "\n\t}" << PacketBundle.second;
+    OS << "\t}" << PacketBundle.second;
   }
 };
 }
@@ -154,9 +164,9 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
                                                  CodeModel::Model CM,
                                                  CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
-  // For the time being, use static relocations, since there's really no
-  // support for PIC yet.
- X->initMCCodeGenInfo(Reloc::Static, CM, OL); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->initMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index cb626503313f..a005a014416b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -16,6 +16,8 @@ #include +#include "llvm/Support/CommandLine.h" + namespace llvm { struct InstrItinerary; struct InstrStage; @@ -33,22 +35,27 @@ class raw_ostream; class raw_pwrite_stream; extern Target TheHexagonTarget; - +extern cl::opt HexagonDisableCompound; +extern cl::opt HexagonDisableDuplex; extern const InstrStage HexagonStages[]; MCInstrInfo *createHexagonMCInstrInfo(); -MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, - MCRegisterInfo const &MRI, +MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, MCContext &MCT); -MCAsmBackend *createHexagonAsmBackend(Target const &T, - MCRegisterInfo const &MRI, +MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU); +namespace HEXAGON_MC { + StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); +} + } // End llvm namespace // Define symbolic names for Hexagon registers. This defines a mapping from diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 41112ac0b46e..6ceb848ba20c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -27,6 +27,7 @@ using namespace llvm; +namespace { // Insn shuffling priority. 
class HexagonBid { // The priority is directly proportional to how restricted the insn is based @@ -75,6 +76,7 @@ public: return false; }; }; +} // end anonymous namespace unsigned HexagonResource::setWeight(unsigned s) { const unsigned SlotWeight = 8; @@ -93,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. + Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. + Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { @@ -107,7 +163,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(ID, Extender, S, X); + HexagonInstr PI(MCII, ID, Extender, S, X); Packet.push_back(PI); } @@ -126,6 +182,8 @@ bool HexagonShuffler::check() { // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; // Number of duplex insns, solo insns. 
unsigned duplex = 0, solo = 0;
   // Number of insns restricting other insns in the packet to A and X types,
@@ -168,6 +226,12 @@ bool HexagonShuffler::check() {
     case HexagonII::TypeJ:
       ++jumps;
       break;
+    case HexagonII::TypeCVI_VM_VP_LDU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_LD:
+    case HexagonII::TypeCVI_VM_TMP_LD:
+    case HexagonII::TypeCVI_VM_CUR_LD:
+      ++CVIloads;
     case HexagonII::TypeLD:
       ++loads;
       ++memory;
@@ -176,6 +240,11 @@ bool HexagonShuffler::check() {
       if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
        ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
       break;
+    case HexagonII::TypeCVI_VM_STU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_ST:
+    case HexagonII::TypeCVI_VM_NEW_ST:
+      ++CVIstores;
     case HexagonII::TypeST:
       ++stores;
       ++memory;
@@ -203,9 +272,9 @@ bool HexagonShuffler::check() {
   }
 
   // Check if the packet is legal.
-  if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
-      (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
-      (onlyAX && xtypeFloat)) {
+  if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+      (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+      (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
     Error = SHUFFLE_ERROR_INVALID;
     return false;
   }
@@ -336,6 +405,19 @@ bool HexagonShuffler::check() {
       return false;
     }
   }
+  // Verify the CVI slot subscriptions.
+  {
+    HexagonUnitAuction AuctionCVI;
+
+    std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+    for (iterator I = begin(); I != end(); ++I)
+      for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+        if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+          Error = SHUFFLE_ERROR_SLOTS;
+          return false;
+        }
+  }
 
   Error = SHUFFLE_SUCCESS;
   return true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 8b6c72ee25e6..174f10fb2580 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -51,6 +51,44 @@ public:
   };
 };
 
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+  typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+  typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+  // Available HVX slots.
+  enum {
+    CVI_NONE = 0,
+    CVI_XLANE = 1 << 0,
+    CVI_SHIFT = 1 << 1,
+    CVI_MPY0 = 1 << 2,
+    CVI_MPY1 = 1 << 3
+  };
+
+  static bool SetUp;
+  static bool setup();
+  static TypeUnitsAndLanes *TUL;
+
+  // Count of adjacent slots that the insn requires to be executed.
+  unsigned Lanes;
+  // Flag whether the insn is a load or a store.
+  bool Load, Store;
+  // Flag whether the HVX resources are valid.
+  bool Valid;
+
+  void setLanes(unsigned l) { Lanes = l; };
+  void setLoad(bool f = true) { Load = f; };
+  void setStore(bool f = true) { Store = f; };
+
+public:
+  HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+  bool isValid() const { return (Valid); };
+  unsigned getLanes() const { return (Lanes); };
+  bool mayLoad() const { return (Load); };
+  bool mayStore() const { return (Store); };
+};
+
 // Handle to an insn used by the shuffling algorithm.
class HexagonInstr { friend class HexagonShuffler; @@ -58,12 +96,14 @@ class HexagonInstr { MCInst const *ID; MCInst const *Extender; HexagonResource Core; + HexagonCVIResource CVI; bool SoloException; public: - HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s, - bool x = false) - : ID(id), Extender(Extender), Core(s), SoloException(x){}; + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; MCInst const *getDesc() const { return (ID); }; @@ -79,6 +119,10 @@ public: static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { return (HexagonResource::lessUnits(A.Core, B.Core)); }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; }; // Bundle shuffler. @@ -108,6 +152,8 @@ public: SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. + SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). + SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict SHUFFLE_ERROR_UNKNOWN ///< Unknown error. }; diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile index 04b41e5986ac..c53b8e56aafc 100644 --- a/lib/Target/Hexagon/Makefile +++ b/lib/Target/Hexagon/Makefile @@ -13,6 +13,7 @@ TARGET = Hexagon # Make sure that tblgen is run, first thing. BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenInstrInfo.inc \ + HexagonGenAsmMatcher.inc \ HexagonGenAsmWriter.inc \ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ HexagonGenCallingConv.inc \ @@ -20,6 +21,6 @@ BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenMCCodeEmitter.inc \ HexagonGenDisassemblerTables.inc -DIRS = TargetInfo MCTargetDesc Disassembler +DIRS = TargetInfo MCTargetDesc Disassembler AsmParser include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index f05d7a465252..eb794ebc7216 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -22,6 +22,7 @@ subdirectories = AMDGPU ARM AArch64 + AVR BPF CppBackend Hexagon diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 70141a998e4a..72afec18becb 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -17,8 +17,6 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; - class MSP430InstPrinter : public MCInstPrinter { public: MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index ff5b0b6d858c..183dee36a047 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -17,13 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class MSP430MCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit MSP430MCAsmInfo(const Triple &TT); - }; +class MSP430MCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit MSP430MCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 
ffcf22216d4f..606abc250d98 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -64,7 +64,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; unsigned BlockSize = 0; for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 29bc8b33988a..18f38b7e90da 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -69,10 +69,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, computeRegisterProperties(STI.getRegisterInfo()); // Provide all sorts of operation actions - - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(MSP430::SP); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? @@ -508,9 +504,10 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); - InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + InVal = DAG.getLoad( + VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); } InVals.push_back(InVal); @@ -1231,8 +1228,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, } const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // Create loop block MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -1320,8 +1316,7 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // thisMBB: // ... 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 72b1780fd1ce..d4f82bda1ec9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -44,11 +44,10 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16mr)) @@ -72,11 +71,10 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16rm)) diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index 54154a8afac1..47b0e270c5b3 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -50,9 +50,9 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetJumpTableSymbol(const MachineOperand &MO) const { - const DataLayout *DL = Printer.TM.getDataLayout(); + const DataLayout &DL = Printer.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); @@ -67,9 +67,9 @@ GetJumpTableSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - const DataLayout *DL = Printer.TM.getDataLayout(); + const DataLayout &DL = Printer.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "CPI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp index 0f7539908458..b442fc03b257 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- MSP430MachineFuctionInfo.cpp - MSP430 machine function info -------===// +//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index fcc5f5b88600..2d937318c7e5 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==// +//=== MSP430MachineFunctionInfo.h - 
MSP430 machine function info -*- C++ -*-==// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt index e9899247fb5e..458f3f8b4c7e 100644 --- a/lib/Target/MSP430/README.txt +++ b/lib/Target/MSP430/README.txt @@ -2,7 +2,7 @@ // MSP430 backend. //===---------------------------------------------------------------------===// -DISCLAIMER: Thid backend should be considered as highly experimental. I never +DISCLAIMER: This backend should be considered as highly experimental. I never seen nor worked with this MCU, all information was gathered from datasheet only. The original intention of making this backend was to write documentation of form "How to write backend for dummies" :) Thes notes hopefully will be diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 5107d2ae58c3..d4e061f00d3a 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsRegisterInfo.h" +#include "MipsTargetObjectFile.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" @@ -106,7 +107,6 @@ class MipsAsmParser : public MCTargetAsmParser { return static_cast(TS); } - MCSubtargetInfo &STI; MipsABIInfo ABI; SmallVector, 2> AssemblerOptions; MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a @@ -114,6 +114,12 @@ class MipsAsmParser : public MCTargetAsmParser { // selected. This usually happens after an '.end func' // directive. bool IsLittleEndian; + bool IsPicEnabled; + bool IsCpRestoreSet; + int CpRestoreOffset; + unsigned CpSaveLocation; + /// If true, then CpSaveLocation is a register, otherwise it's an offset. + bool CpSaveLocationIsRegister; // Print a warning along with its fix-it message at the given range. 
void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg, @@ -141,50 +147,41 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID) override; - MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy + OperandMatchResultTy parseMemOperand(OperandVector &Operands); + OperandMatchResultTy matchAnyRegisterNameWithoutDollar(OperandVector &Operands, StringRef Identifier, SMLoc S); - - MipsAsmParser::OperandMatchResultTy - matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S); - - MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseRegisterPair (OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseMovePRegPair(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseRegisterList (OperandVector &Operands); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + SMLoc S); + OperandMatchResultTy parseAnyRegister(OperandVector &Operands); + OperandMatchResultTy parseImm(OperandVector &Operands); + OperandMatchResultTy parseJumpTarget(OperandVector &Operands); + OperandMatchResultTy parseInvNum(OperandVector &Operands); + OperandMatchResultTy parseLSAImm(OperandVector &Operands); + OperandMatchResultTy parseRegisterPair(OperandVector &Operands); + OperandMatchResultTy parseMovePRegPair(OperandVector &Operands); + OperandMatchResultTy parseRegisterList(OperandVector &Operands); bool searchSymbolAlias(OperandVector &Operands); bool parseOperand(OperandVector &, StringRef Mnemonic); - bool needsExpansion(MCInst &Inst); + enum MacroExpanderResultTy { + MER_NotAMacro, + MER_Success, + MER_Fail, + }; // Expands assembly pseudo instructions. - // Returns false on success, true otherwise. 
- bool expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions); + MacroExpanderResultTy + tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); bool expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); bool loadImmediate(int64_t ImmValue, unsigned DstReg, unsigned SrcReg, - bool Is32BitImm, SMLoc IDLoc, + bool Is32BitImm, bool IsAddress, SMLoc IDLoc, SmallVectorImpl &Instructions); bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg, @@ -194,11 +191,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions); + bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, + const MCOperand &Offset, bool Is32BitAddress, + SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions); bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); @@ -209,24 +205,43 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + bool expandAliasImmediate(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandBranchImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); bool expandCondBranches(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandUlhu(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions); + bool expandDiv(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions, const bool IsMips64, + const bool Signed); + + bool expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc, + SmallVectorImpl &Instructions); bool expandUlw(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + bool expandRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandDRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandDRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + void createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl &Instructions); void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, SmallVectorImpl &Instructions); + void createCpRestoreMemOp(bool IsLoad, int StackOffset, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -239,8 +254,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetMips0Directive(); bool parseSetArchDirective(); bool parseSetFeature(uint64_t Feature); + bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup. bool parseDirectiveCpLoad(SMLoc Loc); + bool parseDirectiveCpRestore(SMLoc Loc); bool parseDirectiveCPSetup(); + bool parseDirectiveCPReturn(); bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); @@ -337,6 +355,7 @@ class MipsAsmParser : public MCTargetAsmParser { // FeatureMipsGP64 | FeatureMips1) // Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4). 
void selectArch(StringRef ArchFeature) { + MCSubtargetInfo &STI = copySTI(); FeatureBitset FeatureBits = STI.getFeatureBits(); FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask; STI.setFeatureBits(FeatureBits); @@ -346,7 +365,8 @@ class MipsAsmParser : public MCTargetAsmParser { } void setFeatureBits(uint64_t Feature, StringRef FeatureString) { - if (!(STI.getFeatureBits()[Feature])) { + if (!(getSTI().getFeatureBits()[Feature])) { + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); @@ -354,7 +374,8 @@ class MipsAsmParser : public MCTargetAsmParser { } void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { - if (STI.getFeatureBits()[Feature]) { + if (getSTI().getFeatureBits()[Feature]) { + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); @@ -363,26 +384,25 @@ class MipsAsmParser : public MCTargetAsmParser { void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { setFeatureBits(Feature, FeatureString); - AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); } void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { clearFeatureBits(Feature, FeatureString); - AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); } public: enum MipsMatchResultTy { - Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY + Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "MipsGenAsmMatcher.inc" #undef GET_OPERAND_DIAGNOSTIC_TYPES - }; - MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), + : MCTargetAsmParser(Options, sti), ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), sti.getCPU(), Options)) { MCAsmParserExtension::Initialize(parser); @@ -390,15 +410,15 @@ public: parser.addAliasForDirective(".asciiz", ".asciz"); // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - llvm::make_unique(STI.getFeatureBits())); - + llvm::make_unique(getSTI().getFeatureBits())); + // Create an assembler options environment for the user to modify. AssemblerOptions.push_back( - llvm::make_unique(STI.getFeatureBits())); + llvm::make_unique(getSTI().getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -407,6 +427,12 @@ public: CurrentFn = nullptr; + IsPicEnabled = + (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_); + + IsCpRestoreSet = false; + CpRestoreOffset = -1; + Triple TheTriple(sti.getTargetTriple()); if ((TheTriple.getArch() == Triple::mips) || (TheTriple.getArch() == Triple::mips64)) @@ -418,70 +444,103 @@ public: /// True if all of $fcc0 - $fcc7 exist for the current ISA. 
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); } - bool isGP64bit() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; } - bool isFP64bit() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; } + bool isGP64bit() const { + return getSTI().getFeatureBits()[Mips::FeatureGP64Bit]; + } + bool isFP64bit() const { + return getSTI().getFeatureBits()[Mips::FeatureFP64Bit]; + } const MipsABIInfo &getABI() const { return ABI; } bool isABI_N32() const { return ABI.IsN32(); } bool isABI_N64() const { return ABI.IsN64(); } bool isABI_O32() const { return ABI.IsO32(); } - bool isABI_FPXX() const { return STI.getFeatureBits()[Mips::FeatureFPXX]; } + bool isABI_FPXX() const { + return getSTI().getFeatureBits()[Mips::FeatureFPXX]; + } bool useOddSPReg() const { - return !(STI.getFeatureBits()[Mips::FeatureNoOddSPReg]); + return !(getSTI().getFeatureBits()[Mips::FeatureNoOddSPReg]); } bool inMicroMipsMode() const { - return STI.getFeatureBits()[Mips::FeatureMicroMips]; + return getSTI().getFeatureBits()[Mips::FeatureMicroMips]; + } + bool hasMips1() const { + return getSTI().getFeatureBits()[Mips::FeatureMips1]; + } + bool hasMips2() const { + return getSTI().getFeatureBits()[Mips::FeatureMips2]; + } + bool hasMips3() const { + return getSTI().getFeatureBits()[Mips::FeatureMips3]; + } + bool hasMips4() const { + return getSTI().getFeatureBits()[Mips::FeatureMips4]; + } + bool hasMips5() const { + return getSTI().getFeatureBits()[Mips::FeatureMips5]; } - bool hasMips1() const { return STI.getFeatureBits()[Mips::FeatureMips1]; } - bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; } - bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; } - bool hasMips4() const { return STI.getFeatureBits()[Mips::FeatureMips4]; } - bool hasMips5() const { return STI.getFeatureBits()[Mips::FeatureMips5]; } bool hasMips32() const { - return STI.getFeatureBits()[Mips::FeatureMips32]; + return getSTI().getFeatureBits()[Mips::FeatureMips32]; } bool hasMips64() const { - return STI.getFeatureBits()[Mips::FeatureMips64]; + return getSTI().getFeatureBits()[Mips::FeatureMips64]; } bool hasMips32r2() const { - return STI.getFeatureBits()[Mips::FeatureMips32r2]; + return getSTI().getFeatureBits()[Mips::FeatureMips32r2]; } bool hasMips64r2() const { - return STI.getFeatureBits()[Mips::FeatureMips64r2]; + return getSTI().getFeatureBits()[Mips::FeatureMips64r2]; } bool hasMips32r3() const { - return (STI.getFeatureBits()[Mips::FeatureMips32r3]); + return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]); } bool hasMips64r3() const { - return (STI.getFeatureBits()[Mips::FeatureMips64r3]); + return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]); } bool hasMips32r5() const { - return (STI.getFeatureBits()[Mips::FeatureMips32r5]); + return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]); } bool hasMips64r5() const { - return (STI.getFeatureBits()[Mips::FeatureMips64r5]); + return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]); } bool hasMips32r6() const { - return STI.getFeatureBits()[Mips::FeatureMips32r6]; + return getSTI().getFeatureBits()[Mips::FeatureMips32r6]; } bool hasMips64r6() const { - return STI.getFeatureBits()[Mips::FeatureMips64r6]; + return getSTI().getFeatureBits()[Mips::FeatureMips64r6]; } - bool hasDSP() const { return STI.getFeatureBits()[Mips::FeatureDSP]; } - bool hasDSPR2() const { return STI.getFeatureBits()[Mips::FeatureDSPR2]; } - bool hasMSA() const { return STI.getFeatureBits()[Mips::FeatureMSA]; } + bool hasDSP() const { + 
    return getSTI().getFeatureBits()[Mips::FeatureDSP];
+  }
+  bool hasDSPR2() const {
+    return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
+  }
+  bool hasDSPR3() const {
+    return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
+  }
+  bool hasMSA() const {
+    return getSTI().getFeatureBits()[Mips::FeatureMSA];
+  }
   bool hasCnMips() const {
-    return (STI.getFeatureBits()[Mips::FeatureCnMips]);
+    return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
+  }
+
+  bool inPicMode() {
+    return IsPicEnabled;
   }
 
   bool inMips16Mode() const {
-    return STI.getFeatureBits()[Mips::FeatureMips16];
+    return getSTI().getFeatureBits()[Mips::FeatureMips16];
+  }
+
+  bool useTraps() const {
+    return getSTI().getFeatureBits()[Mips::FeatureUseTCCInDIV];
   }
 
   bool useSoftFloat() const {
-    return STI.getFeatureBits()[Mips::FeatureSoftFloat];
+    return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
   }
 
   /// Warn if RegIndex is the same as the current AT.
@@ -869,6 +928,16 @@ public:
     Inst.addOperand(MCOperand::createReg(getHWRegsReg()));
   }
 
+  template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+  void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    uint64_t Imm = getConstantImm() - Offset;
+    Imm &= (1 << Bits) - 1;
+    Imm += Offset;
+    Imm += AdjustOffset;
+    Inst.addOperand(MCOperand::createImm(Imm));
+  }
+
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     const MCExpr *Expr = getImm();
@@ -878,7 +947,9 @@ public:
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR32Reg()));
+    Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit()
+                                             ? getMemBase()->getGPR64Reg()
+                                             : getMemBase()->getGPR32Reg()));
 
     const MCExpr *Expr = getMemOff();
     addExpr(Inst, Expr);
@@ -924,10 +995,16 @@ public:
   bool isRegIdx() const { return Kind == k_RegisterIndex; }
   bool isImm() const override { return Kind == k_Immediate; }
   bool isConstantImm() const {
-    return isImm() && dyn_cast<MCConstantExpr>(getImm());
+    return isImm() && isa<MCConstantExpr>(getImm());
   }
-  template <unsigned Bits> bool isUImm() const {
-    return isImm() && isConstantImm() && isUInt<Bits>(getConstantImm());
+  bool isConstantImmz() const {
+    return isConstantImm() && getConstantImm() == 0;
+  }
+  template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
+    return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
+  }
+  template <unsigned Bits> bool isConstantSImm() const {
+    return isConstantImm() && isInt<Bits>(getConstantImm());
   }
   bool isToken() const override {
     // Note: It's not possible to pretend that other operand kinds are tokens.
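The templated operand predicates added here all reduce to one rule: subtract
an optional Offset, then test the remainder against a bit width. A
self-contained sketch of that matching rule (fitsUInt/matchesUImm are my
names, not the parser's):

    #include <cassert>
    #include <cstdint>

    // Rough equivalent of llvm::isUInt<N>: true if V fits in N unsigned bits.
    template <unsigned Bits> constexpr bool fitsUInt(uint64_t V) {
      return Bits >= 64 || V < (UINT64_C(1) << Bits);
    }

    // Matching rule in the style of isConstantUImm<Bits, Offset>: accept Imm
    // when (Imm - Offset) fits in Bits bits, i.e. the legal range is
    // [Offset, Offset + 2^Bits - 1].
    template <unsigned Bits, int Offset = 0> bool matchesUImm(int64_t Imm) {
      return fitsUInt<Bits>(static_cast<uint64_t>(Imm - Offset));
    }

    int main() {
      assert(matchesUImm<5>(31));    // plain uimm5: [0, 31]
      assert(!matchesUImm<5>(32));
      assert(matchesUImm<5, 1>(32)); // shifted range: [1, 32]
      assert(!matchesUImm<5, 1>(0));
      return 0;
    }

addConstantUImmOperands() re-applies the same Offset (plus AdjustOffset) when
it rebuilds the operand, which keeps the matcher and the encoder in agreement
about the operand's legal range.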
@@ -936,10 +1013,15 @@ public: } bool isMem() const override { return Kind == k_Memory; } bool isConstantMemOff() const { - return isMem() && dyn_cast(getMemOff()); + return isMem() && isa(getMemOff()); } template bool isMemWithSimmOffset() const { - return isMem() && isConstantMemOff() && isInt(getConstantMemOff()); + return isMem() && isConstantMemOff() && isInt(getConstantMemOff()) + && getMemBase()->isGPRAsmReg(); + } + template bool isMemWithSimmOffsetGPR() const { + return isMem() && isConstantMemOff() && isInt(getConstantMemOff()) && + getMemBase()->isGPRAsmReg(); } bool isMemWithGRPMM16Base() const { return isMem() && getMemBase()->isMM16AsmReg(); @@ -953,13 +1035,23 @@ public: && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == Mips::SP); } + template + bool isScaledUImm() const { + return isConstantImm() && + isShiftedUInt(getConstantImm()); + } bool isRegList16() const { if (!isRegList()) return false; int Size = RegList.List->size(); - if (Size < 2 || Size > 5 || *RegList.List->begin() != Mips::S0 || - RegList.List->back() != Mips::RA) + if (Size < 2 || Size > 5) + return false; + + unsigned R0 = RegList.List->front(); + unsigned R1 = RegList.List->back(); + if (!((R0 == Mips::S0 && R1 == Mips::RA) || + (R0 == Mips::S0_64 && R1 == Mips::RA_64))) return false; int PrevReg = *RegList.List->begin(); @@ -1304,9 +1396,123 @@ static bool hasShortDelaySlot(unsigned Opcode) { } } +static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) { + if (const MCSymbolRefExpr *SRExpr = dyn_cast(Expr)) { + return &SRExpr->getSymbol(); + } + + if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) { + const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS()); + const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS()); + + if (LHSSym) + return LHSSym; + + if (RHSSym) + return RHSSym; + + return nullptr; + } + + if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) + return getSingleMCSymbol(UExpr->getSubExpr()); + + return nullptr; +} + +static unsigned countMCSymbolRefExpr(const MCExpr *Expr) { + if (isa(Expr)) + return 1; + + if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) + return countMCSymbolRefExpr(BExpr->getLHS()) + + countMCSymbolRefExpr(BExpr->getRHS()); + + if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) + return countMCSymbolRefExpr(UExpr->getSubExpr()); + + return 0; +} + +namespace { +void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + tmpInst.addOperand(Op1); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, Instructions); +} + +void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, Instructions); +} + +void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createImm(Imm1)); + tmpInst.addOperand(MCOperand::createImm(Imm2)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + 
tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + tmpInst.addOperand(MCOperand::createReg(Reg1)); + tmpInst.addOperand(Op2); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, + Instructions); +} + +void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, + Instructions); +} + +void emitAppropriateDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + if (ShiftAmount >= 32) { + emitRRI(Mips::DSLL32, DstReg, SrcReg, ShiftAmount - 32, IDLoc, + Instructions); + return; + } + + emitRRI(Mips::DSLL, DstReg, SrcReg, ShiftAmount, IDLoc, Instructions); +} +} // end anonymous namespace. + bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + bool ExpandedJalSym = false; Inst.setLoc(IDLoc); @@ -1365,12 +1571,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZ16_MM: + case Mips::BEQZC16_MMR6: case Mips::BNEZ16_MM: + case Mips::BNEZC16_MMR6: assert(MCID.getNumOperands() == 2 && "unexpected number of operands"); Offset = Inst.getOperand(1); if (!Offset.isImm()) break; // We'll deal with this situation later on when applying fixups. - if (!isIntN(8, Offset.getImm())) + if (!isInt<8>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); if (OffsetToAlignment(Offset.getImm(), 2LL)) return Error(IDLoc, "branch to misaligned address"); @@ -1415,32 +1623,6 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } break; - case Mips::CINS: - case Mips::CINS32: - case Mips::EXTS: - case Mips::EXTS32: - assert(MCID.getNumOperands() == 4 && "unexpected number of operands"); - // Check length - Opnd = Inst.getOperand(3); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (Imm < 0 || Imm > 31) - return Error(IDLoc, "immediate operand value out of range"); - // Check position - Opnd = Inst.getOperand(2); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (Imm < 0 || Imm > (Opcode == Mips::CINS || - Opcode == Mips::EXTS ? 63 : 31)) - return Error(IDLoc, "immediate operand value out of range"); - if (Imm > 31) { - Inst.setOpcode(Opcode == Mips::CINS ? Mips::CINS32 : Mips::EXTS32); - Inst.getOperand(2).setImm(Imm - 32); - } - break; - case Mips::SEQi: case Mips::SNEi: assert(MCID.getNumOperands() == 3 && "unexpected number of operands"); @@ -1454,6 +1636,81 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } } + // This expansion is not in a function called by tryExpandInstruction() + // because the pseudo-instruction doesn't have a distinct opcode. 
+ if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) && + inPicMode()) { + warnIfNoMacro(IDLoc); + + const MCExpr *JalExpr = Inst.getOperand(0).getExpr(); + + // We can do this expansion if there's only 1 symbol in the argument + // expression. + if (countMCSymbolRefExpr(JalExpr) > 1) + return Error(IDLoc, "jal doesn't support multiple symbols in PIC mode"); + + // FIXME: This is checking the expression can be handled by the later stages + // of the assembler. We ought to leave it to those later stages but + // we can't do that until we stop evaluateRelocExpr() rewriting the + // expressions into non-equivalent forms. + const MCSymbol *JalSym = getSingleMCSymbol(JalExpr); + + // FIXME: Add support for label+offset operands (currently causes an error). + // FIXME: Add support for forward-declared local symbols. + // FIXME: Add expansion for when the LargeGOT option is enabled. + if (JalSym->isInSection() || JalSym->isTemporary()) { + if (isABI_O32()) { + // If it's a local symbol and the O32 ABI is being used, we expand to: + // lw $25, 0($gp) + // R_(MICRO)MIPS_GOT16 label + // addiu $25, $25, 0 + // R_(MICRO)MIPS_LO16 label + // jalr $25 + const MCExpr *Got16RelocExpr = evaluateRelocExpr(JalExpr, "got"); + const MCExpr *Lo16RelocExpr = evaluateRelocExpr(JalExpr, "lo"); + + emitRRX(Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(Got16RelocExpr), IDLoc, Instructions); + emitRRX(Mips::ADDiu, Mips::T9, Mips::T9, + MCOperand::createExpr(Lo16RelocExpr), IDLoc, Instructions); + } else if (isABI_N32() || isABI_N64()) { + // If it's a local symbol and the N32/N64 ABIs are being used, + // we expand to: + // lw/ld $25, 0($gp) + // R_(MICRO)MIPS_GOT_DISP label + // jalr $25 + const MCExpr *GotDispRelocExpr = evaluateRelocExpr(JalExpr, "got_disp"); + + emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(GotDispRelocExpr), IDLoc, Instructions); + } + } else { + // If it's an external/weak symbol, we expand to: + // lw/ld $25, 0($gp) + // R_(MICRO)MIPS_CALL16 label + // jalr $25 + const MCExpr *Call16RelocExpr = evaluateRelocExpr(JalExpr, "call16"); + + emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(Call16RelocExpr), IDLoc, Instructions); + } + + MCInst JalrInst; + if (IsCpRestoreSet && inMicroMipsMode()) + JalrInst.setOpcode(Mips::JALRS_MM); + else + JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); + JalrInst.addOperand(MCOperand::createReg(Mips::RA)); + JalrInst.addOperand(MCOperand::createReg(Mips::T9)); + + // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR. + // This relocation is supposed to be an optimization hint for the linker + // and is not necessary for correctness. + + Inst = JalrInst; + ExpandedJalSym = true; + } + if (MCID.mayLoad() || MCID.mayStore()) { // Check the offset of memory operand, if it is a symbol // reference or immediate we may have to expand instructions. 
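To summarize the expansion above: in PIC mode, jal sym becomes a GOT load into
$25 followed by jalr $25, and only the relocations on the load differ by ABI
and by whether the callee is local. A compact model of that decision
(simplified, using bare ELF relocation names; the real code builds MCInst
operands from %got/%lo/%got_disp/%call16 expressions instead):

    #include <string>
    #include <vector>

    // Which relocations the PIC 'jal' expansion requests for the $25 load.
    std::vector<std::string> picCallRelocs(bool IsLocalSym, bool IsO32) {
      if (!IsLocalSym)
        return {"R_MIPS_CALL16"};   // lw/ld $25, %call16(sym)($gp)
      if (IsO32)
        return {"R_MIPS_GOT16",     // lw $25, %got(sym)($gp)
                "R_MIPS_LO16"};     // addiu $25, $25, %lo(sym)
      return {"R_MIPS_GOT_DISP"};   // lw/ld $25, %got_disp(sym)($gp)
    }

(The microMIPS variants of these relocations follow the same shape.) The call
itself is always the same jalr through $25, which is why the code above builds
JalrInst only once, after the GOT-load sequence has been chosen.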
@@ -1500,17 +1757,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, int MemOffset = Op.getImm(); MCOperand &DstReg = Inst.getOperand(0); MCOperand &BaseReg = Inst.getOperand(1); - if (isIntN(9, MemOffset) && (MemOffset % 4 == 0) && + if (isInt<9>(MemOffset) && (MemOffset % 4 == 0) && getContext().getRegisterInfo()->getRegClass( Mips::GPRMM16RegClassID).contains(DstReg.getReg()) && - BaseReg.getReg() == Mips::GP) { - MCInst TmpInst; - TmpInst.setLoc(IDLoc); - TmpInst.setOpcode(Mips::LWGP_MM); - TmpInst.addOperand(MCOperand::createReg(DstReg.getReg())); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - TmpInst.addOperand(MCOperand::createImm(MemOffset)); - Instructions.push_back(TmpInst); + (BaseReg.getReg() == Mips::GP || + BaseReg.getReg() == Mips::GP_64)) { + + emitRRI(Mips::LWGP_MM, DstReg.getReg(), Mips::GP, MemOffset, + IDLoc, Instructions); return false; } } @@ -1597,7 +1851,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, if (Imm < -1 || Imm > 14) return Error(IDLoc, "immediate operand value out of range"); break; + case Mips::TEQ_MM: + case Mips::TGE_MM: + case Mips::TGEU_MM: + case Mips::TLT_MM: + case Mips::TLTU_MM: + case Mips::TNE_MM: case Mips::SB16_MM: + case Mips::SB16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1607,6 +1868,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; case Mips::LHU16_MM: case Mips::SH16_MM: + case Mips::SH16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1616,6 +1878,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; case Mips::LW16_MM: case Mips::SW16_MM: + case Mips::SW16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1623,93 +1886,111 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, if (Imm < 0 || Imm > 60 || (Imm % 4 != 0)) return Error(IDLoc, "immediate operand value out of range"); break; - case Mips::CACHE: - case Mips::PREF: - Opnd = Inst.getOperand(2); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (!isUInt<5>(Imm)) - return Error(IDLoc, "immediate operand value out of range"); - break; case Mips::ADDIUPC_MM: MCOperand Opnd = Inst.getOperand(1); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); int Imm = Opnd.getImm(); - if ((Imm % 4 != 0) || !isIntN(25, Imm)) + if ((Imm % 4 != 0) || !isInt<25>(Imm)) return Error(IDLoc, "immediate operand value out of range"); break; } } - if (needsExpansion(Inst)) { - if (expandInstruction(Inst, IDLoc, Instructions)) - return true; - } else + MacroExpanderResultTy ExpandResult = + tryExpandInstruction(Inst, IDLoc, Instructions); + switch (ExpandResult) { + case MER_NotAMacro: Instructions.push_back(Inst); + break; + case MER_Success: + break; + case MER_Fail: + return true; + } // If this instruction has a delay slot and .set reorder is active, // emit a NOP after it. if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions); + if ((Inst.getOpcode() == Mips::JalOneReg || + Inst.getOpcode() == Mips::JalTwoReg || ExpandedJalSym) && + isPicAndNotNxxAbi()) { + if (IsCpRestoreSet) { + // We need a NOP between the JALR and the LW: + // If .set reorder has been used, we've already emitted a NOP. 
+ // If .set noreorder has been used, we need to emit a NOP at this point. + if (!AssemblerOptions.back()->isReorder()) + createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions); + + // Load the $gp from the stack. + SmallVector LoadInsts; + createCpRestoreMemOp(true /*IsLoad*/, CpRestoreOffset /*StackOffset*/, + IDLoc, LoadInsts); + + for (const MCInst &Inst : LoadInsts) + Instructions.push_back(Inst); + + } else + Warning(IDLoc, "no .cprestore used in PIC mode"); + } + return false; } -bool MipsAsmParser::needsExpansion(MCInst &Inst) { - +MipsAsmParser::MacroExpanderResultTy +MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { switch (Inst.getOpcode()) { - case Mips::LoadImm32: - case Mips::LoadImm64: - case Mips::LoadAddrImm32: - case Mips::LoadAddrReg32: - case Mips::B_MM_Pseudo: - case Mips::LWM_MM: - case Mips::SWM_MM: - case Mips::JalOneReg: - case Mips::JalTwoReg: - case Mips::BneImm: - case Mips::BeqImm: - case Mips::BLT: - case Mips::BLE: - case Mips::BGE: - case Mips::BGT: - case Mips::BLTU: - case Mips::BLEU: - case Mips::BGEU: - case Mips::BGTU: - case Mips::Ulhu: - case Mips::Ulw: - return true; default: - return false; - } -} - -bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - switch (Inst.getOpcode()) { - default: llvm_unreachable("unimplemented expansion"); + return MER_NotAMacro; case Mips::LoadImm32: - return expandLoadImm(Inst, true, IDLoc, Instructions); + return expandLoadImm(Inst, true, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::LoadImm64: - return expandLoadImm(Inst, false, IDLoc, Instructions); + return expandLoadImm(Inst, false, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::LoadAddrImm32: - return expandLoadAddressImm(Inst, true, IDLoc, Instructions); + case Mips::LoadAddrImm64: + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + assert((Inst.getOperand(1).isImm() || Inst.getOperand(1).isExpr()) && + "expected immediate operand kind"); + + return expandLoadAddress(Inst.getOperand(0).getReg(), Mips::NoRegister, + Inst.getOperand(1), + Inst.getOpcode() == Mips::LoadAddrImm32, IDLoc, + Instructions) + ? MER_Fail + : MER_Success; case Mips::LoadAddrReg32: - return expandLoadAddressReg(Inst, true, IDLoc, Instructions); + case Mips::LoadAddrReg64: + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + assert((Inst.getOperand(2).isImm() || Inst.getOperand(2).isExpr()) && + "expected immediate operand kind"); + + return expandLoadAddress(Inst.getOperand(0).getReg(), + Inst.getOperand(1).getReg(), Inst.getOperand(2), + Inst.getOpcode() == Mips::LoadAddrReg32, IDLoc, + Instructions) + ? MER_Fail + : MER_Success; case Mips::B_MM_Pseudo: - return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions); + case Mips::B_MMR6_Pseudo: + return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::SWM_MM: case Mips::LWM_MM: - return expandLoadStoreMultiple(Inst, IDLoc, Instructions); + return expandLoadStoreMultiple(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::JalOneReg: case Mips::JalTwoReg: - return expandJalWithRegs(Inst, IDLoc, Instructions); + return expandJalWithRegs(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::BneImm: case Mips::BeqImm: - return expandBranchImm(Inst, IDLoc, Instructions); + return expandBranchImm(Inst, IDLoc, Instructions) ? 
MER_Fail : MER_Success; case Mips::BLT: case Mips::BLE: case Mips::BGE: @@ -1718,78 +1999,97 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::BLEU: case Mips::BGEU: case Mips::BGTU: - return expandCondBranches(Inst, IDLoc, Instructions); + case Mips::BLTL: + case Mips::BLEL: + case Mips::BGEL: + case Mips::BGTL: + case Mips::BLTUL: + case Mips::BLEUL: + case Mips::BGEUL: + case Mips::BGTUL: + case Mips::BLTImmMacro: + case Mips::BLEImmMacro: + case Mips::BGEImmMacro: + case Mips::BGTImmMacro: + case Mips::BLTUImmMacro: + case Mips::BLEUImmMacro: + case Mips::BGEUImmMacro: + case Mips::BGTUImmMacro: + case Mips::BLTLImmMacro: + case Mips::BLELImmMacro: + case Mips::BGELImmMacro: + case Mips::BGTLImmMacro: + case Mips::BLTULImmMacro: + case Mips::BLEULImmMacro: + case Mips::BGEULImmMacro: + case Mips::BGTULImmMacro: + return expandCondBranches(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::SDivMacro: + return expandDiv(Inst, IDLoc, Instructions, false, true) ? MER_Fail + : MER_Success; + case Mips::DSDivMacro: + return expandDiv(Inst, IDLoc, Instructions, true, true) ? MER_Fail + : MER_Success; + case Mips::UDivMacro: + return expandDiv(Inst, IDLoc, Instructions, false, false) ? MER_Fail + : MER_Success; + case Mips::DUDivMacro: + return expandDiv(Inst, IDLoc, Instructions, true, false) ? MER_Fail + : MER_Success; + case Mips::Ulh: + return expandUlh(Inst, true, IDLoc, Instructions) ? MER_Fail : MER_Success; case Mips::Ulhu: - return expandUlhu(Inst, IDLoc, Instructions); + return expandUlh(Inst, false, IDLoc, Instructions) ? MER_Fail : MER_Success; case Mips::Ulw: - return expandUlw(Inst, IDLoc, Instructions); + return expandUlw(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success; + case Mips::NORImm: + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::ADDi: + case Mips::ADDiu: + case Mips::SLTi: + case Mips::SLTiu: + if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) { + int64_t ImmValue = Inst.getOperand(2).getImm(); + if (isInt<16>(ImmValue)) + return MER_NotAMacro; + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + } + return MER_NotAMacro; + case Mips::ANDi: + case Mips::ORi: + case Mips::XORi: + if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) { + int64_t ImmValue = Inst.getOperand(2).getImm(); + if (isUInt<16>(ImmValue)) + return MER_NotAMacro; + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + } + return MER_NotAMacro; + case Mips::ROL: + case Mips::ROR: + return expandRotation(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::ROLImm: + case Mips::RORImm: + return expandRotationImm(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::DROL: + case Mips::DROR: + return expandDRotation(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::DROLImm: + case Mips::DRORImm: + return expandDRotationImm(Inst, IDLoc, Instructions) ? 
MER_Fail + : MER_Success; } } -namespace { -void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - MCInst tmpInst; - tmpInst.setOpcode(Opcode); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(Imm); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); -} - -void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions); -} - - -void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm, - SMLoc IDLoc, SmallVectorImpl &Instructions) { - MCInst tmpInst; - tmpInst.setOpcode(Opcode); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(Imm); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); -} - -void emitRRR(unsigned Opcode, unsigned DstReg, unsigned SrcReg, - unsigned SrcReg2, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc, - Instructions); -} - -void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm, - SMLoc IDLoc, SmallVectorImpl &Instructions) { - emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc, - Instructions); -} - -template -void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - if (ShiftAmount >= 32) - emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions); - else if (ShiftAmount > 0) - emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions); - - // There's no need for an ORi if the immediate is 0. - if (Operand.isImm() && Operand.getImm() == 0) - return; - - emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions); -} - -template -void createLShiftOri(int64_t Value, unsigned RegNo, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - createLShiftOri(MCOperand::createImm(Value), RegNo, IDLoc, - Instructions); -} -} - bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { // Create a JALR instruction which is going to replace the pseudo-JAL. @@ -1800,8 +2100,11 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, if (Opcode == Mips::JalOneReg) { // jal $rs => jalr $rs - if (inMicroMipsMode()) { - JalrInst.setOpcode(Mips::JALR16_MM); + if (IsCpRestoreSet && inMicroMipsMode()) { + JalrInst.setOpcode(Mips::JALRS16_MM); + JalrInst.addOperand(FirstRegOp); + } else if (inMicroMipsMode()) { + JalrInst.setOpcode(hasMips32r6() ? Mips::JALRC16_MMR6 : Mips::JALR16_MM); JalrInst.addOperand(FirstRegOp); } else { JalrInst.setOpcode(Mips::JALR); @@ -1810,30 +2113,47 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, } } else if (Opcode == Mips::JalTwoReg) { // jal $rd, $rs => jalr $rd, $rs - JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); + if (IsCpRestoreSet && inMicroMipsMode()) + JalrInst.setOpcode(Mips::JALRS_MM); + else + JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); JalrInst.addOperand(FirstRegOp); const MCOperand SecondRegOp = Inst.getOperand(1); JalrInst.addOperand(SecondRegOp); } Instructions.push_back(JalrInst); - // If .set reorder is active, emit a NOP after it. - if (AssemblerOptions.back()->isReorder()) { - // This is a 32-bit NOP because these 2 pseudo-instructions - // do not have a short delay slot. 
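// Illustrative sketch (not part of the patch): the opcode choice the new
// expandJalWithRegs code in this hunk makes for the one-register "jal $rs"
// form. The enum below stands in for the real Mips::* opcode values.
#include <cstdio>

enum JalrOpcode { JALR, JALR16_MM, JALRC16_MMR6, JALRS16_MM };

static JalrOpcode pickJalrOpcode(bool IsCpRestoreSet, bool InMicroMips,
                                 bool HasMips32r6) {
  if (IsCpRestoreSet && InMicroMips)
    return JALRS16_MM; // pairs with the .cprestore $gp reload sequence
  if (InMicroMips)
    return HasMips32r6 ? JALRC16_MMR6 : JALR16_MM; // R6 form: no delay slot
  return JALR; // classic MIPS: jalr $ra, $rs
}

int main() {
  // microMIPS R6 selects the compact JALRC form:
  std::printf("%d\n", pickJalrOpcode(false, true, true));
  return 0;
}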
-    MCInst NopInst;
-    NopInst.setOpcode(Mips::SLL);
-    NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
-    NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
-    NopInst.addOperand(MCOperand::createImm(0));
-    Instructions.push_back(NopInst);
+  // If .set reorder is active and branch instruction has a delay slot,
+  // emit a NOP after it.
+  const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
+  if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
+    createNop(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc, Instructions);
   }
 
   return false;
 }
 
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+  unsigned BitNum = findFirstSet(x);
+
+  return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
+
+/// Load (or add) an immediate into a register.
+///
+/// @param ImmValue     The immediate to load.
+/// @param DstReg       The register that will hold the immediate.
+/// @param SrcReg       A register to add to the immediate or Mips::NoRegister
+///                     for a simple initialization.
+/// @param Is32BitImm   Is ImmValue 32-bit or 64-bit?
+/// @param IsAddress    True if the immediate represents an address. False if it
+///                     is an integer.
+/// @param IDLoc        Location of the immediate in the source file.
+/// @param Instructions The instructions emitted by this expansion.
 bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
-                                  unsigned SrcReg, bool Is32BitImm, SMLoc IDLoc,
+                                  unsigned SrcReg, bool Is32BitImm,
+                                  bool IsAddress, SMLoc IDLoc,
                                   SmallVectorImpl<MCInst> &Instructions) {
   if (!Is32BitImm && !isGP64bit()) {
     Error(IDLoc, "instruction requires a 64-bit architecture");
@@ -1852,6 +2172,9 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     }
   }
 
+  unsigned ZeroReg = IsAddress ? ABI.GetNullPtr() : ABI.GetZeroReg();
+  unsigned AdduOp = !Is32BitImm ? Mips::DADDu : Mips::ADDu;
+
   bool UseSrcReg = false;
   if (SrcReg != Mips::NoRegister)
     UseSrcReg = true;
@@ -1866,111 +2189,129 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     TmpReg = ATReg;
   }
 
-  // FIXME: gas has a special case for values that are 000...1111, which
-  // becomes a li -1 and then a dsrl
   if (isInt<16>(ImmValue)) {
-    // li d,j => addiu d,$zero,j
     if (!UseSrcReg)
-      SrcReg = Mips::ZERO;
-    emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
-  } else if (isUInt<16>(ImmValue)) {
-    // li d,j => ori d,$zero,j
-    unsigned TmpReg = DstReg;
-    if (SrcReg == DstReg) {
-      unsigned ATReg = getATReg(IDLoc);
-      if (!ATReg)
-        return true;
-      TmpReg = ATReg;
+      SrcReg = ZeroReg;
+
+    // This doesn't quite follow the usual ABI expectations for N32 but matches
+    // traditional assembler behaviour. N32 would normally use addiu for both
+    // integers and addresses.
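// Illustrative sketch (not part of the patch): a self-contained equivalent
// of isShiftedUIntAtAnyPosition<N> above, with LLVM's findFirstSet/isUInt
// replaced by __builtin_ctzll and a shift test. The real helper is only
// reached with nonzero values; zero is handled explicitly here because ctz
// of zero is undefined.
#include <cassert>
#include <cstdint>

template <unsigned N> static bool isShiftedUIntAtAnyPos(uint64_t X) {
  if (X == 0)
    return true;
  unsigned BitNum = __builtin_ctzll(X); // index of the lowest set bit
  return (X >> BitNum) >> N == 0;       // stripped value fits in N bits
}

int main() {
  assert(isShiftedUIntAtAnyPos<16>(0x00ffff0000000000ULL));  // 0xffff << 40
  assert(!isShiftedUIntAtAnyPos<16>(0x0001ffff00000000ULL)); // needs 17 bits
  return 0;
}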
+ if (IsAddress && !Is32BitImm) { + emitRRI(Mips::DADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + return false; } - emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions); + emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + return false; + } + + if (isUInt<16>(ImmValue)) { + unsigned TmpReg = DstReg; + if (SrcReg == DstReg) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; + } + + emitRRI(Mips::ORi, TmpReg, ZeroReg, ImmValue, IDLoc, Instructions); if (UseSrcReg) - emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); - } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { + emitRRR(ABI.GetPtrAdduOp(), DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; + } + + if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { warnIfNoMacro(IDLoc); - // For all other values which are representable as a 32-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,lo16(j) uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; if (!Is32BitImm && !isInt<32>(ImmValue)) { - // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the + // Traditional behaviour seems to special case this particular value. It's + // not clear why other masks are handled differently. + if (ImmValue == 0xffffffff) { + emitRI(Mips::LUi, TmpReg, 0xffff, IDLoc, Instructions); + emitRRI(Mips::DSRL32, TmpReg, TmpReg, 0, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; + } + + // Expand to an ORi instead of a LUi to avoid sign-extending into the // upper 32 bits. - emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions); + emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits31To16, IDLoc, Instructions); emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions); - } else - emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); - createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions); - - if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); - - } else if ((ImmValue & (0xffffLL << 48)) == 0) { - warnIfNoMacro(IDLoc); - - // <------- lo32 ------> - // <------- hi32 ------> - // <- hi16 -> <- lo16 -> - // _________________________________ - // | | | | - // | 16-bits | 16-bits | 16-bits | - // |__________|__________|__________| - // - // For any 64-bit value that is representable as a 48-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,hi16(lo32(j)) - // dsll d,d,16 - // ori d,d,lo16(lo32(j)) - uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff; - uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; - uint16_t Bits15To0 = ImmValue & 0xffff; - - emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions); - createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); - - if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); - - } else { - warnIfNoMacro(IDLoc); - - // <------- hi32 ------> <------- lo32 ------> - // <- hi16 -> <- lo16 -> - // ___________________________________________ - // | | | | | - // | 16-bits | 16-bits | 16-bits | 16-bits | - // |__________|__________|__________|__________| - // - // For all other values which are representable as a 64-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,lo16(hi32(j)) - // dsll d,d,16 - // ori d,d,hi16(lo32(j)) - // dsll d,d,16 - // ori d,d,lo16(lo32(j)) - uint16_t Bits63To48 = (ImmValue >> 48) & 0xffff; - uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff; - uint16_t 
Bits31To16 = (ImmValue >> 16) & 0xffff; - uint16_t Bits15To0 = ImmValue & 0xffff; - - emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions); - createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions); - - // When Bits31To16 is 0, do a left shift of 32 bits instead of doing - // two left shifts of 16 bits. - if (Bits31To16 == 0) { - createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions); - } else { - createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); + if (Bits15To0) + emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } + emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); + if (Bits15To0) + emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions); if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } + + if (isShiftedUIntAtAnyPosition<16>(ImmValue)) { + if (Is32BitImm) { + Error(IDLoc, "instruction requires a 32-bit immediate"); + return true; + } + + // Traditionally, these immediates are shifted as little as possible and as + // such we align the most significant bit to bit 15 of our temporary. + unsigned FirstSet = findFirstSet((uint64_t)ImmValue); + unsigned LastSet = findLastSet((uint64_t)ImmValue); + unsigned ShiftAmount = FirstSet - (15 - (LastSet - FirstSet)); + uint16_t Bits = (ImmValue >> ShiftAmount) & 0xffff; + emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits, IDLoc, Instructions); + emitRRI(Mips::DSLL, TmpReg, TmpReg, ShiftAmount, IDLoc, Instructions); + + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + + return false; + } + + warnIfNoMacro(IDLoc); + + // The remaining case is packed with a sequence of dsll and ori with zeros + // being omitted and any neighbouring dsll's being coalesced. + // The highest 32-bit's are equivalent to a 32-bit immediate load. + + // Load bits 32-63 of ImmValue into bits 0-31 of the temporary register. + if (loadImmediate(ImmValue >> 32, TmpReg, Mips::NoRegister, true, false, + IDLoc, Instructions)) + return false; + + // Shift and accumulate into the register. If a 16-bit chunk is zero, then + // skip it and defer the shift to the next chunk. + unsigned ShiftCarriedForwards = 16; + for (int BitNum = 16; BitNum >= 0; BitNum -= 16) { + uint16_t ImmChunk = (ImmValue >> BitNum) & 0xffff; + + if (ImmChunk != 0) { + emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc, + Instructions); + emitRRI(Mips::ORi, TmpReg, TmpReg, ImmChunk, IDLoc, Instructions); + ShiftCarriedForwards = 0; + } + + ShiftCarriedForwards += 16; + } + ShiftCarriedForwards -= 16; + + // Finish any remaining shifts left by trailing zeros. 
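// Illustrative sketch (not part of the patch): the deferred-shift chunking
// loop above, emitting text instead of MCInsts. Zero 16-bit chunks cost no
// "ori", and their shifts coalesce into the next "dsll".
#include <cstdint>
#include <cstdio>

static void materializeLow32(uint64_t Imm) {
  // Assume bits 32-63 were already loaded into $at by the recursive
  // loadImmediate call; finish bits 31..0.
  unsigned PendingShift = 16;
  for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
    uint16_t Chunk = (Imm >> BitNum) & 0xffff;
    if (Chunk != 0) {
      std::printf("  dsll $at, $at, %u\n", PendingShift);
      std::printf("  ori  $at, $at, 0x%x\n", Chunk);
      PendingShift = 0;
    }
    PendingShift += 16;
  }
  PendingShift -= 16;
  if (PendingShift) // trailing zero chunks still need one final shift
    std::printf("  dsll $at, $at, %u\n", PendingShift);
}

int main() {
  materializeLow32(0x0123000000450000ULL); // dsll 16; ori 0x45; dsll 16
  return 0;
}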
+ if (ShiftCarriedForwards) + emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc, + Instructions); + + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } @@ -1982,63 +2323,38 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, assert(DstRegOp.isReg() && "expected register operand kind"); if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister, - Is32BitImm, IDLoc, Instructions)) + Is32BitImm, false, IDLoc, Instructions)) return true; return false; } -bool -MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - const MCOperand &DstRegOp = Inst.getOperand(0); - assert(DstRegOp.isReg() && "expected register operand kind"); - - const MCOperand &SrcRegOp = Inst.getOperand(1); - assert(SrcRegOp.isReg() && "expected register operand kind"); - - const MCOperand &ImmOp = Inst.getOperand(2); - assert((ImmOp.isImm() || ImmOp.isExpr()) && - "expected immediate operand kind"); - if (!ImmOp.isImm()) { - if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(), - SrcRegOp.getReg(), Is32BitImm, IDLoc, - Instructions)) - return true; - - return false; +bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg, + const MCOperand &Offset, + bool Is32BitAddress, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + // la can't produce a usable address when addresses are 64-bit. + if (Is32BitAddress && ABI.ArePtrs64bit()) { + // FIXME: Demote this to a warning and continue as if we had 'dla' instead. + // We currently can't do this because we depend on the equality + // operator and N64 can end up with a GPR32/GPR64 mismatch. + Error(IDLoc, "la used to load 64-bit address"); + // Continue as if we had 'dla' instead. + Is32BitAddress = false; } - if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(), - Is32BitImm, IDLoc, Instructions)) + // dla requires 64-bit addresses. 
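// Illustrative sketch (not part of the patch): the la/dla legality matrix
// that the expandLoadAddress code around this point implements. "la" with
// 64-bit pointers is diagnosed and then treated as "dla"; "dla" with 32-bit
// pointers is a hard error.
#include <cstdio>

enum LaDiag { LaOK, LaUsedFor64BitAddress, DlaNeeds64BitArch };

static LaDiag classifyLoadAddress(bool Is32BitAddress, bool PtrsAre64Bit) {
  if (Is32BitAddress && PtrsAre64Bit)
    return LaUsedFor64BitAddress; // diagnosed; expansion continues as dla
  if (!Is32BitAddress && !PtrsAre64Bit)
    return DlaNeeds64BitArch;     // expansion aborts with an error
  return LaOK;
}

int main() {
  std::printf("%d %d\n", classifyLoadAddress(true, true), // la on N64
              classifyLoadAddress(false, false));         // dla on O32
  return 0;
}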
+  if (!Is32BitAddress && !ABI.ArePtrs64bit()) {
+    Error(IDLoc, "instruction requires a 64-bit architecture");
     return true;
-
-  return false;
-}
-
-bool
-MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
-                                    SmallVectorImpl<MCInst> &Instructions) {
-  const MCOperand &DstRegOp = Inst.getOperand(0);
-  assert(DstRegOp.isReg() && "expected register operand kind");
-
-  const MCOperand &ImmOp = Inst.getOperand(1);
-  assert((ImmOp.isImm() || ImmOp.isExpr()) &&
-         "expected immediate operand kind");
-  if (!ImmOp.isImm()) {
-    if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
-                                Mips::NoRegister, Is32BitImm, IDLoc,
-                                Instructions))
-      return true;
-
-    return false;
   }
 
-  if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
-                    Is32BitImm, IDLoc, Instructions))
-    return true;
+  if (!Offset.isImm())
+    return loadAndAddSymbolAddress(Offset.getExpr(), DstReg, BaseReg,
+                                   Is32BitAddress, IDLoc, Instructions);
 
-  return false;
+  return loadImmediate(Offset.getImm(), DstReg, BaseReg, Is32BitAddress, true,
+                       IDLoc, Instructions);
 }
 
 bool MipsAsmParser::loadAndAddSymbolAddress(
@@ -2046,67 +2362,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
     SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
   warnIfNoMacro(IDLoc);
 
-  if (Is32BitSym && isABI_N64())
-    Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
-
-  MCInst tmpInst;
-  const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
-  const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
-      &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
-  const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
-      &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+  const MCExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
+  const MipsMCExpr *HiExpr = MipsMCExpr::create(
+      MCSymbolRefExpr::VK_Mips_ABS_HI, Symbol, getContext());
+  const MipsMCExpr *LoExpr = MipsMCExpr::create(
+      MCSymbolRefExpr::VK_Mips_ABS_LO, Symbol, getContext());
 
   bool UseSrcReg = SrcReg != Mips::NoRegister;
 
+  // This is the 64-bit symbol address expansion.
+  if (ABI.ArePtrs64bit() && isGP64bit()) {
+    // We always need AT for the 64-bit expansion.
+    // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + const MipsMCExpr *HighestExpr = MipsMCExpr::create( + MCSymbolRefExpr::VK_Mips_HIGHEST, Symbol, getContext()); + const MipsMCExpr *HigherExpr = MipsMCExpr::create( + MCSymbolRefExpr::VK_Mips_HIGHER, Symbol, getContext()); + + if (UseSrcReg && (DstReg == SrcReg)) { + // If $rs is the same as $rd: + // (d)la $rd, sym($rd) => lui $at, %highest(sym) + // daddiu $at, $at, %higher(sym) + // dsll $at, $at, 16 + // daddiu $at, $at, %hi(sym) + // dsll $at, $at, 16 + // daddiu $at, $at, %lo(sym) + // daddu $rd, $at, $rd + emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc, + Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HigherExpr), + IDLoc, Instructions); + emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), IDLoc, + Instructions); + emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); + emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, Instructions); + + return false; + } + + // Otherwise, if the $rs is different from $rd or if $rs isn't specified: + // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym) + // lui $at, %hi(sym) + // daddiu $rd, $rd, %higher(sym) + // daddiu $at, $at, %lo(sym) + // dsll32 $rd, $rd, 0 + // daddu $rd, $rd, $at + // (daddu $rd, $rd, $rs) + emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc, + Instructions); + emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, + Instructions); + emitRRX(Mips::DADDiu, DstReg, DstReg, MCOperand::createExpr(HigherExpr), + IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); + emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, Instructions); + emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, Instructions); + + return false; + } + + // And now, the 32-bit symbol address expansion: + // If $rs is the same as $rd: + // (d)la $rd, sym($rd) => lui $at, %hi(sym) + // ori $at, $at, %lo(sym) + // addu $rd, $at, $rd + // Otherwise, if the $rs is different from $rd or if $rs isn't specified: + // (d)la $rd, sym/sym($rs) => lui $rd, %hi(sym) + // ori $rd, $rd, %lo(sym) + // (addu $rd, $rd, $rs) unsigned TmpReg = DstReg; if (UseSrcReg && (DstReg == SrcReg)) { - // At this point we need AT to perform the expansions and we exit if it is - // not available. + // If $rs is the same as $rd, we need to use AT. + // If it is not available we exit. 
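// Illustrative sketch (not part of the patch): the %highest/%higher/%hi/%lo
// decomposition used by the 64-bit expansion above. The +0x8000-style
// rounding in each piece pre-compensates for the sign extension of the
// 16-bit daddiu that consumes the piece below it (these are the conventional
// MIPS relocation formulas; treat the exact constants as an assumption).
#include <cassert>
#include <cstdint>

static uint16_t lo16(uint64_t A)    { return A & 0xffff; }
static uint16_t hi16(uint64_t A)    { return (A + 0x8000) >> 16; }
static uint16_t higher(uint64_t A)  { return (A + 0x80008000ULL) >> 32; }
static uint16_t highest(uint64_t A) { return (A + 0x800080008000ULL) >> 48; }

int main() {
  uint64_t A = 0x123456789abcdef0ULL;
  // Mirror lui %highest / daddiu %higher / dsll 16 / daddiu %hi / dsll 16 /
  // daddiu %lo, where every daddiu adds a sign-extended 16-bit value.
  int64_t R = (int32_t)((uint32_t)highest(A) << 16); // lui sign-extends
  R += (int16_t)higher(A);
  R <<= 16;
  R += (int16_t)hi16(A);
  R <<= 16;
  R += (int16_t)lo16(A);
  assert((uint64_t)R == A);
  return 0;
}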
unsigned ATReg = getATReg(IDLoc); if (!ATReg) return true; TmpReg = ATReg; } - if (!Is32BitSym) { - // If it's a 64-bit architecture, expand to: - // la d,sym => lui d,highest(sym) - // ori d,d,higher(sym) - // dsll d,d,16 - // ori d,d,hi16(sym) - // dsll d,d,16 - // ori d,d,lo16(sym) - const MCSymbolRefExpr *HighestExpr = MCSymbolRefExpr::create( - &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext()); - const MCSymbolRefExpr *HigherExpr = MCSymbolRefExpr::create( - &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext()); - - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createExpr(HighestExpr)); - Instructions.push_back(tmpInst); - - createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(), - Instructions); - createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(), - Instructions); - createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), - Instructions); - } else { - // Otherwise, expand to: - // la d,sym => lui d,hi16(sym) - // ori d,d,lo16(sym) - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createExpr(HiExpr)); - Instructions.push_back(tmpInst); - - emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(), - Instructions); - } + emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(HiExpr), IDLoc, Instructions); + emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions); + emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + else + assert(DstReg == TmpReg); return false; } @@ -2125,12 +2476,13 @@ bool MipsAsmParser::expandUncondBranchMMPseudo( Inst.addOperand(MCOperand::createExpr(Offset.getExpr())); } else { assert(Offset.isImm() && "expected immediate operand kind"); - if (isIntN(11, Offset.getImm())) { + if (isInt<11>(Offset.getImm())) { // If offset fits into 11 bits then this instruction becomes microMIPS // 16-bit unconditional branch instruction. - Inst.setOpcode(Mips::B16_MM); + if (inMicroMipsMode()) + Inst.setOpcode(hasMips32r6() ? Mips::BC16_MMR6 : Mips::B16_MM); } else { - if (!isIntN(17, Offset.getImm())) + if (!isInt<17>(Offset.getImm())) Error(IDLoc, "branch target out of range"); if (OffsetToAlignment(Offset.getImm(), 1LL << 1)) Error(IDLoc, "branch to misaligned address"); @@ -2143,8 +2495,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo( } Instructions.push_back(Inst); - // If .set reorder is active, emit a NOP after the branch instruction. - if (AssemblerOptions.back()->isReorder()) + // If .set reorder is active and branch instruction has a delay slot, + // emit a NOP after it. 
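// Illustrative sketch (not part of the patch): the size/alignment decision
// that expandUncondBranchMMPseudo makes above; offsets fitting in 11 signed
// bits can use the compact 16-bit microMIPS branch, up to 17 signed bits the
// 32-bit form, and all branch offsets must be even.
#include <cstdint>
#include <cstdio>

template <unsigned N> static bool fitsInt(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

static const char *classifyBranchOffset(int64_t Off) {
  if (Off & 1)
    return "error: branch to misaligned address";
  if (fitsInt<11>(Off))
    return "16-bit branch (B16/BC16)";
  if (fitsInt<17>(Off))
    return "32-bit branch";
  return "error: branch target out of range";
}

int main() {
  std::printf("%s\n", classifyBranchOffset(1022));  // fits in 11 bits
  std::printf("%s\n", classifyBranchOffset(40000)); // needs the 32-bit form
  std::printf("%s\n", classifyBranchOffset(3));     // odd: misaligned
  return 0;
}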
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) createNop(true, IDLoc, Instructions); return false; @@ -2175,30 +2529,21 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, } int64_t ImmValue = ImmOp.getImm(); - if (ImmValue == 0) { - MCInst BranchInst; - BranchInst.setOpcode(OpCode); - BranchInst.addOperand(DstRegOp); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MemOffsetOp); - Instructions.push_back(BranchInst); - } else { + if (ImmValue == 0) + emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc, + Instructions); + else { warnIfNoMacro(IDLoc); unsigned ATReg = getATReg(IDLoc); if (!ATReg) return true; - if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), IDLoc, - Instructions)) + if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), true, + IDLoc, Instructions)) return true; - MCInst BranchInst; - BranchInst.setOpcode(OpCode); - BranchInst.addOperand(DstRegOp); - BranchInst.addOperand(MCOperand::createReg(ATReg)); - BranchInst.addOperand(MemOffsetOp); - Instructions.push_back(BranchInst); + emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, Instructions); } return false; } @@ -2206,7 +2551,6 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd) { - MCInst TempInst; unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; @@ -2227,8 +2571,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, HiOffset++; } else ExprOffset = Inst.getOperand(2).getExpr(); - // All instructions will have the same location. - TempInst.setLoc(IDLoc); // These are some of the types of expansions we perform here: // 1) lw $8, sym => lui $8, %hi(sym) // lw $8, %lo(sym)($8) @@ -2267,40 +2609,20 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, return; } - TempInst.setOpcode(Mips::LUi); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - if (isImmOpnd) - TempInst.addOperand(MCOperand::createImm(HiOffset)); - else { - const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi"); - TempInst.addOperand(MCOperand::createExpr(HiExpr)); - } - // Add the instruction to the list. - Instructions.push_back(TempInst); - // Prepare TempInst for next instruction. - TempInst.clear(); + emitRX(Mips::LUi, TmpRegNum, + isImmOpnd ? MCOperand::createImm(HiOffset) + : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "hi")), + IDLoc, Instructions); // Add temp register to base. - if (BaseRegNum != Mips::ZERO) { - TempInst.setOpcode(Mips::ADDu); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - TempInst.addOperand(MCOperand::createReg(BaseRegNum)); - Instructions.push_back(TempInst); - TempInst.clear(); - } + if (BaseRegNum != Mips::ZERO) + emitRRR(Mips::ADDu, TmpRegNum, TmpRegNum, BaseRegNum, IDLoc, Instructions); // And finally, create original instruction with low part // of offset and new base. 
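// Illustrative sketch (not part of the patch): the %hi/%lo offset split used
// by expandMemInst above. The low half is consumed as a signed 16-bit
// displacement, so when bit 15 is set the high half is incremented to
// compensate (the "HiOffset++" in the code).
#include <cassert>
#include <cstdint>

static void splitOffset(uint32_t Imm, uint16_t &Hi, uint16_t &Lo) {
  Lo = Imm & 0xffff;
  Hi = Imm >> 16;
  if (Lo & 0x8000) // low part will sign-extend negative: pre-add the carry
    ++Hi;
}

int main() {
  uint16_t Hi, Lo;
  splitOffset(0x12348000, Hi, Lo);
  // "lui $at, 0x1235" then "lw $rt, -32768($at)" reconstructs the address:
  assert((uint32_t)(((uint32_t)Hi << 16) + (int16_t)Lo) == 0x12348000u);
  return 0;
}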
- TempInst.setOpcode(Inst.getOpcode()); - TempInst.addOperand(MCOperand::createReg(RegOpNum)); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - if (isImmOpnd) - TempInst.addOperand(MCOperand::createImm(LoOffset)); - else { - const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo"); - TempInst.addOperand(MCOperand::createExpr(LoExpr)); - } - Instructions.push_back(TempInst); - TempInst.clear(); + emitRRX(Inst.getOpcode(), RegOpNum, TmpRegNum, + isImmOpnd + ? MCOperand::createImm(LoOffset) + : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "lo")), + IDLoc, Instructions); } bool @@ -2316,10 +2638,16 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 && Inst.getOperand(OpNum - 1).getImm() >= 0 && - Inst.getOperand(OpNum - 2).getReg() == Mips::SP && - Inst.getOperand(OpNum - 3).getReg() == Mips::RA) + (Inst.getOperand(OpNum - 2).getReg() == Mips::SP || + Inst.getOperand(OpNum - 2).getReg() == Mips::SP_64) && + (Inst.getOperand(OpNum - 3).getReg() == Mips::RA || + Inst.getOperand(OpNum - 3).getReg() == Mips::RA_64)) { // It can be implemented as SWM16 or LWM16 instruction. - NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM; + if (inMicroMipsMode() && hasMips32r6()) + NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MMR6 : Mips::LWM16_MMR6; + else + NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM; + } Inst.setOpcode(NewOpcode); Instructions.push_back(Inst); @@ -2328,44 +2656,126 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { + bool EmittedNoMacroWarning = false; unsigned PseudoOpcode = Inst.getOpcode(); unsigned SrcReg = Inst.getOperand(0).getReg(); - unsigned TrgReg = Inst.getOperand(1).getReg(); + const MCOperand &TrgOp = Inst.getOperand(1); const MCExpr *OffsetExpr = Inst.getOperand(2).getExpr(); unsigned ZeroSrcOpcode, ZeroTrgOpcode; - bool ReverseOrderSLT, IsUnsigned, AcceptsEquality; + bool ReverseOrderSLT, IsUnsigned, IsLikely, AcceptsEquality; + + unsigned TrgReg; + if (TrgOp.isReg()) + TrgReg = TrgOp.getReg(); + else if (TrgOp.isImm()) { + warnIfNoMacro(IDLoc); + EmittedNoMacroWarning = true; + + TrgReg = getATReg(IDLoc); + if (!TrgReg) + return true; + + switch(PseudoOpcode) { + default: + llvm_unreachable("unknown opcode for branch pseudo-instruction"); + case Mips::BLTImmMacro: + PseudoOpcode = Mips::BLT; + break; + case Mips::BLEImmMacro: + PseudoOpcode = Mips::BLE; + break; + case Mips::BGEImmMacro: + PseudoOpcode = Mips::BGE; + break; + case Mips::BGTImmMacro: + PseudoOpcode = Mips::BGT; + break; + case Mips::BLTUImmMacro: + PseudoOpcode = Mips::BLTU; + break; + case Mips::BLEUImmMacro: + PseudoOpcode = Mips::BLEU; + break; + case Mips::BGEUImmMacro: + PseudoOpcode = Mips::BGEU; + break; + case Mips::BGTUImmMacro: + PseudoOpcode = Mips::BGTU; + break; + case Mips::BLTLImmMacro: + PseudoOpcode = Mips::BLTL; + break; + case Mips::BLELImmMacro: + PseudoOpcode = Mips::BLEL; + break; + case Mips::BGELImmMacro: + PseudoOpcode = Mips::BGEL; + break; + case Mips::BGTLImmMacro: + PseudoOpcode = Mips::BGTL; + break; + case Mips::BLTULImmMacro: + PseudoOpcode = Mips::BLTUL; + break; + case Mips::BLEULImmMacro: + PseudoOpcode = Mips::BLEUL; + break; + case Mips::BGEULImmMacro: + PseudoOpcode = Mips::BGEUL; + break; + case Mips::BGTULImmMacro: + PseudoOpcode = Mips::BGTUL; + break; + } + + if (loadImmediate(TrgOp.getImm(), TrgReg, 
Mips::NoRegister, !isGP64bit(), + false, IDLoc, Instructions)) + return true; + } switch (PseudoOpcode) { case Mips::BLT: case Mips::BLTU: + case Mips::BLTL: + case Mips::BLTUL: AcceptsEquality = false; ReverseOrderSLT = false; - IsUnsigned = (PseudoOpcode == Mips::BLTU); + IsUnsigned = ((PseudoOpcode == Mips::BLTU) || (PseudoOpcode == Mips::BLTUL)); + IsLikely = ((PseudoOpcode == Mips::BLTL) || (PseudoOpcode == Mips::BLTUL)); ZeroSrcOpcode = Mips::BGTZ; ZeroTrgOpcode = Mips::BLTZ; break; case Mips::BLE: case Mips::BLEU: + case Mips::BLEL: + case Mips::BLEUL: AcceptsEquality = true; ReverseOrderSLT = true; - IsUnsigned = (PseudoOpcode == Mips::BLEU); + IsUnsigned = ((PseudoOpcode == Mips::BLEU) || (PseudoOpcode == Mips::BLEUL)); + IsLikely = ((PseudoOpcode == Mips::BLEL) || (PseudoOpcode == Mips::BLEUL)); ZeroSrcOpcode = Mips::BGEZ; ZeroTrgOpcode = Mips::BLEZ; break; case Mips::BGE: case Mips::BGEU: + case Mips::BGEL: + case Mips::BGEUL: AcceptsEquality = true; ReverseOrderSLT = false; - IsUnsigned = (PseudoOpcode == Mips::BGEU); + IsUnsigned = ((PseudoOpcode == Mips::BGEU) || (PseudoOpcode == Mips::BGEUL)); + IsLikely = ((PseudoOpcode == Mips::BGEL) || (PseudoOpcode == Mips::BGEUL)); ZeroSrcOpcode = Mips::BLEZ; ZeroTrgOpcode = Mips::BGEZ; break; case Mips::BGT: case Mips::BGTU: + case Mips::BGTL: + case Mips::BGTUL: AcceptsEquality = false; ReverseOrderSLT = true; - IsUnsigned = (PseudoOpcode == Mips::BGTU); + IsUnsigned = ((PseudoOpcode == Mips::BGTU) || (PseudoOpcode == Mips::BGTUL)); + IsLikely = ((PseudoOpcode == Mips::BGTL) || (PseudoOpcode == Mips::BGTUL)); ZeroSrcOpcode = Mips::BLTZ; ZeroTrgOpcode = Mips::BGTZ; break; @@ -2373,7 +2783,6 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, llvm_unreachable("unknown opcode for branch pseudo-instruction"); } - MCInst BranchInst; bool IsTrgRegZero = (TrgReg == Mips::ZERO); bool IsSrcRegZero = (SrcReg == Mips::ZERO); if (IsSrcRegZero && IsTrgRegZero) { @@ -2381,51 +2790,37 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // with GAS' behaviour. However, they may not generate the most efficient // code in some circumstances. 
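// Illustrative sketch (not part of the patch): how the general two-register
// case below combines the flag table above. E.g. "blt $4,$5,L" becomes
// "slt $at,$4,$5 ; bne $at,$zero,L", while "ble $4,$5,L" becomes
// "slt $at,$5,$4 ; beq $at,$zero,L" (operands swapped, equality accepted).
#include <cstdio>

static void lowerPseudoBranch(const char *Src, const char *Trg,
                              bool ReverseOrderSLT, bool AcceptsEquality,
                              bool IsUnsigned, bool IsLikely) {
  std::printf("  %s $at, %s, %s\n", IsUnsigned ? "sltu" : "slt",
              ReverseOrderSLT ? Trg : Src, ReverseOrderSLT ? Src : Trg);
  std::printf("  %s%s $at, $zero, L\n", AcceptsEquality ? "beq" : "bne",
              IsLikely ? "l" : ""); // "l" selects the BEQL/BNEL likely forms
}

int main() {
  lowerPseudoBranch("$4", "$5", false, false, false, false); // blt
  lowerPseudoBranch("$4", "$5", true, true, false, false);   // ble
  return 0;
}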
if (PseudoOpcode == Mips::BLT) { - BranchInst.setOpcode(Mips::BLTZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BLTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } if (PseudoOpcode == Mips::BLE) { - BranchInst.setOpcode(Mips::BLEZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BLEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); Warning(IDLoc, "branch is always taken"); return false; } if (PseudoOpcode == Mips::BGE) { - BranchInst.setOpcode(Mips::BGEZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BGEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); Warning(IDLoc, "branch is always taken"); return false; } if (PseudoOpcode == Mips::BGT) { - BranchInst.setOpcode(Mips::BGTZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BGTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } if (PseudoOpcode == Mips::BGTU) { - BranchInst.setOpcode(Mips::BNE); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BNE, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); return false; } if (AcceptsEquality) { // If both registers are $0 and the pseudo-branch accepts equality, it // will always be taken, so we emit an unconditional branch. - BranchInst.setOpcode(Mips::BEQ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); Warning(IDLoc, "branch is always taken"); return false; } @@ -2449,11 +2844,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // the pseudo-branch will always be taken, so we emit an unconditional // branch. // This only applies to unsigned pseudo-branches. - BranchInst.setOpcode(Mips::BEQ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); Warning(IDLoc, "branch is always taken"); return false; } @@ -2470,21 +2862,17 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // // Because only BLEU and BGEU branch on equality, we can use the // AcceptsEquality variable to decide when to emit the BEQZ. - BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE); - BranchInst.addOperand( - MCOperand::createReg(IsSrcRegZero ? 
TrgReg : SrcReg)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(AcceptsEquality ? Mips::BEQ : Mips::BNE, + IsSrcRegZero ? TrgReg : SrcReg, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); return false; } // If we have a signed pseudo-branch and one of the registers is $0, // we can use an appropriate compare-to-zero branch. We select which one // to use in the switch statement above. - BranchInst.setOpcode(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode); - BranchInst.addOperand(MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode, + IsSrcRegZero ? TrgReg : SrcReg, MCOperand::createExpr(OffsetExpr), + IDLoc, Instructions); return false; } @@ -2494,7 +2882,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, if (!ATRegNum) return true; - warnIfNoMacro(IDLoc); + if (!EmittedNoMacroWarning) + warnIfNoMacro(IDLoc); // SLT fits well with 2 of our 4 pseudo-branches: // BLT, where $rs < $rt, translates into "slt $at, $rs, $rt" and @@ -2511,23 +2900,135 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // // The same applies to the unsigned variants, except that SLTu is used // instead of SLT. - MCInst SetInst; - SetInst.setOpcode(IsUnsigned ? Mips::SLTu : Mips::SLT); - SetInst.addOperand(MCOperand::createReg(ATRegNum)); - SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? TrgReg : SrcReg)); - SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? SrcReg : TrgReg)); - Instructions.push_back(SetInst); + emitRRR(IsUnsigned ? Mips::SLTu : Mips::SLT, ATRegNum, + ReverseOrderSLT ? TrgReg : SrcReg, ReverseOrderSLT ? SrcReg : TrgReg, + IDLoc, Instructions); - BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE); - BranchInst.addOperand(MCOperand::createReg(ATRegNum)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(IsLikely ? (AcceptsEquality ? Mips::BEQL : Mips::BNEL) + : (AcceptsEquality ? Mips::BEQ : Mips::BNE), + ATRegNum, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } -bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions) { +bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions, + const bool IsMips64, const bool Signed) { + if (hasMips32r6()) { + Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); + return false; + } + + warnIfNoMacro(IDLoc); + + const MCOperand &RsRegOp = Inst.getOperand(0); + assert(RsRegOp.isReg() && "expected register operand kind"); + unsigned RsReg = RsRegOp.getReg(); + + const MCOperand &RtRegOp = Inst.getOperand(1); + assert(RtRegOp.isReg() && "expected register operand kind"); + unsigned RtReg = RtRegOp.getReg(); + unsigned DivOp; + unsigned ZeroReg; + + if (IsMips64) { + DivOp = Signed ? Mips::DSDIV : Mips::DUDIV; + ZeroReg = Mips::ZERO_64; + } else { + DivOp = Signed ? 
Mips::SDIV : Mips::UDIV; + ZeroReg = Mips::ZERO; + } + + bool UseTraps = useTraps(); + + if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) { + if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) + Warning(IDLoc, "dividing zero by zero"); + if (IsMips64) { + if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) { + if (UseTraps) { + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + return false; + } + + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + return false; + } + } else { + emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions); + return false; + } + } + + if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) { + Warning(IDLoc, "division by zero"); + if (Signed) { + if (UseTraps) { + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + return false; + } + + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + return false; + } + } + + // FIXME: The values for these two BranchTarget variables may be different in + // micromips. These magic numbers need to be removed. + unsigned BranchTargetNoTraps; + unsigned BranchTarget; + + if (UseTraps) { + BranchTarget = IsMips64 ? 12 : 8; + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + } else { + BranchTarget = IsMips64 ? 20 : 16; + BranchTargetNoTraps = 8; + // Branch to the li instruction. + emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc, + Instructions); + } + + emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions); + + if (!UseTraps) + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + + if (!Signed) { + emitR(Mips::MFLO, RsReg, IDLoc, Instructions); + return false; + } + + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, Instructions); + if (IsMips64) { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions); + emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, Instructions); + emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, Instructions); + } else { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions); + emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, Instructions); + } + + if (UseTraps) + emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, Instructions); + else { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, Instructions); + emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, Instructions); + emitII(Mips::BREAK, 0x6, 0, IDLoc, Instructions); + } + emitR(Mips::MFLO, RsReg, IDLoc, Instructions); + return false; +} + +bool MipsAsmParser::expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc, + SmallVectorImpl &Instructions) { if (hasMips32r6() || hasMips64r6()) { Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); return false; @@ -2562,7 +3063,7 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, LoadedOffsetInAT = true; if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), - IDLoc, Instructions)) + true, IDLoc, Instructions)) return true; // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() @@ -2590,33 +3091,15 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg; - MCInst TmpInst; - TmpInst.setOpcode(Mips::LBu); - TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg)); - TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); - TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset)); - Instructions.push_back(TmpInst); + emitRRI(Signed ? 
Mips::LB : Mips::LBu, FirstLbuDstReg, LbuSrcReg, + FirstLbuOffset, IDLoc, Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::LBu); - TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg)); - TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); - TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset)); - Instructions.push_back(TmpInst); + emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondLbuOffset, IDLoc, + Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::SLL); - TmpInst.addOperand(MCOperand::createReg(SllReg)); - TmpInst.addOperand(MCOperand::createReg(SllReg)); - TmpInst.addOperand(MCOperand::createImm(8)); - Instructions.push_back(TmpInst); + emitRRI(Mips::SLL, SllReg, SllReg, 8, IDLoc, Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::OR); - TmpInst.addOperand(MCOperand::createReg(DstReg)); - TmpInst.addOperand(MCOperand::createReg(DstReg)); - TmpInst.addOperand(MCOperand::createReg(ATReg)); - Instructions.push_back(TmpInst); + emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, Instructions); return false; } @@ -2654,7 +3137,7 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, warnIfNoMacro(IDLoc); if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), - IDLoc, Instructions)) + true, IDLoc, Instructions)) return true; // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() @@ -2677,37 +3160,373 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3); } - MCInst LeftLoadInst; - LeftLoadInst.setOpcode(Mips::LWL); - LeftLoadInst.addOperand(DstRegOp); - LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); - LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset)); - Instructions.push_back(LeftLoadInst); + emitRRI(Mips::LWL, DstRegOp.getReg(), FinalSrcReg, LeftLoadOffset, IDLoc, + Instructions); - MCInst RightLoadInst; - RightLoadInst.setOpcode(Mips::LWR); - RightLoadInst.addOperand(DstRegOp); - RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); - RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset )); - Instructions.push_back(RightLoadInst); + emitRRI(Mips::LWR, DstRegOp.getReg(), FinalSrcReg, RightLoadOffset, IDLoc, + Instructions); return false; } +bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + assert (Inst.getNumOperands() == 3 && "Invalid operand count"); + assert (Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && + Inst.getOperand(2).isImm() && "Invalid instruction operand."); + + unsigned ATReg = Mips::NoRegister; + unsigned FinalDstReg = Mips::NoRegister; + unsigned DstReg = Inst.getOperand(0).getReg(); + unsigned SrcReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm(); + + bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue); + + unsigned FinalOpcode = Inst.getOpcode(); + + if (DstReg == SrcReg) { + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + FinalDstReg = DstReg; + DstReg = ATReg; + } + + if (!loadImmediate(ImmValue, DstReg, Mips::NoRegister, Is32Bit, false, Inst.getLoc(), Instructions)) { + switch (FinalOpcode) { + default: + llvm_unreachable("unimplemented expansion"); + case (Mips::ADDi): + FinalOpcode = Mips::ADD; + break; + case (Mips::ADDiu): + FinalOpcode = Mips::ADDu; + break; + case (Mips::ANDi): + FinalOpcode = Mips::AND; + break; + case (Mips::NORImm): + FinalOpcode = Mips::NOR; + break; + case (Mips::ORi): + FinalOpcode = Mips::OR; + break; + case (Mips::SLTi): + 
FinalOpcode = Mips::SLT; + break; + case (Mips::SLTiu): + FinalOpcode = Mips::SLTu; + break; + case (Mips::XORi): + FinalOpcode = Mips::XOR; + break; + } + + if (FinalDstReg == Mips::NoRegister) + emitRRR(FinalOpcode, DstReg, DstReg, SrcReg, IDLoc, Instructions); + else + emitRRR(FinalOpcode, FinalDstReg, FinalDstReg, DstReg, IDLoc, + Instructions); + return false; + } + return true; +} + +bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + unsigned TReg = Inst.getOperand(2).getReg(); + unsigned TmpReg = DReg; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips32r2()) { + + if (DReg == SReg) { + TmpReg = getATReg(Inst.getLoc()); + if (!TmpReg) + return true; + } + + if (Inst.getOpcode() == Mips::ROL) { + emitRRR(Mips::SUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::ROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::ROR) { + emitRRR(Mips::ROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips32()) { + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::ROL: + FirstShift = Mips::SRLV; + SecondShift = Mips::SLLV; + break; + case Mips::ROR: + FirstShift = Mips::SLLV; + SecondShift = Mips::SRLV; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRR(Mips::SUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions); + emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm(); + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips32r2()) { + + if (Inst.getOpcode() == Mips::ROLImm) { + uint64_t MaxShift = 32; + uint64_t ShiftValue = ImmValue; + if (ImmValue != 0) + ShiftValue = MaxShift - ImmValue; + emitRRI(Mips::ROTR, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::RORImm) { + emitRRI(Mips::ROTR, DReg, SReg, ImmValue, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips32()) { + + if (ImmValue == 0) { + emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), Instructions); + return false; + } + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::ROLImm: + FirstShift = Mips::SLL; + SecondShift = Mips::SRL; + break; + case Mips::RORImm: + FirstShift = Mips::SRL; + SecondShift = Mips::SLL; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRI(FirstShift, ATReg, SReg, ImmValue, Inst.getLoc(), Instructions); + emitRRI(SecondShift, DReg, SReg, 32 - ImmValue, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl 
&Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + unsigned TReg = Inst.getOperand(2).getReg(); + unsigned TmpReg = DReg; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips64r2()) { + + if (TmpReg == SReg) { + TmpReg = getATReg(Inst.getLoc()); + if (!TmpReg) + return true; + } + + if (Inst.getOpcode() == Mips::DROL) { + emitRRR(Mips::DSUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::DROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::DROR) { + emitRRR(Mips::DROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips64()) { + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::DROL: + FirstShift = Mips::DSRLV; + SecondShift = Mips::DSLLV; + break; + case Mips::DROR: + FirstShift = Mips::DSLLV; + SecondShift = Mips::DSRLV; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRR(Mips::DSUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions); + emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm() % 64; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + MCInst TmpInst; + + if (hasMips64r2()) { + + unsigned FinalOpcode = Mips::NOP; + if (ImmValue == 0) + FinalOpcode = Mips::DROTR; + else if (ImmValue % 32 == 0) + FinalOpcode = Mips::DROTR32; + else if ((ImmValue >= 1) && (ImmValue <= 32)) { + if (Inst.getOpcode() == Mips::DROLImm) + FinalOpcode = Mips::DROTR32; + else + FinalOpcode = Mips::DROTR; + } else if (ImmValue >= 33) { + if (Inst.getOpcode() == Mips::DROLImm) + FinalOpcode = Mips::DROTR; + else + FinalOpcode = Mips::DROTR32; + } + + uint64_t ShiftValue = ImmValue % 32; + if (Inst.getOpcode() == Mips::DROLImm) + ShiftValue = (32 - ImmValue % 32) % 32; + + emitRRI(FinalOpcode, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions); + + return false; + } + + if (hasMips64()) { + + if (ImmValue == 0) { + emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), Instructions); + return false; + } + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::DROLImm: + if ((ImmValue >= 1) && (ImmValue <= 31)) { + FirstShift = Mips::DSLL; + SecondShift = Mips::DSRL32; + } + if (ImmValue == 32) { + FirstShift = Mips::DSLL32; + SecondShift = Mips::DSRL32; + } + if ((ImmValue >= 33) && (ImmValue <= 63)) { + FirstShift = Mips::DSLL32; + SecondShift = Mips::DSRL; + } + break; + case Mips::DRORImm: + if ((ImmValue >= 1) && (ImmValue <= 31)) { + FirstShift = Mips::DSRL; + SecondShift = Mips::DSLL32; + } + if (ImmValue == 32) { + FirstShift = Mips::DSRL32; + SecondShift = Mips::DSLL32; + } + if ((ImmValue >= 33) && (ImmValue <= 63)) { + FirstShift = Mips::DSRL32; + SecondShift = Mips::DSLL; + } + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRI(FirstShift, 
ATReg, SReg, ImmValue % 32, Inst.getLoc(), Instructions); + emitRRI(SecondShift, DReg, SReg, (32 - ImmValue % 32) % 32, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl &Instructions) { - MCInst NopInst; - if (hasShortDelaySlot) { - NopInst.setOpcode(Mips::MOVE16_MM); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - } else { - NopInst.setOpcode(Mips::SLL); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createImm(0)); - } - Instructions.push_back(NopInst); + if (hasShortDelaySlot) + emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, Instructions); + else + emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, Instructions); } void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, @@ -2717,6 +3536,24 @@ void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, Instructions); } +void MipsAsmParser::createCpRestoreMemOp( + bool IsLoad, int StackOffset, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + // If the offset can not fit into 16 bits, we need to expand. + if (!isInt<16>(StackOffset)) { + MCInst MemInst; + MemInst.setOpcode(IsLoad ? Mips::LW : Mips::SW); + MemInst.addOperand(MCOperand::createReg(Mips::GP)); + MemInst.addOperand(MCOperand::createReg(Mips::SP)); + MemInst.addOperand(MCOperand::createImm(StackOffset)); + expandMemInst(MemInst, IDLoc, Instructions, IsLoad, true /*HasImmOpnd*/); + return; + } + + emitRRI(IsLoad ? Mips::LW : Mips::SW, Mips::GP, Mips::SP, StackOffset, IDLoc, + Instructions); +} + unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // As described by the Mips32r2 spec, the registers Rd and Rs for // jalr.hb must be different. 
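// Illustrative sketch (not part of the patch): the identity behind the
// pre-R2 rotation expansions above, where "ror $d,$s,n" becomes srl+sll+or
// (and "rol" negates the amount, via subu in the register forms).
#include <cassert>
#include <cstdint>

static uint32_t rotrImm(uint32_t X, unsigned N) {
  if (N == 0)
    return X; // the code emits "srl $d, $s, 0", i.e. a plain move
  return (X >> N) | (X << (32 - N)); // FirstShift = SRL, SecondShift = SLL
}

static uint32_t rolAsRotr(uint32_t X, unsigned N) {
  return rotrImm(X, (32 - N % 32) % 32); // rol n == ror (32 - n)
}

int main() {
  assert(rotrImm(0x80000001u, 1) == 0xC0000000u);
  assert(rolAsRotr(0x80000001u, 1) == 0x00000003u);
  return 0;
}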
@@ -2729,6 +3566,17 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, + uint64_t ErrorInfo) { + if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { + SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + return Loc; + return ErrorLoc; + } + return Loc; +} + bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2745,7 +3593,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (processInstruction(Inst, IDLoc, Instructions)) return true; for (unsigned i = 0; i < Instructions.size(); i++) - Out.EmitInstruction(Instructions[i], STI); + Out.EmitInstruction(Instructions[i], getSTI()); return false; } case Match_MissingFeature: @@ -2757,7 +3605,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc(); + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -2768,6 +3616,58 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "invalid instruction"); case Match_RequiresDifferentSrcAndDst: return Error(IDLoc, "source and destination must be different"); + case Match_Immz: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'"); + case Match_UImm1_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 1-bit unsigned immediate"); + case Match_UImm2_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 2-bit unsigned immediate"); + case Match_UImm2_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 4"); + case Match_UImm3_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 3-bit unsigned immediate"); + case Match_UImm4_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 4-bit unsigned immediate"); + case Match_UImm5_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 5-bit unsigned immediate"); + case Match_UImm5_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 32"); + case Match_UImm5_32: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 32 .. 63"); + case Match_UImm5_33: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 33 .. 64"); + case Match_UImm5_0_Report_UImm6: + // This is used on UImm5 operands that have a corresponding UImm5_32 + // operand to avoid confusing the user. 
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); + case Match_UImm5_Lsl2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected both 7-bit unsigned immediate and multiple of 4"); + case Match_UImm6_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); + case Match_SImm6: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit signed immediate"); + case Match_UImm7_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 7-bit unsigned immediate"); + case Match_UImm8_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit unsigned immediate"); + case Match_UImm10_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 10-bit unsigned immediate"); } llvm_unreachable("Implement any new match types added!"); @@ -3264,7 +4164,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::LParen)) { MipsOperand &Mnemonic = static_cast(*Operands[0]); - if (Mnemonic.getToken() == "la") { + if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") { SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this)); @@ -3598,12 +4498,15 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) { if (RegRange) { // Remove last register operand because registers from register range // should be inserted first. - if (RegNo == Mips::RA) { + if ((isGP64bit() && RegNo == Mips::RA_64) || + (!isGP64bit() && RegNo == Mips::RA)) { Regs.push_back(RegNo); } else { unsigned TmpReg = PrevReg + 1; while (TmpReg <= RegNo) { - if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) { + if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) || + (((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) && + isGP64bit())) { Error(E, "invalid register operand"); return MatchOperand_ParseFail; } @@ -3615,16 +4518,23 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) { RegRange = false; } else { - if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) && - (RegNo != Mips::RA)) { + if ((PrevReg == Mips::NoRegister) && + ((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) || + (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) { Error(E, "$16 or $31 expected"); return MatchOperand_ParseFail; - } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) && - (RegNo != Mips::FP) && (RegNo != Mips::RA)) { + } else if (!(((RegNo == Mips::FP || RegNo == Mips::RA || + (RegNo >= Mips::S0 && RegNo <= Mips::S7)) && + !isGP64bit()) || + ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 || + (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) && + isGP64bit()))) { Error(E, "invalid register operand"); return MatchOperand_ParseFail; } else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) && - (RegNo != Mips::FP) && (RegNo != Mips::RA)) { + ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) || + (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 && + isGP64bit()))) { Error(E, "consecutive register numbers expected"); return MatchOperand_ParseFail; } @@ -4152,6 +5062,7 @@ bool MipsAsmParser::parseSetPopDirective() { if (AssemblerOptions.size() == 2) return reportParseError(Loc, ".set pop with no .set push"); + MCSubtargetInfo &STI = copySTI(); AssemblerOptions.pop_back(); setAvailableFeatures( 
ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures())); @@ -4225,6 +5136,7 @@ bool MipsAsmParser::parseSetMips0Directive() { return reportParseError("unexpected token, expected end of statement"); // Reset assembler options to their initial values. + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures())); STI.setFeatureBits(AssemblerOptions.front()->getFeatures()); @@ -4366,6 +5278,14 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) { return true; } +// Used to determine if .cpload, .cprestore, and .cpsetup have any effect. +// In this class, it is only used for .cprestore. +// FIXME: Only keep track of IsPicEnabled in one place, instead of in both +// MipsTargetELFStreamer and MipsAsmParser. +bool MipsAsmParser::isPicAndNotNxxAbi() { + return inPicMode() && !(isABI_N32() || isABI_N64()); +} + bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) { if (AssemblerOptions.back()->isReorder()) Warning(Loc, ".cpload should be inside a noreorder section"); @@ -4398,6 +5318,54 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) { return false; } +bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) { + MCAsmParser &Parser = getParser(); + + // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it + // is used in non-PIC mode. + + if (inMips16Mode()) { + reportParseError(".cprestore is not supported in Mips16 mode"); + return false; + } + + // Get the stack offset value. + const MCExpr *StackOffset; + int64_t StackOffsetVal; + if (Parser.parseExpression(StackOffset)) { + reportParseError("expected stack offset value"); + return false; + } + + if (!StackOffset->evaluateAsAbsolute(StackOffsetVal)) { + reportParseError("stack offset is not an absolute expression"); + return false; + } + + if (StackOffsetVal < 0) { + Warning(Loc, ".cprestore with negative stack offset has no effect"); + IsCpRestoreSet = false; + } else { + IsCpRestoreSet = true; + CpRestoreOffset = StackOffsetVal; + } + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + // Store the $gp on the stack. + SmallVector StoreInsts; + createCpRestoreMemOp(false /*IsLoad*/, CpRestoreOffset /*StackOffset*/, Loc, + StoreInsts); + + getTargetStreamer().emitDirectiveCpRestore(StoreInsts, CpRestoreOffset); + Parser.Lex(); // Consume the EndOfStatement. 
+ return false; +} + bool MipsAsmParser::parseDirectiveCPSetup() { MCAsmParser &Parser = getParser(); unsigned FuncReg; @@ -4427,16 +5395,19 @@ bool MipsAsmParser::parseDirectiveCPSetup() { ResTy = parseAnyRegister(TmpReg); if (ResTy == MatchOperand_NoMatch) { - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Integer)) { - Save = Tok.getIntVal(); - SaveIsReg = false; - Parser.Lex(); - } else { - reportParseError("expected save register or stack offset"); + const MCExpr *OffsetExpr; + int64_t OffsetVal; + SMLoc ExprLoc = getLexer().getLoc(); + + if (Parser.parseExpression(OffsetExpr) || + !OffsetExpr->evaluateAsAbsolute(OffsetVal)) { + reportParseError(ExprLoc, "expected save register or stack offset"); Parser.eatToEndOfStatement(); return false; } + + Save = OffsetVal; + SaveIsReg = false; } else { MipsOperand &SaveOpnd = static_cast(*TmpReg[0]); if (!SaveOpnd.isGPRAsmReg()) { @@ -4462,11 +5433,20 @@ bool MipsAsmParser::parseDirectiveCPSetup() { } const MCSymbolRefExpr *Ref = static_cast(Expr); + CpSaveLocation = Save; + CpSaveLocationIsRegister = SaveIsReg; + getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, Ref->getSymbol(), SaveIsReg); return false; } +bool MipsAsmParser::parseDirectiveCPReturn() { + getTargetStreamer().emitDirectiveCpreturn(CpSaveLocation, + CpSaveLocationIsRegister); + return false; +} + bool MipsAsmParser::parseDirectiveNaN() { MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -4655,6 +5635,9 @@ bool MipsAsmParser::parseDirectiveOption() { StringRef Option = Tok.getIdentifier(); if (Option == "pic0") { + // MipsAsmParser needs to know if the current PIC mode changes. + IsPicEnabled = false; + getTargetStreamer().emitDirectiveOptionPic0(); Parser.Lex(); if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { @@ -4666,6 +5649,9 @@ bool MipsAsmParser::parseDirectiveOption() { } if (Option == "pic2") { + // MipsAsmParser needs to know if the current PIC mode changes. 
+ IsPicEnabled = true; + getTargetStreamer().emitDirectiveOptionPic2(); Parser.Lex(); if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { @@ -4924,6 +5910,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".cpload") return parseDirectiveCpLoad(DirectiveID.getLoc()); + if (IDVal == ".cprestore") + return parseDirectiveCpRestore(DirectiveID.getLoc()); if (IDVal == ".dword") { parseDataDirective(8, DirectiveID.getLoc()); return false; @@ -4974,6 +5962,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitDirectiveEnt(*Sym); CurrentFn = Sym; + IsCpRestoreSet = false; return false; } @@ -5002,6 +5991,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitDirectiveEnd(SymbolName); CurrentFn = nullptr; + IsCpRestoreSet = false; return false; } @@ -5073,6 +6063,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitFrame(StackReg, FrameSizeVal, ReturnRegOpnd.getGPR32Reg()); + IsCpRestoreSet = false; return false; } @@ -5173,6 +6164,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".cpsetup") return parseDirectiveCPSetup(); + if (IDVal == ".cpreturn") + return parseDirectiveCPReturn(); + if (IDVal == ".module") return parseDirectiveModule(); diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index a34ba3bd0ee8..3c1a771f97e9 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -229,6 +229,13 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, uint64_t Address, const void *Decoder); +// DecodeBranchTarget26MM - Decode microMIPS branch offset, which is +// shifted left by 1 bit. +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + // DecodeJumpTargetMM - Decode microMIPS jump target, which is // shifted left by 1 bit. 
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, @@ -241,17 +248,42 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemEVA(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadByte9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadByte15(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCacheOpR6(MCInst &Inst, +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCacheOpMM(MCInst &Inst, +static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -261,6 +293,11 @@ static DecodeStatus DecodeSyncI(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSynciR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -284,6 +321,11 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -330,6 +372,11 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, + unsigned Value, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeSimm4(MCInst &Inst, unsigned Value, uint64_t Address, @@ -340,23 +387,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst, uint64_t Address, const void *Decoder); -// Decode the immediate field of an LSA instruction which -// is off by one. -static DecodeStatus DecodeLSAImm(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeExtSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -830,9 +869,24 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (IsMicroMips) { Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + if (hasMips32r6()) { + DEBUG(dbgs() << "Trying MicroMipsR616 table (16-bit instructions):\n"); + // Calling the auto-generated decoder function for microMIPS32R6 + // (and microMIPS64R6) 16-bit instructions. 
+ Result = decodeInstruction(DecoderTableMicroMipsR616, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 2; + return Result; + } + } DEBUG(dbgs() << "Trying MicroMips16 table (16-bit instructions):\n"); - // Calling the auto-generated decoder function. + // Calling the auto-generated decoder function for microMIPS 16-bit + // instructions. Result = decodeInstruction(DecoderTableMicroMips16, Instr, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -847,24 +901,33 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (hasMips32r6()) { DEBUG(dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n"); // Calling the auto-generated decoder function. - Result = decodeInstruction(DecoderTableMicroMips32r632, Instr, Insn, Address, - this, STI); - } else { - DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n"); - // Calling the auto-generated decoder function. - Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address, + Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } } + + DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n"); + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } + // This is an invalid instruction. Let the disassembler move forward by the + // minimum instruction size. + Size = 2; return MCDisassembler::Fail; } Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false); - if (Result == MCDisassembler::Fail) + if (Result == MCDisassembler::Fail) { + Size = 4; return MCDisassembler::Fail; + } if (hasCOP3()) { DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n"); @@ -925,6 +988,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return Result; } + Size = 4; return MCDisassembler::Fail; } @@ -1079,10 +1143,66 @@ static DecodeStatus DecodeMem(MCInst &Inst, Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); Base = getReg(Decoder, Mips::GPR32RegClassID, Base); - if(Inst.getOpcode() == Mips::SC || - Inst.getOpcode() == Mips::SCD){ + if (Inst.getOpcode() == Mips::SC || + Inst.getOpcode() == Mips::SCD) Inst.addOperand(MCOperand::createReg(Reg)); - } + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemEVA(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn >> 7); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + if (Inst.getOpcode() == Mips::SCE) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLoadByte9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, 
Mips::GPR32RegClassID, Base); + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLoadByte15(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1125,11 +1245,28 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCacheOpR6(MCInst &Inst, +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - int Offset = fieldFromInstruction(Insn, 7, 9); + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Hint = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + Inst.addOperand(MCOperand::createImm(Hint)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn >> 7); unsigned Hint = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1142,6 +1279,24 @@ static DecodeStatus DecodeCacheOpR6(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1157,6 +1312,21 @@ static DecodeStatus DecodeSyncI(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeSynciR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Immediate = SignExtend32<16>(Insn & 0xffff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Immediate)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10)); @@ -1220,8 +1390,11 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst, return MCDisassembler::Fail; break; case Mips::SB16_MM: + case Mips::SB16_MMR6: case Mips::SH16_MM: + case Mips::SH16_MMR6: case Mips::SW16_MM: + case Mips::SW16_MMR6: if (DecodeGPRMM16ZeroRegisterClass(Inst, Reg, Address, Decoder) == MCDisassembler::Fail) return MCDisassembler::Fail; @@ -1240,14 +1413,17 @@ static DecodeStatus 
DecodeMemMMImm4(MCInst &Inst, Inst.addOperand(MCOperand::createImm(Offset)); break; case Mips::SB16_MM: + case Mips::SB16_MMR6: Inst.addOperand(MCOperand::createImm(Offset)); break; case Mips::LHU16_MM: case Mips::SH16_MM: + case Mips::SH16_MMR6: Inst.addOperand(MCOperand::createImm(Offset << 1)); break; case Mips::LW16_MM: case Mips::SW16_MM: + case Mips::SW16_MMR6: Inst.addOperand(MCOperand::createImm(Offset << 2)); break; } @@ -1291,7 +1467,16 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - int Offset = SignExtend32<4>(Insn & 0xf); + int Offset; + switch (Inst.getOpcode()) { + case Mips::LWM16_MMR6: + case Mips::SWM16_MMR6: + Offset = fieldFromInstruction(Insn, 4, 4); + break; + default: + Offset = SignExtend32<4>(Insn & 0xf); + break; + } if (DecodeRegListOperand16(Inst, Insn, Address, Decoder) == MCDisassembler::Fail) @@ -1303,6 +1488,27 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + if (Inst.getOpcode() == Mips::SCE_MM) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1659,6 +1865,16 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + int32_t BranchOffset = SignExtend32<26>(Offset) << 1; + + Inst.addOperand(MCOperand::createImm(BranchOffset)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1700,6 +1916,14 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, + unsigned Value, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 8 : Value)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeSimm4(MCInst &Inst, unsigned Value, uint64_t Address, @@ -1716,12 +1940,12 @@ static DecodeStatus DecodeSimm16(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLSAImm(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - // We add one to the immediate field as it was encoded as 'imm - 1'. 
- Inst.addOperand(MCOperand::createImm(Insn + 1)); +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + Value &= ((1 << Bits) - 1); + Inst.addOperand(MCOperand::createImm(Value + Offset)); return MCDisassembler::Success; } @@ -1736,15 +1960,6 @@ static DecodeStatus DecodeInsSize(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeExtSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - int Size = (int) Insn + 1; - Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size))); - return MCDisassembler::Success; -} - static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4)); @@ -1792,15 +2007,21 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, uint64_t Address, const void *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, - Mips::S6, Mips::FP}; + Mips::S6, Mips::S7, Mips::FP}; unsigned RegNum; unsigned RegLst = fieldFromInstruction(Insn, 21, 5); + // Empty register lists are not allowed. if (RegLst == 0) return MCDisassembler::Fail; RegNum = RegLst & 0xf; + + // RegLst values 10-15, and 26-31 are reserved. + if (RegNum > 9) + return MCDisassembler::Fail; + for (unsigned i = 0; i < RegNum; i++) Inst.addOperand(MCOperand::createReg(Regs[i])); @@ -1814,7 +2035,16 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3}; - unsigned RegLst = fieldFromInstruction(Insn, 4, 2); + unsigned RegLst; + switch(Inst.getOpcode()) { + default: + RegLst = fieldFromInstruction(Insn, 4, 2); + break; + case Mips::LWM16_MMR6: + case Mips::SWM16_MMR6: + RegLst = fieldFromInstruction(Insn, 8, 2); + break; + } unsigned RegNum = RegLst & 0x3; for (unsigned i = 0; i <= RegNum; i++) diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index a5637b16b636..a7b7d2e080ee 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -235,7 +235,9 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { case Mips::SWM32_MM: case Mips::LWM32_MM: case Mips::SWM16_MM: + case Mips::SWM16_MMR6: case Mips::LWM16_MM: + case Mips::LWM16_MMR6: opNum = MI->getNumOperands() - 2; break; } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 713f35c70830..0e61ea61899a 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -73,8 +73,6 @@ enum CondCode { const char *MipsFCCToString(Mips::CondCode CC); } // end namespace Mips -class TargetMachine; - class MipsInstPrinter : public MCInstPrinter { public: MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index 8e6c9e69b223..cdcc3923b81e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -23,7 +23,7 @@ static const MCPhysReg Mips64IntRegs[8] = { Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; } -const ArrayRef MipsABIInfo::GetByValArgRegs() const { +ArrayRef MipsABIInfo::GetByValArgRegs() const { if (IsO32()) return makeArrayRef(O32IntRegs); if (IsN32() || IsN64()) @@ -31,7 
+31,7 @@ const ArrayRef MipsABIInfo::GetByValArgRegs() const { llvm_unreachable("Unhandled ABI"); } -const ArrayRef MipsABIInfo::GetVarArgRegs() const { +ArrayRef MipsABIInfo::GetVarArgRegs() const { if (IsO32()) return makeArrayRef(O32IntRegs); if (IsN32() || IsN64()) @@ -78,7 +78,6 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, .Case("mips32r3", MipsABIInfo::O32()) .Case("mips32r5", MipsABIInfo::O32()) .Case("mips32r6", MipsABIInfo::O32()) - .Case("mips16", MipsABIInfo::O32()) .Case("mips3", MipsABIInfo::N64()) .Case("mips4", MipsABIInfo::N64()) .Case("mips5", MipsABIInfo::N64()) @@ -107,6 +106,10 @@ unsigned MipsABIInfo::GetNullPtr() const { return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO; } +unsigned MipsABIInfo::GetZeroReg() const { + return AreGprs64bit() ? Mips::ZERO_64 : Mips::ZERO; +} + unsigned MipsABIInfo::GetPtrAdduOp() const { return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu; } @@ -115,6 +118,10 @@ unsigned MipsABIInfo::GetPtrAddiuOp() const { return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu; } +unsigned MipsABIInfo::GetGPRMoveOp() const { + return ArePtrs64bit() ? Mips::OR64 : Mips::OR; +} + unsigned MipsABIInfo::GetEhDataReg(unsigned I) const { static const unsigned EhDataReg[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index 40c5681acc17..ffa2c765e79b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -47,10 +47,10 @@ public: ABI GetEnumValue() const { return ThisABI; } /// The registers to use for byval arguments. - const ArrayRef GetByValArgRegs() const; + ArrayRef GetByValArgRegs() const; /// The registers to use for the variable argument list. - const ArrayRef GetVarArgRegs() const; + ArrayRef GetVarArgRegs() const; /// Obtain the size of the area allocated by the callee for arguments. /// CallingConv::FastCall affects the value for O32. @@ -67,9 +67,12 @@ public: unsigned GetFramePtr() const; unsigned GetBasePtr() const; unsigned GetNullPtr() const; + unsigned GetZeroReg() const; unsigned GetPtrAdduOp() const; unsigned GetPtrAddiuOp() const; + unsigned GetGPRMoveOp() const; inline bool ArePtrs64bit() const { return IsN64(); } + inline bool AreGprs64bit() const { return IsN32() || IsN64(); } unsigned GetEhDataReg(unsigned I) const; }; diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 328e71720cac..e4865e2455ee 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -63,15 +63,19 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // address range. Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 16-bit signed immediate. - if (!isIntN(16, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup"); + if (!isInt<16>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC19_S2: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 19-bit signed immediate. 
- if (!isIntN(19, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC19 fixup"); + if (!isInt<19>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup"); + return 0; + } break; case Mips::fixup_Mips_26: // So far we are only using this type for jumps. @@ -104,45 +108,57 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 7-bit signed immediate. - if (!isIntN(7, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC7 fixup"); + if (!isInt<7>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup"); + return 0; + } break; case Mips::fixup_MICROMIPS_PC10_S1: Value -= 2; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 10-bit signed immediate. - if (!isIntN(10, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC10 fixup"); + if (!isInt<10>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup"); + return 0; + } break; case Mips::fixup_MICROMIPS_PC16_S1: Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 2; // We now check if Value can be encoded as a 16-bit signed immediate. - if (!isIntN(16, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup"); + if (!isInt<16>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC18_S3: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 8; // We now check if Value can be encoded as a 18-bit signed immediate. - if (!isIntN(18, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup"); + if (!isInt<18>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC21_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 21-bit signed immediate. - if (!isIntN(21, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup"); + if (!isInt<21>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC26_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 26-bit signed immediate. - if (!isIntN(26, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup"); + if (!isInt<26>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup"); + return 0; + } break; } @@ -232,6 +248,18 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } +bool MipsAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const { + if (Name == "R_MIPS_NONE") { + MappedKind = (MCFixupKind)Mips::fixup_Mips_NONE; + return true; + } + if (Name == "R_MIPS_32") { + MappedKind = FK_Data_4; + return true; + } + return MCAsmBackend::getFixupKind(Name, MappedKind); +} + const MCFixupKindInfo &MipsAsmBackend:: getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = { @@ -239,6 +267,7 @@ getFixupKindInfo(MCFixupKind Kind) const { // MipsFixupKinds.h. 
// // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 0, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, @@ -304,6 +333,7 @@ getFixupKindInfo(MCFixupKind Kind) const { // MipsFixupKinds.h. // // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 16, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index b3d5a4964f86..1c9af9227ffe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -41,6 +41,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; + bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; unsigned getNumFixupKinds() const override { diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 9b2952720edd..5b9f02b89be5 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -68,6 +68,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { + case Mips::fixup_Mips_NONE: + return ELF::R_MIPS_NONE; case Mips::fixup_Mips_16: case FK_Data_2: return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16; @@ -325,13 +327,24 @@ static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) { // matching LO; // - prefer LOs without a pair; // - prefer LOs with higher offset; + +static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { + const ELFRelocationEntry &A = *AP; + const ELFRelocationEntry &B = *BP; + if (A.Offset != B.Offset) + return B.Offset - A.Offset; + if (B.Type != A.Type) + return A.Type - B.Type; + return 0; +} + void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, std::vector &Relocs) { if (Relocs.size() < 2) return; - // The default function sorts entries by Offset in descending order. - MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); + // Sorts entries by Offset in descending order. + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); // Init MipsRelocs from Relocs. std::vector MipsRelocs; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index b45d9cf621d7..e7d687e89a8a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -63,7 +63,7 @@ void MipsELFStreamer::SwitchSection(MCSection *Section, } void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { MCELFStreamer::EmitValueImpl(Value, Size, Loc); Labels.clear(); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index af9311fa4288..a241cdebdcc8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -60,8 +60,7 @@ public: /// Overriding this function allows us to dismiss all labels that are /// candidates for marking as microMIPS when .word directive is emitted. 
- void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override; + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index e601963264de..3652f4bab0d4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -23,8 +23,11 @@ namespace Mips { // in MipsAsmBackend.cpp. // enum Fixups { + // Branch fixups resulting in R_MIPS_NONE. + fixup_Mips_NONE = FirstTargetFixupKind, + // Branch fixups resulting in R_MIPS_16. - fixup_Mips_16 = FirstTargetFixupKind, + fixup_Mips_16, // Pure 32 bit data fixup resulting in - R_MIPS_32. fixup_Mips_32, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 5d23fcbd7a44..d4ccf0349c16 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -17,13 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class MipsMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit MipsMCAsmInfo(const Triple &TheTriple); - }; +class MipsMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit MipsMCAsmInfo(const Triple &TheTriple); +}; } // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index e36263d54ca4..4b030ebfce8c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -190,6 +190,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, else NewOpcode = Mips::Std2MicroMips(Opcode, Mips::Arch_micromips); + // Check whether it is Dsp instruction. + if (NewOpcode == -1) + NewOpcode = Mips::Dsp2MicroMips(Opcode, Mips::Arch_mmdsp); + if (NewOpcode != -1) { if (Fixups.size() > N) Fixups.pop_back(); @@ -346,6 +350,23 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, return 0; } +/// getBranchTarget26OpValueMM - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM( + const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 2. + if (MO.isImm()) + return MO.getImm() >> 1; + + // TODO: Push 26 PC fixup. + return 0; +} + /// getJumpOffset16OpValue - Return binary encoding of the jump /// target operand. If the machine operand requires relocation, /// record the relocation and return zero. @@ -745,7 +766,8 @@ getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { // Register is encoded in bits 9-5, offset is encoded in bits 4-0. 
assert(MI.getOperand(OpNo).isReg() && - MI.getOperand(OpNo).getReg() == Mips::SP && + (MI.getOperand(OpNo).getReg() == Mips::SP || + MI.getOperand(OpNo).getReg() == Mips::SP_64) && "Unexpected base register!"); unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) >> 2; @@ -768,6 +790,19 @@ getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo, return OffBits & 0x7F; } +unsigned MipsMCCodeEmitter:: +getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 8-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, + STI) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI); + + return (OffBits & 0x1FF) | RegBits; +} + unsigned MipsMCCodeEmitter:: getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, @@ -791,6 +826,19 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, return (OffBits & 0x0FFF) | RegBits; } +unsigned MipsMCCodeEmitter:: +getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, + STI) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); + + return (OffBits & 0xFFFF) | RegBits; +} + unsigned MipsMCCodeEmitter:: getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, @@ -801,7 +849,9 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, default: break; case Mips::SWM16_MM: + case Mips::SWM16_MMR6: case Mips::LWM16_MM: + case Mips::LWM16_MMR6: OpNo = MI.getNumOperands() - 2; break; } @@ -815,15 +865,6 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, return ((OffBits >> 2) & 0x0F); } -unsigned -MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - assert(MI.getOperand(OpNo).isImm()); - unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); - return SizeEncoding - 1; -} - // FIXME: should be called getMSBEncoding // unsigned @@ -838,13 +879,15 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, return Position + Size - 1; } +template unsigned -MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +MipsMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { assert(MI.getOperand(OpNo).isImm()); - // The immediate is encoded as 'immediate - 1'. - return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1; + unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + Value -= Offset; + return Value; } unsigned diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index 911cc2f77a45..fdacd172e3a2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -137,6 +137,13 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + // getBranchTarget26OpValueMM - Return binary encoding of the branch + // offset operand. 
If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTarget26OpValueMM(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + // getJumpOffset16OpValue - Return binary encoding of the jump // offset operand. If the machine operand requires relocation, // record the relocation and return zero. @@ -172,23 +179,27 @@ public: unsigned getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - // getLSAImmEncoding - Return binary encoding of LSA immediate. - unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// Subtract Offset then encode as a N-bit unsigned integer. + template + unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index fd2ed17ee785..e889972c5c0e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -51,8 +51,8 @@ public: const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS MipsMCExprs at the moment. 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index e4da2df75d47..e5fa7556053f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -89,9 +89,15 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() { void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} +void MipsTargetStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + forbidModuleDirective(); +} void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } +void MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) {} void MipsTargetStreamer::emitDirectiveModuleFP() {} @@ -358,6 +364,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) { forbidModuleDirective(); } +void MipsTargetAsmStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset); + OS << "\t.cprestore\t" << Offset << "\n"; +} + void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, @@ -373,7 +385,13 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, OS << ", "; - OS << Sym.getName() << "\n"; + OS << Sym.getName(); + forbidModuleDirective(); +} + +void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) { + OS << "\t.cpreturn"; forbidModuleDirective(); } @@ -595,8 +613,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHT_REL); + MCSymbol *Sym = Context.getOrCreateSymbol(Name); const MCSymbolRefExpr *ExprRef = - MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); Sec->setAlignment(4); @@ -622,10 +641,25 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; OS.PopSection(); + + // .end also implicitly sets the size. 
+ MCSymbol *CurPCSym = Context.createTempSymbol(); + OS.EmitLabel(CurPCSym); + const MCExpr *Size = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context), + ExprRef, Context); + int64_t AbsSize; + if (!Size->evaluateAsAbsolute(AbsSize, MCA)) + llvm_unreachable("Function size must be evaluatable as absolute"); + Size = MCConstantExpr::create(AbsSize, Context); + static_cast(Sym)->setSize(Size); } void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; + + // .ent also acts like an implicit '.type symbol, STT_FUNC' + static_cast(Symbol).setType(ELF::STT_FUNC); } void MipsTargetELFStreamer::emitDirectiveAbiCalls() { @@ -752,6 +786,24 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { forbidModuleDirective(); } +void MipsTargetELFStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset); + // .cprestore offset + // When PIC mode is enabled and the O32 ABI is used, this directive expands + // to: + // sw $gp, offset($sp) + // and adds a corresponding LW after every JAL. + + // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it + // is used in non-PIC mode. + if (!Pic || (getABI().IsN32() || getABI().IsN64())) + return; + + for (const MCInst &Inst : StoreInsts) + getStreamer().EmitInstruction(Inst, STI); +} + void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, @@ -766,7 +818,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, // Either store the old $gp in a register or on the stack if (IsReg) { // move $save, $gpreg - Inst.setOpcode(Mips::DADDu); + Inst.setOpcode(Mips::OR64); Inst.addOperand(MCOperand::createReg(RegOrOffset)); Inst.addOperand(MCOperand::createReg(Mips::GP)); Inst.addOperand(MCOperand::createReg(Mips::ZERO)); @@ -810,6 +862,30 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, forbidModuleDirective(); } +void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) { + // Only N32 and N64 emit anything for .cpreturn iff PIC is set. + if (!Pic || !(getABI().IsN32() || getABI().IsN64())) + return; + + MCInst Inst; + // Either restore the old $gp from a register or on the stack + if (SaveLocationIsRegister) { + Inst.setOpcode(Mips::OR); + Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(SaveLocation)); + Inst.addOperand(MCOperand::createReg(Mips::ZERO)); + } else { + Inst.setOpcode(Mips::LD); + Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(Mips::SP)); + Inst.addOperand(MCOperand::createImm(SaveLocation)); + } + getStreamer().EmitInstruction(Inst, STI); + + forbidModuleDirective(); +} + void MipsTargetELFStreamer::emitMipsAbiFlags() { MCAssembler &MCA = getStreamer().getAssembler(); MCContext &Context = MCA.getContext(); diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td index 187a022b2563..400f6eef3fb0 100644 --- a/lib/Target/Mips/MicroMips32r6InstrFormats.td +++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td @@ -16,6 +16,64 @@ class MMR6Arch { string BaseOpcode = opstr; } +// Class used for microMIPS32r6 and microMIPS64r6 instructions. 
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 187a022b2563..400f6eef3fb0 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -16,6 +16,64 @@ class MMR6Arch<string opstr> {
   string BaseOpcode = opstr;
 }
 
+// Class used for microMIPS32r6 and microMIPS64r6 instructions.
+class MicroMipsR6Inst16 : PredicateControl {
+  string DecoderNamespace = "MicroMipsR6";
+  let InsnPredicates = [HasMicroMips32r6];
+}
+
+class BC16_FM_MM16R6 {
+  bits<10> offset;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x33;
+  let Inst{9-0} = offset;
+}
+
+class BEQZC_BNEZC_FM_MM16R6<bits<6> op> : MicroMipsR6Inst16 {
+  bits<3> rs;
+  bits<7> offset;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = op;
+  let Inst{9-7} = rs;
+  let Inst{6-0} = offset;
+}
+
+class POOL16C_JALRC_FM_MM16R6<bits<5> op> {
+  bits<5> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-5} = rs;
+  let Inst{4-0} = op;
+}
+
+class POOL16C_JRCADDIUSP_FM_MM16R6<bits<5> op> {
+  bits<5> imm;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-5} = imm;
+  let Inst{4-0} = op;
+}
+
+class POOL16C_LWM_SWM_FM_MM16R6<bits<4> funct> {
+  bits<2> rt;
+  bits<4> addr;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-8} = rt;
+  let Inst{7-4} = addr;
+  let Inst{3-0} = funct;
+}
+
 class POOL32A_BITSWAP_FM_MMR6<bits<6> funct> : MipsR6Inst {
   bits<5> rd;
   bits<5> rt;
@@ -71,6 +129,64 @@ class ADDI_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
   let Inst{15-0} = imm16;
 }
 
+class POOL32C_ST_EVA_FM_MMR6<bits<6> op, bits<3> funct> : MipsR6Inst {
+  bits<21> addr;
+  bits<5> hint;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = hint;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b1010;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LB32_FM_MMR6 : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000111;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class LBU32_FM_MMR6 : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000101;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class POOL32C_LB_LBU_FM_MMR6<bits<3> funct> : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = addr{20-16};
+  let Inst{15-12} = 0b0110;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = addr{8-0};
+}
+
 class SIGN_EXTEND_FM_MMR6<string instr_asm, bits<10> funct>
     : MMR6Arch<instr_asm> {
   bits<5> rd;
@@ -124,6 +240,69 @@ class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
   let Inst{9-0} = funct;
 }
 
+class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = 0;
+  let Inst{15-11} = op;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = 0;
+}
+
+class POOL32A_RDPGPR_FM_MMR6<bits<10> funct> {
+  bits<5> rt;
+  bits<5> rd;
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rd;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_RDHWR_FM_MMR6 {
+  bits<5> rt;
+  bits<5> rs;
+  bits<3> sel;
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-14} = 0;
+  let Inst{13-11} = sel;
+  let Inst{10} = 0;
+  let Inst{9-0} = 0b0111000000;
+}
+
+class POOL32A_SYNC_FM_MMR6 {
+  bits<5> stype;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = stype;
+  let Inst{15-6} = 0b0110101101;
+  let Inst{5-0} = 0b111100;
+}
+
+class POOL32I_SYNCI_FM_MMR6 {
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> immediate = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010000;
+  let Inst{25-21} = 0b01100;
+  let Inst{20-16} = base;
+  let Inst{15-0} = immediate;
+}
+
 class POOL32A_2R_FM_MMR6<bits<10> funct> : MipsR6Inst {
   bits<5> rs;
   bits<5> rt;
@@ -198,6 +377,78 @@ class POOL32A_LSA_FM<bits<6> funct> : MipsR6Inst {
   let Inst{5-0} = funct;
 }
 
+class SB32_SH32_STORE_FM_MMR6<bits<6> op> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class POOL32C_STORE_EVA_FM_MMR6<bits<3> funct> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b1010;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_EVA_FM_MMR6<bits<3> funct> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b0110;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_FM_MMR6 {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b111111;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class LOAD_UPPER_IMM_FM_MMR6 {
+  bits<5> rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000100;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = 0;
+  let Inst{15-0} = imm16;
+}
+
 class CMP_BRANCH_1R_RT_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
   bits<5> rt;
   bits<16> offset;
@@ -222,12 +473,13 @@ class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
   let Inst{15-0} = offset;
 }
 
-class ERET_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+class POOL32A_ERET_FM_MMR6<string instr_asm, bits<10> funct>
+    : MMR6Arch<instr_asm> {
   bits<32> Inst;
 
   let Inst{31-26} = 0x00;
   let Inst{25-16} = 0x00;
-  let Inst{15-6} = 0x3cd;
+  let Inst{15-6} = funct;
   let Inst{5-0} = 0x3c;
 }
 
@@ -262,7 +514,8 @@ class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
   let Inst{5-0} = 0x0;
 }
 
-class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
+    : MMR6Arch<instr_asm> {
   bits<32> Inst;
   bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
@@ -287,3 +540,323 @@ class SHIFT_MMR6_ENC<string instr_asm, bits<6> funct, bit rotate> : MMR6Arch<instr_asm> {
   let Inst{10} = rotate;
   let Inst{9-0} = funct;
 }
+
+class SW32_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
+  bits<5> rt;
+  bits<21> addr;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = addr{20-16};
+  let Inst{15-0} = addr{15-0};
+}
+
+class POOL32C_SWE_FM_MMR6<string instr_asm, bits<6> op, bits<4> fmt,
+                          bits<3> funct> : MMR6Arch<instr_asm> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = fmt;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class POOL32F_ARITH_FM_MMR6<string instr_asm, bits<2> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10} = 0;
+  let Inst{9-8} = fmt;
+  let Inst{7-0} = funct;
+}
+
+class POOL32F_ARITHF_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_MOV_NEG_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MINMAX_FM<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_CMP_FM<string instr_asm, bits<6> format, FIELD_CMP_COND Cond>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-6} = Cond.Value;
+  let Inst{5-0} = format;
+}
+
+class POOL32F_CVT_LW_FM<string instr_asm, bit fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_CVT_DS_FM<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_ABS_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MATH_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL16A_ADDU16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rs;
+  bits<3> rt;
+  bits<3> rd;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b000001;
+  let Inst{9-7} = rs;
+  let Inst{6-4} = rt;
+  let Inst{3-1} = rd;
+  let Inst{0} = 0;
+}
+
+class POOL16C_AND16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = 0b0001;
+}
+
+class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = 0b0000;
+}
+
+class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = op;
+}
+
+class POOL16C_BREAKPOINT_FM_MMR6<bits<6> op> {
+  bits<4> code_;
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-6} = code_;
+  let Inst{5-0} = op;
+}
+
+class POOL16A_SUBU16_FM_MMR6 {
+  bits<3> rs;
+  bits<3> rt;
+  bits<3> rd;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b000001;
+  let Inst{9-7} = rs;
+  let Inst{6-4} = rt;
+  let Inst{3-1} = rd;
+  let Inst{0} = 0b1;
+}
+
+class POOL32A_WRPGPR_WSBH_FM_MMR6<bits<10> funct> : MipsR6Inst {
+  bits<5> rt;
+  bits<5> rs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0x3c;
+}
+
+class POOL32F_RECIP_ROUND_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = fs;
+  let Inst{20-16} = fd;
+  let Inst{15-11} = 0;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = 0b000100000;
+}
+
+class POOL32F_SEL_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_CLASS_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = fs;
+  let Inst{20-16} = fd;
+  let Inst{15-11} = 0b00000;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 53bde1379e29..31b5db036daa 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -11,6 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+def brtarget26_mm : Operand<OtherVT> {
+  let EncoderMethod = "getBranchTarget26OpValueMM";
+  let OperandType = "OPERAND_PCREL";
+  let DecoderMethod = "DecodeBranchTarget26MM";
+  let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Instruction Encodings
 //
@@ -28,6 +35,9 @@ class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>;
 class AUI_MMR6_ENC : AUI_FM_MMR6;
 class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
 class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
+class BC16_MMR6_ENC : BC16_FM_MM16R6;
+class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
+class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
 class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
 class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
 class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
@@ -42,13 +52,19 @@ class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
 class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
 class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
 class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
-class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
-class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
+class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>;
+class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>;
+class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>;
+class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"deret", 0b1110001101>;
 class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
+class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>;
 class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
 class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
+class JRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0x3>;
+class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>;
 class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
 class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>;
+class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>;
 class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>;
 class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>;
 class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>;
@@ -59,15 +75,99 @@ class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>;
 class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>;
 class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>;
class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>; +class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>; class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>; class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>; class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>; class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>; +class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>; class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; +class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>; +class SWE_MMR6_ENC : POOL32C_SWE_FM_MMR6<"swe", 0x18, 0xa, 0x7>; +class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>; +class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>; +class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>; +class PREFE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b010>; +class CACHEE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b011>; +class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x3c5>; +class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x1ec>; +class LB_MMR6_ENC : LB32_FM_MMR6; +class LBU_MMR6_ENC : LBU32_FM_MMR6; +class LBE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b100>; +class LBUE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b000>; +class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>; +class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6; +class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">; +class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">; +class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6; +class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">; +class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>; +class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">; class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>; class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>; +class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>; +class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>; +class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>; +class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>; +class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>; +class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>; +class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>; +class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>; +class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>; +class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>; +class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>; +class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>; +class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>; +class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>; +class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>; +class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>; +class RSQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.s", 0, 0b00001000>; +class RSQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.d", 1, 0b00001000>; +class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>; +class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>; +class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>; +class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>; +class SHE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b101>; +class LLE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b110>; +class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>; 
+class LW_MMR6_ENC : LOAD_WORD_FM_MMR6; +class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6; +class RECIP_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.s", 0, 0b01001000>; +class RECIP_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.d", 1, 0b01001000>; +class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>; +class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>; +class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0, + 0b11001100>; +class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1, + 0b11001100>; +class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0, + 0b11101100>; +class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1, + 0b11101100>; +class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>; +class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>; +class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>; +class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>; +class SELENZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.s", 0, 0b001111000>; +class SELENZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.d", 1, 0b001111000>; +class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>; +class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>; + +class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6; +class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6; +class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>, MicroMipsR6Inst16; +class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6; +class OR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1001>; +class SLL16_MMR6_ENC : SHIFT_FM_MM16<0>, MicroMipsR6Inst16; +class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16; +class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>; +class LI16_MMR6_ENC : LI_FM_MM16; +class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>; +class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>; +class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6; +class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>; class CMP_CBR_RT_Z_MMR6_DESC_BASE @@ -108,6 +208,43 @@ class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm, list Defs = [RA]; } +/// Floating Point Instructions +class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>; +class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>; +class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>; +class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>; +class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>; +class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>; +class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>; +class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>; +class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>; +class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>; +class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>; +class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>; +class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>; +class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>; +class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>; +class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>; +class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>; +class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>; +class MAXA_S_MMR6_ENC : 
POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>; +class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>; +class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>; +class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>; +class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>; +class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>; + +class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>; +class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>; +class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>; +class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>; +class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>; +class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>; +class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>; +class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>; +class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>; +class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>; + //===----------------------------------------------------------------------===// // // Instruction Descriptions @@ -130,11 +267,34 @@ class BC_MMR6_DESC_BASE bit isBarrier = 1; } -class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26> { +class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm> { bit isCall = 1; list Defs = [RA]; } -class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26>; +class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm>; + +class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), + !strconcat("bc16", "\t$offset"), [], + II_BC, FrmI>, + MMR6Arch<"bc16">, MicroMipsR6Inst16 { + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 0; + let AdditionalPredicates = [RelocPIC]; + let Defs = [AT]; +} + +class BEQZC_BNEZC_MM16R6_DESC_BASE + : CBranchZeroMM, MMR6Arch { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 0; + let Defs = [AT]; +} +class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">; +class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">; + class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>; class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>; @@ -162,6 +322,35 @@ class CACHE_HINT_MMR6_DESC; class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>; +class PREFE_CACHEE_MMR6_DESC_BASE : + CACHE_HINT_MMR6_DESC { + string DecoderMethod = "DecodePrefeOpMM"; +} + +class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, GPR32Opnd>; +class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, GPR32Opnd>; + +class LB_LBU_MMR6_DESC_BASE : MMR6Arch { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeLoadByte15"; + bit mayLoad = 1; +} +class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd>; +class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd>; + +class LBE_LBUE_MMR6_DESC_BASE + : LB_LBU_MMR6_DESC_BASE { + let DecoderMethod = "DecodeLoadByte9"; +} +class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd>; +class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd>; + class CLO_CLZ_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); @@ -174,10 +363,22 @@ class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>; class 
EHB_MMR6_DESC : Barrier<"ehb">; class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>; +class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd>; class ERET_MMR6_DESC : ER_FT<"eret">; +class DERET_MMR6_DESC : ER_FT<"deret">; class ERETNC_MMR6_DESC : ER_FT<"eretnc">; +class JALRC16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), + [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, + MMR6Arch, MicroMipsR6Inst16 { + let isCall = 1; + let hasDelaySlot = 0; + let Defs = [RA]; +} +class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>; + class JMP_MMR6_IDX_COMPACT_DESC_BASE : MMR6Arch { @@ -200,6 +401,27 @@ class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, list Defs = [AT]; } +class JRC16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), + [], II_JR, FrmR>, + MMR6Arch, MicroMipsR6Inst16 { + let hasDelaySlot = 0; + let isBranch = 1; + let isIndirectBranch = 1; +} +class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>; + +class JRCADDIUSP_MMR6_DESC + : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm", + [], II_JRADDIUSP, FrmR>, + MMR6Arch<"jrcaddiusp">, MicroMipsR6Inst16 { + let hasDelaySlot = 0; + let isTerminator = 1; + let isBarrier = 1; + let isBranch = 1; + let isIndirectBranch = 1; +} + class ALIGN_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); @@ -241,7 +463,7 @@ class LSA_MMR6_DESC_BASE Pattern = []; } -class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; +class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>; class PCREL_MMR6_DESC_BASE : MMR6Arch { @@ -264,6 +486,18 @@ class SELEQNE_Z_MMR6_DESC_BASE class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>; class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>; +class PAUSE_MMR6_DESC : Barrier<"pause">; +class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel); + string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel"); + list Pattern = []; + InstrItinClass Itinerary = II_RDHWR; + Format Form = FrmR; +} + +class WAIT_MMR6_DESC : WaitMM<"wait">; +class SSNOP_MMR6_DESC : Barrier<"ssnop">; class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>; class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>; class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>; @@ -277,13 +511,426 @@ class ORI_MMR6_DESC : ArithLogicI<"ori", simm16, GPR32Opnd>; class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd>; class XORI_MMR6_DESC : ArithLogicI<"xori", simm16, GPR32Opnd>; +class SWE_MMR6_DESC_BASE : + InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { + let DecoderMethod = "DecodeMem"; + let mayStore = 1; +} +class SW_MMR6_DESC : Store<"sw", GPR32Opnd>; +class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9>; + +class WRPGPR_WSBH_MMR6_DESC_BASE + : MMR6Arch { + dag InOperandList = (ins RO:$rs); + dag OutOperandList = (outs RO:$rt); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); + list Pattern = []; + Format f = FrmR; + string BaseOpcode = instr_asm; + bit hasSideEffects = 0; +} +class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd>; +class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd>; + +/// Floating Point Instructions +class FARITH_MMR6_DESC_BASE : HARDFLOAT { + dag OutOperandList = (outs RC:$fd); + dag InOperandList = (ins 
RC:$ft, RC:$fs); + string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); + list Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]; + InstrItinClass Itinerary = Itin; + bit isCommutable = isComm; +} +class FADD_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>; +class FADD_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"add.d", AFGR64Opnd, II_ADD_D, 1, fadd>; +class FSUB_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>; +class FSUB_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"sub.d", AFGR64Opnd, II_SUB_D, 0, fsub>; +class FMUL_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>; +class FMUL_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"mul.d", AFGR64Opnd, II_MUL_D, 1, fmul>; +class FDIV_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>; +class FDIV_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>; +class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>, HARDFLOAT; +class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>, HARDFLOAT; +class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>, HARDFLOAT; +class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>, HARDFLOAT; + +class FMOV_FNEG_MMR6_DESC_BASE + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; +} +class FMOV_S_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>; +class FMOV_D_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>; +class FNEG_S_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>; +class FNEG_D_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>; + +class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>, HARDFLOAT; +class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>, HARDFLOAT; +class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>, HARDFLOAT; +class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>, HARDFLOAT; + +class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>, HARDFLOAT; +class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>, HARDFLOAT; +class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>, HARDFLOAT; +class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>, HARDFLOAT; + +class CVT_MMR6_DESC_BASE< + string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode = null_frag> + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; +} + +class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd, + II_CVT>; +class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd, + II_CVT>; +class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd, + II_CVT>; +class CVT_W_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.d", FGR32Opnd, AFGR64Opnd, + II_CVT>; +class CVT_D_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.s", FGR32Opnd, AFGR64Opnd, + II_CVT>; +class CVT_D_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.w", FGR32Opnd, 
AFGR64Opnd, + II_CVT>; +class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd, + II_CVT>, FGR_64; +class CVT_S_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.d", AFGR64Opnd, FGR32Opnd, + II_CVT>; +class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd, + II_CVT>; +class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd, + II_CVT>, FGR_64; + +multiclass CMP_CC_MMR6 format, string Typestr, + RegisterOperand FGROpnd> { + def CMP_AF_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_UN_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_EQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_UEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_LT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_ULT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_LE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_ULE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SAF_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SUN_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>, + CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SUEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SLT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SULT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SLE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SULE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + 
ISA_MICROMIPS32R6; +} + +class ABSS_FT_MMR6_DESC_BASE + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; + list EncodingPredicates = [HasStdEnc]; +} + +class ABS_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.s", FGR32Opnd, FGR32Opnd, + II_ABS, fabs>; +class ABS_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.d", AFGR64Opnd, AFGR64Opnd, + II_ABS, fabs>; +class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd, + FGR32Opnd, II_FLOOR>; +class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd, + FGR64Opnd, II_FLOOR>; +class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd, + FGR32Opnd, II_FLOOR>; +class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd, + AFGR64Opnd, II_FLOOR>; +class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd, + FGR32Opnd, II_CEIL>; +class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd, + FGR64Opnd, II_CEIL>; +class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd, + FGR32Opnd, II_CEIL>; +class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd, + AFGR64Opnd, II_CEIL>; +class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd, + FGR32Opnd, II_TRUNC>; +class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd, + FGR64Opnd, II_TRUNC>; +class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>; +class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd, + AFGR64Opnd, II_TRUNC>; +class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd, + II_SQRT_S, fsqrt>; +class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd, + II_SQRT_D, fsqrt>; +class RSQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>; +class RSQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.d", FGR32Opnd, + AFGR64Opnd, II_TRUNC>; +class RECIP_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.s", FGR32Opnd, + FGR32Opnd, II_ROUND>; +class RECIP_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.d", FGR32Opnd, FGR32Opnd, + II_ROUND>; +class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd, + FGR32Opnd, II_ROUND>; +class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd, + FGR64Opnd, II_ROUND>; +class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd, + FGR32Opnd, II_ROUND>; +class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd, + FGR64Opnd, II_ROUND>; + +class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>; +class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> { + // We must insert a SUBREG_TO_REG around $fd_in + bit usesCustomInserter = 1; +} + +class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>; +class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>; +class SELENZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>; +class SELENZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>; +class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>; +class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>; +class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>; +class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>; + 
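Note the two-axis structure running through this whole file: each *_ENC class fixes only the bit layout (through the format classes from MicroMips32r6InstrFormats.td), each *_DESC class fixes only operands, assembly text and selection patterns, and a later def multi-inherits one of each plus the ISA predicate. A minimal self-contained sketch of that composition, using hypothetical Demo* names rather than anything from this import (parses with llvm-tblgen):

    // Encoding axis: bit layout only.
    class DemoEnc<bits<6> op> {
      bits<32> Inst;
      let Inst{31-26} = op; // major opcode; the rest stays open here
    }

    // Description axis: stands in for the operand lists, AsmString and
    // patterns carried by the real *_DESC classes.
    class DemoDesc<string asm> {
      string AsmString = asm;
    }

    // The definition mixes one class from each axis. The real defs also mix
    // in relation records (StdMMR6Rel/R6MMR6Rel) and predicates such as
    // ISA_MICROMIPS32R6, omitted here for brevity.
    def DEMO : DemoEnc<0b000000>, DemoDesc<"demo">;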
+class STORE_MMR6_DESC_BASE + : Store, MMR6Arch { + let DecoderMethod = "DecodeMemMMImm16"; +} +class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd>; + +class STORE_EVA_MMR6_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins RO:$rt, mem_mm_9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeStoreEvaOpMM"; + bit mayStore = 1; +} +class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd>; +class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd>; +class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd>; +class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd>; +class LOAD_WORD_EVA_MMR6_DESC_BASE : + MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs RO:$rt); + dag InOperandList = (ins mem_mm_12:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeMemMMImm9"; + bit mayLoad = 1; +} +class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd>; +class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd>; +class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, + MMR6Arch<"addu16">; +class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>, + MMR6Arch<"and16">; +class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, + MMR6Arch<"andi16">; +class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16">; +class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, + MMR6Arch<"or16">; +class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, + MMR6Arch<"sll16">; +class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, + MMR6Arch<"srl16">; +class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16">, MMR6Arch<"srl16">, + MicroMipsR6Inst16; +class LI16_MMR6_DESC : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, + MMR6Arch<"srl16">, MicroMipsR6Inst16, IsAsCheapAsAMove; +class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"srl16">, + MicroMipsR6Inst16; +class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16">, MMR6Arch<"sdbbp16">, + MicroMipsR6Inst16; +class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, + MMR6Arch<"sdbbp16">, MicroMipsR6Inst16; +class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, + MMR6Arch<"sdbbp16">, MicroMipsR6Inst16; + +class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins mem:$addr); + string AsmString = "lw\t$rt, $addr"; + let DecoderMethod = "DecodeMemMMImm16"; + let canFoldAsLoad = 1; + let mayLoad = 1; + list Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))]; + InstrItinClass Itinerary = II_LW; +} + +class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst{ + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins uimm16:$imm16); + string AsmString = "lui\t$rt, $imm16"; + list Pattern = []; + bit hasSideEffects = 0; + bit isReMaterializable = 1; + InstrItinClass Itinerary = II_LUI; + Format Form = FrmI; +} + +class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins i32imm:$stype); + string AsmString = !strconcat("sync", "\t$stype"); + list Pattern = [(MipsSync imm:$stype)]; + InstrItinClass Itinerary = NoItinerary; + bit HasSideEffects = 1; +} + +class SYNCI_MMR6_DESC : SYNCI_FT<"synci"> { + let DecoderMethod = "DecodeSynciR6"; +} + +class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst { + 
dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins GPR32Opnd:$rd); + string AsmString = !strconcat("rdpgpr", "\t$rt, $rd"); +} + +class SDBBP_MMR6_DESC : MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins uimm20:$code_); + string AsmString = !strconcat("sdbbp", "\t$code_"); + list Pattern = []; +} + +class LWM16_MMR6_DESC + : MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr), + !strconcat("lwm16", "\t$rt, $addr"), [], + NoItinerary, FrmI>, + MMR6Arch<"lwm16">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; + let mayLoad = 1; + InstrItinClass Itin = NoItinerary; + ComplexPattern Addr = addr; +} + +class SWM16_MMR6_DESC + : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr), + !strconcat("swm16", "\t$rt, $addr"), [], + NoItinerary, FrmI>, + MMR6Arch<"swm16">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; + let mayStore = 1; + InstrItinClass Itin = NoItinerary; + ComplexPattern Addr = addr; +} + +class SB16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>, + MMR6Arch, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMImm4"; + let mayStore = 1; +} +class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd, + truncstorei8, II_SB, mem_mm_4>; +class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd, + truncstorei16, II_SH, mem_mm_4_lsl1>; +class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd, + store, II_SW, mem_mm_4_lsl2>; + +class SWSP_MMR6_DESC + : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), + !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>, + MMR6Arch<"sw">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMSPImm5Lsl2"; + let mayStore = 1; +} + //===----------------------------------------------------------------------===// // // Instruction Definitions // //===----------------------------------------------------------------------===// -let DecoderNamespace = "MicroMips32r6" in { +let DecoderNamespace = "MicroMipsR6" in { def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6; @@ -298,6 +945,11 @@ def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6; def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6; def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6; def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6; +def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6; +def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC, + ISA_MICROMIPS32R6; +def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC, + ISA_MICROMIPS32R6; def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC, @@ -320,13 +972,21 @@ def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6; def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6; def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6; def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6; -def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; +def DI_MMR6 : StdMMR6Rel, 
DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6; +def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; +def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6; def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC, ISA_MICROMIPS32R6; +def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC, + ISA_MICROMIPS32R6; def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6; def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6; +def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6; +def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC, + ISA_MICROMIPS32R6; def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6; def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6; +def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6; def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6; def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6; def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6; @@ -337,17 +997,211 @@ def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6; def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6; def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6; def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6; +def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6; def SEB_MMR6 : StdMMR6Rel, SEB_MMR6_DESC, SEB_MMR6_ENC, ISA_MICROMIPS32R6; def SEH_MMR6 : StdMMR6Rel, SEH_MMR6_DESC, SEH_MMR6_ENC, ISA_MICROMIPS32R6; def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC, ISA_MICROMIPS32R6; +def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6; def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6; +def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6; +def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6; +def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6; +def PREFE_MMR6 : StdMMR6Rel, PREFE_MMR6_ENC, PREFE_MMR6_DESC, ISA_MICROMIPS32R6; +def CACHEE_MMR6 : StdMMR6Rel, CACHEE_MMR6_ENC, CACHEE_MMR6_DESC, + ISA_MICROMIPS32R6; +def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC, + ISA_MICROMIPS32R6; +def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6; +def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6; +def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6; +def LBE_MMR6 : R6MMR6Rel, LBE_MMR6_ENC, LBE_MMR6_DESC, ISA_MICROMIPS32R6; +def LBUE_MMR6 : R6MMR6Rel, LBUE_MMR6_ENC, LBUE_MMR6_DESC, ISA_MICROMIPS32R6; +def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6; +def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6; +def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6; +def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6; +def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6; +def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6; 
+def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC, + ISA_MICROMIPS32R6; +def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6; def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6; def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; +let DecoderMethod = "DecodeMemMMImm16" in { + def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6; +} +let DecoderMethod = "DecodeMemMMImm9" in { + def SWE_MMR6 : StdMMR6Rel, SWE_MMR6_DESC, SWE_MMR6_ENC, ISA_MICROMIPS32R6; +} +/// Floating Point Instructions +def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FADD_D_MMR6 : StdMMR6Rel, FADD_D_MMR6_ENC, FADD_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FSUB_D_MMR6 : StdMMR6Rel, FSUB_D_MMR6_ENC, FSUB_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMUL_D_MMR6 : StdMMR6Rel, FMUL_D_MMR6_ENC, FMUL_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FDIV_D_MMR6 : StdMMR6Rel, FDIV_D_MMR6_ENC, FDIV_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FNEG_D_MMR6 : StdMMR6Rel, FNEG_D_MMR6_ENC, FNEG_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6; +def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6; +def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6; +def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6; +def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_W_D_MMR6 : StdMMR6Rel, CVT_W_D_MMR6_ENC, CVT_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_S_MMR6 : StdMMR6Rel, CVT_D_S_MMR6_ENC, CVT_D_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_W_MMR6 : StdMMR6Rel, CVT_D_W_MMR6_ENC, CVT_D_W_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_D_MMR6 : StdMMR6Rel, CVT_S_D_MMR6_ENC, CVT_S_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC, + 
ISA_MICROMIPS32R6; +defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd>; +defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd>; +def ABS_S_MMR6 : StdMMR6Rel, ABS_S_MMR6_ENC, ABS_S_MMR6_DESC, ISA_MICROMIPS32R6; +def ABS_D_MMR6 : StdMMR6Rel, ABS_D_MMR6_ENC, ABS_D_MMR6_DESC, ISA_MICROMIPS32R6; +def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_L_D_MMR6 : StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def RSQRT_S_MMR6 : StdMMR6Rel, RSQRT_S_MMR6_ENC, RSQRT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RSQRT_D_MMR6 : StdMMR6Rel, RSQRT_D_MMR6_ENC, RSQRT_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6; +def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6; +def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6; +def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6; +def SHE_MMR6 : StdMMR6Rel, SHE_MMR6_DESC, SHE_MMR6_ENC, ISA_MICROMIPS32R6; +def LLE_MMR6 : StdMMR6Rel, LLE_MMR6_DESC, LLE_MMR6_ENC, ISA_MICROMIPS32R6; +def LWE_MMR6 : StdMMR6Rel, LWE_MMR6_DESC, LWE_MMR6_ENC, ISA_MICROMIPS32R6; +def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6; +def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6; +def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC, + ISA_MICROMIPS32R6; +def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC, + ISA_MICROMIPS32R6; +def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC, + ISA_MICROMIPS32R6; +def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC, + ISA_MICROMIPS32R6; +def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC, + ISA_MICROMIPS32R6; +def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC, + ISA_MICROMIPS32R6; +def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC, + ISA_MICROMIPS32R6; +def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC, + ISA_MICROMIPS32R6; +def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC, + 
ISA_MICROMIPS32R6; +def RECIP_S_MMR6 : StdMMR6Rel, RECIP_S_MMR6_ENC, RECIP_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RECIP_D_MMR6 : StdMMR6Rel, RECIP_D_MMR6_ENC, RECIP_D_MMR6_DESC, ISA_MICROMIPS32R6; +def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6; +def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SEL_S_MMR6 : StdMMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6; +def SEL_D_MMR6 : StdMMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6; +def SELEQZ_S_MMR6 : StdMMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELEQZ_D_MMR6 : StdMMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELENZ_S_MMR6 : StdMMR6Rel, SELENZ_S_MMR6_ENC, SELENZ_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELENZ_D_MMR6 : StdMMR6Rel, SELENZ_D_MMR6_ENC, SELENZ_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC, + ISA_MICROMIPS32R6; } //===----------------------------------------------------------------------===// @@ -357,4 +1211,23 @@ def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; //===----------------------------------------------------------------------===// def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6; +def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), + !strconcat("b", "\t$offset")> { + string DecoderNamespace = "MicroMipsR6"; +} +def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"rdhwr $rt, $rs", + (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, + ISA_MICROMIPS32R6; + +//===----------------------------------------------------------------------===// +// +// MicroMips arbitrary patterns that map to one or more instructions +// +//===----------------------------------------------------------------------===// + +def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr), + (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, ISA_MICROMIPS32R6; diff --git a/lib/Target/Mips/MicroMips64r6InstrFormats.td b/lib/Target/Mips/MicroMips64r6InstrFormats.td new file mode 100644 index 000000000000..da305a2d508a --- /dev/null +++ b/lib/Target/Mips/MicroMips64r6InstrFormats.td @@ -0,0 +1,86 @@ +//=- MicroMips64r6InstrFormats.td - Instruction Formats -*- tablegen -* -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes microMIPS64r6 instruction formats. 
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_FM_MMR6 {
+  bits<5> rt;
+  bits<5> rs;
+  bits<16> imm;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b111100;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-0} = imm;
+}
+
+class POOL32I_ADD_IMM_FM_MMR6<bits<5> funct> {
+  bits<5> rs;
+  bits<16> imm;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010000;
+  let Inst{25-21} = funct;
+  let Inst{20-16} = rs;
+  let Inst{15-0} = imm;
+}
+
+class POOL32S_EXTBITS_FM_MMR6<bits<6> funct> {
+  bits<5> rt;
+  bits<5> rs;
+  bits<5> size;
+  bits<5> pos;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = size;
+  let Inst{10-6} = pos;
+  let Inst{5-0} = funct;
+}
+
+class POOL32S_DALIGN_FM_MMR6 {
+  bits<5> rs;
+  bits<5> rt;
+  bits<5> rd;
+  bits<3> bp;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = rd;
+  let Inst{10-8} = bp;
+  let Inst{7-6} = 0b00;
+  let Inst{5-0} = 0b011100;
+}
+
+class POOL32A_DIVMOD_FM_MMR6<string instr_asm, bits<9> funct>
+    : MMR6Arch<instr_asm> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = rt;
+  let Inst{10-9} = 0b00;
+  let Inst{8-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
new file mode 100644
index 000000000000..ec1aef86a942
--- /dev/null
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -0,0 +1,119 @@
+//=- MicroMips64r6InstrInfo.td - Instruction Information -*- tablegen -*- -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips64r6 instructions.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Instruction Encodings +// +//===----------------------------------------------------------------------===// + +class DAUI_MMR6_ENC : DAUI_FM_MMR6; +class DAHI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10001>; +class DATI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10000>; +class DEXT_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b101100>; +class DEXTM_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b100100>; +class DEXTU_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b010100>; +class DALIGN_MMR6_ENC : POOL32S_DALIGN_FM_MMR6; +class DDIV_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddiv", 0b100011000>; +class DMOD_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmod", 0b101011000>; +class DDIVU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddivu", 0b110011000>; +class DMODU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmodu", 0b111011000>; + +//===----------------------------------------------------------------------===// +// +// Instruction Descriptions +// +//===----------------------------------------------------------------------===// + +class DAUI_MMR6_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins GPROpnd:$rs, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); + list Pattern = []; +} +class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd>; + +class DAHI_DATI_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins GPROpnd:$rt, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); + string Constraints = "$rs = $rt"; +} +class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd>; +class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd>; + +class EXTBITS_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs RO:$rt); + dag InOperandList = (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $pos, $size"); + list Pattern = [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))]; + InstrItinClass Itinerary = II_EXT; + Format Form = FrmR; + string BaseOpcode = instr_asm; +} +// TODO: Add 'pos + size' constraint check to dext* instructions +// DEXT: 0 < pos + size <= 63 +// DEXTM, DEXTU: 32 < pos + size <= 64 +class DEXT_MMR6_DESC : EXTBITS_DESC_BASE<"dext", GPR64Opnd, uimm5, + uimm5_plus1, MipsExt>; +class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5, + uimm5_plus33, MipsExt>; +class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32, + uimm5_plus1, MipsExt>; + +class DALIGN_DESC_BASE : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp"); + list Pattern = []; +} + +class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>; + +class DDIV_MM64R6_DESC : ArithLogicR<"ddiv", GPR32Opnd>; +class DMOD_MM64R6_DESC : ArithLogicR<"dmod", GPR32Opnd>; +class DDIVU_MM64R6_DESC : ArithLogicR<"ddivu", GPR32Opnd>; +class DMODU_MM64R6_DESC : ArithLogicR<"dmodu", GPR32Opnd>; + +//===----------------------------------------------------------------------===// +// +// Instruction Definitions +// +//===----------------------------------------------------------------------===// + +let DecoderNamespace = "MicroMipsR6" in { + def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6; + def 
DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6; + def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6; + def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC, + ISA_MICROMIPS64R6; + def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC, + ISA_MICROMIPS64R6; + def DEXTU_MM64R6 : StdMMR6Rel, DEXTU_MMR6_DESC, DEXTU_MMR6_ENC, + ISA_MICROMIPS64R6; + def DALIGN_MM64R6 : StdMMR6Rel, DALIGN_MMR6_DESC, DALIGN_MMR6_ENC, + ISA_MICROMIPS64R6; + def DDIV_MM64R6 : R6MMR6Rel, DDIV_MM64R6_DESC, DDIV_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DMOD_MM64R6 : R6MMR6Rel, DMOD_MM64R6_DESC, DMOD_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DDIVU_MM64R6 : R6MMR6Rel, DDIVU_MM64R6_DESC, DDIVU_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DMODU_MM64R6 : R6MMR6Rel, DMODU_MM64R6_DESC, DMODU_MM64R6_ENC, + ISA_MICROMIPS64R6; +} diff --git a/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/lib/Target/Mips/MicroMipsDSPInstrFormats.td new file mode 100644 index 000000000000..f11c09abfc36 --- /dev/null +++ b/lib/Target/Mips/MicroMipsDSPInstrFormats.td @@ -0,0 +1,244 @@ +//===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class MMDSPInst + : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl { + let InsnPredicates = [HasDSP]; + let AdditionalPredicates = [InMicroMips]; + string BaseOpcode = opstr; + string Arch = "mmdsp"; + let DecoderNamespace = "MicroMips"; +} + +class MMDSPInstAlias + : InstAlias, PredicateControl { + let InsnPredicates = [HasDSP]; + let AdditionalPredicates = [InMicroMips]; +} + +class POOL32A_3R_FMT op> : MMDSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10-0} = op; +} + +class POOL32A_2R_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RAC_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_3RB0_FMT op> : MMDSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = 0b0; + let Inst{9-0} = op; +} + +class POOL32A_2RSA4_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11-0} = op; +} + +class POOL32A_2RSA3_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<3> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-13} = sa; + let Inst{12-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RSA5B0_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = sa; + let Inst{10} = 0b0; + let Inst{9-0} = op; +} + +class POOL32A_2RSA4B0_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 
0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11} = 0b0; + let Inst{10-0} = op; +} + +class POOL32A_2RSA4OP6_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RIMM5AC_FMT funct> : MMDSPInst { + bits<5> rt; + bits<5> imm; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = imm; + let Inst{15-14} = ac; + let Inst{13-6} = funct; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RSA5_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = sa; + let Inst{10-0} = op; +} + +class POOL32A_1RMEMB0_FMT funct> : MMDSPInst { + bits<5> index; + bits<5> base; + bits<5> rd; + + let Inst{31-26} = 0; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = rd; + let Inst{10} = 0b0; + let Inst{9-0} = funct; +} + +class POOL32A_1RAC_FMT funct> : MMDSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-21} = 0; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = funct; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RMASK7_FMT op> : MMDSPInst { + bits<5> rt; + bits<7> mask; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-14} = mask; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RIMM10_FMT op> : MMDSPInst { + bits<5> rd; + bits<10> imm; + + let Inst{31-26} = 0; + let Inst{25-16} = imm; + let Inst{15-11} = rd; + let Inst{10} = 0; + let Inst{9-0} = op; +} + +class POOL32A_1RIMM8_FMT op> : MMDSPInst { + bits<5> rt; + bits<8> imm; + + let Inst{31-26} = 0; + let Inst{25-21} = rt; + let Inst{20-13} = imm; + let Inst{12} = 0; + let Inst{11-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_4B0SHIFT6AC4B0_FMT op> : MMDSPInst { + bits<6> shift; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-22} = 0b0000; + let Inst{21-16} = shift; + let Inst{15-14} = ac; + let Inst{13-10} = 0b0000; + let Inst{9-0} = op; +} + +class POOL32A_5B01RAC_FMT op> : MMDSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = 0b00000; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td new file mode 100644 index 000000000000..b342e2371df4 --- /dev/null +++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -0,0 +1,528 @@ +//===- MicroMipsDSPInstrInfo.td - Micromips DSP instructions -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes MicroMips DSP instructions. +// +//===----------------------------------------------------------------------===// + +// Instruction encoding. 
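+// How to read the encoding classes that follow: each one instantiates a
+// format class from MicroMipsDSPInstrFormats.td with the mnemonic and the
+// minor opcode, while the format class fixes the field layout. A minimal
+// sketch of the pattern (FOO_QB_MM_ENC and its opcode bits are hypothetical,
+// shown only to illustrate the shape):
+//
+//   class FOO_QB_MM_ENC : POOL32A_3R_FMT<"foo.qb", 0b01110001101>;
+//
+// With POOL32A_3R_FMT above, the 11-bit minor opcode fills Inst{10-0} and
+// the rt/rs/rd register numbers fill Inst{25-21}, Inst{20-16}, Inst{15-11}.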
+class ADDQ_PH_MM_ENC : POOL32A_3R_FMT<"addq.ph", 0b00000001101>; +class ADDQ_S_PH_MM_ENC : POOL32A_3R_FMT<"addq_s.ph", 0b10000001101>; +class ADDQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"addq_s.w", 0b1100000101>; +class ADDQH_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh.ph", 0b00001001101>; +class ADDQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.ph", 0b10001001101>; +class ADDQH_W_MMR2_ENC: POOL32A_3R_FMT<"addqh.w", 0b00010001101>; +class ADDQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.w", 0b10010001101>; +class ADDU_PH_MMR2_ENC : POOL32A_3R_FMT<"addu.ph", 0b00100001101>; +class ADDU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"addu_s.ph", 0b10100001101>; +class ADDU_QB_MM_ENC : POOL32A_3R_FMT<"addu.qb", 0b00011001101>; +class ADDU_S_QB_MM_ENC : POOL32A_3R_FMT<"addu_s.qb", 0b10011001101>; +class ADDUH_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh.qb", 0b00101001101>; +class ADDUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh_r.qb", 0b10101001101>; +class ADDSC_MM_ENC : POOL32A_3RB0_FMT<"addsc", 0b1110000101>; +class ADDWC_MM_ENC : POOL32A_3RB0_FMT<"addwc", 0b1111000101>; +class DPA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpa.w.ph", 0b00000010>; +class DPAQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpaq_s.w.ph", 0b00001010>; +class DPAQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpaq_sa.l.w", 0b01001010>; +class DPAQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_s.w.ph", 0b10001010>; +class DPAQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_sa.w.ph", 0b11001010>; +class DPAU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbl", 0b10000010>; +class DPAU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbr", 0b11000010>; +class DPAX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpax.w.ph", 0b01000010>; +class ABSQ_S_PH_MM_ENC : POOL32A_2R_FMT<"absq_s.ph", 0b0001000100>; +class ABSQ_S_W_MM_ENC : POOL32A_2R_FMT<"absq_s.w", 0b0010000100>; +class ABSQ_S_QB_MMR2_ENC : POOL32A_2R_FMT<"absq_s.qb", 0b0000000100>; +class INSV_MM_ENC : POOL32A_2R_FMT<"insv", 0b0100000100>; +class MADD_DSP_MM_ENC : POOL32A_2RAC_FMT<"madd", 0b00101010>; +class MADDU_DSP_MM_ENC : POOL32A_2RAC_FMT<"maddu", 0b01101010>; +class MSUB_DSP_MM_ENC : POOL32A_2RAC_FMT<"msub", 0b10101010>; +class MSUBU_DSP_MM_ENC : POOL32A_2RAC_FMT<"msubu", 0b11101010>; +class MULT_DSP_MM_ENC : POOL32A_2RAC_FMT<"mult", 0b00110010>; +class MULTU_DSP_MM_ENC : POOL32A_2RAC_FMT<"multu", 0b01110010>; +class SHLL_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll.ph", 0b001110110101>; +class SHLL_S_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll_s.ph", 0b101110110101>; +class SHLL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shll.qb", 0b0100001>; +class SHLLV_PH_MM_ENC : POOL32A_3R_FMT<"shllv.ph", 0b00000001110>; +class SHLLV_S_PH_MM_ENC : POOL32A_3R_FMT<"shllv_s.ph", 0b10000001110>; +class SHLLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shllv.qb", 0b1110010101>; +class SHLLV_S_W_MM_ENC : POOL32A_3RB0_FMT<"shllv_s.w", 0b1111010101>; +class SHLL_S_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shll_s.w", 0b1111110101>; +class SHRA_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra.qb", 0b0000111>; +class SHRA_R_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra_r.qb", 0b1000111>; +class SHRA_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra.ph", 0b01100110101>; +class SHRA_R_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra_r.ph", 0b11100110101>; +class SHRAV_PH_MM_ENC : POOL32A_3R_FMT<"shrav.ph", 0b00110001101>; +class SHRAV_R_PH_MM_ENC : POOL32A_3R_FMT<"shrav_r.ph", 0b10110001101>; +class SHRAV_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav.qb", 0b00111001101>; +class SHRAV_R_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav_r.qb", 0b10111001101>; +class SHRAV_R_W_MM_ENC : POOL32A_3RB0_FMT<"shrav_r.w", 0b1011010101>; +class SHRA_R_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shra_r.w", 0b1011110101>; +class 
SHRL_PH_MMR2_ENC : POOL32A_2RSA4OP6_FMT<"shrl.ph", 0b001111>; +class SHRL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shrl.qb", 0b1100001>; +class SHRLV_PH_MMR2_ENC : POOL32A_3RB0_FMT<"shrlv.ph", 0b1100010101>; +class SHRLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shrlv.qb", 0b1101010101>; +class PRECEQ_W_PHL_MM_ENC : POOL32A_2R_FMT<"preceq.w.phl", 0b0101000100>; +class PRECEQ_W_PHR_MM_ENC : POOL32A_2R_FMT<"preceq.w.phr", 0b0110000100>; +class PRECEQU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbl", 0b0111000100>; +class PRECEQU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbla", 0b0111001100>; +class PRECEQU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbr", 0b1001000100>; +class PRECEQU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbra", 0b1001001100>; +class PRECEU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbl", 0b1011000100>; +class PRECEU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbla", 0b1011001100>; +class PRECEU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbr", 0b1101000100>; +class PRECEU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbra", 0b1101001100>; +class SUBQ_PH_MM_ENC : POOL32A_3R_FMT<"subq.ph", 0b01000001101>; +class SUBQ_S_PH_MM_ENC : POOL32A_3R_FMT<"subq_s.ph", 0b11000001101>; +class SUBQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"subq_s.w", 0b1101000101>; +class SUBQH_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh.ph", 0b01001001101>; +class SUBQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.ph", 0b11001001101>; +class SUBQH_W_MMR2_ENC : POOL32A_3R_FMT<"subqh.w", 0b01010001101>; +class SUBQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.w", 0b11010001101>; +class SUBU_PH_MMR2_ENC : POOL32A_3R_FMT<"subu.ph", 0b01100001101>; +class SUBU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"subu_s.ph", 0b11100001101>; +class SUBU_QB_MM_ENC : POOL32A_3R_FMT<"subu.qb", 0b01011001101>; +class SUBU_S_QB_MM_ENC : POOL32A_3R_FMT<"subu_s.qb", 0b11011001101>; +class SUBUH_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh.qb", 0b01101001101>; +class SUBUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh_r.qb", 0b11101001101>; +class EXTP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extp", 0b10011001>; +class EXTPDP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extpdp", 0b11011001>; +class EXTPDPV_MM_ENC : POOL32A_2RAC_FMT<"extpdpv", 0b11100010>; +class EXTPV_MM_ENC : POOL32A_2RAC_FMT<"extpv", 0b10100010>; +class EXTR_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr.w", 0b00111001>; +class EXTR_R_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_r.w", 0b01111001>; +class EXTR_RS_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_rs.w", 0b10111001>; +class EXTR_S_H_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_s.h", 0b11111001>; +class EXTRV_W_MM_ENC : POOL32A_2RAC_FMT<"extrv.w", 0b00111010>; +class EXTRV_R_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_r.w", 0b01111010>; +class EXTRV_RS_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_rs.w", 0b10111010>; +class EXTRV_S_H_MM_ENC : POOL32A_2RAC_FMT<"extrv_s.h", 0b11111010>; +class DPS_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dps.w.ph", 0b00010010>; +class DPSQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpsq_s.w.ph", 0b00011010>; +class DPSQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpsq_sa.l.w", 0b01011010>; +class DPSQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_s.w.ph", 0b10011010>; +class DPSQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_sa.w.ph", 0b11011010>; +class DPSU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbl", 0b10010010>; +class DPSU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbr", 0b11010010>; +class DPSX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsx.w.ph", 0b01010010>; +class MUL_PH_MMR2_ENC : POOL32A_3R_FMT<"mul.ph", 0b00000101101>; +class MUL_S_PH_MMR2_ENC : POOL32A_3R_FMT<"mul_s.ph", 0b10000101101>; +class MULEQ_S_W_PHL_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phl", 
0b0000100101>; +class MULEQ_S_W_PHR_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phr", 0b0001100101>; +class MULEU_S_PH_QBL_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbl", 0b0010010101>; +class MULEU_S_PH_QBR_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbr", 0b0011010101>; +class MULQ_RS_PH_MM_ENC : POOL32A_3RB0_FMT<"mulq_rs.ph", 0b0100010101>; +class MULQ_RS_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_rs.w", 0b0110010101>; +class MULQ_S_PH_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.ph", 0b0101010101>; +class MULQ_S_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.w", 0b0111010101>; +class PRECR_QB_PH_MMR2_ENC : POOL32A_3RB0_FMT<"precr.qb.ph", 0b0001101101>; +class PRECR_SRA_PH_W_MMR2_ENC + : POOL32A_2RSA5_FMT<"precr_sra.ph.w", 0b01111001101>; +class PRECR_SRA_R_PH_W_MMR2_ENC + : POOL32A_2RSA5_FMT<"precr_sra_r.ph.w", 0b11111001101>; +class PRECRQ_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq.ph.w", 0b0011101101>; +class PRECRQ_QB_PH_MM_ENC : POOL32A_3RB0_FMT<"precrq.qb.ph", 0b0010101101>; +class PRECRQU_S_QB_PH_MM_ENC + : POOL32A_3RB0_FMT<"precrqu_s.qb.ph", 0b0101101101>; +class PRECRQ_RS_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq_rs.ph.w", 0b0100101101>; +class LBUX_MM_ENC : POOL32A_1RMEMB0_FMT<"lbux", 0b1000100101>; +class LHX_MM_ENC : POOL32A_1RMEMB0_FMT<"lhx", 0b0101100101>; +class LWX_MM_ENC : POOL32A_1RMEMB0_FMT<"lwx", 0b0110100101>; +class MAQ_S_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phl", 0b01101001>; +class MAQ_SA_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phl", 0b11101001>; +class MAQ_S_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phr", 0b00101001>; +class MAQ_SA_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phr", 0b10101001>; +class MFHI_MM_ENC : POOL32A_1RAC_FMT<"mfhi", 0b00000001>; +class MFLO_MM_ENC : POOL32A_1RAC_FMT<"mflo", 0b01000001>; +class MTHI_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b10000001>; +class MTLO_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b11000001>; +class PREPEND_MMR2_ENC : POOL32A_2RSA5B0_FMT<"prepend", 0b1001010101>; +class RADDU_W_QB_MM_ENC : POOL32A_2R_FMT<"raddu.w.qb", 0b1111000100>; +class RDDSP_MM_ENC : POOL32A_1RMASK7_FMT<"rddsp", 0b00011001>; +class REPL_PH_MM_ENC : POOL32A_1RIMM10_FMT<"repl.ph", 0b0000111101>; +class REPL_QB_MM_ENC : POOL32A_1RIMM8_FMT<"repl.qb", 0b010111>; +class REPLV_PH_MM_ENC : POOL32A_2R_FMT<"replv.ph", 0b0000001100>; +class REPLV_QB_MM_ENC : POOL32A_2R_FMT<"replv.qb", 0b0001001100>; +class MTHLIP_MM_ENC : POOL32A_1RAC_FMT<"mthlip", 0b00001001>; +class PACKRL_PH_MM_ENC : POOL32A_3RB0_FMT<"packrl.ph", 0b0110101101>; +class PICK_PH_MM_ENC : POOL32A_3RB0_FMT<"pick.ph", 0b1000101101>; +class PICK_QB_MM_ENC : POOL32A_3RB0_FMT<"pick.qb", 0b0111101101>; +class SHILO_MM_ENC : POOL32A_4B0SHIFT6AC4B0_FMT<"shilo", 0b0000011101>; +class SHILOV_MM_ENC : POOL32A_5B01RAC_FMT<"shilov", 0b01001001>; +class WRDSP_MM_ENC : POOL32A_1RMASK7_FMT<"wrdsp", 0b01011001>; + +// Instruction desc. 
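+// How to read the description classes that follow: where the *_ENC classes
+// above carry only bit layout, the *_DESC classes carry the operand lists,
+// the assembly string, and the selection pattern; a final 'def' inherits
+// from one of each. A minimal sketch of the pattern (FOO_QB_MM_DESC is a
+// hypothetical name, shown only to illustrate how the pieces compose):
+//
+//   class FOO_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+//       "foo.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>;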
+class ABSQ_S_PH_MM_R2_DESC_BASE<string opstr, SDPatternOperator OpNode,
+                                InstrItinClass itin, RegisterOperand ROD,
+                                RegisterOperand ROS = ROD> {
+  dag OutOperandList = (outs ROD:$rt);
+  dag InOperandList = (ins ROS:$rs);
+  string AsmString = !strconcat(opstr, "\t$rt, $rs");
+  list<dag> Pattern = [(set ROD:$rt, (OpNode ROS:$rs))];
+  InstrItinClass Itinerary = itin;
+}
+class ABSQ_S_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.ph", int_mips_absq_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_W_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.w", int_mips_absq_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_QB_MMR2_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class PRECEQ_W_PHL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceq.w.phl", int_mips_preceq_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQ_W_PHR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceq.w.phr", int_mips_preceq_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbl", int_mips_precequ_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbla", int_mips_precequ_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbr", int_mips_precequ_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbra", int_mips_precequ_ph_qbra, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbl", int_mips_preceu_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbla", int_mips_preceu_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbr", int_mips_preceu_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbra", int_mips_preceu_ph_qbra, NoItinerary, DSPROpnd>;
+
+class SHLL_R2_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                           SDPatternOperator ImmPat, InstrItinClass itin,
+                           RegisterOperand RO, Operand ImmOpnd> {
+  dag OutOperandList = (outs RO:$rt);
+  dag InOperandList = (ins RO:$rs, ImmOpnd:$sa);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+  list<dag> Pattern = [(set RO:$rt, (OpNode RO:$rs, ImmPat:$sa))];
+  InstrItinClass Itinerary = itin;
+  bit hasSideEffects = 1;
+}
+class SHLL_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_S_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll_s.ph", int_mips_shll_s_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_S_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>,
+    Defs<[DSPOutFlag22]>;
+class SHRA_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_R_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.qb", int_mips_shra_r_qb, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.ph", int_mips_shra_r_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.w", int_mips_shra_r_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>;
+class SHRL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shrl.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRL_PH_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shrl.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+
+class SHLLV_R3_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                            InstrItinClass itin, RegisterOperand RO> {
+  dag OutOperandList = (outs RO:$rd);
+  dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs");
+  list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))];
+  InstrItinClass Itinerary = itin;
+}
+class SHLLV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv.ph", int_mips_shll_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPROpnd>,
+    Defs<[DSPOutFlag22]>;
+class SHLLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv.qb", int_mips_shll_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv_s.w", int_mips_shll_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>;
+class SHRAV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav.ph", int_mips_shra_ph, NoItinerary, DSPROpnd>;
+class SHRAV_R_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>;
+class SHRAV_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>;
+class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>;
+class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
+
+class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
+  InstrItinClass Itinerary = itin;
+}
+class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
+  InstrItinClass Itinerary = itin;
+}
+
+class EXTP_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+
+class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO,
+                        SDPatternOperator OpNode, InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rs);
+  dag InOperandList = (ins RO:$ac);
+  string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+  list<dag> Pattern = [(set GPR32Opnd:$rs, (OpNode RO:$ac))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MFHI_MM_DESC : MFHI_MM_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI,
+                                       NoItinerary>;
+class MFLO_MM_DESC : MFHI_MM_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO,
+                                       NoItinerary>;
+
+class RADDU_W_QB_MM_DESC {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins DSPROpnd:$rs);
+  string AsmString = !strconcat("raddu.w.qb", "\t$rt, $rs");
+  list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_raddu_w_qb DSPROpnd:$rs))];
+  InstrItinClass Itinerary = NoItinerary;
+  string BaseOpcode = "raddu.w.qb";
+}
+
+class RDDSP_MM_DESC {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins uimm16:$mask);
+  string AsmString = !strconcat("rddsp", "\t$rt, $mask");
+  list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt10:$mask))];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPL_QB_MM_DESC {
+  dag OutOperandList = (outs DSPROpnd:$rt);
+  dag InOperandList = (ins uimm16:$imm);
+  string AsmString = !strconcat("repl.qb", "\t$rt, $imm");
+  list<dag> Pattern = [(set DSPROpnd:$rt, (int_mips_repl_qb immZExt8:$imm))];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPLV_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+                                                   NoItinerary, DSPROpnd,
+                                                   GPR32Opnd>;
+class REPLV_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+                                                   NoItinerary, DSPROpnd,
+                                                   GPR32Opnd>;
+
+class WRDSP_MM_DESC {
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
+  string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
+  list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+// Instruction defs.
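+// How to read the definitions that follow: each 'def' stitches one *_ENC
+// class to one *_DESC class, and DspMMRel additionally registers the result
+// in the standard-to-microMIPS instruction mapping tables so the two
+// encodings of the same DSP instruction stay related. A sketch of the shape
+// (FOO_QB_MM and its ENC/DESC classes are hypothetical, for illustration):
+//
+//   def FOO_QB_MM : DspMMRel, FOO_QB_MM_ENC, FOO_QB_MM_DESC, ISA_DSPR2;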
+// microMIPS DSP Rev 1 +def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC; +def ADDQ_S_PH_MM : DspMMRel, ADDQ_S_PH_MM_ENC, ADDQ_S_PH_DESC; +def ADDQ_S_W_MM : DspMMRel, ADDQ_S_W_MM_ENC, ADDQ_S_W_DESC; +def ADDU_QB_MM : DspMMRel, ADDU_QB_MM_ENC, ADDU_QB_DESC; +def ADDU_S_QB_MM : DspMMRel, ADDU_S_QB_MM_ENC, ADDU_S_QB_DESC; +def ADDSC_MM : DspMMRel, ADDSC_MM_ENC, ADDSC_DESC; +def ADDWC_MM : DspMMRel, ADDWC_MM_ENC, ADDWC_DESC; +def DPAQ_S_W_PH_MM : DspMMRel, DPAQ_S_W_PH_MM_ENC, DPAQ_S_W_PH_DESC; +def DPAQ_SA_L_W_MM : DspMMRel, DPAQ_SA_L_W_MM_ENC, DPAQ_SA_L_W_DESC; +def DPAU_H_QBL_MM : DspMMRel, DPAU_H_QBL_MM_ENC, DPAU_H_QBL_DESC; +def DPAU_H_QBR_MM : DspMMRel, DPAU_H_QBR_MM_ENC, DPAU_H_QBR_DESC; +def ABSQ_S_PH_MM : DspMMRel, ABSQ_S_PH_MM_ENC, ABSQ_S_PH_MM_DESC; +def ABSQ_S_W_MM : DspMMRel, ABSQ_S_W_MM_ENC, ABSQ_S_W_MM_DESC; +def INSV_MM : DspMMRel, INSV_MM_ENC, INSV_DESC; +def MADD_DSP_MM : DspMMRel, MADD_DSP_MM_ENC, MADD_DSP_DESC; +def MADDU_DSP_MM : DspMMRel, MADDU_DSP_MM_ENC, MADDU_DSP_DESC; +def MSUB_DSP_MM : DspMMRel, MSUB_DSP_MM_ENC, MSUB_DSP_DESC; +def MSUBU_DSP_MM : DspMMRel, MSUBU_DSP_MM_ENC, MSUBU_DSP_DESC; +def MULT_DSP_MM : DspMMRel, MULT_DSP_MM_ENC, MULT_DSP_DESC; +def MULTU_DSP_MM : DspMMRel, MULTU_DSP_MM_ENC, MULTU_DSP_DESC; +def SHLL_PH_MM : DspMMRel, SHLL_PH_MM_ENC, SHLL_PH_MM_DESC; +def SHLL_S_PH_MM : DspMMRel, SHLL_S_PH_MM_ENC, SHLL_S_PH_MM_DESC; +def SHLL_QB_MM : DspMMRel, SHLL_QB_MM_ENC, SHLL_QB_MM_DESC; +def SHLLV_PH_MM : DspMMRel, SHLLV_PH_MM_ENC, SHLLV_PH_MM_DESC; +def SHLLV_S_PH_MM : DspMMRel, SHLLV_S_PH_MM_ENC, SHLLV_S_PH_MM_DESC; +def SHLLV_QB_MM : DspMMRel, SHLLV_QB_MM_ENC, SHLLV_QB_MM_DESC; +def SHLLV_S_W_MM : DspMMRel, SHLLV_S_W_MM_ENC, SHLLV_S_W_MM_DESC; +def SHLL_S_W_MM : DspMMRel, SHLL_S_W_MM_ENC, SHLL_S_W_MM_DESC; +def SHRA_PH_MM : DspMMRel, SHRA_PH_MM_ENC, SHRA_PH_MM_DESC; +def SHRA_R_PH_MM : DspMMRel, SHRA_R_PH_MM_ENC, SHRA_R_PH_MM_DESC; +def SHRAV_PH_MM : DspMMRel, SHRAV_PH_MM_ENC, SHRAV_PH_MM_DESC; +def SHRAV_R_PH_MM : DspMMRel, SHRAV_R_PH_MM_ENC, SHRAV_R_PH_MM_DESC; +def SHRAV_R_W_MM : DspMMRel, SHRAV_R_W_MM_ENC, SHRAV_R_W_MM_DESC; +def SHRA_R_W_MM : DspMMRel, SHRA_R_W_MM_ENC, SHRA_R_W_MM_DESC; +def SHRL_QB_MM : DspMMRel, SHRL_QB_MM_ENC, SHRL_QB_MM_DESC; +def SHRLV_QB_MM : DspMMRel, SHRLV_QB_MM_ENC, SHRLV_QB_MM_DESC; +def PRECEQ_W_PHL_MM : DspMMRel, PRECEQ_W_PHL_MM_ENC, PRECEQ_W_PHL_MM_DESC; +def PRECEQ_W_PHR_MM : DspMMRel, PRECEQ_W_PHR_MM_ENC, PRECEQ_W_PHR_MM_DESC; +def PRECEQU_PH_QBL_MM : DspMMRel, PRECEQU_PH_QBL_MM_ENC, PRECEQU_PH_QBL_MM_DESC; +def PRECEQU_PH_QBLA_MM : DspMMRel, PRECEQU_PH_QBLA_MM_ENC, + PRECEQU_PH_QBLA_MM_DESC; +def PRECEQU_PH_QBR_MM : DspMMRel, PRECEQU_PH_QBR_MM_ENC, PRECEQU_PH_QBR_MM_DESC; +def PRECEQU_PH_QBRA_MM : DspMMRel, PRECEQU_PH_QBRA_MM_ENC, + PRECEQU_PH_QBRA_MM_DESC; +def PRECEU_PH_QBL_MM : DspMMRel, PRECEU_PH_QBL_MM_ENC, PRECEU_PH_QBL_MM_DESC; +def PRECEU_PH_QBLA_MM : DspMMRel, PRECEU_PH_QBLA_MM_ENC, PRECEU_PH_QBLA_MM_DESC; +def PRECEU_PH_QBR_MM : DspMMRel, PRECEU_PH_QBR_MM_ENC, PRECEU_PH_QBR_MM_DESC; +def PRECEU_PH_QBRA_MM : DspMMRel, PRECEU_PH_QBRA_MM_ENC, PRECEU_PH_QBRA_MM_DESC; +def SUBQ_PH_MM : DspMMRel, SUBQ_PH_MM_ENC, SUBQ_PH_DESC; +def SUBQ_S_PH_MM : DspMMRel, SUBQ_S_PH_MM_ENC, SUBQ_S_PH_DESC; +def SUBQ_S_W_MM : DspMMRel, SUBQ_S_W_MM_ENC, SUBQ_S_W_DESC; +def SUBU_QB_MM : DspMMRel, SUBU_QB_MM_ENC, SUBU_QB_DESC; +def SUBU_S_QB_MM : DspMMRel, SUBU_S_QB_MM_ENC, SUBU_S_QB_DESC; +def EXTP_MM : DspMMRel, EXTP_MM_ENC, EXTP_MM_DESC; +def EXTPDP_MM : DspMMRel, EXTPDP_MM_ENC, EXTPDP_MM_DESC; +def EXTPDPV_MM : DspMMRel, 
EXTPDPV_MM_ENC, EXTPDPV_MM_DESC; +def EXTPV_MM : DspMMRel, EXTPV_MM_ENC, EXTPV_MM_DESC; +def EXTR_W_MM : DspMMRel, EXTR_W_MM_ENC, EXTR_W_MM_DESC; +def EXTR_R_W_MM : DspMMRel, EXTR_R_W_MM_ENC, EXTR_R_W_MM_DESC; +def EXTR_RS_W_MM : DspMMRel, EXTR_RS_W_MM_ENC, EXTR_RS_W_MM_DESC; +def EXTR_S_H_MM : DspMMRel, EXTR_S_H_MM_ENC, EXTR_S_H_MM_DESC; +def EXTRV_W_MM : DspMMRel, EXTRV_W_MM_ENC, EXTRV_W_MM_DESC; +def EXTRV_R_W_MM : DspMMRel, EXTRV_R_W_MM_ENC, EXTRV_R_W_MM_DESC; +def EXTRV_RS_W_MM : DspMMRel, EXTRV_RS_W_MM_ENC, EXTRV_RS_W_MM_DESC; +def EXTRV_S_H_MM : DspMMRel, EXTRV_S_H_MM_ENC, EXTRV_S_H_MM_DESC; +def DPSQ_S_W_PH_MM : DspMMRel, DPSQ_S_W_PH_MM_ENC, DPSQ_S_W_PH_DESC; +def DPSQ_SA_L_W_MM : DspMMRel, DPSQ_SA_L_W_MM_ENC, DPSQ_SA_L_W_DESC; +def DPSU_H_QBL_MM : DspMMRel, DPSU_H_QBL_MM_ENC, DPSU_H_QBL_DESC; +def DPSU_H_QBR_MM : DspMMRel, DPSU_H_QBR_MM_ENC, DPSU_H_QBR_DESC; +def MULEQ_S_W_PHL_MM : DspMMRel, MULEQ_S_W_PHL_MM_ENC, MULEQ_S_W_PHL_DESC; +def MULEQ_S_W_PHR_MM : DspMMRel, MULEQ_S_W_PHR_MM_ENC, MULEQ_S_W_PHR_DESC; +def MULEU_S_PH_QBL_MM : DspMMRel, MULEU_S_PH_QBL_MM_ENC, MULEU_S_PH_QBL_DESC; +def MULEU_S_PH_QBR_MM : DspMMRel, MULEU_S_PH_QBR_MM_ENC, MULEU_S_PH_QBR_DESC; +def MULQ_RS_PH_MM : DspMMRel, MULQ_RS_PH_MM_ENC, MULQ_RS_PH_DESC; +def PRECRQ_PH_W_MM : DspMMRel, PRECRQ_PH_W_MM_ENC, PRECRQ_PH_W_DESC; +def PRECRQ_QB_PH_MM : DspMMRel, PRECRQ_QB_PH_MM_ENC, PRECRQ_QB_PH_DESC; +def PRECRQU_S_QB_PH_MM : DspMMRel, PRECRQU_S_QB_PH_MM_ENC, PRECRQU_S_QB_PH_DESC; +def PRECRQ_RS_PH_W_MM : DspMMRel, PRECRQ_RS_PH_W_MM_ENC, PRECRQ_RS_PH_W_DESC; +def LBUX_MM : DspMMRel, LBUX_MM_ENC, LBUX_DESC; +def LHX_MM : DspMMRel, LHX_MM_ENC, LHX_DESC; +def LWX_MM : DspMMRel, LWX_MM_ENC, LWX_DESC; +def MAQ_S_W_PHL_MM : DspMMRel, MAQ_S_W_PHL_MM_ENC, MAQ_S_W_PHL_DESC; +def MAQ_SA_W_PHL_MM : DspMMRel, MAQ_SA_W_PHL_MM_ENC, MAQ_SA_W_PHL_DESC; +def MAQ_S_W_PHR_MM : DspMMRel, MAQ_S_W_PHR_MM_ENC, MAQ_S_W_PHR_DESC; +def MAQ_SA_W_PHR_MM : DspMMRel, MAQ_SA_W_PHR_MM_ENC, MAQ_SA_W_PHR_DESC; +def MFHI_DSP_MM : DspMMRel, MFHI_MM_ENC, MFHI_MM_DESC; +def MFLO_DSP_MM : DspMMRel, MFLO_MM_ENC, MFLO_MM_DESC; +def MTHI_DSP_MM : DspMMRel, MTHI_MM_ENC, MTHI_DESC; +def MTLO_DSP_MM : DspMMRel, MTLO_MM_ENC, MTLO_DESC; +def RADDU_W_QB_MM : DspMMRel, RADDU_W_QB_MM_ENC, RADDU_W_QB_MM_DESC; +def RDDSP_MM : DspMMRel, RDDSP_MM_ENC, RDDSP_MM_DESC; +def REPL_PH_MM : DspMMRel, REPL_PH_MM_ENC, REPL_PH_DESC; +def REPL_QB_MM : DspMMRel, REPL_QB_MM_ENC, REPL_QB_MM_DESC; +def REPLV_PH_MM : DspMMRel, REPLV_PH_MM_ENC, REPLV_PH_MM_DESC; +def REPLV_QB_MM : DspMMRel, REPLV_QB_MM_ENC, REPLV_QB_MM_DESC; +def MTHLIP_MM : DspMMRel, MTHLIP_MM_ENC, MTHLIP_DESC; +def PACKRL_PH_MM : DspMMRel, PACKRL_PH_MM_ENC, PACKRL_PH_DESC; +def PICK_PH_MM : DspMMRel, PICK_PH_MM_ENC, PICK_PH_DESC; +def PICK_QB_MM : DspMMRel, PICK_QB_MM_ENC, PICK_QB_DESC; +def SHILO_MM : DspMMRel, SHILO_MM_ENC, SHILO_DESC; +def SHILOV_MM : DspMMRel, SHILOV_MM_ENC, SHILOV_DESC; +def WRDSP_MM : DspMMRel, WRDSP_MM_ENC, WRDSP_MM_DESC; +// microMIPS DSP Rev 2 +def ABSQ_S_QB_MMR2 : DspMMRel, ABSQ_S_QB_MMR2_ENC, ABSQ_S_QB_MMR2_DESC, + ISA_DSPR2; +def ADDQH_PH_MMR2 : DspMMRel, ADDQH_PH_MMR2_ENC, ADDQH_PH_DESC, ISA_DSPR2; +def ADDQH_R_PH_MMR2 : DspMMRel, ADDQH_R_PH_MMR2_ENC, ADDQH_R_PH_DESC, ISA_DSPR2; +def ADDQH_W_MMR2 : DspMMRel, ADDQH_W_MMR2_ENC, ADDQH_W_DESC, ISA_DSPR2; +def ADDQH_R_W_MMR2 : DspMMRel, ADDQH_R_W_MMR2_ENC, ADDQH_R_W_DESC, ISA_DSPR2; +def ADDU_PH_MMR2 : DspMMRel, ADDU_PH_MMR2_ENC, ADDU_PH_DESC, ISA_DSPR2; +def ADDU_S_PH_MMR2 : DspMMRel, ADDU_S_PH_MMR2_ENC, ADDU_S_PH_DESC, ISA_DSPR2; +def 
ADDUH_QB_MMR2 : DspMMRel, ADDUH_QB_MMR2_ENC, ADDUH_QB_DESC, ISA_DSPR2; +def ADDUH_R_QB_MMR2 : DspMMRel, ADDUH_R_QB_MMR2_ENC, ADDUH_R_QB_DESC, ISA_DSPR2; +def DPA_W_PH_MMR2 : DspMMRel, DPA_W_PH_MMR2_ENC, DPA_W_PH_DESC, ISA_DSPR2; +def DPAQX_S_W_PH_MMR2 : DspMMRel, DPAQX_S_W_PH_MMR2_ENC, DPAQX_S_W_PH_DESC, + ISA_DSPR2; +def DPAQX_SA_W_PH_MMR2 : DspMMRel, DPAQX_SA_W_PH_MMR2_ENC, DPAQX_SA_W_PH_DESC, + ISA_DSPR2; +def DPAX_W_PH_MMR2 : DspMMRel, DPAX_W_PH_MMR2_ENC, DPAX_W_PH_DESC, ISA_DSPR2; +def SHRA_QB_MMR2 : DspMMRel, SHRA_QB_MMR2_ENC, SHRA_QB_MMR2_DESC, ISA_DSPR2; +def SHRA_R_QB_MMR2 : DspMMRel, SHRA_R_QB_MMR2_ENC, SHRA_R_QB_MMR2_DESC, + ISA_DSPR2; +def SHRAV_QB_MMR2 : DspMMRel, SHRAV_QB_MMR2_ENC, SHRAV_QB_MMR2_DESC, ISA_DSPR2; +def SHRAV_R_QB_MMR2 : DspMMRel, SHRAV_R_QB_MMR2_ENC, SHRAV_R_QB_MMR2_DESC, + ISA_DSPR2; +def SHRL_PH_MMR2 : DspMMRel, SHRL_PH_MMR2_ENC, SHRL_PH_MMR2_DESC, ISA_DSPR2; +def SHRLV_PH_MMR2 : DspMMRel, SHRLV_PH_MMR2_ENC, SHRLV_PH_MMR2_DESC, ISA_DSPR2; +def SUBQH_PH_MMR2 : DspMMRel, SUBQH_PH_MMR2_ENC, SUBQH_PH_DESC, ISA_DSPR2; +def SUBQH_R_PH_MMR2 : DspMMRel, SUBQH_R_PH_MMR2_ENC, SUBQH_R_PH_DESC, ISA_DSPR2; +def SUBQH_W_MMR2 : DspMMRel, SUBQH_W_MMR2_ENC, SUBQH_W_DESC, ISA_DSPR2; +def SUBQH_R_W_MMR2 : DspMMRel, SUBQH_R_W_MMR2_ENC, SUBQH_R_W_DESC, ISA_DSPR2; +def SUBU_PH_MMR2 : DspMMRel, SUBU_PH_MMR2_ENC, SUBU_PH_DESC, ISA_DSPR2; +def SUBU_S_PH_MMR2 : DspMMRel, SUBU_S_PH_MMR2_ENC, SUBU_S_PH_DESC, ISA_DSPR2; +def SUBUH_QB_MMR2 : DspMMRel, SUBUH_QB_MMR2_ENC, SUBUH_QB_DESC, ISA_DSPR2; +def SUBUH_R_QB_MMR2 : DspMMRel, SUBUH_R_QB_MMR2_ENC, SUBUH_R_QB_DESC, ISA_DSPR2; +def DPS_W_PH_MMR2 : DspMMRel, DPS_W_PH_MMR2_ENC, DPS_W_PH_DESC, ISA_DSPR2; +def DPSQX_S_W_PH_MMR2 : DspMMRel, DPSQX_S_W_PH_MMR2_ENC, DPSQX_S_W_PH_DESC, + ISA_DSPR2; +def DPSQX_SA_W_PH_MMR2 : DspMMRel, DPSQX_SA_W_PH_MMR2_ENC, DPSQX_SA_W_PH_DESC, + ISA_DSPR2; +def DPSX_W_PH_MMR2 : DspMMRel, DPSX_W_PH_MMR2_ENC, DPSX_W_PH_DESC, ISA_DSPR2; +def MUL_PH_MMR2 : DspMMRel, MUL_PH_MMR2_ENC, MUL_PH_DESC, ISA_DSPR2; +def MUL_S_PH_MMR2 : DspMMRel, MUL_S_PH_MMR2_ENC, MUL_S_PH_DESC, ISA_DSPR2; +def MULQ_RS_W_MMR2 : DspMMRel, MULQ_RS_W_MMR2_ENC, MULQ_RS_W_DESC, ISA_DSPR2; +def MULQ_S_PH_MMR2 : DspMMRel, MULQ_S_PH_MMR2_ENC, MULQ_S_PH_DESC, ISA_DSPR2; +def MULQ_S_W_MMR2 : DspMMRel, MULQ_S_W_MMR2_ENC, MULQ_S_W_DESC, ISA_DSPR2; +def PRECR_QB_PH_MMR2 : DspMMRel, PRECR_QB_PH_MMR2_ENC, PRECR_QB_PH_DESC, + ISA_DSPR2; +def PRECR_SRA_PH_W_MMR2 : DspMMRel, PRECR_SRA_PH_W_MMR2_ENC, + PRECR_SRA_PH_W_DESC, ISA_DSPR2; +def PRECR_SRA_R_PH_W_MMR2 : DspMMRel, PRECR_SRA_R_PH_W_MMR2_ENC, + PRECR_SRA_R_PH_W_DESC, ISA_DSPR2; +def PREPEND_MMR2 : DspMMRel, PREPEND_MMR2_ENC, PREPEND_DESC, ISA_DSPR2; + +// Instruction alias. 
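+// The alias below lets plain "wrdsp $rt" assemble as WRDSP_MM with the full
+// control-field mask (0x1F). Other single-operand shorthands could be spelled
+// the same way with MMDSPInstAlias; for instance (hypothetical alias, mask
+// value assumed rather than taken from this patch):
+//
+//   def : MMDSPInstAlias<"rddsp $rt", (RDDSP_MM GPR32Opnd:$rt, 0x1F), 1>;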
+def : MMDSPInstAlias<"wrdsp $rt", (WRDSP_MM GPR32Opnd:$rt, 0x1F), 1>; diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index 004b0d51f4b4..756e6c92c1d1 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -37,23 +37,14 @@ def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM_MM<1>; -def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>, +def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; -def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>, +def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>, BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6; - -def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, - ROUND_W_FM_MM<0, 0x6c>; def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ROUND_W_FM_MM<0, 0x24>; -def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, - ROUND_W_FM_MM<0, 0x2c>; -def ROUND_W_S_MM : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, +def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ROUND_W_FM_MM<0, 0xec>; -def TRUNC_W_S_MM : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, - ROUND_W_FM_MM<0, 0xac>; -def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, - fsqrt>, ROUND_W_FM_MM<0, 0x28>; def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>, ROUND_W_FM_MM<1, 0x6c>; @@ -61,7 +52,7 @@ def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ROUND_W_FM_MM<1, 0x24>; def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>, ROUND_W_FM_MM<1, 0x2c>; -def ROUND_W_MM : MMRel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>, +def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>, ROUND_W_FM_MM<1, 0xec>; def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>, ROUND_W_FM_MM<1, 0xac>; @@ -146,3 +137,14 @@ def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, MADDS_FM_MM<0x2a>; } + +let AdditionalPredicates = [InMicroMips] in { + def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, + II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>; + def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>; + def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, + ROUND_W_FM_MM<0, 0x6c>; + def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, + fsqrt>, ROUND_W_FM_MM<0, 0x28>; +} diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 560afa48908c..b736367ee5fa 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -389,6 +389,22 @@ class LW_FM_MM op> : MMArch { let Inst{15-0} = addr{15-0}; } +class POOL32C_LHUE_FM_MM op, bits<4> fmt, bits<3> funct> : MMArch { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = fmt; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + class LWL_FM_MM 
funct> { bits<5> rt; bits<21> addr; @@ -402,6 +418,22 @@ class LWL_FM_MM funct> { let Inst{11-0} = addr{11-0}; } +class POOL32C_STEVA_LDEVA_FM_MM type, bits<3> funct> { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = type; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + class CMov_F_I_FM_MM func> : MMArch { bits<5> rd; bits<5> rs; @@ -655,6 +687,22 @@ class LL_FM_MM funct> { let Inst{11-0} = addr{11-0}; } +class LLE_FM_MM funct> { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = funct; + let Inst{11-9} = 0x6; + let Inst{8-0} = offset; +} + class ADDS_FM_MM fmt, bits<8> funct> : MMArch { bits<5> ft; bits<5> fs; @@ -895,7 +943,7 @@ class LWM_FM_MM funct> : MMArch { let Inst{11-0} = addr{11-0}; } -class LWM_FM_MM16 funct> : MMArch { +class LWM_FM_MM16 funct> : MMArch, PredicateControl { bits<2> rt; bits<4> addr; @@ -922,6 +970,37 @@ class CACHE_PREF_FM_MM op, bits<4> funct> : MMArch { let Inst{11-0} = offset; } +class CACHE_PREFE_FM_MM op, bits<3> funct> : MMArch { + bits<21> addr; + bits<5> hint; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = hint; + let Inst{20-16} = base; + let Inst{15-12} = 0xA; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + +class POOL32F_PREFX_FM_MM op, bits<9> funct> : MMArch { + bits<5> index; + bits<5> base; + bits<5> hint; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = hint; + let Inst{10-9} = 0x0; + let Inst{8-0} = funct; +} + class BARRIER_FM_MM op> : MMArch { bits<32> Inst; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 39393840c6f2..99f0f446deab 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -13,11 +13,6 @@ def simm12 : Operand { let DecoderMethod = "DecodeSimm12"; } -def uimm5_lsl2 : Operand { - let EncoderMethod = "getUImm5Lsl2Encoding"; - let DecoderMethod = "DecodeUImm5lsl2"; -} - def uimm6_lsl2 : Operand { let EncoderMethod = "getUImm6Lsl2Encoding"; let DecoderMethod = "DecodeUImm6Lsl2"; @@ -30,6 +25,7 @@ def simm9_addiusp : Operand { def uimm3_shift : Operand { let EncoderMethod = "getUImm3Mod8Encoding"; + let DecoderMethod = "DecodePOOL16BEncodedField"; } def simm3_lsa2 : Operand { @@ -105,6 +101,14 @@ def mem_mm_gp_imm7_lsl2 : Operand { let EncoderMethod = "getMemEncodingMMGPImm7Lsl2"; } +def mem_mm_9 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPR32, simm9); + let EncoderMethod = "getMemEncodingMMImm9"; + let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + def mem_mm_12 : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops GPR32, simm12); @@ -113,6 +117,14 @@ def mem_mm_12 : Operand { let OperandType = "OPERAND_MEMORY"; } +def mem_mm_16 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPR32, simm16); + let EncoderMethod = "getMemEncodingMMImm16"; + let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + def MipsMemUimm4AsmOperand : AsmOperandClass { let Name = "MemOffsetUimm4"; let SuperClasses = [MipsMemAsmOperand]; @@ -166,7 +178,7 
@@ def simm23_lsl2 : Operand { class CompactBranchMM : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 0; @@ -251,6 +263,13 @@ class LLBaseMM : let mayLoad = 1; } +class LLEBaseMM : + InstSE<(outs RO:$rt), (ins mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMemMMImm9"; + let mayLoad = 1; +} + class SCBaseMM : InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { @@ -259,6 +278,14 @@ class SCBaseMM : let Constraints = "$rt = $dst"; } +class SCEBaseMM : + InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMemMMImm9"; + let mayStore = 1; + let Constraints = "$rt = $dst"; +} + class LoadMM : InstSE<(outs RO:$rt), (ins mem_mm_12:$addr), @@ -392,7 +419,7 @@ class LoadImmMM16 : // 16-bit Jump and Link (Call) class JumpLinkRegMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [(MipsJmpLink RO:$rs)], IIBranch, FrmR> { + [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, PredicateControl { let isCall = 1; let hasDelaySlot = 1; let Defs = [RA]; @@ -401,7 +428,7 @@ class JumpLinkRegMM16 : // 16-bit Jump Reg class JumpRegMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JR, FrmR> { let hasDelaySlot = 1; let isBranch = 1; let isIndirectBranch = 1; @@ -410,7 +437,7 @@ class JumpRegMM16 : // Base class for JRADDIUSP instruction. class JumpRAddiuStackMM16 : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm", - [], IIBranch, FrmR> { + [], II_JRADDIUSP, FrmR> { let isTerminator = 1; let isBarrier = 1; let isBranch = 1; @@ -420,7 +447,7 @@ class JumpRAddiuStackMM16 : // 16-bit Jump and Link (Call) - Short Delay Slot class JumpLinkRegSMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JALRS, FrmR> { let isCall = 1; let hasDelaySlot = 1; let Defs = [RA]; @@ -429,7 +456,7 @@ class JumpLinkRegSMM16 : // 16-bit Jump Register Compact - No delay slot class JumpRegCMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JRC, FrmR> { let isTerminator = 1; let isBarrier = 1; let isBranch = 1; @@ -444,7 +471,7 @@ class BrkSdbbp16MM : class CBranchZeroMM : MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZ, FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -455,18 +482,18 @@ class CBranchZeroMM : let isCall = 1, hasDelaySlot = 1, Defs = [RA] in { class JumpLinkMM : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [], IIBranch, FrmJ, opstr> { + [], II_JALS, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTargetMM"; } class JumpLinkRegMM: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [], IIBranch, FrmR>; + [], II_JALRS, FrmR>; class BranchCompareToZeroLinkMM : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>; + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>; } class LoadWordIndexedScaledMM; +class PrefetchIndexed : + InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint), + !strconcat(opstr, 
"\t$hint, ${index}(${base})"), [], NoItinerary, FrmOther>; + class AddImmUPC : InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm), !strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>; @@ -543,7 +574,7 @@ class LoadMultMM16 : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), !strconcat(opstr, "\t$offset"), - [], IIBranch, FrmI> { + [], II_B, FrmI> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; @@ -553,21 +584,24 @@ class UncondBranchMM16 : } def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, - ARITH_FM_MM16<0>; -def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, - ARITH_FM_MM16<1>; -def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>; + ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>, - LOGIC_FM_MM16<0x2>; -def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, - LOGIC_FM_MM16<0x3>; -def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, - LOGIC_FM_MM16<0x1>; -def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>; + LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6_64R6; +def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>, + ISA_MICROMIPS_NOT_32R6_64R6; +def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>, + ISA_MICROMIPS_NOT_32R6_64R6; +def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, LOGIC_FM_MM16<0x3>, + ISA_MICROMIPS_NOT_32R6_64R6; def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, - SHIFT_FM_MM16<0>; + SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, - SHIFT_FM_MM16<1>; + SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; + +def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, + ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; +def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, + LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6_64R6; def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU, mem_mm_4>, LOAD_STORE_FM_MM16<0x02>; def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU, @@ -597,7 +631,8 @@ def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>; def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16; def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16, IsAsCheapAsAMove; -def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>; +def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>, + ISA_MICROMIPS32_NOT_MIPS32R6; def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>; def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>; def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>; @@ -607,8 +642,18 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>, def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>, BEQNEZ_FM_MM16<0x2b>; def B16_MM : UncondBranchMM16<"b16">, B16_FM; -def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>; -def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>; +def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>, + ISA_MICROMIPS_NOT_32R6_64R6; +def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>, + ISA_MICROMIPS_NOT_32R6_64R6; + +let DecoderNamespace = "MicroMips" in { + /// Load and Store Instructions - multiple + def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>, + ISA_MICROMIPS32_NOT_MIPS32R6; + def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>, + 
ISA_MICROMIPS32_NOT_MIPS32R6; +} class WaitMM : InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [], @@ -701,6 +746,18 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; } + let DecoderMethod = "DecodeMemMMImm9" in { + def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>; + def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>; + def LHE_MM : Load<"lhe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>; + def LHuE_MM : Load<"lhue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>; + def LWE_MM : Load<"lwe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>; + def SBE_MM : Store<"sbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>; + def SHE_MM : Store<"she", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>; + def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9gpr>, + POOL32C_LHUE_FM_MM<0x18, 0xa, 0x7>; + } + def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>; def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>; @@ -714,12 +771,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { LWL_FM_MM<0x8>; def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, LWL_FM_MM<0x9>; + let DecoderMethod = "DecodeMemMMImm9" in { + def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>; + def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>; + def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x0>; + def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x1>, ISA_MIPS1_NOT_32R6_64R6; + } /// Load and Store Instructions - multiple def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>; def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>; - def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>; - def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>; /// Load and Store Pair Instructions def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>; @@ -777,11 +842,11 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { SEB_FM_MM<0x0ec>, ISA_MIPS32R2; /// Word Swap Bytes Within Halfwords - def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>, - ISA_MIPS32R2; - - def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, - EXT_FM_MM<0x2c>; + def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, + SEB_FM_MM<0x1ec>, ISA_MIPS32R2; + // TODO: Add '0 < pos+size <= 32' constraint check to ext instruction + def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, + MipsExt>, EXT_FM_MM<0x2c>; def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM_MM<0x0c>; @@ -854,12 +919,22 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>; def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>; + def LLE_MM : LLEBaseMM<"lle", GPR32Opnd>, LLE_FM_MM<0x6>; + def SCE_MM : SCEBaseMM<"sce", GPR32Opnd>, LLE_FM_MM<0xA>; + let DecoderMethod = "DecodeCacheOpMM" in { def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>, CACHE_PREF_FM_MM<0x08, 0x6>; def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>, CACHE_PREF_FM_MM<0x18, 0x2>; } + + let DecoderMethod = "DecodePrefeOpMM" in { + def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9>, + CACHE_PREFE_FM_MM<0x18, 0x2>; + def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9>, + 
CACHE_PREFE_FM_MM<0x18, 0x3>; + } def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>; def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>; def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>; @@ -870,7 +945,13 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>; def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM; - def RDHWR_MM : MMRel, ReadHardware, RDHWR_FM_MM; + + def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>; +} + +let DecoderNamespace = "MicroMips" in { + def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware, + RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6; } let Predicates = [InMicroMips] in { @@ -928,7 +1009,7 @@ class UncondBranchMMPseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), !strconcat(opstr, "\t$offset")>; - def B_MM_Pseudo : UncondBranchMMPseudo<"b">; +def B_MM_Pseudo : UncondBranchMMPseudo<"b">, ISA_MICROMIPS; def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>; def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>; @@ -937,4 +1018,17 @@ class UncondBranchMMPseudo : let Predicates = [InMicroMips] in { def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2; +def : MipsInstAlias<"di", (DI_MM ZERO), 1>, ISA_MIPS32R2; +def : MipsInstAlias<"teq $rs, $rt", + (TEQ_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tge $rs, $rt", + (TGE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tgeu $rs, $rt", + (TGEU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tlt $rs, $rt", + (TLT_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tltu $rs, $rt", + (TLTU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tne $rs, $rt", + (TNE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; } diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index dbb5f7b71d82..35352b6115c5 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -154,9 +154,14 @@ def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true", def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", "Mips DSP-R2 ASE", [FeatureDSP]>; +def FeatureDSPR3 + : SubtargetFeature<"dspr3", "HasDSPR3", "true", "Mips DSP-R3 ASE", + [ FeatureDSP, FeatureDSPR2 ]>; def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">; +def FeatureEVA : SubtargetFeature<"eva", "HasEVA", "true", "Mips EVA ASE">; + def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", "microMips mode">; @@ -164,10 +169,19 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips", "true", "Octeon cnMIPS Support", [FeatureMips64r2]>; +def FeatureUseTCCInDIV : SubtargetFeature< + "use-tcc-in-div", + "UseTCCInDIV", "false", + "Force the assembler to use trapping">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// +def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl", + "MipsSubtarget::CPU::P5600", + "The P5600 Processor", [FeatureMips32r5]>; + class Proc Features> : Processor; @@ -187,12 +201,11 @@ def : Proc<"mips64r2", [FeatureMips64r2]>; def : Proc<"mips64r3", [FeatureMips64r3]>; def : Proc<"mips64r5", [FeatureMips64r5]>; def : Proc<"mips64r6", [FeatureMips64r6]>; -def : Proc<"mips16", [FeatureMips16]>; def : Proc<"octeon", [FeatureMips64r2, FeatureCnMips]>; +def : ProcessorModel<"p5600", MipsP5600Model, [ImplP5600]>; def MipsAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; - let MnemonicContainsDot = 1; } def MipsAsmParserVariant : AsmParserVariant { diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 46cc99c62393..26426c087164 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -39,7 +39,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, const Mips16InstrInfo &TII = *static_cast(STI.getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. @@ -107,7 +111,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); + MachineBasicBlock *EntryBlock = &MF->front(); // // Registers RA, S0,S1 are the callee saved registers and they diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 893fc7cdf473..b2bc7e74c706 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -40,26 +40,17 @@ namespace { const MipsTargetMachine &TM; }; - class InlineAsmHelper { - LLVMContext &C; - BasicBlock *BB; - public: - InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : - C(C_), BB(BB_) { - } + static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) { + std::vector AsmArgTypes; + std::vector AsmArgs; - void Out(StringRef AsmString) { - std::vector AsmArgTypes; - std::vector AsmArgs; - - llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C), - AsmArgTypes, false); - llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true, - /* IsAlignStack */ false, - llvm::InlineAsm::AD_ATT); - CallInst::Create(IA, AsmArgs, "", BB); - } - }; + llvm::FunctionType *AsmFTy = + llvm::FunctionType::get(Type::getVoidTy(C), AsmArgTypes, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(AsmFTy, AsmText, "", true, + /* IsAlignStack */ false, llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); + } char Mips16HardFloat::ID = 0; } @@ -182,7 +173,7 @@ static bool needsFPReturnHelper(Function &F) { return whichFPReturnVariant(RetType) != NoFPRet; } -static bool needsFPReturnHelper(const FunctionType &FT) { +static bool needsFPReturnHelper(FunctionType &FT) { Type* RetType = FT.getReturnType(); return whichFPReturnVariant(RetType) != NoFPRet; } @@ -195,63 +186,72 @@ static bool needsFPHelperFromSig(Function &F) { // We swap between FP and Integer registers to allow Mips16 and Mips32 to // interoperate // -static void swapFPIntParams(FPParamVariant PV, Module *M, 
InlineAsmHelper &IAH, - bool LE, bool ToFP) { - //LLVMContext &Context = M->getContext(); - std::string MI = ToFP? "mtc1 ": "mfc1 "; +static std::string swapFPIntParams(FPParamVariant PV, Module *M, bool LE, + bool ToFP) { + std::string MI = ToFP ? "mtc1 ": "mfc1 "; + std::string AsmText; + switch (PV) { case FSig: - IAH.Out(MI + "$$4,$$f12"); + AsmText += MI + "$$4, $$f12\n"; break; + case FFSig: - IAH.Out(MI +"$$4,$$f12"); - IAH.Out(MI + "$$5,$$f14"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f14\n"; break; + case FDSig: - IAH.Out(MI + "$$4,$$f12"); + AsmText += MI + "$$4, $$f12\n"; if (LE) { - IAH.Out(MI + "$$6,$$f14"); - IAH.Out(MI + "$$7,$$f15"); + AsmText += MI + "$$6, $$f14\n"; + AsmText += MI + "$$7, $$f15\n"; } else { - IAH.Out(MI + "$$7,$$f14"); - IAH.Out(MI + "$$6,$$f15"); + AsmText += MI + "$$7, $$f14\n"; + AsmText += MI + "$$6, $$f15\n"; } break; + case DSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; } break; + case DDSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); - IAH.Out(MI + "$$6,$$f14"); - IAH.Out(MI + "$$7,$$f15"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; + AsmText += MI + "$$6, $$f14\n"; + AsmText += MI + "$$7, $$f15\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); - IAH.Out(MI + "$$7,$$f14"); - IAH.Out(MI + "$$6,$$f15"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; + AsmText += MI + "$$7, $$f14\n"; + AsmText += MI + "$$6, $$f15\n"; } break; + case DFSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; } - IAH.Out(MI + "$$6,$$f14"); + AsmText += MI + "$$6, $$f14\n"; break; + case NoSig: - return; + break; } + + return AsmText; } // @@ -282,68 +282,77 @@ static void assureFPCallStub(Function &F, Module *M, FStub->addFnAttr("nomips16"); FStub->setSection(SectionName); BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); - InlineAsmHelper IAH(Context, BB); - IAH.Out(".set reorder"); FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType()); FPParamVariant PV = whichFPParamVariantNeeded(F); - swapFPIntParams(PV, M, IAH, LE, true); + + std::string AsmText; + AsmText += ".set reorder\n"; + AsmText += swapFPIntParams(PV, M, LE, true); if (RV != NoFPRet) { - IAH.Out("move $$18, $$31"); - IAH.Out("jal " + Name); + AsmText += "move $$18, $$31\n"; + AsmText += "jal " + Name + "\n"; } else { - IAH.Out("lui $$25,%hi(" + Name + ")"); - IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" ); + AsmText += "lui $$25, %hi(" + Name + ")\n"; + AsmText += "addiu $$25, $$25, %lo(" + Name + ")\n"; } + switch (RV) { case FRet: - IAH.Out("mfc1 $$2,$$f0"); + AsmText += "mfc1 $$2, $$f0\n"; break; + case DRet: if (LE) { - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f1"); + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f1\n"; } else { - IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$2,$$f1"); + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$2, $$f1\n"; } break; + case CFRet: if (LE) { - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f2"); + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f2\n"; } else { - 
IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$3,$$f2"); + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$3, $$f2\n"; } break; + case CDRet: if (LE) { - IAH.Out("mfc1 $$4,$$f2"); - IAH.Out("mfc1 $$5,$$f3"); - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f1"); + AsmText += "mfc1 $$4, $$f2\n"; + AsmText += "mfc1 $$5, $$f3\n"; + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f1\n"; } else { - IAH.Out("mfc1 $$5,$$f2"); - IAH.Out("mfc1 $$4,$$f3"); - IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$2,$$f1"); + AsmText += "mfc1 $$5, $$f2\n"; + AsmText += "mfc1 $$4, $$f3\n"; + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$2, $$f1\n"; } break; + case NoFPRet: break; } + if (RV != NoFPRet) - IAH.Out("jr $$18"); + AsmText += "jr $$18\n"; else - IAH.Out("jr $$25"); + AsmText += "jr $$25\n"; + EmitInlineAsm(Context, BB, AsmText); + new UnreachableInst(Context, BB); } // // Functions that are llvm intrinsics and don't need helpers. // -static const char *IntrinsicInline[] = { +static const char *const IntrinsicInline[] = { "fabs", "fabsf", "llvm.ceil.f32", "llvm.ceil.f64", "llvm.copysign.f32", "llvm.copysign.f64", @@ -395,7 +404,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, Type *T = RVal->getType(); FPReturnVariant RV = whichFPReturnVariant(T); if (RV == NoFPRet) continue; - static const char* Helper[NoFPRet] = { + static const char *const Helper[NoFPRet] = { "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc", "__mips16_ret_dc" }; @@ -419,11 +428,11 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, CallInst::Create(F, Params, "", &Inst ); } else if (const CallInst *CI = dyn_cast(I)) { const Value* V = CI->getCalledValue(); - const Type* T = nullptr; + Type* T = nullptr; if (V) T = V->getType(); - const PointerType *PFT=nullptr; + PointerType *PFT = nullptr; if (T) PFT = dyn_cast(T); - const FunctionType *FT=nullptr; + FunctionType *FT = nullptr; if (PFT) FT = dyn_cast(PFT->getElementType()); Function *F_ = CI->getCalledFunction(); if (FT && needsFPReturnHelper(*FT) && @@ -469,20 +478,21 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, FStub->addFnAttr("nomips16"); FStub->setSection(SectionName); BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); - InlineAsmHelper IAH(Context, BB); + + std::string AsmText; if (PicMode) { - IAH.Out(".set noreorder"); - IAH.Out(".cpload $$25"); - IAH.Out(".set reorder"); - IAH.Out(".reloc 0,R_MIPS_NONE," + Name); - IAH.Out("la $$25," + LocalName); - } - else { - IAH.Out("la $$25," + Name); - } - swapFPIntParams(PV, M, IAH, LE, false); - IAH.Out("jr $$25"); - IAH.Out(LocalName + " = " + Name); + AsmText += ".set noreorder\n"; + AsmText += ".cpload $$25\n"; + AsmText += ".set reorder\n"; + AsmText += ".reloc 0, R_MIPS_NONE, " + Name + "\n"; + AsmText += "la $$25, " + LocalName + "\n"; + } else + AsmText += "la $$25, " + Name + "\n"; + AsmText += swapFPIntParams(PV, M, LE, false); + AsmText += "jr $$25\n"; + AsmText += LocalName + " = " + Name + "\n"; + EmitInlineAsm(Context, BB, AsmText); + new UnreachableInst(FStub->getContext(), BB); } @@ -535,7 +545,7 @@ bool Mips16HardFloat::runOnModule(Module &M) { FPParamVariant V = whichFPParamVariantNeeded(*F); if (V != NoSig) { Modified = true; - createFPFnStub(F, &M, V, TM); + createFPFnStub(&*F, &M, V, TM); } } return Modified; diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index bce2c1eb4485..5a1c2c67cc70 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ 
-73,7 +73,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass; diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 3522cbb1f36a..e7483253e61d 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -530,8 +530,7 @@ emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -592,8 +591,7 @@ Mips16TargetLowering::emitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -657,8 +655,7 @@ Mips16TargetLowering::emitSeliT16(unsigned Opc1, unsigned Opc2, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index a49572efdbf9..da8ada4e5391 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -196,7 +196,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB, void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); const BitVector Reserved = RI.getReservedRegs(MF); @@ -263,7 +263,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Reg1, unsigned Reg2) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; // // li reg1, constant // move reg2, sp @@ -446,7 +446,7 @@ const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { void Mips16InstrInfo::BuildAddiuSpImm (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 10fff03b7240..dad6ea4c9e98 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -530,19 +530,19 @@ class MayStore { // Purpose: Add Immediate Unsigned Word (2-Operand, Extended) // To add a constant to a 32-bit integer. 
// -def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; +def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIM16Alu>; -def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>, +def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIM16Alu>, ArithLogic16Defs<0> { let AddedComplexity = 5; } -def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, +def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIM16Alu>, ArithLogic16Defs<0> { let isCodeGenOnly = 1; } def AddiuRxRyOffMemX16: - FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>; + FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIM16Alu>; // @@ -550,7 +550,7 @@ def AddiuRxRyOffMemX16: // Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended) // To add a constant to the program counter. // -def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; +def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIM16Alu>; // // Format: ADDIU sp, immediate MIPS16e @@ -558,14 +558,14 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // To add a constant to the stack pointer. // def AddiuSpImm16 - : FI816_SP_ins<0b011, "addiu", IIAlu> { + : FI816_SP_ins<0b011, "addiu", IIM16Alu> { let Defs = [SP]; let Uses = [SP]; let AddedComplexity = 5; } def AddiuSpImmX16 - : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { + : FEXT_I816_SP_ins<0b011, "addiu", IIM16Alu> { let Defs = [SP]; let Uses = [SP]; } @@ -576,14 +576,14 @@ def AddiuSpImmX16 // To add 32-bit integers. // -def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; +def AdduRxRyRz16: FRRR16_ins<01, "addu", IIM16Alu>, ArithLogic16Defs<1>; // // Format: AND rx, ry MIPS16e // Purpose: AND // To do a bitwise logical AND. -def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; +def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIM16Alu>, ArithLogic16Defs<1>; // @@ -591,7 +591,7 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // Purpose: Branch on Equal to Zero // To test a GPR then do a PC-relative conditional branch. // -def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; +def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16; // @@ -599,7 +599,7 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; // Purpose: Branch on Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. // -def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; +def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16; // // Format: B offset MIPS16e @@ -607,27 +607,27 @@ def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; // To do an unconditional PC-relative branch. // -def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16; +def Bimm16: FI16_ins<0b00010, "b", IIM16Alu>, branch16; // Format: B offset MIPS16e // Purpose: Unconditional Branch // To do an unconditional PC-relative branch. // -def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16; +def BimmX16: FEXT_I16_ins<0b00010, "b", IIM16Alu>, branch16; // // Format: BNEZ rx, offset MIPS16e // Purpose: Branch on Not Equal to Zero // To test a GPR then do a PC-relative conditional branch. // -def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; +def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16; // // Format: BNEZ rx, offset MIPS16e // Purpose: Branch on Not Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. 
// -def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; +def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16; // @@ -641,11 +641,11 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>; // Purpose: Branch on T Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 { +def Bteqz16: FI816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 { let Uses = [T8]; } -def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 { +def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 { let Uses = [T8]; } @@ -669,11 +669,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">, // To test special register T then do a PC-relative conditional branch. // -def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 { +def Btnez16: FI816_ins<0b001, "btnez", IIM16Alu>, cbranch16 { let Uses = [T8]; } -def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 { +def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIM16Alu> ,cbranch16 { let Uses = [T8]; } @@ -695,7 +695,7 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, // Purpose: Compare // To compare the contents of two GPRs. // -def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { +def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIM16Alu> { let Defs = [T8]; } @@ -704,7 +704,7 @@ def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { // Purpose: Compare Immediate // To compare a constant with the contents of a GPR. // -def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIM16Alu> { let Defs = [T8]; } @@ -713,7 +713,7 @@ def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { // Purpose: Compare Immediate (Extended) // To compare a constant with the contents of a GPR. // -def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIM16Alu> { let Defs = [T8]; } @@ -723,7 +723,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { // Purpose: Divide Word // To divide 32-bit signed integers. // -def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { +def DivRxRy16: FRR16_div_ins<0b11010, "div", IIM16Alu> { let Defs = [HI0, LO0]; } @@ -732,7 +732,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { // Purpose: Divide Unsigned Word // To divide 32-bit unsigned integers. // -def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { +def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIM16Alu> { let Defs = [HI0, LO0]; } // @@ -742,13 +742,13 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { // region and preserve the current ISA. // -def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { +def Jal16 : FJAL16_ins<0b0, "jal", IIM16Alu> { let hasDelaySlot = 0; // not true, but we add the nop for now let isCall=1; let Defs = [RA]; } -def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 { +def JalB16 : FJALB16_ins<0b0, "jal", IIM16Alu>, branch16 { let hasDelaySlot = 0; // not true, but we add the nop for now let isBranch=1; let Defs = [RA]; @@ -761,7 +761,7 @@ def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 { // address register. 
// -def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { +def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let hasDelaySlot = 1; @@ -769,14 +769,14 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { let isBarrier=1; } -def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> { +def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; let isBarrier=1; } -def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> { +def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; @@ -825,16 +825,16 @@ def LhuRxRyOffMemX16: // Purpose: Load Immediate // To load a constant into a GPR. // -def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>; +def LiRxImm16: FRI16_ins<0b01101, "li", IIM16Alu>; // // Format: LI rx, immediate MIPS16e // Purpose: Load Immediate (Extended) // To load a constant into a GPR. // -def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; +def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIM16Alu>; -def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> { +def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIM16Alu> { let isCodeGenOnly = 1; } @@ -863,21 +863,21 @@ def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad; // Purpose: Move // To move the contents of a GPR to a GPR. // -def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>; +def Move32R16: FI8_MOV32R16_ins<"move", IIM16Alu>; // // Format: MOVE ry, r32 MIPS16e //Purpose: Move // To move the contents of a GPR to a GPR. // -def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>; +def MoveR3216: FI8_MOVR3216_ins<"move", IIM16Alu>; // // Format: MFHI rx MIPS16e // Purpose: Move From HI Register // To copy the special purpose HI register to a GPR. // -def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { +def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIM16Alu> { let Uses = [HI0]; let hasSideEffects = 0; } @@ -887,7 +887,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { // Purpose: Move From LO Register // To copy the special purpose LO register to a GPR. // -def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { +def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> { let Uses = [LO0]; let hasSideEffects = 0; } @@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { // // Pseudo Instruction for mult // -def MultRxRy16: FMULT16_ins<"mult", IIAlu> { +def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; } -def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { +def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { // Purpose: Multiply Word // To multiply 32-bit signed integers. // -def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { +def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { // Purpose: Multiply Unsigned Word // To multiply 32-bit unsigned integers. // -def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { +def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -934,21 +934,21 @@ def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { // Purpose: Negate // To negate an integer value. 
// -def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>; +def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIM16Alu>; // // Format: NOT rx, ry MIPS16e // Purpose: Not // To complement an integer value // -def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>; +def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIM16Alu>; // // Format: OR rx, ry MIPS16e // Purpose: Or // To do a bitwise logical OR. // -def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; +def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIM16Alu>, ArithLogic16Defs<1>; // // Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize} @@ -1012,7 +1012,7 @@ def SbRxRyOffMemX16: // Sign-extend least significant byte in register rx. // def SebRx16 - : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>; + : FRR_SF16_ins<0b10001, 0b100, "seb", IIM16Alu>; // // Format: SEH rx MIPS16e @@ -1020,7 +1020,7 @@ def SebRx16 // Sign-extend least significant word in register rx. // def SehRx16 - : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>; + : FRR_SF16_ins<0b10001, 0b101, "seh", IIM16Alu>; // // The Sel(T) instructions are pseudos @@ -1149,21 +1149,21 @@ def ShRxRyOffMemX16: // Purpose: Shift Word Left Logical (Extended) // To execute a left-shift of a word by a fixed number of bits-0 to 31 bits. // -def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; +def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIM16Alu>; // // Format: SLLV ry, rx MIPS16e // Purpose: Shift Word Left Logical Variable // To execute a left-shift of a word by a variable number of bits. // -def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; +def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIM16Alu>; // Format: SLTI rx, immediate MIPS16e // Purpose: Set on Less Than Immediate // To record the result of a less-than comparison with a constant. // // -def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { +def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIM16Alu> { let Defs = [T8]; } @@ -1173,7 +1173,7 @@ def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { // To record the result of a less-than comparison with a constant. // // -def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> { +def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIM16Alu> { let Defs = [T8]; } @@ -1184,7 +1184,7 @@ def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; // To record the result of a less-than comparison with a constant. // // -def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIM16Alu> { let Defs = [T8]; } @@ -1194,7 +1194,7 @@ def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { // To record the result of a less-than comparison with a constant. // // -def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIM16Alu> { let Defs = [T8]; } // @@ -1209,7 +1209,7 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // Purpose: Set on Less Than // To record the result of a less-than comparison. // -def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{ +def SltRxRy16: FRR16R_ins<0b00010, "slt", IIM16Alu>{ let Defs = [T8]; } @@ -1219,7 +1219,7 @@ def SltCCRxRy16: FCCRR16_ins<"slt">; // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. // -def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{ +def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIM16Alu>{ let Defs = [T8]; } @@ -1236,7 +1236,7 @@ def SltuCCRxRy16: FCCRR16_ins<"sltu">; // To execute an arithmetic right-shift of a word by a variable // number of bits. 
// -def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; +def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIM16Alu>; // @@ -1245,7 +1245,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; // To execute an arithmetic right-shift of a word by a fixed // number of bits-1 to 8 bits. // -def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; +def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIM16Alu>; // @@ -1254,7 +1254,7 @@ def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; // To execute a logical right-shift of a word by a variable // number of bits. // -def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; +def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIM16Alu>; // @@ -1263,14 +1263,14 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; // To execute a logical right-shift of a word by a fixed // number of bits-1 to 31 bits. // -def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>; +def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIM16Alu>; // // Format: SUBU rz, rx, ry MIPS16e // Purpose: Subtract Unsigned Word // To subtract 32-bit integers // -def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; +def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIM16Alu>, ArithLogic16Defs<0>; // // Format: SW ry, offset(rx) MIPS16e @@ -1294,7 +1294,7 @@ def SwRxSpImmX16: FEXT_RI16_SP_Store_explicit_ins // Purpose: Xor // To do a bitwise logical XOR. // -def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>; +def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIM16Alu>, ArithLogic16Defs<1>; class Mips16Pat : Pat { let Predicates = [InMips16Mode]; @@ -1380,7 +1380,7 @@ def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> { let isCall=1, hasDelaySlot=0 in def JumpLinkReg16: FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs), - "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch> { + "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], II_JALRC> { let Defs = [RA]; } diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index d6ab8a6e5411..82d2c8ee9905 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -186,54 +186,56 @@ class CMP_CONDN_DESC_BASE{ - def CMP_F_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_UN_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_EQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, + let AdditionalPredicates = [NotInMicroMips] in { + def CMP_F_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, ISA_MIPS32R6, HARDFLOAT; - def CMP_LT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_LE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SAF_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SUN_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, - 
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + def CMP_UN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, ISA_MIPS32R6, HARDFLOAT; - def CMP_SLT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SULT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + def CMP_EQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, ISA_MIPS32R6, HARDFLOAT; - def CMP_SLE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SULE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_LT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, ISA_MIPS32R6, HARDFLOAT; + def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_LE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SAF_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SUN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SLT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SLE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + } } //===----------------------------------------------------------------------===// @@ -557,7 +559,7 @@ class CACHE_HINT_DESC Pattern = []; - string DecoderMethod = "DecodeCacheOpR6"; + string DecoderMethod = "DecodeCacheeOp_CacheOpR6"; } class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>; @@ -595,7 +597,7 @@ class LSA_R6_DESC_BASE Pattern = []; } -class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>; class LL_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rt); @@ -685,8 +687,10 @@ def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6; def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6; def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6; def CACHE_R6 : R6MMR6Rel, CACHE_ENC, CACHE_DESC, ISA_MIPS32R6; -def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6; def CLZ_R6 
: R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6; defm S : CMP_CC_M; @@ -702,39 +706,51 @@ def LSA_R6 : R6MMR6Rel, LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6; def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; -def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def MOD : R6MMR6Rel, MOD_ENC, MOD_DESC, ISA_MIPS32R6; def MODU : R6MMR6Rel, MODU_ENC, MODU_DESC, ISA_MIPS32R6; -def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT; +} def MUH : R6MMR6Rel, MUH_ENC, MUH_DESC, ISA_MIPS32R6; def MUHU : R6MMR6Rel, MUHU_ENC, MUHU_DESC, ISA_MIPS32R6; def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6; def MULU : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6; def NAL; // BAL with rd=0 def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6; -def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6; +let AdditionalPredicates = [NotInMicroMips] in { def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6; +} def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6; def SELEQZ : R6MMR6Rel, SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32; -def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SELNEZ : R6MMR6Rel, SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32; -def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def SEL_D : 
SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; //===----------------------------------------------------------------------===// @@ -743,7 +759,9 @@ def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; // //===----------------------------------------------------------------------===// +let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6; +} def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; //===----------------------------------------------------------------------===// @@ -752,84 +770,78 @@ def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; // //===----------------------------------------------------------------------===// -// f32 comparisons supported via another comparison -def : MipsPat<(setone f32:$lhs, f32:$rhs), - (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f32:$lhs, f32:$rhs), - (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; +// comparisons supported via another comparison +multiclass Cmp_Pats { +def : MipsPat<(setone VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UEQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seto VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UN_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(setune VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seteq VT:$lhs, VT:$rhs), + (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setgt VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setge VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setlt VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setle VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setne VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +} -// f64 comparisons supported via another comparison -def : MipsPat<(setone f64:$lhs, f64:$rhs), - (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f64:$lhs, f64:$rhs), - (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>, - 
ISA_MIPS32R6; -def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +defm S : Cmp_Pats, ISA_MIPS32R6; +defm D : Cmp_Pats, ISA_MIPS32R6; // i32 selects -def : MipsPat<(select i32:$cond, i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t, - i32:$f), - (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)), - i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; +multiclass SelectInt_Pats { +// reg, immz +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, RC:$cond), (SELNEZOp RC:$f, RC:$cond))>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, RC:$cond), (SELEQZOp RC:$f, RC:$cond))>; +// reg, immZExt16[_64] +def : MipsPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; +def : MipsPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; + +// reg, immSExt16Plus1 +def : MipsPat<(select (Opg (setgt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; +def : MipsPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; + +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), + (SELEQZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), + (SELNEZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), + (SELNEZOp RC:$f, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), + (SELEQZOp RC:$f, RC:$cond)>; +} + +defm : SelectInt_Pats, ISA_MIPS32R6; + +def : MipsPat<(select i32:$cond, i32:$t, i32:$f), + (OR (SELNEZ i32:$t, i32:$cond), + (SELEQZ i32:$f, i32:$cond))>, + ISA_MIPS32R6; def : MipsPat<(select i32:$cond, i32:$t, immz), - 
(SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz), - (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz), - (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6; + (SELNEZ i32:$t, i32:$cond)>, + ISA_MIPS32R6; def : MipsPat<(select i32:$cond, immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f), - (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6; + (SELEQZ i32:$f, i32:$cond)>, + ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index f917ecad4a53..cbdcdd788bec 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -16,10 +16,6 @@ //===----------------------------------------------------------------------===// // Unsigned Operand -def uimm5_64 : Operand { - let PrintMethod = "printUnsignedImm"; -} - def uimm16_64 : Operand { let PrintMethod = "printUnsignedImm"; } @@ -276,12 +272,20 @@ def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>; let isCodeGenOnly = 1 in def RDHWR64 : ReadHardware, RDHWR_FM; -def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>; -def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>; -def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>; +let AdditionalPredicates = [NotInMicroMips] in { + // TODO: Add 'pos + size' constraint check to dext* instructions + // DEXT: 0 < pos + size <= 63 + // DEXTM, DEXTU: 32 < pos + size <= 64 + def DEXT : ExtBase<"dext", GPR64Opnd, uimm5, uimm5_plus1, MipsExt>, + EXT_FM<3>; + def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5, uimm5_plus33, MipsExt>, + EXT_FM<1>; + def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32, uimm5_plus1, + MipsExt>, EXT_FM<2>; +} def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>; -def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>; +def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>; let isCodeGenOnly = 1, rs = 0, shamt = 0 in { @@ -341,11 +345,11 @@ class SetCC64_I: } class CBranchBitNum shift = 1> : - InstSE<(outs), (ins RO:$rs, uimm5_64:$p, opnd:$offset), + RegisterOperand RO, Operand ImmOp, bits<64> shift = 1> : + InstSE<(outs), (ins RO:$rs, ImmOp:$p, opnd:$offset), !strconcat(opstr, "\t$rs, $p, $offset"), [(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)), - bb:$offset)], IIBranch, FrmI, opstr> { + bb:$offset)], II_BBIT, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -363,14 +367,17 @@ def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>, ADD_FM<0x1c, 0x28>; // Branch on Bit Clear /+32 -def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd>, BBIT_FM<0x32>; -def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, 0x100000000>, +def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd, + uimm5_64_report_uimm6>, BBIT_FM<0x32>; +def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, uimm5_64, + 0x100000000>, BBIT_FM<0x36>; // Branch on Bit Set /+32 -def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd>, BBIT_FM<0x3a>; -def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, 0x100000000>, - BBIT_FM<0x3e>; +def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd, + uimm5_64_report_uimm6>, 
BBIT_FM<0x3a>; +def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, uimm5_64, + 0x100000000>, BBIT_FM<0x3e>; // Multiply Doubleword to GPR let Defs = [HI0, LO0, P0, P1, P2] in @@ -544,9 +551,24 @@ def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst (BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>; } +// Atomic load patterns. +def : MipsPat<(atomic_load_8 addr:$a), (LB64 addr:$a)>; +def : MipsPat<(atomic_load_16 addr:$a), (LH64 addr:$a)>; +def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>; +def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>; + +// Atomic store patterns. +def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// +def : MipsInstAlias<"move $dst, $src", + (OR64 GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, + GPR_64; def : MipsInstAlias<"move $dst, $src", (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; @@ -617,6 +639,38 @@ def : MipsInstAlias<"syncw", (SYNC 0x4), 0>; def : MipsInstAlias<"syncws", (SYNC 0x5), 0>; } +// cnMIPS Aliases. + +// bbit* with $p 32-63 converted to bbit*32 with $p 0-31 +def : MipsInstAlias<"bbit0 $rs, $p, $offset", + (BBIT032 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, + brtarget:$offset), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"bbit1 $rs, $p, $offset", + (BBIT132 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, + brtarget:$offset), 0>, + ASE_CNMIPS; + +// exts with $pos 32-63 in converted to exts32 with $pos 0-31 +def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1", + (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"exts $rt, $pos, $lenm1", + (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; + +// cins with $pos 32-63 in converted to cins32 with $pos 0-31 +def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1", + (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"cins $rt, $pos, $lenm1", + (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; + //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -625,3 +679,8 @@ class LoadImmediate64 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), !strconcat(instr_asm, "\t$rt, $imm64")> ; def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>; + +def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr), + "dla\t$rt, $addr">; +def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64), + "dla\t$rt, $imm64">; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 6b546e864bd3..6f34dbe28d30 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -62,7 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", 
GPR64Opnd, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; -class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; +class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2_plus1>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; @@ -81,10 +81,12 @@ class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; // //===----------------------------------------------------------------------===// -def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6; -def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; -def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; -def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; +let AdditionalPredicates = [NotInMicroMips] in { + def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; + def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6; + def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; + def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; +} def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index fdba064b5c5e..957529376b37 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -169,12 +169,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MCPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else - EmitGlobalConstant(MCPE.Val.ConstVal); + EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal); return; } - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { @@ -202,7 +202,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { llvm_unreachable("Pseudo opcode found in EmitInstruction()"); MCInst TmpInst0; - MCInstLowering.Lower(I, TmpInst0); + MCInstLowering.Lower(&*I, TmpInst0); EmitToStreamer(*OutStreamer, TmpInst0); } while ((++I != E) && I->isInsideBundle()); // Delay slot check } @@ -405,7 +405,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) + if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. 
@@ -559,7 +559,6 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); bool closeP = false; @@ -608,7 +607,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" + O << getDataLayout().getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); if (MO.getOffset()) O << "+" << MO.getOffset(); @@ -1009,7 +1008,7 @@ void MipsAsmPrinter::EmitFPCallStub( // // Mov $18, $31 - EmitInstrRegRegReg(*STI, Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO); + EmitInstrRegRegReg(*STI, Mips::OR, Mips::S2, Mips::RA, Mips::ZERO); EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true); diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index b8081295ca64..d82063e3d2a9 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -29,22 +29,16 @@ static bool isF128SoftLibCall(const char *CallSym) { "powl", "rintl", "sinl", "sqrtl", "truncl"}; - const char *const *End = LibCalls + array_lengthof(LibCalls); - // Check that LibCalls is sorted alphabetically. - MipsTargetLowering::LTStr Comp; - -#ifndef NDEBUG - for (const char *const *I = LibCalls; I < End - 1; ++I) - assert(Comp(*I, *(I + 1))); -#endif - - return std::binary_search(LibCalls, End, CallSym, Comp); + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), + CallSym, Comp); } /// This function returns true if Ty is fp128, {f128} or i128 which was /// originally a fp128. -static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) { +static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { if (Ty->isFP128Ty()) return true; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 93e1908083cb..0b4b7785af67 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -427,3 +427,28 @@ def CSR_Mips16RetHelper : CalleeSavedRegs<(add V0, V1, FP, (sequence "A%u", 3, 0), (sequence "S%u", 7, 0), (sequence "D%u", 15, 10))>; + +def CSR_Interrupt_32R6 : CalleeSavedRegs<(add (sequence "A%u", 3, 0), + (sequence "S%u", 7, 0), + (sequence "V%u", 1, 0), + (sequence "T%u", 9, 0), + RA, FP, GP, AT)>; + +def CSR_Interrupt_32 : CalleeSavedRegs<(add (sequence "A%u", 3, 0), + (sequence "S%u", 7, 0), + (sequence "V%u", 1, 0), + (sequence "T%u", 9, 0), + RA, FP, GP, AT, LO0, HI0)>; + +def CSR_Interrupt_64R6 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0), + (sequence "V%u_64", 1, 0), + (sequence "S%u_64", 7, 0), + (sequence "T%u_64", 9, 0), + RA_64, FP_64, GP_64, AT_64)>; + +def CSR_Interrupt_64 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0), + (sequence "S%u_64", 7, 0), + (sequence "T%u_64", 9, 0), + (sequence "V%u_64", 1, 0), + RA_64, FP_64, GP_64, AT_64, + LO0_64, HI0_64)>; diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 96553d28fc57..ea8c5871fa0e 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -560,7 +560,7 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // identity mapping of CPI's to CPE's. 
   const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

-  const DataLayout &TD = *MF->getTarget().getDataLayout();
+  const DataLayout &TD = MF->getDataLayout();
   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
     unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
     assert(Size >= 4 && "Too small constant pool entry");
@@ -598,12 +598,12 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
 /// into the block immediately after it.
 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
   // Get the next machine basic block in the function.
-  MachineFunction::iterator MBBI = MBB;
+  MachineFunction::iterator MBBI = MBB->getIterator();
   // Can't fall off end of function.
   if (std::next(MBBI) == MBB->getParent()->end())
     return false;

-  MachineBasicBlock *NextBB = std::next(MBBI);
+  MachineBasicBlock *NextBB = &*std::next(MBBI);
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I)
     if (*I == NextBB)
@@ -656,11 +656,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
   // alignment assumptions, as we don't know for sure the size of any
   // instructions in the inline assembly.
   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
-    computeBlockSize(I);
+    computeBlockSize(&*I);

   // Compute block offsets.
-  adjustBBOffsetsAfter(MF->begin());
+  adjustBBOffsetsAfter(&MF->front());

   // Now go back through the instructions and build up our data structures.
   for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -879,7 +879,7 @@ MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
   // Create a new MBB for the code after the OrigBB.
   MachineBasicBlock *NewBB =
     MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
-  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
   MF->insert(MBBI, NewBB);

   // Splice the instructions starting with MI over to NewBB.
@@ -967,8 +967,8 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
   unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
   unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
   unsigned NextBlockOffset, NextBlockAlignment;
-  MachineFunction::const_iterator NextBlock = Water;
-  if (++NextBlock == MF->end()) {
+  MachineFunction::const_iterator NextBlock = ++Water->getIterator();
+  if (NextBlock == MF->end()) {
     NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
     NextBlockAlignment = 0;
   } else {
@@ -1261,7 +1261,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
     if (isOffsetInRange(UserOffset, CPEOffset, U)) {
       DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
             << format(", expected CPE offset %#x\n", CPEOffset));
-      NewMBB = std::next(MachineFunction::iterator(UserMBB));
+      NewMBB = &*++UserMBB->getIterator();
       // Add an unconditional branch from UserMBB to fallthrough block.  Record
       // it for branch lengthening; this new branch will not get out of range,
       // but if the preceding conditional branch is out of range, the targets
@@ -1371,8 +1371,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
       NewWaterList.insert(NewIsland);

     // The new CPE goes before the following block (NewMBB).
-    NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+    NewMBB = &*++WaterBB->getIterator();
   } else {
     // No water found.
     // we first see if a longer form of the instruction could have reached
@@ -1389,7 +1388,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
     // next iteration for constant pools, but in this context, we don't want
     // it.
Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. - MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB)); + MachineBasicBlock *WaterBB = &*--NewMBB->getIterator(); IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); if (IP != WaterList.end()) NewWaterList.erase(WaterBB); @@ -1406,7 +1405,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF->insert(NewMBB, NewIsland); + MF->insert(NewMBB->getIterator(), NewIsland); // Update internal data structures to account for the newly inserted MBB. updateForInsertedWaterBlock(NewIsland); @@ -1431,9 +1430,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland))); - - + adjustBBOffsetsAfter(&*--NewIsland->getIterator()); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1645,7 +1642,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*++MBB->getIterator(); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index b5d52ced9d3d..f959bd4d8db3 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -7,10 +7,30 @@ // //===----------------------------------------------------------------------===// +class DspMMRel; + +def Dsp2MicroMips : InstrMapping { + let FilterClass = "DspMMRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["Arch"]; + // The key column is the unpredicated instructions. + let KeyCol = ["dsp"]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["dsp"], ["mmdsp"]]; +} + def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP">; def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">, AssemblerPredicate<"FeatureDSPR2">; +def HasDSPR3 : Predicate<"Subtarget->hasDSPR3()">, + AssemblerPredicate<"FeatureDSPR3">; + +class ISA_DSPR2 { + list InsnPredicates = [HasDSPR2]; +} // Fields. class Field6 val> { @@ -20,14 +40,22 @@ class Field6 val> { def SPECIAL3_OPCODE : Field6<0b011111>; def REGIMM_OPCODE : Field6<0b000001>; -class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { - let Predicates = [HasDSP]; +class DSPInst + : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl { + let InsnPredicates = [HasDSP]; + string BaseOpcode = opstr; + string Arch = "dsp"; } class PseudoDSP pattern, - InstrItinClass itin = IIPseudo>: - MipsPseudo { - let Predicates = [HasDSP]; + InstrItinClass itin = IIPseudo> + : MipsPseudo, PredicateControl { + let InsnPredicates = [HasDSP]; +} + +class DSPInstAlias + : InstAlias, PredicateControl { + let InsnPredicates = [HasDSP]; } // ADDU.QB sub-class format. 
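// Illustrative sketch, not part of this patch: the Dsp2MicroMips mapping
// above pairs a DSP instruction with its microMIPS counterpart through two
// records that derive from DspMMRel and share a BaseOpcode while differing
// in Arch ("dsp" is the key column, "mmdsp" the value column). Assuming
// hypothetical ENC/DESC classes, where the DSP-side DESC derives from
// DSPInst (and so carries BaseOpcode plus Arch = "dsp") and the microMIPS
// DESC overrides Arch to "mmdsp", a mapped pair would look like:
//
//   def EXAMPLE_PH    : EXAMPLE_PH_ENC, EXAMPLE_PH_DESC, DspMMRel;
//   def EXAMPLE_PH_MM : EXAMPLE_PH_ENC_MM, EXAMPLE_PH_DESC_MM, DspMMRel;
//
// From such a pair, TableGen's map-table emitter generates a dsp -> mmdsp
// opcode conversion table keyed on BaseOpcode.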
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index d26838404451..da6f174e2a19 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// // ImmLeaf +def immZExt1 : ImmLeaf(Imm);}]>; def immZExt2 : ImmLeaf(Imm);}]>; def immZExt3 : ImmLeaf(Imm);}]>; def immZExt4 : ImmLeaf(Imm);}]>; +def immZExt7 : ImmLeaf(Imm);}]>; def immZExt8 : ImmLeaf(Imm);}]>; def immZExt10 : ImmLeaf(Imm);}]>; def immSExt6 : ImmLeaf(Imm);}]>; @@ -263,6 +265,7 @@ class ADDU_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class RADDU_W_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class CMP_EQ_QB_R2_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class PRECR_SRA_PH_W_DESC_BASE Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } class ABSQ_S_PH_R2_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class REPL_DESC_BASE Pattern = [(set RO:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class SHLL_QB_R3_DESC_BASE Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class SHLL_QB_R2_DESC_BASE { + RegisterOperand RO, Operand ImmOpnd> { dag OutOperandList = (outs RO:$rd); - dag InOperandList = (ins RO:$rt, uimm16:$rs_sa); + dag InOperandList = (ins RO:$rt, ImmOpnd:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; bit hasSideEffects = 1; + string BaseOpcode = instr_asm; } class LX_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))]; InstrItinClass Itinerary = itin; bit mayLoad = 1; + string BaseOpcode = instr_asm; } class ADDUH_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class APPEND_DESC_BASE { + Operand ImmOp, SDPatternOperator Imm, InstrItinClass itin> { dag OutOperandList = (outs GPR32Opnd:$rt); - dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src); + dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set GPR32Opnd:$rt, - (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))]; + (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, Imm:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } class EXTR_W_TY1_R2_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); - dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin); + dag InOperandList = (ins simm6:$shift, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class SHILO_R2_DESC_BASE { @@ -408,6 +424,7 @@ class SHILO_R2_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode 
GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class MTHLIP_DESC_BASE { @@ -417,6 +434,7 @@ class MTHLIP_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class RDDSP_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class WRDSP_DESC_BASE { dag OutOperandList = (outs); - dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask); + dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask); string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); list Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class DPA_W_PH_DESC_BASE { @@ -444,6 +464,7 @@ class DPA_W_PH_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class MULT_DESC_BASE Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))]; InstrItinClass Itinerary = itin; bit isCommutable = 1; + string BaseOpcode = instr_asm; } class MADD_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class MTHI_DESC_BASE { @@ -481,6 +505,7 @@ class MTHI_DESC_BASE dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class BPOSGE32_PSEUDO_DESC_BASE : @@ -506,6 +531,7 @@ class INSV_DESC_BASE Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } //===----------------------------------------------------------------------===// @@ -639,7 +665,7 @@ class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", // Shift class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>, + NoItinerary, DSPROpnd, uimm3>, Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, @@ -647,13 +673,13 @@ class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, Defs<[DSPOutFlag22]>; class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm3>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>; class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>, + NoItinerary, DSPROpnd, uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, @@ -661,7 +687,8 @@ class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPROpnd>, + immZExt4, NoItinerary, DSPROpnd, + uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, @@ -669,19 +696,21 @@ class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, Defs<[DSPOutFlag22]>; class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm4>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", 
int_mips_shra_ph, NoItinerary, DSPROpnd>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, DSPROpnd>; + immZExt4, NoItinerary, DSPROpnd, + uimm4>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, - immZExt5, NoItinerary, GPR32Opnd>, + immZExt5, NoItinerary, GPR32Opnd, + uimm5>, Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, @@ -689,7 +718,8 @@ class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, - immZExt5, NoItinerary, GPR32Opnd>; + immZExt5, NoItinerary, GPR32Opnd, + uimm5>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>; @@ -1039,32 +1069,33 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", // Shift class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm3>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPROpnd>; + immZExt3, NoItinerary, DSPROpnd, + uimm3>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>; class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm4>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>; // Misc -class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, +class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5, NoItinerary>; -class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2, NoItinerary>; -class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, - NoItinerary>; +class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, + immZExt5, NoItinerary>; // Pseudos. def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE; def : IndexedLoadPat; } + +// Instruction alias. 
+let AdditionalPredicates = [NotInMicroMips] in {
+  def : DSPInstAlias<"wrdsp $rt", (WRDSP GPR32Opnd:$rt, 0x1F), 1>;
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 4faeb3321621..8313d909df2a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -355,9 +355,8 @@ void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
   for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
        SE = MBB.succ_end(); SI != SE; ++SI)
     if (*SI != &SuccBB)
-      for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
-           LE = (*SI)->livein_end(); LI != LE; ++LI)
-        Uses.set(*LI);
+      for (const auto &LI : (*SI)->liveins())
+        Uses.set(LI.PhysReg);
 }

 bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
@@ -431,7 +430,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
       (*MI.memoperands_begin())->getPseudoValue()) {
     if (isa<FixedStackPseudoSourceValue>(PSV))
       return false;
-    return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack();
+    return !PSV->isConstant(nullptr) && !PSV->isStack();
   }

   return true;
@@ -598,7 +597,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       // Get instruction with delay slot.
       MachineBasicBlock::instr_iterator DSI(I);

-      if (InMicroMipsMode && TII->GetInstSizeInBytes(std::next(DSI)) == 2 &&
+      if (InMicroMipsMode && TII->GetInstSizeInBytes(&*std::next(DSI)) == 2 &&
          DSI->isCall()) {
        // If instruction in delay slot is 16b change opcode to
        // corresponding instruction with short delay slot.
@@ -713,8 +712,9 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
   if (DisableBackwardSearch)
     return false;

-  RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
-  MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
+  auto *Fn = MBB.getParent();
+  RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
+  MemDefsUses MemDU(Fn->getDataLayout(), Fn->getFrameInfo());
   ReverseIter Filler;

   RegDU.init(*Slot);
@@ -763,6 +763,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
   BB2BrMap BrMap;
   std::unique_ptr<InspectMemInstr> IM;
   Iter Filler;
+  auto *Fn = MBB.getParent();

   // Iterate over SuccBB's predecessor list.
   for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
@@ -772,15 +773,15 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {

   // Do not allow moving instructions which have unallocatable register operands
   // across basic block boundaries.
-  RegDU.setUnallocatableRegs(*MBB.getParent());
+  RegDU.setUnallocatableRegs(*Fn);

   // Only allow moving loads from stack or constants if any of the SuccBB's
   // predecessors have multiple successors.
   if (HasMultipleSuccs) {
     IM.reset(new LoadFromStackOrConst());
   } else {
-    const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
-    IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
+    const MachineFrameInfo *MFI = Fn->getFrameInfo();
+    IM.reset(new MemDefsUses(Fn->getDataLayout(), MFI));
   }

   if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
@@ -800,12 +801,13 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {

   // Select the successor with the largest edge weight.
   auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
-  MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(),
-                 [&](const MachineBasicBlock *Dst0,
-                     const MachineBasicBlock *Dst1) {
-    return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1);
-  });
-  return S->isLandingPad() ? nullptr : S;
+  MachineBasicBlock *S = *std::max_element(
+      B.succ_begin(), B.succ_end(),
+      [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
+        return Prob.getEdgeProbability(&B, Dst0) <
+               Prob.getEdgeProbability(&B, Dst1);
+      });
+  return S->isEHPad() ? nullptr : S;
 }

 std::pair<MipsInstrInfo::BranchType, MachineInstr *>
diff --git a/lib/Target/Mips/MipsEVAInstrFormats.td b/lib/Target/Mips/MipsEVAInstrFormats.td
new file mode 100644
index 000000000000..11e191ad6d82
--- /dev/null
+++ b/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -0,0 +1,84 @@
+//===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsEVAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+                    PredicateControl, StdArch {
+  let DecoderNamespace = "Mips";
+  let EncodingPredicates = [HasStdEnc];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Field Values
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA
+def OPCODE6_LBE : OPCODE6<0b101100>;
+def OPCODE6_LBuE : OPCODE6<0b101000>;
+def OPCODE6_LHE : OPCODE6<0b101101>;
+def OPCODE6_LHuE : OPCODE6<0b101001>;
+def OPCODE6_LWE : OPCODE6<0b101111>;
+
+def OPCODE6_SBE : OPCODE6<0b011100>;
+def OPCODE6_SHE : OPCODE6<0b011101>;
+def OPCODE6_SWE : OPCODE6<0b011111>;
+
+// load/store left/right EVA
+def OPCODE6_LWLE : OPCODE6<0b011001>;
+def OPCODE6_LWRE : OPCODE6<0b011010>;
+def OPCODE6_SWLE : OPCODE6<0b100001>;
+def OPCODE6_SWRE : OPCODE6<0b100010>;
+
+// Load-linked EVA, Store-conditional EVA
+def OPCODE6_LLE : OPCODE6<0b101110>;
+def OPCODE6_SCE : OPCODE6<0b011110>;
+
+def OPCODE6_TLBINV : OPCODE6<0b000011>;
+def OPCODE6_TLBINVF : OPCODE6<0b000100>;
+
+def OPCODE6_CACHEE : OPCODE6<0b011011>;
+def OPCODE6_PREFE : OPCODE6<0b100011>;
+
+def OPGROUP_COP0 : OPGROUP<0b010000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Encoding Formats
+//
+//===----------------------------------------------------------------------===//
+
+class SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6 Operation> : MipsEVAInst {
+  bits<21> addr;
+  bits<5> hint;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+  let Inst{25-21} = base;
+  let Inst{20-16} = hint;
+  let Inst{15-7} = offset;
+  let Inst{6} = 0;
+  let Inst{5-0} = Operation.Value;
+}
+
+class TLB_FM<OPCODE6 Operation> : MipsEVAInst {
+  bits<32> Inst;
+
+  let Inst{31-26} = OPGROUP_COP0.Value;
+  let Inst{25} = 1;    // CO
+  let Inst{24-6} = 0;
+  let Inst{5-0} = Operation.Value;
+}
diff --git a/lib/Target/Mips/MipsEVAInstrInfo.td b/lib/Target/Mips/MipsEVAInstrInfo.td
new file mode 100644
index 000000000000..36c9694cbadd
--- /dev/null
+++ b/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -0,0 +1,192 @@
+//===- MipsEVAInstrInfo.td - EVA ASE instructions -*- tablegen ------------*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA ASE instructions.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Instruction encodings +// +//===----------------------------------------------------------------------===// + +// Memory Load/Store EVA encodings +class LBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LBuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LHuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +class SBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +// load/store left/right EVA encodings +class LWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +// Load-linked EVA, Store-conditional EVA encodings +class LLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SCE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +class TLBINV_ENC : TLB_FM; +class TLBINVF_ENC : TLB_FM; + +class CACHEE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class PREFE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +//===----------------------------------------------------------------------===// +// +// Instruction descriptions +// +//===----------------------------------------------------------------------===// + +// Memory Load/Store EVA descriptions +class LOAD_EVA_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + bit canFoldAsLoad = 1; + bit mayLoad = 1; +} + +class LBE_DESC : LOAD_EVA_DESC_BASE<"lbe", GPR32Opnd>; +class LBuE_DESC : LOAD_EVA_DESC_BASE<"lbue", GPR32Opnd>; +class LHE_DESC : LOAD_EVA_DESC_BASE<"lhe", GPR32Opnd>; +class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd>; +class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd>; + +class STORE_EVA_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + bit mayStore = 1; +} + +class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd>; +class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd>; +class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd>; + +// Load/Store Left/Right EVA descriptions +class LOAD_LEFT_RIGHT_EVA_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr, GPROpnd:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + string Constraints = "$src = $rt"; + bit canFoldAsLoad = 1; +} + +class LWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwle", GPR32Opnd>; +class LWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwre", GPR32Opnd>; + +class STORE_LEFT_RIGHT_EVA_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; +} + +class SWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swle", GPR32Opnd>; +class SWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swre", GPR32Opnd>; + +// Load-linked EVA, Store-conditional EVA descriptions +class LLE_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr); + string 
AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + bit mayLoad = 1; + string DecoderMethod = "DecodeMemEVA"; +} + +class LLE_DESC : LLE_DESC_BASE<"lle", GPR32Opnd>; + +class SCE_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$dst); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + bit mayStore = 1; + string Constraints = "$rt = $dst"; + string DecoderMethod = "DecodeMemEVA"; +} + +class SCE_DESC : SCE_DESC_BASE<"sce", GPR32Opnd>; + +class TLB_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins); + string AsmString = instr_asm; + list Pattern = []; +} + +class TLBINV_DESC : TLB_DESC_BASE<"tlbinv">; +class TLBINVF_DESC : TLB_DESC_BASE<"tlbinvf">; + +class CACHEE_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); + string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeCacheeOp_CacheOpR6"; +} + +class CACHEE_DESC : CACHEE_DESC_BASE<"cachee", mem>; +class PREFE_DESC : CACHEE_DESC_BASE<"prefe", mem>; + +//===----------------------------------------------------------------------===// +// +// Instruction definitions +// +//===----------------------------------------------------------------------===// + +/// Load and Store EVA Instructions +def LBE : LBE_ENC, LBE_DESC, INSN_EVA; +def LBuE : LBuE_ENC, LBuE_DESC, INSN_EVA; +def LHE : LHE_ENC, LHE_DESC, INSN_EVA; +def LHuE : LHuE_ENC, LHuE_DESC, INSN_EVA; +let AdditionalPredicates = [NotInMicroMips] in { +def LWE : LWE_ENC, LWE_DESC, INSN_EVA; +} +def SBE : SBE_ENC, SBE_DESC, INSN_EVA; +def SHE : SHE_ENC, SHE_DESC, INSN_EVA; +let AdditionalPredicates = [NotInMicroMips] in { +def SWE : SWE_ENC, SWE_DESC, INSN_EVA; +} + +/// load/store left/right EVA +let AdditionalPredicates = [NotInMicroMips] in { +def LWLE : LWLE_ENC, LWLE_DESC, INSN_EVA_NOT_32R6_64R6; +def LWRE : LWRE_ENC, LWRE_DESC, INSN_EVA_NOT_32R6_64R6; +def SWLE : SWLE_ENC, SWLE_DESC, INSN_EVA_NOT_32R6_64R6; +def SWRE : SWRE_ENC, SWRE_DESC, INSN_EVA_NOT_32R6_64R6; +} + +/// Load-linked EVA, Store-conditional EVA +let AdditionalPredicates = [NotInMicroMips] in { +def LLE : LLE_ENC, LLE_DESC, INSN_EVA; +def SCE : SCE_ENC, SCE_DESC, INSN_EVA; +} + +def TLBINV : TLBINV_ENC, TLBINV_DESC, INSN_EVA; +def TLBINVF : TLBINVF_ENC, TLBINVF_DESC, INSN_EVA; + +def CACHEE : CACHEE_ENC, CACHEE_DESC, INSN_EVA; +def PREFE : PREFE_ENC, PREFE_DESC, INSN_EVA; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 5152a072b3a2..e9eaf810637a 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -192,10 +192,10 @@ public: TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { MFI = funcInfo.MF->getInfo(); Context = &funcInfo.Fn->getContext(); + bool ISASupported = !Subtarget->hasMips32r6() && Subtarget->hasMips32(); TargetSupported = - ((TM.getRelocationModel() == Reloc::PIC_) && - ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) && - (static_cast(TM).getABI().IsO32()))); + ISASupported && (TM.getRelocationModel() == Reloc::PIC_) && + (static_cast(TM).getABI().IsO32()); UnsupportedFPMode = Subtarget->isFP64bit(); } @@ -236,32 +236,36 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, std::swap(LHS, RHS); unsigned Opc; - if (ISDOpc == ISD::AND) { + switch (ISDOpc) { + case ISD::AND: Opc = Mips::AND; - } else if (ISDOpc == ISD::OR) { + break; + case ISD::OR: Opc = Mips::OR; - } 
else if (ISDOpc == ISD::XOR) { + break; + case ISD::XOR: Opc = Mips::XOR; - } else + break; + default: llvm_unreachable("unexpected opcode"); + } unsigned LHSReg = getRegForValue(LHS); - unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); - if (!ResultReg) - return 0; - - unsigned RHSReg; if (!LHSReg) return 0; + unsigned RHSReg; if (const auto *C = dyn_cast(RHS)) RHSReg = materializeInt(C, MVT::i32); else RHSReg = getRegForValue(RHS); - if (!RHSReg) return 0; + unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); + if (!ResultReg) + return 0; + emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg); return ResultReg; } @@ -747,7 +751,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Offset = Addr.getOffset(); MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(FI) @@ -798,7 +802,7 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Offset = Addr.getOffset(); MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg) @@ -912,8 +916,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) { BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) .addReg(CondReg) .addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } return false; @@ -1057,22 +1060,16 @@ bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) { // entirely within FPRs. unsigned DestReg = createResultReg(&Mips::GPR32RegClass); unsigned TempReg = createResultReg(&Mips::FGR32RegClass); - unsigned Opc; - - if (SrcVT == MVT::f32) - Opc = Mips::TRUNC_W_S; - else - Opc = Mips::TRUNC_W_D32; + unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32; // Generate the convert. 
emitInst(Opc, TempReg).addReg(SrcReg); - emitInst(Mips::MFC1, DestReg).addReg(TempReg); updateValueMap(I, DestReg); return true; } -// + bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &OutVTs, unsigned &NumBytes) { @@ -1196,7 +1193,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); (void)(MMO); // if (!emitStore(ArgVT, ArgReg, Addr, MMO)) @@ -1607,19 +1604,23 @@ bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { + int64_t Imm; + switch (SrcVT.SimpleTy) { default: return false; case MVT::i1: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1); + Imm = 1; break; case MVT::i8: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff); + Imm = 0xff; break; case MVT::i16: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff); + Imm = 0xffff; break; } + + emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm); return true; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index fab2fdfef8cf..6756c1702f76 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -117,6 +117,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; + case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; @@ -390,10 +391,10 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + if (!Subtarget.isGP64bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + } setInsertFencesForAtomic(true); @@ -437,9 +438,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); - setExceptionPointerRegister(ABI.IsN64() ? Mips::A0_64 : Mips::A0); - setExceptionSelectorRegister(ABI.IsN64() ? 
Mips::A1_64 : Mips::A1); - MaxStoresPerMemcpy = 16; isMicroMips = Subtarget.inMicroMipsMode(); @@ -836,6 +834,14 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return SDValue(); } +bool MipsTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget.hasMips32(); +} + +bool MipsTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget.hasMips32(); +} + void MipsTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, @@ -1092,8 +1098,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, exitMBB); @@ -1204,8 +1209,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); @@ -1330,15 +1334,20 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, DebugLoc DL = MI->getDebugLoc(); unsigned LL, SC, ZERO, BNE, BEQ; - if (Size == 4) { - LL = isMicroMips ? Mips::LL_MM : Mips::LL; - SC = isMicroMips ? Mips::SC_MM : Mips::SC; + if (Size == 4) { + if (isMicroMips) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = Subtarget.hasMips32r6() ? Mips::LL_R6 : Mips::LL; + SC = Subtarget.hasMips32r6() ? Mips::SC_R6 : Mips::SC; + } ZERO = Mips::ZERO; BNE = Mips::BNE; BEQ = Mips::BEQ; } else { - LL = Mips::LLD; - SC = Mips::SCD; + LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; + SC = Subtarget.hasMips64r6() ? 
Mips::SCD_R6 : Mips::SCD; ZERO = Mips::ZERO_64; BNE = Mips::BNE64; BEQ = Mips::BEQ64; @@ -1356,8 +1365,7 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); @@ -1440,8 +1448,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, sinkMBB); @@ -1586,9 +1593,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, false, false, - false, 0); + Addr = + DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + MemVT, false, false, false, 0); Chain = Addr.getValue(1); if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || ABI.IsN64()) { @@ -1690,14 +1698,15 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); if (LargeGOT) - return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, - MipsII::MO_GOT_LO16, DAG.getEntryNode(), - MachinePointerInfo::getGOT()); + return getAddrGlobalLargeGOT( + N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, + DAG.getEntryNode(), + MachinePointerInfo::getGOT(DAG.getMachineFunction())); - return getAddrGlobal(N, SDLoc(N), Ty, DAG, - (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP - : MipsII::MO_GOT16, - DAG.getEntryNode(), MachinePointerInfo::getGOT()); + return getAddrGlobal( + N, SDLoc(N), Ty, DAG, + (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16, + DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, @@ -1719,6 +1728,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const // Local Exec TLS Model. 
GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -1813,7 +1825,8 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const static_cast( getTargetMachine().getObjFileLowering()); - if (TLOF->IsConstantInSmallSection(N->getConstVal(), getTargetMachine())) + if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(), + getTargetMachine())) // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG); @@ -2946,8 +2959,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - Function::const_arg_iterator FuncArg = - DAG.getMachineFunction().getFunction()->arg_begin(); + const Function *Func = DAG.getMachineFunction().getFunction(); + Function::const_arg_iterator FuncArg = Func->arg_begin(); + + if (Func->hasFnAttribute("interrupt") && !Func->arg_empty()) + report_fatal_error( + "Functions with the interrupt attribute cannot have arguments!"); CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), @@ -3019,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // We ought to be able to use LocVT directly but O32 sets it to i32 // when allocating floating point values to integer registers. // This shouldn't influence how we load the value into registers unless - // we are targetting softfloat. + // we are targeting softfloat. if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) LocVT = VA.getValVT(); } @@ -3033,9 +3050,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue ArgValue = DAG.getLoad( + LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); OutChains.push_back(ArgValue.getValue(1)); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); @@ -3098,8 +3116,20 @@ MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const } SDValue -MipsTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool IsVarArg, +MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl &RetOps, + SDLoc DL, SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + MipsFI->setISR(); + + return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps); +} + +SDValue +MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const { @@ -3192,7 +3222,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - // Return on Mips is always a "jr $ra" + // ISRs must use "eret". 
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt")) + return LowerInterruptReturn(RetOps, DL, DAG); + + // Standard return on Mips is a "jr $ra" return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps); } @@ -3300,7 +3334,7 @@ static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, // Search for the first numeric character. StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; - I = std::find_if(B, E, std::ptr_fun(isdigit)); + I = std::find_if(B, E, isdigit); Prefix = StringRef(B, I - B); @@ -3669,7 +3703,7 @@ void MipsTargetLowering::passByValArg( unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { - const ArrayRef ArgRegs = ABI.GetByValArgRegs(); + ArrayRef ArgRegs = ABI.GetByValArgRegs(); bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; @@ -3755,7 +3789,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector &OutChains, SDValue Chain, SDLoc DL, SelectionDAG &DAG, CCState &State) const { - const ArrayRef ArgRegs = ABI.GetVarArgRegs(); + ArrayRef ArgRegs = ABI.GetVarArgRegs(); unsigned Idx = State.getFirstUnallocated(ArgRegs); unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); @@ -3812,7 +3846,7 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size, if (State->getCallingConv() != CallingConv::Fast) { unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); - const ArrayRef IntArgRegs = ABI.GetByValArgRegs(); + ArrayRef IntArgRegs = ABI.GetByValArgRegs(); // FIXME: The O32 case actually describes no shadow registers. const MCPhysReg *ShadowRegs = ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs; @@ -3860,8 +3894,7 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index b3d861d34da7..b33e125b81b7 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -67,6 +67,10 @@ namespace llvm { // Return Ret, + // Interrupt, exception, error trap Return + ERet, + + // Software Exception Return. EH_RETURN, // Node used to extract integer from accumulator. @@ -231,6 +235,9 @@ namespace llvm { return MVT::i32; } + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; @@ -258,17 +265,25 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - struct LTStr { - bool operator()(const char *S1, const char *S2) const { - return strcmp(S1, S2) < 0; - } - }; - void HandleByVal(CCState *, unsigned &, unsigned) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return ABI.IsN64() ? Mips::A0_64 : Mips::A0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return ABI.IsN64() ? Mips::A1_64 : Mips::A1; + } + /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Mips doesn't have any special address spaces so we just reserve @@ -290,9 +305,10 @@ namespace llvm { unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, GOTFlag)); - SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, - MachinePointerInfo::getGOT(), false, false, - false, 0); + SDValue Load = + DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); unsigned LoFlag = IsN32OrN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(N, Ty, DAG, LoFlag)); @@ -487,6 +503,9 @@ namespace llvm { const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, SDLoc DL, + SelectionDAG &DAG) const; + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; // Inline asm support diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index cb912253b28c..377260f89d10 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -136,7 +136,7 @@ multiclass ABSS_M { def _D32 : MMRel, ABSS_FT, FGR_32; - def _D64 : ABSS_FT, FGR_64 { + def _D64 : StdMMR6Rel, ABSS_FT, FGR_64 { let DecoderNamespace = "Mips64"; } } @@ -267,24 +267,25 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, +def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0xc, 16>, ISA_MIPS2; -def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, +defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; +def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, ABSS_FM<0xd, 16>, ISA_MIPS2; -def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, +def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, ABSS_FM<0xe, 16>, ISA_MIPS2; -def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, +def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, ABSS_FM<0xf, 16>, ISA_MIPS2; def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x24, 16>; -defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2; defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2; defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2; defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>; let DecoderNamespace = "Mips64" in { + let AdditionalPredicates = [NotInMicroMips] in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0x8, 16>, FGR_64; def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>, @@ -301,14 
+302,17 @@ let DecoderNamespace = "Mips64" in { ABSS_FM<0xb, 16>, FGR_64; def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>, ABSS_FM<0xb, 17>, FGR_64; + } } def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x20, 20>; -def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; -def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; +let AdditionalPredicates = [NotInMicroMips] in{ + def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, + ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; + def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, + ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; +} def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, FGR_32; @@ -320,8 +324,10 @@ def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, let DecoderNamespace = "Mips64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, FGR_64; - def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x20, 21>, FGR_64; + let AdditionalPredicates = [NotInMicroMips] in{ + def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, + ABSS_FM<0x20, 21>, FGR_64; + } def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 20>, FGR_64; def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>, @@ -345,8 +351,8 @@ def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>; -def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>, - ABSS_FM<0x4, 16>, ISA_MIPS2; +def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, + II_SQRT_S, fsqrt>, ABSS_FM<0x4, 16>, ISA_MIPS2; defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2; // The odd-numbered registers are only referenced when doing loads, @@ -503,13 +509,13 @@ let AdditionalPredicates = [NoNaNsFPMath], def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>, +def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; -def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>, +def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, II_BC1FL, MIPS_BRANCH_F, 0>, BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; -def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>, +def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; -def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>, +def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; /// Floating Point Compare diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 5f4fcc354616..45baf27be518 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -132,7 +132,7 @@ class PseudoSE pattern, // These are aliases that require C++ handling to convert to the target // instruction, while InstAliases can be handled directly by tblgen. 
class MipsAsmPseudoInst: - MipsInst { + MipsInst, PredicateControl { let isPseudo = 1; let Pattern = []; } @@ -644,16 +644,16 @@ class BRK_FM funct> : StdArch // Exception return format //===----------------------------------------------------------------------===// -class ER_FM funct> : StdArch +class ER_FM funct, bit LLBit> : StdArch { bits<32> Inst; let Inst{31-26} = 0x10; let Inst{25} = 1; - let Inst{24-6} = 0; + let Inst{24-7} = 0; + let Inst{6} = LLBit; let Inst{5-0} = funct; } - //===----------------------------------------------------------------------===// // Enable/disable interrupt instruction format //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index bb23cc04e696..b1d69506c16f 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -60,8 +60,8 @@ MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag, - MFI.getObjectSize(FI), Align); + return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), + Flag, MFI.getObjectSize(FI), Align); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index ab98c9054e74..d9fb8c890739 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -77,6 +77,9 @@ def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>; def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def MipsERet : SDNode<"MipsISD::ERet", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; + // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; @@ -157,7 +160,7 @@ def HasMips3 : Predicate<"Subtarget->hasMips3()">, def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">, AssemblerPredicate<"FeatureMips4_32">; def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">, - AssemblerPredicate<"FeatureMips4_32">; + AssemblerPredicate<"!FeatureMips4_32">; def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">, AssemblerPredicate<"FeatureMips4_32r2">; def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">, @@ -166,6 +169,8 @@ def HasMips32 : Predicate<"Subtarget->hasMips32()">, AssemblerPredicate<"FeatureMips32">; def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; +def HasMips32r5 : Predicate<"Subtarget->hasMips32r5()">, + AssemblerPredicate<"FeatureMips32r5">; def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">, AssemblerPredicate<"FeatureMips32r6">; def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">, @@ -176,6 +181,8 @@ def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">, AssemblerPredicate<"!FeatureGP64Bit">; def HasMips64 : Predicate<"Subtarget->hasMips64()">, AssemblerPredicate<"FeatureMips64">; +def NotMips64 : Predicate<"!Subtarget->hasMips64()">, + AssemblerPredicate<"!FeatureMips64">; def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">, @@ -184,6 +191,8 @@ def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">, AssemblerPredicate<"!FeatureMips64r6">; def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">, AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">; +def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">, + AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">; def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, @@ -201,6 +210,12 @@ def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">, def IsLE : Predicate<"Subtarget->isLittle()">; def IsBE : Predicate<"!Subtarget->isLittle()">; def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; +def UseTCCInDIV : AssemblerPredicate<"FeatureUseTCCInDIV">; +def HasEVA : Predicate<"Subtarget->hasEVA()">, + AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; +def HasMSA : Predicate<"Subtarget->hasMSA()">, + AssemblerPredicate<"FeatureMSA">; + //===----------------------------------------------------------------------===// // Mips GPR size adjectives. 
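// Illustrative note, not part of this patch: predicates such as HasEVA,
// HasMSA and HasMips32r5 above are rarely attached to instructions
// directly; they are wrapped in adjective classes (defined in the next
// hunk) and mixed into each definition, with orthogonal constraints
// layered on through AdditionalPredicates. For example, the EVA
// definitions later in this patch follow the pattern:
//
//   let AdditionalPredicates = [NotInMicroMips] in {
//     def LWE : LWE_ENC, LWE_DESC, INSN_EVA;
//   }
//
// where INSN_EVA contributes InsnPredicates = [HasEVA].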
@@ -242,6 +257,7 @@ class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
 class ISA_MIPS32R2_NOT_32R6_64R6 {
   list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
 }
+class ISA_MIPS32R5 { list<Predicate> InsnPredicates = [HasMips32r5]; }
 class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
 class ISA_MIPS64_NOT_64R6 {
   list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
@@ -249,9 +265,21 @@ class ISA_MIPS64_NOT_64R6 {
 class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
 class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
 class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+class ISA_MICROMIPS { list<Predicate> InsnPredicates = [InMicroMips]; }
 class ISA_MICROMIPS32R6 {
   list<Predicate> InsnPredicates = [HasMicroMips32r6];
 }
+class ISA_MICROMIPS64R6 {
+  list<Predicate> InsnPredicates = [HasMicroMips64r6];
+}
+class ISA_MICROMIPS32_NOT_MIPS32R6 {
+  list<Predicate> InsnPredicates = [InMicroMips, NotMips32r6];
+}
+
+class INSN_EVA { list<Predicate> InsnPredicates = [HasEVA]; }
+class INSN_EVA_NOT_32R6_64R6 {
+  list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6, HasEVA];
+}
 
 // The portions of MIPS-III that were also added to MIPS32
 class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
@@ -283,6 +311,28 @@ class INSN_MIPS5_32R2_NOT_32R6_64R6 {
   list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
 }
 
+class ASE_CNMIPS {
+  list<Predicate> InsnPredicates = [HasCnMips];
+}
+
+class ASE_MSA {
+  list<Predicate> InsnPredicates = [HasMSA];
+}
+
+class ASE_MSA_NOT_MSA64 {
+  list<Predicate> InsnPredicates = [HasMSA, NotMips64];
+}
+
+class ASE_MSA64 {
+  list<Predicate> InsnPredicates = [HasMSA, HasMips64];
+}
+
+// Class used for separating microMIPSr6 and microMIPS (r3) instructions.
+// It can only be used on instructions that don't inherit PredicateControl.
+class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl {
+  let InsnPredicates = [InMicroMips, NotMips32r6, NotMips64r6];
+}
+
 //===----------------------------------------------------------------------===//
 class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
@@ -335,6 +385,81 @@ include "MipsInstrFormats.td"
 // Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===// +class ConstantSImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ConstantSImm" # Bits; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isConstantSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits; +} + +class ConstantUImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantUImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits # "_" # Offset; +} + +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, []>; +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantUImm10AsmOperandClass]>; +def ConstantUImm7AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass]>; +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>; +def ConstantSImm6AsmOperandClass + : ConstantSImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>; +def ConstantUImm5Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 1>; +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>; +def ConstantUImm5Plus33AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 33>; +def ConstantUImm5Plus32NormalizeAsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; + // We must also subtract 32 when we render the operand. + let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; +} +def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm5Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<5, 2>"; + let SuperClasses = [ConstantUImm6AsmOperandClass]; + let DiagnosticType = "UImm5_Lsl2"; +} +def ConstantUImm5ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> { + let Name = "ConstantUImm5_0_Report_UImm6"; + let DiagnosticType = "UImm5_0_Report_UImm6"; +} +def ConstantUImm5AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>; +def ConstantUImm4AsmOperandClass + : ConstantUImmAsmOperandClass< + 4, [ConstantUImm5AsmOperandClass, + ConstantUImm5Plus32AsmOperandClass, + ConstantUImm5Plus32NormalizeAsmOperandClass]>; +def ConstantUImm3AsmOperandClass + : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>; +def ConstantUImm2Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>; +def ConstantUImm2AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; +def ConstantUImm1AsmOperandClass + : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; +def ConstantImmzAsmOperandClass : AsmOperandClass { + let Name = "ConstantImmz"; + let RenderMethod = "addConstantUImmOperands<1>"; + let PredicateMethod = "isConstantImmz"; + let SuperClasses = [ConstantUImm1AsmOperandClass]; + let DiagnosticType = "Immz"; +} + def MipsJumpTargetAsmOperand : AsmOperandClass { let Name = "JumpTarget"; let ParserMethod = "parseJumpTarget"; @@ -360,6 +485,10 @@ def calltarget : Operand { def imm64: Operand; +def simm6 : Operand { + let ParserMatchClass = ConstantSImm6AsmOperandClass; + let OperandType = 
"OPERAND_IMMEDIATE"; +} def simm9 : Operand; def simm10 : Operand; def simm11 : Operand; @@ -380,23 +509,12 @@ def simm18_lsl3 : Operand { let ParserMatchClass = MipsJumpTargetAsmOperand; } -def simm20 : Operand { -} +def simm20 : Operand; +def simm32 : Operand; def uimm20 : Operand { } -def MipsUImm10AsmOperand : AsmOperandClass { - let Name = "UImm10"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseImm"; - let PredicateMethod = "isUImm<10>"; -} - -def uimm10 : Operand { - let ParserMatchClass = MipsUImm10AsmOperand; -} - def simm16_64 : Operand { let DecoderMethod = "DecodeSimm16"; } @@ -404,23 +522,71 @@ def simm16_64 : Operand { // Zero def uimmz : Operand { let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantImmzAsmOperandClass; } -// Unsigned Operand -def uimm2 : Operand { +// Unsigned Operands +foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10} in + def uimm # I : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +def uimm2_plus1 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; + let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; } -def uimm3 : Operand { +def uimm5_plus1 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<5, 1>"; + let ParserMatchClass = ConstantUImm5Plus1AsmOperandClass; } -def uimm5 : Operand { +def uimm5_plus32 : Operand { let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass; } -def uimm6 : Operand { +def uimm5_plus33 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<5, 1>"; + let ParserMatchClass = ConstantUImm5Plus33AsmOperandClass; +} + +def uimm5_plus32_normalize : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; +} + +def uimm5_lsl2 : Operand { + let EncoderMethod = "getUImm5Lsl2Encoding"; + let DecoderMethod = "DecodeUImm5lsl2"; + let ParserMatchClass = ConstantUImm5Lsl2AsmOperandClass; +} + +def uimm5_plus32_normalize_64 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; +} + +foreach I = {5} in + def uimm # I # _64 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +// Like uimm5_64 but reports a less confusing error for 32-63 when +// an instruction alias permits that. 
+def uimm5_64_report_uimm6 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass; } def uimm16 : Operand { @@ -435,6 +601,22 @@ def MipsMemAsmOperand : AsmOperandClass { let ParserMethod = "parseMemOperand"; } +def MipsMemSimm9AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<9>"; +} + +def MipsMemSimm9GPRAsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9GPR"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffsetGPR<9>"; +} + def MipsMemSimm11AsmOperand : AsmOperandClass { let Name = "MemOffsetSimm11"; let SuperClasses = [MipsMemAsmOperand]; @@ -485,6 +667,13 @@ def mem_msa : mem_generic { def mem_simm9 : mem_generic { let MIOperandInfo = (ops ptr_rc, simm9); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm9AsmOperand; +} + +def mem_simm9gpr : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm9); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm9GPRAsmOperand; } def mem_simm11 : mem_generic { @@ -512,12 +701,6 @@ def PtrRC : Operand { let ParserMatchClass = GPR32AsmOperand; } -// size operand of ext instruction -def size_ext : Operand { - let EncoderMethod = "getSizeExtEncoding"; - let DecoderMethod = "DecodeExtSize"; -} - // size operand of ins instruction def size_ins : Operand { let EncoderMethod = "getSizeInsEncoding"; @@ -657,7 +840,7 @@ class shift_rotate_reg; -// Load Upper Imediate +// Load Upper Immediate class LoadUpper: InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), [], II_LUI, FrmI, opstr>, IsAsCheapAsAMove { @@ -675,14 +858,19 @@ class Load : - InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } +class Store : + StoreMemory; + // Load/Store Left/Right let canFoldAsLoad = 1 in class LoadLeftRight : InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset), !strconcat(opstr, "\t$rs, $rt, $offset"), - [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch, + [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], II_BCC, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; @@ -752,7 +940,7 @@ class CBranchZero : InstSE<(outs), (ins RO:$rs, opnd:$offset), !strconcat(opstr, "\t$rs, $offset"), - [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch, + [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], II_BCCZ, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; @@ -778,7 +966,7 @@ class SetCC_I : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> { + [(operator targetoperator:$target)], II_J, FrmJ, bopstr> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; @@ -788,7 +976,7 @@ class JumpFJ : - PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>, + PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], II_B>, PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> { let isBranch = 1; let isTerminator = 1; @@ -802,7 +990,7 @@ class UncondBranch : let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFR: - 
InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch, + InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], II_JR, FrmR, opstr>; // Indirect branch @@ -815,23 +1003,23 @@ class IndirectBranch : JumpFR { let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> { + [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTarget"; } class JumpLinkRegPseudo: - PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], IIBranch>, + PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>, PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>; class JumpLinkReg: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [], IIBranch, FrmR>; + [], II_JALR, FrmR, opstr>; class BGEZAL_FT : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZAL, FrmI, opstr> { let hasDelaySlot = DelaySlot; } @@ -840,17 +1028,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, hasExtraSrcRegAllocReq = 1, Defs = [AT] in { class TailCall : - PseudoSE<(outs), (ins calltarget:$target), [], IIBranch>, + PseudoSE<(outs), (ins calltarget:$target), [], II_J>, PseudoInstExpansion<(JumpInst jmptarget:$target)>; class TailCallReg : - PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], IIBranch>, + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, PseudoInstExpansion<(JRInst ResRO:$rs)>; } class BAL_BR_Pseudo : - PseudoSE<(outs), (ins brtarget:$offset), [], IIBranch>, + PseudoSE<(outs), (ins brtarget:$offset), [], II_BCCZAL>, PseudoInstExpansion<(RealInst ZERO, brtarget:$offset)> { let isBranch = 1; let isTerminator = 1; @@ -997,9 +1185,10 @@ class SignExtInReg; // Subword Swap -class SubwordSwap: - InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], - NoItinerary, FrmR, opstr> { +class SubwordSwap: + InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], itin, + FrmR, opstr> { let hasSideEffects = 0; } @@ -1010,8 +1199,8 @@ class ReadHardware : // Ext and Ins class ExtBase: - InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size), + Operand SizeOpnd, SDPatternOperator Op = null_frag> : + InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT, FrmR, opstr>, ISA_MIPS32R2; @@ -1074,6 +1263,9 @@ class TrapBase let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>; +let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, hasSideEffects=1 in +def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>; + let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), [(callseq_start timm:$amt)]>; @@ -1215,10 +1407,11 @@ def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel, LW_FM<0x21>; def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>; let AdditionalPredicates = [NotInMicroMips] in { -def LW : Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel, +def LW : StdMMR6Rel, Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel, LW_FM<0x23>; } -def SB : Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, LW_FM<0x28>; +def SB : 
StdMMR6Rel, Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, + LW_FM<0x28>; def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>; let AdditionalPredicates = [NotInMicroMips] in { def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>; @@ -1259,15 +1452,17 @@ let DecoderNamespace = "COP3_" in { } } -def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; -def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2; +def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; +def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2; -def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2; -def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2; -def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2; -def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2; -def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2; -def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2; +let AdditionalPredicates = [NotInMicroMips] in { + def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2; + def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2; + def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2; + def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2; + def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2; + def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2; +} def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>, ISA_MIPS2_NOT_32R6_64R6; @@ -1290,14 +1485,15 @@ def TRAP : TrapBase; def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { -def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32; + def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18, 0x0>, INSN_MIPS3_32; + def ERETNC : MMRel, ER_FT<"eretnc">, ER_FM<0x18, 0x1>, ISA_MIPS32R5; + def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f, 0x0>, ISA_MIPS32; } -def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32; let AdditionalPredicates = [NotInMicroMips] in { -def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; + def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; + def DI : MMRel, StdMMR6Rel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; } -def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug AdditionalPredicates = [NotInMicroMips] in { @@ -1359,7 +1555,8 @@ def TAILCALL_R : TailCallReg; // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. class PseudoIndirectBranchBase : - MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> { + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], + II_IndirectBranchPseudo> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; @@ -1369,12 +1566,12 @@ class PseudoIndirectBranchBase : def PseudoIndirectBranch : PseudoIndirectBranchBase; -// Return instructions are matched as a RetRA instruction, then ar expanded +// Return instructions are matched as a RetRA instruction, then are expanded // into PseudoReturn/PseudoReturn64 after register allocation. Finally, // MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the // ISA. 
class PseudoReturnBase : MipsPseudo<(outs), (ins RO:$rs), - [], IIBranch> { + [], II_ReturnPseudo> { let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 1; @@ -1441,8 +1638,11 @@ def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32_NOT_32R6_64R6; -/// Word Swap Bytes Within Halfwords -def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2; +let AdditionalPredicates = [NotInMicroMips] in { + /// Word Swap Bytes Within Halfwords + def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, SEB_FM<2, 0x20>, + ISA_MIPS32R2; +} /// No operation. def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; @@ -1485,10 +1685,12 @@ def PseudoSDIV : MultDivPseudo, ISA_MIPS1_NOT_32R6_64R6; def PseudoUDIV : MultDivPseudo, ISA_MIPS1_NOT_32R6_64R6; - +let AdditionalPredicates = [NotInMicroMips] in { def RDHWR : MMRel, ReadHardware, RDHWR_FM; - -def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; +} +// TODO: Add '0 < pos+size <= 32' constraint check to ext instruction +def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, MipsExt>, + EXT_FM<0>; def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers @@ -1499,9 +1701,9 @@ def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>; class Barrier : InstSE<(outs), (ins), asmstr, [], NoItinerary, FrmOther, asmstr>; -def SSNOP : MMRel, Barrier<"ssnop">, BARRIER_FM<1>; +def SSNOP : MMRel, StdMMR6Rel, Barrier<"ssnop">, BARRIER_FM<1>; def EHB : MMRel, Barrier<"ehb">, BARRIER_FM<3>; -def PAUSE : MMRel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; +def PAUSE : MMRel, StdMMR6Rel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; // JR_HB and JALR_HB are defined here using the new style naming // scheme because some of this code is shared with Mips32r6InstrInfo.td @@ -1562,11 +1764,60 @@ def CACHE : MMRel, CacheOp<"cache", mem>, CACHEOP_FM<0b101111>, def PREF : MMRel, CacheOp<"pref", mem>, CACHEOP_FM<0b110011>, INSN_MIPS3_32_NOT_32R6_64R6; +def ROL : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "rol\t$rs, $rt, $rd">; +def ROLImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "rol\t$rs, $rt, $imm">; +def : MipsInstAlias<"rol $rd, $rs", + (ROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"rol $rd, $imm", + (ROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>; + +def ROR : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "ror\t$rs, $rt, $rd">; +def RORImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "ror\t$rs, $rt, $imm">; +def : MipsInstAlias<"ror $rd, $rs", + (ROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"ror $rd, $imm", + (RORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>; + +def DROL : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "drol\t$rs, $rt, $rd">, ISA_MIPS64; +def DROLImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "drol\t$rs, $rt, $imm">, ISA_MIPS64; +def : MipsInstAlias<"drol $rd, $rs", + (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64; +def : MipsInstAlias<"drol $rd, $imm", + (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64; + +def DROR : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + 
"dror\t$rs, $rt, $rd">, ISA_MIPS64; +def DRORImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "dror\t$rs, $rt, $imm">, ISA_MIPS64; +def : MipsInstAlias<"dror $rd, $rs", + (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64; +def : MipsInstAlias<"dror $rd, $imm", + (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// def : MipsInstAlias<"move $dst, $src", - (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>, + (OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, + GPR_32 { + let AdditionalPredicates = [NotInMicroMips]; +} +def : MipsInstAlias<"move $dst, $src", + (ADDu GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32 { let AdditionalPredicates = [NotInMicroMips]; } @@ -1630,27 +1881,27 @@ def : MipsInstAlias<"beqz $rs,$offset", def : MipsInstAlias<"beqzl $rs,$offset", (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; - + def : MipsInstAlias<"break", (BREAK 0, 0), 1>; def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; let AdditionalPredicates = [NotInMicroMips] in { -def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2; + def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2; + def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2; +} +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"teq $rs, $rt", + (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tge $rs, $rt", + (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tgeu $rs, $rt", + (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tlt $rs, $rt", + (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tltu $rs, $rt", + (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tne $rs, $rt", + (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; } -def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2; - -def : MipsInstAlias<"teq $rs, $rt", - (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tge $rs, $rt", - (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tgeu $rs, $rt", - (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tlt $rs, $rt", - (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tltu $rs, $rt", - (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tne $rs, $rt", - (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; - def : MipsInstAlias<"sll $rd, $rt, $rs", (SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"sub, $rd, $rs, $imm", @@ -1678,7 +1929,7 @@ def : MipsInstAlias<"sync", class LoadImmediate32 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>; +def LoadImm32 : LoadImmediate32<"li", simm32, GPR32Opnd>; class LoadAddressFromReg32 : @@ -1689,13 +1940,16 @@ def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>; class LoadAddressFromImm32 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>; +def LoadAddrImm32 : LoadAddressFromImm32<"la", simm32, GPR32Opnd>; def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins 
GPR32Opnd:$rs),
                                  "jal\t$rd, $rs"> ;
 
 def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
                                   "jal\t$rs"> ;
 
+def NORImm : MipsAsmPseudoInst<(outs),
+                               (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+                               "nor\t$rs, $rt, $imm"> ;
+
 let hasDelaySlot = 1 in {
 def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
                                (ins imm64:$imm64, brtarget:$offset),
@@ -1718,12 +1972,62 @@ def BLTU : CondBranchPseudo<"bltu">;
 def BLEU : CondBranchPseudo<"bleu">;
 def BGEU : CondBranchPseudo<"bgeu">;
 def BGTU : CondBranchPseudo<"bgtu">;
+def BLTL : CondBranchPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEL : CondBranchPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEL : CondBranchPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTL : CondBranchPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTUL: CondBranchPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEUL: CondBranchPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEUL: CondBranchPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTUL: CondBranchPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+class CondBranchImmPseudo<string instr_asm> :
+  MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
+                    !strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+
+def BLTImmMacro : CondBranchImmPseudo<"blt">;
+def BLEImmMacro : CondBranchImmPseudo<"ble">;
+def BGEImmMacro : CondBranchImmPseudo<"bge">;
+def BGTImmMacro : CondBranchImmPseudo<"bgt">;
+def BLTUImmMacro : CondBranchImmPseudo<"bltu">;
+def BLEUImmMacro : CondBranchImmPseudo<"bleu">;
+def BGEUImmMacro : CondBranchImmPseudo<"bgeu">;
+def BGTUImmMacro : CondBranchImmPseudo<"bgtu">;
+def BLTLImmMacro : CondBranchImmPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLELImmMacro : CondBranchImmPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGELImmMacro : CondBranchImmPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTLImmMacro : CondBranchImmPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTULImmMacro : CondBranchImmPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEULImmMacro : CondBranchImmPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEULImmMacro : CondBranchImmPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+// FIXME: Predicates are removed because instructions are matched regardless of
+// predicates, because PredicateControl was not in the hierarchy. This was
+// done to emit a more precise error message from the expansion function.
+// Once the tablegen-erated errors are made better, this needs to be fixed and
+// the predicates need to be restored.
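// As a concrete example of the compare-and-branch macros above (a sketch;
// the real expansion lives in the target's assembly parser), an assembler
// conventionally expands "blt $rs, $rt, L" through $at:
//
//   slt  $at, $rs, $rt    # $at = ($rs < $rt) ? 1 : 0
//   bne  $at, $zero, L    # branch when the comparison held
//
// The *ImmMacro forms first materialize the immediate into a register
// (li/addiu) and then perform the same compare and branch.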
+ +def SDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "div\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6; + +def UDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "divu\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6; + +def DSDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "ddiv\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6; + +def DUDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "ddivu\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6; + +def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), + "ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), - "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + "ulhu\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), - "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + "ulw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -1939,6 +2243,16 @@ let AddedComplexity = 40 in { } } +// Atomic load patterns. +def : MipsPat<(atomic_load_8 addr:$a), (LB addr:$a)>; +def : MipsPat<(atomic_load_16 addr:$a), (LH addr:$a)>; +def : MipsPat<(atomic_load_32 addr:$a), (LW addr:$a)>; + +// Atomic store patterns. +def : MipsPat<(atomic_store_8 addr:$a, GPR32:$v), (SB GPR32:$v, addr:$a)>; +def : MipsPat<(atomic_store_16 addr:$a, GPR32:$v), (SH GPR32:$v, addr:$a)>; +def : MipsPat<(atomic_store_32 addr:$a, GPR32:$v), (SW GPR32:$v, addr:$a)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// @@ -1964,6 +2278,10 @@ include "MipsDSPInstrInfo.td" include "MipsMSAInstrFormats.td" include "MipsMSAInstrInfo.td" +// EVA +include "MipsEVAInstrFormats.td" +include "MipsEVAInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" @@ -1972,3 +2290,11 @@ include "MicroMipsInstrFPU.td" // Micromips r6 include "MicroMips32r6InstrFormats.td" include "MicroMips32r6InstrInfo.td" + +// Micromips64 r6 +include "MicroMips64r6InstrFormats.td" +include "MicroMips64r6InstrInfo.td" + +// Micromips DSP +include "MicroMipsDSPInstrFormats.td" +include "MicroMipsDSPInstrInfo.td" diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 90f8cc0cacfd..49fb99a8ec43 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -148,7 +148,7 @@ void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) { // Insert NewMBB and fix control flow. MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); NewMBB->transferSuccessors(MBB); - NewMBB->removeSuccessor(Tgt); + NewMBB->removeSuccessor(Tgt, true); MBB->addSuccessor(NewMBB); MBB->addSuccessor(Tgt); MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB); @@ -161,7 +161,7 @@ void MipsLongBranch::initMBBInfo() { // Split the MBBs if they have two branches. Each basic block should have at // most one branch after this loop is executed. 
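  // (MachineFunction::iterator no longer converts implicitly to
  // MachineBasicBlock *, which is why the call below spells its argument
  // as '&*I++': dereference the iterator, take the block's address, then
  // advance.)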
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;) - splitMBB(I++); + splitMBB(&*I++); MF->RenumberBlocks(); MBBInfos.clear(); @@ -262,8 +262,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { static_cast(Subtarget.getInstrInfo()); MF->insert(FallThroughMBB, LongBrMBB); - MBB->removeSuccessor(TgtMBB); - MBB->addSuccessor(LongBrMBB); + MBB->replaceSuccessor(TgtMBB, LongBrMBB); if (IsPIC) { MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); @@ -434,7 +433,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB)); } else // Change branch destination and reverse condition. - replaceBranch(*MBB, I.Br, DL, FallThroughMBB); + replaceBranch(*MBB, I.Br, DL, &*FallThroughMBB); } static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index bff2d0fab1ec..7d25ea56e3d5 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -7,18 +7,12 @@ // //===----------------------------------------------------------------------===// -def HasMSA : Predicate<"Subtarget->hasMSA()">, - AssemblerPredicate<"FeatureMSA">; - -class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { - let Predicates = [HasMSA]; +class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl, ASE_MSA { + let EncodingPredicates = [HasStdEnc]; let Inst{31-26} = 0b011110; } -class MSA64Inst : MSAInst { - let Predicates = [HasMSA, HasMips64]; -} - class MSACBranch : MSAInst { let Inst{31-26} = 0b010001; } @@ -27,10 +21,6 @@ class MSASpecial : MSAInst { let Inst{31-26} = 0b000000; } -class MSA64Special : MSA64Inst { - let Inst{31-26} = 0b000000; -} - class MSAPseudo pattern, InstrItinClass itin = IIPseudo>: MipsPseudo { @@ -100,7 +90,7 @@ class MSA_2R_FILL_FMT major, bits<2> df, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_2R_FILL_D_FMT major, bits<2> df, bits<6> minor>: MSA64Inst { +class MSA_2R_FILL_D_FMT major, bits<2> df, bits<6> minor>: MSAInst { bits<5> rs; bits<5> wd; @@ -293,7 +283,7 @@ class MSA_ELM_COPY_W_FMT major, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_ELM_COPY_D_FMT major, bits<6> minor>: MSA64Inst { +class MSA_ELM_COPY_D_FMT major, bits<6> minor>: MSAInst { bits<4> n; bits<5> ws; bits<5> rd; @@ -345,7 +335,7 @@ class MSA_ELM_INSERT_W_FMT major, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_ELM_INSERT_D_FMT major, bits<6> minor>: MSA64Inst { +class MSA_ELM_INSERT_D_FMT major, bits<6> minor>: MSAInst { bits<6> n; bits<5> rs; bits<5> wd; @@ -450,7 +440,7 @@ class SPECIAL_LSA_FMT minor>: MSASpecial { let Inst{5-0} = minor; } -class SPECIAL_DLSA_FMT minor>: MSA64Special { +class SPECIAL_DLSA_FMT minor>: MSASpecial { bits<5> rs; bits<5> rt; bits<5> rd; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 970e98ea9e1e..eacfcec78bc7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -63,30 +63,13 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def immZExt1Ptr : ImmLeaf(Imm);}]>; +def immZExt2Ptr : ImmLeaf(Imm);}]>; def immZExt4Ptr : ImmLeaf(Imm);}]>; def immZExt6Ptr : ImmLeaf(Imm);}]>; // Operands -// The immediate of an LSA instruction needs special handling -// as the encoded value should be 
subtracted by one. -def uimm2LSAAsmOperand : AsmOperandClass { - let Name = "LSAImm"; - let ParserMethod = "parseLSAImm"; - let RenderMethod = "addImmOperands"; -} - -def LSAImm : Operand { - let PrintMethod = "printUnsignedImm"; - let EncoderMethod = "getLSAImmEncoding"; - let DecoderMethod = "DecodeLSAImm"; - let ParserMatchClass = uimm2LSAAsmOperand; -} - -def uimm4 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def uimm4_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } @@ -95,10 +78,6 @@ def uimm6_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } -def uimm8 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def simm5 : Operand; def vsplat_uimm1 : Operand { @@ -639,7 +618,6 @@ class COPY_S_D_ENC : MSA_ELM_COPY_D_FMT<0b0010, 0b011001>; class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>; class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>; class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>; -class COPY_U_D_ENC : MSA_ELM_COPY_D_FMT<0b0011, 0b011001>; class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>; @@ -1195,47 +1173,14 @@ class MSA_BIT_D_DESC_BASE { +class MSA_BIT_X_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm3:$m); + dag InOperandList = (ins ROWS:$ws, ImmOp:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_H_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_W_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm5:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. 
-class MSA_BIT_D_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm6:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))]; InstrItinClass Itinerary = itin; } @@ -1291,13 +1236,14 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ImmOp:$n); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, - immZExt4:$n))]; + Imm:$n))]; string Constraints = "$wd = $wd_in"; InstrItinClass Itinerary = itin; } @@ -1479,7 +1425,7 @@ class MSA_CBRANCH_DESC_BASE { dag InOperandList = (ins ROWD:$wt, brtarget:$offset); string AsmString = !strconcat(instr_asm, "\t$wt, $offset"); list Pattern = []; - InstrItinClass Itinerary = IIBranch; + InstrItinClass Itinerary = NoItinerary; bit isBranch = 1; bit isTerminator = 1; bit hasDelaySlot = 1; @@ -1519,13 +1465,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2); + dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, - immZExt6:$n, + Imm:$n, ROWS:$ws, immz:$n2))]; InstrItinClass Itinerary = itin; @@ -1934,8 +1881,6 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16, GPR32Opnd, MSA128HOpnd>; class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32, GPR32Opnd, MSA128WOpnd>; -class COPY_U_D_DESC : MSA_COPY_DESC_BASE<"copy_u.d", vextract_zext_i64, v2i64, - GPR64Opnd, MSA128DOpnd>; class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE; @@ -2346,13 +2291,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, MSA128DOpnd>; class LD_DESC_BASE { dag OutOperandList = (outs RORD:$rd); - dag InOperandList = (ins RORS:$rs, RORT:$rt, LSAImm:$sa); + dag InOperandList = (ins RORS:$rs, RORT:$rt, uimm2_plus1:$sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $sa"); list Pattern = [(set RORD:$rd, (add RORT:$rt, (shl RORS:$rs, @@ -2561,23 +2506,23 @@ class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; -class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128BOpnd>; -class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128HOpnd>; -class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128WOpnd>; -class SAT_S_D_DESC : 
MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128DOpnd>; +class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, + immZExt6, MSA128DOpnd>; -class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128BOpnd>; -class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128HOpnd>; -class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - MSA128WOpnd>; -class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128DOpnd>; +class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, + immZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2589,13 +2534,17 @@ class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>; class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, - MSA128BOpnd>; + MSA128BOpnd, MSA128BOpnd, uimm4, + immZExt4>; class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, - MSA128HOpnd>; + MSA128HOpnd, MSA128HOpnd, uimm3, + immZExt3>; class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, - MSA128WOpnd>; + MSA128WOpnd, MSA128WOpnd, uimm2, + immZExt2>; class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, - MSA128DOpnd>; + MSA128DOpnd, MSA128DOpnd, uimm1, + immZExt1>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2648,14 +2597,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; -class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128BOpnd>; -class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128HOpnd>; -class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128WOpnd>; -class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128DOpnd>; +class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, + immZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2676,14 +2625,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, 
MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; -class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128BOpnd>; -class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128HOpnd>; -class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128WOpnd>; -class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128DOpnd>; +class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, + immZExt6, MSA128DOpnd>; class ST_DESC_BASE; +// Vector extraction with fixed index. +// +// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than +// COPY_U_W, even for the zero-extended case. This is because our forward +// compatibility strategy is to consider registers to be infinitely +// sign-extended so that a MIPS64 can execute MIPS32 code without getting +// different register values. +def : MSAPat<(vextract_zext_i32 (v4i32 MSA128W:$ws), immZExt2Ptr:$idx), + (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64; +def : MSAPat<(vextract_zext_i32 (v4f32 MSA128W:$ws), immZExt2Ptr:$idx), + (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64; + +// Extracting 64-bit values on MSA64 should always use COPY_S_D rather than +// COPY_U_D, even for the zero-extended case. This is because our forward +// compatibility strategy is to consider registers to be infinitely +// sign-extended so that a hypothetical MIPS128 would be able to execute MIPS64 +// code without getting different register values. +def : MSAPat<(vextract_zext_i64 (v2i64 MSA128D:$ws), immZExt1Ptr:$idx), + (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64; +def : MSAPat<(vextract_zext_i64 (v2f64 MSA128D:$ws), immZExt1Ptr:$idx), + (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64; + // Vector extraction with variable index def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)), (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws, diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 0d1ee046f0dc..c7d2738af1d4 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -24,42 +24,6 @@ static cl::opt FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true), cl::desc("Always use $gp as the global base register.")); -// class MipsCallEntry. 
-MipsCallEntry::MipsCallEntry(StringRef N) {
-#ifndef NDEBUG
-  Name = N;
-  Val = nullptr;
-#endif
-}
-
-MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
-#ifndef NDEBUG
-  Val = V;
-#endif
-}
-
-bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
-  return false;
-}
-
-bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
-  return false;
-}
-
-bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
-  return false;
-}
-
-void MipsCallEntry::printCustom(raw_ostream &O) const {
-  O << "MipsCallEntry: ";
-#ifndef NDEBUG
-  if (Val)
-    O << Val->getName();
-  else
-    O << Name;
-#endif
-}
-
 MipsFunctionInfo::~MipsFunctionInfo() {}
 
 bool MipsFunctionInfo::globalBaseRegSet() const {
@@ -111,27 +75,32 @@ void MipsFunctionInfo::createEhDataRegsFI() {
   }
 }
 
+void MipsFunctionInfo::createISRRegFI() {
+  // ISRs require spill slots for Status & ErrorPC Coprocessor 0 registers.
+  // The current implementation only supports Mips32r2+, not Mips64rX. Status
+  // is always 32 bits; ErrorPC is 32 or 64 bits dependent on the
+  // architecture, though here Mips32r2+ is the supported architecture.
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+
+  for (int I = 0; I < 2; ++I)
+    ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
+        RC->getSize(), RC->getAlignment(), false);
+}
+
 bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
   return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
                         || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
 }
 
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
-  std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
-
-  if (!E)
-    E = llvm::make_unique<MipsCallEntry>(Name);
-
-  return MachinePointerInfo(E.get());
+bool MipsFunctionInfo::isISRRegFI(int FI) const {
+  return IsISR && (FI == ISRDataRegFI[0] || FI == ISRDataRegFI[1]);
+}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const char *ES) {
+  return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
 }
 
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
-  std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
-
-  if (!E)
-    E = llvm::make_unique<MipsCallEntry>(Val);
-
-  return MachinePointerInfo(E.get());
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+  return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
 }
 
 int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 32436efa2eda..a2f6ee03604f 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,12 +15,10 @@
 #define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
 
 #include "Mips16HardFloatInfo.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -30,31 +28,13 @@
 
 namespace llvm {
 
-/// \brief A class derived from PseudoSourceValue that represents a GOT entry
-/// resolved by lazy-binding.
-class MipsCallEntry : public PseudoSourceValue { -public: - explicit MipsCallEntry(StringRef N); - explicit MipsCallEntry(const GlobalValue *V); - bool isConstant(const MachineFrameInfo *) const override; - bool isAliased(const MachineFrameInfo *) const override; - bool mayAlias(const MachineFrameInfo *) const override; - -private: - void printCustom(raw_ostream &O) const override; -#ifndef NDEBUG - std::string Name; - const GlobalValue *Val; -#endif -}; - /// MipsFunctionInfo - This class is derived from MachineFunction private /// Mips target-specific information for each MachineFunction. class MipsFunctionInfo : public MachineFunctionInfo { public: MipsFunctionInfo(MachineFunction &MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), - VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false), + VarArgsFrameIndex(0), CallsEhReturn(false), IsISR(false), SaveS2(false), MoveF64ViaSpillFI(-1) {} ~MipsFunctionInfo(); @@ -86,13 +66,21 @@ public: int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } bool isEhDataRegFI(int FI) const; - /// \brief Create a MachinePointerInfo that has a MipsCallEntr object - /// representing a GOT entry for an external function. - MachinePointerInfo callPtrInfo(StringRef Name); + /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue + /// object representing a GOT entry for an external function. + MachinePointerInfo callPtrInfo(const char *ES); - /// \brief Create a MachinePointerInfo that has a MipsCallEntr object + // Functions with the "interrupt" attribute require special prologues, + // epilogues and additional spill slots. + bool isISR() const { return IsISR; } + void setISR() { IsISR = true; } + void createISRRegFI(); + int getISRRegFI(unsigned Reg) const { return ISRDataRegFI[Reg]; } + bool isISRRegFI(int FI) const; + + /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object /// representing a GOT entry for a global function. - MachinePointerInfo callPtrInfo(const GlobalValue *Val); + MachinePointerInfo callPtrInfo(const GlobalValue *GV); void setSaveS2() { SaveS2 = true; } bool hasSaveS2() const { return SaveS2; } @@ -136,17 +124,18 @@ private: /// Frame objects for spilling eh data registers. int EhDataRegFI[4]; + /// ISR - Whether the function is an Interrupt Service Routine. + bool IsISR; + + /// Frame objects for spilling C0_STATUS, C0_EPC + int ISRDataRegFI[2]; + // saveS2 bool SaveS2; /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the /// O32 FPXX ABI is enabled. -1 is used to denote invalid index. int MoveF64ViaSpillFI; - - /// MipsCallEntry maps. - StringMap> ExternalCallEntries; - ValueMap> - GlobalCallEntries; }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f6647e6a8468..28e5a425849f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -84,6 +84,16 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MipsSubtarget &Subtarget = MF->getSubtarget(); + const Function *F = MF->getFunction(); + if (F->hasFnAttribute("interrupt")) { + if (Subtarget.hasMips64()) + return Subtarget.hasMips64r6() ? CSR_Interrupt_64R6_SaveList + : CSR_Interrupt_64_SaveList; + else + return Subtarget.hasMips32r6() ? 
CSR_Interrupt_32R6_SaveList + : CSR_Interrupt_32_SaveList; + } + if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; @@ -284,6 +294,16 @@ getFrameRegister(const MachineFunction &MF) const { } bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { + // Avoid realigning functions that explicitly do not want to be realigned. + // Normally, we should report an error when a function should be dynamically + // realigned but also has the attribute no-realign-stack. Unfortunately, + // with this attribute, MachineFrameInfo clamps each new object's alignment + // to that of the stack's alignment as specified by the ABI. As a result, + // the information of whether we have objects with larger alignment + // requirement than the stack's alignment is already lost at this point. + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + const MipsSubtarget &Subtarget = MF.getSubtarget(); unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64; unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64; @@ -306,42 +326,3 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { // sized objects. return MF.getRegInfo().canReserveReg(BP); } - -bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MipsSubtarget &Subtarget = MF.getSubtarget(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - bool CanRealign = canRealignStack(MF); - - // Avoid realigning functions that explicitly do not want to be realigned. - // Normally, we should report an error when a function should be dynamically - // realigned but also has the attribute no-realign-stack. Unfortunately, - // with this attribute, MachineFrameInfo clamps each new object's alignment - // to that of the stack's alignment as specified by the ABI. As a result, - // the information of whether we have objects with larger alignment - // requirement than the stack's alignment is already lost at this point. - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - const Function *F = MF.getFunction(); - if (F->hasFnAttribute(Attribute::StackAlignment)) { -#ifdef DEBUG - if (!CanRealign) - DEBUG(dbgs() << "It's not possible to realign the stack of the function: " - << F->getName() << "\n"); -#endif - return CanRealign; - } - - unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment(); - if (MFI->getMaxAlignment() > StackAlignment) { -#ifdef DEBUG - if (!CanRealign) - DEBUG(dbgs() << "It's not possible to realign the stack of the function: " - << F->getName() << "\n"); -#endif - return CanRealign; - } - - return false; -} diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index ee1f6bcd7390..5de68a21b73e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -61,9 +61,7 @@ public: RegScavenger *RS = nullptr) const; // Stack realignment queries. - bool canRealignStack(const MachineFunction &MF) const; - - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; /// Debug information queries. 
unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 096b3bee5d07..a4abd62ee607 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -17,6 +17,7 @@ #include "MipsMachineFunction.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -319,6 +320,15 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { + const MachineOperand &Op1 = I->getOperand(1); + const MachineOperand &Op2 = I->getOperand(2); + + if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) { + unsigned DstReg = I->getOperand(0).getReg(); + BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg); + return true; + } + // For fpxx and when mfhc1 is not available, use: // spill + reload via ldc1 // @@ -335,8 +345,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || (FP64 && !Subtarget.useOddSPReg())) { unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); + unsigned SrcReg = Op1.getReg(); + unsigned N = Op2.getImm(); int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N)); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are @@ -352,8 +362,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // We re-use the same spill slot each time so that the stack frame doesn't // grow too much in functions with a large number of moves. int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC); - TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, - &RegInfo, 0); + TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0); TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset); return true; } @@ -376,12 +385,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, *static_cast(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc dl; MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); - unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned MOVE = ABI.GetGPRMoveOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; @@ -407,6 +416,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + if (MF.getFunction()->hasFnAttribute("interrupt")) + emitInterruptPrologueStub(MF, MBB); + const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { @@ -491,7 +503,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. 
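A note before the prologue hunk that follows: MIPS has no architected register-to-register move, so "move $fp, $sp" is materialized as a three-operand ALU op against $zero, and this patch switches the opcode from ADDu to whatever ABI.GetGPRMoveOp() returns. A minimal sketch of the emission, assuming a hypothetical helper name (BuildMI and TII.get are the real LLVM APIs used below):

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Sketch only: "rd = rs <op> $zero" copies rs into rd for any move opcode
// (addu, or, daddu, or64, ...).
static void emitMove(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, const TargetInstrInfo &TII,
                     unsigned MoveOpc, unsigned Dst, unsigned Src,
                     unsigned Zero) {
  BuildMI(MBB, MBBI, DL, TII.get(MoveOpc), Dst)
      .addReg(Src)
      .addReg(Zero)
      .setMIFlag(MachineInstr::FrameSetup);
}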
-    BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
+    BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO)
       .setMIFlag(MachineInstr::FrameSetup);

     // emit ".cfi_def_cfa_register $fp"
@@ -514,7 +526,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
     if (hasBP(MF)) {
       // move $s7, $sp
       unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
-      BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
+      BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP)
         .addReg(SP)
         .addReg(ZERO);
     }
@@ -522,6 +534,135 @@
   }
 }

+void MipsSEFrameLowering::emitInterruptPrologueStub(
+    MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Report an error if the target doesn't support Mips32r2 or later.
+  // The epilogue relies on the use of the "ehb" to clear execution
+  // hazards. Pre R2 Mips relies on an implementation defined number
+  // of "ssnop"s to clear the execution hazard. Support for ssnop hazard
+  // clearing is not provided so reject that configuration.
+  if (!STI.hasMips32r2())
+    report_fatal_error(
+        "\"interrupt\" attribute is not supported on pre-MIPS32R2 or "
+        "MIPS16 targets.");
+
+  // The GP register contains the "user" value, so we cannot perform
+  // any gp relative loads until we restore the "kernel" or "system" gp
+  // value. Until support is written we shall only accept the static
+  // relocation model.
+  if (STI.getRelocationModel() != Reloc::Static)
+    report_fatal_error("\"interrupt\" attribute is only supported for the "
+                       "static relocation model on MIPS at the present time.");
+
+  if (!STI.isABI_O32() || STI.hasMips64())
+    report_fatal_error("\"interrupt\" attribute is only supported for the "
+                       "O32 ABI on MIPS32R2+ at the present time.");
+
+  // Perform ISR handling like GCC does.
+  StringRef IntKind =
+      MF.getFunction()->getFnAttribute("interrupt").getValueAsString();
+  const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+  // EIC interrupt handling needs to read the Cause register to disable
+  // interrupts.
+  if (IntKind == "eic") {
+    // Coprocessor registers are always live per se.
+    MBB.addLiveIn(Mips::COP013);
+    BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0)
+        .addReg(Mips::COP013)
+        .addImm(0)
+        .setMIFlag(MachineInstr::FrameSetup);
+
+    BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0)
+        .addReg(Mips::K0)
+        .addImm(10)
+        .addImm(6)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Fetch and spill EPC.
+  MBB.addLiveIn(Mips::COP014);
+  BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+      .addReg(Mips::COP014)
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+                                      MipsFI->getISRRegFI(0), PtrRC,
+                                      STI.getRegisterInfo(), 0);
+
+  // Fetch and spill Status.
+  MBB.addLiveIn(Mips::COP012);
+  BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+      .addReg(Mips::COP012)
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+                                      MipsFI->getISRRegFI(1), PtrRC,
+                                      STI.getRegisterInfo(), 0);
+
+  // Build the configuration for disabling lower priority interrupts. Non-EIC
+  // interrupts need to be masked off with zero, EIC from the Cause register.
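Before the INS sequence that follows, it may help to see the bit fields written out. The positions (Cause.RIPL at bits 15:10 in EIC mode, Status KSU/ERL/EXL at bits 4:1, Status.CU1 at bit 29) come from the MIPS32 privileged architecture; the snippet below is a plain-C++ illustration of what the EXT/INS operand pairs compute, not part of the patch, and the example register values are made up:

#include <cstdint>
#include <cstdio>

int main() {
  // ext $k0, $k0, 10, 6: extract Cause.RIPL (bits 15:10) in EIC mode.
  const uint32_t Cause = 0x0000ac00;           // example value only
  const uint32_t RIPL = (Cause >> 10) & 0x3f;  // pos = 10, size = 6

  // ins $k1, <src>, pos, size: replace Status bits [pos+size-1:pos] with
  // the low bits of <src> ($zero for fixed IRQ kinds, $k0/RIPL for EIC).
  uint32_t Status = 0xffffffffu;               // example value only
  const unsigned Pos = 10, Size = 6;           // the EIC case above
  const uint32_t Mask = ((1u << Size) - 1u) << Pos;
  Status = (Status & ~Mask) | ((RIPL << Pos) & Mask);

  std::printf("RIPL=%u Status=0x%08x\n", (unsigned)RIPL, (unsigned)Status);
  return 0;
}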
+ unsigned InsPosition = 8; + unsigned InsSize = 0; + unsigned SrcReg = Mips::ZERO; + + // If the interrupt we're tied to is the EIC, switch the source for the + // masking off interrupts to the cause register. + if (IntKind == "eic") { + SrcReg = Mips::K0; + InsPosition = 10; + InsSize = 6; + } else + InsSize = StringSwitch(IntKind) + .Case("sw0", 1) + .Case("sw1", 2) + .Case("hw0", 3) + .Case("hw1", 4) + .Case("hw2", 5) + .Case("hw3", 6) + .Case("hw4", 7) + .Case("hw5", 8) + .Default(0); + assert(InsSize != 0 && "Unknown interrupt type!"); + + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(SrcReg) + .addImm(InsPosition) + .addImm(InsSize) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Mask off KSU, ERL, EXL + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(Mips::ZERO) + .addImm(1) + .addImm(4) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Disable the FPU as we are not spilling those register sets. + if (!STI.useSoftFloat()) + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(Mips::ZERO) + .addImm(29) + .addImm(1) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Set the new status + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) + .addReg(Mips::K1) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); +} + void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -533,12 +674,12 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, const MipsRegisterInfo &RegInfo = *static_cast(STI.getRegisterInfo()); - DebugLoc dl = MBBI->getDebugLoc(); + DebugLoc DL = MBBI->getDebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); - unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned MOVE = ABI.GetGPRMoveOp(); // if framepointer enabled, restore the stack pointer. if (hasFP(MF)) { @@ -549,7 +690,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, --I; // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + BuildMI(MBB, I, DL, TII.get(MOVE), SP).addReg(FP).addReg(ZERO); } if (MipsFI->callsEhReturn()) { @@ -568,6 +709,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, } } + if (MF.getFunction()->hasFnAttribute("interrupt")) + emitInterruptEpilogueStub(MF, MBB); + // Get the number of bytes from FrameInfo uint64_t StackSize = MFI->getStackSize(); @@ -578,13 +722,59 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, TII.adjustStackPtr(SP, StackSize, MBB, MBBI); } +void MipsSEFrameLowering::emitInterruptEpilogueStub( + MachineFunction &MF, MachineBasicBlock &MBB) const { + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Perform ISR handling like GCC + const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; + + // Disable Interrupts. 
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::DI), Mips::ZERO); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB)); + + // Restore EPC + STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, + MipsFI->getISRRegFI(0), PtrRC, + STI.getRegisterInfo()); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014) + .addReg(Mips::K1) + .addImm(0); + + // Restore Status + STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, + MipsFI->getISRRegFI(1), PtrRC, + STI.getRegisterInfo()); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) + .addReg(Mips::K1) + .addImm(0); +} + +int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsABIInfo ABI = STI.getABI(); + + if (MFI->isFixedObjectIndex(FI)) + FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr(); + else + FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + bool MipsSEFrameLowering:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); + MachineBasicBlock *EntryBlock = &MF->front(); const TargetInstrInfo &TII = *STI.getInstrInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { @@ -599,6 +789,26 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (!IsRAAndRetAddrIsTaken) EntryBlock->addLiveIn(Reg); + // ISRs require HI/LO to be spilled into kernel registers to be then + // spilled to the stack frame. + bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 || + Reg == Mips::HI0 || Reg == Mips::HI0_64); + const Function *Func = MBB.getParent()->getFunction(); + if (IsLOHI && Func->hasFnAttribute("interrupt")) { + DebugLoc DL = MI->getDebugLoc(); + + unsigned Op = 0; + if (!STI.getABI().ArePtrs64bit()) { + Op = (Reg == Mips::HI0) ? Mips::MFHI : Mips::MFLO; + Reg = Mips::K0; + } else { + Op = (Reg == Mips::HI0) ? Mips::MFHI64 : Mips::MFLO64; + Reg = Mips::K0_64; + } + BuildMI(MBB, MI, DL, TII.get(Op), Mips::K0) + .setMIFlag(MachineInstr::FrameSetup); + } + // Insert the spill to the stack frame. bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -622,7 +832,8 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { } /// Mark \p Reg and all registers aliasing it in the bitset. -void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) { +static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) SavedRegs.set(*AI); @@ -648,6 +859,10 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (MipsFI->callsEhReturn()) MipsFI->createEhDataRegsFI(); + // Create spill slots for Coprocessor 0 registers if function is an ISR. + if (MipsFI->isISR()) + MipsFI->createISRRegFI(); + // Expand pseudo instructions which load, store or copy accumulators. // Add an emergency spill slot if a pseudo was expanded. 
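createISRRegFI() itself is outside this hunk (it lives in MipsMachineFunction.cpp). Judging from the EhDataRegFI handling above, it is expected to reserve two GPR-sized stack objects for the COP0 EPC and Status values; treat this body as an assumption, not the patch's code:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Assumed implementation sketch, mirroring createEhDataRegsFI().
void MipsFunctionInfo::createISRRegFI() {
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  // Two word-sized slots: ISRDataRegFI[0] for EPC, ISRDataRegFI[1] for
  // Status, matching the getISRRegFI(0)/(1) uses in the prologue stub.
  for (int I = 0; I < 2; ++I)
    ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
        RC->getSize(), RC->getAlignment(), /*isSS=*/false);
}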
if (ExpandPseudo(MF).expand()) { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 9cb32e6c7829..63cd3cebc56a 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -27,6 +27,9 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -37,8 +40,13 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; unsigned ehDataReg(unsigned I) const; -}; +private: + void emitInterruptEpilogueStub(MachineFunction &MF, + MachineBasicBlock &MBB) const; + void emitInterruptPrologueStub(MachineFunction &MF, + MachineBasicBlock &MBB) const; +}; } // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 2ebfbd17d7d0..6f001ea74b30 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -136,7 +136,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; const MipsABIInfo &ABI = static_cast(TM).getABI(); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index b319fd07884b..efe22fba98ce 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1181,6 +1181,10 @@ bool MipsSETargetLowering::isEligibleForTailCallOptimization( if (!EnableMipsTailCalls) return false; + // Exception has to be cleared with eret. + if (FI.isISR()) + return false; + // Return false if either the callee or caller has a byval argument. if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) return false; @@ -1786,9 +1790,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_fadd_w: - case Intrinsic::mips_fadd_d: + case Intrinsic::mips_fadd_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away case Intrinsic::mips_fceq_w: case Intrinsic::mips_fceq_d: @@ -1831,9 +1837,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: - case Intrinsic::mips_fdiv_d: + case Intrinsic::mips_fdiv_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. 
return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ffint_u_w: case Intrinsic::mips_ffint_u_d: return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), @@ -1856,6 +1864,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::mips_fexp2_w: case Intrinsic::mips_fexp2_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode( ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), @@ -1869,11 +1878,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_fmul_w: - case Intrinsic::mips_fmul_d: + case Intrinsic::mips_fmul_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_fmsub_w: case Intrinsic::mips_fmsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, @@ -1886,9 +1898,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsqrt_d: return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsub_w: - case Intrinsic::mips_fsub_d: + case Intrinsic::mips_fsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ftrunc_u_w: case Intrinsic::mips_ftrunc_u_d: return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 786307b95f88..e6f7fe9aae1d 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -88,7 +88,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (isMicroMips) Opc = Mips::MOVE16_MM; else - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + Opc = Mips::OR, ZeroReg = Mips::ZERO; } else if (Mips::CCRRegClass.contains(SrcReg)) Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) @@ -141,7 +141,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::FMOV_D64; else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
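This copyPhysReg hunk changes the canonical GPR-to-GPR copy from "addu $rd, $rs, $zero" to "or $rd, $rs, $zero" (and, just below, DADDu to OR64 for the 64-bit case). Both forms are exact copies for every input value, which a few lines of plain C++ can sanity-check:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t Zero = 0; // $zero always reads as 0
  for (uint32_t Rs : {0u, 1u, 0x80000000u, 0xdeadbeefu, 0xffffffffu}) {
    assert((Rs | Zero) == Rs); // or   $rd, $rs, $zero
    assert((Rs + Zero) == Rs); // addu $rd, $rs, $zero (wrapping, no trap)
  }
  return 0;
}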
if (Mips::GPR64RegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; else if (Mips::HI64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; else if (Mips::LO64RegClass.contains(SrcReg)) @@ -182,7 +182,6 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); unsigned Opc = 0; @@ -213,6 +212,33 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::ST_W; else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) Opc = Mips::ST_D; + else if (Mips::LO32RegClass.hasSubClassEq(RC)) + Opc = Mips::SW; + else if (Mips::LO64RegClass.hasSubClassEq(RC)) + Opc = Mips::SD; + else if (Mips::HI32RegClass.hasSubClassEq(RC)) + Opc = Mips::SW; + else if (Mips::HI64RegClass.hasSubClassEq(RC)) + Opc = Mips::SD; + + // Hi, Lo are normally caller save but they are callee save + // for interrupt handling. + const Function *Func = MBB.getParent()->getFunction(); + if (Func->hasFnAttribute("interrupt")) { + if (Mips::HI32RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0); + SrcReg = Mips::K0; + } else if (Mips::HI64RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64); + SrcReg = Mips::K0_64; + } else if (Mips::LO32RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0); + SrcReg = Mips::K0; + } else if (Mips::LO64RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64); + SrcReg = Mips::K0_64; + } + } assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) @@ -228,6 +254,11 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; + const Function *Func = MBB.getParent()->getFunction(); + bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") && + (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || + DestReg == Mips::HI0 || DestReg == Mips::HI0_64); + if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) @@ -254,10 +285,44 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LD_W; else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) Opc = Mips::LD_D; + else if (Mips::HI32RegClass.hasSubClassEq(RC)) + Opc = Mips::LW; + else if (Mips::HI64RegClass.hasSubClassEq(RC)) + Opc = Mips::LD; + else if (Mips::LO32RegClass.hasSubClassEq(RC)) + Opc = Mips::LW; + else if (Mips::LO64RegClass.hasSubClassEq(RC)) + Opc = Mips::LD; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) - .addMemOperand(MMO); + + if (!ReqIndirectLoad) + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addFrameIndex(FI) + .addImm(Offset) + .addMemOperand(MMO); + else { + // Load HI/LO through K0. Notably the DestReg is encoded into the + // instruction itself. 
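As the comment above says, HI/LO have no load/store encodings and MTHI/MTLO name their destination implicitly, so a reload must bounce through a kernel scratch GPR. A condensed sketch of the two-instruction sequence the code below builds, for the 32-bit case; the helper name is hypothetical and the memory operand is omitted for brevity:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

static void reloadAccViaK0(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, const DebugLoc &DL,
                           const TargetInstrInfo &TII, int FI, bool IsHi) {
  // lw $k0, <FI>  -- the frame index becomes an $sp offset later, in
  // eliminateFrameIndex.
  BuildMI(MBB, I, DL, TII.get(Mips::LW), Mips::K0)
      .addFrameIndex(FI)
      .addImm(0);
  // mthi/mtlo $k0 -- the destination register is implied by the opcode.
  BuildMI(MBB, I, DL, TII.get(IsHi ? Mips::MTHI : Mips::MTLO))
      .addReg(Mips::K0);
}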
+ unsigned Reg = Mips::K0; + unsigned LdOp = Mips::MTLO; + if (DestReg == Mips::HI0) + LdOp = Mips::MTHI; + + if (Subtarget.getABI().ArePtrs64bit()) { + Reg = Mips::K0_64; + if (DestReg == Mips::HI0_64) + LdOp = Mips::MTHI64; + else + LdOp = Mips::MTLO64; + } + + BuildMI(MBB, I, DL, get(Opc), Reg) + .addFrameIndex(FI) + .addImm(Offset) + .addMemOperand(MMO); + BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg); + } } bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { @@ -271,6 +336,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::RetRA: expandRetRA(MBB, MI); break; + case Mips::ERet: + expandERet(MBB, MI); + break; case Mips::PseudoMFHI: Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI; expandPseudoMFHiLo(MBB, MI, Opc); @@ -360,7 +428,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MipsABIInfo ABI = Subtarget.getABI(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned ADDu = ABI.GetPtrAdduOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); @@ -438,6 +506,11 @@ void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA); } +void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET)); +} + std::pair MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index bebbabf7b838..5d73545ef6b9 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -82,6 +82,8 @@ private: void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + std::pair compareOpndSize(unsigned Opc, const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 132c3a1001ad..b1e2885f5ba3 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -126,17 +126,19 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, } bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex); - + bool IsISRRegFI = MipsFI->isISRRegFI(FrameIndex); // The following stack frame objects are always referenced relative to $sp: // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. // 4. Locations for eh data registers. + // 5. Locations for ISR saved Coprocessor 0 registers 12 & 14. // Everything else is referenced relative to whatever register // getFrameRegister() returns. 
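The base-register rule spelled out in the numbered comment above can be restated as a small pure function. This is a clarifying condensation, not the patch's code; the real logic below additionally handles the stack-realignment and base-pointer cases:

// SP-relative: objects at fixed offsets from the incoming stack pointer
// (outgoing args, callee saves, EH data, ISR COP0 slots). Everything
// else: the frame register, i.e. $fp when one exists, otherwise $sp.
static unsigned chooseFrameReg(bool IsCalleeSaveFI, bool IsEhDataRegFI,
                               bool IsISRRegFI, bool HasFP, unsigned SP,
                               unsigned FP) {
  if (IsCalleeSaveFI || IsEhDataRegFI || IsISRRegFI)
    return SP;
  return HasFP ? FP : SP;
}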
unsigned FrameReg; - if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) + if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI || + IsISRRegFI) FrameReg = ABI.GetStackPtr(); else if (RegInfo->needsStackRealignment(MF)) { if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex)) diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 54b5d2811701..37f9e491d546 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -16,8 +16,8 @@ def IMULDIV : FuncUnit; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for Mips //===----------------------------------------------------------------------===// -def IIAlu : InstrItinClass; -def IIBranch : InstrItinClass; +// IIM16Alu is a placeholder class for most MIPS16 instructions. +def IIM16Alu : InstrItinClass; def IIPseudo : InstrItinClass; def II_ABS : InstrItinClass; @@ -28,7 +28,19 @@ def II_ADD_D : InstrItinClass; def II_ADD_S : InstrItinClass; def II_AND : InstrItinClass; def II_ANDI : InstrItinClass; +def II_B : InstrItinClass; def II_BADDU : InstrItinClass; +def II_BBIT : InstrItinClass; // bbit[01], bbit[01]32 +def II_BC : InstrItinClass; +def II_BC1F : InstrItinClass; +def II_BC1FL : InstrItinClass; +def II_BC1T : InstrItinClass; +def II_BC1TL : InstrItinClass; +def II_BCC : InstrItinClass; // beq and bne +def II_BCCZ : InstrItinClass; // b[gl][et]z +def II_BCCZAL : InstrItinClass; // bgezal and bltzal +def II_BCCZALS : InstrItinClass; // bgezals and bltzals +def II_BCCZC : InstrItinClass; // beqzc, bnezc def II_CEIL : InstrItinClass; def II_CFC1 : InstrItinClass; def II_CLO : InstrItinClass; @@ -68,21 +80,39 @@ def II_DSUB : InstrItinClass; def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; def II_INS : InstrItinClass; // Any INS instruction +def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo. +def II_J : InstrItinClass; +def II_JAL : InstrItinClass; +def II_JALR : InstrItinClass; +def II_JALRC : InstrItinClass; +def II_JALRS : InstrItinClass; +def II_JALS : InstrItinClass; +def II_JR : InstrItinClass; +def II_JRADDIUSP : InstrItinClass; +def II_JRC : InstrItinClass; +def II_ReturnPseudo : InstrItinClass; // Return pseudo. 
def II_LB : InstrItinClass; +def II_LBE : InstrItinClass; def II_LBU : InstrItinClass; +def II_LBUE : InstrItinClass; def II_LD : InstrItinClass; def II_LDC1 : InstrItinClass; def II_LDL : InstrItinClass; def II_LDR : InstrItinClass; def II_LDXC1 : InstrItinClass; def II_LH : InstrItinClass; +def II_LHE : InstrItinClass; def II_LHU : InstrItinClass; +def II_LHUE : InstrItinClass; def II_LUI : InstrItinClass; def II_LUXC1 : InstrItinClass; def II_LW : InstrItinClass; +def II_LWE : InstrItinClass; def II_LWC1 : InstrItinClass; def II_LWL : InstrItinClass; +def II_LWLE : InstrItinClass; def II_LWR : InstrItinClass; +def II_LWRE : InstrItinClass; def II_LWU : InstrItinClass; def II_LWXC1 : InstrItinClass; def II_MADD : InstrItinClass; @@ -134,6 +164,7 @@ def II_ROTRV : InstrItinClass; def II_ROUND : InstrItinClass; def II_SAVE : InstrItinClass; def II_SB : InstrItinClass; +def II_SBE : InstrItinClass; def II_SD : InstrItinClass; def II_SDC1 : InstrItinClass; def II_SDL : InstrItinClass; @@ -144,6 +175,7 @@ def II_SEH : InstrItinClass; def II_SEQ_SNE : InstrItinClass; // seq and sne def II_SEQI_SNEI : InstrItinClass; // seqi and snei def II_SH : InstrItinClass; +def II_SHE : InstrItinClass; def II_SLL : InstrItinClass; def II_SLLV : InstrItinClass; def II_SLTI_SLTIU : InstrItinClass; // slti and sltiu @@ -159,11 +191,15 @@ def II_SUB_D : InstrItinClass; def II_SUB_S : InstrItinClass; def II_SUXC1 : InstrItinClass; def II_SW : InstrItinClass; +def II_SWE : InstrItinClass; def II_SWC1 : InstrItinClass; def II_SWL : InstrItinClass; +def II_SWLE : InstrItinClass; def II_SWR : InstrItinClass; +def II_SWRE : InstrItinClass; def II_SWXC1 : InstrItinClass; def II_TRUNC : InstrItinClass; +def II_WSBH : InstrItinClass; def II_XOR : InstrItinClass; def II_XORI : InstrItinClass; @@ -171,7 +207,7 @@ def II_XORI : InstrItinClass; // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ - InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -240,7 +276,29 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -313,3 +371,5 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]> ]>; + +include "MipsScheduleP5600.td" diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td new file mode 100644 index 000000000000..d32ae4f55eaf --- /dev/null +++ b/lib/Target/Mips/MipsScheduleP5600.td @@ -0,0 +1,392 @@ +//==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def MipsP5600Model : SchedMachineModel { + int IssueWidth = 2; // 2x dispatched per cycle + int MicroOpBufferSize = 48; // min(48, 48, 64) + int LoadLatency = 4; + int MispredictPenalty = 8; // TODO: Estimated + + let CompleteModel = 1; +} + +let SchedModel = MipsP5600Model in { + +// ALQ Pipelines +// ============= + +def P5600ALQ : ProcResource<1> { let BufferSize = 16; } +def P5600IssueALU : ProcResource<1> { let Super = P5600ALQ; } + +// ALU Pipeline +// ------------ + +def P5600WriteALU : SchedWriteRes<[P5600IssueALU]>; + +// and, lui, nor, or, slti, sltiu, sub, subu, xor +def : ItinRW<[P5600WriteALU], + [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUBU, II_XOR]>; + +// AGQ Pipelines +// ============= + +def P5600AGQ : ProcResource<3> { let BufferSize = 16; } +def P5600IssueAL2 : ProcResource<1> { let Super = P5600AGQ; } +def P5600IssueCTISTD : ProcResource<1> { let Super = P5600AGQ; } +def P5600IssueLDST : ProcResource<1> { let Super = P5600AGQ; } + +def P5600AL2Div : ProcResource<1>; +// Pseudo-resource used to block CTISTD when handling multi-pipeline splits. +def P5600CTISTD : ProcResource<1>; + +// CTISTD Pipeline +// --------------- + +def P5600WriteJump : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]>; +def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> { + let Latency = 2; +} + +// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx, +// jalr, jr.hb, jr +def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR]>; +def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR]>; + +// LDST Pipeline +// ------------- + +def P5600WriteLoad : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 4; +} + +def P5600WriteLoadShifted : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> { + let Latency = 4; +} + +def P5600WritePref : SchedWriteRes<[P5600IssueLDST]>; + +def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> { + // FIXME: This is a bit pessimistic. P5600CTISTD is only used during cycle 2 + // not during 0, 1, and 2. + let ResourceCycles = [ 1, 3 ]; +} + +def P5600WriteGPRFromBypass : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 2; +} + +def P5600WriteStoreFromOtherUnits : SchedWriteRes<[P5600IssueLDST]>; +def P5600WriteLoadToOtherUnits : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 0; +} + +// l[bhw], l[bh]u, ll +def : ItinRW<[P5600WriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LWU]>; + +// lw[lr] +def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWR]>; + +// s[bhw], sw[lr] +def : ItinRW<[P5600WriteStore], [II_SB, II_SH, II_SW, II_SWL, II_SWR]>; + +// pref +// (this instruction does not exist in the backend yet) +def : ItinRW<[P5600WritePref], []>; + +// sc +// (this instruction does not exist in the backend yet) +def : ItinRW<[P5600WriteStore], []>; + +// LDST is also used in moves from general purpose registers to floating point +// and MSA. 
+def P5600WriteMoveGPRToOtherUnits : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 0; +} + +// AL2 Pipeline +// ------------ + +def P5600WriteAL2 : SchedWriteRes<[P5600IssueAL2]>; +def P5600WriteAL2BitExt : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; } +def P5600WriteAL2ShadowMov : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; } +def P5600WriteAL2CondMov : SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> { + let Latency = 2; +} +def P5600WriteAL2Div : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> { + // Estimated worst case + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def P5600WriteAL2DivU : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> { + // Estimated worst case + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def P5600WriteAL2Mul : SchedWriteRes<[P5600IssueAL2]> { let Latency = 3; } +def P5600WriteAL2Mult: SchedWriteRes<[P5600IssueAL2]> { let Latency = 5; } +def P5600WriteAL2MAdd: SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> { + let Latency = 5; +} + +// clo, clz, di, mfhi, mflo +def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_MFHI_MFLO]>; + +// ehb, rdhwr, rdpgpr, wrpgpr, wsbh +def : ItinRW<[P5600WriteAL2ShadowMov], [II_RDHWR]>; + +// mov[nz] +def : ItinRW<[P5600WriteAL2CondMov], [II_MOVN, II_MOVZ]>; + +// divu? +def : ItinRW<[P5600WriteAL2Div], [II_DIV]>; +def : ItinRW<[P5600WriteAL2DivU], [II_DIVU]>; + +// mul +def : ItinRW<[P5600WriteAL2Mul], [II_MUL]>; +// multu?, multu? +def : ItinRW<[P5600WriteAL2Mult], [II_MULT, II_MULTU]>; +// maddu?, msubu?, mthi, mtlo +def : ItinRW<[P5600WriteAL2MAdd], + [II_MADD, II_MADDU, II_MSUB, II_MSUBU, II_MTHI_MTLO]>; + +// ext, ins +def : ItinRW<[P5600WriteAL2BitExt], + [II_EXT, II_INS]>; + +// Either ALU or AL2 Pipelines +// --------------------------- +// +// Some instructions can choose between ALU and AL2, but once dispatched to +// ALQ or AGQ respectively they are committed to that path. +// The decision is based on the outcome of the most recent selection when the +// choice was last available. For now, we assume ALU is always chosen. 
+ +def P5600WriteEitherALU : SchedWriteVariant< + // FIXME: Implement selection predicate + [SchedVar, [P5600WriteALU]>, + SchedVar, [P5600WriteAL2]> + ]>; + +// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu, +// xori +def : ItinRW<[P5600WriteEitherALU], + [II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH, + II_SLT_SLTU, II_SLL, II_SRA, II_SRL, II_XORI, II_ADDU, II_SLLV, + II_SRAV, II_SRLV]>; + +// FPU Pipelines +// ============= + +def P5600FPQ : ProcResource<3> { let BufferSize = 16; } +def P5600IssueFPUS : ProcResource<1> { let Super = P5600FPQ; } +def P5600IssueFPUL : ProcResource<1> { let Super = P5600FPQ; } +def P5600IssueFPULoad : ProcResource<1> { let Super = P5600FPQ; } + +def P5600FPUDivSqrt : ProcResource<2>; + +def P5600WriteFPUS : SchedWriteRes<[P5600IssueFPUS]>; +def P5600WriteFPUL : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 4; } +def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6; } +def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 23 / 27 + let Latency = 23; // Using common case + let ResourceCycles = [ 1, 23 ]; +} +def P5600WriteFPUDivD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 31 / 35 + let Latency = 31; // Using common case + let ResourceCycles = [ 1, 31 ]; +} +def P5600WriteFPURcpS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 19 / 23 + let Latency = 19; // Using common case + let ResourceCycles = [ 1, 19 ]; +} +def P5600WriteFPURcpD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPURsqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 27 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPURsqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPUSqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 35 / 39 + let Latency = 35; // Using common case + let ResourceCycles = [ 1, 35 ]; +} +def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>; +def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; } +def P5600WriteMoveOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPUS]>; + +// FPUS is also used in moves from floating point and MSA registers to general +// purpose registers. +def P5600WriteMoveFPUSToOtherUnits : SchedWriteRes<[P5600IssueFPUS]> { + let Latency = 0; +} + +// FPUL is also used in moves from floating point and MSA registers to general +// purpose registers. 
+def P5600WriteMoveFPULToOtherUnits : SchedWriteRes<[P5600IssueFPUL]>; + +// Short Pipe +// ---------- +// +// abs.[ds], abs.ps, bc1[tf]l?, mov[tf].[ds], mov[tf], mov.[ds], [cm][ft]c1, +// m[ft]hc1, neg.[ds], neg.ps, nor.v, nori.b, or.v, ori.b, xor.v, xori.b, +// sdxc1, sdc1, st.[bhwd], swc1, swxc1 +def : ItinRW<[P5600WriteFPUS], [II_ABS, II_MOVF_D, II_MOVF_S, II_MOVT_D, + II_MOVT_S, II_MOV_D, II_MOV_S, II_NEG]>; + +// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd], +// aver?_[us].[bhwd] +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>; +// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it. +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>; + +// and.v, andi.b, move.v, ldi.[bhwd] +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>; + +// Long Pipe +// ---------- +// +// add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, +// cvt.ps.[sw], c..[ds], c..ps, mul.[ds], mul.ps, sub.[ds], sub.ps, +// trunc.w.[ds], trunc.w.ps +def : ItinRW<[P5600WriteFPUL], + [II_ADD_D, II_ADD_S, II_CVT, II_C_CC_D, II_C_CC_S, II_MUL_D, + II_MUL_S, II_SUB_D, II_SUB_S, II_TRUNC]>; + +// div.[ds], div.ps +def : ItinRW<[P5600WriteFPUDivS], [II_DIV_S]>; +def : ItinRW<[P5600WriteFPUDivD], [II_DIV_D]>; + +// sqrt.[ds], sqrt.ps +def : ItinRW<[P5600WriteFPUSqrtS], [II_SQRT_S]>; +def : ItinRW<[P5600WriteFPUSqrtD], [II_SQRT_D]>; + +// madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds], +// Operand 0 is read on cycle 5. All other operands are read on operand 0. +def : ItinRW<[SchedReadAdvance<5>, P5600WriteFPUL_MADDSUB], + [II_MADD_D, II_MADD_S, II_MSUB_D, II_MSUB_S, II_NMADD_D, + II_NMADD_S, II_NMSUB_D, II_NMSUB_S]>; + +// madd.ps, msub.ps, nmadd.ps, nmsub.ps +// Operand 0 and 1 are read on cycle 5. All others are read on operand 0. +// (none of these instructions exist in the backend yet) + +// Load Pipe +// --------- +// +// This is typically used in conjunction with the load pipeline under the AGQ +// All the instructions are in the 'Tricky Instructions' section. + +def P5600WriteLoadOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPULoad]> { + let Latency = 4; +} + +// Tricky Instructions +// =================== +// +// These instructions are split across multiple uops (in different pipelines) +// that must cooperate to complete the operation + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteMoveGPRToFPU : WriteSequence<[P5600WriteMoveGPRToOtherUnits, + P5600WriteMoveOtherUnitsToFPU]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteMoveFPUToGPR : WriteSequence<[P5600WriteMoveFPUSToOtherUnits, + P5600WriteGPRFromBypass]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. 
+def P5600WriteStoreFPUS : WriteSequence<[P5600WriteMoveFPUSToOtherUnits, + P5600WriteStoreFromOtherUnits]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteStoreFPUL : WriteSequence<[P5600WriteMoveFPULToOtherUnits, + P5600WriteStoreFromOtherUnits]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteLoadFPU : WriteSequence<[P5600WriteLoadToOtherUnits, + P5600WriteLoadOtherUnitsToFPU]>; + +// ctc1, mtc1, mthc1 +def : ItinRW<[P5600WriteMoveGPRToFPU], [II_CTC1, II_MTC1, II_MTHC1]>; + +// bc1[ft], cfc1, mfc1, mfhc1, movf, movt +def : ItinRW<[P5600WriteMoveFPUToGPR], + [II_BC1F, II_BC1T, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>; + +// swc1, swxc1, st.[bhwd] +def : ItinRW<[P5600WriteStoreFPUS], [II_SWC1, II_SWXC1]>; +def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>; + +// movn.[ds], movz.[ds] +def : ItinRW<[P5600WriteStoreFPUL], [II_MOVN_D, II_MOVN_S, II_MOVZ_D, II_MOVZ_S]>; + +// l[dw]x?c1, ld.[bhwd] +def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1]>; +def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>; + +// Unsupported Instructions +// ======================== +// +// The following instruction classes are never valid on P5600. +// II_DADDIU, II_DADDU, II_DMFC1, II_DMTC1, II_DMULT, II_DMULTU, II_DROTR, +// II_DROTR32, II_DROTRV, II_DDIV, II_DSLL, II_DSLL32, II_DSLLV, II_DSRA, +// II_DSRA32, II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV, II_DSUBU, II_DDIVU, +// II_JALRC, II_LD, II_LD[LR], II_LUXC1, II_RESTORE, II_SAVE, II_SD, II_SDC1, +// II_SDL, II_SDR, II_SDXC1 +// +// The following instructions are never valid on P5600. +// addq.ph, rdhwr, repl.ph, repl.qb, subq.ph, subu_s.qb +// +// Guesswork +// ========= +// +// This section is largely temporary guesswork. 
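A note on reading the ResourceCycles entries in this file: for P5600WriteFPUDivS earlier (Latency = 23, ResourceCycles = [1, 23]), the second number keeps the P5600FPUDivSqrt unit occupied and therefore bounds divide throughput, independently of result latency. A toy single-unit model in plain C++; the real model declares two divider units and these latencies are the file's own "common case" guesses, so treat this purely as an illustration of the semantics:

#include <cstdio>

int main() {
  const int Latency = 23;   // cycles until a div.s result is ready
  const int Occupancy = 23; // cycles the divider stays busy per div.s
  int UnitFree = 0;
  for (int N = 0; N < 3; ++N) { // three independent div.s, one unit
    const int Issue = UnitFree; // wait for the divider to drain
    std::printf("div.s #%d: issue %d, result %d\n", N, Issue,
                Issue + Latency);
    UnitFree = Issue + Occupancy;
  }
  return 0;
}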
+ +// ceil.[lw].[ds], floor.[lw].[ds] +// Reason behind guess: trunc.[lw].ds and the various cvt's are in FPUL +def : ItinRW<[P5600WriteFPUL], [II_CEIL, II_FLOOR, II_ROUND]>; + +// rotrv +// Reason behind guess: rotr is in the same category and the two register forms +// generally follow the immediate forms in this category +def : ItinRW<[P5600WriteEitherALU], [II_ROTRV]>; +} diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 471b6e19a8bb..8a18b517d16b 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -69,8 +69,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), - HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), - HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), + Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM), + TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 1db8881404c9..fbb01fe77029 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -42,9 +42,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo { Mips3, Mips4, Mips5, Mips64, Mips64r2, Mips64r3, Mips64r5, Mips64r6 }; + enum class CPU { P5600 }; + // Mips architecture version MipsArchEnum MipsArchVersion; + // Processor implementation (unused but required to exist by + // tablegen-erated code). + CPU ProcImpl; + // IsLittle - The target is Little Endian bool IsLittle; @@ -116,8 +122,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; - // HasDSP, HasDSPR2 -- supports DSP ASE. - bool HasDSP, HasDSPR2; + // HasDSP, HasDSPR2, HasDSPR3 -- supports DSP ASE. + bool HasDSP, HasDSPR2, HasDSPR3; // Allow mixed Mips16 and Mips32 in one source file bool AllowMixed16_32; @@ -130,6 +136,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasMSA -- supports MSA ASE. bool HasMSA; + // UseTCCInDIV -- Enables the use of trapping in the assembler. + bool UseTCCInDIV; + + // HasEVA -- supports EVA ASE. 
+ bool HasEVA; + InstrItineraryData InstrItins; // We can override the determination of whether we are in mips16 mode @@ -189,7 +201,7 @@ public: } bool hasMips32r5() const { return (MipsArchVersion >= Mips32r5 && MipsArchVersion < Mips32Max) || - hasMips64r2(); + hasMips64r5(); } bool hasMips32r6() const { return (MipsArchVersion >= Mips32r6 && MipsArchVersion < Mips32Max) || @@ -228,9 +240,12 @@ public: } bool inMicroMipsMode() const { return InMicroMipsMode; } bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); } + bool inMicroMips64r6Mode() const { return InMicroMipsMode && hasMips64r6(); } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } + bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool hasEVA() const { return HasEVA; } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 1c77745d130b..3e638720e839 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -233,7 +233,7 @@ void MipsPassConfig::addPreRegAlloc() { } TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { if (Subtarget->allowMixed16_32()) { DEBUG(errs() << "No Target Transform Info Pass Added\n"); // FIXME: This is no longer necessary as the TTI returned is per-function. diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 0f2db6039b6a..146f33bda249 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -76,7 +76,7 @@ bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { return (IsGlobalInSmallSectionImpl(GV, TM) && - (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon())); + (Kind.isData() || Kind.isBSS() || Kind.isCommon())); } /// Return true if this global address should be placed into small data/bss @@ -107,7 +107,8 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV, return false; Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); } MCSection * @@ -120,7 +121,7 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, // Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; - if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind)) + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; // Otherwise, we work the same as ELF. @@ -128,21 +129,20 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, } /// Return true if this constant should be placed into small data section. 
-bool MipsTargetObjectFile:: -IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const { +bool MipsTargetObjectFile::IsConstantInSmallSection( + const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const { return (static_cast(TM) .getSubtargetImpl() ->useSmallSection() && - LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize( - CN->getType()))); + LocalSData && IsInSmallSection(DL.getTypeAllocSize(CN->getType()))); } -MCSection * -MipsTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { - if (IsConstantInSmallSection(C, *TM)) +/// Return true if this constant should be placed into small data section. +MCSection *MipsTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { + if (IsConstantInSmallSection(DL, C, *TM)) return SmallDataSection; // Otherwise, we work the same as ELF. - return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C); + return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C); } diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 725f2ffd93dd..ba04343bad87 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -36,10 +36,10 @@ class MipsTargetMachine; const TargetMachine &TM) const override; /// Return true if this constant should be placed into small data section. - bool IsConstantInSmallSection(const Constant *CN, + bool IsConstantInSmallSection(const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 6ce1be707d04..b3222f5d89ef 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -12,6 +12,7 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "llvm/ADT/Optional.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -77,8 +78,12 @@ public: // PIC support virtual void emitDirectiveCpLoad(unsigned RegNo); + virtual void emitDirectiveCpRestore(SmallVector &StoreInsts, + int Offset); virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg); + virtual void emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister); // FP abiflags directives virtual void emitDirectiveModuleFP(); @@ -97,18 +102,18 @@ public: // structure values. 
   template <class PredicateLibrary>
   void updateABIInfo(const PredicateLibrary &P) {
-    ABI = &P.getABI();
+    ABI = P.getABI();
     ABIFlagsSection.setAllFromPredicates(P);
   }

   MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
   const MipsABIInfo &getABI() const {
-    assert(ABI && "ABI hasn't been set!");
+    assert(ABI.hasValue() && "ABI hasn't been set!");
     return *ABI;
   }

 protected:
-  const MipsABIInfo *ABI;
+  llvm::Optional<MipsABIInfo> ABI;
   MipsABIFlagsSection ABIFlagsSection;

   bool GPRInfoSet;
@@ -188,8 +193,12 @@ public:

   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+                              int Offset) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
+  void emitDirectiveCpreturn(unsigned SaveLocation,
+                             bool SaveLocationIsRegister) override;

   // FP abiflags directives
   void emitDirectiveModuleFP() override;
@@ -237,8 +246,12 @@ public:

   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+                              int Offset) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
+  void emitDirectiveCpreturn(unsigned SaveLocation,
+                             bool SaveLocationIsRegister) override;

   void emitMipsAbiFlags();
 };
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a210d099..f0f223aa057b 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -15,11 +15,9 @@
 #define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H

 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"

 namespace llvm {

-class MCOperand;
 class MCSubtargetInfo;

 class NVPTXInstPrinter : public MCInstPrinter {
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index b432e065c2f4..9ac3c8850f75 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -22,6 +22,7 @@ class Triple;

 class NVPTXMCAsmInfo : public MCAsmInfo {
   virtual void anchor();
+
 public:
   explicit NVPTXMCAsmInfo(const Triple &TheTriple);
 };
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index fe28214e9588..e5fae85bacf2 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -41,24 +41,6 @@ enum CondCodes {
 };
 }

-inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
-  switch (CC) {
-  case NVPTXCC::NE:
-    return "ne";
-  case NVPTXCC::EQ:
-    return "eq";
-  case NVPTXCC::LT:
-    return "lt";
-  case NVPTXCC::LE:
-    return "le";
-  case NVPTXCC::GT:
-    return "gt";
-  case NVPTXCC::GE:
-    return "ge";
-  }
-  llvm_unreachable("Unknown condition code");
-}
-
 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ecb0f0a1d0a1..e8c36089a779 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
   if (isABI) {
     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
       unsigned size = 0;
-      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+      if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
         size = ITy->getBitWidth();
         if (size < 32)
           size = 32;
@@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
   return
false; if (const GlobalVariable *GV = dyn_cast(C)) { - if (GV->getName() == "llvm.used") - return false; - return true; + return GV->getName() != "llvm.used"; } for (const User *U : C->users()) @@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { if (!gv->hasInternalLinkage()) return false; - const PointerType *Pty = gv->getType(); + PointerType *Pty = gv->getType(); if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) return false; @@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C, void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { - const Function *F = FI; + const Function *F = &*FI; if (F->isDeclaration()) { if (F->use_empty()) @@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { DenseSet GVVisiting; // Visit each global variable, in order - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + for (const GlobalVariable &I : M.globals()) + VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); @@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->getName().startswith("nvvm.")) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. - const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); if (GVar->hasExternalLinkage()) { @@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, printScalarConstant(Initializer, O); } } else { - // The frontend adds zero-initializer to variables that don't have an - // initial value, so skip warning for this case. - if (!GVar->getInitializer()->isNullValue()) { + // The frontend adds zero-initializer to device and constant variables + // that don't have an initial value, and UndefValue to shared + // variables, so skip warning for this case. + if (!GVar->getInitializer()->isNullValue() && + !isa(GVar->getInitializer())) { report_fatal_error("initial value of '" + GVar->getName() + "' is not allowed in addrspace(" + Twine(PTy->getAddressSpace()) + ")"); @@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); // Ptx allows variable initilization only for constant and // global state spaces. 
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || @@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } std::string -NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { +NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. - const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); O << "."; emitPTXAddressSpace(PTy->getAddressSpace(), O); if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); O << " .b8 "; getSymbol(GVar)->print(O, MAI); O << "["; @@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, return; } -static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { if (Ty->isSingleValueType()) - return TD->getPrefTypeAlignment(Ty); + return DL.getPrefTypeAlignment(Ty); - const ArrayType *ATy = dyn_cast(Ty); + auto *ATy = dyn_cast(Ty); if (ATy) - return getOpenCLAlignment(TD, ATy->getElementType()); + return getOpenCLAlignment(DL, ATy->getElementType()); - const StructType *STy = dyn_cast(Ty); + auto *STy = dyn_cast(Ty); if (STy) { unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. 
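// Worked example for the loop below (hypothetical type, for illustration):
// for struct S { char c; double d; }, the element alignments are 1 and 8,
// so alignStruct ends up as 8, i.e. a struct is as aligned as its
// most-aligned member.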
for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); - unsigned int align = getOpenCLAlignment(TD, ETy); + unsigned int align = getOpenCLAlignment(DL, ETy); if (align > alignStruct) alignStruct = align; } return alignStruct; } - const FunctionType *FTy = dyn_cast(Ty); + auto *FTy = dyn_cast(Ty); if (FTy) - return TD->getPointerPrefAlignment(); - return TD->getPrefTypeAlignment(Ty); + return DL.getPointerPrefAlignment(); + return DL.getPrefTypeAlignment(Ty); } void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, @@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, O << "_param_" << paramIndex; } -void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - CurrentFnSym->print(O, MAI); - O << "_param_" << paramIndex; -} - void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Function::const_arg_iterator I, E; @@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(*TD); + MVT thePointerTy = TLI->getPointerTy(DL); O << "(\n"; @@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(Ty); + align = DL.getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { continue; } // Just a scalar - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); if (isKernelFunc) { if (PTy) { // Special handling for pointer arguments to kernel @@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << ".ptr .global "; break; } - O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; } printParamName(I, paramIndex, O); continue; @@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } // param has byVal attribute. So should be a pointer - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(ETy); + align = DL.getABITypeAlignment(ETy); - unsigned sz = TD->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. 
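// Example of the decomposition (assumed types, for illustration): for a
// byval struct { float x; <2 x float> v; }, ComputeValueVTs yields
// vtparts = { f32, v2f32 }, and the loop below then prints the v2f32 part
// once per vector element, i.e. as two f32 entries.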
SmallVector vtparts; - ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); + ComputeValueVTs(*TLI, DL, ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; @@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) { void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); if (isa(CPV) || CPV->isNullValue()) { - int s = TD->getTypeAllocSize(CPV->getType()); + int s = DL.getTypeAllocSize(CPV->getType()); if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); @@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { - const Type *ETy = CPV->getType(); + Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast(CPV)->getZExtValue(); ConvertIntToBytes<>(ptr, c); @@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { if (const ConstantInt *constInt = dyn_cast( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { int int32 = (int)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); @@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { if (const ConstantInt *constInt = dyn_cast( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { long long int64 = (long long)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); @@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { const ConstantFP *CFP = dyn_cast(CPV); - const Type *Ty = CFP->getType(); + Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); ConvertFloatToBytes(ptr, float32); @@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v, Cexpr); } - unsigned int s = TD->getTypeAllocSize(CPV->getType()); + unsigned int s = DL.getTypeAllocSize(CPV->getType()); aggBuffer->addZeros(s); break; } @@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::StructTyID: { if (isa(CPV) || isa(CPV) || isa(CPV) || isa(CPV)) { - int ElementSize = TD->getTypeAllocSize(CPV->getType()); + int ElementSize = DL.getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); if (Bytes > ElementSize) aggBuffer->addZeros(Bytes - ElementSize); @@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); int Bytes; // Old constants @@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, StructType *ST = cast(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { if (i == (e - 1)) - Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - 
TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(0) + + DL.getTypeAllocSize(ST) - + DL.getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - - TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - + DL.getStructLayout(ST)->getElementOffset(i); bufferLEByte(cast(CPV->getOperand(i)), Bytes, aggBuffer); } } @@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // -bool NVPTXAsmPrinter::isImageType(const Type *Ty) { - - std::map::iterator PI = TypeNameMap.find(Ty); - - if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) - return true; - - return false; -} - bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { switch (MI.getOpcode()) { @@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstantForGV(C, ProcessingGeneric); @@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); @@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
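// Sketch of the fold (assuming 64-bit pointers, for illustration):
//   ptrtoint (i8* @g to i64)  ; same width as the pointer, so it lowers to
//                             ; the symbolic address of @g
//   ptrtoint (i8* @g to i16)  ; needs a real truncation; not a foldable
//                             ; cast, so it is rejected by this path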
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index f6f7685e76f9..76bf179896a8 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -212,28 +212,21 @@ private: MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {} - void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = nullptr); - void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool = false); - void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, raw_ostream &); - void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); - bool isImageType(const Type *Ty); void printReturnValStr(const Function *, raw_ostream &O); void printReturnValStr(const MachineFunction &MF, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -271,7 +264,7 @@ private: // Build the map between type name and ID based on module's type // symbol table. - std::map TypeNameMap; + std::map TypeNameMap; // List of variables demoted to a function scope. std::map > localDecls; @@ -282,19 +275,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const; void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); - void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); - - static const char *getRegisterName(unsigned RegNo); void emitDemotedVars(const Function *, raw_ostream &); bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 69a229e32f43..95813c8430d1 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -98,7 +98,7 @@ private: /// This reordering exposes to optimizeMemoryInstruction more /// optimization opportunities on loads and stores. 
/// - /// If this function succesfully hoists an eliminable addrspacecast or V is + /// If this function successfully hoists an eliminable addrspacecast or V is /// already such an addrspacecast, it returns the transformed value (which is /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); @@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { return false; bool Changed = false; - for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { if (isa(I)) { // V = load P - Changed |= optimizeMemoryInstruction(I, 0); + Changed |= optimizeMemoryInstruction(&I, 0); } else if (isa(I)) { // store V, P - Changed |= optimizeMemoryInstruction(I, 1); + Changed |= optimizeMemoryInstruction(&I, 1); } } } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 6fd09c405260..62ca5e9f9f62 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;) { - GlobalVariable *GV = I++; + GlobalVariable *GV = &*I++; if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) { @@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) { Value *Operand = II->getOperand(i); if (isa(Operand)) { II->setOperand( - i, remapConstant(&M, I, cast(Operand), Builder)); + i, remapConstant(&M, &*I, cast(Operand), Builder)); } } } @@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) { // Walk through the metadata section and update the debug information // associated with the global variables in the default address space. 
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; I++) {
- remapNamedMDNode(VM, I);
+ for (NamedMDNode &I : M.named_metadata()) {
+ remapNamedMDNode(VM, &I);
}

// Walk through the global variable initializers, and replace any use of
@@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0], NewOperands[1]);
case Instruction::FCmp:
// CompareConstantExpr (fcmp)
- assert(false && "Address space conversion should have no effect "
- "on floating point CompareConstantExpr (fcmp)!");
- return C;
+ llvm_unreachable("Address space conversion should have no effect "
+ "on floating point CompareConstantExpr (fcmp)!");
case Instruction::ExtractElement:
// ExtractElementConstantExpr
return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
@@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
NewOperands[0], C->getType());
}
- assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
- return C;
+ llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 232a611d1760..2d0098b392f4 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;

- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
@@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}

+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
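+ // Illustrative CUDA source (an assumed example, not part of this change)
+ // in which the loaded pointer is a phi-based induction variable over a
+ // __restrict__ read-only kernel parameter; GetUnderlyingObjects() walks
+ // through the phi and still traces each load back to the noalias
+ // argument, so the loads qualify for ld.global.nc:
+ //
+ //   __global__ void sum(const int *__restrict__ in, int *out, int n) {
+ //     int s = 0;
+ //     for (const int *p = in; p != in + n; ++p) // p: pointer induction var
+ //       s += *p;                                // eligible for LDG
+ //     *out = s;
+ //   }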
+ SmallVector Objs; + GetUnderlyingObjects(const_cast(N->getMemOperand()->getValue()), + Objs, F->getDataLayout()); + for (Value *Obj : Objs) { + auto *A = dyn_cast(Obj); + if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; + } + + return true; +} + SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { unsigned IID = cast(N->getOperand(0))->getZExtValue(); switch (IID) { @@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(LD); + if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool isVolatile = LD->isVolatile(); @@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); + if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); @@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -5039,7 +5094,7 @@ bool 
NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, } if (!Src) return false; - if (const PointerType *PT = dyn_cast(Src->getType())) + if (auto *PT = dyn_cast(Src->getType())) return (PT->getAddressSpace() == spN); return false; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b75cf4040312..766369631e14 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // condition branches. setJumpIsExpensive(true); + // Wide divides are _very_ slow. Try to reduce the width of the divide if + // possible. + addBypassSlowDiv(64, 32); + // By default, use the Source scheduling if (sched4reg) setSchedulingPreference(Sched::RegPressure); @@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SELECT); // Now deduce the information based on the above mentioned // actions @@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "("; if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { unsigned size = 0; - if (const IntegerType *ITy = dyn_cast(retTy)) { + if (auto *ITy = dyn_cast(retTy)) { size = ITy->getBitWidth(); if (size < 32) size = 32; @@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "_"; continue; } - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector vtparts; SmallVector Offsets; - const PointerType *PTy = dyn_cast(Args[i].Ty); + auto *PTy = dyn_cast(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), vtparts, &Offsets, 0); @@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { return Result; } -SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, - int idx, EVT v) const { - std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); - std::stringstream suffix; - suffix << idx; - *name += suffix.str(); - return DAG.getTargetExternalSymbol(name->c_str(), v); -} - SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { std::string ParamSym; @@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); } -SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { - return getExtSymb(DAG, ".HLPPARAM", idx); -} - // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { @@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { "struct._image3d_t", "struct._sampler_t" }; - const Type *Ty = arg->getType(); - const PointerType *PTy = dyn_cast(Ty); + Type *Ty = arg->getType(); + auto *PTy = dyn_cast(Ty); if (!PTy) return false; @@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { if (!context) return false; - const StructType *STy = dyn_cast(PTy->getElementType()); + auto *STy = 
dyn_cast(PTy->getElementType()); const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; - for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) - if (TypeName == specialTypes[i]) - return true; - - return false; + return std::find(std::begin(specialTypes), std::end(specialTypes), + TypeName) != std::end(specialTypes); } SDValue NVPTXTargetLowering::LowerFormalArguments( @@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( std::vector argTypes; std::vector theArgs; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { - theArgs.push_back(I); - argTypes.push_back(I->getType()); + for (const Argument &I : F->args()) { + theArgs.push_back(&I); + argTypes.push_back(I.getType()); } // argTypes.size() (or theArgs.size()) and Ins.size() need not match. // Ins.size() will be larger @@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint( TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -// NVPTX suuport vector of legal types of any length in Intrinsics because the -// NVPTX specific type legalizer -// will legalize them to the PTX supported length. -bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { - if (isTypeLegal(VT)) - return true; - if (VT.isVector()) { - MVT eVT = VT.getVectorElementType(); - if (isTypeLegal(eVT)) - return true; - } - return false; -} - static unsigned getOpcForTextureInstr(unsigned Intrinsic) { switch (Intrinsic) { default: @@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, // - [immAddr] if (AM.BaseGV) { - if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) - return false; - return true; + return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; } switch (AM.Scale) { @@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } -/// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { - return 4; -} - //===----------------------------------------------------------------------===// // NVPTX DAG Combining //===----------------------------------------------------------------------===// @@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N, return SDValue(); } +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Currently this detects patterns for integer min and max and + // lowers them to PTX-specific intrinsics that enable hardware + // support. + + const SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() != ISD::SETCC) return SDValue(); + + const SDValue LHS = Cond.getOperand(0); + const SDValue RHS = Cond.getOperand(1); + const SDValue True = N->getOperand(1); + const SDValue False = N->getOperand(2); + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + const EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); + + const ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + SDValue Larger; // The larger of LHS and RHS when condition is true. 
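+ // Mapping implemented by the switch below (illustration, signed i32 case):
+ //   (a < b) ? a : b   // Larger = RHS, True == LHS -> IsMax = false -> min
+ //   (a < b) ? b : a   // Larger = RHS, True == RHS -> IsMax = true  -> max
+ // The unsigned and 64-bit variants pick the ui/ll/ull intrinsics instead.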
+ switch (CC) { + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + Larger = RHS; + break; + + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + Larger = LHS; + break; + + default: + return SDValue(); + } + const bool IsMax = (Larger == True); + const bool IsSigned = ISD::isSignedIntSetCC(CC); + + unsigned IntrinsicId; + if (VT == MVT::i32) { + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; + } else { + assert(VT == MVT::i64); + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; + } + + SDLoc DL(N); + return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); +} + enum OperandSignedness { Signed = 0, Unsigned, @@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, if (ConstantSDNode *CI = dyn_cast(RHS)) { APInt Val = CI->getAPIntValue(); if (LHSSign == Unsigned) { - if (Val.isIntN(OptSize)) { - return true; - } - return false; + return Val.isIntN(OptSize); } else { - if (Val.isSignedIntN(OptSize)) { - return true; - } - return false; + return Val.isSignedIntN(OptSize); } } else { OperandSignedness RHSSign; if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) return false; - if (LHSSign != RHSSign) - return false; - - return true; + return LHSSign == RHSSign; } } @@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, return PerformSHLCombine(N, DCI, OptLevel); case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::SELECT: + return PerformSELECTCombine(N, DCI); } return SDValue(); } @@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults( void NVPTXSection::anchor() {} NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { - delete TextSection; - delete DataSection; - delete BSSSection; - delete ReadOnlySection; + delete static_cast(TextSection); + delete static_cast(DataSection); + delete static_cast(BSSSection); + delete static_cast(ReadOnlySection); - delete StaticCtorSection; - delete StaticDtorSection; - delete LSDASection; - delete EHFrameSection; - delete DwarfAbbrevSection; - delete DwarfInfoSection; - delete DwarfLineSection; - delete DwarfFrameSection; - delete DwarfPubTypesSection; - delete DwarfDebugInlineSection; - delete DwarfStrSection; - delete DwarfLocSection; - delete DwarfARangesSection; - delete DwarfRangesSection; + delete static_cast(StaticCtorSection); + delete static_cast(StaticDtorSection); + delete static_cast(LSDASection); + delete static_cast(EHFrameSection); + delete static_cast(DwarfAbbrevSection); + delete static_cast(DwarfInfoSection); + delete static_cast(DwarfLineSection); + delete static_cast(DwarfFrameSection); + delete static_cast(DwarfPubTypesSection); + delete static_cast(DwarfDebugInlineSection); + delete static_cast(DwarfStrSection); + delete static_cast(DwarfLocSection); + delete static_cast(DwarfARangesSection); + delete static_cast(DwarfRangesSection); } MCSection * diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index e5c37321a33b..60914c1d09b4 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -441,13 +441,9 @@ public: SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) 
const; - SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, - SelectionDAG &DAG) const; const char *getTargetNodeName(unsigned Opcode) const override; - bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override; @@ -459,8 +455,13 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; - /// getFunctionAlignment - Return the Log2 alignment of this function. - unsigned getFunctionAlignment(const Function *F) const; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { + // Truncating 64-bit to 32-bit is free in SASS. + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + return SrcTy->getPrimitiveSizeInBits() == 64 && + DstTy->getPrimitiveSizeInBits() == 32; + } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const override { @@ -515,11 +516,7 @@ public: private: const NVPTXSubtarget &STI; // cache the subtarget here - - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, - EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; - SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 76d6597c6e20..9f3cf4551955 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg( const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC != SrcRC) - report_fatal_error("Attempted to created cross-class register copy"); + if (DestRC->getSize() != SrcRC->getSize()) + report_fatal_error("Copy one register into another with a different width"); - if (DestRC == &NVPTX::Int32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int1RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int16RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else { + unsigned Op; + if (DestRC == &NVPTX::Int1RegsRegClass) { + Op = NVPTX::IMOV1rr; + } else if (DestRC == &NVPTX::Int16RegsRegClass) { + Op = NVPTX::IMOV16rr; + } else if (DestRC == &NVPTX::Int32RegsRegClass) { + Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr + : NVPTX::BITCONVERT_32_F2I); + } else if (DestRC == &NVPTX::Int64RegsRegClass) { + Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr + : NVPTX::BITCONVERT_64_F2I); + } else if (DestRC == &NVPTX::Float32RegsRegClass) { + Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr + : NVPTX::BITCONVERT_32_I2F); + } else if (DestRC == &NVPTX::Float64RegsRegClass) { + Op = (SrcRC == &NVPTX::Float64RegsRegClass ? 
NVPTX::FMOV64rr + : NVPTX::BITCONVERT_64_I2F); + } else { llvm_unreachable("Bad register copy"); } + BuildMI(MBB, I, DL, get(Op), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, @@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { - switch (MI.getOpcode()) { - default: - return false; - case NVPTX::INT_PTX_SREG_NTID_X: - case NVPTX::INT_PTX_SREG_NTID_Y: - case NVPTX::INT_PTX_SREG_NTID_Z: - case NVPTX::INT_PTX_SREG_TID_X: - case NVPTX::INT_PTX_SREG_TID_Y: - case NVPTX::INT_PTX_SREG_TID_Z: - case NVPTX::INT_PTX_SREG_CTAID_X: - case NVPTX::INT_PTX_SREG_CTAID_Y: - case NVPTX::INT_PTX_SREG_CTAID_Z: - case NVPTX::INT_PTX_SREG_NCTAID_X: - case NVPTX::INT_PTX_SREG_NCTAID_Y: - case NVPTX::INT_PTX_SREG_NCTAID_Z: - case NVPTX::INT_PTX_SREG_WARPSIZE: - return true; - } -} - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 179c06887198..3e407223f010 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -56,7 +56,6 @@ public: unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; - bool isReadSpecialReg(MachineInstr &MI) const; virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 0bf72febc4a0..f770c2acaab5 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -6,6 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// \file // Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when // the size is large or is not a compile-time constant. // @@ -18,19 +20,20 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #define DEBUG_TYPE "nvptx" using namespace llvm; namespace { + // actual analysis class, which is a functionpass struct NVPTXLowerAggrCopies : public FunctionPass { static char ID; @@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass { return "Lower aggregate copies/intrinsics into loops"; } }; -} // namespace char NVPTXLowerAggrCopies::ID = 0; -// Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop( - Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { - Type *indType = len->getType(); +// Lower memcpy to loop. 
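+// Shape of the IR this helper emits (a sketch; block names match the code
+// below, and an i64 CopyLen is assumed for illustration):
+//
+//   loadstoreloop:
+//     %i = phi i64 [ 0, %OrigBB ], [ %i.next, %loadstoreloop ]
+//     %s = getelementptr inbounds i8, i8* %SrcAddr, i64 %i
+//     %v = load i8, i8* %s
+//     %d = getelementptr inbounds i8, i8* %DstAddr, i64 %i
+//     store i8 %v, i8* %d
+//     %i.next = add i64 %i, 1
+//     %cont = icmp ult i64 %i.next, %CopyLen
+//     br i1 %cont, label %loadstoreloop, label %split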
+void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); - // srcAddr and dstAddr are expected to be pointer types, + // SrcAddr and DstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = cast(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) - srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); - dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS)); + SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); + DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - IRBuilder<> loop(loopBB); - // The loop index (ind) is a phi node. - PHINode *ind = loop.CreatePHI(indType, 0); - // Incoming value for ind is 0 - ind->addIncoming(ConstantInt::get(indType, 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - // load from srcAddr+ind + // load from SrcAddr+LoopIndex // TODO: we can leverage the align parameter of llvm.memcpy for more efficient // word-sized loads and stores. 
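// What that TODO points at (an assumed illustration): with align >= 4 and a
// length that is a multiple of 4, the same loop could move i32 words instead
// of bytes, cutting the trip count by 4x; as written, the loop always copies
// one byte per iteration.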
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), - srcVolatile); - // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), - dstVolatile); + Value *Element = + LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( + LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); + // store at DstAddr+LoopIndex + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), + DstAddr, LoopIndex), + DstIsVolatile); - // The value for ind coming from backedge is (ind + 1) - Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); - ind->addIncoming(newind, loopBB); + // The value for LoopIndex coming from backedge is (LoopIndex + 1) + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } -// Lower MemSetInst to loop -static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, - Value *len, Value *val, LLVMContext &Context, - Function &F) { - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); +// Lower memmove to IR. memmove is required to correctly copy overlapping memory +// regions; therefore, it has to check the relative positions of the source and +// destination pointers and choose the copy direction accordingly. +// +// The code below is an IR rendition of this C function: +// +// void* memmove(void* dst, const void* src, size_t n) { +// unsigned char* d = dst; +// const unsigned char* s = src; +// if (s < d) { +// // copy backwards +// while (n--) { +// d[n] = s[n]; +// } +// } else { +// // copy forward +// for (size_t i = 0; i < n; ++i) { +// d[i] = s[i]; +// } +// } +// return dst; +// } +void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = ConvertedInst->getParent(); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + // Create the a comparison of src and dst, based on which we jump to either + // the forward-copy part of the function (if src >= dst) or the backwards-copy + // part (if src < dst). + // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else + // structure. Its block terminators (unconditional branches) are replaced by + // the appropriate conditional branches when the loop is built. 
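+ // Resulting CFG (sketch): the pointer compare picks the direction, and
+ // compare_n_to_0 lets either side skip its loop when n == 0:
+ //
+ //            OrigBB: %compare_src_dst = icmp ult %src, %dst
+ //             /                                  \
+ //      copy_backwards                       copy_forward
+ //            |                                    |
+ //   copy_backwards_loop                  copy_forward_loop
+ //             \                                  /
+ //              +---------- memmove_done --------+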
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); + TerminatorInst *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm, + &ElseTerm); - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + // Each part of the function consists of two blocks: + // copy_backwards: used to skip the loop when n == 0 + // copy_backwards_loop: the actual backwards loop BB + // copy_forward: used to skip the loop when n == 0 + // copy_forward_loop: the actual forward loop BB + BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); + CopyBackwardsBB->setName("copy_backwards"); + BasicBlock *CopyForwardBB = ElseTerm->getParent(); + CopyForwardBB->setName("copy_forward"); + BasicBlock *ExitBB = ConvertedInst->getParent(); + ExitBB->setName("memmove_done"); + + // Initial comparison of n == 0 that lets us skip the loops altogether. Shared + // between both backwards and forward copy clauses. + ICmpInst *CompareN = + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); + + // Copying backwards. + BasicBlock *LoopBB = + BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + Value *IndexPtr = LoopBuilder.CreateSub( + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); + Value *Element = LoopBuilder.CreateLoad( + LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), + ExitBB, LoopBB); + LoopPhi->addIncoming(IndexPtr, LoopBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); + BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); + ThenTerm->eraseFromParent(); + + // Copying forward. + BasicBlock *FwdLoopBB = + BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB); + IRBuilder<> FwdLoopBuilder(FwdLoopBB); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); + Value *FwdElement = FwdLoopBuilder.CreateLoad( + FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); + FwdLoopBuilder.CreateStore( + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); + Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), + ExitBB, FwdLoopBB); + FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); + + BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); + ElseTerm->eraseFromParent(); +} + +// Lower memset to loop. 
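+// Equivalent C for the loop built below (illustration; SetValue is an i8
+// for a standard llvm.memset call):
+//
+//   unsigned char *d = (unsigned char *)DstAddr;
+//   for (size_t i = 0; i < CopyLen; ++i)
+//     d[i] = SetValue;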
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, + Value *CopyLen, Value *SetValue, LLVMContext &Context, + Function &F) { + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); + + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); // Cast pointer to the type of value getting stored - dstAddr = - builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); + unsigned dstAS = cast(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); - IRBuilder<> loop(loopBB); - PHINode *ind = loop.CreatePHI(len->getType(), 0); - ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); + LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); - loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); + LoopBuilder.CreateStore( + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), + false); - Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); - ind->addIncoming(newind, loopBB); + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { - SmallVector aggrLoads; - SmallVector aggrMemcpys; - SmallVector aggrMemsets; + SmallVector AggrLoads; + SmallVector MemCalls; const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); - // - // Collect all the aggrLoads, aggrMemcpys and addrMemsets. - // + // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { - if (LoadInst *load = dyn_cast(II)) { - if (!load->hasOneUse()) + if (LoadInst *LI = dyn_cast(II)) { + if (!LI->hasOneUse()) continue; - if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) continue; - User *use = load->user_back(); - if (StoreInst *store = dyn_cast(use)) { - if (store->getOperand(0) != load) + if (StoreInst *SI = dyn_cast(LI->user_back())) { + if (SI->getOperand(0) != LI) continue; - aggrLoads.push_back(load); + AggrLoads.push_back(LI); } - } else if (MemTransferInst *intr = dyn_cast(II)) { - Value *len = intr->getLength(); - // If the number of elements being copied is greater - // than MaxAggrCopySize, lower it to a loop - if (ConstantInt *len_int = dyn_cast(len)) { - if (len_int->getZExtValue() >= MaxAggrCopySize) { - aggrMemcpys.push_back(intr); + } else if (MemIntrinsic *IntrCall = dyn_cast(II)) { + // Convert intrinsic calls with variable size or with constant size + // larger than the MaxAggrCopySize threshold. 
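+ // E.g. (illustrative sizes): a constant llvm.memcpy of 4096 bytes is
+ // queued for loop expansion here, a small constant copy is left to the
+ // normal lowering, and a variable-length call is always expanded because
+ // its size cannot be bounded at compile time.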
+ if (ConstantInt *LenCI = dyn_cast(IntrCall->getLength())) { + if (LenCI->getZExtValue() >= MaxAggrCopySize) { + MemCalls.push_back(IntrCall); } } else { - // turn variable length memcpy/memmov into loop - aggrMemcpys.push_back(intr); - } - } else if (MemSetInst *memsetintr = dyn_cast(II)) { - Value *len = memsetintr->getLength(); - if (ConstantInt *len_int = dyn_cast(len)) { - if (len_int->getZExtValue() >= MaxAggrCopySize) { - aggrMemsets.push_back(memsetintr); - } - } else { - // turn variable length memset into loop - aggrMemsets.push_back(memsetintr); + MemCalls.push_back(IntrCall); } } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && - (aggrMemsets.size() == 0)) + + if (AggrLoads.size() == 0 && MemCalls.size() == 0) { return false; + } // // Do the transformation of an aggr load/copy/set to a loop // - for (LoadInst *load : aggrLoads) { - StoreInst *store = dyn_cast(*load->user_begin()); - Value *srcAddr = load->getOperand(0); - Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL.getTypeStoreSize(load->getType()); - Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); + for (LoadInst *LI : AggrLoads) { + StoreInst *SI = dyn_cast(*LI->user_begin()); + Value *SrcAddr = LI->getOperand(0); + Value *DstAddr = SI->getOperand(1); + unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); + Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), - store->isVolatile(), Context, F); + convertMemCpyToLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), + /* Context */ Context, + /* Function F */ F); - store->eraseFromParent(); - load->eraseFromParent(); + SI->eraseFromParent(); + LI->eraseFromParent(); } - for (MemTransferInst *cpy : aggrMemcpys) { - convertTransferToLoop(/* splitAt */ cpy, - /* srcAddr */ cpy->getSource(), - /* dstAddr */ cpy->getDest(), - /* len */ cpy->getLength(), - /* srcVolatile */ cpy->isVolatile(), - /* dstVolatile */ cpy->isVolatile(), + // Transform mem* intrinsic calls. 
+ for (MemIntrinsic *MemCall : MemCalls) { + if (MemCpyInst *Memcpy = dyn_cast(MemCall)) { + convertMemCpyToLoop(/* ConvertedInst */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), /* Context */ Context, /* Function F */ F); - cpy->eraseFromParent(); - } + } else if (MemMoveInst *Memmove = dyn_cast(MemCall)) { + convertMemMoveToLoop(/* ConvertedInst */ Memmove, + /* SrcAddr */ Memmove->getRawSource(), + /* DstAddr */ Memmove->getRawDest(), + /* CopyLen */ Memmove->getLength(), + /* SrcIsVolatile */ Memmove->isVolatile(), + /* DstIsVolatile */ Memmove->isVolatile(), + /* Context */ Context, + /* Function F */ F); - for (MemSetInst *memsetinst : aggrMemsets) { - Value *len = memsetinst->getLength(); - Value *val = memsetinst->getValue(); - convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, - F); - memsetinst->eraseFromParent(); + } else if (MemSetInst *Memset = dyn_cast(MemCall)) { + convertMemSetToLoop(/* ConvertedInst */ Memset, + /* DstAddr */ Memset->getRawDest(), + /* CopyLen */ Memset->getLength(), + /* SetValue */ Memset->getValue(), + /* Context */ Context, + /* Function F */ F); + } + MemCall->eraseFromParent(); } return true; } +} // namespace + +namespace llvm { +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); +} + +INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", + "Lower aggregate copies, and llvm.mem* intrinsics into loops", + false, false) + FunctionPass *llvm::createLowerAggrCopies() { return new NVPTXLowerAggrCopies(); } diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp index 93d0025d8f53..624052e9b981 100644 --- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) { // Check Load, Store, GEP, and BitCast Uses on alloca and make them // use the converted generic address, in order to expose non-generic // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types - // of instructions this is unecessary and may introduce redudant + // of instructions this is unnecessary and may introduce redundant // address cast. const auto &AllocaUse = *UI++; auto LI = dyn_cast(AllocaUse.getUser()); diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp index b533f316d8a9..6656077348a1 100644 --- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -47,6 +47,36 @@ // ... // } // +// 3. Convert pointers in a byval kernel parameter to pointers in the global +// address space. As #2, it allows NVPTX to emit more ld/st.global. E.g., +// +// struct S { +// int *x; +// int *y; +// }; +// __global__ void foo(S s) { +// int *b = s.y; +// // use b +// } +// +// "b" points to the global address space. 
At the IR level, +// +// define void @foo({i32*, i32*}* byval %input) { +// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1 +// %b = load i32*, i32** %b_ptr +// ; use %b +// } +// +// becomes +// +// define void @foo({i32*, i32*}* byval %input) { +// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1 +// %b = load i32*, i32** %b_ptr +// %b_global = addrspacecast i32* %b to i32 addrspace(1)* +// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32* +// ; use %b_generic +// } +// // TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes // don't cancel the addrspacecast pair this pass emits. //===----------------------------------------------------------------------===// @@ -54,6 +84,7 @@ #include "NVPTX.h" #include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass { bool runOnFunction(Function &F) override; // handle byval parameters - void handleByValParam(Argument *); - // handle non-byval pointer parameters - void handlePointerParam(Argument *); + void handleByValParam(Argument *Arg); + // Knowing Ptr must point to the global address space, this function + // addrspacecasts Ptr to global and then back to generic. This allows + // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into + // loads/stores that appear later. + void markPointerAsGlobal(Value *Ptr); public: static char ID; // Pass identification, replacement for typeid @@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args", // // The above code allocates some space in the stack and copies the incoming // struct from param space to local space. -// Then replace all occurences of %d by %temp. +// Then replace all occurrences of %d by %temp. // ============================================================================= void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { Function *Func = Arg->getParent(); @@ -128,26 +162,32 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { new StoreInst(LI, AllocA, FirstInst); } -void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) { - assert(!Arg->hasByValAttr() && - "byval params should be handled by handleByValParam"); - - // Do nothing if the argument already points to the global address space. - if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL) +void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) { + if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL) return; - Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin(); - Instruction *ArgInGlobal = new AddrSpaceCastInst( - Arg, PointerType::get(Arg->getType()->getPointerElementType(), - ADDRESS_SPACE_GLOBAL), - Arg->getName(), FirstInst); - Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(), - Arg->getName(), FirstInst); - // Replace with ArgInGeneric all uses of Args except ArgInGlobal. - Arg->replaceAllUsesWith(ArgInGeneric); - ArgInGlobal->setOperand(0, Arg); -} + // Decide where to emit the addrspacecast pair. + BasicBlock::iterator InsertPt; + if (Argument *Arg = dyn_cast<Argument>(Ptr)) { + // Insert at the function entry if Ptr is an argument. + InsertPt = Arg->getParent()->getEntryBlock().begin(); + } else { + // Insert right after Ptr if Ptr is an instruction.
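// (Editor's aside, illustration only; it restates the header comment above.)
// For a hypothetical CUDA kernel
//
//   struct S { int *x; int *y; };
//   __global__ void foo(S s) { int v = *s.y; }
//
// the load of s.y is traced back via GetUnderlyingObject to a byval kernel
// argument, so the global/generic addrspacecast pair is emitted immediately
// after that load - the insertion point the next statement computes.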
+ InsertPt = ++cast<Instruction>(Ptr)->getIterator(); + assert(InsertPt != InsertPt->getParent()->end() && + "We don't call this function with Ptr being a terminator."); + } + Instruction *PtrInGlobal = new AddrSpaceCastInst( + Ptr, PointerType::get(Ptr->getType()->getPointerElementType(), + ADDRESS_SPACE_GLOBAL), + Ptr->getName(), &*InsertPt); + Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(), + Ptr->getName(), &*InsertPt); + // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal. + Ptr->replaceAllUsesWith(PtrInGeneric); + PtrInGlobal->setOperand(0, Ptr); +} // ============================================================================= // Main function for this pass. @@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) { if (!isKernelFunction(F)) return false; + if (TM && TM->getDrvInterface() == NVPTX::CUDA) { + // Mark pointers in byval structs as global. + for (auto &B : F) { + for (auto &I : B) { + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + if (LI->getType()->isPointerTy()) { + Value *UO = GetUnderlyingObject(LI->getPointerOperand(), + F.getParent()->getDataLayout()); + if (Argument *Arg = dyn_cast<Argument>(UO)) { + if (Arg->hasByValAttr()) { + // LI is a load from a pointer within a byval kernel parameter. + markPointerAsGlobal(LI); + } + } + } + } + } + } + } + for (Argument &Arg : F.args()) { if (Arg.getType()->isPointerTy()) { if (Arg.hasByValAttr()) handleByValParam(&Arg); else if (TM && TM->getDrvInterface() == NVPTX::CUDA) - handlePointerParam(&Arg); + markPointerAsGlobal(&Arg); } } return true; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 46b4b33e7e40..81a606d7535c 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -68,7 +68,7 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *findAssociatedSection() const override { return nullptr; } + MCFragment *findAssociatedFragment() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -110,7 +110,7 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *findAssociatedSection() const override { return nullptr; } + MCFragment *findAssociatedFragment() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 5fd69a6815a8..17019d7b364d 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) + if (I->isReturnBlock()) TFI.emitEpilogue(MF, *I); } diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index 0d2627d62ebd..45a7309479ee 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -19,15 +19,14 @@ #include <string> namespace llvm { -/// NVPTXSection - Represents a section in PTX -/// PTX does not have sections. We create this class in order to use -/// the ASMPrint interface. +/// Represents a section in PTX. PTX does not have sections.
We create this class +/// in order to use the ASMPrint interface. /// -class NVPTXSection : public MCSection { +class NVPTXSection final : public MCSection { virtual void anchor(); public: NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} - virtual ~NVPTXSection() {} + ~NVPTXSection() {} /// Override this as NVPTX has its own way of printing switching /// to a section. diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 248f9e117d83..aa931b134da9 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); void initializeNVPTXLowerKernelArgsPass(PassRegistry &); void initializeNVPTXLowerAllocaPass(PassRegistry &); } @@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() { // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. - initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); - initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); - initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); - initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); - initializeNVPTXFavorNonGenericAddrSpacesPass( - *PassRegistry::getPassRegistry()); - initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); - initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry()); + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeNVVMReflectPass(PR); + initializeGenericToNVVMPass(PR); + initializeNVPTXAllocaHoistingPass(PR); + initializeNVPTXAssignValidGlobalNamesPass(PR); + initializeNVPTXFavorNonGenericAddrSpacesPass(PR); + initializeNVPTXLowerKernelArgsPass(PR); + initializeNVPTXLowerAllocaPass(PR); + initializeNVPTXLowerAggrCopiesPass(PR); } static std::string computeDataLayout(bool is64Bit) { @@ -139,6 +141,10 @@ public: FunctionPass *createTargetRegisterAllocator(bool) override; void addFastRegAlloc(FunctionPass *RegAllocPass) override; void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; + +private: + // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. + void addEarlyCSEOrGVNPass(); }; } // end anonymous namespace @@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(NVPTXTTIImpl(this, F)); }); } +void NVPTXPassConfig::addEarlyCSEOrGVNPass() { + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(createGVNPass()); + else + addPass(createEarlyCSEPass()); +} + void NVPTXPassConfig::addIRPasses() { // The following passes are known to not play well with virtual regs hanging // around after register allocation (which in our case, is *all* registers). @@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() { // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 
disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineCopyPropagationID); - disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); + addPass(createNVVMReflectPass()); addPass(createNVPTXImageOptimizerPass()); - TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + + // === Propagate special address spaces === addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); // NVPTXLowerKernelArgs emits alloca for byval parameters which can often // be eliminated by SROA. @@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() { // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. addPass(createDeadCodeEliminationPass()); + + // === Straight-line scalar optimizations === addPass(createSeparateConstOffsetFromGEPPass()); + addPass(createSpeculativeExecutionPass()); // ReassociateGEPs exposes more opportunites for SLSR. See // the example in reassociate-geps-and-slsr.ll. addPass(createStraightLineStrengthReducePass()); // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE // for some of our benchmarks. - if (getOptLevel() == CodeGenOpt::Aggressive) - addPass(createGVNPass()); - else - addPass(createEarlyCSEPass()); + addEarlyCSEOrGVNPass(); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); // NaryReassociate on GEPs creates redundant common expressions, so run // EarlyCSE after it. addPass(createEarlyCSEPass()); + + // === LSR and other generic IR passes === + TargetPassConfig::addIRPasses(); + // EarlyCSE is not always strong enough to clean up what LSR produces. For + // example, GVN can combine + // + // %0 = add %a, %b + // %1 = add %b, %a + // + // and + // + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. 
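// (Editor's aside, illustration only.) GVN's value numbering canonicalizes
// the operands of commutative operations, so in a function like
//
//   int f(int a, int b) { return (a + b) ^ (b + a); }
//
// the two adds receive the same value number and collapse into one, while
// EarlyCSE's order-sensitive hashing keeps both, per the comment above. The
// call emitted next therefore picks GVN at CodeGenOpt::Aggressive, trading
// compile time for the better cleanup, and the cheaper EarlyCSE otherwise.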
+ addEarlyCSEOrGVNPass(); } bool NVPTXPassConfig::addInstSelector() { diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 5ecdc8748830..0f88ddfaa934 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -48,8 +48,7 @@ public: void Initialize(MCContext &ctx, const TargetMachine &TM) override { TargetLoweringObjectFile::Initialize(ctx, TM); TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); - DataSection = - new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData()); BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); @@ -84,7 +83,7 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override { return ReadOnlySection; } diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index e7250cdba5ac..6e679dd0257c 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { return false; } -unsigned NVPTXTTIImpl::getArithmeticInstrCost( +int NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 5bcd1e27a558..0946a3293eec 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -52,7 +52,7 @@ public: bool isSourceOfDivergence(const Value *V); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 1f178af41670..578b466568ae 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { return false; } -bool llvm::isBarrierIntrinsic(Intrinsic::ID id) { - if ((id == Intrinsic::nvvm_barrier0) || - (id == Intrinsic::nvvm_barrier0_popc) || - (id == Intrinsic::nvvm_barrier0_and) || - (id == Intrinsic::nvvm_barrier0_or) || - (id == Intrinsic::cuda_syncthreads)) - return true; - return false; -} - -// Interface for checking all memory space transfer related intrinsics -bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { - if (id == Intrinsic::nvvm_ptr_local_to_gen || - id == Intrinsic::nvvm_ptr_shared_to_gen || - id == Intrinsic::nvvm_ptr_global_to_gen || - id == Intrinsic::nvvm_ptr_constant_to_gen || - id == Intrinsic::nvvm_ptr_gen_to_global || - id == Intrinsic::nvvm_ptr_gen_to_shared || - id == Intrinsic::nvvm_ptr_gen_to_local || - id == Intrinsic::nvvm_ptr_gen_to_constant || - id == 
Intrinsic::nvvm_ptr_gen_to_param) { - return true; - } - - return false; -} - -// consider several special intrinsics in striping pointer casts, and -// provide an option to ignore GEP indicies for find out the base address only -// which could be used in simple alias disambigurate. -const Value * -llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { - V = V->stripPointerCasts(); - while (true) { - if (const IntrinsicInst *IS = dyn_cast(V)) { - if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) { - V = IS->getArgOperand(0)->stripPointerCasts(); - continue; - } - } else if (ignore_GEP_indices) - if (const GEPOperator *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand()->stripPointerCasts(); - continue; - } - break; - } - return V; -} - -// consider several special intrinsics in striping pointer casts, and -// - ignore GEP indicies for find out the base address only, and -// - tracking PHINode -// which could be used in simple alias disambigurate. -const Value * -llvm::skipPointerTransfer(const Value *V, std::set &processed) { - if (processed.find(V) != processed.end()) - return nullptr; - processed.insert(V); - - const Value *V2 = V->stripPointerCasts(); - if (V2 != V && processed.find(V2) != processed.end()) - return nullptr; - processed.insert(V2); - - V = V2; - - while (true) { - if (const IntrinsicInst *IS = dyn_cast(V)) { - if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) { - V = IS->getArgOperand(0)->stripPointerCasts(); - continue; - } - } else if (const GEPOperator *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand()->stripPointerCasts(); - continue; - } else if (const PHINode *PN = dyn_cast(V)) { - if (V != V2 && processed.find(V) != processed.end()) - return nullptr; - processed.insert(PN); - const Value *common = nullptr; - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - const Value *pv = PN->getIncomingValue(i); - const Value *base = skipPointerTransfer(pv, processed); - if (base) { - if (!common) - common = base; - else if (common != base) - return PN; - } - } - if (!common) - return PN; - V = common; - } - break; - } - return V; -} - -// The following are some useful utilities for debuggung +// The following are some useful utilities for debugging BasicBlock *llvm::getParentBlock(Value *v) { if (BasicBlock *B = dyn_cast(v)) @@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) { return; for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) { - BasicBlock *B = it; + BasicBlock *B = &*it; if (strcmp(B->getName().data(), blockName) == 0) { B->dump(); return; @@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) { return nullptr; } -// Dump an instruction by nane +// Dump an instruction by name void llvm::dumpInst(Value *base, char *instName) { Instruction *I = getInst(base, instName); if (I) diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 7e2ce73daaa3..a5262cb7412f 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &); bool getAlign(const llvm::Function &, unsigned index, unsigned &); bool getAlign(const llvm::CallInst &, unsigned index, unsigned &); -bool isBarrierIntrinsic(llvm::Intrinsic::ID); - -/// make_vector - Helper function which is useful for building temporary vectors -/// to pass into type construction of CallInst ctors. This turns a null -/// terminated list of pointers (or other value types) into a real live vector. 
-/// -template inline std::vector make_vector(T A, ...) { - va_list Args; - va_start(Args, A); - std::vector Result; - Result.push_back(A); - while (T Val = va_arg(Args, T)) - Result.push_back(Val); - va_end(Args); - return Result; -} - -bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); -const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value * -skipPointerTransfer(const Value *V, std::set &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td index a237247e4833..e69bbba9f193 100644 --- a/lib/Target/NVPTX/NVPTXVector.td +++ b/lib/Target/NVPTX/NVPTXVector.td @@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), (ins V2I16Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (vector_extract + [(set Int16Regs:$dst, (extractelt (v2i16 V2I16Regs:$src), imm:$c))], IMOV16rr>; @@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), (ins V4I16Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (vector_extract + [(set Int16Regs:$dst, (extractelt (v4i16 V4I16Regs:$src), imm:$c))], IMOV16rr>; @@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), (ins V2I8Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (vector_extract + [(set Int8Regs:$dst, (extractelt (v2i8 V2I8Regs:$src), imm:$c))], IMOV8rr>; @@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), (ins V4I8Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (vector_extract + [(set Int8Regs:$dst, (extractelt (v4i8 V4I8Regs:$src), imm:$c))], IMOV8rr>; @@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), (ins V2I32Regs:$src, i8imm:$c), "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (vector_extract + [(set Int32Regs:$dst, (extractelt (v2i32 V2I32Regs:$src), imm:$c))], IMOV32rr>; @@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), (ins V2F32Regs:$src, i8imm:$c), "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (vector_extract + [(set Float32Regs:$dst, (extractelt (v2f32 V2F32Regs:$src), imm:$c))], FMOV32rr>; @@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), (ins V2I64Regs:$src, i8imm:$c), "mov.u64 \t$dst, $src${c:vecelem};", - [(set Int64Regs:$dst, (vector_extract + [(set Int64Regs:$dst, (extractelt (v2i64 V2I64Regs:$src), imm:$c))], IMOV64rr>; @@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), (ins V2F64Regs:$src, i8imm:$c), "mov.f64 \t$dst, $src${c:vecelem};", - [(set Float64Regs:$dst, (vector_extract + [(set Float64Regs:$dst, (extractelt (v2f64 V2F64Regs:$src), imm:$c))], FMOV64rr>; @@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), (ins V4I32Regs:$src, i8imm:$c), "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (vector_extract + 
[(set Int32Regs:$dst, (extractelt (v4i32 V4I32Regs:$src), imm:$c))], IMOV32rr>; @@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), (ins V4F32Regs:$src, i8imm:$c), "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (vector_extract + [(set Float32Regs:$dst, (extractelt (v4f32 V4F32Regs:$src), imm:$c))], FMOV32rr>; } @@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V2I8Regs:$dst, - (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))], - IMOV8rr>; + (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; // Insert v4i8 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), @@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V4I8Regs:$dst, - (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))], - IMOV8rr>; + (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; // Insert v2i16 def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), @@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V2I16Regs:$dst, - (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; + (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; // Insert v4i16 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), @@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V4I16Regs:$dst, - (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; + (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; // Insert v2i32 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), @@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u32 \t$dst${c:vecelem}, $val;", [(set V2I32Regs:$dst, - (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; + (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; // Insert v2f32 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), @@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f32 \t$dst${c:vecelem}, $val;", [(set V2F32Regs:$dst, - (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; + (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; // Insert v2i64 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), @@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u64 \t$dst${c:vecelem}, $val;", [(set V2I64Regs:$dst, - (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))], - IMOV64rr>; + (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))], + IMOV64rr>; // Insert v2f64 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), @@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f64 \t$dst${c:vecelem}, $val;", [(set V2F64Regs:$dst, - (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))], - FMOV64rr>; + (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))], + FMOV64rr>; // Insert v4i32 def V4i32Insert : NVPTXVecInst<(outs 
V4I32Regs:$dst), @@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u32 \t$dst${c:vecelem}, $val;", [(set V4I32Regs:$dst, - (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; + (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; // Insert v4f32 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), @@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f32 \t$dst${c:vecelem}, $val;", [(set V4F32Regs:$dst, - (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; + (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; } class BinOpAsmString { diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index 5e375b7852e4..20ab5db584d2 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() { for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n"); SmallVector NameValList; - StringRef(ReflectList[i]).split(NameValList, ","); + StringRef(ReflectList[i]).split(NameValList, ','); for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) { SmallVector NameValPair; - NameValList[j].split(NameValPair, "="); + NameValList[j].split(NameValPair, '='); assert(NameValPair.size() == 2 && "name=val expected"); std::stringstream ValStream(NameValPair[1]); int Val; diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a699a55d3cbf..220c70a48542 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -243,7 +243,6 @@ namespace { struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; bool IsPPC64; bool IsDarwin; @@ -291,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI), MII(MII) { + PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI), MII(MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -1185,7 +1184,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::MFTB: { - if (STI.getFeatureBits()[PPC::FeatureMFTB]) { + if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) { assert(Inst.getNumOperands() == 2 && "Expecting two operands"); Inst.setOpcode(PPC::MFSPR); } @@ -1205,7 +1204,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Post-process instructions (typically extended mnemonics) ProcessInstruction(Inst, Operands); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); @@ -1690,7 +1689,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // where th can be omitted when it is 0. dcbtst is the same. We take the // server form to be the default, so swap the operands if we're parsing for // an embedded core (they'll be swapped again upon printing). 
- if (STI.getFeatureBits()[PPC::FeatureBookE] && + if (getSTI().getFeatureBits()[PPC::FeatureBookE] && Operands.size() == 4 && (Name == "dcbt" || Name == "dcbtst")) { std::swap(Operands[1], Operands[3]); @@ -1730,10 +1729,19 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); if (getParser().parseExpression(Value)) return false; - getParser().getStreamer().EmitValue(Value, Size); + if (const auto *MCE = dyn_cast(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else { + getStreamer().EmitValue(Value, Size, ExprLoc); + } if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index c0c83cc258b8..c31ababafbe7 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen + PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp PPCCTRLoops.cpp @@ -27,6 +28,7 @@ add_llvm_target(PowerPCCodeGen PPCLoopPreIncPrep.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp + PPCMIPeephole.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 93a503c3758d..1fc84fb76551 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -401,8 +401,6 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI); if (result != MCDisassembler::Fail) return result; - - MI.clear(); } return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8e1878344302..53eb727d0b07 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -18,8 +18,6 @@ namespace llvm { -class MCOperand; - class PPCInstPrinter : public MCInstPrinter { bool IsDarwin; public: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 992be5b966c1..dd994956870f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -113,6 +113,10 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; } break; + case PPC::fixup_ppc_half16ds: + Target.print(errs()); + errs() << '\n'; + report_fatal_error("Invalid PC-relative half16ds relocation"); case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; @@ -305,13 +309,13 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; case MCSymbolRefExpr::VK_GOT: Type = ELF::R_PPC64_GOT16_DS; - break; + break; case MCSymbolRefExpr::VK_PPC_GOT_LO: Type = ELF::R_PPC64_GOT16_LO_DS; break; case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16_DS; - break; + break; case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; @@ -372,16 +376,16 @@ unsigned 
PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR64; - break; + break; case MCSymbolRefExpr::VK_PPC_DTPMOD: Type = ELF::R_PPC64_DTPMOD64; - break; + break; case MCSymbolRefExpr::VK_PPC_TPREL: Type = ELF::R_PPC64_TPREL64; - break; + break; case MCSymbolRefExpr::VK_PPC_DTPREL: Type = ELF::R_PPC64_DTPREL64; - break; + break; } break; case FK_Data_4: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 86ad3859b72c..e252ac944d40 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -20,18 +20,19 @@ namespace llvm { class Triple; - class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); - }; +public: + explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &); +}; - class PPCELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&); - }; +class PPCELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &); +}; } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index a641780516b3..d42a111cc43e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -82,8 +82,8 @@ public: const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS PPCMCExprs at the moment. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 9d7289658f0f..b54a0e1b86b1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -241,12 +241,12 @@ bool PPCMachObjectWriter::recordScatteredRelocation( if (FixupOffset > 0xffffff) { char Buffer[32]; format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), Twine("Section too large, can't encode " "r_address (") + Buffer + ") into 24 bits of scattered " "relocation entry."); - llvm_unreachable("fatal error returned?!"); + return false; } // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 6075631a541f..acea600fbb0d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -56,6 +56,14 @@ namespace PPC { PRED_BIT_UNSET = 1025 }; + // Bit for branch taken (plus) or not-taken (minus) hint + enum BranchHintBit { + BR_NO_HINT = 0x0, + BR_NONTAKEN_HINT = 0x2, + BR_TAKEN_HINT = 0x3, + BR_HINT_MASK = 0X3 + }; + /// Invert the specified predicate. != -> ==, < -> >=. 
Predicate InvertPredicate(Predicate Opcode); diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index ae8d8b4f5dfe..a259ed3fd327 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -41,13 +41,16 @@ namespace llvm { FunctionPass *createPPCVSXCopyPass(); FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCVSXSwapRemovalPass(); + FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); + FunctionPass *createPPCBoolRetToIntPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); void initializePPCVSXFMAMutatePass(PassRegistry&); + void initializePPCBoolRetToIntPass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 641b2377de40..b03be12cfd97 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -50,6 +50,8 @@ def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; +def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", + "Use software emulation for floating point">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", @@ -137,6 +139,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; +def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true", + "Target supports add/load integer fusion.">; +def FeatureFloat128 : + SubtargetFeature<"float128", "HasFloat128", "true", + "Enable the __float128 data type for IEEE-754R Binary128.", + [FeatureVSX]>; def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; @@ -168,7 +176,8 @@ def ProcessorFeatures { FeatureMFTB, DeprecatedDST]; list Power8SpecificFeatures = [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, - FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, + FeatureFusion]; list Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); } @@ -309,7 +318,7 @@ def : ProcessorModel<"g5", G5Model, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, - [DirectiveE500mc, FeatureMFOCRF, + [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, @@ -403,6 +412,7 @@ def PPCAsmParserVariant : AsmParserVariant { // InstAlias definitions use immediate literals. Set RegisterPrefix // so that those are not misinterpreted as registers. 
string RegisterPrefix = "%"; + string BreakCharacters = "."; } def PPC : Target { diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8e118ec27e67..9a63c14b5053 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -65,19 +65,20 @@ using namespace llvm; #define DEBUG_TYPE "asmprinter" namespace { - class PPCAsmPrinter : public AsmPrinter { - protected: - MapVector TOC; - const PPCSubtarget *Subtarget; - StackMaps SM; - public: - explicit PPCAsmPrinter(TargetMachine &TM, - std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} +class PPCAsmPrinter : public AsmPrinter { +protected: + MapVector TOC; + const PPCSubtarget *Subtarget; + StackMaps SM; - const char *getPassName() const override { - return "PowerPC Assembly Printer"; - } +public: + explicit PPCAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} + + const char *getPassName() const override { + return "PowerPC Assembly Printer"; + } MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); @@ -94,10 +95,8 @@ namespace { void EmitEndOfAsmFile(Module &M) override; - void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); + void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI); + void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); @@ -157,15 +156,15 @@ static const char *stripRegisterPrefix(const char *RegName) { return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } - + return RegName; } void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); - + switch (MO.getType()) { case MachineOperand::MO_Register: { const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); @@ -184,8 +183,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); @@ -200,19 +199,19 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, !GV->isStrongDefinitionForLinker()) { if (!GV->hasHiddenVisibility()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo() - .getGVStubEntry(SymToPrint); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo().getGVStubEntry( + SymToPrint); if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo(). 
- getHiddenGVStubEntry(SymToPrint); + + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo().getHiddenGVStubEntry( + SymToPrint); if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); @@ -295,16 +294,16 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, } case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. - { - // FIXME: Currently for PowerPC memory operands are always loaded - // into a register, so we never get an update or indexed form. - // This is bad even for offset forms, since even if we know we - // have a value in -16(r1), we will generate a load into r - // and then load from 0(r). Until that issue is fixed, - // tolerate 'U' and 'X' but don't output anything. - assert(MI->getOperand(OpNo).isReg()); - return false; - } + { + // FIXME: Currently for PowerPC memory operands are always loaded + // into a register, so we never get an update or indexed form. + // This is bad even for offset forms, since even if we know we + // have a value in -16(r1), we will generate a load into r + // and then load from 0(r). Until that issue is fixed, + // tolerate 'U' and 'X' but don't output anything. + assert(MI->getOperand(OpNo).isReg()); + return false; + } } } @@ -315,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return false; } - /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. @@ -330,8 +328,7 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { SM.serializeToStackMapSection(); } -void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); @@ -353,13 +350,12 @@ void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, // Emit nops. 
for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); } // Lower a patchpoint of the form: // [], , , , -void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { SM.recordPatchPoint(MI); PatchPointOpers Opers(&MI); @@ -375,60 +371,59 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 0; // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(32).addImm(16)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF)); // Save the current TOC pointer before the remote call. int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD) .addReg(PPC::X2) .addImm(TOCSaveOffset) .addReg(PPC::X1)); ++EncodedBytes; - // If we're on ELFv1, then we need to load the actual function pointer // from the function descriptor. if (!Subtarget->isELFv2ABI()) { - // Load the new TOC pointer and the function address, but not r11 - // (needing this is rare, and loading it here would prevent passing it - // via a 'nest' parameter. - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + // Load the new TOC pointer and the function address, but not r11 + // (needing this is rare, and loading it here would prevent passing it + // via a 'nest' parameter. + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(PPC::X2) .addImm(8) .addReg(ScratchReg)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(ScratchReg) .addImm(0) .addReg(ScratchReg)); ++EncodedBytes; } - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8) .addReg(ScratchReg)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8)); ++EncodedBytes; // Restore the TOC pointer after the call. 
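// (Editor's aside, illustration only.) The patchpoint call goes through CTR
// to a callee that may use a different TOC, so X2 was spilled to the
// ABI-defined save slot computed above (offset 24 on ELFv2, 40 on ELFv1)
// and is reloaded here; in assembly terms, roughly:
//
//   std r2, 24(r1)   ; save the TOC pointer before mtctr/bctrl
//   bctrl
//   ld  r2, 24(r1)   ; restore it after the call returns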
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(PPC::X2) .addImm(TOCSaveOffset) .addReg(PPC::X1)); @@ -439,7 +434,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP) .addExpr(SymVar)); EncodedBytes += 2; } @@ -454,7 +449,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((NumBytes - EncodedBytes) % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); } /// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a @@ -499,16 +494,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool isDarwin = TM.getTargetTriple().isOSDarwin(); const Module *M = MF->getFunction()->getParent(); PICLevel::Level PL = M->getPICLevel(); - + // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); case TargetOpcode::STACKMAP: - return LowerSTACKMAP(*OutStreamer, SM, *MI); + return LowerSTACKMAP(SM, *MI); case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(*OutStreamer, SM, *MI); + return LowerPATCHPOINT(SM, *MI); case PPC::MoveGOTtoLR: { // Transform %LR = MoveGOTtoLR @@ -533,17 +528,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { // Transform %LR = MovePCtoLR - // Into this, where the label is the PIC base: + // Into this, where the label is the PIC base: // bl L1$pb // L1$pb: MCSymbol *PICBase = MF->getPICBaseSymbol(); - + // Emit the 'bl'. - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) - // FIXME: We would like an efficient form for this, so we don't have to do - // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); - + EmitToStreamer(*OutStreamer, + MCInstBuilder(PPC::BL) + // FIXME: We would like an efficient form for this, so we + // don't have to do a lot of extra uniquing. + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + // Emit the label. 
OutStreamer->EmitLabel(PICBase); return; @@ -654,7 +650,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } - + case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); @@ -669,28 +665,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MO.isBlockAddress()) && "Invalid operand for ADDIStocHA!"); MCSymbol *MOSymbol = nullptr; - bool IsExternal = false; - bool IsNonLocalFunction = false; - bool IsCommon = false; - bool IsAvailExt = false; + bool GlobalToc = false; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsCommon = GV->hasCommonLinkage(); - IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker(); - IsAvailExt = GV->hasAvailableExternallyLinkage(); - } else if (MO.isCPI()) + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG); + } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) + } else if (MO.isJTI()) { MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) + } else if (MO.isBlockAddress()) { MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + } - if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt || - MO.isJTI() || MO.isBlockAddress() || + if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -727,13 +717,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } else if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = getSymbol(GValue); - if (GValue->getType()->getElementType()->isFunctionTy() || - GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage() || - TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert((GVFlags & PPCII::MO_NLP_FLAG) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. 
Must match ADDIStocHA.")); + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } const MCExpr *Exp = @@ -754,21 +745,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); MCSymbol *MOSymbol = nullptr; - bool IsExternal = false; - bool IsNonLocalFunction = false; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert ( + !(GVFlags & PPCII::MO_NLP_FLAG) && + "Interposable definitions must use indirect access.")); MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker(); - } else if (MO.isCPI()) + } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); - - if (IsNonLocalFunction || IsExternal || - TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, @@ -840,13 +828,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::PPC32GOT: { - MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); - const MCExpr *SymGotTlsL = - MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, - OutContext); - const MCExpr *SymGotTlsHA = - MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, - OutContext); + MCSymbol *GOTSymbol = + OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *SymGotTlsL = MCSymbolRefExpr::create( + GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext); + const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create( + GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI) .addReg(MI->getOperand(0).getReg()) .addExpr(SymGotTlsL)); @@ -1079,14 +1066,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. - if (!Subtarget->isPPC64() && - (TM.getRelocationModel() != Reloc::PIC_ || + if (!Subtarget->isPPC64() && + (TM.getRelocationModel() != Reloc::PIC_ || MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small)) return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget->isPPC64()) { const PPCFunctionInfo *PPCFI = MF->getInfo(); - if (PPCFI->usesPICBase()) { + if (PPCFI->usesPICBase()) { MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(); MCSymbol *PICBase = MF->getPICBaseSymbol(); OutStreamer->EmitLabel(RelocSymbol); @@ -1130,11 +1117,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->SwitchSection(Current.first, Current.second); } - bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); - bool isPPC64 = TD->getPointerSizeInBits() == 64; + bool isPPC64 = DL.getPointerSizeInBits() == 64; PPCTargetStreamer &TS = static_cast(*OutStreamer->getTargetStreamer()); @@ -1293,8 +1279,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
- const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer->SwitchSection( @@ -1325,7 +1311,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; // Construct a local MCSubtargetInfo and shadow EmitToStreamer here. // This is because the MachineFunction won't exist (but have not yet been @@ -1338,8 +1324,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { S.EmitInstruction(Inst, *STI); }; - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); // .lazy_symbol_pointer MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); @@ -1353,12 +1339,12 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer->SwitchSection(StubSection); EmitAlignment(4); - + MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); - + OutStreamer->EmitLabel(Stub); OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); @@ -1463,20 +1449,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); } } - + OutStreamer->AddBlankLine(); } - bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = - MMI->getObjFileInfo(); - + MMI->getObjFileInfo(); + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) EmitFunctionStubs(Stubs); @@ -1484,27 +1469,27 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. - const std::vector &Personalities = MMI->getPersonalities(); - for (std::vector::const_iterator I = Personalities.begin(), - E = Personalities.end(); I != E; ++I) { - if (*I) { - MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + for (const Function *Personality : MMI->getPersonalities()) { + if (Personality) { + MCSymbol *NLPSym = + getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = - MMIMacho.getGVStubEntry(NLPSym); - StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); + MMIMacho.getGVStubEntry(NLPSym); + StubSym = + MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true); } } } // Output stubs for dynamically-linked functions. Stubs = MMIMacho.GetGVStubList(); - + // Output macho stubs for external and common global variables. 
   if (!Stubs.empty()) {
     // Switch with ".non_lazy_symbol_pointer" directive.
     OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1535,7 +1520,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (!Stubs.empty()) {
     OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1573,7 +1558,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { 
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
   TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
new file mode 100644
index 000000000000..7920240bc2b9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -0,0 +1,253 @@
+//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they could be more
+// profitably allocated as GPRs rather than CRs. This pass will become totally
+// unnecessary if Register Bank Allocation and Global Instruction Selection ever
+// go upstream.
+//
+// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// transitive closure of their uses includes only PHINodes, CallInsts, and
+// ReturnInsts. The rationale is that arguments are generally passed and
+// returned in GPRs rather than CRs, so casting them to i32 at the LLVM IR
+// level will actually save casts at the Machine Instruction level.
+//
+// It might be useful to expand this pass to add bit-wise operations to the list
+// of safe transitive closure types. Also, we miss some opportunities when LLVM
+// represents logical AND and OR operations with control flow rather than data
+// flow. For example, by lowering the expression: return (A && B && C)
+//
+// as: return A ? true : B && C.
+//
+// There's code in SimplifyCFG that could be used to turn control flow into
+// data flow using SelectInsts. Selects are slow on some architectures (P7/P8),
+// so this probably isn't good in general, but for the special case of i1, the
+// Selects could be further lowered to bit operations that are fast everywhere.
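+//
+// As an illustration (hypothetical IR, not taken from this patch), a
+// promotable i1 phi feeding a return:
+//
+//   %r = phi i1 [ %b, %entry ], [ true, %t ]
+//   ret i1 %r
+//
+// is rewritten by this pass along the lines of:
+//
+//   %r.i32 = phi i32 [ %b.i32, %entry ], [ 1, %t ]
+//   %backToBool = trunc i32 %r.i32 to i1
+//   ret i1 %backToBool
+//
+// where %b.i32 is a zext of %b inserted after its definition, so that the
+// phi lives in a GPR rather than a CR field.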
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+#define DEBUG_TYPE "bool-ret-to-int"
+
+STATISTIC(NumBoolRetPromotion,
+          "Number of times a bool feeding a RetInst was promoted to an int");
+STATISTIC(NumBoolCallPromotion,
+          "Number of times a bool feeding a CallInst was promoted to an int");
+STATISTIC(NumBoolToIntPromotion,
+          "Total number of times a bool was promoted to an int");
+
+class PPCBoolRetToInt : public FunctionPass {
+
+  static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+    SmallPtrSet<Value *, 8> Defs;
+    SmallVector<Value *, 8> WorkList;
+    WorkList.push_back(V);
+    Defs.insert(V);
+    while (!WorkList.empty()) {
+      Value *Curr = WorkList.back();
+      WorkList.pop_back();
+      if (User *CurrUser = dyn_cast<User>(Curr))
+        for (auto &Op : CurrUser->operands())
+          if (Defs.insert(Op).second)
+            WorkList.push_back(Op);
+    }
+    return Defs;
+  }
+
+  // Translate an i1 value to an equivalent i32 value:
+  static Value *translate(Value *V) {
+    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+    if (Constant *C = dyn_cast<Constant>(V))
+      return ConstantExpr::getZExt(C, Int32Ty);
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      // Temporarily set the operands to 0. We'll fix this later in
+      // runOnUse.
+      Value *Zero = Constant::getNullValue(Int32Ty);
+      PHINode *Q =
+        PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+      for (unsigned i = 0; i < P->getNumOperands(); ++i)
+        Q->addIncoming(Zero, P->getIncomingBlock(i));
+      return Q;
+    }
+
+    Argument *A = dyn_cast<Argument>(V);
+    Instruction *I = dyn_cast<Instruction>(V);
+    assert((A || I) && "Unknown value type");
+
+    auto InstPt =
+      A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
+    return new ZExtInst(V, Int32Ty, "", InstPt);
+  }
+
+  typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+  // A PHINode is Promotable if:
+  // 1. Its type is i1 AND
+  // 2. All of its uses are ReturnInst, CallInst, PHINode, or DbgInfoIntrinsic
+  // AND
+  // 3. All of its operands are Constant or Argument or
+  //    CallInst or PHINode AND
+  // 4. All of its PHINode uses are Promotable AND
+  // 5. All of its PHINode operands are Promotable
+  static PHINodeSet getPromotablePHINodes(const Function &F) {
+    PHINodeSet Promotable;
+    // Condition 1
+    for (auto &BB : F)
+      for (auto &I : BB)
+        if (const PHINode *P = dyn_cast<const PHINode>(&I))
+          if (P->getType()->isIntegerTy(1))
+            Promotable.insert(P);
+
+    SmallVector<const PHINode *, 8> ToRemove;
+    for (const auto &P : Promotable) {
+      // Condition 2 and 3
+      auto IsValidUser = [] (const Value *V) -> bool {
+        return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
+        isa<DbgInfoIntrinsic>(V);
+      };
+      auto IsValidOperand = [] (const Value *V) -> bool {
+        return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
+        isa<PHINode>(V);
+      };
+      const auto &Users = P->users();
+      const auto &Operands = P->operands();
+      if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
+          !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+        ToRemove.push_back(P);
+    }
+
+    // Iterate to convergence
+    auto IsPromotable = [&Promotable] (const Value *V) -> bool {
+      const PHINode *Phi = dyn_cast<const PHINode>(V);
+      return !Phi || Promotable.count(Phi);
+    };
+    while (!ToRemove.empty()) {
+      for (auto &User : ToRemove)
+        Promotable.erase(User);
+      ToRemove.clear();
+
+      for (const auto &P : Promotable) {
+        // Condition 4 and 5
+        const auto &Users = P->users();
+        const auto &Operands = P->operands();
+        if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
+            !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+          ToRemove.push_back(P);
+      }
+    }
+
+    return Promotable;
+  }
+
+  typedef DenseMap<Value *, Value *> B2IMap;
+
+ public:
+  static char ID;
+  PPCBoolRetToInt() : FunctionPass(ID) {
+    initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) {
+    PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+    B2IMap Bool2IntMap;
+    bool Changed = false;
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+          if (F.getReturnType()->isIntegerTy(1))
+            Changed |=
+              runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
+
+        if (CallInst *CI = dyn_cast<CallInst>(&I))
+          for (auto &U : CI->operands())
+            if (U->getType()->isIntegerTy(1))
+              Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
+      }
+    }
+
+    return Changed;
+  }
+
+  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+                       B2IMap &BoolToIntMap) {
+    auto Defs = findAllDefs(U);
+
+    // If the values are all Constants or Arguments, don't bother
+    if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+      return false;
+
+    // Presently, we only know how to handle PHINode, Constant, and Arguments.
+    // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
+    // could also be handled in the future.
+    for (const auto &V : Defs)
+      if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+        return false;
+
+    for (const auto &V : Defs)
+      if (const PHINode *P = dyn_cast<PHINode>(V))
+        if (!PromotablePHINodes.count(P))
+          return false;
+
+    if (isa<ReturnInst>(U.getUser()))
+      ++NumBoolRetPromotion;
+    if (isa<CallInst>(U.getUser()))
+      ++NumBoolCallPromotion;
+    ++NumBoolToIntPromotion;
+
+    for (const auto &V : Defs)
+      if (!BoolToIntMap.count(V))
+        BoolToIntMap[V] = translate(V);
+
+    // Replace the operands of the translated instructions. They were set to
+    // zero in the translate function.
+ for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char PPCBoolRetToInt::ID = 0; +INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", + false, false) + +FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 940d55ac1f36..73a5305197ad 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -91,7 +91,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; // The end of the previous block may have extra nops if this block has an // alignment requirement. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index fd150beeb5a9..b6ac4d54d4c7 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -98,7 +98,7 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); } private: @@ -112,6 +112,7 @@ namespace { const DataLayout *DL; DominatorTree *DT; const TargetLibraryInfo *LibInfo; + bool PreserveLCSSA; }; char PPCCTRLoops::ID = 0; @@ -147,7 +148,7 @@ INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) @@ -169,11 +170,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; @@ -250,8 +252,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr // we're definitely using CTR. 
case Intrinsic::ppc_is_decremented_ctr_nonzero: - case Intrinsic::ppc_mtctr: - return true; + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -369,7 +371,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { true); if (VTy == MVT::Other) return true; - + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && @@ -537,7 +539,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(TT, Preheader)) - Preheader = InsertPreheaderForLoop(L, this); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; @@ -554,10 +556,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(CountType, 1)); - Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, - Preheader->getTerminator()); + ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); + Value *ECValue = + SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); @@ -677,7 +678,7 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { // any other instructions that might clobber the ctr register. for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); I != IE; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; if (!MDT->isReachableFromEntry(MBB)) continue; @@ -694,4 +695,3 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { return false; } #endif // NDEBUG - diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index fc89753ed94e..7cb1bb54c725 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -71,15 +71,20 @@ protected: for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(), PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) { bool OtherReference = false, BlockChanged = false; + + if ((*PI)->empty()) + continue; + for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) { - MachineInstrBuilder MIB; + if (J == (*PI)->end()) + break; + if (J->getOpcode() == PPC::B) { if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the // branch with a blr. - MIB = - BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())); - MIB.copyImplicitOps(I); + BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -90,10 +95,10 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. 
- MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) - .addImm(J->getOperand(0).getImm()) - .addReg(J->getOperand(1).getReg()); - MIB.copyImplicitOps(I); + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) + .addImm(J->getOperand(0).getImm()) + .addReg(J->getOperand(1).getReg()) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -104,11 +109,11 @@ protected: if (J->getOperand(1).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - MIB = BuildMI(**PI, J, J->getDebugLoc(), - TII->get(J->getOpcode() == PPC::BC ? - PPC::BCLR : PPC::BCLRn)) - .addReg(J->getOperand(0).getReg()); - MIB.copyImplicitOps(I); + BuildMI( + **PI, J, J->getDebugLoc(), + TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)) + .addReg(J->getOperand(0).getReg()) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -146,7 +151,7 @@ protected: } for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i) - PredToRemove[i]->removeSuccessor(&ReturnMBB); + PredToRemove[i]->removeSuccessor(&ReturnMBB, true); if (Changed && !ReturnMBB.hasAddressTaken()) { // We now might be able to merge this blr-only block into its @@ -156,7 +161,7 @@ protected: if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) { // Move the blr into the preceding block. PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I); - PrevMBB.removeSuccessor(&ReturnMBB); + PrevMBB.removeSuccessor(&ReturnMBB, true); } } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 5f236f744fc4..b451ebf7f27a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -164,7 +164,8 @@ class PPCFastISel final : public FastISel { unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); - unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true); + unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt = true); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, @@ -292,10 +293,7 @@ bool PPCFastISel::isValueAvailable(const Value *V) const { return true; const auto *I = cast(V); - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - return true; - - return false; + return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; } // Given a value Obj, create an Address object Addr that represents its @@ -527,9 +525,9 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // VSX only provides an indexed load. if (Is32VSXLoad || Is64VSXLoad) return false; - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -660,9 +658,9 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // VSX only provides an indexed store. 
if (Is32VSXStore || Is64VSXStore) return false; - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -774,8 +772,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const ConstantInt *CI = @@ -1607,21 +1604,18 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (ValLocs.size() > 1) return false; - // Special case for returning a constant integer of any size. - // Materialize the constant as an i64 and copy it to the return - // register. We still need to worry about properly extending the sign. E.g: - // If the constant has only one bit, it means it is a boolean. Therefore - // we can't use PPCMaterializeInt because it extends the sign which will - // cause negations of the returned value to be incorrect as they are - // implemented as the flip of the least significant bit. - if (isa(*RV)) { - const Constant *C = cast(RV); - + // Special case for returning a constant integer of any size - materialize + // the constant as an i64 and copy it to the return register. + if (const ConstantInt *CI = dyn_cast(RV)) { CCValAssign &VA = ValLocs[0]; unsigned RetReg = VA.getLocReg(); - unsigned SrcReg = PPCMaterializeInt(C, MVT::i64, - VA.getLocInfo() == CCValAssign::SExt); + // We still need to worry about properly extending the sign. For example, + // we could have only a single bit or a constant that needs zero + // extension rather than sign extension. Make sure we pass the return + // value extension property to integer materialization. + unsigned SrcReg = + PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); @@ -1761,8 +1755,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); - for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + for (const BasicBlock *SuccBB : IB->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]); return true; } @@ -1898,10 +1892,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); CodeModel::Model CModel = TM.getCodeModel(); - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - (VT == MVT::f32) ? 4 : 8, Align); + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); unsigned Opc = (VT == MVT::f32) ? 
PPC::LFS : PPC::LFD; unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); @@ -1976,19 +1969,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // If/when switches are implemented, jump tables should be handled - // on the "if" path here. - if (CModel == CodeModel::Large || - (GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker()) || - GV->isDeclaration() || GV->hasCommonLinkage() || - GV->hasAvailableExternallyLinkage()) + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); - else + } else { // Otherwise generate the ADDItocL. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); + } } return DestReg; @@ -2085,12 +2074,11 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). -unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, - bool UseSExt) { +unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt) { // If we're using CR bit registers for i1 values, handle that as a special // case first. if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { - const ConstantInt *CI = cast(C); unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); @@ -2105,12 +2093,17 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, &PPC::GPRCRegClass); // If the constant is in range, use a load-immediate. - const ConstantInt *CI = cast(C); - if (isInt<16>(CI->getSExtValue())) { + if (UseSExt && isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() ); + .addImm(CI->getSExtValue()); + return ImmReg; + } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) { + unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; + unsigned ImmReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) + .addImm(CI->getZExtValue()); return ImmReg; } @@ -2138,8 +2131,8 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { return PPCMaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return PPCMaterializeGV(GV, VT); - else if (isa(C)) - return PPCMaterializeInt(C, VT, VT != MVT::i1); + else if (const ConstantInt *CI = dyn_cast(C)) + return PPCMaterializeInt(CI, VT, VT != MVT::i1); return 0; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 08ae7174244a..beab844c6025 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -30,7 +30,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. 
/// -static const uint16_t VRRegNo[] = { +static const MCPhysReg VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, @@ -270,7 +270,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) { + if (I->isReturnBlock()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; @@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother @@ -325,7 +326,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); UsedRegMask != 0 && BI != BE; ++BI) { const MachineBasicBlock &MBB = *BI; - if (MBB.empty() || !MBB.back().isReturn()) + if (!MBB.isReturnBlock()) continue; const MachineInstr &Ret = MBB.back(); for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { @@ -555,9 +556,67 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { } } +bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, + bool UseAtEnd, + unsigned *ScratchRegister) const { + RegScavenger RS; + unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + + if (ScratchRegister) + *ScratchRegister = R0; + + // If MBB is an entry or exit block, use R0 as the scratch register + if ((UseAtEnd && MBB->isReturnBlock()) || + (!UseAtEnd && (&MBB->getParent()->front() == MBB))) + return true; + + RS.enterBasicBlock(MBB); + + if (UseAtEnd && !MBB->empty()) { + // The scratch register will be used at the end of the block, so must consider + // all registers used within the block + + MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); + // If no terminator, back iterator up to previous instruction. + if (MBBI == MBB->end()) + MBBI = std::prev(MBBI); + + if (MBBI != MBB->begin()) + RS.forward(MBBI); + } + + if (!RS.isRegUsed(R0)) + return true; + + unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? 
&PPC::G8RCRegClass + : &PPC::GPRCRegClass); + + // Make sure the register scavenger was able to find an available register + // If not, use R0 but return false to indicate no register was available and + // R0 must be used (as recommended by the ABI) + if (Reg == 0) + return false; + + if (ScratchRegister) + *ScratchRegister = Reg; + + return true; +} + +bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { + MachineBasicBlock *TmpMBB = const_cast(&MBB); + + return findScratchRegister(TmpMBB, false, nullptr); +} + +bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + MachineBasicBlock *TmpMBB = const_cast(&MBB); + + return findScratchRegister(TmpMBB, true, nullptr); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = @@ -589,7 +648,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } } - // Move MBBI back to the beginning of the function. + // Move MBBI back to the beginning of the prologue block. MBBI = MBB.begin(); // Work out frame sizes. @@ -613,7 +672,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; - unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 @@ -642,6 +701,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); + findScratchRegister(&MBB, false, &ScratchReg); + assert(ScratchReg && "No scratch register!"); + int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -916,27 +978,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } void PPCFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI != MBB.end() && "Returning block has no terminator"); + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl; + + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl; - - assert((RetOpcode == PPC::BLR || - RetOpcode == PPC::BLR8 || - RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8) && - "Can only insert epilog into returning blocks"); - // Get alignment info so we know how to restore the SP. const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -959,7 +1012,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; - unsigned ScratchReg = isPPC64 ? 
PPC::X0 : PPC::R0; + unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 : PPC::MTLR ); @@ -973,10 +1026,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 : PPC::ADD4 ); - + int LROffset = getReturnSaveOffset(); int FPOffset = 0; + + findScratchRegister(&MBB, true, &ScratchReg); + assert(ScratchReg && "No scratch register!"); + if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -1008,25 +1065,30 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, PBPOffset = FFI->getObjectOffset(PBPIndex); } - bool UsesTCRet = RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8; + bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); + + if (IsReturnBlock) { + unsigned RetOpcode = MBBI->getOpcode(); + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8; - if (UsesTCRet) { - int MaxTCRetDelta = FI->getTailCallSPDelta(); - MachineOperand &StackAdjust = MBBI->getOperand(1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - // Adjust stack pointer. - int StackAdj = StackAdjust.getImm(); - int Delta = StackAdj - MaxTCRetDelta; - assert((Delta >= 0) && "Delta must be positive"); - if (MaxTCRetDelta>0) - FrameSize += (StackAdj +Delta); - else - FrameSize += StackAdj; + if (UsesTCRet) { + int MaxTCRetDelta = FI->getTailCallSPDelta(); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int Delta = StackAdj - MaxTCRetDelta; + assert((Delta >= 0) && "Delta must be positive"); + if (MaxTCRetDelta>0) + FrameSize += (StackAdj +Delta); + else + FrameSize += StackAdj; + } } // Frames of 32KB & larger require special handling because they cannot be @@ -1066,7 +1128,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(0) .addReg(SPReg); } - } if (MustSaveLR) @@ -1109,52 +1170,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail // call optimization - if (MF.getTarget().Options.GuaranteedTailCallOpt && - (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && - MF.getFunction()->getCallingConv() == CallingConv::Fast) { - PPCFunctionInfo *FI = MF.getInfo(); - unsigned CallerAllocatedAmt = FI->getMinReservedArea(); + if (IsReturnBlock) { + unsigned RetOpcode = MBBI->getOpcode(); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && + MF.getFunction()->getCallingConv() == CallingConv::Fast) { + PPCFunctionInfo *FI = MF.getInfo(); + unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) - .addReg(SPReg).addImm(CallerAllocatedAmt); - } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg).addImm(CallerAllocatedAmt); + } else { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(CallerAllocatedAmt >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(CallerAllocatedAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, AddInst) + BuildMI(MBB, MBBI, dl, AddInst) .addReg(SPReg) .addReg(FPReg) .addReg(ScratchReg); - } - } else if (RetOpcode == PPC::TCRETURNdi) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); - } else if (RetOpcode == PPC::TCRETURNai) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); - } else if (RetOpcode == PPC::TCRETURNdi8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri8) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); - } else if (RetOpcode == PPC::TCRETURNai8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } + } else if (RetOpcode == PPC::TCRETURNdi) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    } else if (RetOpcode == PPC::TCRETURNri) {
+      MBBI = MBB.getLastNonDebugInstr();
+      assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+    } else if (RetOpcode == PPC::TCRETURNai) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+    } else if (RetOpcode == PPC::TCRETURNdi8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    } else if (RetOpcode == PPC::TCRETURNri8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+    } else if (RetOpcode == PPC::TCRETURNai8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+    }
+  }
 }
 
@@ -1200,8 +1264,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // Reserve stack space for the PIC Base register (R30).
   // Only used in SVR4 32-bit.
   if (FI->usesPICBase()) {
-    int PBPSI = FI->getPICBasePointerSaveIndex();
-    PBPSI = MFI->CreateFixedObject(4, -8, true);
+    int PBPSI = MFI->CreateFixedObject(4, -8, true);
     FI->setPICBasePointerSaveIndex(PBPSI);
   }
 
@@ -1710,3 +1773,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   return true;
 }
+
+bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
+          MF.getSubtarget<PPCSubtarget>().isPPC64());
+}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d6a389bfbf0d..bbe1329a5352 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -29,6 +29,30 @@ class PPCFrameLowering: public TargetFrameLowering {
   const unsigned LinkageSize;
   const unsigned BasePointerSaveOffset;
 
+  /**
+   * \brief Find a register that can be used in function prologue and epilogue
+   *
+   * Find a register that can be used as the scratch register in the function
+   * prologue and epilogue to save various registers (Link Register, Base
+   * Pointer, etc.). Prefer R0, if it is available. If it is not available,
+   * then choose a different register.
+   *
+   * This method will return true if an available register was found (including
+   * R0). If no available registers are found, the method returns false and sets
+   * ScratchRegister to R0, as per the recommendation in the ABI.
+   *
+   * \param[in] MBB The machine basic block to find an available register for
+   * \param[in] UseAtEnd Specify whether the scratch register will be used at
+   *                     the end of the basic block (i.e., will the scratch
+   *                     register kill a register defined in the basic block)
+   * \param[out] ScratchRegister The scratch register to use
+   * \return true if a scratch register was found; false if a scratch register
+   *         was not found and R0 is being used as the default.
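+   *
+   * A sketch of the intended calling pattern (illustrative only; the real
+   * callers are emitPrologue and emitEpilogue in PPCFrameLowering.cpp):
+   * \code
+   *   unsigned ScratchReg = 0;
+   *   // UseAtEnd = false: the register is needed at the start of the block.
+   *   if (!findScratchRegister(&MBB, false, &ScratchReg))
+   *     ; // Nothing was free: ScratchReg is R0, as the ABI recommends.
+   *   // ScratchReg may now be clobbered by the prologue sequence.
+   * \endcode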
+   */
+  bool findScratchRegister(MachineBasicBlock *MBB,
+                           bool UseAtEnd,
+                           unsigned *ScratchRegister) const;
+
 public:
   PPCFrameLowering(const PPCSubtarget &STI);
 
@@ -92,6 +116,13 @@ public:
 
   const SpillSlot *
   getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
+  bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
+  /// Methods used by shrink wrapping to determine if MBB can be used for the
+  /// function prologue/epilogue.
+  bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+  bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
 };
 } // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 932226842bb7..1eaa8118ba0a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,8 @@
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking(
              "bit permutations"),
     cl::Hidden);
 
+static cl::opt<bool> EnableBranchHint(
+  "ppc-use-branch-hint", cl::init(true),
+    cl::desc("Enable static hinting of branches on ppc"),
+    cl::Hidden);
+
 namespace llvm {
   void initializePPCDAGToDAGISelPass(PassRegistry&);
 }
@@ -286,7 +293,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   // Find all return blocks, outputting a restore in each epilog.
   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
-    if (!BB->empty() && BB->back().isReturn()) {
+    if (BB->isReturnBlock()) {
       IP = BB->end(); --IP;
 
       // Skip over all terminator instructions, which are part of the return
@@ -393,6 +400,55 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) {
   return isInt32Immediate(N.getNode(), Imm);
 }
 
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+                              const SDValue &DestMBB) {
+  assert(isa<BasicBlockSDNode>(DestMBB));
+
+  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+
+  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+  const TerminatorInst *BBTerm = BB->getTerminator();
+
+  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
+
+  const BasicBlock *TBB = BBTerm->getSuccessor(0);
+  const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
+  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+
+  // We only want to handle cases that are easy to predict statically, e.g. a
+  // C++ throw statement, which is very likely not taken, or a call to a
+  // function that never returns, e.g. stdlib exit(). So we set Threshold to
+  // filter out the unwanted cases.
+  //
+  // Below is the LLVM branch weight table; we only want to handle cases 1
+  // and 2.
+  //
+  // Case                  Taken:Nontaken  Example
+  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
+  // 2. Invoke-terminating 1:1048575
+  // 3. Coldblock          4:64            __builtin_expect
+  // 4. Loop Branch        124:4           For loop
+  // 5. PH/ZH/FPH          20:12
+  const uint32_t Threshold = 10000;
+
+  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
+    return PPC::BR_NO_HINT;
+
+  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+               << BB->getName() << "'\n"
+               << " -> " << TBB->getName() << ": " << TProb << "\n"
+               << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
+
+  // If the destination basic block is the false successor (FBB), swap the
+  // branch probabilities, because we want 'TProb' to stand for the
+  // probability of branching to the destination basic block.
+  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+    std::swap(TProb, FProb);
+
+  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
+}
 
 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 // opcode and that it has an immediate integer right operand.
@@ -564,7 +620,6 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
   // Handle first 32 bits.
   unsigned Lo = Imm & 0xFFFF;
-  unsigned Hi = (Imm >> 16) & 0xFFFF;
 
   // Simple value.
   if (isInt<16>(Imm)) {
@@ -586,9 +641,9 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
     ++Result;
 
   // Add in the last bits as required.
-  if ((Hi = (Remainder >> 16) & 0xFFFF))
+  if ((Remainder >> 16) & 0xFFFF)
     ++Result;
-  if ((Lo = Remainder & 0xFFFF))
+  if (Remainder & 0xFFFF)
     ++Result;
 
   return Result;
@@ -1028,7 +1083,7 @@ class BitPermutationSelector {
         BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
         BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
         BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
-      DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+      DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
       BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
       BitGroups.erase(BitGroups.begin());
     }
@@ -1557,10 +1612,7 @@ class BitPermutationSelector {
           return false;
       }
 
-      if (VRI.RLAmt != EffRLAmt)
-        return false;
-
-      return true;
+      return VRI.RLAmt == EffRLAmt;
     };
 
     for (auto &BG : BitGroups) {
@@ -2781,7 +2833,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     if (PPCSubTarget->hasVSX() &&
         (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) {
       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
-      
+
       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
       unsigned DM[2];
@@ -2798,7 +2850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
         SDValue Base, Offset;
 
-        if (LD->isUnindexed() && 
+        if (LD->isUnindexed() &&
            LD->hasOneUse() && Op1.hasOneUse() &&
            (LD->getMemoryVT() == MVT::f64 || LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
@@ -2841,8 +2893,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     // Op #3 is the Dest MBB
     // Op #4 is the Flag.
     // Prevent PPC::PRED_* from being selected into LI.
- SDValue Pred = - getI32Imm(cast(N->getOperand(1))->getZExtValue(), dl); + unsigned PCC = cast(N->getOperand(1))->getZExtValue(); + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); + + SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); @@ -2871,6 +2926,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { BitComp, N->getOperand(4), N->getOperand(0)); } + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; @@ -2903,9 +2961,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it is an externally defined symbol, a symbol with common linkage, - // a non-local function address, or a jump table address, or if we are - // generating code for large code model, we generate: + // If it must be toc-referenced according to PPCSubTarget, we generate: // LDtocL(, ADDIStocHA(%X2, )) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, ), ) @@ -2920,13 +2976,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { MVT::i64, GA, SDValue(Tmp, 0))); if (GlobalAddressSDNode *G = dyn_cast(GA)) { - const GlobalValue *GValue = G->getGlobal(); - if ((GValue->getType()->getElementType()->isFunctionTy() && - !GValue->isStrongDefinitionForLinker()) || - GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage()) + const GlobalValue *GV = G->getGlobal(); + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0))); + } } return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, @@ -3110,7 +3165,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { if (!CurDAG->MaskedValueIsZero(Op0, APInt::getHighBitsSet(Bits, Bits - (b+1)*8))) return false; - + LHS = Op0.getOperand(0); RHS = Op0.getOperand(1); return true; @@ -3305,7 +3360,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() { bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; if (N->use_empty()) continue; @@ -3989,7 +4044,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; @@ -4145,7 +4200,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { ++Position; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; @@ -4184,16 +4239,24 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } - // If this is a load or store with a zero offset, we may be able to - // fold an add-immediate into the memory operation. - if (!isa(N->getOperand(FirstOp)) || - N->getConstantOperandVal(FirstOp) != 0) + // If this is a load or store with a zero offset, or within the alignment, + // we may be able to fold an add-immediate into the memory operation. 
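+  // For instance (illustrative, not from this patch):
+  //   addis 3, 2, foo@toc@ha
+  //   addi  3, 3, foo@toc@l
+  //   ld    4, 0(3)
+  // can become
+  //   addis 3, 2, foo@toc@ha
+  //   ld    4, foo@toc@l(3)
+  // i.e. the low-part add-immediate is folded into the load's displacement,
+  // provided the displacement still satisfies the alignment constraints.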
+ // The check against alignment is below, as it can't occur until we check + // the arguments to N + if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; + // On targets with fusion, we don't want this to fire and remove a fusion + // opportunity, unless a) it results in another fusion opportunity or + // b) optimizing for size. + if (PPCSubTarget->hasFusion() && + (!MF->getFunction()->optForSize() && !Base.hasOneUse())) + continue; + unsigned Flags = 0; bool ReplaceFlags = true; @@ -4237,6 +4300,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } + SDValue ImmOpnd = Base.getOperand(1); + int MaxDisplacement = 0; + if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + const GlobalValue *GV = GA->getGlobal(); + MaxDisplacement = GV->getAlignment() - 1; + } + + int Offset = N->getConstantOperandVal(FirstOp); + if (Offset < 0 || Offset > MaxDisplacement) + continue; + // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to @@ -4247,8 +4321,6 @@ void PPCDAGToDAGISel::PeepholePPC64() { DEBUG(N->dump(CurDAG)); DEBUG(dbgs() << "\n"); - SDValue ImmOpnd = Base.getOperand(1); - // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { @@ -4259,17 +4331,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || - StorageOpcode == PPC::LWA)) { + StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), - 0, Flags); + Offset, Flags); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1b8f8fb2f45b..af9ad077a7ce 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -42,10 +42,6 @@ using namespace llvm; -// FIXME: Remove this once soft-float is supported. -static cl::opt DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", -cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); - static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -72,8 +68,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); - addRegisterClass(MVT::f32, &PPC::F4RCRegClass); - addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + if (!Subtarget.useSoftFloat()) { + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { @@ -107,8 +105,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); - AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, - isPPC64 ? 
MVT::i64 : MVT::i32); + AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); @@ -257,10 +255,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::f32, Expand); - setOperationAction(ISD::BITCAST, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::i64, Expand); - setOperationAction(ISD::BITCAST, MVT::f64, Expand); + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); + } else { + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -329,6 +334,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -403,9 +410,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. 
- setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); - + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); @@ -477,6 +484,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); @@ -519,12 +528,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - - if (Subtarget.hasP8Altivec()) + if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); else setOperationAction(ISD::MUL, MVT::v4i32, Custom); - + setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -545,6 +553,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + } + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); + } + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); @@ -813,15 +836,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - if (isPPC64) { - setStackPointerRegisterToSaveRestore(PPC::X1); - setExceptionPointerRegister(PPC::X3); - setExceptionSelectorRegister(PPC::X4); - } else { - setStackPointerRegisterToSaveRestore(PPC::R1); - setExceptionPointerRegister(PPC::R3); - setExceptionSelectorRegister(PPC::R4); - } + setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); @@ -942,9 +957,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; - getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == MaxMaxAlign) @@ -969,6 +984,10 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, return Align; } +bool PPCTargetLowering::useSoftFloat() const { + return Subtarget.useSoftFloat(); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; @@ -992,6 +1011,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; + case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; @@ -1236,7 +1256,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). @@ -1261,7 +1281,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). @@ -1353,7 +1373,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG - * \return true iff this shuffle mask + * \return true iff this shuffle mask */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { @@ -1380,7 +1400,7 @@ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -/// The ShuffleKind distinguishes between big-endian operations with two +/// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). 
For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). @@ -1513,8 +1533,8 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. - LeadingZero &= cast(UniquedVals[i])->isNullValue(); - LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); + LeadingZero &= isNullConstant(UniquedVals[i]); + LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. if (LeadingZero) { @@ -1629,7 +1649,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } - /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. @@ -1998,10 +2017,10 @@ static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; - return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl, - DAG.getVTList(VT, MVT::Other), Ops, VT, - MachinePointerInfo::getGOT(), 0, false, true, - false, 0); + return DAG.getMemIntrinsicNode( + PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, + false, 0); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, @@ -2092,6 +2111,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2480,7 +2502,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // */ // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); @@ -2536,7 +2557,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -// Function whose sole purpose is to kill compiler warnings +// Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; @@ -2933,8 +2954,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - if (DisablePPCFloatInVariadic) - NumFPArgRegs = 0; + + if (Subtarget.useSoftFloat()) + NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); @@ -3177,15 +3199,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(FuncArg), - ObjType, false, false, 0); + MachinePointerInfo(&*FuncArg), ObjType, + false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. 
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), - false, false, 0); + Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg), false, false, 0); } MemOps.push_back(Store); @@ -3212,9 +3234,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -3592,7 +3614,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), + MachinePointerInfo(&*FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -3615,9 +3637,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -3880,7 +3902,6 @@ struct TailCallArgumentInfo { TailCallArgumentInfo() : FrameIdx(0) {} }; - } /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. @@ -3895,9 +3916,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains.push_back(DAG.getStore( + Chain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, 0)); } } @@ -3922,9 +3944,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); - Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(NewRetAddr), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldRetAddr, NewRetAddrFrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr), + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. 
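The hunks above and below all make the same API transition: the MachinePointerInfo factories (getFixedStack, getStack, getGOT, getConstantPool) now take the owning MachineFunction as an explicit first argument instead of looking it up behind the scenes. A minimal sketch of the new calling convention, assuming a SelectionDAG DAG, a chain Chain, a value Val, a pointer FIN, a debug location dl, and a frame index FI are already in scope:

    // The pseudo source values backing these MachinePointerInfos are now
    // retrieved through the MachineFunction, so it must be passed explicitly.
    MachineFunction &MF = DAG.getMachineFunction();
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    SDValue Store = DAG.getStore(Chain, dl, Val, FIN, MPI,
                                 /*isVolatile=*/false,
                                 /*isNonTemporal=*/false,
                                 /*Alignment=*/0);

The older single-argument form, MachinePointerInfo::getFixedStack(FI), is exactly what each minus line in these hunks removes.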
@@ -3933,9 +3956,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - MachinePointerInfo::getFixedStack(NewFPIdx), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldFP, NewFramePtrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx), + false, false, 0); } } return Chain; } @@ -4812,8 +4836,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; break; case MVT::v4f32: - // When using QPX, this is handled like a FP register, otherwise, it - // is an Altivec register. + // When using QPX, this is handled like a FP register, otherwise, it + // is an Altivec register. if (Subtarget.hasQPX()) { if (++NumFPRsUsed <= NumFPRs) continue; @@ -5318,9 +5342,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(TOCSaveOffset), - false, false, 0); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), + false, false, 0); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. @@ -5341,9 +5366,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, - hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, - Callee, SPDiff, NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest, + DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, + SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -5798,6 +5823,22 @@ PPCTargetLowering::LowerReturn(SDValue Chain, return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } +SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET( + SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { + SDLoc dl(Op); + + // Get the correct type for integers. + EVT IntVT = Op.getValueType(); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue FPSIdx = getFramePointerFrameIndex(DAG); + // Build a DYNAREAOFFSET node. + SDValue Ops[2] = {Chain, FPSIdx}; + SDVTList VTs = DAG.getVTList(IntVT); + return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); +} + SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link.
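The LowerGET_DYNAMIC_AREA_OFFSET hook added above is reached from IR through the llvm.get.dynamic.area.offset intrinsic. A hedged sketch of how a front end might emit it; the enum name Intrinsic::get_dynamic_area_offset and the helper function are assumptions for illustration, not code from this patch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Emits a dynamic alloca followed by the intrinsic that selects down to
    // PPCISD::DYNAREAOFFSET; PPCRegisterInfo::eliminateFrameIndex later
    // rewrites the frame index into the offset from the native SP to the
    // most recent dynamic allocation.
    static Value *emitDynAreaOffset(IRBuilder<> &B, Value *NBytes /* i64 */) {
      Module *M = B.GetInsertBlock()->getModule();
      B.CreateAlloca(B.getInt8Ty(), NBytes);           // dynamic alloca
      Function *DynOff = Intrinsic::getDeclaration(
          M, Intrinsic::get_dynamic_area_offset, {B.getInt64Ty()});
      return B.CreateCall(DynOff);                     // i64 on PPC64
    }

Because GET_DYNAMIC_AREA_OFFSET is custom-lowered above for both MVT::i32 and MVT::i64, the same intrinsic works on 32- and 64-bit subtargets.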
@@ -5828,10 +5869,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, false, false, 0); } - - -SDValue -PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { +SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); @@ -5983,6 +6021,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); ISD::CondCode CC = cast(Op.getOperand(4))->get(); @@ -6033,7 +6075,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6043,25 +6085,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6101,7 +6143,8 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); - MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain; @@ -6291,11 +6334,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
// This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); - + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); - FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, - FPHalfs, FPHalfs, FPHalfs, FPHalfs); - + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, + FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); if (Op.getValueType() != MVT::v4f64) @@ -6421,17 +6464,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; MachineMemOperand *MMO = @@ -6472,16 +6516,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; } @@ -6506,14 +6552,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Op.getOperand(0)); // STD the extended value into the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); // Load the value as a double. - Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, false, 0); + Ld = DAG.getLoad( + MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, false, 0); } // FCFID it and return it. @@ -6735,7 +6783,6 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); } - /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, @@ -6768,7 +6815,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // to a zero vector to get the boolean result. 
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -6794,8 +6842,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); - else if (cast(BVN->getOperand(i))-> - getConstantIntValue()->isZero()) + else if (isNullConstant(BVN->getOperand(i))) continue; else CV[i] = One; @@ -6814,9 +6861,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, ValueVTs.push_back(MVT::Other); // chain SDVTList VTs = DAG.getVTList(ValueVTs); - return DAG.getMemIntrinsicNode(PPCISD::QVLFSb, - dl, VTs, Ops, MVT::v4f32, - MachinePointerInfo::getConstantPool()); + return DAG.getMemIntrinsicNode( + PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } SmallVector Stores; @@ -6915,7 +6962,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SextVal >= -16 && SextVal <= 15) return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); - // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: @@ -7304,11 +7350,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, V1, V2, VPermMask); } -/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an -/// altivec comparison. If it is, return true and fill in Opc/isDot with +/// getVectorCompareInfo - Given an intrinsic, return false if it is not a +/// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. 
-static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot, const PPCSubtarget &Subtarget) { +static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -7321,12 +7367,11 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequd_p: + case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 199; - isDot = 1; - } - else + CompareOpc = 199; + isDot = 1; + } else return false; break; @@ -7335,28 +7380,48 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsd_p: + case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 967; - isDot = 1; - } - else + CompareOpc = 967; + isDot = 1; + } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtud_p: + case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 711; - isDot = 1; - } - else + CompareOpc = 711; + isDot = 1; + } else return false; break; - + // VSX predicate comparisons use the same infrastructure + case Intrinsic::ppc_vsx_xvcmpeqdp_p: + case Intrinsic::ppc_vsx_xvcmpgedp_p: + case Intrinsic::ppc_vsx_xvcmpgtdp_p: + case Intrinsic::ppc_vsx_xvcmpeqsp_p: + case Intrinsic::ppc_vsx_xvcmpgesp_p: + case Intrinsic::ppc_vsx_xvcmpgtsp_p: + if (Subtarget.hasVSX()) { + switch (IntrinsicID) { + case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; + case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; + case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; + case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; + case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; + case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; + } + isDot = 1; + } + else + return false; + + break; + // Normal Comparisons. 
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; @@ -7365,10 +7430,9 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequd: if (Subtarget.hasP8Altivec()) { - CompareOpc = 199; - isDot = 0; - } - else + CompareOpc = 199; + isDot = 0; + } else return false; break; @@ -7377,24 +7441,22 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsd: + case Intrinsic::ppc_altivec_vcmpgtsd: if (Subtarget.hasP8Altivec()) { - CompareOpc = 967; - isDot = 0; - } - else + CompareOpc = 967; + isDot = 0; + } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtud: + case Intrinsic::ppc_altivec_vcmpgtud: if (Subtarget.hasP8Altivec()) { - CompareOpc = 711; - isDot = 0; - } - else + CompareOpc = 711; + isDot = 0; + } else return false; break; @@ -7411,7 +7473,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) + if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. @@ -7536,7 +7598,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, FPHalfs, FPHalfs); - Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, @@ -7545,7 +7607,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -7752,7 +7815,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, FPHalfs, FPHalfs); - Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. 
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, @@ -7761,7 +7824,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -7798,11 +7862,10 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); - Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx, - SN->getPointerInfo().getWithOffset(i), - MVT::i8 /* memory type */, - SN->isNonTemporal(), SN->isVolatile(), - 1 /* alignment */, SN->getAAInfo())); + Stores.push_back(DAG.getTruncStore( + StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(), + 1 /* alignment */, SN->getAAInfo())); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7906,6 +7969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); + case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); @@ -7971,7 +8035,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), - N->getOperand(1)); + N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); @@ -8020,7 +8084,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, } } - //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// @@ -8089,8 +8152,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -8160,8 +8222,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -8283,8 +8344,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; + MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); @@ -8384,8 +8444,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, 
TII->get(PPC::B)).addMBB(sinkMBB); - thisMBB->addSuccessor(mainMBB, /* weight */ 0); - thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); + thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 @@ -8562,8 +8622,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // To "insert" these instructions we actually have to insert their // control-flow patterns. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); @@ -8675,7 +8734,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU - // cmpw crX,Rx,Rz # check if ‘old’=’new’ + // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... @@ -9137,7 +9196,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, return SDValue(); } -bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default @@ -9150,12 +9209,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: - return NumUsers > 2; + return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: - return NumUsers > 1; + return 2; + } +} + +// isConsecutiveLSLoc needs to work even if all adds have not yet been +// collapsed, and so we need to look through chains of them. +static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, + int64_t& Offset, SelectionDAG &DAG) { + if (DAG.isBaseWithConstantOffset(Loc)) { + Base = Loc.getOperand(0); + Offset += cast(Loc.getOperand(1))->getSExtValue(); + + // The base might itself be a base plus an offset, and if so, accumulate + // that as well. 
+ getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } @@ -9178,16 +9251,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + SDValue Base1 = Loc, Base2 = BaseLoc; + int64_t Offset1 = 0, Offset2 = 0; + getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); + getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); + if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; + Offset1 = 0; + Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -9343,7 +9418,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); - + while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) @@ -9470,7 +9545,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by extensions. + // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -9492,7 +9567,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -9572,7 +9647,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, if (isa(Inputs[i])) continue; else - DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } // Replace all operations (these are all the same, but have a different @@ -9682,7 +9757,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by truncations. + // select) that are all fed by truncations. 
while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -9701,7 +9776,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -9915,10 +9990,11 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = - DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); - return DAG.getNode(ISD::SRA, dl, N->getValueType(0), - DAG.getNode(ISD::SHL, dl, N->getValueType(0), - N->getOperand(0), ShiftCst), ShiftCst); + DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); + return DAG.getNode( + ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), + ShiftCst); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, @@ -10102,16 +10178,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case PPCISD::SHL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 << V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); - } break; case PPCISD::SRL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 >>u V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); - } break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { @@ -10122,7 +10194,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: @@ -10277,7 +10349,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = - MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1, + MF.getMachineMemOperand(LD->getMemOperand(), + -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. 
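One subtlety in the hunk above deserves a note: the offset passed to getMachineMemOperand is written as -(long)MemVT.getStoreSize()+1 because getStoreSize() returns an unsigned quantity. Negating it without first converting to a signed type wraps modulo 2^32, producing an enormous positive offset instead of a small negative one. A self-contained illustration of the trap:

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned StoreSize = 16;                  // e.g. a 16-byte vector access
      int64_t Wrapped = -StoreSize + 1;         // negation happens in unsigned
      int64_t Intended = -(long)StoreSize + 1;  // convert to signed, then negate
      std::printf("%lld vs %lld\n", (long long)Wrapped, (long long)Intended);
      // Prints "4294967281 vs -15": without the cast the memory operand
      // would describe a range roughly 4 GiB past the base pointer rather
      // than 15 bytes before it.
      return 0;
    }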
@@ -10527,7 +10600,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); - + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { @@ -10558,8 +10631,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && - !cast(LHS.getOperand(1))->getConstantIntValue()-> - isZero()) + !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -10588,7 +10660,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { + getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -10739,8 +10811,11 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) - for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) + for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->GetInstSizeInBytes(J); + if (LoopSize > 32) + break; + } if (LoopSize > 16 && LoopSize <= 32) return 5; @@ -10868,17 +10943,19 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); - return std::make_pair(0U, &PPC::VRRCRegClass); + if (Subtarget.hasAltivec()) + return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } - } else if (Constraint == "wc") { // an individual CR bit. + } else if (Constraint == "wc" && Subtarget.useCRBits()) { + // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); - } else if (Constraint == "wa" || Constraint == "wd" || - Constraint == "wf") { + } else if ((Constraint == "wa" || Constraint == "wd" || + Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); - } else if (Constraint == "ws") { - if (VT == MVT::f32) + } else if (Constraint == "ws" && Subtarget.hasVSX()) { + if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); @@ -10908,7 +10985,6 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return R; } - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. 
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -11358,9 +11434,7 @@ bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0 || BitSize > 64) - return false; - return true; + return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -11477,11 +11551,21 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { return ScratchRegs; } +unsigned PPCTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; +} + +unsigned PPCTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; +} + bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) - return false; + return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasQPX()) { if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6e13533cfdb3..44bcb8942cfc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -79,6 +79,11 @@ namespace llvm { /// compute an allocation on the stack. DYNALLOC, + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an offset from native SP to the address of the most recent + /// dynamic alloca. + DYNAREAOFFSET, + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, @@ -423,6 +428,8 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + bool useSoftFloat() const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } @@ -655,8 +662,17 @@ namespace llvm { return Ty->isArrayTy(); } - private: + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + + private: struct ReuseLoadInfo { SDValue Ptr; SDValue Chain; @@ -719,6 +735,8 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; + SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; @@ -853,7 +871,7 @@ namespace llvm { bool &UseOneConstNR) const override; SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d62833037db5..075e093e41a1 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -369,6 +369,8 @@ let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", [(set i64:$result, (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8", + [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>; let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d4e666cc1f3e..c17603a7718a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (!DefMI->getParent()) + return Latency; + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); unsigned Reg = DefMO.getReg(); @@ -186,6 +189,60 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +// This function does not list all associative and commutative operations, but +// only those worth feeding through the machine combiner in an attempt to +// reduce the critical path. Mostly, this means floating-point operations, +// because they have high latencies (compared to other operations, such as +// and/or, which are also associative and commutative, but have low latencies). +bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + // FP Add: + case PPC::FADD: + case PPC::FADDS: + // FP Multiply: + case PPC::FMUL: + case PPC::FMULS: + // Altivec Add: + case PPC::VADDFP: + // VSX Add: + case PPC::XSADDDP: + case PPC::XVADDDP: + case PPC::XVADDSP: + case PPC::XSADDSP: + // VSX Multiply: + case PPC::XSMULDP: + case PPC::XVMULDP: + case PPC::XVMULSP: + case PPC::XSMULSP: + // QPX Add: + case PPC::QVFADD: + case PPC::QVFADDS: + case PPC::QVFADDSs: + // QPX Multiply: + case PPC::QVFMUL: + case PPC::QVFMULS: + case PPC::QVFMULSs: + return true; + default: + return false; + } +} + +bool PPCInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + // Using the machine combiner in this way is potentially expensive, so + // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) + return false; + + // FP reassociation is only legal when we don't need strict IEEE semantics. + if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) + return false; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); +} + // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, @@ -259,16 +316,16 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -// commuteInstruction - We can commute rlwimi instructions, but only if the -// rotate amt is zero. We also have to munge the immediates a bit. -MachineInstr * -PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { MachineFunction &MF = *MI->getParent()->getParent(); // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI && MI->getOpcode() != PPC::RLWIMIo) - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because // changing the relative order of the mask operands might change what happens @@ -286,6 +343,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Op0 = (Op2 & ~M) | (Op1 & M) // Swap op1/op2 + assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && + "Only the operands 1 and 2 can be swapped in RLWIMI/RLWIMIo."); unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); unsigned Reg2 = MI->getOperand(2).getReg(); @@ -353,9 +412,9 @@ bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, if (AltOpc == -1) return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); - SrcOpIdx1 = 2; - SrcOpIdx2 = 3; - return true; + // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1 + // and SrcOpIdx2.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); } void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, @@ -996,11 +1055,10 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } @@ -1109,11 +1167,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } @@ -1214,7 +1271,7 @@ bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); } @@ -1691,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MI->setDesc(NewDesc); if (NewDesc.ImplicitDefs) - for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); *ImpDefs; ++ImpDefs) if (!MI->definesRegister(*ImpDefs)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpDefs, true, true)); if (NewDesc.ImplicitUses) - for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); *ImpUses; ++ImpUses) if (!MI->readsRegister(*ImpUses)) MI->addOperand(*MI->getParent()->getParent(), @@ -1737,3 +1794,35 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } +std::pair<unsigned, unsigned> +PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + const unsigned Mask = PPCII::MO_ACCESS_MASK; + return std::make_pair(TF & Mask, TF & ~Mask); +} + +ArrayRef<std::pair<unsigned, const char *>> +PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace PPCII; + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_LO, "ppc-lo"}, + {MO_HA, "ppc-ha"}, + {MO_TPREL_LO, "ppc-tprel-lo"}, + {MO_TPREL_HA, "ppc-tprel-ha"}, + {MO_DTPREL_LO, "ppc-dtprel-lo"}, + {MO_TLSLD_LO, "ppc-tlsld-lo"}, + {MO_TOC_LO, "ppc-toc-lo"}, + {MO_TLS, "ppc-tls"}}; + return makeArrayRef(TargetFlags); +} + +ArrayRef<std::pair<unsigned, const char *>> +PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace PPCII; + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_PLT_OR_STUB, "ppc-plt-or-stub"}, + {MO_PIC_FLAG, "ppc-pic"}, + {MO_NLP_FLAG, "ppc-nlp"}, + {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}}; + return makeArrayRef(TargetFlags); +} + diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 40badae644d6..c3c3a480a6aa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -79,6 +79,23 @@ class
PPCInstrInfo : public PPCGenInstrInfo { SmallVectorImpl<MachineInstr *> &NewMIs, bool &NonRI, bool &SpillsVRS) const; virtual void anchor(); + +protected: + /// Commutes the operands in the given instruction. + /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. + /// + /// Do not call this method for a non-commutable instruction or for + /// non-commutable pair of operand indices OpIdx1 and OpIdx2. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands; a null pointer is returned in such cases. + /// + /// For example, we can commute rlwimi instructions, but only if the + /// rotate amt is zero. We also have to munge the immediates a bit. + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + public: explicit PPCInstrInfo(PPCSubtarget &STI); @@ -119,6 +136,19 @@ public: return false; } + bool useMachineCombiner() const override { + return true; + } + + /// Return true when there is potentially a faster code sequence + /// for an instruction chain ending in <Root>. All potential patterns are + /// output in the <P> array. + bool getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &P) const override; + + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override; @@ -127,10 +157,6 @@ public: unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - // commuteInstruction - We can commute rlwimi instructions, but only if the - // rotate amt is zero. We also have to munge the immediates a bit. - MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override; - bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; @@ -183,7 +209,7 @@ public: // profitable to use the predicated branches. bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override { + BranchProbability Probability) const override { return true; } @@ -191,12 +217,10 @@ public: unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; - bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - const BranchProbability - &Probability) const override { + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override { return true; } @@ -239,6 +263,15 @@ public: unsigned GetInstSizeInBytes(const MachineInstr *MI) const; void getNoopForMachoTarget(MCInst &NopInst) const override; + + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; + + ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 24fd9bd5c1f7..6c4364aad331 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -226,7 +226,9 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, // Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>; +def SDTDynAreaOp : SDTypeProfile<1, 1, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; +def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. @@ -1029,6 +1031,8 @@ let Defs = [R1], Uses = [R1] in def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", + [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. @@ -3883,8 +3887,11 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; -def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; -def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +// The POWER variant +def : MnemonicAlias<"cntlz", "cntlzw">; +def : MnemonicAlias<"cntlz.", "cntlzw.">; def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index 0a044c5c6ea4..43120070d799 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -839,31 +839,31 @@ def : Pat<(v4f64 (scalar_to_vector f64:$A)), def : Pat<(v4f32 (scalar_to_vector f32:$A)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 0)), +def : Pat<(f64 (extractelt v4f64:$S, 0)), (EXTRACT_SUBREG $S, sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 0)), +def : Pat<(f32 (extractelt v4f32:$S, 0)), (EXTRACT_SUBREG $S, sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 1)), +def : Pat<(f64 (extractelt v4f64:$S, 1)), (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 2)), +def : Pat<(f64 (extractelt v4f64:$S, 2)), (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 3)), +def : Pat<(f64 (extractelt v4f64:$S, 3)), (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 1)), +def : Pat<(f32 (extractelt v4f32:$S, 1)), (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 2)), +def : Pat<(f32 (extractelt v4f32:$S, 2)), (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 3)), +def : Pat<(f32 (extractelt v4f32:$S, 3)), (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)), +def : Pat<(f64 (extractelt v4f64:$S, i64:$F)), (EXTRACT_SUBREG (QVFPERM $S, $S, (QVLPCLSXint (RLDICR $F, 2, /* 63-2 = */ 61))), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)), +def : Pat<(f32 (extractelt v4f32:$S, i64:$F)), (EXTRACT_SUBREG (QVFPERMs $S, $S, (QVLPCLSXint (RLDICR $F, 2, /* 63-2 = */ 61))), diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ce63c22992e8..df1142cb42f3 100644 --- 
a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -67,17 +67,19 @@ def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; -multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, - string asmbase, string asmstr, InstrItinClass itin, - list pattern> { +multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, + string asmstr, InstrItinClass itin, Intrinsic Int, + ValueType OutTy, ValueType InTy> { let BaseName = asmbase in { - def NAME : XX3Form_Rc; + [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>; let Defs = [CR6] in - def o : XX3Form_Rc, isDOT; + [(set InTy:$XT, + (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>, + isDOT; } } @@ -456,35 +458,23 @@ let Uses = [RM] in { "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; defm XVCMPEQDP : XX3Form_Rcr<60, 99, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; defm XVCMPEQSP : XX3Form_Rcr<60, 67, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>; defm XVCMPGEDP : XX3Form_Rcr<60, 115, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpgedp, v2i64, v2f64>; defm XVCMPGESP : XX3Form_Rcr<60, 83, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpgesp, v4i32, v4f32>; defm XVCMPGTDP : XX3Form_Rcr<60, 107, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>; defm XVCMPGTSP : XX3Form_Rcr<60, 75, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; // Move Instructions def XSABSDP : XX2Form<60, 345, @@ -845,9 +835,9 @@ let Predicates = [IsBigEndian] in { def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 0)), +def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG $S, sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 1)), +def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; } @@ -856,9 +846,9 @@ def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; -def : Pat<(f64 (vector_extract v2f64:$S, 0)), +def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 1)), +def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } @@ -1206,6 +1196,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } + + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; + + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, []>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // AddedComplexity = 400 } // HasP8Vector @@ -1229,3 +1236,550 @@ let Predicates = [HasDirectMove, HasVSX] in { "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; } // HasDirectMove, HasVSX + +/* Direct moves of various widths from GPR's into VSR's. Each move lines + the value up into element 0 (both BE and LE). Namely, entities smaller than + a doubleword are shifted left and moved for BE. For LE, they're moved, then + swapped to go into the least significant element of the VSR. +*/ +def MovesToVSR { + dag BE_BYTE_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); + dag BE_DWORD_0 = (MTVSRD $A); + + dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); + dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + LE_MTVSRW, sub_64)); + dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); + dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + BE_DWORD_0, sub_64)); + dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); +} + +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. + The numbering for the DAG's is for LE, but when used on BE, the correct + LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). 
+*/ +def VectorExtractions { + // Doubleword extraction + dag LE_DWORD_0 = + (MFVSRD + (EXTRACT_SUBREG + (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), + (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); + dag LE_DWORD_1 = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + + // Word extraction + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); + dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); + + // Halfword extraction + dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); + dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); + dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); + dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); + dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); + dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); + dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); + dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); + + // Byte extraction + dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); + dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); + dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); + dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); + dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); + dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); + dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); + dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); + dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); + dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); + dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); + dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); + dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); + dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32)); + dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); + dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); + + /* Variable element number (BE and LE patterns must be specified separately) + This is a rather involved process. + + Conceptually, this is how the move is accomplished: + 1. Identify which doubleword contains the element + 2. Shift in the VMX register so that the correct doubleword is correctly + lined up for the MFVSRD + 3. Perform the move so that the element (along with some extra stuff) + is in the GPR + 4. Right shift within the GPR so that the element is right-justified + + Of course, the index is an element number which has a different meaning + on LE/BE so the patterns have to be specified separately. + + Note: The final result will be the element right-justified with high + order bits being arbitrarily defined (namely, whatever was in the + vector register to the left of the value originally). + */ + + /* LE variable byte + Number 1. 
above: + - For elements 0-7, we shift left by 8 bytes since they're on the right + - For elements 8-15, we need not shift (shift left by zero bytes) + This is accomplished by inverting the bits of the index and AND-ing + with 0x8 (i.e. clearing all bits of the index and inverting bit 60). + */ + dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-7 (8-15 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 8 as we need to shift right by the number of bits, not bytes + - Shift right in the GPR by the calculated value + */ + dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), + sub_32); + dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), + sub_32); + + /* LE variable halfword + Number 1. above: + - For elements 0-3, we shift left by 8 since they're on the right + - For elements 4-7, we need not shift (shift left by zero bytes) + Similarly to the byte pattern, we invert the bits of the index, but we + AND with 0x4 (i.e. clear all bits of the index and invert bit 61). + Of course, the shift is still by 8 bytes, so we must multiply by 2. + */ + dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-3 (4-7 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 16 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), + sub_32); + dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), + sub_32); + + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. 
above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. + */ + dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + LE_VDWORD_PERM_VEC); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + + /* BE variable halfword + The algorithm here is the same as the LE variable halfword except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x4 and multiply by 2 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-3 + */ + dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62)); + dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC); + dag BE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), + sub_64)); + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), + sub_32); + dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), + sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61)); + dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + 
Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60)); + dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61)); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. + */ + dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); +} + +// v4f32 scalar <-> vector conversions (BE) +let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; +} // IsBigEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + +let Predicates = [IsBigEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (BE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 
13)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; + + // v2i64 scalar <-> vector conversions (BE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; +} // IsBigEndian, HasDirectMove + +// v4f32 scalar <-> vector conversions (LE) +let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; +} // IsLittleEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + +let Predicates = [IsLittleEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (LE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 MovesToVSR.LE_WORD_0)>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 MovesToVSR.LE_DWORD_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_1)>; + def 
: Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; + + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; +} // IsLittleEndian, HasDirectMove + +let Predicates = [HasDirectMove, HasVSX] in { +// bitconvert f32 -> i32 +// (convert to 32-bit fp single, shift right 1 word, move to GPR) +def : Pat<(i32 (bitconvert f32:$S)), + (i32 (MFVSRWZ (EXTRACT_SUBREG + (XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3), + sub_64)))>; +// bitconvert i32 -> f32 +// (move to FPR, shift left 1 word, convert to 64-bit fp single) +def : Pat<(f32 (bitconvert i32:$A)), + (f32 (XSCVSPDPN + (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>; + +// bitconvert f64 -> i64 +// (move to GPR, nothing else needed) +def : 
Pat<(i64 (bitconvert f64:$S)), + (i64 (MFVSRD $S))>; + +// bitconvert i64 -> f64 +// (move to FPR, nothing else needed) +def : Pat<(f64 (bitconvert i64:$S)), + (f64 (MTVSRD $S))>; +} diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index b4e1c099f190..e3a35d5df358 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -71,10 +72,10 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); // FIXME: For some reason, preserving SE here breaks LSR (even if // this pass changes nothing). - // AU.addPreserved(); + // AU.addPreserved(); AU.addRequired(); } @@ -96,7 +97,7 @@ INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", "PPC Loop Data Prefetch", false, false) @@ -104,7 +105,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref bool PPCLoopDataPrefetch::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DL = &F.getParent()->getDataLayout(); AC = &getAnalysis().getAssumptionCache(F); TTI = &getAnalysis().getTTI(F); diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index b6e7799402e1..5e188268fee9 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -73,7 +73,7 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); } bool runOnFunction(Function &F) override; @@ -84,8 +84,10 @@ namespace { private: PPCTargetMachine *TM; + DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; + bool PreserveLCSSA; }; } @@ -93,7 +95,7 @@ char PPCLoopPreIncPrep::ID = 0; static const char *name = "Prepare loop for pre-inc. 
addressing modes"; INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { @@ -101,17 +103,20 @@ FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { } namespace { - struct SCEVLess : std::binary_function - { - SCEVLess(ScalarEvolution *SE) : SE(SE) {} + struct BucketElement { + BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} + BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - bool operator() (const SCEV *X, const SCEV *Y) const { - const SCEV *Diff = SE->getMinusSCEV(X, Y); - return cast(Diff)->getValue()->getSExtValue() < 0; - } + const SCEVConstant *Offset; + Instruction *Instr; + }; - protected: - ScalarEvolution *SE; + struct Bucket { + Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), + Elements(1, BucketElement(I)) {} + + const SCEV *BaseSCEV; + SmallVector Elements; }; } @@ -140,7 +145,10 @@ static Value *GetPointerOperand(Value *MemI) { bool PPCLoopPreIncPrep::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); + auto *DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; @@ -169,7 +177,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. - typedef std::multimap Bucket; SmallVector Buckets; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { @@ -212,25 +219,24 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { } bool FoundBucket = false; - for (unsigned i = 0, e = Buckets.size(); i != e; ++i) - for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end(); - K != KE; ++K) { - const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV); - if (isa(Diff)) { - Buckets[i].insert(std::make_pair(LSCEV, MemI)); - FoundBucket = true; - break; - } + for (auto &B : Buckets) { + const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); + if (const auto *CDiff = dyn_cast(Diff)) { + B.Elements.push_back(BucketElement(CDiff, MemI)); + FoundBucket = true; + break; } + } if (!FoundBucket) { - Buckets.push_back(Bucket(SCEVLess(SE))); - Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI)); + if (Buckets.size() == MaxVars) + return MadeChange; + Buckets.push_back(Bucket(LSCEV, MemI)); } } } - if (Buckets.empty() || Buckets.size() > MaxVars) + if (Buckets.empty()) return MadeChange; BasicBlock *LoopPredecessor = L->getLoopPredecessor(); @@ -239,7 +245,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { - LoopPredecessor = InsertPreheaderForLoop(L, this); + LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (LoopPredecessor) MadeChange = true; } @@ -253,8 +259,45 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // The base address of each bucket is transformed into a phi and the others // are rewritten as offsets of that variable. + // We have a choice now of which instruction's memory operand we use as the + // base for the generated PHI. 
Always picking the first instruction in each + // bucket does not work well, specifically because that instruction might + // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, + // the choice is somewhat arbitrary, because the backend will happily + // generate direct offsets from both the pre-incremented and + // post-incremented pointer values. Thus, we'll pick the first non-prefetch + // instruction in each bucket, and adjust the recurrence and other offsets + // accordingly. + for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { + if (auto *II = dyn_cast(Buckets[i].Elements[j].Instr)) + if (II->getIntrinsicID() == Intrinsic::prefetch) + continue; + + // If we'd otherwise pick the first element anyway, there's nothing to do. + if (j == 0) + break; + + // If our chosen element has no offset from the base pointer, there's + // nothing to do. + if (!Buckets[i].Elements[j].Offset || + Buckets[i].Elements[j].Offset->isZero()) + break; + + const SCEV *Offset = Buckets[i].Elements[j].Offset; + Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); + for (auto &E : Buckets[i].Elements) { + if (E.Offset) + E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); + else + E.Offset = cast(SE->getNegativeSCEV(Offset)); + } + + std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); + break; + } + const SCEVAddRecExpr *BasePtrSCEV = - cast(Buckets[i].begin()->first); + cast(Buckets[i].BaseSCEV); if (!BasePtrSCEV->isAffine()) continue; @@ -262,7 +305,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); - Instruction *MemI = Buckets[i].begin()->second; + // The instruction corresponding to the Bucket's BaseSCEV must be the first + // in the vector of elements. + Instruction *MemI = Buckets[i].Elements.begin()->Instr; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -302,7 +347,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { NewPHI->addIncoming(BasePtrStart, LoopPredecessor); } - Instruction *InsPoint = Header->getFirstInsertionPt(); + Instruction *InsPoint = &*Header->getFirstInsertionPt(); GetElementPtrInst *PtrInc = GetElementPtrInst::Create( I8Ty, NewPHI, BasePtrIncSCEV->getValue(), MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); @@ -327,18 +372,20 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { BasePtr->replaceAllUsesWith(NewBasePtr); RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - Value *LastNewPtr = NewBasePtr; - for (Bucket::iterator I = std::next(Buckets[i].begin()), - IE = Buckets[i].end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->second); + // Keep track of the replacement pointer values we've inserted so that we + // don't generate more pointer values than necessary. 
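+  // (A hedged illustration, not part of the original patch: a set is
+  // needed here because elements sharing a pointer value need not be
+  // adjacent in the bucket; with element pointers P0, P1, P0, the third
+  // element must still be recognized as already rewritten.)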
+ SmallPtrSet NewPtrs; + NewPtrs.insert( NewBasePtr); + + for (auto I = std::next(Buckets[i].Elements.begin()), + IE = Buckets[i].Elements.end(); I != IE; ++I) { + Value *Ptr = GetPointerOperand(I->Instr); assert(Ptr && "No pointer operand"); - if (Ptr == LastNewPtr) + if (NewPtrs.count(Ptr)) continue; Instruction *RealNewPtr; - const SCEVConstant *Diff = - cast(SE->getMinusSCEV(I->first, BasePtrSCEV)); - if (Diff->isZero()) { + if (!I->Offset || I->Offset->getValue()->isZero()) { RealNewPtr = NewBasePtr; } else { Instruction *PtrIP = dyn_cast(Ptr); @@ -346,13 +393,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { cast(NewBasePtr)->getParent() == PtrIP->getParent()) PtrIP = 0; else if (isa(PtrIP)) - PtrIP = PtrIP->getParent()->getFirstInsertionPt(); + PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); else if (!PtrIP) - PtrIP = I->second; + PtrIP = I->Instr; GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, Diff->getValue(), - I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP); + I8Ty, PtrInc, I->Offset->getValue(), + I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP); if (!PtrIP) NewPtr->insertAfter(cast(PtrInc)); NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); @@ -373,7 +420,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { Ptr->replaceAllUsesWith(ReplNewPtr); RecursivelyDeleteTriviallyDeadInstructions(Ptr); - LastNewPtr = RealNewPtr; + NewPtrs.insert(RealNewPtr); } MadeChange = true; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 76837ecb32de..44a692d4bb42 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ const TargetMachine &TM = AP.TM; Mangler *Mang = AP.Mang; - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); MCContext &Ctx = AP.OutContext; bool isDarwin = TM.getTargetTriple().isOSDarwin(); @@ -51,13 +51,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ Suffix = "$non_lazy_ptr"; if (!Suffix.empty()) - Name += DL->getPrivateGlobalPrefix(); + Name += DL.getPrivateGlobalPrefix(); unsigned PrefixLen = Name.size(); if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else { const GlobalValue *GV = MO.getGlobal(); TM.getNameWithPrefix(Name, GV, *Mang); diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp new file mode 100644 index 000000000000..fe339d70d7de --- /dev/null +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -0,0 +1,230 @@ +//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This pass performs peephole optimizations to clean up ugly code +// sequences at the MachineInstruction layer. It runs at the end of +// the SSA phases, following VSX swap removal. A pass of dead code +// elimination follows this one for quick clean-up of any dead +// instructions introduced here. 
Although we could do this as callbacks +// from the generic peephole pass, this would have a couple of bad +// effects: it might remove optimization opportunities for VSX swap +// removal, and it would miss cleanups made possible following VSX +// swap removal. +// +//===---------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-mi-peepholes" + +namespace llvm { + void initializePPCMIPeepholePass(PassRegistry&); +} + +namespace { + +struct PPCMIPeephole : public MachineFunctionPass { + + static char ID; + const PPCInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + + PPCMIPeephole() : MachineFunctionPass(ID) { + initializePPCMIPeepholePass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + // Perform peepholes. + bool simplifyCode(void); + + // Find the "true" register represented by SrcReg (following chains + // of copies and subreg_to_reg operations). + unsigned lookThruCopyLike(unsigned SrcReg); + +public: + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + initialize(MF); + return simplifyCode(); + } +}; + +// Initialize class variables. +void PPCMIPeephole::initialize(MachineFunction &MFParm) { + MF = &MFParm; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); + DEBUG(MF->dump()); +} + +// Perform peephole optimizations. +bool PPCMIPeephole::simplifyCode(void) { + bool Simplified = false; + MachineInstr* ToErase = nullptr; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + + // If the previous instruction was marked for elimination, + // remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + // Ignore debug instructions. + if (MI.isDebugValue()) + continue; + + // Per-opcode peepholes. + switch (MI.getOpcode()) { + + default: + break; + + case PPC::XXPERMDI: { + // Perform simplifications of 2x64 vector swaps and splats. + // A swap is identified by an immediate value of 2, and a splat + // is identified by an immediate value of 0 or 3. + int Immed = MI.getOperand(3).getImm(); + + if (Immed != 1) { + + // For each of these simplifications, we need the two source + // regs to match. Unfortunately, MachineCSE ignores COPY and + // SUBREG_TO_REG, so for example we can see + // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. + // We have to look through chains of COPY and SUBREG_TO_REG + // to find the real source values for comparison. + unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg()); + unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg()); + + if (TrueReg1 == TrueReg2 + && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { + MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); + + // If this is a splat or a swap fed by another splat, we + // can replace it with a copy. 
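+          // (A hedged illustration, not part of the original patch:
+          // XXPERMDI's two-bit immediate selects result doublewords as
+          // imm = 2*DM0 + DM1, with dword 0 taken from A[DM0] and
+          // dword 1 from B[DM1]. With identical sources, imm 0 is a
+          // <0,0> splat, imm 3 a <1,1> splat, imm 2 a swap, and imm 1
+          // the identity, which is why Immed == 1 is excluded above.)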
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { + unsigned FeedImmed = DefMI->getOperand(3).getImm(); + unsigned FeedReg1 + = lookThruCopyLike(DefMI->getOperand(1).getReg()); + unsigned FeedReg2 + = lookThruCopyLike(DefMI->getOperand(2).getReg()); + + if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { + DEBUG(dbgs() + << "Optimizing splat/swap or splat/splat " + "to splat/copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(PPC::COPY), MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; + } + + // If this is a splat fed by a swap, we can simply modify + // the splat to splat the other value from the swap's input + // parameter. + else if ((Immed == 0 || Immed == 3) + && FeedImmed == 2 && FeedReg1 == FeedReg2) { + DEBUG(dbgs() << "Optimizing swap/splat => splat: "); + DEBUG(MI.dump()); + MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); + MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); + MI.getOperand(3).setImm(3 - Immed); + Simplified = true; + } + + // If this is a swap fed by a swap, we can replace it + // with a copy from the first swap's input. + else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { + DEBUG(dbgs() << "Optimizing swap/swap => copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(PPC::COPY), MI.getOperand(0).getReg()) + .addOperand(DefMI->getOperand(1)); + ToErase = &MI; + Simplified = true; + } + } + } + } + break; + } + } + } + + // If the last instruction was marked for elimination, + // remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + } + + return Simplified; +} + +// This is used to find the "true" source register for an +// XXPERMDI instruction, since MachineCSE does not handle the +// "copy-like" operations (Copy and SubregToReg). Returns +// the original SrcReg unless it is the target of a copy-like +// operation, in which case we chain backwards through all +// such operations to the ultimate source register. If a +// physical register is encountered, we stop the search.
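+// For example (a hypothetical SSA fragment, not taken from the patch):
+//   %v1 = COPY %v0
+//   %v2 = SUBREG_TO_REG 0, %v1, sub_64
+// lookThruCopyLike(%v2) walks back through both copy-like defs and
+// returns %v0; if %v0 were a physical register, the walk would stop
+// there and return it.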
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) { + + while (true) { + + MachineInstr *MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + return SrcReg; + + unsigned CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return CopySrcReg; + + SrcReg = CopySrcReg; + } +} + +} // end default namespace + +INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, + "PowerPC MI Peephole Optimization", false, false) +INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE, + "PowerPC MI Peephole Optimization", false, false) + +char PPCMIPeephole::ID = 0; +FunctionPass* +llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); } + diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index ec4e0a5fa81b..95f163153c74 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -18,8 +18,8 @@ using namespace llvm; void PPCFunctionInfo::anchor() { } MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { - const DataLayout *DL = MF.getTarget().getDataLayout(); - return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + Twine(MF.getFunctionNumber()) + "$poff"); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2b09b2f625de..934bdf622418 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -200,7 +200,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - + // On PPC64, r13 is the thread pointer. Never allocate this register. if (TM.isPPC64()) { Reserved.set(PPC::R13); @@ -262,7 +262,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, default: return 0; case PPC::G8RC_NOX0RegClassID: - case PPC::GPRC_NOR0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -311,7 +311,7 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, //===----------------------------------------------------------------------===// /// lowerDynamicAlloc - Generate the code for allocating an object in the -/// current frame. The sequence of code with be in the general form +/// current frame. The sequence of code will be in the general form /// /// addi R0, SP, \#frameSize ; get the address of the previous frame /// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size @@ -337,7 +337,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); // Get the total frame size. unsigned FrameSize = MFI->getStackSize(); - + // Get stack alignments. const PPCFrameLowering *TFI = getFrameLowering(MF); unsigned TargetAlign = TFI->getStackAlignment(); @@ -347,14 +347,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Determine the previous frame's address. 
If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the - // previous frame's address from 0(SP). Why not do an addis of the hi? - // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. - // Constructing the constant and adding would take 3 instructions. + // previous frame's address from 0(SP). Why not do an addis of the hi? + // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. + // Constructing the constant and adding would take 3 instructions. // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - + if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) @@ -425,11 +425,32 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { .addReg(PPC::R1) .addImm(maxCallFrameSize); } - + // Discard the DYNALLOC instruction. MBB.erase(II); } +void PPCRegisterInfo::lowerDynamicAreaOffset( + MachineBasicBlock::iterator II) const { + // Get the instruction. + MachineInstr &MI = *II; + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. + MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const PPCSubtarget &Subtarget = MF.getSubtarget(); + // Get the instruction info. + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + DebugLoc dl = MI.getDebugLoc(); + BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg()) + .addImm(maxCallFrameSize); + MBB.erase(II); +} + /// lowerCRSpilling - Generate the code for spilling a CR register. Instead of /// reserving a whole register (R0), we scrounge for one here. This generates /// code like this: @@ -459,8 +480,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. if (SrcReg != PPC::CR0) { @@ -549,8 +570,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(getCRFromCRBit(SrcReg)); - + .addReg(getCRFromCRBit(SrcReg)); + // If the saved register wasn't CR0LT, shift the bits left so that the bit to // store is the first one. Mask all but that bit. unsigned Reg1 = Reg; @@ -602,17 +623,19 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, unsigned ShiftBits = getEncodingValue(DestReg); // rlwimi r11, r10, 32-ShiftBits, ..., ... BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO) - .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill) - .addImm(ShiftBits ? 32-ShiftBits : 0) - .addImm(ShiftBits).addImm(ShiftBits); - + .addReg(RegO, RegState::Kill) + .addReg(Reg, RegState::Kill) + .addImm(ShiftBits ? 
32 - ShiftBits : 0) + .addImm(ShiftBits) + .addImm(ShiftBits); + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), getCRFromCRBit(DestReg)) - .addReg(RegO, RegState::Kill) - // Make sure we have a use dependency all the way through this - // sequence of instructions. We can't have the other bits in the CR - // modified in between the mfocrf and the mtocrf. - .addReg(getCRFromCRBit(DestReg), RegState::Implicit); + .addReg(RegO, RegState::Kill) + // Make sure we have a use dependency all the way through this + // sequence of instructions. We can't have the other bits in the CR + // modified in between the mfocrf and the mtocrf. + .addReg(getCRFromCRBit(DestReg), RegState::Implicit); // Discard the pseudo instruction. MBB.erase(II); @@ -634,11 +657,11 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned SrcReg = MI.getOperand(0).getReg(); BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - - addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) - .addReg(Reg, RegState::Kill), - FrameIndex); + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference( + BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill), + FrameIndex); // Discard the pseudo instruction. MBB.erase(II); @@ -671,9 +694,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, MBB.erase(II); } -bool -PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, - unsigned Reg, int &FrameIdx) const { +bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { const PPCSubtarget &Subtarget = MF.getSubtarget(); // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. @@ -752,7 +774,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FPSI = FI->getFramePointerSaveIndex(); // Get the instruction opcode. unsigned OpC = MI.getOpcode(); - + + if ((OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8)) { + lowerDynamicAreaOffset(II); + return; + } + // Special case for dynamic alloca. if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { @@ -800,8 +827,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - // Naked functions have stack size 0, although getStackSize may not reflect that - // because we didn't call all the pieces that compute it for naked functions. + // Naked functions have stack size 0, although getStackSize may not reflect + // that because we didn't call all the pieces that compute it for naked + // functions. 
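+  // (A hedged numeric illustration, not part of the original patch: for a
+  // non-naked function with stack size 64, an object at offset -8 from the
+  // incoming SP becomes offset -8 + 64 = 56 from the post-prologue SP.)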
if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) { if (!(hasBasePointer(MF) && FrameIndex < 0)) Offset += MFI->getStackSize(); @@ -840,7 +868,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, .addImm(Offset); // Convert into indexed form of the instruction: - // + // // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0 // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; @@ -898,24 +926,6 @@ bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { return needsStackRealignment(MF); } -bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - return true; -} - -bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const PPCFrameLowering *TFI = getFrameLowering(MF); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - /// Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index d304e1d8b5ec..b15fde83c9f3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -54,13 +54,13 @@ inline static unsigned getCRFromCRBit(unsigned SrcReg) { return Reg; } - class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap ImmToIdxMap; const PPCTargetMachine &TM; + public: PPCRegisterInfo(const PPCTargetMachine &TM); - + /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -77,7 +77,7 @@ public: const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; void adjustStackMapLiveOutMask(uint32_t *Mask) const override; @@ -101,6 +101,7 @@ public: } void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; + void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerCRRestore(MachineBasicBlock::iterator II, @@ -115,9 +116,9 @@ public: unsigned FrameIndex) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, - int &FrameIdx) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, + int &FrameIdx) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; // Support for virtual base registers. @@ -136,8 +137,6 @@ public: // Base pointer (stack realignment) support. 
   unsigned getBaseRegister(const MachineFunction &MF) const;
   bool hasBasePointer(const MachineFunction &MF) const;

-  bool canRealignStack(const MachineFunction &MF) const;
-  bool needsStackRealignment(const MachineFunction &MF) const override;
 };

 } // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 58daccae90f2..c0fcb6cbb9dc 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -62,6 +62,7 @@ void PPCSubtarget::initializeEnvironment() {
   Has64BitSupport = false;
   Use64BitRegs = false;
   UseCRBits = false;
+  UseSoftFloat = false;
   HasAltivec = false;
   HasSPE = false;
   HasQPX = false;
@@ -100,6 +101,8 @@ void PPCSubtarget::initializeEnvironment() {
   HasDirectMove = false;
   IsQPXStackUnaligned = false;
   HasHTM = false;
+  HasFusion = false;
+  HasFloat128 = false;
 }

 void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -210,5 +213,33 @@ bool PPCSubtarget::enableSubRegLiveness() const {
   return UseSubRegLiveness;
 }

+unsigned char PPCSubtarget::classifyGlobalReference(
+    const GlobalValue *GV) const {
+  // Note that currently we don't generate non-pic references.
+  // If a caller wants that, this will have to be updated.
+
+  // Large code model always uses the TOC even for local symbols.
+  if (TM.getCodeModel() == CodeModel::Large)
+    return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+
+  unsigned char flags = PPCII::MO_PIC_FLAG;
+
+  // Only if the relocation mode is PIC do we have to worry about
+  // interposition. In all other cases we can use a slightly looser standard to
+  // decide how to access the symbol.
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // If it's local, or it's non-default, it can't be interposed.
+    if (!GV->hasLocalLinkage() &&
+        GV->hasDefaultVisibility()) {
+      flags |= PPCII::MO_NLP_FLAG;
+    }
+    return flags;
+  }
+
+  if (GV->isStrongDefinitionForLinker())
+    return flags;
+  return flags | PPCII::MO_NLP_FLAG;
+}
+
 bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
 bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 0616c1f65604..4f5c95c1483f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -83,6 +83,7 @@ protected:
   bool Has64BitSupport;
   bool Use64BitRegs;
   bool UseCRBits;
+  bool UseSoftFloat;
   bool IsPPC64;
   bool HasAltivec;
   bool HasSPE;
@@ -119,6 +120,8 @@ protected:
   bool HasPartwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
+  bool HasFusion;
+  bool HasFloat128;

   /// When targeting QPX running a stock PPC64 Linux kernel where the stack
   /// alignment has not been changed, we need to keep the 16-byte alignment
@@ -188,6 +191,8 @@ public:
   /// has64BitSupport - Return true if the selected CPU supports 64-bit
   /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
   bool has64BitSupport() const { return Has64BitSupport; }
+  // useSoftFloat - Return true if soft-float option is turned on.
+  bool useSoftFloat() const { return UseSoftFloat; }

   /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
   /// registers in 32-bit mode when possible.  This can only be true if
@@ -254,6 +259,8 @@ public:
     return 16;
   }
   bool hasHTM() const { return HasHTM; }
+  bool hasFusion() const { return HasFusion; }
+  bool hasFloat128() const { return HasFloat128; }

   const Triple &getTargetTriple() const { return TargetTriple; }

@@ -285,6 +292,10 @@ public:
   bool useAA() const override;

   bool enableSubRegLiveness() const override;
+
+  /// classifyGlobalReference - Classify a global variable reference for the
+  /// current subtarget according to how we should reference it.
+  unsigned char classifyGlobalReference(const GlobalValue *GV) const;
 };
 } // End llvm namespace
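The flag algebra in classifyGlobalReference above can be checked in isolation. The following is a minimal standalone sketch, not part of the imported sources: PIC and NLP are stand-ins for PPCII::MO_PIC_FLAG and PPCII::MO_NLP_FLAG, and the boolean parameters stand in for the TargetMachine and GlobalValue queries.

  // flag_sketch.cpp -- hypothetical model of the reference classification.
  #include <cstdio>

  enum : unsigned char { PIC = 1, NLP = 2 }; // stand-ins for PPCII::MO_*_FLAG

  unsigned char classify(bool LargeCodeModel, bool PICReloc, bool LocalLinkage,
                         bool DefaultVisibility, bool StrongDef) {
    if (LargeCodeModel)          // large code model: always indirect via TOC
      return PIC | NLP;
    unsigned char Flags = PIC;
    if (PICReloc)                // interposition only matters under PIC
      return (!LocalLinkage && DefaultVisibility) ? Flags | NLP : Flags;
    return StrongDef ? Flags : Flags | NLP; // looser standard otherwise
  }

  int main() {
    // A preemptible symbol under PIC needs the indirection: prints 3 (PIC|NLP).
    std::printf("%u\n", classify(false, true, false, true, false));
  }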
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1daf244fed44..d24b590317f5 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                 cl::desc("Disable VSX Swap Removal for PPC"));

+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+                            cl::desc("Disable machine peepholes for PPC"));
+
 static cl::opt<bool>
 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
              cl::desc("Enable optimizations on complex GEPs"),
@@ -57,11 +61,19 @@ EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
                       cl::desc("Add extra TOC register dependencies"),
                       cl::init(true), cl::Hidden);

+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+                          cl::desc("Enable the machine combiner pass"),
+                          cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
   RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
   RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+
+  PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializePPCBoolRetToIntPass(PR);
 }

 /// Return the datalayout string of a subtarget.
@@ -118,7 +130,7 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
   }

   if (OL != CodeGenOpt::None) {
-    if (!FullFS.empty()) 
+    if (!FullFS.empty())
       FullFS = "+invariant-function-descriptors," + FullFS;
     else
       FullFS = "+invariant-function-descriptors";
@@ -144,7 +156,7 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
     return PPCTargetMachine::PPC_ABI_ELFv2;

   assert(Options.MCOptions.getABIName().empty() &&
-	 "Unknown target-abi option!");
+         "Unknown target-abi option!");

   if (!TT.isMacOSX()) {
     switch (TT.getArch()) {
@@ -160,9 +172,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
   return PPCTargetMachine::PPC_ABI_UNKNOWN;
 }

-// The FeatureString here is a little subtle. We are modifying the feature string
-// with what are (currently) non-function specific overrides as it goes into the
-// LLVMTargetMachine constructor and then using the stored value in the
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in the
 // Subtarget constructor below it.
 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
                                    StringRef CPU, StringRef FS,
@@ -227,6 +239,19 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
                             ? FSAttr.getValueAsString().str()
                             : TargetFS;

+  // FIXME: This is related to the code below to reset the target options,
+  // we need to know whether or not the soft float flag is set on the
+  // function before we can generate a subtarget. We also need to use
+  // it as a key for the subtarget since that can be the only difference
+  // between two functions.
+  bool SoftFloat =
+      F.hasFnAttribute("use-soft-float") &&
+      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  // If the soft float attribute is set on the function turn on the soft float
+  // subtarget feature.
+  if (SoftFloat)
+    FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
   auto &I = SubtargetMap[CPU + FS];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
@@ -277,6 +302,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
 }

 void PPCPassConfig::addIRPasses() {
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCBoolRetToIntPass());
   addPass(createAtomicExpandPass(&getPPCTargetMachine()));

   // For the BG/Q (or if explicitly requested), add explicit data prefetch
@@ -316,6 +343,10 @@ bool PPCPassConfig::addPreISel() {

 bool PPCPassConfig::addILPOpts() {
   addPass(&EarlyIfConverterID);
+
+  if (EnableMachineCombinerPass)
+    addPass(&MachineCombinerID);
+
   return true;
 }

@@ -339,6 +370,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
      !DisableVSXSwapRemoval)
     addPass(createPPCVSXSwapRemovalPass());
+  // Target-specific peephole cleanups performed after instruction
+  // selection.
+  if (!DisableMIPeephole) {
+    addPass(createPPCMIPeepholePass());
+    addPass(&DeadMachineInstructionElimID);
+  }
 }

 void PPCPassConfig::addPreRegAlloc() {
@@ -364,6 +401,7 @@ void PPCPassConfig::addPreEmitPass() {
 }

 TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis(
-      [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
+  return TargetIRAnalysis([this](const Function &F) {
+    return TargetTransformInfo(PPCTTIImpl(this, F));
+  });
 }
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 9ee5db938b67..798bb9d6b892 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -42,9 +42,7 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
   if (Kind.isReadOnly()) {
     const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);

-    if (GVar && GVar->isConstant() &&
-        (GVar->getInitializer()->getRelocationInfo() ==
-         Constant::GlobalRelocations))
+    if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
       Kind = SectionKind::getReadOnlyWithRel();
   }

diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e21c2b77f4d7..cd86dabd5abe 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -35,7 +35,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
   return TTI::PSK_Software;
 }

-unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(Imm, Ty);
@@ -64,8 +64,8 @@ unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   return 4 * TTI::TCC_Basic;
 }

-unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
-                                   const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                              Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
@@ -98,8 +98,8 @@ unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   return PPCTTIImpl::getIntImmCost(Imm, Ty);
 }

-unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
-                                   const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                              Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
@@ -197,9 +197,20 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
 }

 bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+  // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+  // on combining the loads generated for consecutive accesses, and failure to
+  // do so is particularly expensive. This makes it much more likely (compared
+  // to only using concatenation unrolling).
+  if (ST->getDarwinDirective() == PPC::DIR_A2)
+    return true;
+
   return LoopHasReductions;
 }

+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+  return true;
+}
+
 unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
   if (Vector && !ST->hasAltivec() && !ST->hasQPX())
     return 0;
@@ -246,7 +257,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 2;
 }

-unsigned PPCTTIImpl::getArithmeticInstrCost(
+int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo) {
@@ -257,24 +268,30 @@ unsigned PPCTTIImpl::getArithmeticInstrCost(
                                        Opd1PropInfo, Opd2PropInfo);
 }

-unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                    Type *SubTp) {
-  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+                               Type *SubTp) {
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). We need one such shuffle instruction for each actual
+  // register (this is not true for arbitrary shuffles, but is true for the
+  // structured types of shuffles covered by TTI::ShuffleKind).
+  return LT.first;
 }

-unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

   return BaseT::getCastInstrCost(Opcode, Dst, Src);
 }

-unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                        Type *CondTy) {
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }

-unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                        unsigned Index) {
+int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   assert(Val->isVectorTy() && "This must be a vector type");

   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -313,41 +330,83 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   return BaseT::getVectorInstrCost(Opcode, Val, Index);
 }

-unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                     unsigned Alignment,
-                                     unsigned AddressSpace) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                unsigned AddressSpace) {
   // Legalize the type.
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
          "Invalid Opcode");

-  unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

-  // VSX loads/stores support unaligned access.
-  if (ST->hasVSX()) {
-    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
-      return Cost;
-  }
+  // Aligned loads and stores are easy.
+  unsigned SrcBytes = LT.second.getStoreSize();
+  if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+    return Cost;

-  bool UnalignedAltivec =
-    Src->isVectorTy() &&
-    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
-    LT.second.getSizeInBits() == 128 &&
-    Opcode == Instruction::Load;
+  bool IsAltivecType = ST->hasAltivec() &&
+                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
+                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
+  bool IsVSXType = ST->hasVSX() &&
+                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+  bool IsQPXType = ST->hasQPX() &&
+                   (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+
+  // If we can use the permutation-based load sequence, then this is also
+  // relatively cheap (not counting loop-invariant instructions): one load plus
+  // one permute (the last load in a series has extra cost, but we're
+  // neglecting that here). Note that on the P7, we should do unaligned loads
+  // for Altivec types using the VSX instructions, but that's more expensive
+  // than using the permutation-based load sequence. On the P8, that's no
+  // longer true.
+  if (Opcode == Instruction::Load &&
+      ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+      Alignment >= LT.second.getScalarType().getStoreSize())
+    return Cost + LT.first; // Add the cost of the permutations.
+
+  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
+  // P7, unaligned vector loads are more expensive than the permutation-based
+  // load sequence, so that might be used instead, but regardless, the net cost
+  // is about the same (not counting loop-invariant instructions).
+  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
+    return Cost;

   // PPC in general does not support unaligned loads and stores. They'll need
   // to be decomposed based on the alignment factor.
-  unsigned SrcBytes = LT.second.getStoreSize();
-  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
-    Cost += LT.first*(SrcBytes/Alignment-1);
-    // For a vector type, there is also scalarization overhead (only for
-    // stores, loads are expanded using the vector-load + permutation sequence,
-    // which is much less expensive).
-    if (Src->isVectorTy() && Opcode == Instruction::Store)
-      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
-        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
-  }
+  // Add the cost of each scalar load or store.
+  Cost += LT.first*(SrcBytes/Alignment-1);
+
+  // For a vector type, there is also scalarization overhead (only for
+  // stores, loads are expanded using the vector-load + permutation sequence,
+  // which is much less expensive).
+  if (Src->isVectorTy() && Opcode == Instruction::Store)
+    for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+
+  return Cost;
+}
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                           unsigned Factor,
+                                           ArrayRef<unsigned> Indices,
+                                           unsigned Alignment,
+                                           unsigned AddressSpace) {
+  assert(isa<VectorType>(VecTy) &&
+         "Expect a vector type for interleaved memory op");
+
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+  // Firstly, the cost of load/store operation.
+  int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). For each result vector, we need one shuffle per incoming
+  // vector (except that the first shuffle can take two incoming vectors
+  // because it does not need to take itself).
+  Cost += Factor*(LT.first-1);

   return Cost;
 }
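To see the arithmetic of the unaligned fallback path concretely: a worked example, with illustrative numbers that are editor assumptions rather than values taken from the cost tables, for a hypothetical 16-byte vector store with 4-byte alignment on a subtarget where none of the cheap paths apply.

  // cost_sketch.cpp -- illustrative recomputation of the decomposition penalty.
  #include <cstdio>

  int main() {
    int Cost = 1;                     // assumed BaseT::getMemoryOpCost result
    int LTFirst = 1;                  // assumed: v4i32 legalizes to 1 register
    unsigned SrcBytes = 16, Alignment = 4;
    Cost += LTFirst * (SrcBytes / Alignment - 1); // 3 extra pieces -> Cost = 4

    // A vector *store* additionally scalarizes: one extract per element.
    int NumElts = 4, ExtractCost = 1; // assumed per-element extract cost
    Cost += NumElts * ExtractCost;    // -> Cost = 8
    std::printf("estimated cost: %d\n", Cost);
  }

The interleaved-access formula above follows the same pattern: one memory-op cost plus Factor*(LT.first-1) shuffles.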
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 368bef93f0dd..04c1b02235f0 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -37,7 +37,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
   const PPCTargetLowering *getTLI() const { return TLI; }

 public:
-  explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+  explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}

@@ -52,12 +52,11 @@ public:
   /// @{

   using BaseT::getIntImmCost;
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty);

-  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                         Type *Ty);
-  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                         Type *Ty);
+  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                    Type *Ty);

   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -68,22 +67,27 @@ public:
   /// @{

   bool enableAggressiveInterleaving(bool LoopHasReductions);
+  bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
   unsigned getMaxInterleaveFactor(unsigned VF);
-  unsigned getArithmeticInstrCost(
+  int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                          Type *SubTp);
-  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
-  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace);
+  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                      unsigned AddressSpace);
+  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                 unsigned Factor,
+                                 ArrayRef<unsigned> Indices,
+                                 unsigned Alignment,
+                                 unsigned AddressSpace);

   /// @}
 };
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 5e3ae2a4471b..782583ce3423 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -77,6 +77,14 @@ namespace {
       return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
     }

+    bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+    }
+
+    bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+    }
+
 protected:
     bool processBlock(MachineBasicBlock &MBB) {
       bool Changed = false;
@@ -100,7 +108,9 @@ protected:
             IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
                                            &PPC::VSLRCRegClass;
           assert((IsF8Reg(SrcMO.getReg(), MRI) ||
-                  IsVRReg(SrcMO.getReg(), MRI)) &&
+                  IsVRReg(SrcMO.getReg(), MRI) ||
+                  IsVSSReg(SrcMO.getReg(), MRI) ||
+                  IsVSFReg(SrcMO.getReg(), MRI)) &&
                  "Unknown source for a VSX copy");

           unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
@@ -123,6 +133,8 @@ protected:
             IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
                                            &PPC::VSLRCRegClass;
           assert((IsF8Reg(DstMO.getReg(), MRI) ||
+                  IsVSFReg(DstMO.getReg(), MRI) ||
+                  IsVSSReg(DstMO.getReg(), MRI) ||
                   IsVRReg(DstMO.getReg(), MRI)) &&
                  "Unknown destination for a VSX copy");
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 46b8d13e47b9..6b19a2f7118b 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -103,10 +103,10 @@ protected:
         VNInfo *AddendValNo =
           LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
-        if (!AddendValNo) {
-          // This can be null if the register is undef.
+
+        // This can be null if the register is undef.
+        if (!AddendValNo)
           continue;
-        }

         MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
@@ -186,18 +186,17 @@ protected:
         if (!KilledProdOp)
           continue;

-	// If the addend copy is used only by this MI, then the addend source
-	// register is likely not live here. This could be fixed (based on the
-	// legality checks above, the live range for the addend source register
-	// could be extended), but it seems likely that such a trivial copy can
-	// be coalesced away later, and thus is not worth the effort.
-	if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+        // If the addend copy is used only by this MI, then the addend source
+        // register is likely not live here. This could be fixed (based on the
+        // legality checks above, the live range for the addend source register
+        // could be extended), but it seems likely that such a trivial copy can
+        // be coalesced away later, and thus is not worth the effort.
+        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
             !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
           continue;

         // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.

-        unsigned AddReg = AddendMI->getOperand(1).getReg();
         unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
         unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
@@ -221,6 +220,14 @@ protected:
         if (OldFMAReg == KilledProdReg)
           continue;

+        // If there isn't a class that fits, we can't perform the transform.
+        // This is needed for correctness with a mixture of VSX and Altivec
+        // instructions to make sure that a low VSX register is not assigned to
+        // the Altivec instruction.
+        if (!MRI.constrainRegClass(KilledProdReg,
+                                   MRI.getRegClass(OldFMAReg)))
+          continue;
+
         assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
                "Addend copy not tied to old FMA output!");

@@ -228,7 +235,7 @@ protected:
         MI->getOperand(0).setReg(KilledProdReg);
         MI->getOperand(1).setReg(KilledProdReg);
-        MI->getOperand(3).setReg(AddReg);
+        MI->getOperand(3).setReg(AddendSrcReg);
         MI->getOperand(2).setReg(OtherProdReg);

         MI->getOperand(0).setSubReg(KilledProdSubReg);
@@ -263,8 +270,7 @@ protected:
           if (UseMI == AddendMI)
             continue;

-          UseMO.setReg(KilledProdReg);
-          UseMO.setSubReg(KilledProdSubReg);
+          UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
         }

         // Extend the live intervals of the killed product operand to hold the
@@ -286,6 +292,20 @@ protected:
         }
         DEBUG(dbgs() << "  extended: " << NewFMAInt << '\n');

+        // Extend the live interval of the addend source (it might end at the
+        // copy to be removed, or somewhere in between there and here). This
+        // is necessary only if it is a physical register.
+        if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+          for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
+               ++Units) {
+            unsigned Unit = *Units;
+
+            LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
+            AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
+                                         FMAIdx.getRegSlot());
+            DEBUG(dbgs() << "  extended: " << AddendSrcRange << '\n');
+          }
+
         FMAInt.removeValNo(FMAValNo);
         DEBUG(dbgs() << "  trimmed:  " << FMAInt << '\n');
@@ -347,7 +367,6 @@ INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
 char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;

 char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-
+FunctionPass *llvm::createPPCVSXFMAMutatePass() {
+  return new PPCVSXFMAMutate();
+}
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d7132d5272d8..27c540fcf211 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -94,7 +94,7 @@ enum SHValues {
   SH_NOSWAP_ST,
   SH_SPLAT,
   SH_XXPERMDI,
-  SH_COPYSCALAR
+  SH_COPYWIDEN
 };

 struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -149,6 +149,11 @@ private:
   // handling.  Return true iff any changes are made.
   bool removeSwaps();

+  // Insert a swap instruction from SrcReg to DstReg at the given
+  // InsertPoint.
+  void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+                  unsigned DstReg, unsigned SrcReg);
+
   // Update instructions requiring special handling.
   void handleSpecialSwappables(int EntryIdx);

@@ -159,9 +164,7 @@ private:
   bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       return RC->hasSubClassEq(MRI->getRegClass(Reg));
-    if (RC->contains(Reg))
-      return true;
-    return false;
+    return RC->contains(Reg);
   }

   // Return true iff the given register is a full vector register.
@@ -215,7 +218,7 @@ public:
 void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
-  TII = static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();

   // An initial vector size of 256 appears to work well in practice.
   // Small/medium functions with vector content tend not to incur a
@@ -343,6 +346,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         SwapVector[VecIdx].IsLoad = 1;
         SwapVector[VecIdx].IsSwap = 1;
         break;
+      case PPC::LXSDX:
+      case PPC::LXSSPX:
+        // A load of a floating-point value into the high-order half of
+        // a vector register is safe, provided that we introduce a swap
+        // following the load, which will be done by the SUBREG_TO_REG
+        // support.  So just mark these as safe.
+        SwapVector[VecIdx].IsLoad = 1;
+        SwapVector[VecIdx].IsSwappable = 1;
+        break;
       case PPC::STVX:
         // Non-permuting stores are currently unsafe.  We can use special
         // handling for this in the future.  By not marking these as
@@ -385,7 +397,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         else if (isVecReg(MI.getOperand(0).getReg()) &&
                  isScalarVecReg(MI.getOperand(2).getReg())) {
           SwapVector[VecIdx].IsSwappable = 1;
-          SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+          SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
         }
         break;
       }
@@ -420,7 +432,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       case PPC::STVEHX:
       case PPC::STVEWX:
      case PPC::STVXL:
+        // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+        // by adding special handling for narrowing copies as well as
+        // widening ones.  However, I've experimented with this, and in
+        // practice we currently do not appear to use STXSDX fed by
+        // a narrowing copy from a full vector register.  Since I can't
+        // generate any useful test cases, I've left this alone for now.
       case PPC::STXSDX:
+      case PPC::STXSSPX:
       case PPC::VCIPHER:
       case PPC::VCIPHERLAST:
       case PPC::VMRGHB:
@@ -543,7 +562,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
   }

   if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
-    SwapVector[VecIdx].MentionsPhysVR = 1;
+    if (!isScalarVecReg(CopySrcReg))
+      SwapVector[VecIdx].MentionsPhysVR = 1;
     return CopySrcReg;
   }

@@ -629,8 +649,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
       SwapVector[Repr].WebRejected = 1;

       DEBUG(dbgs() <<
-            format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
-                   Repr));
+            format("Web %d rejected for physreg, partial reg, or not "
+                   "swap[pable]\n", Repr));
       DEBUG(dbgs() << "  in " << EntryIdx << ": ");
       DEBUG(SwapVector[EntryIdx].VSEMI->dump());
       DEBUG(dbgs() << "\n");
@@ -743,6 +763,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
   }
 }

+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2;  If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+                                   MachineBasicBlock::iterator InsertPoint,
+                                   unsigned DstReg, unsigned SrcReg) {
+  BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+          TII->get(PPC::XXPERMDI), DstReg)
+      .addReg(SrcReg)
+      .addReg(SrcReg)
+      .addImm(2);
+}
+
 // The identified swap entry requires special handling to allow its
 // containing computation to be optimized.  Perform that handling
 // here.
@@ -752,8 +787,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
   switch (SwapVector[EntryIdx].SpecialHandling) {

   default:
-    assert(false && "Unexpected special handling type");
-    break;
+    llvm_unreachable("Unexpected special handling type");

   // For splats based on an index into a vector, add N/2 modulo N
   // to the index, where N is the number of vector elements.
@@ -766,7 +800,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {

     switch (MI->getOpcode()) {
     default:
-      assert(false && "Unexpected splat opcode");
+      llvm_unreachable("Unexpected splat opcode");
     case PPC::VSPLTB: NElts = 16; break;
     case PPC::VSPLTH: NElts = 8;  break;
     case PPC::VSPLTW: NElts = 4;  break;
@@ -811,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
   // For a copy from a scalar floating-point register to a vector
   // register, removing swaps will leave the copied value in the
   // wrong lane.  Insert a swap following the copy to fix this.
-  case SHValues::SH_COPYSCALAR: {
+  case SHValues::SH_COPYWIDEN: {
     MachineInstr *MI = SwapVector[EntryIdx].VSEMI;

     DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
@@ -825,14 +859,13 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
     DEBUG(dbgs() << "  Into: ");
     DEBUG(MI->dump());

-    MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+    auto InsertPoint = ++MachineBasicBlock::iterator(MI);

     // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
     // is copying to a VRRC, we need to be careful to avoid a register
     // assignment problem.  In this case we must copy from VRRC to VSRC
     // prior to the swap, and from VSRC to VRRC following the swap.
     // Coalescing will usually remove all this mess.
-
     if (DstRC == &PPC::VRRCRegClass) {
       unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
       unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
@@ -840,29 +873,19 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
       BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
               TII->get(PPC::COPY), VSRCTmp1)
         .addReg(NewVReg);
-      DEBUG(MI->getNextNode()->dump());
+      DEBUG(std::prev(InsertPoint)->dump());

-      BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
-              TII->get(PPC::XXPERMDI), VSRCTmp2)
-        .addReg(VSRCTmp1)
-        .addReg(VSRCTmp1)
-        .addImm(2);
-      DEBUG(MI->getNextNode()->getNextNode()->dump());
+      insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+      DEBUG(std::prev(InsertPoint)->dump());

       BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
               TII->get(PPC::COPY), DstReg)
         .addReg(VSRCTmp2);
-      DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+      DEBUG(std::prev(InsertPoint)->dump());

     } else {
-
-      BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
-              TII->get(PPC::XXPERMDI), DstReg)
-        .addReg(NewVReg)
-        .addReg(NewVReg)
-        .addImm(2);
-
-      DEBUG(MI->getNextNode()->dump());
+      insertSwap(MI, InsertPoint, DstReg, NewVReg);
+      DEBUG(std::prev(InsertPoint)->dump());
     }
     break;
   }
@@ -947,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
       case SH_XXPERMDI:
         DEBUG(dbgs() << "special:xxpermdi ");
         break;
-      case SH_COPYSCALAR:
-        DEBUG(dbgs() << "special:copyscalar ");
+      case SH_COPYWIDEN:
+        DEBUG(dbgs() << "special:copywiden ");
         break;
       }
     }
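The immediate 2 passed by insertSwap encodes the doubleword-swap form of xxpermdi. A minimal host-side model of the permute's selection rule, an editor illustration rather than LLVM code, shows why both source operands are the same register:

  // xxpermdi_sketch.cpp -- models the doubleword select done by PPC::XXPERMDI.
  #include <array>
  #include <cstdint>
  #include <cstdio>

  using V2x64 = std::array<uint64_t, 2>;

  // xxpermdi DST, A, B, DM: DST[0] comes from A, DST[1] from B;
  // bit 1 of DM picks A's doubleword, bit 0 picks B's.
  V2x64 xxpermdi(V2x64 A, V2x64 B, unsigned DM) {
    return {(DM & 2) ? A[1] : A[0], (DM & 1) ? B[1] : B[0]};
  }

  int main() {
    V2x64 R = {0x1111, 0x2222};
    V2x64 S = xxpermdi(R, R, 2); // both sources equal, DM = 2 -> swap
    std::printf("%llx %llx\n", (unsigned long long)S[0],
                (unsigned long long)S[1]); // prints: 2222 1111
  }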
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 1c4e486da418..a55274744fd1 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -34,7 +35,6 @@ namespace {
 class SparcOperand;
 class SparcAsmParser : public MCTargetAsmParser {

-  MCSubtargetInfo &STI;
   MCAsmParser &Parser;

   /// @name Auto-generated Match Functions
@@ -69,6 +69,10 @@ class SparcAsmParser : public MCTargetAsmParser {

   OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);

+  // Helper function for dealing with %lo / %hi in PIC mode.
+  const SparcMCExpr *adjustPICRelocation(SparcMCExpr::VariantKind VK,
+                                         const MCExpr *subExpr);
+
   // returns true if Tok is matched to a register and returns register in RegNo.
   bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
                          unsigned &RegKind);
@@ -77,24 +81,24 @@ class SparcAsmParser : public MCTargetAsmParser {
   bool parseDirectiveWord(unsigned Size, SMLoc L);

   bool is64Bit() const {
-    return STI.getTargetTriple().getArch() == Triple::sparcv9;
+    return getSTI().getTargetTriple().getArch() == Triple::sparcv9;
   }

   void expandSET(MCInst &Inst, SMLoc IDLoc,
                  SmallVectorImpl<MCInst> &Instructions);

 public:
-  SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+  SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
-      : MCTargetAsmParser(), STI(sti), Parser(parser) {
+      : MCTargetAsmParser(Options, sti), Parser(parser) {
     // Initialize the set of available features.
-    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
   }
 };

-  static unsigned IntRegs[32] = {
+  static const MCPhysReg IntRegs[32] = {
     Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
     Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
     Sparc::O0, Sparc::O1, Sparc::O2, Sparc::O3,
@@ -104,7 +108,7 @@ public:
     Sparc::I0, Sparc::I1, Sparc::I2, Sparc::I3,
     Sparc::I4, Sparc::I5, Sparc::I6, Sparc::I7 };

-  static unsigned FloatRegs[32] = {
+  static const MCPhysReg FloatRegs[32] = {
     Sparc::F0,  Sparc::F1,  Sparc::F2,  Sparc::F3,
     Sparc::F4,  Sparc::F5,  Sparc::F6,  Sparc::F7,
     Sparc::F8,  Sparc::F9,  Sparc::F10, Sparc::F11,
@@ -114,7 +118,7 @@ public:
     Sparc::F24, Sparc::F25, Sparc::F26, Sparc::F27,
     Sparc::F28, Sparc::F29, Sparc::F30, Sparc::F31 };

-  static unsigned DoubleRegs[32] = {
+  static const MCPhysReg DoubleRegs[32] = {
     Sparc::D0,  Sparc::D1,  Sparc::D2,  Sparc::D3,
     Sparc::D4,  Sparc::D5,  Sparc::D6,  Sparc::D7,
     Sparc::D8,  Sparc::D7,  Sparc::D8,  Sparc::D9,
@@ -124,13 +128,13 @@ public:
     Sparc::D24, Sparc::D25, Sparc::D26, Sparc::D27,
     Sparc::D28, Sparc::D29, Sparc::D30, Sparc::D31 };

-  static unsigned QuadFPRegs[32] = {
+  static const MCPhysReg QuadFPRegs[32] = {
     Sparc::Q0,  Sparc::Q1,  Sparc::Q2,  Sparc::Q3,
     Sparc::Q4,  Sparc::Q5,  Sparc::Q6,  Sparc::Q7,
     Sparc::Q8,  Sparc::Q9,  Sparc::Q10, Sparc::Q11,
     Sparc::Q12, Sparc::Q13, Sparc::Q14, Sparc::Q15 };

-  static unsigned ASRRegs[32] = {
+  static const MCPhysReg ASRRegs[32] = {
     SP::Y,     SP::ASR1,  SP::ASR2,  SP::ASR3,
     SP::ASR4,  SP::ASR5,  SP::ASR6,  SP::ASR7,
     SP::ASR8,  SP::ASR9,  SP::ASR10, SP::ASR11,
@@ -140,6 +144,12 @@ public:
     SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
     SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};

+  static const MCPhysReg IntPairRegs[] = {
+    Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
+    Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
+    Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
+    Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
+
 /// SparcOperand - Instances of this class represent a parsed Sparc machine
 /// instruction.
 class SparcOperand : public MCParsedAsmOperand {
@@ -147,6 +157,7 @@ public:
   enum RegisterKind {
     rk_None,
     rk_IntReg,
+    rk_IntPairReg,
     rk_FloatReg,
     rk_DoubleReg,
     rk_QuadReg,
@@ -200,6 +211,10 @@ public:
   bool isMEMrr() const { return Kind == k_MemoryReg; }
   bool isMEMri() const { return Kind == k_MemoryImm; }

+  bool isIntReg() const {
+    return (Kind == k_Register && Reg.Kind == rk_IntReg);
+  }
+
   bool isFloatReg() const {
     return (Kind == k_Register && Reg.Kind == rk_FloatReg);
   }
@@ -330,6 +345,25 @@ public:
     return Op;
   }

+  static bool MorphToIntPairReg(SparcOperand &Op) {
+    unsigned Reg = Op.getReg();
+    assert(Op.Reg.Kind == rk_IntReg);
+    unsigned regIdx = 32;
+    if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
+      regIdx = Reg - Sparc::G0;
+    else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
+      regIdx = Reg - Sparc::O0 + 8;
+    else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
+      regIdx = Reg - Sparc::L0 + 16;
+    else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
+      regIdx = Reg - Sparc::I0 + 24;
+    if (regIdx % 2 || regIdx > 31)
+      return false;
+    Op.Reg.RegNum = IntPairRegs[regIdx / 2];
+    Op.Reg.Kind = rk_IntPairReg;
+    return true;
+  }
+
   static bool MorphToDoubleReg(SparcOperand &Op) {
     unsigned Reg = Op.getReg();
     assert(Op.Reg.Kind == rk_FloatReg);
@@ -407,7 +441,22 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,

   // the imm operand can be either an expression or an immediate.
   bool IsImm = Inst.getOperand(1).isImm();
-  uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+  int64_t RawImmValue = IsImm ? MCValOp.getImm() : 0;
+
+  // Allow either a signed or unsigned 32-bit immediate.
+  if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL) {
+    Error(IDLoc, "set: argument must be between -2147483648 and 4294967295");
+    return;
+  }
+
+  // If the value was expressed as a large unsigned number, that's ok.
+  // We want to see if it "looks like" a small signed number.
+  int32_t ImmValue = RawImmValue;
+  // For 'set' you can't use 'or' with a negative operand on V9 because
+  // that would splat the sign bit across the upper half of the destination
+  // register, whereas 'set' is defined to zero the high 32 bits.
+  bool IsEffectivelyImm13 =
+      IsImm && ((is64Bit() ? 0 : -4096) <= ImmValue && ImmValue < 4096);
   const MCExpr *ValExpr;
   if (IsImm)
     ValExpr = MCConstantExpr::create(ImmValue, getContext());
@@ -416,10 +465,12 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,

   MCOperand PrevReg = MCOperand::createReg(Sparc::G0);

-  if (!IsImm || (ImmValue & ~0x1fff)) {
+  // If not just a signed imm13 value, then either we use a 'sethi' with a
+  // following 'or', or a 'sethi' by itself if there are no more 1 bits.
+  // In either case, start with the 'sethi'.
+  if (!IsEffectivelyImm13) {
     MCInst TmpInst;
-    const MCExpr *Expr =
-        SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+    const MCExpr *Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr);
     TmpInst.setLoc(IDLoc);
     TmpInst.setOpcode(SP::SETHIi);
     TmpInst.addOperand(MCRegOp);
@@ -428,10 +479,23 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
     PrevReg = MCRegOp;
   }

-  if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
+  // The low bits require touching in 3 cases:
+  // * A non-immediate value will always require both instructions.
+  // * An effectively imm13 value needs only an 'or' instruction.
+  // * Otherwise, an immediate that is not effectively imm13 requires the
+  //   'or' only if bits remain after clearing the 22 bits that 'sethi' set.
+  //   If the low bits are known zeros, there's nothing to do.
+  // In the second case, and only in that case, must we NOT clear
+  // bits of the immediate value via the %lo() assembler function.
+  // Note also, the 'or' instruction doesn't mind a large value in the case
+  // where the operand to 'set' was 0xFFFFFzzz - it does exactly what you mean.
+  if (!IsImm || IsEffectivelyImm13 || (ImmValue & 0x3ff)) {
     MCInst TmpInst;
-    const MCExpr *Expr =
-        SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+    const MCExpr *Expr;
+    if (IsEffectivelyImm13)
+      Expr = ValExpr;
+    else
+      Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr);
     TmpInst.setLoc(IDLoc);
     TmpInst.setOpcode(SP::ORri);
     TmpInst.addOperand(MCRegOp);
@@ -463,7 +527,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   }

   for (const MCInst &I : Instructions) {
-    Out.EmitInstruction(I, STI);
+    Out.EmitInstruction(I, getSTI());
   }
   return false;
 }
@@ -742,6 +806,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
       case Sparc::PSR:
         Op = SparcOperand::CreateToken("%psr", S);
         break;
+      case Sparc::FSR:
+        Op = SparcOperand::CreateToken("%fsr", S);
+        break;
       case Sparc::WIM:
         Op = SparcOperand::CreateToken("%wim", S);
         break;
@@ -766,6 +833,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
   case AsmToken::Minus:
   case AsmToken::Integer:
   case AsmToken::LParen:
+  case AsmToken::Dot:
     if (!getParser().parseExpression(EVal, E))
       Op = SparcOperand::CreateImm(EVal, S, E);
     break;
@@ -848,6 +916,13 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       return true;
     }

+    // %fprs is an alias of %asr6.
+    if (name.equals("fprs")) {
+      RegNo = ASRRegs[6];
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+
     if (name.equals("icc")) {
       RegNo = Sparc::ICC;
       RegKind = SparcOperand::rk_Special;
@@ -860,6 +935,12 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       return true;
     }

+    if (name.equals("fsr")) {
+      RegNo = Sparc::FSR;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+
     if (name.equals("wim")) {
       RegNo = Sparc::WIM;
       RegKind = SparcOperand::rk_Special;
       return true;
     }
@@ -943,6 +1024,82 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       RegKind = SparcOperand::rk_IntReg;
       return true;
     }
+
+    if (name.equals("tpc")) {
+      RegNo = Sparc::TPC;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tnpc")) {
+      RegNo = Sparc::TNPC;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tstate")) {
+      RegNo = Sparc::TSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tt")) {
+      RegNo = Sparc::TT;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tick")) {
+      RegNo = Sparc::TICK;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tba")) {
+      RegNo = Sparc::TBA;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("pstate")) {
+      RegNo = Sparc::PSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tl")) {
+      RegNo = Sparc::TL;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("pil")) {
+      RegNo = Sparc::PIL;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cwp")) {
+      RegNo = Sparc::CWP;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cansave")) {
+      RegNo = Sparc::CANSAVE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("canrestore")) {
+      RegNo = Sparc::CANRESTORE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cleanwin")) {
+      RegNo = Sparc::CLEANWIN;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("otherwin")) {
+      RegNo = Sparc::OTHERWIN;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("wstate")) {
+      RegNo = Sparc::WSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
   }
   return false;
 }
@@ -975,6 +1132,32 @@ static bool hasGOTReference(const MCExpr *Expr) {
   return false;
 }

+const SparcMCExpr *
+SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
+                                    const MCExpr *subExpr)
+{
+  // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+  // If the expression contains _GLOBAL_OFFSET_TABLE_, it is
+  // actually a %pc10 or %pc22 relocation. Otherwise, they are interpreted
+  // as %got10 or %got22 relocation.
+
+  if (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) {
+    switch(VK) {
+    default: break;
+    case SparcMCExpr::VK_Sparc_LO:
+      VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC10
+                                     : SparcMCExpr::VK_Sparc_GOT10);
+      break;
+    case SparcMCExpr::VK_Sparc_HI:
+      VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC22
+                                     : SparcMCExpr::VK_Sparc_GOT22);
+      break;
+    }
+  }
+
+  return SparcMCExpr::create(VK, subExpr, getContext());
+}
+
 bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
                                             SMLoc &EndLoc) {
@@ -998,30 +1181,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
   if (Parser.parseParenExpression(subExpr, EndLoc))
     return false;

-  bool isPIC = getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
-
-  // Ugly: if a sparc assembly expression says "%hi(...)" but the
-  // expression within contains _GLOBAL_OFFSET_TABLE_, it REALLY means
-  // %pc22. Same with %lo -> %pc10. Worse, if it doesn't contain that,
-  // the meaning depends on whether the assembler was invoked with
-  // -KPIC or not: if so, it really means %got22/%got10; if not, it
-  // actually means what it said! Sigh, historical mistakes...
-
-  switch(VK) {
-  default: break;
-  case SparcMCExpr::VK_Sparc_LO:
-    VK = (hasGOTReference(subExpr)
-          ? SparcMCExpr::VK_Sparc_PC10
-          : (isPIC ? SparcMCExpr::VK_Sparc_GOT10 : VK));
-    break;
-  case SparcMCExpr::VK_Sparc_HI:
-    VK = (hasGOTReference(subExpr)
-          ? SparcMCExpr::VK_Sparc_PC22
-          : (isPIC ? SparcMCExpr::VK_Sparc_GOT22 : VK));
-    break;
-  }
-
-  EVal = SparcMCExpr::create(VK, subExpr, getContext());
+  EVal = adjustPICRelocation(VK, subExpr);
   return true;
 }
@@ -1051,5 +1211,9 @@ unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
       break;
     }
   }
+  if (Op.isIntReg() && Kind == MCK_IntPair) {
+    if (SparcOperand::MorphToIntPairReg(Op))
+      return MCTargetAsmParser::Match_Success;
+  }
   return Match_InvalidOperand;
 }
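The cases in expandSET above can be checked against a small standalone model. This sketch is an editor illustration, not LLVM code; the printed register and assembly names are stand-ins, and the %hi/%lo split (upper 22 bits, lower 10 bits) follows the SPARC 'set' macro definition:

  // set_sketch.cpp -- models the sethi/or expansion of the SPARC "set" macro.
  #include <cstdint>
  #include <cstdio>

  void expandSet(int64_t Raw, bool Is64Bit) {
    if (Raw < -2147483648LL || Raw > 4294967295LL) {
      std::puts("error: argument must fit in 32 bits");
      return;
    }
    int32_t Imm = (int32_t)Raw;
    // Effectively imm13: a single "or" suffices (on V9 a negative operand
    // must be avoided, since "or" would sign-extend into the upper 32 bits).
    bool Imm13 = (Is64Bit ? 0 : -4096) <= Imm && Imm < 4096;
    if (Imm13) {
      std::printf("or    %%g0, %d, rd\n", Imm);
      return;
    }
    std::printf("sethi %%hi(0x%x), rd\n", (uint32_t)Imm); // upper 22 bits
    if ((uint32_t)Imm & 0x3ff)                            // low 10 bits remain
      std::printf("or    rd, %%lo(0x%x), rd\n", (uint32_t)Imm);
  }

  int main() { expandSet(0x12345678, false); } // prints a sethi then an or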
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 38bff44e7542..c689b7f7201e 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -122,6 +122,8 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       continue;
     }

+    // TODO: If we ever want to support v7, this needs to be extended
+    // to cover all floating point operations.
    if (!Subtarget->isV9() &&
        (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
         || MI->getOpcode() == SP::FCMPQ)) {
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e56b9e9b883..51751ec511c9 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -117,6 +117,19 @@ static const unsigned ASRRegDecoderTable[] = {
   SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
   SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};

+static const unsigned PRRegDecoderTable[] = {
+  SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK, SP::TBA, SP::PSTATE,
+  SP::TL, SP::PIL, SP::CWP, SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN,
+  SP::OTHERWIN, SP::WSTATE
+};
+
+static const uint16_t IntPairDecoderTable[] = {
+  SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
+  SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
+  SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
+  SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
+};
+
 static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
                                                unsigned RegNo,
                                                uint64_t Address,
@@ -196,9 +209,34 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }

+static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t Address,
+                                              const void *Decoder) {
+  if (RegNo >= array_lengthof(PRRegDecoderTable))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+
+  if ((RegNo & 1))
+    S = MCDisassembler::SoftFail;
+
+  unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
+  Inst.addOperand(MCOperand::createReg(RegisterPair));
+  return S;
+}

 static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+                                  const void *Decoder);
 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                  const void *Decoder);
 static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
@@ -207,6 +245,8 @@ static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder);
 static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
@@ -326,6 +366,12 @@ static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
                    DecodeIntRegsRegisterClass);
 }

+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+                                  const void *Decoder) {
+  return DecodeMem(Inst, insn, Address, Decoder, true,
+                   DecodeIntPairRegisterClass);
+}
+
 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                  const void *Decoder) {
   return DecodeMem(Inst, insn, Address, Decoder, true,
@@ -350,6 +396,12 @@ static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
                    DecodeIntRegsRegisterClass);
 }

+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+                                       uint64_t Address, const void *Decoder) {
+  return DecodeMem(Inst, insn, Address, Decoder, false,
+                   DecodeIntPairRegisterClass);
+}
+
 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder) {
   return DecodeMem(Inst, insn, Address, Decoder, false,
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 0b01b88e5250..6f06d1ddae32 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -15,12 +15,9 @@
 #define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H

 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"

 namespace llvm {

-class MCOperand;
-
 class SparcInstPrinter : public MCInstPrinter {
 public:
   SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 12386f14443e..ad441227600e 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -21,6 +21,7 @@ class Triple;

 class SparcELFMCAsmInfo : public MCAsmInfoELF {
   void anchor() override;
+
 public:
   explicit SparcELFMCAsmInfo(const Triple &TheTriple);

   const MCExpr*
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index d08ad86dbe04..13f08195c764 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -90,8 +90,8 @@ public:
                                  const MCAsmLayout *Layout,
                                  const MCFixup *Fixup) const override;
   void visitUsedExpr(MCStreamer &Streamer) const override;
-  MCSection *findAssociatedSection() const override {
-    return getSubExpr()->findAssociatedSection();
+  MCFragment *findAssociatedFragment() const override {
+    return getSubExpr()->findAssociatedFragment();
   }

   void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index c5f046bfc5bb..e3b0f5266747 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -267,11 +267,11 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
     LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
     return;
   }
-  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator I = MI->getIterator();
   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
   do {
     MCInst TmpInst;
-    LowerSparcMachineInstrToMCInst(I, TmpInst, *this);
+    LowerSparcMachineInstrToMCInst(&*I, TmpInst, *this);
     EmitToStreamer(*OutStreamer, TmpInst);
   } while ((++I != E) && I->isInsideBundle()); // Delay slot check.
} @@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() { void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand (opNum); SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags(); @@ -373,7 +373,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << MO.getSymbolName(); break; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); break; default: diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index dfaaabf344a3..0aa29d186dc1 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -21,7 +21,11 @@ def CC_Sparc32 : CallingConv<[ // i32 f32 arguments get passed in integer registers if there is space. CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, // f64 arguments are split and passed through registers or through stack. - CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>, + CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>, + // As are v2i32 arguments (this would be the default behavior for + // v2i32 if it wasn't allocated to the IntPair register-class) + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>, + // Alternatively, they are assigned to the stack in 4-byte aligned units. CCAssignToStack<4, 4> @@ -30,7 +34,8 @@ def CC_Sparc32 : CallingConv<[ def RetCC_Sparc32 : CallingConv<[ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> + CCIfType<[f64], CCAssignToReg<[D0, D1]>>, + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">> ]>; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index c0279daa63d9..39b5e809c9be 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -44,7 +44,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, unsigned ADDrr, unsigned ADDri) const { - DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc dl; const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); @@ -90,8 +90,23 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, MachineFrameInfo *MFI = MF.getFrameInfo(); const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); + const SparcRegisterInfo &RegInfo = + *static_cast(MF.getSubtarget().getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + + // FIXME: unfortunately, returning false from canRealignStack + // actually just causes needsStackRealignment to return false, + // rather than reporting an error, as would be sensible. This is + // poor, but fixing that bogosity is going to be a large project. + // For now, just see if it's lied, and report an error here. 
+ if (!NeedsStackRealignment && MFI->getMaxAlignment() > getStackAlignment()) + report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required " + "stack re-alignment, but LLVM couldn't handle it " + "(probably because it has a dynamic alloca)."); // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); @@ -104,12 +119,43 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, SAVEri = SP::ADDri; SAVErr = SP::ADDrr; } - NumBytes = -MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); - emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri); + + // The SPARC ABI is a bit odd in that it requires a reserved 92-byte + // (128 in v9) area in the user's stack, starting at %sp. Thus, the + // first part of the stack that can actually be used is located at + // %sp + 92. + // + // We therefore need to add that offset to the total stack size + // after all the stack objects are placed by + // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack needs to be + // aligned *after* the extra size is added, we need to disable + // calculateFrameObjectOffsets's built-in stack alignment, by having + // targetHandlesStackFrameRounding return true. + + + // Add the extra call frame stack size, if needed. (This is the same + // code as in PrologEpilogInserter, but also gets disabled by + // targetHandlesStackFrameRounding) + if (MFI->adjustsStack() && hasReservedCallFrame(MF)) + NumBytes += MFI->getMaxCallFrameSize(); + + // Adds the SPARC subtarget-specific spill area to the stack + // size. Also ensures target-required alignment. + NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); + + // Finally, ensure that the size is sufficiently aligned for the + // data on the stack. + if (MFI->getMaxAlignment() > 0) { + NumBytes = RoundUpToAlignment(NumBytes, MFI->getMaxAlignment()); + } + + // Update stack size with corrected value. + MFI->setStackSize(NumBytes); + + emitSPAdjustment(MF, MBB, MBBI, -NumBytes, SAVErr, SAVEri); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - unsigned regFP = MRI->getDwarfRegNum(SP::I6, true); + unsigned regFP = RegInfo.getDwarfRegNum(SP::I6, true); // Emit ".cfi_def_cfa_register 30". unsigned CFIIndex = @@ -122,13 +168,19 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true); - unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true); + unsigned regInRA = RegInfo.getDwarfRegNum(SP::I7, true); + unsigned regOutRA = RegInfo.getDwarfRegNum(SP::O7, true); // Emit ".cfi_register 15, 31". CFIIndex = MMI.addFrameInst( MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + if (NeedsStackRealignment) { + // andn %o6, MaxAlign-1, %o6 + int MaxAlign = MFI->getMaxAlignment(); + BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6).addReg(SP::O6).addImm(MaxAlign - 1); + } } void SparcFrameLowering:: @@ -167,7 +219,6 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes == 0) return; - NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); } @@ -180,21 +231,69 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // pointer register.
This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. bool SparcFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + RegInfo->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken(); } +int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + // Addressable stack objects are accessed using neg. offsets from + // %fp, or positive offsets from %sp. + bool UseFP; + + // Sparc uses FP-based references in general, even when "hasFP" is + // false. That function is rather a misnomer, because %fp is + // actually always available, unless isLeafProc. + if (FuncInfo->isLeafProc()) { + // If there's a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. + UseFP = false; + } else if (isFixed) { + // Otherwise, argument access should always use %fp. + UseFP = true; + } else if (RegInfo->needsStackRealignment(MF)) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp, to take account of the + // re-alignment. + UseFP = false; + } else { + // Finally, default to using %fp. + UseFP = true; + } + + int64_t FrameOffset = MF.getFrameInfo()->getObjectOffset(FI) + + Subtarget.getStackPointerBias(); + + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; + } else { + FrameReg = SP::O6; // %sp + return FrameOffset + MF.getFrameInfo()->getStackSize(); + } +} + static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; return true; @@ -206,33 +305,42 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - return !(MFI->hasCalls() // has calls - || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %SP is used - || hasFP(MF)); // need %FP + return !(MFI->hasCalls() // has calls + || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed + || !MRI.reg_nodbg_empty(SP::O6) // %SP is used + || hasFP(MF)); // need %FP } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { - if (!MRI.isPhysRegUsed(reg)) + if (MRI.reg_nodbg_empty(reg)) continue; - unsigned mapped_reg = (reg - SP::I0 + SP::O0); - assert(!MRI.isPhysRegUsed(mapped_reg)); + + unsigned mapped_reg = reg - SP::I0 + SP::O0; + assert(MRI.reg_nodbg_empty(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); - // Mark the reg unused.
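As a reader's aid for getFrameIndexReference above, the base-register choice reduces to a small decision table. This is a self-contained C++ model of the same priority order; the boolean inputs are assumptions of the sketch mirroring the conditions the real code queries, not LLVM interfaces.

    #include <cstdint>

    enum class BaseReg { FP, SP };

    // Model of the %fp/%sp selection: leaf procedures and realigned
    // frames address locals off %sp (adding the frame size), everything
    // else uses offsets off %fp.
    int64_t frameIndexReference(bool isLeafProc, bool isFixedObject,
                                bool needsRealignment, int64_t objectOffset,
                                int64_t stackBias, int64_t stackSize,
                                BaseReg &base) {
      int64_t offset = objectOffset + stackBias;
      if (isLeafProc)
        base = BaseReg::SP;       // %fp was never set up
      else if (isFixedObject)
        base = BaseReg::FP;       // incoming arguments
      else if (needsRealignment)
        base = BaseReg::SP;       // locals after %sp realignment
      else
        base = BaseReg::FP;       // default
      return base == BaseReg::SP ? offset + stackSize : offset;
    }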
- MRI.setPhysRegUnused(reg); + // Also replace register pair super-registers. + if ((reg - SP::I0) % 2 == 0) { + unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1; + unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1; + MRI.replaceRegWith(preg, mapped_preg); + } } // Rewrite MBB's Live-ins. for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { + for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) { + if (!MBB->isLiveIn(reg)) + continue; + MBB->removeLiveIn(reg); + MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1); + } for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (!MBB->isLiveIn(reg)) continue; diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 29fc7b7ba036..cbb4dc04fc23 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -39,6 +39,14 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const override { return true; } + private: // Remap input registers to output registers for leaf procedure. void remapRegsForLeafProc(MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 340b72e7940f..c4c641659df3 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" @@ -62,6 +63,7 @@ public: private: SDNode* getGlobalBaseReg(); + SDNode *SelectInlineAsm(SDNode *N); }; } // end anonymous namespace @@ -141,6 +143,181 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { return true; } + +// Re-assemble i64 arguments split up in SelectionDAGBuilder's +// visitInlineAsm / GetRegistersForValue functions. +// +// Note: This function was copied from, and is essentially identical +// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that +// such hacking-up is necessary; a rethink of how inline asm operands +// are handled may be in order to make doing this more sane. +// +// TODO: fix inline asm support so I can simply tell it that 'i64' +// inputs to asm need to be allocated to the IntPair register type, +// and have that work. Then, delete this function. +SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + // Normally, i64 data is bounded to two arbitrary GPRs for "%r" + // constraint. However, some instructions (e.g. ldd/std) require + // (even/even+1) GPRs. + + // So, here, we check for this case, and mutate the inlineasm to use + // a single IntPair register instead, which guarantees such even/odd + // placement. + + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); + + SmallVector<bool, 8> OpChanged; + // Glue node will be appended late. + for(unsigned i = 0, e = N->getGluedNode() ?
NumOps - 1 : NumOps; i < e; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID)) + || NumRegs != 2) + continue; + + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, Ops); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue( + CurDAG->getMachineNode( + TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32, + { + CurDAG->getTargetConstant(SP::IntPairRegClassID, dl, + MVT::i32), + T0, + CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32), + T1, + CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32), + }), + 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, dl, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return nullptr; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + return New.getNode(); +} + SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -150,6 +327,12 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case SPISD::GLOBAL_BASE_REG: return getGlobalBaseReg(); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4879d4ee79e5..5e70ffe2223c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -49,9 +49,9 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT, return true; } -static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) +static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 @@ -77,6 +77,29 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, return true; } +static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) +{ + static const MCPhysReg RegList[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + + // Try to get first reg. + if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + // Try to get second reg. 
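A brief aside before the allocation helper continues: the reason SelectInlineAsm above rewrites operands at all is the SPARC rule that ldd/std only accept an (even, even+1) register pair. A toy standalone C++ model of the constraint, and of how an IntPair covers its two halves (sub_even/sub_odd); all names here are illustrative only.

    #include <cassert>

    // IntPair register k covers scalar registers 2k (sub_even) and 2k+1
    // (sub_odd), so allocating the pair guarantees ldd/std-compatible
    // placement; two independently chosen GPRs do not.
    struct IntPairModel {
      unsigned pairIndex;
      unsigned subEven() const { return 2 * pairIndex; }
      unsigned subOdd() const { return 2 * pairIndex + 1; }
    };

    bool isValidLddPair(unsigned evenReg, unsigned oddReg) {
      return (evenReg % 2 == 0) && (oddReg == evenReg + 1);
    }

    int main() {
      IntPairModel p{3};
      assert(isValidLddPair(p.subEven(), p.subOdd()));
      assert(!isValidLddPair(1, 2)); // odd/even order is not a valid pair
      return 0;
    }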
+ if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + return true; +} + // Allocate a full-sized argument for the 64-bit ABI. static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -202,12 +225,34 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, RetOps.push_back(SDValue()); // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), - OutVals[i], Flag); + SDValue Arg = OutVals[realRVLocIdx]; + + if (VA.needsCustom()) { + assert(VA.getLocVT() == MVT::v2i32); + // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would + // happen by default if this wasn't a legal type) + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout()))); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1, + Flag); + } else + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); @@ -355,6 +400,7 @@ LowerFormalArguments_32(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32); const unsigned StackOffset = 92; + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); unsigned InIdx = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) { @@ -375,7 +421,8 @@ LowerFormalArguments_32(SDValue Chain, if (VA.isRegLoc()) { if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); + unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); @@ -396,9 +443,13 @@ LowerFormalArguments_32(SDValue Chain, &SP::IntRegsRegClass); LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } + + if (IsLittleEndian) + std::swap(LoVal, HiVal); + SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -422,7 +473,7 @@ LowerFormalArguments_32(SDValue Chain, auto PtrVT = getPointerTy(DAG.getDataLayout()); if (VA.needsCustom()) { - assert(VA.getValVT() == MVT::f64); + assert(VA.getValVT() == MVT::f64 || MVT::v2i32); // If it is double-word aligned, just load. 
if (Offset % 8 == 0) { int FI = MF.getFrameInfo()->CreateFixedObject(8, @@ -452,9 +503,12 @@ LowerFormalArguments_32(SDValue Chain, MachinePointerInfo(), false, false, false, 0); + if (IsLittleEndian) + std::swap(LoVal, HiVal); + SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -468,16 +522,12 @@ LowerFormalArguments_32(SDValue Chain, Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), false, false, false, 0); + } else if (VA.getValVT() == MVT::f128) { + report_fatal_error("SPARCv8 does not handle f128 in calls; " + "pass indirectly"); } else { - ISD::LoadExtType LoadOp = ISD::SEXTLOAD; - // Sparc is big endian, so add an offset based on the ObjectVT. - unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8); - FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, - DAG.getConstant(Offset, dl, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - MachinePointerInfo(), - VA.getValVT(), false, false, false,0); - Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load); + // We shouldn't see any other value types here. + llvm_unreachable("Unexpected ValVT encountered in frame lowering."); } InVals.push_back(Load); } @@ -612,7 +662,7 @@ LowerFormalArguments_64(SDValue Chain, InVals.push_back(DAG.getLoad( VA.getValVT(), DL, Chain, DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), - MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); + MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0)); } if (!IsVarArg) @@ -640,9 +690,9 @@ LowerFormalArguments_64(SDValue Chain, SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); auto PtrVT = getPointerTy(MF.getDataLayout()); - OutChains.push_back( - DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), - MachinePointerInfo::getFixedStack(FI), false, false, 0)); + OutChains.push_back(DAG.getStore( + Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); } if (!OutChains.empty()) @@ -788,7 +838,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, } if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; @@ -804,49 +854,53 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, } } - SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Arg, StackPtr, MachinePointerInfo(), - false, false, 0); - // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); - // Increment the pointer to the other half. - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(4, dl)); - // Load the low part. - SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); + if (VA.getLocVT() == MVT::f64) { + // Move from the float value from float registers into the + // integer registers. 
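The IsLittleEndian swap seen in the LowerFormalArguments_32 hunks above is pure word-order bookkeeping. A standalone C++ sketch of the BUILD_PAIR step, assuming two 32-bit halves arriving in register order:

    #include <cstdint>

    // On big-endian SPARC the first register (or lower address) holds
    // the high word of an i64/f64 pair; the code swaps LoVal/HiVal first
    // when the DataLayout reports a little-endian target.
    uint64_t buildPairFromRegs(uint32_t firstReg, uint32_t secondReg,
                               bool isLittleEndian) {
      uint32_t hi = isLittleEndian ? secondReg : firstReg;
      uint32_t lo = isLittleEndian ? firstReg : secondReg;
      return (uint64_t(hi) << 32) | lo;
    }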
+ + // TODO: The f64 -> v2i32 conversion is super-inefficient for + // constants: it sticks them in the constant pool, then loads + // to a fp register, then stores to temp memory, then loads to + // integer registers. + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg); + } + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout()))); if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0)); assert(i+1 != e); CCValAssign &NextVA = ArgLocs[++i]; if (NextVA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1)); } else { - // Store the low part in stack. + // Store the second part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } } else { unsigned Offset = VA.getLocMemOffset() + StackOffset; - // Store the high part. + // Store the first part. SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff, MachinePointerInfo(), false, false, 0)); - // Store the low part. + // Store the second part. PtrOff = DAG.getIntPtrConstant(Offset + 4, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } @@ -990,8 +1044,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const if (!CalleeFn) return 0; - assert(CalleeFn->hasStructRetAttr() && - "Callee does not have the StructRet attribute."); + // It would be nice to check for the sret attribute on CalleeFn here, + // but since it is not part of the function type, any check will misfire. PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); Type *ElementTy = Ty->getElementType(); @@ -1370,15 +1424,60 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, const SparcSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - auto &DL = *TM.getDataLayout(); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); + + // Instructions which use registers as conditionals examine all the + // bits (as does the pseudo SELECT_CC expansion). I don't think it + // matters much whether it's ZeroOrOneBooleanContent, or + // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the + // former. + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); addRegisterClass(MVT::f128, &SP::QFPRegsRegClass); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { addRegisterClass(MVT::i64, &SP::I64RegsRegClass); + } else { + // On 32bit sparc, we define a double-register 32bit register + // class, as well. This is modeled in LLVM as a 2-vector of i32. + addRegisterClass(MVT::v2i32, &SP::IntPairRegClass); + + // ...but almost all operations must be expanded, so set that as + // the default. + for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { + setOperationAction(Op, MVT::v2i32, Expand); + } + // Truncating/extending stores/loads are also not supported. + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand); + + setTruncStoreAction(VT, MVT::v2i32, Expand); + setTruncStoreAction(MVT::v2i32, VT, Expand); + } + // However, load and store *are* legal. + setOperationAction(ISD::LOAD, MVT::v2i32, Legal); + setOperationAction(ISD::STORE, MVT::v2i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal); + + // And we need to promote i64 loads/stores into vector load/store + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + + // Sadly, this doesn't work: + // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); + // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); + } // Turn FP extload into load/fextend for (MVT VT : MVT::fp_valuetypes()) { @@ -1396,10 +1495,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::f128, MVT::f64, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom); - setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom); - setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom); - setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -1579,9 +1678,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); - setExceptionPointerRegister(SP::I0); - setExceptionSelectorRegister(SP::I1); - setStackPointerRegisterToSaveRestore(SP::O6); setOperationAction(ISD::CTPOP, MVT::i32, @@ -1744,18 +1840,15 @@ void SparcTargetLowering::computeKnownBitsForTargetNode // set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition. 
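One way to read the blanket Expand loop in the constructor above: the IntPair class exists only to move data, so any v2i32 arithmetic is decomposed back into scalar i32 operations. Conceptually (a hedged C++ sketch, not target code):

    #include <array>
    #include <cstdint>

    // "Expand" for v2i32 arithmetic: there is no pair ALU operation, so
    // each lane becomes an ordinary scalar i32 op; only loads, stores,
    // extracts and build_vector stay in pair form.
    std::array<uint32_t, 2> addV2i32(const std::array<uint32_t, 2> &a,
                                     const std::array<uint32_t, 2> &b) {
      return {a[0] + b[0], a[1] + b[1]};
    }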
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { - if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->isNullValue() && + if (isNullConstant(RHS) && CC == ISD::SETNE && (((LHS.getOpcode() == SPISD::SELECT_ICC || LHS.getOpcode() == SPISD::SELECT_XCC) && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || (LHS.getOpcode() == SPISD::SELECT_FCC && LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && - isa<ConstantSDNode>(LHS.getOperand(0)) && - isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && - cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { + isOneConstant(LHS.getOperand(0)) && + isNullConstant(LHS.getOperand(1))) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -1821,7 +1914,8 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setHasCalls(true); return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // This is one of the absolute code models. @@ -1872,6 +1966,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2601,6 +2698,17 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) return DAG.getMergeValues(Ops, dl); } +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) +{ + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + // Lower a f128 store into two f64 stores. static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -2645,6 +2753,29 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); + + EVT MemVT = St->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Store(Op, DAG); + + if (MemVT == MVT::i64) { + // Custom handling for i64 stores: turn it into a bitcast and a + // v2i32 store.
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue()); + SDValue Chain = DAG.getStore( + St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(), + St->isVolatile(), St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + return Chain; + } + + return SDValue(); +} + static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS) && "invalid opcode"); @@ -2752,7 +2883,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, SDValue MulResult = TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, - Args, 4, isSigned, dl).first; + Args, isSigned, dl).first; SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, DAG.getIntPtrConstant(0, dl)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, @@ -2783,7 +2914,6 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -2818,8 +2948,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); - case ISD::LOAD: return LowerF128Load(Op, DAG); - case ISD::STORE: return LowerF128Store(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::ADD_F128), 2); case ISD::FSUB: return LowerF128Op(Op, DAG, @@ -2921,8 +3051,7 @@ SparcTargetLowering::expandSelectCC(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -3007,7 +3136,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI, .addReg(AddrReg).addImm(0); // Split the basic block MBB before MI and insert the loop block in the hole. - MachineFunction::iterator MFI = MBB; + MachineFunction::iterator MFI = MBB->getIterator(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *MF = MBB->getParent(); MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -3149,9 +3278,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, &SP::IntRegsRegClass); + if (VT == MVT::v2i32) + return std::make_pair(0U, &SP::IntPairRegClass); + else + return std::make_pair(0U, &SP::IntRegsRegClass); } - } else if (!Constraint.empty() && Constraint.size() <= 5 + } else if (!Constraint.empty() && Constraint.size() <= 5 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { // constraint = '{r}' // Remove the braces from around the name. @@ -3227,5 +3359,24 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N, getLibcallName(libCall), 1)); return; + case ISD::LOAD: { + LoadSDNode *Ld = cast<LoadSDNode>(N); + // Custom handling only for i64: turn i64 load into a v2i32 load, + // and a bitcast.
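The i64 case of LowerSTORE above amounts to placing the two words of the bitcast pair at adjacent addresses. A host-side C++ model, assuming SPARC's big-endian word order:

    #include <cstdint>
    #include <cstring>

    // Model of the i64 -> v2i32 store lowering: one 64-bit store becomes
    // two adjacent 32-bit word stores, high word first on big-endian.
    void storeI64AsWordPair(unsigned char *addr, uint64_t value) {
      uint32_t hi = uint32_t(value >> 32);
      uint32_t lo = uint32_t(value);
      std::memcpy(addr, &hi, 4);     // [addr]     <- high word
      std::memcpy(addr + 4, &lo, 4); // [addr + 4] <- low word
    }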
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64) + return; + + SDLoc dl(N); + SDValue LoadRes = DAG.getExtLoad( + Ld->getExtensionType(), dl, MVT::v2i32, + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), + MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo()); + + SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes); + Results.push_back(Res); + Results.push_back(LoadRes.getValue(1)); + return; + } } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index bbc91a493c9d..4e46709cfc09 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -89,6 +89,20 @@ namespace llvm { return MVT::i32; } + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return SP::I0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return SP::I1; + } + /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -167,8 +181,8 @@ namespace llvm { } void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG) const override; + SmallVectorImpl<SDValue>& Results, + SelectionDAG &DAG) const override; MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, unsigned BROpcode) const; diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index 25cc652dbd9e..d51e2ccc8a35 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -250,6 +250,7 @@ defm : int_cond_alias<"n", 0b0000>; defm : int_cond_alias<"ne", 0b1001>; defm : int_cond_alias<"nz", 0b1001>; // same as ne defm : int_cond_alias<"e", 0b0001>; +defm : int_cond_alias<"eq", 0b0001>; // same as e defm : int_cond_alias<"z", 0b0001>; // same as e defm : int_cond_alias<"g", 0b1010>; defm : int_cond_alias<"le", 0b0010>; @@ -429,6 +430,9 @@ def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; def : InstAlias<"flush", (FLUSH), 0>; +def : MnemonicAlias<"lduw", "ld">, Requires<[HasV9]>; +def : MnemonicAlias<"lduwa", "lda">, Requires<[HasV9]>; + def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>; def : MnemonicAlias<"addc", "addx">, Requires<[HasV9]>; @@ -450,3 +454,8 @@ def : InstAlias<"fcmpeq $rs1, $rs2", (V9FCMPEQ FCC0, QFPRegs:$rs1, QFPRegs:$rs2)>, Requires<[HasHardQuad]>; +// signx rd -> sra rd, %g0, rd +def : InstAlias<"signx $rd", (SRArr IntRegs:$rd, IntRegs:$rd, G0), 0>, Requires<[HasV9]>; + +// signx reg, rd -> sra reg, %g0, rd +def : InstAlias<"signx $rs1, $rd", (SRArr IntRegs:$rd, IntRegs:$rs1, G0), 0>, Requires<[HasV9]>; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 6167c532db80..733027a5d2be 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -284,7 +284,9 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned numSubRegs = 0; unsigned movOpc = 0; const unsigned *subRegIdx = nullptr; + bool ExtraG0 = false; + const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned
QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd, @@ -294,7 +296,12 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) { + subRegIdx = DW_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::ORrr; + ExtraG0 = true; + } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { @@ -347,7 +354,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); - MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); + if (ExtraG0) + MIB.addReg(SP::G0); + MIB.addReg(Src); + MovMI = MIB.getInstr(); } // Add implicit super-register defs and kills to the last MovMI. MovMI->addRegisterDefined(DestReg, TRI); @@ -365,19 +376,20 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". 
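The ExtraG0 flag introduced above reflects that SPARC has no dedicated register-to-register move: a copy is spelled `or %g0, src, dst`, so the sub-register copy loop must splice an extra %g0 operand into each ORrr it builds. The identity it relies on is trivial to check:

    #include <cassert>
    #include <cstdint>

    // SPARC synthesizes "mov src, dst" as "or %g0, src, dst"; %g0 always
    // reads as zero, so OR with it copies src unchanged.
    uint32_t movViaOr(uint32_t src) {
      const uint32_t g0 = 0; // hardwired zero register
      return g0 | src;
    }

    int main() {
      assert(movViaOr(0xdeadbeefu) == 0xdeadbeefu);
      return 0;
    }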
- if (RC == &SP::I64RegsRegClass) + if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); @@ -403,11 +415,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0) @@ -415,6 +425,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 3b9e048ea8b3..ec37c22a5b33 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -283,17 +283,32 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, [(set Ty:$dst, (OpNode ADDRri:$addr))]>; } +// TODO: Instructions of the LoadASI class are currently asm only; hooking up +// CodeGen's address spaces to use these is a future task. +class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, + RegisterClass RC, ValueType Ty> : + F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi), + !strconcat(OpcStr, "a [$addr] $asi, $dst"), + []>; + // LoadA multiclass - As above, but also define alternate address space variant multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : Load<OpcStr, Op3Val, OpNode, RC, Ty> { - // TODO: The LD*Arr instructions are currently asm only; hooking up - // CodeGen's address spaces to use these is a future task. - def Arr : F3_1_asi<3, LoadAOp3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi), - !strconcat(OpcStr, "a [$addr] $asi, $dst"), - []>; + def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>; } +// The LDSTUB instruction is supported for asm only. +// It is unlikely that general-purpose code could make use of it. +// CAS is preferred for sparc v9. +def LDSTUBrr : F3_1<3, 0b001101, (outs IntRegs:$dst), (ins MEMrr:$addr), + "ldstub [$addr], $dst", []>; +def LDSTUBri : F3_2<3, 0b001101, (outs IntRegs:$dst), (ins MEMri:$addr), + "ldstub [$addr], $dst", []>; +def LDSTUBArr : F3_1_asi<3, 0b011101, (outs IntRegs:$dst), + (ins MEMrr:$addr, i8imm:$asi), + "ldstuba [$addr] $asi, $dst", []>; + // Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> { @@ -307,14 +322,18 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, [(OpNode Ty:$rd, ADDRri:$addr)]>; } +// TODO: Instructions of the StoreASI class are currently asm only; hooking up +// CodeGen's address spaces to use these is a future task. +class StoreASI<string OpcStr, bits<6> Op3Val, + SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : + F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi), + !strconcat(OpcStr, "a $rd, [$addr] $asi"), + []>; + multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : Store<OpcStr, Op3Val, OpNode, RC, Ty> { - // TODO: The ST*Arr instructions are currently asm only; hooking up - // CodeGen's address spaces to use these is a future task. - def Arr : F3_1_asi<3, StoreAOp3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi), - !strconcat(OpcStr, "a $rd, [$addr] $asi"), - []>; + def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty>; } //===----------------------------------------------------------------------===// @@ -408,15 +427,40 @@ let DecoderMethod = "DecodeLoadInt" in { defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>; } +let DecoderMethod = "DecodeLoadIntPair" in + defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>; + // Section B.2 - Load Floating-point Instructions, p. 92 -let DecoderMethod = "DecodeLoadFP" in - defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; -let DecoderMethod = "DecodeLoadDFP" in - defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>; +let DecoderMethod = "DecodeLoadFP" in { + defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; + def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32>, + Requires<[HasV9]>; +} +let DecoderMethod = "DecodeLoadDFP" in { + defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>; + def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>, + Requires<[HasV9]>; +} let DecoderMethod = "DecodeLoadQFP" in - defm LDQF : Load<"ldq", 0b100010, load, QFPRegs, f128>, + defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; +let DecoderMethod = "DecodeLoadFP" in + let Defs = [FSR] in { + let rd = 0 in { + def LDFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr), + "ld [$addr], %fsr", []>; + def LDFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr), + "ld [$addr], %fsr", []>; + } + let rd = 1 in { + def LDXFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr), + "ldx [$addr], %fsr", []>, Requires<[HasV9]>; + def LDXFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr), + "ldx [$addr], %fsr", []>, Requires<[HasV9]>; + } + } + // Section B.4 - Store Integer Instructions, p. 95 let DecoderMethod = "DecodeStoreInt" in { defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>; @@ -424,15 +468,40 @@ let DecoderMethod = "DecodeStoreInt" in { defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>; } +let DecoderMethod = "DecodeStoreIntPair" in + defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>; + // Section B.5 - Store Floating-point Instructions, p.
97 -let DecoderMethod = "DecodeStoreFP" in +let DecoderMethod = "DecodeStoreFP" in { defm STF : Store<"st", 0b100100, store, FPRegs, f32>; -let DecoderMethod = "DecodeStoreDFP" in - defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>; + def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>, + Requires<[HasV9]>; +} +let DecoderMethod = "DecodeStoreDFP" in { + defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>; + def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>, + Requires<[HasV9]>; +} let DecoderMethod = "DecodeStoreQFP" in - defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>, + defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; +let DecoderMethod = "DecodeStoreFP" in + let Defs = [FSR] in { + let rd = 0 in { + def STFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins), + "st %fsr, [$addr]", []>; + def STFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins), + "st %fsr, [$addr]", []>; + } + let rd = 1 in { + def STXFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins), + "stx %fsr, [$addr]", []>, Requires<[HasV9]>; + def STXFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins), + "stx %fsr, [$addr]", []>, Requires<[HasV9]>; + } + } + // Section B.8 - SWAP Register with Memory Instruction // (Atomic swap) let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in { @@ -559,6 +628,10 @@ let Defs = [Y, ICC] in { defm SMULCC : F3_12np<"smulcc", 0b011011>; } +let Defs = [Y, ICC], Uses = [Y, ICC] in { + defm MULSCC : F3_12np<"mulscc", 0b100100>; +} + // Section B.19 - Divide Instructions, p. 115 let Uses = [Y], Defs = [Y] in { defm UDIV : F3_12np<"udiv", 0b001110>; @@ -1221,8 +1294,8 @@ let Predicates = [HasV9] in { // the top 32-bits before using it. To do this clearing, we use a SRLri X,0. 
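To make the clearing comment above concrete: v9 POPC counts all 64 bits of its source register, so an i32 ctpop is only correct after zero-extension, which `srl $src, 0` performs on a 64-bit register. A standalone C++ model of the pattern that follows:

    #include <cstdint>

    // srl $src, 0 in 64-bit mode zero-extends the 32-bit value, so the
    // 64-bit population count then equals the i32 ctpop.
    unsigned ctpop32ViaPopc64(uint64_t reg) {
      uint64_t v = reg & 0xffffffffull; // effect of "srl $src, 0"
      unsigned n = 0;
      for (; v; v &= v - 1) // Kernighan: clear lowest set bit each step
        ++n;
      return n;
    }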
let rs1 = 0 in def POPCrr : F3_1<2, 0b101110, - (outs IntRegs:$dst), (ins IntRegs:$src), - "popc $src, $dst", []>, Requires<[HasV9]>; + (outs IntRegs:$rd), (ins IntRegs:$rs2), + "popc $rs2, $rd", []>, Requires<[HasV9]>; def : Pat<(ctpop i32:$src), (POPCrr (SRLri $src, 0))>; @@ -1254,6 +1327,25 @@ let hasSideEffects = 1 in { } } + +// Section A.43 - Read Privileged Register Instructions +let Predicates = [HasV9] in { +let rs2 = 0 in + def RDPR : F3_1<2, 0b101010, + (outs IntRegs:$rd), (ins PRRegs:$rs1), + "rdpr $rs1, $rd", []>; +} + +// Section A.62 - Write Privileged Register Instructions +let Predicates = [HasV9] in { + def WRPRrr : F3_1<2, 0b110010, + (outs PRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2), + "wrpr $rs1, $rs2, $rd", []>; + def WRPRri : F3_2<2, 0b110010, + (outs PRRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13), + "wrpr $rs1, $simm13, $rd", []>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -1327,6 +1419,18 @@ def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>; def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>; def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>; +// extract_vector +def : Pat<(extractelt (v2i32 IntPair:$Rn), 0), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>; +def : Pat<(extractelt (v2i32 IntPair:$Rn), 1), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>; + +// build_vector +def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)), + (INSERT_SUBREG + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even), + (i32 IntRegs:$a2), sub_odd)>; + include "SparcInstr64Bit.td" include "SparcInstrVIS.td" diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9667bc059f18..da31783ba248 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -75,6 +75,18 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(SP::G6); Reserved.set(SP::G7); + // Also reserve the register pair aliases covering the above + // registers, with the same conditions. + Reserved.set(SP::G0_G1); + if (ReserveAppRegisters) + Reserved.set(SP::G2_G3); + if (ReserveAppRegisters || !Subtarget.is64Bit()) + Reserved.set(SP::G4_G5); + + Reserved.set(SP::O6_O7); + Reserved.set(SP::I6_I7); + Reserved.set(SP::G6_G7); + // Unaliased double registers are not available in non-V9 targets. if (!Subtarget.isV9()) { for (unsigned n = 0; n != 16; ++n) { @@ -158,21 +170,15 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - - // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); - int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum + 1).getImm() + - Subtarget.getStackPointerBias(); - SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); - unsigned FramePtr = SP::I6; - if (FuncInfo->isLeafProc()) { - // Use %sp and adjust offset if needed. - FramePtr = SP::O6; - int stackSize = MF.getFrameInfo()->getStackSize(); - Offset += (stackSize) ?
Subtarget.getAdjustedFrameSize(stackSize) : 0 ; - } + const SparcFrameLowering *TFI = getFrameLowering(MF); + + unsigned FrameReg; + int Offset; + Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); + + Offset += MI.getOperand(FIOperandNum + 1).getImm(); if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { @@ -182,8 +188,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) - .addReg(FramePtr).addImm(0).addReg(SrcEvenReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr); + .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); MI.setDesc(TII.get(SP::STDFri)); MI.getOperand(2).setReg(SrcOddReg); Offset += 8; @@ -194,8 +200,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) - .addReg(FramePtr).addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr); + .addReg(FrameReg).addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); MI.setDesc(TII.get(SP::LDDFri)); MI.getOperand(0).setReg(DestOddReg); @@ -203,7 +209,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr); + replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } @@ -211,3 +217,25 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } +// Sparc has no architectural need for stack realignment support, +// except that LLVM unfortunately currently implements overaligned +// stack objects by depending upon stack realignment support. +// If that ever changes, this can probably be deleted. +bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + // Sparc always has a fixed frame pointer register, so don't need to + // worry about needing to reserve it. [even if we don't have a frame + // pointer for our frame, it still cannot be used for other things, + // or register window traps will be SADNESS.] + + // If there's a reserved call frame, we can use SP to access locals. + if (getFrameLowering(MF)->hasReservedCallFrame(MF)) + return true; + + // Otherwise, we'd need a base pointer, but those aren't implemented + // for SPARC at the moment. + + return false; +} diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 764a894fe9a3..32075b1df410 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -42,8 +42,10 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const; - // Debug information queries. 
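For the realignment that canRealignStack gates above, the `andn %o6, MaxAlign-1, %o6` emitted in the emitPrologue hunk earlier is plain mask arithmetic: ANDN computes a & ~b. A one-line C++ model, assuming MaxAlign is a power of two:

    #include <cstdint>

    // andn %o6, MaxAlign-1, %o6 == sp & ~(MaxAlign - 1): rounds %sp down
    // to a MaxAlign boundary (MaxAlign must be a power of two).
    uint64_t realignSP(uint64_t sp, uint64_t maxAlign) {
      return sp & ~(maxAlign - 1);
    }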
unsigned getFrameRegister(const MachineFunction &MF) const override; + + bool canRealignStack(const MachineFunction &MF) const override; + }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index db8a7e86962d..cca9463562a4 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -32,6 +32,12 @@ def sub_odd64 : SubRegIndex<64, 64>; // Ri - 32-bit integer registers class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>; +// Rdi - pairs of 32-bit integer registers +class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} // Rf - 32-bit floating-point registers class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>; @@ -54,6 +60,8 @@ def ICC : SparcCtrlReg<0, "ICC">; // This represents icc and xcc in 64-bit code. foreach I = 0-3 in def FCC#I : SparcCtrlReg<I, "FCC"#I>; +def FSR : SparcCtrlReg<0, "FSR">; // Floating-point state register. + // Y register def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>; // Ancillary state registers (implementation defined) @@ -94,6 +102,22 @@ def PSR : SparcCtrlReg<0, "PSR">; def WIM : SparcCtrlReg<0, "WIM">; def TBR : SparcCtrlReg<0, "TBR">; +def TPC : SparcCtrlReg<0, "TPC">; +def TNPC : SparcCtrlReg<1, "TNPC">; +def TSTATE : SparcCtrlReg<2, "TSTATE">; +def TT : SparcCtrlReg<3, "TT">; +def TICK : SparcCtrlReg<4, "TICK">; +def TBA : SparcCtrlReg<5, "TBA">; +def PSTATE : SparcCtrlReg<6, "PSTATE">; +def TL : SparcCtrlReg<7, "TL">; +def PIL : SparcCtrlReg<8, "PIL">; +def CWP : SparcCtrlReg<9, "CWP">; +def CANSAVE : SparcCtrlReg<10, "CANSAVE">; +def CANRESTORE : SparcCtrlReg<11, "CANRESTORE">; +def CLEANWIN : SparcCtrlReg<12, "CLEANWIN">; +def OTHERWIN : SparcCtrlReg<13, "OTHERWIN">; +def WSTATE : SparcCtrlReg<14, "WSTATE">; + // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; @@ -217,6 +241,24 @@ def Q13 : Rq<21, "F52", [D26, D27]>; def Q14 : Rq<25, "F56", [D28, D29]>; def Q15 : Rq<29, "F60", [D30, D31]>; +// Aliases of the integer registers used for LDD/STD double-word operations +def G0_G1 : Rdi<0, "G0", [G0, G1]>; +def G2_G3 : Rdi<2, "G2", [G2, G3]>; +def G4_G5 : Rdi<4, "G4", [G4, G5]>; +def G6_G7 : Rdi<6, "G6", [G6, G7]>; +def O0_O1 : Rdi<8, "O0", [O0, O1]>; +def O2_O3 : Rdi<10, "O2", [O2, O3]>; +def O4_O5 : Rdi<12, "O4", [O4, O5]>; +def O6_O7 : Rdi<14, "O6", [O6, O7]>; +def L0_L1 : Rdi<16, "L0", [L0, L1]>; +def L2_L3 : Rdi<18, "L2", [L2, L3]>; +def L4_L5 : Rdi<20, "L4", [L4, L5]>; +def L6_L7 : Rdi<22, "L6", [L6, L7]>; +def I0_I1 : Rdi<24, "I0", [I0, I1]>; +def I2_I3 : Rdi<26, "I2", [I2, I3]>; +def I4_I5 : Rdi<28, "I4", [I4, I5]>; +def I6_I7 : Rdi<30, "I6", [I6, I7]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -231,6 +273,13 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32, (sequence "L%u", 0, 7), (sequence "O%u", 0, 7))>; +// Should be in the same order as IntRegs. +def IntPair : RegisterClass<"SP", [v2i32], 64, + (add I0_I1, I2_I3, I4_I5, I6_I7, + G0_G1, G2_G3, G4_G5, G6_G7, + L0_L1, L2_L3, L4_L5, L6_L7, + O0_O1, O2_O3, O4_O5, O6_O7)>; + // Register class for 64-bit mode, with a 64-bit spill slot size. // These are the same as the 32-bit registers, so TableGen will consider this // to be a sub-class of IntRegs.
That works out because requiring a 64-bit @@ -252,3 +301,8 @@ def ASRRegs : RegisterClass<"SP", [i32], 32, (add Y, (sequence "ASR%u", 1, 31))> { let isAllocatable = 0; } + +// Privileged Registers +def PRRegs : RegisterClass<"SP", [i64], 64, + (add TPC, TNPC, TSTATE, TT, TICK, TBA, PSTATE, TL, PIL, CWP, + CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE)>; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index d69da409e428..d701594d27af 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -64,7 +64,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { frameSize += 128; // Frames with calls must also reserve space for 6 outgoing arguments // whether they are used or not. LowerCall_64 takes care of that. - assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned"); + frameSize = RoundUpToAlignment(frameSize, 16); } else { // Emit the correct save instruction based on the number of bytes in // the frame. Minimum stack frame size according to V8 ABI is: @@ -81,3 +81,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { } return frameSize; } + +bool SparcSubtarget::enableMachineScheduler() const { + return true; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 9d21911d88f0..e2fd2f04528a 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -60,6 +60,8 @@ public: return &TSInfo; } + bool enableMachineScheduler() const override; + bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } bool isVIS2() const { return IsVIS2; } @@ -85,7 +87,6 @@ public: /// returns adjusted framesize which includes space for register window /// spills and arguments. int getAdjustedFrameSize(int stackSize) const; - }; } // end namespace llvm diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 3aa4c6bd32d6..9c995bf42b0b 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -349,7 +349,6 @@ class SystemZAsmParser : public MCTargetAsmParser { #include "SystemZGenAsmMatcher.inc" private: - MCSubtargetInfo &STI; MCAsmParser &Parser; enum RegisterGroup { RegGR, @@ -386,14 +385,14 @@ private: bool parseOperand(OperandVector &Operands, StringRef Mnemonic); public: - SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(Options, sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } // Override MCTargetAsmParser. @@ -533,14 +532,16 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { } // Parse a register of group Group. If Regs is nonnull, use it to map -// the raw register number to LLVM numbering, with zero entries indicating -// an invalid register. IsAddress says whether the register appears in an -// address context. +// the raw register number to LLVM numbering, with zero entries +// indicating an invalid register. IsAddress says whether the +// register appears in an address context. 
Allow FP Group if expecting +// RegV Group, since the f-prefix yields the FP group even while used +// with vector instructions. bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, bool IsAddress) { if (parseRegister(Reg)) return true; - if (Reg.Group != Group) + if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV)) return Error(Reg.StartLoc, "invalid operand for instruction"); if (Regs && Regs[Reg.Num] == 0) return Error(Reg.StartLoc, "invalid register pair"); @@ -791,7 +792,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (MatchResult) { case Match_Success: Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: { diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 059ae3f7fb09..6444cf8e464d 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -60,15 +60,15 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { O << '%' << getRegisterName(RegNo); } -template -void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { +template +static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); assert(isUInt(Value) && "Invalid uimm argument"); O << Value; } -template -void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { +template +static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); assert(isInt(Value) && "Invalid simm argument"); O << Value; diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index ba55e686f3ef..7ca386fc4cb9 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -15,7 +15,6 @@ #define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Compiler.h" namespace llvm { class MCOperand; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 5fefa315a4cf..2115d4480eef 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -226,7 +226,7 @@ extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCCodeEmitter. TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, - createSystemZMCCodeEmitter); + createSystemZMCCodeEmitter); // Register the MCInstrInfo. TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index e089047d013e..cd367d60bab7 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -52,12 +52,6 @@ We don't use the TEST DATA CLASS instructions. -- -We could use the generic floating-point forms of LOAD COMPLEMENT, -LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the -condition codes. For example, we could use LCDFR instead of LCDBR. - --- - We only use MVC, XC and CLC for constant-length block operations. We could extend them to variable-length operations too, using EXECUTE RELATIVE LONG. 
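A note on the asm-parser change above: on z13 the vector registers %v0-%v15 overlay the floating-point registers %f0-%f15, so an f-prefixed name is a legitimate way to write the low half of a vector operand. The acceptance test added to parseRegister condenses to the following (names as in the patch; shown only for illustration):

    // Accept the expected group, or an FP register where a vector
    // register is wanted; the f-prefix names the overlapping register.
    if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
      return Error(Reg.StartLoc, "invalid operand for instruction");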
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3dca7bd89f05..75273114d62f 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -288,7 +288,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); + uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType()); OutStreamer->EmitValue(Expr, Size); } diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index 44ea1d25f08e..4a6beb67f182 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -26,21 +26,6 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV, return new SystemZConstantPoolValue(GV, Modifier); } -unsigned SystemZConstantPoolValue::getRelocationInfo() const { - switch (Modifier) { - case SystemZCP::TLSGD: - case SystemZCP::TLSLDM: - case SystemZCP::DTPOFF: - // May require a dynamic relocation. - return 2; - case SystemZCP::NTPOFF: - // May require a relocation, but the relocations are always resolved - // by the static linker. - return 1; - } - llvm_unreachable("Unknown modifier"); -} - int SystemZConstantPoolValue:: getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h index e5f1bb18581b..a71b595560d2 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -43,7 +43,6 @@ public: Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); // Override MachineConstantPoolValue. - unsigned getRelocationInfo() const override; int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 16f9adc79f17..4818ed015522 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -37,13 +37,11 @@ namespace { // instructions. struct Reference { Reference() - : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {} + : Def(false), Use(false) {} Reference &operator|=(const Reference &Other) { Def |= Other.Def; - IndirectDef |= Other.IndirectDef; Use |= Other.Use; - IndirectUse |= Other.IndirectUse; return *this; } @@ -53,11 +51,6 @@ struct Reference { // via a sub- or super-register. bool Def; bool Use; - - // True if the register is defined or used indirectly, by a sub- or - // super-register. - bool IndirectDef; - bool IndirectUse; }; class SystemZElimCompare : public MachineFunctionPass { @@ -104,14 +97,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) { return false; } -// Return true if any CC result of MI would reflect the value of subreg -// SubReg of Reg. -static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { +// Return true if any CC result of MI would reflect the value of Reg. 
+static bool resultTests(MachineInstr *MI, unsigned Reg) { if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - MI->getOperand(0).getReg() == Reg && - MI->getOperand(0).getSubReg() == SubReg) + MI->getOperand(0).getReg() == Reg) return true; switch (MI->getOpcode()) { @@ -127,30 +118,25 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { case SystemZ::LTEBR: case SystemZ::LTDBR: case SystemZ::LTXBR: - if (MI->getOperand(1).getReg() == Reg && - MI->getOperand(1).getSubReg() == SubReg) + if (MI->getOperand(1).getReg() == Reg) return true; } return false; } -// Describe the references to Reg in MI, including sub- and super-registers. +// Describe the references to Reg or any of its aliases in MI. Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { Reference Ref; for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); if (MO.isReg()) { if (unsigned MOReg = MO.getReg()) { - if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) { - if (MO.isUse()) { + if (TRI->regsOverlap(MOReg, Reg)) { + if (MO.isUse()) Ref.Use = true; - Ref.IndirectUse |= (MOReg != Reg); - } - if (MO.isDef()) { + else if (MO.isDef()) Ref.Def = true; - Ref.IndirectDef |= (MOReg != Reg); - } } } } @@ -158,6 +144,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { return Ref; } +// Return true if this is a load-and-test which can be optimized in the +// same way as a compare instruction. +static bool isLoadAndTestAsCmp(MachineInstr *MI) { + // If during isel we used a load-and-test as a compare with 0, the + // def operand is dead. + return ((MI->getOpcode() == SystemZ::LTEBR || + MI->getOpcode() == SystemZ::LTDBR || + MI->getOpcode() == SystemZ::LTXBR) && + MI->getOperand(0).isDead()); +} + +// Return the source register of Compare, which is the unknown value +// being tested. +static unsigned getCompareSourceReg(MachineInstr *Compare) { + unsigned reg = 0; + if (Compare->isCompare()) + reg = Compare->getOperand(0).getReg(); + else if (isLoadAndTestAsCmp(Compare)) + reg = Compare->getOperand(1).getReg(); + assert (reg); + + return reg; +} + // Compare compares the result of MI against zero. If MI is an addition // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition // and convert the branch to a BRCT(G). Return true on success. @@ -188,7 +198,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, // We already know that there are no references to the register between // MI and Compare. Make sure that there are also no references between // Compare and Branch. - unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) if (getRegReferences(MBBI, SrcReg)) @@ -196,16 +206,15 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, // The transformation is OK. Rebuild Branch as a BRCT(G).
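// NOTE (illustrative aside, not from the patch): the two helpers above rest
// on one property of load-and-test. LTEBR/LTDBR/LTXBR copy a value and set
// CC from it; when isel emitted one purely as a compare against +0.0,
// nothing reads the copied value, so operand 0 carries a dead flag:
//
//   %F1<def,dead> = LTDBR %F0   ; behaves as "compare %F0 with +0.0"
//
// Hence getCompareSourceReg returns operand 0 for a real compare but
// operand 1 (the source) for a load-and-test acting as one.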
MachineOperand Target(Branch->getOperand(2)); - Branch->RemoveOperand(2); - Branch->RemoveOperand(1); - Branch->RemoveOperand(0); + while (Branch->getNumOperands()) + Branch->RemoveOperand(0); Branch->setDesc(TII->get(BRCT)); MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addOperand(Target) .addReg(SystemZ::CC, RegState::ImplicitDefine); - MI->removeFromParent(); + MI->eraseFromParent(); return true; } @@ -308,6 +317,10 @@ static bool isCompareZero(MachineInstr *Compare) { return true; default: + + if (isLoadAndTestAsCmp(Compare)) + return true; + return (Compare->getNumExplicitOperands() == 2 && Compare->getOperand(1).isImm() && Compare->getOperand(1).getImm() == 0); @@ -325,8 +338,7 @@ optimizeCompareZero(MachineInstr *Compare, return false; // Search back for CC results that are based on the first operand. - unsigned SrcReg = Compare->getOperand(0).getReg(); - unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock &MBB = *Compare->getParent(); MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); Reference CCRefs; @@ -334,7 +346,7 @@ optimizeCompareZero(MachineInstr *Compare, while (MBBI != MBBE) { --MBBI; MachineInstr *MI = MBBI; - if (resultTests(MI, SrcReg, SrcSubReg)) { + if (resultTests(MI, SrcReg)) { // Try to remove both MI and Compare by converting a branch to BRCT(G). // We don't care in this case whether CC is modified between MI and // Compare. @@ -435,23 +447,21 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { while (MBBI != MBB.begin()) { MachineInstr *MI = --MBBI; if (CompleteCCUsers && - MI->isCompare() && + (MI->isCompare() || isLoadAndTestAsCmp(MI)) && (optimizeCompareZero(MI, CCUsers) || fuseCompareAndBranch(MI, CCUsers))) { ++MBBI; - MI->removeFromParent(); + MI->eraseFromParent(); Changed = true; CCUsers.clear(); - CompleteCCUsers = true; continue; } - Reference CCRefs(getRegReferences(MI, SystemZ::CC)); - if (CCRefs.Def) { + if (MI->definesRegister(SystemZ::CC)) { CCUsers.clear(); - CompleteCCUsers = !CCRefs.IndirectDef; + CompleteCCUsers = true; } - if (CompleteCCUsers && CCRefs.Use) + if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers) CCUsers.push_back(MI); } return Changed; diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 397de472a6ee..e1b20d0536d1 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -48,7 +48,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize, 8) { + -SystemZMC::CallFrameSize, 8, + false /* StackRealignable */) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) @@ -133,7 +134,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SystemZMachineFunctionInfo *ZFI = MF.getInfo(); bool IsVarArg = MF.getFunction()->isVarArg(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc DL; // Scan the call-saved GPRs and find the bounds of the register spill area. 
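// NOTE (general MachineInstr API, not specific to this patch): the switch
// from removeFromParent() to eraseFromParent() above is not cosmetic.
// removeFromParent() only unlinks the instruction and returns it, leaving
// the caller responsible for re-inserting or deleting it, while
// eraseFromParent() unlinks and deallocates it. These instructions are
// dead for good, so erasing is the correct choice:
//
//   MI->eraseFromParent();   // MI must not be dereferenced after this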
unsigned LowGPR = 0; @@ -322,7 +323,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const std::vector &CSI = MFFrame->getCalleeSavedInfo(); bool HasFP = hasFP(MF); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; // The current offset of the stack pointer from the CFA. int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; @@ -394,7 +398,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, // Add CFI for the this save. unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - int64_t Offset = getFrameIndexOffset(MF, Save.getFrameIdx()); + unsigned IgnoredFrameReg; + int64_t Offset = + getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, DwarfReg, SPOffsetFromCFA + Offset)); CFIIndexes.push_back(CFIIndex); @@ -455,9 +462,14 @@ bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { MF.getInfo()->getManipulatesSP()); } -int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Fill in FrameReg output argument. + FrameReg = RI->getFrameRegister(MF); // Start with the offset of FI from the top of the caller-allocated frame // (i.e. the top of the 160 bytes allocated by the caller). This initial diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 5ade757f17f7..46bb6b7a7573 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -43,7 +43,8 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 75fd37f01a19..a9093094d884 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -585,7 +585,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { if (N.getNode()->getNodeId() == -1 || N.getNode()->getNodeId() > Pos->getNodeId()) { - DAG->RepositionNode(Pos, N.getNode()); + DAG->RepositionNode(Pos->getIterator(), N.getNode()); N.getNode()->setNodeId(Pos->getNodeId()); } } @@ -801,7 +801,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { RxSBG.Input = N.getOperand(0); return true; } - + case ISD::ANY_EXTEND: // Bits above the extended operand are don't-care. RxSBG.Input = N.getOperand(0); @@ -818,7 +818,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { return true; } // Fall through. 
- + case ISD::SIGN_EXTEND: { // Check that the extension bits are don't-care (i.e. are masked out // by the final mask). @@ -938,7 +938,23 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } return nullptr; } - } + } + + // If the RISBG operands require no rotation and just masks the bottom + // 8/16 bits, attempt to convert this to a LLC zero extension. + if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) { + unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR); + if (VT == MVT::i32) { + if (Subtarget->hasHighWord()) + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux); + else + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR); + } + + SDValue In = convertTo(DL, VT, RISBG.Input); + N = CurDAG->getMachineNode(OpCode, DL, VT, In); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); + } unsigned Opcode = SystemZ::RISBG; // Prefer RISBGN if available, since it does not clobber CC. diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 9a753c897519..ee732675fb39 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -84,8 +84,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - auto &DL = *TM.getDataLayout(); - MVT PtrVT = getPointerTy(DL); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); // Set up the register classes. if (Subtarget.hasHighWord()) @@ -115,8 +114,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. - setExceptionPointerRegister(SystemZ::R6D); - setExceptionSelectorRegister(SystemZ::R7D); setStackPointerRegisterToSaveRestore(SystemZ::R15D); // TODO: It may be better to default to latency-oriented scheduling, however @@ -370,7 +367,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // No special instructions for these. setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); } } @@ -776,9 +775,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, } bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!CI->isTailCall()) - return false; - return true; + return CI->isTailCall(); } // We do not yet support 128-bit single-element vector types. If the user @@ -939,8 +936,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, + false, false, 0); } // Convert the value of the argument register into the value that's @@ -976,9 +973,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, - MachinePointerInfo::getFixedStack(FI), + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0); - } // Join the stores, which are independent of one another. 
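// NOTE (illustrative aside): the new tryRISBGZero case above relies on a
// simple identity. RISBG with Rotate == 0 and a mask covering only the low
// 8 or 16 bits computes
//
//   Dst = Src & 0xff        (or Src & 0xffff)
//
// which is exactly a zero-extending move, so the cheaper dedicated forms
// can be used instead: LLGCR/LLGHR for 64-bit results, LLCR/LLHR for
// 32-bit ones, and LLCRMux/LLHRMux when high-word registers are available.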
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, @@ -1060,9 +1056,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Store the argument in a stack slot and pass its address. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast(SpillSlot)->getIndex(); - MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains.push_back(DAG.getStore( + Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); ArgValue = SpillSlot; } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); @@ -1607,8 +1603,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { if (Value > Mask) return; - assert(C.ICmpType == SystemZICMP::Any && - "Signedness shouldn't matter here."); + // If the constant is in range, we can use any comparison. + C.ICmpType = SystemZICMP::Any; } else return; @@ -2439,7 +2435,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // If there was a non-zero offset that we didn't fold, create an explicit @@ -2499,7 +2496,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, } SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2529,9 +2528,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); // Call __tls_get_offset to retrieve the offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); @@ -2544,9 +2544,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); // Call __tls_get_offset to retrieve the module base offset. 
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); @@ -2562,9 +2563,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); - DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - DTPOffset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + DTPOffset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), DTPOffset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); break; @@ -2575,8 +2577,8 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_INDNTPOFF); Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getGOT(), + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); break; } @@ -2587,9 +2589,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); break; } } @@ -2628,10 +2631,10 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SDValue Result; if (CP->isMachineConstantPoolEntry()) Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment()); else Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment(), CP->getOffset()); + CP->getAlignment(), CP->getOffset()); // Use LARL to load the address of the constant pool entry. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -2736,17 +2739,37 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + bool RealignOpt = !DAG.getMachineFunction().getFunction()-> + hasFnAttribute("no-realign-stack"); + SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); SDLoc DL(Op); + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? + dyn_cast(Align)->getZExtValue() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + SDValue NeededSpace = Size; // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + // Add extra space for alignment if needed. + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + // Get the new stack pointer value. 
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); // Copy the new stack pointer back. Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); @@ -2757,6 +2780,16 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + // Dynamically realign if needed. + if (RequiredAlign > StackAlign) { + Result = + DAG.getNode(ISD::ADD, DL, MVT::i64, Result, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + Result = + DAG.getNode(ISD::AND, DL, MVT::i64, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); + } + SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } @@ -2837,7 +2870,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, } else if (DAG.ComputeNumSignBits(Op1) > 32) { Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); Opcode = SystemZISD::SDIVREM32; - } else + } else Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 @@ -3247,8 +3280,8 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, if (Op->getNumValues() == 1) return CC; assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), - Glued, CC); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued, + CC); } unsigned Id = cast(Op.getOperand(0))->getZExtValue(); @@ -3890,7 +3923,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, GS.addUndef(); } else { GS.add(SDValue(), ResidueOps.size()); - ResidueOps.push_back(Op); + ResidueOps.push_back(BVN->getOperand(I)); } } @@ -3901,7 +3934,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, // Create the BUILD_VECTOR for the remaining elements, if any. if (!ResidueOps.empty()) { while (ResidueOps.size() < NumElements) - ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType())); + ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); for (auto &Op : GS.Ops) { if (!Op.getNode()) { Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); @@ -4204,7 +4237,7 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const { + unsigned UnpackHigh) const { SDValue PackedOp = Op.getOperand(0); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); @@ -4566,9 +4599,9 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, } return Op; } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || - Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || - Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && - canTreatAsByteVector(Op.getValueType()) && + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + canTreatAsByteVector(Op.getValueType()) && canTreatAsByteVector(Op.getOperand(0).getValueType())) { // Make sure that only the unextended bits are significant. 
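// NOTE (illustrative arithmetic, assuming power-of-two alignments): the
// dynamic realignment added to lowerDYNAMIC_STACKALLOC uses the classic
// over-allocate-then-mask trick. Reserving
//
//   NeededSpace = Size + (RequiredAlign - StackAlign)
//
// guarantees that a RequiredAlign-aligned address exists within the
// allocation, and
//
//   Result = (Result + ExtraAlignSpace) & ~(RequiredAlign - 1)
//
// rounds the returned pointer up to that address without stepping past
// the space just reserved.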
EVT ExtVT = Op.getValueType(); @@ -4579,14 +4612,14 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, unsigned SubByte = Byte % ExtBytesPerElement; unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; if (SubByte < MinSubByte || - SubByte + BytesPerElement > ExtBytesPerElement) - break; + SubByte + BytesPerElement > ExtBytesPerElement) + break; // Get the byte offset of the unextended element Byte = Byte / ExtBytesPerElement * OpBytesPerElement; // ...then add the byte offset relative to that element. Byte += SubByte - MinSubByte; if (Byte % BytesPerElement != 0) - break; + break; Op = Op.getOperand(0); Index = Byte / BytesPerElement; Force = true; @@ -5611,6 +5644,31 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, return MBB; } +MachineBasicBlock * +SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + + unsigned SrcReg = MI->getOperand(0).getReg(); + + // Create new virtual register of the same class as source. + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + unsigned DstReg = MRI->createVirtualRegister(RC); + + // Replace pseudo with a normal load-and-test that models the def as + // well. + BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) + .addReg(SrcReg); + MI->eraseFromParent(); + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -5858,6 +5916,13 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); case SystemZ::TBEGINC: return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); + case SystemZ::LTEBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); + case SystemZ::LTDBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); + case SystemZ::LTXBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 07ff25144581..391636e5467f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -409,6 +409,20 @@ public: return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return SystemZ::R6D; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return SystemZ::R7D; + } + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; @@ -481,7 +495,7 @@ private: SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const; + unsigned UnpackHigh) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, @@ -530,6 +544,10 @@ private: MachineBasicBlock *MBB, unsigned Opcode, bool NoFloat) const; + MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const; + }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index 464f79a3bac9..5a1c874dfa36 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -35,11 +35,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) { if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; int64_t Offset = 0; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo( - PseudoSourceValue::getFixedStack(FI), Offset), - Flags, MFFrame->getObjectSize(FI), - MFFrame->getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI)); return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); } diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 27fbd7df2882..0cb267290cc1 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -46,15 +46,28 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } -// Note that the comparison against zero operation is not available if we -// have vector support, since load-and-test instructions will partially -// clobber the target (vector) register. +// Note that LTxBRCompare is not available if we have vector support, +// since load-and-test instructions will partially clobber the target +// (vector) register. let Predicates = [FeatureNoVector] in { defm : CompareZeroFP; defm : CompareZeroFP; defm : CompareZeroFP; } +// Use a normal load-and-test for compare against zero in case of +// vector support (via a pseudo to simplify instruction selection). +let Defs = [CC], usesCustomInserter = 1 in { + def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; + def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; + def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; +} +let Predicates = [FeatureVector] in { + defm : CompareZeroFP; + defm : CompareZeroFP; + defm : CompareZeroFP; +} + // Moves between 64-bit integer and floating-point registers. 
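// NOTE (illustrative aside): the *_VecPseudo definitions above and
// emitLoadAndTestCmp0 earlier are two halves of one mechanism. With vector
// support, LTEBR/LTDBR/LTXBR would partially clobber the (vector) register
// chosen for their target, so isel cannot use them directly. It selects a
// pseudo that models only the CC def; the custom inserter then expands it
// into a real load-and-test whose unused result lands in a fresh virtual
// register:
//
//   unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(SrcReg));
//   BuildMI(*MBB, MI, DL, TII->get(SystemZ::LTDBR), DstReg).addReg(SrcReg);
//   MI->eraseFromParent();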
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; @@ -238,26 +251,46 @@ let Predicates = [FeatureFPExtension] in { // Unary arithmetic //===----------------------------------------------------------------------===// +// We prefer generic instructions during isel, because they do not +// clobber CC and therefore give the scheduler more freedom. In cases +// the CC is actually useful, the SystemZElimCompare pass will try to +// convert generic instructions into opcodes that also set CC. Note +// that lcdf / lpdf / lndf only affect the sign bit, and can therefore +// be used with fp32 as well. This could be done for fp128, in which +// case the operands would have to be tied. + // Negation (Load Complement). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>; - def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>; + def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>; def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; } +// Generic form, which does not set CC. +def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>; +let isCodeGenOnly = 1 in + def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>; // Absolute value (Load Positive). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>; - def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>; + def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>; def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>; } +// Generic form, which does not set CC. +def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>; // Negative absolute value (Load Negative). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>; - def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>; + def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>; + def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>; def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>; } +// Generic form, which does not set CC. +def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>; // Square root. 
def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>; @@ -414,6 +447,6 @@ let Defs = [CC], CCValues = 0xF in { // Peepholes //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>; -def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>; +def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>; def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 71eb9986499b..01f4cdec05cb 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2381,6 +2381,7 @@ multiclass StringRRE opcode, def "" : InstRRE { + let Uses = [R0L]; let Constraints = "$R1 = $R1src, $R2 = $R2src"; let DisableEncoding = "$R1src, $R2src"; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 5d4a34f7131c..e6b5fc8e6235 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -69,6 +69,11 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); + // Clear the kill flags for the base and index registers in the first + // instruction. + EarlierMI->getOperand(1).setIsKill(false); + EarlierMI->getOperand(3).setIsKill(false); + // Set the opcodes. unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm()); unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm()); @@ -111,7 +116,7 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, } // MI is a three-operand RIE-style pseudo instruction. Replace it with -// LowOpcode3 if the registers are both low GR32s, otherwise use a move +// LowOpcodeK if the registers are both low GR32s, otherwise use a move // followed by HighOpcode or LowOpcode, depending on whether the target // is a high or low GR32. void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, @@ -129,6 +134,7 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, MI->getOperand(1).isKill()); MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); MI->getOperand(1).setReg(DestReg); + MI->tieOperands(0, 1); } } @@ -486,11 +492,8 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, const MachineRegisterInfo *MRI) const { assert(!SrcReg2 && "Only optimizing constant comparisons so far"); bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; - if (Value == 0 && - !IsLogical && - removeIPMBasedCompare(Compare, SrcReg, MRI, &RI)) - return true; - return false; + return Value == 0 && !IsLogical && + removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } // If Opcode is a move that has a conditional variant, return that variant, @@ -505,16 +508,13 @@ static unsigned getConditionalMove(unsigned Opcode) { bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { unsigned Opcode = MI->getOpcode(); - if (STI.hasLoadStoreOnCond() && - getConditionalMove(Opcode)) - return true; - return false; + return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode); } bool SystemZInstrInfo:: isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { // For now only convert single instructions. 
return NumCycles == 1; } @@ -524,7 +524,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumCyclesT, unsigned ExtraPredCyclesT, MachineBasicBlock &FMBB, unsigned NumCyclesF, unsigned ExtraPredCyclesF, - const BranchProbability &Probability) const { + BranchProbability Probability) const { // For now avoid converting mutually-exclusive cases. return false; } @@ -548,11 +548,10 @@ PredicateInstruction(MachineInstr *MI, ArrayRef Pred) const { return false; } -void -SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), @@ -590,13 +589,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -void -SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SystemZInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -604,15 +600,14 @@ SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned LoadOpcode, StoreOpcode; getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) - .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); + .addReg(SrcReg, getKillRegState(isKill)), + FrameIdx); } -void -SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SystemZInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -681,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LiveVariables *LV) const { MachineInstr *MI = MBBI; MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned Opcode = MI->getOpcode(); unsigned NumOps = MI->getNumOperands(); @@ -708,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); if (ThreeOperandOpcode >= 0) { - MachineInstrBuilder MIB = - BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) - .addOperand(Dest); + // Create three address instruction without adding the implicit + // operands. Those will instead be copied over from the original + // instruction by the loop below. 
+ MachineInstrBuilder MIB(*MF, + MF->CreateMachineInstr(get(ThreeOperandOpcode), + MI->getDebugLoc(), /*NoImplicit=*/true)); + MIB.addOperand(Dest); // Keep the kill state, but drop the tied flag. MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) MIB.addOperand(MI->getOperand(I)); + MBB->insert(MI, MIB); return finishConvertToThreeAddress(MI, MIB, LV); } } @@ -1191,6 +1192,12 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { case SystemZ::LER: return SystemZ::LTEBR; case SystemZ::LDR: return SystemZ::LTDBR; case SystemZ::LXR: return SystemZ::LTXBR; + case SystemZ::LCDFR: return SystemZ::LCDBR; + case SystemZ::LPDFR: return SystemZ::LPDBR; + case SystemZ::LNDFR: return SystemZ::LNDBR; + case SystemZ::LCDFR_32: return SystemZ::LCEBR; + case SystemZ::LPDFR_32: return SystemZ::LPEBR; + case SystemZ::LNDFR_32: return SystemZ::LNEBR; // On zEC12 we prefer to use RISBGN. But if there is a chance to // actually use the condition code, we may turn it back into RISGB. // Note that RISBG is not really a "load-and-test" instruction, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 31c9db209585..d9094ba93658 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -159,12 +159,12 @@ public: bool isPredicable(MachineInstr *MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumCyclesT, unsigned ExtraPredCyclesT, MachineBasicBlock &FMBB, unsigned NumCyclesF, unsigned ExtraPredCyclesF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool PredicateInstruction(MachineInstr *MI, ArrayRef Pred) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 820f30bc173d..b9f2eb5514a5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -397,7 +397,7 @@ let mayLoad = 1, mayStore = 1 in defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; // String moves. -let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, mayStore = 1, Defs = [CC] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; //===----------------------------------------------------------------------===// @@ -424,7 +424,7 @@ let hasSideEffects = 0 in { def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; } let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in - def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>; + def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>; // Match 32-to-64-bit sign extensions in which the source is already // in a 64-bit register. @@ -490,7 +490,7 @@ def : Pat<(and GR64:$src, 0xffffffff), def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>, Requires<[FeatureHighWord]>; def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; -def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>, +def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>, Requires<[FeatureHighWord]>; // 32-bit extensions from 16-bit memory. 
LLHMux expands to LLH or LLHH, @@ -498,7 +498,7 @@ def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>, def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>, Requires<[FeatureHighWord]>; def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; -def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>, +def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>, Requires<[FeatureHighWord]>; def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; @@ -1147,7 +1147,7 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { def CLFIMux : CompareRIPseudo, Requires<[FeatureHighWord]>; def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; - def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>, + def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>, Requires<[FeatureHighWord]>; def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; @@ -1185,7 +1185,7 @@ let mayLoad = 1, Defs = [CC] in defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; // String comparison. -let mayLoad = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, Defs = [CC] in defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; // Test under mask. @@ -1459,9 +1459,29 @@ let usesCustomInserter = 1 in { } // Search a block of memory for a character. -let mayLoad = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; +// Other instructions for inline assembly +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2), + "stck\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2), + "stckf\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2), + "stcke\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2), + "stfle\t$BD2", + []>; + + + //===----------------------------------------------------------------------===// // Peepholes. //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp index 00572d0b9d79..1a7c0d7f687a 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//== SystemZMachineFuctionInfo.cpp - SystemZ machine function info-*- C++ -*-=// +//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 34fc36d6bf6c..f4a517bd54df 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=// +//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index dc7bd25d7ed5..6fd24e3df625 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -69,8 +69,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // Decompose the frame index into a base and offset. 
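// NOTE (usage sketch, hedged): STCK/STCKF/STCKE/STFLE above are modelled
// for the benefit of inline assembly, so the flags are the interesting
// part: mayStore = 1 because each stores to its BD2 memory operand,
// Defs = [CC] because each sets the condition code, and hasSideEffects = 1
// because they read clock or facility state the MI-level model cannot
// express. The C-level idiom they enable looks roughly like this (s390
// asm-constraint syntax; illustrative only):
//
//   unsigned long long Tod;
//   asm volatile("stck %0" : "=Q"(Tod) : : "cc");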
int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); - unsigned BasePtr = getFrameRegister(MF); - int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) + + unsigned BasePtr; + int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) + MI->getOperand(FIOperandNum + 1).getImm()); // Special handling of dbg_value instructions. diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 85aa0a62cc76..0d8b08b9cbdd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -282,4 +282,5 @@ def v128any : TypedReg; // The 2-bit condition code field of the PSW. Every register named in an // inline asm needs a class associated with it. def CC : SystemZReg<"cc">; -def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; +let isAllocatable = 0 in + def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index d1a17c5500d6..846edd51341a 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -16,6 +16,8 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -35,19 +37,16 @@ public: bool runOnMachineFunction(MachineFunction &F) override; private: - bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, - unsigned LLIxL, unsigned LLIxH); + bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH); bool shortenOn0(MachineInstr &MI, unsigned Opcode); bool shortenOn01(MachineInstr &MI, unsigned Opcode); bool shortenOn001(MachineInstr &MI, unsigned Opcode); + bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; - - // LowGPRs[I] has bit N set if LLVM register I includes the low - // word of GPR N. HighGPRs is the same for the high word. - unsigned LowGPRs[SystemZ::NUM_TARGET_REGS]; - unsigned HighGPRs[SystemZ::NUM_TARGET_REGS]; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; }; char SystemZShortenInst::ID = 0; @@ -58,33 +57,31 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { } SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() { - // Set up LowGPRs and HighGPRs. - for (unsigned I = 0; I < 16; ++I) { - LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I; - LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; - HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I; - HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; - if (unsigned GR128 = SystemZMC::GR128Regs[I]) { - LowGPRs[GR128] |= 3 << I; - HighGPRs[GR128] |= 3 << I; - } - } + : MachineFunctionPass(ID), TII(nullptr) {} + +// Tie operands if MI has become a two-address instruction. +static void tieOpsIfNeeded(MachineInstr &MI) { + if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + !MI.getOperand(0).isTied()) + MI.tieOperands(0, 1); } // MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH // are the halfword immediate loads for the same word. Try to use one of them -// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high -// words referenced by LLVM register X while LiveOther is the mask of low -// words that are currently live, and vice versa. 
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap, - unsigned LiveOther, unsigned LLIxL, - unsigned LLIxH) { +// instead of IIxF. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, + unsigned LLIxL, unsigned LLIxH) { unsigned Reg = MI.getOperand(0).getReg(); - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - unsigned GPRs = GPRMap[Reg]; - assert(GPRs != 0 && "Register must be a GPR"); - if (GPRs & LiveOther) + // The new opcode will clear the other half of the GR64 reg, so + // cancel if that is live. + unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, + &SystemZ::GR64BitRegClass); + unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + if (LiveRegs.contains(OtherReg)) return false; uint64_t Imm = MI.getOperand(1).getImm(); @@ -123,12 +120,26 @@ bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { } // Change MI's opcode to Opcode if register operands 0, 1 and 2 have a -// 4-bit encoding and if operands 0 and 1 are tied. +// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0 +// with op 1, if MI becomes 2-address. bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && MI.getOperand(1).getReg() == MI.getOperand(0).getReg() && SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) { MI.setDesc(TII->get(Opcode)); + tieOpsIfNeeded(MI); + return true; + } + return false; +} + +// Calls shortenOn001 if CCLive is false. CC def operand is added in +// case of success. +bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, + unsigned Opcode) { + if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); return true; } return false; @@ -164,35 +175,24 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; - // Work out which words are live on exit from the block. - unsigned LiveLow = 0; - unsigned LiveHigh = 0; - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) { - for (auto LI = (*SI)->livein_begin(), LE = (*SI)->livein_end(); - LI != LE; ++LI) { - unsigned Reg = *LI; - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - LiveLow |= LowGPRs[Reg]; - LiveHigh |= HighGPRs[Reg]; - } - } + // Set up the set of live registers at the end of MBB (live out) + LiveRegs.clear(); + LiveRegs.addLiveOuts(&MBB); // Iterate backwards through the block looking for instructions to change. 
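For context on the liveness gate in the new shortenIIF: IIxF inserts a 32-bit immediate into one half of a 64-bit GPR and preserves the other half, whereas the shorter LLIxL/LLIxH forms zero every bit they do not write. In SystemZ assembly terms (illustrative, not from the patch):

    iilf  %r2, 0xabcd    # writes the low 32 bits of %r2, keeps the high 32
    llill %r2, 0xabcd    # writes 16 bits and clears the rest of %r2

So the replacement is only legal when the half that IIxF would have preserved is dead, which is what the getMatchingSuperReg/getSubReg walk plus the LiveRegs.contains(OtherReg) check above establishes.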
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; switch (MI.getOpcode()) { case SystemZ::IILF: - Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, - SystemZ::LLILH); + Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH); break; case SystemZ::IIHF: - Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, - SystemZ::LLIHH); + Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); break; case SystemZ::WFADB: - Changed |= shortenOn001(MI, SystemZ::ADBR); + Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; case SystemZ::WFDDB: @@ -216,15 +216,15 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; case SystemZ::WFLCDB: - Changed |= shortenOn01(MI, SystemZ::LCDBR); + Changed |= shortenOn01(MI, SystemZ::LCDFR); break; case SystemZ::WFLNDB: - Changed |= shortenOn01(MI, SystemZ::LNDBR); + Changed |= shortenOn01(MI, SystemZ::LNDFR); break; case SystemZ::WFLPDB: - Changed |= shortenOn01(MI, SystemZ::LPDBR); + Changed |= shortenOn01(MI, SystemZ::LPDFR); break; case SystemZ::WFSQDB: @@ -232,7 +232,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; case SystemZ::WFSDB: - Changed |= shortenOn001(MI, SystemZ::SDBR); + Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); break; case SystemZ::WFCDB: @@ -257,33 +257,17 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; } - unsigned UsedLow = 0; - unsigned UsedHigh = 0; - for (auto MOI = MI.operands_begin(), MOE = MI.operands_end(); - MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (MO.isReg()) { - if (unsigned Reg = MO.getReg()) { - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - if (MO.isDef()) { - LiveLow &= ~LowGPRs[Reg]; - LiveHigh &= ~HighGPRs[Reg]; - } else if (!MO.isUndef()) { - UsedLow |= LowGPRs[Reg]; - UsedHigh |= HighGPRs[Reg]; - } - } - } - } - LiveLow |= UsedLow; - LiveHigh |= UsedHigh; + LiveRegs.stepBackward(MI); } return Changed; } bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { - TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + LiveRegs.init(TRI); bool Changed = false; for (auto &MBB : F) diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 00cbbd10a819..f305e85f6cfe 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -16,6 +16,7 @@ using namespace llvm; +extern cl::opt<bool> MISchedPostRA; extern "C" void LLVMInitializeSystemZTarget() { // Register the target. RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); } @@ -32,7 +33,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) { VectorABI = false; SmallVector<StringRef, 8> Features; - FS.split(Features, ",", -1, false /* KeepEmpty */); + FS.split(Features, ',', -1, false /* KeepEmpty */); for (auto &Feature : Features) { if (Feature == "vector" || Feature == "+vector") VectorABI = true; @@ -130,6 +131,13 @@ void SystemZPassConfig::addPreSched2() { } void SystemZPassConfig::addPreEmitPass() { + + // Do instruction shortening before compare elimination because some + // vector instructions will be shortened into opcodes that compare + // elimination recognizes.
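Combined with the scheduling hunks that follow, the late SystemZ pipeline at -O2 becomes: SystemZShortenInst, then SystemZElimCompare, then SystemZLongBranch, then a post-RA scheduler. A condensed sketch of the resulting addPreEmitPass body, assembled from the hunks below (not a verbatim quote):

    void SystemZPassConfig::addPreEmitPass() {
      if (getOptLevel() != CodeGenOpt::None)
        addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
      if (getOptLevel() != CodeGenOpt::None)
        addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
      addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
      // Final scheduling: MachineScheduler post-RA when MISchedPostRA is
      // set, the legacy post-RA list scheduler otherwise.
      if (getOptLevel() != CodeGenOpt::None)
        addPass(MISchedPostRA ? &PostMachineSchedulerID : &PostRASchedulerID);
    }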
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); + // We eliminate comparisons here rather than earlier because some // transformations can change the set of available CC values and we // generally want those transformations to have priority. This is @@ -155,9 +163,17 @@ void SystemZPassConfig::addPreEmitPass() { // preventing that would be a win or not. if (getOptLevel() != CodeGenOpt::None) addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false); - if (getOptLevel() != CodeGenOpt::None) - addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + + // Do final scheduling after all other optimizations, to get an + // optimal input for the decoder (branch relaxation must happen + // after block placement). + if (getOptLevel() != CodeGenOpt::None) { + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); + } } TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { @@ -165,7 +181,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(SystemZTTIImpl(this, F)); }); } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 0a81e1f9fdf9..1a8f1f7f3aaa 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -43,6 +43,9 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool targetSchedulesPostRAScheduling() const override { return true; }; + }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 5a87df1976c3..5ff5b21f49b8 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; // //===----------------------------------------------------------------------===// -unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,8 +63,8 @@ unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 4 * TTI::TCC_Basic; } -unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -181,8 +181,8 @@ unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, return SystemZTTIImpl::getIntImmCost(Imm, Ty); } -unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 4b80973ed879..9ae736d8413a 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h 
@@ -28,7 +28,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { const SystemZTargetLowering *getTLI() const { return TLI; } public: - explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -42,12 +42,11 @@ public: /// \name Scalar TTI Implementations /// @{ - unsigned getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 19b5e2a0f978..a0b0d8f24046 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -43,7 +43,6 @@ using namespace llvm; void TargetLoweringObjectFile::Initialize(MCContext &ctx, const TargetMachine &TM) { Ctx = &ctx; - DL = TM.getDataLayout(); InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } @@ -107,7 +106,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( assert(!Suffix.empty()); SmallString<60> NameStr; - NameStr += DL->getPrivateGlobalPrefix(); + NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix(); TM.getNameWithPrefix(NameStr, GV, Mang); NameStr.append(Suffix.begin(), Suffix.end()); return Ctx->getOrCreateSymbol(NameStr); @@ -120,7 +119,7 @@ MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol( } void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, + const DataLayout &, const MCSymbol *Sym) const { } @@ -170,14 +169,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // If the initializer for the global contains something that requires a // relocation, then we may have to drop this into a writable data section // even though it is marked const. - switch (C->getRelocationInfo()) { - case Constant::NoRelocation: + if (!C->needsRelocation()) { // If the global is required to have a unique address, it can't be put // into a mergable section: just drop it into the general read-only // section instead. if (!GVar->hasUnnamedAddr()) return SectionKind::getReadOnly(); - + // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { @@ -200,7 +198,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Otherwise, just drop it into a mergable constant section. If we have // a section for this size, use it, otherwise use the arbitrary sized // mergable section.
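With Constant::getRelocationInfo() gone, the three-way classification collapses into the boolean needsRelocation(); the surrounding hunks amount to this decision table (a paraphrase of getKindForGlobal's constant-data handling, not new behavior):

    // !C->needsRelocation():
    //   unique address required   -> ReadOnly
    //   null-terminated string    -> MergeableCString of matching width
    //   4/8/16-byte constant      -> MergeableConst4/8/16
    //   anything else             -> ReadOnly
    // C->needsRelocation():
    //   Reloc::Static             -> ReadOnly (the linker resolves it)
    //   otherwise                 -> writable data, fixed up at load time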
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) { + switch (GV->getParent()->getDataLayout().getTypeAllocSize(C->getType())) { case 4: return SectionKind::getMergeableConst4(); case 8: return SectionKind::getMergeableConst8(); case 16: return SectionKind::getMergeableConst16(); @@ -208,20 +206,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getReadOnly(); } - case Constant::LocalRelocation: - // In static relocation model, the linker will resolve all addresses, so - // the relocation entries will actually be constants by the time the app - // starts up. However, we can't put this into a mergable section, because - // the linker doesn't take relocations into consideration when it tries to - // merge entries in the section. - if (ReloModel == Reloc::Static) - return SectionKind::getReadOnly(); - - // Otherwise, the dynamic linker needs to fix it up, put it in the - // writable data.rel.local section. - return SectionKind::getReadOnlyWithRelLocal(); - - case Constant::GlobalRelocations: + } else { // In static relocation model, the linker will resolve all addresses, so // the relocation entries will actually be constants by the time the app // starts up. However, we can't put this into a mergable section, because @@ -242,17 +227,11 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // globals together onto fewer pages, improving the locality of the dynamic // linker. if (ReloModel == Reloc::Static) - return SectionKind::getDataNoRel(); + return SectionKind::getData(); - switch (C->getRelocationInfo()) { - case Constant::NoRelocation: - return SectionKind::getDataNoRel(); - case Constant::LocalRelocation: - return SectionKind::getDataRelLocal(); - case Constant::GlobalRelocations: - return SectionKind::getDataRel(); - } - llvm_unreachable("Invalid relocation"); + if (C->needsRelocation()) + return SectionKind::getData(); + return SectionKind::getData(); } /// This method computes the appropriate section to emit the specified global @@ -273,7 +252,8 @@ TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV, MCSection *TargetLoweringObjectFile::getSectionForJumpTable( const Function &F, Mangler &Mang, const TargetMachine &TM) const { - return getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr); + return getSectionForConstant(F.getParent()->getDataLayout(), + SectionKind::getReadOnly(), /*C=*/nullptr); } bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection( @@ -296,9 +276,8 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection( /// Given a mergable constant with the specified size and relocation /// information, return a section that it should be placed in. 
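A theme running through these TargetLoweringObjectFile changes: the DataLayout is no longer cached off the TargetMachine in Initialize(); it is fetched from the IR module at each use, and getSectionForConstant() below now receives it as an explicit parameter. The access pattern (sketch):

    // DataLayout flows from the IR, not from TargetMachine state:
    const DataLayout &DL = GV->getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(C->getType());
    auto Prefix = DL.getPrivateGlobalPrefix();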
-MCSection * -TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isReadOnly() && ReadOnlySection != nullptr) return ReadOnlySection; @@ -345,7 +324,7 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol } void TargetLoweringObjectFile::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + Mang.getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false); } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 83174c20c8e9..850c93cb21b8 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -150,24 +150,11 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { } TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(F.getParent()->getDataLayout()); }); } -static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, - const MCSection &Section) { - if (!AsmInfo.isSectionAtomizableBySymbols(Section)) - return true; - - // If it is not dead stripped, it is safe to use private labels. - const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); - if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) - return true; - - return false; -} - void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV, Mangler &Mang, bool MayAlwaysUsePrivate) const { @@ -177,11 +164,8 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name, Mang.getNameWithPrefix(Name, GV, false); return; } - SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this); const TargetLoweringObjectFile *TLOF = getObjFileLowering(); - const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this); - bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection); - TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this); + TLOF->getNameWithPrefix(Name, GV, Mang, *this); } MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 719923558de4..f82566c37baa 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -32,17 +32,25 @@ using namespace llvm; -inline TargetMachine *unwrap(LLVMTargetMachineRef P) { - return reinterpret_cast<TargetMachine *>(P); +namespace llvm { +// Friend to the TargetMachine, access legacy API that are made private in C++ +struct C_API_PRIVATE_ACCESS { + static const DataLayout &getDataLayout(const TargetMachine &T) { + return T.getDataLayout(); + } +}; } -inline Target *unwrap(LLVMTargetRef P) { + +static TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast<TargetMachine *>(P); +} +static Target *unwrap(LLVMTargetRef P) { return reinterpret_cast<Target *>(P); } -inline LLVMTargetMachineRef wrap(const TargetMachine *P) { - return - reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P)); +static LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P)); } -inline LLVMTargetRef wrap(const Target * P) { +static LLVMTargetRef wrap(const Target * P) { return
reinterpret_cast<LLVMTargetRef>(const_cast<Target *>(P)); } @@ -69,16 +77,16 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) { LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T, char **ErrorMessage) { std::string Error; - + *T = wrap(TargetRegistry::lookupTarget(TripleStr, Error)); - + if (!*T) { if (ErrorMessage) *ErrorMessage = strdup(Error.c_str()); return 1; } - + return 0; } @@ -145,10 +153,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, CM, OL)); } - -void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { - delete unwrap(T); -} +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); } LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { const Target* target = &(unwrap(T)->getTarget()); @@ -170,8 +175,9 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { return strdup(StringRep.c_str()); } +/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { - return wrap(unwrap(T)->getDataLayout()); + return wrap(&C_API_PRIVATE_ACCESS::getDataLayout(*unwrap(T))); } void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, @@ -190,14 +196,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, std::string error; - const DataLayout *td = TM->getDataLayout(); - - if (!td) { - error = "No DataLayout in TargetMachine"; - *ErrorMessage = strdup(error.c_str()); - return true; - } - Mod->setDataLayout(*td); + Mod->setDataLayout(TM->createDataLayout()); TargetMachine::CodeGenFileType ft; switch (codegen) { @@ -239,7 +238,6 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, SmallString<0> CodeString; raw_svector_ostream OStream(CodeString); bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage); - OStream.flush(); StringRef Data = OStream.str(); *OutMemBuf = diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp index 42bc487fe6d8..d41b6436928b 100644 --- a/lib/Target/TargetRecip.cpp +++ b/lib/Target/TargetRecip.cpp @@ -26,7 +26,7 @@ using namespace llvm; // the key strings for queries and command-line inputs. // In addition, the command-line interface recognizes the global parameters // "all", "none", and "default".
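Concretely, the strings this file accepts are a reciprocal-operation key, optionally suffixed with ':' and a refinement-step count, plus the three global words named above. Illustrative inputs and how parseRefinementStep() splits them (examples inferred from the syntax in this file):

    "divf"        -> Key = "divf",     default refinement steps
    "vec-divd:2"  -> Key = "vec-divd", RefinementSteps = 2
    "all" / "none" / "default"  -> global enable/disable toggles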
-static const char *RecipOps[] = { +static const char *const RecipOps[] = { "divd", "divf", "vec-divd", @@ -46,7 +46,7 @@ TargetRecip::TargetRecip() { RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); } -static bool parseRefinementStep(const StringRef &In, size_t &Position, +static bool parseRefinementStep(StringRef In, size_t &Position, uint8_t &Value) { const char RefStepToken = ':'; Position = In.find(RefStepToken); @@ -175,7 +175,7 @@ TargetRecip::TargetRecip(const std::vector &Args) : parseIndividualParams(Args); } -bool TargetRecip::isEnabled(const StringRef &Key) const { +bool TargetRecip::isEnabled(StringRef Key) const { ConstRecipIter Iter = RecipMap.find(Key); assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); assert(Iter->second.Enabled != Uninitialized && @@ -183,7 +183,7 @@ bool TargetRecip::isEnabled(const StringRef &Key) const { return Iter->second.Enabled; } -unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { +unsigned TargetRecip::getRefinementSteps(StringRef Key) const { ConstRecipIter Iter = RecipMap.find(Key); assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); assert(Iter->second.RefinementSteps != Uninitialized && @@ -192,7 +192,7 @@ unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { } /// Custom settings (previously initialized values) override target defaults. -void TargetRecip::setDefaults(const StringRef &Key, bool Enable, +void TargetRecip::setDefaults(StringRef Key, bool Enable, unsigned RefSteps) { if (Key == "all") { for (auto &KV : RecipMap) { @@ -213,7 +213,7 @@ void TargetRecip::setDefaults(const StringRef &Key, bool Enable, bool TargetRecip::operator==(const TargetRecip &Other) const { for (const auto &KV : RecipMap) { - const StringRef &Op = KV.first; + StringRef Op = KV.first; const RecipParams &RP = KV.second; const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; if (RP.RefinementSteps != OtherRP.RefinementSteps) diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 25de9eee0831..284a7d91bc62 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -1,20 +1,39 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td) +tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel) +tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info) tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) add_llvm_target(WebAssemblyCodeGen + Relooper.cpp + WebAssemblyArgumentMove.cpp + WebAssemblyAsmPrinter.cpp + WebAssemblyCFGStackify.cpp + WebAssemblyFastISel.cpp WebAssemblyFrameLowering.cpp - WebAssemblyInstrInfo.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp + WebAssemblyInstrInfo.cpp + WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp + WebAssemblyMCInstLower.cpp + WebAssemblyOptimizeReturned.cpp + WebAssemblyPeephole.cpp + WebAssemblyPEI.cpp WebAssemblyRegisterInfo.cpp + WebAssemblyRegColoring.cpp + WebAssemblyRegNumbering.cpp + WebAssemblyRegStackify.cpp WebAssemblySelectionDAGInfo.cpp + WebAssemblyStoreResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp + WebAssemblyTargetObjectFile.cpp WebAssemblyTargetTransformInfo.cpp ) diff --git 
a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index fbb985aaafbb..7ce3a00ae360 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -21,11 +23,13 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" +#include "WebAssemblyGenAsmWriter.inc" + WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) @@ -33,11 +37,93 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - llvm_unreachable("TODO: implement printRegName"); + assert(RegNo != WebAssemblyFunctionInfo::UnusedReg); + // Note that there's an implicit get_local/set_local here! + OS << "$" << RegNo; } void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, - const MCSubtargetInfo &STI) { - llvm_unreachable("TODO: implement printInst"); + const MCSubtargetInfo & /*STI*/) { + // Print the instruction (this uses the AsmStrings from the .td files). + printInstruction(MI, OS); + + // Print any additional variadic operands. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.isVariadic()) + for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) { + if (i != 0) + OS << ", "; + printOperand(MI, i, OS); + } + + // Print any added annotation. + printAnnotation(OS, Annot); +} + +static std::string toString(const APFloat &FP) { + static const size_t BufBytes = 128; + char buf[BufBytes]; + if (FP.isNaN()) + assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) || + FP.bitwiseIsEqual( + APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) && + "convertToHexString handles neither SNaN nor NaN payloads"); + // Use C99's hexadecimal floating-point representation. + auto Written = FP.convertToHexString( + buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven); + (void)Written; + assert(Written != 0); + assert(Written < BufBytes); + return buf; +} + +void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned WAReg = Op.getReg(); + if (int(WAReg) >= 0) + printRegName(O, WAReg); + else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs()) + O << "$pop" << (WAReg & INT32_MAX); + else if (WAReg != WebAssemblyFunctionInfo::UnusedReg) + O << "$push" << (WAReg & INT32_MAX); + else + O << "$discard"; + // Add a '=' suffix if this is a def. 
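toString() above leans on APFloat::convertToHexString, i.e. C99 %a-style hexadecimal floats, so immediates round-trip exactly instead of being decimal-rounded. The expected shape of its output (illustrative values; exact digit count depends on APFloat's formatting):

    toString(APFloat(1.5))  -> "0x1.8p0"
    toString(APFloat(-2.0)) -> "-0x1p1"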
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) + O << '='; + } else if (Op.isImm()) { + switch (MI->getOpcode()) { + case WebAssembly::PARAM: + case WebAssembly::RESULT: + case WebAssembly::LOCAL: + O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); + break; + default: + O << Op.getImm(); + break; + } + } else if (Op.isFPImm()) + O << toString(APFloat(Op.getFPImm())); + else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +const char *llvm::WebAssembly::TypeToString(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return "i32"; + case MVT::i64: + return "i64"; + case MVT::f32: + return "f32"; + case MVT::f64: + return "f64"; + default: + llvm_unreachable("unsupported type"); + } } diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 70fcef214ce2..39a16f59fd78 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,14 +16,13 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineValueType.h" namespace llvm { -class MCOperand; class MCSubtargetInfo; -class WebAssemblyInstPrinter : public MCInstPrinter { +class WebAssemblyInstPrinter final : public MCInstPrinter { public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); @@ -31,8 +30,21 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, const MCSubtargetInfo &STI) override; + + // Used by tblgen code.
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); }; +namespace WebAssembly { + +const char *TypeToString(MVT Ty); + +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt index 04ef9c4e4bcf..9c4d6dcb35a3 100644 --- a/lib/Target/WebAssembly/LLVMBuild.txt +++ b/lib/Target/WebAssembly/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = WebAssemblyCodeGen parent = WebAssembly -required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyDesc WebAssemblyInfo +required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyAsmPrinter WebAssemblyDesc WebAssemblyInfo add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt index ccc0f0d7ccbc..c8d1d821861a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt @@ -1,4 +1,7 @@ add_llvm_library(LLVMWebAssemblyDesc + WebAssemblyAsmBackend.cpp + WebAssemblyELFObjectWriter.cpp WebAssemblyMCAsmInfo.cpp + WebAssemblyMCCodeEmitter.cpp WebAssemblyMCTargetDesc.cpp ) diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp new file mode 100644 index 000000000000..b158ccb46f99 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -0,0 +1,103 @@ +//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyAsmBackend class. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class WebAssemblyAsmBackend final : public MCAsmBackend { + bool Is64Bit; + +public: + explicit WebAssemblyAsmBackend(bool Is64Bit) + : MCAsmBackend(), Is64Bit(Is64Bit) {} + ~WebAssemblyAsmBackend() override {} + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + + // No instruction requires relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return false; + } + + unsigned getNumFixupKinds() const override { + // We currently just use the generic fixups in MCFixup.h and don't have any + // target-specific fixups. 
+ return 0; + } + + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {} + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; +}; + +bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + if (Count == 0) + return true; + + // FIXME: Do something. + return false; +} + +void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); + unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8); + if (!Value) + return; // Doesn't change encoding. + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +MCObjectWriter * +WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0); +} +} // end anonymous namespace + +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU) { + return new WebAssemblyAsmBackend(TT.isArch64Bit()); +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp new file mode 100644 index 000000000000..c47a3d9094e5 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -0,0 +1,54 @@ +//===-- WebAssemblyELFObjectWriter.cpp - WebAssembly ELF Writer -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file handles ELF-specific object emission, converting LLVM's +/// internal fixups into the appropriate relocations. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +namespace { +class WebAssemblyELFObjectWriter final : public MCELFObjectTargetWriter { +public: + WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI); + +protected: + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; +}; +} // end anonymous namespace + +// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF +// writing seriously, we should email generic-abi@googlegroups.com and ask +// for our own ELF code. +WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, + uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE, + /*HasRelocationAddend=*/true) {} + +unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // FIXME: Do we need our own relocs? 
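Because getNumFixupKinds() above returns 0, every fixup reaching this writer carries one of MC's generic kinds, and GetRelocType() simply forwards that number as the relocation type under EM_NONE. A sketch of the generic-kind plumbing involved (standard MC API, not target code):

    // A 4-byte absolute data fixup at Offset against Expr:
    MCFixup F = MCFixup::create(Offset, Expr, FK_Data_4);
    // ...GetRelocType(Target, F, /*IsPCRel=*/false) then yields
    // unsigned(FK_Data_4).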
+ return Fixup.getKind(); +} + +MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = + new WebAssemblyELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 55346f71c6fc..d2617796ca99 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit(); + PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? @@ -41,9 +41,6 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { COMMDirectiveAlignmentIsInBytes = false; LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - SupportsDebugInformation = true; // For now, WebAssembly does not support exceptions. diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h index d2b8fb7748fc..2dcf2cd3c892 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h @@ -15,13 +15,13 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { class Triple; -class WebAssemblyMCAsmInfo final : public MCAsmInfo { +class WebAssemblyMCAsmInfo final : public MCAsmInfoELF { public: explicit WebAssemblyMCAsmInfo(const Triple &T); ~WebAssemblyMCAsmInfo() override; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp new file mode 100644 index 000000000000..7c6c79eb5db2 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -0,0 +1,100 @@ +//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyMCCodeEmitter class. 
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +namespace { +class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { + const MCRegisterInfo &MRI; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, + MCContext &) + : MRI(mri) {} + + ~WebAssemblyMCCodeEmitter() override {} + + /// TableGen'erated function for getting the binary encoding for an + /// instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + /// Return binary encoding of operand. If the machine operand requires + /// relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; +}; +} // end anonymous namespace + +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { + return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx); +} + +unsigned WebAssemblyMCCodeEmitter::getMachineOpValue( + const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return MRI.getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + assert(MO.getExpr()->getKind() == MCExpr::SymbolRef); + + assert(false && "FIXME: not implemented yet"); + + return 0; +} + +void WebAssemblyMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); +} + +// Encode WebAssembly Memory Operand +uint64_t +WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); + return 0; +} + +#include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 224aa773a80e..14cd295353d5 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -26,25 +26,40 @@ using namespace llvm; #define DEBUG_TYPE "wasm-mc-target-desc" +#define GET_INSTRINFO_MC_DESC +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_MC_DESC #include "WebAssemblyGenSubtargetInfo.inc" #define GET_REGINFO_MC_DESC #include "WebAssemblyGenRegisterInfo.inc" -static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, +static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/, const Triple &TT) { - MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); - return MAI; + return new
WebAssemblyMCAsmInfo(TT); +} + +static MCInstrInfo *createWebAssemblyMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitWebAssemblyMCInstrInfo(X); + return X; +} + +static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } static MCInstPrinter * -createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant, +createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - if (SyntaxVariant == 0 || SyntaxVariant == 1) - return new WebAssemblyInstPrinter(MAI, MII, MRI); - return nullptr; + assert(SyntaxVariant == 0); + return new WebAssemblyInstPrinter(MAI, MII, MRI); } // Force static initialization. @@ -53,7 +68,19 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo); + + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer); + // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter); + + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend); } } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index eebf5b72f62b..e78f73e3da95 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #include "llvm/Support/DataTypes.h" -#include namespace llvm { @@ -34,13 +33,21 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); + const Triple &TT, StringRef CPU); + +MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI); } // end namespace llvm @@ -50,6 +57,11 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, #define GET_REGINFO_ENUM #include "WebAssemblyGenRegisterInfo.inc" +// Defines symbolic names for the WebAssembly instructions. +// +#define GET_INSTRINFO_ENUM +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile index f102d73f6e86..ccf63f0be554 100644 --- a/lib/Target/WebAssembly/Makefile +++ b/lib/Target/WebAssembly/Makefile @@ -12,8 +12,14 @@ LIBRARYNAME = LLVMWebAssemblyCodeGen TARGET = WebAssembly # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = WebAssemblyGenRegisterInfo.inc WebAssemblyGenSubtargetInfo.inc \ - WebAssemblyGenMCCodeEmitter.inc +BUILT_SOURCES = \ + WebAssemblyGenAsmWriter.inc \ + WebAssemblyGenDAGISel.inc \ + WebAssemblyGenFastISel.inc \ + WebAssemblyGenInstrInfo.inc \ + WebAssemblyGenMCCodeEmitter.inc \ + WebAssemblyGenRegisterInfo.inc \ + WebAssemblyGenSubtargetInfo.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 63e02c455895..b97ea454165c 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,6 +12,16 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +The backend is built, tested and archived on the following waterfall: + https://build.chromium.org/p/client.wasm.llvm/console + +The backend's bringup is done using the GCC torture test suite first since it +doesn't require C library support. Current known failures are in +known_gcc_test_failures.txt, all other tests should pass. The waterfall will +turn red if not. Once most of these pass, further testing will use LLVM's own +test suite. The tests can be run locally using: + github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py + Interesting work that remains to be done: * Write a pass to restructurize irreducible control flow. This needs to be done before register allocation to be efficient, because it may duplicate basic @@ -19,8 +29,60 @@ Interesting work that remains to be done: level. Note that LLVM's GPU code has such a pass, but it linearizes control flow (e.g. both sides of branches execute and are masked) which is undesirable for WebAssembly. -* Basic relooper to expose control flow as an AST. -* Figure out how to properly use MC for virtual ISAs. This may require some - refactoring of MC. + +//===---------------------------------------------------------------------===// + +set_local instructions have a return value. We should (a) model this, +and (b) write optimizations which take advantage of it. Keep in mind that +many set_local instructions are implicit! + +//===---------------------------------------------------------------------===// + +Br, br_if, and tableswitch instructions can support having a value on the +expression stack across the jump (sometimes). We should (a) model this, and +(b) extend the stackifier to utilize it. + +//===---------------------------------------------------------------------===// + +The min/max operators aren't exactly a +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "relooper" + +using namespace llvm; +using namespace Relooper; + +static cl::opt RelooperSplittingFactor( + "relooper-splitting-factor", + cl::desc( + "How much to discount code size when deciding whether to split a node"), + cl::init(5)); + +static cl::opt RelooperMultipleSwitchThreshold( + "relooper-multiple-switch-threshold", + cl::desc( + "How many entries to allow in a multiple before we use a switch"), + cl::init(10)); + +static cl::opt RelooperNestingLimit( + "relooper-nesting-limit", + cl::desc( + "How much nesting is acceptable"), + cl::init(20)); + + +namespace { +/// +/// Implements the relooper algorithm for a function's blocks. +/// +/// Implementation details: The Relooper instance has +/// ownership of the blocks and shapes, and frees them when done. 
+/// +struct RelooperAlgorithm { + std::deque<Block *> Blocks; + std::deque<Shape *> Shapes; + Shape *Root; + bool MinSize; + int BlockIdCounter; + int ShapeIdCounter; + + RelooperAlgorithm(); + ~RelooperAlgorithm(); + + void AddBlock(Block *New, int Id = -1); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Sets us to try to minimize size + void SetMinSize(bool MinSize_) { MinSize = MinSize_; } +}; + +struct RelooperAnalysis final : public FunctionPass { + static char ID; + RelooperAnalysis() : FunctionPass(ID) {} + const char *getPassName() const override { return "relooper"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnFunction(Function &F) override; +}; +} + +// RelooperAnalysis + +char RelooperAnalysis::ID = 0; +FunctionPass *llvm::createWebAssemblyRelooper() { + return new RelooperAnalysis(); +} + +bool RelooperAnalysis::runOnFunction(Function &F) { + DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); + RelooperAlgorithm R; + // FIXME: remove duplication between relooper's and LLVM's BBs. + std::map<const BasicBlock *, Block *> BB2B; + std::map<Block *, const BasicBlock *> B2BB; + for (const BasicBlock &BB : F) { + // FIXME: getName is wrong here, Code is meant to represent amount of code. + // FIXME: use BranchVarInit for switch. + Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); + R.AddBlock(B); + assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); + assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); + BB2B[&BB] = B; + B2BB[B] = &BB; + } + for (Block *B : R.Blocks) { + const BasicBlock *BB = B2BB[B]; + for (const BasicBlock *Successor : successors(BB)) + // FIXME: add branch's Condition and Code below. + B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); + } + R.Calculate(BB2B[&F.getEntryBlock()]); + return false; // Analysis passes don't modify anything. +} + +// Helpers + +typedef MapVector<Block *, BlockSet> BlockBlockSetMap; +typedef std::list<Block *> BlockList; + +template <typename T, typename U> +static bool contains(const T &container, const U &contained) { + return container.count(contained); +} + + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) + : Ancestor(nullptr), Labeled(true) { + // FIXME: move from char* to LLVM data structures + Condition = ConditionInit ? strdup(ConditionInit) : nullptr; + Code = CodeInit ? strdup(CodeInit) : nullptr; +} + +Branch::~Branch() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Condition))); + free(static_cast<void *>(const_cast<char *>(Code))); +} + +// Block + +Block::Block(const char *CodeInit, const char *BranchVarInit) + : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { + // FIXME: move from char* to LLVM data structures + Code = strdup(CodeInit); + BranchVar = BranchVarInit ?
strdup(BranchVarInit) : nullptr; +} + +Block::~Block() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Code))); + free(static_cast<void *>(const_cast<char *>(BranchVar))); +} + +void Block::AddBranchTo(Block *Target, const char *Condition, + const char *Code) { + assert(!contains(BranchesOut, Target) && + "cannot add more than one branch to the same target"); + BranchesOut[Target] = make_unique<Branch>(Condition, Code); +} + +// Relooper + +RelooperAlgorithm::RelooperAlgorithm() + : Root(nullptr), MinSize(false), BlockIdCounter(1), + ShapeIdCounter(0) { // block ID 0 is reserved for clearings +} + +RelooperAlgorithm::~RelooperAlgorithm() { + for (auto Curr : Blocks) + delete Curr; + for (auto Curr : Shapes) + delete Curr; +} + +void RelooperAlgorithm::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; + Blocks.push_back(New); +} + +struct RelooperRecursor { + RelooperAlgorithm *Parent; + RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} +}; + +void RelooperAlgorithm::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (!ToInvestigate.empty()) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) + continue; + Live.insert(Curr); + for (const auto &iter : Curr->BranchesOut) + ToInvestigate.push_back(iter.first); + } + } + + // If a block has multiple entries but no exits, and it is small enough, it + // is useful to split it. A common example is a C++ function where + // everything ends up at a final exit block and does some RAII cleanup.
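The cost test inside SplitDeadEnds below bounds the duplication: a dead-end block is split only when the copied code stays within TotalCodeSize / RelooperSplittingFactor. Worked through with the default factor of 5 (numbers invented for illustration):

    // TotalCodeSize = 1000 bytes across all live blocks
    // candidate: 40-byte dead-end block with 3 incoming branches
    // duplication cost: 40 * (3 - 1) = 80 <= 1000 / 5 = 200  -> split it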
+ // Without splitting, we will be forced to introduce labelled loops to + // allow reaching the final block + void SplitDeadEnds() { + unsigned TotalCodeSize = 0; + for (const auto &Curr : Live) { + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + for (const auto &Original : Live) { + if (Original->BranchesIn.size() <= 1 || + !Original->BranchesOut.empty()) + continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) + continue; // cannot split a looping node + if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > + TotalCodeSize / RelooperSplittingFactor) + continue; // if splitting increases raw code size by a significant + // amount, abort + // Split the node (for simplicity, we replace all the blocks, even + // though we could have reused the original) + DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); + for (const auto &Prior : Original->BranchesIn) { + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->AddBlock(Split, Original->Id); + Split->BranchesIn.insert(Prior); + std::unique_ptr<Branch> Details; + Details.swap(Prior->BranchesOut[Original]); + Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, + Details->Code); + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Branch *Details = iter.second.get(); + Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, + Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Post->BranchesIn.remove(Original); + } + } + for (const auto &iter : Splits) + Live.insert(iter); + for (const auto &iter : Removed) + Live.remove(iter); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (unsigned i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) + continue; + for (const auto &iter : Curr->BranchesOut) + iter.first->BranchesIn.insert(Curr); + } + + if (!MinSize) + Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + New->Id = Parent->ShapeIdCounter++; + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block.
If LimitTo is provided, only + // results in that set will appear + void GetBlocksOut(Block *Source, BlockSet &Entries, + BlockSet *LimitTo = nullptr) { + for (const auto &iter : Source->BranchesOut) + if (!LimitTo || contains(*LimitTo, iter.first)) + Entries.insert(iter.first); + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, + BlockSet &From) { + DEBUG(dbgs() << "  Solipsize '" << Target->Code << "' type " << Type + << "\n"); + for (auto iter = Target->BranchesIn.begin(); + iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + std::unique_ptr<Branch> PriorOut; + PriorOut.swap(Prior->BranchesOut[Target]); + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) + Multiple->Breaks++; // We are breaking out of this Multiple, so need a + // loop + iter++; // carefully increment iter before erasing + Target->BranchesIn.remove(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->ProcessedBranchesOut[Target].swap(PriorOut); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + DEBUG(dbgs() << "  MakeSimple inner block '" << Inner->Code << "'\n"); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.remove(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Direct, Simple, JustInner); + } + return Simple; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, + BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries + // until you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (!Queue.empty()) { + Block *Curr = *(Queue.begin()); + Queue.remove(*Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.remove(Curr); + // Add the elements prior to it + for (const auto &iter : Curr->BranchesIn) + Queue.insert(iter); + } + } + assert(!InnerBlocks.empty()); + + for (const auto &Curr : InnerBlocks) { + for (const auto &iter : Curr->BranchesOut) { + Block *Possible = iter.first; + if (!contains(InnerBlocks, Possible)) + NextEntries.insert(Possible); + } + } + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches + // as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (const auto &iter : Entries) + Solipsize(iter, Branch::Continue, Loop, InnerBlocks); + // B. Branches to outside the loop (a next entry) become breaks on this + // shape + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Break, Loop, InnerBlocks); + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, nullptr); + Loop->Inner = Inner; + return Loop; + }
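As a rough source-level intuition for what MakeLoop and Solipsize produce together (illustrative only; the block bodies are invented): branches back to a loop entry become continues, and branches out to a next entry become breaks.

    //   A: a(); if (cond) goto A; else goto B;
    // becomes, once A's self-branch is solipsized to Continue and its
    // branch to B to Break:
    //   while (1) { a(); if (cond) continue; break; }
    //   b(); // B is then processed as the loop's Next shape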
+ + // For each entry, find the independent group reachable by it. The + // independent group is the entry itself, plus all the blocks it can + // reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. + + // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, + BlockSet *Ignore = nullptr) { + typedef std::map<Block *, Block *> BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap &IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. + // We have reached it from there. + + HelperClass(BlockBlockSetMap &IndependentGroupsInit) + : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { + // Being in the list means you need to be invalidated + BlockList ToInvalidate; + ToInvalidate.push_back(New); + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + // Owner may have been invalidated, do not add to + // IndependentGroups! + if (contains(IndependentGroups, Owner)) + IndependentGroups[Owner].remove(Invalidatee); + if (Ownership[Invalidatee]) { // may have been seen before and + // invalidated already + Ownership[Invalidatee] = nullptr; + for (const auto &iter : Invalidatee->BranchesOut) { + Block *Target = iter.first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) + ToInvalidate.push_back(Target); + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to + // it from. + // If we reach a new block that is already marked as belonging to someone, + // it is reachable by two entries and is not valid for any of them. + // Remove it and all it can reach that have been visited. + + // Being in the queue means we just added this item, and + // we need to add its children + BlockList Queue; + for (const auto &Entry : Entries) { + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (!Queue.empty()) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership + // map if we are in the queue + if (!Owner) + continue; // we have been invalidated meanwhile after being reached + // from two entries + // Add all children + for (const auto &iter : Curr->BranchesOut) { + Block *New = iter.first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) + continue; // We reached an invalidated node + if (NewOwner != Owner) + // Invalidate this and all reachable that we have seen - we reached + // this from two locations + Helper.InvalidateWithChildren(New); + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one + // potential issue: + // If a->b, and a was invalidated, but then b was later reached by + // someone else, we must invalidate b. To check for this, we go over all + // elements in the independent groups, if an element has a parent which + // does *not* have the same owner, we must remove it and all its + // children.
+ + for (const auto &iter : Entries) { + BlockSet &CurrGroup = IndependentGroups[iter]; + BlockList ToInvalidate; + for (const auto &iter : CurrGroup) { + Block *Child = iter; + for (const auto &iter : Child->BranchesIn) { + Block *Parent = iter; + if (Ignore && contains(*Ignore, Parent)) + continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) + ToInvalidate.push_back(Child); + } + } + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (const auto &iter : Entries) + if (IndependentGroups[iter].empty()) + IndependentGroups.erase(iter); + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, Shape *Prev, + BlockSet &NextEntries) { + bool Fused = isa<SimpleShape>(Prev); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (auto &iter : IndependentGroups) { + Block *CurrEntry = iter.first; + BlockSet &CurrBlocks = iter.second; + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (const auto &CurrInner : CurrBlocks) { + // Remove the block from the remaining blocks + Blocks.remove(CurrInner); + // Find new next entries and fix branches to them + for (auto iter = CurrInner->BranchesOut.begin(); + iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + auto Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry->Id] = + Process(CurrBlocks, CurrEntries, nullptr); + // If we are not fused, then our entries will actually be checked + if (!Fused) + CurrEntry->IsCheckedMultipleEntry = true; + } + // Add entries not handled as next entries, they are deferred + for (const auto &Entry : Entries) + if (!contains(IndependentGroups, Entry)) + NextEntries.insert(Entry); + // The multiple has been created, we can decide how to implement it + if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { + Multiple->UseSwitch = true; + Multiple->Breaks++; // switch captures breaks + } + return Multiple; + } + + // Main function. + // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, + // those are the entries for the ->Next block on them, and the blocks + // are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we + // recursed we could blow the stack).
+ Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = nullptr; + + auto Make = [&](Shape *Temp) { + if (Prev) + Prev->Next = Temp; + if (!Ret) + Ret = Temp; + Prev = Temp; + Entries = NextEntries; + }; + + while (1) { + CurrTempIndex = 1 - CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->empty()) + return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Curr->BranchesIn.empty()) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove + // through multiples as opposed to looping since the former is more + // performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + if (!IndependentGroups.empty()) { + // We can handle a group in a multiple if its entry cannot be reached + // by another group. + // Note that it might be reachable by itself - a loop. But that is + // fine, we will create a loop inside the multiple block (which + // is the performant order to do it). + for (auto iter = IndependentGroups.begin(); + iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + auto curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); + iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a + // small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and + // recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the + // NextEntries. However, must be careful there since we create a + // Next, and that Next can prevent eliminating a break (since we no + // longer naturally reach the same place), which may necessitate a + // one-time loop, which makes the unnesting pointless. + if (IndependentGroups.size() == 2) { + // Find the smaller one + auto iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + auto SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + auto LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are + // identical - keep things symmetrical + // there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they + // are now invalid. TODO: use the smaller + // size as a limit? 
+ } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (const auto &Curr : SmallGroup) { + for (const auto &iter : Curr->BranchesOut) { + Block *Target = iter.first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) + break; + } + if (DeadEnd) + IndependentGroups.erase(LargeEntry); + } + } + + if (!IndependentGroups.empty()) + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, + *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + } + }; + + // Main + + BlockSet AllBlocks; + for (const auto &Curr : Pre.Live) { + AllBlocks.insert(Curr); + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); + assert(Root); + + /// + /// Relooper post-optimizer + /// + struct PostOptimizer { + RelooperAlgorithm *Parent; + std::stack<Shape *> LoopStack; + + PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} + + void ShapeSwitch(Shape* var, + std::function<void (SimpleShape*)> simple, + std::function<void (MultipleShape*)> multiple, + std::function<void (LoopShape*)> loop) { + switch (var->getKind()) { + case Shape::SK_Simple: { + simple(cast<SimpleShape>(var)); + break; + } + case Shape::SK_Multiple: { + multiple(cast<MultipleShape>(var)); + break; + } + case Shape::SK_Loop: { + loop(cast<LoopShape>(var)); + break; + } + } + }
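ShapeSwitch dispatches on Shape::getKind(), the LLVM-style RTTI discriminator declared in Relooper.h below. A minimal standalone sketch of that pattern, with hypothetical Node/Leaf types that are not from this patch:

    #include "llvm/Support/Casting.h"
    struct Node {
      enum NodeKind { NK_Leaf, NK_Pair };
      NodeKind getKind() const { return Kind; }
      Node(NodeKind K) : Kind(K) {}
    private:
      const NodeKind Kind; // the tag that stands in for vtable-based RTTI
    };
    struct Leaf : Node {
      Leaf() : Node(NK_Leaf) {}
      static bool classof(const Node *N) { return N->getKind() == NK_Leaf; }
    };
    // With getKind() and classof() in place, llvm::isa<Leaf>, llvm::cast<Leaf>
    // and llvm::dyn_cast<Leaf> all work without enabling C++ RTTI, which is
    // exactly how the cast<SimpleShape>(var) calls above resolve.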
+ + // Find the blocks that natural control flow can get us directly to, or + // through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + ShapeSwitch(S, [&](SimpleShape* Simple) { + Out.insert(Simple->Inner); + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FollowNaturalFlow(iter.second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, [&](LoopShape* Loop) { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + ShapeSwitch(Root, [](SimpleShape* Simple) { + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FindNaturals(iter.second, Root->Natural); + } + }, [&](LoopShape* Loop){ + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to + // by normal code execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, + LoopShape *LastLoop = nullptr, + unsigned Depth = 0) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + ShapeSwitch( + Root, + [&](SimpleShape* Simple) { + if (Simple->Inner->BranchVar) + LastLoop = + nullptr; // a switch clears out the loop (TODO: only for + // breaks, not continue) + + if (Simple->Next) { + if (!Simple->Inner->BranchVar && + Simple->Inner->ProcessedBranchesOut.size() == 2 && + Depth < RelooperNestingLimit) { + // If there is a next block, we already know at Simple + // creation time to make direct branches, and we can do + // nothing more in general. But, we try to optimize the + // case of a break and a direct: This would normally be + //   if (break?) { break; } .. + // but if we make sure to nest the else, we can save the + // break, + //   if (!break?) { .. } + // This is also better because the more canonical nested + // form is easier to further optimize later. The + // downside is more nesting, which adds to size in builds with + // whitespace. + // Note that we avoid switches, as it complicates control flow + // and is not relevant for the common case we optimize here. + bool Found = false; + bool Abort = false; + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Found = true; + if (!contains(NaturalBlocks, Target)) + Abort = true; + } else if (Details->Type != Branch::Direct) + Abort = true; + } + if (Found && !Abort) { + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else { + assert(Details->Type == Branch::Direct); + Details->Type = Branch::Nested; + } + } + } + Depth++; // this optimization increases depth, for us and all + // our next chain (i.e., until this call returns) + } + Next = Simple->Next; + } else { + // If there is no next then Natural is where we will + // go to by doing nothing, so we can potentially optimize some + // branches to direct. + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type != Branch::Direct && + contains(NaturalBlocks, + Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else if (Details->Type == Branch::Break && LastLoop && + LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks + // enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } + } + } + }, [&](MultipleShape* Multiple) + { + for (const auto &iter : Multiple->InnerMap) { + RemoveUnneededFlows(iter.second, Multiple->Next, + Multiple->Breaks ?
nullptr : LastLoop, + Depth + 1); + } + Next = Multiple->Next; + }, [&](LoopShape* Loop) + { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be + // labeled + void FindLabeledLoops(Shape *Root) { + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + + ShapeSwitch( + Root, + [&](SimpleShape *Simple) { + MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then + // visit it now + if (Fused && Fused->Breaks) + LoopStack.push(Fused); + if (Simple->Inner->BranchVar) + LoopStack.push(nullptr); // a switch means breaks are now useless, + // push a dummy + if (Fused) { + if (Fused->UseSwitch) + LoopStack.push(nullptr); // a switch means breaks are now + // useless, push a dummy + for (const auto &iter : Fused->InnerMap) { + FindLabeledLoops(iter.second); + } + } + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break || + Details->Type == Branch::Continue) { + assert(!LoopStack.empty()); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) { + Multiple->Labeled = true; + } else { + LoopShape *Loop = cast<LoopShape>(Details->Ancestor); + Loop->Labeled = true; + } + } else { + Details->Labeled = false; + } + } + if (Fused && Fused->UseSwitch) + LoopStack.pop(); + if (Simple->Inner->BranchVar) + LoopStack.pop(); + if (Fused && Fused->Breaks) + LoopStack.pop(); + if (Fused) + Next = Fused->Next; + else + Next = Root->Next; + } + } + , [&](MultipleShape* Multiple) { + if (Multiple->Breaks) + LoopStack.push(Multiple); + for (const auto &iter : Multiple->InnerMap) + FindLabeledLoops(iter.second); + if (Multiple->Breaks) + LoopStack.pop(); + Next = Root->Next; + } + , [&](LoopShape* Loop) { + LoopStack.push(Loop); + FindLabeledLoops(Loop->Inner); + LoopStack.pop(); + Next = Root->Next; + }); + } + } + + void Process(Shape * Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PostOptimizer(this).Process(Root); +} diff --git a/lib/Target/WebAssembly/Relooper.h b/lib/Target/WebAssembly/Relooper.h new file mode 100644 index 000000000000..7c564de82f34 --- /dev/null +++ b/lib/Target/WebAssembly/Relooper.h @@ -0,0 +1,186 @@ +//===-- Relooper.h - Top-level interface for WebAssembly  ----*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// +/// +/// \file +/// \brief This defines an optimized C++ implementation of the Relooper +/// algorithm, originally developed as part of Emscripten, which +/// generates a structured AST from arbitrary control flow. +/// +//===-------------------------------------------------------------------===//
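To make the interface below concrete, here is a hypothetical driver (not part of the patch); it assumes the RelooperAlgorithm class whose methods are defined in the implementation above, and the code strings are invented placeholders:

    RelooperAlgorithm R;
    Block *A = new Block("a();", nullptr); // nullptr: no switch variable
    Block *B = new Block("b();", nullptr);
    R.AddBlock(A, -1); // -1 requests a freshly assigned block id
    R.AddBlock(B, -1);
    A->AddBranchTo(B, nullptr); // nullptr condition: the default branch
    R.Calculate(A); // R.Root now holds the structured Shape tree
    // R owns all blocks and shapes; its destructor deletes them.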
+ +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Casting.h" + +#include <cassert> +#include <cstdarg> +#include <cstdio> +#include <deque> +#include <list> +#include <map> +#include <memory> +#include <stack> + +namespace llvm { + +namespace Relooper { + +struct Block; +struct Shape; + +/// +/// Info about a branching from one block to another +/// +struct Branch { + enum FlowType { + Direct = 0, // We will directly reach the right location through other + // means, no need for continue or break + Break = 1, + Continue = 2, + Nested = 3 // This code is directly reached, but we must be careful to + // ensure it is nested in an if - it is not reached + // unconditionally, other code paths exist alongside it that we need to make + // sure do not intertwine + }; + Shape + *Ancestor; // If not nullptr, this shape is the relevant one for purposes + // of getting to the target block. We break or continue on it + Branch::FlowType + Type; // If Ancestor is not nullptr, this says whether to break or + // continue + bool Labeled; // If a break or continue, whether we need to use a label + const char *Condition; // The condition for which we branch. For example, + // "my_var == 1". Conditions are checked one by one. + // One of the conditions should have nullptr as the + // condition, in which case it is the default + // FIXME: move from char* to LLVM data structures + const char *Code; // If provided, code that is run right before the branch is + // taken. This is useful for phis + // FIXME: move from char* to LLVM data structures + + Branch(const char *ConditionInit, const char *CodeInit = nullptr); + ~Branch(); +}; + +typedef SetVector<Block *> BlockSet; +typedef MapVector<Block *, Branch *> BlockBranchMap; +typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap; + +/// +/// Represents a basic block of code - some instructions that end with a +/// control flow modifier (a branch, return or throw). +/// +struct Block { + // Branches become processed after we finish the shape relevant to them. For + // example, when we recreate a loop, branches to the loop start become + // continues and are now processed. When we calculate what shape to generate + // from a set of blocks, we ignore processed branches. Blocks own the Branch + // objects they use, and destroy them when done. + OwningBlockBranchMap BranchesOut; + BlockSet BranchesIn; + OwningBlockBranchMap ProcessedBranchesOut; + BlockSet ProcessedBranchesIn; + Shape *Parent; // The shape we are directly inside + int Id; // A unique identifier, defined when added to relooper. Note that this + // uniquely identifies a *logical* block - if we split it, the two + // instances have the same content *and* the same Id + const char *Code; // The string representation of the code in this block. + // Owning pointer (we copy the input) + // FIXME: move from char* to LLVM data structures + const char *BranchVar; // A variable whose value determines where we go; if + // this is not nullptr, emit a switch on that variable + // FIXME: move from char* to LLVM data structures + bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching + // us requires setting the label variable + + Block(const char *CodeInit, const char *BranchVarInit); + ~Block(); + + void AddBranchTo(Block *Target, const char *Condition, + const char *Code = nullptr); +}; + +/// +/// Represents a structured control flow shape +/// +struct Shape { + int Id; // A unique identifier. Used to identify loops, labels are Lx where x + // is the Id.
Defined when added to relooper + Shape *Next; // The shape that will appear in the code right after this one + Shape *Natural; // The shape that control flow gets to naturally (if there is + // Next, then this is Next) + + /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.) + enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop }; + +private: + ShapeKind Kind; + +public: + ShapeKind getKind() const { return Kind; } + + Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {} +}; + +/// +/// Simple: No control flow at all, just instructions. +/// +struct SimpleShape : public Shape { + Block *Inner; + + SimpleShape() : Shape(SK_Simple), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Simple; } +}; + +/// +/// A shape that may be implemented with a labeled loop. +/// +struct LabeledShape : public Shape { + bool Labeled; // If we have a loop, whether it needs to be labeled + + LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {} +}; + +// Blocks with the same id were split and are identical, so we just care about +// ids in Multiple entries +typedef std::map<int, Shape *> IdShapeMap; + +/// +/// Multiple: A shape with more than one entry. If the next block to +/// be entered is among them, we run it and continue to +/// the next shape, otherwise we continue immediately to the +/// next shape. +/// +struct MultipleShape : public LabeledShape { + IdShapeMap InnerMap; // entry block ID -> shape + int Breaks; // If we have branches on us, we need a loop (or a switch). This + // is a counter of requirements, + // if we optimize it to 0, the loop is unneeded + bool UseSwitch; // Whether to switch on label as opposed to an if-else chain + + MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; } +}; + +/// +/// Loop: An infinite loop. +/// +struct LoopShape : public LabeledShape { + Shape *Inner; + + LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Loop; } +}; + +} // namespace Relooper + +} // namespace llvm diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index 3ff19d46f437..e972da5af74f 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -23,8 +23,22 @@ namespace llvm { class WebAssemblyTargetMachine; class FunctionPass; +FunctionPass *createWebAssemblyOptimizeReturned(); + FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyArgumentMove(); + +FunctionPass *createWebAssemblyStoreResults(); +FunctionPass *createWebAssemblyRegStackify(); +FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyPEI(); +FunctionPass *createWebAssemblyCFGStackify(); +FunctionPass *createWebAssemblyLowerBrUnless(); +FunctionPass *createWebAssemblyRegNumbering(); +FunctionPass *createWebAssemblyPeephole(); + +FunctionPass *createWebAssemblyRelooper(); } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td index a123bf6f66b6..551ad9345154 100644 --- a/lib/Target/WebAssembly/WebAssembly.td +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// -// -// This is a target description file for the WebAssembly architecture, which is -// also known as "wasm". -// +/// +/// \file +/// \brief This is a target description file for the WebAssembly architecture, +/// which is also known as "wasm". +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -50,6 +51,9 @@ def WebAssemblyInstrInfo : InstrInfo; // Minimal Viable Product. def : ProcessorModel<"mvp", NoSchedModel, []>; +// Generic processor: latest stable version. +def : ProcessorModel<"generic", NoSchedModel, []>; + // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>; diff --git a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp new file mode 100644 index 000000000000..3893c408cf63 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -0,0 +1,110 @@ +//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file moves ARGUMENT instructions after ScheduleDAG scheduling. +/// +/// Arguments are really live-in registers, however, since we use virtual +/// registers and LLVM doesn't support live-in virtual registers, we're +/// currently making do with ARGUMENT instructions which are placed at the top +/// of the entry block. The trick is to get them to *stay* at the top of the +/// entry block. +/// +/// The ARGUMENTS physical register keeps these instructions pinned in place +/// during liveness-aware CodeGen passes, however one thing which does not +/// respect this is the ScheduleDAG scheduler. This pass is therefore run +/// immediately after that. +/// +/// This is all hopefully a temporary solution until we find a better solution +/// for describing the live-in nature of arguments. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-argument-move" + +namespace { +class WebAssemblyArgumentMove final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyArgumentMove() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Argument Move"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyArgumentMove::ID = 0; +FunctionPass *llvm::createWebAssemblyArgumentMove() { + return new WebAssemblyArgumentMove(); +} + +/// Test whether the given instruction is an ARGUMENT. +static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Argument Move **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineBasicBlock &EntryMBB = MF.front(); + MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + + // Look for the first NonArg instruction. + for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) { + MachineInstr *MI = MII; + if (!IsArgument(MI)) { + InsertPt = MII; + break; + } + } + + // Now move any argument instructions later in the block + // to before our first NonArg instruction. + for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) { + MachineInstr *MI = I; + if (IsArgument(MI)) { + EntryMBB.insert(InsertPt, MI->removeFromParent()); + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp new file mode 100644 index 000000000000..0d2b4d9debb9 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -0,0 +1,285 @@ +//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a printer that converts from our internal +/// representation of machine-dependent LLVM code to the WebAssembly assembly +/// language.
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyRegisterInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { + +class WebAssemblyAsmPrinter final : public AsmPrinter { + const MachineRegisterInfo *MRI; + const WebAssemblyFunctionInfo *MFI; + +public: + WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {} + +private: + const char *getPassName() const override { + return "WebAssembly Assembly Printer"; + } + + //===------------------------------------------------------------------===// + // MachineFunctionPass Implementation. + //===------------------------------------------------------------------===// + + bool runOnMachineFunction(MachineFunction &MF) override { + MRI = &MF.getRegInfo(); + MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + return AsmPrinter::runOnMachineFunction(MF); + } + + //===------------------------------------------------------------------===// + // AsmPrinter Implementation. + //===------------------------------------------------------------------===// + + void EmitJumpTableInfo() override; + void EmitConstantPool() override; + void EmitFunctionBodyStart() override; + void EmitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + + MVT getRegType(unsigned RegNo) const; + const char *toString(MVT VT) const; + std::string regToString(const MachineOperand &MO); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Helpers. +//===----------------------------------------------------------------------===// + +MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterClass *TRC = + TargetRegisterInfo::isVirtualRegister(RegNo) ? + MRI->getRegClass(RegNo) : + MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + if (TRC->hasType(T)) + return T; + DEBUG(errs() << "Unknown type for register number: " << RegNo); + llvm_unreachable("Unknown register type"); + return MVT::Other; +}
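For orientation, the PARAM, RESULT, and LOCAL pseudo-instructions emitted by EmitFunctionBodyStart below print as assembly directives of roughly this shape (a hand-written illustration of the expected .s output, not actual compiler output):

    .param  i32, i32
    .result i32
    .local  i64, f64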
+ +std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(RegNo) && + "Unlowered physical register encountered during assembly printing"); + assert(!MFI->isVRegStackified(RegNo)); + unsigned WAReg = MFI->getWAReg(RegNo); + assert(WAReg != WebAssemblyFunctionInfo::UnusedReg); + return '$' + utostr(WAReg); +} + +const char *WebAssemblyAsmPrinter::toString(MVT VT) const { + return WebAssembly::TypeToString(VT); +} + +//===----------------------------------------------------------------------===// +// WebAssemblyAsmPrinter Implementation. +//===----------------------------------------------------------------------===// + +void WebAssemblyAsmPrinter::EmitConstantPool() { + assert(MF->getConstantPool()->getConstants().empty() && + "WebAssembly disables constant pools"); +} + +void WebAssemblyAsmPrinter::EmitJumpTableInfo() { + // Nothing to do; jump tables are incorporated into the instruction stream. +} + +static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { + const DataLayout &DL(F.getParent()->getDataLayout()); + const WebAssemblyTargetLowering &TLI = + *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); + SmallVector<EVT, 4> VTs; + ComputeValueVTs(TLI, DL, Ty, VTs); + + for (EVT VT : VTs) { + unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) + ValueVTs.push_back(RegisterVT); + } +} + +void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { + if (!MFI->getParams().empty()) { + MCInst Param; + Param.setOpcode(WebAssembly::PARAM); + for (MVT VT : MFI->getParams()) + Param.addOperand(MCOperand::createImm(VT.SimpleTy)); + EmitToStreamer(*OutStreamer, Param); + } + + SmallVector<MVT, 4> ResultVTs; + const Function &F(*MF->getFunction()); + ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); + // If the return type needs to be legalized it will get converted into + // passing a pointer. + if (ResultVTs.size() == 1) { + MCInst Result; + Result.setOpcode(WebAssembly::RESULT); + Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy)); + EmitToStreamer(*OutStreamer, Result); + } + + bool AnyWARegs = false; + MCInst Local; + Local.setOpcode(WebAssembly::LOCAL); + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); + unsigned WAReg = MFI->getWAReg(VReg); + // Don't declare unused registers. + if (WAReg == WebAssemblyFunctionInfo::UnusedReg) + continue; + // Don't redeclare parameters. + if (WAReg < MFI->getParams().size()) + continue; + // Don't declare stackified registers.
+ if (int(WAReg) < 0) + continue; + Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy)); + AnyWARegs = true; + } + auto &PhysRegs = MFI->getPhysRegs(); + for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { + if (PhysRegs[PReg] == -1U) + continue; + Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy)); + AnyWARegs = true; + } + if (AnyWARegs) + EmitToStreamer(*OutStreamer, Local); + + AsmPrinter::EmitFunctionBodyStart(); +} + +void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { + DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); + + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + // These represent values which are live into the function entry, so there's + // no instruction to emit. + break; + case WebAssembly::LOOP_END: + // This is a no-op which just exists to tell AsmPrinter.cpp that there's a + // fallthrough which nevertheless requires a label for the destination here. + break; + default: { + WebAssemblyMCInstLower MCInstLowering(OutContext, *this); + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); + break; + } + } +} + +bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS)) + return false; + + if (!ExtraCode) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + OS << MO.getImm(); + return false; + case MachineOperand::MO_Register: + OS << regToString(MO); + return false; + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_ExternalSymbol: + GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(OS, MAI); + return false; + default: + break; + } + } + + return true; +} + +bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + if (!ExtraCode) { + // TODO: For now, we just hard-code 0 as the constant offset; teach + // SelectInlineAsmMemoryOperand how to do address mode matching. + OS << "0(" + regToString(MI->getOperand(OpNo)) + ')'; + return false; + } + + return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() { + RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32); + RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64); +} diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp new file mode 100644 index 000000000000..e9671ee07e69 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -0,0 +1,468 @@ +//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a CFG stacking pass. +/// +/// This pass reorders the blocks in a function to put them into a reverse +/// post-order [0], with special care to keep the order as similar as possible +/// to the original order, and to keep loops contiguous even in the case of +/// split backedges. +/// +/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since +/// scope boundaries serve as the labels for WebAssembly's control transfers. +/// +/// This is sufficient to convert arbitrary CFGs into a form that works on +/// WebAssembly, provided that all loops are single-entry. +/// +/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-cfg-stackify" + +namespace { +class WebAssemblyCFGStackify final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly CFG Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyCFGStackify::ID = 0; +FunctionPass *llvm::createWebAssemblyCFGStackify() { + return new WebAssemblyCFGStackify(); +} + +static void EliminateMultipleEntryLoops(MachineFunction &MF, + const MachineLoopInfo &MLI) { + SmallPtrSet<MachineBasicBlock *, 8> InSet; + for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); + I != E; ++I) { + const std::vector<MachineBasicBlock *> &CurrentSCC = *I; + + // Skip trivial SCCs. + if (CurrentSCC.size() == 1) + continue; + + InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); + MachineBasicBlock *Header = nullptr; + for (MachineBasicBlock *MBB : CurrentSCC) { + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (InSet.count(Pred)) + continue; + if (!Header) { + Header = MBB; + break; + } + // TODO: Implement multiple-entry loops. + report_fatal_error("multiple-entry loops are not supported yet"); + } + } + assert(MLI.isLoopHeader(Header)); + + InSet.clear(); + } +}
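EliminateMultipleEntryLoops establishes the single-entry property that the RPO sort below relies on. As a reference point, a minimal reverse post-order over a toy adjacency list looks like this (an illustrative sketch only, not from the patch):

    #include <algorithm>
    #include <vector>
    static void PostOrder(unsigned N,
                          const std::vector<std::vector<unsigned>> &Succ,
                          std::vector<bool> &Seen, std::vector<unsigned> &Out) {
      Seen[N] = true;
      for (unsigned S : Succ[N])
        if (!Seen[S])
          PostOrder(S, Succ, Seen, Out); // visit successors first
      Out.push_back(N); // emit N after everything reachable from it
    }
    static std::vector<unsigned> RPO(const std::vector<std::vector<unsigned>> &Succ) {
      std::vector<bool> Seen(Succ.size(), false);
      std::vector<unsigned> Out;
      PostOrder(0, Succ, Seen, Out); // assume node 0 is the entry
      std::reverse(Out.begin(), Out.end());
      return Out; // entry first; non-backedge preds precede their successors
    }
    // SortBlocks below implements the same traversal iteratively, and biases
    // the successor visit order so that loop bodies stay contiguous.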
+ +namespace { +/// Post-order traversal stack entry. +struct POStackEntry { + MachineBasicBlock *MBB; + SmallVector<MachineBasicBlock *, 0> Succs; + + POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI); +}; +} // end anonymous namespace + +static bool LoopContains(const MachineLoop *Loop, + const MachineBasicBlock *MBB) { + return Loop ? Loop->contains(MBB) : true; +} + +POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI) + : MBB(MBB), Succs(MBB->successors()) { + // RPO is not a unique form, since at every basic block with multiple + // successors, the DFS has to pick which order to visit the successors in. + // Sort them strategically (see below). + MachineLoop *Loop = MLI.getLoopFor(MBB); + MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; + std::stable_sort( + Succs.begin(), Succs.end(), + [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { + if (A == B) + return false; + + // Keep loops contiguous by preferring the block that's in the same + // loop. + bool LoopContainsA = LoopContains(Loop, A); + bool LoopContainsB = LoopContains(Loop, B); + if (LoopContainsA && !LoopContainsB) + return true; + if (!LoopContainsA && LoopContainsB) + return false; + + // Minimize perturbation by preferring the block which is the immediate + // layout successor. + if (A == LayoutSucc) + return true; + if (B == LayoutSucc) + return false; + + // TODO: More sophisticated orderings may be profitable here. + + return false; + }); +} + +/// Return the "bottom" block of a loop. This differs from +/// MachineLoop::getBottomBlock in that it works even if the loop is +/// discontiguous. +static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { + MachineBasicBlock *Bottom = Loop->getHeader(); + for (MachineBasicBlock *MBB : Loop->blocks()) + if (MBB->getNumber() > Bottom->getNumber()) + Bottom = MBB; + return Bottom; +} + +/// Sort the blocks in RPO, taking special care to make sure that loops are +/// contiguous even in the case of split backedges. +/// +/// TODO: Determine whether RPO is actually worthwhile, or whether we should +/// move to just a stable-topological-sort-based approach that would preserve +/// more of the original order. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { + // Note that we do our own RPO rather than using + // "llvm/ADT/PostOrderIterator.h" because we want control over the order that + // successors are visited in (see above). Also, we can sort the blocks in the + // MachineFunction as we go. + SmallPtrSet<MachineBasicBlock *, 16> Visited; + SmallVector<POStackEntry, 16> Stack; + + MachineBasicBlock *EntryBlock = &*MF.begin(); + Visited.insert(EntryBlock); + Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); + + for (;;) { + POStackEntry &Entry = Stack.back(); + SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; + if (!Succs.empty()) { + MachineBasicBlock *Succ = Succs.pop_back_val(); + if (Visited.insert(Succ).second) + Stack.push_back(POStackEntry(Succ, MF, MLI)); + continue; + } + + // Put the block in its position in the MachineFunction. + MachineBasicBlock &MBB = *Entry.MBB; + MBB.moveBefore(&*MF.begin()); + + // Branch instructions may utilize a fallthrough, so update them if a + // fallthrough has been added or removed.
+ if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && + !MBB.back().isBarrier()) + report_fatal_error( + "Non-branch terminator with fallthrough cannot yet be rewritten"); + if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) + MBB.updateTerminator(); + + Stack.pop_back(); + if (Stack.empty()) + break; + } + + // Now that we've sorted the blocks in RPO, renumber them. + MF.RenumberBlocks(); + +#ifndef NDEBUG + SmallSetVector<MachineLoop *, 8> OnStack; + + // Insert a sentinel representing the degenerate loop that starts at the + // function entry block and includes the entire function as a "loop" that + // executes once. + OnStack.insert(nullptr); + + for (auto &MBB : MF) { + assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); + + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (Loop && &MBB == Loop->getHeader()) { + // Loop header. The loop predecessor should be sorted above, and the other + // predecessors should be backedges below. + for (auto Pred : MBB.predecessors()) + assert( + (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && + "Loop header predecessors must be loop predecessors or backedges"); + assert(OnStack.insert(Loop) && "Loops should be declared at most once."); + } else { + // Not a loop header. All predecessors should be sorted above. + for (auto Pred : MBB.predecessors()) + assert(Pred->getNumber() < MBB.getNumber() && + "Non-loop-header predecessors should be topologically sorted"); + assert(OnStack.count(MLI.getLoopFor(&MBB)) && + "Blocks must be nested in their loops"); + } + while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) + OnStack.pop_back(); + } + assert(OnStack.pop_back_val() == nullptr && + "The function entry block shouldn't actually be a loop header"); + assert(OnStack.empty() && + "Control flow stack pushes and pops should be balanced."); +#endif +} + +/// Test whether Pred has any terminators explicitly branching to MBB, as +/// opposed to falling through. Note that it's possible (e.g. in unoptimized +/// code) for a branch instruction to both branch to a block and fallthrough +/// to it, so we check the actual branch operands to see if there are any +/// explicit mentions. +static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { + for (MachineInstr &MI : Pred->terminators()) + for (MachineOperand &MO : MI.explicit_operands()) + if (MO.isMBB() && MO.getMBB() == MBB) + return true; + return false; +} + +/// Insert a BLOCK marker for branches to MBB (if needed). +static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI, + MachineDominatorTree &MDT) { + // First compute the nearest common dominator of all forward non-fallthrough + // predecessors so that we minimize the time that the BLOCK is on the stack, + // which reduces overall stack height. + MachineBasicBlock *Header = nullptr; + bool IsBranchedTo = false; + int MBBNumber = MBB.getNumber(); + for (MachineBasicBlock *Pred : MBB.predecessors()) + if (Pred->getNumber() < MBBNumber) { + Header = Header ?
MDT.findNearestCommonDominator(Header, Pred) : Pred; + if (ExplicitlyBranchesTo(Pred, &MBB)) + IsBranchedTo = true; + } + if (!Header) + return; + if (!IsBranchedTo) + return; + + assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); + MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); + + // If the nearest common dominator is inside a more deeply nested context, + // walk out to the nearest scope which isn't more deeply nested. + for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { + if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { + if (ScopeTop->getNumber() > Header->getNumber()) { + // Skip over an intervening scope. + I = next(MachineFunction::iterator(ScopeTop)); + } else { + // We found a scope level at an appropriate depth. + Header = ScopeTop; + break; + } + } + } + + // If there's a loop which ends just before MBB which contains Header, we can + // reuse its label instead of inserting a new BLOCK. + for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); + Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) + if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) + return; + + // Decide where in Header to put the BLOCK. + MachineBasicBlock::iterator InsertPos; + MachineLoop *HeaderLoop = MLI.getLoopFor(Header); + if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { + // Header is the header of a loop that does not lexically contain MBB, so + // the BLOCK needs to be above the LOOP. + InsertPos = Header->begin(); + } else { + // Otherwise, insert the BLOCK as late in Header as we can, but before the + // beginning of the local expression tree and any nested BLOCKs. + InsertPos = Header->getFirstTerminator(); + while (InsertPos != Header->begin() && + prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && + prev(InsertPos)->getOpcode() != WebAssembly::LOOP) + --InsertPos; + } + + // Add the BLOCK. + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) + .addMBB(&MBB); + + // Track the farthest-spanning scope that ends at this point. + int Number = MBB.getNumber(); + if (!ScopeTops[Number] || + ScopeTops[Number]->getNumber() > Header->getNumber()) + ScopeTops[Number] = Header; +} + +/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). +static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI) { + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (!Loop || Loop->getHeader() != &MBB) + return; + + // The operand of a LOOP is the first block after the loop. If the loop is the + // bottom of the function, insert a dummy block at the end. + MachineBasicBlock *Bottom = LoopBottom(Loop); + auto Iter = next(MachineFunction::iterator(Bottom)); + if (Iter == MF.end()) { + MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); + // Give it a fake predecessor so that AsmPrinter prints its label. + Label->addSuccessor(Label); + MF.push_back(Label); + Iter = next(MachineFunction::iterator(Bottom)); + } + MachineBasicBlock *AfterLoop = &*Iter; + BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP)) + .addMBB(AfterLoop); + + // Emit a special no-op telling the asm printer that we need a label to close + // the loop scope, even though the destination is only reachable by + // fallthrough. 
+ if (!Bottom->back().isBarrier()) + BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END)); + + assert((!ScopeTops[AfterLoop->getNumber()] || + ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && + "With RPO we should visit the outer-most loop for a block first."); + if (!ScopeTops[AfterLoop->getNumber()]) + ScopeTops[AfterLoop->getNumber()] = &MBB; +} + +/// Insert LOOP and BLOCK markers at appropriate places. +static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, + const WebAssemblyInstrInfo &TII, + MachineDominatorTree &MDT) { + // For each block whose label represents the end of a scope, record the block + // which holds the beginning of the scope. This will allow us to quickly skip + // over scoped regions when walking blocks. We allocate one more than the + // number of blocks in the function to accommodate for the possible fake block + // we may insert at the end. + SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); + + for (auto &MBB : MF) { + // Place the LOOP for MBB if MBB is the header of a loop. + PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI); + + // Place the BLOCK for MBB if MBB is branched to from above. + PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); + } +} + +#ifndef NDEBUG +static bool +IsOnStack(const SmallVectorImpl<std::pair<const MachineBasicBlock *, bool>> &Stack, + const MachineBasicBlock *MBB) { + for (const auto &Pair : Stack) + if (Pair.first == MBB) + return true; + return false; +} +#endif + +bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** CFG Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + const auto &MLI = getAnalysis<MachineLoopInfo>(); + auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + // RPO sorting needs all loops to be single-entry. + EliminateMultipleEntryLoops(MF, MLI); + + // Sort the blocks in RPO, with contiguous loops. + SortBlocks(MF, MLI); + + // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. + PlaceMarkers(MF, MLI, TII, MDT); + +#ifndef NDEBUG + // Verify that block and loop beginnings and endings are in LIFO order, and + // that all references to blocks are to blocks on the stack at the point of + // the reference. + SmallVector<std::pair<const MachineBasicBlock *, bool>, 0> Stack; + for (auto &MBB : MF) { + while (!Stack.empty() && Stack.back().first == &MBB) + if (Stack.back().second) { + assert(Stack.size() >= 2); + Stack.pop_back(); + Stack.pop_back(); + } else { + assert(Stack.size() >= 1); + Stack.pop_back(); + } + for (auto &MI : MBB) + switch (MI.getOpcode()) { + case WebAssembly::LOOP: + Stack.push_back(std::make_pair(&MBB, false)); + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); + break; + case WebAssembly::BLOCK: + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); + break; + default: + // Verify that all referenced blocks are in scope. A reference to a + // block with a negative number is invalid, but can happen with inline + // asm, so we shouldn't assert on it, but instead let CodeGen properly + // fail on it.
+        for (const MachineOperand &MO : MI.explicit_operands())
+          if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
+            assert(IsOnStack(Stack, MO.getMBB()));
+        break;
+      }
+  }
+  assert(Stack.empty());
+#endif
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
new file mode 100644
index 000000000000..1b761b1a9d73
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -0,0 +1,81 @@
+//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific support for the FastISel
+/// class. Some of the target-specific code is generated by tablegen in the
+/// file WebAssemblyGenFastISel.inc, which is #included here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fastisel"
+
+namespace {
+
+class WebAssemblyFastISel final : public FastISel {
+  /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const WebAssemblySubtarget *Subtarget;
+  LLVMContext *Context;
+
+  // Call handling routines.
+private:
+public:
+  // Backend specific FastISel code.
+  WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
+                      const TargetLibraryInfo *LibInfo)
+      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
+    Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
+    Context = &FuncInfo.Fn->getContext();
+  }
+
+  bool fastSelectInstruction(const Instruction *I) override;
+
+#include "WebAssemblyGenFastISel.inc"
+};
+
+} // end anonymous namespace
+
+bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
+  switch (I->getOpcode()) {
+  default:
+    break;
+    // TODO: add fast-isel selection cases here...
+  }
+
+  // Fall back to target-independent instruction selection.
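+  // Editor's note (illustrative sketch, not in the original patch): as the
+  // backend matures, cases would be added to the switch above and routed to
+  // target-specific helpers, e.g.:
+  //   case Instruction::Ret:
+  //     return selectRet(I); // hypothetical helper, not defined in this patch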
+  return selectOperator(I, I->getOpcode());
+}
+
+FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                      const TargetLibraryInfo *LibInfo) {
+  return new WebAssemblyFastISel(FuncInfo, LibInfo);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e4ca82e963c2..0eefd57f1f2c 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -35,11 +35,20 @@ using namespace llvm;
 #define DEBUG_TYPE "wasm-frame-info"
 
 // TODO: Implement a red zone?
+// TODO: wasm64
+// TODO: Prolog/epilog should be stackified too. This pass runs after register
+// stackification, so we'll have to do it manually.
+// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
 
 /// Return true if the specified function should have a dedicated frame pointer
 /// register.
 bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
-  llvm_unreachable("TODO: implement hasFP");
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const auto *RegInfo =
+      MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+         MFI->hasStackMap() || MFI->hasPatchPoint() ||
+         RegInfo->needsStackRealignment(MF);
 }
 
 /// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -52,23 +61,115 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
   return !MF.getFrameInfo()->hasVarSizedObjects();
 }
 
+
+/// Adjust the stack pointer by a constant amount.
+static void adjustStackPointer(unsigned StackSize,
+                               bool AdjustUp,
+                               MachineFunction& MF,
+                               MachineBasicBlock& MBB,
+                               const TargetInstrInfo* TII,
+                               MachineBasicBlock::iterator InsertPt,
+                               const DebugLoc& DL) {
+  auto &MRI = MF.getRegInfo();
+  unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg)
+      .addExternalSymbol(SPSymbol);
+  // This MachinePointerInfo should reference __stack_pointer as well but
+  // doesn't because MachinePointerInfo() takes a GV which we don't have for
+  // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry
+  // is appropriate instead. (likewise for EmitEpilogue below)
+  auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(),
+                                        MachineMemOperand::MOLoad, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+      .addImm(0)
+      .addReg(SPReg)
+      .addMemOperand(LoadMMO);
+  // Add/Subtract the frame size
+  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addImm(StackSize);
+  BuildMI(MBB, InsertPt, DL,
+          TII->get(AdjustUp ? WebAssembly::ADD_I32 : WebAssembly::SUB_I32),
+          WebAssembly::SP32)
+      .addReg(SPReg)
+      .addReg(OffsetReg);
+  // The SP32 register now has the new stacktop. Also write it back to memory.
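+  // Editor's sketch (not part of the original patch): for
+  // adjustStackPointer(16, /*AdjustUp=*/false), the code above and the
+  // write-back below amount to:
+  //   SPReg     = i32.const __stack_pointer
+  //   SPReg     = i32.load 0(SPReg)        ; current stack top
+  //   OffsetReg = i32.const 16
+  //   SP32      = i32.sub SPReg, OffsetReg ; new stack top
+  //   i32.store 0(&__stack_pointer), SP32  ; write it back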
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addExternalSymbol(SPSymbol);
+  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+                                    MachineMemOperand::MOStore, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addReg(WebAssembly::SP32)
+      .addMemOperand(MMO);
+}
+
 void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
     MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator I) const {
-  llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+  const auto *TII =
+      static_cast<const WebAssemblyInstrInfo *>(
+          MF.getSubtarget().getInstrInfo());
+  DebugLoc DL = I->getDebugLoc();
+  unsigned Opc = I->getOpcode();
+  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+  unsigned Amount = I->getOperand(0).getImm();
+  if (Amount)
+    adjustStackPointer(Amount, IsDestroy, MF, MBB,
+                       TII, I, DL);
+  MBB.erase(I);
 }
 
 void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
                                             MachineBasicBlock &MBB) const {
-  llvm_unreachable("TODO: implement emitPrologue");
+  // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
+  auto *MFI = MF.getFrameInfo();
+  assert(MFI->getCalleeSavedInfo().empty() &&
+         "WebAssembly should not have callee-saved registers");
+  assert(!hasFP(MF) && "Functions needing frame pointers not yet supported");
+  uint64_t StackSize = MFI->getStackSize();
+  if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0))
+    return;
+
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+
+  auto InsertPt = MBB.begin();
+  DebugLoc DL;
+
+  adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL);
 }
 
 void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
                                             MachineBasicBlock &MBB) const {
-  llvm_unreachable("TODO: implement emitEpilogue");
-}
+  uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+  if (!StackSize)
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  auto &MRI = MF.getRegInfo();
+  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  auto InsertPt = MBB.getFirstTerminator();
+  DebugLoc DL;
 
-void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
-    MachineFunction &MF, RegScavenger *RS) const {
-  llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+  if (InsertPt != MBB.end()) {
+    DL = InsertPt->getDebugLoc();
+  }
+
+  // Restore the stack pointer. Without FP its value is just SP32 - stacksize.
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addImm(StackSize);
+  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32)
+      .addReg(WebAssembly::SP32)
+      .addReg(OffsetReg);
+  // Re-use OffsetReg to hold the address of the stacktop
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addExternalSymbol(SPSymbol);
+  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+                                    MachineMemOperand::MOStore, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addReg(WebAssembly::SP32)
+      .addMemOperand(MMO);
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index 0b112d02c0bf..5f4708fe77ed 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -38,9 +38,6 @@ public:
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def
new file mode 100644
index 000000000000..3a03fa55b220
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -0,0 +1,25 @@
+//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file describes the various WebAssembly ISD node types.
+///
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+HANDLE_NODETYPE(CALL1)
+HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RETURN)
+HANDLE_NODETYPE(ARGUMENT)
+HANDLE_NODETYPE(Wrapper)
+HANDLE_NODETYPE(BR_IF)
+HANDLE_NODETYPE(TABLESWITCH)
+
+// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 518ef332a6c7..8390f797c43e 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -56,13 +56,68 @@ public:
 
   SDNode *Select(SDNode *Node) override;
 
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "WebAssemblyGenDAGISel.inc"
+
 private:
   // add select functions here...
 };
 } // end anonymous namespace
 
 SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
-  llvm_unreachable("TODO: implement Select");
+  // Dump information about the Node being selected.
+  DEBUG(errs() << "Selecting: ");
+  DEBUG(Node->dump(CurDAG));
+  DEBUG(errs() << "\n");
+
+  // If we have a custom node, we already have selected!
+  if (Node->isMachineOpcode()) {
+    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+    Node->setNodeId(-1);
+    return nullptr;
+  }
+
+  // Handle the few cases that need custom selection.
+  SDNode *ResNode = nullptr;
+  EVT VT = Node->getValueType(0);
+
+  switch (Node->getOpcode()) {
+  default:
+    break;
+    // If we need WebAssembly-specific selection, it would go here.
+    (void)VT;
+  }
+
+  // Select the default instruction.
+  ResNode = SelectCode(Node);
+
+  DEBUG(errs() << "=> ");
+  if (ResNode == nullptr || ResNode == Node)
+    DEBUG(Node->dump(CurDAG));
+  else
+    DEBUG(ResNode->dump(CurDAG));
+  DEBUG(errs() << "\n");
+
+  return ResNode;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+  switch (ConstraintID) {
+  case InlineAsm::Constraint_i:
+  case InlineAsm::Constraint_m:
+    // We support only simple memory operands: a single address operand that
+    // needs no special handling.
+    OutOps.push_back(Op);
+    return false;
+  default:
+    break;
+  }
+
+  return true;
+}
 
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4184eb6dc5a6..7a89f788c1ad 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -17,10 +17,13 @@
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
-#include "WebAssemblyTargetObjectFile.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
@@ -32,14 +35,254 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-lower"
 
+namespace {
+// Diagnostic information for unimplemented or unsupported feature reporting.
+// TODO: This code is copied from BPF and AMDGPU; consider factoring it out
+// and sharing code.
+class DiagnosticInfoUnsupported final : public DiagnosticInfo {
+private:
+  // Debug location where this diagnostic is triggered.
+  DebugLoc DLoc;
+  const Twine &Description;
+  const Function &Fn;
+  SDValue Value;
+
+  static int KindID;
+
+  static int getKindID() {
+    if (KindID == 0)
+      KindID = llvm::getNextAvailablePluginDiagnosticKind();
+    return KindID;
+  }
+
+public:
+  DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc,
+                            SDValue Value)
+      : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()),
+        Description(Desc), Fn(Fn), Value(Value) {}
+
+  void print(DiagnosticPrinter &DP) const override {
+    std::string Str;
+    raw_string_ostream OS(Str);
+
+    if (DLoc) {
+      auto DIL = DLoc.get();
+      StringRef Filename = DIL->getFilename();
+      unsigned Line = DIL->getLine();
+      unsigned Column = DIL->getColumn();
+      OS << Filename << ':' << Line << ':' << Column << ' ';
+    }
+
+    OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n'
+       << Description;
+    if (Value)
+      Value->print(OS);
+    OS << '\n';
+    OS.flush();
+    DP << Str;
+  }
+
+  static bool classof(const DiagnosticInfo *DI) {
+    return DI->getKind() == getKindID();
+  }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+} // end anonymous namespace
+
 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     const TargetMachine &TM, const WebAssemblySubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
+  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+  // Booleans always contain 0 or 1.
+  setBooleanContents(ZeroOrOneBooleanContent);
   // WebAssembly does not produce floating-point exceptions on normal floating
   // point operations.
   setHasFloatingPointExceptions(false);
   // We don't know the microarchitecture here, so just reduce register pressure.
   setSchedulingPreference(Sched::RegPressure);
+  // Tell ISel that we have a stack pointer.
+  setStackPointerRegisterToSaveRestore(
+      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
+  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
+  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
+  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+  // Compute derived properties from the register classes.
+  computeRegisterProperties(Subtarget->getRegisterInfo());
+
+  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
+  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
+
+  // Take the default expansion for va_arg, va_copy, and va_end. There is no
+  // default action for va_start, so we handle that with custom lowering.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+  for (auto T : {MVT::f32, MVT::f64}) {
+    // Don't expand the floating-point types to constant pools.
+    setOperationAction(ISD::ConstantFP, T, Legal);
+    // Expand floating-point comparisons.
+    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
+                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
+      setCondCodeAction(CC, T, Expand);
+    // Expand floating-point library function operators.
+    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
+                    ISD::FREM, ISD::FMA})
+      setOperationAction(Op, T, Expand);
+    // Note supported floating-point library function operators that otherwise
+    // default to expand.
+    for (auto Op :
+         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+      setOperationAction(Op, T, Legal);
+    // Support minnan and maxnan, which otherwise default to expand.
+    setOperationAction(ISD::FMINNAN, T, Legal);
+    setOperationAction(ISD::FMAXNAN, T, Legal);
+  }
+
+  for (auto T : {MVT::i32, MVT::i64}) {
+    // Expand unavailable integer operations.
+    for (auto Op :
+         {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+          ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
+          ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
+          ISD::SUBE}) {
+      setOperationAction(Op, T, Expand);
+    }
+  }
+
+  // As a special case, these operators use the type to mean the type to
+  // sign-extend from.
+  for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32})
+    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+
+  // Dynamic stack allocation: use the default expansion.
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
+
+  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+  // Expand these forms; we pattern-match the forms that we can handle in isel.
+  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
+      setOperationAction(Op, T, Expand);
+
+  // We have custom switch handling.
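+  // (Editor's note, not in the original patch: "custom" here means BR_JT is
+  // rewritten into a WebAssemblyISD::TABLESWITCH node in LowerBR_JT below,
+  // so a three-case switch becomes roughly
+  //   tableswitch $index, default, bb0, bb1, bb2
+  // rather than a chain of compares and branches.)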
+  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+  // WebAssembly doesn't have:
+  //  - Floating-point extending loads.
+  //  - Floating-point truncating stores.
+  //  - i1 extending loads.
+  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  for (auto T : MVT::integer_valuetypes())
+    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+      setLoadExtAction(Ext, T, MVT::i1, Promote);
+
+  // Trap lowers to wasm unreachable.
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+}
+
+FastISel *WebAssemblyTargetLowering::createFastISel(
+    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
+  return WebAssembly::createFastISel(FuncInfo, LibInfo);
+}
+
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode * /*GA*/) const {
+  // All offsets can be folded.
+  return true;
+}
+
+MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
+                                                      EVT VT) const {
+  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
+  if (BitWidth > 1 && BitWidth < 8)
+    BitWidth = 8;
+
+  if (BitWidth > 64) {
+    BitWidth = 64;
+    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
+           "64-bit shift counts ought to be enough for anyone");
+  }
+
+  MVT Result = MVT::getIntegerVT(BitWidth);
+  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+         "Unable to represent scalar shift amount type");
+  return Result;
+}
+
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
+  case WebAssemblyISD::FIRST_NUMBER:
+    break;
+#define HANDLE_NODETYPE(NODE)                                                  \
+  case WebAssemblyISD::NODE:                                                   \
+    return "WebAssemblyISD::" #NODE;
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
+  }
+  return nullptr;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
+    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+  // First, see if this is a constraint that directly corresponds to a
+  // WebAssembly register class.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+      if (VT.isInteger() && !VT.isVector()) {
+        if (VT.getSizeInBits() <= 32)
+          return std::make_pair(0U, &WebAssembly::I32RegClass);
+        if (VT.getSizeInBits() <= 64)
+          return std::make_pair(0U, &WebAssembly::I64RegClass);
+      }
+      break;
+    default:
+      break;
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
+  // Assume ctz is a relatively cheap operation.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
+  // Assume clz is a relatively cheap operation.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                      const AddrMode &AM,
+                                                      Type *Ty,
+                                                      unsigned AS) const {
+  // WebAssembly offsets are added as unsigned without wrapping. The
+  // isLegalAddressingMode hook gives us no way to determine if wrapping could
+  // be happening, so we approximate this by accepting only non-negative
+  // offsets.
+  if (AM.BaseOffs < 0)
+    return false;
+
+  // WebAssembly has no scale register operands.
+  if (AM.Scale != 0)
+    return false;
+
+  // Everything else is legal.
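+  // Editor's examples (not in the original patch): under these rules,
+  //   base + 16    is legal and folds into a load/store offset,
+  //   base - 16    is rejected (a negative offset could wrap), and
+  //   base + 4*idx is rejected (wasm has no scaled-index addressing).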
+  return true;
+}
 
 //===----------------------------------------------------------------------===//
@@ -50,16 +293,359 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 // Lowering Code
 //===----------------------------------------------------------------------===//
 
+static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  DAG.getContext()->diagnose(
+      DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue()));
+}
+
+// Test whether the given calling convention is supported.
+static bool CallingConvSupported(CallingConv::ID CallConv) {
+  // We currently support the language-independent target-independent
+  // conventions. We don't yet have a way to annotate calls with properties
+  // like "cold", and we don't have any call-clobbered registers, so these are
+  // mostly all handled the same.
+  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
+         CallConv == CallingConv::Cold ||
+         CallConv == CallingConv::PreserveMost ||
+         CallConv == CallingConv::PreserveAll ||
+         CallConv == CallingConv::CXX_FAST_TLS;
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                                     SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  CallingConv::ID CallConv = CLI.CallConv;
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG,
+         "WebAssembly doesn't support language-specific or target-specific "
+         "calling conventions yet");
+  if (CLI.IsPatchPoint)
+    fail(DL, DAG, "WebAssembly doesn't support patch point yet");
+
+  // WebAssembly doesn't currently support explicit tail calls. If they are
+  // required, fail. Otherwise, just disable them.
+  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+       MF.getTarget().Options.GuaranteedTailCallOpt) ||
+      (CLI.CS && CLI.CS->isMustTailCall()))
+    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
+  CLI.IsTailCall = false;
+
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  if (Ins.size() > 1)
+    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
+
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  for (const ISD::OutputArg &Out : Outs) {
+    if (Out.Flags.isByVal())
+      fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+    if (Out.Flags.isNest())
+      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+    if (Out.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+    if (Out.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+    if (Out.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+  }
+
+  bool IsVarArg = CLI.IsVarArg;
+  unsigned NumFixedArgs = CLI.NumFixedArgs;
+  auto PtrVT = getPointerTy(MF.getDataLayout());
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+  if (IsVarArg) {
+    // Outgoing non-fixed arguments are placed at the top of the stack. First
+    // compute their offsets and the total amount of argument stack space
+    // needed.
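+    // Editor's worked example (not in the original patch): for a call like
+    // printf(fmt, 1, 2.0), the two non-fixed arguments get
+    //   i32 1   -> offset 0 (alloc size 4, align 4)
+    //   f64 2.0 -> offset 8 (alloc size 8, align 8)
+    // so getAlignedCallFrameSize() below reports 16 bytes.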
+    for (SDValue Arg :
+         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+      EVT VT = Arg.getValueType();
+      assert(VT != MVT::iPTR && "Legalized args should be concrete");
+      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+      unsigned Offset =
+          CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty),
+                               MF.getDataLayout().getABITypeAlignment(Ty));
+      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
+                                        Offset, VT.getSimpleVT(),
+                                        CCValAssign::Full));
+    }
+  }
+
+  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
+
+  SDValue NB;
+  if (NumBytes) {
+    NB = DAG.getConstant(NumBytes, DL, PtrVT, true);
+    Chain = DAG.getCALLSEQ_START(Chain, NB, DL);
+  }
+
+  if (IsVarArg) {
+    // For non-fixed arguments, next emit stores to store the argument values
+    // to the stack at the offsets computed above.
+    SDValue SP = DAG.getCopyFromReg(
+        Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT);
+    unsigned ValNo = 0;
+    SmallVector<SDValue, 8> Chains;
+    for (SDValue Arg :
+         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+      assert(ArgLocs[ValNo].getValNo() == ValNo &&
+             "ArgLocs should remain in order and only hold varargs args");
+      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
+      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP,
+                                DAG.getConstant(Offset, DL, PtrVT));
+      Chains.push_back(DAG.getStore(Chain, DL, Arg, Add,
+                                    MachinePointerInfo::getStack(MF, Offset),
+                                    false, false, 0));
+    }
+    if (!Chains.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+  }
+
+  // Compute the operands for the CALLn node.
+  SmallVector<SDValue, 16> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
+  // isn't reliable.
+  Ops.append(OutVals.begin(),
+             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
+
+  SmallVector<EVT, 8> Tys;
+  for (const auto &In : Ins) {
+    assert(!In.Flags.isByVal() && "byval is not valid for return values");
+    assert(!In.Flags.isNest() && "nest is not valid for return values");
+    if (In.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
+    if (In.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
+    if (In.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG,
+           "WebAssembly hasn't implemented cons regs last return values");
+    // Ignore In.getOrigAlign() because all our arguments are passed in
+    // registers.
+    Tys.push_back(In.VT);
+  }
+  Tys.push_back(MVT::Other);
+  SDVTList TyList = DAG.getVTList(Tys);
+  SDValue Res =
+      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
+                  DL, TyList, Ops);
+  if (Ins.empty()) {
+    Chain = Res;
+  } else {
+    InVals.push_back(Res);
+    Chain = Res.getValue(1);
+  }
+
+  if (NumBytes) {
+    SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT);
+    Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL);
+  }
+
+  return Chain;
+}
+
+bool WebAssemblyTargetLowering::CanLowerReturn(
+    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    LLVMContext & /*Context*/) const {
+  // WebAssembly can't currently handle returning tuples.
+  return Outs.size() <= 1;
+}
+
+SDValue WebAssemblyTargetLowering::LowerReturn(
+    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+    SelectionDAG &DAG) const {
+  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+  RetOps.append(OutVals.begin(), OutVals.end());
+  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
+
+  // Record the number and types of the return values.
+  for (const ISD::OutputArg &Out : Outs) {
+    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
+    assert(!Out.Flags.isNest() && "nest is not valid for return values");
+    assert(Out.IsFixed && "non-fixed return value is not valid");
+    if (Out.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
+    if (Out.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
+    if (Out.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
+  }
+
+  return Chain;
+}
+
+SDValue WebAssemblyTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+  // Set up the incoming ARGUMENTS value, which serves to represent the
+  // liveness of the incoming values before they're represented by virtual
+  // registers.
+  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
+
+  for (const ISD::InputArg &In : Ins) {
+    if (In.Flags.isByVal())
+      fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+    if (In.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+    if (In.Flags.isNest())
+      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+    if (In.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+    if (In.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+    // Ignore In.getOrigAlign() because all our arguments are passed in
+    // registers.
+    InVals.push_back(
+        In.Used
+            ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+                          DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
+            : DAG.getUNDEF(In.VT));
+
+    // Record the number and types of arguments.
+    MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT);
+  }
+
+  // Incoming varargs arguments are on the stack and will be accessed through
+  // va_arg, so we don't need to do anything for them here.
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
-// Other Lowering Code
+// Custom lowering hooks.
 //===----------------------------------------------------------------------===//
 
+SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("unimplemented operation lowering");
+    return SDValue();
+  case ISD::FrameIndex:
+    return LowerFrameIndex(Op, DAG);
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG);
+  case ISD::ExternalSymbol:
+    return LowerExternalSymbol(Op, DAG);
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG);
+  case ISD::BR_JT:
+    return LowerBR_JT(Op, DAG);
+  case ISD::VASTART:
+    return LowerVASTART(Op, DAG);
+  }
+}
+
+SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
+  return DAG.getTargetFrameIndex(FI, Op.getValueType());
+}
+
+SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const auto *GA = cast<GlobalAddressSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+  if (GA->getAddressSpace() != 0)
+    fail(DL, DAG, "WebAssembly only expects the 0 address space");
+  return DAG.getNode(
+      WebAssemblyISD::Wrapper, DL, VT,
+      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const auto *ES = cast<ExternalSymbolSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+}
+
+SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // There's no need for a Wrapper node because we always incorporate a jump
+  // table operand into a TABLESWITCH instruction, rather than ever
+  // materializing it in a register.
+  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
+                                JT->getTargetFlags());
+}
+
+SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+  SDValue Index = Op.getOperand(2);
+  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Index);
+
+  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
+  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
+
+  // TODO: For now, we just pick something arbitrary for the default case.
+  // We really want to sniff out the guard and put in the real default case
+  // (and delete the guard).
+  Ops.push_back(DAG.getBasicBlock(MBBs[0]));
+
+  // Add an operand for each case.
+  for (auto MBB : MBBs)
+    Ops.push_back(DAG.getBasicBlock(MBB));
+
+  return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops);
+}
+
+SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
+
+  // The incoming non-fixed arguments are placed on the top of the stack, with
+  // natural alignment, at the point of the call, so the base pointer is just
+  // the current frame pointer.
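+  // Editor's sketch (not in the original patch): the store built below is
+  // morally
+  //   *(char **)ap = frame_pointer;
+  // after which va_arg walks that pointer upward with natural alignment.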
+  DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+  unsigned FP =
+      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
+}
+
 //===----------------------------------------------------------------------===//
 // WebAssembly Optimization Hooks
 //===----------------------------------------------------------------------===//
-
-MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
-    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
-    const TargetMachine &TM) const {
-  return getDataSection();
-}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index efd60a7bacd6..e7232a042e12 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -22,10 +22,11 @@ namespace llvm {
 
 namespace WebAssemblyISD {
 
-enum {
+enum NodeType : unsigned {
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-  // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+#define HANDLE_NODETYPE(NODE) NODE,
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
 };
 
 } // end namespace WebAssemblyISD
@@ -42,8 +43,51 @@ private:
   /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
   /// right decision when generating code for different targets.
   const WebAssemblySubtarget *Subtarget;
+
+  FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+                           const TargetLibraryInfo *LibInfo) const override;
+  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
+  const char *getTargetNodeName(unsigned Opcode) const override;
+  std::pair<unsigned, const TargetRegisterClass *>
+  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                               StringRef Constraint, MVT VT) const override;
+  bool isCheapToSpeculateCttz() const override;
+  bool isCheapToSpeculateCtlz() const override;
+  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+                             unsigned AS) const override;
+
+  SDValue LowerCall(CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                      bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      LLVMContext &Context) const override;
+  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+                      SelectionDAG &DAG) const override;
+  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+                               bool IsVarArg,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               SDLoc DL, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals) const override;
+
+  // Custom lowering hooks.
+  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 };
 
+namespace WebAssembly {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                         const TargetLibraryInfo *libInfo);
+} // end namespace WebAssembly
+
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 6b5b6cd54173..cfa1519e6d99 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -12,10 +12,63 @@
 ///
 //===----------------------------------------------------------------------===//
 
-/*
- * TODO(jfb): Add the following.
- *
- * call_direct: call function directly
- * call_indirect: call function indirectly
- * addressof: obtain a function pointer value for a given function
- */
+// TODO: addr64: These currently assume the callee address is 32-bit.
+
+let Defs = [ARGUMENTS] in {
+
+// Call sequence markers. These have an immediate which represents the amount
+// of stack space to allocate or free, which is used for varargs lowering.
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
+def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt),
+                         [(WebAssemblycallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2),
+                       [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
+} // isCodeGenOnly = 1
+
+multiclass CALL<WebAssemblyRegClass vt, string prefix> {
+  def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops),
+                   [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
+                   !strconcat(prefix, "call\t$dst, $callee")>;
+  def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+                            [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+                            !strconcat(prefix, "call_indirect\t$dst, $callee")>;
+}
+let Uses = [SP32, SP64], isCall = 1 in {
+  defm : CALL<I32, "i32.">;
+  defm : CALL<I64, "i64.">;
+  defm : CALL<F32, "f32.">;
+  defm : CALL<F64, "f64.">;
+
+  def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops),
+                    [(WebAssemblycall0 (i32 imm:$callee))],
+                    "call \t$callee">;
+  def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+                             [(WebAssemblycall0 I32:$callee)],
+                             "call_indirect\t$callee">;
+} // Uses = [SP32,SP64], isCall = 1
+
+} // Defs = [ARGUMENTS]
+
+// Patterns for matching a direct call to a global address.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_I32 tglobaladdr:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_I64 tglobaladdr:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_F32 tglobaladdr:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_F64 tglobaladdr:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
+          (CALL_VOID tglobaladdr:$callee)>;
+
+// Patterns for matching a direct call to an external symbol.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_I32 texternalsym:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_I64 texternalsym:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_F32 texternalsym:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_F64 texternalsym:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
+          (CALL_VOID texternalsym:$callee)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
new file mode 100644
index 000000000000..05efe8903413
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -0,0 +1,82 @@
+//===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly control-flow code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+let Defs = [ARGUMENTS] in {
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+// The condition operand is a boolean value which WebAssembly represents as
+// i32.
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst),
+              [(brcond I32:$cond, bb:$dst)],
+              "br_if \t$cond, $dst">;
+let isCodeGenOnly = 1 in
+def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [],
+                  "br_unless\t$cond, $dst">;
+let isBarrier = 1 in {
+def BR : I<(outs), (ins bb_op:$dst),
+           [(br bb:$dst)],
+           "br \t$dst">;
+} // isBarrier = 1
+} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
+          (BR_IF I32:$cond, bb_op:$dst)>;
+def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
+          (BR_UNLESS I32:$cond, bb_op:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode
+// currently.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
+                        [(WebAssemblytableswitch I32:$index, bb:$default)],
+                        "tableswitch\t$index, $default">;
+def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
+                        [(WebAssemblytableswitch I64:$index, bb:$default)],
+                        "tableswitch\t$index, $default">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+// Placemarkers to indicate the start of a block or loop scope. These
+// use/clobber EXPR_STACK to prevent them from being moved into the middle of
+// an expression tree.
+let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
+def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
+def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
+
+// No-op to indicate to the AsmPrinter that a loop ends here, so a
+// basic block label is needed even if it wouldn't otherwise appear so.
+let isTerminator = 1, hasCtrlDep = 1 in
+def LOOP_END : I<(outs), (ins), []>;
+
+multiclass RETURN<WebAssemblyRegClass vt> {
+  def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
+                     "return \t$val">;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+let isReturn = 1 in {
+  defm : RETURN<I32>;
+  defm : RETURN<I64>;
+  defm : RETURN<F32>;
+  defm : RETURN<F64>;
+  def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">;
+} // isReturn = 1
+  def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 3fa29061b1de..931f4a913d0f 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,32 +13,99 @@
 ///
 //===----------------------------------------------------------------------===//
 
-/*
- * TODO(jfb): Add the following.
- *
- * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer
- * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer
- * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer
- * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer
- * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer
- * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer
- * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer
- * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer
- * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer
- * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer
- * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer
- * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer
- * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer
- * float32.demote[float64]: demote a 64-bit float to a 32-bit float
- * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float
- * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float
- * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float
- * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float
- * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float
- * float64.promote[float32]: promote a 32-bit float to a 64-bit float
- * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float
- * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float
- * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float
- * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float
- * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float
- */
+let Defs = [ARGUMENTS] in {
+
+def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src),
+                     [(set I32:$dst, (trunc I64:$src))],
+                     "i32.wrap/i64\t$dst, $src">;
+
+def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src),
+                         [(set I64:$dst, (sext I32:$src))],
+                         "i64.extend_s/i32\t$dst, $src">;
+def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src),
+                         [(set I64:$dst, (zext I32:$src))],
+                         "i64.extend_u/i32\t$dst, $src">;
+
+} // defs = [ARGUMENTS]
+
+// Expand a "don't care" extend into zero-extend (chosen over sign-extend
+// somewhat arbitrarily, although it favors popular hardware architectures
+// and is conceptually a simpler operation).
+def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Conversion from floating point to integer traps on overflow and invalid.
+let hasSideEffects = 1 in {
+def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src),
+                        [(set I32:$dst, (fp_to_sint F32:$src))],
+                        "i32.trunc_s/f32\t$dst, $src">;
+def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src),
+                        [(set I32:$dst, (fp_to_uint F32:$src))],
+                        "i32.trunc_u/f32\t$dst, $src">;
+def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src),
+                        [(set I64:$dst, (fp_to_sint F32:$src))],
+                        "i64.trunc_s/f32\t$dst, $src">;
+def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src),
+                        [(set I64:$dst, (fp_to_uint F32:$src))],
+                        "i64.trunc_u/f32\t$dst, $src">;
+def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src),
+                        [(set I32:$dst, (fp_to_sint F64:$src))],
+                        "i32.trunc_s/f64\t$dst, $src">;
+def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src),
+                        [(set I32:$dst, (fp_to_uint F64:$src))],
+                        "i32.trunc_u/f64\t$dst, $src">;
+def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src),
+                        [(set I64:$dst, (fp_to_sint F64:$src))],
+                        "i64.trunc_s/f64\t$dst, $src">;
+def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src),
+                        [(set I64:$dst, (fp_to_uint F64:$src))],
+                        "i64.trunc_u/f64\t$dst, $src">;
+} // hasSideEffects = 1
+
+def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src),
+                          [(set F32:$dst, (sint_to_fp I32:$src))],
+                          "f32.convert_s/i32\t$dst, $src">;
+def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src),
+                          [(set F32:$dst, (uint_to_fp I32:$src))],
+                          "f32.convert_u/i32\t$dst, $src">;
+def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src),
+                          [(set F64:$dst, (sint_to_fp I32:$src))],
+                          "f64.convert_s/i32\t$dst, $src">;
+def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src),
+                          [(set F64:$dst, (uint_to_fp I32:$src))],
+                          "f64.convert_u/i32\t$dst, $src">;
+def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src),
+                          [(set F32:$dst, (sint_to_fp I64:$src))],
+                          "f32.convert_s/i64\t$dst, $src">;
+def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src),
+                          [(set F32:$dst, (uint_to_fp I64:$src))],
+                          "f32.convert_u/i64\t$dst, $src">;
+def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src),
+                          [(set F64:$dst, (sint_to_fp I64:$src))],
+                          "f64.convert_s/i64\t$dst, $src">;
+def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src),
+                          [(set F64:$dst, (uint_to_fp I64:$src))],
+                          "f64.convert_u/i64\t$dst, $src">;
+
+def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src),
+                        [(set F64:$dst, (fextend F32:$src))],
+                        "f64.promote/f32\t$dst, $src">;
+def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src),
+                       [(set F32:$dst, (fround F64:$src))],
+                       "f32.demote/f64\t$dst, $src">;
+
+def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src),
+                            [(set I32:$dst, (bitconvert F32:$src))],
+                            "i32.reinterpret/f32\t$dst, $src">;
+def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src),
+                            [(set F32:$dst, (bitconvert I32:$src))],
+                            "f32.reinterpret/i32\t$dst, $src">;
+def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src),
+                            [(set I64:$dst, (bitconvert F64:$src))],
+                            "i64.reinterpret/f64\t$dst, $src">;
+def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src),
+                            [(set F64:$dst, (bitconvert I64:$src))],
+                            "f64.reinterpret/i64\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 30ef6339d65a..5520c6de6732 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,33 +12,90 @@
 ///
 //===----------------------------------------------------------------------===//
 
-defm FADD : BinaryFP<fadd>;
-defm FSUB : BinaryFP<fsub>;
-defm FMUL : BinaryFP<fmul>;
-defm FDIV : BinaryFP<fdiv>;
-defm FABS : UnaryFP<fabs>;
-defm FNEG : UnaryFP<fneg>;
-defm COPYSIGN : BinaryFP<fcopysign>;
-defm CEIL : UnaryFP<fceil>;
-defm FLOOR : UnaryFP<ffloor>;
-defm TRUNC : UnaryFP<ftrunc>;
-defm NEARESTINT : UnaryFP<fnearbyint>;
+let Defs = [ARGUMENTS] in {
 
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.eq: compare equal
- * float32.lt: less than
- * float32.le: less than or equal
- * float32.gt: greater than
- * float32.ge: greater than or equal
- */
+let isCommutable = 1 in
+defm ADD : BinaryFP<fadd, "add">;
+defm SUB : BinaryFP<fsub, "sub">;
+let isCommutable = 1 in
+defm MUL : BinaryFP<fmul, "mul">;
+defm DIV : BinaryFP<fdiv, "div">;
+defm SQRT : UnaryFP<fsqrt, "sqrt">;
 
-defm SQRT : UnaryFP<fsqrt>;
+defm ABS : UnaryFP<fabs, "abs">;
+defm NEG : UnaryFP<fneg, "neg">;
+defm COPYSIGN : BinaryFP<fcopysign, "copysign">;
 
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.min: minimum (binary operator); if either operand is NaN, returns NaN
- * float32.max: maximum (binary operator); if either operand is NaN, returns NaN
- */
+let isCommutable = 1 in {
+defm MIN : BinaryFP<fminnan, "min">;
+defm MAX : BinaryFP<fmaxnan, "max">;
+} // isCommutable = 1
+
+defm CEIL : UnaryFP<fceil, "ceil">;
+defm FLOOR : UnaryFP<ffloor, "floor">;
+defm TRUNC : UnaryFP<ftrunc, "trunc">;
+defm NEAREST : UnaryFP<fnearbyint, "nearest">;
+
+} // Defs = [ARGUMENTS]
+
+// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
+def : Pat<(fcopysign F64:$lhs, F32:$rhs),
+          (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
+def : Pat<(fcopysign F32:$lhs, F64:$rhs),
+          (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
+
+// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
+def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
+def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in {
+defm EQ : ComparisonFP<SETOEQ, "eq">;
+defm NE : ComparisonFP<SETUNE, "ne">;
+} // isCommutable = 1
+defm LT : ComparisonFP<SETOLT, "lt">;
+defm LE : ComparisonFP<SETOLE, "le">;
+defm GT : ComparisonFP<SETOGT, "gt">;
+defm GE : ComparisonFP<SETOGE, "ge">;
+
+} // Defs = [ARGUMENTS]
+
+// Don't care floating-point comparisons, supported via other comparisons.
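+// (Editor's note, not in the original patch: a "don't care" node such as
+// seteq carries no NaN-ordering requirement, so mapping it to an ordered
+// wasm comparison below, e.g. seteq -> f32.eq, is one legal choice among
+// several.)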
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs),
+                   [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
+                   "f32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs),
+                   [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
+                   "f64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs),
+          (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs),
+          (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
+          (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
+          (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 513c36fa2ec2..8008dd32353a 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,4 +1,4 @@
-// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,44 +12,68 @@
 ///
 //===----------------------------------------------------------------------===//
 
-// WebAssembly Instruction Format
-class WebAssemblyInst<string cstr> : Instruction {
+// WebAssembly Instruction Format.
+class WebAssemblyInst<string asmstr> : Instruction {
   field bits<0> Inst; // Instruction encoding.
   let Namespace = "WebAssembly";
   let Pattern = [];
-  let Constraints = cstr;
+  let AsmString = asmstr;
 }
 
-// Normal instructions
-class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
-    : WebAssemblyInst<cstr> {
+// Normal instructions.
+class I<dag oops, dag iops, list<dag> pattern, string asmstr = "">
+    : WebAssemblyInst<asmstr> {
   dag OutOperandList = oops;
   dag InOperandList = iops;
   let Pattern = pattern;
 }
 
 // Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$src),
- [(set Int32:$dst, (node Int32:$src))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$src),
- [(set Int64:$dst, (node Int64:$src))]>;
+multiclass UnaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$src),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$src),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>;
}
-multiclass BinaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs),
- [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs),
- [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>;
+multiclass BinaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
-multiclass UnaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$src),
- [(set Float32:$dst, (node Float32:$src))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$src),
- [(set Float64:$dst, (node Float64:$src))]>;
+multiclass UnaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$src),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$src),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>;
}
-multiclass BinaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs),
- [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs),
- [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>;
+multiclass BinaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonInt<CondCode cond, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonFP<CondCode cond, string name> {
+ def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index ea8937c8f9f2..5e7663cdb506 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -24,5 +24,136 @@ using namespace llvm;
 #define DEBUG_TYPE "wasm-instr-info"
+#define GET_INSTRINFO_CTOR_DTOR
+#include "WebAssemblyGenInstrInfo.inc"
+
 WebAssemblyInstrInfo::WebAssemblyInstrInfo(const
WebAssemblySubtarget &STI) - : RI(STI.getTargetTriple()) {} + : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, + WebAssembly::ADJCALLSTACKUP), + RI(STI.getTargetTriple()) {} + +void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + // This method is called by post-RA expansion, which expects only pregs to + // exist. However we need to handle both here. + auto &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(DestReg) ? + MRI.getRegClass(DestReg) : + MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg); + + unsigned CopyLocalOpcode; + if (RC == &WebAssembly::I32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32; + else if (RC == &WebAssembly::I64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64; + else if (RC == &WebAssembly::F32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32; + else if (RC == &WebAssembly::F64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64; + else + llvm_unreachable("Unexpected register class"); + + BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg) + .addReg(SrcReg, KillSrc ? RegState::Kill : 0); +} + +// Branch analysis. +bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool /*AllowModify*/) const { + bool HaveCond = false; + for (MachineInstr &MI : MBB.terminators()) { + switch (MI.getOpcode()) { + default: + // Unhandled instruction; bail out. + return true; + case WebAssembly::BR_IF: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR: + if (!HaveCond) + TBB = MI.getOperand(0).getMBB(); + else + FBB = MI.getOperand(0).getMBB(); + break; + } + if (MI.isBarrier()) + break; + } + + return false; +} + +unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + unsigned Count = 0; + + while (I != MBB.instr_begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isTerminator()) + break; + // Remove the branch. 
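+ // (eraseFromParent() invalidates I, so reset it to instr_end() and
+ // rescan the terminators from the end of the block.)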
+ I->eraseFromParent(); + I = MBB.instr_end(); + ++Count; + } + + return Count; +} + +unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + DebugLoc DL) const { + if (Cond.empty()) { + if (!TBB) + return 0; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB); + return 1; + } + + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + if (Cond[0].getImm()) { + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) + .addOperand(Cond[1]) + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) + .addOperand(Cond[1]) + .addMBB(TBB); + } + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB); + return 2; +} + +bool WebAssemblyInstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 1c4ae22f16d6..5ddd9b36f243 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -19,17 +19,35 @@ #include "WebAssemblyRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { class WebAssemblySubtarget; -class WebAssemblyInstrInfo final { +class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { const WebAssemblyRegisterInfo RI; public: explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI); const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index fe3ca76dc08a..f0b4ce7caf51 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -25,20 +25,48 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific DAG Node Types. //===----------------------------------------------------------------------===// +def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>; +def SDT_WebAssemblyCallSeqEnd : + SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; +def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; +def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; + //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Nodes. 
//===----------------------------------------------------------------------===//
+def WebAssemblycallseq_start :
+    SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart,
+           [SDNPHasChain, SDNPOutGlue]>;
+def WebAssemblycallseq_end :
+    SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd,
+           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
+                              SDT_WebAssemblyCall0,
+                              [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
+                              SDT_WebAssemblyCall1,
+                              [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH",
+                                    SDT_WebAssemblyTableswitch,
+                                    [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
+                                 SDT_WebAssemblyArgument>;
+def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
+                               SDT_WebAssemblyReturn, [SDNPHasChain]>;
+def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+                                SDT_WebAssemblyWrapper>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * get_local: read the current value of a local variable
- * set_local: set the current value of a local variable
-*/
+def bb_op : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
@@ -46,14 +74,87 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
 include "WebAssemblyInstrFormats.td"
+//===----------------------------------------------------------------------===//
+// Additional instructions.
+//===----------------------------------------------------------------------===//
+
+multiclass ARGUMENT<WebAssemblyRegClass vt> {
+ let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
+ def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
+ [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+}
+defm : ARGUMENT<I32>;
+defm : ARGUMENT<I64>;
+defm : ARGUMENT<F32>;
+defm : ARGUMENT<F64>;
+
+let Defs = [ARGUMENTS] in {
+
+// get_local and set_local are not generated by instruction selection; they
+// are implied by virtual register uses and defs in most contexts. However,
+// they are explicitly emitted for special purposes.
+multiclass LOCAL<WebAssemblyRegClass vt> {
+ def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [],
+ "get_local\t$res, $regno">;
+ // TODO: set_local returns its operand value
+ def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [],
+ "set_local\t$regno, $src">;
+
+ // COPY_LOCAL is not an actual instruction in wasm, but since we allow
+ // get_local and set_local to be implicit, we can have a COPY_LOCAL which
+ // is actually a no-op because all the work is done in the implied
+ // get_local and set_local.
+ let isAsCheapAsAMove = 1 in
+ def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [],
+ "copy_local\t$res, $src">;
+}
+defm : LOCAL<I32>;
+defm : LOCAL<I64>;
+defm : LOCAL<F32>;
+defm : LOCAL<F64>;
+
+let isMoveImm = 1 in {
+def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
+ [(set I32:$res, imm:$imm)],
+ "i32.const\t$res, $imm">;
+def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
+ [(set I64:$res, imm:$imm)],
+ "i64.const\t$res, $imm">;
+def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+ [(set F32:$res, fpimm:$imm)],
+ "f32.const\t$res, $imm">;
+def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+ [(set F64:$res, fpimm:$imm)],
+ "f64.const\t$res, $imm">;
+} // isMoveImm = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
+ (CONST_I32 tglobaladdr:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
+ (CONST_I32 texternalsym:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
+ (CONST_I32 tjumptable:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Function signature and local variable declaration "instructions".
+def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
+def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
+def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
+
+} // Defs = [ARGUMENTS]
+
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
//===----------------------------------------------------------------------===//
 include "WebAssemblyInstrMemory.td"
 include "WebAssemblyInstrCall.td"
+include "WebAssemblyInstrControl.td"
 include "WebAssemblyInstrInteger.td"
-include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrConv.td"
+include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrAtomics.td"
 include "WebAssemblyInstrSIMD.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 5f60fe81b1a2..09e5eafb85e9 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,34 +12,77 @@
 ///
 //===----------------------------------------------------------------------===//
-defm ADD : BinaryInt<add>;
-defm SUB : BinaryInt<sub>;
-defm MUL : BinaryInt<mul>;
-defm SDIV : BinaryInt<sdiv>;
-defm UDIV : BinaryInt<udiv>;
-defm SREM : BinaryInt<srem>;
-defm UREM : BinaryInt<urem>;
-defm AND : BinaryInt<and>;
-defm IOR : BinaryInt<or>;
-defm XOR : BinaryInt<xor>;
-defm SHL : BinaryInt<shl>;
-defm SHR : BinaryInt<srl>;
-defm SAR : BinaryInt<sra>;
+let Defs = [ARGUMENTS] in {
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * int32.eq: signed-less compare equal
- * int32.slt: signed less than
- * int32.sle: signed less than or equal
- * int32.ult: unsigned less than
- * int32.ule: unsigned less than or equal
- * int32.sgt: signed greater than
- * int32.sge: signed greater than or equal
- * int32.ugt: unsigned greater than
- * int32.uge: unsigned greater than or equal
- */
+// The spaces after the names are for aesthetic purposes only, to make
+// operands line up vertically after tab expansion.
+let isCommutable = 1 in
+defm ADD : BinaryInt<add, "add ">;
+defm SUB : BinaryInt<sub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryInt<mul, "mul ">;
+// Divide and remainder trap on a zero denominator.
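+// (hasSideEffects is set below so the optimizer will not speculate, sink, or
+// dead-code-eliminate a division that might trap; signed division can also
+// trap on INT32_MIN / -1 overflow.)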
+let hasSideEffects = 1 in {
+defm DIV_S : BinaryInt<sdiv, "div_s">;
+defm DIV_U : BinaryInt<udiv, "div_u">;
+defm REM_S : BinaryInt<srem, "rem_s">;
+defm REM_U : BinaryInt<urem, "rem_u">;
+} // hasSideEffects = 1
+let isCommutable = 1 in {
+defm AND : BinaryInt<and, "and ">;
+defm OR : BinaryInt<or, "or  ">;
+defm XOR : BinaryInt<xor, "xor ">;
+} // isCommutable = 1
+defm SHL : BinaryInt<shl, "shl ">;
+defm SHR_U : BinaryInt<srl, "shr_u">;
+defm SHR_S : BinaryInt<sra, "shr_s">;
-defm CLZ : UnaryInt<ctlz>;
-defm CTZ : UnaryInt<cttz>;
-defm POPCNT : UnaryInt<ctpop>;
+let isCommutable = 1 in {
+defm EQ : ComparisonInt<SETEQ, "eq ">;
+defm NE : ComparisonInt<SETNE, "ne ">;
+} // isCommutable = 1
+defm LT_S : ComparisonInt<SETLT, "lt_s">;
+defm LE_S : ComparisonInt<SETLE, "le_s">;
+defm LT_U : ComparisonInt<SETULT, "lt_u">;
+defm LE_U : ComparisonInt<SETULE, "le_u">;
+defm GT_S : ComparisonInt<SETGT, "gt_s">;
+defm GE_S : ComparisonInt<SETGE, "ge_s">;
+defm GT_U : ComparisonInt<SETUGT, "gt_u">;
+defm GE_U : ComparisonInt<SETUGE, "ge_u">;
+
+defm CLZ : UnaryInt<ctlz, "clz ">;
+defm CTZ : UnaryInt<cttz, "ctz ">;
+defm POPCNT : UnaryInt<ctpop, "popcnt">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand the "don't care" operations to supported operations.
+def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>;
+def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>;
+def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>;
+def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs),
+                   [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
+                   "i32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs),
+                   [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
+                   "i64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs),
+          (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs),
+          (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
+          (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
+          (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 5ab40e826caa..74ec45d58644 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -12,35 +12,500 @@
 ///
 //===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- * Each has optional alignment and immediate byte offset.
- * - * int32.load_sx[int8]: sign-extend to int32 - * int32.load_sx[int16]: sign-extend to int32 - * int32.load_zx[int8]: zero-extend to int32 - * int32.load_zx[int16]: zero-extend to int32 - * int32.load[int32]: (no conversion) - * int64.load_sx[int8]: sign-extend to int64 - * int64.load_sx[int16]: sign-extend to int64 - * int64.load_sx[int32]: sign-extend to int64 - * int64.load_zx[int8]: zero-extend to int64 - * int64.load_zx[int16]: zero-extend to int64 - * int64.load_zx[int32]: zero-extend to int64 - * int64.load[int64]: (no conversion) - * float32.load[float32]: (no conversion) - * float64.load[float64]: (no conversion) - * - * int32.store[int8]: wrap int32 to int8 - * int32.store[int16]: wrap int32 to int16 - * int32.store[int32]: (no conversion) - * int64.store[int8]: wrap int64 to int8 - * int64.store[int16]: wrap int64 to int16 - * int64.store[int32]: wrap int64 to int32 - * int64.store[int64]: (no conversion) - * float32.store[float32]: (no conversion) - * float64.store[float64]: (no conversion) - * - * load_global: load the value of a given global variable - * store_global: store a given value to a given global variable - */ +// TODO: +// - HasAddr64 +// - WebAssemblyTargetLowering having to do with atomics +// - Each has optional alignment. + +// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 +// local types. These memory-only types instead zero- or sign-extend into local +// types when loading, and truncate when storing. + +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$off, node:$addr), + (add node:$addr, node:$off), + [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + +let Defs = [ARGUMENTS] in { + +// Basic load. +def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load\t$dst, ${off}(${addr})">; +def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load\t$dst, ${off}(${addr})">; +def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], + "f32.load\t$dst, ${off}(${addr})">; +def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], + "f64.load\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select loads with a constant offset. 
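+// For example, without the NoUnsignedWrap check in regPlusImm above, an
+// address such as (add $x, -8) would fold to the constant offset 0xFFFFFFF8,
+// and the access would be to $x + 0xFFFFFFF8 rather than $x - 8, because the
+// effective address is computed without wrapping.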
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I32 imm:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I64 imm:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F32 imm:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F64 imm:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I64 tglobaladdr:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F32 tglobaladdr:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I64 texternalsym:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F32 texternalsym:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F64 texternalsym:$off, $addr)>; + +// Select loads with just a constant offset. +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + +let Defs = [ARGUMENTS] in { + +// Extending load. 
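+// i8 and i16 are memory-only types; the _s/_u variants below extend the
+// loaded value to a full local type, e.g. i32.load8_s loads one byte and
+// sign-extends it to 32 bits.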
+def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_u\t$dst, ${off}(${addr})">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_u\t$dst, ${off}(${addr})">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_s\t$dst, ${off}(${addr})">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_u\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select extending loads with no constant offset. +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select extending loads with a constant offset. 
+def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select extending loads with just a constant offset. 
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. 
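+// (An extloadi8/extloadi16/extloadi32 leaves the high bits unspecified, so
+// selecting the _u forms is one valid choice among several.)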
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select "don't care" extending loads with a constant offset. +def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select "don't care" extending loads with just a constant offset. 
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic store.
+// Note that we split the patterns out of the instruction definitions because
+// WebAssembly's stores return their operand value, and tablegen doesn't like
+// instruction definition patterns that don't reference all of the output
+// operands.
+// Note: WebAssembly inverts SelectionDAG's usual operand order.
+def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+                  "i32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+                  "i64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
+                  "f32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
+                  "f64.store\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select stores with no constant offset.
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+
+// Select stores with a constant offset.
+def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F32 imm:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F64 imm:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + +// Select stores with just a constant offset. +def : Pat<(store I32:$val, imm:$off), + (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, imm:$off), + (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, imm:$off), + (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, imm:$off), + (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Truncating store. 
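+// These wrap a wider local value to the narrower memory type; for example,
+// i32.store8 stores only the low 8 bits of $val.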
+def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store16\t$dst, ${off}(${addr}), $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store16\t$dst, ${off}(${addr}), $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store32\t$dst, ${off}(${addr}), $val">; + +} // Defs = [ARGUMENTS] + +// Select truncating stores with no constant offset. +def : Pat<(truncstorei8 I32:$val, I32:$addr), + (STORE8_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, I32:$addr), + (STORE16_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, I32:$addr), + (STORE8_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, I32:$addr), + (STORE16_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, I32:$addr), + (STORE32_I64 0, I32:$addr, I64:$val)>; + +// Select truncating stores with a constant offset. +def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + +// Select truncating stores with just a constant offset. 
+def : Pat<(truncstorei8 I32:$val, imm:$off), + (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, imm:$off), + (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, imm:$off), + (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, imm:$off), + (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, imm:$off), + (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Memory size. +def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins), + [(set I32:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr32]>; +def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins), + [(set I64:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr64]>; + +// Grow memory. +def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta), + [(int_wasm_grow_memory I32:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr32]>; +def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta), + [(int_wasm_grow_memory I64:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr64]>; + +} // Defs = [ARGUMENTS] diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..b009a4e054cc --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,133 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. This pass allows LLVM to use it, for now. 
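+///
+/// For example, "BR_UNLESS $cond, $dst" branches to $dst only when $cond is
+/// zero. The pass below rewrites each br_unless as a br_if on the inverted
+/// condition, inverting the comparison that defines the condition in place
+/// when possible, and otherwise materializing an explicit "$cond == 0" test.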
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end(); ) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + unsigned Cond = MI->getOperand(0).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. + if (MFI.isVRegStackified(Cond)) { + assert(MRI.hasOneDef(Cond)); + MachineInstr *Def = MRI.getVRegDef(Cond); + switch (Def->getOpcode()) { + using namespace WebAssembly; + case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; + case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; + case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; + case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; + case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; + case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; + case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; + case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; + case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; + case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; + case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; + case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; + case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; + case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; + case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; + case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; + case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; + case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; + case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; + case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; + case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; + case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; + case EQ_F64: 
Def->setDesc(TII.get(NE_F64)); Inverted = true; break; + case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + default: break; + } + } + + // If we weren't able to invert the condition in place. Insert an + // expression to invert it. + if (!Inverted) { + unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(Tmp); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) + .addReg(Cond) + .addReg(ZeroReg); + Cond = Tmp; + Inverted = true; + } + + // The br_unless condition has now been inverted. Insert a br_if and + // delete the br_unless. + assert(Inverted); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) + .addReg(Cond) + .addOperand(MI->getOperand(1)); + MBB.erase(MI); + } + } + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp new file mode 100644 index 000000000000..a953f8247006 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -0,0 +1,106 @@ +// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains code to lower WebAssembly MachineInstrs to their +/// corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSymbol * +WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + return Printer.getSymbol(MO.getGlobal()); +} + +MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( + const MachineOperand &MO) const { + return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); +} + +MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); + + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + + int64_t Offset = MO.getOffset(); + if (Offset != 0) { + assert(!MO.isJTI() && "Unexpected offset with jump table index"); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } + + return MCOperand::createExpr(Expr); +} + +void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, + MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: { + // Ignore all implicit register operands. 
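+ // (Implicit operands, such as the ARGUMENTS register used to model
+ // argument liveness, have no counterpart in the WebAssembly encoding.)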
+ if (MO.isImplicit()) + continue; + const WebAssemblyFunctionInfo &MFI = + *MI->getParent()->getParent()->getInfo(); + unsigned WAReg = MFI.getWAReg(MO.getReg()); + MCOp = MCOperand::createReg(WAReg); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_FPImmediate: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + const ConstantFP *Imm = MO.getFPImm(); + if (Imm->getType()->isFloatTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat()); + else if (Imm->getType()->isDoubleTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble()); + else + llvm_unreachable("unknown floating point immediate type"); + break; + } + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h new file mode 100644 index 000000000000..6d704704f576 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -0,0 +1,45 @@ +//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the class to lower WebAssembly MachineInstrs to +/// their corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H + +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class AsmPrinter; +class MCContext; +class MCSymbol; +class MachineInstr; +class MachineOperand; + +/// This class is used to lower an MachineInstr into an MCInst. 
+class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { + MCContext &Ctx; + AsmPrinter &Printer; + + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + +public: + WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer) + : Ctx(ctx), Printer(printer) {} + void Lower(const MachineInstr *MI, MCInst &OutMI) const; +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 542d984b9006..225c5d32cb5d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -17,3 +17,9 @@ using namespace llvm; WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {} + +void WebAssemblyFunctionInfo::initWARegs() { + assert(WARegs.empty()); + unsigned Reg = UnusedReg; + WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg); +} diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index fc5e910b09ef..6a60280900a9 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -1,4 +1,4 @@ -// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*- +// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*- // // The LLVM Compiler Infrastructure // @@ -16,8 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H -#include "WebAssemblyRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -27,9 +26,70 @@ namespace llvm { class WebAssemblyFunctionInfo final : public MachineFunctionInfo { MachineFunction &MF; + std::vector Params; + + /// A mapping from CodeGen vreg index to WebAssembly register number. + std::vector WARegs; + + /// A mapping from CodeGen vreg index to a boolean value indicating whether + /// the given register is considered to be "stackified", meaning it has been + /// determined or made to meet the stack requirements: + /// - single use (per path) + /// - single def (per path) + /// - defined and used in LIFO order with other stack registers + BitVector VRegStackified; + + // One entry for each possible target reg. we expect it to be small. 
+  std::vector<unsigned> PhysRegs;
+
 public:
-  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
+    PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
+  }
   ~WebAssemblyFunctionInfo() override;
+
+  void addParam(MVT VT) { Params.push_back(VT); }
+  const std::vector<MVT> &getParams() const { return Params; }
+
+  static const unsigned UnusedReg = -1u;
+
+  void stackifyVReg(unsigned VReg) {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
+    VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+  bool isVRegStackified(unsigned VReg) const {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      return false;
+    return VRegStackified.test(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+
+  void initWARegs();
+  void setWAReg(unsigned VReg, unsigned WAReg) {
+    assert(WAReg != UnusedReg);
+    assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size());
+    WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
+  }
+  unsigned getWAReg(unsigned Reg) const {
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+      return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
+    }
+    return PhysRegs[Reg];
+  }
+  // If new virtual registers are created after initWARegs has been called,
+  // this function can be used to add WebAssembly register mappings for them.
+  void addWAReg(unsigned VReg, unsigned WAReg) {
+    assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
+    WARegs.push_back(WAReg);
+  }
+
+  void addPReg(unsigned PReg, unsigned WAReg) {
+    assert(PReg < WebAssembly::NUM_TARGET_REGS);
+    assert(WAReg < -1U);
+    PhysRegs[PReg] = WAReg;
+  }
+  const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
 };
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 000000000000..4dc401a2c7cc
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,76 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
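+///
+/// For illustration (a hypothetical IR snippet, not taken from this patch):
+/// given
+/// \code
+///   %q = call i8* @f(i8* returned %p)
+/// \endcode
+/// every use of %p that is dominated by the call can be rewritten to use %q,
+/// so the call's result becomes the live value and %p's live range shrinks.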
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+class OptimizeReturned final : public FunctionPass,
+                               public InstVisitor<OptimizeReturned> {
+  const char *getPassName() const override {
+    return "WebAssembly Optimize Returned";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  DominatorTree *DT;
+
+public:
+  static char ID;
+  OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+  void visitCallSite(CallSite CS);
+};
+} // End anonymous namespace
+
+char OptimizeReturned::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+  return new OptimizeReturned();
+}
+
+void OptimizeReturned::visitCallSite(CallSite CS) {
+  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
+    if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+      Instruction *Inst = CS.getInstruction();
+      Value *Arg = CS.getArgOperand(i);
+      // Ignore constants, globals, undef, etc.
+      if (isa<Constant>(Arg))
+        continue;
+      // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+      for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+        Use &U = *UI++;
+        if (DT->dominates(Inst, U))
+          U.set(Inst);
+      }
+    }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  visit(F);
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyPEI.cpp b/lib/Target/WebAssembly/WebAssemblyPEI.cpp
new file mode 100644
index 000000000000..d570d4266110
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyPEI.cpp
@@ -0,0 +1,1066 @@
+//===-- WebAssemblyPEI.cpp - Insert Prolog/Epilog code in function --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee-saved registers, and emitting prolog and epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This is a copy of lib/CodeGen/PrologEpilogInserter.cpp except that it does
+// not assert that all virtual registers are gone (because WebAssembly
+// currently uses virtual rather than physical registers), and only runs
+// MRI.clearVirtRegs() if scavenging happened (which it never does). It also
+// uses a different class name so it can be registered via INITIALIZE_PASS.
+// It is otherwise unmodified, so any changes to the target-independent PEI
+// can be easily applied.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pei"
+
+namespace llvm {
+void initializeWasmPEIPass(PassRegistry &);
+}
+
+namespace {
+class WasmPEI : public MachineFunctionPass {
+public:
+  static char ID;
+  WasmPEI() : MachineFunctionPass(ID) {
+    initializeWasmPEIPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+  /// frame indexes with appropriate references.
+  ///
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  RegScavenger *RS;
+
+  // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+  // stack frame indexes.
+  unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+  // Save and Restore blocks of the current function. Typically there is a
+  // single save block, unless Windows EH funclets are involved.
+  SmallVector<MachineBasicBlock *, 1> SaveBlocks;
+  SmallVector<MachineBasicBlock *, 1> RestoreBlocks;
+
+  // Flag to control whether to use the register scavenger to resolve
+  // frame index materialization registers. Set according to
+  // TRI->requiresFrameIndexScavenging() for the current function.
+  bool FrameIndexVirtualScavenging;
+
+  void calculateSets(MachineFunction &Fn);
+  void calculateCallsInformation(MachineFunction &Fn);
+  void assignCalleeSavedSpillSlots(MachineFunction &Fn,
+                                   const BitVector &SavedRegs);
+  void insertCSRSpillsAndRestores(MachineFunction &Fn);
+  void calculateFrameObjectOffsets(MachineFunction &Fn);
+  void replaceFrameIndices(MachineFunction &Fn);
+  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                           int &SPAdj);
+  void scavengeFrameVirtualRegs(MachineFunction &Fn);
+  void insertPrologEpilogCode(MachineFunction &Fn);
+};
+} // namespace
+
+char WasmPEI::ID = 0;
+
+namespace llvm {
+FunctionPass *createWebAssemblyPEI() {
+  return new WasmPEI();
+}
+}
+
+static cl::opt<unsigned>
+WarnStackSize("wasm-warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+              cl::desc("Warn for stack size bigger than the given"
+                       " number"));
+
+INITIALIZE_PASS_BEGIN(WasmPEI, "wasmprologepilog",
+                      "Wasm Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(WasmPEI, "wasmprologepilog",
+                    "Wasm Prologue/Epilogue Insertion & Frame Finalization",
+                    false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+          "Number of bytes used for stack in all functions");
+
+void WasmPEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<StackProtector>();
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Compute the set of return blocks
+void WasmPEI::calculateSets(MachineFunction &Fn) {
+  const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  // Even when we do not change any CSR, we still want to insert the
+  // prologue and epilogue of the function.
+  // So set the save points for those.
+
+  // Use the points found by shrink-wrapping, if any.
+  if (MFI->getSavePoint()) {
+    SaveBlocks.push_back(MFI->getSavePoint());
+    assert(MFI->getRestorePoint() && "Both restore and save must be set");
+    MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+    // If RestoreBlock does not have any successor and is not a return block
+    // then the end point is unreachable and we do not need to insert any
+    // epilogue.
+    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+      RestoreBlocks.push_back(RestoreBlock);
+    return;
+  }
+
+  // Save refs to entry and return blocks.
+  SaveBlocks.push_back(&Fn.front());
+  for (MachineBasicBlock &MBB : Fn) {
+    if (MBB.isEHFuncletEntry())
+      SaveBlocks.push_back(&MBB);
+    if (MBB.isReturnBlock())
+      RestoreBlocks.push_back(&MBB);
+  }
+}
+
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool WasmPEI::runOnMachineFunction(MachineFunction &Fn) {
+  const Function *F = Fn.getFunction();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+
+  // LOCALMOD: assert removed from target-independent PEI
+  //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+  // function's frame information. Also eliminates call frame pseudo
+  // instructions.
+  calculateCallsInformation(Fn);
+
+  // Determine which of the registers in the callee save list should be saved.
+  BitVector SavedRegs;
+  TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+  // Insert spill code for any callee saved registers that are modified.
+  assignCalleeSavedSpillSlots(Fn, SavedRegs);
+
+  // Determine placement of CSR spill/restore code:
+  // place all spills in the entry block, all restores in return blocks.
+  calculateSets(Fn);
+
+  // Add the code to save and restore the callee saved registers.
+  if (!F->hasFnAttribute(Attribute::Naked))
+    insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+  TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+  // Calculate actual frame offsets for all abstract stack objects...
+  calculateFrameObjectOffsets(Fn);
+
+  // Add prolog and epilog code to the function. This function is required
+  // to align the stack frame as necessary for any stack variables or
+  // called functions. Because of this, calculateCalleeSavedRegisters()
+  // must be called before this function in order to set the AdjustsStack
+  // and MaxCallFrameSize variables.
+  if (!F->hasFnAttribute(Attribute::Naked))
+    insertPrologEpilogCode(Fn);
+
+  // Replace all MO_FrameIndex operands with physical register references
+  // and actual offsets.
+  //
+  replaceFrameIndices(Fn);
+
+  // If register scavenging is needed, as we've enabled doing it as a
+  // post-pass, scavenge the virtual registers that frame index elimination
+  // inserted.
+  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+    scavengeFrameVirtualRegs(Fn);
+    // Clear any vregs created by virtual scavenging.
+    // LOCALMOD: made this call conditional with scavengeFrameVirtualRegs()
+    Fn.getRegInfo().clearVirtRegs();
+  }
+
+  // Warn on stack size when it exceeds the given limit.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  uint64_t StackSize = MFI->getStackSize();
+  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+    F->getContext().diagnose(DiagStackSize);
+  }
+
+  delete RS;
+  SaveBlocks.clear();
+  RestoreBlocks.clear();
+  return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void WasmPEI::calculateCallsInformation(MachineFunction &Fn) {
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  unsigned MaxCallFrameSize = 0;
+  bool AdjustsStack = MFI->adjustsStack();
+
+  // Get the function call frame set-up and tear-down instruction opcode
+  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
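+  // For illustration only: the pseudos scanned for below typically bracket a
+  // call like this (opcode names vary by target; the generic ADJCALLSTACK
+  // pair shown here is an assumption, not part of this patch):
+  //   ADJCALLSTACKDOWN 16    ; FrameSetupOpcode, operand 0 = frame size
+  //   CALL @callee
+  //   ADJCALLSTACKUP 16, 0   ; FrameDestroyOpcode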
+  // Early exit for targets which have no call frame setup/destroy pseudo
+  // instructions.
+  if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+    return;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImm();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        AdjustsStack = true;
+        FrameSDOps.push_back(I);
+      } else if (I->isInlineAsm()) {
+        // Some inline asm's need a stack frame, as indicated by operand 1.
+        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+          AdjustsStack = true;
+      }
+
+  MFI->setAdjustsStack(AdjustsStack);
+  MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (std::vector<MachineBasicBlock::iterator>::iterator
+       i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+    MachineBasicBlock::iterator I = *i;
+
+    // If call frames are not being included as part of the stack frame, and
+    // the target doesn't indicate otherwise, remove the call frame pseudos
+    // here. The sub/add sp instruction pairs are still inserted, but we don't
+    // need to track the SP adjustment for frame index elimination.
+    if (TFI->canSimplifyCallFramePseudos(Fn))
+      TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+}
+
+void WasmPEI::assignCalleeSavedSpillSlots(MachineFunction &F,
+                                          const BitVector &SavedRegs) {
+  // These are used to keep track of the callee-save area. Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  if (SavedRegs.empty())
+    return;
+
+  const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (SavedRegs.test(Reg))
+      CSI.push_back(CalleeSavedInfo(Reg));
+  }
+
+  const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+    // If target doesn't implement this, use generic code.
+
+    if (CSI.empty())
+      return; // Early exit if no callee saved registers are modified!
+
+    unsigned NumFixedSpillSlots;
+    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+    // Now that we know which registers need to be saved and restored, allocate
+    // stack slots for them.
+    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+         I != E; ++I) {
+      unsigned Reg = I->getReg();
+      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+      int FrameIdx;
+      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+        I->setFrameIdx(FrameIdx);
+        continue;
+      }
+
+      // Check to see if this physreg must be spilled to a particular stack slot
+      // on this target.
+      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+             FixedSlot->Reg != Reg)
+        ++FixedSlot;
+
+      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+        // Nope, just spill it anywhere convenient.
+        unsigned Align = RC->getAlignment();
+        unsigned StackAlign = TFI->getStackAlignment();
+
+        // We may not be able to satisfy the desired alignment specification of
+        // the TargetRegisterClass if the stack alignment is smaller. Use the
+        // min.
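+        // (For example, a register class asking for 16-byte alignment on a
+        // target with an 8-byte stack alignment gets an 8-byte-aligned slot.)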
+        Align = std::min(Align, StackAlign);
+        FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+        if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+        if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+      } else {
+        // Spill it to the stack where we must.
+        FrameIdx =
+            MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+      }
+
+      I->setFrameIdx(FrameIdx);
+    }
+  }
+
+  MFI->setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Visited will contain all the basic blocks that are in the region
+  // where the callee saved registers are alive:
+  // - Anything that is not Save or Restore -> LiveThrough.
+  // - Save -> LiveIn.
+  // - Restore -> LiveOut.
+  // The live-out is not attached to the block, so no need to keep
+  // Restore in this set.
+  SmallPtrSet<MachineBasicBlock *, 8> Visited;
+  SmallVector<MachineBasicBlock *, 8> WorkList;
+  MachineBasicBlock *Entry = &MF.front();
+  MachineBasicBlock *Save = MFI->getSavePoint();
+
+  if (!Save)
+    Save = Entry;
+
+  if (Entry != Save) {
+    WorkList.push_back(Entry);
+    Visited.insert(Entry);
+  }
+  Visited.insert(Save);
+
+  MachineBasicBlock *Restore = MFI->getRestorePoint();
+  if (Restore)
+    // By construction Restore cannot be visited, otherwise it
+    // means there exists a path to Restore that does not go
+    // through Save.
+    WorkList.push_back(Restore);
+
+  while (!WorkList.empty()) {
+    const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+    // By construction, the region that is after the save point is
+    // dominated by the Save and post-dominated by the Restore.
+    if (CurBB == Save && Save != Restore)
+      continue;
+    // Enqueue all the successors not already visited.
+    // Those are by construction either before Save or after Restore.
+    for (MachineBasicBlock *SuccBB : CurBB->successors())
+      if (Visited.insert(SuccBB).second)
+        WorkList.push_back(SuccBB);
+  }
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    for (MachineBasicBlock *MBB : Visited) {
+      MCPhysReg Reg = CSI[i].getReg();
+      // Add the callee-saved register as live-in.
+      // It's killed at the spill.
+      if (!MBB->isLiveIn(Reg))
+        MBB->addLiveIn(Reg);
+    }
+  }
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function.
+///
+void WasmPEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  MFI->setCalleeSavedInfoValid(true);
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  MachineBasicBlock::iterator I;
+
+  // Spill using target interface.
+  for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+    I = SaveBlock->begin();
+    if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        // Insert the spill to the stack frame.
+        unsigned Reg = CSI[i].getReg();
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+                                RC, TRI);
+      }
+    }
+    // Update the live-in information of all the blocks up to the save point.
+    updateLiveness(Fn);
+  }
+
+  // Restore using target interface.
+  for (MachineBasicBlock *MBB : RestoreBlocks) {
+    I = MBB->end();
+
+    // Skip over all terminator instructions, which are part of the return
+    // sequence.
+    MachineBasicBlock::iterator I2 = I;
+    while (I2 != MBB->begin() && (--I2)->isTerminator())
+      I = I2;
+
+    bool AtStart = I == MBB->begin();
+    MachineBasicBlock::iterator BeforeI = I;
+    if (!AtStart)
+      --BeforeI;
+
+    // Restore all registers immediately before the return and any
+    // terminators that precede it.
+    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        unsigned Reg = CSI[i].getReg();
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+        assert(I != MBB->begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order. loadRegFromStackSlot can insert
+        // multiple instructions.
+        if (AtStart)
+          I = MBB->begin();
+        else {
+          I = BeforeI;
+          ++I;
+        }
+      }
+    }
+  }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+                  bool StackGrowsDown, int64_t &Offset,
+                  unsigned &MaxAlign, unsigned Skew) {
+  // If the stack grows down, add the object size to find the lowest address.
+  if (StackGrowsDown)
+    Offset += MFI->getObjectSize(FrameIdx);
+
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+  // If the alignment of this object is greater than that of the stack, then
+  // increase the stack alignment to match.
+  MaxAlign = std::max(MaxAlign, Align);
+
+  // Adjust to alignment boundary.
+  Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+  if (StackGrowsDown) {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+    MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+  } else {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+    MFI->setObjectOffset(FrameIdx, Offset);
+    Offset += MFI->getObjectSize(FrameIdx);
+  }
+}
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+                      SmallSet<int, 16> &ProtectedObjs,
+                      MachineFrameInfo *MFI, bool StackGrowsDown,
+                      int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+
+  for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+       E = UnassignedObjs.end(); I != E; ++I) {
+    int i = *I;
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+    ProtectedObjs.insert(i);
+  }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void WasmPEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  StackProtector *SP = &getAnalysis<StackProtector>();
+
+  bool StackGrowsDown =
+      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  // Start at the beginning of the local area.
+  // The Offset is the distance from the stack top in the direction
+  // of stack growth -- so it's always nonnegative.
+  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+  if (StackGrowsDown)
+    LocalAreaOffset = -LocalAreaOffset;
+  assert(LocalAreaOffset >= 0 &&
+         "Local area offset should be in direction of stack growth");
+  int64_t Offset = LocalAreaOffset;
+
+  // Skew to be applied to alignment.
+  unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
+  // If there are fixed sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of local area.
+  // We currently don't support filling in holes in between fixed sized
+  // objects, so we adjust 'Offset' to point to the end of last fixed sized
+  // preallocated object.
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+    int64_t FixedOff;
+    if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at lower address of
+      // the object -- which is given by offset. For down growing stack
+      // the offset is negative, so we negate the offset to get the distance.
+      FixedOff = -MFI->getObjectOffset(i);
+    } else {
+      // The maximum distance from the start pointer is at the upper
+      // address of the object.
+      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+    }
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+
+  // First assign frame offsets to stack objects that are used to spill
+  // callee saved registers.
+  if (StackGrowsDown) {
+    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      Offset += MFI->getObjectSize(i);
+
+      unsigned Align = MFI->getObjectAlignment(i);
+      // Adjust to alignment boundary
+      Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+      MFI->setObjectOffset(i, -Offset); // Set the computed offset
+    }
+  } else {
+    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+    for (int i = MaxCSFI; i >= MinCSFI; --i) {
+      unsigned Align = MFI->getObjectAlignment(i);
+      // Adjust to alignment boundary
+      Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+      MFI->setObjectOffset(i, Offset);
+      Offset += MFI->getObjectSize(i);
+    }
+  }
+
+  unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // incoming stack pointer if a frame pointer is required and is closer
+  // to the incoming rather than the final stack pointer.
+  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
+  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+                               TFI.isFPCloseToIncomingSP() &&
+                               RegInfo->useFPForScavengingIndex(Fn) &&
+                               !RegInfo->needsStackRealignment(Fn));
+  if (RS && EarlyScavengingSlots) {
+    SmallVector<int, 8> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  // FIXME: Once this is working, then enable flag will change to a target
+  // check for whether the frame is large enough to want to use virtual
+  // frame index registers. Functions which don't want/need this optimization
+  // will continue to use the existing code path.
+  if (MFI->getUseLocalStackAllocationBlock()) {
+    unsigned Align = MFI->getLocalFrameMaxAlign();
+
+    // Adjust to alignment boundary.
+    Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+    // Resolve offsets for objects in the local block.
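+    // For example (illustrative numbers): with StackGrowsDown and the local
+    // block based at Offset 32, an entry with map offset 8 resolves to
+    // FIOffset = -32 + 8 = -24.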
+    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+            FIOffset << "]\n");
+      MFI->setObjectOffset(Entry.first, FIOffset);
+    }
+    // Allocate the local block
+    Offset += MFI->getLocalFrameSize();
+
+    MaxAlign = std::max(Align, MaxAlign);
+  }
+
+  // Make sure that the stack protector comes before the local variables on the
+  // stack.
+  SmallSet<int, 16> ProtectedObjs;
+  if (MFI->getStackProtectorIndex() >= 0) {
+    StackObjSet LargeArrayObjs;
+    StackObjSet SmallArrayObjs;
+    StackObjSet AddrOfObjs;
+
+    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+                      Offset, MaxAlign, Skew);
+
+    // Assign large stack objects first.
+    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+      if (MFI->isObjectPreAllocated(i) &&
+          MFI->getUseLocalStackAllocationBlock())
+        continue;
+      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+        continue;
+      if (RS && RS->isScavengingFrameIndex((int)i))
+        continue;
+      if (MFI->isDeadObjectIndex(i))
+        continue;
+      if (MFI->getStackProtectorIndex() == (int)i)
+        continue;
+
+      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+      case StackProtector::SSPLK_None:
+        continue;
+      case StackProtector::SSPLK_SmallArray:
+        SmallArrayObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_AddrOf:
+        AddrOfObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_LargeArray:
+        LargeArrayObjs.insert(i);
+        continue;
+      }
+      llvm_unreachable("Unexpected SSPLayoutKind.");
+    }
+
+    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isObjectPreAllocated(i) &&
+        MFI->getUseLocalStackAllocationBlock())
+      continue;
+    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+      continue;
+    if (RS && RS->isScavengingFrameIndex((int)i))
+      continue;
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (ProtectedObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // stack pointer.
+  if (RS && !EarlyScavengingSlots) {
+    SmallVector<int, 8> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  if (!TFI.targetHandlesStackFrameRounding()) {
+    // If we have reserved argument space for call sites in the function
+    // immediately on entry to the current function, count it as part of the
+    // overall stack size.
+    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+      Offset += MFI->getMaxCallFrameSize();
+
+    // Round up the size to a multiple of the alignment. If the function has
+    // any calls or alloca's, align to the target's StackAlignment value to
+    // ensure that the callee's frame or the alloca data is suitably aligned;
+    // otherwise, for leaf functions, align to the TransientStackAlignment
+    // value.
+    unsigned StackAlign;
+    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+      StackAlign = TFI.getStackAlignment();
+    else
+      StackAlign = TFI.getTransientStackAlignment();
+
+    // If the frame pointer is eliminated, all frame offsets will be relative to
+    // SP not FP. Align to MaxAlign so this works.
+    StackAlign = std::max(StackAlign, MaxAlign);
+    Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
+  }
+
+  // Update frame info to pretend that this is part of the stack...
+  int64_t StackSize = Offset - LocalAreaOffset;
+  MFI->setStackSize(StackSize);
+  NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void WasmPEI::insertPrologEpilogCode(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+
+  // Add prologue to the function...
+  for (MachineBasicBlock *SaveBlock : SaveBlocks)
+    TFI.emitPrologue(Fn, *SaveBlock);
+
+  // Add epilogue to restore the callee-save registers in each exiting block.
+  for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+    TFI.emitEpilogue(Fn, *RestoreBlock);
+
+  for (MachineBasicBlock *SaveBlock : SaveBlocks)
+    TFI.inlineStackProbe(Fn, *SaveBlock);
+
+  // Emit additional code that is required to support segmented stacks, if
+  // we've been asked for it. This, when linked with a runtime with support
+  // for segmented stacks (libgcc is one), will result in allocating stack
+  // space in small chunks instead of one large contiguous block.
+  if (Fn.shouldSplitStack()) {
+    for (MachineBasicBlock *SaveBlock : SaveBlocks)
+      TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+  }
+
+  // Emit additional code that is required to explicitly handle the stack in
+  // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+  // approach is rather similar to that of Segmented Stacks, but it uses a
+  // different conditional check and another BIF for allocating more stack
+  // space.
+  if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+    for (MachineBasicBlock *SaveBlock : SaveBlocks)
+      TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void WasmPEI::replaceFrameIndices(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+  // Store SPAdj at exit of a basic block.
+  SmallVector<int, 8> SPState;
+  SPState.resize(Fn.getNumBlockIDs());
+  SmallPtrSet<MachineBasicBlock *, 8> Reachable;
+
+  // Iterate over the reachable blocks in DFS order.
+  for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+       DFI != DFE; ++DFI) {
+    int SPAdj = 0;
+    // Check the exit state of the DFS stack predecessor.
+    if (DFI.getPathLength() >= 2) {
+      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+      assert(Reachable.count(StackPred) &&
+             "DFS stack predecessor is already visited.\n");
+      SPAdj = SPState[StackPred->getNumber()];
+    }
+    MachineBasicBlock *BB = *DFI;
+    replaceFrameIndices(BB, Fn, SPAdj);
+    SPState[BB->getNumber()] = SPAdj;
+  }
+
+  // Handle the unreachable blocks.
+  for (auto &BB : Fn) {
+    if (Reachable.count(&BB))
+      // Already handled in DFS traversal.
+      continue;
+    int SPAdj = 0;
+    replaceFrameIndices(&BB, Fn, SPAdj);
+  }
+}
+
+void WasmPEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                                  int &SPAdj) {
+  assert(Fn.getSubtarget().getRegisterInfo() &&
+         "getRegisterInfo() must be implemented!");
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+  bool InsideCallSequence = false;
+
+  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+    if (I->getOpcode() == FrameSetupOpcode ||
+        I->getOpcode() == FrameDestroyOpcode) {
+      InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+      SPAdj += TII.getSPAdjust(I);
+
+      MachineBasicBlock::iterator PrevI = BB->end();
+      if (I != BB->begin()) PrevI = std::prev(I);
+      TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+      // Visit the instructions created by eliminateCallFramePseudoInstr().
+      if (PrevI == BB->end())
+        I = BB->begin(); // The replaced instr was the first in the block.
+      else
+        I = std::next(PrevI);
+      continue;
+    }
+
+    MachineInstr *MI = I;
+    bool DoIncr = true;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if (!MI->getOperand(i).isFI())
+        continue;
+
+      // Frame indices in debug values are encoded in a target independent
+      // way with simply the frame index and offset rather than any
+      // target-specific addressing mode.
+      if (MI->isDebugValue()) {
+        assert(i == 0 && "Frame indices can only appear as the first "
+                         "operand of a DBG_VALUE machine instruction");
+        unsigned Reg;
+        MachineOperand &Offset = MI->getOperand(1);
+        Offset.setImm(Offset.getImm() +
+                      TFI->getFrameIndexReference(
+                          Fn, MI->getOperand(0).getIndex(), Reg));
+        MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+        continue;
+      }
+
+      // TODO: This code should be commoned with the code for
+      // PATCHPOINT. There's no good reason for the difference in
+      // implementation other than historical accident. The only
+      // remaining difference is the unconditional use of the stack
+      // pointer as the base register.
+      if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+        assert((!MI->isDebugValue() || i == 0) &&
+               "Frame indices can only appear as the first operand of a "
+               "DBG_VALUE machine instruction");
+        unsigned Reg;
+        MachineOperand &Offset = MI->getOperand(i + 1);
+        const unsigned refOffset =
+            TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
+                                              Reg);
+
+        Offset.setImm(Offset.getImm() + refOffset);
+        MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+        continue;
+      }
+
+      // Some instructions (e.g. inline asm instructions) can have
+      // multiple frame indices and/or cause eliminateFrameIndex
+      // to insert more than one instruction. We need the register
+      // scavenger to go through all of these instructions so that
+      // it can update its register information. We keep the
+      // iterator at the point before insertion so that we can
+      // revisit them in full.
+      bool AtBeginning = (I == BB->begin());
+      if (!AtBeginning) --I;
+
+      // If this instruction has a FrameIndex operand, we need to
+      // use that target machine register info object to eliminate
+      // it.
+      TRI.eliminateFrameIndex(MI, SPAdj, i,
+                              FrameIndexVirtualScavenging ? nullptr : RS);
+
+      // Reset the iterator if we were at the beginning of the BB.
+      if (AtBeginning) {
+        I = BB->begin();
+        DoIncr = false;
+      }
+
+      MI = nullptr;
+      break;
+    }
+
+    // If we are looking at a call sequence, we need to keep track of
+    // the SP adjustment made by each instruction in the sequence.
+    // This includes both the frame setup/destroy pseudos (handled above),
+    // as well as other instructions that have side effects w.r.t the SP.
+    // Note that this must come after eliminateFrameIndex, because
+    // if I itself referred to a frame index, we shouldn't count its own
+    // adjustment.
+    if (MI && InsideCallSequence)
+      SPAdj += TII.getSPAdjust(MI);
+
+    if (DoIncr && I != BB->end()) ++I;
+
+    // Update register states.
+    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+  }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+void
+WasmPEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+  // Run through the instructions and find any virtual registers.
+  for (MachineFunction::iterator BB = Fn.begin(),
+       E = Fn.end(); BB != E; ++BB) {
+    RS->enterBasicBlock(&*BB);
+
+    int SPAdj = 0;
+
+    // The instruction stream may change in the loop, so check BB->end()
+    // directly.
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      // We might end up here again with a NULL iterator if we scavenged a
+      // register for which we inserted spill code for definition by what was
+      // originally the first instruction in BB.
+      if (I == MachineBasicBlock::iterator(nullptr))
+        I = BB->begin();
+
+      MachineInstr *MI = I;
+      MachineBasicBlock::iterator J = std::next(I);
+      MachineBasicBlock::iterator P =
+          I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+                           : std::prev(I);
+
+      // RS should process this instruction before we might scavenge at this
+      // location. This is because we might be replacing a virtual register
+      // defined by this instruction, and if so, registers killed by this
+      // instruction are available, and defined registers are not.
+      RS->forward(I);
+
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        if (MI->getOperand(i).isReg()) {
+          MachineOperand &MO = MI->getOperand(i);
+          unsigned Reg = MO.getReg();
+          if (Reg == 0)
+            continue;
+          if (!TargetRegisterInfo::isVirtualRegister(Reg))
+            continue;
+
+          // When we first encounter a new virtual register, it
+          // must be a definition.
+          assert(MI->getOperand(i).isDef() &&
+                 "frame index virtual missing def!");
+          // Scavenge a new scratch register
+          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+          unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+          ++NumScavengedRegs;
+
+          // Replace this reference to the virtual register with the
+          // scratch register.
+          assert(ScratchReg && "Missing scratch register!");
+          Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+          // Because this instruction was processed by the RS before this
+          // register was allocated, make sure that the RS now records the
+          // register as being used.
+          RS->setRegUsed(ScratchReg);
+        }
+      }
+
+      // If the scavenger needed to use one of its spill slots, the
+      // spill code will have been inserted in between I and J. This is a
+      // problem because we need the spill code before I: Move I to just
+      // prior to J.
+      if (I != std::prev(J)) {
+        BB->splice(J, &*BB, I);
+
+        // Before we move I, we need to prepare the RS to visit I again.
+        // Specifically, RS will assert if it sees uses of registers that
+        // it believes are undefined. Because we have already processed
+        // register kills in I, when it visits I again, it will believe that
+        // those registers are undefined. To avoid this situation, unprocess
+        // the instruction I.
+        assert(RS->getCurrentPosition() == I &&
+               "The register scavenger has an unexpected position");
+        I = P;
+        RS->unprocess(P);
+      } else
+        ++I;
+    }
+  }
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 000000000000..4ad6eed7385b
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,86 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly late peephole optimizer";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID;
+  WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+FunctionPass *llvm::createWebAssemblyPeephole() {
+  return new WebAssemblyPeephole();
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64: {
+        // Store instructions return their value operand. If we ended up using
+        // the same register for both, replace it with a dead def so that it
+        // can use $discard instead.
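+        // For illustration (made-up virtual registers, simplified operands):
+        //   %v = STORE_I32 ..., %v    ; result register == value operand
+        // becomes
+        //   %dead = STORE_I32 ..., %v ; %dead is a fresh, dead def
+        // so the printer can emit $discard rather than naming a local.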
+        MachineOperand &MO = MI.getOperand(0);
+        unsigned OldReg = MO.getReg();
+        // TODO: Handle SP/physregs
+        if (OldReg == MI.getOperand(3).getReg() &&
+            TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) {
+          Changed = true;
+          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+          MO.setReg(NewReg);
+          MO.setIsDead();
+          MFI.stackifyVReg(NewReg);
+          MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+        }
+      }
+      }
+
+  return Changed;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
new file mode 100644
index 000000000000..9ec66595d8da
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -0,0 +1,175 @@
+//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a virtual register coloring pass.
+///
+/// WebAssembly doesn't have a fixed number of registers, but it is still
+/// desirable to minimize the total number of registers used in each function.
+///
+/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-coloring"
+
+namespace {
+class WebAssemblyRegColoring final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegColoring() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Register Coloring";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervals>();
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyRegColoring::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegColoring() {
+  return new WebAssemblyRegColoring();
+}
+
+// Compute the total spill weight for VReg.
+static float computeWeight(const MachineRegisterInfo *MRI,
+                           const MachineBlockFrequencyInfo *MBFI,
+                           unsigned VReg) {
+  float weight = 0.0f;
+  for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
+    weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+                                            MO.getParent());
+  return weight;
+}
+
+bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Register Coloring **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual
+  // registers could be modified before the longjmp is executed, resulting in
+  // the wrong value being used afterwards. (See .)
+  // TODO: Does WebAssembly need to care about setjmp for register coloring?
+  if (MF.exposesReturnsTwice())
+    return false;
+
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  LiveIntervals *Liveness = &getAnalysis<LiveIntervals>();
+  const MachineBlockFrequencyInfo *MBFI =
+      &getAnalysis<MachineBlockFrequencyInfo>();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  // Gather all register intervals into a list and sort them.
+  unsigned NumVRegs = MRI->getNumVirtRegs();
+  SmallVector<LiveInterval *, 0> SortedIntervals;
+  SortedIntervals.reserve(NumVRegs);
+
+  DEBUG(dbgs() << "Interesting register intervals:\n");
+  for (unsigned i = 0; i < NumVRegs; ++i) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+    if (MFI.isVRegStackified(VReg))
+      continue;
+    // Skip unused registers, which can use $discard.
+    if (MRI->use_empty(VReg))
+      continue;
+
+    LiveInterval *LI = &Liveness->getInterval(VReg);
+    assert(LI->weight == 0.0f);
+    LI->weight = computeWeight(MRI, MBFI, VReg);
+    DEBUG(LI->dump());
+    SortedIntervals.push_back(LI);
+  }
+  DEBUG(dbgs() << '\n');
+
+  // Sort them to put arguments first (since we don't want to rename live-in
+  // registers), by weight next, and then by position.
+  // TODO: Investigate more intelligent sorting heuristics. For starters, we
+  // should try to coalesce adjacent live intervals before non-adjacent ones.
+  std::sort(SortedIntervals.begin(), SortedIntervals.end(),
+            [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+              if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+                return MRI->isLiveIn(LHS->reg);
+              if (LHS->weight != RHS->weight)
+                return LHS->weight > RHS->weight;
+              if (LHS->empty() || RHS->empty())
+                return !LHS->empty() && RHS->empty();
+              return *LHS < *RHS;
+            });
+
+  DEBUG(dbgs() << "Coloring register intervals:\n");
+  SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
+  SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments(
+      SortedIntervals.size());
+  BitVector UsedColors(SortedIntervals.size());
+  bool Changed = false;
+  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+    LiveInterval *LI = SortedIntervals[i];
+    unsigned Old = LI->reg;
+    size_t Color = i;
+    const TargetRegisterClass *RC = MRI->getRegClass(Old);
+
+    // Check if it's possible to reuse any of the used colors.
+    if (!MRI->isLiveIn(Old))
+      for (int C(UsedColors.find_first()); C != -1;
+           C = UsedColors.find_next(C)) {
+        if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
+          continue;
+        for (LiveInterval *OtherLI : Assignments[C])
+          if (!OtherLI->empty() && OtherLI->overlaps(*LI))
+            goto continue_outer;
+        Color = C;
+        break;
+      continue_outer:;
+      }
+
+    unsigned New = SortedIntervals[Color]->reg;
+    SlotMapping[i] = New;
+    Changed |= Old != New;
+    UsedColors.set(Color);
+    Assignments[Color].push_back(LI);
+    DEBUG(dbgs() << "Assigning vreg"
+                 << TargetRegisterInfo::virtReg2Index(LI->reg) << " to vreg"
+                 << TargetRegisterInfo::virtReg2Index(New) << "\n");
+  }
+  if (!Changed)
+    return false;
+
+  // Rewrite register operands.
+  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+    unsigned Old = SortedIntervals[i]->reg;
+    unsigned New = SlotMapping[i];
+    if (Old != New)
+      MRI->replaceRegWith(Old, New);
+  }
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
new file mode 100644
index 000000000000..f621db070b5b
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -0,0 +1,109 @@
+//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass which assigns WebAssembly register
+/// numbers for CodeGen virtual registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-numbering"
+
+namespace {
+class WebAssemblyRegNumbering final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Numbering";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegNumbering() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegNumbering::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegNumbering() {
+  return new WebAssemblyRegNumbering();
+}
+
+bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** Register Numbering **********\n"
+                  "********** Function: "
+               << MF.getName() << '\n');
+
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
+
+  MFI.initWARegs();
+
+  // WebAssembly argument registers are in the same index space as local
+  // variables. Assign the numbers for them first.
+  MachineBasicBlock &EntryMBB = MF.front();
+  for (MachineInstr &MI : EntryMBB) {
+    switch (MI.getOpcode()) {
+    case WebAssembly::ARGUMENT_I32:
+    case WebAssembly::ARGUMENT_I64:
+    case WebAssembly::ARGUMENT_F32:
+    case WebAssembly::ARGUMENT_F64:
+      MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+      break;
+    default:
+      break;
+    }
+  }
+
+  // Then assign regular WebAssembly registers for all remaining used
+  // virtual registers. TODO: Consider sorting the registers by frequency of
+  // use, to maximize usage of small immediate fields.
+  unsigned NumArgRegs = MFI.getParams().size();
+  unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs();
+  unsigned NumStackRegs = 0;
+  unsigned CurReg = 0;
+  for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx);
+    // Handle stackified registers.
+    if (MFI.isVRegStackified(VReg)) {
+      MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++);
+      continue;
+    }
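+    // For illustration: the INT32_MIN tag above means bit 31 distinguishes
+    // stackified registers from locals, e.g. the first stack register is
+    // numbered 0x80000000 while the first non-argument local below will be
+    // NumArgRegs + 0.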
+    // Skip unused registers.
+    if (MRI.use_empty(VReg))
+      continue;
+    if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg)
+      MFI.setWAReg(VReg, NumArgRegs + CurReg++);
+  }
+  // Allocate locals for used physical registers.
+  if (FrameInfo.getStackSize() > 0)
+    MFI.addPReg(WebAssembly::SP32, CurReg++);
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 000000000000..89ef5cdb2bef
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this
+/// form are then marked as "stackified", meaning references to them are
+/// replaced by "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Stackify";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(MachineDominatorsID);
+    AU.addPreservedID(LiveVariablesID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+  return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+  // Write the opaque EXPR_STACK register.
+  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/true,
+                                             /*isImp=*/true));
+
+  // Also read the opaque EXPR_STACK register.
+  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/false,
+                                             /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 000000000000..89ef5cdb2bef
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this form
+/// are then marked as "stackified", meaning references to them are replaced by
+/// "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Stackify";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(MachineDominatorsID);
+    AU.addPreservedID(LiveVariablesID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+  return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+  // Write the opaque EXPR_STACK register.
+  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/true,
+                                             /*isImp=*/true));
+
+  // Also read the opaque EXPR_STACK register.
+  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/false,
+                                             /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+                         AliasAnalysis &AA, LiveIntervals &LIS,
+                         MachineRegisterInfo &MRI) {
+  assert(Def->getParent() == Insert->getParent());
+  bool SawStore = false, SawSideEffects = false;
+  MachineBasicBlock::const_iterator D(Def), I(Insert);
+
+  // Check for register dependencies.
+  for (const MachineOperand &MO : Def->operands()) {
+    if (!MO.isReg() || MO.isUndef())
+      continue;
+    unsigned Reg = MO.getReg();
+
+    // If the register is dead here and at Insert, ignore it.
+    if (MO.isDead() && Insert->definesRegister(Reg) &&
+        !Insert->readsRegister(Reg))
+      continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      // If the physical register is never modified, ignore it.
+      if (!MRI.isPhysRegModified(Reg))
+        continue;
+      // Otherwise, it's a physical register with unknown liveness.
+      return false;
+    }
+
+    // Ask LiveIntervals whether moving this virtual register use or def to
+    // Insert will change which value numbers are seen.
+    const LiveInterval &LI = LIS.getInterval(Reg);
+    VNInfo *DefVNI = MO.isDef() ?
+        LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) :
+        LI.getVNInfoBefore(LIS.getInstructionIndex(Def));
+    assert(DefVNI && "Instruction input missing value number");
+    VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert));
+    if (InsVNI && DefVNI != InsVNI)
+      return false;
+  }
+
+  // Check for memory dependencies and side effects.
+  for (--I; I != D; --I)
+    SawSideEffects |= I->isSafeToMove(&AA, SawStore);
+  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
+bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** Register Stackifying **********\n"
+                  "********** Function: "
+               << MF.getName() << '\n');
+
+  bool Changed = false;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  // Walk the instructions from the bottom up. Currently we don't look past
+  // block boundaries, and the blocks aren't ordered so the block visitation
+  // order isn't significant, but we may want to change this in the future.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : reverse(MBB)) {
+      MachineInstr *Insert = &MI;
+      // Don't nest anything inside a phi.
+      if (Insert->getOpcode() == TargetOpcode::PHI)
+        break;
+
+      // Don't nest anything inside an inline asm, because we don't have
+      // constraints for $push inputs.
+      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+        break;
+
+      // Iterate through the inputs in reverse order, since we'll be pulling
+      // operands off the stack in LIFO order.
+      bool AnyStackified = false;
+      for (MachineOperand &Op : reverse(Insert->uses())) {
+        // We're only interested in explicit virtual register operands.
+        if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+          continue;
+
+        unsigned Reg = Op.getReg();
+
+        // Only consider registers with a single definition.
+        // TODO: Eventually we may relax this, to stackify phi transfers.
+        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+        if (!Def)
+          continue;
+
+        // There's no use in nesting implicit defs inside anything.
+        if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+          continue;
+
+        // Don't nest an INLINE_ASM def into anything, because we don't have
+        // constraints for $pop outputs.
+        if (Def->getOpcode() == TargetOpcode::INLINEASM)
+          continue;
+
+        // Don't nest PHIs inside of anything.
+        if (Def->getOpcode() == TargetOpcode::PHI)
+          continue;
+
+        // Argument instructions represent live-in registers and not real
+        // instructions.
+        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+          continue;
+
+        // Single-use expression trees require defs that have one use.
+        // TODO: Eventually we'll relax this, to take advantage of set_local
+        // returning its result.
+        if (!MRI.hasOneUse(Reg))
+          continue;
+
+        // For now, be conservative and don't look across block boundaries.
+        // TODO: Be more aggressive?
+        if (Def->getParent() != &MBB)
+          continue;
+
+        // Don't move instructions that have side effects or memory dependencies
+        // or other complications.
+        if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
+          continue;
+
+        Changed = true;
+        AnyStackified = true;
+        // Move the def down and nest it in the current instruction.
+        MBB.splice(Insert, &MBB, Def);
+        LIS.handleMove(Def);
+        MFI.stackifyVReg(Reg);
+        ImposeStackOrdering(Def);
+        Insert = Def;
+      }
+      if (AnyStackified)
+        ImposeStackOrdering(&MI);
+    }
+  }
+
+  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
+  // so that it never looks like a use-before-def.
+  if (Changed) {
+    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+    for (MachineBasicBlock &MBB : MF)
+      MBB.addLiveIn(WebAssembly::EXPR_STACK);
+  }
+
+#ifndef NDEBUG
+  // Verify that pushes and pops are performed in LIFO order.
+  SmallVector<unsigned, 0> Stack;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+        if (!MO.isReg())
+          continue;
+        unsigned VReg = MO.getReg();
+
+        // Don't stackify physregs like SP or FP.
+        if (!TargetRegisterInfo::isVirtualRegister(VReg))
+          continue;
+
+        if (MFI.isVRegStackified(VReg)) {
+          if (MO.isDef())
+            Stack.push_back(VReg);
+          else
+            assert(Stack.pop_back_val() == VReg);
+        }
+      }
+    }
+    // TODO: Generalize this code to support keeping values on the stack across
+    // basic block boundaries.
+    assert(Stack.empty());
+  }
+#endif
+
+  return Changed;
+}
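The NDEBUG block at the end of the pass asserts stack discipline: every stackified def is a push, its single use is a pop, and pops must mirror pushes in LIFO order. The same invariant in a self-contained toy (a hypothetical encoding where positive values push a register and negative values pop it):

#include <cassert>
#include <vector>

int main() {
  // Operand stream for "(a + b) * c" after stackification: each def
  // immediately precedes its single use, so pops match the most recent
  // unmatched push.
  std::vector<int> Ops = {1, 2, -2, -1, 3, -3};
  std::vector<int> Stack;
  for (int Op : Ops) {
    if (Op > 0) {
      Stack.push_back(Op); // a stackified def pushes its register
    } else {
      assert(!Stack.empty() && Stack.back() == -Op && "LIFO order violated");
      Stack.pop_back();    // its single use pops it
    }
  }
  assert(Stack.empty() && "every push was popped");
}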
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 385c40bf6693..dcada45f96d1 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -43,7 +43,7 @@ WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
 }
 
 BitVector
-WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
   BitVector Reserved(getNumRegs());
   for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32,
                    WebAssembly::FP64})
@@ -52,9 +52,37 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 }
 
 void WebAssemblyRegisterInfo::eliminateFrameIndex(
-    MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
-    RegScavenger *RS) const {
-  llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME
+    MachineBasicBlock::iterator II, int SPAdj,
+    unsigned FIOperandNum, RegScavenger * /*RS*/) const {
+  assert(SPAdj == 0);
+  MachineInstr &MI = *II;
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  const MachineFrameInfo &MFI = *MF.getFrameInfo();
+  int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+
+  if (MI.mayLoadOrStore()) {
+    // If this is a load or store, make it relative to SP and fold the frame
+    // offset directly in.
+    assert(MI.getOperand(1).getImm() == 0 &&
+           "Can't eliminate FI yet if offset is already set");
+    MI.getOperand(1).setImm(FrameOffset);
+    MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+  } else {
+    // Otherwise create an i32.add SP, offset and make it the operand.
+    auto &MRI = MF.getRegInfo();
+    const auto *TII = MF.getSubtarget().getInstrInfo();
+
+    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg)
+        .addImm(FrameOffset);
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg)
+        .addReg(WebAssembly::SP32)
+        .addReg(OffsetReg);
+    MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false);
+  }
 }
 
 unsigned
@@ -67,21 +95,11 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
 }
 
-bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
-  return !MF.getFunction()->hasFnAttribute("no-realign-stack");
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool WebAssemblyRegisterInfo::needsStackRealignment(
-    const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
-  const Function *F = MF.getFunction();
-  unsigned StackAlign = TFI->getStackAlignment();
-  bool requiresRealignment =
-      ((MFI->getMaxAlignment() > StackAlign) ||
-       F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::StackAlignment));
-
-  return requiresRealignment && canRealignStack(MF);
+const TargetRegisterClass *
+WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                            unsigned Kind) const {
+  assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+    return &WebAssembly::I64RegClass;
+  return &WebAssembly::I32RegClass;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index dbdb9d0457af..ad1d71eebf22 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -42,9 +42,9 @@ public:
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const override;
 
-  // Base pointer (stack realignment) support.
-  bool canRealignStack(const MachineFunction &MF) const;
-  bool needsStackRealignment(const MachineFunction &MF) const override;
+  const TargetRegisterClass *
+  getPointerRegClass(const MachineFunction &MF,
+                     unsigned Kind = 0) const override;
 };
 
 } // end namespace llvm
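In eliminateFrameIndex above, a frame index resolves to SP plus `StackSize + ObjectOffset`: loads and stores fold that sum into their offset immediate, while other users get an explicit CONST_I32/ADD_I32 pair. The arithmetic as a standalone sketch (toy types, illustrative values only):

#include <cassert>

// Toy model: objects live below the incoming SP; the prologue subtracts
// StackSize, so an object's address is SP + (StackSize + ObjectOffset).
struct ToyFrame {
  int StackSize;        // bytes reserved by the prologue
  int ObjectOffsets[2]; // per-frame-index offsets (negative)
};

static int frameOffset(const ToyFrame &F, int FrameIndex) {
  return F.StackSize + F.ObjectOffsets[FrameIndex];
}

int main() {
  ToyFrame F = {32, {-8, -24}};
  // A load of slot 0 can encode this directly: i32.load offset=24 ($sp).
  assert(frameOffset(F, 0) == 24);
  // A non-memory user of slot 1 needs: $r = i32.add $sp, 8.
  assert(frameOffset(F, 1) == 8);
}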
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 2ba42eb94a40..80a83fa76b57 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -33,22 +33,26 @@ def FP64 : WebAssemblyReg<"%FP64">;
 def SP32 : WebAssemblyReg<"%SP32">;
 def SP64 : WebAssemblyReg<"%SP64">;
 
-// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do
-//           away with it? Try deleting once the backend works.
-// WebAssembly uses virtual registers, but the backend defines a few physical
-// registers here to keep SDAG and the MachineInstr layers happy.
-foreach i = 0-4 in {
-  def I#i : WebAssemblyReg<"%i."#i>; // i32
-  def L#i : WebAssemblyReg<"%l."#i>; // i64
-  def F#i : WebAssemblyReg<"%f."#i>; // f32
-  def D#i : WebAssemblyReg<"%d."#i>; // f64
-}
+// The register allocation framework requires register classes have at least
+// one register, so we define a few for the floating point register classes
+// since we otherwise don't need a physical register in those classes.
+def F32_0 : WebAssemblyReg<"%f32.0">;
+def F64_0 : WebAssemblyReg<"%f64.0">;
+
+// The expression stack "register". This is an opaque entity which serves to
+// order uses and defs that must remain in LIFO order.
+def EXPR_STACK : WebAssemblyReg<"STACK">;
+
+// The incoming arguments "register". This is an opaque entity which serves to
+// order the ARGUMENT instructions that are emulating live-in registers and
+// must not be scheduled below other instructions.
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">;
 
 //===----------------------------------------------------------------------===//
 // Register classes
 //===----------------------------------------------------------------------===//
 
-def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>;
-def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>;
-def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
-def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>;
+def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
+def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
+def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
+def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
new file mode 100644
index 000000000000..4e08b2b079eb
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -0,0 +1,124 @@
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value. This is to enable
+/// an optimization wherein uses of the stored value can be replaced by uses of
+/// the store's result value, making the stored value register more likely to
+/// be single-use, thus more likely to be useful to register stackifying, and
+/// potentially also exposing the store to register stackifying. These both can
+/// reduce get_local/set_local traffic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Store Results";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyStoreResults::ID = 0;
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+  return new WebAssemblyStoreResults();
+}
+
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Store Results **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+
+  assert(MRI.isSSA() && "StoreResults depends on SSA form");
+
+  for (auto &MBB : MF) {
+    DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64:
+        unsigned ToReg = MI.getOperand(0).getReg();
+        unsigned FromReg = MI.getOperand(3).getReg();
+        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+          MachineOperand &O = *I++;
+          MachineInstr *Where = O.getParent();
+          if (Where->getOpcode() == TargetOpcode::PHI) {
+            // PHIs use their operands on their incoming CFG edges rather than
+            // in their parent blocks. Get the basic block paired with this use
+            // of FromReg and check that MI's block dominates it.
+            MachineBasicBlock *Pred =
+                Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+            if (!MDT.dominates(&MBB, Pred))
+              continue;
+          } else {
+            // For a non-PHI, check that MI dominates the instruction in the
+            // normal way.
+            if (&MI == Where || !MDT.dominates(&MI, Where))
+              continue;
+          }
+          Changed = true;
+          DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
+                       << " from " << MI << "\n");
+          O.setReg(ToReg);
+          // If the store's def was previously dead, it is no longer. But the
+          // dead flag shouldn't be set yet.
+          assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
+        }
+      }
+  }
+
+  return Changed;
+}
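The rewrite loop above is a dominance query plus a register replacement. In a single straight-line block, "the store dominates the use" degenerates to "the store comes earlier", which this toy captures (made-up registers and instruction indices, not the LLVM API):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

int main() {
  const int FromReg = 1, ToReg = 2;  // store reads FromReg, defines ToReg
  const std::size_t StoreIdx = 3;    // position of the store in the block

  // (instruction index, register read) pairs that currently read FromReg.
  std::vector<std::pair<std::size_t, int>> Uses = {
      {1, FromReg}, // above the store: must keep reading FromReg
      {5, FromReg}, // below the store: may read the store's result instead
  };

  for (auto &U : Uses)
    if (U.second == FromReg && U.first > StoreIdx)
      U.second = ToReg; // the O.setReg(ToReg) step, in miniature

  assert(Uses[0].second == FromReg && Uses[1].second == ToReg);
}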
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 3d9e7aacbfbf..cb2d5a63a19f 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
       TLInfo(TM, *this) {}
 
 bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
+bool WebAssemblySubtarget::useAA() const { return true; }
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 6f1761940930..f530a290fa0e 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -61,9 +61,15 @@ public:
   const WebAssemblyTargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
+  const WebAssemblyInstrInfo *getInstrInfo() const override {
+    return &InstrInfo;
+  }
+  const WebAssemblyRegisterInfo *getRegisterInfo() const override {
+    return &getInstrInfo()->getRegisterInfo();
+  }
   const Triple &getTargetTriple() const { return TargetTriple; }
   bool enableMachineScheduler() const override;
-  bool useAA() const override { return true; }
+  bool useAA() const override;
 
   // Predicates used by WebAssemblyInstrInfo.td.
   bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 6f93248bd13c..e31ea46de9f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,11 +45,16 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
     CodeGenOpt::Level OL)
-    : LLVMTargetMachine(T, TT.isArch64Bit()
-                               ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
-                               : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+    : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
+                                            : "e-p:32:32-i64:64-n32:64-S128",
                         TT, CPU, FS, Options, RM, CM, OL),
       TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+  // WebAssembly type-checks expressions, but a noreturn function with a return
+  // type that doesn't match the context will cause a check failure. So we lower
+  // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
+  // 'unreachable' expression which is meant for that case.
+  this->Options.TrapUnreachable = true;
+
   initAsmInfo();
 
   // We need a reducible CFG, so disable some optimizations which tend to
@@ -77,7 +82,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
     // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
-    I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+    I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
   }
   return I.get();
 }
@@ -94,23 +99,18 @@ public:
   }
 
   FunctionPass *createTargetRegisterAllocator(bool) override;
-  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
-  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
 
   void addIRPasses() override;
-  bool addPreISel() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
   void addPreRegAlloc() override;
-  void addRegAllocPasses(bool Optimized);
   void addPostRegAlloc() override;
-  void addPreSched2() override;
   void addPreEmitPass() override;
 };
 } // end anonymous namespace
 
 TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis([this](Function &F) {
+  return TargetIRAnalysis([this](const Function &F) {
     return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
   });
 }
@@ -124,50 +124,86 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
   return nullptr; // No reg alloc
 }
 
-void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
-  addRegAllocPasses(false);
-}
-
-void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
-  addRegAllocPasses(true);
-}
-
 //===----------------------------------------------------------------------===//
 // The following functions are called from lib/CodeGen/Passes.cpp to modify
 // the CodeGen pass sequence.
 //===----------------------------------------------------------------------===//
 
 void WebAssemblyPassConfig::addIRPasses() {
-  // FIXME: the default for this option is currently POSIX, whereas
-  // WebAssembly's MVP should default to Single.
   if (TM->Options.ThreadModel == ThreadModel::Single)
+    // In "single" mode, atomics get lowered to non-atomics.
     addPass(createLowerAtomicPass());
   else
     // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
     // control specifically what gets lowered.
     addPass(createAtomicExpandPass(TM));
 
+  // Optimize "returned" function attributes.
+  addPass(createWebAssemblyOptimizeReturned());
+
   TargetPassConfig::addIRPasses();
 }
 
-bool WebAssemblyPassConfig::addPreISel() { return false; }
-
 bool WebAssemblyPassConfig::addInstSelector() {
+  (void)TargetPassConfig::addInstSelector();
   addPass(
       createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+  // Run the argument-move pass immediately after the ScheduleDAG scheduler
+  // so that we can fix up the ARGUMENT instructions before anything else
+  // sees them in the wrong place.
+  addPass(createWebAssemblyArgumentMove());
   return false;
 }
 
-bool WebAssemblyPassConfig::addILPOpts() { return true; }
+bool WebAssemblyPassConfig::addILPOpts() {
+  (void)TargetPassConfig::addILPOpts();
+  return true;
+}
 
-void WebAssemblyPassConfig::addPreRegAlloc() {}
+void WebAssemblyPassConfig::addPreRegAlloc() {
+  TargetPassConfig::addPreRegAlloc();
 
-void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+  // Prepare store instructions for register stackifying.
+  addPass(createWebAssemblyStoreResults());
+}
 
-void WebAssemblyPassConfig::addPostRegAlloc() {}
+void WebAssemblyPassConfig::addPostRegAlloc() {
+  // TODO: The following CodeGen passes don't currently support code containing
+  // virtual registers. Consider removing their restrictions and re-enabling
+  // them.
+  //
+  // We use our own PrologEpilogInserter which is very slightly modified to
+  // tolerate virtual registers.
+  disablePass(&PrologEpilogCodeInserterID);
+  // Fails with: should be run after register allocation.
+  disablePass(&MachineCopyPropagationID);
 
-void WebAssemblyPassConfig::addPreSched2() {}
+  // Mark registers as representing wasm's expression stack.
+  addPass(createWebAssemblyRegStackify());
 
-void WebAssemblyPassConfig::addPreEmitPass() {}
+  // Run the register coloring pass to reduce the total number of registers.
+  addPass(createWebAssemblyRegColoring());
+
+  TargetPassConfig::addPostRegAlloc();
+
+  // Run WebAssembly's version of the PrologEpilogInserter. Target-independent
+  // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run
+  // PEI before ShrinkWrap but otherwise in the same position in the order.
+  addPass(createWebAssemblyPEI());
+}
+
+void WebAssemblyPassConfig::addPreEmitPass() {
+  TargetPassConfig::addPreEmitPass();
+
+  // Put the CFG in structured form; insert BLOCK and LOOP markers.
+  addPass(createWebAssemblyCFGStackify());
+
+  // Lower br_unless into br_if.
+  addPass(createWebAssemblyLowerBrUnless());
+
+  // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
+  addPass(createWebAssemblyRegNumbering());
+
+  // Perform the very last peephole optimizations on the code.
+  addPass(createWebAssemblyPeephole());
+}
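Most of the overridden hooks above exist to pin an exact late-pipeline order. Rendered as a plain ordered list, this is a sketch of the order the code above establishes, not generated output:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Order matters: stackify before coloring (coloring must respect
  // stackified registers), numbering after CFG stackification, and the
  // custom PEI in place of the disabled target-independent inserter.
  const std::vector<std::string> LatePasses = {
      "WebAssemblyRegStackify", "WebAssemblyRegColoring", "WebAssemblyPEI",
      "WebAssemblyCFGStackify", "WebAssemblyLowerBrUnless",
      "WebAssemblyRegNumbering", "WebAssemblyPeephole"};
  for (const std::string &P : LatePasses)
    std::cout << P << '\n';
}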
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
new file mode 100644
index 000000000000..74e33b93e00d
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the functions of the WebAssembly-specific subclass
+/// of TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+                                             const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ee78b945ada2..39e50c9c575d 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -16,50 +16,13 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
 
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 
 namespace llvm {
 
-class GlobalVariable;
-
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
 public:
-  WebAssemblyTargetObjectFile() {
-    TextSection = nullptr;
-    DataSection = nullptr;
-    BSSSection = nullptr;
-    ReadOnlySection = nullptr;
-
-    StaticCtorSection = nullptr;
-    StaticDtorSection = nullptr;
-    LSDASection = nullptr;
-    EHFrameSection = nullptr;
-    DwarfAbbrevSection = nullptr;
-    DwarfInfoSection = nullptr;
-    DwarfLineSection = nullptr;
-    DwarfFrameSection = nullptr;
-    DwarfPubTypesSection = nullptr;
-    DwarfDebugInlineSection = nullptr;
-    DwarfStrSection = nullptr;
-    DwarfLocSection = nullptr;
-    DwarfARangesSection = nullptr;
-    DwarfRangesSection = nullptr;
-  }
-
-  MCSection *getSectionForConstant(SectionKind Kind,
-                                   const Constant *C) const override {
-    return ReadOnlySection;
-  }
-
-  MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                                      Mangler &Mang,
-                                      const TargetMachine &TM) const override {
-    return DataSection;
-  }
-
-  MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
-                                    Mangler &Mang,
-                                    const TargetMachine &TM) const override;
+  void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index fa88ed526df2..356631711921 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -21,8 +21,7 @@ using namespace llvm;
 #define DEBUG_TYPE "wasmtti"
 
 TargetTransformInfo::PopcntSupportKind
-WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  // TODO: Make Math.popcount32 happen in WebAssembly.
-  return TTI::PSK_Software;
+  return TargetTransformInfo::PSK_FastHardware;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 7ffb6047b963..26dc388cc922 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -38,7 +38,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
   const WebAssemblyTargetLowering *getTLI() const { return TLI; }
 
 public:
-  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
@@ -54,7 +54,7 @@ public:
 
   // TODO: Implement more Scalar TTI for WebAssembly
 
-  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
 
   /// @}
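The PSK_Software to PSK_FastHardware flip above tells the optimizer it may freely form ctpop, since WebAssembly has a native popcnt instruction; PSK_Software implies expansions like the bit-twiddling routine below. A self-contained comparison (uses the GCC/Clang __builtin_popcount builtin):

#include <cassert>
#include <cstdint>

// The classic SWAR expansion a PSK_Software target would be left with.
static uint32_t popcountSoftware(uint32_t X) {
  X = X - ((X >> 1) & 0x55555555u);
  X = (X & 0x33333333u) + ((X >> 2) & 0x33333333u);
  X = (X + (X >> 4)) & 0x0F0F0F0Fu;
  return (X * 0x01010101u) >> 24;
}

int main() {
  for (uint32_t V : {0u, 1u, 0xFFu, 0xDEADBEEFu})
    assert(popcountSoftware(V) ==
           static_cast<uint32_t>(__builtin_popcount(V)));
}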
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
new file mode 100644
index 000000000000..ee9d060f339e
--- /dev/null
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -0,0 +1,311 @@
+# Tests which are known to fail from the GCC torture test suite.
+
+# Core dump.
+920908-1.c
+pr38151.c
+va-arg-22.c
+
+# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
+struct-ret-1.c
+va-arg-11.c
+va-arg-21.c
+va-arg-24.c
+va-arg-trap-1.c
+
+# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed.
+20000815-1.c
+20010129-1.c
+930628-1.c
+980707-1.c
+
+# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed.
+20030914-2.c
+20040703-1.c
+20081117-1.c
+920625-1.c
+931004-11.c
+931004-13.c
+980223.c
+bitfld-5.c
+complex-7.c
+pr38969.c
+pr51323.c
+pr52129.c
+pr57130.c
+
+# These were previously "Cannot select FrameIndex." Now most of them fail
+# because they contain call frame pseudos (e.g. call a vararg func),
+# frame pointers, or similar. This list will be updated again soon.
+20000519-1.c
+20000706-4.c
+20000706-5.c
+20000801-2.c
+20000801-4.c
+20011126-2.c
+
+20020529-1.c
+20021024-1.c
+
+20030828-1.c
+20030914-1.c
+
+20040302-1.c
+20040625-1.c
+20040823-1.c
+
+20041113-1.c
+
+20041214-1.c
+
+20050826-2.c
+
+20071213-1.c
+
+20080506-2.c
+20080519-1.c
+
+20081103-1.c
+20090113-1.c
+20090113-2.c
+20090113-3.c
+
+20090623-1.c
+
+920501-6.c
+920501-8.c
+920726-1.c
+930518-1.c
+
+931004-10.c
+931004-12.c
+931004-14.c
+931004-2.c
+931004-4.c
+931004-6.c
+931004-8.c
+
+980205.c
+980608-1.c
+980709-1.c
+980716-1.c
+990127-1.c
+
+991216-2.c
+
+#cbrt.c
+complex-5.c
+complex-6.c
+
+enum-3.c
+fprintf-chk-1.c
+frame-address.c
+loop-15.c
+loop-ivopts-2.c
+mayalias-3.c
+
+multi-ix.c
+
+pr20466-1.c
+
+
+pr28778.c
+pr28982b.c
+
+pr30778.c
+pr31448-2.c
+pr31448.c
+
+pr33870-1.c
+pr33870.c
+
+pr38051.c
+
+pr39100.c
+
+pr39339.c
+pr40022.c
+pr40657.c
+
+pr43987.c
+
+pr44575.c
+
+pr44942.c
+pr46309.c
+pr47538.c
+pr47925.c
+
+pr49390.c
+pr49419.c
+
+#pr51877.c
+
+#pr52979-1.c
+#pr52979-2.c
+pr53645-2.c
+pr53645.c
+
+pr56205.c
+
+pr56866.c
+
+pr57876.c
+pr58277-1.c
+
+pr59643.c
+
+printf-chk-1.c
+pta-field-1.c
+pta-field-2.c
+
+stdarg-1.c
+stdarg-2.c
+stdarg-3.c
+stdarg-4.c
+strct-stdarg-1.c
+strct-varg-1.c
+
+va-arg-1.c
+va-arg-10.c
+va-arg-12.c
+va-arg-13.c
+va-arg-14.c
+va-arg-15.c
+va-arg-16.c
+va-arg-17.c
+va-arg-18.c
+va-arg-19.c
+va-arg-2.c
+va-arg-20.c
+va-arg-23.c
+va-arg-26.c
+va-arg-4.c
+va-arg-5.c
+va-arg-6.c
+va-arg-7.c
+va-arg-8.c
+va-arg-9.c
+va-arg-pack-1.c
+vfprintf-1.c
+vfprintf-chk-1.c
+vprintf-1.c
+vprintf-chk-1.c
+
+# Cannot select callseq_end.
+20040811-1.c
+pr43220.c
+vla-dealloc-1.c
+
+# Cannot select brind.
+20071210-1.c
+920501-4.c
+920501-5.c
+
+# Cannot select BlockAddress.
+comp-goto-1.c
+980526-1.c
+990208-1.c
+
+# WebAssembly hasn't implemented byval arguments.
+20000412-3.c
+20000419-1.c
+20000706-1.c
+20000706-2.c
+20000707-1.c
+20000717-1.c
+20000717-5.c
+20000808-1.c
+20010605-2.c
+20011113-1.c
+20020215-1.c
+20020810-1.c
+20021118-1.c
+20040707-1.c
+20040709-1.c
+20040709-2.c
+20041201-1.c
+20050713-1.c
+20070614-1.c
+920908-2.c
+921112-1.c
+921117-1.c
+921123-2.c
+921204-1.c
+930126-1.c
+930208-1.c
+931004-5.c
+931004-9.c
+931031-1.c
+950607-2.c
+960416-1.c
+990525-1.c
+991118-1.c
+bf64-1.c
+complex-1.c
+complex-2.c
+pr15262-2.c
+pr20621-1.c
+pr23135.c
+pr30185.c
+pr42248.c
+
+# unimplemented operation lowering.
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+990826-0.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+scal-to-vec1.c
+scal-to-vec2.c
+scal-to-vec3.c
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+20050107-1.c
+20050119-1.c
+20050119-2.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 2c1926edc26b..b022a41b192f 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,7 +1,4 @@
 add_llvm_library(LLVMX86AsmParser
   X86AsmInstrumentation.cpp
   X86AsmParser.cpp
-
-  LINK_LIBS
-  LLVMX86CodeGen
   )
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 284bfd052ccf..9f94d5d38864 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@ type = Library
 name = X86AsmParser
 parent = X86
-required_libraries = MC MCParser Support X86CodeGen X86Desc X86Info
+required_libraries = MC MCParser Support X86Desc X86Info
 add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile
index fb9760796622..f834dfc300a1 100644
--- a/lib/Target/X86/AsmParser/Makefile
+++ b/lib/Target/X86/AsmParser/Makefile
@@ -9,7 +9,7 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86AsmParser
 
-# Hack: we need to include 'main' x86 target directory to grab private headers
+# Hack: we need to include 'main' X86 target directory to grab private headers
 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0f3d82..09cc53a8e6d3 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,10 +10,8 @@
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "X86AsmInstrumentation.h"
 #include "X86Operand.h"
-#include "X86RegisterInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
@@ -118,11 +116,6 @@ bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
 
 bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
 
-std::string FuncName(unsigned AccessSize, bool IsWrite) {
-  return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
-         utostr(AccessSize);
-}
-
 class X86AddressSanitizer : public X86AsmInstrumentation {
 public:
   struct RegisterContext {
@@ -136,26 +129,26 @@ public:
   public:
     RegisterContext(unsigned AddressReg, unsigned ShadowReg,
                     unsigned ScratchReg) {
-      BusyRegs.push_back(convReg(AddressReg, MVT::i64));
-      BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
-      BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+      BusyRegs.push_back(convReg(AddressReg, 64));
+      BusyRegs.push_back(convReg(ShadowReg, 64));
+      BusyRegs.push_back(convReg(ScratchReg, 64));
     }
 
-    unsigned AddressReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+    unsigned AddressReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
     }
 
-    unsigned ShadowReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+    unsigned ShadowReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
    }
 
-    unsigned ScratchReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+    unsigned ScratchReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
     }
 
     void AddBusyReg(unsigned Reg) {
       if (Reg != X86::NoRegister)
-        BusyRegs.push_back(convReg(Reg, MVT::i64));
+        BusyRegs.push_back(convReg(Reg, 64));
     }
 
     void AddBusyRegs(const X86Operand &Op) {
@@ -163,36 +156,36 @@ public:
       AddBusyReg(Op.getMemIndexReg());
     }
 
-    unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+    unsigned ChooseFrameReg(unsigned Size) const {
       static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
                                               X86::RCX, X86::RDX, X86::RDI,
                                               X86::RSI };
       for (unsigned Reg : Candidates) {
         if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
-          return convReg(Reg, VT);
+          return convReg(Reg, Size);
       }
       return X86::NoRegister;
     }
 
  private:
-    unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
-      return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+    unsigned convReg(unsigned Reg, unsigned Size) const {
+      return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
    }
 
    std::vector<unsigned> BusyRegs;
  };
 
-  X86AddressSanitizer(const MCSubtargetInfo &STI)
+  X86AddressSanitizer(const MCSubtargetInfo *&STI)
       : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
 
-  virtual ~X86AddressSanitizer() {}
+  ~X86AddressSanitizer() override {}
 
   // X86AsmInstrumentation implementation:
-  virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
-                                            OperandVector &Operands,
-                                            MCContext &Ctx,
-                                            const MCInstrInfo &MII,
-                                            MCStreamer &Out) override {
+  void InstrumentAndEmitInstruction(const MCInst &Inst,
+                                    OperandVector &Operands,
+                                    MCContext &Ctx,
+                                    const MCInstrInfo &MII,
+                                    MCStreamer &Out) override {
     InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
     if (RepPrefix)
       EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
@@ -240,17 +233,16 @@ public:
 protected:
   void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
 
-  void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
-               MCStreamer &Out) {
-    assert(VT == MVT::i32 || VT == MVT::i64);
+  void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
+    assert(Size == 32 || Size == 64);
     MCInst Inst;
-    Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
-    Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, VT)));
+    Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
+    Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
     Op.addMemOperands(Inst, 5);
     EmitInstruction(Out, Inst);
   }
 
-  void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+  void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
                                 unsigned Reg, MCContext &Ctx, MCStreamer &Out);
 
   // Creates new memory operand with Displacement added to an original
@@ -261,13 +253,13 @@ protected:
                                            MCContext &Ctx, int64_t *Residue);
 
   bool is64BitMode() const {
-    return STI.getFeatureBits()[X86::Mode64Bit];
+    return STI->getFeatureBits()[X86::Mode64Bit];
   }
   bool is32BitMode() const {
-    return STI.getFeatureBits()[X86::Mode32Bit];
+    return STI->getFeatureBits()[X86::Mode32Bit];
   }
   bool is16BitMode() const {
-    return STI.getFeatureBits()[X86::Mode16Bit];
+    return STI->getFeatureBits()[X86::Mode16Bit];
   }
 
   unsigned getPointerWidth() {
@@ -437,7 +429,7 @@ void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
 }
 
 void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
-                                                   MVT::SimpleValueType VT,
+                                                   unsigned Size,
                                                    unsigned Reg, MCContext &Ctx,
                                                    MCStreamer &Out) {
   int64_t Displacement = 0;
@@ -450,14 +442,14 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
 
   // Emit Op as is.
   if (Displacement == 0) {
-    EmitLEA(Op, VT, Reg, Out);
+    EmitLEA(Op, Size, Reg, Out);
     return;
   }
 
   int64_t Residue;
   std::unique_ptr<X86Operand> NewOp =
       AddDisplacement(Op, Displacement, Ctx, &Residue);
-  EmitLEA(*NewOp, VT, Reg, Out);
+  EmitLEA(*NewOp, Size, Reg, Out);
 
   while (Residue != 0) {
     const MCConstantExpr *Disp =
@@ -465,7 +457,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
     std::unique_ptr<X86Operand> DispOp =
         X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
                               SMLoc());
-    EmitLEA(*DispOp, VT, Reg, Out);
+    EmitLEA(*DispOp, Size, Reg, Out);
     Residue -= Disp->getValue();
   }
 }
@@ -503,16 +495,16 @@ class X86AddressSanitizer32 : public X86AddressSanitizer {
 public:
   static const long kShadowOffset = 0x20000000;
 
-  X86AddressSanitizer32(const MCSubtargetInfo &STI)
+  X86AddressSanitizer32(const MCSubtargetInfo *&STI)
       : X86AddressSanitizer(STI) {}
 
-  virtual ~X86AddressSanitizer32() {}
+  ~X86AddressSanitizer32() override {}
 
   unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
     unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
     if (FrameReg == X86::NoRegister)
       return FrameReg;
-    return getX86SubSuperRegister(FrameReg, MVT::i32);
+    return getX86SubSuperRegister(FrameReg, 32);
   }
 
   void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -535,10 +527,10 @@ public:
     OrigSPOffset += 4;
   }
 
-  virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+  void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+                                    MCContext &Ctx,
+                                    MCStreamer &Out) override {
+    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
     assert(LocalFrameReg != X86::NoRegister);
 
     const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -558,24 +550,24 @@ public:
           MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
     }
 
-    SpillReg(Out, RegCtx.AddressReg(MVT::i32));
-    SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
-    if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
-      SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+    SpillReg(Out, RegCtx.AddressReg(32));
+    SpillReg(Out, RegCtx.ShadowReg(32));
+    if (RegCtx.ScratchReg(32) != X86::NoRegister)
+      SpillReg(Out, RegCtx.ScratchReg(32));
     StoreFlags(Out);
   }
 
-  virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+  void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+                                    MCContext &Ctx,
+                                    MCStreamer &Out) override {
+    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
     assert(LocalFrameReg != X86::NoRegister);
 
     RestoreFlags(Out);
-    if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
-      RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
-    RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
-    RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+    if (RegCtx.ScratchReg(32) != X86::NoRegister)
+      RestoreReg(Out, RegCtx.ScratchReg(32));
+    RestoreReg(Out, RegCtx.ShadowReg(32));
+    RestoreReg(Out, RegCtx.AddressReg(32));
 
     unsigned FrameReg = GetFrameReg(Ctx, Out);
     if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
@@ -586,18 +578,18 @@ public:
     }
   }
 
-  virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx,
-                                         MCStreamer &Out) override;
-  virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx,
-                                         MCStreamer &Out) override;
-  virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
-                                  MCStreamer &Out) override;
+  void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+                                 bool IsWrite,
+                                 const RegisterContext &RegCtx,
+                                 MCContext &Ctx,
+                                 MCStreamer &Out) override;
+  void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+                                 bool IsWrite,
+                                 const RegisterContext &RegCtx,
+                                 MCContext &Ctx,
+                                 MCStreamer &Out) override;
+  void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+                          MCStreamer &Out) override;
 
 private:
   void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
@@ -610,10 +602,11 @@ private:
                             .addReg(X86::ESP)
                             .addImm(-16));
     EmitInstruction(
-        Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
+        Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
 
-    const std::string &Fn = FuncName(AccessSize, IsWrite);
-    MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+    MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
                                            (IsWrite ? "store" : "load") +
+                                            llvm::Twine(AccessSize));
     const MCSymbolRefExpr *FnExpr =
         MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
     EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -623,14 +616,14 @@ private:
 void X86AddressSanitizer32::InstrumentMemOperandSmall(
     X86Operand &Op, unsigned AccessSize, bool IsWrite,
     const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
-  unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+  unsigned AddressRegI32 = RegCtx.AddressReg(32);
+  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+  unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
 
-  assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
-  unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+  assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+  unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
 
-  ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
 
   EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
                            AddressRegI32));
@@ -673,7 +666,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
     std::unique_ptr<X86Operand> Op(
        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
                              SMLoc(), SMLoc()));
-    EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+    EmitLEA(*Op, 32, ScratchRegI32, Out);
     break;
   }
   case 4:
@@ -698,10 +691,10 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
 void X86AddressSanitizer32::InstrumentMemOperandLarge(
     X86Operand &Op, unsigned AccessSize, bool IsWrite,
     const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+  unsigned AddressRegI32 = RegCtx.AddressReg(32);
+  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
 
-  ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
 
   EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
                            AddressRegI32));
RegCtx.ShadowReg(64)); + SpillReg(Out, RegCtx.AddressReg(64)); + if (RegCtx.ScratchReg(64) != X86::NoRegister) + SpillReg(Out, RegCtx.ScratchReg(64)); StoreFlags(Out); } - virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override { - unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64); + void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override { + unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64); assert(LocalFrameReg != X86::NoRegister); RestoreFlags(Out); - if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister) - RestoreReg(Out, RegCtx.ScratchReg(MVT::i64)); - RestoreReg(Out, RegCtx.AddressReg(MVT::i64)); - RestoreReg(Out, RegCtx.ShadowReg(MVT::i64)); + if (RegCtx.ScratchReg(64) != X86::NoRegister) + RestoreReg(Out, RegCtx.ScratchReg(64)); + RestoreReg(Out, RegCtx.AddressReg(64)); + RestoreReg(Out, RegCtx.ShadowReg(64)); EmitAdjustRSP(Ctx, Out, 128); unsigned FrameReg = GetFrameReg(Ctx, Out); @@ -845,18 +838,18 @@ public: } } - virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize, - bool IsWrite, - const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override; - virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize, - bool IsWrite, - const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override; - virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx, - MCStreamer &Out) override; + void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize, + bool IsWrite, + const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override; + void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize, + bool IsWrite, + const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override; + void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx, + MCStreamer &Out) override; private: void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) { @@ -864,7 +857,7 @@ private: std::unique_ptr Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); - EmitLEA(*Op, MVT::i64, X86::RSP, Out); + EmitLEA(*Op, 64, X86::RSP, Out); OrigSPOffset += Offset; } @@ -878,12 +871,13 @@ private: .addReg(X86::RSP) .addImm(-16)); - if (RegCtx.AddressReg(MVT::i64) != X86::RDI) { + if (RegCtx.AddressReg(64) != X86::RDI) { EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg( - RegCtx.AddressReg(MVT::i64))); + RegCtx.AddressReg(64))); } - const std::string &Fn = FuncName(AccessSize, IsWrite); - MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); + MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") + + (IsWrite ? 
"store" : "load") + + llvm::Twine(AccessSize)); const MCSymbolRefExpr *FnExpr = MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr)); @@ -893,16 +887,16 @@ private: void X86AddressSanitizer64::InstrumentMemOperandSmall( X86Operand &Op, unsigned AccessSize, bool IsWrite, const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) { - unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64); - unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32); - unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64); - unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32); - unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8); + unsigned AddressRegI64 = RegCtx.AddressReg(64); + unsigned AddressRegI32 = RegCtx.AddressReg(32); + unsigned ShadowRegI64 = RegCtx.ShadowReg(64); + unsigned ShadowRegI32 = RegCtx.ShadowReg(32); + unsigned ShadowRegI8 = RegCtx.ShadowReg(8); - assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister); - unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32); + assert(RegCtx.ScratchReg(32) != X86::NoRegister); + unsigned ScratchRegI32 = RegCtx.ScratchReg(32); - ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out); + ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out); EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg( AddressRegI64)); @@ -944,7 +938,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( std::unique_ptr Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); - EmitLEA(*Op, MVT::i32, ScratchRegI32, Out); + EmitLEA(*Op, 32, ScratchRegI32, Out); break; } case 4: @@ -969,10 +963,10 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( void X86AddressSanitizer64::InstrumentMemOperandLarge( X86Operand &Op, unsigned AccessSize, bool IsWrite, const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) { - unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64); - unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64); + unsigned AddressRegI64 = RegCtx.AddressReg(64); + unsigned ShadowRegI64 = RegCtx.ShadowReg(64); - ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out); + ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out); EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg( AddressRegI64)); @@ -1030,7 +1024,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize, } // End anonymous namespace -X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI) +X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI) : STI(STI), InitialFrameReg(0) {} X86AsmInstrumentation::~X86AsmInstrumentation() {} @@ -1043,7 +1037,7 @@ void X86AsmInstrumentation::InstrumentAndEmitInstruction( void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst) { - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, *STI); } unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, @@ -1067,17 +1061,17 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI) { - Triple T(STI.getTargetTriple()); + const MCContext &Ctx, const MCSubtargetInfo *&STI) { + Triple T(STI->getTargetTriple()); const bool hasCompilerRTSupport = T.isOSLinux(); if (ClAsanInstrumentAssembly && hasCompilerRTSupport && MCOptions.SanitizeAddress) { - if (STI.getFeatureBits()[X86::Mode32Bit] != 0) + if 
(STI->getFeatureBits()[X86::Mode32Bit] != 0) return new X86AddressSanitizer32(STI); - if (STI.getFeatureBits()[X86::Mode64Bit] != 0) + if (STI->getFeatureBits()[X86::Mode64Bit] != 0) return new X86AddressSanitizer64(STI); } return new X86AsmInstrumentation(STI); } -} // End llvm namespace +} // end llvm namespace diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h index 19ebcc44f61e..470ceadb0aa6 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -28,7 +28,8 @@ class X86AsmInstrumentation; X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI); + const MCContext &Ctx, + const MCSubtargetInfo *&STI); class X86AsmInstrumentation { public: @@ -48,15 +49,16 @@ public: protected: friend X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI); + const MCContext &Ctx, + const MCSubtargetInfo *&STI); - X86AsmInstrumentation(const MCSubtargetInfo &STI); + X86AsmInstrumentation(const MCSubtargetInfo *&STI); unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out); void EmitInstruction(MCStreamer &Out, const MCInst &Inst); - const MCSubtargetInfo &STI; + const MCSubtargetInfo *&STI; unsigned InitialFrameReg; }; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index bca059d8c383..4d8ffac1a82b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -11,7 +11,6 @@ #include "X86AsmInstrumentation.h" #include "X86AsmParserCommon.h" #include "X86Operand.h" -#include "X86ISelLowering.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -26,6 +25,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" @@ -57,10 +57,10 @@ static const char OpPrecedence[] = { }; class X86AsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; std::unique_ptr Instrumentation; + private: SMLoc consumeToken() { MCAsmParser &Parser = getParser(); @@ -154,6 +154,7 @@ private: // Push the new operator. InfixOperatorStack.push_back(Op); } + int64_t execute() { // Push any remaining operators onto the postfix stack. while (!InfixOperatorStack.empty()) { @@ -268,6 +269,7 @@ private: bool StopOnLBrac, AddImmPrefix; InfixCalculator IC; InlineAsmIdentifierInfo Info; + public: IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), @@ -712,10 +714,10 @@ private: SMLoc End, unsigned Size, StringRef Identifier, InlineAsmIdentifierInfo &Info); + bool parseDirectiveEven(SMLoc L); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool validateInstruction(MCInst &Inst, const OperandVector &Ops); bool processInstruction(MCInst &Inst, const OperandVector &Ops); /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds @@ -758,23 +760,24 @@ private: bool is64BitMode() const { // FIXME: Can tablegen auto-generate this? 
- return STI.getFeatureBits()[X86::Mode64Bit]; + return getSTI().getFeatureBits()[X86::Mode64Bit]; } bool is32BitMode() const { // FIXME: Can tablegen auto-generate this? - return STI.getFeatureBits()[X86::Mode32Bit]; + return getSTI().getFeatureBits()[X86::Mode32Bit]; } bool is16BitMode() const { // FIXME: Can tablegen auto-generate this? - return STI.getFeatureBits()[X86::Mode16Bit]; + return getSTI().getFeatureBits()[X86::Mode16Bit]; } void SwitchMode(unsigned mode) { + MCSubtargetInfo &STI = copySTI(); FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); FeatureBitset OldMode = STI.getFeatureBits() & AllModes; unsigned FB = ComputeAvailableFeatures( STI.ToggleFeature(OldMode.flip(mode))); setAvailableFeatures(FB); - + assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); } @@ -798,12 +801,12 @@ private: /// } public: - X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser, + X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, const MCInstrInfo &mii, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) { + : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) { // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); Instrumentation.reset( CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); } @@ -912,6 +915,11 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (RegNo == 0) RegNo = MatchRegisterName(Tok.getString().lower()); + // The "flags" register cannot be referenced directly. + // Treat it as an identifier instead. + if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) + RegNo = 0; + if (!is64BitMode()) { // FIXME: This should be done using Requires and // Requires so "eiz" usage in 64-bit instructions can be also @@ -1042,8 +1050,11 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { .Cases("BYTE", "byte", 8) .Cases("WORD", "word", 16) .Cases("DWORD", "dword", 32) + .Cases("FWORD", "fword", 48) .Cases("QWORD", "qword", 64) + .Cases("MMWORD","mmword", 64) .Cases("XWORD", "xword", 80) + .Cases("TBYTE", "tbyte", 80) .Cases("XMMWORD", "xmmword", 128) .Cases("YMMWORD", "ymmword", 256) .Cases("ZMMWORD", "zmmword", 512) @@ -1062,8 +1073,8 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( // Insert an explicit size if the user didn't have one. if (!Size) { Size = getPointerWidth(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, - /*Len=*/0, Size)); + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } // Create an absolute memory reference in order to match against @@ -1082,8 +1093,8 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( if (!Size) { Size = Info.Type * 8; // Size is in terms of bits in this context. if (Size) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, - /*Len=*/0, Size)); + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } } @@ -1097,13 +1108,13 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( } static void -RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, +RewriteIntelBracExpression(SmallVectorImpl &AsmRewrites, StringRef SymName, int64_t ImmDisp, int64_t FinalImmDisp, SMLoc &BracLoc, SMLoc &StartInBrac, SMLoc &End) { // Remove the '[' and ']' from the IR string. 
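// Editor's sketch, not part of the patch: SwitchMode() above relies on a
// small bitset trick -- take the currently-set mode bit, flip the *target*
// bit into that mask, then XOR the mask into the feature bits. That clears
// the old mode and sets the new one in a single ToggleFeature call, and is a
// no-op when the mode is unchanged. A hypothetical stand-alone rendering of
// the same arithmetic with std::bitset:
#include <bitset>
#include <cassert>
#include <cstddef>

enum : std::size_t { Mode16 = 0, Mode32 = 1, Mode64 = 2, NumFeatures = 8 };

int main() {
  std::bitset<NumFeatures> Features;
  Features.set(Mode32);                        // currently 32-bit

  auto SwitchMode = [&](std::size_t Mode) {
    std::bitset<NumFeatures> AllModes;
    AllModes.set(Mode16); AllModes.set(Mode32); AllModes.set(Mode64);
    std::bitset<NumFeatures> OldMode = Features & AllModes;
    Features ^= OldMode.flip(Mode);            // clear old bit, set new bit
  };

  SwitchMode(Mode64);
  assert(Features.test(Mode64) && !Features.test(Mode32));
  SwitchMode(Mode64);                          // switching to self: no-op
  assert(Features.test(Mode64));
}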
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); + AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); + AsmRewrites.emplace_back(AOK_Skip, End, 1); // If ImmDisp is non-zero, then we parsed a displacement before the // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) @@ -1114,15 +1125,14 @@ RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, // We have an immediate displacement before the bracketed expression. // Adjust this to match the final immediate displacement. bool Found = false; - for (SmallVectorImpl::iterator I = AsmRewrites->begin(), - E = AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() > BracLoc.getPointer()) + for (AsmRewrite &AR : AsmRewrites) { + if (AR.Loc.getPointer() > BracLoc.getPointer()) continue; - if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { assert (!Found && "ImmDisp already rewritten."); - (*I).Kind = AOK_Imm; - (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); - (*I).Val = FinalImmDisp; + AR.Kind = AOK_Imm; + AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); + AR.Val = FinalImmDisp; Found = true; break; } @@ -1133,28 +1143,27 @@ RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, // We have a symbolic and an immediate displacement, but no displacement // before the bracketed expression. Put the immediate displacement // before the bracketed expression. - AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); + AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); } } // Remove all the ImmPrefix rewrites within the brackets. - for (SmallVectorImpl::iterator I = AsmRewrites->begin(), - E = AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + for (AsmRewrite &AR : AsmRewrites) { + if (AR.Loc.getPointer() < StartInBrac.getPointer()) continue; - if ((*I).Kind == AOK_ImmPrefix) - (*I).Kind = AOK_Delete; + if (AR.Kind == AOK_ImmPrefix) + AR.Kind = AOK_Delete; } const char *SymLocPtr = SymName.data(); // Skip everything before the symbol. if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); + AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); } // Skip everything after the symbol. if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); + AsmRewrites.emplace_back(AOK_Skip, Loc, Len); } } @@ -1162,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); + AsmToken::TokenKind PrevTK = AsmToken::Error; bool Done = false; while (!Done) { bool UpdateLocLex = true; @@ -1205,7 +1215,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return Error(Tok.getLoc(), "Unexpected identifier!"); } else { // This is a dot operator, not an adjacent identifier. 
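// Editor's sketch, not part of the patch: the AsmRewrite entries recorded in
// RewriteIntelBracExpression() above are instructions for a later pass that
// re-renders the inline-asm string. A toy version of that replay loop, with a
// hypothetical two-kind rewrite record (skip N chars / splice in an
// immediate), just to show the mechanics; assumes entries sorted by position:
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

enum class Kind { Skip, Imm };
struct Rewrite { Kind K; std::size_t Pos, Len; long Val; };

int main() {
  std::string Src = "mov eax, [8]";
  // "Remove the '[' and ']'" and rewrite the displacement, as the code above
  // does for `ImmDisp [ BaseReg + Scale*IndexReg + Disp ]` operands.
  std::vector<Rewrite> Rewrites = {
      {Kind::Skip, 9, 1, 0}, {Kind::Imm, 10, 1, 16}, {Kind::Skip, 11, 1, 0}};
  std::string Out;
  std::size_t I = 0;
  for (const Rewrite &R : Rewrites) {
    Out += Src.substr(I, R.Pos - I);   // copy untouched text
    if (R.K == Kind::Imm)
      Out += std::to_string(R.Val);    // splice in the final immediate
    I = R.Pos + R.Len;                 // consume the rewritten span
  }
  Out += Src.substr(I);
  std::cout << Out << "\n";            // "mov eax, 16"
}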
- if (Identifier.find('.') != StringRef::npos) { + if (Identifier.find('.') != StringRef::npos && + PrevTK == AsmToken::RBrac) { return false; } else { InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); @@ -1223,8 +1234,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { case AsmToken::Integer: { StringRef ErrMsg; if (isParsingInlineAsm() && SM.getAddImmPrefix()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - Tok.getLoc())); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); // Look for 'b' or 'f' following an Integer as a directional label SMLoc Loc = getTok().getLoc(); int64_t IntVal = getTok().getIntVal(); @@ -1237,7 +1247,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; const MCExpr *Val = - MCSymbolRefExpr::create(Sym, Variant, getContext()); + MCSymbolRefExpr::create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); StringRef Identifier = Sym->getName(); @@ -1275,6 +1285,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (!Done && UpdateLocLex) End = consumeToken(); + + PrevTK = TK; } return false; } @@ -1302,7 +1314,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, // A symbolic displacement. Disp = Sym; if (isParsingInlineAsm()) - RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), ImmDisp, SM.getImm(), BracLoc, StartInBrac, End); } @@ -1359,7 +1371,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End) { MCAsmParser &Parser = getParser(); - assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); + assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); Val = nullptr; StringRef LineBuf(Identifier.data()); @@ -1372,15 +1384,17 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // Advance the token stream until the end of the current token is // after the end of what the frontend claimed. const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); - while (true) { + do { End = Tok.getEndLoc(); getLexer().Lex(); - - assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); - if (End.getPointer() == EndPtr) break; - } + } while (End.getPointer() < EndPtr); Identifier = LineBuf; + // The frontend should end parsing on an assembler token boundary, unless it + // failed parsing. + assert((End.getPointer() == EndPtr || !Result) && + "frontend claimed part of a token?"); + // If the identifier lookup was unsuccessful, assume that we are dealing with // a label. if (!Result) { @@ -1389,9 +1403,8 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, Loc, false); assert(InternalName.size() && "We should have an internal name here."); // Push a rewrite for replacing the identifier name with the internal name. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc, - Identifier.size(), - InternalName)); + InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), + InternalName); } // Create the symbol reference. @@ -1418,8 +1431,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 
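// Editor's sketch, not part of the patch: the PrevTK check added above means
// an identifier containing '.' is only treated as a MASM dot operator when it
// immediately follows ']' (e.g. `[ebx].field`); elsewhere, `a.b` stays an
// ordinary (possibly C++) identifier. A toy classifier under that rule, with
// hypothetical names:
#include <cassert>
#include <string>

enum class Tok { Error, RBrac, Identifier };

static bool isDotOperator(const std::string &Ident, Tok PrevTK) {
  return Ident.find('.') != std::string::npos && PrevTK == Tok::RBrac;
}

int main() {
  assert(isDotOperator(".field", Tok::RBrac));       // [ebx].field
  assert(!isDotOperator("ns.sym", Tok::Identifier)); // plain identifier
}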
if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back( - AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc())); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); if (getLexer().isNot(AsmToken::LBrac)) { // An immediate following a 'segment register', 'colon' token sequence can @@ -1588,8 +1600,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); unsigned Len = DotDispStr.size(); unsigned Val = OrigDispVal + DotDispVal; - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, - Val)); + InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); } NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); @@ -1613,7 +1624,7 @@ std::unique_ptr X86AsmParser::ParseIntelOffsetOfOperator() { return nullptr; // Don't emit the offset operator. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); + InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); // The offset operator will have an 'r' constraint, thus we need to create // register operand to ensure proper matching. Just pick a GPR based on @@ -1664,7 +1675,7 @@ std::unique_ptr X86AsmParser::ParseIntelOperator(unsigned OpKind) { // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); + InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End); @@ -1688,12 +1699,14 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { return ParseIntelOperator(IOK_TYPE); } + bool PtrInOperand = false; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { Parser.Lex(); // Eat operand size (e.g., byte, word). if (Tok.getString() != "PTR" && Tok.getString() != "ptr") return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); Parser.Lex(); // Eat ptr. + PtrInOperand = true; } Start = Tok.getLoc(); @@ -1711,10 +1724,10 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); if (StartTok.getString().size() == Len) // Just add a prefix if this wasn't a complex immediate expression. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); else // Otherwise, rewrite the complex expression as a single immediate. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); + InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); } if (getLexer().isNot(AsmToken::LBrac)) { @@ -1740,7 +1753,7 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { } // rounding mode token - if (STI.getFeatureBits()[X86::FeatureAVX512] && + if (getSTI().getFeatureBits()[X86::FeatureAVX512] && getLexer().is(AsmToken::LCurly)) return ParseRoundingModeOp(Start, End); @@ -1749,9 +1762,16 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start // of a segment override, otherwise this is a normal register reference. 
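// Editor's sketch, not part of the patch: ParseIntelOperator() above folds a
// MASM operator like `TYPE foo` into a plain immediate and records an AOK_Imm
// rewrite covering the whole span, so the re-rendered string carries `$$4`
// instead. A rough string-level analogue (the size value is an assumption,
// standing in for whatever the frontend reports for `foo`):
#include <cstddef>
#include <iostream>
#include <string>

int main() {
  std::string Asm = "imul eax, ebx, TYPE foo";
  std::size_t Loc = Asm.find("TYPE");
  std::size_t Len = std::string("TYPE foo").size(); // operator + operand
  unsigned CVal = 4;                                // assumed sizeof(foo)
  Asm.replace(Loc, Len, "$$" + std::to_string(CVal));
  std::cout << Asm << "\n";  // imul eax, ebx, $$4
}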
- if (getLexer().isNot(AsmToken::Colon)) + // In case it is a normal register and there is ptr in the operand this + // is an error + if (getLexer().isNot(AsmToken::Colon)){ + if (PtrInOperand){ + return ErrorOperand(Start, "expected memory operand after " + "'ptr', found register operand instead"); + } return X86Operand::CreateReg(RegNo, Start, End); - + } + return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); } @@ -1798,7 +1818,7 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { } case AsmToken::LCurly:{ SMLoc Start = Parser.getTok().getLoc(), End; - if (STI.getFeatureBits()[X86::FeatureAVX512]) + if (getSTI().getFeatureBits()[X86::FeatureAVX512]) return ParseRoundingModeOp(Start, End); return ErrorOperand(Start, "unknown token in expression"); } @@ -1808,7 +1828,7 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, const MCParsedAsmOperand &Op) { MCAsmParser &Parser = getParser(); - if(STI.getFeatureBits()[X86::FeatureAVX512]) { + if(getSTI().getFeatureBits()[X86::FeatureAVX512]) { if (getLexer().is(AsmToken::LCurly)) { // Eat "{" and mark the current place. const SMLoc consumedToken = consumeToken(); @@ -1983,12 +2003,13 @@ std::unique_ptr X86AsmParser::ParseMemOperand(unsigned SegReg, } // Validate the scale amount. - if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && ScaleVal != 1) { Error(Loc, "scale factor in 16-bit address must be 1"); return nullptr; - } - if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ + } + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && + ScaleVal != 8) { Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); return nullptr; } @@ -2175,7 +2196,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Name == "repne" || Name == "repnz" || Name == "rex64" || Name == "data16"; - // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as @@ -2213,6 +2233,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, (isPrefix && getLexer().is(AsmToken::Slash))) Parser.Lex(); + // This is for gas compatibility and cannot be done in td. + // Adding "p" for some floating point with no argument. + // For example: fsub --> fsubp + bool IsFp = + Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr"; + if (IsFp && Operands.size() == 1) { + const char *Repl = StringSwitch(Name) + .Case("fsub", "fsubp") + .Case("fdiv", "fdivp") + .Case("fsubr", "fsubrp") + .Case("fdivr", "fdivrp"); + static_cast(*Operands[0]).setTokenValue(Repl); + } + // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> // "outb %al, %dx". Out doesn't take a memory form, but this is a widely // documented form in various unofficial manuals, so a lot of code uses it. 
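// Editor's sketch, not part of the patch: the no-operand x87 forms above are
// remapped for gas compatibility. Note the StringSwitch in the original
// source is StringSwitch<const char *>; the template argument was lost in
// this copy of the patch. A self-contained equivalent (the .Default is my
// addition, for safety outside the IsFp guard):
#include "llvm/ADT/StringSwitch.h"
#include <cassert>
#include <cstring>

static const char *gasCompatFpName(const char *Name) {
  return llvm::StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp")
      .Default(Name);
}

int main() {
  assert(std::strcmp(gasCompatFpName("fsub"), "fsubp") == 0);
  assert(std::strcmp(gasCompatFpName("fadd"), "fadd") == 0);
}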
@@ -2242,9 +2276,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Append default arguments to "ins[bwld]" if (Name.startswith("ins") && Operands.size() == 1 && - (Name == "insb" || Name == "insw" || Name == "insl" || - Name == "insd" )) { - AddDefaultSrcDestOperands(Operands, + (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) { + AddDefaultSrcDestOperands(Operands, X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), DefaultMemDIOperand(NameLoc)); } @@ -2346,98 +2379,21 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { X86Operand &Op1 = static_cast(*Operands[1]); - if (Op1.isImm() && isa(Op1.getImm()) && - cast(Op1.getImm())->getValue() == 3) { - Operands.erase(Operands.begin() + 1); - static_cast(*Operands[0]).setTokenValue("int3"); - } + if (Op1.isImm()) + if (auto *CE = dyn_cast(Op1.getImm())) + if (CE->getValue() == 3) { + Operands.erase(Operands.begin() + 1); + static_cast(*Operands[0]).setTokenValue("int3"); + } } return false; } -static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, - bool isCmp) { - MCInst TmpInst; - TmpInst.setOpcode(Opcode); - if (!isCmp) - TmpInst.addOperand(MCOperand::createReg(Reg)); - TmpInst.addOperand(MCOperand::createReg(Reg)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; -} - -static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::AX, isCmp); -} - -static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); -} - -static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); -} - -bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { - switch (Inst.getOpcode()) { - default: return true; - case X86::INT: - X86Operand &Op = static_cast(*Ops[1]); - assert(Op.isImm() && "expected immediate"); - int64_t Res; - if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) { - Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]"); - return false; - } - return true; - } - llvm_unreachable("handle the instruction appropriately"); -} - bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { switch (Inst.getOpcode()) { default: return false; - case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); - case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); - case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); - case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); - case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); - case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); - case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); - case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); - case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); - case X86::CMP16i16: return convert16i16to16ri8(Inst, 
X86::CMP16ri8, true); - case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); - case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); - case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); - case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); - case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); - case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); - case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); - case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); - case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); - case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); - case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); - case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); - case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); - case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); + case X86::VMOVZPQILo2PQIrr: case X86::VMOVAPDrr: case X86::VMOVAPDYrr: case X86::VMOVAPSrr: @@ -2457,18 +2413,19 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Invalid opcode"); - case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; - case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; - case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; - case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; - case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; - case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; - case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; - case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; - case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; - case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; - case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; - case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; } Inst.setOpcode(NewOpc); return true; @@ -2573,9 +2530,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, isParsingIntelSyntax())) { default: llvm_unreachable("Unexpected match result!"); case Match_Success: - if (!validateInstruction(Inst, Operands)) - return true; - // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. 
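// Editor's sketch, not part of the patch: the "int $3" -> "int3" fold above
// reads as nested dyn_cast/early-exit instead of the old isa<> && cast<>
// condition; note that isa<MCConstantExpr>, cast<MCConstantExpr> and
// dyn_cast<MCConstantExpr> lost their template arguments in this copy of the
// patch. A hedged restatement of the same shape over a toy expression type:
#include <cassert>

struct Expr { virtual ~Expr() = default; };
struct ConstExpr : Expr { long Value; explicit ConstExpr(long V) : Value(V) {} };

// True when the operand is a literal 3, i.e. the mnemonic becomes "int3".
static bool foldsToInt3(const Expr *E) {
  if (auto *CE = dynamic_cast<const ConstExpr *>(E))
    if (CE->Value == 3)
      return true;
  return false;
}

int main() {
  ConstExpr Three(3), Five(5);
  assert(foldsToInt3(&Three) && !foldsToInt3(&Five));
}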
@@ -2819,9 +2773,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned NumSuccessfulMatches = std::count(std::begin(Match), std::end(Match), Match_Success); if (NumSuccessfulMatches == 1) { - if (!validateInstruction(Inst, Operands)) - return true; - // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the individual // transformations can chain off each other. @@ -2898,10 +2849,29 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { "a '%' prefix in .intel_syntax"); } return false; - } + } else if (IDVal == ".even") + return parseDirectiveEven(DirectiveID.getLoc()); return true; } +/// parseDirectiveEven +/// ::= .even +bool X86AsmParser::parseDirectiveEven(SMLoc L) { + const MCSection *Section = getStreamer().getCurrentSection().first; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token in directive"); + return false; + } + if (!Section) { + getStreamer().InitSections(false); + Section = getStreamer().getCurrentSection().first; + } + if (Section->UseCodeAlign()) + getStreamer().EmitCodeAlignment(2, 0); + else + getStreamer().EmitValueToAlignment(2, 0, 1, 0); + return false; +} /// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { @@ -2909,10 +2879,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); if (getParser().parseExpression(Value)) return false; - getParser().getStreamer().EmitValue(Value, Size); + if (const auto *MCE = dyn_cast(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else { + getStreamer().EmitValue(Value, Size, ExprLoc); + } if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h index 7610806c4578..54538c804a03 100644 --- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h +++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h @@ -13,30 +13,25 @@ namespace llvm { inline bool isImmSExti16i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value) || + (isUInt<16>(Value) && isInt<8>(static_cast(Value))); } inline bool isImmSExti32i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value) || + (isUInt<32>(Value) && isInt<8>(static_cast(Value))); } inline bool isImmSExti64i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value); } inline bool isImmSExti64i32Value(uint64_t Value) { - return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<32>(Value); } inline bool isImmUnsignedi8Value(uint64_t Value) { - return (( Value <= 0x00000000000000FFULL)|| - 
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isUInt<8>(Value) || isInt<8>(Value); } } // End of namespace llvm diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index fba2c280d200..b23f5c353013 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -34,18 +34,9 @@ set(sources X86VZeroUpper.cpp X86FixupLEAs.cpp X86WinEHState.cpp + X86OptimizeLEAs.cpp ) -if( CMAKE_CL_64 ) - enable_language(ASM_MASM) - ADD_CUSTOM_COMMAND( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj - MAIN_DEPENDENCY X86CompilationCallback_Win64.asm - COMMAND ${CMAKE_ASM_MASM_COMPILER} /nologo /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - ) - set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) -endif() - add_llvm_target(X86CodeGen ${sources}) add_subdirectory(AsmParser) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index cfc3ee2fb08f..ce8fcf164668 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -95,11 +95,13 @@ X86GenericDisassembler::X86GenericDisassembler( llvm_unreachable("Invalid CPU mode"); } +namespace { struct Region { ArrayRef Bytes; uint64_t Base; Region(ArrayRef Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} }; +} // end anonymous namespace /// A callback function that wraps the readByte method from Region. /// @@ -831,8 +833,12 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM256: case TYPE_XMM512: case TYPE_VK1: + case TYPE_VK2: + case TYPE_VK4: case TYPE_VK8: case TYPE_VK16: + case TYPE_VK32: + case TYPE_VK64: case TYPE_DEBUGREG: case TYPE_CONTROLREG: case TYPE_BNDR: @@ -962,6 +968,7 @@ static bool translateInstruction(MCInst &mcInst, return true; } + mcInst.clear(); mcInst.setOpcode(insn.instructionID); // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 // prefix bytes should be disassembled as xrelease and xacquire then set the diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index f73fa75f888e..040143b15587 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -361,7 +361,7 @@ static int readPrefixes(struct InternalInstruction* insn) { * then it should be disassembled as a xacquire/xrelease not repne/rep. */ if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) | + ((nextByte == 0xf0) || ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* @@ -980,6 +980,47 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { insn->opcode == 0xE3) attrMask ^= ATTR_ADSIZE; + /* + * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix + * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes + */ + + if (insn->mode == MODE_64BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + switch (insn->opcode) { + case 0xE8: + case 0xE9: + // Take care of psubsb and other mmx instructions. 
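// Editor's sketch, not part of the patch: the rewritten isImmSExt*i8Value
// helpers above replace hand-written hex ranges with MathExtras predicates.
// Note the casts in this copy of the patch lost their template arguments: the
// source reads static_cast<int16_t>(Value) and static_cast<int32_t>(Value)
// respectively. An equivalence spot-check for the 16i8 case:
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

static bool oldIsImmSExti16i8(uint64_t V) {
  return (V <= 0x000000000000007FULL) ||
         (0x000000000000FF80ULL <= V && V <= 0x000000000000FFFFULL) ||
         (0xFFFFFFFFFFFFFF80ULL <= V && V <= 0xFFFFFFFFFFFFFFFFULL);
}

static bool newIsImmSExti16i8(uint64_t V) {
  return llvm::isInt<8>(V) ||
         (llvm::isUInt<16>(V) && llvm::isInt<8>(static_cast<int16_t>(V)));
}

int main() {
  for (uint64_t V : {0x7FULL, 0x80ULL, 0xFF80ULL, 0xFFFFULL,
                     0xFFFFFFFFFFFFFF80ULL, 0x1234ULL})
    assert(oldIsImmSExti16i8(V) == newIsImmSExti16i8(V));
}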
+ if (insn->opcodeType == ONEBYTE) { + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; + } + break; + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + // Take care of lea and three byte ops. + if (insn->opcodeType == TWOBYTE) { + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; + } + break; + } + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -1447,8 +1488,12 @@ static int readModRM(struct InternalInstruction* insn) { case TYPE_XMM: \ return prefix##_XMM0 + index; \ case TYPE_VK1: \ + case TYPE_VK2: \ + case TYPE_VK4: \ case TYPE_VK8: \ case TYPE_VK16: \ + case TYPE_VK32: \ + case TYPE_VK64: \ if (index > 7) \ *valid = 0; \ return prefix##_K0 + index; \ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index a79a923ac525..28a628e5066b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -572,8 +572,6 @@ struct InternalInstruction { // The last byte of the opcode, not counting any ModR/M extension uint8_t opcode; - // The ModR/M byte of the instruction, if it is an opcode extension - uint8_t modRMExtension; // decode state diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index ea727e6e82fb..b4c0bc4cd4d9 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 62b6b73e7864..bbb309076610 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -15,12 +15,9 @@ #define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class X86ATTInstPrinter final : public MCInstPrinter { public: X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 91b144a44824..82f0ee5a5ebc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -21,6 +21,27 @@ using namespace llvm; +static unsigned getVectorRegSize(unsigned RegNo) { + if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31) + return 512; + if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31) + return 256; + if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31) + return 128; + if (X86::MM0 <= RegNo && RegNo <= X86::MM7) + return 64; + + llvm_unreachable("Unknown vector reg!"); + return 0; +} + +static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT, + unsigned OperandIndex) { + unsigned OpReg = MI->getOperand(OperandIndex).getReg(); + return MVT::getVectorVT(ScalarVT, + getVectorRegSize(OpReg)/ScalarVT.getSizeInBits()); +} + /// \brief Extracts the src/dst types for a given zero extension instruction. 
/// \note While the number of elements in DstVT type correct, the /// number in the SrcVT type is expanded to fill the src xmm register and the @@ -107,6 +128,75 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) { } } +#define CASE_MASK_INS_COMMON(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src: \ + case X86::V##Inst##Suffix##src##k: \ + case X86::V##Inst##Suffix##src##kz: + +#define CASE_SSE_INS_COMMON(Inst, src) \ + case X86::Inst##src: + +#define CASE_AVX_INS_COMMON(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src: + +#define CASE_MOVDUP(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src) \ + CASE_AVX_INS_COMMON(Inst, , r##src) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src) \ + CASE_SSE_INS_COMMON(Inst, r##src) \ + +#define CASE_UNPCK(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src) \ + CASE_AVX_INS_COMMON(Inst, , r##src) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src) \ + CASE_SSE_INS_COMMON(Inst, r##src) \ + +#define CASE_SHUF(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \ + CASE_AVX_INS_COMMON(Inst, , r##src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \ + CASE_SSE_INS_COMMON(Inst, r##src##i) \ + +#define CASE_VPERM(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, src##i) \ + CASE_AVX_INS_COMMON(Inst, , src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, src##i) \ + +#define CASE_VSHUF(Inst, src) \ + CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i) \ + +/// \brief Extracts the types and if it has memory operand for a given +/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) instruction. +static void getVSHUF64x2FamilyInfo(const MCInst *MI, MVT &VT, bool &HasMemOp) { + HasMemOp = false; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unknown VSHUF64x2 family instructions."); + break; + CASE_VSHUF(64X2, m) + HasMemOp = true; // FALL THROUGH. + CASE_VSHUF(64X2, r) + VT = getRegOperandVectorVT(MI, MVT::i64, 0); + break; + CASE_VSHUF(32X4, m) + HasMemOp = true; // FALL THROUGH. + CASE_VSHUF(32X4, r) + VT = getRegOperandVectorVT(MI, MVT::i32, 0); + break; + } +} + //===----------------------------------------------------------------------===// // Top Level Entrypoint //===----------------------------------------------------------------------===// @@ -127,23 +217,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::BLENDPDrri: case X86::VBLENDPDrri: + case X86::VBLENDPDYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::BLENDPDrmi: case X86::VBLENDPDrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v2f64, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VBLENDPDYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
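// Editor's note, not part of the patch: the CASE_* macros above exist purely
// to collapse the old hand-written case ladders. From the definitions in this
// hunk, CASE_MOVDUP(MOVSLDUP, r) expands to twelve case labels:
//
//   case X86::VMOVSLDUPZrr:    case X86::VMOVSLDUPZrrk:    case X86::VMOVSLDUPZrrkz:
//   case X86::VMOVSLDUPZ256rr: case X86::VMOVSLDUPZ256rrk: case X86::VMOVSLDUPZ256rrkz:
//   case X86::VMOVSLDUPZ128rr: case X86::VMOVSLDUPZ128rrk: case X86::VMOVSLDUPZ128rrkz:
//   case X86::VMOVSLDUPrr:     case X86::VMOVSLDUPYrr:     case X86::MOVSLDUPrr:
//
// i.e. the AVX-512 masked/zero-masked variants plus the AVX and SSE forms,
// which is why the rewritten cases below can share one
// getRegOperandVectorVT()-driven decode instead of one block per width.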
case X86::VBLENDPDYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4f64, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -152,23 +233,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::BLENDPSrri: case X86::VBLENDPSrri: + case X86::VBLENDPSYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::BLENDPSrmi: case X86::VBLENDPSrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VBLENDPSYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VBLENDPSYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8f32, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -177,23 +249,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PBLENDWrri: case X86::VPBLENDWrri: + case X86::VPBLENDWYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PBLENDWrmi: case X86::VPBLENDWrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPBLENDWYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VPBLENDWYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v16i16, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -201,23 +264,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::VPBLENDDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPBLENDDrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4i32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPBLENDDYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::VPBLENDDrmi: case X86::VPBLENDDYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8i32, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -239,6 +292,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVLHPSrr: case X86::VMOVLHPSrr: + case X86::VMOVLHPSZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -247,569 +301,327 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVHLPSrr: case X86::VMOVHLPSrr: + case X86::VMOVHLPSZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeMOVHLPSMask(2, ShuffleMask); break; - case X86::MOVSLDUPrr: - case X86::VMOVSLDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVSLDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::MOVSLDUPrm: - case X86::VMOVSLDUPrm: + CASE_MOVDUP(MOVSLDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask); + DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); break; - case X86::VMOVSHDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVSHDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::VMOVSHDUPYrm: + CASE_MOVDUP(MOVSHDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask); + DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); break; - case X86::VMOVSLDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVDDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::VMOVSLDUPYrm: + CASE_MOVDUP(MOVDDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask); - break; - - case X86::MOVSHDUPrr: - case X86::VMOVSHDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::MOVSHDUPrm: - case X86::VMOVSHDUPrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask); - break; - - case X86::VMOVDDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VMOVDDUPYrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask); - break; - - case X86::MOVDDUPrr: - case X86::VMOVDDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. 
- case X86::MOVDDUPrm: - case X86::VMOVDDUPrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask); + DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); break; case X86::PSLLDQri: case X86::VPSLLDQri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSLLDQMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSLLDQYri: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSLLDQMask(MVT::v32i8, + DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSRLDQri: case X86::VPSRLDQri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSRLDQMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSRLDQYri: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSRLDQMask(MVT::v32i8, + DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PALIGNR128rr: case X86::VPALIGNR128rr: + case X86::VPALIGNR256rr: Src1Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PALIGNR128rm: case X86::VPALIGNR128rm: - Src2Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePALIGNRMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPALIGNR256rr: - Src1Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VPALIGNR256rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePALIGNRMask(MVT::v32i8, + DecodePALIGNRMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSHUFDri: case X86::VPSHUFDri: + case X86::VPSHUFDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFDmi: case X86::VPSHUFDmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4i32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFDYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFDYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v8i32, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::i32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSHUFHWri: case X86::VPSHUFHWri: + case X86::VPSHUFHWYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
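// Editor's sketch, not part of the patch: the PSHUF* decoders invoked above
// all unpack a 2-bit-per-element immediate. A minimal stand-in for what
// DecodePSHUFMask produces for a 4 x i32 shuffle (the real decoder lives in
// X86ShuffleDecode.cpp and also handles wider types by repeating the pattern
// per 128-bit lane):
#include <array>
#include <cassert>
#include <cstdint>

static std::array<int, 4> decodePSHUFD(std::uint8_t Imm) {
  std::array<int, 4> Mask;
  for (int i = 0; i != 4; ++i)
    Mask[i] = (Imm >> (2 * i)) & 0x3;  // element i selected by bits 2i..2i+1
  return Mask;
}

int main() {
  // pshufd $0x1b, %xmm0, %xmm1 reverses the four dwords: 0x1b == 0b00011011.
  assert((decodePSHUFD(0x1b) == std::array<int, 4>{3, 2, 1, 0}));
}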
case X86::PSHUFHWmi: case X86::VPSHUFHWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFHWMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFHWYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFHWYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFHWMask(MVT::v16i16, + DecodePSHUFHWMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; + case X86::PSHUFLWri: case X86::VPSHUFLWri: + case X86::VPSHUFLWYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFLWmi: case X86::VPSHUFLWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFLWMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFLWYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFLWYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFLWMask(MVT::v16i16, + DecodePSHUFLWMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; - case X86::PUNPCKHBWrr: - case X86::VPUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - case X86::VPUNPCKHBWrm: + case X86::MMX_PSHUFWri: Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); - break; - case X86::VPUNPCKHBWYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VPUNPCKHBWYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); + case X86::MMX_PSHUFWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); - break; - case X86::PUNPCKHWDrr: - case X86::VPUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHWDrm: - case X86::VPUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); - break; - case X86::VPUNPCKHWDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHWDYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); - break; - case X86::PUNPCKHDQrr: - case X86::VPUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - case X86::VPUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); - break; - case X86::VPUNPCKHDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); - break; - case X86::VPUNPCKHDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::VPUNPCKHDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i32, ShuffleMask); - break; - case X86::PUNPCKHQDQrr: - case X86::VPUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - case X86::VPUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); - break; - case X86::VPUNPCKHQDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHQDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i64, ShuffleMask); - break; - case X86::VPUNPCKHQDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHQDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i64, ShuffleMask); - break; - - case X86::PUNPCKLBWrr: - case X86::VPUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLBWrm: - case X86::VPUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); - break; - case X86::VPUNPCKLBWYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLBWYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); - break; - case X86::PUNPCKLWDrr: - case X86::VPUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - case X86::VPUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); - break; - case X86::VPUNPCKLWDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLWDYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); - break; - case X86::PUNPCKLDQrr: - case X86::VPUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - case X86::VPUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); - break; - case X86::VPUNPCKLDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); - break; - case X86::VPUNPCKLDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i32, ShuffleMask); - break; - case X86::PUNPCKLQDQrr: - case X86::VPUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::PUNPCKLQDQrm: - case X86::VPUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); - break; - case X86::VPUNPCKLQDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLQDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i64, ShuffleMask); - break; - case X86::VPUNPCKLQDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLQDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i64, ShuffleMask); - break; - - case X86::SHUFPDrri: - case X86::VSHUFPDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPDrmi: - case X86::VSHUFPDrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v2f64, + DecodePSHUFMask(MVT::v4i16, MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VSHUFPDYrri: + + case X86::PSWAPDrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSWAPDrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSWAPMask(MVT::v2i32, ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHBW, r) + case X86::MMX_PUNPCKHBWirr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VSHUFPDYrmi: + CASE_UNPCK(PUNPCKHBW, m) + case X86::MMX_PUNPCKHBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHWD, r) + case X86::MMX_PUNPCKHWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHWD, m) + case X86::MMX_PUNPCKHWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHDQ, r) + case X86::MMX_PUNPCKHDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHDQ, m) + case X86::MMX_PUNPCKHDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHQDQ, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHQDQ, m) + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLBW, r) + case X86::MMX_PUNPCKLBWirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLBW, m) + case X86::MMX_PUNPCKLBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLWD, r) + case X86::MMX_PUNPCKLWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
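// Editor's sketch, not part of the patch: every DecodeUNPCKLMask call above
// produces the same interleave pattern, parameterized only by element count;
// that is what lets one CASE_UNPCK block serve the MMX, SSE, AVX and AVX-512
// forms at once. A stand-in for the low-half interleave (src2 elements are
// offset by NumElts, the usual two-input shuffle-mask convention; wider
// vectors repeat this per 128-bit lane):
#include <cassert>
#include <vector>

static std::vector<int> decodeUnpackLow(int NumElts) {
  std::vector<int> Mask;
  for (int i = 0; i != NumElts / 2; ++i) {
    Mask.push_back(i);            // element i of src1
    Mask.push_back(i + NumElts);  // element i of src2
  }
  return Mask;
}

int main() {
  // punpckldq: low dwords of the two sources, interleaved.
  assert((decodeUnpackLow(4) == std::vector<int>{0, 4, 1, 5}));
}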
+ CASE_UNPCK(PUNPCKLWD, m) + case X86::MMX_PUNPCKLWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLDQ, r) + case X86::MMX_PUNPCKLDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLDQ, m) + case X86::MMX_PUNPCKLDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLQDQ, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLQDQ, m) + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + break; + + CASE_SHUF(SHUFPD, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_SHUF(SHUFPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v4f64, + DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::SHUFPSrri: - case X86::VSHUFPSrri: + CASE_SHUF(SHUFPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::SHUFPSrmi: - case X86::VSHUFPSrmi: + CASE_SHUF(SHUFPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v4f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VSHUFPSYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VSHUFPSYrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v8f32, + DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKLPDrr: - case X86::VUNPCKLPDrr: + CASE_VSHUF(64X2, r) + CASE_VSHUF(64X2, m) + CASE_VSHUF(32X4, r) + CASE_VSHUF(32X4, m) { + MVT VT; + bool HasMemOp; + unsigned NumOp = MI->getNumOperands(); + getVSHUF64x2FamilyInfo(MI, VT, HasMemOp); + decodeVSHUF64x2FamilyMask(VT, MI->getOperand(NumOp - 1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + if (HasMemOp) { + assert((NumOp >= 8) && "Expected at least 8 operands!"); + Src1Name = getRegName(MI->getOperand(NumOp - 7).getReg()); + } else { + assert((NumOp >= 4) && "Expected at least 4 operands!"); + Src2Name = getRegName(MI->getOperand(NumOp - 2).getReg()); + Src1Name = getRegName(MI->getOperand(NumOp - 3).getReg()); + } + break; + } + + CASE_UNPCK(UNPCKLPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKLPDrm: - case X86::VUNPCKLPDrm: - DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); + CASE_UNPCK(UNPCKLPD, m) + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPDYrr: + + CASE_UNPCK(UNPCKLPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
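// Editor's note, not part of the patch: the NumOp - 7 / NumOp - 3 / NumOp - 2
// indexing in the VSHUF64x2 block above encodes the operand layouts
//
//   reg form: dst, [mask,] src1, src2, imm              -> src2 = NumOp - 2
//   mem form: dst, [mask,] src1, base, scale, index,
//             disp, segment, imm                        -> src1 = NumOp - 7
//
// counting back from the trailing immediate, so the same arithmetic covers
// the masked (k/kz) and unmasked variants; the NumOp asserts guard exactly
// that. (The layout is reconstructed from the operand counts asserted in this
// hunk, so treat the bracketed mask slot as an editor's assumption.)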
- case X86::VUNPCKLPDYrm: - DecodeUNPCKLMask(MVT::v4f64, ShuffleMask); + CASE_UNPCK(UNPCKLPS, m) + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPDZrr: + + CASE_UNPCK(UNPCKHPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VUNPCKLPDZrm: - DecodeUNPCKLMask(MVT::v8f64, ShuffleMask); + CASE_UNPCK(UNPCKHPD, m) + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKLPSrr: - case X86::VUNPCKLPSrr: + + CASE_UNPCK(UNPCKHPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKLPSrm: - case X86::VUNPCKLPSrm: - DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); + CASE_UNPCK(UNPCKHPS, m) + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPSYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKLPSYrm: - DecodeUNPCKLMask(MVT::v8f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKLPSZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKLPSZrm: - DecodeUNPCKLMask(MVT::v16f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPDrr: - case X86::VUNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPDrm: - case X86::VUNPCKHPDrm: - DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPDYrm: - DecodeUNPCKHMask(MVT::v4f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPDZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPDZrm: - DecodeUNPCKHMask(MVT::v8f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPSrr: - case X86::VUNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - case X86::VUNPCKHPSrm: - DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPSYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPSYrm: - DecodeUNPCKHMask(MVT::v8f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPSZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
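The CASE_UNPCK folding in the hunks above collapses what used to be one hand-written case per vector width (SSE, AVX, AVX2, AVX-512) into a single label, with getRegOperandVectorVT recovering the vector type from the instruction's register class. As a cross-check, here is a minimal standalone sketch of the lane-wise unpack-low decoding those cases invoke; decodeUnpackLo is an illustrative stand-in for DecodeUNPCKLMask, with the element and lane counts passed in directly rather than derived from an MVT.

#include <cstdio>
#include <vector>

// Within each 128-bit lane, interleave the low-half elements of two sources.
static std::vector<int> decodeUnpackLo(unsigned NumElts, unsigned NumLanes) {
  if (NumLanes == 0)
    NumLanes = 1; // MMX: a 64-bit vector has no full 128-bit lane.
  unsigned NumLaneElts = NumElts / NumLanes;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      Mask.push_back(i);           // element i of the first source
      Mask.push_back(i + NumElts); // element i of the second source
    }
  return Mask;
}

int main() {
  // punpckldq xmm (v4i32, one lane): prints 0 4 1 5.
  for (int M : decodeUnpackLo(4, 1))
    std::printf("%d ", M);
  std::printf("\n");
  // vunpcklps ymm (v8f32, two lanes): prints 0 8 1 9 4 12 5 13.
  for (int M : decodeUnpackLo(8, 2))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}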
- case X86::VUNPCKHPSZrm: - DecodeUNPCKHMask(MVT::v16f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPSri: + + CASE_VPERM(PERMILPS, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. - case X86::VPERMILPSmi: + CASE_VPERM(PERMILPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4f32, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VPERMILPSYri: + + CASE_VPERM(PERMILPD, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. - case X86::VPERMILPSYmi: + CASE_VPERM(PERMILPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v8f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPDri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VPERMILPDmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v2f64, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPDYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VPERMILPDYmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4f64, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERM2F128rr: case X86::VPERM2I128rr: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -824,6 +636,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERMQYri: case X86::VPERMPDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -846,6 +659,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::MOVSSrr: case X86::VMOVSSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -861,6 +675,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVZPQILo2PQIrr: case X86::VMOVPQI2QIrr: case X86::VMOVZPQILo2PQIrr: + case X86::VMOVZPQILo2PQIZrr: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
 case X86::MOVQI2PQIrm:
@@ -869,9 +684,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   case X86::VMOVQI2PQIrm:
   case X86::VMOVZQI2PQIrm:
   case X86::VMOVZPQILo2PQIrm:
+  case X86::VMOVZPQILo2PQIZrm:
     DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
+
   case X86::MOVDI2PDIrm:
   case X86::VMOVDI2PDIrm:
     DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da37290..20cd7ffb2e63 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -19,8 +19,6 @@
 namespace llvm {
 
-class MCOperand;
-
 class X86IntelInstPrinter final : public MCInstPrinter {
 public:
   X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 629802f5dc5e..133bd0e1772a 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -69,15 +69,19 @@ public:
 class X86AsmBackend : public MCAsmBackend {
   const StringRef CPU;
   bool HasNopl;
-  const uint64_t MaxNopLength;
+  uint64_t MaxNopLength;
 public:
-  X86AsmBackend(const Target &T, StringRef CPU)
-      : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
+  X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
               CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
               CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
               CPU != "c3" && CPU != "c3-2";
+    // Max length of a true long nop instruction is 15 bytes.
+    // Max length of a long nop replacement instruction is 7 bytes.
+    // Taking SilverMont architecture features into account, the max nop
+    // length is reduced for it to achieve better performance.
+    MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
   }
 
   unsigned getNumFixupKinds() const override {
@@ -200,6 +204,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
   case X86::ADD64ri8: return X86::ADD64ri32;
   case X86::ADD64mi8: return X86::ADD64mi32;
 
+  // ADC
+  case X86::ADC16ri8: return X86::ADC16ri;
+  case X86::ADC16mi8: return X86::ADC16mi;
+  case X86::ADC32ri8: return X86::ADC32ri;
+  case X86::ADC32mi8: return X86::ADC32mi;
+  case X86::ADC64ri8: return X86::ADC64ri32;
+  case X86::ADC64mi8: return X86::ADC64mi32;
+
   // SUB
   case X86::SUB16ri8: return X86::SUB16ri;
   case X86::SUB16mi8: return X86::SUB16mi;
@@ -208,6 +220,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
   case X86::SUB64ri8: return X86::SUB64ri32;
   case X86::SUB64mi8: return X86::SUB64mi32;
 
+  // SBB
+  case X86::SBB16ri8: return X86::SBB16ri;
+  case X86::SBB16mi8: return X86::SBB16mi;
+  case X86::SBB32ri8: return X86::SBB32ri;
+  case X86::SBB32mi8: return X86::SBB32mi;
+  case X86::SBB64ri8: return X86::SBB64ri32;
+  case X86::SBB64mi8: return X86::SBB64mi32;
+
   // CMP
   case X86::CMP16ri8: return X86::CMP16ri;
   case X86::CMP16mi8: return X86::CMP16mi;
@@ -279,7 +299,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
 /// bytes.
 /// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  static const uint8_t Nops[10][10] = {
+  static const uint8_t TrueNops[10][10] = {
     // nop
     {0x90},
     // xchg %ax,%ax
@@ -302,17 +322,31 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
     {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   };
 
-  // This CPU doesn't support long nops. If needed add more.
-  // FIXME: Can we get this from the subtarget somehow?
-  // FIXME: We could generated something better than plain 0x90.
-  if (!HasNopl) {
-    for (uint64_t i = 0; i < Count; ++i)
-      OW->write8(0x90);
-    return true;
-  }
+  // Alternative nop instructions for CPUs which don't support long nops.
+  static const uint8_t AltNops[7][10] = {
+    // nop
+    {0x90},
+    // xchg %ax,%ax
+    {0x66, 0x90},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0x76, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0x74, 0x26, 0x00},
+    // nop + lea 0x0(%esi),%esi
+    {0x90, 0x8d, 0x74, 0x26, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
+  };
 
-  // 15 is the longest single nop instruction. Emit as many 15-byte nops as
-  // needed, then emit a nop of the remaining length.
+  // Select the right NOP table.
+  // FIXME: Can we determine whether the CPU supports long nops from the subtarget somehow?
+  const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
+  assert(HasNopl || MaxNopLength <= 7);
+
+  // Emit as many maximum-length nops as needed, then emit a nop of the
+  // remaining length.
   do {
     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
@@ -359,6 +393,17 @@ public:
   }
 };
 
+class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
+public:
+  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+    : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+    return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+                                    ELF::EM_IAMCU);
+  }
+};
+
 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
 public:
   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -610,13 +655,13 @@ private:
   /// \brief Get the compact unwind number for a given register. The number
   /// corresponds to the enum lists in compact_unwind_encoding.h.
   int getCompactUnwindRegNum(unsigned Reg) const {
-    static const uint16_t CU32BitRegs[7] = {
+    static const MCPhysReg CU32BitRegs[7] = {
       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
     };
-    static const uint16_t CU64BitRegs[] = {
+    static const MCPhysReg CU64BitRegs[] = {
       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
     };
-    const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+    const MCPhysReg *CURegs = Is64Bit ?
CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -780,6 +825,10 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + + if (TheTriple.isOSIAMCU()) + return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI, CPU); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index f0d00b0c1bc3..9ff85b9154f8 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -41,6 +41,16 @@ namespace X86 { /// AddrNumOperands - Total number of operands in a memory reference. AddrNumOperands = 5 }; + + /// AVX512 static rounding constants. These need to match the values in + /// avx512fintrin.h. + enum STATIC_ROUNDING { + TO_NEAREST_INT = 0, + TO_NEG_INF = 1, + TO_POS_INF = 2, + TO_ZERO = 3, + CUR_DIRECTION = 4 + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that @@ -675,7 +685,7 @@ namespace X86II { case X86II::RawFrmSrc: case X86II::RawFrmDst: case X86II::RawFrmDstSrc: - return -1; + return -1; case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: @@ -696,23 +706,27 @@ namespace X86II { // Start from 0, skip registers encoded in VEX_VVVV or a mask register. return 0 + HasVEX_4V + HasEVEX_K; case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: + case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5: + case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8: case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB: + case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE: case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: - case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: - case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: - case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2: - case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5: - case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA: - case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED: - case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1: - case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4: - case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7: - case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA: - case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD: - case X86II::MRM_FE: case X86II::MRM_FF: + case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7: + case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0: + case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3: + case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6: + case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9: + case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC: + case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF: + case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2: + case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5: + case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8: + case 
X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB: + case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE: + case X86II::MRM_FF: return -1; } } @@ -740,7 +754,7 @@ namespace X86II { case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: - return true; + return true; } return false; } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index a33468dc4769..736c39dfb6f1 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -32,9 +32,11 @@ namespace { X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) - : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, - // Only i386 uses Rel instead of RelA. - /*HasRelocationAddend*/ EMachine != ELF::EM_386) {} + : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, + // Only i386 and IAMCU use Rel instead of RelA. + /*HasRelocationAddend*/ + (EMachine != ELF::EM_386) && + (EMachine != ELF::EM_IAMCU)) {} X86ELFObjectWriter::~X86ELFObjectWriter() {} @@ -246,7 +248,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, if (getEMachine() == ELF::EM_X86_64) return getRelocType64(Modifier, Type, IsPCRel); - assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type."); + assert((getEMachine() == ELF::EM_386 || getEMachine() == ELF::EM_IAMCU) && + "Unsupported ELF machine type."); return getRelocType32(Modifier, getType32(Type), IsPCRel); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index deaad2a5b8e8..30d5c802d1ed 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -20,39 +20,42 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class X86MCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit X86MCAsmInfoDarwin(const Triple &Triple); - }; +public: + explicit X86MCAsmInfoDarwin(const Triple &Triple); +}; - struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { - explicit X86_64MCAsmInfoDarwin(const Triple &Triple); - const MCExpr * - getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, - MCStreamer &Streamer) const override; - }; +struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { + explicit X86_64MCAsmInfoDarwin(const Triple &Triple); + const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; +}; - class X86ELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit X86ELFMCAsmInfo(const Triple &Triple); - }; +class X86ELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; - class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor() override; - public: - explicit X86MCAsmInfoMicrosoft(const Triple &Triple); - }; +public: + explicit X86ELFMCAsmInfo(const Triple &Triple); +}; - class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { - void anchor() override; - public: - explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); - }; +class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + void anchor() override; + +public: + explicit X86MCAsmInfoMicrosoft(const Triple &Triple); +}; + +class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + 
void anchor() override; + +public: + explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); +}; } // namespace llvm #endif diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 10c434c8b1b4..dfab6ec10775 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -510,8 +510,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, - Fixups); + EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), + CurByte, OS, Fixups); return; } @@ -988,6 +988,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCInstrDesc &Desc) { unsigned REX = 0; + bool UsesHighByteReg = false; + if (TSFlags & X86II::REX_W) REX |= 1 << 3; // set REX.W @@ -1004,6 +1006,8 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); + if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) + UsesHighByteReg = true; if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. @@ -1073,6 +1077,9 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } break; } + if (REX && UsesHighByteReg) + report_fatal_error("Cannot encode high byte register in REX-prefixed instruction"); + return REX; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 83b4091d7665..53a6550acdd5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -122,7 +122,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, } else if (TheTriple.isOSBinFormatELF()) { // Force the use of an ELF container. 
MAI = new X86ELFMCAsmInfo(TheTriple); - } else if (TheTriple.isWindowsMSVCEnvironment()) { + } else if (TheTriple.isWindowsMSVCEnvironment() || + TheTriple.isWindowsCoreCLREnvironment()) { MAI = new X86MCAsmInfoMicrosoft(TheTriple); } else if (TheTriple.isOSCygMing() || TheTriple.isWindowsItaniumEnvironment()) { @@ -267,3 +268,184 @@ extern "C" void LLVMInitializeX86TargetMC() { TargetRegistry::RegisterMCAsmBackend(TheX86_64Target, createX86_64AsmBackend); } + +unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, + bool High) { + switch (Size) { + default: return 0; + case 8: + if (High) { + switch (Reg) { + default: return getX86SubSuperRegisterOrZero(Reg, 64); + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AH; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DH; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CH; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BH; + } + } else { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AL; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DL; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CL; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; + } + } + case 16: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::R8B: case 
X86::R8W: case X86::R8D: case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; + } + case 32: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::EAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::EDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::ECX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::EBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::ESI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::EDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::EBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case 64: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; + } + } +} + +unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, 
bool High) { + unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High); + assert(Res != 0 && "Unexpected register or VT"); + return Res; +} + + diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 6221baba1793..2d2836ff07c5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -79,7 +79,7 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, raw_pwrite_stream &OS, MCCodeEmitter *CE, - bool RelaxAll); + bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an X86 Mach-O object writer. MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, @@ -98,6 +98,17 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); /// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); + +/// Returns the sub or super register of a specific X86 register. +/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX. +/// Aborts on error. +unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false); + +/// Returns the sub or super register of a specific X86 register. +/// Like getX86SubSuperRegister() but returns 0 on error. +unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned, + bool High = false); + } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 9e801fc8f191..191ebeac7265 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol", false); + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. - if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference", - false); + if (IsPCRel) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported pc-relative relocation of difference"); + return; + } // The support for the situation where one or both of the symbols would // require a local relocation is handled just like if the symbols were @@ -168,16 +173,20 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Darwin 'as' doesn't emit correct relocations for this (it ends up with a // single SIGNED relocation); reject it for now. Except the case where both // symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base", false); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } // A subtraction expression where either symbol is undefined is a // non-relocatable expression. if (A->isUndefined() || B->isUndefined()) { StringRef Name = A->isUndefined() ? 
A->getName() : B->getName(); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation with subtraction expression, symbol '" + Name + "' can not be undefined in a subtraction expression"); + return; } Value += Writer->getSymbolAddress(*A, Layout) - @@ -244,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation( FixedValue = Res; return; } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of variable '" + + Symbol->getName() + "'"); + return; } } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + return; } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); @@ -266,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation", - false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; } else { Type = MachO::X86_64_RELOC_SIGNED; @@ -292,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation( } } } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation", false); + if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), + "unsupported symbol modifier in branch relocation"); + return; + } Type = MachO::X86_64_RELOC_BRANCH; } @@ -309,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation( Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel", - false); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation", false); - else { + Asm.getContext().reportError( + Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel"); + return; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; + } else { Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) - report_fatal_error("32-bit absolute addressing is not supported in " - "64-bit mode", false); + if (Kind == X86::reloc_signed_4byte) { + Asm.getContext().reportError( + Fixup.getLoc(), + "32-bit absolute addressing is not supported in 64-bit mode"); + return; + } } } } @@ -350,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // See . 
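The surrounding hunks all make the same transformation: report_fatal_error, which kills the whole process, is replaced by a diagnostic attached to the fixup's source location followed by an early return. A compact sketch of the pattern, assuming this era's MC API (recordFoo is a placeholder name, not a function from the patch):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"

// Only the error-handling shape matters here.
static bool recordFoo(llvm::MCContext &Ctx, const llvm::MCFixup &Fixup,
                      bool Unsupported) {
  if (Unsupported) {
    // reportError attaches the diagnostic to the fixup's location and lets
    // assembly continue; the assembler fails at the end instead of aborting
    // mid-stream the way report_fatal_error does.
    Ctx.reportError(Fixup.getLoc(), "unsupported relocation");
    return false; // tell the caller nothing was emitted
  }
  // ... emit the relocation ...
  return true;
}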
const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression", - false); + if (!A->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + return false; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -363,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression", - false); + if (!SB->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + return false; + } // Select the appropriate difference relocation type. // @@ -387,12 +416,12 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (FixupOffset > 0xffffff) { char Buffer[32]; format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), Twine("Section too large, can't encode " "r_address (") + Buffer + ") into 24 bits of scattered " "relocation entry."); - llvm_unreachable("fatal error returned?!"); + return false; } MachO::any_relocation_info MRE; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 92f42b68ae51..d04511873b46 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -50,9 +50,11 @@ void X86WinCOFFStreamer::FinishImpl() { MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, raw_pwrite_stream &OS, - MCCodeEmitter *CE, bool RelaxAll) { + MCCodeEmitter *CE, bool RelaxAll, + bool IncrementalLinkerCompatible) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index cae865a40819..4fdd527d87c8 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -140,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, } } -/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. +/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
   unsigned NumLanes = VT.getSizeInBits() / 128;
+  if (NumLanes == 0) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
   unsigned NewImm = Imm;
@@ -191,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm,
   }
 }
 
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumHalfElts = NumElts / 2;
+
+  for (unsigned l = 0; l != NumHalfElts; ++l)
+    ShuffleMask.push_back(l + NumHalfElts);
+  for (unsigned h = 0; h != NumHalfElts; ++h)
+    ShuffleMask.push_back(h);
+}
+
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
@@ -222,7 +233,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
   // independently on 128-bit lanes.
   unsigned NumLanes = VT.getSizeInBits() / 128;
-  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
+  if (NumLanes == 0) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
@@ -253,6 +264,26 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
   }
 }
 
+/// \brief Decode the immediate mask of a shuffle of packed values at 128-bit
+/// granularity (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
+/// into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+                               SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
+  unsigned ControlBitsMask = NumLanes - 1;
+  unsigned NumControlBits = NumLanes / 2;
+
+  for (unsigned l = 0; l != NumLanes; ++l) {
+    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+    // We actually need the other source.
+    if (l >= NumLanes / 2)
+      LaneMask += NumLanes;
+    for (unsigned i = 0; i != NumElementsInLane; ++i)
+      ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+  }
+}
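decodeVSHUF64x2FamilyMask above reads NumControlBits of the immediate per 128-bit result lane, and biases the upper half of the result toward the second source. A standalone restatement so the immediate encoding is easy to check by hand; widths are passed as plain integers instead of an MVT, and the function name is illustrative:

#include <cstdio>
#include <vector>

static std::vector<int> decodeVSHUF64x2(unsigned VecBits, unsigned EltBits,
                                        unsigned Imm) {
  unsigned NumLanes = VecBits / 128;
  unsigned EltsPerLane = 128 / EltBits;
  unsigned ControlBitsMask = NumLanes - 1;
  unsigned NumControlBits = NumLanes / 2;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    if (l >= NumLanes / 2)
      LaneMask += NumLanes; // upper result lanes read the second source
    for (unsigned i = 0; i != EltsPerLane; ++i)
      Mask.push_back(LaneMask * EltsPerLane + i);
  }
  return Mask;
}

int main() {
  // vshuff64x2 with imm 0xe4 on v8f64: lanes 0,1 from src1 and lanes 2,3
  // from src2, i.e. elements 0 1 2 3 12 13 14 15.
  for (int M : decodeVSHUF64x2(512, 64, 0xe4))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}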
+
 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask) {
   unsigned HalfSize = VT.getVectorNumElements() / 2;
@@ -277,10 +308,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
   //   <4 x i32>
 
+#ifndef NDEBUG
   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
-
-  if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
-    return;
+  assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
+#endif
 
   // This is a straightforward byte vector.
   if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
@@ -290,7 +321,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
     for (int i = 0; i < NumElements; ++i) {
       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
      // lane of the vector we're inside.
-      int Base = i < 16 ? 0 : 16;
+      int Base = i & ~0xf;
       Constant *COp = C->getAggregateElement(i);
       if (!COp) {
         ShuffleMask.clear();
@@ -357,44 +388,66 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   }
 }
 
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+                        SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
-  assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
-  assert(MaskTy->getVectorElementType()->isIntegerTy() &&
-         "Expected integer constant mask elements!");
-  int ElementBits = MaskTy->getScalarSizeInBits();
-  int NumElements = MaskTy->getVectorNumElements();
+  // It is not an error for the VPERMILP mask to not be a vector of the
+  // expected element type, because the constant pool uniques constants by
+  // their bit representation.
+  // e.g. the following take up the same space in the constant pool:
+  //   i128 -170141183420855150465331762880109871104
+  //
+  //   <2 x i64>
+  //
+  //   <4 x i32>
+
+  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+  if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+    return;
+
+  // Only support vector types.
+  if (!MaskTy->isVectorTy())
+    return;
+
+  // Make sure it's an integer type.
+  Type *VecEltTy = MaskTy->getVectorElementType();
+  if (!VecEltTy->isIntegerTy())
+    return;
+
+  // Support any element type from byte up to element size.
+  // This is necessary primarily because 64-bit elements get split to 32-bit
+  // in the constant pool on 32-bit target.
+  unsigned EltTySize = VecEltTy->getIntegerBitWidth();
+  if (EltTySize < 8 || EltTySize > ElSize)
+    return;
+
+  unsigned NumElements = MaskTySize / ElSize;
   assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
          "Unexpected number of vector elements.");
   ShuffleMask.reserve(NumElements);
+  unsigned NumElementsPerLane = 128 / ElSize;
+  unsigned Factor = ElSize / EltTySize;
 
-  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
-    assert((unsigned)NumElements == CDS->getNumElements() &&
-           "Constant mask has a different number of elements!");
+  for (unsigned i = 0; i < NumElements; ++i) {
+    Constant *COp = C->getAggregateElement(i * Factor);
+    if (!COp) {
+      ShuffleMask.clear();
+      return;
+    } else if (isa<UndefValue>(COp)) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
     }
-
-    for (int i = 0; i < NumElements; ++i) {
-      int Base = (i * ElementBits / 128) * (128 / ElementBits);
-      uint64_t Element = CDS->getElementAsInteger(i);
-      // Only the least significant 2 bits of the integer are used.
-      int Index = Base + (Element & 0x3);
-      ShuffleMask.push_back(Index);
-    }
-  } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
-    assert((unsigned)NumElements == C->getNumOperands() &&
-           "Constant mask has a different number of elements!");
-
-    for (int i = 0; i < NumElements; ++i) {
-      int Base = (i * ElementBits / 128) * (128 / ElementBits);
-      Constant *COp = CV->getOperand(i);
-      if (isa<UndefValue>(COp)) {
-        ShuffleMask.push_back(SM_SentinelUndef);
-        continue;
-      }
-      uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
-      // Only the least significant 2 bits of the integer are used.
-      int Index = Base + (Element & 0x3);
-      ShuffleMask.push_back(Index);
+    int Index = i & ~(NumElementsPerLane - 1);
+    uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+    if (ElSize == 64)
+      Index += (Element >> 1) & 0x1;
+    else
+      Index += Element & 0x3;
+    ShuffleMask.push_back(Index);
   }
+
+  // TODO: Handle funny-looking vectors too.
 }
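The rewritten DecodeVPERMILPMask keeps every index inside its own 128-bit lane and, for 64-bit elements, uses only bit 1 of each control word. A pencil-and-paper version of just that index math; controls arrive as plain integers here rather than IR-level Constants, and decodeVPermIlp is a stand-in name:

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int> decodeVPermIlp(const std::vector<uint64_t> &Ctl,
                                       unsigned ElSize) {
  unsigned PerLane = 128 / ElSize;
  std::vector<int> Mask;
  for (unsigned i = 0; i != Ctl.size(); ++i) {
    int Index = i & ~(PerLane - 1); // start of this element's 128-bit lane
    Index += (ElSize == 64) ? ((Ctl[i] >> 1) & 0x1) : (Ctl[i] & 0x3);
    Mask.push_back(Index);
  }
  return Mask;
}

int main() {
  // v4f64 (256-bit, two lanes): controls {2,0,2,0} swap each lane's pair.
  for (int M : decodeVPermIlp({2, 0, 2, 0}, 64))
    std::printf("%d ", M); // prints: 1 0 3 2
  std::printf("\n");
  return 0;
}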
 
 void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
@@ -503,4 +556,74 @@ void DecodeINSERTQIMask(int Len, int Idx,
     ShuffleMask.push_back(SM_SentinelUndef);
 }
 
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask) {
+  for (int i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    ShuffleMask.push_back((int)M);
+  }
+}
+
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+                       SmallVectorImpl<int> &ShuffleMask) {
+  for (int i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    ShuffleMask.push_back((int)M);
+  }
+}
+
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+                      SmallVectorImpl<int> &ShuffleMask) {
+  Type *MaskTy = C->getType();
+  if (MaskTy->isVectorTy()) {
+    unsigned NumElements = MaskTy->getVectorNumElements();
+    if (NumElements == VT.getVectorNumElements()) {
+      for (unsigned i = 0; i < NumElements; ++i) {
+        Constant *COp = C->getAggregateElement(i);
+        if (!COp || (!isa<ConstantInt>(COp) && !isa<UndefValue>(COp))) {
+          ShuffleMask.clear();
+          return;
+        }
+        if (isa<UndefValue>(COp))
+          ShuffleMask.push_back(SM_SentinelUndef);
+        else {
+          uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+          Element &= (1 << NumElements) - 1;
+          ShuffleMask.push_back(Element);
+        }
+      }
+    }
+    return;
+  }
+  // Scalar value; just broadcast it
+  if (!isa<ConstantInt>(C))
+    return;
+  uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
+  int NumElements = VT.getVectorNumElements();
+  Element &= (1 << NumElements) - 1;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask.push_back(Element);
+}
+
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+                       SmallVectorImpl<int> &ShuffleMask) {
+  Type *MaskTy = C->getType();
+  unsigned NumElements = MaskTy->getVectorNumElements();
+  if (NumElements == VT.getVectorNumElements()) {
+    for (unsigned i = 0; i < NumElements; ++i) {
+      Constant *COp = C->getAggregateElement(i);
+      if (!COp) {
+        ShuffleMask.clear();
+        return;
+      }
+      if (isa<UndefValue>(COp))
+        ShuffleMask.push_back(SM_SentinelUndef);
+      else {
+        uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+        Element &= (1 << NumElements*2) - 1;
+        ShuffleMask.push_back(Element);
+      }
+    }
+  }
+}
 } // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 3d10d18e860e..ab18e6438ec9 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -54,6 +54,9 @@ void DecodePSHUFHWMask(MVT VT, unsigned Imm,
 void DecodePSHUFLWMask(MVT, unsigned Imm,
                        SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decodes a PSWAPD 3DNow! instruction.
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
@@ -83,12 +86,18 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decode the immediate mask of a shuffle of packed values at 128-bit
+/// granularity into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+                               SmallVectorImpl<int> &ShuffleMask);
+
 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
 /// No VT provided since it only works on 256-bit, 4 element vectors.
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
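Of the DecodeVPERMVMask overloads just added, the constant-pool path has one subtlety: a broadcast mask reaches the decoder as a lone scalar, which must be splatted and wrapped to the vector length. A small sketch of that scalar path, with plain integers standing in for IR-level Constants:

#include <cstdint>
#include <vector>

// Scalar-broadcast path of the VPERMV decoding: a splatted control value
// yields the same (wrapped) source index in every result slot.
static std::vector<int> decodeVPermVBroadcast(uint64_t Element,
                                              unsigned NumElements) {
  Element &= (1ULL << NumElements) - 1; // indices wrap at the vector length
  return std::vector<int>(NumElements, static_cast<int>(Element));
}

// e.g. decodeVPermVBroadcast(13, 8) yields {5,5,5,5,5,5,5,5}: 13 wraps to 5
// for an 8-element vector, so every result element reads source element 5.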
 /// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+                        SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
@@ -108,6 +117,22 @@ void DecodeEXTRQIMask(int Len, int Idx,
 /// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
 void DecodeINSERTQIMask(int Len, int Idx,
                         SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+                      SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+                       SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+                       SmallVectorImpl<int> &ShuffleMask);
 } // llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 8403ae6101df..fbec6626d99d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -23,56 +23,47 @@ class FunctionPass;
 class ImmutablePass;
 class X86TargetMachine;
 
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into a X86-specific DAG, ready for
+/// instruction scheduling.
 FunctionPass *createX86ISelDag(X86TargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 
-/// createX86GlobalBaseRegPass - This pass initializes a global base
-/// register for PIC on x86-32.
+/// This pass initializes a global base register for PIC on x86-32.
 FunctionPass* createX86GlobalBaseRegPass();
 
-/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
-/// to local-dynamic TLS variables so that the TLS base address for the module
-/// is only fetched once per execution path through the function.
+/// This pass combines multiple accesses to local-dynamic TLS variables so that
+/// the TLS base address for the module is only fetched once per execution path
+/// through the function.
 FunctionPass *createCleanupLocalDynamicTLSPass();
 
-/// createX86FloatingPointStackifierPass - This function returns a pass which
-/// converts floating point register references and pseudo instructions into
-/// floating point stack references and physical instructions.
-///
+/// This function returns a pass which converts floating-point register
+/// references and pseudo instructions into floating-point stack references and
+/// physical instructions.
 FunctionPass *createX86FloatingPointStackifierPass();
 
-/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
-/// before each call to avoid transition penalty between functions encoded with
-/// AVX and SSE.
+/// This pass inserts AVX vzeroupper instructions before each call to avoid
+/// transition penalty between functions encoded with AVX and SSE.
 FunctionPass *createX86IssueVZeroUpperPass();
 
-/// createX86EmitCodeToMemory - Returns a pass that converts a register
-/// allocated function into raw machine code in a dynamically
-/// allocated chunk of memory.
-///
-FunctionPass *createEmitX86CodeToMemory();
-
-/// createX86PadShortFunctions - Return a pass that pads short functions
-/// with NOOPs. This will prevent a stall when returning on the Atom.
+/// Return a pass that pads short functions with NOOPs.
+/// This will prevent a stall when returning on the Atom.
 FunctionPass *createX86PadShortFunctions();
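The factories declared above are consumed by the target's pass configuration rather than called directly. The wiring below is a hypothetical, simplified sketch of that pattern (the real hook lives in X86TargetMachine.cpp's X86PassConfig and also consults the subtarget); a stand-in addPass is declared so the sketch stays self-contained:

namespace llvm { class FunctionPass; }
using llvm::FunctionPass;

// Declared in X86.h above.
FunctionPass *createX86PadShortFunctions();
FunctionPass *createX86FixupLEAs();
FunctionPass *createX86IssueVZeroUpperPass();

// Stand-in for TargetPassConfig; only the shape of the wiring is shown.
struct PipelineSketch {
  void addPass(FunctionPass *P); // declaration only, for illustration
  void addPreEmitPasses(bool OptimizeForSpeed) {
    if (OptimizeForSpeed) {
      addPass(createX86PadShortFunctions()); // Atom return-stall padding
      addPass(createX86FixupLEAs());         // slow ALU ops -> LEA
    }
    addPass(createX86IssueVZeroUpperPass()); // AVX/SSE transition penalty
  }
};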
-
-/// createX86FixupLEAs - Return a a pass that selectively replaces
-/// certain instructions (like add, sub, inc, dec, some shifts,
-/// and some multiplies) by equivalent LEA instructions, in order
-/// to eliminate execution delays in some Atom processors.
+
+/// Return a pass that selectively replaces certain instructions (like add,
+/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
+/// instructions, in order to eliminate execution delays in some processors.
 FunctionPass *createX86FixupLEAs();
 
-/// createX86CallFrameOptimization - Return a pass that optimizes
-/// the code-size of x86 call sequences. This is done by replacing
-/// esp-relative movs with pushes.
+/// Return a pass that removes redundant address recalculations.
+FunctionPass *createX86OptimizeLEAs();
+
+/// Return a pass that optimizes the code-size of x86 call sequences. This is
+/// done by replacing esp-relative movs with pushes.
 FunctionPass *createX86CallFrameOptimization();
 
-/// createX86WinEHStatePass - Return an IR pass that inserts EH registration
-/// stack objects and explicit EH state updates. This pass must run after EH
-/// preparation, which does Windows-specific but architecture-neutral
-/// preparation.
+/// Return an IR pass that inserts EH registration stack objects and explicit
+/// EH state updates. This pass must run after EH preparation, which does
+/// Windows-specific but architecture-neutral preparation.
 FunctionPass *createX86WinEHStatePass();
 
 /// Return a Machine IR pass that expands X86-specific pseudo
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 852267400bba..8902a8534256 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -37,14 +37,26 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
 def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                      "Support POPCNT instruction">;
 
+def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
+                                   "Support fxsave/fxrestore instructions">;
+
+def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
+                                    "Support xsave instructions">;
+
+def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
+                                      "Support xsaveopt instructions">;
+
+def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
+                                     "Support xsavec instructions">;
+
+def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
+                                     "Support xsaves instructions">;
 
-def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
-                                  "Enable MMX instructions">;
 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                    "Enable SSE instructions",
                                    // SSE codegen depends on cmovs, and all
                                    // SSE1+ processors support them.
-                                   [FeatureMMX, FeatureCMOV]>;
+                                   [FeatureCMOV]>;
 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                    "Enable SSE2 instructions",
                                    [FeatureSSE1]>;
@@ -60,6 +72,11 @@ def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                     "Enable SSE 4.2 instructions",
                                     [FeatureSSE41]>;
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", + "Enable MMX instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions", [FeatureMMX]>; @@ -79,16 +96,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; -// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that -// explicit. Also, it seems this would be the default state for most chips -// going forward, so it would probably be better to negate the logic and -// match the 32-byte "slow mem" feature below. -def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", - "IsUAMemFast", "true", - "Fast unaligned memory access">; +// FIXME: This should not apply to CPUs that do not have SSE. +def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", + "IsUAMem16Slow", "true", + "Slow unaligned 16-byte memory access">; def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", - "IsUAMem32Slow", "true", - "Slow unaligned 32-byte memory access">; + "IsUAMem32Slow", "true", + "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -120,6 +134,8 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; +def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", + "Enable protection keys">; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; @@ -168,9 +184,11 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", + "Support LAHF and SAHF instructions">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; -def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", +def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", @@ -181,6 +199,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +// TODO: This feature ought to be renamed. +// What it really refers to are CPUs for which certain instructions +// (which ones besides the example below?) are microcoded. +// The best examples of this are the memory forms of CALL and PUSH +// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. 
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; @@ -208,278 +231,473 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", class Proc Features> : ProcessorModel; -def : Proc<"generic", []>; -def : Proc<"i386", []>; -def : Proc<"i486", []>; -def : Proc<"i586", []>; -def : Proc<"pentium", []>; -def : Proc<"pentium-mmx", [FeatureMMX]>; -def : Proc<"i686", []>; -def : Proc<"pentiumpro", [FeatureCMOV]>; -def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>; -def : Proc<"pentium3", [FeatureSSE1]>; -def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; -def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"pentium4", [FeatureSSE2]>; -def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; +def : Proc<"generic", [FeatureSlowUAMem16]>; +def : Proc<"i386", [FeatureSlowUAMem16]>; +def : Proc<"i486", [FeatureSlowUAMem16]>; +def : Proc<"i586", [FeatureSlowUAMem16]>; +def : Proc<"pentium", [FeatureSlowUAMem16]>; +def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"i686", [FeatureSlowUAMem16]>; +def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>; +def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, + FeatureFXSR]>; +def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, + FeatureFXSR]>; +def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, + FeatureFXSR, FeatureSlowBTMem]>; +def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureSlowBTMem]>; +def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR]>; +def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureSlowBTMem]>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureSSE3, FeatureSlowBTMem]>; + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; // NetBurst. -def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : Proc<"prescott", + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; +def : Proc<"nocona", [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem +]>; // Intel Core 2 Solo/Duo. -def : ProcessorModel<"core2", SandyBridgeModel, - [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : ProcessorModel<"penryn", SandyBridgeModel, - [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : ProcessorModel<"core2", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; +def : ProcessorModel<"penryn", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE41, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; // Atom CPUs. class BonnellProc : ProcessorModel; + ProcIntelAtom, + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeatureSlowBTMem, + FeatureLEAForSP, + FeatureSlowDivide32, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeatureLEAUsesAG, + FeaturePadShortFunctions, + FeatureLAHFSAHF +]>; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. 
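As a rough illustration of how the feature bits defined above surface on the C++ side (a minimal sketch under assumed details, not code from this patch): the second SubtargetFeature argument names a member of X86Subtarget that TableGen-generated code sets when a CPU definition or -mattr flag enables the feature, and backend passes query it through accessors.

    // Hypothetical excerpt in the style of X86Subtarget.h; the member names
    // mirror the second SubtargetFeature template argument in the defs above.
    class X86Subtarget {
      bool IsUAMem16Slow = false; // FeatureSlowUAMem16 ("slow-unaligned-mem-16")
      bool HasFXSR = false;       // FeatureFXSR ("fxsr")
      bool HasLAHFSAHF = false;   // FeatureLAHFSAHF ("sahf")

    public:
      bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
      bool hasFXSR() const { return HasFXSR; }
      bool hasLAHFSAHF() const { return HasLAHFSAHF; }
    };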
class SilvermontProc : ProcessorModel; + ProcIntelSLM, + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeaturePOPCNT, + FeaturePCLMUL, + FeatureAES, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeaturePRFCHW, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureLAHFSAHF +]>; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc : ProcessorModel; + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureLAHFSAHF +]>; def : WestmereProc<"westmere">; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. class SandyBridgeProc : ProcessorModel; + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureLAHFSAHF +]>; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. class IvyBridgeProc : ProcessorModel; + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. class HaswellProc : ProcessorModel; + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureSlowIncDec, + FeatureLAHFSAHF +]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. class BroadwellProc : ProcessorModel; + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureADX, + FeatureRDSEED, + FeatureSlowIncDec, + FeatureLAHFSAHF +]>; def : BroadwellProc<"broadwell">; // FIXME: define KNL model -class KnightsLandingProc : ProcessorModel; +class KnightsLandingProc : ProcessorModel; def : KnightsLandingProc<"knl">; // FIXME: define SKX model -class SkylakeProc : ProcessorModel; +class SkylakeProc : ProcessorModel; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. // AMD CPUs. 
-def : Proc<"k6", [FeatureMMX]>; -def : Proc<"k6-2", [Feature3DNow]>; -def : Proc<"k6-3", [Feature3DNow]>; -def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit, +def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit, +def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"amdfam10", [FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, +def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"barcelona", [FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, +def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; +def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD, 
FeatureLAHFSAHF]>; + // Bobcat -def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowSHLD]>; +def : Proc<"btver1", [ + FeatureMMX, + FeatureSSSE3, + FeatureSSE4A, + FeatureFXSR, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Jaguar -def : ProcessorModel<"btver2", BtVer2Model, - [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureAES, FeaturePCLMUL, - FeatureBMI, FeatureF16C, FeatureMOVBE, - FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, - FeatureSlowSHLD]>; - -// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. +def : ProcessorModel<"btver2", BtVer2Model, [ + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureAES, + FeaturePCLMUL, + FeatureBMI, + FeatureF16C, + FeatureMOVBE, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Bulldozer -def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowSHLD]>; +def : Proc<"bdver1", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Piledriver -def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; +def : Proc<"bdver2", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Steamroller -def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD, - FeatureFSGSBase]>; +def : Proc<"bdver3", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureXSAVEOPT, + FeatureSlowSHLD, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; // Excavator -def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, - FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, - FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureBMI2, - FeatureTBM, FeatureFMA, FeatureSSE4A, - FeatureFSGSBase]>; +def : Proc<"bdver4", [ + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureBMI2, + FeatureTBM, + FeatureFMA, + FeatureXSAVEOPT, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; -def : Proc<"geode", [Feature3DNowA]>; +def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; -def : Proc<"winchip-c6", [FeatureMMX]>; -def : Proc<"winchip2", [Feature3DNow]>; -def : 
Proc<"c3", [Feature3DNow]>; -def : Proc<"c3-2", [FeatureSSE1]>; +def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -492,8 +710,8 @@ def : Proc<"c3-2", [FeatureSSE1]>; // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, - [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; + [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, + FeatureSlowBTMem ]>; //===----------------------------------------------------------------------===// // Register File Description @@ -520,10 +738,6 @@ include "X86CallingConv.td" // Assembly Parser //===----------------------------------------------------------------------===// -def ATTAsmParser : AsmParser { - string AsmParserClassName = "AsmParser"; -} - def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; @@ -568,7 +782,6 @@ def IntelAsmWriter : AsmWriter { def X86 : Target { // Information about the instructions... let InstructionSet = X86InstrInfo; - let AssemblyParsers = [ATTAsmParser]; let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ba33248d2039..2170e62e30fd 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -217,10 +217,10 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, if (AsmVariant == 0) O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ? - MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : - ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); - Reg = getX86SubSuperRegister(Reg, VT); + unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 : + (strcmp(Modifier+6,"32") == 0) ? 32 : + (strcmp(Modifier+6,"16") == 0) ? 16 : 8; + Reg = getX86SubSuperRegister(Reg, Size); } O << X86ATTInstPrinter::getRegisterName(Reg); return; @@ -361,22 +361,21 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, switch (Mode) { default: return true; // Unknown mode. case 'b': // Print QImode register - Reg = getX86SubSuperRegister(Reg, MVT::i8); + Reg = getX86SubSuperRegister(Reg, 8); break; case 'h': // Print QImode high register - Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + Reg = getX86SubSuperRegister(Reg, 8, true); break; case 'w': // Print HImode register - Reg = getX86SubSuperRegister(Reg, MVT::i16); + Reg = getX86SubSuperRegister(Reg, 16); break; case 'k': // Print SImode register - Reg = getX86SubSuperRegister(Reg, MVT::i32); + Reg = getX86SubSuperRegister(Reg, 32); break; case 'q': // Print 64-bit register names if 64-bit integer registers are available. // Otherwise, print 32-bit register names. - MVT::SimpleValueType Ty = P.getSubtarget().is64Bit() ? MVT::i64 : MVT::i32; - Reg = getX86SubSuperRegister(Reg, Ty); + Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 
64 : 32); break; } @@ -535,6 +534,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { S, MCConstantExpr::create(int64_t(1), MMI->getContext())); } } + OutStreamer->EmitSyntaxDirective(); } static void @@ -565,10 +565,11 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const { const MachineConstantPoolEntry &CPE = MF->getConstantPool()->getConstants()[CPID]; if (!CPE.isMachineConstantPoolEntry()) { - SectionKind Kind = CPE.getSectionKind(TM.getDataLayout()); + const DataLayout &DL = MF->getDataLayout(); + SectionKind Kind = CPE.getSectionKind(&DL); const Constant *C = CPE.Val.ConstVal; if (const MCSectionCOFF *S = dyn_cast( - getObjFileLowering().getSectionForConstant(Kind, C))) { + getObjFileLowering().getSectionForConstant(DL, Kind, C))) { if (MCSymbol *Sym = S->getCOMDATSymbol()) { if (Sym->isUndefined()) OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 7f5d127c68d5..9c8bd98dbade 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -78,8 +78,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { // outputting it to the OutStream. This allows the shadow tracker to minimise // the number of NOPs used for stackmap padding. void EmitAndCountInstruction(MCInst &Inst); - - void InsertStackMapShadows(MachineFunction &MF); void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 031ba4ba9e66..fc6ee1752f1f 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" @@ -53,10 +54,13 @@ private: // Information we know about a particular call site struct CallContext { CallContext() - : Call(nullptr), SPCopy(nullptr), ExpectedDist(0), - MovVector(4, nullptr), NoStackParams(false), UsePush(false){}; + : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0), + MovVector(4, nullptr), NoStackParams(false), UsePush(false){} - // Actuall call instruction + // Iterator referring to the frame setup instruction + MachineBasicBlock::iterator FrameSetup; + + // Actual call instruction MachineInstr *Call; // A copy of the stack pointer @@ -75,17 +79,16 @@ private: bool UsePush; }; - typedef DenseMap ContextMap; + typedef SmallVector ContextVector; bool isLegal(MachineFunction &MF); - bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap); + bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap); void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, CallContext &Context); - bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock::iterator I, - const CallContext &Context); + bool adjustCallSequence(MachineFunction &MF, const CallContext &Context); MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup, unsigned Reg); @@ -100,7 +103,8 @@ private: const char *getPassName() const override { return "X86 Optimize Call Frame"; } const TargetInstrInfo *TII; - const TargetFrameLowering *TFL; + const X86FrameLowering 
*TFL; + const X86Subtarget *STI; const MachineRegisterInfo *MRI; static char ID; }; @@ -124,8 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // No point in running this in 64-bit mode, since some arguments are // passed in-register in all common calling conventions, so the pattern // we're looking for will never match. - const X86Subtarget &STI = MF.getSubtarget(); - if (STI.is64Bit()) + if (STI->is64Bit()) + return false; + + // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset + // in the compact unwind encoding that Darwin uses. So, bail if there + // is a danger of that being generated. + if (STI->isTargetDarwin() && + (!MF.getMMI().getLandingPads().empty() || + (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF)))) return false; // You would expect straight-line code between call-frame setup and @@ -161,7 +172,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // Check whether this trasnformation is profitable for a particular // function - in terms of code size. bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, - ContextMap &CallSeqMap) { + ContextVector &CallSeqVector) { // This transformation is always a win when we do not expect to have // a reserved call frame. Under other circumstances, it may be either // a win or a loss, and requires a heuristic. @@ -170,24 +181,20 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, return true; // Don't do this when not optimizing for size. - bool OptForSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); - - if (!OptForSize) + if (!MF.getFunction()->optForSize()) return false; unsigned StackAlign = TFL->getStackAlignment(); int64_t Advantage = 0; - for (auto CC : CallSeqMap) { + for (auto CC : CallSeqVector) { // Call sites where no parameters are passed on the stack // do not affect the cost, since there needs to be no // stack adjustment. - if (CC.second.NoStackParams) + if (CC.NoStackParams) continue; - if (!CC.second.UsePush) { + if (!CC.UsePush) { // If we don't use pushes for a particular call site, // we pay for not having a reserved call frame with an // additional sub/add esp pair. The cost is ~3 bytes per instruction, @@ -200,11 +207,11 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, // We'll need a add after the call. Advantage -= 3; // If we have to realign the stack, we'll also need and sub before - if (CC.second.ExpectedDist % StackAlign) + if (CC.ExpectedDist % StackAlign) Advantage -= 3; // Now, for each push, we save ~3 bytes. For small constants, we actually, // save more (up to 5 bytes), but 3 should be a good approximation. 
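To make the byte accounting in isProfitable concrete, here is an illustrative, self-contained sketch of the per-call-site arithmetic under the ~3-byte costs stated in the comments above (the pass itself sums this advantage over all call sites, with the per-push credit applied on the lines just below):

    #include <cstdint>

    // Net code-size advantage, in bytes, of turning one call site's
    // esp-relative movs into pushes.
    int64_t pushAdvantage(int64_t ExpectedDist, unsigned StackAlign) {
      int64_t Advantage = -3;                      // add needed after the call
      if (ExpectedDist % StackAlign)
        Advantage -= 3;                            // sub needed to realign
      return Advantage + (ExpectedDist / 4) * 3;   // ~3 bytes saved per push
    }
    // e.g. pushAdvantage(16, 16) == 9, while pushAdvantage(4, 16) == -3.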
- Advantage += (CC.second.ExpectedDist / 4) * 3; + Advantage += (CC.ExpectedDist / 4) * 3; } } @@ -212,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, } bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget().getInstrInfo(); - TFL = MF.getSubtarget().getFrameLowering(); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TFL = STI->getFrameLowering(); MRI = &MF.getRegInfo(); if (!isLegal(MF)) @@ -223,21 +231,22 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - ContextMap CallSeqMap; + ContextVector CallSeqVector; for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) if (I->getOpcode() == FrameSetupOpcode) { - CallContext &Context = CallSeqMap[I]; + CallContext Context; collectCallInfo(MF, *BB, I, Context); + CallSeqVector.push_back(Context); } - if (!isProfitable(MF, CallSeqMap)) + if (!isProfitable(MF, CallSeqVector)) return false; - for (auto CC : CallSeqMap) - if (CC.second.UsePush) - Changed |= adjustCallSequence(MF, CC.first, CC.second); + for (auto CC : CallSeqVector) + if (CC.UsePush) + Changed |= adjustCallSequence(MF, CC); return Changed; } @@ -307,13 +316,13 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast( - MF.getSubtarget().getRegisterInfo()); - unsigned StackPtr = RegInfo.getStackRegister(); + STI->getRegisterInfo()); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; + Context.FrameSetup = FrameSetup; // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. @@ -338,7 +347,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, if (!I->isCopy() || !I->getOperand(0).isReg()) return; Context.SPCopy = I++; - StackPtr = Context.SPCopy->getOperand(0).getReg(); + + unsigned StackPtr = Context.SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr @@ -434,22 +444,22 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, } bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, - MachineBasicBlock::iterator I, const CallContext &Context) { // Ok, we can in fact do the transformation for this call. // Do not remove the FrameSetup instruction, but adjust the parameters. // PEI will end up finalizing the handling of this. - MachineBasicBlock::iterator FrameSetup = I; - MachineBasicBlock &MBB = *(I->getParent()); + MachineBasicBlock::iterator FrameSetup = Context.FrameSetup; + MachineBasicBlock &MBB = *(FrameSetup->getParent()); FrameSetup->getOperand(1).setImm(Context.ExpectedDist); - DebugLoc DL = I->getDebugLoc(); + DebugLoc DL = FrameSetup->getDebugLoc(); // Now, iterate through the vector in reverse order, and replace the movs // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. 
for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); + MachineBasicBlock::iterator Push = nullptr; if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a @@ -461,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } - BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp); + Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) + .addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. - const X86Subtarget &ST = MF.getSubtarget(); - bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); + bool SlowPUSHrmm = STI->isAtom() || STI->isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { - MachineInstr *Push = - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -483,12 +492,19 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, DefMov->eraseFromParent(); } else { - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) .addReg(Reg) .getInstr(); } } + // For debugging, when using SP-based CFA, we need to adjust the CFA + // offset after each push. + // TODO: This is needed only if we require precise CFA. + if (!TFL->hasFP(MF)) + TFL->BuildCFI(MBB, std::next(Push), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); + MBB.erase(MOV); } @@ -532,13 +548,10 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( DefMI->getParent() != FrameSetup->getParent()) return nullptr; - // Now, make sure everything else up until the ADJCALLSTACK is a sequence - // of MOVs. To be less conservative would require duplicating a lot of the - // logic from PeepholeOptimizer. - // FIXME: A possibly better approach would be to teach the PeepholeOptimizer - // to be smarter about folding into pushes. + // Make sure we don't have any instructions between DefMI and the + // push that make folding the load illegal. 
for (auto I = DefMI; I != FrameSetup; ++I) - if (I->getOpcode() != X86::MOV32rm) + if (I->isLoadFoldBarrier()) return nullptr; return DefMI; diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h index 0eb2494f1d63..a08160f9feba 100644 --- a/lib/Target/X86/X86CallingConv.h +++ b/lib/Target/X86/X86CallingConv.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/IR/CallingConv.h" @@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, return false; } +inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure + // not to split i64 and double between a register and stack + static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; + static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]); + + SmallVectorImpl &PendingMembers = State.getPendingLocs(); + + // If this is the first part of a double/i64/i128, or if we're already + // in the middle of a split, add to the pending list. If this is not + // the end of the split, return, otherwise go on to process the pending + // list + if (ArgFlags.isSplit() || !PendingMembers.empty()) { + PendingMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + if (!ArgFlags.isSplitEnd()) + return true; + } + + // If there are no pending members, we are not in the middle of a split, + // so do the usual inreg stuff. + if (PendingMembers.empty()) { + if (unsigned Reg = State.AllocateReg(RegList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + return false; + } + + assert(ArgFlags.isSplitEnd()); + + // We now have the entire original argument in PendingMembers, so decide + // whether to use registers or the stack. + // Per the MCU ABI: + // a) To use registers, we need to have enough of them free to contain + // the entire argument. + // b) We never want to use more than 2 registers for a single argument. + + unsigned FirstFree = State.getFirstUnallocated(RegList); + bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); + + for (auto &It : PendingMembers) { + if (UseRegs) + It.convertToReg(State.AllocateReg(RegList[FirstFree++])); + else + It.convertToMem(State.AllocateStack(4, 4)); + State.addLoc(It); + } + + PendingMembers.clear(); + + return true; +} + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 8f88888f5ce3..54d88cbb244e 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[ // The X86-64 calling convention always returns FP values in XMM0. CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>, // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, @@ -202,6 +203,16 @@ def RetCC_X86_64_AnyReg : CallingConv<[ CCCustom<"CC_X86_AnyReg_Error"> ]>; +// X86-64 HHVM return-value convention. +def RetCC_X86_64_HHVM: CallingConv<[ + // Promote all types to i64 + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Return: could return in any GP register save RSP and R12.
+ CCIfType<[i64], CCAssignToReg<[RBX, RBP, RDI, RSI, RDX, RCX, R8, R9, + RAX, R10, R11, R13, R14, R15]>> +]>; + // This is the root return-value convention for the X86-32 backend. def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. @@ -227,6 +238,9 @@ def RetCC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, + // Handle HHVM calls. + CCIfCC<"CallingConv::HHVM", CCDelegateTo>, + // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -280,7 +294,7 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[v64i1], CCPromoteToType>, // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, @@ -305,7 +319,7 @@ def CC_X86_64_C : CallingConv<[ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, @@ -319,6 +333,23 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<64, 64>> ]>; +// Calling convention for X86-64 HHVM. +def CC_X86_64_HHVM : CallingConv<[ + // Use all/any GP registers for args, except RSP. + CCIfType<[i64], CCAssignToReg<[RBX, R12, RBP, R15, + RDI, RSI, RDX, RCX, R8, R9, + RAX, R10, R11, R13, R14]>> +]>; + +// Calling convention for helper functions in HHVM. +def CC_X86_64_HHVM_C : CallingConv<[ + // Pass the first argument in RBP. + CCIfType<[i64], CCAssignToReg<[RBP]>>, + + // Otherwise it's the same as the regular C calling convention. + CCDelegateTo +]>; + // Calling convention used on Win64 def CC_X86_Win64_C : CallingConv<[ // FIXME: Handle byval stuff. @@ -561,6 +592,23 @@ def CC_X86_32_C : CallingConv<[ CCDelegateTo ]>; +def CC_X86_32_MCU : CallingConv<[ + // Handles byval parameters. Note that, like FastCC, we can't rely on + // the delegation to CC_X86_32_Common because that happens after code that + // puts arguments in registers. + CCIfByVal>, + + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // If the call is not a vararg call, some arguments may be passed + // in integer registers. + CCIfNotVarArg>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + def CC_X86_32_FastCall : CallingConv<[ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, @@ -708,18 +756,28 @@ def CC_Intel_OCL_BI : CallingConv<[ CCDelegateTo ]>; +def CC_X86_32_Intr : CallingConv<[ + CCAssignToStack<4, 4> +]>; + +def CC_X86_64_Intr : CallingConv<[ + CCAssignToStack<8, 8> +]>; + //===----------------------------------------------------------------------===// // X86 Root Argument Calling Conventions //===----------------------------------------------------------------------===// // This is the root argument convention for the X86-32 backend. 
def CC_X86_32 : CallingConv<[ + CCIfSubtarget<"isTargetMCU()", CCDelegateTo>, CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, // Otherwise, drop to normal X86-32 CC CCDelegateTo @@ -734,6 +792,9 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, + CCIfCC<"CallingConv::HHVM", CCDelegateTo>, + CCIfCC<"CallingConv::HHVM_C", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -764,6 +825,12 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +// The function used by Darwin to obtain the address of a thread-local variable +// uses rdi to pass a single parameter and rax for the return value. All other +// GPRs are preserved. +def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, + R8, R9, R10, R11)>; + // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, R8, R9, R10, RSP)>; @@ -778,6 +845,11 @@ def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RBP, (sequence "XMM%u", 0, 15))>; +def CSR_32_AllRegs : CalleeSavedRegs<(add EAX, EBX, ECX, EDX, EBP, ESI, + EDI, ESP)>; +def CSR_32_AllRegs_SSE : CalleeSavedRegs<(add CSR_32_AllRegs, + (sequence "XMM%u", 0, 7))>; + def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP, (sequence "XMM%u", 16, 31))>; def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP, @@ -804,3 +876,6 @@ def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64, def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15, (sequence "ZMM%u", 16, 31), K4, K5, K6, K7)>; + +// Only R12 is preserved for PHP calls in HHVM. +def CSR_64_HHVM : CalleeSavedRegs<(add R12)>; diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm deleted file mode 100644 index 69b4c71651d7..000000000000 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ /dev/null @@ -1,68 +0,0 @@ -;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---=== -;; -;; The LLVM Compiler Infrastructure -;; -;; This file is distributed under the University of Illinois Open Source -;; License. See LICENSE.TXT for details. -;; -;;===----------------------------------------------------------------------=== -;; -;; This file implements the JIT interfaces for the X86 target. -;; -;;===----------------------------------------------------------------------=== - -extrn LLVMX86CompilationCallback2: PROC - -.code -X86CompilationCallback proc - push rbp - - ; Save RSP. - mov rbp, rsp - - ; Save all int arg registers - ; WARNING: We cannot use register spill area - we're generating stubs by hands! - push rcx - push rdx - push r8 - push r9 - - ; Align stack on 16-byte boundary. - and rsp, -16 - - ; Save all XMM arg registers. Also allocate reg spill area. 
- sub rsp, 96 - movaps [rsp +32], xmm0 - movaps [rsp+16+32], xmm1 - movaps [rsp+32+32], xmm2 - movaps [rsp+48+32], xmm3 - - ; JIT callee - - ; Pass prev frame and return address. - mov rcx, rbp - mov rdx, qword ptr [rbp+8] - call LLVMX86CompilationCallback2 - - ; Restore all XMM arg registers. - movaps xmm3, [rsp+48+32] - movaps xmm2, [rsp+32+32] - movaps xmm1, [rsp+16+32] - movaps xmm0, [rsp +32] - - ; Restore RSP. - mov rsp, rbp - - ; Restore all int arg registers - sub rsp, 32 - pop r9 - pop r8 - pop rdx - pop rcx - - ; Restore RBP. - pop rbp - ret -X86CompilationCallback endp - -End diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp index 6a5a28e546f2..a09d06519376 100644 --- a/lib/Target/X86/X86ExpandPseudo.cpp +++ b/lib/Target/X86/X86ExpandPseudo.cpp @@ -19,9 +19,10 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" -#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved. +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved. #include "llvm/IR/GlobalValue.h" using namespace llvm; @@ -141,6 +142,24 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // The EH_RETURN pseudo is really removed during the MC Lowering. return true; } + case X86::IRET: { + // Adjust stack to erase error code + int64_t StackAdj = MBBI->getOperand(0).getImm(); + X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true); + // Replace pseudo with machine iret + BuildMI(MBB, MBBI, DL, + TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32)); + MBB.erase(MBBI); + return true; + } + case X86::EH_RESTORE: { + // Restore ESP and EBP, and optionally ESI if required. + bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality( + MBB.getParent()->getFunction()->getPersonalityFn())); + X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH); + MBBI->eraseFromParent(); + return true; + } } llvm_unreachable("Previous switch has a fallthrough?"); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b4319c8bb04f..de94a138d865 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -298,8 +298,8 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -433,6 +433,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { + bool HasSSE2 = Subtarget->hasSSE2(); + bool HasSSE4A = Subtarget->hasSSE4A(); + bool HasAVX = Subtarget->hasAVX(); + bool IsNonTemporal = MMO && MMO->isNonTemporal(); + // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -449,35 +454,59 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, // FALLTHROUGH, handling i1 as i8. 
case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; - case MVT::i32: Opc = X86::MOV32mr; break; - case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. + case MVT::i32: + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr; + break; + case MVT::i64: + // Must be in x86-64 mode. + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr; + break; case MVT::f32: - Opc = X86ScalarSSEf32 ? - (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; + if (X86ScalarSSEf32) { + if (IsNonTemporal && HasSSE4A) + Opc = X86::MOVNTSS; + else + Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr; + } else + Opc = X86::ST_Fp32m; break; case MVT::f64: - Opc = X86ScalarSSEf64 ? - (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; + if (X86ScalarSSEf32) { + if (IsNonTemporal && HasSSE4A) + Opc = X86::MOVNTSD; + else + Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; + } else + Opc = X86::ST_Fp64m; break; case MVT::v4f32: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr; + else + Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr; + } else + Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v2f64: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr; + else + Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr; + } else + Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; - else + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr; + else + Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr; + } else Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr; break; } @@ -1069,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. We also do the same with %eax for Win32. - if (F.hasStructRetAttr() && - (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) { + // All x86 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, + // so now we copy the value out and into %rax/%eax. + if (F.hasStructRetAttr()) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); @@ -1431,17 +1459,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { .addMBB(TrueMBB); } - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - - // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, and adds it to the successor list. 
- fastEmitBranch(FalseMBB, DbgLoc); - + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { @@ -1472,12 +1490,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } @@ -1492,12 +1506,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } @@ -1511,12 +1520,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { .addReg(OpReg).addImm(1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } @@ -1945,6 +1949,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned ResultReg; if (Subtarget->hasAVX()) { + const TargetRegisterClass *FR32 = &X86::FR32RegClass; + const TargetRegisterClass *VR128 = &X86::VR128RegClass; + // If we have AVX, create 1 blendv instead of 3 logic instructions. // Blendv was introduced with SSE 4.1, but the 2 register form implicitly // uses XMM0 as the selection register. That may need just as many @@ -1955,10 +1962,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned BlendOpcode = (RetVT.SimpleTy == MVT::f32) ? 
X86::VBLENDVPSrr : X86::VBLENDVPDrr; - unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); - ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill, CmpReg, true); + unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CmpReg, true); + ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); } else { unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); @@ -2806,10 +2816,12 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget, if (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::HiPE) return 0; - if (CS && !CS->paramHasAttr(1, Attribute::StructRet)) - return 0; - if (CS && CS->paramHasAttr(1, Attribute::InReg)) - return 0; + + if (CS) + if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || + CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU()) + return 0; + return 4; } @@ -2924,7 +2936,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -3020,8 +3032,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore, - ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (Flags.isByVal()) { X86AddressMode SrcAM; SrcAM.Base.Reg = ArgReg; @@ -3252,6 +3264,30 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { updateValueMap(I, Reg); return true; } + case Instruction::BitCast: { + // Select SSE2/AVX bitcasts between 128/256 bit vector types. + if (!Subtarget->hasSSE2()) + return false; + + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); + + if (!SrcVT.isSimple() || !DstVT.isSimple()) + return false; + + if (!SrcVT.is128BitVector() && + !(Subtarget->hasAVX() && SrcVT.is256BitVector())) + return false; + + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + + // No instruction is needed for conversion. Reuse the register used by + // the first operand.
+ updateValueMap(I, Reg); + return true; + } } return false; @@ -3384,8 +3420,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { TII.get(Opc), ResultReg); addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - DL.getPointerSize(), Align); + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 5eb4faeedff4..1dd69e8a6a5f 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -9,6 +9,7 @@ // // This file defines the pass that finds instructions that can be // re-written as LEA instructions in order to reduce pipeline delays. +// When optimizing for size it replaces suitable LEAs with INC or DEC. // //===----------------------------------------------------------------------=== @@ -61,6 +62,11 @@ class FixupLEAPass : public MachineFunctionPass { void processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI); + /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg + /// and convert them to INC or DEC respectively. + bool fixupIncDec(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI) const; + /// \brief Determine if an instruction references a machine register /// and, if so, whether it reads or writes the register. RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); @@ -89,6 +95,8 @@ public: private: MachineFunction *MF; const X86InstrInfo *TII; // Machine instruction info. + bool OptIncDec; + bool OptLEA; }; char FixupLEAPass::ID = 0; } @@ -150,7 +158,10 @@ FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const X86Subtarget &ST = Func.getSubtarget(); - if (!ST.LEAusesAG() && !ST.slowLEA()) + OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); + OptLEA = ST.LEAusesAG() || ST.slowLEA(); + + if (!OptLEA && !OptIncDec) return false; TII = ST.getInstrInfo(); @@ -187,7 +198,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { if (I == MFI->begin()) { - if (MFI->isPredecessor(MFI)) { + if (MFI->isPredecessor(&*MFI)) { I = --MFI->end(); return true; } else @@ -222,6 +233,60 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, return nullptr; } +static inline bool isLEA(const int opcode) { + return opcode == X86::LEA16r || opcode == X86::LEA32r || + opcode == X86::LEA64r || opcode == X86::LEA64_32r; +} + +/// isLEASimpleIncOrDec - Does this LEA have one of these forms: +/// lea %reg, 1(%reg) +/// lea %reg, -1(%reg) +static inline bool isLEASimpleIncOrDec(MachineInstr *LEA) { + unsigned SrcReg = LEA->getOperand(1 + X86::AddrBaseReg).getReg(); + unsigned DstReg = LEA->getOperand(0).getReg(); + unsigned AddrDispOp = 1 + X86::AddrDisp; + return SrcReg == DstReg && + LEA->getOperand(1 + X86::AddrIndexReg).getReg() == 0 && + LEA->getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && + LEA->getOperand(AddrDispOp).isImm() && + (LEA->getOperand(AddrDispOp).getImm() == 1 || + LEA->getOperand(AddrDispOp).getImm() == -1); +} + +bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I, +
MachineFunction::iterator MFI) const { + MachineInstr *MI = I; + int Opcode = MI->getOpcode(); + if (!isLEA(Opcode)) + return false; + + if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) { + int NewOpcode; + bool isINC = MI->getOperand(4).getImm() == 1; + switch (Opcode) { + case X86::LEA16r: + NewOpcode = isINC ? X86::INC16r : X86::DEC16r; + break; + case X86::LEA32r: + case X86::LEA64_32r: + NewOpcode = isINC ? X86::INC32r : X86::DEC32r; + break; + case X86::LEA64r: + NewOpcode = isINC ? X86::INC64r : X86::DEC64r; + break; + } + + MachineInstr *NewMI = + BuildMI(*MFI, I, MI->getDebugLoc(), TII->get(NewOpcode)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + MFI->erase(I); + I = static_cast(NewMI); + return true; + } + return false; +} + void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { // Process a load, store, or LEA instruction. @@ -265,8 +330,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { MachineInstr *MI = I; const int opcode = MI->getOpcode(); - if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r && - opcode != X86::LEA64_32r) + if (!isLEA(opcode)) return; if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() || !TII->isSafeToClobberEFLAGS(*MFI, I)) @@ -280,7 +344,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, return; int addrr_opcode, addri_opcode; switch (opcode) { - default: llvm_unreachable("Unexpected LEA instruction"); + default: + llvm_unreachable("Unexpected LEA instruction"); case X86::LEA16r: addrr_opcode = X86::ADD16rr; addri_opcode = X86::ADD16ri; @@ -330,10 +395,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI) { for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { - if (MF.getSubtarget().isSLM()) - processInstructionForSLM(I, MFI); - else - processInstruction(I, MFI); + if (OptIncDec) + if (fixupIncDec(I, MFI)) + continue; + + if (OptLEA) { + if (MF.getSubtarget().isSLM()) + processInstructionForSLM(I, MFI); + else + processInstruction(I, MFI); + } } return false; } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 40b9c8a863a3..97bb8ab653a6 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -120,12 +120,10 @@ namespace { // Return a bitmask of FP registers in block's live-in list. 
static unsigned calcLiveInMask(MachineBasicBlock *MBB) { unsigned Mask = 0; - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - if (Reg < X86::FP0 || Reg > X86::FP6) + for (const auto &LI : MBB->liveins()) { + if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6) continue; - Mask |= 1 << (Reg - X86::FP0); + Mask |= 1 << (LI.PhysReg - X86::FP0); } return Mask; } @@ -301,8 +299,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { bool FPIsUsed = false; static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; i <= 6; ++i) - if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { + if (!MRI.reg_nodbg_empty(X86::FP0 + i)) { FPIsUsed = true; break; } @@ -321,7 +320,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // Process the function in depth first order so that we process at least one // of the predecessors for every reachable block in the function. SmallPtrSet<MachineBasicBlock*, 8> Processed; - MachineBasicBlock *Entry = MF.begin(); + MachineBasicBlock *Entry = &MF.front(); bool Changed = false; for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed)) @@ -329,9 +328,9 @@ // Process any unreachable blocks in arbitrary order now. if (MF.size() != Processed.size()) - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - if (Processed.insert(BB).second) - Changed |= processBasicBlock(MF, *BB); + for (MachineBasicBlock &BB : MF) + if (Processed.insert(&BB).second) + Changed |= processBasicBlock(MF, BB); LiveBundles.clear(); @@ -348,13 +347,12 @@ void FPS::bundleCFG(MachineFunction &MF) { LiveBundles.resize(Bundles->getNumBundles()); // Gather the actual live-in masks for all MBBs. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; - const unsigned Mask = calcLiveInMask(MBB); + for (MachineBasicBlock &MBB : MF) { + const unsigned Mask = calcLiveInMask(&MBB); if (!Mask) continue; // Update MBB ingoing bundle mask. 
- LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; + LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask; } } @@ -546,17 +544,9 @@ namespace { }; } -#ifndef NDEBUG -static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { - for (unsigned i = 0; i != NumEntries-1; ++i) - if (!(Table[i] < Table[i+1])) return false; - return true; -} -#endif - -static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { - const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); - if (I != Table+N && I->from == Opcode) +static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) { + const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode); + if (I != Table.end() && I->from == Opcode) return I->to; return -1; } @@ -567,7 +557,7 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { #define ASSERT_SORTED(TABLE) \ { static bool TABLE##Checked = false; \ if (!TABLE##Checked) { \ - assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ + assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \ "All lookup tables must be sorted for efficient access!"); \ TABLE##Checked = true; \ } \ @@ -746,7 +736,7 @@ static const TableEntry OpcodeTable[] = { static unsigned getConcreteOpcode(unsigned Opcode) { ASSERT_SORTED(OpcodeTable); - int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); + int Opc = Lookup(OpcodeTable, Opcode); assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); return Opc; } @@ -797,7 +787,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) { RegMap[Stack[--StackTop]] = ~0; // Update state // Check to see if there is a popping version of this instruction... - int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); + int Opcode = Lookup(PopTable, I->getOpcode()); if (Opcode != -1) { I->setDesc(TII->get(Opcode)); if (Opcode == X86::UCOM_FPPr) @@ -1193,7 +1183,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { // We decide which form to use based on what is on the top of the stack, and // which operand is killed by this instruction. - const TableEntry *InstTable; + ArrayRef<TableEntry> InstTable; bool isForward = TOS == Op0; bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); if (updateST0) { @@ -1208,8 +1198,7 @@ InstTable = ReverseSTiTable; } - int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), - MI->getOpcode()); + int Opcode = Lookup(InstTable, MI->getOpcode()); assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); // NotTOS - The register which is not on the top of stack... @@ -1520,31 +1509,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { return; } - case X86::WIN_FTOL_32: - case X86::WIN_FTOL_64: { - // Push the operand into ST0. - MachineOperand &Op = MI->getOperand(0); - assert(Op.isUse() && Op.isReg() && - Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); - unsigned FPReg = getFPReg(Op); - if (Op.isKill()) - moveToTop(FPReg, Inst); - else - duplicateToTop(FPReg, ScratchFPReg, Inst); - - // Emit the call. This will pop the operand. 
- BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) - .addExternalSymbol("_ftol2") - .addReg(X86::ST0, RegState::ImplicitKill) - .addReg(X86::ECX, RegState::ImplicitDefine) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::EDX, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - --StackTop; - - break; - } - case X86::RETQ: case X86::RETL: case X86::RETIL: diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3a21b57f0157..242d0333ef9a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -18,25 +18,23 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include <cstdlib> using namespace llvm; -// FIXME: completely move here. -extern cl::opt<bool> ForceStackAlign; - X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride) : TargetFrameLowering(StackGrowsDown, StackAlignOverride, @@ -80,6 +78,27 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); } +/// usesTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Conservatively assume that inline assembly might use the stack. + if (MF.hasInlineAsm()) + return true; + + return any_of(MRI.reg_instructions(X86::EFLAGS), + [](const MachineInstr &RI) { return RI.isCopy(); }); +} + +static bool doesStackUseImplyFP(const MachineFunction &MF) { + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + return IsWin64Prologue && usesTheStack(MF); +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -92,8 +111,9 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit() || MMI.callsEHReturn() || - MFI->hasStackMap() || MFI->hasPatchPoint()); + MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || + MFI->hasStackMap() || MFI->hasPatchPoint() || + doesStackUseImplyFP(MF)); } static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { @@ -148,21 +168,14 @@ static unsigned getLEArOpcode(unsigned IsLP64) { /// to this register without worry about clobbering it. 
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const TargetRegisterInfo *TRI, + const X86RegisterInfo *TRI, bool Is64Bit) { const MachineFunction *MF = MBB.getParent(); const Function *F = MF->getFunction(); if (!F || MF->getMMI().callsEHReturn()) return 0; - static const uint16_t CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX, 0 - }; - - static const uint16_t CallerSavedRegs64Bit[] = { - X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11, 0 - }; + const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF); unsigned Opc = MBBI->getOpcode(); switch (Opc) { @@ -191,10 +204,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, Uses.insert(*AI); } - const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; - for (; *CS; ++CS) - if (!Uses.count(*CS)) - return *CS; + for (auto CS : AvailableRegs) + if (!Uses.count(CS) && CS != X86::RIP) + return CS; } } @@ -214,8 +226,12 @@ static bool isEAXLiveIn(MachineFunction &MF) { return false; } -/// Check whether or not the terminators of \p MBB needs to read EFLAGS. -static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) { +/// Check if the flags need to be preserved before the terminators. +/// This is the case if EFLAGS is live-in to the region formed by the +/// terminators, or is live-out of that region without being defined +/// by a terminator. +static bool +flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { for (const MachineInstr &MI : MBB.terminators()) { bool BreakNext = false; for (const MachineOperand &MO : MI.operands()) { @@ -225,15 +241,27 @@ if (Reg != X86::EFLAGS) continue; - // This terminator needs an eflag that is not defined - // by a previous terminator. + // This terminator needs an eflags that is not defined + // by an earlier terminator: + // EFLAGS is live-in of the region composed by the terminators. if (!MO.isDef()) return true; + // This terminator defines the eflags, i.e., we don't need to preserve it. + // However, we still need to check that this specific terminator does not + // read a live-in value. BreakNext = true; } + // We found a definition of the eflags, no need to preserve them. if (BreakNext) - break; + return false; } + + // None of the terminators use or define the eflags. + // Check if they are live-out, that would imply we need to preserve them. + for (const MachineBasicBlock *Succ : MBB.successors()) + if (Succ->isLiveIn(X86::EFLAGS)) + return true; + return false; }
+ // We need to use LEA operations if EFLAGS is live in, because + // it means an instruction will read it before it gets defined. + UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS); } else { // If we can use LEA for SP but we shouldn't, check that none // of the terminators uses the eflags. Otherwise we will insert @@ -321,10 +357,10 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( // and is an optimization anyway. UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); if (UseLEA && !STI.useLeaForSP()) - UseLEA = terminatorsNeedFlagsAsInput(MBB); + UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); // If that assert breaks, that means we do not do the right thing // in canUseAsEpilogue. - assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) && + assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && "We shouldn't have allowed this insertion point"); } @@ -347,30 +383,6 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( return MI; } -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. -static -void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = nullptr) { - if (MBBI == MBB.begin()) return; - - MachineBasicBlock::iterator PI = std::prev(MBBI); - unsigned Opc = PI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || - Opc == X86::LEA32r || Opc == X86::LEA64_32r) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += PI->getOperand(2).getImm(); - MBB.erase(PI); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= PI->getOperand(2).getImm(); - MBB.erase(PI); - } -} - int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const { @@ -436,27 +448,265 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, } } -/// usesTheStack - This function checks if any of the users of EFLAGS -/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has -/// to use the stack, and if we don't adjust the stack we clobber the first -/// frame index. -/// See X86InstrInfo::copyPhysReg. 
-static bool usesTheStack(const MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (MachineRegisterInfo::reg_instr_iterator - ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); - ri != re; ++ri) - if (ri->isCopy()) - return true; - - return false; +MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool InProlog) const { + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); + if (STI.isTargetWindowsCoreCLR()) { + if (InProlog) { + return emitStackProbeInlineStub(MF, MBB, MBBI, DL, true); + } else { + return emitStackProbeInline(MF, MBB, MBBI, DL, false); + } + } else { + return emitStackProbeCall(MF, MBB, MBBI, DL, InProlog); + } } -void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL) const { +void X86FrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const { + const StringRef ChkStkStubSymbol = "__chkstk_stub"; + MachineInstr *ChkStkStub = nullptr; + + for (MachineInstr &MI : PrologMBB) { + if (MI.isCall() && MI.getOperand(0).isSymbol() && + ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) { + ChkStkStub = &MI; + break; + } + } + + if (ChkStkStub != nullptr) { + MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator()); + assert(std::prev(MBBI).operator==(ChkStkStub) && + "MBBI expected after __chkstk_stub."); + DebugLoc DL = PrologMBB.findDebugLoc(MBBI); + emitStackProbeInline(MF, PrologMBB, MBBI, DL, true); + ChkStkStub->eraseFromParent(); + } +} + +MachineInstr *X86FrameLowering::emitStackProbeInline( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); + assert(STI.is64Bit() && "different expansion needed for 32 bit"); + assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR"); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const BasicBlock *LLVM_BB = MBB.getBasicBlock(); + + // RAX contains the number of bytes of desired stack adjustment. + // The handling here assumes this value has already been updated so as to + // maintain stack alignment. + // + // We need to exit with RSP modified by this amount and execute suitable + // page touches to notify the OS that we're growing the stack responsibly. + // All stack probing must be done without modifying RSP. + // + // MBB: + // SizeReg = RAX; + // ZeroReg = 0 + // CopyReg = RSP + // Flags, TestReg = CopyReg - SizeReg + // FinalReg = !Flags.Ovf ? TestReg : ZeroReg + // LimitReg = gs magic thread env access + // if FinalReg >= LimitReg goto ContinueMBB + // RoundBB: + // RoundReg = page address of FinalReg + // LoopMBB: + // LoopReg = PHI(LimitReg,ProbeReg) + // ProbeReg = LoopReg - PageSize + // [ProbeReg] = 0 + // if (ProbeReg > RoundReg) goto LoopMBB + // ContinueMBB: + // RSP = RSP - RAX + // [rest of original MBB] + + // Set up the new basic blocks + MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator MBBIter = std::next(MBB.getIterator()); + MF.insert(MBBIter, RoundMBB); + MF.insert(MBBIter, LoopMBB); + MF.insert(MBBIter, ContinueMBB); + + // Split MBB and move the tail portion down to ContinueMBB. 
+ MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI); + ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end()); + ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB); + + // Some useful constants + const int64_t ThreadEnvironmentStackLimit = 0x10; + const int64_t PageSize = 0x1000; + const int64_t PageMask = ~(PageSize - 1); + + // Registers we need. For the normal case we use virtual + // registers. For the prolog expansion we use RAX, RCX and RDX. + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *RegClass = &X86::GR64RegClass; + const unsigned SizeReg = InProlog ? (unsigned)X86::RAX + : MRI.createVirtualRegister(RegClass), + ZeroReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + CopyReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + TestReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + FinalReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + RoundedReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + LimitReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + JoinReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + ProbeReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass); + + // SP-relative offsets where we can save RCX and RDX. + int64_t RCXShadowSlot = 0; + int64_t RDXShadowSlot = 0; + + // If inlining in the prolog, save RCX and RDX. + // Future optimization: don't save or restore if not live in. + if (InProlog) { + // Compute the offsets. We need to account for things already + // pushed onto the stack at this point: return address, frame + // pointer (if used), and callee saves. + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); + const bool HasFP = hasFP(MF); + RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); + RDXShadowSlot = RCXShadowSlot + 8; + // Emit the saves. + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RCXShadowSlot) + .addReg(X86::RCX); + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RDXShadowSlot) + .addReg(X86::RDX); + } else { + // Not in the prolog. Copy RAX to a virtual reg. + BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); + } + + // Add code to MBB to check for overflow and set the new target stack pointer + // to zero if so. + BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg) + .addReg(ZeroReg, RegState::Undef) + .addReg(ZeroReg, RegState::Undef); + BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP); + BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) + .addReg(CopyReg) + .addReg(SizeReg); + BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg) + .addReg(TestReg) + .addReg(ZeroReg); + + // FinalReg now holds final stack pointer value, or zero if + // allocation would overflow. Compare against the current stack + // limit from the thread environment block. Note this limit is the + // lowest touched page on the stack, not the point at which the OS + // will cause an overflow exception, so this is just an optimization + // to avoid unnecessarily touching pages that are below the current + // SP but already committed to the stack by the OS. 
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg) + .addReg(0) + .addImm(1) + .addReg(0) + .addImm(ThreadEnvironmentStackLimit) + .addReg(X86::GS); + BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg); + // Jump if the desired stack pointer is at or above the stack limit. + BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB); + + // Add code to roundMBB to round the final stack pointer to a page boundary. + BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) + .addReg(FinalReg) + .addImm(PageMask); + BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB); + + // LimitReg now holds the current stack limit, RoundedReg page-rounded + // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page + // and probe until we reach RoundedReg. + if (!InProlog) { + BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg) + .addReg(LimitReg) + .addMBB(RoundMBB) + .addReg(ProbeReg) + .addMBB(LoopMBB); + } + + addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, + false, -PageSize); + + // Probe by storing a byte onto the stack. + BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi)) + .addReg(ProbeReg) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0) + .addImm(0); + BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) + .addReg(RoundedReg) + .addReg(ProbeReg); + BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB); + + MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); + + // If in prolog, restore RDX and RCX. + if (InProlog) { + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), + X86::RCX), + X86::RSP, false, RCXShadowSlot); + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), + X86::RDX), + X86::RSP, false, RDXShadowSlot); + } + + // Now that the probing is done, add code to continueMBB to update + // the stack pointer for real. + BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(SizeReg); + + // Add the control flow edges we need. + MBB.addSuccessor(ContinueMBB); + MBB.addSuccessor(RoundMBB); + RoundMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(ContinueMBB); + LoopMBB->addSuccessor(LoopMBB); + + // Mark all the instructions added to the prolog as frame setup. + if (InProlog) { + for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { + BeforeMBBI->setFlag(MachineInstr::FrameSetup); + } + for (MachineInstr &MI : *RoundMBB) { + MI.setFlag(MachineInstr::FrameSetup); + } + for (MachineInstr &MI : *LoopMBB) { + MI.setFlag(MachineInstr::FrameSetup); + } + for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin(); + CMBBI != ContinueMBBI; ++CMBBI) { + CMBBI->setFlag(MachineInstr::FrameSetup); + } + } + + // Possible TODO: physreg liveness for InProlog case. + + return ContinueMBBI; +} + +MachineInstr *X86FrameLowering::emitStackProbeCall( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; unsigned CallOp; @@ -478,6 +728,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, Symbol = "_chkstk"; MachineInstrBuilder CI; + MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); // All current stack probes take AX and SP as input, clobber flags, and // preserve all registers. x86_64 probes leave RSP unmodified. 
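The inline CoreCLR expansion above (emitStackProbeInline, following the MBB/RoundBB/LoopMBB/ContinueMBB pseudo-code in its block comment) reduces to simple page arithmetic. Here is an arithmetic-only C++ model of the probe addresses it generates; the helper name is invented, the constants mirror the patch, and the real code only enters this path when the desired final RSP is below the committed limit (the JAE above skips probing otherwise):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static const uint64_t PageSize = 0x1000;
    static const uint64_t PageMask = ~(PageSize - 1);

    // Addresses the probe loop stores to, assuming FinalSP < Limit and
    // Limit page-aligned, mirroring RoundBB (rounding) plus LoopMBB.
    static std::vector<uint64_t> probeAddresses(uint64_t Limit,
                                                uint64_t FinalSP) {
      std::vector<uint64_t> Probes;
      const uint64_t Rounded = FinalSP & PageMask; // RoundBB
      uint64_t Probe = Limit;                      // LoopReg starts at LimitReg
      do {
        Probe -= PageSize;       // ProbeReg = LoopReg - PageSize
        Probes.push_back(Probe); // [ProbeReg] = 0 commits the page
      } while (Probe > Rounded); // conditional branch back to LoopMBB
      return Probes;
    }

    int main() {
      // Committed limit at 0x7000, desired final RSP two pages lower:
      std::vector<uint64_t> P = probeAddresses(0x7000, 0x5000);
      assert(P.size() == 2 && P[0] == 0x6000 && P[1] == 0x5000);
      return 0;
    }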
@@ -507,6 +758,26 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, .addReg(X86::RSP) .addReg(X86::RAX); } + + if (InProlog) { + // Apply the frame setup flag to all inserted instrs. + for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) + ExpansionMBBI->setFlag(MachineInstr::FrameSetup); + } + + return MBBI; +} + +MachineInstr *X86FrameLowering::emitStackProbeInlineStub( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { + + assert(InProlog && "ChkStkStub called outside prolog!"); + + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("__chkstk_stub"); + + return MBBI; } static unsigned calculateSetFPREG(uint64_t SPAdjust) { @@ -526,7 +797,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con const MachineFrameInfo *MFI = MF.getFrameInfo(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. unsigned StackAlign = getStackAlignment(); - if (ForceStackAlign) { + if (MF.getFunction()->hasFnAttribute("stackrealign")) { if (MFI->hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) @@ -537,15 +808,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, + DebugLoc DL, unsigned Reg, uint64_t MaxAlign) const { uint64_t Val = -MaxAlign; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), - StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); + unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) + .addReg(Reg) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -646,6 +916,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool IsFunclet = MBB.isEHFuncletEntry(); + EHPersonality Personality = EHPersonality::Unknown; + if (Fn->hasPersonalityFn()) + Personality = classifyEHPersonality(Fn->getPersonalityFn()); + bool FnHasClrFunclet = + MMI.hasEHFunclets() && Personality == EHPersonality::CoreCLR; + bool IsClrFunclet = IsFunclet && FnHasClrFunclet; bool HasFP = hasFP(MF); bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); @@ -655,9 +932,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned FramePtr = TRI->getFrameRegister(MF); const unsigned MachineFramePtr = STI.isTarget64BitILP32() - ? getX86SubSuperRegister(FramePtr, MVT::i64, false) - : FramePtr; + ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; unsigned BasePtr = TRI->getBaseRegister(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. DebugLoc DL; // Add RETADDR move area to callee saved frame size. @@ -723,6 +1002,24 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, uint64_t NumBytes = 0; int stackGrowth = -SlotSize; + // Find the funclet establisher parameter + unsigned Establisher = X86::NoRegister; + if (IsClrFunclet) + Establisher = Uses64BitFramePtr ? 
X86::RCX : X86::ECX; + else if (IsFunclet) + Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; + + if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { + // Immediately spill establisher into the home slot. + // The runtime cares about this. + // MOV64mr %rdx, 16(%rsp) + unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) + .addReg(Establisher) + .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(Establisher); + } + if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; @@ -739,7 +1036,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); + if (!IsFunclet) + MFI->setOffsetAdjustment(-NumBytes); + else + assert(MFI->getOffsetAdjustment() == -(int)NumBytes && + "should calculate same local variable offset for funclets"); // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) @@ -765,35 +1066,46 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - if (!IsWin64Prologue) { + if (!IsWin64Prologue && !IsFunclet) { // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); + + if (NeedsDwarfCFI) { + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( + nullptr, DwarfFramePtr)); + } } - if (NeedsDwarfCFI) { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + // Mark the FramePtr as live-in in every block. Don't do this again for + // funclet prologues. + if (!IsFunclet) { + for (MachineBasicBlock &EveryMBB : MF) + EveryMBB.addLiveIn(MachineFramePtr); } - - // Mark the FramePtr as live-in in every block. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - I->addLiveIn(MachineFramePtr); } else { + assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + // For EH funclets, only allocate enough space for outgoing calls. Save the + // NumBytes value that we would've used for the parent frame. + unsigned ParentFrameNumBytes = NumBytes; + if (IsFunclet) + NumBytes = getWinEHFuncletFrameSize(MF); + // Skip the callee-saved push instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; while (MBBI != MBB.end() && + MBBI->getFlag(MachineInstr::FrameSetup) && (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; @@ -818,9 +1130,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Don't do this for Win64, it needs to realign the stack after the prologue. 
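The realignment referred to in the comment just above is done by BuildStackAlignAND (changed earlier in this file to take the register to align), which ANDs the stack pointer with -MaxAlign. That is the usual two's-complement trick: for a power-of-two alignment the immediate is a mask with the low bits clear, so the AND rounds the pointer down to an aligned address. A small self-contained illustration of the arithmetic, with an invented helper name:

    #include <cassert>
    #include <cstdint>

    // For a power-of-two A, -A in two's complement equals ~(A - 1): all
    // ones except the low log2(A) bits, so the AND rounds down. Since x86
    // stacks grow down, rounding down only adds padding, never
    // underallocates.
    static uint64_t alignSPDown(uint64_t SP, uint64_t A) {
      assert(A && (A & (A - 1)) == 0 && "alignment must be a power of two");
      return SP & -A; // same as SP & ~(A - 1)
    }

    int main() {
      assert(alignSPDown(0x7fff1238, 16) == 0x7fff1230);
      assert(alignSPDown(0x7fff1238, 64) == 0x7fff1200);
      return 0;
    }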
- if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) { + if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); } // If there is an SUB32ri of ESP immediately before this instruction, merge @@ -839,7 +1151,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. uint64_t AlignedNumBytes = NumBytes; - if (IsWin64Prologue && TRI->needsStackRealignment(MF)) + if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. @@ -876,26 +1188,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) - .setMIFlag(MachineInstr::FrameSetup); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } - // Save a pointer to the MI where we set AX. - MachineBasicBlock::iterator SetRAX = MBBI; - --SetRAX; - // Call __chkstk, __chkstk_ms, or __alloca. - emitStackProbeCall(MF, MBB, MBBI, DL); - - // Apply the frame setup flag to all inserted instrs. - for (; SetRAX != MBBI; ++SetRAX) - SetRAX->setFlag(MachineInstr::FrameSetup); + emitStackProbe(MF, MBB, MBBI, DL, true); if (isEAXAlive) { // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); + MachineInstr *MI = + addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), + StackPtr, false, NumBytes - 4); MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } @@ -909,19 +1213,72 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); int SEHFrameOffset = 0; + unsigned SPOrEstablisher; + if (IsFunclet) { + if (IsClrFunclet) { + // The establisher parameter passed to a CLR funclet is actually a pointer + // to the (mostly empty) frame of its nearest enclosing funclet; we have + // to find the root function establisher frame by loading the PSPSym from + // the intermediate frame. + unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); + MachinePointerInfo NoInfo; + MBB.addLiveIn(Establisher); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), + Establisher, false, PSPSlotOffset) + .addMemOperand(MF.getMachineMemOperand( + NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize)); + ; + // Save the root establisher back into the current funclet's (mostly + // empty) frame, in case a sub-funclet or the GC needs it. + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, + false, PSPSlotOffset) + .addReg(Establisher) + .addMemOperand( + MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile, + SlotSize, SlotSize)); + } + SPOrEstablisher = Establisher; + } else { + SPOrEstablisher = StackPtr; + } + if (IsWin64Prologue && HasFP) { - SEHFrameOffset = calculateSetFPREG(NumBytes); + // Set RBP to a small fixed offset from RSP. 
In the funclet case, we base + // this calculation on the incoming establisher, which holds the value of + // RSP from the parent frame at the end of the prologue. + SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); if (SEHFrameOffset) addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), - StackPtr, false, SEHFrameOffset); + SPOrEstablisher, false, SEHFrameOffset); else - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr); + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) + .addReg(SPOrEstablisher); - if (NeedsWinCFI) + // If this is not a funclet, emit the CFI describing our frame pointer. + if (NeedsWinCFI && !IsFunclet) { BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); + if (isAsynchronousEHPersonality(Personality)) + MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; + } + } else if (IsFunclet && STI.is32Bit()) { + // Reset EBP / ESI to something good for funclets. + MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); + // If we're a catch funclet, we can be returned to via catchret. Save ESP + // into the registration node so that the runtime will restore it for us. + if (!MBB.isCleanupFuncletEntry()) { + assert(Personality == EHPersonality::MSVC_CXX); + unsigned FrameReg; + int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; + int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg); + // ESP is the first field, so no extra displacement is needed. + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, + false, EHRegOffset) + .addReg(X86::ESP); + } } while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { @@ -932,7 +1289,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int FI; if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { if (X86::FR64RegClass.contains(Reg)) { - int Offset = getFrameIndexOffset(MF, FI); + unsigned IgnoredFrameReg; + int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); Offset += SEHFrameOffset; BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) @@ -948,14 +1306,33 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) .setMIFlag(MachineInstr::FrameSetup); + if (FnHasClrFunclet && !IsFunclet) { + // Save the so-called Initial-SP (i.e. the value of the stack pointer + // immediately after the prolog) into the PSPSlot so that funclets + // and the GC can recover it. + unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); + auto PSPInfo = MachinePointerInfo::getFixedStack( + MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, + PSPSlotOffset) + .addReg(StackPtr) + .addMemOperand(MF.getMachineMemOperand( + PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, + SlotSize, SlotSize)); + } + // Realign stack after we spilled callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Win64 requires aligning the stack after the prologue. if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); } + // We already dealt with stack realignment and funclets above. + if (IsFunclet && STI.is32Bit()) + return; + // If we need a base pointer, set it up here. 
It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -964,7 +1341,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Update the base pointer with the current stack pointer. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { // Stash value of base pointer. Saving RSP instead of EBP shortens @@ -972,18 +1349,21 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); } - if (X86FI->getHasSEHFramePtrSave()) { + if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { // Stash the value of the frame pointer relative to the base pointer for // Win32 EH. This supports Win32 EH, which does the inverse of the above: // it recovers the frame pointer from the base pointer rather than the // other way around. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, - getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex())) + unsigned UsedReg; + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) .addReg(FramePtr) .setMIFlag(MachineInstr::FrameSetup); } @@ -1015,6 +1395,69 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue( return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); } +static bool isFuncletReturnInstr(MachineInstr *MI) { + switch (MI->getOpcode()) { + case X86::CATCHRET: + case X86::CLEANUPRET: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + +// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the +// stack. It holds a pointer to the bottom of the root function frame. The +// establisher frame pointer passed to a nested funclet may point to the +// (mostly empty) frame of its parent funclet, but it will need to find +// the frame of the root function to access locals. To facilitate this, +// every funclet copies the pointer to the bottom of the root function +// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the +// same offset for the PSPSym in the root function frame that's used in the +// funclets' frames allows each funclet to dynamically accept any ancestor +// frame as its establisher argument (the runtime doesn't guarantee the +// immediate parent for some reason lost to history), and also allows the GC, +// which uses the PSPSym for some bookkeeping, to find it in any funclet's +// frame with only a single offset reported for the entire method. 
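As a toy model of the PSPSym chaining described in the comment above (types invented for illustration): every frame publishes the root function's frame address at one agreed slot offset, so a funclet handed any ancestor frame as its establisher recovers the root with a single load at that common offset.

    #include <cassert>
    #include <cstdint>

    // Invented stand-in for a frame that reserves a PSP slot at a fixed,
    // method-wide offset. The root stores its own frame address; each
    // funclet copies forward whatever it read, so the chain always
    // resolves to the root.
    struct Frame {
      uint64_t PSPSlot;
    };

    static uint64_t rootFrameOf(const Frame &Establisher) {
      return Establisher.PSPSlot; // one load at the common PSPSym offset
    }

    int main() {
      Frame Root{0x7fffe000};          // root records its own frame bottom
      Frame Outer{rootFrameOf(Root)};  // funclet entered with Root
      Frame Inner{rootFrameOf(Outer)}; // nested funclet entered with Outer
      assert(rootFrameOf(Inner) == 0x7fffe000);
      return 0;
    }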
+unsigned +X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { + const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); + // getFrameIndexReferenceFromSP has an out ref parameter for the stack + // pointer register; pass a dummy that we ignore + unsigned SPReg; + int Offset = getFrameIndexReferenceFromSP(MF, Info.PSPSymFrameIdx, SPReg); + assert(Offset >= 0); + return static_cast<unsigned>(Offset); +} + +unsigned +X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); + // This is the amount of stack a funclet needs to allocate. + unsigned UsedSize; + EHPersonality Personality = + classifyEHPersonality(MF.getFunction()->getPersonalityFn()); + if (Personality == EHPersonality::CoreCLR) { + // CLR funclets need to hold enough space to include the PSPSym, at the + // same offset from the stack pointer (immediately after the prolog) as it + // resides at in the main function. + UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; + } else { + // Other funclets just need enough stack for outgoing call arguments. + UsedSize = MF.getFrameInfo()->getMaxCallFrameSize(); + } + // RBP is not included in the callee saved register block. After pushing RBP, + // everything is 16 byte aligned. Everything we allocate before an outgoing + // call must also be 16 byte aligned. + unsigned FrameSizeMinusRBP = + RoundUpToAlignment(CSSize + UsedSize, getStackAlignment()); + // Subtract out the size of the callee saved registers. This is how much stack + // each funclet will allocate. + return FrameSizeMinusRBP - CSSize; +} + void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1027,12 +1470,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, const bool Is64BitILP32 = STI.isTarget64BitILP32(); unsigned FramePtr = TRI->getFrameRegister(MF); unsigned MachineFramePtr = - Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) - : FramePtr; + Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWinCFI = IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); + bool IsFunclet = isFuncletReturnInstr(MBBI); + MachineBasicBlock *TargetMBB = nullptr; // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); @@ -1040,7 +1484,27 @@ unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - if (hasFP(MF)) { + if (MBBI->getOpcode() == X86::CATCHRET) { + // SEH shouldn't use catchret. + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())) && + "SEH should not use CATCHRET"); + + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + TargetMBB = MBBI->getOperand(0).getMBB(); + + // Pop EBP. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } else if (MBBI->getOpcode() == X86::CLEANUPRET) { + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } else if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; NumBytes = FrameSize - CSSize; @@ -1052,7 +1516,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Pop EBP. BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr); + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); } else { NumBytes = StackSize - CSSize; } @@ -1063,26 +1528,50 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); - if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && - !PI->isTerminator()) + if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + Opc != X86::DBG_VALUE && !PI->isTerminator()) break; --MBBI; } MachineBasicBlock::iterator FirstCSPop = MBBI; + if (TargetMBB) { + // Fill EAX/RAX with the address of the target block. + unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX; + if (STI.is64Bit()) { + // LEA64r TargetMBB(%rip), %rax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(TargetMBB) + .addReg(0); + } else { + // MOV32ri $TargetMBB, %eax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg) + .addMBB(TargetMBB); + } + // Record that we've taken the address of TargetMBB and no longer just + // reference it in a terminator. + TargetMBB->setHasAddressTaken(); + } + if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. if (NumBytes || MFI->hasVarSizedObjects()) - mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + NumBytes += mergeSPUpdates(MBB, MBBI, true); // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was - // realigned. - if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + // realigned. Don't do this if this was a funclet epilogue, since the funclets + // will not do realignment or dynamic stack allocation. + if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) && + !IsFunclet) { if (TRI->needsStackRealignment(MF)) MBBI = FirstCSPop; unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); @@ -1134,9 +1623,24 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +// NOTE: this only has a subset of the full frame index logic. In +// particular, the FI < 0 and AfterFPPop logic is handled in +// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly +// (probably?) it should be moved into here. +int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // We can't calculate offset from frame pointer if the stack is realigned, + // so enforce usage of stack/base pointer. The base pointer is used when we + // have dynamic allocas in addition to dynamic realignment. 
+ if (TRI->hasBasePointer(MF)) + FrameReg = TRI->getBaseRegister(); + else if (TRI->needsStackRealignment(MF)) + FrameReg = TRI->getStackRegister(); + else + FrameReg = TRI->getFrameRegister(MF); + // Offset will hold the offset from the stack pointer at function entry to the // object. // We need to factor in additional offsets applied during the prologue to the @@ -1207,48 +1711,62 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, return Offset + FPDelta; } -int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { - // We can't calculate offset from frame pointer if the stack is realigned, - // so enforce usage of stack/base pointer. The base pointer is used when we - // have dynamic allocas in addition to dynamic realignment. - if (TRI->hasBasePointer(MF)) - FrameReg = TRI->getBaseRegister(); - else if (TRI->needsStackRealignment(MF)) - FrameReg = TRI->getStackRegister(); - else - FrameReg = TRI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); -} - -// Simplified from getFrameIndexOffset keeping only StackPointer cases -int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const { +// Simplified from getFrameIndexReference keeping only StackPointer cases +int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Does not include any dynamic realign. const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG - // Note: LLVM arranges the stack as: - // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) - // > "Stack Slots" (<--SP) - // We can always address StackSlots from RSP. We can usually (unless - // needsStackRealignment) address CSRs from RSP, but sometimes need to - // address them from RBP. FixedObjects can be placed anywhere in the stack - // frame depending on their specific requirements (i.e. we can actually - // refer to arguments to the function which are stored in the *callers* - // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs - // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. + // LLVM arranges the stack as follows: + // ... + // ARG2 + // ARG1 + // RETADDR + // PUSH RBP <-- RBP points here + // PUSH CSRs + // ~~~~~~~ <-- possible stack realignment (non-win64) + // ... + // STACK OBJECTS + // ... <-- RSP after prologue points here + // ~~~~~~~ <-- possible stack realignment (win64) + // + // if (hasVarSizedObjects()): + // ... <-- "base pointer" (ESI/RBX) points here + // DYNAMIC ALLOCAS + // ... <-- RSP points here + // + // Case 1: In the simple case of no stack realignment and no dynamic + // allocas, both "fixed" stack objects (arguments and CSRs) are addressable + // with fixed offsets from RSP. + // + // Case 2: In the case of stack realignment with no dynamic allocas, fixed + // stack objects are addressed with RBP and regular stack objects with RSP. + // + // Case 3: In the case of dynamic allocas and stack realignment, RSP is used + // to address stack arguments for outgoing calls and nothing else. The "base + // pointer" points to local variables, and RBP points to fixed objects. + // + // In cases 2 and 3, we can only answer for non-fixed stack objects, and the + // answer we give is relative to the SP after the prologue, and not the + // SP in the middle of the function. 
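To ground case 1 of the layout comment above in numbers, here is the arithmetic getFrameIndexReferenceFromSP performs (the return Offset + StackSize further down): frame-index offsets are recorded relative to the stack pointer at function entry, so adding the fixed StackSize yields the offset from RSP after the prologue's allocation. The figures are invented:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t StackSize = 64;    // prologue: sub $64, %rsp
      const int64_t ObjectOffset = -8; // slot 8 bytes below the entry SP
      // Entry SP = S; object lives at S - 8; RSP after prologue = S - 64.
      const int64_t FromSP = ObjectOffset + StackSize;
      assert(FromSP == 56); // addressable as 56(%rsp)
      return 0;
    }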
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); + assert((!MFI->isFixedObjectIndex(FI) || !TRI->needsStackRealignment(MF) || + STI.isTargetWin64()) && + "offset from fixed object to SP is not static"); - // We don't handle tail calls, and shouldn't be seeing them - // either. + // We don't handle tail calls, and shouldn't be seeing them either. int TailCallReturnAddrDelta = MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); #endif } + // Fill in FrameReg output argument. + FrameReg = TRI->getStackRegister(); + // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. Each box below is // one word (eight bytes). Obj0 is the stack slot we're trying to // get to. @@ -1280,15 +1798,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F return Offset + StackSize; } -// Simplified from getFrameIndexReference keeping only StackPointer cases -int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); - - FrameReg = TRI->getStackRegister(); - return getFrameIndexOffsetFromSP(MF, FI); -} bool X86FrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1358,6 +1867,11 @@ bool X86FrameLowering::spillCalleeSavedRegisters( const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(MI); + // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI + // for us, and there are no XMM CSRs on Win32. + if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) + return true; + // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { @@ -1399,6 +1913,22 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; + if (isFuncletReturnInstr(MI) && STI.isOSWindows()) { + // Don't restore CSRs in 32-bit EH funclets. Matches + // spillCalleeSavedRegisters. + if (STI.is32Bit()) + return true; + // Don't restore CSRs before an SEH catchret. SEH except blocks do not form + // funclets. emitEpilogue transforms these to normal jumps. + if (MI->getOpcode() == X86::CATCHRET) { + const Function *Func = MBB.getParent()->getFunction(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(Func->getPersonalityFn())); + if (IsSEH) + return true; + } + } + DebugLoc DL = MBB.findDebugLoc(MI); // Reload XMMs from stack frame. @@ -1420,7 +1950,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, !X86::GR32RegClass.contains(Reg)) continue; - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); + BuildMI(MBB, MI, DL, TII.get(Opc), Reg) + .setMIFlag(MachineInstr::FrameDestroy); } return true; } @@ -1450,8 +1981,16 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, } // Spill the BasePtr if it's used. - if (TRI->hasBasePointer(MF)) + if (TRI->hasBasePointer(MF)) { SavedRegs.set(TRI->getBaseRegister()); + + // Allocate a spill slot for EBP if we have a base pointer and EH funclets. + if (MF.getMMI().hasEHFunclets()) { + int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setHasSEHFramePtrSave(true); + X86FI->setSEHFramePtrSaveIndex(FI); + } + } } static bool @@ -1545,11 +2084,9 @@ void X86FrameLowering::adjustForSegmentedStacks( // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. 
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), - e = PrologueMBB.livein_end(); - i != e; i++) { - allocMBB->addLiveIn(*i); - checkMBB->addLiveIn(*i); + for (const auto &LI : PrologueMBB.liveins()) { + allocMBB->addLiveIn(LI); + checkMBB->addLiveIn(LI); } if (IsNested) @@ -1682,8 +2219,6 @@ void X86FrameLowering::adjustForSegmentedStacks( .addImm(StackSize); BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) .addImm(X86FI->getArgumentStackSize()); - MF.getRegInfo().setPhysRegUsed(Reg10); - MF.getRegInfo().setPhysRegUsed(Reg11); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); @@ -1821,11 +2356,9 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(), - E = PrologueMBB.livein_end(); - I != E; I++) { - stackCheckMBB->addLiveIn(*I); - incStackMBB->addLiveIn(*I); + for (const auto &LI : PrologueMBB.liveins()) { + stackCheckMBB->addLiveIn(LI); + incStackMBB->addLiveIn(LI); } MF.push_front(incStackMBB); @@ -1870,16 +2403,84 @@ void X86FrameLowering::adjustForHiPEPrologue( .addReg(ScratchReg), PReg, false, SPLimitOffset); BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB); - stackCheckMBB->addSuccessor(&PrologueMBB, 99); - stackCheckMBB->addSuccessor(incStackMBB, 1); - incStackMBB->addSuccessor(&PrologueMBB, 99); - incStackMBB->addSuccessor(incStackMBB, 1); + stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100}); + stackCheckMBB->addSuccessor(incStackMBB, {1, 100}); + incStackMBB->addSuccessor(&PrologueMBB, {99, 100}); + incStackMBB->addSuccessor(incStackMBB, {1, 100}); } #ifdef XDEBUG MF.verify(); #endif } +bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const { + + if (Offset <= 0) + return false; + + if (Offset % SlotSize) + return false; + + int NumPops = Offset / SlotSize; + // This is only worth it if we have at most 2 pops. + if (NumPops != 1 && NumPops != 2) + return false; + + // Handle only the trivial case where the adjustment directly follows + // a call. This is the most common one, anyway. + if (MBBI == MBB.begin()) + return false; + MachineBasicBlock::iterator Prev = std::prev(MBBI); + if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) + return false; + + unsigned Regs[2]; + unsigned FoundRegs = 0; + + auto RegMask = Prev->getOperand(1); + + auto &RegClass = + Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; + // Try to find up to NumPops free registers. + for (auto Candidate : RegClass) { + + // Poor man's liveness: + // Since we're immediately after a call, any register that is clobbered + // by the call and not defined by it can be considered dead. + if (!RegMask.clobbersPhysReg(Candidate)) + continue; + + bool IsDef = false; + for (const MachineOperand &MO : Prev->implicit_operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) { + IsDef = true; + break; + } + } + + if (IsDef) + continue; + + Regs[FoundRegs++] = Candidate; + if (FoundRegs == (unsigned)NumPops) + break; + } + + if (FoundRegs == 0) + return false; + + // If we found only one free register, but need two, reuse the same one twice. + while (FoundRegs < (unsigned)NumPops) + Regs[FoundRegs++] = Regs[0]; + + for (int i = 0; i < NumPops; ++i) + BuildMI(MBB, MBBI, DL, + TII.get(STI.is64Bit() ? 
X86::POP64r : X86::POP32r), Regs[i]);
+
+  return true;
+}
+
 void X86FrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -1895,8 +2496,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
     // adjcallstackdown instruction into 'add ESP, <amt>'
-    if (Amount == 0)
-      return;
 
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
@@ -1904,15 +2503,68 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     unsigned StackAlign = getStackAlignment();
     Amount = RoundUpToAlignment(Amount, StackAlign);
 
+    MachineModuleInfo &MMI = MF.getMMI();
+    const Function *Fn = MF.getFunction();
+    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+    bool DwarfCFI = !WindowsCFI &&
+                    (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
+    // If we have any exception handlers in this function, and we adjust
+    // the SP before calls, we may need to indicate this to the unwinder
+    // using GNU_ARGS_SIZE. Note that this may be necessary even when
+    // Amount == 0, because the preceding function may have set a non-0
+    // GNU_ARGS_SIZE.
+    // TODO: We don't need to reset this between subsequent functions,
+    // if it didn't change.
+    bool HasDwarfEHHandlers = !WindowsCFI &&
+                              !MF.getMMI().getLandingPads().empty();
+
+    if (HasDwarfEHHandlers && !isDestroy &&
+        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
+      BuildCFI(MBB, I, DL,
+               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
+
+    if (Amount == 0)
+      return;
+
     // Factor out the amount that gets handled inside the sequence
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
 
+    // TODO: This is needed only if we require precise CFA.
+    // If this is a callee-pop calling convention, emit a CFA adjust for
+    // the amount the callee popped.
+    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
+      BuildCFI(MBB, I, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
     if (Amount) {
       // Add Amount to SP to destroy a frame, and subtract to setup.
       int Offset = isDestroy ? Amount : -Amount;
-      BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+      if (!(Fn->optForMinSize() &&
+            adjustStackWithPops(MBB, I, DL, Offset)))
+        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
     }
+
+    if (DwarfCFI && !hasFP(MF)) {
+      // If we don't have FP, but need to generate unwind information,
+      // we need to set the correct CFA offset after the stack adjustment.
+      // How much we adjust the CFA offset depends on whether we're emitting
+      // CFI only for EH purposes or for debugging. EH only requires the CFA
+      // offset to be correct at each call site, while for debugging we want
+      // it to be more precise.
+      int CFAOffset = Amount;
+      // TODO: When not using precise CFA, we also need to adjust for the
+      // InternalAmt here.
+
+      if (CFAOffset) {
+        CFAOffset = isDestroy ?
-CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } + } + return; } @@ -1933,12 +2585,136 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { assert(MBB.getParent() && "Block is not attached to a function!"); + // Win64 has strict requirements in terms of epilogue and we are + // not taking a chance at messing with them. + // I.e., unless this block is already an exit block, we can't use + // it as an epilogue. + if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) + return false; + if (canUseLEAForSPInEpilogue(*MBB.getParent())) return true; // If we cannot use LEA to adjust SP, we may need to use ADD, which - // clobbers the EFLAGS. Check that none of the terminators reads the - // EFLAGS, and if one uses it, conservatively assume this is not + // clobbers the EFLAGS. Check that we do not need to preserve it, + // otherwise, conservatively assume this is not // safe to insert the epilogue here. - return !terminatorsNeedFlagsAsInput(MBB); + return !flagsNeedToBePreservedBeforeTheTerminators(MBB); +} + +bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { + // If we may need to emit frameless compact unwind information, give + // up as this is currently broken: PR25614. + return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); +} + +MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool RestoreSP) const { + assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); + assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); + assert(STI.is32Bit() && !Uses64BitFramePtr && + "restoring EBP/ESI on non-32-bit target"); + + MachineFunction &MF = *MBB.getParent(); + unsigned FramePtr = TRI->getFrameRegister(MF); + unsigned BasePtr = TRI->getBaseRegister(); + WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // FIXME: Don't set FrameSetup flag in catchret case. 
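// ---------------------------------------------------------------------------
// Aside: a standalone sketch (not part of the patch) of the eligibility test
// behind the adjustStackWithPops hunk above, which, when optimizing for
// minimum size, rewrites a small post-call `add esp, N` into one or two POPs
// into registers that are dead after the call. The slot size is a parameter
// here purely for illustration.
constexpr bool canUsePopsForAdjustment(int offset, int slotSize) {
  if (offset <= 0 || offset % slotSize != 0)
    return false;                        // must release whole stack slots
  const int numPops = offset / slotSize;
  return numPops == 1 || numPops == 2;   // only profitable for 1 or 2 pops
}
static_assert(canUsePopsForAdjustment(8, 4), "add esp, 8 -> pop; pop");
static_assert(!canUsePopsForAdjustment(12, 4), "three pops are not worth it");
static_assert(!canUsePopsForAdjustment(6, 4), "not slot-aligned");
// A register qualifies as a pop destination only if the preceding call's
// register mask clobbers it and the call itself does not define it, i.e. it
// is dead right after the call (the "poor man's liveness" check above).
// ---------------------------------------------------------------------------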
+ + int FI = FuncInfo.EHRegNodeFrameIndex; + int EHRegSize = MFI->getObjectSize(FI); + + if (RestoreSP) { + // MOV32rm -EHRegSize(%ebp), %esp + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), + X86::EBP, true, -EHRegSize) + .setMIFlag(MachineInstr::FrameSetup); + } + + unsigned UsedReg; + int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg); + int EndOffset = -EHRegOffset - EHRegSize; + FuncInfo.EHRegNodeEndOffset = EndOffset; + + if (UsedReg == FramePtr) { + // ADD $offset, %ebp + unsigned ADDri = getADDriOpcode(false, EndOffset); + BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) + .addReg(FramePtr) + .addImm(EndOffset) + .setMIFlag(MachineInstr::FrameSetup) + ->getOperand(3) + .setIsDead(); + assert(EndOffset >= 0 && + "end of registration object above normal EBP position!"); + } else if (UsedReg == BasePtr) { + // LEA offset(%ebp), %esi + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), + FramePtr, false, EndOffset) + .setMIFlag(MachineInstr::FrameSetup); + // MOV32rm SavedEBPOffset(%esi), %ebp + assert(X86FI->getHasSEHFramePtrSave()); + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), + UsedReg, true, Offset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr"); + } + return MBBI; +} + +unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { + // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. + unsigned Offset = 16; + // RBP is immediately pushed. + Offset += SlotSize; + // All callee-saved registers are then pushed. + Offset += MF.getInfo()->getCalleeSavedFrameSize(); + // Every funclet allocates enough stack space for the largest outgoing call. + Offset += getWinEHFuncletFrameSize(MF); + return Offset; +} + +void X86FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // If this function isn't doing Win64-style C++ EH, we don't need to do + // anything. + const Function *Fn = MF.getFunction(); + if (!STI.is64Bit() || !MF.getMMI().hasEHFunclets() || + classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX) + return; + + // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset + // relative to RSP after the prologue. Find the offset of the last fixed + // object, so that we can allocate a slot immediately following it. If there + // were no fixed objects, use offset -SlotSize, which is immediately after the + // return address. Fixed objects have negative frame indices. + MachineFrameInfo *MFI = MF.getFrameInfo(); + int64_t MinFixedObjOffset = -SlotSize; + for (int I = MFI->getObjectIndexBegin(); I < 0; ++I) + MinFixedObjOffset = std::min(MinFixedObjOffset, MFI->getObjectOffset(I)); + + int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; + int UnwindHelpFI = + MFI->CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false); + MF.getWinEHFuncInfo()->UnwindHelpFrameIdx = UnwindHelpFI; + + // Store -2 into UnwindHelp on function entry. We have to scan forwards past + // other frame setup instructions. 
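// ---------------------------------------------------------------------------
// Aside: the two additions above are largely fixed-layout arithmetic. A
// standalone sketch (not part of the patch) with invented example sizes:
constexpr unsigned parentFrameOffset(unsigned slotSize, unsigned csrBytes,
                                     unsigned funcletFrameBytes) {
  // 16(%rsp) home slot for RDX, one RBP push, the CSR pushes, then the
  // funclet's own fixed-size frame.
  return 16 + slotSize + csrBytes + funcletFrameBytes;
}
static_assert(parentFrameOffset(8, 16, 32) == 72, "Win64 parent frame offset");

// UnwindHelp is allocated one slot below the lowest fixed object, or one
// slot below the return address (offset -SlotSize) when there is none:
constexpr long long unwindHelpOffset(long long minFixedObjOffset,
                                     long long slotSize) {
  return minFixedObjOffset - slotSize;
}
static_assert(unwindHelpOffset(-8, 8) == -16, "one slot below fixed objects");
// The patch code below performs the described forward scan past the
// FrameSetup instructions before storing -2 into that slot.
// ---------------------------------------------------------------------------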
+ MachineBasicBlock &MBB = MF.front(); + auto MBBI = MBB.begin(); + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + DebugLoc DL = MBB.findDebugLoc(MBBI); + addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), + UnwindHelpFI) + .addImm(-2); } diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 495cfcd1c3f7..3ab41b4a5789 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -47,11 +47,17 @@ public: unsigned StackPtr; - /// Emit a call to the target's stack probe function. This is required for all + /// Emit target stack probe code. This is required for all /// large stack allocations on Windows. The caller is required to materialize - /// the number of bytes to probe in RAX/EAX. - void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL) const; + /// the number of bytes to probe in RAX/EAX. Returns instruction just + /// after the expansion. + MachineInstr *emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + bool InProlog) const; + + /// Replace a StackProbe inline-stub with the actual probe code inline. + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -91,11 +97,9 @@ public: bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; bool needsFrameIndexResolution(const MachineFunction &MF) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - int getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const; int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; @@ -103,6 +107,11 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + /// Check the instruction before/after the passed instruction. If /// it is an ADD/SUB/LEA instruction it is deleted argument and the /// stack adjustment is returned as a positive value for ADD/LEA and @@ -125,7 +134,9 @@ public: /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; -private: + /// Returns true if the target will correctly handle shrink wrapping. + bool enableShrinkWrapping(const MachineFunction &MF) const override; + /// convertArgMovsToPushes - This method tries to convert a call sequence /// that uses sub and mov instructions to put the argument onto the stack /// into a series of pushes. @@ -135,22 +146,56 @@ private: MachineBasicBlock::iterator I, uint64_t Amount) const; - uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; - /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, MCCFIInstruction CFIInst) const; + /// Sets up EBP and optionally ESI based on the incoming EBP value. Only + /// needed for 32-bit. Used in funclet prologues and at catchret destinations. 
+ MachineBasicBlock::iterator + restoreWin32EHStackPointers(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + bool RestoreSP = false) const; + +private: + uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; + + /// Emit target stack probe as a call to a helper function + MachineInstr *emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + + /// Emit target stack probe as an inline sequence. + MachineInstr *emitStackProbeInline(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + + /// Emit a stub to later inline the target stack probe. + MachineInstr *emitStackProbeInlineStub(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + /// Aligns the stack pointer by ANDing it with -MaxAlign. void BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - uint64_t MaxAlign) const; + unsigned Reg, uint64_t MaxAlign) const; + + /// Make small positive stack adjustments using POPs. + bool adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + int Offset) const; /// Adjusts the stack pointer using LEA, SUB, or ADD. MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, int64_t Offset, bool InEpilogue) const; + + unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const; + + unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d5351d25d6ed..4414e478b99b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -46,9 +46,8 @@ STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// namespace { - /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses - /// SDValue's instead of register numbers for the leaves of the matched - /// tree. + /// This corresponds to X86AddressMode, but uses SDValue's instead of register + /// numbers for the leaves of the matched tree. struct X86ISelAddressMode { enum { RegBase, @@ -87,8 +86,7 @@ namespace { IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } - /// isRIPRelative - Return true if this addressing mode is already RIP - /// relative. + /// Return true if this addressing mode is already RIP-relative. bool isRIPRelative() const { if (BaseType != RegBase) return false; if (RegisterSDNode *RegNode = @@ -147,21 +145,25 @@ namespace { namespace { //===--------------------------------------------------------------------===// - /// ISel - X86 specific code to select X86 machine instructions for + /// ISel - X86-specific code to select X86 machine instructions for /// SelectionDAG operations. /// class X86DAGToDAGISel final : public SelectionDAGISel { - /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// OptForSize - If true, selector should try to optimize for code size - /// instead of performance. + /// If true, selector should try to optimize for code size instead of + /// performance. 
     bool OptForSize;
 
+    /// If true, selector should try to optimize for minimum code size.
+    bool OptForMinSize;
+
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
-        : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
+        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
+          OptForMinSize(false) {}
 
     const char *getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -184,8 +186,7 @@ namespace {
       return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
     }
 
-    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-    // sign extended field.
+    // True if the 64-bit immediate fits in a 32-bit sign-extended field.
     inline bool i64immSExt32(SDNode *N) const {
       uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
       return (int64_t)v == (int32_t)v;
     }
@@ -196,50 +197,50 @@ namespace {
 
   private:
     SDNode *Select(SDNode *N) override;
-    SDNode *SelectGather(SDNode *N, unsigned Opc);
-    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *selectGather(SDNode *N, unsigned Opc);
+    SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);
 
-    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
-    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
-    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
-    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
-    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
+    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
-    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
-    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
+    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
-    bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
-    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
-    bool SelectLEAAddr(SDValue N, SDValue &Base,
+    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
+    bool selectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
-    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                             SDValue &Scale, SDValue &Index, SDValue &Disp,
                             SDValue &Segment);
-    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
-    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                              SDValue &Base, SDValue &Scale,
                              SDValue &Index, SDValue &Disp,
                              SDValue &Segment,
                              SDValue &NodeWithChain);
-    bool TryFoldLoad(SDNode *P, SDValue N,
+    bool tryFoldLoad(SDNode *P, SDValue N,
                      SDValue &Base, SDValue &Scale,
                      SDValue &Index, SDValue &Disp,
                      SDValue &Segment);
 
-    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
-    /// inline asm expressions.
+    /// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - void EmitSpecialCodeForMain(); + void emitSpecialCodeForMain(); inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL, SDValue &Base, SDValue &Scale, @@ -252,7 +253,7 @@ namespace { : AM.Base_Reg; Scale = getI8Imm(AM.Scale, DL); Index = AM.IndexReg; - // These are 32-bit even in 64-bit mode since RIP relative offset + // These are 32-bit even in 64-bit mode since RIP-relative offset // is 32-bit. if (AM.GV) Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), @@ -283,32 +284,105 @@ namespace { Segment = CurDAG->getRegister(0, MVT::i32); } - /// getI8Imm - Return a target constant with the specified value, of type - /// i8. + // Utility function to determine whether we should avoid selecting + // immediate forms of instructions for better code size or not. + // At a high level, we'd like to avoid such instructions when + // we have similar constants used within the same basic block + // that can be kept in a register. + // + bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { + uint32_t UseCount = 0; + + // Do not want to hoist if we're not optimizing for size. + // TODO: We'd like to remove this restriction. + // See the comment in X86InstrInfo.td for more info. + if (!OptForSize) + return false; + + // Walk all the users of the immediate. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { + + SDNode *User = *UI; + + // This user is already selected. Count it as a legitimate use and + // move on. + if (User->isMachineOpcode()) { + UseCount++; + continue; + } + + // We want to count stores of immediates as real uses. + if (User->getOpcode() == ISD::STORE && + User->getOperand(1).getNode() == N) { + UseCount++; + continue; + } + + // We don't currently match users that have > 2 operands (except + // for stores, which are handled above) + // Those instruction won't match in ISEL, for now, and would + // be counted incorrectly. + // This may change in the future as we add additional instruction + // types. + if (User->getNumOperands() != 2) + continue; + + // Immediates that are used for offsets as part of stack + // manipulation should be left alone. These are typically + // used to indicate SP offsets for argument passing and + // will get pulled into stores/pushes (implicitly). + if (User->getOpcode() == X86ISD::ADD || + User->getOpcode() == ISD::ADD || + User->getOpcode() == X86ISD::SUB || + User->getOpcode() == ISD::SUB) { + + // Find the other operand of the add/sub. + SDValue OtherOp = User->getOperand(0); + if (OtherOp.getNode() == N) + OtherOp = User->getOperand(1); + + // Don't count if the other operand is SP. + RegisterSDNode *RegNode; + if (OtherOp->getOpcode() == ISD::CopyFromReg && + (RegNode = dyn_cast_or_null( + OtherOp->getOperand(1).getNode()))) + if ((RegNode->getReg() == X86::ESP) || + (RegNode->getReg() == X86::RSP)) + continue; + } + + // ... otherwise, count this and move on. + UseCount++; + } + + // If we have more than 1 use, then recommend for hoisting. + return (UseCount > 1); + } + + /// Return a target constant with the specified value of type i8. inline SDValue getI8Imm(unsigned Imm, SDLoc DL) { return CurDAG->getTargetConstant(Imm, DL, MVT::i8); } - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. + /// Return a target constant with the specified value, of type i32. 
inline SDValue getI32Imm(unsigned Imm, SDLoc DL) { return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } - /// getGlobalBaseReg - Return an SDNode that returns the value of - /// the global base register. Output instructions required to - /// initialize the global base register, if necessary. - /// + /// Return an SDNode that returns the value of the global base register. + /// Output instructions required to initialize the global base register, + /// if necessary. SDNode *getGlobalBaseReg(); - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. + /// Return a reference to the TargetMachine, casted to the target-specific + /// type. const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. + /// Return a reference to the TargetInstrInfo, casted to the target-specific + /// type. const X86InstrInfo *getInstrInfo() const { return Subtarget->getInstrInfo(); } @@ -386,9 +460,9 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { return true; } -/// MoveBelowCallOrigChain - Replace the original chain operand of the call with +/// Replace the original chain operand of the call with /// load's chain operand and move load below the call's chain operand. -static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, +static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain) { SmallVector Ops; SDValue Chain = OrigChain.getOperand(0); @@ -418,7 +492,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, CurDAG->UpdateNodeOperands(Call.getNode(), Ops); } -/// isCalleeLoad - Return true if call address is a load and it can be +/// Return true if call address is a load and it can be /// moved below CALLSEQ_START and the chains leading up to the call. /// Return the CALLSEQ_START by reference as a second output. /// In the case of a tail call, there isn't a callseq node between the call @@ -461,12 +535,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { } void X86DAGToDAGISel::PreprocessISelDAG() { - // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + // OptFor[Min]Size are used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->optForSize(); + OptForMinSize = MF->getFunction()->optForMinSize(); + assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && // Only does this when target favors doesn't favor register indirect @@ -500,7 +576,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain, HasCallSeq)) continue; - MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); + moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } @@ -577,9 +653,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { } -/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in -/// the main function. 
-void X86DAGToDAGISel::EmitSpecialCodeForMain() { +/// Emit any code that needs to be executed only in the main function. +void X86DAGToDAGISel::emitSpecialCodeForMain() { if (Subtarget->isTargetCygMing()) { TargetLowering::ArgListTy Args; auto &DL = CurDAG->getDataLayout(); @@ -599,7 +674,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. if (const Function *Fn = MF->getFunction()) if (Fn->hasExternalLinkage() && Fn->getName() == "main") - EmitSpecialCodeForMain(); + emitSpecialCodeForMain(); } static bool isDispSafeForFrameIndex(int64_t Val) { @@ -612,7 +687,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) { return isInt<31>(Val); } -bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, +bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM) { // Cannot combine ExternalSymbol displacements with integer offsets. if (Offset != 0 && (AM.ES || AM.MCSym)) @@ -634,7 +709,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, } -bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ +bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); // load gs:0 -> GS segment register. @@ -658,11 +733,10 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ return true; } -/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes -/// into an addressing mode. These wrap things that will resolve down into a -/// symbol reference. If no match is possible, this returns true, otherwise it -/// returns false. -bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { +/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing +/// mode. These wrap things that will resolve down into a symbol reference. +/// If no match is possible, this returns true, otherwise it returns false. +bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { // If the addressing mode already has a symbol as the displacement, we can // never match another symbol. if (AM.hasSymbolicDisplacement()) @@ -685,7 +759,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); AM.SymbolFlags = G->getTargetFlags(); - if (FoldOffsetIntoAddress(G->getOffset(), AM)) { + if (foldOffsetIntoAddress(G->getOffset(), AM)) { AM = Backup; return true; } @@ -694,7 +768,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); AM.SymbolFlags = CP->getTargetFlags(); - if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { + if (foldOffsetIntoAddress(CP->getOffset(), AM)) { AM = Backup; return true; } @@ -710,7 +784,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { X86ISelAddressMode Backup = AM; AM.BlockAddr = BA->getBlockAddress(); AM.SymbolFlags = BA->getTargetFlags(); - if (FoldOffsetIntoAddress(BA->getOffset(), AM)) { + if (foldOffsetIntoAddress(BA->getOffset(), AM)) { AM = Backup; return true; } @@ -758,11 +832,10 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { return true; } -/// MatchAddress - Add the specified node to the specified addressing mode, -/// returning true if it cannot be done. This just pattern matches for the -/// addressing mode. 
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) +/// Add the specified node to the specified addressing mode, returning true if +/// it cannot be done. This just pattern matches for the addressing mode. +bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { + if (matchAddressRecursively(N, AM, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -790,15 +863,49 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { return false; } +bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM, + unsigned Depth) { + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + + X86ISelAddressMode Backup = AM; + if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && + !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) + return false; + AM = Backup; + + // Try again after commuting the operands. + if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) && + !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) + return false; + AM = Backup; + + // If we couldn't fold both operands into the address at the same time, + // see if we can just put each operand into a register and fold at least + // the add. + if (AM.BaseType == X86ISelAddressMode::RegBase && + !AM.Base_Reg.getNode() && + !AM.IndexReg.getNode()) { + N = Handle.getValue(); + AM.Base_Reg = N.getOperand(0); + AM.IndexReg = N.getOperand(1); + AM.Scale = 1; + return false; + } + N = Handle.getValue(); + return true; +} + // Insert a node into the DAG at least before the Pos node's position. This // will reposition the node as needed, and will assign it a node ID that is <= // the Pos node's ID. Note that this does *not* preserve the uniqueness of node // IDs! The selection DAG must no longer depend on their uniqueness when this // is used. -static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { +static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { if (N.getNode()->getNodeId() == -1 || N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) { - DAG.RepositionNode(Pos.getNode(), N.getNode()); + DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode()); N.getNode()->setNodeId(Pos.getNode()->getNodeId()); } } @@ -807,7 +914,7 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { // safe. This allows us to convert the shift and and into an h-register // extract and a scaled index. Returns false if the simplification is // performed. -static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, +static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -835,12 +942,12 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. 
- InsertDAGNode(DAG, N, Eight); - InsertDAGNode(DAG, N, Srl); - InsertDAGNode(DAG, N, NewMask); - InsertDAGNode(DAG, N, And); - InsertDAGNode(DAG, N, ShlCount); - InsertDAGNode(DAG, N, Shl); + insertDAGNode(DAG, N, Eight); + insertDAGNode(DAG, N, Srl); + insertDAGNode(DAG, N, NewMask); + insertDAGNode(DAG, N, And); + insertDAGNode(DAG, N, ShlCount); + insertDAGNode(DAG, N, Shl); DAG.ReplaceAllUsesWith(N, Shl); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); @@ -850,7 +957,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this // allows us to fold the shift into this addressing mode. Returns false if the // transform succeeded. -static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, +static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -880,9 +987,9 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - InsertDAGNode(DAG, N, NewMask); - InsertDAGNode(DAG, N, NewAnd); - InsertDAGNode(DAG, N, NewShift); + insertDAGNode(DAG, N, NewMask); + insertDAGNode(DAG, N, NewAnd); + insertDAGNode(DAG, N, NewShift); DAG.ReplaceAllUsesWith(N, NewShift); AM.Scale = 1 << ShiftAmt; @@ -917,7 +1024,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, // Note that this function assumes the mask is provided as a mask *after* the // value is shifted. The input chain may or may not match that, but computing // such a mask is trivial. -static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, +static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -973,7 +1080,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); - InsertDAGNode(DAG, N, NewX); + insertDAGNode(DAG, N, NewX); X = NewX; } SDLoc DL(N); @@ -987,10 +1094,10 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - InsertDAGNode(DAG, N, NewSRLAmt); - InsertDAGNode(DAG, N, NewSRL); - InsertDAGNode(DAG, N, NewSHLAmt); - InsertDAGNode(DAG, N, NewSHL); + insertDAGNode(DAG, N, NewSRLAmt); + insertDAGNode(DAG, N, NewSRL); + insertDAGNode(DAG, N, NewSHLAmt); + insertDAGNode(DAG, N, NewSHL); DAG.ReplaceAllUsesWith(N, NewSHL); AM.Scale = 1 << AMShiftAmt; @@ -998,7 +1105,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, return false; } -bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, +bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { SDLoc dl(N); DEBUG({ @@ -1007,7 +1114,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, }); // Limit recursion. if (Depth > 5) - return MatchAddressBase(N, AM); + return matchAddressBase(N, AM); // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. 
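// ---------------------------------------------------------------------------
// Aside: the folds being renamed here rest on plain bit identities. For
// foldMaskedShiftToScaledMask, (X << C1) & C2 is rewritten as
// (X & (C2 >> C1)) << C1 so the shift moves outward and can become an
// address scale (the selector additionally requires C1 to be 1, 2, or 3).
// A standalone check of the identity, not part of the patch:
constexpr unsigned long long shiftThenMask(unsigned long long x, unsigned c1,
                                           unsigned long long c2) {
  return (x << c1) & c2;
}
constexpr unsigned long long maskThenShift(unsigned long long x, unsigned c1,
                                           unsigned long long c2) {
  // Bits of c2 below bit c1 are unreachable after the shift, so dropping
  // them via (c2 >> c1) loses nothing.
  return (x & (c2 >> c1)) << c1;
}
static_assert(shiftThenMask(0x1234, 3, 0xFF8) ==
              maskThenShift(0x1234, 3, 0xFF8), "shift and mask commute");
static_assert(shiftThenMask(0xBEEF, 2, 0x3FC) ==
              maskThenShift(0xBEEF, 2, 0x3FC), "shift and mask commute");
// ---------------------------------------------------------------------------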
@@ -1020,7 +1127,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       return true;
 
     if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
-      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
         return false;
     return true;
   }
@@ -1038,19 +1145,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   }
   case ISD::Constant: {
     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-    if (!FoldOffsetIntoAddress(Val, AM))
+    if (!foldOffsetIntoAddress(Val, AM))
       return false;
     break;
   }
 
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
-    if (!MatchWrapper(N, AM))
+    if (!matchWrapper(N, AM))
       return false;
     break;
 
   case ISD::LOAD:
-    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
     break;
 
@@ -1087,7 +1194,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
         ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
         uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
-        if (!FoldOffsetIntoAddress(Disp, AM))
+        if (!foldOffsetIntoAddress(Disp, AM))
           return false;
       }
 
@@ -1119,7 +1226,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
     // Try to fold the mask and shift into the scale, and return false if we
     // succeed.
-    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
       return false;
     break;
   }
@@ -1153,7 +1260,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
           ConstantSDNode *AddVal =
               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
           uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
-          if (FoldOffsetIntoAddress(Disp, AM))
+          if (foldOffsetIntoAddress(Disp, AM))
             Reg = N.getNode()->getOperand(0);
         } else {
           Reg = N.getNode()->getOperand(0);
@@ -1179,7 +1286,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
     // Test if the LHS of the sub can be folded.
     X86ISelAddressMode Backup = AM;
-    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+    if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
       AM = Backup;
       break;
     }
@@ -1227,56 +1334,26 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     AM.Scale = 1;
 
     // Insert the new nodes into the topological ordering.
-    InsertDAGNode(*CurDAG, N, Zero);
-    InsertDAGNode(*CurDAG, N, Neg);
+    insertDAGNode(*CurDAG, N, Zero);
+    insertDAGNode(*CurDAG, N, Neg);
     return false;
   }
 
-  case ISD::ADD: {
-    // Add an artificial use to this node so that we can keep track of
-    // it if it gets CSE'd with a different node.
-    HandleSDNode Handle(N);
-
-    X86ISelAddressMode Backup = AM;
-    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
-        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+  case ISD::ADD:
+    if (!matchAdd(N, AM, Depth))
       return false;
-    AM = Backup;
-
-    // Try again after commuting the operands.
-    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
-        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
-      return false;
-    AM = Backup;
-
-    // If we couldn't fold both operands into the address at the same time,
-    // see if we can just put each operand into a register and fold at least
-    // the add.
- if (AM.BaseType == X86ISelAddressMode::RegBase && - !AM.Base_Reg.getNode() && - !AM.IndexReg.getNode()) { - N = Handle.getValue(); - AM.Base_Reg = N.getOperand(0); - AM.IndexReg = N.getOperand(1); - AM.Scale = 1; - return false; - } - N = Handle.getValue(); break; - } case ISD::OR: - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (CurDAG->isBaseWithConstantOffset(N)) { - X86ISelAddressMode Backup = AM; - ConstantSDNode *CN = cast(N.getOperand(1)); - - // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && - !FoldOffsetIntoAddress(CN->getSExtValue(), AM)) - return false; - AM = Backup; - } + // We want to look through a transform in InstCombine and DAGCombiner that + // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. + // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) + // An 'lea' can then be used to match the shift (multiply) and add: + // and $1, %esi + // lea (%rsi, %rdi, 8), %rax + if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && + !matchAdd(N, AM, Depth)) + return false; break; case ISD::AND: { @@ -1299,27 +1376,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, uint64_t Mask = N.getConstantOperandVal(1); // Try to fold the mask and shift into an extract and scale. - if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) return false; // Try to fold the mask and shift directly into the scale. - if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) return false; // Try to swap the mask and shift to place shifts which can be done as // a scale on the outside of the mask. - if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM)) return false; break; } } - return MatchAddressBase(N, AM); + return matchAddressBase(N, AM); } -/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// Helper for MatchAddress. Add the specified node to the /// specified addressing mode without any further recursion. -bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { +bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. @@ -1339,7 +1416,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } -bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1362,7 +1439,7 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, // If Base is 0, the whole address is in index and the Scale is 1 if (isa(Base)) { - assert(dyn_cast(Base)->isNullValue() && + assert(cast(Base)->isNullValue() && "Unexpected base in gather/scatter"); Scale = getI8Imm(1, DL); Base = CurDAG->getRegister(0, MVT::i32); @@ -1375,14 +1452,14 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } -/// SelectAddr - returns true if it is able pattern match an addressing mode. 
+/// Returns true if it is able to pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. /// /// Parent is the parent node of the addr operand that is being matched. It /// is always a load, store, atomic node, or null. It is only null when /// checking memory operands for inline asm nodes. -bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; @@ -1404,7 +1481,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - if (MatchAddress(N, AM)) + if (matchAddress(N, AM)) return false; MVT VT = N.getSimpleValueType(); @@ -1420,14 +1497,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } -/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to -/// match a load whose top elements are either undef or zeros. The load flavor -/// is derived from the type of N, which is either v4f32 or v2f64. +/// Match a scalar SSE load. In particular, we want to match a load whose top +/// elements are either undef or zeros. The load flavor is derived from the +/// type of N, which is either v4f32 or v2f64. /// /// We also return: /// PatternChainNode: this is the matched node that has a chain input and /// output. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, +bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1439,7 +1516,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast(PatternNodeWithChain); - if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; return true; } @@ -1457,7 +1534,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. 
LoadSDNode *LD = cast(N.getOperand(0).getOperand(0)); - if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; PatternNodeWithChain = SDValue(LD, 0); return true; @@ -1466,7 +1543,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, } -bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { +bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { if (const ConstantSDNode *CN = dyn_cast(N)) { uint64_t ImmVal = CN->getZExtValue(); if ((uint32_t)ImmVal != (uint64_t)ImmVal) @@ -1495,10 +1572,10 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { return TM.getCodeModel() == CodeModel::Small; } -bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment)) + if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) return false; SDLoc DL(N); @@ -1533,9 +1610,9 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, return true; } -/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing +/// Calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, +bool X86DAGToDAGISel::selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1546,7 +1623,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; - if (MatchAddress(N, AM)) + if (matchAddress(N, AM)) return false; assert (T == AM.Segment); AM.Segment = Copy; @@ -1572,13 +1649,12 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, Complexity++; // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA - // to a LEA. This is determined with some expermentation but is by no means + // to a LEA. This is determined with some experimentation but is by no means // optimal (especially for code size consideration). LEA is nice because of // its three-address nature. Tweak the cost function again when we can run // convertToThreeAddress() at register allocation time. if (AM.hasSymbolicDisplacement()) { - // For X86-64, we should always use lea to materialize RIP relative - // addresses. + // For X86-64, always use LEA to materialize RIP-relative addresses. if (Subtarget->is64Bit()) Complexity = 4; else @@ -1596,8 +1672,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, return true; } -/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. -bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, +/// This is only run on TargetGlobalTLSAddress nodes. 
+bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); @@ -1621,7 +1697,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } -bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, +bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1630,14 +1706,13 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsLegalToFold(N, P, P, OptLevel)) return false; - return SelectAddr(N.getNode(), + return selectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } -/// getGlobalBaseReg - Return an SDNode that returns the value of -/// the global base register. Output instructions required to -/// initialize the global base register, if necessary. -/// +/// Return an SDNode that returns the value of the global base register. +/// Output instructions required to initialize the global base register, +/// if necessary. SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); auto &DL = MF->getDataLayout(); @@ -1828,7 +1903,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, return Val; } -SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { +SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) return nullptr; @@ -1841,7 +1916,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Base, Scale, Index, Disp, Segment; - if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) + if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) return nullptr; // Which index into the table. @@ -1933,9 +2008,9 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { return CurDAG->getMergeValues(RetVals, dl).getNode(); } -/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has -/// any uses which require the SF or OF bits to be accurate. -static bool HasNoSignedComparisonUses(SDNode *N) { +/// Test whether the given X86ISD::CMP node has any uses which require the SF +/// or OF bits to be accurate. +static bool hasNoSignedComparisonUses(SDNode *N) { // Examine each user of the node. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { @@ -1995,9 +2070,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } -/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode -/// is suitable for doing the {load; increment or decrement; store} to modify -/// transformation. +/// Check whether or not the chain ending in StoreNode is suitable for doing +/// the {load; increment or decrement; store} to modify transformation. static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -2081,8 +2155,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, return true; } -/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory -/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. +/// Get the appropriate X86 opcode for an in-memory increment or decrement. +/// Opc should be X86ISD::DEC or X86ISD::INC. 
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { if (Opc == X86ISD::DEC) { if (LdVT == MVT::i64) return X86::DEC64m; @@ -2099,9 +2173,8 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } -/// SelectGather - Customized ISel for GATHER operations. -/// -SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { +/// Customized ISel for GATHER operations. +SDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) { // Operands of Gather: VSrc, Base, VIdx, VMask, Scale SDValue Chain = Node->getOperand(0); SDValue VSrc = Node->getOperand(2); @@ -2148,6 +2221,27 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::BRIND: { + if (Subtarget->isTargetNaCl()) + // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We + // leave the instruction alone. + break; + if (Subtarget->isTarget64BitILP32()) { + // Converts a 32-bit register to a 64-bit, zero-extended version of + // it. This is needed because x86-64 can do many things, but jmp %r32 + // ain't one of them. + const SDValue &Target = Node->getOperand(1); + assert(Target.getSimpleValueType() == llvm::MVT::i32); + SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64)); + SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other, + Node->getOperand(0), ZextTarget); + ReplaceUses(SDValue(Node, 0), Brind); + SelectCode(ZextTarget.getNode()); + SelectCode(Brind.getNode()); + return nullptr; + } + break; + } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { @@ -2190,7 +2284,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; } - SDNode *RetVal = SelectGather(Node, Opc); + SDNode *RetVal = selectGather(Node, Opc); if (RetVal) // We already called ReplaceUses inside SelectGather. return nullptr; @@ -2217,7 +2311,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); + SDNode *RetVal = selectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; break; @@ -2404,10 +2498,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); // Multiply is commmutative. if (!foldedLoad) { - foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); if (foldedLoad) std::swap(N0, N1); } @@ -2549,7 +2643,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; @@ -2557,7 +2651,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). 
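// ---------------------------------------------------------------------------
// Aside: why the zero-extension matters for 8-bit division. DIV r/m8 divides
// the full 16-bit AX, leaving the quotient in AL and the remainder in AH, so
// the dividend must be widened into AX first. A standalone model of the
// unsigned case (not the selector code; the real instruction faults if the
// quotient overflows AL, which this model ignores):
constexpr unsigned div8Quotient(unsigned char dividend, unsigned char divisor) {
  return static_cast<unsigned>(dividend) / divisor;  // MOVZX leaves AH == 0.
}
constexpr unsigned div8Remainder(unsigned char dividend, unsigned char divisor) {
  return static_cast<unsigned>(dividend) % divisor;  // AH = AX % divisor.
}
static_assert(div8Quotient(250, 7) == 35 && div8Remainder(250, 7) == 5,
              "250 == 7 * 35 + 5");
// The selector code below materializes exactly that widening move, folding
// the load into the MOVZX when it can.
// ---------------------------------------------------------------------------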
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, @@ -2692,7 +2786,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && - HasNoSignedComparisonUses(Node)) + hasNoSignedComparisonUses(Node)) N0 = N0.getOperand(0); // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to @@ -2709,7 +2803,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // For example, convert "testl %eax, $8" to "testb %al, $8" if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && (!(C->getZExtValue() & 0x80) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Reg = N0.getNode()->getOperand(0); @@ -2743,7 +2837,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // For example, "testl %eax, $2048" to "testb %ah, $8". if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && (!(C->getZExtValue() & 0x8000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { // Shift the immediate right by 8 bits. SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, dl, MVT::i8); @@ -2781,7 +2875,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && N0.getValueType() != MVT::i16 && (!(C->getZExtValue() & 0x8000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i16); SDValue Reg = N0.getNode()->getOperand(0); @@ -2804,7 +2898,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && N0.getValueType() == MVT::i64 && (!(C->getZExtValue() & 0x80000000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i32); SDValue Reg = N0.getNode()->getOperand(0); @@ -2854,7 +2948,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; SDValue Base, Scale, Index, Disp, Segment; - if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), + if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, Segment)) break; @@ -2903,7 +2997,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, case InlineAsm::Constraint_v: // not offsetable ?? case InlineAsm::Constraint_m: // memory case InlineAsm::Constraint_X: - if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) + if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } @@ -2916,9 +3010,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a -/// X86-specific DAG, ready for instruction scheduling. -/// +/// This pass converts a legalized DAG into a X86-specific DAG, +/// ready for instruction scheduling. 
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel) { return new X86DAGToDAGISel(TM, OptLevel); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0f29b514146c..0927c2f4fa50 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -67,19 +68,14 @@ static cl::opt ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); -// Forward declarations. -static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, - SDValue V2); - X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - TD = TM.getDataLayout(); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); // Set up the TargetLowering object. - static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird. It always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); @@ -118,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); - - // The _ftol2 runtime function has an unusual calling conv, which - // is modeled by a special pseudo-instruction. - setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr); } if (Subtarget->isTargetDarwin()) { @@ -175,14 +164,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); if (Subtarget->is64Bit()) { - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) + // f32/f64 are legal, f80 is custom. + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + else + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); } else if (!Subtarget->useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit - // FILD for other targets. + // FILD or VCVTUSI2SS/SD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } @@ -206,23 +199,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote); } - // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 - // are Legal, f80 is custom lowered. - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); - // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. 
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); - if (X86ScalarSSEf32) { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + if (!Subtarget->useSoftFloat()) { + // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 + // are Legal, f80 is custom lowered. + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); + setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + + if (X86ScalarSSEf32) { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + // f32 and f64 cases are Legal, f80 case is not + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } else { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed @@ -232,8 +231,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { + // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + } else { + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); + } } else if (!Subtarget->useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) @@ -242,14 +247,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // the optimal thing for SSE vs. the default expansion in the legalizer. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); else + // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - } - if (isTargetFTOL()) { - // Use the _ftol2 runtime function, which has a pseudo-instruction - // to handle its weird calling convention. setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); } @@ -274,8 +276,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. 
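The loop that follows marks MULHS, MULHU and the division nodes Expand. As a scalar illustration of what expanding a signed high-half multiply means when only a plain multiply is legal (a sketch assuming a widening multiply is available; the real expansion is arranged by the legalizer and dagcombine, as the comment above says):

```cpp
#include <cstdint>

// High 32 bits of a signed 32x32 multiply, recovered by widening.
int32_t mulhs32(int32_t A, int32_t B) {
  int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
  return static_cast<int32_t>(Wide >> 32);   // keep the high half
}
```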
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { - MVT VT = IntVTs[i]; + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); @@ -295,6 +296,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BR_CC , MVT::f32, Expand); setOperationAction(ISD::BR_CC , MVT::f64, Expand); setOperationAction(ISD::BR_CC , MVT::f80, Expand); + setOperationAction(ISD::BR_CC , MVT::f128, Expand); setOperationAction(ISD::BR_CC , MVT::i8, Expand); setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); @@ -302,6 +304,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f128, Expand); setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); @@ -312,7 +315,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); - setOperationAction(ISD::FREM , MVT::f32 , Expand); + + if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) { + // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` + // is. We should promote the value to 64-bits to solve this. + // This is what the CRT headers do - `fmodf` is an inline header + // function casting to f64 and calling `fmod`. 
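Written out, the CRT-style inline definition that comment describes looks like this (illustrative sketch only):

```cpp
#include <cmath>

// On 32-bit MSVC only fmod(double) exists, so an f32 remainder is
// computed by widening both operands to f64 and truncating the result.
float fmodf_via_fmod(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}
```

Marking ISD::FREM on f32 as Promote, as the hunk does next, asks the legalizer for exactly this widen-and-truncate shape.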
+ setOperationAction(ISD::FREM , MVT::f32 , Promote); + } else { + setOperationAction(ISD::FREM , MVT::f32 , Expand); + } + setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); @@ -404,15 +417,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); + setOperationAction(ISD::SELECT , MVT::f128 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); + setOperationAction(ISD::SETCC , MVT::f128 , Custom); + setOperationAction(ISD::SETCCE , MVT::i8 , Custom); + setOperationAction(ISD::SETCCE , MVT::i16 , Custom); + setOperationAction(ISD::SETCCE , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); + setOperationAction(ISD::SETCCE , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support @@ -456,8 +475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); // Expand certain atomics - for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { - MVT VT = IntVTs[i]; + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); @@ -473,13 +491,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } - if (Subtarget->is64Bit()) { - setExceptionPointerRegister(X86::RAX); - setExceptionSelectorRegister(X86::RDX); - } else { - setExceptionPointerRegister(X86::EAX); - setExceptionSelectorRegister(X86::EDX); - } setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); @@ -492,8 +503,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { - // TargetInfo::X86_64ABIBuiltinVaList + if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -505,7 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. 
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); @@ -613,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87. + // Long double always uses X87, except f128 in MMX. if (!Subtarget->useSoftFloat()) { + if (Subtarget->is64Bit() && Subtarget->hasMMX()) { + addRegisterClass(MVT::f128, &X86::FR128RegClass); + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + setOperationAction(ISD::FABS , MVT::f128, Custom); + setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + } + addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -846,15 +864,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); + // ISD::CTTZ v2i64 - scalarization is faster. + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster. + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. - for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-power-of-2 vectors - if (!isPowerOf2_32(VT.getVectorNumElements())) - continue; - // Do not attempt to custom lower non-128-bit vectors - if (!VT.is128BitVector()) - continue; + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -892,13 +912,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
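On why the promotion to v2i64 requested in the loop below is sound: AND, OR and XOR are bit-parallel, so a 128-bit integer vector can be reinterpreted as two i64 lanes, combined there, and reinterpreted back without changing a single bit. A scalar model (illustrative only):

```cpp
#include <array>
#include <cstdint>

using V2i64 = std::array<uint64_t, 2>;

// The lane split is irrelevant to a bitwise op; v16i8/v8i16/v4i32 AND
// gives the same 128 bits as this v2i64 AND.
V2i64 and128(const V2i64 &A, const V2i64 &B) {
  return {A[0] & B[0], A[1] & B[1]};
}
```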
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) - continue; - + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v2i64); setOperationAction(ISD::OR, VT, Promote); @@ -1036,6 +1050,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + if (Subtarget->hasXOP()) { + setOperationAction(ISD::ROTL, MVT::v16i8, Custom); + setOperationAction(ISD::ROTL, MVT::v8i16, Custom); + setOperationAction(ISD::ROTL, MVT::v4i32, Custom); + setOperationAction(ISD::ROTL, MVT::v2i64, Custom); + setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + setOperationAction(ISD::ROTL, MVT::v16i16, Custom); + setOperationAction(ISD::ROTL, MVT::v8i32, Custom); + setOperationAction(ISD::ROTL, MVT::v4i64, Custom); + } + if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); @@ -1126,7 +1151,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) { + setOperationAction(ISD::CTTZ, MVT::v32i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v16i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); + + if (Subtarget->hasAnyFMA()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); @@ -1202,6 +1236,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); setOperationAction(ISD::MUL, MVT::v32i8, Custom); + + setOperationAction(ISD::SMAX, MVT::v32i8, Custom); + setOperationAction(ISD::SMAX, MVT::v16i16, Custom); + setOperationAction(ISD::SMAX, MVT::v8i32, Custom); + setOperationAction(ISD::UMAX, MVT::v32i8, Custom); + setOperationAction(ISD::UMAX, MVT::v16i16, Custom); + setOperationAction(ISD::UMAX, MVT::v8i32, Custom); + setOperationAction(ISD::SMIN, MVT::v32i8, Custom); + setOperationAction(ISD::SMIN, MVT::v16i16, Custom); + setOperationAction(ISD::SMIN, MVT::v8i32, Custom); + setOperationAction(ISD::UMIN, MVT::v32i8, Custom); + setOperationAction(ISD::UMIN, MVT::v16i16, Custom); + setOperationAction(ISD::UMIN, MVT::v8i32, Custom); } // In the customized shift lowering, the legal cases in AVX2 will be @@ -1243,15 +1290,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget->hasInt256()) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. 
- for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-256-bit vectors - if (!VT.is256BitVector()) - continue; - + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v4i64); setOperationAction(ISD::OR, VT, Promote); @@ -1293,6 +1333,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::XOR, MVT::i1, Legal); setOperationAction(ISD::OR, MVT::i1, Legal); setOperationAction(ISD::AND, MVT::i1, Legal); @@ -1311,6 +1352,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); + setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); @@ -1318,19 +1360,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); + setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); - if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); - } setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); @@ -1348,12 +1381,62 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); + if (Subtarget->hasVLX()){ + setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); + + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); + } else { + setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); + setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); + } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, 
MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom); if (Subtarget->hasDQI()) { - setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + } + } + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); } setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); @@ -1386,7 +1469,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); @@ -1395,6 +1478,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); @@ -1439,9 +1523,49 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::XOR, MVT::v16i32, Legal); if (Subtarget->hasCDI()) { - setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); - } + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand); + + setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v32i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand); + 
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); + + if (Subtarget->hasVLX()) { + setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); + setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + } else { + setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); + setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); + } + } // Subtarget->hasCDI() + if (Subtarget->hasDQI()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Legal); @@ -1455,7 +1579,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); } - if (EltSize >= 32 && VT.getSizeInBits() <= 512) { + if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) { setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } @@ -1481,15 +1605,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::MGATHER, VT, Legal); + setOperationAction(ISD::MSCATTER, VT, Custom); } } - for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-512-bit vectors. 
- if (!VT.is512BitVector()) - continue; - + for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) { setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v8i64); } @@ -1515,22 +1635,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::SELECT, MVT::v32i1, Custom); setOperationAction(ISD::SELECT, MVT::v64i1, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom); setOperationAction(ISD::SMAX, MVT::v64i8, Legal); setOperationAction(ISD::SMAX, MVT::v32i16, Legal); @@ -1541,19 +1674,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v64i8, Legal); setOperationAction(ISD::UMIN, MVT::v32i16, Legal); - for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { - const MVT VT = (MVT::SimpleValueType)i; + setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); + if (Subtarget->hasVLX()) + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); - const unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (Subtarget->hasCDI()) { + setOperationAction(ISD::CTLZ, MVT::v32i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v64i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand); + } - // Do not attempt to promote non-512-bit vectors. 
- if (!VT.is512BitVector()) - continue; + for (auto VT : { MVT::v64i8, MVT::v32i16 }) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); - if (EltSize < 32) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Legal); - } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v8i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v8i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v8i64); } } @@ -1571,6 +1716,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v2i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom); setOperationAction(ISD::AND, MVT::v8i32, Legal); setOperationAction(ISD::OR, MVT::v8i32, Legal); @@ -1595,8 +1742,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - if (!Subtarget->is64Bit()) + if (!Subtarget->is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -1604,9 +1753,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) { + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + if (VT == MVT::i64 && !Subtarget->is64Bit()) + continue; // Add/Sub/Mul with overflow operations are custom lowered. - MVT VT = IntVTs[i]; setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); @@ -1615,7 +1765,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMULO, VT, Custom); } - if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. 
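A scalar model of the UADDO node that the overflow loop above marks Custom (illustrative sketch; on x86 the second result comes for free from the ADD instruction's carry flag):

```cpp
#include <cstdint>
#include <utility>

// An add that also yields its carry-out flag.
std::pair<uint32_t, bool> uaddo32(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;      // unsigned wrap-around is well defined
  return {Sum, Sum < A};     // wrapped below A <=> carry out
}
```

The hunk then resumes by nulling out the 128-bit shift libcalls, which are unavailable in 32-bit mode.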
setLibcallName(RTLIB::SHL_I128, nullptr); @@ -1658,12 +1807,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FMA); + setTargetDAGCombine(ISD::FMINNUM); + setTargetDAGCombine(ISD::FMAXNUM); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MSTORE); + setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); @@ -1671,24 +1824,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::MSCATTER); + setTargetDAGCombine(ISD::MGATHER); computeRegisterProperties(Subtarget->getRegisterInfo()); - // On Darwin, -Os means optimize for size without hurting performance, - // do not reduce the limit. MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmoveOptSize = 4; setPrefLoopAlignment(4); // 2^4 bytes. - // Predictable cmov don't hurt on atom because it's in-order. + // A predictable cmov does not hurt on an in-order CPU. + // FIXME: Use a CPU attribute to trigger this, not a CPU model. PredictableSelectIsExpensive = !Subtarget->isAtom(); EnableExtLdPromotion = true; setPrefFunctionAlignment(4); // 2^4 bytes. @@ -1716,40 +1869,43 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, if (!VT.isVector()) return Subtarget->hasAVX512() ? 
MVT::i1: MVT::i8; - const unsigned NumElts = VT.getVectorNumElements(); - const EVT EltVT = VT.getVectorElementType(); - if (VT.is512BitVector()) { - if (Subtarget->hasAVX512()) - if (EltVT == MVT::i32 || EltVT == MVT::i64 || - EltVT == MVT::f32 || EltVT == MVT::f64) - switch(NumElts) { - case 8: return MVT::v8i1; - case 16: return MVT::v16i1; - } - if (Subtarget->hasBWI()) - if (EltVT == MVT::i8 || EltVT == MVT::i16) - switch(NumElts) { - case 32: return MVT::v32i1; - case 64: return MVT::v64i1; - } - } + if (VT.isSimple()) { + MVT VVT = VT.getSimpleVT(); + const unsigned NumElts = VVT.getVectorNumElements(); + const MVT EltVT = VVT.getVectorElementType(); + if (VVT.is512BitVector()) { + if (Subtarget->hasAVX512()) + if (EltVT == MVT::i32 || EltVT == MVT::i64 || + EltVT == MVT::f32 || EltVT == MVT::f64) + switch(NumElts) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + } + if (Subtarget->hasBWI()) + if (EltVT == MVT::i8 || EltVT == MVT::i16) + switch(NumElts) { + case 32: return MVT::v32i1; + case 64: return MVT::v64i1; + } + } - if (VT.is256BitVector() || VT.is128BitVector()) { - if (Subtarget->hasVLX()) - if (EltVT == MVT::i32 || EltVT == MVT::i64 || - EltVT == MVT::f32 || EltVT == MVT::f64) - switch(NumElts) { - case 2: return MVT::v2i1; - case 4: return MVT::v4i1; - case 8: return MVT::v8i1; - } - if (Subtarget->hasBWI() && Subtarget->hasVLX()) - if (EltVT == MVT::i8 || EltVT == MVT::i16) - switch(NumElts) { - case 8: return MVT::v8i1; - case 16: return MVT::v16i1; - case 32: return MVT::v32i1; - } + if (VVT.is256BitVector() || VVT.is128BitVector()) { + if (Subtarget->hasVLX()) + if (EltVT == MVT::i32 || EltVT == MVT::i64 || + EltVT == MVT::f32 || EltVT == MVT::f64) + switch(NumElts) { + case 2: return MVT::v2i1; + case 4: return MVT::v4i1; + case 8: return MVT::v8i1; + } + if (Subtarget->hasBWI() && Subtarget->hasVLX()) + if (EltVT == MVT::i8 || EltVT == MVT::i16) + switch(NumElts) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + case 32: return MVT::v32i1; + } + } } return VT.changeVectorElementTypeToInteger(); @@ -1769,9 +1925,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; - getMaxByValAlign(STy->getElementType(i), EltAlign); + getMaxByValAlign(EltTy, EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == 16) @@ -1821,10 +1977,11 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, if ((!IsMemset || ZeroMemset) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && - (Subtarget->isUnalignedMemAccessFast() || + (!Subtarget->isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { if (Size >= 32) { + // FIXME: Check if unaligned 32-byte accesses are slow. if (Subtarget->hasInt256()) return MVT::v8i32; if (Subtarget->hasFp256()) @@ -1842,6 +1999,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::f64; } } + // This is a compromise. If we reach here, unaligned accesses may be slow on + // this target. However, creating smaller, aligned accesses could be even + // slower and would certainly be a lot more code. 
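The fallback lines that follow implement the compromise just described: when wide unaligned accesses may be slow, pick the widest scalar type rather than emit many small aligned accesses. Restated as a helper (illustrative only):

```cpp
#include <cstdint>

// Width in bits of the fallback memop type.
unsigned fallbackStoreBits(uint64_t Size, bool Is64Bit) {
  if (Is64Bit && Size >= 8)
    return 64;   // MVT::i64
  return 32;     // MVT::i32
}
```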
if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; @@ -1860,8 +2020,22 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { - if (Fast) - *Fast = Subtarget->isUnalignedMemAccessFast(); + if (Fast) { + switch (VT.getSizeInBits()) { + default: + // 8-byte and under are always assumed to be fast. + *Fast = true; + break; + case 128: + *Fast = !Subtarget->isUnalignedMem16Slow(); + break; + case 256: + *Fast = !Subtarget->isUnalignedMem32Slow(); + break; + // TODO: What about AVX-512 (512-bit) accesses? + } + } + // Misaligned accesses of any size are always allowed. return true; } @@ -1964,6 +2138,32 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, return true; } +Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!Subtarget->isTargetAndroid()) + return TargetLowering::getSafeStackPointerLocation(IRB); + + // Android provides a fixed TLS slot for the SafeStack pointer. See the + // definition of TLS_SLOT_SAFESTACK in + // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h + unsigned AddressSpace, Offset; + if (Subtarget->is64Bit()) { + // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x48; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x24 on i386 + Offset = 0x24; + AddressSpace = 256; + } + + return ConstantExpr::getIntToPtr( + ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), + Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); +} + bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); @@ -1977,11 +2177,9 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, #include "X86GenCallingConv.inc" -bool -X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const { +bool X86TargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); @@ -2001,6 +2199,9 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + if (CallConv == CallingConv::X86_INTR && !Outs.empty()) + report_fatal_error("X86 interrupts may not return any value"); + SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); @@ -2025,7 +2226,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { - if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1) + if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); @@ -2114,7 +2315,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps); + X86ISD::NodeType opcode = X86ISD::RET_FLAG; + if 
(CallConv == CallingConv::X86_INTR) + opcode = X86ISD::IRET; + return DAG.getNode(opcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2193,7 +2397,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values - if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && + if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -2244,28 +2448,28 @@ enum StructReturnType { StackStructReturn }; static StructReturnType -callIsStructReturn(const SmallVectorImpl &Outs) { +callIsStructReturn(const SmallVectorImpl &Outs, bool IsMCU) { if (Outs.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Outs[0].Flags; if (!Flags.isSRet()) return NotStructReturn; - if (Flags.isInReg()) + if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Determines whether a function uses struct return semantics. static StructReturnType -argsAreStructReturn(const SmallVectorImpl &Ins) { +argsAreStructReturn(const SmallVectorImpl &Ins, bool IsMCU) { if (Ins.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Ins[0].Flags; if (!Flags.isSRet()) return NotStructReturn; - if (Flags.isInReg()) + if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } @@ -2285,17 +2489,34 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, MachinePointerInfo(), MachinePointerInfo()); } -/// Return true if the calling convention is one that -/// supports tail call optimization. -static bool IsTailCallConvention(CallingConv::ID CC) { +/// Return true if the calling convention is one that we can guarantee TCO for. +static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE); + CC == CallingConv::HiPE || CC == CallingConv::HHVM); } -/// \brief Return true if the calling convention is a C calling convention. -static bool IsCCallConvention(CallingConv::ID CC) { - return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 || - CC == CallingConv::X86_64_SysV); +/// Return true if we might ever do TCO for calls with this calling convention. +static bool mayTailCallThisCC(CallingConv::ID CC) { + switch (CC) { + // C calling conventions: + case CallingConv::C: + case CallingConv::X86_64_Win64: + case CallingConv::X86_64_SysV: + // Callee pop conventions: + case CallingConv::X86_ThisCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_VectorCall: + case CallingConv::X86_FastCall: + return true; + default: + return canGuaranteeTCO(CC); + } +} + +/// Return true if the function is being made into a tailcall target by +/// changing its ABI. +static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { + return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { @@ -2306,19 +2527,12 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { CallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); - if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) + if (!mayTailCallThisCC(CalleeCC)) return false; return true; } -/// Return true if the function is being made into -/// a tailcall target by changing its ABI. 
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC, - bool GuaranteedTailCallOpt) { - return GuaranteedTailCallOpt && IsTailCallConvention(CC); -} - SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, @@ -2329,7 +2543,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = FuncIsMadeTailCallSafe( + bool AlwaysUseMutable = shouldGuaranteeTCO( CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -2344,6 +2558,19 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, else ValVT = VA.getValVT(); + // Calculate SP offset of interrupt parameter, re-arrange the slot normally + // taken by a return address. + int Offset = 0; + if (CallConv == CallingConv::X86_INTR) { + const X86Subtarget& Subtarget = + static_cast(DAG.getSubtarget()); + // X86 interrupts may take one or two arguments. + // On the stack there will be no return address as in regular call. + // Offset of last argument need to be set to -4/-8 bytes. + // Where offset of the first argument out of two, should be set to 0 bytes. + Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1); + } + // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they @@ -2352,14 +2579,24 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI->setObjectOffset(FI, Offset); + } return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI->setObjectOffset(FI, Offset); + } + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue Val = DAG.getLoad( + ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, false, 0); return ExtendedInMem ? 
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; } @@ -2413,15 +2650,10 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } -SDValue -X86TargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { +SDValue X86TargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); @@ -2436,9 +2668,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - assert(!(isVarArg && IsTailCallConvention(CallConv)) && + assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); + if (CallConv == CallingConv::X86_INTR) { + bool isLegal = Ins.size() == 1 || + (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || + (!Is64Bit && Ins[1].VT == MVT::i32))); + if (!isLegal) + report_fatal_error("X86 interrupts may take one or two arguments"); + } + // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); @@ -2471,6 +2711,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f128) + RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) @@ -2547,8 +2789,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. - if (FuncIsMadeTailCallSafe(CallConv, - MF.getTarget().Options.GuaranteedTailCallOpt)) + if (shouldGuaranteeTCO(CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -2561,13 +2803,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } - MachineModuleInfo &MMI = MF.getMMI(); - const Function *WinEHParent = nullptr; - if (MMI.hasWinEHFuncInfo(Fn)) - WinEHParent = MMI.getWinEHParent(Fn); - bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; - bool IsWinEHParent = WinEHParent && WinEHParent == Fn; - // Figure out if XMM registers are in use. 
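A worked check of the interrupt-argument offset formula above, Offset = PtrSize * ((i + 1) % Ins.size() - 1), using PtrSize = 8 for a 64-bit target (32-bit targets use 4); the values match the -4/-8 comment (illustrative program):

```cpp
#include <cstdio>

int main() {
  const int PtrSize = 8;
  for (int NumIns : {1, 2})
    for (int i = 0; i < NumIns; ++i)
      std::printf("NumIns=%d i=%d -> Offset %d\n", NumIns, i,
                  PtrSize * ((i + 1) % NumIns - 1));
  // Prints -8 for a lone argument, and 0 / -8 for the two-argument case:
  // the last argument occupies the slot normally taken by the return
  // address, and the first of two sits at offset 0.
}
```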
assert(!(Subtarget->useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2631,10 +2866,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack( - FuncInfo->getRegSaveFrameIndex(), Offset), - false, false, 0); + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + FuncInfo->getRegSaveFrameIndex(), Offset), + false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -2656,27 +2892,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - } else if (IsWin64 && IsWinEHOutlined) { - // Get to the caller-allocated home save location. Add 8 to account - // for the return address. - int HomeOffset = TFI.getOffsetOfLocalArea() + 8; - FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( - /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); - - MMI.getWinEHFuncInfo(Fn) - .CatchHandlerParentFrameObjIdx[const_cast(Fn)] = - FuncInfo->getRegSaveFrameIndex(); - - // Store the second integer parameter (rdx) into rsp+16 relative to the - // stack pointer at the entry of the function. - SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy(DAG.getDataLayout())); - unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); - Chain = DAG.getStore( - Val.getValue(1), dl, Val, RSFIN, - MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), - /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2723,12 +2938,15 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. + } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { + // X86 interrupts must pop the error code if present + FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4); } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. - if (!Is64Bit && !IsTailCallConvention(CallConv) && + if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && - argsAreStructReturn(Ins) == StackStructReturn) + argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2743,21 +2961,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); - if (IsWinEHParent) { - if (Is64Bit) { - int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); - SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); - MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); - Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, - MachinePointerInfo::getFixedStack(UnwindHelpFI), - /*isVolatile=*/true, - /*isNonTemporal=*/false, /*Alignment=*/0); - } else { - // Functions using Win32 EH are considered to have opaque SP adjustments - // to force local variables to be addressed from the frame or base - // pointers. 
- MFI->setHasOpaqueSPAdjustment(true); + if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (Personality == EHPersonality::CoreCLR) { + assert(Is64Bit); + // TODO: Add a mechanism to frame lowering that will allow us to indicate + // that we'd prefer this slot be allocated towards the bottom of the frame + // (i.e. near the stack pointer after allocating the frame). Every + // funclet needs a copy of this slot in its (mostly empty) frame, and the + // offset from the bottom of this and each funclet's frame must be the + // same, so the size of funclets' (mostly empty) frames is dictated by + // how far this slot is from the bottom (since they allocate just enough + // space to accomodate holding this slot at the correct offset). + int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + EHInfo->PSPSymFrameIdx = PSPSymFI; } } @@ -2777,9 +2994,10 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - return DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); + return DAG.getStore( + Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), + false, false, 0); } /// Emit a load of return address if tail call @@ -2813,11 +3031,24 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, false); SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(NewReturnAddrFI), + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), NewReturnAddrFI), false, false, 0); return Chain; } +/// Returns a vector_shuffle mask for an movs{s|d}, movd +/// operation of specified width. +static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + Mask.push_back(NumElems); + for (unsigned i = 1; i != NumElems; ++i) + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -2835,11 +3066,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - StructReturnType SR = callIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + if (CallConv == CallingConv::X86_INTR) + report_fatal_error("X86 interrupts may not be called directly"); + if (Attr.getValueAsString() == "true") isTailCall = false; @@ -2878,7 +3112,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++NumTailCalls; } - assert(!(isVarArg && IsTailCallConvention(CallConv)) && + assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); // Analyze operands of the call, assigning locations to each operand. @@ -2892,13 +3126,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.AnalyzeCallOperands(Outs, CC_X86); // Get a count of how many bytes are to be pushed on the stack. 
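For concreteness, the mask getMOVL builds above, reconstructed for a 4-lane type (illustrative program): lane 0 takes element 0 of V2 (indices >= NumElems select the second vector) and lanes 1..3 keep V1, which is the movss/movsd behavior the comment names.

```cpp
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumElems = 4;
  std::vector<int> Mask;
  Mask.push_back(NumElems);               // lane 0 <- element 0 of V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);                    // lanes 1..3 <- V1 unchanged
  for (int M : Mask)
    std::printf("%d ", M);                // prints: 4 1 2 3
}
```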
- unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); if (IsSibcall) // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; else if (MF.getTarget().Options.GuaranteedTailCallOpt && - IsTailCallConvention(CallConv)) + canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -2970,7 +3204,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, break; case CCValAssign::AExt: if (Arg.getValueType().isVector() && - Arg.getValueType().getScalarType() == MVT::i1) + Arg.getValueType().getVectorElementType() == MVT::i1) Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. @@ -2987,9 +3221,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Store the argument. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast(SpillSlot)->getIndex(); - Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - MachinePointerInfo::getFixedStack(FI), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, Arg, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, 0); Arg = SpillSlot; break; } @@ -3125,10 +3360,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Flags, DAG, dl)); } else { // Store relative to framepointer. - MemOpChains2.push_back( - DAG.getStore(ArgChain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains2.push_back(DAG.getStore( + ArgChain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, 0)); } } @@ -3207,7 +3442,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (ExtraLoad) Callee = DAG.getLoad( getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, + false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; @@ -3261,9 +3497,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); - // If this is an invoke in a 32-bit function using an MSVC personality, assume - // the function clobbers all registers. If an exception is thrown, the runtime - // will not restore CSRs. + // If this is an invoke in a 32-bit function using a funclet-based + // personality, assume the function clobbers all registers. If an exception + // is thrown, the runtime will not restore CSRs. // FIXME: Model this more precisely so that we can register allocate across // the normal edge and spill and fill across the exceptional edge. if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { @@ -3272,7 +3508,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallerFn->hasPersonalityFn() ? 
classifyEHPersonality(CallerFn->getPersonalityFn()) : EHPersonality::Unknown; - if (isMSVCEHPersonality(Pers)) + if (isFuncletEHPersonality(Pers)) Mask = RegInfo->getNoPreservedMask(); } @@ -3300,7 +3536,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything - else if (!Is64Bit && !IsTailCallConvention(CallConv) && + else if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee @@ -3358,8 +3594,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // EDI // local1 .. -/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned -/// for a 16 byte align requirement. +/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align +/// requirement. unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG) const { @@ -3380,9 +3616,8 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, return Offset; } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. +/// Return true if the given stack call argument is already available in the +/// same position (relatively) of the caller's incoming argument stack. static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, @@ -3435,25 +3670,19 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } -/// IsEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. Targets which want to do tail call -/// optimization should implement this function. -bool -X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, - Type *RetTy, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG &DAG) const { - if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) +/// Check whether the call is eligible for tail call optimization. Targets +/// that want to do tail call optimization should implement this function. +bool X86TargetLowering::IsEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG) const { + if (!mayTailCallThisCC(CalleeCC)) return false; // If -tailcallopt is specified, make fastcc functions tail-callable. 
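// --- Editor's aside on the "16n + 12" comment above: a sketch of the
// alignment rule, assuming a 32-bit target with a 4-byte return-address slot
// and 16-byte stack alignment (names hypothetical, not the in-tree helper).
static unsigned alignedArgArea(unsigned StackSize) {
  const unsigned StackAlign = 16, SlotSize = 4;
  // Pad StackSize so that StackSize + SlotSize is 16-byte aligned; the
  // argument area itself then ends up congruent to 12 (mod 16).
  return ((StackSize + SlotSize + StackAlign - 1) & ~(StackAlign - 1)) -
         SlotSize;
}
// e.g. alignedArgArea(0) == 12 and alignedArgArea(13) == 28 (16 + 12).
// --- End of aside. ---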
- const MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); // If the function return type is x86_fp80 and the callee return type is not, @@ -3474,7 +3703,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; if (DAG.getTarget().Options.GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && CCMatch) + if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; } @@ -3493,19 +3722,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // An stdcall/thiscall caller is expected to clean up its arguments; the - // callee isn't going to do that. - // FIXME: this is more restrictive than needed. We could produce a tailcall - // when the stack adjustment matches. For example, with a thiscall that takes - // only one argument. - if (!CCMatch && (CallerCC == CallingConv::X86_StdCall || - CallerCC == CallingConv::X86_ThisCall)) - return false; - // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. if (isVarArg && !Outs.empty()) { - // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. if (IsCalleeWin64 || IsCallerWin64) @@ -3573,6 +3792,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + unsigned StackArgsSize = 0; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -3587,11 +3808,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); - if (CCInfo.getNextStackOffset()) { - MachineFunction &MF = DAG.getMachineFunction(); - if (MF.getInfo()->getBytesToPopOnReturn()) - return false; + StackArgsSize = CCInfo.getNextStackOffset(); + if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -3642,6 +3861,21 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + bool CalleeWillPop = + X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg, + MF.getTarget().Options.GuaranteedTailCallOpt); + + if (unsigned BytesToPop = + MF.getInfo()->getBytesToPopOnReturn()) { + // If we have bytes to pop, the callee must pop them. + bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; + if (!CalleePopMatches) + return false; + } else if (CalleeWillPop && StackArgsSize > 0) { + // If we don't have bytes to pop, make sure the callee doesn't pop any. 
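// --- Editor's aside (the hunk resumes right below): the callee-pop
// compatibility rule added here, restated as a standalone hypothetical
// helper. A tail call is only safe when caller and callee agree on who pops
// how many stack-argument bytes.
static bool calleePopCompatible(unsigned CallerBytesToPop, bool CalleeWillPop,
                                unsigned StackArgsSize) {
  if (CallerBytesToPop) // Caller owes a pop: callee must pop the same amount.
    return CalleeWillPop && StackArgsSize == CallerBytesToPop;
  // Caller pops nothing: a popping callee would unbalance the stack.
  return !(CalleeWillPop && StackArgsSize > 0);
}
// --- End of aside. ---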
+ return false; + } + return true; } @@ -3688,11 +3922,13 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMILPI: case X86ISD::VPERM2X128: case X86ISD::VPERMI: + case X86ISD::VPERMV: + case X86ISD::VPERMV3: return true; } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { switch(Opc) { @@ -3707,7 +3943,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); @@ -3772,23 +4008,23 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, return false; } -/// isCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls. +/// Determines whether the callee is required to pop its own arguments. +/// Callee pop is necessary to support tail calls. bool X86::isCalleePop(CallingConv::ID CallingConv, - bool is64Bit, bool IsVarArg, bool TailCallOpt) { + bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { + // If GuaranteeTCO is true, we force some calls to be callee pop so that we + // can guarantee TCO. + if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) + return true; + switch (CallingConv) { default: return false; case CallingConv::X86_StdCall: case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: + case CallingConv::X86_VectorCall: return !is64Bit; - case CallingConv::Fast: - case CallingConv::GHC: - case CallingConv::HiPE: - if (IsVarArg) - return false; - return TailCallOpt; } } @@ -3807,11 +4043,26 @@ static bool isX86CCUnsigned(unsigned X86CC) { case X86::COND_BE: return true; case X86::COND_AE: return true; } - llvm_unreachable("covered switch fell through?!"); } -/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 -/// specific condition code, returning the condition code and the LHS/RHS of the +static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { + switch (SetCCOpcode) { + default: llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: return X86::COND_E; + case ISD::SETGT: return X86::COND_G; + case ISD::SETGE: return X86::COND_GE; + case ISD::SETLT: return X86::COND_L; + case ISD::SETLE: return X86::COND_LE; + case ISD::SETNE: return X86::COND_NE; + case ISD::SETULT: return X86::COND_B; + case ISD::SETUGT: return X86::COND_A; + case ISD::SETULE: return X86::COND_BE; + case ISD::SETUGE: return X86::COND_AE; + } +} + +/// Do a one-to-one translation of a ISD::CondCode to the X86-specific +/// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. 
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { @@ -3833,19 +4084,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } - switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; - } + return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. @@ -3898,8 +4137,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } -/// hasFPCMov - is there a floating point cmov for the specific X86 condition -/// code. Current x86 isa includes the following FP cmov instructions: +/// Is there a floating point cmov for the specific X86 condition code? +/// Current x86 isa includes the following FP cmov instructions: /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. static bool hasFPCMov(unsigned X86CC) { switch (X86CC) { @@ -3917,7 +4156,7 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isFPImmLegal - Returns true if the target can instruction select the +/// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -3970,7 +4209,7 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasLZCNT(); } -/// isUndefInRange - Return true if every element in Mask, beginning +/// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size is undef. static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i) @@ -3979,19 +4218,18 @@ static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { return true; } -/// isUndefOrInRange - Return true if Val is undef or if its value falls within -/// the specified range (L, H]. +/// Return true if Val is undef or if its value falls within the +/// specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Val is either less than zero (undef) or equal to the -/// specified value. +/// Val is either less than zero (undef) or equal to the specified value. static bool isUndefOrEqual(int Val, int CmpVal) { return (Val < 0 || Val == CmpVal); } -/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning +/// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size]. or is undef. 
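// --- Editor's aside: a standalone model of the mask predicates above, where
// -1 plays the role of an undef mask element (illustration only).
#include <vector>
static bool isSequentialOrUndef(const std::vector<int> &Mask, unsigned Pos,
                                unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
    if (Mask[i] >= 0 && Mask[i] != Low)
      return false;
  return true;
}
// e.g. isSequentialOrUndef({4, -1, 6, 7}, 0, 4, 4) == true.
// --- End of aside; the in-tree declaration follows below. ---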
static bool isSequentialOrUndefInRange(ArrayRef Mask, @@ -4002,9 +4240,8 @@ static bool isSequentialOrUndefInRange(ArrayRef Mask, return true; } -/// isVEXTRACTIndex - Return true if the specified -/// EXTRACT_SUBVECTOR operand specifies a vector extract that is -/// suitable for instruction that extract 128 or 256 bit vectors +/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector +/// extract that is suitable for instruction that extract 128 or 256 bit vectors static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"); if (!isa(N->getOperand(1).getNode())) @@ -4021,7 +4258,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { return Result; } -/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR +/// Return true if the specified INSERT_SUBVECTOR /// operand specifies a subvector insert that is suitable for input to /// insertion of 128 or 256-bit subvectors static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) { @@ -4057,8 +4294,8 @@ bool X86::isVEXTRACT256Index(SDNode *N) { static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(1).getNode())) - llvm_unreachable("Illegal extract subvector for VEXTRACT"); + assert(isa(N->getOperand(1).getNode()) && + "Illegal extract subvector for VEXTRACT"); uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); @@ -4072,8 +4309,8 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(2).getNode())) - llvm_unreachable("Illegal insert subvector for VINSERT"); + assert(isa(N->getOperand(2).getNode()) && + "Illegal insert subvector for VINSERT"); uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); @@ -4085,53 +4322,71 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { return Index / NumElemsPerChunk; } -/// getExtractVEXTRACT128Immediate - Return the appropriate immediate -/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128 -/// and VINSERTI128 instructions. +/// Return the appropriate immediate to extract the specified +/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VINSERTI128 instructions. unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) { return getExtractVEXTRACTImmediate(N, 128); } -/// getExtractVEXTRACT256Immediate - Return the appropriate immediate -/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4 -/// and VINSERTI64x4 instructions. +/// Return the appropriate immediate to extract the specified +/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VINSERTI64x4 instructions. unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) { return getExtractVEXTRACTImmediate(N, 256); } -/// getInsertVINSERT128Immediate - Return the appropriate immediate -/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128 -/// and VINSERTI128 instructions. +/// Return the appropriate immediate to insert at the specified +/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions. 
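// --- Editor's aside: the VEXTRACT*/VINSERT* immediates computed by the
// helpers in this region reduce to an element index divided by the chunk
// width in elements. Standalone sketch, names hypothetical:
static unsigned subvectorImmediate(unsigned ElemIdx, unsigned EltBits,
                                   unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits;
  return ElemIdx / ElemsPerChunk;
}
// e.g. the 128-bit chunk holding element 4 of a v16i32 (32-bit elements):
// subvectorImmediate(4, 32, 128) == 1.
// --- End of aside; the in-tree helper continues below. ---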
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 128);
}

-/// getInsertVINSERT256Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF46x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 256);
}

-/// isZero - Returns true if Elt is a constant integer zero
-static bool isZero(SDValue V) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
-  return C && C->isNullValue();
-}
-
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
+/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
-  if (isZero(Elt))
-    return true;
-  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
-    return CFP->getValueAPF().isPosZero();
-  return false;
+  return isNullConstant(Elt) || isNullFPConstant(Elt);
}

-/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
-static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+// Build a vector of constants.
+// Use an UNDEF node if MaskElt == -1.
+// Split 64-bit constants in 32-bit mode.
+static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
+                              SelectionDAG &DAG,
+                              SDLoc dl, bool IsMask = false) {
+
+  SmallVector<SDValue, 32> Ops;
+  bool Split = false;
+
+  MVT ConstVecVT = VT;
+  unsigned NumElts = VT.getVectorNumElements();
+  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+    Split = true;
+  }
+
+  MVT EltVT = ConstVecVT.getVectorElementType();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    bool IsUndef = Values[i] < 0 && IsMask;
+    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
+                     DAG.getConstant(Values[i], dl, EltVT);
+    Ops.push_back(OpNode);
+    if (Split)
+      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
+                    DAG.getConstant(0, dl, EltVT));
+  }
+  SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops);
+  if (Split)
+    ConstsNode = DAG.getBitcast(VT, ConstsNode);
+  return ConstsNode;
+}
+
+/// Returns a vector of specified type with all zero elements.
+static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
                              SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
@@ -4163,7 +4418,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
-  } else if (VT.getScalarType() == MVT::i1) {
+  } else if (VT.getVectorElementType() == MVT::i1) {
    assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) &&
           "Unexpected vector type");
@@ -4195,19 +4450,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
  // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1); // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + NormalizedIdxVal, - ElemsPerChunk)); + makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } @@ -4245,13 +4499,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec, // Insert the relevant vectorWidth bits. unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) - * ElemsPerChunk); + // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. + IdxVal &= ~(ElemsPerChunk - 1); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -4279,7 +4533,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, Vec, ZeroIndex); // The blend instruction, and therefore its mask, depend on the data type. - MVT ScalarType = ResultVT.getScalarType().getSimpleVT(); + MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT(); if (ScalarType.isFloatingPoint()) { // Choose either vblendps (float) or vblendpd (double). unsigned ScalarSize = ScalarType.getSizeInBits(); @@ -4316,6 +4570,81 @@ static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +/// Insert i1-subvector to i1-vector. +static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) { + + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue SubVec = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (!isa(Idx)) + return SDValue(); + + unsigned IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal == 0 && Vec.isUndef()) // the operation is legal + return Op; + + MVT OpVT = Op.getSimpleValueType(); + MVT SubVecVT = SubVec.getSimpleValueType(); + unsigned NumElems = OpVT.getVectorNumElements(); + unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); + + assert(IdxVal + SubVecNumElems <= NumElems && + IdxVal % SubVecVT.getSizeInBits() == 0 && + "Unexpected index value in INSERT_SUBVECTOR"); + + // There are 3 possible cases: + // 1. Subvector should be inserted in the lower part (IdxVal == 0) + // 2. Subvector should be inserted in the upper part + // (IdxVal + SubVecNumElems == NumElems) + // 3. Subvector should be inserted in the middle (for example v2i1 + // to v16i1, index 2) + + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + SDValue Undef = DAG.getUNDEF(OpVT); + SDValue WideSubVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx); + if (Vec.isUndef()) + return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + + if (ISD::isBuildVectorAllZeros(Vec.getNode())) { + unsigned ShiftLeft = NumElems - SubVecNumElems; + unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; + WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + return ShiftRight ? 
DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec,
+                DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec;
+  }
+
+  if (IdxVal == 0) {
+    // Zero lower bits of the Vec
+    SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+    Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+    Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+    // Merge them together
+    return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+  }
+
+  // Simple case when we put subvector in the upper part
+  if (IdxVal + SubVecNumElems == NumElems) {
+    // Move the subvector into the upper part...
+    WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+                             DAG.getConstant(IdxVal, dl, MVT::i8));
+    // ...and zero the upper bits of the Vec.
+    SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+    Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+    Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+    return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+  }
+  // Subvector should be inserted in the middle - use shuffle. Take the
+  // subvector from the bottom of WideSubVec and everything else from Vec.
+  SmallVector<int, 64> Mask;
+  for (unsigned i = 0; i < NumElems; ++i)
+    Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
+                   i - IdxVal : i + NumElems);
+  return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+}
+
/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
@@ -4334,18 +4663,22 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
  return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}

-/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
-                             SDLoc dl) {
+static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+                             SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
  SDValue Vec;
-  if (VT.is256BitVector()) {
-    if (HasInt256) { // AVX2
+  if (VT.is512BitVector()) {
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
+  } else if (VT.is256BitVector()) {
+    if (Subtarget->hasInt256()) { // AVX2
      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
    } else { // AVX
@@ -4360,19 +4693,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
  return DAG.getBitcast(VT, Vec);
}

-/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
-/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
-                       SDValue V2) {
-  unsigned NumElems = VT.getVectorNumElements();
-  SmallVector<int, 8> Mask;
-  Mask.push_back(NumElems);
-  for (unsigned i = 1; i != NumElems; ++i)
-    Mask.push_back(i);
-  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
-}
-
-/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); @@ -4384,7 +4705,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackh - Returns a vector_shuffle node for an unpackh operation. +/// Returns a vector_shuffle node for an unpackh operation. static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); @@ -4396,10 +4717,10 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified -/// vector of zero or undef vector. This produces a shuffle where the low -/// element of V2 is swizzled into the zero/undef vector, landing at element -/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). +/// Return a vector_shuffle of the specified vector of zero or undef vector. +/// This produces a shuffle where the low element of V2 is swizzled into the +/// zero/undef vector, landing at element Idx. +/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, @@ -4415,10 +4736,10 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]); } -/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the -/// target specific opcode. Returns true if the Mask could be calculated. Sets -/// IsUnary to true if only uses one source. Note that this will set IsUnary for -/// shuffles which use a single input multiple times, and in those cases it will +/// Calculates the shuffle mask corresponding to the target-specific opcode. +/// Returns true if the Mask could be calculated. Sets IsUnary to true if only +/// uses one source. Note that this will set IsUnary for shuffles which use a +/// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. /// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. static bool getTargetShuffleMask(SDNode *N, MVT VT, @@ -4482,7 +4803,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. - EVT VT = MaskNode.getValueType(); + MVT VT = MaskNode.getSimpleValueType(); assert(VT.isVector() && "Can't produce a non-vector with a build_vector!"); if (!VT.isInteger()) @@ -4572,6 +4893,119 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLPS: // Not yet implemented return false; + case X86ISD::VPERMV: { + IsUnary = true; + SDValue MaskNode = N->getOperand(0); + while (MaskNode->getOpcode() == ISD::BITCAST) + MaskNode = MaskNode->getOperand(0); + + unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()); + SmallVector RawMask; + if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { + // If we have a build-vector, then things are easy. 
+ assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == + VT.getVectorNumElements()); + + for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { + SDValue Op = MaskNode->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) + RawMask.push_back((uint64_t)SM_SentinelUndef); + else if (isa(Op)) { + APInt MaskElement = cast(Op)->getAPIntValue(); + RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); + } else + return false; + } + DecodeVPERMVMask(RawMask, Mask); + break; + } + if (MaskNode->getOpcode() == X86ISD::VBROADCAST) { + unsigned NumEltsInMask = MaskNode->getNumOperands(); + MaskNode = MaskNode->getOperand(0); + if (auto *CN = dyn_cast(MaskNode)) { + APInt MaskEltValue = CN->getAPIntValue(); + for (unsigned i = 0; i < NumEltsInMask; ++i) + RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue()); + DecodeVPERMVMask(RawMask, Mask); + break; + } + // It may be a scalar load + } + + auto *MaskLoad = dyn_cast(MaskNode); + if (!MaskLoad) + return false; + + SDValue Ptr = MaskLoad->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *MaskCP = dyn_cast(Ptr); + if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) + return false; + + if (auto *C = dyn_cast(MaskCP->getConstVal())) { + DecodeVPERMVMask(C, VT, Mask); + if (Mask.empty()) + return false; + break; + } + return false; + } + case X86ISD::VPERMV3: { + IsUnary = false; + SDValue MaskNode = N->getOperand(1); + while (MaskNode->getOpcode() == ISD::BITCAST) + MaskNode = MaskNode->getOperand(1); + + if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { + // If we have a build-vector, then things are easy. + assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == + VT.getVectorNumElements()); + + SmallVector RawMask; + unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2); + + for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { + SDValue Op = MaskNode->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) + RawMask.push_back((uint64_t)SM_SentinelUndef); + else { + auto *CN = dyn_cast(Op.getNode()); + if (!CN) + return false; + APInt MaskElement = CN->getAPIntValue(); + RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); + } + } + DecodeVPERMV3Mask(RawMask, Mask); + break; + } + + auto *MaskLoad = dyn_cast(MaskNode); + if (!MaskLoad) + return false; + + SDValue Ptr = MaskLoad->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *MaskCP = dyn_cast(Ptr); + if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) + return false; + + if (auto *C = dyn_cast(MaskCP->getConstVal())) { + DecodeVPERMV3Mask(C, VT, Mask); + if (Mask.empty()) + return false; + break; + } + return false; + } default: llvm_unreachable("unknown target shuffle node"); } @@ -4586,7 +5020,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, return true; } -/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { @@ -4650,8 +5084,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, return SDValue(); } -/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
-/// +/// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, @@ -4721,8 +5154,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, return DAG.getBitcast(MVT::v16i8, V); } -/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. -/// +/// Custom lower build_vector of v8i16. static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, @@ -4753,7 +5185,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, return V; } -/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32. +/// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { @@ -4924,7 +5356,7 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { return SDValue(); if ((Offset % RequiredAlign) & 3) return SDValue(); - int64_t StartOffset = Offset & ~(RequiredAlign-1); + int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1); if (StartOffset) { SDLoc DL(Ptr); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -5157,8 +5589,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - const Function *F = DAG.getMachineFunction().getFunction(); - bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -5188,9 +5619,10 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CP)->getAlignment(); - Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + Ld = DAG.getLoad( + CVT, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } @@ -5329,7 +5761,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { return NV; } -static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) { +static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"); @@ -5366,7 +5798,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { } if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - SDValue Imm = ConvertI1VectorToInterger(Op, DAG); + SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); @@ -5600,7 +6032,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, /// node. 
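// --- Editor's aside on the StartOffset hunk above: assuming RequiredAlign is
// a 32-bit unsigned (as the added int64_t cast suggests), ~(RequiredAlign - 1)
// is a 32-bit mask that zero-extends and silently clears bits 32 and up of
// the 64-bit offset. A self-contained demonstration:
#include <cassert>
#include <cstdint>
static void maskWidthDemo() {
  uint32_t RequiredAlign = 16;
  int64_t Offset = 0x100000008LL;
  int64_t Broken = Offset & ~(RequiredAlign - 1);        // == 0, high bits lost
  int64_t Fixed = Offset & ~int64_t(RequiredAlign - 1);  // == 0x100000000
  assert(Broken == 0 && Fixed == 0x100000000LL);
}
// --- End of aside. ---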
static SDValue LowerToAddSub(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); @@ -5662,12 +6094,12 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, // Update InVec0 and InVec1. if (InVec0.getOpcode() == ISD::UNDEF) { InVec0 = Op0.getOperand(0); - if (InVec0.getValueType() != VT) + if (InVec0.getSimpleValueType() != VT) return SDValue(); } if (InVec1.getOpcode() == ISD::UNDEF) { InVec1 = Op1.getOperand(0); - if (InVec1.getValueType() != VT) + if (InVec1.getSimpleValueType() != VT) return SDValue(); } @@ -5703,7 +6135,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; @@ -5845,7 +6277,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. - if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512()) + if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG); // Vectors containing all zeros can be matched by pxor and xorps later @@ -5866,7 +6298,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return Op; if (!VT.is512BitVector()) - return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); + return getOnesVector(VT, Subtarget, DAG, dl); } BuildVectorSDNode *BV = cast(Op.getNode()); @@ -5881,7 +6313,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumZero = 0; unsigned NumNonZero = 0; - unsigned NonZeros = 0; + uint64_t NonZeros = 0; bool IsAllConstants = true; SmallSet Values; for (unsigned i = 0; i < NumElems; ++i) { @@ -5895,7 +6327,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (X86::isZeroNode(Elt)) NumZero++; else { - NonZeros |= (1 << i); + assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. + NonZeros |= ((uint64_t)1 << i); NumNonZero++; } } @@ -5919,7 +6352,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle SSE only. assert(VT == MVT::v2i64 && "Expected an SSE value type!"); - EVT VecVT = MVT::v4i32; + MVT VecVT = MVT::v4i32; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -6051,7 +6484,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, - Op.getOperand(Idx)); + Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); @@ -6059,13 +6492,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. 
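// --- Editor's aside on the NonZeros hunk above: widening to uint64_t matters
// because a 32-bit "1 << i" is undefined behavior for i >= 32, so a build
// vector with more than 32 elements (e.g. v64i8) could not be tracked.
// Minimal illustration:
#include <cstdint>
static uint64_t setBitSafely(uint64_t Bits, unsigned i) {
  // With a plain int literal this is UB once i >= 32; widen first.
  return Bits | ((uint64_t)1 << i);
}
// --- End of aside. ---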
if (EVTBits == 8 && NumElems == 16) - if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; if (EVTBits == 16 && NumElems == 8) - if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS @@ -6077,7 +6510,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SmallVector V(NumElems); if (NumElems == 4 && NumZero > 0) { for (unsigned i = 0; i < 4; ++i) { - bool isZero = !(NonZeros & (1 << i)); + bool isZero = !(NonZeros & (1ULL << i)); if (isZero) V[i] = getZeroVector(VT, Subtarget, DAG, dl); else @@ -6177,7 +6610,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction +// 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -6193,8 +6626,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), + ResVT.getVectorNumElements()/2); SDValue V3 = Op.getOperand(2); SDValue V4 = Op.getOperand(3); return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl), @@ -6213,8 +6646,27 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), + // Specialize the cases when all, or all but one, of the operands are undef. 
+ unsigned NumOfDefinedOps = 0; + unsigned OpIdx = 0; + for (unsigned i = 0; i < NumOfOperands; i++) + if (!Op.getOperand(i).isUndef()) { + NumOfDefinedOps++; + OpIdx = i; + } + if (NumOfDefinedOps == 0) + return Undef; + if (NumOfDefinedOps == 1) { + unsigned SubVecNumElts = + Op.getOperand(OpIdx).getValueType().getVectorNumElements(); + SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, + Op.getOperand(OpIdx), IdxVal); + } + + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); SmallVector Ops; for (unsigned i = 0; i < NumOfOperands/2; i++) @@ -6227,31 +6679,38 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } + // 2 operands SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); + unsigned NumElems = ResVT.getVectorNumElements(); + assert(V1.getValueType() == V2.getValueType() && + V1.getValueType().getVectorNumElements() == NumElems/2 && + "Unexpected operands in CONCAT_VECTORS"); + + if (ResVT.getSizeInBits() >= 16) + return Op; // The operation is legal with KUNPCK + bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); - + SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); if (IsZeroV1 && IsZeroV2) - return getZeroVector(ResVT, Subtarget, DAG, dl); + return ZeroVec; SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(ResVT); - unsigned NumElems = ResVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); + if (V2.isUndef()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + if (IsZeroV2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); + + SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); + if (V1.isUndef()) + V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); - V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx); - V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits); if (IsZeroV1) - return V2; + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - // Zero the upper bits of V1 - V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits); - V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits); - if (IsZeroV2) - return V1; - return DAG.getNode(ISD::OR, dl, ResVT, V1, V2); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); } static SDValue LowerCONCAT_VECTORS(SDValue Op, @@ -6272,7 +6731,6 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, return LowerAVXCONCAT_VECTORS(Op, DAG); } - //===----------------------------------------------------------------------===// // Vector shuffle lowering // @@ -6422,6 +6880,127 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } +/// \brief Compute whether each element of a shuffle is zeroable. +/// +/// A "zeroable" vector shuffle element is one which can be lowered to zero. +/// Either it is an undef element in the shuffle mask, the element of the input +/// referenced is undef, or the element of the input referenced is known to be +/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle +/// as many lanes with this technique as possible to simplify the remaining +/// shuffle. 
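// --- Editor's aside: the cheap part of the zeroable computation defined
// below, modeled standalone (V1IsZero/V2IsZero stand in for the
// build_vector-of-zeros checks; the per-element build_vector digging is
// omitted from this sketch).
#include <vector>
static std::vector<bool> zeroableElements(const std::vector<int> &Mask,
                                          int Size, bool V1IsZero,
                                          bool V2IsZero) {
  std::vector<bool> Z(Mask.size());
  for (unsigned i = 0; i < Mask.size(); ++i) {
    int M = Mask[i];
    Z[i] = M < 0 || (M < Size && V1IsZero) || (M >= Size && V2IsZero);
  }
  return Z;
}
// --- End of aside; the in-tree implementation follows. ---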
+static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, + SDValue V1, SDValue V2) { + SmallBitVector Zeroable(Mask.size(), false); + + while (V1.getOpcode() == ISD::BITCAST) + V1 = V1->getOperand(0); + while (V2.getOpcode() == ISD::BITCAST) + V2 = V2->getOperand(0); + + bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + int M = Mask[i]; + // Handle the easy cases. + if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { + Zeroable[i] = true; + continue; + } + + // If this is an index into a build_vector node (which has the same number + // of elements), dig out the input value and use it. + SDValue V = M < Size ? V1 : V2; + if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + continue; + + SDValue Input = V.getOperand(M % Size); + // The UNDEF opcode check really should be dead code here, but not quite + // worth asserting on (it isn't invalid, just unexpected). + if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input)) + Zeroable[i] = true; + } + + return Zeroable; +} + +// X86 has dedicated unpack instructions that can handle specific blend +// operations: UNPCKH and UNPCKL. +static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef Mask, + SDValue V1, SDValue V2, + SelectionDAG &DAG) { + int NumElts = VT.getVectorNumElements(); + int NumEltsInLane = 128 / VT.getScalarSizeInBits(); + SmallVector Unpckl; + SmallVector Unpckh; + + for (int i = 0; i < NumElts; ++i) { + unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; + int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2); + int HiPos = LoPos + NumEltsInLane / 2; + Unpckl.push_back(LoPos); + Unpckh.push_back(HiPos); + } + + if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); + + // Commute and try again. + ShuffleVectorSDNode::commuteMask(Unpckl); + if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); + + ShuffleVectorSDNode::commuteMask(Unpckh); + if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); + + return SDValue(); +} + +/// \brief Try to emit a bitmask instruction for a shuffle. +/// +/// This handles cases where we can model a blend exactly as a bitmask due to +/// one of the inputs being zeroable. +static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + SelectionDAG &DAG) { + MVT EltVT = VT.getVectorElementType(); + int NumEltBits = EltVT.getSizeInBits(); + MVT IntEltVT = MVT::getIntegerVT(NumEltBits); + SDValue Zero = DAG.getConstant(0, DL, IntEltVT); + SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, + IntEltVT); + if (EltVT.isFloatingPoint()) { + Zero = DAG.getBitcast(EltVT, Zero); + AllOnes = DAG.getBitcast(EltVT, AllOnes); + } + SmallVector VMaskOps(Mask.size(), Zero); + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + SDValue V; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Zeroable[i]) + continue; + if (Mask[i] % Size != i) + return SDValue(); // Not a blend. + if (!V) + V = Mask[i] < Size ? V1 : V2; + else if (V != (Mask[i] < Size ? V1 : V2)) + return SDValue(); // Can only let one input through the mask. 
+ + VMaskOps[i] = AllOnes; + } + if (!V) + return SDValue(); // No non-zeroable elements! + + SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps); + V = DAG.getNode(VT.isFloatingPoint() + ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND, + DL, VT, V, VMask); + return V; +} + /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -6431,7 +7010,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); - MVT EltVT = VT.getScalarType(); + MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, @@ -6458,22 +7037,62 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, /// This doesn't do any checks for the availability of instructions for blending /// these values. It relies on the availability of the X86ISD::BLENDI pattern to /// be matched in the backend with the type given. What it does check for is -/// that the shuffle mask is in fact a blend. +/// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, + SDValue V2, ArrayRef Original, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + SmallVector Mask(Original.begin(), Original.end()); + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + bool ForceV1Zero = false, ForceV2Zero = false; + + // Attempt to generate the binary blend mask. If an input is zero then + // we can use any lane. + // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. unsigned BlendMask = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) { - if (Mask[i] >= Size) { - if (Mask[i] != i + Size) - return SDValue(); // Shuffled V2 input! + int M = Mask[i]; + if (M < 0) + continue; + if (M == i) + continue; + if (M == i + Size) { BlendMask |= 1u << i; continue; } - if (Mask[i] >= 0 && Mask[i] != i) - return SDValue(); // Shuffled V1 input! + if (Zeroable[i]) { + if (V1IsZero) { + ForceV1Zero = true; + Mask[i] = i; + continue; + } + if (V2IsZero) { + ForceV2Zero = true; + BlendMask |= 1u << i; + Mask[i] = i + Size; + continue; + } + } + return SDValue(); // Shuffled input! } + + // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. + if (ForceV1Zero) + V1 = getZeroVector(VT, Subtarget, DAG, DL); + if (ForceV2Zero) + V2 = getZeroVector(VT, Subtarget, DAG, DL); + + auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) { + unsigned ScaledMask = 0; + for (int i = 0; i != Size; ++i) + if (BlendMask & (1u << i)) + for (int j = 0; j != Scale; ++j) + ScaledMask |= 1u << (i * Scale + j); + return ScaledMask; + }; + switch (VT.SimpleTy) { case MVT::v2f64: case MVT::v4f32: @@ -6493,12 +7112,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, if (Subtarget->hasAVX2()) { // Scale the blend by the number of 32-bit dwords per element. 
int Scale = VT.getScalarSizeInBits() / 32; - BlendMask = 0; - for (int i = 0, Size = Mask.size(); i < Size; ++i) - if (Mask[i] >= Size) - for (int j = 0; j < Scale; ++j) - BlendMask |= 1u << (i * Scale + j); - + BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32; V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); @@ -6511,12 +7125,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // For integer shuffles we need to expand the mask and cast the inputs to // v8i16s prior to blending. int Scale = 8 / VT.getVectorNumElements(); - BlendMask = 0; - for (int i = 0, Size = Mask.size(); i < Size; ++i) - if (Mask[i] >= Size) - for (int j = 0; j < Scale; ++j) - BlendMask |= 1u << (i * Scale + j); - + BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = DAG.getBitcast(MVT::v8i16, V2); return DAG.getBitcast(VT, @@ -6541,9 +7150,13 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // FALLTHROUGH case MVT::v16i8: case MVT::v32i8: { - assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) && + assert((VT.is128BitVector() || Subtarget->hasAVX2()) && "256-bit byte-blends require AVX2 support!"); + // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. + if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG)) + return Masked; + // Scale the blend by the number of bytes per element. int Scale = VT.getScalarSizeInBits() / 8; @@ -6760,11 +7373,11 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, Hi = DAG.getBitcast(AlignVT, Hi); return DAG.getBitcast( - VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, + VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi, DAG.getConstant(Rotation * Scale, DL, MVT::i8))); } - assert(VT.getSizeInBits() == 128 && + assert(VT.is128BitVector() && "Rotate-based lowering only supports 128-bit lowering!"); assert(Mask.size() <= 16 && "Can shuffle at most 16 bytes in a 128-bit vector!"); @@ -6785,92 +7398,6 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } -/// \brief Compute whether each element of a shuffle is zeroable. -/// -/// A "zeroable" vector shuffle element is one which can be lowered to zero. -/// Either it is an undef element in the shuffle mask, the element of the input -/// referenced is undef, or the element of the input referenced is known to be -/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle -/// as many lanes with this technique as possible to simplify the remaining -/// shuffle. -static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, - SDValue V1, SDValue V2) { - SmallBitVector Zeroable(Mask.size(), false); - - while (V1.getOpcode() == ISD::BITCAST) - V1 = V1->getOperand(0); - while (V2.getOpcode() == ISD::BITCAST) - V2 = V2->getOperand(0); - - bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); - bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); - - for (int i = 0, Size = Mask.size(); i < Size; ++i) { - int M = Mask[i]; - // Handle the easy cases. - if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { - Zeroable[i] = true; - continue; - } - - // If this is an index into a build_vector node (which has the same number - // of elements), dig out the input value and use it. - SDValue V = M < Size ? 
V1 : V2;
-    if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
-      continue;
-
-    SDValue Input = V.getOperand(M % Size);
-    // The UNDEF opcode check really should be dead code here, but not quite
-    // worth asserting on (it isn't invalid, just unexpected).
-    if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
-      Zeroable[i] = true;
-  }
-
-  return Zeroable;
-}
-
-/// \brief Try to emit a bitmask instruction for a shuffle.
-///
-/// This handles cases where we can model a blend exactly as a bitmask due to
-/// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
-                                           SDValue V2, ArrayRef<int> Mask,
-                                           SelectionDAG &DAG) {
-  MVT EltVT = VT.getScalarType();
-  int NumEltBits = EltVT.getSizeInBits();
-  MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
-  SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
-  SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
-                                    IntEltVT);
-  if (EltVT.isFloatingPoint()) {
-    Zero = DAG.getBitcast(EltVT, Zero);
-    AllOnes = DAG.getBitcast(EltVT, AllOnes);
-  }
-  SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-  SDValue V;
-  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
-    if (Zeroable[i])
-      continue;
-    if (Mask[i] % Size != i)
-      return SDValue(); // Not a blend.
-    if (!V)
-      V = Mask[i] < Size ? V1 : V2;
-    else if (V != (Mask[i] < Size ? V1 : V2))
-      return SDValue(); // Can only let one input through the mask.
-
-    VMaskOps[i] = AllOnes;
-  }
-  if (!V)
-    return SDValue(); // No non-zeroable elements!
-
-  SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
-  V = DAG.getNode(VT.isFloatingPoint()
-                  ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
-                  DL, VT, V, VMask);
-  return V;
-}
-
 /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
 ///
 /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -6982,7 +7509,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
     // Determine the extraction length from the part of the
     // lower half that isn't zeroable.
     int Len = HalfSize;
-    for (; Len >= 0; --Len)
+    for (; Len > 0; --Len)
      if (!Zeroable[Len - 1])
        break;
    assert(Len > 0 && "Zeroable shuffle mask");
@@ -6997,8 +7524,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
      SDValue &V = (M < Size ? V1 : V2);
      M = M % Size;

-      // All mask elements must be in the lower half.
-      if (M > HalfSize)
+      // The extracted elements must start at a valid index and all mask
+      // elements must be in the lower half.
+      if (i > M || M >= HalfSize)
        return SDValue();

      if (Idx < 0 || (Src == V && Idx == (M - i))) {
@@ -7095,64 +7623,104 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
 ///
 /// Given a specific number of elements, element bit width, and extension
 /// stride, produce either a zero or any extension based on the available
-/// features of the subtarget.
+/// features of the subtarget. The extended elements are consecutive and
+/// can start from an offset element index in the input; to avoid excess
+/// shuffling, the offset must either be in the bottom (first 128-bit) lane
+/// or at the start of a higher lane. All extended elements must come from
+/// the same lane.
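// Before the implementation, a standalone check of the offset rule just
// stated (a minimal sketch with a hypothetical helper, not part of the
// patch): a base offset is legal if it lies in lane 0 or is lane-aligned.
#include <cassert>

static bool isLegalExtensionOffset(int Offset, int EltBits) {
  int NumEltsPerLane = 128 / EltBits; // elements per 128-bit lane
  return Offset >= 0 &&
         (Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0);
}

int main() {
  // v32i8 (AVX2): 16 bytes per lane. Offset 3 is inside lane 0, offset 16
  // starts lane 1, but offset 20 lands mid-way through lane 1.
  assert(isLegalExtensionOffset(3, 8));
  assert(isLegalExtensionOffset(16, 8));
  assert(!isLegalExtensionOffset(20, 8));
  return 0;
}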
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
-    SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
+    SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
     ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
   assert(Scale > 1 && "Need a scale to extend.");
-  int NumElements = VT.getVectorNumElements();
   int EltBits = VT.getScalarSizeInBits();
+  int NumElements = VT.getVectorNumElements();
+  int NumEltsPerLane = 128 / EltBits;
+  int OffsetLane = Offset / NumEltsPerLane;
   assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
          "Only 8, 16, and 32 bit elements can be extended.");
   assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+  assert(0 <= Offset && "Extension offset must be non-negative.");
+  assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
+         "Extension offset must be in the first lane or start an upper lane.");
+
+  // Check that an index is in the same lane as the base offset.
+  auto SafeOffset = [&](int Idx) {
+    return OffsetLane == (Idx / NumEltsPerLane);
+  };
+
+  // Shift along an input so that the offset base moves to the first element.
+  auto ShuffleOffset = [&](SDValue V) {
+    if (!Offset)
+      return V;
+
+    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+    for (int i = 0; i * Scale < NumElements; ++i) {
+      int SrcIdx = i + Offset;
+      ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
+    }
+    return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
+  };

   // Found a valid zext mask! Try various lowering strategies based on the
   // input type and available ISA extensions.
   if (Subtarget->hasSSE41()) {
+    // Not worth offsetting 128-bit vectors if scale == 2; a pattern using
+    // PUNPCK will catch this in a later shuffle match.
+    if (Offset && Scale == 2 && VT.is128BitVector())
+      return SDValue();
     MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                  NumElements / Scale);
-    return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+    InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV));
+    return DAG.getBitcast(VT, InputV);
   }

+  assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
+
   // For any extends we can cheat for larger element sizes and use shuffle
   // instructions that can fold with a load and/or copy.
   if (AnyExt && EltBits == 32) {
-    int PSHUFDMask[4] = {0, -1, 1, -1};
+    int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
+                         -1};
     return DAG.getBitcast(
         VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                         DAG.getBitcast(MVT::v4i32, InputV),
                         getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
   }
   if (AnyExt && EltBits == 16 && Scale > 2) {
-    int PSHUFDMask[4] = {0, -1, 0, -1};
+    int PSHUFDMask[4] = {Offset / 2, -1,
+                         SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
     InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                          DAG.getBitcast(MVT::v4i32, InputV),
                          getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
-    int PSHUFHWMask[4] = {1, -1, -1, -1};
+    int PSHUFWMask[4] = {1, -1, -1, -1};
+    unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
     return DAG.getBitcast(
-        VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+        VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
                         DAG.getBitcast(MVT::v8i16, InputV),
-                        getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+                        getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
   }

   // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
   // to 64-bits.
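// A standalone model of the EXTRQI step used below (my reading of the
// instruction, sketched with hypothetical names): extract LenBits bits
// starting at bit IdxBits of a 64-bit lane and zero-extend the result. The
// code below passes (EltBits, Offset * EltBits) for those two fields.
#include <cassert>
#include <cstdint>

static uint64_t extrqi(uint64_t Lane, unsigned LenBits, unsigned IdxBits) {
  uint64_t FieldMask = (LenBits >= 64) ? ~0ull : ((1ull << LenBits) - 1);
  return (Lane >> IdxBits) & FieldMask;
}

int main() {
  // Zero-extend byte 1 (0xBB) of the low 64-bit lane, mirroring the
  // Offset-based LoIdx computation below with EltBits = 8 and Offset = 1.
  assert(extrqi(0xDDCCBBAAull, /*LenBits=*/8, /*IdxBits=*/8) == 0xBBull);
  return 0;
}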
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); - assert(VT.getSizeInBits() == 128 && "Unexpected vector width!"); + assert(VT.is128BitVector() && "Unexpected vector width!"); + int LoIdx = Offset * EltBits; SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(0, DL, MVT::i8))); - if (isUndefInRange(Mask, NumElements/2, NumElements/2)) + DAG.getConstant(LoIdx, DL, MVT::i8))); + + if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) || + !SafeOffset(Offset + 1)) return DAG.getNode(ISD::BITCAST, DL, VT, Lo); - SDValue Hi = - DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(EltBits, DL, MVT::i8))); + int HiIdx = (Offset + 1) * EltBits; + SDValue Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(HiIdx, DL, MVT::i8))); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } @@ -7163,9 +7731,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) { assert(NumElements == 16 && "Unexpected byte vector width!"); SDValue PSHUFBMask[16]; - for (int i = 0; i < 16; ++i) - PSHUFBMask[i] = - DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8); + for (int i = 0; i < 16; ++i) { + int Idx = Offset + (i / Scale); + PSHUFBMask[i] = DAG.getConstant( + (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); + } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast(VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, @@ -7173,13 +7743,30 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( MVT::v16i8, PSHUFBMask))); } + // If we are extending from an offset, ensure we start on a boundary that + // we can unpack from. + int AlignToUnpack = Offset % (NumElements / Scale); + if (AlignToUnpack) { + SmallVector ShMask((unsigned)NumElements, -1); + for (int i = AlignToUnpack; i < NumElements; ++i) + ShMask[i - AlignToUnpack] = i; + InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); + Offset -= AlignToUnpack; + } + // Otherwise emit a sequence of unpacks. do { + unsigned UnpackLoHi = X86ISD::UNPCKL; + if (Offset >= (NumElements / 2)) { + UnpackLoHi = X86ISD::UNPCKH; + Offset -= (NumElements / 2); + } + MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? 
DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); InputV = DAG.getBitcast(InputVT, InputV); - InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext); + InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; @@ -7205,7 +7792,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); int Bits = VT.getSizeInBits(); + int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); + int NumEltsPerLane = NumElements / NumLanes; assert(VT.getScalarSizeInBits() <= 32 && "Exceeds 32-bit integer zero extension limit"); assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size"); @@ -7215,8 +7804,11 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( auto Lower = [&](int Scale) -> SDValue { SDValue InputV; bool AnyExt = true; + int Offset = 0; + int Matches = 0; for (int i = 0; i < NumElements; ++i) { - if (Mask[i] == -1) + int M = Mask[i]; + if (M == -1) continue; // Valid anywhere but doesn't tell us anything. if (i % Scale != 0) { // Each of the extended elements need to be zeroable. @@ -7230,14 +7822,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( // Each of the base elements needs to be consecutive indices into the // same input vector. - SDValue V = Mask[i] < NumElements ? V1 : V2; - if (!InputV) + SDValue V = M < NumElements ? V1 : V2; + M = M % NumElements; + if (!InputV) { InputV = V; - else if (InputV != V) + Offset = M - (i / Scale); + } else if (InputV != V) return SDValue(); // Flip-flopping inputs. - if (Mask[i] % NumElements != i / Scale) + // Offset must start in the lowest 128-bit lane or at the start of an + // upper lane. + // FIXME: Is it ever worth allowing a negative base offset? + if (!((0 <= Offset && Offset < NumEltsPerLane) || + (Offset % NumEltsPerLane) == 0)) + return SDValue(); + + // If we are offsetting, all referenced entries must come from the same + // lane. + if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane)) + return SDValue(); + + if ((M % NumElements) != (Offset + (i / Scale))) return SDValue(); // Non-consecutive strided elements. + Matches++; } // If we fail to find an input, we have a zero-shuffle which should always @@ -7246,8 +7853,13 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( if (!InputV) return SDValue(); + // If we are offsetting, don't extend if we only match a single input, we + // can always do better by using a basic PSHUF or PUNPCK. + if (Offset != 0 && Matches < 2) + return SDValue(); + return lowerVectorShuffleAsSpecificZeroOrAnyExtend( - DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG); + DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. @@ -7355,8 +7967,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. - if (SDValue V2S = getScalarValueForVectorElement( - V2, Mask[V2Index] - Mask.size(), DAG)) { + SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), + DAG); + if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. 
V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { @@ -7431,11 +8044,65 @@ static SDValue lowerVectorShuffleAsElementInsertion( return V2; } +/// \brief Try to lower broadcast of a single - truncated - integer element, +/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. +/// +/// This assumes we have AVX2. +static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, + int BroadcastIdx, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Subtarget->hasAVX2() && + "We can only lower integer broadcasts with AVX2!"); + + EVT EltVT = VT.getVectorElementType(); + EVT V0VT = V0.getValueType(); + + assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); + assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); + + EVT V0EltVT = V0VT.getVectorElementType(); + if (!V0EltVT.isInteger()) + return SDValue(); + + const unsigned EltSize = EltVT.getSizeInBits(); + const unsigned V0EltSize = V0EltVT.getSizeInBits(); + + // This is only a truncation if the original element type is larger. + if (V0EltSize <= EltSize) + return SDValue(); + + assert(((V0EltSize % EltSize) == 0) && + "Scalar type sizes must all be powers of 2 on x86!"); + + const unsigned V0Opc = V0.getOpcode(); + const unsigned Scale = V0EltSize / EltSize; + const unsigned V0BroadcastIdx = BroadcastIdx / Scale; + + if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) && + V0Opc != ISD::BUILD_VECTOR) + return SDValue(); + + SDValue Scalar = V0.getOperand(V0BroadcastIdx); + + // If we're extracting non-least-significant bits, shift so we can truncate. + // Hopefully, we can fold away the trunc/srl/load into the broadcast. + // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer + // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd. + if (const int OffsetIdx = BroadcastIdx % Scale) + Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar, + DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType())); + + return DAG.getNode(X86ISD::VBROADCAST, DL, VT, + DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); +} + /// \brief Try to lower broadcast of a single element. /// /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. +/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, ArrayRef Mask, const X86Subtarget *Subtarget, @@ -7476,7 +8143,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, int BeginIdx = (int)ConstantIdx->getZExtValue(); int EndIdx = - BeginIdx + (int)VInner.getValueType().getVectorNumElements(); + BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements(); if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) { BroadcastIdx -= BeginIdx; V = VInner; @@ -7491,6 +8158,15 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Check if this is a broadcast of a scalar. We special case lowering // for scalars so that we can more effectively fold with loads. + // First, look through bitcast: if the original value has a larger element + // type than the shuffle, the broadcast element is in essence truncated. + // Make that explicit to ease folding. 
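// A standalone model of the truncated-broadcast index math above (editor's
// sketch with hypothetical values): when a wide scalar of V0EltSize bits is
// viewed as EltSize-bit elements, broadcast element BroadcastIdx maps to
// wide element BroadcastIdx / Scale, shifted right by
// (BroadcastIdx % Scale) * EltSize before truncating.
#include <cassert>
#include <cstdint>

static uint32_t truncBroadcastElt(uint64_t WideElt, unsigned EltSize,
                                  unsigned OffsetIdx) {
  return (uint32_t)(WideElt >> (OffsetIdx * EltSize));
}

int main() {
  // Broadcasting 32-bit element 1 of a 64-bit scalar: Scale = 2, so wide
  // element 0 is shifted by 1 * 32 bits and then truncated to i32.
  assert(truncBroadcastElt(0x11223344AABBCCDDull, 32, 1) == 0x11223344u);
  return 0;
}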
+  if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
+    if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
+            DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+      return TruncBroadcast;
+
+  // Also check the simpler case, where we can directly reuse the scalar.
   if (V.getOpcode() == ISD::BUILD_VECTOR ||
       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
     V = V.getOperand(BroadcastIdx);
@@ -7499,6 +8175,20 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
     // Only AVX2 has register broadcasts.
     if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
       return SDValue();
+  } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+    // If we are broadcasting a load that is only used by the shuffle
+    // then we can reduce the vector load to the broadcasted scalar load.
+    LoadSDNode *Ld = cast<LoadSDNode>(V);
+    SDValue BaseAddr = Ld->getOperand(1);
+    EVT AddrVT = BaseAddr.getValueType();
+    EVT SVT = VT.getScalarType();
+    unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+    SDValue NewAddr = DAG.getNode(
+        ISD::ADD, DL, AddrVT, BaseAddr,
+        DAG.getConstant(Offset, DL, AddrVT));
+    V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
+                    DAG.getMachineFunction().getMachineMemOperand(
+                        Ld->getMemOperand(), Offset, SVT.getStoreSize()));
   } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
     // We can't broadcast from a vector register without AVX2, and we can only
     // broadcast from the zero-element of a vector register.
@@ -7595,9 +8285,10 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
 /// because for floating point vectors we have a generalized SHUFPS lowering
 /// strategy that handles everything that doesn't *exactly* match an unpack,
 /// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
-                                          SDValue V2, ArrayRef<int> Mask,
-                                          SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT,
+                                                    SDValue V1, SDValue V2,
+                                                    ArrayRef<int> Mask,
+                                                    SelectionDAG &DAG) {
   assert(!VT.isFloatingPoint() &&
          "This routine only supports integer vectors.");
   assert(!isSingleInputShuffleMask(Mask) &&
@@ -7774,10 +8465,9 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return Blend;

   // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+  if (SDValue V =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+    return V;

   unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
   return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
@@ -7869,10 +8559,9 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return Blend;

   // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+  if (SDValue V =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+    return V;

   // Try to use byte rotation instructions.
   // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
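// A standalone sketch of the load-narrowing address math above (hypothetical
// plain pointers stand in for the MachineMemOperand plumbing): broadcasting
// element BroadcastIdx of a loaded vector only needs the scalar located at
// BaseAddr + BroadcastIdx * sizeof(element).
#include <cassert>
#include <cstdint>

static const uint32_t *broadcastScalarAddr(const uint32_t *BaseAddr,
                                           unsigned BroadcastIdx) {
  return BaseAddr + BroadcastIdx; // Offset = BroadcastIdx * store size
}

int main() {
  uint32_t Vec[4] = {10, 20, 30, 40};
  // Broadcasting element 2 loads only Vec[2] instead of the whole vector.
  assert(*broadcastScalarAddr(Vec, 2) == 30);
  return 0;
}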
@@ -8077,14 +8766,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) + return V; // Otherwise fall back to a SHUFPS lowering strategy. return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); @@ -8161,14 +8845,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. @@ -8184,8 +8863,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask, DAG); // Try to lower by permuting the inputs into an unpack instruction. - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, + V2, Mask, DAG)) return Unpack; // We implement this with SHUFPS because it can blend from two vectors. @@ -8218,7 +8897,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, static SDValue lowerV8I16GeneralSingleInputVectorShuffle( SDLoc DL, MVT VT, SDValue V, MutableArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - assert(VT.getScalarType() == MVT::i16 && "Bad input type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); assert(Mask.size() == 8 && "Shuffle mask length doen't match!"); @@ -8286,16 +8965,18 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( assert(AToAInputs.size() + BToAInputs.size() == 4 && "Must call this with either 3:1 or 1:3 inputs (summing to 4)."); + bool ThreeAInputs = AToAInputs.size() == 3; + // Compute the index of dword with only one word among the three inputs in // a half by taking the sum of the half with three inputs and subtracting // the sum of the actual three inputs. The difference is the remaining // slot. int ADWord, BDWord; - int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord; - int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord; - int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset; - ArrayRef TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs; - int OneInput = AToAInputs.size() == 3 ? 
BToAInputs[0] : AToAInputs[0]; + int &TripleDWord = ThreeAInputs ? ADWord : BDWord; + int &OneInputDWord = ThreeAInputs ? BDWord : ADWord; + int TripleInputOffset = ThreeAInputs ? AOffset : BOffset; + ArrayRef TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs; + int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); int TripleNonInputIdx = TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); @@ -8364,8 +9045,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs); } else { assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!"); - int APinnedIdx = - AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput; + int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput; FixFlippedInputs(APinnedIdx, ADWord, AToBInputs); } } @@ -8751,10 +9431,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Shift; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V1, Mask, {0, 0, 1, 1, 2, 2, 3, 3})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V1); - if (isShuffleEquivalent(V1, V1, Mask, {4, 4, 5, 5, 6, 6, 7, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, @@ -8798,10 +9477,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 2, 10, 3, 11})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 12, 5, 13, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( @@ -8812,8 +9490,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, + V2, Mask, DAG)) return Unpack; // If we can't directly blend but can use PSHUFB, that will be better as it @@ -9037,17 +9715,14 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return V; } + if (SDValue Masked = + lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return Masked; + // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {// Low half. - 0, 16, 1, 17, 2, 18, 3, 19, - // High half. - 4, 20, 5, 21, 6, 22, 7, 23})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {// Low half. - 8, 24, 9, 25, 10, 26, 11, 27, - // High half. - 12, 28, 13, 29, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) + return V; // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly // with PSHUFB. 
It is important to do this before we attempt to generate any @@ -9086,8 +9761,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // FIXME: It might be worth trying to detect if the unpack-feeding // shuffles will both be pshufb, in which case we shouldn't bother with // this. - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v16i8, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( + DL, MVT::v16i8, V1, V2, Mask, DAG)) return Unpack; } @@ -9296,7 +9971,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; - MVT ScalarVT = VT.getScalarType(); + MVT ScalarVT = VT.getVectorElementType(); MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); // Rather than splitting build-vectors, just build two narrower build @@ -9308,7 +9983,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, MVT OrigVT = V.getSimpleValueType(); int OrigNumElements = OrigVT.getVectorNumElements(); int OrigSplitNumElements = OrigNumElements / 2; - MVT OrigScalarVT = OrigVT.getScalarType(); + MVT OrigScalarVT = OrigVT.getVectorElementType(); MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); SDValue LoV, HiV; @@ -9478,7 +10153,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, ArrayRef Mask, SelectionDAG &DAG) { // FIXME: This should probably be generalized for 512-bit vectors as well. - assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!"); + assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); int LaneSize = Mask.size() / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be @@ -9682,6 +10357,108 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask); } +/// Lower shuffles where an entire half of a 256-bit vector is UNDEF. +/// This allows for fast cases such as subvector extraction/insertion +/// or shuffling smaller vector types which can lower more efficiently. +static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + + bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts); + bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts); + if (!UndefLower && !UndefUpper) + return SDValue(); + + // Upper half is undef and lower half is whole upper subvector. + // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + if (UndefUpper && + isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { + SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, + DAG.getIntPtrConstant(HalfNumElts, DL)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, + DAG.getIntPtrConstant(0, DL)); + } + + // Lower half is undef and upper half is whole lower subvector. + // e.g. 
vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+  if (UndefLower &&
+      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                             DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+                       DAG.getIntPtrConstant(HalfNumElts, DL));
+  }
+
+  // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+  if (UndefLower && Subtarget->hasAVX2() &&
+      (VT == MVT::v4f64 || VT == MVT::v4i64))
+    return SDValue();
+
+  // If the shuffle only uses the lower halves of the input operands,
+  // then extract them and perform the 'half' shuffle at half width.
+  // e.g. vector_shuffle <0, 1, 8, 9, u, u, u, u>
+  int HalfIdx1 = -1, HalfIdx2 = -1;
+  SmallVector<int, 8> HalfMask;
+  unsigned Offset = UndefLower ? HalfNumElts : 0;
+  for (unsigned i = 0; i != HalfNumElts; ++i) {
+    int M = Mask[i + Offset];
+    if (M < 0) {
+      HalfMask.push_back(M);
+      continue;
+    }
+
+    // Determine which of the 4 half vectors this element is from.
+    // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
+    int HalfIdx = M / HalfNumElts;
+
+    // Only shuffle using the lower halves of the inputs.
+    // TODO: Investigate usefulness of shuffling with upper halves.
+    if (HalfIdx != 0 && HalfIdx != 2)
+      return SDValue();
+
+    // Determine the element index into its half vector source.
+    int HalfElt = M % HalfNumElts;
+
+    // We can shuffle with up to 2 half vectors; set the new 'half'
+    // shuffle mask accordingly.
+    if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) {
+      HalfMask.push_back(HalfElt);
+      HalfIdx1 = HalfIdx;
+      continue;
+    }
+    if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) {
+      HalfMask.push_back(HalfElt + HalfNumElts);
+      HalfIdx2 = HalfIdx;
+      continue;
+    }
+
+    // Too many half vectors referenced.
+    return SDValue();
+  }
+  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+  auto GetHalfVector = [&](int HalfIdx) {
+    if (HalfIdx < 0)
+      return DAG.getUNDEF(HalfVT);
+    SDValue V = (HalfIdx < 2 ? V1 : V2);
+    HalfIdx = (HalfIdx % 2) * HalfNumElts;
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+                       DAG.getIntPtrConstant(HalfIdx, DL));
+  };
+
+  SDValue Half1 = GetHalfVector(HalfIdx1);
+  SDValue Half2 = GetHalfVector(HalfIdx2);
+  SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+                     DAG.getIntPtrConstant(Offset, DL));
+}
+
 /// \brief Test whether the specified input (0 or 1) is in-place blended by the
 /// given mask.
 ///
@@ -9776,16 +10553,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                    DAG);
   }

-  // X86 has dedicated unpack instructions that can handle specific blend
-  // operations: UNPCKH and UNPCKL.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
-  if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+  // Use dedicated unpack instructions for masks that match their pattern.
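// A standalone sketch of the per-lane unpack patterns that
// lowerVectorShuffleWithUNPCK matches (hypothetical helper, not part of the
// patch): within each 128-bit lane, UNPCKL interleaves the low half of that
// lane of V1 with the low half of the same lane of V2; UNPCKH uses the high
// halves.
#include <cassert>
#include <vector>

static std::vector<int> unpackMask(int NumElts, int EltsPerLane, bool High) {
  std::vector<int> Mask;
  for (int Lane = 0; Lane < NumElts; Lane += EltsPerLane)
    for (int i = 0; i < EltsPerLane / 2; ++i) {
      int Base = Lane + i + (High ? EltsPerLane / 2 : 0);
      Mask.push_back(Base);           // element taken from V1
      Mask.push_back(Base + NumElts); // element taken from V2
    }
  return Mask;
}

int main() {
  // v4f64 UNPCKL: reproduces the {0, 4, 2, 6} mask that the removed
  // isShuffleEquivalent call above spelled out by hand.
  assert((unpackMask(4, 2, /*High=*/false) == std::vector<int>{0, 4, 2, 6}));
  return 0;
}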
+ if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) + return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) @@ -9876,14 +10647,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Shift; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) + return V; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, @@ -9941,14 +10707,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) + return V; // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. We also need to squash the @@ -9974,9 +10735,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (Subtarget->hasAVX2()) return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8f32, - DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, - MVT::v8i32, VPermMask)), - V1); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, @@ -10041,14 +10800,9 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) + return V; } // Try to use shift instructions. 
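// A standalone sketch of the single-input VPERMV semantics used above
// (hypothetical helper and values): unlike an immediate-controlled shuffle,
// the mask operand is itself a vector of source indices, so R[i] =
// V[MaskV[i]] and any element can move across lanes.
#include <cassert>
#include <vector>

static std::vector<int> permv(const std::vector<int> &MaskV,
                              const std::vector<int> &V) {
  std::vector<int> R(V.size());
  for (size_t i = 0; i < V.size(); ++i)
    R[i] = V[(size_t)MaskV[i]];
  return R;
}

int main() {
  // An 8-element cross-lane reversal, something a single PSHUFD immediate
  // could not express.
  std::vector<int> V = {10, 11, 12, 13, 14, 15, 16, 17};
  assert((permv({7, 6, 5, 4, 3, 2, 1, 0}, V) ==
          std::vector<int>{17, 16, 15, 14, 13, 12, 11, 10}));
  return 0;
}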
@@ -10115,18 +10869,9 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane: - 0, 16, 1, 17, 2, 18, 3, 19, - // Second 128-bit lane: - 8, 24, 9, 25, 10, 26, 11, 27})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane: - 4, 20, 5, 21, 6, 22, 7, 23, - // Second 128-bit lane: - 12, 28, 13, 29, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) + return V; // Try to use shift instructions. if (SDValue Shift = @@ -10215,22 +10960,9 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Use dedicated unpack instructions for masks that match their pattern. - // Note that these are repeated 128-bit lane unpacks, not unpacks across all - // 256-bit lanes. - if (isShuffleEquivalent( - V1, V2, Mask, - {// First 128-bit lane: - 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, - // Second 128-bit lane: - 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2); - if (isShuffleEquivalent( - V1, V2, Mask, - {// First 128-bit lane: - 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, - // Second 128-bit lane: - 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) + return V; // Try to use shift instructions. if (SDValue Shift = @@ -10296,12 +11028,17 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, VT, V1, V2, Mask, Subtarget, DAG)) return Insertion; - // There is a really nice hard cut-over between AVX1 and AVX2 that means we can - // check for those subtargets here and avoid much of the subtarget querying in - // the per-vector-type lowering routines. With AVX1 we have essentially *zero* - // ability to manipulate a 256-bit vector with integer types. Since we'll use - // floating point types there eventually, just immediately cast everything to - // a float and operate entirely in that domain. + // Handle special cases where the lower or upper half is UNDEF. + if (SDValue V = + lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) + return V; + + // There is a really nice hard cut-over between AVX1 and AVX2 that means we + // can check for those subtargets here and avoid much of the subtarget + // querying in the per-vector-type lowering routines. With AVX1 we have + // essentially *zero* ability to manipulate a 256-bit vector with integer + // types. Since we'll use floating point types there eventually, just + // immediately cast everything to a float and operate entirely in that domain. if (VT.isInteger() && !Subtarget->hasAVX2()) { int ElementBits = VT.getScalarSizeInBits(); if (ElementBits < 32) @@ -10334,6 +11071,57 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } +/// \brief Try to lower a vector shuffle as a 128-bit shuffles. 
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
+                                        ArrayRef<int> Mask,
+                                        SDValue V1, SDValue V2,
+                                        SelectionDAG &DAG) {
+  assert(VT.getScalarSizeInBits() == 64 &&
+         "Unexpected element type size for 128bit shuffle.");
+
+  // Handling a 256-bit vector would require VLX, and
+  // lowerV2X128VectorShuffle() is most probably a better solution there.
+  assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle.");
+
+  SmallVector<int, 4> WidenedMask;
+  if (!canWidenShuffleElements(Mask, WidenedMask))
+    return SDValue();
+
+  // Form a 128-bit permutation.
+  // Convert the 64-bit shuffle mask selection values into 128-bit selection
+  // bits defined by a vshuf64x2 instruction's immediate control byte.
+  unsigned PermMask = 0, Imm = 0;
+  unsigned ControlBitsNum = WidenedMask.size() / 2;
+
+  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+    if (WidenedMask[i] == SM_SentinelZero)
+      return SDValue();
+
+    // Use the first element in place of an undef mask.
+    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+  }
+
+  return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+                     DAG.getConstant(PermMask, DL, MVT::i8));
+}
+
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+                                           ArrayRef<int> Mask, SDValue V1,
+                                           SDValue V2, SelectionDAG &DAG) {
+  assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+
+  MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+  MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+  SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+  if (isSingleInputShuffleMask(Mask))
+    return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+  return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+}
+
 /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
 static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
@@ -10345,21 +11133,21 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

-  // X86 has dedicated unpack instructions that can handle specific blend
-  // operations: UNPCKH and UNPCKL.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+  if (SDValue Shuf128 =
+          lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
+    return Shuf128;

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+  if (SDValue Unpck =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+    return Unpck;
+
+  return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); @@ -10367,22 +11155,11 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. - 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) + return Unpck; - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); + return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 8-lane 64-bit integer shuffles. @@ -10396,21 +11173,21 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // X86 has dedicated unpack instructions that can handle specific blend - // operations: UNPCKH and UNPCKL. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); + if (SDValue Shuf128 = + lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Shuf128; - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Unpck; + + return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); @@ -10418,22 +11195,11 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. 
-                                   10, 26, 11, 27, 14, 30, 15, 31}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+  if (SDValue Unpck =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+    return Unpck;

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+  return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10448,8 +11214,7 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
   assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+  return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 64-lane 8-bit integer shuffles.
@@ -10517,6 +11282,60 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
 }

+// Lower vXi1 vector shuffles.
+// There is no dedicated instruction on AVX-512 that shuffles the masks.
+// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
+// vector, shuffle, and then truncate it back.
+static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+                                      MVT VT, const X86Subtarget *Subtarget,
+                                      SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> Mask = SVOp->getMask();
+  assert(Subtarget->hasAVX512() &&
+         "Cannot lower 512-bit vectors w/o basic ISA!");
+  MVT ExtVT;
+  switch (VT.SimpleTy) {
+  default:
+    llvm_unreachable("Expected a vector of i1 elements");
+  case MVT::v2i1:
+    ExtVT = MVT::v2i64;
+    break;
+  case MVT::v4i1:
+    ExtVT = MVT::v4i32;
+    break;
+  case MVT::v8i1:
+    ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+    break;
+  case MVT::v16i1:
+    ExtVT = MVT::v16i32;
+    break;
+  case MVT::v32i1:
+    ExtVT = MVT::v32i16;
+    break;
+  case MVT::v64i1:
+    ExtVT = MVT::v64i8;
+    break;
+  }
+
+  if (ISD::isBuildVectorAllZeros(V1.getNode()))
+    V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+  else if (ISD::isBuildVectorAllOnes(V1.getNode()))
+    V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+  else
+    V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
+
+  if (V2.isUndef())
+    V2 = DAG.getUNDEF(ExtVT);
+  else if (ISD::isBuildVectorAllZeros(V2.getNode()))
+    V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+  else if (ISD::isBuildVectorAllOnes(V2.getNode()))
+    V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+  else
+    V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
+  return DAG.getNode(ISD::TRUNCATE, DL, VT,
+                     DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask));
+}
+
 /// \brief Top-level lowering for x86 vector shuffles.
/// /// This handles decomposition, canonicalization, and lowering of all x86 @@ -10533,8 +11352,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, MVT VT = Op.getSimpleValueType(); int NumElements = VT.getVectorNumElements(); SDLoc dl(Op); + bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); - assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); + assert((VT.getSizeInBits() != 64 || Is1BitVector) && + "Can't lower MMX shuffles"); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; @@ -10572,7 +11393,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // elements wider than 64 bits, but it might be interesting to form i128 // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector WidenedMask; - if (VT.getScalarSizeInBits() < 64 && + if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && canWidenShuffleElements(Mask, WidenedMask)) { MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) @@ -10640,17 +11461,17 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, } // For each vector width, delegate to a specialized lowering routine. - if (VT.getSizeInBits() == 128) + if (VT.is128BitVector()) return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); - if (VT.getSizeInBits() == 256) + if (VT.is256BitVector()) return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); - // Force AVX-512 vectors to be scalarized for now. - // FIXME: Implement AVX-512 support! - if (VT.getSizeInBits() == 512) + if (VT.is512BitVector()) return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); + if (Is1BitVector) + return lower1BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); llvm_unreachable("Unimplemented!"); } @@ -10661,11 +11482,16 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue) { MaskValue = 0; unsigned NumElems = BuildVector->getNumOperands(); + // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. + // We don't handle the >2 lanes case right now. unsigned NumLanes = (NumElems - 1) / 8 + 1; + if (NumLanes > 2) + return false; + unsigned NumElemsInLane = NumElems / NumLanes; - // Blend for v16i16 should be symetric for the both lanes. + // Blend for v16i16 should be symmetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { SDValue EltCond = BuildVector->getOperand(i); SDValue SndLaneEltCond = @@ -10673,20 +11499,25 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, int Lane1Cond = -1, Lane2Cond = -1; if (isa(EltCond)) - Lane1Cond = !isZero(EltCond); + Lane1Cond = !isNullConstant(EltCond); if (isa(SndLaneEltCond)) - Lane2Cond = !isZero(SndLaneEltCond); + Lane2Cond = !isNullConstant(SndLaneEltCond); + unsigned LaneMask = 0; if (Lane1Cond == Lane2Cond || Lane2Cond < 0) // Lane1Cond != 0, means we want the first argument. // Lane1Cond == 0, means we want the second argument. // The encoding of this argument is 0 for the first argument, 1 // for the second. Therefore, invert the condition. 
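// A standalone sketch of the inversion noted above (hypothetical helper): a
// nonzero VSELECT condition element selects the first operand, but a BLENDI
// immediate bit of 0 selects the first operand, so the condition is negated
// before being shifted into place.
#include <cassert>

static unsigned blendBitFor(int LaneCond, unsigned i) {
  return (unsigned)(!LaneCond) << i;
}

int main() {
  // Condition true (take first arg) -> immediate bit 0; condition false
  // (take second arg) -> immediate bit 1, here at element position 3.
  assert(blendBitFor(/*LaneCond=*/1, 3) == 0u);
  assert(blendBitFor(/*LaneCond=*/0, 3) == (1u << 3));
  return 0;
}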
- MaskValue |= !Lane1Cond << i; + LaneMask = !Lane1Cond << i; else if (Lane1Cond < 0) - MaskValue |= !Lane2Cond << i; + LaneMask = !Lane2Cond << i; else return false; + + MaskValue |= LaneMask; + if (NumLanes == 2) + MaskValue |= LaneMask << NumElemsInLane; } return true; } @@ -10711,7 +11542,8 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) { SDValue CondElt = CondBV->getOperand(i); Mask.push_back( - isa(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1); + isa(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0) + : -1); } return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask); } @@ -10776,9 +11608,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { } if (VT.getSizeInBits() == 16) { - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode( ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, @@ -10801,8 +11632,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDNode *User = *Op.getNode()->use_begin(); if ((User->getOpcode() != ISD::STORE || - (isa(Op.getOperand(1)) && - cast(Op.getOperand(1))->isNullValue())) && + isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); @@ -10900,10 +11730,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); - //if (IdxVal >= NumElems/2) - // IdxVal -= NumElems/2; - IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk; + // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2 + // this can be done with a mask. + IdxVal &= ElemsPerChunk - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getConstant(IdxVal, dl, MVT::i32)); } @@ -10918,8 +11749,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { SDValue Vec = Op.getOperand(0); - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), @@ -10951,8 +11781,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return Op; // UNPCKHPD the element to the lowest double word, then movsd. @@ -11039,7 +11868,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Insert the element into the desired chunk. unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); - unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128; + assert(isPowerOf2_32(NumEltsIn128)); + // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo. 
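// A standalone check of the power-of-two modulo identity relied on above:
// when N is a power of two, x % N == (x & (N - 1)), which is why the masked
// form below can replace the explicit division and multiplication.
#include <cassert>

int main() {
  for (unsigned IdxVal = 0; IdxVal < 64; ++IdxVal)
    for (unsigned N = 1; N <= 16; N <<= 1) // powers of two only
      assert((IdxVal & (N - 1)) == IdxVal % N);
  return 0;
}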
+ unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, dl, MVT::i32)); @@ -11078,8 +11909,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. - const Function *F = DAG.getMachineFunction().getFunction(); - bool MinSize = F->hasFnAttribute(Attribute::MinSize); + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather @@ -11199,14 +12029,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, // --> load32 addr if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && - OpVT.is256BitVector() && SubVecVT.is128BitVector() && - !Subtarget->isUnalignedMem32Slow()) { - SDValue SubVec2 = Vec.getOperand(1); - if (auto *Idx2 = dyn_cast(Vec.getOperand(2))) { - if (Idx2->getZExtValue() == 0) { - SDValue Ops[] = { SubVec2, SubVec }; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) - return Ld; + OpVT.is256BitVector() && SubVecVT.is128BitVector()) { + auto *Idx2 = dyn_cast(Vec.getOperand(2)); + if (Idx2 && Idx2->getZExtValue() == 0) { + SDValue SubVec2 = Vec.getOperand(1); + // If needed, look through a bitcast to get to the load. + if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST) + SubVec2 = SubVec2.getOperand(0); + + if (auto *FirstLd = dyn_cast(SubVec2)) { + bool Fast; + unsigned Alignment = FirstLd->getAlignment(); + unsigned AS = FirstLd->getAddressSpace(); + const X86TargetLowering *TLI = Subtarget->getTargetLowering(); + if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), + OpVT, AS, Alignment, &Fast) && Fast) { + SDValue Ops[] = { SubVec2, SubVec }; + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + return Ld; + } } } } @@ -11218,37 +12059,9 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); - if (OpVT.getVectorElementType() == MVT::i1) { - if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal - return Op; - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(OpVT); - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); + if (OpVT.getVectorElementType() == MVT::i1) + return Insert1BitVector(Op, DAG); - if (IdxVal == OpVT.getVectorNumElements() / 2) { - // Zero upper bits of the Vec - Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - if (IdxVal == 0) { - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - // Zero upper bits of the Vec2 - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits); - // Zero lower bits of the Vec - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - 
Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - // Merge them together - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - } return SDValue(); } @@ -11363,7 +12176,8 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // load. if (isGlobalStubReference(OpFlag)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } @@ -11430,7 +12244,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -11587,7 +12402,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, } Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // The address of the thread local variable is the add of the thread @@ -11599,10 +12415,18 @@ SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); + + // Cygwin uses emutls. + // FIXME: It may be EmulatedTLS-generic also for X86-Android. + if (Subtarget->isTargetWindowsCygwin()) + return LowerToTLSEmulatedModel(GA, DAG); + const GlobalValue *GV = GA->getGlobal(); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -11830,10 +12654,10 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + SDValue Chain = DAG.getStore( + DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, + false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -11855,10 +12679,9 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, MachineMemOperand *MMO; if (FI) { int SSFI = FI->getIndex(); - MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, ByteSize, ByteSize); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOLoad, ByteSize, ByteSize); } else { MMO = cast(StackSlot)->getMemOperand(); StackSlot = StackSlot.getOperand(1); @@ -11884,16 +12707,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; - MachineMemOperand *MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, SSFISize, SSFISize); + 
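Stepping back to the i1 INSERT_SUBVECTOR hunk above: the deleted inline lowering cleared the unwanted half of a mask register with a shift-left/shift-right pair and OR'ed in the shifted subvector, and the patch moves that logic behind Insert1BitVector. A scalar sketch of the same bit manipulation on a 16-lane mask (the helper name and the fixed 8-bit half are assumptions for illustration):

    #include <cstdint>

    // Insert the low 8 bits of Sub into the upper half of a 16-bit mask,
    // mirroring the removed VSHLI/VSRLI/OR sequence for IdxVal == NumElems/2:
    // the shift pair keeps only the low half of Vec, the left shift places
    // Sub's low bits in the high half, and OR merges the two masks.
    uint16_t insertUpperMaskHalf(uint16_t Vec, uint16_t Sub) {
      uint16_t Lo = (uint16_t)((uint16_t)(Vec << 8) >> 8); // zero upper bits of Vec
      uint16_t Hi = (uint16_t)(Sub << 8);                  // Sub shifted into place
      return (uint16_t)(Lo | Hi);
    }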
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOStore, SSFISize, SSFISize); Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, Ops, Op.getValueType(), MMO); - Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, false, 0); + Result = DAG.getLoad( + Op.getValueType(), DL, Chain, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, false, 0); } return Result; @@ -11937,16 +12760,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); - SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue CLod0 = + DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); - SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue CLod1 = + DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); + // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; @@ -11996,10 +12822,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. + // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - EVT DestVT = Op.getValueType(); + MVT DestVT = Op.getSimpleValueType(); if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, @@ -12025,14 +12852,23 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); // return (float4) lo + fhi; + // We shouldn't use it when unsafe-fp-math is enabled though: we might later + // reassociate the two FADDs, and if we do that, the algorithm fails + // spectacularly (PR24512). + // FIXME: If we ever have some kind of Machine FMF, this should be marked + // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because + // there's also the MachineCombiner reassociations happening on Machine IR. + if (DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + SDLoc DL(Op); SDValue V = Op->getOperand(0); - EVT VecIntVT = V.getValueType(); + MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; - EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; + MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something else than the supported type, e.g., to v4f64, // abort early. - if (VecFloatVT != Op->getValueType(0)) + if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); unsigned NumElts = VecIntVT.getVectorNumElements(); @@ -12070,7 +12906,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, SDValue Low, High; if (Subtarget.hasSSE41()) { - EVT VecI16VT = Is128 ? 
MVT::v8i16 : MVT::v16i16; + MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); @@ -12108,6 +12944,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); + // TODO: Are there any fast-math-flags to propagate here? SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; @@ -12137,11 +12974,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); case MVT::v16i8: case MVT::v16i16: - if (Subtarget->hasAVX512()) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); + assert(Subtarget->hasAVX512()); + return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } - llvm_unreachable(nullptr); } SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -12150,7 +12986,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); - if (Op.getValueType().isVector()) + if (Op.getSimpleValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't @@ -12161,6 +12997,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); + + if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && + (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) { + // Conversions from unsigned i32 to f32/f64 are legal, + // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. + return Op; + } + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); if (SrcVT == MVT::i32 && X86ScalarSSEf64) @@ -12193,10 +13037,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // we must be careful to do the computation in x87 extended precision, not // in SSE. (The generic code can't know it's OK to do this, or how to.) int SSFI = cast(StackSlot)->getIndex(); - MachineMemOperand *MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, 8, 8); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; @@ -12223,24 +13066,52 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), - FudgePtr, MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, 4); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. + // TODO: Are there any fast-math-flags to propagate here? 
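Both unsigned-to-float lowerings in the hunks above rest on the same exponent-bias trick: OR the integer bits into the mantissa of a known power of two, subtract that power of two exactly, and let one final add do the only rounding. A scalar model of the two variants, using the same magic constants the constant-pool loads and blends supply (function names are illustrative):

    #include <cstdint>
    #include <cstring>

    // u64 -> f64: glue each 32-bit half onto 2^52 / 2^84. The two
    // subtractions are exact; only the final add rounds.
    double u64ToF64(uint64_t V) {
      uint64_t LoBits = (V & 0xffffffffu) | 0x4330000000000000ull; // 2^52 + lo32
      uint64_t HiBits = (V >> 32)         | 0x4530000000000000ull; // 2^84 + hi32*2^32
      double Lo, Hi;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      return (Hi - 0x1.0p84) + (Lo - 0x1.0p52);
    }

    // u32 -> f32, the vXi32 variant: 16-bit halves glued onto 2^23 / 2^39.
    float u32ToF32(uint32_t V) {
      uint32_t LoBits = (V & 0xffffu) | 0x4b000000u; // 2^23 + lo16
      uint32_t HiBits = (V >> 16)     | 0x53000000u; // 2^39 + hi16*2^16
      float Lo, Hi;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      float FHi = Hi - (0x1.0p39f + 0x1.0p23f);      // exact: hi16*2^16 - 2^23
      return Lo + FHi;                               // the single rounding step
    }

This also shows why the vXi32 path now bails out under unsafe-fp-math: reassociating the final add against the bias subtraction destroys the single-rounding property (PR24512).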
   SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
   return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
                      DAG.getIntPtrConstant(0, dl));
 }

+// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
+// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
+// just return an <SDValue(), SDValue()> pair.
+// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
+// to i16, i32 or i64, and we lower it to a legal sequence.
+// If lowered to the final integer result we return a <Result, SDValue()> pair.
+// Otherwise we lower it to a sequence ending with a FIST, return a
+// <FIST, StackSlot> pair, and the caller is responsible for loading
+// the final integer result from StackSlot.
 std::pair<SDValue,SDValue>
-X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
-                                   bool IsSigned, bool IsReplace) const {
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+                                   bool IsSigned, bool IsReplace) const {
   SDLoc DL(Op);

   EVT DstTy = Op.getValueType();
+  EVT TheVT = Op.getOperand(0).getValueType();
   auto PtrVT = getPointerTy(DAG.getDataLayout());

-  if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+  if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
+    // f16 must be promoted before using the lowering in this routine.
+    // fp128 does not use this lowering.
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  // If using FIST to compute an unsigned i64, we'll need some fixup
+  // to handle values above the maximum signed i64. A FIST is always
+  // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
+  bool UnsignedFixup = !IsSigned &&
+                       DstTy == MVT::i64 &&
+                       (!Subtarget->is64Bit() ||
+                        !isScalarFPTypeInSSEReg(TheVT));
+
+  if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+    // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
+    // The low 32 bits of the fist result will have the correct uint32 result.
     assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
     DstTy = MVT::i64;
   }
@@ -12258,42 +13129,87 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
       isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
     return std::make_pair(SDValue(), SDValue());

-  // We lower FP->int64 either into FISTP64 followed by a load from a temporary
-  // stack slot, or into the FTOL runtime function.
+  // We lower FP->int64 into FISTP64 followed by a load from a temporary
+  // stack slot.
   MachineFunction &MF = DAG.getMachineFunction();
   unsigned MemSize = DstTy.getSizeInBits()/8;
   int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

   unsigned Opc;
-  if (!IsSigned && isIntegerTypeFTOL(DstTy))
-    Opc = X86ISD::WIN_FTOL;
-  else
-    switch (DstTy.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
-    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
-    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
-    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
-    }
+  switch (DstTy.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+  }

   SDValue Chain = DAG.getEntryNode();
   SDValue Value = Op.getOperand(0);
-  EVT TheVT = Op.getOperand(0).getValueType();
+  SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
+ + if (UnsignedFixup) { + // + // Conversion to unsigned i64 is implemented with a select, + // depending on whether the source value fits in the range + // of a signed i64. Let Thresh be the FP equivalent of + // 0x8000000000000000ULL. + // + // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; + // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); + // Fist-to-mem64 FistSrc + // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent + // to XOR'ing the high 32 bits with Adjust. + // + // Being a power of 2, Thresh is exactly representable in all FP formats. + // For X87 we'd like to use the smallest FP type for this constant, but + // for DAG type consistency we have to match the FP operand type. + + APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000)); + LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + bool LosesInfo = false; + if (TheVT == MVT::f64) + // The rounding mode is irrelevant as the conversion should be exact. + Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &LosesInfo); + else if (TheVT == MVT::f80) + Status = Thresh.convert(APFloat::x87DoubleExtended, + APFloat::rmNearestTiesToEven, &LosesInfo); + + assert(Status == APFloat::opOK && !LosesInfo && + "FP conversion should have been exact"); + + SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); + + SDValue Cmp = DAG.getSetCC(DL, + getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Adjust = DAG.getSelect(DL, MVT::i32, Cmp, + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0x80000000, DL, MVT::i32)); + SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); + Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); + } + // FIXME This causes a redundant load/store if the SSE-class value is already // in memory, such as if it is on the callstack. if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + MachinePointerInfo::getFixedStack(MF, SSFI), false, + false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(TheVT) }; MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, MemSize, MemSize); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOLoad, MemSize, MemSize); Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); @@ -12301,28 +13217,52 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, } MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, MemSize, MemSize); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOStore, MemSize, MemSize); - if (Opc != X86ISD::WIN_FTOL) { + if (UnsignedFixup) { + + // Insert the FIST, load its result as two i32's, + // and XOR the high i32 with Adjust. 
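A scalar model of the UnsignedFixup path just described: the threshold select, the exact subtraction, and the high-word XOR (the function below is illustrative; the real code keeps everything in DAG nodes and FIST stores):

    #include <cstdint>

    // Values below 2^63 convert directly; values at or above it are reduced
    // by 2^63 (exact, since 2^63 <= X < 2^64 allows Sterbenz subtraction)
    // before the signed conversion, and the stolen sign bit is restored by
    // XORing the high 32 bits with Adjust = 0x80000000.
    // Assumes 0 <= X < 2^64, as the FIST path does.
    uint64_t f64ToU64ViaSignedCvt(double X) {
      const double Thresh = 0x1.0p63;           // built from float bits 0x5f000000
      bool Small = X < Thresh;
      uint32_t Adjust = Small ? 0u : 0x80000000u;
      double FistSrc = Small ? X : X - Thresh;  // in [0, 2^63) either way
      int64_t Signed = (int64_t)FistSrc;        // stands in for FIST-to-mem64
      uint64_t Lo = (uint32_t)Signed;
      uint64_t Hi = ((uint32_t)(Signed >> 32)) ^ Adjust;
      return (Hi << 32) | Lo;                   // the Is64Bit join below
    }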
+ + SDValue FistOps[] = { Chain, Value, StackSlot }; + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + FistOps, DstTy, MMO); + + SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot, + MachinePointerInfo(), + false, false, false, 0); + SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot, + DAG.getConstant(4, DL, PtrVT)); + + SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr, + MachinePointerInfo(), + false, false, false, 0); + High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); + + if (Subtarget->is64Bit()) { + // Join High32 and Low32 into a 64-bit result. + // (High32 << 32) | Low32 + Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); + High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32); + High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32, + DAG.getConstant(32, DL, MVT::i8)); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32); + return std::make_pair(Result, SDValue()); + } + + SDValue ResultOps[] = { Low32, High32 }; + + SDValue pair = IsReplace + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps) + : DAG.getMergeValues(ResultOps, DL); + return std::make_pair(pair, SDValue()); + } else { // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); - } else { - SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, - DAG.getVTList(MVT::Other, MVT::Glue), - Chain, Value); - SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX, - MVT::i32, ftol.getValue(1)); - SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX, - MVT::i32, eax.getValue(2)); - SDValue Ops[] = { eax, edx }; - SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops) - : DAG.getMergeValues(Ops, DL); - return std::make_pair(pair, SDValue()); } } @@ -12333,7 +13273,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); - if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1) + if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); // Optimize vectors in AVX mode: @@ -12426,6 +13366,62 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } +static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + SDValue In = Op.getOperand(0); + MVT InVT = In.getSimpleValueType(); + + assert(VT.getVectorElementType() == MVT::i1 && "Unexected vector type."); + + // Shift LSB to MSB and use VPMOVB2M - SKX. 
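Before the new truncate-to-i1 routine, a scalar view of why shifting the LSB into the sign bit suffices (illustrative function; VPMOV*2M and TESTM both key off the most significant bit of each lane):

    #include <cstdint>

    // Truncation to i1 keeps only the LSB, so shift it into the sign bit
    // and let a sign test read it out.
    bool truncToI1(uint16_t Lane) {
      const unsigned ShiftInx = 15;                          // scalar bits - 1
      return ((uint16_t)(Lane << ShiftInx) & 0x8000u) != 0;  // same as Lane & 1
    }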
+  unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
+  if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+       Subtarget->hasBWI()) ||     // legal, will go to VPMOVB2M, VPMOVW2M
+      ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+       InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
+       Subtarget->hasVLX())) {     // legal, will go to VPMOVB2M, VPMOVW2M
+    // Shift packed bytes not supported natively, bitcast to dword
+    MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
+    SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
+                                    DAG.getBitcast(ExtVT, In),
+                                    DAG.getConstant(ShiftInx, DL, ExtVT));
+    ShiftNode = DAG.getBitcast(InVT, ShiftNode);
+    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+  }
+  if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+       Subtarget->hasDQI()) ||     // legal, will go to VPMOVD2M, VPMOVQ2M
+      ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+       InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
+       Subtarget->hasVLX())) {     // legal, will go to VPMOVD2M, VPMOVQ2M
+
+    SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+                                    DAG.getConstant(ShiftInx, DL, InVT));
+    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+  }
+
+  // Shift LSB to MSB, extend if necessary and use TESTM.
+  unsigned NumElts = InVT.getVectorNumElements();
+  if (InVT.getSizeInBits() < 512 &&
+      (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
+       !Subtarget->hasVLX())) {
+    assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
+
+    // TESTD/Q should be used (if BW supported we use CVT2MASK above),
+    // so vector should be extended to packed dword/qword.
+    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+    In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+    InVT = ExtVT;
+    ShiftInx = InVT.getScalarSizeInBits() - 1;
+  }
+
+  SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+                                  DAG.getConstant(ShiftInx, DL, InVT));
+  return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
+}
+
 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
@@ -12443,42 +13439,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
          "Invalid TRUNCATE operation");

-  // move vector to mask - truncate solution for SKX
-  if (VT.getVectorElementType() == MVT::i1) {
-    if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
-        Subtarget->hasBWI())
-      return Op; // legal, will go to VPMOVB2M, VPMOVW2M
-    if ((InVT.is256BitVector() || InVT.is128BitVector())
-        && InVT.getScalarSizeInBits() <= 16 &&
-        Subtarget->hasBWI() && Subtarget->hasVLX())
-      return Op; // legal, will go to VPMOVB2M, VPMOVW2M
-    if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
-        Subtarget->hasDQI())
-      return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
-    if ((InVT.is256BitVector() || InVT.is128BitVector())
-        && InVT.getScalarSizeInBits() >= 32 &&
-        Subtarget->hasDQI() && Subtarget->hasVLX())
-      return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
+  if (VT.getVectorElementType() == MVT::i1)
+    return LowerTruncateVecI1(Op, DAG, Subtarget);
+
+  // vpmovqb/w/d, vpmovdb/w, vpmovwb
+  if (Subtarget->hasAVX512()) {
+    // word to byte only under BWI
+    if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+      return DAG.getNode(X86ISD::VTRUNC, DL, VT,
+                         DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+    return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
   }
-  if
(InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) { - if (VT.getVectorElementType().getSizeInBits() >=8) - return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); - - assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); - unsigned NumElts = InVT.getVectorNumElements(); - assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type"); - if (InVT.getSizeInBits() < 512) { - MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64; - In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); - InVT = ExtVT; - } - - SDValue OneV = - DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT); - SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In); - return DAG.getNode(X86ISD::TESTM, DL, VT, And, And); - } - if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { @@ -12583,7 +13554,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. - if (!FIST.getNode()) return Op; + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -12600,7 +13572,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, std::pair Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ false, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; - assert(FIST.getNode() && "Unexpected failure"); + // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -12643,6 +13617,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); + // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to // decide if we should generate a 16-byte constant mask when we only need 4 or // 8 bytes for the scalar case. @@ -12650,11 +13626,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { MVT LogicVT; MVT EltVT; unsigned NumElts; - + if (VT.isVector()) { LogicVT = VT; EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); + } else if (IsF128) { + // SSE instructions are used for optimized f128 logical operations. + LogicVT = MVT::f128; + EltVT = VT; + NumElts = 1; } else { // There are no scalar bitwise logical SSE/AVX instructions, so we // generate a 16-byte vector constant and logic op even for the scalar case. @@ -12675,9 +13656,10 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + SDValue Mask = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); @@ -12685,7 +13667,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? 
Op0.getOperand(0) : Op0; - if (VT.isVector()) + if (VT.isVector() || IsF128) return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); // For the scalar case extend to a 128-bit vector, perform the logic op, @@ -12704,6 +13686,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT SrcVT = Op1.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -12718,13 +13701,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. + assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) && + "Unexpected type in LowerFCOPYSIGN"); const fltSemantics &Sem = - VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle; + VT == MVT::f64 ? APFloat::IEEEdouble : + (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle); const unsigned SizeInBits = VT.getSizeInBits(); SmallVector CV( - VT == MVT::f64 ? 2 : 4, + VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4), ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0)))); // First, clear all bits but the sign bit from the second operand (sign). @@ -12737,11 +13723,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Perform all logic operations as 16-byte vectors because there are no // scalar FP logic instructions in SSE. This allows load folding of the // constants into the logic instructions. - MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; - SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); + MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32); + SDValue Mask1 = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); + if (!IsF128) + Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). @@ -12750,8 +13738,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { APFloat APF = Op0CN->getValueAPF(); // If the magnitude is a positive zero, the sign bit alone is enough. if (APF.isPosZero()) - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? SignBit : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, + DAG.getIntPtrConstant(0, dl)); APF.clearSign(); CV[0] = ConstantFP::get(*Context, APF); } else { @@ -12761,18 +13750,21 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, PtrVT, 16); - SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue Val = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); // If the magnitude operand wasn't a constant, we need to AND out the sign. if (!isa(Op0)) { - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); + if (!IsF128) + Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val); } // OR the magnitude value with the sign bit. 
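The FAND/FANDN/FOR sequence this FCOPYSIGN lowering builds is ordinary copysign bit surgery, now applied to f128 directly as well. A scalar sketch with the same masks (illustrative name; IEEE f64 layout assumed):

    #include <cstdint>
    #include <cstring>

    // Keep everything but the sign from Mag, keep only the sign from Sign,
    // then OR, the same mask dance as the vector logic nodes.
    double copySignBits(double Mag, double Sign) {
      uint64_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sign, sizeof(S));
      const uint64_t SignBit = 0x8000000000000000ull;
      uint64_t R = (M & ~SignBit) | (S & SignBit);
      double Res;
      std::memcpy(&Res, &R, sizeof(Res));
      return Res;
    }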
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? Val : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -12859,7 +13851,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } - EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; + MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) @@ -12999,14 +13991,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (ConstantSDNode *C = dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) { + if (C->isOne() && !Subtarget->slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) { + if (C->isAllOnesValue() && !Subtarget->slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -13135,13 +14127,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { - if (ConstantSDNode *C = dyn_cast(Op1)) { - if (C->getAPIntValue() == 0) - return EmitTest(Op0, X86CC, dl, DAG); + if (isNullConstant(Op1)) + return EmitTest(Op0, X86CC, dl, DAG); - if (Op0.getValueType() == MVT::i1) - llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); - } + assert(!(isa(Op1) && Op0.getValueType() == MVT::i1) && + "Unexpected comparison operation for MVT::i1 operands"); if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { @@ -13150,8 +14140,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // if we're optimizing for size, however, as that'll allow better folding // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && - !DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::MinSize) && + !DAG.getMachineFunction().getFunction()->optForMinSize() && !Subtarget->isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; @@ -13188,6 +14177,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + + // Some 64-bit targets lack SAHF support, but they do support FCOMI. + assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } @@ -13261,13 +14253,8 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. 
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { - return NumUsers > 1; -} - -static bool isAllOnes(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - return C && C->isAllOnesValue(); +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { + return 2; } /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node @@ -13285,8 +14272,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, if (Op1.getOpcode() == ISD::SHL) std::swap(Op0, Op1); if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) - if (And00C->getZExtValue() == 1) { + if (isOneConstant(Op0.getOperand(0))) { // If we looked past a truncate, check that it's only truncating away // known zeros. unsigned BitWidth = Op0.getValueSizeInBits(); @@ -13423,7 +14409,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected type for boolean compare operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0, @@ -13467,8 +14453,8 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 && - Op.getValueType().getScalarType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); @@ -13515,7 +14501,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) for (unsigned i = 0; i < n; ++i) { ConstantSDNode *Elt = dyn_cast(BV->getOperand(i)); - if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT) + if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT) return SDValue(); // Avoid underflow. @@ -13606,13 +14592,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); - EVT OpVT = Op1.getValueType(); + MVT OpVT = Op1.getSimpleValueType(); if (OpVT.getVectorElementType() == MVT::i1) return LowerBoolVSETCC_AVX512(Op, DAG); bool MaskResult = (VT.getVectorElementType() == MVT::i1); if (Subtarget->hasAVX512()) { - if (Op1.getValueType().is512BitVector() || + if (Op1.getSimpleValueType().is512BitVector() || (Subtarget->hasBWI() && Subtarget->hasVLX()) || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); @@ -13628,6 +14614,33 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC)); } + // Lower using XOP integer comparisons. + if ((VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) { + // Translate compare code to XOP PCOM compare mode. 
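For reference while reading the switch that follows, the same encoding as a standalone table (the enum is a stand-in for the ISD::SET* codes; the immediates match the hunk, and signedness is carried by the VPCOM versus VPCOMU opcode rather than the immediate):

    enum class CmpKind { LT, LE, GT, GE, EQ, NE }; // stand-ins for ISD::SET(U)*

    unsigned xopPcomImmediate(CmpKind K) {
      switch (K) {
      case CmpKind::LT: return 0x00;
      case CmpKind::LE: return 0x01;
      case CmpKind::GT: return 0x02;
      case CmpKind::GE: return 0x03;
      case CmpKind::EQ: return 0x04;
      case CmpKind::NE: return 0x05;
      }
      return ~0u; // unreachable
    }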
+ unsigned CmpMode = 0; + switch (SetCCOpcode) { + default: llvm_unreachable("Unexpected SETCC condition"); + case ISD::SETULT: + case ISD::SETLT: CmpMode = 0x00; break; + case ISD::SETULE: + case ISD::SETLE: CmpMode = 0x01; break; + case ISD::SETUGT: + case ISD::SETGT: CmpMode = 0x02; break; + case ISD::SETUGE: + case ISD::SETGE: CmpMode = 0x03; break; + case ISD::SETEQ: CmpMode = 0x04; break; + case ISD::SETNE: CmpMode = 0x05; break; + } + + // Are we comparing unsigned or signed integers? + unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) + ? X86ISD::VPCOMU : X86ISD::VPCOM; + + return DAG.getNode(Opc, dl, VT, Op0, Op1, + DAG.getConstant(CmpMode, dl, MVT::i8)); + } + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. @@ -13777,7 +14790,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); @@ -13818,11 +14831,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->isNullValue() && + isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) { if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC); return NewSetCC; @@ -13831,17 +14842,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. - if (Op1.getOpcode() == ISD::Constant && - (cast(Op1)->getZExtValue() == 1 || - cast(Op1)->isNullValue()) && + if ((isOneConstant(Op1) || isNullConstant(Op1)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // If the input is a setcc, then reuse the input setcc or use a new one with // the inverted condition. 
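The LowerToBT fold used a few lines up treats the two common bit-test shapes as one predicate; BT RHS, N implements both in a single instruction. A scalar sketch (illustrative names, valid for N below the operand width):

    #include <cstdint>

    bool bitSetShiftRight(uint64_t X, unsigned N) { return ((X >> N) & 1) != 0; }
    bool bitSetShiftLeft(uint64_t X, unsigned N)  { return ((1ull << N) & X) != 0; }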
if (Op0.getOpcode() == X86ISD::SETCC) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast(Op1)->isNullValue(); + bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1); if (!Invert) return Op0; @@ -13854,8 +14862,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } } - if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) && - (cast(Op1)->getZExtValue() == 1) && + if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); @@ -13876,6 +14883,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } +SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(), + DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1)); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -13918,7 +14942,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); - EVT VT = Op1.getValueType(); + MVT VT = Op1.getSimpleValueType(); SDValue CC; // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops @@ -13927,7 +14951,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC && ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || (Subtarget->hasSSE1() && VT == MVT::f32)) && - VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) { + VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); int SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); @@ -13961,12 +14985,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Convert to vectors, do a VSELECT, and convert back to scalar. // All of the conversions should be optimized away. - EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; + MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); - EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; + MVT VCmpVT = VT == MVT::f32 ? 
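The SETCCE lowering added above feeds a carry into X86ISD::SBB so that multi-word compares need no branches. A scalar sketch of the idea for one extra limb (illustrative, unsigned less-than only):

    #include <cstdint>

    // One SUB produces a borrow, one SBB consumes it and yields the final
    // borrow: a 128-bit unsigned less-than with no branches or wide math.
    bool ult128(uint64_t LoA, uint64_t HiA, uint64_t LoB, uint64_t HiB) {
      bool Borrow = LoA < LoB;                              // SUB on low limbs
      bool BorrowOut = HiA < HiB || (Borrow && HiA == HiB); // SBB on high limbs
      return BorrowOut;
    }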
MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); @@ -13980,26 +15004,26 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - if (VT.isVector() && VT.getScalarType() == MVT::i1) { - SDValue Op1Scalar; - if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) - Op1Scalar = ConvertI1VectorToInterger(Op1, DAG); - else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) - Op1Scalar = Op1.getOperand(0); - SDValue Op2Scalar; - if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) - Op2Scalar = ConvertI1VectorToInterger(Op2, DAG); - else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) - Op2Scalar = Op2.getOperand(0); - if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, - Op1Scalar.getValueType(), - Cond, Op1Scalar, Op2Scalar); - if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getBitcast(VT, newSelect); - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, - DAG.getIntPtrConstant(0, DL)); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { + SDValue Op1Scalar; + if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) + Op1Scalar = ConvertI1VectorToInteger(Op1, DAG); + else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) + Op1Scalar = Op1.getOperand(0); + SDValue Op2Scalar; + if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) + Op2Scalar = ConvertI1VectorToInteger(Op2, DAG); + else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) + Op2Scalar = Op2.getOperand(0); + if (Op1Scalar.getNode() && Op2Scalar.getNode()) { + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, + Op1Scalar.getValueType(), + Cond, Op1Scalar, Op2Scalar); + if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) + return DAG.getBitcast(VT, newSelect); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, + DAG.getIntPtrConstant(0, DL)); } } @@ -14026,22 +15050,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && - isZero(Cond.getOperand(1).getOperand(1))) { + isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); unsigned CondCode =cast(Cond.getOperand(0))->getZExtValue(); - if ((isAllOnes(Op1) || isAllOnes(Op2)) && + if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { - SDValue Y = isAllOnes(Op2) ? Op1 : Op2; + SDValue Y = isAllOnesConstant(Op2) ? 
Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb - if (ConstantSDNode *YC = dyn_cast(Y)) - if (YC->isNullValue() && - (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { + if (isNullConstant(Y) && + (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, DAG.getConstant(0, DL, @@ -14061,11 +15084,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); - if (isAllOnes(Op1) != (CondCode == X86::COND_E)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); - ConstantSDNode *N2C = dyn_cast(Op2); - if (!N2C || !N2C->isNullValue()) + if (!isNullConstant(Op2)) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } @@ -14073,11 +15095,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -14136,15 +15156,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate if the high bits are known zero. + // Look past the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) - Cond = Cond.getOperand(0); + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. 
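A scalar model of the neg & sbb fold named in the comment above: NEG sets the carry flag exactly when its input is nonzero, and SBB of a register with itself materializes 0 or all-ones from that carry, so the select needs no branch (illustrative function; OR'ing in Y gives the general -1-vs-y form):

    #include <cstdint>

    uint32_t allOnesIfNonZero(uint32_t X) {
      uint32_t Carry = (X != 0) ? 1u : 0u; // carry out of NEG X
      return 0u - Carry;                   // SBB r, r: 0 or 0xFFFFFFFF
    }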
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -14166,11 +15185,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && - (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { + (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && + (isNullConstant(Op1) || isNullConstant(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); - if (isAllOnes(Op1) != (CondCode == X86::COND_B)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; } @@ -14256,8 +15276,8 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, MVT InVT = In.getSimpleValueType(); assert(VT.getSizeInBits() == InVT.getSizeInBits()); - MVT InSVT = InVT.getScalarType(); - assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits()); + MVT InSVT = InVT.getVectorElementType(); + assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits()); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); @@ -14276,7 +15296,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, // As SRAI is only available on i16/i32 types, we expand only up to i32 // and handle i64 separately. - while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) { + while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) { Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); @@ -14286,7 +15306,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDValue SignExt = Curr; if (CurrVT != InVT) { unsigned SignExtShift = - CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits(); + CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(SignExtShift, dl, MVT::i8)); } @@ -14346,7 +15366,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); @@ -14470,7 +15490,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, // memory. In practice, we ''widen'' MemVT. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegZize / MemVT.getScalarType().getSizeInBits()); + loadRegZize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -14518,29 +15538,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, return Sext; } - // Otherwise we'll shuffle the small elements in the high bits of the - // larger type and perform an arithmetic shift. If the shift is not legal - // it's better to scalarize. 
- assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) && - "We can't implement a sext load without an arithmetic right shift!"); - - // Redistribute the loaded elements into the different locations. - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio + SizeRatio - 1] = i; - - SDValue Shuff = DAG.getVectorShuffle( - WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); - - Shuff = DAG.getBitcast(RegVT, Shuff); - - // Build the arithmetic shift. - unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - - MemVT.getVectorElementType().getSizeInBits(); - Shuff = - DAG.getNode(ISD::SRA, dl, RegVT, Shuff, - DAG.getConstant(Amt, dl, RegVT)); + // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest + // lanes. + assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) && + "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!"); + SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } @@ -14577,11 +15580,9 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { static bool isXor1OfSetCC(SDValue Op) { if (Op.getOpcode() != ISD::XOR) return false; - ConstantSDNode *N1C = dyn_cast(Op.getOperand(1)); - if (N1C && N1C->getAPIntValue() == 1) { + if (isOneConstant(Op.getOperand(1))) return Op.getOperand(0).getOpcode() == X86ISD::SETCC && - Op.getOperand(0).hasOneUse(); - } + Op.getOperand(0).hasOneUse(); return false; } @@ -14597,8 +15598,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC) { // Check for setcc([su]{add,sub,mul}o == 0). if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && - isa(Cond.getOperand(1)) && - cast(Cond.getOperand(1))->isNullValue() && + isNullConstant(Cond.getOperand(1)) && Cond.getOperand(0).getResNo() == 1 && (Cond.getOperand(0).getOpcode() == ISD::SADDO || Cond.getOperand(0).getOpcode() == ISD::UADDO || @@ -14625,11 +15625,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -14673,16 +15671,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::INC; X86Cond = X86::COND_O; break; } X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O; break; } @@ -14844,8 +15840,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. 
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -14877,54 +15872,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SplitStack; SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + bool Is64Bit = Subtarget->is64Bit(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); + + SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDNode* Node = Op.getNode(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" - " not tell us which reg is the stack pointer!"); + " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); - SDValue Tmp1 = SDValue(Node, 0); - SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); - SDValue Chain = Tmp1.getOperand(0); - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), - SDLoc(Node)); - - SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), - SDLoc(Node)); - - SDValue Ops[2] = { Tmp1, Tmp2 }; - return DAG.getMergeValues(Ops, dl); - } - - // Get the inputs. 
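The default DYNAMIC_STACKALLOC path now shared above does the classic pointer bump and mask. A scalar sketch of that arithmetic (the 16-byte constant stands in for TFI.getStackAlignment(); Align is a power of two):

    #include <cstdint>

    // AND with -(uint64_t)Align in the hunk is the same as ~(Align - 1).
    uint64_t allocaNewSP(uint64_t SP, uint64_t Size, uint64_t Align) {
      const uint64_t StackAlign = 16;
      uint64_t NewSP = SP - Size;   // bump the stack pointer down
      if (Align > StackAlign)
        NewSP &= ~(Align - 1);      // round down to the requested alignment
      return NewSP;
    }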
- SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); - EVT VT = Op.getNode()->getValueType(0); - - bool Is64Bit = Subtarget->is64Bit(); - MVT SPTy = getPointerTy(DAG.getDataLayout()); - - if (SplitStack) { + Result = DAG.getNode(ISD::AND, dl, VT, Result, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain + } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -14942,10 +15923,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); - SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); - SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX); @@ -14967,9 +15946,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } - SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, dl); + Result = SP; } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -14980,7 +15964,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); - if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { + if (!Subtarget->is64Bit() || + Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -15019,10 +16004,11 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( + Subtarget->isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); - Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo( + SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } @@ -15030,10 +16016,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); - assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && - "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); + + MachineFunction &MF = DAG.getMachineFunction(); + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + // The Win64 ABI uses char* instead of a structure. 
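// For orientation, the SysV x86-64 va_list that the LowerVASTART stores above
// populate; the 8/16 versus 4/12 byte offsets chosen via isTarget64BitLP64()
// fall out of the pointer size. A reference sketch of the well-known ABI
// layout, not a declaration from this patch.
struct SysVX86_64VaList {
  unsigned GPOffset;      // byte 0: next general-purpose register slot
  unsigned FPOffset;      // byte 4: next floating-point register slot
  void *OverflowArgArea;  // byte 8: stack argument area
  void *RegSaveArea;      // byte 16 on LP64, byte 12 on X32
};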
+ return DAG.expandVAArg(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -15061,8 +16050,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. assert(!Subtarget->useSoftFloat() && - !(DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat)) && + !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -15091,8 +16079,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - // X86-64 va_list is a struct { i32, i32, i8*, i8* }. + // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, + // where a va_list is still an i8*. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); + if (Subtarget->isCallingConvWin64( + DAG.getMachineFunction().getFunction()->getCallingConv())) + // Probably a Win64 va_copy. + return DAG.expandVACopy(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); @@ -15230,72 +16224,126 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); - EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -/// \brief Return (and \p Op, \p Mask) for compare instructions or -/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the -/// necessary casting for \p Mask when lowering masking intrinsics. -static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), - MVT::i1, VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); +/// \brief Return Mask with the necessary casting or extending +/// for \p Mask according to \p MaskVT when lowering masking intrinsics +static SDValue getMaskNode(SDValue Mask, MVT MaskVT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG, SDLoc dl) { - assert(MaskVT.isSimple() && "invalid mask type"); + if (MaskVT.bitsGT(Mask.getSimpleValueType())) { + // Mask should be extended + Mask = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); + } - if (isAllOnes(Mask)) - return Op; + if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { + if (MaskVT == MVT::v64i1) { + assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + // In 32-bit mode a bitcast of i64 is illegal; extend/split it. + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(0, dl, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(1, dl, MVT::i32)); + Lo = DAG.getBitcast(MVT::v32i1, Lo); + Hi = DAG.getBitcast(MVT::v32i1, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); + } else { + // MaskVT requires < 64 bits. Truncate the mask (should succeed in any + // case), and bitcast.
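// Scalar model of the v64i1 branch above (an illustrative sketch, not the
// DAG code): on a 32-bit target the i64 mask is split into two i32 halves
// whose bits drive lanes 0-31 and 32-63 before the two v32i1 halves are
// concatenated.
#include <cstdint>
static void SplitMask64(uint64_t Mask, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Mask);        // lanes 0..31
  Hi = static_cast<uint32_t>(Mask >> 32);  // lanes 32..63
}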
+ MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); + return DAG.getBitcast(MaskVT, + DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask)); + } + + } else { + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } +} - switch (Op.getOpcode()) { - default: break; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - } - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc); +/// \brief Return (and \p Op, \p Mask) for compare instructions or +/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the +/// necessary casting or extending for \p Mask when lowering masking intrinsics +static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, + SDValue PreservedSrc, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + unsigned OpcodeSelect = ISD::VSELECT; + SDLoc dl(Op); + + if (isAllOnesConstant(Mask)) + return Op; + + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + switch (Op.getOpcode()) { + default: break; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return DAG.getNode(ISD::AND, dl, VT, Op, VMask); + case X86ISD::VFPCLASS: + case X86ISD::VFPCLASSS: + return DAG.getNode(ISD::OR, dl, VT, Op, VMask); + case X86ISD::VTRUNC: + case X86ISD::VTRUNCS: + case X86ISD::VTRUNCUS: + // We can't use ISD::VSELECT here because it is not always "Legal" + // for the destination type. For example vpmovqb requires only AVX512 + // and a vselect that can operate on byte element types requires BWI + OpcodeSelect = X86ISD::SELECT; + break; + } + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). -/// The mask is comming as MVT::i8 and it should be truncated +/// The mask is coming as MVT::i8 and it should be truncated /// to MVT::i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using -/// "X86select" instead of "vselect". We just can't create the "vselect" node for -/// a scalar instruction. +/// "X86select" instead of "vselect". We just can't create the "vselect" node +/// for a scalar instruction.
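// Per-lane model of the select that getVectorMaskingNode builds above (a
// sketch assuming plain integer lanes; the real node is a VSELECT or
// X86ISD::SELECT over vector types): each result lane takes Op where the
// mask bit is set and PreservedSrc where it is clear.
#include <cstddef>
#include <cstdint>
static void MaskedSelect(const uint32_t *Op, const uint32_t *PreservedSrc,
                         uint64_t Mask, uint32_t *Out, size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    Out[I] = ((Mask >> I) & 1) ? Op[I] : PreservedSrc[I];
}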
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - if (isAllOnes(Mask)) - return Op; + if (isAllOnesConstant(Mask)) + return Op; - EVT VT = Op.getValueType(); - SDLoc dl(Op); - // The mask should be of type MVT::i1 - SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); + MVT VT = Op.getSimpleValueType(); + SDLoc dl(Op); + // The mask should be of type MVT::i1 + SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); + if (Op.getOpcode() == X86ISD::FSETCC) + return DAG.getNode(ISD::AND, dl, VT, Op, IMask); + if (Op.getOpcode() == X86ISD::VFPCLASS || + Op.getOpcode() == X86ISD::VFPCLASSS) + return DAG.getNode(ISD::OR, dl, VT, Op, IMask); + + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } static int getSEHRegistrationNodeSize(const Function *Fn) { @@ -15309,15 +16357,16 @@ static int getSEHRegistrationNodeSize(const Function *Fn) { case EHPersonality::MSVC_CXX: return 16; default: break; } - report_fatal_error("can only recover FP for MSVC EH personality functions"); + report_fatal_error( + "can only recover FP for 32-bit MSVC EH personality functions"); } -/// When the 32-bit MSVC runtime transfers control to us, either to an outlined +/// When the MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. /// Here's the math: /// RegNodeBase = EntryEBP - RegNodeSize -/// ParentFP = RegNodeBase - RegNodeFrameOffset +/// ParentFP = RegNodeBase - ParentFrameOffset /// Subtracting RegNodeSize takes us to the offset of the registration node, and /// subtracting the offset (negative on x86) takes us back to the parent FP. static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, @@ -15334,29 +16383,35 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, if (!Fn->hasPersonalityFn()) return EntryEBP; - int RegNodeSize = getSEHRegistrationNodeSize(Fn); - // Get an MCSymbol that will ultimately resolve to the frame offset of the EH - // registration. + // registration, or the .set_setframe offset. MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( GlobalValue::getRealLinkageName(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); - SDValue RegNodeFrameOffset = + SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); + // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after + // prologue to RBP in the parent function. 
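// Arithmetic sketch of the frame-pointer recovery done just below
// (illustrative only): the 64-bit path returns EntryEBP + ParentFrameOffset
// directly, while the 32-bit path computes
//   RegNodeBase = EntryEBP - RegNodeSize
//   ParentFP    = RegNodeBase - ParentFrameOffset  // offset is negative on x86
#include <cstdint>
static intptr_t RecoverParentFP32(intptr_t EntryEBP, int RegNodeSize,
                                  intptr_t ParentFrameOffset) {
  intptr_t RegNodeBase = EntryEBP - RegNodeSize;
  return RegNodeBase - ParentFrameOffset;
}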
+ const X86Subtarget &Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.is64Bit()) + return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); + + int RegNodeSize = getSEHRegistrationNodeSize(Fn); // RegNodeBase = EntryEBP - RegNodeSize - // ParentFP = RegNodeBase - RegNodeFrameOffset + // ParentFP = RegNodeBase - ParentFrameOffset SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP, DAG.getConstant(RegNodeSize, dl, PtrVT)); - return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset); + return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { @@ -15365,6 +16420,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case INTR_TYPE_2OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case INTR_TYPE_2OP_IMM8: + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), + DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2))); case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -15376,28 +16434,53 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue RoundingMode; + // We always add rounding mode to the Node. + // If the rounding mode is not specified, we add the + // "current direction" mode. if (Op.getNumOperands() == 4) - RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + RoundingMode = + DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); else RoundingMode = Op.getOperand(4); unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; - if (IntrWithRoundingModeOpcode != 0) { - unsigned Round = cast(RoundingMode)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) + if (IntrWithRoundingModeOpcode != 0) + if (cast(RoundingMode)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, RoundingMode), Mask, PassThru, Subtarget, DAG); - } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); - SDValue Passthru = Op.getOperand(2); + SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); + // We add rounding mode to the Node when + // - RM Opcode is specified and + // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(4); + unsigned Round = cast(Rnd)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src, Rnd), + Mask, PassThru, Subtarget, DAG); + } + } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), - Mask, Passthru, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_SCALAR_MASK: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue passThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), + Mask, passThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); @@ -15405,7 +16488,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // There are 2 kinds of intrinsics in this group: - // (1) With supress-all-exceptions (sae) or rounding mode- 6 operands + // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. if (Op.getNumOperands() == 6) { SDValue Sae = Op.getOperand(5); @@ -15421,11 +16504,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } - case INTR_TYPE_2OP_MASK: { + case INTR_TYPE_2OP_MASK: + case INTR_TYPE_2OP_IMM8_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + + if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK) + Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2); + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -15440,8 +16528,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } } - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - Src1,Src2), + // TODO: Intrinsics should have fast-math-flags to propagate. + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_RM: { @@ -15449,7 +16537,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - // We specify 2 possible modes for intrinsics, with/without rounding modes. + // We specify 2 possible modes for intrinsics, with/without rounding + // modes. // First, we check if the intrinsic have rounding mode (6 operands), // if not, we set rounding mode to "current". 
SDValue Rnd; @@ -15461,12 +16550,56 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } - case INTR_TYPE_3OP_MASK: { + case INTR_TYPE_3OP_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); + SDValue Sae = Op.getOperand(6); + + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, + Src2, Src3, Sae), + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_3OP_MASK_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Imm = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + // We specify 2 possible modes for intrinsics, with/without rounding + // modes. + // First, we check if the intrinsic has rounding mode (7 operands), + // if not, we set rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 7) + Rnd = Op.getOperand(6); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Imm, Rnd), + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_3OP_IMM8_MASK: + case INTR_TYPE_3OP_MASK: + case INSERT_SUBVEC: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + + if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) + Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); + else if (IntrData->Type == INSERT_SUBVEC) { + // imm should be adapted to ISD::INSERT_SUBVECTOR behavior + assert(isa(Src3) && "Expected a ConstantSDNode here!"); + unsigned Imm = cast(Src3)->getZExtValue(); + Imm *= Src2.getSimpleValueType().getVectorNumElements(); + Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); + } + + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15486,7 +16619,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK: + case VPERM_3OP_MASK:{ + // Src2 is the PassThru + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else + PassThru = DAG.getBitcast(VT, Src2); + + // Swap Src1 and Src2 in the node creation + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, + dl, Op.getValueType(), + Src2, Src1, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { @@ -15494,11 +16647,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element - if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + if (IntrData->Type == FMA_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else if (IntrData->Type == FMA_OP_MASK3) PassThru = Src3; @@ -15523,6 +16676,50 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } + case TERLOG_OP_MASK: + case TERLOG_OP_MASKZ: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4)); + SDValue Mask = Op.getOperand(5); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = Src1; + // Set PassThru element. + if (IntrData->Type == TERLOG_OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3, Src4), + Mask, PassThru, Subtarget, DAG); + } + case FPCLASS: { + // FPclass intrinsics with mask + SDValue Src1 = Op.getOperand(1); + MVT VT = Src1.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Imm = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); + SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, + DAG.getTargetConstant(0, dl, MaskVT), + Subtarget, DAG); + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, + DAG.getUNDEF(BitcastVT), FPclassMask, + DAG.getIntPtrConstant(0, dl)); + return DAG.getBitcast(Op.getValueType(), Res); + } + case FPCLASSS: { + SDValue Src1 = Op.getOperand(1); + SDValue Imm = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); + SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, + DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask); + } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. 
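// Bitwise model of the VPTERNLOG node created in the TERLOG_OP_MASK case
// above (an illustrative sketch of the documented truth-table semantics,
// not code from this patch): at every bit position the three source bits
// form a 3-bit index into the imm8, whose selected bit becomes the result
// bit.
#include <cstdint>
static uint64_t TernLog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned I = 0; I != 64; ++I) {
    unsigned Idx = (((A >> I) & 1) << 2) | (((B >> I) & 1) << 1) |
                   ((C >> I) & 1);
    R |= static_cast<uint64_t>((Imm >> Idx) & 1) << I;
  }
  return R;
}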
@@ -15534,12 +16731,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // (v2i1 (and (PCMPEQM %a, %b), // (extract_subvector // (v8i1 (bitcast %mask)), 0))), 0)))) - EVT VT = Op.getOperand(1).getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); + MVT VT = Op.getOperand(1).getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { SDValue CC = Op.getOperand(3); @@ -15573,6 +16769,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } + case CMP_MASK_SCALAR_CC: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); + SDValue Mask = Op.getOperand(4); + + SDValue Cmp; + if (IntrData->Opc1 != 0) { + SDValue Rnd = Op.getOperand(5); + if (cast(Rnd)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); + } + //default rounding mode + if(!Cmp.getNode()) + Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC); + + SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, + DAG.getTargetConstant(0, dl, + MVT::i1), + Subtarget, DAG); + + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8, + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask), + DAG.getValueType(MVT::i1)); + } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); @@ -15584,6 +16806,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case COMI_RM: { // Comparison intrinsics with Sae + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue CC = Op.getOperand(3); + SDValue Sae = Op.getOperand(4); + auto ComiType = TranslateX86ConstCondToX86CC(CC); + // choose between ordered and unordered (comi/ucomi) + unsigned comiOp = std::get<0>(ComiType) ? 
IntrData->Opc0 : IntrData->Opc1; + SDValue Cond; + if (cast(Sae)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); + else + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); @@ -15598,27 +16838,75 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); - if (isAllOnes(Mask)) // return data as is + if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, PassThru, Subtarget, DAG); } + case BROADCASTM: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + Mask = DAG.getBitcast(MaskVT, Mask); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); + } case BLEND: { SDValue Mask = Op.getOperand(3); - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } + case KUNPCK: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + // Arguments should be swapped. 
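// Scalar model of the KUNPCK concatenation built right below (a sketch of
// the kunpckbw form; kunpckwd/kunpckdq scale the widths accordingly): the
// low half of the result comes from the second source and the high half
// from the first, which is why the operands are swapped.
#include <cstdint>
static uint16_t KUnpckBW(uint8_t Src1, uint8_t Src2) {
  return static_cast<uint16_t>(Src1) << 8 | Src2;
}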
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, + MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), + Src2, Src1); + return DAG.getBitcast(VT, Res); + } + case CONVERT_TO_MASK: { + MVT SrcVT = Op.getOperand(1).getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); + + SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT, + Op.getOperand(1)); + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, + DAG.getUNDEF(BitcastVT), CvtMask, + DAG.getIntPtrConstant(0, dl)); + return DAG.getBitcast(Op.getValueType(), Res); + } + case CONVERT_MASK_TO_VEC: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + return DAG.getNode(IntrData->Opc0, dl, VT, VMask); + } + case BRCST_SUBVEC_TO_VEC: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + EVT resVT = Passthru.getValueType(); + SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT, + DAG.getUNDEF(resVT), Src, + DAG.getIntPtrConstant(0, dl)); + SDValue immVal; + if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector()) + immVal = DAG.getConstant(0x44, dl, MVT::i8); + else + immVal = DAG.getConstant(0, dl, MVT::i8); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + subVec, subVec, immVal), + Mask, Passthru, Subtarget, DAG); + } default: break; } @@ -15832,23 +17120,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget * Subtarget) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. 
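// Address model for the gather being assembled in this hunk (an
// illustrative sketch; names are hypothetical): every enabled mask lane
// loads from Base + Index[I] * Scale, with the displacement fixed at zero
// above and Scale limited to 1, 2, 4 or 8 by the instruction encoding.
#include <cstddef>
#include <cstdint>
#include <cstring>
static void Gather32(const uint8_t *Base, const int32_t *Index, unsigned Scale,
                     uint16_t Mask, uint32_t *Dst, size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    if ((Mask >> I) & 1)
      std::memcpy(&Dst[I], Base + static_cast<int64_t>(Index[I]) * Scale, 4);
}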
@@ -15860,7 +17142,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); if (Src.getOpcode() == ISD::UNDEF) - Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; @@ -15871,25 +17153,19 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. @@ -15907,12 +17183,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - assert(C && "Invalid scale type"); + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); @@ -16034,64 +17309,59 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Results, DL); } -static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { +static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); - const Function *Fn = MF.getFunction(); + SDValue Chain = Op.getOperand(0); + SDValue RegNode = Op.getOperand(2); + WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); + if (!EHInfo) + report_fatal_error("EH registrations only live in functions using WinEH"); + + // Cast the operand to an alloca, and remember the frame index. + auto *FINode = dyn_cast(RegNode); + if (!FINode) + report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); + EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); + + // Return the chain operand without making any DAG nodes. 
+ return Chain; +} + +/// \brief Lower intrinsics for TRUNCATE_TO_MEM case +/// return truncate Store/MaskedStore Node +static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, + SelectionDAG &DAG, + MVT ElementType) { SDLoc dl(Op); + SDValue Mask = Op.getOperand(4); + SDValue DataToTruncate = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - assert(Subtarget->getFrameLowering()->hasFP(MF) && - "using llvm.x86.seh.restoreframe requires a frame pointer"); + MVT VT = DataToTruncate.getSimpleValueType(); + MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements()); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT VT = TLI.getPointerTy(DAG.getDataLayout()); + if (isAllOnesConstant(Mask)) // return just a truncate store + return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, + MachinePointerInfo(), SVT, false, false, + SVT.getScalarSizeInBits()/8); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - unsigned FrameReg = - RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); - unsigned SPReg = RegInfo->getStackRegister(); - unsigned SlotSize = RegInfo->getSlotSize(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); - // Get incoming EBP. - SDValue IncomingEBP = - DAG.getCopyFromReg(Chain, dl, FrameReg, VT); + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, SVT.getStoreSize(), + SVT.getScalarSizeInBits()/8); - // SP is saved in the first field of every registration node, so load - // [EBP-RegNodeSize] into SP. - int RegNodeSize = getSEHRegistrationNodeSize(Fn); - SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, - DAG.getConstant(-RegNodeSize, dl, VT)); - SDValue NewSP = - DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false, - false, VT.getScalarSizeInBits() / 8); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); - - if (!RegInfo->needsStackRealignment(MF)) { - // Adjust EBP to point back to the original frame position. - SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); - } else { - assert(RegInfo->hasBasePointer(MF) && - "functions with Win32 EH must use frame or base pointer register"); - - // Reload the base pointer (ESI) with the adjusted incoming EBP. - SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP); - - // Reload the spilled EBP value, now that the stack and base pointers are - // set up. 
- X86MachineFunctionInfo *X86FI = MF.getInfo(); - X86FI->setHasSEHFramePtrSave(true); - int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize); - X86FI->setSEHFramePtrSaveIndex(FI); - SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT), - MachinePointerInfo(), false, false, false, - VT.getScalarSizeInBits() / 8); - Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP); - } - - return Chain; + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, + VMask, SVT, MMO, true); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, @@ -16100,16 +17370,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == llvm::Intrinsic::x86_seh_restoreframe) - return LowerSEHRESTOREFRAME(Op, Subtarget, DAG); + if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + return MarkEHRegistrationNode(Op, DAG); return SDValue(); } SDLoc dl(Op); switch(IntrData->Type) { - default: - llvm_unreachable("Unknown Intrinsic Type"); - break; + default: llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -16214,8 +17482,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = DataToCompress.getValueType(); - if (isAllOnes(Mask)) // return just a store + MVT VT = DataToCompress.getSimpleValueType(); + if (isAllOnesConstant(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); @@ -16227,15 +17495,21 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); } + case TRUNCATE_TO_MEM_VI8: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8); + case TRUNCATE_TO_MEM_VI16: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16); + case TRUNCATE_TO_MEM_VI32: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); - if (isAllOnes(Mask)) // return just a load + if (isAllOnesConstant(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); @@ -16359,6 +17633,21 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } +unsigned X86TargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) + return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; + + return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; +} + +unsigned X86TargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Funclet personalities don't use selectors (the runtime does the selection). + assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); + return Subtarget->isTarget64BitLP64() ? 
X86::RDX : X86::EDX; +} + SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -16497,9 +17786,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.hasAttribute(Idx, Attribute::InReg)) + if (Attrs.hasAttribute(Idx, Attribute::InReg)) { + auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. - InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; + InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; + } if (InRegCount > 2) { report_fatal_error("Nest register in use - reduce number of inreg" @@ -16588,8 +17879,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, 2, 2); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOStore, 2, 2); SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, @@ -16623,12 +17914,75 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } -static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { +/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction. +// +// 1. i32/i64 128/256-bit vectors (native support requires VLX) are expanded +// to a 512-bit vector. +// 2. i8/i16 vector implemented using dword LZCNT vector instruction +// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, +// split the vector, perform the operation on its Lo and Hi parts and +// concatenate the results. +static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - EVT OpVT = VT; + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + + if (EltVT == MVT::i64 || EltVT == MVT::i32) { + // Extend to 512 bit vector. + assert((VT.is256BitVector() || VT.is128BitVector()) && + "Unsupported value type for operation"); + + MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits()); + SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, + DAG.getUNDEF(NewVT), + Op.getOperand(0), + DAG.getIntPtrConstant(0, dl)); + SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode, + DAG.getIntPtrConstant(0, dl)); + } + + assert((EltVT == MVT::i8 || EltVT == MVT::i16) && + "Unsupported element type"); + + if (16 < NumElems) { + // Split the vector; its Lo and Hi parts will be handled in the next iteration. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); + MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2); + + Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); + } + + MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); + + assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && + "Unsupported value type for operation"); + + // Use the natively supported vector instruction vplzcntd.
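// Scalar model of the dword-LZCNT trick applied right below (an
// illustrative sketch, not the lowering itself): zero-extend the element to
// 32 bits, count leading zeros there, then subtract the 32 - EltBits zeros
// contributed by the extension.
#include <cstdint>
static unsigned Ctlz32(uint32_t X) { // portable stand-in for one vplzcntd lane
  unsigned N = 32;
  for (; X; X >>= 1)
    --N;
  return N;
}
static unsigned Ctlz16(uint16_t X) {
  return Ctlz32(X) - (32 - 16); // Delta = 32 - EltVT.getSizeInBits()
}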
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); + SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op); + SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); + SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); + + return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); +} + +static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); + if (VT.isVector() && Subtarget->hasAVX512()) + return LowerVectorCTLZ_AVX512(Op, DAG); + Op = Op.getOperand(0); if (VT == MVT::i8) { // Zero extend to i32 since there is not an i8 bsr. @@ -16658,7 +18012,8 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -16686,13 +18041,39 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - unsigned NumBits = VT.getSizeInBits(); + unsigned NumBits = VT.getScalarSizeInBits(); SDLoc dl(Op); - Op = Op.getOperand(0); + + if (VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + SDValue N0 = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, dl, VT); + + // lsb(x) = (x & -x) + SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0, + DAG.getNode(ISD::SUB, dl, VT, Zero, N0)); + + // cttz_undef(x) = (width - 1) - ctlz(lsb) + if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF && + TLI.isOperationLegal(ISD::CTLZ, VT)) { + SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT); + return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne, + DAG.getNode(ISD::CTLZ, dl, VT, LSB)); + } + + // cttz(x) = ctpop(lsb - 1) + SDValue One = DAG.getConstant(1, dl, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, + DAG.getNode(ISD::SUB, dl, VT, LSB, One)); + } + + assert(Op.getOpcode() == ISD::CTTZ && + "Only scalar CTTZ requires custom lowering"); // Issue a bsf (scan bits forward) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(VT, MVT::i32); - Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op); + Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0)); // If src is zero (i.e. bsf sets ZF), returns NumBits. SDValue Ops[] = { @@ -16753,6 +18134,13 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { return Lower256IntArith(Op, DAG); } +static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); @@ -16885,7 +18273,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SDValue AhiBlo = Ahi; SDValue AloBhi = Bhi; // Bit cast to 32-bit vectors for MULUDQ - EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : + MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? 
MVT::v8i32 : MVT::v16i32; A = DAG.getBitcast(MulVT, A); B = DAG.getBitcast(MulVT, B); @@ -16962,7 +18350,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); - EVT VT = Op0.getValueType(); + MVT VT = Op0.getSimpleValueType(); SDLoc dl(Op); assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || @@ -17034,7 +18422,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Ops, dl); } -// Return true if the requred (according to Opcode) shift-imm form is natively +// Return true if the required (according to Opcode) shift-imm form is natively // supported by the Subtarget static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { @@ -17054,14 +18442,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, } // The shift amount is a variable, but it is the same for all vector lanes. -// These instrcutions are defined together with shift-immediate. +// These instructions are defined together with shift-immediate. static bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } -// Return true if the requred (according to Opcode) variable-shift form is +// Return true if the required (according to Opcode) variable-shift form is // natively supported by the Subtarget static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { @@ -17133,27 +18521,37 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // i64 SRA needs to be performed as partial shifts. if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && - Op.getOpcode() == ISD::SRA) + Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP()) return ArithmeticShiftRight64(ShiftAmt); - if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { + if (VT == MVT::v16i8 || + (Subtarget->hasInt256() && VT == MVT::v32i8) || + VT == MVT::v64i8) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - if (Op.getOpcode() == ISD::SHL) { - // Simple i8 add case - if (ShiftAmt == 1) - return DAG.getNode(ISD::ADD, dl, VT, R, R); + // Simple i8 add case + if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) + return DAG.getNode(ISD::ADD, dl, VT, R, R); + // ashr(R, 7) === cmp_slt(R, 0) + if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) { + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); + } + + // XOP can shift v16i8 directly instead of as shift v8i16 + mask. + if (VT == MVT::v16i8 && Subtarget->hasXOP()) + return SDValue(); + + if (Op.getOpcode() == ISD::SHL) { // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. - SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); + DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. @@ -17161,24 +18559,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, R, ShiftAmt, DAG); SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. 
- SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); + DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT)); } if (Op.getOpcode() == ISD::SRA) { - if (ShiftAmt == 7) { - // R s>> 7 === R s< 0 - SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); - } - - // R s>> a === ((R u>> a) ^ m) - m + // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask) SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); - SmallVector V(NumElts, - DAG.getConstant(128 >> ShiftAmt, dl, - MVT::i8)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V); + + SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; @@ -17189,35 +18577,51 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. - if (!Subtarget->is64Bit() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && - Amt.getOpcode() == ISD::BITCAST && - Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + if (!Subtarget->is64Bit() && !Subtarget->hasXOP() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) { + + // Peek through any splat that was introduced for i64 shift vectorization. + int SplatIndex = -1; + if (ShuffleVectorSDNode *SVN = dyn_cast(Amt.getNode())) + if (SVN->isSplat()) { + SplatIndex = SVN->getSplatIndex(); + Amt = Amt.getOperand(0); + assert(SplatIndex < (int)VT.getVectorNumElements() && + "Splat shuffle referencing second operand"); + } + + if (Amt.getOpcode() != ISD::BITCAST || + Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; + unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio); for (unsigned i = 0; i != Ratio; ++i) { - ConstantSDNode *C = dyn_cast(Amt.getOperand(i)); + ConstantSDNode *C = dyn_cast(Amt.getOperand(i + BaseOp)); if (!C) return SDValue(); // 6 == Log2(64) ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); } - // Check remaining shift amounts. - for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { - uint64_t ShAmt = 0; - for (unsigned j = 0; j != Ratio; ++j) { - ConstantSDNode *C = - dyn_cast(Amt.getOperand(i + j)); - if (!C) + + // Check remaining shift amounts (if not a splat). 
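// Scalar model of the byte ashr emulation earlier in this hunk
// (illustrative sketch): with no native i8 arithmetic shift, do a logical
// shift and restore the sign via the xor/sub mask trick
// ashr(R, Amt) == sub(xor(lshr(R, Amt), Mask), Mask) with Mask = 128 >> Amt.
#include <cstdint>
static int8_t AshrViaLshr(uint8_t R, unsigned Amt) {
  unsigned Res = R >> Amt;     // logical shift
  unsigned Mask = 128u >> Amt; // where the sign bit lands after the shift
  return static_cast<int8_t>((Res ^ Mask) - Mask);
}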
+ if (SplatIndex < 0) { + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = dyn_cast(Amt.getOperand(i + j)); + if (!C) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) return SDValue(); - // 6 == Log2(64) - ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); } - if (ShAmt != ShiftAmt) - return SDValue(); } if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) @@ -17245,7 +18649,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) { SDValue BaseShAmt; - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); if (BuildVectorSDNode *BV = dyn_cast(Amt)) { // Check if this build_vector node is doing a splat. @@ -17262,7 +18666,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, unsigned SplatIdx = (unsigned)SVN->getSplatIndex(); SDValue InVec = Amt.getOperand(0); if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - assert((SplatIdx < InVec.getValueType().getVectorNumElements()) && + assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) && "Unexpected shuffle index found!"); BaseShAmt = InVec.getOperand(SplatIdx); } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { @@ -17327,11 +18731,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return V; if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) - return V; + return V; if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode())) return Op; + // XOP has 128-bit variable logical/arithmetic shifts. + // +ve/-ve Amt = shift left/right. + if (Subtarget->hasXOP() && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v8i16 || VT == MVT::v16i8)) { + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) { + SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl); + Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt); + } + if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) + return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); + if (Op.getOpcode() == ISD::SRA) + return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); + } + // 2i64 vector logical shifts can efficiently avoid scalarization - do the // shifts per-lane and then shuffle the partial results back together. if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) { @@ -17343,6 +18762,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); } + // i64 vector arithmetic shift can be emulated with the transform: + // M = lshr(SIGN_BIT, Amt) + // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) + if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) && + Op.getOpcode() == ISD::SRA) { + SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); + R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + R = DAG.getNode(ISD::XOR, dl, VT, R, M); + R = DAG.getNode(ISD::SUB, dl, VT, R, M); + return R; + } + // If possible, lower this packed shift into a vector multiply instead of // expanding it into a sequence of scalar shifts. // Do this only if the vector shift count is a constant build_vector. 
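// The scalar identity behind the multiply-based lowering described above
// (a sketch): a left shift by a known amount is a multiply by the matching
// power of two, so a build_vector of constant shift amounts can become one
// vector multiply.
#include <cstdint>
static uint32_t ShlViaMul(uint32_t X, unsigned Amt) {
  return X * (1u << Amt); // same as X << Amt for Amt < 32
}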
@@ -17351,9 +18783,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
        (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
       ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
     SmallVector<SDValue, 8> Elts;
-    EVT SVT = VT.getScalarType();
+    MVT SVT = VT.getVectorElementType();
     unsigned SVTBits = SVT.getSizeInBits();
-    const APInt &One = APInt(SVTBits, 1);
+    APInt One(SVTBits, 1);
     unsigned NumElems = VT.getVectorNumElements();
 
     for (unsigned i=0; i !=NumElems; ++i) {
@@ -17364,7 +18796,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
       }
 
       ConstantSDNode *ND = cast<ConstantSDNode>(Op);
-      const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+      APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
       uint64_t ShAmt = C.getZExtValue();
       if (ShAmt >= SVTBits) {
         Elts.push_back(DAG.getUNDEF(SVT));
@@ -17443,7 +18875,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
 
   if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
       isa<ConstantSDNode>(Amt2)) {
     // Replace this node with two shifts followed by a MOVSS/MOVSD.
-    EVT CastVT = MVT::v4i32;
+    MVT CastVT = MVT::v4i32;
     SDValue Splat1 =
         DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
     SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
@@ -17507,7 +18939,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
     return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
   }
 
-  if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+  if (VT == MVT::v16i8 ||
+      (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
     unsigned ShiftOpcode = Op->getOpcode();
 
@@ -17627,7 +19060,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
                        DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
   }
 
-  if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+  if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
     SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -17710,7 +19143,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
   if (VT.is256BitVector()) {
     unsigned NumElems = VT.getVectorNumElements();
     MVT EltVT = VT.getVectorElementType();
-    EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+    MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
 
     // Extract the two vectors
     SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
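The packed-shift-to-multiply path above works because a left shift by a per-lane constant is just a multiply by a per-lane power of two, so one PMULLW/PMULLD can stand in for a sequence of scalar shifts. A small sketch of the per-lane equivalence (the names are ours, not the patch's):

    #include <cassert>
    #include <cstdint>

    // shl(x, c) == mul(x, 1 << c) in every lane, modulo the lane width.
    int main() {
      uint16_t lanes[8] = {1, 2, 3, 4, 50, 600, 7000, 40000};
      unsigned amts[8]  = {0, 1, 2, 3, 4, 5, 6, 7}; // per-lane shift amounts
      for (int i = 0; i != 8; ++i) {
        uint16_t byShift = uint16_t(lanes[i] << amts[i]);
        uint16_t byMul   = uint16_t(lanes[i] * uint16_t(1u << amts[i]));
        assert(byShift == byMul);                   // identical modulo 2^16
      }
      return 0;
    }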
@@ -17743,6 +19176,40 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
   return SDValue();
 }
 
+static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+                           SelectionDAG &DAG) {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+
+  assert(VT.isVector() && "Custom lowering only for vector rotates!");
+  assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+  assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
+
+  // XOP has 128-bit vector variable + immediate rotates.
+  // +ve/-ve Amt = rotate left/right.
+
+  // Split 256-bit integers.
+  if (VT.is256BitVector())
+    return Lower256IntArith(Op, DAG);
+
+  assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
+
+  // Attempt to rotate by immediate.
+  if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+    if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
+      uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
+      assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
+      return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
+                         DAG.getConstant(RotateAmt, DL, MVT::i8));
+    }
+  }
+
+  // Use general rotate by variable (per-element).
+  return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
+}
+
 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
   // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@@ -17759,8 +19226,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   case ISD::SADDO:
     // A subtract of one will be selected as a INC. Note that INC doesn't
     // set CF, so we can't do this for UADDO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
-      if (C->isOne()) {
+    if (isOneConstant(RHS)) {
         BaseOp = X86ISD::INC;
         Cond = X86::COND_O;
         break;
@@ -17775,8 +19241,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   case ISD::SSUBO:
     // A subtract of one will be selected as a DEC. Note that DEC doesn't
     // set CF, so we can't do this for USUBO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
-      if (C->isOne()) {
+    if (isOneConstant(RHS)) {
        BaseOp = X86ISD::DEC;
        Cond = X86::COND_O;
        break;
@@ -17827,7 +19292,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
 /// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
 /// Used to know whether to use cmpxchg8/16b when expanding atomic operations
 /// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   unsigned OpWidth = MemType->getPrimitiveSizeInBits();
 
   if (OpWidth == 64)
@@ -17844,21 +19309,23 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
-  return needsCmpXchgNb(PTy->getElementType());
+  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
+                                               : AtomicExpansionKind::None;
 }
 
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
-  const Type *MemType = AI->getType();
+  Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
   // and default to library calls otherwise.
   if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
-    return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
-                                   : AtomicRMWExpansionKind::None;
+    return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+                                   : AtomicExpansionKind::None;
   }
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
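For the operations that return AtomicExpansionKind::CmpXChg, the IR-level expansion is a compare-exchange retry loop. A rough C++ model of what an expanded atomicrmw nand becomes (a sketch of the shape, not the actual AtomicExpandPass output):

    #include <atomic>
    #include <cassert>

    // Ops like nand/max have no single lock-prefixed x86 instruction, so
    // they are expanded to a compare-exchange loop like this one.
    static int atomic_nand(std::atomic<int> &a, int v) {
      int old = a.load();
      while (!a.compare_exchange_weak(old, ~(old & v)))
        ;          // 'old' is refreshed on failure; retry until it sticks
      return old;  // atomicrmw yields the previous value
    }

    int main() {
      std::atomic<int> a(0b1100);
      int prev = atomic_nand(a, 0b1010);
      assert(prev == 0b1100 && a.load() == ~(0b1100 & 0b1010));
      return 0;
    }

By contrast, add/sub/xchg map directly to xadd/xchg, and or/and/xor can use a lock-prefixed instruction whenever the result value is unused.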
@@ -17869,14 +19336,14 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
     // It's better to use xadd, xsub or xchg for these in all cases.
-    return AtomicRMWExpansionKind::None;
+    return AtomicExpansionKind::None;
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
     // If the atomicrmw's result isn't actually used, we can just add a "lock"
     // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
-                            : AtomicRMWExpansionKind::None;
+    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
+                            : AtomicExpansionKind::None;
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -17884,7 +19351,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::UMin:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
-    return AtomicRMWExpansionKind::CmpXChg;
+    return AtomicExpansionKind::CmpXChg;
   }
 }
 
@@ -17898,7 +19365,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) {
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
-  const Type *MemType = AI->getType();
+  Type *MemType = AI->getType();
 
   // Accesses larger than the native width are turned into cmpxchg/libcalls, so
   // there is no benefit in turning such RMWs into loads, and it is actually
   // harmful as it introduces a mfence.
@@ -17926,7 +19393,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   // lowered to just a load without a fence. A mfence flushes the store buffer,
   // making the optimization clearly correct.
   // FIXME: it is required if isAtLeastRelease(Order) but it is not clear
-  // otherwise, we might be able to be more agressive on relaxed idempotent
+  // otherwise, we might be able to be more aggressive on relaxed idempotent
   // rmw. In practice, they do not look useful, so we don't try to be
   // especially clever.
   if (SynchScope == SingleThread)
@@ -18043,7 +19510,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
   SDValue InVec = Op->getOperand(0);
   SDLoc dl(Op);
   unsigned NumElts = SrcVT.getVectorNumElements();
-  EVT SVT = SrcVT.getVectorElementType();
+  MVT SVT = SrcVT.getVectorElementType();
 
   // Widen the vector in input in the case of MVT::v2i32.
   // Example: from MVT::v2i32 to MVT::v4i32.
@@ -18103,7 +19570,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
   // chunks, thus directly computes the pop count for v2i64 and v4i64.
   if (EltVT == MVT::i64) {
     SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
-    V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+    V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
     return DAG.getBitcast(VT, V);
   }
 
@@ -18119,9 +19587,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
 
   // Do the horizontal sums into two v2i64s.
   Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
-  Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+  MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+  Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                     DAG.getBitcast(ByteVecVT, Low), Zeros);
-  High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+  High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                      DAG.getBitcast(ByteVecVT, High), Zeros);
 
   // Merge them together.
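PSADBW computes sums of absolute differences of unsigned bytes; against an all-zero vector this degenerates to a horizontal add of the eight bytes in each 64-bit chunk, which is the operation being retyped above and the finishing step of the vector popcount lowering. A scalar model (assumes the GCC/Clang __builtin_popcount builtins):

    #include <cassert>
    #include <cstdint>

    // |b - 0| == b for unsigned bytes, so PSADBW vs. zero sums each chunk.
    static uint64_t psadbw_chunk(const uint8_t bytes[8]) {
      uint64_t sum = 0;
      for (int i = 0; i != 8; ++i)
        sum += bytes[i];
      return sum;
    }

    int main() {
      uint64_t v = 0xF00F00FF0F0F3C5AULL;
      uint8_t perByte[8];
      for (int i = 0; i != 8; ++i)   // per-byte popcount (the CTPOP step)
        perByte[i] = uint8_t(__builtin_popcount(unsigned((v >> (8 * i)) & 0xFF)));
      assert(psadbw_chunk(perByte) == uint64_t(__builtin_popcountll(v)));
      return 0;
    }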
@@ -18311,7 +19780,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
 
 static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
-  assert(Op.getValueType().isVector() &&
+  assert(Op.getSimpleValueType().isVector() &&
          "We only do custom lowering for vector population count.");
   return LowerVectorCTPOP(Op, Subtarget, DAG);
 }
@@ -18357,7 +19826,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
 }
 
 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
-  EVT VT = Op.getNode()->getSimpleValueType(0);
+  MVT VT = Op.getNode()->getSimpleValueType(0);
 
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -18435,31 +19904,203 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
   return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
 }
 
+/// Widen a vector input to a vector of NVT.  The
+/// input vector must have the same element type as NVT.
+static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
+                            bool FillWithZeroes = false) {
+  // Check if InOp already has the right width.
+  MVT InVT = InOp.getSimpleValueType();
+  if (InVT == NVT)
+    return InOp;
+
+  if (InOp.isUndef())
+    return DAG.getUNDEF(NVT);
+
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
+         "Unexpected request for vector widening");
+
+  EVT EltVT = NVT.getVectorElementType();
+
+  SDLoc dl(InOp);
+  if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
+      InOp.getNumOperands() == 2) {
+    SDValue N1 = InOp.getOperand(1);
+    if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
+        N1.isUndef()) {
+      InOp = InOp.getOperand(0);
+      InVT = InOp.getSimpleValueType();
+      InNumElts = InVT.getVectorNumElements();
+    }
+  }
+  if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
+      ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
+    SmallVector<SDValue, 16> Ops;
+    for (unsigned i = 0; i < InNumElts; ++i)
+      Ops.push_back(InOp.getOperand(i));
+
+    SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+      DAG.getUNDEF(EltVT);
+    for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
+      Ops.push_back(FillVal);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+  }
+  SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
+    DAG.getUNDEF(NVT);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
+                     InOp, DAG.getIntPtrConstant(0, dl));
+}
+
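ExtendToType's contract, modeled on plain containers: the low elements are preserved and the tail is padded, either with zeroes (used for masks so the new lanes stay inactive) or with unspecified values standing in for undef. An illustrative model only, not the DAG implementation:

    #include <cassert>
    #include <vector>

    static std::vector<int> extendToWidth(const std::vector<int> &in,
                                          unsigned widenNumElts,
                                          bool fillWithZeroes) {
      assert(widenNumElts > in.size() && widenNumElts % in.size() == 0 &&
             "Unexpected request for vector widening");
      std::vector<int> out(in);
      // -1 stands in for an undef lane in this model.
      out.resize(widenNumElts, fillWithZeroes ? 0 : -1);
      return out;
    }

    int main() {
      std::vector<int> mask = {1, 0};  // e.g. a v2i1 scatter mask
      std::vector<int> wide = extendToWidth(mask, 8, /*fillWithZeroes=*/true);
      assert(wide.size() == 8 && wide[0] == 1 && wide[1] == 0 && wide[7] == 0);
      return 0;
    }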
 static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
                              SelectionDAG &DAG) {
   assert(Subtarget->hasAVX512() &&
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
+  // X86 scatter kills mask register, so its type should be added to
+  // the list of return values.
+  // If the "scatter" has 2 return values, it is already handled.
+  if (Op.getNode()->getNumValues() == 2)
+    return Op;
+
   MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
-  EVT VT = N->getValue().getValueType();
+  SDValue Src = N->getValue();
+  MVT VT = Src.getSimpleValueType();
   assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
   SDLoc dl(Op);
 
-  // X86 scatter kills mask register, so its type should be added to
-  // the list of return values
-  if (N->getNumValues() == 1) {
-    SDValue Index = N->getIndex();
-    if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
-        !Index.getValueType().is512BitVector())
+  SDValue NewScatter;
+  SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  SDValue Chain = N->getChain();
+  SDValue BasePtr = N->getBasePtr();
+  MVT MemVT = N->getMemoryVT().getSimpleVT();
+  MVT IndexVT = Index.getSimpleValueType();
+  MVT MaskVT = Mask.getSimpleValueType();
+
+  if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
+    // The v2i32 value was promoted to v2i64.
+    // Now we "redo" the type legalizer's work and widen the original
+    // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
+    // with a shuffle.
+    assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
+           "Unexpected memory type");
+    int ShuffleMask[] = {0, 2, -1, -1};
+    Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
+                               DAG.getUNDEF(MVT::v4i32), ShuffleMask);
+    // Now we have 4 elements instead of 2.
+    // Expand the index.
+    MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
+    Index = ExtendToType(Index, NewIndexVT, DAG);
+
+    // Expand the mask with zeroes.
+    // Mask may be <2 x i64> or <2 x i1> at this moment.
+    assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
+           "Unexpected mask type");
+    MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
+    Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+    VT = MVT::v4i32;
+  }
+
+  unsigned NumElts = VT.getVectorNumElements();
+  if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+      !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors: either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, we just sign-extend the index.
+    if (IndexVT == MVT::v8i32)
+      // Just extend index
       Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+    else {
+      // The minimal number of elts in scatter is 8
+      NumElts = 8;
+      // Index
+      MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+      // Use original index here, do not modify the index twice
+      Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
+      if (IndexVT.getScalarType() == MVT::i32)
+        Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
 
-    SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
-    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
-                      N->getOperand(3), Index };
+      // Mask
+      // At this point we have promoted mask operand
+      assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+      MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+      // Use the original mask here, do not modify the mask twice
+      Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
 
-    SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
-    DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
-    return SDValue(NewScatter.getNode(), 0);
+      // The value that should be stored
+      MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+      Src = ExtendToType(Src, NewVT, DAG);
+    }
+  }
+  // If the mask is "wide" at this point - truncate it to i1 vector
+  MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+  Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
+
+  // The mask is killed by scatter, add it to the values
+  SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
+  SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
+  NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
+                                    N->getMemOperand());
+  DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
+  return SDValue(NewScatter.getNode(), 0);
+}
+
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+                          SelectionDAG &DAG) {
+
+  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+  MVT VT = Op.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDLoc dl(Op);
+
+  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+      !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+    unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+    MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+    MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+    SDValue Src0 = N->getSrc0();
+    Src0 = ExtendToType(Src0, WideDataVT, DAG);
+    Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+    SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
+                                        N->getBasePtr(), Mask, Src0,
+                                        N->getMemoryVT(), N->getMemOperand(),
+                                        N->getExtensionType());
+
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                  NewLoad.getValue(0),
+                                  DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
+    return DAG.getMergeValues(RetOps, dl);
+  }
+  return Op;
+}
+
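Why widening a masked load or store to 512 bits is safe without VLX: the widened mask is zero-padded, and inactive lanes of a masked memory operation never touch memory, so the wide operation is observationally identical on the original lanes. A simplified scalar model of masked-load semantics (ours, for illustration):

    #include <cassert>
    #include <vector>

    // Only active lanes read memory; inactive lanes keep the pass-through.
    static std::vector<int> maskedLoad(const std::vector<int> &mem,
                                       const std::vector<bool> &mask,
                                       const std::vector<int> &src0) {
      std::vector<int> out(src0);
      for (size_t i = 0; i != mask.size(); ++i)
        if (mask[i])
          out[i] = mem[i];
      return out;
    }

    int main() {
      std::vector<int> mem = {10, 20, 30, 40, 50, 60, 70, 80};
      // Lanes 2..7 model the zero-padded tail added by the widening.
      std::vector<bool> mask = {true, false, false, false,
                                false, false, false, false};
      std::vector<int> src0(8, -1);
      std::vector<int> r = maskedLoad(mem, mask, src0);
      assert(r[0] == 10 && r[1] == -1 && r[7] == -1);
      return 0;
    }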
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+                           SelectionDAG &DAG) {
+  MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
+  SDValue DataToStore = N->getValue();
+  MVT VT = DataToStore.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDLoc dl(Op);
+
+  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+      !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+    unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+    MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+    MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+    DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
+    Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+    return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
+                              Mask, N->getMemoryVT(), N->getMemOperand(),
+                              N->isTruncatingStore());
+  }
+  return Op;
+}
+
@@ -18470,17 +20111,59 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
   MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
-  EVT VT = Op.getValueType();
-  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
   SDLoc dl(Op);
-
+  MVT VT = Op.getSimpleValueType();
   SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  SDValue Src0 = N->getValue();
+  MVT IndexVT = Index.getSimpleValueType();
+  MVT MaskVT = Mask.getSimpleValueType();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
+
   if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
-      !Index.getValueType().is512BitVector()) {
-    Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
-    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
-                      N->getOperand(3), Index };
-    DAG.UpdateNodeOperands(N, Ops);
+      !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors: either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, we just sign-extend the index.
+    if (NumElts == 8) {
+      Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), Index };
+      DAG.UpdateNodeOperands(N, Ops);
+      return Op;
+    }
+
+    // Minimal number of elements in Gather
+    NumElts = 8;
+    // Index
+    MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+    Index = ExtendToType(Index, NewIndexVT, DAG);
+    if (IndexVT.getScalarType() == MVT::i32)
+      Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+    // Mask
+    MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
+    // At this point we have promoted mask operand
+    assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+    MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+    Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+    Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
+
+    // The pass-thru value
+    MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+    Src0 = ExtendToType(Src0, NewVT, DAG);
+
+    SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+    SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
+                                            N->getMemoryVT(), dl, Ops,
+                                            N->getMemOperand());
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                  NewGather.getValue(0),
+                                  DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewGather.getValue(1)};
+    return DAG.getMergeValues(RetOps, dl);
   }
   return Op;
 }
@@ -18572,6 +20255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::SETCCE:             return LowerSETCCE(Op, DAG);
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
@@ -18592,12 +20276,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
-  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
-  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ_ZERO_UNDEF(Op, DAG);
-  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
+  case ISD::CTLZ:               return LowerCTLZ(Op, Subtarget, DAG);
+  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG);
+  case ISD::CTTZ:
+  case ISD::CTTZ_ZERO_UNDEF:    return LowerCTTZ(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, Subtarget, DAG);
   case ISD::UMUL_LOHI:
   case ISD::SMUL_LOHI:          return LowerMUL_LOHI(Op, Subtarget, DAG);
+  case ISD::ROTL:               return LowerRotate(Op, Subtarget, DAG);
   case ISD::SRA:
   case ISD::SRL:
   case ISD::SHL:                return LowerShift(Op, Subtarget, DAG);
@@ -18615,7 +20301,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   case ISD::ADD:                return LowerADD(Op, DAG);
   case ISD::SUB:                return LowerSUB(Op, DAG);
+  case ISD::SMAX:
+  case ISD::SMIN:
+  case ISD::UMAX:
+  case ISD::UMIN:               return LowerMINMAX(Op, DAG);
   case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
+  case ISD::MLOAD:              return LowerMLOAD(Op, Subtarget, DAG);
+  case ISD::MSTORE:             return LowerMSTORE(Op, Subtarget, DAG);
   case ISD::MGATHER:            return LowerMGATHER(Op, Subtarget, DAG);
   case ISD::MSCATTER:           return LowerMSCATTER(Op, Subtarget, DAG);
   case ISD::GC_TRANSITION_START:
@@ -18634,14 +20326,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case X86ISD::AVG: {
+    // Legalize types for X86ISD::AVG by expanding vectors.
+    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+
+    auto InVT = N->getValueType(0);
+    auto InVTSize = InVT.getSizeInBits();
+    const unsigned RegSize =
+        (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
+    assert((!Subtarget->hasAVX512() || RegSize < 512) &&
+           "512-bit vector requires AVX512");
+    assert((!Subtarget->hasAVX2() || RegSize < 256) &&
+           "256-bit vector requires AVX2");
+
+    auto ElemVT = InVT.getVectorElementType();
+    auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+                                  RegSize / ElemVT.getSizeInBits());
+    assert(RegSize % InVT.getSizeInBits() == 0);
+    unsigned NumConcat = RegSize / InVT.getSizeInBits();
+
+    SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+    Ops[0] = N->getOperand(0);
+    SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+    Ops[0] = N->getOperand(1);
+    SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+
+    SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
+    Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
                                   DAG.getIntPtrConstant(0, dl)));
+    return;
+  }
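X86ISD::AVG models PAVGB/PAVGW: an unsigned rounding average, avg(a, b) = (a + b + 1) >> 1, computed in a one-bit-wider intermediate so the sum cannot overflow. The widening above only changes vector length, not this per-lane semantics. A quick exhaustive check of the identity (illustrative only):

    #include <cassert>
    #include <cstdint>

    // PAVGB semantics on one lane: 9-bit sum, round up on ties, truncate.
    static uint8_t pavgb(uint8_t a, uint8_t b) {
      return uint8_t((unsigned(a) + unsigned(b) + 1) >> 1);
    }

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          assert(pavgb(uint8_t(a), uint8_t(b)) == (a + b + 1) / 2);
      return 0;  // e.g. pavgb(0, 1) == 1: ties round up
    }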
   // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
   case X86ISD::FMINC:
   case X86ISD::FMIN:
   case X86ISD::FMAXC:
   case X86ISD::FMAX: {
     EVT VT = N->getValueType(0);
-    if (VT != MVT::v2f32)
-      llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
+    assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
     SDValue UNDEF = DAG.getUNDEF(VT);
     SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                               N->getOperand(0), UNDEF);
@@ -18668,17 +20389,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_TO_SINT:
-    // FP_TO_INT*_IN_MEM is not legal for f16 inputs.  Do not convert
-    // (FP_TO_SINT (load f16)) to FP_TO_INT*.
-    if (N->getOperand(0).getValueType() == MVT::f16)
-      break;
-    // fallthrough
   case ISD::FP_TO_UINT: {
     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
 
-    if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
-      return;
-
     std::pair<SDValue, SDValue> Vals =
         FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
     SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -18707,6 +20420,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
                              DAG.getBitcast(MVT::v2i64, VBias));
     Or = DAG.getBitcast(MVT::v2f64, Or);
+    // TODO: Are there any fast-math-flags to propagate here?
     SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
     Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
     return;
@@ -18740,6 +20454,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
   }
   }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
+      Results.push_back(V);
+    return;
+  }
   case ISD::READCYCLECOUNTER: {
     return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
                                    Results);
@@ -18748,7 +20467,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     EVT T = N->getValueType(0);
     assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
     bool Regs64bit = T == MVT::i128;
-    EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+    MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
     SDValue cpInL, cpInH;
     cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
                         DAG.getConstant(0, dl, HalfT));
@@ -18884,6 +20603,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
+  case X86ISD::IRET:               return "X86ISD::IRET";
   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
@@ -18910,6 +20630,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FHADD:              return "X86ISD::FHADD";
   case X86ISD::FHSUB:              return "X86ISD::FHSUB";
   case X86ISD::ABS:                return "X86ISD::ABS";
+  case X86ISD::CONFLICT:           return "X86ISD::CONFLICT";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
@@ -18937,12 +20658,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
   case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
-  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
+  case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
+  case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
   case X86ISD::VINSERT:            return "X86ISD::VINSERT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::CVTDQ2PD:           return "X86ISD::CVTDQ2PD";
   case X86ISD::CVTUDQ2PD:          return "X86ISD::CVTUDQ2PD";
+  case X86ISD::CVT2MASK:           return "X86ISD::CVT2MASK";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -18978,6 +20701,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TESTM:              return "X86ISD::TESTM";
   case X86ISD::TESTNM:             return "X86ISD::TESTNM";
   case X86ISD::KORTEST:            return "X86ISD::KORTEST";
+  case X86ISD::KTEST:              return "X86ISD::KTEST";
   case X86ISD::PACKSS:             return "X86ISD::PACKSS";
   case X86ISD::PACKUS:             return "X86ISD::PACKUS";
   case X86ISD::PALIGNR:            return "X86ISD::PALIGNR";
@@ -19000,6 +20724,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
   case X86ISD::UNPCKH:             return "X86ISD::UNPCKH";
   case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";
+  case X86ISD::VBROADCASTM:        return "X86ISD::VBROADCASTM";
   case X86ISD::SUBV_BROADCAST:     return "X86ISD::SUBV_BROADCAST";
   case X86ISD::VEXTRACT:           return "X86ISD::VEXTRACT";
   case X86ISD::VPERMILPV:          return "X86ISD::VPERMILPV";
@@ -19009,11 +20734,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPERMV3:            return "X86ISD::VPERMV3";
   case X86ISD::VPERMIV3:           return "X86ISD::VPERMIV3";
   case X86ISD::VPERMI:             return "X86ISD::VPERMI";
+  case X86ISD::VPTERNLOG:          return "X86ISD::VPTERNLOG";
   case X86ISD::VFIXUPIMM:          return "X86ISD::VFIXUPIMM";
   case X86ISD::VRANGE:             return "X86ISD::VRANGE";
   case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
   case X86ISD::PMULDQ:             return "X86ISD::PMULDQ";
   case X86ISD::PSADBW:             return "X86ISD::PSADBW";
+  case X86ISD::DBPSADBW:           return "X86ISD::DBPSADBW";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
@@ -19022,10 +20749,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::SFENCE:             return "X86ISD::SFENCE";
   case X86ISD::LFENCE:             return "X86ISD::LFENCE";
   case X86ISD::SEG_ALLOCA:         return "X86ISD::SEG_ALLOCA";
-  case X86ISD::WIN_FTOL:           return "X86ISD::WIN_FTOL";
   case X86ISD::SAHF:               return "X86ISD::SAHF";
   case X86ISD::RDRAND:             return "X86ISD::RDRAND";
   case X86ISD::RDSEED:             return "X86ISD::RDSEED";
+  case X86ISD::VPMADDUBSW:         return "X86ISD::VPMADDUBSW";
+  case X86ISD::VPMADDWD:           return "X86ISD::VPMADDWD";
+  case X86ISD::VPROT:              return "X86ISD::VPROT";
+  case X86ISD::VPROTI:             return "X86ISD::VPROTI";
+  case X86ISD::VPSHA:              return "X86ISD::VPSHA";
+  case X86ISD::VPSHL:              return "X86ISD::VPSHL";
+  case X86ISD::VPCOM:              return "X86ISD::VPCOM";
+  case X86ISD::VPCOMU:             return "X86ISD::VPCOMU";
   case X86ISD::FMADD:              return "X86ISD::FMADD";
   case X86ISD::FMSUB:              return "X86ISD::FMSUB";
   case X86ISD::FNMADD:             return "X86ISD::FNMADD";
@@ -19038,7 +20772,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FNMSUB_RND:         return "X86ISD::FNMSUB_RND";
   case X86ISD::FMADDSUB_RND:       return "X86ISD::FMADDSUB_RND";
   case X86ISD::FMSUBADD_RND:       return "X86ISD::FMSUBADD_RND";
-  case X86ISD::RNDSCALE:           return "X86ISD::RNDSCALE";
+  case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE";
+  case X86ISD::VREDUCE:            return "X86ISD::VREDUCE";
+  case X86ISD::VGETMANT:           return "X86ISD::VGETMANT";
   case X86ISD::PCMPESTRI:          return "X86ISD::PCMPESTRI";
   case X86ISD::PCMPISTRI:          return "X86ISD::PCMPISTRI";
   case X86ISD::XTEST:              return "X86ISD::XTEST";
@@ -19064,6 +20800,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UINT_TO_FP_RND:     return "X86ISD::UINT_TO_FP_RND";
   case X86ISD::FP_TO_SINT_RND:     return "X86ISD::FP_TO_SINT_RND";
   case X86ISD::FP_TO_UINT_RND:     return "X86ISD::FP_TO_UINT_RND";
+  case X86ISD::VFPCLASS:           return "X86ISD::VFPCLASS";
+  case X86ISD::VFPCLASSS:          return "X86ISD::VFPCLASSS";
   }
   return nullptr;
 }
@@ -19218,7 +20956,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
 
 bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
-  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
+  if (!Subtarget->hasAnyFMA())
     return false;
 
   VT = VT.getScalarType();
@@ -19253,11 +20991,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
     return false;
 
   // Not for i1 vectors
-  if (VT.getScalarType() == MVT::i1)
+  if (VT.getSimpleVT().getScalarType() == MVT::i1)
     return false;
 
   // Very little shuffling can be done for 64-bit vectors right now.
-  if (VT.getSizeInBits() == 64)
+  if (VT.getSimpleVT().getSizeInBits() == 64)
    return false;
 
   // We only care that the types being shuffled are legal. The lowering can
@@ -19282,8 +21020,7 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
   DebugLoc DL = MI->getDebugLoc();
   const BasicBlock *BB = MBB->getBasicBlock();
-  MachineFunction::iterator I = MBB;
-  ++I;
+  MachineFunction::iterator I = ++MBB->getIterator();
 
   // For the v = xbegin(), we generate
   //
@@ -19531,8 +21268,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
     offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
     endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
 
-    MachineFunction::iterator MBBIter = MBB;
-    ++MBBIter;
+    MachineFunction::iterator MBBIter = ++MBB->getIterator();
 
     // Insert the new basic blocks
     MF->insert(MBBIter, offsetMBB);
@@ -19702,8 +21438,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   // stores were performed.
   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
   MachineFunction *F = MBB->getParent();
-  MachineFunction::iterator MBBIter = MBB;
-  ++MBBIter;
+  MachineFunction::iterator MBBIter = ++MBB->getIterator();
   MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(MBBIter, XMMSaveMBB);
@@ -19727,7 +21462,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
   int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
 
-  if (!Subtarget->isTargetWin64()) {
+  if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
     // If %al is 0, branch around the XMM save block.
     BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
     BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -19744,9 +21479,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   // In the XMM save block, save all the XMM argument registers.
   for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
-    MachineMemOperand *MMO =
-        F->getMachineMemOperand(
-            MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+    MachineMemOperand *MMO = F->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
         MachineMemOperand::MOStore,
         /*Size=*/16, /*Align=*/16);
     BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
@@ -19800,6 +21534,39 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
   return true;
 }
 
+// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
+// together with other CMOV pseudo-opcodes into a single basic-block with
+// conditional jump around it.
+static bool isCMOVPseudo(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case X86::CMOV_FR32:
+  case X86::CMOV_FR64:
+  case X86::CMOV_GR8:
+  case X86::CMOV_GR16:
+  case X86::CMOV_GR32:
+  case X86::CMOV_RFP32:
+  case X86::CMOV_RFP64:
+  case X86::CMOV_RFP80:
+  case X86::CMOV_V2F64:
+  case X86::CMOV_V2I64:
+  case X86::CMOV_V4F32:
+  case X86::CMOV_V4F64:
+  case X86::CMOV_V4I64:
+  case X86::CMOV_V16F32:
+  case X86::CMOV_V8F32:
+  case X86::CMOV_V8F64:
+  case X86::CMOV_V8I64:
+  case X86::CMOV_V8I1:
+  case X86::CMOV_V16I1:
+  case X86::CMOV_V32I1:
+  case X86::CMOV_V64I1:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
@@ -19811,8 +21578,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   // destination vreg to set, the condition code register to branch on, the
   // true/false values to select between, and a branch opcode to use.
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It;
+  MachineFunction::iterator It = ++BB->getIterator();
 
   //  thisMBB:
   //  ...
@@ -19823,8 +21589,41 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   MachineBasicBlock *thisMBB = BB;
   MachineFunction *F = BB->getParent();
 
-  // We also lower double CMOVs:
+  // This code lowers all pseudo-CMOV instructions. Generally it lowers these
+  // as described above, by inserting a BB, and then making a PHI at the join
+  // point to select the true and false operands of the CMOV in the PHI.
+  //
+  // The code also handles two different cases of multiple CMOV opcodes
+  // in a row.
+  //
+  // Case 1:
+  // In this case, there are multiple CMOVs in a row, all of which are based on
+  // the same condition setting (or the exact opposite condition setting).
+  // In this case we can lower all the CMOVs using a single inserted BB, and
+  // then make a number of PHIs at the join point to model the CMOVs. The only
+  // trickiness here is that in a case like:
+  //
+  //   t2 = CMOV cond1 t1, f1
+  //   t3 = CMOV cond1 t2, f2
+  //
+  // when rewriting this into PHIs, we have to perform some renaming on the
+  // temps since you cannot have a PHI operand refer to a PHI result earlier
+  // in the same block. The "simple" but wrong lowering would be:
+  //
+  //   t2 = PHI t1(BB1), f1(BB2)
+  //   t3 = PHI t2(BB1), f2(BB2)
+  //
+  // but clearly t2 is not defined in BB1, so that is incorrect. The proper
+  // renaming is to note that on the path through BB1, t2 is really just a
+  // copy of t1, and do that renaming, properly generating:
+  //
+  //   t2 = PHI t1(BB1), f1(BB2)
+  //   t3 = PHI t1(BB1), f2(BB2)
+  //
+  // Case 2, we lower cascaded CMOVs such as
+  //
   //   (CMOV (CMOV F, T, cc1), T, cc2)
+  //
   // to two successive branches.  For that, we look for another CMOV as the
   // following instruction.
   //
@@ -19890,19 +21689,42 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   //  .LBB5_4:
   //          retq
   //
-  MachineInstr *NextCMOV = nullptr;
+  MachineInstr *CascadedCMOV = nullptr;
+  MachineInstr *LastCMOV = MI;
+  X86::CondCode CC = X86::CondCode(MI->getOperand(3).getImm());
+  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
   MachineBasicBlock::iterator NextMIIt =
       std::next(MachineBasicBlock::iterator(MI));
-  if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+
+  // Check for case 1, where there are multiple CMOVs with the same condition
+  // first.  Of the two cases of multiple CMOV lowerings, case 1 reduces the
+  // number of jumps the most.
+
+  if (isCMOVPseudo(MI)) {
+    // See if we have a string of CMOVs with the same condition.
+    while (NextMIIt != BB->end() &&
+           isCMOVPseudo(NextMIIt) &&
+           (NextMIIt->getOperand(3).getImm() == CC ||
+            NextMIIt->getOperand(3).getImm() == OppCC)) {
+      LastCMOV = &*NextMIIt;
+      ++NextMIIt;
+    }
+  }
+
+  // This checks for case 2, but only do this if we didn't already find
+  // case 1, as indicated by LastCMOV == MI.
+  if (LastCMOV == MI &&
+      NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
       NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
-      NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
-    NextCMOV = &*NextMIIt;
+      NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) {
+    CascadedCMOV = &*NextMIIt;
+  }
 
   MachineBasicBlock *jcc1MBB = nullptr;
 
-  // If we have a double CMOV, we lower it to two successive branches to
+  // If we have a cascaded CMOV, we lower it to two successive branches to
   // the same block.  EFLAGS is used by both, so mark it as live in the second.
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
     F->insert(It, jcc1MBB);
     jcc1MBB->addLiveIn(X86::EFLAGS);
@@ -19917,7 +21739,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   // live into the sink and copy blocks.
   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
 
-  MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+  MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
   if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
       !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
     copy0MBB->addLiveIn(X86::EFLAGS);
@@ -19926,12 +21748,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
 
   // Transfer the remainder of BB and its successor edges to sinkMBB.
   sinkMBB->splice(sinkMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+                  std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
   sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   // Add the true and fallthrough blocks as its successors.
-  if (NextCMOV) {
-    // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+  if (CascadedCMOV) {
+    // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
     BB->addSuccessor(jcc1MBB);
 
     // In that case, jcc1MBB will itself fallthrough the copy0MBB, and
@@ -19946,13 +21768,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   BB->addSuccessor(sinkMBB);
 
   // Create the conditional branch instruction.
-  unsigned Opc =
-    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+  unsigned Opc = X86::GetCondBranchFromCond(CC);
   BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
 
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     unsigned Opc2 = X86::GetCondBranchFromCond(
-        (X86::CondCode)NextCMOV->getOperand(3).getImm());
+        (X86::CondCode)CascadedCMOV->getOperand(3).getImm());
     BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
   }
 
@@ -19964,27 +21785,109 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   //  sinkMBB:
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
-  MachineInstrBuilder MIB =
-      BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
-              MI->getOperand(0).getReg())
-          .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
-          .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+  MachineBasicBlock::iterator MIItEnd =
+      std::next(MachineBasicBlock::iterator(LastCMOV));
+  MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
+  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+  MachineInstrBuilder MIB;
 
-  // If we have a double CMOV, the second Jcc provides the same incoming
+  // As we are creating the PHIs, we have to be careful if there is more than
+  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
+  // PHIs have to reference the individual true/false inputs from earlier PHIs.
+  // That also means that PHI construction must work forward from earlier to
+  // later, and that the code must maintain a mapping from earlier PHI's
+  // destination registers, and the registers that went into the PHI.
+
+  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+    unsigned DestReg = MIIt->getOperand(0).getReg();
+    unsigned Op1Reg = MIIt->getOperand(1).getReg();
+    unsigned Op2Reg = MIIt->getOperand(2).getReg();
+
+    // If this CMOV we are generating is the opposite condition from
+    // the jump we generated, then we have to swap the operands for the
+    // PHI that is going to be generated.
+    if (MIIt->getOperand(3).getImm() == OppCC)
+      std::swap(Op1Reg, Op2Reg);
+
+    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
+      Op1Reg = RegRewriteTable[Op1Reg].first;
+
+    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
+      Op2Reg = RegRewriteTable[Op2Reg].second;
+
+    MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
+                  TII->get(X86::PHI), DestReg)
+          .addReg(Op1Reg).addMBB(copy0MBB)
+          .addReg(Op2Reg).addMBB(thisMBB);
+
+    // Add this PHI to the rewrite table.
+    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
+  }
+
+  // If we have a cascaded CMOV, the second Jcc provides the same incoming
   // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
 
     // Copy the PHI result to the register defined by the second CMOV.
     BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
-            DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+            DL, TII->get(TargetOpcode::COPY),
+            CascadedCMOV->getOperand(0).getReg())
         .addReg(MI->getOperand(0).getReg());
-    NextCMOV->eraseFromParent();
+    CascadedCMOV->eraseFromParent();
   }
 
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  // Now remove the CMOV(s).
+  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
+    (MIIt++)->eraseFromParent();
+
   return sinkMBB;
 }
 
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  // Combine the following atomic floating-point modification pattern:
+  //   a.store(reg OP a.load(acquire), release)
+  // Transform them into:
+  //   OPss (%gpr), %xmm
+  //   movss %xmm, (%gpr)
+  // Or sd equivalent for 64-bit operations.
+  unsigned MOp, FOp;
+  switch (MI->getOpcode()) {
+  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
+  case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
+  case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
+  }
+  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  MachineOperand MSrc = MI->getOperand(0);
+  unsigned VSrc = MI->getOperand(5).getReg();
+  const MachineOperand &Disp = MI->getOperand(3);
+  MachineOperand ZeroDisp = MachineOperand::CreateImm(0);
+  bool hasDisp = Disp.isGlobal() || Disp.isImm();
+  if (hasDisp && MSrc.isReg())
+    MSrc.setIsKill(false);
+  MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp))
+                                .addOperand(/*Base=*/MSrc)
+                                .addImm(/*Scale=*/1)
+                                .addReg(/*Index=*/0)
+                                .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+                                .addReg(0);
+  MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp),
+                              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
+                          .addReg(VSrc)
+                          .addOperand(/*Base=*/MSrc)
+                          .addImm(/*Scale=*/1)
+                          .addReg(/*Index=*/0)
+                          .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+                          .addReg(/*Segment=*/0);
+  MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill);
+  MI->eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
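The source-level shape that produces RELEASE_FADD32mr/64mr is an acquire load, a floating-point add, and a release store to the same atomic location — notably not an atomic read-modify-write. A minimal C++ reproducer of the pattern (whether it actually folds into the memory-operand addss/movss form depends on the surrounding codegen):

    #include <atomic>

    // a.store(a.load(acquire) + delta, release): the backend can fold this
    // into "addss (%mem), %xmm" + "movss %xmm, (%mem)" instead of bouncing
    // the float through integer registers.
    void bump(std::atomic<float> &a, float delta) {
      float v = a.load(std::memory_order_acquire);
      a.store(v + delta, std::memory_order_release);
    }

    int main() {
      std::atomic<float> a(1.0f);
      bump(a, 2.5f);
      return a.load() == 3.5f ? 0 : 1;
    }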
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
@@ -20032,8 +21935,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
     sizeVReg = MI->getOperand(1).getReg(),
     physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
 
-  MachineFunction::iterator MBBIter = BB;
-  ++MBBIter;
+  MachineFunction::iterator MBBIter = ++BB->getIterator();
 
   MF->insert(MBBIter, bumpMBB);
   MF->insert(MBBIter, mallocMBB);
@@ -20120,14 +22022,60 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
+  assert(!Subtarget->isTargetMachO());
+  DebugLoc DL = MI->getDebugLoc();
+  MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+      *BB->getParent(), *BB, MI, DL, false);
+  MachineBasicBlock *ResumeBB = ResumeMI->getParent();
+  MI->eraseFromParent(); // The pseudo instruction is gone now.
+  return ResumeBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
   DebugLoc DL = MI->getDebugLoc();
-  assert(!Subtarget->isTargetMachO());
 
-  Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
-                                                    DL);
+  assert(!isAsynchronousEHPersonality(
+             classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
+         "SEH does not use catchret!");
 
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  // Only 32-bit EH needs to worry about manually restoring stack pointers.
+  if (!Subtarget->is32Bit())
+    return BB;
+
+  // C++ EH creates a new target block to hold the restore code, and wires up
+  // the new block to the return destination with a normal JMP_4.
+  MachineBasicBlock *RestoreMBB =
+      MF->CreateMachineBasicBlock(BB->getBasicBlock());
+  assert(BB->succ_size() == 1);
+  MF->insert(std::next(BB->getIterator()), RestoreMBB);
+  RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(RestoreMBB);
+  MI->getOperand(0).setMBB(RestoreMBB);
+
+  auto RestoreMBBI = RestoreMBB->begin();
+  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
+  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+  return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const Constant *PerFn = MF->getFunction()->getPersonalityFn();
+  bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
+  // Only 32-bit SEH requires special handling for catchpad.
+  if (IsSEH && Subtarget->is32Bit()) {
+    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+    DebugLoc DL = MI->getDebugLoc();
+    BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
+  }
+  MI->eraseFromParent();
   return BB;
 }
 
@@ -20149,6 +22097,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
   // FIXME: The 32-bit calls have non-standard calling conventions. Use a
   // proper register mask.
   const uint32_t *RegMask =
+      Subtarget->is64Bit() ?
+      Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
       Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
   if (Subtarget->is64Bit()) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -20198,8 +22148,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   MachineRegisterInfo &MRI = MF->getRegInfo();
 
   const BasicBlock *BB = MBB->getBasicBlock();
-  MachineFunction::iterator I = MBB;
-  ++I;
+  MachineFunction::iterator I = ++MBB->getIterator();
 
   // Memory Reference
   MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -20225,7 +22174,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   // For v = setjmp(buf), we generate
   //
   // thisMBB:
-  //  buf[LabelOffset] = restoreMBB
+  //  buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
   //  SjLjSetup restoreMBB
   //
   // mainMBB:
@@ -20245,6 +22194,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   MF->insert(I, mainMBB);
   MF->insert(I, sinkMBB);
   MF->push_back(restoreMBB);
+  restoreMBB->setHasAddressTaken();
 
   MachineInstrBuilder MIB;
 
@@ -20511,35 +22461,44 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return BB;
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
+  case X86::CATCHRET:
+    return EmitLoweredCatchRet(MI, BB);
+  case X86::CATCHPAD:
+    return EmitLoweredCatchPad(MI, BB);
   case X86::SEG_ALLOCA_32:
   case X86::SEG_ALLOCA_64:
    return EmitLoweredSegAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
-  case X86::CMOV_GR8:
   case X86::CMOV_FR32:
   case X86::CMOV_FR64:
-  case X86::CMOV_V4F32:
-  case X86::CMOV_V2F64:
-  case X86::CMOV_V2I64:
-  case X86::CMOV_V8F32:
-  case X86::CMOV_V4F64:
-  case X86::CMOV_V4I64:
-  case X86::CMOV_V16F32:
-  case X86::CMOV_V8F64:
-  case X86::CMOV_V8I64:
+  case X86::CMOV_FR128:
+  case X86::CMOV_GR8:
   case X86::CMOV_GR16:
   case X86::CMOV_GR32:
   case X86::CMOV_RFP32:
   case X86::CMOV_RFP64:
   case X86::CMOV_RFP80:
+  case X86::CMOV_V2F64:
+  case X86::CMOV_V2I64:
+  case X86::CMOV_V4F32:
+  case X86::CMOV_V4F64:
+  case X86::CMOV_V4I64:
+  case X86::CMOV_V16F32:
+  case X86::CMOV_V8F32:
+  case X86::CMOV_V8F64:
+  case X86::CMOV_V8I64:
   case X86::CMOV_V8I1:
   case X86::CMOV_V16I1:
   case X86::CMOV_V32I1:
   case X86::CMOV_V64I1:
     return EmitLoweredSelect(MI, BB);
 
+  case X86::RELEASE_FADD32mr:
+  case X86::RELEASE_FADD64mr:
+    return EmitLoweredAtomicFP(MI, BB);
+
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:
@@ -20793,7 +22752,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     unsigned Depth) const {
   // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
   if (Op.getOpcode() == X86ISD::SETCC_CARRY)
-    return Op.getValueType().getScalarType().getSizeInBits();
+    return Op.getValueType().getScalarSizeInBits();
 
   // Fallback case.
   return 1;
@@ -20814,39 +22773,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
   return TargetLowering::isGAPlusOffset(N, GA, Offset);
 }
 
-/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
-/// same as extracting the high 128-bit part of 256-bit vector and then
-/// inserting the result into the low part of a new 256-bit vector
-static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-
-  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
-  for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
-    if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
-        SVOp->getMaskElt(j) >= 0)
-      return false;
-
-  return true;
-}
-
-/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
-/// same as extracting the low 128-bit part of 256-bit vector and then
-/// inserting the result into the high part of a new 256-bit vector
-static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-
-  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 2, 3>
-  for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
-    if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
-        SVOp->getMaskElt(j) >= 0)
-      return false;
-
-  return true;
-}
-
 /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+/// FIXME: This could be expanded to support 512 bit vectors as well.
 static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const X86Subtarget* Subtarget) {
@@ -20854,7 +22782,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
-  EVT VT = SVOp->getValueType(0);
+  MVT VT = SVOp->getSimpleValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
 
   if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
@@ -20920,24 +22848,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, InsV);
   }
 
-  //===--------------------------------------------------------------------===//
-  // Combine some shuffles into subvector extracts and inserts:
-  //
-
-  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
-  if (isShuffleHigh128VectorInsertLow(SVOp)) {
-    SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
-    SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
-    return DCI.CombineTo(N, InsV);
-  }
-
-  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 2, 3>
-  if (isShuffleLow128VectorInsertHigh(SVOp)) {
-    SDValue V = Extract128BitVector(V1, 0, DAG, dl);
-    SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
-    return DCI.CombineTo(N, InsV);
-  }
-
   return SDValue();
 }
 
@@ -20966,10 +22876,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
   MVT RootVT = Root.getSimpleValueType();
   SDLoc DL(Root);
 
-  // Just remove no-op shuffle masks.
   if (Mask.size() == 1) {
-    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
-                  /*AddTo*/ true);
+    int Index = Mask[0];
+    assert((Index >= 0 || Index == SM_SentinelUndef ||
+            Index == SM_SentinelZero) &&
+           "Invalid shuffle index found!");
+
+    // We may end up with an accumulated mask of size 1 as a result of
+    // widening of shuffle operands (see function canWidenShuffleElements).
+    // If the only shuffle index is equal to SM_SentinelZero then propagate
+    // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
Otherwise, the combine shuffle mask is a no-op shuffle + // mask, and therefore the entire chain of shuffles can be folded away. + if (Index == SM_SentinelZero) + DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL)); + else + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), + /*AddTo*/ true); return true; } @@ -20985,7 +22907,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // doesn't preclude something switching to the shorter encoding post-RA. // // FIXME: Should teach these routines about AVX vector widths. - if (FloatDomain && VT.getSizeInBits() == 128) { + if (FloatDomain && VT.is128BitVector()) { if (Mask.equals({0, 0}) || Mask.equals({1, 1})) { bool Lo = Mask.equals({0, 0}); unsigned Shuffle; @@ -21049,7 +22971,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK // variants as none of these have single-instruction variants that are // superior to the UNPCK formulation. - if (!FloatDomain && VT.getSizeInBits() == 128 && + if (!FloatDomain && VT.is128BitVector() && (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) || Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) || Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) || @@ -21226,26 +23148,28 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, // See if we can recurse into the operand to combine more things. switch (Op.getOpcode()) { - case X86ISD::PSHUFB: - HasPSHUFB = true; - case X86ISD::PSHUFD: - case X86ISD::PSHUFHW: - case X86ISD::PSHUFLW: - if (Op.getOperand(0).hasOneUse() && - combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, - HasPSHUFB, DAG, DCI, Subtarget)) - return true; - break; + case X86ISD::PSHUFB: + HasPSHUFB = true; + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + if (Op.getOperand(0).hasOneUse() && + combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, + HasPSHUFB, DAG, DCI, Subtarget)) + return true; + break; - case X86ISD::UNPCKL: - case X86ISD::UNPCKH: - assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!"); - // We can't check for single use, we have to check that this shuffle is the only user. - if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && - combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, - HasPSHUFB, DAG, DCI, Subtarget)) - return true; - break; + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + assert(Op.getOperand(0) == Op.getOperand(1) && + "We only combine unary shuffles!"); + // We can't check for single use, we have to check that this shuffle is the + // only user. + if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && + combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, + HasPSHUFB, DAG, DCI, Subtarget)) + return true; + break; } // Minor canonicalization of the accumulated shuffle mask to make it easier @@ -21360,8 +23284,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. - if (V.getSimpleValueType().getScalarType() != MVT::i8 && - V.getSimpleValueType().getScalarType() != MVT::i16) + if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && + V.getSimpleValueType().getVectorElementType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. 
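[Editor's sketch, not part of the upstream patch] The hunk above handles the case where recursive mask widening in combineX86ShuffleChain collapses the accumulated shuffle mask to a single element. A minimal standalone C++ model of that decision; the sentinel constants here are illustrative stand-ins for LLVM's SM_SentinelUndef/SM_SentinelZero, not the real enum values:

#include <cassert>

// Illustrative stand-ins for LLVM's shuffle mask sentinels.
constexpr int SentinelUndef = -1;
constexpr int SentinelZero  = -2;

// For a widened one-element mask: SentinelZero forces a zero vector;
// anything else means the whole shuffle chain is a no-op and folds away.
bool foldsToZeroVector(int MaskElt) {
  assert(MaskElt >= 0 || MaskElt == SentinelUndef || MaskElt == SentinelZero);
  return MaskElt == SentinelZero;
}

int main() {
  assert(foldsToZeroVector(SentinelZero)); // propagate a zero vector
  assert(!foldsToZeroVector(0));           // no-op: reuse the (bitcast) input
  assert(!foldsToZeroVector(SentinelUndef));
  return 0;
}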
@@ -21438,7 +23362,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, return V; } -/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw. +/// \brief Search for a combinable shuffle across a chain ending in pshuflw or +/// pshufhw. /// /// We walk up the chain, skipping shuffles of the other half and looking /// through shuffles which switch halves trying to find a shuffle of the same @@ -21520,6 +23445,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; + case X86ISD::UNPCKL: { + // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in + // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE + // moves upper half elements into the lower half part. For example: + // + // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, + // undef:v16i8 + // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2 + // + // will be combined to: + // + // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1 + + // This is only for 128-bit vectors. From SSE4.1 onward this combine may not + // happen due to advanced instructions. + if (!VT.is128BitVector()) + return SDValue(); + + auto Op0 = N.getOperand(0); + auto Op1 = N.getOperand(1); + if (Op0.getOpcode() == ISD::UNDEF && + Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) { + ArrayRef Mask = cast(Op1.getNode())->getMask(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector ExpectedMask(NumElts, -1); + std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2, + NumElts / 2); + + auto ShufOp = Op1.getOperand(0); + if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp); + } + return SDValue(); + } default: return SDValue(); } @@ -21535,7 +23495,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) return SDValue(); // We combined away this shuffle, so we're done. @@ -21624,14 +23584,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { return SDValue(); auto *SVN = cast(N); - ArrayRef Mask = SVN->getMask(); + SmallVector Mask; + for (int M : SVN->getMask()) + Mask.push_back(M); + SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - // We require the first shuffle operand to be the SUB node, and the second to - // be the ADD node. - // FIXME: We should support the commuted patterns. - if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD) + // We require the first shuffle operand to be the FSUB node, and the second to + // be the FADD node. + if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) return SDValue(); // If there are other uses of these operations we can't fold them. @@ -21682,7 +23647,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return AddSub; // Combine 256-bit vector shuffles. 
This is only profitable when in AVX mode - if (Subtarget->hasFp256() && VT.is256BitVector() && + if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); @@ -21866,21 +23831,45 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } -/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are -/// special and don't usually play with other vector types, it's better to -/// handle them early to be sure we emit efficient code by avoiding -/// store-load conversions. -static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { - if (N->getValueType(0) != MVT::x86mmx || - N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR || - N->getOperand(0)->getValueType(0) != MVT::v2i32) - return SDValue(); +static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); - SDValue V = N->getOperand(0); - ConstantSDNode *C = dyn_cast(V.getOperand(1)); - if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32) - return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)), - N->getValueType(0), V.getOperand(0)); + // Detect bitcasts between i32 to x86mmx low word. Since MMX types are + // special and don't usually play with other vector types, it's better to + // handle them early to be sure we emit efficient code by avoiding + // store-load conversions. + if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR && + N0.getValueType() == MVT::v2i32 && + isNullConstant(N0.getOperand(1))) { + SDValue N00 = N0->getOperand(0); + if (N00.getValueType() == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); + } + + // Convert a bitcasted integer logic operation that has one bitcasted + // floating-point operand and one constant operand into a floating-point + // logic operation. This may create a load of the constant, but that is + // cheaper than materializing the constant in an integer register and + // transferring it to an SSE register or transferring the SSE operand to + // integer register and back. + unsigned FPOpcode; + switch (N0.getOpcode()) { + case ISD::AND: FPOpcode = X86ISD::FAND; break; + case ISD::OR: FPOpcode = X86ISD::FOR; break; + case ISD::XOR: FPOpcode = X86ISD::FXOR; break; + default: return SDValue(); + } + if (((Subtarget->hasSSE1() && VT == MVT::f32) || + (Subtarget->hasSSE2() && VT == MVT::f64)) && + isa(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::BITCAST && + N0.getOperand(0).getOperand(0).getValueType() == VT) { + SDValue N000 = N0.getOperand(0).getOperand(0); + SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1)); + return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst); + } return SDValue(); } @@ -21910,26 +23899,26 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, InputVector.getNode()->getOperand(0)); // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))). 
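[Editor's sketch, not part of the upstream patch] The new PerformBITCASTCombine logic above rests on the identity that bitwise operations commute with bitcasts, so an integer AND/OR/XOR on float bits can stay in the FP domain as FAND/FOR/FXOR. A self-contained check using memcpy as a portable bit_cast (the helper names are illustrative):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) { uint32_t U; std::memcpy(&U, &F, 4); return U; }
static float floatOf(uint32_t U) { float F; std::memcpy(&F, &U, 4); return F; }

int main() {
  float X = -3.5f;
  // AND in the integer domain matches FAND in the FP domain: masking off
  // the sign bit computes fabs.
  assert(floatOf(bitsOf(X) & 0x7fffffffu) == std::fabs(X));
  // XOR with the sign-bit mask matches FXOR: it flips the sign.
  assert(floatOf(bitsOf(X) ^ 0x80000000u) == -X);
  return 0;
}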
- SDValue MMXSrcOp = MMXSrc.getOperand(0); if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() && - MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() && - MMXSrcOp.getOpcode() == ISD::BITCAST && - MMXSrcOp.getValueType() == MVT::v1i64 && - MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx) - return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), - N->getValueType(0), - MMXSrcOp.getOperand(0)); + MMXSrc.getValueType() == MVT::i64) { + SDValue MMXSrcOp = MMXSrc.getOperand(0); + if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST && + MMXSrcOp.getValueType() == MVT::v1i64 && + MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx) + return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), + N->getValueType(0), MMXSrcOp.getOperand(0)); + } } EVT VT = N->getValueType(0); - if (VT == MVT::i1 && dyn_cast(N->getOperand(1)) && + if (VT == MVT::i1 && isa(N->getOperand(1)) && InputVector.getOpcode() == ISD::BITCAST && - dyn_cast(InputVector.getOperand(0))) { + isa(InputVector.getOperand(0))) { uint64_t ExtractedElt = - cast(N->getOperand(1))->getZExtValue(); + cast(N->getOperand(1))->getZExtValue(); uint64_t InputValue = - cast(InputVector.getOperand(0))->getZExtValue(); + cast(InputVector.getOperand(0))->getZExtValue(); uint64_t Res = (InputValue >> ExtractedElt) & 1; return DAG.getConstant(Res, dl, MVT::i1); } @@ -22036,96 +24025,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match. -static std::pair -matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const X86Subtarget *Subtarget) { - if (!VT.isVector()) - return std::make_pair(0, false); - - bool NeedSplit = false; - switch (VT.getSimpleVT().SimpleTy) { - default: return std::make_pair(0, false); - case MVT::v4i64: - case MVT::v2i64: - if (!Subtarget->hasVLX()) - return std::make_pair(0, false); - break; - case MVT::v64i8: - case MVT::v32i16: - if (!Subtarget->hasBWI()) - return std::make_pair(0, false); - break; - case MVT::v16i32: - case MVT::v8i64: - if (!Subtarget->hasAVX512()) - return std::make_pair(0, false); - break; - case MVT::v32i8: - case MVT::v16i16: - case MVT::v8i32: - if (!Subtarget->hasAVX2()) - NeedSplit = true; - if (!Subtarget->hasAVX()) - return std::make_pair(0, false); - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - if (!Subtarget->hasSSE2()) - return std::make_pair(0, false); - } - - // SSE2 has only a small subset of the operations. - bool hasUnsigned = Subtarget->hasSSE41() || - (Subtarget->hasSSE2() && VT == MVT::v16i8); - bool hasSigned = Subtarget->hasSSE41() || - (Subtarget->hasSSE2() && VT == MVT::v8i16); - - ISD::CondCode CC = cast(Cond.getOperand(2))->get(); - - unsigned Opc = 0; - // Check for x CC y ? x : y. - if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && - DAG.isEqualTo(RHS, Cond.getOperand(1))) { - switch (CC) { - default: break; - case ISD::SETULT: - case ISD::SETULE: - Opc = hasUnsigned ? ISD::UMIN : 0; break; - case ISD::SETUGT: - case ISD::SETUGE: - Opc = hasUnsigned ? ISD::UMAX : 0; break; - case ISD::SETLT: - case ISD::SETLE: - Opc = hasSigned ? ISD::SMIN : 0; break; - case ISD::SETGT: - case ISD::SETGE: - Opc = hasSigned ? ISD::SMAX : 0; break; - } - // Check for x CC y ? y : x -- a min/max with reversed arms. 
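[Editor's sketch, not part of the upstream patch] The matchIntegerMINMAX code being deleted here recognized exactly these select shapes, which generic DAG combining now turns into ISD::UMIN/UMAX (and the signed variants) instead. The shapes it matched, verified on scalars:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 7u, 42u, 0xffffffffu})
    for (uint32_t Y : {0u, 7u, 42u, 0xffffffffu}) {
      // x CC y ? x : y with an unsigned less-than is UMIN...
      assert((X < Y ? X : Y) == std::min(X, Y));
      // ...and with the arms reversed it is UMAX.
      assert((X < Y ? Y : X) == std::max(X, Y));
    }
  return 0;
}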
- } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && - DAG.isEqualTo(RHS, Cond.getOperand(0))) { - switch (CC) { - default: break; - case ISD::SETULT: - case ISD::SETULE: - Opc = hasUnsigned ? ISD::UMAX : 0; break; - case ISD::SETUGT: - case ISD::SETUGE: - Opc = hasUnsigned ? ISD::UMIN : 0; break; - case ISD::SETLT: - case ISD::SETLE: - Opc = hasSigned ? ISD::SMAX : 0; break; - case ISD::SETGT: - case ISD::SETGE: - Opc = hasSigned ? ISD::SMIN : 0; break; - } - } - - return std::make_pair(Opc, NeedSplit); -} - static SDValue transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -22189,7 +24088,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && + VT != MVT::f80 && VT != MVT::f128 && + (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget->hasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -22535,32 +24435,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } - // Try to match a min/max vector operation. - if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) { - std::pair ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget); - unsigned Opc = ret.first; - bool NeedSplit = ret.second; - - if (Opc && NeedSplit) { - unsigned NumElems = VT.getVectorNumElements(); - // Extract the LHS vectors - SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL); - SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL); - - // Extract the RHS vectors - SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL); - SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL); - - // Create min/max for each subvector - LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1); - RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2); - - // Merge the result - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS); - } else if (Opc) - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - // Simplify vector selection if condition value type matches vselect // operand type if (N->getOpcode() == ISD::VSELECT && CondVT == VT) { @@ -22635,7 +24509,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { - unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Cond.getValueType().getScalarSizeInBits(); // Don't optimize vector selects that map to mask-registers. if (BitWidth == 1) @@ -22656,14 +24530,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // FIXME: We don't support i16-element blends currently. We could and // should support them by making *all* the bits in the condition be set // rather than just the high bit and using an i8-element blend. - if (VT.getScalarType() == MVT::i16) + if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. 
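[Editor's sketch, not part of the upstream patch] The byte-blend restriction above follows from PBLENDVB semantics: each result byte is chosen by the high bit of its mask byte only, which is why an i16-element condition would need all bits set rather than just the MSB. One lane modeled on scalars (which source the set bit selects is abbreviated here; the point is that the low mask bits are ignored):

#include <cassert>
#include <cstdint>

// One lane of a byte blend: only bit 7 of the mask byte is consulted.
static uint8_t blendByte(uint8_t MaskByte, uint8_t IfSet, uint8_t IfClear) {
  return (MaskByte & 0x80) ? IfSet : IfClear;
}

int main() {
  assert(blendByte(0x80, 1, 2) == 1); // MSB set: first choice taken
  assert(blendByte(0x7f, 1, 2) == 2); // every low bit set, MSB clear
  return 0;
}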
- if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41()) + if (VT.is128BitVector() && !Subtarget->hasSSE41()) return SDValue(); // Byte blends are only available in AVX2 - if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 && - !Subtarget->hasAVX2()) + if (VT == MVT::v32i8 && !Subtarget->hasAVX2()) return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); @@ -22773,12 +24646,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; - ConstantSDNode *CS; - if ((CS = dyn_cast(SetCC.getOperand(0))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(0))) OpIdx = 1; - if ((CS = dyn_cast(SetCC.getOperand(1))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(1))) OpIdx = 0; if (OpIdx == -1) break; @@ -22857,8 +24727,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd) { if (Cond->getOpcode() == X86ISD::CMP) { - ConstantSDNode *CondOp1C = dyn_cast(Cond->getOperand(1)); - if (!CondOp1C || !CondOp1C->isNullValue()) + if (!isNullConstant(Cond->getOperand(1))) return false; Cond = Cond->getOperand(0); @@ -23102,106 +24971,15 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: return SDValue(); - // SSE/AVX/AVX2 blend intrinsics. - case Intrinsic::x86_avx2_pblendvb: - // Don't try to simplify this intrinsic if we don't have AVX2. - if (!Subtarget->hasAVX2()) - return SDValue(); - // FALL-THROUGH - case Intrinsic::x86_avx_blendv_pd_256: - case Intrinsic::x86_avx_blendv_ps_256: - // Don't try to simplify this intrinsic if we don't have AVX. - if (!Subtarget->hasAVX()) - return SDValue(); - // FALL-THROUGH - case Intrinsic::x86_sse41_blendvps: - case Intrinsic::x86_sse41_blendvpd: - case Intrinsic::x86_sse41_pblendvb: { - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(2); - SDValue Mask = N->getOperand(3); - - // Don't try to simplify this intrinsic if we don't have SSE4.1. - if (!Subtarget->hasSSE41()) - return SDValue(); - - // fold (blend A, A, Mask) -> A - if (Op0 == Op1) - return Op0; - // fold (blend A, B, allZeros) -> A - if (ISD::isBuildVectorAllZeros(Mask.getNode())) - return Op0; - // fold (blend A, B, allOnes) -> B - if (ISD::isBuildVectorAllOnes(Mask.getNode())) - return Op1; - - // Simplify the case where the mask is a constant i32 value. - if (ConstantSDNode *C = dyn_cast(Mask)) { - if (C->isNullValue()) - return Op0; - if (C->isAllOnesValue()) - return Op1; - } - - return SDValue(); - } - - // Packed SSE2/AVX2 arithmetic shift immediate intrinsics. 
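[Editor's sketch, not part of the upstream patch] The blend-intrinsic combine deleted just above folded the trivial cases: blend(A, A, Mask) -> A, blend(A, B, allZeros) -> A, blend(A, B, allOnes) -> B. Those folds hold lane by lane, as a scalar model shows:

#include <cassert>

// One lane of a variable blend with a known mask bit.
static int blendLane(int A, int B, bool MaskBit) { return MaskBit ? B : A; }

int main() {
  int A = 10, B = 20;
  assert(blendLane(A, A, true) == A);  // blend(A, A, Mask) -> A
  assert(blendLane(A, B, false) == A); // blend(A, B, allZeros) -> A
  assert(blendLane(A, B, true) == B);  // blend(A, B, allOnes) -> B
  return 0;
}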
- case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - case Intrinsic::x86_sse2_psra_w: - case Intrinsic::x86_sse2_psra_d: - case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: { - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(2); - EVT VT = Op0.getValueType(); - assert(VT.isVector() && "Expected a vector type!"); - - if (isa(Op1)) - Op1 = Op1.getOperand(0); - - if (!isa(Op1)) - return SDValue(); - - EVT SVT = VT.getVectorElementType(); - unsigned SVTBits = SVT.getSizeInBits(); - - ConstantSDNode *CND = cast(Op1); - const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue()); - uint64_t ShAmt = C.getZExtValue(); - - // Don't try to convert this shift into a ISD::SRA if the shift - // count is bigger than or equal to the element size. - if (ShAmt >= SVTBits) - return SDValue(); - - // Trivial case: if the shift count is zero, then fold this - // into the first operand. - if (ShAmt == 0) - return Op0; - - // Replace this packed shift intrinsic with a target independent - // shift dag node. - SDLoc DL(N); - SDValue Splat = DAG.getConstant(C, DL, VT); - return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat); - } - } -} - /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + // An imul is usually smaller than the alternative sequence. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -23228,9 +25006,11 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, MulAmt1 = 3; MulAmt2 = MulAmt / 3; } + + SDLoc DL(N); + SDValue NewMul; if (MulAmt2 && (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ - SDLoc DL(N); if (isPowerOf2_64(MulAmt2) && !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) @@ -23239,7 +25019,6 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, // is an add. std::swap(MulAmt1, MulAmt2); - SDValue NewMul; if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); @@ -23253,10 +25032,31 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); + } + if (!NewMul) { + assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) + && "Both cases that could cause potential overflows should have " + "already been handled."); + if (isPowerOf2_64(MulAmt - 1)) + // (mul x, 2^N + 1) => (add (shl x, N), x) + NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt - 1), DL, + MVT::i8))); + + else if (isPowerOf2_64(MulAmt + 1)) + // (mul x, 2^N - 1) => (sub (shl x, N), x) + NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, + N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt + 1), + DL, MVT::i8)), N->getOperand(0)); + } + + if (NewMul) // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, NewMul, false); - } + return SDValue(); } @@ -23272,18 +25072,34 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == X86ISD::SETCC_CARRY || - ((N00.getOpcode() == ISD::ANY_EXTEND || - N00.getOpcode() == ISD::ZERO_EXTEND) && - N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) { - APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); - APInt ShAmt = N1C->getAPIntValue(); - Mask = Mask.shl(ShAmt); - if (Mask != 0) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, - N00, DAG.getConstant(Mask, DL, VT)); - } + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); + APInt ShAmt = N1C->getAPIntValue(); + Mask = Mask.shl(ShAmt); + bool MaskOK = false; + // We can handle cases concerning bit-widening nodes containing setcc_c if + // we carefully interrogate the mask to make sure we are semantics + // preserving. + // The transform is not safe if the result of C1 << C2 exceeds the bitwidth + // of the underlying setcc_c operation if the setcc_c was zero extended. + // Consider the following example: + // zext(setcc_c) -> i32 0x0000FFFF + // c1 -> i32 0x0000FFFF + // c2 -> i32 0x00000001 + // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE + // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE + if (N00.getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = true; + } else if (N00.getOpcode() == ISD::SIGN_EXTEND && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = true; + } else if ((N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits()); + } + if (MaskOK && Mask != 0) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); } } @@ -23304,6 +25120,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned Size = VT.getSizeInBits(); + + // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) + // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or + // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) + // depending on sign of (SarConst - [56,48,32,24,16]) + + // sexts in X86 are MOVs. The MOVs have the same code size + // as above SHIFTs (only SHIFT on 1 has lower code size). + // However the MOVs have 2 advantages to a SHIFT: + // 1. MOVs can write to a register that differs from source + // 2. 
MOVs accept memory operands + + if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant || + N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || + N0.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + APInt ShlConst = (cast(N01))->getAPIntValue(); + APInt SarConst = (cast(N1))->getAPIntValue(); + EVT CVT = N1.getValueType(); + + if (SarConst.isNegative()) + return SDValue(); + + for (MVT SVT : MVT::integer_valuetypes()) { + unsigned ShiftSize = SVT.getSizeInBits(); + // skipping types without corresponding sext/zext and + // ShlConst that is not one of [56,48,32,24,16] + if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize) + continue; + SDLoc DL(N); + SDValue NN = + DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); + SarConst = SarConst - (Size - ShiftSize); + if (SarConst == 0) + return NN; + else if (SarConst.isNegative()) + return DAG.getNode(ISD::SHL, DL, VT, NN, + DAG.getConstant(-SarConst, DL, CVT)); + else + return DAG.getNode(ISD::SRA, DL, VT, NN, + DAG.getConstant(SarConst, DL, CVT)); + } + return SDValue(); +} + /// \brief Returns a vector of 0s if the node in input is a vector logical /// shift by a constant amount which is known to be bigger than or equal /// to the vector element size in bits. @@ -23321,14 +25190,15 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, if (auto *AmtBV = dyn_cast(Amt)) if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { APInt ShiftAmt = AmtSplat->getAPIntValue(); - unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); + unsigned MaxAmount = + VT.getSimpleVT().getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s // if the shift amount is bigger than or equal to // the element size. The constant shift amount will be // encoded as a 8-bit immediate. if (ShiftAmt.trunc(8).uge(MaxAmount)) - return getZeroVector(VT, Subtarget, DAG, DL); + return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL); } return SDValue(); @@ -23342,6 +25212,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (SDValue V = PerformSHLCombine(N, DAG)) return V; + if (N->getOpcode() == ISD::SRA) + if (SDValue V = PerformSRACombine(N, DAG)) + return V; + // Try to fold this logical shift into a zero vector. if (N->getOpcode() != ISD::SRA) if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget)) @@ -23537,7 +25411,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. 
N0 = N0->getOperand(0); if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(), SDValue(RHSConstSplat, 0)); SmallVector C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); @@ -23552,9 +25426,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: { - unsigned InBits = NarrowVT.getScalarType().getSizeInBits(); + unsigned InBits = NarrowVT.getScalarSizeInBits(); APInt Mask = APInt::getAllOnesValue(InBits); - Mask = Mask.zext(VT.getScalarType().getSizeInBits()); + Mask = Mask.zext(VT.getScalarSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(Mask, DL, VT)); } @@ -23656,6 +25530,41 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(N0.getValueType(), NewShuffle); } +/// If both input operands of a logic op are being cast from floating point +/// types, try to convert this into a floating point logic node to avoid +/// unnecessary moves from SSE to integer registers. +static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + unsigned FPOpcode = ISD::DELETED_NODE; + if (N->getOpcode() == ISD::AND) + FPOpcode = X86ISD::FAND; + else if (N->getOpcode() == ISD::OR) + FPOpcode = X86ISD::FOR; + else if (N->getOpcode() == ISD::XOR) + FPOpcode = X86ISD::FXOR; + + assert(FPOpcode != ISD::DELETED_NODE && + "Unexpected input node for FP logic conversion"); + + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDLoc DL(N); + if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && + ((Subtarget->hasSSE1() && VT == MVT::i32) || + (Subtarget->hasSSE2() && VT == MVT::i64))) { + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + EVT N00Type = N00.getValueType(); + EVT N10Type = N10.getValueType(); + if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) { + SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); + return DAG.getBitcast(VT, FPLogic); + } + } + return SDValue(); +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -23668,6 +25577,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -23728,6 +25640,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -23799,7 +25714,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasSSE41()) return SDValue(); - EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; + MVT BlendVT = (VT == MVT::v4i64) ? 
MVT::v32i8 : MVT::v16i8; X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); @@ -23813,9 +25728,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent @@ -23913,17 +25826,188 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes +// Try to turn tests against the signbit in the form of: +// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) +// into: +// SETGT(X, -1) +static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { + // This is only worth doing if the output type is i8. + if (N->getValueType(0) != MVT::i8) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // We should be performing an xor against a truncated shift. + if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) + return SDValue(); + + // Make sure we are performing an xor against one. + if (!isOneConstant(N1)) + return SDValue(); + + // SetCC on x86 zero extends so only act on this if it's a logical shift. + SDValue Shift = N0.getOperand(0); + if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) + return SDValue(); + + // Make sure we are truncating from one of i16, i32 or i64. + EVT ShiftTy = Shift.getValueType(); + if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) + return SDValue(); + + // Make sure the shift amount extracts the sign bit. + if (!isa(Shift.getOperand(1)) || + Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1) + return SDValue(); + + // Create a greater-than comparison against -1. + // N.B. Using SETGE against 0 works but we want a canonical looking + // comparison, using SETGT matches up with what TranslateX86CC. + SDLoc DL(N); + SDValue ShiftOp = Shift.getOperand(0); + EVT ShiftOpTy = ShiftOp.getValueType(); + SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp, + DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); + return Cond; +} + static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); + if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) + return RV; + if (Subtarget->hasCMov()) if (SDValue RV = performIntegerAbsCombine(N, DAG)) return RV; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + + return SDValue(); +} + +/// This function detects the AVG pattern between vectors of unsigned i8/i16, +/// which is c = (a + b + 1) / 2, and replace this operation with the efficient +/// X86ISD::AVG instruction. +static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, + const X86Subtarget *Subtarget, SDLoc DL) { + if (!VT.isVector() || !VT.isSimple()) + return SDValue(); + EVT InVT = In.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + EVT ScalarVT = VT.getVectorElementType(); + if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && + isPowerOf2_32(NumElems))) + return SDValue(); + + // InScalarVT is the intermediate type in AVG pattern and it should be greater + // than the original input type (i8/i16). 
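[Editor's sketch, not part of the upstream patch] foldXorTruncShiftIntoCmp, added earlier in this hunk, relies on the identity that XOR(TRUNCATE(SRL(X, bits-1)), 1) is 1 exactly when the sign bit of X is clear, i.e. when X > -1. A standalone check over boundary values:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {INT32_MIN, -2, -1, 0, 1, INT32_MAX}) {
    uint8_t ViaShiftXor = (uint8_t)(((uint32_t)X >> 31) ^ 1u);
    uint8_t ViaSetGT = (X > -1) ? 1 : 0; // SETGT X, -1
    assert(ViaShiftXor == ViaSetGT);
  }
  return 0;
}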
+  EVT InScalarVT = InVT.getVectorElementType();
+  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+    return SDValue();
+
+  if (Subtarget->hasAVX512()) {
+    if (VT.getSizeInBits() > 512)
+      return SDValue();
+  } else if (Subtarget->hasAVX2()) {
+    if (VT.getSizeInBits() > 256)
+      return SDValue();
+  } else {
+    if (VT.getSizeInBits() > 128)
+      return SDValue();
+  }
+
+  // Detect the following pattern:
+  //
+  //   %1 = zext <N x i8> %a to <N x i32>
+  //   %2 = zext <N x i8> %b to <N x i32>
+  //   %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
+  //   %4 = add nuw nsw <N x i32> %3, %2
+  //   %5 = lshr <N x i32> %N, <i32 1 x N>
+  //   %6 = trunc <N x i32> %5 to <N x i8>
+  //
+  // In AVX512, the last instruction can also be a trunc store.
+
+  if (In.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // A lambda checking the given SDValue is a constant vector and each element
+  // is in the range [Min, Max].
+  auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
+    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
+    if (!BV || !BV->isConstant())
+      return false;
+    for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+      if (!C)
+        return false;
+      uint64_t Val = C->getZExtValue();
+      if (Val < Min || Val > Max)
+        return false;
+    }
+    return true;
+  };
+
+  // Check if each element of the vector is right-shifted by one.
+  auto LHS = In.getOperand(0);
+  auto RHS = In.getOperand(1);
+  if (!IsConstVectorInRange(RHS, 1, 1))
+    return SDValue();
+  if (LHS.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // Detect a pattern of a + b + 1 where the order doesn't matter.
+  SDValue Operands[3];
+  Operands[0] = LHS.getOperand(0);
+  Operands[1] = LHS.getOperand(1);
+
+  // Take care of the case when one of the operands is a constant vector whose
+  // element is in the range [1, 256].
+  if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
+      Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
+      Operands[0].getOperand(0).getValueType() == VT) {
+    // The pattern is detected. Subtract one from the constant vector, then
+    // demote it and emit X86ISD::AVG instruction.
+    SDValue One = DAG.getConstant(1, DL, InScalarVT);
+    SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT,
+                               SmallVector<SDValue, 8>(NumElems, One));
+    Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones);
+    Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
+    return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+                       Operands[1]);
+  }
+
+  if (Operands[0].getOpcode() == ISD::ADD)
+    std::swap(Operands[0], Operands[1]);
+  else if (Operands[1].getOpcode() != ISD::ADD)
+    return SDValue();
+  Operands[2] = Operands[1].getOperand(0);
+  Operands[1] = Operands[1].getOperand(1);
+
+  // Now we have three operands of two additions. Check that one of them is a
+  // constant vector with ones, and the other two are promoted from i8/i16.
+  for (int i = 0; i < 3; ++i) {
+    if (!IsConstVectorInRange(Operands[i], 1, 1))
+      continue;
+    std::swap(Operands[i], Operands[2]);
+
+    // Check if Operands[0] and Operands[1] are results of type promotion.
+    for (int j = 0; j < 2; ++j)
+      if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+          Operands[j].getOperand(0).getValueType() != VT)
+        return SDValue();
+
+    // The pattern is detected, emit X86ISD::AVG instruction.
+    return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+                       Operands[1].getOperand(0));
+  }
+
+  return SDValue();
+}
@@ -23940,10 +26024,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
 
   // For chips with slow 32-byte unaligned loads, break the 32-byte operation
   // into two 16-byte operations.
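[Editor's sketch, not part of the upstream patch] detectAVGPattern above matches the zext/add/add-1/lshr/trunc chain because (a + b + 1) >> 1, computed in a wider type, is exactly the rounding average that PAVGB/PAVGW produce. A reference check on u8 (avgRoundUp is an illustrative helper, not an LLVM API):

#include <cassert>
#include <cstdint>

// Reference rounding average, computed in a wider type so a + b + 1 cannot
// wrap; this mirrors the zext/add/lshr/trunc chain matched above.
static uint8_t avgRoundUp(uint8_t A, uint8_t B) {
  return (uint8_t)(((uint16_t)A + (uint16_t)B + 1) >> 1);
}

int main() {
  assert(avgRoundUp(0, 0) == 0);
  assert(avgRoundUp(255, 255) == 255); // no overflow in the wide type
  assert(avgRoundUp(1, 2) == 2);       // ties round up, as PAVGB does
  return 0;
}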
ISD::LoadExtType Ext = Ld->getExtensionType(); + bool Fast; + unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8; - if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() && - !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { + if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && + Ext == ISD::NON_EXTLOAD && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -24012,8 +26099,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. - assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) - && "WideVecVT should be legal"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && + "WideVecVT should be legal"); WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); } @@ -24026,8 +26113,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; + for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) + ShuffleVec[i] = NumElems * SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); @@ -24055,7 +26142,6 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::NON_EXTLOAD); SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd); return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); - } /// PerformMSTORECombine - Resolve truncating stores static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, @@ -24073,6 +26159,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // The truncating store is legal in some cases. For example + // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw + // are designated for truncate store. + // In this case we don't need any further transformations. + if (TLI.isTruncStoreLegal(VT, StVT)) + return SDValue(); + // From, To sizes and ElemCount must be pow of two assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for truncating masked store"); @@ -24096,12 +26191,12 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. 
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) - && "WideVecVT should be legal"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && + "WideVecVT should be legal"); SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - &ShuffleVec[0]); + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); SDValue NewMask; SDValue Mask = Mst->getMask(); @@ -24133,8 +26228,9 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } - return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(), - NewMask, StVT, Mst->getMemOperand(), false); + return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, + Mst->getBasePtr(), NewMask, StVT, + Mst->getMemOperand(), false); } /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, @@ -24148,10 +26244,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. + bool Fast; + unsigned AddressSpace = St->getAddressSpace(); unsigned Alignment = St->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8; - if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() && - StVT == VT && !IsAligned) { + if (VT.is256BitVector() && StVT == VT && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -24178,12 +26276,29 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. if (St->isTruncatingStore() && VT.isVector()) { + // Check if we can detect an AVG pattern from the truncation. If yes, + // replace the trunc store by a normal store with the result of X86ISD::AVG + // instruction. + SDValue Avg = + detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl); + if (Avg.getNode()) + return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + // The truncating store is legal in some cases. For example + // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw + // are designated for truncate store. + // In this case we don't need any further transformations. + if (TLI.isTruncStoreLegal(VT, StVT)) + return SDValue(); + // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); // We are going to use the original vector elt for storing. @@ -24306,7 +26421,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { - EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + MVT LdVT = Subtarget->is64Bit() ? 
MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), @@ -24539,8 +26654,234 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. +static SDValue +combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, + SmallVector &Regs) { + assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 || + Regs[0].getValueType() == MVT::v2i64)); + EVT OutVT = N->getValueType(0); + EVT OutSVT = OutVT.getVectorElementType(); + EVT InVT = Regs[0].getValueType(); + EVT InSVT = InVT.getVectorElementType(); + SDLoc DL(N); + + // First, use mask to unset all bits that won't appear in the result. + assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) && + "OutSVT can only be either i8 or i16."); + SDValue MaskVal = + DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT); + SDValue MaskVec = DAG.getNode( + ISD::BUILD_VECTOR, DL, InVT, + SmallVector(InVT.getVectorNumElements(), MaskVal)); + for (auto &Reg : Regs) + Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg); + + MVT UnpackedVT, PackedVT; + if (OutSVT == MVT::i8) { + UnpackedVT = MVT::v8i16; + PackedVT = MVT::v16i8; + } else { + UnpackedVT = MVT::v4i32; + PackedVT = MVT::v8i16; + } + + // In each iteration, truncate the type by a half size. + auto RegNum = Regs.size(); + for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits(); + j < e; j *= 2, RegNum /= 2) { + for (unsigned i = 0; i < RegNum; i++) + Regs[i] = DAG.getNode(ISD::BITCAST, DL, UnpackedVT, Regs[i]); + for (unsigned i = 0; i < RegNum / 2; i++) + Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2], + Regs[i * 2 + 1]); + } + + // If the type of the result is v8i8, we need do one more X86ISD::PACKUS, and + // then extract a subvector as the result since v8i8 is not a legal type. + if (OutVT == MVT::v8i8) { + Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]); + Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0], + DAG.getIntPtrConstant(0, DL)); + return Regs[0]; + } else if (RegNum > 1) { + Regs.resize(RegNum); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); + } else + return Regs[0]; +} + +/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. +static SDValue +combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, + SmallVector &Regs) { + assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); + EVT OutVT = N->getValueType(0); + SDLoc DL(N); + + // Shift left by 16 bits, then arithmetic-shift right by 16 bits. + SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); + for (auto &Reg : Regs) { + Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG); + Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG); + } + + for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) + Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2], + Regs[i * 2 + 1]); + + if (Regs.size() > 2) { + Regs.resize(Regs.size() / 2); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); + } else + return Regs[0]; +} + +/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into +/// X86ISD::PACKUS/X86ISD::PACKSS operations. 
We do it here because after type
+/// legalization the truncation will be translated into a BUILD_VECTOR with each
+/// element that is extracted from a vector and then truncated, and it is
+/// difficult to do this optimization based on them.
+static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
+                                       const X86Subtarget *Subtarget) {
+  EVT OutVT = N->getValueType(0);
+  if (!OutVT.isVector())
+    return SDValue();
+
+  SDValue In = N->getOperand(0);
+  if (!In.getValueType().isSimple())
+    return SDValue();
+
+  EVT InVT = In.getValueType();
+  unsigned NumElems = OutVT.getVectorNumElements();
+
+  // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
+  // SSE2, and we need to take care of it specially.
+  // AVX512 provides vpmovdb.
+  if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+    return SDValue();
+
+  EVT OutSVT = OutVT.getVectorElementType();
+  EVT InSVT = InVT.getVectorElementType();
+  if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
+        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
+        NumElems >= 8))
+    return SDValue();
+
+  // SSSE3's pshufb results in fewer instructions in the cases below.
+  if (Subtarget->hasSSSE3() && NumElems == 8 &&
+      ((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
+       (InSVT == MVT::i32 && OutSVT == MVT::i16)))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  // Split a long vector into vectors of legal type.
+  unsigned RegNum = InVT.getSizeInBits() / 128;
+  SmallVector<SDValue, 8> SubVec(RegNum);
+  if (InSVT == MVT::i32) {
+    for (unsigned i = 0; i < RegNum; i++)
+      SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+                              DAG.getIntPtrConstant(i * 4, DL));
+  } else {
+    for (unsigned i = 0; i < RegNum; i++)
+      SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+                              DAG.getIntPtrConstant(i * 2, DL));
+  }
+
+  // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
+  // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
+  // truncate 2 x v4i32 to v8i16.
+  if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+    return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
+  else if (InSVT == MVT::i32)
+    return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+  else
+    return SDValue();
+}
+
+static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
+                                      const X86Subtarget *Subtarget) {
+  // Try to detect AVG pattern first.
+  SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
+                                 Subtarget, SDLoc(N));
+  if (Avg.getNode())
+    return Avg;
+
+  return combineVectorTruncation(N, DAG, Subtarget);
+}
+
+/// Do target-specific dag combines on floating point negations.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  EVT SVT = VT.getScalarType();
+  SDValue Arg = N->getOperand(0);
+  SDLoc DL(N);
+
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  // If we're negating a FMUL node on a target with FMA, then we can avoid the
+  // use of a constant by performing (-0 - A*B) instead.
+  // FIXME: Check rounding control flags as well once it becomes available.
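[Editor's sketch, not part of the upstream patch] Stepping back to the PACKSS truncation path above: combineVectorTruncationWithPACKSS shifts each lane left and then arithmetic-right by 16 so the value is sign-extended from its low 16 bits, which means the signed saturation in PACKSS never clips and the pack acts as a pure truncation. One lane modeled on scalars (packssLane is illustrative; the arithmetic right shift of a negative value is the common compiler behavior assumed here):

#include <cassert>
#include <cstdint>

// Saturating narrow, as PACKSS does per lane.
static int16_t packssLane(int32_t V) {
  if (V > INT16_MAX) return INT16_MAX;
  if (V < INT16_MIN) return INT16_MIN;
  return (int16_t)V;
}

int main() {
  for (int32_t Lane : {0x12345678, (int32_t)0xabcd8765}) {
    // VSHLI then VSRAI by 16: sign-extend from the low 16 bits.
    int32_t Prepared = (int32_t)((uint32_t)Lane << 16) >> 16;
    assert(packssLane(Prepared) == (int16_t)(Lane & 0xffff)); // pure truncation
  }
  return 0;
}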
+  if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
+      Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+    SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+    return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+                       Arg.getOperand(1), Zero);
+  }
+
+  // If we're negating a FMA node, then we can adjust the
+  // instruction to include the extra negation.
+  if (Arg.hasOneUse()) {
+    switch (Arg.getOpcode()) {
+    case X86ISD::FMADD:
+      return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FMSUB:
+      return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FNMADD:
+      return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FNMSUB:
+      return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    }
+  }
+  return SDValue();
+}
+
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
+    // These logic operations may be executed in the integer domain.
+    SDLoc dl(N);
+    MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+    MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+    SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+    unsigned IntOpcode = 0;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected FP logic op");
+    case X86ISD::FOR: IntOpcode = ISD::OR; break;
+    case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+    case X86ISD::FAND: IntOpcode = ISD::AND; break;
+    case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+    }
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+    return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+  }
+  return SDValue();
+}
 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
-static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget *Subtarget) {
   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
 
   // F[X]OR(0.0, x) -> x
@@ -24552,7 +26893,8 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
     if (C->getValueAPF().isPosZero())
       return N->getOperand(0);
-  return SDValue();
+
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
@@ -24576,8 +26918,65 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
                      N->getOperand(0), N->getOperand(1));
 }
 
+static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
+                                            const X86Subtarget *Subtarget) {
+  if (Subtarget->useSoftFloat())
+    return SDValue();
+
+  // TODO: Check for global or instruction-level "nnan". In that case, we
+  // should be able to lower to FMAX/FMIN alone.
+  // TODO: If an operand is already known to be a NaN or not a NaN, this
+  // should be an optional swap and FMAX/FMIN.
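[Editor's sketch, not part of the upstream patch] On the FNEG folds earlier in this hunk: PerformFNEGCombine flips FMA variants under negation, e.g. -(a*b + c) becomes fnmsub(a, b, c). The scalar identities behind that table, checked on exactly representable values:

#include <cassert>

int main() {
  double A = 1.5, B = -2.25, C = 4.0;
  assert(-(A * B + C) == -(A * B) - C);  // -(FMADD)  == FNMSUB
  assert(-(A * B - C) == -(A * B) + C);  // -(FMSUB)  == FNMADD
  assert(-(-(A * B) + C) == A * B - C);  // -(FNMADD) == FMSUB
  assert(-(-(A * B) - C) == A * B + C);  // -(FNMSUB) == FMADD
  return 0;
}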
+ + EVT VT = N->getValueType(0); + if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) + return SDValue(); + + // This takes at least 3 instructions, so favor a library call when operating + // on a scalar and minimizing code size. + if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDLoc DL(N); + EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), VT); + + // There are 4 possibilities involving NaN inputs, and these are the required + // outputs: + // Op1 + // Num NaN + // ---------------- + // Num | Max | Op0 | + // Op0 ---------------- + // NaN | Op1 | NaN | + // ---------------- + // + // The SSE FP max/min instructions were not designed for this case, but rather + // to implement: + // Min = Op1 < Op0 ? Op1 : Op0 + // Max = Op1 > Op0 ? Op1 : Op0 + // + // So they always return Op0 if either input is a NaN. However, we can still + // use those instructions for fmaxnum by selecting away a NaN input. + + // If either operand is NaN, the 2nd source operand (Op0) is passed through. + auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; + SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); + SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO); + + // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands + // are NaN, the NaN value of Op1 is the result. + auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); +} + /// Do target-specific dag combines on X86ISD::FAND nodes. -static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, @@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// sext(add_nsw(x, C)) --> add(sext(x), C_sext) +/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities +/// to combine math ops, use an LEA, or use a complex addressing mode. This can +/// eliminate extend, add, and shift instructions. +static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + // TODO: This should be valid for other integer types. 
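[Editor's sketch, not part of the upstream patch] The min/max-plus-select construction above works because the SSE instruction already returns Op0 whenever either input is a NaN, so one extra select on "Op0 is NaN" yields IEEE maxnum behavior for every row of the table. A scalar model (sseMax and fmaxnumViaSelect are illustrative helpers):

#include <cassert>
#include <cmath>

// SSE MAX-style semantics: strictly "Op1 > Op0 ? Op1 : Op0", so any NaN
// makes the comparison fail and Op0 falls through.
static double sseMax(double Op1, double Op0) { return Op1 > Op0 ? Op1 : Op0; }

// fmaxnum built as in the combine: FMAX(Op1, Op0), then select Op1 when
// Op0 is NaN.
static double fmaxnumViaSelect(double Op0, double Op1) {
  double MaxOrOp0 = sseMax(Op1, Op0);
  return std::isnan(Op0) ? Op1 : MaxOrOp0;
}

int main() {
  double QNaN = std::nan("");
  assert(fmaxnumViaSelect(1.0, 2.0) == 2.0);        // Num, Num -> max
  assert(fmaxnumViaSelect(QNaN, 2.0) == 2.0);       // NaN, Num -> Num
  assert(fmaxnumViaSelect(1.0, QNaN) == 1.0);       // Num, NaN -> Num
  assert(std::isnan(fmaxnumViaSelect(QNaN, QNaN))); // NaN, NaN -> NaN
  return 0;
}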
 /// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
   // FAND(0.0, x) -> 0.0
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
 
-  return SDValue();
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 /// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+                                   const X86Subtarget *Subtarget) {
   // FANDN(0.0, x) -> x
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
 
-  return SDValue();
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 static SDValue PerformBTCombine(SDNode *N,
@@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
+/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
+/// to combine math ops, use an LEA, or use a complex addressing mode. This can
+/// eliminate extend, add, and shift instructions.
+static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
+                                       const X86Subtarget *Subtarget) {
+  // TODO: This should be valid for other integer types.
+  EVT VT = Sext->getValueType(0);
+  if (VT != MVT::i64)
+    return SDValue();
+
+  // We need an 'add nsw' feeding into the 'sext'.
+  SDValue Add = Sext->getOperand(0);
+  if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+    return SDValue();
+
+  // Having a constant operand to the 'add' ensures that we are not increasing
+  // the instruction count because the constant is extended for free below.
+  // A constant operand can also become the displacement field of an LEA.
+  auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+  if (!AddOp1)
+    return SDValue();
+
+  // Don't make the 'add' bigger if there's no hope of combining it with some
+  // other 'add' or 'shl' instruction.
+  // TODO: It may be profitable to generate simpler LEA instructions in place
+  // of single 'add' instructions, but the cost model for selecting an LEA
+  // currently has a high threshold.
+  bool HasLEAPotential = false;
+  for (auto *User : Sext->uses()) {
+    if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
+      HasLEAPotential = true;
+      break;
+    }
+  }
+  if (!HasLEAPotential)
+    return SDValue();
+
+  // Everything looks good, so pull the 'sext' ahead of the 'add'.
+  int64_t AddConstant = AddOp1->getSExtValue();
+  SDValue AddOp0 = Add.getOperand(0);
+  SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+  SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+
+  // The wider add is guaranteed to not wrap because both operands are
+  // sign-extended.
+  SDNodeFlags Flags;
+  Flags.setNoSignedWrap(true);
+  return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+}
+
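The correctness argument for this transform is the usual nsw one: when the narrow add cannot wrap, sign extension distributes over it. A minimal standalone check (illustrative only, with arbitrarily chosen values):

    #include <cassert>
    #include <cstdint>

    int main() {
      // With 'add nsw', sext(x + C) == sext(x) + sext(C); the combine
      // relies on exactly this identity to pull the sext ahead of the add.
      int32_t X = 123456, C = -42;
      int64_t Narrow = (int64_t)(int32_t)(X + C); // sext(add_nsw(x, C))
      int64_t Wide = (int64_t)X + (int64_t)C;     // add(sext(x), C_sext)
      assert(Narrow == Wide);
      return 0;
    }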
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
@@ -24763,13 +27214,13 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (!Subtarget->hasFp256())
-    return SDValue();
-
-  if (VT.isVector() && VT.getSizeInBits() == 256)
+  if (Subtarget->hasAVX() && VT.is256BitVector())
     if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
       return R;
 
+  if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+    return NewAdd;
+
   return SDValue();
 }
 
@@ -24783,9 +27234,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT ScalarVT = VT.getScalarType();
-  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
-      (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
-       !Subtarget->hasAVX512()))
+  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
     return SDValue();
 
   SDValue A = N->getOperand(0);
@@ -24830,8 +27279,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
       N0.getOperand(0).hasOneUse()) {
     SDValue N00 = N0.getOperand(0);
     if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-      if (!C || C->getZExtValue() != 1)
+      if (!isOneConstant(N0.getOperand(1)))
         return SDValue();
       return DAG.getNode(ISD::AND, dl, VT,
                          DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
@@ -24884,21 +27332,19 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
   SDLoc DL(N);
 
   if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
-      if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
-        SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
-                                   LHS.getOperand(1));
-        return DAG.getSetCC(DL, N->getValueType(0), addV,
-                            DAG.getConstant(0, DL, addV.getValueType()), CC);
-      }
+    if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
+      SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
+                                 LHS.getOperand(1));
+      return DAG.getSetCC(DL, N->getValueType(0), addV,
+                          DAG.getConstant(0, DL, addV.getValueType()), CC);
+    }
   if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
-      if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
-        SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
-                                   RHS.getOperand(1));
-        return DAG.getSetCC(DL, N->getValueType(0), addV,
-                            DAG.getConstant(0, DL, addV.getValueType()), CC);
-      }
+    if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
+      SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
+                                 RHS.getOperand(1));
+      return DAG.getSetCC(DL, N->getValueType(0), addV,
+                          DAG.getConstant(0, DL, addV.getValueType()), CC);
+    }
 
   if (VT.getScalarType() == MVT::i1 &&
       (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
@@ -24936,52 +27382,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
-                                         SelectionDAG &DAG) {
-  SDLoc dl(Load);
-  MVT VT = Load->getSimpleValueType(0);
-  MVT EVT = VT.getVectorElementType();
-  SDValue Addr = Load->getOperand(1);
-  SDValue NewAddr = DAG.getNode(
-      ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
-      DAG.getConstant(Index * EVT.getStoreSize(), dl,
-                      Addr.getSimpleValueType()));
-
-  SDValue NewLoad =
-      DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
-                  DAG.getMachineFunction().getMachineMemOperand(
-                      Load->getMemOperand(), 0, EVT.getStoreSize()));
-  return NewLoad;
-}
-
-static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
-  SDLoc dl(N);
-  MVT VT = N->getOperand(1)->getSimpleValueType(0);
-  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
-         "X86insertps is only defined for v4x32");
-
-  SDValue Ld = N->getOperand(1);
-  if (MayFoldLoad(Ld)) {
-    // Extract the countS bits from the immediate so we can get the proper
-    // address when narrowing the vector load to a specific element.
-    // When the second source op is a memory address, insertps doesn't use
-    // countS and just gets an f32 from that address.
-    unsigned DestIndex =
-        cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
-    Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
-
-    // Create this as a scalar to vector to match the instruction pattern.
-    SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
-    // countS bits are ignored when loading from memory on insertps, which
-    // means we don't need to explicitly set them to 0.
-    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
-                       LoadScalarToVector, N->getOperand(2));
-  }
-  return SDValue();
-}
-
 static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
   SDValue V0 = N->getOperand(0);
   SDValue V1 = N->getOperand(1);
@@ -25008,6 +27408,20 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  // Gather and Scatter instructions use k-registers for masks. The type of
+  // the masks is v*i1. So the mask will be truncated anyway.
+  // The SIGN_EXTEND_INREG may be dropped.
+  SDValue Mask = N->getOperand(2);
+  if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+    NewOps[2] = Mask.getOperand(0);
+    DAG.UpdateNodeOperands(N, NewOps);
+  }
+  return SDValue();
+}
+
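Why dropping the SIGN_EXTEND_INREG is safe: after truncation to v*i1 the mask consumer only ever sees bit 0 of each lane, and sign-extending from bit 0 never changes bit 0. A per-lane sketch (illustrative only; assumes the usual two's-complement arithmetic right shift):

    #include <cassert>
    #include <cstdint>

    // sign_extend_inreg from i1: replicate bit 0 across the whole lane.
    static int32_t signExtendInRegFromI1(int32_t Lane) {
      return (int32_t)((uint32_t)Lane << 31) >> 31;
    }

    int main() {
      for (int32_t Lane : {0, 1, 0x7fffffff, -2}) {
        // Truncation to i1 keeps only bit 0, so the in-register sign
        // extension is invisible to the k-register mask consumer.
        assert((signExtendInRegFromI1(Lane) & 1) == (Lane & 1));
      }
      return 0;
    }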
 // Helper function of PerformSETCCCombine. It is to materialize "setb reg"
 // as "sbb reg,reg", since it can be extended without zext and produces
 // an all-ones bit which is more useful than 0/1 in some cases.
@@ -25182,7 +27596,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
 
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
-  if (Op0.getOpcode() == ISD::LOAD) {
+  if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
     LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
     EVT LdVT = Ld->getValueType(0);
 
@@ -25357,15 +27771,14 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
   }
 
   // Check if we can bypass extracting and re-inserting an element of an input
-  // vector. Essentialy:
+  // vector. Essentially:
   // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
       V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
     SDValue ExtractedV = V.getOperand(0);
     SDValue OrigV = ExtractedV.getOperand(0);
-    if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
-      if (ExtractIdx->getZExtValue() == 0) {
+    if (isNullConstant(ExtractedV.getOperand(1))) {
       MVT OrigVT = OrigV.getSimpleValueType();
       // Extract a subvector if necessary...
       if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
@@ -25394,7 +27807,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SELECT:
   case X86ISD::SHRUNKBLEND:
     return PerformSELECTCombine(N, DAG, DCI, Subtarget);
-  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
@@ -25414,12 +27827,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP:     return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
   case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
   case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);
+  case ISD::FNEG:           return PerformFNEGCombine(N, DAG, Subtarget);
+  case ISD::TRUNCATE:       return PerformTRUNCATECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
-  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
+  case X86ISD::FOR:         return PerformFORCombine(N, DAG, Subtarget);
   case X86ISD::FMIN:
   case X86ISD::FMAX:        return PerformFMinFMaxCombine(N, DAG);
-  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
-  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG);
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:        return performFMinNumFMaxNumCombine(N, DAG,
+                                                                Subtarget);
+  case X86ISD::FAND:        return PerformFANDCombine(N, DAG, Subtarget);
+  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG, Subtarget);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
   case ISD::ANY_EXTEND:
@@ -25447,14 +27865,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI, Subtarget);
   case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
-  case
ISD::INTRINSIC_WO_CHAIN: - return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget); - case X86ISD::INSERTPS: { - if (getTargetMachine().getOptLevel() > CodeGenOpt::None) - return PerformINSERTPSCombine(N, DAG, Subtarget); - break; - } case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); + case ISD::MGATHER: + case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG); } return SDValue(); @@ -26084,6 +28497,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::f64: case MVT::i64: return std::make_pair(0U, &X86::FR64RegClass); + // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. // Vector types. case MVT::v16i8: case MVT::v8i16: @@ -26168,17 +28582,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass || Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) { unsigned Size = VT.getSizeInBits(); - MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8 - : Size == 16 ? MVT::i16 - : Size == 32 ? MVT::i32 - : Size == 64 ? MVT::i64 - : MVT::Other; - unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy); + if (Size == 1) Size = 8; + unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { Res.first = DestReg; - Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass - : SimpleTy == MVT::i16 ? &X86::GR16RegClass - : SimpleTy == MVT::i32 ? &X86::GR32RegClass + Res.second = Size == 8 ? &X86::GR8RegClass + : Size == 16 ? &X86::GR16RegClass + : Size == 32 ? &X86::GR32RegClass : &X86::GR64RegClass; assert(Res.second->contains(Res.first) && "Register in register class"); } else { @@ -26196,6 +28606,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // target independent register mapper will just pick the first match it can // find, ignoring the required type. + // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) @@ -26244,6 +28655,15 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, return -1; } -bool X86TargetLowering::isTargetFTOL() const { - return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); +bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { + // Integer division on x86 is expensive. However, when aggressively optimizing + // for code size, we prefer to use a div instruction, as it is usually smaller + // than the alternative sequence. + // The exception to this is vector division. Since x86 doesn't have vector + // integer division, leaving the division as-is is a loss even in terms of + // size, because it will have to be scalarized, while the alternative code + // sequence can be performed in vector form. + bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); + return OptSize && !VT.isVector(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 723d5304495c..a29dc9af54f6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -126,6 +126,9 @@ namespace llvm { /// 1 is the number of bytes of stack to pop. RET_FLAG, + /// Return from interrupt. Operand 0 is the number of bytes to pop. + IRET, + /// Repeat fill, corresponds to X86::REP_STOSx. REP_STOS, @@ -182,6 +185,8 @@ namespace llvm { /// Compute Sum of Absolute Differences. 
       PSADBW,
+      /// Compute Double Block Packed Sum-Absolute-Differences
+      DBPSADBW,
 
       /// Bitwise Logical AND NOT of Packed FP values.
       ANDNP,
@@ -211,6 +216,8 @@ namespace llvm {
       // FP vector get exponent
       FGETEXP_RND,
+      // Extract Normalized Mantissas
+      VGETMANT,
       // FP Scale
       SCALEF,
       // Integer add/sub with unsigned saturation.
@@ -236,6 +243,9 @@ namespace llvm {
       // Integer absolute value
       ABS,
 
+      // Detect Conflicts Within a Vector
+      CONFLICT,
+
       /// Floating point max and min.
       FMAX, FMIN,
@@ -282,9 +292,8 @@ namespace llvm {
 
       // Vector integer truncate.
       VTRUNC,
-
-      // Vector integer truncate with mask.
-      VTRUNCM,
+      // Vector integer truncate with unsigned/signed saturation.
+      VTRUNCUS, VTRUNCS,
 
       // Vector FP extend.
       VFPEXT,
@@ -295,6 +304,9 @@ namespace llvm {
       // Vector signed/unsigned integer to double.
       CVTDQ2PD, CVTUDQ2PD,
 
+      // Convert a vector to mask, set bits based on MSB.
+      CVT2MASK,
+
       // 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
@@ -349,6 +361,7 @@ namespace llvm {
 
       // OR/AND test for masks
       KORTEST,
+      KTEST,
 
       // Several flavors of instructions with vector shuffle behaviors.
       PACKSS,
@@ -382,12 +395,24 @@ namespace llvm {
       VPERMIV3,
       VPERMI,
       VPERM2X128,
-      //Fix Up Special Packed Float32/64 values
+      // Bitwise ternary logic
+      VPTERNLOG,
+      // Fix Up Special Packed Float32/64 values
       VFIXUPIMM,
-      //Range Restriction Calculation For Packed Pairs of Float32/64 values
+      // Range Restriction Calculation For Packed Pairs of Float32/64 values
       VRANGE,
+      // Reduce - Perform Reduction Transformation on scalar/packed FP
+      VREDUCE,
+      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+      VRNDSCALE,
+      // VFPCLASS - Tests types of packed FP values.
+      VFPCLASS,
+      // VFPCLASSS - Tests types of scalar FP values.
+      VFPCLASSS,
       // Broadcast scalar to vector
       VBROADCAST,
+      // Broadcast mask to vector
+      VBROADCASTM,
       // Broadcast subvector to vector
       SUBV_BROADCAST,
       // Insert/Extract vector element
@@ -397,13 +422,21 @@ namespace llvm {
       /// SSE4A Extraction and Insertion.
       EXTRQI, INSERTQI,
 
+      // XOP variable/immediate rotations
+      VPROT, VPROTI,
+      // XOP arithmetic/logical shifts
+      VPSHA, VPSHL,
+      // XOP signed/unsigned integer comparisons
+      VPCOM, VPCOMU,
+
       // Vector multiply packed unsigned doubleword integers
       PMULUDQ,
       // Vector multiply packed signed doubleword integers
       PMULDQ,
       // Vector Multiply Packed UnsignedIntegers with Round and Scale
       MULHRS,
-
+      // Multiply and Add Packed Integers
+      VPMADDUBSW, VPMADDWD,
       // FMA nodes
       FMADD,
       FNMADD,
@@ -418,7 +451,6 @@ namespace llvm {
       FNMSUB_RND,
       FMADDSUB_RND,
       FMSUBADD_RND,
-      RNDSCALE,
 
       // Compress and expand
       COMPRESS,
@@ -443,9 +475,6 @@ namespace llvm {
       // falls back to heap allocation if not.
       SEG_ALLOCA,
 
-      // Windows's _ftol2 runtime routine to do fptoui.
-      WIN_FTOL,
-
       // Memory barrier
       MEMBARRIER,
       MFENCE,
@@ -580,15 +609,6 @@ namespace llvm {
     bool isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool TailCallOpt);
-
-    /// AVX512 static rounding constants. These need to match the values in
-    /// avx512fintrin.h.
-    enum STATIC_ROUNDING {
-      TO_NEAREST_INT = 0,
-      TO_NEG_INF = 1,
-      TO_POS_INF = 2,
-      TO_ZERO = 3,
-      CUR_DIRECTION = 4
-    };
   }
 
 //===--------------------------------------------------------------------===//
@@ -850,16 +870,7 @@ namespace llvm {
     /// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 - (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 - } - - /// Return true if the target uses the MSVC _ftol2 routine for fptoui. - bool isTargetFTOL() const; - - /// Return true if the MSVC _ftol2 routine should be used for fptoui to the - /// given type. - bool isIntegerTypeFTOL(EVT VT) const { - return isTargetFTOL() && VT == MVT::i64; + (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } /// \brief Returns true if it is beneficial to convert a load of a constant @@ -879,6 +890,16 @@ namespace llvm { unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + /// This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -890,6 +911,11 @@ namespace llvm { bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const override; + /// Return true if the target stores SafeStack pointer at a fixed offset in + /// some non-standard address space, and populates the address space and + /// offset as appropriate. + Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; + SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; @@ -899,6 +925,8 @@ namespace llvm { /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool isIntDivCheap(EVT VT, AttributeSet Attr) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -908,7 +936,6 @@ namespace llvm { /// Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - const DataLayout *TD; /// Select between SSE or x87 floating point ops. /// When SSE is available, use it for f32 operations. 
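The X86ScalarSSEf32/X86ScalarSSEf64 split referenced above decides which register file a scalar FP value lives in: f64 goes to an SSE register only with SSE2, f32 only with SSE1, and everything else falls back to the x87 stack. A minimal standalone sketch of the same decision (illustrative only; the boolean flags stand in for the real subtarget feature queries):

    #include <cassert>

    enum class FPReg { SSE, X87 };

    // Mirrors isScalarFPTypeInSSEReg: the predicate depends only on the
    // element width and the available SSE level.
    static FPReg classifyScalar(bool IsF64, bool HasSSE1, bool HasSSE2) {
      if (IsF64)
        return HasSSE2 ? FPReg::SSE : FPReg::X87;
      return HasSSE1 ? FPReg::SSE : FPReg::X87;
    }

    int main() {
      assert(classifyScalar(/*IsF64=*/true,  /*SSE1=*/true, /*SSE2=*/false) == FPReg::X87);
      assert(classifyScalar(/*IsF64=*/false, /*SSE1=*/true, /*SSE2=*/false) == FPReg::SSE);
      assert(classifyScalar(/*IsF64=*/true,  /*SSE1=*/true, /*SSE2=*/true)  == FPReg::SSE);
      return 0;
    }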
@@ -955,7 +982,6 @@ namespace llvm { const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const; @@ -969,7 +995,6 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; @@ -994,9 +1019,9 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; @@ -1042,27 +1067,16 @@ namespace llvm { const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; - bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicLoadInIR(LoadInst *SI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicRMWExpansionKind + TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; - bool needsCmpXchgNb(const Type *MemType) const; - - /// Utility function to emit atomic-load-arith operations (and, or, xor, - /// nand, max, min, umax, umin). It takes the corresponding instruction to - /// expand, the associated machine basic block, and the associated X86 - /// opcodes for reg/reg. - MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, - MachineBasicBlock *MBB) const; - - /// Utility function to emit atomic-load-arith operations (and, or, xor, - /// nand, add, sub, swap) for 64-bit operands on 32-bit target. - MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, - MachineBasicBlock *MBB) const; + bool needsCmpXchgNb(Type *MemType) const; // Utility function to emit the low-level va_arg code for X86-64. 
MachineBasicBlock *EmitVAARG64WithCustomInserter( @@ -1077,18 +1091,24 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -1121,7 +1141,7 @@ namespace llvm { unsigned &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; }; namespace X86 { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index faa91500b181..8bf2925a75db 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,7 +79,7 @@ class X86VectorVTInfo; // We map scalar types to the smallest (128-bit) vector type // with the appropriate element type. This allows to use the same masking logic. +def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; +def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; @@ -274,6 +276,22 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +// Similar to AVX512_maskable_3rc but in this case the input VT for the tied +// operand differs from the output VT. This requires a bitconvert on +// the preserved vector going into the vselect. +multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, + X86VectorVTInfo InVT, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, @@ -471,84 +489,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // - -multiclass vinsert_for_size_no_alt { +multiclass vinsert_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX_4V, EVEX_V512; + defm rr : AVX512_maskable, AVX512AIi8Base, EVEX_4V; - let mayLoad = 1 in - def rm : AVX512AIi8, - EVEX_4V, EVEX_V512, EVEX_CD8; + let mayLoad = 1 in + defm rm : AVX512_maskable, AVX512AIi8Base, EVEX_4V, + EVEX_CD8; } } -multiclass vinsert_for_size : - vinsert_for_size_no_alt { - // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4. Only add this if 64x2 and friends are not supported - // natively via AVX512DQ. 
- let Predicates = [NoDQI] in +multiclass vinsert_for_size_lowering p> { + let Predicates = p in { def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; + (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + + def : Pat<(vinsert_insert:$ins + (To.VT To.RC:$src1), + (From.VT (bitconvert (From.LdFrag addr:$src2))), + (iPTR imm)), + (To.VT (!cast(InstrStr#"rm") + To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + } } multiclass vinsert_for_type { - defm NAME # "32x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT32, VR256X>, + vinsert128_insert>, EVEX_V256; + + defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, - X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>; - let Predicates = [HasDQI] in - defm NAME # "64x2" : vinsert_for_size_no_alt, - X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>, VEX_W; - defm NAME # "64x4" : vinsert_for_size, EVEX_V512; + + defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 8, EltVT32, VR256>, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>, VEX_W; - let Predicates = [HasDQI] in - defm NAME # "32x8" : vinsert_for_size_no_alt, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>; + vinsert256_insert>, VEX_W, EVEX_V512; + + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vinsert_for_size, + X86VectorVTInfo< 4, EltVT64, VR256X>, + vinsert128_insert>, VEX_W, EVEX_V256; + + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT64, VR512>, + vinsert128_insert>, VEX_W, EVEX_V512; + + defm NAME # "32x8Z" : vinsert_for_size, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert>, EVEX_V512; + } } defm VINSERTF : vinsert_for_type; defm VINSERTI : vinsert_for_type; +// Codegen pattern with the alternative types, +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. 
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; + +// Codegen pattern with the alternative types insert VEC128 into VEC256 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +// Codegen pattern with the alternative types insert VEC128 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types insert VEC256 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; + // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), @@ -566,90 +623,158 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), // AVX-512 VECTOR EXTRACT //--- -multiclass vextract_for_size { - let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - defm rr : AVX512_maskable_in_asm, - AVX512AIi8Base, EVEX, EVEX_V512; - let mayStore = 1 in - def rm : AVX512AIi8, EVEX, EVEX_V512, EVEX_CD8; - } - - // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for - // vextracti32x4 - def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)), - (AltTo.VT (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, - (EXTRACT_get_vextract_imm To.RC:$ext)))>; - - // A 128/256-bit subvector extract from the first 512-bit vector position is +multiclass vextract_for_size_first_position_lowering { + // A subvector extract from the first vector position is // a subregister copy that needs no instruction. - def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))), - (To.VT - (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>; + def NAME # To.NumElts: + Pat<(To.VT (extract_subvector (From.VT From.RC:$src),(iPTR 0))), + (To.VT (EXTRACT_SUBREG (From.VT From.RC:$src), To.SubRegIdx))>; +} - // And for the alternative types. 
-  def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
-            (AltTo.VT
-             (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+multiclass vextract_for_size :
+  vextract_for_size_first_position_lowering {
+
+  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+    // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
+    // vextract_extract); we are interested only in patterns without a mask,
+    // the intrinsic patterns are matched below.
+    defm rr : AVX512_maskable_in_asm,
+              AVX512AIi8Base, EVEX;
+    let mayStore = 1 in {
+      def rm  : AVX512AIi8,
+                EVEX;
+
+      def rmk : AVX512AIi8, EVEX_K, EVEX;
+    } // mayStore = 1
+  }
 
   // Intrinsic call with masking.
   def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                   "x4_512")
-              VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
-            (!cast(NAME # To.EltSize # "x4rrk") To.RC:$src0,
-              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
-              VR512:$src1, imm:$idx)>;
+                   "x" # To.NumElts # "_" # From.Size)
+              From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
+            (!cast(NAME # To.EltSize # "x" # To.NumElts #
+                   From.ZSuffix # "rrk")
+              To.RC:$src0,
+              (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+              From.RC:$src1, imm:$idx)>;
 
   // Intrinsic call with zero-masking.
   def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                   "x4_512")
-              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
-            (!cast(NAME # To.EltSize # "x4rrkz")
-              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
-              VR512:$src1, imm:$idx)>;
+                   "x" # To.NumElts # "_" # From.Size)
+              From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
+            (!cast(NAME # To.EltSize # "x" # To.NumElts #
+                   From.ZSuffix # "rrkz")
+              (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+              From.RC:$src1, imm:$idx)>;
 
   // Intrinsic call without masking.
def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x4_512") - VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), - (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, imm:$idx)>; + "x" # To.NumElts # "_" # From.Size) + From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rr") + From.RC:$src1, imm:$idx)>; } -multiclass vextract_for_type { - defm NAME # "32x4" : vextract_for_size p> : + vextract_for_size_first_position_lowering { + + let Predicates = p in + def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + From.RC:$src1, + (EXTRACT_get_vextract_imm To.RC:$ext)))>; +} + +multiclass vextract_for_type { + defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, - vextract128_extract, - EXTRACT_get_vextract128_imm>; - defm NAME # "64x4" : vextract_for_size, + EVEX_V512, EVEX_CD8<32, CD8VT4>; + defm NAME # "64x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT64, VR256X>, + vextract256_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; + let Predicates = [HasVLX] in + defm NAME # "32x4Z256" : vextract_for_size, + X86VectorVTInfo< 4, EltVT32, VR128X>, + vextract128_extract>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vextract_for_size, + X86VectorVTInfo< 2, EltVT64, VR128X>, + vextract128_extract>, + VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vextract_for_size, + X86VectorVTInfo< 2, EltVT64, VR128X>, + vextract128_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; + defm NAME # "32x8Z" : vextract_for_size, - X86VectorVTInfo< 8, EltVT32, VR256>, - vextract256_extract, - EXTRACT_get_vextract256_imm>, VEX_W; + X86VectorVTInfo< 8, EltVT32, VR256X>, + vextract256_extract>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; + } } defm VEXTRACTF : vextract_for_type; defm VEXTRACTI : vextract_for_type; +// extract_subvector codegen patterns with the alternative types. +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. 
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; + +// Codegen pattern with the alternative types extract VEC128 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types extract VEC256 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; + // A 128-bit subvector insert to the first 512-bit vector position // is a subregister copy that needs no instruction. def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)), @@ -677,6 +802,10 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), @@ -694,50 +823,49 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, - ValueType svt, X86VectorVTInfo _> { - defm r : AVX512_maskable, - T8PD, EVEX; - let mayLoad = 1 in { - defm m : AVX512_maskable, - T8PD, EVEX; - } +multiclass avx512_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8; } -multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, - AVX512VLVectorVTInfo _> { - defm Z : avx512_fp_broadcast, +multiclass avx512_fp_broadcast_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + defm Z : avx512_broadcast_rm, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_fp_broadcast, - EVEX_V256; + defm Z256 : avx512_broadcast_rm, + EVEX_V256; } } let ExeDomain = SSEPackedSingle in { - defm 
VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, - avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss", + avx512vl_f32_info>; let Predicates = [HasVLX] in { - defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, - v4f32, v4f32x_info>, EVEX_V128, - EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss", + v4f32x_info, v4f32x_info>, EVEX_V128; } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd", + avx512vl_f64_info>, VEX_W; } // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. -// Later, we can canonize broadcast instructions before ISel phase and +// Later, we can canonize broadcast instructions before ISel phase and // eliminate additional patterns on ISel. // SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar // representations of source @@ -834,70 +962,50 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; -multiclass avx512_int_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, - RegisterClass KRC> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; +// Provide aliases for broadcast from the same register class that +// automatically does the extract. +multiclass avx512_int_broadcast_rm_lowering { + def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), + (!cast(NAME#DestInfo.ZSuffix#"r") + (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>; +} + +multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + defm Z : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info512, _.info256>, + EVEX_V512; + // Defined separately to avoid redefinition. 
+ defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>; + } + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info256, _.info256>, + EVEX_V256; + defm Z128 : avx512_broadcast_rm, + EVEX_V128; } } -defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, - loadi32, VR512, v16i32, v4i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, - loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VT1>; +defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", + avx512vl_i8_info, HasBWI>; +defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", + avx512vl_i16_info, HasBWI>; +defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", + avx512vl_i32_info, HasAVX512>; +defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", + avx512vl_i64_info, HasAVX512>, VEX_W; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - } + let mayLoad = 1 in + defm rm : AVX512_maskable, + AVX5128IBase, EVEX; } defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", @@ -944,10 +1052,45 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", EVEX_V512, EVEX_CD8<32, CD8VT8>; } -def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), - (VPBROADCASTDZrr VR128X:$src)>; -def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), - (VPBROADCASTQZrr VR128X:$src)>; +multiclass avx512_broadcast_32x2 opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src, + SDNode OpNode = X86SubVBroadcast> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; +} + +multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + let Predicates = [HasDQI] in + defm Z : avx512_broadcast_32x2, + EVEX_V512; + let Predicates = [HasDQI, HasVLX] in + defm Z256 : avx512_broadcast_32x2, + EVEX_V256; +} + +multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> : + avx512_common_broadcast_32x2 { + + let Predicates = [HasDQI, HasVLX] in + defm Z128 : avx512_broadcast_32x2, EVEX_V128; +} + +defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", + avx512vl_i32_info>; +defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", + avx512vl_f32_info>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; @@ -959,21 +1102,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>; - -def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; -def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>; - -def : Pat<(v16f32 
(int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), - (VBROADCASTSSZr VR128X:$src)>; -def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), - (VBROADCASTSDZr VR128X:$src)>; - // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), @@ -985,170 +1113,178 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- - -multiclass avx512_mask_broadcast opc, string OpcodeStr, - RegisterClass KRC> { -let Predicates = [HasCDI] in -def Zrr : AVX512XS8I opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass KRC> { + def rr : AVX512XS8I, EVEX, EVEX_V512; - -let Predicates = [HasCDI, HasVLX] in { -def Z128rr : AVX512XS8I, EVEX, EVEX_V128; -def Z256rr : AVX512XS8I, EVEX, EVEX_V256; -} + [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX; } -let Predicates = [HasCDI] in { -defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", - VK16>; -defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", - VK8>, VEX_W; -} - -//===----------------------------------------------------------------------===// -// AVX-512 - VPERM -// -// -- immediate form -- -multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { - def ri : AVX512AIi8, - EVEX; - def mi : AVX512AIi8, - EVEX, EVEX_CD8<_.EltSize, CD8VF>; -} -} - -multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, - X86VectorVTInfo Ctrl> : - avx512_perm_imm { - let ExeDomain = _.ExeDomain in { - def rr : AVX5128I, - EVEX_4V; - def rm : AVX5128I, - EVEX_4V; +multiclass avx512_mask_broadcast opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { + let Predicates = [HasCDI] in + defm Z : avx512_mask_broadcastm, EVEX_V512; + let Predicates = [HasCDI, HasVLX] in { + defm Z256 : avx512_mask_broadcastm, EVEX_V256; + defm Z128 : avx512_mask_broadcastm, EVEX_V128; } } -defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, - EVEX_V512; -defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, - EVEX_V512, VEX_W; -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; +defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", + avx512vl_i32_info, VK16>; +defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", + avx512vl_i64_info, VK8>, VEX_W; -// -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { +//===----------------------------------------------------------------------===// +// -- VPERMI2 - 3 source operands form -- +multiclass avx512_perm_i opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst" in { - defm rr: AVX512_maskable_3src, EVEX_4V, + (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, AVX5128IBase; let mayLoad = 1 in - defm rm: AVX512_maskable_3src, EVEX_4V, AVX5128IBase; } } -multiclass avx512_perm_3src_mb opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { +multiclass avx512_perm_i_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let mayLoad = 1, Constraints = "$src1 = 
$dst" in - defm rmb: AVX512_maskable_3src, + (_.VT (X86VPermi2X IdxVT.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>, AVX5128IBase, EVEX_4V, EVEX_B; } -multiclass avx512_perm_3src_sizes opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { - let Predicates = [HasAVX512] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, EVEX_V512; +multiclass avx512_perm_i_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } -multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + +multiclass avx512_perm_i_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { let Predicates = [HasBWI] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V512; + defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; } } -defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +// VPERMT2 +multiclass avx512_perm_t opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +let Constraints = "$src1 = $dst" in { + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; + + let mayLoad = 1 in + defm rm: 
AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_t_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_perm_t_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_t, + avx512_perm_t_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_t, + avx512_perm_t_mb, EVEX_V128; + defm NAME#256: avx512_perm_t, + avx512_perm_t_mb, EVEX_V256; + } +} + +multiclass avx512_perm_t_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_t, EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_t, EVEX_V128; + defm NAME#256: avx512_perm_t, EVEX_V256; + } +} + +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask @@ -1265,41 +1401,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), //===----------------------------------------------------------------------===// // avx512_cmp_scalar - AVX512 CMPSS and CMPSD -multiclass avx512_cmp_scalar { - def rr : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - IIC_SSE_ALU_F32S_RR>, EVEX_4V; - def rm : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VK1:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + +multiclass avx512_cmp_scalar{ + + defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>, EVEX_4V; + let mayLoad = 1 in + defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), + imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (OpNodeRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B; + // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", Suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32S_RR>, EVEX_4V; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs VK1:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V; + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">, + EVEX_4V, EVEX_B; + }// let isAsmParserOnly = 1, hasSideEffects = 0 + + let isCodeGenOnly = 1 in { + def rr : AVX512Ii8<0xC2, MRMSrcReg, + (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + _.FRC:$src2, + imm:$cc))], + IIC_SSE_ALU_F32S_RR>, EVEX_4V; let mayLoad = 1 in - def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - !strconcat("vcmp", Suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + def rm : AVX512Ii8<0xC2, MRMSrcMem, + (outs _.KRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2), + imm:$cc))], + IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; } } let Predicates = [HasAVX512] in { -defm VCMPSSZ : avx512_cmp_scalar, - XS; -defm VCMPSDZ : avx512_cmp_scalar, - XD, VEX_W; + defm VCMPSSZ : avx512_cmp_scalar, + AVX512XSIi8Base; + defm VCMPSDZ : avx512_cmp_scalar, + AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, @@ -1700,6 +1880,128 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; +// ---------------------------------------------------------------- +// FPClass +//handle fpclass instruction mask = op(reg_scalar,imm) +// op(mem_scalar,imm) +multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1, AddedComplexity = 20 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + } + } +} + +//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) +// fpclass(reg_vec, mem_vec, imm) +// fpclass(reg_vec, broadcast(eltVt), imm) +multiclass avx512_vector_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, string mem, string broadcast>{ + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + def rmb : AVX512,EVEX_B; + def rmbk : AVX512, + EVEX_B, EVEX_K; + } +} + +multiclass avx512_vector_fpclass_all opc, SDNode OpNode, Predicate prd, + string broadcast>{ + let Predicates = [prd] in { + defm Z : avx512_vector_fpclass, EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_vector_fpclass, EVEX_V128; + 
defm Z256 : avx512_vector_fpclass, EVEX_V256; + } +} + +multiclass avx512_fp_fpclass_all opcVec, + bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ + defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; + defm PD : avx512_vector_fpclass_all,EVEX_CD8<64, CD8VF> , VEX_W; + defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; + defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; +} + +defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, + X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX; + //----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers @@ -1786,6 +2088,11 @@ let Predicates = [HasDQI] in { (KMOVBmk addr:$dst, VK8:$src)>; def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (KMOVBkm addr:$src)>; + + def : Pat<(store VK4:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>; + def : Pat<(store VK2:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>; } let Predicates = [HasAVX512, NoDQI] in { def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), @@ -1837,10 +2144,15 @@ let Predicates = [HasAVX512] in { (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; def : Pat<(i32 (anyext VK1:$src)), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; + def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>; + def : Pat<(i8 (anyext VK1:$src)), + (EXTRACT_SUBREG + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; + def : Pat<(i64 (zext VK1:$src)), (AND64ri8 (SUBREG_TO_REG (i64 0), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>; @@ -1848,17 +2160,19 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_16bit)>; - def : Pat<(v16i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK16)>; - def : Pat<(v8i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK8)>; -} -let Predicates = [HasBWI] in { - def : Pat<(v32i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK32)>; - def : Pat<(v64i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK64)>; } +def : Pat<(v16i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK16)>; +def : Pat<(v8i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK8)>; +def : Pat<(v4i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK4)>; +def : Pat<(v2i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK2)>; +def : Pat<(v32i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK32)>; +def : Pat<(v64i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK64)>; // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 
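(Aside, not part of the patch: the zext/anyext patterns above are what fire when a compare mask is widened into a general-purpose register. A minimal sketch, assuming AVX-512F intrinsics, with an illustrative helper name:)

    #include <immintrin.h>
    #include <cstdint>

    // The (uint32_t) widening of the __mmask16 is selected as KMOVWrk,
    // with an AND only when a single mask bit must be zero-extended.
    uint32_t count_equal_lanes(__m512i a, __m512i b) {
      __mmask16 k = _mm512_cmpeq_epi32_mask(a, b); // vpcmpeqd -> %k0
      return _mm_popcnt_u32((uint32_t)k);          // kmovw %k0, %eax; popcnt
    }
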
@@ -1955,11 +2269,12 @@ multiclass avx512_mask_binop opc, string OpcodeStr, } multiclass avx512_mask_binop_all opc, string OpcodeStr, - SDPatternOperator OpNode, bit IsCommutable> { + SDPatternOperator OpNode, bit IsCommutable, + Predicate prdW = HasAVX512> { defm B : avx512_mask_binop, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop, VEX_4V, VEX_L, PS; + prdW, IsCommutable>, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop; defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>; defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; multiclass avx512_mask_binop_int { let Predicates = [HasAVX512] in @@ -2047,59 +2363,48 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; // Mask unpacking -multiclass avx512_mask_unpck opc, string OpcodeStr, - RegisterClass KRC> { - let Predicates = [HasAVX512] in - def rr : I; +multiclass avx512_mask_unpck { + let Predicates = [prd] in { + def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), + (ins KRC:$src1, KRC:$src2), + "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V, VEX_L; + + def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), + (!cast(NAME##rr) + (COPY_TO_REGCLASS KRCSrc:$src2, KRC), + (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; + } } -multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { - defm BW : avx512_mask_unpck, - VEX_4V, VEX_L, PD; -} - -defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; -def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))), - (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16), - (COPY_TO_REGCLASS VK8:$src1, VK16))>; - - -multiclass avx512_mask_unpck_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"BWrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; -} -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; +defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; +defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode> { - let Predicates = [HasAVX512], Defs = [EFLAGS] in + SDNode OpNode, Predicate prd> { + let Predicates = [prd], Defs = [EFLAGS] in def rr : I; } -multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode> { - defm W : avx512_mask_testop, - VEX, PS; - let Predicates = [HasDQI] in - defm B : avx512_mask_testop, - VEX, PD; - let Predicates = [HasBWI] in { - defm Q : avx512_mask_testop, - VEX, PS, VEX_W; - defm D : avx512_mask_testop, - VEX, PD, VEX_W; - } +multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode, + Predicate prdW = HasAVX512> { + defm B : avx512_mask_testop, + VEX, PD; + defm W : avx512_mask_testop, + VEX, PS; + defm Q : avx512_mask_testop, + VEX, PS, VEX_W; + defm D : avx512_mask_testop, + VEX, PD, VEX_W; } defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, @@ -2124,7 +2429,7 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, 
let Predicates = [HasDQI] in defm D : avx512_mask_shiftop, VEX, TAPD; - } + } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -2167,24 +2472,52 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>; + +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))), + (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>; + def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))), (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>; -let Predicates = [HasVLX] in { - def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; - def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; - def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), - (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; - def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; - def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; -} +def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; + +def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; + +def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; + +def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; +def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; + +def : Pat<(v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>; + +def : Pat<(v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>; + def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))), (v8i1 (COPY_TO_REGCLASS @@ -2304,23 +2637,21 @@ multiclass avx512_load_vl opc, string OpcodeStr, multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore> { - let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def rr_alt : AVX512PI, EVEX; - let Constraints = "$src1 = $dst" in - def rrk_alt : AVX512PI, EVEX; + def rrk_REV : AVX512PI, EVEX, EVEX_K; - def rrkz_alt : AVX512PI, 
EVEX, EVEX_KZ; - } + let mayStore = 1 in { def mr : AVX512PI; -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} - defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, @@ -2502,17 +2817,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -// NoVLX patterns -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} // Move Int Doubleword to Packed Double Int // @@ -2520,32 +2824,37 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG; + EVEX; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX, VEX_W; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in +def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", []>, + EVEX, VEX_W, EVEX_CD8<64, CD8VT1>; let isCodeGenOnly = 1 in { -def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), +def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))], + [(set FR64X:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), +def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))], + [(set GR64:$dst, (bitconvert FR64X:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -} -def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), +def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(store 
(i64 (bitconvert FR64:$src)), addr:$dst)], + [(store (i64 (bitconvert FR64X:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>; +} // Move Int Doubleword to Single Scalar // @@ -2553,27 +2862,27 @@ let isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move doubleword from xmm register to r/m32 // def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - EVEX, VEX_LIG; + EVEX; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128X:$src), + [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + EVEX, EVEX_CD8<32, CD8VT1>; // Move quadword from xmm1 register to r/m64 // @@ -2581,16 +2890,28 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Requires<[HasAVX512, In64BitMode]>; -def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), - (ins i64mem:$dst, VR128X:$src), - "vmovq\t{$src, $dst|$dst, $src}", - [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), - addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, - Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in +def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, + Requires<[HasAVX512, In64BitMode]>; + +def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), + (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), + addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, + Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; + +let hasSideEffects = 0 in +def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovq.s\t{$src, $dst|$dst, $src}",[]>, + EVEX, VEX_W; // Move Scalar Single to Double Int // @@ -2599,92 +2920,95 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], - IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; + IIC_SSE_MOVD_ToGP>, EVEX; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move Quadword Int to Packed Quadword Int // -def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), +def 
VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, - EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; //===----------------------------------------------------------------------===// // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { - let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, (vt (OpNode VR128X:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in - def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), - !strconcat(asm, - "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), - [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; - def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, - EVEX, VEX_LIG; +multiclass avx512_move_scalar { + defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), + asm, "$src2, $src1","$src1, $src2", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2))), + IIC_SSE_MOV_S_RR>, EVEX_4V; + let Constraints = "$src1 = $dst" , mayLoad = 1 in + defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _, + (outs _.RC:$dst), + (ins _.ScalarMemOp:$src), + asm,"$src","$src", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src)))))>, EVEX; + let isCodeGenOnly = 1 in { + def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src1, _.FRC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))))], + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; + let mayLoad = 1 in + def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + } let mayStore = 1 in { - def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG; - def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG, EVEX_K; + def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, + EVEX; + def mrk: AVX512PI<0x11, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; } // mayStore - } //hasSideEffects = 0 } -let ExeDomain = SSEPackedSingle in -defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem, - loadf32>, XS, EVEX_CD8<32, CD8VT1>; +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, + VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : 
avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, - loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, + VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), - (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), - (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -// For the disassembler -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR32X:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, EVEX_4V, VEX_LIG; - def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR64X:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, EVEX_4V, VEX_LIG, VEX_W; -} +defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovss.s", "$src2, $src1", "$src1, $src2", []>, + XS, EVEX_4V, VEX_LIG; + +defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, + XD, EVEX_4V, VEX_LIG, VEX_W; let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -2768,10 +3092,10 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; // Extract and store. 
- def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; - def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>; @@ -2835,7 +3159,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; -let AddedComplexity = 20 in +let AddedComplexity = 20 , isCodeGenOnly = 1 in def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -2964,7 +3288,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, @@ -2972,7 +3296,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, let mayLoad = 1 in defm rm : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q; - defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w; - defm B : avx512_binop_rm_vl_b; } @@ -3086,15 +3410,15 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, } multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, - SDNode OpNode,X86VectorVTInfo _Src, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, bit IsCommutable = 0> { - defm rr : AVX512_maskable, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in { defm rm : AVX512_maskable opc, string OpcodeStr, OpndItins itins, AVX512BIBase, EVEX_4V; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B; @@ -3127,24 +3451,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, - SSE_INTALU_ITINS_P, HasBWI, 0>; -defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P, - HasBWI, 1>; -defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, - HasBWI, 1>; -defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, - HasBWI, 1>, T8PD; -defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SSE_INTALU_ITINS_P, HasBWI, 1>; - +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, 
"vpmullq", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, + SSE_INTALU_ITINS_P, HasBWI, 1>; + multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, SDNode OpNode, bit IsCommutable = 0> { @@ -3159,7 +3483,7 @@ multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, v4i32x_info, v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } -} +} defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, X86pmuldq, 1>,T8PD; @@ -3170,25 +3494,25 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { let mayLoad = 1 in { defm rmb : AVX512_maskable, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; } } -multiclass avx512_packs_rm opc, string OpcodeStr, - SDNode OpNode,X86VectorVTInfo _Src, +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { - defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; let mayLoad = 1 in { @@ -3229,102 +3553,59 @@ multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, v16i8x_info>, EVEX_V128; } } + +multiclass avx512_vpmadd opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo _Src, + AVX512VLVectorVTInfo _Dst> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} + let Predicates = [HasBWI] in { defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; + + defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD; + defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, + avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMINS : 
avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3362,12 +3643,12 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, let isCodeGenOnly = 1, isCommutable = IsCommutable, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.FRC:$src2), + (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], itins.rr>; def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.ScalarMemOp:$src2), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))], itins.rr>; @@ -3375,7 +3656,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_fp_scalar_round opc, string 
OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { defm rrb : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed, EVEX_V512, PS, @@ -3514,7 +3795,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; @@ -3550,13 +3831,34 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { - defm PSZ : avx512_fp_scalef_p, +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p, + defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p, @@ -3569,7 +3871,7 @@ multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -3586,7 +3888,7 @@ multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable_cmp, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; @@ -3748,12 +4050,12 @@ multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, VTInfo.info256>, EVEX_V256; defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V128; } } -multiclass avx512_shift_rmi_w opcw, +multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3846,6 +4148,27 @@ multiclass avx512_var_shift_types opc, string OpcodeStr, avx512vl_i64_info>, VEX_W; } +// Use 512bit version to implement 128/256 bit in case NoVLX. 
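(Aside, not part of the patch: the avx512_var_shift_w_lowering multiclass that follows implements exactly this widening. A rough C-level analogue, assuming AVX512BW without AVX512VL; the helper name is illustrative:)

    #include <immintrin.h>

    // Without VLX there is no 256-bit vpsllvw, so operate at 512 bits:
    // insert the YMM values into ZMM registers (the INSERT_SUBREGs in the
    // patterns below), shift, and take the low half back (EXTRACT_SUBREG).
    __m256i sllv_epi16_novlx(__m256i v, __m256i cnt) {
      __m512i wv = _mm512_castsi256_si512(v);
      __m512i wc = _mm512_castsi256_si512(cnt);
      return _mm512_castsi512_si256(_mm512_sllv_epi16(wv, wc));
    }
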
+multiclass avx512_var_shift_w_lowering { + let Predicates = [HasBWI, NoVLX] in { + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), + (_.info256.VT _.info256.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), + (_.info128.VT _.info128.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; + } +} + multiclass avx512_var_shift_w opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3861,11 +4184,14 @@ multiclass avx512_var_shift_w opc, string OpcodeStr, } defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, - avx512_var_shift_w<0x12, "vpsllvw", shl>; + avx512_var_shift_w<0x12, "vpsllvw", shl>, + avx512_var_shift_w_lowering; defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, - avx512_var_shift_w<0x11, "vpsravw", sra>; + avx512_var_shift_w<0x11, "vpsravw", sra>, + avx512_var_shift_w_lowering; defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, - avx512_var_shift_w<0x10, "vpsrlvw", srl>; + avx512_var_shift_w<0x10, "vpsrlvw", srl>, + avx512_var_shift_w_lowering; defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; @@ -3916,19 +4242,77 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPERMIL +//===----------------------------------------------------------------------===// +multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo Ctrl> { + defm rr: AVX512_maskable, + T8PD, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, + T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmb: AVX512_maskable, + T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + }//let mayLoad = 1 +} + +multiclass avx512_permil_vec_common OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_permil_vec, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_permil_vec, EVEX_V128; + defm Z256 : avx512_permil_vec, EVEX_V256; + } +} + +multiclass avx512_permil OpcImm, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + + defm NAME: avx512_permil_vec_common; + defm NAME: avx512_shift_rmi_sizes, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; +} + +defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, + avx512vl_i32_info>; +defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, avx512vl_i32_info>, + X86PShufd, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, 
                                     MRMSrcReg, MRMSrcMem, "vpshufhw",
-                                    X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+                                    X86PShufhw>, EVEX, AVX512XSIi8Base;
 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
-                                    X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
-
+                                    X86PShuflw>, EVEX, AVX512XDIi8Base;
+
 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
   let Predicates = [HasBWI] in
     defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
@@ -3941,55 +4325,6 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;

-//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
-                          X86MemOperand x86memop, PatFrag memop_frag> {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst,
-                     (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
-                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
-          (VMOVDDUPZrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
-// Replicate Single FP - MOVSHDUP and MOVSLDUP
-//===---------------------------------------------------------------------===//
-multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
-                                ValueType vt, RegisterClass RC, PatFrag mem_frag,
-                                X86MemOperand x86memop> {
-  def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
-  let mayLoad = 1 in
-  def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
-}
-
-defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
-                                       v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
-                                       EVEX_CD8<32, CD8VF>;
-defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
-                                       v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
-                                       EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
-          (VMOVSHDUPZrm addr:$src)>;
-def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
-          (VMOVSLDUPZrm addr:$src)>;
-
 //===----------------------------------------------------------------------===//
 // Move Low to High and High to Low packed FP Instructions
 //===----------------------------------------------------------------------===//
@@ -4016,6 +4351,115 @@ let Predicates = [HasAVX512] in {
             (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
 }

+//===----------------------------------------------------------------------===//
+// VMOVHPS/PD VMOVLPS Instructions
+// All patterns were taken from the SSE implementation.
+//===----------------------------------------------------------------------===// +multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + def rm : AVX512, EVEX_4V; +} + +defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; +defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; + +let Predicates = [HasAVX512] in { + // VMOVHPS patterns + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVHPD patterns + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPS patterns + def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPD patterns + def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, + (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; +} + +let mayStore = 1 in { +def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), + (bc_v2f64 (v4f32 VR128X:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128X:$src), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +} +let Predicates = [HasAVX512] in { + // VMOVHPD patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), + (iPTR 0))), addr:$dst), + (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; + // VMOVLPS patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 
(loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + // VMOVLPD patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; +} //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // @@ -4034,7 +4478,7 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>, - AVX512FMA3Base; + AVX512FMA3Base; defm mb: AVX512_maskable_3src opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, Operand memop, ComplexPattern mem_cpat, - string asm> { -let hasSideEffects = 0 in { - def rr : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; - let mayLoad = 1 in - def rm : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; -} // hasSideEffects = 0 +multiclass avx512_cvt_s_int_round opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, + Operand memop, ComplexPattern mem_cpat, string asm> { + let hasSideEffects = 0, Predicates = [HasAVX512] in { + def rr : SI, EVEX, VEX_LIG; + def rb : SI, + EVEX, VEX_LIG, EVEX_B, EVEX_RC; + let mayLoad = 1 in + def rm : SI, EVEX, VEX_LIG; + } // hasSideEffects = 0, Predicates = [HasAVX512] } -let Predicates = [HasAVX512] in { + // Convert float/double to signed/unsigned int 32/64 -defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, +defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, +defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, +defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtss2usi, ssmem, sse_load_f32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem, sse_load_f32, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, +defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, +defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, +defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtsd2usi, sdmem, sse_load_f64, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem, sse_load_f64, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 , Predicates = [HasAVX512] in { defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, 
VR128X, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, EVEX_4V; @@ -4495,121 +4944,170 @@ let isCodeGenOnly = 1 in { defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; -} // isCodeGenOnly = 1 +} // isCodeGenOnly = 1, Predicates = [HasAVX512] // Convert float/double to signed/unsigned int 32/64 with truncation -let isCodeGenOnly = 1 in { - defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si">, - XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, - "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttss2usi64, ssmem, - sse_load_f32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttsd2usi, - sdmem, sse_load_f64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttsd2usi64, sdmem, - sse_load_f64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -} // isCodeGenOnly = 1 +multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, + X86VectorVTInfo _DstRC, SDNode OpNode, + SDNode OpNodeRnd>{ +let Predicates = [HasAVX512] in { + def rr : SI, EVEX; + def rb : SI, EVEX, EVEX_B; + def rm : SI, + EVEX; -multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm> { - def rr : SI, EVEX; - def rm : SI, EVEX; + let isCodeGenOnly = 1,hasSideEffects = 0 in { + def rr_Int : SI, EVEX, VEX_LIG; + def rb_Int : SI, + EVEX,VEX_LIG , EVEX_B; + let mayLoad = 1 in + def rm_Int : SI, EVEX, VEX_LIG; + + } // isCodeGenOnly = 1, hasSideEffects = 0 +} //HasAVX512 } -defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; + +defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", 
                                     f32x_info, i32x_info,
+                                   fp_to_sint,X86cvttss2IntRnd>,
+                                   XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
+                                   fp_to_sint,X86cvttss2IntRnd>,
+                                   VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
+                                   fp_to_sint,X86cvttsd2IntRnd>,
+                                   XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
+                                   fp_to_sint,X86cvttsd2IntRnd>,
+                                   VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
+                                   fp_to_uint,X86cvttss2UIntRnd>,
+                                   XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
+                                   fp_to_uint,X86cvttss2UIntRnd>,
+                                   XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
+                                   fp_to_uint,X86cvttsd2UIntRnd>,
+                                   XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
+                                   fp_to_uint,X86cvttsd2UIntRnd>,
+                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+let Predicates = [HasAVX512] in {
+  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+            (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+            (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+            (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+            (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ } // HasAVX512
 //===----------------------------------------------------------------------===//
 // AVX-512 Convert from float to double and back
 //===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
-                   (ins FR32X:$src1, FR32X:$src2),
-                   "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
-                   (ins FR32X:$src1, f32mem:$src2),
-                   "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
-                   EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
-                   (ins FR64X:$src1, FR64X:$src2),
-                   "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
-                   (ins FR64X:$src1, f64mem:$src2),
-                   "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, VEX_W,
-                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode> {
+  defm rr : AVX512_maskable_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  defm rm : AVX512_maskable_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
-          Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
-          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
+// Scalar conversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                    X86VectorVTInfo _Src,
SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, EVEX_B; +} -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, +// Scalar Conversion with rounding control (RC) +multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_B, EVEX_RC; +} +multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, + EVEX_V512, XD; + } +} + +multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, + EVEX_CD8<32, CD8VT1>, XS, EVEX_V512; + } +} +defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, + X86froundRnd, f64x_info, f32x_info>; +defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, + X86fpextRnd,f32x_info, f64x_info >; + +def : Pat<(f64 (fextend FR32X:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X), + (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>, + Requires<[HasAVX512]>; +def : Pat<(f64 (fextend (loadf32 addr:$src))), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, + Requires<[HasAVX512]>; + +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[HasAVX512, OptForSize]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, - Requires<[HasAVX512, OptForSpeed]>; +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>, + Requires<[HasAVX512, OptForSpeed]>; -def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, +def : Pat<(f32 (fround FR64X:$src)), + (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X), + (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>, Requires<[HasAVX512]>; - //===----------------------------------------------------------------------===// // AVX-512 Vector convert from signed/unsigned integer to float/double // and from float/double to signed/unsigned integer @@ -4992,7 +5490,7 @@ defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; -let Predicates = [NoVLX] in { +let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; @@ -5024,40 +5522,102 @@ let Predicates = [HasAVX512] in { //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// -multiclass avx512_cvtph2ps { - def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", - []>, EVEX; - let hasSideEffects = 0, mayLoad = 1 in - def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins 
x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX; +multiclass avx512_cvtph2ps { + defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_CURRENT))>, T8PD; + let hasSideEffects = 0, mayLoad = 1 in { + defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))), + (i32 FROUND_CURRENT))>, T8PD; + } } -multiclass avx512_cvtps2ph { - def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst), - (ins srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX; - let hasSideEffects = 0, mayStore = 1 in - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; +multiclass avx512_cvtph2ps_sae { + defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "{sae}, $src", "$src, {sae}", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; + } -defm VCVTPH2PSZ : avx512_cvtph2ps, EVEX_V512, - EVEX_CD8<32, CD8VH>; -defm VCVTPS2PHZ : avx512_cvtps2ph, EVEX_V512, - EVEX_CD8<32, CD8VH>; +let Predicates = [HasAVX512] in { + defm VCVTPH2PSZ : avx512_cvtph2ps, + avx512_cvtph2ps_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } +} -def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src), - imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))), - (VCVTPS2PHZrr VR512:$src, imm:$rc)>; +multiclass avx512_cvtps2ph { + defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, $src1", "$src1, $src2", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_CURRENT))>, AVX512AIi8Base; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), (i32 FROUND_CURRENT) )), + addr:$dst)]>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } +} +multiclass avx512_cvtps2ph_sae { + defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base; +} +let Predicates = [HasAVX512] in { + defm VCVTPS2PHZ : avx512_cvtps2ph, + avx512_cvtps2ph_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPS2PHZ256 : avx512_cvtps2ph, + EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph, + EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } +} -def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src), - (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))), - (VCVTPH2PSZrr VR256X:$src)>; +// Unordered/Ordered scalar fp 
compare with Sae and set EFLAGS +multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr> { + def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[WriteFAdd]>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; +} let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, @@ -5067,10 +5627,10 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { - defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, "comiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } @@ -5092,50 +5652,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { } /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd -multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX_4V; +multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in { + defm rr : AVX512_maskable_scalar, EVEX_4V; let mayLoad = 1 in { - def rm : AVX5128I, EVEX_4V; + defm rm : AVX512_maskable_scalar, EVEX_4V; } } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; - -def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; - -def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s,
f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -5183,20 +5724,6 @@ multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; -def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRSQRT14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRSQRT14PDZr VR512:$src)>; - -def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRCP14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRCP14PDZr VR512:$src)>; - /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode> { @@ -5232,6 +5759,8 @@ let hasSideEffects = 0, Predicates = [HasERI] in { defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } + +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, @@ -5322,67 +5851,6 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } } -multiclass avx512_sqrt_scalar opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int, - OpndItins itins_s, OpndItins itins_d> { - def SSZr : SI, XS, EVEX_4V; - let isCodeGenOnly = 1 in - def SSZr_Int : SIi8, XS, EVEX_4V; - let mayLoad = 1 in { - def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - let isCodeGenOnly = 1 in - def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : SI, - XD, EVEX_4V, VEX_W; - let isCodeGenOnly = 1 in - def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : SI, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - let isCodeGenOnly = 1 in - def SDZm_Int : SIi8, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - } -} - multiclass avx512_sqrt_packed_all opc, string OpcodeStr, SDNode OpNode> { defm PSZ : avx512_sqrt_packed opc, string OpcodeStr, v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } +multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, + string SUFF, SDNode OpNode, SDNode OpNodeRnd> { + + defm r_Int : AVX512_maskable_scalar; + let mayLoad = 1 in + defm m_Int : AVX512_maskable_scalar; + + defm rb_Int : AVX512_maskable_scalar, + EVEX_B, EVEX_RC; + + let isCodeGenOnly = 1 in { + def r : I; + + let mayLoad = 1 in + def m : I; + } + + def : Pat<(_.EltVT (OpNode _.FRC:$src)), + (!cast(NAME#SUFF#Zr) + (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; + + def : Pat<(_.EltVT (OpNode (load addr:$src))), + (!cast(NAME#SUFF#Zm) + (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; +} + +multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { + defm SSZ : avx512_sqrt_scalar, 
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; +} + defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>; +defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; let Predicates = [HasAVX512] in { - def : Pat<(f32 (fsqrt FR32X:$src)), - (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; - def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - def : Pat<(f64 (fsqrt FR64X:$src)), - (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; - def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>; def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, + (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[OptForSize]>; - def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>; def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, + (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[OptForSize]>; - - def : Pat<(int_x86_sse_sqrt_ss VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; - def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), - (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; - - def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR64)), - VR128X)>; - def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), - (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; } - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. 
- // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; - - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain -} - -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - loadv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; - - -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - loadv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; - multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { @@ -5510,20 +5962,20 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm r : AVX512_maskable_scalar; defm rb : AVX512_maskable_scalar, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; } @@ -5568,109 +6020,238 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; -let Predicates = [HasAVX512] in { -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; -} //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_sat opc, string OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, + X86MemOperand x86memop> { + + defm rr : AVX512_maskable, + EVEX, T8XS; + + // for intrinsic pattern match + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + undef)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.ImmAllZerosV)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.RC:$src0)), + (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, + DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + let mayStore = 1 in { + def mr : AVX512XS8I, EVEX; - def rrk : AVX512XS8I, EVEX, EVEX_K; - - def rrkz : AVX512XS8I, EVEX, EVEX_KZ; - - def mr :
AVX512XS8I, EVEX; - - def mrk : AVX512XS8I, EVEX, EVEX_K; - + }//mayStore = 1 } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; +multiclass avx512_trunc_mr_lowering { -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; + def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), + (!cast(NAME#SrcInfo.ZSuffix##mr) + addr:$dst, SrcInfo.RC:$src)>; + def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, + (SrcInfo.VT SrcInfo.RC:$src)), + (!cast(NAME#SrcInfo.ZSuffix##mrk) + addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; +} + +multiclass avx512_trunc_sat_mr_lowering { + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), + (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, + (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), + (SrcInfo.VT SrcInfo.RC:$src))>; + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), + 
(!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, + (SrcInfo.VT SrcInfo.RC:$src))>; +} + +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, + Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; +} + +multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; +} +multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; +} + +multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; +} +multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; +} + +multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; +} +multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; +} + +multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; +defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; +defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; +defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; +defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; +defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; +defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; +defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; +defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; + +defm VPMOVDW 
: avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; +defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; +defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; +defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; +defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; + +let Predicates = [HasAVX512, NoVLX] in { +def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), + (v8i16 (EXTRACT_SUBREG + (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))), + (v4i32 (EXTRACT_SUBREG + (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +} + +let Predicates = [HasBWI, NoVLX] in { +def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), + (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm))), sub_xmm))>; +} multiclass avx512_extend_common opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, @@ -5985,163 +6566,11 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -//===----------------------------------------------------------------------===// -// VSHUFPS - VSHUFPD Operations - -multiclass avx512_shufp { - def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; - def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffle]>; -} - -defm VSHUFPSZ : avx512_shufp, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VSHUFPDZ : avx512_shufp, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v16i32 (X86Shufp VR512:$src1, - (loadv16i32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v8i64 (X86Shufp VR512:$src1, - (loadv8i64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; // Helper fragments to match sext vXi1 to vXiY. 
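A minimal scalar model (an illustration added here, not code from the patch) of what the saturating VPMOV* truncates defined above compute per lane: X86vtruncs clamps into the signed range of the narrower type and X86vtruncus into its unsigned range before truncating, unlike plain X86vtrunc, which simply drops the high bits.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One lane of vpmovsqb: i64 -> i8 with signed saturation.
static int8_t trunc_sat_signed(int64_t v) {
  return static_cast<int8_t>(
      std::min<int64_t>(std::max<int64_t>(v, INT8_MIN), INT8_MAX));
}

// One lane of vpmovusqb: u64 -> u8 with unsigned saturation.
static uint8_t trunc_sat_unsigned(uint64_t v) {
  return static_cast<uint8_t>(std::min<uint64_t>(v, UINT8_MAX));
}

int main() {
  // Prints "-128 255"; a plain vpmovqb-style truncate would instead
  // keep the low byte (24 and 232 respectively).
  std::printf("%d %u\n", trunc_sat_signed(-1000), trunc_sat_unsigned(1000));
}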
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, - X86MemOperand x86memop, - X86MemOperand x86scalar_mop, string BrdcstStr> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX; - let mayLoad = 1 in - def rm : AVX5128I, EVEX; - let mayLoad = 1 in - def rmb : AVX5128I, EVEX, EVEX_B; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; - - let Constraints = "$src1 = $dst" in { - def rrk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; - } - } -} - -let Predicates = [HasCDI] in { -defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPCONFLICTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPCONFLICTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -let Predicates = [HasCDI] in { -defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPLZCNTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPLZCNTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))), - (VPLZCNTDrm addr:$src)>; -def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), - (VPLZCNTDrr VR512:$src)>; -def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))), - (VPLZCNTQrm addr:$src)>; -def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), - (VPLZCNTQrr VR512:$src)>; - def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; @@ -6197,7 +6626,7 @@ defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; multiclass convert_vector_to_mask_common opc, X86VectorVTInfo _, string OpcodeStr > { def rr : AVX512XS8I, EVEX; + [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX; } multiclass avx512_convert_vector_to_mask opc, string OpcodeStr, @@ -6230,7 +6659,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { @@ -6242,7 +6671,7 @@ multiclass compress_by_vec_width opc, X86VectorVTInfo _, def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; @@ -6272,7 +6701,7 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info multiclass expand_by_vec_width opc, X86VectorVTInfo _, string 
OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayLoad = 1 in @@ -6302,6 +6731,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) @@ -6309,49 +6794,60 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rri : AVX512_maskable; let mayLoad = 1 in { defm rmi : AVX512_maskable; defm rmbi : AVX512_maskable, EVEX_B; } } +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ + + defm rri : AVX512_maskable; + let mayLoad = 1 in + defm rmi : AVX512_maskable; +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ - defm rri : AVX512_maskable; - let mayLoad = 1 in { - defm rmi : AVX512_maskable; + X86VectorVTInfo _>: + avx512_3Op_rm_imm8{ + + let mayLoad = 1 in defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (i8 imm:$src3))>, EVEX_B; - } } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) @@ -6369,20 +6864,20 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rri : AVX512_maskable_scalar; let mayLoad = 1 in { defm rmi : AVX512_maskable_scalar; let isAsmParserOnly = 1 in { @@ -6398,18 +6893,25 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable, EVEX_B; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - defm NAME: avx512_fp_sae_packed_imm; + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; } multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, string OpStr, + AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{ + let Predicates = [HasBWI] in { + defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; + } + let Predicates = [HasBWI, HasVLX] in { + 
defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; + defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; + } +} + multiclass avx512_common_3Op_imm8 opc, SDNode OpNode>{ let Predicates = [HasAVX512] in { @@ -6447,6 +6963,14 @@ multiclass avx512_common_fp_sae_scalar_imm opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<64, CD8VF>, VEX_W; +} + defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6461,6 +6985,14 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, 0x55, X86VFixupimm, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, + X86VReduce, HasDQI>, AVX512AIi8Base, EVEX; +defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, + X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX; +defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, + X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX; + + defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6475,6 +7007,19 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ @@ -6486,6 +7031,29 @@ multiclass avx512_shuff_packed_128, EVEX_V256; } } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -6496,31 +7064,51 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, defm VSHUFI64X2 : 
avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -multiclass avx512_valign{ +multiclass avx512_valign { defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; - let isCodeGenOnly = 1 in { - defm NAME#_FP: avx512_common_3Op_imm8, - AVX512AIi8Base, EVEX_4V; - } } -defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_vpalign_lowering p>{ + let Predicates = p in + def NAME#_.VTName#rri: + Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), + (!cast(NAME#_.ZSuffix#rri) + _.RC:$src1, _.RC:$src2, imm:$imm)>; +} + +multiclass avx512_vpalign_lowering_common: + avx512_vpalign_lowering<_.info512, [HasBWI]>, + avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>, + avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>; + +defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , + avx512vl_i8_info, avx512vl_i8_info>, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + EVEX_CD8<8, CD8VF>; + +defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , + avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; + multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rr : AVX512_maskable, EVEX, AVX5128IBase; let mayLoad = 1 in defm rm : AVX512_maskable, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; @@ -6531,7 +7119,7 @@ multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, avx512_unary_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, Predicate prd> { - defm Q : avx512_unary_rmb_vl, VEX_W; - defm D : avx512_unary_rmb_vl; + defm D : avx512_unary_rmb_vl; } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, Predicate prd> { - defm W : avx512_unary_rm_vl; - defm B : avx512_unary_rm_vl; + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, @@ -6598,3 +7187,332 @@ def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)), (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; + +multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ + + defm NAME : avx512_unary_rm_vl_dq; +} + +defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; + +//===---------------------------------------------------------------------===// +// Replicate Single FP - MOVSHDUP and MOVSLDUP +//===---------------------------------------------------------------------===// +multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_unary_rm_vl, XS; +} + +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; + +//===----------------------------------------------------------------------===// +// AVX-512 - MOVDDUP +//===----------------------------------------------------------------------===// + +multiclass avx512_movddup_128 opc, string OpcodeStr, 
SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX; + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, EVEX_CD8<_.EltSize, CD8VH>; +} + +multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_movddup_128, + EVEX_V128; + } +} + +multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_movddup_common, XD, VEX_W; +} + +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; + +def : Pat<(X86Movddup (loadv2f64 addr:$src)), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; +def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Extract & Insert Integer Instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayStore = 1 in + def mr : AVX512Ii8, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_extract_elt_b { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; + } +} + +multiclass avx512_extract_elt_w { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, PD; + + def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; + } +} + +multiclass avx512_extract_elt_dq { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), + (ins _.RC:$src1, u8imm:$src2), 
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GRC:$dst, + (extractelt (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + let mayStore = 1 in + def mr : AVX512Ii8<0x16, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (extractelt (_.VT _.RC:$src1), + imm:$src2),addr:$dst)]>, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD; + } +} + +defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>; +defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>; +defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; +defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; + +multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + def rm : AVX512Ii8, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8, EVEX_4V; + + defm NAME : avx512_insert_elt_m; + } +} + +multiclass avx512_insert_elt_dq opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass GRC> { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8, + EVEX_4V, TAPD; + + defm NAME : avx512_insert_elt_m, TAPD; + } +} + +defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, + extloadi8>, TAPD; +defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, + extloadi16>, PD; +defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; +defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; +//===----------------------------------------------------------------------===// +// VSHUFPS - VSHUFPD Operations +//===----------------------------------------------------------------------===// +multiclass avx512_shufp{ + defm NAME: avx512_common_3Op_imm8, + EVEX_CD8, + AVX512AIi8Base, EVEX_4V; +} + +defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; +defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - Byte shift Left/Right +//===----------------------------------------------------------------------===// + +multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, X86VectorVTInfo _>{ + def rr : AVX512; + let mayLoad = 1 in + def rm : AVX512; +} + +multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, Predicate prd>{ + let Predicates = [prd] in + defm Z512 : avx512_shift_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_shift_packed, EVEX_V256; + defm Z128 : avx512_shift_packed, EVEX_V128; + } +} +defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; +defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; + + +multiclass avx512_psadbw_packed opc, SDNode OpNode, + string OpcodeStr, X86VectorVTInfo _dst, + X86VectorVTInfo _src>{ + def rr : AVX512BI; + let mayLoad = 1 in + def rm : AVX512BI; +} + +multiclass avx512_psadbw_packed_all opc, SDNode OpNode, + string OpcodeStr, Predicate prd> { + let Predicates = [prd] in + defm Z512 : avx512_psadbw_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_psadbw_packed, 
EVEX_V256; + defm Z128 : avx512_psadbw_packed, EVEX_V128; + } +} + +defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", + HasBWI>, EVEX_4V; + +multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + let Constraints = "$src1 = $dst" in { + defm rri : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_3src, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmbi : AVX512_maskable_3src, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + } + }// Constraints = "$src1 = $dst" +} + +multiclass avx512_common_ternlog{ + let Predicates = [HasAVX512] in + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + } +} + +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; + diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 5e19ad448fc7..1a2e786661e9 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -615,14 +615,14 @@ class X86TypeInfo>", SDTIntLeaf,[],"<>">; -def Xi8 : X86TypeInfo; def Xi16 : X86TypeInfo; def Xi32 : X86TypeInfo; def Xi64 : X86TypeInfo opcode, string mnemonic, X86TypeInfo typeinfo, let hasSideEffects = 0; } -// BinOpAI_FF - Instructions like "adc %eax, %eax, imm", that implicitly define +// BinOpAI_RFF - Instructions like "adc %eax, %eax, imm", that implicitly define // and use EFLAGS. -class BinOpAI_FF opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> +class BinOpAI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg, string operands> : BinOpAI { let Uses = [areg, EFLAGS]; } +// BinOpAI_F - Instructions like "cmp %eax, %eax, imm", that imp-def EFLAGS. +class BinOpAI_F opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg, string operands> + : BinOpAI { + let Defs = [EFLAGS]; +} + /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is /// defined with "(set GPR:$dst, EFLAGS, (...". 
/// @@ -1092,14 +1099,14 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Uses = [EFLAGS], Defs = [EFLAGS] - def NAME#8i8 : BinOpAI_FF; - def NAME#16i16 : BinOpAI_FF; - def NAME#32i32 : BinOpAI_FF; - def NAME#64i32 : BinOpAI_FF; + def NAME#8i8 : BinOpAI_RFF; + def NAME#16i16 : BinOpAI_RFF; + def NAME#32i32 : BinOpAI_RFF; + def NAME#64i32 : BinOpAI_RFF; } /// ArithBinOp_F - This is an arithmetic binary operator where the pattern is @@ -1170,14 +1177,14 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Defs = [EFLAGS] - def NAME#8i8 : BinOpAI; - def NAME#16i16 : BinOpAI; - def NAME#32i32 : BinOpAI; - def NAME#64i32 : BinOpAI; + def NAME#8i8 : BinOpAI_F; + def NAME#16i16 : BinOpAI_F; + def NAME#32i32 : BinOpAI_F; + def NAME#64i32 : BinOpAI_F; } @@ -1246,14 +1253,14 @@ let isCompare = 1 in { "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } // Defs = [EFLAGS] - def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, - "{$src, %al|al, $src}">; - def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, - "{$src, %ax|ax, $src}">; - def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX, - "{$src, %eax|eax, $src}">; - def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX, - "{$src, %rax|rax, $src}">; + def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, + "{$src, %al|al, $src}">; + def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX, + "{$src, %ax|ax, $src}">; + def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX, + "{$src, %eax|eax, $src}">; + def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX, + "{$src, %rax|rax, $src}">; } // isCompare //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2056056d23a5..787f15bc628e 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -156,10 +156,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { Flags |= MachineMemOperand::MOLoad; if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) .addMemOperand(MMO); } diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 315f21308c0d..c73c95019f8d 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// -// SetCC instructions. +// CMOV instructions. multiclass CMOV opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", isCommutable = 1, SchedRW = [WriteALU] in { diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 7f850d6830e1..5d7283f7bd57 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -132,26 +132,6 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), Requires<[In64BitMode]>; } -// The MSVC runtime contains an _ftol2 routine for converting floating-point -// to integer values. It has a strange calling convention: the input is -// popped from the x87 stack, and the return value is given in EDX:EAX. 
ECX is -// used as a temporary register. No other registers (aside from flags) are -// touched. -// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 -// variant is unnecessary. - -let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in { - def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), - "# win32 fptoui", - [(X86WinFTOL RFP32:$src)]>, - Requires<[Not64BitMode]>; - - def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src), - "# win32 fptoui", - [(X86WinFTOL RFP64:$src)]>, - Requires<[Not64BitMode]>; -} - //===----------------------------------------------------------------------===// // EH Pseudo Instructions // @@ -172,6 +152,29 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } +let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, + isCodeGenOnly = 1, isReturn = 1 in { + def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>; + + // CATCHRET needs a custom inserter for SEH. + let usesCustomInserter = 1 in + def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from), + "# CATCHRET", + [(catchret bb:$dst, bb:$from)]>; +} + +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in +def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>; + +// This instruction is responsible for re-establishing stack pointers after an +// exception has been caught and we are rejoining normal control flow in the +// parent function or funclet. It generally sets ESP and EBP, and optionally +// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us +// elsewhere. +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in +def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>; + let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), @@ -247,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1 in + isPseudo = 1, AddedComplexity = 20 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; @@ -259,6 +262,33 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { let AddedComplexity = 20; } +let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], + AddedComplexity = 15 in { + // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, + // which only require 3 bytes compared to MOV32ri which requires 5. + let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { + def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, 1)]>; + def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, -1)]>; + } + + // MOV16ri is 4 bytes, so the instructions above are smaller. + def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; + def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; +} + +let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in { +// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. +// FIXME: Add itinerary class and Schedule. 
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", + [(set GR32:$dst, i32immSExt8:$src)]>, + Requires<[OptForMinSize]>; +def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", + [(set GR64:$dst, i64immSExt8:$src)]>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; +} + // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. @@ -268,9 +298,9 @@ def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src), "", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is -// actually the zero-extension of a 32-bit constant, and for labels in the +// actually the zero-extension of a 32-bit constant and for labels in the // x86-64 small code model. -def mov64imm32 : ComplexPattern; +def mov64imm32 : ComplexPattern; let AddedComplexity = 1 in def : Pat<(i64 mov64imm32:$src), @@ -509,6 +539,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in { defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; + defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; @@ -752,67 +783,111 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", /* The following multiclass tries to make sure that in code like * x.store (immediate op x.load(acquire), release) + * and + * x.store (register op x.load(acquire), release) * an operation directly on memory is generated instead of wasting a register. * It is not automatic as atomic_store/load are only lowered to MOV instructions * extremely late to prevent them from being accidentally reordered in the backend * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions) */ -multiclass RELEASE_BINOP_MI { +multiclass RELEASE_BINOP_MI { def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_8 addr:$dst, (!cast(op) + "#BINOP "#NAME#"8mi PSEUDO!", + [(atomic_store_8 addr:$dst, (op (atomic_load_8 addr:$dst), (i8 imm:$src)))]>; + def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src), + "#BINOP "#NAME#"8mr PSEUDO!", + [(atomic_store_8 addr:$dst, (op + (atomic_load_8 addr:$dst), GR8:$src))]>; // NAME#16 is not generated as 16-bit arithmetic instructions are considered // costly and avoided as far as possible by this backend anyway def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_32 addr:$dst, (!cast(op) + "#BINOP "#NAME#"32mi PSEUDO!", + [(atomic_store_32 addr:$dst, (op (atomic_load_32 addr:$dst), (i32 imm:$src)))]>; + def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), + "#BINOP "#NAME#"32mr PSEUDO!", + [(atomic_store_32 addr:$dst, (op + (atomic_load_32 addr:$dst), GR32:$src))]>; def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_64 addr:$dst, (!cast(op) + "#BINOP "#NAME#"64mi32 PSEUDO!", + [(atomic_store_64 addr:$dst, (op (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>; + def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), + "#BINOP "#NAME#"64mr PSEUDO!", + [(atomic_store_64 addr:$dst, (op + (atomic_load_64 addr:$dst), GR64:$src))]>; +} +let Defs = [EFLAGS] in { + defm RELEASE_ADD : RELEASE_BINOP_MI; + defm RELEASE_AND : RELEASE_BINOP_MI; + defm RELEASE_OR : RELEASE_BINOP_MI; + defm RELEASE_XOR : 
RELEASE_BINOP_MI<xor>; + // Note: we don't deal with sub, because subtractions of constants are + // optimized into additions before this code can run. +} + +// Same as above, but for floating-point. +// FIXME: imm version. +// FIXME: Version that doesn't clobber $src, using AVX's VADDSS. +// FIXME: This could also handle SIMD operations with *ps and *pd instructions. +let usesCustomInserter = 1 in { +multiclass RELEASE_FP_BINOP_MI<SDNode op> { + def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src), + "#BINOP "#NAME#"32mr PSEUDO!", + [(atomic_store_32 addr:$dst, + (i32 (bitconvert (op + (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))), + FR32:$src))))]>, Requires<[HasSSE1]>; + def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src), + "#BINOP "#NAME#"64mr PSEUDO!", + [(atomic_store_64 addr:$dst, + (i64 (bitconvert (op + (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))), + FR64:$src))))]>, Requires<[HasSSE2]>; +} +defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>; +// FIXME: Add fsub, fmul, fdiv, ... } -defm RELEASE_ADD : RELEASE_BINOP_MI<"add">; -defm RELEASE_AND : RELEASE_BINOP_MI<"and">; -defm RELEASE_OR : RELEASE_BINOP_MI<"or">; -defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">; -// Note: we don't deal with sub, because substractions of constants are -// optimized into additions before this code can run multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> { def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"8m PSEUDO!", [(atomic_store_8 addr:$dst, dag8)]>; def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"16m PSEUDO!", [(atomic_store_16 addr:$dst, dag16)]>; def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"32m PSEUDO!", [(atomic_store_32 addr:$dst, dag32)]>; def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"64m PSEUDO!", [(atomic_store_64 addr:$dst, dag64)]>; } -defm RELEASE_INC : RELEASE_UNOP< - (add (atomic_load_8 addr:$dst), (i8 1)), - (add (atomic_load_16 addr:$dst), (i16 1)), - (add (atomic_load_32 addr:$dst), (i32 1)), - (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; -defm RELEASE_DEC : RELEASE_UNOP< - (add (atomic_load_8 addr:$dst), (i8 -1)), - (add (atomic_load_16 addr:$dst), (i16 -1)), - (add (atomic_load_32 addr:$dst), (i32 -1)), - (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; +let Defs = [EFLAGS] in { + defm RELEASE_INC : RELEASE_UNOP< + (add (atomic_load_8 addr:$dst), (i8 1)), + (add (atomic_load_16 addr:$dst), (i16 1)), + (add (atomic_load_32 addr:$dst), (i32 1)), + (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; + defm RELEASE_DEC : RELEASE_UNOP< + (add (atomic_load_8 addr:$dst), (i8 -1)), + (add (atomic_load_16 addr:$dst), (i16 -1)), + (add (atomic_load_32 addr:$dst), (i32 -1)), + (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; +} /* TODO: These don't work because the type inference of TableGen fails. TODO: find a way to fix it. -defm RELEASE_NEG : RELEASE_UNOP< - (ineg (atomic_load_8 addr:$dst)), - (ineg (atomic_load_16 addr:$dst)), - (ineg (atomic_load_32 addr:$dst)), - (ineg (atomic_load_64 addr:$dst))>; +let Defs = [EFLAGS] in { + defm RELEASE_NEG : RELEASE_UNOP< + (ineg (atomic_load_8 addr:$dst)), + (ineg (atomic_load_16 addr:$dst)), + (ineg (atomic_load_32 addr:$dst)), + (ineg (atomic_load_64 addr:$dst))>; +} +// NOT doesn't set flags.
defm RELEASE_NOT : RELEASE_UNOP< (not (atomic_load_8 addr:$dst)), (not (atomic_load_16 addr:$dst)), @@ -821,42 +896,42 @@ defm RELEASE_NOT : RELEASE_UNOP< */ def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV8mi PSEUDO!", [(atomic_store_8 addr:$dst, (i8 imm:$src))]>; def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV16mi PSEUDO!", [(atomic_store_16 addr:$dst, (i16 imm:$src))]>; def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV32mi PSEUDO!", [(atomic_store_32 addr:$dst, (i32 imm:$src))]>; def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV64mi32 PSEUDO!", [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>; def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV8mr PSEUDO!", [(atomic_store_8 addr:$dst, GR8 :$src)]>; def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV16mr PSEUDO!", [(atomic_store_16 addr:$dst, GR16:$src)]>; def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV32mr PSEUDO!", [(atomic_store_32 addr:$dst, GR32:$src)]>; def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV64mr PSEUDO!", [(atomic_store_64 addr:$dst, GR64:$src)]>; def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV8rm PSEUDO!", [(set GR8:$dst, (atomic_load_8 addr:$src))]>; def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV16rm PSEUDO!", [(set GR16:$dst, (atomic_load_16 addr:$src))]>; def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV32rm PSEUDO!", [(set GR32:$dst, (atomic_load_32 addr:$src))]>; def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV64rm PSEUDO!", [(set GR64:$dst, (atomic_load_64 addr:$src))]>; //===----------------------------------------------------------------------===// @@ -1077,11 +1152,11 @@ defm : CMOVmr; // zextload bool -> zextload byte def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>; -def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>; -def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>; +def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>; +def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>; def : Pat<(zextloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), - (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>; + (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>; // extload bool -> extload byte // When extloading from 16-bit and smaller memory locations into 64-bit @@ -1298,7 +1373,6 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz -let AddedComplexity = 1 in // Give priority over i64immZExt32. 
def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 4cd5563ce727..8c351a51c460 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -53,6 +53,19 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16; + + // The machine return-from-interrupt instructions; sometimes we also need to + // perform a post-epilogue stack adjustment. Codegen emits the pseudo form, + // which expands to include an SP adjustment if necessary. + def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, + OpSize16; + def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], + IIC_IRET>, OpSize32; + def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [], + IIC_IRET>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def IRET : PseudoI<(outs), (ins i16imm:$adj), [(X86iret timm:$adj)]>; + } // Unconditional branches. diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 7cc3b599a737..fd800cf077f7 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -15,13 +15,31 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst" in { +// For all FMA opcodes declared in the fma3p_rm and fma3s_rm multiclasses defined +// below, both the register and memory variants are commutable. +// For the register form the commutable operands are 1, 2 and 3. +// For the memory variant the folded operand must be in 3. Thus, +// in that case, only the operands 1 and 2 can be swapped. +// Commuting some of the operands may require an opcode change. +// FMA*213*: +// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes); +// operands 1 and 3 (register forms only): *213* --> *231*; +// operands 2 and 3 (register forms only): *213* --> *132*. +// FMA*132*: +// operands 1 and 2 (memory & register forms): *132* --> *231*; +// operands 1 and 3 (register forms only): *132* --> *132*(no changes); +// operands 2 and 3 (register forms only): *132* --> *213*. +// FMA*231*: +// operands 1 and 2 (memory & register forms): *231* --> *132*; +// operands 1 and 3 (register forms only): *231* --> *213*; +// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
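As a cross-check of the commute table just above, here is a minimal, self-contained C++ sketch encoding the same form mapping. The enum and function names are invented for illustration and are not part of the LLVM sources; read Form213 as dst = src2 * src1 + src3, and the swap pairs as the 1-based operand numbers used in the comment.

#include <cassert>

// FMA opcode forms named by their operand ordering (e.g. 213 computes
// dst = src2 * src1 + src3).
enum FMAForm { Form132, Form213, Form231 };

// Which pair of 1-based source operands gets swapped. Swap13 and Swap23 are
// only legal for the register variants, since a folded memory operand must
// stay in position 3.
enum SwapPair { Swap12, Swap13, Swap23 };

// Returns the form that computes the same value after the swap, following
// the three rows of the table above.
FMAForm commuteFMAForm(FMAForm Form, SwapPair Pair) {
  switch (Form) {
  case Form213:
    return Pair == Swap12 ? Form213 : Pair == Swap13 ? Form231 : Form132;
  case Form132:
    return Pair == Swap12 ? Form231 : Pair == Swap13 ? Form132 : Form213;
  case Form231:
    return Pair == Swap12 ? Form132 : Pair == Swap13 ? Form213 : Form231;
  }
  assert(false && "all FMA forms handled above");
  return Form213; // not reached
}

For instance, commuteFMAForm(Form132, Swap12) yields Form231, matching the FMA*132* row of the table.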
+ +let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in multiclass fma3p_rm opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, - bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, SDPatternOperator Op = null_frag> { - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def r : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def rY : FMA3 opc, string OpcodeStr, [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>, VEX_L; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def mY : FMA3 opc, string OpcodeStr, (OpVT256 (Op VR256:$src2, VR256:$src1, (MemFrag256 addr:$src3))))]>, VEX_L; } -} // Constraints = "$src1 = $dst" multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { - // For 213, both the register and memory variant are commutable. - // Indeed, the commutable operands are 1 and 2 and both live in registers - // for both variants. defm r213 : fma3p_rm; -let hasSideEffects = 0 in { + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; defm r132 : fma3p_rm; - // For 231, only the register variant is commutable. - // For the memory variant the folded operand must be in 3. Thus, - // in that case, it cannot be swapped with 2. defm r231 : fma3p_rm; -} // hasSideEffects = 0 + MemFrag128, MemFrag256, OpTy128, OpTy256>; } // Fused Multiply-Add @@ -126,83 +130,122 @@ let ExeDomain = SSEPackedDouble in { v4f64>, VEX_W; } -let Constraints = "$src1 = $dst" in { -multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC, ValueType OpVT, PatFrag mem_frag, - bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, +// All source register operands of FMA opcodes defined in the fma3s_rm multiclass +// can be commuted. In many cases such a commute transformation requires an opcode +// adjustment; for example, commuting operands 1 and 2 in the FMA*132 form +// would require an opcode change to FMA*231: +// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2; +// --> +// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2; +// Please see the more detailed comment at the very beginning of the section +// defining FMA3 opcodes above. +let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in +multiclass fma3s_rm opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, SDPatternOperator OpNode = null_frag> { - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def r : FMA3; + [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def m : FMA3; + (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>; +} + +// These FMA*_Int instructions are defined specially to be used when +// the scalar FMA intrinsics are lowered to machine instructions, and in that +// sense, they are similar to existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
+// instructions. +// +// All of the FMA*_Int opcodes are defined as commutable here. +// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial +// and the corresponding optimizations have been developed. +// Commuting the 1st operand of FMA*_Int requires some additional analysis: +// the commute optimization is legal only if all users of FMA*_Int use only +// the lowest element of the FMA*_Int instruction. Even though such analysis +// may not be implemented yet, we allow the routines doing the actual commute +// transformation to decide if one or another instruction is commutable or not. +let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, + hasSideEffects = 0 in +multiclass fma3s_rm_int opc, string OpcodeStr, + Operand memopr, RegisterClass RC> { + def r_Int : FMA3; + + let mayLoad = 1 in + def m_Int : FMA3; } -} // Constraints = "$src1 = $dst" multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, string PT2, Intrinsic Int, - SDNode OpNode, RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, PatFrag mem_frag, - ComplexPattern mem_cpat> { -let hasSideEffects = 0 in { - defm r132 : fma3s_rm; - // See the other defm of r231 for the explanation regarding the - // commutable flags. - defm r231 : fma3s_rm; + string OpStr, string PackTy, + SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop> { + defm r132 : fma3s_rm; + defm r213 : fma3s_rm; + defm r231 : fma3s_rm; } -// See the other defm of r213 for the explanation regarding the -// commutable flags. -defm r213 : fma3s_rm; +// The FMA 213 form is created for lowering of scalar FMA intrinsics +// to machine instructions. +// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd operands +// of the FMA 213 form. +// The FMA 231 form can be obtained only by commuting the 1st operand of the 213 or +// 132 forms, which is possible only after special analysis of all uses of the initial +// instruction. Such an analysis does not exist yet, and thus introducing the 231 +// form of FMA*_Int instructions is done under the optimistic assumption that +// such an analysis will be implemented eventually. +multiclass fma3s_int_forms opc132, bits<8> opc213, bits<8> opc231, + string OpStr, string PackTy, + RegisterClass RC, Operand memop> { + defm r132 : fma3s_rm_int; + defm r213 : fma3s_rm_int; + defm r231 : fma3s_rm_int; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { - defm SS : fma3s_forms; - defm SD : fma3s_forms, VEX_W; + let ExeDomain = SSEPackedSingle in + defm SS : fma3s_forms, + fma3s_int_forms; -// These patterns use the 123 ordering, instead of 213, even though -// they match the intrinsic to the 213 version of the instruction. -// This is because src1 is tied to dest, and the scalar intrinsics -// require the pass-through values to come from the first source -// operand, not the second. + let ExeDomain = SSEPackedDouble in + defm SD : fma3s_forms, + fma3s_int_forms, + VEX_W; + + // These patterns use the 123 ordering, instead of 213, even though + // they match the intrinsic to the 213 version of the instruction. + // This is because src1 is tied to dest, and the scalar intrinsics + // require the pass-through values to come from the first source + // operand, not the second.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast(NAME#"SSr213r") - (COPY_TO_REGCLASS $src1, FR32), - (COPY_TO_REGCLASS $src2, FR32), - (COPY_TO_REGCLASS $src3, FR32)), - VR128)>; + (COPY_TO_REGCLASS(!cast(NAME#"SSr213r_Int") + $src1, $src2, $src3), VR128)>; def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast(NAME#"SDr213r") - (COPY_TO_REGCLASS $src1, FR64), - (COPY_TO_REGCLASS $src2, FR64), - (COPY_TO_REGCLASS $src3, FR64)), - VR128)>; + (COPY_TO_REGCLASS(!cast(NAME#"SDr213r_Int") + $src1, $src2, $src3), VR128)>; } defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, @@ -334,36 +377,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { } // isCodeGenOnly = 1 } -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, - X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, - X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, - X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, - X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; - let ExeDomain = SSEPackedSingle in { + // Scalar Instructions + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; + // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, @@ -379,6 +409,22 @@ let ExeDomain = SSEPackedSingle in { } let ExeDomain = SSEPackedDouble in { + // Scalar Instructions + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + 
int_x86_fma_vfnmadd_sd>; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 49068e9c37d3..03ae21125b0e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -137,69 +137,99 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // The FopST0 series are not included here because of the irregularities // in where the 'r' goes in assembly output. // These instructions cannot address 80-bit memory. -multiclass FPBinary { +multiclass FPBinary { // ST(0) = ST(0) + [mem] def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP32:$dst, - (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (loadf32 addr:$src2))), + (set RFP32:$dst, + (OpNode (loadf32 addr:$src2), RFP32:$src1)))]>; def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP64:$dst, - (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (loadf64 addr:$src2))), + (set RFP64:$dst, + (OpNode (loadf64 addr:$src2), RFP64:$src1)))]>; def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP64:$dst, - (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))), + (set RFP64:$dst, + (OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>; def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP80:$dst, - (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>; + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))), + (set RFP80:$dst, + (OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>; def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP80:$dst, - (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))), + (set RFP80:$dst, + (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>; +let mayLoad = 1 in def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", asmstring, "{s}\t$src")> { - let mayLoad = 1; -} + !strconcat("f", asmstring, "{s}\t$src")>; +let mayLoad = 1 in def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", asmstring, "{l}\t$src")> { - let mayLoad = 1; -} + !strconcat("f", asmstring, "{l}\t$src")>; // ST(0) = ST(0) + [memint] def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src1, - (X86fild addr:$src2, i16)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (X86fild addr:$src2, i16))), + (set RFP32:$dst, + (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>; def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src1, - (X86fild addr:$src2, i32)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (X86fild addr:$src2, i32))), + (set RFP32:$dst, + (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>; 
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src1, - (X86fild addr:$src2, i16)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (X86fild addr:$src2, i16))), + (set RFP64:$dst, + (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>; def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src1, - (X86fild addr:$src2, i32)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (X86fild addr:$src2, i32))), + (set RFP64:$dst, + (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>; def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), - OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src1, - (X86fild addr:$src2, i16)))]>; + OneArgFPRW, + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (X86fild addr:$src2, i16))), + (set RFP80:$dst, + (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>; def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), - OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src1, - (X86fild addr:$src2, i32)))]>; + OneArgFPRW, + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (X86fild addr:$src2, i32))), + (set RFP80:$dst, + (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>; +let mayLoad = 1 in def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", asmstring, "{s}\t$src")> { - let mayLoad = 1; -} + !strconcat("fi", asmstring, "{s}\t$src")>; +let mayLoad = 1 in def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", asmstring, "{l}\t$src")> { - let mayLoad = 1; -} + !strconcat("fi", asmstring, "{l}\t$src")>; } let Defs = [FPSW] in { @@ -213,14 +243,14 @@ defm DIV : FPBinary_rr; let SchedRW = [WriteFAddLd] in { defm ADD : FPBinary; defm SUB : FPBinary; -defm SUBR: FPBinary; +defm SUBR: FPBinary; } let SchedRW = [WriteFMulLd] in { defm MUL : FPBinary; } let SchedRW = [WriteFDivLd] in { defm DIV : FPBinary; -defm DIVR: FPBinary; +defm DIVR: FPBinary; } } @@ -306,13 +336,13 @@ def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; -def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; +def FNSTSWm : FPI<0xDD, MRM7m, (outs i16mem:$dst), (ins), "fnstsw\t$dst">; def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; -def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; -def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; +def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">; +def FBSTPm : FPI<0xDF, MRM6m, (outs f80mem:$dst), (ins), "fbstp\t$dst">; // Floating point cmovs. 
class FpIf32CMov pattern> : @@ -633,16 +663,18 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>; def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>; def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; -def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; -def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], - IIC_FXSAVE>, TB, Requires<[In64BitMode]>; -def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; -def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], - IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +let Predicates = [HasFXSR] in { + def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; + def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], + IIC_FXSAVE>, TB, Requires<[In64BitMode]>; + def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; + def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], + IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +} // Predicates = [FeatureFXSR] } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1f61ffa84e9a..829cedd55fb3 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; +def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -58,13 +60,17 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>; +def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>; def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; //def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", @@ -74,11 +80,18 @@ def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD", SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v4i32>]>>; def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psadbw : 
SDNode<"X86ISD::PSADBW", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i8>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; +def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", + SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, i8>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>, SDTCisInt<3>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -86,9 +99,11 @@ def X86psign : SDNode<"X86ISD::PSIGN", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>, + SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>, + SDTCisPtrTy<2>]>>; def X86pinsrb : SDNode<"X86ISD::PINSRB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; @@ -114,19 +129,17 @@ def X86vsext : SDNode<"X86ISD::VSEXT", SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>]>>; -def X86vtrunc : SDNode<"X86ISD::VTRUNC", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<0, 1>]>>; +def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>, + SDTCisOpSmallerThanOp<0, 1>]>; + +def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>; +def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>; +def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>; + def X86trunc : SDNode<"X86ISD::TRUNC", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>]>>; - -def X86vtruncm : SDNode<"X86ISD::VTRUNCM", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisVec<2>, SDTCisInt<2>, - SDTCisOpSmallerThanOp<0, 2>]>>; def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, @@ -136,6 +149,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>]>>; +def X86fround: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCVecEltisVT<2, f64>, + SDTCisOpSmallerThanOp<0, 1>]>>; +def X86froundRnd: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCVecEltisVT<2, f64>, + SDTCisOpSmallerThanOp<0, 1>, + SDTCisInt<3>]>>; + +def X86fpext : SDNode<"X86ISD::VFPEXT", + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, + SDTCVecEltisVT<2, f32>, + SDTCisOpSmallerThanOp<1, 0>]>>; + +def X86fpextRnd : SDNode<"X86ISD::VFPEXT", + SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, + SDTCVecEltisVT<2, f32>, + SDTCisOpSmallerThanOp<1, 0>, + SDTCisInt<3>]>>; + def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; @@ -159,10 +201,15 @@ def X86CmpMaskCCRound : def X86CmpMaskCCScalar : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def X86cmpm : SDNode<"X86ISD::CMPM", 
X86CmpMaskCC>; -def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; -def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; -def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; +def X86CmpMaskCCScalarRound : + SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>, + SDTCisInt<4>]>; + +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; +def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; +def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; +def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>; def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -178,6 +225,29 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; +def X86vprot : SDNode<"X86ISD::VPROT", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86vproti : SDNode<"X86ISD::VPROTI", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVT<2, i8>]>>; + +def X86vpshl : SDNode<"X86ISD::VPSHL", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86vpsha : SDNode<"X86ISD::VPSHA", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; + +def X86vpcom : SDNode<"X86ISD::VPCOM", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisVT<3, i8>]>>; +def X86vpcomu : SDNode<"X86ISD::VPCOMU", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisVT<3, i8>]>>; + def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; @@ -190,6 +260,7 @@ def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; +def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, @@ -201,11 +272,15 @@ def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>, def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<1,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i32>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; def X86pmuldq : SDNode<"X86ISD::PMULDQ", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<1,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i32>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; def X86extrqi : SDNode<"X86ISD::EXTRQI", SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, @@ -221,24 +296,30 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI", def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; -def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisVec<2>]>; + SDTCisSameSizeAs<0,2>, + SDTCisSameNumEltsAs<0,2>]>; def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0,1>, SDTCisInt<2>]>; + SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>; 
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; + SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>; def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; +def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; +def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>, + SDTCisInt<0>, SDTCisInt<1>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; +def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, + SDTCisVT<4, i8>]>; + def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; @@ -250,15 +331,17 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>; def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<2>]>; + SDTCisVec<0>, SDTCisVT<2, i32>]>; def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<3>]>; + SDTCisVec<0>, SDTCisVT<3, i32>]>; def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>; + SDTCisVec<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; + +def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; +def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; @@ -281,33 +364,74 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>; def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; +def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>; +def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>; + def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; -def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; +def X86VPermv : SDNode<"X86ISD::VPERMV", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>, + SDTCisSameNumEltsAs<0,1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>]>>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; -def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; -def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; +def X86VPermt2 : SDNode<"X86ISD::VPERMV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>, + SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>, + SDTCisSameSizeAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + +def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, + 
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + +def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; -def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; -def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; +def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; +def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; +def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>; +def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>; +def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>; +def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, SDTCisFP<1>, + SDTCisSameNumEltsAs<0,1>, + SDTCisVT<2, i32>]>, []>; +def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS", + SDTypeProfile<1, 2, [SDTCisVT<0, i1>, + SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSubVecOfVec<1, 0>]>, []>; +// SDTCisSubVecOfVec restriction cannot be applied for 128 bit version of VBROADCASTI32x2. +def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST", + SDTypeProfile<1, 1, [SDTCisVec<0>, + SDTCisSameAs<0,1>]>, []>; + def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; +def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; + [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>, + SDTCisPtrTy<3>]>, []>; def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, - [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; + [SDTCisEltOfVec<0, 1>, SDTCisVec<1>, + SDTCisPtrTy<2>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -317,11 +441,13 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; -def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; -def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; -def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; -def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; -def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; +def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; +def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; +def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", STDFp2SrcRm>; +def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; +def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", STDFp2SrcRm>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; @@ -341,9 +467,11 @@ def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>; def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>; def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; -def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; -def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; -def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; +def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; +def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; +def X86RndScales : 
SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>; +def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>; +def X86GetMants : SDNode<"X86ISD::VGETMANT", STDFp3SrcRm>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, @@ -362,7 +490,8 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, - SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; + SDTCisSameAs<0,1>, SDTCisInt<2>, + SDTCisVT<3, i32>]>; def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; @@ -371,9 +500,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>, + SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>; def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; - +def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>, + SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>; def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCVecEltisVT<1, i32>, SDTCisInt<2>]>; @@ -392,6 +524,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; +def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>; +def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>; +def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>; +def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>; // Vector with rounding mode // cvtt fp-to-int staff @@ -417,17 +553,35 @@ def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>; def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>; def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>; +def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, i16>, + SDTCisFP<0>, + SDTCisVT<2, i32>]> >; + +def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>, + SDTCisFP<1>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>]> >; def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, SDTCisOpSmallerThanOp<1, 0>, - SDTCisInt<2>]>>; + SDTCisVT<2, i32>]>>; def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, - SDTCisInt<2>]>>; + SDTCisOpSmallerThanOp<0, 1>, + SDTCisVT<2, i32>]>>; + +def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; //===----------------------------------------------------------------------===// // SSE Complex Patterns @@ -436,10 +590,10 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", // These are 'extloads' from a scalar to the low element of a vector, zeroing // the top elements. These are used for the SSE 'ss' and 'sd' instruction // forms. 
-def sse_load_f32 : ComplexPattern; -def sse_load_f64 : ComplexPattern; @@ -490,9 +644,9 @@ def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>; // The memory operand is required to be a 128-bit load, so it must be converted // from a vector to a scalar. def loadf32_128 : PatFrag<(ops node:$ptr), - (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>; + (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>; def loadf64_128 : PatFrag<(ops node:$ptr), - (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>; + (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>; // Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), @@ -590,9 +744,9 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; // The memory operand is required to be a 128-bit load, so it must be converted // from a vector to a scalar. def memopfsf32_128 : PatFrag<(ops node:$ptr), - (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>; + (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>; def memopfsf64_128 : PatFrag<(ops node:$ptr), - (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>; + (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a @@ -604,32 +758,6 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; -// MOVNT Support -// Like 'store', but requires the non-temporal bit to be set -def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal(); - return false; -}]>; - -def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal() && !ST->isTruncatingStore() && - ST->getAddressingMode() == ISD::UNINDEXED && - ST->getAlignment() >= 16; - return false; -}]>; - -def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal() && - ST->getAlignment() < 16; - return false; -}]>; - def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast(N)) @@ -851,29 +979,59 @@ def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), return isa(N); }]>; +// masked store fragments. 
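The fragments defined next split masked stores into non-truncating (X86mstore) and truncating (X86mtruncstore) cases, further qualified by alignment or by the scalar memory type. A stand-alone C++ sketch of that predicate structure follows; the struct and function names are invented and the types are simplified stand-ins, not LLVM's SelectionDAG API.

// Simplified stand-in for the information a masked-store node carries.
struct MaskedStoreInfo {
  bool Truncating;     // does the store truncate the element type?
  unsigned Alignment;  // store alignment in bytes
  unsigned ScalarBits; // scalar memory type width, e.g. 8 for MVT::i8
};

// X86mstore: matches only non-truncating masked stores.
bool matchX86mstore(const MaskedStoreInfo &N) { return !N.Truncating; }

// masked_store_aligned128: non-truncating and at least 16-byte aligned.
bool matchMaskedStoreAligned128(const MaskedStoreInfo &N) {
  return matchX86mstore(N) && N.Alignment >= 16;
}

// masked_truncstorevi8: truncating with an i8 scalar memory type.
bool matchMaskedTruncStoreVi8(const MaskedStoreInfo &N) {
  return N.Truncating && N.ScalarBits == 8;
}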
+// X86mstore can't be implemented in core DAG files because some targets +// don't support vector types (llvm-tblgen will fail). +def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + return !cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; + def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 16; return false; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 32; return false; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 64; return false; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ return isa<MaskedStoreSDNode>(N); }]>; +// Masked truncstore fragments. +// X86mtruncstore can't be implemented in core DAG files because some targets +// don't support vector types (llvm-tblgen will fail). +def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; +def masked_truncstorevi8 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def masked_truncstorevi16 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def masked_truncstorevi32 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index cf68ef053361..63e78de69bc9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" @@ -101,9 +102,11 @@ struct X86MemoryFoldTableEntry { void X86InstrInfo::anchor() {} X86InstrInfo::X86InstrInfo(X86Subtarget &STI) - : X86GenInstrInfo( - (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), - (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), + : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 + : X86::ADJCALLSTACKDOWN32), + (STI.isTarget64BitLP64() ?
X86::ADJCALLSTACKUP64 + : X86::ADJCALLSTACKUP32), + X86::CATCHRET), Subtarget(STI), RI(STI.getTargetTriple()) { static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { @@ -332,6 +335,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, @@ -495,7 +501,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, @@ -605,7 +610,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 }, { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, - { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, { X86::VPABSDrr128, X86::VPABSDrm128, 0 }, @@ -1647,6 +1651,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + // AVX-512 foldable instructions { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, @@ -1729,11 +1739,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { // FMA foldable instructions { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE }, + { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE }, + { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE }, { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE }, + { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE }, + { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE }, @@ -1749,11 +1765,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr132r_Int, 
X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE }, @@ -1769,11 +1791,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE }, @@ -1789,11 +1817,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE }, @@ -2282,7 +2316,35 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::FsVMOVAPSrm: case X86::FsVMOVAPDrm: case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { + case X86::FsMOVAPDrm: + // AVX-512 + case X86::VMOVAPDZ128rm: + case X86::VMOVAPDZ256rm: + case X86::VMOVAPDZrm: + case X86::VMOVAPSZ128rm: + case X86::VMOVAPSZ256rm: + case X86::VMOVAPSZrm: + case X86::VMOVDQA32Z128rm: + case X86::VMOVDQA32Z256rm: + case X86::VMOVDQA32Zrm: + case X86::VMOVDQA64Z128rm: + case X86::VMOVDQA64Z256rm: + case X86::VMOVDQA64Zrm: + case X86::VMOVDQU16Z128rm: + case X86::VMOVDQU16Z256rm: + case X86::VMOVDQU16Zrm: + case X86::VMOVDQU32Z128rm: + case X86::VMOVDQU32Z256rm: + case X86::VMOVDQU32Zrm: + case X86::VMOVDQU64Z128rm: + case X86::VMOVDQU64Z256rm: + case X86::VMOVDQU64Zrm: + case X86::VMOVDQU8Z128rm: + case X86::VMOVDQU8Z256rm: + case X86::VMOVDQU8Zrm: + case X86::VMOVUPSZ128rm: + case X86::VMOVUPSZ256rm: + case X86::VMOVUPSZrm: { // Loads from constant pools are trivially rematerializable. 
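    // (The operand checks below only accept an address that can be
    //  re-evaluated at any program point: an immediate scale, no index
    //  register, and a base register that is either absent or rIP.)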
if (MI->getOperand(1+X86::AddrBaseReg).isReg() && MI->getOperand(1+X86::AddrScaleAmt).isImm() && @@ -2363,9 +2425,8 @@ bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, // It is safe to clobber EFLAGS at the end of a block if no successor has it // live in. if (Iter == E) { - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *S : MBB.successors()) + if (S->isLiveIn(X86::EFLAGS)) return false; return true; } @@ -2411,13 +2472,29 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo &TRI) const { - // MOV32r0 is implemented with a xor which clobbers condition code. - // Re-materialize it as movri instructions to avoid side effects. - unsigned Opc = Orig->getOpcode(); - if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) { + bool ClobbersEFLAGS = false; + for (const MachineOperand &MO : Orig->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) { + ClobbersEFLAGS = true; + break; + } + } + + if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) { + // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side + // effects. + int Value; + switch (Orig->getOpcode()) { + case X86::MOV32r0: Value = 0; break; + case X86::MOV32r1: Value = 1; break; + case X86::MOV32r_1: Value = -1; break; + default: + llvm_unreachable("Unexpected instruction!"); + } + DebugLoc DL = Orig->getDebugLoc(); BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0)) - .addImm(0); + .addImm(Value); } else { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MBB.insert(I, MI); @@ -2428,7 +2505,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, } /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead. -static bool hasLiveCondCodeDef(MachineInstr *MI) { +bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr *MI) const { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && @@ -2453,7 +2530,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI, inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { // Left shift instructions can be transformed into load-effective-address // instructions if we can encode them appropriately. - // A LEA instruction utilizes a SIB byte to encode it's scale factor. + // A LEA instruction utilizes a SIB byte to encode its scale factor. // The SIB.scale field is two bits wide which means that we can encode any // shift amount less than 4. return ShAmt < 4 && ShAmt > 0; @@ -2493,7 +2570,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src, ImplicitOp = Src; ImplicitOp.setImplicit(); - NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64); + NewSrc = getX86SubSuperRegister(Src.getReg(), 64); MachineBasicBlock::LivenessQueryResult LQR = MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI); @@ -2914,10 +2991,162 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMI; } -/// We have a few instructions that must be hacked on to commute them. -/// -MachineInstr * -X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +/// Returns true if the given instruction opcode is FMA3. +/// Otherwise, returns false. +/// The second parameter is optional and is used as a second return value from +/// the function.
It is set to true if the given instruction has FMA3 opcode +/// that is used for lowering of scalar FMA intrinsics, and it is set to false +/// otherwise. +static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) { + if (IsIntrinsic) + *IsIntrinsic = false; + + switch (Opcode) { + case X86::VFMADDSDr132r: case X86::VFMADDSDr132m: + case X86::VFMADDSSr132r: case X86::VFMADDSSr132m: + case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m: + case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m: + case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m: + case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m: + case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m: + case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m: + + case X86::VFMADDSDr213r: case X86::VFMADDSDr213m: + case X86::VFMADDSSr213r: case X86::VFMADDSSr213m: + case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m: + case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m: + case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m: + case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m: + case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m: + case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m: + + case X86::VFMADDSDr231r: case X86::VFMADDSDr231m: + case X86::VFMADDSSr231r: case X86::VFMADDSSr231m: + case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m: + case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m: + case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m: + case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m: + case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m: + case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m: + + case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m: + case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m: + case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m: + case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m: + case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY: + case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY: + case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY: + case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY: + + case X86::VFMADDPDr132r: case X86::VFMADDPDr132m: + case X86::VFMADDPSr132r: case X86::VFMADDPSr132m: + case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m: + case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m: + case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m: + case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m: + case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m: + case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m: + case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY: + case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY: + case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY: + case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY: + case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY: + case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY: + case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY: + case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY: + + case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m: + case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m: + case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m: + case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m: + case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY: + case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY: + case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY: + case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY: + + case X86::VFMADDPDr213r: case X86::VFMADDPDr213m: + case X86::VFMADDPSr213r: case X86::VFMADDPSr213m: + case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m: + case 
X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m: + case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m: + case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m: + case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m: + case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m: + case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY: + case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY: + case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY: + case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY: + case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY: + case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY: + case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY: + case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY: + + case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m: + case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m: + case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m: + case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m: + case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY: + case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY: + case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY: + case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY: + + case X86::VFMADDPDr231r: case X86::VFMADDPDr231m: + case X86::VFMADDPSr231r: case X86::VFMADDPSr231m: + case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m: + case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m: + case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m: + case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m: + case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m: + case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m: + case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY: + case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY: + case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY: + case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY: + case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY: + case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY: + case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY: + case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY: + return true; + + case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int: + case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int: + case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int: + case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int: + case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int: + case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int: + case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int: + case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int: + + case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int: + case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int: + case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int: + case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int: + case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int: + case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int: + case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int: + case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int: + + case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int: + case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int: + case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int: + case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int: + case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int: + case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int: + case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int: + case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int: + if (IsIntrinsic) + 
*IsIntrinsic = true; + return true; + default: + return false; + } + llvm_unreachable("Opcode not handled by the switch"); +} + +MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { switch (MI->getOpcode()) { case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) @@ -2944,7 +3173,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } MI->setDesc(get(Opc)); MI->getOperand(3).setImm(Size-Amt); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::BLENDPDrri: case X86::BLENDPSrri: @@ -2980,7 +3209,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm(Mask ^ Imm); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::PCLMULQDQrr: case X86::VPCLMULQDQrr:{ @@ -2995,7 +3224,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4)); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::CMPPDrri: case X86::CMPPSrri: @@ -3016,7 +3245,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI = MF.CloneMachineInstr(MI); NewMI = false; } - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); default: return nullptr; } @@ -3045,7 +3274,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm(Imm); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: @@ -3124,11 +3353,272 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Fallthrough intended. } default: - return TargetInstrInfo::commuteInstruction(MI, NewMI); + if (isFMA3(MI->getOpcode())) { + unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2); + if (Opc == 0) + return nullptr; + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } + MI->setDesc(get(Opc)); + } + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } } -bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, +bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + + unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3; + + // Only the first RegOpsNum operands are commutable. + // Also, the value 'CommuteAnyOperandIndex' is valid here as it means + // that the operand is not specified/fixed. + if (SrcOpIdx1 != CommuteAnyOperandIndex && + (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum)) + return false; + if (SrcOpIdx2 != CommuteAnyOperandIndex && + (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum)) + return false; + + // Look for two different register operands assumed to be commutable + // regardless of the FMA opcode. The FMA opcode is adjusted later. 
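+  // (CommuteAnyOperandIndex is the caller's "you pick" value: for example,
+  //  calling findCommutedOpIndices(MI, Idx1 = 2, Idx2 = CommuteAnyOperandIndex)
+  //  fixes operand 2 and lets the code below choose a suitable partner.)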
+ if (SrcOpIdx1 == CommuteAnyOperandIndex || + SrcOpIdx2 == CommuteAnyOperandIndex) { + unsigned CommutableOpIdx1 = SrcOpIdx1; + unsigned CommutableOpIdx2 = SrcOpIdx2; + + // At least one of the operands to be commuted is not specified and + // this method is free to choose appropriate commutable operands. + if (SrcOpIdx1 == SrcOpIdx2) + // Neither operand is fixed. By default, set one of the commutable + // operands to the last register operand of the instruction. + CommutableOpIdx2 = RegOpsNum; + else if (SrcOpIdx2 == CommuteAnyOperandIndex) + // Only one of the operands is not fixed. + CommutableOpIdx2 = SrcOpIdx1; + + // CommutableOpIdx2 is well defined now. Let's choose another commutable + // operand and assign its index to CommutableOpIdx1. + unsigned Op2Reg = MI->getOperand(CommutableOpIdx2).getReg(); + for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) { + // The commuted operands must have different registers; otherwise, + // the commute transformation does not change anything and is useless. + if (Op2Reg != MI->getOperand(CommutableOpIdx1).getReg()) + break; + } + + // No appropriate commutable operands were found. + if (CommutableOpIdx1 == 0) + return false; + + // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2 + // to return those values. + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + } + + // Check if we can adjust the opcode to preserve the semantics when + // commuting the register operands. + return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2) != 0; +} + +unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI, + unsigned SrcOpIdx1, + unsigned SrcOpIdx2) const { + unsigned Opc = MI->getOpcode(); + + // Define the array that holds FMA opcodes in groups + // of 3 opcodes (132, 213, 231) in each group.
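+  // Each row below holds the three forms of one operation variant
+  // (scalar/packed, reg/mem, 128/256-bit); the column index selects the
+  // form: 0 -> 132, 1 -> 213, 2 -> 231.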
+ static const unsigned RegularOpcodeGroups[][3] = { + { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r }, + { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r }, + { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r }, + { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r }, + { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY }, + { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY }, + { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m }, + { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m }, + { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m }, + { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m }, + { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY }, + { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY }, + + { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r }, + { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r }, + { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r }, + { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r }, + { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY }, + { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY }, + { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m }, + { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m }, + { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m }, + { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m }, + { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY }, + { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY }, + + { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r }, + { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r }, + { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r }, + { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r }, + { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY }, + { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY }, + { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m }, + { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m }, + { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m }, + { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m }, + { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY }, + { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY }, + + { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r }, + { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r }, + { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r }, + { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r }, + { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY }, + { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY }, + { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m }, + { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m }, + { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m }, + { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m }, + { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY }, + { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY }, + + { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r }, + { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r }, + { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, 
X86::VFMADDSUBPSr231rY }, + { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY }, + { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m }, + { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m }, + { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY }, + { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY }, + + { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r }, + { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r }, + { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY }, + { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY }, + { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m }, + { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m }, + { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY }, + { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY } + }; + + // Define the array that holds FMA*_Int opcodes in groups + // of 3 opcodes(132, 213, 231) in each group. + static const unsigned IntrinOpcodeGroups[][3] = { + { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int }, + { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int }, + { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int }, + { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int }, + + { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int }, + { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int }, + { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int }, + { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int }, + + { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int }, + { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int }, + { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int }, + { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int }, + + { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int }, + { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int }, + { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int }, + { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int }, + }; + + const unsigned Form132Index = 0; + const unsigned Form213Index = 1; + const unsigned Form231Index = 2; + const unsigned FormsNum = 3; + + bool IsIntrinOpcode; + isFMA3(Opc, &IsIntrinOpcode); + + size_t GroupsNum; + const unsigned (*OpcodeGroups)[3]; + if (IsIntrinOpcode) { + GroupsNum = array_lengthof(IntrinOpcodeGroups); + OpcodeGroups = IntrinOpcodeGroups; + } else { + GroupsNum = array_lengthof(RegularOpcodeGroups); + OpcodeGroups = RegularOpcodeGroups; + } + + const unsigned *FoundOpcodesGroup = nullptr; + size_t FormIndex; + + // Look for the input opcode in the corresponding opcodes table. + for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup; + ++GroupIndex) { + for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) { + if (OpcodeGroups[GroupIndex][FormIndex] == Opc) { + FoundOpcodesGroup = OpcodeGroups[GroupIndex]; + break; + } + } + } + + // The input opcode does not match with any of the opcodes from the tables. + // The unsupported FMA opcode must be added to one of the two opcode groups + // defined above. 
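Before the mapping step that follows, it helps to spell out the form semantics; the numbering is Intel's, and this check is an illustrative addition rather than patch code. With operands (Op1, Op2, Op3), where Op1 doubles as the destination:

    // FMA form semantics (scalar view):
    //   132: Op1 = Op1 * Op3 + Op2
    //   213: Op1 = Op2 * Op1 + Op3
    //   231: Op1 = Op2 * Op3 + Op1
    // Quick check of the first FormMapping row below (swapping operands 1 and
    // 2 must turn the 132 form into the 231 form):
    #include <cassert>

    static double fma132(double a, double b, double c) { return a * c + b; }
    static double fma231(double a, double b, double c) { return b * c + a; }

    static void checkSwap12() {
      double A = 2.0, B = 3.0, C = 4.0;
      // Same value from the relabeled form with operands 1 and 2 swapped.
      assert(fma132(A, B, C) == fma231(B, A, C)); // 2*4+3 == 2*4+3
    }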
+ assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode"); + + // Put the lowest index in SrcOpIdx1 to simplify the checks below. + if (SrcOpIdx1 > SrcOpIdx2) + std::swap(SrcOpIdx1, SrcOpIdx2); + + // TODO: Commuting the 1st operand of FMA*_Int requires some additional + // analysis. The commute optimization is legal only if all users of FMA*_Int + // use only the lowest element of the FMA*_Int instruction. Such analysis is + // not implemented yet, so just return 0 in that case. + // When such analysis becomes available, this will be the right place to + // call it. + if (IsIntrinOpcode && SrcOpIdx1 == 1) + return 0; + + unsigned Case; + if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2) + Case = 0; + else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3) + Case = 1; + else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3) + Case = 2; + else + return 0; + + // Define the FMA forms mapping array that maps the input FMA form + // to the output FMA form that preserves the operation semantics after + // commuting the operands. + static const unsigned FormMapping[][3] = { + // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2; + // FMA132 A, C, b; ==> FMA231 C, A, b; + // FMA213 B, A, c; ==> FMA213 A, B, c; + // FMA231 C, A, b; ==> FMA132 A, C, b; + { Form231Index, Form213Index, Form132Index }, + // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3; + // FMA132 A, c, B; ==> FMA132 B, c, A; + // FMA213 B, a, C; ==> FMA231 C, a, B; + // FMA231 C, a, B; ==> FMA213 B, a, C; + { Form132Index, Form231Index, Form213Index }, + // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3; + // FMA132 a, C, B; ==> FMA213 a, B, C; + // FMA213 b, A, C; ==> FMA132 b, C, A; + // FMA231 c, A, B; ==> FMA231 c, B, A; + { Form213Index, Form132Index, Form231Index } + }; + + // Everything is ready, just adjust the FMA opcode and return it. + FormIndex = FormMapping[Case][FormIndex]; + return FoundOpcodesGroup[FormIndex]; +} + +bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { switch (MI->getOpcode()) { case X86::CMPPDrri: @@ -3141,46 +3631,22 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, // Ordered/Unordered/Equal/NotEqual tests unsigned Imm = MI->getOperand(3).getImm() & 0x7; switch (Imm) { - case 0x00: // EQUAL - case 0x03: // UNORDERED - case 0x04: // NOT EQUAL - case 0x07: // ORDERED - SrcOpIdx1 = 1; - SrcOpIdx2 = 2; - return true; + case 0x00: // EQUAL + case 0x03: // UNORDERED + case 0x04: // NOT EQUAL + case 0x07: // ORDERED + // The indices of the commutable operands are 1 and 2. + // Assign them to the returned operand indices here.
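+      // (fixCommutedOpIndices, inherited from TargetInstrInfo, also validates
+      //  the request: an index the caller fixed must equal one of the two
+      //  values passed in, and any CommuteAnyOperandIndex placeholder is
+      //  bound to the remaining one.)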
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); } return false; } - case X86::VFMADDPDr231r: - case X86::VFMADDPSr231r: - case X86::VFMADDSDr231r: - case X86::VFMADDSSr231r: - case X86::VFMSUBPDr231r: - case X86::VFMSUBPSr231r: - case X86::VFMSUBSDr231r: - case X86::VFMSUBSSr231r: - case X86::VFNMADDPDr231r: - case X86::VFNMADDPSr231r: - case X86::VFNMADDSDr231r: - case X86::VFNMADDSSr231r: - case X86::VFNMSUBPDr231r: - case X86::VFNMSUBPSr231r: - case X86::VFNMSUBSDr231r: - case X86::VFNMSUBSSr231r: - case X86::VFMADDPDr231rY: - case X86::VFMADDPSr231rY: - case X86::VFMSUBPDr231rY: - case X86::VFMSUBPSr231rY: - case X86::VFNMADDPDr231rY: - case X86::VFNMADDPSr231rY: - case X86::VFNMSUBPDr231rY: - case X86::VFNMSUBPSr231rY: - SrcOpIdx1 = 2; - SrcOpIdx2 = 3; - return true; default: + if (isFMA3(MI->getOpcode())) + return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); } + return false; } static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { @@ -3821,15 +4287,58 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, return 0; } -inline static bool MaskRegClassContains(unsigned Reg) { +static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } + +static bool GRRegClassContains(unsigned Reg) { + return X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg) || + X86::GR16RegClass.contains(Reg) || + X86::GR8RegClass.contains(Reg); +} static -unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { +unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) { + DestReg = getX86SubSuperRegister(DestReg, 32); + return X86::KMOVBrk; + } + if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) { + SrcReg = getX86SubSuperRegister(SrcReg, 32); + return X86::KMOVBkr; + } + return 0; +} + +static +unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg)) + return X86::KMOVQkk; + if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg)) + return X86::KMOVDrk; + if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg)) + return X86::KMOVQrk; + if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + return X86::KMOVDkr; + if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg)) + return X86::KMOVQkr; + return 0; +} + +static +unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg, + const X86Subtarget &Subtarget) +{ + if (Subtarget.hasDQI()) + if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg)) + return Opc; + if (Subtarget.hasBWI()) + if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg)) + return Opc; if (X86::VR128XRegClass.contains(DestReg, SrcReg) || X86::VR256XRegClass.contains(DestReg, SrcReg) || X86::VR512RegClass.contains(DestReg, SrcReg)) { @@ -3837,21 +4346,14 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { SrcReg = get512BitSuperRegister(SrcReg); return X86::VMOVAPSZrr; } - if (MaskRegClassContains(DestReg) && - MaskRegClassContains(SrcReg)) + if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) return X86::KMOVWkk; - if (MaskRegClassContains(DestReg) && - 
(X86::GR32RegClass.contains(SrcReg) || - X86::GR16RegClass.contains(SrcReg) || - X86::GR8RegClass.contains(SrcReg))) { - SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); + if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) { + SrcReg = getX86SubSuperRegister(SrcReg, 32); return X86::KMOVWkr; } - if ((X86::GR32RegClass.contains(DestReg) || - X86::GR16RegClass.contains(DestReg) || - X86::GR8RegClass.contains(DestReg)) && - MaskRegClassContains(SrcReg)) { - DestReg = getX86SubSuperRegister(DestReg, MVT::i32); + if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) { + DestReg = getX86SubSuperRegister(DestReg, 32); return X86::KMOVWrk; } return 0; @@ -3886,7 +4388,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else if (HasAVX512) - Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); + Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget); else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) @@ -3900,34 +4402,86 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Moving EFLAGS to / from another register requires a push and a pop. - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - if (SrcReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF64)); - BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); + bool FromEFLAGS = SrcReg == X86::EFLAGS; + bool ToEFLAGS = DestReg == X86::EFLAGS; + int Reg = FromEFLAGS ? DestReg : SrcReg; + bool is32 = X86::GR32RegClass.contains(Reg); + bool is64 = X86::GR64RegClass.contains(Reg); + + if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { + int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; + int Push = is64 ? X86::PUSH64r : X86::PUSH32r; + int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; + int Pop = is64 ? X86::POP64r : X86::POP32r; + int PopF = is64 ? X86::POPF64 : X86::POPF32; + int AX = is64 ? X86::RAX : X86::EAX; + + if (!Subtarget.hasLAHFSAHF()) { + assert(Subtarget.is64Bit() && + "Not having LAHF/SAHF only happens on 64-bit."); + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - usesTheStack. + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(PushF)); + BuildMI(MBB, MI, DL, get(Pop), DestReg); + } + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Push)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(PopF)); + } return; } - if (X86::GR32RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF32)); - BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); - return; + + // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is + // inefficient. Instead: + // - Save the overflow flag OF into AL using SETO, and restore it using a + // signed 8-bit addition of AL and INT8_MAX. + // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH + // using LAHF/SAHF. + // - When RAX/EAX is live and isn't the destination register, make sure it + // isn't clobbered by PUSH/POP'ing it before and after saving/restoring + // the flags. + // This approach is ~2.25x faster than using PUSHF/POPF. + // + // This is still somewhat inefficient because we don't know which flags are + // actually live inside EFLAGS. 
Were we able to do a single SETcc instead of + // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster. + // + // PUSHF/POPF is also potentially incorrect because it affects other flags + // such as TF/IF/DF, which LLVM doesn't model. + // + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - usesTheStack. + + + bool AXDead = (Reg == AX) || + (MachineBasicBlock::LQR_Dead == + MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + if (!AXDead) { + // FIXME: If computeRegisterLiveness() reported LQR_Unknown then AX may + // actually be dead. This is not a problem for correctness as we are just + // (unnecessarily) saving+restoring a dead register. However the + // MachineVerifier expects operands that read from dead registers + // to be marked with the "undef" flag. + BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); } - } - if (DestReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF64)); - return; + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); + BuildMI(MBB, MI, DL, get(X86::LAHF)); + BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); } - if (X86::GR32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF32)); - return; + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) + .addReg(X86::AL) + .addImm(INT8_MAX); + BuildMI(MBB, MI, DL, get(X86::SAHF)); } + if (!AXDead) + BuildMI(MBB, MI, DL, get(Pop), AX); + return; } DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) @@ -4602,9 +5156,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // live-out. If it is live-out, do not optimize. if ((IsCmpZero || IsSwapped) && !IsSafe) { MachineBasicBlock *MBB = CmpInstr->getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *Successor : MBB->successors()) + if (Successor->isLiveIn(X86::EFLAGS)) return false; } @@ -4645,8 +5198,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, CmpInstr->eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. - for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++) - OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second)); + for (auto &Op : OpsToUpdate) + Op.first->setDesc(get(Op.second)); return true; } @@ -4694,8 +5247,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI, return nullptr; // Check whether we can fold the def into SrcOperandId. - MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI); - if (FoldMI) { + if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI)) { FoldAsLoadDefReg = 0; return FoldMI; } @@ -4725,6 +5277,82 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB, return true; } +/// Expand a single-def pseudo instruction to a two-addr +/// instruction with two %k0 reads. 
+/// This is used for mapping: +/// %k4 = K_SET1 +/// to: +/// %k4 = KXNORrr %k0, %k0 +static bool Expand2AddrKreg(MachineInstrBuilder &MIB, + const MCInstrDesc &Desc, unsigned Reg) { + assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); + MIB->setDesc(Desc); + MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); + return true; +} + +static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, + bool MinusOne) { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + unsigned Reg = MIB->getOperand(0).getReg(); + + // Insert the XOR. + BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + + // Turn the pseudo into an INC or DEC. + MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r)); + MIB.addReg(Reg); + + return true; +} + +bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + int64_t Imm = MIB->getOperand(1).getImm(); + assert(Imm != 0 && "Using push/pop for 0 is not efficient."); + MachineBasicBlock::iterator I = MIB.getInstr(); + + int StackAdjustment; + + if (Subtarget.is64Bit()) { + assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || + MIB->getOpcode() == X86::MOV32ImmSExti8); + // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and + // widen the register if necessary. + StackAdjustment = 8; + BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm); + MIB->setDesc(get(X86::POP64r)); + MIB->getOperand(0) + .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); + } else { + assert(MIB->getOpcode() == X86::MOV32ImmSExti8); + StackAdjustment = 4; + BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm); + MIB->setDesc(get(X86::POP32r)); + } + + // Build CFI if necessary. + MachineFunction &MF = *MBB.getParent(); + const X86FrameLowering *TFL = Subtarget.getFrameLowering(); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsDwarfCFI = + !IsWin64Prologue && + (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); + bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; + if (EmitCFI) { + TFL->BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); + TFL->BuildCFI(MBB, std::next(I), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); + } + + return true; +} + // LoadStackGuard has so far only been implemented for 64-bit MachO. Different // code sequence is needed for other targets. 
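The immediate-materialization pseudos expanded above trade EFLAGS clobbers and stack traffic for code size, and are only selected when that trade is wanted (e.g. minimum-size functions). A standalone sketch of which expansion fits which immediate; the byte counts are the standard 32-bit x86 encodings, and the function is ours, not patch code:

    #include <cstdint>

    // Picks the compact expansion of "mov r32, Imm" mirroring the pseudos
    // handled above.
    static const char *pickExpansion(int64_t Imm) {
      if (Imm == 0)
        return "xor r,r";             // MOV32r0: 2 bytes, clobbers EFLAGS
      if (Imm == 1 || Imm == -1)
        return "xor r,r; inc/dec r";  // MOV32r1 / MOV32r_1: 3 bytes
      if (Imm == static_cast<int8_t>(Imm))
        return "push imm8; pop r";    // MOV32ImmSExti8: 3 bytes, touches stack
      return "mov r, imm32";          // plain MOV32ri: 5 bytes
    }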
static void expandLoadStackGuard(MachineInstrBuilder &MIB, @@ -4735,8 +5363,8 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, const GlobalValue *GV = cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; - MachineMemOperand *MMO = MBB.getParent()-> - getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8); + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 8, 8); MachineBasicBlock::iterator I = MIB.getInstr(); BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1) @@ -4753,6 +5381,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { case X86::MOV32r0: return Expand2AddrUndef(MIB, get(X86::XOR32rr)); + case X86::MOV32r1: + return expandMOV32r1(MIB, *this, /*MinusOne=*/ false); + case X86::MOV32r_1: + return expandMOV32r1(MIB, *this, /*MinusOne=*/ true); + case X86::MOV32ImmSExti8: + case X86::MOV64ImmSExti8: + return ExpandMOVImmSExti8(MIB); case X86::SETB_C8r: return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: @@ -4777,10 +5412,22 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; + + // KNL does not recognize dependency-breaking idioms for mask registers, + // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1. + // Using %k0 as the undef input register is a performance heuristic based + // on the assumption that %k0 is used less frequently than the other mask + // registers, since it is not usable as a write mask. + // FIXME: A more advanced approach would be to choose the best input mask + // register based on context. case X86::KSET0B: - case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); + case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0); + case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0); + case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0); case X86::KSET1B: - case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); + case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0); + case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0); + case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0); case TargetOpcode::LOAD_STACK_GUARD: expandLoadStackGuard(MIB, *this); return true; @@ -4788,12 +5435,28 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } -static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) { +static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs, + int PtrOffset = 0) { unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + + if (NumAddrOps < 4) { + // FrameIndex only - add an immediate offset (whether it's zero or not). + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + addOffset(MIB, PtrOffset); + } else { + // General Memory Addressing - we need to add any offset to an existing + // offset.
+ assert(MOs.size() == 5 && "Unexpected memory operand list length"); + for (unsigned i = 0; i != NumAddrOps; ++i) { + const MachineOperand &MO = MOs[i]; + if (i == 3 && PtrOffset != 0) { + MIB.addDisp(MO, PtrOffset); + } else { + MIB.addOperand(MO); + } + } + } } static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, @@ -4828,7 +5491,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef MOs, MachineBasicBlock::iterator InsertPt, - MachineInstr *MI, const TargetInstrInfo &TII) { + MachineInstr *MI, const TargetInstrInfo &TII, + int PtrOffset = 0) { // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); @@ -4838,7 +5502,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { assert(MO.isReg() && "Expected to fold into reg operand!"); - addOperands(MIB, MOs); + addOperands(MIB, MOs, PtrOffset); } else { MIB.addOperand(MO); } @@ -4860,6 +5524,40 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, return MIB.addImm(0); } +MachineInstr *X86InstrInfo::foldMemoryOperandCustom( + MachineFunction &MF, MachineInstr *MI, unsigned OpNum, + ArrayRef MOs, MachineBasicBlock::iterator InsertPt, + unsigned Size, unsigned Align) const { + switch (MI->getOpcode()) { + case X86::INSERTPSrr: + case X86::VINSERTPSrr: + // Attempt to convert the load of inserted vector into a fold load + // of a single float. + if (OpNum == 2) { + unsigned Imm = MI->getOperand(MI->getNumOperands() - 1).getImm(); + unsigned ZMask = Imm & 15; + unsigned DstIdx = (Imm >> 4) & 3; + unsigned SrcIdx = (Imm >> 6) & 3; + + unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize(); + if (Size <= RCSize && 4 <= Align) { + int PtrOffset = SrcIdx * 4; + unsigned NewImm = (DstIdx << 4) | ZMask; + unsigned NewOpCode = + (MI->getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm + : X86::INSERTPSrm); + MachineInstr *NewMI = + FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset); + NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); + return NewMI; + } + } + break; + }; + + return nullptr; +} + MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr *MI, unsigned OpNum, ArrayRef MOs, MachineBasicBlock::iterator InsertPt, @@ -4869,10 +5567,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; - // For CPUs that favor the register form of a call, - // do not fold loads into calls. - if (isCallRegIndirect && - (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) + // For CPUs that favor the register form of a call or push, + // do not fold loads into calls or pushes, unless optimizing for size + // aggressively. + if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r || + MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r || + MI->getOpcode() == X86::PUSH64r)) return nullptr; unsigned NumOps = MI->getDesc().getNumOperands(); @@ -4886,6 +5587,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return nullptr; MachineInstr *NewMI = nullptr; + + // Attempt to fold any custom cases we have. 
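The custom case referred to here is the (V)INSERTPSrr fold implemented in foldMemoryOperandCustom above; as a worked example of its immediate arithmetic (a standalone sketch, struct and names ours):

    #include <cstdint>

    // Decodes an (V)INSERTPS immediate the way foldMemoryOperandCustom does.
    struct InsertPSImm {
      unsigned ZMask;  // which destination lanes are zeroed
      unsigned DstIdx; // destination lane written
      unsigned SrcIdx; // source lane read
    };

    static InsertPSImm decodeInsertPS(uint8_t Imm) {
      return {Imm & 15u, (Imm >> 4) & 3u, (Imm >> 6) & 3u};
    }

    // Once the vector load is narrowed to a single f32, the memory offset is
    // SrcIdx * 4 bytes and the new immediate reads lane 0:
    //   NewImm = (DstIdx << 4) | ZMask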
+ if (MachineInstr *CustomMI = + foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align)) + return CustomMI; + // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. @@ -4963,60 +5670,56 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // If the instruction and target operand are commutable, commute the // instruction and try again. if (AllowCommute) { - unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2; + unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex; if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) { bool HasDef = MI->getDesc().getNumDefs(); unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg(); unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg(); - bool Tied0 = - 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); bool Tied1 = + 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); + bool Tied2 = 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO); // If either of the commutable operands are tied to the destination // then we can not commute + fold. - if ((HasDef && Reg0 == Reg1 && Tied0) || - (HasDef && Reg0 == Reg2 && Tied1)) + if ((HasDef && Reg0 == Reg1 && Tied1) || + (HasDef && Reg0 == Reg2 && Tied2)) return nullptr; - if ((CommuteOpIdx1 == OriginalOpIdx) || - (CommuteOpIdx2 == OriginalOpIdx)) { - MachineInstr *CommutedMI = commuteInstruction(MI, false); - if (!CommutedMI) { - // Unable to commute. - return nullptr; - } - if (CommutedMI != MI) { - // New instruction. We can't fold from this. - CommutedMI->eraseFromParent(); - return nullptr; - } - - // Attempt to fold with the commuted version of the instruction. - unsigned CommuteOp = - (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1); - NewMI = - foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align, - /*AllowCommute=*/false); - if (NewMI) - return NewMI; - - // Folding failed again - undo the commute before returning. - MachineInstr *UncommutedMI = commuteInstruction(MI, false); - if (!UncommutedMI) { - // Unable to commute. - return nullptr; - } - if (UncommutedMI != MI) { - // New instruction. It doesn't need to be kept. - UncommutedMI->eraseFromParent(); - return nullptr; - } - - // Return here to prevent duplicate fuse failure report. + MachineInstr *CommutedMI = + commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + if (!CommutedMI) { + // Unable to commute. return nullptr; } + if (CommutedMI != MI) { + // New instruction. We can't fold from this. + CommutedMI->eraseFromParent(); + return nullptr; + } + + // Attempt to fold with the commuted version of the instruction. + NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, + Size, Align, /*AllowCommute=*/false); + if (NewMI) + return NewMI; + + // Folding failed again - undo the commute before returning. + MachineInstr *UncommutedMI = + commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + if (!UncommutedMI) { + // Unable to commute. + return nullptr; + } + if (UncommutedMI != MI) { + // New instruction. It doesn't need to be kept. + UncommutedMI->eraseFromParent(); + return nullptr; + } + + // Return here to prevent duplicate fuse failure report. 
+      return nullptr;
     }
   }
 
@@ -5208,13 +5911,14 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
   // If MI kills this register, the false dependence is already broken.
   if (MI->killsRegister(Reg, TRI))
     return;
+
   if (X86::VR128RegClass.contains(Reg)) {
     // These instructions are all floating point domain, so xorps is the best
     // choice.
-    bool HasAVX = Subtarget.hasAVX();
-    unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+    unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
     BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
       .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+    MI->addRegisterKilled(Reg, TRI, true);
   } else if (X86::VR256RegClass.contains(Reg)) {
     // Use vxorps to clear the full ymm register.
     // It wants to read and write the xmm sub-register.
@@ -5222,21 +5926,20 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
     BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
       .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
       .addReg(Reg, RegState::ImplicitDefine);
-  } else
-    return;
-  MI->addRegisterKilled(Reg, TRI, true);
+    MI->addRegisterKilled(Reg, TRI, true);
+  }
 }
 
 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
   // Check switch flag
-  if (NoFusing) return nullptr;
+  if (NoFusing)
+    return nullptr;
 
   // Unless optimizing for size, don't fold to avoid partial
   // register update stalls
-  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
-      hasPartialRegUpdate(MI->getOpcode()))
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
     return nullptr;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -5303,6 +6006,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
     case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
     case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+    case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+    case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+    case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+    case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+    case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+    case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
       return false;
     default:
       return true;
@@ -5318,6 +6027,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
     case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
     case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+    case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+    case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+    case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+    case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+    case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+    case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
      return false;
    default:
      return true;
@@ -5342,10 +6057,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Check switch flag
   if (NoFusing) return nullptr;
 
-  // Unless optimizing for size, don't fold to avoid partial
-  // register update stalls
-  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
-      hasPartialRegUpdate(MI->getOpcode()))
+  // Avoid partial register update stalls unless optimizing for size.
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
     return nullptr;
 
   // Determine the alignment of the load.
@@ -5460,62 +6173,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
                                /*Size=*/0, Alignment, /*AllowCommute=*/true);
 }
 
-bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-                                        ArrayRef<unsigned> Ops) const {
-  // Check switch flag
-  if (NoFusing) return 0;
-
-  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
-    switch (MI->getOpcode()) {
-    default: return false;
-    case X86::TEST8rr:
-    case X86::TEST16rr:
-    case X86::TEST32rr:
-    case X86::TEST64rr:
-      return true;
-    case X86::ADD32ri:
-      // FIXME: AsmPrinter doesn't know how to handle
-      // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
-      if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
-        return false;
-      break;
-    }
-  }
-
-  if (Ops.size() != 1)
-    return false;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  unsigned NumOps = MI->getDesc().getNumOperands();
-  bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
-  // Folding a memory location into the two-address part of a two-address
-  // instruction is different than folding it other places. It requires
-  // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned,
-                 std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
-  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
-  } else if (OpNum == 0) {
-    if (Opc == X86::MOV32r0)
-      return true;
-
-    OpcodeTablePtr = &RegOp2MemOpTable0;
-  } else if (OpNum == 1) {
-    OpcodeTablePtr = &RegOp2MemOpTable1;
-  } else if (OpNum == 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2;
-  } else if (OpNum == 3) {
-    OpcodeTablePtr = &RegOp2MemOpTable3;
-  }
-
-  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
-    return true;
-  return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
-}
-
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
@@ -5536,9 +6193,10 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   const MCInstrDesc &MCID = get(Opc);
   const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+  // TODO: Check if 32-byte or greater accesses are slow too?
   if (!MI->hasOneMemOperand() &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
     // conservatively assume the address is unaligned. That's bad for
     // performance.
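
The commute-and-retry strategy used by foldMemoryOperandImpl above is compact enough to sketch outside LLVM. The following is a minimal standalone model, assuming a toy Instr type and a caller-supplied tryFold callback in place of the real MachineInstr and commuteInstruction machinery; it illustrates the control flow only, not the actual implementation:

    #include <functional>
    #include <optional>
    #include <utility>

    // Hypothetical stand-in for a machine instruction with two
    // commutable source operands.
    struct Instr {
      unsigned Opcode;
      unsigned Src[2];
    };

    // Try to fold a memory operand at Idx1; if that fails and the operands
    // commute, swap them, retry once at the other index, and undo the swap
    // when the retry also fails -- the same shape as the rewritten
    // foldMemoryOperandImpl() above, where commuteInstruction() performs the
    // swap and the recursive call with AllowCommute=false is the retry.
    std::optional<Instr> foldWithCommute(
        Instr I, unsigned Idx1, unsigned Idx2,
        const std::function<std::optional<Instr>(const Instr &, unsigned)>
            &tryFold) {
      if (auto Folded = tryFold(I, Idx1))
        return Folded;
      std::swap(I.Src[Idx1], I.Src[Idx2]); // commute the operands
      if (auto Folded = tryFold(I, Idx2))
        return Folded;
      std::swap(I.Src[Idx1], I.Src[Idx2]); // undo the commute before giving up
      return std::nullopt;
    }

Undoing the commute on failure matters because the caller still owns the original instruction; returning it with silently swapped operands would corrupt later folding attempts, which is exactly why the patch restores the operand order before reporting failure.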
@@ -5582,20 +6240,19 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   if (FoldedStore)
     MIB.addReg(Reg, RegState::Define);
-  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
-    MIB.addOperand(BeforeOps[i]);
+  for (MachineOperand &BeforeOp : BeforeOps)
+    MIB.addOperand(BeforeOp);
   if (FoldedLoad)
     MIB.addReg(Reg);
-  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
-    MIB.addOperand(AfterOps[i]);
-  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
-    MachineOperand &MO = ImpOps[i];
-    MIB.addReg(MO.getReg(),
-               getDefRegState(MO.isDef()) |
+  for (MachineOperand &AfterOp : AfterOps)
+    MIB.addOperand(AfterOp);
+  for (MachineOperand &ImpOp : ImpOps) {
+    MIB.addReg(ImpOp.getReg(),
+               getDefRegState(ImpOp.isDef()) |
                RegState::Implicit |
-               getKillRegState(MO.isKill()) |
-               getDeadRegState(MO.isDead()) |
-               getUndefRegState(MO.isUndef()));
+               getKillRegState(ImpOp.isKill()) |
+               getDeadRegState(ImpOp.isDead()) |
+               getUndefRegState(ImpOp.isUndef()));
   }
   // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
   switch (DataMI->getOpcode()) {
@@ -5686,9 +6343,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                             cast<MachineSDNode>(N)->memoperands_end());
   if (!(*MMOs.first) &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Do not introduce a slow unaligned load.
     return false;
+  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+  // memory access is slow above.
   unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
   bool isAligned = (*MMOs.first) &&
                    (*MMOs.first)->getAlignment() >= Alignment;
@@ -5729,9 +6388,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                              cast<MachineSDNode>(N)->memoperands_end());
   if (!(*MMOs.first) &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Do not introduce a slow unaligned store.
     return false;
+  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+  // memory access is slow above.
   unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
   bool isAligned = (*MMOs.first) &&
                    (*MMOs.first)->getAlignment() >= Alignment;
@@ -6192,16 +6853,16 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
 // domains, but they require a bit more work than just switching opcodes.
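
The lookup helpers that follow scan fixed-width tables: each row holds the same operation's opcode in the three execution domains (packed-single, packed-double, packed-integer), so finding a row by any one column yields the equivalents in the other two. A self-contained sketch of that scan, with invented opcode numbers standing in for the real X86 opcode enums:

    #include <cstdint>
    #include <cstdio>

    // Each row holds the equivalent opcode in domains 1, 2, and 3,
    // mirroring the shape of ReplaceableInstrs. The numbers here are
    // made up for illustration; the real tables use X86::* opcode enums.
    static const uint16_t Replaceable[][3] = {
        {0x100, 0x200, 0x300}, // e.g. an ANDPS / ANDPD / PAND row
        {0x101, 0x201, 0x301}, // e.g. an ORPS  / ORPD  / POR  row
    };

    static const uint16_t *lookupRow(unsigned Opcode, unsigned Domain) {
      for (const uint16_t (&Row)[3] : Replaceable)
        if (Row[Domain - 1] == Opcode)
          return Row;
      return nullptr;
    }

    int main() {
      // Re-domain a packed-single opcode (domain 1) to the integer domain (3).
      if (const uint16_t *Row = lookupRow(0x100, 1))
        std::printf("integer-domain equivalent: 0x%x\n", (unsigned)Row[2]);
    }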
static const uint16_t *lookup(unsigned opcode, unsigned domain) { - for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) - if (ReplaceableInstrs[i][domain-1] == opcode) - return ReplaceableInstrs[i]; + for (const uint16_t (&Row)[3] : ReplaceableInstrs) + if (Row[domain-1] == opcode) + return Row; return nullptr; } static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { - for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) - if (ReplaceableInstrsAVX2[i][domain-1] == opcode) - return ReplaceableInstrsAVX2[i]; + for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2) + if (Row[domain-1] == opcode) + return Row; return nullptr; } @@ -6347,230 +7008,181 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel, return isHighLatencyDef(DefMI->getOpcode()); } -static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { - assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators"); - const MachineOperand &Op1 = Inst.getOperand(1); - const MachineOperand &Op2 = Inst.getOperand(2); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); +bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { + assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && + "Reassociation needs binary operators"); - // We need virtual register definitions. - MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) - MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) - MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + // Integer binary math/logic instructions have a third source operand: + // the EFLAGS register. That operand must be both defined here and never + // used; ie, it must be dead. If the EFLAGS operand is live, then we can + // not change anything because rearranging the operands could affect other + // instructions that depend on the exact status flags (zero, sign, etc.) + // that are set by using these particular operands with this operation. + if (Inst.getNumOperands() == 4) { + assert(Inst.getOperand(3).isReg() && + Inst.getOperand(3).getReg() == X86::EFLAGS && + "Unexpected operand in reassociable instruction"); + if (!Inst.getOperand(3).isDead()) + return false; + } - // And they need to be in the trace (otherwise, they won't have a depth). - if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) - return true; - - return false; -} - -static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. 
- if (MI1->getOpcode() == AssocOpcode && - hasVirtualRegDefsInBasicBlock(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; + return TargetInstrInfo::hasReassociableOperands(Inst, MBB); } // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) -// 2. Other math / logic operations (and, or) -static bool isAssociativeAndCommutative(unsigned Opcode) { - switch (Opcode) { +// 2. Other math / logic operations (xor, or) +// 3. Other forms of the same operation (intrinsics and other variants) +bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case X86::AND8rr: + case X86::AND16rr: + case X86::AND32rr: + case X86::AND64rr: + case X86::OR8rr: + case X86::OR16rr: + case X86::OR32rr: + case X86::OR64rr: + case X86::XOR8rr: + case X86::XOR16rr: + case X86::XOR32rr: + case X86::XOR64rr: + case X86::IMUL16rr: + case X86::IMUL32rr: + case X86::IMUL64rr: + case X86::PANDrr: + case X86::PORrr: + case X86::PXORrr: + case X86::VPANDrr: + case X86::VPANDYrr: + case X86::VPORrr: + case X86::VPORYrr: + case X86::VPXORrr: + case X86::VPXORYrr: + // Normal min/max instructions are not commutative because of NaN and signed + // zero semantics, but these are. Thus, there's no need to check for global + // relaxed math; the instructions themselves have the properties we need. + case X86::MAXCPDrr: + case X86::MAXCPSrr: + case X86::MAXCSDrr: + case X86::MAXCSSrr: + case X86::MINCPDrr: + case X86::MINCPSrr: + case X86::MINCSDrr: + case X86::MINCSSrr: + case X86::VMAXCPDrr: + case X86::VMAXCPSrr: + case X86::VMAXCPDYrr: + case X86::VMAXCPSYrr: + case X86::VMAXCSDrr: + case X86::VMAXCSSrr: + case X86::VMINCPDrr: + case X86::VMINCPSrr: + case X86::VMINCPDYrr: + case X86::VMINCPSYrr: + case X86::VMINCSDrr: + case X86::VMINCSSrr: + return true; + case X86::ADDPDrr: + case X86::ADDPSrr: case X86::ADDSDrr: case X86::ADDSSrr: - case X86::VADDSDrr: - case X86::VADDSSrr: + case X86::MULPDrr: + case X86::MULPSrr: case X86::MULSDrr: case X86::MULSSrr: + case X86::VADDPDrr: + case X86::VADDPSrr: + case X86::VADDPDYrr: + case X86::VADDPSYrr: + case X86::VADDSDrr: + case X86::VADDSSrr: + case X86::VMULPDrr: + case X86::VMULPSrr: + case X86::VMULPDYrr: + case X86::VMULPSYrr: case X86::VMULSDrr: case X86::VMULSSrr: - return true; + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; default: return false; } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst.getOpcode()) && - hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && - hasReassocSibling(Inst, Commuted)) - return true; +/// This is an architecture-specific helper function of reassociateOps. +/// Set special operand attributes for new instructions after reassociation. 
+void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+                                         MachineInstr &OldMI2,
+                                         MachineInstr &NewMI1,
+                                         MachineInstr &NewMI2) const {
+  // Integer instructions define an implicit EFLAGS source register operand as
+  // the third source (fourth total) operand.
+  if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
+    return;
 
-  return false;
+  assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
+         "Unexpected instruction type for reassociation");
+
+  MachineOperand &OldOp1 = OldMI1.getOperand(3);
+  MachineOperand &OldOp2 = OldMI2.getOperand(3);
+  MachineOperand &NewOp1 = NewMI1.getOperand(3);
+  MachineOperand &NewOp2 = NewMI2.getOperand(3);
+
+  assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
+         "Must have dead EFLAGS operand in reassociable instruction");
+  assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
+         "Must have dead EFLAGS operand in reassociable instruction");
+
+  (void)OldOp1;
+  (void)OldOp2;
+
+  assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
+         "Unexpected operand in reassociable instruction");
+  assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
+         "Unexpected operand in reassociable instruction");
+
+  // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
+  // of this pass or other passes. The EFLAGS operands must be dead in these new
+  // instructions because the EFLAGS operands in the original instructions must
+  // be dead in order for reassociation to occur.
+  NewOp1.setIsDead();
+  NewOp2.setIsDead();
 }
 
-// FIXME: This has the potential to be expensive (compile time) while not
-// improving the code at all. Some ways to limit the overhead:
-// 1. Track successful transforms; bail out if hit rate gets too low.
-// 2. Only enable at -O3 or some other non-default optimization level.
-// 3. Pre-screen pattern candidates here: if an operand of the previous
-//    instruction is known to not increase the critical path, then don't match
-//    that pattern.
-bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
-        SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
-  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
-    return false;
-
-  // TODO: There is nothing x86-specific here except the instruction type.
-  // This logic could be hoisted into the machine combiner pass itself.
-
-  // Look for this reassociation pattern:
-  //   B = A op X (Prev)
-  //   C = B op Y (Root)
-
-  bool Commute;
-  if (isReassocCandidate(Root, Commute)) {
-    // We found a sequence of instructions that may be suitable for a
-    // reassociation of operands to increase ILP. Specify each commutation
-    // possibility for the Prev instruction in the sequence and let the
-    // machine combiner decide if changing the operands is worthwhile.
-    if (Commute) {
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
-    } else {
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
-    }
-    return true;
-  }
-
-  return false;
+std::pair<unsigned, unsigned>
+X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  return std::make_pair(TF, 0u);
 }
 
-/// Attempt the following reassociation to reduce critical path length:
-///   B = A op X (Prev)
-///   C = B op Y (Root)
-/// ===>
-///   B = X op Y
-///   C = A op B
-static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
-                           MachineCombinerPattern::MC_PATTERN Pattern,
-                           SmallVectorImpl<MachineInstr *> &InsInstrs,
-                           SmallVectorImpl<MachineInstr *> &DelInstrs,
-                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
-  MachineFunction *MF = Root.getParent()->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
-
-  // This array encodes the operand index for each parameter because the
-  // operands may be commuted. Each row corresponds to a pattern value,
-  // and each column specifies the index of A, B, X, Y.
-  unsigned OpIdx[4][4] = {
-    { 1, 1, 2, 2 },
-    { 1, 2, 2, 1 },
-    { 2, 1, 1, 2 },
-    { 2, 2, 1, 1 }
-  };
-
-  MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
-  MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
-  MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
-  MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
-  MachineOperand &OpC = Root.getOperand(0);
-
-  unsigned RegA = OpA.getReg();
-  unsigned RegB = OpB.getReg();
-  unsigned RegX = OpX.getReg();
-  unsigned RegY = OpY.getReg();
-  unsigned RegC = OpC.getReg();
-
-  if (TargetRegisterInfo::isVirtualRegister(RegA))
-    MRI.constrainRegClass(RegA, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegB))
-    MRI.constrainRegClass(RegB, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegX))
-    MRI.constrainRegClass(RegX, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegY))
-    MRI.constrainRegClass(RegY, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegC))
-    MRI.constrainRegClass(RegC, RC);
-
-  // Create a new virtual register for the result of (X op Y) instead of
-  // recycling RegB because the MachineCombiner's computation of the critical
-  // path requires a new register definition rather than an existing one.
-  unsigned NewVR = MRI.createVirtualRegister(RC);
-  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-
-  unsigned Opcode = Root.getOpcode();
-  bool KillA = OpA.isKill();
-  bool KillX = OpX.isKill();
-  bool KillY = OpY.isKill();
-
-  // Create new instructions for insertion.
-  MachineInstrBuilder MIB1 =
-      BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
-          .addReg(RegX, getKillRegState(KillX))
-          .addReg(RegY, getKillRegState(KillY));
-  InsInstrs.push_back(MIB1);
-
-  MachineInstrBuilder MIB2 =
-      BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
-          .addReg(RegA, getKillRegState(KillA))
-          .addReg(NewVR, getKillRegState(true));
-  InsInstrs.push_back(MIB2);
-
-  // Record old instructions for deletion.
-  DelInstrs.push_back(&Prev);
-  DelInstrs.push_back(&Root);
-}
-
-void X86InstrInfo::genAlternativeCodeSequence(
-    MachineInstr &Root,
-    MachineCombinerPattern::MC_PATTERN Pattern,
-    SmallVectorImpl<MachineInstr *> &InsInstrs,
-    SmallVectorImpl<MachineInstr *> &DelInstrs,
-    DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
-  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
-
-  // Select the previous instruction in the sequence based on the input pattern.
-  MachineInstr *Prev = nullptr;
-  switch (Pattern) {
-    case MachineCombinerPattern::MC_REASSOC_AX_BY:
-    case MachineCombinerPattern::MC_REASSOC_XA_BY:
-      Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
-      break;
-    case MachineCombinerPattern::MC_REASSOC_AX_YB:
-    case MachineCombinerPattern::MC_REASSOC_XA_YB:
-      Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
-  }
-  assert(Prev && "Unknown pattern for machine combiner");
-
-  reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
-  return;
+ArrayRef<std::pair<unsigned, const char *>>
+X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace X86II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
+      {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
+      {MO_GOT, "x86-got"},
+      {MO_GOTOFF, "x86-gotoff"},
+      {MO_GOTPCREL, "x86-gotpcrel"},
+      {MO_PLT, "x86-plt"},
+      {MO_TLSGD, "x86-tlsgd"},
+      {MO_TLSLD, "x86-tlsld"},
+      {MO_TLSLDM, "x86-tlsldm"},
+      {MO_GOTTPOFF, "x86-gottpoff"},
+      {MO_INDNTPOFF, "x86-indntpoff"},
+      {MO_TPOFF, "x86-tpoff"},
+      {MO_DTPOFF, "x86-dtpoff"},
+      {MO_NTPOFF, "x86-ntpoff"},
+      {MO_GOTNTPOFF, "x86-gotntpoff"},
+      {MO_DLLIMPORT, "x86-dllimport"},
+      {MO_DARWIN_STUB, "x86-darwin-stub"},
+      {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
+      {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
+      {MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, "x86-darwin-hidden-nonlazy-pic-base"},
+      {MO_TLVP, "x86-tlvp"},
+      {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
+      {MO_SECREL, "x86-secrel"}};
+  return makeArrayRef(TargetFlags);
 }
 
 namespace {
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index bf63336c7005..9d40334206b2 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -23,22 +23,10 @@
 #include "X86GenInstrInfo.inc"
 
 namespace llvm {
+  class MachineInstrBuilder;
   class X86RegisterInfo;
   class X86Subtarget;
 
-  namespace MachineCombinerPattern {
-    enum MC_PATTERN : int {
-      // These are commutative variants for reassociating a computation chain
-      // of the form:
-      //   B = A op X (Prev)
-      //   C = B op Y (Root)
-      MC_REASSOC_AX_BY = 0,
-      MC_REASSOC_AX_YB = 1,
-      MC_REASSOC_XA_BY = 2,
-      MC_REASSOC_XA_YB = 3,
-    };
-  } // end namespace MachineCombinerPattern
-
 namespace X86 {
   // X86 specific condition code. These correspond to X86_*_COND in
   // X86InstrInfo.td. They must be kept in synch.
@@ -259,14 +247,64 @@ public:
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const override;
 
-  /// commuteInstruction - We have a few instructions that must be hacked on to
-  /// commute them.
+  /// Returns true iff the routine could find two commutable operands in the
+  /// given machine instruction.
+  /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their
+  /// input values can be re-defined in this method only if the input values
+  /// are not pre-defined, which is designated by the special value
+  /// 'CommuteAnyOperandIndex' assigned to it.
+  /// If both indices are pre-defined and refer to some operands, then the
+  /// method simply returns true if the corresponding operands are commutable
+  /// and returns false otherwise.
   ///
-  MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
+  /// For example, calling this method this way:
+  ///     unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+  ///     findCommutedOpIndices(MI, Op1, Op2);
+  /// can be interpreted as a query asking to find an operand that would be
+  /// commutable with the operand #1.
   bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
 
+  /// Returns true if the routine could find two commutable operands
+  /// in the given FMA instruction. Otherwise, returns false.
+  ///
+  /// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
+  /// The output indices of the commuted operands are returned in these
+  /// arguments. Also, the input values of these arguments may be preset either
+  /// to indices of operands that must be commuted or be equal to a special
+  /// value 'CommuteAnyOperandIndex' which means that the corresponding
+  /// operand index is not set and this method is free to pick any of
+  /// the available commutable operands.
+  ///
+  /// For example, calling this method this way:
+  ///     unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
+  ///     findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+  /// can be interpreted as a query asking if the operand #1 can be swapped
+  /// with any other available operand (e.g. operand #2, operand #3, etc.).
+  ///
+  /// The returned FMA opcode may differ from the opcode in the given MI.
+  /// For example, commuting the operands #1 and #3 in the following FMA
+  ///     FMA213 #1, #2, #3
+  /// results in an instruction with the adjusted opcode:
+  ///     FMA231 #3, #2, #1
+  bool findFMA3CommutedOpIndices(MachineInstr *MI,
+                                 unsigned &SrcOpIdx1,
+                                 unsigned &SrcOpIdx2) const;
+
+  /// Returns an adjusted FMA opcode that must be used in the FMA instruction
+  /// that performs the same computations as the given \p MI but which has the
+  /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+  /// It may return 0 if it is unsafe to commute the operands.
+  ///
+  /// The returned FMA opcode may differ from the opcode in the given \p MI.
+  /// For example, commuting the operands #1 and #3 in the following FMA
+  ///     FMA213 #1, #2, #3
+  /// results in an instruction with the adjusted opcode:
+  ///     FMA231 #3, #2, #1
+  unsigned getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+                                          unsigned SrcOpIdx1,
+                                          unsigned SrcOpIdx2) const;
+
   // Branch analysis.
   bool isUnpredicatedTerminator(const MachineInstr* MI) const override;
   bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -342,11 +380,6 @@ public:
                                       MachineBasicBlock::iterator InsertPt,
                                       MachineInstr *LoadMI) const override;
 
-  /// canFoldMemoryOperand - Returns true for the specified load / store if
-  /// folding is possible.
-  bool canFoldMemoryOperand(const MachineInstr *,
-                            ArrayRef<unsigned>) const override;
-
   /// unfoldMemoryOperand - Separate a single instruction which folded a load or
   /// a store or a load and a store into two or more instruction. If this is
   /// possible, returns true as well as the new instructions by reference.
@@ -406,10 +439,9 @@ public:
   bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const;
 
-  static bool isX86_64ExtendedReg(const MachineOperand &MO) {
-    if (!MO.isReg()) return false;
-    return X86II::isX86_64ExtendedReg(MO.getReg());
-  }
+  /// True if MI has a condition code def, e.g. EFLAGS, that is
+  /// not marked dead.
+  bool hasLiveCondCodeDef(MachineInstr *MI) const;
 
   /// getGlobalBaseReg - Return a virtual register initialized with the
   /// global base register value. Output instructions required to
@@ -452,26 +484,19 @@ public:
                              const MachineInstr *DefMI, unsigned DefIdx,
                              const MachineInstr *UseMI,
                              unsigned UseIdx) const override;
 
-  bool useMachineCombiner() const override { return true; }
-
-  /// Return true when there is potentially a faster code sequence
-  /// for an instruction chain ending in <Root>. All potential patterns are
-  /// output in the <Pattern> array.
-  bool getMachineCombinerPatterns(
-      MachineInstr &Root,
-      SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
-
-  /// When getMachineCombinerPatterns() finds a pattern, this function generates
-  /// the instructions that could replace the original code sequence.
-  void genAlternativeCodeSequence(
-      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
-      SmallVectorImpl<MachineInstr *> &InsInstrs,
-      SmallVectorImpl<MachineInstr *> &DelInstrs,
-      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+  bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
+  bool hasReassociableOperands(const MachineInstr &Inst,
+                               const MachineBasicBlock *MBB) const override;
+
+  void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+                             MachineInstr &NewMI1,
+                             MachineInstr &NewMI2) const override;
 
   /// analyzeCompare - For a comparison instruction, return the source registers
   /// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -500,16 +525,49 @@ public:
                             unsigned &FoldAsLoadDefReg,
                             MachineInstr *&DefMI) const override;
 
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+  /// Commutes the operands in the given instruction by changing the operands
+  /// order and/or changing the instruction's opcode and/or the immediate value
+  /// operand.
+  ///
+  /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+  /// to be commuted.
+  ///
+  /// Do not call this method for a non-commutable instruction or
+  /// non-commutable operands.
+  /// Even though the instruction is commutable, the method may still
+  /// fail to commute the operands; a null pointer is returned in such cases.
+  MachineInstr *commuteInstructionImpl(MachineInstr *MI, bool NewMI,
+                                       unsigned CommuteOpIdx1,
+                                       unsigned CommuteOpIdx2) const override;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
+  /// Handles memory folding for special case instructions, for instance those
+  /// requiring custom manipulation of the address.
+  MachineInstr *foldMemoryOperandCustom(MachineFunction &MF, MachineInstr *MI,
+                                        unsigned OpNum,
+                                        ArrayRef<MachineOperand> MOs,
+                                        MachineBasicBlock::iterator InsertPt,
+                                        unsigned Size, unsigned Align) const;
+
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  /// Expand the MOVImmSExti8 pseudo-instructions.
+ bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 52bab9c79b45..f4ca2b880bad 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -106,8 +106,6 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; -def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; - def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -158,6 +156,8 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, + [SDNPHasChain, SDNPOptInGlue]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", @@ -250,9 +250,6 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL, - [SDNPHasChain, SDNPOutGlue]>; - //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -344,18 +341,21 @@ def vy64xmem : X86VMemOperand; def vz32mem : X86VMemOperand; def vz64mem : X86VMemOperand; -// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of -// plain GR64, so that it doesn't potentially require a REX prefix. -def i8mem_NOREX : Operand { +// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead +// of a plain GPR, so that it doesn't potentially require a REX prefix. +def ptr_rc_norex : PointerLikeRegClass<2>; +def ptr_rc_norex_nosp : PointerLikeRegClass<3>; + +def i8mem_NOREX : Operand { let PrintMethod = "printi8mem"; - let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, i8imm); let ParserMatchClass = X86Mem8AsmOperand; let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. // It represents GR32_TC, GR64_TC or GR64_TCW64. -def ptr_rc_tailcall : PointerLikeRegClass<2>; +def ptr_rc_tailcall : PointerLikeRegClass<4>; // Special i32mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled @@ -697,34 +697,34 @@ def lea64mem : Operand { // X86 Complex Pattern Definitions. // -// Define X86 specific addressing mode. -def addr : ComplexPattern; -def lea32addr : ComplexPattern; +def lea32addr : ComplexPattern; // In 64-bit mode 32-bit LEAs can use RIP-relative addressing. -def lea64_32addr : ComplexPattern; -def tls32addr : ComplexPattern; -def tls32baseaddr : ComplexPattern; -def lea64addr : ComplexPattern; -def tls64addr : ComplexPattern; -def tls64baseaddr : ComplexPattern; -def vectoraddr : ComplexPattern; +def vectoraddr : ComplexPattern; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
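
The immediate predicates defined in the next hunk (i16immSExt8 and friends) reduce to isInt&lt;N&gt;/isUInt&lt;N&gt; range checks. A simplified standalone rendering of those checks is below for reference; note that the real llvm::isInt additionally special-cases N >= 64, which this sketch omits:

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for llvm::isInt<N> / llvm::isUInt<N> (valid for
    // the small N used by the predicates below, not for N >= 64).
    template <unsigned N> constexpr bool isIntN(int64_t X) {
      return -(INT64_C(1) << (N - 1)) <= X && X < (INT64_C(1) << (N - 1));
    }
    template <unsigned N> constexpr bool isUIntN(uint64_t X) {
      return X < (UINT64_C(1) << N);
    }

    int main() {
      std::printf("%d\n", isIntN<8>(-128));          // 1: fits a sign-extended imm8
      std::printf("%d\n", isIntN<8>(128));           // 0: needs a wider immediate
      std::printf("%d\n", isUIntN<32>(0xFFFFFFFFu)); // 1: zero-extends from 32 bits
    }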
@@ -767,12 +767,21 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">,
 def NoDQI : Predicate<"!Subtarget->hasDQI()">;
 def HasBWI : Predicate<"Subtarget->hasBWI()">,
              AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
 def HasVLX : Predicate<"Subtarget->hasVLX()">,
              AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
 def NoVLX : Predicate<"!Subtarget->hasVLX()">;
+def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+def PKU : Predicate<"!Subtarget->hasPKU()">;
 
 def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
 def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
+def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
+def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
+def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
+def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
 def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
 def HasFMA : Predicate<"Subtarget->hasFMA()">;
 def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
@@ -794,6 +803,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
 def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
 def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
 def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -812,6 +822,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
                   AssemblerPredicate<"Mode32Bit", "32-bit mode">;
 def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+                                  "Subtarget->getFrameLowering()->hasFP(*MF)">;
 def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
 def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
 def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -825,6 +837,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
 def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
 def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
 def OptForSize : Predicate<"OptForSize">;
+def OptForMinSize : Predicate<"OptForMinSize">;
 def OptForSpeed : Predicate<"!OptForSize">;
 def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -867,20 +880,54 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
 
-def i16immSExt8  : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
-def i32immSExt8  : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
-def i64immSExt8  : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+def i16immSExt8  : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
+def i32immSExt8  : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def i64immSExt8  : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register, rather than encode the immediate in every instruction.
+// This has the risk of increasing register pressure from stretched live
+// ranges, however, the immediates should be trivial to rematerialize by
+// the RA in the event of high register pressure.
+// TODO : This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2 : This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow-up on enabling O2 after we fix that
+// issue.
+// TODO3 : This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm8_su : PatLeaf<(i8 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm32_su : PatLeaf<(i32 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
 
-def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
 
 // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
 // unsigned field.
-def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
 
 def i64immZExt32SExt8 : ImmLeaf<i64, [{
   return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
 }]>;
 
 // Helper fragments for loads.
@@ -914,11 +961,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadi8   : PatFrag<(ops node:$ptr), (i8   (load node:$ptr))>;
+def loadi64  : PatFrag<(ops node:$ptr), (i64  (load node:$ptr))>;
+def loadf32  : PatFrag<(ops node:$ptr), (f32  (load node:$ptr))>;
+def loadf64  : PatFrag<(ops node:$ptr), (f64  (load node:$ptr))>;
+def loadf80  : PatFrag<(ops node:$ptr), (f80  (load node:$ptr))>;
+def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
 
 def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1020,12 +1068,8 @@ def PUSH32r  : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
 def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
                  IIC_PUSH_REG>, OpSize16;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize16;
 def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
 
 def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
                    "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@@ -1039,6 +1083,14 @@ def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
                     "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
                     Requires<[Not64BitMode]>;
 } // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
 }
 
 let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@@ -1071,9 +1123,11 @@ def PUSH64r  : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; +} // mayStore, SchedRW +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>; -} // mayStore, SchedRW +} // mayLoad, mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, @@ -1275,13 +1329,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; + [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; + [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; + [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1457,10 +1511,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", - [(set EFLAGS, (X86sahf AH))], IIC_AHF>; + [(set EFLAGS, (X86sahf AH))], IIC_AHF>, + Requires<[HasLAHFSAHF]>; let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], - IIC_AHF>; // AH = flags + IIC_AHF>, // AH = flags + Requires<[HasLAHFSAHF]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -1894,37 +1950,38 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; } // Table lookup instructions +let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, Sched<[WriteLoad]>; let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition -// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, Requires<[Not64BitMode]>; // ASCII Adjust AX Before Division -// sets AL, AH and EFLAGS and uses AL and AH +let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), "aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>; // ASCII Adjust AX After Multiply -// sets AL, AH and EFLAGS and uses AL +let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), "aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>; // ASCII Adjust AL After Subtraction - sets -// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Addition -// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], 
hasSideEffects = 0 in def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Subtraction -// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[Not64BitMode]>; } // SchedRW @@ -2356,6 +2413,32 @@ defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m>; defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>; } // HasTBM, EFLAGS +//===----------------------------------------------------------------------===// +// MONITORX/MWAITX Instructions +// +let SchedRW = [WriteSystem] in { +let Uses = [EAX, ECX, EDX] in +def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", [], + IIC_SSE_MONITOR>, TB; +let Uses = [ECX, EAX, EBX] in +def MWAITXrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [], IIC_SSE_MWAIT>, + TB; +} // SchedRW + +def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrr)>, Requires<[Not64BitMode]>; +def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrr)>, Requires<[In64BitMode]>; + +def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>, + Requires<[Not64BitMode]>; +def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>, + Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// CLZERO Instruction +// +let Uses = [EAX] in +def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB; + //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. //===----------------------------------------------------------------------===// @@ -2498,8 +2581,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope", "att">; -def : MnemonicAlias<"loopnz", "loopne", "att">; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; @@ -2532,14 +2615,15 @@ def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>; -def : MnemonicAlias<"repe", "rep", "att">; -def : MnemonicAlias<"repz", "rep", "att">; -def : MnemonicAlias<"repnz", "repne", "att">; +def : MnemonicAlias<"repe", "rep">; +def : MnemonicAlias<"repz", "rep">; +def : MnemonicAlias<"repnz", "repne">; def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sal", "shl", "intel">; def : MnemonicAlias<"salb", "shlb", "att">; def : MnemonicAlias<"salw", "shlw", "att">; def : MnemonicAlias<"sall", "shll", "att">; @@ -2579,14 +2663,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; -def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll", 
"att">; def : MnemonicAlias<"fistpq", "fistpll", "att">; def : MnemonicAlias<"fisttpq", "fisttpll", "att">; def : MnemonicAlias<"fldcww", "fldcw", "att">; def : MnemonicAlias<"fnstcww", "fnstcw", "att">; def : MnemonicAlias<"fnstsww", "fnstsw", "att">; -def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fucomip", "fucompi">; def : MnemonicAlias<"fwait", "wait">; def : MnemonicAlias<"fxsaveq", "fxsave64", "att">; @@ -2594,7 +2678,9 @@ def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">; def : MnemonicAlias<"xsaveq", "xsave64", "att">; def : MnemonicAlias<"xrstorq", "xrstor64", "att">; def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">; - +def : MnemonicAlias<"xrstorsq", "xrstors64", "att">; +def : MnemonicAlias<"xsavecq", "xsavec64", "att">; +def : MnemonicAlias<"xsavesq", "xsaves64", "att">; class CondCodeAlias @@ -2640,8 +2726,8 @@ defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">; //===----------------------------------------------------------------------===// // aad/aam default to base 10 if no operand is specified. -def : InstAlias<"aad", (AAD8i8 10)>; -def : InstAlias<"aam", (AAM8i8 10)>; +def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>; +def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. @@ -2719,8 +2805,10 @@ def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>; // Various unary fpstack operations default to operating on on ST1. // For example, "fxch" -> "fxch %st(1)" def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>; +def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>; def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>; def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>; +def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>; def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>; @@ -2798,20 +2886,20 @@ def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16Bit // "imul , B" is an alias for "imul , B, B". 
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; -def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; -def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; -def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; -def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; -def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; +def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; +def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; +def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; +def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; +def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; +def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; // inb %dx -> inb %al, %dx def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>; def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>; def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>; -def : InstAlias<"inb\t$port", (IN8ri i8imm:$port), 0>; -def : InstAlias<"inw\t$port", (IN16ri i8imm:$port), 0>; -def : InstAlias<"inl\t$port", (IN32ri i8imm:$port), 0>; +def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>; +def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>; +def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp @@ -2861,9 +2949,9 @@ def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16: def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>; def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>; def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>; -def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port), 0>; -def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port), 0>; -def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>; +def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>; +def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>; +def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>; // 'sldt ' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity @@ -2940,3 +3028,34 @@ def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>; def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>; + +// These aliases exist to get the parser to prioritize matching 8-bit +// immediate encodings over matching the implicit ax/eax/rax encodings. By +// explicitly mentioning the A register here, these entries will be ordered +// first due to the more explicit immediate type. 
+def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>; + +def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>; + +def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index eaa7894004cb..11dc1e7d466b 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -249,6 +249,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; +let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (bitconvert GR64:$src))], @@ -262,7 +263,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. 
-let SchedRW = [WriteMove] in { +let SchedRW = [WriteMove], isBitcast = 1 in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", @@ -303,7 +304,7 @@ def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (bitconvert - (i64 (vector_extract (v2i64 VR128:$src), + (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))))))], IIC_MMX_MOVQ_RR>; @@ -326,6 +327,7 @@ def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), } } // SchedRW +let Predicates = [HasSSE1] in def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)], @@ -355,6 +357,7 @@ defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, MMX_INTALU_ITINS, 1>; defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, MMX_INTALU_ITINS, 1>; +let Predicates = [HasSSE2] in defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, MMX_INTALUQ_ITINS, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, @@ -382,6 +385,7 @@ defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, MMX_INTALU_ITINS>; defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, MMX_INTALU_ITINS>; +let Predicates = [HasSSE2] in defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, MMX_INTALUQ_ITINS>; @@ -408,8 +412,10 @@ defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, MMX_PMUL_ITINS, 1>; +let Predicates = [HasSSE1] in defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, MMX_PMUL_ITINS, 1>; +let Predicates = [HasSSE2] in defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, MMX_PMUL_ITINS, 1>; let isCommutable = 1 in @@ -422,6 +428,7 @@ defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>; +let Predicates = [HasSSE1] in { defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, MMX_MISC_FUNC_ITINS, 1>; defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, @@ -439,6 +446,7 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, MMX_PSADBW_ITINS, 1>; +} defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b, MMX_MISC_FUNC_ITINS>; @@ -594,6 +602,7 @@ let Constraints = "$src1 = $dst" in { } // Extract / Insert +let Predicates = [HasSSE1] in def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -601,6 +610,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, imm:$src2))], IIC_MMX_PEXTR>, Sched<[WriteShuffle]>; let Constraints = "$src1 = $dst" in { +let Predicates = [HasSSE1] in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3), @@ -618,8 +628,10 @@ let Constraints = "$src1 = $dst" in { imm:$src3))], IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>; } +} // Mask creation +let Predicates = [HasSSE1] in def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -639,12 +651,12 @@ def : 
Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), // Misc. let SchedRW = [WriteShuffle] in { -let Uses = [EDI] in +let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)], IIC_MMX_MASKMOV>; -let Uses = [RDI] in +let Uses = [RDI], Predicates = [HasSSE1,In64BitMode] in def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)], @@ -653,10 +665,6 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), // 64-bit bit convert. let Predicates = [HasSSE2] in { -def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 99386b0658ad..7a44212bd829 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -330,9 +330,9 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, //===----------------------------------------------------------------------===// // A vector extract of the first f32/f64 position is a subregister copy -def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), +def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), +def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position @@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in { def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>; + def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>; } // Bitcasts between 256-bit vector types. Return the original type since @@ -650,10 +652,10 @@ let Predicates = [UseAVX] in { } // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; - def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; @@ -736,7 +738,7 @@ let Predicates = [UseSSE1] in { } // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; @@ -770,7 +772,7 @@ let Predicates = [UseSSE2] in { } // Extract and store. 
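// Illustrative aside (not part of the patch): the extract-and-store patterns
// in this hunk fold "take lane 0, then store it" into one scalar store. A
// minimal sketch using the standard SSE/SSE2 intrinsics:
#include <immintrin.h>

void store_low_lanes(float *pf, double *pd, __m128 vf, __m128d vd) {
  _mm_store_ss(pf, vf); // store (f32 (extractelt v4f32, 0)) -> movss [mem]
  _mm_store_sd(pd, vd); // store (f64 (extractelt v2f64, 0)) -> movsd [mem]
}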
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; @@ -935,22 +937,6 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, IIC_SSE_MOVU_P_RR>, VEX, VEX_L; } -let Predicates = [HasAVX] in { -def : Pat<(v8i32 (X86vzmovl - (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v4i64 (X86vzmovl - (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v8f32 (X86vzmovl - (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v4f64 (X86vzmovl - (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -} - - def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), @@ -1172,12 +1158,13 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, string base_opc, InstrItinClass itin> { - defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; -let Constraints = "$src1 = $dst" in - defm NAME : sse12_mov_hilo_packed_base; } @@ -1188,29 +1175,31 @@ let AddedComplexity = 20 in { } let SchedRW = [WriteStore] in { +let Predicates = [UseAVX] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), + [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +}// UseAVX def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), + [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // Shuffle with VMOVLPS def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), (VMOVLPSrm VR128:$src1, addr:$src2)>; @@ -1243,7 +1232,7 @@ let Predicates = [HasAVX] in { let Predicates = [UseSSE1] in { // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS - def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), + def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; @@ -1297,31 +1286,33 @@ let AddedComplexity = 20 in { let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
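// Illustrative aside (not part of the patch): the MOVHPS/MOVHPD store forms
// below let "extract lane 1, then store" be a single instruction, while the
// register-only extract goes through the unpack-high step described above. A
// sketch using the standard SSE2 intrinsics:
#include <immintrin.h>

void store_high(double *p, __m128d v) {
  _mm_storeh_pd(p, v);               // a single movhpd [mem], xmm
}

double extract_high(__m128d v) {
  // No register-only "extract high" exists: unpack high->low first
  // (unpckhpd), then read lane 0.
  return _mm_cvtsd_f64(_mm_unpackhi_pd(v, v));
}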
+let Predicates = [UseAVX] in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +} // UseAVX def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // VMOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), @@ -1345,7 +1336,7 @@ let Predicates = [HasAVX] in { (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(store (f64 (vector_extract + def : Pat<(store (f64 (extractelt (v2f64 (X86VPermilpi VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; @@ -1377,7 +1368,7 @@ let Predicates = [UseSSE2] in { (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (MOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(store (f64 (vector_extract + def : Pat<(store (f64 (extractelt (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; @@ -2073,15 +2064,17 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PSrr VR128:$src)>; - def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), - (VCVTDQ2PSrm addr:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), (VCVTDQ2PSrr VR128:$src)>; def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), (VCVTDQ2PSrm addr:$src)>; +} + +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -2149,7 +2142,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), (VCVTTPD2DQYrr VR256:$src)>; def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), @@ -2306,7 +2299,9 @@ let Predicates = [HasAVX] in { (VCVTDQ2PSYrr VR256:$src)>; def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; +} +let Predicates = [HasAVX, NoVLX] in { // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (X86vfpround (v2f64 
VR128:$src))), (VCVTPD2PSrr VR128:$src)>; @@ -2452,9 +2447,9 @@ let Defs = [EFLAGS] in { defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, "ucomisd">, PD, VEX, VEX_LIG; let Pattern = [] in { - defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, "comiss">, PS, VEX, VEX_LIG; - defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, "comisd">, PD, VEX, VEX_LIG; } @@ -2475,9 +2470,9 @@ let Defs = [EFLAGS] in { "ucomisd">, PD; let Pattern = [] in { - defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, "comiss">, PS; - defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, "comisd">, PD; } @@ -2605,19 +2600,20 @@ multiclass sse12_shuffle; } -defm VSHUFPS : sse12_shuffle, PS, VEX_4V; -defm VSHUFPSY : sse12_shuffle, PS, VEX_4V, VEX_L; -defm VSHUFPD : sse12_shuffle, PD, VEX_4V; -defm VSHUFPDY : sse12_shuffle, PD, VEX_4V, VEX_L; - +} let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, PD; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))), (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; @@ -2694,6 +2690,7 @@ multiclass sse12_unpack_interleave opc, SDNode OpNode, ValueType vt, Sched<[WriteFShuffleLd, ReadAfterLd]>; } +let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, PS, VEX_4V; @@ -2719,7 +2716,7 @@ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, PD, VEX_4V, VEX_L; - +}// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", @@ -2845,8 +2842,8 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, ValueType OpVT128, ValueType OpVT256, - OpndItins itins, bit IsCommutable = 0> { -let Predicates = [HasAVX, NoVLX] in + OpndItins itins, bit IsCommutable = 0, Predicate prd> { +let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rm, VEX_4V; @@ -2854,7 +2851,7 @@ let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rm; -let Predicates = [HasAVX2, NoVLX] in +let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; @@ -2863,13 +2860,13 @@ let Predicates = [HasAVX2, NoVLX] in // These are ordered here for pattern ordering requirements with the fp versions defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 0>; + SSE_VEC_BIT_ITINS_P, 0, NoVLX>; 
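// Illustrative aside (not part of the patch): PDI_binop_all now threads a
// predicate through so that, e.g., the VEX "vpand" patterns are fenced off
// as NoVLX; with AVX-512VL available, the EVEX forms are expected to match
// instead. The IR-level operations being matched are the plain bitwise ops:
#include <immintrin.h>

__m128i logic_ops(__m128i a, __m128i b) {
  __m128i x = _mm_and_si128(a, b);    // PAND / VPAND pattern: and v2i64
  x = _mm_or_si128(x, b);             // POR / VPOR pattern: or v2i64
  x = _mm_xor_si128(x, a);            // PXOR / VPXOR pattern: xor v2i64
  return _mm_andnot_si128(x, b);      // PANDN pattern (X86andnp): ~x & b
}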
//===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -2911,7 +2908,7 @@ let isCodeGenOnly = 1 in { // Multiclass for vectors using the X86 logical operation aliases for FP. multiclass sse12_fp_packed_vector_logical_alias< bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; @@ -2923,7 +2920,7 @@ multiclass sse12_fp_packed_vector_logical_alias< defm V#NAME#PSY : sse12_fp_packed, PS, VEX_4V, VEX_L; - + defm V#NAME#PDY : sse12_fp_packed, PD, VEX_4V, VEX_L; @@ -3183,7 +3180,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [UseSSE1] in { // extracted scalar math op with insert via movss def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))))), (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3198,7 +3195,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [UseSSE41] in { // extracted scalar math op with insert via blend def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (i8 1))), (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3215,7 +3212,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [HasAVX] in { // extracted scalar math op with insert via blend def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (i8 1))), (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3241,7 +3238,7 @@ multiclass scalar_math_f64_patterns { let Predicates = [UseSSE2] in { // extracted scalar math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))))), (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3256,7 +3253,7 @@ multiclass scalar_math_f64_patterns { let Predicates = [UseSSE41] in { // extracted scalar math op with insert via blend def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))), (i8 1))), (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3271,14 +3268,14 @@ multiclass scalar_math_f64_patterns { let Predicates = [HasAVX] in { // extracted scalar math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))))), (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; // extracted scalar math op with insert via blend def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))), (i8 1))), (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, 
(COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3449,8 +3446,8 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, /// sse1_fp_unop_p - SSE1 unops in packed form. multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { -let Predicates = [HasAVX] in { + OpndItins itins, list prds> { +let Predicates = prds in { def V#NAME#PSr : PSI opc, string OpcodeStr, SDNode OpNode, // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>; + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>; + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>; // There is no f64 version of the reciprocal approximation instructions. @@ -4018,39 +4015,43 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, } // ExeDomain = SSEPackedInt defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX>; defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 1>; + SSE_INTALUQ_ITINS_P, 1, NoVLX>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX>; defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 0>; + SSE_INTALUQ_ITINS_P, 0, NoVLX>; defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + 
SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; +defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; +defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; // Intrinsic forms defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, @@ -4067,26 +4068,18 @@ defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; -defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, - int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; -defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, - int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; -defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_PMADD, 1>; - -let Predicates = [HasAVX2] in - def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1), - (v32i8 VR256:$src2))), - (VPSADBWYrr VR256:$src2, VR256:$src1)>; let Predicates = [HasAVX] in - def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (VPSADBWrr VR128:$src2, VR128:$src1)>; - -def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (PSADBWrr VR128:$src2, VR128:$src1)>; +defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, + loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V; +let Predicates = [HasAVX2] in +defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, + loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V, VEX_L; +let Constraints = "$src1 = $dst" in +defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, + memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, @@ -4105,9 +4098,6 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, //===---------------------------------------------------------------------===// let Predicates = [HasAVX, NoVLX] in { -defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; @@ -4115,9 +4105,6 @@ defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, VR128, v2i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; @@ -4125,14 +4112,26 @@ defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, VR128, v2i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +} // Predicates = [HasAVX, NoVLX] -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { +let Predicates = 
[HasAVX, NoVLX_Or_NoBWI] in { +defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +} // Predicates = [HasAVX, NoVLX_Or_NoBWI] + + +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] , + Predicates = [HasAVX, NoVLX_Or_NoBWI]in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), @@ -4147,13 +4146,9 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>, VEX_4V; // PSRADQri doesn't exist in SSE[1-3]. -} -} // Predicates = [HasAVX] +} // Predicates = [HasAVX, NoVLX_Or_NoBWI] let Predicates = [HasAVX2, NoVLX] in { -defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; @@ -4161,9 +4156,6 @@ defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, VR256, v4i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; @@ -4171,14 +4163,25 @@ defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, VR256, v4i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +}// Predicates = [HasAVX2, NoVLX] -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { +defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +}// Predicates = [HasAVX2, NoVLX_Or_NoBWI] + +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 , + Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // 256-bit logical shifts. 
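// Illustrative aside (not part of the patch), before the 256-bit definitions
// that follow: these "logical shifts" are byte shifts of the whole register,
// and the AVX2 forms shift each 128-bit lane independently. A sketch with the
// standard intrinsics (shift counts must be compile-time constants):
#include <immintrin.h>

__m128i bytes128(__m128i v) {
  return _mm_slli_si128(v, 4);      // VPSLLDQri: shift whole xmm left 4 bytes
}

__m256i bytes256(__m256i v) {
  // VPSLLDQYri: each 128-bit lane is shifted separately, not the full ymm.
  return _mm256_slli_si256(v, 4);
}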
def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2), @@ -4193,8 +4196,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>, VEX_4V, VEX_L; // PSRADQYri doesn't exist in SSE[1-3]. -} -} // Predicates = [HasAVX2] +} // Predicates = [HasAVX2, NoVLX_Or_NoBWI] let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, @@ -4247,17 +4249,17 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { //===---------------------------------------------------------------------===// defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX>; defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions @@ -4511,40 +4513,43 @@ multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in { + +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - bc_v4i32, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - bc_v2i64, loadv2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, loadv2i64, 0>, VEX_4V; +} +let Predicates = [HasAVX, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, + bc_v4i32, loadv2i64, 0>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, + bc_v2i64, loadv2i64, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, loadv2i64, 0>, VEX_4V; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, loadv2i64, 0>, VEX_4V; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V, VEX_L; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, - bc_v8i32>, VEX_4V, VEX_L; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, - bc_v4i64>, VEX_4V, VEX_L; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 
bc_v16i16>, VEX_4V, VEX_L; +} +let Predicates = [HasAVX2, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, + bc_v8i32>, VEX_4V, VEX_L; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, + bc_v4i64>, VEX_4V, VEX_L; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V, VEX_L; defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, @@ -4600,7 +4605,7 @@ multiclass sse2_pinsrw { } // Extract -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -4615,7 +4620,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in @@ -4683,7 +4688,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), } // ExeDomain = SSEPackedInt //===---------------------------------------------------------------------===// -// SSE2 - Move Doubleword +// SSE2 - Move Doubleword/Quadword //===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===// @@ -4770,23 +4775,23 @@ let isCodeGenOnly = 1 in { // def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128:$src), + [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128:$src), + [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; @@ -4808,24 +4813,25 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), let SchedRW = [WriteMove] in { def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))], + [(set GR64:$dst, (extractelt (v2i64 VR128:$src), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + [(set GR64:$dst, (extractelt (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; } //SchedRW let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in -def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst), - (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", +def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs), + (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", [], 
IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in -def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src), +def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; @@ -4883,30 +4889,18 @@ let isCodeGenOnly = 1 in { IIC_SSE_MOVDQ>, Sched<[WriteStore]>; } -//===---------------------------------------------------------------------===// -// Patterns and instructions to describe movd/movq to XMM register zero-extends -// -let isCodeGenOnly = 1, SchedRW = [WriteMove] in { -let AddedComplexity = 15 in { -def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "movq\t{$src, $dst|$dst, $src}", // X86-64 only - [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))], - IIC_SSE_MOVDQ>, - VEX, VEX_W; -def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only - [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))], - IIC_SSE_MOVDQ>; -} -} // isCodeGenOnly, SchedRW - let Predicates = [UseAVX] in { - let AddedComplexity = 15 in + let AddedComplexity = 15 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), + (VMOV64toPQIrr GR64:$src)>; + + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>; + } // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. // These instructions also write zeros in the high part of a 256-bit register. 
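// Illustrative aside (not part of the patch): the rewritten patterns above
// rely on movd/movq-to-XMM already zeroing the upper lanes, so no separate
// zero-extending instruction definition is needed. A sketch, assuming x86-64:
#include <immintrin.h>

__m128i widen(long long x) {
  // Should lower to a single "movq xmm, gpr64"; lane 1 is implicitly zeroed,
  // which is exactly the (X86vzmovl (scalar_to_vector GR64)) pattern.
  return _mm_cvtsi64_si128(x);
}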
let AddedComplexity = 20 in { @@ -4924,16 +4918,16 @@ let Predicates = [UseAVX] in { def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>; - def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } let Predicates = [UseSSE2] in { - let AddedComplexity = 15 in + let AddedComplexity = 15 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (MOVDI2PDIrr GR32:$src)>; + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), + (MOV64toPQIrr GR64:$src)>; + } let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; @@ -4985,12 +4979,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), + [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, VEX; def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), + [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; } // ExeDomain, SchedRW @@ -5119,7 +5113,7 @@ def rm : S3SI, Sched<[WriteLoad]>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v4f32, VR128, loadv4f32, f128mem>, VEX; defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", @@ -5134,7 +5128,7 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, memopv4f32, f128mem>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), (VMOVSHDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))), @@ -5190,21 +5184,30 @@ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, - (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>, + (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, Sched<[WriteLoad]>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L; } defm MOVDDUP : sse3_replicate_dfp<"movddup">; -let Predicates = [HasAVX] in { + +let Predicates = [HasAVX, NoVLX] in { def : Pat<(X86Movddup (loadv2f64 addr:$src)), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + + // 256-bit version + def : Pat<(X86Movddup (loadv4i64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4i64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; +} + +let Predicates = [HasAVX] in { def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))), @@ -5212,16 +5215,6 @@ let Predicates = [HasAVX] in { def : Pat<(X86Movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), (VMOVDDUPrm addr:$src)>, 
Requires<[HasAVX]>; - - // 256-bit version - def : Pat<(X86Movddup (loadv4f64 addr:$src)), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (loadv4i64 addr:$src)), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4i64 VR256:$src)), - (VMOVDDUPYrr VR256:$src)>; } let Predicates = [UseAVX, OptForSize] in { @@ -5791,37 +5784,37 @@ let Predicates = [HasAVX2] in let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGN : ssse3_palignr<"palignr">; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; } let Predicates = [UseSSSE3] in { def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -6145,7 +6138,7 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { imm:$src2)))), addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -6170,7 +6163,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { imm:$src2)))), addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; defm PEXTRW : 
SS41I_extract16<0x15, "pextrw">; @@ -6194,7 +6187,7 @@ multiclass SS41I_extract32 opc, string OpcodeStr> { addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; defm PEXTRD : SS41I_extract32<0x16, "pextrd">; @@ -6217,7 +6210,7 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { addr:$dst)]>, REX_W; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; @@ -6285,7 +6278,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRB : SS41I_insert8<0x20, "pinsrb">; @@ -6311,7 +6304,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRD : SS41I_insert32<0x22, "pinsrd">; @@ -6337,7 +6330,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; @@ -6543,71 +6536,71 @@ let Predicates = [HasAVX] in { let Predicates = [UseAVX] in { def : Pat<(ffloor FR32:$src), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>; def : Pat<(f64 (ffloor FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>; def : Pat<(f32 (fnearbyint FR32:$src)), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; def : Pat<(f64 (fnearbyint FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; def : Pat<(f32 (fceil FR32:$src)), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>; def : Pat<(f64 (fceil FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>; def : Pat<(f32 (frint FR32:$src)), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; def : Pat<(f64 (frint FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; def : Pat<(f32 (ftrunc FR32:$src)), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>; def : Pat<(f64 (ftrunc FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>; } let Predicates = [HasAVX] in { def : Pat<(v4f32 (ffloor VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x1))>; + (VROUNDPSr VR128:$src, (i32 0x9))>; def : Pat<(v4f32 (fnearbyint VR128:$src)), (VROUNDPSr VR128:$src, (i32 0xC))>; def : Pat<(v4f32 (fceil VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x2))>; + (VROUNDPSr VR128:$src, (i32 0xA))>; def : Pat<(v4f32 (frint VR128:$src)), (VROUNDPSr VR128:$src, (i32 0x4))>; def : Pat<(v4f32 (ftrunc VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x3))>; + (VROUNDPSr VR128:$src, (i32 0xB))>; def : Pat<(v2f64 (ffloor 
VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x1))>; + (VROUNDPDr VR128:$src, (i32 0x9))>; def : Pat<(v2f64 (fnearbyint VR128:$src)), (VROUNDPDr VR128:$src, (i32 0xC))>; def : Pat<(v2f64 (fceil VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x2))>; + (VROUNDPDr VR128:$src, (i32 0xA))>; def : Pat<(v2f64 (frint VR128:$src)), (VROUNDPDr VR128:$src, (i32 0x4))>; def : Pat<(v2f64 (ftrunc VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x3))>; + (VROUNDPDr VR128:$src, (i32 0xB))>; def : Pat<(v8f32 (ffloor VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x1))>; + (VROUNDYPSr VR256:$src, (i32 0x9))>; def : Pat<(v8f32 (fnearbyint VR256:$src)), (VROUNDYPSr VR256:$src, (i32 0xC))>; def : Pat<(v8f32 (fceil VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x2))>; + (VROUNDYPSr VR256:$src, (i32 0xA))>; def : Pat<(v8f32 (frint VR256:$src)), (VROUNDYPSr VR256:$src, (i32 0x4))>; def : Pat<(v8f32 (ftrunc VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x3))>; + (VROUNDYPSr VR256:$src, (i32 0xB))>; def : Pat<(v4f64 (ffloor VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x1))>; + (VROUNDYPDr VR256:$src, (i32 0x9))>; def : Pat<(v4f64 (fnearbyint VR256:$src)), (VROUNDYPDr VR256:$src, (i32 0xC))>; def : Pat<(v4f64 (fceil VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x2))>; + (VROUNDYPDr VR256:$src, (i32 0xA))>; def : Pat<(v4f64 (frint VR256:$src)), (VROUNDYPDr VR256:$src, (i32 0x4))>; def : Pat<(v4f64 (ftrunc VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x3))>; + (VROUNDYPDr VR256:$src, (i32 0xB))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6619,47 +6612,47 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", let Predicates = [UseSSE41] in { def : Pat<(ffloor FR32:$src), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>; def : Pat<(f64 (ffloor FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>; def : Pat<(f32 (fnearbyint FR32:$src)), (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; def : Pat<(f64 (fnearbyint FR64:$src)), (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; def : Pat<(f32 (fceil FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>; def : Pat<(f64 (fceil FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>; def : Pat<(f32 (frint FR32:$src)), (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; def : Pat<(f64 (frint FR64:$src)), (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; def : Pat<(f32 (ftrunc FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>; def : Pat<(f64 (ftrunc FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>; def : Pat<(v4f32 (ffloor VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x1))>; + (ROUNDPSr VR128:$src, (i32 0x9))>; def : Pat<(v4f32 (fnearbyint VR128:$src)), (ROUNDPSr VR128:$src, (i32 0xC))>; def : Pat<(v4f32 (fceil VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x2))>; + (ROUNDPSr VR128:$src, (i32 0xA))>; def : Pat<(v4f32 (frint VR128:$src)), (ROUNDPSr VR128:$src, (i32 0x4))>; def : Pat<(v4f32 (ftrunc VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x3))>; + (ROUNDPSr VR128:$src, (i32 0xB))>; def : Pat<(v2f64 (ffloor VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x1))>; + (ROUNDPDr VR128:$src, (i32 0x9))>; def : 
Pat<(v2f64 (fnearbyint VR128:$src)), (ROUNDPDr VR128:$src, (i32 0xC))>; def : Pat<(v2f64 (fceil VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x2))>; + (ROUNDPDr VR128:$src, (i32 0xA))>; def : Pat<(v2f64 (frint VR128:$src)), (ROUNDPDr VR128:$src, (i32 0x4))>; def : Pat<(v2f64 (ftrunc VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x3))>; + (ROUNDPDr VR128:$src, (i32 0xB))>; } //===----------------------------------------------------------------------===// @@ -7815,13 +7808,7 @@ def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), // VBROADCAST - Load from memory and broadcast to all elements of the // destination operand // -class avx_broadcast opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> : - AVX8I, Sched<[Sched]>, VEX; - -class avx_broadcast_no_int opc, string OpcodeStr, RegisterClass RC, +class avx_broadcast_rm opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType VT, PatFrag ld_frag, SchedWrite Sched> : AVX8I opc, string OpcodeStr, RegisterClass RC, } // AVX2 adds register forms -class avx2_broadcast_reg opc, string OpcodeStr, RegisterClass RC, - Intrinsic Int, SchedWrite Sched> : +class avx2_broadcast_rr opc, string OpcodeStr, RegisterClass RC, + ValueType ResVT, ValueType OpVT, SchedWrite Sched> : AVX28I, Sched<[Sched]>, VEX; + [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>, + Sched<[Sched]>, VEX; let ExeDomain = SSEPackedSingle in { - def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128, + def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128, f32mem, v4f32, loadf32, WriteLoad>; - def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256, + def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, f32mem, v8f32, loadf32, WriteFShuffleLd>, VEX_L; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem, +def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, v4f64, loadf64, WriteFShuffleLd>, VEX_L; -def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, - int_x86_avx_vbroadcastf128_pd_256, - WriteFShuffleLd>, VEX_L; let ExeDomain = SSEPackedSingle in { - def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, - int_x86_avx2_vbroadcast_ss_ps, - WriteFShuffle>; - def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, - int_x86_avx2_vbroadcast_ss_ps_256, - WriteFShuffle256>, VEX_L; + def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128, + v4f32, v4f32, WriteFShuffle>; + def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256, + v8f32, v4f32, WriteFShuffle256>, VEX_L; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256, - WriteFShuffle256>, VEX_L; +def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, + v4f64, v2f64, WriteFShuffle256>, VEX_L; let mayLoad = 1, Predicates = [HasAVX2] in def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), @@ -7871,6 +7853,13 @@ def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, Sched<[WriteLoad]>, VEX, VEX_L; +def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), + (ins f128mem:$src), + "vbroadcastf128\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>, + Sched<[WriteFShuffleLd]>, VEX, VEX_L; + let Predicates = [HasAVX] in 
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -7891,7 +7880,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, @@ -8080,17 +8069,19 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, (bitconvert (i_frag addr:$src2))))]>, VEX_4V, Sched<[WriteFShuffleLd, ReadAfterLd]>; - def ri : AVXAIi8, VEX, Sched<[WriteFShuffle]>; - def mi : AVXAIi8, VEX, Sched<[WriteFShuffleLd]>; + }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { @@ -8106,7 +8097,7 @@ let ExeDomain = SSEPackedDouble in { loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))), (VPERMILPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))), @@ -8245,11 +8236,11 @@ let Predicates = [HasF16C] in { def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), (VCVTPH2PSrm addr:$src)>; - def : Pat<(store (f64 (vector_extract (bc_v2f64 (v8i16 + def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (i64 (vector_extract (bc_v2i64 (v8i16 + def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; @@ -8309,97 +8300,62 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32, // multiclass avx2_broadcast opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, - Intrinsic Int128, Intrinsic Int256> { - def rr : AVX28I, - Sched<[WriteShuffle]>, VEX; - def rm : AVX28I { + let Predicates = [HasAVX2, prd] in { + def rr : AVX28I, + (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, + Sched<[WriteShuffle]>, VEX; + def rm : AVX28I, Sched<[WriteLoad]>, VEX; - def Yrr : AVX28I, - Sched<[WriteShuffle256]>, VEX, VEX_L; - def Yrm : AVX28I, + (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>, + Sched<[WriteShuffle256]>, VEX, VEX_L; + def Yrm : AVX28I, Sched<[WriteLoad]>, VEX, VEX_L; + + // Provide aliases for broadcast from the same register class that + // automatically does the extract. 
+ def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))), + (!cast(NAME#"Yrr") + (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>; + } } defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, - int_x86_avx2_pbroadcastb_128, - int_x86_avx2_pbroadcastb_256>; + v16i8, v32i8, NoVLX_Or_NoBWI>; defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, - int_x86_avx2_pbroadcastw_128, - int_x86_avx2_pbroadcastw_256>; + v8i16, v16i16, NoVLX_Or_NoBWI>; defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, - int_x86_avx2_pbroadcastd_128, - int_x86_avx2_pbroadcastd_256>; + v4i32, v8i32, NoVLX>; defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, - int_x86_avx2_pbroadcastq_128, - int_x86_avx2_pbroadcastq_256>; + v2i64, v4i64, NoVLX>; let Predicates = [HasAVX2] in { - def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), - (VPBROADCASTBrm addr:$src)>; - def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), - (VPBROADCASTBYrm addr:$src)>; - def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWrm addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWYrm addr:$src)>; - def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDrm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDYrm addr:$src)>; - def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), - (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VPBROADCASTQYrm addr:$src)>; - - def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), - (VPBROADCASTBrr VR128:$src)>; - def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), - (VPBROADCASTBYrr VR128:$src)>; - def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), - (VPBROADCASTWrr VR128:$src)>; - def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), - (VPBROADCASTWYrr VR128:$src)>; - def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), - (VPBROADCASTDrr VR128:$src)>; - def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), - (VPBROADCASTDYrr VR128:$src)>; - def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), - (VPBROADCASTQrr VR128:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), - (VPBROADCASTQYrr VR128:$src)>; - def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), - (VBROADCASTSSrr VR128:$src)>; - def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), - (VBROADCASTSSYrr VR128:$src)>; - def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), - (VPBROADCASTQrr VR128:$src)>; - def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), - (VBROADCASTSDYrr VR128:$src)>; + // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. + // This means we'll encounter truncated i32 loads; match that here. + def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWYrm addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWYrm addr:$src)>; // Provide aliases for broadcast from the same register class that // automatically does the extract. 
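The alias pattern above relies on broadcast reading only element 0 of its source: broadcasting from a YMM register is equivalent to broadcasting from its low XMM subregister, which is exactly what the EXTRACT_SUBREG ... sub_xmm output pattern expresses, and the same idea carries into the float patterns below. A small C++ model of the node's semantics (a sketch, not compiler code):

#include <array>
#include <cstddef>

// Model of X86VBroadcast: replicate element 0 of the source into every
// destination lane; lanes past the first are never read.
template <typename T, std::size_t DstLanes, std::size_t SrcLanes>
std::array<T, DstLanes> vbroadcast(const std::array<T, SrcLanes> &Src) {
  std::array<T, DstLanes> Dst{};
  Dst.fill(Src[0]);
  return Dst;
}
// Broadcasting a v8f32 source and broadcasting its low v4f32 half produce
// identical results, so the wider source can be narrowed for free.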
- def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), - (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), - sub_xmm)))>; - def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))), - (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), - sub_xmm)))>; - def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))), - (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), - sub_xmm)))>; - def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))), - (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), - sub_xmm)))>; def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))>; @@ -8598,7 +8554,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, @@ -8722,16 +8678,16 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256>, VEX_W; -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)), (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)), (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>; def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), @@ -8776,10 +8732,10 @@ def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0) (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr), VR128:$mask)>; -def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), @@ -8804,10 +8760,10 @@ def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0) (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr), VR256:$mask)>; -def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)), (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>; -def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)), (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>; def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)), @@ -8865,12 +8821,13 @@ multiclass avx2_var_shift opc, string OpcodeStr, SDNode OpNode, VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; } -defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", 
shl, v4i32, v8i32>; -defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; -defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; -defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; -defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; - +let Predicates = [HasAVX2, NoVLX] in { + defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; + defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; + defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; + defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; + defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; +} //===----------------------------------------------------------------------===// // VGATHER - GATHER Operations multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, @@ -8905,3 +8862,59 @@ let mayLoad = 1, Constraints defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; } } + +//===----------------------------------------------------------------------===// +// Extra selection patterns for FR128, f128, f128mem + +// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2. +def : Pat<(store (f128 FR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; + +def : Pat<(loadf128 addr:$src), + (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>; + +// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2 +def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86fand FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(and FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86for FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(or FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86fxor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(xor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index caecf7001ef5..c1df9780a0e0 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -31,21 +31,21 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>; } // Uses = [CL] -def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "shl{b}\t{$src2, $dst|$dst, 
$src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. -def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -85,19 +85,19 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, cl}", [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), +def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src), "shl{b}\t{$src, $dst|$dst, $src}", [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), +def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, u8imm:$src), "shl{w}\t{$src, $dst|$dst, $src}", [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), +def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, u8imm:$src), "shl{l}\t{$src, $dst|$dst, $src}", [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), +def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src), "shl{q}\t{$src, $dst|$dst, $src}", [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -137,18 +137,18 @@ def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>; } -def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), +def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "shr{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "shr{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; -def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), +def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$src2), "shr{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -185,19 +185,19 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t{%cl, $dst|$dst, cl}", [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), +def SHR8mi : Ii8<0xC0, 
MRM5m, (outs), (ins i8mem :$dst, u8imm:$src), "shr{b}\t{$src, $dst|$dst, $src}", [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), +def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, u8imm:$src), "shr{w}\t{$src, $dst|$dst, $src}", [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), +def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, u8imm:$src), "shr{l}\t{$src, $dst|$dst, $src}", [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), +def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src), "shr{q}\t{$src, $dst|$dst, $src}", [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -241,20 +241,20 @@ def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), IIC_SR>; } -def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "sar{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "sar{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "sar{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -298,19 +298,19 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), [(store (sra (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), +def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src), "sar{b}\t{$src, $dst|$dst, $src}", [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), +def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, u8imm:$src), "sar{w}\t{$src, $dst|$dst, $src}", [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), +def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, u8imm:$src), "sar{l}\t{$src, $dst|$dst, $src}", [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), +def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src), "sar{q}\t{$src, $dst|$dst, $src}", [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -342,7 +342,7 @@ let hasSideEffects = 0 in { let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; -def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), 
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), @@ -350,7 +350,7 @@ def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t$dst", [], IIC_SR>, OpSize16; -def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; let Uses = [CL] in def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), @@ -358,7 +358,7 @@ def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t$dst", [], IIC_SR>, OpSize32; -def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; let Uses = [CL] in def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), @@ -367,7 +367,7 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t$dst", [], IIC_SR>; -def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), @@ -376,7 +376,7 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t$dst", [], IIC_SR>; -def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), @@ -384,7 +384,7 @@ def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t$dst", [], IIC_SR>, OpSize16; -def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; let Uses = [CL] in def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), @@ -392,7 +392,7 @@ def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t$dst", [], IIC_SR>, OpSize32; -def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; let Uses = [CL] in def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), @@ -400,7 +400,7 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t$dst", [], IIC_SR>; -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), @@ -411,36 +411,36 @@ def RCR64rCL : 
RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), let SchedRW = [WriteShiftLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", [], IIC_SR>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), "rcl{w}\t$dst", [], IIC_SR>, OpSize16; -def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, u8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), "rcl{l}\t$dst", [], IIC_SR>, OpSize32; -def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, u8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), "rcl{q}\t$dst", [], IIC_SR>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), "rcr{b}\t$dst", [], IIC_SR>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, u8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), "rcr{w}\t$dst", [], IIC_SR>, OpSize16; -def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, u8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), "rcr{l}\t$dst", [], IIC_SR>, OpSize32; -def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, u8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t$dst", [], IIC_SR>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in { @@ -482,19 +482,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>; } -def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "rol{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ 
-537,19 +537,19 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1), +def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1), "rol{b}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>; -def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), +def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, u8imm:$src1), "rol{w}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>, OpSize16; -def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), +def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, u8imm:$src1), "rol{l}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>, OpSize32; -def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), +def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1), "rol{q}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>; @@ -589,19 +589,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>; } -def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "ror{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -644,19 +644,19 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), +def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src), "ror{b}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), +def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, u8imm:$src), "ror{w}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), +def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, u8imm:$src), "ror{l}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), +def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src), "ror{q}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -727,42 +727,42 @@ def SHRD64rrCL : RI<0xAD, 
MRMDestReg, (outs GR64:$dst), let isCommutable = 1 in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (ins GR16:$src1, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (ins GR16:$src1, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, @@ -801,14 +801,14 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), } def SHLD16mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD16_MEM_IM>, TB, OpSize16; def SHRD16mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)], @@ -816,14 +816,14 @@ def SHRD16mri8 : Ii8<0xAC, MRMDestMem, TB, OpSize16; def SHLD32mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD32_MEM_IM>, TB, OpSize32; def SHRD32mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, (i8 imm:$src3)), addr:$dst)], @@ -831,14 +831,14 @@ def SHRD32mri8 : Ii8<0xAC, MRMDestMem, TB, OpSize32; def SHLD64mri8 : RIi8<0xA4, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi64 
addr:$dst), GR64:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD64_MEM_IM>, TB; def SHRD64mri8 : RIi8<0xAC, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, (i8 imm:$src3)), addr:$dst)], @@ -860,12 +860,12 @@ def ROT64L2R_imm8 : SDNodeXForm { let hasSideEffects = 0 in { - def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), + def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TAXD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), - (ins x86memop:$src1, i8imm:$src2), + (ins x86memop:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TAXD, VEX, Sched<[WriteShiftLd]>; } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 0350566f8b9b..85e17f516f91 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -44,7 +44,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", let SchedRW = [WriteSystem] in { -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", +def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -60,12 +60,6 @@ def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [], IIC_SYS_ENTER_EXIT>, TB; def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", [], IIC_SYS_ENTER_EXIT>, TB, Requires<[In64BitMode]>; - -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize16; -def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>, - OpSize32; -def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, - Requires<[In64BitMode]>; } // SchedRW def : Pat<(debugtrap), @@ -88,13 +82,13 @@ def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", [], IIC_IN_RR>, OpSize32; let Defs = [AL] in -def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), +def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port), "in{b}\t{$port, %al|al, $port}", [], IIC_IN_RI>; let Defs = [AX] in -def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), +def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{w}\t{$port, %ax|ax, $port}", [], IIC_IN_RI>, OpSize16; let Defs = [EAX] in -def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), +def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{l}\t{$port, %eax|eax, $port}", [], IIC_IN_RI>, OpSize32; let Uses = [DX, AL] in @@ -108,13 +102,13 @@ def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", [], IIC_OUT_RR>, OpSize32; let Uses = [AL] in -def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), +def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port), "out{b}\t{%al, $port|$port, al}", [], IIC_OUT_IR>; let Uses = [AX] in -def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), +def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{w}\t{%ax, $port|$port, ax}", [], IIC_OUT_IR>, OpSize16; let Uses = [EAX] in -def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), +def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{l}\t{%eax, $port|$port, eax}", [], IIC_OUT_IR>, OpSize32; } // SchedRW @@ -478,39 +472,60 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; 
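One practical effect of the i8imm -> u8imm switch running through the shift, rotate and SHLD/SHRD definitions above (plausibly its motivation; stated here as an assumption): these counts are inherently unsigned, so an operand such as 0xC0 (192) should be accepted and printed as unsigned rather than rejected or rendered as -64 by a signed-byte operand. A hedged sketch of the two range checks, with hypothetical helper names:

#include <cstdint>

// Hypothetical validators mirroring i8imm vs u8imm operand ranges.
bool fitsSignedImm8(int64_t V)   { return V >= -128 && V <= 127; }
bool fitsUnsignedImm8(int64_t V) { return V >= 0 && V <= 255; }
// "rol $0xC0, %al" carries the immediate 192: it encodes in one byte either
// way, but only the unsigned range check accepts the value as written.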
//===----------------------------------------------------------------------===// // XSAVE instructions let SchedRW = [WriteSystem] in { +let Predicates = [HasXSAVE] in { let Defs = [EDX, EAX], Uses = [ECX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [EDX, EAX, ECX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; - -let Uses = [RDX, RAX] in { - def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsave\t$dst", []>, TB; - def XSAVE64 : RI<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsave64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor\t$dst", []>, TB; - def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveopt\t$dst", []>, PS; - def XSAVEOPT64 : RI<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveopt64\t$dst", []>, PS, Requires<[In64BitMode]>; - - def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors\t$dst", []>, TB; - def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVEC : I<0xC7, MRM4m, (outs opaque512mem:$dst), (ins), - "xsavec\t$dst", []>, TB; - def XSAVEC64 : RI<0xC7, MRM4m, (outs opaque512mem:$dst), (ins), - "xsavec64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVES : I<0xC7, MRM5m, (outs opaque512mem:$dst), (ins), - "xsaves\t$dst", []>, TB; - def XSAVES64 : RI<0xC7, MRM5m, (outs opaque512mem:$dst), (ins), - "xsaves64\t$dst", []>, TB, Requires<[In64BitMode]>; } + +let Uses = [EDX, EAX] in { +let Predicates = [HasXSAVE] in { + def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave\t$dst", + [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB; + def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave64\t$dst", + [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor\t$dst", + [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB; + def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor64\t$dst", + [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVEOPT] in { + def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt\t$dst", + [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB; + def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt64\t$dst", + [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVEC] in { + def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec\t$dst", + [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB; + def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec64\t$dst", + [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVES] in { + def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + "xsaves\t$dst", + [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB; + def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + "xsaves64\t$dst", + [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors\t$dst", + [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB; + def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors64\t$dst", + [(int_x86_xrstors64 addr:$dst, EDX, 
EAX)]>, TB, Requires<[In64BitMode]>; +} +} // Uses } // SchedRW //===----------------------------------------------------------------------===// @@ -534,6 +549,12 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB; +//==-----------------------------------------------------------------------===// +// PKU - enable protection key +let Defs = [EAX, EDX], Uses = [ECX] in + def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; +let Uses = [EAX, ECX, EDX] in + def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; //===----------------------------------------------------------------------===// // FS/GS Base Instructions diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 8455b8d8467c..4cb2304e464d 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -83,57 +83,64 @@ let ExeDomain = SSEPackedDouble in { defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>; } -multiclass xop3op opc, string OpcodeStr, Intrinsic Int> { +multiclass xop3op opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { def rr : IXOP, XOP_4VOp3; + [(set VR128:$dst, + (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>, + XOP_4VOp3, Sched<[WriteVarVecShift]>; def rm : IXOP, - XOP_4V, VEX_W; + (vt128 (OpNode (vt128 VR128:$src1), + (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, + XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>; def mr : IXOP, - XOP_4VOp3; + (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), + (vt128 VR128:$src2))))]>, + XOP_4VOp3, Sched<[WriteVarVecShift, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { - defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>; - defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>; - defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>; - defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>; - defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>; - defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>; - defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>; - defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>; - defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>; - defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>; - defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>; - defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>; + defm VPROTB : xop3op<0x90, "vprotb", X86vprot, v16i8>; + defm VPROTD : xop3op<0x92, "vprotd", X86vprot, v4i32>; + defm VPROTQ : xop3op<0x93, "vprotq", X86vprot, v2i64>; + defm VPROTW : xop3op<0x91, "vprotw", X86vprot, v8i16>; + defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8>; + defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32>; + defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64>; + defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16>; + defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8>; + defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32>; + defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64>; + defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16>; } -multiclass xop3opimm opc, string OpcodeStr, Intrinsic Int> { +multiclass xop3opimm opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { def ri : IXOPi8, XOP; - def mi : IXOPi8, XOP; + (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, XOP; + def mi : IXOPi8, XOP; } let ExeDomain = SSEPackedInt in { - defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>; - defm VPROTQ : 
xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>; - defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>; - defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>; + defm VPROTB : xop3opimm<0xC0, "vprotb", X86vproti, v16i8>; + defm VPROTD : xop3opimm<0xC2, "vprotd", X86vproti, v4i32>; + defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vproti, v2i64>; + defm VPROTW : xop3opimm<0xC1, "vprotw", X86vproti, v8i16>; } // Instruction where second source can be memory, but third must be register @@ -170,30 +177,34 @@ let ExeDomain = SSEPackedInt in { } // Instruction where second source can be memory, third must be imm8 -multiclass xopvpcom opc, string Suffix, Intrinsic Int> { +multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128> { let isCommutable = 1 in def ri : IXOPi8, + [(set VR128:$dst, + (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), + i8immZExt3:$cc)))]>, XOP_4V; def mi : IXOPi8, XOP_4V; + (vt128 (OpNode (vt128 VR128:$src1), + (vt128 (bitconvert (loadv2i64 addr:$src2))), + i8immZExt3:$cc)))]>, + XOP_4V; let isAsmParserOnly = 1, hasSideEffects = 0 in { def ri_alt : IXOPi8, XOP_4V; let mayLoad = 1 in def mi_alt : IXOPi8, XOP_4V; @@ -201,14 +212,14 @@ multiclass xopvpcom opc, string Suffix, Intrinsic Int> { } let ExeDomain = SSEPackedInt in { // SSE integer instructions - defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>; - defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>; - defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>; - defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>; - defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>; - defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>; - defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>; - defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>; + defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8>; + defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16>; + defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32>; + defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64>; + defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8>; + defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16>; + defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32>; + defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>; } // Instruction where either second or third source can be memory @@ -270,42 +281,52 @@ multiclass xop4op256 opc, string OpcodeStr, Intrinsic Int> { let ExeDomain = SSEPackedInt in defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>; +let Predicates = [HasXOP] in { + def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1), + (X86andnp VR128:$src3, VR128:$src2))), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; + + def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1), + (X86andnp VR256:$src3, VR256:$src2))), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +} + multiclass xop5op opc, string OpcodeStr, Intrinsic Int128, Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> { def rr : IXOP5; def rm : IXOP5, VEX_W, MemOp4; def mr : IXOP5; def rrY : IXOP5, VEX_L; def rmY : IXOP5, VEX_W, MemOp4, VEX_L; def mrY : IXOP5 +*/ +static std::tuple TranslateX86ConstCondToX86CC(SDValue &imm) { + ConstantSDNode *CImm = dyn_cast(imm); + unsigned IntImm = CImm->getZExtValue(); + // On a floating point condition, the flags are set as follows: + // ZF PF CF op + // 0 | 0 | 0 | X > Y + // 0 | 0 | 1 | X < Y + // 1 | 0 | 0 | X == Y + // 1 | 1 | 1 | unordered + switch (IntImm) { + default: llvm_unreachable("Invalid floating point compare value for Comi!"); + case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, 
nonsignaling) + case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling) + return std::make_tuple(true, X86::COND_E); + case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, nonsignaling) + case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling) + return std::make_tuple(false, X86::COND_E); + case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling) + case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_B); + case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling) + case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_B); + case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling) + case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_BE); + case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling) + case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_BE); + case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling) + case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_A); + case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered, signaling) + case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_A); + case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling) + case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_AE); + case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling) + case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_AE); + case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, nonsignaling) + case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling) + return std::make_tuple(true, X86::COND_NE); + case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling) + case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling) + return std::make_tuple(false, X86::COND_NE); + } } } // End llvm namespace diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3415cedc6fea..e186f7039b43 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -92,7 +92,6 @@ namespace llvm { SmallVector<MCFixup, 4> Fixups; raw_svector_ostream VecOS(Code); CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); CurrentShadowSize += Code.size(); if (CurrentShadowSize >= RequiredShadowSize) InShadow = false; // The shadow is big enough. Stop counting. @@ -128,7 +127,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// operand to an MCSymbol.
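The TranslateX86ConstCondToX86CC switch above is periodic in the predicate encoding: the condition code depends only on imm[2:0], the signaling bit imm[4] never changes the result, and the ordered/unordered flavour reported in the bool is recoverable from imm[3:2]. A standalone C++ sketch of that compression (an equivalent restatement, not the in-tree form):

#include <cassert>
#include <cstdint>
#include <utility>

enum CondCode { COND_E, COND_B, COND_BE, COND_A, COND_AE, COND_NE };

std::pair<bool, CondCode> translateComiPredicate(uint8_t Imm) {
  // Predicates 3 (UNORD) and 7 (ORD) have no COMI condition code.
  assert((Imm & 0x7) != 3 && (Imm & 0x7) != 7 && "not handled here");
  static const CondCode CCs[8] = {COND_E,  COND_B,  COND_BE, COND_E /*n/a*/,
                                  COND_NE, COND_AE, COND_A,  COND_E /*n/a*/};
  // EQ/LT/LE are ordered at base and flip with bit 3; NEQ/NLT/NLE start
  // unordered, so ordered-ness is bit 3 == bit 2.
  bool Ordered = ((Imm >> 3) & 1) == ((Imm >> 2) & 1);
  return {Ordered, CCs[Imm & 0x7]};
}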
MCSymbol *X86MCInstLower:: GetSymbolFromOperand(const MachineOperand &MO) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); MCSymbol *Sym = nullptr; @@ -151,7 +150,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { } if (!Suffix.empty()) - Name += DL->getPrivateGlobalPrefix(); + Name += DL.getPrivateGlobalPrefix(); unsigned PrefixLen = Name.size(); @@ -159,7 +158,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { const GlobalValue *GV = MO.getGlobal(); AsmPrinter.getNameWithPrefix(Name, GV); } else if (MO.isSymbol()) { - Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else if (MO.isMBB()) { assert(Suffix.empty()); Sym = MO.getMBB()->getSymbol(); @@ -461,6 +460,7 @@ ReSimplify: // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B // if one of the registers is extended, but the other isn't. + case X86::VMOVZPQILo2PQIrr: case X86::VMOVAPDrr: case X86::VMOVAPDYrr: case X86::VMOVAPSrr: @@ -478,18 +478,19 @@ ReSimplify: unsigned NewOpc; switch (OutMI.getOpcode()) { default: llvm_unreachable("Invalid opcode"); - case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; - case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; - case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; - case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; - case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; - case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; - case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; - case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; - case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; - case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; - case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; - case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; } OutMI.setOpcode(NewOpc); } @@ -532,6 +533,23 @@ ReSimplify: break; } + case X86::CLEANUPRET: { + // Replace CLEANUPRET with the appropriate RET. + OutMI = MCInst(); + OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); + break; + } + + case X86::CATCHRET: { + // Replace CATCHRET with the appropriate RET. + const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); + unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; + OutMI = MCInst(); + OutMI.setOpcode(getRetOpcode(Subtarget)); + OutMI.addOperand(MCOperand::createReg(ReturnReg)); + break; + } + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr: case X86::TAILJMPd: @@ -598,17 +616,29 @@ ReSimplify: case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify; case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify; case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify; + case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify; case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify; + case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify; case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify; + case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify; case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify; + case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify; case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify; + case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify; case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify; + case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify; case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify; + case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify; case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify; + case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify; case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify; + case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify; case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify; + case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify; case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify; + case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify; case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify; + case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify; case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify; case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify; case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify; @@ -875,7 +905,10 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI, MCInst LoadMI; LoadMI.setOpcode(LoadOpcode); - LoadMI.addOperand(MCOperand::createReg(LoadDefRegister)); + + if (LoadDefRegister != X86::NoRegister) + LoadMI.addOperand(MCOperand::createReg(LoadDefRegister)); + for (auto I = MI.operands_begin() + LoadOperandsBeginIdx, E = MI.operands_end(); I != E; ++I) @@ -1062,6 +1095,18 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86ATTInstPrinter::getRegisterName(Reg)); break; } + case X86::CLEANUPRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CLEANUPRET"); + break; + } + + case X86::CATCHRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CATCHRET"); + break; + } + case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: @@ -1095,12 +1140,30 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + const X86FrameLowering* FrameLowering = + MF->getSubtarget().getFrameLowering(); + bool hasFP = FrameLowering->hasFP(*MF); + + // TODO: This is needed only if we require precise CFA. 
+ bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && + !OutStreamer->getDwarfFrameInfos().back().End; + + int stackGrowth = -RI->getSlotSize(); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth); + } + // Emit the label. OutStreamer->EmitLabel(PICBase); // popl $reg EmitAndCountInstruction(MCInstBuilder(X86::POP32r) .addReg(MI->getOperand(0).getReg())); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); + } return; } @@ -1206,19 +1269,48 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Lower PSHUFB and VPERMILP normally but add a comment if we can find - // a constant shuffle mask. We won't be able to do this at the MC layer - // because the mask isn't an immediate. + // Lower PSHUFB and VPERMILP normally but add a comment if we can find + // a constant shuffle mask. We won't be able to do this at the MC layer + // because the mask isn't an immediate. case X86::PSHUFBrm: case X86::VPSHUFBrm: - case X86::VPSHUFBYrm: { + case X86::VPSHUFBYrm: + case X86::VPSHUFBZ128rm: + case X86::VPSHUFBZ128rmk: + case X86::VPSHUFBZ128rmkz: + case X86::VPSHUFBZ256rm: + case X86::VPSHUFBZ256rmk: + case X86::VPSHUFBZ256rmkz: + case X86::VPSHUFBZrm: + case X86::VPSHUFBZrmk: + case X86::VPSHUFBZrmkz: { if (!OutStreamer->isVerboseAsm()) break; - assert(MI->getNumOperands() > 5 && - "We should always have at least 5 operands!"); + unsigned SrcIdx, MaskIdx; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::PSHUFBrm: + case X86::VPSHUFBrm: + case X86::VPSHUFBYrm: + case X86::VPSHUFBZ128rm: + case X86::VPSHUFBZ256rm: + case X86::VPSHUFBZrm: + SrcIdx = 1; MaskIdx = 5; break; + case X86::VPSHUFBZ128rmkz: + case X86::VPSHUFBZ256rmkz: + case X86::VPSHUFBZrmkz: + SrcIdx = 2; MaskIdx = 6; break; + case X86::VPSHUFBZ128rmk: + case X86::VPSHUFBZ256rmk: + case X86::VPSHUFBZrmk: + SrcIdx = 3; MaskIdx = 7; break; + } + + assert(MI->getNumOperands() >= 6 && + "We should always have at least 6 operands!"); const MachineOperand &DstOp = MI->getOperand(0); - const MachineOperand &SrcOp = MI->getOperand(1); - const MachineOperand &MaskOp = MI->getOperand(5); + const MachineOperand &SrcOp = MI->getOperand(SrcIdx); + const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; @@ -1240,35 +1332,53 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &SrcOp = MI->getOperand(1); const MachineOperand &MaskOp = MI->getOperand(5); + unsigned ElSize; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break; + case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break; + } + if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; - DecodeVPERMILPMask(C, Mask); + DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); } break; } - // For loads from a constant pool to a vector register, print the constant - // loaded. 
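The SrcIdx/MaskIdx table above encodes the operand layout of the masked AVX-512 forms: zero-masking (rmkz) inserts one k-register before the source, merge-masking (rmk) additionally carries the tied pass-through value, and the shuffle mask is always fetched from the displacement slot of the trailing 5-operand memory reference. A sketch of that arithmetic with a hypothetical helper:

// The memory reference contributes five trailing operands
// (base, scale, index, disp, segment); the constant-pool mask is named by
// the displacement. OpsBeforeMem counts dst plus any mask/pass-through/src.
unsigned maskDispOperandIdx(unsigned OpsBeforeMem) {
  const unsigned DispWithinMem = 3; // base=0, scale=1, index=2, disp=3
  return OpsBeforeMem + DispWithinMem;
}
// unmasked rm  : dst, src                  -> maskDispOperandIdx(2) == 5
// zeroing  rmkz: dst, kmask, src           -> maskDispOperandIdx(3) == 6
// merging  rmk : dst, passthru, kmask, src -> maskDispOperandIdx(4) == 7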
- case X86::MOVAPDrm: - case X86::VMOVAPDrm: - case X86::VMOVAPDYrm: - case X86::MOVUPDrm: - case X86::VMOVUPDrm: - case X86::VMOVUPDYrm: - case X86::MOVAPSrm: - case X86::VMOVAPSrm: - case X86::VMOVAPSYrm: - case X86::MOVUPSrm: - case X86::VMOVUPSrm: - case X86::VMOVUPSYrm: - case X86::MOVDQArm: - case X86::VMOVDQArm: - case X86::VMOVDQAYrm: - case X86::MOVDQUrm: - case X86::VMOVDQUrm: - case X86::VMOVDQUYrm: +#define MOV_CASE(Prefix, Suffix) \ + case X86::Prefix##MOVAPD##Suffix##rm: \ + case X86::Prefix##MOVAPS##Suffix##rm: \ + case X86::Prefix##MOVUPD##Suffix##rm: \ + case X86::Prefix##MOVUPS##Suffix##rm: \ + case X86::Prefix##MOVDQA##Suffix##rm: \ + case X86::Prefix##MOVDQU##Suffix##rm: + +#define MOV_AVX512_CASE(Suffix) \ + case X86::VMOVDQA64##Suffix##rm: \ + case X86::VMOVDQA32##Suffix##rm: \ + case X86::VMOVDQU64##Suffix##rm: \ + case X86::VMOVDQU32##Suffix##rm: \ + case X86::VMOVDQU16##Suffix##rm: \ + case X86::VMOVDQU8##Suffix##rm: \ + case X86::VMOVAPS##Suffix##rm: \ + case X86::VMOVAPD##Suffix##rm: \ + case X86::VMOVUPS##Suffix##rm: \ + case X86::VMOVUPD##Suffix##rm: + +#define CASE_ALL_MOV_RM() \ + MOV_CASE(, ) /* SSE */ \ + MOV_CASE(V, ) /* AVX-128 */ \ + MOV_CASE(V, Y) /* AVX-256 */ \ + MOV_AVX512_CASE(Z) \ + MOV_AVX512_CASE(Z256) \ + MOV_AVX512_CASE(Z128) + + // For loads from a constant pool to a vector register, print the constant + // loaded. + CASE_ALL_MOV_RM() if (!OutStreamer->isVerboseAsm()) break; if (MI->getNumOperands() > 4) @@ -1302,7 +1412,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp index ac2cdc8c6567..c9e636f1eb00 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- X86MachineFuctionInfo.cpp - X86 machine function info -------------===// +//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index e6db9708b677..3a7a98db50f4 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===// +//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -84,8 +84,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// of pushes to pass function parameters. bool HasPushSequences = false; - /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill - /// slot for the frame pointer. + /// True if the function recovers from an SEH exception, and therefore needs + /// to spill and restore the frame pointer. 
bool HasSEHFramePtrSave = false; /// The frame index of a stack object containing the original frame pointer @@ -100,7 +100,7 @@ private: public: X86MachineFunctionInfo() = default; - explicit X86MachineFunctionInfo(MachineFunction &MF) {}; + explicit X86MachineFunctionInfo(MachineFunction &MF) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp new file mode 100644 index 000000000000..58020d909a43 --- /dev/null +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -0,0 +1,326 @@ +//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass that performs some optimizations with LEA +// instructions in order to improve code size. +// Currently, it does one thing: +// 1) Address calculations in load and store instructions are replaced by +// existing LEA def registers where possible. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-optimize-LEAs" + +static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden, cl::desc("X86: Enable LEA optimizations."), cl::init(false)); + +STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); + +namespace { +class OptimizeLEAPass : public MachineFunctionPass { +public: + OptimizeLEAPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { return "X86 LEA Optimize"; } + + /// \brief Loop over all of the basic blocks, replacing address + /// calculations in load and store instructions when the address has + /// already been calculated by an LEA. Also, remove redundant LEAs. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Returns the distance between two instructions inside one basic block. + /// A negative result means that the instructions occur in reverse order. + int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); + + /// \brief Choose the best \p LEA instruction from the \p List to replace + /// the address calculation in the \p MI instruction. Return the address displacement + /// and the distance between \p MI and the chosen \p LEA in \p AddrDispShift + /// and \p Dist. + bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist); + + /// \brief Returns true if two machine operands are identical and they are not + /// physical registers. + bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); + + /// \brief Returns true if the instruction is an LEA. + bool isLEA(const MachineInstr &MI); + + /// \brief Returns true if two instructions have memory operands that only + /// differ by displacement.
The numbers of the first memory operands for both + /// instructions are specified through \p N1 and \p N2. The address + /// displacement is returned through \p AddrDispShift. + bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift); + + /// \brief Find all LEA instructions in the basic block. + void findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl<MachineInstr *> &List); + + /// \brief Removes redundant address calculations. + bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List); + + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + const X86RegisterInfo *TRI; + + static char ID; +}; +char OptimizeLEAPass::ID = 0; +} + +FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } + +int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, + const MachineInstr &Last) { + const MachineBasicBlock *MBB = First.getParent(); + + // Both instructions must be in the same basic block. + assert(Last.getParent() == MBB && + "Instructions are in different basic blocks"); + + return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - + std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); +} + +// Find the best LEA instruction in the List to replace address recalculation in +// MI. Such an LEA must meet these requirements: +// 1) The address calculated by the LEA differs only by the displacement from +// the address used in MI. +// 2) The register class of the definition of the LEA is compatible with the +// register class of the address base register of MI. +// 3) Displacement of the new memory operand should fit in 1 byte if possible. +// 4) The LEA should be as close to MI as possible, and prior to it if +// possible. +bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist) { + const MachineFunction *MF = MI.getParent()->getParent(); + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + LEA = nullptr; + + // Loop over all LEA instructions. + for (auto DefMI : List) { + int64_t AddrDispShiftTemp = 0; + + // Compare the instructions' memory operands. + if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp)) + continue; + + // Make sure the address displacement fits in 4 bytes. + if (!isInt<32>(AddrDispShiftTemp)) + continue; + + // Check that the LEA def register can be used as the MI address base. Some + // instructions can use a limited set of registers as address base, for + // example MOV8mr_NOREX. We could constrain the register class of the LEA + // def to suit MI, however since this case is very rare and hard to + // reproduce in a test it's just more reliable to skip the LEA. + if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != + MRI->getRegClass(DefMI->getOperand(0).getReg())) + continue; + + // Choose the closest LEA instruction from the list, prior to MI if + // possible. Note that we take the resulting address displacement into + // account as well. Also note that the list is sorted by the order in which the LEAs + // occur, so the break condition is pretty simple. + int DistTemp = calcInstrDist(*DefMI, MI); + assert(DistTemp != 0 && + "The distance between two different instructions cannot be zero"); + if (DistTemp > 0 || LEA == nullptr) { + // Do not update the returned LEA if the current one provides a displacement + // that fits in 1 byte while the new candidate's does not.
+ if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) && + isInt<8>(AddrDispShift)) + continue; + + LEA = DefMI; + AddrDispShift = AddrDispShiftTemp; + Dist = DistTemp; + } + + // FIXME: Maybe we should not always stop at the first LEA after MI. + if (DistTemp < 0) + break; + } + + return LEA != nullptr; +} + +bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1, + const MachineOperand &MO2) { + return MO1.isIdenticalTo(MO2) && + (!MO1.isReg() || + !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); +} + +bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +// Check if MI1 and MI2 have memory operands which represent addresses that +// differ only by displacement. +bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift) { + // Address base, scale, index and segment operands must be identical. + static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt, + X86::AddrIndexReg, X86::AddrSegmentReg}; + for (auto &N : IdenticalOpNums) + if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N))) + return false; + + // Address displacement operands may differ by a constant. + const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp); + const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp); + if (!isIdenticalOp(*Op1, *Op2)) { + if (Op1->isImm() && Op2->isImm()) + AddrDispShift = Op1->getImm() - Op2->getImm(); + else if (Op1->isGlobal() && Op2->isGlobal() && + Op1->getGlobal() == Op2->getGlobal()) + AddrDispShift = Op1->getOffset() - Op2->getOffset(); + else + return false; + } + + return true; +} + +void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl<MachineInstr *> &List) { + for (auto &MI : MBB) { + if (isLEA(MI)) + List.push_back(const_cast<MachineInstr *>(&MI)); + } +} + +// Try to find load and store instructions which recalculate addresses already +// calculated by some LEA and replace their memory operands with the LEA's def +// register. +bool OptimizeLEAPass::removeRedundantAddrCalc( + const SmallVectorImpl<MachineInstr *> &List) { + bool Changed = false; + + assert(List.size() > 0); + MachineBasicBlock *MBB = List[0]->getParent(); + + // Process all instructions in the basic block. + for (auto I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr &MI = *I++; + unsigned Opcode = MI.getOpcode(); + + // The instruction must be a load or a store. + if (!MI.mayLoadOrStore()) + continue; + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode); + + // If the instruction has no memory operand, skip it. + if (MemOpNo < 0) + continue; + + MemOpNo += X86II::getOperandBias(Desc); + + // Get the best LEA instruction to replace the address calculation. + MachineInstr *DefMI; + int64_t AddrDispShift; + int Dist; + if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist)) + continue; + + // If the LEA occurs before the current instruction, we can simply rewrite + // the instruction. If the LEA occurs after it, we can hoist the LEA above + // the instruction, which then lets us rewrite it. Since the LEA and the + // instruction have similar memory operands (and thus the same def + // instructions for those operands), we can always do this without + // using a register before its def.
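// Illustrative sketch, not part of the patch: the displacement-delta match
// that isSimilarMemOp() above implements, modeled on plain values. Two
// addresses are "similar" when base, scale, index and segment all agree and
// only the immediate displacement differs; the delta is what chooseBestLEA()
// then tests with isInt<32>/isInt<8>. The MemAddr struct is a hypothetical
// stand-in for the five X86 memory operands.
#include <cstdint>

struct MemAddr {
  unsigned Base, Index, Segment;
  int64_t Scale, Disp;
};

static bool similarUpToDisp(const MemAddr &A, const MemAddr &B,
                            int64_t &AddrDispShift) {
  if (A.Base != B.Base || A.Scale != B.Scale || A.Index != B.Index ||
      A.Segment != B.Segment)
    return false;                  // Something other than the displacement differs.
  AddrDispShift = A.Disp - B.Disp; // Only the displacement differs; record the delta.
  return true;
}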
+ if (Dist < 0) { + DefMI->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + } + + // Since we can possibly extend register lifetime, clear kill flags. + MRI->clearKillFlags(DefMI->getOperand(0).getReg()); + + ++NumSubstLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump();); + + // Change the instruction operands. + MI.getOperand(MemOpNo + X86::AddrBaseReg) .ChangeToRegister(DefMI->getOperand(0).getReg(), false); + MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1); + MI.getOperand(MemOpNo + X86::AddrIndexReg) .ChangeToRegister(X86::NoRegister, false); + MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift); + MI.getOperand(MemOpNo + X86::AddrSegmentReg) .ChangeToRegister(X86::NoRegister, false); + + DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump();); + + Changed = true; + } + + return Changed; +} + +bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + // Perform this optimization only if we care about code size. + if (!EnableX86LEAOpt || !MF.getFunction()->optForSize()) + return false; + + MRI = &MF.getRegInfo(); + TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo(); + + // Process all basic blocks. + for (auto &MBB : MF) { + SmallVector<MachineInstr *, 16> LEAs; + + // Find all LEA instructions in the basic block. + findLEAs(MBB, LEAs); + + // If the current basic block has no LEAs, move on to the next one. + if (LEAs.empty()) + continue; + + // Remove redundant address calculations. + Changed |= removeRedundantAddrCalc(LEAs); + } + + return Changed; +} diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 143e70bda9e7..0f425e28fa7d 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -93,8 +93,7 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits.
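// Illustrative sketch, not part of the patch: the operand rewrite that
// removeRedundantAddrCalc() above performs once a suitable LEA is found. The
// access is re-based on the LEA's def register plus the displacement delta,
// e.g. `mov 16(%rbx,%rcx,4), %eax` becomes `mov 8(%rdi), %eax` when %rdi
// holds `lea 8(%rbx,%rcx,4)`. MemAddr mirrors the struct from the previous
// sketch; register encodings are hypothetical.
#include <cstdint>

struct MemAddr {
  unsigned Base, Index, Segment;
  int64_t Scale, Disp;
};

static void rebaseOnLEA(MemAddr &M, unsigned LEADefReg, int64_t AddrDispShift) {
  M.Base = LEADefReg;     // Base becomes the LEA's def register.
  M.Scale = 1;            // Scale collapses to 1 ...
  M.Index = 0;            // ... and the index register is dropped.
  M.Disp = AddrDispShift; // Only the displacement delta remains.
  M.Segment = 0;          // Segment is cleared, as in the pass.
}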
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - if (MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize)) { + if (MF.getFunction()->optForSize()) { return false; } @@ -107,7 +106,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); VisitedBBs.clear(); - findReturns(MF.begin()); + findReturns(&MF.front()); bool MadeChange = false; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d8495e53e0e3..58404433e1ae 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" @@ -44,12 +43,6 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" -cl::opt -ForceStackAlign("force-align-stack", - cl::desc("Force align the stack to the minimum alignment" - " needed for the function."), - cl::init(false), cl::Hidden); - static cl::opt EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); @@ -174,20 +167,33 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; - case 2: // Available for tailcall (not callee-saved GPRs). - const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) - return &X86::GR64_TCW64RegClass; - else if (Is64Bit) - return &X86::GR64_TCRegClass; - - bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); - if (hasHipeCC) - return &X86::GR32RegClass; - return &X86::GR32_TCRegClass; + case 2: // NOREX GPRs. + if (Subtarget.isTarget64BitLP64()) + return &X86::GR64_NOREXRegClass; + return &X86::GR32_NOREXRegClass; + case 3: // NOREX GPRs except the stack pointer (for encoding reasons). + if (Subtarget.isTarget64BitLP64()) + return &X86::GR64_NOREX_NOSPRegClass; + return &X86::GR32_NOREX_NOSPRegClass; + case 4: // Available for tailcall (not callee-saved GPRs). + return getGPRsForTailCall(MF); } } +const TargetRegisterClass * +X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { + const Function *F = MF.getFunction(); + if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) + return &X86::GR64_TCW64RegClass; + else if (Is64Bit) + return &X86::GR64_TCRegClass; + + bool hasHipeCC = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); + if (hasHipeCC) + return &X86::GR32RegClass; + return &X86::GR32_TCRegClass; +} + const TargetRegisterClass * X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &X86::CCRRegClass) { @@ -222,6 +228,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const X86Subtarget &Subtarget = MF->getSubtarget(); + bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->getMMI().callsEHReturn(); @@ -241,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_SaveList; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -254,6 +265,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_Intel_OCL_BI_SaveList; break; } + case CallingConv::HHVM: + return CSR_64_HHVM_SaveList; case CallingConv::Cold: if (Is64Bit) return CSR_64_MostRegs_SaveList; @@ -264,6 +277,18 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; + case CallingConv::X86_INTR: + if (Is64Bit) { + if (HasAVX) + return CSR_64_AllRegs_AVX_SaveList; + else + return CSR_64_AllRegs_SaveList; + } else { + if (HasSSE) + return CSR_32_AllRegs_SSE_SaveList; + else + return CSR_32_AllRegs_SaveList; + } default: break; } @@ -284,6 +309,7 @@ const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const X86Subtarget &Subtarget = MF.getSubtarget(); + bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); @@ -301,6 +327,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_RegMask; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; @@ -314,16 +344,30 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_64_Intel_OCL_BI_RegMask; break; } + case CallingConv::HHVM: + return CSR_64_HHVM_RegMask; case CallingConv::Cold: if (Is64Bit) return CSR_64_MostRegs_RegMask; break; - default: - break; case CallingConv::X86_64_Win64: return CSR_Win64_RegMask; case CallingConv::X86_64_SysV: return CSR_64_RegMask; + case CallingConv::X86_INTR: + if (Is64Bit) { + if (HasAVX) + return CSR_64_AllRegs_AVX_RegMask; + else + return CSR_64_AllRegs_RegMask; + } else { + if (HasSSE) + return CSR_32_AllRegs_SSE_RegMask; + else + return CSR_32_AllRegs_RegMask; + } + default: + break; } // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check @@ -341,6 +385,10 @@ X86RegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const { + return CSR_64_TLS_Darwin_RegMask; +} + BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const X86FrameLowering *TFI = getFrameLowering(MF); @@ -371,8 +419,7 @@ BitVector 
X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { "Stack realignment in presence of dynamic allocas is not supported with" "this calling convention."); - unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64, - false); + unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); @@ -439,6 +486,10 @@ void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// +static bool CantUseSP(const MachineFrameInfo *MFI) { + return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); +} + bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -451,13 +502,11 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // reference locals while also adjusting the stack pointer. When we can't // use both the SP and the FP, we need a separate base pointer register. bool CantUseFP = needsStackRealignment(MF); - bool CantUseSP = - MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); - return CantUseFP && CantUseSP; + return CantUseFP && CantUseSP(MFI); } bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + if (!TargetRegisterInfo::canRealignStack(MF)) return false; const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -470,26 +519,11 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { // If a base pointer is necessary. Check that it isn't too late to reserve // it. - if (MFI->hasVarSizedObjects()) + if (CantUseSP(MFI)) return MRI->canReserveReg(BasePtr); return true; } -bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86FrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - // If we've requested that we force align the stack do so now. - if (ForceStackAlign) - return canRealignStack(MF); - - return requiresRealignment && canRealignStack(MF); -} - bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { // Since X86 defines assignCalleeSavedSpillSlots which always return true @@ -510,6 +544,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; + if (hasBasePointer(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); else if (needsStackRealignment(MF)) @@ -524,14 +559,11 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. 
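// Illustrative sketch, not part of the patch: the decision that
// hasBasePointer()/CantUseSP() above encode. A separate base pointer is
// needed only when stack realignment claims the frame pointer *and* dynamic
// stack adjustments make the stack pointer unusable for addressing locals.
// Parameter names are hypothetical stand-ins for the MachineFrameInfo queries.
static bool cantUseSP(bool HasVarSizedObjects, bool HasOpaqueSPAdjustment) {
  return HasVarSizedObjects || HasOpaqueSPAdjustment;
}

static bool needsBasePointer(bool NeedsStackRealignment,
                             bool HasVarSizedObjects,
                             bool HasOpaqueSPAdjustment) {
  bool CantUseFP = NeedsStackRealignment; // FP is repurposed for realignment.
  return CantUseFP && cantUseSP(HasVarSizedObjects, HasOpaqueSPAdjustment);
}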
+ unsigned IgnoredFrameReg; if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int Offset; - if (IsWinEH) - Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex); - else - Offset = TFI->getFrameIndexOffset(MF, FrameIndex); + Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); FI.ChangeToImmediate(Offset); return; } @@ -540,7 +572,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) - BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false); + BasePtr = getX86SubSuperRegister(BasePtr, 64); // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. @@ -553,7 +585,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const MachineFrameInfo *MFI = MF.getFrameInfo(); FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); } else - FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); if (BasePtr == StackPtr) FIOffset += SPAdj; @@ -592,193 +624,11 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); unsigned FrameReg = getFrameRegister(MF); if (Subtarget.isTarget64BitILP32()) - FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false); + FrameReg = getX86SubSuperRegister(FrameReg, 32); return FrameReg; } -namespace llvm { -unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT, - bool High) { - switch (VT) { - default: return 0; - case MVT::i8: - if (High) { - switch (Reg) { - default: return getX86SubSuperRegister(Reg, MVT::i64); - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AH; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DH; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CH; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BH; - } - } else { - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AL; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DL; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CL; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BL; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SIL; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DIL; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BPL; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SPL; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8B; - case X86::R9B: case X86::R9W: case 
X86::R9D: case X86::R9: - return X86::R9B; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10B; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11B; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12B; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13B; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14B; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15B; - } - } - case MVT::i16: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8W; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9W; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10W; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11W; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12W; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13W; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14W; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15W; - } - case MVT::i32: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::EAX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::EDX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::ECX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::EBX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::ESI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::EDI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::EBP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::ESP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8D; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9D; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10D; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11D; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12D; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13D; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14D; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15D; - } - case MVT::i64: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::RAX; - case 
X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::RDX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::RCX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::RBX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::RSI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::RDI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::RBP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::RSP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15; - } - } -} - -unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, - bool High) { - unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High); - if (Res == 0) - llvm_unreachable("Unexpected register or VT"); - return Res; -} - -unsigned get512BitSuperRegister(unsigned Reg) { +unsigned llvm::get512BitSuperRegister(unsigned Reg) { if (Reg >= X86::XMM0 && Reg <= X86::XMM31) return X86::ZMM0 + (Reg - X86::XMM0); if (Reg >= X86::YMM0 && Reg <= X86::YMM31) @@ -787,5 +637,3 @@ unsigned get512BitSuperRegister(unsigned Reg) { return Reg; llvm_unreachable("Unexpected SIMD register"); } - -} diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8de1d0bf8ec8..f014c8f6ff61 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -87,6 +87,11 @@ public: const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + /// getGPRsForTailCall - Returns a register class with registers that can be + /// used in forming tail calls. + const TargetRegisterClass * + getGPRsForTailCall(const MachineFunction &MF) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; @@ -96,7 +101,11 @@ public: getCalleeSavedRegs(const MachineFunction* MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; + + // Calls involved in thread-local variable lookup save more registers than + // normal calls, so they need a different mask to represent this. 
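// Illustrative sketch, not part of the patch: the direction of the API change
// above - getX86SubSuperRegister() now takes the register width in bits
// instead of an MVT. A toy mapping for the A-register family only; the real
// helper covers every GPR plus an optional high-8-bit form.
enum ToyReg { AL, AH, AX, EAX, RAX, NoReg };

static ToyReg subSuperA(unsigned SizeInBits) {
  switch (SizeInBits) {
  case 8:  return AL;  // Low byte (AH is the separate "high" form).
  case 16: return AX;
  case 32: return EAX;
  case 64: return RAX;
  default: return NoReg;
  }
}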
+ const uint32_t *getDarwinTLSCallPreservedMask() const; /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and @@ -108,9 +117,7 @@ public: bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; - - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const override; @@ -128,16 +135,6 @@ public: unsigned getSlotSize() const { return SlotSize; } }; -/// Returns the sub or super register of a specific X86 register. -/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX. -/// Aborts on error. -unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false); - -/// Returns the sub or super register of a specific X86 register. -/// Like getX86SubSuperRegister() but returns 0 on error. -unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType, - bool High = false); - //get512BitRegister - X86 utility - returns 512-bit super register unsigned get512BitSuperRegister(unsigned Reg); diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index cdb151c26a05..56f0d9352d30 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -225,15 +225,15 @@ let SubRegIndices = [sub_ymm] in { } } - // Mask Registers, used by AVX-512 instructions. - def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>; - def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>; - def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>; - def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>; - def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>; - def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>; - def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>; - def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>; +// Mask Registers, used by AVX-512 instructions. +def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>; +def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>; +def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>; +def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>; +def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>; +def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>; +def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>; +def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>; // Floating point stack registers. These don't map one-to-one to the FP // pseudo registers, but we still mark them as aliasing FP registers. That @@ -375,7 +375,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, - R8, R9, R11)>; + R8, R9, R10, R11, RIP)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, @@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; +def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>; + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. 
This will cause us to spill @@ -442,10 +444,11 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { } // Generic vector registers: VR64 and VR128. +// Ensure that float types are declared first - only float is legal on SSE1. def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; -def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], +def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (add FR32)>; -def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], +def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 15)>; // Status flags registers. @@ -459,8 +462,8 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { } // AVX-512 vector/mask registers. -def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512, - (sequence "ZMM%u", 0, 31)>; +def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], + 512, (sequence "ZMM%u", 0, 31)>; // Scalar AVX-512 floating point registers. def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; @@ -468,10 +471,10 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>; // Extended VR128 and VR256 for AVX-512 instructions -def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (add FR32X)>; -def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - 256, (sequence "YMM%u", 0, 31)>; +def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], + 128, (add FR32X)>; +def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], + 256, (sequence "YMM%u", 0, 31)>; // Mask registers def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;} @@ -491,4 +494,4 @@ def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} // Bound registers -def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; \ No newline at end of file +def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ce79fcf9ad81..b1a01614b4a1 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } -SDValue -X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); @@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); - if (const char *bzeroEntry = V && + if (const char *bzeroEntry = V && V->isNullValue() ? 
Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 0) .setDiscardResult(); - std::pair CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); + std::pair CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } @@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, BytesLeft = SizeVal % UBytes; } - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), + InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; @@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; @@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, + Dst, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index dff3624b7efe..8ef08c960f0b 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -44,9 +44,8 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, cl::desc("Enable early if-conversion on X86")); -/// ClassifyBlockAddressReference - Classify a blockaddress reference for the -/// current subtarget according to how we should reference it in a non-pcrel -/// context. +/// Classify a blockaddress reference for the current subtarget according to how +/// we should reference it in a non-pcrel context. unsigned char X86Subtarget::ClassifyBlockAddressReference() const { if (isPICStyleGOT()) // 32-bit ELF targets. return X86II::MO_GOTOFF; @@ -58,9 +57,8 @@ unsigned char X86Subtarget::ClassifyBlockAddressReference() const { return X86II::MO_NO_FLAG; } -/// ClassifyGlobalReference - Classify a global variable reference for the -/// current subtarget according to how we should reference it in a non-pcrel -/// context. +/// Classify a global variable reference for the current subtarget according to +/// how we should reference it in a non-pcrel context. 
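// Illustrative sketch, not part of the patch: the three-way classification
// that ClassifyBlockAddressReference() above performs, reduced to plain
// enums. The flag names mirror the X86II values used in the function.
enum ToyPICStyle { PICStyleGOT, PICStyleStubPIC, PICStyleNone };
enum ToyRefFlag { MO_GOTOFF, MO_PIC_BASE_OFFSET, MO_NO_FLAG };

static ToyRefFlag classifyBlockAddress(ToyPICStyle Style) {
  if (Style == PICStyleGOT)     // 32-bit ELF targets.
    return MO_GOTOFF;
  if (Style == PICStyleStubPIC) // 32-bit Darwin PIC.
    return MO_PIC_BASE_OFFSET;
  return MO_NO_FLAG;            // Direct references are fine elsewhere.
}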
unsigned char X86Subtarget:: ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // DLLImport only exists on windows, it is implemented as a load from a @@ -147,9 +145,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { } -/// getBZeroEntry - This function returns the name of a function which has an -/// interface like the non-standard bzero function, if such a function exists on -/// the current subtarget and it is considered prefereable over memset with zero +/// This function returns the name of a function which has an interface like +/// the non-standard bzero function, if such a function exists on the +/// current subtarget and it is considered preferable over memset with zero /// passed as the second argument. Otherwise it returns null. const char *X86Subtarget::getBZeroEntry() const { // Darwin 10 has a __bzero entry point for this purpose. @@ -166,8 +164,7 @@ bool X86Subtarget::hasSinCos() const { is64Bit(); } -/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls -/// to immediate address. +/// Return true if the subtarget allows calls to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, @@ -192,9 +189,25 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } + // LAHF/SAHF are always supported in non-64-bit mode. + if (!In64BitMode) { + if (!FullFS.empty()) + FullFS = "+sahf," + FullFS; + else + FullFS = "+sahf"; + } + + // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); + // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of + // 16-bytes and under that are reasonably fast. These features were + // introduced with Intel's Nehalem/Silvermont and AMD's Family10h + // micro-architectures respectively. 
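// Illustrative sketch, not part of the patch: how initSubtargetFeatures()
// above splices the always-available "+sahf" feature into the feature string
// before parsing, shown here with std::string instead of LLVM's string
// machinery.
#include <string>

static std::string addSAHF(bool In64BitMode, std::string FullFS) {
  // LAHF/SAHF are always supported outside 64-bit mode, so force the flag on.
  if (!In64BitMode)
    FullFS = FullFS.empty() ? std::string("+sahf") : "+sahf," + FullFS;
  return FullFS;
}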
+ if (hasSSE42() || hasSSE4A()) + IsUAMem16Slow = false; + InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with @@ -224,13 +237,18 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { } void X86Subtarget::initializeEnvironment() { - X86SSELevel = NoMMXSSE; + X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; HasAES = false; + HasFXSR = false; + HasXSAVE = false; + HasXSAVEOPT = false; + HasXSAVEC = false; + HasXSAVES = false; HasPCLMUL = false; HasFMA = false; HasFMA4 = false; @@ -252,13 +270,15 @@ void X86Subtarget::initializeEnvironment() { HasBWI = false; HasVLX = false; HasADX = false; + HasPKU = false; HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasLAHFSAHF = false; HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; - IsUAMemFast = false; + IsUAMem16Slow = false; IsUAMem32Slow = false; HasSSEUnalignedMem = false; HasCmpxchg16b = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f026d4295f71..13d1026dcaa0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -47,11 +47,11 @@ class X86Subtarget final : public X86GenSubtargetInfo { protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F + NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F }; enum X863DNowEnum { - NoThreeDNow, ThreeDNow, ThreeDNowA + NoThreeDNow, MMX, ThreeDNow, ThreeDNowA }; enum X86ProcFamilyEnum { @@ -64,10 +64,10 @@ protected: /// Which PIC style to use PICStyles::Style PICStyle; - /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. + /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel; - /// 3DNow, 3DNow Athlon, or none supported. + /// MMX, 3DNow, 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel; /// True if this processor has conditional move instructions @@ -86,6 +86,18 @@ protected: /// Target has AES instructions bool HasAES; + /// Target has FXSAVE/FXRESTOR instructions + bool HasFXSR; + + /// Target has XSAVE instructions + bool HasXSAVE; + /// Target has XSAVEOPT instructions + bool HasXSAVEOPT; + /// Target has XSAVEC instructions + bool HasXSAVEC; + /// Target has XSAVES instructions + bool HasXSAVES; + /// Target has carry-less multiplication bool HasPCLMUL; @@ -140,16 +152,19 @@ protected: /// Processor has RDSEED instructions. bool HasRDSEED; + /// Processor has LAHF/SAHF instructions. + bool HasLAHFSAHF; + /// True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; /// True if SHLD instructions are slow. bool IsSHLDSlow; - /// True if unaligned memory access is fast. - bool IsUAMemFast; + /// True if unaligned memory accesses of 16-bytes are slow. + bool IsUAMem16Slow; - /// True if unaligned 32-byte memory accesses are slow. + /// True if unaligned memory accesses of 32-bytes are slow. bool IsUAMem32Slow; /// True if SSE operations can have unaligned memory operands. 
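// Illustrative sketch, not part of the patch: after the enum split above,
// MMX lives on the 3DNow! axis rather than the SSE axis, so the ordered >=
// threshold checks keep working independently on each axis. Toy enums shown;
// the real ones are X86SSEEnum and X863DNowEnum.
enum ToySSELevel { NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 };
enum Toy3DNowLevel { No3DNow, MMX3D, ThreeDNow3D, ThreeDNowA3D };

static bool hasSSE2(ToySSELevel L) { return L >= SSE2; }
// Having MMX no longer implies any SSE level (and vice versa).
static bool hasMMX(Toy3DNowLevel L) { return L >= MMX3D; }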
@@ -208,6 +223,9 @@ protected: /// Processor has AVX-512 Vector Length eXtensions bool HasVLX; + /// Processor has PKU extensions + bool HasPKU; + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; @@ -319,7 +337,6 @@ public: void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasCMov() const { return HasCMov; } - bool hasMMX() const { return X86SSELevel >= MMX; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } @@ -332,14 +349,22 @@ public: bool hasFp256() const { return hasAVX(); } bool hasInt256() const { return hasAVX2(); } bool hasSSE4A() const { return HasSSE4A; } + bool hasMMX() const { return X863DNowLevel >= MMX; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } + bool hasFXSR() const { return HasFXSR; } + bool hasXSAVE() const { return HasXSAVE; } + bool hasXSAVEOPT() const { return HasXSAVEOPT; } + bool hasXSAVEC() const { return HasXSAVEC; } + bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } - bool hasFMA() const { return HasFMA; } - // FIXME: Favor FMA when both are enabled. Is this the right thing to do? - bool hasFMA4() const { return HasFMA4 && !HasFMA; } + // Prefer FMA4 to FMA - it's better for commutation/memory folding and + // has equal or better performance on all supported targets. + bool hasFMA() const { return HasFMA && !HasFMA4; } + bool hasFMA4() const { return HasFMA4; } + bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } bool hasMOVBE() const { return HasMOVBE; } @@ -355,9 +380,10 @@ public: bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } + bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } - bool isUnalignedMemAccessFast() const { return IsUAMemFast; } + bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } @@ -375,6 +401,7 @@ public: bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } + bool hasPKU() const { return HasPKU; } bool hasMPX() const { return HasMPX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } @@ -394,9 +421,11 @@ public: bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } + bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } bool isTargetWindowsMSVC() const { return TargetTriple.isWindowsMSVCEnvironment(); @@ -406,6 +435,10 @@ public: return TargetTriple.isKnownWindowsMSVCEnvironment(); } + bool isTargetWindowsCoreCLR() const { + return TargetTriple.isWindowsCoreCLREnvironment(); + } + bool isTargetWindowsCygwin() const {
return TargetTriple.isWindowsCygwinEnvironment(); } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index fb9cb4ba4c86..0e7e4c0c84a9 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -28,10 +28,17 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); +namespace llvm { +void initializeWinEHStatePassPass(PassRegistry &); +} + extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine X(TheX86_32Target); RegisterTargetMachine Y(TheX86_64Target); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeWinEHStatePassPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -45,7 +52,7 @@ static std::unique_ptr createTLOF(const Triple &TT) { return make_unique(); if (TT.isOSBinFormatELF()) return make_unique(); - if (TT.isKnownWindowsMSVCEnvironment()) + if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment()) return make_unique(); if (TT.isOSBinFormatCOFF()) return make_unique(); @@ -175,8 +182,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, //===----------------------------------------------------------------------===// TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); }); + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(X86TTIImpl(this, F)); + }); } @@ -246,6 +254,9 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createX86OptimizeLEAs()); + addPass(createX86CallFrameOptimization()); } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 6f900ea351ef..782768d0ab16 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" @@ -152,9 +153,8 @@ static std::string scalarConstantToHexString(const Constant *C) { } } -MCSection * -X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *X86WindowsTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst() && C) { const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | @@ -171,5 +171,5 @@ X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind, COFF::IMAGE_COMDAT_SELECT_ANY); } - return TargetLoweringObjectFile::getSectionForConstant(Kind, C); + return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C); } diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 66366b2373cd..6b2448cc9de6 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -58,7 +58,7 @@ namespace llvm { /// \brief Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in. 
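// Illustrative sketch, not part of the patch: why getSectionForConstant()
// above keys the COFF section off the constant itself. Naming the COMDAT
// section after the constant's bytes (cf. scalarConstantToHexString() above)
// gives identical constants identical sections, which
// IMAGE_COMDAT_SELECT_ANY then lets the linker fold. The "__real@" naming
// below follows the MSVC convention for float constants; treat it as an
// assumption here.
#include <cstdint>
#include <cstdio>
#include <string>

static std::string comdatNameFor(uint64_t Bits) {
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), "__real@%016llx",
                static_cast<unsigned long long>(Bits));
  return Buf; // Identical constants -> identical names -> the linker folds them.
}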
- MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 7df726091843..2e7bbb208743 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" + using namespace llvm; #define DEBUG_TYPE "x86tti" @@ -62,8 +63,8 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) { if (ST->is64Bit()) return 64; - return 32; + return 32; } unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { @@ -84,12 +85,12 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { return 2; } -unsigned X86TTIImpl::getArithmeticInstrCost( +int X86TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -101,10 +102,9 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence SRA + SRL + ADD + SRA. // The OperandValue properties may not be the same as those of the previous // operation; conservatively assume OP_None. - unsigned Cost = - 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, + Op2Info, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -115,8 +115,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( return Cost; } - static const CostTblEntry - AVX2UniformConstCostTable[] = { + static const CostTblEntry AVX2UniformConstCostTable[] = { { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence @@ -127,12 +126,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX2UniformConstCostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry AVX512CostTable[] = { + static const CostTblEntry AVX512CostTable[] = { { ISD::SHL, MVT::v16i32, 1 }, { ISD::SRL, MVT::v16i32, 1 }, { ISD::SRA, MVT::v16i32, 1 }, @@ -141,7 +140,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v8i64, 1 }, }; - static const CostTblEntry AVX2CostTable[] = { + if (ST->hasAVX512()) { + if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 are legal even though we declare to // customize them to detect the cases where shift amount is a scalar one.
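Most of the mechanical churn in this file comes from the CostTableLookup refactoring visible above: the helper used to return an index into the table (-1 on failure) and now returns a pointer to the matching entry (null on failure), which folds the lookup and the test into a single if. A rough stand-alone sketch of the new shape (simplified types; the real helper lives in llvm/Target/CostTable.h as of this revision):

    #include <cstdio>

    struct CostTblEntry {
      int ISD;   // operation
      int Type;  // simple value type
      int Cost;
    };

    // Pointer-returning lookup: nullptr means "no entry", so callers can write
    // `if (const auto *Entry = ...) return LT.first * Entry->Cost;`.
    template <unsigned N>
    const CostTblEntry *costTableLookup(const CostTblEntry (&Tbl)[N], int ISD,
                                        int Ty) {
      for (const CostTblEntry &E : Tbl)
        if (E.ISD == ISD && E.Type == Ty)
          return &E;
      return nullptr;
    }

    int main() {
      static const CostTblEntry Tbl[] = {{0, 1, 4}, {0, 2, 9}};
      if (const auto *Entry = costTableLookup(Tbl, 0, 2))
        std::printf("cost = %d\n", Entry->Cost); // prints "cost = 9"
      return 0;
    }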
{ ISD::SHL, MVT::v4i32, 1 }, @@ -154,7 +158,57 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v2i64, 1 }, { ISD::SHL, MVT::v4i64, 1 }, { ISD::SRL, MVT::v4i64, 1 }, + }; + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + if (ISD == ISD::SHL && LT.second == MVT::v16i16 && + (Op2Info == TargetTransformInfo::OK_UniformConstantValue || + Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) + // On AVX2, a packed v16i16 shift left by a constant build_vector + // is lowered into a vector multiply (vpmullw). + return LT.first; + + if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry XOPCostTable[] = { + // 128bit shifts take 1cy, but right shifts require negation beforehand. + { ISD::SHL, MVT::v16i8, 1 }, + { ISD::SRL, MVT::v16i8, 2 }, + { ISD::SRA, MVT::v16i8, 2 }, + { ISD::SHL, MVT::v8i16, 1 }, + { ISD::SRL, MVT::v8i16, 2 }, + { ISD::SRA, MVT::v8i16, 2 }, + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 2 }, + { ISD::SRA, MVT::v4i32, 2 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 2 }, + { ISD::SRA, MVT::v2i64, 2 }, + // 256bit shifts require splitting if AVX2 didn't catch them above. + { ISD::SHL, MVT::v32i8, 2 }, + { ISD::SRL, MVT::v32i8, 4 }, + { ISD::SRA, MVT::v32i8, 4 }, + { ISD::SHL, MVT::v16i16, 2 }, + { ISD::SRL, MVT::v16i16, 4 }, + { ISD::SRA, MVT::v16i16, 4 }, + { ISD::SHL, MVT::v8i32, 2 }, + { ISD::SRL, MVT::v8i32, 4 }, + { ISD::SRA, MVT::v8i32, 4 }, + { ISD::SHL, MVT::v4i64, 2 }, + { ISD::SRL, MVT::v4i64, 4 }, + { ISD::SRA, MVT::v4i64, 4 }, + }; + + // Look for XOP lowering tricks. + if (ST->hasXOP()) { + if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry AVX2CustomCostTable[] = { { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. @@ -163,7 +217,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. - { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. // Vectorizing division is a bad idea. See the SSE2 table for more comments. { ISD::SDIV, MVT::v32i8, 32*20 }, @@ -176,44 +231,44 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::UDIV, MVT::v4i64, 4*20 }, }; - if (ST->hasAVX512()) { - int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX512CostTable[Idx].Cost; - } - // Look for AVX2 lowering tricks. + // Look for AVX2 lowering tricks for custom cases. if (ST->hasAVX2()) { - if (ISD == ISD::SHL && LT.second == MVT::v16i16 && - (Op2Info == TargetTransformInfo::OK_UniformConstantValue || - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) - // On AVX2, a packed v16i16 shift left by a constant build_vector - // is lowered into a vector multiply (vpmullw). - return LT.first; - - int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX2CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry + static const CostTblEntry SSE2UniformConstCostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. 
// Constant splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. + { ISD::SHL, MVT::v32i8, 2 }, // psllw. { ISD::SHL, MVT::v8i16, 1 }, // psllw. + { ISD::SHL, MVT::v16i16, 2 }, // psllw. { ISD::SHL, MVT::v4i32, 1 }, // pslld + { ISD::SHL, MVT::v8i32, 2 }, // pslld { ISD::SHL, MVT::v2i64, 1 }, // psllq. + { ISD::SHL, MVT::v4i64, 2 }, // psllq. { ISD::SRL, MVT::v16i8, 1 }, // psrlw. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw. { ISD::SRL, MVT::v8i16, 1 }, // psrlw. + { ISD::SRL, MVT::v16i16, 2 }, // psrlw. { ISD::SRL, MVT::v4i32, 1 }, // psrld. + { ISD::SRL, MVT::v8i32, 2 }, // psrld. { ISD::SRL, MVT::v2i64, 1 }, // psrlq. + { ISD::SRL, MVT::v4i64, 2 }, // psrlq. { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v8i16, 1 }, // psraw. + { ISD::SRA, MVT::v16i16, 2 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. + { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence @@ -227,27 +282,34 @@ unsigned X86TTIImpl::getArithmeticInstrCost( if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; - int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * SSE2UniformConstCostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } if (ISD == ISD::SHL && Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { - EVT VT = LT.second; + MVT VT = LT.second; + // Vector shift left by non uniform constant can be lowered + // into vector multiply (pmullw/pmulld). if ((VT == MVT::v8i16 && ST->hasSSE2()) || (VT == MVT::v4i32 && ST->hasSSE41())) - // Vector shift left by non uniform constant can be lowered - // into vector multiply (pmullw/pmulld). return LT.first; + + // v16i16 and v8i32 shifts by non-uniform constants are lowered into a + // sequence of extract + two vector multiply + insert. + if ((VT == MVT::v8i32 || VT == MVT::v16i16) && + (ST->hasAVX() && !ST->hasAVX2())) + ISD = ISD::MUL; + + // A vector shift left by non uniform constant is converted + // into a vector multiply; the new multiply is eventually + // lowered into a sequence of shuffles and 2 x pmuludq. if (VT == MVT::v4i32 && ST->hasSSE2()) - // A vector shift left by non uniform constant is converted - // into a vector multiply; the new multiply is eventually - // lowered into a sequence of shuffles and 2 x pmuludq. ISD = ISD::MUL; } - static const CostTblEntry SSE2CostTable[] = { + static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. // For some cases, where the shift amount is a scalar we would be able @@ -257,20 +319,31 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // used for vectorization and we don't want to make vectorized code worse // than scalar code. { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized. - { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul. 
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized. + { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. + { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. + { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular @@ -289,12 +362,11 @@ unsigned X86TTIImpl::getArithmeticInstrCost( }; if (ST->hasSSE2()) { - int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * SSE2CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry AVX1CostTable[] = { + static const CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. @@ -314,29 +386,21 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // Look for AVX1 lowering tricks. if (ST->hasAVX() && !ST->hasAVX2()) { - EVT VT = LT.second; + MVT VT = LT.second; - // v16i16 and v8i32 shifts by non-uniform constants are lowered into a - // sequence of extract + two vector multiply + insert. - if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) && - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) - ISD = ISD::MUL; - - int Idx = CostTableLookup(AVX1CostTable, ISD, VT); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT)) + return LT.first * Entry->Cost; } // Custom lowering of vectors. - static const CostTblEntry CustomLowered[] = { + static const CostTblEntry CustomLowered[] = { // A v2i64/v4i64 and multiply is custom lowered as a series of long // multiplies(3), shifts(4) and adds(2). { ISD::MUL, MVT::v2i64, 9 }, { ISD::MUL, MVT::v4i64, 9 }, }; - int Idx = CostTableLookup(CustomLowered, ISD, LT.second); - if (Idx != -1) - return LT.first * CustomLowered[Idx].Cost; + if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second)) + return LT.first * Entry->Cost; // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, // 2x pmuludq, 2x shuffle. 
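A worked example of how the returned costs compose, using the SSE2 shift rows above: getTypeLegalizationCost returns a split factor (LT.first) plus the legalized type (LT.second), and every table hit is scaled by the split factor. The numbers below are the assumed table values from that hunk:

    #include <cassert>

    int main() {
      // SSE2-only target, SRL on v8i32: legalization splits the vector into
      // two v4i32 halves, so LT.first == 2 and the v4i32 row (cost 16,
      // "shift each lane + blend") is what the lookup sees.
      assert(2 * 16 == 32);
      // AVX1 target: v8i32 is a legal type (LT.first == 1), and the new
      // dedicated v8i32 row charges 2*16 up front - the same total cost.
      assert(1 * (2 * 16) == 32);
      return 0;
    }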
@@ -348,15 +412,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); } -unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) { +int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { // We only estimate the cost of reverse and alternate shuffles. if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - unsigned Cost = 1; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + int Cost = 1; if (LT.second.getSizeInBits() > 128) Cost = 3; // Extract + insert + copy. @@ -367,14 +431,14 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, if (Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); // The backend knows how to generate a single VEX.256 version of // instruction VPBLENDW if the target supports AVX2. if (ST->hasAVX2() && LT.second == MVT::v16i16) return LT.first; - static const CostTblEntry AVXAltShuffleTbl[] = { + static const CostTblEntry AVXAltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd @@ -390,13 +454,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} }; - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * AVXAltShuffleTbl[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSE41AltShuffleTbl[] = { + static const CostTblEntry SSE41AltShuffleTbl[] = { // These are lowered into movsd. 
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, @@ -414,13 +477,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} }; - if (ST->hasSSE41()) { - int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSE41AltShuffleTbl[Idx].Cost; - } + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSSE3AltShuffleTbl[] = { + static const CostTblEntry SSSE3AltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd @@ -433,13 +495,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or }; - if (ST->hasSSSE3()) { - int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSSE3AltShuffleTbl[Idx].Cost; - } + if (ST->hasSSSE3()) + if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSEAltShuffleTbl[] = { + static const CostTblEntry SSEAltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd @@ -454,65 +515,47 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, }; // Fall-back (SSE3 and SSE2). - int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSEAltShuffleTbl[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); - std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + // FIXME: Need a better design of the cost table to handle non-simple types of + // potential massive combinations (elem_num x src_type x dst_type). - static const TypeConversionCostTblEntry - SSE2ConvTbl[] = { - // These are somewhat magic numbers justified by looking at the output of - // Intel's IACA, running some kernels and making sure when we take - // legalization into account the throughput will be overestimated. - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - // There are faster sequences for float conversions. 
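The reverse-shuffle costing in getShuffleCost above reduces to a small rule, sketched here with assumed constants: one instruction if the legalized vector fits in 128 bits, three (extract, insert, and a per-half copy) otherwise, scaled by the legalization split factor:

    #include <cassert>

    int reverseShuffleCost(unsigned LegalizedBits, int SplitFactor) {
      int PerVector = LegalizedBits > 128 ? 3 : 1; // extract + insert + copy
      return SplitFactor * PerVector;
    }

    int main() {
      assert(reverseShuffleCost(128, 1) == 1); // e.g. a single pshufd/shufps
      assert(reverseShuffleCost(256, 1) == 3); // cross-lane case on AVX
      return 0;
    }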
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, + + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, + { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, }; - if (ST->hasSSE2() && !ST->hasAVX()) { - int Idx = - ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second); - if (Idx != -1) - return LTSrc.first * SSE2ConvTbl[Idx].Cost; - } - - static const TypeConversionCostTblEntry - AVX512ConversionTbl[] = { + static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, - { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, - { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 }, // v16i1 -> v16i32 - load + broadcast { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, @@ -522,33 +565,49 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, 
MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 }, + + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, + { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, }; - if (ST->hasAVX512()) { - int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second, - LTSrc.second); - if (Idx != -1) - return AVX512ConversionTbl[Idx].Cost; - } - EVT SrcTy = TLI->getValueType(DL, Src); - EVT DstTy = TLI->getValueType(DL, Dst); - - // The function getSimpleVT only handles simple value types. - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); - - static const TypeConversionCostTblEntry - AVX2ConversionTbl[] = { + static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, @@ -579,8 +638,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, }; - static const TypeConversionCostTblEntry - AVXConversionTbl[] = { + static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, @@ -650,34 +708,158 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, }; + static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + }; + + static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { + // These are somewhat magic numbers justified by looking at the output of + // Intel's IACA, running some kernels and making sure when we take + // legalization into account the 
throughput will be overestimated. + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + // There are faster sequences for float conversions. + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, + }; + + std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + + if (ST->hasSSE2() && !ST->hasAVX()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return LTSrc.first * Entry->Cost; + } + + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); + + // The function getSimpleVT only handles simple value types. 
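The conversion-cost probes that follow are ordered from the most specific subtarget to the least specific one, so a newer target never falls through to the pessimistic legacy numbers. A compressed sketch with made-up per-tier costs standing in for the table lookups:

    struct Features {
      bool HasDQI, HasAVX512, HasAVX2, HasAVX, HasSSE41, HasSSE2;
    };

    // Hypothetical costs for one conversion; the real code consults a table
    // per tier and returns on the first hit.
    int uintToFpCost(const Features &F) {
      if (F.HasDQI)    return 1;  // native wide-integer conversions
      if (F.HasAVX512) return 2;
      if (F.HasAVX2)   return 8;
      if (F.HasAVX)    return 9;
      if (F.HasSSE41)  return 12;
      if (F.HasSSE2)   return 20; // IACA-derived "magic numbers"
      return -1;                  // defer to the generic implementation
    }

    int main() {
      Features AVX512DQTarget = {true, true, true, true, true, true};
      return uintToFpCost(AVX512DQTarget) == 1 ? 0 : 1;
    }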
+ if (!SrcTy.isSimple() || !DstTy.isSimple()) + return BaseT::getCastInstrCost(Opcode, Dst, Src); + + if (ST->hasDQI()) + if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + + if (ST->hasAVX512()) + if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + if (ST->hasAVX2()) { - int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVX2ConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } if (ST->hasAVX()) { - int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + + if (ST->hasSSE41()) { + if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + + if (ST->hasSSE2()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTblEntry SSE42CostTbl[] = { + static const CostTblEntry SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -686,7 +868,7 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const CostTblEntry AVX1CostTbl[] = { + static const CostTblEntry AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
@@ -696,54 +878,45 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const CostTblEntry AVX2CostTbl[] = { + static const CostTblEntry AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, { ISD::SETCC, MVT::v32i8, 1 }, }; - static const CostTblEntry AVX512CostTbl[] = { + static const CostTblEntry AVX512CostTbl[] = { { ISD::SETCC, MVT::v8i64, 1 }, { ISD::SETCC, MVT::v16i32, 1 }, { ISD::SETCC, MVT::v8f64, 1 }, { ISD::SETCC, MVT::v16f32, 1 }, }; - if (ST->hasAVX512()) { - int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX512CostTbl[Idx].Cost; - } + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; - } + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -761,10 +934,9 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, return BaseT::getVectorInstrCost(Opcode, Val, Index); } -unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) { +int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; + int Cost = 0; for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { if (Insert) @@ -776,9 +948,8 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, return Cost; } -unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { +int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { unsigned NumElem = VTy->getVectorNumElements(); @@ -796,22 +967,21 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // Assume that all other non-power-of-two numbers are scalarized. 
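getScalarizationOverhead, converted to int just above, charges one extract and/or one insert per vector element. A stand-alone sketch with assumed unit per-element costs:

    int scalarizationOverhead(int NumElts, bool Insert, bool Extract) {
      int Cost = 0;
      for (int i = 0; i < NumElts; ++i) {
        if (Insert)
          Cost += 1; // cost of insertelement at lane i (assumed 1)
        if (Extract)
          Cost += 1; // cost of extractelement at lane i (assumed 1)
      }
      return Cost;
    }

    int main() {
      // A <4 x float> value that must be fully unpacked and repacked.
      return scalarizationOverhead(4, true, true) == 8 ? 0 : 1;
    }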
if (!isPowerOf2_32(NumElem)) { - unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), - Alignment, AddressSpace); - unsigned SplitCost = getScalarizationOverhead(Src, - Opcode == Instruction::Load, - Opcode==Instruction::Store); + int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, + AddressSpace); + int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load, + Opcode == Instruction::Store); return NumElem * Cost + SplitCost; } } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; + int Cost = LT.first * 1; // On Sandybridge 256bit load/stores are double pumped // (but not on Haswell). @@ -821,9 +991,9 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return Cost; } -unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, - unsigned Alignment, - unsigned AddressSpace) { +int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, + unsigned Alignment, + unsigned AddressSpace) { VectorType *SrcVTy = dyn_cast(SrcTy); if (!SrcVTy) // To calculate scalar take the regular cost, without mask @@ -832,34 +1002,33 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned NumElem = SrcVTy->getVectorNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem); - if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) || - (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) || + if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) || + (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) { // Scalarization - unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); - unsigned ScalarCompareCost = - getCmpSelInstrCost(Instruction::ICmp, - Type::getInt8Ty(getGlobalContext()), NULL); - unsigned BranchCost = getCFInstrCost(Instruction::Br); - unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); + int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); + int ScalarCompareCost = getCmpSelInstrCost( + Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), nullptr); + int BranchCost = getCFInstrCost(Instruction::Br); + int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); - unsigned ValueSplitCost = - getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load, - Opcode == Instruction::Store); - unsigned MemopCost = + int ValueSplitCost = getScalarizationOverhead( + SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store); + int MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), Alignment, AddressSpace); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); - unsigned Cost = 0; - if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() && + std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); + auto VT = TLI->getValueType(DL, SrcVTy); + int Cost = 0; + if (VT.isSimple() && LT.second != VT.getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. 
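When a masked load or store has to be scalarized, the code above sums four components: unpacking the mask, a compare-and-branch per lane, unpacking or repacking the data, and the scalar memory operations themselves. A worked example with assumed unit costs for a scalarized masked load of eight elements:

    #include <cassert>

    int main() {
      int NumElem = 8;
      int MaskSplitCost = NumElem * 1;           // extract each mask lane
      int ScalarCompareCost = 1, BranchCost = 1; // assumed per-lane costs
      int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
      int ValueSplitCost = NumElem * 1;          // insert each loaded value
      int MemopCost = NumElem * 1;               // one scalar load per lane
      assert(MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost == 40);
      return 0;
    }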
- Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) + - getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0); + Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) + + getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr); else if (LT.second.getVectorNumElements() > NumElem) { VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), @@ -874,7 +1043,7 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, return Cost+LT.first; } -unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -887,10 +1056,10 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return BaseT::getAddressComputationCost(Ty, IsComplex); } -unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, - bool IsPairwise) { +int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, + bool IsPairwise) { - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -900,7 +1069,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. - static const CostTblEntry SSE42CostTblPairWise[] = { + static const CostTblEntry SSE42CostTblPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". @@ -908,7 +1077,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i16, 5 }, }; - static const CostTblEntry AVX1CostTblPairWise[] = { + static const CostTblEntry AVX1CostTblPairWise[] = { { ISD::FADD, MVT::v4f32, 4 }, { ISD::FADD, MVT::v4f64, 5 }, { ISD::FADD, MVT::v8f32, 7 }, @@ -919,7 +1088,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i32, 5 }, }; - static const CostTblEntry SSE42CostTblNoPairWise[] = { + static const CostTblEntry SSE42CostTblNoPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". @@ -927,7 +1096,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". 
}; - static const CostTblEntry AVX1CostTblNoPairWise[] = { + static const CostTblEntry AVX1CostTblNoPairWise[] = { { ISD::FADD, MVT::v4f32, 3 }, { ISD::FADD, MVT::v4f64, 3 }, { ISD::FADD, MVT::v8f32, 4 }, @@ -939,29 +1108,21 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, }; if (IsPairwise) { - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTblPairWise[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTblPairWise[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } else { - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTblNoPairWise[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTblNoPairWise[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } return BaseT::getReductionCost(Opcode, ValTy, IsPairwise); @@ -970,7 +1131,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned X86TTIImpl::getIntImmCost(int64_t Val) { +int X86TTIImpl::getIntImmCost(int64_t Val) { if (Val == 0) return TTI::TCC_Free; @@ -980,7 +1141,7 @@ unsigned X86TTIImpl::getIntImmCost(int64_t Val) { return 2 * TTI::TCC_Basic; } -unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1004,18 +1165,18 @@ unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Split the constant into 64-bit chunks and calculate the cost for each // chunk. - unsigned Cost = 0; + int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialize the constant. - return std::max(1U, Cost); + return std::max(1, Cost); } -unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1038,6 +1199,26 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::Store: ImmIdx = 0; break; + case Instruction::ICmp: + // This is an imperfect hack to prevent constant hoisting of + // compares that might be trying to check if a 64-bit value fits in + // 32-bits. The backend can optimize these cases using a right shift by 32. + // Ideally we would check the compare predicate here. There are also other + // similar immediates the backend can use shifts for.
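getIntImmCost splits wide immediates into 64-bit chunks, prices each chunk, and never reports less than one instruction. A simplified sketch using two explicit 64-bit halves instead of APInt:

    #include <algorithm>
    #include <cstdint>

    // Cost of one 64-bit chunk, mirroring the rules above: zero is free,
    // a sign-extended 32-bit value is one instruction, anything else two.
    int immCost64(int64_t V) {
      if (V == 0)
        return 0;                 // TCC_Free
      if (V == (int64_t)(int32_t)V)
        return 1;                 // TCC_Basic
      return 2;                   // movabs-style sequence
    }

    int immCost128(int64_t Lo, int64_t Hi) {
      int Cost = immCost64(Lo) + immCost64(Hi);
      return std::max(1, Cost);   // at least one instruction overall
    }

    int main() {
      return immCost128(0, 0) == 1 && immCost128(-1, 42) == 2 ? 0 : 1;
    }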
+ if (Idx == 1 && Imm.getBitWidth() == 64) { + uint64_t ImmVal = Imm.getZExtValue(); + if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff) + return TTI::TCC_Free; + } + ImmIdx = 1; + break; + case Instruction::And: + // We support 64-bit ANDs with immediates with 32-bits of leading zeroes + // by using a 32-bit operation with implicit zero extension. Detect such + // immediates here as the normal path expects bit 31 to be sign extended. + if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Fallthrough case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1045,10 +1226,8 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::ICmp: ImmIdx = 1; break; // Always return TCC_Free for the shift value of a shift instruction. @@ -1073,18 +1252,18 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, } if (Idx == ImmIdx) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast(TTI::TCC_Free) + ? static_cast(TTI::TCC_Free) : Cost; } return X86TTIImpl::getIntImmCost(Imm, Ty); } -unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1118,23 +1297,181 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return X86TTIImpl::getIntImmCost(Imm, Ty); } -bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) { - int DataWidth = DataTy->getPrimitiveSizeInBits(); +// Return an average cost of Gather / Scatter instruction, maybe improved later +int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, + unsigned Alignment, unsigned AddressSpace) { - // Todo: AVX512 allows gather/scatter, works with strided and random as well - if ((DataWidth < 32) || (Consecutive == 0)) + assert(isa(SrcVTy) && "Unexpected type in getGSVectorCost"); + unsigned VF = SrcVTy->getVectorNumElements(); + + // Try to reduce index size from 64 bit (default for GEP) + // to 32. It is essential for VF 16. If the index can't be reduced to 32, the + // operation will use 16 x 64 indices which do not fit in a zmm and needs + // to split. Also check that the base pointer is the same for all lanes, + // and that there's at most one variable index. 
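The two new TCC_Free cases above keep constant hoisting away from immediates the backend already encodes for free. A stand-alone sketch of both predicates (hypothetical helper, simplified from the switch above):

    #include <cassert>
    #include <cstdint>

    // - icmp against 0x100000000 / 0xffffffff is a "does it fit in 32 bits"
    //   check the backend handles with a right shift, so don't hoist it;
    // - a 64-bit AND whose mask zero-extends from 32 bits is encodable as a
    //   32-bit AND with implicit zero extension.
    bool isFreeImm(bool IsICmp, bool IsAnd, uint64_t Imm) {
      if (IsICmp && (Imm == 0x100000000ULL || Imm == 0xffffffffULL))
        return true;
      if (IsAnd && Imm <= 0xffffffffULL)
        return true;
      return false;
    }

    int main() {
      assert(isFreeImm(true, false, 0x100000000ULL));
      assert(isFreeImm(false, true, 0x00ff00ffULL));
      assert(!isFreeImm(false, true, 0x1ffffffffULL));
      return 0;
    }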
+ auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) { + unsigned IndexSize = DL.getPointerSizeInBits(); + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (IndexSize < 64 || !GEP) + return IndexSize; + + unsigned NumOfVarIndices = 0; + Value *Ptrs = GEP->getPointerOperand(); + if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs)) + return IndexSize; + for (unsigned i = 1; i < GEP->getNumOperands(); ++i) { + if (isa(GEP->getOperand(i))) + continue; + Type *IndxTy = GEP->getOperand(i)->getType(); + if (IndxTy->isVectorTy()) + IndxTy = IndxTy->getVectorElementType(); + if ((IndxTy->getPrimitiveSizeInBits() == 64 && + !isa(GEP->getOperand(i))) || + ++NumOfVarIndices > 1) + return IndexSize; // 64 + } + return (unsigned)32; + }; + + + // Trying to reduce IndexSize to 32 bits for vector 16. + // By default the IndexSize is equal to pointer size. + unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) : + DL.getPointerSizeInBits(); + + Type *IndexVTy = VectorType::get(IntegerType::get(getGlobalContext(), + IndexSize), VF); + std::pair IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); + std::pair SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); + int SplitFactor = std::max(IdxsLT.first, SrcLT.first); + if (SplitFactor > 1) { + // Handle splitting of vector of pointers + Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); + return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, + AddressSpace); + } + + // The gather / scatter cost is given by Intel architects. It is a rough + // number since we are looking at one instruction at a time. + const int GSOverhead = 2; + return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), + Alignment, AddressSpace); +} + +/// Return the cost of full scalarization of gather / scatter operation. +/// +/// Opcode - Load or Store instruction. +/// SrcVTy - The type of the data vector that should be gathered or scattered. +/// VariableMask - The mask is non-constant at compile time. +/// Alignment - Alignment for one element. +/// AddressSpace - pointer[s] address space. +/// +int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, + bool VariableMask, unsigned Alignment, + unsigned AddressSpace) { + unsigned VF = SrcVTy->getVectorNumElements(); + + int MaskUnpackCost = 0; + if (VariableMask) { + VectorType *MaskTy = + VectorType::get(Type::getInt1Ty(getGlobalContext()), VF); + MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true); + int ScalarCompareCost = + getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(getGlobalContext()), + nullptr); + int BranchCost = getCFInstrCost(Instruction::Br); + MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); + } + + // The cost of the scalar loads/stores.
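The index-narrowing lambda above exists because of register pressure at VF 16: sixteen 64-bit indices cannot fit in one vector register, forcing a split, while sixteen 32-bit indices can. The arithmetic, under the assumption of 512-bit (zmm) vector registers:

    #include <cassert>

    int main() {
      int VF = 16, RegBits = 512;
      // Number of registers needed for the index vector (rounded up).
      assert((VF * 64 + RegBits - 1) / RegBits == 2); // i64 indices: split
      assert((VF * 32 + RegBits - 1) / RegBits == 1); // i32 indices: one zmm
      return 0;
    }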
+ int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), + Alignment, AddressSpace); + + int InsertExtractCost = 0; + if (Opcode == Instruction::Load) + for (unsigned i = 0; i < VF; ++i) + // Add the cost of inserting each scalar load into the vector + InsertExtractCost += + getVectorInstrCost(Instruction::InsertElement, SrcVTy, i); + else + for (unsigned i = 0; i < VF; ++i) + // Add the cost of extracting each element out of the data vector + InsertExtractCost += + getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i); + + return MemoryOpCost + MaskUnpackCost + InsertExtractCost; +} + +/// Calculate the cost of Gather / Scatter operation +int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) { + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); + unsigned VF = SrcVTy->getVectorNumElements(); + PointerType *PtrTy = dyn_cast(Ptr->getType()); + if (!PtrTy && Ptr->getType()->isVectorTy()) + PtrTy = dyn_cast(Ptr->getType()->getVectorElementType()); + assert(PtrTy && "Unexpected type for Ptr argument"); + unsigned AddressSpace = PtrTy->getAddressSpace(); + + bool Scalarize = false; + if ((Opcode == Instruction::Load && !isLegalMaskedGather(SrcVTy)) || + (Opcode == Instruction::Store && !isLegalMaskedScatter(SrcVTy))) + Scalarize = true; + // Gather / Scatter for vector 2 is not profitable on KNL / SKX + // Vector-4 of gather/scatter instruction does not exist on KNL. + // We can extend it to 8 elements, but zeroing upper bits of + // the mask vector will add more instructions. Right now we give the scalar + // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction is + // better in the VariableMask case. + if (VF == 2 || (VF == 4 && !ST->hasVLX())) + Scalarize = true; + + if (Scalarize) + return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, AddressSpace); + + return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); +} + +bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { + Type *ScalarTy = DataTy->getScalarType(); + int DataWidth = isa(ScalarTy) ? + DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); + + return (DataWidth >= 32 && ST->hasAVX2()); +} + +bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { + return isLegalMaskedLoad(DataType); +} + +bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) { + // This function is called now in two cases: from the Loop Vectorizer + // and from the Scalarizer. + // When the Loop Vectorizer asks about legality of the feature, + // the vectorization factor is not calculated yet. The Loop Vectorizer + // sends a scalar type and the decision is based on the width of the + // scalar element. + // Later on, the cost model will estimate usage of this intrinsic based on + // the vector type. + // The Scalarizer asks again about legality. It sends a vector type. + // In this case we can reject non-power-of-2 vectors. + if (isa(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements())) return false; - if (ST->hasAVX512() || ST->hasAVX2()) - return true; - return false; + Type *ScalarTy = DataTy->getScalarType(); + int DataWidth = isa(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); + + // AVX-512 allows gather and scatter + return DataWidth >= 32 && ST->hasAVX512(); } -bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) { - return isLegalMaskedLoad(DataType, Consecutive); +bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) { + return isLegalMaskedGather(DataType); } -bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const { +bool X86TTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); // Work this as a subsetting of subtarget features. diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index da3f36c2e27e..adb745e912d1 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -33,13 +33,13 @@ class X86TTIImpl : public BasicTTIImplBase { const X86Subtarget *ST; const X86TargetLowering *TLI; - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } public: - explicit X86TTIImpl(const X86TargetMachine *TM, Function &F) + explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -62,38 +62,44 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment); + int getAddressComputationCost(Type *PtrTy, bool IsComplex); - unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex); + int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm); - unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm); + int getIntImmCost(int64_t); - unsigned getIntImmCost(int64_t); + int getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(const APInt &Imm, Type *Ty); - - 
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); - bool isLegalMaskedLoad(Type *DataType, int Consecutive); - bool isLegalMaskedStore(Type *DataType, int Consecutive); - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const; + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + bool isLegalMaskedLoad(Type *DataType); + bool isLegalMaskedStore(Type *DataType); + bool isLegalMaskedGather(Type *DataType); + bool isLegalMaskedScatter(Type *DataType); + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; +private: + int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, + unsigned Alignment, unsigned AddressSpace); + int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr, + unsigned Alignment, unsigned AddressSpace); /// @} }; diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 9190d0be9e4d..dce94a9e9ef7 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -15,7 +15,8 @@ //===----------------------------------------------------------------------===// #include "X86.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" @@ -38,12 +39,16 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "winehstate" +namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); } + namespace { class WinEHStatePass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHStatePass() : FunctionPass(ID) {} + WinEHStatePass() : FunctionPass(ID) { + initializeWinEHStatePassPass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &Fn) override; @@ -62,18 +67,13 @@ private: void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler); void unlinkExceptionRegistration(IRBuilder<> &Builder); - void addCXXStateStores(Function &F, MachineModuleInfo &MMI); - void addSEHStateStores(Function &F, MachineModuleInfo &MMI); - void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo, - Function &F, int BaseState); + void addStateStores(Function &F, WinEHFuncInfo &FuncInfo); void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State); Value *emitEHLSDA(IRBuilder<> &Builder, Function *F); Function *generateLSDAInEAXThunk(Function *ParentFunc); - int escapeRegNode(Function &F); - // Module-level type getters. Type *getEHLinkRegistrationType(); Type *getSEHRegistrationType(); @@ -111,6 +111,9 @@ FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); } char WinEHStatePass::ID = 0; +INITIALIZE_PASS(WinEHStatePass, "x86-winehstate", + "Insert stores for EH state numbers", false, false) + bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape); @@ -138,14 +141,7 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { - // If this is an outlined handler, don't do anything. We'll do state insertion - // for it in the parent. 
- StringRef WinEHParentName = - F.getFnAttribute("wineh-parent").getValueAsString(); - if (WinEHParentName != F.getName() && !WinEHParentName.empty()) - return false; - - // Check the personality. Do nothing if this is not an MSVC personality. + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; PersonalityFn = @@ -153,7 +149,19 @@ bool WinEHStatePass::runOnFunction(Function &F) { if (!PersonalityFn) return false; Personality = classifyEHPersonality(PersonalityFn); - if (!isMSVCEHPersonality(Personality)) + if (!isFuncletEHPersonality(Personality)) + return false; + + // Skip this function if there are no EH pads and we aren't using IR-level + // outlining. + bool HasPads = false; + for (BasicBlock &BB : F) { + if (BB.isEHPad()) { + HasPads = true; + break; + } + } + if (!HasPads) return false; // Disable frame pointer elimination in this function. @@ -163,14 +171,13 @@ bool WinEHStatePass::runOnFunction(Function &F) { emitExceptionRegistrationRecord(&F); - auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMIPtr && "MachineModuleInfo should always be available"); - MachineModuleInfo &MMI = *MMIPtr; - switch (Personality) { - default: llvm_unreachable("unexpected personality function"); - case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break; - case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break; - } + // The state numbers calculated here in IR must agree with what we calculate + // later on for the MachineFunction. In particular, if an IR pass deletes an + // unreachable EH pad after this point before machine CFG construction, we + // will be in trouble. If this assumption is ever broken, we should turn the + // numbers into an immutable analysis pass. + WinEHFuncInfo FuncInfo; + addStateStores(F, FuncInfo); // Reset per-function state. PersonalityFn = nullptr; @@ -261,7 +268,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -1 StateFieldIndex = 2; - insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1); + insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), -1); // Handler = __ehhandler$F Function *Trampoline = generateLSDAInEAXThunk(F); Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1); @@ -278,7 +285,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -2 / -1 StateFieldIndex = 4; - insertStateNumberStore(RegNode, Builder.GetInsertPoint(), + insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1); // ScopeTable = llvm.x86.seh.lsda(F) Value *FI8 = Builder.CreateBitCast(F, Int8PtrType); @@ -347,7 +354,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { Value *CastPersonality = Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo()); auto AI = Trampoline->arg_begin(); - Value *Args[5] = {LSDA, AI++, AI++, AI++, AI++}; + Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++}; CallInst *Call = Builder.CreateCall(CastPersonality, Args); // Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true); @@ -391,160 +398,53 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) { Builder.CreateStore(Next, FSZero); } -void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); - calculateWinCXXEHStateNumbers(&F, FuncInfo); +void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) { + // Mark the registration node. The backend needs to know which alloca it is so + // that it can recover the original frame pointer. + IRBuilder<> Builder(RegNode->getParent(), std::next(RegNode->getIterator())); + Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getInt8PtrTy()); + Builder.CreateCall( + Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehregnode), + {RegNodeI8}); - // The base state for the parent is -1. - addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1); + // Calculate state numbers. + if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&F, FuncInfo); + else + calculateWinCXXEHStateNumbers(&F, FuncInfo); - // Set up RegNodeEscapeIndex - int RegNodeEscapeIndex = escapeRegNode(F); - FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; - - // Only insert stores in catch handlers. - Constant *FI8 = - ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext())); - for (auto P : FuncInfo.HandlerBaseState) { - Function *Handler = const_cast<Function *>(P.first); - int BaseState = P.second; - IRBuilder<> Builder(&Handler->getEntryBlock(), - Handler->getEntryBlock().begin()); - // FIXME: Find and reuse such a call if present. - Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)}); - Value *RecoveredRegNode = Builder.CreateCall( - FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)}); - RecoveredRegNode = - Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0)); - addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState); - } -} - -/// Escape RegNode so that we can access it from child handlers. Find the call -/// to localescape, if any, in the entry block and append RegNode to the list -/// of arguments. -int WinEHStatePass::escapeRegNode(Function &F) { - // Find the call to localescape and extract its arguments. - IntrinsicInst *EscapeCall = nullptr; - for (Instruction &I : F.getEntryBlock()) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::localescape) { - EscapeCall = II; - break; - } - } - SmallVector<Value *, 8> Args; - if (EscapeCall) { - auto Ops = EscapeCall->arg_operands(); - Args.append(Ops.begin(), Ops.end()); - } - Args.push_back(RegNode); - - // Replace the call (if it exists) with new one. Otherwise, insert at the end - // of the entry block. - Instruction *InsertPt = EscapeCall; - if (!EscapeCall) - InsertPt = F.getEntryBlock().getTerminator(); - IRBuilder<> Builder(&F.getEntryBlock(), InsertPt); - Builder.CreateCall(FrameEscape, Args); - if (EscapeCall) - EscapeCall->eraseFromParent(); - return Args.size() - 1; -} - -void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode, - WinEHFuncInfo &FuncInfo, - Function &F, int BaseState) { // Iterate all the instructions and emit state number stores. + DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F); for (BasicBlock &BB : F) { + // Figure out what state we should assign calls in this block.
+ int BaseState = -1; + auto &BBColors = BlockColors[&BB]; + + assert(BBColors.size() == 1 && + "multi-color BB not removed by preparation"); + BasicBlock *FuncletEntryBB = BBColors.front(); + if (auto *FuncletPad = + dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) { + auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad); + if (BaseStateI != FuncInfo.FuncletBaseStateMap.end()) + BaseState = BaseStateI->second; + } + for (Instruction &I : BB) { if (auto *CI = dyn_cast<CallInst>(&I)) { // Possibly throwing call instructions have no actions to take after // an unwind. Ensure they are in the -1 state. if (CI->doesNotThrow()) continue; - insertStateNumberStore(ParentRegNode, CI, BaseState); + insertStateNumberStore(RegNode, CI, BaseState); } else if (auto *II = dyn_cast<InvokeInst>(&I)) { // Look up the state number of the landingpad this unwinds to. - LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); - // FIXME: Why does this assertion fail? - //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!"); - int State = FuncInfo.LandingPadStateMap[LPI]; - insertStateNumberStore(ParentRegNode, II, State); - } - } - } -} - -/// Assign every distinct landingpad a unique state number for SEH. Unlike C++ -/// EH, we can use this very simple algorithm while C++ EH cannot because catch -/// handlers aren't outlined and the runtime doesn't have to figure out which -/// catch handler frame to unwind to. -/// FIXME: __finally blocks are outlined, so this approach may break down there. -void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); - - // Remember and return the index that we used. We save it in WinEHFuncInfo so - // that we can lower llvm.x86.seh.recoverfp later in filter functions without - // too much trouble. - int RegNodeEscapeIndex = escapeRegNode(F); - FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; - - // Iterate all the instructions and emit state number stores. - int CurState = 0; - SmallPtrSet<BasicBlock *, 4> ExceptBlocks; - for (BasicBlock &BB : F) { - for (auto I = BB.begin(), E = BB.end(); I != E; ++I) { - if (auto *CI = dyn_cast<CallInst>(I)) { - auto *Intrin = dyn_cast<IntrinsicInst>(CI); - if (Intrin) { - // Calls that "don't throw" are considered to be able to throw asynch - // exceptions, but intrinsics cannot. - continue; - } - insertStateNumberStore(RegNode, CI, -1); - } else if (auto *II = dyn_cast<InvokeInst>(I)) { - // Look up the state number of the landingpad this unwinds to. - LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); - auto InsertionPair = - FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState)); - auto Iter = InsertionPair.first; - int &State = Iter->second; - bool Inserted = InsertionPair.second; - if (Inserted) { - // Each action consumes a state number. - auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode()); - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - parseEHActions(EHActions, ActionList); - assert(!ActionList.empty()); - CurState += ActionList.size(); - State += ActionList.size() - 1; - - // Remember all the __except block targets.
- for (auto &Handler : ActionList) { - if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) { - auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc()); -#ifndef NDEBUG - for (BasicBlock *Pred : predecessors(BA->getBasicBlock())) - assert(Pred->isLandingPad() && - "WinEHPrepare failed to split block"); -#endif - ExceptBlocks.insert(BA->getBasicBlock()); - } - } - } + assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!"); + int State = FuncInfo.InvokeStateMap[II]; insertStateNumberStore(RegNode, II, State); } } } - - // Insert llvm.x86.seh.restoreframe() into each __except block. - Function *RestoreFrame = - Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe); - for (BasicBlock *ExceptBB : ExceptBlocks) { - IRBuilder<> Builder(ExceptBB->begin()); - Builder.CreateCall(RestoreFrame, {}); - } } void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode, diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 2e44ac949b2c..aaf267af5311 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -224,7 +224,7 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val > 11) return MCDisassembler::Fail; - static unsigned Values[] = { + static const unsigned Values[] = { 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32 }; Inst.addOperand(MCOperand::createImm(Values[Val])); diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 6fd2dec1d13e..dc513f7b225b 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -19,8 +19,6 @@ namespace llvm { -class TargetMachine; - class XCoreInstPrinter : public MCInstPrinter { public: XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 702056d781d0..b00cdd5040eb 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -115,14 +115,14 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitSpecialLLVMGlobal(GV)) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); - unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); - + unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType()); + // Mark the start of the global getTargetStreamer().emitCCTopData(GVSym->getName()); @@ -154,15 +154,15 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->isThreadLocal()) { report_fatal_error("TLS is not supported by this target!"); } - unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Size = DL.getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), MCConstantExpr::create(Size, OutContext)); } OutStreamer->EmitLabel(GVSym); - - EmitGlobalConstant(C); + + EmitGlobalConstant(DL, C); // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits.
if (Size < 4) @@ -208,7 +208,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -224,8 +224,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); break; case MachineOperand::MO_BlockAddress: GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 76c3d8130e75..ae493de083b8 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -160,27 +160,26 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList, /// As offsets are negative, the largest offsets will be first. static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList, MachineFrameInfo *MFI, XCoreFunctionInfo *XFI, + const Constant *PersonalityFn, const TargetLowering *TL) { assert(XFI->hasEHSpillSlot() && "There are no EH register spill slots"); - const int* EHSlot = XFI->getEHSpillSlot(); - SpillList.push_back(StackSlotInfo(EHSlot[0], - MFI->getObjectOffset(EHSlot[0]), - TL->getExceptionPointerRegister())); - SpillList.push_back(StackSlotInfo(EHSlot[0], - MFI->getObjectOffset(EHSlot[1]), - TL->getExceptionSelectorRegister())); + const int *EHSlot = XFI->getEHSpillSlot(); + SpillList.push_back( + StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[0]), + TL->getExceptionPointerRegister(PersonalityFn))); + SpillList.push_back( + StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[1]), + TL->getExceptionSelectorRegister(PersonalityFn))); std::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset); } - static MachineMemOperand * getFrameIndexMMO(MachineBasicBlock &MBB, int FrameIndex, unsigned flags) { MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - flags, MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags, + MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex)); return MMO; } @@ -323,8 +322,11 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF, if (XFI->hasEHSpillSlot()) { // The unwinder requires stack slot & CFI offsets for the exception info. // We do not save/spill these registers. - SmallVector<StackSlotInfo,2> SpillList; - GetEHSpillList(SpillList, MFI, XFI, + const Function *Fn = MF.getFunction(); + const Constant *PersonalityFn = + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; + SmallVector<StackSlotInfo, 2> SpillList; + GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, MF.getSubtarget().getTargetLowering()); assert(SpillList.size()==2 && "Unexpected SpillList size"); EmitCfiOffset(MBB, MBBI, dl, TII, MMI, @@ -355,8 +357,12 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, if (RetOpcode == XCore::EH_RETURN) { // 'Restore' the exception info the unwinder has placed into the stack // slots.
- SmallVector<StackSlotInfo,2> SpillList; - GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering()); + const Function *Fn = MF.getFunction(); + const Constant *PersonalityFn = + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; + SmallVector<StackSlotInfo, 2> SpillList; + GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, + MF.getSubtarget().getTargetLowering()); RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); // Return to the landing pad. diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 9d4a966dfba4..9f61c84cd445 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -151,8 +151,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { MVT::Other, CPIdx, CurDAG->getEntryNode()); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4); + MemOp[0] = + MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), + MachineMemOperand::MOLoad, 4, 4); cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1); return node; } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index d62e7428299d..105b2cfb1be6 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -79,9 +79,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Compute derived properties from the register classes computeRegisterProperties(Subtarget.getRegisterInfo()); - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::Source); @@ -154,8 +151,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Exception handling setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); - setExceptionPointerRegister(XCore::R0); - setExceptionSelectorRegister(XCore::R1); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); // Atomic operations @@ -839,7 +834,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); return DAG.getLoad( getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN, - MachinePointerInfo::getFixedStack(FI), false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0); } SDValue XCoreTargetLowering:: @@ -1367,8 +1362,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, + false, false, 0); } const ArgDataPair ADP = { ArgIn, Ins[i].Flags }; ArgData.push_back(ADP); @@ -1517,9 +1512,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Create a SelectionDAG node corresponding to a store // to this memory location.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN, - MachinePointerInfo::getFixedStack(FI), false, false, - 0)); + MemOpChains.push_back(DAG.getStore( + Chain, dl, OutVals[i], FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, 0)); } // Transform all store nodes into one single node because @@ -1567,8 +1563,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -1828,9 +1823,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Chain = ST->getChain(); unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits(); - if (StoreBits % 8) { - break; - } + assert((StoreBits % 8) == 0 && + "Store size in bits must be a multiple of 8"); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); unsigned Alignment = ST->getAlignment(); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index ddd675c5164d..b6f09ff418b5 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -125,6 +125,20 @@ namespace llvm { bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return XCore::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return XCore::R1; + } + private: const TargetMachine &TM; const XCoreSubtarget &Subtarget; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index ee30344dcc25..e4129aee9479 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -368,11 +368,10 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); BuildMI(MBB, I, DL, get(XCore::STWFI)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FrameIndex) @@ -391,11 +390,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg) .addFrameIndex(FrameIndex) .addImm(0) diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index 996c6f59346d..f0b720151b17 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -228,12 +228,9 @@ bool XCoreLowerThreadLocal::runOnModule(Module &M) { // Find thread local globals. 
bool MadeChange = false; SmallVector<GlobalVariable *, 16> ThreadLocalGlobals; - for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); - GVI != E; ++GVI) { - GlobalVariable *GV = GVI; - if (GV->isThreadLocal()) - ThreadLocalGlobals.push_back(GV); - } + for (GlobalVariable &GV : M.globals()) + if (GV.isThreadLocal()) + ThreadLocalGlobals.push_back(&GV); for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) { MadeChange |= lowerGlobal(ThreadLocalGlobals[I]); } diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 9ef9752d0a5b..6c770969e32e 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- XCoreMachineFuctionInfo.cpp - XCore machine function info ---------===// +//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 078ffde18fb9..cdcc52fdc32d 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===// +//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index f420081868f9..4a79dac0bed9 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -85,7 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() { } TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(XCoreTTIImpl(this, F)); }); } diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index b5a99058f46e..aa16ecc148db 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -123,18 +123,21 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, if (Kind.isMergeableConst16()) return MergeableConst16Section; } Type *ObjType = GV->getType()->getPointerElementType(); + auto &DL = GV->getParent()->getDataLayout(); if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() || - TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) { + DL.getTypeAllocSize(ObjType) < CodeModelLargeSize) { if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection : DataRelROSection; if (Kind.isBSS() || Kind.isCommon())return BSSSection; - if (Kind.isDataRel()) return DataSection; + if (Kind.isData()) + return DataSection; if (Kind.isReadOnlyWithRel()) return DataRelROSection; } else { if (Kind.isReadOnly()) return UseCPRel?
ReadOnlySectionLarge : DataRelROSectionLarge; if (Kind.isBSS() || Kind.isCommon())return BSSSectionLarge; - if (Kind.isDataRel()) return DataSectionLarge; + if (Kind.isData()) + return DataSectionLarge; if (Kind.isReadOnlyWithRel()) return DataRelROSectionLarge; } @@ -142,9 +145,8 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, report_fatal_error("Target does not support TLS or Common sections"); } -MCSection * -XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *XCoreTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4()) return MergeableConst4Section; if (Kind.isMergeableConst8()) return MergeableConst8Section; if (Kind.isMergeableConst16()) return MergeableConst16Section; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 2a5ac238a447..6701c661a73e 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -33,7 +33,7 @@ static const unsigned CodeModelLargeSize = 256; Mangler &Mang, const TargetMachine &TM) const override; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index e23aef3e3b4a..b2cb889f1fc0 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -37,7 +37,7 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> { const XCoreTargetLowering *getTLI() const { return TLI; } public: - explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F) + explicit XCoreTTIImpl(const XCoreTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 4762011d63d8..0e05129b5261 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -34,8 +34,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -63,7 +66,8 @@ namespace { /// struct ArgPromotion : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -81,7 +85,8 @@ namespace { bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); - bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; + bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, + AAResults &AAR) const; CallGraphNode *DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform); @@ -90,15 +95,15 @@ namespace { bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements; - DenseMap<const Function *, DISubprogram *> FunctionDIs; }; } char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) @@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument *, 16> PointerArgs; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) - PointerArgs.push_back(I); + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't @@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const DataLayout &DL = F->getParent()->getDataLayout(); + // We need to manually construct BasicAA directly in order to disable its use + // of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(*this, *F, BAR)); + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument *, 8> ArgsToPromote; @@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // If all the elements are single-value types, we can promote it. bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (!STy->getElementType(i)->isSingleValueType()) { + for (const auto *EltTy : STy->elements()) { + if (!EltTy->isSingleValueType()) { AllSimple = false; break; } @@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (STy->getElementType(i) == PtrArg->getType()) { + for (const auto *EltTy : STy->elements()) { + if (EltTy == PtrArg->getType()) { RecursiveType = true; break; } @@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr())) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR)) ArgsToPromote.insert(PtrArg); } @@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, - bool isByValOrInAlloca) const { + bool isByValOrInAlloca, + AAResults &AAR) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments
- BasicBlock *EntryBlock = Arg->getParent()->begin(); + BasicBlock &EntryBlock = Arg->getParent()->front(); // Declare this here so we can reuse it IndicesVector Indices; - for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); - I != E; ++I) - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + for (Instruction &I : EntryBlock) + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); @@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. - getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByValOrInAlloca); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); } // Ensure that all of the indices are constants. @@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock *, 16> TranspBlocks; - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); MemoryLocation Loc = MemoryLocation::get(Load); - if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc, - AliasAnalysis::Mod)) + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. for (BasicBlock *P : predecessors(BB)) { for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) - if (AA.canBasicBlockModify(*TranspBB, Loc)) + if (AAR.canBasicBlockModify(*TranspBB, Loc)) return false; } } @@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(I)) { + } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); AttributeSet attrs = PAL.getParamAttributes(ArgIndex); @@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; @@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(I, Indices)] = OrigLoad; + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. @@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on subsequent argument - // promotions of the same function. - FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); AttributesVec.clear(); - F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Get the alias analysis information that we need to update to reflect our - // changes. - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); @@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument if (CallPAL.hasAttributes(ArgIndex)) { AttrBuilder B(CallPAL, ArgIndex); AttributesVec. push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } - } else if (ByValArgsToTransform.count(I)) { + } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); @@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value *> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; if (!SI->second.empty()) { Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); @@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Args.clear(); AttributesVec.clear(); - // Update the alias analysis implementation to know that we are replacing - // the old call with a new one. - AA.replaceWithNewValue(Call, New); - // Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); @@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. - I->replaceAllUsesWith(I2); - I2->takeName(I); - AA.replaceWithNewValue(I, I2); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; continue; } - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. - Instruction *InsertPt = NF->begin()->begin(); + Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); - new StoreInst(I2++, Idx, InsertPt); + new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(I); - AA.replaceWithNewValue(I, TheAlloca); + TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. @@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, continue; } - if (I->use_empty()) { - AA.deleteValue(I); + if (I->use_empty()) continue; - } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); - LI->replaceAllUsesWith(I2); - AA.replaceWithNewValue(LI, I2); + LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); @@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); - L->replaceAllUsesWith(TheArg); - AA.replaceWithNewValue(L, TheArg); + L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } - AA.deleteValue(GEP); GEP->eraseFromParent(); } } @@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, std::advance(I2, ArgIndices.size()); } - // Tell the alias analysis that the old function is about to disappear. - AA.replaceWithNewValue(F, NF); - - NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it.
If there is a dangling @@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } bool ArgPromotion::doInitialization(CallGraph &CG) { - FunctionDIs = makeSubprogramMap(CG.getModule()); return CallGraphSCCPass::doInitialization(CG); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 336dac45e13a..351b88fe2aa0 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,14 +2,18 @@ add_llvm_library(LLVMipo ArgumentPromotion.cpp BarrierNoopPass.cpp ConstantMerge.cpp + CrossDSOCFI.cpp DeadArgumentElimination.cpp ElimAvailExtern.cpp ExtractGV.cpp + ForceFunctionAttrs.cpp FunctionAttrs.cpp + FunctionImport.cpp GlobalDCE.cpp GlobalOpt.cpp IPConstantPropagation.cpp IPO.cpp + InferFunctionAttrs.cpp InlineAlways.cpp InlineSimple.cpp Inliner.cpp @@ -20,6 +24,7 @@ add_llvm_library(LLVMipo PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp + SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 8ce7646621ff..0aa49d6fde01 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) { // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // If this GV is dead, remove it. GV->removeDeadConstantUsers(); @@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) { // invalidating the Constant* pointers in CMap. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Only process constants with initializers in the default address space. if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp new file mode 100644 index 000000000000..5bbb7513005c --- /dev/null +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -0,0 +1,166 @@ +//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass exports all llvm.bitset's found in the module in the form of a +// __cfi_check function, which can be used to verify cross-DSO call targets. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cross-dso-cfi" + +STATISTIC(TypeIds, "Number of unique type identifiers"); + +namespace { + +struct CrossDSOCFI : public ModulePass { + static char ID; + CrossDSOCFI() : ModulePass(ID) { + initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + MDNode *VeryLikelyWeights; + + ConstantInt *extractBitSetTypeId(MDNode *MD); + void buildCFICheck(); + + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, + false) +INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false) +char CrossDSOCFI::ID = 0; + +ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } + +bool CrossDSOCFI::doInitialization(Module &Mod) { + M = &Mod; + VeryLikelyWeights = + MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); + + return false; +} + +/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. +ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { + // This check excludes vtables for classes inside anonymous namespaces. + auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + if (!TM) + return nullptr; + auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); + if (!C) return nullptr; + // We are looking for i64 constants. + if (C->getBitWidth() != 64) return nullptr; + + // Sanity check. + auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); + // Can be null if a function was removed by an optimization. + if (FM) { + auto F = dyn_cast<Function>(FM->getValue()); + // But can never be a function declaration. + assert(!F || !F->isDeclaration()); + (void)F; // Suppress unused variable warning in the no-asserts build. + } + return C; +} + +/// buildCFICheck - emits __cfi_check for the current module. +void CrossDSOCFI::buildCFICheck() { + // FIXME: verify that __cfi_check ends up near the end of the code section, + // but before the jump slots created in LowerBitSets.
+ llvm::DenseSet<uint64_t> BitSetIds; + NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); + + if (BitSetNM) + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) + if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) + BitSetIds.insert(TypeId->getZExtValue()); + + LLVMContext &Ctx = M->getContext(); + Constant *C = M->getOrInsertFunction( + "__cfi_check", + FunctionType::get( + Type::getVoidTy(Ctx), + {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, + false)); + Function *F = dyn_cast<Function>(C); + F->setAlignment(4096); + auto args = F->arg_begin(); + Argument &CallSiteTypeId = *(args++); + CallSiteTypeId.setName("CallSiteTypeId"); + Argument &Addr = *(args++); + Addr.setName("Addr"); + assert(args == F->arg_end()); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); + IRBuilder<> IRBTrap(TrapBB); + Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); + llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + IRBTrap.CreateUnreachable(); + + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); + IRBuilder<> IRBExit(ExitBB); + IRBExit.CreateRetVoid(); + + IRBuilder<> IRB(BB); + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); + for (uint64_t TypeId : BitSetIds) { + ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); + BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); + IRBuilder<> IRBTest(TestBB); + Function *BitsetTestFn = + Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + + Value *Test = IRBTest.CreateCall( + BitsetTestFn, {&Addr, MetadataAsValue::get( + Ctx, ConstantAsMetadata::get(CaseTypeId))}); + BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); + BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); + + SI->addCase(CaseTypeId, TestBB); + ++TypeIds; + } +} + +bool CrossDSOCFI::runOnModule(Module &M) { + if (M.getModuleFlag("Cross-DSO CFI") == nullptr) + return false; + buildCFICheck(); + return true; +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d0447640259e..4de3d95ab11d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -35,6 +35,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <map> #include <set> #include <tuple> @@ -121,14 +122,6 @@ namespace { typedef SmallVector<const Use *, 8> UseVector; - // Map each LLVM function to corresponding metadata with debug info. If - // the function is replaced with another one, we should patch the pointer - // to LLVM function in metadata. - // As the code generation for module is finished (and DIBuilder is - // finalized) we assume that subprogram descriptors won't be changed, and - // they are stored in map for short duration anyway. - DenseMap<const Function *, DISubprogram *> FunctionDIs; - protected: // DAH uses this to specify a different ID. explicit DAE(char &ID) : ModulePass(ID) {} @@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (Fn.hasAddressTaken()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) { + return false; + } + // Okay, we know we can transform this function if safe.
Scan its body looking for calls marked musttail or calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { @@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); - Fn.getParent()->getFunctionList().insert(&Fn, NF); + Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); NF->takeName(&Fn); // Loop over all of the callers of the function, transforming the call sites @@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. - I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(&Fn); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on non-varargs argument - // eliminations of the same function. - FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(Fn.getSubprogram()); // Fix up any BlockAddresses that refer to the function. Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); @@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) + return false; + if (Fn.use_empty()) return false; SmallVector<unsigned, 8> UnusedArgs; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I) { - Argument *Arg = I; - - if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr()) - UnusedArgs.push_back(Arg->getArgNo()); + for (Argument &Arg : Fn.args()) { + if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) + UnusedArgs.push_back(Arg.getArgNo()); } if (UnusedArgs.empty()) @@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U, if (F) { // Used in a direct call. + // The function argument is live if it is used as a bundle operand. + if (CS.isBundleOperand(U)) + return Live; + // Find the argument number. We know for sure that this use is an // argument, since if it was the function argument this would be an // indirect call and the we know can't be looking at a value of the @@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) { return; } + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (F.hasFnAttribute(Attribute::Naked)) { + MarkLive(F); + return; + } + unsigned RetCount = NumRetVals(&F); // Assume all return values are dead typedef SmallVector<Liveness, 5> RetVals; @@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) { } else { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). - Result = SurveyUses(AI, MaybeLiveArgUses); + Result = SurveyUses(&*AI, MaybeLiveArgUses); } // Mark the result. @@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again.
- F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites @@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call); + Args, "", Call->getParent()); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { @@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { " must have been a struct or an array!"); Instruction *InsertPt = Call; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock::iterator IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP)) ++IP; - InsertPt = IP; + BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + InsertPt = &*NewEdge->getFirstInsertionPt(); } // We used to return a struct or array. Instead of doing smart stuff @@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { // If this is a live argument, move the name and users over to the new // version. - I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; } else { // If this argument is dead, replace any uses of it with null constants @@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(NF); + NF->setSubprogram(F->getSubprogram()); // Now that the old function is dead, delete it. F->eraseFromParent(); @@ -1092,9 +1096,6 @@ bool DAE::runOnModule(Module &M) { bool Changed = false; - // Collect debug info descriptors for functions. - FunctionDIs = makeSubprogramMap(M); - // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates @@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { // Increment now, because the function will probably get removed (ie. // replaced by a new one).
- Function *F = I++; + Function *F = &*I++; Changed |= RemoveDeadStuffFromFunction(F); } diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index 67ba72d6a360..af313a6b001d 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -1,4 +1,5 @@ -//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===// +//===-- ElimAvailExtern.cpp - DCE unreachable internal functions +//----------------===// // // The LLVM Compiler Infrastructure // @@ -15,9 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" using namespace llvm; @@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); namespace { - struct EliminateAvailableExternally : public ModulePass { - static char ID; // Pass identification, replacement for typeid - EliminateAvailableExternally() : ModulePass(ID) { - initializeEliminateAvailableExternallyPass( - *PassRegistry::getPassRegistry()); - } +struct EliminateAvailableExternally : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternally() : ModulePass(ID) { + initializeEliminateAvailableExternallyPass( + *PassRegistry::getPassRegistry()); + } - // run - Do the EliminateAvailableExternally pass on the specified module, - // optionally updating the specified callgraph to reflect the changes. - // - bool runOnModule(Module &M) override; - }; + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M) override; +}; } char EliminateAvailableExternally::ID = 0; @@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) { bool Changed = false; // Drop initializers of available externally global variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (GlobalVariable &GV : M.globals()) { + if (!GV.hasAvailableExternallyLinkage()) continue; - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } - I->removeDeadConstantUsers(); - I->setLinkage(GlobalValue::ExternalLinkage); + GV.removeDeadConstantUsers(); + GV.setLinkage(GlobalValue::ExternalLinkage); NumVariables++; + Changed = true; } // Drop the bodies of available externally functions. 
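Alongside the range-based-for conversion, the ElimAvailExtern hunks here and below fix a silent bug: the pass mutated the module but never set Changed, so it always reported "no change". Condensed, the function loop becomes something like this sketch (statistics omitted):

    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Demote available_externally function definitions to declarations so
    // their bodies are not emitted from this module.
    static bool dropAvailableExternallyBodies(Module &M) {
      bool Changed = false;
      for (Function &F : M) {
        if (!F.hasAvailableExternallyLinkage())
          continue;
        if (!F.isDeclaration())
          F.deleteBody(); // also resets the linkage to external
        F.removeDeadConstantUsers();
        Changed = true;
      }
      return Changed;
    }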
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (Function &F : M) { + if (!F.hasAvailableExternallyLinkage()) continue; - if (!I->isDeclaration()) + if (!F.isDeclaration()) // This will set the linkage to external - I->deleteBody(); - I->removeDeadConstantUsers(); + F.deleteBody(); + F.removeDeadConstantUsers(); NumFunctions++; + Changed = true; } return Changed; diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index b9462f2ffc72..1a3b9253d72f 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -83,7 +83,7 @@ namespace { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -103,7 +103,7 @@ namespace { // Visit the Functions. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -124,7 +124,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - bool Delete = deleteStuff == (bool)Named.count(CurI); + bool Delete = deleteStuff == (bool)Named.count(&*CurI); makeVisible(*CurI, Delete); if (Delete) { @@ -143,7 +143,7 @@ namespace { } CurI->replaceAllUsesWith(Declaration); - delete CurI; + delete &*CurI; } } diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp new file mode 100644 index 000000000000..816291dac9e8 --- /dev/null +++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -0,0 +1,121 @@ +//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "forceattrs" + +static cl::list + ForceAttributes("force-attribute", cl::Hidden, + cl::desc("Add an attribute to a function. This should be a " + "pair of 'function-name:attribute-name', for " + "example -force-add-attribute=foo:noinline. 
This " + "option can be specified multiple times.")); + +static Attribute::AttrKind parseAttrKind(StringRef Kind) { + return StringSwitch(Kind) + .Case("alwaysinline", Attribute::AlwaysInline) + .Case("builtin", Attribute::Builtin) + .Case("cold", Attribute::Cold) + .Case("convergent", Attribute::Convergent) + .Case("inlinehint", Attribute::InlineHint) + .Case("jumptable", Attribute::JumpTable) + .Case("minsize", Attribute::MinSize) + .Case("naked", Attribute::Naked) + .Case("nobuiltin", Attribute::NoBuiltin) + .Case("noduplicate", Attribute::NoDuplicate) + .Case("noimplicitfloat", Attribute::NoImplicitFloat) + .Case("noinline", Attribute::NoInline) + .Case("nonlazybind", Attribute::NonLazyBind) + .Case("noredzone", Attribute::NoRedZone) + .Case("noreturn", Attribute::NoReturn) + .Case("norecurse", Attribute::NoRecurse) + .Case("nounwind", Attribute::NoUnwind) + .Case("optnone", Attribute::OptimizeNone) + .Case("optsize", Attribute::OptimizeForSize) + .Case("readnone", Attribute::ReadNone) + .Case("readonly", Attribute::ReadOnly) + .Case("argmemonly", Attribute::ArgMemOnly) + .Case("returns_twice", Attribute::ReturnsTwice) + .Case("safestack", Attribute::SafeStack) + .Case("sanitize_address", Attribute::SanitizeAddress) + .Case("sanitize_memory", Attribute::SanitizeMemory) + .Case("sanitize_thread", Attribute::SanitizeThread) + .Case("ssp", Attribute::StackProtect) + .Case("sspreq", Attribute::StackProtectReq) + .Case("sspstrong", Attribute::StackProtectStrong) + .Case("uwtable", Attribute::UWTable) + .Default(Attribute::None); +} + +/// If F has any forced attributes given on the command line, add them. +static void addForcedAttributes(Function &F) { + for (auto &S : ForceAttributes) { + auto KV = StringRef(S).split(':'); + if (KV.first != F.getName()) + continue; + + auto Kind = parseAttrKind(KV.second); + if (Kind == Attribute::None) { + DEBUG(dbgs() << "ForcedAttribute: " << KV.second + << " unknown or not handled!\n"); + continue; + } + if (F.hasFnAttribute(Kind)) + continue; + F.addFnAttr(Kind); + } +} + +PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) { + if (ForceAttributes.empty()) + return PreservedAnalyses::all(); + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Just conservatively invalidate analyses, this isn't likely to be important. + return PreservedAnalyses::none(); +} + +namespace { +struct ForceFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ForceFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeForceFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (ForceAttributes.empty()) + return false; + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Conservatively assume we changed something. 
+ return true; + } +}; +} + +char ForceFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs", + "Force set function attributes", false, false) + +Pass *llvm::createForceFunctionAttrsLegacyPass() { + return new ForceFunctionAttrsLegacyPass(); +} diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index bb5e64aef338..6dcfb3f83004 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -23,14 +23,21 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); namespace { - struct FunctionAttrs : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); - } +typedef SmallSetVector SCCNodeSet; +} - // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(CallGraphSCC &SCC) override; +namespace { +struct FunctionAttrs : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + FunctionAttrs() : CallGraphSCCPass(ID) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } - // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. - bool AddReadAttrs(const CallGraphSCC &SCC); + bool runOnSCC(CallGraphSCC &SCC) override; + bool doInitialization(CallGraph &CG) override { + Revisit.clear(); + return false; + } + bool doFinalization(CallGraph &CG) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); + } - // AddArgumentAttrs - Deduce nocapture attributes for the SCC. - bool AddArgumentAttrs(const CallGraphSCC &SCC); - - // IsFunctionMallocLike - Does this function allocate new memory? - bool IsFunctionMallocLike(Function *F, - SmallPtrSet &) const; - - // AddNoAliasAttrs - Deduce noalias attributes for the SCC. - bool AddNoAliasAttrs(const CallGraphSCC &SCC); - - // Utility methods used by inferPrototypeAttributes to add attributes - // and maintain annotation statistics. 
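The removed helpers that follow all share one guarded shape: set the attribute only when it is absent, and bump the NumAnnotated statistic, so that -stats counts real annotations rather than repeated visits. A condensed sketch of that shape (annotateNoUnwind is a hypothetical name):

    #include "llvm/ADT/Statistic.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Guarded annotate-and-count: only count when the attribute was
    // actually added, never when it was already present.
    static void annotateNoUnwind(Function &F, Statistic &NumAnnotated) {
      if (F.doesNotThrow())
        return; // already annotated: don't inflate the statistic
      F.setDoesNotThrow();
      ++NumAnnotated;
    }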
- - void setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - } - } - - void setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - } - } - - void setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F, unsigned n) { - if (!F.onlyReadsMemory(n)) { - F.setOnlyReadsMemory(n); - ++NumAnnotated; - } - } - - void setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - } - } - - // inferPrototypeAttributes - Analyze the name and prototype of the - // given function and set any applicable attributes. Returns true - // if any attributes were set and false otherwise. - bool inferPrototypeAttributes(Function &F); - - // annotateLibraryCalls - Adds attributes to well-known standard library - // call declarations. - bool annotateLibraryCalls(const CallGraphSCC &SCC); - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - private: - AliasAnalysis *AA; - TargetLibraryInfo *TLI; - }; +private: + TargetLibraryInfo *TLI; + SmallVector Revisit; +}; } char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +namespace { +/// The three kinds of memory access relevant to 'readonly' and +/// 'readnone' attributes. +enum MemoryAccessKind { + MAK_ReadNone = 0, + MAK_ReadOnly = 1, + MAK_MayWrite = 2 +}; +} -/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. -bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { - SmallPtrSet SCCNodes; +static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, + const SCCNodeSet &SCCNodes) { + FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); + if (MRB == FMRB_DoesNotAccessMemory) + // Already perfect! + return MAK_ReadNone; - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F.isDeclaration() || F.mayBeOverridden()) { + if (AliasAnalysis::onlyReadsMemory(MRB)) + return MAK_ReadOnly; + // Conservatively assume it writes to memory. + return MAK_MayWrite; + } + + // Scan the function body for instructions that may read or write memory. + bool ReadsMemory = false; + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. 
+ // Detect these now, skipping to the next instruction if one is found. + CallSite CS(cast(I)); + if (CS) { + // Ignore calls to functions in the same SCC. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + continue; + FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); + + // If the call doesn't access memory, we're done. + if (!(MRB & MRI_ModRef)) + continue; + + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // The call could access any memory. If that includes writes, give up. + if (MRB & MRI_Mod) + return MAK_MayWrite; + // If it reads, note it. + if (MRB & MRI_Ref) + ReadsMemory = true; + continue; + } + + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (!Arg->getType()->isPtrOrPtrVectorTy()) + continue; + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); + + // Skip accesses to local or constant memory as they don't impact the + // externally visible mod/ref behavior. + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + + if (MRB & MRI_Mod) + // Writes non-local memory. Give up. + return MAK_MayWrite; + if (MRB & MRI_Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + continue; + } else if (LoadInst *LI = dyn_cast(I)) { + // Ignore non-volatile loads from local memory. (Atomic is okay here.) + if (!LI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (StoreInst *SI = dyn_cast(I)) { + // Ignore non-volatile stores to local memory. (Atomic is okay here.) + if (!SI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(SI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (VAArgInst *VI = dyn_cast(I)) { + // Ignore vaargs on local memory. + MemoryLocation Loc = MemoryLocation::get(VI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + + // Any remaining instructions need to be taken seriously! Check if they + // read or write memory. + if (I->mayWriteToMemory()) + // Writes memory. Just give up. + return MAK_MayWrite; + + // If this instruction may read memory, remember that. + ReadsMemory |= I->mayReadFromMemory(); + } + + return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone; +} + +/// Deduce readonly/readnone attributes for the SCC. +template +static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) { // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (Function *F : SCCNodes) { + // Call the callable parameter to look up AA results for this function. + AAResults &AAR = AARGetter(*F); - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - assume it may write - // memory and give up. + switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) { + case MAK_MayWrite: return false; - - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); - if (MRB == AliasAnalysis::DoesNotAccessMemory) - // Already perfect! 
- continue; - - // Definitions with weak linkage may be overridden at linktime with - // something that writes memory, so treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) { - if (!AliasAnalysis::onlyReadsMemory(MRB)) - // May write memory. Just give up. - return false; - + case MAK_ReadOnly: ReadsMemory = true; - continue; - } - - // Scan the function body for instructions that may read or write memory. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - Instruction *I = &*II; - - // Some instructions can be ignored even if they read or write memory. - // Detect these now, skipping to the next instruction if one is found. - CallSite CS(cast(I)); - if (CS) { - // Ignore calls to functions in the same SCC. - if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) - continue; - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); - // If the call doesn't access arbitrary memory, we may be able to - // figure out something. - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - // If the call does access argument pointees, check each argument. - if (AliasAnalysis::doesAccessArgPointees(MRB)) - // Check whether all pointer arguments point to local memory, and - // ignore calls that only access local memory. - for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); - CI != CE; ++CI) { - Value *Arg = *CI; - if (Arg->getType()->isPointerTy()) { - AAMDNodes AAInfo; - I->getAAMetadata(AAInfo); - - MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); - if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { - if (MRB & AliasAnalysis::Mod) - // Writes non-local memory. Give up. - return false; - if (MRB & AliasAnalysis::Ref) - // Ok, it reads non-local memory. - ReadsMemory = true; - } - } - } - continue; - } - // The call could access any memory. If that includes writes, give up. - if (MRB & AliasAnalysis::Mod) - return false; - // If it reads, note it. - if (MRB & AliasAnalysis::Ref) - ReadsMemory = true; - continue; - } else if (LoadInst *LI = dyn_cast(I)) { - // Ignore non-volatile loads from local memory. (Atomic is okay here.) - if (!LI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(LI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (StoreInst *SI = dyn_cast(I)) { - // Ignore non-volatile stores to local memory. (Atomic is okay here.) - if (!SI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(SI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (VAArgInst *VI = dyn_cast(I)) { - // Ignore vaargs on local memory. - MemoryLocation Loc = MemoryLocation::get(VI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - - // Any remaining instructions need to be taken seriously! Check if they - // read or write memory. - if (I->mayWriteToMemory()) - // Writes memory. Just give up. - return false; - - // If this instruction may read memory, remember that. - ReadsMemory |= I->mayReadFromMemory(); + break; + case MAK_ReadNone: + // Nothing to do! + break; } } // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - + for (Function *F : SCCNodes) { if (F->doesNotAccessMemory()) // Already perfect! 
continue; @@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Clear out any existing attributes. AttrBuilder B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); - F->removeAttributes(AttributeSet::FunctionIndex, - AttributeSet::get(F->getContext(), - AttributeSet::FunctionIndex, B)); + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + F->removeAttributes( + AttributeSet::FunctionIndex, + AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B)); // Add in the new attribute. F->addAttribute(AttributeSet::FunctionIndex, @@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { } namespace { - // For a given pointer Argument, this retains a list of Arguments of functions - // in the same SCC that the pointer data flows into. We use this to build an - // SCC of the arguments. - struct ArgumentGraphNode { - Argument *Definition; - SmallVector Uses; - }; +/// For a given pointer Argument, this retains a list of Arguments of functions +/// in the same SCC that the pointer data flows into. We use this to build an +/// SCC of the arguments. +struct ArgumentGraphNode { + Argument *Definition; + SmallVector Uses; +}; - class ArgumentGraph { - // We store pointers to ArgumentGraphNode objects, so it's important that - // that they not move around upon insert. - typedef std::map ArgumentMapTy; +class ArgumentGraph { + // We store pointers to ArgumentGraphNode objects, so it's important that + // that they not move around upon insert. + typedef std::map ArgumentMapTy; - ArgumentMapTy ArgumentMap; + ArgumentMapTy ArgumentMap; - // There is no root node for the argument graph, in fact: - // void f(int *x, int *y) { if (...) f(x, y); } - // is an example where the graph is disconnected. The SCCIterator requires a - // single entry point, so we maintain a fake ("synthetic") root node that - // uses every node. Because the graph is directed and nothing points into - // the root, it will not participate in any SCCs (except for its own). - ArgumentGraphNode SyntheticRoot; + // There is no root node for the argument graph, in fact: + // void f(int *x, int *y) { if (...) f(x, y); } + // is an example where the graph is disconnected. The SCCIterator requires a + // single entry point, so we maintain a fake ("synthetic") root node that + // uses every node. Because the graph is directed and nothing points into + // the root, it will not participate in any SCCs (except for its own). 
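That synthetic-root trick is worth seeing in isolation. A self-contained toy (ToyNode is hypothetical; the ADT machinery is the same one used below): a directed graph with no natural entry gets a fake root that points at every node, so scc_begin() can reach all SCCs, and the root's own singleton SCC is simply skipped by callers.

    #include "llvm/ADT/GraphTraits.h"
    #include "llvm/ADT/SCCIterator.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    struct ToyNode {
      std::vector<ToyNode *> Succs;
    };

    namespace llvm {
    template <> struct GraphTraits<ToyNode *> {
      typedef ToyNode NodeType;
      typedef std::vector<ToyNode *>::iterator ChildIteratorType;
      static NodeType *getEntryNode(NodeType *N) { return N; }
      static ChildIteratorType child_begin(NodeType *N) { return N->Succs.begin(); }
      static ChildIteratorType child_end(NodeType *N) { return N->Succs.end(); }
    };
    }

    int main() {
      ToyNode A, B, Root;       // A and B are mutually unreachable...
      Root.Succs.push_back(&A); // ...so a synthetic root points at both,
      Root.Succs.push_back(&B); // giving scc_begin() a single entry node.
      unsigned N = 0;
      for (llvm::scc_iterator<ToyNode *> I = llvm::scc_begin(&Root);
           !I.isAtEnd(); ++I)
        ++N;                    // visits {A}, {B}, then {Root} itself
      llvm::outs() << N << " SCCs\n"; // prints "3 SCCs"
    }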
+ ArgumentGraphNode SyntheticRoot; - public: - ArgumentGraph() { SyntheticRoot.Definition = nullptr; } +public: + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } - typedef SmallVectorImpl::iterator iterator; + typedef SmallVectorImpl::iterator iterator; - iterator begin() { return SyntheticRoot.Uses.begin(); } - iterator end() { return SyntheticRoot.Uses.end(); } - ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + iterator begin() { return SyntheticRoot.Uses.begin(); } + iterator end() { return SyntheticRoot.Uses.end(); } + ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } - ArgumentGraphNode *operator[](Argument *A) { - ArgumentGraphNode &Node = ArgumentMap[A]; - Node.Definition = A; - SyntheticRoot.Uses.push_back(&Node); - return &Node; - } - }; + ArgumentGraphNode *operator[](Argument *A) { + ArgumentGraphNode &Node = ArgumentMap[A]; + Node.Definition = A; + SyntheticRoot.Uses.push_back(&Node); + return &Node; + } +}; - // This tracker checks whether callees are in the SCC, and if so it does not - // consider that a capture, instead adding it to the "Uses" list and - // continuing with the analysis. - struct ArgumentUsesTracker : public CaptureTracker { - ArgumentUsesTracker(const SmallPtrSet &SCCNodes) +/// This tracker checks whether callees are in the SCC, and if so it does not +/// consider that a capture, instead adding it to the "Uses" list and +/// continuing with the analysis. +struct ArgumentUsesTracker : public CaptureTracker { + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : Captured(false), SCCNodes(SCCNodes) {} - void tooManyUses() override { Captured = true; } + void tooManyUses() override { Captured = true; } - bool captured(const Use *U) override { - CallSite CS(U->getUser()); - if (!CS.getInstruction()) { Captured = true; return true; } - - Function *F = CS.getCalledFunction(); - if (!F || !SCCNodes.count(F)) { Captured = true; return true; } - - bool Found = false; - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end(); - PI != PE; ++PI, ++AI) { - if (AI == AE) { - assert(F->isVarArg() && "More params than args in non-varargs call"); - Captured = true; - return true; - } - if (PI == U) { - Uses.push_back(AI); - Found = true; - break; - } - } - assert(Found && "Capturing call-site captured nothing?"); - (void)Found; - return false; + bool captured(const Use *U) override { + CallSite CS(U->getUser()); + if (!CS.getInstruction()) { + Captured = true; + return true; } - bool Captured; // True only if certainly captured (used outside our SCC). - SmallVector Uses; // Uses within our SCC. + Function *F = CS.getCalledFunction(); + if (!F || F->isDeclaration() || F->mayBeOverridden() || + !SCCNodes.count(F)) { + Captured = true; + return true; + } - const SmallPtrSet &SCCNodes; - }; + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account for + // these. + + unsigned UseIndex = + std::distance(const_cast(CS.arg_begin()), U); + + assert(UseIndex < CS.data_operands_size() && + "Indirect function calls should have been filtered above!"); + + if (UseIndex >= CS.getNumArgOperands()) { + // Data operand, but not a argument operand -- must be a bundle operand + assert(CS.hasOperandBundles() && "Must be!"); + + // CaptureTracking told us that we're being captured by an operand bundle + // use. 
In this case it does not matter if the callee is within our SCC + // or not -- we've been captured in some unknown way, and we have to be + // conservative. + Captured = true; + return true; + } + + if (UseIndex >= F->arg_size()) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + Captured = true; + return true; + } + + Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); + return false; + } + + bool Captured; // True only if certainly captured (used outside our SCC). + SmallVector Uses; // Uses within our SCC. + + const SCCNodeSet &SCCNodes; +}; } namespace llvm { - template<> struct GraphTraits { - typedef ArgumentGraphNode NodeType; - typedef SmallVectorImpl::iterator ChildIteratorType; +template <> struct GraphTraits { + typedef ArgumentGraphNode NodeType; + typedef SmallVectorImpl::iterator ChildIteratorType; - static inline NodeType *getEntryNode(NodeType *A) { return A; } - static inline ChildIteratorType child_begin(NodeType *N) { - return N->Uses.begin(); - } - static inline ChildIteratorType child_end(NodeType *N) { - return N->Uses.end(); - } - }; - template<> struct GraphTraits - : public GraphTraits { - static NodeType *getEntryNode(ArgumentGraph *AG) { - return AG->getEntryNode(); - } - static ChildIteratorType nodes_begin(ArgumentGraph *AG) { - return AG->begin(); - } - static ChildIteratorType nodes_end(ArgumentGraph *AG) { - return AG->end(); - } - }; + static inline NodeType *getEntryNode(NodeType *A) { return A; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->Uses.begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->Uses.end(); + } +}; +template <> +struct GraphTraits : public GraphTraits { + static NodeType *getEntryNode(ArgumentGraph *AG) { + return AG->getEntryNode(); + } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { + return AG->begin(); + } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } +}; } -// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. +/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. static Attribute::AttrKind determinePointerReadAttrs(Argument *A, - const SmallPtrSet &SCCNodes) { - - SmallVector Worklist; - SmallSet Visited; - int Count = 0; + const SmallPtrSet &SCCNodes) { + + SmallVector Worklist; + SmallSet Visited; // inalloca arguments are always clobbered by the call. if (A->hasInAllocaAttr()) @@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A, // We don't need to track IsWritten. If A is written to, return immediately. 
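Both rewritten trackers now locate an argument's position with pointer arithmetic over the operand list instead of the old parallel-iterator walk, exploiting the layout noted above: the callee operand (and, for invokes, the successor blocks) follow the argument operands. The core of that computation, as a sketch (getArgumentIndex is a hypothetical helper, 3.8-era CallSite API):

    #include "llvm/IR/CallSite.h"
    #include <iterator>
    using namespace llvm;

    // Classify a Use of a call or invoke: the distance from arg_begin() is
    // directly a data-operand index, so anything at or past the argument
    // count must be an operand-bundle use.
    static bool getArgumentIndex(CallSite CS, const Use *U, unsigned &ArgNo) {
      unsigned UseIndex =
          std::distance(const_cast<const Use *>(CS.arg_begin()), U);
      if (UseIndex >= CS.getNumArgOperands())
        return false; // bundle operand: treat conservatively, not an argument
      ArgNo = UseIndex;
      return true;
    }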
for (Use &U : A->uses()) { - if (Count++ >= 20) - return Attribute::None; - Visited.insert(&U); Worklist.push_back(&U); } @@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A, while (!Worklist.empty()) { Use *U = Worklist.pop_back_val(); Instruction *I = cast(U->getUser()); - Value *V = U->get(); switch (I->getOpcode()) { case Instruction::BitCast: @@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A, return Attribute::None; } - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) { - if (A->get() == V) { - if (AI == AE) { - assert(F->isVarArg() && - "More params than args in non-varargs call."); - return Attribute::None; - } - Captures &= !CS.doesNotCapture(A - B); - if (SCCNodes.count(AI)) - continue; - if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B)) - return Attribute::None; - if (!CS.doesNotAccessMemory(A - B)) - IsRead = true; - } + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account + // for these. + + unsigned UseIndex = std::distance(CS.arg_begin(), U); + + // U cannot be the callee operand use: since we're exploring the + // transitive uses of an Argument, having such a use be a callee would + // imply the CallSite is an indirect call or invoke; and we'd take the + // early exit above. + assert(UseIndex < CS.data_operands_size() && + "Data operand use expected!"); + + bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + + if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + return Attribute::None; } + + Captures &= !CS.doesNotCapture(UseIndex); + + // Since the optimizer (by design) cannot see the data flow corresponding + // to a operand bundle use, these cannot participate in the optimistic SCC + // analysis. Instead, we model the operand bundle uses as arguments in + // call to a function external to the SCC. + if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || + IsOperandBundleUse) { + + // The accessors used on CallSite here do the right thing for calls and + // invokes with operand bundles. + + if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + return Attribute::None; + if (!CS.doesNotAccessMemory(UseIndex)) + IsRead = true; + } + AddUsersToWorklistIfCapturing(); break; } @@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A, return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; } -/// AddArgumentAttrs - Deduce nocapture attributes for the SCC. -bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { +/// Deduce nocapture attributes for the SCC. +static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - SmallPtrSet SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - if (F && !F->isDeclaration() && !F->mayBeOverridden() && - !F->hasFnAttribute(Attribute::OptimizeNone)) - SCCNodes.insert(F); - } - ArgumentGraph AG; AttrBuilder B; @@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Check each function in turn, determining which pointer arguments are not // captured. 
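For an argument with no SCC-internal uses, the loop below reduces to a direct CaptureTracking query plus an attribute insert at a 1-based index. A condensed sketch of that trivial case (markNoCaptureIfProven is a hypothetical name; the full pass uses its ArgumentUsesTracker instead of the plain query):

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/IR/Argument.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Prove the pointer argument never escapes, then attach nocapture.
    // Argument attribute indices are 1-based (0 is the return value).
    static bool markNoCaptureIfProven(Argument &A) {
      if (!A.getType()->isPointerTy() || A.hasNoCaptureAttr())
        return false;
      if (PointerMayBeCaptured(&A, /*ReturnCaptures=*/false,
                               /*StoreCaptures=*/true))
        return false;
      AttrBuilder B;
      B.addAttribute(Attribute::NoCapture);
      A.addAttr(AttributeSet::get(A.getParent()->getContext(),
                                  A.getArgNo() + 1, B));
      return true;
    }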
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or function we're trying not to optimize - only a problem - // for arguments that we pass to it. - continue; - + for (Function *F : SCCNodes) { // Definitions with weak linkage may be overridden at linktime with // something that captures pointers, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) @@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // a value can't capture arguments. Don't analyze them. if (F->onlyReadsMemory() && F->doesNotThrow() && F->getReturnType()->isVoidTy()) { - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; @@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { continue; } - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { - if (!A->getType()->isPointerTy()) continue; + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (!A->getType()->isPointerTy()) + continue; bool HasNonLocalUses = false; if (!A->hasNoCaptureAttr()) { ArgumentUsesTracker Tracker(SCCNodes); - PointerMayBeCaptured(A, &Tracker); + PointerMayBeCaptured(&*A, &Tracker); if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B)); + A->addAttr( + AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } else { // If it's not trivially captured and not trivially not captured, // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. - ArgumentGraphNode *Node = AG[A]; - for (SmallVectorImpl::iterator UI = Tracker.Uses.begin(), - UE = Tracker.Uses.end(); UI != UE; ++UI) { + ArgumentGraphNode *Node = AG[&*A]; + for (SmallVectorImpl::iterator + UI = Tracker.Uses.begin(), + UE = Tracker.Uses.end(); + UI != UE; ++UI) { Node->Uses.push_back(AG[*UI]); if (*UI != A) HasNonLocalUses = true; @@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Note that we don't allow any calls at all here, or else our result // will be dependent on the iteration order through the functions in the // SCC. - SmallPtrSet Self; - Self.insert(A); - Attribute::AttrKind R = determinePointerReadAttrs(A, Self); + SmallPtrSet Self; + Self.insert(&*A); + Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { AttrBuilder B; B.addAttribute(R); @@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. - for (scc_iterator I = scc_begin(&AG); !I.isAtEnd(); ++I) { + for (scc_iterator I = scc_begin(&AG); !I.isAtEnd(); ++I) { const std::vector &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { - if (!ArgumentSCC[0]->Definition) continue; // synthetic root node + if (!ArgumentSCC[0]->Definition) + continue; // synthetic root node // eg. "void f(int* x) { if (...) 
f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && @@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SCCCaptured = true; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; - SmallPtrSet ArgumentSCCNodes; + SmallPtrSet ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { @@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; - for (SmallVectorImpl::iterator UI = N->Uses.begin(), - UE = N->Uses.end(); UI != UE; ++UI) { + for (SmallVectorImpl::iterator UI = N->Uses.begin(), + UE = N->Uses.end(); + UI != UE; ++UI) { Argument *A = (*UI)->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; @@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { break; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; @@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { if (ReadAttr != Attribute::None) { AttrBuilder B, R; B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); + R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes @@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { return Changed; } -/// IsFunctionMallocLike - A function is malloc-like if it returns either null -/// or a pointer that doesn't alias any other pointer visible to the caller. -bool FunctionAttrs::IsFunctionMallocLike(Function *F, - SmallPtrSet &SCCNodes) const { +/// Tests whether a function is "malloc-like". +/// +/// A function is "malloc-like" if it returns either null or a pointer that +/// doesn't alias any other pointer visible to the caller. +static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { SmallSetVector FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast(I->getTerminator())) @@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, if (Instruction *RVI = dyn_cast(RetVal)) switch (RVI->getOpcode()) { - // Extend the analysis by looking upwards. - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::AddrSpaceCast: - FlowsToReturn.insert(RVI->getOperand(0)); - continue; - case Instruction::Select: { - SelectInst *SI = cast(RVI); - FlowsToReturn.insert(SI->getTrueValue()); - FlowsToReturn.insert(SI->getFalseValue()); - continue; - } - case Instruction::PHI: { - PHINode *PN = cast(RVI); - for (Value *IncValue : PN->incoming_values()) - FlowsToReturn.insert(IncValue); - continue; - } + // Extend the analysis by looking upwards. 
+ case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast(RVI); + for (Value *IncValue : PN->incoming_values()) + FlowsToReturn.insert(IncValue); + continue; + } - // Check whether the pointer came from an allocation. - case Instruction::Alloca: + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) break; - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(RVI); - if (CS.paramHasAttr(0, Attribute::NoAlias)) - break; - if (CS.getCalledFunction() && - SCCNodes.count(CS.getCalledFunction())) - break; - } // fall-through - default: - return false; // Did not come from an allocation. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + break; + } // fall-through + default: + return false; // Did not come from an allocation. } if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) @@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, return true; } -/// AddNoAliasAttrs - Deduce noalias attributes for the SCC. -bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { - SmallPtrSet SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); - +/// Deduce noalias attributes for the SCC. +static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // Check each function in turn, determining which functions return noalias // pointers. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - skip it; - return false; - + for (Function *F : SCCNodes) { // Already noalias. if (F->doesNotAlias(0)) continue; @@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { if (F->isDeclaration() || F->mayBeOverridden()) return false; - // We annotate noalias return values, which are only applicable to + // We annotate noalias return values, which are only applicable to // pointer types. if (!F->getReturnType()->isPointerTy()) continue; - if (!IsFunctionMallocLike(F, SCCNodes)) + if (!isFunctionMallocLike(F, SCCNodes)) return false; } bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (Function *F : SCCNodes) { if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; @@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } -/// inferPrototypeAttributes - Analyze the name and prototype of the -/// given function and set any applicable attributes. Returns true -/// if any attributes were set and false otherwise. -bool FunctionAttrs::inferPrototypeAttributes(Function &F) { - if (F.hasFnAttribute(Attribute::OptimizeNone)) - return false; +/// Tests whether this function is known to not return null. 
+/// +/// Requires that the function returns a pointer. +/// +/// Returns true if it believes the function will not return a null, and sets +/// \p Speculative based on whether the returned conclusion is a speculative +/// conclusion due to SCC calls. +static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI, bool &Speculative) { + assert(F->getReturnType()->isPointerTy() && + "nonnull only meaningful on pointer types"); + Speculative = false; - FunctionType *FTy = F.getFunctionType(); - LibFunc::Func TheLibFunc; - if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) - return false; + SmallSetVector FlowsToReturn; + for (BasicBlock &BB : *F) + if (auto *Ret = dyn_cast(BB.getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); - switch (TheLibFunc) { - case LibFunc::strlen: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strchr: - case LibFunc::strrchr: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::strcpy: - case LibFunc::stpcpy: - case LibFunc::strcat: - case LibFunc::strncat: - case LibFunc::strncpy: - case LibFunc::stpncpy: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strxfrm: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strcmp: //0,1 - case LibFunc::strspn: // 0,1 - case LibFunc::strncmp: // 0,1 - case LibFunc::strcspn: //0,1 - case LibFunc::strcoll: //0,1 - case LibFunc::strcasecmp: // 0,1 - case LibFunc::strncasecmp: // - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::strstr: - case LibFunc::strpbrk: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::strtok: - case LibFunc::strtok_r: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::scanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::setbuf: - case LibFunc::setvbuf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strdup: - case LibFunc::strndup: - if 
(FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat: - case LibFunc::statvfs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::sscanf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::sprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::snprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::setitimer: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::system: - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "system" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::malloc: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::memcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::memchr: - case LibFunc::memrchr: - if (FTy->getNumParams() != 3) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::memcpy: - case LibFunc::memccpy: - case LibFunc::memmove: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::memalign: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotAlias(F, 0); - break; - case LibFunc::mkdir: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::mktime: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::realloc: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - break; - case LibFunc::read: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::rewind: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::rename: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::readlink: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::write: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "write" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::bcopy: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::bcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::bzero: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::calloc: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::chmod: - case LibFunc::chown: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ctermid: - case LibFunc::clearerr: - case LibFunc::closedir: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atof: - case LibFunc::atoll: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::access: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fdopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::feof: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::ftell: - case LibFunc::fgetc: - case LibFunc::fseeko: - case LibFunc::ftello: - case LibFunc::fileno: - case LibFunc::fflush: - case LibFunc::fclose: - case LibFunc::fsetpos: - case LibFunc::flockfile: - case LibFunc::funlockfile: - case LibFunc::ftrylockfile: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::ferror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - break; - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::fgets: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - 
!FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - break; - case LibFunc::fread: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fwrite: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fputs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fscanf: - case LibFunc::fprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fgetpos: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::getc: - case LibFunc::getlogin_r: - case LibFunc::getc_unlocked: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::getenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::gets: - case LibFunc::getchar: - setDoesNotThrow(F); - break; - case LibFunc::getitimer: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::getpwnam: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ungetc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::uname: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::unlink: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::unsetenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::utime: - case LibFunc::utimes: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::puts: - case LibFunc::printf: - case 
LibFunc::perror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::pread: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pread" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::pwrite: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pwrite" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putchar: - setDoesNotThrow(F); - break; - case LibFunc::popen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::pclose: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::vscanf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vsscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vfscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::valloc: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::vprintf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vfprintf: - case LibFunc::vsprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vsnprintf: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::open: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "open" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::opendir: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::tmpfile: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::times: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - break; - case LibFunc::lstat: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::lchown: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::qsort: - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return false; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - break; - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_strtok_r: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::under_IO_getc: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::under_IO_putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::dunder_isoc99_scanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_isoc99_sscanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fopen64: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case 
LibFunc::fseeko64:
-  case LibFunc::ftello64:
-    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 1);
-    break;
-  case LibFunc::tmpfile64:
-    if (!FTy->getReturnType()->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotAlias(F, 0);
-    break;
-  case LibFunc::fstat64:
-  case LibFunc::fstatvfs64:
-    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 2);
-    break;
-  case LibFunc::open64:
-    if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
-      return false;
-    // May throw; "open" is a valid pthread cancellation point.
-    setDoesNotCapture(F, 1);
-    setOnlyReadsMemory(F, 1);
-    break;
-  case LibFunc::gettimeofday:
-    if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
-        !FTy->getParamType(1)->isPointerTy())
-      return false;
-    // Currently some platforms have the restrict keyword on the arguments to
-    // gettimeofday. To be conservative, do not add noalias to gettimeofday's
-    // arguments.
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 1);
-    setDoesNotCapture(F, 2);
-    break;
-  default:
-    // Didn't mark any attributes.
-    return false;
+  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+    Value *RetVal = FlowsToReturn[i];
+
+    // If this value is locally known to be non-null, we're good.
+    if (isKnownNonNull(RetVal, &TLI))
+      continue;
+
+    // Otherwise, we need to look upwards since we can't make any local
+    // conclusions.
+    Instruction *RVI = dyn_cast<Instruction>(RetVal);
+    if (!RVI)
+      return false;
+    switch (RVI->getOpcode()) {
+    // Extend the analysis by looking upwards.
+    case Instruction::BitCast:
+    case Instruction::GetElementPtr:
+    case Instruction::AddrSpaceCast:
+      FlowsToReturn.insert(RVI->getOperand(0));
+      continue;
+    case Instruction::Select: {
+      SelectInst *SI = cast<SelectInst>(RVI);
+      FlowsToReturn.insert(SI->getTrueValue());
+      FlowsToReturn.insert(SI->getFalseValue());
+      continue;
+    }
+    case Instruction::PHI: {
+      PHINode *PN = cast<PHINode>(RVI);
+      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        FlowsToReturn.insert(PN->getIncomingValue(i));
+      continue;
+    }
+    case Instruction::Call:
+    case Instruction::Invoke: {
+      CallSite CS(RVI);
+      Function *Callee = CS.getCalledFunction();
+      // A call to a node within the SCC is assumed to return null until
+      // proven otherwise.
+      if (Callee && SCCNodes.count(Callee)) {
+        Speculative = true;
+        continue;
+      }
+      return false;
+    }
+    default:
+      return false; // Unknown source, may be null.
+    }
+    llvm_unreachable("should have either continued or returned");
   }
   return true;
 }
 
-/// annotateLibraryCalls - Adds attributes to well-known standard library
-/// call declarations.
-bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+/// Deduce nonnull attributes for the SCC.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+                            const TargetLibraryInfo &TLI) {
+  // Speculatively assume that all functions in the SCC return only nonnull
+  // pointers. We may refute this as we analyze functions.
+  bool SCCReturnsNonNull = true;
+
   bool MadeChange = false;
 
-  // Check each function in turn annotating well-known library function
-  // declarations with attributes.
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
-    Function *F = (*I)->getFunction();
+  // Check each function in turn, determining which functions return nonnull
+  // pointers.
+  for (Function *F : SCCNodes) {
+    // Already nonnull.
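For illustration only (not part of this patch), here is hypothetical C++ source whose compiled returns flow through exactly the kinds of sources the walk above accepts: a GEP of a global, and a select of two nonnull values including a call into the same SCC that gets speculated nonnull. All names are invented.

static int Storage[4];
int *viaGEP() { return &Storage[2]; }   // becomes a GetElementPtr source
int *viaSelect(bool C) {
  return C ? &Storage[0] : viaGEP();    // a Select of two nonnull values
}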
+    if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                        Attribute::NonNull))
+      continue;
 
-    if (F && F->isDeclaration())
-      MadeChange |= inferPrototypeAttributes(*F);
+    // Definitions with weak linkage may be overridden at link time, so
+    // treat them like declarations.
+    if (F->isDeclaration() || F->mayBeOverridden())
+      return false;
+
+    // We annotate nonnull return values, which are only applicable to
+    // pointer types.
+    if (!F->getReturnType()->isPointerTy())
+      continue;
+
+    bool Speculative = false;
+    if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+      if (!Speculative) {
+        // Mark the function eagerly since we may discover a function
+        // which prevents us from speculating about the entire SCC.
+        DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+        F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+        ++NumNonNullReturn;
+        MadeChange = true;
+      }
+      continue;
+    }
+    // At least one function returns something which could be null; we can't
+    // speculate any more.
+    SCCReturnsNonNull = false;
+  }
+
+  if (SCCReturnsNonNull) {
+    for (Function *F : SCCNodes) {
+      if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                          Attribute::NonNull) ||
+          !F->getReturnType()->isPointerTy())
+        continue;
+
+      DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+      F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+      ++NumNonNullReturn;
+      MadeChange = true;
+    }
   }
 
   return MadeChange;
 }
 
-bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
-  AA = &getAnalysis<AliasAnalysis>();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+static bool setDoesNotRecurse(Function &F) {
+  if (F.doesNotRecurse())
+    return false;
+  F.setDoesNotRecurse();
+  ++NumNoRecurse;
+  return true;
+}
 
-  bool Changed = annotateLibraryCalls(SCC);
-  Changed |= AddReadAttrs(SCC);
-  Changed |= AddArgumentAttrs(SCC);
-  Changed |= AddNoAliasAttrs(SCC);
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+                              SmallVectorImpl<WeakVH> &Revisit) {
+  // Try to identify functions that do not recurse.
+
+  // If the SCC contains multiple nodes we know for sure there is recursion.
+  if (!SCC.isSingular())
+    return false;
+
+  const CallGraphNode *CGN = *SCC.begin();
+  Function *F = CGN->getFunction();
+  if (!F || F->isDeclaration() || F->doesNotRecurse())
+    return false;
+
+  // If all of the calls in F are identifiable and are to norecurse functions,
+  // F is norecurse. This check also detects self-recursion, as F is not
+  // currently marked norecurse, so a call from F to itself will not count as
+  // a call to a norecurse function.
+  if (std::all_of(CGN->begin(), CGN->end(),
+                  [](const CallGraphNode::CallRecord &CR) {
+                    Function *F = CR.second->getFunction();
+                    return F && F->doesNotRecurse();
+                  }))
+    // Every callee of F is norecurse, so F cannot recurse either.
+    return setDoesNotRecurse(*F);
+
+  // We know that F is not obviously recursive, but we haven't been able to
+  // prove that it doesn't actually recurse. Add it to the Revisit list to try
+  // again top-down later.
+  Revisit.push_back(F);
+  return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+  // If F is internal and all uses are in norecurse functions, then F is also
+  // norecurse.
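A hypothetical example (not from the patch) of where the norecurse rules fire: 'leaf' makes no calls at all, so the single-node SCC check above marks it norecurse bottom-up; 'driver' then calls only norecurse functions and is marked norecurse as well.

static int leaf(int X) { return X * X + 1; }        // no calls: norecurse
int driver(int X) { return leaf(X) + leaf(X + 1); } // all callees norecurse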
+  if (F->doesNotRecurse())
+    return false;
+  if (F->hasInternalLinkage()) {
+    for (auto *U : F->users())
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (!I->getParent()->getParent()->doesNotRecurse())
+          return false;
+      } else {
+        return false;
+      }
+    return setDoesNotRecurse(*F);
+  }
+  return false;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  bool Changed = false;
+
+  // We compute dedicated AA results for each function in the SCC as needed. We
+  // use a lambda referencing external objects so that they live long enough to
+  // be queried, but we re-use them each time.
+  Optional<BasicAAResult> BAR;
+  Optional<AAResults> AAR;
+  auto AARGetter = [&](Function &F) -> AAResults & {
+    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+    return *AAR;
+  };
+
+  // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+  // whether a given CallGraphNode is in this SCC. Also track whether there are
+  // any external or opt-none nodes that will prevent us from optimizing any
+  // part of the SCC.
+  SCCNodeSet SCCNodes;
+  bool ExternalNode = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      // External node or a function we're trying not to optimize - we avoid
+      // both transforming it and leveraging any information it provides.
+      ExternalNode = true;
+      continue;
+    }
+
+    SCCNodes.insert(F);
+  }
+
+  Changed |= addReadAttrs(SCCNodes, AARGetter);
+  Changed |= addArgumentAttrs(SCCNodes);
+
+  // If we have no external nodes participating in the SCC, we can deduce some
+  // more precise attributes as well.
+  if (!ExternalNode) {
+    Changed |= addNoAliasAttrs(SCCNodes);
+    Changed |= addNonNullAttrs(SCCNodes, *TLI);
+  }
+
+  Changed |= addNoRecurseAttrs(SCC, Revisit);
+  return Changed;
+}
+
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+  bool Changed = false;
+  // When iterating over SCCs we visit functions in a bottom-up fashion. Some
+  // of the rules we have for identifying norecurse functions work best with a
+  // top-down walk, so look again at all the functions we previously marked as
+  // worth revisiting, in top-down order.
+  for (auto &F : reverse(Revisit))
+    if (F)
+      Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value *)F));
   return Changed;
 }
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 000000000000..d8b677b966f2
--- /dev/null
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements function import based on summaries.
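The AARGetter lambda above hands out a reference into Optionals owned by the enclosing frame, re-emplacing them for each function queried. A self-contained sketch of that idiom, with invented types in case the lifetime dance is unclear (C++17, not LLVM API):

#include <optional>
#include <string>

struct Result { std::string Data; };

// Stand-in for the expensive per-key construction (hypothetical).
static Result buildResult(int Key) { return Result{std::to_string(Key)}; }

int main() {
  std::optional<Result> Storage; // outlives every reference handed out
  auto Getter = [&](int Key) -> Result & {
    Storage.emplace(buildResult(Key)); // rebuild into the same slot per query
    return *Storage;
  };
  return Getter(42).Data.empty() ? 1 : 0;
}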
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+static cl::opt<unsigned> ImportInstrLimit(
+    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+    cl::desc("Only import functions with fewer than N instructions"));
+
+// Lazily load a module from \p FileName in \p Context.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+                                        LLVMContext &Context) {
+  SMDiagnostic Err;
+  DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+  std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+  if (!Result) {
+    Err.print("function-import", errs());
+    return nullptr;
+  }
+
+  Result->materializeMetadata();
+  UpgradeDebugInfo(*Result);
+
+  return Result;
+}
+
+namespace {
+/// Helper to load a Module from a file on demand and cache it for subsequent
+/// queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+  /// Cache of lazily loaded modules for import.
+  StringMap<std::unique_ptr<Module>> ModuleMap;
+
+  /// Retrieve a Module from the cache or lazily load it on demand.
+  std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+  /// Create the loader; loaded Modules are produced by \p createLazyModule.
+  ModuleLazyLoaderCache(std::function<
+      std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+      : createLazyModule(createLazyModule) {}
+
+  /// Retrieve a Module from the cache or lazily load it on demand.
+  Module &operator()(StringRef FileName);
+
+  std::unique_ptr<Module> takeModule(StringRef FileName) {
+    auto I = ModuleMap.find(FileName);
+    assert(I != ModuleMap.end());
+    std::unique_ptr<Module> Ret = std::move(I->second);
+    ModuleMap.erase(I);
+    return Ret;
+  }
+};
+
+// Get a Module for \p FileName from the cache, or load it lazily.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+  auto &Module = ModuleMap[Identifier];
+  if (!Module)
+    Module = createLazyModule(Identifier);
+  return *Module;
+}
+} // anonymous namespace
+
+/// Walk through the instructions in \p F looking for external
+/// calls not already in the \p CalledFunctions set. If any are
+/// found they are added to the \p Worklist for importing.
+static void findExternalCalls(const Module &DestModule, Function &F,
+                              const FunctionInfoIndex &Index,
+                              StringSet<> &CalledFunctions,
+                              SmallVector<StringRef, 64> &Worklist) {
+  // We need to suffix internal function calls imported from other modules;
+  // prepare the suffix ahead of time.
+  std::string Suffix;
+  if (F.getParent() != &DestModule)
+    Suffix =
+        (Twine(".llvm.") +
+         Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
+
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (isa<CallInst>(I)) {
+        auto CalledFunction = cast<CallInst>(I).getCalledFunction();
+        // Insert any new external calls that have not already been
+        // added to the set/worklist.
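ModuleLazyLoaderCache is a memoizing factory: the loader callback runs at most once per file, and later lookups hit the cache. A generic, self-contained sketch of the same pattern with assumed names (standard C++ only, not the LLVM types):

#include <functional>
#include <map>
#include <memory>
#include <string>

template <typename T> class LazyCache {
  std::map<std::string, std::unique_ptr<T>> Cache;
  std::function<std::unique_ptr<T>(const std::string &)> Factory;

public:
  explicit LazyCache(std::function<std::unique_ptr<T>(const std::string &)> F)
      : Factory(std::move(F)) {}

  T &operator()(const std::string &Key) {
    auto &Slot = Cache[Key];
    if (!Slot)
      Slot = Factory(Key); // invoked on first use of Key only
    return *Slot;
  }
};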
+        if (!CalledFunction || !CalledFunction->hasName())
+          continue;
+        // Ignore intrinsics early.
+        if (CalledFunction->isIntrinsic()) {
+          assert(CalledFunction->getIntrinsicID() != 0);
+          continue;
+        }
+        auto ImportedName = CalledFunction->getName();
+        auto Renamed = (ImportedName + Suffix).str();
+        // Rename internal functions.
+        if (CalledFunction->hasInternalLinkage()) {
+          ImportedName = Renamed;
+        }
+        auto It = CalledFunctions.insert(ImportedName);
+        if (!It.second) {
+          // This is a call to a function we already considered; skip it.
+          continue;
+        }
+        // Ignore functions already present in the destination module.
+        auto *SrcGV = DestModule.getNamedValue(ImportedName);
+        if (SrcGV) {
+          assert(isa<Function>(SrcGV) && "Name collision during import");
+          if (!cast<Function>(SrcGV)->isDeclaration()) {
+            DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+                         << ImportedName << " already in DestinationModule\n");
+            continue;
+          }
+        }
+
+        Worklist.push_back(It.first->getKey());
+        DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                     << ": Adding callee for : " << ImportedName << " : "
+                     << F.getName() << "\n");
+      }
+    }
+  }
+}
+
+// Helper function: given a worklist and an index, process the worklist and
+// decide what to import based on the summary information.
+//
+// Nothing is actually imported here; functions are materialized in their
+// source module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Functions to
+// import per Module.
+static void GetImportList(Module &DestModule,
+                          SmallVector<StringRef, 64> &Worklist,
+                          StringSet<> &CalledFunctions,
+                          std::map<StringRef, DenseSet<const GlobalValue *>>
+                              &ModuleToFunctionsToImportMap,
+                          const FunctionInfoIndex &Index,
+                          ModuleLazyLoaderCache &ModuleLoaderCache) {
+  while (!Worklist.empty()) {
+    auto CalledFunctionName = Worklist.pop_back_val();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+                 << CalledFunctionName << "\n");
+
+    // Try to get a summary for this function call.
+    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+    if (InfoList == Index.end()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+                   << CalledFunctionName << " Ignoring.\n");
+      continue;
+    }
+    assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdat can have multiple entries; FIXME: what do we do with them?
+    auto &Info = InfoList->second[0];
+    assert(Info && "Nullptr in list, error importing summaries?\n");
+
+    auto *Summary = Info->functionSummary();
+    if (!Summary) {
+      // FIXME: in case we are lazy-loading summaries, we can do it now.
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Missing summary for " << CalledFunctionName
+                   << ", error at import?\n");
+      llvm_unreachable("Missing summary");
+    }
+
+    if (Summary->instCount() > ImportInstrLimit) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+                   << CalledFunctionName << " with " << Summary->instCount()
+                   << " instructions (limit " << ImportInstrLimit << ")\n");
+      continue;
+    }
+
+    // Get the module path from the summary.
+    auto ModuleIdentifier = Summary->modulePath();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+    // The function that we will import!
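GetImportList is a worklist algorithm: pop a candidate, consult the summary, and push newly discovered callees until a fixed point. A generic sketch of that shape, with assumed callbacks standing in for the summary index and call graph:

#include <functional>
#include <set>
#include <string>
#include <vector>

void selectImports(std::vector<std::string> Worklist,
                   std::function<bool(const std::string &)> WorthImporting,
                   std::function<std::vector<std::string>(const std::string &)>
                       CalleesOf,
                   std::set<std::string> &Chosen) {
  std::set<std::string> Seen(Worklist.begin(), Worklist.end());
  while (!Worklist.empty()) {
    std::string Name = Worklist.back();
    Worklist.pop_back();
    if (!WorthImporting(Name)) // e.g. over the instruction limit
      continue;
    Chosen.insert(Name);
    for (const std::string &Callee : CalleesOf(Name))
      if (Seen.insert(Callee).second) // enqueue each callee only once
        Worklist.push_back(Callee);
  }
}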
+    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+    if (!SGV) {
+      // The destination module references functions by their renamed names
+      // when importing a function that was originally local in the source
+      // module. The source module we have might not have been renamed, so we
+      // try to remove the suffix added during the renaming to recover the
+      // original name in the source module.
+      std::pair<StringRef, StringRef> Split =
+          CalledFunctionName.split(".llvm.");
+      SGV = SrcModule.getNamedValue(Split.first);
+      assert(SGV && "Can't find function to import in source module");
+    }
+    if (!SGV) {
+      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "', error in the summary?\n");
+    }
+
+    Function *F = dyn_cast<Function>(SGV);
+    if (!F && isa<GlobalAlias>(SGV)) {
+      auto *SGA = dyn_cast<GlobalAlias>(SGV);
+      F = dyn_cast<Function>(SGA->getBaseObject());
+      CalledFunctionName = F->getName();
+    }
+    assert(F && "Imported Function is ... not a Function");
+
+    // We cannot import weak_any functions/aliases without possibly affecting
+    // the order they are seen and selected by the linker, changing program
+    // semantics.
+    if (SGV->hasWeakAnyLinkage()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Ignoring import request for weak-any "
+                   << (isa<Function>(SGV) ? "function " : "alias ")
+                   << CalledFunctionName << " from "
+                   << SrcModule.getModuleIdentifier() << "\n");
+      continue;
+    }
+
+    // Add the function to the import list.
+    auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+    Entry.insert(F);
+
+    // Process the newly imported function and add its callees to the worklist.
+    F->materialize();
+    findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+  }
+}
+
+// Automatically import functions in Module \p DestModule based on the
+// summaries index.
+//
+// The current implementation imports every called function that exists in the
+// summaries index.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+  DEBUG(dbgs() << "Starting import for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  unsigned ImportedCount = 0;
+
+  /// First step is collecting the called external functions.
+  StringSet<> CalledFunctions;
+  SmallVector<StringRef, 64> Worklist;
+  for (auto &F : DestModule) {
+    if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+    findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+  }
+  if (Worklist.empty())
+    return false;
+
+  /// Second step: for every call to an external function, try to import it.
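The ".llvm." suffix handling above runs in both directions. A small sketch of the scheme using plain std::string (the real code uses Twine and StringRef::split; names here are invented):

#include <string>

// An internal function imported from the module with the given index id gets
// a ".llvm.<id>" suffix appended to its name.
std::string promotedName(const std::string &Name, unsigned ModuleId) {
  return Name + ".llvm." + std::to_string(ModuleId);
}

// The reverse lookup strips everything from ".llvm." onward; if the marker is
// absent, the whole name is returned, matching StringRef::split's behavior.
std::string originalName(const std::string &Promoted) {
  return Promoted.substr(0, Promoted.find(".llvm."));
}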
+
+  // Linker that will be used for importing functions.
+  Linker TheLinker(DestModule);
+
+  // Map of Module -> list of Functions to import from the Module.
+  std::map<StringRef, DenseSet<const GlobalValue *>>
+      ModuleToFunctionsToImportMap;
+
+  // Analyze the summaries and get the list of functions to import by
+  // populating ModuleToFunctionsToImportMap.
+  ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+  GetImportList(DestModule, Worklist, CalledFunctions,
+                ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+  assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+  StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+      ModuleToTempMDValsMap;
+
+  // Do the actual import of functions now, one Module at a time.
+  for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+    // Get the module for the import.
+    auto &FunctionsToImport = FunctionsToImportPerModule.second;
+    std::unique_ptr<Module> SrcModule =
+        ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+    assert(&DestModule.getContext() == &SrcModule->getContext() &&
+           "Context mismatch");
+
+    // Save the mapping of value ids to temporary metadata created when
+    // importing this function. If we have already imported from this module,
+    // add new temporary metadata to the existing mapping.
+    auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+    if (!TempMDVals)
+      TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+    // Link in the specified functions.
+    if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+                               &Index, &FunctionsToImport, TempMDVals.get()))
+      report_fatal_error("Function Import: link error");
+
+    ImportedCount += FunctionsToImport.size();
+  }
+
+  // Now link in metadata for all modules from which we imported functions.
+  for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+       ModuleToTempMDValsMap) {
+    // Load the specified source module.
+    auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+    // Link in all necessary metadata from this module.
+    if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  return ImportedCount;
+}
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+    SummaryFile("summary-file",
+                cl::desc("The summary file to use for function importing."));
+
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+  raw_ostream &OS = errs();
+  DiagnosticPrinterRawOStream DP(OS);
+  DI.print(DP);
+  OS << '\n';
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+                        DiagnosticHandlerFunction DiagnosticHandler) {
+  std::unique_ptr<MemoryBuffer> Buffer;
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFile(Path);
+  if (std::error_code EC = BufferOrErr.getError()) {
+    Error = EC.message();
+    return nullptr;
+  }
+  Buffer = std::move(BufferOrErr.get());
+  ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+      object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
+                                              DiagnosticHandler);
+  if (std::error_code EC = ObjOrErr.getError()) {
+    Error = EC.message();
+    return nullptr;
+  }
+  return (*ObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+  /// Optional function summary index to use for importing; otherwise
+  /// the summary-file option must be specified.
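Judging from the flags defined in this file, a plausible way to exercise the pass from the opt tool would be `opt -function-import -summary-file=<index file> <module.bc>`; this invocation is inferred from the code above rather than taken from any documentation.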
+  const FunctionInfoIndex *Index;
+
+public:
+  /// Pass identification, replacement for typeid.
+  static char ID;
+
+  /// Specify pass name for debug output.
+  const char *getPassName() const override {
+    return "Function Importing";
+  }
+
+  explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+      : ModulePass(ID), Index(Index) {}
+
+  bool runOnModule(Module &M) override {
+    if (SummaryFile.empty() && !Index)
+      report_fatal_error("error: -function-import requires -summary-file or "
+                         "file from frontend\n");
+    std::unique_ptr<FunctionInfoIndex> IndexPtr;
+    if (!SummaryFile.empty()) {
+      if (Index)
+        report_fatal_error("error: -summary-file and index from frontend\n");
+      std::string Error;
+      IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+      if (!IndexPtr) {
+        errs() << "Error loading file '" << SummaryFile << "': " << Error
+               << "\n";
+        return false;
+      }
+      Index = IndexPtr.get();
+    }
+
+    // Perform the import now.
+    auto ModuleLoader = [&M](StringRef Identifier) {
+      return loadFile(Identifier, M.getContext());
+    };
+    FunctionImporter Importer(*Index, ModuleLoader);
+    return Importer.importFunctions(M);
+  }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+                      "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+                    "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+  return new FunctionImportPass(Index);
+}
+}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 61d0ff94a343..9b276ed28e2e 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) {
         ComdatMembers.insert(std::make_pair(C, &GA));
 
   // Loop over the module, adding globals which are obviously necessary.
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (Function &F : M) {
+    Changed |= RemoveUnusedGlobalValue(F);
     // Functions with external linkage are needed if they have a body
-    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
-      if (!I->isDiscardableIfUnused())
-        GlobalIsNeeded(I);
-    }
+    if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+      if (!F.isDiscardableIfUnused())
+        GlobalIsNeeded(&F);
   }
 
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (GlobalVariable &GV : M.globals()) {
+    Changed |= RemoveUnusedGlobalValue(GV);
     // Externally visible & appending globals are needed, if they have an
     // initializer.
-    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
-      if (!I->isDiscardableIfUnused())
-        GlobalIsNeeded(I);
-    }
+    if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+      if (!GV.isDiscardableIfUnused())
+        GlobalIsNeeded(&GV);
   }
 
-  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
-       I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (GlobalAlias &GA : M.aliases()) {
+    Changed |= RemoveUnusedGlobalValue(GA);
     // Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused()) { - GlobalIsNeeded(I); - } + if (!GA.isDiscardableIfUnused()) + GlobalIsNeeded(&GA); } // Now that all globals which are needed are in the AliveGlobals set, we loop @@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) { // // The first pass is to drop initializers of global variables which are dead. - std::vector DeadGlobalVars; // Keep track of dead globals - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadGlobalVars.push_back(I); // Keep track of dead globals - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + std::vector DeadGlobalVars; // Keep track of dead globals + for (GlobalVariable &GV : M.globals()) + if (!AliveGlobals.count(&GV)) { + DeadGlobalVars.push_back(&GV); // Keep track of dead globals + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } } // The second pass drops the bodies of functions which are dead... - std::vector DeadFunctions; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadFunctions.push_back(I); // Keep track of dead globals - if (!I->isDeclaration()) - I->deleteBody(); + std::vector DeadFunctions; + for (Function &F : M) + if (!AliveGlobals.count(&F)) { + DeadFunctions.push_back(&F); // Keep track of dead globals + if (!F.isDeclaration()) + F.deleteBody(); } // The third pass drops targets of aliases which are dead... std::vector DeadAliases; - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; - ++I) - if (!AliveGlobals.count(I)) { - DeadAliases.push_back(I); - I->setAliasee(nullptr); + for (GlobalAlias &GA : M.aliases()) + if (!AliveGlobals.count(&GA)) { + DeadAliases.push_back(&GA); + GA.setAliasee(nullptr); } if (!DeadFunctions.empty()) { // Now that all interferences have been dropped, delete the actual objects // themselves. - for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadFunctions[i]); - M.getFunctionList().erase(DeadFunctions[i]); + for (Function *F : DeadFunctions) { + RemoveUnusedGlobalValue(*F); + M.getFunctionList().erase(F); } NumFunctions += DeadFunctions.size(); Changed = true; } if (!DeadGlobalVars.empty()) { - for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadGlobalVars[i]); - M.getGlobalList().erase(DeadGlobalVars[i]); + for (GlobalVariable *GV : DeadGlobalVars) { + RemoveUnusedGlobalValue(*GV); + M.getGlobalList().erase(GV); } NumVariables += DeadGlobalVars.size(); Changed = true; @@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) { // Now delete any dead aliases. if (!DeadAliases.empty()) { - for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadAliases[i]); - M.getAliasList().erase(DeadAliases[i]); + for (GlobalAlias *GA : DeadAliases) { + RemoveUnusedGlobalValue(*GA); + M.getAliasList().erase(GA); } NumAliases += DeadAliases.size(); Changed = true; @@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // any globals used will be marked as needed. 
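The deletion sequence above is a sever-then-erase pattern: initializers, bodies, and aliasees are dropped first so that mutually-referencing dead globals no longer keep each other alive, and only then are the objects erased. A generic sketch of the same idea with invented types (assumes Dead is a subset of Graph):

#include <algorithm>
#include <memory>
#include <vector>

struct Node { std::vector<Node *> Refs; };

void eraseDead(std::vector<std::unique_ptr<Node>> &Graph,
               const std::vector<Node *> &Dead) {
  for (Node *N : Dead)
    N->Refs.clear(); // sever references first, so erasure order is arbitrary
  for (Node *N : Dead)
    Graph.erase(std::find_if(Graph.begin(), Graph.end(),
                             [N](const std::unique_ptr<Node> &P) {
                               return P.get() == N;
                             }));
}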
Function *F = cast(G); - if (F->hasPrefixData()) - MarkUsedGlobalsAsNeeded(F->getPrefixData()); + for (Use &U : F->operands()) + MarkUsedGlobalsAsNeeded(cast(U.get())); - if (F->hasPrologueData()) - MarkUsedGlobalsAsNeeded(F->getPrologueData()); - - if (F->hasPersonalityFn()) - MarkUsedGlobalsAsNeeded(F->getPersonalityFn()); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U) - if (GlobalValue *GV = dyn_cast(*U)) + for (BasicBlock &BB : *F) + for (Instruction &I : BB) + for (Use &U : I.operands()) + if (GlobalValue *GV = dyn_cast(U)) GlobalIsNeeded(GV); - else if (Constant *C = dyn_cast(*U)) + else if (Constant *C = dyn_cast(U)) MarkUsedGlobalsAsNeeded(C); } } @@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + for (Use &U : C->operands()) { // If we've already processed this constant there's no need to do it again. - Constant *Op = dyn_cast(*I); + Constant *Op = dyn_cast(U); if (Op && SeenConstants.insert(Op).second) MarkUsedGlobalsAsNeeded(Op); } @@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // might make it deader. // bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { - if (GV.use_empty()) return false; + if (GV.use_empty()) + return false; GV.removeDeadConstantUsers(); return GV.use_empty(); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 5ffe15dbd31d..fd7736905fe8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); STATISTIC(NumDeleted , "Number of globals deleted"); -STATISTIC(NumFnDeleted , "Number of functions deleted"); STATISTIC(NumGlobUses , "Number of global uses devirtualized"); STATISTIC(NumLocalized , "Number of globals localized"); STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); @@ -69,6 +69,7 @@ namespace { struct GlobalOpt : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -81,11 +82,14 @@ namespace { bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, - const GlobalStatus &GS); + bool deleteIfDead(GlobalValue &GV); + bool processGlobal(GlobalValue &GV); + bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + bool isPointerValueDeadOnEntryToFunction(const Function *F, + GlobalValue *GV); + TargetLibraryInfo *TLI; SmallSet 
NotDiscardableComdats; }; @@ -95,13 +99,14 @@ char GlobalOpt::ID = 0; INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } -/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker -/// as a root? If so, we might not really want to eliminate the stores to it. +/// Is this global variable possibly used by a leak checker as a root? If so, +/// we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { // A global variable is a root if it is a pointer, or could plausibly contain // a pointer. There are two challenges; one is that we could have a struct @@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { } while (1); } -/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users -/// of the global and clean up any that obviously don't assign the global a -/// value that isn't dynamically allocated. -/// +/// This GV is a pointer root. Loop over all users of the global and clean up +/// any that obviously don't assign the global a value that isn't dynamically +/// allocated. static bool CleanupPointerRootUsers(GlobalVariable *GV, const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where @@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, return Changed; } -/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all -/// users of the global, cleaning up the obvious ones. This is largely just a -/// quick scan over the use list to clean up the easy and obvious cruft. This -/// returns true if it made a change. +/// We just marked GV constant. Loop over all users of the global, cleaning up +/// the obvious ones. This is largely just a quick scan over the use list to +/// clean up the easy and obvious cruft. This returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, return Changed; } -/// isSafeSROAElementUse - Return true if the specified instruction is a safe -/// user of a derived expression from a global that we want to SROA. +/// Return true if the specified instruction is a safe user of a derived +/// expression from a global that we want to SROA. static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast(V)) @@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) { } -/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. -/// Look at it and its uses and decide whether it is safe to SROA this global. -/// +/// U is a direct user of the specified global value. Look at it and its uses +/// and decide whether it is safe to SROA this global. static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. 
if (!isa(U) && @@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { return true; } -/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it -/// is safe for us to perform this transformation. -/// +/// Look at all uses of the global and decide whether it is safe for us to +/// perform this transformation. static bool GlobalUsersSafeToSRA(GlobalValue *GV) { for (User *U : GV->users()) if (!IsUserOfGlobalSafeForSRA(U, GV)) @@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } -/// SRAGlobal - Perform scalar replacement of aggregates on the specified global -/// variable. This opens the door for other optimizations by exposing the -/// behavior of the program in a more fine-grained way. We have determined that -/// this transformation is safe already. We return the first global variable we +/// Perform scalar replacement of aggregates on the specified global variable. +/// This opens the door for other optimizations by exposing the behavior of the +/// program in a more fine-grained way. We have determined that this +/// transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. @@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. If the original aggregate @@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. If the original aggregate @@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewGlobals.empty()) return nullptr; - DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } -/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any -/// phi nodes we've seen to avoid reprocessing them. +/// Return true if all users of the specified value will trap if the value is +/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid +/// reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl &PHIs) { for (const User *U : V->users()) @@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, return true; } -/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads -/// from GV will trap if the loaded value is null. Note that this also permits -/// comparisons of the loaded value against null, as a special case. 
+/// Return true if all uses of any loads from GV will trap if the loaded value +/// is null. Note that this also permits comparisons of the loaded value +/// against null, as a special case. static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { for (const User *U : GV->users()) if (const LoadInst *LI = dyn_cast(U)) { @@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } -/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null -/// value stored into it. If there are uses of the loaded value that would trap -/// if the loaded value is dynamically null, then we know that they cannot be -/// reachable with a null optimize away the load. +/// The specified global has only one non-null value stored into it. If there +/// are uses of the loaded value that would trap if the loaded value is +/// dynamically null, then we know that they cannot be reachable with a null +/// optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, } if (Changed) { - DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n"); ++NumGlobUses; } @@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, return Changed; } -/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the -/// instructions that are foldable. +/// Walk the use list of V, constant folding all of the instructions that are +/// foldable. static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) @@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL, } } -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. +/// This function takes the specified global variable, and transforms the +/// program as if it always contained the result of the specified malloc. +/// Because it is always the result of the specified malloc, there is no reason +/// to actually DO the malloc. Instead, turn the malloc into a global, and any +/// loads of GV as uses of the new global. static GlobalVariable * OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, const DataLayout &DL, @@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. 
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - GlobalType, false, - GlobalValue::InternalLinkage, - UndefValue::get(GlobalType), - GV->getName()+".body", - GV, - GV->getThreadLocalMode()); + GlobalVariable *NewGV = new GlobalVariable( + *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, + GV->getThreadLocalMode()); // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update @@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, cast(InitBool->user_back())->eraseFromParent(); delete InitBool; } else - GV->getParent()->getGlobalList().insert(GV, InitBool); + GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); @@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, return NewGV; } -/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking -/// to make sure that there are no complex uses of V. We permit simple things -/// like dereferencing the pointer, but not storing through the address, unless -/// it is to the specified global. +/// Scan the use-list of V checking to make sure that there are no complex uses +/// of V. We permit simple things like dereferencing the pointer, but not +/// storing through the address, unless it is to the specified global. static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, SmallPtrSetImpl &PHIs) { @@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, return true; } -/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV -/// somewhere. Transform all uses of the allocation into loads from the -/// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// The Alloc pointer is stored into GV somewhere. Transform all uses of the +/// allocation into loads from the global and uses of the resultant pointer. +/// Further, delete the store into GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { @@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, } } -/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi -/// of a load) are simple enough to perform heap SRA on. This permits GEP's -/// that index through the array and struct field, icmps of null, and PHIs. +/// Verify that all uses of V (a load, or a phi of a load) are simple enough to +/// perform heap SRA on. This permits GEP's that index through the array and +/// struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, SmallPtrSetImpl &LoadUsingPHIs, SmallPtrSetImpl &LoadUsingPHIsPerLoad) { @@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, } -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from -/// GV are simple enough to perform HeapSRA, return true. +/// If all users of values loaded from GV are simple enough to perform HeapSRA, +/// return true. 
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet LoadUsingPHIs; @@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, return FieldVals[FieldNo] = Result; } -/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from -/// the load, rewrite the derived value to use the HeapSRoA'd load. +/// Given a load instruction and a value derived from the load, rewrite the +/// derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { @@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, } } -/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr -/// is a value loaded from the global. Eliminate all uses of Ptr, making them -/// use FieldGlobals instead. All uses of loaded values satisfy -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. +/// We are performing Heap SRoA on a global. Ptr is a value loaded from the +/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. +/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { @@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. +/// CI is an allocation of an array of structures. Break it up into multiple +/// allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { @@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *FieldTy = STy->getElementType(FieldNo); PointerType *PFieldTy = PointerType::get(FieldTy, AS); - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->getThreadLocalMode()); + GlobalVariable *NGV = new GlobalVariable( + *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), + nullptr, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); unsigned TypeSize = DL.getTypeAllocSize(FieldTy); @@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); + BasicBlock *ContBB = + OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. @@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // CI is no longer needed, remove it. CI->eraseFromParent(); - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. 
+ /// As we process loads, if we can't immediately update all uses of the load, + /// keep track of what scalarized loads are inserted for a given load. DenseMap > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; @@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, return cast(FieldGlobals[0]); } -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, +/// This function is called when we see a pointer global variable with a single +/// value stored it that is a malloc or cast of malloc. +static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // If this is a malloc of an abstract type, don't touch it. @@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // (2048 bytes currently), as we don't want to introduce a 16M global or // something. if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); + OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, CI = cast(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), - DL, TLI); + PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL, + TLI); return true; } return false; } -// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge -// that only one value (besides its initializer) is ever stored to the global. -static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, +// Try to optimize globals based on the knowledge that only one value (besides +// its initializer) is ever stored to the global. +static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. @@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { Type *MallocType = getMallocAllocatedType(CI, TLI); - if (MallocType && - TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, - DL, TLI)) + if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + Ordering, DL, TLI)) return true; } } @@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return false; } -/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only -/// two values ever stored into GV are its initializer and OtherVal. See if we -/// can shrink the global into a boolean and select between the two values -/// whenever it is used. This exposes the values to other scalar optimizations. +/// At this point, we have learned that the only two values ever stored into GV +/// are its initializer and OtherVal. See if we can shrink the global into a +/// boolean and select between the two values whenever it is used. This exposes +/// the values to other scalar optimizations. 
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { Type *GVElType = GV->getType()->getElementType(); @@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa(U) && !isa(U)) return false; - DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GV->getName()+".b", GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - GV->getParent()->getGlobalList().insert(GV, NewGV); + GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); Constant *InitVal = GV->getInitializer(); assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && @@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } +bool GlobalOpt::deleteIfDead(GlobalValue &GV) { + GV.removeDeadConstantUsers(); -/// ProcessGlobal - Analyze the specified global variable and optimize it if -/// possible. If we make a change, return true. -bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { + if (!GV.isDiscardableIfUnused()) + return false; + + if (const Comdat *C = GV.getComdat()) + if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) + return false; + + bool Dead; + if (auto *F = dyn_cast(&GV)) + Dead = F->isDefTriviallyDead(); + else + Dead = GV.use_empty(); + if (!Dead) + return false; + + DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + GV.eraseFromParent(); + ++NumDeleted; + return true; +} + +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +bool GlobalOpt::processGlobal(GlobalValue &GV) { // Do more involved optimizations if the global is internal. - GV->removeDeadConstantUsers(); - - if (GV->use_empty()) { - DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); - GV->eraseFromParent(); - ++NumDeleted; - return true; - } - - if (!GV->hasLocalLinkage()) + if (!GV.hasLocalLinkage()) return false; GlobalStatus GS; - if (GlobalStatus::analyzeGlobal(GV, GS)) + if (GlobalStatus::analyzeGlobal(&GV, GS)) return false; - if (!GS.IsCompared && !GV->hasUnnamedAddr()) { - GV->setUnnamedAddr(true); + bool Changed = false; + if (!GS.IsCompared && !GV.hasUnnamedAddr()) { + GV.setUnnamedAddr(true); NumUnnamed++; + Changed = true; } - if (GV->isConstant() || !GV->hasInitializer()) - return false; + auto *GVar = dyn_cast(&GV); + if (!GVar) + return Changed; - return ProcessInternalGlobal(GV, GVI, GS); + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS) || Changed; } -/// ProcessInternalGlobal - Analyze the specified global variable and optimize +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. + // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. 
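The entry-liveness test explained above reduces to a quadratic load/store dominance check; a minimal sketch of it as a standalone helper (hypothetical name, assuming the same DominatorTree and DataLayout the pass queries):

static bool allLoadsDominatedByStores(ArrayRef<LoadInst *> Loads,
                                      ArrayRef<StoreInst *> Stores,
                                      DominatorTree &DT,
                                      const DataLayout &DL) {
  for (LoadInst *L : Loads) {
    // A load cannot observe the value live on entry if some store to the
    // global, writing at least as many bytes as the load reads, dominates it.
    bool Covered = false;
    for (StoreInst *S : Stores)
      if (DT.dominates(S, L) &&
          DL.getTypeStoreSize(L->getType()) <=
              DL.getTypeStoreSize(S->getValueOperand()->getType())) {
        Covered = true;
        break;
      }
    if (!Covered)
      return false;
  }
  return true;
}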
+ const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector Loads; + SmallVector Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + for (auto *UU : U->users()) { + if (auto *LI = dyn_cast(UU)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast(UU)) + Stores.push_back(SI); + else + return false; + } + continue; + } + + Instruction *I = dyn_cast(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (auto *LI = dyn_cast(I)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast(I)) + Stores.push_back(SI); + else + return false; + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis(*const_cast(F)) + .getDomTree(); + + // The below check is quadratic. Check we're not going to do too many tests. + // FIXME: Even though this will always have worst-case quadratic time, we + // could put effort into minimizing the average time by putting stores that + // have been shown to dominate at least one load at the beginning of the + // Stores array, making subsequent dominance checks more likely to succeed + // early. + // + // The threshold here is fairly large because global->local demotion is a + // very powerful optimization should it fire. + const unsigned Threshold = 100; + if (Loads.size() * Stores.size() > Threshold) + return false; + + for (auto *L : Loads) { + auto *LTy = L->getType(); + if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) { + auto *STy = S->getValueOperand()->getType(); + // The load is only dominated by the store if DomTree says so + // and the number of bits loaded in L is less than or equal to + // the number of bits stored in S. + return DT.dominates(S, L) && + DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); + })) + return false; + } + // All loads have known dependences inside F, so the global can be localized. + return true; +} + +/// C may have non-instruction users. Can all of those users be turned into +/// instructions? +static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { + // We don't do this exhaustively. The most common pattern that we really need + // to care about is a constant GEP or constant bitcast - so just looking + // through one single ConstantExpr. + // + // The set of constants that this function returns true for must be able to be + // handled by makeAllConstantUsesInstructions. + for (auto *U : C->users()) { + if (isa(U)) + continue; + if (!isa(U)) + // Non instruction, non-constantexpr user; cannot convert this. + return false; + for (auto *UU : U->users()) + if (!isa(UU)) + // A constantexpr used by another constant. We don't try and recurse any + // further but just bail out at this point. + return false; + } + + return true; +} + +/// C may have non-instruction users, and +/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the +/// non-instruction users to instructions. +static void makeAllConstantUsesInstructions(Constant *C) { + SmallVector Users; + for (auto *U : C->users()) { + if (isa(U)) + Users.push_back(cast(U)); + else + // We should never get here; allNonInstructionUsersCanBeMadeInstructions + // should not have returned true for C. 
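// (Illustration, hedged: for a global @G, a typical non-instruction user is
//  a ConstantExpr such as bitcast(@G to i8*) or getelementptr(@G, 0, 1); the
//  loop below materializes each one as a real Instruction via
//  ConstantExpr::getAsInstruction() and re-points every use at it.)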
+ assert( + isa(U) && + "Can't transform non-constantexpr non-instruction to instruction!"); + } + + SmallVector UUsers; + for (auto *U : Users) { + UUsers.clear(); + for (auto *UU : U->users()) + UUsers.push_back(UU); + for (auto *UU : UUsers) { + Instruction *UI = cast(UU); + Instruction *NewU = U->getAsInstruction(); + NewU->insertBefore(UI); + UI->replaceUsesOfWith(U, NewU); + } + U->dropAllReferences(); + } +} + +/// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI, +bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS) { auto &DL = GV->getParent()->getDataLayout(); - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive), we replace - // the global with a local alloca in this function. + // If this is a first class global and has only one accessing function and + // this function is non-recursive, we replace the global with a local alloca + // in this function. // // NOTE: It doesn't make sense to promote non-single-value types since we // are just replacing static memory to stack memory. // // If the global is in different address space, don't bring it to stack. if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && + GS.AccessingFunction && GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + allNonInstructionUsersCanBeMadeInstructions(GV) && + GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); Instruction &FirstI = const_cast(*GS.AccessingFunction ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); @@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!isa(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); + makeAllConstantUsesInstructions(GV); + GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); ++NumLocalized; @@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); bool Changed; if (isLeakCheckerRoot(GV)) { @@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { const DataLayout &DL = GV->getParent()->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! 
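The LOCALIZING GLOBAL path shown earlier in this hunk can be sketched as a standalone helper (hypothetical; the real code additionally converts constant users to instructions and checks non-recursion and entry liveness first):

static AllocaInst *localizeGlobal(GlobalVariable *GV, Function *F) {
  Instruction &FirstI = F->getEntryBlock().front();
  Type *ElemTy = GV->getType()->getElementType();
  AllocaInst *Alloca = new AllocaInst(ElemTy, GV->getName(), &FirstI);
  if (!isa<UndefValue>(GV->getInitializer()))       // copy the initializer in
    new StoreInst(GV->getInitializer(), Alloca, &FirstI);
  GV->replaceAllUsesWith(Alloca);                   // demote global to stack
  GV->eraseFromParent();
  return Alloca;
}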
+ if (SRAGlobal(GV, DL)) return true; - } - } else if (GS.StoredType == GlobalStatus::StoredOnce) { + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the @@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; - } else { - GVI = GV; } ++NumSubstitute; return true; @@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI, - DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return false; } -/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified -/// function, changing them to FastCC. +/// Walk all of the direct calls of the specified function, changing them to +/// FastCC. static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) { if (isa(U)) @@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool Changed = false; // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { - Function *F = FI++; + Function *F = &*FI++; // Functions without names cannot be referenced outside this module. if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); - const Comdat *C = F->getComdat(); - bool inComdat = C && NotDiscardableComdats.count(C); - F->removeDeadConstantUsers(); - if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) { - F->eraseFromParent(); + if (deleteIfDead(*F)) { Changed = true; - ++NumFnDeleted; - } else if (F->hasLocalLinkage()) { - if (isProfitableToMakeFastCC(F) && !F->isVarArg() && - !F->hasAddressTaken()) { - // If this function has a calling convention worth changing, is not a - // varargs function, and is only called directly, promote it to use the - // Fast calling convention. - F->setCallingConv(CallingConv::Fast); - ChangeCalleesToFastCall(F); - ++NumFastCallFns; - Changed = true; - } + continue; + } - if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && - !F->hasAddressTaken()) { - // The function is not used by a trampoline intrinsic, so it is safe - // to remove the 'nest' attribute. - RemoveNestAttribute(F); - ++NumNestRemoved; - Changed = true; - } + Changed |= processGlobal(*F); + + if (!F->hasLocalLinkage()) + continue; + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. 
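// (Context, hedged: 'nest' marks the static-chain parameter filled in by
//  llvm.init.trampoline; once no trampoline can reference F, the attribute
//  only constrains optimization and is safe to drop.)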
+ RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; } } return Changed; @@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Global variables without names cannot be referenced outside this module. if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) GV->setLinkage(GlobalValue::InternalLinkage); @@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GV->setInitializer(New); } - if (GV->isDiscardableIfUnused()) { - if (const Comdat *C = GV->getComdat()) - if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage()) - continue; - Changed |= ProcessGlobal(GV, GVI); + if (deleteIfDead(*GV)) { + Changed = true; + continue; } + + Changed |= processGlobal(*GV); } return Changed; } @@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C, SmallPtrSetImpl &SimpleConstants, const DataLayout &DL); -/// isSimpleEnoughValueToCommit - Return true if the specified constant can be -/// handled by the code generator. We don't want to generate something like: +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: /// void *X = &X/42; /// because the code generator doesn't have a relocation that can handle that. /// @@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C, } -/// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast to anything -/// other than from one pointer type to another pointer type, we punt. -/// We basically just support direct accesses to globals and GEP's of -/// globals. This should be kept up to date with CommitValueTo. +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't // want to worry about them partially overlapping other stores. @@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } -/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global -/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. -/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +/// Evaluate a piece of a constantexpr store into a global initializer. This +/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the +/// GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantExpr *Addr, unsigned OpNo) { // Base case of the recursion. @@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, return ConstantVector::get(Elts); } -/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 
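A hedged miniature of the recursion in EvaluateStoreInto (hypothetical helper; it ignores the ConstantAggregateZero and vector cases the real code handles):

static Constant *replaceAggregateElt(Constant *Init, Constant *Val,
                                     unsigned Idx) {
  // Copy every element of the aggregate, swapping in Val at position Idx.
  SmallVector<Constant *, 8> Elts;
  unsigned NumElts = isa<StructType>(Init->getType())
                         ? Init->getType()->getStructNumElements()
                         : cast<ArrayType>(Init->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Elts.push_back(i == Idx ? Val : Init->getAggregateElement(i));
  if (auto *ST = dyn_cast<StructType>(Init->getType()))
    return ConstantStruct::get(ST, Elts);
  return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
}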
static void CommitValueTo(Constant *Val, Constant *Addr) { if (GlobalVariable *GV = dyn_cast(Addr)) { @@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { namespace { -/// Evaluator - This class evaluates LLVM IR, producing the Constant -/// representing each SSA instruction. Changes to global variables are stored -/// in a mapping that can be iterated over after the evaluation is complete. -/// Once an evaluation call fails, the evaluation object should not be reused. +/// This class evaluates LLVM IR, producing the Constant representing each SSA +/// instruction. Changes to global variables are stored in a mapping that can +/// be iterated over after the evaluation is complete. Once an evaluation call +/// fails, the evaluation object should not be reused. class Evaluator { public: Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) @@ -2180,15 +2320,15 @@ public: Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); } - /// EvaluateFunction - Evaluate a call to function F, returning true if - /// successful, false if we can't evaluate it. ActualArgs contains the formal - /// arguments for the function. + /// Evaluate a call to function F, returning true if successful, false if we + /// can't evaluate it. ActualArgs contains the formal arguments for the + /// function. bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs); - /// EvaluateBlock - Evaluate all instructions in block BB, returning true if - /// successful, false if we can't evaluate it. NewBB returns the next BB that - /// control flows into, or null upon return. + /// Evaluate all instructions in block BB, returning true if successful, false + /// if we can't evaluate it. NewBB returns the next BB that control flows + /// into, or null upon return. bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); Constant *getVal(Value *V) { @@ -2213,32 +2353,31 @@ public: private: Constant *ComputeLoadResult(Constant *P); - /// ValueStack - As we compute SSA register values, we store their contents - /// here. The back of the deque contains the current function and the stack - /// contains the values in the calling frames. + /// As we compute SSA register values, we store their contents here. The back + /// of the deque contains the current function and the stack contains the + /// values in the calling frames. std::deque> ValueStack; - /// CallStack - This is used to detect recursion. In pathological situations - /// we could hit exponential behavior, but at least there is nothing - /// unbounded. + /// This is used to detect recursion. In pathological situations we could hit + /// exponential behavior, but at least there is nothing unbounded. SmallVector CallStack; - /// MutatedMemory - For each store we execute, we update this map. Loads - /// check this to get the most up-to-date value. If evaluation is successful, - /// this state is committed to the process. + /// For each store we execute, we update this map. Loads check this to get + /// the most up-to-date value. If evaluation is successful, this state is + /// committed to the process. DenseMap MutatedMemory; - /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable - /// to represent its body. This vector is needed so we can delete the - /// temporary globals when we are done. + /// To 'execute' an alloca, we create a temporary global variable to represent + /// its body. 
This vector is needed so we can delete the temporary globals + /// when we are done. SmallVector, 32> AllocaTmps; - /// Invariants - These global variables have been marked invariant by the - /// static constructor. + /// These global variables have been marked invariant by the static + /// constructor. SmallPtrSet Invariants; - /// SimpleConstants - These are constants we have checked and know to be - /// simple enough to live in a static initializer of a global. + /// These are constants we have checked and know to be simple enough to live + /// in a static initializer of a global. SmallPtrSet SimpleConstants; const DataLayout &DL; @@ -2247,9 +2386,8 @@ private: } // anonymous namespace -/// ComputeLoadResult - Return the value that would be computed by a load from -/// P after the stores reflected by 'memory' have been performed. If we can't -/// decide, return null. +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. @@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return nullptr; // don't know how to evaluate. } -/// EvaluateBlock - Evaluate all instructions in block BB, returning true if -/// successful, false if we can't evaluate it. NewBB returns the next BB that -/// control flows into, or null upon return. +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. @@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa(CurInst) || isa(CurInst)) { - CallSite CS(CurInst); + CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa(CS.getInstruction())) { @@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // Continue even if we do nothing. ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; } DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); @@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (ConstantExpr *CE = dyn_cast(InstResult)) InstResult = ConstantFoldConstantExpression(CE, DL, TLI); - setVal(CurInst, InstResult); + setVal(&*CurInst, InstResult); } // If we just processed an invoke, we finished evaluating the block. @@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } } -/// EvaluateFunction - Evaluate a call to function F, returning true if -/// successful, false if we can't evaluate it. ActualArgs contains the formal -/// arguments for the function. +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs) { // Check to see if this function is already executing (recursion). 
If so, @@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++ArgNo) - setVal(AI, ActualArgs[ArgNo]); + setVal(&*AI, ActualArgs[ArgNo]); // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, // we can only evaluate any one basic block at most once. This set keeps @@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, SmallPtrSet ExecutedBlocks; // CurBB - The current basic block we're evaluating. - BasicBlock *CurBB = F->begin(); + BasicBlock *CurBB = &F->front(); BasicBlock::iterator CurInst = CurBB->begin(); @@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, } } -/// EvaluateStaticConstructor - Evaluate static constructors in the function, if -/// we can. Return true if we can, false otherwise. +/// Evaluate static constructors in the function, if we can. Return true if we +/// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. @@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - return (*A)->getName().compare((*B)->getName()); + return (*A)->stripPointerCasts()->getName().compare( + (*B)->stripPointerCasts()->getName()); } static void setUsedInitializer(GlobalVariable &V, @@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V, } namespace { -/// \brief An easy to access representation of llvm.used and llvm.compiler.used. +/// An easy to access representation of llvm.used and llvm.compiler.used. class LLVMUsed { SmallPtrSet Used; SmallPtrSet CompilerUsed; @@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;) { - Module::alias_iterator J = I++; + GlobalAlias *J = &*I++; + // Aliases without names cannot be referenced outside this module. if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*J)) { + Changed = true; + continue; + } + // If the aliasee may change at link time, nothing can be done - bail out. if (J->mayBeOverridden()) continue; @@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (RenameTarget) { // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); + Target->takeName(&*J); Target->setLinkage(J->getLinkage()); Target->setVisibility(J->getVisibility()); Target->setDLLStorageClass(J->getDLLStorageClass()); - if (Used.usedErase(J)) + if (Used.usedErase(&*J)) Used.usedInsert(Target); - if (Used.compilerUsedErase(J)) + if (Used.compilerUsedErase(&*J)) Used.compilerUsedInsert(Target); } else if (mayHaveOtherReferences(*J, Used)) continue; @@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { return Fn; } -/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ -/// destructor and can therefore be eliminated. +/// Returns whether the given function is an empty C++ destructor and can +/// therefore be eliminated. 
/// Note that we assume that other optimization passes have already simplified /// the code so we only look for a function with a single basic block, where /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and @@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) { return Changed; } + diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 50f56b0f2afe..7ea6c08b2e66 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the common infrastructure (including C bindings) for -// libLLVMIPO.a, which implements several transformations over the LLVM +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM // intermediate representation. // //===----------------------------------------------------------------------===// @@ -24,14 +24,17 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); initializeConstantMergePass(Registry); + initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); + initializeForceFunctionAttrsLegacyPassPass(Registry); initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); initializeSimpleInlinerPass(Registry); + initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizePassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); @@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); initializePruneEHPass(Registry); - initializeStripDeadPrototypesPassPass(Registry); + initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyPass(Registry); + initializeSampleProfileLoaderPass(Registry); + initializeFunctionImportPassPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp new file mode 100644 index 000000000000..d02c861a2948 --- /dev/null +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -0,0 +1,937 @@ +//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "inferattrs" + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +/// Analyze the name and prototype of the given function and set any applicable +/// attributes. +/// +/// Returns true if any attributes were set and false otherwise. 
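For example (hedged; the precise attribute set comes from the switch that follows), a bare declaration of strlen ends up annotated roughly as:

//   before:  declare i64 @strlen(i8*)
//   after:   declare i64 @strlen(i8* nocapture) nounwind readonly
// i.e. the LibFunc::strlen case fires setOnlyReadsMemory(F),
// setDoesNotThrow(F), and setDoesNotCapture(F, 1).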
+static bool inferPrototypeAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || 
!FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. 
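// (Aside, hedged: "system", like "read", "write", and "open" below, is a
//  POSIX thread-cancellation point and may unwind, so nounwind is
//  deliberately not inferred for it; only the pointer attributes are safe.)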
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fputs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= 
setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return 
false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + return false; + } +} + +static bool inferAllPrototypeAttributes(Module &M, + const TargetLibraryInfo &TLI) { + bool Changed = false; + + for (Function &F : M.functions()) + // We only infer things using the prototype if the definition isn't around + // to analyze directly. + if (F.isDeclaration()) + Changed |= inferPrototypeAttributes(F, TLI); + + return Changed; +} + +PreservedAnalyses InferFunctionAttrsPass::run(Module &M, + AnalysisManager *AM) { + auto &TLI = AM->getResult(M); + + if (!inferAllPrototypeAttributes(M, TLI)) + // If we didn't infer anything, preserve all analyses. + return PreservedAnalyses::all(); + + // Otherwise, we may have changed fundamental function attributes, so clear + // out all the passes. 
+ return PreservedAnalyses::none(); +} + +namespace { +struct InferFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + InferFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeInferFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + + bool runOnModule(Module &M) override { + auto &TLI = getAnalysis().getTLI(); + return inferAllPrototypeAttributes(M, TLI); + } +}; +} + +char InferFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) + +Pass *llvm::createInferFunctionAttrsLegacyPass() { + return new InferFunctionAttrsLegacyPass(); +} diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index dc56a02e7b7d..1704bfea0b86 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -14,10 +14,10 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -35,17 +35,15 @@ namespace { /// \brief Inliner pass which only handles "always inline" functions. class AlwaysInliner : public Inliner { - InlineCostAnalysis *ICA; public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), - ICA(nullptr) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { + : Inliner(ID, -2000000000, InsertLifetime) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -53,9 +51,6 @@ public: InlineCost getInlineCost(CallSite CS) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnSCC(CallGraphSCC &SCC) override; - using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); @@ -67,10 +62,9 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) @@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. 
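// [Illustrative sketch, not part of the original patch] A client would
// schedule the legacy wrapper registered above through the usual legacy
// pass manager (assumes llvm/IR/LegacyPassManager.h is included):
static bool runInferAttrs(Module &M) {
  legacy::PassManager PM;
  PM.add(llvm::createInferFunctionAttrsLegacyPass());
  return PM.run(M); // true if any prototype attribute was inferred
}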
if (Callee && !Callee->isDeclaration() && - CS.hasFnAttr(Attribute::AlwaysInline) && - ICA->isInlineViable(*Callee)) + CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) return InlineCost::getAlways(); return InlineCost::getNever(); } - -bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis(); - return Inliner::runOnSCC(SCC); -} - -void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - Inliner::getAnalysisUsage(AU); -} diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 9b01d81b3c7c..45609f891ed8 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +23,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; @@ -37,26 +38,30 @@ namespace { /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. class SimpleInliner : public Inliner { - InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(nullptr) { + SimpleInliner() : Inliner(ID) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) override { - return ICA->getInlineCost(CS, getInlineThreshold(CS)); + Function *Callee = CS.getCalledFunction(); + TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); } bool runOnSCC(CallGraphSCC &SCC) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + TargetTransformInfoWrapperPass *TTIWP; }; static int computeThresholdFromOptLevels(unsigned OptLevel, @@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) @@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel, } bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis(); + TTIWP = &getAnalysis(); return Inliner::runOnSCC(SCC); } void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); Inliner::getAnalysisUsage(AU); } diff --git a/lib/Transforms/IPO/Inliner.cpp 
b/lib/Transforms/IPO/Inliner.cpp index 5273c3dc3ca2..bbe5f8761d5f 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225), // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; -Inliner::Inliner(char &ID) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {} +Inliner::Inliner(char &ID) + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) { +} Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? - InlineLimit : Threshold), - InsertLifetime(InsertLifetime) {} + : CallGraphSCCPass(ID), + InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit + : Threshold), + InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); AU.addRequired(); + AU.addRequired(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { typedef DenseMap > InlinedArrayAllocasTy; -/// \brief If the inlined function had a higher stack protection level than the -/// calling function, then bump up the caller's stack protection level. -static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { - // If upgrading the SSP attribute, clear out the old SSP Attributes first. - // Having multiple SSP attributes doesn't actually hurt, but it adds useless - // clutter to the IR. - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), - AttributeSet::FunctionIndex, - B); - - if (Callee->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::SafeStack); - } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectReq); - } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectStrong); - } else if (Callee->hasFnAttribute(Attribute::StackProtect) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::StackProtectStrong)) - Caller->addFnAttr(Attribute::StackProtect); -} - /// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. 
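// [Illustrative sketch, not part of the original patch] The removed
// AdjustCallerSSPLevel above encoded a strict upgrade order for
// stack-protection attributes: safestack > sspreq > sspstrong > ssp. Its
// replacement, AttributeFuncs::mergeAttributesForInlining, centralizes the
// same idea: after inlining, the caller keeps the stronger of the two levels
// and is never downgraded. As a plain-enum sketch (assumes <algorithm>):
enum class SSPLevel { None, SSP, SSPStrong, SSPReq, SafeStack };
static SSPLevel mergeSSPLevels(SSPLevel Caller, SSPLevel Callee) {
  return std::max(Caller, Callee); // the caller is only ever bumped up
}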
/// @@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, +static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); + // We need to manually construct BasicAA directly in order to disable + // its use of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); + // Try to inline the function. Get the list of static allocas that were // inlined. - if (!InlineFunction(CS, IFI, InsertLifetime)) + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; - AdjustCallerSSPLevel(Caller, Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, @@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " << *AvailableAlloca << '\n'); + // Move affected dbg.declare calls immediately after the new alloca to + // avoid the situation when a dbg.declare precedes its alloca. + if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (User *U : MDV->users()) + if (DbgDeclareInst *DDI = dyn_cast(U)) + DDI->moveBefore(AvailableAlloca->getNextNode()); + AI->replaceAllUsesWith(AvailableAlloca); if (Align1 != Align2) { @@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - int thres = InlineThreshold; // -inline-threshold or else selected by - // overall opt level + int Threshold = InlineThreshold; // -inline-threshold or else selected by + // overall opt level // If -inline-threshold is not given, listen to the optsize attribute when it // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && + // FIXME: Use Function::optForSize(). Caller->hasFnAttribute(Attribute::OptimizeForSize); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && - OptSizeThreshold < thres) - thres = OptSizeThreshold; + OptSizeThreshold < Threshold) + Threshold = OptSizeThreshold; - // Listen to the inlinehint attribute when it would increase the threshold - // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); - bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::InlineHint); - if (InlineHint && HintThreshold > thres && - !Caller->hasFnAttribute(Attribute::MinSize)) - thres = HintThreshold; + if (!Callee || Callee->isDeclaration()) + return Threshold; - // Listen to the cold attribute when it would decrease the threshold.
- bool ColdCallee = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::Cold); + // If profile information is available, use that to adjust threshold of hot + // and cold functions. + // FIXME: The heuristics used below for determining hotness and coldness are + // based on preliminary SPEC tuning and may not be optimal. Replace this with + // a well-tuned heuristic based on *callsite* hotness and not callee hotness. + uint64_t FunctionCount = 0, MaxFunctionCount = 0; + bool HasPGOCounts = false; + if (Callee->getEntryCount() && + Callee->getParent()->getMaximumFunctionCount()) { + HasPGOCounts = true; + FunctionCount = Callee->getEntryCount().getValue(); + MaxFunctionCount = + Callee->getParent()->getMaximumFunctionCount().getValue(); + } + + // Listen to the inlinehint attribute or profile based hotness information + // when it would increase the threshold and the caller does not need to + // minimize its size. + bool InlineHint = + Callee->hasFnAttribute(Attribute::InlineHint) || + (HasPGOCounts && + FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + if (InlineHint && HintThreshold > Threshold && + !Caller->hasFnAttribute(Attribute::MinSize)) + Threshold = HintThreshold; + + // Listen to the cold attribute or profile based coldness information + // when it would decrease the threshold. + bool ColdCallee = + Callee->hasFnAttribute(Attribute::Cold) || + (HasPGOCounts && + FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); // Command line argument for InlineLimit will override the default // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, // do not use the default cold threshold even if it is smaller. if ((InlineLimit.getNumOccurrences() == 0 || ColdThreshold.getNumOccurrences() > 0) && ColdCallee && - ColdThreshold < thres) - thres = ColdThreshold; + ColdThreshold < Threshold) + Threshold = ColdThreshold; - return thres; + return Threshold; } static void emitAnalysis(CallSite CS, const Twine &Msg) { @@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); - AssumptionCacheTracker *ACT = &getAnalysis(); - auto *TLIP = getAnalysisIfAvailable(); - const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - AliasAnalysis *AA = &getAnalysis(); + ACT = &getAnalysis(); + auto &TLI = getAnalysis().getTLI(); SmallPtrSet SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. - if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) - continue; + if (Function *Callee = CS.getCalledFunction()) + if (Callee->isDeclaration()) + continue; CallSites.push_back(std::make_pair(CS, -1)); } @@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call.
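// [Illustrative worked example, not part of the original patch; helper names
// are hypothetical.] Plugging numbers into the PGO heuristic in
// getInlineThreshold above: with MaxFunctionCount = 10000, a callee whose
// entry count is 3000 satisfies FunctionCount >= 0.3 * max and is treated as
// hot (eligible for HintThreshold), while an entry count of 50 satisfies
// FunctionCount <= 0.01 * max and is treated as cold (eligible for
// ColdThreshold); everything in between keeps the base threshold.
static bool isHotCount(uint64_t Count, uint64_t MaxCount) {
  return Count >= (uint64_t)(0.3 * (double)MaxCount);
}
static bool isColdCount(uint64_t Count, uint64_t MaxCount) {
  return Count <= (uint64_t)(0.01 * (double)MaxCount);
}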
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { + if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. @@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + @@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. - for (auto I : CG) { - CallGraphNode *CGN = I.second; + for (const auto &I : CG) { + CallGraphNode *CGN = I.second.get(); Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 7950163f757d..21bb5d000bc7 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -60,6 +60,10 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef ExportList); void LoadFile(const char *Filename); + bool maybeInternalize(GlobalValue &GV, + const std::set &ExternalComdats); + void checkComdatVisibility(GlobalValue &GV, + std::set &ExternalComdats); bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) { } } -static bool shouldInternalize(const GlobalValue &GV, - const std::set &ExternalNames) { +static bool isExternallyVisible(const GlobalValue &GV, + const std::set &ExternalNames) { // Function must be defined here if (GV.isDeclaration()) - return false; + return true; // Available externally is really just a "declaration with a body". if (GV.hasAvailableExternallyLinkage()) - return false; + return true; // Assume that dllexported symbols are referenced elsewhere if (GV.hasDLLExportStorageClass()) - return false; - - // Already has internal linkage - if (GV.hasLocalLinkage()) - return false; + return true; // Marked to keep external? - if (ExternalNames.count(GV.getName())) - return false; + if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) + return true; + return false; +} + +// Internalize GV if it is possible to do so, i.e. it is not externally visible +// and is not a member of an externally visible comdat. +bool InternalizePass::maybeInternalize( + GlobalValue &GV, const std::set &ExternalComdats) { + if (Comdat *C = GV.getComdat()) { + if (ExternalComdats.count(C)) + return false; + + // If a comdat is not externally visible we can drop it. + if (auto GO = dyn_cast(&GV)) + GO->setComdat(nullptr); + + if (GV.hasLocalLinkage()) + return false; + } else { + if (GV.hasLocalLinkage()) + return false; + + if (isExternallyVisible(GV, ExternalNames)) + return false; + } + + GV.setVisibility(GlobalValue::DefaultVisibility); + GV.setLinkage(GlobalValue::InternalLinkage); return true; } +// If GV is part of a comdat and is externally visible, keep track of its +// comdat so that we don't internalize any of its members. 
+void InternalizePass::checkComdatVisibility( + GlobalValue &GV, std::set &ExternalComdats) { + Comdat *C = GV.getComdat(); + if (!C) + return; + + if (isExternallyVisible(GV, ExternalNames)) + ExternalComdats.insert(C); +} + bool InternalizePass::runOnModule(Module &M) { CallGraphWrapperPass *CGPass = getAnalysisIfAvailable(); CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; - bool Changed = false; SmallPtrSet Used; collectUsedGlobalVariables(M, Used, false); + // Collect comdat visibility information for the module. + std::set ExternalComdats; + if (!M.getComdatSymbolTable().empty()) { + for (Function &F : M) + checkComdatVisibility(F, ExternalComdats); + for (GlobalVariable &GV : M.globals()) + checkComdatVisibility(GV, ExternalComdats); + for (GlobalAlias &GA : M.aliases()) + checkComdatVisibility(GA, ExternalComdats); + } + // We must assume that globals in llvm.used have a reference that not even // the linker can see, so we don't internalize them. // For llvm.compiler.used the situation is a bit fuzzy. The assembler and @@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) { } // Mark all functions not in the api as internal. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + for (Function &I : M) { + if (!maybeInternalize(I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - if (ExternalNode) // Remove a callgraph edge from the external node to this function. - ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); - Changed = true; ++NumFunctions; - DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement @@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) { // internal as well. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumGlobals; DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); } @@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all aliases that are not in the api as internal as well. for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumAliases; DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); } - return Changed; + // We do not keep track of whether this pass changed the module because + // it adds unnecessary complexity: + // 1) This pass will generally be near the start of the pass pipeline, so + // there will be no analyses to invalidate. + // 2) This pass will most likely end up changing the module and it isn't worth + // worrying about optimizing the case where the module is unchanged.
+ return true; } ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 575dce4b33df..b5410f5f7757 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis Core IPA InstCombine Scalar Support TransformUtils Vectorize +required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 41334ca5b429..8e4ad642ddd5 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -43,12 +43,13 @@ namespace { initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); + AU.addRequired(); } }; } @@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", // Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; @@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { return false; DominatorTree &DT = getAnalysis().getDomTree(); + LoopInfo &LI = getAnalysis().getLoopInfo(); bool Changed = false; // If there is more than one top-level loop in this function, extract all of @@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { } if (ShouldExtractLoop) { - // We must omit landing pads. Landing pads must accompany the invoke + // We must omit EH pads. EH pads must accompany the invoke // instruction. But this would result in a loop in the extracted // function. An infinite cycle occurs when it tries to extract that loop as // well. SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i]->isLandingPad()) { + if (ExitBlocks[i]->isEHPad()) { ShouldExtractLoop = false; break; } @@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LPM.deleteLoopFromQueue(L); + LI.updateUnloop(L); } ++NumExtracted; } @@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { // Figure out which index the basic block is in its function. 
Function::iterator BBI = MF->begin(); std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); - TranslatedBlocksToNotExtract.insert(BBI); + TranslatedBlocksToNotExtract.insert(&*BBI); } while (!BlocksToNotExtractByName.empty()) { @@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { BasicBlock &BB = *BI; if (BB.getName() != BlockName) continue; - TranslatedBlocksToNotExtract.insert(BI); + TranslatedBlocksToNotExtract.insert(&*BI); } } @@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { SplitLandingPadPreds(&*F); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (!TranslatedBlocksToNotExtract.count(BB)) - BlocksToExtract.push_back(BB); + if (!TranslatedBlocksToNotExtract.count(&*BB)) + BlocksToExtract.push_back(&*BB); } for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index c6795c623eff..7b515745c312 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -26,6 +28,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { bool BitSetInfo::containsValue( const DataLayout &DL, - const DenseMap &GlobalLayout, Value *V, + const DenseMap &GlobalLayout, Value *V, uint64_t COffset) const { - if (auto GV = dyn_cast(V)) { + if (auto GV = dyn_cast(V)) { auto I = GlobalLayout.find(GV); if (I == GlobalLayout.end()) return false; @@ -90,6 +94,21 @@ bool BitSetInfo::containsValue( return false; } +void BitSetInfo::print(raw_ostream &OS) const { + OS << "offset " << ByteOffset << " size " << BitSize << " align " + << (1 << AlignLog2); + + if (isAllOnes()) { + OS << " all-ones\n"; + return; + } + + OS << " { "; + for (uint64_t B : Bits) + OS << B << ' '; + OS << "}\n"; +} + BitSetInfo BitSetBuilder::build() { if (Min > Max) Min = 0; @@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass { Module *M; bool LinkerSubsectionsViaSymbols; + Triple::ArchType Arch; + Triple::ObjectFormatType ObjectFormat; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; Type *Int32PtrTy; IntegerType *Int64Ty; - Type *IntPtrTy; + IntegerType *IntPtrTy; // The llvm.bitsets named metadata. NamedMDNode *BitSetNM; - // Mapping from bitset mdstrings to the call sites that test them. - DenseMap> BitSetTestCallSites; + // Mapping from bitset identifiers to the call sites that test them. 
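// [Illustrative sketch, not part of the original patch] For context, a front
// end produces the calls collected in BitSetTestCallSites below roughly like
// this with IRBuilder; after this change the second operand may be arbitrary
// Metadata rather than only an MDString:
static Value *emitBitSetTest(IRBuilder<> &B, Module &M, Value *Ptr,
                             Metadata *BitSet) {
  Function *TestFn = Intrinsic::getDeclaration(&M, Intrinsic::bitset_test);
  Value *MetaArg = MetadataAsValue::get(M.getContext(), BitSet);
  Value *PtrArg = B.CreateBitCast(Ptr, B.getInt8PtrTy());
  return B.CreateCall(TestFn, {PtrArg, MetaArg}); // yields an i1
}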
+ DenseMap> BitSetTestCallSites; std::vector ByteArrayInfos; BitSetInfo - buildBitSet(MDString *BitSet, - const DenseMap &GlobalLayout); + buildBitSet(Metadata *BitSet, + const DenseMap &GlobalLayout); ByteArrayInfo *createByteArray(BitSetInfo &BSI); void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, Value *BitOffset); + void lowerBitSetCalls(ArrayRef BitSets, + Constant *CombinedGlobalAddr, + const DenseMap &GlobalLayout); Value * lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap &GlobalLayout); - void buildBitSetsFromGlobals(const std::vector &BitSets, - const std::vector &Globals); + Constant *CombinedGlobal, + const DenseMap &GlobalLayout); + void buildBitSetsFromGlobalVariables(ArrayRef BitSets, + ArrayRef Globals); + unsigned getJumpTableEntrySize(); + Type *getJumpTableEntryType(); + Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance); + void verifyBitSetMDNode(MDNode *Op); + void buildBitSetsFromFunctions(ArrayRef BitSets, + ArrayRef Functions); + void buildBitSetsFromDisjointSet(ArrayRef BitSets, + ArrayRef Globals); bool buildBitSets(); bool eraseBitSetMetadata(); @@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass { bool runOnModule(Module &M) override; }; -} // namespace +} // anonymous namespace INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", "Lower bitset metadata", false, false) @@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { Triple TargetTriple(M->getTargetTriple()); LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + Arch = TargetTriple.getArch(); + ObjectFormat = TargetTriple.getObjectFormat(); Int1Ty = Type::getInt1Ty(M->getContext()); Int8Ty = Type::getInt8Ty(M->getContext()); @@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { /// Build a bit set for BitSet using the object layouts in /// GlobalLayout. BitSetInfo LowerBitSets::buildBitSet( - MDString *BitSet, - const DenseMap &GlobalLayout) { + Metadata *BitSet, + const DenseMap &GlobalLayout) { BitSetBuilder BSB; // Compute the byte offset of each element of this bitset. @@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet( for (MDNode *Op : BitSetNM->operands()) { if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpGlobal = dyn_cast( - cast(Op->getOperand(1))->getValue()); + Constant *OpConst = + cast(Op->getOperand(1))->getValue(); + if (auto GA = dyn_cast(OpConst)) + OpConst = GA->getAliasee(); + auto OpGlobal = dyn_cast(OpConst); if (!OpGlobal) continue; uint64_t Offset = @@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() { if (LinkerSubsectionsViaSymbols) { BAI->ByteArray->replaceAllUsesWith(GEP); } else { - GlobalAlias *Alias = - GlobalAlias::create(PointerType::getUnqual(Int8Ty), - GlobalValue::PrivateLinkage, "bits", GEP, M); + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); BAI->ByteArray->replaceAllUsesWith(Alias); } BAI->ByteArray->eraseFromParent(); @@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. 
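// [Editorial note, not part of the original patch] The alias-creation calls
// in this hunk reflect an API migration visible throughout this file:
// GlobalAlias::create now takes the alias's value type plus an address space
// instead of a pointer type, i.e. (before/after, hypothetical operands):
//   GlobalAlias::create(PointerType::getUnqual(Int8Ty), Linkage, "bits", GEP, M);
//   GlobalAlias::create(Int8Ty, /*AddressSpace=*/0, Linkage, "bits", GEP, M);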
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(), + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); } @@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, /// replace the call with. Value *LowerBitSets::lowerBitSetCall( CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap &GlobalLayout) { + Constant *CombinedGlobalIntAddr, + const DenseMap &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); + return ConstantInt::getTrue(M->getContext()); - Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( - GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); + CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); BasicBlock *InitialBB = CI->getParent(); @@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. -void LowerBitSets::buildBitSetsFromGlobals( - const std::vector &BitSets, - const std::vector &Globals) { +void LowerBitSets::buildBitSetsFromGlobalVariables( + ArrayRef BitSets, ArrayRef Globals) { // Build a new global with the combined contents of the referenced globals. + // This global is a struct whose even-indexed elements contain the original + // contents of the referenced globals and whose odd-indexed elements contain + // any padding required to align the next element to the next power of 2. std::vector GlobalInits; const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getValueType()); - // Compute the amount of padding required to align the next element to the - // next power of 2. + // Compute the amount of padding required. uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; // Cap at 128 was found experimentally to have a good data/instruction @@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals( if (!GlobalInits.empty()) GlobalInits.pop_back(); Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); - auto CombinedGlobal = + auto *CombinedGlobal = new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); - const StructLayout *CombinedGlobalLayout = - DL.getStructLayout(cast(NewInit->getType())); + StructType *NewTy = cast(NewInit->getType()); + const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); // Compute the offsets of the original globals within the new global. - DenseMap GlobalLayout; + DenseMap GlobalLayout; for (unsigned I = 0; I != Globals.size(); ++I) // Multiply by 2 to account for padding elements. GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - // For each bitset in this disjoint set... - for (MDString *BS : BitSets) { - // Build the bitset. - BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - - ByteArrayInfo *BAI = 0; - - // Lower each call to llvm.bitset.test for this bitset. 
- for (CallInst *CI : BitSetTestCallSites[BS]) { - ++NumBitSetCallsLowered; - Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); - } - } + lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each // global from which we built the combined global, and replace references @@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals( if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { - GlobalAlias *GAlias = - GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, M); + assert(Globals[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, + Globals[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Globals[I]->getVisibility()); GAlias->takeName(Globals[I]); Globals[I]->replaceAllUsesWith(GAlias); } @@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals( } } +void LowerBitSets::lowerBitSetCalls( + ArrayRef BitSets, Constant *CombinedGlobalAddr, + const DenseMap &GlobalLayout) { + Constant *CombinedGlobalIntAddr = + ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); + + // For each bitset in this disjoint set... + for (Metadata *BS : BitSets) { + // Build the bitset. + BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + DEBUG({ + if (auto BSS = dyn_cast(BS)) + dbgs() << BSS->getString() << ": "; + else + dbgs() << ": "; + BSI.print(dbgs()); + }); + + ByteArrayInfo *BAI = nullptr; + + // Lower each call to llvm.bitset.test for this bitset. + for (CallInst *CI : BitSetTestCallSites[BS]) { + ++NumBitSetCallsLowered; + Value *Lowered = + lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { + if (Op->getNumOperands() != 3) + report_fatal_error( + "All operands of llvm.bitsets metadata must have 3 elements"); + if (!Op->getOperand(1)) + return; + + auto OpConstMD = dyn_cast(Op->getOperand(1)); + if (!OpConstMD) + report_fatal_error("Bit set element must be a constant"); + auto OpGlobal = dyn_cast(OpConstMD->getValue()); + if (!OpGlobal) + return; + + if (OpGlobal->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (OpGlobal->hasSection()) + report_fatal_error("Bit set element may not have an explicit section"); + + if (isa(OpGlobal) && OpGlobal->isDeclarationForLinker()) + report_fatal_error("Bit set global var element must be a definition"); + + auto OffsetConstMD = dyn_cast(Op->getOperand(2)); + if (!OffsetConstMD) + report_fatal_error("Bit set element offset must be a constant"); + auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Bit set element offset must be an integer constant"); +} + +static const unsigned kX86JumpTableEntrySize = 8; + +unsigned LowerBitSets::getJumpTableEntrySize() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return kX86JumpTableEntrySize; +} + +// Create a constant representing a jump table entry for the target. This +// consists of an instruction sequence containing a relative branch to Dest. The +// constant will be laid out at address Src+(Len*Distance) where Len is the +// target-specific jump table entry size. 
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance) { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + const unsigned kJmpPCRel32Code = 0xe9; + const unsigned kInt3Code = 0xcc; + + ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code); + + // Build a constant representing the displacement between the constant's + // address and Dest. This will resolve to a PC32 relocation referring to Dest. + Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy); + Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy); + Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt); + ConstantInt *DispOffset = + ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5); + Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset); + OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty); + + ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code); + + Constant *Fields[] = { + Jmp, OffsetedDisp, Int3, Int3, Int3, + }; + return ConstantStruct::getAnon(Fields, /*Packed=*/true); +} + +Type *LowerBitSets::getJumpTableEntryType() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return StructType::get(M->getContext(), + {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty}, + /*Packed=*/true); +} + +/// Given a disjoint set of bitsets and functions, build a jump table for the +/// functions, build the bit sets and lower the llvm.bitset.test calls. +void LowerBitSets::buildBitSetsFromFunctions(ArrayRef BitSets, + ArrayRef Functions) { + // Unlike the global bitset builder, the function bitset builder cannot + // re-arrange functions in a particular order and base its calculations on the + // layout of the functions' entry points, as we have no idea how large a + // particular function will end up being (the size could even depend on what + // this pass does!) Instead, we build a jump table, which is a block of code + // consisting of one branch instruction for each of the functions in the bit + // set that branches to the target function, and redirect any taken function + // addresses to the corresponding jump table entry. In the object file's + // symbol table, the symbols for the target functions also refer to the jump + // table entries, so that addresses taken outside the module will pass any + // verification done inside the module. + // + // In more concrete terms, suppose we have three functions f, g, h which are + // members of a single bitset, and a function foo that returns their + // addresses: + // + // f: + // mov 0, %eax + // ret + // + // g: + // mov 1, %eax + // ret + // + // h: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // To create a jump table for these functions, we instruct the LLVM code + // generator to output a jump table in the .text section. This is done by + // representing the instructions in the jump table as an LLVM constant and + // placing them in a global variable in the .text section. 
The end result will + // (conceptually) look like this: + // + // f: + // jmp .Ltmp0 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // g: + // jmp .Ltmp1 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // h: + // jmp .Ltmp2 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // .Ltmp0: + // mov 0, %eax + // ret + // + // .Ltmp1: + // mov 1, %eax + // ret + // + // .Ltmp2: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // Because the addresses of f, g, h are evenly spaced at a power of 2, in the + // normal case the check can be carried out using the same kind of simple + // arithmetic that we normally use for globals. + + assert(!Functions.empty()); + + // Build a simple layout based on the regular layout of jump tables. + DenseMap GlobalLayout; + unsigned EntrySize = getJumpTableEntrySize(); + for (unsigned I = 0; I != Functions.size(); ++I) + GlobalLayout[Functions[I]] = I * EntrySize; + + // Create a constant to hold the jump table. + ArrayType *JumpTableType = + ArrayType::get(getJumpTableEntryType(), Functions.size()); + auto JumpTable = new GlobalVariable(*M, JumpTableType, + /*isConstant=*/true, + GlobalValue::PrivateLinkage, nullptr); + JumpTable->setSection(ObjectFormat == Triple::MachO + ? "__TEXT,__text,regular,pure_instructions" + : ".text"); + lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getGetElementPtr( + JumpTableType, JumpTable, + ArrayRef{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + Functions[I]->getType()); + if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) { + Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Functions[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0, + Functions[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Functions[I]->getVisibility()); + GAlias->takeName(Functions[I]); + Functions[I]->replaceAllUsesWith(GAlias); + } + if (!Functions[I]->isDeclarationForLinker()) + Functions[I]->setLinkage(GlobalValue::PrivateLinkage); + } + + // Build and set the jump table's initializer. + std::vector JumpTableEntries; + for (unsigned I = 0; I != Functions.size(); ++I) + JumpTableEntries.push_back( + createJumpTableEntry(JumpTable, Functions[I], I)); + JumpTable->setInitializer( + ConstantArray::get(JumpTableType, JumpTableEntries)); +} + +void LowerBitSets::buildBitSetsFromDisjointSet( + ArrayRef BitSets, ArrayRef Globals) { + llvm::DenseMap BitSetIndices; + llvm::DenseMap GlobalIndices; + for (unsigned I = 0; I != BitSets.size(); ++I) + BitSetIndices[BitSets[I]] = I; + for (unsigned I = 0; I != Globals.size(); ++I) + GlobalIndices[Globals[I]] = I; + + // For each bitset, build a set of indices that refer to globals referenced by + // the bitset. 
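// [Illustrative worked example, not part of the original patch; the helper
// name is hypothetical.] The rel32 field built by createJumpTableEntry above
// is the displacement from the end of the 5-byte jmp instruction to the
// target: entry I starts at Src + I * 8 (kX86JumpTableEntrySize == 8) and
// its jmp ends 5 bytes later, so:
static int64_t jumpTableRel32(int64_t Src, int64_t Dest, unsigned I) {
  return Dest - (Src + (int64_t)I * 8 + 5); // truncated to 32 bits on emission
}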
+ std::vector> BitSetMembers(BitSets.size()); + if (BitSetNM) { + for (MDNode *Op : BitSetNM->operands()) { + // Op = { bitset name, global, offset } + if (!Op->getOperand(1)) + continue; + auto I = BitSetIndices.find(Op->getOperand(0)); + if (I == BitSetIndices.end()) + continue; + + auto OpGlobal = dyn_cast( + cast(Op->getOperand(1))->getValue()); + if (!OpGlobal) + continue; + BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); + } + } + + // Order the sets of indices by size. The GlobalLayoutBuilder works best + // when given small index sets first. + std::stable_sort( + BitSetMembers.begin(), BitSetMembers.end(), + [](const std::set &O1, const std::set &O2) { + return O1.size() < O2.size(); + }); + + // Create a GlobalLayoutBuilder and provide it with index sets as layout + // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as + // close together as possible. + GlobalLayoutBuilder GLB(Globals.size()); + for (auto &&MemSet : BitSetMembers) + GLB.addFragment(MemSet); + + // Build the bitsets from this disjoint set. + if (Globals.empty() || isa(Globals[0])) { + // Build a vector of global variables with the computed layout. + std::vector OrderedGVs(Globals.size()); + auto OGI = OrderedGVs.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto GV = dyn_cast(Globals[Offset]); + if (!GV) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OGI++ = GV; + } + } + + buildBitSetsFromGlobalVariables(BitSets, OrderedGVs); + } else { + // Build a vector of functions with the computed layout. + std::vector OrderedFns(Globals.size()); + auto OFI = OrderedFns.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto Fn = dyn_cast(Globals[Offset]); + if (!Fn) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OFI++ = Fn; + } + } + + buildBitSetsFromFunctions(BitSets, OrderedFns); + } +} + /// Lower all bit sets in this module. bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = @@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() { // Equivalence class set containing bitsets and the globals they reference. // This is used to partition the set of bitsets in the module into disjoint // sets. - typedef EquivalenceClasses> + typedef EquivalenceClasses> GlobalClassesTy; GlobalClassesTy GlobalClasses; + // Verify the bitset metadata and build a mapping from bitset identifiers to + // their last observed index in BitSetNM. This will be used later to + // deterministically order the list of bitset identifiers. + llvm::DenseMap BitSetIdIndices; + if (BitSetNM) { + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) { + MDNode *Op = BitSetNM->getOperand(I); + verifyBitSetMDNode(Op); + BitSetIdIndices[Op->getOperand(0)] = I; + } + } + for (const Use &U : BitSetTestFunc->uses()) { auto CI = cast(U.getUser()); auto BitSetMDVal = dyn_cast(CI->getArgOperand(1)); - if (!BitSetMDVal || !isa(BitSetMDVal->getMetadata())) + if (!BitSetMDVal) report_fatal_error( - "Second argument of llvm.bitset.test must be metadata string"); - auto BitSet = cast(BitSetMDVal->getMetadata()); + "Second argument of llvm.bitset.test must be metadata"); + auto BitSet = BitSetMDVal->getMetadata(); // Add the call site to the list of call sites for this bit set. We also use // BitSetTestCallSites to keep track of whether we have seen this bit set // before. If we have, we don't need to re-add the referenced globals to the // equivalence class.
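// [Illustrative sketch, not part of the original patch] EquivalenceClasses
// is the union-find structure that partitions bitsets and globals into the
// disjoint sets processed here; its minimal usage pattern looks like:
static bool sameClassDemo() {
  EquivalenceClasses<int> EC;
  EC.unionSets(1, 2);                          // class {1, 2}
  EC.unionSets(2, 3);                          // class {1, 2, 3}
  return EC.findLeader(1) == EC.findLeader(3); // true: same class
}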
- std::pair>::iterator, + std::pair>::iterator, bool> Ins = BitSetTestCallSites.insert( std::make_pair(BitSet, std::vector())); @@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() { if (!BitSetNM) continue; - // Verify the bitset metadata and add the referenced globals to the bitset's - // equivalence class. + // Add the referenced globals to the bitset's equivalence class. for (MDNode *Op : BitSetNM->operands()) { - if (Op->getNumOperands() != 3) - report_fatal_error( - "All operands of llvm.bitsets metadata must have 3 elements"); - if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpConstMD = dyn_cast(Op->getOperand(1)); - if (!OpConstMD) - report_fatal_error("Bit set element must be a constant"); - auto OpGlobal = dyn_cast(OpConstMD->getValue()); + auto OpGlobal = dyn_cast( + cast(Op->getOperand(1))->getValue()); if (!OpGlobal) continue; - auto OffsetConstMD = dyn_cast(Op->getOperand(2)); - if (!OffsetConstMD) - report_fatal_error("Bit set element offset must be a constant"); - auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); - if (!OffsetInt) - report_fatal_error( - "Bit set element offset must be an integer constant"); - CurSet = GlobalClasses.unionSets( CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); } @@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() { if (GlobalClasses.empty()) return false; - // For each disjoint set we found... + // Build a list of disjoint sets ordered by their maximum BitSetNM index + // for determinism. + std::vector> Sets; for (GlobalClassesTy::iterator I = GlobalClasses.begin(), E = GlobalClasses.end(); I != E; ++I) { if (!I->isLeader()) continue; - ++NumBitSetDisjointSets; - // Build the list of bitsets and referenced globals in this disjoint set. - std::vector BitSets; - std::vector Globals; - llvm::DenseMap BitSetIndices; - llvm::DenseMap GlobalIndices; + unsigned MaxIndex = 0; for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); MI != GlobalClasses.member_end(); ++MI) { - if ((*MI).is()) { - BitSetIndices[MI->get()] = BitSets.size(); - BitSets.push_back(MI->get()); - } else { - GlobalIndices[MI->get()] = Globals.size(); - Globals.push_back(MI->get()); - } + if ((*MI).is()) + MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get()]); + } + Sets.emplace_back(I, MaxIndex); + } + std::sort(Sets.begin(), Sets.end(), + [](const std::pair &S1, + const std::pair &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of bitsets in this disjoint set. + std::vector BitSets; + std::vector Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is()) + BitSets.push_back(MI->get()); + else + Globals.push_back(MI->get()); } - // For each bitset, build a set of indices that refer to globals referenced - // by the bitset. - std::vector> BitSetMembers(BitSets.size()); - if (BitSetNM) { - for (MDNode *Op : BitSetNM->operands()) { - // Op = { bitset name, global, offset } - if (!Op->getOperand(1)) - continue; - auto I = BitSetIndices.find(cast(Op->getOperand(0))); - if (I == BitSetIndices.end()) - continue; - - auto OpGlobal = dyn_cast( - cast(Op->getOperand(1))->getValue()); - if (!OpGlobal) - continue; - BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); - } - } - - // Order the sets of indices by size. The GlobalLayoutBuilder works best - // when given small index sets first. 
- std::stable_sort( - BitSetMembers.begin(), BitSetMembers.end(), - [](const std::set &O1, const std::set &O2) { - return O1.size() < O2.size(); - }); - - // Create a GlobalLayoutBuilder and provide it with index sets as layout - // fragments. The GlobalLayoutBuilder tries to lay out members of fragments - // as close together as possible. - GlobalLayoutBuilder GLB(Globals.size()); - for (auto &&MemSet : BitSetMembers) - GLB.addFragment(MemSet); - - // Build a vector of globals with the computed layout. - std::vector OrderedGlobals(Globals.size()); - auto OGI = OrderedGlobals.begin(); - for (auto &&F : GLB.Fragments) - for (auto &&Offset : F) - *OGI++ = Globals[Offset]; - - // Order bitsets by name for determinism. - std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) { - return S1->getString() < S2->getString(); + // Order bitsets by BitSetNM index for determinism. This ordering is stable + // as there is a one-to-one mapping between metadata and indices. + std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { + return BitSetIdIndices[M1] < BitSetIdIndices[M2]; }); - // Build the bitsets from this disjoint set. - buildBitSetsFromGlobals(BitSets, OrderedGlobals); + // Lower the bitsets in this disjoint set. + buildBitSetsFromDisjointSet(BitSets, Globals); } allocateByteArrays(); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 2e3519eac6a5..8a209a18c540 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -27,6 +27,14 @@ // -- We define Function* container class with custom "operator<" (FunctionPtr). // -- "FunctionPtr" instances are stored in std::set collection, so every // std::set::insert operation will give you result in log(N) time. +// +// As an optimization, a hash of the function structure is calculated first, and +// two functions are only compared if they have the same hash. This hash is +// cheap to compute, and has the property that if function F == G according to +// the comparison function, then hash(F) == hash(G). This consistency property +// is critical to ensuring all possible merging opportunities are exploited. +// Collisions in the hash affect the speed of the pass but not the correctness +// or determinism of the resulting transformation. // // When a match is found the functions are folded. If both functions are // overridable, we move the functionality into a new internal function and @@ -87,6 +95,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Hashing.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -97,12 +106,14 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include + using namespace llvm; #define DEBUG_TYPE "mergefunc" @@ -121,21 +132,64 @@ static cl::opt NumFunctionsForSanityCheck( namespace { +/// GlobalNumberState assigns an integer to each global value in the program, +/// which is used by the comparison routine to order references to globals. This +/// state must be preserved throughout the pass, because Functions and other +/// globals need to maintain their relative order. Globals are assigned a number +/// when they are first visited. 
This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+  struct Config : ValueMapConfig<GlobalValue *> {
+    enum { FollowRAUW = false };
+  };
+  // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+  // occurs, the mapping does not change. Tracking changes is unnecessary, and
+  // also problematic for weak symbols (which may be overwritten).
+  typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+  ValueNumberMap GlobalNumbers;
+  // The next unused serial number to assign to a global.
+  uint64_t NextNumber;
+  public:
+  GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+  uint64_t getNumber(GlobalValue* Global) {
+    ValueNumberMap::iterator MapIter;
+    bool Inserted;
+    std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+    if (Inserted)
+      NextNumber++;
+    return MapIter->second;
+  }
+  void clear() {
+    GlobalNumbers.clear();
+  }
+};
+
 /// FunctionComparator - Compares two functions to determine whether or not
 /// they will generate machine code with the same behaviour. DataLayout is
 /// used if available. The comparator always fails conservatively (erring on the
 /// side of claiming that two functions are different).
 class FunctionComparator {
 public:
-  FunctionComparator(const Function *F1, const Function *F2)
-      : FnL(F1), FnR(F2) {}
+  FunctionComparator(const Function *F1, const Function *F2,
+                     GlobalNumberState* GN)
+      : FnL(F1), FnR(F2), GlobalNumbers(GN) {}

   /// Test whether the two functions have equivalent behaviour.
   int compare();
+  /// Hash a function. Equivalent functions will have the same hash, and
+  /// unequal functions will have different hashes with high probability.
+  typedef uint64_t FunctionHash;
+  static FunctionHash functionHash(Function &);

 private:
   /// Test whether two basic blocks have equivalent behaviour.
-  int compare(const BasicBlock *BBL, const BasicBlock *BBR);
+  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);

   /// Constants comparison.
   /// Its analog to lexicographical comparison between hypothetical numbers
@@ -241,6 +295,10 @@ private:
   /// If these properties are equal - compare their contents.
   int cmpConstants(const Constant *L, const Constant *R);

+  /// Compares two global values by number. Uses the GlobalNumberState to
+  /// identify the same globals across function calls.
+  int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
   /// Assign or look up previously assigned numbers for the two values, and
   /// return whether the numbers are equal. Numbers are assigned in the order
   /// visited.
@@ -320,8 +378,9 @@ private:
   ///
   /// 1. If types are of different kind (different type IDs).
   ///    Return result of type IDs comparison, treating them as numbers.
-  /// 2. If types are vectors or integers, compare Type* values as numbers.
-  /// 3. Types has same ID, so check whether they belongs to the next group:
+  /// 2. If types are integers, check that they have the same width. If they
+  ///    are vectors, check that they have the same count and subtype.
+  /// 3.
Types have the same ID, so check whether they are one of: /// * Void /// * Float /// * Double @@ -330,8 +389,7 @@ private: /// * PPC_FP128 /// * Label /// * Metadata - /// If so - return 0, yes - we can treat these types as equal only because - /// their IDs are same. + /// We can treat these types as equal whenever their IDs are same. /// 4. If Left and Right are pointers, return result of address space /// comparison (numbers comparison). We can treat pointer types of same /// address space as equal. @@ -343,11 +401,13 @@ private: int cmpTypes(Type *TyL, Type *TyR) const; int cmpNumbers(uint64_t L, uint64_t R) const; - int cmpAPInts(const APInt &L, const APInt &R) const; int cmpAPFloats(const APFloat &L, const APFloat &R) const; - int cmpStrings(StringRef L, StringRef R) const; + int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; + int cmpMem(StringRef L, StringRef R) const; int cmpAttrs(const AttributeSet L, const AttributeSet R) const; + int cmpRangeMetadata(const MDNode* L, const MDNode* R) const; + int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; // The two functions undergoing comparison. const Function *FnL, *FnR; @@ -386,30 +446,30 @@ private: /// could be operands from further BBs we didn't scan yet. /// So it's impossible to use dominance properties in general. DenseMap sn_mapL, sn_mapR; + + // The global state we will use + GlobalNumberState* GlobalNumbers; }; class FunctionNode { mutable AssertingVH F; - + FunctionComparator::FunctionHash Hash; public: - FunctionNode(Function *F) : F(F) {} + // Note the hash is recalculated potentially multiple times, but it is cheap. + FunctionNode(Function *F) + : F(F), Hash(FunctionComparator::functionHash(*F)) {} Function *getFunc() const { return F; } + FunctionComparator::FunctionHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. void replaceBy(Function *G) const { - assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) && - "The two functions must be equal"); - F = G; } - void release() { F = 0; } - bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; - } + void release() { F = nullptr; } }; -} +} // end anonymous namespace int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { if (L < R) return -1; @@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { } int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { - if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), - (uint64_t)&R.getSemantics())) + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). 
+  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+                           APFloat::semanticsPrecision(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+                           APFloat::semanticsMaxExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+                           APFloat::semanticsMinExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+                           APFloat::semanticsSizeInBits(SR)))
+    return Res;
   return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
 }

-int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
   // Prevent heavy comparison, compare sizes first.
   if (int Res = cmpNumbers(L.size(), R.size()))
     return Res;
@@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
   return 0;
 }

+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+                                         const MDNode* R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  // Range metadata is a sequence of numbers. Make sure they are the same
+  // sequence.
+  // TODO: Note that as this is metadata, it is possible to drop and/or merge
+  // this data when considering functions to merge. Thus this comparison would
+  // return 0 (i.e. equivalent), but merging would become more complicated
+  // because the ranges would need to be unioned. It is not likely that
+  // functions differ ONLY in this metadata if they are actually the same
+  // function semantically.
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+                                                const Instruction *R) const {
+  ImmutableCallSite LCS(L);
+  ImmutableCallSite RCS(R);
+
+  assert(LCS && RCS && "Must be calls or invokes!");
+  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+  if (int Res =
+          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+    return Res;
+
+  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+    auto OBL = LCS.getOperandBundleAt(i);
+    auto OBR = RCS.getOperandBundleAt(i);
+
+    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+      return Res;
+
+    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+      return Res;
+  }
+
+  return 0;
+}
+
 /// Constants comparison:
 /// 1. Check whether type of L constant could be losslessly bitcasted to R
 ///    type.
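(The cmp* helpers above all follow the same two conventions, shown here as a minimal self-contained illustration with toy integer sequences rather than the pass's StringRef/MDNode parameters: every comparison returns -1/0/1, chains stop at the first nonzero result, and sequences compare their sizes before their contents to keep the common unequal case cheap.)

#include <cstdint>
#include <iostream>
#include <vector>

static int cmpNumbers(uint64_t L, uint64_t R) {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

static int cmpSeq(const std::vector<uint64_t> &L,
                  const std::vector<uint64_t> &R) {
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res; // cheap size check first, as in cmpMem/cmpRangeMetadata
  for (std::size_t I = 0; I < L.size(); ++I)
    if (int Res = cmpNumbers(L[I], R[I]))
      return Res; // first differing element decides
  return 0;
}

int main() {
  std::cout << cmpSeq({1, 2, 3}, {1, 2, 4}) << '\n'; // -1
  std::cout << cmpSeq({1, 2}, {1}) << '\n';          // 1
  std::cout << cmpSeq({5}, {5}) << '\n';             // 0
}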
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
   unsigned TyLWidth = 0;
   unsigned TyRWidth = 0;

-  if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+  if (auto *VecTyL = dyn_cast<VectorType>(TyL))
     TyLWidth = VecTyL->getBitWidth();
-  if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+  if (auto *VecTyR = dyn_cast<VectorType>(TyR))
     TyRWidth = VecTyR->getBitWidth();

   if (TyLWidth != TyRWidth)
@@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
   if (!L->isNullValue() && R->isNullValue())
     return -1;

+  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
+  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
+  if (GlobalValueL && GlobalValueR) {
+    return cmpGlobalValues(GlobalValueL, GlobalValueR);
+  }
+
   if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
     return Res;

+  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+    const auto *SeqR = cast<ConstantDataSequential>(R);
+    // This handles ConstantDataArray and ConstantDataVector. Note that we
+    // compare the two raw data arrays, which might differ depending on the
+    // host endianness. This isn't a problem though, because the endianness of
+    // a module will affect the order of the constants, but this order is the
+    // same for a given input module and host platform.
+    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+  }
+
   switch (L->getValueID()) {
-  case Value::UndefValueVal: return TypesRes;
+  case Value::UndefValueVal:
+  case Value::ConstantTokenNoneVal:
+    return TypesRes;
   case Value::ConstantIntVal: {
     const APInt &LInt = cast<ConstantInt>(L)->getValue();
     const APInt &RInt = cast<ConstantInt>(R)->getValue();
@@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
     }
     return 0;
   }
-  case Value::FunctionVal:
-  case Value::GlobalVariableVal:
-  case Value::GlobalAliasVal:
-  default: // Unknown constant, cast L and R pointers to numbers and compare.
-    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  case Value::BlockAddressVal: {
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for (BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block in "
+                       "its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
+  }
+  default: // Unknown constant, abort.
+    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+    llvm_unreachable("Constant ValueID not recognized.");
+    return -1;
+  }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+  return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
 }

 /// cmpType - compares two types,
 /// defines total ordering among the types set.
/// See method declaration comments for more details. int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { - PointerType *PTyL = dyn_cast(TyL); PointerType *PTyR = dyn_cast(TyR); @@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::VectorTyID: - // TyL == TyR would have returned true earlier. - return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); - + return cmpNumbers(cast(TyL)->getBitWidth(), + cast(TyR)->getBitWidth()); + case Type::VectorTyID: { + VectorType *VTyL = cast(TyL), *VTyR = cast(TyR); + if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements())) + return Res; + return cmpTypes(VTyL->getElementType(), VTyR->getElementType()); + } + // TyL == TyR would have returned true earlier, because types are uniqued. case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::PPC_FP128TyID: case Type::LabelTyID: case Type::MetadataTyID: + case Type::TokenTyID: return 0; case Type::PointerTyID: { @@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope())) return Res; - return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } if (const StoreInst *SI = dyn_cast(L)) { if (int Res = @@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpAttrs(CI->getAttributes(), cast(R)->getAttributes())) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } - if (const InvokeInst *CI = dyn_cast(L)) { - if (int Res = cmpNumbers(CI->getCallingConv(), + if (const InvokeInst *II = dyn_cast(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), cast(R)->getCallingConv())) return Res; if (int Res = - cmpAttrs(CI->getAttributes(), cast(R)->getAttributes())) + cmpAttrs(II->getAttributes(), cast(R)->getAttributes())) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + if (int Res = cmpOperandBundlesSchema(II, R)) + return Res; + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast(L)) { ArrayRef LIndices = IVI->getIndices(); @@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); - - if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), - (uint64_t)GEPR->getPointerOperand()->getType())) + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) return Res; if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) @@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, return 0; } +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const 
InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + /// Compare two values used by the two functions under pair-wise comparison. If /// this is the first time the values are seen, they're added to the mapping so /// that we will detect mismatches on next use. @@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { const InlineAsm *InlineAsmR = dyn_cast(R); if (InlineAsmL && InlineAsmR) - return cmpNumbers((uint64_t)L, (uint64_t)R); + return cmpInlineAsm(InlineAsmL, InlineAsmR); if (InlineAsmL) return 1; if (InlineAsmR) @@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { return cmpNumbers(LeftSN.first->second, RightSN.first->second); } // Test whether two basic blocks have equivalent behaviour. -int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) { BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); do { - if (int Res = cmpValues(InstL, InstR)) + if (int Res = cmpValues(&*InstL, &*InstR)) return Res; const GetElementPtrInst *GEPL = dyn_cast(InstL); @@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { if (int Res = cmpGEPs(GEPL, GEPR)) return Res; } else { - if (int Res = cmpOperations(InstL, InstR)) + if (int Res = cmpOperations(&*InstL, &*InstR)) return Res; assert(InstL->getNumOperands() == InstR->getNumOperands()); @@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { Value *OpR = InstR->getOperand(i); if (int Res = cmpValues(OpL, OpR)) return Res; - if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) - return Res; - // TODO: Already checked in cmpOperation - if (int Res = cmpTypes(OpL->getType(), OpR->getType())) - return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); } } @@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { // Test whether the two functions have equivalent behaviour. 
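(compare() follows next; before it, a quick self-contained sketch of the serial-numbering scheme cmpValues uses above via sn_mapL/sn_mapR: a value is identified by the position of its first occurrence, so two functions match when their operands follow the same use pattern regardless of names. The string "values" here are hypothetical stand-ins for llvm::Value pointers.)

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Two parallel maps assign serial numbers in order of first use; values
// match when their serial numbers match, whatever their names are.
using SerialMap = std::map<std::string, std::size_t>;

static std::size_t serial(SerialMap &M, const std::string &V) {
  // insert() leaves an existing entry untouched, so each value keeps the
  // number it was given on first sight.
  return M.insert({V, M.size()}).first->second;
}

int main() {
  // Hypothetical operand streams of the two functions being compared.
  std::vector<std::string> FnL = {"a", "b", "a"};
  std::vector<std::string> FnR = {"x", "y", "x"};
  SerialMap L, R;
  bool Equivalent = true;
  for (std::size_t I = 0; I < FnL.size(); ++I)
    Equivalent = Equivalent && serial(L, FnL[I]) == serial(R, FnR[I]);
  std::cout << Equivalent << '\n'; // 1: same use pattern, different names
}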
int FunctionComparator::compare() { - sn_mapL.clear(); sn_mapR.clear(); @@ -1001,7 +1208,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasGC()) { - if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) return Res; } @@ -1009,7 +1216,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasSection()) { - if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) return Res; } @@ -1033,7 +1240,7 @@ int FunctionComparator::compare() { ArgRI = FnR->arg_begin(), ArgLE = FnL->arg_end(); ArgLI != ArgLE; ++ArgLI, ++ArgRI) { - if (cmpValues(ArgLI, ArgRI) != 0) + if (cmpValues(&*ArgLI, &*ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -1055,7 +1262,7 @@ int FunctionComparator::compare() { if (int Res = cmpValues(BBL, BBR)) return Res; - if (int Res = compare(BBL, BBR)) + if (int Res = cmpBasicBlocks(BBL, BBR)) return Res; const TerminatorInst *TermL = BBL->getTerminator(); @@ -1073,6 +1280,68 @@ int FunctionComparator::compare() { return 0; } +namespace { +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector BBs; + SmallSet VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." 
(BB and opcode sequence) + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes + H.add(45798); + for (auto &Inst : *BB) { + H.add(Inst.getOpcode()); + } + const TerminatorInst *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + return H.getHash(); +} + + namespace { /// MergeFunctions finds functions which will generate identical machine code, @@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass { public: static char ID; MergeFunctions() - : ModulePass(ID), HasGlobalAliases(false) { + : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(), + HasGlobalAliases(false) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; private: - typedef std::set FnTreeType; + // The function comparison operator is provided here so that FunctionNodes do + // not need to become larger with another pointer. + class FunctionNodeCmp { + GlobalNumberState* GlobalNumbers; + public: + FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} + bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { + // Order first by hashes, then full function comparison. + if (LHS.getHash() != RHS.getHash()) + return LHS.getHash() < RHS.getHash(); + FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers); + return FCmp.compare() == -1; + } + }; + typedef std::set FnTreeType; + + GlobalNumberState GlobalNumbers; /// A work queue of functions that may have been modified and should be /// analyzed again. @@ -1133,17 +1419,23 @@ private: void writeAlias(Function *F, Function *G); /// Replace function F with function G in the function tree. - void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G); + void replaceFunctionInTree(const FunctionNode &FN, Function *G); /// The set of all distinct functions. Use the insert() and remove() methods - /// to modify it. + /// to modify it. The map allows efficient lookup and deferring of Functions. FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them + // in the FnTree. This must be updated carefully whenever the FnTree is + // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid + // dangling iterators into FnTree. The invariant that preserves this is that + // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. + ValueMap FNodesInTree; /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; -} // end anonymous namespace +} // end anonymous namespace char MergeFunctions::ID = 0; INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) @@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { for (std::vector::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast(*I); Function *F2 = cast(*J); - int Res1 = FunctionComparator(F1, F2).compare(); - int Res2 = FunctionComparator(F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); + int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. 
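(The sanity check continues just below; first, a self-contained sketch of the accumulator idea behind HashAccumulator64 and functionHash above. The mixing function is a splitmix64-style finalizer, an assumed stand-in for llvm::hashing::detail::hash_16_bytes, and the opcode numbers are made up.)

#include <cstdint>
#include <iostream>
#include <vector>

// Incremental 64-bit accumulator: values are mixed in one at a time, so the
// whole input never has to sit in a buffer.
class Acc64 {
  uint64_t Hash = 0x6acaa36bef8325c5ULL; // non-zero seed, as in the pass
public:
  void add(uint64_t V) {
    // splitmix64-style finalizer as a stand-in mixer.
    uint64_t X = Hash ^ (V + 0x9e3779b97f4a7c15ULL);
    X ^= X >> 30; X *= 0xbf58476d1ce4e5b9ULL;
    X ^= X >> 27; X *= 0x94d049bb133111ebULL;
    X ^= X >> 31;
    Hash = X;
  }
  uint64_t get() const { return Hash; }
};

int main() {
  // Hash a made-up "function": varargs flag, argument count, then for each
  // block a header marker followed by its opcode numbers. The marker keeps
  // {12, 34 | 56} and {12 | 34, 56} from hashing alike, mirroring the
  // H.add(45798) trick above.
  Acc64 H;
  H.add(0); // not varargs
  H.add(2); // two arguments
  std::vector<std::vector<uint64_t>> Blocks = {{12, 34}, {56}};
  for (const auto &BB : Blocks) {
    H.add(45798); // block header marker
    for (uint64_t Op : BB)
      H.add(Op);
  }
  std::cout << std::hex << H.get() << '\n';
}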
if (Res1 != -Res2) { @@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { continue; Function *F3 = cast(*K); - int Res3 = FunctionComparator(F1, F3).compare(); - int Res4 = FunctionComparator(F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); + int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); bool Transitive = true; @@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - Deferred.push_back(WeakVH(I)); + // All functions in the module, ordered by hash. Functions with a unique + // hash value are easily eliminated. + std::vector> + HashedFuncs; + for (Function &Func : M) { + if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) { + HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); + } } + std::stable_sort( + HashedFuncs.begin(), HashedFuncs.end(), + [](const std::pair &a, + const std::pair &b) { + return a.first < b.first; + }); + + auto S = HashedFuncs.begin(); + for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) { + // If the hash value matches the previous value or the next one, we must + // consider merging it. Otherwise it is dropped and never considered again. + if ((I != S && std::prev(I)->first == I->first) || + (std::next(I) != IE && std::next(I)->first == I->first) ) { + Deferred.push_back(WeakVH(I->second)); + } + } + do { std::vector Worklist; Deferred.swap(Worklist); @@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) { } while (!Deferred.empty()); FnTree.clear(); + GlobalNumbers.clear(); return Changed; } @@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { ++UI; CallSite CS(U->getUser()); if (CS && CS.isCallee(U)) { + // Transfer the called function's attributes to the call site. Due to the + // bitcast we will 'lose' ABI changing attributes because the 'called + // function' is no longer a Function* but the bitcast. Code that looks up + // the attributes from the called function will fail. + + // FIXME: This is not actually true, at least not anymore. The callsite + // will always have the same ABI affecting attributes as the callee, + // because otherwise the original input has UB. Note that Old and New + // always have matching ABI, so no attributes need to be changed. + // Transferring other attributes may help other optimizations, but that + // should be done uniformly and not in this ad-hoc way. 
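(The attribute-copying code this FIXME describes continues below. Separately, the hash prefilter added to runOnModule above is worth a tiny standalone illustration: sort candidates by hash, then keep only those whose hash matches a neighbour's, since a function with a unique hash can never compare equal to another. The names and hash values here are invented.)

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (hash, name) pairs standing in for (FunctionHash, Function *).
  std::vector<std::pair<uint64_t, std::string>> Funcs = {
      {7, "a"}, {3, "b"}, {7, "c"}, {9, "d"}};
  std::stable_sort(
      Funcs.begin(), Funcs.end(),
      [](const auto &L, const auto &R) { return L.first < R.first; });
  // Keep only entries whose hash equals a neighbour's; everything else is
  // dropped up front and never compared in full.
  for (auto I = Funcs.begin(), E = Funcs.end(); I != E; ++I) {
    bool PrevEq = I != Funcs.begin() && std::prev(I)->first == I->first;
    bool NextEq = std::next(I) != E && std::next(I)->first == I->first;
    if (PrevEq || NextEq)
      std::cout << I->second << '\n'; // prints "a" then "c"
  }
}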
+ auto &Context = New->getContext(); + auto NewFuncAttrs = New->getAttributes(); + auto CallSiteAttrs = CS.getAttributes(); + + CallSiteAttrs = CallSiteAttrs.addAttributes( + Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes()); + + for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) { + AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx); + if (Attrs.getNumSlots()) + CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs); + } + + CS.setAttributes(CallSiteAttrs); + remove(CS.getInstruction()->getParent()->getParent()); U->set(BitcastNew); } @@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); - for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); - AI != AE; ++AI) { - Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); + for (Argument & AI : NewG->args()) { + Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); ++i; } CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); + CI->setAttributes(F->getAttributes()); if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { @@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { // Replace G with an alias to F and delete G. void MergeFunctions::writeAlias(Function *F, Function *G) { - PointerType *PTy = G->getType(); - auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F); + auto *GA = GlobalAlias::create(G->getLinkage(), "", F); F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); GA->takeName(G); GA->setVisibility(G->getVisibility()); @@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -/// Replace function F for function G in the map. -void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF, +/// Replace function F by function G. +void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN, Function *G) { - Function *F = IterToF->getFunc(); - - // A total order is already guaranteed otherwise because we process strong - // functions before weak functions. - assert(((F->mayBeOverridden() && G->mayBeOverridden()) || - (!F->mayBeOverridden() && !G->mayBeOverridden())) && - "Only change functions if both are strong or both are weak"); - (void)F; - - IterToF->replaceBy(G); + Function *F = FN.getFunc(); + assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 && + "The two functions must be equal"); + + auto I = FNodesInTree.find(F); + assert(I != FNodesInTree.end() && "F should be in FNodesInTree"); + assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G"); + + FnTreeType::iterator IterToFNInFnTree = I->second; + assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree."); + // Remove F -> FN and insert G -> FN + FNodesInTree.erase(I); + FNodesInTree.insert({G, IterToFNInFnTree}); + // Replace F with G in FN, which is stored inside the FnTree. 
+ FN.replaceBy(G); } // Insert a ComparableFunction into the FnTree, or merge it away if equal to one @@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) { FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { + assert(FNodesInTree.count(NewFunction) == 0); + FNodesInTree.insert({NewFunction, Result.first}); DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } @@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) { if (OldF.getFunc()->getName() > NewFunction->getName()) { // Swap the two functions. Function *F = OldF.getFunc(); - replaceFunctionInTree(Result.first, NewFunction); + replaceFunctionInTree(*Result.first, NewFunction); NewFunction = F; assert(OldF.getFunc() != F && "Must have swapped the functions."); } @@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) { // Remove a function from FnTree. If it was already in FnTree, add // it to Deferred so that we'll look at it in the next round. void MergeFunctions::remove(Function *F) { - // We need to make sure we remove F, not a function "equal" to F per the - // function equality comparator. - FnTreeType::iterator found = FnTree.find(FunctionNode(F)); - size_t Erased = 0; - if (found != FnTree.end() && found->getFunc() == F) { - Erased = 1; - FnTree.erase(found); - } - - if (Erased) { - DEBUG(dbgs() << "Removed " << F->getName() - << " from set and deferred it.\n"); + auto I = FNodesInTree.find(F); + if (I != FNodesInTree.end()) { + DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n"); + FnTree.erase(I->second); + // I->second has been invalidated, remove it from the FNodesInTree map to + // preserve the invariant. + FNodesInTree.erase(I); Deferred.emplace_back(F); } } @@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) { void MergeFunctions::removeUsers(Value *V) { std::vector Worklist; Worklist.push_back(V); + SmallSet Visited; + Visited.insert(V); while (!Worklist.empty()) { Value *V = Worklist.back(); Worklist.pop_back(); @@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) { } else if (isa(U)) { // do nothing } else if (Constant *C = dyn_cast(U)) { - for (User *UU : C->users()) - Worklist.push_back(UU); + for (User *UU : C->users()) { + if (!Visited.insert(UU).second) + Worklist.push_back(UU); + } } } } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 4a7cb7ba7d12..0c5c84bbccab 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... - BasicBlock* entryBlock = F->begin(); + BasicBlock *entryBlock = &F->front(); BranchInst *BR = dyn_cast(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return nullptr; @@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) { // of which will go outside. 
BasicBlock* preReturn = newReturnBlock; newReturnBlock = newReturnBlock->splitBasicBlock( - newReturnBlock->getFirstNonPHI()); + newReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = preReturn->begin(); - BasicBlock::iterator Ins = newReturnBlock->begin(); + Instruction *Ins = &newReturnBlock->front(); while (I != preReturn->end()) { PHINode* OldPhi = dyn_cast(I); if (!OldPhi) break; - - PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + + PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); - - retPhi->addIncoming(I, preReturn); + + retPhi->addIncoming(&*I, preReturn); retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), newEntryBlock); OldPhi->removeIncomingValue(newEntryBlock); @@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { FE = duplicateFunction->end(); FI != FE; ++FI) if (&*FI != newEntryBlock && &*FI != newReturnBlock && &*FI != newNonReturnBlock) - toExtract.push_back(FI); - + toExtract.push_back(&*FI); + // The CodeExtractor needs a dominator tree. DominatorTree DT; DT.recalculate(*duplicateFunction); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 909baae92548..9876efa7b235 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -12,19 +12,26 @@ // //===----------------------------------------------------------------------===// - #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Verifier.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" @@ -89,11 +96,21 @@ static cl::opt EnableLoopDistribute( "enable-loop-distribute", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass")); +static cl::opt EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(true), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + +static cl::opt EnableLoopLoadElim( + "enable-loop-load-elim", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; + FunctionIndex = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. 
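(The pipeline construction resumes below. As a loose, self-contained sketch of the general idea of stacking alias analyses with a conservative fallback: each analysis may give a definitive answer or decline, and an unanswered query degrades to MayAlias. This is not LLVM's AliasAnalysis interface, the real chain's delegation order differs in detail, and the toy rules here are invented.)

#include <functional>
#include <iostream>
#include <optional>
#include <vector>

enum class AliasResult { NoAlias, MayAlias, MustAlias };
using Analysis = std::function<std::optional<AliasResult>(int, int)>;

// Ask each analysis in turn; the first definitive answer wins, and the
// fallback is the conservative MayAlias.
static AliasResult alias(const std::vector<Analysis> &Chain, int A, int B) {
  for (const auto &AA : Chain)
    if (auto R = AA(A, B))
      return *R;
  return AliasResult::MayAlias;
}

int main() {
  std::vector<Analysis> Chain = {
      // Toy "type-based" rule: objects 1 and 2 have disjoint types.
      [](int A, int B) -> std::optional<AliasResult> {
        if ((A == 1 && B == 2) || (A == 2 && B == 1))
          return AliasResult::NoAlias;
        return std::nullopt;
      },
      // Toy "basic" rule: identical objects must alias.
      [](int A, int B) -> std::optional<AliasResult> {
        if (A == B)
          return AliasResult::MustAlias;
        return std::nullopt;
      }};
  std::cout << (alias(Chain, 1, 2) == AliasResult::NoAlias) << '\n';   // 1
  std::cout << (alias(Chain, 3, 3) == AliasResult::MustAlias) << '\n'; // 1
  std::cout << (alias(Chain, 1, 3) == AliasResult::MayAlias) << '\n';  // 1
}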
if (UseCFLAA) - PM.add(createCFLAliasAnalysisPass()); - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createScopedNoAliasAAPass()); - PM.add(createBasicAliasAnalysisPass()); + PM.add(createCFLAAWrapperPass()); + PM.add(createTypeBasedAAWrapperPass()); + PM.add(createScopedNoAliasAAWrapperPass()); } void PassManagerBuilder::populateFunctionPassManager( @@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager( void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { @@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars + MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination @@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + // Start of CallGraph SCC passes. if (!DisableUnitAtATime) MPM.add(createPruneEHPass()); // Remove dead EH info @@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + } + + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. 
By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); + if (RunFloat2Int) MPM.add(createFloat2IntPass()); + addExtensionsToPM(EP_VectorizerStart, MPM); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. @@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopDistributePass()); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + if (EnableLoopLoadElim) + MPM.add(createLoopLoadEliminationPass()); + // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when // on -O1 and no #pragma is found). Would be good to have these two passes @@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { - if (!PrepareForLTO) { - // Remove avail extern fns and globals definitions if we aren't - // compiling an object file for later LTO. For LTO we want to preserve - // these so they are eligible for inlining at link-time. Note if they - // are unreferenced they will be removed by GlobalDCE below, so - // this only impacts referenced available externally globals. - // Eventually they will be suppressed during codegen, but eliminating - // here enables more opportunity for GlobalDCE as it may make - // globals referenced by available external functions dead. - MPM.add(createEliminateAvailableExternallyPass()); - } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } @@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! + PM.add(createFunctionAttrsPass()); // Add norecurse if possible. 
PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); // Linking modules together can lead to duplicated global constants, only // keep one copy of each constant. @@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. - PM.add(createGlobalsModRefPass()); // IP alias analysis. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. if (EnableMLSM) @@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoopVectorizePass(true, LoopVectorize)); + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + PM.add(createInstructionCombiningPass()); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + // More scalar chains could be vectorized due to more alias information if (RunSLPAfterLoopVectorization) if (SLPVectorize) @@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass()); @@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel > 1) addLTOOptimizationPasses(PM); + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + // Lower bit sets to globals. This pass supports Clang's control flow // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI // is enabled. The pass does nothing if CFI is disabled. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index b2f1010c9a07..3af4afb903fe 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. 
if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - AttrBuilder NewAttributes; - - if (!SCCMightUnwind) - NewAttributes.addAttribute(Attribute::NoUnwind); - if (!SCCMightReturn) - NewAttributes.addAttribute(Attribute::NoReturn); - Function *F = (*I)->getFunction(); - const AttributeSet &PAL = F->getAttributes().getFnAttributes(); - const AttributeSet &NPAL = AttributeSet::get( - F->getContext(), AttributeSet::FunctionIndex, NewAttributes); - if (PAL != NPAL) { + if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { + F->addFnAttr(Attribute::NoUnwind); + MadeChange = true; + } + + if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) { + F->addFnAttr(Attribute::NoReturn); MadeChange = true; - F->addAttributes(AttributeSet::FunctionIndex, NPAL); } } @@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast(BB->getTerminator())) if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) { - SmallVector Args(II->op_begin(), II->op_end() - 3); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); @@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // Remove the uncond branch and add an unreachable. BB->getInstList().pop_back(); - new UnreachableInst(BB->getContext(), BB); + new UnreachableInst(BB->getContext(), &*BB); DeleteBasicBlock(New); // Delete the new BB. 
MadeChange = true; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp similarity index 50% rename from lib/Transforms/Scalar/SampleProfile.cpp rename to lib/Transforms/IPO/SampleProfile.cpp index c8dfa54a4aa0..928d92ef9d12 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -22,7 +22,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -44,7 +43,11 @@ #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" #include using namespace llvm; @@ -61,27 +64,51 @@ static cl::opt SampleProfileMaxPropagateIterations( "sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG.")); +static cl::opt SampleProfileRecordCoverage( + "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of records in the input profile " + "are matched to the IR.")); +static cl::opt SampleProfileSampleCoverage( + "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of samples in the input profile " + "are matched to the IR.")); +static cl::opt SampleProfileHotThreshold( + "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), + cl::desc("Inlined functions that account for more than N% of all samples " + "collected in the parent function, will be inlined again.")); +static cl::opt SampleProfileGlobalHotThreshold( + "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), + cl::desc("Top-level functions that account for more than N% of all samples " + "collected in the profile, will be marked as hot for the inliner " + "to consider.")); +static cl::opt SampleProfileGlobalColdThreshold( + "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), + cl::desc("Top-level functions that account for less than N% of all samples " + "collected in the profile, will be marked as cold for the inliner " + "to consider.")); namespace { -typedef DenseMap BlockWeightMap; -typedef DenseMap EquivalenceClassMap; -typedef std::pair Edge; -typedef DenseMap EdgeWeightMap; -typedef DenseMap> BlockEdgeMap; +typedef DenseMap BlockWeightMap; +typedef DenseMap EquivalenceClassMap; +typedef std::pair Edge; +typedef DenseMap EdgeWeightMap; +typedef DenseMap> + BlockEdgeMap; /// \brief Sample profile pass. /// /// This pass reads profile data from the file specified by /// -sample-profile-file and annotates every affected function with the /// profile information found in that file. 
-class SampleProfileLoader : public FunctionPass { +class SampleProfileLoader : public ModulePass { public: // Class identification, replacement for typeinfo static char ID; SampleProfileLoader(StringRef Name = SampleProfileFile) - : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr), - Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) { + : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); } @@ -91,36 +118,37 @@ public: const char *getPassName() const override { return "Sample profile pass"; } - bool runOnFunction(Function &F) override; + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); } protected: + bool runOnFunction(Function &F); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); - unsigned getInstWeight(Instruction &I); - unsigned getBlockWeight(BasicBlock *BB); + ErrorOr getInstWeight(const Instruction &I) const; + ErrorOr getBlockWeight(const BasicBlock *BB) const; + const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); void printEdgeWeight(raw_ostream &OS, Edge E); - void printBlockWeight(raw_ostream &OS, BasicBlock *BB); - void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); bool computeBlockWeights(Function &F); void findEquivalenceClasses(Function &F); void findEquivalencesFor(BasicBlock *BB1, SmallVector Descendants, DominatorTreeBase *DomTree); void propagateWeights(Function &F); - unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); bool propagateThroughEdges(Function &F); - - /// \brief Line number for the function header. Used to compute absolute - /// line numbers from the relative line numbers found in the profile. - unsigned HeaderLineno; + void computeDominanceAndLoopInfo(Function &F); + unsigned getOffset(unsigned L, unsigned H) const; + void clearFunctionData(); /// \brief Map basic blocks to their computed weights. /// @@ -135,7 +163,7 @@ protected: EdgeWeightMap EdgeWeights; /// \brief Set of visited blocks during propagation. - SmallPtrSet VisitedBlocks; + SmallPtrSet VisitedBlocks; /// \brief Set of visited edges during propagation. SmallSet VisitedEdges; @@ -149,9 +177,9 @@ protected: EquivalenceClassMap EquivalenceClass; /// \brief Dominance, post-dominance and loop information. - DominatorTree *DT; - PostDominatorTree *PDT; - LoopInfo *LI; + std::unique_ptr DT; + std::unique_ptr> PDT; + std::unique_ptr LI; /// \brief Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -159,9 +187,6 @@ protected: /// \brief Successors for each basic block in the CFG. BlockEdgeMap Successors; - /// \brief LLVM context holding the debug data we need. - LLVMContext *Ctx; - /// \brief Profile reader object. std::unique_ptr Reader; @@ -173,7 +198,207 @@ protected: /// \brief Flag indicating whether the profile input loaded successfully. 
bool ProfileIsValid; + + /// \brief Total number of samples collected in this profile. + /// + /// This is the sum of all the samples collected in all the functions executed + /// at runtime. + uint64_t TotalCollectedSamples; }; + +class SampleCoverageTracker { +public: + SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} + + bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, + uint32_t Discriminator, uint64_t Samples); + unsigned computeCoverage(unsigned Used, unsigned Total) const; + unsigned countUsedRecords(const FunctionSamples *FS) const; + unsigned countBodyRecords(const FunctionSamples *FS) const; + uint64_t getTotalUsedSamples() const { return TotalUsedSamples; } + uint64_t countBodySamples(const FunctionSamples *FS) const; + void clear() { + SampleCoverage.clear(); + TotalUsedSamples = 0; + } + +private: + typedef std::map BodySampleCoverageMap; + typedef DenseMap + FunctionSamplesCoverageMap; + + /// Coverage map for sampling records. + /// + /// This map keeps a record of sampling records that have been matched to + /// an IR instruction. This is used to detect some form of staleness in + /// profiles (see flag -sample-profile-check-coverage). + /// + /// Each entry in the map corresponds to a FunctionSamples instance. This is + /// another map that counts how many times the sample record at the + /// given location has been used. + FunctionSamplesCoverageMap SampleCoverage; + + /// Number of samples used from the profile. + /// + /// When a sampling record is used for the first time, the samples from + /// that record are added to this accumulator. Coverage is later computed + /// based on the total number of samples available in this function and + /// its callsites. + /// + /// Note that this accumulator tracks samples used from a single function + /// and all the inlined callsites. Strictly, we should have a map of counters + /// keyed by FunctionSamples pointers, but these stats are cleared after + /// every function, so we just need to keep a single counter. + uint64_t TotalUsedSamples; +}; + +SampleCoverageTracker CoverageTracker; + +/// Return true if the given callsite is hot wrt to its caller. +/// +/// Functions that were inlined in the original binary will be represented +/// in the inline stack in the sample profile. If the profile shows that +/// the original inline decision was "good" (i.e., the callsite is executed +/// frequently), then we will recreate the inline decision and apply the +/// profile from the inlined callsite. +/// +/// To decide whether an inlined callsite is hot, we compute the fraction +/// of samples used by the callsite with respect to the total number of samples +/// collected in the caller. +/// +/// If that fraction is larger than the default given by +/// SampleProfileHotThreshold, the callsite will be inlined again. +bool callsiteIsHot(const FunctionSamples *CallerFS, + const FunctionSamples *CallsiteFS) { + if (!CallsiteFS) + return false; // The callsite was not inlined in the original binary. + + uint64_t ParentTotalSamples = CallerFS->getTotalSamples(); + if (ParentTotalSamples == 0) + return false; // Avoid division by zero. + + uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); + if (CallsiteTotalSamples == 0) + return false; // Callsite is trivially cold. 
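+  // Worked example with invented numbers (illustrative only, not from any
+  // real profile): if the caller accumulated 1,000 total samples and this
+  // inlined callsite accumulated 5, the computation below gives
+  // PercentSamples = 5.0 / 1000.0 * 100.0 = 0.5, which meets the default
+  // SampleProfileHotThreshold of 0.1, so the callsite is considered hot and
+  // is eligible for re-inlining.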
+
+  double PercentSamples =
+      (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+  return PercentSamples >= SampleProfileHotThreshold;
+}
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+                                            uint32_t LineOffset,
+                                            uint32_t Discriminator,
+                                            uint64_t Samples) {
+  LineLocation Loc(LineOffset, Discriminator);
+  unsigned &Count = SampleCoverage[FS][Loc];
+  bool FirstTime = (++Count == 1);
+  if (FirstTime)
+    TotalUsedSamples += Samples;
+  return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+  auto I = SampleCoverage.find(FS);
+
+  // The size of the coverage map for FS represents the number of records
+  // that were marked used at least once.
+  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+  // If there are inlined callsites in this function, count the records found
+  // in the respective bodies. However, do not bother counting callees with 0
+  // total samples; these are callees that were never invoked at runtime.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countUsedRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+  unsigned Count = FS->getBodySamples().size();
+
+  // Only count records in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countBodyRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+  uint64_t Total = 0;
+  for (const auto &I : FS->getBodySamples())
+    Total += I.second.getSamples();
+
+  // Only count samples in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Total += countBodySamples(CalleeSamples);
+  }
+
+  return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+                                                unsigned Total) const {
+  assert(Used <= Total &&
+         "number of used records cannot exceed the total number of records");
+  return Total > 0 ? Used * 100 / Total : 100;
+}
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+  BlockWeights.clear();
+  EdgeWeights.clear();
+  VisitedBlocks.clear();
+  VisitedEdges.clear();
+  EquivalenceClass.clear();
+  DT = nullptr;
+  PDT = nullptr;
+  LI = nullptr;
+  Predecessors.clear();
+  Successors.clear();
+  CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L to head_lineno \p H
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// We assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+  return (L - H) & 0xffff;
 }
 
 /// \brief Print the weight of edge \p E on stream \p OS.
@@ -190,8 +415,8 @@ void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
 /// \param OS Stream to emit the output to.
 /// \param BB Block to print.
 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
-                                                BasicBlock *BB) {
-  BasicBlock *Equiv = EquivalenceClass[BB];
+                                                const BasicBlock *BB) {
+  const BasicBlock *Equiv = EquivalenceClass[BB];
   OS << "equivalence[" << BB->getName()
      << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
 }
@@ -200,8 +425,11 @@ void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
 ///
 /// \param OS Stream to emit the output to.
 /// \param BB Block to print.
-void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
-  OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+                                           const BasicBlock *BB) const {
+  const auto &I = BlockWeights.find(BB);
+  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+  OS << "weight[" << BB->getName() << "]: " << W << "\n";
 }
 
 /// \brief Get the weight for an instruction.
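// A standalone sketch (not part of this patch; helper name hypothetical) of
// the header-relative line encoding implemented by getOffset() above. Offsets
// are truncated to 16 bits, so they stay stable under edits elsewhere in the
// file as long as a single function stays under 65535 lines.
static unsigned lineOffsetSketch(unsigned Lineno, unsigned HeaderLineno) {
  // Same arithmetic as SampleProfileLoader::getOffset().
  return (Lineno - HeaderLineno) & 0xffff;
}
// Example: a sample taken at absolute line 1043 of a function whose header is
// at line 1000 is stored at offset 43; the profile still matches if the whole
// function later moves to a different absolute line.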
@@ -214,51 +442,67 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
 ///
 /// \param Inst Instruction to query.
 ///
-/// \returns The profiled weight of I.
-unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
   DebugLoc DLoc = Inst.getDebugLoc();
   if (!DLoc)
-    return 0;
+    return std::error_code();
 
-  unsigned Lineno = DLoc.getLine();
-  if (Lineno < HeaderLineno)
-    return 0;
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (!FS)
+    return std::error_code();
 
   const DILocation *DIL = DLoc;
-  int LOffset = Lineno - HeaderLineno;
-  unsigned Discriminator = DIL->getDiscriminator();
-  unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
-  DEBUG(dbgs() << "    " << Lineno << "." << Discriminator << ":" << Inst
-               << " (line offset: " << LOffset << "." << Discriminator
-               << " - weight: " << Weight << ")\n");
-  return Weight;
+  unsigned Lineno = DLoc.getLine();
+  unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+  uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+  uint32_t Discriminator = DIL->getDiscriminator();
+  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+  if (R) {
+    bool FirstMark =
+        CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+    if (FirstMark) {
+      const Function *F = Inst.getParent()->getParent();
+      LLVMContext &Ctx = F->getContext();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, *F, DLoc,
+          Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+              Twine(LineOffset) +
+              ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") +
+              ")");
+    }
+    DEBUG(dbgs() << "    " << Lineno << "." << DIL->getDiscriminator() << ":"
+                 << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+                 << DIL->getDiscriminator() << " - weight: " << R.get()
+                 << ")\n");
+  }
+  return R;
 }
 
 /// \brief Compute the weight of a basic block.
 ///
 /// The weight of basic block \p BB is the maximum weight of all the
-/// instructions in BB. The weight of \p BB is computed and cached in
-/// the BlockWeights map.
+/// instructions in BB.
 ///
 /// \param BB The basic block to query.
 ///
-/// \returns The computed weight of BB.
-unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
-  // If we've computed BB's weight before, return it.
-  std::pair<BlockWeightMap::iterator, bool> Entry =
-      BlockWeights.insert(std::make_pair(BB, 0));
-  if (!Entry.second)
-    return Entry.first->second;
-
-  // Otherwise, compute and cache BB's weight.
-  unsigned Weight = 0;
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+  bool Found = false;
+  uint64_t Weight = 0;
   for (auto &I : BB->getInstList()) {
-    unsigned InstWeight = getInstWeight(I);
-    if (InstWeight > Weight)
-      Weight = InstWeight;
+    const ErrorOr<uint64_t> &R = getInstWeight(I);
+    if (R && R.get() >= Weight) {
+      Weight = R.get();
+      Found = true;
+    }
   }
-  Entry.first->second = Weight;
-  return Weight;
+  if (Found)
+    return Weight;
+  else
+    return std::error_code();
 }
 
 /// \brief Compute and store the weights of every basic block.
@@ -270,15 +514,199 @@ unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
 bool SampleProfileLoader::computeBlockWeights(Function &F) {
   bool Changed = false;
   DEBUG(dbgs() << "Block weights\n");
-  for (auto &BB : F) {
-    unsigned Weight = getBlockWeight(&BB);
-    Changed |= (Weight > 0);
+  for (const auto &BB : F) {
+    ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+    if (Weight) {
+      BlockWeights[&BB] = Weight.get();
+      VisitedBlocks.insert(&BB);
+      Changed = true;
+    }
     DEBUG(printBlockWeight(dbgs(), &BB));
   }
 
   return Changed;
 }
 
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined instance
+/// that the call targets. It contains all the samples that reside in that
+/// inlined instance. We first find the inlined instance that contains the
+/// call instruction, then traverse its children to find the callsite with
+/// the matching location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return nullptr;
+  }
+  DISubprogram *SP = DIL->getScope()->getSubprogram();
+  if (!SP)
+    return nullptr;
+
+  Function *CalleeFunc = Inst.getCalledFunction();
+  if (!CalleeFunc) {
+    return nullptr;
+  }
+
+  StringRef CalleeName = CalleeFunc->getName();
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (FS == nullptr)
+    return nullptr;
+
+  return FS->findFunctionSamplesAt(
+      CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                       DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// from which that instruction came. We traverse the inline stack of that
+/// instruction and match it against the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+  SmallVector<CallsiteLocation, 10> S;
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return Samples;
+  }
+  StringRef CalleeName;
+  for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+       DIL = DIL->getInlinedAt()) {
+    DISubprogram *SP = DIL->getScope()->getSubprogram();
+    if (!SP)
+      return nullptr;
+    if (!CalleeName.empty()) {
+      S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                                   DIL->getDiscriminator(), CalleeName));
+    }
+    CalleeName = SP->getLinkageName();
+  }
+  if (S.size() == 0)
+    return Samples;
+  const FunctionSamples *FS = Samples;
+  for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+    FS = FS->findFunctionSamplesAt(S[i]);
+  }
+  return FS;
+}
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+  if (TotalCollectedSamples == 0)
+    return false;
+
+  uint64_t FunctionSamples = Samples->getTotalSamples();
+  double SamplesPercent =
+      (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+  // If the function collected more samples than the hot threshold, mark
+  // it globally hot.
+  if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+    F.addFnAttr(llvm::Attribute::InlineHint);
+    std::string Msg;
+    raw_string_ostream S(Msg);
+    S << "Applied inline hint to globally hot function '" << F.getName()
+      << "' with " << format("%.2f", SamplesPercent)
+      << "% of samples (threshold: "
+      << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+    S.flush();
+    emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+    return true;
+  }
+
+  // If the function collected fewer samples than the cold threshold, mark
+  // it globally cold.
+  if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+    F.addFnAttr(llvm::Attribute::Cold);
+    std::string Msg;
+    raw_string_ostream S(Msg);
+    S << "Applied cold hint to globally cold function '" << F.getName()
+      << "' with " << format("%.2f", SamplesPercent)
+      << "% of samples (threshold: "
+      << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+    S.flush();
+    emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+    return true;
+  }
+
+  return false;
+}
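// A minimal sketch (not from this patch; names and percentages hypothetical)
// of the global classification emitInlineHints() above applies:
enum class GlobalHint { Hot, Cold, None };
static GlobalHint classifyBySampleShare(uint64_t FuncSamples,
                                        uint64_t TotalSamples,
                                        double HotPercent, double ColdPercent) {
  if (TotalSamples == 0)
    return GlobalHint::None; // No profile data; leave the function alone.
  double Percent = (double)FuncSamples / (double)TotalSamples * 100.0;
  if (Percent >= HotPercent)
    return GlobalHint::Hot;  // Would receive the InlineHint attribute.
  if (Percent <= ColdPercent)
    return GlobalHint::Cold; // Would receive the Cold attribute.
  return GlobalHint::None;
}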
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Iteratively traverse all callsites of the function \p F, and determine
+/// whether the corresponding inlined instance exists and is hot in the
+/// profile. If it is hot enough, inline the callsite and add the new
+/// callsites of the callee into the caller.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F function to perform iterative inlining.
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+  bool Changed = false;
+  LLVMContext &Ctx = F.getContext();
+  while (true) {
+    bool LocalChanged = false;
+    SmallVector<CallInst *, 10> CIS;
+    for (auto &BB : F) {
+      for (auto &I : BB.getInstList()) {
+        CallInst *CI = dyn_cast<CallInst>(&I);
+        if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+          CIS.push_back(CI);
+      }
+    }
+    for (auto CI : CIS) {
+      InlineFunctionInfo IFI;
+      Function *CalledFunction = CI->getCalledFunction();
+      DebugLoc DLoc = CI->getDebugLoc();
+      uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+      if (InlineFunction(CI, IFI)) {
+        LocalChanged = true;
+        emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
                               Twine("inlined hot callee '") +
                                   CalledFunction->getName() + "' with " +
                                   Twine(NumSamples) + " samples into '" +
                                   F.getName() + "'");
+      }
+    }
+    if (LocalChanged) {
+      Changed = true;
+    } else {
+      break;
+    }
+  }
+  return Changed;
+}
+
 /// \brief Find equivalence classes for the given block.
 ///
 /// This finds all the blocks that are guaranteed to execute the same
@@ -305,12 +733,13 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
 void SampleProfileLoader::findEquivalencesFor(
     BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
     DominatorTreeBase<BasicBlock> *DomTree) {
-  for (auto *BB2 : Descendants) {
+  const BasicBlock *EC = EquivalenceClass[BB1];
+  uint64_t Weight = BlockWeights[EC];
+  for (const auto *BB2 : Descendants) {
     bool IsDomParent = DomTree->dominates(BB2, BB1);
     bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
-    if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
-        IsInSameLoop) {
-      EquivalenceClass[BB2] = BB1;
+    if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+      EquivalenceClass[BB2] = EC;
 
       // If BB2 is heavier than BB1, make BB2 have the same weight
       // as BB1.
@@ -320,11 +749,10 @@ void SampleProfileLoader::findEquivalencesFor(
       // during the propagation phase. Right now, we just want to
      // make sure that BB1 has the largest weight of all the
       // members of its equivalence set.
-      unsigned &BB1Weight = BlockWeights[BB1];
-      unsigned &BB2Weight = BlockWeights[BB2];
-      BB1Weight = std::max(BB1Weight, BB2Weight);
+      Weight = std::max(Weight, BlockWeights[BB2]);
     }
   }
+  BlockWeights[EC] = Weight;
 }
 
 /// \brief Find equivalence classes.
@@ -364,19 +792,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
     // class by making BB2's equivalence class be BB1.
     DominatedBBs.clear();
     DT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
-
-    // Repeat the same logic for all the blocks post-dominated by BB1.
-    // We are looking for every basic block BB2 such that:
-    //
-    // 1- BB1 post-dominates BB2.
-    // 2- BB2 dominates BB1.
-    // 3- BB1 and BB2 are in the same loop nest.
-    //
-    // If all those conditions hold, BB2's equivalence class is BB1.
-    DominatedBBs.clear();
-    PDT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, DT);
+    findEquivalencesFor(BB1, DominatedBBs, PDT.get());
 
     DEBUG(printBlockEquivalence(dbgs(), BB1));
   }
@@ -389,8 +805,8 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
   // to all the blocks in that equivalence class.
   DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
   for (auto &BI : F) {
-    BasicBlock *BB = &BI;
-    BasicBlock *EquivBB = EquivalenceClass[BB];
+    const BasicBlock *BB = &BI;
+    const BasicBlock *EquivBB = EquivalenceClass[BB];
     if (BB != EquivBB)
       BlockWeights[BB] = BlockWeights[EquivBB];
     DEBUG(printBlockWeight(dbgs(), BB));
@@ -407,7 +823,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
 /// \param UnknownEdge Set if E has not been visited before.
 ///
 /// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
                                         Edge *UnknownEdge) {
   if (!VisitedEdges.count(E)) {
     (*NumUnknownEdges)++;
@@ -432,8 +848,9 @@ unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
 bool SampleProfileLoader::propagateThroughEdges(Function &F) {
   bool Changed = false;
   DEBUG(dbgs() << "\nPropagation through edges\n");
-  for (auto &BI : F) {
-    BasicBlock *BB = &BI;
+  for (const auto &BI : F) {
+    const BasicBlock *BB = &BI;
+    const BasicBlock *EC = EquivalenceClass[BB];
 
     // Visit all the predecessor and successor edges to determine
     // which ones have a weight assigned already. Note that it doesn't
@@ -441,7 +858,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
     // only case we are interested in handling is when only a single
     // edge is unknown (see setEdgeOrBlockWeight).
     for (unsigned i = 0; i < 2; i++) {
-      unsigned TotalWeight = 0;
+      uint64_t TotalWeight = 0;
       unsigned NumUnknownEdges = 0;
       Edge UnknownEdge, SelfReferentialEdge;
 
@@ -485,7 +902,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
      // all edges will get a weight, or iteration will stop when
       // it reaches SampleProfileMaxPropagateIterations.
       if (NumUnknownEdges <= 1) {
-        unsigned &BBWeight = BlockWeights[BB];
+        uint64_t &BBWeight = BlockWeights[EC];
         if (NumUnknownEdges == 0) {
           // If we already know the weight of all edges, the weight of the
           // basic block can be computed. It should be no larger than the sum
@@ -497,9 +914,9 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
                        << " known. Set weight for block: ";
                 printBlockWeight(dbgs(), BB););
         }
-        if (VisitedBlocks.insert(BB).second)
+        if (VisitedBlocks.insert(EC).second)
          Changed = true;
-      } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+      } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
         // If there is a single unknown edge and the block has been
         // visited, then we can compute E's weight.
         if (BBWeight >= TotalWeight)
@@ -511,8 +928,8 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
           DEBUG(dbgs() << "Set weight for edge: ";
                 printEdgeWeight(dbgs(), UnknownEdge));
         }
-      } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
-        unsigned &BBWeight = BlockWeights[BB];
+      } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+        uint64_t &BBWeight = BlockWeights[BB];
         // We have a self-referential edge and the weight of BB is known.
         if (BBWeight >= TotalWeight)
           EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
@@ -578,7 +995,7 @@ void SampleProfileLoader::buildEdges(Function &F) {
 /// known).
 void SampleProfileLoader::propagateWeights(Function &F) {
   bool Changed = true;
-  unsigned i = 0;
+  unsigned I = 0;
 
   // Add an entry count to the function using the samples gathered
   // at the function entry.
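// A standalone sketch (hypothetical helper, not part of this patch) of the
// inference propagateThroughEdges() above performs once a block has a single
// unknown incident edge: the unknown edge gets the block weight left over
// after subtracting all known edges, saturating at zero.
static uint64_t inferUnknownEdgeWeight(uint64_t BBWeight,
                                       uint64_t KnownEdgeTotal) {
  // Edge weights are unsigned; never let the subtraction wrap.
  return BBWeight >= KnownEdgeTotal ? BBWeight - KnownEdgeTotal : 0;
}
// Example: a block with weight 100 whose known incident edges sum to 85
// leaves 15 samples for the one remaining unknown edge.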
@@ -592,14 +1009,15 @@ void SampleProfileLoader::propagateWeights(Function &F) {
   buildEdges(F);
 
   // Propagate until we converge or we go past the iteration limit.
-  while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
     Changed = propagateThroughEdges(F);
   }
 
   // Generate MD_prof metadata for every branch instruction using the
   // edge weights computed during propagation.
   DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
-  MDBuilder MDB(F.getContext());
+  LLVMContext &Ctx = F.getContext();
+  MDBuilder MDB(Ctx);
   for (auto &BI : F) {
     BasicBlock *BB = &BI;
     TerminatorInst *TI = BB->getTerminator();
@@ -610,24 +1028,44 @@ void SampleProfileLoader::propagateWeights(Function &F) {
     DEBUG(dbgs() << "\nGetting weights for branch at line "
                  << TI->getDebugLoc().getLine() << ".\n");
-    SmallVector<unsigned, 4> Weights;
-    bool AllWeightsZero = true;
+    SmallVector<uint32_t, 4> Weights;
+    uint32_t MaxWeight = 0;
+    DebugLoc MaxDestLoc;
     for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
       BasicBlock *Succ = TI->getSuccessor(I);
       Edge E = std::make_pair(BB, Succ);
-      unsigned Weight = EdgeWeights[E];
+      uint64_t Weight = EdgeWeights[E];
       DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
-      Weights.push_back(Weight);
-      if (Weight != 0)
-        AllWeightsZero = false;
+      // Use uint32_t saturated arithmetic to adjust the incoming weights,
+      // if needed. Sample counts in profiles are 64-bit unsigned values,
+      // but internally branch weights are expressed as 32-bit values.
+      if (Weight > std::numeric_limits<uint32_t>::max()) {
+        DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        Weight = std::numeric_limits<uint32_t>::max();
+      }
+      Weights.push_back(static_cast<uint32_t>(Weight));
+      if (Weight != 0) {
+        if (Weight > MaxWeight) {
+          MaxWeight = Weight;
+          MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+        }
+      }
     }
 
     // Only set weights if there is at least one non-zero weight.
     // In any other case, let the analyzer set weights.
-    if (!AllWeightsZero) {
+    if (MaxWeight > 0) {
       DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
       TI->setMetadata(llvm::LLVMContext::MD_prof,
                       MDB.createBranchWeights(Weights));
+      DebugLoc BranchLoc = TI->getDebugLoc();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, F, MaxDestLoc,
+          Twine("most popular destination for conditional branches at ") +
+              ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
                                    Twine(BranchLoc.getLine()) + ":" +
                                    Twine(BranchLoc.getCol()))
                            : Twine("")));
     } else {
       DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
     }
@@ -649,7 +1087,7 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
   if (DISubprogram *S = getDISubprogram(&F))
     return S->getLine();
 
-  // If could not find the start of \p F, emit a diagnostic to inform the user
+  // If the start of \p F is missing, emit a diagnostic to inform the user
   // about the missed opportunity.
   F.getContext().diagnose(DiagnosticInfoSampleProfile(
       "No debug information found in function " + F.getName() +
@@ -658,6 +1096,17 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
   return 0;
 }
 
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+  DT.reset(new DominatorTree);
+  DT->recalculate(F);
+
+  PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+  PDT->recalculate(F);
+
+  LI.reset(new LoopInfo);
+  LI->analyze(*DT);
+}
+
 /// \brief Generate branch weight metadata for all branches in \p F.
 ///
 /// Branch weights are computed out of instruction samples using a
@@ -710,18 +1159,23 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
 bool SampleProfileLoader::emitAnnotations(Function &F) {
   bool Changed = false;
 
-  // Initialize invariants used during computation and propagation.
-  HeaderLineno = getFunctionLoc(F);
-  if (HeaderLineno == 0)
+  if (getFunctionLoc(F) == 0)
     return false;
 
   DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
-               << ": " << HeaderLineno << "\n");
+               << ": " << getFunctionLoc(F) << "\n");
+
+  Changed |= emitInlineHints(F);
+
+  Changed |= inlineHotFunctions(F);
 
   // Compute basic block weights.
   Changed |= computeBlockWeights(F);
 
   if (Changed) {
+    // Compute dominance and loop info needed for propagation.
+    computeDominanceAndLoopInfo(F);
+
     // Find equivalence classes.
     findEquivalenceClasses(F);
 
@@ -729,24 +1183,48 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
     propagateWeights(F);
   }
 
+  // If coverage checking was requested, compute it now.
+  if (SampleProfileRecordCoverage) {
+    unsigned Used = CoverageTracker.countUsedRecords(Samples);
+    unsigned Total = CoverageTracker.countBodyRecords(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileRecordCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+              Twine(Coverage) + "%) were applied",
+          DS_Warning));
+    }
+  }
+
+  if (SampleProfileSampleCoverage) {
+    uint64_t Used = CoverageTracker.getTotalUsedSamples();
+    uint64_t Total = CoverageTracker.countBodySamples(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileSampleCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+              Twine(Coverage) + "%) were applied",
+          DS_Warning));
+    }
+  }
   return Changed;
 }
 
 char SampleProfileLoader::ID = 0;
 INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
                       "Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
 INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
                     "Sample Profile loader", false, false)
 
 bool SampleProfileLoader::doInitialization(Module &M) {
-  auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
+  auto &Ctx = M.getContext();
+  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
   if (std::error_code EC = ReaderOrErr.getError()) {
     std::string Msg = "Could not open profile: " + EC.message();
-    M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
     return false;
   }
   Reader = std::move(ReaderOrErr.get());
@@ -754,22 +1232,32 @@ bool SampleProfileLoader::doInitialization(Module &M) {
   return true;
 }
 
-FunctionPass *llvm::createSampleProfileLoaderPass() {
+ModulePass *llvm::createSampleProfileLoaderPass() {
   return new SampleProfileLoader(SampleProfileFile);
 }
 
-FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
   return new SampleProfileLoader(Name);
 }
 
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnModule(Module &M) {
   if (!ProfileIsValid)
     return false;
 
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  PDT = &getAnalysis<PostDominatorTree>();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  Ctx = &F.getParent()->getContext();
+  // Compute the total number of samples collected in this profile.
+  for (const auto &I : Reader->getProfiles())
+    TotalCollectedSamples += I.second.getTotalSamples();
+
+  bool retval = false;
+  for (auto &F : M)
+    if (!F.isDeclaration()) {
+      clearFunctionData();
+      retval |= runOnFunction(F);
+    }
+  return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
   Samples = Reader->getSamplesFor(F);
   if (!Samples->empty())
     return emitAnnotations(F);
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 956991ad1f95..c94cc7c74a89 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -7,47 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass loops over all of the functions in the input module, looking for 
+// This pass loops over all of the functions in the input module, looking for
 // dead declarations and removes them. Dead declarations are declarations of
 // functions for which no implementation is available (i.e., declarations for
 // unused library functions).
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "strip-dead-prototypes"
 
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
 
-namespace {
-
-/// @brief Pass to remove unused function declarations.
-class StripDeadPrototypesPass : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  StripDeadPrototypesPass() : ModulePass(ID) {
-    initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
-  }
-  bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
-                "Strip Unused Function Prototypes", false, false)
-
-bool StripDeadPrototypesPass::runOnModule(Module &M) {
+static bool stripDeadPrototypes(Module &M) {
   bool MadeChange = false;
-  
+
   // Erase dead function prototypes.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
-    Function *F = I++;
+    Function *F = &*I++;
     // Function must be a prototype and unused.
     if (F->isDeclaration() && F->use_empty()) {
       F->eraseFromParent();
@@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) {
   // Erase dead global var prototypes.
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ) {
-    GlobalVariable *GV = I++;
+    GlobalVariable *GV = &*I++;
     // Global must be a prototype and unused.
     if (GV->isDeclaration() && GV->use_empty())
       GV->eraseFromParent();
   }
-  
+
   // Return an indication of whether we changed anything or not.
   return MadeChange;
 }
 
-ModulePass *llvm::createStripDeadPrototypesPass() {
-  return new StripDeadPrototypesPass();
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+  if (stripDeadPrototypes(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+    initializeStripDeadPrototypesLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+  bool runOnModule(Module &M) override {
+    return stripDeadPrototypes(M);
+  }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+                "Strip Unused Function Prototypes", false, false)
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+  return new StripDeadPrototypesLegacyPass();
 }
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a4f30c58f936..46f352f7f9f1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
 
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I) {
-    if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+    if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
       if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
   }
 
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+    if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
       if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
     StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
@@ -305,6 +305,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
   SmallVector<Metadata *, 64> LiveSubprograms;
   DenseSet<const MDNode *> VisitedSet;
 
+  std::set<DISubprogram *> LiveSPs;
+  for (Function &F : M) {
+    if (DISubprogram *SP = F.getSubprogram())
+      LiveSPs.insert(SP);
+  }
+
   for (DICompileUnit *DIC : F.compile_units()) {
     // Create our live subprogram list.
     bool SubprogramChange = false;
@@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
       continue;
 
     // If the function referenced by DISP is not null, the function is live.
-    if (DISP->getFunction())
+    if (LiveSPs.count(DISP))
       LiveSubprograms.push_back(DISP);
     else
       SubprogramChange = true;
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d2c109f3243..6f49399f57bf 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,4 +1,4 @@
-//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,6 +17,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/PatternMatch.h"
+
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -67,17 +68,17 @@ namespace {
   private:
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
 
-    APFloat *getFpValPtr(void)
+    APFloat *getFpValPtr()
       { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); }
-    const APFloat *getFpValPtr(void) const
+    const APFloat *getFpValPtr() const
       { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); }
 
-    const APFloat &getFpVal(void) const {
+    const APFloat &getFpVal() const {
      assert(IsFp && BufHasFpVal && "Incorrect state");
       return *getFpValPtr();
     }
 
-    APFloat &getFpVal(void) {
+    APFloat &getFpVal() {
       assert(IsFp && BufHasFpVal && "Incorrect state");
       return *getFpValPtr();
     }
@@ -92,8 +93,8 @@ namespace {
     // TODO: We should get rid of this function when APFloat can be constructed
     //       from a *SIGNED* integer.
     APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
-  private:
 
+  private:
     bool IsFp;
 
     // True iff FpValBuf contains an instance of APFloat.
@@ -114,10 +115,10 @@ namespace {
   ///
   class FAddend {
   public:
-    FAddend() { Val = nullptr; }
+    FAddend() : Val(nullptr) {}
 
-    Value *getSymVal (void) const { return Val; }
-    const FAddendCoef &getCoef(void) const { return Coeff; }
+    Value *getSymVal() const { return Val; }
+    const FAddendCoef &getCoef() const { return Coeff; }
 
     bool isConstant() const { return Val == nullptr; }
     bool isZero() const { return Coeff.isZero(); }
@@ -182,7 +183,6 @@ namespace {
     InstCombiner::BuilderTy *Builder;
     Instruction *Instr;
 
-  private:
     // Debugging stuff is clustered here.
     #ifndef NDEBUG
     unsigned CreateInstrNum;
@@ -193,7 +193,8 @@ namespace {
     void incCreateInstNum() {}
     #endif
   };
-}
+
+} // anonymous namespace
 
 //===----------------------------------------------------------------------===//
 //
@@ -602,7 +603,6 @@ Value *FAddCombine::simplify(Instruction *I) {
 }
 
 Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
-
   unsigned AddendNum = Addends.size();
   assert(AddendNum <= 4 && "Too many addends");
 
@@ -886,7 +886,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
   return Op0ZeroPosition >= Op1OnePosition;
 }
 
-/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// Return true if we can prove that:
 ///    (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
 /// This basically requires proving that the add in the original type would not
 /// overflow to change the sign bit or have a carry out.
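// A standalone sketch (not from this patch) of the property stated above:
// sign-extending after the narrow add matches adding the sign-extended
// operands exactly when the narrow add does not overflow.
static bool sextOfAddEqualsAddOfSext(int32_t LHS, int32_t RHS) {
  // "add (sext LHS), (sext RHS)": the exact 64-bit sum of the operands.
  int64_t WideSum = (int64_t)LHS + (int64_t)RHS;
  // "sext (add LHS, RHS)": 32-bit wrapping add, then sign extension.
  int64_t NarrowSum = (int64_t)(int32_t)((uint32_t)LHS + (uint32_t)RHS);
  // The two sides agree iff the 32-bit signed add did not overflow.
  return WideSum == NarrowSum;
}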
@@ -1118,8 +1118,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
       // transform them into (X + (signbit ^ C))
       if (XorRHS->getValue().isSignBit())
-          return BinaryOperator::CreateAdd(XorLHS,
-                                           ConstantExpr::getXor(XorRHS, CI));
+        return BinaryOperator::CreateAdd(XorLHS,
+                                         ConstantExpr::getXor(XorRHS, CI));
     }
   }
 
@@ -1421,7 +1421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   return Changed ? &I : nullptr;
 }
 
-
 /// Optimize pointer differences into the same array into a size. Consider:
 ///  &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
@@ -1589,7 +1588,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     }
   }
 
-
   {
     Value *Y;
     // X-(X+Y) == -Y    X-(Y+X) == -Y
@@ -1611,32 +1609,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(A, B);
   }
 
-  // (sub (select (a, c, b)), (select (a, d, b))) -> (select (a, (sub c, d), 0))
-  // (sub (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (sub c, d)))
-  if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
-    if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
-      if (SI0->getCondition() == SI1->getCondition()) {
-        if (Value *V = SimplifySubInst(
-                SI0->getFalseValue(), SI1->getFalseValue(), I.hasNoSignedWrap(),
-                I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
-          return SelectInst::Create(
-              SI0->getCondition(),
-              Builder->CreateSub(SI0->getTrueValue(), SI1->getTrueValue(), "",
-                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
-                                 /*HasNSW=*/I.hasNoSignedWrap()),
-              V);
-        if (Value *V = SimplifySubInst(SI0->getTrueValue(), SI1->getTrueValue(),
-                                       I.hasNoSignedWrap(),
-                                       I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
-          return SelectInst::Create(
-              SI0->getCondition(), V,
-              Builder->CreateSub(SI0->getFalseValue(), SI1->getFalseValue(), "",
-                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
-                                 /*HasNSW=*/I.hasNoSignedWrap()));
-      }
-    }
-  }
-
   if (Op0->hasOneUse()) {
     Value *Y = nullptr;
     // ((X | Y) - X) --> (~X & Y)
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 15e0889b51b7..95c50d32c820 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -37,9 +37,9 @@ static inline Value *dyn_castNotVal(Value *V) {
   return nullptr;
 }
 
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
+/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
+/// a three bit mask. It also returns whether it is an ordered predicate by
+/// reference.
 static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
   isOrdered = false;
   switch (CC) {
@@ -64,10 +64,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
   }
 }
 
-/// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
+/// This is the complement of getICmpCode, which turns an opcode and two
+/// operands into either a constant true or false, or a brand new ICmp
+/// instruction. The sign is passed in to determine which kind of predicate to
+/// use in the new icmp instruction.
 static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
                               InstCombiner::BuilderTy *Builder) {
   ICmpInst::Predicate NewPred;
@@ -76,9 +76,9 @@ static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
   return Builder->CreateICmp(NewPred, LHS, RHS);
 }
 
-/// getFCmpValue - This is the complement of getFCmpCode, which turns an
-/// opcode and two operands into either a FCmp instruction. isordered is passed
-/// in to determine which kind of predicate to use in the new fcmp instruction.
+/// This is the complement of getFCmpCode, which turns an opcode and two
+/// operands into an FCmp instruction. isordered is passed in to determine
+/// which kind of predicate to use in the new fcmp instruction.
 static Value *getFCmpValue(bool isordered, unsigned code,
                            Value *LHS, Value *RHS,
                            InstCombiner::BuilderTy *Builder) {
@@ -150,14 +150,13 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
   else //if (Op == Instruction::Xor)
     BinOp = Builder->CreateXor(NewLHS, NewRHS);
 
-  Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
   return Builder->CreateCall(F, BinOp);
 }
 
-// OptAndOp - This handles expressions of the form ((val OP C1) & C2).  Where
-// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
-// guaranteed to be a binary operator.
+/// This handles expressions of the form ((val OP C1) & C2).  Where
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
+/// guaranteed to be a binary operator.
 Instruction *InstCombiner::OptAndOp(Instruction *Op,
                                     ConstantInt *OpRHS,
                                     ConstantInt *AndRHS,
@@ -341,10 +340,10 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
   return Builder->CreateICmpUGT(Add, LowerBound);
 }
 
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
-// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
-// not, since all 1s are not contiguous.
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side.  The 1s are allowed to wrap from LSB to MSB,
+/// so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
+/// not, since all 1s are not contiguous.
 static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
   const APInt& V = Val->getValue();
   uint32_t BitWidth = Val->getType()->getBitWidth();
@@ -357,9 +356,8 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
   return true;
 }
 
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub.  If we can fold one of
-/// the following xforms:
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub.  If we can fold one of the following xforms:
 ///
 /// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
 /// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -449,8 +447,8 @@ enum MaskedICmpType {
   FoldMskICmp_BMask_NotMixed      =   512
 };
 
-/// return the set of pattern classes (from MaskedICmpType)
-/// that (icmp SCC (A & B), C) satisfies
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
 static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
                                     ICmpInst::Predicate SCC)
 {
@@ -538,8 +536,8 @@ static unsigned conjugateICmpMask(unsigned Mask) {
   return NewMask;
 }
 
-/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
-/// if possible. The returned predicate is either == or !=. Returns false if
+/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
+/// The returned predicate is either == or !=. Returns false if
 /// decomposition fails.
 static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
                                  Value *&X, Value *&Y, Value *&Z) {
@@ -585,10 +583,9 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
   return true;
 }
 
-/// foldLogOpOfMaskedICmpsHelper:
-/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// return the set of pattern classes (from MaskedICmpType)
-/// that both LHS and RHS satisfy
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
 static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
                                              Value*& B, Value*& C,
                                              Value*& D, Value*& E,
@@ -700,9 +697,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
   unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
   unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
   return left_type & right_type;
 }
-/// foldLogOpOfMaskedICmps:
-/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// into a single (icmp(A & X) ==/!= Y)
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
 static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                      llvm::InstCombiner::BuilderTy *Builder) {
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
@@ -879,7 +876,7 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
   return Builder->CreateICmp(NewPred, Input, RangeEnd);
 }
 
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+/// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
 
@@ -1123,9 +1120,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   return nullptr;
 }
 
-/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
 Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
       RHS->getPredicate() == FCmpInst::FCMP_ORD) {
@@ -1203,6 +1199,54 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   return nullptr;
 }
 
+/// Match De Morgan's Laws:
+///   (~A & ~B) == (~(A | B))
+///   (~A | ~B) == (~(A & B))
+static Instruction *matchDeMorgansLaws(BinaryOperator &I,
+                                       InstCombiner::BuilderTy *Builder) {
+  auto Opcode = I.getOpcode();
+  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+         "Trying to match De Morgan's Laws with something other than and/or");
+  // Flip the logic operation.
+  if (Opcode == Instruction::And)
+    Opcode = Instruction::Or;
+  else
+    Opcode = Instruction::And;
+
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  // TODO: Use pattern matchers instead of dyn_cast.
+  if (Value *Op0NotVal = dyn_castNotVal(Op0))
+    if (Value *Op1NotVal = dyn_castNotVal(Op1))
+      if (Op0->hasOneUse() && Op1->hasOneUse()) {
+        Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal,
+                                              I.getName() + ".demorgan");
+        return BinaryOperator::CreateNot(LogicOp);
+      }
+
+  // De Morgan's Law in disguise:
+  //   (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
+  //   (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
+  Value *A = nullptr;
+  Value *B = nullptr;
+  ConstantInt *C1 = nullptr;
+  if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
+      match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
+    // TODO: This check could be loosened to handle different type sizes.
+    // Alternatively, we could fix the definition of m_Not to recognize a not
+    // operation hidden by a zext?
+    if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
+        C1->isOne()) {
+      Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
+                                            I.getName() + ".demorgan");
+      Value *Not = Builder->CreateNot(LogicOp);
+      return CastInst::CreateZExtOrBitCast(Not, I.getType());
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1273,6 +1317,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
           return BinaryOperator::CreateAnd(V, AndRHS);
 
+        // -x & 1 -> x & 1
+        if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+          return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
+
         // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
         // has 1's for all bits that the subtraction with A might affect.
         if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
@@ -1329,15 +1377,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
           return NV;
   }
 
-
-  // (~A & ~B) == (~(A | B)) - De Morgan's Law
-  if (Value *Op0NotVal = dyn_castNotVal(Op0))
-    if (Value *Op1NotVal = dyn_castNotVal(Op1))
-      if (Op0->hasOneUse() && Op1->hasOneUse()) {
-        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
-                                      I.getName()+".demorgan");
-        return BinaryOperator::CreateNot(Or);
-      }
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+    return DeMorgan;
 
   {
     Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
@@ -1446,14 +1487,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         return ReplaceInstUsesWith(I, Res);
 
-  // fold (and (cast A), (cast B)) -> (cast (and A, B))
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    Value *Op0COp = Op0C->getOperand(0);
+    Type *SrcTy = Op0COp->getType();
+    // fold (and (cast A), (cast B)) -> (cast (and A, B))
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
-      Type *SrcTy = Op0C->getOperand(0)->getType();
       if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
          SrcTy == Op1C->getOperand(0)->getType() &&
          SrcTy->isIntOrIntVectorTy()) {
-        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+        Value *Op1COp = Op1C->getOperand(0);
 
         // Only do this if the casts both really cause code to be generated.
         if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
@@ -1478,6 +1520,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       }
     }
 
+    // If we are masking off the sign bit of a floating-point value, convert
+    // this to the canonical fabs intrinsic call and cast back to integer.
+    // The backend should know how to optimize fabs().
+    // TODO: This transform should also apply to vectors.
+    ConstantInt *CI;
+    if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
+        match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
+      Module *M = I.getModule();
+      Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
+      Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
+      return CastInst::CreateBitOrPointerCast(Call, I.getType());
+    }
+  }
+
   {
     Value *X = nullptr;
     bool OpsSwapped = false;
@@ -1509,163 +1565,195 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   return Changed ? &I : nullptr;
 }
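// A standalone sketch (not from this patch) of the bit-level fact behind the
// fabs transform above: for an IEEE-754 double, clearing the top bit of the
// representation yields the absolute value, NaNs included.
#include <cstdint>
#include <cstring>
static double fabsViaSignMask(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast double -> u64
  Bits &= ~(UINT64_C(1) << 63);         // mask off only the sign bit
  std::memcpy(&X, &Bits, sizeof(X));    // bitcast back
  return X;                             // equals std::fabs(X)
}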
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap.  The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero.  This expression is
-/// accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
+/// validate that all `value`s are identical and that the bitnumber to bitnumber
+/// mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted,
+/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
 ///
 /// This function returns true if the match was unsuccessful and false otherwise.
 /// On entry to the function the "OverallLeftShift" is a signed integer value
-/// indicating the number of bytes that the subexpression is later shifted.  For
+/// indicating the number of bits that the subexpression is later shifted.  For
 /// example, if the expression is later right shifted by 16 bits, the
-/// OverallLeftShift value would be -2 on entry.  This is used to specify which
-/// byte of ByteValues is actually being set.
+/// OverallLeftShift value would be -16 on entry.  This is used to specify which
+/// bits of BitValues are actually being set.
 ///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user.  For example, in (X & 255), X will be
-/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
+/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
+/// bit is masked to zero by a user.  For example, in (X & 255), X will be
+/// processed with a bitmask of 255.  BitMask is always in the local
+/// (OverallLeftShift) coordinate space.
 ///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
-                              SmallVectorImpl<Value *> &ByteValues) {
+static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
+                            SmallVectorImpl<Value *> &BitValues,
+                            SmallVectorImpl<int> &BitProvenance) {
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // If this is an or instruction, it may be an inner node of the bswap.
-    if (I->getOpcode() == Instruction::Or) {
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues) ||
-             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
+    if (I->getOpcode() == Instruction::Or)
+      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance) ||
+             CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance);
 
-    // If this is a logical shift by a constant multiple of 8, recurse with
-    // OverallLeftShift and ByteMask adjusted.
+    // If this is a logical shift by a constant, recurse with OverallLeftShift
+    // and BitMask adjusted.
     if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
       unsigned ShAmt =
-        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
-      // Ensure the shift amount is defined and of a byte value.
-      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined.
+      if (ShAmt > BitValues.size())
        return true;
 
-      unsigned ByteShift = ShAmt >> 3;
+      unsigned BitShift = ShAmt;
       if (I->getOpcode() == Instruction::Shl) {
-        // X << 2 -> collect(X, +2)
-        OverallLeftShift += ByteShift;
-        ByteMask >>= ByteShift;
+        // X << C -> collect(X, +C)
+        OverallLeftShift += BitShift;
+        BitMask = BitMask.lshr(BitShift);
       } else {
-        // X >>u 2 -> collect(X, -2)
-        OverallLeftShift -= ByteShift;
-        ByteMask <<= ByteShift;
-        ByteMask &= (~0U >> (32-ByteValues.size()));
+        // X >>u C -> collect(X, -C)
+        OverallLeftShift -= BitShift;
+        BitMask = BitMask.shl(BitShift);
       }
 
-      if (OverallLeftShift >= (int)ByteValues.size()) return true;
-      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+      if (OverallLeftShift >= (int)BitValues.size())
+        return true;
+      if (OverallLeftShift <= -(int)BitValues.size())
+        return true;
 
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
+      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance);
     }
 
-    // If this is a logical 'and' with a mask that clears bytes, clear the
-    // corresponding bytes in ByteMask.
+    // If this is a logical 'and' with a mask that clears bits, clear the
+    // corresponding bits in BitMask.
     if (I->getOpcode() == Instruction::And &&
         isa<ConstantInt>(I->getOperand(1))) {
-      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
-      unsigned NumBytes = ByteValues.size();
-      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+      unsigned NumBits = BitValues.size();
+      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
       const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
 
-      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
-        // If this byte is masked out by a later operation, we don't care what
+      for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
+        // If this bit is masked out by a later operation, we don't care what
         // the and mask is.
-        if ((ByteMask & (1 << i)) == 0)
+        if (BitMask[i] == 0)
          continue;
 
-        // If the AndMask is all zeros for this byte, clear the bit.
-        APInt MaskB = AndMask & Byte;
+        // If the AndMask is zero for this bit, clear the bit.
+ APInt MaskB = AndMask & Bit; if (MaskB == 0) { - ByteMask &= ~(1U << i); + BitMask.clearBit(i); continue; } - // If the AndMask is not all ones for this byte, it's not a bytezap. - if (MaskB != Byte) - return true; - - // Otherwise, this byte is kept. + // Otherwise, this bit is kept. } - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, - ByteValues); + return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, + BitValues, BitProvenance); } } // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap. Some observations: 1) if more than one byte - // is demanded from this input, then it could not be successfully assembled - // into a byteswap. At least one of the two bytes would not be aligned with - // their ultimate destination. - if (!isPowerOf2_32(ByteMask)) return true; - unsigned InputByteNo = countTrailingZeros(ByteMask); - - // 2) The input and ultimate destinations must line up: if byte 3 of an i32 - // is demanded, it needs to go into byte 0 of the result. This means that the - // byte needs to be shifted until it lands in the right byte bucket. The - // shift amount depends on the position: if the byte is coming from the high - // part of the value (e.g. byte 3) then it must be shifted right. If from the - // low part, it must be shifted left. - unsigned DestByteNo = InputByteNo + OverallLeftShift; - if (ByteValues.size()-1-DestByteNo != InputByteNo) + // the input value to the bswap/bitreverse. To be part of a bswap or + // bitreverse we must be demanding a contiguous range of bits from it. + unsigned InputBitLen = BitMask.countPopulation(); + unsigned InputBitNo = BitMask.countTrailingZeros(); + if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != + InputBitLen) + // Not a contiguous set range of bits! return true; - // If the destination byte value is already defined, the values are or'd - // together, which isn't a bswap (unless it's an or of the same bits). - if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + // We know we're moving a contiguous range of bits from the input to the + // output. Record which bits in the output came from which bits in the input. + unsigned DestBitNo = InputBitNo + OverallLeftShift; + for (unsigned I = 0; I < InputBitLen; ++I) + BitProvenance[DestBitNo + I] = InputBitNo + I; + + // If the destination bit value is already defined, the values are or'd + // together, which isn't a bswap/bitreverse (unless it's an or of the same + // bits). + if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) return true; - ByteValues[DestByteNo] = V; + for (unsigned I = 0; I < InputBitLen; ++I) + BitValues[DestBitNo + I] = V; + return false; } -/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. -/// If so, insert the new bswap intrinsic and return it. -Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. + From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bswap or bitreverse +/// idiom. If so, insert the new intrinsic and return it. 
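// Aside: a minimal standalone sketch (plain C++, not LLVM code; okForBSwap and
// okForBitReverse are local stand-ins for the two predicates defined above)
// showing which (source bit, destination bit) pairs each check accepts.
#include <cassert>

static bool okForBSwap(unsigned From, unsigned To, unsigned BitWidth) {
  if (From % 8 != To % 8)
    return false;                               // keep intra-byte position
  return From / 8 == BitWidth / 8 - To / 8 - 1; // bytes must be mirrored
}

static bool okForBitReverse(unsigned From, unsigned To, unsigned BitWidth) {
  return From == BitWidth - To - 1;             // bits must be mirrored
}

int main() {
  // For a 32-bit bswap, source bit 0 (low bit of byte 0) must land at
  // destination bit 24 (low bit of byte 3)...
  assert(okForBSwap(0, 24, 32));
  // ...while a 32-bit bitreverse sends source bit 0 to destination bit 31.
  assert(okForBitReverse(0, 31, 32));
  assert(!okForBitReverse(0, 24, 32)); // a byteswap is not a bit reversal
  return 0;
}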
+Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
   IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-  if (!ITy || ITy->getBitWidth() % 16 ||
-      // ByteMask only allows up to 32-byte values.
-      ITy->getBitWidth() > 32*8)
-    return nullptr;   // Can only bswap pairs of bytes.  Can't do vectors.
-
-  /// ByteValues - For each byte of the result, we keep track of which value
-  /// defines each byte.
-  SmallVector<Value*, 8> ByteValues;
-  ByteValues.resize(ITy->getBitWidth()/8);
-
+  if (!ITy)
+    return nullptr;   // Can't do vectors.
+  unsigned BW = ITy->getBitWidth();
+
+  /// We keep track of which bit (BitProvenance) inside which value (BitValues)
+  /// defines each bit in the result.
+  SmallVector<Value *, 8> BitValues(BW, nullptr);
+  SmallVector<int, 8> BitProvenance(BW, -1);
+
   // Try to find all the pieces corresponding to the bswap.
-  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
-  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+  APInt BitMask = APInt::getAllOnesValue(BitValues.size());
+  if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
     return nullptr;
 
-  // Check to see if all of the bytes come from the same value.
-  Value *V = ByteValues[0];
-  if (!V) return nullptr;  // Didn't find a byte?  Must be zero.
+  // Check to see if all of the bits come from the same value.
+  Value *V = BitValues[0];
+  if (!V) return nullptr;  // Didn't find a bit?  Must be zero.
 
-  // Check to make sure that all of the bytes come from the same value.
-  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
-    if (ByteValues[i] != V)
-      return nullptr;
-  Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+  if (!std::all_of(BitValues.begin(), BitValues.end(),
+                   [&](const Value *X) { return X == V; }))
+    return nullptr;
+
+  // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+  // only byteswap values with an even number of bytes.
+  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
+  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
+    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+    OKForBitReverse &=
+        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+  }
+
+  Intrinsic::ID Intrin;
+  if (OKForBSwap)
+    Intrin = Intrinsic::bswap;
+  else if (OKForBitReverse)
+    Intrin = Intrinsic::bitreverse;
+  else
+    return nullptr;
+
+  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
   return CallInst::Create(F, V);
 }
 
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
-/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
-/// we can simplify this expression to "cond ? C : D or B".
+/// We have an expression of the form (A&C)|(B&D). If A is (cond?-1:0) and
+/// either B or D is ~(cond?-1:0) or (cond?0:-1), then we can simplify this
+/// expression to "cond ? C : D or B".
 static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
                                          Value *C, Value *D) {
   // If A is not a select of -1/0, this cannot match.
@@ -1688,7 +1776,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
   return nullptr;
 }
 
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+/// Fold (icmp)|(icmp) if possible.
 Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                    Instruction *CxtI) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1905,14 +1993,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   case ICmpInst::ICMP_EQ:
     if (LHS->getOperand(0) == RHS->getOperand(0)) {
       // if LHSCst and RHSCst differ only by one bit:
-      // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+      // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
       assert(LHSCst->getValue().ule(RHSCst->getValue()));
       APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
       if (Xor.isPowerOf2()) {
-        Value *NegCst = Builder->getInt(~Xor);
-        Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
-        return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+        Value *Cst = Builder->getInt(Xor);
+        Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
+        return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
       }
     }
 
@@ -2020,9 +2108,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return nullptr;
 }
 
-/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp).  NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
 Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
       RHS->getPredicate() == FCmpInst::FCMP_UNO &&
@@ -2080,7 +2167,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   return nullptr;
 }
 
-/// FoldOrWithConstants - This helper function folds:
+/// This helper function folds:
 ///
 ///     ((A | B) & C1) | (B & C2)
 ///
@@ -2199,14 +2286,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   ConstantInt *C1 = nullptr, *C2 = nullptr;
 
   // (A | B) | C and A | (B | C) -> bswap if possible.
+  bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+                 match(Op1, m_Or(m_Value(), m_Value()));
   // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
-  if (match(Op0, m_Or(m_Value(), m_Value())) ||
-      match(Op1, m_Or(m_Value(), m_Value())) ||
-      (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
-       match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
-    if (Instruction *BSwap = MatchBSwap(I))
+  bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+                    match(Op1, m_LogicalShift(m_Value(), m_Value()));
+  // (A & B) | (C & D) -> bswap if possible.
+  bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+                  match(Op1, m_And(m_Value(), m_Value()));
+
+  if (OrOfOrs || OrOfShifts || OrOfAnds)
+    if (Instruction *BSwap = MatchBSwapOrBitReverse(I))
       return BSwap;
-  }
 
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
   if (Op0->hasOneUse() &&
@@ -2360,14 +2451,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
     return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
 
-  // (~A | ~B) == (~(A & B)) - De Morgan's Law
-  if (Value *Op0NotVal = dyn_castNotVal(Op0))
-    if (Value *Op1NotVal = dyn_castNotVal(Op1))
-      if (Op0->hasOneUse() && Op1->hasOneUse()) {
-        Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
-                                        I.getName()+".demorgan");
-        return BinaryOperator::CreateNot(And);
-      }
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+    return DeMorgan;
 
   // Canonicalize xor to the RHS.
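// Aside: a tiny standalone check (plain C++ harness, not LLVM code) of the
// identity the matchDeMorgansLaws rewrite above relies on:
// (~A | ~B) == ~(A & B), which trades two 'not' instructions for one.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      assert(uint8_t(~A | ~B) == uint8_t(~(A & B))); // exhaustive over i8
  return 0;
}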
bool SwappedForXor = false; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6de380bcad67..e3634f269cf5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -67,8 +67,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - MinAlign, false)); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); return MI; } @@ -198,12 +197,140 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86immshift(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + bool LogicalShift = false; + bool ShiftLeft = false; + + switch (II.getIntrinsicID()) { + default: + return nullptr; + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + LogicalShift = false; ShiftLeft = false; + break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + LogicalShift = true; ShiftLeft = false; + break; + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + LogicalShift = true; ShiftLeft = true; + break; + } + assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left"); + + // Simplify if count is constant. + auto Arg1 = II.getArgOperand(1); + auto CAZ = dyn_cast(Arg1); + auto CDV = dyn_cast(Arg1); + auto CInt = dyn_cast(Arg1); + if (!CAZ && !CDV && !CInt) + return nullptr; + + APInt Count(64, 0); + if (CDV) { + // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector + // operand to compute the shift amount. + auto VT = cast(CDV->getType()); + unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits(); + assert((64 % BitWidth) == 0 && "Unexpected packed shift size"); + unsigned NumSubElts = 64 / BitWidth; + + // Concatenate the sub-elements to create the 64-bit value. + for (unsigned i = 0; i != NumSubElts; ++i) { + unsigned SubEltIdx = (NumSubElts - 1) - i; + auto SubElt = cast(CDV->getElementAsConstant(SubEltIdx)); + Count = Count.shl(BitWidth); + Count |= SubElt->getValue().zextOrTrunc(64); + } + } + else if (CInt) + Count = CInt->getValue(); + + auto Vec = II.getArgOperand(0); + auto VT = cast(Vec->getType()); + auto SVT = VT->getElementType(); + unsigned VWidth = VT->getNumElements(); + unsigned BitWidth = SVT->getPrimitiveSizeInBits(); + + // If shift-by-zero then just return the original value. + if (Count == 0) + return Vec; + + // Handle cases when Shift >= BitWidth. 
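// Aside: a standalone scalar model (hypothetical lshrLane/ashrLane helpers,
// not the intrinsic definitions) of the out-of-range behavior handled next:
// logical shifts by Count >= BitWidth produce zero, while arithmetic shifts
// act as if the count were clamped to BitWidth - 1.
#include <cassert>
#include <cstdint>

static uint16_t lshrLane(uint16_t V, uint64_t Count) {
  return Count >= 16 ? uint16_t(0) : uint16_t(V >> Count);
}

static int16_t ashrLane(int16_t V, uint64_t Count) {
  if (Count >= 16)
    Count = 15; // clamp: every result bit becomes a copy of the sign bit
  // Right shift of a negative value is arithmetic on mainstream targets
  // (and guaranteed from C++20 onward).
  return int16_t(V >> Count);
}

int main() {
  assert(lshrLane(0x8000, 64) == 0);                       // all bits out
  assert(ashrLane(int16_t(0x8000), 64) == int16_t(0xFFFF)); // sign smeared
  return 0;
}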
+ if (Count.uge(BitWidth)) { + // If LogicalShift - just return zero. + if (LogicalShift) + return ConstantAggregateZero::get(VT); + + // If ArithmeticShift - clamp Shift to (BitWidth - 1). + Count = APInt(64, BitWidth - 1); + } + + // Get a constant vector of the same type as the first operand. + auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth)); + auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt); + + if (ShiftLeft) + return Builder.CreateShl(Vec, ShiftVec); + + if (LogicalShift) + return Builder.CreateLShr(Vec, ShiftVec); + + return Builder.CreateAShr(Vec, ShiftVec); +} + +static Value *SimplifyX86extend(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, + bool SignExtend) { + VectorType *SrcTy = cast(II.getArgOperand(0)->getType()); + VectorType *DstTy = cast(II.getType()); + unsigned NumDstElts = DstTy->getNumElements(); + + // Extract a subvector of the first NumDstElts lanes and sign/zero extend. + SmallVector ShuffleMask; + for (int i = 0; i != (int)NumDstElts; ++i) + ShuffleMask.push_back(i); + + Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0), + UndefValue::get(SrcTy), ShuffleMask); + return SignExtend ? Builder.CreateSExt(SV, DstTy) + : Builder.CreateZExt(SV, DstTy); +} + static Value *SimplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { if (auto *CInt = dyn_cast(II.getArgOperand(2))) { VectorType *VecTy = cast(II.getType()); assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); - + // The immediate permute control byte looks like this: // [3:0] - zero mask for each 32-bit lane // [5:4] - select one 32-bit destination lane @@ -248,12 +375,202 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II, // Replace the selected destination lane with the selected source lane. ShuffleMask[DestLane] = SourceLane + 4; } - + return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask); } return nullptr; } +/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding +/// or conversion to a shuffle vector. +static Value *SimplifyX86extrq(IntrinsicInst &II, Value *Op0, + ConstantInt *CILength, ConstantInt *CIIndex, + InstCombiner::BuilderTy &Builder) { + auto LowConstantHighUndef = [&](uint64_t Val) { + Type *IntTy64 = Type::getInt64Ty(II.getContext()); + Constant *Args[] = {ConstantInt::get(IntTy64, Val), + UndefValue::get(IntTy64)}; + return ConstantVector::get(Args); + }; + + // See if we're dealing with constant values. + Constant *C0 = dyn_cast(Op0); + ConstantInt *CI0 = + C0 ? dyn_cast(C0->getAggregateElement((unsigned)0)) + : nullptr; + + // Attempt to constant fold. + if (CILength && CIIndex) { + // From AMD documentation: "The bit index and field length are each six + // bits in length other bits of the field are ignored." + APInt APIndex = CIIndex->getValue().zextOrTrunc(6); + APInt APLength = CILength->getValue().zextOrTrunc(6); + + unsigned Index = APIndex.getZExtValue(); + + // From AMD documentation: "a value of zero in the field length is + // defined as length of 64". + unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); + + // From AMD documentation: "If the sum of the bit index + length field + // is greater than 64, the results are undefined". + unsigned End = Index + Length; + + // Note that both field index and field length are 8-bit quantities. + // Since variables 'Index' and 'Length' are unsigned values + // obtained from zero-extending field index and field length + // respectively, their sum should never wrap around. 
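// Aside: a standalone scalar model (hypothetical 'extrq' helper, not the
// intrinsic) of the field-extract semantics being folded here: both fields
// are six bits, Length == 0 encodes 64, and Index + Length > 64 is undefined
// per the AMD documentation, which is why the code below returns undef.
#include <cassert>
#include <cstdint>

// Returns false to signal "result undefined" instead of extracting.
static bool extrq(uint64_t Src, unsigned Index, unsigned Length,
                  uint64_t &Out) {
  Index &= 63;
  Length &= 63;
  if (Length == 0)
    Length = 64;
  if (Index + Length > 64)
    return false;
  Out = Src >> Index;
  if (Length < 64)
    Out &= (1ULL << Length) - 1; // keep only the low Length bits
  return true;
}

int main() {
  uint64_t Out;
  assert(extrq(0xAABBCCDDEEFF0011ULL, 8, 16, Out) && Out == 0xFF00);
  assert(!extrq(0, 60, 8, Out)); // 60 + 8 > 64 -> undefined
  return 0;
}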
+ if (End > 64) + return UndefValue::get(II.getType()); + + // If we are inserting whole bytes, we can convert this to a shuffle. + // Lowering can recognize EXTRQI shuffle masks. + if ((Length % 8) == 0 && (Index % 8) == 0) { + // Convert bit indices to byte indices. + Length /= 8; + Index /= 8; + + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Type *IntTy32 = Type::getInt32Ty(II.getContext()); + VectorType *ShufTy = VectorType::get(IntTy8, 16); + + SmallVector ShuffleMask; + for (int i = 0; i != (int)Length; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + Index))); + for (int i = Length; i != 8; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(UndefValue::get(IntTy32)); + + Value *SV = Builder.CreateShuffleVector( + Builder.CreateBitCast(Op0, ShufTy), + ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask)); + return Builder.CreateBitCast(SV, II.getType()); + } + + // Constant Fold - shift Index'th bit to lowest position and mask off + // Length bits. + if (CI0) { + APInt Elt = CI0->getValue(); + Elt = Elt.lshr(Index).zextOrTrunc(Length); + return LowConstantHighUndef(Elt.getZExtValue()); + } + + // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI. + if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) { + Value *Args[] = {Op0, CILength, CIIndex}; + Module *M = II.getModule(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi); + return Builder.CreateCall(F, Args); + } + } + + // Constant Fold - extraction from zero is always {zero, undef}. + if (CI0 && CI0->equalsInt(0)) + return LowConstantHighUndef(0); + + return nullptr; +} + +/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant +/// folding or conversion to a shuffle vector. +static Value *SimplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, + APInt APLength, APInt APIndex, + InstCombiner::BuilderTy &Builder) { + + // From AMD documentation: "The bit index and field length are each six bits + // in length other bits of the field are ignored." + APIndex = APIndex.zextOrTrunc(6); + APLength = APLength.zextOrTrunc(6); + + // Attempt to constant fold. + unsigned Index = APIndex.getZExtValue(); + + // From AMD documentation: "a value of zero in the field length is + // defined as length of 64". + unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); + + // From AMD documentation: "If the sum of the bit index + length field + // is greater than 64, the results are undefined". + unsigned End = Index + Length; + + // Note that both field index and field length are 8-bit quantities. + // Since variables 'Index' and 'Length' are unsigned values + // obtained from zero-extending field index and field length + // respectively, their sum should never wrap around. + if (End > 64) + return UndefValue::get(II.getType()); + + // If we are inserting whole bytes, we can convert this to a shuffle. + // Lowering can recognize INSERTQI shuffle masks. + if ((Length % 8) == 0 && (Index % 8) == 0) { + // Convert bit indices to byte indices. 
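// Aside: a standalone sketch (plain C++; Mask/Pos are local names) of the
// 16 x i8 shuffle mask this byte-aligned INSERTQ path builds: lanes select
// Op0 bytes via indices 0..15 and Op1 bytes via indices 16..31, and the
// upper eight result bytes are left undef (-1). Shown for Index = 8,
// Length = 16, i.e. inserting two bytes of Op1 at byte 1 of Op0.
#include <cstdio>

int main() {
  const unsigned Index = 8, Length = 16; // bit positions, byte aligned
  const unsigned ByteIdx = Index / 8, ByteLen = Length / 8;
  int Mask[16];
  unsigned Pos = 0;
  for (unsigned i = 0; i != ByteIdx; ++i)
    Mask[Pos++] = int(i);                       // leading Op0 bytes kept
  for (unsigned i = 0; i != ByteLen; ++i)
    Mask[Pos++] = int(i + 16);                  // inserted Op1 bytes
  for (unsigned i = ByteIdx + ByteLen; i != 8; ++i)
    Mask[Pos++] = int(i);                       // trailing Op0 bytes kept
  for (; Pos != 16; ++Pos)
    Mask[Pos] = -1;                             // upper half undef
  for (int M : Mask)
    std::printf("%d ", M); // prints: 0 16 17 3 4 5 6 7 -1 -1 ... -1
  return 0;
}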
+ Length /= 8; + Index /= 8; + + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Type *IntTy32 = Type::getInt32Ty(II.getContext()); + VectorType *ShufTy = VectorType::get(IntTy8, 16); + + SmallVector ShuffleMask; + for (int i = 0; i != (int)Index; ++i) + ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + for (int i = 0; i != (int)Length; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + for (int i = Index + Length; i != 8; ++i) + ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(UndefValue::get(IntTy32)); + + Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy), + Builder.CreateBitCast(Op1, ShufTy), + ConstantVector::get(ShuffleMask)); + return Builder.CreateBitCast(SV, II.getType()); + } + + // See if we're dealing with constant values. + Constant *C0 = dyn_cast(Op0); + Constant *C1 = dyn_cast(Op1); + ConstantInt *CI00 = + C0 ? dyn_cast(C0->getAggregateElement((unsigned)0)) + : nullptr; + ConstantInt *CI10 = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)0)) + : nullptr; + + // Constant Fold - insert bottom Length bits starting at the Index'th bit. + if (CI00 && CI10) { + APInt V00 = CI00->getValue(); + APInt V10 = CI10->getValue(); + APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index); + V00 = V00 & ~Mask; + V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index); + APInt Val = V00 | V10; + Type *IntTy64 = Type::getInt64Ty(II.getContext()); + Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()), + UndefValue::get(IntTy64)}; + return ConstantVector::get(Args); + } + + // If we were an INSERTQ call, we'll save demanded elements if we convert to + // INSERTQI. + if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) { + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Constant *CILength = ConstantInt::get(IntTy8, Length, false); + Constant *CIIndex = ConstantInt::get(IntTy8, Index, false); + + Value *Args[] = {Op0, Op1, CILength, CIIndex}; + Module *M = II.getModule(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); + return Builder.CreateCall(F, Args); + } + + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. @@ -289,7 +606,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, // The high bit of the selection field chooses the 1st or 2nd operand. bool LowInputSelect = Imm & 0x02; bool HighInputSelect = Imm & 0x20; - + // The low bit of the selection field chooses the low or high half // of the selected operand. bool LowHalfSelect = Imm & 0x01; @@ -298,11 +615,11 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, // Determine which operand(s) are actually in use for this instruction. Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); - + // If needed, replace operands based on zero mask. V0 = LowHalfZero ? ZeroVector : V0; V1 = HighHalfZero ? ZeroVector : V1; - + // Permute low half of result. unsigned StartIndex = LowHalfSelect ? HalfSize : 0; for (unsigned i = 0; i < HalfSize; ++i) @@ -319,6 +636,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, return nullptr; } +/// Decode XOP integer vector comparison intrinsics. 
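// Aside: a standalone scalar model (hypothetical 'vpcomLane' helper) of the
// immediate decoding implemented below; the unsigned vpcomu* intrinsics use
// the same table with unsigned comparisons.
#include <cassert>
#include <cstdint>

static bool vpcomLane(int8_t A, int8_t B, unsigned Imm) {
  switch (Imm & 0x7) {
  case 0x0: return A < B;   // lt
  case 0x1: return A <= B;  // le
  case 0x2: return A > B;   // gt
  case 0x3: return A >= B;  // ge
  case 0x4: return A == B;  // eq
  case 0x5: return A != B;  // ne
  case 0x6: return false;   // FALSE, regardless of operands
  default:  return true;    // 0x7: TRUE, regardless of operands
  }
}

int main() {
  assert(vpcomLane(-1, 2, 0x0));  // signed compare: -1 < 2
  assert(!vpcomLane(-1, 2, 0x2)); // signed compare: -1 > 2 is false
  assert(vpcomLane(0, 0, 0x7));   // TRUE lane -> all-ones after the sext
  return 0;
}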
+static Value *SimplifyX86vpcom(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, bool IsSigned) { + if (auto *CInt = dyn_cast(II.getArgOperand(2))) { + uint64_t Imm = CInt->getZExtValue() & 0x7; + VectorType *VecTy = cast(II.getType()); + CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; break; + case 0x5: + Pred = ICmpInst::ICMP_NE; break; + case 0x6: + return ConstantInt::getSigned(VecTy, 0); // FALSE + case 0x7: + return ConstantInt::getSigned(VecTy, -1); // TRUE + } + + if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0), II.getArgOperand(1))) + return Builder.CreateSExtOrTrunc(Cmp, VecTy); + } + return nullptr; +} + /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. @@ -371,7 +725,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (MemMoveInst *MMI = dyn_cast(MI)) { if (GlobalVariable *GVSrc = dyn_cast(MMI->getSource())) if (GVSrc->isConstant()) { - Module *M = CI.getParent()->getParent()->getParent(); + Module *M = CI.getModule(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; Type *Tys[3] = { CI.getArgOperand(0)->getType(), CI.getArgOperand(1)->getType(), @@ -400,6 +754,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } + auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width, unsigned DemandedWidth) + { + APInt UndefElts(Width, 0); + APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); + return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); + }; + switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { @@ -427,6 +788,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::bitreverse: { + Value *IIOperand = II->getArgOperand(0); + Value *X = nullptr; + + // bitreverse(bitreverse(x)) -> x + if (match(IIOperand, m_Intrinsic(m_Value(X)))) + return ReplaceInstUsesWith(CI, X); + break; + } + case Intrinsic::powi: if (ConstantInt *Power = dyn_cast(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 @@ -669,6 +1040,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return new StoreInst(II->getArgOperand(0), Ptr); } break; + case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -682,6 +1054,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; + case Intrinsic::x86_vcvtph2ps_128: + case Intrinsic::x86_vcvtph2ps_256: { + auto Arg = II->getArgOperand(0); + auto ArgType = cast(Arg->getType()); + auto RetType = cast(II->getType()); + unsigned ArgWidth = ArgType->getNumElements(); + unsigned RetWidth = RetType->getNumElements(); + assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths"); + assert(ArgType->isIntOrIntVectorTy() && + ArgType->getScalarSizeInBits() == 16 && + "CVTPH2PS input type should be 16-bit integer vector"); + assert(RetType->getScalarType()->isFloatTy() && + "CVTPH2PS output type should be 32-bit float vector"); + + // Constant folding: Convert to generic half to single conversion. 
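// Aside: the fold below reduces the intrinsic to "reinterpret each 16-bit
// lane as half, then fpext to float". A standalone scalar sketch of that
// conversion (assumes IEEE-754 binary16/binary32; halfToFloat is a local
// helper, not an LLVM API):
#include <cassert>
#include <cstdint>
#include <cstring>

static float halfToFloat(uint16_t H) {
  uint32_t Sign = (H >> 15) & 1, Exp = (H >> 10) & 0x1F, Man = H & 0x3FF;
  uint32_t Bits;
  if (Exp == 0x1F)                  // inf/NaN: widen the payload field
    Bits = (Sign << 31) | 0x7F800000 | (Man << 13);
  else if (Exp != 0)                // normal: rebias 15 -> 127
    Bits = (Sign << 31) | ((Exp + 112) << 23) | (Man << 13);
  else if (Man == 0)                // signed zero
    Bits = Sign << 31;
  else {                            // subnormal half: renormalize
    Exp = 113;
    while (!(Man & 0x400)) { Man <<= 1; --Exp; }
    Bits = (Sign << 31) | (Exp << 23) | ((Man & 0x3FF) << 13);
  }
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  assert(halfToFloat(0x3C00) == 1.0f);  // 1.0h
  assert(halfToFloat(0xC000) == -2.0f); // -2.0h
  return 0;
}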
+ if (isa(Arg)) + return ReplaceInstUsesWith(*II, ConstantAggregateZero::get(RetType)); + + if (isa(Arg)) { + auto VectorHalfAsShorts = Arg; + if (RetWidth < ArgWidth) { + SmallVector SubVecMask; + for (unsigned i = 0; i != RetWidth; ++i) + SubVecMask.push_back((int)i); + VectorHalfAsShorts = Builder->CreateShuffleVector( + Arg, UndefValue::get(ArgType), SubVecMask); + } + + auto VectorHalfType = + VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); + auto VectorHalfs = + Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + return ReplaceInstUsesWith(*II, VectorFloats); + } + + // We only use the lowest lanes of the argument. + if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) { + II->setArgOperand(0, V); + return II; + } + break; + } + case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse_cvttss2si: @@ -692,194 +1108,229 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_cvttsd2si64: { // These intrinsics only demand the 0th element of their input vectors. If // we can simplify the input based on that, do so now. - unsigned VWidth = - cast(II->getArgOperand(0)->getType())->getNumElements(); - APInt DemandedElts(VWidth, 1); - APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), - DemandedElts, UndefElts)) { + Value *Arg = II->getArgOperand(0); + unsigned VWidth = Arg->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) { II->setArgOperand(0, V); return II; } break; } - // Constant fold << Ci. - // FIXME: We don't handle _dq because it's a shift of an i128, but is - // represented in the IR as <2 x i64>. A per element shift is wrong. - case Intrinsic::x86_sse2_psll_d: - case Intrinsic::x86_sse2_psll_q: - case Intrinsic::x86_sse2_psll_w: - case Intrinsic::x86_sse2_pslli_d: - case Intrinsic::x86_sse2_pslli_q: - case Intrinsic::x86_sse2_pslli_w: - case Intrinsic::x86_avx2_psll_d: - case Intrinsic::x86_avx2_psll_q: - case Intrinsic::x86_avx2_psll_w: - case Intrinsic::x86_avx2_pslli_d: - case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_pslli_w: - case Intrinsic::x86_sse2_psrl_d: - case Intrinsic::x86_sse2_psrl_q: - case Intrinsic::x86_sse2_psrl_w: + // Constant fold ashr( , Ci ). + // Constant fold lshr( , Ci ). + // Constant fold shl( , Ci ). 
+ case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + if (Value *V = SimplifyX86immshift(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: case Intrinsic::x86_avx2_psrl_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrli_w: { - // Simplify if count is constant. To 0 if >= BitWidth, - // otherwise to shl/lshr. - auto CDV = dyn_cast(II->getArgOperand(1)); - auto CInt = dyn_cast(II->getArgOperand(1)); - if (!CDV && !CInt) - break; - ConstantInt *Count; - if (CDV) - Count = cast(CDV->getElementAsConstant(0)); - else - Count = CInt; + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: { + if (Value *V = SimplifyX86immshift(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); - auto Vec = II->getArgOperand(0); - auto VT = cast(Vec->getType()); - if (Count->getZExtValue() > - VT->getElementType()->getPrimitiveSizeInBits() - 1) - return ReplaceInstUsesWith( - CI, ConstantAggregateZero::get(Vec->getType())); + // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector + // operand to compute the shift amount. + Value *Arg1 = II->getArgOperand(1); + assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 && + "Unexpected packed shift size"); + unsigned VWidth = Arg1->getType()->getVectorNumElements(); - bool isPackedShiftLeft = true; - switch (II->getIntrinsicID()) { - default : break; - case Intrinsic::x86_sse2_psrl_d: - case Intrinsic::x86_sse2_psrl_q: - case Intrinsic::x86_sse2_psrl_w: - case Intrinsic::x86_sse2_psrli_d: - case Intrinsic::x86_sse2_psrli_q: - case Intrinsic::x86_sse2_psrli_w: - case Intrinsic::x86_avx2_psrl_d: - case Intrinsic::x86_avx2_psrl_q: - case Intrinsic::x86_avx2_psrl_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; - } - - unsigned VWidth = VT->getNumElements(); - // Get a constant vector of the same type as the first operand. - auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); - if (isPackedShiftLeft) - return BinaryOperator::CreateShl(Vec, - Builder->CreateVectorSplat(VWidth, VTCI)); - - return BinaryOperator::CreateLShr(Vec, - Builder->CreateVectorSplat(VWidth, VTCI)); - } - - case Intrinsic::x86_sse41_pmovsxbw: - case Intrinsic::x86_sse41_pmovsxwd: - case Intrinsic::x86_sse41_pmovsxdq: - case Intrinsic::x86_sse41_pmovzxbw: - case Intrinsic::x86_sse41_pmovzxwd: - case Intrinsic::x86_sse41_pmovzxdq: { - // pmov{s|z}x ignores the upper half of their input vectors. 
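// Aside: a standalone scalar sketch (plain C++ arrays standing in for the
// vectors) of what SimplifyX86extend lowers these intrinsics to: extract the
// low NumDstElts lanes with a shuffle, then zext (pmovzx) or sext (pmovsx)
// each lane to the wider element type.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Src[8] = {0x80, 0x01, 0xFF, 0x7F, 9, 9, 9, 9}; // upper half ignored
  uint16_t ZExt[4];
  int16_t SExt[4];
  for (int i = 0; i != 4; ++i) {
    ZExt[i] = uint16_t(Src[i]);        // pmovzxbw lane: zero extend
    SExt[i] = int16_t(int8_t(Src[i])); // pmovsxbw lane: sign extend
  }
  assert(ZExt[0] == 0x0080 && SExt[0] == int16_t(0xFF80));
  assert(ZExt[2] == 0x00FF && SExt[2] == -1);
  return 0;
}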
- unsigned VWidth = - cast(II->getArgOperand(0)->getType())->getNumElements(); - unsigned LowHalfElts = VWidth / 2; - APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); - APInt UndefElts(VWidth, 0); - if (Value *TmpV = SimplifyDemandedVectorElts( - II->getArgOperand(0), InputDemandedElts, UndefElts)) { - II->setArgOperand(0, TmpV); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) { + II->setArgOperand(1, V); return II; } break; } + + case Intrinsic::x86_avx2_pmovsxbd: + case Intrinsic::x86_avx2_pmovsxbq: + case Intrinsic::x86_avx2_pmovsxbw: + case Intrinsic::x86_avx2_pmovsxdq: + case Intrinsic::x86_avx2_pmovsxwd: + case Intrinsic::x86_avx2_pmovsxwq: + if (Value *V = SimplifyX86extend(*II, *Builder, true)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_sse41_pmovzxbd: + case Intrinsic::x86_sse41_pmovzxbq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxdq: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxwq: + case Intrinsic::x86_avx2_pmovzxbd: + case Intrinsic::x86_avx2_pmovzxbq: + case Intrinsic::x86_avx2_pmovzxbw: + case Intrinsic::x86_avx2_pmovzxdq: + case Intrinsic::x86_avx2_pmovzxwd: + case Intrinsic::x86_avx2_pmovzxwq: + if (Value *V = SimplifyX86extend(*II, *Builder, false)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse41_insertps: if (Value *V = SimplifyX86insertps(*II, *Builder)) return ReplaceInstUsesWith(*II, V); break; - + + case Intrinsic::x86_sse4a_extrq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth0 = Op0->getType()->getVectorNumElements(); + unsigned VWidth1 = Op1->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && + VWidth1 == 16 && "Unexpected operand sizes"); + + // See if we're dealing with constant values. + Constant *C1 = dyn_cast(Op1); + ConstantInt *CILength = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)0)) + : nullptr; + ConstantInt *CIIndex = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)1)) + : nullptr; + + // Attempt to simplify to a constant, shuffle vector or EXTRQI call. + if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + return ReplaceInstUsesWith(*II, V); + + // EXTRQ only uses the lowest 64-bits of the first 128-bit vector + // operands and the lowest 16-bits of the second. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { + II->setArgOperand(0, V); + return II; + } + if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) { + II->setArgOperand(1, V); + return II; + } + break; + } + + case Intrinsic::x86_sse4a_extrqi: { + // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining + // bits of the lower 64-bits. The upper 64-bits are undefined. + Value *Op0 = II->getArgOperand(0); + unsigned VWidth = Op0->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && + "Unexpected operand size"); + + // See if we're dealing with constant values. + ConstantInt *CILength = dyn_cast(II->getArgOperand(1)); + ConstantInt *CIIndex = dyn_cast(II->getArgOperand(2)); + + // Attempt to simplify to a constant or shuffle vector. + if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + return ReplaceInstUsesWith(*II, V); + + // EXTRQI only uses the lowest 64-bits of the first 128-bit vector + // operand. 
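// Aside: a sketch of the DemandedElts bitmask the SimplifyDemandedVectorEltsLow
// lambda builds: only the low DemandedWidth lanes are marked live, so the
// combiner may rewrite producers of the dead upper lanes to undef. Plain C++
// stand-in; the real code uses llvm::APInt::getLowBitsSet.
#include <cassert>
#include <cstdint>

static uint64_t lowLanesMask(unsigned Width, unsigned DemandedWidth) {
  assert(DemandedWidth <= Width && Width <= 64);
  return DemandedWidth == 64 ? ~0ULL : ((1ULL << DemandedWidth) - 1);
}

int main() {
  // EXTRQ/EXTRQI on <2 x i64> only read element 0 of the first operand:
  assert(lowLanesMask(2, 1) == 0b01);
  // A 128-bit shift-count vector <8 x i16> only contributes its low 4 lanes:
  assert(lowLanesMask(8, 4) == 0b00001111);
  return 0;
}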
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { + II->setArgOperand(0, V); + return II; + } + break; + } + + case Intrinsic::x86_sse4a_insertq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth = Op0->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && + Op1->getType()->getVectorNumElements() == 2 && + "Unexpected operand size"); + + // See if we're dealing with constant values. + Constant *C1 = dyn_cast(Op1); + ConstantInt *CI11 = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)1)) + : nullptr; + + // Attempt to simplify to a constant, shuffle vector or INSERTQI call. + if (CI11) { + APInt V11 = CI11->getValue(); + APInt Len = V11.zextOrTrunc(6); + APInt Idx = V11.lshr(8).zextOrTrunc(6); + if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + return ReplaceInstUsesWith(*II, V); + } + + // INSERTQ only uses the lowest 64-bits of the first 128-bit vector + // operand. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { + II->setArgOperand(0, V); + return II; + } + break; + } + case Intrinsic::x86_sse4a_insertqi: { - // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top - // ones undef - // TODO: eventually we should lower this intrinsic to IR - if (auto CIWidth = dyn_cast(II->getArgOperand(2))) { - if (auto CIStart = dyn_cast(II->getArgOperand(3))) { - unsigned Index = CIStart->getZExtValue(); - // From AMD documentation: "a value of zero in the field length is - // defined as length of 64". - unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); + // INSERTQI: Extract lowest Length bits from lower half of second source and + // insert over first source starting at Index bit. The upper 64-bits are + // undefined. + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth0 = Op0->getType()->getVectorNumElements(); + unsigned VWidth1 = Op1->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && + VWidth1 == 2 && "Unexpected operand sizes"); - // From AMD documentation: "If the sum of the bit index + length field - // is greater than 64, the results are undefined". + // See if we're dealing with constant values. + ConstantInt *CILength = dyn_cast(II->getArgOperand(2)); + ConstantInt *CIIndex = dyn_cast(II->getArgOperand(3)); - // Note that both field index and field length are 8-bit quantities. - // Since variables 'Index' and 'Length' are unsigned values - // obtained from zero-extending field index and field length - // respectively, their sum should never wrap around. - if ((Index + Length) > 64) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + // Attempt to simplify to a constant or shuffle vector. 
+ if (CILength && CIIndex) { + APInt Len = CILength->getValue().zextOrTrunc(6); + APInt Idx = CIIndex->getValue().zextOrTrunc(6); + if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + return ReplaceInstUsesWith(*II, V); + } - if (Length == 64 && Index == 0) { - Value *Vec = II->getArgOperand(1); - Value *Undef = UndefValue::get(Vec->getType()); - const uint32_t Mask[] = { 0, 2 }; - return ReplaceInstUsesWith( - CI, - Builder->CreateShuffleVector( - Vec, Undef, ConstantDataVector::get( - II->getContext(), makeArrayRef(Mask)))); + // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector + // operands. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { + II->setArgOperand(0, V); + return II; + } - } else if (auto Source = - dyn_cast(II->getArgOperand(0))) { - if (Source->hasOneUse() && - Source->getArgOperand(1) == II->getArgOperand(1)) { - // If the source of the insert has only one use and it's another - // insert (and they're both inserting from the same vector), try to - // bundle both together. - auto CISourceWidth = - dyn_cast(Source->getArgOperand(2)); - auto CISourceStart = - dyn_cast(Source->getArgOperand(3)); - if (CISourceStart && CISourceWidth) { - unsigned Start = CIStart->getZExtValue(); - unsigned Width = CIWidth->getZExtValue(); - unsigned End = Start + Width; - unsigned SourceStart = CISourceStart->getZExtValue(); - unsigned SourceWidth = CISourceWidth->getZExtValue(); - unsigned SourceEnd = SourceStart + SourceWidth; - unsigned NewStart, NewWidth; - bool ShouldReplace = false; - if (Start <= SourceStart && SourceStart <= End) { - NewStart = Start; - NewWidth = std::max(End, SourceEnd) - NewStart; - ShouldReplace = true; - } else if (SourceStart <= Start && Start <= SourceEnd) { - NewStart = SourceStart; - NewWidth = std::max(SourceEnd, End) - NewStart; - ShouldReplace = true; - } - - if (ShouldReplace) { - Constant *ConstantWidth = ConstantInt::get( - II->getArgOperand(2)->getType(), NewWidth, false); - Constant *ConstantStart = ConstantInt::get( - II->getArgOperand(3)->getType(), NewStart, false); - Value *Args[4] = { Source->getArgOperand(0), - II->getArgOperand(1), ConstantWidth, - ConstantStart }; - Module *M = CI.getParent()->getParent()->getParent(); - Value *F = - Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); - return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args)); - } - } - } - } - } + if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) { + II->setArgOperand(1, V); + return II; } break; } @@ -894,7 +1345,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // This optimization is convoluted because the intrinsic is defined as // getting a vector of floats or doubles for the ps and pd versions. // FIXME: That should be changed. + + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); Value *Mask = II->getArgOperand(2); + + // fold (blend A, A, Mask) -> A + if (Op0 == Op1) + return ReplaceInstUsesWith(CI, Op0); + + // Zero Mask - select 1st argument. + if (isa(Mask)) + return ReplaceInstUsesWith(CI, Op0); + + // Constant Mask - select 1st/2nd argument lane based on top bit of mask. 
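// Aside: a standalone scalar model (local 'blendLane' helper) of the blendv
// folds above: each result lane is chosen by the top bit of the matching
// mask lane, i.e. the intrinsic is a per-lane select(mask < 0, Op1, Op0).
#include <cassert>
#include <cstdint>

static int32_t blendLane(int32_t Op0, int32_t Op1, int32_t Mask) {
  return (Mask < 0) ? Op1 : Op0; // top bit of Mask picks the source
}

int main() {
  assert(blendLane(10, 20, 0) == 10);         // zero mask -> 1st argument
  assert(blendLane(10, 20, INT32_MIN) == 20); // sign bit set -> 2nd argument
  // Hence blendv(A, A, Mask) == A and blendv(A, B, 0) == A, the two
  // early-outs added above.
  return 0;
}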
if (auto C = dyn_cast(Mask)) { auto Tyi1 = Builder->getInt1Ty(); auto SelectorType = cast(Mask->getType()); @@ -917,11 +1381,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); } auto NewSelector = ConstantVector::get(Selectors); - return SelectInst::Create(NewSelector, II->getArgOperand(1), - II->getArgOperand(0), "blendv"); - } else { - break; + return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); } + break; + } + + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: { + // Turn pshufb(V1,mask) -> shuffle(V1,Zero,mask) if mask is a constant. + auto *V = II->getArgOperand(1); + auto *VTy = cast(V->getType()); + unsigned NumElts = VTy->getNumElements(); + assert((NumElts == 16 || NumElts == 32) && + "Unexpected number of elements in shuffle mask!"); + // Initialize the resulting shuffle mask to all zeroes. + uint32_t Indexes[32] = {0}; + + if (auto *Mask = dyn_cast(V)) { + // Each byte in the shuffle control mask forms an index to permute the + // corresponding byte in the destination operand. + for (unsigned I = 0; I < NumElts; ++I) { + int8_t Index = Mask->getElementAsInteger(I); + // If the most significant bit (bit[7]) of each byte of the shuffle + // control mask is set, then zero is written in the result byte. + // The zero vector is in the right-hand side of the resulting + // shufflevector. + + // The value of each index is the least significant 4 bits of the + // shuffle control byte. + Indexes[I] = (Index < 0) ? NumElts : Index & 0xF; + } + } else if (!isa(V)) + break; + + // The value of each index for the high 128-bit lane is the least + // significant 4 bits of the respective shuffle control byte. + for (unsigned I = 16; I < NumElts; ++I) + Indexes[I] += I & 0xF0; + + auto NewC = ConstantDataVector::get(V->getContext(), + makeArrayRef(Indexes, NumElts)); + auto V1 = II->getArgOperand(0); + auto V2 = Constant::getNullValue(II->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); } case Intrinsic::x86_avx_vpermilvar_ps: @@ -972,6 +1475,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(*II, V); break; + case Intrinsic::x86_xop_vpcomb: + case Intrinsic::x86_xop_vpcomd: + case Intrinsic::x86_xop_vpcomq: + case Intrinsic::x86_xop_vpcomw: + if (Value *V = SimplifyX86vpcom(*II, *Builder, true)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_xop_vpcomub: + case Intrinsic::x86_xop_vpcomud: + case Intrinsic::x86_xop_vpcomuq: + case Intrinsic::x86_xop_vpcomuw: + if (Value *V = SimplifyX86vpcom(*II, *Builder, false)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. // Note that ppc_altivec_vperm has a big-endian bias, so when creating @@ -1115,15 +1634,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // happen when variable allocas are DCE'd. if (IntrinsicInst *SS = dyn_cast(II->getArgOperand(0))) { if (SS->getIntrinsicID() == Intrinsic::stacksave) { - BasicBlock::iterator BI = SS; - if (&*++BI == II) + if (&*++SS->getIterator() == II) return EraseInstFromFunction(CI); } } // Scan down this block to see if there is another stack restore in the // same block without an intervening call/alloca. 
- BasicBlock::iterator BI = II; + BasicBlock::iterator BI(II); TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { @@ -1153,6 +1671,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); break; } + case Intrinsic::lifetime_start: { + // Remove trivially empty lifetime_start/end ranges, i.e. a start + // immediately followed by an end (ignoring debuginfo or other + // lifetime markers in between). + BasicBlock::iterator BI = II->getIterator(), BE = II->getParent()->end(); + for (++BI; BI != BE; ++BI) { + if (IntrinsicInst *LTE = dyn_cast(BI)) { + if (isa(LTE) || + LTE->getIntrinsicID() == Intrinsic::lifetime_start) + continue; + if (LTE->getIntrinsicID() == Intrinsic::lifetime_end) { + if (II->getOperand(0) == LTE->getOperand(0) && + II->getOperand(1) == LTE->getOperand(1)) { + EraseInstFromFunction(*LTE); + return EraseInstFromFunction(*II); + } + continue; + } + } + break; + } + break; + } case Intrinsic::assume: { // Canonicalize assume(a && b) -> assume(a); assume(b); // Note: New assumption intrinsics created here are registered by @@ -1233,7 +1774,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } // isKnownNonNull -> nonnull attribute - if (isKnownNonNull(DerivedPtr)) + if (isKnownNonNullAt(DerivedPtr, II, DT, TLI)) II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); // isDereferenceablePointer -> deref attribute @@ -1355,9 +1896,10 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem) { // Visit all the previous instructions in the basic block, and try to find a // init.trampoline which has a direct path to the adjust.trampoline. - for (BasicBlock::iterator I = AdjustTramp, - E = AdjustTramp->getParent()->begin(); I != E; ) { - Instruction *Inst = --I; + for (BasicBlock::iterator I = AdjustTramp->getIterator(), + E = AdjustTramp->getParent()->begin(); + I != E;) { + Instruction *Inst = &*--I; if (IntrinsicInst *II = dyn_cast(I)) if (II->getIntrinsicID() == Intrinsic::init_trampoline && II->getOperand(0) == TrampMem) @@ -1400,20 +1942,27 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. + SmallVector Indices; unsigned ArgNo = 0; + for (Value *V : CS.args()) { - if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) && - isKnownNonNull(V)) { - AttributeSet AS = CS.getAttributes(); - AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1, - Attribute::NonNull); - CS.setAttributes(AS); - Changed = true; - } + if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo+1, Attribute::NonNull) && + isKnownNonNullAt(V, CS.getInstruction(), DT, TLI)) + Indices.push_back(ArgNo + 1); ArgNo++; } + assert(ArgNo == CS.arg_size() && "sanity check"); + if (!Indices.empty()) { + AttributeSet AS = CS.getAttributes(); + LLVMContext &Ctx = CS.getInstruction()->getContext(); + AS = AS.addAttribute(Ctx, Indices, + Attribute::get(Ctx, Attribute::NonNull)); + CS.setAttributes(AS); + Changed = true; + } + // If the callee is a pointer to a function, attempt to move any casts to the // arguments of the call/invoke. 
Value *Callee = CS.getCalledValue(); @@ -1725,16 +2274,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), attrVec); + SmallVector OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); + Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { - NC = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args); + NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles); NC->takeName(II); cast(NC)->setCallingConv(II->getCallingConv()); cast(NC)->setAttributes(NewCallerPAL); } else { CallInst *CI = cast(Caller); - NC = Builder->CreateCall(Callee, Args); + NC = Builder->CreateCall(Callee, Args, OpBundles); NC->takeName(CI); if (CI->isTailCall()) cast(NC)->setTailCall(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 48ab0eb2c1b9..da835a192322 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -21,11 +21,11 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear -/// expression. If so, decompose it, returning some value X, such that Val is +/// Analyze 'Val', seeing if it is a simple linear expression. +/// If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. /// -static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, +static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, uint64_t &Offset) { if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); @@ -62,7 +62,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, // where C1 is divisible by C2. unsigned SubScale; Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; return SubVal; @@ -76,14 +76,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, return Val; } -/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, -/// try to eliminate the cast by moving the type information into the alloc. +/// If we find a cast of an allocation instruction, try to eliminate the cast by +/// moving the type information into the alloc. Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { PointerType *PTy = cast(CI.getType()); BuilderTy AllocaBuilder(*Builder); - AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); @@ -114,7 +114,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, unsigned ArraySizeScale; uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. - DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. 
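// Aside: a minimal standalone model (Node/decompose are illustrative local
// names, not LLVM IR) of the decomposition decomposeSimpleLinearExpr performs
// above: strip a "* C" wrapper and constant "+ C" wrappers from Val so the
// caller gets back X, Scale, Offset with Val == X * Scale + Offset.
#include <cassert>

struct Node {
  enum { Leaf, Mul, Add } Kind;
  const Node *Sub;
  unsigned Cst;     // constant operand of Mul/Add
  unsigned LeafVal; // value of a Leaf, for checking only
};

static const Node *decompose(const Node *N, unsigned &Scale,
                             unsigned &Offset) {
  if (N->Kind == Node::Mul) { // X * C: a scaled value, no offset yet
    Scale = N->Cst;
    Offset = 0;
    return N->Sub;
  }
  if (N->Kind == Node::Add) { // inner + C: fold C into the offset
    const Node *X = decompose(N->Sub, Scale, Offset);
    Offset += N->Cst;
    return X;
  }
  Scale = 1; // opaque value: X * 1 + 0
  Offset = 0;
  return N;
}

int main() {
  Node X{Node::Leaf, nullptr, 0, 5};
  Node M{Node::Mul, &X, 4, 0}; // X * 4
  Node A{Node::Add, &M, 3, 0}; // X * 4 + 3
  unsigned Scale, Offset;
  const Node *Root = decompose(&A, Scale, Offset);
  assert(Root == &X && Scale == 4 && Offset == 3);
  assert(Root->LeafVal * Scale + Offset == 5 * 4 + 3); // post-condition
  return 0;
}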
@@ -154,9 +154,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that -/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually -/// insert the code to evaluate the expression. +/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns +/// true for, actually insert the code to evaluate the expression. Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { @@ -261,9 +260,9 @@ isEliminableCastPair(const CastInst *CI, ///< First cast instruction return Instruction::CastOps(Res); } -/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually -/// results in any code being generated and is interesting to optimize out. If -/// the cast can be eliminated by some other simple transformation, we prefer +/// Return true if the cast from "V to Ty" actually results in any code being +/// generated and is interesting to optimize out. +/// If the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { @@ -318,9 +317,9 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return nullptr; } -/// CanEvaluateTruncated - Return true if we can evaluate the specified -/// expression tree as type Ty instead of its larger type, and arrive with the -/// same value. This is used by code that tries to eliminate truncates. +/// Return true if we can evaluate the specified expression tree as type Ty +/// instead of its larger type, and arrive with the same value. +/// This is used by code that tries to eliminate truncates. /// /// Ty will always be a type smaller than V. We should return true if trunc(V) /// can be computed by computing V in the smaller type. If V is an instruction, @@ -329,7 +328,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, +static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) @@ -359,8 +358,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. 
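// Aside: a standalone numeric check (plain C++ harness) of the property
// canEvaluateTruncated relies on for these opcodes: the low bits of the wide
// result depend only on the low bits of the operands, so computing in the
// narrow type and truncating the inputs gives the same value as truncating
// the wide result.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; x += 7)
    for (uint32_t y = 0; y < 1000; y += 13) {
      assert(uint8_t(x + y) == uint8_t(uint8_t(x) + uint8_t(y)));
      assert(uint8_t(x * y) == uint8_t(uint8_t(x) * uint8_t(y)));
      assert(uint8_t(x & y) == (uint8_t(x) & uint8_t(y)));
    }
  return 0;
}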
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); case Instruction::UDiv: case Instruction::URem: { @@ -371,8 +370,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) && IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); } } break; @@ -383,7 +382,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } break; case Instruction::LShr: @@ -396,7 +395,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, if (IC.MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } } break; @@ -410,8 +409,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, return true; case Instruction::Select: { SelectInst *SI = cast(I); - return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && - CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); + return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && + canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); } case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never @@ -419,7 +418,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, // instructions with a single use. PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI)) + if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI)) return false; return true; } @@ -431,6 +430,50 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, return false; } +/// Given a vector that is bitcast to an integer, optionally logically +/// right-shifted, and truncated, convert it to an extractelement. +/// Example (big endian): +/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 +/// ---> +/// extractelement <4 x i32> %X, 1 +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, + const DataLayout &DL) { + Value *TruncOp = Trunc.getOperand(0); + Type *DestType = Trunc.getType(); + if (!TruncOp->hasOneUse() || !isa(DestType)) + return nullptr; + + Value *VecInput = nullptr; + ConstantInt *ShiftVal = nullptr; + if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)), + m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShiftVal)))) || + !isa(VecInput->getType())) + return nullptr; + + VectorType *VecType = cast(VecInput->getType()); + unsigned VecWidth = VecType->getPrimitiveSizeInBits(); + unsigned DestWidth = DestType->getPrimitiveSizeInBits(); + unsigned ShiftAmount = ShiftVal ? 
                                          ShiftVal->getZExtValue() : 0;
+
+  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+    return nullptr;
+
+  // If the element type of the vector doesn't match the result type,
+  // bitcast it to a vector type that we can extract from.
+  unsigned NumVecElts = VecWidth / DestWidth;
+  if (VecType->getElementType() != DestType) {
+    VecType = VectorType::get(DestType, NumVecElts);
+    VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+  }
+
+  unsigned Elt = ShiftAmount / DestWidth;
+  if (DL.isBigEndian())
+    Elt = NumVecElts - 1 - Elt;
+
+  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -441,7 +484,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // min/max.
   Value *LHS, *RHS;
   if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
-    if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+    if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
       return nullptr;

   // See if we can simplify any instructions used by the input whose sole
@@ -457,7 +500,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
+      canEvaluateTruncated(Src, DestTy, *this, &CI)) {

     // If this cast is a truncate, evaluating in a different type always
     // eliminates the cast, so it is always a win.
@@ -470,7 +513,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
   if (DestTy->getScalarSizeInBits() == 1) {
-    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Constant *One = ConstantInt::get(SrcTy, 1);
     Src = Builder->CreateAnd(Src, One);
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
@@ -489,31 +532,54 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     // If the shift amount is larger than the size of A, then the result is
     // known to be zero because all the input bits got shifted out.
     if (Cst->getZExtValue() >= ASize)
-      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));

     // Since we're doing an lshr and a zero extend, and know that the shift
     // amount is smaller than ASize, it is always safe to do the shift in A's
     // type, then zero extend or truncate to the result.
     Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
     Shift->takeName(Src);
-    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+    return CastInst::CreateIntegerCast(Shift, DestTy, false);
+  }
+
+  // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+  // conversion.
+  // It works because bits coming from sign extension have the same value as
+  // the sign bit of the original value; performing ashr instead of lshr
+  // generates bits of the same value as the sign bit.
+  if (Src->hasOneUse() &&
+      match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+    const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    // This optimization can only be performed when zero bits generated by
+    // the original lshr aren't pulled into the value after truncation, so we
+    // can only shift by values smaller than the size of destination type (in
+    // bits).
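// Aside for the reader, not part of the patch: a standalone sketch of the
// trunc(lshr(sext A), Cst) == ashr(A, Cst) identity argued for above, with
// Cst below the destination width. It assumes two's-complement narrowing and
// an arithmetic signed right shift, which mainstream compilers provide (and
// C++20 guarantees).
#include <cassert>
#include <cstdint>

static int32_t truncLshrSext(int32_t A, unsigned C) {
  uint64_t Wide = static_cast<uint64_t>(static_cast<int64_t>(A)); // sext
  return static_cast<int32_t>(Wide >> C);                         // lshr+trunc
}

int main() {
  for (int32_t A : {INT32_MIN, -7, -1, 0, 5, INT32_MAX})
    for (unsigned C : {0u, 1u, 13u, 31u})      // all below the 32-bit width
      assert(truncLshrSext(A, C) == (A >> C)); // ashr A, C
  return 0;
}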
+ if (Cst->getValue().ult(ASize)) { + Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + } } // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. - if (Src->hasOneUse() && isa(Src->getType()) && - ShouldChangeType(Src->getType(), CI.getType()) && + if (Src->hasOneUse() && isa(SrcTy) && + ShouldChangeType(SrcTy, DestTy) && match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { - Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::CreateAnd(NewTrunc, - ConstantExpr::getTrunc(Cst, CI.getType())); + ConstantExpr::getTrunc(Cst, DestTy)); } + if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + return I; + return nullptr; } -/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (zext icmp) to bitwise / integer operations in order to eliminate +/// the icmp. Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform) { // If we are just checking for a icmp eq of a single bit and zext'ing it @@ -637,8 +703,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return nullptr; } -/// CanEvaluateZExtd - Determine if the specified value can be computed in the -/// specified wider type and produce the same low bits. If not, return false. +/// Determine if the specified value can be computed in the specified wider type +/// and produce the same low bits. If not, return false. /// /// If this function returns true, it can also return a non-zero number of bits /// (in BitsToClear) which indicates that the value it computes is correct for @@ -655,7 +721,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, +static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombiner &IC, Instruction *CxtI) { BitsToClear = 0; if (isa(V)) @@ -685,8 +751,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, case Instruction::Add: case Instruction::Sub: case Instruction::Mul: - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) || - !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI)) return false; // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) @@ -713,7 +779,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // We can promote shl(x, cst) if we can promote x. Since shl overwrites the // upper bits we can reduce BitsToClear by the shift amount. if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; uint64_t ShiftAmt = Amt->getZExtValue(); BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0; @@ -724,7 +790,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // We can promote lshr(x, cst) if we can promote x. 
This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; BitsToClear += Amt->getZExtValue(); if (BitsToClear > V->getType()->getScalarSizeInBits()) @@ -734,8 +800,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // Cannot promote variable LSHR. return false; case Instruction::Select: - if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || - !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || + if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear are // known zero in the disagreeing side. Tmp != BitsToClear) @@ -747,10 +813,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // get into trouble with cyclic PHIs here because we only consider // instructions with a single use. PHINode *PN = cast(I); - if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI)) return false; for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) || + if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear // are known zero in the disagreeing input. Tmp != BitsToClear) @@ -787,13 +853,13 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { + canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid zero extend: " << CI); + " to avoid zero extend: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); @@ -897,8 +963,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return nullptr; } -/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp. Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); ICmpInst::Predicate Pred = ICI->getPredicate(); @@ -985,15 +1050,14 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { return nullptr; } -/// CanEvaluateSExtd - Return true if we can take the specified value -/// and return it as type Ty without inserting any new casts and without -/// changing the value of the common low bits. This is used by code that tries -/// to promote integer operations to a wider types will allow us to eliminate -/// the extension. +/// Return true if we can take the specified value and return it as type Ty +/// without inserting any new casts and without changing the value of the common +/// low bits. 
This is used by code that tries to promote integer operations to
+/// a wider type, which will allow us to eliminate the extension.
 ///
 /// This function works on both vectors and scalars.
 ///
-static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
   assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
          "Can't sign extend type to a smaller type");
   // If this is a constant, it can be trivially promoted.
@@ -1023,15 +1087,15 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
   case Instruction::Sub:
   case Instruction::Mul:
     // These operators can all arbitrarily be extended if their inputs can.
-    return CanEvaluateSExtd(I->getOperand(0), Ty) &&
-           CanEvaluateSExtd(I->getOperand(1), Ty);
+    return canEvaluateSExtd(I->getOperand(0), Ty) &&
+           canEvaluateSExtd(I->getOperand(1), Ty);

   //case Instruction::Shl:   TODO
   //case Instruction::LShr:  TODO

   case Instruction::Select:
-    return CanEvaluateSExtd(I->getOperand(1), Ty) &&
-           CanEvaluateSExtd(I->getOperand(2), Ty);
+    return canEvaluateSExtd(I->getOperand(1), Ty) &&
+           canEvaluateSExtd(I->getOperand(2), Ty);

   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
@@ -1039,7 +1103,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (Value *IncValue : PN->incoming_values())
-      if (!CanEvaluateSExtd(IncValue, Ty)) return false;
+      if (!canEvaluateSExtd(IncValue, Ty)) return false;
     return true;
   }
   default:
@@ -1081,10 +1145,10 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateSExtd(Src, DestTy)) {
+      canEvaluateSExtd(Src, DestTy)) {
     // Okay, we can transform this!  Insert the new expression now.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid sign extend: " << CI);
+          " to avoid sign extend: " << CI << '\n');
     Value *Res = EvaluateInDifferentType(Src, DestTy, true);
     assert(Res->getType() == DestTy);
@@ -1149,9 +1213,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
 }

-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// Return a Constant* for the specified floating-point constant if it fits
 /// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
   bool losesInfo;
   APFloat F = CFP->getValueAPF();
   (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
@@ -1160,12 +1224,12 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
   return nullptr;
 }

-/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// If this is a floating-point extension instruction, look
 /// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V) {
+static Value *lookThroughFPExtensions(Value *V) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::FPExt)
-      return LookThroughFPExtensions(I->getOperand(0));
+      return lookThroughFPExtensions(I->getOperand(0));

   // If this value is a constant, return the constant in the smallest FP type
   // that can accurately represent it.
This allows us to turn @@ -1174,14 +1238,14 @@ static Value *LookThroughFPExtensions(Value *V) { if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) return V; // No constant folding of this. // See if the value can be truncated to half and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf)) return V; // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle)) return V; if (CFP->getType()->isDoubleTy()) return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble)) return V; // Don't try to shrink to various long double types. } @@ -1193,7 +1257,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to - // simpilify this expression to avoid one or more of the trunc/extend + // simplify this expression to avoid one or more of the trunc/extend // operations if we can do so without changing the numerical results. // // The exact manner in which the widths of the operands interact to limit @@ -1201,8 +1265,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // is explained below in the various case statements. BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); if (OpI && OpI->hasOneUse()) { - Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0)); - Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1)); + Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0)); + Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1)); unsigned OpWidth = OpI->getType()->getFPMantissaWidth(); unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth(); unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth(); @@ -1307,10 +1371,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (select cond, R1, Cst)) --> // (select cond, (fptrunc R1), (fptrunc Cst)) + // + // - but only if this isn't part of a min/max operation, else we'll + // ruin min/max canonical form which is to have the select and + // compare's operands be of the same type with no casts to look through. + Value *LHS, *RHS; SelectInst *SI = dyn_cast(CI.getOperand(0)); if (SI && (isa(SI->getOperand(1)) || - isa(SI->getOperand(2)))) { + isa(SI->getOperand(2))) && + matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), CI.getType()); Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), @@ -1327,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), CI.getType()); Type *IntrinsicType[] = { CI.getType() }; - Function *Overload = - Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), - II->getIntrinsicID(), IntrinsicType); + Function *Overload = Intrinsic::getDeclaration( + CI.getModule(), II->getIntrinsicID(), IntrinsicType); Value *Args[] = { InnerTrunc }; return CallInst::Create(Overload, Args, II->getName()); @@ -1483,12 +1552,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } -/// OptimizeVectorResize - This input value (which is known to have vector type) -/// is being zero extended or truncated to the specified vector type. 
Try to -/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// This input value (which is known to have vector type) is being zero extended +/// or truncated to the specified vector type. +/// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, +static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. @@ -1548,8 +1617,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { return Value / Ty->getPrimitiveSizeInBits(); } -/// CollectInsertionElements - V is a value which is inserted into a vector of -/// VecEltTy. Look through the value to see if we can decompose it into +/// V is a value which is inserted into a vector of VecEltTy. +/// Look through the value to see if we can decompose it into /// insertions into the vector. See the example in the comment for /// OptimizeIntegerToVectorInsertions for the pattern this handles. /// The type of V is always a non-zero multiple of VecEltTy's size. @@ -1558,7 +1627,7 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. -static bool CollectInsertionElements(Value *V, unsigned Shift, +static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl &Elements, Type *VecEltTy, bool isBigEndian) { assert(isMultipleOfTypeSize(Shift, VecEltTy) && @@ -1595,7 +1664,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, // If the constant is the size of a vector element, we just need to bitcast // it to the right type so it gets properly inserted. if (NumElts == 1) - return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), Shift, Elements, VecEltTy, isBigEndian); // Okay, this is a constant that covers multiple elements. Slice it up into @@ -1611,7 +1680,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, isBigEndian)) return false; } @@ -1625,19 +1694,19 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, switch (I->getOpcode()) { default: return false; // Unhandled case. 
case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian) && - CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, + collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, isBigEndian); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. @@ -1645,7 +1714,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); } @@ -1653,8 +1722,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, } -/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we -/// may be doing shifts and ors to assemble the elements of the vector manually. +/// If the input is an 'or' instruction, we may be doing shifts and ors to +/// assemble the elements of the vector manually. /// Try to rip the code out and replace it with insertelements. This is to /// optimize code like this: /// @@ -1667,13 +1736,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, /// %tmp43 = bitcast i64 %ins35 to <2 x float> /// /// Into two insertelements that do "buildvector{%inc, %inc5}". -static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, +static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { VectorType *DestVecTy = cast(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector Elements(DestVecTy->getNumElements()); - if (!CollectInsertionElements(IntInput, 0, Elements, + if (!collectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), IC.getDataLayout().isBigEndian())) return nullptr; @@ -1692,63 +1761,29 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, return Result; } - -/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double -/// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC, +/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the +/// vector followed by extract element. The backend tends to handle bitcasts of +/// vectors better than bitcasts of scalars because vector registers are +/// usually not type-specific like scalar integer or scalar floating-point. +static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, + InstCombiner &IC, const DataLayout &DL) { - Value *Src = CI.getOperand(0); - Type *DestTy = CI.getType(); + // TODO: Create and use a pattern matcher for ExtractElementInst. 
+ auto *ExtElt = dyn_cast(BitCast.getOperand(0)); + if (!ExtElt || !ExtElt->hasOneUse()) + return nullptr; - // If this is a bitcast from int to float, check to see if the int is an - // extraction from a vector. - Value *VecInput = nullptr; - // bitcast(trunc(bitcast(somevector))) - if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + // The bitcast must be to a vectorizable type, otherwise we can't make a new + // type to extract from. + Type *DestType = BitCast.getType(); + if (!VectorType::isValidElementType(DestType)) + return nullptr; - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - unsigned Elt = 0; - if (DL.isBigEndian()) - Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); - } - } - - // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = nullptr; - if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), - m_ConstantInt(ShAmt)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && - ShAmt->getZExtValue() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - unsigned Elt = ShAmt->getZExtValue() / DestWidth; - if (DL.isBigEndian()) - Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); - } - } - return nullptr; + unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); + auto *NewVecType = VectorType::get(DestType, NumElts); + auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); + return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { @@ -1794,11 +1829,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - // Try to optimize int -> float bitcasts. 
-  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
-    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
-      return I;
-
   if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
     if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
       Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
@@ -1815,7 +1845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       CastInst *SrcCast = cast<CastInst>(Src);
       if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
         if (isa<VectorType>(BCIn->getOperand(0)->getType()))
-          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+          if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
                                                     cast<VectorType>(DestTy), *this))
             return I;
     }
@@ -1823,7 +1853,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If the input is an 'or' instruction, we may be doing shifts and ors to
     // assemble the elements of the vector manually.  Try to rip the code out
     // and replace it with insertelements.
-    if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+    if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
       return ReplaceInstUsesWith(CI, V);
     }
   }
@@ -1872,6 +1902,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     }
   }

+  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+    return I;
+
   if (SrcTy->isPointerTy())
     return commonPointerCastTransforms(CI);
   return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 95bba3c7af7d..c0786afe965e 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -216,8 +216,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
   Max = KnownOne|UnknownBits;
 }
-
-
 /// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
 ///   cmp pred (load (gep GV, ...)), cmpcst
 /// where GV is a global variable with a constant initializer.  Try to simplify
@@ -371,7 +369,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
       }
     }
-
     // If this element is in range, update our magic bitvector.
     if (i < 64 && IsTrueForElt)
       MagicBitvector |= 1ULL << i;
@@ -469,7 +466,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
   }
-
   // If a magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
@@ -496,7 +492,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   return nullptr;
 }
-
 /// EvaluateGEPOffsetExpression - Return a value that can be used to compare
 /// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
 /// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
@@ -562,8 +557,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
     }
   }
-
-
   // Okay, we know we have a single variable index, which must be a
   // pointer/array/vector index.  If there is no offset, life is simple, return
   // the index.
@@ -737,6 +730,83 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   return nullptr;
 }

+Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
+                                         Value *Other) {
+  assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
+
+  // It would be tempting to fold away comparisons between allocas and any
+  // pointer not based on that alloca (e.g. an argument).
However, even + // though such pointers cannot alias, they can still compare equal. + // + // But LLVM doesn't specify where allocas get their memory, so if the alloca + // doesn't escape we can argue that it's impossible to guess its value, and we + // can therefore act as if any such guesses are wrong. + // + // The code below checks that the alloca doesn't escape, and that it's only + // used in a comparison once (the current instruction). The + // single-comparison-use condition ensures that we're trivially folding all + // comparisons against the alloca consistently, and avoids the risk of + // erroneously folding a comparison of the pointer with itself. + + unsigned MaxIter = 32; // Break cycles and bound to constant-time. + + SmallVector Worklist; + for (Use &U : Alloca->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + + unsigned NumCmps = 0; + while (!Worklist.empty()) { + assert(Worklist.size() <= MaxIter); + Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + --MaxIter; + + if (isa(V) || isa(V) || isa(V) || + isa(V)) { + // Track the uses. + } else if (isa(V)) { + // Loading from the pointer doesn't escape it. + continue; + } else if (auto *SI = dyn_cast(V)) { + // Storing *to* the pointer is fine, but storing the pointer escapes it. + if (SI->getValueOperand() == U->get()) + return nullptr; + continue; + } else if (isa(V)) { + if (NumCmps++) + return nullptr; // Found more than one cmp. + continue; + } else if (auto *Intrin = dyn_cast(V)) { + switch (Intrin->getIntrinsicID()) { + // These intrinsics don't escape or compare the pointer. Memset is safe + // because we don't allow ptrtoint. Memcpy and memmove are safe because + // we don't allow stores, so src cannot point to V. + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + continue; + default: + return nullptr; + } + } else { + return nullptr; + } + for (Use &U : V->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + } + + Type *CmpTy = CmpInst::makeCmpResultType(Other->getType()); + return ReplaceInstUsesWith( + ICI, + ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate()))); +} + /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, @@ -851,7 +921,6 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // to the same result value. HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); } - } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) @@ -996,7 +1065,6 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, return Res; } - // If we are comparing against bits always shifted out, the // comparison cannot succeed. APInt Comp = CmpRHSV << ShAmtVal; @@ -1074,18 +1142,22 @@ Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, if (AP1 == AP2) return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); - // Get the distance between the highest bit that's set. int Shift; - // Both the constants are negative, take their positive to calculate log. if (IsAShr && AP1.isNegative()) - // Get the ones' complement of AP2 and AP1 when computing the distance. 
- Shift = (~AP2).logBase2() - (~AP1).logBase2(); + Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes(); else - Shift = AP2.logBase2() - AP1.logBase2(); + Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros(); if (Shift > 0) { - if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift)) + if (IsAShr && AP1 == AP2.ashr(Shift)) { + // There are multiple solutions if we are comparing against -1 and the LHS + // of the ashr is not a power of two. + if (AP1.isAllOnesValue() && !AP2.isPowerOf2()) + return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift)); return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); + } else if (AP1 == AP2.lshr(Shift)) { + return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); + } } // Shifting const2 will never be equal to const1. return getConstant(false); @@ -1145,6 +1217,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, switch (LHSI->getOpcode()) { case Instruction::Trunc: + if (RHS->isOne() && RHSV.getBitWidth() > 1) { + // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 + Value *V = nullptr; + if (ICI.getPredicate() == ICmpInst::ICMP_SLT && + match(LHSI->getOperand(0), m_Signum(m_Value(V)))) + return new ICmpInst(ICmpInst::ICMP_SLT, V, + ConstantInt::get(V->getType(), 1)); + } if (ICI.isEquality() && LHSI->hasOneUse()) { // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all // of the high bits truncated out of x are known. @@ -1447,9 +1527,35 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE, LHSI->getOperand(0), SubOne(RHS)); + + // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1) + // iff C is a power of 2 + if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) { + if (auto *CI = dyn_cast(LHSI->getOperand(1))) { + const APInt &AI = CI->getValue(); + int32_t ExactLogBase2 = AI.exactLogBase2(); + if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { + Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1); + Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy); + return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ + ? ICmpInst::ICMP_SGE + : ICmpInst::ICMP_SLT, + Trunc, Constant::getNullValue(NTy)); + } + } + } break; case Instruction::Or: { + if (RHS->isOne()) { + // icmp slt signum(V) 1 --> icmp slt V, 1 + Value *V = nullptr; + if (ICI.getPredicate() == ICmpInst::ICMP_SLT && + match(LHSI, m_Signum(m_Value(V)))) + return new ICmpInst(ICmpInst::ICMP_SLT, V, + ConstantInt::get(V->getType(), 1)); + } + if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) break; Value *P, *Q; @@ -2083,11 +2189,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // If the pattern matches, truncate the inputs to the narrower type and // use the sadd_with_overflow intrinsic to efficiently compute both the // result and the overflow bit. 
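// Illustrative aside, not part of the patch: the shape of the rewrite that
// ProcessUGT_ADDCST_ADD performs, restated in plain C++. The wide
// compare-based overflow check and the narrow intrinsic-based one agree on
// every i8 input pair; __builtin_add_overflow (a GCC/Clang extension) stands
// in here for the llvm.sadd.with.overflow intrinsic the code emits.
#include <cassert>
#include <cstdint>

static bool wideCheck(int8_t A, int8_t B) {
  int32_t Sum = int32_t(A) + int32_t(B); // add the sign-extended values
  return Sum < -128 || Sum > 127;        // i.e. (Sum + 128) >u 255
}

static bool narrowCheck(int8_t A, int8_t B) {
  int8_t R;
  return __builtin_add_overflow(A, B, &R); // overflow bit of the i8 add
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      assert(wideCheck(int8_t(A), int8_t(B)) == narrowCheck(int8_t(A), int8_t(B)));
  return 0;
}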
-  Module *M = I.getParent()->getParent()->getParent();
   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
-                                       NewType);
+  Value *F = Intrinsic::getDeclaration(I.getModule(),
+                                       Intrinsic::sadd_with_overflow, NewType);

   InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -2123,6 +2227,12 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
     return true;
   };

+  // If the overflow check was an add followed by a compare, the insertion point
+  // may be pointing to the compare.  We want to insert the new instructions
+  // before the add in case there are uses of the add between the add and the
+  // compare.
+  Builder->SetInsertPoint(&OrigI);
+
   switch (OCF) {
   case OCF_INVALID:
     llvm_unreachable("bad overflow check kind!");
@@ -2223,7 +2333,9 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
   assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);

-  Instruction *MulInstr = cast<Instruction>(MulVal);
+  auto *MulInstr = dyn_cast<Instruction>(MulVal);
+  if (!MulInstr)
+    return nullptr;
   assert(MulInstr->getOpcode() == Instruction::Mul);

   auto *LHS = cast(MulInstr->getOperand(0)),
        *RHS = cast(MulInstr->getOperand(1));
@@ -2357,7 +2469,6 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,

   InstCombiner::BuilderTy *Builder = IC.Builder;
   Builder->SetInsertPoint(MulInstr);
-  Module *M = I.getParent()->getParent()->getParent();

   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
   Value *MulA = A, *MulB = B;
@@ -2365,8 +2476,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
     MulA = Builder->CreateZExt(A, MulType);
   if (WidthB < MulWidth)
     MulB = Builder->CreateZExt(B, MulType);
-  Value *F =
-      Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+  Value *F = Intrinsic::getDeclaration(I.getModule(),
+                                       Intrinsic::umul_with_overflow, MulType);
   CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
   IC.Worklist.Add(MulInstr);
@@ -2468,7 +2579,6 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
   default:
     return APInt::getAllOnesValue(BitWidth);
   }
-
 }

 /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
@@ -2905,7 +3015,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
               ConstantInt::get(X->getType(), CI->countTrailingZeros()));
       }
-
       break;
     }
     case ICmpInst::ICMP_NE: {
@@ -2950,7 +3059,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
               ConstantInt::get(X->getType(), CI->countTrailingZeros()));
       }
-
       break;
     }
     case ICmpInst::ICMP_ULT:
@@ -3103,7 +3211,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // comparison into the select arms, which will cause one to be
         // constant folded and the select turned into a bitwise or.
         Value *Op1 = nullptr, *Op2 = nullptr;
-        ConstantInt *CI = 0;
+        ConstantInt *CI = nullptr;
         if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
           Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
           CI = dyn_cast<ConstantInt>(Op1);
@@ -3177,6 +3285,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
           ICmpInst::getSwappedPredicate(I.getPredicate()), I))
     return NI;

+  // Try to optimize equality comparisons against alloca-based pointers.
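// Illustrative aside, not part of the patch: the source-level situation
// FoldAllocaCmp is after, with hypothetical names. A local whose address
// never escapes cannot have its address guessed, so an equality comparison
// against any other pointer may be folded to a constant false.
#include <cstdio>

static int Global;

static bool sameAddress(int *P) {
  int Local = 0;      // becomes a non-escaping alloca
  return P == &Local; // the icmp eq this patch folds to false
}

int main() {
  std::printf("%d\n", sameAddress(&Global)); // prints 0
  return 0;
}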
+  if (Op0->getType()->isPointerTy() && I.isEquality()) {
+    assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
+    if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+      if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+        return New;
+    if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+      if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+        return New;
+  }
+
   // Test to see if the operands of the icmp are casted versions of other
   // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
   // now.
@@ -3304,6 +3423,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         match(B, m_One()))
       return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);

+    // icmp sgt X, (Y + -1) -> icmp sge X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+        match(D, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+    // icmp sle X, (Y + -1) -> icmp slt X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+        match(D, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+    // icmp sge X, (Y + 1) -> icmp sgt X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
+        match(D, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+    // icmp slt X, (Y + 1) -> icmp sle X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
+        match(D, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
     // if C1 has greater magnitude than C2:
     //   icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
     //   s.t. C3 = C1 - C2
@@ -3473,6 +3612,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         }
       }
     }
+
+    if (BO0) {
+      // Transform  A & (L - 1) `ult` L --> L != 0
+      auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+      auto BitwiseAnd =
+          m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+      if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+        auto *Zero = Constant::getNullValue(BO0->getType());
+        return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
+      }
+    }
   }

   {
     Value *A, *B;
@@ -3697,15 +3848,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());

-  // Check to see that the input is converted from an integer type that is small
-  // enough that preserves all bits.  TODO: check here for "known" sign bits.
-  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
-  unsigned InputSize = IntTy->getScalarSizeInBits();
-
-  // If this is a uitofp instruction, we need an extra bit to hold the sign.
   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
-  if (LHSUnsigned)
-    ++InputSize;

   if (I.isEquality()) {
     FCmpInst::Predicate P = I.getPredicate();
@@ -3732,13 +3875,30 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     // equality compares as integer?
   }

-  // Comparisons with zero are a special case where we know we won't lose
-  // information.
-  bool IsCmpZero = RHS.isPosZero();
+  // Check to see that the input is converted from an integer type that is
+  // small enough that it preserves all bits.  TODO: check here for "known"
+  // sign bits.  This would allow us to handle (fptosi (x >>s 62) to float)
+  // if x is i64 f.e.
+  unsigned InputSize = IntTy->getScalarSizeInBits();

-  // If the conversion would lose info, don't hack on this.
-  if ((int)InputSize > MantissaWidth && !IsCmpZero)
-    return nullptr;
+  // The following test does NOT adjust InputSize downwards for signed inputs,
+  // because the most negative value still requires all the mantissa bits
+  // to distinguish it from one less than that value.
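// Aside for the reader, not part of the patch: a numeric illustration of why
// the mantissa-width reasoning above is needed. float carries a 24-bit
// significand, so not every 32-bit integer survives the int -> float -> int
// round trip, and a compare done in float can conflate distinct integers.
#include <cassert>

int main() {
  int A = (1 << 24) + 1;      // 16777217: needs 25 significant bits
  assert(int(float(A)) != A); // rounds to 2^24, information lost
  int B = 1 << 24;            // 16777216 is exactly representable
  assert(int(float(B)) == B);
  return 0;
}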
+ if ((int)InputSize > MantissaWidth) { + // Conversion would lose accuracy. Check if loss can impact comparison. + int Exp = ilogb(RHS); + if (Exp == APFloat::IEK_Inf) { + int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics())); + if (MaxExponent < (int)InputSize - !LHSUnsigned) + // Conversion could create infinity. + return nullptr; + } else { + // Note that if RHS is zero or NaN, then Exp is negative + // and first condition is trivially false. + if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned) + // Conversion could affect comparison. + return nullptr; + } + } // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index ac934f1bd85c..534f67008150 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -281,6 +281,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other); Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); @@ -341,6 +342,7 @@ public: const unsigned SIOpd); private: + bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const; bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; @@ -360,6 +362,11 @@ private: /// \brief Try to optimize a sequence of instructions checking if an operation /// on LHS and RHS overflows. /// + /// If this overflow check is done via one of the overflow check intrinsics, + /// then CtxI has to be the call instruction calling that intrinsic. If this + /// overflow check is done by arithmetic followed by a compare, then CtxI has + /// to be the arithmetic instruction. + /// /// If a simplification is possible, stores the simplified result of the /// operation in OperationResult and result of the overflow check in /// OverflowResult, and return true. 
If no simplification is possible, @@ -393,7 +400,7 @@ public: assert(New && !New->getParent() && "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst + BB->getInstList().insert(Old.getIterator(), New); // Insert inst Worklist.Add(New); return New; } @@ -539,6 +546,7 @@ private: Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); + Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN); Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); @@ -548,7 +556,7 @@ private: Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); - Instruction *MatchBSwap(BinaryOperator &I); + Instruction *MatchBSwapOrBitReverse(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e3179dbeece8..47406b9a1632 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" @@ -90,21 +91,23 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (CS.isCallee(&U)) continue; + unsigned DataOpNo = CS.getDataOperandNo(&U); + bool IsArgOperand = CS.isArgOperand(&U); + // Inalloca arguments are clobbered by the call. - unsigned ArgNo = CS.getArgumentNo(&U); - if (CS.isInAllocaArgument(ArgNo)) + if (IsArgOperand && CS.isInAllocaArgument(DataOpNo)) return false; // If this is a readonly/readnone call site, then we know it is just a // load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo))) continue; // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) + if (IsArgOperand && CS.isByValArgument(DataOpNo)) continue; } @@ -186,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info // - BasicBlock::iterator It = New; + BasicBlock::iterator It(New); while (isa(*It) || isa(*It)) ++It; @@ -367,7 +370,13 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT MDB.createRange(NonNullInt, NullInt)); } break; - + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewTy->isPointerTy()) + NewLoad->setMetadata(ID, N); + break; case LLVMContext::MD_range: // FIXME: It would be nice to propagate this in some way, but the type // conversions make it hard. 
If the new type is a pointer, we could
@@ -418,6 +427,9 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
     case LLVMContext::MD_invariant_load:
     case LLVMContext::MD_nonnull:
     case LLVMContext::MD_range:
+    case LLVMContext::MD_align:
+    case LLVMContext::MD_dereferenceable:
+    case LLVMContext::MD_dereferenceable_or_null:
       // These don't apply for stores.
       break;
     }
@@ -511,16 +523,46 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
   if (!T->isAggregateType())
     return nullptr;

-  assert(LI.getAlignment() && "Alignement must be set at this point");
+  assert(LI.getAlignment() && "Alignment must be set at this point");

   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, LI.getName()));
     }
+
+    // We don't want to break loads with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return nullptr;
+
+    auto Name = LI.getName();
+    SmallString<16> LoadName = Name;
+    LoadName += ".unpack";
+    SmallString<16> EltName = Name;
+    EltName += ".elt";
+    auto *Addr = LI.getPointerOperand();
+    Value *V = UndefValue::get(T);
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+      auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+      V = IC.Builder->CreateInsertValue(V, L, i);
+    }
+
+    V->setName(Name);
+    return IC.ReplaceInstUsesWith(LI, V);
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -681,7 +723,7 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
   // FIXME: If the GEP is not inbounds, and there are extra indices after the
   // one we'll replace, those could cause the address computation to wrap
   // (rendering the IsAllNonNegative() check below insufficient). We can do
-  // better, ignoring zero indicies (and other indicies we can prove small
+  // better, ignoring zero indices (and other indices we can prove small
   // enough not to wrap).
   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
     return false;
@@ -748,19 +790,19 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
-  BasicBlock::iterator BBI = &LI;
+  BasicBlock::iterator BBI(LI);
   AAMDNodes AATags;
-  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
-                                                     6, AA, &AATags)) {
+  if (Value *AvailableVal =
+          FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+                                   DefMaxInstsToScan, AA, &AATags)) {
     if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
       unsigned KnownIDs[] = {
-        LLVMContext::MD_tbaa,
-        LLVMContext::MD_alias_scope,
-        LLVMContext::MD_noalias,
-        LLVMContext::MD_range,
-        LLVMContext::MD_invariant_load,
-        LLVMContext::MD_nonnull,
-      };
+          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
+          LLVMContext::MD_noalias,         LLVMContext::MD_range,
+          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
+          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+          LLVMContext::MD_dereferenceable,
+          LLVMContext::MD_dereferenceable_or_null};
       combineMetadata(NLI, &LI, KnownIDs);
     };
@@ -822,7 +864,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       }

       // load (select (cond, null, P)) -> load P
-      if (isa(SI->getOperand(1)) &&
+      if (isa(SI->getOperand(1)) && LI.getPointerAddressSpace() == 0) {
         LI.setOperand(0, SI->getOperand(2));
         return &LI;
       }
@@ -857,7 +899,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
 ///
 /// \returns true if the store was successfully combined away. This indicates
 /// the caller must erase the store instruction. We have to let the caller erase
-/// the store instruction sas otherwise there is no way to signal whether it was
+/// the store instruction as otherwise there is no way to signal whether it was
 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
   // FIXME: We could probably with some care handle both volatile and atomic
@@ -893,11 +935,38 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {

   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
+
+    // We don't want to break stores with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return false;
+
+    SmallString<16> EltName = V->getName();
+    EltName += ".elt";
+    auto *Addr = SI.getPointerOperand();
+    SmallString<16> AddrName = Addr->getName();
+    AddrName += ".repack";
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      IC.Builder->CreateStore(Val, Ptr);
+    }
+
+    return true;
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -971,9 +1040,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     return &SI;
   }

-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;

   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
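// Illustrative aside, not part of the patch: a source-level picture of what
// unpackStoreToAggregate now does for padding-free structs, using a
// hypothetical type. The aggregate store is rewritten into one scalar store
// per field through element GEPs, a form the rest of the pipeline handles
// much better.
#include <cassert>

struct Pair { int A; int B; }; // no padding between the two ints

static void storeWhole(Pair *P, Pair V) { *P = V; } // one aggregate store

static void storeUnpacked(Pair *P, Pair V) { // what the transform produces
  P->A = V.A; // gep P, 0, 0 ; store
  P->B = V.B; // gep P, 0, 1 ; store
}

int main() {
  Pair X{0, 0}, Y{0, 0};
  storeWhole(&X, Pair{9, 8});
  storeUnpacked(&Y, Pair{9, 8});
  assert(X.A == Y.A && X.B == Y.B);
  return 0;
}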
@@ -991,7 +1060,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. - BasicBlock::iterator BBI = &SI; + BasicBlock::iterator BBI(SI); for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; @@ -1005,7 +1074,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; @@ -1019,9 +1088,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - LI->isSimple()) + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) { + assert(SI.isUnordered() && "can't eliminate ordering operation"); return EraseInstFromFunction(SI); + } // Otherwise, this is a load from some other location. Stores before it // may not be dead. @@ -1047,10 +1117,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return EraseInstFromFunction(SI); + // The code below needs to be audited and adjusted for unordered atomics + if (!SI.isSimple()) + return nullptr; + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = SI.getIterator(); do { ++BBI; } while (isa(BBI) || @@ -1106,7 +1180,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; // Verify that the other block ends in a branch and is not otherwise empty. - BasicBlock::iterator BBI = OtherBB->getTerminator(); + BasicBlock::iterator BBI(OtherBB->getTerminator()); BranchInst *OtherBr = dyn_cast(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index a554e9f628e0..7ad0efc42fb4 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -22,9 +22,9 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// simplifyValueKnownNonZero - The specific integer value is used in a context -/// where it is known to be non-zero. If this allows us to simplify the -/// computation, do so and return the new operand, otherwise return null. +/// The specific integer value is used in a context where it is known to be +/// non-zero. If this allows us to simplify the computation, do so and return +/// the new operand, otherwise return null. static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, Instruction &CxtI) { // If V has multiple uses, then we would have to do more analysis to determine @@ -76,8 +76,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, } -/// MultiplyOverflows - True if the multiply can not be expressed in an int -/// this size. +/// True if the multiply can not be expressed in an int this size. 
static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product, bool IsSigned) { bool Overflow; @@ -95,6 +94,14 @@ static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, assert(C1.getBitWidth() == C2.getBitWidth() && "Inconsistent width of constants!"); + // Bail if we will divide by zero. + if (C2.isMinValue()) + return false; + + // Bail if we would divide INT_MIN by -1. + if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue()) + return false; + APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned); if (IsSigned) APInt::sdivrem(C1, C2, Quotient, Remainder); @@ -705,8 +712,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return Changed ? &I : nullptr; } -/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select -/// instruction. +/// Try to fold a divide or remainder of a select instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast(I.getOperand(1)); @@ -740,7 +746,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { return true; // Scan the current block backward, looking for other uses of SI. - BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); + BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin(); while (BBI != BBFront) { --BBI; @@ -754,10 +760,10 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { I != E; ++I) { if (*I == SI) { *I = SI->getOperand(NonNullOperand); - Worklist.Add(BBI); + Worklist.Add(&*BBI); } else if (*I == SelectCond) { *I = Builder->getInt1(NonNullOperand == 1); - Worklist.Add(BBI); + Worklist.Add(&*BBI); } } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 460f6eb6a825..f1aa98b5e359 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instcombine" @@ -245,7 +246,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { - BasicBlock::iterator BBI = L, E = L->getParent()->end(); + BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) @@ -349,24 +350,40 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment); - // Add all operands to the new PHI. + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_invariant_load, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_nonnull, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + }; + + for (unsigned ID : KnownIDs) + NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); + + // Add all operands to the new PHI and combine TBAA metadata. 
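[Editor's note: the two new bail-outs in IsMultiple guard exactly the inputs for which the division itself is undefined. A compressed sketch of the guarded sequence, assuming the LLVM ADT headers are available; 'isExactMultiple' is a made-up name and this mirrors the patch rather than adding behavior.]

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Returns true iff C1 is an exact multiple of C2, refusing the two
    // divisions that are undefined (x / 0 and INT_MIN / -1).
    static bool isExactMultiple(const APInt &C1, const APInt &C2,
                                APInt &Quotient, bool IsSigned) {
      if (C2.isMinValue())                                   // C2 == 0
        return false;
      if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
        return false;                                        // INT_MIN / -1
      APInt Remainder(C1.getBitWidth(), 0, IsSigned);
      if (IsSigned)
        APInt::sdivrem(C1, C2, Quotient, Remainder);
      else
        APInt::udivrem(C1, C2, Quotient, Remainder);
      return Remainder.isMinValue();                         // remainder == 0
    }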
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); + LoadInst *LI = cast(PN.getIncomingValue(i)); + combineMetadata(NewLI, LI, KnownIDs); + Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } - Value *PhiVal; if (InVal) { // The new PHI unions all of the same values together. This is really // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; + NewLI->setOperand(0, InVal); delete NewPN; } else { InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; } // If this was a volatile load that we are merging, make sure to loop through @@ -376,17 +393,94 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (Value *IncValue : PN.incoming_values()) cast(IncValue)->setVolatile(false); - LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); NewLI->setDebugLoc(FirstLI->getDebugLoc()); return NewLI; } +/// TODO: This function could handle other cast types, but then it might +/// require special-casing a cast from the 'i1' type. See the comment in +/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types. +Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) { + // We cannot create a new instruction after the PHI if the terminator is an + // EHPad because there is no valid insertion point. + if (TerminatorInst *TI = Phi.getParent()->getTerminator()) + if (TI->isEHPad()) + return nullptr; + // Early exit for the common case of a phi with two operands. These are + // handled elsewhere. See the comment below where we check the count of zexts + // and constants for more details. + unsigned NumIncomingValues = Phi.getNumIncomingValues(); + if (NumIncomingValues < 3) + return nullptr; + + // Find the narrower type specified by the first zext. + Type *NarrowType = nullptr; + for (Value *V : Phi.incoming_values()) { + if (auto *Zext = dyn_cast(V)) { + NarrowType = Zext->getSrcTy(); + break; + } + } + if (!NarrowType) + return nullptr; + + // Walk the phi operands checking that we only have zexts or constants that + // we can shrink for free. Store the new operands for the new phi. + SmallVector NewIncoming; + unsigned NumZexts = 0; + unsigned NumConsts = 0; + for (Value *V : Phi.incoming_values()) { + if (auto *Zext = dyn_cast(V)) { + // All zexts must be identical and have one use. + if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse()) + return nullptr; + NewIncoming.push_back(Zext->getOperand(0)); + NumZexts++; + } else if (auto *C = dyn_cast(V)) { + // Make sure that constants can fit in the new type. + Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType); + if (ConstantExpr::getZExt(Trunc, C->getType()) != C) + return nullptr; + NewIncoming.push_back(Trunc); + NumConsts++; + } else { + // If it's not a cast or a constant, bail out. + return nullptr; + } + } + + // The more common cases of a phi with no constant operands or just one + // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi() + // respectively. FoldOpIntoPhi() wants to do the opposite transform that is + // performed here. It tries to replicate a cast in the phi operand's basic + // block to expose other folding opportunities. Thus, InstCombine will + // infinite loop without this check. + if (NumConsts == 0 || NumZexts < 2) + return nullptr; + + // All incoming values are zexts or constants that are safe to truncate. 
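[Editor's note: a hypothetical C-level picture of the phi-of-loads fold above; 'mergeLoads' is made up. Two loads feeding a phi become a phi of the pointers plus one load, and the new load keeps only metadata that combineMetadata() can justify for both originals.]

    int mergeLoads(bool c, int *p, int *q) {
      int v;
      if (c)
        v = *p;          // load #1, possibly carrying its own !tbaa/!range
      else
        v = *q;          // load #2
      return v;          // after the fold: v = *(phi of p and q), one load
    }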
+ // Create a new phi node of the narrow type, phi together all of the new + // operands, and zext the result back to the original type. + PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues, + Phi.getName() + ".shrunk"); + for (unsigned i = 0; i != NumIncomingValues; ++i) + NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i)); + + InsertNewInstBefore(NewPhi, Phi); + return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); +} /// If all operands to a PHI node are the same "unary" operator and they all are /// only used by the PHI, PHI together their inputs, and do the operation once, /// to the result of the PHI. Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { + // We cannot create a new instruction after the PHI if the terminator is an + // EHPad because there is no valid insertion point. + if (TerminatorInst *TI = PN.getParent()->getTerminator()) + if (TI->isEHPad()) + return nullptr; + Instruction *FirstInst = cast(PN.getIncomingValue(0)); if (isa(FirstInst)) @@ -740,7 +834,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Otherwise, do an extract in the predecessor. - Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Builder->SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), @@ -787,6 +881,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC)) return ReplaceInstUsesWith(PN, V); + if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) + return Result; + // If all PHI operands are the same operation, pull them through the PHI, // reducing code size. if (isa(PN.getIncomingValue(0)) && diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index f51442a9f36d..776704d1efa9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -38,7 +38,8 @@ getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) { } } -static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) { +static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, + bool Ordered=false) { switch (SPF) { default: llvm_unreachable("unhandled!"); @@ -51,17 +52,22 @@ static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) { return ICmpInst::ICMP_SGT; case SPF_UMAX: return ICmpInst::ICMP_UGT; + case SPF_FMINNUM: + return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; + case SPF_FMAXNUM: + return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; } } static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder, SelectPatternFlavor SPF, Value *A, Value *B) { - CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF); + CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); + assert(CmpInst::isIntPredicate(Pred)); return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B); } -/// GetSelectFoldableOperands - We want to turn code that looks like this: +/// We want to turn code that looks like this: /// %C = or %A, %B /// %D = select %cond, %C, %A /// into: @@ -90,8 +96,8 @@ static unsigned GetSelectFoldableOperands(Instruction *I) { } } -/// GetSelectFoldableConstant - For the same transformation as the previous -/// function, return the identity constant that goes into the select. +/// For the same transformation as the previous function, return the identity +/// constant that goes into the select. 
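[Editor's note: a hypothetical example of FoldPHIArgZextsIntoPHI; 'pick' is made up. Three incoming values, two single-use zexts from i8 and one constant that round-trips through i8, so the requirements (at least 3 operands, NumZexts >= 2, NumConsts >= 1) are met and the phi is rebuilt at the narrow width.]

    unsigned pick(int sel, unsigned char a, unsigned char b) {
      unsigned w;
      switch (sel) {
      case 0:  w = a;  break;   // zext i8 -> i32
      case 1:  w = b;  break;   // zext i8 -> i32
      default: w = 7;  break;   // constant; trunc-then-zext gives 7 back
      }
      return w;  // becomes: zext(phi i8 [a], [b], [7]) -- one zext total
    }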
static Constant *GetSelectFoldableConstant(Instruction *I) { switch (I->getOpcode()) { default: llvm_unreachable("This cannot happen!"); @@ -110,7 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) { } } -/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// Here we have (select c, TI, FI), and we know that TI and FI /// have the same opcode and only one use each. Try to simplify this. Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI) { @@ -197,8 +203,8 @@ static bool isSelect01(Constant *C1, Constant *C2) { C2I->isOne() || C2I->isAllOnesValue(); } -/// FoldSelectIntoOp - Try fold the select into one of the operands to -/// facilitate further optimization. +/// Try to fold the select into one of the operands to allow further +/// optimization. Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the @@ -276,7 +282,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, return nullptr; } -/// foldSelectICmpAndOr - We want to turn: +/// We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: /// (or (shl (and X, C1), C3), y) @@ -394,9 +400,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return nullptr; } -/// visitSelectInstWithICmp - Visit a SelectInst that has an -/// ICmpInst as its first operand. -/// +/// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { bool Changed = false; @@ -595,10 +599,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } -/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a -/// PHI node (but the two may be in different blocks). See if the true/false -/// values (V) are live in all of the predecessor blocks of the PHI. For -/// example, cases like this cannot be mapped: +/// SI is a select whose condition is a PHI node (but the two may be in +/// different blocks). See if the true/false values (V) are live in all of the +/// predecessor blocks of the PHI. For example, cases like this can't be mapped: /// /// X = phi [ C1, BB1], [C2, BB2] /// Y = add @@ -632,7 +635,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, return false; } -/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: +/// We have an SPF (e.g. a min or max) of an SPF of the form: /// SPF2(SPF1(A, B), C) Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, @@ -745,10 +748,10 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, return nullptr; } -/// foldSelectICmpAnd - If one of the constants is zero (we know they can't -/// both be) and we have an icmp instruction with zero, and we have an 'and' -/// with the non-constant value and a power of two we can turn the select -/// into a shift on the result of the 'and'. +/// If one of the constants is zero (we know they can't both be) and we have an +/// icmp instruction with zero, and we have an 'and' with the non-constant value +/// and a power of two we can turn the select into a shift on the result of the +/// 'and'. 
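[Editor's note: a hypothetical example of the foldSelectICmpAnd transform just described; 'selToShift' is made up. One arm is zero, the compare tests a single masked bit, and the other arm is a power of two, so the select collapses to a shift of the masked bit.]

    unsigned selToShift(unsigned x) {
      // Before: (x & 4) == 0 ? 0 : 16
      // After:  (x & 4) << 2        -- bit 2 relocated to bit 4, no select
      return (x & 4) ? 16u : 0u;
    }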
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, ConstantInt *FalseVal, InstCombiner::BuilderTy *Builder) { @@ -926,6 +929,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + Builder->SetFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); @@ -967,6 +972,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + Builder->SetFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); @@ -1054,35 +1061,50 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. - if (SI.getType()->isIntOrIntVectorTy()) { + if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; Value *LHS, *RHS, *LHS2, *RHS2; Instruction::CastOps CastOp; - SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp); + SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); + auto SPF = SPR.Flavor; - if (SPF) { + if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that type casts are outside select patterns. if (LHS->getType()->getPrimitiveSizeInBits() != SI.getType()->getPrimitiveSizeInBits()) { - CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF); - Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS); + CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered); + + Value *Cmp; + if (CmpInst::isIntPredicate(Pred)) { + Cmp = Builder->CreateICmp(Pred, LHS, RHS); + } else { + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + auto FMF = cast(SI.getCondition())->getFastMathFlags(); + Builder->SetFastMathFlags(FMF); + Cmp = Builder->CreateFCmp(Pred, LHS, RHS); + } + Value *NewSI = Builder->CreateCast(CastOp, Builder->CreateSelect(Cmp, LHS, RHS), SI.getType()); return ReplaceInstUsesWith(SI, NewSI); } + } + if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a - if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2)) + // ABS(ABS(a)) -> ABS(a) + // NABS(NABS(a)) -> NABS(a) + if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; - if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2)) + if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, SI, SPF, LHS)) return R; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index d04ed58b014f..0c7defa5fff8 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -55,7 +55,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return nullptr; } -/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// 
See if we can compute the specified value, but shifted /// logically to the left or right by some number of bits. This should return /// true if the expression can be computed for the same cost as the current /// expression tree. This is used to eliminate extraneous shifting from things @@ -184,7 +184,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, } } -/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// When CanEvaluateShifted returned true for an expression, /// this value inserts the new computation that produces the shifted value. static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, InstCombiner &IC, const DataLayout &DL) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 80628b23f111..743d51483ea1 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -410,9 +410,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If this is a select as part of a min/max pattern, don't simplify any // further in case we break the structure. Value *LHS, *RHS; - if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN) + if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, @@ -1057,7 +1057,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); if (ConstantVector* CV = dyn_cast(I->getOperand(0))) { for (unsigned i = 0; i < VWidth; i++) { - if (CV->getAggregateElement(i)->isNullValue()) + Constant *CElt = CV->getAggregateElement(i); + // Method isNullValue always returns false when called on a + // ConstantExpr. If CElt is a ConstantExpr then skip it in order to + // to avoid propagating incorrect information. + if (isa(CElt)) + continue; + if (CElt->isNullValue()) LeftDemanded.clearBit(i); else RightDemanded.clearBit(i); @@ -1082,6 +1088,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); + UndefElts2 = APInt(InVWidth, 0); unsigned Ratio; if (VWidth == InVWidth) { @@ -1089,29 +1096,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // elements as are demanded of us. Ratio = 1; InputDemandedElts = DemandedElts; - } else if (VWidth > InVWidth) { - // Untested so far. - break; - - // If there are more elements in the result than there are in the source, - // then an input element is live if any of the corresponding output - // elements are live. - Ratio = VWidth/InVWidth; - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + } else if ((VWidth % InVWidth) == 0) { + // If the number of elements in the output is a multiple of the number of + // elements in the input then an input element is live if any of the + // corresponding output elements are live. + Ratio = VWidth / InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) if (DemandedElts[OutIdx]) - InputDemandedElts.setBit(OutIdx/Ratio); - } - } else { - // Untested so far. 
- break; - - // If there are more elements in the source than there are in the result, - // then an input element is live if the corresponding output element is - // live. - Ratio = InVWidth/VWidth; + InputDemandedElts.setBit(OutIdx / Ratio); + } else if ((InVWidth % VWidth) == 0) { + // If the number of elements in the input is a multiple of the number of + // elements in the output then an input element is live if the + // corresponding output element is live. + Ratio = InVWidth / VWidth; for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (DemandedElts[InIdx/Ratio]) + if (DemandedElts[InIdx / Ratio]) InputDemandedElts.setBit(InIdx); + } else { + // Unsupported so far. + break; } // div/rem demand all inputs, because they don't want divide by zero. @@ -1122,24 +1125,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, MadeChange = true; } - UndefElts = UndefElts2; - if (VWidth > InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the result than there are in the source, - // then an output element is undef if the corresponding input element is - // undef. + if (VWidth == InVWidth) { + UndefElts = UndefElts2; + } else if ((VWidth % InVWidth) == 0) { + // If the number of elements in the output is a multiple of the number of + // elements in the input then an output element is undef if the + // corresponding input element is undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx/Ratio]) + if (UndefElts2[OutIdx / Ratio]) UndefElts.setBit(OutIdx); - } else if (VWidth < InVWidth) { + } else if ((InVWidth % VWidth) == 0) { + // If the number of elements in the input is a multiple of the number of + // elements in the output then an output element is undef if all of the + // corresponding input elements are undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); + if (SubUndef.countPopulation() == Ratio) + UndefElts.setBit(OutIdx); + } + } else { llvm_unreachable("Unimp"); - // If there are more elements in the source than there are in the result, - // then a result element is undef if all of the corresponding input - // elements are undef. - UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. } break; } @@ -1237,6 +1242,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; break; + + // SSE4A instructions leave the upper 64-bits of the 128-bit result + // in an undefined state. + case Intrinsic::x86_sse4a_extrq: + case Intrinsic::x86_sse4a_extrqi: + case Intrinsic::x86_sse4a_insertq: + case Intrinsic::x86_sse4a_insertqi: + UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2); + break; } break; } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 273047279e90..e25639ae943b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -22,10 +22,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. isConstant indicates whether we're -/// extracting one known element. 
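[Editor's note: a sketch of the new bitcast demanded-elements mapping, assuming a <4 x i32> result bitcast from a <2 x i64> source (VWidth = 4, InVWidth = 2, Ratio = 2); the program below is illustrative, not patch code.]

    #include <cstdio>

    int main() {
      const unsigned VWidth = 4, InVWidth = 2, Ratio = VWidth / InVWidth;
      bool DemandedOut[VWidth] = {false, false, false, true};
      bool DemandedIn[InVWidth] = {false, false};
      // Result lane i depends on source lane i / Ratio, the same mapping
      // the patch uses to build InputDemandedElts.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedOut[OutIdx])
          DemandedIn[OutIdx / Ratio] = true;
      std::printf("source lanes demanded: %d %d\n",
                  DemandedIn[0], DemandedIn[1]);   // prints: 0 1
      return 0;
    }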
If false we're extracting a variable index. -static bool CheapToScalarize(Value *V, bool isConstant) { +/// Return true if the value is cheaper to scalarize than it is to leave as a +/// vector operation. isConstant indicates whether we're extracting one known +/// element. If false we're extracting a variable index. +static bool cheapToScalarize(Value *V, bool isConstant) { if (Constant *C = dyn_cast(V)) { if (isConstant) return true; @@ -50,13 +50,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) { return true; if (BinaryOperator *BO = dyn_cast(I)) if (BO->hasOneUse() && - (CheapToScalarize(BO->getOperand(0), isConstant) || - CheapToScalarize(BO->getOperand(1), isConstant))) + (cheapToScalarize(BO->getOperand(0), isConstant) || + cheapToScalarize(BO->getOperand(1), isConstant))) return true; if (CmpInst *CI = dyn_cast(I)) if (CI->hasOneUse() && - (CheapToScalarize(CI->getOperand(0), isConstant) || - CheapToScalarize(CI->getOperand(1), isConstant))) + (cheapToScalarize(CI->getOperand(0), isConstant) || + cheapToScalarize(CI->getOperand(1), isConstant))) return true; return false; @@ -82,7 +82,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // and that it is a binary operation which is cheap to scalarize. // otherwise return NULL. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || - !(isa(PHIUser)) || !CheapToScalarize(PHIUser, true)) + !(isa(PHIUser)) || !cheapToScalarize(PHIUser, true)) return nullptr; // Create a scalar PHI node that will replace the vector PHI node @@ -115,8 +115,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { Instruction *pos = dyn_cast(PHIInVal); BasicBlock::iterator InsertPos; if (pos && !isa(pos)) { - InsertPos = pos; - ++InsertPos; + InsertPos = ++pos->getIterator(); } else { InsertPos = inBB->getFirstInsertionPt(); } @@ -137,7 +136,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. if (Constant *C = dyn_cast(EI.getOperand(0))) - if (CheapToScalarize(C, false)) + if (cheapToScalarize(C, false)) return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); // If extracting a specified index from the vector, see if we can recursively @@ -163,7 +162,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } - // If the this extractelement is directly using a bitcast from a vector of + // If this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { @@ -184,10 +183,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Instruction *I = dyn_cast(EI.getOperand(0))) { // Push extractelement into predecessor operation if legal and - // profitable to do so + // profitable to do so. 
if (BinaryOperator *BO = dyn_cast(I)) { if (I->hasOneUse() && - CheapToScalarize(BO, isa(EI.getOperand(1)))) { + cheapToScalarize(BO, isa(EI.getOperand(1)))) { Value *newEI0 = Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), EI.getName()+".lhs"); @@ -230,8 +229,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx, false)); } } else if (CastInst *CI = dyn_cast(I)) { - // Canonicalize extractelement(cast) -> cast(extractelement) - // bitcasts can change the number of vector elements and they cost nothing + // Canonicalize extractelement(cast) -> cast(extractelement). + // Bitcasts can change the number of vector elements, and they cost + // nothing. if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Value *EE = Builder->CreateExtractElement(CI->getOperand(0), EI.getIndexOperand()); @@ -245,7 +245,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // fight the vectorizer. // If we are extracting an element from a vector select or a select on - // vectors, a select on the scalars extracted from the vector arguments. + // vectors, create a select on the scalars extracted from the vector + // arguments. Value *TrueVal = SI->getTrueValue(); Value *FalseVal = SI->getFalseValue(); @@ -275,10 +276,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { return nullptr; } -/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. -/// Otherwise, return false. -static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, +/// If V is a shuffle of values that ONLY returns elements from either LHS or +/// RHS, return the shuffle mask and true. Otherwise, return false. +static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl &Mask) { assert(LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); @@ -315,7 +315,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (isa(ScalarOp)) { // inserting undef into vector. // We can handle this if the vector we are inserting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted undef. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); return true; @@ -330,7 +330,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // We can handle this if the vector we are inserting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { Mask[InsertedIdx % NumElts] = @@ -352,6 +352,48 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, return false; } +/// If we have insertion into a vector that is wider than the vector that we +/// are extracting from, try to widen the source vector to allow a single +/// shufflevector to replace one or more insert/extract pairs. 
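[Editor's note: a hypothetical scalar view of the extractelement(cast) -> cast(extractelement) canonicalization above; 'laneWiden' is made up. Converting after the extract touches only the demanded lane instead of the whole vector.]

    long laneWiden(const int v[4]) {
      // Before: extract lane 1 of (sext <4 x i32> to <4 x i64>).
      // After:  sext of the extracted scalar -- one conversion.
      return (long)v[1];
    }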
+static void replaceExtractElements(InsertElementInst *InsElt, + ExtractElementInst *ExtElt, + InstCombiner &IC) { + VectorType *InsVecType = InsElt->getType(); + VectorType *ExtVecType = ExtElt->getVectorOperandType(); + unsigned NumInsElts = InsVecType->getVectorNumElements(); + unsigned NumExtElts = ExtVecType->getVectorNumElements(); + + // The inserted-to vector must be wider than the extracted-from vector. + if (InsVecType->getElementType() != ExtVecType->getElementType() || + NumExtElts >= NumInsElts) + return; + + // Create a shuffle mask to widen the extended-from vector using undefined + // values. The mask selects all of the values of the original vector followed + // by as many undefined values as needed to create a vector of the same length + // as the inserted-to vector. + SmallVector ExtendMask; + IntegerType *IntType = Type::getInt32Ty(InsElt->getContext()); + for (unsigned i = 0; i < NumExtElts; ++i) + ExtendMask.push_back(ConstantInt::get(IntType, i)); + for (unsigned i = NumExtElts; i < NumInsElts; ++i) + ExtendMask.push_back(UndefValue::get(IntType)); + + Value *ExtVecOp = ExtElt->getVectorOperand(); + auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), + ConstantVector::get(ExtendMask)); + + // Replace all extracts from the original narrow vector with extracts from + // the new wide vector. + WideVec->insertBefore(ExtElt); + for (User *U : ExtVecOp->users()) { + if (ExtractElementInst *OldExt = dyn_cast(U)) { + auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); + NewExt->insertAfter(WideVec); + IC.ReplaceInstUsesWith(*OldExt, NewExt); + } + } +} /// We are building a shuffle to create V, which is a sequence of insertelement, /// extractelement pairs. If PermittedRHS is set, then we must either use it or @@ -363,9 +405,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// often been chosen carefully to be efficiently implementable on the target. typedef std::pair ShuffleOps; -static ShuffleOps CollectShuffleElements(Value *V, +static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl &Mask, - Value *PermittedRHS) { + Value *PermittedRHS, + InstCombiner &IC) { assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast(V->getType())->getNumElements(); @@ -396,10 +439,14 @@ static ShuffleOps CollectShuffleElements(Value *V, // otherwise we'd end up with a shuffle of three inputs. if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { Value *RHS = EI->getOperand(0); - ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); + ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); assert(LR.second == nullptr || LR.second == RHS); if (LR.first->getType() != RHS->getType()) { + // Although we are giving up for now, see if we can create extracts + // that match the inserts for another round of combining. + replaceExtractElements(IEI, EI, IC); + // We tried our best, but we can't find anything compatible with RHS // further up the chain. Return a trivial shuffle. for (unsigned i = 0; i < NumElts; ++i) @@ -429,14 +476,14 @@ static ShuffleOps CollectShuffleElements(Value *V, // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. 
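[Editor's note: a sketch of the extend mask replaceExtractElements builds, assuming NumExtElts = 2 source lanes widened to NumInsElts = 4; -1 stands in for an undef mask lane here, and the program is illustrative only.]

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumExtElts = 2, NumInsElts = 4;
      std::vector<int> ExtendMask;
      for (unsigned i = 0; i < NumExtElts; ++i)
        ExtendMask.push_back((int)i);  // select original lane i
      for (unsigned i = NumExtElts; i < NumInsElts; ++i)
        ExtendMask.push_back(-1);      // undef lane; its value is never read
      for (int m : ExtendMask)
        std::printf("%d ", m);         // prints: 0 1 -1 -1
      std::printf("\n");
      return 0;
    }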
if (EI->getOperand(0)->getType() == PermittedRHS->getType() && - CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, + collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, Mask)) return std::make_pair(EI->getOperand(0), PermittedRHS); } } } - // Otherwise, can't do anything fancy. Return an identity vector. + // Otherwise, we can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); return std::make_pair(V, nullptr); @@ -512,7 +559,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa(IE.user_back())) { SmallVector Mask; - ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr); + ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); // The proposed shuffle may be trivial, in which case we shouldn't // perform the combine. @@ -588,8 +635,8 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef Mask, case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { - for (int i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) + for (Value *Operand : I->operands()) { + if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) return false; } return true; @@ -617,7 +664,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef Mask, /// Rebuild a new instruction just like 'I' but with the new operands given. /// In the event of type mismatch, the type of the operands is correct. -static Value *BuildNew(Instruction *I, ArrayRef NewOps) { +static Value *buildNew(Instruction *I, ArrayRef NewOps) { // We don't want to use the IRBuilder here because we want the replacement // instructions to appear next to 'I', not the builder's insertion point. switch (I->getOpcode()) { @@ -760,7 +807,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { NeedsRebuild |= (V != I->getOperand(i)); } if (NeedsRebuild) { - return BuildNew(I, NewOps); + return buildNew(I, NewOps); } return I; } @@ -792,7 +839,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { llvm_unreachable("failed to reorder elements of vector instruction!"); } -static void RecognizeIdentityMask(const SmallVectorImpl &Mask, +static void recognizeIdentityMask(const SmallVectorImpl &Mask, bool &isLHSID, bool &isRHSID) { isLHSID = isRHSID = true; @@ -891,7 +938,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? bool isLHSID, isRHSID; - RecognizeIdentityMask(Mask, isLHSID, isRHSID); + recognizeIdentityMask(Mask, isLHSID, isRHSID); // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); @@ -1177,7 +1224,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the result mask is an identity, replace uses of this instruction with // corresponding argument. 
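[Editor's note: a sketch of the check behind recognizeIdentityMask, used just below to erase identity shuffles; -1 models an undef mask entry, and RHS lanes would be i + VWidth. Illustrative program, not patch code.]

    #include <cstdio>

    int main() {
      const int Mask[4] = {0, 1, -1, 3};
      bool IsLHSIdentity = true;
      // Every defined mask lane must select lane i of the LHS.
      for (int i = 0; i != 4; ++i)
        if (Mask[i] >= 0 && Mask[i] != i)
          IsLHSIdentity = false;
      std::printf("identity of LHS: %s\n", IsLHSIdentity ? "yes" : "no");
      return 0;
    }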
bool isLHSID, isRHSID; - RecognizeIdentityMask(newMask, isLHSID, isRHSID); + recognizeIdentityMask(newMask, isLHSID, isRHSID); if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index fd34a244f271..7c46cfd28fc9 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -42,8 +42,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -79,14 +80,12 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(Builder, DL, GEP); } -/// ShouldChangeType - Return true if it is desirable to convert a computation -/// from 'From' to 'To'. We don't want to convert from a legal to an illegal -/// type for example, or from a smaller to a larger illegal type. -bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { - assert(From->isIntegerTy() && To->isIntegerTy()); - - unsigned FromWidth = From->getPrimitiveSizeInBits(); - unsigned ToWidth = To->getPrimitiveSizeInBits(); +/// Return true if it is desirable to convert an integer computation from a +/// given bit width to a new bit width. +/// We don't want to convert from a legal to an illegal type for example or from +/// a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(unsigned FromWidth, + unsigned ToWidth) const { bool FromLegal = DL.isLegalInteger(FromWidth); bool ToLegal = DL.isLegalInteger(ToWidth); @@ -103,6 +102,17 @@ bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { return true; } +/// Return true if it is desirable to convert a computation from 'From' to 'To'. +/// We don't want to convert from a legal to an illegal type for example or from +/// a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { + assert(From->isIntegerTy() && To->isIntegerTy()); + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + return ShouldChangeType(FromWidth, ToWidth); +} + // Return true, if No Signed Wrap should be maintained for I. // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", // where both B and C should be ConstantInts, results in a constant that does @@ -156,27 +166,26 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { I.setFastMathFlags(FMF); } -/// SimplifyAssociativeOrCommutative - This performs a few simplifications for -/// operators which are associative or commutative: -// -// Commutative operators: -// -// 1. Order operands such that they are listed from right (least complex) to -// left (most complex). This puts constants before unary operators before -// binary operators. -// -// Associative operators: -// -// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. -// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. -// -// Associative and commutative operators: -// -// 4. 
Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. -// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. -// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" -// if C1 and C2 are constants. -// +/// This performs a few simplifications for operators that are associative or +/// commutative: +/// +/// Commutative operators: +/// +/// 1. Order operands such that they are listed from right (least complex) to +/// left (most complex). This puts constants before unary operators before +/// binary operators. +/// +/// Associative operators: +/// +/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +/// +/// Associative and commutative operators: +/// +/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +/// if C1 and C2 are constants. bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; @@ -322,7 +331,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { } while (1); } -/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// Return whether "X LOp (Y ROp Z)" is always equal to /// "(X LOp Y) ROp (X LOp Z)". static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { @@ -361,7 +370,7 @@ static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, } } -/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// Return whether "(X LOp Y) ROp Z" is always equal to /// "(X ROp Z) LOp (Y ROp Z)". static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { @@ -519,7 +528,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, if (isa(Op1)) HasNSW &= Op1->hasNoSignedWrap(); - // We can propogate 'nsw' if we know that + // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C // %Z = add nsw i16 %Y, %X // => @@ -537,11 +546,11 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, return SimplifiedInst; } -/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations -/// which some other binary operation distributes over either by factorizing -/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this -/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is -/// a win). Returns the simplified value, or null if it didn't simplify. +/// This tries to simplify binary operations which some other binary operation +/// distributes over either by factorizing out common terms +/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in +/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). +/// Returns the simplified value, or null if it didn't simplify. 
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); @@ -623,12 +632,38 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { } } + // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0)) + // (op (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (op c, d))) + if (auto *SI0 = dyn_cast(LHS)) { + if (auto *SI1 = dyn_cast(RHS)) { + if (SI0->getCondition() == SI1->getCondition()) { + Value *SI = nullptr; + if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue(), DL, TLI, DT, AC)) + SI = Builder->CreateSelect(SI0->getCondition(), + Builder->CreateBinOp(TopLevelOpcode, + SI0->getTrueValue(), + SI1->getTrueValue()), + V); + if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), + SI1->getTrueValue(), DL, TLI, DT, AC)) + SI = Builder->CreateSelect( + SI0->getCondition(), V, + Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue())); + if (SI) { + SI->takeName(&I); + return SI; + } + } + } + } + return nullptr; } -// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction -// if the LHS is a constant zero (which is the 'negate' form). -// +/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a +/// constant zero (which is the 'negate' form). Value *InstCombiner::dyn_castNegVal(Value *V) const { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); @@ -644,10 +679,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return nullptr; } -// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the -// instruction if the LHS is a constant negative zero (which is the 'negate' -// form). -// +/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is +/// a constant negative zero (which is the 'negate' form). Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) return BinaryOperator::getFNegArgument(V); @@ -700,10 +733,10 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, llvm_unreachable("Unknown binary instruction type!"); } -// FoldOpIntoSelect - Given an instruction with a select as one operand and a -// constant as the other operand, try to fold the binary operator into the -// select arguments. This also works for Cast instructions, which obviously do -// not have a second operand. +/// Given an instruction with a select as one operand and a constant as the +/// other operand, try to fold the binary operator into the select arguments. +/// This also works for Cast instructions, which obviously do not have a second +/// operand. Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions if (!SI->hasOneUse()) return nullptr; @@ -752,10 +785,9 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return nullptr; } -/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which -/// has a PHI node as operand #0, see if we can fold the instruction into the -/// PHI (which is only possible if all operands to the PHI are constants). -/// +/// Given a binary operator, cast instruction, or select which has a PHI node as +/// operand #0, see if we can fold the instruction into the PHI (which is only +/// possible if all operands to the PHI are constants). 
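[Editor's note: a hypothetical example of the new select-of-select fold added above; 'foldSelects' is made up. Both selects share the condition c and their false arms are identical, so the binop on that side simplifies (x & x == x) and a single select survives.]

    //   (c ? p : x) & (c ? q : x)  ==>  c ? (p & q) : x
    int foldSelects(bool c, int p, int q, int x) {
      int l = c ? p : x;
      int r = c ? q : x;
      return l & r;
    }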
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); @@ -819,7 +851,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { NewPN->takeName(PN); // If we are going to have to insert a new computation, do so right before the - // predecessors terminator. + // predecessor's terminator. if (NonConstBB) Builder->SetInsertPoint(NonConstBB->getTerminator()); @@ -893,10 +925,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return ReplaceInstUsesWith(I, NewPN); } -/// FindElementAtOffset - Given a pointer type and a constant offset, determine -/// whether or not there is a sequence of GEP indices into the pointed type that -/// will land us at the specified offset. If so, fill them into NewIndices and -/// return the resultant element type, otherwise return null. +/// Given a pointer type and a constant offset, determine whether or not there +/// is a sequence of GEP indices into the pointed type that will land us at the +/// specified offset. If so, fill them into NewIndices and return the resultant +/// element type, otherwise return null. Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl &NewIndices) { Type *Ty = PtrTy->getElementType(); @@ -965,8 +997,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { return true; } -/// Descale - Return a value X such that Val = X * Scale, or null if none. If -/// the multiplication is known not to overflow then NoSignedWrap is set. +/// Return a value X such that Val = X * Scale, or null if none. +/// If the multiplication is known not to overflow, then NoSignedWrap is set. Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { assert(isa(Val->getType()) && "Can only descale integers!"); assert(cast(Val->getType())->getBitWidth() == @@ -1008,11 +1040,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // 0'th operand of Val. std::pair Parent; - // RequireNoSignedWrap - Set if the transform requires a descaling at deeper - // levels that doesn't overflow. + // Set if the transform requires a descaling at deeper levels that doesn't + // overflow. bool RequireNoSignedWrap = false; - // logScale - log base 2 of the scale. Negative if not a power of 2. + // Log base 2 of the scale. Negative if not a power of 2. int32_t logScale = Scale.exactLogBase2(); for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down @@ -1213,16 +1245,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, InstCombiner::BuilderTy *B) { - Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); - if (BinaryOperator *NewBO = dyn_cast(BORes)) { - if (isa(NewBO)) { - NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap()); - NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap()); - } - if (isa(NewBO)) - NewBO->setIsExact(Inst.isExact()); - } - return BORes; + Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + // If LHS and RHS are constant, BO won't be a binary operator. + if (BinaryOperator *NewBO = dyn_cast(BO)) + NewBO->copyIRFlags(&Inst); + return BO; } /// \brief Makes transformation of binary operation specific for vector types. 
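[Editor's note: a sketch of the question FindElementAtOffset answers, assuming 4-byte ints and no padding; 'S' and 'atOffset12' are made up. Byte offset 12 into S corresponds to the GEP index sequence {0, 1, 2}, i.e. &p->b[2], so a raw byte offset can be rewritten as a typed GEP.]

    struct S { int a; int b[3]; };

    int *atOffset12(S *p) {
      return &p->b[2];   // same address as (char *)p + 12 under this layout
    }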
@@ -1256,9 +1283,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getMask() == RShuf->getMask()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - Value *Res = Builder->CreateShuffleVector(NewBO, + return Builder->CreateShuffleVector(NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); - return Res; } } @@ -1294,18 +1320,11 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { } if (MayChange) { Constant *C2 = ConstantVector::get(C2M); - Value *NewLHS, *NewRHS; - if (isa(LHS)) { - NewLHS = C2; - NewRHS = Shuffle->getOperand(0); - } else { - NewLHS = Shuffle->getOperand(0); - NewRHS = C2; - } + Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); + Value *NewRHS = isa(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - Value *Res = Builder->CreateShuffleVector(NewBO, + return Builder->CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); - return Res; } } @@ -1323,7 +1342,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; - Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType()); + Type *IntPtrTy = + DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1333,21 +1353,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!SeqTy) continue; + // Index type should have the same width as IntPtr + Type *IndexTy = (*I)->getType(); + Type *NewIndexType = IndexTy->isVectorTy() ? + VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. if (SeqTy->getElementType()->isSized() && DL.getTypeAllocSize(SeqTy->getElementType()) == 0) if (!isa(*I) || !cast(*I)->isNullValue()) { - *I = Constant::getNullValue(IntPtrTy); + *I = Constant::getNullValue(NewIndexType); MadeChange = true; } - Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy) { + if (IndexTy != NewIndexType) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, IntPtrTy, true); + *I = Builder->CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1421,8 +1445,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - GetElementPtrInst *NewGEP = cast(Op1->clone()); + // If not all GEPs are identical we'll have to create a new PHI node. + // Check that the old PHI node has only one use so that it will get + // removed. + if (DI != -1 && !PN->hasOneUse()) + return nullptr; + GetElementPtrInst *NewGEP = cast(Op1->clone()); if (DI == -1) { // All the GEPs feeding the PHI are identical. Clone one down into our // BB so that it can be merged with the current GEP. @@ -1432,11 +1461,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // All the GEPs feeding the PHI differ at a single offset. Clone a GEP // into the current block so it can be merged, and create a new PHI to // set that index. 
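[Editor's note: a hypothetical C-level view of the GEP index canonicalization above, assuming a 64-bit target; 'indexGEP' is made up. Indices narrower than the pointer-sized integer are sign-extended up front so later GEP folds all see one index width.]

    long *indexGEP(long *base, int i) {
      // Before: gep with an i32 index.
      // After:  gep with (sext i32 %i to i64) as the index.
      return &base[(long)i];
    }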
- Instruction *InsertPt = Builder->GetInsertPoint(); - Builder->SetInsertPoint(PN); - PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); - Builder->SetInsertPoint(InsertPt); + PHINode *NewPN; + { + IRBuilderBase::InsertPointGuard Guard(*Builder); + Builder->SetInsertPoint(PN); + NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + } for (auto &I : PN->operands()) NewPN->addIncoming(cast(I)->getOperand(DI), @@ -1790,7 +1821,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { I->takeName(BCI); - BCI->getParent()->getInstList().insert(BCI, I); + BCI->getParent()->getInstList().insert(BCI->getIterator(), I); ReplaceInstUsesWith(*BCI, I); } return &GEP; @@ -1931,7 +1962,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { if (InvokeInst *II = dyn_cast(&MI)) { // Replace invoke with a NOP intrinsic to maintain the original CFG - Module *M = II->getParent()->getParent()->getParent(); + Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", II->getParent()); @@ -2280,9 +2311,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { } if (LoadInst *L = dyn_cast(Agg)) // If the (non-volatile) load only has one use, we can rewrite this to a - // load from a GEP. This reduces the size of the load. - // FIXME: If a load is used only by extractvalue instructions then this - // could be done regardless of having multiple uses. + // load from a GEP. This reduces the size of the load. If a load is used + // only by extractvalue instructions then this either must have been + // optimized before, or it is a struct with padding, in which case we + // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; @@ -2294,7 +2326,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L->getParent(), L); + Builder->SetInsertPoint(L); Value *GEP = Builder->CreateInBoundsGEP(L->getType(), L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in @@ -2312,7 +2344,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return nullptr; } -/// isCatchAll - Return 'true' if the given typeinfo will match anything. +/// Return 'true' if the given typeinfo will match anything. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { switch (Personality) { case EHPersonality::GNU_C: @@ -2330,6 +2362,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_Win64SEH: case EHPersonality::MSVC_CXX: + case EHPersonality::CoreCLR: return TypeInfo->isNullValue(); } llvm_unreachable("invalid enum"); @@ -2441,10 +2474,24 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { SawCatchAll = true; break; } - if (AlreadyCaught.count(TypeInfo)) - // Already caught by an earlier clause, so having it in the filter - // is pointless. - continue; + + // Even if we've seen a type in a catch clause, we don't want to + // remove it from the filter. 
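[Editor's note: a hypothetical example of the extractvalue(load) -> load(gep) narrowing in visitExtractValueInst above; 'Pair' and 'second' are made up. With a simple, single-use load only the demanded field is loaded, and multi-use loads are now deliberately skipped so struct padding knowledge is not lost.]

    struct Pair { int a; int b; };

    int second(const Pair *p) {
      // Before: load the whole Pair, then extractvalue index 1.
      // After:  load only p->b through an inbounds GEP.
      return p->b;
    }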
An unexpected type handler may be
+ // set up for a call site which throws an exception of the same
+ // type caught. In order for the exception thrown by the unexpected
+ // handler to propagate correctly, the filter must be correctly
+ // described for the call site.
+ //
+ // Example:
+ //
+ // void unexpected() { throw 1;}
+ // void foo() throw (int) {
+ // std::set_unexpected(unexpected);
+ // try {
+ // throw 2.0;
+ // } catch (int i) {}
+ // }
+
// There is no point in having multiple copies of the same typeinfo in
// a filter, so only add it if we didn't already.
if (SeenInFilter.insert(TypeInfo).second)
@@ -2637,15 +2684,15 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
+/// Try to move the specified instruction from its current block into the
+/// beginning of DestBlock, which can only happen if it's safe to move the
+/// instruction past all of the instructions between it and the end of its
+/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa(I) || isa(I) || I->mayHaveSideEffects() ||
+ if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() ||
isa(I))
return false;
@@ -2654,17 +2701,24 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
&DestBlock->getParent()->getEntryBlock())
return false;
+ // Do not sink convergent call instructions.
+ if (auto *CI = dyn_cast(I)) {
+ if (CI->isConvergent())
+ return false;
+ }
+
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
- for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ for (BasicBlock::iterator Scan = I->getIterator(),
+ E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
- I->moveBefore(InsertPos);
+ I->moveBefore(&*InsertPos);
++NumSunkInst;
return true;
}
@@ -2698,6 +2752,27 @@ bool InstCombiner::run() {
}
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!I->use_empty() && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+ " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
@@ -2738,7 +2813,7 @@ bool InstCombiner::run() {
}
// Now that we have an instruction, try combining it to simplify it.
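[Editor's note] The new block added to InstCombiner::run() above folds an instruction to a constant whenever KnownZero and KnownOne together cover every bit. A standalone sketch of that test, using plain 64-bit masks in place of APInt (all names here are illustrative, not LLVM's):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// If every bit is known (either known-zero or known-one), the value is a
// compile-time constant whose bits are exactly the known-one mask.
std::optional<uint64_t> foldIfAllBitsKnown(uint64_t KnownZero,
                                           uint64_t KnownOne) {
  if ((KnownZero | KnownOne) == ~0ULL)
    return KnownOne;
  return std::nullopt; // some bits are still unknown
}

int main() {
  // Suppose analysis proved bits 0-3 are 0101, bits 4-7 are 1, rest are 0,
  // e.g. for x = (y | 0xF0) & 0xFF with y's low nibble known to be 0101.
  uint64_t KnownOne = 0xF5;
  uint64_t KnownZero = ~uint64_t{0xF5};
  if (auto C = foldIfAllBitsKnown(KnownZero, KnownOne))
    std::cout << "folds to constant " << *C << "\n"; // prints 245
}
```

The point of the change is that this can fire even when no operand is a literal constant, which ordinary constant folding would miss.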
- Builder->SetInsertPoint(I->getParent(), I); + Builder->SetInsertPoint(I); Builder->SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG @@ -2768,7 +2843,7 @@ bool InstCombiner::run() { // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); - BasicBlock::iterator InsertPos = I; + BasicBlock::iterator InsertPos = I->getIterator(); // If we replace a PHI with something that isn't a PHI, fix up the // insertion point. @@ -2801,8 +2876,8 @@ bool InstCombiner::run() { return MadeIRChange; } -/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding -/// all reachable code to the worklist. +/// Walk the function in depth-first order, adding all reachable code to the +/// worklist. /// /// This has a couple of tricks to make the code faster and more powerful. In /// particular, we constant fold and DCE instructions as we go, to avoid adding @@ -2829,7 +2904,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, continue; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *Inst = BBI++; + Instruction *Inst = &*BBI++; // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { @@ -2900,8 +2975,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, } } - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - Worklist.push_back(TI->getSuccessor(i)); + for (BasicBlock *SuccBB : TI->successors()) + Worklist.push_back(SuccBB); } while (!Worklist.empty()); // Once we've found all of the instructions to add to instcombine's worklist, @@ -2909,8 +2984,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, // of the function down. This jives well with the way that it adds all uses // of instructions to the worklist after doing a transformation, thus avoiding // some N^2 behavior in pathological cases. - ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], - InstrsForInstCombineWorklist.size()); + ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist); return MadeIRChange; } @@ -2930,13 +3004,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, // track of which blocks we visit. SmallPtrSet Visited; MadeIRChange |= - AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI); + AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (Visited.count(BB)) + if (Visited.count(&*BB)) continue; // Delete the instructions backwards, as it has a reduced likelihood of @@ -2944,11 +3018,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. while (EndInst != BB->begin()) { // Delete the next to last instruction. 
- BasicBlock::iterator I = EndInst; - Instruction *Inst = --I; - if (!Inst->use_empty()) + Instruction *Inst = &*--EndInst->getIterator(); + if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (isa(Inst)) { + if (Inst->isEHPad()) { EndInst = Inst; continue; } @@ -2956,7 +3029,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, ++NumDeadInst; MadeIRChange = true; } - Inst->eraseFromParent(); + if (!Inst->getType()->isTokenTy()) + Inst->eraseFromParent(); } } @@ -2968,8 +3042,6 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, LoopInfo *LI = nullptr) { - // Minimizing size? - bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new @@ -2992,7 +3064,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist)) Changed = true; - InstCombiner IC(Worklist, &Builder, MinimizeSize, + InstCombiner IC(Worklist, &Builder, F.optForMinSize(), AA, &AC, &TLI, &DT, DL, LI); if (IC.run()) Changed = true; @@ -3046,11 +3118,12 @@ public: void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } bool InstructionCombiningPass::runOnFunction(Function &F) { @@ -3058,7 +3131,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { return false; // Required analyses. - auto AA = &getAnalysis(); + auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); @@ -3076,7 +3149,8 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e7ef9f96edc2..a9df5e5898ae 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -90,7 +91,9 @@ static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *const kAsanInitName = "__asan_init_v5"; +static const char *const kAsanInitName = "__asan_init"; +static const char *const kAsanVersionCheckName = + "__asan_version_mismatch_check_v6"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; 
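[Editor's note] The constant rename just above splits the old versioned init entry point (__asan_init_v5) into an unversioned __asan_init plus an explicit __asan_version_mismatch_check_v6; the doInitialization hunk later in this file passes kAsanVersionCheckName to createSanitizerCtorAndInitFunctions so the module constructor emits both calls. A rough sketch of what that generated constructor amounts to, with stand-in stubs for the compiler-rt entry points:

```cpp
#include <cstdio>

// Stand-ins for the compiler-rt entry points named by the constants above.
extern "C" void __asan_init() { std::puts("runtime initialized"); }
extern "C" void __asan_version_mismatch_check_v6() {
  // The real runtime aborts here if the compiler and runtime ABI versions
  // disagree; this stub is a no-op.
}

// Rough shape of the generated asan.module_ctor: initialize the runtime,
// then fail loudly on an ABI version mismatch.
extern "C" void asan_module_ctor() {
  __asan_init();
  __asan_version_mismatch_check_v6();
}

int main() { asan_module_ctor(); }
```

Keeping the version check in a separate symbol means old binaries linked against a newer runtime fail with a clear diagnostic instead of silently misbehaving.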
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -119,6 +122,10 @@ static const unsigned kAllocaRzSize = 32;
static cl::opt ClEnableKasan(
"asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
cl::Hidden, cl::init(false));
+static cl::opt ClRecover(
+ "asan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt ClInstrumentReads("asan-instrument-reads",
@@ -177,7 +184,7 @@ static cl::opt ClMemoryAccessCallbackPrefix(
cl::init("__asan_"));
static cl::opt ClInstrumentAllocas("asan-instrument-allocas",
cl::desc("instrument dynamic allocas"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
static cl::opt ClSkipPromotableAllocas(
"asan-skip-promotable-allocas",
cl::desc("Do not instrument promotable allocas"), cl::Hidden,
@@ -273,6 +280,11 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
+ void reset() {
+ inited_ = false;
+ Entries.clear();
+ }
+
void init(Module &M) {
assert(!inited_);
inited_ = true;
@@ -321,7 +333,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsKasan) {
- bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsLinux = TargetTriple.isOSLinux();
@@ -338,6 +350,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
ShadowMapping Mapping;
if (LongSize == 32) {
+ // Android is always PIE, which means that the beginning of the address
+ // space is always available.
if (IsAndroid)
Mapping.Offset = 0;
else if (IsMIPS32)
@@ -376,7 +390,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
// OR-ing the shadow offset is more efficient (at least on x86) if the offset
// is a power of two, but on ppc64 we have to use add since the shadow
// offset is not necessarily 1/8-th of the address space.
- Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1));
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64
+ && !(Mapping.Offset & (Mapping.Offset - 1));
return Mapping;
}
@@ -389,8 +404,9 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
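[Editor's note] getShadowMapping above only selects the Scale and Offset; the translation itself (memToShadow, declared in the pass below) computes Shadow = (Addr >> Scale) + Offset, using OR instead of ADD when OrShadowOffset is set and the offset is a power of two. A rough standalone sketch of both variants; the constants are illustrative, not a real platform mapping:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative 1/8-th shadow mapping: Scale = 3, so one shadow byte
// covers eight application bytes.
constexpr unsigned Scale = 3;
constexpr uint64_t Offset = 0x100000000ULL; // a power of two in this example

uint64_t shadowWithAdd(uint64_t Addr) { return (Addr >> Scale) + Offset; }

// When Offset is a power of two and no shifted address can reach that bit,
// OR gives the same result and can be cheaper to encode (e.g. on x86).
uint64_t shadowWithOr(uint64_t Addr) { return (Addr >> Scale) | Offset; }

int main() {
  uint64_t Addr = 0x7fff1234ULL;
  assert(shadowWithAdd(Addr) == shadowWithOr(Addr));
}
```

The hunk above also excludes AArch64 from the OR form, since with multiple VMA sizes the power-of-two assumption no longer holds for every shifted address.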
struct AddressSanitizer : public FunctionPass { - explicit AddressSanitizer(bool CompileKernel = false) - : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) { + explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false) + : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) { initializeAddressSanitizerPass(*PassRegistry::getPassRegistry()); } const char *getPassName() const override { @@ -437,7 +453,9 @@ struct AddressSanitizer : public FunctionPass { Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; bool maybeInsertAsanInitAtFunctionEntry(Function &F); + void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } @@ -450,10 +468,21 @@ struct AddressSanitizer : public FunctionPass { bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, uint64_t TypeSize) const; + /// Helper to cleanup per-function state. + struct FunctionStateRAII { + AddressSanitizer *Pass; + FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { + assert(Pass->ProcessedAllocas.empty() && + "last pass forgot to clear cache"); + } + ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); } + }; + LLVMContext *C; Triple TargetTriple; int LongSize; bool CompileKernel; + bool Recover; Type *IntptrTy; ShadowMapping Mapping; DominatorTree *DT; @@ -477,8 +506,10 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - explicit AddressSanitizerModule(bool CompileKernel = false) - : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {} + explicit AddressSanitizerModule(bool CompileKernel = false, + bool Recover = false) + : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid const char *getPassName() const override { return "AddressSanitizerModule"; } @@ -496,6 +527,7 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; bool CompileKernel; + bool Recover; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -525,6 +557,7 @@ struct FunctionStackPoisoner : public InstVisitor { ShadowMapping Mapping; SmallVector AllocaVec; + SmallSetVector NonInstrumentedStaticAllocaVec; SmallVector RetVec; unsigned StackAlignment; @@ -545,12 +578,14 @@ struct FunctionStackPoisoner : public InstVisitor { SmallVector DynamicAllocaVec; SmallVector StackRestoreVec; AllocaInst *DynamicAllocaLayout = nullptr; + IntrinsicInst *LocalEscapeCall = nullptr; // Maps Value to an AllocaInst from which the Value is originated. 
typedef DenseMap AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; - bool HasNonEmptyInlineAsm; + bool HasNonEmptyInlineAsm = false; + bool HasReturnsTwiceCall = false; std::unique_ptr EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) @@ -562,7 +597,6 @@ struct FunctionStackPoisoner : public InstVisitor { IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), StackAlignment(1 << Mapping.Scale), - HasNonEmptyInlineAsm(false), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { @@ -596,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor { void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); + Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); + // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we + // need to adjust extracted SP to compute the address of the most recent + // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for + // this purpose. + if (!isa(InstBefore)) { + Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( + InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, + {IntptrTy}); + + Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); + + DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), + DynamicAreaOffset); + } + IRB.CreateCall(AsanAllocasUnpoisonFunc, - {IRB.CreateLoad(DynamicAllocaLayout), - IRB.CreatePtrToInt(SavedStack, IntptrTy)}); + {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. @@ -625,7 +674,10 @@ struct FunctionStackPoisoner : public InstVisitor { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!ASan.isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) { + if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI); + return; + } StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (ASan.isDynamicAlloca(AI)) @@ -639,6 +691,7 @@ struct FunctionStackPoisoner : public InstVisitor { void visitIntrinsicInst(IntrinsicInst &II) { Intrinsic::ID ID = II.getIntrinsicID(); if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); + if (ID == Intrinsic::localescape) LocalEscapeCall = &II; if (!ClCheckLifetime) return; if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; @@ -660,9 +713,13 @@ struct FunctionStackPoisoner : public InstVisitor { AllocaPoisonCallVec.push_back(APC); } - void visitCallInst(CallInst &CI) { - HasNonEmptyInlineAsm |= - CI.isInlineAsm() && !CI.isIdenticalTo(EmptyInlineAsm.get()); + void visitCallSite(CallSite CS) { + Instruction *I = CS.getInstruction(); + if (CallInst *CI = dyn_cast(I)) { + HasNonEmptyInlineAsm |= + CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get()); + HasReturnsTwiceCall |= CI->canReturnTwice(); + } } // ---------------------- Helpers. 
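[Editor's note] The unpoisonDynamicAllocasBeforeInst change earlier in this hunk adjusts the saved stack pointer by @llvm.get.dynamic.area.offset before calling the unpoison routine, because on some targets the SP captured by llvm.stacksave does not point directly at the dynamic alloca area. A back-of-the-envelope sketch of the address arithmetic; the offset value here is hypothetical, while the real one comes from the intrinsic:

```cpp
#include <cstdint>
#include <iostream>

// Compute the base of the dynamic-alloca area from a saved stack pointer
// plus a target-specific fixed offset (zero on most targets, nonzero where
// the ABI keeps extra state between SP and the alloca area).
uint64_t dynamicAreaPtr(uint64_t SavedSP, uint64_t DynamicAreaOffset) {
  return SavedSP + DynamicAreaOffset;
}

int main() {
  uint64_t SavedSP = 0x7ffee000;    // value captured by llvm.stacksave
  uint64_t HypotheticalOffset = 32; // stand-in for the intrinsic's result
  std::cout << std::hex << dynamicAreaPtr(SavedSP, HypotheticalOffset)
            << "\n"; // base passed to the allocas-unpoison runtime call
}
```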
@@ -689,7 +746,7 @@ struct FunctionStackPoisoner : public InstVisitor { Instruction *ThenTerm, Value *ValueIfFalse); }; -} // namespace +} // anonymous namespace char AddressSanitizer::ID = 0; INITIALIZE_PASS_BEGIN( @@ -697,12 +754,15 @@ INITIALIZE_PASS_BEGIN( "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) { - return new AddressSanitizer(CompileKernel); +FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizer(CompileKernel, Recover); } char AddressSanitizerModule::ID = 0; @@ -711,8 +771,10 @@ INITIALIZE_PASS( "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) -ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) { - return new AddressSanitizerModule(CompileKernel); +ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizerModule(CompileKernel, Recover); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -799,8 +861,10 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { getAllocaSizeInBytes(&AI) > 0 && // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) || - isDynamicAlloca(AI))); + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -868,10 +932,8 @@ static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { } else { return false; } - if (!isPointerOperand(I->getOperand(0)) || - !isPointerOperand(I->getOperand(1))) - return false; - return true; + return isPointerOperand(I->getOperand(0)) && + isPointerOperand(I->getOperand(1)); } bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { @@ -919,7 +981,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. 
GlobalVariable *G = dyn_cast(GetUnderlyingObject(Addr, DL)); - if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) && + if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && isSafeAccess(ObjSizeVis, Addr, TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; @@ -1041,13 +1103,17 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); - BasicBlock *CrashBlock = + if (Recover) { + CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false); + } else { + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); - CrashTerm = new UnreachableInst(*C, CrashBlock); - BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); - ReplaceInstWithInst(CheckTerm, NewTerm); + CrashTerm = new UnreachableInst(*C, CrashBlock); + BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); + ReplaceInstWithInst(CheckTerm, NewTerm); + } } else { - CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); + CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, @@ -1084,7 +1150,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. - IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt()); + IRBuilder<> IRB(&GlobalInit.front(), + GlobalInit.front().getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); @@ -1147,6 +1214,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // Do not instrument globals from special LLVM sections. if (Section.find("__llvm") != StringRef::npos) return false; + // Do not instrument function pointers to initialization and termination + // routines: dynamic linker will not properly handle redzones. + if (Section.startswith(".preinit_array") || + Section.startswith(".init_array") || + Section.startswith(".fini_array")) { + return false; + } + // Callbacks put into the CRT initializer/terminator sections // should not be instrumented. // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 @@ -1162,10 +1237,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); - if (!ErrorCode.empty()) { - assert(false && "Invalid section specifier."); - return false; - } + assert(ErrorCode.empty() && "Invalid section specifier."); // Ignore the globals from the __OBJC section. The ObjC runtime assumes // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to @@ -1383,13 +1455,11 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string TypeStr = AccessIsWrite ? "store" : "load"; const std::string ExpStr = Exp ? "exp_" : ""; const std::string SuffixStr = CompileKernel ? "N" : "_n"; - const std::string EndingStr = CompileKernel ? "_noabort" : ""; - const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; - // TODO(glider): for KASan builds add _noabort to error reporting - // functions and make them actually noabort (remove the UnreachableInst). 
+ const std::string EndingStr = Recover ? "_noabort" : "";
+ Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
AsanErrorCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr,
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1400,7 +1470,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix,
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1448,15 +1518,20 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!CompileKernel) {
std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
+ createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
}
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
+bool AddressSanitizer::doFinalization(Module &M) {
+ GlobalsMD.reset();
+ return false;
+}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -1466,13 +1541,41 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- IRBuilder<> IRB(F.begin()->begin());
+ IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
}
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+ // Find the one possible call to llvm.localescape and pre-mark allocas passed
+ // to it as uninteresting. This assumes we haven't started processing allocas
+ // yet. This check is done up front because iterating the use list in
+ // isInterestingAlloca would be algorithmically slower.
+ assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+ // Try to get the declaration of llvm.localescape. If it's not in the module,
+ // we can exit early.
+ if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+ // Look for a call to llvm.localescape in the entry block. It can't be in
+ // any other block.
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+ // We found a call. Mark all the allocas passed in as uninteresting.
+ for (Value *Arg : II->arg_operands()) { + AllocaInst *AI = dyn_cast(Arg->stripPointerCasts()); + assert(AI && AI->isStaticAlloca() && + "non-static alloca arg to localescape"); + ProcessedAllocas[AI] = false; + } + break; + } + } +} + bool AddressSanitizer::runOnFunction(Function &F) { if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; @@ -1488,6 +1591,12 @@ bool AddressSanitizer::runOnFunction(Function &F) { if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; + FunctionStateRAII CleanupObj(this); + + // We can't instrument allocas used with llvm.localescape. Only static allocas + // can be passed to that intrinsic. + markEscapedLocalAllocas(F); + // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallSet TempsToInstrument; @@ -1715,6 +1824,16 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { void FunctionStackPoisoner::poisonStack() { assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0); + // Insert poison calls for lifetime intrinsics for alloca. + bool HavePoisonedAllocas = false; + for (const auto &APC : AllocaPoisonCallVec) { + assert(APC.InsBefore); + assert(APC.AI); + IRBuilder<> IRB(APC.InsBefore); + poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); + HavePoisonedAllocas |= APC.DoPoison; + } + if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) { // Handle dynamic allocas. createDynamicAllocasInitStorage(); @@ -1723,7 +1842,7 @@ void FunctionStackPoisoner::poisonStack() { unpoisonDynamicAllocas(); } - if (AllocaVec.size() == 0) return; + if (AllocaVec.empty()) return; int StackMallocIdx = -1; DebugLoc EntryDebugLocation; @@ -1734,6 +1853,19 @@ void FunctionStackPoisoner::poisonStack() { IRBuilder<> IRB(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); + // Make sure non-instrumented allocas stay in the entry block. Otherwise, + // debug info is broken, because only entry-block allocas are treated as + // regular stack slots. + auto InsBeforeB = InsBefore->getParent(); + assert(InsBeforeB == &F.getEntryBlock()); + for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I) + if (auto *AI = dyn_cast(I)) + if (NonInstrumentedStaticAllocaVec.count(AI) > 0) + AI->moveBefore(InsBefore); + + // If we have a call to llvm.localescape, keep it in the entry block. + if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); + SmallVector SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { @@ -1751,10 +1883,15 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca or stack malloc in presence of inline asm: - // too often it makes assumptions on which registers are available. - bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; - DoStackMalloc &= !HasNonEmptyInlineAsm; + bool DoDynamicAlloca = ClDynamicAllocaStack; + // Don't do dynamic alloca or stack malloc if: + // 1) There is inline asm: too often it makes assumptions on which registers + // are available. + // 2) There is a returns_twice call (typically setjmp), which is + // optimization-hostile, and doesn't play well with introduced indirect + // register-relative calculation of local variable addresses. 
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+ DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1804,16 +1941,6 @@ void FunctionStackPoisoner::poisonStack() {
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
}
- // Insert poison calls for lifetime intrinsics for alloca.
- bool HavePoisonedAllocas = false;
- for (const auto &APC : AllocaPoisonCallVec) {
- assert(APC.InsBefore);
- assert(APC.AI);
- IRBuilder<> IRB(APC.InsBefore);
- poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
- HavePoisonedAllocas |= APC.DoPoison;
- }
-
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index f6858034d79e..fd3dfd9af033 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -106,7 +106,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
}
++ChecksAdded;
- Instruction *Inst = Builder->GetInsertPoint();
+ BasicBlock::iterator Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
OldBB->getTerminator()->eraseFromParent();
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
new file mode 100644
index 000000000000..c47fdbf68996
--- /dev/null
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -0,0 +1,217 @@
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute the Minimum Spanning
+// Tree for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include
+#include
+#include
+
+namespace llvm {
+
+#define DEBUG_TYPE "cfgmst"
+
+/// \brief A union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute the Minimum Spanning Tree
+/// for a given CFG.
+template class CFGMST {
+public:
+ Function &F;
+
+ // Store all the edges in CFG. It may contain some stale edges
+ // when Removed is set.
+ std::vector> AllEdges;
+
+ // This map records the auxiliary information for each BB.
+ DenseMap> BBInfos;
+
+ // Find the root group of G and compress the path from G to the root.
+ BBInfo *findAndCompressGroup(BBInfo *G) {
+ if (G->Group != G)
+ G->Group = findAndCompressGroup(static_cast(G->Group));
+ return static_cast(G->Group);
+ }
+
+ // Union BB1 and BB2 into the same group and return true.
+ // Returns false if BB1 and BB2 are already in the same group.
+ bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+ BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+ if (BB1G == BB2G)
+ return false;
+
+ // Make the smaller-rank tree a direct child of the root of the
+ // higher-rank tree.
+ if (BB1G->Rank < BB2G->Rank)
+ BB1G->Group = BB2G;
+ else {
+ BB2G->Group = BB1G;
+ // If the ranks are the same, increment the rank of the surviving
+ // root by one.
+ if (BB1G->Rank == BB2G->Rank)
+ BB1G->Rank++;
+ }
+ return true;
+ }
+
+ // Given a BB, return its auxiliary information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ assert(It->second.get() != nullptr);
+ return *It->second.get();
+ }
+
+ // Traverse the CFG, find all the edges, and assign each a weight.
+ // Edges with large weight will be put into the MST first so they are less
+ // likely to be instrumented.
+ void buildEdges() {
+ DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+ const BasicBlock *BB = &(F.getEntryBlock());
+ uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+ // Add a fake edge to the entry.
+ addEdge(nullptr, BB, EntryWeight);
+
+ // Special handling for single BB functions.
+ if (succ_empty(BB)) {
+ addEdge(BB, nullptr, EntryWeight);
+ return;
+ }
+
+ static const uint32_t CriticalEdgeMultiplier = 1000;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ uint64_t BBWeight =
+ (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+ uint64_t Weight = 2;
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ BasicBlock *TargetBB = TI->getSuccessor(i);
+ bool Critical = isCriticalEdge(TI, i);
+ uint64_t scaleFactor = BBWeight;
+ if (Critical) {
+ if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+ scaleFactor *= CriticalEdgeMultiplier;
+ else
+ scaleFactor = UINT64_MAX;
+ }
+ if (BPI != nullptr)
+ Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+ addEdge(&*BB, TargetBB, Weight).IsCritical = Critical;
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
+ << TargetBB->getName() << " w=" << Weight << "\n");
+ }
+ } else {
+ addEdge(&*BB, nullptr, BBWeight);
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit"
+ << " w = " << BBWeight << "\n");
+ }
+ }
+ }
+
+ // Sort CFG edges by weight.
+ void sortEdgesByWeight() {
+ std::stable_sort(AllEdges.begin(), AllEdges.end(),
+ [](const std::unique_ptr &Edge1,
+ const std::unique_ptr &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
+ }
+
+ // Traverse all the edges and compute the Minimum Weight Spanning Tree
+ // using the union-find algorithm.
+ void computeMinimumSpanningTree() {
+ // First, put all critical edges whose destination is a landing pad into
+ // the MST. This works around insufficient support for splitting critical
+ // edges when the destination BB is a landing pad.
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (Ei->IsCritical) {
+ if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+ }
+
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+
+ // Dump the debug information about the instrumentation.
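[Editor's note] findAndCompressGroup and unionGroups above are a textbook union-find with path compression and union by rank. For reference, here is the same structure over plain integer ids, detached from any CFG types; this is a sketch of the algorithm, not the CFGMST code itself:

```cpp
#include <cassert>
#include <numeric>
#include <utility>
#include <vector>

struct UnionFind {
  std::vector<int> Parent, Rank;
  explicit UnionFind(int N) : Parent(N), Rank(N, 0) {
    std::iota(Parent.begin(), Parent.end(), 0); // each node is its own root
  }
  // Path compression: point every visited node directly at the root.
  int find(int X) {
    if (Parent[X] != X)
      Parent[X] = find(Parent[X]);
    return Parent[X];
  }
  // Union by rank; returns false if X and Y were already in one group.
  bool unite(int X, int Y) {
    int RX = find(X), RY = find(Y);
    if (RX == RY)
      return false;
    if (Rank[RX] < Rank[RY])
      std::swap(RX, RY);
    Parent[RY] = RX;
    if (Rank[RX] == Rank[RY])
      ++Rank[RX];
    return true;
  }
};

int main() {
  UnionFind UF(4);
  assert(UF.unite(0, 1));  // adds an edge to the spanning tree
  assert(UF.unite(1, 2));
  assert(!UF.unite(0, 2)); // would form a cycle, so it is skipped
}
```

Processing edges in decreasing weight order and keeping only the unions that succeed is exactly how computeMinimumSpanningTree works: the heavy (frequently executed) edges land in the tree, leaving the lighter edges as the ones to instrument.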
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const { + if (!Message.str().empty()) + OS << Message << "\n"; + OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; + for (auto &BI : BBInfos) { + const BasicBlock *BB = BI.first; + OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " + << BI.second->infoString() << "\n"; + } + + OS << " Number of Edges: " << AllEdges.size() + << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; + uint32_t Count = 0; + for (auto &EI : AllEdges) + OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" + << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; + } + + // Add an edge to AllEdges with weight W. + Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { + uint32_t Index = BBInfos.size(); + auto Iter = BBInfos.end(); + bool Inserted; + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); + if (Inserted) { + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + Index++; + } + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); + if (Inserted) + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + AllEdges.emplace_back(new Edge(Src, Dest, W)); + return *AllEdges.back(); + } + + BranchProbabilityInfo *BPI; + BlockFrequencyInfo *BFI; + +public: + CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, + BlockFrequencyInfo *BFI_ = nullptr) + : F(Func), BPI(BPI_), BFI(BFI_) { + buildEdges(); + sortEdgesByWeight(); + computeMinimumSpanningTree(); + } +}; + +#undef DEBUG_TYPE // "cfgmst" +} // end namespace llvm diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 9b81f4bb1619..cae1e5af7ac7 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMInstrumentation MemorySanitizer.cpp Instrumentation.cpp InstrProfiling.cpp + PGOInstrumentation.cpp SafeStack.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2de6e1afaba9..d459fc50d136 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -72,6 +72,11 @@ using namespace llvm; +// External symbol to be used when generating the shadow address for +// architectures with multiple VMAs. Instead of using a constant integer +// the runtime will set the external mask based on the VMA range. +static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask"; + // The -dfsan-preserve-alignment flag controls whether this pass assumes that // alignment requirements provided by the input IR are correct. 
For example, // if the input IR contains a load with alignment 8, this flag will cause @@ -124,6 +129,7 @@ static cl::opt ClDebugNonzeroLabels( "load or return with a nonzero label"), cl::Hidden); + namespace { StringRef GetGlobalTypeString(const GlobalValue &G) { @@ -231,6 +237,7 @@ class DataFlowSanitizer : public ModulePass { void *(*GetRetvalTLSPtr)(); Constant *GetArgTLS; Constant *GetRetvalTLS; + Constant *ExternalShadowMask; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanUnimplementedFnTy; @@ -248,7 +255,7 @@ class DataFlowSanitizer : public ModulePass { DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttributeSet ReadOnlyNoneAttrs; - DenseMap FunctionDIs; + bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); bool isInstrumented(const Function *F); @@ -362,7 +369,8 @@ llvm::createDataFlowSanitizerPass(const std::vector &ABIListFiles, DataFlowSanitizer::DataFlowSanitizer( const std::vector &ABIListFiles, void *(*getArgTLS)(), void *(*getRetValTLS)()) - : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) { + : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), + DFSanRuntimeShadowMask(false) { std::vector AllABIListFiles(std::move(ABIListFiles)); AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), ClABIListFiles.end()); @@ -420,6 +428,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) { bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; + bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 || + TargetTriple.getArch() == llvm::Triple::aarch64_be; const DataLayout &DL = M.getDataLayout(); @@ -434,6 +444,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) { ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); else if (IsMIPS64) ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); + // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 
+ else if (IsAArch64) + DFSanRuntimeShadowMask = true; else report_fatal_error("unsupported triple"); @@ -578,7 +591,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) - DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSF.ValShadowMap[&*ValAI] = &*ShadowAI; DFSanVisitor(DFSF).visitCallInst(*CI); if (!FT->getReturnType()->isVoidTy()) new StoreInst(DFSF.getShadow(RI->getReturnValue()), @@ -592,8 +605,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (ABIList.isIn(M, "skip")) return false; - FunctionDIs = makeSubprogramMap(M); - if (!GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); @@ -606,6 +617,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) { G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); } + ExternalShadowMask = + Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); + DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast(DFSanUnionFn)) { F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); @@ -643,16 +657,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { std::vector FnsToInstrument; llvm::SmallPtrSet FnsWithNativeABI; - for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { - if (!i->isIntrinsic() && - i != DFSanUnionFn && - i != DFSanCheckedUnionFn && - i != DFSanUnionLoadFn && - i != DFSanUnimplementedFn && - i != DFSanSetLabelFn && - i != DFSanNonzeroLabelFn && - i != DFSanVarargWrapperFn) - FnsToInstrument.push_back(&*i); + for (Function &i : M) { + if (!i.isIntrinsic() && + &i != DFSanUnionFn && + &i != DFSanCheckedUnionFn && + &i != DFSanUnionLoadFn && + &i != DFSanUnimplementedFn && + &i != DFSanSetLabelFn && + &i != DFSanNonzeroLabelFn && + &i != DFSanVarargWrapperFn) + FnsToInstrument.push_back(&i); } // Give function aliases prefixes when necessary, and build wrappers where the @@ -710,7 +724,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); FArg != FArgEnd; ++FArg, ++NewFArg) { - FArg->replaceAllUsesWith(NewFArg); + FArg->replaceAllUsesWith(&*NewFArg); } NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); @@ -750,11 +764,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); F.replaceAllUsesWith(WrappedFnCst); - // Patch the pointer to LLVM function in debug info descriptor. 
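[Editor's note] The DataFlowSanitizer changes in this region let the shadow mask be loaded at runtime from __dfsan_shadow_ptr_mask on AArch64, instead of being folded in as a constant; getShadowAddress in the hunks below then masks the address and scales it by the shadow width. A standalone sketch of that arithmetic, assuming DFSan's 2-byte label per application byte and a hypothetical mask value:

```cpp
#include <cstdint>
#include <iostream>

// DFSan-style shadow addressing: strip the high bits with a mask, then
// scale by the label size (assumed 2 bytes of shadow per application byte).
uint64_t shadowAddr(uint64_t Addr, uint64_t Mask, uint64_t LabelBytes = 2) {
  return (Addr & Mask) * LabelBytes;
}

int main() {
  // Hypothetical x86-64-style constant mask; on AArch64 the patch instead
  // loads the mask from the runtime symbol __dfsan_shadow_ptr_mask.
  uint64_t Mask = ~0x700000000000ULL;
  std::cout << std::hex << shadowAddr(0x700000001000ULL, Mask)
            << "\n"; // prints 2000
}
```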
- auto DI = FunctionDIs.find(&F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(&F); - UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -842,7 +851,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Instruction *I = dyn_cast(V)) Pos = I->getNextNode(); else - Pos = DFSF.F->getEntryBlock().begin(); + Pos = &DFSF.F->getEntryBlock().front(); while (isa(Pos) || isa(Pos)) Pos = Pos->getNextNode(); IRBuilder<> IRB(Pos); @@ -864,7 +873,7 @@ Value *DFSanFunction::getArgTLSPtr() { if (DFS.ArgTLS) return ArgTLSPtr = DFS.ArgTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {}); } @@ -874,7 +883,7 @@ Value *DFSanFunction::getRetvalTLS() { if (DFS.RetvalTLS) return RetvalTLSPtr = DFS.RetvalTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {}); } @@ -906,7 +915,7 @@ Value *DFSanFunction::getShadow(Value *V) { Function::arg_iterator i = F->arg_begin(); while (ArgIdx--) ++i; - Shadow = i; + Shadow = &*i; assert(Shadow->getType() == DFS.ShadowTy); break; } @@ -928,9 +937,15 @@ void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { assert(Addr != RetvalTLS && "Reinstrumenting?"); IRBuilder<> IRB(Pos); + Value *ShadowPtrMaskValue; + if (DFSanRuntimeShadowMask) + ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); + else + ShadowPtrMaskValue = ShadowPtrMask; return IRB.CreateIntToPtr( IRB.CreateMul( - IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask), + IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), + IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)), ShadowPtrMul), ShadowPtrTy); } @@ -991,7 +1006,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { Call->addAttribute(2, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); - PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin()); + PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); Phi->addIncoming(Call, Call->getParent()); Phi->addIncoming(V1, Head); @@ -1105,7 +1120,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); BasicBlock *Head = Pos->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(Pos); + BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator()); if (DomTreeNode *OldNode = DT.getNode(Head)) { std::vector Children(OldNode->begin(), OldNode->end()); @@ -1475,8 +1490,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (FT->isVarArg()) { auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, CS.arg_size() - FT->getNumParams()); - auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", - DFSF.F->getEntryBlock().begin()); + auto *LabelVAAlloca = new AllocaInst( + LabelVATy, "labelva", &DFSF.F->getEntryBlock().front()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); @@ -1490,7 +1505,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!DFSF.LabelReturnAlloca) { DFSF.LabelReturnAlloca = new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", - DFSF.F->getEntryBlock().begin()); + &DFSF.F->getEntryBlock().front()); } Args.push_back(DFSF.LabelReturnAlloca); } @@ -1529,13 +1544,14 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!CS.getType()->isVoidTy()) { if (InvokeInst *II = 
dyn_cast(CS.getInstruction())) { if (II->getNormalDest()->getSinglePredecessor()) { - Next = II->getNormalDest()->begin(); + Next = &II->getNormalDest()->front(); } else { BasicBlock *NewBB = SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT); - Next = NewBB->begin(); + Next = &NewBB->front(); } } else { + assert(CS->getIterator() != CS->getParent()->end()); Next = CS->getNextNode(); } @@ -1568,7 +1584,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = - new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); + new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front()); Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { IRB.CreateStore( diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 9a3ed5c04efc..fa939aee252a 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -138,6 +138,7 @@ namespace { Module *M; LLVMContext *Ctx; SmallVector, 16> Funcs; + DenseMap FnMap; }; } @@ -309,13 +310,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum, bool ExitBlockBeforeBody) + GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os, + uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; - Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -347,8 +347,8 @@ namespace { std::string EdgeDestinations; raw_string_ostream EDOS(EdgeDestinations); Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) EDOS << Block.OutEdges[i]->Number; } @@ -389,8 +389,8 @@ namespace { // Emit edges between blocks. if (Blocks.empty()) return; Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); if (Block.OutEdges.empty()) continue; writeBytes(EdgeTag, 4); @@ -405,9 +405,8 @@ namespace { } // Emit lines for each block. 
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - getBlock(I).writeOut(); - } + for (BasicBlock &I : *F) + getBlock(&I).writeOut(); } private: @@ -451,6 +450,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); + FnMap.clear(); + for (Function &F : M) { + if (DISubprogram *SP = F.getSubprogram()) + FnMap[SP] = &F; + } + if (Options.EmitNotes) emitProfileNotes(); if (Options.EmitData) return emitProfileArcs(); return false; @@ -495,7 +500,7 @@ void GCOVProfiler::emitProfileNotes() { unsigned FunctionIdent = 0; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; @@ -507,13 +512,13 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique(SP, &out, FunctionIdent++, + Funcs.push_back(make_unique(SP, F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); + GCOVBlock &Block = Func.getBlock(&*BB); TerminatorInst *TI = BB->getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { @@ -574,7 +579,7 @@ bool GCOVProfiler::emitProfileArcs() { auto *CU = cast(CU_Nodes->getOperand(i)); SmallVector, 8> CountersBySP; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -605,7 +610,7 @@ bool GCOVProfiler::emitProfileArcs() { int Successors = isa(TI) ? 1 : TI->getNumSuccessors(); if (Successors) { if (Successors == 1) { - IRBuilder<> Builder(BB->getFirstInsertionPt()); + IRBuilder<> Builder(&*BB->getFirstInsertionPt()); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); @@ -625,7 +630,7 @@ bool GCOVProfiler::emitProfileArcs() { Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { - ComplexEdgePreds.insert(BB); + ComplexEdgePreds.insert(&*BB); for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } @@ -641,13 +646,13 @@ bool GCOVProfiler::emitProfileArcs() { GlobalVariable *EdgeState = getEdgeStateValue(); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt()); Builder.CreateStore(Builder.getInt32(i), EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // Call runtime to perform increment. 
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt()); Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); @@ -731,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( IRBuilder<> Builder(Succ); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); - EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + - (Preds.idFor(BB)-1)] = cast(Counter); + EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) + + (Preds.idFor(&*BB) - 1)] = cast(Counter); } } Edge += Successors; @@ -901,7 +906,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint32_t pred = *predecessor; // if (pred == 0xffffffff) return; - Argument *Arg = Fn->arg_begin(); + Argument *Arg = &*Fn->arg_begin(); Arg->setName("predecessor"); Value *Pred = Builder.CreateLoad(Arg, "pred"); Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff)); @@ -912,7 +917,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint64_t *counter = counters[pred]; // if (!counter) return; Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); - Arg = std::next(Fn->arg_begin()); + Arg = &*std::next(Fn->arg_begin()); Arg->setName("counters"); Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 712bf8edc7ea..92e41ee27c09 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// // -// This pass lowers instrprof_increment intrinsics emitted by a frontend for -// profiling. It also builds the data structures and initialization code needed -// for updating execution counts and emitting the profile at runtime. +// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling. +// It also builds the data structures and initialization code needed for +// updating execution counts and emitting the profile at runtime. // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" - #include "llvm/ADT/Triple.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -49,7 +49,15 @@ public: private: InstrProfOptions Options; Module *M; - DenseMap RegionCounters; + typedef struct PerFunctionProfileData { + uint32_t NumValueSites[IPVK_Last+1]; + GlobalVariable* RegionCounters; + GlobalVariable* DataVar; + PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) { + memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1)); + } + } PerFunctionProfileData; + DenseMap ProfileDataMap; std::vector UsedVars; bool isMachO() const { @@ -58,24 +66,30 @@ private: /// Get the section name for the counter variables. StringRef getCountersSection() const { - return isMachO() ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts"; + return getInstrProfCountersSectionName(isMachO()); } /// Get the section name for the name variables. StringRef getNameSection() const { - return isMachO() ? 
"__DATA,__llvm_prf_names" : "__llvm_prf_names"; + return getInstrProfNameSectionName(isMachO()); } /// Get the section name for the profile data variables. StringRef getDataSection() const { - return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data"; + return getInstrProfDataSectionName(isMachO()); } /// Get the section name for the coverage mapping data. StringRef getCoverageSection() const { - return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + return getInstrProfCoverageSectionName(isMachO()); } + /// Count the number of instrumented value sites for the function. + void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); + + /// Replace instrprof_value_profile with a call to runtime library. + void lowerValueProfileInst(InstrProfValueProfileInst *Ins); + /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); @@ -117,20 +131,37 @@ bool InstrProfiling::runOnModule(Module &M) { bool MadeChange = false; this->M = &M; - RegionCounters.clear(); + ProfileDataMap.clear(); UsedVars.clear(); + // We did not know how many value sites there would be inside + // the instrumented function. This is counting the number of instrumented + // target value sites to enter it as field in the profile data variable. for (Function &F : M) for (BasicBlock &BB : F) for (auto I = BB.begin(), E = BB.end(); I != E;) - if (auto *Inc = dyn_cast(I++)) { + if (auto *Ind = dyn_cast(I++)) + computeNumValueSiteCounts(Ind); + + for (Function &F : M) + for (BasicBlock &BB : F) + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + if (auto *Inc = dyn_cast(Instr)) { lowerIncrement(Inc); MadeChange = true; + } else if (auto *Ind = dyn_cast(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; } - if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { + } + + if (GlobalVariable *Coverage = + M.getNamedGlobal(getCoverageMappingVarName())) { lowerCoverageData(Coverage); MadeChange = true; } + if (!MadeChange) return false; @@ -141,10 +172,59 @@ bool InstrProfiling::runOnModule(Module &M) { return true; } +static Constant *getOrInsertValueProfilingCall(Module &M) { + LLVMContext &Ctx = M.getContext(); + auto *ReturnTy = Type::getVoidTy(M.getContext()); + Type *ParamTypes[] = { +#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType +#include "llvm/ProfileData/InstrProfData.inc" + }; + auto *ValueProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); + return M.getOrInsertFunction(getInstrProfValueProfFuncName(), + ValueProfilingCallTy); +} + +void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { + + GlobalVariable *Name = Ind->getName(); + uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + auto It = ProfileDataMap.find(Name); + if (It == ProfileDataMap.end()) { + PerFunctionProfileData PD; + PD.NumValueSites[ValueKind] = Index + 1; + ProfileDataMap[Name] = PD; + } else if (It->second.NumValueSites[ValueKind] <= Index) + It->second.NumValueSites[ValueKind] = Index + 1; +} + +void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { + + GlobalVariable *Name = Ind->getName(); + auto It = ProfileDataMap.find(Name); + assert(It != ProfileDataMap.end() && It->second.DataVar && + "value profiling detected in function with no counter incerement"); + + GlobalVariable *DataVar = It->second.DataVar; + uint64_t ValueKind = 
Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) + Index += It->second.NumValueSites[Kind]; + + IRBuilder<> Builder(Ind); + Value* Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Ind->replaceAllUsesWith( + Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args)); + Ind->eraseFromParent(); +} + void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { GlobalVariable *Counters = getOrCreateRegionCounters(Inc); - IRBuilder<> Builder(Inc->getParent(), *Inc); + IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); Value *Count = Builder.CreateLoad(Addr, "pgocount"); @@ -172,9 +252,10 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { GlobalVariable *Name = cast(V); // If we have region counters for this name, we've already handled it. - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - continue; + auto It = ProfileDataMap.find(Name); + if (It != ProfileDataMap.end()) + if (It->second.RegionCounters) + continue; // Move the name variable to the right section. Name->setSection(getNameSection()); @@ -183,69 +264,108 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { } /// Get the name of a profiling variable for a particular function. -static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) { - auto *Arr = cast(Inc->getName()->getInitializer()); - StringRef Name = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); - return ("__llvm_profile_" + VarName + "_" + Name).str(); +static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { + StringRef NamePrefix = getInstrProfNameVarPrefix(); + StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); + return (Prefix + Name).str(); +} + +static inline bool shouldRecordFunctionAddr(Function *F) { + // Check the linkage + if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && + !F->hasAvailableExternallyLinkage()) + return true; + // Check uses of this function for other than direct calls or invokes to it. + return F->hasAddressTaken(); +} + +static inline Comdat *getOrCreateProfileComdat(Module &M, + InstrProfIncrementInst *Inc) { + // COFF format requires a COMDAT section to have a key symbol with the same + // name. The linker targeting COFF also requires that the COMDAT section + // a section is associated to must precede the associating section. For this + // reason, we must choose the name var's name as the name of the comdat. + StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF() + ? getInstrProfNameVarPrefix() + : getInstrProfComdatPrefix()); + return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix))); } GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { - GlobalVariable *Name = Inc->getName(); - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - return It->second; + GlobalVariable *NamePtr = Inc->getName(); + auto It = ProfileDataMap.find(NamePtr); + PerFunctionProfileData PD; + if (It != ProfileDataMap.end()) { + if (It->second.RegionCounters) + return It->second.RegionCounters; + PD = It->second; + } - // Move the name variable to the right section. Make sure it is placed in the - // same comdat as its associated function. 
Otherwise, we may get multiple - // counters for the same function in certain cases. + // Move the name variable to the right section. Place them in a COMDAT group + // if the associated function is a COMDAT. This will make sure that + // only one copy of counters of the COMDAT function will be emitted after + // linking. Function *Fn = Inc->getParent()->getParent(); - Name->setSection(getNameSection()); - Name->setAlignment(1); - Name->setComdat(Fn->getComdat()); + Comdat *ProfileVarsComdat = nullptr; + if (Fn->hasComdat()) + ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc); + NamePtr->setSection(getNameSection()); + NamePtr->setAlignment(1); + NamePtr->setComdat(ProfileVarsComdat); uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); // Create the counters variable. - auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(), - Constant::getNullValue(CounterTy), - getVarName(Inc, "counters")); - Counters->setVisibility(Name->getVisibility()); - Counters->setSection(getCountersSection()); - Counters->setAlignment(8); - Counters->setComdat(Fn->getComdat()); - - RegionCounters[Inc->getName()] = Counters; + auto *CounterPtr = + new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(), + Constant::getNullValue(CounterTy), + getVarName(Inc, getInstrProfCountersVarPrefix())); + CounterPtr->setVisibility(NamePtr->getVisibility()); + CounterPtr->setSection(getCountersSection()); + CounterPtr->setAlignment(8); + CounterPtr->setComdat(ProfileVarsComdat); // Create data variable. - auto *NameArrayTy = Name->getType()->getPointerElementType(); - auto *Int32Ty = Type::getInt32Ty(Ctx); - auto *Int64Ty = Type::getInt64Ty(Ctx); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); - auto *Int64PtrTy = Type::getInt64PtrTy(Ctx); - - Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy}; + auto *Int16Ty = Type::getInt16Ty(Ctx); + auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1); + Type *DataTypes[] = { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, + #include "llvm/ProfileData/InstrProfData.inc" + }; auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes)); + + Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ? 
+ ConstantExpr::getBitCast(Fn, Int8PtrTy) : + ConstantPointerNull::get(Int8PtrTy); + + Constant *Int16ArrayVals[IPVK_Last+1]; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); + Constant *DataVals[] = { - ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()), - ConstantInt::get(Int32Ty, NumCounters), - ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()), - ConstantExpr::getBitCast(Name, Int8PtrTy), - ConstantExpr::getBitCast(Counters, Int64PtrTy)}; - auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(), + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, + #include "llvm/ProfileData/InstrProfData.inc" + }; + auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(), ConstantStruct::get(DataTy, DataVals), - getVarName(Inc, "data")); - Data->setVisibility(Name->getVisibility()); + getVarName(Inc, getInstrProfDataVarPrefix())); + Data->setVisibility(NamePtr->getVisibility()); Data->setSection(getDataSection()); - Data->setAlignment(8); - Data->setComdat(Fn->getComdat()); + Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); + Data->setComdat(ProfileVarsComdat); + + PD.RegionCounters = CounterPtr; + PD.DataVar = Data; + ProfileDataMap[NamePtr] = PD; // Mark the data variable as used so that it isn't stripped out. UsedVars.push_back(Data); - return Counters; + return CounterPtr; } void InstrProfiling::emitRegistration() { @@ -253,20 +373,24 @@ void InstrProfiling::emitRegistration() { if (Triple(M->getTargetTriple()).isOSDarwin()) return; + // Use linker script magic to get data/cnts/name start/end. + if (Triple(M->getTargetTriple()).isOSLinux() || + Triple(M->getTargetTriple()).isOSFreeBSD()) + return; + // Construct the function. auto *VoidTy = Type::getVoidTy(M->getContext()); auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext()); auto *RegisterFTy = FunctionType::get(VoidTy, false); auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, - "__llvm_profile_register_functions", M); + getInstrProfRegFuncsName(), M); RegisterF->setUnnamedAddr(true); - if (Options.NoRedZone) - RegisterF->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone); auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); auto *RuntimeRegisterF = Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, - "__llvm_profile_register_function", M); + getInstrProfRegFuncName(), M); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); for (Value *Data : UsedVars) @@ -275,26 +399,27 @@ void InstrProfiling::emitRegistration() { } void InstrProfiling::emitRuntimeHook() { - const char *const RuntimeVarName = "__llvm_profile_runtime"; - const char *const RuntimeUserName = "__llvm_profile_runtime_user"; + + // We expect the linker to be invoked with -u flag for linux, + // for which case there is no need to emit the user function. + if (Triple(M->getTargetTriple()).isOSLinux()) + return; // If the module's provided its own runtime, we don't need to do anything. - if (M->getGlobalVariable(RuntimeVarName)) - return; + if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return; // Declare an external variable that will pull in the runtime initialization. 
auto *Int32Ty = Type::getInt32Ty(M->getContext()); auto *Var = new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, RuntimeVarName); + nullptr, getInstrProfRuntimeHookVarName()); // Make a function that uses it. - auto *User = - Function::Create(FunctionType::get(Int32Ty, false), - GlobalValue::LinkOnceODRLinkage, RuntimeUserName, M); + auto *User = Function::Create(FunctionType::get(Int32Ty, false), + GlobalValue::LinkOnceODRLinkage, + getInstrProfRuntimeHookVarUseFuncName(), M); User->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - User->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone); User->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); @@ -330,26 +455,23 @@ void InstrProfiling::emitUses() { LLVMUsed = new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ATy, MergedVars), "llvm.used"); - LLVMUsed->setSection("llvm.metadata"); } void InstrProfiling::emitInitialization() { std::string InstrProfileOutput = Options.InstrProfileOutput; - Constant *RegisterF = M->getFunction("__llvm_profile_register_functions"); - if (!RegisterF && InstrProfileOutput.empty()) - return; + Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName()); + if (!RegisterF && InstrProfileOutput.empty()) return; // Create the initialization function. auto *VoidTy = Type::getVoidTy(M->getContext()); - auto *F = - Function::Create(FunctionType::get(VoidTy, false), - GlobalValue::InternalLinkage, "__llvm_profile_init", M); + auto *F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, + getInstrProfInitFuncName(), M); F->setUnnamedAddr(true); F->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - F->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone); // Add the basic block and the necessary calls. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F)); @@ -358,9 +480,8 @@ void InstrProfiling::emitInitialization() { if (!InstrProfileOutput.empty()) { auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false); - auto *SetNameF = - Function::Create(SetNameTy, GlobalValue::ExternalLinkage, - "__llvm_profile_override_default_filename", M); + auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage, + getInstrProfFileOverriderFuncName(), M); // Create variable for profile name. Constant *ProfileNameConst = diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 27505859100b..a05a5fa09f9a 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -12,12 +12,47 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm-c/Initialization.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; +/// Moves I before IP. Returns new insert point. +static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) { + // If I is IP, move the insert point down. + if (I == IP) + return ++IP; + // Otherwise, move I before IP and return IP. 
+ I->moveBefore(&*IP); + return IP; +} + +/// Instrumentation passes often insert conditional checks into entry blocks. +/// Call this function before splitting the entry block to move instructions +/// that must remain in the entry block up before the split point. Static +/// allocas and llvm.localescape calls, for example, must remain in the entry +/// block. +BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB, + BasicBlock::iterator IP) { + assert(&BB.getParent()->getEntryBlock() == &BB); + for (auto I = IP, E = BB.end(); I != E; ++I) { + bool KeepInEntry = false; + if (auto *AI = dyn_cast(I)) { + if (AI->isStaticAlloca()) + KeepInEntry = true; + } else if (auto *II = dyn_cast(I)) { + if (II->getIntrinsicID() == llvm::Intrinsic::localescape) + KeepInEntry = true; + } + if (KeepInEntry) + IP = moveBeforeInsertPoint(I, IP); + } + return IP; +} + /// initializeInstrumentation - Initialize all passes in the TransformUtils /// library. void llvm::initializeInstrumentation(PassRegistry &Registry) { @@ -25,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); initializeGCOVProfilerPass(Registry); + initializePGOInstrumentationGenPass(Registry); + initializePGOInstrumentationUsePass(Registry); initializeInstrProfilingPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt index 14c174332ee4..bcefe795c193 100644 --- a/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Instrumentation parent = Transforms -required_libraries = Analysis Core MC Support TransformUtils +required_libraries = Analysis Core MC Support TransformUtils ProfileData diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 286a56330248..5a7bce5a5413 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -148,7 +148,7 @@ static cl::opt ClPoisonStackWithCall("msan-poison-stack-with-call", cl::desc("poison uninitialized stack variables with a call"), cl::Hidden, cl::init(false)); static cl::opt ClPoisonStackPattern("msan-poison-stack-pattern", - cl::desc("poison uninitialized stack variables with the given patter"), + cl::desc("poison uninitialized stack variables with the given pattern"), cl::Hidden, cl::init(0xff)); static cl::opt ClPoisonUndef("msan-poison-undef", cl::desc("poison undef temps"), @@ -222,10 +222,17 @@ static const MemoryMapParams Linux_I386_MemoryMapParams = { // x86_64 Linux static const MemoryMapParams Linux_X86_64_MemoryMapParams = { +#ifdef MSAN_LINUX_X86_64_OLD_MAPPING 0x400000000000, // AndMask 0, // XorMask (not used) 0, // ShadowBase (not used) 0x200000000000, // OriginBase +#else + 0, // AndMask (not used) + 0x500000000000, // XorMask + 0, // ShadowBase (not used) + 0x100000000000, // OriginBase +#endif }; // mips64 Linux @@ -244,6 +251,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = { 0x1C0000000000, // OriginBase }; +// aarch64 Linux +static const MemoryMapParams Linux_AArch64_MemoryMapParams = { + 0, // AndMask (not used) + 0x06000000000, // XorMask + 0, // ShadowBase (not used) + 0x01000000000, // OriginBase +}; + // i386 FreeBSD static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 
0x000180000000, // AndMask @@ -266,15 +281,20 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { }; static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { - NULL, + nullptr, &Linux_MIPS64_MemoryMapParams, }; static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = { - NULL, + nullptr, &Linux_PowerPC64_MemoryMapParams, }; +static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { + nullptr, + &Linux_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -353,8 +373,9 @@ class MemorySanitizer : public FunctionPass { friend struct MemorySanitizerVisitor; friend struct VarArgAMD64Helper; friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; }; -} // namespace +} // anonymous namespace char MemorySanitizer::ID = 0; INITIALIZE_PASS(MemorySanitizer, "msan", @@ -377,7 +398,6 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, GlobalValue::PrivateLinkage, StrConst, ""); } - /// \brief Insert extern declaration of runtime-provided functions and globals. void MemorySanitizer::initializeCallbacks(Module &M) { // Only do this once. @@ -496,6 +516,10 @@ bool MemorySanitizer::doInitialization(Module &M) { case Triple::ppc64le: MapParams = Linux_PowerPC_MemoryMapParams.bits64; break; + case Triple::aarch64: + case Triple::aarch64_be: + MapParams = Linux_ARM_MemoryMapParams.bits64; + break; default: report_fatal_error("unsupported architecture"); } @@ -697,7 +721,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Cmp = IRB.CreateICmpNE( ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( - Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights); + Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights); IRBuilder<> IRBNew(CheckTerm); paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), getOriginPtr(Addr, IRBNew, Alignment), StoreSize, @@ -893,16 +917,17 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// Offset = (Addr & ~AndMask) ^ XorMask Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { + Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy); + uint64_t AndMask = MS.MapParams->AndMask; - assert(AndMask != 0 && "AndMask shall be specified"); - Value *OffsetLong = - IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy), - ConstantInt::get(MS.IntptrTy, ~AndMask)); + if (AndMask) + OffsetLong = + IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask)); uint64_t XorMask = MS.MapParams->XorMask; - if (XorMask != 0) - OffsetLong = IRB.CreateXor(OffsetLong, - ConstantInt::get(MS.IntptrTy, XorMask)); + if (XorMask) + OffsetLong = + IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask)); return OffsetLong; } @@ -1339,6 +1364,12 @@ struct MemorySanitizerVisitor : public InstVisitor { } void visitBitCastInst(BitCastInst &I) { + // Special case: if this is the bitcast (there is exactly 1 allowed) between + // a musttail call and a ret, don't instrument. New instructions are not + // allowed after a musttail call. 
+ if (auto *CI = dyn_cast(I.getOperand(0))) + if (CI->isMustTailCall()) + return; IRBuilder<> IRB(&I); setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I))); setOrigin(&I, getOrigin(&I, 0)); @@ -1570,18 +1601,24 @@ struct MemorySanitizerVisitor : public InstVisitor { Type *EltTy = Ty->getSequentialElementType(); SmallVector Elements; for (unsigned Idx = 0; Idx < NumElements; ++Idx) { - ConstantInt *Elt = - dyn_cast(ConstArg->getAggregateElement(Idx)); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - Elements.push_back(ConstantInt::get(EltTy, V2)); + if (ConstantInt *Elt = + dyn_cast(ConstArg->getAggregateElement(Idx))) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } else { + Elements.push_back(ConstantInt::get(EltTy, 1)); + } } ShadowMul = ConstantVector::get(Elements); } else { - ConstantInt *Elt = dyn_cast(ConstArg); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - ShadowMul = ConstantInt::get(Elt->getType(), V2); + if (ConstantInt *Elt = dyn_cast(ConstArg)) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Ty, V2); + } else { + ShadowMul = ConstantInt::get(Ty, 1); + } } IRBuilder<> IRB(&I); @@ -1730,25 +1767,30 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Instrument signed relational comparisons. /// - /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by - /// propagating the highest bit of the shadow. Everything else is delegated - /// to handleShadowOr(). + /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest + /// bit of the shadow. Everything else is delegated to handleShadowOr(). 
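+  ///
+  /// A signed x<0 test reads only the sign bit of x, so its result is
+  /// undefined exactly when the sign bit of x's shadow is set; comparing
+  /// the shadow, as a signed value, against the all-zero clean shadow with
+  /// an SLT predicate computes exactly that bit.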
void handleSignedRelationalComparison(ICmpInst &I) { - Constant *constOp0 = dyn_cast(I.getOperand(0)); - Constant *constOp1 = dyn_cast(I.getOperand(1)); - Value* op = nullptr; - CmpInst::Predicate pre = I.getPredicate(); - if (constOp0 && constOp0->isNullValue() && - (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { - op = I.getOperand(1); - } else if (constOp1 && constOp1->isNullValue() && - (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) { + Constant *constOp; + Value *op = nullptr; + CmpInst::Predicate pre; + if ((constOp = dyn_cast(I.getOperand(1)))) { op = I.getOperand(0); + pre = I.getPredicate(); + } else if ((constOp = dyn_cast(I.getOperand(0)))) { + op = I.getOperand(1); + pre = I.getSwappedPredicate(); + } else { + handleShadowOr(I); + return; } - if (op) { + + if ((constOp->isNullValue() && + (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) || + (constOp->isAllOnesValue() && + (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) { IRBuilder<> IRB(&I); - Value* Shadow = - IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt"); + Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), + "_msprop_icmp_s"); setShadow(&I, Shadow); setOrigin(&I, getOrigin(op)); } else { @@ -1860,25 +1902,6 @@ struct MemorySanitizerVisitor : public InstVisitor { VAHelper->visitVACopyInst(I); } - enum IntrinsicKind { - IK_DoesNotAccessMemory, - IK_OnlyReadsMemory, - IK_WritesMemory - }; - - static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) { - const int DoesNotAccessMemory = IK_DoesNotAccessMemory; - const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory; - const int OnlyReadsMemory = IK_OnlyReadsMemory; - const int OnlyAccessesArgumentPointees = IK_WritesMemory; - const int UnknownModRefBehavior = IK_WritesMemory; -#define GET_INTRINSIC_MODREF_BEHAVIOR -#define ModRefBehavior IntrinsicKind -#include "llvm/IR/Intrinsics.gen" -#undef ModRefBehavior -#undef GET_INTRINSIC_MODREF_BEHAVIOR - } - /// \brief Handle vector store-like intrinsics. /// /// Instrument intrinsics that look like a simple SIMD store: writes memory, @@ -1978,17 +2001,11 @@ struct MemorySanitizerVisitor : public InstVisitor { if (NumArgOperands == 0) return false; - Intrinsic::ID iid = I.getIntrinsicID(); - IntrinsicKind IK = getIntrinsicKind(iid); - bool OnlyReadsMemory = IK == IK_OnlyReadsMemory; - bool WritesMemory = IK == IK_WritesMemory; - assert(!(OnlyReadsMemory && WritesMemory)); - if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() && I.getArgOperand(1)->getType()->isVectorTy() && I.getType()->isVoidTy() && - WritesMemory) { + !I.onlyReadsMemory()) { // This looks like a vector store. return handleVectorStoreIntrinsic(I); } @@ -1996,12 +2013,12 @@ struct MemorySanitizerVisitor : public InstVisitor { if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() && I.getType()->isVectorTy() && - OnlyReadsMemory) { + I.onlyReadsMemory()) { // This looks like a vector load. return handleVectorLoadIntrinsic(I); } - if (!OnlyReadsMemory && !WritesMemory) + if (I.doesNotAccessMemory()) if (maybeHandleSimpleNomemIntrinsic(I)) return true; @@ -2493,13 +2510,16 @@ struct MemorySanitizerVisitor : public InstVisitor { // Now, get the shadow for the RetVal. if (!I.getType()->isSized()) return; + // Don't emit the epilogue for musttail call returns. + if (CS.isCall() && cast(&I)->isMustTailCall()) return; IRBuilder<> IRBBefore(&I); // Until we have full dynamic coverage, make sure the retval shadow is 0. 
Value *Base = getShadowPtrForRetval(&I, IRBBefore); IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); - Instruction *NextInsn = nullptr; + BasicBlock::iterator NextInsn; if (CS.isCall()) { - NextInsn = I.getNextNode(); + NextInsn = ++I.getIterator(); + assert(NextInsn != I.getParent()->end()); } else { BasicBlock *NormalDest = cast(&I)->getNormalDest(); if (!NormalDest->getSinglePredecessor()) { @@ -2511,10 +2531,10 @@ struct MemorySanitizerVisitor : public InstVisitor { return; } NextInsn = NormalDest->getFirstInsertionPt(); - assert(NextInsn && + assert(NextInsn != NormalDest->end() && "Could not find insertion point for retval shadow load"); } - IRBuilder<> IRBAfter(NextInsn); + IRBuilder<> IRBAfter(&*NextInsn); Value *RetvalShadow = IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter), kShadowTLSAlignment, "_msret"); @@ -2523,10 +2543,22 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter))); } + bool isAMustTailRetVal(Value *RetVal) { + if (auto *I = dyn_cast(RetVal)) { + RetVal = I->getOperand(0); + } + if (auto *I = dyn_cast(RetVal)) { + return I->isMustTailCall(); + } + return false; + } + void visitReturnInst(ReturnInst &I) { IRBuilder<> IRB(&I); Value *RetVal = I.getReturnValue(); if (!RetVal) return; + // Don't emit the epilogue for musttail call returns. + if (isAMustTailRetVal(RetVal)) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); if (CheckReturnValue) { insertShadowCheck(RetVal, &I); @@ -2653,6 +2685,16 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); } + void visitCatchSwitchInst(CatchSwitchInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + + void visitFuncletPadInst(FuncletPadInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); } @@ -2696,6 +2738,16 @@ struct MemorySanitizerVisitor : public InstVisitor { // Nothing to do here. } + void visitCleanupReturnInst(CleanupReturnInst &CRI) { + DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n"); + // Nothing to do here. + } + + void visitCatchReturnInst(CatchReturnInst &CRI) { + DEBUG(dbgs() << "CatchReturn: " << CRI << "\n"); + // Nothing to do here. + } + void visitInstruction(Instruction &I) { // Everything else: stop propagating and check for poisoned shadow. if (ClDumpStrictInstructions) @@ -2808,6 +2860,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); @@ -2820,6 +2874,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); @@ -2979,6 +3035,242 @@ struct VarArgMIPS64Helper : public VarArgHelper { } }; + +/// \brief AArch64-specific implementation of VarArgHelper. +struct VarArgAArch64Helper : public VarArgHelper { + static const unsigned kAArch64GrArgSize = 56; + static const unsigned kAArch64VrArgSize = 128; + + static const unsigned AArch64GrBegOffset = 0; + static const unsigned AArch64GrEndOffset = kAArch64GrArgSize; + // Make VR space aligned to 16 bytes. 
+  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8;
+  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+                                             + kAArch64VrArgSize;
+  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgOverflowSize;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+                      MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+        VAArgOverflowSize(nullptr) {}
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  ArgKind classifyArgument(Value* arg) {
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy())
+      return AK_FloatingPoint;
+    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+        || (T->isPointerTy()))
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // The instrumentation stores the argument shadow in a non-ABI-specific
+  // format because it does not know which argument is named (since Clang,
+  // as in the x86_64 case, lowers the va_args in the frontend and this pass
+  // only sees the low-level code that deals with va_list internals).
+  // The first seven GR registers are saved in the first 56 bytes of the
+  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using constant offsets within the va_arg TLS array allows fast copy
+  // in the finalize instrumentation.
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    unsigned GrOffset = AArch64GrBegOffset;
+    unsigned VrOffset = AArch64VrBegOffset;
+    unsigned OverflowOffset = AArch64VAEndOffset;
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      ArgKind AK = classifyArgument(A);
+      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+        AK = AK_Memory;
+      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+        AK = AK_Memory;
+      Value *Base;
+      switch (AK) {
+      case AK_GeneralPurpose:
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+        GrOffset += 8;
+        break;
+      case AK_FloatingPoint:
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+        VrOffset += 16;
+        break;
+      case AK_Memory:
+        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+        OverflowOffset += RoundUpToAlignment(ArgSize, 8);
+        break;
+      }
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
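+    // For reference, the offsets used here and in finalizeInstrumentation()
+    // follow the AAPCS64 va_list layout:
+    //   offset  0: void *__stack;   // next stacked argument
+    //   offset  8: void *__gr_top;  // end of the GR register save area
+    //   offset 16: void *__vr_top;  // end of the VR register save area
+    //   offset 24: int   __gr_offs; // negative offset from __gr_top
+    //   offset 28: int   __vr_offs; // negative offset from __vr_top
+    // for a total of 32 bytes, which is the size unpoisoned below.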
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  // Retrieve a va_list field of 'void*' size.
+  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtrPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt64PtrTy(*MS.C));
+    return IRB.CreateLoad(SaveAreaPtrPtr);
+  }
+
+  // Retrieve a va_list field of 'int' size.
+  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt32PtrTy(*MS.C));
+    Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+    // Instrument va_start, copy va_list shadow from the backup copy of
+    // the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for the 64-bit general registers xn-x7 and
+      // another for the 128-bit FP/SIMD registers vn-v7).
+      // We then need to propagate the shadow arguments to both regions
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments are saved on shadow for 'va::stack'.
+      // One caveat is that only the non-named arguments need to be
+      // propagated; however, the call site instrumentation saves 'all' the
+      // arguments. So to copy the shadow values from the va_arg TLS array
+      // we need to adjust the offset for both GR and VR fields based on
+      // the __{gr,vr}_offs value (since the stores are based on the incoming
+      // named arguments).
+
+      // Read the stack pointer from the va_list.
+      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+      // Read both the __gr_top and __gr_off and add them up.
+      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+      // Read both the __vr_top and __vr_off and add them up.
+      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // It does not know how many named arguments are being used and, at the
+      // call site, all the arguments were saved. Since __gr_off is defined as
+      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
+      // arguments by ignoring the bytes of shadow from named arguments.
+      Value *GrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+      Value *GrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                                              GrRegSaveAreaShadowPtrOff);
+      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8);
+
+      // Again, but for FP/SIMD values.
+      Value *VrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+      Value *VrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+        IRB.getInt8Ty(),
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VrBegOffset)),
+        VrRegSaveAreaShadowPtrOff);
+      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8);
+
+      // And finally for remaining arguments.
+      Value *StackSaveAreaShadowPtr =
+        MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *StackSrcPtr =
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VAEndOffset));
+
+      IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr,
+                       VAArgOverflowSize, 16);
+    }
+  }
+};
+
 /// \brief A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3003,11 +3295,13 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
   else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
            TargetTriple.getArch() == llvm::Triple::mips64el)
     return new VarArgMIPS64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == llvm::Triple::aarch64)
+    return new VarArgAArch64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
 
-} // namespace
+} // anonymous namespace
 
 bool MemorySanitizer::runOnFunction(Function &F) {
   if (&F == MsanCtorFunction)
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 000000000000..4b59b93b325f
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,718 @@
+//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+// Issue 3, pp 313-322.
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// this method instruments the minimum number of edges.
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights.
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
+// the profile is opened at module level and passed to each PGOUseFunc
+// instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are
+// used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are
+// the derived classes of PGOEdge and BBInfo, respectively. They contain extra
+// data structures used in populating profile counters.
+// The MST implementation is in class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "CFGMST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatched profile.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+                       cl::value_desc("filename"),
+                       cl::desc("Specify the path of profile data file. This "
+                                "is mainly for test purposes."));
+
+namespace {
+class PGOInstrumentationGen : public ModulePass {
+public:
+  static char ID;
+
+  PGOInstrumentationGen() : ModulePass(ID) {
+    initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationGenPass";
+  }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+  }
+};
+
+class PGOInstrumentationUse : public ModulePass {
+public:
+  static char ID;
+
+  // Provide the profile filename as the parameter.
+  PGOInstrumentationUse(std::string Filename = "")
+      : ModulePass(ID), ProfileFileName(Filename) {
+    if (!PGOTestProfileFile.empty())
+      ProfileFileName = PGOTestProfileFile;
+    initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationUsePass";
+  }
+
+private:
+  std::string ProfileFileName;
+  std::unique_ptr<IndexedInstrProfReader> PGOReader;
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+  }
+};
+} // end anonymous namespace
+
+char PGOInstrumentationGen::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen",
+                      "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen",
+                    "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenPass() {
+  return new PGOInstrumentationGen();
+}
+
+char PGOInstrumentationUse::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use",
+                      "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use",
+                    "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) {
+  return new PGOInstrumentationUse(Filename.str());
+}
+
+namespace {
+/// \brief An MST based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+  // This class implements the CFG edges. Note the CFG can be a multi-graph.
+  // So there might be multiple edges with same SrcBB and DestBB.
+  const BasicBlock *SrcBB;
+  const BasicBlock *DestBB;
+  uint64_t Weight;
+  bool InMST;
+  bool Removed;
+  bool IsCritical;
+  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
+        IsCritical(false) {}
+  // Return the information string of an edge.
+  const std::string infoString() const {
+    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+            (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+  }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+  BBInfo *Group;
+  uint32_t Index;
+  uint32_t Rank;
+
+  BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    return (Twine("Index=") + Twine(Index)).str();
+  }
+};
+
+// This class implements the CFG edges. Note the CFG can be a multi-graph.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+  Function &F;
+  void computeCFGHash();
+
+public:
+  std::string FuncName;
+  GlobalVariable *FuncNameVar;
+  // CFG hash value for this function.
+  uint64_t FunctionHash;
+
+  // The Minimum Spanning Tree of function CFG.
+  CFGMST<Edge, BBInfo> MST;
+
+  // Given an edge, find the BB that will be instrumented.
+  // Return nullptr if there is no BB to be instrumented.
+  BasicBlock *getInstrBB(Edge *E);
+
+  // Return the auxiliary BB information.
+  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+  // Dump edges and BB information.
+  void dumpInfo(std::string Str = "") const {
+    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+                              Twine(FunctionHash) + "\t" + Str);
+  }
+
+  FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
+                         BranchProbabilityInfo *BPI = nullptr,
+                         BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+    FuncName = getPGOFuncName(F);
+    computeCFGHash();
+    DEBUG(dumpInfo("after CFGMST"));
+
+    NumOfPGOBB += MST.BBInfos.size();
+    for (auto &E : MST.AllEdges) {
+      if (E->Removed)
+        continue;
+      NumOfPGOEdge++;
+      if (!E->InMST)
+        NumOfPGOInstrument++;
+    }
+
+    if (CreateGlobalVar)
+      FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  };
+};
+
+// Compute the hash value for the CFG: the lower 32 bits are CRC32 of the index
+// value of each BB in the CFG. The higher 32 bits record the number of edges.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+  std::vector<char> Indexes;
+  JamCRC JC;
+  for (auto &BB : F) {
+    const TerminatorInst *TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      BasicBlock *Succ = TI->getSuccessor(I);
+      uint32_t Index = getBBInfo(Succ).Index;
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((char)(Index >> (J * 8)));
+    }
+  }
+  JC.update(Indexes);
+  FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
+
+// Given a CFG edge E to be instrumented, find which BB to place the
+// instrumented code. The function will split the critical edge if necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+  if (E->InMST || E->Removed)
+    return nullptr;
+
+  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+  // For a fake edge, instrument the real BB.
+  if (SrcBB == nullptr)
+    return DestBB;
+  if (DestBB == nullptr)
+    return SrcBB;
+
+  // Instrument the SrcBB if it has a single successor,
+  // otherwise, the DestBB if this is not a critical edge.
+  TerminatorInst *TI = SrcBB->getTerminator();
+  if (TI->getNumSuccessors() <= 1)
+    return SrcBB;
+  if (!E->IsCritical)
+    return DestBB;
+
+  // For a critical edge, we have to split. Instrument the newly
+  // created BB.
+  NumOfPGOSplit++;
+  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
+               << getBBInfo(DestBB).Index << "\n");
+  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+  assert(InstrBB && "Critical edge is not split");
+
+  E->Removed = true;
+  return InstrBB;
+}
+
+// Visit all edges and instrument the edges not in the MST.
+// Critical edges will be split.
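+// As a toy illustration of the scheme (the CFG below is hypothetical): in a
+// diamond A->B, A->C, B->D, C->D whose spanning tree is {A->B, A->C, B->D},
+// only C->D gets a counter. If the fake entry edge counter says A ran N times
+// and C->D counted c, flow conservation recovers the rest:
+//   count(A->C) = count(C->D) = c, and count(A->B) = count(B->D) = N - c.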
+static void instrumentOneFunc(Function &F, Module *M,
+                              BranchProbabilityInfo *BPI,
+                              BlockFrequencyInfo *BFI) {
+  unsigned NumCounters = 0;
+  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (!E->InMST && !E->Removed)
+      NumCounters++;
+  }
+
+  uint32_t I = 0;
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
+    if (!InstrBB)
+      continue;
+
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    assert(Builder.GetInsertPoint() != InstrBB->end() &&
+           "Cannot get the Instrumentation point");
+    Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+    Builder.CreateCall(
+        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+         Builder.getInt32(I++)});
+  }
+}
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+  bool CountValid;
+  uint64_t CountValue;
+  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
+
+  // Set the edge count value.
+  void setEdgeCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string for this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return PGOEdge::infoString();
+    return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str();
+  }
+};
+
+typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+  uint64_t CountValue;
+  bool CountValid;
+  int32_t UnknownCountInEdge;
+  int32_t UnknownCountOutEdge;
+  DirectEdges InEdges;
+  DirectEdges OutEdges;
+  UseBBInfo(unsigned IX)
+      : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+  UseBBInfo(unsigned IX, uint64_t C)
+      : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+
+  // Set the profile count value for this BB.
+  void setBBInfoCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return BBInfo::infoString();
+    return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+  }
+};
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+  uint64_t Total = 0;
+  for (auto &E : Edges) {
+    if (E->Removed)
+      continue;
+    Total += E->CountValue;
+  }
+  return Total;
+}
+
+class PGOUseFunc {
+private:
+  Function &F;
+  Module *M;
+  // This member stores the shared information with class PGOGenFunc.
+  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+  // Return the auxiliary BB information.
+  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.getBBInfo(BB);
+  }
+
+  // The maximum count value in the profile. This is only used in PGO use
+  // compilation.
+  uint64_t ProgramMaxCount;
+
+  // Find the Instrumented BB and set the value.
+  void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+  // Set the edge counter value for the unknown edge -- there should be only
+  // one unknown edge.
+  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return the FuncName string.
+  const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+  // Set the hot/cold inline hints based on the count values.
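+  // For example, with the thresholds below and ProgramMaxCount = 1,000,000
+  // (a hypothetical profile), a function whose entry count is at least
+  // 1,000,000 * 1/100 = 10,000 is marked InlineHint, and one whose maximum
+  // count is at most 1,000,000 * 2/10000 = 200 is marked Cold.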
+  // FIXME: This function should be removed once the functionality in
+  // the inliner is implemented.
+  void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+    if (ProgramMaxCount == 0)
+      return;
+    // Threshold of the hot functions.
+    const BranchProbability HotFunctionThreshold(1, 100);
+    // Threshold of the cold functions.
+    const BranchProbability ColdFunctionThreshold(2, 10000);
+    if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::InlineHint);
+    else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::Cold);
+  }
+
+public:
+  PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+             BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {}
+
+  // Read counts for the instrumented BB from profile.
+  bool readCounters(IndexedInstrProfReader *PGOReader);
+
+  // Populate the counts for all BBs.
+  void populateCounters();
+
+  // Set the branch weights based on the count values.
+  void setBranchWeights();
+};
+
+// Visit all the edges and assign the count value for the instrumented
+// edges and the BB.
+void PGOUseFunc::setInstrumentedCounts(
+    const std::vector<uint64_t> &CountFromProfile) {
+
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<PGOUseEdge *> WorkList;
+  for (auto &E : FuncInfo.MST.AllEdges)
+    WorkList.push_back(E.get());
+
+  uint32_t I = 0;
+  for (auto &E : WorkList) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
+    if (!InstrBB)
+      continue;
+    uint64_t CountValue = CountFromProfile[I++];
+    if (!E->Removed) {
+      getBBInfo(InstrBB).setBBInfoCount(CountValue);
+      E->setEdgeCount(CountValue);
+      continue;
+    }
+
+    // Need to add two new edges.
+    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+    // Add new edge of SrcBB->InstrBB.
+    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
+    NewEdge.setEdgeCount(CountValue);
+    // Add new edge of InstrBB->DestBB.
+    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
+    NewEdge1.setEdgeCount(CountValue);
+    NewEdge1.InMST = true;
+    getBBInfo(InstrBB).setBBInfoCount(CountValue);
+  }
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+  for (auto &E : Edges) {
+    if (E->CountValid)
+      continue;
+    E->setEdgeCount(Value);
+
+    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    getBBInfo(E->DestBB).UnknownCountInEdge--;
+    return;
+  }
+  llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+  auto &Ctx = M->getContext();
+  ErrorOr<InstrProfRecord> Result =
+      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+  if (std::error_code EC = Result.getError()) {
+    if (EC == instrprof_error::unknown_function)
+      NumOfPGOMissing++;
+    else if (EC == instrprof_error::hash_mismatch ||
+             EC == llvm::instrprof_error::malformed)
+      NumOfPGOMismatch++;
+
+    std::string Msg = EC.message() + std::string(" ") + F.getName().str();
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+    return false;
+  }
+  std::vector<uint64_t> &CountFromProfile = Result.get().Counts;
+
+  NumOfPGOFunc++;
+  DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+  uint64_t ValueSum = 0;
+  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+    DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
+    ValueSum += CountFromProfile[I];
+  }
+
+  DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
+
+  getBBInfo(nullptr).UnknownCountOutEdge = 2;
+  getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+  setInstrumentedCounts(CountFromProfile);
+  ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+  return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+  // First set up the Count variable for all BBs.
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (E->Removed)
+      continue;
+
+    const BasicBlock *SrcBB = E->SrcBB;
+    const BasicBlock *DestBB = E->DestBB;
+    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+    UseBBInfo &DestInfo = getBBInfo(DestBB);
+    SrcInfo.OutEdges.push_back(E.get());
+    DestInfo.InEdges.push_back(E.get());
+    SrcInfo.UnknownCountOutEdge++;
+    DestInfo.UnknownCountInEdge++;
+
+    if (!E->CountValid)
+      continue;
+    DestInfo.UnknownCountInEdge--;
+    SrcInfo.UnknownCountOutEdge--;
+  }
+
+  bool Changes = true;
+  unsigned NumPasses = 0;
+  while (Changes) {
+    NumPasses++;
+    Changes = false;
+
+    // For efficient traversal, it's better to start from the end as most
+    // of the instrumented edges are at the end.
+    for (auto &BB : reverse(F)) {
+      UseBBInfo &Count = getBBInfo(&BB);
+      if (!Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.OutEdges);
+          Count.CountValid = true;
+          Changes = true;
+        } else if (Count.UnknownCountInEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.InEdges);
+          Count.CountValid = true;
+          Changes = true;
+        }
+      }
+      if (Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
+          setEdgeCount(Count.OutEdges, Total);
+          Changes = true;
+        }
+        if (Count.UnknownCountInEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
+          setEdgeCount(Count.InEdges, Total);
+          Changes = true;
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+  // Assert every BB has a valid counter.
+  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+  uint64_t FuncMaxCount = FuncEntryCount;
+  for (auto &BB : F) {
+    assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+    uint64_t Count = getBBInfo(&BB).CountValue;
+    if (Count > FuncMaxCount)
+      FuncMaxCount = Count;
+  }
+  applyFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+  DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
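
The loop above is a local propagation run to a fixed point. The same idea in a standalone toy form (not LLVM code; it handles interior nodes only, where in-flow equals out-flow; entry/exit would need the fake edge the MST adds so flow is conserved everywhere):

    #include <cstdint>
    #include <vector>

    struct Edge { int Src, Dst; int64_t Count; bool Known; };

    // One relaxation pass: returns true if any edge count became known.
    static bool relax(std::vector<Edge> &Edges, int NumNodes) {
      bool Changed = false;
      for (int N = 0; N < NumNodes; ++N) {
        int64_t InSum = 0, OutSum = 0;
        Edge *UnknownIn = nullptr, *UnknownOut = nullptr;
        int UnknownInN = 0, UnknownOutN = 0;
        for (Edge &E : Edges) {
          if (E.Dst == N) {
            if (E.Known) InSum += E.Count; else { ++UnknownInN; UnknownIn = &E; }
          }
          if (E.Src == N) {
            if (E.Known) OutSum += E.Count; else { ++UnknownOutN; UnknownOut = &E; }
          }
        }
        // A single unknown incident edge is determined once everything
        // else at the node is known (flow conservation).
        if (UnknownInN == 0 && UnknownOutN == 1) {
          UnknownOut->Count = InSum - OutSum;
          UnknownOut->Known = true;
          Changed = true;
        } else if (UnknownOutN == 0 && UnknownInN == 1) {
          UnknownIn->Count = OutSum - InSum;
          UnknownIn->Known = true;
          Changed = true;
        }
      }
      return Changed;
    }
    // Callers iterate: while (relax(Edges, NumNodes)) {}  A fixed point is
    // reached in at most |Edges| passes on a connected CFG.
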
+// Assign the scaled count values to the BB with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+  // Generate MD_prof metadata for every branch instruction.
+  DEBUG(dbgs() << "\nSetting branch weights.\n");
+  MDBuilder MDB(M->getContext());
+  for (auto &BB : F) {
+    TerminatorInst *TI = BB.getTerminator();
+    if (TI->getNumSuccessors() < 2)
+      continue;
+    if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+      continue;
+    if (getBBInfo(&BB).CountValue == 0)
+      continue;
+
+    // We have a non-zero Branch BB.
+    const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+    unsigned Size = BBCountInfo.OutEdges.size();
+    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
+    uint64_t MaxCount = 0;
+    for (unsigned s = 0; s < Size; s++) {
+      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+      const BasicBlock *SrcBB = E->SrcBB;
+      const BasicBlock *DestBB = E->DestBB;
+      if (DestBB == 0)
+        continue;
+      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+      uint64_t EdgeCount = E->CountValue;
+      if (EdgeCount > MaxCount)
+        MaxCount = EdgeCount;
+      EdgeCounts[SuccNum] = EdgeCount;
+    }
+    assert(MaxCount > 0 && "Bad max count");
+    uint64_t Scale = calculateCountScale(MaxCount);
+    SmallVector<uint32_t, 2> Weights;
+    for (const auto &ECI : EdgeCounts)
+      Weights.push_back(scaleBranchCount(ECI, Scale));
+
+    TI->setMetadata(llvm::LLVMContext::MD_prof,
+                    MDB.createBranchWeights(Weights));
+    DEBUG(dbgs() << "Weight is: ";
+          for (const auto &W : Weights) { dbgs() << W << " "; }
+          dbgs() << "\n";);
+  }
+}
+} // end anonymous namespace
+
+bool PGOInstrumentationGen::runOnModule(Module &M) {
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    BranchProbabilityInfo *BPI =
+        &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+    BlockFrequencyInfo *BFI =
+        &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+    instrumentOneFunc(F, &M, BPI, BFI);
+  }
+  return true;
+}
+
+static void setPGOCountOnFunc(PGOUseFunc &Func,
+                              IndexedInstrProfReader *PGOReader) {
+  if (Func.readCounters(PGOReader)) {
+    Func.populateCounters();
+    Func.setBranchWeights();
+  }
+}
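
For a two-way branch the metadata written by setBranchWeights reduces to a single MDBuilder call. A hedged sketch of that shape (annotateBranch is an invented helper name; weights are assumed already scaled into uint32_t):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Attach !prof branch weights to a conditional terminator, producing
    // metadata of the form !{!"branch_weights", i32 3, i32 97}.
    static void annotateBranch(TerminatorInst *TI, uint32_t TrueWeight,
                               uint32_t FalseWeight) {
      MDBuilder MDB(TI->getContext());
      TI->setMetadata(LLVMContext::MD_prof,
                      MDB.createBranchWeights(TrueWeight, FalseWeight));
    }
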
+bool PGOInstrumentationUse::runOnModule(Module &M) {
+  DEBUG(dbgs() << "Read in profile counters: ");
+  auto &Ctx = M.getContext();
+  // Read the counter array from file.
+  auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message()));
+    return false;
+  }
+
+  PGOReader = std::move(ReaderOrErr.get());
+  if (!PGOReader) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+                                          "Cannot get PGOReader"));
+    return false;
+  }
+
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    BranchProbabilityInfo *BPI =
+        &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+    BlockFrequencyInfo *BFI =
+        &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+    PGOUseFunc Func(F, &M, BPI, BFI);
+    setPGOCountOnFunc(Func, PGOReader.get());
+  }
+  return true;
+}
diff --git a/lib/Transforms/Instrumentation/SafeStack.cpp b/lib/Transforms/Instrumentation/SafeStack.cpp
index 6b185a2b127b..abed465f102d 100644
--- a/lib/Transforms/Instrumentation/SafeStack.cpp
+++ b/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -18,8 +18,9 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -37,6 +38,8 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -44,6 +47,17 @@ using namespace llvm;
 
 #define DEBUG_TYPE "safestack"
 
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+    cl::Hidden, cl::init(ThreadLocalUSP),
+    cl::desc("Type of storage for the unsafe stack pointer"),
+    cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+                          "Thread-local storage"),
+               clEnumValN(SingleThreadUSP, "single-thread",
+                          "Non-thread-local storage"),
+               clEnumValEnd));
+
 namespace llvm {
 
 STATISTIC(NumFunctions, "Total number of functions");
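
The cl::opt added in the hunk above follows the usual enum-valued option pattern. A generic, self-contained sketch of the same pattern (the demo names are invented; clEnumValEnd matches the cl API of this era):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    enum DemoStorage { TLSDemo, GlobalDemo };

    static cl::opt<DemoStorage> DemoOpt(
        "demo-storage", cl::Hidden, cl::init(TLSDemo),
        cl::desc("Where the demo keeps its pointer"),
        cl::values(clEnumValN(TLSDemo, "tls", "Thread-local storage"),
                   clEnumValN(GlobalDemo, "global", "A plain global"),
                   clEnumValEnd));
    // After cl::ParseCommandLineOptions(argc, argv), testing
    // DemoOpt == TLSDemo mirrors the USPStorage == ThreadLocalUSP check below.
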
@@ -54,118 +68,48 @@ STATISTIC(NumUnsafeStackRestorePointsFunctions,
 STATISTIC(NumAllocas, "Total number of allocas");
 STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
 STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
 STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
 
 } // namespace llvm
 
 namespace {
 
-/// Check whether a given alloca instruction (AI) should be put on the safe
-/// stack or not. The function analyzes all uses of AI and checks whether it is
-/// only accessed in a memory safe way (as decided statically).
-bool IsSafeStackAlloca(const AllocaInst *AI) {
-  // Go through all uses of this alloca and check whether all accesses to the
-  // allocated object are statically known to be memory safe and, hence, the
-  // object can be placed on the safe stack.
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+  const Value *AllocaPtr;
 
-  SmallPtrSet<const Instruction *, 16> Visited;
-  SmallVector<const Instruction *, 8> WorkList;
-  WorkList.push_back(AI);
+public:
+  AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+      : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
 
-  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
-  while (!WorkList.empty()) {
-    const Instruction *V = WorkList.pop_back_val();
-    for (const Use &UI : V->uses()) {
-      auto I = cast<Instruction>(UI.getUser());
-      assert(V == UI.get());
-
-      switch (I->getOpcode()) {
-      case Instruction::Load:
-        // Loading from a pointer is safe.
-        break;
-      case Instruction::VAArg:
-        // "va-arg" from a pointer is safe.
-        break;
-      case Instruction::Store:
-        if (V == I->getOperand(0))
-          // Stored the pointer - conservatively assume it may be unsafe.
-          return false;
-        // Storing to the pointee is safe.
-        break;
-
-      case Instruction::GetElementPtr:
-        if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
-          // GEP with non-constant indices can lead to memory errors.
-          // This also applies to inbounds GEPs, as the inbounds attribute
-          // represents an assumption that the address is in bounds, rather than
-          // an assertion that it is.
-          return false;
-
-        // We assume that GEP on static alloca with constant indices is safe,
-        // otherwise a compiler would detect it and warn during compilation.
-
-        if (!isa<ConstantInt>(AI->getArraySize()))
-          // However, if the array size itself is not constant, the access
-          // might still be unsafe at runtime.
-          return false;
-
-        /* fallthrough */
-
-      case Instruction::BitCast:
-      case Instruction::IntToPtr:
-      case Instruction::PHI:
-      case Instruction::PtrToInt:
-      case Instruction::Select:
-        // The object can be safe or not, depending on how the result of the
-        // instruction is used.
-        if (Visited.insert(I).second)
-          WorkList.push_back(cast<const Instruction>(I));
-        break;
-
-      case Instruction::Call:
-      case Instruction::Invoke: {
-        // FIXME: add support for memset and memcpy intrinsics.
-        ImmutableCallSite CS(I);
-
-        // LLVM 'nocapture' attribute is only set for arguments whose address
-        // is not stored, passed around, or used in any other non-trivial way.
-        // We assume that passing a pointer to an object as a 'nocapture'
-        // argument is safe.
-        // FIXME: a more precise solution would require an interprocedural
-        // analysis here, which would look at all uses of an argument inside
-        // the function being called.
-        ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
-        for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
-          if (A->get() == V && !CS.doesNotCapture(A - B))
-            // The parameter is not marked 'nocapture' - unsafe.
-            return false;
-        continue;
-      }
-
-      default:
-        // The object is unsafe if it is used in any other way.
-        return false;
-      }
-    }
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    if (Expr->getValue() == AllocaPtr)
+      return SE.getZero(Expr->getType());
+    return Expr;
   }
+};
 
-  // All uses of the alloca are safe, we can place it on the safe stack.
-  return true;
-}
-
-/// The SafeStack pass splits the stack of each function into the
-/// safe stack, which is only accessed through memory safe dereferences
-/// (as determined statically), and the unsafe stack, which contains all
-/// local variables that are accessed in unsafe ways.
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
 class SafeStack : public FunctionPass {
+  const TargetMachine *TM;
+  const TargetLoweringBase *TL;
   const DataLayout *DL;
+  ScalarEvolution *SE;
 
   Type *StackPtrTy;
   Type *IntPtrTy;
   Type *Int32Ty;
   Type *Int8Ty;
 
-  Constant *UnsafeStackPtr = nullptr;
+  Value *UnsafeStackPtr = nullptr;
 
   /// Unsafe stack alignment. Each stack frame must ensure that the stack is
   /// aligned to this value. We need to re-align the unsafe stack if the
@@ -175,26 +119,31 @@ class SafeStack : public FunctionPass {
   /// might expect to appear on the stack on most common targets.
   enum { StackAlignment = 16 };
 
-  /// \brief Build a constant representing a pointer to the unsafe stack
-  /// pointer.
-  Constant *getOrCreateUnsafeStackPtr(Module &M);
+  /// \brief Build a value representing a pointer to the unsafe stack pointer.
+  Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
 
   /// \brief Find all static allocas, dynamic allocas, return instructions and
   /// stack restore points (exception unwind blocks and setjmp calls) in the
   /// given function and append them to the respective vectors.
   void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
                  SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+                 SmallVectorImpl<Argument *> &ByValArguments,
                  SmallVectorImpl<ReturnInst *> &Returns,
                  SmallVectorImpl<Instruction *> &StackRestorePoints);
 
+  /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+  /// size cannot be statically determined.
+  uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
   /// \brief Allocate space for all static allocas in \p StaticAllocas,
   /// replace allocas with pointers into the unsafe stack and generate code to
   /// restore the stack pointer before all return instructions in \p Returns.
   ///
   /// \returns A pointer to the top of the unsafe stack after all unsafe static
   /// allocas are allocated.
-  Value *moveStaticAllocasToUnsafeStack(Function &F,
+  Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
                                         ArrayRef<AllocaInst *> StaticAllocas,
+                                        ArrayRef<Argument *> ByValArguments,
                                         ArrayRef<ReturnInst *> Returns);
 
   /// \brief Generate code to restore the stack after all stack restore points
@@ -203,7 +152,7 @@ class SafeStack : public FunctionPass {
   /// \returns A local variable in which to maintain the dynamic top of the
   /// unsafe stack if needed.
   AllocaInst *
-  createStackRestorePoints(Function &F,
+  createStackRestorePoints(IRBuilder<> &IRB, Function &F,
                            ArrayRef<Instruction *> StackRestorePoints,
                            Value *StaticTop, bool NeedDynamicTop);
 
@@ -214,17 +163,26 @@
                                        AllocaInst *DynamicTop,
                                        ArrayRef<AllocaInst *> DynamicAllocas);
 
+  bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+  bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                          const Value *AllocaPtr, uint64_t AllocaSize);
+
+  bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+                    uint64_t AllocaSize);
+
 public:
   static char ID; // Pass identification, replacement for typeid.
-  SafeStack() : FunctionPass(ID), DL(nullptr) {
+  SafeStack(const TargetMachine *TM)
+      : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
     initializeSafeStackPass(*PassRegistry::getPassRegistry());
   }
+  SafeStack() : SafeStack(nullptr) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<AliasAnalysis>();
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScalarEvolutionWrapperPass>();
   }
 
-  virtual bool doInitialization(Module &M) {
+  bool doInitialization(Module &M) override {
     DL = &M.getDataLayout();
 
     StackPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -235,51 +193,203 @@ public:
     return false;
   }
 
-  bool runOnFunction(Function &F);
-
+  bool runOnFunction(Function &F) override;
 }; // class SafeStack
 
-Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
-  // The unsafe stack pointer is stored in a global variable with a magic name.
-  const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+  uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+  if (AI->isArrayAllocation()) {
+    auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+    if (!C)
+      return 0;
+    Size *= C->getZExtValue();
+  }
+  return Size;
+}
 
-  auto UnsafeStackPtr =
-      dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar));
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
                             const Value *AllocaPtr, uint64_t AllocaSize) {
+  AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+  const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
 
-  if (!UnsafeStackPtr) {
-    // The global variable is not defined yet, define it ourselves.
-    // We use the initial-exec TLS model because we do not support the variable
-    // living anywhere other than in the main executable.
-    UnsafeStackPtr = new GlobalVariable(
-        /*Module=*/M, /*Type=*/StackPtrTy,
-        /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage,
-        /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar,
-        /*InsertBefore=*/nullptr,
-        /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel);
-  } else {
-    // The variable exists, check its type and attributes.
-    if (UnsafeStackPtr->getValueType() != StackPtrTy) {
-      report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type");
-    }
+  uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+  ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+  ConstantRange SizeRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+  ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+  ConstantRange AllocaRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+  bool Safe = AllocaRange.contains(AccessRange);
 
-    if (!UnsafeStackPtr->isThreadLocal()) {
-      report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local");
+  DEBUG(dbgs() << "[SafeStack] "
+               << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+               << *AllocaPtr << "\n"
+               << "  Access " << *Addr << "\n"
+               << "  SCEV " << *Expr
+               << " U: " << SE->getUnsignedRange(Expr)
+               << ", S: " << SE->getSignedRange(Expr) << "\n"
+               << "  Range " << AccessRange << "\n"
+               << "  AllocaRange " << AllocaRange << "\n"
+               << "  " << (Safe ? "safe" : "unsafe") << "\n");
+
+  return Safe;
+}
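
IsAccessSafe above is a pure range computation, so it can be illustrated without SCEV. A standalone sketch under the assumption that the access's start-offset range is already known and all sizes are non-zero (so the half-open ConstantRanges are well formed):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include <cstdint>
    using namespace llvm;

    // True iff every access of AccessSize bytes whose start offset lies in
    // [StartLo, StartHi) stays inside the object's [0, AllocaSize) bytes.
    static bool accessFits(uint64_t StartLo, uint64_t StartHi,
                           uint64_t AccessSize, uint64_t AllocaSize) {
      const unsigned Bits = 64;
      ConstantRange Start(APInt(Bits, StartLo), APInt(Bits, StartHi));
      ConstantRange Size(APInt(Bits, 0), APInt(Bits, AccessSize));
      ConstantRange Access = Start.add(Size); // offsets the access may touch
      ConstantRange Alloca(APInt(Bits, 0), APInt(Bits, AllocaSize));
      return Alloca.contains(Access);
    }
    // e.g. accessFits(0, 4, 4, 8) is true; accessFits(0, 8, 4, 8) is false.
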
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                                   const Value *AllocaPtr,
+                                   uint64_t AllocaSize) {
+  // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+  if (MI->getRawDest() != U) return true;
+  const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+  // Non-constant size => unsafe. FIXME: try SCEV getRange.
+  if (!Len) return false;
+  return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation must be put on the safe
+/// stack or not. The function analyzes all uses of AI and checks whether it is
+/// only accessed in a memory safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+  // Go through all uses of this alloca and check whether all accesses to the
+  // allocated object are statically known to be memory safe and, hence, the
+  // object can be placed on the safe stack.
+  SmallPtrSet<const Value *, 16> Visited;
+  SmallVector<const Value *, 8> WorkList;
+  WorkList.push_back(AllocaPtr);
+
+  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+  while (!WorkList.empty()) {
+    const Value *V = WorkList.pop_back_val();
+    for (const Use &UI : V->uses()) {
+      auto I = cast<const Instruction>(UI.getUser());
+      assert(V == UI.get());
+
+      switch (I->getOpcode()) {
+      case Instruction::Load: {
+        if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+                          AllocaSize))
+          return false;
+        break;
+      }
+      case Instruction::VAArg:
+        // "va-arg" from a pointer is safe.
+        break;
+      case Instruction::Store: {
+        if (V == I->getOperand(0)) {
+          // Stored the pointer - conservatively assume it may be unsafe.
+          DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                       << "\n            store of address: " << *I << "\n");
+          return false;
+        }
+
+        if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+                          AllocaPtr, AllocaSize))
+          return false;
+        break;
+      }
+      case Instruction::Ret: {
+        // Information leak.
+        return false;
+      }
+
+      case Instruction::Call:
+      case Instruction::Invoke: {
+        ImmutableCallSite CS(I);
+
+        if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+          if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+              II->getIntrinsicID() == Intrinsic::lifetime_end)
+            continue;
+        }
+
+        if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+          if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+            DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                         << "\n            unsafe memintrinsic: " << *I
+                         << "\n");
+            return false;
+          }
+          continue;
+        }
+
+        // LLVM 'nocapture' attribute is only set for arguments whose address
+        // is not stored, passed around, or used in any other non-trivial way.
+        // We assume that passing a pointer to an object as a 'nocapture
+        // readnone' argument is safe.
+        // FIXME: a more precise solution would require an interprocedural
+        // analysis here, which would look at all uses of an argument inside
+        // the function being called.
+        ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+        for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+          if (A->get() == V)
+            if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+                                               CS.doesNotAccessMemory()))) {
+              DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                           << "\n            unsafe call: " << *I << "\n");
+              return false;
+            }
+        continue;
+      }
+
+      default:
+        if (Visited.insert(I).second)
+          WorkList.push_back(cast<const Instruction>(I));
+      }
+    }
+  }
+
+  // All uses of the alloca are safe, we can place it on the safe stack.
+  return true;
+}
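
For targets without a TargetLowering-provided location, the fallback path that follows binds to a runtime-owned thread-local slot. A hypothetical stand-in for that runtime definition (the real one ships in compiler-rt's safestack runtime; treat the linkage details as assumptions):

    // Sketch of the runtime side only; compiler-rt's actual definition differs.
    extern "C" {
    // The pass's fallback emits initial-exec TLS loads/stores of this pointer.
    __thread void *__safestack_unsafe_stack_ptr;
    }
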
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+  // Check if there is a target-specific location for the unsafe stack pointer.
+  if (TL)
+    if (Value *V = TL->getSafeStackPointerLocation(IRB))
+      return V;
+
+  // Otherwise, assume the target links with compiler-rt, which provides a
+  // thread-local variable with a magic name.
+  Module &M = *F.getParent();
+  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+  auto UnsafeStackPtr =
+      dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+  bool UseTLS = USPStorage == ThreadLocalUSP;
+
+  if (!UnsafeStackPtr) {
+    auto TLSModel = UseTLS ?
+        GlobalValue::InitialExecTLSModel :
+        GlobalValue::NotThreadLocal;
+    // The global variable is not defined yet, define it ourselves.
+    // We use the initial-exec TLS model because we do not support the
+    // variable living anywhere other than in the main executable.
+    UnsafeStackPtr = new GlobalVariable(
+        M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+        UnsafeStackPtrVar, nullptr, TLSModel);
+  } else {
+    // The variable exists, check its type and attributes.
+    if (UnsafeStackPtr->getValueType() != StackPtrTy)
+      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+    if (UseTLS != UnsafeStackPtr->isThreadLocal())
+      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+                         (UseTLS ? "" : "not ") + "be thread-local");
+  }
   return UnsafeStackPtr;
 }
 
 void SafeStack::findInsts(Function &F,
                           SmallVectorImpl<AllocaInst *> &StaticAllocas,
                           SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+                          SmallVectorImpl<Argument *> &ByValArguments,
                           SmallVectorImpl<ReturnInst *> &Returns,
                           SmallVectorImpl<Instruction *> &StackRestorePoints) {
-  for (Instruction &I : inst_range(&F)) {
+  for (Instruction &I : instructions(&F)) {
     if (auto AI = dyn_cast<AllocaInst>(&I)) {
       ++NumAllocas;
 
-      if (IsSafeStackAlloca(AI))
+      uint64_t Size = getStaticAllocaAllocationSize(AI);
+      if (IsSafeStackAlloca(AI, Size))
         continue;
 
       if (AI->isStaticAlloca()) {
@@ -304,19 +414,26 @@ void SafeStack::findInsts(Function &F,
           "gcroot intrinsic not compatible with safestack attribute");
     }
   }
+  for (Argument &Arg : F.args()) {
+    if (!Arg.hasByValAttr())
+      continue;
+    uint64_t Size =
+        DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+    if (IsSafeStackAlloca(&Arg, Size))
+      continue;
+
+    ++NumUnsafeByValArguments;
+    ByValArguments.push_back(&Arg);
+  }
 }
 
 AllocaInst *
-SafeStack::createStackRestorePoints(Function &F,
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
                                     ArrayRef<Instruction *> StackRestorePoints,
                                     Value *StaticTop, bool NeedDynamicTop) {
   if (StackRestorePoints.empty())
     return nullptr;
 
-  IRBuilder<> IRB(StaticTop
-                      ? cast<Instruction>(StaticTop)->getNextNode()
-                      : (Instruction *)F.getEntryBlock().getFirstInsertionPt());
-
   // We need the current value of the shadow stack pointer to restore
   // after longjmp or exception catching.
 
@@ -342,7 +459,7 @@ SafeStack::createStackRestorePoints(Function &F,
   for (Instruction *I : StackRestorePoints) {
     ++NumUnsafeStackRestorePoints;
 
-    IRB.SetInsertPoint(cast<Instruction>(I->getNextNode()));
+    IRB.SetInsertPoint(I->getNextNode());
     Value *CurrentTop = DynamicTop ?
IRB.CreateLoad(DynamicTop) : StaticTop; IRB.CreateStore(CurrentTop, UnsafeStackPtr); } @@ -350,14 +467,12 @@ SafeStack::createStackRestorePoints(Function &F, return DynamicTop; } -Value * -SafeStack::moveStaticAllocasToUnsafeStack(Function &F, - ArrayRef StaticAllocas, - ArrayRef Returns) { - if (StaticAllocas.empty()) +Value *SafeStack::moveStaticAllocasToUnsafeStack( + IRBuilder<> &IRB, Function &F, ArrayRef StaticAllocas, + ArrayRef ByValArguments, ArrayRef Returns) { + if (StaticAllocas.empty() && ByValArguments.empty()) return nullptr; - IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); DIBuilder DIB(*F.getParent()); // We explicitly compute and set the unsafe stack layout for all unsafe @@ -377,6 +492,13 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, // Compute maximum alignment among static objects on the unsafe stack. unsigned MaxAlignment = 0; + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + if (Align > MaxAlignment) + MaxAlignment = Align; + } for (AllocaInst *AI : StaticAllocas) { Type *Ty = AI->getAllocatedType(); unsigned Align = @@ -388,22 +510,51 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, if (MaxAlignment > StackAlignment) { // Re-align the base pointer according to the max requested alignment. assert(isPowerOf2_32(MaxAlignment)); - IRB.SetInsertPoint(cast(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); BasePointer = cast(IRB.CreateIntToPtr( IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))), StackPtrTy)); } - // Allocate space for every unsafe static AllocaInst on the unsafe stack. int64_t StaticOffset = 0; // Current stack top. + IRB.SetInsertPoint(BasePointer->getNextNode()); + + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + + uint64_t Size = DL->getTypeStoreSize(Ty); + if (Size == 0) + Size = 1; // Don't create zero-sized stack objects. + + // Ensure the object is properly aligned. + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + + // Add alignment. + // NOTE: we ensure that BasePointer itself is aligned to >= Align. + StaticOffset += Size; + StaticOffset = RoundUpToAlignment(StaticOffset, Align); + + Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* + ConstantInt::get(Int32Ty, -StaticOffset)); + Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(), + Arg->getName() + ".unsafe-byval"); + + // Replace alloc with the new location. + replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, + /*Deref=*/true, -StaticOffset); + Arg->replaceAllUsesWith(NewArg); + IRB.SetInsertPoint(cast(NewArg)->getNextNode()); + IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); + } + + // Allocate space for every unsafe static AllocaInst on the unsafe stack. for (AllocaInst *AI : StaticAllocas) { IRB.SetInsertPoint(AI); - auto CArraySize = cast(AI->getArraySize()); Type *Ty = AI->getAllocatedType(); - - uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue(); + uint64_t Size = getStaticAllocaAllocationSize(AI); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. @@ -423,7 +574,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, cast(NewAI)->takeName(AI); // Replace alloc with the new location. 
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -434,7 +585,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment); // Update shadow stack pointer in the function epilogue. - IRB.SetInsertPoint(cast(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); Value *StaticTop = IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset), @@ -478,7 +629,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (DynamicTop) IRB.CreateStore(NewTop, DynamicTop); - Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType()); + Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType()); if (AI->hasName() && isa(NewAI)) NewAI->takeName(AI); @@ -513,8 +664,6 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } bool SafeStack::runOnFunction(Function &F) { - auto AA = &getAnalysis(); - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); if (!F.hasFnAttribute(Attribute::SafeStack)) { @@ -529,6 +678,9 @@ bool SafeStack::runOnFunction(Function &F) { return false; } + TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; + SE = &getAnalysis().getSE(); + { // Make sure the regular stack protector won't run on this function // (safestack attribute takes precedence). @@ -541,16 +693,11 @@ bool SafeStack::runOnFunction(Function &F) { AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B)); } - if (AA->onlyReadsMemory(&F)) { - // XXX: we don't protect against information leak attacks for now. - DEBUG(dbgs() << "[SafeStack] function only reads memory\n"); - return false; - } - ++NumFunctions; SmallVector StaticAllocas; SmallVector DynamicAllocas; + SmallVector ByValArguments; SmallVector Returns; // Collect all points where stack gets unwound and needs to be restored @@ -562,23 +709,26 @@ bool SafeStack::runOnFunction(Function &F) { // Find all static and dynamic alloca instructions that must be moved to the // unsafe stack, all return instructions and stack restore points. - findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints); + findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns, + StackRestorePoints); if (StaticAllocas.empty() && DynamicAllocas.empty() && - StackRestorePoints.empty()) + ByValArguments.empty() && StackRestorePoints.empty()) return false; // Nothing to do in this function. - if (!StaticAllocas.empty() || !DynamicAllocas.empty()) + if (!StaticAllocas.empty() || !DynamicAllocas.empty() || + !ByValArguments.empty()) ++NumUnsafeStackFunctions; // This function has the unsafe stack. if (!StackRestorePoints.empty()) ++NumUnsafeStackRestorePointsFunctions; - if (!UnsafeStackPtr) - UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent()); + IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); + UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); // The top of the unsafe stack after all unsafe static allocas are allocated. - Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns); + Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, + ByValArguments, Returns); // Safe stack object that stores the current unsafe stack top. It is updated // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. 
@@ -587,7 +737,7 @@ bool SafeStack::runOnFunction(Function &F) { // FIXME: a better alternative might be to store the unsafe stack pointer // before setjmp / invoke instructions. AllocaInst *DynamicTop = createStackRestorePoints( - F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); + IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); // Handle dynamic allocas. moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, @@ -597,13 +747,14 @@ bool SafeStack::runOnFunction(Function &F) { return true; } -} // end anonymous namespace +} // anonymous namespace char SafeStack::ID = 0; -INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass", - false, false) +INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) +INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) -FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); } +FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { + return new SafeStack(TM); +} diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 7a5b4cb0178b..09de7a2cda2b 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -31,6 +31,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -59,6 +60,7 @@ static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; +static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch"; static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t kSanCtorAndDtorPriority = 2; @@ -148,19 +150,25 @@ class SanitizerCoverageModule : public ModulePass { void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); + void InjectTraceForSwitch(Function &F, + ArrayRef SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef AllBlocks); void SetNoSanitizeMetadata(Instruction *I); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls); unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses(); + return SanCovFunction->getNumUses() + + SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + + SanCovTraceEnter->getNumUses(); } Function *SanCovFunction; Function *SanCovWithCheckFunction; Function *SanCovIndirCallFunction; Function *SanCovTraceEnter, *SanCovTraceBB; Function *SanCovTraceCmpFunction; + Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *Int64Ty; + Type *IntptrTy, *Int64Ty, *Int64PtrTy; + Module *CurModule; LLVMContext *C; const DataLayout *DL; @@ -177,11 +185,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { return false; C = &(M.getContext()); DL = &M.getDataLayout(); + CurModule = 
&M; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int64Ty = IRB.getInt64Ty(); SanCovFunction = checkSanitizerInterfaceFunction( @@ -194,18 +204,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTraceCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + SanCovTraceSwitchFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr)); // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); - if (Options.TraceBB) { - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); - } + SanCovTraceEnter = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); + SanCovTraceBB = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. @@ -280,11 +291,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (F.empty()) return false; if (F.getName().find(".module_ctor") != std::string::npos) return false; // Should not instrument sanitizer init functions. + // Don't instrument functions using SEH for now. Splitting basic blocks like + // we do for coverage breaks WinEHPrepare. + // FIXME: Remove this when SEH no longer uses landingpad pattern matching. + if (F.hasPersonalityFn() && + isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + return false; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F); SmallVector IndirCalls; SmallVector AllBlocks; SmallVector CmpTraceTargets; + SmallVector SwitchTraceTargets; for (auto &BB : F) { AllBlocks.push_back(&BB); for (auto &Inst : BB) { @@ -293,13 +311,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (CS && !CS.getCalledFunction()) IndirCalls.push_back(&Inst); } - if (Options.TraceCmp && isa(&Inst)) - CmpTraceTargets.push_back(&Inst); + if (Options.TraceCmp) { + if (isa(&Inst)) + CmpTraceTargets.push_back(&Inst); + if (isa(&Inst)) + SwitchTraceTargets.push_back(&Inst); + } } } InjectCoverage(F, AllBlocks); InjectCoverageForIndirectCalls(F, IndirCalls); InjectTraceForCmp(F, CmpTraceTargets); + InjectTraceForSwitch(F, SwitchTraceTargets); return true; } @@ -348,6 +371,45 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( } } +// For every switch statement we insert a call: +// __sanitizer_cov_trace_switch(CondValue, +// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... 
}) + +void SanitizerCoverageModule::InjectTraceForSwitch( + Function &F, ArrayRef SwitchTraceTargets) { + for (auto I : SwitchTraceTargets) { + if (SwitchInst *SI = dyn_cast(I)) { + IRBuilder<> IRB(I); + SmallVector Initializers; + Value *Cond = SI->getCondition(); + if (Cond->getType()->getScalarSizeInBits() > + Int64Ty->getScalarSizeInBits()) + continue; + Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases())); + Initializers.push_back( + ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits())); + if (Cond->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + Cond = IRB.CreateIntCast(Cond, Int64Ty, false); + for (auto It: SI->cases()) { + Constant *C = It.getCaseValue(); + if (C->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty); + Initializers.push_back(C); + } + ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size()); + GlobalVariable *GV = new GlobalVariable( + *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage, + ConstantArray::get(ArrayOfInt64Ty, Initializers), + "__sancov_gen_cov_switch_values"); + IRB.CreateCall(SanCovTraceSwitchFunction, + {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)}); + } + } +} + + void SanitizerCoverageModule::InjectTraceForCmp( Function &F, ArrayRef CmpTraceTargets) { for (auto I : CmpTraceTargets) { @@ -369,8 +431,7 @@ void SanitizerCoverageModule::InjectTraceForCmp( void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { I->setMetadata( - I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None)); } void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, @@ -382,34 +443,31 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, // locations. if (isa(BB.getTerminator())) return; - BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); - // Skip static allocas at the top of the entry block so they don't become - // dynamic when we split the block. If we used our optimized stack layout, - // then there will only be one alloca and it will come first. - for (; IP != BE; ++IP) { - AllocaInst *AI = dyn_cast(IP); - if (!AI || !AI->isStaticAlloca()) - break; - } + BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); + // Keep static allocas and llvm.localescape calls in the entry block. Even + // if we aren't splitting the block, it's nice for allocas to be before + // calls. + IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } - IRBuilder<> IRB(IP); + IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); - SmallVector Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (UseCalls) { + if (Options.TraceBB) { + IRB.CreateCall(IsEntryBB ? 
SanCovTraceEnter : SanCovTraceBB, GuardP); + } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); @@ -418,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); + Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. @@ -427,7 +485,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } if (Options.Use8bitCounters) { - IRB.SetInsertPoint(IP); + IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); @@ -438,13 +496,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } - - if (Options.TraceBB) { - // Experimental support for tracing. - // Insert a callback with the same guard variable as used for coverage. - IRB.SetInsertPoint(IP); - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } } char SanitizerCoverageModule::ID = 0; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 1a46bbb86122..9331e1d2b3fd 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -142,37 +142,35 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { - const size_t ByteSize = 1 << i; - const size_t BitSize = ByteSize * 8; - SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); + const unsigned ByteSize = 1U << i; + const unsigned BitSize = ByteSize * 8; + std::string ByteSizeStr = utostr(ByteSize); + std::string BitSizeStr = utostr(BitSize); + SmallString<32> ReadName("__tsan_read" + ByteSizeStr); TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); + SmallString<32> WriteName("__tsan_write" + ByteSizeStr); TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedReadName("__tsan_unaligned_read" + - itostr(ByteSize)); + SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + - itostr(ByteSize)); + SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); TsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); - SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + - "_load"); + SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); 
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); - SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) + - "_store"); + SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); @@ -201,7 +199,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } - SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + "_compare_exchange_val"); TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); @@ -513,8 +511,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -527,8 +525,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -544,8 +542,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; if (!F) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -558,8 +556,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index fbcae29044c6..98ad37f5d230 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -3,8 +3,6 @@ add_llvm_library(LLVMObjCARCOpts ObjCARCOpts.cpp ObjCARCExpand.cpp ObjCARCAPElim.cpp - ObjCARCAliasAnalysis.cpp - ARCInstKind.cpp ObjCARCContract.cpp DependencyAnalysis.cpp ProvenanceAnalysis.cpp diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 4edd02904b22..9d78e5ae3b9b 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -49,7 +49,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, assert(CS && "Only calls can alter reference 
counts!"); // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); if (AliasAnalysis::onlyReadsMemory(MRB)) return false; if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { @@ -226,7 +226,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, SmallPtrSetImpl &DependingInsts, SmallPtrSetImpl &Visited, ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; + BasicBlock::iterator StartPos = StartInst->getIterator(); SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(StartBB, StartPos)); @@ -252,7 +252,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, break; } - Instruction *Inst = --LocalStartPos; + Instruction *Inst = &*--LocalStartPos; if (Depends(Flavor, Inst, Arg, PA)) { DependingInsts.insert(Inst); break; diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index 6ea038b8ba8c..d860723bb460 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -26,18 +26,10 @@ namespace llvm { using namespace llvm; using namespace llvm::objcarc; -/// \brief A handy option to enable/disable all ARC Optimizations. -bool llvm::objcarc::EnableARCOpts; -static cl::opt -EnableARCOptimizations("enable-objc-arc-opts", - cl::desc("enable/disable all ARC Optimizations"), - cl::location(EnableARCOpts), - cl::init(true)); - /// initializeObjCARCOptsPasses - Initialize all passes linked into the /// ObjCARCOpts library. void llvm::initializeObjCARCOpts(PassRegistry &Registry) { - initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCAAWrapperPassPass(Registry); initializeObjCARCAPElimPass(Registry); initializeObjCARCExpandPass(Registry); initializeObjCARCContractPass(Registry); diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index 7595e2db1a7a..5fd45b00af17 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -26,6 +26,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" @@ -34,7 +36,6 @@ #include "llvm/Pass.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/Local.h" -#include "ARCInstKind.h" namespace llvm { class raw_ostream; @@ -43,99 +44,6 @@ class raw_ostream; namespace llvm { namespace objcarc { -/// \brief A handy option to enable/disable all ARC Optimizations. -extern bool EnableARCOpts; - -/// \brief Test if the given module looks interesting to run ARC optimization -/// on. 
-static inline bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer") || - M.getNamedValue("clang.arc.use"); -} - -/// \brief This is a wrapper around getUnderlyingObject which also knows how to -/// look through objc_retain and objc_autorelease calls, which we know to return -/// their argument verbatim. -static inline const Value *GetUnderlyingObjCPtr(const Value *V, - const DataLayout &DL) { - for (;;) { - V = GetUnderlyingObject(V, DL); - if (!IsForwarding(GetBasicARCInstKind(V))) - break; - V = cast(V)->getArgOperand(0); - } - - return V; -} - -/// The RCIdentity root of a value \p V is a dominating value U for which -/// retaining or releasing U is equivalent to retaining or releasing V. In other -/// words, ARC operations on \p V are equivalent to ARC operations on \p U. -/// -/// We use this in the ARC optimizer to make it easier to match up ARC -/// operations by always mapping ARC operations to RCIdentityRoots instead of -/// pointers themselves. -/// -/// The two ways that we see RCIdentical values in ObjC are via: -/// -/// 1. PointerCasts -/// 2. Forwarding Calls that return their argument verbatim. -/// -/// Thus this function strips off pointer casts and forwarding calls. *NOTE* -/// This implies that two RCIdentical values must alias. -static inline const Value *GetRCIdentityRoot(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicARCInstKind(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - -/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just -/// casts away the const of the result. For documentation about what an -/// RCIdentityRoot (and by extension GetRCIdentityRoot is) look at that -/// function. -static inline Value *GetRCIdentityRoot(Value *V) { - return const_cast(GetRCIdentityRoot((const Value *)V)); -} - -/// \brief Assuming the given instruction is one of the special calls such as -/// objc_retain or objc_release, return the RCIdentity root of the argument of -/// the call. -static inline Value *GetArgRCIdentityRoot(Value *Inst) { - return GetRCIdentityRoot(cast(Inst)->getArgOperand(0)); -} - -static inline bool IsNullOrUndef(const Value *V) { - return isa(V) || isa(V); -} - -static inline bool IsNoopInstruction(const Instruction *I) { - return isa(I) || - (isa(I) && - cast(I)->hasAllZeroIndices()); -} - - /// \brief Erase the given instruction. /// /// Many ObjC calls return their argument verbatim, @@ -162,152 +70,6 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } -/// \brief Test whether the given value is possible a retainable object pointer. -static inline bool IsPotentialRetainableObjPtr(const Value *Op) { - // Pointers to static or stack storage are not valid retainable object - // pointers. 
- if (isa(Op) || isa(Op)) - return false; - // Special arguments can not be a valid retainable object pointer. - if (const Argument *Arg = dyn_cast(Op)) - if (Arg->hasByValAttr() || - Arg->hasInAllocaAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. - // - // It seemes intuitive to exclude function pointer types as well, since - // functions are never retainable object pointers, however clang occasionally - // bitcasts retainable object pointers to function-pointer type temporarily. - PointerType *Ty = dyn_cast(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential retainable object - // pointer. - return true; -} - -static inline bool IsPotentialRetainableObjPtr(const Value *Op, - AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// \brief Helper for GetARCInstKind. Determines what kind of construct CS -/// is. -static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) - return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; - - return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; -} - -/// \brief Return true if this value refers to a distinct and identifiable -/// object. -/// -/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses -/// special knowledge of ObjC conventions. -static inline bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa(V) || isa(V) || - isa(V) || isa(V) || - isa(V)) - return true; - - if (const LoadInst *LI = dyn_cast(V)) { - const Value *Pointer = - GetRCIdentityRoot(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - - StringRef Section = GV->getSection(); - if (Section.find("__message_refs") != StringRef::npos || - Section.find("__objc_classrefs") != StringRef::npos || - Section.find("__objc_superrefs") != StringRef::npos || - Section.find("__objc_methname") != StringRef::npos || - Section.find("__cstring") != StringRef::npos) - return true; - } - } - - return false; -} - -enum class ARCMDKindID { - ImpreciseRelease, - CopyOnEscape, - NoObjCARCExceptions, -}; - -/// A cache of MDKinds used by various ARC optimizations. -class ARCMDKindCache { - Module *M; - - /// The Metadata Kind for clang.imprecise_release metadata. - llvm::Optional ImpreciseReleaseMDKind; - - /// The Metadata Kind for clang.arc.copy_on_escape metadata. 
-  llvm::Optional<unsigned> CopyOnEscapeMDKind;
-
-  /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
-  llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
-
-public:
-  void init(Module *Mod) {
-    M = Mod;
-    ImpreciseReleaseMDKind = NoneType::None;
-    CopyOnEscapeMDKind = NoneType::None;
-    NoObjCARCExceptionsMDKind = NoneType::None;
-  }
-
-  unsigned get(ARCMDKindID ID) {
-    switch (ID) {
-    case ARCMDKindID::ImpreciseRelease:
-      if (!ImpreciseReleaseMDKind)
-        ImpreciseReleaseMDKind =
-          M->getContext().getMDKindID("clang.imprecise_release");
-      return *ImpreciseReleaseMDKind;
-    case ARCMDKindID::CopyOnEscape:
-      if (!CopyOnEscapeMDKind)
-        CopyOnEscapeMDKind =
-          M->getContext().getMDKindID("clang.arc.copy_on_escape");
-      return *CopyOnEscapeMDKind;
-    case ARCMDKindID::NoObjCARCExceptions:
-      if (!NoObjCARCExceptionsMDKind)
-        NoObjCARCExceptionsMDKind =
-          M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-      return *NoObjCARCExceptionsMDKind;
-    }
-    llvm_unreachable("Covered switch isn't covered?!");
-  }
-};
-
 } // end namespace objcarc
 } // end namespace llvm
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index d318643a359a..969e77c1f888 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -72,12 +72,9 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
   if (const Function *Callee = CS.getCalledFunction()) {
     if (Callee->isDeclaration() || Callee->mayBeOverridden())
       return true;
-    for (Function::const_iterator I = Callee->begin(), E = Callee->end();
-         I != E; ++I) {
-      const BasicBlock *BB = I;
-      for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
-           J != F; ++J)
-        if (ImmutableCallSite JCS = ImmutableCallSite(J))
+    for (const BasicBlock &BB : *Callee) {
+      for (const Instruction &I : BB)
+        if (ImmutableCallSite JCS = ImmutableCallSite(&I))
           // This recursion depth limit is arbitrary. It's just great
           // enough to cover known interesting testcases.
           if (Depth < 3 &&
@@ -96,7 +93,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
   Instruction *Push = nullptr;
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
-    Instruction *Inst = I++;
+    Instruction *Inst = &*I++;
     switch (GetBasicARCInstKind(Inst)) {
     case ARCInstKind::AutoreleasepoolPush:
       Push = Inst;
@@ -169,7 +166,7 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
       if (std::next(F->begin()) != F->end())
         continue;
       // Ok, a single-block constructor function definition. Try to optimize it.
-      Changed |= OptimizeBB(F->begin());
+      Changed |= OptimizeBB(&F->front());
     }
 
   return Changed;
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
deleted file mode 100644
index eecc82fe572c..000000000000
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- C++ -*-----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
-/// of Objective C to enhance other optimization passes which rely on the Alias
-/// Analysis infrastructure.
-///
-/// WARNING: This file knows about certain library functions. It recognizes
-/// them by name, and hardwires knowledge of their semantics.
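// Usage sketch (editor's addition; hypothetical caller code, not in the
// patch): ARCMDKindCache above interns metadata-kind IDs once per module so
// the ARC passes avoid repeated string lookups:
//   ARCMDKindCache MDKindCache;
//   MDKindCache.init(&M);  // once per module
//   unsigned KindID = MDKindCache.get(ARCMDKindID::ImpreciseRelease);
//   Release->setMetadata(KindID, MDNode::get(M.getContext(), None));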
-///
-/// WARNING: This file knows about how certain Objective-C library functions
-/// are used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
-namespace objcarc {
-
-  /// \brief This is a simple alias analysis implementation that uses
-  /// knowledge of ARC constructs to answer queries.
-  ///
-  /// TODO: This class could be generalized to know about other ObjC-specific
-  /// tricks, such as knowing that ivars in the non-fragile ABI are
-  /// non-aliasing even though their offsets are dynamic.
-  class ObjCARCAliasAnalysis : public ImmutablePass,
-                               public AliasAnalysis {
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    ObjCARCAliasAnalysis() : ImmutablePass(ID) {
-      initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-    bool doInitialization(Module &M) override;
-
-    /// This method is used when a pass implements an analysis interface
-    /// through multiple inheritance. If needed, it should override this to
-    /// adjust the this pointer as needed for the specified pass info.
-    void *getAdjustedAnalysisPointer(const void *PI) override {
-      if (PI == &AliasAnalysis::ID)
-        return static_cast<AliasAnalysis *>(this);
-      return this;
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-    bool pointsToConstantMemory(const MemoryLocation &Loc,
-                                bool OrLocal) override;
-    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-    ModRefBehavior getModRefBehavior(const Function *F) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override;
-  };
-
-} // namespace objcarc
-} // namespace llvm
-
-#endif
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index baca76ba3f2a..1cdf5689f42a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -119,9 +119,9 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
     return false;
 
   // Check that the call is next to the retain.
-  BasicBlock::const_iterator I = Call;
-  ++I;
-  while (IsNoopInstruction(I)) ++I;
+  BasicBlock::const_iterator I = ++Call->getIterator();
+  while (IsNoopInstruction(&*I))
+    ++I;
   if (&*I != Retain)
     return false;
 
@@ -247,7 +247,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
 
     // Ok, now we know we have not seen a store yet. See if Inst can write to
     // our load location; if it cannot, just ignore the instruction.
-    if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+    if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
       continue;
 
     Store = dyn_cast<StoreInst>(Inst);
@@ -282,9 +282,9 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
                                     Instruction *Release,
                                     ProvenanceAnalysis &PA) {
   // Walk up from the Store to find the retain.
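// Editor's illustration (hypothetical IR, not part of this patch): the
// load/retain/store/release sequence that ObjCARCContract collapses into
// objc_storeStrong is
//   %old = load i8*, i8** %slot
//   %new.retained = call i8* @objc_retain(i8* %new)
//   store i8* %new.retained, i8** %slot
//   call void @objc_release(i8* %old)
// which, when the safety checks in the surrounding code succeed, becomes
//   call void @objc_storeStrong(i8** %slot, i8* %new)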
- BasicBlock::iterator I = Store; + BasicBlock::iterator I = Store->getIterator(); BasicBlock::iterator Begin = Store->getParent()->begin(); - while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) { + while (I != Begin && GetBasicARCInstKind(&*I) != ARCInstKind::Retain) { Instruction *Inst = &*I; // It is only safe to move the retain to the store if we can prove @@ -294,7 +294,7 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store, return nullptr; --I; } - Instruction *Retain = I; + Instruction *Retain = &*I; if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain) return nullptr; if (GetArgRCIdentityRoot(Retain) != New) @@ -429,7 +429,7 @@ bool ObjCARCContract::tryToPeepholeInstruction( // insert it now. if (!RetainRVMarker) return false; - BasicBlock::iterator BBI = Inst; + BasicBlock::iterator BBI = Inst->getIterator(); BasicBlock *InstParent = Inst->getParent(); // Step up to see if the call immediately precedes the RetainRV call. @@ -440,11 +440,11 @@ bool ObjCARCContract::tryToPeepholeInstruction( BasicBlock *Pred = InstParent->getSinglePredecessor(); if (!Pred) goto decline_rv_optimization; - BBI = Pred->getTerminator(); + BBI = Pred->getTerminator()->getIterator(); break; } --BBI; - } while (IsNoopInstruction(BBI)); + } while (IsNoopInstruction(&*BBI)); if (&*BBI == GetArgRCIdentityRoot(Inst)) { DEBUG(dbgs() << "Adding inline asm marker for " @@ -511,10 +511,10 @@ bool ObjCARCContract::runOnFunction(Function &F) { return false; Changed = false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n"); @@ -629,13 +629,13 @@ bool ObjCARCContract::runOnFunction(Function &F) { char ObjCARCContract::ID = 0; INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract", "ObjC ARC contraction", false, false) void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 9edbb17e8d1b..f0ee6e2be487 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -28,7 +28,6 @@ #include "ARCRuntimeEntryPoints.h" #include "BlotMapVector.h" #include "DependencyAnalysis.h" -#include "ObjCARCAliasAnalysis.h" #include "ProvenanceAnalysis.h" #include "PtrState.h" #include "llvm/ADT/DenseMap.h" @@ -36,6 +35,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -482,7 +482,7 @@ namespace { /// A flag indicating whether this optimization pass should run. bool Run; - /// Flags which determine whether each of the interesting runtine functions + /// Flags which determine whether each of the interesting runtime functions /// is in fact used in the current function. 
unsigned UsedInThisFunction; @@ -556,7 +556,7 @@ namespace { char ObjCARCOpt::ID = 0; INITIALIZE_PASS_BEGIN(ObjCARCOpt, "objc-arc", "ObjC ARC optimization", false, false) -INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass) INITIALIZE_PASS_END(ObjCARCOpt, "objc-arc", "ObjC ARC optimization", false, false) @@ -565,8 +565,8 @@ Pass *llvm::createObjCARCOptPass() { } void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); // ARC optimization doesn't currently split critical edges. AU.setPreservesCFG(); } @@ -581,16 +581,18 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { ImmutableCallSite CS(Arg); if (const Instruction *Call = CS.getInstruction()) { if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; + BasicBlock::const_iterator I(Call); ++I; - while (IsNoopInstruction(I)) ++I; + while (IsNoopInstruction(&*I)) + ++I; if (&*I == RetainRV) return false; } else if (const InvokeInst *II = dyn_cast(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { BasicBlock::const_iterator I = RetainRVParent->begin(); - while (IsNoopInstruction(I)) ++I; + while (IsNoopInstruction(&*I)) + ++I; if (&*I == RetainRV) return false; } @@ -599,18 +601,21 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // Check for being preceded by an objc_autoreleaseReturnValue on the same // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); + BasicBlock::iterator I = RetainRV->getIterator(), + Begin = RetainRV->getParent()->begin(); if (I != Begin) { - do --I; while (I != Begin && IsNoopInstruction(I)); - if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV && - GetArgRCIdentityRoot(I) == Arg) { + do + --I; + while (I != Begin && IsNoopInstruction(&*I)); + if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV && + GetArgRCIdentityRoot(&*I) == Arg) { Changed = true; ++NumPeeps; DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" << "Erasing " << *RetainRV << "\n"); - EraseInstruction(I); + EraseInstruction(&*I); EraseInstruction(RetainRV); return true; } @@ -1216,7 +1221,7 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { - Instruction *Inst = std::prev(I); + Instruction *Inst = &*std::prev(I); // Invoke instructions are visited as part of their successors (below). if (isa(Inst)) @@ -1264,7 +1269,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetArgRCIdentityRoot(Inst); TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); NestingDetected |= S.InitTopDown(Class, Inst); - // A retain can be a potential use; procede to the generic checking + // A retain can be a potential use; proceed to the generic checking // code below. break; } @@ -1342,12 +1347,10 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, << "Performing Dataflow:\n"); // Visit all the instructions, top-down. 
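// Editor's illustration (hypothetical IR) for the autoreleaseRV/retainRV
// peephole in OptimizeRetainRVCall above:
//   %r = call i8* @objc_autoreleaseReturnValue(i8* %x)
//   %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
// When the two calls are adjacent (modulo no-op casts) and operate on the
// same pointer, they cancel out and both are erased.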
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; + for (Instruction &Inst : *BB) { + DEBUG(dbgs() << " Visiting " << Inst << "\n"); - DEBUG(dbgs() << " Visiting " << *Inst << "\n"); - - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); + NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates); } DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n" @@ -1413,16 +1416,15 @@ ComputePostOrders(Function &F, // Functions may have many exits, and there also blocks which we treat // as exits due to ignored edges. SmallVector, 16> PredStack; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *ExitBB = I; - BBState &MyStates = BBStates[ExitBB]; + for (BasicBlock &ExitBB : F) { + BBState &MyStates = BBStates[&ExitBB]; if (!MyStates.isExit()) continue; MyStates.SetAsExit(); - PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); - Visited.insert(ExitBB); + PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin())); + Visited.insert(&ExitBB); while (!PredStack.empty()) { reverse_dfs_next_succ: BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); @@ -1830,7 +1832,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { // analysis too, but that would want caching. A better approach would be to // use the technique that EarlyCSE uses. inst_iterator Current = std::prev(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + BasicBlock *CurrentBB = &*Current.getBasicBlockIterator(); for (BasicBlock::iterator B = CurrentBB->begin(), J = Current.getInstructionIterator(); J != B; --J) { @@ -2008,10 +2010,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, // Check that the call is a regular call. ARCInstKind Class = GetBasicARCInstKind(Call); - if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call) - return false; - - return true; + return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call; } /// Find a dependent retain that precedes the given autorelease for which there @@ -2081,9 +2080,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { SmallPtrSet DependingInstructions; SmallPtrSet Visited; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - BasicBlock *BB = FI; - ReturnInst *Ret = dyn_cast(&BB->back()); + for (BasicBlock &BB: F) { + ReturnInst *Ret = dyn_cast(&BB.back()); DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); @@ -2095,19 +2093,16 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { // Look for an ``autorelease'' instruction that is a predecessor of Ret and // dependent on Arg such that there are no instructions dependent on Arg // that need a positive ref count in between the autorelease and Ret. 
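// Editor's illustration (hypothetical IR, not from this patch): the shape
// OptimizeReturns is matching is
//   %3 = call i8* @objc_retain(i8* %2)
//   ...                     ; nothing here needs a positive ref count on %2
//   %4 = call i8* @objc_autorelease(i8* %2)
//   ret i8* %2
// When both calls are found and the safety checks pass, the retain and the
// autorelease cancel out and both are erased.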
- CallInst *Autorelease = - FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret, - DependingInstructions, Visited, - PA); + CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath( + Arg, &BB, Ret, DependingInstructions, Visited, PA); DependingInstructions.clear(); Visited.clear(); if (!Autorelease) continue; - CallInst *Retain = - FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, - DependingInstructions, Visited, PA); + CallInst *Retain = FindPredecessorRetainWithSafePath( + Arg, &BB, Autorelease, DependingInstructions, Visited, PA); DependingInstructions.clear(); Visited.clear(); @@ -2192,7 +2187,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) { DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" "\n"); - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); #ifndef NDEBUG if (AreStatisticsEnabled()) { diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index 0ac41d3ea326..1a12b659e5a3 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -26,10 +26,10 @@ #define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" namespace llvm { class Value; - class AliasAnalysis; class DataLayout; class PHINode; class SelectInst; diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index 0be75af52014..c274e8182fb5 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -35,7 +35,7 @@ char PAEval::ID = 0; PAEval::PAEval() : FunctionPass(ID) {} void PAEval::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); } static StringRef getName(Value *V) { @@ -65,7 +65,7 @@ bool PAEval::runOnFunction(Function &F) { } ProvenanceAnalysis PA; - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); const DataLayout &DL = F.getParent()->getDataLayout(); for (Value *V1 : Values) { @@ -89,6 +89,6 @@ FunctionPass *llvm::createPAEvalPass() { return new PAEval(); } INITIALIZE_PASS_BEGIN(PAEval, "pa-eval", "Evaluate ProvenanceAnalysis on all pairs", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(PAEval, "pa-eval", "Evaluate ProvenanceAnalysis on all pairs", false, true) diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index ae20e7e6d347..df64fa32f3f8 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -256,9 +256,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, // one of its successor blocks, since we can't insert code after it // in its own block, and we don't want to split critical edges. if (isa(Inst)) - InsertReverseInsertPt(BB->getFirstInsertionPt()); + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); else - InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + InsertReverseInsertPt(&*++Inst->getIterator()); SetSeq(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " @@ -268,9 +268,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, assert(!HasReverseInsertPts()); // As above; handle invoke specially. 
if (isa(Inst)) - InsertReverseInsertPt(BB->getFirstInsertionPt()); + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); else - InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + InsertReverseInsertPt(&*++Inst->getIterator()); } break; case S_Stop: diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h index e45e1ea96c53..9749e44822b2 100644 --- a/lib/Transforms/ObjCARC/PtrState.h +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -17,8 +17,8 @@ #ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H #define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H -#include "ARCInstKind.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" #include "llvm/Support/raw_ostream.h" @@ -96,7 +96,7 @@ struct RRInfo { }; /// \brief This class summarizes several per-pointer runtime properties which -/// are propogated through the flow graph. +/// are propagated through the flow graph. class PtrState { protected: /// True if the reference count is known to be incremented. @@ -172,7 +172,7 @@ struct BottomUpPtrState : PtrState { bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I); /// Return true if this set of releases can be paired with a release. Modifies - /// state appropriately to reflect that the matching occured if it is + /// state appropriately to reflect that the matching occurred if it is /// successful. /// /// It is assumed that one has already checked that the RCIdentity of the @@ -194,7 +194,7 @@ struct TopDownPtrState : PtrState { /// Return true if this set of retains can be paired with the given /// release. Modifies state appropriately to reflect that the matching - /// occured. + /// occurred. bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release); void HandlePotentialUse(Instruction *Inst, const Value *Ptr, diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index d6fc91641588..590a52da6b19 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -1,4 +1,4 @@ -//===- DCE.cpp - Code to perform dead code elimination --------------------===// +//===- ADCE.cpp - Code to perform dead code elimination -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,52 +14,33 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "adce" STATISTIC(NumRemoved, "Number of instructions removed"); -namespace { -struct ADCE : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(ID) { - initializeADCEPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function& F) override; - - void getAnalysisUsage(AnalysisUsage& AU) const override { - AU.setPreservesCFG(); - } -}; -} - -char ADCE::ID = 0; -INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) - -bool ADCE::runOnFunction(Function& F) { - if (skipOptnoneFunction(F)) - return false; - +static bool aggressiveDCE(Function& F) { 
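// Editor's sketch (hypothetical toy model, not LLVM API): the function below
// is classic mark-and-sweep over instructions; the same shape in miniature:
//
//   #include <set>
//   #include <vector>
//   struct Node { std::vector<Node *> Ops; bool Root = false; };
//   std::set<Node *> markLive(const std::vector<Node *> &All) {
//     std::set<Node *> Alive;
//     std::vector<Node *> Worklist;
//     for (Node *N : All)            // roots: externally visible effects
//       if (N->Root) { Alive.insert(N); Worklist.push_back(N); }
//     while (!Worklist.empty()) {    // propagate liveness to operands
//       Node *N = Worklist.back(); Worklist.pop_back();
//       for (Node *Op : N->Ops)
//         if (Alive.insert(Op).second) Worklist.push_back(Op);
//     }
//     return Alive;                  // everything not in Alive is dead
//   }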
SmallPtrSet Alive; SmallVector Worklist; // Collect the set of "root" instructions that are known live. - for (Instruction &I : inst_range(F)) { - if (isa(I) || isa(I) || - isa(I) || I.mayHaveSideEffects()) { + for (Instruction &I : instructions(F)) { + if (isa(I) || isa(I) || I.isEHPad() || + I.mayHaveSideEffects()) { Alive.insert(&I); Worklist.push_back(&I); } @@ -79,7 +60,7 @@ bool ADCE::runOnFunction(Function& F) { // which have no side effects and do not influence the control flow or return // value of the function, and may therefore be deleted safely. // NOTE: We reuse the Worklist vector here for memory efficiency. - for (Instruction &I : inst_range(F)) { + for (Instruction &I : instructions(F)) { if (!Alive.count(&I)) { Worklist.push_back(&I); I.dropAllReferences(); @@ -94,6 +75,34 @@ bool ADCE::runOnFunction(Function& F) { return !Worklist.empty(); } -FunctionPass *llvm::createAggressiveDCEPass() { - return new ADCE(); +PreservedAnalyses ADCEPass::run(Function &F) { + if (aggressiveDCE(F)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } + +namespace { +struct ADCELegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ADCELegacyPass() : FunctionPass(ID) { + initializeADCELegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function& F) override { + if (skipOptnoneFunction(F)) + return false; + return aggressiveDCE(F); + } + + void getAnalysisUsage(AnalysisUsage& AU) const override { + AU.setPreservesCFG(); + AU.addPreserved(); + } +}; +} + +char ADCELegacyPass::ID = 0; +INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination", + false, false) + +FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); } diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 8918909f484a..4b721d38adba 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -21,6 +21,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -54,13 +56,15 @@ struct AlignmentFromAssumptions : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); } // For memory transfers, we need a common alignment for both the source and @@ -84,7 +88,7 @@ INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME, aip_name, false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME, aip_name, false, false) @@ -249,8 +253,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I, // The mask must have some trailing ones (otherwise the condition is // trivial and tells us nothing about the alignment of the left operand). 
- unsigned TrailingOnes = - MaskSCEV->getValue()->getValue().countTrailingOnes(); + unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes(); if (!TrailingOnes) return false; @@ -270,7 +273,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I, OffSCEV = nullptr; if (PtrToIntInst *PToI = dyn_cast(AndLHS)) { AAPtr = PToI->getPointerOperand(); - OffSCEV = SE->getConstant(Int64Ty, 0); + OffSCEV = SE->getZero(Int64Ty); } else if (const SCEVAddExpr* AndLHSAddSCEV = dyn_cast(AndLHSSCEV)) { // Try to find the ptrtoint; subtract it and the rest is the offset. @@ -410,7 +413,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { bool AlignmentFromAssumptions::runOnFunction(Function &F) { bool Changed = false; auto &AC = getAnalysis().getAssumptionCache(F); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); NewDestAlignments.clear(); diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 09c605e76737..cb9b8b6fffc8 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -15,26 +15,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; #define DEBUG_TYPE "bdce" @@ -53,342 +45,42 @@ struct BDCE : public FunctionPass { void getAnalysisUsage(AnalysisUsage& AU) const override { AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); } - - void determineLiveOperandBits(const Instruction *UserI, - const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2); - - AssumptionCache *AC; - DominatorTree *DT; }; } char BDCE::ID = 0; INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DemandedBits) INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", false, false) -static bool isAlwaysLive(Instruction *I) { - return isa(I) || isa(I) || - isa(I) || I->mayHaveSideEffects(); -} - -void BDCE::determineLiveOperandBits(const Instruction *UserI, - const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2) { - unsigned BitWidth = AB.getBitWidth(); - - // We're called once per operand, but for some instructions, we need to - // compute known bits of both operands in order to determine the live bits of - // either (when both operands are instructions themselves). We don't, - // however, want to do this twice, so we cache the result in APInts that live - // in the caller. 
For the two-relevant-operands case, both operand values are - // provided here. - auto ComputeKnownBits = - [&](unsigned BitWidth, const Value *V1, const Value *V2) { - const DataLayout &DL = I->getModule()->getDataLayout(); - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast(V1), KnownZero, KnownOne, DL, 0, - AC, UserI, DT); - - if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast(V2), KnownZero2, KnownOne2, DL, - 0, AC, UserI, DT); - } - }; - - switch (UserI->getOpcode()) { - default: break; - case Instruction::Call: - case Instruction::Invoke: - if (const IntrinsicInst *II = dyn_cast(UserI)) - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: - // The alive bits of the input are the swapped alive bits of - // the output. - AB = AOut.byteSwap(); - break; - case Intrinsic::ctlz: - if (OperandNo == 0) { - // We need some output bits, so we need all bits of the - // input to the left of, and including, the leftmost bit - // known to be one. - ComputeKnownBits(BitWidth, I, nullptr); - AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countLeadingZeros()+1)); - } - break; - case Intrinsic::cttz: - if (OperandNo == 0) { - // We need some output bits, so we need all bits of the - // input to the right of, and including, the rightmost bit - // known to be one. - ComputeKnownBits(BitWidth, I, nullptr); - AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countTrailingZeros()+1)); - } - break; - } - break; - case Instruction::Add: - case Instruction::Sub: - // Find the highest live output bit. We don't need any more input - // bits than that (adds, and thus subtracts, ripple only to the - // left). - AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits()); - break; - case Instruction::Shl: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.lshr(ShiftAmt); - - // If the shift is nuw/nsw, then the high bits are not dead - // (because we've promised that they *must* be zero). - const ShlOperator *S = cast(UserI); - if (S->hasNoSignedWrap()) - AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); - else if (S->hasNoUnsignedWrap()) - AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::LShr: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.shl(ShiftAmt); - - // If the shift is exact, then the low bits are not dead - // (they must be zero). - if (cast(UserI)->isExact()) - AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::AShr: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.shl(ShiftAmt); - // Because the high input bit is replicated into the - // high-order bits of the result, if we need any of those - // bits, then we must keep the highest input bit. - if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) - .getBoolValue()) - AB.setBit(BitWidth-1); - - // If the shift is exact, then the low bits are not dead - // (they must be zero). 
- if (cast(UserI)->isExact()) - AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::And: - AB = AOut; - - // For bits that are known zero, the corresponding bits in the - // other operand are dead (unless they're both zero, in which - // case they can't both be dead, so just mark the LHS bits as - // dead). - if (OperandNo == 0) { - ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownZero2; - } else { - if (!isa(UserI->getOperand(0))) - ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownZero & ~KnownZero2); - } - break; - case Instruction::Or: - AB = AOut; - - // For bits that are known one, the corresponding bits in the - // other operand are dead (unless they're both one, in which - // case they can't both be dead, so just mark the LHS bits as - // dead). - if (OperandNo == 0) { - ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownOne2; - } else { - if (!isa(UserI->getOperand(0))) - ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownOne & ~KnownOne2); - } - break; - case Instruction::Xor: - case Instruction::PHI: - AB = AOut; - break; - case Instruction::Trunc: - AB = AOut.zext(BitWidth); - break; - case Instruction::ZExt: - AB = AOut.trunc(BitWidth); - break; - case Instruction::SExt: - AB = AOut.trunc(BitWidth); - // Because the high input bit is replicated into the - // high-order bits of the result, if we need any of those - // bits, then we must keep the highest input bit. - if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), - AOut.getBitWidth() - BitWidth)) - .getBoolValue()) - AB.setBit(BitWidth-1); - break; - case Instruction::Select: - if (OperandNo != 0) - AB = AOut; - break; - } -} - bool BDCE::runOnFunction(Function& F) { if (skipOptnoneFunction(F)) return false; + DemandedBits &DB = getAnalysis(); - AC = &getAnalysis().getAssumptionCache(F); - DT = &getAnalysis().getDomTree(); - - DenseMap AliveBits; SmallVector Worklist; - - // The set of visited instructions (non-integer-typed only). - SmallPtrSet Visited; - - // Collect the set of "root" instructions that are known live. - for (Instruction &I : inst_range(F)) { - if (!isAlwaysLive(&I)) - continue; - - DEBUG(dbgs() << "BDCE: Root: " << I << "\n"); - // For integer-valued instructions, set up an initial empty set of alive - // bits and add the instruction to the work list. For other instructions - // add their operands to the work list (for integer values operands, mark - // all bits as live). - if (IntegerType *IT = dyn_cast(I.getType())) { - if (!AliveBits.count(&I)) { - AliveBits[&I] = APInt(IT->getBitWidth(), 0); - Worklist.push_back(&I); - } - - continue; - } - - // Non-integer-typed instructions... - for (Use &OI : I.operands()) { - if (Instruction *J = dyn_cast(OI)) { - if (IntegerType *IT = dyn_cast(J->getType())) - AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); - Worklist.push_back(J); - } - } - // To save memory, we don't add I to the Visited set here. Instead, we - // check isAlwaysLive on every instruction when searching for dead - // instructions later (we need to check isAlwaysLive for the - // integer-typed instructions anyway). - } - - // Propagate liveness backwards to operands. 
- while (!Worklist.empty()) { - Instruction *UserI = Worklist.pop_back_val(); - - DEBUG(dbgs() << "BDCE: Visiting: " << *UserI); - APInt AOut; - if (UserI->getType()->isIntegerTy()) { - AOut = AliveBits[UserI]; - DEBUG(dbgs() << " Alive Out: " << AOut); - } - DEBUG(dbgs() << "\n"); - - if (!UserI->getType()->isIntegerTy()) - Visited.insert(UserI); - - APInt KnownZero, KnownOne, KnownZero2, KnownOne2; - // Compute the set of alive bits for each operand. These are anded into the - // existing set, if any, and if that changes the set of alive bits, the - // operand is added to the work-list. - for (Use &OI : UserI->operands()) { - if (Instruction *I = dyn_cast(OI)) { - if (IntegerType *IT = dyn_cast(I->getType())) { - unsigned BitWidth = IT->getBitWidth(); - APInt AB = APInt::getAllOnesValue(BitWidth); - if (UserI->getType()->isIntegerTy() && !AOut && - !isAlwaysLive(UserI)) { - AB = APInt(BitWidth, 0); - } else { - // If all bits of the output are dead, then all bits of the input - // Bits of each operand that are used to compute alive bits of the - // output are alive, all others are dead. - determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, - KnownZero, KnownOne, - KnownZero2, KnownOne2); - } - - // If we've added to the set of alive bits (or the operand has not - // been previously visited), then re-queue the operand to be visited - // again. - APInt ABPrev(BitWidth, 0); - auto ABI = AliveBits.find(I); - if (ABI != AliveBits.end()) - ABPrev = ABI->second; - - APInt ABNew = AB | ABPrev; - if (ABNew != ABPrev || ABI == AliveBits.end()) { - AliveBits[I] = std::move(ABNew); - Worklist.push_back(I); - } - } else if (!Visited.count(I)) { - Worklist.push_back(I); - } - } - } - } - bool Changed = false; - // The inverse of the live set is the dead set. These are those instructions - // which have no side effects and do not influence the control flow or return - // value of the function, and may therefore be deleted safely. - // NOTE: We reuse the Worklist vector here for memory efficiency. - for (Instruction &I : inst_range(F)) { - // For live instructions that have all dead bits, first make them dead by - // replacing all uses with something else. Then, if they don't need to - // remain live (because they have side effects, etc.) we can remove them. - if (I.getType()->isIntegerTy()) { - auto ABI = AliveBits.find(&I); - if (ABI != AliveBits.end()) { - if (ABI->second.getBoolValue()) - continue; - - DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); - // FIXME: In theory we could substitute undef here instead of zero. - // This should be reconsidered once we settle on the semantics of - // undef, poison, etc. - Value *Zero = ConstantInt::get(I.getType(), 0); - ++NumSimplified; - I.replaceAllUsesWith(Zero); - Changed = true; - } - } else if (Visited.count(&I)) { - continue; + for (Instruction &I : instructions(F)) { + if (I.getType()->isIntegerTy() && + !DB.getDemandedBits(&I).getBoolValue()) { + // For live instructions that have all dead bits, first make them dead by + // replacing all uses with something else. Then, if they don't need to + // remain live (because they have side effects, etc.) we can remove them. + DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + // FIXME: In theory we could substitute undef here instead of zero. + // This should be reconsidered once we settle on the semantics of + // undef, poison, etc. 
+ Value *Zero = ConstantInt::get(I.getType(), 0); + ++NumSimplified; + I.replaceAllUsesWith(Zero); + Changed = true; } - - if (isAlwaysLive(&I)) + if (!DB.isInstructionDead(&I)) continue; Worklist.push_back(&I); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 7ee279a56600..a0ddbd085206 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMScalarOpts LoopIdiomRecognize.cpp LoopInstSimplify.cpp LoopInterchange.cpp + LoopLoadElimination.cpp LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp @@ -38,7 +39,6 @@ add_llvm_library(LLVMScalarOpts RewriteStatepointsForGC.cpp SCCP.cpp SROA.cpp - SampleProfile.cpp Scalar.cpp ScalarReplAggregates.cpp Scalarizer.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 4288742dd3eb..84f7f5fff5b5 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -223,10 +223,10 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, } // The simple and common case. This also includes constant expressions. - if (!isa(Inst) && !isa(Inst)) + if (!isa(Inst) && !Inst->isEHPad()) return Inst; - // We can't insert directly before a phi node or landing pad. Insert before + // We can't insert directly before a phi node or an eh pad. Insert before // the terminator of the incoming or dominating block. assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!"); if (Idx != ~0U && isa(Inst)) @@ -365,9 +365,9 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap, /// into an instruction itself. void ConstantHoisting::collectConstantCandidates(Function &Fn) { ConstCandMapType ConstCandMap; - for (Function::iterator BB : Fn) - for (BasicBlock::iterator Inst : *BB) - collectConstantCandidates(ConstCandMap, Inst); + for (BasicBlock &BB : Fn) + for (Instruction &Inst : BB) + collectConstantCandidates(ConstCandMap, &Inst); } /// \brief Find the base constant within the given range and rebase all other diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 79624b2e4c47..686bd4071104 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/IR/CFG.h" @@ -32,6 +33,7 @@ STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); STATISTIC(NumCmps, "Number of comparisons propagated"); +STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); namespace { @@ -43,6 +45,11 @@ namespace { bool processMemAccess(Instruction *I); bool processCmp(CmpInst *C); bool processSwitch(SwitchInst *SI); + bool processCallSite(CallSite CS); + + /// Return a constant value for V usable at At and everything it + /// dominates. If no such Constant can be found, return nullptr. 
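// Editor's illustration (hypothetical IR): because getConstantAt leans on
// LazyValueInfo, it can fold values that plain constant propagation cannot:
//   %c = icmp eq i32 %x, 7
//   br i1 %c, label %t, label %f
// t:
//   ret i1 %c          ; LVI proves %c is true on this path -> ret i1 true
// The Ret handler in runOnFunction uses this to rewrite return values
// (counted by NumReturns).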
+ Constant *getConstantAt(Value *V, Instruction *At); public: static char ID; @@ -54,6 +61,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); } }; } @@ -178,44 +186,33 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { return true; } -/// processCmp - If the value of this comparison could be determined locally, -/// constant propagation would already have figured it out. Instead, walk -/// the predecessors and statically evaluate the comparison based on information -/// available on that edge. If a given static evaluation is true on ALL -/// incoming edges, then it's true universally and we can simplify the compare. +/// processCmp - See if LazyValueInfo's ability to exploit edge conditions, +/// or range information is sufficient to prove this comparison. Even for +/// local conditions, this can sometimes prove conditions instcombine can't by +/// exploiting range information. bool CorrelatedValuePropagation::processCmp(CmpInst *C) { Value *Op0 = C->getOperand(0); - if (isa(Op0) && - cast(Op0)->getParent() == C->getParent()) - return false; - Constant *Op1 = dyn_cast(C->getOperand(1)); if (!Op1) return false; - pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); - if (PI == PE) return false; + // As a policy choice, we choose not to waste compile time on anything where + // the comparison is testing local values. While LVI can sometimes reason + // about such cases, it's not its primary purpose. We do make sure to do + // the block local query for uses from terminator instructions, but that's + // handled in the code for each terminator. + auto *I = dyn_cast(Op0); + if (I && I->getParent() == C->getParent()) + return false; - LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), - C->getOperand(0), Op1, *PI, - C->getParent(), C); + LazyValueInfo::Tristate Result = + LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C); if (Result == LazyValueInfo::Unknown) return false; - ++PI; - while (PI != PE) { - LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), - C->getOperand(0), Op1, *PI, - C->getParent(), C); - if (Res != Result) return false; - ++PI; - } - ++NumCmps; - if (Result == LazyValueInfo::True) C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); else C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); - C->eraseFromParent(); return true; @@ -307,6 +304,59 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { return Changed; } +/// processCallSite - Infer nonnull attributes for the arguments at the +/// specified callsite. 
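// Editor's illustration (hypothetical IR): when a dominating check proves an
// argument cannot be null at the call, the parameter gets annotated:
//   %ok = icmp ne i8* %p, null
//   br i1 %ok, label %call, label %skip
// call:
//   call void @use(i8* %p)   ; becomes: call void @use(i8* nonnull %p)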
+bool CorrelatedValuePropagation::processCallSite(CallSite CS) { + SmallVector Indices; + unsigned ArgNo = 0; + + for (Value *V : CS.args()) { + PointerType *Type = dyn_cast(V->getType()); + + if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) && + LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, + ConstantPointerNull::get(Type), + CS.getInstruction()) == LazyValueInfo::False) + Indices.push_back(ArgNo + 1); + ArgNo++; + } + + assert(ArgNo == CS.arg_size() && "sanity check"); + + if (Indices.empty()) + return false; + + AttributeSet AS = CS.getAttributes(); + LLVMContext &Ctx = CS.getInstruction()->getContext(); + AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull)); + CS.setAttributes(AS); + + return true; +} + +Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) { + if (Constant *C = LVI->getConstant(V, At->getParent(), At)) + return C; + + // TODO: The following really should be sunk inside LVI's core algorithm, or + // at least the outer shims around such. + auto *C = dyn_cast(V); + if (!C) return nullptr; + + Value *Op0 = C->getOperand(0); + Constant *Op1 = dyn_cast(C->getOperand(1)); + if (!Op1) return nullptr; + + LazyValueInfo::Tristate Result = + LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At); + if (Result == LazyValueInfo::Unknown) + return nullptr; + + return (Result == LazyValueInfo::True) ? + ConstantInt::getTrue(C->getContext()) : + ConstantInt::getFalse(C->getContext()); +} + bool CorrelatedValuePropagation::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; @@ -318,7 +368,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { bool BBChanged = false; for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { - Instruction *II = BI++; + Instruction *II = &*BI++; switch (II->getOpcode()) { case Instruction::Select: BBChanged |= processSelect(cast(II)); @@ -334,6 +384,10 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { case Instruction::Store: BBChanged |= processMemAccess(II); break; + case Instruction::Call: + case Instruction::Invoke: + BBChanged |= processCallSite(CallSite(II)); + break; } } @@ -342,7 +396,21 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { case Instruction::Switch: BBChanged |= processSwitch(cast(Term)); break; + case Instruction::Ret: { + auto *RI = cast(Term); + // Try to determine the return value if we can. This is mainly here to + // simplify the writing of unit tests, but also helps to enable IPO by + // constant folding the return values of callees. + auto *RetVal = RI->getReturnValue(); + if (!RetVal) break; // handle "ret void" + if (isa(RetVal)) break; // nothing to do + if (auto *C = getConstantAt(RetVal, RI)) { + ++NumReturns; + RI->replaceUsesOfWith(RetVal, C); + BBChanged = true; + } } + }; FnChanged |= BBChanged; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 3b262a23091f..b67c3c7742fd 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" @@ -46,7 +47,7 @@ namespace { TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI() : nullptr; bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { - Instruction *Inst = DI++; + Instruction *Inst = &*DI++; if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; @@ -92,6 +93,34 @@ namespace { char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) +static bool DCEInstruction(Instruction *I, + SmallSetVector &WorkList, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + ++DCEEliminated; + return true; + } + return false; +} + bool DCE::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; @@ -99,39 +128,24 @@ bool DCE::runOnFunction(Function &F) { auto *TLIP = getAnalysisIfAvailable(); TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - // Start out with all of the instructions in the worklist... - std::vector WorkList; - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) - WorkList.push_back(&*i); - - // Loop over the worklist finding instructions that are dead. If they are - // dead make them drop all of their uses, making other instructions - // potentially dead, and work until the worklist is empty. - // bool MadeChange = false; + SmallSetVector WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (inst_iterator FI = inst_begin(F), FE = inst_end(F); FI != FE;) { + Instruction *I = &*FI; + ++FI; + + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. + if (!WorkList.count(I)) + MadeChange |= DCEInstruction(I, WorkList, TLI); + } + while (!WorkList.empty()) { - Instruction *I = WorkList.back(); - WorkList.pop_back(); - - if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. - // Loop over all of the values that the instruction uses, if there are - // instructions being used, add them to the worklist, because they might - // go dead after this one is removed. - // - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *Used = dyn_cast(*OI)) - WorkList.push_back(Used); - - // Remove the instruction. - I->eraseFromParent(); - - // Remove the instruction from the worklist if it still exists in it. 
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I), - WorkList.end()); - - MadeChange = true; - ++DCEEliminated; - } + Instruction *I = WorkList.pop_back_val(); + MadeChange |= DCEInstruction(I, WorkList, TLI); } return MadeChange; } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c50558434da2..36ad0a5f7b91 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -40,6 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "dse" +STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); @@ -59,23 +61,24 @@ namespace { if (skipOptnoneFunction(F)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); MD = &getAnalysis(); DT = &getAnalysis().getDomTree(); - TLI = AA->getTargetLibraryInfo(); + TLI = &getAnalysis().getTLI(); bool Changed = false; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + for (BasicBlock &I : F) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. - if (DT->isReachableFromEntry(I)) - Changed |= runOnBasicBlock(*I); + if (DT->isReachableFromEntry(&I)) + Changed |= runOnBasicBlock(I); AA = nullptr; MD = nullptr; DT = nullptr; return Changed; } bool runOnBasicBlock(BasicBlock &BB); + bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, @@ -85,10 +88,11 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); } }; @@ -97,8 +101,10 @@ namespace { char DSE::ID = 0; INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -115,7 +121,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, - const TargetLibraryInfo *TLI, + const TargetLibraryInfo &TLI, SmallSetVector *ValueSet = nullptr) { SmallVector NowDeadInsts; @@ -140,7 +146,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast(Op)) - if (isInstructionTriviallyDead(OpI, TLI)) + if (isInstructionTriviallyDead(OpI, &TLI)) NowDeadInsts.push_back(OpI); } @@ -153,7 +159,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some 
memory? This only returns /// true for things that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { if (isa(I)) return true; if (IntrinsicInst *II = dyn_cast(I)) { @@ -170,20 +176,20 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { } if (auto CS = CallSite(I)) { if (Function *F = CS.getCalledFunction()) { - if (TLI && TLI->has(LibFunc::strcpy) && - F->getName() == TLI->getName(LibFunc::strcpy)) { + if (TLI.has(LibFunc::strcpy) && + F->getName() == TLI.getName(LibFunc::strcpy)) { return true; } - if (TLI && TLI->has(LibFunc::strncpy) && - F->getName() == TLI->getName(LibFunc::strncpy)) { + if (TLI.has(LibFunc::strncpy) && + F->getName() == TLI.getName(LibFunc::strncpy)) { return true; } - if (TLI && TLI->has(LibFunc::strcat) && - F->getName() == TLI->getName(LibFunc::strcat)) { + if (TLI.has(LibFunc::strcat) && + F->getName() == TLI.getName(LibFunc::strcat)) { return true; } - if (TLI && TLI->has(LibFunc::strncat) && - F->getName() == TLI->getName(LibFunc::strncat)) { + if (TLI.has(LibFunc::strncat) && + F->getName() == TLI.getName(LibFunc::strncat)) { return true; } } @@ -224,9 +230,9 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// getLocForRead - Return the location read by the specified "hasMemoryWrite" /// instruction if any. -static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && - "Unknown instruction case"); +static MemoryLocation getLocForRead(Instruction *Inst, + const TargetLibraryInfo &TLI) { + assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -313,9 +319,9 @@ static Value *getStoredPointerOperand(Instruction *I) { } static uint64_t getPointerSize(const Value *V, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo &TLI) { uint64_t Size; - if (getObjectSize(V, Size, DL, TLI)) + if (getObjectSize(V, Size, DL, &TLI)) return Size; return MemoryLocation::UnknownSize; } @@ -336,7 +342,7 @@ namespace { static OverwriteResult isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, const DataLayout &DL, - const TargetLibraryInfo *TLI, + const TargetLibraryInfo &TLI, int64_t &EarlierOff, int64_t &LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -442,10 +448,12 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later, /// because the DSE inducing instruction may be a self-read. static bool isPossibleSelfRead(Instruction *Inst, const MemoryLocation &InstStoreLoc, - Instruction *DepWrite, AliasAnalysis &AA) { + Instruction *DepWrite, + const TargetLibraryInfo &TLI, + AliasAnalysis &AA) { // Self reads can only happen for instructions that read memory. Get the // location read. - MemoryLocation InstReadLoc = getLocForRead(Inst, AA); + MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); if (!InstReadLoc.Ptr) return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. @@ -459,7 +467,7 @@ static bool isPossibleSelfRead(Instruction *Inst, // Here we don't know if A/B may alias, but we do know that B/B are must // aliases, so removing the first memcpy is safe (assuming it writes <= # // bytes as the second one. 
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA); + MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) return false; @@ -475,11 +483,12 @@ static bool isPossibleSelfRead(Instruction *Inst, //===----------------------------------------------------------------------===// bool DSE::runOnBasicBlock(BasicBlock &BB) { + const DataLayout &DL = BB.getModule()->getDataLayout(); bool MadeChange = false; // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { - Instruction *Inst = BBI++; + Instruction *Inst = &*BBI++; // Handle 'free' calls specially. if (CallInst *F = isFreeCall(Inst, TLI)) { @@ -488,9 +497,61 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst, TLI)) + if (!hasMemoryWrite(Inst, *TLI)) continue; + // If we're storing the same value back to a pointer that we just + // loaded from, then the store can be removed. + if (StoreInst *SI = dyn_cast(Inst)) { + + auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(&*BBI); + + DeleteDeadInstruction(DeadInst, *MD, *TLI); + + if (!NextInst) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. + --BBI; + ++NumRedundantStores; + MadeChange = true; + }; + + if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + isRemovable(SI) && + MemoryIsNotModifiedBetween(DepLoad, SI)) { + + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + + // Remove null stores into the calloc'ed objects + Constant *StoredConstant = dyn_cast(SI->getValueOperand()); + + if (StoredConstant && StoredConstant->isNullValue() && + isRemovable(SI)) { + Instruction *UnderlyingPointer = dyn_cast( + GetUnderlyingObject(SI->getPointerOperand(), DL)); + + if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && + MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) { + DEBUG(dbgs() + << "DSE: Remove null store to the calloc'ed object:\n DEAD: " + << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + } + MemDepResult InstDep = MD->getDependency(Inst); // Ignore any store where we can't find a local dependence. @@ -498,32 +559,6 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (!InstDep.isDef() && !InstDep.isClobber()) continue; - // If we're storing the same value back to a pointer that we just - // loaded from, then the store can be removed. - if (StoreInst *SI = dyn_cast(Inst)) { - if (LoadInst *DepLoad = dyn_cast(InstDep.getInst())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad && isRemovable(SI)) { - DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " - << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); - - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(SI, *MD, TLI); - - if (!NextInst) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. 
- --BBI; - ++NumFastStores; - MadeChange = true; - continue; - } - } - } - // Figure out what location is being stored to. MemoryLocation Loc = getLocForWrite(Inst, *AA); @@ -549,24 +584,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - const DataLayout &DL = BB.getModule()->getDataLayout(); OverwriteResult OR = - isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(), - DepWriteOffset, InstWriteOffset); + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD, TLI); + DeleteDeadInstruction(DepWrite, *MD, *TLI); ++NumFastStores; MadeChange = true; // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. - BBI = Inst; + BBI = Inst->getIterator(); if (BBI != BB.begin()) --BBI; break; @@ -609,10 +642,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (DepWrite == &BB.front()) break; // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) break; - InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); + InstDep = MD->getPointerDependencyFrom(Loc, false, + DepWrite->getIterator(), &BB); } } @@ -624,6 +658,64 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } +/// Returns true if the memory which is accessed by the second instruction is not +/// modified between the first and the second instruction. +/// Precondition: Second instruction must be dominated by the first +/// instruction. +bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI, + Instruction *SecondI) { + SmallVector WorkList; + SmallPtrSet Visited; + BasicBlock::iterator FirstBBI(FirstI); + ++FirstBBI; + BasicBlock::iterator SecondBBI(SecondI); + BasicBlock *FirstBB = FirstI->getParent(); + BasicBlock *SecondBB = SecondI->getParent(); + MemoryLocation MemLoc = MemoryLocation::get(SecondI); + + // Start checking the store-block. + WorkList.push_back(SecondBB); + bool isFirstBlock = true; + + // Check all blocks going backward until we reach the load-block. + while (!WorkList.empty()) { + BasicBlock *B = WorkList.pop_back_val(); + + // Ignore instructions before LI if this is the FirstBB. + BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); + + BasicBlock::iterator EI; + if (isFirstBlock) { + // Ignore instructions after SI if this is the first visit of SecondBB. + assert(B == SecondBB && "first block is not the store block"); + EI = SecondBBI; + isFirstBlock = false; + } else { + // It's not SecondBB or (in case of a loop) the second visit of SecondBB. + // In this case we also have to look at instructions after SI. 
+ EI = B->end(); + } + for (; BI != EI; ++BI) { + Instruction *I = &*BI; + if (I->mayWriteToMemory() && I != SecondI) { + auto Res = AA->getModRefInfo(I, MemLoc); + if (Res != MRI_NoModRef) + return false; + } + } + if (B != FirstBB) { + assert(B != &FirstBB->getParent()->getEntryBlock() && + "Should not hit the entry block because SI must be dominated by LI"); + for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { + if (!Visited.insert(*PredI).second) + continue; + WorkList.push_back(*PredI); + } + } + } + return true; +} + /// Find all blocks that will unconditionally lead to the block BB and append /// them to F. static void FindUnconditionalPreds(SmallVectorImpl &Blocks, @@ -655,10 +747,11 @@ bool DSE::HandleFree(CallInst *F) { Instruction *InstPt = BB->getTerminator(); if (BB == F->getParent()) InstPt = F; - MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); + MemDepResult Dep = + MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -668,10 +761,10 @@ bool DSE::HandleFree(CallInst *F) { if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) break; - Instruction *Next = std::next(BasicBlock::iterator(Dependency)); + auto Next = ++Dependency->getIterator(); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, TLI); + DeleteDeadInstruction(Dependency, *MD, *TLI); ++NumFastStores; MadeChange = true; @@ -704,23 +797,22 @@ bool DSE::handleEndBlock(BasicBlock &BB) { SmallSetVector DeadStackObjects; // Find all of the alloca'd pointers in the entry block. - BasicBlock *Entry = BB.getParent()->begin(); - for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { - if (isa(I)) - DeadStackObjects.insert(I); + BasicBlock &Entry = BB.getParent()->front(); + for (Instruction &I : Entry) { + if (isa(&I)) + DeadStackObjects.insert(&I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) - DeadStackObjects.insert(I); + else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) + DeadStackObjects.insert(&I); } // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. - for (Function::arg_iterator AI = BB.getParent()->arg_begin(), - AE = BB.getParent()->arg_end(); AI != AE; ++AI) - if (AI->hasByValOrInAllocaAttr()) - DeadStackObjects.insert(AI); + for (Argument &AI : BB.getParent()->args()) + if (AI.hasByValOrInAllocaAttr()) + DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); @@ -729,10 +821,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { + if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector Pointers; - GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL); + GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); // Stores to stack values are valid candidates for removal. 
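// [Editor's note] Hedged sketch of the candidate set handleEndBlock() builds
// above: allocas in the entry block, non-escaping (i.e. leaked) malloc-like
// calls, and byval/inalloca arguments are all dead once the function returns,
// so a trailing store into any of them is removable. The helper name is
// illustrative only; the logic mirrors the loops shown above.
static void collectDeadAtFunctionExit(Function &F, const TargetLibraryInfo *TLI,
                                      SmallSetVector<Value *, 16> &Dead) {
  for (Instruction &I : F.front()) {
    if (isa<AllocaInst>(I))
      Dead.insert(&I);
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      Dead.insert(&I); // leaked heap object: stores into it are dead too
  }
  for (Argument &A : F.args())
    if (A.hasByValOrInAllocaAttr())
      Dead.insert(&A);
}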
bool AllDead = true; @@ -744,7 +836,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } if (AllDead) { - Instruction *Dead = BBI++; + Instruction *Dead = &*BBI++; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Objects: "; @@ -757,7 +849,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -765,9 +857,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, TLI)) { - Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); + if (isInstructionTriviallyDead(&*BBI, TLI)) { + Instruction *Inst = &*BBI++; + DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -776,15 +868,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (isa(BBI)) { // Remove allocas from the list of dead stack objects; there can't be // any references before the definition. - DeadStackObjects.remove(BBI); + DeadStackObjects.remove(&*BBI); continue; } - if (auto CS = CallSite(BBI)) { + if (auto CS = CallSite(&*BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, TLI)) - DeadStackObjects.remove(BBI); + if (isAllocLikeFn(&*BBI, TLI)) + DeadStackObjects.remove(&*BBI); // If this call does not access memory, it can't be loading any of our // pointers. @@ -795,10 +887,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. - AliasAnalysis::ModRefResult A = AA->getModRefInfo( - CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); + ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)); - return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; + return A == MRI_ModRef || A == MRI_Ref; }); // If all of the allocas were clobbered by the call then we're not going @@ -864,8 +955,7 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc, // Remove objects that could alias LoadedLoc. DeadStackObjects.remove_if([&](Value *I) { // See if the loaded location could alias the stack location. - MemoryLocation StackLoc(I, - getPointerSize(I, DL, AA->getTargetLibraryInfo())); + MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); return !AA->isNoAlias(StackLoc, LoadedLoc); }); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 029b44c2ea80..7ef062e71ff3 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -263,7 +264,6 @@ namespace { /// expected that a later pass of GVN will catch the interesting/hard cases. class EarlyCSE { public: - Function &F; const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; @@ -281,20 +281,37 @@ public: /// that dominated values can succeed in their lookup. ScopedHTType AvailableValues; - /// \brief A scoped hash table of the current values of loads. 
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load.  In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count.  The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load.  In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count.  The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
-  typedef RecyclingAllocator<
-      BumpPtrAllocator,
-      ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+  /// CSE loads with other loads that have no intervening store.  Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations.  Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
+  struct LoadValue {
+    Value *Data;
+    unsigned Generation;
+    int MatchingId;
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
+  };
+  typedef RecyclingAllocator<BumpPtrAllocator,
+                             ScopedHashTableVal<Value *, LoadValue>>
       LoadMapAllocator;
-  typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
-                          DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                          LoadMapAllocator> LoadHTType;
   LoadHTType AvailableLoads;
 
   /// \brief A scoped hash table of the current values of read-only call
@@ -308,10 +325,9 @@ public:
   unsigned CurrentGeneration;
 
   /// \brief Set up the EarlyCSE runner for a particular function.
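  // [Editor's note] The generation scheme above, restated as a standalone
  // predicate (hypothetical, mirroring the condition processNode() checks
  // further down): a recorded LoadValue may be forwarded only when no
  // possibly-writing operation has intervened (same generation), the access
  // is the same kind of memory intrinsic (same MatchingId), and a non-atomic
  // value is never forwarded into an atomic load.
  static bool isReusable(const LoadValue &InVal, unsigned CurrentGeneration,
                         int MatchingId, bool LoadIsAtomic) {
    return InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
           InVal.MatchingId == MatchingId && InVal.IsAtomic >= LoadIsAtomic;
  }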
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI, - const TargetTransformInfo &TTI, DominatorTree &DT, - AssumptionCache &AC) - : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} + EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, + DominatorTree &DT, AssumptionCache &AC) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} bool run(); @@ -382,57 +398,91 @@ private: class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), Vol(false), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { - MayReadFromMemory = Inst->mayReadFromMemory(); - MayWriteToMemory = Inst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - MemIntrinsicInfo Info; - if (!TTI.getTgtMemIntrinsic(II, Info)) - return; - if (Info.NumMemRefs == 1) { - Store = Info.WriteMem; - Load = Info.ReadMem; - MatchingId = Info.MatchingId; - MayReadFromMemory = Info.ReadMem; - MayWriteToMemory = Info.WriteMem; - Vol = Info.Vol; - Ptr = Info.PtrVal; - } - } else if (LoadInst *LI = dyn_cast(Inst)) { - Load = true; - Vol = !LI->isSimple(); - Ptr = LI->getPointerOperand(); - } else if (StoreInst *SI = dyn_cast(Inst)) { - Store = true; - Vol = !SI->isSimple(); - Ptr = SI->getPointerOperand(); + : IsTargetMemInst(false), Inst(Inst) { + if (IntrinsicInst *II = dyn_cast(Inst)) + if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1) + IsTargetMemInst = true; + } + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa(Inst); + } + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa(Inst); + } + bool isAtomic() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; } + return Inst->isAtomic(); } - bool isLoad() { return Load; } - bool isStore() { return Store; } - bool isVolatile() { return Vol; } - bool isMatchingMemLoc(const ParseMemoryInst &Inst) { - return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; + bool isUnordered() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return true; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isUnordered(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isUnordered(); + } + // Conservative answer + return !Inst->isAtomic(); } - bool isValid() { return Ptr != nullptr; } - int getMatchingId() { return MatchingId; } - Value *getPtr() { return Ptr; } - bool mayReadFromMemory() { return MayReadFromMemory; } - bool mayWriteToMemory() { return MayWriteToMemory; } - private: - bool Load; - bool Store; - bool Vol; - bool MayReadFromMemory; - bool MayWriteToMemory; + bool isVolatile() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isVolatile(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isVolatile(); + } + // Conservative answer + return true; + } + + + bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); + } + bool isValid() const { return getPointerOperand() != nullptr; } + // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. 
- int MatchingId; - Value *Ptr; + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->getPointerOperand(); + } + return nullptr; + } + bool mayReadFromMemory() const { + if (IsTargetMemInst) return Info.ReadMem; + return Inst->mayReadFromMemory(); + } + bool mayWriteToMemory() const { + if (IsTargetMemInst) return Info.WriteMem; + return Inst->mayWriteToMemory(); + } + + private: + bool IsTargetMemInst; + MemIntrinsicInfo Info; + Instruction *Inst; }; bool processNode(DomTreeNode *Node); @@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst, &TLI)) { @@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ParseMemoryInst MemInst(Inst, TTI); // If this is a non-volatile load, process it. if (MemInst.isValid() && MemInst.isLoad()) { - // Ignore volatile loads. - if (MemInst.isVolatile()) { + // (conservatively) we can't peak past the ordering implied by this + // operation, but we can add this load to our set of available values + if (MemInst.isVolatile() || !MemInst.isUnordered()) { LastStore = nullptr; - // Don't CSE across synchronization boundaries. - if (Inst->mayWriteToMemory()) - ++CurrentGeneration; - continue; + ++CurrentGeneration; } // If we have an available version of this load, and if it is the right // generation, replace this instruction. - std::pair InVal = - AvailableLoads.lookup(MemInst.getPtr()); - if (InVal.first != nullptr && InVal.second == CurrentGeneration) { - Value *Op = getOrCreateResult(InVal.first, Inst->getType()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing loads with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered() && + // We can't replace an atomic load with one which isn't also atomic. + InVal.IsAtomic >= MemInst.isAtomic()) { + Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst - << " to: " << *InVal.first << '\n'); + << " to: " << *InVal.Data << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(Op); Inst->eraseFromParent(); @@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { } // Otherwise, remember that we have this instruction. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); LastStore = nullptr; continue; } @@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // A release fence requires that all stores complete before it, but does + // not prevent the reordering of following loads 'before' the fence. As a + // result, we don't need to consider it as writing to memory and don't need + // to advance the generation. 
We do need to prevent DSE across the fence, + // but that's handled above. + if (FenceInst *FI = dyn_cast(Inst)) + if (FI->getOrdering() == Release) { + assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above"); + continue; + } + + // write back DSE - If we write back the same value we just loaded from + // the same location and haven't passed any intervening writes or ordering + // operations, we can remove the write. The primary benefit is in allowing + // the available load table to remain valid and value forward past where + // the store originally was. + if (MemInst.isValid() && MemInst.isStore()) { + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + if (InVal.Data && + InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) && + InVal.Generation == CurrentGeneration && + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing stores with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered()) { + assert((!LastStore || + ParseMemoryInst(LastStore, TTI).getPointerOperand() == + MemInst.getPointerOperand()) && + "can't have an intervening store!"); + DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n'); + Inst->eraseFromParent(); + Changed = true; + ++NumDSE; + // We can avoid incrementing the generation count since we were able + // to eliminate this store. + continue; + } + } + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. @@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (MemInst.isValid() && MemInst.isStore()) { // We do a trivial form of DSE if there are two stores to the same // location with no intervening loads. Delete the earlier store. + // At the moment, we don't remove ordered stores, but do remove + // unordered atomic stores. There's no special requirement (for + // unordered atomics) about removing atomic stores only in favor of + // other atomic stores since we we're going to execute the non-atomic + // one anyway and the atomic one might never have become visible. if (LastStore) { ParseMemoryInst LastStoreMemInst(LastStore, TTI); + assert(LastStoreMemInst.isUnordered() && + !LastStoreMemInst.isVolatile() && + "Violated invariant"); if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << *Inst << '\n'); @@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // version of the pointer. It is safe to forward from volatile stores // to non-volatile loads, so we don't have to check for volatility of // the store. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); - // Remember that this was the last store we saw for DSE. - if (!MemInst.isVolatile()) + // Remember that this was the last unordered store we saw for DSE. We + // don't yet handle DSE on ordered or volatile stores since we don't + // have a good way to model the ordering requirement for following + // passes once the store is removed. We could insert a fence, but + // since fences are slightly stronger than stores in their ordering, + // it's not clear this is a profitable transform. Another option would + // be to merge the ordering with that of the post dominating store. 
+ if (MemInst.isUnordered() && !MemInst.isVolatile()) LastStore = Inst; + else + LastStore = nullptr; } } } @@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto &DT = AM->getResult(F); auto &AC = AM->getResult(F); - EarlyCSE CSE(F, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); if (!CSE.run()) return PreservedAnalyses::all(); @@ -751,7 +861,7 @@ public: auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); - EarlyCSE CSE(F, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); return CSE.run(); } @@ -761,6 +871,7 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); } }; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 0430c1898c8d..185cdbdda378 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -30,7 +30,7 @@ public: bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); } private: @@ -41,7 +41,7 @@ private: char FlattenCFGPass::ID = 0; INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, false) @@ -59,7 +59,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end();) { - if (FlattenCFG(BBIt++, AA)) { + if (FlattenCFG(&*BBIt++, AA)) { LocalChange = true; } } @@ -69,7 +69,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } bool FlattenCFGPass::runOnFunction(Function &F) { - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool EverChanged = false; // iterativelyFlattenCFG can make some blocks dead. while (iterativelyFlattenCFG(F, AA)) { diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index c9314229c38b..7f5d78656b50 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" @@ -41,7 +43,7 @@ using namespace llvm; // integer domain inputs, produce an integer output; fadd, for example. // // If a non-mappable instruction is seen, this entire def-use graph is marked -// as non-transformable. If we see an instruction that converts from the +// as non-transformable. If we see an instruction that converts from the // integer domain to FP domain (uitofp,sitofp), we terminate our walk. /// The largest integer type worth dealing with. 
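// [Editor's note] The FP-to-integer re-expression described above rests on
// two small opcode mappings the file defines (mapFCmpPred and mapBinOpcode).
// A hedged sketch of the binop half, assuming only the opcodes the pass can
// actually handle; the function name here is illustrative:
static Instruction::BinaryOps mapToIntOpcode(unsigned FPOpcode) {
  switch (FPOpcode) {
  case Instruction::FAdd: return Instruction::Add;
  case Instruction::FSub: return Instruction::Sub;
  case Instruction::FMul: return Instruction::Mul;
  default: return Instruction::BinaryOpsEnd; // not mappable
  }
}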
@@ -60,6 +62,7 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addPreserved(); } void findRoots(Function &F, SmallPtrSet &Roots); @@ -82,7 +85,9 @@ namespace { } char Float2Int::ID = 0; -INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) +INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false) // Given a FCmp predicate, return a matching ICmp predicate if one // exists, otherwise return BAD_ICMP_PREDICATE. @@ -125,7 +130,9 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // Find the roots - instructions that convert from the FP domain to // integer domain. void Float2Int::findRoots(Function &F, SmallPtrSet &Roots) { - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { + if (isa(I.getType())) + continue; switch (I.getOpcode()) { default: break; case Instruction::FPToUI: @@ -133,7 +140,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet &Roots) { Roots.insert(&I); break; case Instruction::FCmp: - if (mapFCmpPred(cast(&I)->getPredicate()) != + if (mapFCmpPred(cast(&I)->getPredicate()) != CmpInst::BAD_ICMP_PREDICATE) Roots.insert(&I); break; @@ -176,7 +183,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) { // - walkForwards: Iterate over SeenInsts in reverse order, so we visit // defs before their uses. Calculate the real range info. -// Breadth-first walk of the use-def graph; determine the set of nodes +// Breadth-first walk of the use-def graph; determine the set of nodes // we care about and eagerly determine if some of them are poisonous. void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { std::deque Worklist(Roots.begin(), Roots.end()); @@ -222,14 +229,14 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { seen(I, unknownRange()); break; } - + for (Value *O : I->operands()) { if (Instruction *OI = dyn_cast(O)) { // Unify def-use chains if they interfere. ECs.unionSets(I, OI); - if (SeenInsts.find(I)->second != badRange()) + if (SeenInsts.find(I)->second != badRange()) Worklist.push_back(OI); - } else if (!isa(O)) { + } else if (!isa(O)) { // Not an instruction or ConstantFP? we can't do anything. seen(I, badRange()); } @@ -240,11 +247,11 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { // Walk forwards down the list of seen instructions, so we visit defs before // uses. void Float2Int::walkForwards() { - for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) { - if (It->second != unknownRange()) + for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) { + if (It.second != unknownRange()) continue; - Instruction *I = It->first; + Instruction *I = It.first; std::function)> Op; switch (I->getOpcode()) { // FIXME: Handle select and phi nodes. @@ -299,7 +306,7 @@ void Float2Int::walkForwards() { for (Value *O : I->operands()) { if (Instruction *OI = dyn_cast(O)) { assert(SeenInsts.find(OI) != SeenInsts.end() && - "def not seen before use!"); + "def not seen before use!"); OpRanges.push_back(SeenInsts.find(OI)->second); } else if (ConstantFP *CF = dyn_cast(O)) { // Work out if the floating point number can be losslessly represented @@ -314,11 +321,11 @@ void Float2Int::walkForwards() { APFloat F = CF->getValueAPF(); // First, weed out obviously incorrect values. 
Non-finite numbers - // can't be represented and neither can negative zero, unless + // can't be represented and neither can negative zero, unless // we're in fast math mode. if (!F.isFinite() || (F.isZero() && F.isNegative() && isa(I) && - !I->hasNoSignedZeros())) { + !I->hasNoSignedZeros())) { seen(I, badRange()); Abort = true; break; @@ -345,7 +352,7 @@ void Float2Int::walkForwards() { // Reduce the operands' ranges to a single range and return. if (!Abort) - seen(I, Op(OpRanges)); + seen(I, Op(OpRanges)); } } @@ -395,7 +402,7 @@ bool Float2Int::validateAndTransform() { R.isFullSet() || R.isSignWrappedSet()) continue; assert(ConvertedToTy && "Must have set the convertedtoty by this point!"); - + // The number of bits required is the maximum of the upper and // lower limits, plus one so it can be signed. unsigned MinBW = std::max(R.getLower().getMinSignedBits(), @@ -505,9 +512,8 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) { // Perform dead code elimination on the instructions we just modified. void Float2Int::cleanup() { - for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); - I != E; ++I) - I->first->eraseFromParent(); + for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend())) + I.first->eraseFromParent(); } bool Float2Int::runOnFunction(Function &F) { @@ -534,7 +540,4 @@ bool Float2Int::runOnFunction(Function &F) { return Modified; } -FunctionPass *llvm::createFloat2IntPass() { - return new Float2Int(); -} - +FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 89a0d0af93be..a028b8c444ba 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -128,6 +129,7 @@ namespace { uint32_t lookup(Value *V) const; uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); + bool exists(Value *V) const; void add(Value *V, uint32_t num); void clear(); void erase(Value *v); @@ -388,6 +390,9 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { } } +/// Returns true if a value number exists for the specified value. +bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; } + /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. uint32_t ValueTable::lookup_or_add(Value *V) { @@ -608,6 +613,10 @@ namespace { DenseMap LeaderTable; BumpPtrAllocator TableAllocator; + // Block-local map of equivalent values to their leader, does not + // propagate to any successors. Entries added mid-block are applied + // to the remaining instructions in the block. 
+ SmallMapVector ReplaceWithConstMap; SmallVector InstrsToErase; typedef SmallVector LoadDepVect; @@ -689,16 +698,17 @@ namespace { AU.addRequired(); if (!NoLoads) AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); } - // Helper fuctions of redundant load elimination + // Helper functions of redundant load elimination bool processLoad(LoadInst *L); bool processNonLocalLoad(LoadInst *L); + bool processAssumeIntrinsic(IntrinsicInst *II); void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks); @@ -719,7 +729,9 @@ namespace { void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); - bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool replaceOperandsWithConsts(Instruction *I) const; + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, + bool DominatesByEdge); bool processFoldableCondBr(BranchInst *BI); void addDeadBlock(BasicBlock *BB); void assignValNumForDeadCode(); @@ -738,7 +750,8 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1290,8 +1303,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; + for (const AvailableValueInBlock &AV : ValuesPerBlock) { BasicBlock *BB = AV.BB; if (SSAUpdate.HasValueForBlock(BB)) @@ -1301,24 +1313,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, } // Perform PHI construction. - Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); - - // If new PHI nodes were created, notify alias analysis. - if (V->getType()->getScalarType()->isPointerTy()) { - AliasAnalysis *AA = gvn.getAliasAnalysis(); - - // Scan the new PHIs and inform alias analysis that we've added potentially - // escaping uses to any values that are operands to these PHIs. - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { - PHINode *P = NewPHIs[i]; - for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - AA->addEscapingUse(P->getOperandUse(jj)); - } - } - } - - return V; + return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); } Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI, @@ -1518,9 +1513,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - SmallPtrSet Blockers; - for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) - Blockers.insert(UnavailableBlocks[i]); + SmallPtrSet Blockers(UnavailableBlocks.begin(), + UnavailableBlocks.end()); // Let's find the first basic block with more than one predecessor. Walk // backwards through predecessors if needed. 
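// [Editor's note] With the alias-analysis escape bookkeeping deleted above,
// ConstructSSAForLoadSet reduces to the standard SSAUpdater pattern. A hedged
// sketch of that pattern (helper name and parameters assumed, not part of
// the patch):
static Value *buildSSAForLoad(LoadInst *LI,
                              ArrayRef<AvailableValueInBlock> Avail,
                              GVN &gvn) {
  SmallVector<PHINode *, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());
  // Seed each block with the value available in it...
  for (const AvailableValueInBlock &AV : Avail)
    if (!SSAUpdate.HasValueForBlock(AV.BB))
      SSAUpdate.AddAvailableValue(AV.BB, AV.MaterializeAdjustedValue(LI, gvn));
  // ...then let the updater insert whatever PHIs the load's block needs.
  return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}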
@@ -1550,15 +1544,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // available. MapVector PredLoads; DenseMap FullyAvailableBlocks; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; - for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) - FullyAvailableBlocks[UnavailableBlocks[i]] = false; + for (const AvailableValueInBlock &AV : ValuesPerBlock) + FullyAvailableBlocks[AV.BB] = true; + for (BasicBlock *UnavailableBB : UnavailableBlocks) + FullyAvailableBlocks[UnavailableBB] = false; SmallVector CriticalEdgePred; - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); - PI != E; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : predecessors(LoadBB)) { + // If any predecessor block is an EH pad that does not allow non-PHI + // instructions before the terminator, we can't PRE the load. + if (Pred->getTerminator()->isEHPad()) { + DEBUG(dbgs() + << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } @@ -1570,9 +1571,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; } - if (LoadBB->isLandingPad()) { + if (LoadBB->isEHPad()) { DEBUG(dbgs() - << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" + << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" << Pred->getName() << "': " << *LI << '\n'); return false; } @@ -1655,12 +1656,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, << *NewInsts.back() << '\n'); // Assign value numbers to the new instructions. - for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + for (Instruction *I : NewInsts) { // FIXME: We really _ought_ to insert these value numbers into their // parent's availability map. However, in doing so, we risk getting into // ordering issues. If a block hasn't been processed yet, we would be // marking a value as AVAIL-IN, which isn't what we intend. - VN.lookup_or_add(NewInsts[i]); + VN.lookup_or_add(I); } for (const auto &PredLoad : PredLoads) { @@ -1677,6 +1678,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (Tags) NewLoad->setAAMetadata(Tags); + if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) + NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); + if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + // Transfer DebugLoc. NewLoad->setDebugLoc(LI->getDebugLoc()); @@ -1704,6 +1710,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, /// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI) { + // non-local speculations are not allowed under asan. + if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress)) + return false; + // Step 1: Find the non-local dependencies of the load. 
LoadDepVect Deps; MD->getNonLocalPointerDependency(LI, Deps); @@ -1777,6 +1787,63 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); } +bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { + assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && + "This function can only be called with llvm.assume intrinsic"); + Value *V = IntrinsicI->getArgOperand(0); + + if (ConstantInt *Cond = dyn_cast(V)) { + if (Cond->isZero()) { + Type *Int8Ty = Type::getInt8Ty(V->getContext()); + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. FIXME: We could insert unreachable + // instruction directly because we can modify the CFG. + new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + } + markInstructionForDeletion(IntrinsicI); + return false; + } + + Constant *True = ConstantInt::getTrue(V->getContext()); + bool Changed = false; + + for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { + BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); + + // This property is only true in dominated successors, propagateEquality + // will check dominance for us. + Changed |= propagateEquality(V, True, Edge, false); + } + + // We can replace assume value with true, which covers cases like this: + // call void @llvm.assume(i1 %cmp) + // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true + ReplaceWithConstMap[V] = True; + + // If one of *cmp *eq operand is const, adding it to map will cover this: + // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen + // call void @llvm.assume(i1 %cmp) + // ret float %0 ; will change it to ret float 3.000000e+00 + if (auto *CmpI = dyn_cast(V)) { + if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || + CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || + (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ && + CmpI->getFastMathFlags().noNaNs())) { + Value *CmpLHS = CmpI->getOperand(0); + Value *CmpRHS = CmpI->getOperand(1); + if (isa(CmpLHS)) + std::swap(CmpLHS, CmpRHS); + auto *RHSConst = dyn_cast(CmpRHS); + + // If only one operand is constant. + if (RHSConst != nullptr && !isa(CmpLHS)) + ReplaceWithConstMap[CmpLHS] = RHSConst; + } + } + return Changed; +} static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value @@ -1789,7 +1856,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { if (Instruction *ReplInst = dyn_cast(Repl)) { // FIXME: If both the original and replacement value are part of the // same control-flow region (meaning that the execution of one - // guarentees the executation of the other), then we can combine the + // guarantees the execution of the other), then we can combine the // noalias scopes here and do better than the general conservative // answer used in combineMetadata(). @@ -1797,13 +1864,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // regions, and so we need a conservative combination of the noalias // scopes. 
static const unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - }; + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group}; combineMetadata(ReplInst, I, KnownIDs); } } @@ -1890,10 +1954,8 @@ bool GVN::processLoad(LoadInst *L) { ++NumGVNLoad; return true; } - } - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If the value isn't available, don't do anything! DEBUG( // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; @@ -2049,11 +2111,31 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } +// Tries to replace instruction with const, using information from +// ReplaceWithConstMap. +bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { + bool Changed = false; + for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { + Value *Operand = Instr->getOperand(OpNum); + auto it = ReplaceWithConstMap.find(Operand); + if (it != ReplaceWithConstMap.end()) { + assert(!isa(Operand) && + "Replacing constants with constants is invalid"); + DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second + << " in instruction " << *Instr << '\n'); + Instr->setOperand(OpNum, it->second); + Changed = true; + } + } + return Changed; +} + /// The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -bool GVN::propagateEquality(Value *LHS, Value *RHS, - const BasicBlockEdge &Root) { +/// If DominatesByEdge is false, then it means that it is dominated by Root.End. +bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, + bool DominatesByEdge) { SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; @@ -2065,11 +2147,13 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, std::pair Item = Worklist.pop_back_val(); LHS = Item.first; RHS = Item.second; - if (LHS == RHS) continue; + if (LHS == RHS) + continue; assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); // Don't try to propagate equalities between constants. - if (isa(LHS) && isa(RHS)) continue; + if (isa(LHS) && isa(RHS)) + continue; // Prefer a constant on the right-hand side, or an Argument if no constants. if (isa(LHS) || (isa(LHS) && !isa(RHS))) @@ -2108,7 +2192,11 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, // LHS always has at least one use that is not dominated by Root, this will // never do anything if LHS has only one use. if (!LHS->hasOneUse()) { - unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root); + unsigned NumReplacements = + DominatesByEdge + ? replaceDominatedUsesWith(LHS, RHS, *DT, Root) + : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd()); + Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; } @@ -2180,7 +2268,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa(NotCmp)) { unsigned NumReplacements = - replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root); + DominatesByEdge + ? 
replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) + : replaceDominatedUsesWith(NotCmp, NotVal, *DT, + Root.getEnd()); Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; } @@ -2220,6 +2311,10 @@ bool GVN::processInstruction(Instruction *I) { return true; } + if (IntrinsicInst *IntrinsicI = dyn_cast(I)) + if (IntrinsicI->getIntrinsicID() == Intrinsic::assume) + return processAssumeIntrinsic(IntrinsicI); + if (LoadInst *LI = dyn_cast(I)) { if (processLoad(LI)) return true; @@ -2250,11 +2345,11 @@ bool GVN::processInstruction(Instruction *I) { Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); BasicBlockEdge TrueE(Parent, TrueSucc); - Changed |= propagateEquality(BranchCond, TrueVal, TrueE); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); BasicBlockEdge FalseE(Parent, FalseSucc); - Changed |= propagateEquality(BranchCond, FalseVal, FalseE); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); return Changed; } @@ -2276,7 +2371,7 @@ bool GVN::processInstruction(Instruction *I) { // If there is only a single edge, propagate the case value into it. if (SwitchEdges.lookup(Dst) == 1) { BasicBlockEdge E(Parent, Dst); - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true); } } return Changed; @@ -2284,7 +2379,8 @@ bool GVN::processInstruction(Instruction *I) { // Instructions with void type don't return a value, so there's // no point in trying to find redundancies in them. - if (I->getType()->isVoidTy()) return false; + if (I->getType()->isVoidTy()) + return false; uint32_t NextNum = VN.getNextUnusedValueNumber(); unsigned Num = VN.lookup_or_add(I); @@ -2306,17 +2402,21 @@ bool GVN::processInstruction(Instruction *I) { // Perform fast-path value-number based elimination of values inherited from // dominators. - Value *repl = findLeader(I->getParent(), Num); - if (!repl) { + Value *Repl = findLeader(I->getParent(), Num); + if (!Repl) { // Failure, just remember this instance for future use. addToLeaderTable(Num, I, I->getParent()); return false; + } else if (Repl == I) { + // If I was the result of a shortcut PRE, it might already be in the table + // and the best replacement for itself. Nothing to do. + return false; } // Remove it! - patchAndReplaceAllUsesWith(I, repl); - if (MD && repl->getType()->getScalarType()->isPointerTy()) - MD->invalidateCachedPointerInfo(repl); + patchAndReplaceAllUsesWith(I, Repl); + if (MD && Repl->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; } @@ -2331,7 +2431,7 @@ bool GVN::runOnFunction(Function& F) { DT = &getAnalysis().getDomTree(); AC = &getAnalysis().getAssumptionCache(F); TLI = &getAnalysis().getTLI(); - VN.setAliasAnalysis(&getAnalysis()); + VN.setAliasAnalysis(&getAnalysis().getAAResults()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2341,10 +2441,10 @@ bool GVN::runOnFunction(Function& F) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. 
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = FI++; + BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor( - BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD); + bool removedBlock = + MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD); if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; @@ -2382,7 +2482,6 @@ bool GVN::runOnFunction(Function& F) { return Changed; } - bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function // (and incrementing BI before processing an instruction). @@ -2391,11 +2490,16 @@ bool GVN::processBlock(BasicBlock *BB) { if (DeadBlocks.count(BB)) return false; + // Clearing map before every BB because it can be used only for single BB. + ReplaceWithConstMap.clear(); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI); + if (!ReplaceWithConstMap.empty()) + ChangedFunction |= replaceOperandsWithConsts(&*BI); + ChangedFunction |= processInstruction(&*BI); + if (InstrsToErase.empty()) { ++BI; continue; @@ -2439,7 +2543,14 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Value *Op = Instr->getOperand(i); if (isa(Op) || isa(Op) || isa(Op)) continue; - + // This could be a newly inserted instruction, in which case, we won't + // find a value number, and should give up before we hurt ourselves. + // FIXME: Rewrite the infrastructure to let it easier to value number + // and process newly inserted instructions. + if (!VN.exists(Op)) { + success = false; + break; + } if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { @@ -2499,9 +2610,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { BasicBlock *CurrentBlock = CurInst->getParent(); predMap.clear(); - for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(CurrentBlock)) { // We're not interested in PRE where the block is its // own predecessor, or in blocks with predecessors // that are not reachable. @@ -2570,7 +2679,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { // Create a PHI to make the value available in this block. PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(), - CurInst->getName() + ".pre-phi", CurrentBlock->begin()); + CurInst->getName() + ".pre-phi", &CurrentBlock->front()); for (unsigned i = 0, e = predMap.size(); i != e; ++i) { if (Value *V = predMap[i].first) Phi->addIncoming(V, predMap[i].second); @@ -2582,18 +2691,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->getScalarType()->isPointerTy()) { - // Because we have added a PHI-use of the pointer value, it has now - // "escaped" from alias analysis' perspective. We need to inform - // AA of this. 
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); - } - - if (MD) - MD->invalidateCachedPointerInfo(Phi); - } + if (MD && Phi->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); @@ -2616,15 +2715,15 @@ bool GVN::performPRE(Function &F) { if (CurrentBlock == &F.getEntryBlock()) continue; - // Don't perform PRE on a landing pad. - if (CurrentBlock->isLandingPad()) + // Don't perform PRE on an EH pad. + if (CurrentBlock->isEHPad()) continue; for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE;) { - Instruction *CurInst = BI++; - Changed = performScalarPRE(CurInst); + Instruction *CurInst = &*BI++; + Changed |= performScalarPRE(CurInst); } } @@ -2637,8 +2736,8 @@ bool GVN::performPRE(Function &F) { /// Split the critical edge connecting the given two blocks, and return /// the block inserted to the critical edge. BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { - BasicBlock *BB = SplitCriticalEdge( - Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); + BasicBlock *BB = + SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); if (MD) MD->invalidateCachedPredecessors(); return BB; @@ -2652,7 +2751,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); + CriticalEdgeSplittingOptions(DT)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); return true; @@ -2728,17 +2827,14 @@ void GVN::addDeadBlock(BasicBlock *BB) { DeadBlocks.insert(Dom.begin(), Dom.end()); // Figure out the dominance-frontier(D). - for (SmallVectorImpl::iterator I = Dom.begin(), - E = Dom.end(); I != E; I++) { - BasicBlock *B = *I; - for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { - BasicBlock *S = *SI; + for (BasicBlock *B : Dom) { + for (BasicBlock *S : successors(B)) { if (DeadBlocks.count(S)) continue; bool AllPredDead = true; - for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) - if (!DeadBlocks.count(*PI)) { + for (BasicBlock *P : predecessors(S)) + if (!DeadBlocks.count(P)) { AllPredDead = false; break; } @@ -2766,10 +2862,7 @@ void GVN::addDeadBlock(BasicBlock *BB) { continue; SmallVector Preds(pred_begin(B), pred_end(B)); - for (SmallVectorImpl::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; PI++) { - BasicBlock *P = *PI; - + for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; @@ -2794,7 +2887,7 @@ void GVN::addDeadBlock(BasicBlock *BB) { // R be the target of the dead out-coming edge. // 1) Identify the set of dead blocks implied by the branch's dead outcoming // edge. The result of this step will be {X| X is dominated by R} -// 2) Identify those blocks which haves at least one dead prodecessor. The +// 2) Identify those blocks which haves at least one dead predecessor. The // result of this step will be dominance-frontier(R). // 3) Update the PHIs in DF(R) by replacing the operands corresponding to // dead blocks with "UndefVal" in an hope these PHIs will optimized away. 
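// [Editor's note] Step 2 above in isolation: a successor S of a dead block
// joins the dead set only once every one of its predecessors is dead;
// otherwise S lies on the dominance frontier and only its PHI operands get
// patched. Illustrative helper, not part of the patch:
static bool allPredecessorsDead(BasicBlock *S,
                                const SetVector<BasicBlock *> &DeadBlocks) {
  for (BasicBlock *P : predecessors(S))
    if (!DeadBlocks.count(P))
      return false;
  return true;
}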
@@ -2829,14 +2922,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) { // instructions, it makes more sense just to "fabricate" a val-number for the // dead code than checking if instruction involved is dead or not. void GVN::assignValNumForDeadCode() { - for (SetVector::iterator I = DeadBlocks.begin(), - E = DeadBlocks.end(); I != E; I++) { - BasicBlock *BB = *I; - for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); - II != EE; II++) { - Instruction *Inst = &*II; - unsigned ValNum = VN.lookup_or_add(Inst); - addToLeaderTable(ValNum, Inst, BB); + for (BasicBlock *BB : DeadBlocks) { + for (Instruction &Inst : *BB) { + unsigned ValNum = VN.lookup_or_add(&Inst); + addToLeaderTable(ValNum, &Inst, BB); } } } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 2a954d9961f2..ec5e15f0b8f8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -28,9 +28,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -48,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -83,64 +86,62 @@ static cl::opt ReplaceExitValue( namespace { struct RewritePhi; -} -namespace { - class IndVarSimplify : public LoopPass { - LoopInfo *LI; - ScalarEvolution *SE; - DominatorTree *DT; - TargetLibraryInfo *TLI; - const TargetTransformInfo *TTI; +class IndVarSimplify : public LoopPass { + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + TargetLibraryInfo *TLI; + const TargetTransformInfo *TTI; - SmallVector DeadInsts; - bool Changed; - public: + SmallVector DeadInsts; + bool Changed; +public: - static char ID; // Pass identification, replacement for typeid - IndVarSimplify() - : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { - initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); - } + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() + : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { + initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); + } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreserved(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); + } - private: - void releaseMemory() override { - DeadInsts.clear(); - } +private: + void releaseMemory() override { + DeadInsts.clear(); + } - bool 
isValidRewrite(Value *FromVal, Value *ToVal); + bool isValidRewrite(Value *FromVal, Value *ToVal); - void HandleFloatingPointIV(Loop *L, PHINode *PH); - void RewriteNonIntegerIVs(Loop *L); + void handleFloatingPointIV(Loop *L, PHINode *PH); + void rewriteNonIntegerIVs(Loop *L); - void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - bool CanLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); - void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); + bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); + void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, SCEVExpander &Rewriter); + Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); - void SinkUnusedInvariants(Loop *L); + void sinkUnusedInvariants(Loop *L); - Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, - Instruction *InsertPt, Type *Ty, - bool &IsHighCostExpansion); - }; + Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty); +}; } char IndVarSimplify::ID = 0; @@ -148,7 +149,7 @@ INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(IndVarSimplify, "indvars", @@ -158,10 +159,10 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } -/// isValidRewrite - Return true if the SCEV expansion generated by the -/// rewriter can replace the original value. SCEV guarantees that it -/// produces the same value, but the way it is produced may be illegal IR. -/// Ideally, this function will only be called for verification. +/// Return true if the SCEV expansion generated by the rewriter can replace the +/// original value. SCEV guarantees that it produces the same value, but the way +/// it is produced may be illegal IR. Ideally, this function will only be +/// called for verification. bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // If an SCEV expression subsumed multiple pointers, its expansion could // reassociate the GEP changing the base pointer. This is illegal because the @@ -175,10 +176,10 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // because it understands lcssa phis while SCEV does not. Value *FromPtr = FromVal; Value *ToPtr = ToVal; - if (GEPOperator *GEP = dyn_cast(FromVal)) { + if (auto *GEP = dyn_cast(FromVal)) { FromPtr = GEP->getPointerOperand(); } - if (GEPOperator *GEP = dyn_cast(ToVal)) { + if (auto *GEP = dyn_cast(ToVal)) { ToPtr = GEP->getPointerOperand(); } if (FromPtr != FromVal || ToPtr != ToVal) { @@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. 
static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT) { + DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast<PHINode>(User); if (!PHI) return User;
@@ -234,17 +235,28 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - assert((!isa<Instruction>(Def) || - DT->dominates(cast<Instruction>(Def), InsertPt)) && - "def does not dominate all uses"); - return InsertPt; + + auto *DefI = dyn_cast<Instruction>(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); }
//===----------------------------------------------------------------------===// -// RewriteNonIntegerIVs and helpers. Prefer integer IVs. +// rewriteNonIntegerIVs and helpers. Prefer integer IVs. //===----------------------------------------------------------------------===//
-/// ConvertToSInt - Convert APF to an integer, if possible. +/// Convert APF to an integer, if possible. static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { bool isExact = false; // See if we can convert this to an int64_t
@@ -256,8 +268,8 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { return true; }
-/// HandleFloatingPointIV - If the loop has floating induction variable -/// then insert corresponding integer induction variable if possible. +/// If the loop has a floating-point induction variable, then insert a +/// corresponding integer induction variable if possible. /// For example, /// for(double i = 0; i < 10000; ++i) /// bar(i)
@@ -265,13 +277,12 @@ /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { +void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1;
// Check incoming value. - ConstantFP *InitValueVal = - dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); + auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); int64_t InitValue; if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
@@ -279,8 +290,7 @@ // Check the IV increment. Reject this PN if the increment operation is not // an add or the increment value cannot be represented by an integer. - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); + auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp
@@ -456,14 +466,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstInsertionPt()); + &*PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; }
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { +void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // First step.
Check to see if there are any floating-point recurrences. // If there are, change them into integer recurrences, permitting analysis by // the SCEV routines. @@ -477,7 +487,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null(&*PHIs[i])) - HandleFloatingPointIV(L, PN); + handleFloatingPointIV(L, PN); // If the loop previously had floating-point IV, ScalarEvolution // may not have been able to compute a trip count. Now that we've done some @@ -488,7 +498,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { namespace { // Collect information about PHI nodes which can be transformed in -// RewriteLoopExitValues. +// rewriteLoopExitValues. struct RewritePhi { PHINode *PN; unsigned Ith; // Ith incoming value. @@ -501,70 +511,37 @@ struct RewritePhi { }; } -Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, +Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, Instruction *InsertPt, - Type *ResultTy, - bool &IsHighCostExpansion) { - using namespace llvm::PatternMatch; - - if (!Rewriter.isHighCostExpansion(S, L)) { - IsHighCostExpansion = false; - return Rewriter.expandCodeFor(S, ResultTy, InsertPt); - } - + Type *ResultTy) { // Before expanding S into an expensive LLVM expression, see if we can use an - // already existing value as the expansion for S. There is potential to make - // this significantly smarter, but this simple heuristic already gets some - // interesting cases. - - SmallVector Latches; - L->getLoopLatches(Latches); - - for (BasicBlock *BB : Latches) { - ICmpInst::Predicate Pred; - Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) - continue; - - if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) { - IsHighCostExpansion = false; - return LHS; - } - - if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) { - IsHighCostExpansion = false; - return RHS; - } - } + // already existing value as the expansion for S. + if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L)) + if (ExistingValue->getType() == ResultTy) + return ExistingValue; // We didn't find anything, fall back to using SCEVExpander. - assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!"); - IsHighCostExpansion = true; return Rewriter.expandCodeFor(S, ResultTy, InsertPt); } //===----------------------------------------------------------------------===// -// RewriteLoopExitValues - Optimize IV users outside the loop. +// rewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. //===----------------------------------------------------------------------===// -/// RewriteLoopExitValues - Check to see if this loop has a computable -/// loop-invariant execution count. If so, this means that we can compute the -/// final value of any expressions that are recurrent in the loop, and -/// substitute the exit values from the loop into any instructions outside of -/// the loop that use the final values of the current expressions. +/// Check to see if this loop has a computable loop-invariant execution count. 
+/// If so, this means that we can compute the final value of any expressions +/// that are recurrent in the loop, and substitute the exit values from the loop +/// into any instructions outside of the loop that use the final values of the +/// current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. -void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm(*DT)); +void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -679,9 +656,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { continue; } - bool HighCost = false; - Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, - PN->getType(), HighCost); + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); + Value *ExitVal = + expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType()); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -698,7 +675,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { } } - bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); // Transformation. for (const RewritePhi &Phi : RewritePhiSet) { @@ -735,10 +712,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } -/// CanLoopBeDeleted - Check whether it is possible to delete the loop after -/// rewriting exit value. If it is possible, ignore ReplaceExitValue and -/// do rewriting aggressively. -bool IndVarSimplify::CanLoopBeDeleted( +/// Check whether it is possible to delete the loop after rewriting exit +/// value. If it is possible, ignore ReplaceExitValue and do rewriting +/// aggressively. +bool IndVarSimplify::canLoopBeDeleted( Loop *L, SmallVector &RewritePhiSet) { BasicBlock *Preheader = L->getLoopPreheader(); @@ -782,14 +759,9 @@ bool IndVarSimplify::CanLoopBeDeleted( ++BI; } - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) { - for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; - ++BI) { - if (BI->mayHaveSideEffects()) - return false; - } - } + for (auto *BB : L->blocks()) + if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) + return false; return true; } @@ -799,22 +771,19 @@ bool IndVarSimplify::CanLoopBeDeleted( //===----------------------------------------------------------------------===// namespace { - // Collect information about induction variables that are used by sign/zero - // extend operations. This information is recorded by CollectExtend and - // provides the input to WidenIV. - struct WideIVInfo { - PHINode *NarrowIV; - Type *WidestNativeType; // Widest integer type created [sz]ext - bool IsSigned; // Was a sext user seen before a zext? - - WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), - IsSigned(false) {} - }; +// Collect information about induction variables that are used by sign/zero +// extend operations. 
This information is recorded by CollectExtend and provides +// the input to WidenIV. +struct WideIVInfo { + PHINode *NarrowIV = nullptr; + Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext + bool IsSigned = false; // Was a sext user seen before a zext? +}; } -/// visitCast - Update information about the induction variable that is -/// extended by this sign or zero extend operation. This is used to determine -/// the final width of the IV before actually widening it. +/// Update information about the induction variable that is extended by this +/// sign or zero extend operation. This is used to determine the final width of +/// the IV before actually widening it. static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; @@ -855,24 +824,29 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, namespace { -/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the -/// WideIV that computes the same value as the Narrow IV def. This avoids -/// caching Use* pointers. +/// Record a link in the Narrow IV def-use chain along with the WideIV that +/// computes the same value as the Narrow IV def. This avoids caching Use* +/// pointers. struct NarrowIVDefUse { - Instruction *NarrowDef; - Instruction *NarrowUse; - Instruction *WideDef; + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; - NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. + bool NeverNegative = false; - NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): - NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} }; -/// WidenIV - The goal of this transform is to remove sign and zero extends -/// without creating any new induction variables. To do this, it creates a new -/// phi of the wider type and redirects all users, either removing extends or -/// inserting truncs whenever we stop propagating the type. +/// The goal of this transform is to remove sign and zero extends without +/// creating any new induction variables. To do this, it creates a new phi of +/// the wider type and redirects all users, either removing extends or inserting +/// truncs whenever we stop propagating the type. 
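Before the class that implements this, it may help to see the intended end state. A hypothetical before/after pair in plain C++ (invented types and trip count; the real transform works on LLVM IR, not source), showing the per-iteration extend disappearing once the IV itself is widened:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Before widening: an i8 IV that is sign-extended by every user.
    int64_t before = 0;
    for (int8_t i = 0; i != 100; ++i)
      before += (int64_t)i; // sext inside the loop on each iteration

    // After widening: one i64 IV, no extend left in the loop body.
    int64_t after = 0;
    for (int64_t i = 0; i != 100; ++i)
      after += i;

    printf("%lld %lld\n", (long long)before, (long long)after); // 4950 4950
    return 0;
  }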
/// class WidenIV { // Parameters
@@ -913,32 +887,35 @@ public: assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } - PHINode *CreateWideIV(SCEVExpander &Rewriter); + PHINode *createWideIV(SCEVExpander &Rewriter);
protected: - Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use); + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use);
- Instruction *CloneIVUser(NarrowIVDefUse DU); + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
- const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
- const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const;
- Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
- bool WidenLoopCompare(NarrowIVDefUse DU); + bool widenLoopCompare(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance -/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems -/// gratuitous for this purpose. +/// Perform a quick domtree based check for loop invariance assuming that V is +/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this +/// purpose. static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { Instruction *Inst = dyn_cast<Instruction>(V); if (!Inst)
@@ -947,8 +924,8 @@ static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { return DT->properlyDominates(Inst->getParent(), L->getHeader()); }
-Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use) { +Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, + bool IsSigned, Instruction *Use) { // Set the debug location and conservative insertion point. IRBuilder<> Builder(Use); // Hoist the insertion point into loop preheaders as far as possible.
@@ -961,10 +938,11 @@ Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, Builder.CreateZExt(NarrowOper, WideType); }
-/// CloneIVUser - Instantiate a wide operation to replace a narrow -/// operation. This only needs to handle operations that can evaluation to -/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. -Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { +/// Instantiate a wide operation to replace a narrow operation. This only needs +/// to handle operations that can evaluate to SCEVAddRec. It can safely return +/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: @@ -973,40 +951,140 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { case Instruction::Mul: case Instruction::UDiv: case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n"); - - // Replace NarrowDef operands with WideDef. Otherwise, we don't know - // anything about the narrow operand yet so must insert a [sz]ext. It is - // probably loop invariant and will be folded or hoisted. If it actually - // comes from a widened IV, it should be removed during a future call to - // WidenIVUse. - Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef : - getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse); - Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef : - getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse); - - BinaryOperator *NarrowBO = cast(DU.NarrowUse); - BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), - LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(DU.NarrowUse); - Builder.Insert(WideBO); - if (const OverflowingBinaryOperator *OBO = - dyn_cast(NarrowBO)) { - if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); - if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); - } - return WideBO; + return cloneBitwiseIVUser(DU); } } -const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, +Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. 
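The guess-and-check above can be exercised numerically. Below is a brute-force analogue for a sign-extended i8-to-i64 widening of `n + c`: enumeration over the narrow domain stands in for the symbolic SCEV query, and the helper name is invented for this sketch:

  #include <cstdint>
  #include <cstdio>

  // Does "widen(n + c) == widen(n) + X" hold for X = sext(c) or X = zext(c)?
  // Only non-wrapping additions are tested, mirroring the nsw assumption.
  static bool guessWorks(int8_t c, bool signExt) {
    int64_t X = signExt ? (int64_t)c : (int64_t)(uint8_t)c;
    for (int n = -128; n <= 127; ++n) {
      int sum = n + c;
      if (sum < -128 || sum > 127)
        continue; // the narrow add would wrap; nsw rules this case out
      if ((int64_t)(int8_t)sum != (int64_t)n + X)
        return false;
    }
    return true;
  }

  int main() {
    int8_t c = -3;
    printf("sext(c) guess: %s\n", guessWorks(c, true) ? "works" : "fails");
    printf("zext(c) guess: %s\n", guessWorks(c, false) ? "works" : "fails");
    return 0;
  }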
+ + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = nullptr; + + switch (NarrowUse->getOpcode()) { + default: + llvm_unreachable("No other possibility!"); + + case Instruction::Add: + WideUse = SE->getAddExpr(WideLHS, WideRHS); + break; + + case Instruction::Mul: + WideUse = SE->getMulExpr(WideLHS, WideRHS); + break; + + case Instruction::UDiv: + WideUse = SE->getUDivExpr(WideLHS, WideRHS); + break; + + case Instruction::Sub: + WideUse = SE->getMinusSCEV(WideLHS, WideRHS); + break; + } + + return WideUse == WideAR; + }; + + bool SignExtend = IsSigned; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + SignExtend, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + SignExtend, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const { if (OpCode == Instruction::Add) return SE->getAddExpr(LHS, RHS); @@ -1022,7 +1100,7 @@ const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, /// operands. Generate the SCEV value for the widened operation without /// actually modifying the IR yet. If the expression after extending the /// operands is an AddRec for this loop, return it. -const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { +const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { // Handle the common case of add const unsigned OpCode = DU.NarrowUse->getOpcode(); @@ -1062,19 +1140,18 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { if (ExtendOperIdx == 0) std::swap(lhs, rhs); const SCEVAddRecExpr *AddRec = - dyn_cast(GetSCEVByOpCode(lhs, rhs, OpCode)); + dyn_cast(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) return nullptr; return AddRec; } -/// GetWideRecurrence - Is this instruction potentially interesting for further -/// simplification after widening it's type? In other words, can the -/// extend be safely hoisted out of the loop with SCEV reducing the value to a -/// recurrence on the same loop. If so, return the sign or zero extended -/// recurrence. Otherwise return NULL. -const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { +/// Is this instruction potentially interesting for further simplification after +/// widening it's type? 
In other words, can the extend be safely hoisted out of +/// the loop with SCEV reducing the value to a recurrence on the same loop. If +/// so, return the sign or zero extended recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) return nullptr; @@ -1097,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1108,13 +1186,27 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { /// If the narrow use is a compare instruction, then widen the compare // (and possibly the other operand). The extend operation is hoisted into the // loop preheader as far as possible. -bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) { +bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { ICmpInst *Cmp = dyn_cast(DU.NarrowUse); if (!Cmp) return false; - // Sign of IV user and compare must match. - if (IsSigned != CmpInst::isSigned(Cmp->getPredicate())) + // We can legally widen the comparison in the following two cases: + // + // - The signedness of the IV extension and comparison match + // + // - The narrow IV is always positive (and thus its sign extension is equal + // to its zero extension). For instance, let's say we're zero extending + // %narrow for the following use + // + // icmp slt i32 %narrow, %val ... (A) + // + // and %narrow is always positive. Then + // + // (A) == icmp slt i32 sext(%narrow), sext(%val) + // == icmp slt i32 zext(%narrow), sext(%val) + + if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); @@ -1123,20 +1215,21 @@ bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. if (CastWidth < IVWidth) { - Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp); + Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); } return true; } -/// WidenIVUse - Determine whether an individual user of the narrow IV can be -/// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 
-/// WidenIVUse - Determine whether an individual user of the narrow IV can be -/// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis. if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1145,13 +1238,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", UsePhi); WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); - IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt()); + IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); DeadInsts.emplace_back(UsePhi);
@@ -1200,20 +1293,20 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { }
// Does this user itself evaluate to a recurrence after widening? - const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse); if (!WideAddRec) - WideAddRec = GetExtendedOperandRecurrence(DU); + WideAddRec = getExtendedOperandRecurrence(DU); if (!WideAddRec) { // If use is a loop condition, try to promote the condition instead of // truncating the IV first. - if (WidenLoopCompare(DU)) + if (widenLoopCompare(DU)) return nullptr;
// This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to
@@ -1228,7 +1321,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { - WideUse = CloneIVUser(DU); + WideUse = cloneIVUser(DU, WideAddRec); if (!WideUse) return nullptr; }
@@ -1248,9 +1341,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { return WideUse; }
-/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. +/// Add eligible users of NarrowDef to NarrowIVUsers. /// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); + bool NeverNegative = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, + SE->getConstant(NarrowSCEV->getType(), 0)); for (User *U : NarrowDef->users()) { Instruction *NarrowUser = cast<Instruction>(U);
@@ -1258,21 +1355,21 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { if (!Widened.insert(NarrowUser).second) continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef)); + NarrowIVUsers.push_back( + NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative)); } }
-/// CreateWideIV - Process a single induction variable. First use the -/// SCEVExpander to create a wide induction variable that evaluates to the same -/// recurrence as the original narrow IV. Then use a worklist to forward -/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all -/// interesting IV users, the narrow IV will be isolated for removal by -/// DeleteDeadPHIs. +/// Process a single induction variable. First use the SCEVExpander to create a +/// wide induction variable that evaluates to the same recurrence as the +/// original narrow IV.
Then use a worklist to forward traverse the narrow IV's +/// def-use chain. After widenIVUse has processed all interesting IV users, the +/// narrow IV will be isolated for removal by DeleteDeadPHIs. /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. /// -PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { +PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); if (!AddRec) @@ -1302,11 +1399,11 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // either find an existing phi or materialize a new one. Either way, we // expect a well-formed cyclic phi-with-increments. i.e. any operand not part // of the phi-SCC dominates the loop entry. - Instruction *InsertPt = L->getHeader()->begin(); + Instruction *InsertPt = &L->getHeader()->front(); WidePhi = cast(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); // Remembering the WideIV increment generated by SCEVExpander allows - // WidenIVUse to reuse it when widening the narrow IV's increment. We don't + // widenIVUse to reuse it when widening the narrow IV's increment. We don't // employ a general reuse mechanism because the call above is the only call to // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. if (BasicBlock *LatchBlock = L->getLoopLatch()) { @@ -1329,13 +1426,13 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *WideUse = WidenIVUse(DU, Rewriter); + Instruction *WideUse = widenIVUse(DU, Rewriter); // Follow all def-use edges from the previous narrow use. if (WideUse) pushNarrowIVUsers(DU.NarrowUse, WideUse); - // WidenIVUse may have removed the def-use edge. + // widenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) DeadInsts.emplace_back(DU.NarrowDef); } @@ -1352,38 +1449,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { //===----------------------------------------------------------------------===// namespace { - class IndVarSimplifyVisitor : public IVVisitor { - ScalarEvolution *SE; - const TargetTransformInfo *TTI; - PHINode *IVPhi; +class IndVarSimplifyVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetTransformInfo *TTI; + PHINode *IVPhi; - public: - WideIVInfo WI; +public: + WideIVInfo WI; - IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, - const TargetTransformInfo *TTI, - const DominatorTree *DTree) - : SE(SCEV), TTI(TTI), IVPhi(IV) { - DT = DTree; - WI.NarrowIV = IVPhi; - if (ReduceLiveIVs) - setSplitOverflowIntrinsics(); - } + IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, + const TargetTransformInfo *TTI, + const DominatorTree *DTree) + : SE(SCEV), TTI(TTI), IVPhi(IV) { + DT = DTree; + WI.NarrowIV = IVPhi; + if (ReduceLiveIVs) + setSplitOverflowIntrinsics(); + } - // Implement the interface used by simplifyUsersOfIV. - void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } - }; + // Implement the interface used by simplifyUsersOfIV. + void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } +}; } -/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV -/// users. Each successive simplification may push more users which may -/// themselves be candidates for simplification. +/// Iteratively perform simplification on a worklist of IV users. 
Each +/// successive simplification may push more users which may themselves be +/// candidates for simplification. /// /// Sign/Zero extend elimination is interleaved with IV simplification. /// -void IndVarSimplify::SimplifyAndExtend(Loop *L, +void IndVarSimplify::simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, - LPPassManager &LPM) { + LoopInfo *LI) { SmallVector WideIVs; SmallVector LoopPhis; @@ -1400,14 +1497,14 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // extension. The first time SCEV attempts to normalize sign/zero extension, // the result becomes final. So for the most predictable results, we delay // evaluation of sign/zero extend evaluation until needed, and avoid running - // other SCEV based analysis prior to SimplifyAndExtend. + // other SCEV based analysis prior to simplifyAndExtend. do { PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); + Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); @@ -1416,7 +1513,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, for (; !WideIVs.empty(); WideIVs.pop_back()) { WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); - if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { + if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); } @@ -1425,12 +1522,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, } //===----------------------------------------------------------------------===// -// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +// linearFunctionTestReplace and its kin. Rewrite the loop exit condition. //===----------------------------------------------------------------------===// -/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken -/// count expression can be safely and cheaply expanded into an instruction -/// sequence that can be used by LinearFunctionTestReplace. +/// Return true if this loop's backedge taken count expression can be safely and +/// cheaply expanded into an instruction sequence that can be used by +/// linearFunctionTestReplace. /// /// TODO: This fails for pointer-type loop counters with greater than one byte /// strides, consequently preventing LFTR from running. For the purpose of LFTR @@ -1461,8 +1558,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, return true; } -/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop -/// invariant value to the phi. +/// Return the loop header phi IFF IncV adds a loop invariant value to the phi. static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast(IncV); if (!IncI) @@ -1513,8 +1609,8 @@ static ICmpInst *getLoopTest(Loop *L) { return dyn_cast(BI->getCondition()); } -/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show -/// that the current exit test is already sufficiently canonical. +/// linearFunctionTestReplace policy. Return true unless we can show that the +/// current exit test is already sufficiently canonical. static bool needsLFTR(Loop *L, DominatorTree *DT) { // Do LFTR to simplify the exit condition to an ICMP. 
ICmpInst *Cond = getLoopTest(L); @@ -1574,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl &Visited, return false; // Optimistically handle other instructions. - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { - if (!Visited.insert(*OI).second) + for (Value *Op : I->operands()) { + if (!Visited.insert(Op).second) continue; - if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + if (!hasConcreteDefImpl(Op, Visited, Depth+1)) return false; } return true; @@ -1594,8 +1690,8 @@ static bool hasConcreteDef(Value *V) { return hasConcreteDefImpl(V, Visited, 0); } -/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to -/// be rewritten) loop exit test. +/// Return true if this IV has any uses other than the (soon to be rewritten) +/// loop exit test. static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); Value *IncV = Phi->getIncomingValue(LatchIdx); @@ -1608,7 +1704,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { return true; } -/// FindLoopCounter - Find an affine IV in canonical form. +/// Find an affine IV in canonical form. /// /// BECount may be an i8* pointer type. The pointer difference is already /// valid count without scaling the address stride, so it remains a pointer @@ -1702,8 +1798,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, return BestPhi; } -/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that -/// holds the RHS of the new loop test. +/// Help linearFunctionTestReplace by generating a value that holds the RHS of +/// the new loop test. static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE) { const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(IndVar)); @@ -1785,13 +1881,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, } } -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. +/// This method rewrites the exit condition of the loop to be a canonical != +/// comparison against the incremented loop induction variable. This pass is +/// able to rewrite the exit tests of any loop where the SCEV analysis can +/// determine a loop-invariant trip count of the loop, which is actually a much +/// broader range than just linear tests. Value *IndVarSimplify:: -LinearFunctionTestReplace(Loop *L, +linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { @@ -1809,7 +1905,7 @@ LinearFunctionTestReplace(Loop *L, // This addition may overflow, which is valid as long as the comparison is // truncated to BackedgeTakenCount->getType(). IVCount = SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); + SE->getOne(BackedgeTakenCount->getType())); // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. 
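In source-level terms, the rewrite this method performs amounts to the canonicalization below; the loop bodies and bounds are invented for illustration, and the real pass of course operates on IR rather than C++:

  #include <cstdio>

  int main() {
    int a = 0, b = 0;

    // Before LFTR: a signed relational exit test.
    for (int i = 0; i < 10; ++i)
      a += i;

    // After LFTR: the canonical != comparison against the IV, using the
    // SCEV-computed trip count. Same 10 iterations, same result.
    int tripCount = 10;
    for (int i = 0; i != tripCount; ++i)
      b += i;

    printf("%d %d\n", a, b); // 45 45
    return 0;
  }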
@@ -1847,8 +1943,8 @@ LinearFunctionTestReplace(Loop *L, const SCEV *ARStep = AR->getStepRecurrence(*SE); // For constant IVCount, avoid truncation. if (isa(ARStart) && isa(IVCount)) { - const APInt &Start = cast(ARStart)->getValue()->getValue(); - APInt Count = cast(IVCount)->getValue()->getValue(); + const APInt &Start = cast(ARStart)->getAPInt(); + APInt Count = cast(IVCount)->getAPInt(); // Note that the post-inc value of BackedgeTakenCount may have overflowed // above such that IVCount is now zero. if (IVCount != BackedgeTakenCount && Count == 0) { @@ -1886,21 +1982,21 @@ LinearFunctionTestReplace(Loop *L, } //===----------------------------------------------------------------------===// -// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. +// sinkUnusedInvariants. A late subpass to cleanup loop preheaders. //===----------------------------------------------------------------------===// /// If there's a single exit block, sink any loop-invariant values that /// were defined in the preheader but not used inside the loop into the /// exit block to reduce register pressure in the loop. -void IndVarSimplify::SinkUnusedInvariants(Loop *L) { +void IndVarSimplify::sinkUnusedInvariants(Loop *L) { BasicBlock *ExitBlock = L->getExitBlock(); if (!ExitBlock) return; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) return; - Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); - BasicBlock::iterator I = Preheader->getTerminator(); + Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt(); + BasicBlock::iterator I(Preheader->getTerminator()); while (I != Preheader->begin()) { --I; // New instructions were inserted at the end of the preheader. @@ -1920,8 +2016,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { if (isa(I)) continue; - // Skip landingpad instructions. - if (isa(I)) + // Skip eh pad instructions. + if (I->isEHPad()) continue; // Don't sink alloca: we never want to sink static alloca's out of the @@ -1953,7 +2049,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { continue; // Otherwise, sink it to the exit block. - Instruction *ToMove = I; + Instruction *ToMove = &*I; bool Done = false; if (I != Preheader->begin()) { @@ -1994,7 +2090,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return false; LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; @@ -2007,7 +2103,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If there are any floating-point recurrences, attempt to // transform them to use integer recurrences. - RewriteNonIntegerIVs(L); + rewriteNonIntegerIVs(L); const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); @@ -2024,7 +2120,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // other expressions involving loop IVs have been evaluated. This helps SCEV // set no-wrap flags before normalizing sign/zero extension. Rewriter.disableCanonicalMode(); - SimplifyAndExtend(L, Rewriter, LPM); + simplifyAndExtend(L, Rewriter, LI); // Check to see if this loop has a computable loop-invariant execution count. 
// If so, this means that we can compute the final value of any expressions @@ -2034,7 +2130,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // if (ReplaceExitValue != NeverRepl && !isa(BackedgeTakenCount)) - RewriteLoopExitValues(L, Rewriter); + rewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); @@ -2054,7 +2150,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // explicitly check any assumptions made by SCEV. Brittle. const SCEVAddRecExpr *AR = dyn_cast(BackedgeTakenCount); if (!AR || AR->getLoop()->getLoopPreheader()) - (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } } @@ -2074,13 +2170,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Loop-invariant instructions in the preheader that aren't used in the // loop may be sunk below the loop to reduce register pressure. - SinkUnusedInvariants(L); + sinkUnusedInvariants(L); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + // Check a post-condition. - assert(L->isLCSSAForm(*DT) && - "Indvars did not leave the loop in lcssa form!"); + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index cbdacad8f28b..dea61f6ff3d7 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -214,8 +214,8 @@ public: AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -224,8 +224,15 @@ public: char InductiveRangeCheckElimination::ID = 0; } -INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", - "Inductive range check elimination", false, false) +INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce", + "Inductive range check elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce", + "Inductive range check elimination", false, false) const char *InductiveRangeCheck::rangeCheckKindToStr( InductiveRangeCheck::RangeCheckKind RCK) { @@ -1044,9 +1051,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", - &F, BBInsertLocation); + &F, &*BBInsertLocation); RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, - BBInsertLocation); + &*BBInsertLocation); BranchInst *PreheaderJump = cast(&*Preheader->rbegin()); bool Increasing = LS.IndVarIncreasing; @@ -1399,8 +1406,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { LLVMContext &Context = Preheader->getContext(); InductiveRangeCheck::AllocatorTy IRCAlloc; SmallVector RangeChecks; - ScalarEvolution &SE = getAnalysis(); - BranchProbabilityInfo &BPI = getAnalysis(); + 
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + BranchProbabilityInfo &BPI = + getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks()) if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1130d228acb8..087ce8ac50d4 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,15 +18,22 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h"
@@ -36,6 +43,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include <algorithm> +#include <memory> using namespace llvm;
#define DEBUG_TYPE "jump-threading"
@@ -49,6 +58,13 @@ BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden);
+static cl::opt<unsigned> +ImplicationSearchThreshold( + "jump-threading-implication-search-threshold", + cl::desc("The number of predecessors to search for a stronger " + "condition to use to thread over a weaker condition"), + cl::init(3), cl::Hidden); + namespace { // These are at global scope so static functions can use them too. typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
@@ -80,6 +96,9 @@ namespace { class JumpThreading : public FunctionPass { TargetLibraryInfo *TLI; LazyValueInfo *LVI; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; + bool HasProfileData; #ifdef NDEBUG SmallPtrSet<const BasicBlock *, 16> LoopHeaders; #else
@@ -114,9 +133,15 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LazyValueInfo>(); AU.addPreserved<LazyValueInfo>(); + AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); }
+ void releaseMemory() override { + BFI.reset(); + BPI.reset(); + } + void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
@@ -134,9 +159,16 @@ bool ProcessBranchOnPHI(PHINode *PN); bool ProcessBranchOnXOR(BinaryOperator *BO); + bool ProcessImpliedCondition(BasicBlock *BB); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + + private: + BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + const char *Suffix); + void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, + BasicBlock *NewBB, BasicBlock *SuccBB); }; }
@@ -160,11 +192,21 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); LVI = &getAnalysis<LazyValueInfo>(); + BFI.reset(); + BPI.reset();
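The pattern used here — analyses built by hand rather than requested from the pass manager, constructed only when the function actually carries profile data, and torn down again in releaseMemory() — reduces to a few lines. A stripped-down sketch with stub types standing in for BranchProbabilityInfo and BlockFrequencyInfo (names invented, not LLVM's):

  #include <cstdio>
  #include <memory>

  struct StubBPI {};                              // stands in for BranchProbabilityInfo
  struct StubBFI { StubBFI(const StubBPI &) {} }; // stands in for BlockFrequencyInfo

  struct PassState {
    std::unique_ptr<StubBPI> BPI;
    std::unique_ptr<StubBFI> BFI;

    void run(bool hasProfileData) {
      BPI.reset(); // start each run with a clean slate
      BFI.reset();
      if (hasProfileData) { // only pay for the analyses when weights matter
        BPI.reset(new StubBPI());
        BFI.reset(new StubBFI(*BPI));
      }
      printf("updating edge weights: %s\n", BFI ? "yes" : "no");
    }

    void releaseMemory() { BFI.reset(); BPI.reset(); }
  };

  int main() {
    PassState P;
    P.run(false); // updating edge weights: no
    P.run(true);  // updating edge weights: yes
    P.releaseMemory();
    return 0;
  }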
+ HasProfileData = F.getEntryCount().hasValue(); + if (HasProfileData) { + LoopInfo LI{DominatorTree(F)}; + BPI.reset(new BranchProbabilityInfo(F, LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); + } // Remove unreachable blocks from function as they may result in infinite // loop. We do threading if we found something profitable. Jump threading a // branch can create other opportunities. If these opportunities form a cycle - // i.e. if any jump treading is undoing previous threading in the path, then + // i.e. if any jump threading is undoing previous threading in the path, then // we will loop forever. We take care of this issue by not jump threading for // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. @@ -176,7 +218,7 @@ bool JumpThreading::runOnFunction(Function &F) { do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *BB = I; + BasicBlock *BB = &*I; // Thread all of the branches we can over this block. while (ProcessBlock(BB)) Changed = true; @@ -239,11 +281,26 @@ bool JumpThreading::runOnFunction(Function &F) { static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, unsigned Threshold) { /// Ignore PHI nodes, these will be flattened when duplication happens. - BasicBlock::const_iterator I = BB->getFirstNonPHI(); + BasicBlock::const_iterator I(BB->getFirstNonPHI()); // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. + unsigned Bonus = 0; + const TerminatorInst *BBTerm = BB->getTerminator(); + // Threading through a switch statement is particularly profitable. If this + // block ends in a switch, decrease its cost to make it more likely to happen. + if (isa<SwitchInst>(BBTerm)) + Bonus = 6; + + // The same holds for indirect branches, but slightly more so. + if (isa<IndirectBrInst>(BBTerm)) + Bonus = 8; + + // Bump the threshold up so the early exit from the loop doesn't skip the + // terminator-based Size adjustment at the end. + Threshold += Bonus; + // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; @@ -260,6 +317,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; + // Bail out if this instruction gives back a token type, it is not possible + // to duplicate it if it is used outside this BB. + if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) + return ~0U; + // All other instructions count for at least one unit. ++Size; @@ -268,7 +330,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->cannotDuplicate()) + if (CI->cannotDuplicate() || CI->isConvergent()) // Blocks with NoDuplicate are modelled as having infinite cost, so they // are never duplicated. return ~0U; @@ -279,16 +341,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, } } - // Threading through a switch statement is particularly profitable. If this - // block ends in a switch, decrease its cost to make it more likely to happen. - if (isa<SwitchInst>(I)) - Size = Size > 6 ? Size-6 : 0; - - // The same holds for indirect branches, but slightly more so. - if (isa<IndirectBrInst>(I)) - Size = Size > 8 ? Size-8 : 0; - - return Size; + return Size > Bonus ? 
Size - Bonus : 0; } /// FindLoopHeaders - We do not want jump threading to turn proper loop @@ -669,7 +722,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // because now the condition in this block can be threaded through // predecessors of our predecessor block. if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { - if (SinglePred->getTerminator()->getNumSuccessors() == 1 && + const TerminatorInst *TI = SinglePred->getTerminator(); + if (!TI->isExceptional() && TI->getNumSuccessors() == 1 && SinglePred != BB && !hasAddressTakenAndUsed(BB)) { // If SinglePred was a loop header, BB becomes one. if (LoopHeaders.erase(SinglePred)) @@ -761,7 +815,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // If we're branching on a conditional, LVI might be able to determine // it's value at the branch instruction. We only handle comparisons // against a constant at this time. - // TODO: This should be extended to handle switches as well. + // TODO: This should be extended to handle switches as well. BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); if (CondBr && CondConst && CondBr->isConditional()) { @@ -829,9 +883,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst)); + // Search for a stronger dominating condition that can be used to simplify a + // conditional branch leaving BB. + if (ProcessImpliedCondition(BB)) + return true; - // TODO: If we have: "br (X > 0)" and we have a predecessor where we know - // "(X == 4)", thread through this block. + return false; +} + +bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) { + auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + BasicBlock *CurrentBB = BB; + BasicBlock *CurrentPred = BB->getSinglePredecessor(); + unsigned Iter = 0; + + auto &DL = BB->getModule()->getDataLayout(); + + while (CurrentPred && Iter++ < ImplicationSearchThreshold) { + auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator()); + if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB) + return false; + + if (isImpliedCondition(PBI->getCondition(), Cond, DL)) { + BI->getSuccessor(1)->removePredecessor(BB); + BranchInst::Create(BI->getSuccessor(0), BI); + BI->eraseFromParent(); + return true; + } + CurrentBB = CurrentPred; + CurrentPred = CurrentBB->getSinglePredecessor(); + } return false; } @@ -850,10 +935,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (LoadBB->getSinglePredecessor()) return false; - // If the load is defined in a landing pad, it can't be partially redundant, - // because the edges between the invoke and the landing pad cannot have other + // If the load is defined in an EH pad, it can't be partially redundant, + // because the edges between the invoke and the EH pad cannot have other // instructions between them. - if (LoadBB->isLandingPad()) + if (LoadBB->isEHPad()) return false; Value *LoadedPtr = LI->getOperand(0); @@ -866,11 +951,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block.
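// [Illustrative sketch, not part of the patch] What ProcessImpliedCondition
// above exploits, reduced to plain C++. isImpliedCondition is the LLVM
// helper; the one-liner below only models the signed greater-than case:
//
//   pred: br i1 (x > 10), label %bb, label %other
//   bb:   br i1 (x > 0),  label %t,  label %f
//
// Because (x > 10) implies (x > 0) on the pred->bb edge, bb's conditional
// branch folds to an unconditional "br label %t".
#include <cassert>

// Does "x > A" imply "x > B" for every x? True exactly when A >= B.
static bool sgtImplies(int A, int B) { return A >= B; }

int main() {
  assert(sgtImplies(10, 0));  // x > 10 implies x > 0: the branch folds
  assert(!sgtImplies(0, 10)); // x > 0 does not imply x > 10: no folding
}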
- BasicBlock::iterator BBIt = LI; + BasicBlock::iterator BBIt(LI); if (Value *AvailableVal = - FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { - // If the value if the load is locally available within the block, just use + FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) { + // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; @@ -914,7 +999,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); AAMDNodes ThisAATags; - Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, + Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, + DefMaxInstsToScan, nullptr, &ThisAATags); if (!PredAvailable) { OneUnavailablePred = PredBB; @@ -968,8 +1054,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { } // Split them out to their own block. - UnavailablePred = - SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split"); + UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split"); } // If the value isn't available in all predecessors, then there will be @@ -995,7 +1080,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Create a PHI node at the start of the block for the PRE'd load value. pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", - LoadBB->begin()); + &LoadBB->front()); PN->takeName(LI); PN->setDebugLoc(LI->getDebugLoc()); @@ -1262,7 +1347,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Into: // BB': // %Y = icmp ne i32 %A, %B - // br i1 %Z, ... + // br i1 %Y, ... PredValueInfoTy XorOpValues; bool isLHS = true; @@ -1387,14 +1472,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, return false; } - // And finally, do it! Start by factoring the predecessors is needed. + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm"); + PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } // And finally, do it! @@ -1415,6 +1500,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BB->getParent(), BB); NewBB->moveAfter(PredBB); + // Set the block frequency of NewBB. + if (HasProfileData) { + auto NewBBFreq = + BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB); + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } + BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); @@ -1425,7 +1517,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, Instruction *New = BI->clone(); New->setName(BI->getName()); NewBB->getInstList().push_back(New); - ValueMapping[BI] = New; + ValueMapping[&*BI] = New; // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) @@ -1438,7 +1530,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We didn't copy the terminator from BB over to NewBB, because there is now // an unconditional jump to SuccBB. Insert the unconditional jump. 
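// [Illustrative sketch, not the real API] The bounded backward scan that
// FindAvailableLoadedValue performs, modeled in plain C++: the hunk above
// replaces the hard-coded limit of 6 with the configurable
// DefMaxInstsToScan. Walk back at most Limit toy "store" records looking for
// a prior write to the same location.
#include <cassert>
#include <vector>

struct Inst { int Addr; int Val; }; // a toy "store Val to Addr"

static const int *findAvailable(const std::vector<Inst> &Block, int Addr,
                                unsigned Limit) {
  unsigned Scanned = 0;
  for (auto I = Block.rbegin(); I != Block.rend() && Scanned++ < Limit; ++I)
    if (I->Addr == Addr)
      return &I->Val; // the loaded value is locally available
  return nullptr;     // give up once the scan budget is exhausted
}

int main() {
  std::vector<Inst> BB = {{1, 10}, {2, 20}, {3, 30}};
  assert(*findAvailable(BB, 2, 3) == 20);
  assert(findAvailable(BB, 1, 1) == nullptr); // out of scan budget
}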
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB); + BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB); NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc()); // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the @@ -1475,8 +1567,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + SSAUpdate.AddAvailableValue(BB, &*I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1499,11 +1591,98 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // frequently happens because of phi translation. SimplifyInstructionsInBlock(NewBB, TLI); + // Update the edge weight from BB to SuccBB, which should be less than before. + UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB); + // Threaded an edge! ++NumThreads; return true; } +/// Create a new basic block that will be the predecessor of BB and successor of +/// all blocks in Preds. When profile data is available, update the frequency of +/// this new block. +BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix) { + // Collect the frequencies of all predecessors of BB, which will be used to + // update the edge weight on BB->SuccBB. + BlockFrequency PredBBFreq(0); + if (HasProfileData) + for (auto Pred : Preds) + PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + + BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + + // Set the block frequency of the newly created PredBB, which is the sum of + // frequencies of Preds. + if (HasProfileData) + BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); + return PredBB; +} + +/// Update the block frequency of BB and branch weight and the metadata on the +/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 - +/// Freq(PredBB->BB) / Freq(BB->SuccBB). +void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, + BasicBlock *BB, + BasicBlock *NewBB, + BasicBlock *SuccBB) { + if (!HasProfileData) + return; + + assert(BFI && BPI && "BFI & BPI should have been created here"); + + // As the edge from PredBB to BB is deleted, we have to update the block + // frequency of BB. + auto BBOrigFreq = BFI->getBlockFreq(BB); + auto NewBBFreq = BFI->getBlockFreq(NewBB); + auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB); + auto BBNewFreq = BBOrigFreq - NewBBFreq; + BFI->setBlockFreq(BB, BBNewFreq.getFrequency()); + + // Collect updated outgoing edges' frequencies from BB and use them to update + // edge probabilities. + SmallVector<uint64_t, 4> BBSuccFreq; + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + auto SuccFreq = (*I == SuccBB) + ? BB2SuccBBFreq - NewBBFreq + : BBOrigFreq * BPI->getEdgeProbability(BB, *I); + BBSuccFreq.push_back(SuccFreq.getFrequency()); + } + + uint64_t MaxBBSuccFreq = + *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end()); + + SmallVector<BranchProbability, 4> BBSuccProbs; + if (MaxBBSuccFreq == 0) + BBSuccProbs.assign(BBSuccFreq.size(), + {1, static_cast<uint32_t>(BBSuccFreq.size())}); + else { + for (uint64_t Freq : BBSuccFreq) + BBSuccProbs.push_back( + BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq)); + // Normalize edge probabilities so that they sum up to one.
+ BranchProbability::normalizeProbabilities(BBSuccProbs.begin(), + BBSuccProbs.end()); + } + + // Update edge probabilities in BPI. + for (int I = 0, E = BBSuccProbs.size(); I < E; I++) + BPI->setEdgeProbability(BB, I, BBSuccProbs[I]); + + if (BBSuccProbs.size() >= 2) { + SmallVector Weights; + for (auto Prob : BBSuccProbs) + Weights.push_back(Prob.getNumerator()); + + auto TI = BB->getTerminator(); + TI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights)); + } +} + /// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch /// to BB which contains an i1 PHI node and a conditional branch on that PHI. /// If we can duplicate the contents of BB up into PredBB do so now, this @@ -1530,14 +1709,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, return false; } - // And finally, do it! Start by factoring the predecessors is needed. + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm"); + PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } // Okay, we decided to do this! Clone all the instructions in BB onto the end @@ -1581,12 +1760,12 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (Value *IV = SimplifyInstruction(New, BB->getModule()->getDataLayout())) { delete New; - ValueMapping[BI] = IV; + ValueMapping[&*BI] = IV; } else { // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); - PredBB->getInstList().insert(OldPredBranch, New); - ValueMapping[BI] = New; + PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + ValueMapping[&*BI] = New; } } @@ -1628,8 +1807,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. 
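// [Illustrative sketch, not part of the patch] The net effect of the
// renormalization done in UpdateBlockFreqAndEdgeWeight above: each successor
// frequency is turned into a probability against the maximum and the results
// are normalized to sum to one, which is equivalent to dividing by the total.
// Plain doubles here; the pass uses fixed-point BranchProbability.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<double> normalize(const std::vector<uint64_t> &Freq) {
  uint64_t Sum = 0;
  for (uint64_t F : Freq)
    Sum += F;
  std::vector<double> Probs;
  for (uint64_t F : Freq)
    Probs.push_back(Sum ? double(F) / double(Sum) : 1.0 / Freq.size());
  return Probs;
}

int main() {
  // Outgoing frequencies {60, 20, 20} become probabilities {0.6, 0.2, 0.2}.
  auto P = normalize({60, 20, 20});
  assert(P[0] == 0.6 && P[1] == 0.2 && P[2] == 0.2);
}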
SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, I); - SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + SSAUpdate.AddAvailableValue(BB, &*I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 43fc50e588f8..6d70cdc3ade2 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -34,10 +34,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -118,9 +121,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } @@ -164,9 +170,12 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } @@ -183,7 +192,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -264,9 +273,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // FIXME: This is really heavy handed. It would be a bit better to use an // SSAUpdater strategy during promotion that was LCSSA aware and reformed // it as it went. - if (Changed) - formLCSSARecursively(*L, *DT, LI, - getAnalysisIfAvailable<ScalarEvolution>()); + if (Changed) { + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr); + } } // Check that neither this loop nor its parent have had LCSSA broken. LICM is @@ -402,7 +412,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, } /// Computes loop safety information, checks loop body & header -/// for the possiblity of may throw exception. +/// for the possibility of may throw exception. /// void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { assert(CurLoop != nullptr && "CurLoop cant be null"); // Setting default safety values.
SafetyInfo->MayThrow = false; SafetyInfo->HeaderMayThrow = false; - // Iterate over header and compute dafety info. + // Iterate over header and compute safety info. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) SafetyInfo->HeaderMayThrow |= I->mayThrow(); @@ -445,7 +455,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; if (LI->getType()->isSized()) - Size = AA->getTypeStoreSize(LI->getType()); + Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); @@ -457,10 +467,21 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, return false; // Handle simple cases by querying alias analysis. - AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); - if (Behavior == AliasAnalysis::DoesNotAccessMemory) + FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); + if (Behavior == FMRB_DoesNotAccessMemory) return true; if (AliasAnalysis::onlyReadsMemory(Behavior)) { + // A readonly argmemonly function only reads from memory pointed to by + // its arguments with arbitrary offsets. If we can prove there are no + // writes to this memory in the loop, we can hoist or sink. + if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { + for (Value *Op : CI->arg_operands()) + if (Op->getType()->isPointerTy() && + pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, + AAMDNodes(), CurAST)) + return false; + return true; + } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; @@ -566,7 +587,7 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I, if (!OLoop->contains(&PN)) { PHINode *OpPN = PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), - OInst->getName() + ".lcssa", ExitBlock.begin()); + OInst->getName() + ".lcssa", &ExitBlock.front()); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); *OI = OpPN; @@ -651,6 +672,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) { // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + I.dropUnknownNonDebugMetadata(); + if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumHoisted; @@ -730,9 +755,9 @@ namespace { if (!L->contains(BB)) { // We need to create an LCSSA PHI node for the incoming value and // store that. - PHINode *PN = PHINode::Create( - I->getType(), PredCache.size(BB), - I->getName() + ".lcssa", BB->begin()); + PHINode *PN = + PHINode::Create(I->getType(), PredCache.size(BB), + I->getName() + ".lcssa", &BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PN->addIncoming(I, Pred); return PN; @@ -942,7 +967,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, CurLoop->getUniqueExitBlocks(ExitBlocks); InsertPts.resize(ExitBlocks.size()); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt(); } // We use the SSAUpdater interface to insert phi nodes as required.
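// [Illustrative sketch, not the pass] The new argmemonly case in
// canSinkOrHoistInst at the source level: a readonly call that only touches
// memory reachable from its pointer arguments may be hoisted once the loop is
// known not to write that memory. "pure" is the GCC/Clang attribute closest
// to readonly here.
#include <cassert>

__attribute__((pure)) static int sumPair(const int *P) {
  return P[0] + P[1]; // reads only through its argument
}

int main() {
  int Data[2] = {1, 2};
  int Acc = 0;
  // Nothing in the loop stores to Data, so sumPair(Data) is loop-invariant
  // and LICM may evaluate it once in the preheader.
  for (int I = 0; I < 4; ++I)
    Acc += sumPair(Data);
  assert(Acc == 12);
}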
@@ -973,7 +998,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, return Changed; } -/// Simple Analysis hook. Clone alias set info. +/// Simple analysis hook. Clone alias set info. /// void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt index deea9e2d0102..8a99df86b84a 100644 --- a/lib/Transforms/Scalar/LLVMBuild.txt +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = Scalar parent = Transforms library_name = ScalarOpts -required_libraries = Analysis Core InstCombine ProfileData Support TransformUtils +required_libraries = Analysis Core InstCombine Support TransformUtils diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index c19cd19059b2..1648878b0628 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -56,7 +57,7 @@ class LoadCombine : public BasicBlockPass { public: LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { - initializeSROAPass(*PassRegistry::getPassRegistry()); + initializeLoadCombinePass(*PassRegistry::getPassRegistry()); } using llvm::Pass::doInitialization; @@ -223,7 +224,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (skipOptnoneFunction(BB)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); IRBuilder TheBuilder( BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); @@ -262,8 +263,8 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } char LoadCombine::ID = 0; @@ -274,7 +275,8 @@ BasicBlockPass *llvm::createLoadCombinePass() { INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads", false, false) diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 98b068edf582..bc00ff3f3a42 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Dominators.h" @@ -35,18 +36,19 @@ namespace { } // Possibly eliminate loop L if it is dead. 
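// [Illustrative sketch, not part of the patch] What the "Combine Adjacent
// Loads" pass patched above means at the source level, assuming a
// little-endian target: two contiguous 32-bit loads can be serviced by one
// 64-bit load plus shifts.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Pair[2] = {0x11111111u, 0x22222222u};
  uint64_t Wide;
  std::memcpy(&Wide, Pair, sizeof(Wide)); // the single combined load
  uint32_t Lo = uint32_t(Wide);           // replaces the load of Pair[0]
  uint32_t Hi = uint32_t(Wide >> 32);     // replaces the load of Pair[1]
  assert(Lo == Pair[0] && Hi == Pair[1]);
}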
- bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } @@ -64,7 +66,7 @@ INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", @@ -130,7 +132,7 @@ bool LoopDeletion::isLoopDead(Loop *L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; @@ -169,7 +171,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution &SE = getAnalysis(); + ScalarEvolution &SE = getAnalysis().getSE(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa(S)) return Changed; @@ -242,9 +244,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { for (BasicBlock *BB : blocks) loopInfo.removeBlock(BB); - // The last step is to inform the loop pass manager that we've - // eliminated this loop. - LPM.deleteLoopFromQueue(L); + // The last step is to update LoopInfo now that we've eliminated this loop. + loopInfo.updateUnloop(L); Changed = true; ++NumDeleted; diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 1b9859b57790..3d3cf3e2890b 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include @@ -54,6 +55,11 @@ static cl::opt DistributeNonIfConvertible( "if-convertible by the loop vectorizer"), cl::init(false)); +static cl::opt DistributeSCEVCheckThreshold( + "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed for Loop " + "Distribution")); + STATISTIC(NumLoopsDistributed, "Number of loops distributed"); namespace { @@ -164,9 +170,7 @@ public: // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. - for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) { - auto *Inst = *I; - + for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) { if (!Inst->use_empty()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); Inst->eraseFromParent(); @@ -373,7 +377,7 @@ public: /// \brief This performs the main chunk of the work of cloning the loops for /// the partitions. 
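// [Illustrative sketch, not part of the patch] Why LoopDeletion above gives
// up when the max backedge-taken count is not computable: erasing a possibly
// infinite loop would change the halting behavior of the program, even though
// the loop computes nothing that is used.
static unsigned spin(unsigned N, unsigned Step) {
  unsigned I = 0;
  while (I != N) // if Step does not divide N, this never exits,
    I += Step;   // so the "dead" loop cannot simply be deleted
  return I;
}

int main() { return spin(8, 2) == 8 ? 0 : 1; }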
- void cloneLoops(Pass *P) { + void cloneLoops() { BasicBlock *OrigPH = L->getLoopPreheader(); // At this point the predecessor of the preheader is either the memcheck // block or the top part of the original preheader. @@ -547,11 +551,11 @@ public: MemoryInstructionDependences( const SmallVectorImpl &Instructions, - const SmallVectorImpl &InterestingDependences) { + const SmallVectorImpl &Dependences) { Accesses.append(Instructions.begin(), Instructions.end()); DEBUG(dbgs() << "Backward dependences:\n"); - for (auto &Dep : InterestingDependences) + for (auto &Dep : Dependences) if (Dep.isPossiblyBackward()) { // Note that the designations source and destination follow the program // order, i.e. source is always first. (The direction is given by the @@ -567,25 +571,6 @@ private: AccessesType Accesses; }; -/// \brief Returns the instructions that use values defined in the loop. -static SmallVector findDefsUsedOutsideOfLoop(Loop *L) { - SmallVector UsedOutside; - - for (auto *Block : L->getBlocks()) - // FIXME: I believe that this could use copy_if if the Inst reference could - // be adapted into a pointer. - for (auto &Inst : *Block) { - auto Users = Inst.users(); - if (std::any_of(Users.begin(), Users.end(), [&](User *U) { - auto *Use = cast(U); - return !L->contains(Use->getParent()); - })) - UsedOutside.push_back(&Inst); - } - - return UsedOutside; -} - /// \brief The pass class. class LoopDistribute : public FunctionPass { public: @@ -597,6 +582,7 @@ public: LI = &getAnalysis().getLoopInfo(); LAA = &getAnalysis(); DT = &getAnalysis().getDomTree(); + SE = &getAnalysis().getSE(); // Build up a worklist of inner-loops to vectorize. This is necessary as the // act of distributing a loop creates new loops and can invalidate iterators @@ -619,6 +605,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -629,6 +616,45 @@ public: static char ID; private: + /// \brief Filter out checks between pointers from the same partition. + /// + /// \p PtrToPartition contains the partition number for pointers. Partition + /// number -1 means that the pointer is used in multiple partitions. In this + /// case we can't safely omit the check. + SmallVector + includeOnlyCrossPartitionChecks( + const SmallVectorImpl &AllChecks, + const SmallVectorImpl &PtrToPartition, + const RuntimePointerChecking *RtPtrChecking) { + SmallVector Checks; + + std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks), + [&](const RuntimePointerChecking::PointerCheck &Check) { + for (unsigned PtrIdx1 : Check.first->Members) + for (unsigned PtrIdx2 : Check.second->Members) + // Only include this check if there is a pair of pointers + // that require checking and the pointers fall into + // separate partitions. + // + // (Note that we already know at this point that the two + // pointer groups need checking but it doesn't follow + // that each pair of pointers within the two groups need + // checking as well. + // + // In other words we don't want to include a check just + // because there is a pair of pointers between the two + // pointer groups that require checks and a different + // pair whose pointers fall into different partitions.) + if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) && + !RuntimePointerChecking::arePointersInSamePartition( + PtrToPartition, PtrIdx1, PtrIdx2)) + return true; + return false; + }); + + return Checks; + } + /// \brief Try to distribute an inner-most loop. 
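// [Illustrative sketch, not the pass] The filtering rule implemented by
// includeOnlyCrossPartitionChecks above: a pointer-pair runtime check is kept
// only when the two pointers land in different partitions; partition -1 means
// the pointer is used in several partitions, so its checks cannot be omitted.
#include <cassert>
#include <vector>

static bool needCheck(const std::vector<int> &PtrToPartition, unsigned A,
                      unsigned B) {
  int PA = PtrToPartition[A], PB = PtrToPartition[B];
  return PA == -1 || PB == -1 || PA != PB;
}

int main() {
  std::vector<int> Part = {0, 0, 1, -1};
  assert(!needCheck(Part, 0, 1)); // same partition: check can be dropped
  assert(needCheck(Part, 0, 2));  // different partitions: keep the check
  assert(needCheck(Part, 0, 3));  // -1 spans partitions: keep the check
}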
bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -655,9 +681,8 @@ private: DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization"); return false; } - auto *InterestingDependences = - LAI.getDepChecker().getInterestingDependences(); - if (!InterestingDependences || InterestingDependences->empty()) { + auto *Dependences = LAI.getDepChecker().getDependences(); + if (!Dependences || Dependences->empty()) { DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate"); return false; } @@ -685,7 +710,7 @@ private: // NumUnsafeDependencesActive reaches 0. const MemoryDepChecker &DepChecker = LAI.getDepChecker(); MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(), - *InterestingDependences); + *Dependences); int NumUnsafeDependencesActive = 0; for (auto &InstDep : MID) { @@ -735,6 +760,13 @@ private: return false; } + // Don't distribute the loop if we need too many SCEV run-time checks. + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); + if (Pred.getComplexity() > DistributeSCEVCheckThreshold) { + DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); + return false; + } + DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n"); // We're done forming the partitions set up the reverse mapping from // instructions to partitions. @@ -746,20 +778,25 @@ private: if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator()) SplitBlock(PH, PH->getTerminator(), DT, LI); - // If we need run-time checks to disambiguate pointers are run-time, version - // the loop now. + // If we need run-time checks, version the loop now. auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI); - LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition); - if (LVer.needsRuntimeChecks()) { + const auto *RtPtrChecking = LAI.getRuntimePointerChecking(); + const auto &AllChecks = RtPtrChecking->getChecks(); + auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition, + RtPtrChecking); + + if (!Pred.isAlwaysTrue() || !Checks.empty()) { DEBUG(dbgs() << "\nPointers:\n"); - DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition)); - LVer.versionLoop(this); - LVer.addPHINodes(DefsUsedOutside); + DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); + LoopVersioning LVer(LAI, L, LI, DT, SE, false); + LVer.setAliasChecks(std::move(Checks)); + LVer.setSCEVChecks(LAI.PSE.getUnionPredicate()); + LVer.versionLoop(DefsUsedOutside); } // Create identical copies of the original loop for each partition and hook // them up sequentially. - Partitions.cloneLoops(this); + Partitions.cloneLoops(); // Now, we remove the instruction from each loop that don't belong to that // partition. 
@@ -780,6 +817,7 @@ private: LoopInfo *LI; LoopAccessAnalysis *LAA; DominatorTree *DT; + ScalarEvolution *SE; }; } // anonymous namespace @@ -790,6 +828,7 @@ INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false) namespace llvm { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a21ca2417ca1..2d577de7c2b8 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -31,11 +31,6 @@ // void foo(_Complex float *P) // for (i) { __real__(*P) = 0; __imag__(*P) = 0; } // -// We should enhance this to handle negative strides through memory. -// Alternatively (and perhaps better) we could rely on an earlier pass to force -// forward iteration through memory, which is generally better for cache -// behavior. Negative strides *do* happen for memset/memcpy loops. -// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). // @@ -44,7 +39,10 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -67,149 +65,85 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); namespace { - class LoopIdiomRecognize; +class LoopIdiomRecognize : public LoopPass { + Loop *CurLoop; + AliasAnalysis *AA; + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + const TargetTransformInfo *TTI; + const DataLayout *DL; - /// This class defines some utility functions for loop idiom recognization. - class LIRUtil { - public: - /// Return true iff the block contains nothing but an uncondition branch - /// (aka goto instruction). - static bool isAlmostEmpty(BasicBlock *); +public: + static char ID; + explicit LoopIdiomRecognize() : LoopPass(ID) { + initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + } - static BranchInst *getBranch(BasicBlock *BB) { - return dyn_cast(BB->getTerminator()); - } + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - /// Derive the precondition block (i.e the block that guards the loop - /// preheader) from the given preheader. - static BasicBlock *getPrecondBb(BasicBlock *PreHead); - }; + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } - /// This class is to recoginize idioms of population-count conducted in - /// a noncountable loop. 
Currently it only recognizes this pattern: - /// \code - /// while(x) {cnt++; ...; x &= x - 1; ...} - /// \endcode - class NclPopcountRecognize { - LoopIdiomRecognize &LIR; - Loop *CurLoop; - BasicBlock *PreCondBB; +private: + typedef SmallVector StoreList; + StoreList StoreRefs; - typedef IRBuilder<> IRBuilderTy; + /// \name Countable Loop Idiom Handling + /// @{ - public: - explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); - bool recognize(); + bool runOnCountableLoop(); + bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl &ExitBlocks); - private: - /// Take a glimpse of the loop to see if we need to go ahead recoginizing - /// the idiom. - bool preliminaryScreen(); + void collectStores(BasicBlock *BB); + bool isLegalStore(StoreInst *SI); + bool processLoopStore(StoreInst *SI, const SCEV *BECount); + bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - /// Check if the given conditional branch is based on the comparison - /// between a variable and zero, and if the variable is non-zero, the - /// control yields to the loop entry. If the branch matches the behavior, - /// the variable involved in the comparion is returned. This function will - /// be called to see if the precondition and postcondition of the loop - /// are in desirable form. - Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const; + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, Value *SplatValue, + Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride); + bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride); - /// Return true iff the idiom is detected in the loop. and 1) \p CntInst - /// is set to the instruction counting the population bit. 2) \p CntPhi - /// is set to the corresponding phi node. 3) \p Var is set to the value - /// whose population bits are being counted. - bool detectIdiom - (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; + /// @} + /// \name Noncountable Loop Idiom Handling + /// @{ - /// Insert ctpop intrinsic function and some obviously dead instructions. - void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var); + bool runOnNoncountableLoop(); - /// Create llvm.ctpop.* intrinsic function. 
- CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); - }; + bool recognizePopcount(); + void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, + PHINode *CntPhi, Value *Var); - class LoopIdiomRecognize : public LoopPass { - Loop *CurLoop; - DominatorTree *DT; - ScalarEvolution *SE; - TargetLibraryInfo *TLI; - const TargetTransformInfo *TTI; - public: - static char ID; - explicit LoopIdiomRecognize() : LoopPass(ID) { - initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DT = nullptr; - SE = nullptr; - TLI = nullptr; - TTI = nullptr; - } + /// @} +}; - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, - SmallVectorImpl &ExitBlocks); - - bool processLoopStore(StoreInst *SI, const SCEV *BECount); - bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - - bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, - Value *SplatValue, Instruction *TheStore, - const SCEVAddRecExpr *Ev, - const SCEV *BECount); - bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, - const SCEV *BECount); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - - DominatorTree *getDominatorTree() { - return DT ? DT - : (DT = &getAnalysis().getDomTree()); - } - - ScalarEvolution *getScalarEvolution() { - return SE ? SE : (SE = &getAnalysis()); - } - - TargetLibraryInfo *getTargetLibraryInfo() { - if (!TLI) - TLI = &getAnalysis().getTLI(); - - return TLI; - } - - const TargetTransformInfo *getTargetTransformInfo() { - return TTI ? TTI - : (TTI = &getAnalysis().getTTI( - *CurLoop->getHeader()->getParent())); - } - - Loop *getLoop() const { return CurLoop; } - - private: - bool runOnNoncountableLoop(); - bool runOnCountableLoop(); - }; -} +} // End anonymous namespace. 
char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", @@ -218,9 +152,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -240,429 +177,17 @@ static void deleteDeadInstruction(Instruction *I, RecursivelyDeleteTriviallyDeadInstructions(Op, TLI); } -//===----------------------------------------------------------------------===// -// -// Implementation of LIRUtil -// -//===----------------------------------------------------------------------===// - -// This function will return true iff the given block contains nothing but goto. -// A typical usage of this function is to check if the preheader function is -// "almost" empty such that generated intrinsic functions can be moved across -// the preheader and be placed at the end of the precondition block without -// the concern of breaking data dependence. -bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { - if (BranchInst *Br = getBranch(BB)) { - return Br->isUnconditional() && Br == BB->begin(); - } - return false; -} - -BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { - if (BasicBlock *BB = PreHead->getSinglePredecessor()) { - BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : nullptr; - } - return nullptr; -} - -//===----------------------------------------------------------------------===// -// -// Implementation of NclPopcountRecognize -// -//===----------------------------------------------------------------------===// - -NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { -} - -bool NclPopcountRecognize::preliminaryScreen() { - const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); - if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) - return false; - - // Counting population are usually conducted by few arithmetic instructions. - // Such instructions can be easilly "absorbed" by vacant slots in a - // non-compact loop. Therefore, recognizing popcount idiom only makes sense - // in a compact loop. - - // Give up if the loop has multiple blocks or multiple backedges. - if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) - return false; - - BasicBlock *LoopBody = *(CurLoop->block_begin()); - if (LoopBody->size() >= 20) { - // The loop is too big, bail out. - return false; - } - - // It should have a preheader containing nothing but a goto instruction. - BasicBlock *PreHead = CurLoop->getLoopPreheader(); - if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead)) - return false; - - // It should have a precondition block where the generated popcount instrinsic - // function will be inserted. 
- PreCondBB = LIRUtil::getPrecondBb(PreHead); - if (!PreCondBB) - return false; - - return true; -} - -Value *NclPopcountRecognize::matchCondition(BranchInst *Br, - BasicBlock *LoopEntry) const { - if (!Br || !Br->isConditional()) - return nullptr; - - ICmpInst *Cond = dyn_cast(Br->getCondition()); - if (!Cond) - return nullptr; - - ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) - return nullptr; - - ICmpInst::Predicate Pred = Cond->getPredicate(); - if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || - (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) - return Cond->getOperand(0); - - return nullptr; -} - -bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, - PHINode *&CntPhi, - Value *&Var) const { - // Following code tries to detect this idiom: - // - // if (x0 != 0) - // goto loop-exit // the precondition of the loop - // cnt0 = init-val; - // do { - // x1 = phi (x0, x2); - // cnt1 = phi(cnt0, cnt2); - // - // cnt2 = cnt1 + 1; - // ... - // x2 = x1 & (x1 - 1); - // ... - // } while(x != 0); - // - // loop-exit: - // - - // step 1: Check to see if the look-back branch match this pattern: - // "if (a!=0) goto loop-entry". - BasicBlock *LoopEntry; - Instruction *DefX2, *CountInst; - Value *VarX1, *VarX0; - PHINode *PhiX, *CountPhi; - - DefX2 = CountInst = nullptr; - VarX1 = VarX0 = nullptr; - PhiX = CountPhi = nullptr; - LoopEntry = *(CurLoop->block_begin()); - - // step 1: Check if the loop-back branch is in desirable form. - { - if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry)) - DefX2 = dyn_cast(T); - else - return false; - } - - // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" - { - if (!DefX2 || DefX2->getOpcode() != Instruction::And) - return false; - - BinaryOperator *SubOneOp; - - if ((SubOneOp = dyn_cast(DefX2->getOperand(0)))) - VarX1 = DefX2->getOperand(1); - else { - VarX1 = DefX2->getOperand(0); - SubOneOp = dyn_cast(DefX2->getOperand(1)); - } - if (!SubOneOp) - return false; - - Instruction *SubInst = cast(SubOneOp); - ConstantInt *Dec = dyn_cast(SubInst->getOperand(1)); - if (!Dec || - !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || - (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) { - return false; - } - } - - // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } - - // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 - { - CountInst = nullptr; - for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), - IterE = LoopEntry->end(); Iter != IterE; Iter++) { - Instruction *Inst = Iter; - if (Inst->getOpcode() != Instruction::Add) - continue; - - ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); - if (!Inc || !Inc->isOne()) - continue; - - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) - continue; - - // Check if the result of the instruction is live of the loop. 
- bool LiveOutLoop = false; - for (User *U : Inst->users()) { - if ((cast(U))->getParent() != LoopEntry) { - LiveOutLoop = true; break; - } - } - - if (LiveOutLoop) { - CountInst = Inst; - CountPhi = Phi; - break; - } - } - - if (!CountInst) - return false; - } - - // step 5: check if the precondition is in this form: - // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" - { - BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); - Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader()); - if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) - return false; - - CntInst = CountInst; - CntPhi = CountPhi; - Var = T; - } - - return true; -} - -void NclPopcountRecognize::transform(Instruction *CntInst, - PHINode *CntPhi, Value *Var) { - - ScalarEvolution *SE = LIR.getScalarEvolution(); - TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo(); - BasicBlock *PreHead = CurLoop->getLoopPreheader(); - BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); - const DebugLoc DL = CntInst->getDebugLoc(); - - // Assuming before transformation, the loop is following: - // if (x) // the precondition - // do { cnt++; x &= x - 1; } while(x); - - // Step 1: Insert the ctpop instruction at the end of the precondition block - IRBuilderTy Builder(PreCondBr); - Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; - { - PopCnt = createPopcntIntrinsic(Builder, Var, DL); - NewCount = PopCntZext = - Builder.CreateZExtOrTrunc(PopCnt, cast(CntPhi->getType())); - - if (NewCount != PopCnt) - (cast(NewCount))->setDebugLoc(DL); - - // TripCnt is exactly the number of iterations the loop has - TripCnt = NewCount; - - // If the population counter's initial value is not zero, insert Add Inst. - Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); - ConstantInt *InitConst = dyn_cast(CntInitVal); - if (!InitConst || !InitConst->isZero()) { - NewCount = Builder.CreateAdd(NewCount, CntInitVal); - (cast(NewCount))->setDebugLoc(DL); - } - } - - // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to - // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic - // function would be partial dead code, and downstream passes will drag - // it back from the precondition block to the preheader. - { - ICmpInst *PreCond = cast(PreCondBr->getCondition()); - - Value *Opnd0 = PopCntZext; - Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); - if (PreCond->getOperand(0) != Var) - std::swap(Opnd0, Opnd1); - - ICmpInst *NewPreCond = - cast(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); - PreCondBr->setCondition(NewPreCond); - - RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); - } - - // Step 3: Note that the population count is exactly the trip count of the - // loop in question, which enble us to to convert the loop from noncountable - // loop into a countable one. The benefit is twofold: - // - // - If the loop only counts population, the entire loop become dead after - // the transformation. It is lots easier to prove a countable loop dead - // than to prove a noncountable one. (In some C dialects, a infite loop - // isn't dead even if it computes nothing useful. In general, DCE needs - // to prove a noncountable loop finite before safely delete it.) - // - // - If the loop also performs something else, it remains alive. - // Since it is transformed to countable form, it can be aggressively - // optimized by some optimizations which are in general not applicable - // to a noncountable loop. 
- // - // After this step, this loop (conceptually) would look like following: - // newcnt = __builtin_ctpop(x); - // t = newcnt; - // if (x) - // do { cnt++; x &= x-1; t--) } while (t > 0); - BasicBlock *Body = *(CurLoop->block_begin()); - { - BranchInst *LbBr = LIRUtil::getBranch(Body); - ICmpInst *LbCond = cast(LbBr->getCondition()); - Type *Ty = TripCnt->getType(); - - PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); - - Builder.SetInsertPoint(LbCond); - Value *Opnd1 = cast(TcPhi); - Value *Opnd2 = cast(ConstantInt::get(Ty, 1)); - Instruction *TcDec = - cast(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); - - TcPhi->addIncoming(TripCnt, PreHead); - TcPhi->addIncoming(TcDec, Body); - - CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? - CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; - LbCond->setPredicate(Pred); - LbCond->setOperand(0, TcDec); - LbCond->setOperand(1, cast(ConstantInt::get(Ty, 0))); - } - - // Step 4: All the references to the original population counter outside - // the loop are replaced with the NewCount -- the value returned from - // __builtin_ctpop(). - CntInst->replaceUsesOutsideBlock(NewCount, Body); - - // step 5: Forget the "non-computable" trip-count SCEV associated with the - // loop. The loop would otherwise not be deleted even if it becomes empty. - SE->forgetLoop(CurLoop); -} - -CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, - Value *Val, DebugLoc DL) { - Value *Ops[] = { Val }; - Type *Tys[] = { Val->getType() }; - - Module *M = (*(CurLoop->block_begin()))->getParent()->getParent(); - Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); - CallInst *CI = IRBuilder.CreateCall(Func, Ops); - CI->setDebugLoc(DL); - - return CI; -} - -/// recognize - detect population count idiom in a non-countable loop. If -/// detected, transform the relevant code to popcount intrinsic function -/// call, and return true; otherwise, return false. -bool NclPopcountRecognize::recognize() { - - if (!LIR.getTargetTransformInfo()) - return false; - - LIR.getScalarEvolution(); - - if (!preliminaryScreen()) - return false; - - Instruction *CntInst; - PHINode *CntPhi; - Value *Val; - if (!detectIdiom(CntInst, CntPhi, Val)) - return false; - - transform(CntInst, CntPhi, Val); - return true; -} - //===----------------------------------------------------------------------===// // // Implementation of LoopIdiomRecognize // //===----------------------------------------------------------------------===// -bool LoopIdiomRecognize::runOnCountableLoop() { - const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); - assert(!isa(BECount) && - "runOnCountableLoop() called on a loop without a predictable" - "backedge-taken count"); - - // If this loop executes exactly one time, then it should be peeled, not - // optimized by this pass. - if (const SCEVConstant *BECst = dyn_cast(BECount)) - if (BECst->getValue()->getValue() == 0) - return false; - - // set DT - (void)getDominatorTree(); - - LoopInfo &LI = getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); - - // set TLI - (void)getTargetLibraryInfo(); - - SmallVector ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); - - DEBUG(dbgs() << "loop-idiom Scanning: F[" - << CurLoop->getHeader()->getParent()->getName() - << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); - - bool MadeChange = false; - // Scan all the blocks in the loop that are not in subloops. - for (auto *BB : CurLoop->getBlocks()) { - // Ignore blocks in subloops. 
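// [Illustrative sketch, not part of the patch] The idiom the code above
// recognizes, at the source level: a Kernighan popcount loop and the ctpop
// form it is rewritten into (__builtin_popcount stands in for llvm.ctpop).
#include <cassert>

static int popcntLoop(unsigned X) {
  int Cnt = 0;
  while (X) {   // noncountable: the trip count depends on the bit pattern
    ++Cnt;
    X &= X - 1; // clear the lowest set bit
  }
  return Cnt;
}

int main() {
  for (unsigned X = 0; X < 1024; ++X)
    assert(popcntLoop(X) == __builtin_popcount(X));
}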
- if (LI.getLoopFor(BB) != CurLoop) - continue; - - MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks); - } - return MadeChange; -} - -bool LoopIdiomRecognize::runOnNoncountableLoop() { - NclPopcountRecognize Popcount(*this); - if (Popcount.recognize()) - return true; - - return false; -} - bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; CurLoop = L; - // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (!L->getLoopPreheader()) @@ -673,17 +198,155 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { if (Name == "memset" || Name == "memcpy") return false; - SE = &getAnalysis(); + AA = &getAnalysis().getAAResults(); + DT = &getAnalysis().getDomTree(); + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + TLI = &getAnalysis().getTLI(); + TTI = &getAnalysis().getTTI( + *CurLoop->getHeader()->getParent()); + DL = &CurLoop->getHeader()->getModule()->getDataLayout(); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); + return runOnNoncountableLoop(); } +bool LoopIdiomRecognize::runOnCountableLoop() { + const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); + assert(!isa(BECount) && + "runOnCountableLoop() called on a loop without a predictable" + "backedge-taken count"); + + // If this loop executes exactly one time, then it should be peeled, not + // optimized by this pass. + if (const SCEVConstant *BECst = dyn_cast(BECount)) + if (BECst->getAPInt() == 0) + return false; + + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + + DEBUG(dbgs() << "loop-idiom Scanning: F[" + << CurLoop->getHeader()->getParent()->getName() << "] Loop %" + << CurLoop->getHeader()->getName() << "\n"); + + bool MadeChange = false; + // Scan all the blocks in the loop that are not in subloops. + for (auto *BB : CurLoop->getBlocks()) { + // Ignore blocks in subloops. + if (LI->getLoopFor(BB) != CurLoop) + continue; + + MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks); + } + return MadeChange; +} + +static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) { + uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType()); + assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) && + "Don't overflow unsigned."); + return (unsigned)SizeInBits >> 3; +} + +static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) { + const SCEVConstant *ConstStride = cast(StoreEv->getOperand(1)); + return ConstStride->getAPInt().getZExtValue(); +} + +/// getMemSetPatternValue - If a strided store of the specified value is safe to +/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should +/// be passed in. Otherwise, return null. +/// +/// Note that we don't ever attempt to use memset_pattern8 or 4, because these +/// just replicate their input array and then pass on to memset_pattern16. +static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { + // If the value isn't a constant, we can't promote it to being in a constant + // array. We could theoretically do a store to an alloca or something, but + // that doesn't seem worthwhile. + Constant *C = dyn_cast(V); + if (!C) + return nullptr; + + // Only handle simple values that are a power of two bytes in size. + uint64_t Size = DL->getTypeSizeInBits(V->getType()); + if (Size == 0 || (Size & 7) || (Size & (Size - 1))) + return nullptr; + + // Don't care enough about darwin/ppc to implement this. 
+ if (DL->isBigEndian()) + return nullptr; + + // Convert to size in bytes. + Size /= 8; + + // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see + // if the top and bottom are the same (e.g. for vectors and large integers). + if (Size > 16) + return nullptr; + + // If the constant is exactly 16 bytes, just use it. + if (Size == 16) + return C; + + // Otherwise, we'll use an array of the constants. + unsigned ArraySize = 16 / Size; + ArrayType *AT = ArrayType::get(V->getType(), ArraySize); + return ConstantArray::get(AT, std::vector(ArraySize, C)); +} + +bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { + // Don't touch volatile stores. + if (!SI->isSimple()) + return false; + + Value *StoredVal = SI->getValueOperand(); + Value *StorePtr = SI->getPointerOperand(); + + // Reject stores that are so large that they overflow an unsigned. + uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *StoreEv = + dyn_cast(SE->getSCEV(StorePtr)); + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + return false; + + // Check to see if we have a constant stride. + if (!isa(StoreEv->getOperand(1))) + return false; + + return true; +} + +void LoopIdiomRecognize::collectStores(BasicBlock *BB) { + StoreRefs.clear(); + for (Instruction &I : *BB) { + StoreInst *SI = dyn_cast(&I); + if (!SI) + continue; + + // Make sure this is a strided store with a constant stride. + if (!isLegalStore(SI)) + continue; + + // Save the store locations. + StoreRefs.push_back(SI); + } +} + /// runOnLoopBlock - Process the specified block, which lives in a counted loop /// with the specified backedge count. This block is known to be in the current /// loop and not in any subloops. -bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, - SmallVectorImpl &ExitBlocks) { +bool LoopIdiomRecognize::runOnLoopBlock( + BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl &ExitBlocks) { // We can only promote stores in this block if they are unconditionally // executed in the loop. For a block to be unconditionally executed, it has // to dominate all the exit blocks of the loop. Verify this now. @@ -692,25 +355,18 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, return false; bool MadeChange = false; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - // Look for store instructions, which may be optimized to memset/memcpy. - if (StoreInst *SI = dyn_cast(Inst)) { - WeakVH InstPtr(I); - if (!processLoopStore(SI, BECount)) continue; - MadeChange = true; - - // If processing the store invalidated our iterator, start over from the - // top of the block. - if (!InstPtr) - I = BB->begin(); - continue; - } + // Look for store instructions, which may be optimized to memset/memcpy. + collectStores(BB); + for (auto &SI : StoreRefs) + MadeChange |= processLoopStore(SI, BECount); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. 
- if (MemSetInst *MSI = dyn_cast(Inst)) { - WeakVH InstPtr(I); - if (!processLoopMemSet(MSI, BECount)) continue; + if (MemSetInst *MSI = dyn_cast(Inst)) { + WeakVH InstPtr(&*I); + if (!processLoopMemSet(MSI, BECount)) + continue; MadeChange = true; // If processing the memset invalidated our iterator, start over from the @@ -724,71 +380,38 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, return MadeChange; } - /// processLoopStore - See if this store can be promoted to a memset or memcpy. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { - if (!SI->isSimple()) return false; + assert(SI->isSimple() && "Expected only non-volatile stores."); Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - // Reject stores that are so large that they overflow an unsigned. - auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); - uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType()); - if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) - return false; - - // See if the pointer expression is an AddRec like {base,+,1} on the current - // loop, which indicates a strided store. If we have something else, it's a - // random store we can't handle. - const SCEVAddRecExpr *StoreEv = - dyn_cast(SE->getSCEV(StorePtr)); - if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) - return false; - // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; - const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - - if (!Stride || StoreSize != Stride->getValue()->getValue()) { - // TODO: Could also handle negative stride here someday, that will require - // the validity check in mayLoopAccessLocation to be updated though. - // Enable this to print exact negative strides. - if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { - dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; - dbgs() << "BB: " << *SI->getParent(); - } - + const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + if (StoreSize != Stride && StoreSize != -Stride) return false; - } + + bool NegStride = StoreSize == -Stride; // See if we can optimize just this store in isolation. if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), - StoredVal, SI, StoreEv, BECount)) + StoredVal, SI, StoreEv, BECount, NegStride)) return true; - // If the stored value is a strided load in the same loop with the same stride - // this this may be transformable into a memcpy. This kicks in for stuff like - // for (i) A[i] = B[i]; - if (LoadInst *LI = dyn_cast(StoredVal)) { - const SCEVAddRecExpr *LoadEv = - dyn_cast(SE->getSCEV(LI->getOperand(0))); - if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && - StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) - if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) - return true; - } - //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; - - return false; + // Optimize the store into a memcpy, if it feeds an similarly strided load. + return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. 
-bool LoopIdiomRecognize:: -processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { +bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, + const SCEV *BECount) { // We can only handle non-volatile memsets with a constant size. - if (MSI->isVolatile() || !isa(MSI->getLength())) return false; + if (MSI->isVolatile() || !isa(MSI->getLength())) + return false; // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) @@ -818,17 +441,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), MSI->getValue(), - MSI, Ev, BECount); + MSI->getAlignment(), MSI->getValue(), MSI, Ev, + BECount, /*NegStride=*/false); } - /// mayLoopAccessLocation - Return true if the specified loop might access the /// specified pointer location, which is a loop-strided access. The 'Access' /// argument specifies what the verboten forms of access are (read or write). -static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, - Loop *L, const SCEV *BECount, - unsigned StoreSize, AliasAnalysis &AA, +static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, + const SCEV *BECount, unsigned StoreSize, + AliasAnalysis &AA, Instruction *IgnoredStore) { // Get the location that may be stored across the loop. Since the access is // strided positively through memory, we say that the modified location starts @@ -838,7 +460,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // If the loop iterates a fixed number of times, we can refine the access size // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast(BECount)) - AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; + AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize; // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias @@ -849,59 +471,31 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; ++BI) for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) - if (&*I != IgnoredStore && - (AA.getModRefInfo(I, StoreLoc) & Access)) + if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access)) return true; return false; } -/// getMemSetPatternValue - If a strided store of the specified value is safe to -/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should -/// be passed in. Otherwise, return null. -/// -/// Note that we don't ever attempt to use memset_pattern8 or 4, because these -/// just replicate their input array and then pass on to memset_pattern16. -static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { - // If the value isn't a constant, we can't promote it to being in a constant - // array. We could theoretically do a store to an alloca or something, but - // that doesn't seem worthwhile. - Constant *C = dyn_cast(V); - if (!C) return nullptr; - - // Only handle simple values that are a power of two bytes in size. - uint64_t Size = DL.getTypeSizeInBits(V->getType()); - if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return nullptr; - - // Don't care enough about darwin/ppc to implement this. - if (DL.isBigEndian()) - return nullptr; - - // Convert to size in bytes. 
- Size /= 8; - - // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see - // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return nullptr; - - // If the constant is exactly 16 bytes, just use it. - if (Size == 16) return C; - - // Otherwise, we'll use an array of the constants. - unsigned ArraySize = 16/Size; - ArrayType *AT = ArrayType::get(V->getType(), ArraySize); - return ConstantArray::get(AT, std::vector(ArraySize, C)); +// If we have a negative stride, Start refers to the end of the memory location +// we're trying to memset. Therefore, we need to recompute the base pointer, +// which is just Start - BECount*Size. +static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, + Type *IntPtr, unsigned StoreSize, + ScalarEvolution *SE) { + const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr); + if (StoreSize != 1) + Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize), + SCEV::FlagNUW); + return SE->getMinusSCEV(Start, Index); } - /// processLoopStridedStore - We see a strided store of some value. If we can /// transform this into a memset or memset_pattern in the loop preheader, do so. -bool LoopIdiomRecognize:: -processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, Value *StoredVal, - Instruction *TheStore, const SCEVAddRecExpr *Ev, - const SCEV *BECount) { +bool LoopIdiomRecognize::processLoopStridedStore( + Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, + Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride) { // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes @@ -909,7 +503,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); // If we're allowed to form a memset, and the stored value would be acceptable @@ -936,9 +529,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, DL, "loop-idiom"); + SCEVExpander Expander(*SE, *DL, "loop-idiom"); Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); + Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS); + + const SCEV *Start = Ev->getStart(); + // Handle negative strided loops. + if (NegStride) + Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE); // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this @@ -946,12 +545,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. 
Value *BasePtr = - Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy, - Preheader->getTerminator()); - - if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis(), TheStore)) { + Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); + if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, + *AA, TheStore)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); @@ -962,36 +558,30 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = Builder.getIntPtrTy(DL, DestAS); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - SCEV::FlagNUW); + const SCEV *NumBytesS = + SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW); if (StoreSize != 1) { NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), SCEV::FlagNUW); } Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); CallInst *NewCall; if (SplatValue) { - NewCall = Builder.CreateMemSet(BasePtr, - SplatValue, - NumBytes, - StoreAlignment); + NewCall = + Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment); } else { // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; - Module *M = TheStore->getParent()->getParent()->getParent(); - Value *MSP = M->getOrInsertFunction("memset_pattern16", - Builder.getVoidTy(), - Int8PtrTy, - Int8PtrTy, - IntPtr, - (void*)nullptr); + Module *M = TheStore->getModule(); + Value *MSP = + M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), + Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. @@ -1015,26 +605,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return true; } -/// processLoopStoreOfLoopLoad - We see a strided store whose value is a -/// same-strided load. -bool LoopIdiomRecognize:: -processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, - const SCEV *BECount) { +/// If the stored value is a strided load in the same loop with the same stride +/// this may be transformable into a memcpy. This kicks in for stuff like +/// for (i) A[i] = B[i]; +bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( + StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride) { // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - LoadInst *LI = cast(SI->getValueOperand()); + // The store must be feeding a non-volatile load. + LoadInst *LI = dyn_cast(SI->getValueOperand()); + if (!LI || !LI->isSimple()) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided load. If we have something else, it's a + // random load we can't handle. + const SCEVAddRecExpr *LoadEv = + dyn_cast(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. 
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - const DataLayout &DL = Preheader->getModule()->getDataLayout(); - SCEVExpander Expander(*SE, DL, "loop-idiom"); + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + + const SCEV *StrStart = StoreEv->getStart(); + unsigned StrAS = SI->getPointerAddressSpace(); + Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS); + + // Handle negative strided loops. + if (NegStride) + StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE); // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this @@ -1042,29 +653,31 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // or write the memory region we're storing to. This includes the load that // feeds the stores. Check for an alias by generating the base address and // checking everything. - Value *StoreBasePtr = - Expander.expandCodeFor(StoreEv->getStart(), - Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + Value *StoreBasePtr = Expander.expandCodeFor( + StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, - CurLoop, BECount, StoreSize, - getAnalysis(), SI)) { + if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + StoreSize, *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); return false; } + const SCEV *LdStart = LoadEv->getStart(); + unsigned LdAS = LI->getPointerAddressSpace(); + + // Handle negative strided loops. + if (NegStride) + LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE); + // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - Value *LoadBasePtr = - Expander.expandCodeFor(LoadEv->getStart(), - Builder.getInt8PtrTy(LI->getPointerAddressSpace()), - Preheader->getTerminator()); + Value *LoadBasePtr = Expander.expandCodeFor( + LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, - StoreSize, getAnalysis(), SI)) { + if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, + *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); @@ -1074,34 +687,368 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, everything is safe, we can transform this! - // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
- Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1), - SCEV::FlagNUW); + const SCEV *NumBytesS = + SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, + std::min(SI->getAlignment(), LI->getAlignment())); NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - - // Okay, the memset has been formed. Zap the original store and anything that + // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. deleteDeadInstruction(SI, TLI); ++NumMemCpy; return true; } + +bool LoopIdiomRecognize::runOnNoncountableLoop() { + return recognizePopcount(); +} + +/// Check if the given conditional branch is based on the comparison between +/// a variable and zero, and if the variable is non-zero, the control yields to +/// the loop entry. If the branch matches the behavior, the variable involved +/// in the comparion is returned. This function will be called to see if the +/// precondition and postcondition of the loop are in desirable form. +static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { + if (!BI || !BI->isConditional()) + return nullptr; + + ICmpInst *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return nullptr; + + ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); + if (!CmpZero || !CmpZero->isZero()) + return nullptr; + + ICmpInst::Predicate Pred = Cond->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) || + (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry)) + return Cond->getOperand(0); + + return nullptr; +} + +/// Return true iff the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction counting the population bit. +/// 2) \p CntPhi is set to the corresponding phi node. +/// 3) \p Var is set to the value whose population bits are being counted. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 != 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x1 = phi (x0, x2); +/// cnt1 = phi(cnt0, cnt2); +/// +/// cnt2 = cnt1 + 1; +/// ... +/// x2 = x1 & (x1 - 1); +/// ... +/// } while(x != 0); +/// +/// loop-exit: +/// \endcode +static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, + Instruction *&CntInst, PHINode *&CntPhi, + Value *&Var) { + // step 1: Check to see if the look-back branch match this pattern: + // "if (a!=0) goto loop-entry". + BasicBlock *LoopEntry; + Instruction *DefX2, *CountInst; + Value *VarX1, *VarX0; + PHINode *PhiX, *CountPhi; + + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. 
+ { + if (Value *T = matchCondition( + dyn_cast(LoopEntry->getTerminator()), LoopEntry)) + DefX2 = dyn_cast(T); + else + return false; + } + + // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" + { + if (!DefX2 || DefX2->getOpcode() != Instruction::And) + return false; + + BinaryOperator *SubOneOp; + + if ((SubOneOp = dyn_cast(DefX2->getOperand(0)))) + VarX1 = DefX2->getOperand(1); + else { + VarX1 = DefX2->getOperand(0); + SubOneOp = dyn_cast(DefX2->getOperand(1)); + } + if (!SubOneOp) + return false; + + Instruction *SubInst = cast(SubOneOp); + ConstantInt *Dec = dyn_cast(SubInst->getOperand(1)); + if (!Dec || + !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || + (SubInst->getOpcode() == Instruction::Add && + Dec->isAllOnesValue()))) { + return false; + } + } + + // step 3: Check the recurrence of variable X + { + PhiX = dyn_cast(VarX1); + if (!PhiX || + (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { + return false; + } + } + + // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 + { + CountInst = nullptr; + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), + IterE = LoopEntry->end(); + Iter != IterE; Iter++) { + Instruction *Inst = &*Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = dyn_cast(Inst->getOperand(0)); + if (!Phi || Phi->getParent() != LoopEntry) + continue; + + // Check if the result of the instruction is live of the loop. + bool LiveOutLoop = false; + for (User *U : Inst->users()) { + if ((cast(U))->getParent() != LoopEntry) { + LiveOutLoop = true; + break; + } + } + + if (LiveOutLoop) { + CountInst = Inst; + CountPhi = Phi; + break; + } + } + + if (!CountInst) + return false; + } + + // step 5: check if the precondition is in this form: + // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" + { + auto *PreCondBr = dyn_cast(PreCondBB->getTerminator()); + Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader()); + if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) + return false; + + CntInst = CountInst; + CntPhi = CountPhi; + Var = T; + } + + return true; +} + +/// Recognizes a population count idiom in a non-countable loop. +/// +/// If detected, transforms the relevant code to issue the popcount intrinsic +/// function call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizePopcount() { + if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) + return false; + + // Counting population are usually conducted by few arithmetic instructions. + // Such instructions can be easily "absorbed" by vacant slots in a + // non-compact loop. Therefore, recognizing popcount idiom only makes sense + // in a compact loop. + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + BasicBlock *LoopBody = *(CurLoop->block_begin()); + if (LoopBody->size() >= 20) { + // The loop is too big, bail out. + return false; + } + + // It should have a preheader containing nothing but an unconditional branch. 
+ BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH) + return false; + if (&PH->front() != PH->getTerminator()) + return false; + auto *EntryBI = dyn_cast(PH->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block where the generated popcount instrinsic + // function can be inserted. + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + Instruction *CntInst; + PHINode *CntPhi; + Value *Val; + if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val)) + return false; + + transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val); + return true; +} + +static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + DebugLoc DL) { + Value *Ops[] = {Val}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, + Instruction *CntInst, + PHINode *CntPhi, Value *Var) { + BasicBlock *PreHead = CurLoop->getLoopPreheader(); + auto *PreCondBr = dyn_cast(PreCondBB->getTerminator()); + const DebugLoc DL = CntInst->getDebugLoc(); + + // Assuming before transformation, the loop is following: + // if (x) // the precondition + // do { cnt++; x &= x - 1; } while(x); + + // Step 1: Insert the ctpop instruction at the end of the precondition block + IRBuilder<> Builder(PreCondBr); + Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; + { + PopCnt = createPopcntIntrinsic(Builder, Var, DL); + NewCount = PopCntZext = + Builder.CreateZExtOrTrunc(PopCnt, cast(CntPhi->getType())); + + if (NewCount != PopCnt) + (cast(NewCount))->setDebugLoc(DL); + + // TripCnt is exactly the number of iterations the loop has + TripCnt = NewCount; + + // If the population counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); + ConstantInt *InitConst = dyn_cast(CntInitVal); + if (!InitConst || !InitConst->isZero()) { + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + (cast(NewCount))->setDebugLoc(DL); + } + } + + // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to + // "if (NewCount == 0) loop-exit". Without this change, the intrinsic + // function would be partial dead code, and downstream passes will drag + // it back from the precondition block to the preheader. + { + ICmpInst *PreCond = cast(PreCondBr->getCondition()); + + Value *Opnd0 = PopCntZext; + Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); + if (PreCond->getOperand(0) != Var) + std::swap(Opnd0, Opnd1); + + ICmpInst *NewPreCond = cast( + Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); + PreCondBr->setCondition(NewPreCond); + + RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); + } + + // Step 3: Note that the population count is exactly the trip count of the + // loop in question, which enable us to to convert the loop from noncountable + // loop into a countable one. The benefit is twofold: + // + // - If the loop only counts population, the entire loop becomes dead after + // the transformation. It is a lot easier to prove a countable loop dead + // than to prove a noncountable one. 
(In some C dialects, an infinite loop + // isn't dead even if it computes nothing useful. In general, DCE needs + // to prove a noncountable loop finite before safely delete it.) + // + // - If the loop also performs something else, it remains alive. + // Since it is transformed to countable form, it can be aggressively + // optimized by some optimizations which are in general not applicable + // to a noncountable loop. + // + // After this step, this loop (conceptually) would look like following: + // newcnt = __builtin_ctpop(x); + // t = newcnt; + // if (x) + // do { cnt++; x &= x-1; t--) } while (t > 0); + BasicBlock *Body = *(CurLoop->block_begin()); + { + auto *LbBr = dyn_cast(Body->getTerminator()); + ICmpInst *LbCond = cast(LbBr->getCondition()); + Type *Ty = TripCnt->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(TripCnt, PreHead); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + } + + // Step 4: All the references to the original population counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctpop(). + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 5: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + SE->forgetLoop(CurLoop); +} diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index e12502654751..b4102fe9ba34 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -48,7 +48,7 @@ namespace { AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); } }; @@ -112,7 +112,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = BI++; + Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9d7e57ffebac..4295235a3f36 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -99,7 +99,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, return false; if (St && !St->isSimple()) return false; - MemInstr.push_back(I); + MemInstr.push_back(&*I); } } @@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } } - // We don't have a DepMatrix to check legality return false + // We don't have a DepMatrix to check legality return false. 
if (DepMatrix.size() == 0) return false; return true; @@ -331,9 +331,9 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { class LoopInterchangeLegality { public: LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - LoopInterchange *Pass) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass), - InnerLoopHasReduction(false) {} + LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), + PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -357,9 +357,10 @@ private: Loop *OuterLoop; Loop *InnerLoop; - /// Scev analysis. ScalarEvolution *SE; - LoopInterchange *CurrentPass; + LoopInfo *LI; + DominatorTree *DT; + bool PreserveLCSSA; bool InnerLoopHasReduction; }; @@ -371,7 +372,7 @@ public: LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} - /// Check if the loop interchange is profitable + /// Check if the loop interchange is profitable. bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); @@ -385,12 +386,12 @@ private: ScalarEvolution *SE; }; -/// LoopInterchangeTransform interchanges the loop +/// LoopInterchangeTransform interchanges the loop. class LoopInterchangeTransform { public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, - LoopInterchange *Pass, BasicBlock *LoopNestExit, + BasicBlock *LoopNestExit, bool InnerLoopContainsReductions) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LoopExit(LoopNestExit), @@ -424,21 +425,22 @@ private: bool InnerLoopHasReduction; }; -// Main LoopInterchange Pass +// Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; ScalarEvolution *SE; LoopInfo *LI; DependenceAnalysis *DA; DominatorTree *DT; + bool PreserveLCSSA; LoopInterchange() : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -447,11 +449,13 @@ struct LoopInterchange : public FunctionPass { } bool runOnFunction(Function &F) override { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); DA = &getAnalysis(); auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + // Build up a worklist of loop pairs to analyze. SmallVector Worklist; @@ -489,7 +493,7 @@ struct LoopInterchange : public FunctionPass { unsigned selectLoopForInterchange(LoopVector LoopList) { // TODO: Add a better heuristic to select the loop to be interchanged based - // on the dependece matrix. Currently we select the innermost loop. + // on the dependence matrix. Currently we select the innermost loop. return LoopList.size() - 1; } @@ -544,7 +548,7 @@ struct LoopInterchange : public FunctionPass { } unsigned SelecLoopId = selectLoopForInterchange(LoopList); - // Move the selected loop outwards to the best posible position. + // Move the selected loop outwards to the best possible position. 
for (unsigned i = SelecLoopId; i > 0; i--) { bool Interchanged = processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); @@ -574,7 +578,8 @@ struct LoopInterchange : public FunctionPass { Loop *InnerLoop = LoopList[InnerLoopId]; Loop *OuterLoop = LoopList[OuterLoopId]; - LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this); + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, + PreserveLCSSA); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); return false; @@ -586,7 +591,7 @@ struct LoopInterchange : public FunctionPass { return false; } - LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this, + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, LIL.hasInnerLoopReduction()); LIT.transform(); DEBUG(dbgs() << "Loops interchanged\n"); @@ -655,7 +660,7 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); // We do not have any basic block in between now make sure the outer header - // and outer loop latch doesnt contain any unsafe instructions. + // and outer loop latch doesn't contain any unsafe instructions. if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || containsUnsafeInstructionsInLatch(OuterLoopLatch)) return false; @@ -698,9 +703,9 @@ bool LoopInterchangeLegality::findInductionAndReductions( return false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { RecurrenceDescriptor RD; + InductionDescriptor ID; PHINode *PHI = cast(I); - ConstantInt *StepValue = nullptr; - if (isInductionPHI(PHI, SE, StepValue)) + if (InductionDescriptor::isInductionPHI(PHI, SE, ID)) Inductions.push_back(PHI); else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) Reductions.push_back(PHI); @@ -836,7 +841,7 @@ bool LoopInterchangeLegality::currentLimitations() { else FoundInduction = true; } - // The loop latch ended and we didnt find the induction variable return as + // The loop latch ended and we didn't find the induction variable return as // current limitation. if (!FoundInduction) return true; @@ -867,12 +872,14 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || isa(OuterLoopPreHeader->begin()) || !OuterLoopPreHeader->getUniquePredecessor()) { - OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass); + OuterLoopPreHeader = + InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); } if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || InnerLoopPreHeader == OuterLoop->getHeader()) { - InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass); + InnerLoopPreHeader = + InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); } // TODO: The loops could not be interchanged due to current limitations in the @@ -966,7 +973,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Add Better Profitibility checks. + // TODO: Add better profitability checks. // e.g // 1) Construct dependency matrix and move the one with no loop carried dep // inside to enable vectorization. @@ -980,7 +987,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, if (Cost < 0) return true; - // It is not profitable as per current cache profitibility model. 
But check if + // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. bool ImprovesPar = isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); @@ -996,7 +1003,7 @@ void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, return; } } - assert(false && "Couldn't find loop"); + llvm_unreachable("Couldn't find loop"); } void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, @@ -1045,7 +1052,7 @@ bool LoopInterchangeTransform::transform() { splitInnerLoopLatch(InnerIndexVar); DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); - // Splits the inner loops phi nodes out into a seperate basic block. + // Splits the inner loops phi nodes out into a separate basic block. splitInnerLoopHeader(); DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); } @@ -1113,8 +1120,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { auto &ToList = InsertBefore->getParent()->getInstList(); auto &FromList = FromBB->getInstList(); - ToList.splice(InsertBefore, FromList, FromList.begin(), - FromBB->getTerminator()); + ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), + FromBB->getTerminator()->getIterator()); } void LoopInterchangeTransform::adjustOuterLoopPreheader() { @@ -1181,8 +1188,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; - BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor(); - if (!InnerLoopHeaderSucessor) + BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); + if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers @@ -1198,11 +1205,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) - OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor); + OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. - updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader, + updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); @@ -1286,10 +1293,10 @@ bool LoopInterchangeTransform::adjustLoopLinks() { char LoopInterchange::ID = 0; INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp new file mode 100644 index 000000000000..1064d088514d --- /dev/null +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -0,0 +1,566 @@ +//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implement a loop-aware load elimination pass. +// +// It uses LoopAccessAnalysis to identify loop-carried dependences with a +// distance of one between stores and loads. These form the candidates for the +// transformation. The source value of each store then propagated to the user +// of the corresponding load. This makes the load dead. +// +// The pass can also version the loop and add memchecks in order to prove that +// may-aliasing stores can't change the value in memory before it's read by the +// load. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include + +#define LLE_OPTION "loop-load-elim" +#define DEBUG_TYPE LLE_OPTION + +using namespace llvm; + +static cl::opt CheckPerElim( + "runtime-check-per-loop-load-elim", cl::Hidden, + cl::desc("Max number of memchecks allowed per eliminated load on average"), + cl::init(1)); + +static cl::opt LoadElimSCEVCheckThreshold( + "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed for Loop " + "Load Elimination")); + + +STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE"); + +namespace { + +/// \brief Represent a store-to-forwarding candidate. +struct StoreToLoadForwardingCandidate { + LoadInst *Load; + StoreInst *Store; + + StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store) + : Load(Load), Store(Store) {} + + /// \brief Return true if the dependence from the store to the load has a + /// distance of one. E.g. A[i+1] = A[i] + bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const { + Value *LoadPtr = Load->getPointerOperand(); + Value *StorePtr = Store->getPointerOperand(); + Type *LoadPtrType = LoadPtr->getType(); + Type *LoadType = LoadPtrType->getPointerElementType(); + + assert(LoadPtrType->getPointerAddressSpace() == + StorePtr->getType()->getPointerAddressSpace() && + LoadType == StorePtr->getType()->getPointerElementType() && + "Should be a known dependence"); + + auto &DL = Load->getParent()->getModule()->getDataLayout(); + unsigned TypeByteSize = DL.getTypeAllocSize(const_cast(LoadType)); + + auto *LoadPtrSCEV = cast(PSE.getSCEV(LoadPtr)); + auto *StorePtrSCEV = cast(PSE.getSCEV(StorePtr)); + + // We don't need to check non-wrapping here because forward/backward + // dependence wouldn't be valid if these weren't monotonic accesses. + auto *Dist = cast( + PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + const APInt &Val = Dist->getAPInt(); + return Val.abs() == TypeByteSize; + } + + Value *getLoadPtr() const { return Load->getPointerOperand(); } + +#ifndef NDEBUG + friend raw_ostream &operator<<(raw_ostream &OS, + const StoreToLoadForwardingCandidate &Cand) { + OS << *Cand.Store << " -->\n"; + OS.indent(2) << *Cand.Load << "\n"; + return OS; + } +#endif +}; + +/// \brief Check if the store dominates all latches, so as long as there is no +/// intervening store this value will be loaded in the next iteration. 
+bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, + DominatorTree *DT) { + SmallVector Latches; + L->getLoopLatches(Latches); + return std::all_of(Latches.begin(), Latches.end(), + [&](const BasicBlock *Latch) { + return DT->dominates(StoreBlock, Latch); + }); +} + +/// \brief The per-loop class that does most of the work. +class LoadEliminationForLoop { +public: + LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, + DominatorTree *DT) + : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {} + + /// \brief Look through the loop-carried and loop-independent dependences in + /// this loop and find store->load dependences. + /// + /// Note that no candidate is returned if LAA has failed to analyze the loop + /// (e.g. if it's not bottom-tested, contains volatile memops, etc.) + std::forward_list + findStoreToLoadDependences(const LoopAccessInfo &LAI) { + std::forward_list Candidates; + + const auto *Deps = LAI.getDepChecker().getDependences(); + if (!Deps) + return Candidates; + + // Find store->load dependences (consequently true dep). Both lexically + // forward and backward dependences qualify. Disqualify loads that have + // other unknown dependences. + + SmallSet LoadsWithUnknownDepedence; + + for (const auto &Dep : *Deps) { + Instruction *Source = Dep.getSource(LAI); + Instruction *Destination = Dep.getDestination(LAI); + + if (Dep.Type == MemoryDepChecker::Dependence::Unknown) { + if (isa(Source)) + LoadsWithUnknownDepedence.insert(Source); + if (isa(Destination)) + LoadsWithUnknownDepedence.insert(Destination); + continue; + } + + if (Dep.isBackward()) + // Note that the designations source and destination follow the program + // order, i.e. source is always first. (The direction is given by the + // DepType.) + std::swap(Source, Destination); + else + assert(Dep.isForward() && "Needs to be a forward dependence"); + + auto *Store = dyn_cast(Source); + if (!Store) + continue; + auto *Load = dyn_cast(Destination); + if (!Load) + continue; + Candidates.emplace_front(Load, Store); + } + + if (!LoadsWithUnknownDepedence.empty()) + Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) { + return LoadsWithUnknownDepedence.count(C.Load); + }); + + return Candidates; + } + + /// \brief Return the index of the instruction according to program order. + unsigned getInstrIndex(Instruction *Inst) { + auto I = InstOrder.find(Inst); + assert(I != InstOrder.end() && "No index for instruction"); + return I->second; + } + + /// \brief If a load has multiple candidates associated (i.e. different + /// stores), it means that it could be forwarding from multiple stores + /// depending on control flow. Remove these candidates. + /// + /// Here, we rely on LAA to include the relevant loop-independent dependences. + /// LAA is known to omit these in the very simple case when the read and the + /// write within an alias set always takes place using the *same* pointer. + /// + /// However, we know that this is not the case here, i.e. we can rely on LAA + /// to provide us with loop-independent dependences for the cases we're + /// interested. Consider the case for example where a loop-independent + /// dependece S1->S2 invalidates the forwarding S3->S2. + /// + /// A[i] = ... (S1) + /// ... = A[i] (S2) + /// A[i+1] = ... (S3) + /// + /// LAA will perform dependence analysis here because there are two + /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]). 
+ void removeDependencesFromMultipleStores( + std::forward_list &Candidates) { + // If Store is nullptr it means that we have multiple stores forwarding to + // this store. + typedef DenseMap + LoadToSingleCandT; + LoadToSingleCandT LoadToSingleCand; + + for (const auto &Cand : Candidates) { + bool NewElt; + LoadToSingleCandT::iterator Iter; + + std::tie(Iter, NewElt) = + LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand)); + if (!NewElt) { + const StoreToLoadForwardingCandidate *&OtherCand = Iter->second; + // Already multiple stores forward to this load. + if (OtherCand == nullptr) + continue; + + // Handle the very basic of case when the two stores are in the same + // block so deciding which one forwards is easy. The later one forwards + // as long as they both have a dependence distance of one to the load. + if (Cand.Store->getParent() == OtherCand->Store->getParent() && + Cand.isDependenceDistanceOfOne(PSE) && + OtherCand->isDependenceDistanceOfOne(PSE)) { + // They are in the same block, the later one will forward to the load. + if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store)) + OtherCand = &Cand; + } else + OtherCand = nullptr; + } + } + + Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) { + if (LoadToSingleCand[Cand.Load] != &Cand) { + DEBUG(dbgs() << "Removing from candidates: \n" << Cand + << " The load may have multiple stores forwarding to " + << "it\n"); + return true; + } + return false; + }); + } + + /// \brief Given two pointers operations by their RuntimePointerChecking + /// indices, return true if they require an alias check. + /// + /// We need a check if one is a pointer for a candidate load and the other is + /// a pointer for a possibly intervening store. + bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2, + const SmallSet &PtrsWrittenOnFwdingPath, + const std::set &CandLoadPtrs) { + Value *Ptr1 = + LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue; + Value *Ptr2 = + LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue; + return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) || + (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1))); + } + + /// \brief Return pointers that are possibly written to on the path from a + /// forwarding store to a load. + /// + /// These pointers need to be alias-checked against the forwarding candidates. + SmallSet findPointersWrittenOnForwardingPath( + const SmallVectorImpl &Candidates) { + // From FirstStore to LastLoad neither of the elimination candidate loads + // should overlap with any of the stores. + // + // E.g.: + // + // st1 C[i] + // ld1 B[i] <-------, + // ld0 A[i] <----, | * LastLoad + // ... | | + // st2 E[i] | | + // st3 B[i+1] -- | -' * FirstStore + // st0 A[i+1] ---' + // st4 D[i] + // + // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with + // ld0. 
+  /// \brief Return pointers that are possibly written to on the path from a
+  /// forwarding store to a load.
+  ///
+  /// These pointers need to be alias-checked against the forwarding
+  /// candidates.
+  SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
+      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+    // From FirstStore to LastLoad neither of the elimination candidate loads
+    // should overlap with any of the stores.
+    //
+    // E.g.:
+    //
+    // st1 C[i]
+    // ld1 B[i] <-------,
+    // ld0 A[i] <----,  |              * LastLoad
+    // ...           |  |
+    // st2 E[i]      |  |
+    // st3 B[i+1] -- | -'              * FirstStore
+    // st0 A[i+1] ---'
+    // st4 D[i]
+    //
+    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
+    // ld0.
+
+    LoadInst *LastLoad =
+        std::max_element(Candidates.begin(), Candidates.end(),
+                         [&](const StoreToLoadForwardingCandidate &A,
+                             const StoreToLoadForwardingCandidate &B) {
+                           return getInstrIndex(A.Load) < getInstrIndex(B.Load);
+                         })
+            ->Load;
+    StoreInst *FirstStore =
+        std::min_element(Candidates.begin(), Candidates.end(),
+                         [&](const StoreToLoadForwardingCandidate &A,
+                             const StoreToLoadForwardingCandidate &B) {
+                           return getInstrIndex(A.Store) <
+                                  getInstrIndex(B.Store);
+                         })
+            ->Store;
+
+    // We're looking for stores after the first forwarding store until the end
+    // of the loop, then from the beginning of the loop until the last
+    // forwarded-to load.  Collect the pointers of those stores.
+    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
+
+    auto InsertStorePtr = [&](Instruction *I) {
+      if (auto *S = dyn_cast<StoreInst>(I))
+        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
+    };
+    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
+    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
+                  MemInstrs.end(), InsertStorePtr);
+    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
+                  InsertStorePtr);
+
+    return PtrsWrittenOnFwdingPath;
+  }
+
+  /// \brief Determine the pointer alias checks to prove that there are no
+  /// intervening stores.
+  SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
+      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+
+    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
+        findPointersWrittenOnForwardingPath(Candidates);
+
+    // Collect the pointers of the candidate loads.
+    // FIXME: SmallSet does not work with std::inserter.
+    std::set<Value *> CandLoadPtrs;
+    std::transform(Candidates.begin(), Candidates.end(),
+                   std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
+                   std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+
+    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
+    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+    std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+                 [&](const RuntimePointerChecking::PointerCheck &Check) {
+                   for (auto PtrIdx1 : Check.first->Members)
+                     for (auto PtrIdx2 : Check.second->Members)
+                       if (needsChecking(PtrIdx1, PtrIdx2,
+                                         PtrsWrittenOnFwdingPath, CandLoadPtrs))
+                         return true;
+                   return false;
+                 });
+
+    DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
+    DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+
+    return Checks;
+  }
+
+  /// \brief Perform the transformation for a candidate.
+  void
+  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
+                                  SCEVExpander &SEE) {
+    //
+    // loop:
+    //      %x = load %gep_i
+    //         = ... %x
+    //      store %y, %gep_i_plus_1
+    //
+    // =>
+    //
+    // ph:
+    //      %x.initial = load %gep_0
+    // loop:
+    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+    //      %x = load %gep_i            <---- now dead
+    //         = ... %x.storeforward
+    //      store %y, %gep_i_plus_1
+
+    Value *Ptr = Cand.Load->getPointerOperand();
+    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
+    auto *PH = L->getLoopPreheader();
+    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
+                                          PH->getTerminator());
+    Value *Initial =
+        new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
+    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
+                                   &L->getHeader()->front());
+    PHI->addIncoming(Initial, PH);
+    PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
+
+    Cand.Load->replaceAllUsesWith(PHI);
+  }
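Read side by side with the IR diagram above, the same rewrite can be sketched at the source level (illustrative only; the pass operates on IR, use() is a stand-in consumer, and the guard makes the initial load safe):

    #include <cstddef>

    inline void use(int) {} // stand-in for any consumer of the loaded value

    void before(int *A, int Y, std::size_t N) {
      for (std::size_t i = 0; i + 1 < N; ++i) {
        int X = A[i]; // reloads the value stored one iteration earlier
        use(X);
        A[i + 1] = Y;
      }
    }

    void after(int *A, int Y, std::size_t N) {
      if (N < 2)
        return;
      int Fwd = A[0]; // %x.initial: loaded once in the preheader
      for (std::size_t i = 0; i + 1 < N; ++i) {
        use(Fwd);     // uses the PHI instead of reloading A[i]
        A[i + 1] = Y;
        Fwd = Y;      // the stored value feeds the next iteration
      }
    }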
+  /// \brief Top-level driver for each loop: find store->load forwarding
+  /// candidates, add run-time checks and perform transformation.
+  bool processLoop() {
+    DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
+                 << "\" checking " << *L << "\n");
+    // Look for store-to-load forwarding cases across the
+    // backedge.  E.g.:
+    //
+    // loop:
+    //      %x = load %gep_i
+    //         = ... %x
+    //      store %y, %gep_i_plus_1
+    //
+    // =>
+    //
+    // ph:
+    //      %x.initial = load %gep_0
+    // loop:
+    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+    //      %x = load %gep_i            <---- now dead
+    //         = ... %x.storeforward
+    //      store %y, %gep_i_plus_1
+
+    // First start with store->load dependences.
+    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
+    if (StoreToLoadDependences.empty())
+      return false;
+
+    // Generate an index for each load and store according to the original
+    // program order.  This will be used later.
+    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
+
+    // To keep things simple for now, remove those where the load is
+    // potentially fed by multiple stores.
+    removeDependencesFromMultipleStores(StoreToLoadDependences);
+    if (StoreToLoadDependences.empty())
+      return false;
+
+    // Filter the candidates further.
+    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
+    unsigned NumForwarding = 0;
+    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
+      DEBUG(dbgs() << "Candidate " << Cand);
+      // Make sure that the stored value is available everywhere in the loop
+      // in the next iteration.
+      if (!doesStoreDominateAllLatches(Cand.Store->getParent(), L, DT))
+        continue;
+
+      // Check whether the SCEV difference is the same as the induction step,
+      // thus we load the value in the next iteration.
+      if (!Cand.isDependenceDistanceOfOne(PSE))
+        continue;
+
+      ++NumForwarding;
+      DEBUG(dbgs()
+            << NumForwarding
+            << ". Valid store-to-load forwarding across the loop backedge\n");
+      Candidates.push_back(Cand);
+    }
+    if (Candidates.empty())
+      return false;
+
+    // Check intervening may-alias stores.  These need runtime checks for
+    // alias disambiguation.
+    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
+        collectMemchecks(Candidates);
+
+    // Too many checks are likely to outweigh the benefits of forwarding.
+    if (Checks.size() > Candidates.size() * CheckPerElim) {
+      DEBUG(dbgs() << "Too many run-time checks needed.\n");
+      return false;
+    }
+
+    if (LAI.PSE.getUnionPredicate().getComplexity() >
+        LoadElimSCEVCheckThreshold) {
+      DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+      return false;
+    }
+
+    // Point of no-return, start the transformation.  First, version the loop
+    // if necessary.
+    if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+      LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
+      LV.setAliasChecks(std::move(Checks));
+      LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+      LV.versionLoop();
+    }
+
+    // Next, propagate the value stored by the store to the users of the load.
+    // Also for the first iteration, generate the initial value of the load.
+    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+                     "storeforward");
+    for (const auto &Cand : Candidates)
+      propagateStoredValueToLoadUsers(Cand, SEE);
+    NumLoopLoadEliminted += NumForwarding;
+
+    return true;
+  }
+
+private:
+  Loop *L;
+
+  /// \brief Maps the load/store instructions to their index according to
+  /// program order.
+  DenseMap<Instruction *, unsigned> InstOrder;
+
+  // Analyses used.
+  LoopInfo *LI;
+  const LoopAccessInfo &LAI;
+  DominatorTree *DT;
+  PredicatedScalarEvolution PSE;
+};
+
+/// \brief The pass.  Most of the work is delegated to the per-loop
+/// LoadEliminationForLoop class.
+class LoopLoadElimination : public FunctionPass {
+public:
+  LoopLoadElimination() : FunctionPass(ID) {
+    initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+    // Build up a worklist of inner-most loops to process first.  This is
+    // necessary as versioning a loop creates new loops and can invalidate
+    // iterators across the loops.
+    SmallVector<Loop *, 8> Worklist;
+
+    for (Loop *TopLevelLoop : *LI)
+      for (Loop *L : depth_first(TopLevelLoop))
+        // We only handle inner-most loops.
+        if (L->empty())
+          Worklist.push_back(L);
+
+    // Now walk the identified inner loops.
+    bool Changed = false;
+    for (Loop *L : Worklist) {
+      const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+      // The actual work is performed by LoadEliminationForLoop.
+      LoadEliminationForLoop LEL(L, LI, LAI, DT);
+      Changed |= LEL.processLoop();
+    }
+
+    return Changed;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<LoopAccessAnalysis>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
+  static char ID;
+};
+}
+
+char LoopLoadElimination::ID;
+static const char LLE_name[] = "Loop Load Elimination";
+
+INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopLoadEliminationPass() {
+  return new LoopLoadElimination();
+}
+}
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6b8ed6..27c2d8824df0 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -147,12 +147,12 @@ namespace {
     bool runOnLoop(Loop *L, LPPassManager &LPM) override;

     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<LoopInfoWrapperPass>();
       AU.addPreserved<LoopInfoWrapperPass>();
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addPreserved<DominatorTreeWrapperPass>();
-      AU.addRequired<ScalarEvolution>();
+      AU.addRequired<ScalarEvolutionWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
     }

@@ -162,11 +162,15 @@ namespace {
     ScalarEvolution *SE;
     TargetLibraryInfo *TLI;
     DominatorTree *DT;
+    bool PreserveLCSSA;

     typedef SmallVector<Instruction *, 16> SmallInstructionVector;
     typedef SmallSet<Instruction *, 16> SmallInstructionSet;

-    // A chain of isomorphic instructions, indentified by a single-use PHI,
+    // Map between induction variable and its increment
+    DenseMap<Instruction *, int64_t> IVToIncMap;
+
+    // A chain of isomorphic instructions, identified by a single-use PHI
     // representing a reduction. Only the last value may be used outside the
     // loop.
     struct SimpleLoopReduction {
@@ -300,22 +304,6 @@ namespace {
       // The functions below can be called after we've finished processing all
       // instructions in the loop, and we know which reductions were selected.

-      // Is the provided instruction the PHI of a reduction selected for
-      // rerolling?
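Since the hunks that follow adjust LoopReroll, a reminder of the transformation itself as a source-level sketch (illustrative names; the first function is rewritten into the shape of the second):

    // What loop rerolling does: recognize a manually unrolled loop and
    // rewrite it back to the compact, single-step form, re-exposing the
    // canonical induction variable.
    #include <cstddef>

    void unrolled(int *X, const int *Y, std::size_t N) { // N a multiple of 3
      for (std::size_t i = 0; i < N; i += 3) {
        X[i] = Y[i] * 2;
        X[i + 1] = Y[i + 1] * 2;
        X[i + 2] = Y[i + 2] * 2;
      }
    }

    void rerolled(int *X, const int *Y, std::size_t N) {
      for (std::size_t i = 0; i < N; ++i)
        X[i] = Y[i] * 2;
    }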
- bool isSelectedPHI(Instruction *J) { - if (!isa(J)) - return false; - - for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); - RI != RIE; ++RI) { - int i = *RI; - if (cast(J) == PossibleReds[i].getPHI()) - return true; - } - - return false; - } - bool validateSelected(); void replaceSelected(); @@ -335,7 +323,7 @@ namespace { // x[i*3+1] = y2 // x[i*3+2] = y3 // - // Base instruction -> i*3 + // Base instruction -> i*3 // +---+----+ // / | \ // ST[y1] +1 +2 <-- Roots @@ -366,8 +354,11 @@ namespace { struct DAGRootTracker { DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, ScalarEvolution *SE, AliasAnalysis *AA, - TargetLibraryInfo *TLI) - : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {} + TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA, + DenseMap &IncrMap) + : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI), + PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -413,11 +404,14 @@ namespace { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; + DominatorTree *DT; + LoopInfo *LI; + bool PreserveLCSSA; // The loop induction variable. Instruction *IV; // Loop step amount. - uint64_t Inc; + int64_t Inc; // Loop reroll count; if Inc == 1, this records the scaling applied // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; // If Inc is not 1, Scale = Inc. @@ -430,6 +424,8 @@ namespace { // they are used in (or specially, IL_All for instructions // used in the loop increment mechanism). UsesTy Uses; + // Map between induction variable and its increment + DenseMap &IVToIncMap; }; void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); @@ -442,10 +438,10 @@ namespace { char LoopReroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false) @@ -477,21 +473,20 @@ void LoopReroll::collectPossibleIVs(Loop *L, continue; if (const SCEVAddRecExpr *PHISCEV = - dyn_cast(SE->getSCEV(I))) { + dyn_cast(SE->getSCEV(&*I))) { if (PHISCEV->getLoop() != L) continue; if (!PHISCEV->isAffine()) continue; if (const SCEVConstant *IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE))) { - if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) + const APInt &AInt = IncSCEV->getAPInt().abs(); + if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc)) continue; - if (IncSCEV->getValue()->uge(MaxInc)) - continue; - - DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << - *PHISCEV << "\n"); - PossibleIVs.push_back(I); + IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); + DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV + << "\n"); + PossibleIVs.push_back(&*I); } } } @@ -552,7 +547,7 @@ void LoopReroll::collectPossibleReductions(Loop *L, if (!I->getType()->isSingleValueType()) continue; - SimpleLoopReduction SLR(I, L); + SimpleLoopReduction SLR(&*I, L); if (!SLR.valid()) continue; @@ -699,17 +694,11 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { } } - int64_t V = CI->getValue().getSExtValue(); + int64_t V = 
std::abs(CI->getValue().getSExtValue()); if (Roots.find(V) != Roots.end()) // No duplicates, please. return false; - // FIXME: Add support for negative values. - if (V < 0) { - DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n"); - return false; - } - Roots[V] = cast(I); } @@ -731,7 +720,7 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { unsigned NumBaseUses = BaseUsers.size(); if (NumBaseUses == 0) NumBaseUses = Roots.begin()->second->getNumUses(); - + // Check that every node has the same number of users. for (auto &KV : Roots) { if (KV.first == 0) @@ -744,7 +733,7 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { } } - return true; + return true; } bool LoopReroll::DAGRootTracker:: @@ -787,7 +776,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { if (!collectPossibleRoots(IVU, V)) return false; - // If we didn't get a root for index zero, then IVU must be + // If we didn't get a root for index zero, then IVU must be // subsumed. if (V.find(0) == V.end()) SubsumedInsts.insert(IVU); @@ -818,13 +807,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { } bool LoopReroll::DAGRootTracker::findRoots() { - - const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV)); - Inc = cast(RealIVSCEV->getOperand(1))-> - getValue()->getZExtValue(); + Inc = IVToIncMap[IV]; assert(RootSets.empty() && "Unclean state!"); - if (Inc == 1) { + if (std::abs(Inc) == 1) { for (auto *IVU : IV->users()) { if (isLoopIncrement(IVU, IV)) LoopIncs.push_back(cast(IVU)); @@ -996,6 +982,25 @@ bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I, return false; } +static bool isIgnorableInst(const Instruction *I) { + if (isa(I)) + return true; + const IntrinsicInst* II = dyn_cast(I); + if (!II) + return false; + switch (II->getIntrinsicID()) { + default: + return false; + case llvm::Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // TODO: the following intrinsics may also be whitelisted: + // lifetime_start, lifetime_end, invariant_start, invariant_end + return true; + } + return false; +} + bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // We now need to check for equivalence of the use graph of each root with // that of the primary induction variable (excluding the roots). Our goal @@ -1029,7 +1034,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // Make sure all instructions in the loop are in one and only one // set. for (auto &KV : Uses) { - if (KV.second.count() != 1) { + if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) { DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: " << *KV.first << " (#uses=" << KV.second.count() << ")\n"); return false; @@ -1103,15 +1108,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { " vs. " << *RootInst << "\n"); return false; } - + RootIt = TryIt; RootInst = TryIt->first; } // All instructions between the last root and this root - // may belong to some other iteration. If they belong to a + // may belong to some other iteration. If they belong to a // future iteration, then they're dangerous to alias with. 
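The switch above from an unsigned to a signed increment, together with the std::abs handling of root values, extends rerolling to count-down loops. A source-level sketch of a newly eligible loop (illustrative names):

    // With negative increments accepted, a count-down unrolled loop such as
    // this can now be rerolled as well.
    void unrolledDown(int *A, long N) { // N a positive multiple of 2
      for (long i = N - 2; i >= 0; i -= 2) {
        A[i + 1] = 0;
        A[i] = 0;
      }
    }

    void rerolledDown(int *A, long N) {
      for (long i = N - 1; i >= 0; --i)
        A[i] = 0;
    }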
- // + // // Note that because we allow a limited amount of flexibility in the order // that we visit nodes, LastRootIt might be *before* RootIt, in which // case we've already checked this set of instructions so we shouldn't @@ -1267,6 +1272,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { ++J; } + bool Negative = IVToIncMap[IV] < 0; const DataLayout &DL = Header->getModule()->getDataLayout(); // We need to create a new induction variable for each different BaseInst. @@ -1275,13 +1281,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(DRS.BaseInst)); const SCEV *Start = RealIVSCEV->getStart(); - const SCEVAddRecExpr *H = cast - (SE->getAddRecExpr(Start, - SE->getConstant(RealIVSCEV->getType(), 1), - L, SCEV::FlagAnyWrap)); + const SCEVAddRecExpr *H = cast(SE->getAddRecExpr( + Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L, + SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. SCEVExpander Expander(*SE, DL, "reroll"); - Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); + Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front()); for (auto &KV : Uses) { if (KV.second.find_first() == 0) @@ -1294,8 +1299,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); // Iteration count SCEV minus 1 - const SCEV *ICMinus1SCEV = - SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); + const SCEV *ICMinus1SCEV = SE->getMinusSCEV( + ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1)); Value *ICMinus1; // Iteration count minus 1 if (isa(ICMinus1SCEV)) { @@ -1303,7 +1308,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) - Preheader = InsertPreheaderForLoop(L, Parent); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), Preheader->getTerminator()); @@ -1444,13 +1449,14 @@ void LoopReroll::ReductionTracker::replaceSelected() { bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { - DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI); + DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, + IVToIncMap); if (!DAGRoots.findRoots()) return false; DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV << "\n"); - + if (!DAGRoots.validate(Reductions)) return false; if (!Reductions.validateSelected()) @@ -1469,11 +1475,12 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); TLI = &getAnalysis().getTLI(); DT = &getAnalysis().getDomTree(); + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << @@ -1490,13 +1497,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; const SCEV *LIBETC = SE->getBackedgeTakenCount(L); - const SCEV *IterCount = - SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1)); + const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType())); DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); // First, we 
need to find the induction variable with respect to which we can // reroll (there may be several possible options). SmallInstructionVector PossibleIVs; + IVToIncMap.clear(); collectPossibleIVs(L, PossibleIVs); if (PossibleIVs.empty()) { diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index a675e1289baf..5e6c2da08cc3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -13,11 +13,15 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -41,95 +45,6 @@ DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); -namespace { - - class LoopRotate : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { - initializeLoopRotatePass(*PassRegistry::getPassRegistry()); - if (SpecifiedMaxHeaderSize == -1) - MaxHeaderSize = DefaultRotationThreshold; - else - MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); - } - - // LCSSA form makes instruction renaming easier. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addPreserved(); - AU.addRequired(); - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L, bool SimplifiedLatch); - - private: - unsigned MaxHeaderSize; - LoopInfo *LI; - const TargetTransformInfo *TTI; - AssumptionCache *AC; - DominatorTree *DT; - }; -} - -char LoopRotate::ID = 0; -INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) - -Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { - return new LoopRotate(MaxHeaderSize); -} - -/// Rotate Loop L as many times as possible. Return true if -/// the loop is rotated at least once. -bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { - if (skipOptnoneFunction(L)) - return false; - - // Save the loop metadata. - MDNode *LoopMD = L->getLoopID(); - - Function &F = *L->getHeader()->getParent(); - - LI = &getAnalysis().getLoopInfo(); - TTI = &getAnalysis().getTTI(F); - AC = &getAnalysis().getAssumptionCache(F); - auto *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - - // Simplify the loop latch before attempting to rotate the header - // upward. Rotation may not be needed if the loop tail can be folded into the - // loop exit. 
- bool SimplifiedLatch = simplifyLoopLatch(L); - - // One loop can be rotated multiple times. - bool MadeChange = false; - while (rotateLoop(L, SimplifiedLatch)) { - MadeChange = true; - SimplifiedLatch = false; - } - - // Restore the loop metadata. - // NB! We presume LoopRotation DOESN'T ADD its own metadata. - if ((MadeChange || SimplifiedLatch) && LoopMD) - L->setLoopID(LoopMD); - - return MadeChange; -} /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the /// old header into the preheader. If there were uses of the values produced by @@ -147,7 +62,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, // as necessary. SSAUpdater SSA; for (I = OrigHeader->begin(); I != E; ++I) { - Value *OrigHeaderVal = I; + Value *OrigHeaderVal = &*I; // If there are no uses of the value (e.g. because it returns void), there // is nothing to rewrite. @@ -196,127 +111,6 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, } } -/// Determine whether the instructions in this range may be safely and cheaply -/// speculated. This is not an important enough situation to develop complex -/// heuristics. We handle a single arithmetic instruction along with any type -/// conversions. -static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, - BasicBlock::iterator End, Loop *L) { - bool seenIncrement = false; - bool MultiExitLoop = false; - - if (!L->getExitingBlock()) - MultiExitLoop = true; - - for (BasicBlock::iterator I = Begin; I != End; ++I) { - - if (!isSafeToSpeculativelyExecute(I)) - return false; - - if (isa(I)) - continue; - - switch (I->getOpcode()) { - default: - return false; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast(I)->hasAllConstantIndices()) - return false; - // fall-thru to increment case - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: { - Value *IVOpnd = !isa(I->getOperand(0)) - ? I->getOperand(0) - : !isa(I->getOperand(1)) - ? I->getOperand(1) - : nullptr; - if (!IVOpnd) - return false; - - // If increment operand is used outside of the loop, this speculation - // could cause extra live range interference. - if (MultiExitLoop) { - for (User *UseI : IVOpnd->users()) { - auto *UserInst = cast(UseI); - if (!L->contains(UserInst)) - return false; - } - } - - if (seenIncrement) - return false; - seenIncrement = true; - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // ignore type conversions - break; - } - } - return true; -} - -/// Fold the loop tail into the loop exit by speculating the loop tail -/// instructions. Typically, this is a single post-increment. In the case of a -/// simple 2-block loop, hoisting the increment can be much better than -/// duplicating the entire loop header. In the case of loops with early exits, -/// rotation will not work anyway, but simplifyLoopLatch will put the loop in -/// canonical form so downstream passes can handle it. -/// -/// I don't believe this invalidates SCEV. 
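The loop shape simplifyLoopLatch targets can be pictured loosely in source terms (an approximation only, since basic blocks are an IR notion): a two-block loop whose latch holds nothing but a cheap, speculatable post-increment.

    // Approximate source shape: the latch block carries only the increment,
    // which can be hoisted into the exiting block so the separate latch
    // disappears from the CFG.
    #include <cstddef>

    int sumBefore(const int *A, std::size_t N) {
      int S = 0;
      std::size_t i = 0;
      while (i != N) { // exiting block: the loop test
        S += A[i];     // loop body
        ++i;           // latch: a lone increment, safe to speculate
      }
      return S;
    }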
-bool LoopRotate::simplifyLoopLatch(Loop *L) { - BasicBlock *Latch = L->getLoopLatch(); - if (!Latch || Latch->hasAddressTaken()) - return false; - - BranchInst *Jmp = dyn_cast(Latch->getTerminator()); - if (!Jmp || !Jmp->isUnconditional()) - return false; - - BasicBlock *LastExit = Latch->getSinglePredecessor(); - if (!LastExit || !L->isLoopExiting(LastExit)) - return false; - - BranchInst *BI = dyn_cast(LastExit->getTerminator()); - if (!BI) - return false; - - if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L)) - return false; - - DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " - << LastExit->getName() << "\n"); - - // Hoist the instructions from Latch into LastExit. - LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp); - - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); - - // Remove Latch from the CFG so that LastExit becomes the new Latch. - BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); - - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); - return true; -} - /// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final @@ -327,7 +121,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { /// rotation. LoopRotate should be repeatable and converge to a canonical /// form. This property is satisfied because simplifying the loop latch can only /// happen once across multiple invocations of the LoopRotate pass. -bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { +static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, + const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE, + bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; @@ -382,7 +179,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. - if (ScalarEvolution *SE = getAnalysisIfAvailable()) + if (SE) SE->forgetLoop(L); DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); @@ -420,7 +217,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); while (I != E) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // If the instruction's operands are invariant and it doesn't read or write // memory, then it is safe to hoist. Doing this doesn't change the order of @@ -465,8 +262,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. 
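For reference, the overall effect of rotation at the source level (a sketch): the header test is duplicated into a guard and the loop becomes bottom-tested, so the latch is also the exiting block.

    void rotateBefore(int *A, int N) {
      int i = 0;
      while (i < N) { // header test runs before every iteration
        A[i] = 0;
        ++i;
      }
    }

    void rotateAfter(int *A, int N) {
      int i = 0;
      if (i < N) { // guard: the rotated-out copy of the header test
        do {
          A[i] = 0;
          ++i;
        } while (i < N); // test now sits in the latch: a bottom-tested loop
      }
    }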
TerminatorInst *TI = OrigHeader->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); + for (BasicBlock *SuccBB : TI->successors()) + for (BasicBlock::iterator BI = SuccBB->begin(); PHINode *PN = dyn_cast(BI); ++BI) PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); @@ -607,3 +404,221 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { ++NumRotated; return true; } + +/// Determine whether the instructions in this range may be safely and cheaply +/// speculated. This is not an important enough situation to develop complex +/// heuristics. We handle a single arithmetic instruction along with any type +/// conversions. +static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, + BasicBlock::iterator End, Loop *L) { + bool seenIncrement = false; + bool MultiExitLoop = false; + + if (!L->getExitingBlock()) + MultiExitLoop = true; + + for (BasicBlock::iterator I = Begin; I != End; ++I) { + + if (!isSafeToSpeculativelyExecute(&*I)) + return false; + + if (isa(I)) + continue; + + switch (I->getOpcode()) { + default: + return false; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast(I)->hasAllConstantIndices()) + return false; + // fall-thru to increment case + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: { + Value *IVOpnd = !isa(I->getOperand(0)) + ? I->getOperand(0) + : !isa(I->getOperand(1)) + ? I->getOperand(1) + : nullptr; + if (!IVOpnd) + return false; + + // If increment operand is used outside of the loop, this speculation + // could cause extra live range interference. + if (MultiExitLoop) { + for (User *UseI : IVOpnd->users()) { + auto *UserInst = cast(UseI); + if (!L->contains(UserInst)) + return false; + } + } + + if (seenIncrement) + return false; + seenIncrement = true; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // ignore type conversions + break; + } + } + return true; +} + +/// Fold the loop tail into the loop exit by speculating the loop tail +/// instructions. Typically, this is a single post-increment. In the case of a +/// simple 2-block loop, hoisting the increment can be much better than +/// duplicating the entire loop header. In the case of loops with early exits, +/// rotation will not work anyway, but simplifyLoopLatch will put the loop in +/// canonical form so downstream passes can handle it. +/// +/// I don't believe this invalidates SCEV. +static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || Latch->hasAddressTaken()) + return false; + + BranchInst *Jmp = dyn_cast(Latch->getTerminator()); + if (!Jmp || !Jmp->isUnconditional()) + return false; + + BasicBlock *LastExit = Latch->getSinglePredecessor(); + if (!LastExit || !L->isLoopExiting(LastExit)) + return false; + + BranchInst *BI = dyn_cast(LastExit->getTerminator()); + if (!BI) + return false; + + if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) + return false; + + DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " + << LastExit->getName() << "\n"); + + // Hoist the instructions from Latch into LastExit. 
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), + Latch->begin(), Jmp->getIterator()); + + unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; + BasicBlock *Header = Jmp->getSuccessor(0); + assert(Header == L->getHeader() && "expected a backward branch"); + + // Remove Latch from the CFG so that LastExit becomes the new Latch. + BI->setSuccessor(FallThruPath, Header); + Latch->replaceSuccessorsPhiUsesWith(LastExit); + Jmp->eraseFromParent(); + + // Nuke the Latch block. + assert(Latch->empty() && "unable to evacuate Latch"); + LI->removeBlock(Latch); + if (DT) + DT->eraseNode(Latch); + Latch->eraseFromParent(); + return true; +} + +/// Rotate \c L as many times as possible. Return true if the loop is rotated +/// at least once. +static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, + const TargetTransformInfo *TTI, + AssumptionCache *AC, DominatorTree *DT, + ScalarEvolution *SE) { + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + + // Simplify the loop latch before attempting to rotate the header + // upward. Rotation may not be needed if the loop tail can be folded into the + // loop exit. + bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT); + + // One loop can be rotated multiple times. + bool MadeChange = false; + while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) { + MadeChange = true; + SimplifiedLatch = false; + } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + + return MadeChange; +} + +namespace { + +class LoopRotate : public LoopPass { + unsigned MaxHeaderSize; + +public: + static char ID; // Pass ID, replacement for typeid + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { + initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); + } + + // LCSSA form makes instruction renaming easier. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipOptnoneFunction(L)) + return false; + Function &F = *L->getHeader()->getParent(); + + auto *LI = &getAnalysis().getLoopInfo(); + const auto *TTI = &getAnalysis().getTTI(F); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *SEWP = getAnalysisIfAvailable(); + auto *SE = SEWP ? 
&SEWP->getSE() : nullptr; + + return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE); + } +}; +} + +char LoopRotate::ID = 0; +INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) + +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4b59f3d2f6cc..2101225ed9f7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -105,10 +105,33 @@ static bool StressIVChain = false; namespace { -/// RegSortData - This class holds data which is used to order reuse candidates. +struct MemAccessTy { + /// Used in situations where the accessed memory type is unknown. + static const unsigned UnknownAddressSpace = ~0u; + + Type *MemTy; + unsigned AddrSpace; + + MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {} + + MemAccessTy(Type *Ty, unsigned AS) : + MemTy(Ty), AddrSpace(AS) {} + + bool operator==(MemAccessTy Other) const { + return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace; + } + + bool operator!=(MemAccessTy Other) const { return !(*this == Other); } + + static MemAccessTy getUnknown(LLVMContext &Ctx) { + return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace); + } +}; + +/// This class holds data which is used to order reuse candidates. class RegSortData { public: - /// UsedByIndices - This represents the set of LSRUse indices which reference + /// This represents the set of LSRUse indices which reference /// a particular register. SmallBitVector UsedByIndices; @@ -122,16 +145,14 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegSortData::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// RegUseTracker - Map register candidates to information about how they are -/// used. +/// Map register candidates to information about how they are used. 
class RegUseTracker { typedef DenseMap RegUsesTy; @@ -139,9 +160,9 @@ class RegUseTracker { SmallVector RegSequence; public: - void CountRegister(const SCEV *Reg, size_t LUIdx); - void DropRegister(const SCEV *Reg, size_t LUIdx); - void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx); + void countRegister(const SCEV *Reg, size_t LUIdx); + void dropRegister(const SCEV *Reg, size_t LUIdx); + void swapAndDropUse(size_t LUIdx, size_t LastLUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -160,7 +181,7 @@ public: } void -RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { +RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { std::pair Pair = RegUsesMap.insert(std::make_pair(Reg, RegSortData())); RegSortData &RSD = Pair.first->second; @@ -171,7 +192,7 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { +RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) { RegUsesTy::iterator It = RegUsesMap.find(Reg); assert(It != RegUsesMap.end()); RegSortData &RSD = It->second; @@ -180,7 +201,7 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) { +RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { assert(LUIdx <= LastLUIdx); // Update RegUses. The data structure is not optimized for this purpose; @@ -219,9 +240,8 @@ void RegUseTracker::clear() { namespace { -/// Formula - This class holds information that describes a formula for -/// computing satisfying a use. It may include broken-out immediates and scaled -/// registers. +/// This class holds information that describes a formula for computing +/// satisfying a use. It may include broken-out immediates and scaled registers. struct Formula { /// Global base address used for complex addressing. GlobalValue *BaseGV; @@ -235,8 +255,8 @@ struct Formula { /// The scale of any complex addressing. int64_t Scale; - /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty. The canonical representation of a formula is + /// The list of "base" registers for this use. When this is non-empty. The + /// canonical representation of a formula is /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). /// #1 enforces that the scaled register is always used when at least two @@ -247,31 +267,31 @@ struct Formula { /// form. SmallVector BaseRegs; - /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when Scale is not zero. + /// The 'scaled' register for this use. This should be non-null when Scale is + /// not zero. const SCEV *ScaledReg; - /// UnfoldedOffset - An additional constant offset which added near the - /// use. This requires a temporary register, but the offset itself can - /// live in an add immediate field rather than a register. + /// An additional constant offset which added near the use. This requires a + /// temporary register, but the offset itself can live in an add immediate + /// field rather than a register. 
int64_t UnfoldedOffset; Formula() : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(nullptr), UnfoldedOffset(0) {} - void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); + void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); bool isCanonical() const; - void Canonicalize(); + void canonicalize(); - bool Unscale(); + bool unscale(); size_t getNumRegs() const; Type *getType() const; - void DeleteBaseReg(const SCEV *&S); + void deleteBaseReg(const SCEV *&S); bool referencesReg(const SCEV *S) const; bool hasRegsUsedByUsesOtherThan(size_t LUIdx, @@ -283,7 +303,7 @@ struct Formula { } -/// DoInitialMatch - Recursion helper for InitialMatch. +/// Recursion helper for initialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl &Good, SmallVectorImpl &Bad, @@ -336,10 +356,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L, Bad.push_back(S); } -/// InitialMatch - Incorporate loop-variant parts of S into this Formula, -/// attempting to keep all loop-invariant and loop-computable values in a -/// single base register. -void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { +/// Incorporate loop-variant parts of S into this Formula, attempting to keep +/// all loop-invariant and loop-computable values in a single base register. +void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { SmallVector Good; SmallVector Bad; DoInitialMatch(S, L, Good, Bad, SE); @@ -355,7 +374,7 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } - Canonicalize(); + canonicalize(); } /// \brief Check whether or not this formula statisfies the canonical @@ -373,7 +392,7 @@ bool Formula::isCanonical() const { /// field. Otherwise, we would have to do special cases everywhere in LSR /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... /// On the other hand, 1*reg should be canonicalized into reg. -void Formula::Canonicalize() { +void Formula::canonicalize() { if (isCanonical()) return; // So far we did not need this case. This is easy to implement but it is @@ -394,7 +413,7 @@ void Formula::Canonicalize() { /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. /// \return true if it was possible to get rid of the scale, false otherwise. /// \note After this operation the formula may not be in the canonical form. -bool Formula::Unscale() { +bool Formula::unscale() { if (Scale != 1) return false; Scale = 0; @@ -403,15 +422,14 @@ bool Formula::Unscale() { return true; } -/// getNumRegs - Return the total number of register operands used by this -/// formula. This does not include register uses implied by non-constant -/// addrec strides. +/// Return the total number of register operands used by this formula. This does +/// not include register uses implied by non-constant addrec strides. size_t Formula::getNumRegs() const { return !!ScaledReg + BaseRegs.size(); } -/// getType - Return the type of this formula, if it has one, or null -/// otherwise. This type is meaningless except for the bit size. +/// Return the type of this formula, if it has one, or null otherwise. This type +/// is meaningless except for the bit size. Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : @@ -419,21 +437,21 @@ Type *Formula::getType() const { nullptr; } -/// DeleteBaseReg - Delete the given base reg from the BaseRegs list. 
-void Formula::DeleteBaseReg(const SCEV *&S) { +/// Delete the given base reg from the BaseRegs list. +void Formula::deleteBaseReg(const SCEV *&S) { if (&S != &BaseRegs.back()) std::swap(S, BaseRegs.back()); BaseRegs.pop_back(); } -/// referencesReg - Test if this formula references the given register. +/// Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { return S == ScaledReg || std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); } -/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers -/// which are used by uses other than the use with the given index. +/// Test whether this formula uses registers which are used by uses other than +/// the use with the given index. bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const { if (ScaledReg) @@ -481,30 +499,29 @@ void Formula::print(raw_ostream &OS) const { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Formula::dump() const { print(errs()); errs() << '\n'; } -#endif -/// isAddRecSExtable - Return true if the given addrec can be sign-extended -/// without changing its value. +/// Return true if the given addrec can be sign-extended without changing its +/// value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa(SE.getSignExtendExpr(AR, WideTy)); } -/// isAddSExtable - Return true if the given add can be sign-extended -/// without changing its value. +/// Return true if the given add can be sign-extended without changing its +/// value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa(SE.getSignExtendExpr(A, WideTy)); } -/// isMulSExtable - Return true if the given mul can be sign-extended -/// without changing its value. +/// Return true if the given mul can be sign-extended without changing its +/// value. static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), @@ -512,12 +529,11 @@ static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { return isa(SE.getSignExtendExpr(M, WideTy)); } -/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined -/// and if the remainder is known to be zero, or null otherwise. If -/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified -/// to Y, ignoring that the multiplication may overflow, which is useful when -/// the result will be used in a context where the most significant bits are -/// ignored. +/// Return an expression for LHS /s RHS, if it can be determined and if the +/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits +/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that +/// the multiplication may overflow, which is useful when the result will be +/// used in a context where the most significant bits are ignored. static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits = false) { @@ -528,7 +544,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Handle a few RHS special cases. 
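The constant case of this division helper can be pictured as a small self-contained sketch over plain integers (std::optional standing in for the null SCEV result); the symbolic cases generalize the same remainder-is-zero rule:

    #include <cstdint>
    #include <optional>

    // Exact signed division sketch: succeed only when the remainder is
    // provably zero; treat x /s -1 as x * -1, mirroring the special case in
    // the surrounding getExactSDiv.
    std::optional<int64_t> exactSDiv(int64_t LHS, int64_t RHS) {
      if (RHS == 0)
        return std::nullopt; // division by zero: no result
      if (RHS == -1) {
        if (LHS == INT64_MIN)
          return std::nullopt; // negation would overflow
        return -LHS;           // x /s -1 == x * -1
      }
      if (LHS % RHS != 0)
        return std::nullopt; // remainder not zero: not an exact division
      return LHS / RHS;
    }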
const SCEVConstant *RC = dyn_cast(RHS); if (RC) { - const APInt &RA = RC->getValue()->getValue(); + const APInt &RA = RC->getAPInt(); // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do // some folding. if (RA.isAllOnesValue()) @@ -542,8 +558,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (const SCEVConstant *C = dyn_cast(LHS)) { if (!RC) return nullptr; - const APInt &LA = C->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); + const APInt &LA = C->getAPInt(); + const APInt &RA = RC->getAPInt(); if (LA.srem(RA) != 0) return nullptr; return SE.getConstant(LA.sdiv(RA)); @@ -603,12 +619,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, return nullptr; } -/// ExtractImmediate - If S involves the addition of a constant integer value, -/// return that integer value, and mutate S to point to a new SCEV with that -/// value excluded. +/// If S involves the addition of a constant integer value, return that integer +/// value, and mutate S to point to a new SCEV with that value excluded. static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVConstant *C = dyn_cast(S)) { - if (C->getValue()->getValue().getMinSignedBits() <= 64) { + if (C->getAPInt().getMinSignedBits() <= 64) { S = SE.getConstant(C->getType(), 0); return C->getValue()->getSExtValue(); } @@ -630,9 +645,8 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { return 0; } -/// ExtractSymbol - If S involves the addition of a GlobalValue address, -/// return that symbol, and mutate S to point to a new SCEV with that -/// value excluded. +/// If S involves the addition of a GlobalValue address, return that symbol, and +/// mutate S to point to a new SCEV with that value excluded. static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVUnknown *U = dyn_cast(S)) { if (GlobalValue *GV = dyn_cast(U->getValue())) { @@ -657,8 +671,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { return nullptr; } -/// isAddressUse - Returns true if the specified instruction is using the -/// specified value as an address. +/// Returns true if the specified instruction is using the specified value as an +/// address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { bool isAddress = isa(Inst); if (StoreInst *SI = dyn_cast(Inst)) { @@ -682,12 +696,15 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { return isAddress; } -/// getAccessType - Return the type of the memory being accessed. -static Type *getAccessType(const Instruction *Inst) { - Type *AccessTy = Inst->getType(); - if (const StoreInst *SI = dyn_cast(Inst)) - AccessTy = SI->getOperand(0)->getType(); - else if (const IntrinsicInst *II = dyn_cast(Inst)) { +/// Return the type of the memory being accessed. +static MemAccessTy getAccessType(const Instruction *Inst) { + MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace); + if (const StoreInst *SI = dyn_cast(Inst)) { + AccessTy.MemTy = SI->getOperand(0)->getType(); + AccessTy.AddrSpace = SI->getPointerAddressSpace(); + } else if (const LoadInst *LI = dyn_cast(Inst)) { + AccessTy.AddrSpace = LI->getPointerAddressSpace(); + } else if (const IntrinsicInst *II = dyn_cast(Inst)) { // Addressing modes can also be folded into prefetches and a variety // of intrinsics. 
switch (II->getIntrinsicID()) { @@ -696,21 +713,21 @@ static Type *getAccessType(const Instruction *Inst) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - AccessTy = II->getArgOperand(0)->getType(); + AccessTy.MemTy = II->getArgOperand(0)->getType(); break; } } // All pointers have the same requirements, so canonicalize them to an // arbitrary pointer type to minimize variation. - if (PointerType *PTy = dyn_cast(AccessTy)) - AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), - PTy->getAddressSpace()); + if (PointerType *PTy = dyn_cast(AccessTy.MemTy)) + AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); return AccessTy; } -/// isExistingPhi - Return true if this AddRec is already a phi in its loop. +/// Return true if this AddRec is already a phi in its loop. static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); PHINode *PN = dyn_cast(I); ++I) { @@ -793,9 +810,8 @@ static bool isHighCostExpansion(const SCEV *S, return true; } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. +/// If any of the instructions is the specified set are trivially dead, delete +/// them and see if this makes any of their operands subsequently dead. static bool DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { bool Changed = false; @@ -842,7 +858,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, namespace { -/// Cost - This class is used to measure and compare candidate formulae. +/// This class is used to measure and compare candidate formulae. class Cost { /// TODO: Some of these could be merged. Also, a lexical ordering /// isn't always optimal. @@ -905,7 +921,7 @@ private: } -/// RateRegister - Tally up interesting quantities from the given register. +/// Tally up interesting quantities from the given register. void Cost::RateRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, @@ -951,9 +967,9 @@ void Cost::RateRegister(const SCEV *Reg, SE.hasComputableLoopEvolution(Reg, L); } -/// RatePrimaryRegister - Record this register in the set. If we haven't seen it -/// before, rate it. Optional LoserRegs provides a way to declare any formula -/// that refers to one of those regs an instant loser. +/// Record this register in the set. If we haven't seen it before, rate +/// it. Optional LoserRegs provides a way to declare any formula that refers to +/// one of those regs an instant loser. void Cost::RatePrimaryRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, @@ -1024,7 +1040,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, assert(isValid() && "invalid cost"); } -/// Lose - Set this cost to a losing value. +/// Set this cost to a losing value. void Cost::Lose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1035,7 +1051,7 @@ void Cost::Lose() { ScaleCost = ~0u; } -/// operator< - Choose the lower cost. +/// Choose the lower cost. 
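The operator< that follows relies on the std::tie idiom; in isolation (a two-field sketch), the tuples of references compare lexicographically, so earlier fields dominate later ones:

    #include <tuple>

    // Lexicographic cost comparison via std::tie: NumRegs is compared first,
    // and SetupCost only breaks ties.
    struct MiniCost {
      unsigned NumRegs;
      unsigned SetupCost;
      bool operator<(const MiniCost &Other) const {
        return std::tie(NumRegs, SetupCost) <
               std::tie(Other.NumRegs, Other.SetupCost);
      }
    };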
bool Cost::operator<(const Cost &Other) const { return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, ImmCost, SetupCost) < @@ -1061,37 +1077,35 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Cost::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// LSRFixup - An operand value in an instruction which is to be replaced -/// with some equivalent, possibly strength-reduced, replacement. +/// An operand value in an instruction which is to be replaced with some +/// equivalent, possibly strength-reduced, replacement. struct LSRFixup { - /// UserInst - The instruction which will be updated. + /// The instruction which will be updated. Instruction *UserInst; - /// OperandValToReplace - The operand of the instruction which will - /// be replaced. The operand may be used more than once; every instance - /// will be replaced. + /// The operand of the instruction which will be replaced. The operand may be + /// used more than once; every instance will be replaced. Value *OperandValToReplace; - /// PostIncLoops - If this user is to use the post-incremented value of an - /// induction variable, this variable is non-null and holds the loop - /// associated with the induction variable. + /// If this user is to use the post-incremented value of an induction + /// variable, this variable is non-null and holds the loop associated with the + /// induction variable. PostIncLoopSet PostIncLoops; - /// LUIdx - The index of the LSRUse describing the expression which - /// this fixup needs, minus an offset (below). + /// The index of the LSRUse describing the expression which this fixup needs, + /// minus an offset (below). size_t LUIdx; - /// Offset - A constant offset to be added to the LSRUse expression. - /// This allows multiple fixups to share the same LSRUse with different - /// offsets, for example in an unrolled loop. + /// A constant offset to be added to the LSRUse expression. This allows + /// multiple fixups to share the same LSRUse with different offsets, for + /// example in an unrolled loop. int64_t Offset; bool isUseFullyOutsideLoop(const Loop *L) const; @@ -1108,8 +1122,7 @@ LSRFixup::LSRFixup() : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), Offset(0) {} -/// isUseFullyOutsideLoop - Test whether this fixup always uses its -/// value outside of the given loop. +/// Test whether this fixup always uses its value outside of the given loop. bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { // PHI nodes use their value in their incoming blocks. if (const PHINode *PN = dyn_cast(UserInst)) { @@ -1149,16 +1162,15 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRFixup::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding -/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted +/// SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { static SmallVector getEmptyKey() { SmallVector V; @@ -1182,17 +1194,17 @@ struct UniquifierDenseMapInfo { } }; -/// LSRUse - This class holds the state that LSR keeps for each use in -/// IVUsers, as well as uses invented by LSR itself. 
It includes information -/// about what kinds of things can be folded into the user, information about -/// the user itself, and information about how the use may be satisfied. -/// TODO: Represent multiple users of the same expression in common? +/// This class holds the state that LSR keeps for each use in IVUsers, as well +/// as uses invented by LSR itself. It includes information about what kinds of +/// things can be folded into the user, information about the user itself, and +/// information about how the use may be satisfied. TODO: Represent multiple +/// users of the same expression in common? class LSRUse { DenseSet, UniquifierDenseMapInfo> Uniquifier; public: - /// KindType - An enum for a kind of use, indicating what types of - /// scaled and immediate operands it might support. + /// An enum for a kind of use, indicating what types of scaled and immediate + /// operands it might support. enum KindType { Basic, ///< A normal use, with no folding. Special, ///< A special case of basic, allowing -1 scales. @@ -1204,15 +1216,14 @@ public: typedef PointerIntPair SCEVUseKindPair; KindType Kind; - Type *AccessTy; + MemAccessTy AccessTy; SmallVector Offsets; int64_t MinOffset; int64_t MaxOffset; - /// AllFixupsOutsideLoop - This records whether all of the fixups using this - /// LSRUse are outside of the loop, in which case some special-case heuristics - /// may be used. + /// This records whether all of the fixups using this LSRUse are outside of + /// the loop, in which case some special-case heuristics may be used. bool AllFixupsOutsideLoop; /// RigidFormula is set to true to guarantee that this use will be associated @@ -1222,26 +1233,24 @@ public: /// changing the formula. bool RigidFormula; - /// WidestFixupType - This records the widest use type for any fixup using - /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different - /// max fixup widths to be equivalent, because the narrower one may be relying - /// on the implicit truncation to truncate away bogus bits. + /// This records the widest use type for any fixup using this + /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max + /// fixup widths to be equivalent, because the narrower one may be relying on + /// the implicit truncation to truncate away bogus bits. Type *WidestFixupType; - /// Formulae - A list of ways to build a value that can satisfy this user. - /// After the list is populated, one of these is selected heuristically and - /// used to formulate a replacement for OperandValToReplace in UserInst. + /// A list of ways to build a value that can satisfy this user. After the + /// list is populated, one of these is selected heuristically and used to + /// formulate a replacement for OperandValToReplace in UserInst. SmallVector Formulae; - /// Regs - The set of register candidates used by all formulae in this LSRUse. + /// The set of register candidates used by all formulae in this LSRUse. 
SmallPtrSet Regs; - LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), - MinOffset(INT64_MAX), - MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), - RigidFormula(false), - WidestFixupType(nullptr) {} + LSRUse(KindType K, MemAccessTy AT) + : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true), RigidFormula(false), + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1254,8 +1263,8 @@ public: } -/// HasFormula - Test whether this use as a formula which has the same -/// registers as the given formula. +/// Test whether this use has a formula which has the same registers as the given +/// formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { SmallVector Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); @@ -1264,9 +1273,8 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { return Uniquifier.count(Key); } -/// InsertFormula - If the given formula has not yet been inserted, add it to -/// the list, and return true. Return false otherwise. -/// The formula must be in canonical form. +/// If the given formula has not yet been inserted, add it to the list, and +/// return true. Return false otherwise. The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { assert(F.isCanonical() && "Invalid canonical representation"); @@ -1300,14 +1308,14 @@ bool LSRUse::InsertFormula(const Formula &F) { return true; } -/// DeleteFormula - Remove the given formula from this use's list. +/// Remove the given formula from this use's list. void LSRUse::DeleteFormula(Formula &F) { if (&F != &Formulae.back()) std::swap(F, Formulae.back()); Formulae.pop_back(); } -/// RecomputeRegs - Recompute the Regs field, and update RegUses. +/// Recompute the Regs field, and update RegUses. void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { // Now that we've filtered out some formulae, recompute the Regs set. SmallPtrSet OldRegs = std::move(Regs); @@ -1320,7 +1328,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { // Update the RegTracker.
for (const SCEV *S : OldRegs) if (!Regs.count(S)) - RegUses.DropRegister(S, LUIdx); + RegUses.dropRegister(S, LUIdx); } void LSRUse::print(raw_ostream &OS) const { @@ -1331,10 +1339,13 @@ void LSRUse::print(raw_ostream &OS) const { case ICmpZero: OS << "ICmpZero"; break; case Address: OS << "Address of "; - if (AccessTy->isPointerTy()) + if (AccessTy.MemTy->isPointerTy()) OS << "pointer"; // the full pointer type could be really verbose - else - OS << *AccessTy; + else { + OS << *AccessTy.MemTy; + } + + OS << " in addrspace(" << AccessTy.AddrSpace << ')'; } OS << ", Offsets={"; @@ -1353,19 +1364,19 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRUse::dump() const { print(errs()); errs() << '\n'; } -#endif static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset, + HasBaseReg, Scale, AccessTy.AddrSpace); case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to @@ -1412,7 +1423,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { // Check for overflow. @@ -1433,7 +1444,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, const Formula &F) { // For the purpose of isAMCompletelyFolded either having a canonical formula // or a scale not equal to zero is correct. @@ -1447,11 +1458,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } -/// isLegalUse - Test whether we know how to expand the current formula. +/// Test whether we know how to expand the current formula. static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { + int64_t MaxOffset, LSRUse::KindType Kind, + MemAccessTy AccessTy, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { // We know how to expand completely foldable formulae. 
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale) || @@ -1463,8 +1474,8 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const Formula &F) { + int64_t MaxOffset, LSRUse::KindType Kind, + MemAccessTy AccessTy, const Formula &F) { return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } @@ -1490,14 +1501,12 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, switch (LU.Kind) { case LSRUse::Address: { // Check the scaling factor cost with both the min and max offsets. - int ScaleCostMinOffset = - TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, - F.BaseOffset + LU.MinOffset, - F.HasBaseReg, F.Scale); - int ScaleCostMaxOffset = - TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, - F.BaseOffset + LU.MaxOffset, - F.HasBaseReg, F.Scale); + int ScaleCostMinOffset = TTI.getScalingFactorCost( + LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg, + F.Scale, LU.AccessTy.AddrSpace); + int ScaleCostMaxOffset = TTI.getScalingFactorCost( + LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg, + F.Scale, LU.AccessTy.AddrSpace); assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 && "Legal addressing mode has an illegal cost!"); @@ -1515,7 +1524,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg) { // Fast-path: zero is always foldable. @@ -1539,7 +1548,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, static bool isAlwaysFoldable(const TargetTransformInfo &TTI, ScalarEvolution &SE, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, - Type *AccessTy, const SCEV *S, bool HasBaseReg) { + MemAccessTy AccessTy, const SCEV *S, + bool HasBaseReg) { // Fast-path: zero is always foldable. if (S->isZero()) return true; @@ -1564,9 +1574,9 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, namespace { -/// IVInc - An individual increment in a Chain of IV increments. -/// Relate an IV user to an expression that computes the IV it uses from the IV -/// used by the previous link in the Chain. +/// An individual increment in a Chain of IV increments. Relate an IV user to +/// an expression that computes the IV it uses from the IV used by the previous +/// link in the Chain. /// /// For the head of a chain, IncExpr holds the absolute SCEV expression for the /// original IVOperand. The head of the chain's IVOperand is only valid during @@ -1582,8 +1592,8 @@ struct IVInc { UserInst(U), IVOperand(O), IncExpr(E) {} }; -// IVChain - The list of IV increments in program order. -// We typically add the head of a chain without finding subsequent links. +// The list of IV increments in program order. We typically add the head of a +// chain without finding subsequent links. struct IVChain { SmallVector Incs; const SCEV *ExprBase; @@ -1595,7 +1605,7 @@ struct IVChain { typedef SmallVectorImpl::const_iterator const_iterator; - // begin - return the first increment in the chain. + // Return the first increment in the chain. 
const_iterator begin() const { assert(!Incs.empty()); return std::next(Incs.begin()); @@ -1604,32 +1614,30 @@ struct IVChain { return Incs.end(); } - // hasIncs - Returns true if this chain contains any increments. + // Returns true if this chain contains any increments. bool hasIncs() const { return Incs.size() >= 2; } - // add - Add an IVInc to the end of this chain. + // Add an IVInc to the end of this chain. void add(const IVInc &X) { Incs.push_back(X); } - // tailUserInst - Returns the last UserInst in the chain. + // Returns the last UserInst in the chain. Instruction *tailUserInst() const { return Incs.back().UserInst; } - // isProfitableIncrement - Returns true if IncExpr can be profitably added to - // this chain. + // Returns true if IncExpr can be profitably added to this chain. bool isProfitableIncrement(const SCEV *OperExpr, const SCEV *IncExpr, ScalarEvolution&); }; -/// ChainUsers - Helper for CollectChains to track multiple IV increment uses. -/// Distinguish between FarUsers that definitely cross IV increments and -/// NearUsers that may be used between IV increments. +/// Helper for CollectChains to track multiple IV increment uses. Distinguish +/// between FarUsers that definitely cross IV increments and NearUsers that may +/// be used between IV increments. struct ChainUsers { SmallPtrSet FarUsers; SmallPtrSet NearUsers; }; -/// LSRInstance - This class holds state for the main loop strength reduction -/// logic. +/// This class holds state for the main loop strength reduction logic. class LSRInstance { IVUsers &IU; ScalarEvolution &SE; @@ -1639,25 +1647,25 @@ class LSRInstance { Loop *const L; bool Changed; - /// IVIncInsertPos - This is the insert position that the current loop's - /// induction variable increment should be placed. In simple loops, this is - /// the latch block's terminator. But in more complicated cases, this is a - /// position which will dominate all the in-loop post-increment users. + /// This is the insert position that the current loop's induction variable + /// increment should be placed. In simple loops, this is the latch block's + /// terminator. But in more complicated cases, this is a position which will + /// dominate all the in-loop post-increment users. Instruction *IVIncInsertPos; - /// Factors - Interesting factors between use strides. + /// Interesting factors between use strides. SmallSetVector Factors; - /// Types - Interesting use types, to facilitate truncation reuse. + /// Interesting use types, to facilitate truncation reuse. SmallSetVector Types; - /// Fixups - The list of operands which are to be replaced. + /// The list of operands which are to be replaced. SmallVector Fixups; - /// Uses - The list of interesting uses. + /// The list of interesting uses. SmallVector Uses; - /// RegUses - Track which uses use which register candidates. + /// Track which uses use which register candidates. RegUseTracker RegUses; // Limit the number of chains to avoid quadratic behavior. We don't expect to @@ -1665,10 +1673,10 @@ class LSRInstance { // back to normal LSR behavior for those uses. static const unsigned MaxChains = 8; - /// IVChainVec - IV users can form a chain of IV increments. + /// IV users can form a chain of IV increments. SmallVector IVChainVec; - /// IVIncSet - IV users that belong to profitable IVChains. + /// IV users that belong to profitable IVChains. 
SmallPtrSet IVIncSet; void OptimizeShadowIV(); @@ -1696,11 +1704,10 @@ class LSRInstance { UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy); + LSRUse::KindType Kind, MemAccessTy AccessTy); - std::pair getUse(const SCEV *&Expr, - LSRUse::KindType Kind, - Type *AccessTy); + std::pair getUse(const SCEV *&Expr, LSRUse::KindType Kind, + MemAccessTy AccessTy); void DeleteUse(LSRUse &LU, size_t LUIdx); @@ -1769,18 +1776,16 @@ class LSRInstance { void RewriteForPHI(PHINode *PN, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const; + SmallVectorImpl &DeadInsts) const; void Rewrite(const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const; - void ImplementSolution(const SmallVectorImpl &Solution, - Pass *P); + SmallVectorImpl &DeadInsts) const; + void ImplementSolution(const SmallVectorImpl &Solution); public: - LSRInstance(Loop *L, Pass *P); + LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, + LoopInfo &LI, const TargetTransformInfo &TTI); bool getChanged() const { return Changed; } @@ -1793,8 +1798,8 @@ public: } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast operation. +/// If IV is used in an int-to-float cast inside the loop then try to eliminate +/// the cast operation. void LSRInstance::OptimizeShadowIV() { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa(BackedgeTakenCount)) @@ -1902,9 +1907,8 @@ void LSRInstance::OptimizeShadowIV() { } } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. +/// If Cond has an operand that is an expression of an IV, set the IV user and +/// stride information and return true, otherwise return false. bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { for (IVStrideUse &U : IU) if (U.getUser() == Cond) { @@ -1917,8 +1921,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { return false; } -/// OptimizeMax - Rewrite the loop's terminating condition if it uses -/// a max computation. +/// Rewrite the loop's terminating condition if it uses a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2076,8 +2079,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { return NewCond; } -/// OptimizeLoopTermCond - Change loop terminating condition to use the -/// postinc iv when possible. +/// Change loop terminating condition to use the postinc iv when possible. void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet PostIncs; @@ -2152,16 +2154,18 @@ LSRInstance::OptimizeLoopTermCond() { C->getValue().isMinSignedValue()) goto decline_post_inc; // Check for possible scaled-address reuse.
- Type *AccessTy = getAccessType(UI->getUser()); + MemAccessTy AccessTy = getAccessType(UI->getUser()); int64_t Scale = C->getSExtValue(); - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, - /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, + /*BaseOffset=*/0, + /*HasBaseReg=*/false, Scale, + AccessTy.AddrSpace)) goto decline_post_inc; Scale = -Scale; - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, - /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, + /*BaseOffset=*/0, + /*HasBaseReg=*/false, Scale, + AccessTy.AddrSpace)) goto decline_post_inc; } } @@ -2180,7 +2184,7 @@ LSRInstance::OptimizeLoopTermCond() { ICmpInst *OldCond = Cond; Cond = cast(Cond->clone()); Cond->setName(L->getHeader()->getName() + ".termcond"); - ExitingBlock->getInstList().insert(TermBr, Cond); + ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond); // Clone the IVUse, as the old use still exists! CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); @@ -2213,15 +2217,14 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accommodate a fixup -/// at the given offset and other details. If so, update the use and -/// return true. -bool -LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy) { +/// Determine if the given use can accommodate a fixup at the given offset and +/// other details. If so, update the use and return true. +bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + bool HasBaseReg, LSRUse::KindType Kind, + MemAccessTy AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; - Type *NewAccessTy = AccessTy; + MemAccessTy NewAccessTy = AccessTy; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -2232,8 +2235,10 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // Check for a mismatched access type, and fall back conservatively as needed. // TODO: Be less conservative when the type is similar and can use the same // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + if (Kind == LSRUse::Address) { + if (AccessTy != LU.AccessTy) + NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext()); + } // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { @@ -2257,12 +2262,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, return true; } -/// getUse - Return an LSRUse index and an offset value for a fixup which -/// needs the given expression, with the given kind and optional access type. -/// Either reuse an existing use or create a new one, as needed. -std::pair -LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, Type *AccessTy) { +/// Return an LSRUse index and an offset value for a fixup which needs the given +/// expression, with the given kind and optional access type. Either reuse an +/// existing use or create a new one, as needed. 
+std::pair LSRInstance::getUse(const SCEV *&Expr, + LSRUse::KindType Kind, + MemAccessTy AccessTy) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); @@ -2300,18 +2305,18 @@ LSRInstance::getUse(const SCEV *&Expr, return std::make_pair(LUIdx, Offset); } -/// DeleteUse - Delete the given use from the Uses list. +/// Delete the given use from the Uses list. void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { if (&LU != &Uses.back()) std::swap(LU, Uses.back()); Uses.pop_back(); // Update RegUses. - RegUses.SwapAndDropUse(LUIdx, Uses.size()); + RegUses.swapAndDropUse(LUIdx, Uses.size()); } -/// FindUseWithFormula - Look for a use distinct from OrigLU which is has -/// a formula that has the same registers as the given formula. +/// Look for a use distinct from OrigLU which has a formula that has the same +/// registers as the given formula. LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { @@ -2396,14 +2401,14 @@ void LSRInstance::CollectInterestingTypesAndFactors() { if (const SCEVConstant *Factor = dyn_cast_or_null(getExactSDiv(NewStride, OldStride, SE, true))) { - if (Factor->getValue()->getValue().getMinSignedBits() <= 64) - Factors.insert(Factor->getValue()->getValue().getSExtValue()); + if (Factor->getAPInt().getMinSignedBits() <= 64) + Factors.insert(Factor->getAPInt().getSExtValue()); } else if (const SCEVConstant *Factor = dyn_cast_or_null(getExactSDiv(OldStride, NewStride, SE, true))) { - if (Factor->getValue()->getValue().getMinSignedBits() <= 64) - Factors.insert(Factor->getValue()->getValue().getSExtValue()); + if (Factor->getAPInt().getMinSignedBits() <= 64) + Factors.insert(Factor->getAPInt().getSExtValue()); } } @@ -2415,9 +2420,9 @@ void LSRInstance::CollectInterestingTypesAndFactors() { DEBUG(print_factors_and_types(dbgs())); } -/// findIVOperand - Helper for CollectChains that finds an IV operand (computed -/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped -/// Instructions to IVStrideUses, we could partially skip this. +/// Helper for CollectChains that finds an IV operand (computed by an AddRec in +/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to +/// IVStrideUses, we could partially skip this. static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE) { @@ -2436,29 +2441,28 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE, return OI; } -/// getWideOperand - IVChain logic must consistenctly peek base TruncInst -/// operands, so wrap it in a convenient helper. +/// IVChain logic must consistently peek base TruncInst operands, so wrap it in +/// a convenient helper. static Value *getWideOperand(Value *Oper) { if (TruncInst *Trunc = dyn_cast(Oper)) return Trunc->getOperand(0); return Oper; } -/// isCompatibleIVType - Return true if we allow an IV chain to include both -/// types. +/// Return true if we allow an IV chain to include both types. static bool isCompatibleIVType(Value *LVal, Value *RVal) { Type *LType = LVal->getType(); Type *RType = RVal->getType(); return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); } -/// getExprBase - Return an approximation of this SCEV expression's "base", or -/// NULL for any constant. Returning the expression itself is -/// conservative. Returning a deeper subexpression is more precise and valid as -/// long as it isn't less complex than another subexpression.
For expressions -/// involving multiple unscaled values, we need to return the pointer-type -/// SCEVUnknown. This avoids forming chains across objects, such as: -/// PrevOper==a[i], IVOper==b[i], IVInc==b-a. +/// Return an approximation of this SCEV expression's "base", or NULL for any +/// constant. Returning the expression itself is conservative. Returning a +/// deeper subexpression is more precise and valid as long as it isn't less +/// complex than another subexpression. For expressions involving multiple +/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids +/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i], +/// IVInc==b-a. /// /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost /// SCEVUnknown, we simply return the rightmost SCEV operand. @@ -2601,8 +2605,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl &Users, return cost < 0; } -/// ChainInstruction - Add this IV user to an existing chain or make it the head -/// of a new chain. +/// Add this IV user to an existing chain or make it the head of a new chain. void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl &ChainUsersVec) { // When IVs are used as types of varying widths, they are generally converted @@ -2714,7 +2717,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); } -/// CollectChains - Populate the vector of Chains. +/// Populate the vector of Chains. /// /// This decreases ILP at the architecture level. Targets with ample registers, /// multiple memory ports, and no register renaming probably don't want @@ -2755,19 +2758,19 @@ void LSRInstance::CollectChains() { for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end(); I != E; ++I) { // Skip instructions that weren't seen by IVUsers analysis. - if (isa(I) || !IU.isIVUserOrOperand(I)) + if (isa(I) || !IU.isIVUserOrOperand(&*I)) continue; // Ignore users that are part of a SCEV expression. This way we only // consider leaf IV Users. This effectively rediscovers a portion of // IVUsers analysis but in program order this time. - if (SE.isSCEVable(I->getType()) && !isa(SE.getSCEV(I))) + if (SE.isSCEVable(I->getType()) && !isa(SE.getSCEV(&*I))) continue; // Remove this instruction from any NearUsers set it may be in. for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); ChainIdx < NChains; ++ChainIdx) { - ChainUsersVec[ChainIdx].NearUsers.erase(I); + ChainUsersVec[ChainIdx].NearUsers.erase(&*I); } // Search for operands that can be chained. SmallPtrSet UniqueOperands; @@ -2776,7 +2779,7 @@ void LSRInstance::CollectChains() { while (IVOpIter != IVOpEnd) { Instruction *IVOpInst = cast(*IVOpIter); if (UniqueOperands.insert(IVOpInst).second) - ChainInstruction(I, IVOpInst, ChainUsersVec); + ChainInstruction(&*I, IVOpInst, ChainUsersVec); IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); } } // Continue walking down the instructions. 
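// Illustrative aside, not part of the patch: the hunks in this file replace
// the bare "Type *AccessTy" with a MemAccessTy value pairing the memory type
// with its address space. MemAccessTy's definition is not shown in this
// excerpt, so the following is only a minimal sketch consistent with the
// usage visible here (.MemTy, .AddrSpace, a default "unset" state,
// comparison, and MemAccessTy::getUnknown); the name suffix and the field
// defaults are assumptions.
#include "llvm/IR/Type.h"

struct MemAccessTySketch {
  llvm::Type *MemTy;  // type of the loaded/stored value, or null when unset
  unsigned AddrSpace; // address space of the access

  MemAccessTySketch() : MemTy(nullptr), AddrSpace(0) {}
  MemAccessTySketch(llvm::Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(const MemAccessTySketch &RHS) const {
    return MemTy == RHS.MemTy && AddrSpace == RHS.AddrSpace;
  }
  bool operator!=(const MemAccessTySketch &RHS) const {
    return !(*this == RHS);
  }

  // getUnknown() presumably keeps the old Type::getVoidTy fallback that
  // reconcileNewOffset() used for mismatched access types; the address
  // space chosen here is an assumption.
  static MemAccessTySketch getUnknown(llvm::LLVMContext &Ctx) {
    return MemAccessTySketch(llvm::Type::getVoidTy(Ctx), 0);
  }
};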
@@ -2828,20 +2831,20 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, if (!IncConst || !isAddressUse(UserInst, Operand)) return false; - if (IncConst->getValue()->getValue().getMinSignedBits() > 64) + if (IncConst->getAPInt().getMinSignedBits() > 64) return false; + MemAccessTy AccessTy = getAccessType(UserInst); int64_t IncOffset = IncConst->getValue()->getSExtValue(); - if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ nullptr, - IncOffset, /*HaseBaseReg=*/ false)) + if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, + IncOffset, /*HasBaseReg=*/false)) return false; return true; } -/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to -/// materialize the IV user's operand from the previous IV user's operand. +/// Generate an add or subtract for each IVInc in a chain to materialize the IV +/// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced @@ -2961,7 +2964,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = U.getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - Type *AccessTy = nullptr; + MemAccessTy AccessTy; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -3027,9 +3030,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { DEBUG(print_fixups(dbgs())); } -/// InsertInitialFormula - Insert a formula for the given expression into -/// the given use, separating out loop-variant portions from loop-invariant -/// and loop-computable portions. +/// Insert a formula for the given expression into the given use, separating out +/// loop-variant portions from loop-invariant and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { // Mark uses whose expressions cannot be expanded. @@ -3037,13 +3039,13 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { LU.RigidFormula = true; Formula F; - F.InitialMatch(S, L, SE); + F.initialMatch(S, L, SE); bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Initial formula already exists!"); (void)Inserted; } -/// InsertSupplementalFormula - Insert a simple single-register formula for -/// the given expression into the given use. +/// Insert a simple single-register formula for the given expression into the +/// given use. void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { @@ -3054,17 +3056,16 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } -/// CountRegisters - Note which registers are used by the given formula, -/// updating RegUses. +/// Note which registers are used by the given formula, updating RegUses. void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { if (F.ScaledReg) - RegUses.CountRegister(F.ScaledReg, LUIdx); + RegUses.countRegister(F.ScaledReg, LUIdx); for (const SCEV *BaseReg : F.BaseRegs) - RegUses.CountRegister(BaseReg, LUIdx); + RegUses.countRegister(BaseReg, LUIdx); } -/// InsertFormula - If the given formula has not yet been inserted, add it to -/// the list, and return true. +/// If the given formula has not yet been inserted, add it to the list, and +/// return true.
Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { // Do not insert formula that we will not be able to expand. assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && @@ -3076,9 +3077,9 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { return true; } -/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of -/// loop-invariant values which we're tracking. These other uses will pin these -/// values in registers, making them less profitable for elimination. +/// Check for other uses of loop-invariant values which we're tracking. These +/// other uses will pin these values in registers, making them less profitable +/// for elimination. /// TODO: This currently misses non-constant addrec step registers. /// TODO: Should this give more weight to users inside the loop? void @@ -3124,6 +3125,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { PHINode::getIncomingValueNumForOperand(U.getOperandNo())); if (!DT.dominates(L->getHeader(), UseBB)) continue; + // Don't bother if the instruction is in a BB which ends in an EHPad. + if (UseBB->getTerminator()->isEHPad()) + continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. if (SE.isSCEVable(UserInst->getType())) { @@ -3148,7 +3152,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast(UserInst); LF.OperandValToReplace = U; - std::pair P = getUse(S, LSRUse::Basic, nullptr); + std::pair P = getUse( + S, LSRUse::Basic, MemAccessTy()); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3165,8 +3170,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { } } -/// CollectSubexprs - Split S into subexpressions which can be pulled out into -/// separate registers. If C is non-null, multiply each subexpression by C. +/// Split S into subexpressions which can be pulled out into separate +/// registers. If C is non-null, multiply each subexpression by C. /// /// Return remainder expression after factoring the subexpressions captured by /// Ops. If Ops is complete, return NULL. @@ -3300,7 +3305,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, F.BaseRegs.push_back(*J); // We may have changed the number of register in base regs, adjust the // formula accordingly. - F.Canonicalize(); + F.canonicalize(); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like @@ -3309,8 +3314,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, } } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. +/// Split out subexpressions from adds and the bases of addrecs. void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth) { assert(Base.isCanonical() && "Input must be in the canonical form"); @@ -3326,8 +3330,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, /* Idx */ -1, /* IsScaledReg */ true); } -/// GenerateCombinations - Generate a formula consisting of all of the -/// loop-dominating registers added into a single register. +/// Generate a formula consisting of all of the loop-dominating registers added +/// into a single register. void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. 
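// Illustrative aside, not part of the patch: the hunks below rename
// Formula::Unscale()/Canonicalize() to unscale()/canonicalize(). Going by
// the surrounding comments ("reg1 + 1*reg2 => reg1 + reg2" and "the
// canonical representation of 1*reg is reg"), unscale() folds a by-1 scaled
// register back into the base registers. The pared-down struct below is a
// sketch of just that operation; the real Formula class carries more fields
// (BaseGV, BaseOffset, HasBaseReg, ...) and stricter invariants.
#include "llvm/ADT/SmallVector.h"
#include <cstdint>

namespace llvm { class SCEV; }

struct FormulaSketch {
  const llvm::SCEV *ScaledReg = nullptr; // register multiplied by Scale
  int64_t Scale = 0;                     // 0 means no scaled register
  llvm::SmallVector<const llvm::SCEV *, 4> BaseRegs;

  // reg1 + 1*reg2 ==> reg1 + reg2; any other scale is left untouched.
  bool unscale() {
    if (Scale != 1)
      return false;
    Scale = 0;
    BaseRegs.push_back(ScaledReg);
    ScaledReg = nullptr;
    return true;
  }
};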
@@ -3336,7 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before // processing the formula. - Base.Unscale(); + Base.unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector Ops; @@ -3354,7 +3358,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); - F.Canonicalize(); + F.canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } @@ -3379,7 +3383,7 @@ void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, (void)InsertFormula(LU, LUIdx, F); } -/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. +/// Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. @@ -3410,8 +3414,8 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.Scale = 0; F.ScaledReg = nullptr; } else - F.DeleteBaseReg(F.BaseRegs[Idx]); - F.Canonicalize(); + F.deleteBaseReg(F.BaseRegs[Idx]); + F.canonicalize(); } else if (IsScaledReg) F.ScaledReg = NewG; else @@ -3452,8 +3456,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, /* IsScaledReg */ true); } -/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up -/// the comparison. For example, x == y -> x*c == y*c. +/// For ICmpZero, check to see if we can scale up the comparison. For example, x +/// == y -> x*c == y*c. void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (LU.Kind != LSRUse::ICmpZero) return; @@ -3538,8 +3542,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, } } -/// GenerateScales - Generate stride factor reuse formulae by making use of -/// scaled-offset address modes, for example. +/// Generate stride factor reuse formulae by making use of scaled-offset address +/// modes, for example. void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. Type *IntTy = Base.getType(); @@ -3547,10 +3551,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // If this Formula already has a scaled register, we can't add another one. // Try to unscale the formula to generate a better scale. - if (Base.Scale != 0 && !Base.Unscale()) + if (Base.Scale != 0 && !Base.unscale()) return; - assert(Base.Scale == 0 && "Unscale did not did its job!"); + assert(Base.Scale == 0 && "unscale did not do its job!"); // Check each interesting stride. for (int64_t Factor : Factors) { @@ -3587,7 +3591,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // TODO: This could be optimized to avoid all the copying. Formula F = Base; F.ScaledReg = Quotient; - F.DeleteBaseReg(F.BaseRegs[i]); + F.deleteBaseReg(F.BaseRegs[i]); // The canonical representation of 1*reg is reg, which is already in // Base. In that case, do not try to insert the formula, it will be // rejected anyway. @@ -3599,7 +3603,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { } } -/// GenerateTruncates - Generate reuse formulae from different IV types. +/// Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { // Don't bother truncating symbolic values.
if (Base.BaseGV) return; @@ -3629,9 +3633,9 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { namespace { -/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to -/// defer modifications so that the search phase doesn't have to worry about -/// the data structures moving underneath it. +/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer +/// modifications so that the search phase doesn't have to worry about the data +/// structures moving underneath it. struct WorkItem { size_t LUIdx; int64_t Imm; @@ -3651,14 +3655,13 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void WorkItem::dump() const { print(errs()); errs() << '\n'; } -#endif -/// GenerateCrossUseConstantOffsets - Look for registers which are a constant -/// distance apart and try to form reuse opportunities between them. +/// Look for registers which are a constant distance apart and try to form reuse +/// opportunities between them. void LSRInstance::GenerateCrossUseConstantOffsets() { // Group the registers by their value without any added constant offset. typedef std::map ImmMapTy; @@ -3751,7 +3754,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // very similar but slightly different. Investigate if they // could be merged. That way, we would not have to unscale the // Formula. - F.Unscale(); + F.unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3770,14 +3773,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast(NewF.ScaledReg)) - if (C->getValue()->isNegative() != - (NewF.BaseOffset < 0) && - (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) - .ule(std::abs(NewF.BaseOffset))) + if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && + (C->getAPInt().abs() * APInt(BitWidth, F.Scale)) + .ule(std::abs(NewF.BaseOffset))) continue; // OK, looks good. - NewF.Canonicalize(); + NewF.canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3801,15 +3803,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // zero than the immediate itself, then the formula isn't worthwhile. for (const SCEV *NewReg : NewF.BaseRegs) if (const SCEVConstant *C = dyn_cast(NewReg)) - if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( - std::abs(NewF.BaseOffset)) && - (C->getValue()->getValue() + - NewF.BaseOffset).countTrailingZeros() >= - countTrailingZeros(NewF.BaseOffset)) + if ((C->getAPInt() + NewF.BaseOffset) + .abs() + .slt(std::abs(NewF.BaseOffset)) && + (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >= + countTrailingZeros(NewF.BaseOffset)) goto skip_formula; // Ok, looks good. - NewF.Canonicalize(); + NewF.canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3819,7 +3821,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { } } -/// GenerateAllReuseFormulae - Generate formulae for each use. +/// Generate formulae for each use. 
void LSRInstance::GenerateAllReuseFormulae() { // This is split into multiple loops so that hasRegsUsedByUsesOtherThan @@ -3959,10 +3961,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // This is a rough guess that seems to work fairly well. static const size_t ComplexityLimit = UINT16_MAX; -/// EstimateSearchSpaceComplexity - Estimate the worst-case number of -/// solutions the solver might have to consider. It almost never considers -/// this many solutions because it prune the search space, but the pruning -/// isn't always sufficient. +/// Estimate the worst-case number of solutions the solver might have to +/// consider. It almost never considers this many solutions because it prunes the +/// search space, but the pruning isn't always sufficient. size_t LSRInstance::EstimateSearchSpaceComplexity() const { size_t Power = 1; for (const LSRUse &LU : Uses) { @@ -3978,10 +3979,9 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset -/// of the registers of another formula, it won't help reduce register -/// pressure (though it may not necessarily hurt register pressure); remove -/// it to simplify the system. +/// When one formula uses a superset of the registers of another formula, it +/// won't help reduce register pressure (though it may not necessarily hurt +/// register pressure); remove it to simplify the system. void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -4042,9 +4042,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { } } -/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers -/// for expressions like A, A+1, A+2, etc., allocate a single register for -/// them. +/// When there are many registers for expressions like A, A+1, A+2, etc., +/// allocate a single register for them. void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4121,8 +4120,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call -/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ @@ -4139,9 +4137,9 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } -/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely -/// to be profitable, and then in any use which has any reference to that -/// register, delete all formulae which do not reference that register. +/// Pick a register which seems likely to be profitable, and then in any use +/// which has any reference to that register, delete all formulae which do not +/// reference that register. void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle.
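// Illustrative aside, not part of the patch: the hunks that follow add
// explicit conversions between Instruction* and BasicBlock::iterator
// ("&*IP", "I->getIterator()"), matching the removal of implicit ilist
// iterator-to-pointer conversions around this LLVM revision. A minimal
// sketch of both directions; the function name here is hypothetical.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static void iteratorConversionSketch(llvm::BasicBlock &BB) {
  if (BB.empty())
    return;
  // Iterator -> pointer: dereference to Instruction&, then take the address.
  llvm::BasicBlock::iterator It = BB.begin();
  llvm::Instruction *I = &*It;
  // Pointer -> iterator: getIterator() yields this instruction's position,
  // suitable for getInstList().insert() and std::next().
  llvm::BasicBlock::iterator Back = I->getIterator();
  (void)Back;
}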
@@ -4202,10 +4200,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { } } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. +/// If there are an extraordinary number of formulae to choose from, use some +/// rough heuristics to prune down the number of formulae. This keeps the main +/// solver from taking an extraordinary amount of time in some worst-case +/// scenarios. void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); @@ -4213,7 +4211,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByPickingWinnerRegs(); } -/// SolveRecurse - This is the recursive solver. +/// This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, Cost &SolutionCost, SmallVectorImpl &Workspace, @@ -4291,8 +4289,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, } } -/// Solve - Choose one formula from each use. Return the results in the given -/// Solution vector. +/// Choose one formula from each use. Return the results in the given Solution +/// vector. void LSRInstance::Solve(SmallVectorImpl &Solution) const { SmallVector Workspace; Cost SolutionCost; @@ -4326,10 +4324,9 @@ void LSRInstance::Solve(SmallVectorImpl &Solution) const { assert(Solution.size() == Uses.size() && "Malformed solution!"); } -/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up -/// the dominator tree far as we can go while still being dominated by the -/// input positions. This helps canonicalize the insert position, which -/// encourages sharing. +/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far +/// as we can go while still being dominated by the input positions. This helps +/// canonicalize the insert position, which encourages sharing. BasicBlock::iterator LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) @@ -4365,21 +4362,21 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || !DT.dominates(Inst, BetterPos))) - BetterPos = std::next(BasicBlock::iterator(Inst)); + BetterPos = &*std::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; if (BetterPos) - IP = BetterPos; + IP = BetterPos->getIterator(); else - IP = Tentative; + IP = Tentative->getIterator(); } return IP; } -/// AdjustInsertPositionForExpand - Determine an input position which will be -/// dominated by the operands and which will dominate the result. +/// Determine an input position which will be dominated by the operands and +/// which will dominate the result. BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, const LSRFixup &LF, @@ -4417,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, } } - assert(!isa(LowestIP) && !isa(LowestIP) + assert(!isa(LowestIP) && !LowestIP->isEHPad() && !isa(LowestIP) && "Insertion point must be a normal instruction"); @@ -4429,7 +4426,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, while (isa(IP)) ++IP; // Ignore landingpad instructions.
- while (isa(IP)) ++IP; + while (!isa(IP) && IP->isEHPad()) ++IP; // Ignore debug intrinsics. while (isa(IP)) ++IP; @@ -4437,13 +4434,14 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, // Set IP below instructions recently inserted by SCEVExpander. This keeps the // IP consistent across expansions and allows the previously inserted // instructions to be reused by subsequent expansion. - while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP; + while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP) + ++IP; return IP; } -/// Expand - Emit instructions for the leading candidate expression for this -/// LSRUse (this is called "expanding"). +/// Emit instructions for the leading candidate expression for this LSRUse (this +/// is called "expanding"). Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -4487,7 +4485,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP))); } // Expand the ScaledReg portion. @@ -4505,14 +4503,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand ScaleReg as if it was part of the base regs. if (F.Scale == 1) Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP))); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP); } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4522,11 +4520,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Unless the addressing mode will not be folded. if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); @@ -4538,7 +4536,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4548,7 +4546,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Flush the operand list to suppress SCEVExpander hoisting of both folded and // unfolded offsets. LSR assumes they both live next to their uses. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4584,7 +4582,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *FullS = Ops.empty() ? 
SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); - Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP); // We're done expanding now, so reset the rewriter. Rewriter.clearPostInc(); @@ -4626,15 +4624,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, return FullV; } -/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use -/// of their operands effectively happens in their predecessor blocks, so the -/// expression may need to be expanded in multiple places. +/// Helper for Rewrite. PHI nodes are special because the use of their operands +/// effectively happens in their predecessor blocks, so the expression may need +/// to be expanded in multiple places. void LSRInstance::RewriteForPHI(PHINode *PN, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const { + SmallVectorImpl &DeadInsts) const { DenseMap Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -4658,8 +4655,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, .setDontDeleteUselessPHIs()); } else { SmallVector NewBBs; - SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, - /*AliasAnalysis*/ nullptr, &DT, &LI); + SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); NewBB = NewBBs[0]; } // If NewBB==NULL, then SplitCriticalEdge refused to split because all @@ -4685,7 +4681,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); + Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(), + Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -4702,20 +4699,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN, } } -/// Rewrite - Emit instructions for the leading candidate expression for this -/// LSRUse (this is called "expanding"), and update the UserInst to reference -/// the newly expanded value. +/// Emit instructions for the leading candidate expression for this LSRUse (this +/// is called "expanding"), and update the UserInst to reference the newly +/// expanded value. void LSRInstance::Rewrite(const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const { + SmallVectorImpl &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast(LF.UserInst)) { - RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P); + RewriteForPHI(PN, LF, F, Rewriter, DeadInsts); } else { - Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); + Value *FullV = + Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -4740,11 +4737,10 @@ void LSRInstance::Rewrite(const LSRFixup &LF, DeadInsts.emplace_back(LF.OperandValToReplace); } -/// ImplementSolution - Rewrite all the fixup locations with new values, -/// following the chosen solution. -void -LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, - Pass *P) { +/// Rewrite all the fixup locations with new values, following the chosen +/// solution. 
+void LSRInstance::ImplementSolution( + const SmallVectorImpl &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. SmallVector DeadInsts; @@ -4766,7 +4762,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, // Expand the new value definitions and update the users. for (const LSRFixup &Fixup : Fixups) { - Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); + Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts); Changed = true; } @@ -4782,13 +4778,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(Loop *L, Pass *P) - : IU(P->getAnalysis()), SE(P->getAnalysis()), - DT(P->getAnalysis().getDomTree()), - LI(P->getAnalysis().getLoopInfo()), - TTI(P->getAnalysis().getTTI( - *L->getHeader()->getParent())), - L(L), Changed(false), IVIncInsertPos(nullptr) { +LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, + DominatorTree &DT, LoopInfo &LI, + const TargetTransformInfo &TTI) + : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false), + IVIncInsertPos(nullptr) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -4879,7 +4873,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P) #endif // Now that we've decided what we want, make it so. - ImplementSolution(Solution, P); + ImplementSolution(Solution); } void LSRInstance::print_factors_and_types(raw_ostream &OS) const { @@ -4931,11 +4925,10 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRInstance::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { @@ -4956,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(IVUsers) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) @@ -4982,8 +4975,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); // Requiring LoopSimplify a second time here prevents IVUsers from running // twice, since LoopSimplify was invalidated by running ScalarEvolution. AU.addRequiredID(LoopSimplifyID); @@ -4996,17 +4989,24 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { if (skipOptnoneFunction(L)) return false; + auto &IU = getAnalysis(); + auto &SE = getAnalysis().getSE(); + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + const auto &TTI = getAnalysis().getTTI( + *L->getHeader()->getParent()); bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(L, this).getChanged(); + Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged(); // Remove any extra phis created by processing inner loops. 
Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - SCEVExpander Rewriter(getAnalysis(), DL, "lsr"); + SCEVExpander Rewriter(getAnalysis().getSE(), DL, + "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index d78db6c369b3..56ae5c010411 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -130,27 +131,29 @@ namespace { bool UserAllowPartial; bool UserRuntime; - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. AU.addPreserved(); + AU.addPreserved(); } // Fill in the UnrollingPreferences parameter with values from the @@ -186,7 +189,7 @@ namespace { // total unrolled size. Parameters Threshold and PartialThreshold // are set to the maximum unrolled size for fully and partially // unrolled loops respectively. - void selectThresholds(const Loop *L, bool HasPragma, + void selectThresholds(const Loop *L, bool UsePragmaThreshold, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, unsigned &PercentDynamicCostSavedThreshold, @@ -207,12 +210,13 @@ namespace { : UP.DynamicCostSavingsDiscount; if (!UserThreshold && + // FIXME: Use Function::optForSize(). L->getHeader()->getParent()->hasFnAttribute( Attribute::OptimizeForSize)) { Threshold = UP.OptSizeThreshold; PartialThreshold = UP.PartialOptSizeThreshold; } - if (HasPragma) { + if (UsePragmaThreshold) { // If the loop has an unrolling pragma, we want to be more // aggressive with unrolling limits. 
Set thresholds to at // least the PragmaTheshold value which is larger than the @@ -235,10 +239,11 @@ char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, @@ -278,8 +283,8 @@ class UnrolledInstAnalyzer : private InstVisitor { public: UnrolledInstAnalyzer(unsigned Iteration, DenseMap &SimplifiedValues, - const Loop *L, ScalarEvolution &SE) - : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) { + ScalarEvolution &SE) + : SimplifiedValues(SimplifiedValues), SE(SE) { IterationNumber = SE.getConstant(APInt(64, Iteration)); } @@ -295,13 +300,6 @@ private: /// results saved. DenseMap SimplifiedAddresses; - /// \brief Number of currently simulated iteration. - /// - /// If an expression is ConstAddress+Constant, then the Constant is - /// Start + Iteration*Step, where Start and Step could be obtained from - /// SCEVGEPCache. - unsigned Iteration; - /// \brief SCEV expression corresponding to number of currently simulated /// iteration. const SCEV *IterationNumber; @@ -316,7 +314,6 @@ private: /// post-unrolling. DenseMap &SimplifiedValues; - const Loop *L; ScalarEvolution &SE; /// \brief Try to simplify instruction \param I using its SCEV expression. @@ -368,11 +365,9 @@ private: return simplifyInstWithSCEV(&I); } - /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt. - /// Try to simplify binary operator I. /// - /// TODO: Probaly it's worth to hoist the code for estimating the + /// TODO: Probably it's worth to hoist the code for estimating the /// simplifications effects to a separate class, since we have a very similar /// code in InlineCost already. bool visitBinaryOperator(BinaryOperator &I) { @@ -412,7 +407,7 @@ private: auto *GV = dyn_cast(AddressIt->second.Base); // We're only interested in loads that can be completely folded to a // constant. - if (!GV || !GV->hasInitializer()) + if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; ConstantDataSequential *CDS = @@ -420,6 +415,12 @@ private: if (!CDS) return false; + // We might have a vector load from an array. FIXME: for now we just bail + // out in this case, but we should be able to resolve and simplify such + // loads. + if(!CDS->isElementTypeCompatible(I.getType())) + return false; + int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 && "Unexpectedly large index value."); @@ -436,6 +437,59 @@ private: return true; } + + bool visitCastInst(CastInst &I) { + // Propagate constants through casts. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = + ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + return Base::visitCastInst(I); + } + + bool visitCmpInst(CmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + // First try to handle simplified comparisons. 
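Both visitCastInst above and visitCmpInst starting here follow the same lookup-then-fold pattern: consult SimplifiedValues for operands already discovered to be constant on this simulated iteration, then let constant folding finish the job. A toy version of that pattern over a made-up mini-IR (all names and types below are assumptions, nothing from LLVM):

  #include <iostream>
  #include <map>
  #include <string>

  // Made-up mini-IR: SSA values are named strings; known constants live in a
  // map playing the role of SimplifiedValues for one simulated iteration.
  using SimplifiedValues = std::map<std::string, long>;

  // Fold "icmp slt LHS, RHS" when both sides are known constants, mirroring
  // the lookup-then-fold structure of visitCmpInst.
  static bool foldCmpSLT(const SimplifiedValues &SV, const std::string &LHS,
                         const std::string &RHS, bool &Out) {
    auto L = SV.find(LHS), R = SV.find(RHS);
    if (L == SV.end() || R == SV.end())
      return false;              // an operand is still unknown: no fold
    Out = L->second < R->second; // both constant: fold the predicate
    return true;
  }

  int main() {
    SimplifiedValues SV = {{"%iv", 3}, {"%n", 8}}; // found this iteration
    bool Res;
    if (foldCmpSLT(SV, "%iv", "%n", Res))
      std::cout << "%cmp folds to " << Res << '\n'; // prints 1
  }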
+ if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + + if (!isa(LHS) && !isa(RHS)) { + auto SimplifiedLHS = SimplifiedAddresses.find(LHS); + if (SimplifiedLHS != SimplifiedAddresses.end()) { + auto SimplifiedRHS = SimplifiedAddresses.find(RHS); + if (SimplifiedRHS != SimplifiedAddresses.end()) { + SimplifiedAddress &LHSAddr = SimplifiedLHS->second; + SimplifiedAddress &RHSAddr = SimplifiedRHS->second; + if (LHSAddr.Base == RHSAddr.Base) { + LHS = LHSAddr.Offset; + RHS = RHSAddr.Offset; + } + } + } + } + + if (Constant *CLHS = dyn_cast(LHS)) { + if (Constant *CRHS = dyn_cast(RHS)) { + if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + } + } + + return Base::visitCmpInst(I); + } }; } // namespace @@ -443,11 +497,11 @@ private: namespace { struct EstimatedUnrollCost { /// \brief The estimated cost after unrolling. - unsigned UnrolledCost; + int UnrolledCost; /// \brief The estimated dynamic cost of executing the instructions in the /// rolled form. - unsigned RolledDynamicCost; + int RolledDynamicCost; }; } @@ -464,10 +518,10 @@ struct EstimatedUnrollCost { /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. -Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - unsigned MaxUnrolledLoopSize) { +static Optional +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, + ScalarEvolution &SE, const TargetTransformInfo &TTI, + int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. @@ -481,24 +535,61 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, SmallSetVector BBWorklist; DenseMap SimplifiedValues; + SmallVector, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. - unsigned UnrolledCost = 0; + int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. - unsigned RolledDynamicCost = 0; + int RolledDynamicCost = 0; + + // Ensure that we don't violate the loop structure invariants relied on by + // this analysis. + assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); + assert(L->isLCSSAForm(DT) && + "Must have loops in LCSSA form to track live-out values."); + + DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. 
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { + DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); + + // Prepare for the iteration by collecting any simplified entry or backedge + // inputs. + for (Instruction &I : *L->getHeader()) { + auto *PHI = dyn_cast(&I); + if (!PHI) + break; + + // The loop header PHI nodes must have exactly two input: one from the + // loop preheader and one from the loop latch. + assert( + PHI->getNumIncomingValues() == 2 && + "Must have an incoming value only for the preheader and the latch."); + + Value *V = PHI->getIncomingValueForBlock( + Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); + Constant *C = dyn_cast(V); + if (Iteration != 0 && !C) + C = SimplifiedValues.lookup(V); + if (C) + SimplifiedInputValues.push_back({PHI, C}); + } + + // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); - UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); + while (!SimplifiedInputValues.empty()) + SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); + + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); @@ -510,21 +601,67 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { - unsigned InstCost = TTI.getUserCost(&I); + int InstCost = TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns false, include this instruction in the // unrolled cost. if (!Analyzer.visit(I)) UnrolledCost += InstCost; + else { + DEBUG(dbgs() << " " << I + << " would be simplified if loop is unrolled.\n"); + (void)0; + } // Also track this instructions expected cost when executing the rolled // loop form. RolledDynamicCost += InstCost; // If unrolled body turns out to be too big, bail out. - if (UnrolledCost > MaxUnrolledLoopSize) + if (UnrolledCost > MaxUnrolledLoopSize) { + DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" + << " UnrolledCost: " << UnrolledCost + << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize + << "\n"); return None; + } + } + + TerminatorInst *TI = BB->getTerminator(); + + // Add in the live successors by first checking whether we have terminator + // that may be simplified based on the values simplified by this call. + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) { + if (Constant *SimpleCond = + SimplifiedValues.lookup(BI->getCondition())) { + BasicBlock *Succ = nullptr; + // Just take the first successor if condition is undef + if (isa(SimpleCond)) + Succ = BI->getSuccessor(0); + else + Succ = BI->getSuccessor( + cast(SimpleCond)->isZero() ? 1 : 0); + if (L->contains(Succ)) + BBWorklist.insert(Succ); + continue; + } + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (Constant *SimpleCond = + SimplifiedValues.lookup(SI->getCondition())) { + BasicBlock *Succ = nullptr; + // Just take the first successor if condition is undef + if (isa(SimpleCond)) + Succ = SI->getSuccessor(0); + else + Succ = SI->findCaseValue(cast(SimpleCond)) + .getCaseSuccessor(); + if (L->contains(Succ)) + BBWorklist.insert(Succ); + continue; + } } // Add BB's successors to the worklist. @@ -535,9 +672,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. 
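The PHI-seeding loop at the top of this iteration body is what threads constants from one simulated iteration into the next: iteration 0 takes the preheader input, and later iterations take whatever the latch produced last time. A compact standalone model of that mechanism with a single induction PHI (plain C++, not LLVM types):

  #include <iostream>

  int main() {
    // Model of: %iv = phi [0, preheader], [%iv.next, latch]
    //           %iv.next = add %iv, 1
    const int TripCount = 4;
    long PreheaderIn = 0; // incoming value used on iteration 0
    long LatchOut = 0;    // "SimplifiedValues" entry carried across iterations

    for (int Iteration = 0; Iteration != TripCount; ++Iteration) {
      // Seed the PHI exactly as the analysis does.
      long IV = (Iteration == 0) ? PreheaderIn : LatchOut;
      // Simulate the body: everything derived from IV is now a constant.
      LatchOut = IV + 1;
      std::cout << "iteration " << Iteration << ": %iv = " << IV << '\n';
    }
  }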
- if (UnrolledCost == RolledDynamicCost) + if (UnrolledCost == RolledDynamicCost) { + DEBUG(dbgs() << " No opportunities found.. exiting.\n" + << " UnrolledCost: " << UnrolledCost << "\n"); return None; + } } + DEBUG(dbgs() << "Analysis finished:\n" + << "UnrolledCost: " << UnrolledCost << ", " + << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; } @@ -583,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); } +// Returns true if the loop has an unroll(enable) pragma. This metadata is used +// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. +static bool HasUnrollEnablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); +} + // Returns true if the loop has an unroll(disable) pragma. static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); @@ -708,7 +857,7 @@ unsigned LoopUnroll::selectUnrollCount( unsigned Count = UserCount ? CurrentCount : 0; // If there is no user-specified count, unroll pragmas have the next - // highest precendence. + // highest precedence. if (Count == 0) { if (PragmaCount) { Count = PragmaCount; @@ -737,17 +886,19 @@ unsigned LoopUnroll::selectUnrollCount( return Count; } -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; Function &F = *L->getHeader()->getParent(); + auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis(); + ScalarEvolution *SE = &getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); auto &AC = getAnalysis().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -757,8 +908,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } bool PragmaFullUnroll = HasUnrollFullPragma(L); + bool PragmaEnableUnroll = HasUnrollEnablePragma(L); unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaCount > 0; + bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; TargetTransformInfo::UnrollingPreferences UP; getUnrollingPreferences(L, TTI, UP); @@ -806,7 +958,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { unsigned Threshold, PartialThreshold; unsigned PercentDynamicCostSavedThreshold; unsigned DynamicCostSavingsDiscount; - selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, + // Only use the high pragma threshold when we have a target unroll factor such + // as with "#pragma unroll N" or a pragma indicating full unrolling and the + // trip count is known. Otherwise we rely on the standard threshold to + // heuristically select a reasonable unroll count. + bool UsePragmaThreshold = + PragmaCount > 0 || + ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); + + selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount); @@ -824,8 +984,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. 
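For intuition, here is a rough standalone model of the decision this analysis feeds. The exact canUnrollCompletely arithmetic differs, so treat the helper below, its names, and its thresholds as assumptions rather than the pass's real code:

  #include <iostream>

  // Assumed model: unrolling past the static threshold is allowed when the
  // estimated dynamic savings are large enough, up to a bounded discount.
  static bool worthFullUnroll(int UnrolledCost, int RolledDynamicCost,
                              int Threshold, int PercentSavedThreshold,
                              int SavingsDiscount) {
    if (UnrolledCost <= Threshold)
      return true;                        // small enough outright
    if (UnrolledCost > Threshold + SavingsDiscount)
      return false;                       // too big even with the discount
    // Percentage of dynamic work the simplifications should remove.
    int PercentSaved =
        (RolledDynamicCost - UnrolledCost) * 100 / RolledDynamicCost;
    return PercentSaved >= PercentSavedThreshold;
  }

  int main() {
    // 60% of the rolled work folds away: take the discounted path.
    std::cout << worthFullUnroll(400, 1000, 150, 50, 400) << '\n'; // 1
  }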
- if (Optional Cost = analyzeLoopUnrollCost( - L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (Optional Cost = + analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, + Threshold + DynamicCostSavingsDiscount)) if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) { @@ -840,14 +1001,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = - (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || + (UserRuntime ? CurrentRuntime : UP.Runtime); // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; + bool AllowPartial = PragmaEnableUnroll || + (UserAllowPartial ? CurrentAllowPartial : UP.Partial); if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); @@ -887,23 +1049,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); - if (PragmaFullUnroll && PragmaCount == 0) { - if (TripCount && Count != TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because unrolled size is too large."); - } else if (!TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); - } - } else if (PragmaCount > 0 && Count != OriginalCount) { + if ((PragmaCount > 0) && Count != OriginalCount) { emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to unroll loop the number of times directed by " "unroll_count pragma because unrolled size is too large."); + } else if (PragmaFullUnroll && !TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(full) pragma " + "because loop has a runtime trip count."); + } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop as directed by unroll(enable) pragma because " + "unrolled size is too large."); + } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + Count != TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll pragma because " + "unrolled size is too large."); } } @@ -915,7 +1081,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Unroll the loop. 
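The pragma gating above is compact but easy to misread. A condensed restatement of just the boolean logic shown in these hunks, keeping the same variable names and stripping everything else:

  #include <iostream>

  struct PragmaState {
    unsigned PragmaCount = 0;
    bool PragmaFullUnroll = false, PragmaEnableUnroll = false;
    unsigned TripCount = 0; // 0 means a runtime trip count
  };

  static bool usePragmaThreshold(const PragmaState &P) {
    // The high threshold only applies with an explicit count, or with a
    // full/enable pragma when the trip count is actually known.
    return P.PragmaCount > 0 ||
           ((P.PragmaFullUnroll || P.PragmaEnableUnroll) && P.TripCount != 0);
  }

  int main() {
    PragmaState P;
    P.PragmaEnableUnroll = true;                // unroll(enable) directive
    std::cout << usePragmaThreshold(P) << '\n'; // 0: runtime trip count
    P.TripCount = 16;                           // trip count now known
    std::cout << usePragmaThreshold(P) << '\n'; // 1
  }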
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, - TripMultiple, LI, this, &LPM, &AC)) + TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA)) return false; return true; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index cbc563bd8998..95d7f8a3beda 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -37,6 +38,10 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -70,6 +75,19 @@ static cl::opt Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); +static cl::opt +LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency", + cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to access PGO " + "heuristics to minimize code growth in cold regions.")); + +static cl::opt +ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden, + cl::desc("Coldness threshold in percentage. The loop header frequency " + "(relative to the entry frequency) is compared with this " + "threshold to determine if non-trivial unswitching should be " + "enabled.")); + namespace { class LUAnalysisCache { @@ -148,12 +166,19 @@ namespace { LPPassManager *LPM; AssumptionCache *AC; - // LoopProcessWorklist - Used to check if second loop needs processing - // after RewriteLoopBodyWithConditionConstant rewrites first loop. + // Used to check if second loop needs processing after + // RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector LoopProcessWorklist; LUAnalysisCache BranchesInfo; + bool EnabledPGO; + + // BFI and ColdEntryFreq are only used when PGO and + // LoopUnswitchWithBlockFrequency are enabled. + BlockFrequencyInfo BFI; + BlockFrequency ColdEntryFreq; + bool OptimizeForSize; bool redoLoop; @@ -192,9 +217,11 @@ namespace { AU.addPreserved(); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); + AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); } private: @@ -210,7 +237,10 @@ namespace { /// Split all of the edges from inside the loop to their exit blocks. /// Update the appropriate Phi nodes as we do so. - void SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks); + void SplitExitEdges(Loop *L, + const SmallVectorImpl &ExitBlocks); + + bool TryTrivialLoopUnswitch(bool &Changed); bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, TerminatorInst *TI = nullptr); @@ -229,9 +259,6 @@ namespace { TerminatorInst *TI); void SimplifyCode(std::vector &Worklist, Loop *L); - bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, - BasicBlock **LoopExit = nullptr); - }; } @@ -367,9 +394,8 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); } -/// FindLIVLoopCondition - Cond is a condition that occurs in L. 
If it is -/// invariant in the loop, or has an invariant piece, return the invariant. -/// Otherwise, return null. +/// Cond is a condition that occurs in L. If it is invariant in the loop, or has +/// an invariant piece, return the invariant. Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We started analyze new instruction, increment scanned instructions counter. @@ -411,11 +437,23 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { *L->getHeader()->getParent()); LI = &getAnalysis().getLoopInfo(); LPM = &LPM_Ref; - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; + DT = &getAnalysis().getDomTree(); currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); + + EnabledPGO = F->getEntryCount().hasValue(); + + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + BranchProbabilityInfo BPI(*F, *LI); + BFI.calculate(*L->getHeader()->getParent(), BPI, *LI); + + // Use BranchProbability to compute a minimum frequency based on + // function entry baseline frequency. Loops with headers below this + // frequency are considered as cold. + const BranchProbability ColdProb(ColdnessThreshold, 100); + ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb; + } + bool Changed = false; do { assert(currentLoop->isLCSSAForm(*DT)); @@ -423,16 +461,13 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { Changed |= processCurrentLoop(); } while(redoLoop); - if (Changed) { - // FIXME: Reconstruct dom info, because it is not preserved properly. - if (DT) - DT->recalculate(*F); - } + // FIXME: Reconstruct dom info, because it is not preserved properly. + if (Changed) + DT->recalculate(*F); return Changed; } -/// processCurrentLoop - Do actual work and unswitch loop if possible -/// and profitable. +/// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -452,14 +487,48 @@ bool LoopUnswitch::processCurrentLoop() { LLVMContext &Context = loopHeader->getContext(); - // Probably we reach the quota of branches for this loop. If so - // stop unswitching. + // Analyze loop cost, and stop unswitching if loop content can not be duplicated. if (!BranchesInfo.countLoop( currentLoop, getAnalysis().getTTI( *currentLoop->getHeader()->getParent()), AC)) return false; + // Try trivial unswitch first before loop over other basic blocks in the loop. + if (TryTrivialLoopUnswitch(Changed)) { + return true; + } + + // Do not unswitch loops containing convergent operations, as we might be + // making them control dependent on the unswitch value when they were not + // before. + // FIXME: This could be refined to only bail if the convergent operation is + // not already control-dependent on the unswitch value. + for (const auto BB : currentLoop->blocks()) { + for (auto &I : *BB) { + auto CS = CallSite(&I); + if (!CS) continue; + if (CS.hasFnAttr(Attribute::Convergent)) + return false; + } + } + + // Do not do non-trivial unswitch while optimizing for size. + // FIXME: Use Function::optForSize(). + if (OptimizeForSize || + loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) + return false; + + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + // Compute the weighted frequency of the hottest block in the + // loop (loopHeader in this case since inner loops should be + // processed before outer loop). If it is less than ColdFrequency, + // we should not unswitch. 
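The cold-loop guard being introduced here boils down to scaling the function entry frequency by ColdnessThreshold percent and comparing the loop header against it. A plain-integer stand-in for the BlockFrequency * BranchProbability arithmetic (the real types scale and saturate differently, so this is only a model):

  #include <cstdint>
  #include <iostream>

  int main() {
    uint64_t EntryFreq = 1u << 14;    // stand-in for BFI.getEntryFreq()
    unsigned ColdnessThreshold = 1;   // percent, the cl::opt default above
    uint64_t ColdEntryFreq = EntryFreq * ColdnessThreshold / 100;

    uint64_t LoopEntryFreq = 120;     // stand-in for BFI.getBlockFreq(header)
    // Headers colder than the scaled entry frequency are not unswitched.
    std::cout << (LoopEntryFreq < ColdEntryFreq ? "skip (cold)" : "consider")
              << '\n';                // 120 < 163 -> skip (cold)
  }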
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader); + if (LoopEntryFreq < ColdEntryFreq) + return false; + } + // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. @@ -528,8 +597,8 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the -/// loop with no side effects (including infinite loops). +/// Check to see if all paths from BB exit the loop with no side effects +/// (including infinite loops). /// /// If true, we return true and set ExitBB to the block we /// exit through. @@ -566,9 +635,9 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, return true; } -/// isTrivialLoopExitBlock - Return true if the specified block unconditionally -/// leads to an exit from the specified loop, and has no side-effects in the -/// process. If so, return the block that is exited to, otherwise return null. +/// Return true if the specified block unconditionally leads to an exit from +/// the specified loop, and has no side-effects in the process. If so, return +/// the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set Visited; Visited.insert(L->getHeader()); // Branches to header make infinite loops. @@ -578,105 +647,11 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { return nullptr; } -/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is -/// trivial: that is, that the condition controls whether or not the loop does -/// anything at all. If this is a trivial condition, unswitching produces no -/// code duplications (equivalently, it produces a simpler loop and a new empty -/// loop, which gets deleted). -/// -/// If this is a trivial condition, return true, otherwise return false. When -/// returning true, this sets Cond and Val to the condition that controls the -/// trivial condition: when Cond dynamically equals Val, the loop is known to -/// exit. Finally, this sets LoopExit to the BB that the loop exits to when -/// Cond == Val. -/// -bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, - BasicBlock **LoopExit) { - BasicBlock *Header = currentLoop->getHeader(); - TerminatorInst *HeaderTerm = Header->getTerminator(); - LLVMContext &Context = Header->getContext(); - - BasicBlock *LoopExitBB = nullptr; - if (BranchInst *BI = dyn_cast(HeaderTerm)) { - // If the header block doesn't end with a conditional branch on Cond, we - // can't handle it. - if (!BI->isConditional() || BI->getCondition() != Cond) - return false; - - // Check to see if a successor of the branch is guaranteed to - // exit through a unique exit block without having any - // side-effects. If so, determine the value of Cond that causes it to do - // this. - if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, - BI->getSuccessor(0)))) { - if (Val) *Val = ConstantInt::getTrue(Context); - } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, - BI->getSuccessor(1)))) { - if (Val) *Val = ConstantInt::getFalse(Context); - } - } else if (SwitchInst *SI = dyn_cast(HeaderTerm)) { - // If this isn't a switch on Cond, we can't handle it. - if (SI->getCondition() != Cond) return false; - - // Check to see if a successor of the switch is guaranteed to go to the - // latch block or exit through a one exit block without having any - // side-effects. 
If so, determine the value of Cond that causes it to do - // this. - // Note that we can't trivially unswitch on the default case or - // on already unswitched cases. - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - BasicBlock *LoopExitCandidate; - if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, - i.getCaseSuccessor()))) { - // Okay, we found a trivial case, remember the value that is trivial. - ConstantInt *CaseVal = i.getCaseValue(); - - // Check that it was not unswitched before, since already unswitched - // trivial vals are looks trivial too. - if (BranchesInfo.isUnswitched(SI, CaseVal)) - continue; - LoopExitBB = LoopExitCandidate; - if (Val) *Val = CaseVal; - break; - } - } - } - - // If we didn't find a single unique LoopExit block, or if the loop exit block - // contains phi nodes, this isn't trivial. - if (!LoopExitBB || isa(LoopExitBB->begin())) - return false; // Can't handle this. - - if (LoopExit) *LoopExit = LoopExitBB; - - // We already know that nothing uses any scalar values defined inside of this - // loop. As such, we just have to check to see if this loop will execute any - // side-effecting instructions (e.g. stores, calls, volatile loads) in the - // part of the loop that the code *would* execute. We already checked the - // tail, check the header now. - for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) - if (I->mayHaveSideEffects()) - return false; - return true; -} - -/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when -/// LoopCond == Val to simplify the loop. If we decide that this is profitable, +/// We have found that we can unswitch currentLoop when LoopCond == Val to +/// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, TerminatorInst *TI) { - Function *F = loopHeader->getParent(); - Constant *CondVal = nullptr; - BasicBlock *ExitBlock = nullptr; - - if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { - // If the condition is trivial, always unswitch. There is no code growth - // for this case. - UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI); - return true; - } - // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.CostAllowsUnswitching()) { DEBUG(dbgs() << "NOT unswitching loop %" @@ -687,32 +662,27 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, return false; } - // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) - return false; - UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); return true; } -/// CloneLoop - Recursively clone the specified loop and all of its children, +/// Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop *New = new Loop(); - LPM->insertLoop(New, PL); + Loop &New = LPM->addLoop(PL); // Add all of the blocks in L to the new loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) if (LI->getLoopFor(*I) == L) - New->addBasicBlockToLoop(cast(VM[*I]), *LI); + New.addBasicBlockToLoop(cast(VM[*I]), *LI); // Add all of the subloops to the new loop. 
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - CloneLoop(*I, New, VM, LI, LPM); + CloneLoop(*I, &New, VM, LI, LPM); - return New; + return &New; } static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, @@ -744,15 +714,15 @@ static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, } } // fallthrough. + case LLVMContext::MD_make_implicit: case LLVMContext::MD_dbg: DstInst->setMetadata(MD.first, MD.second); } } } -/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values -/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the -/// code immediately before InsertPt. +/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, +/// otherwise branch to FalseDest. Insert the code immediately before InsertPt. void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, @@ -782,11 +752,11 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, SplitCriticalEdge(BI, 1, Options); } -/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable -/// condition in it (a cond branch from its header block to its latch block, -/// where the path through the loop that doesn't execute its body has no -/// side-effects), unswitch it. This doesn't involve any code duplication, just -/// moving the conditional branch outside of the loop and updating loop info. +/// Given a loop that has a trivial unswitchable condition in it (a cond branch +/// from its header block to its latch block, where the path through the loop +/// that doesn't execute its body has no side-effects), unswitch it. This +/// doesn't involve any code duplication, just moving the conditional branch +/// outside of the loop and updating loop info. void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, TerminatorInst *TI) { @@ -810,7 +780,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, // without actually branching to it (the exit block should be dominated by the // loop header, not the preheader). assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); - BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI); + BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI); // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. @@ -829,8 +799,155 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, ++NumTrivial; } -/// SplitExitEdges - Split all of the edges from inside the loop to their exit -/// blocks. Update the appropriate Phi nodes as we do so. +/// Check if the first non-constant condition starting from the loop header is +/// a trivial unswitch condition: that is, a condition controls whether or not +/// the loop does anything at all. If it is a trivial condition, unswitching +/// produces no code duplications (equivalently, it produces a simpler loop and +/// a new empty loop, which gets deleted). Therefore always unswitch trivial +/// condition. 
+bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
+  BasicBlock *CurrentBB = currentLoop->getHeader();
+  TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+  LLVMContext &Context = CurrentBB->getContext();
+
+  // If the loop header has only one reachable successor (currently via an
+  // unconditional branch or constant foldable conditional branch, but
+  // should also consider adding constant foldable switch instruction in
+  // future), we should keep looking for trivial condition candidates in
+  // the successor as well. An alternative is to constant fold conditions
+  // and merge successors into loop header (then we only need to check header's
+  // terminator). The reason for not doing this in the LoopUnswitch pass is
+  // that it could potentially break LoopPassManager's invariants. Folding
+  // dead branches could either eliminate the current loop or make other loops
+  // unreachable. LCSSA form might also not be preserved after deleting
+  // branches. The following code keeps traversing the loop header's
+  // successors until it finds the trivial condition candidate (a condition
+  // that is not a constant). Since unswitching generates branches with
+  // constant conditions, this scenario could be very common in practice.
+  SmallSet<BasicBlock*, 8> Visited;
+
+  while (true) {
+    // If we exit the loop or reach a previously visited block, then we
+    // cannot reach any trivial condition candidates (unfoldable
+    // branch instructions or switch instructions) and no unswitch
+    // can happen. Exit and return false.
+    if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
+      return false;
+
+    // Check if this loop will execute any side-effecting instructions (e.g.
+    // stores, calls, volatile loads) in the part of the loop that the code
+    // *would* execute. Check the header first.
+    for (Instruction &I : *CurrentBB)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    // FIXME: add check for constant foldable switch instructions.
+    if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+      if (BI->isUnconditional()) {
+        CurrentBB = BI->getSuccessor(0);
+      } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
+        CurrentBB = BI->getSuccessor(0);
+      } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
+        CurrentBB = BI->getSuccessor(1);
+      } else {
+        // Found a trivial condition candidate: non-foldable conditional branch.
+        break;
+      }
+    } else {
+      break;
+    }
+
+    CurrentTerm = CurrentBB->getTerminator();
+  }
+
+  // CondVal is the condition that controls the trivial condition.
+  // LoopExitBB is the BasicBlock that the loop exits to when the trivial
+  // condition is met.
+  Constant *CondVal = nullptr;
+  BasicBlock *LoopExitBB = nullptr;
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+    // If this isn't branching on an invariant condition, we can't unswitch it.
+    if (!BI->isConditional())
+      return false;
+
+    Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+                                           currentLoop, Changed);
+
+    // Unswitch only if the trivial condition itself is an LIV (not
+    // partial LIV which could occur in and/or)
+    if (!LoopCond || LoopCond != BI->getCondition())
+      return false;
+
+    // Check to see if a successor of the branch is guaranteed to
+    // exit through a unique exit block without having any
+    // side-effects. If so, determine the value of Cond that causes
+    // it to do this.
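The traversal above reduces to a small graph walk: follow the unique reachable successor through constant-foldable branches, stop at the first non-constant condition, and give up on a revisit or loop exit. A self-contained toy version, with integer block ids instead of BasicBlocks and every name an assumption:

  #include <iostream>
  #include <map>
  #include <set>
  #include <vector>

  // Toy CFG block: CondConst is 1/0 for a constant-foldable branch and -1
  // for a non-constant (trivial unswitch candidate) condition.
  struct Block {
    int CondConst;
    std::vector<int> Succs; // successor block ids
  };

  static int findTrivialCandidate(const std::map<int, Block> &CFG, int Header) {
    std::set<int> Visited;
    int BB = Header;
    while (Visited.insert(BB).second) {
      const Block &B = CFG.at(BB);
      if (B.Succs.empty())
        return -1;           // no successors: nothing to unswitch
      if (B.Succs.size() == 1)
        BB = B.Succs[0];     // unconditional branch
      else if (B.CondConst == 1)
        BB = B.Succs[0];     // branch on constant true
      else if (B.CondConst == 0)
        BB = B.Succs[1];     // branch on constant false
      else
        return BB;           // first non-constant condition: candidate
    }
    return -1;               // looped back to a visited block: give up
  }

  int main() {
    // 0 branches on constant true to 1, 1 falls through to 2, and 2 holds
    // the first non-constant condition.
    std::map<int, Block> CFG = {{0, {1, {1, 3}}}, {1, {-1, {2}}},
                                {2, {-1, {0, 3}}}, {3, {-1, {}}}};
    std::cout << findTrivialCandidate(CFG, 0) << '\n'; // prints 2
  }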
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(0)))) { + CondVal = ConstantInt::getTrue(Context); + } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(1)))) { + CondVal = ConstantInt::getFalse(Context); + } + + // If we didn't find a single unique LoopExit block, or if the loop exit + // block contains phi nodes, this isn't trivial. + if (!LoopExitBB || isa(LoopExitBB->begin())) + return false; // Can't handle this. + + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, + CurrentTerm); + ++NumBranches; + return true; + } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { + // If this isn't switching on an invariant condition, we can't unswitch it. + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + currentLoop, Changed); + + // Unswitch only if the trivial condition itself is an LIV (not + // partial LIV which could occur in and/or) + if (!LoopCond || LoopCond != SI->getCondition()) + return false; + + // Check to see if a successor of the switch is guaranteed to go to the + // latch block or exit through a one exit block without having any + // side-effects. If so, determine the value of Cond that causes it to do + // this. + // Note that we can't trivially unswitch on the default case or + // on already unswitched cases. + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + BasicBlock *LoopExitCandidate; + if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, + i.getCaseSuccessor()))) { + // Okay, we found a trivial case, remember the value that is trivial. + ConstantInt *CaseVal = i.getCaseValue(); + + // Check that it was not unswitched before, since already unswitched + // trivial vals are looks trivial too. + if (BranchesInfo.isUnswitched(SI, CaseVal)) + continue; + LoopExitBB = LoopExitCandidate; + CondVal = CaseVal; + break; + } + } + + // If we didn't find a single unique LoopExit block, or if the loop exit + // block contains phi nodes, this isn't trivial. + if (!LoopExitBB || isa(LoopExitBB->begin())) + return false; // Can't handle this. + + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, + nullptr); + ++NumSwitches; + return true; + } + return false; +} + +/// Split all of the edges from inside the loop to their exit blocks. +/// Update the appropriate Phi nodes as we do so. void LoopUnswitch::SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks){ @@ -841,15 +958,14 @@ void LoopUnswitch::SplitExitEdges(Loop *L, // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. - SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", - /*AliasAnalysis*/ nullptr, DT, LI, + SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, /*PreserveLCSSA*/ true); } } -/// UnswitchNontrivialCondition - We determined that the loop is profitable -/// to unswitch when LIC equal Val. Split it into loop versions and test the -/// condition outside of either loop. Return the loops created as Out1/Out2. +/// We determined that the loop is profitable to unswitch when LIC equal Val. +/// Split it into loop versions and test the condition outside of either loop. +/// Return the loops created as Out1/Out2. 
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L, TerminatorInst *TI) { Function *F = loopHeader->getParent(); @@ -858,8 +974,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); - if (ScalarEvolution *SE = getAnalysisIfAvailable()) - SE->forgetLoop(L); + if (auto *SEWP = getAnalysisIfAvailable()) + SEWP->getSE().forgetLoop(L); LoopBlocks.clear(); NewBlocks.clear(); @@ -901,8 +1017,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // Splice the newly inserted blocks into the function right before the // original preheader. - F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), - NewBlocks[0], F->end()); + F->getBasicBlockList().splice(NewPreheader->getIterator(), + F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -944,7 +1061,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { PHINode *PN = PHINode::Create(LPad->getType(), 0, "", - ExitSucc->getFirstInsertionPt()); + &*ExitSucc->getFirstInsertionPt()); for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); I != E; ++I) { @@ -960,7 +1077,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + RemapInstruction(&*I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast(loopPreheader->getTerminator()); @@ -994,8 +1112,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); } -/// RemoveFromWorklist - Remove all instances of I from the worklist vector -/// specified. +/// Remove all instances of I from the worklist vector specified. static void RemoveFromWorklist(Instruction *I, std::vector &Worklist) { @@ -1003,7 +1120,7 @@ static void RemoveFromWorklist(Instruction *I, Worklist.end()); } -/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the +/// When we find that I really equals V, remove I from the /// program, replacing all uses with V and update the worklist. static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector &Worklist, @@ -1025,9 +1142,9 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has -// the value specified by Val in the specified loop, or we know it does NOT have -// that value. Rewrite any uses of LIC or of properties correlated to it. +/// We know either that the value LIC has the value specified by Val in the +/// specified loop, or we know it does NOT have that value. +/// Rewrite any uses of LIC or of properties correlated to it. 
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual) { @@ -1138,18 +1255,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // domtree here -- instead we force it to do a full recomputation // after the pass is complete -- but we do need to inform it of // new blocks. - if (DT) - DT->addNewBlock(Abort, NewSISucc); + DT->addNewBlock(Abort, NewSISucc); } SimplifyCode(Worklist, L); } -/// SimplifyCode - Okay, now that we have simplified some instructions in the -/// loop, walk over it and constant prop, dce, and fold control flow where -/// possible. Note that this is effectively a very simple loop-structure-aware -/// optimizer. During processing of this loop, L could very well be deleted, so -/// it must not be used. +/// Now that we have simplified some instructions in the loop, walk over it and +/// constant prop, dce, and fold control flow where possible. Note that this is +/// effectively a very simple loop-structure-aware optimizer. During processing +/// of this loop, L could very well be deleted, so it must not be used. /// /// FIXME: When the loop optimizer is more mature, separate this out to a new /// pass. @@ -1207,8 +1322,8 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { Succ->replaceAllUsesWith(Pred); // Move all of the successor contents from Succ to Pred. - Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), - Succ->end()); + Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), + Succ->begin(), Succ->end()); LPM->deleteSimpleAnalysisValue(BI, L); BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 3314e1ed41ab..41511bcb7b04 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -22,7 +22,7 @@ using namespace llvm; #define DEBUG_TYPE "loweratomic" static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { - IRBuilder<> Builder(CXI->getParent(), CXI); + IRBuilder<> Builder(CXI); Value *Ptr = CXI->getPointerOperand(); Value *Cmp = CXI->getCompareOperand(); Value *Val = CXI->getNewValOperand(); @@ -41,7 +41,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { } static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { - IRBuilder<> Builder(RMWI->getParent(), RMWI); + IRBuilder<> Builder(RMWI); Value *Ptr = RMWI->getPointerOperand(); Value *Val = RMWI->getValOperand(); @@ -120,7 +120,7 @@ namespace { return false; bool Changed = false; for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { - Instruction *Inst = DI++; + Instruction *Inst = &*DI++; if (FenceInst *FI = dyn_cast(Inst)) Changed |= LowerFenceInst(FI); else if (AtomicCmpXchgInst *CXI = dyn_cast(Inst)) diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 0c47cbd5bfda..2ace902a7a1b 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -139,7 +139,7 @@ static bool lowerExpectIntrinsic(Function &F) { ExpectIntrinsicsHandled++; } - // remove llvm.expect intrinsics. + // Remove llvm.expect intrinsics. 
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { CallInst *CI = dyn_cast(BI++); if (!CI) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 85012afc80ac..0333bf2284e1 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -30,7 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" -#include +#include using namespace llvm; #define DEBUG_TYPE "memcpyopt" @@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, return Offset; } -/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a -/// constant offset, and return that constant offset. For example, Ptr1 might -/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. +/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and +/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2 +/// might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, const DataLayout &DL) { Ptr1 = Ptr1->stripPointerCasts(); @@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, } -/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. +/// Represents a range of memset'd bytes with the ByteVal value. /// This allows us to analyze stores like: /// store 0 -> P+1 /// store 0 -> P+0 @@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If any of the stores are a memset, then it is always good to extend the // memset. - for (unsigned i = 0, e = TheStores.size(); i != e; ++i) - if (!isa(TheStores[i])) + for (Instruction *SI : TheStores) + if (!isa(SI)) return true; // Assume that the code generator is capable of merging pairs of stores @@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { unsigned NumPointerStores = Bytes / MaxIntSize; // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; + unsigned NumByteStores = Bytes % MaxIntSize; // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 @@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { namespace { class MemsetRanges { - /// Ranges - A sorted list of the memset ranges. We use std::list here - /// because each element is relatively large and expensive to copy. - std::list Ranges; - typedef std::list::iterator range_iterator; + /// A sorted list of the memset ranges. 
+  SmallVector<MemsetRange, 8> Ranges;
+  typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
   const DataLayout &DL;
 public:
   MemsetRanges(const DataLayout &DL) : DL(DL) {}
-  typedef std::list<MemsetRange>::const_iterator const_iterator;
+  typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
   const_iterator begin() const { return Ranges.begin(); }
   const_iterator end() const { return Ranges.end(); }
   bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
 } // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                             unsigned Alignment, Instruction *Inst) {
   int64_t End = Start+Size;
-  range_iterator I = Ranges.begin(), E = Ranges.end();
-  while (I != E && Start > I->End)
-    ++I;
+  range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+    [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
   // We now know that I == E, in which case we didn't find anything to merge
   // with, or that Start <= I->End. If End < I->Start or I == E, then we need
   // to insert a new range. Handle this now.
-  if (I == E || End < I->Start) {
+  if (I == Ranges.end() || End < I->Start) {
     MemsetRange &R = *Ranges.insert(I, MemsetRange());
     R.Start = Start;
     R.End = End;
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
   if (End > I->End) {
     I->End = End;
     range_iterator NextI = I;
-    while (++NextI != E && End >= NextI->Start) {
+    while (++NextI != Ranges.end() && End >= NextI->Start) {
       // Merge the range in.
       I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
       if (NextI->End > I->End)
@@ -331,9 +325,9 @@ namespace {
       AU.addRequired<AssumptionCacheTracker>();
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addRequired<MemoryDependenceAnalysis>();
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
-      AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<GlobalsAAWrapperPass>();
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
@@ -357,7 +351,7 @@ namespace {
 char MemCpyOpt::ID = 0;
 }
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
 INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                       false, false)
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                     false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away.
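The new addRange relies on Ranges staying sorted by End so that std::lower_bound can replace the old linear scan. A standalone model of the same insert-or-merge logic on plain [Start, End) intervals (names assumed, no LLVM types):

  #include <algorithm>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct Range { int64_t Start, End; };

  // Keep [Start, End) ranges sorted by End; find the first range whose End
  // is >= Start, then either insert a new range or grow/merge in place.
  static void addRange(std::vector<Range> &Ranges, int64_t Start, int64_t Size) {
    int64_t End = Start + Size;
    auto I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
                              [](const Range &LHS, int64_t RHS) {
                                return LHS.End < RHS;
                              });
    if (I == Ranges.end() || End < I->Start) {
      Ranges.insert(I, {Start, End}); // disjoint: insert a new range
      return;
    }
    I->Start = std::min(I->Start, Start); // overlaps or abuts: extend
    if (End > I->End)
      I->End = End;
    // Swallow any following ranges the extension now reaches.
    auto NextI = I + 1;
    while (NextI != Ranges.end() && I->End >= NextI->Start) {
      I->End = std::max(I->End, NextI->End);
      NextI = Ranges.erase(NextI); // I stays valid: it precedes the erase
    }
  }

  int main() {
    std::vector<Range> Ranges;
    addRange(Ranges, 4, 4);  // [4,8)
    addRange(Ranges, 12, 4); // [4,8) [12,16)
    addRange(Ranges, 8, 4);  // bridges both into [4,16)
    for (const Range &R : Ranges)
      std::cout << '[' << R.Start << ',' << R.End << ") ";
    std::cout << '\n'; // prints [4,16)
  }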
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   // are stored.
   MemsetRanges Ranges(DL);
 
-  BasicBlock::iterator BI = StartInst;
+  BasicBlock::iterator BI(StartInst);
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
       // If the instruction is readnone, ignore it, otherwise bail out.  We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   // If we create any memsets, we put it right before the first instruction that
   // isn't part of the memset block.  This ensure that the memset is dominated
   // by any addressing instruction needed by the start of the block.
-  IRBuilder<> Builder(BI);
+  IRBuilder<> Builder(&*BI);
 
   // Now that we have full information about ranges, loop over the ranges and
   // emit memset's for anything big enough to be worthwhile.
   Instruction *AMemSet = nullptr;
-  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
-       I != E; ++I) {
-    const MemsetRange &Range = *I;
+  for (const MemsetRange &Range : Ranges) {
 
     if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
       Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
 
     DEBUG(dbgs() << "Replace stores:\n";
-          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
-            dbgs() << *Range.TheStores[i] << '\n';
+          for (Instruction *SI : Range.TheStores)
+            dbgs() << *SI << '\n';
           dbgs() << "With: " << *AMemSet << '\n');
 
     if (!Range.TheStores.empty())
       AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
 
     // Zap all the stores.
-    for (SmallVectorImpl<Instruction *>::const_iterator
-         SI = Range.TheStores.begin(),
-         SE = Range.TheStores.end(); SI != SE; ++SI) {
-      MD->removeInstruction(*SI);
-      (*SI)->eraseFromParent();
+    for (Instruction *SI : Range.TheStores) {
+      MD->removeInstruction(SI);
+      SI->eraseFromParent();
     }
     ++NumMemSetInfer;
   }
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!SI->isSimple()) return false;
+
+  // Avoid merging nontemporal stores since the resulting
+  // memcpy/memset would not be able to preserve the nontemporal hint.
+  // In theory we could teach how to propagate the !nontemporal metadata to
+  // memset calls. However, that change would force the backend to
+  // conservatively expand !nontemporal memset calls back to sequences of
+  // store instructions (effectively undoing the merging).
+  if (SI->getMetadata(LLVMContext::MD_nontemporal))
+    return false;
+
   const DataLayout &DL = SI->getModule()->getDataLayout();
 
   // Detect cases where we're performing call slot forwarding, but
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     if (C) {
       // Check that nothing touches the dest of the "copy" between
       // the call and the store.
-      AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+      AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
       MemoryLocation StoreLoc = MemoryLocation::get(SI);
-      for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
-           E = C; I != E; --I) {
-        if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+      for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+           I != E; --I) {
+        if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
           C = nullptr;
           break;
         }
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
     if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                               ByteVal)) {
-      BBI = I;  // Don't invalidate iterator.
+      BBI = I->getIterator(); // Don't invalidate iterator.
       return true;
     }
 
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
   if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
     if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
                                               MSI->getValue())) {
-      BBI = I;  // Don't invalidate iterator.
+      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}
 
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
 bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   // unexpected manner, for example via a global, which we deduce from
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
   // If necessary, perform additional analysis.
-  if (MR != AliasAnalysis::NoModRef)
+  if (MR != MRI_NoModRef)
     MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
-  if (MR != AliasAnalysis::NoModRef)
+  if (MR != MRI_NoModRef)
     return false;
 
   // All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   // Update AA metadata
   // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
   // handled here, but combineMetadata doesn't support them yet
-  unsigned KnownIDs[] = {
-    LLVMContext::MD_tbaa,
-    LLVMContext::MD_alias_scope,
-    LLVMContext::MD_noalias,
-  };
+  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+                         LLVMContext::MD_noalias,
+                         LLVMContext::MD_invariant_group};
   combineMetadata(C, cpy, KnownIDs);
 
   // Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   return true;
 }
 
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'.  Try to simplify M to copy from MDep's input if we can.
 bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   // We can only transforms memcpy's where the dest of one is the source of the
   // other.
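// --- Illustrative aside (not part of the patch): a source-level picture of
// the call slot optimization that performCallSlotOptzn implements. All names
// here are invented. When a call fills a temporary that is then copied into
// 'Dest', and the AA queries above prove nothing touches 'Dest' in between,
// the call may write into 'Dest' directly and the memcpy is deleted.
#include <cstring>

struct Blob { char Bytes[64]; };

static void produce(Blob *Out) {         // hypothetical callee
  std::memset(Out->Bytes, 0, sizeof(Out->Bytes));
}

void before(Blob *Dest) {
  Blob Tmp;
  produce(&Tmp);                         // call writes a dead temporary
  std::memcpy(Dest, &Tmp, sizeof(Blob)); // Dest untouched since the call
}

void after(Blob *Dest) {
  produce(Dest);                         // call slot forwarding: write Dest directly
}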
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
     return false;
 
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   // Verify that the copied-from memory doesn't change in between the two
   // transfers.  For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep = MD->getPointerDependencyFrom(
-      MemoryLocation::getForSource(MDep), false, M, M->getParent());
+  MemDepResult SourceDep =
+      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+                                   M->getIterator(), M->getParent());
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
 
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
     return false;
 
   // Check that there are no other dependencies on the memset destination.
-  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+  MemDepResult DstDepInfo =
+      MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+                                   MemCpy->getIterator(), MemCpy->getParent());
   if (DstDepInfo.getInst() != MemSet)
     return false;
 
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }
 
-/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
+/// Perform simplification of memcpy's.  If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
 /// circumstances). This allows later passes to remove the first memcpy
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   }
 
   MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
-                                                         M, M->getParent());
+  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+      SrcLoc, true, M->getIterator(), M->getParent());
 
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   return false;
 }
 
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   if (!TLI->has(LibFunc::memmove))
     return false;
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
 
   // If not, then we know we can transform this.
-  Module *Mod = M->getParent()->getParent()->getParent();
   Type *ArgTys[3] = { M->getRawDest()->getType(),
                       M->getRawSource()->getType(),
                       M->getLength()->getType() };
-  M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
-                                                 ArgTys));
+  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+                                                 Intrinsic::memcpy, ArgTys));
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   return true;
 }
 
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
 bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
   // Find out what feeds this byval argument.
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
   uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
   MemDepResult DepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
-      CS.getInstruction()->getParent());
+      MemoryLocation(ByValArg, ByValSize), true,
+      CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
   if (!DepInfo.isClobber())
     return false;
 
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep =
-      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
-                                   CS.getInstruction(), MDep->getParent());
+  MemDepResult SourceDep = MD->getPointerDependencyFrom(
+      MemoryLocation::getForSource(MDep), false,
+      CS.getInstruction()->getIterator(), MDep->getParent());
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
 
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   return true;
 }
 
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
 bool MemCpyOpt::iterateOnFunction(Function &F) {
   bool MadeChange = false;
 
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
   for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
       // Avoid invalidating the iterator.
-      Instruction *I = BI++;
+      Instruction *I = &*BI++;
 
       bool RepeatInstruction = false;
 
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
   return MadeChange;
 }
 
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
 bool MemCpyOpt::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 643f3740eedd..c812d618c16a 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -91,6 +92,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
@@ -106,7 +108,7 @@ class MergedLoadStoreMotion : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
-  explicit MergedLoadStoreMotion(void)
+  MergedLoadStoreMotion()
       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
@@ -116,10 +118,11 @@ public:
 private:
   // This transformation requires dominator postdominator info
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<MemoryDependenceAnalysis>();
-    AU.addPreserved<AliasAnalysis>();
   }
 
   // Helper routines
@@ -156,7 +159,7 @@ private:
 };
 
 char MergedLoadStoreMotion::ID = 0;
-}
+} // anonymous namespace
 
 ///
 /// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -169,7 +172,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
                       "MergedLoadStoreMotion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
                     "MergedLoadStoreMotion", false, false)
 
@@ -236,12 +240,11 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-
 bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
                                                       const Instruction& End,
                                                       LoadInst* LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
-  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
+  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
 
 ///
@@ -256,7 +259,7 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
 
   for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
        ++BBI) {
-    Instruction *Inst = BBI;
+    Instruction *Inst = &*BBI;
 
     // Only merge and hoist loads when their result in used only in BB
     if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
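// --- Illustrative aside (not part of the patch): what MergedLoadStoreMotion
// does to a diamond, sketched at the source level with invented names.
// Equivalent loads in both arms are hoisted above the branch; equivalent
// stores in both arms are sunk below the join.
int before(bool C, int *P, int *Q) {
  int V;
  if (C)
    V = *P + 1;  // load of *P in the then-arm
  else
    V = *P + 2;  // the equivalent load in the else-arm
  *Q = V;
  return V;
}

int after(bool C, int *P, int *Q) {
  int L = *P;                 // hoisted: a single load dominating both arms
  int V = C ? L + 1 : L + 2;
  *Q = V;                     // a store sunk to the join would land here
  return V;
}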
@@ -293,7 +296,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
   // Intersect optional metadata.
   HoistCand->intersectOptionalDataWith(ElseInst);
-  HoistCand->dropUnknownMetadata();
+  HoistCand->dropUnknownNonDebugMetadata();
 
   // Prepend point for instruction insert
   Instruction *HoistPt = BB->getTerminator();
@@ -363,8 +366,7 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
   int NLoads = 0;
   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
        BBI != BBE;) {
-
-    Instruction *I = BBI;
+    Instruction *I = &*BBI;
     ++BBI;
 
     // Only move non-simple (atomic, volatile) loads.
@@ -394,11 +396,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
 /// value being stored or protect against the store from
 /// happening it is considered a sink barrier.
 ///
-
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
+  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
 
 ///
@@ -438,23 +439,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
                                               StoreInst *S1) {
   // Create a phi if the values mismatch.
-  PHINode *NewPN = 0;
+  PHINode *NewPN = nullptr;
   Value *Opd1 = S0->getValueOperand();
   Value *Opd2 = S1->getValueOperand();
   if (Opd1 != Opd2) {
     NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
-                            BB->begin());
+                            &BB->front());
     NewPN->addIncoming(Opd1, S0->getParent());
     NewPN->addIncoming(Opd2, S1->getParent());
-    if (NewPN->getType()->getScalarType()->isPointerTy()) {
-      // AA needs to be informed when a PHI-use of the pointer value is added
-      for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
-        unsigned J = PHINode::getOperandNumForIncomingValue(I);
-        AA->addEscapingUse(NewPN->getOperandUse(J));
-      }
-      if (MD)
-        MD->invalidateCachedPointerInfo(NewPN);
-    }
+    if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(NewPN);
   }
   return NewPN;
 }
@@ -479,12 +473,12 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
     BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
     // Intersect optional metadata.
     S0->intersectOptionalDataWith(S1);
-    S0->dropUnknownMetadata();
+    S0->dropUnknownNonDebugMetadata();
 
     // Create the new store to be inserted at the join point.
     StoreInst *SNew = (StoreInst *)(S0->clone());
     Instruction *ANew = A0->clone();
-    SNew->insertBefore(InsertPt);
+    SNew->insertBefore(&*InsertPt);
     ANew->insertBefore(SNew);
 
     assert(S0->getParent() == A0->getParent());
@@ -566,12 +560,13 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
   }
   return MergedStores;
 }
+
 ///
 /// \brief Run the transformation for each function
 ///
 bool MergedLoadStoreMotion::runOnFunction(Function &F) {
   MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
@@ -579,7 +574,7 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = FI++;
+    BasicBlock *BB = &*FI++;
 
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index f42f8306fccc..c8f885e7eec5 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -71,8 +71,8 @@
 //
 // Limitations and TODO items:
 //
-// 1) We only considers n-ary adds for now. This should be extended and
-// generalized.
+// 1) We only considers n-ary adds and muls for now. This should be extended
+// and generalized.
 //
 //===----------------------------------------------------------------------===//
@@ -110,11 +110,11 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addPreserved<DominatorTreeWrapperPass>();
-    AU.addPreserved<ScalarEvolution>();
+    AU.addPreserved<ScalarEvolutionWrapperPass>();
     AU.addPreserved<TargetLibraryInfoWrapperPass>();
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.setPreservesCFG();
@@ -145,12 +145,23 @@ private:
                                               unsigned I, Value *LHS,
                                               Value *RHS, Type *IndexedType);
 
-  // Reassociate Add for better CSE.
-  Instruction *tryReassociateAdd(BinaryOperator *I);
-  // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
-  Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
-  // Rewrites I to LHS + RHS if LHS is computed already.
-  Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+  // Reassociate binary operators for better CSE.
+  Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+  // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+  // passed.
+  Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                      BinaryOperator *I);
+  // Rewrites I to (LHS op RHS) if LHS is computed already.
+  Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+                                       BinaryOperator *I);
+
+  // Tries to match Op1 and Op2 by using V.
+  bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+  // Gets SCEV for (LHS op RHS).
+  const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+                            const SCEV *RHS);
 
   // Returns the closest dominator of \c Dominatee that computes
   // \c CandidateExpr. Returns null if not found.
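// --- Illustrative aside (not part of the patch): the CSE-enabling rewrite
// behind these declarations, sketched with plain ints and invented names.
// If 'a + c' already dominates the expression, (a + b) + c is reassociated
// to (a + c) + b so the earlier value is reused; this change extends the
// same idea from 'add' to 'mul'.
int before(int a, int b, int c) {
  int x = a + c;        // computed earlier, dominates the use below
  int y = (a + b) + c;  // hides a redundant a + c
  return x ^ y;
}

int after(int a, int b, int c) {
  int x = a + c;
  int y = x + b;        // reassociated form reuses the dominating a + c
  return x ^ y;
}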
@@ -161,11 +172,6 @@ private:
   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
   // to be an index of GEP.
   bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
-  // Returns whether V is known to be non-negative at context \c Ctxt.
-  bool isKnownNonNegative(Value *V, Instruction *Ctxt);
-  // Returns whether AO may sign overflow at context \c Ctxt. It computes a
-  // conservative result -- it answers true when not sure.
-  bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
 
   AssumptionCache *AC;
   const DataLayout *DL;
@@ -182,7 +188,7 @@ private:
   //     foo(a + b);
   //   if (p2)
   //     bar(a + b);
-  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
 };
 } // anonymous namespace
 
@@ -191,7 +197,7 @@ INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate",
                       "Nary reassociation", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
@@ -207,7 +213,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
 
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  SE = &getAnalysis<ScalarEvolution>();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
@@ -224,6 +230,7 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::GetElementPtr:
+  case Instruction::Mul:
     return true;
   default:
     return false;
@@ -239,19 +246,21 @@ bool NaryReassociate::doOneIteration(Function &F) {
        Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
     BasicBlock *BB = Node->getBlock();
     for (auto I = BB->begin(); I != BB->end(); ++I) {
-      if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
-        const SCEV *OldSCEV = SE->getSCEV(I);
-        if (Instruction *NewI = tryReassociate(I)) {
+      if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
+        const SCEV *OldSCEV = SE->getSCEV(&*I);
+        if (Instruction *NewI = tryReassociate(&*I)) {
           Changed = true;
-          SE->forgetValue(I);
+          SE->forgetValue(&*I);
           I->replaceAllUsesWith(NewI);
-          RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
-          I = NewI;
+          // If SeenExprs constains I's WeakVH, that entry will be replaced with
+          // nullptr.
+          RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
+          I = NewI->getIterator();
         }
         // Add the rewritten instruction to SeenExprs; the original instruction
         // is deleted.
-        const SCEV *NewSCEV = SE->getSCEV(I);
-        SeenExprs[NewSCEV].push_back(I);
+        const SCEV *NewSCEV = SE->getSCEV(&*I);
+        SeenExprs[NewSCEV].push_back(WeakVH(&*I));
         // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
         // is equivalent to I. However, ScalarEvolution::getSCEV may
         // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -271,7 +280,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
         //
         // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
         if (NewSCEV != OldSCEV)
-          SeenExprs[OldSCEV].push_back(I);
+          SeenExprs[OldSCEV].push_back(WeakVH(&*I));
       }
     }
   }
@@ -281,7 +290,8 @@ bool NaryReassociate::doOneIteration(Function &F) {
 Instruction *NaryReassociate::tryReassociate(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
-    return tryReassociateAdd(cast<BinaryOperator>(I));
+  case Instruction::Mul:
+    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
   case Instruction::GetElementPtr:
     return tryReassociateGEP(cast<GetElementPtrInst>(I));
   default:
@@ -352,27 +362,6 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
   return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
 }
 
-bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
-  bool NonNegative, Negative;
-  // TODO: ComputeSignBits is expensive. Consider caching the results.
-  ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
-  return NonNegative;
-}
-
-bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
-  if (AO->hasNoSignedWrap())
-    return false;
-
-  Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
-  // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
-  // TODO: handle the negative case as well.
-  if (isKnownNonNegative(AO, Ctxt) &&
-      (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
-    return false;
-
-  return true;
-}
-
 GetElementPtrInst *
 NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
                                           Type *IndexedType) {
@@ -381,7 +370,7 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
     IndexToSplit = SExt->getOperand(0);
   } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
     // zext can be treated as sext if the source is non-negative.
-    if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+    if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
       IndexToSplit = ZExt->getOperand(0);
   }
 
@@ -389,8 +378,11 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
     // If the I-th index needs sext and the underlying add is not equipped with
     // nsw, we cannot split the add because
     //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
-    if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
+    if (requiresSignExtension(IndexToSplit, GEP) &&
+        computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
+            OverflowResult::NeverOverflows)
       return nullptr;
+
     Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
     // IndexToSplit = LHS + RHS.
     if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
@@ -415,7 +407,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
     IndexExprs.push_back(SE->getSCEV(*Index));
   // Replace the I-th index with LHS.
   IndexExprs[I] = SE->getSCEV(LHS);
-  if (isKnownNonNegative(LHS, GEP) &&
+  if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
       DL->getTypeSizeInBits(LHS->getType()) <
           DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
     // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
@@ -429,19 +421,20 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
       GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
       IndexExprs, GEP->isInBounds());
 
-  auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
   if (Candidate == nullptr)
     return nullptr;
 
-  PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
-  // Pretty rare but theoretically possible when a numeric value happens to
-  // share CandidateExpr.
-  if (TypeOfCandidate == nullptr)
-    return nullptr;
+  IRBuilder<> Builder(GEP);
+  // Candidate does not necessarily have the same pointer type as GEP. Use
+  // bitcast or pointer cast to make sure they have the same type, so that the
+  // later RAUW doesn't complain.
+  Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
+  assert(Candidate->getType() == GEP->getType());
 
   // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
   uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
-  Type *ElementType = TypeOfCandidate->getElementType();
+  Type *ElementType = GEP->getType()->getElementType();
   uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
   // Another less rare case: because I is not necessarily the last index of the
   // GEP, the size of the type at the I-th index (IndexedSize) is not
@@ -461,8 +454,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
     return nullptr;
 
   // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
-  IRBuilder<> Builder(GEP);
-  Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
   if (RHS->getType() != IntPtrTy)
     RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
   if (IndexedSize != ElementSize) {
@@ -476,54 +468,89 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
   return NewGEP;
 }
 
-Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
-  if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
     return NewI;
-  if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
     return NewI;
   return nullptr;
 }
 
-Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
-                                                Instruction *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                                     BinaryOperator *I) {
   Value *A = nullptr, *B = nullptr;
-  // To be conservative, we reassociate I only when it is the only user of A+B.
-  if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
-    // I = (A + B) + RHS
-    //   = (A + RHS) + B or (B + RHS) + A
+  // To be conservative, we reassociate I only when it is the only user of (A op
+  // B).
+  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
+    // I = (A op B) op RHS
+    //   = (A op RHS) op B or (B op RHS) op A
     const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
     const SCEV *RHSExpr = SE->getSCEV(RHS);
     if (BExpr != RHSExpr) {
-      if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+      if (auto *NewI =
+              tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
         return NewI;
     }
     if (AExpr != RHSExpr) {
-      if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+      if (auto *NewI =
+              tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
        return NewI;
    }
  }
  return nullptr;
}
 
-Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
-                                                 Value *RHS, Instruction *I) {
-  auto Pos = SeenExprs.find(LHSExpr);
-  // Bail out if LHSExpr is not previously seen.
-  if (Pos == SeenExprs.end())
-    return nullptr;
-
+Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+                                                      Value *RHS,
+                                                      BinaryOperator *I) {
   // Look for the closest dominator LHS of I that computes LHSExpr, and replace
-  // I with LHS + RHS.
+  // I with LHS op RHS.
   auto *LHS = findClosestMatchingDominator(LHSExpr, I);
   if (LHS == nullptr)
     return nullptr;
 
-  Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+  Instruction *NewI = nullptr;
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+    break;
+  case Instruction::Mul:
+    NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+    break;
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
   NewI->takeName(I);
   return NewI;
 }
 
+bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
+                                     Value *&Op2) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
+  case Instruction::Mul:
+    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
+  return false;
+}
+
+const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+                                           const SCEV *RHS) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    return SE->getAddExpr(LHS, RHS);
+  case Instruction::Mul:
+    return SE->getMulExpr(LHS, RHS);
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
  return nullptr;
}
 
 Instruction *
 NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                               Instruction *Dominatee) {
@@ -537,9 +564,13 @@ NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
   // future instruction either. Therefore, we pop it out of the stack. This
   // optimization makes the algorithm O(n).
   while (!Candidates.empty()) {
-    Instruction *Candidate = Candidates.back();
-    if (DT->dominates(Candidate, Dominatee))
-      return Candidate;
+    // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+    // during rewriting.
+    if (Value *Candidate = Candidates.back()) {
+      Instruction *CandidateInstruction = cast<Instruction>(Candidate);
+      if (DT->dominates(CandidateInstruction, Dominatee))
+        return CandidateInstruction;
+    }
     Candidates.pop_back();
   }
   return nullptr;
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df39c781..9f26f78892c6 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -154,7 +154,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
   Phi->addIncoming(Call, &CurrBB);
   Phi->addIncoming(LibCall, LibCallBB);
 
-  BB = JoinBB;
+  BB = JoinBB->getIterator();
   return true;
 }
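// --- Illustrative aside (not part of the patch): the shape of the partial
// inlining that optimizeSQRT performs, written at the source level. The
// fast path uses the hardware square root; only when that result is NaN
// (the case where the libcall would have to set errno) does control reach
// the real library call. __builtin_sqrt is a compiler-specific stand-in.
#include <cmath>

double sqrtPartiallyInlined(double X) {
  double R = __builtin_sqrt(X); // inline fast path, no errno bookkeeping
  if (R == R)                   // non-NaN: the fast result is already correct
    return R;
  return std::sqrt(X);          // slow path: genuine libcall (sets errno)
}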
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 366301ad731a..28c610c2486a 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -27,7 +27,7 @@
 // well defined state for inspection by the collector.  In the current
 // implementation, this is done via the insertion of poll sites at method entry
 // and the backedge of most loops.  We try to avoid inserting more polls than
-// are neccessary to ensure a finite period between poll sites.  This is not
+// are necessary to ensure a finite period between poll sites.  This is not
 // because the poll itself is expensive in the generated code; it's not.  Polls
 // do tend to impact the optimizer itself in negative ways; we'd like to avoid
 // perturbing the optimization of the method as much as we can.
@@ -91,13 +91,15 @@ STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
 
 using namespace llvm;
 
-// Ignore oppurtunities to avoid placing safepoints on backedges, useful for
+// Ignore opportunities to avoid placing safepoints on backedges, useful for
 // validation
 static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
                                   cl::init(false));
 
-/// If true, do not place backedge safepoints in counted loops.
-static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+/// How narrow does the trip count of a loop have to be to have to be considered
+/// "counted"?  Counted loops do not get safepoints at backedges.
+static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
+                                         cl::Hidden, cl::init(32));
 
 // If true, split the backedge of a loop when placing the safepoint, otherwise
 // split the latch block itself.  Both are useful to support for
@@ -121,7 +123,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   std::vector<TerminatorInst *> PollLocations;
 
   /// True unless we're running spp-no-calls in which case we need to disable
-  /// the call dependend placement opts.
+  /// the call-dependent placement opts.
   bool CallSafepointsEnabled;
 
   ScalarEvolution *SE = nullptr;
@@ -142,7 +144,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   }
 
   bool runOnFunction(Function &F) override {
-    SE = &getAnalysis<ScalarEvolution>();
+    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
@@ -153,7 +155,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     // We no longer modify the IR at all in this pass.  Thus all
     // analysis are preserved.
@@ -190,10 +192,8 @@ static void
 InsertSafepointPoll(Instruction *InsertBefore,
                     std::vector<CallSite> &ParsePointsNeeded /*rval*/);
 
-static bool isGCLeafFunction(const CallSite &CS);
-
 static bool needsStatepoint(const CallSite &CS) {
-  if (isGCLeafFunction(CS))
+  if (callsGCLeafFunction(CS))
     return false;
   if (CS.isCall()) {
     CallInst *call = cast<CallInst>(CS.getInstruction());
@@ -206,7 +206,7 @@ static bool needsStatepoint(const CallSite &CS) {
   return true;
 }
 
-static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+static Value *ReplaceWithStatepoint(const CallSite &CS);
 
 /// Returns true if this loop is known to contain a call safepoint which
 /// must unconditionally execute on any iteration of the loop which returns
@@ -220,7 +220,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
   // For the moment, we look only for the 'cuts' that consist of a single call
   // instruction in a block which is dominated by the Header and dominates the
   // loop latch (Pred) block.  Somewhat surprisingly, walking the entire chain
-  // of such dominating blocks gets substaintially more occurences than just
+  // of such dominating blocks gets substantially more occurrences than just
   // checking the Pred and Header blocks themselves.  This may be due to the
   // density of loop exit conditions caused by range and null checks.
   // TODO: structure this as an analysis pass, cache the result for subloops,
@@ -255,18 +255,12 @@
 /// conservatism in the analysis.
 static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                     BasicBlock *Pred) {
-  // Only used when SkipCounted is off
-  const unsigned upperTripBound = 8192;
-
   // A conservative bound on the loop as a whole.
   const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
-  if (MaxTrips != SE->getCouldNotCompute()) {
-    if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
-      return true;
-    if (SkipCounted &&
-        SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
-      return true;
-  }
+  if (MaxTrips != SE->getCouldNotCompute() &&
+      SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
+          CountedLoopTripWidth))
+    return true;
 
   // If this is a conditional branch to the header with the alternate path
   // being outside the loop, we can ask questions about the execution frequency
   // of the exit block.
@@ -275,13 +269,10 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
     // This returns an exact expression only.  TODO: We really only need an
     // upper bound here, but SE doesn't expose that.
     const SCEV *MaxExec = SE->getExitCount(L, Pred);
-    if (MaxExec != SE->getCouldNotCompute()) {
-      if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
+    if (MaxExec != SE->getCouldNotCompute() &&
+        SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
+            CountedLoopTripWidth))
        return true;
-      if (SkipCounted &&
-          SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
-        return true;
-    }
   }
 
   return /* not finite */ false;
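// --- Illustrative aside (not part of the patch): the bit-width test the
// hunk above switches to. A loop is treated as "counted" when its maximum
// trip count provably fits in CountedLoopTripWidth unsigned bits, which
// subsumes the old 8192/32-bit special cases. A standalone equivalent of
// the APInt::isIntN check, with an invented name:
#include <cstdint>

bool fitsInTripWidth(uint64_t MaxTripCount, unsigned TripWidthBits) {
  return TripWidthBits >= 64 || MaxTripCount < (uint64_t(1) << TripWidthBits);
}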
@@ -432,14 +423,14 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
     assert(hasNextInstruction(I) &&
            "first check if there is a next instruction!");
 
     if (I->isTerminator()) {
-      return I->getParent()->getUniqueSuccessor()->begin();
+      return &I->getParent()->getUniqueSuccessor()->front();
     } else {
-      return std::next(BasicBlock::iterator(I));
+      return &*++I->getIterator();
     }
   };
 
   Instruction *cursor = nullptr;
-  for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+  for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
        cursor = nextInstruction(cursor)) {
 
     // We need to ensure a safepoint poll occurs before any 'real' call.  The
@@ -466,7 +457,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
 
 static void findCallSafepoints(Function &F,
                                std::vector<CallSite> &Found /*rval*/) {
   assert(Found.empty() && "must be empty!");
-  for (Instruction &I : inst_range(F)) {
+  for (Instruction &I : instructions(F)) {
     Instruction *inst = &I;
     if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
       CallSite CS(inst);
@@ -713,7 +704,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
                                   Invoke->getParent());
     }
 
-    Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+    Value *GCResult = ReplaceWithStatepoint(CS);
     Results.push_back(GCResult);
   }
   assert(Results.size() == ParsePointNeeded.size());
@@ -747,7 +738,7 @@ FunctionPass *llvm::createPlaceSafepointsPass() {
 INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
                       "place-backedge-safepoints-impl",
                       "Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
@@ -759,31 +750,6 @@ INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
 INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                     false, false)
 
-static bool isGCLeafFunction(const CallSite &CS) {
-  Instruction *inst = CS.getInstruction();
-  if (isa<IntrinsicInst>(inst)) {
-    // Most LLVM intrinsics are things which can never take a safepoint.
-    // As a result, we don't need to have the stack parsable at the
-    // callsite.  This is a highly useful optimization since intrinsic
-    // calls are fairly prevelent, particularly in debug builds.
-    return true;
-  }
-
-  // If this function is marked explicitly as a leaf call, we don't need to
-  // place a safepoint of it.  In fact, for correctness we *can't* in many
-  // cases.  Note: Indirect calls return Null for the called function,
-  // these obviously aren't runtime functions with attributes
-  // TODO: Support attributes on the call site as well.
-  const Function *F = CS.getCalledFunction();
-  bool isLeaf =
-      F &&
-      F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
-  if (isLeaf) {
-    return true;
-  }
-  return false;
-}
-
 static void
 InsertSafepointPoll(Instruction *InsertBefore,
                     std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
@@ -796,6 +762,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
   // path call - where we need to insert a safepoint (parsepoint).
 
   auto *F = M->getFunction(GCSafepointPollName);
+  assert(F && "gc.safepoint_poll function is missing");
   assert(F->getType()->getElementType() ==
              FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
          "gc.safepoint_poll declared with wrong type");
@@ -864,10 +831,8 @@ InsertSafepointPoll(Instruction *InsertBefore,
 /// Replaces the given call site (Call or Invoke) with a gc.statepoint
 /// intrinsic with an empty deoptimization arguments list.  This does
 /// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
-                                    Pass *P) {
-  assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
-         "must be set");
+static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
+  assert(CS.getInstruction()->getModule() && "must be set");
 
   // TODO: technically, a pass is not allowed to get functions from within a
   // function pass since it might trigger a new function addition.  Refactor
@@ -917,15 +882,10 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
       CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
       AttrsToRemove);
 
-  Value *StatepointTarget = NumPatchBytes == 0
-                                ? CS.getCalledValue()
-                                : ConstantPointerNull::get(cast<PointerType>(
-                                      CS.getCalledValue()->getType()));
-
   if (CS.isCall()) {
     CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
     CallInst *Call = Builder.CreateGCStatepointCall(
-        ID, NumPatchBytes, StatepointTarget,
+        ID, NumPatchBytes, CS.getCalledValue(),
         makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
         "safepoint_token");
     Call->setTailCall(ToReplace->isTailCall());
@@ -938,7 +898,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
 
     Token = Call;
 
-    // Put the following gc_result and gc_relocate calls immediately after the
+    // Put the following gc_result and gc_relocate calls immediately after
     // the old call (which we're about to delete).
     assert(ToReplace->getNextNode() && "not a terminator, must have next");
     Builder.SetInsertPoint(ToReplace->getNextNode());
@@ -951,7 +911,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
     // original block.
     Builder.SetInsertPoint(ToReplace->getParent());
     InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
-        ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+        ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        None, None, "safepoint_token");
 
@@ -967,7 +927,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
     // We'll insert the gc.result into the normal block
     BasicBlock *NormalDest = ToReplace->getNormalDest();
     // Can not insert gc.result in case of phi nodes preset.
-    // Should have removed this cases prior to runnning this function
+    // Should have removed this cases prior to running this function
     assert(!isa<PHINode>(NormalDest->begin()));
     Instruction *IP = &*(NormalDest->getFirstInsertionPt());
     Builder.SetInsertPoint(IP);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index d1acf785d07e..fb970c747ce1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,6 +26,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -62,7 +64,7 @@ namespace {
   /// Print out the expression identified in the Ops list.
   ///
   static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
-    Module *M = I->getParent()->getParent()->getParent();
+    Module *M = I->getModule();
     dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
            << *Ops[0].Op->getType() << '\t';
     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -82,20 +84,6 @@ namespace {
 
     Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
 
-    /// \brief Sort factors by their Base.
-    struct BaseSorter {
-      bool operator()(const Factor &LHS, const Factor &RHS) {
-        return LHS.Base < RHS.Base;
-      }
-    };
-
-    /// \brief Compare factors for equal bases.
-    struct BaseEqual {
-      bool operator()(const Factor &LHS, const Factor &RHS) {
-        return LHS.Base == RHS.Base;
-      }
-    };
-
     /// \brief Sort factors in descending order by their power.
     struct PowerDescendingSorter {
       bool operator()(const Factor &LHS, const Factor &RHS) {
@@ -172,6 +160,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
+      AU.addPreserved<GlobalsAAWrapperPass>();
     }
   private:
     void BuildRankMap(Function &F);
@@ -255,27 +244,6 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
   return nullptr;
 }
 
-static bool isUnmovableInstruction(Instruction *I) {
-  switch (I->getOpcode()) {
-  case Instruction::PHI:
-  case Instruction::LandingPad:
-  case Instruction::Alloca:
-  case Instruction::Load:
-  case Instruction::Invoke:
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::FDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-  case Instruction::FRem:
-    return true;
-  case Instruction::Call:
-    return !isa<DbgInfoIntrinsic>(I);
-  default:
-    return false;
-  }
-}
-
 void Reassociate::BuildRankMap(Function &F) {
   unsigned i = 2;
 
@@ -295,7 +263,7 @@ void Reassociate::BuildRankMap(Function &F) {
     // we cannot move.  This ensures that the ranks for these instructions are
     // all different in the block.
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-      if (isUnmovableInstruction(I))
+      if (mayBeMemoryDependent(*I))
        ValueRankMap[&*I] = ++BBRank;
  }
}
@@ -913,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 /// that computes the negative version of the value specified.  The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions to the redo list that are modified while
+/// pushing the negates through adds.  These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->getType()->isFPOrFPVectorTy()) {
       return ConstantExpr::getFNeg(C);
     }
@@ -934,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
   if (BinaryOperator *I =
           isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
     if (I->getOpcode() == Instruction::Add) {
       I->setHasNoUnsignedWrap(false);
       I->setHasNoSignedWrap(false);
     }
@@ -948,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
     //
     I->moveBefore(BI);
     I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociating opportunities.
+    ToRedo.insert(I);
     return I;
   }
 
@@ -972,26 +948,28 @@ static Value *NegateValue(Value *V, Instruction *BI) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
         InsertPt = II->getNormalDest()->begin();
       } else {
-        InsertPt = InstInput;
-        ++InsertPt;
+        InsertPt = ++InstInput->getIterator();
       }
       while (isa<PHINode>(InsertPt)) ++InsertPt;
     } else {
       InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
     }
-    TheNeg->moveBefore(InsertPt);
+    TheNeg->moveBefore(&*InsertPt);
     if (TheNeg->getOpcode() == Instruction::Sub) {
       TheNeg->setHasNoUnsignedWrap(false);
       TheNeg->setHasNoSignedWrap(false);
     } else {
       TheNeg->andIRFlags(BI);
     }
+    ToRedo.insert(TheNeg);
    return TheNeg;
  }
 
   // Insert a 'neg' instruction that subtracts the value from zero to get the
   // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }
 
 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1025,14 +1003,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
 
 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
   Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -1166,7 +1145,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
     return nullptr;
   }
 
-  BasicBlock::iterator InsertPt = BO; ++InsertPt;
+  BasicBlock::iterator InsertPt = ++BO->getIterator();
 
   // If this was just a single multiply, remove the multiply and return the only
   // remaining operand.
@@ -1179,7 +1158,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
   }
 
   if (NeedsNegate)
-    V = CreateNeg(V, "neg", InsertPt, BO);
+    V = CreateNeg(V, "neg", &*InsertPt, BO);
 
   return V;
 }
@@ -1250,7 +1229,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
   return nullptr;
 }
 
-/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
 /// instruction with the given two operands, and return the resulting
 /// instruction. There are two special cases: 1) if the constant operand is 0,
 /// it will return NULL. 2) if the constant is ~0, the symbolic operand will
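// --- Illustrative aside (not part of the patch): what BreakUpSubtract and
// NegateValue do, sketched with plain ints. X - Y is turned into X + (-Y),
// and a negate is pushed through an add, -(A + B) == (-A) + (-B), so the
// whole expression becomes one add tree that reassociation can reorder.
// (The pass clears nsw/nuw flags when doing this, for overflow reasons this
// sketch ignores.) The intermediate negates are what now land on the redo
// list above.
int before(int X, int A, int B) {
  return X - (A + B);
}

int after(int X, int A, int B) {
  return X + ((-A) + (-B)); // same value, now a pure add tree
}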
@@ -2083,7 +2062,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
     return;
 
   // Don't optimize floating point instructions that don't have unsafe algebra.
-  if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
+  if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
     return;
 
   // Do not reassociate boolean (i1) expressions.  We want to preserve the
@@ -2099,7 +2078,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // see if we can convert it to X+-Y.
   if (I->getOpcode() == Instruction::Sub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2110,6 +2089,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
                (!I->hasOneUse() ||
                 !isReassociableOp(I->user_back(), Instruction::Mul))) {
       Instruction *NI = LowerNegateToMultiply(I);
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2117,7 +2102,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
     }
   } else if (I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2127,7 +2112,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
       if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
           (!I->hasOneUse() ||
            !isReassociableOp(I->user_back(), Instruction::FMul))) {
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
         Instruction *NI = LowerNegateToMultiply(I);
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
         RedoInsts.insert(I);
         MadeChange = true;
         I = NI;
@@ -2142,8 +2133,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // If this is an interior node of a reassociable tree, ignore it until we
   // get to the root of the tree, to avoid N^2 analysis.
   unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited. So Redo later
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
     return;
+  }
 
   // If this is an add tree that is used by a sub instruction, ignore it
   // until we process the subtract.
@@ -2250,10 +2247,10 @@ bool Reassociate::runOnFunction(Function &F) {
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     // Optimize every instruction in the basic block.
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
-      if (isInstructionTriviallyDead(II)) {
-        EraseInst(II++);
+      if (isInstructionTriviallyDead(&*II)) {
+        EraseInst(&*II++);
       } else {
-        OptimizeInst(II);
+        OptimizeInst(&*II);
         assert(II->getParent() == BI && "Moved to a different block!");
         ++II;
       }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727c17bb..915f89780c08 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -82,10 +82,9 @@ bool RegToMem::runOnFunction(Function &F) {
   BasicBlock::iterator I = BBEntry->begin();
   while (isa<AllocaInst>(I)) ++I;
 
-  CastInst *AllocaInsertionPoint =
-    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
-                    Type::getInt32Ty(F.getContext()),
-                    "reg2mem alloca point", I);
+  CastInst *AllocaInsertionPoint = new BitCastInst(
+      Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+      Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
 
   // Find the escaped instructions. But don't create stack slots for
   // allocas in entry block.
@@ -95,7 +94,7 @@ bool RegToMem::runOnFunction(Function &F) {
     for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); iib != iie;
          ++iib) {
       if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
-          valueEscapes(iib)) {
+          valueEscapes(&*iib)) {
         WorkList.push_front(&*iib);
       }
     }
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ae2ae3af0c7a..db127c3f7b4e 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,12 +14,14 @@
 
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Dominators.h"
@@ -46,10 +48,6 @@
 
 using namespace llvm;
 
-// Print tracing output
-static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
-                              cl::init(false));
-
 // Print the liveset found at the insert location
 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                   cl::init(false));
@@ -74,6 +72,12 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                   cl::location(ClobberNonLive),
                                                   cl::Hidden);
 
+static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
+                                     cl::init(false));
+static cl::opt<bool>
+    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
+                                   cl::Hidden, cl::init(true));
+
 namespace {
 struct RewriteStatepointsForGC : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
@@ -88,10 +92,10 @@ struct RewriteStatepointsForGC : public ModulePass {
         Changed |= runOnFunction(F);
 
     if (Changed) {
-      // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+      // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
       // returns true for at least one function in the module.  Since at least
       // one function changed, we know that the precondition is satisfied.
-      stripDereferenceabilityInfo(M);
+      stripNonValidAttributes(M);
     }
 
     return Changed;
@@ -108,15 +112,16 @@ struct RewriteStatepointsForGC : public ModulePass {
   /// dereferenceability that are no longer valid/correct after
   /// RewriteStatepointsForGC has run.  This is because semantically, after
   /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
-  /// heap.  stripDereferenceabilityInfo (conservatively) restores correctness
+  /// heap.  stripNonValidAttributes (conservatively) restores correctness
   /// by erasing all attributes in the module that externally imply
   /// dereferenceability.
-  ///
-  void stripDereferenceabilityInfo(Module &M);
+  /// Similar reasoning also applies to the noalias attributes. gc.statepoint
+  /// can touch the entire heap including noalias objects.
+  void stripNonValidAttributes(Module &M);
 
-  // Helpers for stripDereferenceabilityInfo
-  void stripDereferenceabilityInfoFromBody(Function &F);
-  void stripDereferenceabilityInfoFromPrototype(Function &F);
+  // Helpers for stripNonValidAttributes
+  void stripNonValidAttributesFromBody(Function &F);
+  void stripNonValidAttributesFromPrototype(Function &F);
 };
 } // namespace
 
@@ -160,15 +165,16 @@ struct GCPtrLivenessData {
 // base relation will remain.  Internally, we add a mixture of the two
 // types, then update all the second type to the first type
 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
-typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+typedef DenseSet<Value *> StatepointLiveSetTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+    RematerializedValueMapTy;
 
 struct PartiallyConstructedSafepointRecord {
-  /// The set of values known to be live accross this safepoint
-  StatepointLiveSetTy liveset;
+  /// The set of values known to be live across this safepoint
+  StatepointLiveSetTy LiveSet;
 
   /// Mapping from live pointers to a base-defining-value
-  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+  DenseMap<Value *, Value *> PointerToBase;
 
   /// The *new* gc.statepoint instruction itself.  This produces the token
   /// that normal path gc.relocates and the gc.result are tied to.
@@ -179,12 +185,26 @@ struct PartiallyConstructedSafepointRecord {
   Instruction *UnwindToken;
 
   /// Record live values we are rematerialized instead of relocating.
-  /// They are not included into 'liveset' field.
+  /// They are not included into 'LiveSet' field.
   /// Maps rematerialized copy to it's original value.
   RematerializedValueMapTy RematerializedValues;
 };
 }
 
+static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+  assert(UseDeoptBundles && "Should not be called otherwise!");
+
+  Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+
+  if (!DeoptBundle.hasValue()) {
+    assert(AllowStatepointWithNoDeoptInfo &&
+           "Found non-leaf call without deopt info!");
+    return None;
+  }
+
+  return DeoptBundle.getValue().Inputs;
+}
+
 /// Compute the live-in set for every basic block in the function
 static void computeLiveInValues(DominatorTree &DT, Function &F,
                                 GCPtrLivenessData &Data);
@@ -195,10 +215,10 @@ static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                               StatepointLiveSetTy &out);
 
 // TODO: Once we can get to the GCStrategy, this becomes
-// Optional<bool> isGCManagedPointer(const Value *V) const override {
+// Optional<bool> isGCManagedPointer(const Type *Ty) const override {
 
-static bool isGCPointerType(const Type *T) {
-  if (const PointerType *PT = dyn_cast<PointerType>(T))
+static bool isGCPointerType(Type *T) {
+  if (auto *PT = dyn_cast<PointerType>(T))
     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
     // GC managed heap.  We know that a pointer into this heap needs to be
     // updated and that no other pointer does.
@@ -233,9 +253,8 @@ static bool containsGCPtrType(Type *Ty) {
   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
     return containsGCPtrType(AT->getElementType());
   if (StructType *ST = dyn_cast<StructType>(Ty))
-    return std::any_of(
-        ST->subtypes().begin(), ST->subtypes().end(),
-        [](Type *SubType) { return containsGCPtrType(SubType); });
+    return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+                       containsGCPtrType);
   return false;
 }
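// --- Illustrative aside (not part of the patch): the base/derived pointer
// vocabulary these typedefs support, in a purely hypothetical sketch. If a
// collector may move an object at a safepoint, a pointer derived into its
// interior must be recomputed from the relocated base; mapping each live
// value to its base is what the machinery below computes.
#include <cstddef>

char *derive(char *Base, std::size_t Offset) {
  return Base + Offset;           // derived = base + offset
}

char *reloadAfterSafepoint(char *RelocatedBase, std::size_t Offset) {
  // Rebuilt from the relocated base rather than kept live across the poll.
  return RelocatedBase + Offset;
}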
(V->getName() + Suffix).str() : DefaultName.str(); +} + // Conservatively identifies any definitions which might be live at the // given instruction. The analysis is performed immediately before the // given instruction. Values defined by that instruction are not considered @@ -269,30 +295,56 @@ static void analyzeParsePointLiveness( const CallSite &CS, PartiallyConstructedSafepointRecord &result) { Instruction *inst = CS.getInstruction(); - StatepointLiveSetTy liveset; - findLiveSetAtInst(inst, OriginalLivenessData, liveset); + StatepointLiveSetTy LiveSet; + findLiveSetAtInst(inst, OriginalLivenessData, LiveSet); if (PrintLiveSet) { // Note: This output is used by several of the test cases - // The order of elemtns in a set is not stable, put them in a vec and sort + // The order of elements in a set is not stable, put them in a vec and sort // by name - SmallVector temp; - temp.insert(temp.end(), liveset.begin(), liveset.end()); - std::sort(temp.begin(), temp.end(), order_by_name); + SmallVector Temp; + Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end()); + std::sort(Temp.begin(), Temp.end(), order_by_name); errs() << "Live Variables:\n"; - for (Value *V : temp) { - errs() << " " << V->getName(); // no newline - V->dump(); - } + for (Value *V : Temp) + dbgs() << " " << V->getName() << " " << *V << "\n"; } if (PrintLiveSetSize) { errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n"; - errs() << "Number live values: " << liveset.size() << "\n"; + errs() << "Number live values: " << LiveSet.size() << "\n"; } - result.liveset = liveset; + result.LiveSet = LiveSet; } -static Value *findBaseDefiningValue(Value *I); +static bool isKnownBaseResult(Value *V); +namespace { +/// A single base defining value - An immediate base defining value for an +/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'. +/// For instructions which have multiple pointer [vector] inputs or that +/// transition between vector and scalar types, there is no immediate base +/// defining value. The 'base defining value' for 'Def' is the transitive +/// closure of this relation stopping at the first instruction which has no +/// immediate base defining value. The b.d.v. might itself be a base pointer, +/// but it can also be an arbitrary derived pointer. +struct BaseDefiningValueResult { + /// Contains the value which is the base defining value. + Value * const BDV; + /// True if the base defining value is also known to be an actual base + /// pointer. + const bool IsKnownBase; + BaseDefiningValueResult(Value *BDV, bool IsKnownBase) + : BDV(BDV), IsKnownBase(IsKnownBase) { +#ifndef NDEBUG + // Check consistency between new and old means of checking whether a BDV is + // a base. + bool MustBeBase = isKnownBaseResult(BDV); + assert(!MustBeBase || MustBeBase == IsKnownBase); +#endif + } +}; +} + +static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// Return a base defining value for the 'Index' element of the given vector /// instruction 'I'. If Index is null, returns a BDV for the entire vector @@ -303,8 +355,8 @@ static Value *findBaseDefiningValue(Value *I); /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. /// If the later, the return pointer is a BDV (or possibly a base) for the /// particular element in 'I'. 
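The suffixed_name_or helper introduced above drives the deterministic naming of the new base instructions created later in this patch. A minimal standalone sketch of its behavior (plain C++ with std::string standing in for StringRef/Twine; names are illustrative only):

    #include <string>

    // Mirror of suffixed_name_or: keep a value's name and append the suffix,
    // or fall back to a generic default when the value is unnamed.
    std::string suffixedNameOr(const std::string &Name,
                               const std::string &Suffix,
                               const std::string &Default) {
      return !Name.empty() ? Name + Suffix : Default;
    }

    // suffixedNameOr("obj", ".base", "base_phi") -> "obj.base"
    // suffixedNameOr("",    ".base", "base_phi") -> "base_phi"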
-static std::pair -findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { +static BaseDefiningValueResult +findBaseDefiningValueOfVector(Value *I) { assert(I->getType()->isVectorTy() && cast(I->getType())->getElementType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); @@ -314,7 +366,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { if (isa(I)) // An incoming argument to the function is a base pointer - return std::make_pair(I, true); + return BaseDefiningValueResult(I, true); // We shouldn't see the address of a global as a vector value? assert(!isa(I) && @@ -325,7 +377,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { if (isa(I)) // utterly meaningless, but useful for dealing with partially optimized // code. - return std::make_pair(I, true); + return BaseDefiningValueResult(I, true); // Due to inheritance, this must be _after_ the global variable and undef // checks @@ -333,31 +385,17 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { assert(!isa(I) && !isa(I) && "order of checks wrong!"); assert(Con->isNullValue() && "null is the only case which makes sense"); - return std::make_pair(Con, true); + return BaseDefiningValueResult(Con, true); } if (isa(I)) - return std::make_pair(I, true); - - // For an insert element, we might be able to look through it if we know - // something about the indexes. - if (InsertElementInst *IEI = dyn_cast(I)) { - if (Index) { - Value *InsertIndex = IEI->getOperand(2); - // This index is inserting the value, look for its BDV - if (InsertIndex == Index) - return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false); - // Both constant, and can't be equal per above. This insert is definitely - // not relevant, look back at the rest of the vector and keep trying. - if (isa(Index) && isa(InsertIndex)) - return findBaseDefiningValueOfVector(IEI->getOperand(0), Index); - } - + return BaseDefiningValueResult(I, true); + + if (isa(I)) // We don't know whether this vector contains entirely base pointers or // not. To be conservatively correct, we treat it as a BDV and will // duplicate code as needed to construct a parallel vector of bases. - return std::make_pair(IEI, false); - } + return BaseDefiningValueResult(I, false); if (isa(I)) // We don't know whether this vector contains entirely base pointers or @@ -365,105 +403,62 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { // duplicate code as needed to construct a parallel vector of bases. // TODO: There a number of local optimizations which could be applied here // for particular sufflevector patterns. - return std::make_pair(I, false); + return BaseDefiningValueResult(I, false); // A PHI or Select is a base defining value. The outer findBasePointer // algorithm is responsible for constructing a base value for this BDV. assert((isa(I) || isa(I)) && "unknown vector instruction - no base found for vector element"); - return std::make_pair(I, false); + return BaseDefiningValueResult(I, false); } -static bool isKnownBaseResult(Value *V); - /// Helper function for findBasePointer - Will return a value which either a) -/// defines the base pointer for the input or b) blocks the simple search -/// (i.e. a PHI or Select of two derived pointers) -static Value *findBaseDefiningValue(Value *I) { +/// defines the base pointer for the input, b) blocks the simple search +/// (i.e. a PHI or Select of two derived pointers), or c) involves a change +/// from pointer to vector type or back. 
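To summarize the case analysis in findBaseDefiningValueOfVector above, here is a simplified standalone classifier (a hypothetical enum, not the LLVM types). The point is which vector values already are bases versus which only define a base that the main algorithm must reconstruct:

    // Which vector values are already bases (IsKnownBase == true) versus
    // values that are merely BDVs whose base may need new instructions.
    enum class VecOp { Argument, Undef, NullConstant, Load, InsertElement,
                       ShuffleVector, Phi, Select };

    struct Classified { VecOp Op; bool IsKnownBase; };

    Classified classifyVectorBDV(VecOp Op) {
      switch (Op) {
      case VecOp::Argument:
      case VecOp::Undef:
      case VecOp::NullConstant:
      case VecOp::Load:
        return {Op, true};  // the value itself is a base pointer vector
      default:
        return {Op, false}; // a BDV only; findBasePointer may insert code
      }
    }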
+static BaseDefiningValueResult findBaseDefiningValue(Value *I) { if (I->getType()->isVectorTy()) - return findBaseDefiningValueOfVector(I).first; + return findBaseDefiningValueOfVector(I); assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // This case is a bit of a hack - it only handles extracts from vectors which - // trivially contain only base pointers or cases where we can directly match - // the index of the original extract element to an insertion into the vector. - // See note inside the function for how to improve this. - if (auto *EEI = dyn_cast(I)) { - Value *VectorOperand = EEI->getVectorOperand(); - Value *Index = EEI->getIndexOperand(); - std::pair pair = - findBaseDefiningValueOfVector(VectorOperand, Index); - Value *VectorBase = pair.first; - if (VectorBase->getType()->isPointerTy()) - // We found a BDV for this specific element with the vector. This is an - // optimization, but in practice it covers most of the useful cases - // created via scalarization. - return VectorBase; - else { - assert(VectorBase->getType()->isVectorTy()); - if (pair.second) - // If the entire vector returned is known to be entirely base pointers, - // then the extractelement is valid base for this value. - return EEI; - else { - // Otherwise, we have an instruction which potentially produces a - // derived pointer and we need findBasePointers to clone code for us - // such that we can create an instruction which produces the - // accompanying base pointer. - // Note: This code is currently rather incomplete. We don't currently - // support the general form of shufflevector of insertelement. - // Conceptually, these are just 'base defining values' of the same - // variety as phi or select instructions. We need to update the - // findBasePointers algorithm to insert new 'base-only' versions of the - // original instructions. This is relative straight forward to do, but - // the case which would motivate the work hasn't shown up in real - // workloads yet. - assert((isa(VectorBase) || isa(VectorBase)) && - "need to extend findBasePointers for generic vector" - "instruction cases"); - return VectorBase; - } - } - } - if (isa(I)) // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - return I; + return BaseDefiningValueResult(I, true); if (isa(I)) // base case - return I; + return BaseDefiningValueResult(I, true); // inlining could possibly introduce phi node that contains // undef if callee has multiple returns if (isa(I)) // utterly meaningless, but useful for dealing with // partially optimized code. - return I; + return BaseDefiningValueResult(I, true); // Due to inheritance, this must be _after_ the global variable and undef // checks - if (Constant *Con = dyn_cast(I)) { + if (isa(I)) { assert(!isa(I) && !isa(I) && "order of checks wrong!"); - // Note: Finding a constant base for something marked for relocation - // doesn't really make sense. The most likely case is either a) some - // screwed up the address space usage or b) your validating against - // compiled C++ code w/o the proper separation. The only real exception - // is a null pointer. You could have generic code written to index of - // off a potentially null value and have proven it null. We also use - // null pointers in dead paths of relocation phis (which we might later - // want to find a base pointer for). 
- assert(isa(Con) && - "null is the only case which makes sense"); - return Con; + // Note: Even for frontends which don't have constant references, we can + // see constants appearing after optimizations. A simple example is + // specialization of an address computation on null feeding into a merge + // point where the actual use of the now-constant input is protected by + // another null check. (e.g. test4 in constants.ll) + return BaseDefiningValueResult(I, true); } if (CastInst *CI = dyn_cast(I)) { Value *Def = CI->stripPointerCasts(); + // If stripping pointer casts changes the address space there is an + // addrspacecast in between. + assert(cast(Def->getType())->getAddressSpace() == + cast(CI->getType())->getAddressSpace() && + "unsupported addrspacecast"); // If we find a cast instruction here, it means we've found a cast which is // not simply a pointer cast (i.e. an inttoptr). We don't know how to // handle int->ptr conversion. @@ -472,7 +467,9 @@ static Value *findBaseDefiningValue(Value *I) { } if (isa(I)) - return I; // The value loaded is an gc base itself + // The value loaded is an gc base itself + return BaseDefiningValueResult(I, true); + if (GetElementPtrInst *GEP = dyn_cast(I)) // The base of this GEP is the base @@ -480,14 +477,11 @@ static Value *findBaseDefiningValue(Value *I) { if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { - case Intrinsic::experimental_gc_result_ptr: default: // fall through to general call handling break; case Intrinsic::experimental_gc_statepoint: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_int: - llvm_unreachable("these don't produce pointers"); + llvm_unreachable("statepoints don't produce pointers"); case Intrinsic::experimental_gc_relocate: { // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, @@ -506,17 +500,17 @@ static Value *findBaseDefiningValue(Value *I) { // pointers. This should probably be generalized via attributes to support // both source language and internal functions. if (isa(I) || isa(I)) - return I; + return BaseDefiningValueResult(I, true); // I have absolutely no idea how to implement this part yet. It's not - // neccessarily hard, I just haven't really looked at it yet. + // necessarily hard, I just haven't really looked at it yet. assert(!isa(I) && "Landing Pad is unimplemented"); if (isa(I)) // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it // like a load. - return I; + return BaseDefiningValueResult(I, true); assert(!isa(I) && "Xchg handled above, all others are " "binary ops which don't apply to pointers"); @@ -525,34 +519,41 @@ static Value *findBaseDefiningValue(Value *I) { // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. if (isa(I)) - return I; + return BaseDefiningValueResult(I, true); // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. assert(!isa(I) && "Base pointer for a struct is meaningless"); + // An extractelement produces a base result exactly when it's input does. + // We may need to insert a parallel instruction to extract the appropriate + // element out of the base vector corresponding to the input. Given this, + // it's analogous to the phi and select case even though it's not a merge. 
+ if (isa(I)) + // Note: There a lot of obvious peephole cases here. This are deliberately + // handled after the main base pointer inference algorithm to make writing + // test cases to exercise that code easier. + return BaseDefiningValueResult(I, false); + // The last two cases here don't return a base pointer. Instead, they - // return a value which dynamically selects from amoung several base + // return a value which dynamically selects from among several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. assert((isa(I) || isa(I)) && "missing instruction case in findBaseDefiningValing"); - return I; + return BaseDefiningValueResult(I, false); } /// Returns the base defining value for this value. static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { Value *&Cached = Cache[I]; if (!Cached) { - Cached = findBaseDefiningValue(I); + Cached = findBaseDefiningValue(I).BDV; + DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> " + << Cached->getName() << "\n"); } assert(Cache[I] != nullptr); - - if (TraceLSP) { - dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() - << "\n"; - } return Cached; } @@ -572,7 +573,9 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. static bool isKnownBaseResult(Value *V) { - if (!isa(V) && !isa(V)) { + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V) && + !isa(V)) { // no recursion possible return true; } @@ -587,17 +590,19 @@ static bool isKnownBaseResult(Value *V) { return false; } -// TODO: find a better name for this namespace { -class PhiState { +/// Models the state of a single base defining value in the findBasePointer +/// algorithm for determining where a new instruction is needed to propagate +/// the base of this BDV. +class BDVState { public: enum Status { Unknown, Base, Conflict }; - PhiState(Status s, Value *b = nullptr) : status(s), base(b) { + BDVState(Status s, Value *b = nullptr) : status(s), base(b) { assert(status != Base || b); } - PhiState(Value *b) : status(Base), base(b) {} - PhiState() : status(Unknown), base(nullptr) {} + explicit BDVState(Value *b) : status(Base), base(b) {} + BDVState() : status(Unknown), base(nullptr) {} Status getStatus() const { return status; } Value *getBase() const { return base; } @@ -606,72 +611,80 @@ public: bool isUnknown() const { return getStatus() == Unknown; } bool isConflict() const { return getStatus() == Conflict; } - bool operator==(const PhiState &other) const { + bool operator==(const BDVState &other) const { return base == other.base && status == other.status; } - bool operator!=(const PhiState &other) const { return !(*this == other); } + bool operator!=(const BDVState &other) const { return !(*this == other); } - void dump() { - errs() << status << " (" << base << " - " - << (base ? base->getName() : "nullptr") << "): "; + LLVM_DUMP_METHOD + void dump() const { print(dbgs()); dbgs() << '\n'; } + + void print(raw_ostream &OS) const { + switch (status) { + case Unknown: + OS << "U"; + break; + case Base: + OS << "B"; + break; + case Conflict: + OS << "C"; + break; + }; + OS << " (" << base << " - " + << (base ? 
base->getName() : "nullptr") << "): ";
+  }

 private:
   Status status;
-  Value *base; // non-null only if status == base
+  AssertingVH<Value> base; // non-null only if status == base
 };
+}

-typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
-// Values of type PhiState form a lattice, and this is a helper
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
+  State.print(OS);
+  return OS;
+}
+#endif
+
+namespace {
+// Values of type BDVState form a lattice, and this is a helper
 // class that implements the meet operation. The meat of the meet
-// operation is implemented in MeetPhiStates::pureMeet
-class MeetPhiStates {
+// operation is implemented in MeetBDVStates::pureMeet
+class MeetBDVStates {
 public:
-  // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
-  explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
-      : phiStates(phiStates) {}
+  /// Initializes the currentResult to the TOP state so that it can be met
+  /// with any other state to produce that state.
+  MeetBDVStates() {}

-  // Destructively meet the current result with the base V. V can
-  // either be a merge instruction (SelectInst / PHINode), in which
-  // case its status is looked up in the phiStates map; or a regular
-  // SSA value, in which case it is assumed to be a base.
-  void meetWith(Value *V) {
-    PhiState otherState = getStateForBDV(V);
-    assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
-            MeetPhiStates::pureMeet(currentResult, otherState)) &&
-           "math is wrong: meet does not commute!");
-    currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+  // Destructively meet the current result with the given BDVState
+  void meetWith(BDVState otherState) {
+    currentResult = meet(otherState, currentResult);
   }

-  PhiState getResult() const { return currentResult; }
+  BDVState getResult() const { return currentResult; }

 private:
-  const ConflictStateMapTy &phiStates;
-  PhiState currentResult;
+  BDVState currentResult;

-  /// Return a phi state for a base defining value. We'll generate a new
-  /// base state for known bases and expect to find a cached state otherwise
-  PhiState getStateForBDV(Value *baseValue) {
-    if (isKnownBaseResult(baseValue)) {
-      return PhiState(baseValue);
-    } else {
-      return lookupFromMap(baseValue);
-    }
+  /// Perform a meet operation on two elements of the BDVState lattice.
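Since MeetBDVStates is the heart of the dataflow, a standalone sketch of the three-point lattice it operates over may help (statuses only; the real BDVState also carries the base Value, and two different Base values meet to Conflict):

    // Unknown is TOP, Conflict is BOTTOM; meet moves monotonically downward.
    enum class Status { Unknown, Base, Conflict };

    Status meet(Status A, Status B) {
      if (A == Status::Unknown) return B;             // TOP meet X == X
      if (B == Status::Unknown) return A;
      if (A == Status::Conflict || B == Status::Conflict)
        return Status::Conflict;                      // BOTTOM absorbs
      return Status::Base; // both Base; only equal base values stay Base
    }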
+ static BDVState meet(BDVState LHS, BDVState RHS) { + assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) && + "math is wrong: meet does not commute!"); + BDVState Result = pureMeet(LHS, RHS); + DEBUG(dbgs() << "meet of " << LHS << " with " << RHS + << " produced " << Result << "\n"); + return Result; } - PhiState lookupFromMap(Value *V) { - auto I = phiStates.find(V); - assert(I != phiStates.end() && "lookup failed!"); - return I->second; - } - - static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) { + static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) { switch (stateA.getStatus()) { - case PhiState::Unknown: + case BDVState::Unknown: return stateB; - case PhiState::Base: + case BDVState::Base: assert(stateA.getBase() && "can't be null"); if (stateB.isUnknown()) return stateA; @@ -681,18 +694,20 @@ private: assert(stateA == stateB && "equality broken!"); return stateA; } - return PhiState(PhiState::Conflict); + return BDVState(BDVState::Conflict); } assert(stateB.isConflict() && "only three states!"); - return PhiState(PhiState::Conflict); + return BDVState(BDVState::Conflict); - case PhiState::Conflict: + case BDVState::Conflict: return stateA; } llvm_unreachable("only three states!"); } }; } + + /// For a given value or instruction, figure out what base ptr it's derived /// from. For gc objects, this is simply itself. On success, returns a value /// which is the base pointer. (This is reliable and can be used for @@ -723,171 +738,252 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { // // Note: A simpler form of this would be to add the conflict form of all // PHIs without running the optimistic algorithm. This would be - // analougous to pessimistic data flow and would likely lead to an + // analogous to pessimistic data flow and would likely lead to an // overall worse solution. - ConflictStateMapTy states; - states[def] = PhiState(); - // Recursively fill in all phis & selects reachable from the initial one - // for which we don't already know a definite base value for - // TODO: This should be rewritten with a worklist - bool done = false; - while (!done) { - done = true; - // Since we're adding elements to 'states' as we run, we can't keep - // iterators into the set. - SmallVector Keys; - Keys.reserve(states.size()); - for (auto Pair : states) { - Value *V = Pair.first; - Keys.push_back(V); - } - for (Value *v : Keys) { - assert(!isKnownBaseResult(v) && "why did it get added?"); - if (PHINode *phi = dyn_cast(v)) { - assert(phi->getNumIncomingValues() > 0 && - "zero input phis are illegal"); - for (Value *InVal : phi->incoming_values()) { - Value *local = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } - } - } else if (SelectInst *sel = dyn_cast(v)) { - Value *local = findBaseOrBDV(sel->getTrueValue(), cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } - local = findBaseOrBDV(sel->getFalseValue(), cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } +#ifndef NDEBUG + auto isExpectedBDVType = [](Value *BDV) { + return isa(BDV) || isa(BDV) || + isa(BDV) || isa(BDV); + }; +#endif + + // Once populated, will contain a mapping from each potentially non-base BDV + // to a lattice value (described above) which corresponds to that BDV. 
+ // We use the order of insertion (DFS over the def/use graph) to provide a + // stable deterministic ordering for visiting DenseMaps (which are unordered) + // below. This is important for deterministic compilation. + MapVector States; + + // Recursively fill in all base defining values reachable from the initial + // one for which we don't already know a definite base value for + /* scope */ { + SmallVector Worklist; + Worklist.push_back(def); + States.insert(std::make_pair(def, BDVState())); + while (!Worklist.empty()) { + Value *Current = Worklist.pop_back_val(); + assert(!isKnownBaseResult(Current) && "why did it get added?"); + + auto visitIncomingValue = [&](Value *InVal) { + Value *Base = findBaseOrBDV(InVal, cache); + if (isKnownBaseResult(Base)) + // Known bases won't need new instructions introduced and can be + // ignored safely + return; + assert(isExpectedBDVType(Base) && "the only non-base values " + "we see should be base defining values"); + if (States.insert(std::make_pair(Base, BDVState())).second) + Worklist.push_back(Base); + }; + if (PHINode *Phi = dyn_cast(Current)) { + for (Value *InVal : Phi->incoming_values()) + visitIncomingValue(InVal); + } else if (SelectInst *Sel = dyn_cast(Current)) { + visitIncomingValue(Sel->getTrueValue()); + visitIncomingValue(Sel->getFalseValue()); + } else if (auto *EE = dyn_cast(Current)) { + visitIncomingValue(EE->getVectorOperand()); + } else if (auto *IE = dyn_cast(Current)) { + visitIncomingValue(IE->getOperand(0)); // vector operand + visitIncomingValue(IE->getOperand(1)); // scalar operand + } else { + // There is one known class of instructions we know we don't handle. + assert(isa(Current)); + llvm_unreachable("unimplemented instruction case"); } } } - if (TraceLSP) { - errs() << "States after initialization:\n"; - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; - state.dump(); - v->dump(); - } +#ifndef NDEBUG + DEBUG(dbgs() << "States after initialization:\n"); + for (auto Pair : States) { + DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } +#endif - // TODO: come back and revisit the state transitions around inputs which - // have reached conflict state. The current version seems too conservative. + // Return a phi state for a base defining value. We'll generate a new + // base state for known bases and expect to find a cached state otherwise. + auto getStateForBDV = [&](Value *baseValue) { + if (isKnownBaseResult(baseValue)) + return BDVState(baseValue); + auto I = States.find(baseValue); + assert(I != States.end() && "lookup failed!"); + return I->second; + }; bool progress = true; while (progress) { #ifndef NDEBUG - size_t oldSize = states.size(); + const size_t oldSize = States.size(); #endif progress = false; - // We're only changing keys in this loop, thus safe to keep iterators - for (auto Pair : states) { - MeetPhiStates calculateMeet(states); - Value *v = Pair.first; - assert(!isKnownBaseResult(v) && "why did it get added?"); - if (SelectInst *select = dyn_cast(v)) { - calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache)); - calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache)); - } else - for (Value *Val : cast(v)->incoming_values()) - calculateMeet.meetWith(findBaseOrBDV(Val, cache)); + // We're only changing values in this loop, thus safe to keep iterators. + // Since this is computing a fixed point, the order of visit does not + // effect the result. 
TODO: We could use a worklist here and make this run + // much faster. + for (auto Pair : States) { + Value *BDV = Pair.first; + assert(!isKnownBaseResult(BDV) && "why did it get added?"); - PhiState oldState = states[v]; - PhiState newState = calculateMeet.getResult(); + // Given an input value for the current instruction, return a BDVState + // instance which represents the BDV of that value. + auto getStateForInput = [&](Value *V) mutable { + Value *BDV = findBaseOrBDV(V, cache); + return getStateForBDV(BDV); + }; + + MeetBDVStates calculateMeet; + if (SelectInst *select = dyn_cast(BDV)) { + calculateMeet.meetWith(getStateForInput(select->getTrueValue())); + calculateMeet.meetWith(getStateForInput(select->getFalseValue())); + } else if (PHINode *Phi = dyn_cast(BDV)) { + for (Value *Val : Phi->incoming_values()) + calculateMeet.meetWith(getStateForInput(Val)); + } else if (auto *EE = dyn_cast(BDV)) { + // The 'meet' for an extractelement is slightly trivial, but it's still + // useful in that it drives us to conflict if our input is. + calculateMeet.meetWith(getStateForInput(EE->getVectorOperand())); + } else { + // Given there's a inherent type mismatch between the operands, will + // *always* produce Conflict. + auto *IE = cast(BDV); + calculateMeet.meetWith(getStateForInput(IE->getOperand(0))); + calculateMeet.meetWith(getStateForInput(IE->getOperand(1))); + } + + BDVState oldState = States[BDV]; + BDVState newState = calculateMeet.getResult(); if (oldState != newState) { progress = true; - states[v] = newState; + States[BDV] = newState; } } - assert(oldSize <= states.size()); - assert(oldSize == states.size() || progress); + assert(oldSize == States.size() && + "fixed point shouldn't be adding any new nodes to state"); } - if (TraceLSP) { - errs() << "States after meet iteration:\n"; - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; - state.dump(); - v->dump(); - } +#ifndef NDEBUG + DEBUG(dbgs() << "States after meet iteration:\n"); + for (auto Pair : States) { + DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } - +#endif + // Insert Phis for all conflicts - // We want to keep naming deterministic in the loop that follows, so - // sort the keys before iteration. This is useful in allowing us to - // write stable tests. Note that there is no invalidation issue here. - SmallVector Keys; - Keys.reserve(states.size()); - for (auto Pair : states) { - Value *V = Pair.first; - Keys.push_back(V); - } - std::sort(Keys.begin(), Keys.end(), order_by_name); // TODO: adjust naming patterns to avoid this order of iteration dependency - for (Value *V : Keys) { - Instruction *v = cast(V); - PhiState state = states[V]; - assert(!isKnownBaseResult(v) && "why did it get added?"); - assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (!state.isConflict()) + for (auto Pair : States) { + Instruction *I = cast(Pair.first); + BDVState State = Pair.second; + assert(!isKnownBaseResult(I) && "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + + // extractelement instructions are a bit special in that we may need to + // insert an extract even when we know an exact base for the instruction. + // The problem is that we need to convert from a vector base to a scalar + // base for the particular indice we're interested in. 
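The loop above is a standard optimistic fixed-point computation. Abstracted away from LLVM (a hypothetical helper, not the pass's code), its shape is:

    #include <map>

    // Re-evaluate every node from its inputs until no state changes. The key
    // set is fixed before iteration starts, matching the assert in the pass.
    template <typename Node, typename State, typename EvalFn>
    void runToFixedPoint(std::map<Node, State> &States, EvalFn Eval) {
      bool Progress = true;
      while (Progress) {
        Progress = false;
        for (auto &Entry : States) {
          State New = Eval(Entry.first); // meet over all the node's inputs
          if (New != Entry.second) {
            Entry.second = New;
            Progress = true;
          }
        }
      }
    }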
+ if (State.isBase() && isa(I) && + isa(State.getBase()->getType())) { + auto *EE = cast(I); + // TODO: In many cases, the new instruction is just EE itself. We should + // exploit this, but can't do it here since it would break the invariant + // about the BDV not being known to be a base. + auto *BaseInst = ExtractElementInst::Create(State.getBase(), + EE->getIndexOperand(), + "base_ee", EE); + BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); + States[I] = BDVState(BDVState::Base, BaseInst); + } + + // Since we're joining a vector and scalar base, they can never be the + // same. As a result, we should always see insert element having reached + // the conflict state. + if (isa(I)) { + assert(State.isConflict()); + } + + if (!State.isConflict()) continue; - if (isa(v)) { - int num_preds = - std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); - assert(num_preds > 0 && "how did we reach here"); - PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - phi->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, phi); - } else { - SelectInst *sel = cast(v); - // The undef will be replaced later - UndefValue *undef = UndefValue::get(sel->getType()); - SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, - undef, "base_select", sel); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - basesel->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, basesel); - } + /// Create and insert a new instruction which will represent the base of + /// the given instruction 'I'. 
+ auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* { + if (isa(I)) { + BasicBlock *BB = I->getParent(); + int NumPreds = std::distance(pred_begin(BB), pred_end(BB)); + assert(NumPreds > 0 && "how did we reach here"); + std::string Name = suffixed_name_or(I, ".base", "base_phi"); + return PHINode::Create(I->getType(), NumPreds, Name, I); + } else if (SelectInst *Sel = dyn_cast(I)) { + // The undef will be replaced later + UndefValue *Undef = UndefValue::get(Sel->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_select"); + return SelectInst::Create(Sel->getCondition(), Undef, + Undef, Name, Sel); + } else if (auto *EE = dyn_cast(I)) { + UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_ee"); + return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name, + EE); + } else { + auto *IE = cast(I); + UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType()); + UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_ie"); + return InsertElementInst::Create(VecUndef, ScalarUndef, + IE->getOperand(2), Name, IE); + } + + }; + Instruction *BaseInst = MakeBaseInstPlaceholder(I); + // Add metadata marking this as a base value + BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); + States[I] = BDVState(BDVState::Conflict, BaseInst); } - // Fixup all the inputs of the new PHIs - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; + // Returns a instruction which produces the base pointer for a given + // instruction. The instruction is assumed to be an input to one of the BDVs + // seen in the inference algorithm above. As such, we must either already + // know it's base defining value is a base, or have inserted a new + // instruction to propagate the base of it's BDV and have entered that newly + // introduced instruction into the state table. In either case, we are + // assured to be able to determine an instruction which produces it's base + // pointer. + auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) { + Value *BDV = findBaseOrBDV(Input, cache); + Value *Base = nullptr; + if (isKnownBaseResult(BDV)) { + Base = BDV; + } else { + // Either conflict or base. + assert(States.count(BDV)); + Base = States[BDV].getBase(); + } + assert(Base && "can't be null"); + // The cast is needed since base traversal may strip away bitcasts + if (Base->getType() != Input->getType() && + InsertPt) { + Base = new BitCastInst(Base, Input->getType(), "cast", + InsertPt); + } + return Base; + }; - assert(!isKnownBaseResult(v) && "why did it get added?"); - assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (!state.isConflict()) + // Fixup all the inputs of the new PHIs. Visit order needs to be + // deterministic and predictable because we're naming newly created + // instructions. 
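MakeBaseInstPlaceholder and getBaseForInput together implement a two-phase construction: first materialize a clone of each conflicting merge with undef operands, then wire the operands once every input's base instruction exists. A schematic version (hypothetical Node type, not LLVM IR):

    #include <string>
    #include <vector>

    struct Node {
      std::string Name;
      std::vector<Node *> Operands; // nullptr plays the role of 'undef'
    };

    // Phase 1: clone the shape of a conflicting merge with undef operands.
    Node makePlaceholder(const Node &Of) {
      return Node{Of.Name + ".base",
                  std::vector<Node *>(Of.Operands.size(), nullptr)};
    }

    // Phase 2 (after every placeholder exists): replace each undef operand
    // with the base computed for the corresponding original operand.
    void fixupOperands(Node &Placeholder, const std::vector<Node *> &Bases) {
      Placeholder.Operands = Bases;
    }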
+ for (auto Pair : States) { + Instruction *BDV = cast(Pair.first); + BDVState State = Pair.second; + + assert(!isKnownBaseResult(BDV) && "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + if (!State.isConflict()) continue; - if (PHINode *basephi = dyn_cast(state.getBase())) { - PHINode *phi = cast(v); + if (PHINode *basephi = dyn_cast(State.getBase())) { + PHINode *phi = cast(BDV); unsigned NumPHIValues = phi->getNumIncomingValues(); for (unsigned i = 0; i < NumPHIValues; i++) { Value *InVal = phi->getIncomingValue(i); @@ -906,104 +1002,145 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { if (blockIndex != -1) { Value *oldBase = basephi->getIncomingValue(blockIndex); basephi->addIncoming(oldBase, InBB); + #ifndef NDEBUG - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - - // In essense this assert states: the only way two + Value *Base = getBaseForInput(InVal, nullptr); + // In essence this assert states: the only way two // values incoming from the same basic block may be // different is by being different bitcasts of the same // value. A cleanup that remains TODO is changing // findBaseOrBDV to return an llvm::Value of the correct // type (and still remain pure). This will remove the // need to add bitcasts. - assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && + assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() && "sanity -- findBaseOrBDV should be pure!"); #endif continue; } - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basephi->getType()) { - base = new BitCastInst(base, basephi->getType(), "cast", - InBB->getTerminator()); - } - basephi->addIncoming(base, InBB); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast in the incoming block. + // TODO: Need to split critical edges if insertion is needed + Value *Base = getBaseForInput(InVal, InBB->getTerminator()); + basephi->addIncoming(Base, InBB); } assert(basephi->getNumIncomingValues() == NumPHIValues); - } else { - SelectInst *basesel = cast(state.getBase()); - SelectInst *sel = cast(v); + } else if (SelectInst *BaseSel = dyn_cast(State.getBase())) { + SelectInst *Sel = cast(BDV); // Operand 1 & 2 are true, false path respectively. TODO: refactor to // something more safe and less hacky. for (int i = 1; i <= 2; i++) { - Value *InVal = sel->getOperand(i); - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. 
- assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basesel->getType()) { - base = new BitCastInst(base, basesel->getType(), "cast", basesel); - } - basesel->setOperand(i, base); + Value *InVal = Sel->getOperand(i); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast. + Value *Base = getBaseForInput(InVal, BaseSel); + BaseSel->setOperand(i, Base); } + } else if (auto *BaseEE = dyn_cast(State.getBase())) { + Value *InVal = cast(BDV)->getVectorOperand(); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast. + Value *Base = getBaseForInput(InVal, BaseEE); + BaseEE->setOperand(0, Base); + } else { + auto *BaseIE = cast(State.getBase()); + auto *BdvIE = cast(BDV); + auto UpdateOperand = [&](int OperandIdx) { + Value *InVal = BdvIE->getOperand(OperandIdx); + Value *Base = getBaseForInput(InVal, BaseIE); + BaseIE->setOperand(OperandIdx, Base); + }; + UpdateOperand(0); // vector operand + UpdateOperand(1); // scalar operand + } + + } + + // Now that we're done with the algorithm, see if we can optimize the + // results slightly by reducing the number of new instructions needed. + // Arguably, this should be integrated into the algorithm above, but + // doing as a post process step is easier to reason about for the moment. + DenseMap ReverseMap; + SmallPtrSet NewInsts; + SmallSetVector, 16> Worklist; + // Note: We need to visit the states in a deterministic order. We uses the + // Keys we sorted above for this purpose. Note that we are papering over a + // bigger problem with the algorithm above - it's visit order is not + // deterministic. A larger change is needed to fix this. 
+ for (auto Pair : States) { + auto *BDV = Pair.first; + auto State = Pair.second; + Value *Base = State.getBase(); + assert(BDV && Base); + assert(!isKnownBaseResult(BDV) && "why did it get added?"); + assert(isKnownBaseResult(Base) && + "must be something we 'know' is a base pointer"); + if (!State.isConflict()) + continue; + + ReverseMap[Base] = BDV; + if (auto *BaseI = dyn_cast(Base)) { + NewInsts.insert(BaseI); + Worklist.insert(BaseI); + } + } + auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI, + Value *Replacement) { + // Add users which are new instructions (excluding self references) + for (User *U : BaseI->users()) + if (auto *UI = dyn_cast(U)) + if (NewInsts.count(UI) && UI != BaseI) + Worklist.insert(UI); + // Then do the actual replacement + NewInsts.erase(BaseI); + ReverseMap.erase(BaseI); + BaseI->replaceAllUsesWith(Replacement); + assert(States.count(BDV)); + assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI); + States[BDV] = BDVState(BDVState::Conflict, Replacement); + BaseI->eraseFromParent(); + }; + const DataLayout &DL = cast(def)->getModule()->getDataLayout(); + while (!Worklist.empty()) { + Instruction *BaseI = Worklist.pop_back_val(); + assert(NewInsts.count(BaseI)); + Value *Bdv = ReverseMap[BaseI]; + if (auto *BdvI = dyn_cast(Bdv)) + if (BaseI->isIdenticalTo(BdvI)) { + DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n"); + ReplaceBaseInstWith(Bdv, BaseI, Bdv); + continue; + } + if (Value *V = SimplifyInstruction(BaseI, DL)) { + DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n"); + ReplaceBaseInstWith(Bdv, BaseI, V); + continue; } } // Cache all of our results so we can cheaply reuse them // NOTE: This is actually two caches: one of the base defining value // relation and one of the base pointer relation! FIXME - for (auto item : states) { - Value *v = item.first; - Value *base = item.second.getBase(); - assert(v && base); - assert(!isKnownBaseResult(v) && "why did it get added?"); + for (auto Pair : States) { + auto *BDV = Pair.first; + Value *base = Pair.second.getBase(); + assert(BDV && base); - if (TraceLSP) { - std::string fromstr = - cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "") - : "none"; - errs() << "Updating base value cache" - << " for: " << (v->hasName() ? v->getName() : "") - << " from: " << fromstr - << " to: " << (base->hasName() ? base->getName() : "") << "\n"; - } + std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none"; + DEBUG(dbgs() << "Updating base value cache" + << " for: " << BDV->getName() + << " from: " << fromstr + << " to: " << base->getName() << "\n"); - assert(isKnownBaseResult(base) && - "must be something we 'know' is a base pointer"); - if (cache.count(v)) { + if (cache.count(BDV)) { // Once we transition from the BDV relation being store in the cache to // the base relation being stored, it must be stable - assert((!isKnownBaseResult(cache[v]) || cache[v] == base) && + assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) && "base relation should be stable"); } - cache[v] = base; + cache[BDV] = base; } - assert(cache.find(def) != cache.end()); + assert(cache.count(def)); return cache[def]; } @@ -1024,7 +1161,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { // pointer was a base pointer. 
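The cleanup pass above follows a classic replace-and-requeue worklist discipline. Stripped of the LLVM specifics (hypothetical callbacks stand in for isIdenticalTo/SimplifyInstruction and RAUW), it looks like:

    #include <functional>
    #include <optional>
    #include <set>
    #include <vector>

    using Inst = int; // stand-in for Instruction*

    // Pop newly created instructions; when one is redundant, erase it. The
    // real pass also re-queues any other new instructions that used it, since
    // they may now simplify too.
    void simplifyNewInsts(std::set<Inst> &NewInsts,
                          std::function<std::optional<Inst>(Inst)> trySimplify,
                          std::function<void(Inst, Inst)> replace) {
      std::vector<Inst> Worklist(NewInsts.begin(), NewInsts.end());
      while (!Worklist.empty()) {
        Inst I = Worklist.back();
        Worklist.pop_back();
        if (!NewInsts.count(I))
          continue; // already replaced earlier in this loop
        if (std::optional<Inst> R = trySimplify(I)) {
          replace(I, *R);
          NewInsts.erase(I);
        }
      }
    }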
static void findBasePointers(const StatepointLiveSetTy &live, - DenseMap &PointerToBase, + DenseMap &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache) { // For the naming of values inserted to be deterministic - which makes for // much cleaner and more stable tests - we need to assign an order to the @@ -1043,7 +1180,7 @@ findBasePointers(const StatepointLiveSetTy &live, // If you see this trip and like to live really dangerously, the code should // be correct, just with idioms the verifier can't handle. You can try - // disabling the verifier at your own substaintial risk. + // disabling the verifier at your own substantial risk. assert(!isa(base) && "the relocation code needs adjustment to handle the relocation of " "a null pointer constant without causing false positives in the " @@ -1056,8 +1193,8 @@ findBasePointers(const StatepointLiveSetTy &live, static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, const CallSite &CS, PartiallyConstructedSafepointRecord &result) { - DenseMap PointerToBase; - findBasePointers(result.liveset, PointerToBase, &DT, DVCache); + DenseMap PointerToBase; + findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache); if (PrintBasePointers) { // Note: Need to print these in a stable order since this is checked in @@ -1071,8 +1208,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *Ptr : Temp) { Value *Base = PointerToBase[Ptr]; - errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() - << "\n"; + errs() << " derived "; + Ptr->printAsOperand(errs(), false); + errs() << " base "; + Base->printAsOperand(errs(), false); + errs() << "\n";; } } @@ -1086,10 +1226,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, PartiallyConstructedSafepointRecord &result); static void recomputeLiveInValues( - Function &F, DominatorTree &DT, Pass *P, ArrayRef toUpdate, + Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records) { // TODO-PERF: reuse the original liveness, then simply run the dataflow - // again. The old values are still live and will help it stablize quickly. + // again. The old values are still live and will help it stabilize quickly. GCPtrLivenessData RevisedLivenessData; computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { @@ -1099,69 +1239,66 @@ static void recomputeLiveInValues( } } -// When inserting gc.relocate calls, we need to ensure there are no uses -// of the original value between the gc.statepoint and the gc.relocate call. -// One case which can arise is a phi node starting one of the successor blocks. -// We also need to be able to insert the gc.relocates only on the path which -// goes through the statepoint. We might need to split an edge to make this -// possible. +// When inserting gc.relocate and gc.result calls, we need to ensure there are +// no uses of the original value / return value between the gc.statepoint and +// the gc.relocate / gc.result call. One case which can arise is a phi node +// starting one of the successor blocks. We also need to be able to insert the +// gc.relocates only on the path which goes through the statepoint. We might +// need to split an edge to make this possible. 
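Both the deterministic naming and the stable test output mentioned above rest on the sort-by-name trick. A standalone version of that comparator (simplified, with names as std::string):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct Val { std::string Name; };

    // Named values sort lexicographically and come before unnamed ones;
    // unnamed values are mutually unordered, which is still a valid strict
    // weak ordering for std::sort.
    bool orderByName(const Val *A, const Val *B) {
      if (!A->Name.empty() && !B->Name.empty())
        return A->Name < B->Name;
      return !A->Name.empty() && B->Name.empty();
    }

    void sortForStableOutput(std::vector<const Val *> &Vs) {
      std::sort(Vs.begin(), Vs.end(), orderByName);
    }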
static BasicBlock * normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, DominatorTree &DT) { BasicBlock *Ret = BB; - if (!BB->getUniquePredecessor()) { - Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT); - } + if (!BB->getUniquePredecessor()) + Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT); - // Now that 'ret' has unique predecessor we can safely remove all phi nodes + // Now that 'Ret' has unique predecessor we can safely remove all phi nodes // from it FoldSingleEntryPHINodes(Ret); - assert(!isa(Ret->begin())); + assert(!isa(Ret->begin()) && + "All PHI nodes should have been removed!"); - // At this point, we can safely insert a gc.relocate as the first instruction - // in Ret if needed. + // At this point, we can safely insert a gc.relocate or gc.result as the first + // instruction in Ret if needed. return Ret; } -static int find_index(ArrayRef livevec, Value *val) { - auto itr = std::find(livevec.begin(), livevec.end(), val); - assert(livevec.end() != itr); - size_t index = std::distance(livevec.begin(), itr); - assert(index < livevec.size()); - return index; -} - -// Create new attribute set containing only attributes which can be transfered +// Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. static AttributeSet legalizeCallAttributes(AttributeSet AS) { - AttributeSet ret; + AttributeSet Ret; for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned index = AS.getSlotIndex(Slot); + unsigned Index = AS.getSlotIndex(Slot); - if (index == AttributeSet::ReturnIndex || - index == AttributeSet::FunctionIndex) { + if (Index == AttributeSet::ReturnIndex || + Index == AttributeSet::FunctionIndex) { - for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end; - ++it) { - Attribute attr = *it; + for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { // Do not allow certain attributes - just skip them // Safepoint can not be read only or read none. - if (attr.hasAttribute(Attribute::ReadNone) || - attr.hasAttribute(Attribute::ReadOnly)) + if (Attr.hasAttribute(Attribute::ReadNone) || + Attr.hasAttribute(Attribute::ReadOnly)) continue; - ret = ret.addAttributes( - AS.getContext(), index, - AttributeSet::get(AS.getContext(), index, AttrBuilder(attr))); + // These attributes control the generation of the gc.statepoint call / + // invoke itself; and once the gc.statepoint is in place, they're of no + // use. + if (Attr.hasAttribute("statepoint-num-patch-bytes") || + Attr.hasAttribute("statepoint-id")) + continue; + + Ret = Ret.addAttributes( + AS.getContext(), Index, + AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr))); } } // Just skip parameter attributes for now } - return ret; + return Ret; } /// Helper function to place all gc relocates necessary for the given @@ -1173,225 +1310,290 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) { /// statepointToken - statepoint instruction to which relocates should be /// bound. /// Builder - Llvm IR builder to be used to construct new calls. 
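A miniature of the filtering rule in legalizeCallAttributes, with attribute names as plain strings. This is a sketch of the policy only, not the AttributeSet API:

    #include <string>
    #include <vector>

    // Copy only attributes that remain valid on the new statepoint: drop ones
    // that are now wrong (a safepoint may read/write the heap) and ones the
    // statepoint has already consumed.
    std::vector<std::string>
    legalizeAttrs(const std::vector<std::string> &Attrs) {
      std::vector<std::string> Out;
      for (const std::string &A : Attrs) {
        if (A == "readnone" || A == "readonly")
          continue; // invalid after rewriting
        if (A == "statepoint-id" || A == "statepoint-num-patch-bytes")
          continue; // already encoded into the gc.statepoint itself
        Out.push_back(A);
      }
      return Out;
    }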
-static void CreateGCRelocates(ArrayRef LiveVariables, +static void CreateGCRelocates(ArrayRef LiveVariables, const int LiveStart, - ArrayRef BasePtrs, + ArrayRef BasePtrs, Instruction *StatepointToken, IRBuilder<> Builder) { - SmallVector NewDefs; - NewDefs.reserve(LiveVariables.size()); + if (LiveVariables.empty()) + return; - Module *M = StatepointToken->getParent()->getParent()->getParent(); + auto FindIndex = [](ArrayRef LiveVec, Value *Val) { + auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val); + assert(ValIt != LiveVec.end() && "Val not found in LiveVec!"); + size_t Index = std::distance(LiveVec.begin(), ValIt); + assert(Index < LiveVec.size() && "Bug in std::find?"); + return Index; + }; + + // All gc_relocate are set to i8 addrspace(1)* type. We originally generated + // unique declarations for each pointer type, but this proved problematic + // because the intrinsic mangling code is incomplete and fragile. Since + // we're moving towards a single unified pointer type anyways, we can just + // cast everything to an i8* of the right address space. A bitcast is added + // later to convert gc_relocate to the actual value's type. + Module *M = StatepointToken->getModule(); + auto AS = cast(LiveVariables[0]->getType())->getAddressSpace(); + Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)}; + Value *GCRelocateDecl = + Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); for (unsigned i = 0; i < LiveVariables.size(); i++) { - // We generate a (potentially) unique declaration for every pointer type - // combination. This results is some blow up the function declarations in - // the IR, but removes the need for argument bitcasts which shrinks the IR - // greatly and makes it much more readable. - SmallVector Types; // one per 'any' type - // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid - // cases where the actual value's type mangling is not supported by llvm. A - // bitcast is added later to convert gc_relocate to the actual value's type. - Types.push_back(Type::getInt8PtrTy(M->getContext(), 1)); - Value *GCRelocateDecl = Intrinsic::getDeclaration( - M, Intrinsic::experimental_gc_relocate, Types); - // Generate the gc.relocate call and save the result Value *BaseIdx = - ConstantInt::get(Type::getInt32Ty(M->getContext()), - LiveStart + find_index(LiveVariables, BasePtrs[i])); - Value *LiveIdx = ConstantInt::get( - Type::getInt32Ty(M->getContext()), - LiveStart + find_index(LiveVariables, LiveVariables[i])); + Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i])); + Value *LiveIdx = Builder.getInt32(LiveStart + i); // only specify a debug name if we can give a useful one - Value *Reloc = Builder.CreateCall( + CallInst *Reloc = Builder.CreateCall( GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx}, - LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated" - : ""); + suffixed_name_or(LiveVariables[i], ".relocated", "")); // Trick CodeGen into thinking there are lots of free registers at this // fake call. - cast(Reloc)->setCallingConv(CallingConv::Cold); - - NewDefs.push_back(cast(Reloc)); + Reloc->setCallingConv(CallingConv::Cold); } - assert(NewDefs.size() == LiveVariables.size() && - "missing or extra redefinition at safepoint"); +} + +namespace { + +/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this +/// avoids having to worry about keeping around dangling pointers to Values. 
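The class that follows realizes this deferral. As a simplified standalone model of the same idea (hypothetical Instr type and callbacks, not the LLVM class):

    #include <cassert>

    struct Instr {}; // stand-in for llvm::Instruction

    // Record the (Old, New) pair now; perform the RAUW and deletion only
    // after no other data structure still holds a raw pointer to Old.
    class DeferredRAUW {
      Instr *Old;
      Instr *New;

    public:
      DeferredRAUW(Instr *OldI, Instr *NewI) : Old(OldI), New(NewI) {
        assert(Old != New && "Not allowed!");
      }

      void apply(void (*replaceAllUsesWith)(Instr *, Instr *),
                 void (*eraseFromParent)(Instr *)) {
        if (New)
          replaceAllUsesWith(Old, New);
        eraseFromParent(Old);
        Old = nullptr;
        New = nullptr;
      }
    };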
+class DeferredReplacement { + AssertingVH Old; + AssertingVH New; + +public: + explicit DeferredReplacement(Instruction *Old, Instruction *New) : + Old(Old), New(New) { + assert(Old != New && "Not allowed!"); + } + + /// Does the task represented by this instance. + void doReplacement() { + Instruction *OldI = Old; + Instruction *NewI = New; + + assert(OldI != NewI && "Disallowed at construction?!"); + + Old = nullptr; + New = nullptr; + + if (NewI) + OldI->replaceAllUsesWith(NewI); + OldI->eraseFromParent(); + } +}; } static void -makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ - const SmallVectorImpl &basePtrs, - const SmallVectorImpl &liveVariables, - Pass *P, - PartiallyConstructedSafepointRecord &result) { - assert(basePtrs.size() == liveVariables.size()); - assert(isStatepoint(CS) && +makeStatepointExplicitImpl(const CallSite CS, /* to replace */ + const SmallVectorImpl &BasePtrs, + const SmallVectorImpl &LiveVariables, + PartiallyConstructedSafepointRecord &Result, + std::vector &Replacements) { + assert(BasePtrs.size() == LiveVariables.size()); + assert((UseDeoptBundles || isStatepoint(CS)) && "This method expects to be rewriting a statepoint"); - BasicBlock *BB = CS.getInstruction()->getParent(); - assert(BB); - Function *F = BB->getParent(); - assert(F && "must be set"); - Module *M = F->getParent(); - (void)M; - assert(M && "must be set"); - - // We're not changing the function signature of the statepoint since the gc - // arguments go into the var args section. - Function *gc_statepoint_decl = CS.getCalledFunction(); - // Then go ahead and use the builder do actually do the inserts. We insert // immediately before the previous instruction under the assumption that all // arguments will be available here. We can't insert afterwards since we may // be replacing a terminator. - Instruction *insertBefore = CS.getInstruction(); - IRBuilder<> Builder(insertBefore); - // Copy all of the arguments from the original statepoint - this includes the - // target, call args, and deopt args - SmallVector args; - args.insert(args.end(), CS.arg_begin(), CS.arg_end()); - // TODO: Clear the 'needs rewrite' flag + Instruction *InsertBefore = CS.getInstruction(); + IRBuilder<> Builder(InsertBefore); - // add all the pointers to be relocated (gc arguments) - // Capture the start of the live variable list for use in the gc_relocates - const int live_start = args.size(); - args.insert(args.end(), liveVariables.begin(), liveVariables.end()); + ArrayRef GCArgs(LiveVariables); + uint64_t StatepointID = 0xABCDEF00; + uint32_t NumPatchBytes = 0; + uint32_t Flags = uint32_t(StatepointFlags::None); + + ArrayRef CallArgs; + ArrayRef DeoptArgs; + ArrayRef TransitionArgs; + + Value *CallTarget = nullptr; + + if (UseDeoptBundles) { + CallArgs = {CS.arg_begin(), CS.arg_end()}; + DeoptArgs = GetDeoptBundleOperands(CS); + // TODO: we don't fill in TransitionArgs or Flags in this branch, but we + // could have an operand bundle for that too. 
+ AttributeSet OriginalAttrs = CS.getAttributes(); + + Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, + "statepoint-id"); + if (AttrID.isStringAttribute()) + AttrID.getValueAsString().getAsInteger(10, StatepointID); + + Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute( + AttributeSet::FunctionIndex, "statepoint-num-patch-bytes"); + if (AttrNumPatchBytes.isStringAttribute()) + AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes); + + CallTarget = CS.getCalledValue(); + } else { + // This branch will be gone soon, and we will soon only support the + // UseDeoptBundles == true configuration. + Statepoint OldSP(CS); + StatepointID = OldSP.getID(); + NumPatchBytes = OldSP.getNumPatchBytes(); + Flags = OldSP.getFlags(); + + CallArgs = {OldSP.arg_begin(), OldSP.arg_end()}; + DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()}; + TransitionArgs = {OldSP.gc_transition_args_begin(), + OldSP.gc_transition_args_end()}; + CallTarget = OldSP.getCalledValue(); + } // Create the statepoint given all the arguments - Instruction *token = nullptr; - AttributeSet return_attributes; + Instruction *Token = nullptr; + AttributeSet ReturnAttrs; if (CS.isCall()) { - CallInst *toReplace = cast(CS.getInstruction()); - CallInst *call = - Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token"); - call->setTailCall(toReplace->isTailCall()); - call->setCallingConv(toReplace->getCallingConv()); + CallInst *ToReplace = cast(CS.getInstruction()); + CallInst *Call = Builder.CreateGCStatepointCall( + StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs, + TransitionArgs, DeoptArgs, GCArgs, "safepoint_token"); + + Call->setTailCall(ToReplace->isTailCall()); + Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain // function attributes. - AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); - // In case if we can handle this set of sttributes - set up function attrs + AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); + // In case if we can handle this set of attributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. - call->setAttributes(new_attrs.getFnAttributes()); - return_attributes = new_attrs.getRetAttributes(); + Call->setAttributes(NewAttrs.getFnAttributes()); + ReturnAttrs = NewAttrs.getRetAttributes(); - token = call; + Token = Call; // Put the following gc_result and gc_relocate calls immediately after the // the old call (which we're about to delete) - BasicBlock::iterator next(toReplace); - assert(BB->end() != next && "not a terminator, must have next"); - next++; - Instruction *IP = &*(next); - Builder.SetInsertPoint(IP); - Builder.SetCurrentDebugLocation(IP->getDebugLoc()); - + assert(ToReplace->getNextNode() && "Not a terminator, must have next!"); + Builder.SetInsertPoint(ToReplace->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc()); } else { - InvokeInst *toReplace = cast(CS.getInstruction()); + InvokeInst *ToReplace = cast(CS.getInstruction()); // Insert the new invoke into the old block. We'll remove the old one in a // moment at which point this will become the new terminator for the // original block. 
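The two string attributes read above arrive as decimal strings and overwrite the defaults only when present. The equivalent decoding logic in standalone form (std::string stand-ins for the attribute values; assumes well-formed input, whereas getAsInteger reports failure):

    #include <cstdint>
    #include <string>

    // Hypothetical helper mirroring the attribute decoding step.
    void parseStatepointAttrs(const std::string &IDStr,
                              const std::string &PatchBytesStr,
                              uint64_t &StatepointID,
                              uint32_t &NumPatchBytes) {
      if (!IDStr.empty())
        StatepointID = std::stoull(IDStr);      // "statepoint-id"
      if (!PatchBytesStr.empty())
        NumPatchBytes = std::stoul(PatchBytesStr); // "statepoint-num-patch-bytes"
    }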
- InvokeInst *invoke = InvokeInst::Create( - gc_statepoint_decl, toReplace->getNormalDest(), - toReplace->getUnwindDest(), args, "", toReplace->getParent()); - invoke->setCallingConv(toReplace->getCallingConv()); + InvokeInst *Invoke = Builder.CreateGCStatepointInvoke( + StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(), + ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, + GCArgs, "statepoint_token"); + + Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain // function attributes. - AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); - // In case if we can handle this set of sttributes - set up function attrs + AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); + // In case if we can handle this set of attributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. - invoke->setAttributes(new_attrs.getFnAttributes()); - return_attributes = new_attrs.getRetAttributes(); + Invoke->setAttributes(NewAttrs.getFnAttributes()); + ReturnAttrs = NewAttrs.getRetAttributes(); - token = invoke; + Token = Invoke; // Generate gc relocates in exceptional path - BasicBlock *unwindBlock = toReplace->getUnwindDest(); - assert(!isa(unwindBlock->begin()) && - unwindBlock->getUniquePredecessor() && + BasicBlock *UnwindBlock = ToReplace->getUnwindDest(); + assert(!isa(UnwindBlock->begin()) && + UnwindBlock->getUniquePredecessor() && "can't safely insert in this block!"); - Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); - Builder.SetInsertPoint(IP); - Builder.SetCurrentDebugLocation(toReplace->getDebugLoc()); + Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); - // Extract second element from landingpad return value. We will attach - // exceptional gc relocates to it. - const unsigned idx = 1; - Instruction *exceptional_token = - cast(Builder.CreateExtractValue( - unwindBlock->getLandingPadInst(), idx, "relocate_token")); - result.UnwindToken = exceptional_token; + // Attach exceptional gc relocates to the landingpad. + Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); + Result.UnwindToken = ExceptionalToken; - // Just throw away return value. We will use the one we got for normal - // block. - (void)CreateGCRelocates(liveVariables, live_start, basePtrs, - exceptional_token, Builder); + const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); + CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken, + Builder); // Generate gc relocates and returns for normal block - BasicBlock *normalDest = toReplace->getNormalDest(); - assert(!isa(normalDest->begin()) && - normalDest->getUniquePredecessor() && + BasicBlock *NormalDest = ToReplace->getNormalDest(); + assert(!isa(NormalDest->begin()) && + NormalDest->getUniquePredecessor() && "can't safely insert in this block!"); - IP = &*(normalDest->getFirstInsertionPt()); - Builder.SetInsertPoint(IP); + Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt()); // gc relocates will be generated later as if it were regular call // statepoint } - assert(token); + assert(Token && "Should be set in one of the above branches!"); - // Take the name of the original value call if it had one. 
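The call and invoke paths above differ only in where follow-up code can go: after a plain call there is a next instruction, while an invoke is a terminator, so relocates must be placed at the first insertion point of each destination block (which is why each destination needs a unique predecessor). A toy model of that placement decision, not LLVM API:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Block { std::vector<std::string> Insts; };

    // After a call: insert right behind the instruction itself.
    void insertAfterCall(Block &B, size_t CallIdx, const std::string &Reloc) {
      B.Insts.insert(B.Insts.begin() + CallIdx + 1, Reloc);
    }

    // After an invoke: insert at the top of both destination blocks.
    void insertAfterInvoke(Block &NormalDest, Block &UnwindDest,
                           const std::string &Reloc) {
      NormalDest.Insts.insert(NormalDest.Insts.begin(), Reloc);
      UnwindDest.Insts.insert(UnwindDest.Insts.begin(), Reloc);
    }

    int main() {
      Block BB{{"%tok = statepoint(...)", "ret"}};
      insertAfterCall(BB, 0, "%p.rel = gc.relocate(%tok)");
      for (const auto &I : BB.Insts)
        std::puts(I.c_str());
    }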
-  token->takeName(CS.getInstruction());
+  if (UseDeoptBundles) {
+    Token->setName("statepoint_token");
+    if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+      StringRef Name =
+          CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+      CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+      GCResult->setAttributes(CS.getAttributes().getRetAttributes());
 
-// The GCResult is already inserted, we just need to find it
-#ifndef NDEBUG
-  Instruction *toReplace = CS.getInstruction();
-  assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
-         "only valid use before rewrite is gc.result");
-  assert(!toReplace->hasOneUse() ||
-         isGCResult(cast<Instruction>(*toReplace->user_begin())));
-#endif
+      // We cannot RAUW or delete CS.getInstruction() because it could be in the
+      // live set of some other safepoint, in which case that safepoint's
+      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
+      // llvm::Instruction. Instead, we defer the replacement and deletion to
+      // after the live sets have been made explicit in the IR, and we no longer
+      // have raw pointers to worry about.
+      Replacements.emplace_back(CS.getInstruction(), GCResult);
+    } else {
+      Replacements.emplace_back(CS.getInstruction(), nullptr);
+    }
+  } else {
+    assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
+           "only valid use before rewrite is gc.result");
+    assert(!CS.getInstruction()->hasOneUse() ||
+           isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
 
-  // Update the gc.result of the original statepoint (if any) to use the newly
-  // inserted statepoint. This is safe to do here since the token can't be
-  // considered a live reference.
-  CS.getInstruction()->replaceAllUsesWith(token);
+    // Take the name of the original statepoint token if there was one.
+    Token->takeName(CS.getInstruction());
 
-  result.StatepointToken = token;
+    // Update the gc.result of the original statepoint (if any) to use the newly
+    // inserted statepoint. This is safe to do here since the token can't be
+    // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(Token); + CS.getInstruction()->eraseFromParent(); + } + + Result.StatepointToken = Token; // Second, create a gc.relocate for every live variable - CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); + const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); + CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder); } namespace { -struct name_ordering { - Value *base; - Value *derived; - bool operator()(name_ordering const &a, name_ordering const &b) { - return -1 == a.derived->getName().compare(b.derived->getName()); +struct NameOrdering { + Value *Base; + Value *Derived; + + bool operator()(NameOrdering const &a, NameOrdering const &b) { + return -1 == a.Derived->getName().compare(b.Derived->getName()); } }; } -static void stablize_order(SmallVectorImpl &basevec, - SmallVectorImpl &livevec) { - assert(basevec.size() == livevec.size()); - SmallVector temp; - for (size_t i = 0; i < basevec.size(); i++) { - name_ordering v; - v.base = basevec[i]; - v.derived = livevec[i]; - temp.push_back(v); +static void StabilizeOrder(SmallVectorImpl &BaseVec, + SmallVectorImpl &LiveVec) { + assert(BaseVec.size() == LiveVec.size()); + + SmallVector Temp; + for (size_t i = 0; i < BaseVec.size(); i++) { + NameOrdering v; + v.Base = BaseVec[i]; + v.Derived = LiveVec[i]; + Temp.push_back(v); } - std::sort(temp.begin(), temp.end(), name_ordering()); - for (size_t i = 0; i < basevec.size(); i++) { - basevec[i] = temp[i].base; - livevec[i] = temp[i].derived; + + std::sort(Temp.begin(), Temp.end(), NameOrdering()); + for (size_t i = 0; i < BaseVec.size(); i++) { + BaseVec[i] = Temp[i].Base; + LiveVec[i] = Temp[i].Derived; } } @@ -1401,40 +1603,39 @@ static void stablize_order(SmallVectorImpl &basevec, // WARNING: Does not do any fixup to adjust users of the original live // values. That's the callers responsibility. static void -makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P, - PartiallyConstructedSafepointRecord &result) { - auto liveset = result.liveset; - auto PointerToBase = result.PointerToBase; +makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, + PartiallyConstructedSafepointRecord &Result, + std::vector &Replacements) { + const auto &LiveSet = Result.LiveSet; + const auto &PointerToBase = Result.PointerToBase; // Convert to vector for efficient cross referencing. - SmallVector basevec, livevec; - livevec.reserve(liveset.size()); - basevec.reserve(liveset.size()); - for (Value *L : liveset) { - livevec.push_back(L); - - assert(PointerToBase.find(L) != PointerToBase.end()); - Value *base = PointerToBase[L]; - basevec.push_back(base); + SmallVector BaseVec, LiveVec; + LiveVec.reserve(LiveSet.size()); + BaseVec.reserve(LiveSet.size()); + for (Value *L : LiveSet) { + LiveVec.push_back(L); + assert(PointerToBase.count(L)); + Value *Base = PointerToBase.find(L)->second; + BaseVec.push_back(Base); } - assert(livevec.size() == basevec.size()); + assert(LiveVec.size() == BaseVec.size()); // To make the output IR slightly more stable (for use in diffs), ensure a // fixed order of the values in the safepoint (by sorting the value name). // The order is otherwise meaningless. 
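The sort described above exists purely so repeated runs produce byte-identical IR. The same idea in a self-contained sketch: order the (base, derived) pairs by the derived value's name, then write them back into the two parallel vectors:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    // Deterministic output order for two parallel vectors, keyed on the
    // second ("derived") element -- the same idea as StabilizeOrder above.
    static void stabilizeOrder(std::vector<std::string> &BaseVec,
                               std::vector<std::string> &LiveVec) {
      std::vector<std::pair<std::string, std::string>> Temp;
      for (size_t i = 0; i < BaseVec.size(); i++)
        Temp.push_back({BaseVec[i], LiveVec[i]});
      std::sort(Temp.begin(), Temp.end(),
                [](const auto &A, const auto &B) { return A.second < B.second; });
      for (size_t i = 0; i < BaseVec.size(); i++) {
        BaseVec[i] = Temp[i].first;
        LiveVec[i] = Temp[i].second;
      }
    }

    int main() {
      std::vector<std::string> Bases = {"b2", "b1"}, Derived = {"d2", "d1"};
      stabilizeOrder(Bases, Derived);
      std::printf("%s %s\n", Bases[0].c_str(), Derived[0].c_str()); // b1 d1
    }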
-  stablize_order(basevec, livevec);
+  StabilizeOrder(BaseVec, LiveVec);
 
   // Do the actual rewriting and delete the old statepoint
-  makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
-  CS.getInstruction()->eraseFromParent();
+  makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
 }
 
 // Helper function for the relocationViaAlloca.
-// It receives iterator to the statepoint gc relocates and emits store to the
-// assigned
-// location (via allocaMap) for the each one of them.
-// Add visited values into the visitedLiveValues set we will later use them
-// for sanity check.
+//
+// It receives an iterator over the statepoint gc relocates and emits a store
+// to the assigned location (via allocaMap) for each one of them. It adds the
+// visited values into the visitedLiveValues set, which we will later use
+// for sanity checking.
 static void
 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                        DenseMap<Value *, Value *> &AllocaMap,
@@ -1459,13 +1660,15 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
     Value *Alloca = AllocaMap[OriginalValue];
 
     // Emit store into the related alloca
-    // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+    // All gc_relocates are i8 addrspace(1)* typed; each must be bitcast to
     // the correct type according to alloca.
-    assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+    assert(RelocatedValue->getNextNode() &&
+           "Should always have one since it's not a terminator");
     IRBuilder<> Builder(RelocatedValue->getNextNode());
     Value *CastedRelocatedValue =
-      Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
-        RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+        Builder.CreateBitCast(RelocatedValue,
                               cast<AllocaInst>(Alloca)->getAllocatedType(),
+                              suffixed_name_or(RelocatedValue, ".casted", ""));
 
     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
@@ -1501,10 +1704,10 @@ insertRematerializationStores(
   }
 }
 
-/// do all the relocation update via allocas and mem2reg
+/// Do all the relocation update via allocas and mem2reg
 static void relocationViaAlloca(
     Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
-    ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
 #ifndef NDEBUG
   // record initial number of (static) allocas; we'll check we have the same
   // number when we get done.
@@ -1531,15 +1734,12 @@ static void relocationViaAlloca(
     PromotableAllocas.push_back(Alloca);
   };
 
-  // emit alloca for each live gc pointer
-  for (unsigned i = 0; i < Live.size(); i++) {
-    emitAllocaFor(Live[i]);
-  }
-
-  // emit allocas for rematerialized values
-  for (size_t i = 0; i < Records.size(); i++) {
-    const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+  // Emit alloca for each live gc pointer
+  for (Value *V : Live)
+    emitAllocaFor(V);
 
+  // Emit allocas for rematerialized values
+  for (const auto &Info : Records)
     for (auto RematerializedValuePair : Info.RematerializedValues) {
       Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.count(OriginalValue) != 0)
@@ -1548,20 +1748,17 @@ static void relocationViaAlloca(
       emitAllocaFor(OriginalValue);
       ++NumRematerializedValues;
     }
-  }
 
   // The next two loops are part of the same conceptual operation. We need to
   // insert a store to the alloca after the original def and at each
   // redefinition. We need to insert a load before each use. These are split
   // into distinct loops for performance reasons.
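The comment above summarizes the whole relocation-via-alloca plan; this toy sketch (strings standing in for IR, no LLVM API) shows the intended shape of the rewritten code before mem2reg turns the slots back into SSA values:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Plan: give every live gc pointer a stack slot, store the relocated
    // value into the slot after each statepoint, and load from the slot
    // before each use; mem2reg then promotes the slots back to SSA form.
    int main() {
      std::map<std::string, std::string> AllocaMap; // live value -> its slot
      AllocaMap["%p"] = "%p.slot";

      std::vector<std::string> IR = {"store %p, %p.slot", // initial def
                                     "%tok = statepoint(...)"};
      IR.push_back("store %p.relocated, " + AllocaMap["%p"]); // after statepoint
      IR.push_back("%p.use = load " + AllocaMap["%p"]);       // before each use
      IR.push_back("call use(%p.use)");

      for (const auto &Line : IR)
        std::puts(Line.c_str());
    }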
-  // update gc pointer after each statepoint
-  // either store a relocated value or null (if no relocated value found for
-  // this gc pointer and it is not a gc_result)
-  // this must happen before we update the statepoint with load of alloca
-  // otherwise we lose the link between statepoint and old def
-  for (size_t i = 0; i < Records.size(); i++) {
-    const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+  // Update gc pointer after each statepoint: either store a relocated value or
+  // null (if no relocated value was found for this gc pointer and it is not a
+  // gc_result). This must happen before we update the statepoint with load of
+  // alloca otherwise we lose the link between statepoint and old def.
+  for (const auto &Info : Records) {
     Value *Statepoint = Info.StatepointToken;
 
     // This will be used for consistency check
@@ -1582,7 +1779,7 @@ static void relocationViaAlloca(
                              VisitedLiveValues);
 
     if (ClobberNonLive) {
-      // As a debuging aid, pretend that an unrelocated pointer becomes null at
+      // As a debugging aid, pretend that an unrelocated pointer becomes null at
       // the gc.statepoint. This will turn some subtle GC problems into
       // slightly easier to debug SEGVs. Note that on large IR files with
       // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
@@ -1612,23 +1809,22 @@ static void relocationViaAlloca(
       // Insert the clobbering stores. These may get intermixed with the
       // gc.results and gc.relocates, but that's fine.
       if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
-        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
-        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
+        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
       } else {
-        BasicBlock::iterator Next(cast<CallInst>(Statepoint));
-        Next++;
-        InsertClobbersAt(Next);
+        InsertClobbersAt(cast<CallInst>(Statepoint)->getNextNode());
       }
     }
   }
-  // update use with load allocas and add store for gc_relocated
+
+  // Update use with load allocas and add store for gc_relocated.
   for (auto Pair : AllocaMap) {
     Value *Def = Pair.first;
     Value *Alloca = Pair.second;
 
-    // we pre-record the uses of allocas so that we dont have to worry about
-    // later update
-    // that change the user information.
+    // We pre-record the uses of allocas so that we don't have to worry about
+    // later updates that change the user information.
+
     SmallVector<Instruction *, 20> Uses;
     // PERF: trade a linear scan for repeated reallocation
     Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
@@ -1663,9 +1859,9 @@ static void relocationViaAlloca(
       }
     }
 
-    // emit store for the initial gc value
-    // store must be inserted after load, otherwise store will be in alloca's
-    // use list and an extra load will be inserted before it
+    // Emit store for the initial gc value. Store must be inserted after load,
+    // otherwise store will be in alloca's use list and an extra load will be
+    // inserted before it.
StoreInst *Store = new StoreInst(Def, Alloca); if (Instruction *Inst = dyn_cast(Def)) { if (InvokeInst *Invoke = dyn_cast(Inst)) { @@ -1688,14 +1884,13 @@ static void relocationViaAlloca( assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues && "we must have the same allocas with lives"); if (!PromotableAllocas.empty()) { - // apply mem2reg to promote alloca to SSA + // Apply mem2reg to promote alloca to SSA PromoteMemToReg(PromotableAllocas, DT); } #ifndef NDEBUG - for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; - I++) - if (isa(*I)) + for (auto &I : F.getEntryBlock()) + if (isa(I)) InitialAllocaNum--; assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); #endif @@ -1719,28 +1914,27 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef Values, // No values to hold live, might as well not insert the empty holder return; - Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); + Module *M = CS.getInstruction()->getModule(); // Use a dummy vararg function to actually hold the values live Function *Func = cast(M->getOrInsertFunction( "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); if (CS.isCall()) { // For call safepoints insert dummy calls right after safepoint - BasicBlock::iterator Next(CS.getInstruction()); - Next++; - Holders.push_back(CallInst::Create(Func, Values, "", Next)); + Holders.push_back(CallInst::Create(Func, Values, "", + &*++CS.getInstruction()->getIterator())); return; } // For invoke safepooints insert dummy calls both in normal and // exceptional destination blocks auto *II = cast(CS.getInstruction()); Holders.push_back(CallInst::Create( - Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); + Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt())); Holders.push_back(CallInst::Create( - Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); + Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt())); } static void findLiveReferences( - Function &F, DominatorTree &DT, Pass *P, ArrayRef toUpdate, + Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records) { GCPtrLivenessData OriginalLivenessData; computeLiveInValues(DT, F, OriginalLivenessData); @@ -1751,12 +1945,12 @@ static void findLiveReferences( } } -/// Remove any vector of pointers from the liveset by scalarizing them over the -/// statepoint instruction. Adds the scalarized pieces to the liveset. It -/// would be preferrable to include the vector in the statepoint itself, but +/// Remove any vector of pointers from the live set by scalarizing them over the +/// statepoint instruction. Adds the scalarized pieces to the live set. It +/// would be preferable to include the vector in the statepoint itself, but /// the lowering code currently does not handle that. Extending it would be /// slightly non-trivial since it requires a format change. Given how rare -/// such cases are (for the moment?) scalarizing is an acceptable comprimise. +/// such cases are (for the moment?) scalarizing is an acceptable compromise. static void splitVectorValues(Instruction *StatepointInst, StatepointLiveSetTy &LiveSet, DenseMap& PointerToBase, @@ -1887,7 +2081,7 @@ static void splitVectorValues(Instruction *StatepointInst, // Helper function for the "rematerializeLiveValues". It walks use chain // starting from the "CurrentValue" until it meets "BaseValue". Only "simple" // values are visited (currently it is GEP's and casts). 
 Returns true if it
-// sucessfully reached "BaseValue" and false otherwise.
+// successfully reached "BaseValue" and false otherwise.
 // Fills "ChainToBase" array with all visited values. "BaseValue" is not
 // recorded.
 static bool findRematerializableChainToBasePointer(
@@ -1907,16 +2101,12 @@ static bool findRematerializableChainToBasePointer(
   }
 
   if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
-    Value *Def = CI->stripPointerCasts();
-
-    // This two checks are basically similar. First one is here for the
-    // consistency with findBasePointers logic.
-    assert(!isa<CastInst>(Def) && "not a pointer cast found");
     if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
       return false;
 
     ChainToBase.push_back(CI);
-    return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+    return findRematerializableChainToBasePointer(ChainToBase,
+                                                  CI->getOperand(0), BaseValue);
   }
 
   // Not supported instruction in the chain
@@ -1957,8 +2147,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
   return Cost;
 }
 
-// From the statepoint liveset pick values that are cheaper to recompute then to
-// relocate. Remove this values from the liveset, rematerialize them after
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
 // statepoint and record them in "Info" structure. Note that similar to
 // relocated values we don't do any user adjustments here.
 static void rematerializeLiveValues(CallSite CS,
@@ -1970,10 +2160,10 @@ static void rematerializeLiveValues(CallSite CS,
   // We cannot do this in the following loop due to iterator invalidation.
   SmallVector<Value *, 32> LiveValuesToBeDeleted;
 
-  for (Value *LiveValue: Info.liveset) {
+  for (Value *LiveValue: Info.LiveSet) {
     // For each live pointer find its defining chain
     SmallVector<Instruction *, 3> ChainToBase;
-    assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+    assert(Info.PointerToBase.count(LiveValue));
     bool FoundChain =
       findRematerializableChainToBasePointer(ChainToBase,
                                              LiveValue,
@@ -2059,9 +2249,9 @@ static void rematerializeLiveValues(CallSite CS,
       InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
 
       Instruction *NormalInsertBefore =
-          Invoke->getNormalDest()->getFirstInsertionPt();
+          &*Invoke->getNormalDest()->getFirstInsertionPt();
       Instruction *UnwindInsertBefore =
-          Invoke->getUnwindDest()->getFirstInsertionPt();
+          &*Invoke->getUnwindDest()->getFirstInsertionPt();
 
       Instruction *NormalRematerializedValue =
           rematerializeChain(NormalInsertBefore);
@@ -2075,22 +2265,23 @@ static void rematerializeLiveValues(CallSite CS,
 
   // Remove rematerialized values from the live set
   for (auto LiveValue: LiveValuesToBeDeleted) {
-    Info.liveset.erase(LiveValue);
+    Info.LiveSet.erase(LiveValue);
   }
 }
 
-static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
-                              SmallVectorImpl<CallSite> &toUpdate) {
+static bool insertParsePoints(Function &F, DominatorTree &DT,
+                              TargetTransformInfo &TTI,
+                              SmallVectorImpl<CallSite> &ToUpdate) {
 #ifndef NDEBUG
   // sanity check the input
-  std::set<CallSite> uniqued;
-  uniqued.insert(toUpdate.begin(), toUpdate.end());
-  assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+  std::set<CallSite> Uniqued;
+  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
+  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
 
-  for (size_t i = 0; i < toUpdate.size(); i++) {
-    CallSite &CS = toUpdate[i];
+  for (CallSite CS : ToUpdate) {
     assert(CS.getInstruction()->getParent()->getParent() == &F);
-    assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+
assert((UseDeoptBundles || isStatepoint(CS)) && + "expected to already be a deopt statepoint"); } #endif @@ -2098,50 +2289,45 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // the top of the successor blocks. See the comment on // normalForInvokeSafepoint on exactly what is needed. Note that this step // may restructure the CFG. - for (CallSite CS : toUpdate) { + for (CallSite CS : ToUpdate) { if (!CS.isInvoke()) continue; - InvokeInst *invoke = cast(CS.getInstruction()); - normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), - DT); - normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), - DT); + auto *II = cast(CS.getInstruction()); + normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT); + normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT); } // A list of dummy calls added to the IR to keep various values obviously // live in the IR. We'll remove all of these when done. - SmallVector holders; + SmallVector Holders; // Insert a dummy call with all of the arguments to the vm_state we'll need // for the actual safepoint insertion. This ensures reference arguments in // the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) - for (size_t i = 0; i < toUpdate.size(); i++) { - CallSite &CS = toUpdate[i]; - Statepoint StatepointCS(CS); - + for (CallSite CS : ToUpdate) { SmallVector DeoptValues; - for (Use &U : StatepointCS.vm_state_args()) { - Value *Arg = cast(&U); + + iterator_range DeoptStateRange = + UseDeoptBundles + ? iterator_range(GetDeoptBundleOperands(CS)) + : iterator_range(Statepoint(CS).vm_state_args()); + + for (Value *Arg : DeoptStateRange) { assert(!isUnhandledGCPointerType(Arg->getType()) && "support for FCA unimplemented"); if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } - insertUseHolderAfter(CS, DeoptValues, holders); + + insertUseHolderAfter(CS, DeoptValues, Holders); } - SmallVector records; - records.reserve(toUpdate.size()); - for (size_t i = 0; i < toUpdate.size(); i++) { - struct PartiallyConstructedSafepointRecord info; - records.push_back(info); - } - assert(records.size() == toUpdate.size()); + SmallVector Records(ToUpdate.size()); - // A) Identify all gc pointers which are staticly live at the given call + // A) Identify all gc pointers which are statically live at the given call // site. - findLiveReferences(F, DT, P, toUpdate, records); + findLiveReferences(F, DT, ToUpdate, Records); // B) Find the base pointers for each live pointer /* scope for caching */ { @@ -2150,10 +2336,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // large numbers of duplicate base_phis. DefiningValueMapTy DVCache; - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - CallSite &CS = toUpdate[i]; - findBasePointers(DT, DVCache, CS, info); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &info = Records[i]; + findBasePointers(DT, DVCache, ToUpdate[i], info); } } // end of cache scope @@ -2170,63 +2355,75 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // the base pointers which were identified for that safepoint. We'll then // ask liveness for _every_ base inserted to see what is now live. Then we // remove the dummy calls. 
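The dummy-call trick described above can be shown with a toy liveness model: a value counts as live across the safepoint only if something after the safepoint uses it, so a vararg holder call (literally named __tmp_use in the patch) is inserted to carry the values and erased once liveness has been recorded. Illustrative sketch, not LLVM API:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Inst { std::string Name; std::vector<std::string> Ops; };

    // Fake liveness query: is V used by any instruction after the safepoint?
    static bool liveAcross(const std::vector<Inst> &After,
                           const std::string &V) {
      return std::any_of(After.begin(), After.end(), [&](const Inst &I) {
        return std::find(I.Ops.begin(), I.Ops.end(), V) != I.Ops.end();
      });
    }

    int main() {
      std::vector<Inst> AfterSafepoint = {{"ret", {}}};
      std::printf("%d\n", liveAcross(AfterSafepoint, "%base")); // 0
      AfterSafepoint.insert(AfterSafepoint.begin(),
                            {"__tmp_use", {"%base"}});          // holder call
      std::printf("%d\n", liveAcross(AfterSafepoint, "%base")); // 1
    }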
- holders.reserve(holders.size() + records.size()); - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - CallSite &CS = toUpdate[i]; + Holders.reserve(Holders.size() + Records.size()); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &Info = Records[i]; SmallVector Bases; - for (auto Pair : info.PointerToBase) { + for (auto Pair : Info.PointerToBase) Bases.push_back(Pair.second); - } - insertUseHolderAfter(CS, Bases, holders); + + insertUseHolderAfter(ToUpdate[i], Bases, Holders); } // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue. - recomputeLiveInValues(F, DT, P, toUpdate, records); + recomputeLiveInValues(F, DT, ToUpdate, Records); if (PrintBasePointers) { - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; + for (auto &Info : Records) { errs() << "Base Pairs: (w/Relocation)\n"; - for (auto Pair : info.PointerToBase) { - errs() << " derived %" << Pair.first->getName() << " base %" - << Pair.second->getName() << "\n"; + for (auto Pair : Info.PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; } } } - for (size_t i = 0; i < holders.size(); i++) { - holders[i]->eraseFromParent(); - holders[i] = nullptr; - } - holders.clear(); + + // It is possible that non-constant live variables have a constant base. For + // example, a GEP with a variable offset from a global. In this case we can + // remove it from the liveset. We already don't add constants to the liveset + // because we assume they won't move at runtime and the GC doesn't need to be + // informed about them. The same reasoning applies if the base is constant. + // Note that the relocation placement code relies on this filtering for + // correctness as it expects the base to be in the liveset, which isn't true + // if the base is constant. + for (auto &Info : Records) + for (auto &BasePair : Info.PointerToBase) + if (isa(BasePair.second)) + Info.LiveSet.erase(BasePair.first); + + for (CallInst *CI : Holders) + CI->eraseFromParent(); + + Holders.clear(); // Do a limited scalarization of any live at safepoint vector values which // contain pointers. This enables this pass to run after vectorization at // the cost of some possible performance loss. TODO: it would be nice to // natively support vectors all the way through the backend so we don't need // to scalarize here. - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - Instruction *statepoint = toUpdate[i].getInstruction(); - splitVectorValues(cast(statepoint), info.liveset, - info.PointerToBase, DT); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &Info = Records[i]; + Instruction *Statepoint = ToUpdate[i].getInstruction(); + splitVectorValues(cast(Statepoint), Info.LiveSet, + Info.PointerToBase, DT); } // In order to reduce live set of statepoint we might choose to rematerialize - // some values instead of relocating them. This is purelly an optimization and + // some values instead of relocating them. This is purely an optimization and // does not influence correctness. 
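Rematerialization, mentioned just above, hinges on the chain walk performed by findRematerializableChainToBasePointer: follow only "simple" defining instructions (GEPs and no-op casts) from the derived value back to the base, recording the chain so it can be re-executed after the statepoint instead of relocating the value. A self-contained sketch with a toy node type, not LLVM's:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct Node { const char *Kind; Node *Operand; };

    // Record the defining chain from Cur back to Base; bail out on any
    // instruction too complex to cheaply re-execute.
    static bool findChainToBase(std::vector<Node *> &Chain, Node *Cur,
                                Node *Base) {
      if (Cur == Base)
        return true; // reached the base: the chain is rematerializable
      if (!Cur || (std::strcmp(Cur->Kind, "gep") &&
                   std::strcmp(Cur->Kind, "cast")))
        return false; // unsupported instruction in the chain
      Chain.push_back(Cur);
      return findChainToBase(Chain, Cur->Operand, Base);
    }

    int main() {
      Node Base{"alloca", nullptr}, GEP{"gep", &Base}, Cast{"cast", &GEP};
      std::vector<Node *> Chain;
      std::printf("found=%d len=%zu\n",
                  findChainToBase(Chain, &Cast, &Base), Chain.size());
    }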
-  TargetTransformInfo &TTI =
-      P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  for (size_t i = 0; i < Records.size(); i++)
+    rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
 
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-
-    rematerializeLiveValues(CS, info, TTI);
-  }
+  // We need this to safely RAUW and delete call or invoke return values that
+  // may themselves be live over a statepoint. For details, please see usage in
+  // makeStatepointExplicitImpl.
+  std::vector<DeferredReplacement> Replacements;
 
   // Now run through and replace the existing statepoints with new ones with
   // the live variables listed. We do not yet update uses of the values being
   // relocated. We have references to live variables that need to
   // survive to the last iteration of this loop. (By construction, the
   // previous statepoint can not be a live variable, thus we can and do remove
   // the old statepoint calls as we go.)
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-    makeStatepointExplicit(DT, CS, P, info);
+  for (size_t i = 0; i < Records.size(); i++)
+    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+
+  ToUpdate.clear(); // prevent accidental use of invalid CallSites
+
+  for (auto &PR : Replacements)
+    PR.doReplacement();
+
+  Replacements.clear();
+
+  for (auto &Info : Records) {
+    // These live sets may contain stale Value pointers, since we replaced calls
+    // with operand bundles with calls wrapped in gc.statepoint, and some of
+    // those calls may have been def'ing live gc pointers. Clear these out to
+    // avoid accidentally using them.
+    //
+    // TODO: We should create a separate data structure that does not contain
+    // these live sets, and migrate to using that data structure from this point
+    // onward.
+    Info.LiveSet.clear();
+    Info.PointerToBase.clear();
+  }
-  toUpdate.clear(); // prevent accident use of invalid CallSites
 
   // Do all the fixups of the original live variables to their relocated selves
-  SmallVector<Value *, 128> live;
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
+  SmallVector<Value *, 128> Live;
+  for (size_t i = 0; i < Records.size(); i++) {
+    PartiallyConstructedSafepointRecord &Info = Records[i];
+
     // We can't simply save the live set from the original insertion. One of
     // the live values might be the result of a call which needs a safepoint.
     // That Value* no longer exists and we need to use the new gc_result.
-    // Thankfully, the liveset is embedded in the statepoint (and updated), so
+    // Thankfully, the live set is embedded in the statepoint (and updated), so
     // we just grab that.
-    Statepoint statepoint(info.StatepointToken);
-    live.insert(live.end(), statepoint.gc_args_begin(),
-                statepoint.gc_args_end());
+    Statepoint Statepoint(Info.StatepointToken);
+    Live.insert(Live.end(), Statepoint.gc_args_begin(),
+                Statepoint.gc_args_end());
 
 #ifndef NDEBUG
     // Do some basic sanity checks on our liveness results before performing
     // relocation. Relocation can and will turn mistakes in liveness results
     // into non-sensical code which is much harder to debug.
     // TODO: It would be nice to test consistency as well
-    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
            "statepoint must be reachable or liveness is meaningless");
-    for (Value *V : statepoint.gc_args()) {
+    for (Value *V : Statepoint.gc_args()) {
       if (!isa<Instruction>(V))
         // Non-instruction values trivially dominate all possible uses
         continue;
-      auto LiveInst = cast<Instruction>(V);
+      auto *LiveInst = cast<Instruction>(V);
       assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
              "unreachable values should never be live");
-      assert(DT.dominates(LiveInst, info.StatepointToken) &&
+      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
     }
 #endif
   }
-  unique_unsorted(live);
+  unique_unsorted(Live);
 
 #ifndef NDEBUG
   // sanity check
-  for (auto ptr : live) {
-    assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
-  }
+  for (auto *Ptr : Live)
+    assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
 #endif
 
-  relocationViaAlloca(F, DT, live, records);
-  return !records.empty();
+  relocationViaAlloca(F, DT, Live, Records);
+  return !Records.empty();
 }
 
 // Handles both return values and arguments for Functions and CallSites.
 template <typename AttrHolder>
-static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
-                                   unsigned Index) {
+static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+                                      unsigned Index) {
   AttrBuilder R;
   if (AH.getDereferenceableBytes(Index))
     R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
@@ -2296,6 +2509,8 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
                                   AH.getDereferenceableBytes(Index)));
   if (AH.getDereferenceableOrNullBytes(Index))
     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
                                   AH.getDereferenceableOrNullBytes(Index)));
+  if (AH.doesNotAlias(Index))
+    R.addAttribute(Attribute::NoAlias);
 
   if (!R.empty())
     AH.setAttributes(AH.getAttributes().removeAttributes(
@@ -2303,25 +2518,25 @@
 }
 
 void
-RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
   LLVMContext &Ctx = F.getContext();
 
   for (Argument &A : F.args())
     if (isa<PointerType>(A.getType()))
-      RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+      RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
 
   if (isa<PointerType>(F.getReturnType()))
-    RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+    RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
 }
 
-void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
   if (F.empty())
     return;
 
   LLVMContext &Ctx = F.getContext();
   MDBuilder Builder(Ctx);
 
-  for (Instruction &I : inst_range(F)) {
+  for (Instruction &I : instructions(F)) {
     if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
       assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
       bool IsImmutableTBAA =
@@ -2344,9 +2559,9 @@ void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
 
     if (CallSite CS = CallSite(&I)) {
       for (int i = 0, e = CS.arg_size(); i != e; i++)
         if (isa<PointerType>(CS.getArgument(i)->getType()))
-          RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+          RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
       if (isa<PointerType>(CS.getType()))
-        RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+        RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
     }
   }
 }
@@ -2365,17 +2580,17 @@
static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) { +void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { #ifndef NDEBUG assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) && "precondition!"); #endif for (Function &F : M) - stripDereferenceabilityInfoFromPrototype(F); + stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripDereferenceabilityInfoFromBody(F); + stripNonValidAttributesFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { @@ -2389,15 +2604,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { return false; DominatorTree &DT = getAnalysis(F).getDomTree(); + TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + + auto NeedsRewrite = [](Instruction &I) { + if (UseDeoptBundles) { + if (ImmutableCallSite CS = ImmutableCallSite(&I)) + return !callsGCLeafFunction(CS); + return false; + } + + return isStatepoint(I); + }; // Gather all the statepoints which need rewritten. Be careful to only // consider those in reachable code since we need to ask dominance queries // when rewriting. We'll delete the unreachable ones in a moment. SmallVector ParsePointNeeded; bool HasUnreachableStatepoint = false; - for (Instruction &I : inst_range(F)) { + for (Instruction &I : instructions(F)) { // TODO: only the ones with the flag set! - if (isStatepoint(I)) { + if (NeedsRewrite(I)) { if (DT.isReachableFromEntry(I.getParent())) ParsePointNeeded.push_back(CallSite(&I)); else @@ -2428,7 +2655,38 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { FoldSingleEntryPHINodes(&BB); } - MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); + // Before we start introducing relocations, we want to tweak the IR a bit to + // avoid unfortunate code generation effects. The main example is that we + // want to try to make sure the comparison feeding a branch is after any + // safepoints. Otherwise, we end up with a comparison of pre-relocation + // values feeding a branch after relocation. This is semantically correct, + // but results in extra register pressure since both the pre-relocation and + // post-relocation copies must be available in registers. For code without + // relocations this is handled elsewhere, but teaching the scheduler to + // reverse the transform we're about to do would be slightly complex. + // Note: This may extend the live range of the inputs to the icmp and thus + // increase the liveset of any statepoint we move over. This is profitable + // as long as all statepoints are in rare blocks. If we had in-register + // lowering for live values this would be a much safer transform. + auto getConditionInst = [](TerminatorInst *TI) -> Instruction* { + if (auto *BI = dyn_cast(TI)) + if (BI->isConditional()) + return dyn_cast(BI->getCondition()); + // TODO: Extend this to handle switches + return nullptr; + }; + for (BasicBlock &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (auto *Cond = getConditionInst(TI)) + // TODO: Handle more than just ICmps here. We should be able to move + // most instructions without side effects or memory access. 
+      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
+        MadeChange = true;
+        Cond->moveBefore(TI);
+      }
+  }
+
+  MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
   return MadeChange;
 }
 
@@ -2461,7 +2719,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
            "support for FCA unimplemented");
     if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
       // The choice to exclude all things constant here is slightly subtle.
-      // There are two idependent reasons:
+      // There are two independent reasons:
       // - We assume that things which are constant (from LLVM's definition)
       // do not move at runtime. For example, the address of a global
       // variable is fixed, even though its contents may not be.
@@ -2599,7 +2857,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
   } // while( !worklist.empty() )
 
 #ifndef NDEBUG
-  // Sanity check our ouput against SSA properties. This helps catch any
+  // Sanity check our output against SSA properties. This helps catch any
   // missing kills during the above iteration.
   for (BasicBlock &BB : F) {
     checkBasicSSA(DT, Data, BB);
   }
@@ -2620,7 +2878,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
   // call result is not live (normal), nor are its arguments
   // (unless they're used again later). This adjustment is
   // specifically what we need to relocate
-  BasicBlock::reverse_iterator rend(Inst);
+  BasicBlock::reverse_iterator rend(Inst->getIterator());
   computeLiveInValues(BB->rbegin(), rend, LiveOut);
   LiveOut.erase(Inst);
   Out.insert(LiveOut.begin(), LiveOut.end());
@@ -2669,5 +2927,5 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
     assert(Updated.count(KVPair.first) && "record for non-live value");
 #endif
 
-  Info.liveset = Updated;
+  Info.LiveSet = Updated;
 }
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4d3a708fa20e..2fca803adde8 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -479,6 +480,13 @@ private:
   void visitExtractValueInst(ExtractValueInst &EVI);
   void visitInsertValueInst(InsertValueInst &IVI);
   void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+  void visitFuncletPadInst(FuncletPadInst &FPI) {
+    markAnythingOverdefined(&FPI);
+  }
+  void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+    markAnythingOverdefined(&CPI);
+    visitTerminatorInst(CPI);
+  }
 
   // Instructions that cannot be folded away.
   void visitStoreInst (StoreInst &I);
@@ -539,9 +547,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
     return;
   }
 
-  if (isa<InvokeInst>(TI)) {
-    // Invoke instructions successors are always executable.
-    Succs[0] = Succs[1] = true;
+  // Unwinding instructions successors are always executable.
+  if (TI.isExceptional()) {
+    Succs.assign(TI.getNumSuccessors(), true);
     return;
   }
 
@@ -605,8 +613,8 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
       return BI->getSuccessor(CI->isZero()) == To;
     }
 
-    // Invoke instructions successors are always executable.
-    if (isa<InvokeInst>(TI))
+    // Unwinding instructions successors are always executable.
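Stepping back to the statepoint pass for a moment: the condition-sinking pre-pass that closes RewriteStatepointsForGC::runOnFunction above can be modeled on plain strings. A single-use compare is moved to just before its branch so that it ends up after any statepoint later inserted into the block:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy reordering: sink the single-use icmp to just before the
    // terminator (the equivalent of Cond->moveBefore(TI) above).
    int main() {
      std::vector<std::string> BB = {"%c = icmp ...", "statepoint ...",
                                     "br %c, ..."};
      auto Cmp = std::find(BB.begin(), BB.end(), "%c = icmp ...");
      std::rotate(Cmp, Cmp + 1, BB.end() - 1); // shift %c down past the statepoint
      for (const auto &I : BB)
        std::puts(I.c_str());
    }

With the compare after the statepoint, only one copy of its operands has to stay in a register across the safepoint, which is the register-pressure argument the comment above makes.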
+ if (TI->isExceptional()) return true; if (SwitchInst *SI = dyn_cast(TI)) { @@ -630,7 +638,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { #ifndef NDEBUG dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif - llvm_unreachable(nullptr); + llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } // visit Implementations - Something changed in this instruction, either an @@ -1126,7 +1134,7 @@ CallOverdefined: // entry block executable and merge in the actual arguments to the call into // the formal arguments of the function. if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){ - MarkBlockExecutable(F->begin()); + MarkBlockExecutable(&F->front()); // Propagate information from this call site into the callee. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1135,17 +1143,17 @@ CallOverdefined: // If this argument is byval, and if the function is not readonly, there // will be an implicit copy formed of the input aggregate. if (AI->hasByValAttr() && !F->onlyReadsMemory()) { - markOverdefined(AI); + markOverdefined(&*AI); continue; } if (StructType *STy = dyn_cast(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { LatticeVal CallArg = getStructValueState(*CAI, i); - mergeInValue(getStructValueState(AI, i), AI, CallArg); + mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg); } } else { - mergeInValue(AI, getValueState(*CAI)); + mergeInValue(&*AI, getValueState(*CAI)); } } } @@ -1246,18 +1254,18 @@ void SCCPSolver::Solve() { /// even if X isn't defined. bool SCCPSolver::ResolvedUndefsIn(Function &F) { for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!BBExecutable.count(BB)) + if (!BBExecutable.count(&*BB)) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (Instruction &I : *BB) { // Look for instructions which produce undef values. - if (I->getType()->isVoidTy()) continue; + if (I.getType()->isVoidTy()) continue; - if (StructType *STy = dyn_cast(I->getType())) { + if (StructType *STy = dyn_cast(I.getType())) { // Only a few things that can be structs matter for undef. // Tracked calls must never be marked overdefined in ResolvedUndefsIn. - if (CallSite CS = CallSite(I)) + if (CallSite CS = CallSite(&I)) if (Function *F = CS.getCalledFunction()) if (MRVFunctionsTracked.count(F)) continue; @@ -1270,14 +1278,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Send the results of everything else to overdefined. We could be // more precise than this but it isn't worth bothering. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - LatticeVal &LV = getStructValueState(I, i); + LatticeVal &LV = getStructValueState(&I, i); if (LV.isUndefined()) - markOverdefined(LV, I); + markOverdefined(LV, &I); } continue; } - LatticeVal &LV = getValueState(I); + LatticeVal &LV = getValueState(&I); if (!LV.isUndefined()) continue; // extractvalue is safe; check here because the argument is a struct. @@ -1287,24 +1295,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Compute the operand LatticeVals, for convenience below. // Anything taking a struct is conservatively assumed to require // overdefined markings. 
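The two SCCP changes above replace the invoke-only special case with a general rule: every exceptional (unwinding) terminator has all of its successors treated as executable, because the analysis cannot predict where an exception will transfer control. A minimal sketch of that rule with a toy terminator type, not LLVM's:

    #include <cstdio>
    #include <vector>

    struct Term { bool IsExceptional; unsigned NumSuccessors; };

    // All successor edges of an exceptional terminator may be taken.
    std::vector<bool> feasibleSuccessors(const Term &TI) {
      std::vector<bool> Succs(TI.NumSuccessors, false);
      if (TI.IsExceptional)
        Succs.assign(TI.NumSuccessors, true);
      // ... branch/switch handling would refine Succs via lattice values ...
      return Succs;
    }

    int main() {
      auto S = feasibleSuccessors({true, 3});
      std::printf("%d %d %d\n", (int)S[0], (int)S[1], (int)S[2]); // 1 1 1
    }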
- if (I->getOperand(0)->getType()->isStructTy()) { - markOverdefined(I); + if (I.getOperand(0)->getType()->isStructTy()) { + markOverdefined(&I); return true; } - LatticeVal Op0LV = getValueState(I->getOperand(0)); + LatticeVal Op0LV = getValueState(I.getOperand(0)); LatticeVal Op1LV; - if (I->getNumOperands() == 2) { - if (I->getOperand(1)->getType()->isStructTy()) { - markOverdefined(I); + if (I.getNumOperands() == 2) { + if (I.getOperand(1)->getType()->isStructTy()) { + markOverdefined(&I); return true; } - Op1LV = getValueState(I->getOperand(1)); + Op1LV = getValueState(I.getOperand(1)); } // If this is an instructions whose result is defined even if the input is // not fully defined, propagate the information. - Type *ITy = I->getType(); - switch (I->getOpcode()) { + Type *ITy = I.getType(); + switch (I.getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Trunc: @@ -1318,9 +1326,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::FRem: // Floating-point binary operation: be conservative. if (Op0LV.isUndefined() && Op1LV.isUndefined()) - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); else - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::ZExt: case Instruction::SExt: @@ -1332,7 +1340,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::SIToFP: case Instruction::UIToFP: // undef -> 0; some outputs are impossible - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: @@ -1341,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { break; // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Or: @@ -1349,7 +1357,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (Op0LV.isUndefined() && Op1LV.isUndefined()) break; // undef | X -> -1. X could be -1. - markForcedConstant(I, Constant::getAllOnesValue(ITy)); + markForcedConstant(&I, Constant::getAllOnesValue(ITy)); return true; case Instruction::Xor: @@ -1357,7 +1365,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // necessary, but we try to be nice to people who expect this // behavior in simple cases if (Op0LV.isUndefined() && Op1LV.isUndefined()) { - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; } // undef ^ X -> undef @@ -1373,7 +1381,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. 
- markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::AShr: @@ -1381,7 +1389,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (Op1LV.isUndefined()) break; // undef >>a X -> all ones - markForcedConstant(I, Constant::getAllOnesValue(ITy)); + markForcedConstant(&I, Constant::getAllOnesValue(ITy)); return true; case Instruction::LShr: case Instruction::Shl: @@ -1391,17 +1399,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef << X -> 0 // undef >> X -> 0 - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Select: - Op1LV = getValueState(I->getOperand(1)); + Op1LV = getValueState(I.getOperand(1)); // undef ? X : Y -> X or Y. There could be commonality between X/Y. if (Op0LV.isUndefined()) { if (!Op1LV.isConstant()) // Pick the constant one if there is any. - Op1LV = getValueState(I->getOperand(2)); + Op1LV = getValueState(I.getOperand(2)); } else if (Op1LV.isUndefined()) { // c ? undef : undef -> undef. No change. - Op1LV = getValueState(I->getOperand(2)); + Op1LV = getValueState(I.getOperand(2)); if (Op1LV.isUndefined()) break; // Otherwise, c ? undef : x -> x. @@ -1410,9 +1418,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } if (Op1LV.isConstant()) - markForcedConstant(I, Op1LV.getConstant()); + markForcedConstant(&I, Op1LV.getConstant()); else - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::Load: // A load here means one of two things: a load of undef from a global, @@ -1421,9 +1429,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { break; case Instruction::ICmp: // X == undef -> undef. Other comparisons get more complicated. - if (cast(I)->isEquality()) + if (cast(&I)->isEquality()) break; - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::Call: case Instruction::Invoke: { @@ -1432,19 +1440,19 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // 2. It could be constant-foldable. // Because of the way we solve return values, tracked calls must // never be marked overdefined in ResolvedUndefsIn. - if (Function *F = CallSite(I).getCalledFunction()) + if (Function *F = CallSite(&I).getCalledFunction()) if (TrackedRetVals.count(F)) break; // If the call is constant-foldable, we mark it overdefined because // we do not know what return values are valid. - markOverdefined(I); + markOverdefined(&I); return true; } default: // If we don't know what should happen here, conservatively mark it // overdefined. - markOverdefined(I); + markOverdefined(&I); return true; } } @@ -1462,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // false. if (isa(BI->getCondition())) { BI->setCondition(ConstantInt::getFalse(BI->getContext())); - markEdgeExecutable(BB, TI->getSuccessor(1)); + markEdgeExecutable(&*BB, TI->getSuccessor(1)); return true; } @@ -1484,7 +1492,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // the first constant. 
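A tiny demonstration of why ResolvedUndefsIn can force the constants listed in the rules above: SCCP is free to pick any concrete value for an undef operand, so it picks one that pins the result for every possible X (undef = 0 pins undef & X and undef * X to 0; undef = -1 pins undef | X to all-ones):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Whatever X is, the chosen undef value fixes the result.
      for (uint32_t X : {0u, 7u, 0xFFFFFFFFu})
        std::printf("0 & %u = %u,  ~0u | %u = %u\n", X, 0u & X, X, ~0u | X);
    }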
if (isa(SI->getCondition())) { SI->setCondition(SI->case_begin().getCaseValue()); - markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor()); + markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor()); return true; } @@ -1506,6 +1514,7 @@ namespace { struct SCCP : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); } static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(ID) { @@ -1541,11 +1550,10 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. while (EndInst != BB->begin()) { // Delete the next to last instruction. - BasicBlock::iterator I = EndInst; - Instruction *Inst = --I; + Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (isa(Inst)) { + if (Inst->isEHPad()) { EndInst = Inst; continue; } @@ -1568,11 +1576,11 @@ bool SCCP::runOnFunction(Function &F) { SCCPSolver Solver(DL, TLI); // Mark the first block of the function as being executable. - Solver.MarkBlockExecutable(F.begin()); + Solver.MarkBlockExecutable(&F.front()); // Mark all arguments to the function as being overdefined. - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI) - Solver.markAnythingOverdefined(AI); + for (Argument &AI : F.args()) + Solver.markAnythingOverdefined(&AI); // Solve for constants. bool ResolvedUndefs = true; @@ -1589,8 +1597,8 @@ bool SCCP::runOnFunction(Function &F) { // as we cannot modify the CFG of the function. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!Solver.isBlockExecutable(BB)) { - DeleteInstructionInBlock(BB); + if (!Solver.isBlockExecutable(&*BB)) { + DeleteInstructionInBlock(&*BB); MadeChanges = true; continue; } @@ -1599,7 +1607,7 @@ bool SCCP::runOnFunction(Function &F) { // constants if we have found them to be of constant values. // for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { - Instruction *Inst = BI++; + Instruction *Inst = &*BI++; if (Inst->getType()->isVoidTy() || isa(Inst)) continue; @@ -1713,36 +1721,34 @@ bool IPSCCP::runOnModule(Module &M) { // If this is a strong or ODR definition of this function, then we can // propagate information about its result into callsites of it. if (!F->mayBeOverridden()) - Solver.AddTrackedFunction(F); + Solver.AddTrackedFunction(&*F); // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F->hasLocalLinkage()) { - if (AddressIsTaken(F)) - AddressTakenFunctions.insert(F); + if (AddressIsTaken(&*F)) + AddressTakenFunctions.insert(&*F); else { - Solver.AddArgumentTrackedFunction(F); + Solver.AddArgumentTrackedFunction(&*F); continue; } } // Assume the function is called. - Solver.MarkBlockExecutable(F->begin()); + Solver.MarkBlockExecutable(&F->front()); // Assume nothing about the incoming arguments. - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) - Solver.markAnythingOverdefined(AI); + for (Argument &AI : F->args()) + Solver.markAnythingOverdefined(&AI); } // Loop over global variables. We inform the solver about any internal global // variables that do not have their 'addresses taken'. If they don't have // their addresses taken, we can propagate constants through them. 
- for (Module::global_iterator G = M.global_begin(), E = M.global_end(); - G != E; ++G) - if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G)) - Solver.TrackValueOfGlobalVariable(G); + for (GlobalVariable &G : M.globals()) + if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G)) + Solver.TrackValueOfGlobalVariable(&G); // Solve for constants. bool ResolvedUndefs = true; @@ -1763,7 +1769,10 @@ bool IPSCCP::runOnModule(Module &M) { SmallVector BlocksToErase; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (Solver.isBlockExecutable(F->begin())) { + if (F->isDeclaration()) + continue; + + if (Solver.isBlockExecutable(&F->front())) { for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { if (AI->use_empty() || AI->getType()->isStructTy()) continue; @@ -1771,7 +1780,7 @@ bool IPSCCP::runOnModule(Module &M) { // TODO: Could use getStructLatticeValueFor to find out if the entire // result is a constant and replace it entirely if so. - LatticeVal IV = Solver.getLatticeValueFor(AI); + LatticeVal IV = Solver.getLatticeValueFor(&*AI); if (IV.isOverdefined()) continue; Constant *CST = IV.isConstant() ? @@ -1786,28 +1795,27 @@ bool IPSCCP::runOnModule(Module &M) { } for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - if (!Solver.isBlockExecutable(BB)) { - DeleteInstructionInBlock(BB); + if (!Solver.isBlockExecutable(&*BB)) { + DeleteInstructionInBlock(&*BB); MadeChanges = true; TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = TI->getSuccessor(i); + for (BasicBlock *Succ : TI->successors()) { if (!Succ->empty() && isa(Succ->begin())) - TI->getSuccessor(i)->removePredecessor(BB); + Succ->removePredecessor(&*BB); } if (!TI->use_empty()) TI->replaceAllUsesWith(UndefValue::get(TI->getType())); TI->eraseFromParent(); - new UnreachableInst(M.getContext(), BB); + new UnreachableInst(M.getContext(), &*BB); if (&*BB != &F->front()) - BlocksToErase.push_back(BB); + BlocksToErase.push_back(&*BB); continue; } for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { - Instruction *Inst = BI++; + Instruction *Inst = &*BI++; if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy()) continue; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 947513a36572..a7361b5fe083 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -23,12 +23,12 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" @@ -37,8 +37,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -53,9 +51,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TimeValue.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include 
"llvm/Transforms/Utils/SSAUpdater.h" #if __cplusplus >= 201103L && !defined(NDEBUG) // We only use this for a debug check in C++11 @@ -63,6 +61,7 @@ #endif using namespace llvm; +using namespace llvm::sroa; #define DEBUG_TYPE "sroa" @@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); -/// Hidden option to force the pass to not use DomTree and mem2reg, instead -/// forming SSA values through the SSAUpdater infrastructure. -static cl::opt ForceSSAUpdater("force-ssa-updater", cl::init(false), - cl::Hidden); - /// Hidden option to enable randomly shuffling the slices to help uncover /// instability in their order. static cl::opt SROARandomShuffleSlices("sroa-random-shuffle-slices", @@ -205,7 +199,6 @@ template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; } -namespace { /// \brief Representation of the alloca slices. /// /// This class represents the slices of an alloca which are formed by its @@ -213,7 +206,7 @@ namespace { /// for the slices used and we reflect that in this structure. The uses are /// stored, sorted by increasing beginning offset and with unsplittable slices /// starting at a particular offset before splittable slices. -class AllocaSlices { +class llvm::sroa::AllocaSlices { public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); @@ -253,281 +246,10 @@ public: std::inplace_merge(Slices.begin(), SliceI, Slices.end()); } - // Forward declare an iterator to befriend it. + // Forward declare the iterator and range accessor for walking the + // partitions. class partition_iterator; - - /// \brief A partition of the slices. - /// - /// An ephemeral representation for a range of slices which can be viewed as - /// a partition of the alloca. This range represents a span of the alloca's - /// memory which cannot be split, and provides access to all of the slices - /// overlapping some part of the partition. - /// - /// Objects of this type are produced by traversing the alloca's slices, but - /// are only ephemeral and not persistent. - class Partition { - private: - friend class AllocaSlices; - friend class AllocaSlices::partition_iterator; - - /// \brief The begining and ending offsets of the alloca for this partition. - uint64_t BeginOffset, EndOffset; - - /// \brief The start end end iterators of this partition. - iterator SI, SJ; - - /// \brief A collection of split slice tails overlapping the partition. - SmallVector SplitTails; - - /// \brief Raw constructor builds an empty partition starting and ending at - /// the given iterator. - Partition(iterator SI) : SI(SI), SJ(SI) {} - - public: - /// \brief The start offset of this partition. - /// - /// All of the contained slices start at or after this offset. - uint64_t beginOffset() const { return BeginOffset; } - - /// \brief The end offset of this partition. - /// - /// All of the contained slices end at or before this offset. - uint64_t endOffset() const { return EndOffset; } - - /// \brief The size of the partition. - /// - /// Note that this can never be zero. - uint64_t size() const { - assert(BeginOffset < EndOffset && "Partitions must span some bytes!"); - return EndOffset - BeginOffset; - } - - /// \brief Test whether this partition contains no slices, and merely spans - /// a region occupied by split slices. 
- bool empty() const { return SI == SJ; } - - /// \name Iterate slices that start within the partition. - /// These may be splittable or unsplittable. They have a begin offset >= the - /// partition begin offset. - /// @{ - // FIXME: We should probably define a "concat_iterator" helper and use that - // to stitch together pointee_iterators over the split tails and the - // contiguous iterators of the partition. That would give a much nicer - // interface here. We could then additionally expose filtered iterators for - // split, unsplit, and unsplittable splices based on the usage patterns. - iterator begin() const { return SI; } - iterator end() const { return SJ; } - /// @} - - /// \brief Get the sequence of split slice tails. - /// - /// These tails are of slices which start before this partition but are - /// split and overlap into the partition. We accumulate these while forming - /// partitions. - ArrayRef splitSliceTails() const { return SplitTails; } - }; - - /// \brief An iterator over partitions of the alloca's slices. - /// - /// This iterator implements the core algorithm for partitioning the alloca's - /// slices. It is a forward iterator as we don't support backtracking for - /// efficiency reasons, and re-use a single storage area to maintain the - /// current set of split slices. - /// - /// It is templated on the slice iterator type to use so that it can operate - /// with either const or non-const slice iterators. - class partition_iterator - : public iterator_facade_base { - friend class AllocaSlices; - - /// \brief Most of the state for walking the partitions is held in a class - /// with a nice interface for examining them. - Partition P; - - /// \brief We need to keep the end of the slices to know when to stop. - AllocaSlices::iterator SE; - - /// \brief We also need to keep track of the maximum split end offset seen. - /// FIXME: Do we really? - uint64_t MaxSplitSliceEndOffset; - - /// \brief Sets the partition to be empty at given iterator, and sets the - /// end iterator. - partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) - : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { - // If not already at the end, advance our state to form the initial - // partition. - if (SI != SE) - advance(); - } - - /// \brief Advance the iterator to the next partition. - /// - /// Requires that the iterator not be at the end of the slices. - void advance() { - assert((P.SI != SE || !P.SplitTails.empty()) && - "Cannot advance past the end of the slices!"); - - // Clear out any split uses which have ended. - if (!P.SplitTails.empty()) { - if (P.EndOffset >= MaxSplitSliceEndOffset) { - // If we've finished all splits, this is easy. - P.SplitTails.clear(); - MaxSplitSliceEndOffset = 0; - } else { - // Remove the uses which have ended in the prior partition. This - // cannot change the max split slice end because we just checked that - // the prior partition ended prior to that max. 
- P.SplitTails.erase( - std::remove_if( - P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), - P.SplitTails.end()); - assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() == MaxSplitSliceEndOffset; - }) && - "Could not find the current max split slice offset!"); - assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() <= MaxSplitSliceEndOffset; - }) && - "Max split slice end offset is not actually the max!"); - } - } - - // If P.SI is already at the end, then we've cleared the split tail and - // now have an end iterator. - if (P.SI == SE) { - assert(P.SplitTails.empty() && "Failed to clear the split slices!"); - return; - } - - // If we had a non-empty partition previously, set up the state for - // subsequent partitions. - if (P.SI != P.SJ) { - // Accumulate all the splittable slices which started in the old - // partition into the split list. - for (Slice &S : P) - if (S.isSplittable() && S.endOffset() > P.EndOffset) { - P.SplitTails.push_back(&S); - MaxSplitSliceEndOffset = - std::max(S.endOffset(), MaxSplitSliceEndOffset); - } - - // Start from the end of the previous partition. - P.SI = P.SJ; - - // If P.SI is now at the end, we at most have a tail of split slices. - if (P.SI == SE) { - P.BeginOffset = P.EndOffset; - P.EndOffset = MaxSplitSliceEndOffset; - return; - } - - // If the we have split slices and the next slice is after a gap and is - // not splittable immediately form an empty partition for the split - // slices up until the next slice begins. - if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && - !P.SI->isSplittable()) { - P.BeginOffset = P.EndOffset; - P.EndOffset = P.SI->beginOffset(); - return; - } - } - - // OK, we need to consume new slices. Set the end offset based on the - // current slice, and step SJ past it. The beginning offset of the - // parttion is the beginning offset of the next slice unless we have - // pre-existing split slices that are continuing, in which case we begin - // at the prior end offset. - P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; - P.EndOffset = P.SI->endOffset(); - ++P.SJ; - - // There are two strategies to form a partition based on whether the - // partition starts with an unsplittable slice or a splittable slice. - if (!P.SI->isSplittable()) { - // When we're forming an unsplittable region, it must always start at - // the first slice and will extend through its end. - assert(P.BeginOffset == P.SI->beginOffset()); - - // Form a partition including all of the overlapping slices with this - // unsplittable slice. - while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { - if (!P.SJ->isSplittable()) - P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); - ++P.SJ; - } - - // We have a partition across a set of overlapping unsplittable - // partitions. - return; - } - - // If we're starting with a splittable slice, then we need to form - // a synthetic partition spanning it and any other overlapping splittable - // splices. - assert(P.SI->isSplittable() && "Forming a splittable partition!"); - - // Collect all of the overlapping splittable slices. - while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && - P.SJ->isSplittable()) { - P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); - ++P.SJ; - } - - // Back upiP.EndOffset if we ended the span early when encountering an - // unsplittable slice. 
This synthesizes the early end offset of - a partition spanning only splittable slices. - if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { - assert(!P.SJ->isSplittable()); - P.EndOffset = P.SJ->beginOffset(); - } - } - - public: - bool operator==(const partition_iterator &RHS) const { - assert(SE == RHS.SE && - "End iterators don't match between compared partition iterators!"); - - // The observed positions of partitions is marked by the P.SI iterator and - // the emptyness of the split slices. The latter is only relevant when - // P.SI == SE, as the end iterator will additionally have an empty split - // slices list, but the prior may have the same P.SI and a tail of split - // slices. - if (P.SI == RHS.P.SI && - P.SplitTails.empty() == RHS.P.SplitTails.empty()) { - assert(P.SJ == RHS.P.SJ && - "Same set of slices formed two different sized partitions!"); - assert(P.SplitTails.size() == RHS.P.SplitTails.size() && - "Same slice position with differently sized non-empty split " - "slice tails!"); - return true; - } - return false; - } - - partition_iterator &operator++() { - advance(); - return *this; - } - - Partition &operator*() { return P; } - }; - - /// \brief A forward range over the partitions of the alloca's slices. - /// - /// This accesses an iterator range over the partitions of the alloca's - /// slices. It computes these partitions on the fly based on the overlapping - /// offsets of the slices and the ability to split them. It will visit "empty" - /// partitions to cover regions of the alloca only accessed via split - /// slices. - iterator_range partitions() { - return make_range(partition_iterator(begin(), end()), - partition_iterator(end(), end())); - } + iterator_range partitions(); /// \brief Access the dead users for this alloca. ArrayRef getDeadUsers() const { return DeadUsers; } @@ -595,6 +317,280 @@ private: /// the alloca. SmallVector DeadOperands; }; + +/// \brief A partition of the slices. +/// +/// An ephemeral representation for a range of slices which can be viewed as +/// a partition of the alloca. This range represents a span of the alloca's +/// memory which cannot be split, and provides access to all of the slices +/// overlapping some part of the partition. +/// +/// Objects of this type are produced by traversing the alloca's slices, but +/// are only ephemeral and not persistent. +class llvm::sroa::Partition {
+private: + friend class AllocaSlices; + friend class AllocaSlices::partition_iterator; + + typedef AllocaSlices::iterator iterator; + + /// \brief The beginning and ending offsets of the alloca for this + /// partition. + uint64_t BeginOffset, EndOffset; + + /// \brief The start and end iterators of this partition. + iterator SI, SJ; + + /// \brief A collection of split slice tails overlapping the partition. + SmallVector SplitTails; + + /// \brief Raw constructor builds an empty partition starting and ending at + /// the given iterator. + Partition(iterator SI) : SI(SI), SJ(SI) {} + +public: + /// \brief The start offset of this partition. + /// + /// All of the contained slices start at or after this offset. + uint64_t beginOffset() const { return BeginOffset; } + + /// \brief The end offset of this partition. + /// + /// All of the contained slices end at or before this offset. + uint64_t endOffset() const { return EndOffset; } + + /// \brief The size of the partition. + /// + /// Note that this can never be zero.
+ uint64_t size() const { + assert(BeginOffset < EndOffset && "Partitions must span some bytes!"); + return EndOffset - BeginOffset; + } + + /// \brief Test whether this partition contains no slices, and merely spans + /// a region occupied by split slices. + bool empty() const { return SI == SJ; } + + /// \name Iterate slices that start within the partition. + /// These may be splittable or unsplittable. They have a begin offset >= the + /// partition begin offset. + /// @{ + // FIXME: We should probably define a "concat_iterator" helper and use that + // to stitch together pointee_iterators over the split tails and the + // contiguous iterators of the partition. That would give a much nicer + // interface here. We could then additionally expose filtered iterators for + // split, unsplit, and unsplittable splices based on the usage patterns. + iterator begin() const { return SI; } + iterator end() const { return SJ; } + /// @} + + /// \brief Get the sequence of split slice tails. + /// + /// These tails are of slices which start before this partition but are + /// split and overlap into the partition. We accumulate these while forming + /// partitions. + ArrayRef splitSliceTails() const { return SplitTails; } +}; + +/// \brief An iterator over partitions of the alloca's slices. +/// +/// This iterator implements the core algorithm for partitioning the alloca's +/// slices. It is a forward iterator as we don't support backtracking for +/// efficiency reasons, and re-use a single storage area to maintain the +/// current set of split slices. +/// +/// It is templated on the slice iterator type to use so that it can operate +/// with either const or non-const slice iterators. +class AllocaSlices::partition_iterator + : public iterator_facade_base { + friend class AllocaSlices; + + /// \brief Most of the state for walking the partitions is held in a class + /// with a nice interface for examining them. + Partition P; + + /// \brief We need to keep the end of the slices to know when to stop. + AllocaSlices::iterator SE; + + /// \brief We also need to keep track of the maximum split end offset seen. + /// FIXME: Do we really? + uint64_t MaxSplitSliceEndOffset; + + /// \brief Sets the partition to be empty at given iterator, and sets the + /// end iterator. + partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) + : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { + // If not already at the end, advance our state to form the initial + // partition. + if (SI != SE) + advance(); + } + + /// \brief Advance the iterator to the next partition. + /// + /// Requires that the iterator not be at the end of the slices. + void advance() { + assert((P.SI != SE || !P.SplitTails.empty()) && + "Cannot advance past the end of the slices!"); + + // Clear out any split uses which have ended. + if (!P.SplitTails.empty()) { + if (P.EndOffset >= MaxSplitSliceEndOffset) { + // If we've finished all splits, this is easy. + P.SplitTails.clear(); + MaxSplitSliceEndOffset = 0; + } else { + // Remove the uses which have ended in the prior partition. This + // cannot change the max split slice end because we just checked that + // the prior partition ended prior to that max. 
+ P.SplitTails.erase( + std::remove_if( + P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), + P.SplitTails.end()); + assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { + return S->endOffset() == MaxSplitSliceEndOffset; + }) && + "Could not find the current max split slice offset!"); + assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { + return S->endOffset() <= MaxSplitSliceEndOffset; + }) && + "Max split slice end offset is not actually the max!"); + } + } + + // If P.SI is already at the end, then we've cleared the split tail and + // now have an end iterator. + if (P.SI == SE) { + assert(P.SplitTails.empty() && "Failed to clear the split slices!"); + return; + } + + // If we had a non-empty partition previously, set up the state for + // subsequent partitions. + if (P.SI != P.SJ) { + // Accumulate all the splittable slices which started in the old + // partition into the split list. + for (Slice &S : P) + if (S.isSplittable() && S.endOffset() > P.EndOffset) { + P.SplitTails.push_back(&S); + MaxSplitSliceEndOffset = + std::max(S.endOffset(), MaxSplitSliceEndOffset); + } + + // Start from the end of the previous partition. + P.SI = P.SJ; + + // If P.SI is now at the end, we at most have a tail of split slices. + if (P.SI == SE) { + P.BeginOffset = P.EndOffset; + P.EndOffset = MaxSplitSliceEndOffset; + return; + } + + // If we have split slices and the next slice is after a gap and is + // not splittable, immediately form an empty partition for the split + // slices up until the next slice begins. + if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && + !P.SI->isSplittable()) { + P.BeginOffset = P.EndOffset; + P.EndOffset = P.SI->beginOffset(); + return; + } + } + + // OK, we need to consume new slices. Set the end offset based on the + // current slice, and step SJ past it. The beginning offset of the + // partition is the beginning offset of the next slice unless we have + // pre-existing split slices that are continuing, in which case we begin + // at the prior end offset. + P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; + P.EndOffset = P.SI->endOffset(); + ++P.SJ; + + // There are two strategies to form a partition based on whether the + // partition starts with an unsplittable slice or a splittable slice. + if (!P.SI->isSplittable()) { + // When we're forming an unsplittable region, it must always start at + // the first slice and will extend through its end. + assert(P.BeginOffset == P.SI->beginOffset()); + + // Form a partition including all of the overlapping slices with this + // unsplittable slice. + while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { + if (!P.SJ->isSplittable()) + P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); + ++P.SJ; + } + + // We have a partition across a set of overlapping unsplittable + // partitions. + return; + } + + // If we're starting with a splittable slice, then we need to form + // a synthetic partition spanning it and any other overlapping splittable + // slices. + assert(P.SI->isSplittable() && "Forming a splittable partition!"); + + // Collect all of the overlapping splittable slices. + while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && + P.SJ->isSplittable()) { + P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); + ++P.SJ; + } + + // Back up P.EndOffset if we ended the span early when encountering an + // unsplittable slice.
This synthesizes the early end offset of + // a partition spanning only splittable slices. + if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { + assert(!P.SJ->isSplittable()); + P.EndOffset = P.SJ->beginOffset(); + } + } + +public: + bool operator==(const partition_iterator &RHS) const { + assert(SE == RHS.SE && + "End iterators don't match between compared partition iterators!"); + + // The observed positions of partitions is marked by the P.SI iterator and + // the emptiness of the split slices. The latter is only relevant when + // P.SI == SE, as the end iterator will additionally have an empty split + // slices list, but the prior may have the same P.SI and a tail of split + // slices. + if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) { + assert(P.SJ == RHS.P.SJ && + "Same set of slices formed two different sized partitions!"); + assert(P.SplitTails.size() == RHS.P.SplitTails.size() && + "Same slice position with differently sized non-empty split " + "slice tails!"); + return true; + } + return false; + } + + partition_iterator &operator++() { + advance(); + return *this; + } + + Partition &operator*() { return P; } +}; + +/// \brief A forward range over the partitions of the alloca's slices. +/// +/// This accesses an iterator range over the partitions of the alloca's +/// slices. It computes these partitions on the fly based on the overlapping +/// offsets of the slices and the ability to split them. It will visit "empty" +/// partitions to cover regions of the alloca only accessed via split +/// slices. +iterator_range AllocaSlices::partitions() { + return make_range(partition_iterator(begin(), end()), + partition_iterator(end(), end())); } static Value *foldSelectInst(SelectInst &SI) { @@ -1072,217 +1068,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); } #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -namespace { -/// \brief Implementation of LoadAndStorePromoter for promoting allocas. -/// -/// This subclass of LoadAndStorePromoter adds overrides to handle promoting -/// the loads and stores of an alloca instruction, as well as updating its -/// debug information. This is used when a domtree is unavailable and thus -/// mem2reg in its full form can't be used to handle promotion of allocas to -/// scalar values. -class AllocaPromoter : public LoadAndStorePromoter { - AllocaInst &AI; - DIBuilder &DIB; - - SmallVector DDIs; - SmallVector DVIs; - -public: - AllocaPromoter(ArrayRef Insts, - SSAUpdater &S, - AllocaInst &AI, DIBuilder &DIB) - : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} - - void run(const SmallVectorImpl &Insts) { - // Retain the debug information attached to the alloca for use when - // rewriting loads and stores. - if (auto *L = LocalAsMetadata::getIfExists(&AI)) { - if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) { - for (User *U : DINode->users()) - if (DbgDeclareInst *DDI = dyn_cast(U)) - DDIs.push_back(DDI); - else if (DbgValueInst *DVI = dyn_cast(U)) - DVIs.push_back(DVI); - } - } - - LoadAndStorePromoter::run(Insts); - - // While we have the debug information, clear it off of the alloca. The - // caller takes care of deleting the alloca. 
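// Editor's illustration (not part of the patch): one concrete walk of the
// partition_iterator defined above, with hypothetical offsets. For an 8-byte
// alloca whose slices are sorted the way AllocaSlices sorts them:
//   B: [0,4) unsplittable (an i32 load)
//   A: [0,8) splittable   (a memset covering the whole alloca)
//   C: [4,8) unsplittable (an i32 store)
// advance() forms two partitions:
//   [0,4): contains B and A, which both begin inside it; when the iterator
//          moves on, A's remaining tail [4,8) is recorded in SplitTails.
//   [4,8): contains C, with A's split tail still overlapping it.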
- while (!DDIs.empty()) - DDIs.pop_back_val()->eraseFromParent(); - while (!DVIs.empty()) - DVIs.pop_back_val()->eraseFromParent(); - } - - bool - isInstInList(Instruction *I, - const SmallVectorImpl &Insts) const override { - Value *Ptr; - if (LoadInst *LI = dyn_cast(I)) - Ptr = LI->getOperand(0); - else - Ptr = cast(I)->getPointerOperand(); - - // Only used to detect cycles, which will be rare and quickly found as - // we're walking up a chain of defs rather than down through uses. - SmallPtrSet Visited; - - do { - if (Ptr == &AI) - return true; - - if (BitCastInst *BCI = dyn_cast(Ptr)) - Ptr = BCI->getOperand(0); - else if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) - Ptr = GEPI->getPointerOperand(); - else - return false; - - } while (Visited.insert(Ptr).second); - - return false; - } - - void updateDebugInfo(Instruction *Inst) const override { - for (DbgDeclareInst *DDI : DDIs) - if (StoreInst *SI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - else if (LoadInst *LI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - for (DbgValueInst *DVI : DVIs) { - Value *Arg = nullptr; - if (StoreInst *SI = dyn_cast(Inst)) { - // If an argument is zero extended then use argument directly. The ZExt - // may be zapped by an optimization pass in future. - if (ZExtInst *ZExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(ZExt->getOperand(0)); - else if (SExtInst *SExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(SExt->getOperand(0)); - if (!Arg) - Arg = SI->getValueOperand(); - } else if (LoadInst *LI = dyn_cast(Inst)) { - Arg = LI->getPointerOperand(); - } else { - continue; - } - DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(), - DVI->getExpression(), DVI->getDebugLoc(), - Inst); - } - } -}; -} // end anon namespace - -namespace { -/// \brief An optimization pass providing Scalar Replacement of Aggregates. -/// -/// This pass takes allocations which can be completely analyzed (that is, they -/// don't escape) and tries to turn them into scalar SSA values. There are -/// a few steps to this process. -/// -/// 1) It takes allocations of aggregates and analyzes the ways in which they -/// are used to try to split them into smaller allocations, ideally of -/// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate individual scalar accesses. -/// 2) It will transform accesses into forms which are suitable for SSA value -/// promotion. This can be replacing a memset with a scalar store of an -/// integer value, or it can involve speculating operations on a PHI or -/// select to be a PHI or select of the results. -/// 3) Finally, this will try to detect a pattern of accesses which map cleanly -/// onto insert and extract operations on a vector value, and convert them to -/// this form. By doing so, it will enable promotion of vector aggregates to -/// SSA vector values. -class SROA : public FunctionPass { - const bool RequiresDomTree; - - LLVMContext *C; - DominatorTree *DT; - AssumptionCache *AC; - - /// \brief Worklist of alloca instructions to simplify. - /// - /// Each alloca in the function is added to this. Each new alloca formed gets - /// added to it as well to recursively simplify unless that alloca can be - /// directly promoted. Finally, each time we rewrite a use of an alloca other - /// the one being actively rewritten, we add it back onto the list if not - /// already present to ensure it is re-visited. - SetVector> Worklist; - - /// \brief A collection of instructions to delete. 
- /// We try to batch deletions to simplify code and make things a bit more - /// efficient. - SetVector> DeadInsts; - - /// \brief Post-promotion worklist. - /// - /// Sometimes we discover an alloca which has a high probability of becoming - /// viable for SROA after a round of promotion takes place. In those cases, - /// the alloca is enqueued here for re-processing. - /// - /// Note that we have to be very careful to clear allocas out of this list in - /// the event they are deleted. - SetVector> PostPromotionWorklist; - - /// \brief A collection of alloca instructions we can directly promote. - std::vector PromotableAllocas; - - /// \brief A worklist of PHIs to speculate prior to promoting allocas. - /// - /// All of these PHIs have been checked for the safety of speculation and by - /// being speculated will allow promoting allocas currently in the promotable - /// queue. - SetVector> SpeculatablePHIs; - - /// \brief A worklist of select instructions to speculate prior to promoting - /// allocas. - /// - /// All of these select instructions have been checked for the safety of - /// speculation and by being speculated will allow promoting allocas - /// currently in the promotable queue. - SetVector> SpeculatableSelects; - -public: - SROA(bool RequiresDomTree = true) - : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr), - DT(nullptr) { - initializeSROAPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - const char *getPassName() const override { return "SROA"; } - static char ID; - -private: - friend class PHIOrSelectSpeculator; - friend class AllocaSliceRewriter; - - bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); - AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, - AllocaSlices::Partition &P); - bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); - bool runOnAlloca(AllocaInst &AI); - void clobberUse(Use &U); - void deleteDeadInstructions(SmallPtrSetImpl &DeletedAllocas); - bool promoteAllocas(Function &F); -}; -} - -char SROA::ID = 0; - -FunctionPass *llvm::createSROAPass(bool RequiresDomTree) { - return new SROA(RequiresDomTree); -} - -INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false, - false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false, - false) - /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. static Type *findCommonType(AllocaSlices::const_iterator B, @@ -1373,7 +1158,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // Ensure that there are no instructions between the PHI and the load that // could store. - for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) + for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; @@ -1934,10 +1719,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, /// \brief Test whether the given slice use can be promoted to a vector. /// -/// This function is called to test each entry in a partioning which is slated +/// This function is called to test each entry in a partition which is slated /// for a single slice. 
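// Editor's illustration (not part of the patch; hypothetical IR): for
//   %a = alloca <4 x float>
// a slice covering bytes [4,8) falls on a single element (offset 4 with
// ElementSize 4 is element 1), so a load of it can be rewritten as
//   %e = extractelement <4 x float> %v, i32 1
// whereas a slice covering bytes [2,6) straddles two elements and rules out
// this candidate vector type.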
-static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P, - const Slice &S, VectorType *Ty, +static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, + VectorType *Ty, uint64_t ElementSize, const DataLayout &DL) { // First validate the slice offsets. @@ -2012,8 +1797,7 @@ static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P, /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. -static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P, - const DataLayout &DL) { +static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; @@ -2130,7 +1914,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t RelEnd = S.endOffset() - AllocBeginOffset; // We can't reasonably handle cases where the load or store extends past - // the end of the aloca's type and into its padding. + // the end of the alloca's type and into its padding. if (RelEnd > Size) return false; @@ -2199,7 +1983,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, /// This is a quick test to check whether we can rewrite the integer loads and /// stores to a particular alloca into wider loads and stores and be able to /// promote the resulting alloca. -static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy, +static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL) { uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy); // Don't create integer types larger than the maximum bitwidth. @@ -2368,14 +2152,14 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, return V; } -namespace { /// \brief Visitor to rewrite instructions using p particular slice of an alloca /// to use a new alloca. /// /// Also implements the rewriting to vector-based accesses when the partition /// passes the isVectorPromotionViable predicate. Most of the rewriting logic /// lives here. -class AllocaSliceRewriter : public InstVisitor { +class llvm::sroa::AllocaSliceRewriter + : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class llvm::InstVisitor; typedef llvm::InstVisitor Base; @@ -2583,9 +2367,19 @@ private: V = convertValue(DL, IRB, V, IntTy); assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; - if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) - V = extractInteger(DL, IRB, V, cast(LI.getType()), Offset, - "extract"); + if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) { + IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8); + V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract"); + } + // It is possible that the extracted type is not the load type. This + // happens if there is a load past the end of the alloca, and as + // a consequence the slice is narrower but still a candidate for integer + // lowering. To handle this case, we just zero extend the extracted + // integer. 
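// Editor's illustration (not part of the patch; hypothetical IR):
//   %a = alloca i16
//   %p = bitcast i16* %a to i32*
//   %v = load i32, i32* %p   ; reads past the end of the 2-byte alloca
// The slice is [0,2), so SliceSize * 8 == 16 is narrower than the i32 load;
// the code below extracts an i16 and zero-extends it to i32 to stand in
// for %v.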
+ assert(cast(LI.getType())->getBitWidth() >= SliceSize * 8 && + "Can only handle an extract for an overly wide load"); + if (cast(LI.getType())->getBitWidth() > SliceSize * 8) + V = IRB.CreateZExt(V, LI.getType()); return V; } @@ -2648,7 +2442,7 @@ private: DL.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); // Move the insertion point just past the load so that we can refer to it. - IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI))); + IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the // basis for the new value. This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -3126,7 +2920,7 @@ private: // dominate the PHI. IRBuilderTy PtrBuilder(IRB); if (isa(OldPtr)) - PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt()); + PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt()); else PtrBuilder.SetInsertPoint(OldPtr); PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc()); @@ -3169,7 +2963,6 @@ private: return true; } }; -} namespace { /// \brief Visitor to rewrite aggregate loads and stores as scalar. @@ -3181,8 +2974,6 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class llvm::InstVisitor; - const DataLayout &DL; - /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3194,8 +2985,6 @@ class AggLoadStoreRewriter : public InstVisitor { Use *U; public: - AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {} - /// Rewrite loads and stores through a pointer and all pointers derived from /// it. bool rewrite(Instruction &I) { @@ -3711,7 +3500,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { return true; }), Stores.end()); - // Now we have to go *back* through all te stores, because a later store may + // Now we have to go *back* through all the stores, because a later store may // have caused an earlier store's load to become unsplittable and if it is // unsplittable for the later store, then we can't rely on it being split in // the earlier store either. @@ -3773,7 +3562,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { "Cannot represent alloca access size using 64-bit integers!"); Instruction *BasePtr = cast(LI->getPointerOperand()); - IRB.SetInsertPoint(BasicBlock::iterator(LI)); + IRB.SetInsertPoint(LI); DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); @@ -3825,7 +3614,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } Value *StoreBasePtr = SI->getPointerOperand(); - IRB.SetInsertPoint(BasicBlock::iterator(SI)); + IRB.SetInsertPoint(SI); DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); @@ -3914,7 +3703,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { if (SplitLoads) { PLoad = (*SplitLoads)[Idx]; } else { - IRB.SetInsertPoint(BasicBlock::iterator(LI)); + IRB.SetInsertPoint(LI); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), @@ -3924,7 +3713,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } // And store this partition. 
- IRB.SetInsertPoint(BasicBlock::iterator(SI)); + IRB.SetInsertPoint(SI); StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), @@ -3972,7 +3761,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Mark the original store as dead now that we've split it up and kill its // slice. Note that we leave the original load in place unless this store - // was its ownly use. It may in turn be split up if it is an alloca load + // was its only use. It may in turn be split up if it is an alloca load // for some other alloca, but it may be a normal load. This may introduce // redundant loads, but where those can be merged the rest of the optimizer // should handle the merging, and this uncovers SSA splits which is more @@ -4024,7 +3813,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { /// at enabling promotion and if it was successful queues the alloca to be /// promoted. AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, - AllocaSlices::Partition &P) { + Partition &P) { // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. @@ -4230,12 +4019,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { std::max(NumPartitions, MaxPartitionsPerAlloca); // Migrate debug information from the old alloca to the new alloca(s) - // and the individial partitions. + // and the individual partitions. if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) { auto *Var = DbgDecl->getVariable(); auto *Expr = DbgDecl->getExpression(); - DIBuilder DIB(*AI.getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); bool IsSplit = Pieces.size() > 1; for (auto Piece : Pieces) { // Create a piece expression describing the new partition or reuse AI's @@ -4308,7 +4096,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. - AggLoadStoreRewriter AggRewriter(DL); + AggLoadStoreRewriter AggRewriter; Changed |= AggRewriter.rewrite(AI); // Build the slices using a recursive instruction-visiting builder. @@ -4388,107 +4176,29 @@ void SROA::deleteDeadInstructions( } } -static void enqueueUsersInWorklist(Instruction &I, - SmallVectorImpl &Worklist, - SmallPtrSetImpl &Visited) { - for (User *U : I.users()) - if (Visited.insert(cast(U)).second) - Worklist.push_back(cast(U)); -} - /// \brief Promote the allocas, using the best available technique. /// /// This attempts to promote whatever allocas have been identified as viable in /// the PromotableAllocas list. If that list is empty, there is nothing to do. -/// If there is a domtree available, we attempt to promote using the full power -/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is -/// based on the SSAUpdater utilities. This function returns whether any -/// promotion occurred. +/// This function returns whether any promotion occurred. 
bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; NumPromoted += PromotableAllocas.size(); - if (DT && !ForceSSAUpdater) { - DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); - PromotableAllocas.clear(); - return true; - } - - DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n"); - SSAUpdater SSA; - DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); - SmallVector Insts; - - // We need a worklist to walk the uses of each alloca. - SmallVector Worklist; - SmallPtrSet Visited; - SmallVector DeadInsts; - - for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { - AllocaInst *AI = PromotableAllocas[Idx]; - Insts.clear(); - Worklist.clear(); - Visited.clear(); - - enqueueUsersInWorklist(*AI, Worklist, Visited); - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - - // FIXME: Currently the SSAUpdater infrastructure doesn't reason about - // lifetime intrinsics and so we strip them (and the bitcasts+GEPs - // leading to them) here. Eventually it should use them to optimize the - // scalar values produced. - if (IntrinsicInst *II = dyn_cast(I)) { - assert(II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end); - II->eraseFromParent(); - continue; - } - - // Push the loads and stores we find onto the list. SROA will already - // have validated that all loads and stores are viable candidates for - // promotion. - if (LoadInst *LI = dyn_cast(I)) { - assert(LI->getType() == AI->getAllocatedType()); - Insts.push_back(LI); - continue; - } - if (StoreInst *SI = dyn_cast(I)) { - assert(SI->getValueOperand()->getType() == AI->getAllocatedType()); - Insts.push_back(SI); - continue; - } - - // For everything else, we know that only no-op bitcasts and GEPs will - // make it this far, just recurse through them and recall them for later - // removal. - DeadInsts.push_back(I); - enqueueUsersInWorklist(*I, Worklist, Visited); - } - AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); - while (!DeadInsts.empty()) - DeadInsts.pop_back_val()->eraseFromParent(); - AI->eraseFromParent(); - } - + DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); + PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); PromotableAllocas.clear(); return true; } -bool SROA::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) - return false; - +PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - AC = &getAnalysis().getAssumptionCache(F); + DT = &RunDT; + AC = &RunAC; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); @@ -4527,12 +4237,55 @@ bool SROA::runOnFunction(Function &F) { PostPromotionWorklist.clear(); } while (!Worklist.empty()); - return Changed; + // FIXME: Even when promoting allocas we should preserve some abstract set of + // CFG-specific analyses. + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } -void SROA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - if (RequiresDomTree) - AU.addRequired(); - AU.setPreservesCFG(); +PreservedAnalyses SROA::run(Function &F, AnalysisManager *AM) { + return runImpl(F, AM->getResult(F), + AM->getResult(F)); } + +/// A legacy pass for the legacy pass manager that wraps the \c SROA pass. +/// +/// This is in the llvm namespace purely to allow it to be a friend of the \c +/// SROA pass. +class llvm::sroa::SROALegacyPass : public FunctionPass { + /// The SROA implementation. + SROA Impl; + +public: + SROALegacyPass() : FunctionPass(ID) { + initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + if (skipOptnoneFunction(F)) + return false; + + auto PA = Impl.runImpl( + F, getAnalysis().getDomTree(), + getAnalysis().getAssumptionCache(F)); + return !PA.areAllPreserved(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } + + const char *getPassName() const override { return "SROA"; } + static char ID; +}; + +char SROALegacyPass::ID = 0; + +FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); } + +INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", + "Scalar Replacement Of Aggregates", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", + false, false) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index d5d360571f88..52d477cc9573 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -16,7 +16,10 @@ #include "llvm/Transforms/Scalar.h" #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Scalar.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -27,10 +30,9 @@ using namespace llvm; /// initializeScalarOptsPasses - Initialize all passes linked into the /// ScalarOpts library. 
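A minimal sketch (editor's illustration, not part of the patch; pass name hypothetical) of the porting pattern the SROA hunks above establish: the transformation logic lives in a runImpl method, and both the new pass manager's run() and a thin legacy FunctionPass wrapper delegate to it.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
using namespace llvm;

class FooPass {
public:
  // Shared implementation; analyses are handed in by whichever manager runs.
  PreservedAnalyses runImpl(Function &F, DominatorTree &DT) {
    (void)F; (void)DT;               // ... transform here ...
    return PreservedAnalyses::all();
  }
  // New pass manager entry point (this snapshot passes the manager by pointer).
  PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM) {
    return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F));
  }
};

class FooLegacyPass : public FunctionPass {
  FooPass Impl; // reuse the new-style pass verbatim

public:
  static char ID;
  FooLegacyPass() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    auto PA = Impl.runImpl(
        F, getAnalysis<DominatorTreeWrapperPass>().getDomTree());
    return !PA.areAllPreserved(); // legacy PM expects a "changed" flag
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
  }
};

char FooLegacyPass::ID = 0;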
void llvm::initializeScalarOpts(PassRegistry &Registry) { - initializeADCEPass(Registry); + initializeADCELegacyPassPass(Registry); initializeBDCEPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); - initializeSampleProfileLoaderPass(Registry); initializeConstantHoistingPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); @@ -66,7 +68,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeRewriteStatepointsForGCPass(Registry); initializeSCCPPass(Registry); initializeIPSCCPPass(Registry); - initializeSROAPass(Registry); + initializeSROALegacyPassPass(Registry); initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); @@ -81,6 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializePlaceSafepointsPass(Registry); initializeFloat2IntPass(Registry); initializeLoopDistributePass(Registry); + initializeLoopLoadEliminationPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { @@ -225,15 +228,15 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { } void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); + unwrap(PM)->add(createTypeBasedAAWrapperPass()); } void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createScopedNoAliasAAPass()); + unwrap(PM)->add(createScopedNoAliasAAWrapperPass()); } void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createBasicAliasAnalysisPass()); + unwrap(PM)->add(createBasicAAWrapperPass()); } void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index d955da7ce75d..114d22ddf2e4 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -60,6 +60,7 @@ STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); namespace { +#define SROA SROA_ struct SROA : public FunctionPass { SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT) : FunctionPass(ID), HasDomTree(hasDT) { @@ -382,8 +383,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "", - AI->getParent()->begin()); + AllocaInst *NewAI = + new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front()); ConvertUsesToScalar(AI, NewAI, 0, nullptr); return NewAI; } @@ -1195,7 +1196,7 @@ static bool isSafePHIToSpeculate(PHINode *PN) { // Ensure that there are no instructions between the PHI and the load that // could store. 
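// Editor's illustration (not part of the patch; hypothetical IR):
//   %p = phi i32* [ %a, %then ], [ %b, %else ]
//   call void @clobber()   ; mayWriteToMemory() is true
//   %v = load i32, i32* %p
// Speculating the load into the predecessors would execute it before
// @clobber runs, so the scan below rejects this case.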
- for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI) + for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 049300350857..054bacdc706b 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -253,10 +253,10 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { - for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { - Instruction *I = II; + assert(Gathered.empty() && Scattered.empty()); + for (BasicBlock &BB : F) { + for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) { + Instruction *I = &*II; bool Done = visit(I); ++II; if (Done && I->getType()->isVoidTy()) @@ -285,7 +285,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { } // In the fallback case, just put the scattered before Point and // keep the result local to Point. - return Scatterer(Point->getParent(), Point, V); + return Scatterer(Point->getParent(), Point->getIterator(), V); } // Replace Op with the gathered form of the components in CV. Defer the @@ -377,7 +377,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(I.getParent(), &I); + IRBuilder<> Builder(&I); Scatterer Op0 = scatter(&I, I.getOperand(0)); Scatterer Op1 = scatter(&I, I.getOperand(1)); assert(Op0.size() == NumElems && "Mismatched binary operation"); @@ -397,7 +397,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(SI.getParent(), &SI); + IRBuilder<> Builder(&SI); Scatterer Op1 = scatter(&SI, SI.getOperand(1)); Scatterer Op2 = scatter(&SI, SI.getOperand(2)); assert(Op1.size() == NumElems && "Mismatched select"); @@ -438,7 +438,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (!VT) return false; - IRBuilder<> Builder(GEPI.getParent(), &GEPI); + IRBuilder<> Builder(&GEPI); unsigned NumElems = VT->getNumElements(); unsigned NumIndices = GEPI.getNumIndices(); @@ -472,7 +472,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(CI.getParent(), &CI); + IRBuilder<> Builder(&CI); Scatterer Op0 = scatter(&CI, CI.getOperand(0)); assert(Op0.size() == NumElems && "Mismatched cast"); ValueVector Res; @@ -492,7 +492,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); - IRBuilder<> Builder(BCI.getParent(), &BCI); + IRBuilder<> Builder(&BCI); Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); ValueVector Res; Res.resize(DstNumElems); @@ -569,7 +569,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(PHI.getParent(), &PHI); + IRBuilder<> Builder(&PHI); ValueVector Res; Res.resize(NumElems); @@ -600,7 +600,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); - IRBuilder<> Builder(LI.getParent(), &LI); + IRBuilder<> Builder(&LI); Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); ValueVector Res; Res.resize(NumElems); @@ -625,7 +625,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { 
return false; unsigned NumElems = Layout.VecTy->getNumElements(); - IRBuilder<> Builder(SI.getParent(), &SI); + IRBuilder<> Builder(&SI); Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); Scatterer Val = scatter(&SI, FullValue); @@ -642,7 +642,9 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { // Delete the instructions that we scalarized. If a full vector result // is still needed, recreate it using InsertElements. bool Scalarizer::finish() { - if (Gathered.empty()) + // The presence of data in Gathered or Scattered indicates changes + // made to the Function. + if (Gathered.empty() && Scattered.empty()) return false; for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end(); GMI != GME; ++GMI) { @@ -655,7 +657,7 @@ Value *Res = UndefValue::get(Ty); BasicBlock *BB = Op->getParent(); unsigned Count = Ty->getVectorNumElements(); - IRBuilder<> Builder(BB, Op); + IRBuilder<> Builder(Op); if (isa(Op)) Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); for (unsigned I = 0; I < Count; ++I) diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4a875311881a..86a10d2a1612 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -156,6 +156,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -164,6 +168,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -174,6 +179,7 @@ #include "llvm/IR/IRBuilder.h" using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt DisableSeparateConstOffsetFromGEP( "disable-separate-const-offset-from-gep", cl::init(false), @@ -319,8 +325,11 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.setPreservesCFG(); + AU.addRequired(); } bool doInitialization(Module &M) override { @@ -373,15 +382,42 @@ private: /// /// Verified in @i32_add in split-gep.ll bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. + /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting + /// the constant offset. After extraction, it becomes desirable to reunite the + /// distributed sexts. For example, + /// + /// &a[sext(i +nsw (j +nsw 5))] + /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)] + /// => constant extraction &a[sext(i) + sext(j)] + 5 + /// => reunion &a[sext(i +nsw j)] + 5 + bool reuniteExts(Function &F); + /// A helper that reunites sexts in an instruction. + bool reuniteExts(Instruction *I); + /// Find the closest dominator of <Dominatee> that is equivalent to <Key>. + Instruction *findClosestMatchingDominator(const SCEV *Key, + Instruction *Dominatee); /// Verify F is free of dead code. void verifyNoDeadCode(Function &F); + bool hasMoreThanOneUseInLoop(Value *v, Loop *L); + // Swap the index operands of two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second); + // Check if it is safe to swap operand of two GEP. + bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second, + Loop *CurLoop); + const DataLayout *DL; - const DominatorTree *DT; + DominatorTree *DT; + ScalarEvolution *SE; const TargetMachine *TM; + + LoopInfo *LI; + TargetLibraryInfo *TLI; /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. bool LowerGEP; + DenseMap> DominatingExprs; }; } // anonymous namespace @@ -391,7 +427,10 @@ INITIALIZE_PASS_BEGIN( "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, @@ -734,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); Value *ResultPtr = Variadic->getOperand(0); + Loop *L = LI->getLoopFor(Variadic->getParent()); + // Check if the base is not loop invariant or used more than once. + bool isSwapCandidate = + L && L->isLoopInvariant(ResultPtr) && + !hasMoreThanOneUseInLoop(ResultPtr, L); + Value *FirstResult = nullptr; + if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); @@ -762,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( // Create an ugly GEP with a single index for each index. ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); + if (FirstResult == nullptr) + FirstResult = ResultPtr; } } @@ -770,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); - } + } else + isSwapCandidate = false; + + // If we created a GEP with constant index, and the base is loop invariant, + // then we swap the first one with it, so LICM can move constant GEP out + // later. + GetElementPtrInst *FirstGEP = dyn_cast(FirstResult); + GetElementPtrInst *SecondGEP = dyn_cast(ResultPtr); + if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L)) + swapGEPOperand(FirstGEP, SecondGEP); + if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); @@ -891,13 +949,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Clear the inbounds attribute because the new index may be off-bound. // e.g., // - // b = add i64 a, 5 - // addr = gep inbounds float* p, i64 b + // b = add i64 a, 5 + // addr = gep inbounds float, float* p, i64 b // // is transformed to: // - // addr2 = gep float* p, i64 a - // addr = gep float* addr2, i64 5 + // addr2 = gep float, float* p, i64 a ; inbounds removed + // addr = gep inbounds float, float* addr2, i64 5 // // If a is -4, although the old index b is in bounds, the new index a is // off-bound. 
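// Editor's note (not part of the patch): concretely, with a = -4 the
// original GEP uses b = a + 5 = 1, a valid in-bounds index, while the
// split form first computes addr2 = &p[-4], which already lies outside
// the object; hence inbounds is dropped on the variadic GEP.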
http://llvm.org/docs/LangRef.html#id181 says "if the @@ -907,6 +965,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // // TODO(jingyue): do some range analysis to keep as many inbounds as // possible. GEPs with inbounds are more friendly to alias analysis. + bool GEPWasInBounds = GEP->isInBounds(); GEP->setIsInBounds(false); // Lowers a GEP to either GEPs with a single index or arithmetic operations. @@ -968,6 +1027,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + // Inherit the inbounds attribute of the original GEP. + cast(NewGEP)->setIsInBounds(GEPWasInBounds); } else { // Unlikely but possible. For example, // #pragma pack(1) @@ -990,6 +1051,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { Type::getInt8Ty(GEP->getContext()), NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", GEP); + // Inherit the inbounds attribute of the original GEP. + cast(NewGEP)->setIsInBounds(GEPWasInBounds); if (GEP->getType() != I8PtrTy) NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); } @@ -1008,24 +1071,96 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { return false; DT = &getAnalysis().getDomTree(); - + SE = &getAnalysis().getSE(); + LI = &getAnalysis().getLoopInfo(); + TLI = &getAnalysis().getTLI(); bool Changed = false; for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) { - if (GetElementPtrInst *GEP = dyn_cast(I++)) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;) + if (GetElementPtrInst *GEP = dyn_cast(I++)) Changed |= splitGEP(GEP); - } - // No need to split GEP ConstantExprs because all its indices are constant - // already. - } + // No need to split GEP ConstantExprs because all its indices are constant + // already. } + Changed |= reuniteExts(F); + if (VerifyNoDeadCode) verifyNoDeadCode(F); return Changed; } +Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator( + const SCEV *Key, Instruction *Dominatee) { + auto Pos = DominatingExprs.find(Key); + if (Pos == DominatingExprs.end()) + return nullptr; + + auto &Candidates = Pos->second; + // Because we process the basic blocks in pre-order of the dominator tree, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack. This + // optimization makes the algorithm O(n). + while (!Candidates.empty()) { + Instruction *Candidate = Candidates.back(); + if (DT->dominates(Candidate, Dominatee)) + return Candidate; + Candidates.pop_back(); + } + return nullptr; +} + +bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Dom: LHS+RHS + // I: sext(LHS)+sext(RHS) + // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom). 
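+  // An illustrative sketch (not from the original patch; %i, %j and the
+  // i32/i64 types are assumed for the example):
+  //   Dom:  %d  = add nsw i32 %i, %j
+  //         %s1 = sext i32 %i to i64
+  //         %s2 = sext i32 %j to i64
+  //   I:    %a  = add i64 %s1, %s2
+  // becomes
+  //         %a  = sext i32 %d to i64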
+  // TODO: handle zext
+  Value *LHS = nullptr, *RHS = nullptr;
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
+  }
+
+  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+    if (isKnownNotFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingExprs[Key].push_back(I);
+    }
+  }
+  return false;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+  bool Changed = false;
+  DominatingExprs.clear();
+  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+    BasicBlock *BB = Node->getBlock();
+    for (auto I = BB->begin(); I != BB->end(); ) {
+      Instruction *Cur = &*I++;
+      Changed |= reuniteExts(Cur);
+    }
+  }
+  return Changed;
+}
+
 void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
   for (auto &B : F) {
     for (auto &I : B) {
@@ -1038,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
     }
   }
 }
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+  if (!FirstGEP || !FirstGEP->hasOneUse())
+    return false;
+
+  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+    return false;
+
+  if (FirstGEP == SecondGEP)
+    return false;
+
+  unsigned FirstNum = FirstGEP->getNumOperands();
+  unsigned SecondNum = SecondGEP->getNumOperands();
+  // Give up if the number of operands is not 2.
+  if (FirstNum != SecondNum || FirstNum != 2)
+    return false;
+
+  Value *FirstBase = FirstGEP->getOperand(0);
+  Value *SecondBase = SecondGEP->getOperand(0);
+  Value *FirstOffset = FirstGEP->getOperand(1);
+  // Give up if the index of the first GEP is loop invariant.
+  if (CurLoop->isLoopInvariant(FirstOffset))
+    return false;
+
+  // Give up if the bases don't have the same type.
+  if (FirstBase->getType() != SecondBase->getType())
+    return false;
+
+  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+  // Check if the second operand of the first GEP has a constant coefficient.
+  // For example, in the following code we won't gain anything by hoisting
+  // the second GEP out, because the second GEP can be folded away:
+  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+  //   %67 = shl i64 %scevgep.sum.ur159, 2
+  //   %uglygep160 = getelementptr i8* %65, i64 %67
+  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

+  // Skip a constant shift instruction, which may be generated by splitting
+  // GEPs.
+  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
+  // may not be profitable at all due to constant folding.
+ if (FirstOffsetDef) + if (BinaryOperator *BO = dyn_cast(FirstOffsetDef)) { + unsigned opc = BO->getOpcode(); + if ((opc == Instruction::Add || opc == Instruction::Sub) && + (isa(BO->getOperand(0)) || + isa(BO->getOperand(1)))) + return false; + } + return true; +} + +bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) { + int UsesInLoop = 0; + for (User *U : V->users()) { + if (Instruction *User = dyn_cast(U)) + if (L->contains(User)) + if (++UsesInLoop > 1) + return true; + } + return false; +} + +void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First, + GetElementPtrInst *Second) { + Value *Offset1 = First->getOperand(1); + Value *Offset2 = Second->getOperand(1); + First->setOperand(1, Offset2); + Second->setOperand(1, Offset1); + + // We changed p+o+c to p+c+o, p+c may not be inbound anymore. + const DataLayout &DAL = First->getModule()->getDataLayout(); + APInt Offset(DAL.getPointerSizeInBits( + cast(First->getType())->getAddressSpace()), + 0); + Value *NewBase = + First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset); + uint64_t ObjectSize; + if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) || + Offset.ugt(ObjectSize)) { + First->setIsInBounds(false); + Second->setIsInBounds(false); + } else + First->setIsInBounds(true); +} diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 231411a16c05..63c8836bf381 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" @@ -67,15 +68,14 @@ static bool mergeEmptyReturnBlocks(Function &F) { // single PHI node that is the operand to the return. if (Ret != &BB.front()) { // Check for something else in the block. - BasicBlock::iterator I = Ret; + BasicBlock::iterator I(Ret); --I; // Skip over debug info. while (isa(I) && I != BB.begin()) --I; if (!isa(I) && - (!isa(I) || I != BB.begin() || - Ret->getNumOperands() == 0 || - Ret->getOperand(0) != I)) + (!isa(I) || I != BB.begin() || Ret->getNumOperands() == 0 || + Ret->getOperand(0) != &*I)) continue; } @@ -136,7 +136,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded. 
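   // (Illustrative note, not part of the original patch: one pass can expose
   // more work, e.g. folding a conditional branch may leave the target block
   // with a single predecessor that only the next iteration can merge away;
   // hence the outer fixed-point loop.)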
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) { + if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) { LocalChange = true; ++NumSimpl; } @@ -217,6 +217,7 @@ struct CFGSimplifyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addPreserved(); } }; } diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index f49f4eaaedcb..64109b2df117 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -48,7 +48,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -66,7 +66,7 @@ char Sinking::ID = 0; INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false) FunctionPass *llvm::createSinkingPass() { return new Sinking(); } @@ -99,7 +99,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, bool Sinking::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool MadeChange, EverMadeChange = false; @@ -119,7 +119,7 @@ bool Sinking::runOnFunction(Function &F) { bool Sinking::ProcessBlock(BasicBlock &BB) { // Can't sink anything out of a block that has less than two successors. - if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + if (BB.getTerminator()->getNumSuccessors() <= 1) return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an @@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { bool ProcessedBegin = false; SmallPtrSet Stores; do { - Instruction *Inst = I; // The instruction to sink. + Instruction *Inst = &*I; // The instruction to sink. // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. @@ -165,14 +165,16 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, if (LoadInst *L = dyn_cast(Inst)) { MemoryLocation Loc = MemoryLocation::get(L); for (Instruction *S : Stores) - if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod) + if (AA->getModRefInfo(S, Loc) & MRI_Mod) return false; } - if (isa(Inst) || isa(Inst)) + if (isa(Inst) || isa(Inst) || Inst->isEHPad() || + Inst->mayThrow()) return false; - // Convergent operations can only be moved to control equivalent blocks. + // Convergent operations cannot be made control-dependent on additional + // values. if (auto CS = CallSite(Inst)) { if (CS.hasFnAttr(Attribute::Convergent)) return false; @@ -193,6 +195,11 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, if (Inst->getParent() == SuccToSinkTo) return false; + // It's never legal to sink an instruction into a block which terminates in an + // EH-pad. + if (SuccToSinkTo->getTerminator()->isExceptional()) + return false; + // If the block has multiple predecessors, this would introduce computation // on different code paths. We could split the critical edge, but for now we // just punt. 
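   // (Illustrative sketch, not part of the original patch; block and value
   // names are assumed. Given
   //   bb:   %v = add i32 %a, %b
   //         br i1 %c, label %s1, label %s2
   // %v is never sunk into a successor whose terminator is an EH pad
   // terminator such as cleanupret, and multi-predecessor successors are
   // generally punted on, as the comments above describe.)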
@@ -278,6 +285,6 @@ bool Sinking::SinkInstruction(Instruction *Inst, dbgs() << ")\n"); // Move the instruction. - Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt()); + Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt()); return true; } diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp index ff3f00a2e2f8..147d615488ff 100644 --- a/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -227,7 +227,7 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock, // changes the list that I is iterating through. auto Current = I; ++I; - if (!NotHoisted.count(Current)) { + if (!NotHoisted.count(&*Current)) { Current->moveBefore(ToBlock.getTerminator()); } } diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 6d9d417ef943..1faa65eb3417 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -131,7 +131,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); // We do not modify the shape of the CFG. AU.setPreservesCFG(); @@ -212,7 +212,7 @@ char StraightLineStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) @@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, Basis.CandidateKind == C.CandidateKind); } +// TODO: use TTI->getGEPCost. static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI, const DataLayout *DL) { @@ -523,7 +524,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( continue; const SCEV *OrigIndexExpr = IndexExprs[I - 1]; - IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0); + IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType()); // The base of this candidate is GEP's base plus the offsets of all // indices except this current one. @@ -689,7 +690,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { TTI = &getAnalysis().getTTI(F); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); // Traverse the dominator tree in the depth-first order. This order makes sure // all bases of a candidate are in Candidates when we process it. 
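  // (Illustrative sketch, not part of the original patch; values assumed:
  //    %b = mul i64 %x, 3      ; dominates %c, so it is visited first
  //    %c = mul i64 %x, 5      ; %b is already in Candidates as its basis
  //  allowing %c to be rewritten later as %b + 2 * %x.)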
for (auto node = GraphTraits::nodes_begin(DT); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 4f23e20d251d..662513c7d8ae 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -358,13 +358,9 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { BasicBlock *BB = N->getNodeAs(); BranchInst *Term = cast(BB->getTerminator()); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - - if (Visited.count(Succ)) { + for (BasicBlock *Succ : Term->successors()) + if (Visited.count(Succ)) Loops[Succ] = BB; - } - } } } @@ -903,14 +899,14 @@ void StructurizeCFG::rebuildSSA() { continue; } - if (DT->dominates(II, User)) + if (DT->dominates(&*II, User)) continue; if (!Initialized) { Value *Undef = UndefValue::get(II->getType()); Updater.Initialize(II->getType(), ""); Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); - Updater.AddAvailableValue(BB, II); + Updater.AddAvailableValue(BB, &*II); Initialized = true; } Updater.RewriteUseAfterInsertions(U); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index c7de2e2965c7..0e0b00df85bb 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" @@ -136,6 +137,7 @@ FunctionPass *llvm::createTailCallEliminationPass() { void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addPreserved(); } /// \brief Scan the specified function for alloca instructions. @@ -195,8 +197,8 @@ struct AllocaDerivedValueTracker { case Instruction::Call: case Instruction::Invoke: { CallSite CS(I); - bool IsNocapture = !CS.isCallee(U) && - CS.doesNotCapture(CS.getArgumentNo(U)); + bool IsNocapture = + CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); callUsesLocalStack(CS, IsNocapture); if (IsNocapture) { // If the alloca-derived argument is passed in as nocapture, then it @@ -302,7 +304,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { if (!CI || CI->isTailCall()) continue; - if (CI->doesNotAccessMemory()) { + bool IsNoTail = CI->isNoTailCall(); + + if (!IsNoTail && CI->doesNotAccessMemory()) { // A call to a readnone function whose arguments are all things computed // outside this function can be marked tail. Even if you stored the // alloca address into a global, a readnone function can't load the @@ -330,7 +334,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { } } - if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { DeferredTails.push_back(CI); } else { AllCallsAreTailCalls = false; @@ -404,7 +408,7 @@ bool TailCallElim::runTRE(Function &F) { // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { - BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB. + BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB. 
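     // BBI is advanced before BB is processed: FoldReturnAndProcessPred may
     // erase BB, and incrementing the iterator only afterwards would leave it
     // dangling.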
if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); @@ -574,7 +578,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI = nullptr; - BasicBlock::iterator BBI = TI; + BasicBlock::iterator BBI(TI); while (true) { CI = dyn_cast(BBI); if (CI && CI->getCalledFunction() == F) @@ -595,9 +599,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, // and disable this xform in this case, because the code generator will // lower the call to fabs into inline code. if (BB == &F->getEntryBlock() && - FirstNonDbg(BB->front()) == CI && - FirstNonDbg(std::next(BB->begin())) == TI && - CI->getCalledFunction() && + FirstNonDbg(BB->front().getIterator()) == CI && + FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() && !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. @@ -636,19 +639,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - BasicBlock::iterator BBI = CI; + BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (CanMoveAboveCall(BBI, CI)) continue; + if (CanMoveAboveCall(&*BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { + CanTransformAccumulatorRecursion(&*BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. - AccumulatorRecursionInstr = BBI; + AccumulatorRecursionInstr = &*BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } @@ -698,19 +701,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast(OEBI++)) if (isa(AI->getArraySize())) - AI->moveBefore(NEBI); + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. - Instruction *InsertPos = OldEntry->begin(); + Instruction *InsertPos = &OldEntry->front(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! - PN->addIncoming(I, NewEntry); + PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } } @@ -739,10 +742,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. 
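   // (Illustrative sketch, not part of the original patch: for
   //    int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
   //  the multiply is the accumulator instruction, and AccPN takes the role
   //  of acc in the iterative form acc = acc * n.)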
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); - PHINode *AccPN = - PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), - std::distance(PB, PE) + 1, - "accumulator.tr", OldEntry->begin()); + PHINode *AccPN = PHINode::Create( + AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp index 03c3a80170a3..409326eba401 100644 --- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index e9f62391a44f..0262358fa3d5 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -52,32 +52,34 @@ // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "add-discriminators" namespace { - struct AddDiscriminators : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - AddDiscriminators() : FunctionPass(ID) { - initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); - } +struct AddDiscriminators : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminators() : FunctionPass(ID) { + initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; - }; + bool runOnFunction(Function &F) override; +}; } char AddDiscriminators::ID = 0; @@ -89,17 +91,17 @@ INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators", // Command line option to disable discriminator generation even in the // presence of debug information. This is only needed when debugging // debug info generation issues. -static cl::opt -NoDiscriminators("no-discriminators", cl::init(false), - cl::desc("Disable generation of discriminator information.")); +static cl::opt NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); FunctionPass *llvm::createAddDiscriminatorsPass() { return new AddDiscriminators(); } static bool hasDebugInfo(const Function &F) { - NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); - return CUNodes != nullptr; + DISubprogram *S = getDISubprogram(&F); + return S != nullptr; } /// \brief Assign DWARF discriminators. 
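/// For instance (an assumed one-line source, not from the patch):
///   if (c) foo(); else bar();
/// both calls share the same file:line, so a sample profile cannot attribute
/// counts to the right call unless the two blocks get distinct discriminators.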
@@ -159,8 +161,7 @@ bool AddDiscriminators::runOnFunction(Function &F) { // Simlarly, if the function has no debug info, do nothing. // Finally, if this module is built with dwarf versions earlier than 4, // do nothing (discriminator support is a DWARF 4 feature). - if (NoDiscriminators || - !hasDebugInfo(F) || + if (NoDiscriminators || !hasDebugInfo(F) || F.getParent()->getDwarfVersion() < 4) return false; @@ -169,59 +170,77 @@ bool AddDiscriminators::runOnFunction(Function &F) { LLVMContext &Ctx = M->getContext(); DIBuilder Builder(*M, /*AllowUnresolved*/ false); - // Traverse all the blocks looking for instructions in different - // blocks that are at the same file:line location. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *B = I; - TerminatorInst *Last = B->getTerminator(); - const DILocation *LastDIL = Last->getDebugLoc(); - if (!LastDIL) - continue; + typedef std::pair Location; + typedef DenseMap BBScopeMap; + typedef DenseMap LocationBBMap; - for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { - BasicBlock *Succ = Last->getSuccessor(I); - Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - const DILocation *FirstDIL = First->getDebugLoc(); - if (!FirstDIL) + LocationBBMap LBM; + + // Traverse all instructions in the function. If the source line location + // of the instruction appears in other basic block, assign a new + // discriminator for this instruction. + for (BasicBlock &B : F) { + for (auto &I : B.getInstList()) { + if (isa(&I)) + continue; + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); + auto &BBMap = LBM[L]; + auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr)); + if (BBMap.size() == 1) + continue; + bool InsertSuccess = R.second; + Metadata *&NewScope = R.first->second; + // If we could insert a different block in the same location, a + // discriminator is needed to distinguish both instructions. + if (InsertSuccess) { + auto *Scope = DIL->getScope(); + auto *File = + Builder.createFile(DIL->getFilename(), Scope->getDirectory()); + NewScope = Builder.createLexicalBlockFile( + Scope, File, DIL->computeNewDiscriminator()); + } + I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(), + NewScope, DIL->getInlinedAt())); + DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" + << dyn_cast(NewScope)->getDiscriminator() + << I << "\n"); + Changed = true; + } + } + + // Traverse all instructions and assign new discriminators to call + // instructions with the same lineno that are in the same basic block. + // Sample base profile needs to distinguish different function calls within + // a same source line for correct profile annotation. + for (BasicBlock &B : F) { + const DILocation *FirstDIL = NULL; + for (auto &I : B.getInstList()) { + CallInst *Current = dyn_cast(&I); + if (!Current || isa(&I)) continue; - // If the first instruction (First) of Succ is at the same file - // location as B's last instruction (Last), add a new - // discriminator for First's location and all the instructions - // in Succ that share the same location with First. - if (!FirstDIL->canDiscriminate(*LastDIL)) { - // Create a new lexical scope and compute a new discriminator - // number for it. 
- StringRef Filename = FirstDIL->getFilename(); - auto *Scope = FirstDIL->getScope(); - auto *File = Builder.createFile(Filename, Scope->getDirectory()); - - // FIXME: Calculate the discriminator here, based on local information, - // and delete DILocation::computeNewDiscriminator(). The current - // solution gives different results depending on other modules in the - // same context. All we really need is to discriminate between - // FirstDIL and LastDIL -- a local map would suffice. - unsigned Discriminator = FirstDIL->computeNewDiscriminator(); - auto *NewScope = - Builder.createLexicalBlockFile(Scope, File, Discriminator); - auto *NewDIL = - DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), - NewScope, FirstDIL->getInlinedAt()); - DebugLoc newDebugLoc = NewDIL; - - // Attach this new debug location to First and every - // instruction following First that shares the same location. - for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; - ++I1) { - if (I1->getDebugLoc().get() != FirstDIL) - break; - I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() - << ":" << NewDIL->getColumn() << ":" - << NewDIL->getDiscriminator() << *I1 << "\n"); + DILocation *CurrentDIL = Current->getDebugLoc(); + if (FirstDIL) { + if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() && + CurrentDIL->getFilename() == FirstDIL->getFilename()) { + auto *Scope = FirstDIL->getScope(); + auto *File = Builder.createFile(FirstDIL->getFilename(), + Scope->getDirectory()); + auto *NewScope = Builder.createLexicalBlockFile( + Scope, File, FirstDIL->computeNewDiscriminator()); + Current->setDebugLoc(DILocation::get( + Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope, + CurrentDIL->getInlinedAt())); + Changed = true; + } else { + FirstDIL = CurrentDIL; } - DEBUG(dbgs() << "\n"); - Changed = true; + } else { + FirstDIL = CurrentDIL; } } } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index ef7dacac79cb..a5137e933e83 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -41,8 +41,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) - BBTerm->getSuccessor(i)->removePredecessor(BB); + for (BasicBlock *Succ : BBTerm->successors()) + Succ->removePredecessor(BB); // Zap all the instructions in the block. while (!BB->empty()) { @@ -65,7 +65,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceAnalysis *MemDep) { if (!isa(BB->begin())) return; @@ -77,8 +77,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, if (MemDep) MemDep->removeInstruction(PN); // Memdep updates AA itself. - else if (AA && isa(PN->getType())) - AA->deleteValue(PN); PN->eraseFromParent(); } @@ -108,7 +106,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. 
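/// For example (an assumed CFG, not from the patch): when %pred ends in an
/// unconditional branch to %bb and %bb has no other predecessors, %bb's
/// instructions are spliced into %pred and the branch is removed.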
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, - LoopInfo *LI, AliasAnalysis *AA, + LoopInfo *LI, MemoryDependenceAnalysis *MemDep) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; @@ -119,8 +117,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break invokes. - if (isa(PredBB->getTerminator())) return false; + // Don't break unwinding instructions. + if (PredBB->getTerminator()->isExceptional()) + return false; succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); BasicBlock *OnlySucc = BB; @@ -145,7 +144,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Begin by getting rid of unneeded PHIs. if (isa(BB->front())) - FoldSingleEntryPHINodes(BB, AA, MemDep); + FoldSingleEntryPHINodes(BB, MemDep); // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -253,7 +252,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, Succ->begin(), DT, LI); + return SplitBlock(Succ, &Succ->front(), DT, LI); } // Otherwise, if BB has a single successor, split it at the bottom of the @@ -284,8 +283,8 @@ llvm::SplitAllCriticalEdges(Function &F, /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI) { - BasicBlock::iterator SplitIt = SplitPt; - while (isa(SplitIt) || isa(SplitIt)) + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa(SplitIt) || SplitIt->isEHPad()) ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); @@ -393,7 +392,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef Preds, BranchInst *BI, - AliasAnalysis *AA, bool HasLoopExit) { + bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa(I); ) { @@ -474,17 +473,20 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, - const char *Suffix, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + // Do not attempt to split that which cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + // For the landingpads we need to act a bit differently. // Delegate this work to the SplitLandingPadPredecessors. if (BB->isLandingPad()) { SmallVector NewBBs; std::string NewName = std::string(Suffix) + ".split-lp"; - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), - NewBBs, AA, DT, LI, PreserveLCSSA); + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, + LI, PreserveLCSSA); return NewBBs[0]; } @@ -523,7 +525,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. 
- UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); return NewBB; } @@ -544,8 +546,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef Preds, const char *Suffix1, const char *Suffix2, SmallVectorImpl &NewBBs, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -574,7 +576,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector NewBB2Preds; @@ -611,7 +613,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. - UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); @@ -661,7 +663,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // return instruction. V = BCI->getOperand(0); NewBC = BCI->clone(); - Pred->getInstList().insert(NewRet, NewBC); + Pred->getInstList().insert(NewRet->getIterator(), NewBC); *i = NewBC; } if (PHINode *PN = dyn_cast(V)) { @@ -707,7 +709,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, MDNode *BranchWeights, DominatorTree *DT) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); @@ -757,7 +759,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, TerminatorInst **ElseTerm, MDNode *BranchWeights) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 7e83c9eeceb7..95825991cee9 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -101,10 +101,9 @@ static void createPHIsForSplitLoopExit(ArrayRef Preds, continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = - PHINode::Create(PN->getType(), Preds.size(), "split", - SplitBB->isLandingPad() ? - SplitBB->begin() : SplitBB->getTerminator()); + PHINode *NewPN = PHINode::Create( + PN->getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? 
&SplitBB->front() : SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); @@ -141,9 +140,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); - // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // Splitting the critical edge to a pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return nullptr; + if (DestBB->isEHPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -157,7 +156,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); - Function::iterator FBBI = TIBB; + Function::iterator FBBI = TIBB->getIterator(); F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they @@ -197,7 +196,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // If we have nothing to update, just return. - auto *AA = Options.AA; auto *DT = Options.DT; auto *LI = Options.LI; if (!DT && !LI) @@ -319,10 +317,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, LoopPreds.push_back(P); } if (!LoopPreds.empty()) { - assert(!DestBB->isLandingPad() && - "We don't split edges to landing pads!"); + assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); BasicBlock *NewExitBB = SplitBlockPredecessors( - DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA); + DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 8aa7b2a65ba9..64b44a6b7919 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -21,7 +22,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -55,32 +55,6 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, return CI; } -/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the -/// specified pointer. Ptr is required to be some pointer type, MaxLen must -/// be of size_t type, and the return value has 'intptr_t' type. 
-Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strnlen)) - return nullptr; - - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[2]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); - - LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = - M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), - DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), nullptr); - CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen"); - if (const Function *F = dyn_cast(StrNLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index f2d5e0745035..0914699a2e38 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -82,7 +82,7 @@ static bool insertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; Value *Dividend = Instr->getOperand(0); Value *Divisor = Instr->getOperand(1); @@ -94,7 +94,7 @@ static bool insertFastDiv(Function &F, } // Basic Block is split before divide - BasicBlock *MainBB = I; + BasicBlock *MainBB = &*I; BasicBlock *SuccessorBB = I->splitBasicBlock(J); ++I; //advance iterator I to successorBB @@ -190,7 +190,7 @@ static bool reuseOrInsertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 716e655affb9..8308a9b69149 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_library(LLVMTransformUtils SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp + SplitModule.cpp SymbolRewriter.cpp UnifyFunctionExitNodes.cpp Utils.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index cc4d6c6fb192..854a3b855f54 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -52,8 +52,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - VMap[II] = NewInst; // Add instruction map to value. - + VMap[&*II] = NewInst; // Add instruction map to value. 
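+      // (Descriptive note: recording the old->new mapping here is what lets
+      // the callers' later remap step redirect operands, so a cloned use of
+      // %x ends up pointing at the clone of %x rather than the original.)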
+ hasCalls |= (isa(II) && !isa(II)); if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) @@ -85,9 +85,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - assert(VMap.count(I) && "No mapping from source argument specified!"); + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); #endif // Copy all attributes other than those stored in the AttributeSet. We need @@ -96,6 +95,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn( + MapValue(OldFunc->getPersonalityFn(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + AttributeSet OldAttrs = OldFunc->getAttributes(); // Clone any argument attributes that are present in the VMap. for (const Argument &OldArg : OldFunc->args()) @@ -136,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BB.hasAddressTaken()) { Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), const_cast(&BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); } // Note return instructions for the caller. @@ -146,11 +152,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. - for (Function::iterator BB = cast(VMap[OldFunc->begin()]), - BE = NewFunc->end(); BB != BE; ++BB) + for (Function::iterator BB = + cast(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); + BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... - for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, VMap, + for (Instruction &II : *BB) + RemapInstruction(&II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } @@ -187,11 +195,9 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); if (!OldSubprogramMDNode) return; - // Ensure that OldFunc appears in the map. - // (if it's already there it must point to NewFunc anyway) - VMap[OldFunc] = NewFunc; auto *NewSubprogram = cast(MapMetadata(OldSubprogramMDNode, VMap)); + NewFunc->setSubprogram(NewSubprogram); for (auto *CU : Finder.compile_units()) { auto Subprograms = CU->getSubprograms(); @@ -222,10 +228,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // The user might be deleting arguments to the function by specifying them in // the VMap. If so, we need to not add the arguments to the arg ty vector // - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? - ArgTypes.push_back(I->getType()); + for (const Argument &I : F->args()) + if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I.getType()); // Create a new function type... 
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), @@ -236,11 +241,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) { // Is this argument preserved? - DestI->setName(I->getName()); // Copy the name over... - VMap[I] = DestI++; // Add mapping to VMap + for (const Argument & I : F->args()) + if (VMap.count(&I) == 0) { // Is this argument preserved? + DestI->setName(I.getName()); // Copy the name over... + VMap[&I] = &*DestI++; // Add mapping to VMap } if (ModuleLevelChanges) @@ -330,8 +334,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, II != IE; ++II) { // If the "Director" remaps the instruction, don't clone it. if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, II, NewBB); + CloningDirector::CloningAction Action = + Director->handleInstruction(VMap, &*II, NewBB); // If the cloning director says stop, we want to stop everything, not // just break out of the loop (which would cause the terminator to be // cloned). The cloning director is responsible for inserting a proper @@ -365,7 +369,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Value *MappedV = VMap.lookup(V)) V = MappedV; - VMap[II] = V; + VMap[&*II] = V; delete NewInst; continue; } @@ -373,9 +377,15 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); - VMap[II] = NewInst; // Add instruction map to value. + VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa(II) && !isa(II)); + + if (CodeInfo) + if (auto CS = ImmutableCallSite(&*II)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) hasStaticAllocas = true; @@ -400,8 +410,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. const TerminatorInst *TI = NewBB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); return; } assert(Action != CloningDirector::SkipInstruction && @@ -447,11 +457,16 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. - + + if (CodeInfo) + if (auto CS = ImmutableCallSite(OldTI)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + // Recursively clone any reachable successor blocks. 
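   // (Descriptive note: only successors of a cloned terminator enter ToClone,
   // so blocks unreachable from the starting block are pruned, never cloned.)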
const TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); } if (CodeInfo) { @@ -484,12 +499,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } #ifndef NDEBUG - // If the cloning starts at the begining of the function, verify that + // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) - for (Function::const_arg_iterator II = OldFunc->arg_begin(), - E = OldFunc->arg_end(); II != E; ++II) - assert(VMap.count(II) && "No mapping from source argument specified!"); + for (const Argument &II : OldFunc->args()) + assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, @@ -499,12 +513,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); - StartingInst = StartingBB->begin(); + StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); @@ -517,9 +531,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // // Defer PHI resolution until rest of function is resolved. SmallVector PHIToResolve; - for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); - BI != BE; ++BI) { - Value *V = VMap[BI]; + for (const BasicBlock &BI : *OldFunc) { + Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null(V); if (!NewBB) continue; // Dead block. @@ -528,7 +541,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast(I)) { @@ -621,8 +634,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, while ((PN = dyn_cast(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); - assert(VMap[OldI] == PN && "VMap mismatch"); - VMap[OldI] = NV; + assert(VMap[&*OldI] == PN && "VMap mismatch"); + VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } @@ -644,15 +657,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast(VMap[StartingBB]); + Function::iterator Begin = cast(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. 
- if (I != Begin && (pred_begin(I) == pred_end(I) || - I->getSinglePredecessor() == I)) { - BasicBlock *DeadBB = I++; + if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || + I->getSinglePredecessor() == &*I)) { + BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } @@ -662,7 +675,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. - ConstantFoldTerminator(I); + ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } @@ -681,7 +694,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. - Dest->replaceAllUsesWith(I); + Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); @@ -695,7 +708,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast(VMap[StartingBB]), + for (Function::iterator I = cast(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast(I->getTerminator())) @@ -717,7 +730,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, nullptr); } @@ -780,9 +793,10 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, } // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewPH); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewLoop->getHeader()->getIterator(), F->end()); return NewLoop; } diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 61f1811e7b4a..ab083353ece6 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -20,21 +20,28 @@ #include "llvm-c/Core.h" using namespace llvm; -/// CloneModule - Return an exact copy of the specified module. This is not as -/// easy as it might seem because we have to worry about making copies of global -/// variables and functions, and making their (initializers and references, -/// respectively) refer to the right globals. +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. 
/// -Module *llvm::CloneModule(const Module *M) { +std::unique_ptr llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. ValueToValueMapTy VMap; return CloneModule(M, VMap); } -Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { +std::unique_ptr llvm::CloneModule(const Module *M, + ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +std::unique_ptr llvm::CloneModule( + const Module *M, ValueToValueMapTy &VMap, + std::function ShouldCloneDefinition) { // First off, we need to create the new module. - Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + std::unique_ptr New = + llvm::make_unique(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); @@ -52,26 +59,48 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); - GV->copyAttributesFrom(I); - VMap[I] = GV; + GV->copyAttributesFrom(&*I); + VMap[&*I] = GV; } // Loop over the functions in the module, making external functions as before for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = - Function::Create(cast(I->getType()->getElementType()), - I->getLinkage(), I->getName(), New); - NF->copyAttributesFrom(I); - VMap[I] = NF; + Function::Create(cast(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New.get()); + NF->copyAttributesFrom(&*I); + VMap[&*I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - auto *PTy = cast(I->getType()); - auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New); - GA->copyAttributesFrom(I); - VMap[I] = GA; + if (!ShouldCloneDefinition(&*I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I->getValueType()->isFunctionTy()) + GV = Function::Create(cast(I->getValueType()), + GlobalValue::ExternalLinkage, I->getName(), + New.get()); + else + GV = new GlobalVariable( + *New, I->getValueType(), false, GlobalValue::ExternalLinkage, + (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + I->getThreadLocalMode(), I->getType()->getAddressSpace()); + VMap[&*I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. + continue; + } + auto *GA = GlobalAlias::create(I->getValueType(), + I->getType()->getPointerAddressSpace(), + I->getLinkage(), I->getName(), New.get()); + GA->copyAttributesFrom(&*I); + VMap[&*I] = GA; } // Now that all of the things that global variable initializer can refer to @@ -80,7 +109,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast(VMap[I]); + GlobalVariable *GV = cast(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. 
+ GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); } @@ -88,18 +122,22 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast(VMap[I]); + Function *F = cast(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - VMap[J] = DestI++; + VMap[&*J] = &*DestI++; } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); - + CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns); } if (I->hasPersonalityFn()) @@ -109,7 +147,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast(VMap[I]); + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(&*I)) + continue; + GlobalAlias *GA = cast(VMap[&*I]); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); } @@ -129,7 +170,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { extern "C" { LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(unwrap(M))); + return wrap(CloneModule(unwrap(M)).release()); } } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index ab89b41f6788..823696d88e65 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -51,7 +51,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, /// \brief Test whether a block is valid for extraction. static bool isBlockValidForExtraction(const BasicBlock &BB) { // Landing pads must be in the function where they were inserted for cleanup. - if (BB.isLandingPad()) + if (BB.isEHPad()) return false; // Don't hoist code containing allocas, invokes, or vastarts. @@ -175,7 +175,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { - Outputs.insert(II); + Outputs.insert(&*II); break; } } @@ -211,7 +211,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); @@ -246,7 +246,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, - PN->getName()+".ce", NewBB->begin()); + PN->getName() + ".ce", &NewBB->front()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they @@ -266,7 +266,8 @@ void CodeExtractor::splitReturnBlocks() { for (SetVector::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) if (ReturnInst *RI = dyn_cast((*I)->getTerminator())) { - BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + BasicBlock *New = + (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret"); if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. @@ -365,10 +366,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else - RewriteVal = AI++; + RewriteVal = &*AI++; std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (std::vector::iterator use = Users.begin(), useE = Users.end(); @@ -440,8 +441,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", - codeReplacer->getParent()->begin()->begin()); + new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc", + &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } @@ -457,9 +458,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = - new AllocaInst(StructArgTy, nullptr, "structArg", - codeReplacer->getParent()->begin()->begin()); + Struct = new AllocaInst(StructArgTy, nullptr, "structArg", + &codeReplacer->getParent()->front().front()); params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -566,8 +566,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, bool DominatesDef = true; - if (InvokeInst *Invoke = dyn_cast(outputs[out])) { - DefBlock = Invoke->getNormalDest(); + BasicBlock *NormalDest = nullptr; + if (auto *Invoke = dyn_cast(outputs[out])) + NormalDest = Invoke->getNormalDest(); + + if (NormalDest) { + DefBlock = NormalDest; // Make sure we are looking at the original successor block, not // at a newly inserted exit block, which won't be in the dominator @@ -606,11 +610,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { - new StoreInst(outputs[out], OAI, NTRet); + new StoreInst(outputs[out], &*OAI, NTRet); } } // Advance output iterator even if we don't emit a store diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index dc95089cd2ca..b56ff684e8a8 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ 
b/lib/Transforms/Utils/CtorUtils.cpp @@ -50,7 +50,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) { GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); + GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV); NGV->takeName(GCL); // Nuke the old list, replacing any uses with the new one. diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 003da58ee798..75a1dde57c4c 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,8 +35,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem", + &F->getEntryBlock().front()); } // We cannot demote invoke instructions to the stack if their normal edge @@ -89,16 +89,15 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, // AFTER the terminator instruction. BasicBlock::iterator InsertPt; if (!isa(I)) { - InsertPt = &I; - ++InsertPt; - for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) + InsertPt = ++I.getIterator(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. } else { InvokeInst &II = cast(I); InsertPt = II.getNormalDest()->getFirstInsertionPt(); } - new StoreInst(&I, Slot, InsertPt); + new StoreInst(&I, Slot, &*InsertPt); return Slot; } @@ -118,8 +117,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem", + &F->getEntryBlock().front()); } // Iterate over each operand inserting a store in each predecessor. @@ -133,12 +132,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { } // Insert a load in place of the PHI and replace all uses. - BasicBlock::iterator InsertPt = P; + BasicBlock::iterator InsertPt = P->getIterator(); - for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. - Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt); + Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt); P->replaceAllUsesWith(V); // Delete PHI. diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 4eb3e3dd17d2..492ae9f69a65 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -28,12 +28,11 @@ class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P = nullptr); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. 
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -122,8 +121,7 @@ public: /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. /// -bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast(BB->begin()); if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. @@ -177,8 +175,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, // Instructions in the internal condition blocks should be safe // to hoist up. - for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { - Instruction *CI = BI++; + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator(); + BI != BE;) { + Instruction *CI = &*BI++; if (isa(CI) || !isSafeToSpeculativelyExecute(CI)) return false; } @@ -315,7 +314,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { TerminatorInst *PTI2 = Head2->getTerminator(); - Instruction *PBI2 = Head2->begin(); + Instruction *PBI2 = &Head2->front(); bool eq1 = (Block1 == Head1); bool eq2 = (Block2 == Head2); @@ -327,9 +326,9 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, // Check whether instructions in Block1 and Block2 are identical // and do not alias with instructions in Head2. BasicBlock::iterator iter1 = Block1->begin(); - BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator end1 = Block1->getTerminator()->getIterator(); BasicBlock::iterator iter2 = Block2->begin(); - BasicBlock::iterator end2 = Block2->getTerminator(); + BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); while (1) { if (iter1 == end1) { @@ -338,7 +337,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, break; } - if (!iter1->isIdenticalTo(iter2)) + if (!iter1->isIdenticalTo(&*iter2)) return false; // Illegal to remove instructions with side effects except @@ -356,10 +355,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, return false; if (iter1->mayWriteToMemory()) { - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { // Check alias with Head2. 
- if (!AA || AA->alias(iter1, BI)) + if (!AA || AA->alias(&*iter1, &*BI)) return false; } } @@ -386,8 +385,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// if (a || b) /// statement; /// -bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); Instruction *CInst2 = dyn_cast_or_null(IfCond2); @@ -413,7 +411,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, return false; TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); - Instruction *PBI2 = SecondEntryBlock->begin(); + Instruction *PBI2 = &SecondEntryBlock->front(); if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2)) @@ -425,8 +423,8 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, // Check whether \param SecondEntryBlock has side-effect and is safe to // speculate. - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { - Instruction *CI = BI; + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + Instruction *CI = &*BI; if (isa(CI) || CI->mayHaveSideEffects() || !isSafeToSpeculativelyExecute(CI)) return false; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 44b7d25d519a..3893a752503b 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -49,6 +49,10 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, SmallPtrSetImpl &PhiUsers) { + if (const GlobalVariable *GV = dyn_cast(V)) + if (GV->isExternallyInitialized()) + GS.StoredType = GlobalStatus::StoredOnce; + for (const Use &U : V->uses()) { const User *UR = U.getUser(); if (const ConstantExpr *CE = dyn_cast(UR)) { diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index d2d60d7cd9f6..14574119b9a8 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -41,6 +42,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" #include + using namespace llvm; static cl::opt @@ -54,17 +56,17 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::desc("Convert align attributes to assumptions during inlining.")); bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(CI), IFI, InsertLifetime); + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); } bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(II), IFI, InsertLifetime); + 
AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { - /// A class for recording information about inlining through an invoke. - class InvokeInliningInfo { + /// A class for recording information about inlining a landing pad. + class LandingPadInliningInfo { BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. @@ -72,7 +74,7 @@ namespace { SmallVector UnwindDestPHIValues; public: - InvokeInliningInfo(InvokeInst *II) + LandingPadInliningInfo(InvokeInst *II) : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep @@ -121,14 +123,14 @@ namespace { } } }; -} +} // anonymous namespace /// Get or create a target for the branch from ResumeInsts. -BasicBlock *InvokeInliningInfo::getInnerResumeDest() { +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. - BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); InnerResumeDest = OuterResumeDest->splitBasicBlock(SplitPoint, OuterResumeDest->getName() + ".body"); @@ -137,7 +139,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. - BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + Instruction *InsertPoint = &InnerResumeDest->front(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast(I); @@ -162,8 +164,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI, - SmallPtrSetImpl &InlinedLPads) { +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); @@ -182,33 +184,39 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, /// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, - InvokeInliningInfo &Invoke) { +static BasicBlock * +HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *I = BBI++; + Instruction *I = &*BBI++; // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast(I); - // If this call cannot unwind, don't convert it to an invoke. - // Inline asm calls cannot throw. if (!CI || CI->doesNotThrow() || isa(CI->getCalledValue())) continue; // Convert this function call into an invoke instruction. First, split the // basic block. 
- BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + BasicBlock *Split = + BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); // Delete the unconditional branch inserted by splitBasicBlock BB->getInstList().pop_back(); // Create the new invoke instruction. - ImmutableCallSite CS(CI); - SmallVector InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, - Invoke.getOuterResumeDest(), - InvokeArgs, CI->getName(), BB); + SmallVector InvokeArgs(CI->arg_begin(), CI->arg_end()); + SmallVector OpBundles; + + CI->getOperandBundlesAsDefs(OpBundles); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs, + OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -219,12 +227,9 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Delete the original call Split->getInstList().pop_front(); - - // Update any PHI nodes in the exceptional block to indicate that there is - // now a new entry in them. - Invoke.addIncomingPHIValuesFor(BB); - return; + return BB; } + return nullptr; } /// If we inlined an invoke site, we need to convert calls @@ -233,8 +238,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. -static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); @@ -242,11 +247,12 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to // rewrite. - InvokeInliningInfo Invoke(II); + LandingPadInliningInfo Invoke(II); // Get all of the inlined landing pad instructions. SmallPtrSet InlinedLPads; - for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); + I != E; ++I) if (InvokeInst *II = dyn_cast(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); @@ -262,9 +268,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InlinedLPad->setCleanup(true); } - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { if (InlinedCodeInfo.ContainsCalls) - HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + Invoke.addIncomingPHIValuesFor(NewBB); // Forward any resumes that are remaining here. 
if (ResumeInst *RI = dyn_cast(BB->getTerminator())) @@ -278,6 +289,99 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector UnwindDestPHIValues; + llvm::BasicBlock *InvokeBB = II->getParent(); + for (Instruction &I : *UnwindDest) { + // Save the value to use for this edge. + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // This connects all the instructions which 'unwind to caller' to the invoke + // destination. + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (auto *CRI = dyn_cast(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + Instruction *Replacement = nullptr; + if (auto *CatchSwitch = dyn_cast(I)) { + if (CatchSwitch->unwindsToCaller()) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), UnwindDest, + CatchSwitch->getNumHandlers(), CatchSwitch->getName(), + CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + Replacement = NewCatchSwitch; + } + } else if (!isa(I)) { + llvm_unreachable("unexpected EHPad!"); + } + + if (Replacement) { + Replacement->takeName(I); + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) + if (BasicBlock *NewBB = + HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. 
+ UnwindDest->removePredecessor(InvokeBB); +} + /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. Were this not done, then @@ -395,17 +499,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout &DL, AliasAnalysis *AA) { + const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; const Function *CalledFunc = CS.getCalledFunction(); SmallVector NoAliasArgs; - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); I != E; ++I) { - if (I->hasNoAliasAttr() && !I->hasNUses(0)) - NoAliasArgs.push_back(I); + for (const Argument &I : CalledFunc->args()) { + if (I.hasNoAliasAttr() && !I.hasNUses(0)) + NoAliasArgs.push_back(&I); } if (NoAliasArgs.empty()) @@ -480,10 +583,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, continue; IsFuncCall = true; - if (AA) { - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS); - if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees || - MRB == AliasAnalysis::OnlyReadsArgumentPointees) + if (CalleeAAR) { + FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); + if (MRB == FMRB_OnlyAccessesArgumentPointees || + MRB == FMRB_OnlyReadsArgumentPointees) IsArgMemOnlyCall = true; } @@ -518,7 +621,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { SmallVector Objects; GetUnderlyingObjects(const_cast(PtrArgs[i]), - Objects, DL, /* MaxLookup = */ 0); + Objects, DL, /* LI = */ nullptr); for (Value *O : Objects) ObjSet.insert(O); @@ -646,7 +749,7 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); if (getKnownAlignment(Arg, DL, CS.getInstruction(), - &IFI.ACT->getAssumptionCache(*CalledFunc), + &IFI.ACT->getAssumptionCache(*CS.getCaller()), &DT) >= Align) continue; @@ -731,7 +834,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI) { Type *AggTy = cast(Src->getType())->getElementType(); - IRBuilder<> Builder(InsertBlock->begin()); + IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); @@ -851,9 +954,8 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, // Starting from the top, rebuild the nodes to point to the new inlined-at // location (then rebuilding the rest of the chain behind it) and update the // map of already-constructed inlined-at nodes. - for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); - I != E; ++I) { - const DILocation *MD = *I; + for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), + InlinedAtLocations.rend())) { Last = IANodes[MD] = DILocation::getDistinct( Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); } @@ -917,7 +1019,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. 
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -930,6 +1032,22 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CS.hasOperandBundles()) { + for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + // ... but it knows how to inline through "deopt" operand bundles ... + if (Tag == LLVMContext::OB_deopt) + continue; + // ... and "funclet" operand bundles. + if (Tag == LLVMContext::OB_funclet) + continue; + + return false; + } + } + // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -950,13 +1068,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Get the personality function from the callee if it contains a landing pad. Constant *CalledPersonality = - CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr; + CalledFunc->hasPersonalityFn() + ? CalledFunc->getPersonalityFn()->stripPointerCasts() + : nullptr; // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. Constant *CallerPersonality = - Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr; + Caller->hasPersonalityFn() + ? Caller->getPersonalityFn()->stripPointerCasts() + : nullptr; if (CalledPersonality) { if (!CallerPersonality) Caller->setPersonalityFn(CalledPersonality); @@ -968,9 +1090,46 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, return false; } + // We need to figure out which funclet the callsite was in so that we may + // properly nest the callee. + Instruction *CallSiteEHPad = nullptr; + if (CallerPersonality) { + EHPersonality Personality = classifyEHPersonality(CallerPersonality); + if (isFuncletEHPersonality(Personality)) { + Optional ParentFunclet = + CS.getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + CallSiteEHPad = cast(ParentFunclet->Inputs.front()); + + // OK, the inlining site is legal. What about the target function? + + if (CallSiteEHPad) { + if (Personality == EHPersonality::MSVC_CXX) { + // The MSVC personality cannot tolerate catches getting inlined into + // cleanup funclets. + if (isa(CallSiteEHPad)) { + // Ok, the call site is within a cleanuppad. Let's check the callee + // for catchpads. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (isa(CalledBB.getFirstNonPHI())) + return false; + } + } + } else if (isAsynchronousEHPersonality(Personality)) { + // SEH is even less tolerant, there may not be any sort of exceptional + // funclet in the callee. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (CalledBB.isEHPad()) + return false; + } + } + } + } + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. - Function::iterator LastBlock = &Caller->back(); + Function::iterator LastBlock = --Caller->end(); // Make sure to capture all of the return instructions from the cloned // function. 
@@ -1007,7 +1166,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } - VMap[I] = ActualArg; + VMap[&*I] = ActualArg; } // Add alignment assumptions if necessary. We do this before the inlined @@ -1029,7 +1188,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Inject byval arguments initialization. for (std::pair &Init : ByValInit) HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), - FirstNewBlock, IFI); + &*FirstNewBlock, IFI); + + Optional ParentDeopt = + CS.getOperandBundle(LLVMContext::OB_deopt); + if (ParentDeopt) { + SmallVector OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + Instruction *I = dyn_cast_or_null(VH); + if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + + OpDefs.clear(); + + CallSite ICS(I); + OpDefs.reserve(ICS.getNumOperandBundles()); + + for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { + auto ChildOB = ICS.getOperandBundleAt(i); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. + std::vector MergedDeoptArgs; + MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + + ChildOB.Inputs.size()); + + MergedDeoptArgs.insert(MergedDeoptArgs.end(), + ParentDeopt->Inputs.begin(), + ParentDeopt->Inputs.end()); + MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), + ChildOB.Inputs.end()); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = nullptr; + if (isa(I)) + NewI = CallInst::Create(cast(I), OpDefs, I); + else + NewI = InvokeInst::Create(cast(I), OpDefs, I); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + I->replaceAllUsesWith(NewI); + + VH = nullptr; + I->eraseFromParent(); + } + } // Update the callgraph if requested. if (IFI.CG) @@ -1042,7 +1255,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1085,9 +1298,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. - Caller->getEntryBlock().getInstList().splice(InsertPoint, - FirstNewBlock->getInstList(), - AI, I); + Caller->getEntryBlock().getInstList().splice( + InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); } // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); @@ -1137,7 +1349,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. 
if (InsertLifetime && !IFI.StaticAllocas.empty()) { - IRBuilder<> builder(FirstNewBlock->begin()); + IRBuilder<> builder(&FirstNewBlock->front()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; @@ -1189,7 +1401,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. - CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) + CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, {}, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the @@ -1203,10 +1415,74 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + // Update the lexical scopes of the new funclets and callsites. + // Anything that had 'none' as its parent is now nested inside the callsite's + // EHPad. + + if (CallSiteEHPad) { + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) { + // Add bundle operands to any top-level call sites. + SmallVector OpBundles; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + CallSite CS(I); + if (!CS) + continue; + + // Skip call sites which are nounwind intrinsics. + auto *CalledFn = + dyn_cast(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + continue; + + // Skip call sites which already have a "funclet" bundle. + if (CS.getOperandBundle(LLVMContext::OB_funclet)) + continue; + + CS.getOperandBundlesAsDefs(OpBundles); + OpBundles.emplace_back("funclet", CallSiteEHPad); + + Instruction *NewInst; + if (CS.isCall()) + NewInst = CallInst::Create(cast(I), OpBundles, I); + else + NewInst = InvokeInst::Create(cast(I), OpBundles, I); + NewInst->setDebugLoc(I->getDebugLoc()); + NewInst->takeName(I); + I->replaceAllUsesWith(NewInst); + I->eraseFromParent(); + + OpBundles.clear(); + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + if (auto *CatchSwitch = dyn_cast(I)) { + if (isa(CatchSwitch->getParentPad())) + CatchSwitch->setParentPad(CallSiteEHPad); + } else { + auto *FPI = cast(I); + if (isa(FPI->getParentPad())) + FPI->setParentPad(CallSiteEHPad); + } + } + } + // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. - if (InvokeInst *II = dyn_cast(TheCall)) - HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + if (auto *II = dyn_cast(TheCall)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been @@ -1250,7 +1526,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. 
- OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + OrigBB->getInstList().splice(TheCall->getIterator(), + FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); @@ -1297,15 +1574,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, - CalledFunc->getName()+".exit"); + AfterCallBB = + OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), + CalledFunc->getName() + ".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // - AfterCallBB = OrigBB->splitBasicBlock(TheCall, - CalledFunc->getName()+".exit"); + AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + CalledFunc->getName() + ".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first @@ -1314,14 +1592,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); - Br->setOperand(0, FirstNewBlock); - + Br->setOperand(0, &*FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. - Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), - FirstNewBlock, Caller->end()); + Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), + Caller->getBasicBlockList(), FirstNewBlock, + Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. @@ -1333,7 +1611,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), - AfterCallBB->begin()); + &AfterCallBB->front()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); @@ -1350,7 +1628,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } - // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { @@ -1413,7 +1690,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes - OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); // Remove the unconditional branch. 
OrigBB->getInstList().erase(Br); diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 30edf3b7aae4..5687afa61e2a 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -380,14 +380,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); - Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - - if (RemTyBitWidth != 32 && RemTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Rem->getType()->getIntegerBitWidth() == 32 || + Rem->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { @@ -401,7 +397,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { // If we didn't actually generate an urem instruction, we're done // This happens for example if the input were constant. In this case the // Builder insertion point was unchanged - if (Rem == Builder.GetInsertPoint()) + if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast(Builder.GetInsertPoint()); @@ -440,14 +436,10 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - - if (DivTyBitWidth != 32 && DivTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Div->getType()->getIntegerBitWidth() == 32 || + Div->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { @@ -461,7 +453,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { // If we didn't actually generate an udiv instruction, we're done // This happens for example if the input were constant. 
In this case the // Builder insertion point was unchanged - if (Div == Builder.GetInsertPoint()) + if (Div == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast(Builder.GetInsertPoint()); @@ -492,15 +484,14 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(RemTyBitWidth <= 32 && + "Div of bitwidth greater than 32 not supported"); - if (RemTyBitWidth == 32) + if (RemTyBitWidth == 32) return expandRemainder(Rem); // If bitwidth smaller than 32 extend inputs, extend output and proceed @@ -542,15 +533,13 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); - if (RemTyBitWidth == 64) + if (RemTyBitWidth == 64) return expandRemainder(Rem); // If bitwidth smaller than 64 extend inputs, extend output and proceed @@ -593,13 +582,11 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); if (DivTyBitWidth == 32) return expandDivision(Div); @@ -643,13 +630,12 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(DivTyBitWidth <= 64 && + "Div of bitwidth greater than 64 not supported"); if (DivTyBitWidth == 64) return expandDivision(Div); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 9d40b6989d6e..b4b2e148dfbb 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -31,8 +31,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -64,6 +66,13 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, PredIteratorCache 
&PredCache, LoopInfo *LI) { SmallVector UsesToRewrite; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (Inst.getType()->isTokenTy()) + return false; + BasicBlock *InstBB = Inst.getParent(); for (Use &U : Inst.uses()) { @@ -84,9 +93,8 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB - // dominates - // the value, so adjust DomBB to the normal destination block, which is - // effectively where the value is first usable. + // dominates the value, so adjust DomBB to the normal destination block, + // which is effectively where the value is first usable. BasicBlock *DomBB = Inst.getParent(); if (InvokeInst *Inv = dyn_cast(&Inst)) DomBB = Inv->getNormalDest(); @@ -101,10 +109,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. - for (SmallVectorImpl::const_iterator BBI = ExitBlocks.begin(), - BBE = ExitBlocks.end(); - BBI != BBE; ++BBI) { - BasicBlock *ExitBB = *BBI; + for (BasicBlock *ExitBB : ExitBlocks) { if (!DT.dominates(DomNode, DT.getNode(ExitBB))) continue; @@ -113,7 +118,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, continue; PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB), - Inst.getName() + ".lcssa", ExitBB->begin()); + Inst.getName() + ".lcssa", &ExitBB->front()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { @@ -148,26 +153,26 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. - for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + for (Use *UseToRewrite : UsesToRewrite) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses in // the same block. It assumes the PHI we inserted is at the end of the // block. - Instruction *User = cast(UsesToRewrite[i]->getUser()); + Instruction *User = cast(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast(User)) - UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); + UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. - if (UsesToRewrite[i]->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); - UsesToRewrite[i]->set(UserBB->begin()); + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); + UseToRewrite->set(&UserBB->front()); continue; } // Otherwise, do full PHI insertion. - SSAUpdate.RewriteUse(*UsesToRewrite[i]); + SSAUpdate.RewriteUse(*UseToRewrite); } // Post process PHI instructions that were inserted into another disjoint loop @@ -190,10 +195,9 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, } // Remove PHI nodes that did not have any uses rewritten. 
- for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { - if (AddedPHIs[i]->use_empty()) - AddedPHIs[i]->eraseFromParent(); - } + for (PHINode *PN : AddedPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); return true; } @@ -205,8 +209,8 @@ blockDominatesAnExit(BasicBlock *BB, DominatorTree &DT, const SmallVectorImpl &ExitBlocks) { DomTreeNode *DomNode = DT.getNode(BB); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i]))) + for (BasicBlock *ExitBB : ExitBlocks) + if (DT.dominates(DomNode, DT.getNode(ExitBB))) return true; return false; @@ -227,25 +231,22 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, rewrite those uses. - for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end(); - BBI != BBE; ++BBI) { - BasicBlock *BB = *BBI; - + for (BasicBlock *BB : L.blocks()) { // For large loops, avoid use-scanning by using dominance information: In // particular, if a block does not dominate any of the loop exits, then none // of the values defined in the block could be used outside the loop. if (!blockDominatesAnExit(BB, DT, ExitBlocks)) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. - if (I->use_empty() || - (I->hasOneUse() && I->user_back()->getParent() == BB && - !isa(I->user_back()))) + if (I.use_empty() || + (I.hasOneUse() && I.user_back()->getParent() == BB && + !isa(I.user_back()))) continue; - Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI); + Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI); } } @@ -266,8 +267,8 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, bool Changed = false; // Recurse depth-first through inner loops. - for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I) - Changed |= formLCSSARecursively(**I, DT, LI, SE); + for (Loop *SubLoop : L.getSubLoops()) + Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); Changed |= formLCSSA(L, DT, LI, SE); return Changed; @@ -296,8 +297,10 @@ struct LCSSA : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addPreservedID(LoopSimplifyID); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } }; } @@ -306,6 +309,8 @@ char LCSSA::ID = 0; INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } @@ -317,7 +322,8 @@ bool LCSSA::runOnFunction(Function &F) { bool Changed = false; LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = getAnalysisIfAvailable(); + auto *SEWP = getAnalysisIfAvailable(); + SE = SEWP ? &SEWP->getSE() : nullptr; // Simplify each loop nest in the function. 
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index 6b2d405b1f28..ece0ad4dbf44 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = TransformUtils parent = Transforms -required_libraries = Analysis Core IPA Support +required_libraries = Analysis Core Support diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index ba8af47b54e1..e75163f323df 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -17,10 +17,11 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -188,9 +189,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BasicBlock *BB = SI->getParent(); // Remove entries from PHI nodes which we no longer branch to... - for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else @@ -230,6 +230,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SIDef->getValue().getZExtValue())); } + // Update make.implicit metadata to the newly-created conditional branch. + MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); + if (MakeImplicitMD) + NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); + // Delete the old switch. SI->eraseFromParent(); return true; @@ -283,8 +288,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa(I)) return false; - // We don't want the landingpad instruction removed by anything this general. - if (isa(I)) + // We don't want the landingpad-like instructions removed by anything this + // general. + if (I->isEHPad()) return false; // We don't want debug info removed by anything this general, unless @@ -414,6 +420,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, return false; } +static bool +simplifyAndDCEInstruction(Instruction *I, + SmallSetVector &WorkList, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + + return true; + } + + if (Value *SimpleV = SimplifyInstruction(I, DL)) { + // Add the users to the worklist. CAREFUL: an instruction can use itself, + // in the case of a phi node. 
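// The operand-nulling loop in simplifyAndDCEInstruction above is the classic
// DCE trick: before erasing a dead node, drop its operand references and
// enqueue any operand that just lost its last user. A toy graph version of
// the same bookkeeping (Node and its Uses count are made-up stand-ins):
#include <vector>

struct Node {
  std::vector<Node *> Operands;
  int Uses = 0;
};

void dropOperandsAndEnqueue(Node *N, std::vector<Node *> &WorkList) {
  for (Node *&Op : N->Operands) {
    Node *OpV = Op;
    Op = nullptr;                  // mirrors I->setOperand(i, nullptr)
    if (OpV && OpV != N && --OpV->Uses == 0)
      WorkList.push_back(OpV);     // operand became dead; revisit it later
  }
  // ...the real code then calls I->eraseFromParent()...
}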
+ for (User *U : I->users()) + if (U != I) + WorkList.insert(cast(U)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + I->eraseFromParent(); + return true; + } + return false; +} + /// SimplifyInstructionsInBlock - Scan the specified basic block and try to /// simplify any instructions in it and recursively delete dead instructions. /// @@ -422,30 +471,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); #ifndef NDEBUG // In debug builds, ensure that the terminator of the block is never replaced // or deleted by these simplifications. The idea of simplification is that it // cannot introduce new instructions, and there is no way to replace the // terminator of a block without introducing a new instruction. - AssertingVH TerminatorVH(--BB->end()); + AssertingVH TerminatorVH(&BB->back()); #endif - for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + SmallSetVector WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) { assert(!BI->isTerminator()); - Instruction *Inst = BI++; + Instruction *I = &*BI; + ++BI; - WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TLI)) { - MadeChange = true; - if (BIHandle != BI) - BI = BB->begin(); - continue; - } + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. + if (!WorkList.count(I)) + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); - if (BIHandle != BI) - BI = BB->begin(); + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); } return MadeChange; } @@ -808,7 +861,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // Copy over any phi, debug or lifetime instruction. BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); } else { while (PHINode *PN = dyn_cast(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. @@ -1017,8 +1071,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, LI)) return true; - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), LI); + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. 
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic( + LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + DbgValue->insertAfter(LI); return true; } @@ -1034,8 +1093,8 @@ bool llvm::LowerDbgDeclare(Function &F) { DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); SmallVector Dbgs; for (auto &FI : F) - for (BasicBlock::iterator BI : FI) - if (auto DDI = dyn_cast(BI)) + for (Instruction &BI : FI) + if (auto DDI = dyn_cast(&BI)) Dbgs.push_back(DDI); if (Dbgs.empty()) @@ -1060,9 +1119,13 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. + SmallVector NewDIExpr; + auto *DIExpr = DDI->getExpression(); + NewDIExpr.push_back(dwarf::DW_OP_deref); + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DDI->getExpression(), DDI->getDebugLoc(), - CI); + DIB.createExpression(NewDIExpr), + DDI->getDebugLoc(), CI); } DDI->eraseFromParent(); } @@ -1082,9 +1145,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return nullptr; } -bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref) { - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); +bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); if (!DDI) return false; DebugLoc Loc = DDI->getDebugLoc(); @@ -1092,29 +1156,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - if (Deref) { + if (Deref || Offset) { // Create a copy of the original DIDescriptor for user variable, prepending // "deref" operation to a list of address elements, as new llvm.dbg.declare // will take a value storing address of the memory for variable, not // alloca itself. SmallVector NewDIExpr; - NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Deref) + NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Offset > 0) { + NewDIExpr.push_back(dwarf::DW_OP_plus); + NewDIExpr.push_back(Offset); + } else if (Offset < 0) { + NewDIExpr.push_back(dwarf::DW_OP_minus); + NewDIExpr.push_back(-Offset); + } if (DIExpr) NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpr = Builder.createExpression(NewDIExpr); } - // Insert llvm.dbg.declare in the same basic block as the original alloca, - // and remove old llvm.dbg.declare. - BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB); + // Insert llvm.dbg.declare immediately after the original alloca, and remove + // old llvm.dbg.declare. + Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); DDI->eraseFromParent(); return true; } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. 
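// Both debug-info hunks above build the new expression by prepending
// operations and then appending the old elements unchanged. The same
// construction with plain vectors; the DW_OP_* values are the actual DWARF
// constants, while the helper itself is only a sketch of the patch's logic:
#include <cstdint>
#include <vector>

const uint64_t DW_OP_deref = 0x06;
const uint64_t DW_OP_plus = 0x22;
const uint64_t DW_OP_minus = 0x1c;

std::vector<uint64_t> buildExpr(bool Deref, int Offset,
                                const std::vector<uint64_t> &Old) {
  std::vector<uint64_t> E;
  if (Deref)
    E.push_back(DW_OP_deref);
  if (Offset > 0) {
    E.push_back(DW_OP_plus);
    E.push_back(static_cast<uint64_t>(Offset));
  } else if (Offset < 0) {
    E.push_back(DW_OP_minus);
    E.push_back(static_cast<uint64_t>(-Offset));
  }
  E.insert(E.end(), Old.begin(), Old.end()); // original elements, unchanged
  return E;
}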
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, bool Deref, int Offset) { + return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, + Deref, Offset); +} + +void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -1132,7 +1207,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { new UnreachableInst(I->getContext(), I); // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); + BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); @@ -1142,8 +1217,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// changeToCall - Convert the specified invoke into a normal call. static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -1162,7 +1240,7 @@ static bool markAliveBlocks(Function &F, SmallPtrSetImpl &Reachable) { SmallVector Worklist; - BasicBlock *BB = F.begin(); + BasicBlock *BB = &F.front(); Worklist.push_back(BB); Reachable.insert(BB); bool Changed = false; @@ -1187,7 +1265,7 @@ static bool markAliveBlocks(Function &F, if (MakeUnreachable) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; break; } @@ -1201,7 +1279,7 @@ static bool markAliveBlocks(Function &F, ++BBI; if (!isa(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; } break; @@ -1253,6 +1331,40 @@ static bool markAliveBlocks(Function &F, return Changed; } +void llvm::removeUnwindEdge(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + + if (auto *II = dyn_cast(TI)) { + changeToCall(II); + return; + } + + TerminatorInst *NewTI; + BasicBlock *UnwindDest; + + if (auto *CRI = dyn_cast(TI)) { + NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast(TI)) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), + CatchSwitch->getName(), CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + + NewTI = NewCatchSwitch; + UnwindDest = CatchSwitch->getUnwindDest(); + } else { + llvm_unreachable("Could not find unwind successor"); + } + + NewTI->takeName(TI); + NewTI->setDebugLoc(TI->getDebugLoc()); + UnwindDest->removePredecessor(BB); + TI->replaceAllUsesWith(NewTI); + TI->eraseFromParent(); +} + /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. 
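// removeUnwindEdge above dispatches on the terminator's concrete kind and
// rebuilds it without its unwind successor. A standalone analogue of that
// dispatch using dynamic_cast; LLVM uses its own cheaper dyn_cast<>
// machinery, and the types here are stand-ins:
struct TermLike { virtual ~TermLike() = default; };
struct InvokeLike : TermLike {};
struct CleanupReturnLike : TermLike { bool HasUnwindDest = true; };

void removeUnwindEdgeLike(TermLike *T) {
  if (dynamic_cast<InvokeLike *>(T))
    return; // fold the invoke into a plain call, as changeToCall does
  if (auto *CR = dynamic_cast<CleanupReturnLike *>(T)) {
    CR->HasUnwindDest = false; // rebuild the terminator with no unwind dest
    return;
  }
  // The real code also handles catchswitch and is unreachable otherwise.
}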
@@ -1270,17 +1382,18 @@ bool llvm::removeUnreachableBlocks(Function &F) { // Loop over all of the basic blocks that are not reachable, dropping all of // their internal references... for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + if (Reachable.count(&*BB)) continue; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE; + ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); + (*SI)->removePredecessor(&*BB); BB->dropAllReferences(); } for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(I)) + if (!Reachable.count(&*I)) I = F.getBasicBlockList().erase(I); else ++I; @@ -1288,9 +1401,10 @@ bool llvm::removeUnreachableBlocks(Function &F) { return true; } -void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs) { +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef KnownIDs) { SmallVector, 4> Metadata; - K->dropUnknownMetadata(KnownIDs); + K->dropUnknownNonDebugMetadata(KnownIDs); K->getAllMetadataOtherThanDebugLoc(Metadata); for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { unsigned Kind = Metadata[i].first; @@ -1326,8 +1440,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRefsetMetadata(Kind, JMD); break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; + case LLVMContext::MD_align: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; } } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. + if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa(K) || isa(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); } unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, @@ -1349,3 +1484,40 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, } return Count; } + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast(U.getUser()); + if (DT.dominates(BB, I->getParent())) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} + +bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { + if (isa(CS.getInstruction())) + // Most LLVM intrinsics are things which can never take a safepoint. + // As a result, we don't need to have the stack parsable at the + // callsite. This is a highly useful optimization since intrinsic + // calls are fairly prevalent, particularly in debug builds. + return true; + + // Check if the function is specifically marked as a gc leaf function. 
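// combineMetadata above merges per kind: most kinds must agree or be dropped,
// while alignment/dereferenceable kinds keep the "most generic" (weaker) of
// the two claims, since the merged instruction must satisfy both originals.
// The same rule on plain numbers, as a sketch:
#include <algorithm>
#include <optional>

std::optional<unsigned> mergeAlignClaim(std::optional<unsigned> A,
                                        std::optional<unsigned> B) {
  if (!A || !B)
    return std::nullopt;   // one side makes no claim: drop the metadata
  return std::min(*A, *B); // the smaller guarantee is the safe one
}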
+ // + // TODO: we should be checking the attributes on the call site as well. + if (const Function *F = CS.getCalledFunction()) + return F->hasFnAttribute("gc-leaf-function"); + + return false; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 5c98043e4632..1fa469595d16 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -44,11 +44,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -78,7 +81,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl &SplitPreds, Loop *L) { // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; + Function::iterator BBI = --NewBB->getIterator(); for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { if (&*BBI == SplitPreds[i]) return; @@ -92,9 +95,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, // block that neighbors a BB actually in the loop. BasicBlock *FoundBB = nullptr; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { FoundBB = SplitPreds[i]; break; } @@ -112,17 +114,10 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Header = L->getHeader(); - // Get analyses that we try to update. - auto *AA = PP->getAnalysisIfAvailable(); - auto *DTWP = PP->getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = PP->getAnalysisIfAvailable(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - // Compute the set of predecessors of the loop that are not in the loop. SmallVector OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -141,8 +136,10 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Split out the loop pre-header. BasicBlock *PreheaderBB; - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - AA, DT, LI, PreserveLCSSA); + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << PreheaderBB->getName() << "\n"); @@ -159,8 +156,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { /// This method is used to split exit blocks that have predecessors outside of /// the loop. 
static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, Pass *PP) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { SmallVector LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; @@ -175,10 +172,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewExitBB = nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT, - LI, PreserveLCSSA); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, + PreserveLCSSA); + if (!NewExitBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); @@ -206,8 +203,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. -static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, - DominatorTree *DT, +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { @@ -216,7 +212,6 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); PN->eraseFromParent(); continue; } @@ -251,18 +246,18 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, /// created. /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, Pass *PP, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, AssumptionCache *AC) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. - assert(!L->getHeader()->isLandingPad() && - "Can't insert backedge to landing pad"); + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC); + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -286,11 +281,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (SE) SE->forgetLoop(L); - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - AA, DT, LI, PreserveLCSSA); + DT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -357,7 +349,6 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. 
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); @@ -369,8 +360,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (!Preheader) return nullptr; - // The header is not a landing pad; preheader insertion should ensure this. - assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // The header is not an EH pad; preheader insertion should ensure this. + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector BackedgeBlocks; @@ -394,7 +385,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. - Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in @@ -443,7 +434,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); - if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } @@ -470,15 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, } /// \brief Simplify one loop and queue further loops for simplification. -/// -/// FIXME: Currently this accepts both lots of analyses that it uses and a raw -/// Pass pointer. The Pass pointer is used by numerous utilities to update -/// specific analyses. Rather than a pass it would be much cleaner and more -/// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, - AssumptionCache *AC) { + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; ReprocessLoop: @@ -544,7 +529,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, PP); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (Preheader) { ++NumInserted; Changed = true; @@ -568,7 +553,7 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { + if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) { ++NumInserted; Changed = true; } @@ -585,7 +570,7 @@ ReprocessLoop: // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = - separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) { + separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -602,7 +587,7 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. 
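// The HasUniqueIncomingValue logic referenced above avoids creating a PHI in
// the merged backedge block when every backedge carries the same value. The
// check in isolation, as a generic standalone helper:
#include <optional>
#include <vector>

template <typename T>
std::optional<T> uniqueIncomingValue(const std::vector<T> &Incoming) {
  if (Incoming.empty())
    return std::nullopt;
  for (const T &V : Incoming)
    if (!(V == Incoming.front()))
      return std::nullopt;   // values differ: a real PHI is required
  return Incoming.front();   // one value: forward it and drop the PHI
}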
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); if (LoopLatch) { ++NumInserted; Changed = true; @@ -618,7 +603,6 @@ ReprocessLoop: for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { - if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -654,7 +638,7 @@ ReprocessLoop: bool AllInvariant = true; bool AnyInvariant = false; for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Skip debug info intrinsics. if (isa(Inst)) continue; @@ -716,9 +700,9 @@ ReprocessLoop: return Changed; } -bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA, ScalarEvolution *SE, - AssumptionCache *AC) { +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -734,8 +718,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, AC); + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + AC, PreserveLCSSA); return Changed; } @@ -747,9 +731,6 @@ namespace { initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); } - // AA - If we have an alias analysis object to update, this is it, otherwise - // this is null. - AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; @@ -767,8 +748,11 @@ namespace { AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -784,6 +768,9 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -796,15 +783,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; - AA = getAnalysisIfAvailable(); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = getAnalysisIfAvailable(); + auto *SEWP = getAnalysisIfAvailable(); + SE = SEWP ? &SEWP->getSE() : nullptr; AC = &getAnalysis().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. 
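// simplifyLoop/simplifyOneLoop above run a depth-first worklist in which the
// callee may enqueue more work (separateNestedLoop re-enqueues the new outer
// loop). The bare idiom, runnable standalone with ints in place of loops:
#include <vector>

bool processOne(int Item, std::vector<int> &Worklist) {
  if (Item > 0)
    Worklist.push_back(Item - 1); // processing may enqueue follow-up work
  return Item % 2 == 0;
}

int main() {
  std::vector<int> Worklist = {3};
  bool Changed = false;
  while (!Worklist.empty()) {
    int Item = Worklist.back(); // LLVM spells these two lines
    Worklist.pop_back();        // "Worklist.pop_back_val()"
    Changed |= processOne(Item, Worklist);
  }
  return Changed ? 0 : 1;
}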
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 1dbce4746835..2499b88741fe 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -73,7 +73,7 @@ static inline void RemapInstruction(Instruction *I, /// of loops that have already been forgotten to prevent redundant, expensive /// calls to ScalarEvolution::forgetLoop. Returns the new combined block. static BasicBlock * -FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, SmallPtrSetImpl &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and @@ -109,12 +109,10 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, // Erase basic block from the function... // ScalarEvolution holds references to loop exit blocks. - if (LPM) { - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { - if (Loop *L = LI->getLoopFor(BB)) { - if (ForgottenLoops.insert(L).second) - SE->forgetLoop(L); - } + if (SE) { + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L).second) + SE->forgetLoop(L); } } LI->removeBlock(BB); @@ -155,15 +153,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// /// The LoopInfo Analysis that is passed will be kept consistent. /// -/// If a LoopPassManager is passed in, and the loop is fully removed, it will be -/// removed from the LoopPassManager as well. LPM can also be NULL. -/// -/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available from the Pass it must also preserve those analyses. +/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and +/// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, - unsigned TripMultiple, LoopInfo *LI, Pass *PP, - LPPassManager *LPM, AssumptionCache *AC) { + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -220,6 +216,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? 
bool CompletelyUnroll = Count == TripCount; + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -227,13 +229,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && - !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT, + PreserveLCSSA)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - ScalarEvolution *SE = - PP ? PP->getAnalysisIfAvailable() : nullptr; if (SE) SE->forgetLoop(L); @@ -392,7 +393,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - ::RemapInstruction(I, LastValueMap); + ::RemapInstruction(&*I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. @@ -432,8 +433,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // For a complete unroll, make the last iteration end with a branch // to the exit block. - if (CompletelyUnroll && j == 0) { - Dest = LoopExit; + if (CompletelyUnroll) { + if (j == 0) + Dest = LoopExit; NeedConditional = false; } @@ -473,7 +475,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst *Term = cast(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } @@ -483,29 +485,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // whole function's cache. AC->clear(); - DominatorTree *DT = nullptr; - if (PP) { - // FIXME: Reconstruct dom info, because it is not preserved properly. - // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTreeWrapperPass *DTWP = - PP->getAnalysisIfAvailable()) { - DT = &DTWP->getDomTree(); - DT->recalculate(*L->getHeader()->getParent()); - } + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DT) + DT->recalculate(*L->getHeader()->getParent()); - // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll) { - SmallVector DeadInsts; - simplifyLoopIVs(L, SE, LPM, DeadInsts); + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, DeadInsts); - // Aggressively clean up dead instructions that simplifyLoopIVs already - // identified. Any remaining should be cleaned up below. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. 
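// AllExitsAreInsideParentLoop above is a plain std::all_of over the loop's
// exit blocks. The same predicate shape standalone, with ints standing in
// for BasicBlocks:
#include <algorithm>
#include <set>
#include <vector>

bool allExitsInside(const std::set<int> &ParentBlocks,
                    const std::vector<int> &ExitBlocks) {
  return std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                     [&](int BB) { return ParentBlocks.count(BB) != 0; });
}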
+    while (!DeadInsts.empty())
+      if (Instruction *Inst =
+              dyn_cast_or_null(&*DeadInsts.pop_back_val()))
+        RecursivelyDeleteTriviallyDeadInstructions(Inst);
   }
+
   // At this point, the code is well formed. We now do a quick sweep over the
   // inserted code, doing constant propagation and dead code elimination as we
   // go.
@@ -514,7 +511,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   for (std::vector::const_iterator BB = NewLoopBlocks.begin(),
        BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
-      Instruction *Inst = I++;
+      Instruction *Inst = &*I++;

       if (isInstructionTriviallyDead(Inst))
         (*BB)->getInstList().erase(Inst);
@@ -529,29 +526,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   ++NumUnrolled;

   Loop *OuterL = L->getParentLoop();
-  // Remove the loop from the LoopPassManager if it's completely removed.
-  if (CompletelyUnroll && LPM != nullptr)
-    LPM->deleteLoopFromQueue(L);
+  // Update LoopInfo if the loop is completely removed.
+  if (CompletelyUnroll)
+    LI->updateUnloop(L);
+
   // If we have a pass and a DominatorTree we should re-simplify impacted loops
   // to ensure subsequent analyses can rely on this form. We want to simplify
   // at least one layer outside of the loop that was unrolled so that any
   // changes to the parent loop exposed by the unrolling are considered.
-  if (PP && DT) {
+  if (DT) {
     if (!OuterL && !CompletelyUnroll)
       OuterL = L;
     if (OuterL) {
-      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
+      bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);

       // LCSSA must be performed on the outermost affected loop. The unrolled
       // loop's last loop latch is guaranteed to be in the outermost loop after
-      // deleteLoopFromQueue updates LoopInfo.
+      // LoopInfo's been updated by updateUnloop.
Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index add5432aa276..0d68f18ad0e5 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -62,8 +62,8 @@ STATISTIC(NumRuntimeUnrolled, static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, - ValueToValueMapTy &VMap, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, Pass *P) { + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); @@ -127,8 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(pred_begin(Exit), pred_end(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, - P->mustPreserveAnalysisID(LCSSAID)); + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPH); InsertPt->eraseFromParent(); @@ -150,7 +150,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *NewLoop = 0; + Loop *NewLoop = nullptr; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); @@ -206,9 +206,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *NewPHI = cast(VMap[I]); + PHINode *NewPHI = cast(VMap[&*I]); if (UnrollProlog) { - VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); @@ -279,7 +279,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopInfo *LI, - LPPassManager *LPM) { + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; @@ -291,9 +292,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Use Scalar Evolution to compute the trip count. 
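// Runtime unrolling, as set up above, peels TripCount % Count iterations into
// a prolog so the unrolled body always runs a whole number of passes. The
// arithmetic with made-up concrete numbers:
#include <cstdint>
#include <iostream>

int main() {
  uint64_t BECount = 9;             // backedge-taken count from SCEV
  uint64_t TripCount = BECount + 1; // the "+1" added via getAddExpr above
  uint64_t Count = 4;               // unroll factor
  uint64_t PrologIters = TripCount % Count;
  std::cout << PrologIters << " prolog iterations, then "
            << (TripCount - PrologIters) / Count << " unrolled passes\n";
}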
This allows more // loops to be unrolled than relying on induction var simplification - if (!LPM) - return false; - ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); if (!SE) return false; @@ -308,7 +306,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = - SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa(TripCountSC)) return false; @@ -333,10 +331,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); - // Grab analyses that we preserve. - auto *DTWP = LPM->getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the @@ -397,8 +391,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, VMap, LI); // Insert the cloned blocks into function just before the original loop - F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], - F->end()); + F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); // Rewrite the cloned instruction operands to use the values // created when the clone is created. @@ -406,7 +400,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) { - RemapInstruction(I, VMap, + RemapInstruction(&*I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); } } @@ -414,8 +408,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Connect the prolog code to the original loop and update the // PHI functions. 
BasicBlock *LastLoopBB = cast(VMap[Latch]); - ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, - /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass()); + ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI, + PreserveLCSSA); NumRuntimeUnrolled++; return true; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 5cbde94a98ed..e03880526bfa 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -34,6 +34,124 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, return true; } +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_IntegerOr: + case RK_IntegerAnd: + case RK_IntegerXor: + case RK_IntegerMinMax: + return true; + } + return false; +} + +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { + return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); +} + +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_FloatAdd: + case RK_FloatMult: + return true; + } + return false; +} + +Instruction * +RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { + if (!Phi->hasOneUse()) + return Phi; + + const APInt *M = nullptr; + Instruction *I, *J = cast(Phi->use_begin()->getUser()); + + // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT + // with a new integer type of the corresponding bit width. + if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), + m_And(m_APInt(M), m_Instruction(I))))) { + int32_t Bits = (*M + 1).exactLogBase2(); + if (Bits > 0) { + RT = IntegerType::get(Phi->getContext(), Bits); + Visited.insert(Phi); + CI.insert(J); + return J; + } + } + return Phi; +} + +bool RecurrenceDescriptor::getSourceExtensionKind( + Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { + + SmallVector Worklist; + bool FoundOneOperand = false; + unsigned DstSize = RT->getPrimitiveSizeInBits(); + Worklist.push_back(Exit); + + // Traverse the instructions in the reduction expression, beginning with the + // exit value. + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (Use &U : I->operands()) { + + // Terminate the traversal if the operand is not an instruction, or we + // reach the starting value. + Instruction *J = dyn_cast(U.get()); + if (!J || J == Start) + continue; + + // Otherwise, investigate the operation if it is also in the expression. + if (Visited.count(J)) { + Worklist.push_back(J); + continue; + } + + // If the operand is not in Visited, it is not a reduction operation, but + // it does feed into one. 
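// lookThroughAnd above recognizes masks of the form 2^n - 1 via
// (*M + 1).exactLogBase2(). The same test on a plain 64-bit integer (APInt
// handles arbitrary widths; this sketch does not):
#include <cstdint>

// Returns n if M == 2^n - 1 with n > 0, otherwise -1.
int promotedBitWidth(uint64_t M) {
  uint64_t P = M + 1;               // 2^n - 1 becomes 2^n
  if (P == 0 || (P & (P - 1)) != 0)
    return -1;                      // overflowed or not a power of two
  int N = 0;
  while (P >>= 1)
    ++N;
  return N > 0 ? N : -1;            // the patch also requires Bits > 0
}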
Make sure it is either a single-use sign- or + // zero-extend instruction. + CastInst *Cast = dyn_cast(J); + bool IsSExtInst = isa(J); + if (!Cast || !Cast->hasOneUse() || !(isa(J) || IsSExtInst)) + return false; + + // Ensure the source type of the extend is no larger than the reduction + // type. It is not necessary for the types to be identical. + unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + if (SrcSize > DstSize) + return false; + + // Furthermore, ensure that all such extends are of the same kind. + if (FoundOneOperand) { + if (IsSigned != IsSExtInst) + return false; + } else { + FoundOneOperand = true; + IsSigned = IsSExtInst; + } + + // Lastly, if the source type of the extend matches the reduction type, + // add the extend to CI so that we can avoid accounting for it in the + // cost model. + if (SrcSize == DstSize) + CI.insert(Cast); + } + } + return true; +} + bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes) { @@ -68,10 +186,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, unsigned NumCmpSelectPatternInst = 0; InstDesc ReduxDesc(false, nullptr); + // Data used for determining if the recurrence has been type-promoted. + Type *RecurrenceType = Phi->getType(); + SmallPtrSet CastInsts; + Instruction *Start = Phi; + bool IsSigned = false; + SmallPtrSet VisitedInsts; SmallVector Worklist; - Worklist.push_back(Phi); - VisitedInsts.insert(Phi); + + // Return early if the recurrence kind does not match the type of Phi. If the + // recurrence kind is arithmetic, we attempt to look through AND operations + // resulting from the type promotion performed by InstCombine. Vector + // operations are not limited to the legal integer widths, so we may be able + // to evaluate the reduction in the narrower width. + if (RecurrenceType->isFloatingPointTy()) { + if (!isFloatingPointRecurrenceKind(Kind)) + return false; + } else { + if (!isIntegerRecurrenceKind(Kind)) + return false; + if (isArithmeticRecurrenceKind(Kind)) + Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } + + Worklist.push_back(Start); + VisitedInsts.insert(Start); // A value in the reduction can be used: // - By the reduction: @@ -110,10 +250,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, !VisitedInsts.count(dyn_cast(Cur->getOperand(0)))) return false; - // Any reduction instruction must be of one of the allowed kinds. - ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); - if (!ReduxDesc.isRecurrence()) - return false; + // Any reduction instruction must be of one of the allowed kinds. We ignore + // the starting value (the Phi or an AND instruction if the Phi has been + // type-promoted). + if (Cur != Start) { + ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isRecurrence()) + return false; + } // A reduction operation must only have one use of the reduction value. if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && @@ -131,7 +275,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. - FoundReduxOp |= !IsAPhi; + FoundReduxOp |= !IsAPhi && Cur != Start; // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. 
This way we are going to have seen all inputs to PHI
@@ -193,6 +337,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;

+  // If we think Phi may have been type-promoted, we also need to ensure that
+  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
+  // so, we will be able to evaluate the reduction in the narrower bit width.
+  if (Start != Phi)
+    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+                                IsSigned, VisitedInsts, CastInsts))
+      return false;
+
   // We found a reduction var if we have reached the original phi node and we
   // only have a single instruction with out-of-loop users.
@@ -200,9 +352,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   // is saved as part of the RecurrenceDescriptor.

   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
-                          ReduxDesc.getMinMaxKind());
-
+  RecurrenceDescriptor RD(
+      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
   RedDes = RD;

   return true;
@@ -263,14 +415,14 @@ RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
   bool FP = I->getType()->isFloatingPointTy();
-  bool FastMath = FP && I->hasUnsafeAlgebra();
+  Instruction *UAI = Prev.getUnsafeAlgebraInst();
+  if (!UAI && FP && !I->hasUnsafeAlgebra())
+    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
   switch (I->getOpcode()) {
   default:
     return InstDesc(false, I);
   case Instruction::PHI:
-    if (FP &&
-        (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
-      return InstDesc(false, I);
     return InstDesc(I, Prev.getMinMaxKind());
   case Instruction::Sub:
   case Instruction::Add:
@@ -284,10 +436,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
   case Instruction::Xor:
     return InstDesc(Kind == RK_IntegerXor, I);
   case Instruction::FMul:
-    return InstDesc(Kind == RK_FloatMult && FastMath, I);
+    return InstDesc(Kind == RK_FloatMult, I, UAI);
   case Instruction::FSub:
   case Instruction::FAdd:
-    return InstDesc(Kind == RK_FloatAdd && FastMath, I);
+    return InstDesc(Kind == RK_FloatAdd, I, UAI);
   case Instruction::FCmp:
   case Instruction::ICmp:
   case Instruction::Select:
@@ -442,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
     break;
   }

+  // We only match FP sequences with unsafe algebra, so we can unconditionally
+  // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + Builder.SetFastMathFlags(FMF); + Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); @@ -452,8 +611,54 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, return Select; } -bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, - ConstantInt *&StepValue) { +InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, + ConstantInt *Step) + : StartValue(Start), IK(K), StepValue(Step) { + assert(IK != IK_NoInduction && "Not an induction"); + assert(StartValue && "StartValue is null"); + assert(StepValue && !StepValue->isZero() && "StepValue is zero"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + assert(StepValue->getType()->isIntegerTy() && + "StepValue is not an integer"); +} + +int InductionDescriptor::getConsecutiveDirection() const { + if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) + return StepValue->getSExtValue(); + return 0; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const { + switch (IK) { + case IK_IntInduction: + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (StepValue->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateAdd(StartValue, Index); + + case IK_PtrInduction: + assert(Index->getType() == StepValue->getType() && + "Index type does not match StepValue type"); + if (StepValue->isMinusOne()) + Index = B.CreateNeg(Index); + else if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateGEP(nullptr, StartValue, Index); + + case IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + InductionDescriptor &D) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -467,6 +672,10 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, return false; } + assert(AR->getLoop()->getHeader() == Phi->getParent() && + "PHI is an AddRec for a different loop?!"); + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast(Step); @@ -475,7 +684,7 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, ConstantInt *CV = C->getValue(); if (PhiTy->isIntegerTy()) { - StepValue = CV; + D = InductionDescriptor(StartValue, IK_IntInduction, CV); return true; } @@ -494,6 +703,27 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; - StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } + +/// \brief Returns the instructions that use values defined in the loop. 
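// InductionDescriptor::transform above materializes Start + Index * Step and
// special-cases Step == +/-1 to avoid the multiply. The integer case reduces
// to this plain arithmetic:
long transformIndex(long Start, long Index, long Step) {
  if (Step == -1)
    return Start - Index; // the CreateSub fast path
  if (Step != 1)
    Index *= Step;        // CreateMul only when actually needed
  return Start + Index;   // CreateAdd
}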
+SmallVector llvm::findDefsUsedOutsideOfLoop(Loop *L) { + SmallVector UsedOutside; + + for (auto *Block : L->getBlocks()) + // FIXME: I believe that this could use copy_if if the Inst reference could + // be adapted into a pointer. + for (auto &Inst : *Block) { + auto Users = Inst.users(); + if (std::any_of(Users.begin(), Users.end(), [&](User *U) { + auto *Use = cast(U); + return !L->contains(Use->getParent()); + })) + UsedOutside.push_back(&Inst); + } + + return UsedOutside; +} diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index 832079d2cf63..9a2a06cf6891 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -13,43 +13,81 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopVersioning.h" using namespace llvm; LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector *PtrToPartition) - : VersionedLoop(L), NonVersionedLoop(nullptr), - PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) { + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks) + : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), + SE(SE) { assert(L->getExitBlock() && "No single exit block"); assert(L->getLoopPreheader() && "No preheader"); + if (UseLAIChecks) { + setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); + setSCEVChecks(LAI.PSE.getUnionPredicate()); + } } -bool LoopVersioning::needsRuntimeChecks() const { - return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition); +void LoopVersioning::setAliasChecks( + const SmallVector Checks) { + AliasChecks = std::move(Checks); } -void LoopVersioning::versionLoop(Pass *P) { +void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { + Preds = std::move(Check); +} + +void LoopVersioning::versionLoop( + const SmallVectorImpl &DefsUsedOutside) { Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; + Value *SCEVRuntimeCheck; + Value *RuntimeCheck = nullptr; + // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); + BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); + LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), + "scev.check"); + SCEVRuntimeCheck = + Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); + auto *CI = dyn_cast(SCEVRuntimeCheck); + + // Discard the SCEV runtime check if it is always true. 
+  if (CI && CI->isZero())
+    SCEVRuntimeCheck = nullptr;
+
+  if (MemRuntimeCheck && SCEVRuntimeCheck) {
+    RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
+                                          SCEVRuntimeCheck, "ldist.safe");
+    if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
+      I->insertBefore(RuntimeCheckBB->getTerminator());
+  } else
+    RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+  assert(RuntimeCheck && "called even though we don't need "
+                         "any runtime checks");
+
   // Rename the block to make the IR more readable.
-  MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck");
+  RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+                          ".lver.check");
 
   // Create empty preheader for the loop (and after cloning for the
   // non-versioned loop).
-  BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+  BasicBlock *PH =
+      SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
   PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
 
   // Clone the loop including the preheader.
@@ -58,20 +96,23 @@ void LoopVersioning::versionLoop(Pass *P) {
   // block is a join between the two loops.
   SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
   NonVersionedLoop =
-      cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig",
-                             LI, DT, NonVersionedLoopBlocks);
+      cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+                             ".lver.orig", LI, DT, NonVersionedLoopBlocks);
   remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
 
   // Insert the conditional branch based on the result of the memchecks.
-  Instruction *OrigTerm = MemCheckBB->getTerminator();
+  Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
-                     VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
-                     OrigTerm);
+                     VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block. This is now dominated by the
   // memchecking block.
-  DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
+  DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+  // Adds the necessary PHI nodes for the versioned loops based on the
+  // loop-defined values used outside of the loop.
+  addPHINodes(DefsUsedOutside);
 }
 
 void LoopVersioning::addPHINodes(
@@ -94,7 +135,7 @@ void LoopVersioning::addPHINodes(
     // If not create it.
     if (!PN) {
       PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
-                           PHIBlock->begin());
+                           &PHIBlock->front());
       for (auto *User : Inst->users())
         if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
           User->replaceUsesOfWith(Inst, PN);
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 66d57b069fe7..b0ad4d5e84a1 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -69,7 +69,7 @@ bool LowerInvoke::runOnFunction(Function &F) {
         BranchInst::Create(II->getNormalDest(), II);
 
         // Remove any PHI node entries from the exception destination.
-        II->getUnwindDest()->removePredecessor(BB);
+        II->getUnwindDest()->removePredecessor(&*BB);
 
         // Remove the invoke instruction now.
        BB->getInstList().erase(II);
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 4acd988691d2..52beb1542497 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -49,8 +49,7 @@ namespace {
       return I != Ranges.end() && I->Low <= R.Low;
     }
 
-  /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
-  /// instructions.
+  /// Replace all SwitchInst instructions with chained branch instructions.
   class LowerSwitch : public FunctionPass {
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -78,7 +77,7 @@ namespace {
     typedef std::vector<CaseRange> CaseVector;
     typedef std::vector<CaseRange>::iterator CaseItr;
   private:
-    void processSwitchInst(SwitchInst *SI);
+    void processSwitchInst(SwitchInst *SI,
+                           SmallPtrSetImpl<BasicBlock *> &DeleteList);
 
     BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
                               ConstantInt *LowerBound, ConstantInt *UpperBound,
@@ -116,21 +115,30 @@ FunctionPass *llvm::createLowerSwitchPass() {
 
 bool LowerSwitch::runOnFunction(Function &F) {
   bool Changed = false;
+  SmallPtrSet<BasicBlock *, 8> DeleteList;
 
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
-    BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+    BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
+
+    // If the block is a dead Default block that will be deleted later, don't
+    // waste time processing it.
+    if (DeleteList.count(Cur))
+      continue;
 
     if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
      Changed = true;
-      processSwitchInst(SI);
+      processSwitchInst(SI, DeleteList);
    }
  }
 
+  for (BasicBlock* BB: DeleteList) {
+    DeleteDeadBlock(BB);
+  }
+
   return Changed;
 }
 
-// operator<< - Used for debugging purposes.
-//
+/// Used for debugging purposes.
 static raw_ostream& operator<<(raw_ostream &O,
                                const LowerSwitch::CaseVector &C)
     LLVM_ATTRIBUTE_USED;
@@ -147,23 +155,24 @@ static raw_ostream& operator<<(raw_ostream &O,
   return O << "]";
 }
 
-// \brief Update the first occurrence of the "switch statement" BB in the PHI
-// node with the "new" BB. The other occurrences will:
-//
-// 1) Be updated by subsequent calls to this function. Switch statements may
-// have more than one outcoming edge into the same BB if they all have the same
-// value. When the switch statement is converted these incoming edges are now
-// coming from multiple BBs.
-// 2) Removed if subsequent incoming values now share the same case, i.e.,
-// multiple outcome edges are condensed into one. This is necessary to keep the
-// number of phi values equal to the number of branches to SuccBB.
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outcoming edge into the same BB if they all have the same
+/// value. When the switch statement is converted these incoming edges are now
+/// coming from multiple BBs.
+/// 2) Removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outcome edges are condensed into one. This is necessary to keep the
+/// number of phi values equal to the number of branches to SuccBB.
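+///
+/// A rough illustration (hypothetical IR): if SuccBB starts with
+///   %p = phi i32 [ 1, %OrigBB ], [ 1, %OrigBB ]
+/// because two cases of the switch in OrigBB targeted SuccBB, then after one
+/// call with NewBB replacing the first edge it reads
+///   %p = phi i32 [ 1, %NewBB ], [ 1, %OrigBB ]
+/// and the condensing step below trims surplus entries when edges merge.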
static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
                    unsigned NumMergedCases) {
-  for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI();
+  for (BasicBlock::iterator I = SuccBB->begin(),
+                            IE = SuccBB->getFirstNonPHI()->getIterator();
        I != IE; ++I) {
     PHINode *PN = cast<PHINode>(I);
 
-    // Only update the first occurence.
+    // Only update the first occurrence.
     unsigned Idx = 0, E = PN->getNumIncomingValues();
     unsigned LocalNumMergedCases = NumMergedCases;
     for (; Idx != E; ++Idx) {
@@ -173,7 +182,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
       }
     }
 
-    // Remove additional occurences coming from condensed cases and keep the
+    // Remove additional occurrences coming from condensed cases and keep the
     // number of incoming values equal to the number of branches to SuccBB.
     SmallVector<unsigned, 8> Indices;
     for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
@@ -188,11 +197,11 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
   }
 }
 
-// switchConvert - Convert the switch statement into a binary lookup of
-// the case values. The function recursively builds this tree.
-// LowerBound and UpperBound are used to keep track of the bounds for Val
-// that have already been checked by a block emitted by one of the previous
-// calls to switchConvert in the call stack.
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to switchConvert in the call
+/// stack.
 BasicBlock *
 LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                            ConstantInt *UpperBound, Value *Val,
@@ -278,28 +287,24 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                                       UpperBound, Val, NewNode, OrigBlock,
                                       Default, UnreachableRanges);
 
-  Function::iterator FI = OrigBlock;
-  F->getBasicBlockList().insert(++FI, NewNode);
+  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
   NewNode->getInstList().push_back(Comp);
 
   BranchInst::Create(LBranch, RBranch, Comp, NewNode);
   return NewNode;
 }
 
-// newLeafBlock - Create a new leaf block for the binary lookup tree. It
-// checks if the switch's value == the case's value. If not, then it
-// jumps to the default branch. At this point in the tree, the value
-// can't be another valid case value, so the jump to the "default" branch
-// is warranted.
-//
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
 BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                       BasicBlock* OrigBlock,
                                       BasicBlock* Default) {
   Function* F = OrigBlock->getParent();
   BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
-  Function::iterator FI = OrigBlock;
-  F->getBasicBlockList().insert(++FI, NewLeaf);
+  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
 
   // Emit comparison
   ICmpInst* Comp = nullptr;
@@ -352,7 +357,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   return NewLeaf;
 }
 
-// Clusterify - Transform simple list of Cases into list of CaseRange's
+/// Transform simple list of Cases into list of CaseRange's.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
  unsigned numCmps = 0;
 
@@ -394,10 +399,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
   return numCmps;
 }
 
-// processSwitchInst - Replace the specified switch instruction with a sequence
-// of chained if-then insts in a balanced binary search.
-//
-void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void LowerSwitch::processSwitchInst(SwitchInst *SI,
+                                    SmallPtrSetImpl<BasicBlock *> &DeleteList) {
   BasicBlock *CurBlock = SI->getParent();
   BasicBlock *OrigBlock = CurBlock;
   Function *F = CurBlock->getParent();
@@ -424,7 +429,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   std::vector<IntRange> UnreachableRanges;
 
   if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
-    // Make the bounds tightly fitted around the case value range, becase we
+    // Make the bounds tightly fitted around the case value range, because we
     // know that the value passed to the switch must be exactly one of the case
     // values.
     assert(!Cases.empty());
@@ -495,7 +500,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
     // Create a new, empty default block so that the new hierarchy of
     // if-then statements go to this and the PHI nodes are happy.
     BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
-    F->getBasicBlockList().insert(Default, NewDefault);
+    F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
     BranchInst::Create(Default, NewDefault);
 
   // If there is an entry in any PHI nodes for the default edge, make sure
@@ -518,7 +523,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   BasicBlock *OldDefault = SI->getDefaultDest();
   CurBlock->getInstList().erase(SI);
 
-  // If the Default block has no more predecessors just remove it.
+  // If the Default block has no more predecessors just add it to DeleteList.
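+  // (Deletion is deferred because runOnFunction is still walking the
+  // function's block list; erasing OldDefault here could free a block the
+  // iteration has not visited yet. The DeleteList.count(Cur) check there
+  // skips such blocks instead.)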
   if (pred_begin(OldDefault) == pred_end(OldDefault))
-    DeleteDeadBlock(OldDefault);
+    DeleteList.insert(OldDefault);
 }
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 00cf4e6c01c8..aa1e35ddba02 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -63,6 +63,9 @@ bool PromotePass::runOnFunction(Function &F) {
 
   BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
 
+  if (F.hasFnAttribute(Attribute::OptimizeNone))
+    return false;
+
   bool Changed = false;
 
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 395a46bad97b..c999bd008fef 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -42,6 +42,24 @@ namespace {
     }
   };
 
+  static const char *const metaNames[] = {
+    // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+    "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+    "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+  };
+
+  struct Renamer {
+    Renamer(unsigned int seed) {
+      prng.srand(seed);
+    }
+
+    const char *newName() {
+      return metaNames[prng.rand() % array_lengthof(metaNames)];
+    }
+
+    PRNG prng;
+  };
+
   struct MetaRenamer : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
     MetaRenamer() : ModulePass(ID) {
@@ -53,36 +71,26 @@
     }
 
     bool runOnModule(Module &M) override {
-      static const char *const metaNames[] = {
-        // See http://en.wikipedia.org/wiki/Metasyntactic_variable
-        "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
-        "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-      };
-
       // Seed our PRNG with simple additive sum of ModuleID. We're looking to
       // simply avoid always having the same function names, and we need to
       // remain deterministic.
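       // For example, for a hypothetical module ID "m.ll" the seed is always
       // 'm' + '.' + 'l' + 'l' = 109 + 46 + 108 + 108 = 371, so the generated
       // names are stable from run to run for the same module.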
unsigned int randSeed = 0; - for (std::string::const_iterator I = M.getModuleIdentifier().begin(), - E = M.getModuleIdentifier().end(); I != E; ++I) - randSeed += *I; + for (auto C : M.getModuleIdentifier()) + randSeed += C; - PRNG prng; - prng.srand(randSeed); + Renamer renamer(randSeed); // Rename all aliases - for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE; ++AI) { + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { StringRef Name = AI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; AI->setName("alias"); } - + // Rename all global variables - for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); - GI != GE; ++GI) { + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { StringRef Name = GI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; @@ -93,40 +101,37 @@ namespace { // Rename all struct types TypeFinder StructTypes; StructTypes.run(M, true); - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *STy = StructTypes[i]; + for (StructType *STy : StructTypes) { if (STy->isLiteral() || STy->getName().empty()) continue; SmallString<128> NameStorage; - STy->setName((Twine("struct.") + metaNames[prng.rand() % - array_lengthof(metaNames)]).toStringRef(NameStorage)); + STy->setName((Twine("struct.") + + renamer.newName()).toStringRef(NameStorage)); } // Rename all functions - for (Module::iterator FI = M.begin(), FE = M.end(); - FI != FE; ++FI) { - StringRef Name = FI->getName(); + for (auto &F : M) { + StringRef Name = F.getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; - FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); - runOnFunction(*FI); + F.setName(renamer.newName()); + runOnFunction(F); } return true; } bool runOnFunction(Function &F) { - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); - AI != AE; ++AI) + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->getType()->isVoidTy()) AI->setName("arg"); - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - BB->setName("bb"); + for (auto &BB : F) { + BB.setName("bb"); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->getType()->isVoidTy()) - I->setName("tmp"); + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); } return true; } diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index d69a81ec4741..9ec28a3f3d47 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array, } GVCtor->eraseFromParent(); } else { - // Use a simple two-field struct if there isn't one already. + // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - nullptr); + IRB.getInt8PtrTy(), nullptr); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. 
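For reference, a sketch of the initializer layout this hunk produces; the IR below is illustrative only (the @ctor name is hypothetical), with the new trailing i8* field carrying the associated data, or null when there is none:

  @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
      [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }]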
@@ -107,7 +107,8 @@ Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { std::pair llvm::createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, - ArrayRef InitArgTypes, ArrayRef InitArgs) { + ArrayRef InitArgTypes, ArrayRef InitArgs, + StringRef VersionCheckName) { assert(!InitName.empty() && "Expected init function name"); assert(InitArgTypes.size() == InitArgTypes.size() && "Sanitizer's init function expects different number of arguments"); @@ -122,6 +123,13 @@ std::pair llvm::createSanitizerCtorAndInitFunctions( AttributeSet())); InitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(InitFunction, InitArgs); + if (!VersionCheckName.empty()) { + Function *VersionCheckFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), + AttributeSet())); + IRB.CreateCall(VersionCheckFunction, {}); + } return std::make_pair(Ctor, InitFunction); } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a87f8504bfb5..c4f9b9f61407 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -205,10 +205,9 @@ public: // avoid gratuitus rescans. const BasicBlock *BB = I->getParent(); unsigned InstNo = 0; - for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E; - ++BBI) - if (isInterestingInstruction(BBI)) - InstNumbers[BBI] = InstNo++; + for (const Instruction &BBI : *BB) + if (isInterestingInstruction(&BBI)) + InstNumbers[&BBI] = InstNo++; It = InstNumbers.find(I); assert(It != InstNumbers.end() && "Didn't insert instruction?"); @@ -402,8 +401,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); LBI.deleteValue(DDI); @@ -425,14 +423,17 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), -/// return true. This is necessary in cases where, due to control flow, the -/// alloca is potentially undefined on some control flow paths. e.g. code like -/// this is potentially correct: -/// -/// for (...) { if (c) { A = undef; undef = B; } } -/// -/// ... so long as A is not used before undef is set. -static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, +/// return false. This is necessary in cases where, due to control flow, the +/// alloca is undefined only on some control flow paths. e.g. code like +/// this is correct in LLVM IR: +/// // A is an alloca with no stores so far +/// for (...) { +/// int t = *A; +/// if (!first_iteration) +/// use(t); +/// *A = 42; +/// } +static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, AliasSetTracker *AST) { // The trickiest case to handle is when we have large blocks. 
Because of this, @@ -467,10 +468,15 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, std::make_pair(LoadIdx, static_cast(nullptr)), less_first()); - - if (I == StoresByIndex.begin()) - // If there is no store before this load, the load takes the undef value. - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } else // Otherwise, there was a store before this load, the load takes its value. LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); @@ -486,8 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoreInst *SI = cast(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); @@ -506,6 +511,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, } ++NumLocalPromoted; + return true; } void PromoteMem2Reg::run() { @@ -557,9 +563,8 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. - if (Info.OnlyUsedInOneBlock) { - promoteSingleBlockAlloca(AI, Info, LBI, AST); - + if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -636,7 +641,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector RenamePassWorkList; - RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values)); + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -854,7 +859,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++),
-                       BB->begin());
+                       &BB->front());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
 
@@ -919,7 +924,7 @@ NextIteration:
     return;
 
   for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
-    Instruction *I = II++; // get the instruction, increment iterator
+    Instruction *I = &*II++; // get the instruction, increment iterator
 
     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 36781c1189cd..d0932f834cf5 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -43,7 +44,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <map>
@@ -73,6 +73,22 @@ static cl::opt<bool> HoistCondStores(
     "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
     cl::desc("Hoist conditional stores if an unconditional store precedes"));
 
+static cl::opt<bool> MergeCondStores(
+    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+    cl::desc("Hoist conditional stores even if an unconditional store does not "
+             "precede - hoist multiple conditional stores into a single "
+             "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+    cl::desc("When merging conditional stores, do so even if the resultant "
+             "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+    cl::desc("Allow exactly one expensive instruction to be speculatively "
+             "executed"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -83,13 +99,13 @@ STATISTIC(NumSpeculations, "Number of speculative executed instructions");
 
 namespace {
   // The first field contains the value that the switch produces when a certain
-  // case group is selected, and the second field is a vector containing the cases
-  // composing the case group.
+  // case group is selected, and the second field is a vector containing the
+  // cases composing the case group.
  typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
      SwitchCaseResultVectorTy;
   // The first field contains the phi node that generates a result of the switch
-  // and the second field contains the value generated for a certain case in the switch
-  // for that PHI.
+  // and the second field contains the value generated for a certain case in the
+  // switch for that PHI.
  typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
 
  /// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -124,6 +140,7 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); @@ -226,6 +243,7 @@ static unsigned ComputeSpeculationCost(const User *I, "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I); } + /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -246,7 +264,8 @@ static unsigned ComputeSpeculationCost(const User *I, static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl *AggressiveInsts, unsigned &CostRemaining, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + unsigned Depth = 0) { Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -284,15 +303,24 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, unsigned Cost = ComputeSpeculationCost(I, TTI); - if (Cost > CostRemaining) + // Allow exactly one instruction to be speculated regardless of its cost + // (as long as it is safe to do so). + // This is intended to flatten the CFG even if the instruction is a division + // or other expensive operation. The speculation of an expensive instruction + // is expected to be undone in CodeGenPrepare if the speculation has not + // enabled further IR optimizations. + if (Cost > CostRemaining && + (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0)) return false; - CostRemaining -= Cost; + // Avoid unsigned wrap. + CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -970,8 +998,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, at this point, we know which new successor Pred will get. Make // sure we update the number of entries in the PHI nodes for these // successors. - for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) - AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + for (BasicBlock *NewSuccessor : NewSuccessors) + AddPredecessorToBlock(NewSuccessor, Pred, BB); Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. 
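The three knobs defined earlier in this file are ordinary cl::opt booleans, so they can be toggled when running the pass. The flag names below come from the cl::opt definitions in this patch; the invocation shape is only a usage sketch:

  opt -simplifycfg -simplifycfg-merge-cond-stores=false \
      -speculate-one-expensive-inst=false -S input.ll -o output.ll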
@@ -984,8 +1012,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); if (PredHasWeights || SuccHasWeights) { // Halve the weights if any of them cannot fit in an uint32_t @@ -1059,15 +1087,15 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); - Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } if (isa(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) @@ -1088,31 +1116,30 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. - BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull - }; + LLVMContext::MD_tbaa, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, + LLVMContext::MD_align, LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Changed = true; - I1 = BB1_Itr++; - I2 = BB2_Itr++; + I1 = &*BB1_Itr++; + I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } } while (I1->isIdenticalToWhenDefined(I2)); @@ -1147,7 +1174,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); - BIParent->getInstList().insert(BI, NT); + BIParent->getInstList().insert(BI->getIterator(), NT); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); @@ -1265,7 +1292,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Cannot move control-flow-involving, volatile loads, vaarg, etc. 
if (isa(I1) || isa(I2) || isa(I1) || isa(I2) || - isa(I1) || isa(I2) || + I1->isEHPad() || I2->isEHPad() || isa(I1) || isa(I2) || I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || @@ -1324,7 +1351,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { if (!NewPN) { NewPN = PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", BBEnd->begin()); + DifferentOp1->getName() + ".sink", &BBEnd->front()); NewPN->addIncoming(DifferentOp1, BB1); NewPN->addIncoming(DifferentOp2, BB2); DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); @@ -1339,7 +1366,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // instruction in the basic block down. bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); // Sink the instruction. - BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), + BB1->getInstList(), I1); if (!OldPN->use_empty()) OldPN->replaceAllUsesWith(I1); OldPN->eraseFromParent(); @@ -1355,7 +1383,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { RE1 = BB1->getInstList().rend(); if (UpdateRE2) RE2 = BB2->getInstList().rend(); - FirstNonPhiInBBEnd = I1; + FirstNonPhiInBBEnd = &*I1; NumSinkCommons++; Changed = true; } @@ -1491,7 +1519,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { - Instruction *I = BBI; + Instruction *I = &*BBI; // Skip debug info. if (isa(I)) continue; @@ -1604,9 +1632,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SpeculatedStore->setOperand(0, S); } + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + for (auto &I: *ThenBB) + I.dropUnknownNonDebugMetadata(); + // Hoist the instructions. - BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), - std::prev(ThenBB->end())); + BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), + ThenBB->begin(), std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); @@ -1747,13 +1780,13 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, DL)) { - TranslateMap[BBI] = V; + TranslateMap[&*BBI] = V; delete N; // Instruction folded away, don't need actual inst } else { // Insert the new instruction into its new home. EdgeBB->getInstList().insert(InsertPt, N); if (!BBI->use_empty()) - TranslateMap[BBI] = N; + TranslateMap[&*BBI] = N; } } @@ -1850,7 +1883,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa(I);++I) - if (!AggressiveInsts.count(I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1863,7 +1896,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa(I);++I) - if (!AggressiveInsts.count(I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I)) { // This is not an aggressive instruction that we can promote. 
// Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1882,13 +1915,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock1->getInstList(), IfBlock1->begin(), - IfBlock1->getTerminator()); + IfBlock1->getTerminator()->getIterator()); if (IfBlock2) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock2->getInstList(), IfBlock2->begin(), - IfBlock2->getTerminator()); + IfBlock2->getTerminator()->getIterator()); while (PHINode *PN = dyn_cast(BB->begin())) { // Change the PHI node into a select instruction. @@ -2057,7 +2090,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BI->getSuccessor(0) == PBI->getSuccessor(1))) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Curr = I++; + Instruction *Curr = &*I++; if (isa(Curr)) { Cond = Curr; break; @@ -2077,7 +2110,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = Cond; ++CondIt; + BasicBlock::iterator CondIt = ++Cond->getIterator(); // Ignore dbg intrinsics. while (isa(CondIt)) ++CondIt; @@ -2095,7 +2128,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // Ignore dbg intrinsics. if (isa(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast(I->user_back()); @@ -2192,17 +2225,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { Instruction *NewBonusInst = BonusInst->clone(); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); - VMap[BonusInst] = NewBonusInst; + VMap[&*BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid // only given the branch precondition. // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. - NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg); + NewBonusInst->dropUnknownNonDebugMetadata(); - PredBlock->getInstList().insert(PBI, NewBonusInst); - NewBonusInst->takeName(BonusInst); + PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&*BonusInst); BonusInst->setName(BonusInst->getName() + ".old"); } @@ -2211,7 +2244,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { Instruction *New = Cond->clone(); RemapInstruction(New, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); - PredBlock->getInstList().insert(PBI, New); + PredBlock->getInstList().insert(PBI->getIterator(), New); New->takeName(Cond); Cond->setName(New->getName() + ".old"); @@ -2332,11 +2365,297 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; } +// If there is only one store in BB1 and BB2, return it, otherwise return +// nullptr. 
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+  StoreInst *S = nullptr;
+  for (auto *BB : {BB1, BB2}) {
+    if (!BB)
+      continue;
+    for (auto &I : *BB)
+      if (auto *SI = dyn_cast<StoreInst>(&I)) {
+        if (S)
+          // Multiple stores seen.
+          return nullptr;
+        else
+          S = SI;
+      }
+  }
+  return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+                                              Value *AlternativeV = nullptr) {
+  // PHI is going to be a PHI node that allows the value V that is defined in
+  // BB to be referenced in BB's only successor.
+  //
+  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+  // doesn't matter to us what the other operand is (it'll never get used). We
+  // could just create a new PHI with an undef incoming value, but that could
+  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+  // other PHI. So here we directly look for some PHI in BB's successor with V
+  // as an incoming operand. If we find one, we use it, else we create a new
+  // one.
+  //
+  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV ]
+  // where OtherBB is the single other predecessor of BB's only successor.
+  PHINode *PHI = nullptr;
+  BasicBlock *Succ = BB->getSingleSuccessor();
+
+  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+      PHI = cast<PHINode>(I);
+      if (!AlternativeV)
+        break;
+
+      assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+      auto PredI = pred_begin(Succ);
+      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+        break;
+      PHI = nullptr;
+    }
+  if (PHI)
+    return PHI;
+
+  // If V is not an instruction defined in BB, just return it.
+  if (!AlternativeV &&
+      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+    return V;
+
+  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+  PHI->addIncoming(V, BB);
+  for (BasicBlock *PredBB : predecessors(Succ))
+    if (PredBB != BB)
+      PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()),
+                       PredBB);
+  return PHI;
+}
+
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+                                           BasicBlock *QTB, BasicBlock *QFB,
+                                           BasicBlock *PostBB, Value *Address,
+                                           bool InvertPCond, bool InvertQCond) {
+  auto IsaBitcastOfPointerType = [](const Instruction &I) {
+    return Operator::getOpcode(&I) == Instruction::BitCast &&
+           I.getType()->isPointerTy();
+  };
+
+  // If we're not in aggressive mode, we only optimize if we have some
+  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+  auto IsWorthwhile = [&](BasicBlock *BB) {
+    if (!BB)
+      return true;
+    // Heuristic: if the block can be if-converted/phi-folded and the
+    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+    // thread this store.
+    unsigned N = 0;
+    for (auto &I : *BB) {
+      // Cheap instructions viable for folding.
+      if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
+          isa<StoreInst>(I))
+        ++N;
+      // Free instructions.
+      else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+               IsaBitcastOfPointerType(I))
+        continue;
+      else
+        return false;
+    }
+    return N <= PHINodeFoldingThreshold;
+  };
+
+  if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) ||
+                                       !IsWorthwhile(PFB) ||
+                                       !IsWorthwhile(QTB) ||
+                                       !IsWorthwhile(QFB)))
+    return false;
+
+  // For every pointer, there must be exactly two stores, one coming from
+  // PTB or PFB, and the other from QTB or QFB.
We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; + + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered()) + return false; + + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. Be super safe and just + // check there are no other memory operations at all. + for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) + return false; + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) + return false; + + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. + Value *PCond = cast(PFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + Value *QCond = cast(QFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + + Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), + PStore->getParent()); + Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), + QStore->getParent(), PPHI); + + IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + + Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); + Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); + + if (InvertPCond) + PPred = QB.CreateNot(PPred); + if (InvertQCond) + QPred = QB.CreateNot(QPred); + Value *CombinedPred = QB.CreateOr(PPred, QPred); + + auto *T = + SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); + QB.SetInsertPoint(T); + StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); + AAMDNodes AAMD; + PStore->getAAMetadata(AAMD, /*Merge=*/false); + PStore->getAAMetadata(AAMD, /*Merge=*/true); + SI->setAAMetadata(AAMD); + + QStore->eraseFromParent(); + PStore->eraseFromParent(); + + return true; +} + +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { + // The intention here is to find diamonds or triangles (see below) where each + // conditional block contains a store to the same address. Both of these + // stores are conditional, so they can't be unconditionally sunk. But it may + // be profitable to speculatively sink the stores into one merged store at the + // end, and predicate the merged store on the union of the two conditions of + // PBI and QBI. + // + // This can reduce the number of stores executed if both of the conditions are + // true, and can allow the blocks to become small enough to be if-converted. 
+ // This optimization will also chain, so that ladders of test-and-set + // sequences can be if-converted away. + // + // We only deal with simple diamonds or triangles: + // + // PBI or PBI or a combination of the two + // / \ | \ + // PTB PFB | PFB + // \ / | / + // QBI QBI + // / \ | \ + // QTB QFB | QFB + // \ / | / + // PostBB PostBB + // + // We model triangles as a type of diamond with a nullptr "true" block. + // Triangles are canonicalized so that the fallthrough edge is represented by + // a true condition, as in the diagram above. + // + BasicBlock *PTB = PBI->getSuccessor(0); + BasicBlock *PFB = PBI->getSuccessor(1); + BasicBlock *QTB = QBI->getSuccessor(0); + BasicBlock *QFB = QBI->getSuccessor(1); + BasicBlock *PostBB = QFB->getSingleSuccessor(); + + bool InvertPCond = false, InvertQCond = false; + // Canonicalize fallthroughs to the true branches. + if (PFB == QBI->getParent()) { + std::swap(PFB, PTB); + InvertPCond = true; + } + if (QFB == PostBB) { + std::swap(QFB, QTB); + InvertQCond = true; + } + + // From this point on we can assume PTB or QTB may be fallthroughs but PFB + // and QFB may not. Model fallthroughs as a nullptr block. + if (PTB == QBI->getParent()) + PTB = nullptr; + if (QTB == PostBB) + QTB = nullptr; + + // Legality bailouts. We must have at least the non-fallthrough blocks and + // the post-dominating block, and the non-fallthroughs must only have one + // predecessor. + auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { + return BB->getSinglePredecessor() == P && + BB->getSingleSuccessor() == S; + }; + if (!PostBB || + !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) + return false; + if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || + (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) + return false; + if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + return false; + + // OK, this is a sequence of two diamonds or triangles. + // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. + SmallPtrSet PStoreAddresses, QStoreAddresses; + for (auto *BB : {PTB, PFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast(&I)) + PStoreAddresses.insert(SI->getPointerOperand()); + } + for (auto *BB : {QTB, QFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast(&I)) + QStoreAddresses.insert(SI->getPointerOperand()); + } + + set_intersect(PStoreAddresses, QStoreAddresses); + // set_intersect mutates PStoreAddresses in place. Rename it here to make it + // clear what it contains. + auto &CommonAddresses = PStoreAddresses; + + bool Changed = false; + for (auto *Address : CommonAddresses) + Changed |= mergeConditionalStoreToAddress( + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + return Changed; +} + /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. 
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -2360,10 +2679,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), - std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", - BB->begin()); + PHINode *NewPN = PHINode::Create( + Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", &BB->front()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. @@ -2386,6 +2704,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } + if (auto *CE = dyn_cast(BI->getCondition())) + if (CE->canTrap()) + return false; + + // If BI is reached from the true path of PBI and PBI's condition implies + // BI's condition, we know the direction of the BI branch. + if (PBI->getSuccessor(0) == BI->getParent() && + isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && + PBI->getSuccessor(0) != PBI->getSuccessor(1) && + BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + BI->setCondition(ConstantInt::getTrue(BB->getContext())); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return true; // Nuke the branch on constant. + } + + // If both branches are conditional and both contain stores to the same + // address, remove the stores from the conditionals and create a conditional + // merged store at the end. + if (MergeCondStores && mergeConditionalStores(PBI, BI)) + return true; + // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -2396,11 +2737,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - - if (ConstantExpr *CE = dyn_cast(BI->getCondition())) - if (CE->canTrap()) - return false; - int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2565,15 +2901,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. - for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { - BasicBlock *Succ = OldTerm->getSuccessor(I); + for (BasicBlock *Succ : OldTerm->successors()) { // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; else - Succ->removePredecessor(OldTerm->getParent()); + Succ->removePredecessor(OldTerm->getParent(), + /*DontDeleteUselessPHIs=*/true); } IRBuilder<> Builder(OldTerm); @@ -2827,7 +3163,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); // If Extra was used, we require at least two switch values to do the - // transformation. A switch with one value is just an cond branch. 
+ // transformation. A switch with one value is just a conditional branch. if (ExtraCase && Values.size() < 2) return false; // TODO: Preserve branch weight metadata, similarly to how @@ -2847,7 +3183,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test"); + BasicBlock *NewBB = + BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); // Remove the uncond branch added to the old block. TerminatorInst *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -2911,34 +3248,15 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Check that there are no other instructions except for debug intrinsics. - BasicBlock::iterator I = LPInst, E = RI; + BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); while (++I != E) if (!isa(I)) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { - InvokeInst *II = cast((*PI++)->getTerminator()); - SmallVector Args(II->op_begin(), II->op_end() - 3); - // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); - Call->takeName(II); - Call->setCallingConv(II->getCallingConv()); - Call->setAttributes(II->getAttributes()); - Call->setDebugLoc(II->getDebugLoc()); - - // Anything that used the value produced by the invoke instruction now uses - // the value produced by the call instruction. Note that we do this even - // for void functions and calls with no uses so that the callgraph edge is - // updated. - II->replaceAllUsesWith(Call); - BB->removePredecessor(II->getParent()); - - // Insert a branch to the normal destination right before the invoke. - BranchInst::Create(II->getNormalDest(), II); - - // Finally, delete the invoke instruction! - II->eraseFromParent(); + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); } // The landingpad is now unreachable. Zap it. @@ -2946,6 +3264,124 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return true; } +bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { + // If this is a trivial cleanup pad that executes no instructions, it can be + // eliminated. If the cleanup pad continues to the caller, any predecessor + // that is an EH pad will be updated to continue to the caller and any + // predecessor that terminates with an invoke instruction will have its invoke + // instruction converted to a call instruction. If the cleanup pad being + // simplified does not continue to the caller, each predecessor will be + // updated to continue to the unwind destination of the cleanup pad being + // simplified. + BasicBlock *BB = RI->getParent(); + CleanupPadInst *CPInst = RI->getCleanupPad(); + if (CPInst->getParent() != BB) + // This isn't an empty cleanup. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); + while (++I != E) + if (!isa(I)) + return false; + + // If the cleanup return we are simplifying unwinds to the caller, this will + // set UnwindDest to nullptr. + BasicBlock *UnwindDest = RI->getUnwindDest(); + Instruction *DestEHPad = UnwindDest ? 
UnwindDest->getFirstNonPHI() : nullptr; + + // We're about to remove BB from the control flow. Before we do, sink any + // PHINodes into the unwind destination. Doing this before changing the + // control flow avoids some potentially slow checks, since we can currently + // be certain that UnwindDest and BB have no common predecessors (since they + // are both EH pads). + if (UnwindDest) { + // First, go through the PHI nodes in UnwindDest and update any nodes that + // reference the block we are removing + for (BasicBlock::iterator I = UnwindDest->begin(), + IE = DestEHPad->getIterator(); + I != IE; ++I) { + PHINode *DestPN = cast(I); + + int Idx = DestPN->getBasicBlockIndex(BB); + // Since BB unwinds to UnwindDest, it has to be in the PHI node. + assert(Idx != -1); + // This PHI node has an incoming value that corresponds to a control + // path through the cleanup pad we are removing. If the incoming + // value is in the cleanup pad, it must be a PHINode (because we + // verified above that the block is otherwise empty). Otherwise, the + // value is either a constant or a value that dominates the cleanup + // pad being removed. + // + // Because BB and UnwindDest are both EH pads, all of their + // predecessors must unwind to these blocks, and since no instruction + // can have multiple unwind destinations, there will be no overlap in + // incoming blocks between SrcPN and DestPN. + Value *SrcVal = DestPN->getIncomingValue(Idx); + PHINode *SrcPN = dyn_cast(SrcVal); + + // Remove the entry for the block we are deleting. + DestPN->removeIncomingValue(Idx, false); + + if (SrcPN && SrcPN->getParent() == BB) { + // If the incoming value was a PHI node in the cleanup pad we are + // removing, we need to merge that PHI node's incoming values into + // DestPN. + for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); + SrcIdx != SrcE; ++SrcIdx) { + DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), + SrcPN->getIncomingBlock(SrcIdx)); + } + } else { + // Otherwise, the incoming value came from above BB and + // so we can just reuse it. We must associate all of BB's + // predecessors with this value. + for (auto *pred : predecessors(BB)) { + DestPN->addIncoming(SrcVal, pred); + } + } + } + + // Sink any remaining PHI nodes directly into UnwindDest. + Instruction *InsertPt = DestEHPad; + for (BasicBlock::iterator I = BB->begin(), + IE = BB->getFirstNonPHI()->getIterator(); + I != IE;) { + // The iterator must be incremented here because the instructions are + // being moved to another block. + PHINode *PN = cast(I++); + if (PN->use_empty()) + // If the PHI node has no uses, just leave it. It will be erased + // when we erase BB below. + continue; + + // Otherwise, sink this PHI node into UnwindDest. + // Any predecessors to UnwindDest which are not already represented + // must be back edges which inherit the value from the path through + // BB. In this case, the PHI value must reference itself. + for (auto *pred : predecessors(UnwindDest)) + if (pred != BB) + PN->addIncoming(PN, pred); + PN->moveBefore(InsertPt); + } + } + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + // The iterator must be updated here because we are removing this pred. + BasicBlock *PredBB = *PI++; + if (UnwindDest == nullptr) { + removeUnwindEdge(PredBB); + } else { + TerminatorInst *TI = PredBB->getTerminator(); + TI->replaceUsesOfWith(BB, UnwindDest); + } + } + + // The cleanup pad is now unreachable. Zap it. 
+ BB->eraseFromParent(); + return true; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; @@ -3003,8 +3439,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If there are any instructions immediately before the unreachable that can // be removed, do so. - while (UI != BB->begin()) { - BasicBlock::iterator BBI = UI; + while (UI->getIterator() != BB->begin()) { + BasicBlock::iterator BBI = UI->getIterator(); --BBI; // Do not delete instructions that can have side effects which might cause // the unreachable to not be reachable; specifically, calls and volatile @@ -3075,26 +3511,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - } else if (InvokeInst *II = dyn_cast(TI)) { - if (II->getUnwindDest() == BB) { - // Convert the invoke to a call instruction. This would be a good - // place to note that the call does not throw though. - BranchInst *BI = Builder.CreateBr(II->getNormalDest()); - II->removeFromParent(); // Take out of symbol table - - // Insert the call now... - SmallVector Args(II->op_begin(), II->op_end()-3); - Builder.SetInsertPoint(BI); - CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args, II->getName()); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the call does now instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } + } else if ((isa(TI) && + cast(TI)->getUnwindDest() == BB) || + isa(TI)) { + removeUnwindEdge(TI->getParent()); + Changed = true; + } else if (isa(TI)) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; } + // TODO: We can remove a catchswitch if all it's catchpads end in + // unreachable. } // If this block is now dead, remove it. @@ -3249,6 +3677,29 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, } } + // If we can prove that the cases must cover all possible values, the + // default destination becomes dead and we can remove it. If we know some + // of the bits in the value, we can use that to more precisely compute the + // number of possible unique case values. + bool HasDefault = + !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const unsigned NumUnknownBits = Bits - + (KnownZero.Or(KnownOne)).countPopulation(); + assert(NumUnknownBits <= Bits); + if (HasDefault && DeadCases.empty() && + NumUnknownBits < 64 /* avoid overflow */ && + SI->getNumCases() == (1ULL << NumUnknownBits)) { + DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(), + SI->getParent(), ""); + SI->setDefaultDest(&*NewDefault); + SplitBlock(&*NewDefault, &NewDefault->front()); + auto *OldTI = NewDefault->getTerminator(); + new UnreachableInst(SI->getContext(), OldTI); + EraseTerminatorInstAndDCECond(OldTI); + return true; + } + SmallVector Weights; bool HasWeight = HasBranchWeights(SI); if (HasWeight) { @@ -3439,7 +3890,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, } else if (isa(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) { + } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) { // Instruction is side-effect free and constant. 
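// Standalone sketch of the counting argument used by the dead-default check
// in EliminateDeadSwitchCases above: with Bits total bits and the known-zero/
// known-one masks fixing some of them, the condition can take exactly
// 2^NumUnknownBits distinct values, so a switch with that many live cases can
// never reach its default (plain C++, illustrative only):

#include <cstdint>
static bool defaultIsDead(unsigned Bits, uint64_t KnownZero, uint64_t KnownOne,
                          uint64_t NumCases) {
  unsigned NumUnknownBits =
      Bits - (unsigned)__builtin_popcountll(KnownZero | KnownOne);
  return NumUnknownBits < 64 /* avoid overflow */ &&
         NumCases == (1ULL << NumUnknownBits);
}
// e.g. an i8 switch condition known to be even (KnownZero = 0x1) has
// 2^7 = 128 reachable values, so 128 cases make the default unreachable.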
// If the instruction has uses outside this block or a phi node slot for @@ -3456,7 +3907,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, return false; } - ConstantPool.insert(std::make_pair(I, C)); + ConstantPool.insert(std::make_pair(&*I, C)); } else { break; } @@ -3664,7 +4115,7 @@ namespace { /// Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - const Type *ElementType); + Type *ElementType); private: // Depending on the contents of the table, it can be represented in @@ -3880,8 +4331,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - const Type *ElementType) { - const IntegerType *IT = dyn_cast(ElementType); + Type *ElementType) { + auto *IT = dyn_cast(ElementType); if (!IT) return false; // FIXME: If the type is wider than it needs to be, e.g. i8 but all values @@ -3992,7 +4443,7 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); } - + // Check if the branch instruction dominates the phi node. It's a simple // dominance check, but sufficient for our needs. // Although this check is invariant in the calling loops, it's better to do it @@ -4422,7 +4873,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return true; // If the Terminator is the only non-phi instruction, simplify the block. - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -4457,6 +4908,16 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return false; } +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; + } + return PredPred; +} bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); @@ -4537,9 +4998,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + // Look for diamond patterns. 
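// The "diamond" referred to below (illustrative CFG):
//
//            PrevBB
//            /    \
//         Left    Right
//            \    /
//              BB
//
// allPredecessorsComeFromSameSource(BB) returns PrevBB exactly when every
// predecessor of BB has PrevBB as its single predecessor. In source terms,
// the store merge this enables looks like (sketch, assuming the second
// condition and value do not depend on the first store):
//
//   if (a) *p = x;                if (a | b)
//   if (b) *p = y;      =>          *p = b ? y : x;   // one store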
+ if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; } @@ -4663,6 +5132,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyReturn(RI, Builder)) return true; } else if (ResumeInst *RI = dyn_cast(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; + } else if (CleanupReturnInst *RI = + dyn_cast(BB->getTerminator())) { + if (SimplifyCleanupReturn(RI)) return true; } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (UnreachableInst *UI = diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ab30aa17c76b..ddd8775a8431 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -47,15 +47,16 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; + DominatorTree *DT; SmallVectorImpl &DeadInsts; bool Changed; public: - SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI, - SmallVectorImpl &Dead) - : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) { + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI,SmallVectorImpl &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -63,11 +64,13 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies - /// all simplicitions to users of an IV. + /// all simplifications to users of an IV. void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, @@ -166,19 +169,65 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { S = SE->getSCEVAtScope(S, ICmpLoop); X = SE->getSCEVAtScope(X, ICmpLoop); + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + // If the condition is always true or always false, replace it with // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) + if (SE->isKnownPredicate(Pred, S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (isa(IVOperand) && + SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop, + InvariantPredicate, InvariantLHS, + InvariantRHS)) { + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". 
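// Schematic of the rewrite performed here (illustrative; slt is only a
// placeholder, and the invariant form may use a different predicate than
// the original):
//
//   %cmp = icmp slt i32 %iv, %x        ; re-evaluated every iteration
//       -->
//   %cmp = icmp slt i32 %iv.start, %x  ; loop-invariant operands only
//
// where %iv.start is an incoming value of the IV's PHI and SCEV has proven,
// via isLoopInvariantPredicate, that the comparison's result cannot change
// as the loop advances. Only values that already exist are used, so no new
// instructions are emitted.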
+ + Value *NewLHS = nullptr, *NewRHS = nullptr; + + if (S == InvariantLHS || X == InvariantLHS) + NewLHS = + ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); + + if (S == InvariantRHS || X == InvariantRHS) + NewRHS = + ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); + + for (Value *Incoming : cast(IVOperand)->incoming_values()) { + if (NewLHS && NewRHS) + break; + + const SCEV *IncomingS = SE->getSCEV(Incoming); + + if (!NewLHS && IncomingS == InvariantLHS) + NewLHS = Incoming; + if (!NewRHS && IncomingS == InvariantRHS) + NewRHS = Incoming; + } + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. + return; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + } else return; - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); ++NumElimCmp; Changed = true; - DeadInsts.emplace_back(ICmp); } /// SimplifyIVUsers helper for eliminating useless @@ -207,8 +256,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, Rem->replaceAllUsesWith(Rem->getOperand(0)); else { // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); if (IsSigned && !SE->isKnownNonNegative(LessOne)) return; @@ -232,9 +280,9 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DeadInsts.emplace_back(Rem); } -/// Eliminate an operation that consumes a simple IV and has -/// no observable side-effect given the range of IV values. -/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// Eliminate an operation that consumes a simple IV and has no observable +/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, +/// but UseInst may not be. bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, Instruction *IVOperand) { if (ICmpInst *ICmp = dyn_cast(UseInst)) { @@ -249,12 +297,45 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, } } - // Eliminate any operation that SCEV can prove is an identity function. + if (eliminateIdentitySCEV(UseInst, IVOperand)) + return true; + + return false; +} + +/// Eliminate any operation that SCEV can prove is an identity function. +bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, + Instruction *IVOperand) { if (!SE->isSCEVable(UseInst->getType()) || (UseInst->getType() != IVOperand->getType()) || (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) return false; + // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the + // dominator tree, even if X is an operand to Y. For instance, in + // + // %iv = phi i32 {0,+,1} + // br %cond, label %left, label %merge + // + // left: + // %X = add i32 %iv, 0 + // br label %merge + // + // merge: + // %M = phi (%X, %iv) + // + // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and + // %M.replaceAllUsesWith(%X) would be incorrect. + + if (isa(UseInst)) + // If UseInst is not a PHI node then we know that IVOperand dominates + // UseInst directly from the legality of SSA. 
+ if (!DT || !DT->dominates(IVOperand, UseInst)) + return false; + + if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) + return false; + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); UseInst->replaceAllUsesWith(IVOperand); @@ -436,8 +517,8 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { /// This algorithm does not require IVUsers analysis. Instead, it simplifies /// instructions in-place during analysis. Rather than rewriting induction /// variables bottom-up from their users, it transforms a chain of IVUsers -/// top-down, updating the IR only when it encouters a clear optimization -/// opportunitiy. +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. /// /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. /// @@ -513,22 +594,21 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead, IVVisitor *V) -{ - LoopInfo *LI = &LPM->getAnalysis().getLoopInfo(); - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead); +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead, + IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. -bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead) { +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= simplifyUsersOfIV(cast(I), SE, LPM, Dead); + Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead); } return Changed; } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index c499c87b1f0b..d5377f9a4c1f 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,12 +20,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -64,7 +64,7 @@ namespace { // Here be subtlety: the iterator must be incremented before the loop // body (not sure why), so a range-for loop won't work here. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = BI++; + Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. 
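// The increment-before-use idiom noted above, in standalone form: taking the
// element and stepping the iterator first keeps the iterator valid even if
// the loop body erases the current element (illustrative, std::list):

#include <list>
static void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;   // advance before possibly erasing *Cur
    if (*Cur % 2 == 0)
      L.erase(Cur);    // safe: It already points past Cur
  }
}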
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6bbf8287e223..81dea6d1b9ae 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -30,8 +31,8 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -52,16 +53,8 @@ static cl::opt //===----------------------------------------------------------------------===// static bool ignoreCallingConv(LibFunc::Func Func) { - switch (Func) { - case LibFunc::abs: - case LibFunc::labs: - case LibFunc::llabs: - case LibFunc::strlen: - return true; - default: - return false; - } - llvm_unreachable("All cases should be covered in the switch."); + return Func == LibFunc::abs || Func == LibFunc::labs || + Func == LibFunc::llabs || Func == LibFunc::strlen; } /// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -93,16 +86,13 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { } static bool callHasFloatingPointArgument(const CallInst *CI) { - for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); - it != e; ++it) { - if ((*it)->getType()->isFloatingPointTy()) - return true; - } - return false; + return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) { + return OI->getType()->isFloatingPointTy(); + }); } /// \brief Check whether the overloaded unary floating point function -/// corresponing to \a Ty is available. +/// corresponding to \a Ty is available. static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, LibFunc::Func DoubleFn, LibFunc::Func FloatFn, LibFunc::Func LongDoubleFn) { @@ -116,6 +106,23 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } +/// \brief Check whether we can use unsafe floating point math for +/// the function passed as input. +static bool canUseUnsafeFPMath(Function *F) { + + // FIXME: For finer-grain optimization, we need intrinsics to have the same + // fast-math flag decorations that are applied to FP instructions. For now, + // we have to rely on the function-level unsafe-fp-math attribute to do this + // optimization because there's no other way to express that the call can be + // relaxed. + if (F->hasFnAttribute("unsafe-fp-math")) { + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + if (Attr.getValueAsString() == "true") + return true; + } + return false; +} + /// \brief Returns whether \p F matches the signature expected for the /// string/memory copying library function \p Func. /// Acceptable functions are st[rp][n]?cpy, memove, memcpy, and memset. @@ -467,9 +474,6 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Verify the "stpcpy" function prototype. 
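// The test in canUseUnsafeFPMath above, reduced to its core (sketch;
// "unsafe-fp-math" is a string-valued function attribute at this point, so
// it is compared textually rather than queried as a flag):
//
//   if (F->getFnAttribute("unsafe-fp-math").getValueAsString() == "true")
//     ...  // relaxed floating-point transforms allowed
//
// Frontends set this attribute per function from -ffast-math and friends,
// which is why the simplifier consults the enclosing function rather than
// the individual call.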
- FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) return nullptr; @@ -484,7 +488,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return nullptr; - Type *PT = FT->getParamType(0); + Type *PT = Callee->getFunctionType()->getParamType(0); Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); @@ -497,8 +501,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) return nullptr; @@ -531,7 +533,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { if (Len > SrcLen + 1) return nullptr; - Type *PT = FT->getParamType(0); + Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); @@ -862,6 +864,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { return B.CreateSub(LHSV, RHSV, "chardiff"); } + // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { + + IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); + unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); + + if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment && + getKnownAlignment(RHS, DL, CI) >= PrefAlignment) { + + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + + Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); + Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); + } + } + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) StringRef LHSStr, RHSStr; if (getConstantStringInfo(LHS, LHSStr) && @@ -972,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Intrinsic::ID IID = Callee->getIntrinsicID(); Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); V = B.CreateCall(F, V); @@ -1015,9 +1038,9 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1035,13 +1058,37 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. 
+ // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). + {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the @@ -1060,7 +1107,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op1C->isExactlyValue(2.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) - return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B, + Callee->getAttributes()); // pow(10.0, x) -> exp10(x) if (Op1C->isExactlyValue(10.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, @@ -1069,6 +1117,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); + + // pow(exp(x), y) -> exp(x*y) + // pow(exp2(x), y) -> exp2(x * y) + // We enable these only under fast-math. Besides rounding + // differences the transformation changes overflow and + // underflow behavior quite dramatically. + // Example: x = 1000, y = 0.001. + // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). + if (unsafeFPMath) { + if (auto *OpC = dyn_cast(Op1)) { + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) + return EmitUnaryFloatFnCall( + B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), + OpCCallee->getName(), B, OpCCallee->getAttributes()); + } + } + ConstantFP *Op2C = dyn_cast(Op2); if (!Op2C) return Ret; @@ -1081,10 +1155,15 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::sqrtl) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, LibFunc::fabsl)) { + + // In -ffast-math, pow(x, 0.5) -> sqrt(x). + if (unsafeFPMath) + return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. - // TODO: In fast-math mode, this could be just sqrt(x). // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
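// Standalone demonstration of the addition-chain expansion implemented by
// getPow above: AddChain[Exp] = {A, B} with A + B == Exp, so x^Exp is built
// from two smaller, memoized powers, e.g. x^11 = x^1 * x^10, x^10 = x^5 *
// x^5, x^5 = x^2 * x^3 -- five multiplies and no libm call (plain C++,
// illustrative only; the memo-by-zero trick is a demo shortcut):

#include <cstdio>
static const unsigned AddChain[33][2] = {
    {0, 0},   {0, 0},  {1, 1},   {1, 2},   {2, 2},  {2, 3},   {3, 3},
    {2, 5},   {4, 4},  {1, 8},   {5, 5},   {1, 10}, {6, 6},   {4, 9},
    {7, 7},   {3, 12}, {8, 8},   {8, 9},   {2, 16}, {1, 18},  {10, 10},
    {6, 15},  {11, 11},{3, 20},  {12, 12}, {8, 17}, {13, 13}, {3, 24},
    {14, 14}, {4, 25}, {15, 15}, {3, 28},  {16, 16}};
static double powChain(double X, unsigned Exp, double Memo[33]) {
  if (Exp == 1)
    return X;
  if (Memo[Exp] == 0.0) // memoize intermediate powers, as InnerChain does
    Memo[Exp] = powChain(X, AddChain[Exp][0], Memo) *
                powChain(X, AddChain[Exp][1], Memo);
  return Memo[Exp];
}
int main() {
  double Memo[33] = {0.0};
  std::printf("%g\n", powChain(2.0, 11, Memo)); // prints 2048
}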
Value *Inf = ConstantFP::getInfinity(CI->getType()); Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); @@ -1102,18 +1181,42 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Function *Caller = CI->getParent()->getParent(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2f)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1162,11 +1265,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) { + StringRef Name = Callee->getName(); + if (Name == "fabs" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, false); - } FunctionType *FT = Callee->getFunctionType(); // Make sure this has 1 argument of FP type which matches the result type. @@ -1184,6 +1286,105 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + // If we can shrink the call to a float function rather than a double + // function, do that first. + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + if ((Name == "fmin" && hasFloatVersion(Name)) || + (Name == "fmax" && hasFloatVersion(Name))) { + Value *Ret = optimizeBinaryDoubleFP(CI, B); + if (Ret) + return Ret; + } + + // Make sure this has 2 arguments of FP type which match the result type. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return nullptr; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + Function *F = CI->getParent()->getParent(); + if (canUseUnsafeFPMath(F)) { + // Unsafe algebra sets all fast-math-flags to true. + FMF.setUnsafeAlgebra(); + } else { + // At a minimum, no-nans-fp-math must be true. + Attribute Attr = F->getFnAttribute("no-nans-fp-math"); + if (Attr.getValueAsString() != "true") + return nullptr; + // No-signed-zeros is implied by the definitions of fmax/fmin themselves: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0. 0, +0. 0) would return +0; however, implementation in software + // might be impractical." + FMF.setNoSignedZeros(); + FMF.setNoNaNs(); + } + B.SetFastMathFlags(FMF); + + // We have a relaxed floating-point environment. We can ignore NaN-handling + // and transform to a compare and select. We do not have to consider errno or + // exceptions, because fmin/fmax do not have those. + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + Value *Cmp = Callee->getName().startswith("fmin") ? + B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); + return B.CreateSelect(Cmp, Op0, Op1); +} + +Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + FunctionType *FT = Callee->getFunctionType(); + + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast(Op1); + if (!OpC) + return Ret; + + // log(pow(x,y)) -> y*log(x) + // This is only applicable to log, log2, log10. 
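// Worked instances of the identities applied below (real-valued, and only
// valid once rounding, errno, and overflow concerns are waived, hence the
// unsafe-math guard above):
//
//   log(pow(x, y))  ->  y * log(x)     e.g. log(pow(10, 3)) = 3 * log(10)
//   log(exp2(y))    ->  y * log(2)     e.g. log(exp2(8))    = 8 * 0.69314...
//
// The pow rule is applied with the matching base for log2 and log10 as well;
// exp2 is only folded for plain log, where the constant log(2) appears.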
+ if (Name != "log" && Name != "log2" && Name != "log10") + return Ret; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow)) + return B.CreateFMul(OpC->getArgOperand(1), + EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, + Callee->getAttributes()), "mul"); + + // log(exp2(y)) -> y*log(2) + if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && + TLI->has(Func) && Func == LibFunc::exp2) + return B.CreateFMul( + OpC->getArgOperand(0), + EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), + Callee->getName(), B, Callee->getAttributes()), + "logmul"); + return Ret; +} + Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -1191,19 +1392,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; - // FIXME: For finer-grain optimization, we need intrinsics to have the same - // fast-math flag decorations that are applied to FP instructions. For now, - // we have to rely on the function-level unsafe-fp-math attribute to do this - // optimization because there's no other way to express that the sqrt can be - // reassociated. - Function *F = CI->getParent()->getParent(); - if (F->hasFnAttribute("unsafe-fp-math")) { - // Check for unsafe-fp-math = true. - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() != "true") - return Ret; - } Value *Op = CI->getArgOperand(0); if (Instruction *I = dyn_cast(Op)) { if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { @@ -1238,8 +1429,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // and multiply. // FIXME: We're not checking the sqrt because it doesn't have // fast-math-flags (see earlier comment). - IRBuilder >::FastMathFlagGuard Guard(B); + IRBuilder<>::FastMathFlagGuard Guard(B); B.SetFastMathFlags(I->getFastMathFlags()); // If we found a repeated factor, hoist it out of the square root and // replace it with the fabs of that factor. @@ -1262,6 +1452,40 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + FunctionType *FT = Callee->getFunctionType(); + + // Just make sure this has 1 argument of FP type, which matches the + // result type. 
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast(Op1); + if (!OpC) + return Ret; + + // tan(atan(x)) -> x + // tanf(atanf(x)) -> x + // tanl(atanl(x)) -> x + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + ((Func == LibFunc::atan && Callee->getName() == "tan") || + (Func == LibFunc::atanf && Callee->getName() == "tanf") || + (Func == LibFunc::atanl && Callee->getName() == "tanl"))) + Ret = OpC->getArgOperand(0); + return Ret; +} + static bool isTrigLibCall(CallInst *CI); static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, bool UseFloat, Value *&Sin, Value *&Cos, @@ -1329,9 +1553,9 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, return; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); LibFunc::Func Func; - if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI)) + if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) return; if (IsFloat) { @@ -1353,10 +1577,8 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl &Calls, Value *Res) { - for (SmallVectorImpl::iterator I = Calls.begin(), E = Calls.end(); - I != E; ++I) { - replaceAllUsesWith(*I, Res); - } + for (CallInst *C : Calls) + replaceAllUsesWith(C, Res); } void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, @@ -1387,8 +1609,7 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, if (Instruction *ArgInst = dyn_cast(Arg)) { // If the argument is an instruction, it must dominate all uses so put our // sincos call there. - BasicBlock::iterator Loc = ArgInst; - B.SetInsertPoint(ArgInst->getParent(), ++Loc); + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); } else { // Otherwise (e.g. for a constant) the beginning of the function is as // good a place as any. @@ -1413,15 +1634,16 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, // Integer Library Call Optimizations //===----------------------------------------------------------------------===// +static bool checkIntUnaryReturnAndParam(Function *Callee) { + FunctionType *FT = Callee->getFunctionType(); + return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isIntegerTy(); +} + Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || - !FT->getParamType(0)->isIntegerTy()) + if (!checkIntUnaryReturnAndParam(Callee)) return nullptr; - Value *Op = CI->getArgOperand(0); // Constant fold. 
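// What the ffs lowering below computes (standalone sketch): ffs(0) == 0,
// otherwise one plus the index of the least significant set bit, i.e.
// cttz(x) + 1. The cttz intrinsic may now be told that a zero input is
// undefined (the getTrue() flag) because the surrounding code already
// selects the final result against an explicit zero test:

#include <cstdio>
static int ffsSketch(unsigned X) {
  return X ? __builtin_ctz(X) + 1 : 0; // __builtin_ctz is undefined at 0
}
int main() {
  std::printf("%d %d %d\n", ffsSketch(8), ffsSketch(1), ffsSketch(0)); // 4 1 0
}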
@@ -1436,7 +1658,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz"); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1461,11 +1683,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isdigit(c) -> (c-'0') &B) { } Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isascii(c) -> c &B) { } Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require i32(i32) - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // toascii(c) -> c & 0x7f @@ -1529,10 +1739,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls) - return false; - - if (!Callee || !Callee->isDeclaration()) + if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) @@ -1968,16 +2175,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { // Command-line parameter overrides function attribute. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (Callee->hasFnAttribute("unsafe-fp-math")) { - // FIXME: This is the same problem as described in optimizeSqrt(). - // If calls gain access to IR-level FMF, then use that instead of a - // function attribute. - - // Check for unsafe-fp-math = true. - Attribute Attr = Callee->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() == "true") - UnsafeFPShrink = true; - } + else if (canUseUnsafeFPMath(Callee)) + UnsafeFPShrink = true; // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast(CI)) { @@ -1990,6 +2189,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeExp2(CI, Builder); case Intrinsic::fabs: return optimizeFabs(CI, Builder); + case Intrinsic::log: + return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); default: @@ -2001,13 +2202,17 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { // Try to further simplify the result. 
CallInst *SimplifiedCI = dyn_cast(SimplifiedFortifiedCI); - if (SimplifiedCI && SimplifiedCI->getCalledFunction()) - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { + // Use an IR Builder from SimplifiedCI if available instead of CI + // to guarantee we reach all uses we might replace later on. + IRBuilder<> TmpBuilder(SimplifiedCI); + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { // If we were able to further simplify, remove the now redundant call. SimplifiedCI->replaceAllUsesWith(V); SimplifiedCI->eraseFromParent(); return V; } + } return SimplifiedFortifiedCI; } @@ -2068,8 +2273,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeFWrite(CI, Builder); case LibFunc::fputs: return optimizeFPuts(CI, Builder); + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + return optimizeLog(CI, Builder); case LibFunc::puts: return optimizePuts(CI, Builder); + case LibFunc::tan: + case LibFunc::tanf: + case LibFunc::tanl: + return optimizeTan(CI, Builder); case LibFunc::perror: return optimizeErrorReporting(CI, Builder); case LibFunc::vfprintf: @@ -2097,24 +2312,23 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { case LibFunc::exp: case LibFunc::exp10: case LibFunc::expm1: - case LibFunc::log: - case LibFunc::log10: - case LibFunc::log1p: - case LibFunc::log2: - case LibFunc::logb: case LibFunc::sin: case LibFunc::sinh: - case LibFunc::tan: case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; case LibFunc::copysign: - case LibFunc::fmin: - case LibFunc::fmax: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; + case LibFunc::fminf: + case LibFunc::fmin: + case LibFunc::fminl: + case LibFunc::fmaxf: + case LibFunc::fmax: + case LibFunc::fmaxl: + return optimizeFMinFMax(CI, Builder); default: return nullptr; } @@ -2133,37 +2347,27 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { Replacer(I, With); } -/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I, - Value *With) { - I->replaceAllUsesWith(With); - I->eraseFromParent(); -} - // TODO: // Additional cases that we need to add to this file: // // cbrt: // * cbrt(expN(X)) -> expN(x/3) // * cbrt(sqrt(x)) -> pow(x,1/6) -// * cbrt(sqrt(x)) -> pow(x,1/9) +// * cbrt(cbrt(x)) -> pow(x,1/9) // // exp, expf, expl: // * exp(log(x)) -> x // // log, logf, logl: // * log(exp(x)) -> x -// * log(x**y) -> y*log(x) // * log(exp(y)) -> y*log(e) -// * log(exp2(y)) -> y*log(2) // * log(exp10(y)) -> y*log(10) // * log(sqrt(x)) -> 0.5*log(x) -// * log(pow(x,y)) -> y*log(x) // // lround, lroundf, lroundl: // * lround(cnst) -> cnst' // // pow, powf, powl: -// * pow(exp(x),y) -> exp(x*y) // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) // @@ -2179,9 +2383,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// tan, tanf, tanl: -// * tan(atan(x)) -> x -// // trunc, truncf, truncl: // * trunc(cnst) -> cnst' // diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 000000000000..ad6b782caf8b --- /dev/null +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,85 @@ +//===- SplitModule.cpp - Split a module into partitions 
-------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+static void externalize(GlobalValue *GV) {
+  if (GV->hasLocalLinkage()) {
+    GV->setLinkage(GlobalValue::ExternalLinkage);
+    GV->setVisibility(GlobalValue::HiddenVisibility);
+  }
+
+  // Unnamed entities must be named consistently between modules. setName will
+  // give a distinct name to each such entity.
+  if (!GV->hasName())
+    GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+  if (auto GA = dyn_cast<GlobalAlias>(GV))
+    if (const GlobalObject *Base = GA->getBaseObject())
+      GV = Base;
+
+  StringRef Name;
+  if (const Comdat *C = GV->getComdat())
+    Name = C->getName();
+  else
+    Name = GV->getName();
+
+  // Partition by MD5 hash. We only need a few bits for evenness as the number
+  // of partitions will generally be in the 1-2 figure range; the low 16 bits
+  // are enough.
+  MD5 H;
+  MD5::MD5Result R;
+  H.update(Name);
+  H.final(R);
+  return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+    std::unique_ptr<Module> M, unsigned N,
+    std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+  for (Function &F : *M)
+    externalize(&F);
+  for (GlobalVariable &GV : M->globals())
+    externalize(&GV);
+  for (GlobalAlias &GA : M->aliases())
+    externalize(&GA);
+
+  // FIXME: We should be able to reuse M as the last partition instead of
+  // cloning it.
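// Miniature of the partition rule in isInPartition above (std::hash stands
// in for MD5 purely for illustration; the real code hashes with MD5 so the
// assignment is stable across hosts and runs):

#include <functional>
#include <string>
static bool inPartition(const std::string &Name, unsigned I, unsigned N) {
  return (std::hash<std::string>{}(Name) & 0xFFFF) % N == I;
}
// Globals in the same comdat share Name, so they always land together;
// otherwise names spread roughly evenly over the N partitions.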
+  for (unsigned I = 0; I != N; ++I) {
+    ValueToValueMapTy VMap;
+    std::unique_ptr<Module> MPart(
+        CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
+          return isInPartition(GV, I, N);
+        }));
+    if (I != 0)
+      MPart->setModuleInlineAsm("");
+    ModuleCallback(std::move(MPart));
+  }
+}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index a2a54da8590c..1d1f602b041d 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,7 +69,6 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7e00a80989dc..6b1d1dae5f01 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,11 +50,11 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   //
   std::vector<BasicBlock*> ReturningBlocks;
   std::vector<BasicBlock*> UnreachableBlocks;
-  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-    if (isa<ReturnInst>(I->getTerminator()))
-      ReturningBlocks.push_back(I);
-    else if (isa<UnreachableInst>(I->getTerminator()))
-      UnreachableBlocks.push_back(I);
+  for (BasicBlock &I : F)
+    if (isa<ReturnInst>(I.getTerminator()))
+      ReturningBlocks.push_back(&I);
+    else if (isa<UnreachableInst>(I.getTerminator()))
+      UnreachableBlocks.push_back(&I);
 
   // Then unreachable blocks.
   if (UnreachableBlocks.empty()) {
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 8c72641da9e7..1add78e01657 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -19,11 +19,14 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
 
 using namespace llvm;
 
 // Out of line method to get vtable etc for class.
 void ValueMapTypeRemapper::anchor() {}
 void ValueMaterializer::anchor() {}
+void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) {
+}
 
 Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
                       ValueMapTypeRemapper *TypeMapper,
@@ -35,15 +38,28 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
 
   // If we have a materializer and it can materialize a value, use that.
   if (Materializer) {
-    if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
-      return VM[V] = NewV;
+    if (Value *NewV =
+            Materializer->materializeDeclFor(const_cast<Value *>(V))) {
+      VM[V] = NewV;
+      if (auto *NewGV = dyn_cast<GlobalValue>(NewV))
+        Materializer->materializeInitFor(
+            NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V)));
+      return NewV;
+    }
   }
 
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V))
+  if (isa<GlobalValue>(V)) {
+    if (Flags & RF_NullMapMissingGlobalValues) {
+      assert(!(Flags & RF_IgnoreMissingEntries) &&
+             "Illegal to specify both RF_NullMapMissingGlobalValues and "
+             "RF_IgnoreMissingEntries");
+      return nullptr;
+    }
     return VM[V] = const_cast<Value *>(V);
-
+  }
+
   if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
     // Inline asm may need *type* remapping.
     FunctionType *NewTy = IA->getFunctionType();
@@ -73,7 +89,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
   // correct. For now, just match behaviour from before the metadata/value
   // split.
// - // assert(MappedMD && "Referenced metadata value not in value map"); + // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata value not in value map"); return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD); } @@ -127,9 +144,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Ops.push_back(MapValue(cast(C->getOperand(OpNo)), VM, Flags, TypeMapper, Materializer)); } - + Type *NewSrcTy = nullptr; + if (TypeMapper) + if (auto *GEPO = dyn_cast(C)) + NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); + if (ConstantExpr *CE = dyn_cast(C)) - return VM[V] = CE->getWithOperands(Ops, NewTy); + return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); if (isa(C)) return VM[V] = ConstantArray::get(cast(NewTy), Ops); if (isa(C)) @@ -146,29 +167,42 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, } static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key, - Metadata *Val) { + Metadata *Val, ValueMaterializer *Materializer, + RemapFlags Flags) { VM.MD()[Key].reset(Val); + if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) { + auto *N = dyn_cast_or_null(Val); + // Need to invoke this once we have non-temporary MD. + if (!N || !N->isTemporary()) + Materializer->replaceTemporaryMetadata(Key, Val); + } return Val; } -static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) { - return mapToMetadata(VM, MD, const_cast(MD)); +static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD, + ValueMaterializer *Materializer, RemapFlags Flags) { + return mapToMetadata(VM, MD, const_cast(MD), Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl &Cycles, +static Metadata *mapMetadataOp(Metadata *Op, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { if (!Op) return nullptr; - if (Metadata *MappedOp = - MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer)) + + if (Materializer && !Materializer->isMetadataNeeded(Op)) + return nullptr; + + if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags, + TypeMapper, Materializer)) return MappedOp; // Use identity map if MappedOp is null and we can ignore missing entries. if (Flags & RF_IgnoreMissingEntries) @@ -178,89 +212,113 @@ static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl &Cycles, // correct. For now, just match behaviour from before the metadata/value // split. // - // llvm_unreachable("Referenced metadata not in value map!"); + // assert((Flags & RF_NullMapMissingGlobalValues) && + // "Referenced metadata not in value map!"); return nullptr; } -/// \brief Remap nodes. -/// -/// Insert \c NewNode in the value map, and then remap \c OldNode's operands. -/// Assumes that \c NewNode is already a clone of \c OldNode. -/// -/// \pre \c NewNode is a clone of \c OldNode. 
-static bool remap(const MDNode *OldNode, MDNode *NewNode, - SmallVectorImpl &Cycles, ValueToValueMapTy &VM, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(OldNode->getNumOperands() == NewNode->getNumOperands() && - "Expected nodes to match"); - assert(OldNode->isResolved() && "Expected resolved node"); - assert(!NewNode->isUniqued() && "Expected non-uniqued node"); +/// Resolve uniquing cycles involving the given metadata. +static void resolveCycles(Metadata *MD, bool MDMaterialized) { + if (auto *N = dyn_cast_or_null(MD)) { + if (!MDMaterialized && N->isTemporary()) + return; + if (!N->isResolved()) + N->resolveCycles(MDMaterialized); + } +} + +/// Remap the operands of an MDNode. +/// +/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is +/// distinct, uniquing cycles are resolved as they're found. +/// +/// \pre \c Node.isDistinct() or \c Node.isTemporary(). +static bool remapOperands(MDNode &Node, + SmallVectorImpl &DistinctWorklist, + ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(!Node.isUniqued() && "Expected temporary or distinct node"); + const bool IsDistinct = Node.isDistinct(); - // Map the node upfront so it's available for cyclic references. - mapToMetadata(VM, OldNode, NewNode); bool AnyChanged = false; - for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) { - Metadata *Old = OldNode->getOperand(I); - assert(NewNode->getOperand(I) == Old && - "Expected old operands to already be in place"); - - Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags, - TypeMapper, Materializer); + for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) { + Metadata *Old = Node.getOperand(I); + Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); if (Old != New) { AnyChanged = true; - NewNode->replaceOperandWith(I, New); + Node.replaceOperandWith(I, New); + + // Resolve uniquing cycles underneath distinct nodes on the fly so they + // don't infect later operands. + if (IsDistinct) + resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata)); } } return AnyChanged; } -/// \brief Map a distinct MDNode. +/// Map a distinct MDNode. /// -/// Distinct nodes are not uniqued, so they must always recreated. +/// Whether distinct nodes change is independent of their operands. If \a +/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in +/// place; effectively, they're moved from one graph to another. Otherwise, +/// they're cloned/duplicated, and the new copy's operands are remapped. static Metadata *mapDistinctNode(const MDNode *Node, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { assert(Node->isDistinct() && "Expected distinct node"); - MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone()); - remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer); + MDNode *NewMD; + if (Flags & RF_MoveDistinctMDs) + NewMD = const_cast(Node); + else + NewMD = MDNode::replaceWithDistinct(Node->clone()); - // Track any cycles beneath this node. - for (Metadata *Op : NewMD->operands()) - if (auto *Node = dyn_cast_or_null(Op)) - if (!Node->isResolved()) - Cycles.push_back(Node); - - return NewMD; + // Remap operands later. 
+ DistinctWorklist.push_back(NewMD); + return mapToMetadata(VM, Node, NewMD, Materializer, Flags); } /// \brief Map a uniqued MDNode. /// /// Uniqued nodes may not need to be recreated (they may map to themselves). static Metadata *mapUniquedNode(const MDNode *Node, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - assert(Node->isUniqued() && "Expected uniqued node"); + assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) && + "Expected uniqued node"); - // Create a temporary node upfront in case we have a metadata cycle. + // Create a temporary node and map it upfront in case we have a uniquing + // cycle. If necessary, this mapping will get updated by RAUW logic before + // returning. auto ClonedMD = Node->clone(); - if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer)) - // No operands changed, so use the identity mapping. - return mapToSelf(VM, Node); + mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags); + if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper, + Materializer)) { + // No operands changed, so use the original. + ClonedMD->replaceAllUsesWith(const_cast(Node)); + // Even though replaceAllUsesWith would have replaced the value map + // entry, we need to explictly map with the final non-temporary node + // to replace any temporary metadata via the callback. + return mapToSelf(VM, Node, Materializer, Flags); + } - // At least one operand has changed, so uniquify the cloned node. + // Uniquify the cloned node. Explicitly map it with the final non-temporary + // node so that replacement of temporary metadata via the callback occurs. return mapToMetadata(VM, Node, - MDNode::replaceWithUniqued(std::move(ClonedMD))); + MDNode::replaceWithUniqued(std::move(ClonedMD)), + Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { @@ -269,26 +327,28 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return NewMD; if (isa(MD)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (isa(MD)) if ((Flags & RF_NoModuleLevelChanges)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (const auto *VMD = dyn_cast(MD)) { Value *MappedV = MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer); if (VMD->getValue() == MappedV || (!MappedV && (Flags & RF_IgnoreMissingEntries))) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // FIXME: This assert crashes during bootstrap, but I think it should be // correct. For now, just match behaviour from before the metadata/value // split. // - // assert(MappedV && "Referenced metadata not in value map!"); + // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata not in value map!"); if (MappedV) - return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV)); + return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer, + Flags); return nullptr; } @@ -299,37 +359,54 @@ static Metadata *MapMetadataImpl(const Metadata *MD, // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. 
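[Note: the "map it upfront in case we have a uniquing cycle" comment in mapUniquedNode is the crux of this function. A minimal self-contained sketch, in plain C++ with a toy node type rather than LLVM's Metadata classes, of why the clone must be registered in the map before its operands are visited:]

    #include <map>
    #include <vector>

    struct Node { std::vector<Node *> Ops; };

    // Registering the clone before recursing makes a self-referential node
    // terminate: the recursive call finds the mapping instead of looping.
    Node *mapNode(Node *N, std::map<Node *, Node *> &VM) {
      auto It = VM.find(N);
      if (It != VM.end())
        return It->second;
      Node *Clone = new Node(*N);
      VM[N] = Clone; // map upfront: this is what breaks uniquing cycles
      for (Node *&Op : Clone->Ops)
        if (Op)
          Op = mapNode(Op, VM);
      return Clone;
    }

[If no operand actually changed, the real code additionally RAUWs the clone back to the original node and maps to self, which is why mapToSelf must still run the materializer callback.]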
if (Flags & RF_NoModuleLevelChanges) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // Require resolved nodes whenever metadata might be remapped. - assert(Node->isResolved() && "Unexpected unresolved node"); + assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) && + "Unexpected unresolved node"); + + if (Materializer && Node->isTemporary()) { + assert(Flags & RF_HaveUnmaterializedMetadata); + Metadata *TempMD = + Materializer->mapTemporaryMetadata(const_cast(MD)); + // If the above callback returned an existing temporary node, use it + // instead of the current temporary node. This happens when earlier + // function importing passes already created and saved a temporary + // metadata node for the same value id. + if (TempMD) { + mapToMetadata(VM, MD, TempMD, Materializer, Flags); + return TempMD; + } + } if (Node->isDistinct()) - return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); - return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); } Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - SmallVector Cycles; - Metadata *NewMD = - MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); + SmallVector DistinctWorklist; + Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); - // Resolve cycles underneath MD. - if (NewMD && NewMD != MD) { - if (auto *N = dyn_cast(NewMD)) - if (!N->isResolved()) - N->resolveCycles(); + // When there are no module-level changes, it's possible that the metadata + // graph has temporaries. Skip the logic to resolve cycles, since it's + // unnecessary (and invalid) in that case. + if (Flags & RF_NoModuleLevelChanges) + return NewMD; - for (MDNode *N : Cycles) - if (!N->isResolved()) - N->resolveCycles(); - } else { - // Shouldn't get unresolved cycles if nothing was remapped. - assert(Cycles.empty() && "Expected no unresolved cycles"); - } + // Resolve cycles involving the entry metadata. + resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata)); + + // Remap the operands of distinct MDNodes. + while (!DistinctWorklist.empty()) + remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags, + TypeMapper, Materializer); return NewMD; } @@ -374,14 +451,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, // Remap attached metadata. 
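[Note: the new mapTemporaryMetadata path exists for function importing: when the module still has unmaterialized metadata, the materializer may hand back a temporary node that an earlier import pass already created for the same value id, and the mapper must reuse it rather than mint another. A loose, self-contained model of that reuse contract; the cache and its keying are purely illustrative, not the upstream data structure:]

    #include <map>

    struct MDTemp {};

    // Hypothetical cache: one temporary node per source metadata, shared
    // across import passes so later passes reuse earlier temporaries.
    MDTemp *mapTemporaryMetadata(const void *MD,
                                 std::map<const void *, MDTemp *> &Cache) {
      auto It = Cache.find(MD);
      if (It != Cache.end())
        return It->second;           // reuse the earlier temporary
      return Cache[MD] = new MDTemp; // first request: create and remember
    }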
SmallVector, 4> MDs; I->getAllMetadata(MDs); - for (SmallVectorImpl>::iterator - MI = MDs.begin(), - ME = MDs.end(); - MI != ME; ++MI) { - MDNode *Old = MI->second; + for (const auto &MI : MDs) { + MDNode *Old = MI.second; MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer); if (New != Old) - I->setMetadata(MI->first, New); + I->setMetadata(MI.first, New); } if (!TypeMapper) diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 215d6f9a1eb6..8844d574a79d 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -25,8 +25,11 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -204,9 +207,10 @@ namespace { BBVectorize(Pass *P, Function &F, const VectorizeConfig &C) : BasicBlockPass(ID), Config(C) { - AA = &P->getAnalysis(); + AA = &P->getAnalysis().getAAResults(); DT = &P->getAnalysis().getDomTree(); - SE = &P->getAnalysis(); + SE = &P->getAnalysis().getSE(); + TLI = &P->getAnalysis().getTLI(); TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis().getTTI(F); @@ -221,6 +225,7 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; ScalarEvolution *SE; + const TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; // FIXME: const correct? @@ -437,9 +442,10 @@ namespace { bool runOnBasicBlock(BasicBlock &BB) override { // OptimizeNone check deferred to vectorizeBB(). - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); + TLI = &getAnalysis().getTLI(); TTI = IgnoreTargetInfo ? nullptr : &getAnalysis().getTTI( @@ -450,13 +456,15 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { BasicBlockPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } @@ -842,7 +850,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. - (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo()); + (void)SimplifyInstructionsInBlock(&BB, TLI); return true; } @@ -1239,20 +1247,23 @@ namespace { if (I == Start) IAfterStart = true; bool IsSimpleLoadStore; - if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; + if (!isInstVectorizable(&*I, IsSimpleLoadStore)) + continue; // Look for an instruction with which to pair instruction *I... DenseSet Users; AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); + if (I->mayWriteToMemory()) + WriteSet.add(&*I); bool JAfterStart = IAfterStart; BasicBlock::iterator J = std::next(I); for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { - if (J == Start) JAfterStart = true; + if (&*J == Start) + JAfterStart = true; // Determine if J uses I, if so, exit the loop. 
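[Note: the BBVectorize pass-manager hunks above also lost their template arguments. Based on the wrapper-pass APIs of this period, the analysis fetches and getAnalysisUsage plausibly read as below; a reconstruction, not verbatim text:]

    // runOnBasicBlock: analyses now come through *WrapperPass front-ends.
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      BasicBlockPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<TargetLibraryInfoWrapperPass>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addPreserved<GlobalsAAWrapperPass>();
      AU.addPreserved<SCEVAAWrapperPass>();
      AU.setPreservesCFG();
    }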
- bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); + bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep); if (Config.FastDep) { // Note: For this heuristic to be effective, independent operations // must tend to be intermixed. This is likely to be true from some @@ -1269,25 +1280,26 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. int CostSavings, FixedOrder; - if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, - CostSavings, FixedOrder)) continue; + if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len, + CostSavings, FixedOrder)) + continue; // J is a candidate for merging with I. if (PairableInsts.empty() || - PairableInsts[PairableInsts.size()-1] != I) { - PairableInsts.push_back(I); + PairableInsts[PairableInsts.size() - 1] != &*I) { + PairableInsts.push_back(&*I); } - CandidatePairs[I].push_back(J); + CandidatePairs[&*I].push_back(&*J); ++TotalPairs; if (TTI) - CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), - CostSavings)); + CandidatePairCostSavings.insert( + ValuePairWithCost(ValuePair(&*I, &*J), CostSavings)); if (FixedOrder == 1) - FixedOrderPairs.insert(ValuePair(I, J)); + FixedOrderPairs.insert(ValuePair(&*I, &*J)); else if (FixedOrder == -1) - FixedOrderPairs.insert(ValuePair(J, I)); + FixedOrderPairs.insert(ValuePair(&*J, &*I)); // The next call to this function must start after the last instruction // selected during this invocation. @@ -1468,14 +1480,16 @@ namespace { BasicBlock::iterator E = BB.end(), EL = BasicBlock::iterator(cast(PairableInsts.back())); for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { - if (IsInPair.find(I) == IsInPair.end()) continue; + if (IsInPair.find(&*I) == IsInPair.end()) + continue; DenseSet Users; AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); + if (I->mayWriteToMemory()) + WriteSet.add(&*I); for (BasicBlock::iterator J = std::next(I); J != E; ++J) { - (void) trackUsesOfI(Users, WriteSet, I, J); + (void)trackUsesOfI(Users, WriteSet, &*I, &*J); if (J == EL) break; @@ -1484,7 +1498,7 @@ namespace { for (DenseSet::iterator U = Users.begin(), E = Users.end(); U != E; ++U) { if (IsInPair.find(*U) == IsInPair.end()) continue; - PairableInstUsers.insert(ValuePair(I, *U)); + PairableInstUsers.insert(ValuePair(&*I, *U)); } if (I == EL) @@ -2806,55 +2820,51 @@ namespace { Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, Instruction *&K2) { - if (isa(I)) { - AA->replaceWithNewValue(I, K); - AA->replaceWithNewValue(J, K); + if (isa(I)) + return; + + Type *IType = I->getType(); + Type *JType = J->getType(); + + VectorType *VType = getVecTypeForPair(IType, JType); + unsigned numElem = VType->getNumElements(); + + unsigned numElemI = getNumScalarElements(IType); + unsigned numElemJ = getNumScalarElements(JType); + + if (IType->isVectorTy()) { + std::vector Mask1(numElemI), Mask2(numElemI); + for (unsigned v = 0; v < numElemI; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v); + } + + K1 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get(Mask1), + getReplacementName(K, false, 1)); } else { - Type *IType = I->getType(); - Type *JType = J->getType(); - - VectorType *VType = getVecTypeForPair(IType, JType); - unsigned numElem = VType->getNumElements(); - - unsigned numElemI = getNumScalarElements(IType); - 
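[Note: the pervasive `I` to `&*I` rewrites in this file are mechanical, and appear to be preparation for removing the implicit iterator-to-pointer conversion from ilist iterators around this revision: anywhere an iterator is passed where an Instruction pointer is expected, the dereference-and-take-address must now be explicit. The same idiom with a standard container:]

    #include <list>

    void takesPointer(int *) {}

    void example(std::list<int> &L) {
      for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E; ++I)
        takesPointer(&*I); // explicit: iterators no longer decay to pointers
    }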
unsigned numElemJ = getNumScalarElements(JType); - - if (IType->isVectorTy()) { - std::vector Mask1(numElemI), Mask2(numElemI); - for (unsigned v = 0; v < numElemI; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); - } - - K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( Mask1), - getReplacementName(K, false, 1)); - } else { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - K1 = ExtractElementInst::Create(K, CV0, - getReplacementName(K, false, 1)); - } - - if (JType->isVectorTy()) { - std::vector Mask1(numElemJ), Mask2(numElemJ); - for (unsigned v = 0; v < numElemJ; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); - } - - K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( Mask2), - getReplacementName(K, false, 2)); - } else { - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); - K2 = ExtractElementInst::Create(K, CV1, - getReplacementName(K, false, 2)); - } - - K1->insertAfter(K); - K2->insertAfter(K1); - InsertionPt = K2; + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1)); } + + if (JType->isVectorTy()) { + std::vector Mask1(numElemJ), Mask2(numElemJ); + for (unsigned v = 0; v < numElemJ; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v); + } + + K2 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get(Mask2), + getReplacementName(K, false, 2)); + } else { + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1); + K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2)); + } + + K1->insertAfter(K); + K2->insertAfter(K1); + InsertionPt = K2; } // Move all uses of the function I (including pairing-induced uses) after J. @@ -2869,7 +2879,7 @@ namespace { if (I->mayWriteToMemory()) WriteSet.add(I); for (; cast(L) != J; ++L) - (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); + (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs); assert(cast(L) == J && "Tracking has not proceeded far enough to check for dependencies"); @@ -2891,9 +2901,9 @@ namespace { if (I->mayWriteToMemory()) WriteSet.add(I); for (; cast(L) != J;) { - if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { + if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) { // Move this instruction - Instruction *InstToMove = L; ++L; + Instruction *InstToMove = &*L++; DEBUG(dbgs() << "BBV: moving: " << *InstToMove << " to after " << *InsertionPt << "\n"); @@ -2924,11 +2934,11 @@ namespace { // Note: We cannot end the loop when we reach J because J could be moved // farther down the use chain by another instruction pairing. Also, J // could be before I if this is an inverted input. 
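[Note: the restructured code above builds two shuffle masks that pull the I-half and the J-half back out of the fused vector K, whose lanes are laid out as I's elements followed by J's. The index arithmetic, extracted into a standalone sketch:]

    #include <vector>

    // K = [ I0 .. I(nI-1) | J0 .. J(nJ-1) ]; K1 re-extracts the I half.
    std::vector<int> maskForIHalf(int NumElemI) {
      std::vector<int> M(NumElemI);
      for (int v = 0; v < NumElemI; ++v)
        M[v] = v;                 // lanes 0 .. nI-1
      return M;
    }

    // K2 re-extracts the J half from the back of the fused vector.
    std::vector<int> maskForJHalf(int NumElemI, int NumElemJ) {
      std::vector<int> M(NumElemJ);
      for (int v = 0; v < NumElemJ; ++v)
        M[v] = NumElemI + v;      // lanes nI .. nI+nJ-1
      return M;
    }

[Scalar inputs take the ExtractElementInst path instead, pulling lane 0 for I and the last lane for J, matching the CV0/CV1 constants above.]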
- for (BasicBlock::iterator E = BB.end(); cast(L) != E; ++L) { - if (trackUsesOfI(Users, WriteSet, I, L)) { + for (BasicBlock::iterator E = BB.end(); L != E; ++L) { + if (trackUsesOfI(Users, WriteSet, I, &*L)) { if (L->mayReadFromMemory()) { - LoadMoveSet[L].push_back(I); - LoadMoveSetPairs.insert(ValuePair(L, I)); + LoadMoveSet[&*L].push_back(I); + LoadMoveSetPairs.insert(ValuePair(&*L, I)); } } } @@ -2991,7 +3001,7 @@ namespace { DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { - DenseMap::iterator P = ChosenPairs.find(PI); + DenseMap::iterator P = ChosenPairs.find(&*PI); if (P == ChosenPairs.end()) { ++PI; continue; @@ -3116,12 +3126,9 @@ namespace { } else if (!isa(K)) K->mutateType(getVecTypeForPair(L->getType(), H->getType())); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_fpmath - }; + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_group}; combineMetadata(K, H, KnownIDs); K->intersectOptionalDataWith(H); @@ -3145,8 +3152,6 @@ namespace { if (!isa(I)) { L->replaceAllUsesWith(K1); H->replaceAllUsesWith(K2); - AA->replaceWithNewValue(L, K1); - AA->replaceWithNewValue(H, K2); } // Instructions that may read from memory may be in the load move set. @@ -3197,10 +3202,14 @@ namespace { char BBVectorize::ID = 0; static const char bb_vectorize_name[] = "Basic-Block Vectorization"; INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 69ca2688c810..a627dd665179 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -48,7 +48,6 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" @@ -58,10 +57,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -99,6 +101,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include +#include #include #include @@ -123,6 +126,11 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), 
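[Note: for readers unfamiliar with combineMetadata, the KnownIDs array is a whitelist: kinds listed there are merged between the fused instruction K and its partner H in a kind-specific way, while kinds not listed are dropped from K as no longer trustworthy. The change above adds !invariant_group to the whitelist so it can survive fusion. The call, annotated:]

    // Kinds named in KnownIDs are merged kind-appropriately; any other
    // metadata kind is stripped from K (the fused instruction).
    unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                           LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                           LLVMContext::MD_invariant_group};
    combineMetadata(K, H, KnownIDs);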
"trip count that is smaller than this " "value.")); +static cl::opt MaximizeBandwidth( + "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, + cl::desc("Maximize bandwidth when selecting vectorization factor which " + "will be determined by the smallest type in loop.")); + /// This enables versioning on the strides of symbolically striding memory /// accesses in code like the following. /// for (i = 0; i < N; ++i) @@ -136,7 +144,7 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), /// ... static cl::opt EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, - cl::desc("Enable symblic stride memory access versioning")); + cl::desc("Enable symbolic stride memory access versioning")); static cl::opt EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, @@ -214,12 +222,27 @@ static cl::opt MaxNestedScalarReductionIC( cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); +static cl::opt PragmaVectorizeMemoryCheckThreshold( + "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum allowed number of runtime memory checks with a " + "vectorize(enable) pragma.")); + +static cl::opt VectorizeSCEVCheckThreshold( + "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed.")); + +static cl::opt PragmaVectorizeSCEVCheckThreshold( + "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed with a " + "vectorize(enable) pragma")); + namespace { // Forward declarations. +class LoopVectorizeHints; class LoopVectorizationLegality; class LoopVectorizationCostModel; -class LoopVectorizeHints; +class LoopVectorizationRequirements; /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. @@ -245,6 +268,32 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) { return VectorType::get(Scalar, VF); } +/// A helper function that returns GEP instruction and knows to skip a +/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination +/// pointee types of the 'bitcast' have the same size. +/// For example: +/// bitcast double** %var to i64* - can be skipped +/// bitcast double** %var to i8* - can not +static GetElementPtrInst *getGEPInstruction(Value *Ptr) { + + if (isa(Ptr)) + return cast(Ptr); + + if (isa(Ptr) && + isa(cast(Ptr)->getOperand(0))) { + Type *BitcastTy = Ptr->getType(); + Type *GEPTy = cast(Ptr)->getSrcTy(); + if (!isa(BitcastTy) || !isa(GEPTy)) + return nullptr; + Type *Pointee1Ty = cast(BitcastTy)->getPointerElementType(); + Type *Pointee2Ty = cast(GEPTy)->getPointerElementType(); + const DataLayout &DL = cast(Ptr)->getModule()->getDataLayout(); + if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty)) + return cast(cast(Ptr)->getOperand(0)); + } + return nullptr; +} + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -261,25 +310,30 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) { /// and reduction variables that were found to a given vectorization factor. 
class InnerLoopVectorizer { public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), + VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - Legal(nullptr), AddedSafetyChecks(false) {} + TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), + AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). - void vectorize(LoopVectorizationLegality *L) { + // MinimumBitWidths maps scalar integer values to the smallest bitwidth they + // can be validly truncated to. The cost model has assumed this truncation + // will happen when vectorizing. + void vectorize(LoopVectorizationLegality *L, + MapVector MinimumBitWidths) { + MinBWs = MinimumBitWidths; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. createEmptyLoop(); // Widen each instruction in the old loop to a new one in the new loop. // Use the Legality module to find the induction and reduction variables. vectorizeLoop(); - // Register the new loop and update the analysis passes. - updateAnalysis(); } // Return true if any runtime check is added. @@ -302,14 +356,11 @@ protected: typedef DenseMap, VectorParts> EdgeMaskCache; - /// \brief Add checks for strides that were assumed to be 1. - /// - /// Returns the last check instruction and the first check instruction in the - /// pair as (first, last). - std::pair addStrideCheck(Instruction *Loc); - /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(); + /// Create a new induction variable inside L. + PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, + Value *Step, Instruction *DL); /// Copy and widen the instructions from the old loop. virtual void vectorizeLoop(); @@ -319,6 +370,9 @@ protected: /// See PR14725. void fixLCSSAPHIs(); + /// Shrinks vector element sizes based on information in "MinBWs". + void truncateToMinimalBitwidths(); + /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* /// mask for the block BB. @@ -329,7 +383,7 @@ protected: /// A helper function to vectorize a single BB within the innermost loop. void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV); - + /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. @@ -374,6 +428,23 @@ protected: /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); + /// Returns (and creates if needed) the original loop trip count. + Value *getOrCreateTripCount(Loop *NewLoop); + + /// Returns (and creates if needed) the trip count of the widened loop. + Value *getOrCreateVectorTripCount(Loop *NewLoop); + + /// Emit a bypass check to see if the trip count would overflow, or we + /// wouldn't have enough iterations to execute one vector loop. 
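[Note: the `MapVector MinimumBitWidths` parameter above also lost its arguments. Given how the map is used, scalar instruction to smallest legal bit width, the element types are plausibly `Instruction *` and `uint64_t`; an inferred reconstruction of the new entry point:]

    // The cost model hands the vectorizer a map of scalar values to the
    // narrowest width they can legally be truncated to; widening honors it
    // (see truncateToMinimalBitwidths below).
    void vectorize(LoopVectorizationLegality *L,
                   MapVector<Instruction *, uint64_t> MinimumBitWidths);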
+ void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); + /// Emit a bypass check to see if the vector trip count is nonzero. + void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass); + /// Emit a bypass check to see if all of the SCEV assumptions we've + /// had to make are correct. + void emitSCEVChecks(Loop *L, BasicBlock *Bypass); + /// Emit bypass checks to check any memory assumptions we may have made. + void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); + /// This is a helper class that holds the vectorizer state. It maps scalar /// instructions to vector instructions. When the code is 'unrolled' then /// then a single scalar value is mapped to multiple vector parts. The parts @@ -416,8 +487,10 @@ protected: /// The original loop. Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies + /// dynamic knowledge to simplify SCEV expressions and converts them to a + /// more usable form. + PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. @@ -462,12 +535,21 @@ protected: PHINode *Induction; /// The induction variable of the old basic block. PHINode *OldInduction; - /// Holds the extended (to the widest induction type) start index. - Value *ExtendedIdx; /// Maps scalars to widened vectors. ValueMap WidenMap; + /// Store instructions that should be predicated, as a pair + /// + SmallVector, 4> PredicatedStores; EdgeMaskCache MaskCache; + /// Trip count of the original loop. + Value *TripCount; + /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) + Value *VectorTripCount; + /// Map of scalar integer values to the smallest bitwidth they can be legally + /// represented as. The vector equivalents of these values should be truncated + /// to this type. + MapVector MinBWs; LoopVectorizationLegality *Legal; // Record whether runtime check is added. @@ -476,10 +558,11 @@ protected: class InnerLoopUnroller : public InnerLoopVectorizer { public: - InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned UnrollFactor) - : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {} + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -551,7 +634,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { if (Kind != LLVMContext::MD_tbaa && Kind != LLVMContext::MD_alias_scope && Kind != LLVMContext::MD_noalias && - Kind != LLVMContext::MD_fpmath) + Kind != LLVMContext::MD_fpmath && + Kind != LLVMContext::MD_nontemporal) continue; To->setMetadata(Kind, M.second); @@ -559,7 +643,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { } /// \brief Propagate known metadata from one instruction to a vector of others. -static void propagateMetadata(SmallVectorImpl &To, const Instruction *From) { +static void propagateMetadata(SmallVectorImpl &To, + const Instruction *From) { for (Value *V : To) if (Instruction *I = dyn_cast(V)) propagateMetadata(I, From); @@ -699,8 +784,9 @@ private: /// between the member and the group in a map. 
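[Note: the member comment below documents the widened loop's trip count as TripCount - TripCount % (VF*UF). A self-contained illustration of that arithmetic and of the matching minimum-iteration bypass emitted above it:]

    #include <cstdint>

    // The vector body runs VectorTripCount iterations in steps of VF*UF;
    // the remaining N % (VF*UF) iterations run in the scalar epilogue.
    uint64_t vectorTripCount(uint64_t N, unsigned VF, unsigned UF) {
      uint64_t Step = uint64_t(VF) * UF;
      return N - N % Step;
    }

    // emitMinimumIterationCountCheck's job, in spirit: bypass the vector
    // loop entirely when not even one full vector step fits.
    bool mustBypassVectorLoop(uint64_t N, unsigned VF, unsigned UF) {
      return N < uint64_t(VF) * UF;
    }

[For example, N = 100 with VF*UF = 8 gives a vector trip count of 96 and a four-iteration scalar epilogue.]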
class InterleavedAccessInfo { public: - InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT) - : SE(SE), TheLoop(L), DT(DT) {} + InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, + DominatorTree *DT) + : PSE(PSE), TheLoop(L), DT(DT) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -730,7 +816,11 @@ public: } private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. + /// Simplifies SCEV expressions in the context of existing SCEV assumptions. + /// The interleaved access analysis can also add new predicates (for example + /// by versioning strides of pointers). + PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; @@ -778,407 +868,6 @@ private: const ValueToValueMap &Strides); }; -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory -/// checks for a number of different conditions, such as the availability of a -/// single induction variable, that all types are supported and vectorize-able, -/// etc. This code reflects the capabilities of InnerLoopVectorizer. -/// This class is also used by InnerLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, const TargetTransformInfo *TTI, - LoopAccessAnalysis *LAA) - : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} - - /// This enum represents the kinds of inductions that we support. - enum InductionKind { - IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = C. - IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem). - }; - - /// A struct for saving information about induction variables. - struct InductionInfo { - InductionInfo(Value *Start, InductionKind K, ConstantInt *Step) - : StartValue(Start), IK(K), StepValue(Step) { - assert(IK != IK_NoInduction && "Not an induction"); - assert(StartValue && "StartValue is null"); - assert(StepValue && !StepValue->isZero() && "StepValue is zero"); - assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && - "StartValue is not a pointer for pointer induction"); - assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && - "StartValue is not an integer for integer induction"); - assert(StepValue->getType()->isIntegerTy() && - "StepValue is not an integer"); - } - InductionInfo() - : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} - - /// Get the consecutive direction. Returns: - /// 0 - unknown or non-consecutive. - /// 1 - consecutive and increasing. - /// -1 - consecutive and decreasing. 
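[Note: in the destructor above, `SmallSet DelSet` presumably reads `SmallSet<InterleaveGroup *, 4>`: several member instructions map to the same InterleaveGroup object, so the pointers must be deduplicated before deletion. The same pattern with standard containers:]

    #include <map>
    #include <set>

    struct Group {};

    // Many members share one Group; collect unique pointers so each group
    // is freed exactly once.
    void destroyGroups(std::map<int, Group *> &InterleaveGroupMap) {
      std::set<Group *> DelSet;
      for (auto &KV : InterleaveGroupMap)
        DelSet.insert(KV.second);
      for (Group *G : DelSet)
        delete G;
    }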
- int getConsecutiveDirection() const { - if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) - return StepValue->getSExtValue(); - return 0; - } - - /// Compute the transformed value of Index at offset StartValue using step - /// StepValue. - /// For integer induction, returns StartValue + Index * StepValue. - /// For pointer induction, returns StartValue[Index * StepValue]. - /// FIXME: The newly created binary instructions should contain nsw/nuw - /// flags, which can be found from the original scalar operations. - Value *transform(IRBuilder<> &B, Value *Index) const { - switch (IK) { - case IK_IntInduction: - assert(Index->getType() == StartValue->getType() && - "Index type does not match StartValue type"); - if (StepValue->isMinusOne()) - return B.CreateSub(StartValue, Index); - if (!StepValue->isOne()) - Index = B.CreateMul(Index, StepValue); - return B.CreateAdd(StartValue, Index); - - case IK_PtrInduction: - assert(Index->getType() == StepValue->getType() && - "Index type does not match StepValue type"); - if (StepValue->isMinusOne()) - Index = B.CreateNeg(Index); - else if (!StepValue->isOne()) - Index = B.CreateMul(Index, StepValue); - return B.CreateGEP(nullptr, StartValue, Index); - - case IK_NoInduction: - return nullptr; - } - llvm_unreachable("invalid enum"); - } - - /// Start value. - TrackingVH StartValue; - /// Induction kind. - InductionKind IK; - /// Step value. - ConstantInt *StepValue; - }; - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - typedef DenseMap ReductionList; - - /// InductionList saves induction variables and maps them to the - /// induction descriptor. - typedef MapVector InductionList; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the Induction variable. - PHINode *getInduction() { return Induction; } - - /// Returns the reduction variables found in the loop. - ReductionList *getReductionVars() { return &Reductions; } - - /// Returns the induction variables found in the loop. - InductionList *getInductionVars() { return &Inductions; } - - /// Returns the widest induction type. - Type *getWidestInductionType() { return WidestIndTy; } - - /// Returns True if V is an induction variable in this loop. - bool isInductionVariable(const Value *V); - - /// Return true if the block BB needs to be predicated in order for the loop - /// to be vectorized. - bool blockNeedsPredication(BasicBlock *BB); - - /// Check if this pointer is consecutive when vectorizing. This happens - /// when the last index of the GEP is the induction variable, or that the - /// pointer itself is an induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - /// Returns: - /// 0 - Stride is unknown or non-consecutive. - /// 1 - Address is consecutive. - /// -1 - Address is consecutive, and decreasing. - int isConsecutivePtr(Value *Ptr); - - /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V); - - /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } - - /// Returns the information that we collected about runtime memory check. 
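[Note: the InductionInfo::transform body deleted above appears to have been absorbed into the InductionDescriptor utility elsewhere in the tree. Its documented closed forms, as plain arithmetic:]

    // From the deleted comments:
    //   integer induction: StartValue + Index * StepValue
    //   pointer induction: StartValue[Index * StepValue]
    long long transformInt(long long Start, long long Step, long long Index) {
      return Start + Index * Step;
    }

    double *transformPtr(double *Start, long long Step, long long Index) {
      return Start + Index * Step; // GEP(StartValue, Index * StepValue)
    }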
- const RuntimePointerChecking *getRuntimePointerChecking() const { - return LAI->getRuntimePointerChecking(); - } - - const LoopAccessInfo *getLAI() const { - return LAI; - } - - /// \brief Check if \p Instr belongs to any interleaved access group. - bool isAccessInterleaved(Instruction *Instr) { - return InterleaveInfo.isInterleaved(Instr); - } - - /// \brief Get the interleaved access group that \p Instr belongs to. - const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { - return InterleaveInfo.getInterleaveGroup(Instr); - } - - unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } - - bool hasStride(Value *V) { return StrideSet.count(V); } - bool mustCheckStrides() { return !StrideSet.empty(); } - SmallPtrSet::iterator strides_begin() { - return StrideSet.begin(); - } - SmallPtrSet::iterator strides_end() { return StrideSet.end(); } - - /// Returns true if the target machine supports masked store operation - /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedStore(Type *DataType, Value *Ptr) { - return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr)); - } - /// Returns true if the target machine supports masked load operation - /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { - return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr)); - } - /// Returns true if vector representation of the instruction \p I - /// requires mask. - bool isMaskRequired(const Instruction* I) { - return (MaskedOp.count(I) != 0); - } - unsigned getNumStores() const { - return LAI->getNumStores(); - } - unsigned getNumLoads() const { - return LAI->getNumLoads(); - } - unsigned getNumPredStores() const { - return NumPredStores; - } -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeInstrs(); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if the loop is vectorizable - bool canVectorizeMemory(); - - /// Return true if we can vectorize this loop using the IF-conversion - /// transformation. - bool canVectorizeWithIfConvert(); - - /// Collect the variables that need to stay uniform after vectorization. - void collectLoopUniforms(); - - /// Return true if all of the instructions in the block can be speculatively - /// executed. \p SafePtrs is a list of addresses that are known to be legal - /// and we know that we can read from them without segfault. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); - - /// Returns the induction kind of Phi and record the step. This function may - /// return NoInduction if the PHI is not an induction variable. - InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue); - - /// \brief Collect memory access with loop invariant strides. - /// - /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop - /// invariant. - void collectStridedAccess(Value *LoadOrStoreInst); - - /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. 
- void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); - } - - unsigned NumPredStores; - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// Target Library Info. - TargetLibraryInfo *TLI; - /// Parent function - Function *TheFunction; - /// Target Transform Info - const TargetTransformInfo *TTI; - /// Dominator Tree. - DominatorTree *DT; - // LoopAccess analysis. - LoopAccessAnalysis *LAA; - // And the loop-accesses info corresponding to this loop. This pointer is - // null until canVectorizeMemory sets it up. - const LoopAccessInfo *LAI; - - /// The interleave access information contains groups of interleaved accesses - /// with the same stride and close to each other. - InterleavedAccessInfo InterleaveInfo; - - // --- vectorization state --- // - - /// Holds the integer induction variable. This is the counter of the - /// loop. - PHINode *Induction; - /// Holds the reduction variables. - ReductionList Reductions; - /// Holds all of the induction variables that we found in the loop. - /// Notice that inductions don't need to start at zero and that induction - /// variables can be pointers. - InductionList Inductions; - /// Holds the widest induction type encountered. - Type *WidestIndTy; - - /// Allowed outside users. This holds the reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet AllowedExit; - /// This set holds the variables which are known to be uniform after - /// vectorization. - SmallPtrSet Uniforms; - - /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr; - - ValueToValueMap Strides; - SmallPtrSet StrideSet; - - /// While vectorizing these instructions we have to generate a - /// call to the appropriate masked intrinsic - SmallPtrSet MaskedOp; -}; - -/// LoopVectorizationCostModel - estimates the expected speedups due to -/// vectorization. -/// In many cases vectorization is not profitable. This can happen because of -/// a number of reasons. In this class we mainly attempt to predict the -/// expected speedup/slowdowns due to the supported instruction set. We use the -/// TargetTransformInfo to query the different backends for the cost of -/// different operations. -class LoopVectorizationCostModel { -public: - LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, - LoopVectorizationLegality *Legal, - const TargetTransformInfo &TTI, - const TargetLibraryInfo *TLI, AssumptionCache *AC, - const Function *F, const LoopVectorizeHints *Hints) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), - TheFunction(F), Hints(Hints) { - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - } - - /// Information about vectorization costs - struct VectorizationFactor { - unsigned Width; // Vector width with best cost - unsigned Cost; // Cost of the loop with that width - }; - /// \return The most profitable vectorization factor and the cost of that VF. - /// This method checks every power of two up to VF. If UserVF is not ZERO - /// then this vectorization factor will be selected if vectorization is - /// possible. - VectorizationFactor selectVectorizationFactor(bool OptForSize); - - /// \return The size (in bits) of the widest type in the code that - /// needs to be vectorized. We ignore values that remain scalar such as - /// 64 bit loop indices. - unsigned getWidestType(); - - /// \return The desired interleave count. - /// If interleave count has been specified by metadata it will be returned. 
- /// Otherwise, the interleave count is computed and returned. VF and LoopCost - /// are the selected vectorization factor and the cost of the selected VF. - unsigned selectInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - - /// \return The most profitable unroll factor. - /// This method finds the best unroll-factor based on register pressure and - /// other parameters. VF and LoopCost are the selected vectorization factor - /// and the cost of the selected VF. - unsigned computeInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - - /// \brief A struct that represents some properties of the register usage - /// of a loop. - struct RegisterUsage { - /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - /// Holds the maximum number of concurrent live intervals in the loop. - unsigned MaxLocalUsers; - /// Holds the number of instructions in the loop. - unsigned NumInstructions; - }; - - /// \return information about the register usage of the loop. - RegisterUsage calculateRegisterUsage(); - -private: - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. - unsigned expectedCost(unsigned VF); - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - unsigned getInstructionCost(Instruction *I, unsigned VF); - - /// Returns whether the instruction is a load or store and will be a emitted - /// as a vector operation. - bool isConsecutiveLoadOrStore(Instruction *I); - - /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. - void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); - } - - /// Values used only by @llvm.assume calls. - SmallPtrSet EphValues; - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// Loop Info analysis. - LoopInfo *LI; - /// Vectorization legality. - LoopVectorizationLegality *Legal; - /// Vector target information. - const TargetTransformInfo &TTI; - /// Target Library Info. - const TargetLibraryInfo *TLI; - const Function *TheFunction; - // Loop Vectorize Hint. - const LoopVectorizeHints *Hints; -}; - /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. 
/// This class keeps a number of loop annotations locally (as member variables) @@ -1258,6 +947,41 @@ public: writeHintsToMetadata(Hints); } + bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { + if (getForce() == LoopVectorizeHints::FK_Disabled) { + DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, + L->getStartLoc(), emitRemark()); + return false; + } + + if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { + DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, + L->getStartLoc(), emitRemark()); + return false; + } + + if (getWidth() == 1 && getInterleave() == 1) { + // FIXME: Add a separate metadata to indicate when the loop has already + // been vectorized instead of setting width and count to 1. + DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); + // FIXME: Add interleave.disable metadata. This will allow + // vectorize.disable to be used without disabling the pass and errors + // to differentiate between disabled vectorization and a width of 1. + emitOptimizationRemarkAnalysis( + F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(), + "loop not vectorized: vectorization and interleaving are explicitly " + "disabled, or vectorize width and interleave count are both set to " + "1"); + return false; + } + + return true; + } + /// Dumps all the hint information. std::string emitRemark() const { VectorizationReport R; @@ -1281,6 +1005,26 @@ public: unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } enum ForceKind getForce() const { return (ForceKind)Force.Value; } + const char *vectorizeAnalysisPassName() const { + // If hints are provided that don't disable vectorization use the + // AlwaysPrint pass name to force the frontend to print the diagnostic. + if (getWidth() == 1) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Disabled) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + return LV_NAME; + return DiagnosticInfo::AlwaysPrint; + } + + bool allowReordering() const { + // When enabling loop hints are provided we allow the vectorizer to change + // the order of operations that is given by the scalar loop. This is not + // enabled by default because can be unsafe or inefficient. For example, + // reordering floating-point operations will change the way round-off + // error accumulates in the loop. + return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; + } private: /// Find hints specified in the loop metadata and update local values. 
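[Note: the new allowVectorization helper added below centralizes the force/width/interleave gating that was previously inlined in the pass. Its decision logic, reduced to a self-contained predicate; enum values as in the original ForceKind, with FK_Undefined = -1, and the width == 1 && interleave == 1 case being the "already vectorized" marker described in the FIXMEs:]

    enum ForceKind { FK_Undefined = -1, FK_Disabled = 0, FK_Enabled = 1 };

    bool allowVectorization(ForceKind Force, unsigned Width,
                            unsigned Interleave, bool AlwaysVectorize) {
      if (Force == FK_Disabled)
        return false; // #pragma vectorize(disable)
      if (!AlwaysVectorize && Force != FK_Enabled)
        return false; // only explicitly requested loops in this mode
      if (Width == 1 && Interleave == 1)
        return false; // marker for disabled / already-vectorized loops
      return true;
    }

[allowReordering, added in the same hunk, is the companion predicate: reordering is permitted only under an explicit enable pragma or an explicit width greater than 1, since it can change how floating-point round-off accumulates.]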
@@ -1398,10 +1142,17 @@ private: const Loop *TheLoop; }; +static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop, + const LoopVectorizeHints &Hints, + const LoopAccessReport &Message) { + const char *Name = Hints.vectorizeAnalysisPassName(); + LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name); +} + static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH) { - emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), LH.emitRemark()); + emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(), + LH.emitRemark()); if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { if (LH.getWidth() != 1) @@ -1415,6 +1166,420 @@ static void emitMissedWarning(Function *F, Loop *L, } } +/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and +/// to what vectorization factor. +/// This class does not look at the profitability of vectorization, only the +/// legality. This class has two main kinds of checks: +/// * Memory checks - The code in canVectorizeMemory checks if vectorization +/// will change the order of memory accesses in a way that will change the +/// correctness of the program. +/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory +/// checks for a number of different conditions, such as the availability of a +/// single induction variable, that all types are supported and vectorize-able, +/// etc. This code reflects the capabilities of InnerLoopVectorizer. +/// This class is also used by InnerLoopVectorizer for identifying +/// induction variable and the different reduction variables. +class LoopVectorizationLegality { +public: + LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, + DominatorTree *DT, TargetLibraryInfo *TLI, + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, + LoopAccessAnalysis *LAA, + LoopVectorizationRequirements *R, + const LoopVectorizeHints *H) + : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R), Hints(H) {} + + /// ReductionList contains the reduction descriptors for all + /// of the reductions that were found in the loop. + typedef DenseMap ReductionList; + + /// InductionList saves induction variables and maps them to the + /// induction descriptor. + typedef MapVector InductionList; + + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); + + /// Returns the Induction variable. + PHINode *getInduction() { return Induction; } + + /// Returns the reduction variables found in the loop. + ReductionList *getReductionVars() { return &Reductions; } + + /// Returns the induction variables found in the loop. + InductionList *getInductionVars() { return &Inductions; } + + /// Returns the widest induction type. + Type *getWidestInductionType() { return WidestIndTy; } + + /// Returns True if V is an induction variable in this loop. + bool isInductionVariable(const Value *V); + + /// Returns True if PN is a reduction variable in this loop. + bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } + + /// Return true if the block BB needs to be predicated in order for the loop + /// to be vectorized. 
+ bool blockNeedsPredication(BasicBlock *BB); + + /// Check if this pointer is consecutive when vectorizing. This happens + /// when the last index of the GEP is the induction variable, or that the + /// pointer itself is an induction variable. + /// This check allows us to vectorize A[idx] into a wide load/store. + /// Returns: + /// 0 - Stride is unknown or non-consecutive. + /// 1 - Address is consecutive. + /// -1 - Address is consecutive, and decreasing. + int isConsecutivePtr(Value *Ptr); + + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } + + /// Returns the information that we collected about runtime memory check. + const RuntimePointerChecking *getRuntimePointerChecking() const { + return LAI->getRuntimePointerChecking(); + } + + const LoopAccessInfo *getLAI() const { + return LAI; + } + + /// \brief Check if \p Instr belongs to any interleaved access group. + bool isAccessInterleaved(Instruction *Instr) { + return InterleaveInfo.isInterleaved(Instr); + } + + /// \brief Get the interleaved access group that \p Instr belongs to. + const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + return InterleaveInfo.getInterleaveGroup(Instr); + } + + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } + + bool hasStride(Value *V) { return StrideSet.count(V); } + bool mustCheckStrides() { return !StrideSet.empty(); } + SmallPtrSet::iterator strides_begin() { + return StrideSet.begin(); + } + SmallPtrSet::iterator strides_end() { return StrideSet.end(); } + + /// Returns true if the target machine supports masked store operation + /// for the given \p DataType and kind of access to \p Ptr. + bool isLegalMaskedStore(Type *DataType, Value *Ptr) { + return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); + } + /// Returns true if the target machine supports masked load operation + /// for the given \p DataType and kind of access to \p Ptr. + bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { + return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType); + } + /// Returns true if vector representation of the instruction \p I + /// requires mask. + bool isMaskRequired(const Instruction* I) { + return (MaskedOp.count(I) != 0); + } + unsigned getNumStores() const { + return LAI->getNumStores(); + } + unsigned getNumLoads() const { + return LAI->getNumLoads(); + } + unsigned getNumPredStores() const { + return NumPredStores; + } +private: + /// Check if a single basic block loop is vectorizable. + /// At this point we know that this is a loop with a constant trip count + /// and we only need to check individual instructions. + bool canVectorizeInstrs(); + + /// When we vectorize loops we may change the order in which + /// we read and write from memory. This method checks if it is + /// legal to vectorize the code, considering only memory constrains. + /// Returns true if the loop is vectorizable + bool canVectorizeMemory(); + + /// Return true if we can vectorize this loop using the IF-conversion + /// transformation. + bool canVectorizeWithIfConvert(); + + /// Collect the variables that need to stay uniform after vectorization. + void collectLoopUniforms(); + + /// Return true if all of the instructions in the block can be speculatively + /// executed. 
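[Note: the masked load/store legality below is a subtle semantic shift relative to the definitions deleted earlier in this diff. Previously consecutiveness was forwarded to TTI as an argument; now legality itself requires a consecutive pointer, and TTI is consulted only about the data type:]

    // Before (deleted above):
    //   return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
    // After:
    //   return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);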
\p SafePtrs is a list of addresses that are known to be legal + /// and we know that we can read from them without segfault. + bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); + + /// \brief Collect memory access with loop invariant strides. + /// + /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop + /// invariant. + void collectStridedAccess(Value *LoadOrStoreInst); + + /// Report an analysis message to assist the user in diagnosing loops that are + /// not vectorized. These are handled as LoopAccessReport rather than + /// VectorizationReport because the << operator of VectorizationReport returns + /// LoopAccessReport. + void emitAnalysis(const LoopAccessReport &Message) const { + emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message); + } + + unsigned NumPredStores; + + /// The loop that we evaluate. + Loop *TheLoop; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. + /// Applies dynamic knowledge to simplify SCEV expressions in the context + /// of existing SCEV assumptions. The analysis will also add a minimal set + /// of new predicates if this is required to enable vectorization and + /// unrolling. + PredicatedScalarEvolution &PSE; + /// Target Library Info. + TargetLibraryInfo *TLI; + /// Parent function + Function *TheFunction; + /// Target Transform Info + const TargetTransformInfo *TTI; + /// Dominator Tree. + DominatorTree *DT; + // LoopAccess analysis. + LoopAccessAnalysis *LAA; + // And the loop-accesses info corresponding to this loop. This pointer is + // null until canVectorizeMemory sets it up. + const LoopAccessInfo *LAI; + + /// The interleave access information contains groups of interleaved accesses + /// with the same stride and close to each other. + InterleavedAccessInfo InterleaveInfo; + + // --- vectorization state --- // + + /// Holds the integer induction variable. This is the counter of the + /// loop. + PHINode *Induction; + /// Holds the reduction variables. + ReductionList Reductions; + /// Holds all of the induction variables that we found in the loop. + /// Notice that inductions don't need to start at zero and that induction + /// variables can be pointers. + InductionList Inductions; + /// Holds the widest induction type encountered. + Type *WidestIndTy; + + /// Allowed outside users. This holds the reduction + /// vars which can be accessed from outside the loop. + SmallPtrSet AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet Uniforms; + + /// Can we assume the absence of NaNs. + bool HasFunNoNaNAttr; + + /// Vectorization requirements that will go through late-evaluation. + LoopVectorizationRequirements *Requirements; + + /// Used to emit an analysis of any legality issues. + const LoopVectorizeHints *Hints; + + ValueToValueMap Strides; + SmallPtrSet StrideSet; + + /// While vectorizing these instructions we have to generate a + /// call to the appropriate masked intrinsic + SmallPtrSet MaskedOp; +}; + +/// LoopVectorizationCostModel - estimates the expected speedups due to +/// vectorization. +/// In many cases vectorization is not profitable. This can happen because of +/// a number of reasons. In this class we mainly attempt to predict the +/// expected speedup/slowdowns due to the supported instruction set. We use the +/// TargetTransformInfo to query the different backends for the cost of +/// different operations. 
+class LoopVectorizationCostModel {
+public:
+  LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
+                             LoopInfo *LI, LoopVectorizationLegality *Legal,
+                             const TargetTransformInfo &TTI,
+                             const TargetLibraryInfo *TLI, DemandedBits *DB,
+                             AssumptionCache *AC, const Function *F,
+                             const LoopVectorizeHints *Hints)
+      : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+        AC(AC), TheFunction(F), Hints(Hints) {}
+
+  /// Information about vectorization costs.
+  struct VectorizationFactor {
+    unsigned Width; // Vector width with best cost.
+    unsigned Cost;  // Cost of the loop with that width.
+  };
+  /// \return The most profitable vectorization factor and the cost of that VF.
+  /// This method checks every power of two up to VF. If UserVF is not ZERO
+  /// then this vectorization factor will be selected if vectorization is
+  /// possible.
+  VectorizationFactor selectVectorizationFactor(bool OptForSize);
+
+  /// \return The size (in bits) of the smallest and widest types in the code
+  /// that needs to be vectorized. We ignore values that remain scalar such as
+  /// 64 bit loop indices.
+  std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
+
+  /// \return The desired interleave count.
+  /// If interleave count has been specified by metadata it will be returned.
+  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
+  /// are the selected vectorization factor and the cost of the selected VF.
+  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
+                                 unsigned LoopCost);
+
+  /// \return The most profitable unroll factor.
+  /// This method finds the best unroll-factor based on register pressure and
+  /// other parameters. VF and LoopCost are the selected vectorization factor
+  /// and the cost of the selected VF.
+  unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
+                                  unsigned LoopCost);
+
+  /// \brief A struct that represents some properties of the register usage
+  /// of a loop.
+  struct RegisterUsage {
+    /// Holds the number of loop invariant values that are used in the loop.
+    unsigned LoopInvariantRegs;
+    /// Holds the maximum number of concurrent live intervals in the loop.
+    unsigned MaxLocalUsers;
+    /// Holds the number of instructions in the loop.
+    unsigned NumInstructions;
+  };
+
+  /// \return Returns information about the register usages of the loop for the
+  /// given vectorization factors.
+  SmallVector<RegisterUsage, 8>
+  calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+
+  /// Collect values we want to ignore in the cost model.
+  void collectValuesToIgnore();
+
+private:
+  /// Returns the expected execution cost. The unit of the cost does
+  /// not matter because we use the 'cost' units to compare different
+  /// vector widths. The cost that is returned is *not* normalized by
+  /// the factor width.
+  unsigned expectedCost(unsigned VF);
+
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+  /// Returns whether the instruction is a load or store and will be emitted
+  /// as a vector operation.
+  bool isConsecutiveLoadOrStore(Instruction *I);
+
+  /// Report an analysis message to assist the user in diagnosing loops that
+  /// are not vectorized. These are handled as LoopAccessReport rather than
+  /// VectorizationReport because the << operator of VectorizationReport
+  /// returns LoopAccessReport.
+  void emitAnalysis(const LoopAccessReport &Message) const {
+    emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
+  }
+
+public:
+  /// Map of scalar integer values to the smallest bitwidth they can be legally
+  /// represented as. The vector equivalents of these values should be truncated
+  /// to this type.
+  MapVector<Instruction *, uint64_t> MinBWs;
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Predicated scalar evolution analysis.
+  PredicatedScalarEvolution &PSE;
+  /// Loop Info analysis.
+  LoopInfo *LI;
+  /// Vectorization legality.
+  LoopVectorizationLegality *Legal;
+  /// Vector target information.
+  const TargetTransformInfo &TTI;
+  /// Target Library Info.
+  const TargetLibraryInfo *TLI;
+  /// Demanded bits analysis.
+  DemandedBits *DB;
+  /// Assumption cache.
+  AssumptionCache *AC;
+  const Function *TheFunction;
+  /// Loop Vectorize Hint.
+  const LoopVectorizeHints *Hints;
+  /// Values to ignore in the cost model.
+  SmallPtrSet<const Value *, 16> ValuesToIgnore;
+  /// Values to ignore in the cost model when VF > 1.
+  SmallPtrSet<const Value *, 16> VecValuesToIgnore;
+};
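To make the selection strategy concrete, here is a simplified, self-contained model of the power-of-two scan performed by selectVectorizationFactor(). The toy cost function is an assumption standing in for the TTI-backed expectedCost(); the key idea it demonstrates is that candidate widths are compared on cost per lane:

    #include <cstdio>

    // Toy stand-in for the TTI-backed expectedCost(); illustrative only.
    static unsigned toyExpectedCost(unsigned VF) { return 16 / VF + 2; }

    // Scan widths 1, 2, 4, ..., MaxVF and keep the cheapest cost per lane.
    static unsigned pickBestWidth(unsigned MaxVF) {
      unsigned BestWidth = 1;
      float BestCost = (float)toyExpectedCost(1);
      for (unsigned Width = 2; Width <= MaxVF; Width *= 2) {
        float PerLaneCost = (float)toyExpectedCost(Width) / Width;
        if (PerLaneCost < BestCost) {
          BestCost = PerLaneCost;
          BestWidth = Width;
        }
      }
      return BestWidth;
    }

    int main() {
      std::printf("best width for MaxVF=8: %u\n", pickBestWidth(8));
      return 0;
    }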
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by legalize and costmodel. Once
+/// vectorization has been determined to be possible and profitable the
+/// requirements can be verified by looking for metadata or compiler options.
+/// For example, some loops require FP commutativity which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tell the user what needs to be done to vectorize the loop.
+/// For example, by specifying #pragma clang loop vectorize or -ffast-math.
+/// Late evaluation should be used only when diagnostics can be generated that
+/// can be followed by a non-expert user.
+class LoopVectorizationRequirements {
+public:
+  LoopVectorizationRequirements()
+      : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {}
+
+  void addUnsafeAlgebraInst(Instruction *I) {
+    // First unsafe algebra instruction.
+    if (!UnsafeAlgebraInst)
+      UnsafeAlgebraInst = I;
+  }
+
+  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
+
+  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+    const char *Name = Hints.vectorizeAnalysisPassName();
+    bool Failed = false;
+    if (UnsafeAlgebraInst && !Hints.allowReordering()) {
+      emitOptimizationRemarkAnalysisFPCommute(
+          F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
+          VectorizationReport() << "cannot prove it is safe to reorder "
+                                   "floating-point operations");
+      Failed = true;
+    }
+
+    // Test if runtime memcheck thresholds are exceeded.
+ bool PragmaThresholdReached = + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; + bool ThresholdReached = + NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; + if ((ThresholdReached && !Hints.allowReordering()) || + PragmaThresholdReached) { + emitOptimizationRemarkAnalysisAliasing( + F->getContext(), Name, *F, L->getStartLoc(), + VectorizationReport() + << "cannot prove it is safe to reorder memory operations"); + DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + Failed = true; + } + + return Failed; + } + +private: + unsigned NumRuntimePointerChecks; + Instruction *UnsafeAlgebraInst; +}; + static void addInnerLoop(Loop &L, SmallVectorImpl &V) { if (L.empty()) return V.push_back(&L); @@ -1441,6 +1606,7 @@ struct LoopVectorize : public FunctionPass { DominatorTree *DT; BlockFrequencyInfo *BFI; TargetLibraryInfo *TLI; + DemandedBits *DB; AliasAnalysis *AA; AssumptionCache *AC; LoopAccessAnalysis *LAA; @@ -1450,16 +1616,17 @@ struct LoopVectorize : public FunctionPass { BlockFrequency ColdEntryFreq; bool runOnFunction(Function &F) override { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); TTI = &getAnalysis().getTTI(F); DT = &getAnalysis().getDomTree(); - BFI = &getAnalysis(); + BFI = &getAnalysis().getBFI(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); LAA = &getAnalysis(); + DB = &getAnalysis(); // Compute some weights outside of the loop over the loops. Compute this // using a BranchProbability to re-use its scaling math. @@ -1562,26 +1729,8 @@ struct LoopVectorize : public FunctionPass { // less verbose reporting vectorized loops and unvectorized loops that may // benefit from vectorization, respectively. - if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) { - DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), Hints.emitRemark()); - return false; - } - - if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) { - DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), Hints.emitRemark()); - return false; - } - - if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) { - DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "loop not vectorized: vector width and interleave count are " - "explicitly set to 1"); + if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { + DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); return false; } @@ -1595,15 +1744,19 @@ struct LoopVectorize : public FunctionPass { DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { DEBUG(dbgs() << "\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "vectorization is not beneficial and is not explicitly forced"); + emitAnalysisDiag(F, L, Hints, VectorizationReport() + << "vectorization is not beneficial " + "and is not explicitly forced"); return false; } } + PredicatedScalarEvolution PSE(*SE); + // Check if it is legal to vectorize the loop. 
- LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA); + LoopVectorizationRequirements Requirements; + LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1611,16 +1764,18 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints); + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, + &Hints); + CM.collectValuesToIgnore(); // Check the function attributes to find out if this function should be // optimized for size. bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && - F->hasFnAttribute(Attribute::OptimizeForSize); + F->optForSize(); // Compute the weighted frequency of this loop being executed and see if it // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectoriez. + // always have a canonical loop here because we think we *can* vectorize. // FIXME: This is hidden behind a flag due to pervasive problems with // exactly what block frequency models. if (LoopVectorizeWithBlockFrequency) { @@ -1630,16 +1785,17 @@ struct LoopVectorize : public FunctionPass { OptForSize = true; } - // Check the function attributes to see if implicit floats are allowed.a + // Check the function attributes to see if implicit floats are allowed. // FIXME: This check doesn't seem possibly correct -- what if the loop is // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" "attribute is used.\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "loop not vectorized due to NoImplicitFloat attribute"); + emitAnalysisDiag( + F, L, Hints, + VectorizationReport() + << "loop not vectorized due to NoImplicitFloat attribute"); emitMissedWarning(F, L, Hints); return false; } @@ -1651,32 +1807,86 @@ struct LoopVectorize : public FunctionPass { // Select the interleave count. unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + // Get user interleave count. + unsigned UserIC = Hints.getInterleave(); + + // Identify the diagnostic messages that should be produced. + std::string VecDiagMsg, IntDiagMsg; + bool VectorizeLoop = true, InterleaveLoop = true; + + if (Requirements.doesNotMeet(F, L, Hints)) { + DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); + emitMissedWarning(F, L, Hints); + return false; + } if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n"); + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + VecDiagMsg = + "the cost-model indicates that vectorization is not beneficial"; + VectorizeLoop = false; + } - if (IC == 1) { - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "not beneficial to vectorize and user disabled interleaving"); - return false; - } - DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); + if (IC == 1 && UserIC <= 1) { + // Tell the user interleaving is not beneficial. 
+ DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + IntDiagMsg = + "the cost-model indicates that interleaving is not beneficial"; + InterleaveLoop = false; + if (UserIC == 1) + IntDiagMsg += + " and is explicitly disabled or interleave count is set to 1"; + } else if (IC > 1 && UserIC == 1) { + // Tell the user interleaving is beneficial, but it explicitly disabled. + DEBUG(dbgs() + << "LV: Interleaving is beneficial but is explicitly disabled."); + IntDiagMsg = "the cost-model indicates that interleaving is beneficial " + "but is explicitly disabled or interleave count is set to 1"; + InterleaveLoop = false; + } - // Report the unrolling decision. - emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("interleaved by " + Twine(IC) + - " (vectorization not beneficial)")); + // Override IC if user provided an interleave count. + IC = UserIC > 0 ? UserIC : IC; - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); - Unroller.vectorize(&LVL); + // Emit diagnostic messages, if any. + const char *VAPassName = Hints.vectorizeAnalysisPassName(); + if (!VectorizeLoop && !InterleaveLoop) { + // Do not vectorize or interleaving the loop. + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + return false; + } else if (!VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + } else if (VectorizeLoop && !InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + } else if (VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + } + + if (!VectorizeLoop) { + assert(IC > 1 && "interleave count should not be 1 or 0"); + // If we decided that it is not legal to vectorize the loop then + // interleave it. + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC); + Unroller.vectorize(&LVL, CM.MinBWs); + + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("interleaved loop (interleaved count: ") + + Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); - LB.vectorize(&LVL); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC); + LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; // Add metadata to disable runtime unrolling scalar loop when there's no @@ -1686,7 +1896,7 @@ struct LoopVectorize : public FunctionPass { AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. 
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), Twine("vectorized loop (vectorization width: ") + Twine(VF.Width) + ", interleaved count: " + Twine(IC) + ")"); @@ -1703,16 +1913,19 @@ struct LoopVectorize : public FunctionPass { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } }; @@ -1773,6 +1986,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -1780,11 +1994,11 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null(Ptr); if (Phi && Inductions.count(Phi)) { - InductionInfo II = Inductions[Phi]; + InductionDescriptor II = Inductions[Phi]; return II.getConsecutiveDirection(); } - GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); + GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (!Gep) return 0; @@ -1802,10 +2016,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // Make sure that all of the index operands are loop invariant. for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; - InductionInfo II = Inductions[Phi]; + InductionDescriptor II = Inductions[Phi]; return II.getConsecutiveDirection(); } @@ -1815,14 +2029,14 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // operand. for (unsigned i = 0; i != NumOperands; ++i) if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; // We can emit wide load/stores only if the last non-zero index is the // induction variable. const SCEV *Last = nullptr; if (!Strides.count(Gep)) - Last = SE->getSCEV(Gep->getOperand(InductionOperand)); + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); else { // Because of the multiplication by a stride we can have a s/zext cast. // We are going to replace this stride by 1 so the cast is safe to ignore. @@ -1833,7 +2047,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // %idxprom = zext i32 %mul to i64 << Safe cast. // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom // - Last = replaceSymbolicStrideSCEV(SE, Strides, + Last = replaceSymbolicStrideSCEV(PSE, Strides, Gep->getOperand(InductionOperand), Gep); if (const SCEVCastExpr *C = dyn_cast(Last)) Last = @@ -2177,7 +2391,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { VectorParts &Entry = WidenMap.get(Instr); // Handle consecutive loads/stores. 
- GetElementPtrInst *Gep = dyn_cast(Ptr); + GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); @@ -2191,8 +2405,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -2209,7 +2424,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { assert((i == InductionOperand || - SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && + PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), + OrigLoop)) && "Must be last index or loop invariant"); VectorParts &GEPParts = getVectorValue(GepOperand); @@ -2237,14 +2453,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // We don't want to update the value in the map as it might be used in // another expression. So don't use a reference type for "StoredVal". VectorParts StoredVal = getVectorValue(SI->getValueOperand()); - + for (unsigned Part = 0; Part < UF; ++Part) { // Calculate the pointer for the specific unroll-part. Value *PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { - // If we store to reverse consecutive memory locations then we need + // If we store to reverse consecutive memory locations, then we need // to reverse the order of elements in the stored value. StoredVal[Part] = reverseVector(StoredVal[Part]); // If the address is consecutive but reversed, then the @@ -2298,7 +2514,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, + bool IfPredicateStore) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. SmallVector Params; @@ -2318,7 +2535,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Try using previously calculated values. Instruction *SrcInst = dyn_cast(SrcOp); - // If the src is an instruction that appeared earlier in the basic block + // If the src is an instruction that appeared earlier in the basic block, // then it should already be vectorized. if (SrcInst && OrigLoop->contains(SrcInst)) { assert(WidenMap.has(SrcInst) && "Source operand is unavailable"); @@ -2343,19 +2560,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Create a new entry in the WidenMap and initialize it to Undef or Null. 
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - Instruction *InsertPt = Builder.GetInsertPoint(); - BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = nullptr; - VectorParts Cond; - Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(), Instr->getParent()); - VectorLp = LI->getLoopFor(IfBlock); - assert(VectorLp && "Must have a loop for this block"); } // For each vector unroll 'part': @@ -2367,12 +2577,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic Value *Cmp = nullptr; if (IfPredicateStore) { Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width)); - Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1)); - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - LoopVectorBody.push_back(CondBlock); - VectorLp->addBasicBlockToLoop(CondBlock, *LI); - // Update Builder with newly created basic block. - Builder.SetInsertPoint(InsertPt); + Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, + ConstantInt::get(Cmp->getType(), 1)); } Instruction *Cloned = Instr->clone(); @@ -2396,85 +2602,223 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned, Builder.getInt32(Width)); // End if-block. - if (IfPredicateStore) { - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - LoopVectorBody.push_back(NewIfBlock); - VectorLp->addBasicBlockToLoop(NewIfBlock, *LI); - Builder.SetInsertPoint(InsertPt); - ReplaceInstWithInst(IfBlock->getTerminator(), - BranchInst::Create(CondBlock, NewIfBlock, Cmp)); - IfBlock = NewIfBlock; - } + if (IfPredicateStore) + PredicatedStores.push_back(std::make_pair(cast(Cloned), + Cmp)); } } } -static Instruction *getFirstInst(Instruction *FirstInst, Value *V, - Instruction *Loc) { - if (FirstInst) - return FirstInst; - if (Instruction *I = dyn_cast(V)) - return I->getParent() == Loc->getParent() ? I : nullptr; - return nullptr; +PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, + Value *End, Value *Step, + Instruction *DL) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + // As we're just creating this loop, it's possible no latch exists + // yet. If so, use the header as this will be a single block loop. + if (!Latch) + Latch = Header; + + IRBuilder<> Builder(&*Header->getFirstInsertionPt()); + setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction)); + auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); + + Builder.SetInsertPoint(Latch->getTerminator()); + + // Create i+1 and fill the PHINode. + Value *Next = Builder.CreateAdd(Induction, Step, "index.next"); + Induction->addIncoming(Start, L->getLoopPreheader()); + Induction->addIncoming(Next, Latch); + // Create the compare. + Value *ICmp = Builder.CreateICmpEQ(Next, End); + Builder.CreateCondBr(ICmp, L->getExitBlock(), Header); + + // Now we have two terminators. Remove the old one from the block. 
+ Latch->getTerminator()->eraseFromParent(); + + return Induction; } -std::pair -InnerLoopVectorizer::addStrideCheck(Instruction *Loc) { - Instruction *tnullptr = nullptr; - if (!Legal->mustCheckStrides()) - return std::pair(tnullptr, tnullptr); +Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { + if (TripCount) + return TripCount; - IRBuilder<> ChkBuilder(Loc); + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + // Find the loop boundaries. + ScalarEvolution *SE = PSE.getSE(); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + assert(BackedgeTakenCount != SE->getCouldNotCompute() && + "Invalid loop count"); - // Emit checks. - Value *Check = nullptr; - Instruction *FirstInst = nullptr; - for (SmallPtrSet::iterator SI = Legal->strides_begin(), - SE = Legal->strides_end(); - SI != SE; ++SI) { - Value *Ptr = stripIntegerCast(*SI); - Value *C = ChkBuilder.CreateICmpNE(Ptr, ConstantInt::get(Ptr->getType(), 1), - "stride.chk"); - // Store the first instruction we create. - FirstInst = getFirstInst(FirstInst, C, Loc); - if (Check) - Check = ChkBuilder.CreateOr(Check, C); - else - Check = C; - } + Type *IdxTy = Legal->getWidestInductionType(); + + // The exit count might have the type of i64 while the phi is i32. This can + // happen if we have an induction variable that is sign extended before the + // compare. The only way that we get a backedge taken count is that the + // induction variable was signed and as such will not overflow. In such a case + // truncation is legal. + if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() > + IdxTy->getPrimitiveSizeInBits()) + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy); + BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy); + + // Get the total trip count from the count by adding 1. + const SCEV *ExitCount = SE->getAddExpr( + BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); - // We have to do this trickery because the IRBuilder might fold the check to a - // constant expression in which case there is no Instruction anchored in a - // the block. - LLVMContext &Ctx = Loc->getContext(); - Instruction *TheCheck = - BinaryOperator::CreateAnd(Check, ConstantInt::getTrue(Ctx)); - ChkBuilder.Insert(TheCheck, "stride.not.one"); - FirstInst = getFirstInst(FirstInst, TheCheck, Loc); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - return std::make_pair(FirstInst, TheCheck); + // Expand the trip count and place the new instructions in the preheader. + // Notice that the pre-header does not change, only the loop body. + SCEVExpander Exp(*SE, DL, "induction"); + + // Count holds the overall loop count (N). + TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + L->getLoopPreheader()->getTerminator()); + + if (TripCount->getType()->isPointerTy()) + TripCount = + CastInst::CreatePointerCast(TripCount, IdxTy, + "exitcount.ptrcnt.to.int", + L->getLoopPreheader()->getTerminator()); + + return TripCount; } +Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { + if (VectorTripCount) + return VectorTripCount; + + Value *TC = getOrCreateTripCount(L); + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + + // Now we need to generate the expression for N - (N % VF), which is + // the part that the vectorized body will execute. + // The loop step is equal to the vectorization factor (num of SIMD elements) + // times the unroll factor (num of SIMD instructions). 
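Before the code below, a quick numeric check of the N - (N % (VF * UF)) round-down described in the comment above, under assumed values (TC = 1003, VF = 4, UF = 2); this is only an illustration of the arithmetic, not code from the pass:

    #include <cassert>

    int main() {
      unsigned TC = 1003, VF = 4, UF = 2;
      unsigned Step = VF * UF;   // 8: elements retired per vector iteration
      unsigned R = TC % Step;    // 3: "n.mod.vf"
      unsigned VecTC = TC - R;   // 1000: "n.vec", the vector trip count
      assert(VecTC % Step == 0); // the vector body never overruns
      assert(TC - VecTC == 3);   // 3 iterations remain for the scalar loop
      return 0;
    }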
+ Constant *Step = ConstantInt::get(TC->getType(), VF * UF); + Value *R = Builder.CreateURem(TC, Step, "n.mod.vf"); + VectorTripCount = Builder.CreateSub(TC, R, "n.vec"); + + return VectorTripCount; +} + +void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, + BasicBlock *Bypass) { + Value *Count = getOrCreateTripCount(L); + BasicBlock *BB = L->getLoopPreheader(); + IRBuilder<> Builder(BB->getTerminator()); + + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + Value *CheckMinIters = + Builder.CreateICmpULT(Count, + ConstantInt::get(Count->getType(), VF * UF), + "min.iters.check"); + + BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), + "min.iters.checked"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, CheckMinIters)); + LoopBypassBlocks.push_back(BB); +} + +void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, + BasicBlock *Bypass) { + Value *TC = getOrCreateVectorTripCount(L); + BasicBlock *BB = L->getLoopPreheader(); + IRBuilder<> Builder(BB->getTerminator()); + + // Now, compare the new count to zero. If it is zero skip the vector loop and + // jump to the scalar loop. + Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()), + "cmp.zero"); + + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), + "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, Cmp)); + LoopBypassBlocks.push_back(BB); +} + +void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { + BasicBlock *BB = L->getLoopPreheader(); + + // Generate the code to check that the SCEV assumptions that we made. + // We want the new basic block to start at the first instruction in a + // sequence of instructions that form a check. + SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), + "scev.check"); + Value *SCEVCheck = + Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); + + if (auto *C = dyn_cast(SCEVCheck)) + if (C->isZero()) + return; + + // Create a new block containing the stride check. + BB->setName("vector.scevcheck"); + auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, SCEVCheck)); + LoopBypassBlocks.push_back(BB); + AddedSafetyChecks = true; +} + +void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, + BasicBlock *Bypass) { + BasicBlock *BB = L->getLoopPreheader(); + + // Generate the code that checks in runtime if arrays overlap. We put the + // checks into a separate block to make the more common case of few elements + // faster. + Instruction *FirstCheckInst; + Instruction *MemRuntimeCheck; + std::tie(FirstCheckInst, MemRuntimeCheck) = + Legal->getLAI()->addRuntimeChecks(BB->getTerminator()); + if (!MemRuntimeCheck) + return; + + // Create a new block containing the memory check. 
+ BB->setName("vector.memcheck"); + auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, MemRuntimeCheck)); + LoopBypassBlocks.push_back(BB); + AddedSafetyChecks = true; +} + + void InnerLoopVectorizer::createEmptyLoop() { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- Back-edge taken count overflow check. + [ ] <-- loop iteration number check. / | / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | | / v || [ ] <-- vector pre header. - || | - || v - || [ ] \ - || [ ]_| <-- vector loop. - || | - | \ v - | >[ ] <--- middle-block. + |/ | + | v + | [ ] \ + | [ ]_| <-- vector loop. + | | + | v + | -[ ] <--- middle-block. | / | | / v -|- >[ ] <--- new preheader. @@ -2498,65 +2842,16 @@ void InnerLoopVectorizer::createEmptyLoop() { // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we // don't have a single induction variable. + // + // We try to obtain an induction variable from the original loop as hard + // as possible. However if we don't find one that: + // - is an integer + // - counts from zero, stepping by one + // - is the size of the widest induction variable type + // then we create a new one. OldInduction = Legal->getInduction(); Type *IdxTy = Legal->getWidestInductionType(); - // Find the loop boundaries. - const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop); - assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); - - // The exit count might have the type of i64 while the phi is i32. This can - // happen if we have an induction variable that is sign extended before the - // compare. The only way that we get a backedge taken count is that the - // induction variable was signed and as such will not overflow. In such a case - // truncation is legal. - if (ExitCount->getType()->getPrimitiveSizeInBits() > - IdxTy->getPrimitiveSizeInBits()) - ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); - - const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); - // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(BackedgeTakeCount, - SE->getConstant(BackedgeTakeCount->getType(), 1)); - - const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout(); - - // Expand the trip count and place the new instructions in the preheader. - // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*SE, DL, "induction"); - - // We need to test whether the backedge-taken count is uint##_max. Adding one - // to it will cause overflow and an incorrect loop trip count in the vector - // body. In case of overflow we want to directly jump to the scalar remainder - // loop. 
- Value *BackedgeCount = - Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), - VectorPH->getTerminator()); - if (BackedgeCount->getType()->isPointerTy()) - BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, - "backedge.ptrcnt.to.int", - VectorPH->getTerminator()); - Instruction *CheckBCOverflow = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, - Constant::getAllOnesValue(BackedgeCount->getType()), - "backedge.overflow", VectorPH->getTerminator()); - - // The loop index does not have to start at Zero. Find the original start - // value from the induction PHI node. If we don't have an induction variable - // then we know that it starts at zero. - Builder.SetInsertPoint(VectorPH->getTerminator()); - Value *StartIdx = ExtendedIdx = - OldInduction - ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH), - IdxTy) - : ConstantInt::get(IdxTy, 0); - - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - VectorPH->getTerminator()); - - LoopBypassBlocks.push_back(VectorPH); - // Split the single block loop into the two loop structure described above. BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); @@ -2580,118 +2875,36 @@ void InnerLoopVectorizer::createEmptyLoop() { } Lp->addBasicBlockToLoop(VecBody, *LI); - // Use this IR builder to create the loop instructions (Phi, Br, Cmp) - // inside the loop. - Builder.SetInsertPoint(VecBody->getFirstNonPHI()); + // Find the loop boundaries. + Value *Count = getOrCreateTripCount(Lp); - // Generate the induction variable. - setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction)); - Induction = Builder.CreatePHI(IdxTy, 2, "index"); - // The loop step is equal to the vectorization factor (num of SIMD elements) - // times the unroll factor (num of SIMD instructions). - Constant *Step = ConstantInt::get(IdxTy, VF * UF); - - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. - BasicBlock *NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow)); - VectorPH = NewVectorPH; - - // This is the IR builder that we use to add all of the logic for bypassing - // the new vector loop. - IRBuilder<> BypassBuilder(VectorPH->getTerminator()); - setDebugLocFromInst(BypassBuilder, - getDebugLocFromInstOrOperands(OldInduction)); - - // We may need to extend the index in case there is a type mismatch. - // We know that the count starts at zero and does not overflow. - if (Count->getType() != IdxTy) { - // The exit count can be of pointer type. Convert it to the correct - // integer type. - if (ExitCount->getType()->isPointerTy()) - Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int"); - else - Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast"); - } - - // Add the start index to the loop count to get the new end index. - Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx"); - - // Now we need to generate the expression for N - (N % VF), which is - // the part that the vectorized body will execute. 
- Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf"); - Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec"); - Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx, - "end.idx.rnd.down"); + Value *StartIdx = ConstantInt::get(IdxTy, 0); + // We need to test whether the backedge-taken count is uint##_max. Adding one + // to it will cause overflow and an incorrect loop trip count in the vector + // body. In case of overflow we want to directly jump to the scalar remainder + // loop. + emitMinimumIterationCountCheck(Lp, ScalarPH); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. - Value *Cmp = - BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - ReplaceInstWithInst(VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, Cmp)); - VectorPH = NewVectorPH; - - // Generate the code to check that the strides we assumed to be one are really - // one. We want the new basic block to start at the first instruction in a - // sequence of instructions that form a check. - Instruction *StrideCheck; - Instruction *FirstCheckInst; - std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(VectorPH->getTerminator()); - if (StrideCheck) { - AddedSafetyChecks = true; - // Create a new block containing the stride check. - VectorPH->setName("vector.stridecheck"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - - // Replace the branch into the memory check block with a conditional branch - // for the "few elements case". - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck)); - - VectorPH = NewVectorPH; - } + emitVectorLoopEnteredCheck(Lp, ScalarPH); + // Generate the code to check any assumptions that we've made for SCEV + // expressions. + emitSCEVChecks(Lp, ScalarPH); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. - Instruction *MemRuntimeCheck; - std::tie(FirstCheckInst, MemRuntimeCheck) = - Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator()); - if (MemRuntimeCheck) { - AddedSafetyChecks = true; - // Create a new block containing the memory check. - VectorPH->setName("vector.memcheck"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - - // Replace the branch into the memory check block with a conditional branch - // for the "few elements case". - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck)); - - VectorPH = NewVectorPH; - } + emitMemRuntimeChecks(Lp, ScalarPH); + + // Generate the induction variable. + // The loop step is equal to the vectorization factor (num of SIMD elements) + // times the unroll factor (num of SIMD instructions). 
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp); + Constant *Step = ConstantInt::get(IdxTy, VF * UF); + Induction = + createInductionVariable(Lp, StartIdx, CountRoundDown, Step, + getDebugLocFromInstOrOperands(OldInduction)); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the @@ -2701,152 +2914,60 @@ void InnerLoopVectorizer::createEmptyLoop() { // If we come from a bypass edge then we need to start from the original // start value. - // This variable saves the new starting index for the scalar loop. - PHINode *ResumeIndex = nullptr; + // This variable saves the new starting index for the scalar loop. It is used + // to test if there are any tail iterations left once the vector loop has + // completed. LoopVectorizationLegality::InductionList::iterator I, E; LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); - // Set builder to point to last bypass block. - BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); for (I = List->begin(), E = List->end(); I != E; ++I) { PHINode *OrigPhi = I->first; - LoopVectorizationLegality::InductionInfo II = I->second; - - Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType(); - PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val", - MiddleBlock->getTerminator()); - // We might have extended the type of the induction variable but we need a - // truncated version for the scalar loop. - PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? - PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", - MiddleBlock->getTerminator()) : nullptr; + InductionDescriptor II = I->second; // Create phi nodes to merge from the backedge-taken check block. - PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val", + PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, + "bc.resume.val", ScalarPH->getTerminator()); - BCResumeVal->addIncoming(ResumeVal, MiddleBlock); - - PHINode *BCTruncResumeVal = nullptr; + Value *EndValue; if (OrigPhi == OldInduction) { - BCTruncResumeVal = - PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val", - ScalarPH->getTerminator()); - BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock); - } - - Value *EndValue = nullptr; - switch (II.IK) { - case LoopVectorizationLegality::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter. - assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - - // We have the canonical induction variable. - if (OrigPhi == OldInduction) { - // Create a truncated version of the resume value for the scalar loop, - // we might have promoted the type to a larger width. - EndValue = - BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType()); - // The new PHI merges the original incoming value, in case of a bypass, - // or the value at the end of the vectorized loop. - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); - TruncResumeVal->addIncoming(EndValue, VecBody); - - BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); - - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. - ResumeIndex = ResumeVal; - break; - } - - // Not the canonical induction variable - add the vector loop count to the - // start value. 
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StartValue->getType(), - "cast.crd"); - EndValue = II.transform(BypassBuilder, CRD); + // We know what the end value is. + EndValue = CountRoundDown; + } else { + IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); + Value *CRD = B.CreateSExtOrTrunc(CountRoundDown, + II.getStepValue()->getType(), + "cast.crd"); + EndValue = II.transform(B, CRD); EndValue->setName("ind.end"); - break; } - case LoopVectorizationLegality::IK_PtrInduction: { - Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StepValue->getType(), - "cast.crd"); - EndValue = II.transform(BypassBuilder, CRD); - EndValue->setName("ptr.ind.end"); - break; - } - }// end of case // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) { - if (OrigPhi == OldInduction) - ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); - else - ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); - } - ResumeVal->addIncoming(EndValue, VecBody); + BCResumeVal->addIncoming(EndValue, MiddleBlock); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); // The old induction's phi node in the scalar body needs the truncated // value. - if (OrigPhi == OldInduction) { - BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]); - OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal); - } else { - BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); - OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); - } + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]); + OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } - // If we are generating a new induction variable then we also need to - // generate the code that calculates the exit value. This value is not - // simply the end of the counter because we may skip the vectorized body - // in case of a runtime check. - if (!OldInduction){ - assert(!ResumeIndex && "Unexpected resume value found"); - ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", - MiddleBlock->getTerminator()); - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); - ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); - } - - // Make sure that we found the index where scalar loop needs to continue. - assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() && - "Invalid resume Index"); - // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. - Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd, - ResumeIndex, "cmp.n", + Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, + CountRoundDown, "cmp.n", MiddleBlock->getTerminator()); ReplaceInstWithInst(MiddleBlock->getTerminator(), BranchInst::Create(ExitBlock, ScalarPH, CmpN)); - // Create i+1 and fill the PHINode. - Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next"); - Induction->addIncoming(StartIdx, VectorPH); - Induction->addIncoming(NextIdx, VecBody); - // Create the compare. - Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown); - Builder.CreateCondBr(ICmp, MiddleBlock, VecBody); - - // Now we have two terminators. Remove the old one from the block. 
- VecBody->getTerminator()->eraseFromParent(); - // Get ready to start creating new instructions into the vectorized body. - Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); + Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt()); // Save the state. - LoopVectorPreHeader = VectorPH; + LoopVectorPreHeader = Lp->getLoopPreheader(); LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock; @@ -2899,7 +3020,7 @@ static void cse(SmallVector &BBs) { for (unsigned i = 0, e = BBs.size(); i != e; ++i) { BasicBlock *BB = BBs[i]; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *In = I++; + Instruction *In = &*I++; if (!CSEDenseMapInfo::canHandle(In)) continue; @@ -3021,6 +3142,117 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); } +static Type *smallestIntegerVectorType(Type *T1, Type *T2) { + IntegerType *I1 = cast(T1->getVectorElementType()); + IntegerType *I2 = cast(T2->getVectorElementType()); + return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2; +} +static Type *largestIntegerVectorType(Type *T1, Type *T2) { + IntegerType *I1 = cast(T1->getVectorElementType()); + IntegerType *I2 = cast(T2->getVectorElementType()); + return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2; +} + +void InnerLoopVectorizer::truncateToMinimalBitwidths() { + // For every instruction `I` in MinBWs, truncate the operands, create a + // truncated version of `I` and reextend its result. InstCombine runs + // later and will remove any ext/trunc pairs. + // + for (auto &KV : MinBWs) { + VectorParts &Parts = WidenMap.get(KV.first); + for (Value *&I : Parts) { + if (I->use_empty()) + continue; + Type *OriginalTy = I->getType(); + Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(), + KV.second); + Type *TruncatedTy = VectorType::get(ScalarTruncatedTy, + OriginalTy->getVectorNumElements()); + if (TruncatedTy == OriginalTy) + continue; + + IRBuilder<> B(cast(I)); + auto ShrinkOperand = [&](Value *V) -> Value* { + if (auto *ZI = dyn_cast(V)) + if (ZI->getSrcTy() == TruncatedTy) + return ZI->getOperand(0); + return B.CreateZExtOrTrunc(V, TruncatedTy); + }; + + // The actual instruction modification depends on the instruction type, + // unfortunately. 
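The transformation is easiest to see in scalar form. A toy model of the trunc/compute/extend round trip follows, assuming the demanded-bits analysis proved that 8 bits suffice; the helper is illustrative and not part of the pass:

    #include <cstdint>

    // If analysis proves a value only needs 8 bits, compute at that width
    // and zero-extend back; InstCombine can then fold the ext/trunc pairs.
    static uint32_t viaMinimalWidth(uint32_t A, uint32_t B) {
      uint8_t NarrowA = (uint8_t)A;       // trunc
      uint8_t NarrowB = (uint8_t)B;       // trunc
      uint8_t Narrow = NarrowA + NarrowB; // the operation at minimal width
      return (uint32_t)Narrow;            // zext back to the original type
    }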
+ Value *NewI = nullptr; + if (BinaryOperator *BO = dyn_cast(I)) { + NewI = B.CreateBinOp(BO->getOpcode(), + ShrinkOperand(BO->getOperand(0)), + ShrinkOperand(BO->getOperand(1))); + cast(NewI)->copyIRFlags(I); + } else if (ICmpInst *CI = dyn_cast(I)) { + NewI = B.CreateICmp(CI->getPredicate(), + ShrinkOperand(CI->getOperand(0)), + ShrinkOperand(CI->getOperand(1))); + } else if (SelectInst *SI = dyn_cast(I)) { + NewI = B.CreateSelect(SI->getCondition(), + ShrinkOperand(SI->getTrueValue()), + ShrinkOperand(SI->getFalseValue())); + } else if (CastInst *CI = dyn_cast(I)) { + switch (CI->getOpcode()) { + default: llvm_unreachable("Unhandled cast!"); + case Instruction::Trunc: + NewI = ShrinkOperand(CI->getOperand(0)); + break; + case Instruction::SExt: + NewI = B.CreateSExtOrTrunc(CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, + TruncatedTy)); + break; + case Instruction::ZExt: + NewI = B.CreateZExtOrTrunc(CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, + TruncatedTy)); + break; + } + } else if (ShuffleVectorInst *SI = dyn_cast(I)) { + auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements(); + auto *O0 = + B.CreateZExtOrTrunc(SI->getOperand(0), + VectorType::get(ScalarTruncatedTy, Elements0)); + auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements(); + auto *O1 = + B.CreateZExtOrTrunc(SI->getOperand(1), + VectorType::get(ScalarTruncatedTy, Elements1)); + + NewI = B.CreateShuffleVector(O0, O1, SI->getMask()); + } else if (isa(I)) { + // Don't do anything with the operands, just extend the result. + continue; + } else { + llvm_unreachable("Unhandled instruction type!"); + } + + // Lastly, extend the result. + NewI->takeName(cast(I)); + Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy); + I->replaceAllUsesWith(Res); + cast(I)->eraseFromParent(); + I = Res; + } + } + + // We'll have created a bunch of ZExts that are now parentless. Clean up. + for (auto &KV : MinBWs) { + VectorParts &Parts = WidenMap.get(KV.first); + for (Value *&I : Parts) { + ZExtInst *Inst = dyn_cast(I); + if (Inst && Inst->use_empty()) { + Value *NewI = Inst->getOperand(0); + Inst->eraseFromParent(); + I = NewI; + } + } + } +} + void InnerLoopVectorizer::vectorizeLoop() { //===------------------------------------------------===// // @@ -3051,6 +3283,11 @@ void InnerLoopVectorizer::vectorizeLoop() { be = DFS.endRPO(); bb != be; ++bb) vectorizeBlockInLoop(*bb, &RdxPHIsToFix); + // Insert truncates and extends for any truncated instructions as hints to + // InstCombine. + if (VF > 1) + truncateToMinimalBitwidths(); + // At this point every instruction in the original loop is widened to // a vector form. We are almost done. Now, we need to fix the PHI nodes // that we vectorized. The PHI nodes are currently empty because we did @@ -3066,7 +3303,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(RdxPhi && "Unable to recover vectorized PHI"); // Find the reduction variable descriptor. - assert(Legal->getReductionVars()->count(RdxPhi) && + assert(Legal->isReductionVariable(RdxPhi) && "Unable to find the reduction variable"); RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi]; @@ -3141,21 +3378,33 @@ void InnerLoopVectorizer::vectorizeLoop() { // the PHIs and the values we are going to write. // This allows us to write both PHINodes and the extractelement // instructions. 
- Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); + Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - VectorParts RdxParts; + VectorParts RdxParts = getVectorValue(LoopExitInst); setDebugLocFromInst(Builder, LoopExitInst); - for (unsigned part = 0; part < UF; ++part) { - // This PHINode contains the vectorized reduction variable, or - // the initial value vector, if we bypass the vector loop. - VectorParts &RdxExitVal = getVectorValue(LoopExitInst); - PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); - Value *StartVal = (part == 0) ? VectorStart : Identity; - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); - NewPhi->addIncoming(RdxExitVal[part], - LoopVectorBody.back()); - RdxParts.push_back(NewPhi); + + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + if (VF > 1 && RdxPhi->getType() != RdxDesc.getRecurrenceType()) { + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); + Builder.SetInsertPoint(LoopVectorBody.back()->getTerminator()); + for (unsigned part = 0; part < UF; ++part) { + Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy); + Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) + : Builder.CreateZExt(Trunc, VecTy); + for (Value::user_iterator UI = RdxParts[part]->user_begin(); + UI != RdxParts[part]->user_end();) + if (*UI != Trunc) { + (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd); + RdxParts[part] = Extnd; + } else { + ++UI; + } + } + Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); + for (unsigned part = 0; part < UF; ++part) + RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy); } // Reduce all of the unrolled parts into a single vector. @@ -3208,13 +3457,22 @@ void InnerLoopVectorizer::vectorizeLoop() { // The result is in the first element of the vector. ReducedPartRdx = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); + + // If the reduction can be performed in a smaller type, we need to extend + // the reduction to the wider type before we branch to the original loop. + if (RdxPhi->getType() != RdxDesc.getRecurrenceType()) + ReducedPartRdx = + RdxDesc.isSigned() + ? Builder.CreateSExt(ReducedPartRdx, RdxPhi->getType()) + : Builder.CreateZExt(ReducedPartRdx, RdxPhi->getType()); } // Create a phi node that merges control-flow from the backedge-taken check // block and the middle block. PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx", LoopScalarPreHeader->getTerminator()); - BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]); BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); // Now, we need to fix the users of the reduction variable @@ -3252,6 +3510,20 @@ void InnerLoopVectorizer::vectorizeLoop() { fixLCSSAPHIs(); + // Make sure DomTree is updated. + updateAnalysis(); + + // Predicate any stores. 
+ for (auto KV : PredicatedStores) { + BasicBlock::iterator I(KV.first); + auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI); + auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DT); + I->moveBefore(T); + I->getParent()->setName("pred.store.if"); + BB->setName("pred.store.continue"); + } + DEBUG(DT->verifyDomTree()); // Remove redundant induction instructions. cse(LoopVectorBody); } @@ -3326,18 +3598,18 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { return BlockMask; } -void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, - InnerLoopVectorizer::VectorParts &Entry, - unsigned UF, unsigned VF, PhiVector *PV) { +void InnerLoopVectorizer::widenPHIInstruction( + Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF, + unsigned VF, PhiVector *PV) { PHINode* P = cast(PN); // Handle reduction variables: - if (Legal->getReductionVars()->count(P)) { + if (Legal->isReductionVariable(P)) { for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); - Entry[part] = PHINode::Create(VecTy, 2, "vec.phi", - LoopVectorBody.back()-> getFirstInsertionPt()); + Entry[part] = PHINode::Create( + VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt()); } PV->push_back(P); return; @@ -3385,53 +3657,44 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, assert(Legal->getInductionVars()->count(P) && "Not an induction variable"); - LoopVectorizationLegality::InductionInfo II = - Legal->getInductionVars()->lookup(P); + InductionDescriptor II = Legal->getInductionVars()->lookup(P); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. - switch (II.IK) { - case LoopVectorizationLegality::IK_NoInduction: + switch (II.getKind()) { + case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); - case LoopVectorizationLegality::IK_IntInduction: { - assert(P->getType() == II.StartValue->getType() && "Types must match"); - Type *PhiTy = P->getType(); - Value *Broadcasted; - if (P == OldInduction) { - // Handle the canonical induction variable. We might have had to - // extend the type. - Broadcasted = Builder.CreateTrunc(Induction, PhiTy); - } else { - // Handle other induction variables that are now based on the - // canonical one. - Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, - "normalized.idx"); - NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); - Broadcasted = II.transform(Builder, NormalizedIdx); - Broadcasted->setName("offset.idx"); + case InductionDescriptor::IK_IntInduction: { + assert(P->getType() == II.getStartValue()->getType() && + "Types must match"); + // Handle other induction variables that are now based on the + // canonical one. + Value *V = Induction; + if (P != OldInduction) { + V = Builder.CreateSExtOrTrunc(Induction, P->getType()); + V = II.transform(Builder, V); + V->setName("offset.idx"); } - Broadcasted = getBroadcastInstrs(Broadcasted); + Value *Broadcasted = getBroadcastInstrs(V); // After broadcasting the induction variable we need to make the vector // consecutive by adding 0, 1, 2, etc. 
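As the comment above says, a widened induction is formed by broadcasting the scalar value and then making the lanes consecutive; the loop just below does this per unroll part via getStepVector. A self-contained sketch of the splat-plus-steps idiom for a unit step (helper name hypothetical; the in-tree getStepVector additionally scales by the induction step):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Broadcast Scalar into a VF-wide vector, then add <Start, Start+1, ...>.
    static Value *splatPlusSteps(IRBuilder<> &B, Value *Scalar, unsigned VF,
                                 int Start) {
      Value *Splat = B.CreateVectorSplat(VF, Scalar);
      SmallVector<Constant *, 8> Steps;
      for (unsigned I = 0; I != VF; ++I)
        Steps.push_back(ConstantInt::get(Scalar->getType(), Start + I));
      return B.CreateAdd(Splat, ConstantVector::get(Steps));
    }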
for (unsigned part = 0; part < UF; ++part) - Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue); + Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue()); return; } - case LoopVectorizationLegality::IK_PtrInduction: + case InductionDescriptor::IK_PtrInduction: // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = - Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx"); - NormalizedIdx = - Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType()); + Value *PtrInd = Induction; + PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType()); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { if (VF == 1) { int EltIndex = part; - Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx); SclrGep->setName("next.gep"); Entry[part] = SclrGep; @@ -3441,8 +3704,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { int EltIndex = i + part * VF; - Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx); SclrGep->setName("next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, @@ -3458,7 +3721,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - VectorParts &Entry = WidenMap.get(it); + VectorParts &Entry = WidenMap.get(&*it); + switch (it->getOpcode()) { case Instruction::Br: // Nothing to do for PHIs and BR, since we already took care of the @@ -3466,7 +3730,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { continue; case Instruction::PHI: { // Vectorize PHINodes. - widenPHIInstruction(it, Entry, UF, VF, PV); + widenPHIInstruction(&*it, Entry, UF, VF, PV); continue; }// End of PHI. @@ -3504,16 +3768,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Entry[Part] = V; } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } case Instruction::Select: { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)), - OrigLoop); - setDebugLocFromInst(Builder, it); + auto *SE = PSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop); + setDebugLocFromInst(Builder, &*it); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. 
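The hunk that follows widens selects, and when the condition is loop invariant it extracts lane zero of the widened condition, since every lane holds the same value. A sketch of that decision, assuming Cond, Op0, and Op1 are the widened operands of one unroll part (helper name hypothetical):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Scalar select for a uniform condition, vector select otherwise.
    static Value *widenSelect(IRBuilder<> &B, Value *Cond, Value *Op0,
                              Value *Op1, bool InvariantCond) {
      Value *C = InvariantCond ? B.CreateExtractElement(Cond, B.getInt32(0))
                               : Cond;
      return B.CreateSelect(C, Op0, Op1);
    }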
@@ -3522,7 +3787,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &Cond = getVectorValue(it->getOperand(0)); VectorParts &Op0 = getVectorValue(it->getOperand(1)); VectorParts &Op1 = getVectorValue(it->getOperand(2)); - + Value *ScalarCond = (VF == 1) ? Cond[0] : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0)); @@ -3533,7 +3798,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Op1[Part]); } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } @@ -3542,25 +3807,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Widen compares. Generate vector compares. bool FCmp = (it->getOpcode() == Instruction::FCmp); CmpInst *Cmp = dyn_cast(it); - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); VectorParts &A = getVectorValue(it->getOperand(0)); VectorParts &B = getVectorValue(it->getOperand(1)); for (unsigned Part = 0; Part < UF; ++Part) { Value *C = nullptr; - if (FCmp) + if (FCmp) { C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); - else + cast(C)->copyFastMathFlags(&*it); + } else { C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); + } Entry[Part] = C; } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } case Instruction::Store: case Instruction::Load: - vectorizeMemoryInstruction(it); + vectorizeMemoryInstruction(&*it); break; case Instruction::ZExt: case Instruction::SExt: @@ -3575,7 +3842,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { case Instruction::FPTrunc: case Instruction::BitCast: { CastInst *CI = dyn_cast(it); - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); /// Optimize the special case where the source is the induction /// variable. Notice that we can only optimize the 'trunc' case /// because: a. FP conversions lose precision, b. sext/zext may wrap, @@ -3585,13 +3852,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); - LoopVectorizationLegality::InductionInfo II = + InductionDescriptor II = Legal->getInductionVars()->lookup(OldInduction); - Constant *Step = - ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue()); + Constant *Step = ConstantInt::getSigned( + CI->getType(), II.getStepValue()->getSExtValue()); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } /// Vectorize casts. @@ -3601,7 +3868,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &A = getVectorValue(it->getOperand(0)); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } @@ -3609,7 +3876,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Ignore dbg intrinsics. 
if (isa(it)) break; - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); Module *M = BB->getParent()->getParent(); CallInst *CI = cast(it); @@ -3625,7 +3892,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start)) { - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; } // The flag shows whether we use Intrinsic or a usual Call for vectorized @@ -3636,7 +3903,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; if (!UseVectorIntrinsic && NeedToScalarize) { - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; } @@ -3677,13 +3944,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Entry[Part] = Builder.CreateCall(VectorF, Args); } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } default: // All other instructions are unsupported. Scalarize them. - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; }// end of switch. }// end of for_each instr. @@ -3691,7 +3958,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. - SE->forgetLoop(OrigLoop); + PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && @@ -3701,19 +3968,12 @@ void InnerLoopVectorizer::updateAnalysis() { DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]); DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back()); - // Due to if predication of stores we might create a sequence of "if(pred) - // a[i] = ...; " blocks. - for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) { - if (i == 0) - DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader); - else if (isPredicatedBlock(i)) { - DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]); - } else { - DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]); - } - } + // We don't predicate stores by this point, so the vector body should be a + // single loop. + assert(LoopVectorBody.size() == 1 && "Expected single block loop!"); + DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader); - DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]); + DT->addNewBlock(LoopMiddleBlock, LoopVectorBody.back()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); @@ -3850,10 +4110,10 @@ bool LoopVectorizationLegality::canVectorize() { } // ScalarEvolution needs to be able to find the exit count. 
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(VectorizationReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(VectorizationReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } @@ -3879,10 +4139,28 @@ bool LoopVectorizationLegality::canVectorize() { : "") << "!\n"); + bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); + + // If an override option has been passed in for interleaved accesses, use it. + if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) + UseInterleaved = EnableInterleavedMemAccesses; + // Analyze interleaved memory accesses. - if (EnableInterleavedMemAccesses) + if (UseInterleaved) InterleaveInfo.analyzeInterleaving(Strides); + unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; + if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) + SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; + + if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { + emitAnalysis(VectorizationReport() + << "Too many SCEV assumptions need to be made and checked " + << "at runtime"); + DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); + return false; + } + // Okay! We can vectorize. At this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. @@ -3929,7 +4207,6 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, } bool LoopVectorizationLegality::canVectorizeInstrs() { - BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); // Look for the attribute signaling the absence of NaNs. @@ -3953,7 +4230,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "loop control flow is not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; @@ -3965,9 +4242,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (*bb != Header) { // Check that this instruction has no outside users or is an // identified reduction value with an outside user. - if (!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) continue; - emitAnalysis(VectorizationReport(it) << + emitAnalysis(VectorizationReport(&*it) << "value could not be identified as " "an induction or reduction variable"); return false; @@ -3975,19 +4252,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // We only allow if-converted PHIs with exactly two incoming values. if (Phi->getNumIncomingValues() != 2) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "control flow not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } - // This is the value coming from the preheader. - Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); - ConstantInt *StepValue = nullptr; - // Check if this is an induction variable. 
- InductionKind IK = isInductionVariable(Phi, StepValue); - - if (IK_NoInduction != IK) { + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) { + Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) WidestIndTy = convertPointerToIntegerType(DL, PhiTy); @@ -3995,21 +4268,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. - if (IK == IK_IntInduction && StepValue->isOne()) { + if (ID.getKind() == InductionDescriptor::IK_IntInduction && + ID.getStepValue()->isOne() && + isa(ID.getStartValue()) && + cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other - // than it is expedient). + // than it is expedient). We've checked that it begins at zero and + // steps by one, so this is a canonical induction variable. if (!Induction || PhiTy == WidestIndTy) Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); - Inductions[Phi] = InductionInfo(StartValue, IK, StepValue); // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. - if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) { - emitAnalysis(VectorizationReport(it) << + if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { + emitAnalysis(VectorizationReport(&*it) << "use of induction value outside of the " "loop is not handled by vectorizer"); return false; @@ -4020,11 +4296,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, Reductions[Phi])) { + if (Reductions[Phi].hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst( + Reductions[Phi].getUnsafeAlgebraInst()); AllowedExit.insert(Reductions[Phi].getLoopExitInstr()); continue; } - emitAnalysis(VectorizationReport(it) << + emitAnalysis(VectorizationReport(&*it) << "value that could not be identified as " "reduction is used outside the loop"); DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); @@ -4039,8 +4318,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { - emitAnalysis(VectorizationReport(it) << - "call instruction cannot be vectorized"); + emitAnalysis(VectorizationReport(&*it) + << "call instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } @@ -4049,8 +4328,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { - if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { - emitAnalysis(VectorizationReport(it) + auto *SE = PSE.getSE(); + if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { + emitAnalysis(VectorizationReport(&*it) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); return false; @@ -4061,7 +4341,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Also, we can't vectorize extractelement instructions. 
if ((!VectorType::isValidElementType(it->getType()) && !it->getType()->isVoidTy()) || isa(it)) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "instruction return type cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; @@ -4085,8 +4365,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. - if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) { - emitAnalysis(VectorizationReport(it) << + if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { + emitAnalysis(VectorizationReport(&*it) << "value cannot be used outside the loop"); return false; } @@ -4104,6 +4384,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } } + // Now we know the widest induction type, check if our found induction + // is the same size. If it's not, unset it here and InnerLoopVectorizer + // will create another. + if (Induction && WidestIndTy != Induction->getType()) + Induction = nullptr; + return true; } @@ -4116,7 +4402,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { else return; - Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop); if (!Stride) return; @@ -4142,7 +4428,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { BE = TheLoop->block_end(); B != BE; ++B) for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); I != IE; ++I) - if (I->getType()->isPointerTy() && isConsecutivePtr(I)) + if (I->getType()->isPointerTy() && isConsecutivePtr(&*I)) Worklist.insert(Worklist.end(), I->op_begin(), I->op_end()); while (!Worklist.empty()) { @@ -4179,32 +4465,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return false; } - if (LAI->getNumRuntimePointerChecks() > - VectorizerParams::RuntimeMemoryCheckThreshold) { - emitAnalysis(VectorizationReport() - << LAI->getNumRuntimePointerChecks() << " exceeds limit of " - << VectorizerParams::RuntimeMemoryCheckThreshold - << " dependent memory operations checked at runtime"); - DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - return false; - } + Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); + PSE.addPredicate(LAI->PSE.getUnionPredicate()); + return true; } -LoopVectorizationLegality::InductionKind -LoopVectorizationLegality::isInductionVariable(PHINode *Phi, - ConstantInt *&StepValue) { - if (!isInductionPHI(Phi, SE, StepValue)) - return IK_NoInduction; - - Type *PhiTy = Phi->getType(); - // Found an Integer induction variable. - if (PhiTy->isIntegerTy()) - return IK_IntInduction; - // Found an Pointer induction variable. - return IK_PtrInduction; -} - bool LoopVectorizationLegality::isInductionVariable(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); @@ -4256,8 +4522,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || !isSinglePredecessor) { - // Build a masked store if it is legal for the target, otherwise scalarize - // the block. + // Build a masked store if it is legal for the target, otherwise + // scalarize the block. bool isLegalMaskedOp = isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()); @@ -4315,7 +4581,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? 
LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides); + int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); @@ -4324,7 +4590,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( if (Factor < 2 || Factor > MaxInterleaveGroupFactor) continue; - const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); @@ -4411,12 +4677,12 @@ void InterleavedAccessInfo::analyzeInterleaving( continue; // Calculate the distance and prepare for the rule 3. - const SCEVConstant *DistToA = - dyn_cast(SE->getMinusSCEV(DesB.Scev, DesA.Scev)); + const SCEVConstant *DistToA = dyn_cast( + PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev)); if (!DistToA) continue; - int DistanceToA = DistToA->getValue()->getValue().getSExtValue(); + int DistanceToA = DistToA->getAPInt().getSExtValue(); // Skip if the distance is not multiple of size as they are not in the // same group. @@ -4454,8 +4720,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { emitAnalysis(VectorizationReport() << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os"); - DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); + "compiling with -Os/-Oz"); + DEBUG(dbgs() << + "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return Factor; } @@ -4467,10 +4734,12 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } // Find the trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop); + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); - unsigned WidestType = getWidestType(); + MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); + unsigned SmallestType, WidestType; + std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); unsigned MaxSafeDepDist = -1U; if (Legal->getMaxSafeDepDistBytes() != -1U) @@ -4478,7 +4747,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { WidestRegister = ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); unsigned MaxVectorSize = WidestRegister / WidestType; - DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n"); + + DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " + << WidestType << " bits.\n"); DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister << " bits.\n"); @@ -4491,6 +4762,26 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { " into one vector!"); unsigned VF = MaxVectorSize; + if (MaximizeBandwidth && !OptForSize) { + // Collect all viable vectorization factors. + SmallVector VFs; + unsigned NewMaxVectorSize = WidestRegister / SmallestType; + for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2) + VFs.push_back(VS); + + // For each VF calculate its register usage. + auto RUs = calculateRegisterUsage(VFs); + + // Select the largest VF which doesn't require more registers than existing + // ones. 
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); + for (int i = RUs.size() - 1; i >= 0; --i) { + if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { + VF = VFs[i]; + break; + } + } + } // If we optimize the program for size, avoid creating the tail loop. if (OptForSize) { @@ -4499,7 +4790,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { emitAnalysis (VectorizationReport() << "unable to calculate the loop count due to complex control flow"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } @@ -4515,8 +4806,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { "cannot optimize for size and vectorize at the " "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " - "when compiling with -Os"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + "when compiling with -Os/-Oz"); + DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } } @@ -4566,7 +4857,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { return Factor; } -unsigned LoopVectorizationCostModel::getWidestType() { +std::pair +LoopVectorizationCostModel::getSmallestAndWidestTypes() { + unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); @@ -4579,18 +4872,22 @@ unsigned LoopVectorizationCostModel::getWidestType() { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { Type *T = it->getType(); - // Ignore ephemeral values. - if (EphValues.count(it)) + // Skip ignored values. + if (ValuesToIgnore.count(&*it)) continue; // Only examine Loads, Stores and PHINodes. if (!isa(it) && !isa(it) && !isa(it)) continue; - // Examine PHI nodes that are reduction variables. - if (PHINode *PN = dyn_cast(it)) - if (!Legal->getReductionVars()->count(PN)) + // Examine PHI nodes that are reduction variables. Update the type to + // account for the recurrence type. + if (PHINode *PN = dyn_cast(it)) { + if (!Legal->isReductionVariable(PN)) continue; + RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN]; + T = RdxDesc.getRecurrenceType(); + } // Examine the stored values. if (StoreInst *ST = dyn_cast(it)) @@ -4599,15 +4896,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { // Ignore loaded pointer types and stored pointer types that are not // consecutive. However, we do want to take consecutive stores/loads of // pointer vectors into account. - if (T->isPointerTy() && !isConsecutiveLoadOrStore(it)) + if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it)) continue; + MinWidth = std::min(MinWidth, + (unsigned)DL.getTypeSizeInBits(T->getScalarType())); MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } - return MaxWidth; + return {MinWidth, MaxWidth}; } unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, @@ -4628,11 +4927,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. - // Use the user preference, unless 'auto' is selected. - int UserUF = Hints->getInterleave(); - if (UserUF != 0) - return UserUF; - // When we optimize for size, we don't interleave. 
if (OptForSize) return 1; @@ -4642,7 +4936,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // Do not interleave loops with a relatively small trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop); + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; @@ -4658,7 +4952,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, TargetNumRegisters = ForceTargetNumVectorRegs; } - LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage(); + RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); @@ -4756,8 +5050,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, } // Interleave if this is a large loop (small loops are already dealt with by - // this - // point) that could benefit from interleaving. + // this point) that could benefit from interleaving. bool HasReductions = (Legal->getReductionVars()->size() > 0); if (TTI.enableAggressiveInterleaving(HasReductions)) { DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); @@ -4768,8 +5061,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; } -LoopVectorizationCostModel::RegisterUsage -LoopVectorizationCostModel::calculateRegisterUsage() { +SmallVector +LoopVectorizationCostModel::calculateRegisterUsage( + const SmallVector &VFs) { // This function calculates the register usage by measuring the highest number // of values that are alive at a single location. Obviously, this is a very // rough estimation. We scan the loop in a topological order in order and @@ -4790,8 +5084,8 @@ LoopVectorizationCostModel::calculateRegisterUsage() { LoopBlocksDFS DFS(TheLoop); DFS.perform(LI); - RegisterUsage R; - R.NumInstructions = 0; + RegisterUsage RU; + RU.NumInstructions = 0; // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the @@ -4810,15 +5104,13 @@ LoopVectorizationCostModel::calculateRegisterUsage() { unsigned Index = 0; for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); bb != be; ++bb) { - R.NumInstructions += (*bb)->size(); - for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; - ++it) { - Instruction *I = it; - IdxToInstr[Index++] = I; + RU.NumInstructions += (*bb)->size(); + for (Instruction &I : **bb) { + IdxToInstr[Index++] = &I; // Save the end location of each USE. - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *U = I->getOperand(i); + for (unsigned i = 0; i < I.getNumOperands(); ++i) { + Value *U = I.getOperand(i); Instruction *Instr = dyn_cast(U); // Ignore non-instruction values such as arguments, constants, etc. @@ -4847,42 +5139,85 @@ LoopVectorizationCostModel::calculateRegisterUsage() { TransposeEnds[it->second].push_back(it->first); SmallSet OpenIntervals; - unsigned MaxUsage = 0; + // Get the size of the widest register. 
+ unsigned MaxSafeDepDist = -1U; + if (Legal->getMaxSafeDepDistBytes() != -1U) + MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; + unsigned WidestRegister = + std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist); + const DataLayout &DL = TheFunction->getParent()->getDataLayout(); + + SmallVector RUs(VFs.size()); + SmallVector MaxUsages(VFs.size(), 0); DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); + + // A lambda that gets the register usage for the given type and VF. + auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) { + unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType()); + return std::max(1, VF * TypeSize / WidestRegister); + }; + for (unsigned int i = 0; i < Index; ++i) { Instruction *I = IdxToInstr[i]; // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; - // Ignore ephemeral values. - if (EphValues.count(I)) - continue; - // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; - for (unsigned int j=0, e = List.size(); j < e; ++j) + for (unsigned int j = 0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // Count the number of live interals. - MaxUsage = std::max(MaxUsage, OpenIntervals.size()); + // Skip ignored values. + if (ValuesToIgnore.count(I)) + continue; - DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " << - OpenIntervals.size() << '\n'); + // For each VF find the maximum usage of registers. + for (unsigned j = 0, e = VFs.size(); j < e; ++j) { + if (VFs[j] == 1) { + MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); + continue; + } + + // Count the number of live intervals. + unsigned RegUsage = 0; + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.count(Inst)) + continue; + RegUsage += GetRegUsage(Inst->getType(), VFs[j]); + } + MaxUsages[j] = std::max(MaxUsages[j], RegUsage); + } + + DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " + << OpenIntervals.size() << '\n'); // Add the current instruction to the list of open intervals. OpenIntervals.insert(I); } - unsigned Invariant = LoopInvariants.size(); - DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n'); - DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); - DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n'); + for (unsigned i = 0, e = VFs.size(); i < e; ++i) { + unsigned Invariant = 0; + if (VFs[i] == 1) + Invariant = LoopInvariants.size(); + else { + for (auto Inst : LoopInvariants) + Invariant += GetRegUsage(Inst->getType(), VFs[i]); + } - R.LoopInvariantRegs = Invariant; - R.MaxLocalUsers = MaxUsage; - return R; + DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); + DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); + DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); + DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n'); + + RU.LoopInvariantRegs = Invariant; + RU.MaxLocalUsers = MaxUsages[i]; + RUs[i] = RU; + } + + return RUs; } unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { @@ -4900,11 +5235,11 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { if (isa(it)) continue; - // Ignore ephemeral values. - if (EphValues.count(it)) + // Skip ignored values. + if (ValuesToIgnore.count(&*it)) continue; - unsigned C = getInstructionCost(it, VF); + unsigned C = getInstructionCost(&*it, VF); // Check if we should override the cost. 
if (ForceTargetInstructionCost.getNumOccurrences() > 0) @@ -4969,7 +5304,7 @@ static bool isLikelyComplexAddressComputation(Value *Ptr, if (!C) return true; - const APInt &APStepVal = C->getValue()->getValue(); + const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. if (APStepVal.getBitWidth() > 64) @@ -4981,9 +5316,8 @@ static bool isLikelyComplexAddressComputation(Value *Ptr, } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { - if (Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1))) - return true; - return false; + return Legal->hasStride(I->getOperand(0)) || + Legal->hasStride(I->getOperand(1)); } unsigned @@ -4994,7 +5328,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VF = 1; Type *RetTy = I->getType(); + if (VF > 1 && MinBWs.count(I)) + RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); Type *VectorTy = ToVectorTy(RetTy, VF); + auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -5076,6 +5413,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); + Instruction *Op0AsInstruction = dyn_cast(I->getOperand(0)); + auto It = MinBWs.find(Op0AsInstruction); + if (VF > 1 && It != MinBWs.end()) + ValTy = IntegerType::get(ValTy->getContext(), It->second); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } @@ -5199,8 +5540,28 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Legal->isInductionVariable(I->getOperand(0))) return TTI.getCastInstrCost(I->getOpcode(), I->getType(), I->getOperand(0)->getType()); - - Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + + Type *SrcScalarTy = I->getOperand(0)->getType(); + Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); + if (VF > 1 && MinBWs.count(I)) { + // This cast is going to be shrunk. This may remove the cast or it might + // turn it into slightly different cast. For example, if MinBW == 16, + // "zext i8 %1 to i32" becomes "zext i8 %1 to i16". + // + // Calculate the modified src and dest types. 
+ Type *MinVecTy = VectorTy; + if (I->getOpcode() == Instruction::Trunc) { + SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy); + VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF), + MinVecTy); + } else if (I->getOpcode() == Instruction::ZExt || + I->getOpcode() == Instruction::SExt) { + SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy); + VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF), + MinVecTy); + } + } + return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { @@ -5240,15 +5601,18 @@ char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_DEPENDENCY(DemandedBits) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { @@ -5269,6 +5633,79 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } +void LoopVectorizationCostModel::collectValuesToIgnore() { + // Ignore ephemeral values. + CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); + + // Ignore type-promoting instructions we identified during reduction + // detection. + for (auto &Reduction : *Legal->getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl &Casts = RedDes.getCastInsts(); + VecValuesToIgnore.insert(Casts.begin(), Casts.end()); + } + + // Ignore induction phis that are only used in either GetElementPtr or ICmp + // instruction to exit loop. Induction variables usually have large types and + // can have big impact when estimating register usage. + // This is for when VF > 1. + for (auto &Induction : *Legal->getInductionVars()) { + auto *PN = Induction.first; + auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); + + // Check that the PHI is only used by the induction increment (UpdateV) or + // by GEPs. Then check that UpdateV is only used by a compare instruction or + // the loop header PHI. + // FIXME: Need precise def-use analysis to determine if this instruction + // variable will be vectorized. + if (std::all_of(PN->user_begin(), PN->user_end(), + [&](const User *U) -> bool { + return U == UpdateV || isa(U); + }) && + std::all_of(UpdateV->user_begin(), UpdateV->user_end(), + [&](const User *U) -> bool { + return U == PN || isa(U); + })) { + VecValuesToIgnore.insert(PN); + VecValuesToIgnore.insert(UpdateV); + } + } + + // Ignore instructions that will not be vectorized. + // This is for when VF > 1. 
+ for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; + ++bb) { + for (auto &Inst : **bb) { + switch (Inst.getOpcode()) { + case Instruction::GetElementPtr: { + // Ignore GEP if its last operand is an induction variable so that it is + // a consecutive load/store and won't be vectorized as scatter/gather + // pattern. + + GetElementPtrInst *Gep = cast(&Inst); + unsigned NumOperands = Gep->getNumOperands(); + unsigned InductionOperand = getGEPInductionOperand(Gep); + bool GepToIgnore = true; + + // Check that all of the gep indices are uniform except for the + // induction operand. + for (unsigned i = 0; i != NumOperands; ++i) { + if (i != InductionOperand && + !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), + TheLoop)) { + GepToIgnore = false; + break; + } + } + + if (GepToIgnore) + VecValuesToIgnore.insert(&Inst); + break; + } + } + } + } +} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { @@ -5316,19 +5753,12 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - Instruction *InsertPt = Builder.GetInsertPoint(); - BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = nullptr; - VectorParts Cond; - Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(), Instr->getParent()); - VectorLp = LI->getLoopFor(IfBlock); - assert(VectorLp && "Must have a loop for this block"); } // For each vector unroll 'part': @@ -5343,11 +5773,6 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0)); Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part], ConstantInt::get(Cond[Part]->getType(), 1)); - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - LoopVectorBody.push_back(CondBlock); - VectorLp->addBasicBlockToLoop(CondBlock, *LI); - // Update Builder with newly created basic block. - Builder.SetInsertPoint(InsertPt); } Instruction *Cloned = Instr->clone(); @@ -5367,16 +5792,10 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, if (!IsVoidRetTy) VecResults[Part] = Cloned; - // End if-block. - if (IfPredicateStore) { - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - LoopVectorBody.push_back(NewIfBlock); - VectorLp->addBasicBlockToLoop(NewIfBlock, *LI); - Builder.SetInsertPoint(InsertPt); - ReplaceInstWithInst(IfBlock->getTerminator(), - BranchInst::Create(CondBlock, NewIfBlock, Cmp)); - IfBlock = NewIfBlock; - } + // End if-block. 
+  if (IfPredicateStore)
+    PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+                                              Cmp));
   }
 }
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7bac407e77e9..40abfc759e0a 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -61,7 +62,7 @@ static cl::opt<int>
                      "number "));
 
 static cl::opt<bool>
-ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
                    cl::desc("Attempt to vectorize horizontal reductions"));
 
 static cl::opt<bool> ShouldStartVectorizeHorAtStore(
@@ -73,6 +74,14 @@ static cl::opt<int>
 MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
     cl::desc("Attempt to vectorize for this register size in bits"));
 
+/// Limits the size of scheduling regions in a block.
+/// It avoids long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+    cl::desc("Limit the size of the SLP scheduling region per block"));
+
 namespace {
 
 // FIXME: Set this via cl::opt to allow overriding.
@@ -89,6 +98,10 @@ static const unsigned AliasedCheckLimit = 10;
 // This limit is useful for very large basic blocks.
 static const unsigned MaxMemDepDistance = 160;
 
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
 /// \brief Predicate for the element types that the SLP vectorizer supports.
 ///
 /// The most important thing to filter here are types which are invalid in LLVM
@@ -156,13 +169,11 @@ static unsigned getAltOpcode(unsigned Op) {
 /// of an alternate sequence which can later be merged as
 /// a ShuffleVector instruction.
 static bool canCombineAsAltInst(unsigned Op) {
-  if (Op == Instruction::FAdd || Op == Instruction::FSub ||
-      Op == Instruction::Sub || Op == Instruction::Add)
-    return true;
-  return false;
+  return Op == Instruction::FAdd || Op == Instruction::FSub ||
+         Op == Instruction::Sub || Op == Instruction::Add;
 }
 
-/// \returns ShuffleVector instruction if intructions in \p VL have
+/// \returns ShuffleVector instruction if instructions in \p VL have
 ///  alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
 /// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
 static unsigned isAltInst(ArrayRef<Value *> VL) {
@@ -242,6 +253,9 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
       case LLVMContext::MD_fpmath:
         MD = MDNode::getMostGenericFPMath(MD, IMD);
         break;
+      case LLVMContext::MD_nontemporal:
+        MD = MDNode::intersect(MD, IMD);
+        break;
       }
     }
     I->setMetadata(Kind, MD);
@@ -393,7 +407,7 @@ public:
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
-  /// \returns true if it is benefitial to reverse the vector order.
+  /// \returns true if it is beneficial to reverse the vector order.
   bool shouldReorder() const {
     return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
   }
@@ -441,7 +455,7 @@ private:
   /// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef VL, VectorType *Ty); - /// \returns whether the VectorizableTree is fully vectoriable and will + /// \returns whether the VectorizableTree is fully vectorizable and will /// be beneficial even the tree height is tiny. bool isFullyVectorizableTinyTree(); @@ -506,7 +520,7 @@ private: /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { ExternalUser (Value *S, llvm::User *U, int L) : - Scalar(S), User(U), Lane(L){}; + Scalar(S), User(U), Lane(L){} // Which scalar in our function. Value *Scalar; // Which user that uses the scalar. @@ -717,6 +731,8 @@ private: : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), ScheduleStart(nullptr), ScheduleEnd(nullptr), FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), + ScheduleRegionSize(0), + ScheduleRegionSizeLimit(ScheduleRegionSizeBudget), // Make sure that the initial SchedulingRegionID is greater than the // initial SchedulingRegionID in ScheduleData (which is 0). SchedulingRegionID(1) {} @@ -728,6 +744,13 @@ private: FirstLoadStoreInRegion = nullptr; LastLoadStoreInRegion = nullptr; + // Reduce the maximum schedule region size by the size of the + // previous scheduling run. + ScheduleRegionSizeLimit -= ScheduleRegionSize; + if (ScheduleRegionSizeLimit < MinScheduleRegionSize) + ScheduleRegionSizeLimit = MinScheduleRegionSize; + ScheduleRegionSize = 0; + // Make a new scheduling region, i.e. all existing ScheduleData is not // in the new region yet. ++SchedulingRegionID; @@ -804,7 +827,8 @@ private: void cancelScheduling(ArrayRef VL); /// Extends the scheduling region so that V is inside the region. - void extendSchedulingRegion(Value *V); + /// \returns true if the region size is within the limit. + bool extendSchedulingRegion(Value *V); /// Initialize the ScheduleData structures for new instructions in the /// scheduling region. @@ -858,6 +882,12 @@ private: /// (can be null). ScheduleData *LastLoadStoreInRegion; + /// The current size of the scheduling region. + int ScheduleRegionSize; + + /// The maximum size allowed for the scheduling region. + int ScheduleRegionSizeLimit; + /// The ID of the scheduling region. For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. int SchedulingRegionID; @@ -1077,7 +1107,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { if (!BS.tryScheduleBundle(VL, this)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); - BS.cancelScheduling(VL); + assert((!BS.getScheduleData(VL[0]) || + !BS.getScheduleData(VL[0])->isPartOfBundle()) && + "tryScheduleBundle should cancelScheduling on failure"); newTreeEntry(VL, false); return; } @@ -1125,6 +1157,23 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { return; } case Instruction::Load: { + // Check that a vectorized load would load the same memory as a scalar + // load. + // For example we don't want vectorize loads that are smaller than 8 bit. + // Even though we have a packed struct {} LLVM treats + // loading/storing it as an i8 struct. If we vectorize loads/stores from + // such a struct we read/write packed bits disagreeing with the + // unvectorized version. 
+      const DataLayout &DL = F->getParent()->getDataLayout();
+      Type *ScalarTy = VL[0]->getType();
+
+      if (DL.getTypeSizeInBits(ScalarTy) !=
+          DL.getTypeAllocSizeInBits(ScalarTy)) {
+        BS.cancelScheduling(VL);
+        newTreeEntry(VL, false);
+        DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+        return;
+      }
       // Check if the loads are consecutive or of we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
         LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1134,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
           DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
         }
-        const DataLayout &DL = F->getParent()->getDataLayout();
+
         if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
           if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
             ++NumLoadsWantToChangeOrder;
@@ -1692,7 +1741,8 @@ int BoUpSLP::getSpillCost() {
     }
 
     // Now find the sequence of instructions between PrevInst and Inst.
-    BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+    BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
+        PrevInstIt(PrevInst->getIterator());
     --PrevInstIt;
     while (InstIt != PrevInstIt) {
       if (PrevInstIt == PrevInst->getParent()->rend()) {
@@ -1892,106 +1942,126 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
   }
 }
 
+// Return true if I should be commuted before adding its left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying to either have all elements on one side be
+// instructions with the same opcode to enable further vectorization, or having
+// a splat to lower the vectorizing cost.
+static bool shouldReorderOperands(int i, Instruction &I,
+                                  SmallVectorImpl<Value *> &Left,
+                                  SmallVectorImpl<Value *> &Right,
+                                  bool AllSameOpcodeLeft,
+                                  bool AllSameOpcodeRight, bool SplatLeft,
+                                  bool SplatRight) {
+  Value *VLeft = I.getOperand(0);
+  Value *VRight = I.getOperand(1);
+  // If we have "SplatRight", try to see if commuting is needed to preserve it.
+  if (SplatRight) {
+    if (VRight == Right[i - 1])
+      // Preserve SplatRight
+      return false;
+    if (VLeft == Right[i - 1]) {
+      // Commuting would preserve SplatRight, but we don't want to break
+      // SplatLeft either, i.e. preserve the original order if possible.
+      // (FIXME: why do we care?)
+      if (SplatLeft && VLeft == Left[i - 1])
+        return false;
+      return true;
+    }
+  }
+  // Symmetrically handle Right side.
+  if (SplatLeft) {
+    if (VLeft == Left[i - 1])
+      // Preserve SplatLeft
+      return false;
+    if (VRight == Left[i - 1])
+      return true;
+  }
+
+  Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+  Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+  // If we have "AllSameOpcodeRight", try to see if the left operand preserves
+  // it and not the right, in this case we want to commute.
+  if (AllSameOpcodeRight) {
+    unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
+    if (IRight && RightPrevOpcode == IRight->getOpcode())
+      // Do not commute, a match on the right preserves AllSameOpcodeRight
+      return false;
+    if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
+      // We have a match and may want to commute, but first check if there is
+      // not also a match on the existing operands on the Left to preserve
+      // AllSameOpcodeLeft, i.e. preserve the original order if possible.
+      // (FIXME: why do we care?)
+      if (AllSameOpcodeLeft && ILeft &&
+          cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
+        return false;
+      return true;
+    }
+  }
+  // Symmetrically handle Left side.
+  if (AllSameOpcodeLeft) {
+    unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
+    if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
+      return false;
+    if (IRight && LeftPrevOpcode == IRight->getOpcode())
+      return true;
+  }
+  return false;
+}
+
 void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
                                              SmallVectorImpl<Value *> &Left,
                                              SmallVectorImpl<Value *> &Right) {
-  SmallVector<Value *, 16> OrigLeft, OrigRight;
-
-  bool AllSameOpcodeLeft = true;
-  bool AllSameOpcodeRight = true;
-  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
-    Instruction *I = cast<Instruction>(VL[i]);
-    Value *VLeft = I->getOperand(0);
-    Value *VRight = I->getOperand(1);
-
-    OrigLeft.push_back(VLeft);
-    OrigRight.push_back(VRight);
-
-    Instruction *ILeft = dyn_cast<Instruction>(VLeft);
-    Instruction *IRight = dyn_cast<Instruction>(VRight);
-
-    // Check whether all operands on one side have the same opcode. In this case
-    // we want to preserve the original order and not make things worse by
-    // reordering.
-    if (i && AllSameOpcodeLeft && ILeft) {
-      if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
-        if (PLeft->getOpcode() != ILeft->getOpcode())
-          AllSameOpcodeLeft = false;
-      } else
-        AllSameOpcodeLeft = false;
-    }
-    if (i && AllSameOpcodeRight && IRight) {
-      if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
-        if (PRight->getOpcode() != IRight->getOpcode())
-          AllSameOpcodeRight = false;
-      } else
-        AllSameOpcodeRight = false;
-    }
-
-    // Sort two opcodes. In the code below we try to preserve the ability to use
-    // broadcast of values instead of individual inserts.
-    // vl1 = load
-    // vl2 = phi
-    // vr1 = load
-    // vr2 = vr2
-    //         = vl1 x vr1
-    //         = vl2 x vr2
-    // If we just sorted according to opcode we would leave the first line in
-    // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
-    //         = vl1 x vr1
-    //         = vr2 x vl2
-    // Because vr2 and vr1 are from the same load we loose the opportunity of a
-    // broadcast for the packed right side in the backend: we have [vr1, vl2]
-    // instead of [vr1, vr2=vr1].
-    if (ILeft && IRight) {
-      if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
-                 Right[i - 1] != IRight) {
-        // Try not to destroy a broad cast for no apparent benefit.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
-                 Right[i - 1] == ILeft) {
-        // Try preserve broadcasts.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
-                 Left[i - 1] == IRight) {
-        // Try preserve broadcasts.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else {
-        Left.push_back(ILeft);
-        Right.push_back(IRight);
-      }
-      continue;
-    }
-    // One opcode, put the instruction on the right.
-    if (ILeft) {
-      Left.push_back(VRight);
-      Right.push_back(ILeft);
-      continue;
-    }
+  if (VL.size()) {
+    // Peel the first iteration out of the loop since there's nothing
+    // interesting to do anyway and it simplifies the checks in the loop.
+    auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+    auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+    if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+      // Favor having instruction to the right. FIXME: why?
+      std::swap(VLeft, VRight);
     Left.push_back(VLeft);
     Right.push_back(VRight);
   }
 
-  bool LeftBroadcast = isSplat(Left);
-  bool RightBroadcast = isSplat(Right);
+  // Keep track if we have instructions with all the same opcode on one side.
+  bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+  bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+  // Keep track if we have one side with all the same value (broadcast).
+  bool SplatLeft = true;
+  bool SplatRight = true;
 
-  // If operands end up being broadcast return this operand order.
-  if (LeftBroadcast || RightBroadcast)
-    return;
-
-  // Don't reorder if the operands where good to begin.
-  if (AllSameOpcodeRight || AllSameOpcodeLeft) {
-    Left = OrigLeft;
-    Right = OrigRight;
+  for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+    Instruction *I = cast<Instruction>(VL[i]);
+    assert(I->isCommutative() && "Can only process commutative instruction");
+    // Commute to favor either a splat or maximizing having the same opcodes on
+    // one side.
+    if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
+                              AllSameOpcodeRight, SplatLeft, SplatRight)) {
+      Left.push_back(I->getOperand(1));
+      Right.push_back(I->getOperand(0));
+    } else {
+      Left.push_back(I->getOperand(0));
+      Right.push_back(I->getOperand(1));
+    }
+    // Update Splat* and AllSameOpcode* after the insertion.
+    SplatRight = SplatRight && (Right[i - 1] == Right[i]);
+    SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
+    AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
+                        (cast<Instruction>(Left[i - 1])->getOpcode() ==
+                         cast<Instruction>(Left[i])->getOpcode());
+    AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
+                         (cast<Instruction>(Right[i - 1])->getOpcode() ==
+                          cast<Instruction>(Right[i])->getOpcode());
   }
+  // If one operand ends up being a broadcast, return this operand order.
+  if (SplatRight || SplatLeft)
+    return;
 
   const DataLayout &DL = F->getParent()->getDataLayout();
 
   // Finally check if we can get longer vectorizable chain by reordering
@@ -2032,7 +2102,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
 
 void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
   Instruction *VL0 = cast<Instruction>(VL[0]);
-  BasicBlock::iterator NextInst = VL0;
+  BasicBlock::iterator NextInst(VL0);
   ++NextInst;
   Builder.SetInsertPoint(VL0->getParent(), NextInst);
   Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
@@ -2489,7 +2559,7 @@ Value *BoUpSLP::vectorizeTree() {
     scheduleBlock(BSIter.second.get());
   }
 
-  Builder.SetInsertPoint(F->getEntryBlock().begin());
+  Builder.SetInsertPoint(&F->getEntryBlock().front());
   vectorizeTree(&VectorizableTree[0]);
 
   DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
@@ -2534,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree() {
           User->replaceUsesOfWith(Scalar, Ex);
         }
       } else {
-        Builder.SetInsertPoint(F->getEntryBlock().begin());
+        Builder.SetInsertPoint(&F->getEntryBlock().front());
         Value *Ex = Builder.CreateExtractElement(Vec, Lane);
         CSEBlocks.insert(&F->getEntryBlock());
         User->replaceUsesOfWith(Scalar, Ex);
@@ -2643,7 +2713,7 @@ void BoUpSLP::optimizeGatherSequence() {
     BasicBlock *BB = (*I)->getBlock();
     // For all instructions in blocks containing gather sequences:
     for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
-      Instruction *In = it++;
+      Instruction *In = &*it++;
       if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
         continue;
 
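The rewritten loop above commutes commutative operands lane by lane so that one side either stays a splat (a cheap broadcast) or keeps a single opcode. A self-contained toy model of just the splat rule, under the assumption that plain strings can stand in for Value operands (shouldCommute and every name below are invented for illustration):

    #include <cassert>
    #include <string>

    // Mirrors only the splat-preservation half of shouldReorderOperands().
    static bool shouldCommute(const std::string &VLeft, const std::string &VRight,
                              const std::string &PrevLeft,
                              const std::string &PrevRight, bool SplatLeft,
                              bool SplatRight) {
      if (SplatRight) {
        if (VRight == PrevRight)
          return false; // the right side already continues the splat
        if (VLeft == PrevRight && !(SplatLeft && VLeft == PrevLeft))
          return true; // commuting moves the repeated value to the right
      }
      if (SplatLeft) {
        if (VLeft == PrevLeft)
          return false;
        if (VRight == PrevLeft)
          return true;
      }
      return false;
    }

    int main() {
      // Lane 0 produced (Left="x", Right="s"); lane 1 arrives as ("s", "y").
      // Commuting lane 1 keeps "s" on the right and preserves the splat.
      assert(shouldCommute("s", "y", "x", "s", false, true));
      // If the right operand already continues the splat, leave it alone.
      assert(!shouldCommute("z", "s", "x", "s", false, true));
      return 0;
    }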
@@ -2683,8 +2753,15 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
   ScheduleData *Bundle = nullptr;
   bool ReSchedule = false;
   DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+
+  // Make sure that the scheduling region contains all
+  // instructions of the bundle.
+  for (Value *V : VL) {
+    if (!extendSchedulingRegion(V))
+      return false;
+  }
+
   for (Value *V : VL) {
-    extendSchedulingRegion(V);
     ScheduleData *BundleMember = getScheduleData(V);
     assert(BundleMember &&
            "no ScheduleData for bundle member (maybe not in same basic block)");
@@ -2745,7 +2822,11 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
         schedule(pickedSD, ReadyInsts);
       }
     }
-  return Bundle->isReady();
+  if (!Bundle->isReady()) {
+    cancelScheduling(VL);
+    return false;
+  }
+  return true;
 }
 
 void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
@@ -2774,9 +2855,9 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
   }
 }
 
-void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
   if (getScheduleData(V))
-    return;
+    return true;
   Instruction *I = dyn_cast<Instruction>(V);
   assert(I && "bundle member must be an instruction");
   assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
@@ -2787,21 +2868,26 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
     ScheduleEnd = I->getNextNode();
     assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
     DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
-    return;
+    return true;
   }
   // Search up and down at the same time, because we don't know if the new
   // instruction is above or below the existing scheduling region.
-  BasicBlock::reverse_iterator UpIter(ScheduleStart);
+  BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
   BasicBlock::reverse_iterator UpperEnd = BB->rend();
   BasicBlock::iterator DownIter(ScheduleEnd);
   BasicBlock::iterator LowerEnd = BB->end();
   for (;;) {
+    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+      DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+      return false;
+    }
+
     if (UpIter != UpperEnd) {
       if (&*UpIter == I) {
         initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
         ScheduleStart = I;
         DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
-        return;
+        return true;
       }
       UpIter++;
     }
@@ -2812,13 +2898,14 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
       ScheduleEnd = I->getNextNode();
       assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
       DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
-      return;
+      return true;
     }
     DownIter++;
   }
   assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
          "instruction not found in block");
+  return true;
 }
 
 void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
@@ -2898,8 +2985,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
           }
         } else {
           // I'm not sure if this can ever happen. But we need to be safe.
-          // This lets the instruction/bundle never be scheduled and eventally
-          // disable vectorization.
+          // This lets the instruction/bundle never be scheduled and
+          // eventually disable vectorization.
           BundleMember->Dependencies++;
           BundleMember->incrementUnscheduledDeps(1);
         }
@@ -3005,7 +3092,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   };
   std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
 
-  // Ensure that all depencency data is updated and fill the ready-list with
+  // Ensure that all dependency data is updated and fill the ready-list with
   // initial instructions.
   int Idx = 0;
   int NumToSchedule = 0;
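With this change, tryScheduleBundle() pre-extends the scheduling region for every bundle member and fails cleanly once extendSchedulingRegion() exceeds ScheduleRegionSizeLimit. A self-contained sketch of that bounded up-and-down search, assuming a vector of ints stands in for a block and [lo, hi) for the current region (extendRegion and all names are invented, and the window here grows symmetrically, a simplification of the real routine):

    #include <cstdio>
    #include <vector>

    static bool extendRegion(const std::vector<int> &block, int target,
                             size_t &lo, size_t &hi, unsigned budget) {
      while (budget--) {
        if (lo > 0) {
          --lo; // one step up
          if (block[lo] == target)
            return true;
        }
        if (hi < block.size()) {
          if (block[hi] == target) {
            ++hi;
            return true;
          }
          ++hi; // one step down
        }
        if (lo == 0 && hi == block.size())
          return false; // target not inside the block at all
      }
      return false; // budget exhausted: the caller cancels the bundle
    }

    int main() {
      std::vector<int> block = {10, 11, 12, 13, 14, 15};
      size_t lo = 2, hi = 4; // the region currently covers {12, 13}
      bool ok = extendRegion(block, 15, lo, hi, /*budget=*/8);
      printf("ok=%d region=[%zu,%zu)\n", ok, lo, hi); // ok=1 region=[0,6)
      return 0;
    }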
@@ -3037,7 +3124,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
       Instruction *pickedInst = BundleMember->Inst;
       if (LastScheduledInst->getNextNode() != pickedInst) {
         BS->BB->getInstList().remove(pickedInst);
-        BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
+        BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
+                                     pickedInst);
       }
       LastScheduledInst = pickedInst;
       BundleMember = BundleMember->NextInBundle;
@@ -3076,11 +3164,11 @@ struct SLPVectorizer : public FunctionPass {
     if (skipOptnoneFunction(F))
       return false;
 
-    SE = &getAnalysis<ScalarEvolution>();
+    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
     TLI = TLIP ? &TLIP->getTLI() : nullptr;
-    AA = &getAnalysis<AliasAnalysis>();
+    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3141,13 +3229,15 @@ struct SLPVectorizer : public FunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     FunctionPass::getAnalysisUsage(AU);
     AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<ScalarEvolution>();
-    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
     AU.setPreservesCFG();
   }
@@ -3262,15 +3352,26 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
 
   // Do a quadratic search on all of the given stores and find
   // all of the pairs of stores that follow each other.
+  SmallVector<unsigned, 16> IndexQueue;
   for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
-    for (unsigned j = 0; j < e; ++j) {
-      if (i == j)
-        continue;
-      const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
-      if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
-        Tails.insert(Stores[j]);
+    const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+    IndexQueue.clear();
+    // If a store has multiple consecutive store candidates, search Stores
+    // array according to the sequence: from i+1 to e, then from i-1 to 0.
+    // This is because pairing with an immediately succeeding or preceding
+    // candidate usually creates the best chance for SLP vectorization.
+    unsigned j = 0;
+    for (j = i + 1; j < e; ++j)
+      IndexQueue.push_back(j);
+    for (j = i; j > 0; --j)
+      IndexQueue.push_back(j - 1);
+
+    for (auto &k : IndexQueue) {
+      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+        Tails.insert(Stores[k]);
         Heads.insert(Stores[i]);
-        ConsecutiveChain[Stores[i]] = Stores[j];
+        ConsecutiveChain[Stores[i]] = Stores[k];
+        break;
       }
     }
   }
@@ -3430,7 +3531,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
       unsigned VecIdx = 0;
       for (auto &V : BuildVectorSlice) {
         IRBuilder<true, NoFolder> Builder(
-            ++BasicBlock::iterator(InsertAfter));
+            InsertAfter->getParent(), ++BasicBlock::iterator(InsertAfter));
         InsertElementInst *IE = cast<InsertElementInst>(V);
         Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
             VectorizedRoot, Builder.getInt32(VecIdx++)));
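The IndexQueue rewrite above replaces an unordered quadratic inner scan: for store i it now probes candidates in a fixed order and stops at the first consecutive match. A small sketch of that probe order, with no LLVM types involved (probeOrder is an invented name):

    #include <cstdio>
    #include <vector>

    // Nearest following candidates first (i+1 .. e-1), then nearest
    // preceding ones (i-1 .. 0), exactly like the IndexQueue fill loops.
    static std::vector<unsigned> probeOrder(unsigned i, unsigned e) {
      std::vector<unsigned> q;
      for (unsigned j = i + 1; j < e; ++j)
        q.push_back(j);
      for (unsigned j = i; j > 0; --j)
        q.push_back(j - 1);
      return q;
    }

    int main() {
      for (unsigned j : probeOrder(2, 6))
        printf("%u ", j); // prints: 3 4 5 1 0
      printf("\n");
      return 0;
    }

The early break also changes behavior slightly: each head is paired with at most one tail, the closest consecutive one, rather than the last match found.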
@@ -3554,16 +3655,17 @@ class HorizontalReduction {
   unsigned ReductionOpcode;
   /// The opcode of the values we perform a reduction on.
   unsigned ReducedValueOpcode;
-  /// The width of one full horizontal reduction operation.
-  unsigned ReduxWidth;
   /// Should we model this reduction as a pairwise reduction tree or a tree that
   /// splits the vector in halves and adds those halves.
   bool IsPairwiseReduction;
 
 public:
+  /// The width of one full horizontal reduction operation.
+  unsigned ReduxWidth;
+
   HorizontalReduction()
       : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
-        ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+        ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0) {}
 
   /// \brief Try to find a reduction tree.
   bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
@@ -3609,11 +3711,11 @@ public:
       return false;
 
     // Post order traverse the reduction tree starting at B. We only handle true
-    // trees containing only binary operators.
-    SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+    // trees containing only binary operators or selects.
+    SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
     Stack.push_back(std::make_pair(B, 0));
     while (!Stack.empty()) {
-      BinaryOperator *TreeN = Stack.back().first;
+      Instruction *TreeN = Stack.back().first;
       unsigned EdgeToVist = Stack.back().second++;
       bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
@@ -3649,9 +3751,10 @@ public:
 
       // Visit left or right.
       Value *NextV = TreeN->getOperand(EdgeToVist);
-      BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
-      if (Next)
-        Stack.push_back(std::make_pair(Next, 0));
+      // We currently only allow BinaryOperators and SelectInsts as reduction
+      // values in our tree.
+      if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
+        Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
       else if (NextV != Phi)
         return false;
     }
@@ -3719,9 +3822,12 @@ public:
     return VectorizedTree != nullptr;
   }
 
-private:
+  unsigned numReductionValues() const {
+    return ReducedVals.size();
+  }
 
-  /// \brief Calcuate the cost of a reduction.
+private:
+  /// \brief Calculate the cost of a reduction.
   int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
     Type *ScalarTy = FirstReducedVal->getType();
     Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
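matchAssociativeReduction() above walks the reduction tree iteratively with a stack of (node, next-edge-to-visit) pairs instead of recursion; nodes whose opcode differs from the reduction opcode terminate a path as reduced values. A standalone sketch of the same stack discipline on a toy tree (Node and the example values are invented):

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Interior nodes share the reduction "opcode"; leaves are reduced values.
    struct Node {
      int val;
      Node *l = nullptr, *r = nullptr;
    };

    int main() {
      Node v0{10}, v1{11}, v2{12}, v3{13};
      Node add0{0, &v0, &v1}, add1{0, &v2, &v3};
      Node root{0, &add0, &add1}; // models (v0 + v1) + (v2 + v3)

      std::vector<std::pair<Node *, unsigned>> stack{{&root, 0u}};
      while (!stack.empty()) {
        Node *n = stack.back().first;
        unsigned edge = stack.back().second++; // plays the role of EdgeToVist
        if (edge == 2) {                       // both operands visited
          stack.pop_back();
          continue;
        }
        Node *next = edge ? n->r : n->l;
        if (next->l || next->r)
          stack.push_back({next, 0u}); // same "opcode": keep descending
        else
          printf("reduced value: %d\n", next->val); // a leaf
      }
      return 0;
    }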
@@ -3827,6 +3933,82 @@ static bool PhiTypeSorterFunc(Value *V, Value *V2) {
   return V->getType() < V2->getType();
 }
 
+/// \brief Try and get a reduction value from a phi node.
+///
+/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
+/// if they come from either \p ParentBB or a containing loop latch.
+///
+/// \returns A candidate reduction value if possible, or \code nullptr \endcode
+/// if not possible.
+static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
+                                BasicBlock *ParentBB, LoopInfo *LI) {
+  // There are situations where the reduction value is not dominated by the
+  // reduction phi. Vectorizing such cases has been reported to cause
+  // miscompiles. See PR25787.
+  auto DominatedReduxValue = [&](Value *R) {
+    return (
+        dyn_cast<Instruction>(R) &&
+        DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent()));
+  };
+
+  Value *Rdx = nullptr;
+
+  // Return the incoming value if it comes from the same BB as the phi node.
+  if (P->getIncomingBlock(0) == ParentBB) {
+    Rdx = P->getIncomingValue(0);
+  } else if (P->getIncomingBlock(1) == ParentBB) {
+    Rdx = P->getIncomingValue(1);
+  }
+
+  if (Rdx && DominatedReduxValue(Rdx))
+    return Rdx;
+
+  // Otherwise, check whether we have a loop latch to look at.
+  Loop *BBL = LI->getLoopFor(ParentBB);
+  if (!BBL)
+    return nullptr;
+  BasicBlock *BBLatch = BBL->getLoopLatch();
+  if (!BBLatch)
+    return nullptr;
+
+  // There is a loop latch, return the incoming value if it comes from
+  // that. This reduction pattern occasionally turns up.
+  if (P->getIncomingBlock(0) == BBLatch) {
+    Rdx = P->getIncomingValue(0);
+  } else if (P->getIncomingBlock(1) == BBLatch) {
+    Rdx = P->getIncomingValue(1);
+  }
+
+  if (Rdx && DominatedReduxValue(Rdx))
+    return Rdx;
+
+  return nullptr;
+}
+
+/// \brief Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
+/// \returns true if a horizontal reduction was matched and reduced.
+/// \returns false if a horizontal reduction was not matched.
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+                                        BoUpSLP &R, TargetTransformInfo *TTI) {
+  if (!ShouldVectorizeHor)
+    return false;
+
+  HorizontalReduction HorRdx;
+  if (!HorRdx.matchAssociativeReduction(P, BI))
+    return false;
+
+  // If there is a sufficient number of reduction values, reduce
+  // to a nearby power-of-2. Can safely generate oversized
+  // vectors and rely on the backend to split them to legal sizes.
+  HorRdx.ReduxWidth =
+      std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+  return HorRdx.tryToReduce(R, TTI);
+}
+
 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
   bool Changed = false;
   SmallVector<Value *, 4> Incoming;
@@ -3883,7 +4065,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
     // We may go through BB multiple times so skip the one we have checked.
-    if (!VisitedInstrs.insert(it).second)
+    if (!VisitedInstrs.insert(&*it).second)
      continue;
 
     if (isa<DbgInfoIntrinsic>(it))
@@ -3894,20 +4076,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       // Check that the PHI is a reduction PHI.
       if (P->getNumIncomingValues() != 2)
         return Changed;
-      Value *Rdx =
-          (P->getIncomingBlock(0) == BB
-               ? (P->getIncomingValue(0))
-               : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
-                                               : nullptr));
+
+      Value *Rdx = getReductionValue(DT, P, BB, LI);
+
       // Check if this is a Binary Operator.
       BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
       if (!BI)
         continue;
 
       // Try to match and vectorize a horizontal reduction.
-      HorizontalReduction HorRdx;
-      if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
-          HorRdx.tryToReduce(R, TTI)) {
+      if (canMatchHorizontalReduction(P, BI, R, TTI)) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
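canMatchHorizontalReduction() above sizes the reduction as a power of two no smaller than 4 and relies on the backend to split oversized vectors into legal widths. A quick sketch of that width computation, with a local stand-in assumed to match llvm::PowerOf2Floor's semantics:

    #include <cstdint>
    #include <cstdio>

    // Largest power of two <= x (0 for x == 0).
    static uint64_t powerOf2Floor(uint64_t x) {
      while (x & (x - 1))
        x &= x - 1; // clear the lowest set bit until at most one remains
      return x;
    }

    int main() {
      for (uint64_t n : {3, 4, 9, 16, 31}) {
        uint64_t w = powerOf2Floor(n);
        if (w < 4) // same clamp as std::max((uint64_t)4, ...)
          w = 4;
        printf("%2llu reduction values -> ReduxWidth %llu\n",
               (unsigned long long)n, (unsigned long long)w);
      }
      return 0;
    }

For example, 9 reduction values yield ReduxWidth 8, and 3 values are still widened to the minimum of 4.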
@@ -3930,15 +4108,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       continue;
     }
 
-    // Try to vectorize horizontal reductions feeding into a store.
     if (ShouldStartVectorizeHorAtStore)
       if (StoreInst *SI = dyn_cast<StoreInst>(it))
         if (BinaryOperator *BinOp =
                 dyn_cast<BinaryOperator>(SI->getValueOperand())) {
-          HorizontalReduction HorRdx;
-          if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
-                HorRdx.tryToReduce(R, TTI)) ||
-               tryToVectorize(BinOp, R))) {
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI) ||
+              tryToVectorize(BinOp, R)) {
            Changed = true;
            it = BB->begin();
            e = BB->end();
@@ -4039,10 +4214,10 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
 
 char SLPVectorizer::ID = 0;
 static const char lv_name[] = "SLP Vectorizer";
 INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
 
diff --git a/llvm.spec.in b/llvm.spec.in
index 0a3f6e807849..d4ef08ae5ad3 100644
--- a/llvm.spec.in
+++ b/llvm.spec.in
@@ -65,3 +65,4 @@ rm -rf %{buildroot}
 
 * Mon Feb 09 2003 Brian R. Gaeke
 - Initial working version of RPM spec file.
+
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index d3b2fc37af79..4839dd396706 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -8,7 +8,8 @@ foreach(entry ${entries})
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/dragonegg) AND
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxx) AND
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxxabi) AND
-      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind))
+      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND
+      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite))
       add_subdirectory(${entry})
     endif()
   endif()
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 069bd0bcfd8f..b59ee42dec3a 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %struct.usb_hcd = type { %struct.usb_bus, i64, [0 x i64] }
 
 @uhci_pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @uhci_pci_ids
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @uhci_pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
 
 define i32 @uhci_suspend(%struct.usb_hcd* %hcd) {
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 20be13d153bb..ba9740028180 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %struct.usb_hcd = type { %struct.usb_bus, [0 x i64] }
 
 @pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
 
 define i32 @ehci_pci_setup(%struct.usb_hcd* %hcd) {
 entry:
diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll
new file mode 100644
index
000000000000..f693bcf73cd6 --- /dev/null +++ b/test/Analysis/BasicAA/bug.23540.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = external global i32 + +; CHECK-LABEL: f +; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6 +define void @f() { + %idxprom = zext i32 undef to i64 + %add4 = add i32 0, 1 + %idxprom5 = zext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* @c, i64 %idxprom5 + %arrayidx = getelementptr inbounds i32, i32* @c, i64 %idxprom + ret void +} + diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll new file mode 100644 index 000000000000..6a1478c65cef --- /dev/null +++ b/test/Analysis/BasicAA/bug.23626.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin13.4.0" + +; CHECK-LABEL: compute1 +; CHECK: PartialAlias: i32* %arrayidx8, i32* %out +; CHECK: PartialAlias: i32* %arrayidx11, i32* %out +; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx8 +; CHECK: PartialAlias: i32* %arrayidx14, i32* %out +; CHECK: PartialAlias: i32* %arrayidx14, i32* %arrayidx8 +; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx14 +define void @compute1(i32 %num.0.lcssa, i32* %out) { + %idxprom = zext i32 %num.0.lcssa to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom + %add9 = or i32 %num.0.lcssa, 1 + %idxprom10 = zext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* %out, i64 %idxprom10 + %add12 = or i32 %num.0.lcssa, 2 + %idxprom13 = zext i32 %add12 to i64 + %arrayidx14 = getelementptr inbounds i32, i32* %out, i64 %idxprom13 + ret void +} + +; CHECK-LABEL: compute2 +; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr +define void @compute2(i32 %num, i32* %out.addr) { + %add9 = add i32 %num, 1 + %idxprom10 = zext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* %out.addr, i64 %idxprom10 + ret void +} diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll index 78670b61ca1c..dc298f1668be 100644 --- a/test/Analysis/BasicAA/cs-cs.ll +++ b/test/Analysis/BasicAA/cs-cs.ll @@ -2,8 +2,8 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -13,27 +13,27 @@ declare void @a_readonly_func(i8 *) noinline nounwind readonly define <8 x i16> @test1(i8* %p, <8 x i16> %y) { entry: %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x 
i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c ; CHECK-LABEL: Function: test1: ; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) } define void @test2(i8* %P, i8* %Q) nounwind ssp { @@ -233,4 +233,9 @@ define void @test6(i8* %P) nounwind ssp { ; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void 
@llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) } -attributes #0 = { nounwind } +attributes #0 = { nounwind readonly argmemonly } +attributes #1 = { nounwind argmemonly } +attributes #2 = { noinline nounwind readonly } +attributes #3 = { nounwind ssp } +attributes #4 = { nounwind } + diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll index 341f6ba23b3a..20f6f7ec4ad0 100644 --- a/test/Analysis/BasicAA/full-store-partial-alias.ll +++ b/test/Analysis/BasicAA/full-store-partial-alias.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -tbaa -basicaa -gvn < %s | FileCheck -check-prefix=BASICAA %s -; RUN: opt -S -tbaa -gvn < %s | FileCheck %s +; RUN: opt -S -tbaa -gvn < %s | FileCheck -check-prefix=BASICAA %s +; RUN: opt -S -tbaa -disable-basicaa -gvn < %s | FileCheck %s ; rdar://8875631, rdar://8875069 ; BasicAA should notice that the store stores to the entire %u object, diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll index 8c05587ce233..526a039ef7ac 100644 --- a/test/Analysis/BasicAA/intrinsics.ll +++ b/test/Analysis/BasicAA/intrinsics.ll @@ -7,14 +7,14 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR:#[0-9]+]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR:#[0-9]+]] +; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { entry: - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c } @@ -22,21 +22,22 @@ entry: ; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: %q = getelementptr i8, i8* %p, i64 16 -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR]] +; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test1(i8* %p, <8 x i16> %y) { entry: %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c } -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly -declare void 
@llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind -; CHECK: attributes #0 = { nounwind readonly } +; CHECK: attributes #0 = { argmemonly nounwind readonly } +; CHECK: attributes #1 = { argmemonly nounwind } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index 3084f809c370..e42793936c3d 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -190,6 +190,43 @@ define i32 @test10(i32* %P, i32* %P2) { ; CHECK: ret i32 %Diff } +; CHECK-LABEL: @test11( +define i32 @test11(i32* %P, i32* %P2) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly(i32* readonly %P2) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK-NOT: load + ; CHECK: ret i32 0 +} + +declare i32 @func_argmemonly_two_args(i32* %P, i32* %P2) argmemonly + +; CHECK-LABEL: @test12( +define i32 @test12(i32* %P, i32* %P2, i32* %P3) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly_two_args(i32* readonly %P2, i32* %P3) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: load + ; CHECK: load + ; CHECK: sub + ; CHECK: ret i32 %Diff +} + +; CHECK-LABEL: @test13( +define i32 @test13(i32* %P, i32* %P2) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly(i32* readnone %P2) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK-NOT: load + ; CHECK: ret i32 0 +} + declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll index acb230c45de4..71b3c443f542 100644 --- a/test/Analysis/BasicAA/noalias-bugs.ll +++ b/test/Analysis/BasicAA/noalias-bugs.ll @@ -24,7 +24,7 @@ define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2, ; CHECK: store i64 2 ; CHECK: load -; CHECK; store i64 1 +; CHECK: store i64 1 store i64 2, i64* %ptr.64, align 8 %r = load i64, i64* %either_ptr.64, align 8 diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll index a72778277bb2..3944e9e43566 100644 --- a/test/Analysis/BasicAA/phi-aa.ll +++ b/test/Analysis/BasicAA/phi-aa.ll @@ -39,6 +39,7 @@ return: ; CHECK-LABEL: pr18068 ; CHECK: MayAlias: i32* %0, i32* %arrayidx5 +; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5 define i32 @pr18068(i32* %jj7, i32* %j) { entry: diff --git a/test/Analysis/BasicAA/phi-loop.ll b/test/Analysis/BasicAA/phi-loop.ll new file mode 100644 index 000000000000..6337bfbc1cfb --- /dev/null +++ b/test/Analysis/BasicAA/phi-loop.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -basicaa -basicaa-recphi=1 -gvn -S | FileCheck %s +; +; Check that section->word_ofs doesn't get reloaded in every iteration of the +; for loop. 
+; +; Code: +; +; typedef struct { +; unsigned num_words; +; unsigned word_ofs; +; const unsigned *data; +; } section_t; +; +; +; void test2(const section_t * restrict section, unsigned * restrict dst) {; +; while (section->data != NULL) { +; const unsigned *src = section->data; +; for (unsigned i=0; i < section->num_words; ++i) { +; dst[section->word_ofs + i] = src[i]; +; } +; +; ++section; +; } +; } +; + +; CHECK-LABEL: for.body: +; CHECK-NOT: load i32, i32* %word_ofs + +%struct.section_t = type { i32, i32, i32* } + +define void @test2(%struct.section_t* noalias nocapture readonly %section, i32* noalias nocapture %dst) { +entry: + %data13 = getelementptr inbounds %struct.section_t, %struct.section_t* %section, i32 0, i32 2 + %0 = load i32*, i32** %data13, align 4 + %cmp14 = icmp eq i32* %0, null + br i1 %cmp14, label %while.end, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry, %for.end + %1 = phi i32* [ %6, %for.end ], [ %0, %entry ] + %section.addr.015 = phi %struct.section_t* [ %incdec.ptr, %for.end ], [ %section, %entry ] + %num_words = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 0 + %2 = load i32, i32* %num_words, align 4 + %cmp211 = icmp eq i32 %2, 0 + br i1 %cmp211, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %for.cond.preheader + %word_ofs = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 1 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %arrayidx.phi = phi i32* [ %1, %for.body.lr.ph ], [ %arrayidx.inc, %for.body ] + %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %3 = load i32, i32* %arrayidx.phi, align 4 + %4 = load i32, i32* %word_ofs, align 4 + %add = add i32 %4, %i.012 + %arrayidx3 = getelementptr inbounds i32, i32* %dst, i32 %add + store i32 %3, i32* %arrayidx3, align 4 + %inc = add i32 %i.012, 1 + %5 = load i32, i32* %num_words, align 4 + %cmp2 = icmp ult i32 %inc, %5 + %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1 + br i1 %cmp2, label %for.body, label %for.end + +for.end: ; preds = %for.body, %for.cond.preheader + %incdec.ptr = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1 + %data = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1, i32 2 + %6 = load i32*, i32** %data, align 4 + %cmp = icmp eq i32* %6, null + br i1 %cmp, label %while.end, label %for.cond.preheader + +while.end: ; preds = %for.end, %entry + ret void +} + diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll new file mode 100644 index 000000000000..f2de6a76c5e0 --- /dev/null +++ b/test/Analysis/BasicAA/q.bad.ll @@ -0,0 +1,180 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7--linux-gnueabi" + +; CHECK-LABEL: test_zext_sext_amounts255 +; CHECK: NoAlias: i8* %a, i8* %b +define void @test_zext_sext_amounts255(i8* %mem) { + %sext.1 = sext i8 255 to i16 + %sext.zext.1 = zext i16 %sext.1 to i64 + %sext.2 = sext i8 255 to i32 + %sext.zext.2 = zext i32 %sext.2 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1 + %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2 + ret void +} + +; CHECK-LABEL: test_zext_sext_amounts +; CHECK: PartialAlias: i8* %a, i8* %b +; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different 
+; number of bits before zext-ing the remainder. +define void @test_zext_sext_amounts(i8* %mem, i8 %num) { + %sext.1 = sext i8 %num to i16 + %sext.zext.1 = zext i16 %sext.1 to i64 + %sext.2 = sext i8 %num to i32 + %sext.zext.2 = zext i32 %sext.2 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1 + %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2 + ret void +} + +; CHECK-LABEL: based_on_pr18068 +; CHECK: NoAlias: i8* %a, i8* %b +; CHECK: NoAlias: i8* %a, i8* %c +define void @based_on_pr18068(i32 %loaded, i8* %mem) { + %loaded.64 = zext i32 %loaded to i64 + %add1 = add i32 %loaded, -1 ; unsigned wraps unless %loaded == 0 + %add1.64 = zext i32 %add1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias + %sub1 = sub i32 %loaded, 1 ; unsigned wraps iff %loaded == 0 + %sub1.64 = zext i32 %sub1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias + %a = getelementptr inbounds i8, i8* %mem, i64 %loaded.64 + %b = getelementptr inbounds i8, i8* %mem, i64 %add1.64 + %c = getelementptr inbounds i8, i8* %mem, i64 %sub1.64 + ret void +} + +; CHECK-LABEL: test_path_dependence +; CHECK: PartialAlias: i8* %a, i8* %b +; CHECK: MustAlias: i8* %a, i8* %c +; CHECK: PartialAlias: i8* %a, i8* %d +define void @test_path_dependence(i32 %p, i8* %mem) { + %p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0 + %p.minus1.64 = zext i32 %p.minus1 to i64 + %p.64.again = add i64 %p.minus1.64, 1 ; either %p (if we wrapped) or 4294967296 (if we didn't) + + %p.nsw.nuw.minus1 = sub nsw nuw i32 %p, 1 ; as nuw we know %p >= 1, and as nsw %p <= 2147483647 + %p.nsw.nuw.minus1.64 = zext i32 %p.nsw.nuw.minus1 to i64 + %p.nsw.nuw.64.again = add nsw nuw i64 %p.nsw.nuw.minus1.64, 1 ; ...so always exactly %p + + %p.nsw.minus1 = sub nsw i32 %p, 1 ; only nsw, so can only guarantee %p != 0x10000000 + %p.nsw.minus1.64 = zext i32 %p.nsw.minus1 to i64 ; when %p > 0x10000000 (ie <= 0 as a signed number) then the zext will make this a huge positive number + %p.nsw.64.again = add nsw i64 %p.nsw.minus1.64, 1 ; ...and so this is very much != %p + + %p.64 = zext i32 %p to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %p.64 + %b = getelementptr inbounds i8, i8* %mem, i64 %p.64.again + %c = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.nuw.64.again + %d = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.64.again + ret void +} + +; CHECK-LABEL: test_zext_sext_255 +; CHECK: NoAlias: i8* %a, i8* %b +define void @test_zext_sext_255(i8* %mem) { + %zext.255 = zext i8 255 to i16 ; 0x00FF + %sext.255 = sext i8 255 to i16 ; 0xFFFF + %zext.sext.255 = zext i16 %sext.255 to i32 ; 0x0000FFFF + %sext.zext.255 = sext i16 %zext.255 to i32 ; 0x000000FF + %zext.zext.sext.255 = zext i32 %zext.sext.255 to i64 + %zext.sext.zext.255 = zext i32 %sext.zext.255 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.255 + %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.255 + ret void +} + +; CHECK-LABEL: test_zext_sext_num +; CHECK: PartialAlias: i8* %a, i8* %b +; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 
0) +define void @test_zext_sext_num(i8* %mem, i8 %num) { + %zext.num = zext i8 %num to i16 + %sext.num = sext i8 %num to i16 + %zext.sext.num = zext i16 %sext.num to i32 + %sext.zext.num = sext i16 %zext.num to i32 + %zext.zext.sext.num = zext i32 %zext.sext.num to i64 + %zext.sext.zext.num = zext i32 %sext.zext.num to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.num + %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.num + ret void +} + +; CHECK-LABEL: uncompressStream +; CHECK: MustAlias: i8* %a, i8* %b +; CHECK: NoAlias: i8* %a, i8* %c +define void @uncompressStream(i8* %mem) { + %zext.255 = zext i8 255 to i32 + %sext.255 = sext i8 255 to i32 + %a = getelementptr inbounds i8, i8* %mem, i32 255 + %b = getelementptr inbounds i8, i8* %mem, i32 %zext.255 + %c = getelementptr inbounds i8, i8* %mem, i32 %sext.255 + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i3_i32 +; CHECK: NoAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i3_i32(i32* %mem, i3 %val) { + %zext.plus.7 = add nsw i3 %val, 7 + %zext.plus.4 = add nsw i3 %val, 4 + %zext.val = zext i3 %val to i32 + %zext.4 = zext i3 %zext.plus.4 to i32 + %zext.7 = zext i3 %zext.plus.7 to i32 + %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4 + %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7 + %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i8_i32 +; CHECK: NoAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) { + %zext.plus.7 = add nsw i8 %val, 7 + %zext.plus.4 = add nsw i8 %val, 4 + %zext.val = zext i8 %val to i32 + %zext.4 = zext i8 %zext.plus.4 to i32 + %zext.7 = zext i8 %zext.plus.7 to i32 + %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4 + %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7 + %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i3_i8 +; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: PartialAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) { + %zext.plus.7 = add nsw i3 %val, 7 + %zext.plus.4 = add nsw i3 %val, 4 + %zext.val = zext i3 %val to i32 + %zext.4 = zext i3 %zext.plus.4 to i32 + %zext.7 = zext i3 %zext.plus.7 to i32 + %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4 + %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7 + %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val + %a = bitcast i8* %a.8 to i32* + %b = bitcast i8* %b.8 to i32* + %c = bitcast i8* %c.8 to i32* + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i8_i8 +; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) { + %zext.plus.7 = add nsw i8 %val, 7 + %zext.plus.4 = add nsw i8 %val, 4 + %zext.val = zext i8 %val to i32 + %zext.4 = zext i8 %zext.plus.4 to i32 + %zext.7 = zext i8 %zext.plus.7 to i32 + %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4 + %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7 + %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val + %a = bitcast i8* %a.8 to i32* + %b = bitcast i8* %b.8 to i32* + %c = bitcast i8* %c.8 to i32* + ret void +} diff --git a/test/Analysis/BasicAA/sequential-gep.ll 
b/test/Analysis/BasicAA/sequential-gep.ll new file mode 100644 index 000000000000..c17a782aa04b --- /dev/null +++ b/test/Analysis/BasicAA/sequential-gep.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; CHECK: Function: t1 +; CHECK: NoAlias: i32* %gep1, i32* %gep2 +define void @t1([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add + ret void +} + +; CHECK: Function: t2 +; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +define void @t2([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + ret void +} + +; CHECK: Function: t3 +; CHECK: MustAlias: i32* %gep1, i32* %gep2 +define void @t3([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + ret void +} + +; CHECK: Function: t4 +; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +define void @t4([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 %add, i32 %add + ret void +} + +; CHECK: Function: t5 +; CHECK: PartialAlias: i32* %gep2, i64* %bc +define void @t5([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add + %bc = bitcast i32* %gep1 to i64* + ret void +} + +!0 = !{ i32 1, i32 5 } diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll new file mode 100644 index 000000000000..685d45be6151 --- /dev/null +++ b/test/Analysis/BasicAA/zext.ll @@ -0,0 +1,231 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: test_with_zext +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_zext() { + %1 = tail call i8* @malloc(i64 120) + %a = getelementptr inbounds i8, i8* %1, i64 8 + %2 = getelementptr inbounds i8, i8* %1, i64 16 + %3 = zext i32 3 to i64 + %b = getelementptr inbounds i8, i8* %2, i64 %3 + ret void +} + +; CHECK-LABEL: test_with_lshr +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_lshr(i64 %i) { + %1 = tail call i8* @malloc(i64 120) + %a = getelementptr inbounds i8, i8* %1, i64 8 + %2 = getelementptr inbounds i8, i8* %1, i64 16 + %3 = lshr i64 %i, 2 + %b = getelementptr inbounds i8, i8* %2, i64 %3 + ret void +} + +; CHECK-LABEL: test_with_a_loop +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_a_loop(i8* %mem) { + br label %for.loop + +for.loop: + %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ] + %a = 
getelementptr inbounds i8, i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_with_varying_base_pointer_in_loop
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_varying_base_pointer_in_loop(i8* %mem.orig) {
+  br label %for.loop
+
+for.loop:
+  %mem = phi i8* [ %mem.orig, %0 ], [ %mem.plus1, %for.loop ]
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %a = getelementptr inbounds i8, i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %mem.plus1 = getelementptr inbounds i8, i8* %mem, i64 8
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_sign_extension
+; CHECK: PartialAlias: i64* %b.i64, i8* %a
+
+define void @test_sign_extension(i32 %p) {
+  %1 = tail call i8* @malloc(i64 120)
+  %p.64 = zext i32 %p to i64
+  %a = getelementptr inbounds i8, i8* %1, i64 %p.64
+  %p.minus1 = add i32 %p, -1
+  %p.minus1.64 = zext i32 %p.minus1 to i64
+  %b.i8 = getelementptr inbounds i8, i8* %1, i64 %p.minus1.64
+  %b.i64 = bitcast i8* %b.i8 to i64*
+  ret void
+}
+
+; CHECK-LABEL: test_fe_tools
+; CHECK: PartialAlias: i32* %a, i32* %b
+
+define void @test_fe_tools([8 x i32]* %values) {
+  br label %reorder
+
+for.loop:
+  %i = phi i32 [ 0, %reorder ], [ %i.next, %for.loop ]
+  %idxprom = zext i32 %i to i64
+  %b = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 %idxprom
+  %i.next = add nuw nsw i32 %i, 1
+  %1 = icmp eq i32 %i.next, 10
+  br i1 %1, label %for.loop.exit, label %for.loop
+
+reorder:
+  %a = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 1
+  br label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+@b = global i32 0, align 4
+@d = global i32 0, align 4
+
+; CHECK-LABEL: test_spec2006
+; CHECK: PartialAlias: i32** %x, i32** %y
+
+define void @test_spec2006() {
+  %h = alloca [1 x [2 x i32*]], align 16
+  %d.val = load i32, i32* @d, align 4
+  %d.promoted = sext i32 %d.val to i64
+  %1 = icmp slt i32 %d.val, 2
+  br i1 %1, label %.lr.ph, label %3
+
+.lr.ph:                                           ; preds = %0
+  br label %2
zqVrK(SftJpbrsp|sonEXgiWB6Nn?2VYGm7f=gB5&Q6F7V_ZD_4P#g#>(S*jN+2}J; zn%=&wKJB@v9Ml(85t~NcGRx>YZ|h!TR571#8ZMPd)#>v^%WTT57dslWZUP-nk{sB_tG8 zeU89MBfeSDK5iPlx)hqA`%$@pgAV;YboZlJ#-v$Ns0u5^Tg`jxPhjU@)>vZ&7BEPv z_thsnJCO`iLrsHBB)S{UU}xZnyM%(AWv$<~e4>QKnP)1js>aHdC^q_PL9QM~exQSG z`PzIQ>C*_Unw~}L&uH}>0b<`=v(!a)td-|*C1e6Sr#qAb!*>5$Z2jD zJ;<6ac2Rd&EqKR%?1J`qIi8gN>$5pT#R=^qGOq+V@z`3vCZf)(s>=SgyO{_)0W(*L zF?pb9nJFWbZ5{^p<2Vxt)CZQp%=EQ+MV`VLe1en!76`xk5!D(C;$CrZx$c)#@mxw~B8h@k7Aj{(CfQ;pPo!)>nt> zI2g5lQ}QTzP1vLsk>y}r3kEyS=GS&OF09Me9&3BQ2M)L#=mCFbi!=%~wXCJuC>l)h-xo*vNN@hYU}#$JHJ@#kAI=(OWF+=1%DDGH0# zw(tF!&Hte8g4>tN*pgC|a!z5Zs~xS1oYWJ(XJz*0xyUO^8nJ;e==M;L)I>)8)(^|9 z%C~FRgPI7+OFu}J>)yxCLXRjhcNApV>U!3~1*tK`vLjBJo_eR8XldBx5W~)DUvYu- zcgR?g?_G&irmtL?5!5~d1Ne$5&W(ZAtl+I<0e3W6<@QstL&rOD z>HQP3{g$mo=RsgA1NQKAwQNSE^=kIJ4Nr;^FB%1>FB9A^G9!%nXA*8(3#e9iETQ#N zi{PI61CY4VThNZxld%H9oxVYwo=*Fm(|1plbQP!vtcqj3)bVm~>V~50k1~_E>#$m^ z=V|DQaVckACU-A>edkwLb6_AJ+*2Lh=q@se(4xmtfd7(0!`)~{rKnWyA>MrutD0EK z`t7!g+lr7`z3vK@ENo8DgDWyRwa0JSVS>ojpmm=` z%~_!AqcuNGXA6wh`cJSC=ofR1IQgrD)&+iqCmi8`lnp+ymIq3Un|FQpUvn${LY!>m znoa0e40z1BDwy(?Z9X*u6Et0sUC9NWW{-)$h!^Cjk5h( zF#{j?-&TEK(_$M6D#F(*Api4~4+Be{r+Y5eMB0BCFZ?z8j5blYb-Uo%6i9@b(IDhbo~ewY)05xoYZ(s%Z(;Gs>%T zo2zG^sQyht4ZNh+$cP_h8vx-2c9;TnFJlf|!mQpL^3Nj%?4MUQ7UG%sS`Rm4FEKcqbl_(0HBfug%sy577z0r@ zID(j35qi0_OIGd#hh;5O>!mb~-cF17UTvWPr9xjbO5mSt_Bh)2%EGnJNKO9cB!<7r zw2^rSZdfa3k3@UmQEdLbigb0wtWIKhOJeK9`IRy1vx~DPM>C3(vtmr|%+$5)NP!4iW%1Se_gYz{#A)8TB%WtQbFKaaL)NkT$=~oJjM7-gKrJ)O~iPN*XTV_x3imH2Os%{Mr;&-hKv8A(+08ZcTPkd#jpRP|Xy69*^60y9&CmE=_6y$DxQDLBq z-b;f`n_49UD-QK?%((*w+WxLSiRfgRtZr7trnKcnCOWc5I9ojxSZ9Fi)}HYe;c_eqO6EF+9%!sn3i91fk6f6vR% zzY!IjpACOlA_m(^F(4^*nn@uqLOp^mQSCxroGoBh3UP`25oh54W+-RnHA**_0zV`l zQ4BcFq!MPkn+!jt{od6!hfW_|vjU`k97+q_vM^EyaZ%x*T5r8zr)8 zySf~575zjD-%v-1*ci?7Z5Y0iWDJLih3y2@6yQ^HWUu77chA7;($%$tL%|oHzIOK` z6mmy^4EBHHp%!^e>hcFh!p^C~X~LHaza+umn+dDwBL^```>RM4{Yc;G26jcjnHl6A zYGE%!<6JZVbKpDX3&>pc@F(%ZiE`!z4}LhYmHCSY=Qj@S!;UCVQ}_|(ZrN~!aRUim zHdpZ9M&zXhYJ*VeLKO>vQwlxlW&&)HMPb#w7us(5PSymgl|mvyc1t&qMY*#araJ}Y zvRRHI^fJ(ny@^Q*R{Qy3uCy4otTcGc<$85@eV0T|mcxem2GUevV2jf9kb@{Y`duX^ zJ6cq|gU~XNC{TOVGY0KeXJCkOln%jfgNyS3df;L%L}O(J7U!(+OD6o9iGIgel^P1} zd%U;jw@dS>71eXJTN*OjUj9qIb_x7Uii(Q{;D72poF`$iYcj!(N$+FqA9 zz#L!*FsN+BWP{2fHGCw_fR6|B;bV6hd<<8^hq4YnmTiHLy?*!@=|+@PQOGW5z935n zJbtZmP}yx&6gq11zHn5X^!Ojj8dU!Nc}1b~_xWFRx=wri2l57$hb}4#J%d!>v&w7$&T9cj{v%U~0b5rRs!&2@ zs4K}Y!?`4kS;{GF?$kys9>dGd=9=rwE z2ddV1_3*AU_L!64b96svQ4%Imj#e!C2J&ktw;h)R<(^Pz4@Ck`QfT#{8fM1?@S;TD z4qKc_5_E7B +@const.struct = constant %const.struct.type { i32 -1, i8 undef } +; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef } +@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }> +; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }> +@const.array = constant [2 x i32] [i32 -3, i32 -4] +; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4] +@const.vector = constant <2 x i32> +; CHECK: @const.vector = constant <2 x i32> + +;; Global Variables +; Format: [@ =] [Linkage] [Visibility] [DLLStorageClass] +; [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized] +; [] +; [, section "name"] [, comdat [($name)]] [, align ] + +; Global Variables -- Simple +@g1 = global i32 0 +; CHECK: @g1 = global i32 0 +@g2 = constant i32 0 +; CHECK: @g2 = constant i32 0 + +; Global Variables -- Linkage +@g.private = private global i32 0 +; CHECK: @g.private = private global i32 0 +@g.internal = internal global i32 0 +; CHECK: @g.internal = internal global i32 0 
+@g.available_externally = available_externally global i32 0 +; CHECK: @g.available_externally = available_externally global i32 0 +@g.linkonce = linkonce global i32 0 +; CHECK: @g.linkonce = linkonce global i32 0 +@g.weak = weak global i32 0 +; CHECK: @g.weak = weak global i32 0 +@g.common = common global i32 0 +; CHECK: @g.common = common global i32 0 +@g.appending = appending global [4 x i8] c"test" +; CHECK: @g.appending = appending global [4 x i8] c"test" +@g.extern_weak = extern_weak global i32 +; CHECK: @g.extern_weak = extern_weak global i32 +@g.linkonce_odr = linkonce_odr global i32 0 +; CHECK: @g.linkonce_odr = linkonce_odr global i32 0 +@g.weak_odr = weak_odr global i32 0 +; CHECK: @g.weak_odr = weak_odr global i32 0 +@g.external = external global i32 +; CHECK: @g.external = external global i32 + +; Global Variables -- Visibility +@g.default = default global i32 0 +; CHECK: @g.default = global i32 0 +@g.hidden = hidden global i32 0 +; CHECK: @g.hidden = hidden global i32 0 +@g.protected = protected global i32 0 +; CHECK: @g.protected = protected global i32 0 + +; Global Variables -- DLLStorageClass +@g.dlldefault = default global i32 0 +; CHECK: @g.dlldefault = global i32 0 +@g.dllimport = external dllimport global i32 +; CHECK: @g.dllimport = external dllimport global i32 +@g.dllexport = dllexport global i32 0 +; CHECK: @g.dllexport = dllexport global i32 0 + +; Global Variables -- ThreadLocal +@g.notthreadlocal = global i32 0 +; CHECK: @g.notthreadlocal = global i32 0 +@g.generaldynamic = thread_local global i32 0 +; CHECK: @g.generaldynamic = thread_local global i32 0 +@g.localdynamic = thread_local(localdynamic) global i32 0 +; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0 +@g.initialexec = thread_local(initialexec) global i32 0 +; CHECK: @g.initialexec = thread_local(initialexec) global i32 0 +@g.localexec = thread_local(localexec) global i32 0 +; CHECK: @g.localexec = thread_local(localexec) global i32 0 + +; Global Variables -- unnamed_addr +@g.unnamed_addr = unnamed_addr global i32 0 +; CHECK: @g.unnamed_addr = unnamed_addr global i32 0 + +; Global Variables -- AddrSpace +@g.addrspace = addrspace(1) global i32 0 +; CHECK: @g.addrspace = addrspace(1) global i32 0 + +; Global Variables -- ExternallyInitialized +@g.externally_initialized = external externally_initialized global i32 +; CHECK: @g.externally_initialized = external externally_initialized global i32 + +; Global Variables -- section +@g.section = global i32 0, section "_DATA" +; CHECK: @g.section = global i32 0, section "_DATA" + +; Global Variables -- comdat +@comdat.any = global i32 0, comdat +; CHECK: @comdat.any = global i32 0, comdat +@comdat.exactmatch = global i32 0, comdat +; CHECK: @comdat.exactmatch = global i32 0, comdat +@comdat.largest = global i32 0, comdat +; CHECK: @comdat.largest = global i32 0, comdat +@comdat.noduplicates = global i32 0, comdat +; CHECK: @comdat.noduplicates = global i32 0, comdat +@comdat.samesize = global i32 0, comdat +; CHECK: @comdat.samesize = global i32 0, comdat + +; Force two globals from different comdats into sections with the same name. 
+$comdat1 = comdat any +$comdat2 = comdat any +@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) +; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) + +; Global Variables -- align +@g.align = global i32 0, align 4 +; CHECK: @g.align = global i32 0, align 4 + +; Global Variables -- Intrinsics +%pri.func.data = type { i32, void ()*, i8* } +@g.used1 = global i32 0 +@g.used2 = global i32 0 +@g.used3 = global i8 0 +declare void @g.f1() +@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" + +;; Aliases +; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] +; [unnamed_addr] alias @ + +; Aliases -- Linkage +@a.private = private alias i32* @g.private +; CHECK: @a.private = private alias i32, i32* @g.private +@a.internal = internal alias i32* @g.internal +; CHECK: @a.internal = internal alias i32, i32* @g.internal +@a.linkonce = linkonce alias i32* @g.linkonce +; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce +@a.weak = weak alias i32* @g.weak +; CHECK: @a.weak = weak alias i32, i32* @g.weak +@a.linkonce_odr = linkonce_odr alias i32* @g.linkonce_odr +; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +@a.weak_odr = weak_odr alias i32* @g.weak_odr +; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +@a.external = external alias i32* @g1 +; CHECK: @a.external = alias i32, i32* @g1 + +; Aliases -- Visibility +@a.default = default alias i32* @g.default +; CHECK: @a.default = alias i32, i32* @g.default +@a.hidden = hidden alias i32* @g.hidden +; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden +@a.protected = protected alias i32* @g.protected +; CHECK: @a.protected = protected alias i32, i32* @g.protected + +; Aliases -- DLLStorageClass +@a.dlldefault = default alias i32* @g.dlldefault +; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault +@a.dllimport = dllimport alias i32* @g1 +; CHECK: @a.dllimport = dllimport alias i32, i32* @g1 +@a.dllexport = dllexport alias i32* @g.dllexport +; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport + +; Aliases -- ThreadLocal +@a.notthreadlocal = alias i32* @g.notthreadlocal +; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal +@a.generaldynamic = thread_local alias i32* @g.generaldynamic +; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +@a.localdynamic = thread_local(localdynamic) alias i32* @g.localdynamic +; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* 
@g.localdynamic +@a.initialexec = thread_local(initialexec) alias i32* @g.initialexec +; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +@a.localexec = thread_local(localexec) alias i32* @g.localexec +; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec + +; Aliases -- unnamed_addr +@a.unnamed_addr = unnamed_addr alias i32* @g.unnamed_addr +; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr + +;; Functions +; Format: define [linkage] [visibility] [DLLStorageClass] +; [cconv] [ret attrs] +; @ ([argument list]) +; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]] +; [align N] [gc] [prefix Constant] [prologue Constant] +; [personality Constant] { ... } + +; Functions -- Simple +declare void @f1 () +; CHECK: declare void @f1() + +define void @f2 () { +; CHECK: define void @f2() +entry: + ret void +} + +; Functions -- linkage +define private void @f.private() { +; CHECK: define private void @f.private() +entry: + ret void +} +define internal void @f.internal() { +; CHECK: define internal void @f.internal() +entry: + ret void +} +define available_externally void @f.available_externally() { +; CHECK: define available_externally void @f.available_externally() +entry: + ret void +} +define linkonce void @f.linkonce() { +; CHECK: define linkonce void @f.linkonce() +entry: + ret void +} +define weak void @f.weak() { +; CHECK: define weak void @f.weak() +entry: + ret void +} +define linkonce_odr void @f.linkonce_odr() { +; CHECK: define linkonce_odr void @f.linkonce_odr() +entry: + ret void +} +define weak_odr void @f.weak_odr() { +; CHECK: define weak_odr void @f.weak_odr() +entry: + ret void +} +declare external void @f.external() +; CHECK: declare void @f.external() +declare extern_weak void @f.extern_weak() +; CHECK: declare extern_weak void @f.extern_weak() + +; Functions -- visibility +declare default void @f.default() +; CHECK: declare void @f.default() +declare hidden void @f.hidden() +; CHECK: declare hidden void @f.hidden() +declare protected void @f.protected() +; CHECK: declare protected void @f.protected() + +; Functions -- DLLStorageClass +declare dllimport void @f.dllimport() +; CHECK: declare dllimport void @f.dllimport() +declare dllexport void @f.dllexport() +; CHECK: declare dllexport void @f.dllexport() + +; Functions -- cconv (Calling conventions) +declare ccc void @f.ccc() +; CHECK: declare void @f.ccc() +declare fastcc void @f.fastcc() +; CHECK: declare fastcc void @f.fastcc() +declare coldcc void @f.coldcc() +; CHECK: declare coldcc void @f.coldcc() +declare cc10 void @f.cc10() +; CHECK: declare ghccc void @f.cc10() +declare ghccc void @f.ghccc() +; CHECK: declare ghccc void @f.ghccc() +declare cc11 void @f.cc11() +; CHECK: declare cc11 void @f.cc11() +declare webkit_jscc void @f.webkit_jscc() +; CHECK: declare webkit_jscc void @f.webkit_jscc() +declare anyregcc void @f.anyregcc() +; CHECK: declare anyregcc void @f.anyregcc() +declare preserve_mostcc void @f.preserve_mostcc() +; CHECK: declare preserve_mostcc void @f.preserve_mostcc() +declare preserve_allcc void @f.preserve_allcc() +; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare cc64 void @f.cc64() +; CHECK: declare x86_stdcallcc void @f.cc64() +declare x86_stdcallcc void @f.x86_stdcallcc() +; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc() +declare cc65 void @f.cc65() +; CHECK: declare x86_fastcallcc void @f.cc65() +declare x86_fastcallcc void @f.x86_fastcallcc() +; CHECK: declare x86_fastcallcc void 
@f.x86_fastcallcc() +declare cc66 void @f.cc66() +; CHECK: declare arm_apcscc void @f.cc66() +declare arm_apcscc void @f.arm_apcscc() +; CHECK: declare arm_apcscc void @f.arm_apcscc() +declare cc67 void @f.cc67() +; CHECK: declare arm_aapcscc void @f.cc67() +declare arm_aapcscc void @f.arm_aapcscc() +; CHECK: declare arm_aapcscc void @f.arm_aapcscc() +declare cc68 void @f.cc68() +; CHECK: declare arm_aapcs_vfpcc void @f.cc68() +declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +declare cc69 void @f.cc69() +; CHECK: declare msp430_intrcc void @f.cc69() +declare msp430_intrcc void @f.msp430_intrcc() +; CHECK: declare msp430_intrcc void @f.msp430_intrcc() +declare cc70 void @f.cc70() +; CHECK: declare x86_thiscallcc void @f.cc70() +declare x86_thiscallcc void @f.x86_thiscallcc() +; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc() +declare cc71 void @f.cc71() +; CHECK: declare ptx_kernel void @f.cc71() +declare ptx_kernel void @f.ptx_kernel() +; CHECK: declare ptx_kernel void @f.ptx_kernel() +declare cc72 void @f.cc72() +; CHECK: declare ptx_device void @f.cc72() +declare ptx_device void @f.ptx_device() +; CHECK: declare ptx_device void @f.ptx_device() +declare cc75 void @f.cc75() +; CHECK: declare spir_func void @f.cc75() +declare spir_func void @f.spir_func() +; CHECK: declare spir_func void @f.spir_func() +declare cc76 void @f.cc76() +; CHECK: declare spir_kernel void @f.cc76() +declare spir_kernel void @f.spir_kernel() +; CHECK: declare spir_kernel void @f.spir_kernel() +declare cc77 void @f.cc77() +; CHECK: declare intel_ocl_bicc void @f.cc77() +declare intel_ocl_bicc void @f.intel_ocl_bicc() +; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc() +declare cc78 void @f.cc78() +; CHECK: declare x86_64_sysvcc void @f.cc78() +declare x86_64_sysvcc void @f.x86_64_sysvcc() +; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc() +declare cc79 void @f.cc79() +; CHECK: declare x86_64_win64cc void @f.cc79() +declare x86_64_win64cc void @f.x86_64_win64cc() +; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc() +declare cc80 void @f.cc80() +; CHECK: declare x86_vectorcallcc void @f.cc80() +declare x86_vectorcallcc void @f.x86_vectorcallcc() +; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc() +declare cc1023 void @f.cc1023() +; CHECK: declare cc1023 void @f.cc1023() + +; Functions -- ret attrs (Return attributes) +declare zeroext i64 @f.zeroext() +; CHECK: declare zeroext i64 @f.zeroext() +declare signext i64 @f.signext() +; CHECK: declare signext i64 @f.signext() +declare inreg i32* @f.inreg() +; CHECK: declare inreg i32* @f.inreg() +declare noalias i32* @f.noalias() +; CHECK: declare noalias i32* @f.noalias() +declare nonnull i32* @f.nonnull() +; CHECK: declare nonnull i32* @f.nonnull() +declare dereferenceable(4) i32* @f.dereferenceable4() +; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4() +declare dereferenceable(8) i32* @f.dereferenceable8() +; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8() +declare dereferenceable(16) i32* @f.dereferenceable16() +; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16() +declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +; CHECK: declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() +; CHECK: 
declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() + +; Functions -- Parameter attributes +declare void @f.param.zeroext(i8 zeroext) +; CHECK: declare void @f.param.zeroext(i8 zeroext) +declare void @f.param.signext(i8 signext) +; CHECK: declare void @f.param.signext(i8 signext) +declare void @f.param.inreg(i8 inreg) +; CHECK: declare void @f.param.inreg(i8 inreg) +declare void @f.param.byval({ i8, i8 }* byval) +; CHECK: declare void @f.param.byval({ i8, i8 }* byval) +declare void @f.param.inalloca(i8* inalloca) +; CHECK: declare void @f.param.inalloca(i8* inalloca) +declare void @f.param.sret(i8* sret) +; CHECK: declare void @f.param.sret(i8* sret) +declare void @f.param.noalias(i8* noalias) +; CHECK: declare void @f.param.noalias(i8* noalias) +declare void @f.param.nocapture(i8* nocapture) +; CHECK: declare void @f.param.nocapture(i8* nocapture) +declare void @f.param.nest(i8* nest) +; CHECK: declare void @f.param.nest(i8* nest) +declare i8* @f.param.returned(i8* returned) +; CHECK: declare i8* @f.param.returned(i8* returned) +declare void @f.param.nonnull(i8* nonnull) +; CHECK: declare void @f.param.nonnull(i8* nonnull) +declare void @f.param.dereferenceable(i8* dereferenceable(4)) +; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) +declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) + +; Functions -- unnamed_addr +declare void @f.unnamed_addr() unnamed_addr +; CHECK: declare void @f.unnamed_addr() unnamed_addr + +; Functions -- fn Attrs (Function attributes) +declare void @f.alignstack4() alignstack(4) +; CHECK: declare void @f.alignstack4() #0 +declare void @f.alignstack8() alignstack(8) +; CHECK: declare void @f.alignstack8() #1 +declare void @f.alwaysinline() alwaysinline +; CHECK: declare void @f.alwaysinline() #2 +declare void @f.cold() cold +; CHECK: declare void @f.cold() #3 +declare void @f.convergent() convergent +; CHECK: declare void @f.convergent() #4 +declare void @f.inlinehint() inlinehint +; CHECK: declare void @f.inlinehint() #5 +declare void @f.jumptable() unnamed_addr jumptable +; CHECK: declare void @f.jumptable() unnamed_addr #6 +declare void @f.minsize() minsize +; CHECK: declare void @f.minsize() #7 +declare void @f.naked() naked +; CHECK: declare void @f.naked() #8 +declare void @f.nobuiltin() nobuiltin +; CHECK: declare void @f.nobuiltin() #9 +declare void @f.noduplicate() noduplicate +; CHECK: declare void @f.noduplicate() #10 +declare void @f.noimplicitfloat() noimplicitfloat +; CHECK: declare void @f.noimplicitfloat() #11 +declare void @f.noinline() noinline +; CHECK: declare void @f.noinline() #12 +declare void @f.nonlazybind() nonlazybind +; CHECK: declare void @f.nonlazybind() #13 +declare void @f.noredzone() noredzone +; CHECK: declare void @f.noredzone() #14 +declare void @f.noreturn() noreturn +; CHECK: declare void @f.noreturn() #15 +declare void @f.nounwind() nounwind +; CHECK: declare void @f.nounwind() #16 +declare void @f.optnone() noinline optnone +; CHECK: declare void @f.optnone() #17 +declare void @f.optsize() optsize +; CHECK: declare void @f.optsize() #18 +declare void @f.readnone() readnone +; CHECK: declare void @f.readnone() #19 +declare void @f.readonly() readonly +; CHECK: declare void @f.readonly() #20 +declare void @f.returns_twice() returns_twice +; CHECK: declare void @f.returns_twice() #21 +declare void @f.safestack() safestack +; CHECK: declare void @f.safestack() #22 +declare 
void @f.sanitize_address() sanitize_address
+; CHECK: declare void @f.sanitize_address() #23
+declare void @f.sanitize_memory() sanitize_memory
+; CHECK: declare void @f.sanitize_memory() #24
+declare void @f.sanitize_thread() sanitize_thread
+; CHECK: declare void @f.sanitize_thread() #25
+declare void @f.ssp() ssp
+; CHECK: declare void @f.ssp() #26
+declare void @f.sspreq() sspreq
+; CHECK: declare void @f.sspreq() #27
+declare void @f.sspstrong() sspstrong
+; CHECK: declare void @f.sspstrong() #28
+declare void @f.thunk() "thunk"
+; CHECK: declare void @f.thunk() #29
+declare void @f.uwtable() uwtable
+; CHECK: declare void @f.uwtable() #30
+declare void @f.kvpair() "cpu"="cortex-a8"
+; CHECK: declare void @f.kvpair() #31
+
+; Functions -- section
+declare void @f.section() section "80"
+; CHECK: declare void @f.section() section "80"
+
+; Functions -- comdat
+define void @f.comdat_any() comdat($comdat.any) {
+; CHECK: define void @f.comdat_any() comdat($comdat.any)
+entry:
+  ret void
+}
+define void @f.comdat_exactmatch() comdat($comdat.exactmatch) {
+; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch)
+entry:
+  ret void
+}
+define void @f.comdat_largest() comdat($comdat.largest) {
+; CHECK: define void @f.comdat_largest() comdat($comdat.largest)
+entry:
+  ret void
+}
+define void @f.comdat_noduplicates() comdat($comdat.noduplicates) {
+; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates)
+entry:
+  ret void
+}
+define void @f.comdat_samesize() comdat($comdat.samesize) {
+; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize)
+entry:
+  ret void
+}
+
+; Functions -- align
+declare void @f.align2() align 2
+; CHECK: declare void @f.align2() align 2
+declare void @f.align4() align 4
+; CHECK: declare void @f.align4() align 4
+declare void @f.align8() align 8
+; CHECK: declare void @f.align8() align 8
+
+; Functions -- GC
+declare void @f.gcshadow() gc "shadow-stack"
+; CHECK: declare void @f.gcshadow() gc "shadow-stack"
+
+; Functions -- Prefix data
+declare void @f.prefixi32() prefix i32 1684365668
+; CHECK: declare void @f.prefixi32() prefix i32 1684365668
+declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Prologue data
+declare void @f.prologuei32() prologue i32 1684365669
+; CHECK: declare void @f.prologuei32() prologue i32 1684365669
+declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Personality constant
+declare void @llvm.donothing() nounwind readnone
+; CHECK: declare void @llvm.donothing() #32
+define void @f.no_personality() personality i8 3 {
+; CHECK: define void @f.no_personality() personality i8 3
+  invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+declare i32 @f.personality_handler()
+; CHECK: declare i32 @f.personality_handler()
+define void @f.personality() personality i32 ()* @f.personality_handler {
+; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler
+  invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+  %cleanup = landingpad i32 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+;; Atomic Memory Ordering Constraints
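+; Every cmpxchg below pairs its success ordering with a monotonic failure
+; ordering, so each of the five success orderings gets bitcode coverage.
+define void @atomics(i32* %word) {
+  %cmpxchg.0 = cmpxchg i32*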
%word, i32 0, i32 4 monotonic monotonic + ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + fence acquire + ; CHECK: fence acquire + fence release + ; CHECK: fence release + fence acq_rel + ; CHECK: fence acq_rel + fence singlethread seq_cst + ; CHECK: fence singlethread seq_cst + + %ld.1 = load atomic i32, i32* %word monotonic, align 4 + ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4 + %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + + store atomic i32 23, i32* %word monotonic, align 4 + ; CHECK: store atomic i32 23, i32* %word monotonic, align 4 + store atomic volatile i32 24, i32* %word monotonic, align 4 + ; 
CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4 + store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ret void +} + +;; Fast Math Flags +define void @fastmathflags(float %op1, float %op2) { + %f.nnan = fadd nnan float %op1, %op2 + ; CHECK: %f.nnan = fadd nnan float %op1, %op2 + %f.ninf = fadd ninf float %op1, %op2 + ; CHECK: %f.ninf = fadd ninf float %op1, %op2 + %f.nsz = fadd nsz float %op1, %op2 + ; CHECK: %f.nsz = fadd nsz float %op1, %op2 + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 + ret void +} + +;; Type System +%opaquety = type opaque +define void @typesystem() { + %p0 = bitcast i8* null to i32 (i32)* + ; CHECK: %p0 = bitcast i8* null to i32 (i32)* + %p1 = bitcast i8* null to void (i8*)* + ; CHECK: %p1 = bitcast i8* null to void (i8*)* + %p2 = bitcast i8* null to i32 (i8*, ...)* + ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)* + %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + + %t0 = alloca i1942652 + ; CHECK: %t0 = alloca i1942652 + %t1 = alloca half + ; CHECK: %t1 = alloca half + %t2 = alloca float + ; CHECK: %t2 = alloca float + %t3 = alloca double + ; CHECK: %t3 = alloca double + %t4 = alloca fp128 + ; CHECK: %t4 = alloca fp128 + %t5 = alloca x86_fp80 + ; CHECK: %t5 = alloca x86_fp80 + %t6 = alloca ppc_fp128 + ; CHECK: %t6 = alloca ppc_fp128 + %t7 = alloca x86_mmx + ; CHECK: %t7 = alloca x86_mmx + %t8 = alloca %opaquety* + ; CHECK: %t8 = alloca %opaquety* + + ret void +} + +;; Inline Assembler Expressions +define void @inlineasm(i32 %arg) { + call i32 asm "bswap $0", "=r,r"(i32 %arg) + ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg) + call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ret void +} + +;; Instructions + +; Instructions -- Terminators +define void @instructions.terminators(i8 %val) personality i32 -10 { + br i1 false, label %iftrue, label %iffalse + ; CHECK: br i1 false, label %iftrue, label %iffalse + br label %iftrue + ; CHECK: br label %iftrue +iftrue: + ret void + ; CHECK: ret void +iffalse: + + switch i8 %val, label %defaultdest [ + ; CHECK: switch i8 %val, label %defaultdest [ + i8 0, label %defaultdest.0 + ; CHECK: i8 0, label %defaultdest.0 + i8 1, label %defaultdest.1 + ; CHECK: i8 1, label %defaultdest.1 + i8 2, label %defaultdest.2 + ; CHECK: i8 2, label %defaultdest.2 + ] + ; CHECK: ] +defaultdest: + ret void +defaultdest.0: + ret void +defaultdest.1: + ret void +defaultdest.2: + + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + + invoke fastcc void @f.fastcc() + ; CHECK: invoke 
fastcc void @f.fastcc() + to label %defaultdest unwind label %exc + ; CHECK: to label %defaultdest unwind label %exc +exc: + %cleanup = landingpad i32 cleanup + + resume i32 undef + ; CHECK: resume i32 undef + unreachable + ; CHECK: unreachable + + ret void +} + +; Instructions -- Binary Operations +define void @instructions.binops(i8 %op1, i8 %op2) { + ; nuw x nsw + add i8 %op1, %op2 + ; CHECK: add i8 %op1, %op2 + add nuw i8 %op1, %op2 + ; CHECK: add nuw i8 %op1, %op2 + add nsw i8 %op1, %op2 + ; CHECK: add nsw i8 %op1, %op2 + add nuw nsw i8 %op1, %op2 + ; CHECK: add nuw nsw i8 %op1, %op2 + sub i8 %op1, %op2 + ; CHECK: sub i8 %op1, %op2 + sub nuw i8 %op1, %op2 + ; CHECK: sub nuw i8 %op1, %op2 + sub nsw i8 %op1, %op2 + ; CHECK: sub nsw i8 %op1, %op2 + sub nuw nsw i8 %op1, %op2 + ; CHECK: sub nuw nsw i8 %op1, %op2 + mul i8 %op1, %op2 + ; CHECK: mul i8 %op1, %op2 + mul nuw i8 %op1, %op2 + ; CHECK: mul nuw i8 %op1, %op2 + mul nsw i8 %op1, %op2 + ; CHECK: mul nsw i8 %op1, %op2 + mul nuw nsw i8 %op1, %op2 + ; CHECK: mul nuw nsw i8 %op1, %op2 + + ; exact + udiv i8 %op1, %op2 + ; CHECK: udiv i8 %op1, %op2 + udiv exact i8 %op1, %op2 + ; CHECK: udiv exact i8 %op1, %op2 + sdiv i8 %op1, %op2 + ; CHECK: sdiv i8 %op1, %op2 + sdiv exact i8 %op1, %op2 + ; CHECK: sdiv exact i8 %op1, %op2 + + ; none + urem i8 %op1, %op2 + ; CHECK: urem i8 %op1, %op2 + srem i8 %op1, %op2 + ; CHECK: srem i8 %op1, %op2 + + ret void +} + +; Instructions -- Bitwise Binary Operations +define void @instructions.bitwise_binops(i8 %op1, i8 %op2) { + ; nuw x nsw + shl i8 %op1, %op2 + ; CHECK: shl i8 %op1, %op2 + shl nuw i8 %op1, %op2 + ; CHECK: shl nuw i8 %op1, %op2 + shl nsw i8 %op1, %op2 + ; CHECK: shl nsw i8 %op1, %op2 + shl nuw nsw i8 %op1, %op2 + ; CHECK: shl nuw nsw i8 %op1, %op2 + + ; exact + lshr i8 %op1, %op2 + ; CHECK: lshr i8 %op1, %op2 + lshr exact i8 %op1, %op2 + ; CHECK: lshr exact i8 %op1, %op2 + ashr i8 %op1, %op2 + ; CHECK: ashr i8 %op1, %op2 + ashr exact i8 %op1, %op2 + ; CHECK: ashr exact i8 %op1, %op2 + + ; none + and i8 %op1, %op2 + ; CHECK: and i8 %op1, %op2 + or i8 %op1, %op2 + ; CHECK: or i8 %op1, %op2 + xor i8 %op1, %op2 + ; CHECK: xor i8 %op1, %op2 + + ret void +} + +; Instructions -- Vector Operations +define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) { + extractelement <4 x float> %vec, i8 0 + ; CHECK: extractelement <4 x float> %vec, i8 0 + insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + + ret void +} + +; Instructions -- Aggregate Operations +define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p, + [3 x i8] %arr, { i8, { i32 }} %n, + <2 x i8*> %pvec, <2 x i64> %offsets) { + extractvalue { i8, i32 } %up, 0 + ; CHECK: extractvalue { i8, i32 } %up, 0 + extractvalue <{ i8, i32 }> %p, 1 + ; CHECK: extractvalue <{ i8, i32 }> %p, 1 + extractvalue [3 x i8] %arr, 2 + ; CHECK: extractvalue [3 x i8] %arr, 2 + extractvalue { i8, { i32 } } %n, 1, 0 + ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0 + + insertvalue { i8, i32 } %up, i8 1, 0 + ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0 + insertvalue <{ i8, i32 }> %p, i32 2, 1 + ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1 + insertvalue [3 x i8] %arr, i8 0, 0 + ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0 + insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + ; CHECK: insertvalue { i8, { i32 
} } %n, i32 0, 1, 0 + + %up.ptr = alloca { i8, i32 } + %p.ptr = alloca <{ i8, i32 }> + %arr.ptr = alloca [3 x i8] + %n.ptr = alloca { i8, { i32 } } + + getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + + ret void +} + +; Instructions -- Memory Access and Addressing Operations +!7 = !{i32 1} +!8 = !{} +!9 = !{i64 4} +define void @instructions.memops(i32** %base) { + alloca i32, i8 4, align 4 + ; CHECK: alloca i32, i8 4, align 4 + alloca inalloca i32, i8 4, align 4 + ; CHECK: alloca inalloca i32, i8 4, align 4 + + load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + + store i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8 + store volatile i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8 + + ret void +} + +; Instructions -- Conversion Operations +define void @instructions.conversions() { + trunc i32 -1 to i1 + ; CHECK: trunc i32 -1 to i1 + zext i32 -1 to i64 + ; CHECK: zext i32 -1 to i64 + sext i32 -1 to i64 + ; CHECK: sext i32 -1 to i64 + fptrunc float undef to half + ; CHECK: fptrunc float undef to half + fpext half undef to float + ; CHECK: fpext half undef to float + fptoui float undef to i32 + ; CHECK: fptoui float undef to i32 + fptosi float undef to i32 + ; CHECK: fptosi float undef to i32 + uitofp i32 1 to float + ; CHECK: uitofp i32 1 to float + sitofp i32 -1 to float + ; CHECK: sitofp i32 -1 to float + ptrtoint i8* null to i64 + ; CHECK: ptrtoint i8* null to i64 + inttoptr i64 0 to i8* + ; CHECK: inttoptr i64 0 to i8* + bitcast i32 0 to i32 + ; CHECK: bitcast i32 0 to i32 + addrspacecast i32* null to i32 addrspace(1)* + ; CHECK: addrspacecast i32* null to i32 addrspace(1)* + + ret void +} + +; Instructions -- Other Operations +define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) { +entry: + icmp eq i32 %op1, %op2 + ; CHECK: icmp eq i32 %op1, %op2 + icmp ne i32 %op1, %op2 + ; CHECK: icmp ne i32 %op1, %op2 + icmp ugt i32 %op1, %op2 + ; CHECK: icmp ugt i32 %op1, %op2 + icmp uge i32 %op1, %op2 + ; CHECK: icmp uge i32 %op1, %op2 + icmp ult i32 %op1, %op2 + ; CHECK: icmp ult i32 %op1, %op2 + icmp ule i32 %op1, %op2 + ; CHECK: icmp ule i32 %op1, %op2 + icmp sgt i32 %op1, %op2 + ; 
CHECK: icmp sgt i32 %op1, %op2
+  icmp sge i32 %op1, %op2
+  ; CHECK: icmp sge i32 %op1, %op2
+  icmp slt i32 %op1, %op2
+  ; CHECK: icmp slt i32 %op1, %op2
+  icmp sle i32 %op1, %op2
+  ; CHECK: icmp sle i32 %op1, %op2
+
+  fcmp false half %fop1, %fop2
+  ; CHECK: fcmp false half %fop1, %fop2
+  fcmp oeq half %fop1, %fop2
+  ; CHECK: fcmp oeq half %fop1, %fop2
+  fcmp ogt half %fop1, %fop2
+  ; CHECK: fcmp ogt half %fop1, %fop2
+  fcmp oge half %fop1, %fop2
+  ; CHECK: fcmp oge half %fop1, %fop2
+  fcmp olt half %fop1, %fop2
+  ; CHECK: fcmp olt half %fop1, %fop2
+  fcmp ole half %fop1, %fop2
+  ; CHECK: fcmp ole half %fop1, %fop2
+  fcmp one half %fop1, %fop2
+  ; CHECK: fcmp one half %fop1, %fop2
+  fcmp ord half %fop1, %fop2
+  ; CHECK: fcmp ord half %fop1, %fop2
+  fcmp ueq half %fop1, %fop2
+  ; CHECK: fcmp ueq half %fop1, %fop2
+  fcmp ugt half %fop1, %fop2
+  ; CHECK: fcmp ugt half %fop1, %fop2
+  fcmp uge half %fop1, %fop2
+  ; CHECK: fcmp uge half %fop1, %fop2
+  fcmp ult half %fop1, %fop2
+  ; CHECK: fcmp ult half %fop1, %fop2
+  fcmp ule half %fop1, %fop2
+  ; CHECK: fcmp ule half %fop1, %fop2
+  fcmp une half %fop1, %fop2
+  ; CHECK: fcmp une half %fop1, %fop2
+  fcmp uno half %fop1, %fop2
+  ; CHECK: fcmp uno half %fop1, %fop2
+  fcmp true half %fop1, %fop2
+  ; CHECK: fcmp true half %fop1, %fop2
+
+  br label %exit
+L1:
+  %v1 = add i32 %op1, %op2
+  br label %exit
+L2:
+  %v2 = add i32 %op1, %op2
+  br label %exit
+exit:
+  phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+  ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+  select i1 true, i32 0, i32 1
+  ; CHECK: select i1 true, i32 0, i32 1
+  select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+  ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+  call void @f.nobuiltin() builtin
+  ; CHECK: call void @f.nobuiltin() #36
+
+  call fastcc noalias i32* @f.noalias() noinline
+  ; CHECK: call fastcc noalias i32* @f.noalias() #12
+  tail call ghccc nonnull i32* @f.nonnull() minsize
+  ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+  ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+  musttail call void @f.param.inalloca(i8* inalloca %val)
+  ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+  ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+  invoke void @llvm.donothing() to label %proceed unwind label %catch1
+  invoke void @llvm.donothing() to label %proceed unwind label %catch2
+  invoke void @llvm.donothing() to label %proceed unwind label %catch3
+  invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+  br label %proceed
+
+catch2:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+      catch i32* null
+      ; CHECK: catch i32* null
+  br label %proceed
+
+catch3:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+      catch i32* null
+      ; CHECK: catch i32* null
+      catch i32* null
+      ; CHECK: catch i32* null
+  br label %proceed
+
+catch4:
+  landingpad i32
+  ; CHECK: landingpad i32
+      filter [2 x i32] zeroinitializer
+      ; CHECK: filter [2 x i32] zeroinitializer
+  br label %proceed
+
+proceed:
+  ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
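+; The va_list lives behind a plain i8* here: va_start and va_end bracket its
+; lifetime, and va_copy clones one active list into another slot.
+define void @instructions.va_arg(i8* %v, ...)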
{ + %ap = alloca i8* + %ap2 = bitcast i8** %ap to i8* + + call void @llvm.va_start(i8* %ap2) + ; CHECK: call void @llvm.va_start(i8* %ap2) + + va_arg i8* %ap2, i32 + ; CHECK: va_arg i8* %ap2, i32 + + call void @llvm.va_copy(i8* %v, i8* %ap2) + ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2) + + call void @llvm.va_end(i8* %ap2) + ; CHECK: call void @llvm.va_end(i8* %ap2) + + ret void +} + +; Intrinsic Functions -- Accurate Garbage Collection +declare void @llvm.gcroot(i8**, i8*) +declare i8* @llvm.gcread(i8*, i8**) +declare void @llvm.gcwrite(i8*, i8*, i8**) +define void @intrinsics.gc() gc "shadow-stack" { + %ptrloc = alloca i8* + call void @llvm.gcroot(i8** %ptrloc, i8* null) + ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null) + + call i8* @llvm.gcread(i8* null, i8** %ptrloc) + ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc) + + %ref = alloca i8 + call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + + ret void +} + +; Intrinsic Functions -- Code Generation +declare i8* @llvm.returnaddress(i32) +declare i8* @llvm.frameaddress(i32) +declare i32 @llvm.read_register.i32(metadata) +declare i64 @llvm.read_register.i64(metadata) +declare void @llvm.write_register.i32(metadata, i32) +declare void @llvm.write_register.i64(metadata, i64) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare void @llvm.prefetch(i8*, i32, i32, i32) +declare void @llvm.pcmarker(i32) +declare i64 @llvm.readcyclecounter() +declare void @llvm.clear_cache(i8*, i8*) +declare void @llvm.instrprof_increment(i8*, i64, i32, i32) + +!10 = !{!"rax"} +define void @intrinsics.codegen() { + call i8* @llvm.returnaddress(i32 1) + ; CHECK: call i8* @llvm.returnaddress(i32 1) + call i8* @llvm.frameaddress(i32 1) + ; CHECK: call i8* @llvm.frameaddress(i32 1) + + call i32 @llvm.read_register.i32(metadata !10) + ; CHECK: call i32 @llvm.read_register.i32(metadata !10) + call i64 @llvm.read_register.i64(metadata !10) + ; CHECK: call i64 @llvm.read_register.i64(metadata !10) + call void @llvm.write_register.i32(metadata !10, i32 0) + ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0) + call void @llvm.write_register.i64(metadata !10, i64 0) + ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0) + + %stack = call i8* @llvm.stacksave() + ; CHECK: %stack = call i8* @llvm.stacksave() + call void @llvm.stackrestore(i8* %stack) + ; CHECK: call void @llvm.stackrestore(i8* %stack) + + call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + + call void @llvm.pcmarker(i32 1) + ; CHECK: call void @llvm.pcmarker(i32 1) + + call i64 @llvm.readcyclecounter() + ; CHECK: call i64 @llvm.readcyclecounter() + + call void @llvm.clear_cache(i8* null, i8* null) + ; CHECK: call void @llvm.clear_cache(i8* null, i8* null) + + call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + + ret void +} + +declare void @llvm.localescape(...) +declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) +define void @intrinsics.localescape() { + %static.alloca = alloca i32 + call void (...) @llvm.localescape(i32* %static.alloca) + ; CHECK: call void (...) 
@llvm.localescape(i32* %static.alloca)
+
+  call void @intrinsics.localrecover()
+
+  ret void
+}
+define void @intrinsics.localrecover() {
+  %func = bitcast void ()* @intrinsics.localescape to i8*
+  %fp = call i8* @llvm.frameaddress(i32 1)
+  call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+  ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+
+  ret void
+}
+
+; We need this function to provide `uses' for some metadata tests.
+define void @misc.metadata() {
+  call void @f1(), !srcloc !11
+  call void @f1(), !srcloc !12
+  call void @f1(), !srcloc !13
+  call void @f1(), !srcloc !14
+  ret void
+}
+
+; CHECK: attributes #0 = { alignstack=4 }
+; CHECK: attributes #1 = { alignstack=8 }
+; CHECK: attributes #2 = { alwaysinline }
+; CHECK: attributes #3 = { cold }
+; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { inlinehint }
+; CHECK: attributes #6 = { jumptable }
+; CHECK: attributes #7 = { minsize }
+; CHECK: attributes #8 = { naked }
+; CHECK: attributes #9 = { nobuiltin }
+; CHECK: attributes #10 = { noduplicate }
+; CHECK: attributes #11 = { noimplicitfloat }
+; CHECK: attributes #12 = { noinline }
+; CHECK: attributes #13 = { nonlazybind }
+; CHECK: attributes #14 = { noredzone }
+; CHECK: attributes #15 = { noreturn }
+; CHECK: attributes #16 = { nounwind }
+; CHECK: attributes #17 = { noinline optnone }
+; CHECK: attributes #18 = { optsize }
+; CHECK: attributes #19 = { readnone }
+; CHECK: attributes #20 = { readonly }
+; CHECK: attributes #21 = { returns_twice }
+; CHECK: attributes #22 = { safestack }
+; CHECK: attributes #23 = { sanitize_address }
+; CHECK: attributes #24 = { sanitize_memory }
+; CHECK: attributes #25 = { sanitize_thread }
+; CHECK: attributes #26 = { ssp }
+; CHECK: attributes #27 = { sspreq }
+; CHECK: attributes #28 = { sspstrong }
+; CHECK: attributes #29 = { "thunk" }
+; CHECK: attributes #30 = { uwtable }
+; CHECK: attributes #31 = { "cpu"="cortex-a8" }
+; CHECK: attributes #32 = { nounwind readnone }
+; CHECK: attributes #33 = { argmemonly nounwind readonly }
+; CHECK: attributes #34 = { argmemonly nounwind }
+; CHECK: attributes #35 = { nounwind readonly }
+; CHECK: attributes #36 = { builtin }
+
+;; Metadata
+
+; Metadata -- Module flags
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+
+!0 = !{i32 1, !"mod1", i32 0}
+; CHECK: !0 = !{i32 1, !"mod1", i32 0}
+!1 = !{i32 2, !"mod2", i32 0}
+; CHECK: !1 = !{i32 2, !"mod2", i32 0}
+!2 = !{i32 3, !"mod3", !3}
+; CHECK: !2 = !{i32 3, !"mod3", !3}
+!3 = !{!"mod6", !0}
+; CHECK: !3 = !{!"mod6", !0}
+!4 = !{i32 4, !"mod4", i32 0}
+; CHECK: !4 = !{i32 4, !"mod4", i32 0}
+!5 = !{i32 5, !"mod5", !0}
+; CHECK: !5 = !{i32 5, !"mod5", !0}
+!6 = !{i32 6, !"mod6", !0}
+; CHECK: !6 = !{i32 6, !"mod6", !0}
+
+; Metadata -- Check `distinct'
+!11 = distinct !{}
+; CHECK: !11 = distinct !{}
+!12 = distinct !{}
+; CHECK: !12 = distinct !{}
+!13 = !{!11}
+; CHECK: !13 = !{!11}
+!14 = !{!12}
+; CHECK: !14 = !{!12}
diff --git a/test/Bitcode/compatibility-3.7.ll.bc b/test/Bitcode/compatibility-3.7.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..14c0f1a6d6f1bda8998e601301911f825864703d
GIT binary patch
literal 11584
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll

+@const.struct = constant %const.struct.type { i32 -1, i8 undef }
+; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
+@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+@const.array = constant [2 x i32] [i32 -3, i32 -4]
+; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
+@const.vector = constant <2 x i32> <i32 -5, i32 -6>
+; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+;; Global Variables
+; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
+;         [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized]
+;         <global | constant> <Type> [<InitializerConstant>]
+;         [, section "name"] [, comdat [($name)]] [, align <Alignment>]
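+; An illustrative (unchecked) declaration filling several optional slots at
+; once might read: @g.example = private unnamed_addr constant i32 7, align 8
+
+; Global Variables -- Simple
+@g1 = global i32 0
+; CHECK: @g1 = global i32 0
+@g2 = constant i32 0
+; CHECK: @g2 = constant i32 0
+
+; Global Variables -- Linkage
+@g.private = private global i32 0
+; CHECK: @g.private = private global i32 0
+@g.internal = internal global i32 0
+; CHECK: @g.internal = internal global i32 0
+@g.available_externally = available_externally global i32 0
+; CHECK: @g.available_externally = available_externally global i32 0
+@g.linkonce = linkonce global i32 0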
+; CHECK: @g.linkonce = linkonce global i32 0 +@g.weak = weak global i32 0 +; CHECK: @g.weak = weak global i32 0 +@g.common = common global i32 0 +; CHECK: @g.common = common global i32 0 +@g.appending = appending global [4 x i8] c"test" +; CHECK: @g.appending = appending global [4 x i8] c"test" +@g.extern_weak = extern_weak global i32 +; CHECK: @g.extern_weak = extern_weak global i32 +@g.linkonce_odr = linkonce_odr global i32 0 +; CHECK: @g.linkonce_odr = linkonce_odr global i32 0 +@g.weak_odr = weak_odr global i32 0 +; CHECK: @g.weak_odr = weak_odr global i32 0 +@g.external = external global i32 +; CHECK: @g.external = external global i32 + +; Global Variables -- Visibility +@g.default = default global i32 0 +; CHECK: @g.default = global i32 0 +@g.hidden = hidden global i32 0 +; CHECK: @g.hidden = hidden global i32 0 +@g.protected = protected global i32 0 +; CHECK: @g.protected = protected global i32 0 + +; Global Variables -- DLLStorageClass +@g.dlldefault = default global i32 0 +; CHECK: @g.dlldefault = global i32 0 +@g.dllimport = external dllimport global i32 +; CHECK: @g.dllimport = external dllimport global i32 +@g.dllexport = dllexport global i32 0 +; CHECK: @g.dllexport = dllexport global i32 0 + +; Global Variables -- ThreadLocal +@g.notthreadlocal = global i32 0 +; CHECK: @g.notthreadlocal = global i32 0 +@g.generaldynamic = thread_local global i32 0 +; CHECK: @g.generaldynamic = thread_local global i32 0 +@g.localdynamic = thread_local(localdynamic) global i32 0 +; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0 +@g.initialexec = thread_local(initialexec) global i32 0 +; CHECK: @g.initialexec = thread_local(initialexec) global i32 0 +@g.localexec = thread_local(localexec) global i32 0 +; CHECK: @g.localexec = thread_local(localexec) global i32 0 + +; Global Variables -- unnamed_addr +@g.unnamed_addr = unnamed_addr global i32 0 +; CHECK: @g.unnamed_addr = unnamed_addr global i32 0 + +; Global Variables -- AddrSpace +@g.addrspace = addrspace(1) global i32 0 +; CHECK: @g.addrspace = addrspace(1) global i32 0 + +; Global Variables -- ExternallyInitialized +@g.externally_initialized = external externally_initialized global i32 +; CHECK: @g.externally_initialized = external externally_initialized global i32 + +; Global Variables -- section +@g.section = global i32 0, section "_DATA" +; CHECK: @g.section = global i32 0, section "_DATA" + +; Global Variables -- comdat +@comdat.any = global i32 0, comdat +; CHECK: @comdat.any = global i32 0, comdat +@comdat.exactmatch = global i32 0, comdat +; CHECK: @comdat.exactmatch = global i32 0, comdat +@comdat.largest = global i32 0, comdat +; CHECK: @comdat.largest = global i32 0, comdat +@comdat.noduplicates = global i32 0, comdat +; CHECK: @comdat.noduplicates = global i32 0, comdat +@comdat.samesize = global i32 0, comdat +; CHECK: @comdat.samesize = global i32 0, comdat + +; Force two globals from different comdats into sections with the same name. 
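+; The shared section name below is deliberate: the two distinct comdat keys,
+; not the section, must keep the globals apart through the round-trip.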
+$comdat1 = comdat any +$comdat2 = comdat any +@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) +; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) + +; Global Variables -- align +@g.align = global i32 0, align 4 +; CHECK: @g.align = global i32 0, align 4 + +; Global Variables -- Intrinsics +%pri.func.data = type { i32, void ()*, i8* } +@g.used1 = global i32 0 +@g.used2 = global i32 0 +@g.used3 = global i8 0 +declare void @g.f1() +@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" + +;; Aliases +; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] +; [unnamed_addr] alias @ + +; Aliases -- Linkage +@a.private = private alias i32, i32* @g.private +; CHECK: @a.private = private alias i32, i32* @g.private +@a.internal = internal alias i32, i32* @g.internal +; CHECK: @a.internal = internal alias i32, i32* @g.internal +@a.linkonce = linkonce alias i32, i32* @g.linkonce +; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce +@a.weak = weak alias i32, i32* @g.weak +; CHECK: @a.weak = weak alias i32, i32* @g.weak +@a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +@a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +@a.external = external alias i32, i32* @g1 +; CHECK: @a.external = alias i32, i32* @g1 + +; Aliases -- Visibility +@a.default = default alias i32, i32* @g.default +; CHECK: @a.default = alias i32, i32* @g.default +@a.hidden = hidden alias i32, i32* @g.hidden +; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden +@a.protected = protected alias i32, i32* @g.protected +; CHECK: @a.protected = protected alias i32, i32* @g.protected + +; Aliases -- DLLStorageClass +@a.dlldefault = default alias i32, i32* @g.dlldefault +; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault +@a.dllimport = dllimport alias i32, i32* @g1 +; CHECK: @a.dllimport = dllimport alias i32, i32* @g1 +@a.dllexport = dllexport alias i32, i32* @g.dllexport +; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport + +; Aliases -- ThreadLocal +@a.notthreadlocal = alias i32, i32* @g.notthreadlocal +; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal +@a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +@a.localdynamic = thread_local(localdynamic) alias i32, i32* 
@g.localdynamic +; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic +@a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +@a.localexec = thread_local(localexec) alias i32, i32* @g.localexec +; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec + +; Aliases -- unnamed_addr +@a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr +; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr + +;; Functions +; Format: define [linkage] [visibility] [DLLStorageClass] +; [cconv] [ret attrs] +; @ ([argument list]) +; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]] +; [align N] [gc] [prefix Constant] [prologue Constant] +; [personality Constant] { ... } + +; Functions -- Simple +declare void @f1 () +; CHECK: declare void @f1() + +define void @f2 () { +; CHECK: define void @f2() +entry: + ret void +} + +; Functions -- linkage +define private void @f.private() { +; CHECK: define private void @f.private() +entry: + ret void +} +define internal void @f.internal() { +; CHECK: define internal void @f.internal() +entry: + ret void +} +define available_externally void @f.available_externally() { +; CHECK: define available_externally void @f.available_externally() +entry: + ret void +} +define linkonce void @f.linkonce() { +; CHECK: define linkonce void @f.linkonce() +entry: + ret void +} +define weak void @f.weak() { +; CHECK: define weak void @f.weak() +entry: + ret void +} +define linkonce_odr void @f.linkonce_odr() { +; CHECK: define linkonce_odr void @f.linkonce_odr() +entry: + ret void +} +define weak_odr void @f.weak_odr() { +; CHECK: define weak_odr void @f.weak_odr() +entry: + ret void +} +declare external void @f.external() +; CHECK: declare void @f.external() +declare extern_weak void @f.extern_weak() +; CHECK: declare extern_weak void @f.extern_weak() + +; Functions -- visibility +declare default void @f.default() +; CHECK: declare void @f.default() +declare hidden void @f.hidden() +; CHECK: declare hidden void @f.hidden() +declare protected void @f.protected() +; CHECK: declare protected void @f.protected() + +; Functions -- DLLStorageClass +declare dllimport void @f.dllimport() +; CHECK: declare dllimport void @f.dllimport() +declare dllexport void @f.dllexport() +; CHECK: declare dllexport void @f.dllexport() + +; Functions -- cconv (Calling conventions) +declare ccc void @f.ccc() +; CHECK: declare void @f.ccc() +declare fastcc void @f.fastcc() +; CHECK: declare fastcc void @f.fastcc() +declare coldcc void @f.coldcc() +; CHECK: declare coldcc void @f.coldcc() +declare cc10 void @f.cc10() +; CHECK: declare ghccc void @f.cc10() +declare ghccc void @f.ghccc() +; CHECK: declare ghccc void @f.ghccc() +declare cc11 void @f.cc11() +; CHECK: declare cc11 void @f.cc11() +declare webkit_jscc void @f.webkit_jscc() +; CHECK: declare webkit_jscc void @f.webkit_jscc() +declare anyregcc void @f.anyregcc() +; CHECK: declare anyregcc void @f.anyregcc() +declare preserve_mostcc void @f.preserve_mostcc() +; CHECK: declare preserve_mostcc void @f.preserve_mostcc() +declare preserve_allcc void @f.preserve_allcc() +; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare cc64 void @f.cc64() +; CHECK: declare x86_stdcallcc void @f.cc64() +declare x86_stdcallcc void @f.x86_stdcallcc() +; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc() +declare cc65 void @f.cc65() +; CHECK: declare x86_fastcallcc void 
@f.cc65() +declare x86_fastcallcc void @f.x86_fastcallcc() +; CHECK: declare x86_fastcallcc void @f.x86_fastcallcc() +declare cc66 void @f.cc66() +; CHECK: declare arm_apcscc void @f.cc66() +declare arm_apcscc void @f.arm_apcscc() +; CHECK: declare arm_apcscc void @f.arm_apcscc() +declare cc67 void @f.cc67() +; CHECK: declare arm_aapcscc void @f.cc67() +declare arm_aapcscc void @f.arm_aapcscc() +; CHECK: declare arm_aapcscc void @f.arm_aapcscc() +declare cc68 void @f.cc68() +; CHECK: declare arm_aapcs_vfpcc void @f.cc68() +declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +declare cc69 void @f.cc69() +; CHECK: declare msp430_intrcc void @f.cc69() +declare msp430_intrcc void @f.msp430_intrcc() +; CHECK: declare msp430_intrcc void @f.msp430_intrcc() +declare cc70 void @f.cc70() +; CHECK: declare x86_thiscallcc void @f.cc70() +declare x86_thiscallcc void @f.x86_thiscallcc() +; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc() +declare cc71 void @f.cc71() +; CHECK: declare ptx_kernel void @f.cc71() +declare ptx_kernel void @f.ptx_kernel() +; CHECK: declare ptx_kernel void @f.ptx_kernel() +declare cc72 void @f.cc72() +; CHECK: declare ptx_device void @f.cc72() +declare ptx_device void @f.ptx_device() +; CHECK: declare ptx_device void @f.ptx_device() +declare cc75 void @f.cc75() +; CHECK: declare spir_func void @f.cc75() +declare spir_func void @f.spir_func() +; CHECK: declare spir_func void @f.spir_func() +declare cc76 void @f.cc76() +; CHECK: declare spir_kernel void @f.cc76() +declare spir_kernel void @f.spir_kernel() +; CHECK: declare spir_kernel void @f.spir_kernel() +declare cc77 void @f.cc77() +; CHECK: declare intel_ocl_bicc void @f.cc77() +declare intel_ocl_bicc void @f.intel_ocl_bicc() +; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc() +declare cc78 void @f.cc78() +; CHECK: declare x86_64_sysvcc void @f.cc78() +declare x86_64_sysvcc void @f.x86_64_sysvcc() +; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc() +declare cc79 void @f.cc79() +; CHECK: declare x86_64_win64cc void @f.cc79() +declare x86_64_win64cc void @f.x86_64_win64cc() +; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc() +declare cc80 void @f.cc80() +; CHECK: declare x86_vectorcallcc void @f.cc80() +declare x86_vectorcallcc void @f.x86_vectorcallcc() +; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc() +declare cc1023 void @f.cc1023() +; CHECK: declare cc1023 void @f.cc1023() + +; Functions -- ret attrs (Return attributes) +declare zeroext i64 @f.zeroext() +; CHECK: declare zeroext i64 @f.zeroext() +declare signext i64 @f.signext() +; CHECK: declare signext i64 @f.signext() +declare inreg i32* @f.inreg() +; CHECK: declare inreg i32* @f.inreg() +declare noalias i32* @f.noalias() +; CHECK: declare noalias i32* @f.noalias() +declare nonnull i32* @f.nonnull() +; CHECK: declare nonnull i32* @f.nonnull() +declare dereferenceable(4) i32* @f.dereferenceable4() +; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4() +declare dereferenceable(8) i32* @f.dereferenceable8() +; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8() +declare dereferenceable(16) i32* @f.dereferenceable16() +; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16() +declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +; CHECK: declare dereferenceable_or_null(8) i32* 
@f.dereferenceable8_or_null() +declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() +; CHECK: declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() + +; Functions -- Parameter attributes +declare void @f.param.zeroext(i8 zeroext) +; CHECK: declare void @f.param.zeroext(i8 zeroext) +declare void @f.param.signext(i8 signext) +; CHECK: declare void @f.param.signext(i8 signext) +declare void @f.param.inreg(i8 inreg) +; CHECK: declare void @f.param.inreg(i8 inreg) +declare void @f.param.byval({ i8, i8 }* byval) +; CHECK: declare void @f.param.byval({ i8, i8 }* byval) +declare void @f.param.inalloca(i8* inalloca) +; CHECK: declare void @f.param.inalloca(i8* inalloca) +declare void @f.param.sret(i8* sret) +; CHECK: declare void @f.param.sret(i8* sret) +declare void @f.param.noalias(i8* noalias) +; CHECK: declare void @f.param.noalias(i8* noalias) +declare void @f.param.nocapture(i8* nocapture) +; CHECK: declare void @f.param.nocapture(i8* nocapture) +declare void @f.param.nest(i8* nest) +; CHECK: declare void @f.param.nest(i8* nest) +declare i8* @f.param.returned(i8* returned) +; CHECK: declare i8* @f.param.returned(i8* returned) +declare void @f.param.nonnull(i8* nonnull) +; CHECK: declare void @f.param.nonnull(i8* nonnull) +declare void @f.param.dereferenceable(i8* dereferenceable(4)) +; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) +declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) + +; Functions -- unnamed_addr +declare void @f.unnamed_addr() unnamed_addr +; CHECK: declare void @f.unnamed_addr() unnamed_addr + +; Functions -- fn Attrs (Function attributes) +declare void @f.alignstack4() alignstack(4) +; CHECK: declare void @f.alignstack4() #0 +declare void @f.alignstack8() alignstack(8) +; CHECK: declare void @f.alignstack8() #1 +declare void @f.alwaysinline() alwaysinline +; CHECK: declare void @f.alwaysinline() #2 +declare void @f.cold() cold +; CHECK: declare void @f.cold() #3 +declare void @f.convergent() convergent +; CHECK: declare void @f.convergent() #4 +declare void @f.inlinehint() inlinehint +; CHECK: declare void @f.inlinehint() #5 +declare void @f.jumptable() unnamed_addr jumptable +; CHECK: declare void @f.jumptable() unnamed_addr #6 +declare void @f.minsize() minsize +; CHECK: declare void @f.minsize() #7 +declare void @f.naked() naked +; CHECK: declare void @f.naked() #8 +declare void @f.nobuiltin() nobuiltin +; CHECK: declare void @f.nobuiltin() #9 +declare void @f.noduplicate() noduplicate +; CHECK: declare void @f.noduplicate() #10 +declare void @f.noimplicitfloat() noimplicitfloat +; CHECK: declare void @f.noimplicitfloat() #11 +declare void @f.noinline() noinline +; CHECK: declare void @f.noinline() #12 +declare void @f.nonlazybind() nonlazybind +; CHECK: declare void @f.nonlazybind() #13 +declare void @f.noredzone() noredzone +; CHECK: declare void @f.noredzone() #14 +declare void @f.noreturn() noreturn +; CHECK: declare void @f.noreturn() #15 +declare void @f.nounwind() nounwind +; CHECK: declare void @f.nounwind() #16 +declare void @f.optnone() noinline optnone +; CHECK: declare void @f.optnone() #17 +declare void @f.optsize() optsize +; CHECK: declare void @f.optsize() #18 +declare void @f.readnone() readnone +; CHECK: declare void @f.readnone() #19 +declare void @f.readonly() readonly +; CHECK: declare void @f.readonly() #20 +declare void @f.returns_twice() returns_twice +; CHECK: declare void 
@f.returns_twice() #21 +declare void @f.safestack() safestack +; CHECK: declare void @f.safestack() #22 +declare void @f.sanitize_address() sanitize_address +; CHECK: declare void @f.sanitize_address() #23 +declare void @f.sanitize_memory() sanitize_memory +; CHECK: declare void @f.sanitize_memory() #24 +declare void @f.sanitize_thread() sanitize_thread +; CHECK: declare void @f.sanitize_thread() #25 +declare void @f.ssp() ssp +; CHECK: declare void @f.ssp() #26 +declare void @f.sspreq() sspreq +; CHECK: declare void @f.sspreq() #27 +declare void @f.sspstrong() sspstrong +; CHECK: declare void @f.sspstrong() #28 +declare void @f.thunk() "thunk" +; CHECK: declare void @f.thunk() #29 +declare void @f.uwtable() uwtable +; CHECK: declare void @f.uwtable() #30 +declare void @f.kvpair() "cpu"="cortex-a8" +; CHECK:declare void @f.kvpair() #31 +declare void @f.norecurse() norecurse +; CHECK: declare void @f.norecurse() #32 +declare void @f.inaccessiblememonly() inaccessiblememonly +; CHECK: declare void @f.inaccessiblememonly() #33 +declare void @f.inaccessiblemem_or_argmemonly() inaccessiblemem_or_argmemonly +; CHECK: declare void @f.inaccessiblemem_or_argmemonly() #34 + +; Functions -- section +declare void @f.section() section "80" +; CHECK: declare void @f.section() section "80" + +; Functions -- comdat +define void @f.comdat_any() comdat($comdat.any) { +; CHECK: define void @f.comdat_any() comdat($comdat.any) +entry: + ret void +} +define void @f.comdat_exactmatch() comdat($comdat.exactmatch) { +; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch) +entry: + ret void +} +define void @f.comdat_largest() comdat($comdat.largest) { +; CHECK: define void @f.comdat_largest() comdat($comdat.largest) +entry: + ret void +} +define void @f.comdat_noduplicates() comdat($comdat.noduplicates) { +; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates) +entry: + ret void +} +define void @f.comdat_samesize() comdat($comdat.samesize) { +; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize) +entry: + ret void +} + +; Functions -- align +declare void @f.align2() align 2 +; CHECK: declare void @f.align2() align 2 +declare void @f.align4() align 4 +; CHECK: declare void @f.align4() align 4 +declare void @f.align8() align 8 +; CHECK: declare void @f.align8() align 8 + +; Functions -- GC +declare void @f.gcshadow() gc "shadow-stack" +; CHECK: declare void @f.gcshadow() gc "shadow-stack" + +; Functions -- Prefix data +declare void @f.prefixi32() prefix i32 1684365668 +; CHECK: declare void @f.prefixi32() prefix i32 1684365668 +declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3] +; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3] + +; Functions -- Prologue data +declare void @f.prologuei32() prologue i32 1684365669 +; CHECK: declare void @f.prologuei32() prologue i32 1684365669 +declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3] +; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3] + +; Functions -- Personality constant +declare void @llvm.donothing() nounwind readnone +; CHECK: declare void @llvm.donothing() #35 +define void @f.no_personality() personality i8 3 { +; CHECK: define void @f.no_personality() personality i8 3 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +declare i32 @f.personality_handler() +; CHECK: declare i32 
@f.personality_handler() +define void @f.personality() personality i32 ()* @f.personality_handler { +; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i32 cleanup + br label %normal +normal: + ret void +} + +;; Atomic Memory Ordering Constraints +define void @atomics(i32* %word) { + %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + fence acquire + ; CHECK: fence acquire + fence release + ; CHECK: fence release + fence acq_rel + ; CHECK: fence acq_rel + fence singlethread seq_cst + ; CHECK: fence singlethread seq_cst + + %ld.1 = load atomic i32, i32* %word monotonic, align 4 + ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4 + %ld.2 = load atomic volatile i32, 
i32* %word acquire, align 8 + ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + + store atomic i32 23, i32* %word monotonic, align 4 + ; CHECK: store atomic i32 23, i32* %word monotonic, align 4 + store atomic volatile i32 24, i32* %word monotonic, align 4 + ; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4 + store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ret void +} + +;; Fast Math Flags +define void @fastmathflags(float %op1, float %op2) { + %f.nnan = fadd nnan float %op1, %op2 + ; CHECK: %f.nnan = fadd nnan float %op1, %op2 + %f.ninf = fadd ninf float %op1, %op2 + ; CHECK: %f.ninf = fadd ninf float %op1, %op2 + %f.nsz = fadd nsz float %op1, %op2 + ; CHECK: %f.nsz = fadd nsz float %op1, %op2 + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 + ret void +} + +; Check various fast math flags and floating-point types on calls. + +declare float @fmf1() +declare double @fmf2() +declare <4 x double> @fmf3() + +; CHECK-LABEL: fastMathFlagsForCalls( +define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { + %call.fast = call fast float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() + + ; Throw in some other attributes to make sure those stay in the right places. + + %call.nsz.arcp = notail call nsz arcp double @fmf2() + ; CHECK: %call.nsz.arcp = notail call nsz arcp double @fmf2() + + %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3() + ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3() + + ret void +} + +;; Type System +%opaquety = type opaque +define void @typesystem() { + %p0 = bitcast i8* null to i32 (i32)* + ; CHECK: %p0 = bitcast i8* null to i32 (i32)* + %p1 = bitcast i8* null to void (i8*)* + ; CHECK: %p1 = bitcast i8* null to void (i8*)* + %p2 = bitcast i8* null to i32 (i8*, ...)* + ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)* + %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + + %t0 = alloca i1942652 + ; CHECK: %t0 = alloca i1942652 + %t1 = alloca half + ; CHECK: %t1 = alloca half + %t2 = alloca float + ; CHECK: %t2 = alloca float + %t3 = alloca double + ; CHECK: %t3 = alloca double + %t4 = alloca fp128 + ; CHECK: %t4 = alloca fp128 + %t5 = alloca x86_fp80 + ; CHECK: %t5 = alloca x86_fp80 + %t6 = alloca ppc_fp128 + ; CHECK: %t6 = alloca ppc_fp128 + %t7 = alloca x86_mmx + ; CHECK: %t7 = alloca x86_mmx + %t8 = alloca %opaquety* + ; CHECK: %t8 = alloca %opaquety* + + ret void +} + +declare void @llvm.token(token) +; CHECK: declare void @llvm.token(token) + +;; Inline Assembler Expressions +define void @inlineasm(i32 %arg) { + call i32 asm "bswap $0", "=r,r"(i32 %arg) + ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg) + call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 
%arg) + ret void +} + +;; Instructions + +; Instructions -- Terminators +define void @instructions.terminators(i8 %val) personality i32 -10 { + br i1 false, label %iftrue, label %iffalse + ; CHECK: br i1 false, label %iftrue, label %iffalse + br label %iftrue + ; CHECK: br label %iftrue +iftrue: + ret void + ; CHECK: ret void +iffalse: + + switch i8 %val, label %defaultdest [ + ; CHECK: switch i8 %val, label %defaultdest [ + i8 0, label %defaultdest.0 + ; CHECK: i8 0, label %defaultdest.0 + i8 1, label %defaultdest.1 + ; CHECK: i8 1, label %defaultdest.1 + i8 2, label %defaultdest.2 + ; CHECK: i8 2, label %defaultdest.2 + ] + ; CHECK: ] +defaultdest: + ret void +defaultdest.0: + ret void +defaultdest.1: + ret void +defaultdest.2: + + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + + invoke fastcc void @f.fastcc() + ; CHECK: invoke fastcc void @f.fastcc() + to label %defaultdest unwind label %exc + ; CHECK: to label %defaultdest unwind label %exc +exc: + %cleanup = landingpad i32 cleanup + + resume i32 undef + ; CHECK: resume i32 undef + unreachable + ; CHECK: unreachable + + ret void +} + +define i32 @instructions.win_eh.1() personality i32 -3 { +entry: + %arg1 = alloca i32 + %arg2 = alloca i32 + invoke void @f.ccc() to label %normal unwind label %catchswitch1 + invoke void @f.ccc() to label %normal unwind label %catchswitch2 + invoke void @f.ccc() to label %normal unwind label %catchswitch3 + +catchswitch1: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + +catchpad1: + catchpad within %cs1 [] + br label %normal + ; CHECK: catchpad within %cs1 [] + ; CHECK-NEXT: br label %normal + +catchswitch2: + %cs2 = catchswitch within none [label %catchpad2] unwind to caller + +catchpad2: + catchpad within %cs2 [i32* %arg1] + br label %normal + ; CHECK: catchpad within %cs2 [i32* %arg1] + ; CHECK-NEXT: br label %normal + +catchswitch3: + %cs3 = catchswitch within none [label %catchpad3] unwind label %cleanuppad1 + +catchpad3: + catchpad within %cs3 [i32* %arg1, i32* %arg2] + br label %normal + ; CHECK: catchpad within %cs3 [i32* %arg1, i32* %arg2] + ; CHECK-NEXT: br label %normal + +cleanuppad1: + %clean.1 = cleanuppad within none [] + unreachable + ; CHECK: %clean.1 = cleanuppad within none [] + ; CHECK-NEXT: unreachable + +normal: + ret i32 0 +} +; +define i32 @instructions.win_eh.2() personality i32 -4 { +entry: + invoke void @f.ccc() to label %invoke.cont unwind label %catchswitch + +invoke.cont: + invoke void @f.ccc() to label %continue unwind label %cleanup + +cleanup: + %clean = cleanuppad within none [] + ; CHECK: %clean = cleanuppad within none [] + cleanupret from %clean unwind to caller + ; CHECK: cleanupret from %clean unwind to caller + +catchswitch: + %cs = catchswitch within none [label %catchpad] unwind label %terminate + +catchpad: + %catch = catchpad within %cs [] + br label %body + ; CHECK: %catch = catchpad within %cs [] + ; CHECK-NEXT: br label %body + +body: + invoke void @f.ccc() to label %continue unwind label %terminate + catchret from %catch to label %return + ; CHECK: catchret from %catch to label %return + +return: + ret i32 0 + +terminate: + cleanuppad 
within %cs [] + unreachable + ; CHECK: cleanuppad within %cs [] + ; CHECK-NEXT: unreachable + +continue: + ret i32 0 +} + +; Instructions -- Binary Operations +define void @instructions.binops(i8 %op1, i8 %op2) { + ; nuw x nsw + add i8 %op1, %op2 + ; CHECK: add i8 %op1, %op2 + add nuw i8 %op1, %op2 + ; CHECK: add nuw i8 %op1, %op2 + add nsw i8 %op1, %op2 + ; CHECK: add nsw i8 %op1, %op2 + add nuw nsw i8 %op1, %op2 + ; CHECK: add nuw nsw i8 %op1, %op2 + sub i8 %op1, %op2 + ; CHECK: sub i8 %op1, %op2 + sub nuw i8 %op1, %op2 + ; CHECK: sub nuw i8 %op1, %op2 + sub nsw i8 %op1, %op2 + ; CHECK: sub nsw i8 %op1, %op2 + sub nuw nsw i8 %op1, %op2 + ; CHECK: sub nuw nsw i8 %op1, %op2 + mul i8 %op1, %op2 + ; CHECK: mul i8 %op1, %op2 + mul nuw i8 %op1, %op2 + ; CHECK: mul nuw i8 %op1, %op2 + mul nsw i8 %op1, %op2 + ; CHECK: mul nsw i8 %op1, %op2 + mul nuw nsw i8 %op1, %op2 + ; CHECK: mul nuw nsw i8 %op1, %op2 + + ; exact + udiv i8 %op1, %op2 + ; CHECK: udiv i8 %op1, %op2 + udiv exact i8 %op1, %op2 + ; CHECK: udiv exact i8 %op1, %op2 + sdiv i8 %op1, %op2 + ; CHECK: sdiv i8 %op1, %op2 + sdiv exact i8 %op1, %op2 + ; CHECK: sdiv exact i8 %op1, %op2 + + ; none + urem i8 %op1, %op2 + ; CHECK: urem i8 %op1, %op2 + srem i8 %op1, %op2 + ; CHECK: srem i8 %op1, %op2 + + ret void +} + +; Instructions -- Bitwise Binary Operations +define void @instructions.bitwise_binops(i8 %op1, i8 %op2) { + ; nuw x nsw + shl i8 %op1, %op2 + ; CHECK: shl i8 %op1, %op2 + shl nuw i8 %op1, %op2 + ; CHECK: shl nuw i8 %op1, %op2 + shl nsw i8 %op1, %op2 + ; CHECK: shl nsw i8 %op1, %op2 + shl nuw nsw i8 %op1, %op2 + ; CHECK: shl nuw nsw i8 %op1, %op2 + + ; exact + lshr i8 %op1, %op2 + ; CHECK: lshr i8 %op1, %op2 + lshr exact i8 %op1, %op2 + ; CHECK: lshr exact i8 %op1, %op2 + ashr i8 %op1, %op2 + ; CHECK: ashr i8 %op1, %op2 + ashr exact i8 %op1, %op2 + ; CHECK: ashr exact i8 %op1, %op2 + + ; none + and i8 %op1, %op2 + ; CHECK: and i8 %op1, %op2 + or i8 %op1, %op2 + ; CHECK: or i8 %op1, %op2 + xor i8 %op1, %op2 + ; CHECK: xor i8 %op1, %op2 + + ret void +} + +; Instructions -- Vector Operations +define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) { + extractelement <4 x float> %vec, i8 0 + ; CHECK: extractelement <4 x float> %vec, i8 0 + insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + + ret void +} + +; Instructions -- Aggregate Operations +define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p, + [3 x i8] %arr, { i8, { i32 }} %n, + <2 x i8*> %pvec, <2 x i64> %offsets) { + extractvalue { i8, i32 } %up, 0 + ; CHECK: extractvalue { i8, i32 } %up, 0 + extractvalue <{ i8, i32 }> %p, 1 + ; CHECK: extractvalue <{ i8, i32 }> %p, 1 + extractvalue [3 x i8] %arr, 2 + ; CHECK: extractvalue [3 x i8] %arr, 2 + extractvalue { i8, { i32 } } %n, 1, 0 + ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0 + + insertvalue { i8, i32 } %up, i8 1, 0 + ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0 + insertvalue <{ i8, i32 }> %p, i32 2, 1 + ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1 + insertvalue [3 x i8] %arr, i8 0, 0 + ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0 + insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + ; CHECK: insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + + %up.ptr = alloca { i8, i32 } + %p.ptr = alloca <{ i8, i32 }> + %arr.ptr = alloca [3 x i8] + %n.ptr = alloca { i8, { 
i32 } } + + getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + + ret void +} + +; Instructions -- Memory Access and Addressing Operations +!7 = !{i32 1} +!8 = !{} +!9 = !{i64 4} +define void @instructions.memops(i32** %base) { + alloca i32, i8 4, align 4 + ; CHECK: alloca i32, i8 4, align 4 + alloca inalloca i32, i8 4, align 4 + ; CHECK: alloca inalloca i32, i8 4, align 4 + + load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + + store i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8 + store volatile i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8 + + ret void +} + +; Instructions -- Conversion Operations +define void @instructions.conversions() { + trunc i32 -1 to i1 + ; CHECK: trunc i32 -1 to i1 + zext i32 -1 to i64 + ; CHECK: zext i32 -1 to i64 + sext i32 -1 to i64 + ; CHECK: sext i32 -1 to i64 + fptrunc float undef to half + ; CHECK: fptrunc float undef to half + fpext half undef to float + ; CHECK: fpext half undef to float + fptoui float undef to i32 + ; CHECK: fptoui float undef to i32 + fptosi float undef to i32 + ; CHECK: fptosi float undef to i32 + uitofp i32 1 to float + ; CHECK: uitofp i32 1 to float + sitofp i32 -1 to float + ; CHECK: sitofp i32 -1 to float + ptrtoint i8* null to i64 + ; CHECK: ptrtoint i8* null to i64 + inttoptr i64 0 to i8* + ; CHECK: inttoptr i64 0 to i8* + bitcast i32 0 to i32 + ; CHECK: bitcast i32 0 to i32 + addrspacecast i32* null to i32 addrspace(1)* + ; CHECK: addrspacecast i32* null to i32 addrspace(1)* + + ret void +} + +; Instructions -- Other Operations +define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) { +entry: + icmp eq i32 %op1, %op2 + ; CHECK: icmp eq i32 %op1, %op2 + icmp ne i32 %op1, %op2 + ; CHECK: icmp ne i32 %op1, %op2 + icmp ugt i32 %op1, %op2 + ; CHECK: icmp ugt i32 %op1, %op2 + icmp uge i32 %op1, %op2 + ; CHECK: icmp uge i32 %op1, %op2 + icmp ult i32 %op1, %op2 + ; CHECK: icmp ult i32 %op1, %op2 + icmp ule i32 %op1, %op2 + ; CHECK: icmp ule i32 %op1, %op2 + icmp sgt i32 %op1, %op2 + ; CHECK: icmp sgt i32 %op1, %op2 + icmp sge i32 %op1, %op2 + ; CHECK: icmp sge i32 %op1, %op2 + icmp slt i32 %op1, %op2 + ; CHECK: icmp slt i32 
%op1, %op2
+  icmp sle i32 %op1, %op2
+  ; CHECK: icmp sle i32 %op1, %op2
+
+  fcmp false half %fop1, %fop2
+  ; CHECK: fcmp false half %fop1, %fop2
+  fcmp oeq half %fop1, %fop2
+  ; CHECK: fcmp oeq half %fop1, %fop2
+  fcmp ogt half %fop1, %fop2
+  ; CHECK: fcmp ogt half %fop1, %fop2
+  fcmp oge half %fop1, %fop2
+  ; CHECK: fcmp oge half %fop1, %fop2
+  fcmp olt half %fop1, %fop2
+  ; CHECK: fcmp olt half %fop1, %fop2
+  fcmp ole half %fop1, %fop2
+  ; CHECK: fcmp ole half %fop1, %fop2
+  fcmp one half %fop1, %fop2
+  ; CHECK: fcmp one half %fop1, %fop2
+  fcmp ord half %fop1, %fop2
+  ; CHECK: fcmp ord half %fop1, %fop2
+  fcmp ueq half %fop1, %fop2
+  ; CHECK: fcmp ueq half %fop1, %fop2
+  fcmp ugt half %fop1, %fop2
+  ; CHECK: fcmp ugt half %fop1, %fop2
+  fcmp uge half %fop1, %fop2
+  ; CHECK: fcmp uge half %fop1, %fop2
+  fcmp ult half %fop1, %fop2
+  ; CHECK: fcmp ult half %fop1, %fop2
+  fcmp ule half %fop1, %fop2
+  ; CHECK: fcmp ule half %fop1, %fop2
+  fcmp une half %fop1, %fop2
+  ; CHECK: fcmp une half %fop1, %fop2
+  fcmp uno half %fop1, %fop2
+  ; CHECK: fcmp uno half %fop1, %fop2
+  fcmp true half %fop1, %fop2
+  ; CHECK: fcmp true half %fop1, %fop2
+
+  br label %exit
+L1:
+  %v1 = add i32 %op1, %op2
+  br label %exit
+L2:
+  %v2 = add i32 %op1, %op2
+  br label %exit
+exit:
+  phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+  ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+  select i1 true, i32 0, i32 1
+  ; CHECK: select i1 true, i32 0, i32 1
+  select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+  ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+  call void @f.nobuiltin() builtin
+  ; CHECK: call void @f.nobuiltin() #39
+
+  call fastcc noalias i32* @f.noalias() noinline
+  ; CHECK: call fastcc noalias i32* @f.noalias() #12
+  tail call ghccc nonnull i32* @f.nonnull() minsize
+  ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+  ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+  musttail call void @f.param.inalloca(i8* inalloca %val)
+  ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+  ret void
+}
+
+define void @instructions.call_notail() {
+  notail call void @f1()
+  ; CHECK: notail call void @f1()
+
+  ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+  invoke void @llvm.donothing() to label %proceed unwind label %catch1
+  invoke void @llvm.donothing() to label %proceed unwind label %catch2
+  invoke void @llvm.donothing() to label %proceed unwind label %catch3
+  invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  br label %proceed
+
+catch2:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  catch i32* null
+  ; CHECK: catch i32* null
+  br label %proceed
+
+catch3:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  catch i32* null
+  ; CHECK: catch i32* null
+  catch i32* null
+  ; CHECK: catch i32* null
+  br label %proceed
+
+catch4:
+  landingpad i32
+  ; CHECK: landingpad i32
+  filter [2 x i32] zeroinitializer
+  ; CHECK: filter [2 x i32] zeroinitializer
+  br label %proceed
+
+proceed:
+  ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+define void @instructions.va_arg(i8* %v, ...)
{ + %ap = alloca i8* + %ap2 = bitcast i8** %ap to i8* + + call void @llvm.va_start(i8* %ap2) + ; CHECK: call void @llvm.va_start(i8* %ap2) + + va_arg i8* %ap2, i32 + ; CHECK: va_arg i8* %ap2, i32 + + call void @llvm.va_copy(i8* %v, i8* %ap2) + ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2) + + call void @llvm.va_end(i8* %ap2) + ; CHECK: call void @llvm.va_end(i8* %ap2) + + ret void +} + +; Intrinsic Functions -- Accurate Garbage Collection +declare void @llvm.gcroot(i8**, i8*) +declare i8* @llvm.gcread(i8*, i8**) +declare void @llvm.gcwrite(i8*, i8*, i8**) +define void @intrinsics.gc() gc "shadow-stack" { + %ptrloc = alloca i8* + call void @llvm.gcroot(i8** %ptrloc, i8* null) + ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null) + + call i8* @llvm.gcread(i8* null, i8** %ptrloc) + ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc) + + %ref = alloca i8 + call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + + ret void +} + +; Intrinsic Functions -- Code Generation +declare i8* @llvm.returnaddress(i32) +declare i8* @llvm.frameaddress(i32) +declare i32 @llvm.read_register.i32(metadata) +declare i64 @llvm.read_register.i64(metadata) +declare void @llvm.write_register.i32(metadata, i32) +declare void @llvm.write_register.i64(metadata, i64) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare void @llvm.prefetch(i8*, i32, i32, i32) +declare void @llvm.pcmarker(i32) +declare i64 @llvm.readcyclecounter() +declare void @llvm.clear_cache(i8*, i8*) +declare void @llvm.instrprof_increment(i8*, i64, i32, i32) + +!10 = !{!"rax"} +define void @intrinsics.codegen() { + call i8* @llvm.returnaddress(i32 1) + ; CHECK: call i8* @llvm.returnaddress(i32 1) + call i8* @llvm.frameaddress(i32 1) + ; CHECK: call i8* @llvm.frameaddress(i32 1) + + call i32 @llvm.read_register.i32(metadata !10) + ; CHECK: call i32 @llvm.read_register.i32(metadata !10) + call i64 @llvm.read_register.i64(metadata !10) + ; CHECK: call i64 @llvm.read_register.i64(metadata !10) + call void @llvm.write_register.i32(metadata !10, i32 0) + ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0) + call void @llvm.write_register.i64(metadata !10, i64 0) + ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0) + + %stack = call i8* @llvm.stacksave() + ; CHECK: %stack = call i8* @llvm.stacksave() + call void @llvm.stackrestore(i8* %stack) + ; CHECK: call void @llvm.stackrestore(i8* %stack) + + call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + + call void @llvm.pcmarker(i32 1) + ; CHECK: call void @llvm.pcmarker(i32 1) + + call i64 @llvm.readcyclecounter() + ; CHECK: call i64 @llvm.readcyclecounter() + + call void @llvm.clear_cache(i8* null, i8* null) + ; CHECK: call void @llvm.clear_cache(i8* null, i8* null) + + call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + + ret void +} + +declare void @llvm.localescape(...) +declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) +define void @intrinsics.localescape() { + %static.alloca = alloca i32 + call void (...) @llvm.localescape(i32* %static.alloca) + ; CHECK: call void (...) 
@llvm.localescape(i32* %static.alloca) + + call void @intrinsics.localrecover() + + ret void +} +define void @intrinsics.localrecover() { + %func = bitcast void ()* @intrinsics.localescape to i8* + %fp = call i8* @llvm.frameaddress(i32 1) + call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0) + ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0) + + ret void +} + +; We need this function to provide `uses' for some metadata tests. +define void @misc.metadata() { + call void @f1(), !srcloc !11 + call void @f1(), !srcloc !12 + call void @f1(), !srcloc !13 + call void @f1(), !srcloc !14 + ret void +} + +declare void @op_bundle_callee_0() +declare void @op_bundle_callee_1(i32,i32) + +define void @call_with_operand_bundle0(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle1(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + call void @op_bundle_callee_0() + call void @op_bundle_callee_0() [ "foo"() ] + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: @op_bundle_callee_0(){{$}} +; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"() ] +; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle2(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle2( + entry: + call void @op_bundle_callee_0() [ "foo"() ] +; CHECK: call void @op_bundle_callee_0() [ "foo"() ] + ret void +} + +define void @call_with_operand_bundle3(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle4(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle4( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; CHECK: call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + ret void +} + +; Invoke versions of the above tests: + + +define void @invoke_with_operand_bundle0(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle1(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + invoke void @op_bundle_callee_0() to label 
%normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0(){{$}} + +exception: + %cleanup = landingpad i8 cleanup + br label %normal + +normal: + invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal1 unwind label %exception1 +; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ] + +exception1: + %cleanup1 = landingpad i8 cleanup + br label %normal1 + +normal1: + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2 +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception2: + %cleanup2 = landingpad i8 cleanup + br label %normal2 + +normal2: + ret void +} + +define void @invoke_with_operand_bundle2(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle2( + entry: + invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle3(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle4(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle4( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + + +; CHECK: attributes #0 = { alignstack=4 } +; CHECK: attributes #1 = { alignstack=8 } +; CHECK: attributes #2 = { alwaysinline } +; CHECK: attributes #3 = { cold } +; CHECK: attributes #4 = { convergent } +; CHECK: attributes #5 = { inlinehint } +; CHECK: attributes #6 = { jumptable } +; CHECK: attributes #7 = { minsize } +; CHECK: attributes #8 = { naked } +; CHECK: attributes #9 = { nobuiltin } +; CHECK: attributes #10 = { noduplicate } +; CHECK: attributes #11 = { noimplicitfloat } +; CHECK: attributes #12 = { noinline } +; CHECK: attributes #13 = { nonlazybind } +; CHECK: attributes #14 = { noredzone } +; CHECK: attributes #15 = { noreturn } +; CHECK: attributes #16 = { nounwind } +; CHECK: attributes #17 = { noinline optnone } +; CHECK: attributes #18 = { optsize } +; CHECK: attributes #19 = { readnone } +; CHECK: attributes #20 = { readonly } +; CHECK: attributes #21 = { returns_twice } +; CHECK: attributes #22 = { safestack } +; CHECK: attributes #23 = { sanitize_address } +; CHECK: attributes #24 = { sanitize_memory } +; CHECK: attributes #25 = { sanitize_thread } +; CHECK: attributes #26 = { ssp } +; CHECK: attributes #27 = { sspreq } +; CHECK: attributes #28 = { sspstrong } +; CHECK: attributes #29 = { "thunk" } +; CHECK: attributes #30 = { uwtable } +; CHECK: 
attributes #31 = { "cpu"="cortex-a8" } +; CHECK: attributes #32 = { norecurse } +; CHECK: attributes #33 = { inaccessiblememonly } +; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } +; CHECK: attributes #35 = { nounwind readnone } +; CHECK: attributes #36 = { argmemonly nounwind readonly } +; CHECK: attributes #37 = { argmemonly nounwind } +; CHECK: attributes #38 = { nounwind readonly } +; CHECK: attributes #39 = { builtin } + +;; Metadata + +; Metadata -- Module flags +!llvm.module.flags = !{!0, !1, !2, !4, !5, !6} +; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6} + +!0 = !{i32 1, !"mod1", i32 0} +; CHECK: !0 = !{i32 1, !"mod1", i32 0} +!1 = !{i32 2, !"mod2", i32 0} +; CHECK: !1 = !{i32 2, !"mod2", i32 0} +!2 = !{i32 3, !"mod3", !3} +; CHECK: !2 = !{i32 3, !"mod3", !3} +!3 = !{!"mod6", !0} +; CHECK: !3 = !{!"mod6", !0} +!4 = !{i32 4, !"mod4", i32 0} +; CHECK: !4 = !{i32 4, !"mod4", i32 0} +!5 = !{i32 5, !"mod5", !0} +; CHECK: !5 = !{i32 5, !"mod5", !0} +!6 = !{i32 6, !"mod6", !0} +; CHECK: !6 = !{i32 6, !"mod6", !0} + +; Metadata -- Check `distinct' +!11 = distinct !{} +; CHECK: !11 = distinct !{} +!12 = distinct !{} +; CHECK: !12 = distinct !{} +!13 = !{!11} +; CHECK: !13 = !{!11} +!14 = !{!12} +; CHECK: !14 = !{!12} diff --git a/test/Bitcode/debug-loc-again.ll b/test/Bitcode/debug-loc-again.ll index 6dbea16121aa..adf74a07ebed 100644 --- a/test/Bitcode/debug-loc-again.ll +++ b/test/Bitcode/debug-loc-again.ll @@ -29,8 +29,8 @@ entry: !llvm.module.flags = !{!0} !0 = !{i32 2, !"Debug Info Version", i32 3} -!1 = !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"), - subprograms: !{!2}) -!2 = !DISubprogram(name: "foo") +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"), + subprograms: !{!2}) +!2 = distinct !DISubprogram(name: "foo") !3 = !DILocation(line: 1, scope: !2) !4 = !DILocation(line: 2, scope: !2) diff --git a/test/Bitcode/highLevelStructure.3.2.ll b/test/Bitcode/highLevelStructure.3.2.ll index 54356b9fb139..749b157cffc3 100644 --- a/test/Bitcode/highLevelStructure.3.2.ll +++ b/test/Bitcode/highLevelStructure.3.2.ll @@ -19,16 +19,16 @@ module asm "some assembly" ; Aliases Test ; CHECK: @glob1 = global i32 1 @glob1 = global i32 1 -; CHECK: @aliased1 = alias i32* @glob1 -@aliased1 = alias i32* @glob1 -; CHECK-NEXT: @aliased2 = internal alias i32* @glob1 -@aliased2 = internal alias i32* @glob1 -; CHECK-NEXT: @aliased3 = alias i32* @glob1 -@aliased3 = external alias i32* @glob1 -; CHECK-NEXT: @aliased4 = weak alias i32* @glob1 -@aliased4 = weak alias i32* @glob1 -; CHECK-NEXT: @aliased5 = weak_odr alias i32* @glob1 -@aliased5 = weak_odr alias i32* @glob1 +; CHECK: @aliased1 = alias i32, i32* @glob1 +@aliased1 = alias i32, i32* @glob1 +; CHECK-NEXT: @aliased2 = internal alias i32, i32* @glob1 +@aliased2 = internal alias i32, i32* @glob1 +; CHECK-NEXT: @aliased3 = alias i32, i32* @glob1 +@aliased3 = external alias i32, i32* @glob1 +; CHECK-NEXT: @aliased4 = weak alias i32, i32* @glob1 +@aliased4 = weak alias i32, i32* @glob1 +; CHECK-NEXT: @aliased5 = weak_odr alias i32, i32* @glob1 +@aliased5 = weak_odr alias i32, i32* @glob1 ;Parameter Attribute Test ; CHECK: declare void @ParamAttr1(i8 zeroext) diff --git a/test/Bitcode/identification.ll b/test/Bitcode/identification.ll new file mode 100644 index 000000000000..2f09d5a43281 --- /dev/null +++ b/test/Bitcode/identification.ll @@ -0,0 +1,6 @@ +; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted. 
+;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s +;CHECK: &1 | FileCheck %s -; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (48) +; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (52) ; invalid.ll.bc has an invalid attribute number. ; The test checks that LLVM reports the error and doesn't access freed memory diff --git a/test/Bitcode/invalid.ll.bc b/test/Bitcode/invalid.ll.bc index a85c3644b3abb93a450b18dcfa20b5fbf01ed143..60c7afffbc2308e4ceb58663b5e84d21fba6e1c1 100644 GIT binary patch delta 502 zcmX@Zw1L&o$@#!NQxOIRRt5%!Bpw45pU8dB-hQ^z4eb?RJpmL~L z(!vu(wamP^jEp)wCWt6FyLBvOkTi07;=wmzqT-PN$H2y(hs{sBSR~!p0wow2n1MQ^ zJ0AQ80``W9g~E*76I&H@M2cAw0+<5WnB0KU3=Gcm7#JGBEFhs`H$hQhqvL`V8Vn2$ zI&5JR_eR&ZOP&$rdozLWp#guM0srF;b%j0KL!-cyvhp(uMOQ7+A((|S*{^#y0k zEzCB1nr&K`tnyjxdTW>hbb3C)XAfmmfpt&T2y&|E#M54W9LA&I+VE%Uu z{GS5&o=o6-?7;s}f$t5_8wwh-cMi%Oa+JHID08P#?#=_5Cl6&`G0LS$1X^rxwwlvy z&7i=*uo4)SY{1ar<|@(vx}3c#qF{!zEztfA&XyOLCu=dvX%qsLu>+L^@d+rUG%b-) Za5?eB!_uK+!a}ATHUXXn6QCRj003g2t5pC1 delta 144 zcmdnMdWK2V$@#!NMIHu*00ssI?j{8WCLk>|(NN3Clgr3O#e-{tqVkcT1|CVpA{F)% zCj^pKELh=i>;ofERiFd|12d48?s)JY2-q1WP84S3nz&R!LZFx>A%H2s4Jhd>#>mhB kq!}1K8Hi8(7%jvOWELtgfbC&(FkraEFgc4!js>Iw0BZaq0RR91 diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test index 0aab553bb615..3425adc84100 100644 --- a/test/Bitcode/invalid.test +++ b/test/Bitcode/invalid.test @@ -113,6 +113,11 @@ RUN: FileCheck --check-prefix=ELEMENT-TYPE %s ELEMENT-TYPE: Invalid type +RUN: not llvm-dis -disable-output %p/Inputs/invalid-cast.bc 2>&1 | \ +RUN: FileCheck --check-prefix=INVALID-CAST %s + +INVALID-CAST: Invalid cast + RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-op-not-2nd-to-last.bc 2>&1 | \ RUN: FileCheck --check-prefix=ARRAY-NOT-2LAST %s @@ -202,3 +207,13 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-alias-type-mismatch.bc 2>&1 RUN: FileCheck --check-prefix=ALIAS-TYPE-MISMATCH %s ALIAS-TYPE-MISMATCH: Alias and aliasee types don't match + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-function-block.bc 2>&1 | \ +RUN: FileCheck --check-prefix=NO-FUNCTION-BLOCK %s + +NO-FUNCTION-BLOCK: Trying to materialize functions before seeing function blocks + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \ +RUN: FileCheck --check-prefix=NAME-WITH-0 %s + +NAME-WITH-0: Invalid value name diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll b/test/Bitcode/local-linkage-default-visibility.3.4.ll index df0cf7653e55..15ff5e3a6af8 100644 --- a/test/Bitcode/local-linkage-default-visibility.3.4.ll +++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll @@ -25,23 +25,23 @@ @global = global i32 0 -@default.internal.alias = alias internal i32* @global -; CHECK: @default.internal.alias = internal alias i32* @global +@default.internal.alias = alias internal i32, internal i32* @global +; CHECK: @default.internal.alias = internal alias i32, i32* @global -@hidden.internal.alias = hidden alias internal i32* @global -; CHECK: @hidden.internal.alias = internal alias i32* @global +@hidden.internal.alias = hidden alias internal i32, internal i32* @global +; CHECK: @hidden.internal.alias = internal alias i32, i32* @global -@protected.internal.alias = protected alias internal i32* @global -; CHECK: @protected.internal.alias = internal alias i32* @global +@protected.internal.alias = protected alias internal i32, internal i32* @global +; CHECK: @protected.internal.alias = internal alias i32, i32* @global -@default.private.alias = alias private i32* 
@global -; CHECK: @default.private.alias = private alias i32* @global +@default.private.alias = alias private i32, private i32* @global +; CHECK: @default.private.alias = private alias i32, i32* @global -@hidden.private.alias = hidden alias private i32* @global -; CHECK: @hidden.private.alias = private alias i32* @global +@hidden.private.alias = hidden alias private i32, private i32* @global +; CHECK: @hidden.private.alias = private alias i32, i32* @global -@protected.private.alias = protected alias private i32* @global -; CHECK: @protected.private.alias = private alias i32* @global +@protected.private.alias = protected alias private i32, private i32* @global +; CHECK: @protected.private.alias = private alias i32, i32* @global define internal void @default.internal() { ; CHECK: define internal void @default.internal diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll index 8527f074d04b..1bcc4306477c 100644 --- a/test/Bitcode/old-aliases.ll +++ b/test/Bitcode/old-aliases.ll @@ -10,14 +10,14 @@ @v2 = global [1 x i32] zeroinitializer ; CHECK: @v2 = global [1 x i32] zeroinitializer -@v3 = alias bitcast (i32* @v1 to i16*) -; CHECK: @v3 = alias bitcast (i32* @v1 to i16*) +@v3 = alias i16, bitcast (i32* @v1 to i16*) +; CHECK: @v3 = alias i16, bitcast (i32* @v1 to i16*) -@v4 = alias getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) -; CHECK: @v4 = alias getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) +@v4 = alias i32, getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) +; CHECK: @v4 = alias i32, getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) -@v5 = alias i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*) -; CHECK: @v5 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*) +@v5 = alias i32, i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*) +; CHECK: @v5 = alias i32, addrspacecast (i32* @v1 to i32 addrspace(2)*) -@v6 = alias i16* @v3 -; CHECK: @v6 = alias i16* @v3 +@v6 = alias i16, i16* @v3 +; CHECK: @v6 = alias i16, i16* @v3 diff --git a/test/Bitcode/operand-bundles.ll b/test/Bitcode/operand-bundles.ll new file mode 100644 index 000000000000..ab28cffd84aa --- /dev/null +++ b/test/Bitcode/operand-bundles.ll @@ -0,0 +1,152 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +declare void @callee0() +declare void @callee1(i32,i32) + +define void @f0(i32* %ptr) { +; CHECK-LABEL: @f0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @f1(i32* %ptr) { +; CHECK-LABEL: @f1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + call void @callee0() + call void @callee0() [ "foo"() ] + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: @callee0(){{$}} +; CHECK-NEXT: call void @callee0() [ "foo"() ] +; CHECK-NEXT: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @f2(i32* %ptr) { +; CHECK-LABEL: @f2( + entry: + call void @callee0() [ "foo"() ] +; CHECK: call void @callee0() [ "foo"() ] + ret void +} + +define void @f3(i32* %ptr) { +; CHECK-LABEL: @f3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; 
CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+  ret void
+}
+
+define void @f4(i32* %ptr) {
+; CHECK-LABEL: @f4(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+  ret void
+}
+
+; Invoke versions of the above tests:
+
+
+define void @g0(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g0(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g1(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g1(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+
+  invoke void @callee0() to label %normal unwind label %exception
+; CHECK: invoke void @callee0(){{$}}
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+
+normal:
+  invoke void @callee0() [ "foo"() ] to label %normal1 unwind label %exception1
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception1:
+  %cleanup1 = landingpad i8 cleanup
+  br label %normal1
+
+normal1:
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception2:
+  %cleanup2 = landingpad i8 cleanup
+  br label %normal2
+
+normal2:
+  ret void
+}
+
+define void @g2(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g2(
+ entry:
+  invoke void @callee0() [ "foo"() ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g3(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g3(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g4(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g4(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+        to label %normal unwind label %exception
+; CHECK: invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll
index 3ad06796dccf..666d2960fb5f 100644
--- a/test/Bitcode/select.ll
+++ b/test/Bitcode/select.ll
@@ -8,3 +8,11 @@ define <2 x i32> @main() {
 ; CHECK: define <2 x i32> @main() {
 ; CHECK: ret <2 x i32> <i32 5, i32 3>
 ; CHECK: }
+
+define <2 x float> @f() {
+  ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+}
+
+; CHECK: define <2 x float> @f() {
+; CHECK: ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+; CHECK: }
diff --git a/test/Bitcode/tailcall.ll b/test/Bitcode/tailcall.ll
index 01190d74c348..6a4b8885847a 100644
--- a/test/Bitcode/tailcall.ll
+++ b/test/Bitcode/tailcall.ll
@@ -3,16 +3,16 @@

 ; Check that musttail and tail roundtrip.

-declare cc8191 void @t1_callee()
-define cc8191 void @t1() {
-; CHECK: tail call cc8191 void @t1_callee()
-  tail call cc8191 void @t1_callee()
+declare cc1023 void @t1_callee()
+define cc1023 void @t1() {
+; CHECK: tail call cc1023 void @t1_callee()
+  tail call cc1023 void @t1_callee()
   ret void
 }

-declare cc8191 void @t2_callee()
-define cc8191 void @t2() {
-; CHECK: musttail call cc8191 void @t2_callee()
-  musttail call cc8191 void @t2_callee()
+declare cc1023 void @t2_callee()
+define cc1023 void @t2() {
+; CHECK: musttail call cc1023 void @t2_callee()
+  musttail call cc1023 void @t2_callee()
   ret void
 }
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
new file mode 100644
index 000000000000..9c2f2acd6c7a
--- /dev/null
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as -function-summary < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC
+; Check for function summary block/records.
+
+; BC: [#uses=0]
+@h = external global void ()* ; [#uses=0]
+
+define internal void @0() nounwind {
+entry:
+  store void()* @0, void()** @h
+  br label %return
+
+return: ; preds = %entry
+  ret void
+}
diff --git a/test/Bitcode/upgrade-subprogram.ll b/test/Bitcode/upgrade-subprogram.ll
new file mode 100644
index 000000000000..8d5c000a1480
--- /dev/null
+++ b/test/Bitcode/upgrade-subprogram.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; CHECK: define void @foo() !dbg [[SP:![0-9]+]]
+define void @foo() {
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+
+!llvm.dbg.cu = !{!1}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
+!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir")
+; CHECK: [[SP]] = distinct !DISubprogram
+!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", function: void ()* @foo, type: !4)
+!4 = !DISubroutineType(types: !{})
diff --git a/test/Bitcode/upgrade-subprogram.ll.bc b/test/Bitcode/upgrade-subprogram.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..cfab5a2c76a9814c8324d6cece8bf7172fc1ea65
GIT binary patch
literal 784
zcmXX@ZAep57(Uy*+r72h-E0H5yc>6Zgn`kRXh$B7HzV`qA0wh4Y+qp7)&RJnwle%*uA1
z0ss*Jz)9#TZC&qo_kUhEeREwVgcQ&c04%{-GC_j^A%_F=RZN=`XscMJH_iwGM3o3v
zY0yVbFL6<5nUPp}WO{`GS6(@)*9WbqMB48kz%w
zf{Pje?%?6}YbzAoGy!l7fXAs+eye+HJF%Bq>rSSk8@sPjrnJ8PU45y3vG&>G(kU}i
z^wj~c)Z$($o}t_dL1U0H1Y#kJ7aJSJhQb5HE|Bia$X{Nvmau4|tLlvi<<^Oc$NXhU
zq%pxXhLFbOXlR(l#&K-S%CRhVpG8JSIhN*B$r3WplNp_~Unk8Tl80?%tt_tI)>0|0
zI_l84byKEgzv&ZWe1kMB`x^p?DS4atxd@dkV%kBiX{oqRz2*>q;TdCsF@}ryHfES*
z3_-*+8#@CZ{X1dWL`(}Y;$t7jc5;KPf4CPLwR6KtZg`GUZQPI^0O^l5GRKqICTWHy
z^RNoKTAg0iM9rGdS}N_+q<5%XM!hG{xRvdO1?Kz
/dev/null
+; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+RUN-DISABLE: bugpoint -disable-namedmd-remove -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crash-too-many-cus -silence-passes > /dev/null
+RUN-DISABLE: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; REQUIRES: loadable_module
+
+; CHECK: !llvm.dbg.cu =
!{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +; CHECK-DISABLE: !llvm.dbg.cu = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]], +; CHECK-DISABLE-SAME: ![[THIRD:[0-9]+]], ![[FOURTH:[0-9]+]], ![[FIFTH:[0-9]+]]} +!llvm.dbg.cu = !{!0, !1, !2, !3, !4, !5} +; CHECK-NOT: !named +; CHECK-DISABLE: !named +!named = !{!0, !1, !2, !3, !4, !5} +; CHECK: !llvm.module.flags = !{![[DIVERSION:[0-9]+]]} +!llvm.module.flags = !{!6, !7} + +; CHECK-DAG: ![[FIRST]] = distinct !DICompileUnit(language: DW_LANG_Julia, +; CHECK-DAG: ![[SECOND]] = distinct !DICompileUnit(language: DW_LANG_Julia, +; CHECK-DAG: ![[DIVERSION]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK-DAG: !DIFile(filename: "a", directory: "b") + +; 4 nodes survive. Due to renumbering !4 should not exist +; CHECK-NOT: !4 + +!0 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!1 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!2 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!3 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!4 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!5 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!6 = !{i32 2, !"Dwarf Version", i32 2} +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !DIFile(filename: "a", directory: "b") diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 29a03b831077..72be4fe55936 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -2,10 +2,10 @@ ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module -; Test to make sure that arguments are removed from the function if they are -; unnecessary. And clean up any types that that frees up too. +; Test to make sure that arguments are removed from the function if they are +; unnecessary. And clean up any types that frees up too. -; CHECK: target triple +; CHECK: ModuleID ; CHECK-NOT: struct.anon %struct.anon = type { i32 } diff --git a/test/BugPoint/replace-funcs-with-null.ll b/test/BugPoint/replace-funcs-with-null.ll index 3433c456e90f..622f9eb67a29 100644 --- a/test/BugPoint/replace-funcs-with-null.ll +++ b/test/BugPoint/replace-funcs-with-null.ll @@ -3,7 +3,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -replace-funcs-with-null -bugpoint-crash-decl-funcs -silence-passes -safe-run-llc ; REQUIRES: loadable_module -@foo2 = alias i32 ()* @foo +@foo2 = alias i32 (), i32 ()* @foo define i32 @foo() { ret i32 1 } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f49df542f4e5..138450ba8e02 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(LLVM_BUILD_EXAMPLES) + set(ENABLE_EXAMPLES 1) +endif() + configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg @@ -15,11 +19,12 @@ endif() # Set the depends list as a variable so that it can grow conditionally. # NOTE: Sync the substitutions in test/lit.cfg when adding to this list. 
set(LLVM_TEST_DEPENDS - llvm-config - UnitTests BugpointPasses + FileCheck LLVMHello + UnitTests bugpoint + count llc lli lli-child-target @@ -27,12 +32,14 @@ set(LLVM_TEST_DEPENDS llvm-as llvm-bcanalyzer llvm-c-test + llvm-config llvm-cov llvm-cxxdump llvm-diff llvm-dis llvm-dsymutil llvm-dwarfdump + llvm-dwp llvm-extract llvm-lib llvm-link @@ -41,22 +48,22 @@ set(LLVM_TEST_DEPENDS llvm-mcmarkup llvm-nm llvm-objdump + llvm-pdbdump llvm-profdata llvm-ranlib llvm-readobj llvm-rtdyld llvm-size + llvm-split llvm-symbolizer llvm-tblgen - macho-dump - opt - FileCheck - count not + obj2yaml + opt + sancov + verify-uselistorder yaml-bench yaml2obj - obj2yaml - verify-uselistorder ) # If Intel JIT events are supported, depend on a tool that tests the listener. @@ -95,6 +102,16 @@ if(TARGET ocaml_llvm) ) endif() +if(LLVM_BUILD_EXAMPLES) + list(APPEND LLVM_TEST_DEPENDS + Kaleidoscope-Ch3 + Kaleidoscope-Ch4 + Kaleidoscope-Ch5 + Kaleidoscope-Ch6 + Kaleidoscope-Ch7 + ) +endif() + add_lit_testsuite(check-llvm "Running the LLVM regression tests" ${CMAKE_CURRENT_BINARY_DIR} PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll index b075573cc674..5eb455f3a22c 100644 --- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll +++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll @@ -3,7 +3,7 @@ ; Bug 20598 -define void @test() #0 { +define void @test() #0 !dbg !4 { entry: br label %for.body, !dbg !39 @@ -44,39 +44,39 @@ attributes #1 = { nounwind readnone } !llvm.module.flags = !{!36, !37} !llvm.ident = !{!38} -!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) !1 = !DIFile(filename: "test.c", directory: "") !2 = !{} !3 = !{!4} -!4 = !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, function: void ()* @test, variables: !12) +!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12) !6 = !DISubroutineType(types: !7) !7 = !{null, !8} !8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9) !9 = !DIDerivedType(tag: DW_TAG_typedef, line: 30, file: !1, baseType: !11) !11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35} -!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8) -!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) +!13 = !DILocalVariable(name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8) +!14 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) !15 = !DIDerivedType(tag: DW_TAG_typedef, line: 183, file: !1, baseType: !17) !17 = !DIBasicType(tag: DW_TAG_base_type, size: 64, align: 64, encoding: 
DW_ATE_signed) -!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!30 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 145, scope: !4, file: !1, type: !8) -!35 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 146, scope: !4, file: !1, type: !11) +!18 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!19 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!20 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!21 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!22 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!23 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!24 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!25 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!26 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!27 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!28 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!29 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!30 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!31 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!32 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!33 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!34 = !DILocalVariable(name: "", line: 145, scope: !4, file: !1, type: !8) +!35 = !DILocalVariable(name: "", line: 146, scope: !4, file: !1, type: !11) !36 = !{i32 2, !"Dwarf Version", i32 4} !37 = !{i32 2, !"Debug Info Version", i32 3} !38 = !{!"clang version 3.6.0 "} diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll new file mode 100644 index 000000000000..ca374eea28e7 --- /dev/null +++ 
b/test/CodeGen/AArch64/aarch64-addv.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s + +define i8 @add_B(<16 x i8>* %arr) { +; CHECK-LABEL: add_B +; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b + %bin.rdx = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> + %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0 + %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> + %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf + %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> + %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12 + %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> + %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13 + %r = extractelement <16 x i8> %bin.rdx14, i32 0 + ret i8 %r +} + +define i16 @add_H(<8 x i16>* %arr) { +; CHECK-LABEL: add_H +; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h + %bin.rdx = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> + %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf + %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> + %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12 + %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> + %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13 + %r = extractelement <8 x i16> %bin.rdx14, i32 0 + ret i16 %r +} + +define i32 @add_S( <4 x i32>* %arr) { +; CHECK-LABEL: add_S +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s + %bin.rdx = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> + %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf + %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> + %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12 + %r = extractelement <4 x i32> %bin.rdx13, i32 0 + ret i32 %r +} + +define i64 @add_D(<2 x i64>* %arr) { +; CHECK-LABEL: add_D +; CHECK-NOT: addv + %bin.rdx = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> + %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0 + %r = extractelement <2 x i64> %bin.rdx0, i32 0 + ret i64 %r +} + +define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) { +; CHECK-LABEL: oversized_ADDV_256 +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s +entry: + %0 = bitcast i8* %arg1 to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i32> + %3 = bitcast i8* %arg2 to <8 x i8>* + %4 = load <8 x i8>, <8 x i8>* %3, align 1 + %5 = zext <8 x i8> %4 to <8 x i32> + %6 = sub nsw <8 x i32> %2, %5 + %7 = icmp slt <8 x i32> %6, zeroinitializer + %8 = sub nsw <8 x i32> zeroinitializer, %6 + %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6 + %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> + %bin.rdx = add <8 x i32> %9, %rdx.shuf + %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> + %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1 + %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> + %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3 + %10 = extractelement <8 x i32> %bin.rdx4, i32 0 + ret i32 %10 +} + +define i32 @oversized_ADDV_512(<16 x i32>* %arr) { +; CHECK-LABEL: oversized_ADDV_512 +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s + %bin.rdx = load <16 x i32>, <16 x i32>* %arr + + %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 
x i32> + %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0 + + %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> + %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf + + %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> + %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12 + + %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> + %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13 + + %r = extractelement <16 x i32> %bin.rdx14, i32 0 + ret i32 %r +} diff --git a/test/CodeGen/AArch64/aarch64-deferred-spilling.ll b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll new file mode 100644 index 000000000000..7accdced7d44 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll @@ -0,0 +1,514 @@ +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR + +; Check that we do not end up with useless spill code. +; +; Move to the basic block we are interested in. +; +; CHECK: // %if.then.120 +; +; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill +; Check that w21 wouldn't need to be spilled since it is never reused. +; REGULAR-NOT: {{[wx]}}21{{,?}} +; +; Check that w22 is used to carry a value through the call. +; DEFERRED-NOT: str {{[wx]}}22, +; DEFERRED: mov {{[wx]}}22, +; DEFERRED-NOT: str {{[wx]}}22, +; +; CHECK: bl fprintf +; +; DEFERRED-NOT: ldr {{[wx]}}22, +; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22 +; DEFERRED-NOT: ldr {{[wx]}}22, +; +; REGULAR-NOT: {{[wx]}}21{{,?}} +; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload +; +; End of the basic block we are interested in. +; CHECK: b +; CHECK: {{[^:]+}}: // %sw.bb.123 + +%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sbuf = type { i8*, i64 } +%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* } +%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* } + +@__sF = external global [0 x %struct.__sFILE], align 8 +@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1 + +declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...) 
+ +declare void @bar(i32) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define i32 @foo(%struct.DState* %s) { +entry: + %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1 + %tmp = load i32, i32* %state, align 4 + %cmp = icmp eq i32 %tmp, 10 + %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %tmp1 = bitcast i32* %save_i to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false) + br label %sw.default + +if.end: ; preds = %entry + %.pre = load i32, i32* %save_i, align 4 + %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4 + %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4 + %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4 + %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4 + %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4 + %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4 + %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4 + %save_groupPos10.phi.trans.insert = getelementptr 
inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4 + %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4 + %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4 + %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4 + %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4 + %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4 + %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4 + %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4 + %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4 + %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4 + %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4 + switch i32 %tmp, label %sw.default [ + i32 13, label %sw.bb + i32 14, label %if.end.sw.bb.65_crit_edge + i32 25, label %if.end.sw.bb.123_crit_edge + ] + +if.end.sw.bb.123_crit_edge: ; preds = %if.end + %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + br label %sw.bb.123 + +if.end.sw.bb.65_crit_edge: ; preds = %if.end + %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4 + br label %sw.bb.65 + +sw.bb: ; preds = %if.end + %sunkaddr = ptrtoint %struct.DState* %s to i64 + %sunkaddr485 = add i64 %sunkaddr, 8 + %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32* + store i32 13, i32* %sunkaddr486, align 4 + %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %tmp2 = load i32, i32* %bsLive, align 4 + %cmp28.400 = icmp sgt i32 %tmp2, 7 + br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph + +sw.bb.if.then.29_crit_edge: ; preds = %sw.bb + %sunkaddr487 = ptrtoint %struct.DState* %s to i64 + %sunkaddr488 = add i64 %sunkaddr487, 32 + %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32* + %.pre425 = load i32, i32* %sunkaddr489, align 4 + br label %if.then.29 + +if.end.33.lr.ph: ; preds = %sw.bb + %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8 + %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1 + %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4 + %tmp4 = add i32 %.pre430, -1 + br label %if.end.33 + +if.then.29: ; preds 
= %while.body.backedge, %sw.bb.if.then.29_crit_edge + %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ] + %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ] + %sub = add nsw i32 %.lcssa393, -8 + %shr = lshr i32 %tmp5, %sub + %and = and i32 %shr, 255 + %sunkaddr491 = ptrtoint %struct.DState* %s to i64 + %sunkaddr492 = add i64 %sunkaddr491, 36 + %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32* + store i32 %sub, i32* %sunkaddr493, align 4 + %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9 + store i32 %and, i32* %blockSize100k, align 4 + %and.off = add nsw i32 %and, -49 + %tmp6 = icmp ugt i32 %and.off, 8 + br i1 %tmp6, label %save_state_and_return, label %if.end.62 + +if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph + %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ] + %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ] + %cmp35 = icmp eq i32 %lsr.iv482, -1 + br i1 %cmp35, label %save_state_and_return, label %if.end.37 + +if.end.37: ; preds = %if.end.33 + %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8** + %sunkaddr494 = ptrtoint %struct.DState* %s to i64 + %sunkaddr495 = add i64 %sunkaddr494, 32 + %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32* + %tmp9 = load i32, i32* %sunkaddr496, align 4 + %shl = shl i32 %tmp9, 8 + %tmp10 = load i8*, i8** %tmp8, align 8 + %tmp11 = load i8, i8* %tmp10, align 1 + %conv = zext i8 %tmp11 to i32 + %or = or i32 %conv, %shl + store i32 %or, i32* %sunkaddr496, align 4 + %add = add nsw i32 %tmp7, 8 + %sunkaddr497 = ptrtoint %struct.DState* %s to i64 + %sunkaddr498 = add i64 %sunkaddr497, 36 + %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32* + store i32 %add, i32* %sunkaddr499, align 4 + %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1 + store i8* %incdec.ptr, i8** %tmp8, align 8 + %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr501 = add i64 %sunkaddr500, 8 + %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32* + store i32 %lsr.iv482, i32* %sunkaddr502, align 4 + %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr504 = add i64 %sunkaddr503, 12 + %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32* + %tmp12 = load i32, i32* %sunkaddr505, align 4 + %inc = add i32 %tmp12, 1 + store i32 %inc, i32* %sunkaddr505, align 4 + %cmp49 = icmp eq i32 %inc, 0 + br i1 %cmp49, label %if.then.51, label %while.body.backedge + +if.then.51: ; preds = %if.end.37 + %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr507 = add i64 %sunkaddr506, 16 + %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32* + %tmp13 = load i32, i32* %sunkaddr508, align 4 + %inc53 = add i32 %tmp13, 1 + store i32 %inc53, i32* %sunkaddr508, align 4 + br label %while.body.backedge + +while.body.backedge: ; preds = %if.then.51, %if.end.37 + %lsr.iv.next483 = add i32 %lsr.iv482, -1 + %cmp28 = icmp sgt i32 %add, 7 + br i1 %cmp28, label %if.then.29, label %if.end.33 + +if.end.62: ; preds = %if.then.29 + %sub64 = add nsw i32 %and, -48 + %sunkaddr509 = ptrtoint %struct.DState* %s to i64 + %sunkaddr510 = add i64 %sunkaddr509, 40 + %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32* + store i32 %sub64, i32* %sunkaddr511, align 4 + br label %sw.bb.65 + +sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge + %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ] + %tmp14 = phi i32 [ %.pre426, 
%if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ] + %sunkaddr512 = ptrtoint %struct.DState* %s to i64 + %sunkaddr513 = add i64 %sunkaddr512, 8 + %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32* + store i32 14, i32* %sunkaddr514, align 4 + %cmp70.397 = icmp sgt i32 %tmp14, 7 + br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph + +if.end.82.lr.ph: ; preds = %sw.bb.65 + %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8 + %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1 + %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4 + %tmp16 = add i32 %.pre431, -1 + br label %if.end.82 + +if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65 + %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ] + %sub76 = add nsw i32 %.lcssa390, -8 + %sunkaddr516 = ptrtoint %struct.DState* %s to i64 + %sunkaddr517 = add i64 %sunkaddr516, 36 + %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32* + store i32 %sub76, i32* %sunkaddr518, align 4 + %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11 + %tmp17 = load i32, i32* %currBlockNo, align 4 + %inc117 = add nsw i32 %tmp17, 1 + store i32 %inc117, i32* %currBlockNo, align 4 + %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12 + %tmp18 = load i32, i32* %verbosity, align 4 + %cmp118 = icmp sgt i32 %tmp18, 1 + br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0 + +if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph + %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ] + %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ] + %cmp85 = icmp eq i32 %lsr.iv480, -1 + br i1 %cmp85, label %save_state_and_return, label %if.end.88 + +if.end.88: ; preds = %if.end.82 + %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8** + %sunkaddr519 = ptrtoint %struct.DState* %s to i64 + %sunkaddr520 = add i64 %sunkaddr519, 32 + %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32* + %tmp21 = load i32, i32* %sunkaddr521, align 4 + %shl90 = shl i32 %tmp21, 8 + %tmp22 = load i8*, i8** %tmp20, align 8 + %tmp23 = load i8, i8* %tmp22, align 1 + %conv93 = zext i8 %tmp23 to i32 + %or94 = or i32 %conv93, %shl90 + store i32 %or94, i32* %sunkaddr521, align 4 + %add97 = add nsw i32 %tmp19, 8 + %sunkaddr522 = ptrtoint %struct.DState* %s to i64 + %sunkaddr523 = add i64 %sunkaddr522, 36 + %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32* + store i32 %add97, i32* %sunkaddr524, align 4 + %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1 + store i8* %incdec.ptr100, i8** %tmp20, align 8 + %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr526 = add i64 %sunkaddr525, 8 + %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32* + store i32 %lsr.iv480, i32* %sunkaddr527, align 4 + %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr529 = add i64 %sunkaddr528, 12 + %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32* + %tmp24 = load i32, i32* %sunkaddr530, align 4 + %inc106 = add i32 %tmp24, 1 + store i32 %inc106, i32* %sunkaddr530, align 4 + %cmp109 = icmp eq i32 %inc106, 0 + br i1 %cmp109, label %if.then.111, label %while.body.68.backedge + +if.then.111: ; preds = %if.end.88 + %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr532 = add i64 %sunkaddr531, 16 + %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32* + 
%tmp25 = load i32, i32* %sunkaddr533, align 4 + %inc114 = add i32 %tmp25, 1 + store i32 %inc114, i32* %sunkaddr533, align 4 + br label %while.body.68.backedge + +while.body.68.backedge: ; preds = %if.then.111, %if.end.88 + %lsr.iv.next481 = add i32 %lsr.iv480, -1 + %cmp70 = icmp sgt i32 %add97, 7 + br i1 %cmp70, label %if.then.72, label %if.end.82 + +if.then.120: ; preds = %if.then.72 + %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117) + br label %sw.bb.123 + +sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge + %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ] + %sunkaddr534 = ptrtoint %struct.DState* %s to i64 + %sunkaddr535 = add i64 %sunkaddr534, 8 + %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32* + store i32 25, i32* %sunkaddr536, align 4 + %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4 + %cmp128.395 = icmp sgt i32 %tmp26, 7 + br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph + +sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123 + %sunkaddr537 = ptrtoint %struct.DState* %s to i64 + %sunkaddr538 = add i64 %sunkaddr537, 32 + %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32* + %.pre429 = load i32, i32* %sunkaddr539, align 4 + br label %if.then.130 + +if.end.140.lr.ph: ; preds = %sw.bb.123 + %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8 + %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1 + %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4 + %tmp28 = add i32 %.pre432, -1 + br label %if.end.140 + +if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge + %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ] + %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ] + %sub134 = add nsw i32 %.lcssa, -8 + %shr135 = lshr i32 %tmp29, %sub134 + store i32 %sub134, i32* %bsLive127.pre-phi, align 4 + %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13 + %tmp30 = load i32, i32* %origPtr, align 4 + %shl175 = shl i32 %tmp30, 8 + %conv176 = and i32 %shr135, 255 + %or177 = or i32 %shl175, %conv176 + store i32 %or177, i32* %origPtr, align 4 + %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27 + %tmp31 = load i32, i32* %nInUse, align 4 + %add179 = add nsw i32 %tmp31, 2 + br label %save_state_and_return + +if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph + %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ] + %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ] + %cmp143 = icmp eq i32 %lsr.iv, -1 + br i1 %cmp143, label %save_state_and_return, label %if.end.146 + +if.end.146: ; preds = %if.end.140 + %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8** + %sunkaddr541 = ptrtoint %struct.DState* %s to i64 + %sunkaddr542 = add i64 %sunkaddr541, 32 + %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32* + %tmp34 = load i32, i32* %sunkaddr543, align 4 + %shl148 = shl i32 %tmp34, 8 + %tmp35 = load i8*, i8** %tmp33, align 8 + 
%tmp36 = load i8, i8* %tmp35, align 1 + %conv151 = zext i8 %tmp36 to i32 + %or152 = or i32 %conv151, %shl148 + store i32 %or152, i32* %sunkaddr543, align 4 + %add155 = add nsw i32 %tmp32, 8 + store i32 %add155, i32* %bsLive127.pre-phi, align 4 + %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1 + store i8* %incdec.ptr158, i8** %tmp33, align 8 + %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr545 = add i64 %sunkaddr544, 8 + %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32* + store i32 %lsr.iv, i32* %sunkaddr546, align 4 + %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr548 = add i64 %sunkaddr547, 12 + %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32* + %tmp37 = load i32, i32* %sunkaddr549, align 4 + %inc164 = add i32 %tmp37, 1 + store i32 %inc164, i32* %sunkaddr549, align 4 + %cmp167 = icmp eq i32 %inc164, 0 + br i1 %cmp167, label %if.then.169, label %while.body.126.backedge + +if.then.169: ; preds = %if.end.146 + %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr551 = add i64 %sunkaddr550, 16 + %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32* + %tmp38 = load i32, i32* %sunkaddr552, align 4 + %inc172 = add i32 %tmp38, 1 + store i32 %inc172, i32* %sunkaddr552, align 4 + br label %while.body.126.backedge + +while.body.126.backedge: ; preds = %if.then.169, %if.end.146 + %lsr.iv.next = add i32 %lsr.iv, -1 + %cmp128 = icmp sgt i32 %add155, 7 + br i1 %cmp128, label %if.then.130, label %if.end.140 + +sw.default: ; preds = %if.end, %if.end.thread + %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ] + %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ] + %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ] + %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ] + %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ] + %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ] + %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ] + %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ] + %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ] + %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ] + %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ] + %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ] + %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ] + %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ] + %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ] + %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ] + %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ] + %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ] + %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ] + %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ] + %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ] + %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ] + %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ] + %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ] + %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ] + %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ] + 
%save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ] + %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ] + %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ] + %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ] + %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ] + %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ] + %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ] + %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ] + %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ] + %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ] + %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ] + tail call void @bar(i32 4001) + br label %save_state_and_return + +save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29 + %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ] + %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ] + %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ] + %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ] + %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ] + %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ] + %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ] + %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ] + %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ] + %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ] + %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ] + %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ] + %tmp70 = phi i32 [ %tmp53, 
%sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ] + %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ] + %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ] + %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ] + %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ] + %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ] + %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ] + %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ] + %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ] + %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ] + %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ] + %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ] + %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ] + %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ 
%save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ] + %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ] + %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ] + %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ] + %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ] + %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ] + %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ] + %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ] + %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ] + %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ] + %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ] + %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ] + %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], 
[ 0, %if.end.33 ] + store i32 %tmp58, i32* %save_i, align 4 + store i32 %tmp59, i32* %save_j3.pre-phi468, align 4 + store i32 %tmp60, i32* %save_t4.pre-phi466, align 4 + store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4 + store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4 + store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4 + store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4 + store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4 + store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4 + store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4 + store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4 + store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4 + store i32 %tmp68, i32* %save_es14.pre-phi446, align 4 + store i32 %tmp69, i32* %save_N15.pre-phi444, align 4 + store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4 + store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4 + store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4 + store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4 + store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4 + ret i32 %retVal.0 +} + +!0 = !{!"branch_weights", i32 10, i32 1} diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll index 739570236da9..1820b8163a90 100644 --- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s ; This test aims to check basic correctness of frame layout & ; frame access code. There are 8 functions in this test file, @@ -252,11 +252,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -299,11 +299,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -361,11 +361,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -414,11 +414,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). 
-; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -465,11 +465,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -522,10 +522,10 @@ bb1: ; CHECK-LABEL: realign_conditional2 ; Extra realignment in the prologue (performance issue). +; CHECK: tbz {{.*}} .[[LABEL:.*]] ; CHECK: sub x9, sp, #32 // =32 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp -; CHECK: tbz {{.*}} .[[LABEL:.*]] ; Stack is realigned in a non-entry BB. ; CHECK: sub [[REG:x[01-9]+]], sp, #64 ; CHECK: and sp, [[REG]], #0xffffffffffffffe0 diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll index ea3b8fa55732..1bc2a3ccb1ca 100644 --- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll +++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll @@ -1,7 +1,10 @@ -; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON -; CHECK-LABEL: load_factor2: -; CHECK: ld2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: load_factor2: +; NEON: ld2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: load_factor2: +; NONEON-NOT: ld2 define <8 x i8> @load_factor2(<16 x i8>* %ptr) { %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> @@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) { ret <8 x i8> %add } -; CHECK-LABEL: load_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -21,8 +26,10 @@ define <4 x i32> @load_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -32,16 +39,20 @@ define <4 x i32> @load_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_factor2: -; CHECK: st2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: store_factor2: +; NEON: st2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: store_factor2: +; NONEON-NOT: st2 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 ret void } -; 
CHECK-LABEL: store_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_factor3: +; NONEON-NOT: st3 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -51,8 +62,10 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ret void } -; CHECK-LABEL: store_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_factor4: +; NONEON-NOT: st4 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -65,8 +78,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ; The following cases test that interleaved access of pointer vectors can be ; matched to ldN/stN instructions. -; CHECK-LABEL: load_ptrvec_factor2: -; CHECK: ld2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor2: +; NEON: ld2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor2: +; NONEON-NOT: ld2 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { %base = bitcast i32** %ptr to <4 x i32*>* %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 @@ -74,8 +89,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { ret <2 x i32*> %strided.v0 } -; CHECK-LABEL: load_ptrvec_factor3: -; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor3: +; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor3: +; NONEON-NOT: ld3 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <6 x i32*>* %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 @@ -86,8 +103,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: load_ptrvec_factor4: -; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor4: +; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor4: +; NONEON-NOT: ld4 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <8 x i32*>* %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 @@ -98,8 +117,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: store_ptrvec_factor2: -; CHECK: st2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor2: +; NEON: st2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor2: +; NONEON-NOT: st2 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { %base = bitcast i32** %ptr to <4 x i32*>* %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -107,8 +128,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { ret void } -; CHECK-LABEL: store_ptrvec_factor3: -; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor3: +; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor3: +; NONEON-NOT: st3 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { %base = bitcast i32** %ptr to <6 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32>
@@ -118,8 +141,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ret void } -; CHECK-LABEL: store_ptrvec_factor4: -; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor4: +; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor4: +; NONEON-NOT: st4 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { %base = bitcast i32* %ptr to <8 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -132,8 +157,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ; The following cases check that shuffle masks with undef indices can be matched ; into ldN/stN instructions. -; CHECK-LABEL: load_undef_mask_factor2: -; CHECK: ld2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor2: +; NEON: ld2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor2: +; NONEON-NOT: ld2 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { %base = bitcast i32* %ptr to <8 x i32>* %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 @@ -143,8 +170,10 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -154,8 +183,10 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -165,8 +196,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_undef_mask_factor2: -; CHECK: st2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor2: +; NEON: st2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor2: +; NONEON-NOT: st2 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { %base = bitcast i32* %ptr to <8 x i32>* %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -174,8 +207,10 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { ret void } -; CHECK-LABEL: store_undef_mask_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor3: +; NONEON-NOT: st3 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -185,8 +220,10 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < ret void } -; CHECK-LABEL: store_undef_mask_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor4: +; NONEON-NOT: st4 define void
@store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -195,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 ret void } + +; Check that we do something sane with illegal types. + +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: ldr q[[V:[0-9]+]], [x0] +; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s +; NEON-NEXT: ret +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr s0, [x0] +; NONEON-NEXT: ldr s1, [x0, #8] +; NONEON-NEXT: ret +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s +; NEON-NEXT: st1 { v0.d }[0], [x0] +; NEON-NEXT: ret +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2 +; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0 +; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32 +; NONEON-NEXT: str x[[RES]], [x0] +; NONEON-NEXT: ret +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll new file mode 100644 index 000000000000..84277995ce5b --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -0,0 +1,50 @@ +; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s +; REQUIRES: asserts +target triple = "aarch64--linux-android" + +%typeD = type { i32, i32, [256 x i32], [257 x i32] } + +; Function Attrs: noreturn nounwind uwtable +define i32 @test1(%typeD* nocapture %s) { +entry: +; CHECK-LABEL: entry: +; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032 +; CHECK: br label %do.body.i + + + %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0 + %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1 + %.pre = load i32, i32* %tPos, align 4 + br label %do.body.i + +do.body.i: +; CHECK-LABEL: do.body.i: +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 + + + %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] + %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ] + %add.i = add nsw i32 %1, %0 + %shr.i = ashr i32 %add.i, 1 + %idxprom.i = sext i32 %shr.i to i64 + %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i + %2 = load i32, i32* %arrayidx.i, align 4 + %cmp.i = icmp sle i32 %2, %.pre + %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i + %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1 + %sub.i = sub nsw i32 %na.1.i, %nb.1.i + %cmp1.i = icmp eq i32 %sub.i, 1 + br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge + +do.body.i.backedge: + %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ] + %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ] + br label %do.body.i + +fooo.exit: ; preds = %do.body.i + store i32 %nb.1.i, i32* %k0, align 4 + br label 
%do.body.i.backedge +} + diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll new file mode 100644 index 000000000000..fb13b706cfaf --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll @@ -0,0 +1,511 @@ +; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linux-gnu" + +; CHECK-LABEL: smax_B +; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @smax_B(<16 x i8>* nocapture readonly %arr) { + %arr.load = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: smax_H +; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @smax_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: smax_S +; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @smax_S(<4 x i32> * nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4
x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: smax_D +; CHECK-NOT: smaxv +define i64 @smax_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: umax_B +; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @umax_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: umax_H +; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @umax_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, 
%rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: umax_S +; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @umax_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: umax_D +; CHECK-NOT: umaxv +define i64 @umax_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: smin_B +; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @smin_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: smin_H +; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @smin_H(<8 x i16>* nocapture 
readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: smin_S +; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @smin_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: smin_D +; CHECK-NOT: sminv +define i64 @smin_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: umin_B +; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @umin_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult 
<16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: umin_H +; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @umin_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: umin_S +; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @umin_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: umin_D +; CHECK-NOT: uminv +define i64 @umin_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + +; CHECK-LABEL: fmaxnm_S +; CHECK: fmaxnmv +define float @fmaxnm_S(<4 x float>* 
nocapture readonly %arr) { + %rdx.minmax.select = load <4 x float>, <4 x float>* %arr + %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf + %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1 + %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 + %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 + %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 + %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + ret float %r +} + +; CHECK-LABEL: fminnm_S +; CHECK: fminnmv +define float @fminnm_S(<4 x float>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x float>, <4 x float>* %arr + %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf + %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1 + %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 + %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 + %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 + %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + ret float %r +} + +define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umax_256 +; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: umaxv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umax_512 +; CHECK: 
umax v +; CHECK-NEXT: umax v +; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umin_256 +; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uminv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umin_512 +; CHECK: umin v +; CHECK-NEXT: umin v +; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i32> 
%arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smax_256 +; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: smaxv {{h[0-9]+}}, [[V0]] + %arr.load = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %arr.load, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i16> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %arr.load, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smax_512 +; CHECK: smax v +; CHECK-NEXT: smax v +; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> 
%rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smin_256 +; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: sminv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smin_512 +; CHECK: smin v +; CHECK-NEXT: smin v +; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x 
i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} diff --git a/test/CodeGen/AArch64/aarch64-smax-constantfold.ll b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll new file mode 100644 index 000000000000..0e5b59f95126 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s + +; Function Attrs: nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) + +; CHECK-LABEL: test +define <4 x i16> @test() { +entry: +; CHECK: movi d{{[0-9]+}}, #0000000000000000 + %0 = tail call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> , <4 x i16> zeroinitializer) + ret <4 x i16> %0 +} diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index f0c7572ebf13..f30ab89f238b 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -80,6 +80,64 @@ end: ret void } +define void @sub_i8rhs() minsize { +; CHECK-LABEL: sub_i8rhs: + %val8_tmp = load i8, i8* @var8 + %lhs32 = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i8 operand. + %val8 = add i8 %val8_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i8 %val8 to i32 + %res32_zext = sub i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i8 %val8 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i8 %val8 to i32 + %res32_sext = sub i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i8 %val8 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4 + + ret void +} + define void @addsub_i16rhs() minsize { ; CHECK-LABEL: addsub_i16rhs: 
%val16_tmp = load i16, i16* @var16 @@ -155,6 +213,64 @@ end: ret void } +define void @sub_i16rhs() minsize { +; CHECK-LABEL: sub_i16rhs: + %val16_tmp = load i16, i16* @var16 + %lhs32 = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i16 operand. + %val16 = add i16 %val16_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i16 %val16 to i32 + %res32_zext = sub i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i16 %val16 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i16 %val16 to i32 + %res32_sext = sub i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i16 %val16 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4 + + ret void +} + ; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for ; example), but the remaining instructions are probably not idiomatic ; in the face of "add/sub (shifted register)" so I don't intend to. 
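One constraint the shift amounts above silently respect: the add/sub (extended register) forms can only fold a left shift of 0 to 4 into the {u,s}xt{b,h,w} operand, which is why no test here shifts by more than 4. A minimal sketch of the boundary case, using a hypothetical function that is not part of the imported test:

define i64 @sub_i16rhs_shift5(i16 %val16, i64 %lhs64) minsize {
  %rhs64_zext = zext i16 %val16 to i64
  ; A shift of 5 is outside the 0-4 range the extended-register form encodes,
  ; so this sub cannot use a uxth operand. (Hypothetical check below.)
  %rhs64_zext_shift = shl i64 %rhs64_zext, 5
  %res64 = sub i64 %lhs64, %rhs64_zext_shift
; CHECK-NOT: uxth #5
  ret i64 %res64
}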
@@ -187,3 +303,33 @@ define void @addsub_i32rhs() minsize { ret void } + +define void @sub_i32rhs() minsize { +; CHECK-LABEL: sub_i32rhs: + %val32_tmp = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + %val32 = add i32 %val32_tmp, 123 + + %rhs64_zext = zext i32 %val32 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw + + %rhs64_zext_shift = shl i64 %rhs64_zext, 2 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 + + %rhs64_sext = sext i32 %val32 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + + %rhs64_sext_shift = shl i64 %rhs64_sext, 2 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 + + ret void +} diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index 5b2278ce8a35..45754377b2d9 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s +; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll index 173a440326ac..a66ea0df2e98 100644 --- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll +++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll @@ -22,22 +22,22 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone !llvm.dbg.sp = !{!1, !7, !10, !11, !12} !0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6) -!1 = !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) !2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594") -!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21) +!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21) !4 = !DISubroutineType(types: !5) !5 = !{!6} !6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!7 = !DISubprogram(name: "putc_mem", line: 30, isLocal: 
true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) +!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) !8 = !DISubroutineType(types: !9) !9 = !{null} -!10 = !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) -!11 = !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) -!12 = !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) +!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) !13 = !DILocation(line: 653, column: 5, scope: !14) !14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15) !15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1) -!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6) +!16 = !DILocalVariable(name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6) !17 = distinct !DILexicalBlock(line: 850, column: 12, file: !20, scope: !14) !18 = !DILocation(line: 853, column: 11, scope: !17) !19 = !DILocation(line: 853, column: 29, scope: !17) diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll index f27570acc820..e77952e4b8a1 100644 --- a/test/CodeGen/AArch64/arm64-aapcs-be.ll +++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll @@ -32,7 +32,7 @@ define float @test_block_addr([8 x float], [1 x float] %in) { define void @test_block_addr_callee() { ; CHECK-LABEL: test_block_addr_callee: -; CHECK: str {{[a-z0-9]+}}, [sp] +; CHECK: str {{[a-z0-9]+}}, [sp, #-16]! ; CHECK: bl test_block_addr %val = insertvalue [1 x float] undef, float 0.0, 0 call float @test_block_addr([8 x float] undef, [1 x float] %val) diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index d0880cd4f3eb..441f45bf90b3 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false -disable-post-ra < %s | FileCheck %s @var = global i32 0, align 4 @@ -27,12 +27,13 @@ define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { ; Check stack slots are 64-bit at all times. define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) { - ; Part of last store. Blasted scheduler. 
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] - %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64, align 8 ; CHECK: ldrb w[[EXT:[0-9]+]], [sp] + + ; Part of last store. Blasted scheduler. +; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] + ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] @@ -63,8 +64,8 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64 -; CHECK: and [[EXT:x[0-9]+]], x0, #0x1 -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and w[[EXT:[0-9]+]], w0, #0x1 +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_char = sext i8 %char to i64 store volatile i64 %ext_char, i64* @var64 @@ -73,13 +74,13 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_short = zext i16 %short to i64 store volatile i64 %ext_short, i64* @var64 -; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64 -; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32 -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: mov w[[EXT:[0-9]+]], w3 +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] ret void } diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll index 1c1b58b8b140..dc9884f12f57 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -508,7 +508,7 @@ entry: ; "i64 %0" should be in register x7. ; "i32 8" should be on stack at [sp]. ; CHECK: ldr x7, [{{x[0-9]+}}] -; CHECK: str {{w[0-9]+}}, [sp] +; CHECK: str {{w[0-9]+}}, [sp, #-16]! ; FAST-LABEL: i64_split ; FAST: ldr x7, [{{x[0-9]+}}] ; FAST: mov x[[R0:[0-9]+]], sp diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll index 4703d25a6016..d46800d34cac 100644 --- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -1,6 +1,7 @@ -; RUN: llc -march arm64 < %s | FileCheck %s +; RUN: llc -march arm64 < %s -aarch64-collect-loh=false | FileCheck %s ; rdar://13452552 -; ModuleID = 'reduced_test.ll' +; Disable the collecting of LOH so that the labels do not get in the +; way of the NEXT patterns. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios3.0.0" @@ -13,8 +14,8 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { ; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]] ; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw] ; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw] -; CHECK-NEXT cmp [[BLOCKVAL1]], [[BLOCKVAL2]] -; CHECK-NEXT b.ne +; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]] +; CHECK-NEXT: b.ne ; Next BB ; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw ; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll index eb0cd3547bda..36424506bee8 100644 --- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK: ldr w[[REG:[0-9]+]], [x19, #264] -; CHECK: str w[[REG]], [x19, #132] -; CHECK: ldr w{{[0-9]+}}, [x19, #264] +; CHECK: str w[[REG0:[0-9]+]], [x19, #264] +; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]] +; CHECK: str w[[REG1]], [x19, #132] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll index f36e706b15dd..d5d9a1b98174 100644 --- a/test/CodeGen/AArch64/arm64-arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -123,7 +123,8 @@ entry: define i64 @t14(i16 %a, i64 %x) nounwind ssp { entry: ; CHECK-LABEL: t14: -; CHECK: add x0, x1, w0, uxth #3 +; CHECK: and w8, w0, #0xffff +; CHECK: add x0, x1, w8, uxtw #3 ; CHECK: ret %c = zext i16 %a to i64 %d = shl i64 %c, 3 diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll index a76cf74a6d0c..44c24c51f0df 100644 --- a/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -173,10 +173,13 @@ define i128 @atomic_load_seq_cst(i128* %p) { ret i128 %r } -define i128 @atomic_load_relaxed(i128* %p) { +define i128 @atomic_load_relaxed(i64, i64, i128* %p) { ; CHECK-LABEL: atomic_load_relaxed: ; CHECK-NOT: dmb -; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x2] +; CHECK-NEXT: stxp [[SUCCESS:w[0-9]+]], [[LO]], [[HI]], [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] ; CHECK-NOT: dmb %r = load atomic i128, i128* %p monotonic, align 16 ret i128 %r diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll index 0824bd881a95..5d8d60de5fc5 100644 --- a/test/CodeGen/AArch64/arm64-atomic.ll +++ b/test/CodeGen/AArch64/arm64-atomic.ll @@ -2,13 +2,17 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] +; 
CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -17,13 +21,16 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { ; CHECK-LABEL: val_compare_and_swap_from_load: ; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_rel: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -47,13 +58,16 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_64: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], x1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val @@ -61,13 +75,13 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { define i32 @fetch_and_nand(i32* %p) #0 { ; CHECK-LABEL: fetch_and_nand: -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8 ; CHECK-NOT: 
stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw nand i32* %p, i32 7 release ret i32 %val @@ -76,12 +90,12 @@ define i32 @fetch_and_nand(i32* %p) #0 { define i64 @fetch_and_nand_64(i64* %p) #0 { ; CHECK-LABEL: fetch_and_nand_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]] ; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8 ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw nand i64* %p, i64 7 acq_rel ret i64 %val @@ -90,12 +104,12 @@ define i64 @fetch_and_nand_64(i64* %p) #0 { define i32 @fetch_and_or(i32* %p) #0 { ; CHECK-LABEL: fetch_and_or: ; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]] ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw or i32* %p, i32 5 seq_cst ret i32 %val @@ -104,11 +118,11 @@ define i32 @fetch_and_or(i32* %p) #0 { define i64 @fetch_and_or_64(i64* %p) #0 { ; CHECK: fetch_and_or_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7 ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw or i64* %p, i64 7 monotonic ret i64 %val } diff --git a/test/CodeGen/AArch64/arm64-builtins-linux.ll b/test/CodeGen/AArch64/arm64-builtins-linux.ll new file mode 100644 index 000000000000..34fa1b471561 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-builtins-linux.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s + +; Function Attrs: nounwind readnone +declare i8* @llvm.aarch64.thread.pointer() #1 + +define i8* @thread_pointer() { +; CHECK: thread_pointer: +; CHECK: mrs {{x[0-9]+}}, TPIDR_EL0 + %1 = tail call i8* @llvm.aarch64.thread.pointer() + ret i8* %1 +} diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll index 4e47ab6c03f3..25d874e54cb7 100644 --- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll +++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll @@ -15,10 +15,10 @@ target triple = "arm64-apple-ios7.0.0" ; CHECK: Maze1 ; CHECK: %if.then ; CHECK: cmp x{{[0-9]+}}, #2 -; CHECK-NEXT b.cc +; CHECK-NEXT: b.lo ; CHECK: %if.then ; CHECK: cmp x{{[0-9]+}}, #2 -; CHECK-NEXT b.cc +; CHECK-NEXT: b.lo define i32 @Maze1() nounwind ssp { entry: %0 = load i64, i64* @channelColumns, align 8, !tbaa !0 diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index ff18f7364337..72d3b8331162 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -104,11 +104,14 @@ if.end: ; preds = %if.then, %lor.lhs.f ; Speculatively 
execute division by zero. ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are ; safe to speculate. -; CHECK: speculate_division -; CHECK-NOT: cmp -; CHECK: sdiv -; CHECK: cmp -; CHECK-NEXT: ccmp +; CHECK-LABEL: speculate_division: +; CHECK: cmp w0, #1 +; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0 +; CHECK: ccmp [[DIVRES]], #16, #0, ge +; CHECK: b.gt [[BLOCK:LBB[0-9_]+]] +; CHECK: bl _foo +; CHECK: [[BLOCK]]: +; CHECK: orr w0, wzr, #0x7 define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 @@ -287,3 +290,156 @@ sw.bb.i.i: %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16 br label %sw.bb.i.i } + +; CHECK-LABEL: select_and +define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { +; CHECK: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret + %1 = icmp slt i32 %w0, %w1 + %2 = icmp ne i32 5, %w1 + %3 = and i1 %1, %2 + %sel = select i1 %3, i64 %x2, i64 %x3 + ret i64 %sel +} + +; CHECK-LABEL: select_or +define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { +; CHECK: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret + %1 = icmp slt i32 %w0, %w1 + %2 = icmp ne i32 5, %w1 + %3 = or i1 %1, %2 + %sel = select i1 %3, i64 %x2, i64 %x3 + ret i64 %sel +} + +; CHECK-LABEL: select_complicated +define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { +; CHECK: ldr [[REG:d[0-9]+]], +; CHECK: fcmp d0, d2 +; CHECK-NEXT: fmov d2, #13.00000000 +; CHECK-NEXT: fccmp d1, d2, #4, ne +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d0, d1, #4, vc +; CHECK-NEXT: csel w0, w0, w1, eq + %1 = fcmp one double %v1, %v2 + %2 = fcmp oeq double %v2, 13.0 + %3 = fcmp oeq double %v1, 42.0 + %or0 = or i1 %2, %3 + %or1 = or i1 %1, %or0 + %sel = select i1 %or1, i16 %a, i16 %b + ret i16 %sel +} + +; CHECK-LABEL: gccbug +define i64 @gccbug(i64 %x0, i64 %x1) { +; CHECK: cmp x0, #2 +; CHECK-NEXT: ccmp x0, #4, #4, ne +; CHECK-NEXT: ccmp x1, #0, #0, eq +; CHECK-NEXT: orr w[[REGNUM:[0-9]+]], wzr, #0x1 +; CHECK-NEXT: cinc x0, x[[REGNUM]], eq +; CHECK-NEXT: ret + %cmp0 = icmp eq i64 %x1, 0 + %cmp1 = icmp eq i64 %x0, 2 + %cmp2 = icmp eq i64 %x0, 4 + + %or = or i1 %cmp2, %cmp1 + %and = and i1 %or, %cmp0 + + %sel = select i1 %and, i64 2, i64 1 + ret i64 %sel +} + +; CHECK-LABEL: select_ororand +define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) { +; CHECK: cmp w3, #4 +; CHECK-NEXT: ccmp w2, #2, #0, gt +; CHECK-NEXT: ccmp w1, #13, #2, ge +; CHECK-NEXT: ccmp w0, #0, #4, ls +; CHECK-NEXT: csel w0, w3, wzr, eq +; CHECK-NEXT: ret + %c0 = icmp eq i32 %w0, 0 + %c1 = icmp ugt i32 %w1, 13 + %c2 = icmp slt i32 %w2, 2 + %c4 = icmp sgt i32 %w3, 4 + %or = or i1 %c0, %c1 + %and = and i1 %c2, %c4 + %or1 = or i1 %or, %and + %sel = select i1 %or1, i32 %w3, i32 0 + ret i32 %sel +} + +; CHECK-LABEL: select_andor +define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) { +; CHECK: cmp w1, w2 +; CHECK-NEXT: ccmp w0, #0, #4, lt +; CHECK-NEXT: ccmp w0, w1, #0, eq +; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: ret + %c0 = icmp eq i32 %v1, %v2 + %c1 = icmp sge i32 %v2, %v3 + %c2 = icmp eq i32 %v1, 0 + %or = or i1 %c2, %c1 + %and = and i1 %or, %c0 + %sel = select i1 %and, i32 %v1, i32 %v2 + ret i32 %sel +} + +; CHECK-LABEL: select_noccmp1 +define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) { +; CHECK: cmp x0, #0 +; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt +; CHECK-NEXT: cmp x0, #13 +; CHECK-NOT: ccmp +;
CHECK-NEXT: cset [[REG1:w[0-9]+]], gt +; CHECK-NEXT: cmp x2, #2 +; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt +; CHECK-NEXT: cmp x2, #4 +; CHECK-NEXT: cset [[REG3:w[0-9]+]], gt +; CHECK-NEXT: and [[REG4:w[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: and [[REG5:w[0-9]+]], [[REG2]], [[REG3]] +; CHECK-NEXT: orr [[REG6:w[0-9]+]], [[REG4]], [[REG5]] +; CHECK-NEXT: cmp [[REG6]], #0 +; CHECK-NEXT: csel x0, xzr, x3, ne +; CHECK-NEXT: ret + %c0 = icmp slt i64 %v1, 0 + %c1 = icmp sgt i64 %v1, 13 + %c2 = icmp slt i64 %v3, 2 + %c4 = icmp sgt i64 %v3, 4 + %and0 = and i1 %c0, %c1 + %and1 = and i1 %c2, %c4 + %or = or i1 %and0, %and1 + %sel = select i1 %or, i64 0, i64 %r + ret i64 %sel +} + +@g = global i32 0 + +; Should not use ccmp if we have to compute the or expression in an integer +; register anyway because of other users. +; CHECK-LABEL: select_noccmp2 +define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { +; CHECK: cmp x0, #0 +; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt +; CHECK-NOT: ccmp +; CHECK-NEXT: cmp x0, #13 +; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt +; CHECK-NEXT: orr [[REG2:w[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: cmp [[REG2]], #0 +; CHECK-NEXT: csel x0, xzr, x3, ne +; CHECK-NEXT: sbfx [[REG3:w[0-9]+]], [[REG2]], #0, #1 +; CHECK-NEXT: adrp x[[REGN4:[0-9]+]], _g@PAGE +; CHECK-NEXT: str [[REG3]], [x[[REGN4]], _g@PAGEOFF] +; CHECK-NEXT: ret + %c0 = icmp slt i64 %v1, 0 + %c1 = icmp sgt i64 %v1, 13 + %or = or i1 %c0, %c1 + %sel = select i1 %or, i64 0, i64 %r + %ext = sext i1 %or to i32 + store volatile i32 %ext, i32* @g + ret i64 %sel +} diff --git a/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll new file mode 100644 index 000000000000..528d2538bb4a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK: orr w0, wzr, #0x1 +; CHECK-NEXT: bl foo +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: bl foo + +target triple = "aarch64--linux-android" +declare i32 @foo(i32) + +; Function Attrs: nounwind uwtable +define i32 @main() { +entry: + %call = tail call i32 @foo(i32 1) + %call1 = tail call i32 @foo(i32 1) + ret i32 0 +} diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll index c0aa63cc4331..59147d401a30 100644 --- a/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -51,3 +51,607 @@ if.end4: ; preds = %if.then2, %if.then, %add6 = add nsw i32 %tmp3, %t.addr.0 ret i32 %add6 } + +@C = common global i32 0, align 4 + +; Check that we catch AdrpLdrGotLdr case when we have a simple chain: +; adrp -> ldrgot -> ldr. +; CHECK-LABEL: _getC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getC() { + %res = load i32, i32* @C, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getSExtC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExtC() { + %res = load i32, i32* @C, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times. +; Make sure we emit AdrpLdrGot for those. +; CHECK-LABEL: _getSeveralC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define void @getSeveralC(i32 %t) { +entry: + %tmp = load i32, i32* @C, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* @C, align 4 + ret void +} + +; Make sure we catch that: +; adrp -> ldrgot -> str. +; CHECK-LABEL: _setC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define void @setC(i32 %t) { +entry: + store i32 %t, i32* @C, align 4 + ret void +} + +; Perform the same tests for an internal global and a displacement +; in the addressing mode. +; Indeed we will get an ADD for those instead of LOADGot. +@InternalC = internal global i32 0, align 4 + +; Check that we catch AdrpAddLdr case when we have a simple chain: +; adrp -> add -> ldr. +; CHECK-LABEL: _getInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define i32 @getInternalCPlus4() { + %addr = getelementptr i32, i32* @InternalC, i32 4 + %res = load i32, i32* %addr, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpAddLdr for those. +; CHECK-LABEL: _getSExtInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExtInternalCPlus4() { + %addr = getelementptr i32, i32* @InternalC, i32 4 + %res = load i32, i32* %addr, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times.
+; Make sure we emit AdrpAdd for those. +; CHECK-LABEL: _getSeveralInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]] +define void @getSeveralInternalCPlus4(i32 %t) { +entry: + %addr = getelementptr i32, i32* @InternalC, i32 4 + %tmp = load i32, i32* %addr, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* %addr, align 4 + ret void +} + +; Make sure we catch that: +; adrp -> add -> str. +; CHECK-LABEL: _setInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str w0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define void @setInternalCPlus4(i32 %t) { +entry: + %addr = getelementptr i32, i32* @InternalC, i32 4 + store i32 %t, i32* %addr, align 4 + ret void +} + +; Check that we catch AdrpLdr case when we have a simple chain: +; adrp -> ldr. +; CHECK-LABEL: _getInternalC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]] +define i32 @getInternalC() { + %res = load i32, i32* @InternalC, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpLdr for those. +; CHECK-LABEL: _getSExtInternalC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]] +define i64 @getSExtInternalC() { + %res = load i32, i32* @InternalC, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times. +; Make sure we do not catch anything here. We have an adrp alone, +; there is not much we can do about it. +; CHECK-LABEL: _getSeveralInternalC +; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +define void @getSeveralInternalC(i32 %t) { +entry: + %tmp = load i32, i32* @InternalC, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* @InternalC, align 4 + ret void +} + +; Make sure we do not catch anything when: +; adrp -> str. +; We cannot fold anything in the str at this point. +; Indeed, strs do not support literals.
+; CHECK-LABEL: _setInternalC +; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: str w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +define void @setInternalC(i32 %t) { +entry: + store i32 %t, i32* @InternalC, align 4 + ret void +} + +; Now check other variant of loads/stores. + +@D = common global i8 0, align 4 + +; LDRB does not support loading from a literal. +; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those. +; CHECK-LABEL: _getD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldrb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define i8 @getD() { + %res = load i8, i8* @D, align 4 + ret i8 %res +} + +; CHECK-LABEL: _setD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: strb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setD(i8 %t) { + store i8 %t, i8* @D, align 4 + ret void +} + +; LDRSB supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExtD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getSExtD() { + %res = load i8, i8* @D, align 4 + %sextres = sext i8 %res to i32 + ret i32 %sextres +} + +; LDRSB supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExt64D +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsb x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExt64D() { + %res = load i8, i8* @D, align 4 + %sextres = sext i8 %res to i64 + ret i64 %sextres +} + +@E = common global i16 0, align 4 + +; LDRH does not support loading from a literal. +; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those. +; CHECK-LABEL: _getE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldrh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define i16 @getE() { + %res = load i16, i16* @E, align 4 + ret i16 %res +} + +; LDRSH supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getSExtE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getSExtE() { + %res = load i16, i16* @E, align 4 + %sextres = sext i16 %res to i32 + ret i32 %sextres +} + +; CHECK-LABEL: _setE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: strh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setE(i16 %t) { + store i16 %t, i16* @E, align 4 + ret void +} + +; LDRSH supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExt64E +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsh x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExt64E() { + %res = load i16, i16* @E, align 4 + %sextres = sext i16 %res to i64 + ret i64 %sextres +} + +@F = common global i64 0, align 4 + +; LDR supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getF +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getF() { + %res = load i64, i64* @F, align 4 + ret i64 %res +} + +; CHECK-LABEL: _setF +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setF(i64 %t) { + store i64 %t, i64* @F, align 4 + ret void +} + +@G = common global float 0.0, align 4 + +; LDR float supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getG +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define float @getG() { + %res = load float, float* @G, align 4 + ret float %res +} + +; CHECK-LABEL: _setG +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setG(float %t) { + store float %t, float* @G, align 4 + ret void +} + +@H = common global half 0.0, align 4 + +; LDR half supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getH +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define half @getH() { + %res = load half, half* @H, align 4 + ret half %res +} + +; CHECK-LABEL: _setH +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setH(half %t) { + store half %t, half* @H, align 4 + ret void +} + +@I = common global double 0.0, align 4 + +; LDR double supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getI +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define double @getI() { + %res = load double, double* @I, align 4 + ret double %res +} + +; CHECK-LABEL: _setI +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setI(double %t) { + store double %t, double* @I, align 4 + ret void +} + +@J = common global <2 x i32> zeroinitializer, align 4 + +; LDR 64-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getJ +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <2 x i32> @getJ() { + %res = load <2 x i32>, <2 x i32>* @J, align 4 + ret <2 x i32> %res +} + +; CHECK-LABEL: _setJ +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setJ(<2 x i32> %t) { + store <2 x i32> %t, <2 x i32>* @J, align 4 + ret void +} + +@K = common global <4 x i32> zeroinitializer, align 4 + +; LDR 128-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getK +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <4 x i32> @getK() { + %res = load <4 x i32>, <4 x i32>* @K, align 4 + ret <4 x i32> %res +} + +; CHECK-LABEL: _setK +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setK(<4 x i32> %t) { + store <4 x i32> %t, <4 x i32>* @K, align 4 + ret void +} + +@L = common global <1 x i8> zeroinitializer, align 4 + +; LDR 8-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getL +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr b0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <1 x i8> @getL() { + %res = load <1 x i8>, <1 x i8>* @L, align 4 + ret <1 x i8> %res +} + +; CHECK-LABEL: _setL +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; Ultimately we should generate str b0, but right now, we match the vector +; variant, which does not allow folding the immediate into the store. +; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define void @setL(<1 x i8> %t) { + store <1 x i8> %t, <1 x i8>* @L, align 4 + ret void +} + +; Make sure we do not assert when we do not track +; all the aliases of a tuple register.
+; Indeed the tuple register can be tracked because of +; one of its elements, but the other elements of the tuple +; do not need to be tracked and we used to assert on that. +; Note: The test case is fragile in the sense that we need +; a tuple register to appear in the lowering. Thus, the target +; cpu is required to reproduce the problem. +; CHECK-LABEL: _uninterestingSub +; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE +; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] +; The tuple comes from the next instruction. +; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] +; CHECK: ret +define void @uninterestingSub(i8* nocapture %row) #0 { + %tmp = bitcast i8* %row to <16 x i8>* + %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 + %vext43 = shufflevector <16 x i8> , <16 x i8> %tmp1, <16 x i32> + %add.i.414 = add <16 x i8> zeroinitializer, %vext43 + store <16 x i8> %add.i.414, <16 x i8>* %tmp, align 16 + %add.ptr51 = getelementptr inbounds i8, i8* %row, i64 16 + %tmp2 = bitcast i8* %add.ptr51 to <16 x i8>* + %tmp3 = load <16 x i8>, <16 x i8>* %tmp2, align 16 + %tmp4 = bitcast i8* undef to <16 x i8>* + %tmp5 = load <16 x i8>, <16 x i8>* %tmp4, align 16 + %vext157 = shufflevector <16 x i8> %tmp3, <16 x i8> %tmp5, <16 x i32> + %add.i.402 = add <16 x i8> zeroinitializer, %vext157 + store <16 x i8> %add.i.402, <16 x i8>* %tmp4, align 16 + ret void +} + +attributes #0 = { "target-cpu"="cyclone" } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 0ef7b143df80..55c9c6036ed5 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,9 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: b.eq LBB4_2 +; CHECK: tbz w0, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -106,9 +104,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry %1 = load i32, i32* %c.addr, align 4 -; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1 -; CHECK: cmp w[[REG]], #0 -; CHECK: b.eq LBB4_4 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4 %conv1 = trunc i32 %1 to i1 br i1 %conv1, label %if.then3, label %if.end4 @@ -118,8 +114,7 @@ if.then3: ; preds = %if.end if.end4: ; preds = %if.then3, %if.end %2 = load i64, i64* %d.addr, align 8 -; CHECK: cmp w{{[0-9]+}}, #0 -; CHECK: b.eq LBB4_6 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6 %conv5 = trunc i64 %2 to i1 br i1 %conv5, label %if.then7, label %if.end8 @@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1 -; CHECK: cmp [[REG3]], #0 -; CHECK: b.eq LBB5_2 +; CHECK: tbz w[[REG2]], #0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else diff --git a/test/CodeGen/AArch64/arm64-fmax-safe.ll b/test/CodeGen/AArch64/arm64-fmax-safe.ll new file mode 100644 index 000000000000..8b7d66986e78 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fmax-safe.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=arm64 < %s | FileCheck %s + +define double @test_direct(float %in) { +; CHECK-LABEL: test_direct: + %cmp = fcmp olt float %in, 0.000000e+00 + %val = select i1 %cmp, float 0.000000e+00, float %in + %longer = fpext float %val to double + ret double %longer + +; CHECK: fmax s +} +
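; A minimal illustrative sketch of the idiom these tests exercise (the
; function below is hypothetical and not part of the vendored file, assuming
; the same `llc -march=arm64` RUN line): an fcmp whose result selects between
; the compare's own operands is the pattern the backend can fold to a
; floating-point min/max. Because the ordered ogt compare is false on a NaN
; input, the select returns the zero operand for NaNs, which matches the
; NaN-suppressing fmaxnm semantics rather than plain fmax; by symmetry with
; test_cross_fail_nan below, fmaxnm would be the expected fold.
define float @sketch_fmax_candidate(float %in) {
  %cmp = fcmp ogt float %in, 0.000000e+00   ; ordered: false when %in is NaN
  %val = select i1 %cmp, float %in, float 0.000000e+00
  ret float %val
}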
+define double @test_cross(float %in) { +; CHECK-LABEL: test_cross: + %cmp = fcmp ult float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer + +; CHECK: fmin s +} + +; Same as previous, but with ordered comparison; +; must become fminnm, not fmin. +define double @test_cross_fail_nan(float %in) { +; CHECK-LABEL: test_cross_fail_nan: + %cmp = fcmp olt float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer + +; CHECK: fminnm s +} + +; This isn't a min or a max, but passes the first condition for swapping the +; results. Make sure they're put back before we resort to the normal fcsel. +define float @test_cross_fail(float %lhs, float %rhs) { +; CHECK-LABEL: test_cross_fail: + %tst = fcmp une float %lhs, %rhs + %res = select i1 %tst, float %rhs, float %lhs + ret float %res + + ; The register allocator would have to decide to be deliberately obtuse before + ; other register were used. +; CHECK: fcsel s0, s1, s0, ne +} + +; Make sure the transformation isn't triggered for integers +define i64 @test_integer(i64 %in) { + %cmp = icmp slt i64 %in, 0 + %val = select i1 %cmp, i64 0, i64 %in + ret i64 %val +} diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll index ea281528b84c..40cc36ea52fa 100644 --- a/test/CodeGen/AArch64/arm64-fmax.ll +++ b/test/CodeGen/AArch64/arm64-fmax.ll @@ -1,57 +1,48 @@ ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s -; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE define double @test_direct(float %in) { ; CHECK-LABEL: test_direct: -; CHECK-SAFE-LABEL: test_direct: - %cmp = fcmp olt float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double 0.000000e+00, double %longer - ret double %val + %cmp = fcmp nnan olt float %in, 0.000000e+00 + %val = select i1 %cmp, float 0.000000e+00, float %in + %longer = fpext float %val to double + ret double %longer ; CHECK: fmax -; CHECK-SAFE: fmax } define double @test_cross(float %in) { ; CHECK-LABEL: test_cross: -; CHECK-SAFE-LABEL: test_cross: - %cmp = fcmp ult float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double %longer, double 0.000000e+00 - ret double %val + %cmp = fcmp nnan ult float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer ; CHECK: fmin -; CHECK-SAFE: fmin } ; Same as previous, but with ordered comparison; ; can't be converted in safe-math mode. define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: -; CHECK-SAFE-LABEL: test_cross_fail_nan: - %cmp = fcmp olt float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double %longer, double 0.000000e+00 - ret double %val + %cmp = fcmp nnan olt float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer ; CHECK: fmin -; CHECK-SAFE: fcsel d0, d1, d0, mi } ; This isn't a min or a max, but passes the first condition for swapping the ; results. Make sure they're put back before we resort to the normal fcsel. 
define float @test_cross_fail(float %lhs, float %rhs) { ; CHECK-LABEL: test_cross_fail: -; CHECK-SAFE-LABEL: test_cross_fail: - %tst = fcmp une float %lhs, %rhs + %tst = fcmp nnan une float %lhs, %rhs %res = select i1 %tst, float %rhs, float %lhs ret float %res ; The register allocator would have to decide to be deliberately obtuse before ; other register were used. ; CHECK: fcsel s0, s1, s0, ne -; CHECK-SAFE: fcsel s0, s1, s0, ne } ; Make sure the transformation isn't triggered for integers @@ -60,3 +51,14 @@ define i64 @test_integer(i64 %in) { %val = select i1 %cmp, i64 0, i64 %in ret i64 %val } + +define float @test_f16(half %in) { +; CHECK-LABEL: test_f16: + %cmp = fcmp nnan ult half %in, 0.000000e+00 + %val = select i1 %cmp, half %in, half 0.000000e+00 + %longer = fpext half %val to float + ret float %longer +; FIXME: It'd be nice for this to create an fmin instruction! +; CHECK: fcvt +; CHECK: fcsel +} diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll index aaef39fcf512..097fe2ca6ed9 100644 --- a/test/CodeGen/AArch64/arm64-fp128.ll +++ b/test/CodeGen/AArch64/arm64-fp128.ll @@ -148,14 +148,9 @@ define i1 @test_setcc2() { ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] %val = fcmp ugt fp128 %lhs, %rhs -; CHECK: bl __gttf2 +; CHECK: bl __letf2 ; CHECK: cmp w0, #0 -; CHECK: cset [[GT:w[0-9]+]], gt - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne -; CHECK: orr w0, [[UNORDERED]], [[GT]] +; CHECK: cset w0, gt ret i1 %val ; CHECK: ret @@ -169,31 +164,21 @@ define i32 @test_br_cc() { ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs] ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] - ; olt == !uge, which LLVM unfortunately "optimizes" this to. + ; olt == !uge, which LLVM optimizes this to. %cond = fcmp olt fp128 %lhs, %rhs -; CHECK: bl __getf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[OGE:w[0-9]+]], ge - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] -; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] +; CHECK: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.ge {{.LBB[0-9]+_[0-9]+}} br i1 %cond, label %iftrue, label %iffalse iftrue: ret i32 42 ; CHECK-NEXT: BB# ; CHECK-NEXT: movz w0, #0x2a -; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] - +; CHECK: ret iffalse: ret i32 29 -; CHECK: [[RET29]]: -; CHECK-NEXT: movz w0, #0x1d -; CHECK-NEXT: [[REALRET]]: +; CHECK: movz w0, #0x1d ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll index f1c4e9bbaed9..895bfe4b3915 100644 --- a/test/CodeGen/AArch64/arm64-hello.ll +++ b/test/CodeGen/AArch64/arm64-hello.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX ; CHECK-LABEL: main: ; CHECK: stp x29, x30, [sp, #-16]! 
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll index b52cddf600ac..b6ab9934dbc3 100644 --- a/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -81,6 +81,17 @@ define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind } +define void @storef16(half** %out, half %index, half %spacing) nounwind { +; CHECK-LABEL: storef16: +; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2 +; CHECK: ret + %tmp = load half*, half** %out, align 2 + %incdec.ptr = getelementptr inbounds half, half* %tmp, i64 1 + store half %spacing, half* %tmp, align 2 + store half* %incdec.ptr, half** %out, align 2 + ret void +} + define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp { ; CHECK-LABEL: storef32: ; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4 @@ -125,6 +136,17 @@ define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline ret float *%ptr } +define half* @pref16(half** %out, half %spacing) nounwind { +; CHECK-LABEL: pref16: +; CHECK: ldr x0, [x0] +; CHECK-NEXT: str h0, [x0, #6]! +; CHECK-NEXT: ret + %tmp = load half*, half** %out, align 2 + %ptr = getelementptr inbounds half, half* %tmp, i64 3 + store half %spacing, half* %ptr, align 2 + ret half *%ptr +} + define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp { ; CHECK-LABEL: pre64: ; CHECK: ldr x0, [x0] @@ -230,6 +252,17 @@ define float* @preidxf32(float* %src, float* %out) { ret float* %ptr } +define half* @preidxf16(half* %src, half* %out) { +; CHECK-LABEL: preidxf16: +; CHECK: ldr h0, [x0, #2]! +; CHECK: str h0, [x1] +; CHECK: ret + %ptr = getelementptr inbounds half, half* %src, i64 1 + %tmp = load half, half* %ptr, align 2 + store half %tmp, half* %out, align 2 + ret half* %ptr +} + define i64* @preidx64(i64* %src, i64* %out) { ; CHECK-LABEL: preidx64: ; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]! 
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index ba31513172d5..98d4e3646f56 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s @ptr = global i8* null @@ -6215,3 +6215,27 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %pt } declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) + +; CHECK-LABEL: test_ld1lane_build: +; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[0], [x0] +; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[1], [x1] +; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[0], [x2] +; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[1], [x3] +; CHECK: sub.2s v[[REGNUM2:[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: str d[[REGNUM2]], [x4] +; CHECK-NEXT: ret +define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) { + %load0 = load i32, i32* %ptr0, align 4 + %load1 = load i32, i32* %ptr1, align 4 + %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0 + %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1 + + %load2 = load i32, i32* %ptr2, align 4 + %load3 = load i32, i32* %ptr3, align 4 + %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0 + %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1 + + %sub = sub nsw <2 x i32> %vec0_1, %vec1_1 + store <2 x i32> %sub, <2 x i32>* %out, align 16 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll index 802d95826ce4..ac6e8a7731c6 100644 --- a/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s ; rdar://9167275 diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll index dee034483541..c65cf95be2e5 100644 --- a/test/CodeGen/AArch64/arm64-join-reserved.ll +++ b/test/CodeGen/AArch64/arm64-join-reserved.ll @@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx10" ; A move isn't necessary. ; ; CHECK-LABEL: g: -; CHECK: str xzr, [sp] +; CHECK: str xzr, [sp, #-16]! 
; CHECK: bl ; CHECK: ret define void @g() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll index c4cce36bcb74..d1244e73b0f3 100644 --- a/test/CodeGen/AArch64/arm64-large-frame.ll +++ b/test/CodeGen/AArch64/arm64-large-frame.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim -disable-post-ra < %s | FileCheck %s declare void @use_addr(i8*) @addr = global i8* null diff --git a/test/CodeGen/AArch64/arm64-ld-from-st.ll b/test/CodeGen/AArch64/arm64-ld-from-st.ll new file mode 100644 index 000000000000..dd8add70cdb7 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-ld-from-st.ll @@ -0,0 +1,666 @@ +; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: Str64Ldr64 +; CHECK: mov x0, x1 +define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i64* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1 + %1 = load i64, i64* %arrayidx1 + ret i64 %1 +} + +; CHECK-LABEL: Str64Ldr32_0 +; CHECK: and x0, x1, #0xffffffff +define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str64Ldr32_1 +; CHECK: lsr x0, x1, #32 +define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str64Ldr16_0 +; CHECK: and x0, x1, #0xffff +define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_1 +; CHECK: ubfx x0, x1, #16, #16 +define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_2 +; CHECK: ubfx x0, x1, #32, #16 +define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_3 +; CHECK: lsr x0, x1, #48 +define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr8_0 +; CHECK: and x0, x1, #0xff +define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* 
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_1 +; CHECK: ubfx x0, x1, #8, #8 +define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_2 +; CHECK: ubfx x0, x1, #16, #8 +define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_3 +; CHECK: ubfx x0, x1, #24, #8 +define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_4 +; CHECK: ubfx x0, x1, #32, #8 +define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_5 +; CHECK: ubfx x0, x1, #40, #8 +define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_6 +; CHECK: ubfx x0, x1, #48, #8 +define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_7 +; CHECK: lsr x0, x1, #56 +define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr32 +; CHECK: mov w0, w1 +define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i32* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str32Ldr16_0 +; CHECK: and w0, w1, #0xffff +define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str32Ldr16_1 +; CHECK: lsr w0, w1, #16 +define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr 
inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str32Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_2 +; CHECK: ubfx w0, w1, #16, #8 +define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_3 +; CHECK: lsr w0, w1, #24 +define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str16Ldr16 +; CHECK: and w0, w1, #0xffff +define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i16* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str16Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str16Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + + +; CHECK-LABEL: Unscaled_Str64Ldr64 +; CHECK: mov x0, x1 +define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i64* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1 + %1 = load i64, i64* %arrayidx1 + ret i64 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr32_0 +; CHECK: and x0, x1, #0xffffffff +define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr32_1 +; CHECK: lsr x0, x1, #32 +define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + 
%arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_0 +; CHECK: and x0, x1, #0xffff +define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_1 +; CHECK: ubfx x0, x1, #16, #16 +define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_2 +; CHECK: ubfx x0, x1, #32, #16 +define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_3 +; CHECK: lsr x0, x1, #48 +define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_0 +; CHECK: and x0, x1, #0xff +define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_1 +; CHECK: ubfx x0, x1, #8, #8 +define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_2 +; CHECK: ubfx x0, x1, #16, #8 +define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_3 +; CHECK: ubfx x0, x1, #24, #8 +define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_4 +; CHECK: ubfx x0, x1, #32, #8 +define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 + %1 = load i8, i8* 
%arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_5 +; CHECK: ubfx x0, x1, #40, #8 +define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_6 +; CHECK: ubfx x0, x1, #48, #8 +define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_7 +; CHECK: lsr x0, x1, #56 +define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr32 +; CHECK: mov w0, w1 +define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i32* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr16_0 +; CHECK: and w0, w1, #0xffff +define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr16_1 +; CHECK: lsr w0, w1, #16 +define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_2 +; CHECK: ubfx w0, w1, #16, #8 +define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_3 +; CHECK: lsr w0, w1, #24 +define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + 
%arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr16 +; CHECK: and w0, w1, #0xffff +define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i16* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: StrVolatileLdr +; CHECK: ldrh +define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load volatile i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: StrNotInRangeLdr +; CHECK: ldrh +define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_StrNotInRangeLdr +; CHECK: ldurh +define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: StrCallLdr +; CHECK: ldrh +define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %c = call i1 @test_dummy() + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +declare i1 @test_dummy() + +; CHECK-LABEL: StrStrLdr +; CHECK: ldrh +define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + store i32 %n, i32* %P2 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll index a192eab112fa..6071d092f8b3 100644 --- a/test/CodeGen/AArch64/arm64-ldp.ll +++ b/test/CodeGen/AArch64/arm64-ldp.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\ -; RUN: -verify-machineinstrs | FileCheck 
-check-prefix=LDUR_CHK %s -; CHECK: ldp_int +; CHECK-LABEL: ldp_int ; CHECK: ldp define i32 @ldp_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 @@ -12,7 +10,7 @@ define i32 @ldp_int(i32* %p) nounwind { ret i32 %add } -; CHECK: ldp_sext_int +; CHECK-LABEL: ldp_sext_int ; CHECK: ldpsw define i64 @ldp_sext_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 @@ -51,7 +49,7 @@ define i64 @ldp_half_sext_res1_int(i32* %p) nounwind { } -; CHECK: ldp_long +; CHECK-LABEL: ldp_long ; CHECK: ldp define i64 @ldp_long(i64* %p) nounwind { %tmp = load i64, i64* %p, align 8 @@ -61,7 +59,7 @@ define i64 @ldp_long(i64* %p) nounwind { ret i64 %add } -; CHECK: ldp_float +; CHECK-LABEL: ldp_float ; CHECK: ldp define float @ldp_float(float* %p) nounwind { %tmp = load float, float* %p, align 4 @@ -71,7 +69,7 @@ define float @ldp_float(float* %p) nounwind { ret float %add } -; CHECK: ldp_double +; CHECK-LABEL: ldp_double ; CHECK: ldp define double @ldp_double(double* %p) nounwind { %tmp = load double, double* %p, align 8 @@ -83,10 +81,10 @@ define double @ldp_double(double* %p) nounwind { ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define i32 @ldur_int(i32* %a) nounwind { -; LDUR_CHK: ldur_int -; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_int +; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8] +; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -96,10 +94,10 @@ define i32 @ldur_int(i32* %a) nounwind { } define i64 @ldur_sext_int(i32* %a) nounwind { -; LDUR_CHK: ldur_sext_int -; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_sext_int +; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -111,11 +109,11 @@ define i64 @ldur_sext_int(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { -; LDUR_CHK: ldur_half_sext_int_res0 -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; LDUR_CHK: sxtw x[[DST1]], w[[DST1]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res0 +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] +; CHECK: sxtw x[[DST1]], w[[DST1]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -127,11 +125,11 @@ define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { -; LDUR_CHK: ldur_half_sext_int_res1 -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; LDUR_CHK: sxtw x[[DST2]], w[[DST2]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res1 +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] +; CHECK: sxtw x[[DST2]], w[[DST2]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load 
i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -144,10 +142,10 @@ define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { define i64 @ldur_long(i64* %a) nounwind ssp { -; LDUR_CHK: ldur_long -; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_long +; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -1 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -2 @@ -157,10 +155,10 @@ define i64 @ldur_long(i64* %a) nounwind ssp { } define float @ldur_float(float* %a) { -; LDUR_CHK: ldur_float -; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_float +; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8] +; CHECK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, float* %a, i64 -1 %tmp1 = load float, float* %p1, align 2 %p2 = getelementptr inbounds float, float* %a, i64 -2 @@ -170,10 +168,10 @@ define float @ldur_float(float* %a) { } define double @ldur_double(double* %a) { -; LDUR_CHK: ldur_double -; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16] -; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_double +; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16] +; CHECK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, double* %a, i64 -1 %tmp1 = load double, double* %p1, align 2 %p2 = getelementptr inbounds double, double* %a, i64 -2 @@ -184,11 +182,11 @@ define double @ldur_double(double* %a) { ; Now check some boundary conditions define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyIn -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyIn +; CHECK-NOT: ldur +; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -31 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -32 @@ -198,11 +196,11 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { } define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInSext -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInSext +; CHECK-NOT: ldur +; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -214,12 +212,12 @@ define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInHalfSextRes0 -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; LDUR_CHK: sxtw x[[DST1]], w[[DST1]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes0 
+; CHECK-NOT: ldur +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] +; CHECK: sxtw x[[DST1]], w[[DST1]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -231,12 +229,12 @@ define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInHalfSextRes1 -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; LDUR_CHK: sxtw x[[DST2]], w[[DST2]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes1 +; CHECK-NOT: ldur +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] +; CHECK: sxtw x[[DST2]], w[[DST2]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -248,12 +246,12 @@ define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { } define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyOut -; LDUR_CHK-NOT: ldp +; CHECK-LABEL: pairUpBarelyOut +; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; LDUR_CHK: add -; LDUR_CHK-NEXT: ret +; CHECK: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -32 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -33 @@ -263,12 +261,12 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { } define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyOutSext -; LDUR_CHK-NOT: ldp +; CHECK-LABEL: pairUpBarelyOutSext +; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; LDUR_CHK: add -; LDUR_CHK-NEXT: ret +; CHECK: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -64 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -65 @@ -280,12 +278,12 @@ define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { } define i64 @pairUpNotAligned(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpNotAligned -; LDUR_CHK-NOT: ldp -; LDUR_CHK: ldur -; LDUR_CHK-NEXT: ldur -; LDUR_CHK-NEXT: add -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpNotAligned +; CHECK-NOT: ldp +; CHECK: ldur +; CHECK-NEXT: ldur +; CHECK-NEXT: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -18 %bp1 = bitcast i64* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -303,12 +301,12 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp { } define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpNotAlignedSext -; LDUR_CHK-NOT: ldp -; LDUR_CHK: ldursw -; LDUR_CHK-NEXT: ldursw -; LDUR_CHK-NEXT: add -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpNotAlignedSext +; CHECK-NOT: ldp +; CHECK: ldursw +; CHECK-NEXT: ldursw +; CHECK-NEXT: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -18 %bp1 = bitcast i32* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -326,3 +324,35 @@ define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { %tmp3 = add i64 %sexttmp1, %sexttmp2 ret i64 %tmp3 } + +declare void @use-ptr(i32*) + +; CHECK-LABEL: ldp_sext_int_pre +; CHECK: ldpsw 
x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8] +define i64 @ldp_sext_int_pre(i32* %p) nounwind { + %ptr = getelementptr inbounds i32, i32* %p, i64 2 + call void @use-ptr(i32* %ptr) + %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0 + %tmp = load i32, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1 + %tmp1 = load i32, i32* %add.ptr1, align 4 + %sexttmp = sext i32 %tmp to i64 + %sexttmp1 = sext i32 %tmp1 to i64 + %add = add nsw i64 %sexttmp1, %sexttmp + ret i64 %add +} + +; CHECK-LABEL: ldp_sext_int_post +; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8 +define i64 @ldp_sext_int_post(i32* %p) nounwind { + %tmp = load i32, i32* %p, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %tmp1 = load i32, i32* %add.ptr, align 4 + %sexttmp = sext i32 %tmp to i64 + %sexttmp1 = sext i32 %tmp1 to i64 + %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1 + call void @use-ptr(i32* %ptr) + %add = add nsw i64 %sexttmp1, %sexttmp + ret i64 %add +} + diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll index d5baf16bdd5c..ad89d3ff711b 100644 --- a/test/CodeGen/AArch64/arm64-long-shift.ll +++ b/test/CodeGen/AArch64/arm64-long-shift.ll @@ -2,18 +2,20 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: shl: -; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]] -; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64 -; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]] -; CHECK-NEXT: cmp [[XREG_4]], #0 -; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge -; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2 -; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq +; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2 +; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge +; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2 +; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge +; CHECK: ret %shl = shl i128 %r, %s ret i128 %shl @@ -21,19 +23,21 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { define i128 @ashr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: ashr: -; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] -; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 -; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]] -; CHECK-NEXT: cmp [[XREG_5]], #0 -; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge -; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 -; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63 -; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, 
[[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq +; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 +; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge +; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 +; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63 +; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge +; CHECK: ret %shr = ashr i128 %r, %s ret i128 %shr @@ -41,18 +45,20 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone { define i128 @lshr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: lshr: -; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] -; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 -; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]] -; CHECK-NEXT: cmp [[XREG_5]], #0 -; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge -; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 -; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq +; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 +; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge +; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 +; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge +; CHECK: ret %shr = lshr i128 %r, %s ret i128 %shr diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll index 5bc4d71501ba..85572f2cf0f8 100644 --- a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s ; Small (16-bytes here) unaligned memcpys should stay memcpy calls if ; strict-alignment is turned on. 
diff --git a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll new file mode 100644 index 000000000000..5276ac334a71 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll @@ -0,0 +1,406 @@ +; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE +; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE + +; CHECK-LABEL: Ldrh_merge +; CHECK-NOT: ldrh +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i16 @Ldrh_merge(i16* nocapture readonly %p) { + %1 = load i16, i16* %p, align 2 + %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1 + %2 = load i16, i16* %arrayidx2, align 2 + %add = sub nuw nsw i16 %1, %2 + ret i16 %add +} + +; CHECK-LABEL: Ldurh_merge +; CHECK-NOT: ldurh +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i16 @Ldurh_merge(i16* nocapture readonly %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2 + %0 = load i16, i16* %arrayidx + %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1 + %1 = load i16, i16* %arrayidx3 + %add = sub nuw nsw i16 %0, %1 + ret i16 %add +} + +; CHECK-LABEL: Ldrh_4_merge +; CHECK-NOT: ldrh +; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0] +; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff +; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16 +; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff +; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16 +; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]] +; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]] +; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]] +; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]] +; LE: sub w0, [[TEMP2]], [[WORD2HI]] +; BE: sub w0, [[TEMP2]], [[WORD2LO]] +define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { + %arrayidx = getelementptr inbounds i16, i16* %P, i64 0 + %l0 = load i16, i16* %arrayidx + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1 + %l1 = load i16, i16* %arrayidx2 + %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2 + %l2 = load i16, i16* %arrayidx7 + %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3 + %l3 = load i16, i16* %arrayidx12 + %add4 = sub nuw nsw i16 %l1, %l0 + %add9 = udiv i16 %add4, %l2 + %add14 = sub nuw nsw i16 %add9, %l3 + ret i16 %add14 +} + +; CHECK-LABEL: Ldrsh_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] + +define i32 @Ldrsh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_zsext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; LE-DAG: and 
[[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_szext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrb_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_zsext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_szext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; LE-DAG: ubfx [[HI_PART:w[0-9]+]], 
[[NEW_DEST]], #8, #8 +; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_zsext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_szext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldurb_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldurb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, 
[[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_zsext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_szext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Strh_zero +; CHECK: str wzr +define void @Strh_zero(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + ret void +} + +; CHECK-LABEL: Strh_zero_4 +; CHECK: stp wzr, wzr +define void @Strh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + %add3 = add nsw i32 %n, 2 + %idxprom4 = sext i32 %add3 to i64 + %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4 + store i16 0, i16* %arrayidx5 + %add6 = add nsw i32 %n, 3 + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7 + store i16 0, i16* %arrayidx8 + ret void +} + +; CHECK-LABEL: Sturb_zero +; CHECK: sturh wzr +define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom + store i8 0, i8* %arrayidx + %sub2= add nsw i32 %n, -1 + %idxprom1 = sext i32 %sub2 to i64 + %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1 + store i8 0, i8* %arrayidx2 + ret void +} + +; CHECK-LABEL: Sturh_zero +; CHECK: stur wzr +define void 
@Sturh_zero(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -3 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + ret void +} + +; CHECK-LABEL: Sturh_zero_4 +; CHECK: stp wzr, wzr +define void @Sturh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -3 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -4 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + %sub4 = add nsw i32 %n, -2 + %idxprom5 = sext i32 %sub4 to i64 + %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5 + store i16 0, i16* %arrayidx6 + %sub7 = add nsw i32 %n, -1 + %idxprom8 = sext i32 %sub7 to i64 + %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8 + store i16 0, i16* %arrayidx9 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll index 869966caa3ae..985b5bf483ac 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -535,6 +535,17 @@ entry: declare double @llvm.fma.f64(double, double, double) +define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { +; CHECK-LABEL: test_vfmss_lane_f32 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret +entry: + %extract.rhs = extractelement <2 x float> %v, i32 1 + %extract = fsub float -0.000000e+00, %extract.rhs + %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) + ret float %0 +} + define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmss_laneq_f32 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] @@ -557,6 +568,50 @@ entry: ret double %0 } +define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) { +; CHCK-LABEL: test_vfmsd_lane_f64_0 +; CHCK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHCK-NEXT: ret +entry: + %tmp0 = fsub <1 x double> , %v + %tmp1 = extractelement <1 x double> %tmp0, i32 0 + %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) + ret double %0 +} + +define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { +; CHECK-LABEL: test_vfmss_lane_f32_0 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <2 x float> , %v + %tmp1 = extractelement <2 x float> %tmp0, i32 1 + %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %0 +} + +define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { +; CHECK-LABEL: test_vfmss_laneq_f32_0 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <4 x float>, %v + %tmp1 = extractelement <4 x float> %tmp0, i32 3 + %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %0 +} + +define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { +; CHECK-LABEL: test_vfmsd_laneq_f64_0 +; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <2 x double>, %v + %tmp1 = extractelement <2 x double> %tmp0, i32 1 + %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, 
double %a) + ret double %0 +} + define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index b74a40626cee..83b1cac70f5c 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) { ret i32 %tmp5 } -define i32 @smovx16b(<16 x i8> %tmp1) { +define i64 @smovx16b(<16 x i8> %tmp1) { ; CHECK-LABEL: smovx16b: -; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8] +; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 %tmp4, %tmp4 - ret i32 %tmp5 + %tmp4 = sext i8 %tmp3 to i64 + ret i64 %tmp4 } -define i32 @smovx8h(<8 x i16> %tmp1) { +define i64 @smovx8h(<8 x i16> %tmp1) { ; CHECK-LABEL: smovx8h: -; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2] +; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 + %tmp4 = sext i16 %tmp3 to i64 + ret i64 %tmp4 } define i64 @smovx4s(<4 x i32> %tmp1) { diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll index b8236c5b2479..c2006ccdd064 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -7,7 +7,7 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen: ; CHECK: Ltmp -; CHECK: str x{{.+}}, [sp] +; CHECK: str x{{.+}}, [sp, #-16]! ; CHECK-NEXT: mov x0, x{{.+}} ; CHECK: Ltmp ; CHECK-NEXT: movz x16, #0xffff, lsl #32 @@ -16,7 +16,7 @@ entry: ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen: ; FAST: Ltmp -; FAST: str x{{.+}}, [sp] +; FAST: str x{{.+}}, [sp, #-16]! ; FAST: Ltmp ; FAST-NEXT: movz x16, #0xffff, lsl #32 ; FAST-NEXT: movk x16, #0xdead, lsl #16 @@ -50,7 +50,7 @@ entry: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 ; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-32]! ; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST: Ltmp @@ -90,7 +90,7 @@ entry: ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 ; FAST-NEXT: movz [[REG5:x[0-9]+]], #0xa -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-64]! 
; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST-NEXT: str [[REG4]], [sp, #36] diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll index 60672aa38486..f3af01a73559 100644 --- a/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 -; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-apple-ios -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-freebsd-gnu -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s ; x18 is reserved as a platform register on Darwin but not on other diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll index b0b529a13f41..9ee53a0f92e6 100644 --- a/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/test/CodeGen/AArch64/arm64-popcnt.ll @@ -4,8 +4,8 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt -; CHECK: ubfx x{{[0-9]+}} -; CHECK: fmov d0, x{{[0-9]+}} +; CHECK: mov w[[IN64:[0-9]+]], w0 +; CHECK: fmov d0, x[[IN64]] ; CHECK: cnt.8b v0, v0 ; CHECK: uaddlv.8b h0, v0 ; CHECK: fmov w0, s0 @@ -59,7 +59,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt ; CHECK-LABEL: cnt32: -; CHECK-NOT 16b +; CHECK-NOT: 16b ; CHECK: ret } @@ -67,7 +67,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat { %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt ; CHECK-LABEL: cnt64: -; CHECK-NOT 16b +; CHECK-NOT: 16b ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll index 931114447adf..d487aabccc4f 100644 --- a/test/CodeGen/AArch64/arm64-rounding.ll +++ b/test/CodeGen/AArch64/arm64-rounding.ll @@ -1,10 +1,8 @@ -; RUN: llc -O3 < %s -mcpu=cyclone | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" -target triple = "arm64-apple-ios6.0.0" +; RUN: llc -O3 < %s -mtriple=arm64 | FileCheck %s -; CHECK: test1 -; CHECK: frintx +; CHECK-LABEL: test1: ; CHECK: frintm +; CHECK-NOT: frintx define float @test1(float %a) #0 { entry: %call = tail call float @floorf(float %a) nounwind readnone @@ -13,9 +11,9 @@ entry: declare float @floorf(float) nounwind readnone -; CHECK: test2 -; CHECK: frintx +; CHECK-LABEL: test2: ; CHECK: frintm +; CHECK-NOT: frintx define double @test2(double %a) #0 { entry: %call = tail call double @floor(double %a) nounwind readnone @@ -24,7 +22,7 @@ entry: declare double @floor(double) nounwind readnone -; CHECK: test3 +; CHECK-LABEL: test3: ; CHECK: frinti define float @test3(float %a) #0 { entry: @@ -34,7 +32,7 @@ entry: declare float @nearbyintf(float) nounwind readnone -; CHECK: test4 +; CHECK-LABEL: test4: ; CHECK: frinti define double @test4(double %a) #0 { entry: @@ -44,9 +42,9 @@ entry: declare double @nearbyint(double) nounwind readnone -; CHECK: test5 -; CHECK: frintx +; CHECK-LABEL: test5: ; CHECK: frintp +; CHECK-NOT: frintx define float @test5(float %a) #0 { entry: %call = tail call float @ceilf(float %a) nounwind readnone @@ -55,9 +53,9 @@ entry: declare float @ceilf(float) nounwind 
readnone -; CHECK: test6 -; CHECK: frintx +; CHECK-LABEL: test6: ; CHECK: frintp +; CHECK-NOT: frintx define double @test6(double %a) #0 { entry: %call = tail call double @ceil(double %a) nounwind readnone @@ -66,7 +64,7 @@ entry: declare double @ceil(double) nounwind readnone -; CHECK: test7 +; CHECK-LABEL: test7: ; CHECK: frintx define float @test7(float %a) #0 { entry: @@ -76,7 +74,7 @@ entry: declare float @rintf(float) nounwind readnone -; CHECK: test8 +; CHECK-LABEL: test8: ; CHECK: frintx define double @test8(double %a) #0 { entry: @@ -86,9 +84,9 @@ entry: declare double @rint(double) nounwind readnone -; CHECK: test9 -; CHECK: frintx +; CHECK-LABEL: test9: ; CHECK: frintz +; CHECK-NOT: frintx define float @test9(float %a) #0 { entry: %call = tail call float @truncf(float %a) nounwind readnone @@ -97,9 +95,9 @@ entry: declare float @truncf(float) nounwind readnone -; CHECK: test10 -; CHECK: frintx +; CHECK-LABEL: test10: ; CHECK: frintz +; CHECK-NOT: frintx define double @test10(double %a) #0 { entry: %call = tail call double @trunc(double %a) nounwind readnone @@ -108,9 +106,9 @@ entry: declare double @trunc(double) nounwind readnone -; CHECK: test11 -; CHECK: frintx +; CHECK-LABEL: test11: ; CHECK: frinta +; CHECK-NOT: frintx define float @test11(float %a) #0 { entry: %call = tail call float @roundf(float %a) nounwind readnone @@ -119,9 +117,9 @@ entry: declare float @roundf(float %a) nounwind readnone -; CHECK: test12 -; CHECK: frintx +; CHECK-LABEL: test12: ; CHECK: frinta +; CHECK-NOT: frintx define double @test12(double %a) #0 { entry: %call = tail call double @round(double %a) nounwind readnone @@ -130,7 +128,7 @@ entry: declare double @round(double %a) nounwind readnone -; CHECK: test13 +; CHECK-LABEL: test13: ; CHECK-NOT: frintx ; CHECK: frintm define float @test13(float %a) #1 { @@ -139,7 +137,7 @@ entry: ret float %call } -; CHECK: test14 +; CHECK-LABEL: test14: ; CHECK-NOT: frintx ; CHECK: frintm define double @test14(double %a) #1 { @@ -148,7 +146,7 @@ entry: ret double %call } -; CHECK: test15 +; CHECK-LABEL: test15: ; CHECK-NOT: frintx ; CHECK: frintp define float @test15(float %a) #1 { @@ -157,7 +155,7 @@ entry: ret float %call } -; CHECK: test16 +; CHECK-LABEL: test16: ; CHECK-NOT: frintx ; CHECK: frintp define double @test16(double %a) #1 { @@ -166,7 +164,7 @@ entry: ret double %call } -; CHECK: test17 +; CHECK-LABEL: test17: ; CHECK-NOT: frintx ; CHECK: frintz define float @test17(float %a) #1 { @@ -175,7 +173,7 @@ entry: ret float %call } -; CHECK: test18 +; CHECK-LABEL: test18: ; CHECK-NOT: frintx ; CHECK: frintz define double @test18(double %a) #1 { @@ -184,7 +182,7 @@ entry: ret double %call } -; CHECK: test19 +; CHECK-LABEL: test19: ; CHECK-NOT: frintx ; CHECK: frinta define float @test19(float %a) #1 { @@ -193,7 +191,7 @@ entry: ret float %call } -; CHECK: test20 +; CHECK-LABEL: test20: ; CHECK-NOT: frintx ; CHECK: frinta define double @test20(double %a) #1 { @@ -202,7 +200,5 @@ entry: ret double %call } - - attributes #0 = { nounwind } attributes #1 = { nounwind "unsafe-fp-math"="true" } diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 599712be401c..2ecd66ddf5d4 100644 --- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; 
RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" @@ -539,3 +539,94 @@ if.end: declare void @abort() #0 attributes #0 = { noreturn nounwind } + +; Make sure that we handle infinite loops properly When checking that the Save +; and Restore blocks are control flow equivalent, the loop searches for the +; immediate (post) dominator for the (restore) save blocks. When either the Save +; or Restore block is located in an infinite loop the only immediate (post) +; dominator is itself. In this case, we cannot perform shrink wrapping, but we +; should return gracefully and continue compilation. +; The only condition for this test is the compilation finishes correctly. +; +; CHECK-LABEL: infiniteloop +; CHECK: ret +define void @infiniteloop() { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: + %ptr = alloca i32, i32 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.03 + store i32 %add, i32* %ptr + br label %for.body + +if.end: + ret void +} + +; Another infinite loop test this time with a body bigger than just one block. +; CHECK-LABEL: infiniteloop2 +; CHECK: ret +define void @infiniteloop2() { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: + %ptr = alloca i32, i32 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2] + %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"() + %add = add nsw i32 %call, %sum.03 + store i32 %add, i32* %ptr + br i1 undef, label %body1, label %body2 + +body1: + tail call void asm sideeffect "nop", "~{x19}"() + br label %for.body + +body2: + tail call void asm sideeffect "nop", "~{x19}"() + br label %for.body + +if.end: + ret void +} + +; Another infinite loop test this time with two nested infinite loop. 
+; CHECK-LABEL: infiniteloop3 +; CHECK: ret +define void @infiniteloop3() { +entry: + br i1 undef, label %loop2a, label %body + +body: ; preds = %entry + br i1 undef, label %loop2a, label %end + +loop1: ; preds = %loop2a, %loop2b + %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] + %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] + %0 = icmp eq i32* %var, null + %next.load = load i32*, i32** undef + br i1 %0, label %loop2a, label %loop2b + +loop2a: ; preds = %loop1, %body, %entry + %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ] + %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ] + br label %loop1 + +loop2b: ; preds = %loop1 + %gep1 = bitcast i32* %var.phi to i32* + %next.ptr = bitcast i32* %gep1 to i32** + store i32* %next.phi, i32** %next.ptr + br label %loop1 + +end: + ret void +} diff --git a/test/CodeGen/AArch64/arm64-spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll index 88109088a2ff..2ea5d7810a14 100644 --- a/test/CodeGen/AArch64/arm64-spill-lr.ll +++ b/test/CodeGen/AArch64/arm64-spill-lr.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=arm64-apple-ios < %s @bar = common global i32 0, align 4 -; Leaf function which uses all callee-saved registers and allocates >= 256 bytes on the stack -; this will cause processFunctionBeforeCalleeSavedScan() to spill LR as an additional scratch -; register. +; Leaf function which uses all callee-saved registers and allocates >= 256 bytes +; on the stack this will cause determineCalleeSaves() to spill LR as an +; additional scratch register. ; ; This is a crash-only regression test for rdar://15124582. define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind { diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll index 1a4df7a6f2d6..3eb1d2753001 100644 --- a/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/test/CodeGen/AArch64/arm64-stackmap.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort=1 < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 -fast-isel -fast-isel-abort=1 < %s | FileCheck %s ; ; Note: Print verbose stackmaps using -debug-only=stackmaps. 
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll index 72561aac6e87..98242d0bb57e 100644 --- a/test/CodeGen/AArch64/arm64-stp.ll +++ b/test/CodeGen/AArch64/arm64-stp.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\ -; RUN: -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s -; CHECK: stp_int +; CHECK-LABEL: stp_int ; CHECK: stp w0, w1, [x2] define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { store i32 %a, i32* %p, align 4 @@ -11,7 +9,7 @@ define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { ret void } -; CHECK: stp_long +; CHECK-LABEL: stp_long ; CHECK: stp x0, x1, [x2] define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { store i64 %a, i64* %p, align 8 @@ -20,7 +18,7 @@ define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { ret void } -; CHECK: stp_float +; CHECK-LABEL: stp_float ; CHECK: stp s0, s1, [x0] define void @stp_float(float %a, float %b, float* nocapture %p) nounwind { store float %a, float* %p, align 4 @@ -29,7 +27,7 @@ define void @stp_float(float %a, float %b, float* nocapture %p) nounwind { ret void } -; CHECK: stp_double +; CHECK-LABEL: stp_double ; CHECK: stp d0, d1, [x0] define void @stp_double(double %a, double %b, double* nocapture %p) nounwind { store double %a, double* %p, align 8 @@ -40,9 +38,9 @@ define void @stp_double(double %a, double %b, double* nocapture %p) nounwind { ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { -; STUR_CHK: stur_int -; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_int +; CHECK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %p, i32 -1 store i32 %a, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %p, i32 -2 @@ -51,9 +49,9 @@ define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { } define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { -; STUR_CHK: stur_long -; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_long +; CHECK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %p, i32 -1 store i64 %a, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %p, i32 -2 @@ -62,9 +60,9 @@ define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { } define void @stur_float(float %a, float %b, float* nocapture %p) nounwind { -; STUR_CHK: stur_float -; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_float +; CHECK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8] +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, float* %p, i32 -1 store float %a, float* %p1, align 2 %p2 = getelementptr inbounds float, float* %p, i32 -2 @@ -73,9 +71,9 @@ define void @stur_float(float %a, float %b, float* nocapture %p) nounwind { } define void @stur_double(double %a, double %b, double* nocapture %p) nounwind { -; STUR_CHK: stur_double -; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_double +; CHECK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, double* %p, i32 -1 store double %a, double* 
%p1, align 2 %p2 = getelementptr inbounds double, double* %p, i32 -2 diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll index 109f4115d801..28c158f7a2eb 100644 --- a/test/CodeGen/AArch64/arm64-strict-align.ll +++ b/test/CodeGen/AArch64/arm64-strict-align.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT +; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align | FileCheck %s --check-prefix=CHECK-STRICT +; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT define i32 @f0(i32* nocapture %p) nounwind { ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2] diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll index f94f88a1183f..c95eca062ff6 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -1,4 +1,7 @@ -; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s +; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s ; If the .tlsdesccall and blr parts are emitted completely separately (even with ; glue) then LLVM will separate them quite happily (with a spill at O0, hence @@ -13,6 +16,40 @@ define i32 @test_generaldynamic() { %val = load i32, i32* @general_dynamic_var ret i32 %val -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr {{x[0-9]+}} +; NOEMU: .tlsdesccall general_dynamic_var +; NOEMU-NEXT: blr {{x[0-9]+}} +; NOEMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: adrp{{.+}}__emutls_v.general_dynamic_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var +; EMU-NOT: __emutls_t.general_dynamic_var } + +@emulated_init_var = thread_local global i32 37, align 8 + +define i32 @test_emulated_init() { +; COMMON-LABEL: test_emulated_init: + + %val = load i32, i32* @emulated_init_var + ret i32 %val + +; EMU: adrp{{.+}}__emutls_v.emulated_init_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: .align 3 +; EMU-LABEL: __emutls_v.emulated_init_var: +; EMU-NEXT: .xword 4 +; EMU-NEXT: .xword 8 +; EMU-NEXT: .xword 0 +; EMU-NEXT: .xword __emutls_t.emulated_init_var + +; EMU-LABEL: __emutls_t.emulated_init_var: +; EMU-NEXT: .word 37 +} + +; CHECK-NOT: __emutls_v.general_dynamic_var: +; EMU-NOT: __emutls_t.general_dynamic_var diff --git a/test/CodeGen/AArch64/arm64-trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll index 7cde629b33ae..be0388284fb8 100644 --- a/test/CodeGen/AArch64/arm64-trunc-store.ll +++ b/test/CodeGen/AArch64/arm64-trunc-store.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind { ; CHECK-LABEL: bar: diff --git 
a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index a52c4ebf13e7..c1800085884c 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -134,6 +134,72 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   ret <2 x i64> %tmp4
 }
 
+define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabdl8h_log2_shuffle
+; CHECK: uabdl2.8h
+; CHECK: uabdl.8h
+  %aload = load <16 x i8>, <16 x i8>* %a, align 1
+  %bload = load <16 x i8>, <16 x i8>* %b, align 1
+  %aext = zext <16 x i8> %aload to <16 x i16>
+  %bext = zext <16 x i8> %bload to <16 x i16>
+  %abdiff = sub nsw <16 x i16> %aext, %bext
+  %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
+  %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
+  %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
+  %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin1.rdx = add <16 x i16> %absel, %rdx.shuf
+  %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx
+  %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0
+  ret i16 %reduced_v
+}
+
+define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
+; CHECK-LABEL: uabdl4s_log2_shuffle
+; CHECK: uabdl2.4s
+; CHECK: uabdl.4s
+  %aload = load <8 x i16>, <8 x i16>* %a, align 1
+  %bload = load <8 x i16>, <8 x i16>* %b, align 1
+  %aext = zext <8 x i16> %aload to <8 x i32>
+  %bext = zext <8 x i16> %bload to <8 x i32>
+  %abdiff = sub nsw <8 x i32> %aext, %bext
+  %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
+  %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
+  %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
+  %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = add <8 x i32> %absel, %rdx.shuf
+  %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0
+  ret i32 %reduced_v
+}
+
+define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
+; CHECK-LABEL: uabdl2d_log2_shuffle
+; CHECK: uabdl2.2d
+; CHECK: uabdl.2d
+  %aload = load <4 x i32>, <4 x i32>* %a, align 1
+  %bload = load <4 x i32>, <4 x i32>* %b, align 1
+  %aext = zext <4 x i32> %aload to <4 x i64>
+  %bext = zext <4 x i32> %bload to <4 x i64>
+  %abdiff = sub nsw <4 x i64> %aext, %bext
+  %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
+  %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
+  %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
+  %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0
+  ret i64 %reduced_v
+}
+
 define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fabd_2s:
 ;CHECK: fabd.2s
diff --git
a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 44f2af1c5e79..8702b41023d0 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false -disable-post-ra < %s | FileCheck %s %va_list = type {i8*, i8*, i8*, i32, i32} diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll index 5bee1611e6c6..994a9956cf7f 100644 --- a/test/CodeGen/AArch64/arm64-vector-ext.ll +++ b/test/CodeGen/AArch64/arm64-vector-ext.ll @@ -1,27 +1,27 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s - -;CHECK: @func30 -;CHECK: ushll.4s v0, v0, #0 -;CHECK: movi.4s v1, #0x1 -;CHECK: and.16b v0, v0, v1 -;CHECK: str q0, [x0] -;CHECK: ret - -%T0_30 = type <4 x i1> -%T1_30 = type <4 x i32> -define void @func30(%T0_30 %v0, %T1_30* %p1) { - %r = zext %T0_30 %v0 to %T1_30 - store %T1_30 %r, %T1_30* %p1 - ret void -} - -; Extend from v1i1 was crashing things (PR20791). Make sure we do something -; sensible instead. -define <1 x i32> @autogen_SD7918() { -; CHECK-LABEL: autogen_SD7918 -; CHECK: movi d0, #0000000000000000 -; CHECK-NEXT: ret - %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0 - %ZE = zext <1 x i1> %I29 to <1 x i32> - ret <1 x i32> %ZE -} +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +;CHECK: @func30 +;CHECK: movi.4h v1, #0x1 +;CHECK: and.8b v0, v0, v1 +;CHECK: ushll.4s v0, v0, #0 +;CHECK: str q0, [x0] +;CHECK: ret + +%T0_30 = type <4 x i1> +%T1_30 = type <4 x i32> +define void @func30(%T0_30 %v0, %T1_30* %p1) { + %r = zext %T0_30 %v0 to %T1_30 + store %T1_30 %r, %T1_30* %p1 + ret void +} + +; Extend from v1i1 was crashing things (PR20791). Make sure we do something +; sensible instead. 
+define <1 x i32> @autogen_SD7918() { +; CHECK-LABEL: autogen_SD7918 +; CHECK: movi d0, #0000000000000000 +; CHECK-NEXT: ret + %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0 + %ZE = zext <1 x i1> %I29 to <1 x i32> + ret <1 x i32> %ZE +} diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll index b5aca45cd479..302ba9d681c6 100644 --- a/test/CodeGen/AArch64/arm64-vminmaxnm.ll +++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll @@ -42,13 +42,28 @@ define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp ret <2 x double> %vminnm2.i } +define float @f7(float %a, float %b) nounwind readnone ssp { +; CHECK: fmaxnm s0, s0, s1 +; CHECK: ret + %vmaxnm2.i = tail call float @llvm.aarch64.neon.fmaxnm.f32(float %a, float %b) nounwind + ret float %vmaxnm2.i +} + +define double @f8(double %a, double %b) nounwind readnone ssp { +; CHECK: fminnm d0, d0, d1 +; CHECK: ret + %vmaxnm2.i = tail call double @llvm.aarch64.neon.fminnm.f64(double %a, double %b) nounwind + ret double %vmaxnm2.i +} + declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone - +declare float @llvm.aarch64.neon.fmaxnm.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.fminnm.f64(double, double) nounwind readnone define double @test_fmaxnmv(<2 x double> %in) { ; CHECK-LABEL: test_fmaxnmv: diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index ce9c0a64b587..ec49110d4052 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s ; ; Get the actual value of the overflow bit. 
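
The "overflow bit" the arm64-xaluo.ll RUN lines refer to is the i1 member of the pair returned by the *.with.overflow intrinsics. A stripped-down sketch of the pattern that file exercises (the function name is hypothetical; the intrinsic signature is the real one):

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

define i32 @saddo_sketch(i32 %a, i32 %b) nounwind {
  %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %sum  = extractvalue { i32, i1 } %t, 0
  ; The i1 overflow flag is expected to lower to an adds followed by a
  ; cset on the vs condition.
  %obit = extractvalue { i32, i1 } %t, 1
  %res = select i1 %obit, i32 0, i32 %sum
  ret i32 %res
}
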
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index cb90caeadc1f..900d2072925f 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created @@ -893,6 +893,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -916,6 +918,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -927,21 +931,21 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic %old = extractvalue { i32, i1 } %pair, 0 +; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. -; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]] ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } @@ -963,6 +967,8 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; As above, w1 is a reasonable guess. ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: str x[[OLD]], diff --git a/test/CodeGen/AArch64/bitcast-v2i8.ll b/test/CodeGen/AArch64/bitcast-v2i8.ll index 4bdac641c5bc..aff3ffc70a71 100644 --- a/test/CodeGen/AArch64/bitcast-v2i8.ll +++ b/test/CodeGen/AArch64/bitcast-v2i8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck %s ; Part of PR21549: going through the stack isn't ideal but is correct. 
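
The clrex checks added to atomic-ops.ll above cover the failure path of the compare-exchange loop: when the comparison after the ldxr fails, control branches out before the store-exclusive runs, so the exclusive monitor must be cleared explicitly. The IR driving all of this is just a cmpxchg; a self-contained sketch with a hypothetical global, using the same release/monotonic orderings as the tests:

@var = global i32 0

define i32 @cmpxchg_sketch(i32 %wanted, i32 %new) nounwind {
  ; release on success, monotonic on failure; %pair is { old value, success }
  %pair = cmpxchg i32* @var, i32 %wanted, i32 %new release monotonic
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
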
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index 9b731fa72a47..509b547a5c82 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -196,3 +196,44 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) { ret void } + +; Bitfield insert where the second or operand is a better match to be folded into the BFM +define void @test_32bit_opnd1_better(i32* %existing, i32* %new) { +; CHECK-LABEL: test_32bit_opnd1_better: + + %oldval = load volatile i32, i32* %existing + %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff + + %newval = load i32, i32* %new + %newval_shifted = shl i32 %newval, 16 + %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing +; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff +; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8 + + ret void +} + +; Tests when all the bits from one operand are not useful +define i32 @test_nouseful_bits(i8 %a, i32 %b) { +; CHECK-LABEL: test_nouseful_bits: +; CHECK: bfi +; CHECK: bfi +; CHECK: bfi +; CHECK-NOT: bfi +; CHECK-NOT: or +; CHECK: lsl + %conv = zext i8 %a to i32 ; 0 0 0 A + %shl = shl i32 %b, 8 ; B2 B1 B0 0 + %or = or i32 %conv, %shl ; B2 B1 B0 A + %shl.1 = shl i32 %or, 8 ; B1 B0 A 0 + %or.1 = or i32 %conv, %shl.1 ; B1 B0 A A + %shl.2 = shl i32 %or.1, 8 ; B0 A A 0 + %or.2 = or i32 %conv, %shl.2 ; B0 A A A + %shl.3 = shl i32 %or.2, 8 ; A A A 0 + %or.3 = or i32 %conv, %shl.3 ; A A A A + %shl.4 = shl i32 %or.3, 8 ; A A A 0 + ret i32 %shl.4 +} diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 78399c80b5de..5f19b6943b8e 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -3,13 +3,25 @@ @var32 = global i32 0 @var64 = global i64 0 -define void @test_extendb(i8 %var) { -; CHECK-LABEL: test_extendb: +define void @test_extendb32(i8 %var) { +; CHECK-LABEL: test_extendb32: %sxt32 = sext i8 %var to i32 store volatile i32 %sxt32, i32* @var32 ; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}} +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. + %uxt32 = zext i8 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff + ret void +} + +define void @test_extendb64(i8 %var) { +; CHECK-LABEL: test_extendb64: + %sxt64 = sext i8 %var to i64 store volatile i64 %sxt64, i64* @var64 ; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} @@ -17,23 +29,31 @@ define void @test_extendb(i8 %var) { ; N.b. this doesn't actually produce a bitfield instruction at the ; moment, but it's still a good test to have and the semantics are ; correct. - %uxt32 = zext i8 %var to i32 - store volatile i32 %uxt32, i32* @var32 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff - %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff ret void } -define void @test_extendh(i16 %var) { -; CHECK-LABEL: test_extendh: +define void @test_extendh32(i16 %var) { +; CHECK-LABEL: test_extendh32: %sxt32 = sext i16 %var to i32 store volatile i32 %sxt32, i32* @var32 ; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}} +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. 
+ %uxt32 = zext i16 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + ret void +} + +define void @test_extendh64(i16 %var) { +; CHECK-LABEL: test_extendh64: + %sxt64 = sext i16 %var to i64 store volatile i64 %sxt64, i64* @var64 ; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} @@ -41,13 +61,9 @@ define void @test_extendh(i16 %var) { ; N.b. this doesn't actually produce a bitfield instruction at the ; moment, but it's still a good test to have and the semantics are ; correct. - %uxt32 = zext i16 %var to i32 - store volatile i32 %uxt32, i32* @var32 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff - %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff ret void } @@ -60,7 +76,7 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 +; CHECK: mov {{w[0-9]+}}, w0 ret void } diff --git a/test/CodeGen/AArch64/bitreverse.ll b/test/CodeGen/AArch64/bitreverse.ll new file mode 100644 index 000000000000..936e3554b397 --- /dev/null +++ b/test/CodeGen/AArch64/bitreverse.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s + +; These tests just check that the plumbing is in place for @llvm.bitreverse. The +; actual output is massive at the moment as llvm.bitreverse is not yet legal. + +declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone + +define <2 x i16> @f(<2 x i16> %a) { +; CHECK-LABEL: f: +; CHECK: ushr + %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) + ret <2 x i16> %b +} + +declare i8 @llvm.bitreverse.i8(i8) readnone + +; Unfortunately some of the shift-and-inserts become BFIs, and some do not :( +define i8 @g(i8 %a) { +; CHECK-LABEL: g: +; CHECK-DAG: lsr [[S5:w.*]], w0, #5 +; CHECK-DAG: lsr [[S4:w.*]], w0, #4 +; CHECK-DAG: lsr [[S3:w.*]], w0, #3 +; CHECK-DAG: lsr [[S2:w.*]], w0, #2 +; CHECK-DAG: lsl [[L1:w.*]], w0, #29 +; CHECK-DAG: lsl [[L2:w.*]], w0, #19 +; CHECK-DAG: lsl [[L3:w.*]], w0, #17 + +; CHECK-DAG: and [[T1:w.*]], [[L1]], #0x40000000 +; CHECK-DAG: bfi [[T1]], w0, #31, #1 +; CHECK-DAG: bfi [[T1]], [[S2]], #29, #1 +; CHECK-DAG: bfi [[T1]], [[S3]], #28, #1 +; CHECK-DAG: bfi [[T1]], [[S4]], #27, #1 +; CHECK-DAG: bfi [[T1]], [[S5]], #26, #1 +; CHECK-DAG: and [[T2:w.*]], [[L2]], #0x2000000 +; CHECK-DAG: and [[T3:w.*]], [[L3]], #0x1000000 +; CHECK-DAG: orr [[T4:w.*]], [[T1]], [[T2]] +; CHECK-DAG: orr [[T5:w.*]], [[T4]], [[T3]] +; CHECK: lsr w0, [[T5]], #24 + + %b = call i8 @llvm.bitreverse.i8(i8 %a) + ret i8 %b +} + +declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone + +define <8 x i8> @g_vec(<8 x i8> %a) { +; Try and match as much of the sequence as precisely as possible. 
+ +; CHECK-LABEL: g_vec: +; CHECK-DAG: movi [[M1:v.*]], #0x80 +; CHECK-DAG: movi [[M2:v.*]], #0x40 +; CHECK-DAG: movi [[M3:v.*]], #0x20 +; CHECK-DAG: movi [[M4:v.*]], #0x10 +; CHECK-DAG: movi [[M5:v.*]], #0x8 +; CHECK-DAG: movi [[M6:v.*]], #0x4{{$}} +; CHECK-DAG: movi [[M7:v.*]], #0x2{{$}} +; CHECK-DAG: movi [[M8:v.*]], #0x1{{$}} +; CHECK-DAG: shl [[S1:v.*]], v0.8b, #7 +; CHECK-DAG: shl [[S2:v.*]], v0.8b, #5 +; CHECK-DAG: shl [[S3:v.*]], v0.8b, #3 +; CHECK-DAG: shl [[S4:v.*]], v0.8b, #1 +; CHECK-DAG: ushr [[S5:v.*]], v0.8b, #1 +; CHECK-DAG: ushr [[S6:v.*]], v0.8b, #3 +; CHECK-DAG: ushr [[S7:v.*]], v0.8b, #5 +; CHECK-DAG: ushr [[S8:v.*]], v0.8b, #7 +; CHECK-DAG: and [[A1:v.*]], [[S1]], [[M1]] +; CHECK-DAG: and [[A2:v.*]], [[S2]], [[M2]] +; CHECK-DAG: and [[A3:v.*]], [[S3]], [[M3]] +; CHECK-DAG: and [[A4:v.*]], [[S4]], [[M4]] +; CHECK-DAG: and [[A5:v.*]], [[S5]], [[M5]] +; CHECK-DAG: and [[A6:v.*]], [[S6]], [[M6]] +; CHECK-DAG: and [[A7:v.*]], [[S7]], [[M7]] +; CHECK-DAG: and [[A8:v.*]], [[S8]], [[M8]] + +; The rest can be ORRed together in any order; it's not worth the test +; maintenance to match them precisely. +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK: ret + %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a) + ret <8 x i8> %b +} diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index c78fabac6187..004267f4e4e0 100644 --- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -403,6 +403,32 @@ return: ; preds = %land.lhs.true, %con ret i32 %retval.0 } +define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) { +; CHECK-LABEL: cmp_shifted: +; CHECK: cmp w0, #1 +; [...] +; CHECK: cmp w0, #2, lsl #12 + + %tst_low = icmp sgt i32 %in, 0 + br i1 %tst_low, label %true, label %false + +true: + call i32 @zoo(i32 128) + ret void + +false: + %tst = icmp sgt i32 %in, 8191 + br i1 %tst, label %truer, label %falser + +truer: + call i32 @zoo(i32 42) + ret void + +falser: + call i32 @zoo(i32 1) + ret void +} + declare i32 @zoo(i32) declare double @yoo(i32) diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 1266842fcc6d..a8399f92ebe4 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll new file mode 100644 index 000000000000..a9ae00c8d270 --- /dev/null +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; Shrink wrapping currently does not kick in because we have a TLS CALL +; in the entry block and it will clobber the link register. 
+ +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) +declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg) + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} + +; CHECK-LABEL: _ZTW2sg +; CHECK-NOT: stp d31, d30 +; CHECK-NOT: stp d29, d28 +; CHECK-NOT: stp d27, d26 +; CHECK-NOT: stp d25, d24 +; CHECK-NOT: stp d23, d22 +; CHECK-NOT: stp d21, d20 +; CHECK-NOT: stp d19, d18 +; CHECK-NOT: stp d17, d16 +; CHECK-NOT: stp d7, d6 +; CHECK-NOT: stp d5, d4 +; CHECK-NOT: stp d3, d2 +; CHECK-NOT: stp d1, d0 +; CHECK-NOT: stp x20, x19 +; CHECK-NOT: stp x14, x13 +; CHECK-NOT: stp x12, x11 +; CHECK-NOT: stp x10, x9 +; CHECK-NOT: stp x8, x7 +; CHECK-NOT: stp x6, x5 +; CHECK-NOT: stp x4, x3 +; CHECK-NOT: stp x2, x1 +; CHECK: blr +; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK: blr +; CHECK: tlv_atexit +; CHECK: [[BB_end]]: +; CHECK: blr +; CHECK-NOT: ldp x2, x1 +; CHECK-NOT: ldp x4, x3 +; CHECK-NOT: ldp x6, x5 +; CHECK-NOT: ldp x8, x7 +; CHECK-NOT: ldp x10, x9 +; CHECK-NOT: ldp x12, x11 +; CHECK-NOT: ldp x14, x13 +; CHECK-NOT: ldp x20, x19 +; CHECK-NOT: ldp d1, d0 +; CHECK-NOT: ldp d3, d2 +; CHECK-NOT: ldp d5, d4 +; CHECK-NOT: ldp d7, d6 +; CHECK-NOT: ldp d17, d16 +; CHECK-NOT: ldp d19, d18 +; CHECK-NOT: ldp d21, d20 +; CHECK-NOT: ldp d23, d22 +; CHECK-NOT: ldp d25, d24 +; CHECK-NOT: ldp d27, d26 +; CHECK-NOT: ldp d29, d28 +; CHECK-NOT: ldp d31, d30 diff --git a/test/CodeGen/AArch64/dag-combine-select.ll b/test/CodeGen/AArch64/dag-combine-select.ll new file mode 100644 index 000000000000..45b998d9136d --- /dev/null +++ b/test/CodeGen/AArch64/dag-combine-select.ll @@ -0,0 +1,47 @@ +; RUN: llc -disable-post-ra -o - %s | FileCheck %s +target triple = "arm64--" + +@out = internal global i32 0, align 4 + +; Ensure that we transform select(C0, x, select(C1, x, y)) towards +; select(C0 | C1, x, y) so we can use CMP;CCMP for the implementation. +; CHECK-LABEL: test0: +; CHECK: cmp w0, #7 +; CHECK: ccmp w1, #0, #0, ne +; CHECK: csel w0, w1, w2, gt +; CHECK: ret +define i32 @test0(i32 %v0, i32 %v1, i32 %v2) { + %cmp1 = icmp eq i32 %v0, 7 + %cmp2 = icmp sgt i32 %v1, 0 + %sel0 = select i1 %cmp1, i32 %v1, i32 %v2 + %sel1 = select i1 %cmp2, i32 %v1, i32 %sel0 + ret i32 %sel1 +} + +; Usually we keep select(C0 | C1, x, y) as is on aarch64 to create CMP;CCMP +; sequences. This case should be transformed to select(C0, select(C1, x, y), y) +; anyway to get CSE effects. 
+; CHECK-LABEL: test1: +; CHECK-NOT: ccmp +; CHECK: cmp w0, #7 +; CHECK: adrp x[[OUTNUM:[0-9]+]], out +; CHECK: csel w[[SEL0NUM:[0-9]+]], w1, w2, eq +; CHECK: cmp w[[SEL0NUM]], #13 +; CHECK: csel w[[SEL1NUM:[0-9]+]], w1, w2, lo +; CHECK: cmp w0, #42 +; CHECK: csel w[[SEL2NUM:[0-9]+]], w1, w[[SEL1NUM]], eq +; CHECK: str w[[SEL1NUM]], [x[[OUTNUM]], :lo12:out] +; CHECK: str w[[SEL2NUM]], [x[[OUTNUM]], :lo12:out] +; CHECK: ret +define void @test1(i32 %bitset, i32 %val0, i32 %val1) { + %cmp1 = icmp eq i32 %bitset, 7 + %cond = select i1 %cmp1, i32 %val0, i32 %val1 + %cmp5 = icmp ult i32 %cond, 13 + %cond11 = select i1 %cmp5, i32 %val0, i32 %val1 + %cmp3 = icmp eq i32 %bitset, 42 + %or.cond = or i1 %cmp3, %cmp5 + %cond17 = select i1 %or.cond, i32 %val0, i32 %val1 + store volatile i32 %cond11, i32* @out, align 4 + store volatile i32 %cond17, i32* @out, align 4 + ret void +} diff --git a/test/CodeGen/AArch64/divrem.ll b/test/CodeGen/AArch64/divrem.ll new file mode 100644 index 000000000000..9f648eb63eac --- /dev/null +++ b/test/CodeGen/AArch64/divrem.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s + +; SDIVREM/UDIVREM DAG nodes are generated but expanded when lowering and +; should not generate select error. +define <2 x i32> @test_udivrem(<2 x i32> %x, < 2 x i32> %y, < 2 x i32>* %z) { +; CHECK-LABEL: test_udivrem +; CHECK-DAG: udivrem +; CHECK-NOT: LLVM ERROR: Cannot select + %div = udiv <2 x i32> %x, %y + store <2 x i32> %div, <2 x i32>* %z + %1 = urem <2 x i32> %x, %y + ret <2 x i32> %1 +} + +define <4 x i32> @test_sdivrem(<4 x i32> %x, <4 x i32>* %y) { +; CHECK-LABEL: test_sdivrem +; CHECK-DAG: sdivrem + %div = sdiv <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + store <4 x i32> %div, <4 x i32>* %y + %1 = srem <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + ret <4 x i32> %1 +} diff --git a/test/CodeGen/AArch64/emutls.ll b/test/CodeGen/AArch64/emutls.ll new file mode 100644 index 000000000000..ac5762edba98 --- /dev/null +++ b/test/CodeGen/AArch64/emutls.ll @@ -0,0 +1,116 @@ +; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \ +; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s + +; Copied from X86/emutls.ll + +; Use my_emutls_get_address like __emutls_get_address. 
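+;
+; Layout of each __emutls_v.* control variable checked below (one xword per
+; field, as the CHECK lines spell out): object size, alignment, index (0
+; until the runtime initializes it), and a pointer to the __emutls_t.*
+; initial-value template (or 0 for zero-initialized variables such as b1).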
+@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; ARM64-LABEL: my_get_xyz: +; ARM64: adrp x0, :got:my_emutls_v_xyz +; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz] +; ARM64-NEXT: bl my_emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i1 = thread_local global i32 15 +@i2 = external thread_local global i32 +@i3 = internal thread_local global i32 15 +@i4 = hidden thread_local global i32 15 +@i5 = external hidden thread_local global i32 +@s1 = thread_local global i16 15 +@b1 = thread_local global i8 0 + +define i32 @f1() { +; ARM64-LABEL: f1: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %tmp1 = load i32, i32* @i1 + ret i32 %tmp1 +} + +define i32* @f2() { +; ARM64-LABEL: f2: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + ret i32* @i1 +} + +;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t. + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i1: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i1: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i2 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i3: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i3 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i3: +; ARM64-NEXT: .word 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i4: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i4 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i4: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i5: +; ARM64 .hidden __emutls_v.i5 +; ARM64-NOT: __emutls_v.i5: + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.s1: +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.s1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.s1: +; ARM64-NEXT: .hword 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.b1: +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword 0 + +; ARM64-NOT: __emutls_t.b1 diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll new file mode 100644 index 000000000000..7664db3df8d2 --- /dev/null +++ b/test/CodeGen/AArch64/emutls_generic.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s + +; Make sure that TLS symbols are emitted in expected order. 
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+  ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+  ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+  ret i64* @internal_y
+}
+
+; ARM_64-LABEL: get_external_x:
+; ARM_64: __emutls_v.external_x
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_external_y:
+; ARM_64: __emutls_v.external_y
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_internal_y:
+; ARM_64: __emutls_v.internal_y
+; ARM_64: __emutls_get_address
+; ARM_64-NOT: __emutls_t.external_x
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.external_y:
+; ARM_64-NEXT: .xword 1
+; ARM_64-NEXT: .xword 2
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.external_y
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.external_y:
+; ARM_64-NEXT: .byte 7
+; ARM_64: .data
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.internal_y:
+; ARM_64-NEXT: .xword 8
+; ARM_64-NEXT: .xword 16
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.internal_y
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.internal_y:
+; ARM_64-NEXT: .xword 9
diff --git a/test/CodeGen/AArch64/eon.ll b/test/CodeGen/AArch64/eon.ll
new file mode 100644
index 000000000000..ea61ce34c050
--- /dev/null
+++ b/test/CodeGen/AArch64/eon.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Check that the eon instruction is generated instead of eor, movn
+define i64 @test1(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test1:
+; CHECK: eon
+; CHECK: ret
+entry:
+  %shl = shl i64 %b, 4
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %shl, %neg
+  ret i64 %xor
+}
+
+; Same check with multiple uses of %neg
+define i64 @test2(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test2:
+; CHECK: eon
+; CHECK: eon
+; CHECK: lsl
+; CHECK: ret
+entry:
+  %shl = shl i64 %b, 4
+  %neg = xor i64 %shl, -1
+  %xor = xor i64 %neg, %a
+  %xor1 = xor i64 %c, %neg
+  %shl2 = shl i64 %xor, %xor1
+  ret i64 %shl2
+}
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
index be5e2e51385d..e8ecb13b3564 100644
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
@@ -143,6 +143,33 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
   ret half %r
 }
 
+; CHECK-LABEL: test_select_cc_f32_f16:
+; CHECK-DAG: fcvt s2, h2
+; CHECK-DAG: fcvt s3, h3
+; CHECK-NEXT: fcmp s2, s3
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: ret
+define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
+  %cc = fcmp une half %c, %d
+  %r = select i1 %cc, float %a, float %b
+  ret float %r
+}
+
+; CHECK-LABEL: test_select_cc_f16_f32:
+; CHECK-DAG: fcvt s0, h0
+; CHECK-DAG: fcvt s1, h1
+; CHECK-DAG: fcmp s2, s3
+; CHECK-DAG: cset w8, ne
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
+  %cc = fcmp une float %c, %d
+  %r =
select i1 %cc, half %a, half %b + ret half %r +} + ; CHECK-LABEL: test_fcmp_une: ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s0, h0 @@ -644,13 +671,10 @@ define half @test_fabs(half %a) #0 { } ; CHECK-LABEL: test_minnum: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: bl {{_?}}fminf +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fminnm s0, s0, s1 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret define half @test_minnum(half %a, half %b) #0 { %r = call half @llvm.minnum.f16(half %a, half %b) @@ -658,13 +682,10 @@ define half @test_minnum(half %a, half %b) #0 { } ; CHECK-LABEL: test_maxnum: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: bl {{_?}}fmaxf +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret define half @test_maxnum(half %a, half %b) #0 { %r = call half @llvm.maxnum.f16(half %a, half %b) @@ -683,11 +704,50 @@ define half @test_copysign(half %a, half %b) #0 { ret half %r } -; CHECK-LABEL: test_floor: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintm s0, s1 +; CHECK-LABEL: test_copysign_f32: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: ret +define half @test_copysign_f32(half %a, float %b) #0 { + %tb = fptrunc float %b to half + %r = call half @llvm.copysign.f16(half %a, half %tb) + ret half %r +} + +; CHECK-LABEL: test_copysign_f64: +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret +define half @test_copysign_f64(half %a, double %b) #0 { + %tb = fptrunc double %b to half + %r = call half @llvm.copysign.f16(half %a, half %tb) + ret half %r +} + +; Check that the FP promotion will use a truncating FP_ROUND, so we can fold +; away the (fpext (fp_round )) here. 
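+;
+; In DAG terms the fold is roughly (a sketch, not the verbatim combine):
+;   (fpext (fp_round %x, /*trunc=*/1)) --> %x
+; so the promoted f32 copysign below needs no round trip through f16.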
+ +; CHECK-LABEL: test_copysign_extended: +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret +define float @test_copysign_extended(half %a, half %b) #0 { + %r = call half @llvm.copysign.f16(half %a, half %b) + %xr = fpext half %r to float + ret float %xr +} + +; CHECK-LABEL: test_floor: +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_floor(half %a) #0 { %r = call half @llvm.floor.f16(half %a) @@ -695,10 +755,9 @@ define half @test_floor(half %a) #0 { } ; CHECK-LABEL: test_ceil: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintp s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_ceil(half %a) #0 { %r = call half @llvm.ceil.f16(half %a) @@ -706,10 +765,9 @@ define half @test_ceil(half %a) #0 { } ; CHECK-LABEL: test_trunc: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintz s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_trunc(half %a) #0 { %r = call half @llvm.trunc.f16(half %a) @@ -737,10 +795,9 @@ define half @test_nearbyint(half %a) #0 { } ; CHECK-LABEL: test_round: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frinta s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_round(half %a) #0 { %r = call half @llvm.round.f16(half %a) diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll new file mode 100644 index 000000000000..55fbf63319ee --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel -fast-isel-abort=0 -verify-machineinstrs < %s | FileCheck %s + +define void @test(i64 %a, i64 %b, i2* %c) { +; CHECK-LABEL: test +; CHECK: and [[REG1:w[0-9]+]], w8, #0x3 +; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}} +; CHECK-NEXT: tbz w9, #0, + %1 = trunc i64 %a to i2 + %2 = trunc i64 %b to i1 +; Force fast-isel to fall back to SDAG. + store i2 %1, i2* %c, align 8 + br i1 %2, label %bb1, label %bb2 + +bb1: + ret void + +bb2: + ret void +} diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index da6ddbf5101e..e04a62b85c8e 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s -; CHECK-label: test_or +; CHECK-LABEL: test_or ; CHECK: cbnz w0, {{LBB[0-9]+_2}} ; CHECK: cbz w1, {{LBB[0-9]+_1}} define i64 @test_or(i32 %a, i32 %b) { @@ -18,7 +18,7 @@ bb4: ret i64 %2 } -; CHECK-label: test_ans +; CHECK-LABEL: test_and ; CHECK: cbz w0, {{LBB[0-9]+_2}} ; CHECK: cbnz w1, {{LBB[0-9]+_3}} define i64 @test_and(i32 %a, i32 %b) { @@ -36,7 +36,55 @@ bb4: ret i64 %2 } +; If the branch is unpredictable, don't add another branch. 
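+;
+; The hint is the empty !unpredictable metadata attached to the branch (note
+; !2 = !{} at the end of this file), e.g.:
+;   br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2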
+ +; CHECK-LABEL: test_or_unpredictable +; CHECK: cmp w0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: cset w9, eq +; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: tbnz w8, #0, +define i64 @test_or_unpredictable(i32 %a, i32 %b) { +bb1: + %0 = icmp eq i32 %a, 0 + %1 = icmp eq i32 %b, 0 + %or.cond = or i1 %0, %1 + br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2 + +bb3: + ret i64 0 + +bb4: + %2 = call i64 @bar() + ret i64 %2 +} + +; CHECK-LABEL: test_and_unpredictable +; CHECK: cmp w0, #0 +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: tbz w8, #0, +define i64 @test_and_unpredictable(i32 %a, i32 %b) { +bb1: + %0 = icmp ne i32 %a, 0 + %1 = icmp ne i32 %b, 0 + %or.cond = and i1 %0, %1 + br i1 %or.cond, label %bb4, label %bb3, !unpredictable !2 + +bb3: + ret i64 0 + +bb4: + %2 = call i64 @bar() + ret i64 %2 +} + declare i64 @bar() !0 = !{!"branch_weights", i32 5128, i32 32} !1 = !{!"branch_weights", i32 1024, i32 4136} +!2 = !{} + diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll new file mode 100644 index 000000000000..2855419a1ca0 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll @@ -0,0 +1,100 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs \ +; RUN: -aarch64-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \ +; RUN: < %s | FileCheck %s + +; +; Verify that we don't mess up vector comparisons in fast-isel. +; + +define <2 x i32> @icmp_v2i32(<2 x i32> %a) { +; CHECK-LABEL: icmp_v2i32: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.2s [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %c = icmp eq <2 x i32> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <2 x i1> %c to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) { +; CHECK-LABEL: icmp_constfold_v2i32: +; CHECK: ; BB#0: +; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]] +; CHECK-NEXT: ret + %1 = icmp eq <2 x i32> %a, %a + br label %bb2 +bb2: + %2 = zext <2 x i1> %1 to <2 x i32> + ret <2 x i32> %2 +} + +define <4 x i32> @icmp_v4i32(<4 x i32> %a) { +; CHECK-LABEL: icmp_v4i32: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.4s [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]] +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]] +; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 +; CHECK-NEXT: ret + %c = icmp eq <4 x i32> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <4 x i1> %c to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) { +; CHECK-LABEL: icmp_constfold_v4i32: +; CHECK: ; BB#0: +; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]] +; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 +; CHECK-NEXT: ret + %1 = icmp eq <4 x i32> %a, %a + br label %bb2 +bb2: + %2 = zext <4 x i1> %1 to <4 x i32> + ret <4 x i32> %2 +} + +define <16 x i8> @icmp_v16i8(<16 x i8> %a) { +; CHECK-LABEL: icmp_v16i8: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.16b [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: 
and.16b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %c = icmp eq <16 x i8> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <16 x i1> %c to <16 x i8> + ret <16 x i8> %z +} + +define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) { +; CHECK-LABEL: icmp_constfold_v16i8: +; CHECK: ; BB#0: +; CHECK-NEXT: movi.2d [[CMP:v[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %1 = icmp eq <16 x i8> %a, %a + br label %bb2 +bb2: + %2 = zext <16 x i1> %1 to <16 x i8> + ret <16 x i8> %2 +} diff --git a/test/CodeGen/AArch64/fast-isel-folded-shift.ll b/test/CodeGen/AArch64/fast-isel-folded-shift.ll new file mode 100644 index 000000000000..b881ef5c6d52 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-folded-shift.ll @@ -0,0 +1,125 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s + +; Test invalid shift values. This will fall-back to SDAG. +; AND +define zeroext i8 @and_rs_i8(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: and_rs_i8 +; CHECK: and [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = and i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @and_rs_i16(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: and_rs_i16 +; CHECK: and [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = and i16 %a, %1 + ret i16 %2 +} + +define i32 @and_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: and_rs_i32 +; CHECK: and w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = and i32 %a, %1 + ret i32 %2 +} + +define i64 @and_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: and_rs_i64 +; CHECK: and x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = and i64 %a, %1 + ret i64 %2 +} + +; OR +define zeroext i8 @or_rs_i8(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: or_rs_i8 +; CHECK: orr [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = or i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @or_rs_i16(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: or_rs_i16 +; CHECK: orr [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = or i16 %a, %1 + ret i16 %2 +} + +define i32 @or_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: or_rs_i32 +; CHECK: orr w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = or i32 %a, %1 + ret i32 %2 +} + +define i64 @or_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: or_rs_i64 +; CHECK: orr x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = or i64 %a, %1 + ret i64 %2 +} + +; XOR +define zeroext i8 @xor_rs_i8(i8 %a, i8 %b) { +; CHECK-LABEL: xor_rs_i8 +; CHECK: eor [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = xor i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @xor_rs_i16(i16 %a, i16 %b) { +; CHECK-LABEL: xor_rs_i16 +; CHECK: eor [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = xor i16 %a, %1 + ret i16 %2 +} + +define i32 @xor_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: xor_rs_i32 +; CHECK: eor w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = xor i32 %a, %1 + ret i32 %2 +} + +define i64 @xor_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: xor_rs_i64 +; CHECK: eor x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = xor i64 %a, %1 + ret i64 %2 +} + +;ADD +define i32 @add_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: add_rs_i32 +; CHECK: add w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = add i32 %a, %1 + ret i32 %2 +} + +define i64 @add_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: 
add_rs_i64 +; CHECK: add x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = add i64 %a, %1 + ret i64 %2 +} + diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll index 89c5f2c48024..16d0429fe98d 100644 --- a/test/CodeGen/AArch64/fast-isel-logic-op.ll +++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s ; AND diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index a392619a768d..b5e03f08280f 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -16,7 +16,7 @@ define fastcc void @foo(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame: -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! call fastcc void @will_pop([8 x i32] undef, i32 42) ; CHECK: bl will_pop @@ -42,7 +42,7 @@ define void @foo1(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! call void @wont_pop([8 x i32] undef, i32 42) ; CHECK: bl wont_pop diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index 9917fcd044fd..f021eb232618 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -7,12 +7,12 @@ define fastcc void @func_stack0() { ; CHECK-LABEL: func_stack0: ; CHECK: mov x29, sp -; CHECK-NEXT: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack0: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL-NEXT: mov x29, sp -; CHECK-TAIL-NEXT: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! call fastcc void @func_stack8([8 x i32] undef, i32 42) @@ -55,13 +55,13 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-LABEL: func_stack8: ; CHECK: stp x29, x30, [sp, #-16]! ; CHECK: mov x29, sp -; CHECK: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack8: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL: mov x29, sp -; CHECK-TAIL: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! 
 call fastcc void @func_stack8([8 x i32] undef, i32 42)
diff --git a/test/CodeGen/AArch64/fcvt_combine.ll b/test/CodeGen/AArch64/fcvt_combine.ll
new file mode 100644
index 000000000000..093ce4a4cd85
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt_combine.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test1(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test2
+; CHECK-NOT: fmul.4s
+; CHECK: fcvtzs.4s v0, v0, #3
+; CHECK: ret
+define <4 x i32> @test2(<4 x float> %f) {
+  %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
+  ret <4 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test3
+; CHECK-NOT: fmul.2d
+; CHECK: fcvtzs.2d v0, v0, #5
+; CHECK: ret
+define <2 x i64> @test3(<2 x double> %d) {
+  %mul.i = fmul <2 x double> %d, <double 3.200000e+01, double 3.200000e+01>
+  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+; Truncate double to i32
+; CHECK-LABEL: test4
+; CHECK-NOT: fmul.2d v0, v0, #4
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: xtn.2s
+; CHECK: ret
+define <2 x i32> @test4(<2 x double> %d) {
+  %mul.i = fmul <2 x double> %d, <double 1.600000e+01, double 1.600000e+01>
+  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Truncate float to i16
+; CHECK-LABEL: test5
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i16> @test5(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
+  ret <2 x i16> %vcvt.i
+}
+
+; Don't convert float to i64
+; CHECK-LABEL: test6
+; CHECK: fmov.2s v1, #16.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtl v0.2d, v0.2s
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: ret
+define <2 x i64> @test6(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+; Check unsigned conversion.
+; CHECK-LABEL: test7
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzu.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test7(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: test8
+; CHECK: fmov.2s v1, #17.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test8(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.700000e+01, float 1.700000e+01>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-matching power of 2.
+; CHECK-LABEL: test9
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test9(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 8.000000e+00>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Don't combine all undefs.
+; CHECK-LABEL: test10
+; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ret
+define <2 x i32> @test10(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float undef, float undef>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Combine if mix of undef and pow2.
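+
+; The combine exercised by this file rewrites a multiply by a power of two
+; followed by a convert into one fixed-point convert, e.g. test1's pattern
+; (sketch):
+;   %m = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+;   %i = fptosi <2 x float> %m to <2 x i32>
+; becomes "fcvtzs.2s v0, v0, #4", since 16.0 = 2^4. test11 below additionally
+; checks that undef lanes do not block the fold when the defined lanes agree.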
+; CHECK-LABEL: test11
+; CHECK: fcvtzu.2s v0, v0, #3
+; CHECK: ret
+define <2 x i32> @test11(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Don't combine when multiplied by 0.0.
+; CHECK-LABEL: test12
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test12(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to power of 2 out of range (i.e., 2^33).
+; CHECK-LABEL: test13
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test13(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: test14
+; CHECK: fcvtzs.2s v0, v0, #32
+; CHECK: ret
+define <2 x i32> @test14(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll
new file mode 100644
index 000000000000..6f38a267ec3f
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -0,0 +1,115 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Test signed conversion.
+; CHECK-LABEL: @test1
+; CHECK: scvtf.2s v0, v0, #4
+; CHECK: ret
+define <2 x float> @test1(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 1.600000e+01, float 1.600000e+01>
+  ret <2 x float> %div.i
+}
+
+; Test unsigned conversion.
+; CHECK-LABEL: @test2
+; CHECK: ucvtf.2s v0, v0, #3
+; CHECK: ret
+define <2 x float> @test2(<2 x i32> %in) {
+entry:
+  %vcvt.i = uitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: @test3
+; CHECK: scvtf.2s v0, v0
+; CHECK: fmov.2s v1, #9.00000000
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test3(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 9.000000e+00, float 9.000000e+00>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to power of 2 out of range.
+; CHECK-LABEL: @test4
+; CHECK: scvtf.2s v0, v0
+; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test4(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: @test5
+; CHECK: scvtf.2s v0, v0, #32
+; CHECK: ret
+define <2 x float> @test5(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test quadword.
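+
+; fdiv_combine is the reciprocal case of fcvt_combine: a convert followed by
+; a divide by 2^N folds to a fixed-point convert, e.g. test6's pattern
+; (sketch):
+;   %c = sitofp <4 x i32> %in to <4 x float>
+;   %d = fdiv <4 x float> %c, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+; becomes "scvtf.4s v0, v0, #2", since 4.0 = 2^2.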
+; CHECK-LABEL: @test6
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test6(<4 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  ret <4 x float> %div.i
+}
+
+; Test unsigned i16 to float
+; CHECK-LABEL: @test7
+; CHECK: ushll.4s v0, v0, #0
+; CHECK: ucvtf.4s v0, v0, #1
+; CHECK: ret
+define <4 x float> @test7(<4 x i16> %in) {
+  %conv = uitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  ret <4 x float> %shift
+}
+
+; Test signed i16 to float
+; CHECK-LABEL: @test8
+; CHECK: sshll.4s v0, v0, #0
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test8(<4 x i16> %in) {
+  %conv = sitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  ret <4 x float> %shift
+}
+
+; Can't convert i64 to float.
+; CHECK-LABEL: @test9
+; CHECK: ucvtf.2d v0, v0
+; CHECK: fcvtn v0.2s, v0.2d
+; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test9(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x float>
+  %shift = fdiv <2 x float> %conv, <float 2.000000e+00, float 2.000000e+00>
+  ret <2 x float> %shift
+}
+
+; CHECK-LABEL: @test10
+; CHECK: ucvtf.2d v0, v0, #1
+; CHECK: ret
+define <2 x double> @test10(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x double>
+  %shift = fdiv <2 x double> %conv, <double 2.000000e+00, double 2.000000e+00>
+  ret <2 x double> %shift
+}
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll
index 2dd0d1245930..c0fec4d171cd 100644
--- a/test/CodeGen/AArch64/fold-constants.ll
+++ b/test/CodeGen/AArch64/fold-constants.ll
@@ -3,9 +3,6 @@ define i64 @dotests_616() {
 ; CHECK-LABEL: dotests_616
 ; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: umov w8, v0.b[2]
-; CHECK-NEXT: sbfx w8, w8, #0, #1
-; CHECK-NEXT: fmov s0, w8
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
 entry:
@@ -19,3 +16,19 @@ entry:
   %vget_lane = extractelement <1 x i64> %4, i32 0
   ret i64 %vget_lane
 }
+
+; PR25763 - folding constant vector comparisons with sign-extended result
+define <8 x i16> @dotests_458() {
+; CHECK-LABEL: dotests_458
+; CHECK: movi d0, #0x00000000ff0000
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+entry:
+  %vclz_v.i = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> , i1 false) #6
+  %vsra_n = lshr <8 x i8> %vclz_v.i,
+  %name_6 = or <8 x i8> %vsra_n,
+  %cmp.i603 = icmp slt <8 x i8> %name_6,
+  %vmovl.i4.i = sext <8 x i1> %cmp.i603 to <8 x i16>
+  ret <8 x i16> %vmovl.i4.i
+}
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 0dbda152fca9..f6e4bdf73459 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -130,7 +130,6 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
   ret <4 x i16> %2
 }
 
-
 define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-LABEL: sitofp_i8:
 ; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
@@ -218,4 +217,54 @@ define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
   ret <4 x half> %1
 }
 
+define void @test_insert_at_zero(half %a, <4 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+  %1 = insertelement <4 x half> undef, half %a, i64 0
+  store <4 x half> %1, <4 x half>* %b, align 4
+  ret void
+}
+
+define <4 x i8> @fptosi_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptosi <4 x half> %a to <4 x i8>
+  ret <4 x i8> %1
+}
+
+define <4 x i16> @fptosi_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptosi <4 x half> %a to <4 x i16>
+  ret <4 x i16> %1
+}
+
+define <4 x i8> @fptoui_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; NOTE: fcvtzs selected here because the xtn shaves the sign bit
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptoui <4 x half> %a to <4 x i8>
+  ret <4 x i8> %1
+}
+
+define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptoui <4 x half> %a to <4 x i16>
+  ret <4 x i16> %1
+}
+
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 10a8c22d6f7e..137d1f358a30 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -358,4 +358,67 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
   ret <8 x half> %1
 }
 
+define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+  %1 = insertelement <8 x half> undef, half %a, i64 0
+  store <8 x half> %1, <8 x half>* %b, align 4
+  ret void
+}
+
+define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+  %1 = fptosi <8 x half> %a to <8 x i8>
+  ret <8 x i8> %1
+}
+
+define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+  %1 = fptosi <8 x half> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+  %1 = fptoui <8 x half> %a to <8 x i8>
+  ret <8 x i8> %1
+}
+
+define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+  %1 = fptoui <8 x half> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
 attributes #0 = { nounwind }
diff --git
a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll index cff11f85bda4..ea4f1f4e10f3 100644 --- a/test/CodeGen/AArch64/free-zext.ll +++ b/test/CodeGen/AArch64/free-zext.ll @@ -1,7 +1,7 @@ ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @test_free_zext(i8* %a, i16* %b) { -; CHECK-LABEL: test_free_zext +; CHECK-LABEL: test_free_zext: ; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0] ; CHECK: ldrh w[[B:[0-9]+]], [x1] ; CHECK: add x0, x[[B]], x[[A]] @@ -12,3 +12,60 @@ define i64 @test_free_zext(i8* %a, i16* %b) { %add = add nsw i64 %conv1, %conv ret i64 %add } + +define void @test_free_zext2(i32* %ptr, i32* %dst1, i64* %dst2) { +; CHECK-LABEL: test_free_zext2: +; CHECK: ldrh w[[A:[0-9]+]], [x0] +; CHECK-NOT: and x +; CHECK: str w[[A]], [x1] +; CHECK: str x[[A]], [x2] + %load = load i32, i32* %ptr, align 8 + %load16 = and i32 %load, 65535 + %load64 = zext i32 %load16 to i64 + store i32 %load16, i32* %dst1, align 4 + store i64 %load64, i64* %dst2, align 8 + ret void +} + +; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads +; feeding a phi that zext's each loaded value. +define i32 @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i32 %c) { +; CHECK-LABEL: test_free_zext3: +bb1: +; CHECK: ldrh [[REG:w[0-9]+]] +; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff + %tmp1 = load i32, i32* %ptr, align 4 + %cmp = icmp ne i32 %c, 0 + br i1 %cmp, label %bb2, label %bb3 +bb2: +; CHECK: ldrh [[REG2:w[0-9]+]] +; CHECK-NOT: and {{w[0-9]+}}, [[REG2]], #0xffff + %tmp2 = load i32, i32* %ptr2, align 4 + br label %bb3 +bb3: + %tmp3 = phi i32 [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + %tmpand = and i32 %tmp3, 65535 + ret i32 %tmpand +} + +; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able +; load feeding a phi in the same block. +define void @test_free_zext4(i32* %ptr, i32* %ptr2, i32* %dst) { +; CHECK-LABEL: test_free_zext4: +; CHECK: ldrh [[REG:w[0-9]+]] +; TODO: fix isel to remove final and XCHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff +; CHECK: ldrh [[REG:w[0-9]+]] +bb1: + %load1 = load i32, i32* %ptr, align 4 + br label %loop +loop: + %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ] + %and = and i32 %phi, 65535 + store i32 %and, i32* %dst, align 4 + %load2 = load i32, i32* %ptr2, align 4 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %loop, label %end +end: + ret void +} diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 9100ae39282b..2ea13e388867 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-post-ra | FileCheck --check-prefix=CHECK %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -disable-post-ra | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 22a33157fd55..2f45666ba13a 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -89,11 +89,11 @@ define void @check_stack_args() { ; that varstruct is passed on the stack. 
Rather dependent on how a ; memcpy gets created, but the following works for now. -; CHECK-DAG: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #-16] ; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b -; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] +; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp, #-16]! ; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 657778e34187..5e820b8bb303 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -3,7 +3,7 @@ @var32 = global [3 x i32] zeroinitializer @var64 = global [3 x i64] zeroinitializer @var32_align64 = global [3 x i32] zeroinitializer, align 8 -@alias = alias [3 x i32]* @var32_align64 +@alias = alias [3 x i32], [3 x i32]* @var32_align64 define i64 @test_align32() { ; CHECK-LABEL: test_align32: diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll index 14b04303ffb3..b93f41c07df9 100644 --- a/test/CodeGen/AArch64/global-merge-1.ll +++ b/test/CodeGen/AArch64/global-merge-1.ll @@ -12,16 +12,20 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals@PAGE +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals@PAGEOFF +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF store i32 %a1, i32* @m, align 4 store i32 %a2, i32* @n, align 4 ret void } -;CHECK: .type _MergedGlobals,@object // @_MergedGlobals -;CHECK: .local _MergedGlobals -;CHECK: .comm _MergedGlobals,8,8 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,8,8 +;CHECK: m = .L_MergedGlobals +;CHECK: n = .L_MergedGlobals+4 -;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,8,3 ; @_MergedGlobals +;CHECK-APPLE-IOS-NOT: _m = l__MergedGlobals +;CHECK-APPLE-IOS-NOT: _n = l__MergedGlobals+4 diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll index af684039bf10..53bed1d9bc09 100644 --- a/test/CodeGen/AArch64/global-merge-2.ll +++ b/test/CodeGen/AArch64/global-merge-2.ll @@ -9,8 +9,8 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-LABEL: _f1: ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF ;CHECK-APPLE-IOS-NOT: adrp store i32 %a1, i32* @x, align 4 store i32 %a2, i32* @y, align 4 @@ -19,34 +19,34 @@ define void @f1(i32 %a1, i32 %a2) { define void @g1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-LABEL: _g1: -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF ;CHECK-APPLE-IOS-NOT: adrp store i32 %a1, i32* @y, align 4 store i32 %a2, i32* @z, align 4 ret void } -;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x -;CHECK: .globl _MergedGlobals_x -;CHECK: .align 3 -;CHECK: _MergedGlobals_x: -;CHECK: .size _MergedGlobals_x, 12 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,12,8 ;CHECK: 
.globl x -;CHECK: x = _MergedGlobals_x +;CHECK: x = .L_MergedGlobals +;CHECK: .size x, 4 ;CHECK: .globl y -;CHECK: y = _MergedGlobals_x+4 +;CHECK: y = .L_MergedGlobals+4 +;CHECK: .size y, 4 ;CHECK: .globl z -;CHECK: z = _MergedGlobals_x+8 +;CHECK: z = .L_MergedGlobals+8 +;CHECK: .size z, 4 -;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3 +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,12,3 ;CHECK-APPLE-IOS: .globl _x -;CHECK-APPLE-IOS: _x = __MergedGlobals_x +;CHECK-APPLE-IOS: = l__MergedGlobals ;CHECK-APPLE-IOS: .globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: _y = l__MergedGlobals+4 ;CHECK-APPLE-IOS: .globl _z -;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8 +;CHECK-APPLE-IOS: _z = l__MergedGlobals+8 ;CHECK-APPLE-IOS: .subsections_via_symbols diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll index 925108308e56..6895380ca63e 100644 --- a/test/CodeGen/AArch64/global-merge-3.ll +++ b/test/CodeGen/AArch64/global-merge-3.ll @@ -1,17 +1,17 @@ -; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s -; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s -; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS @x = global [1000 x i32] zeroinitializer, align 1 @y = global [1000 x i32] zeroinitializer, align 1 @z = internal global i32 1, align 4 define void @f1(i32 %a1, i32 %a2, i32 %a3) { -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF -;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE -;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF +;CHECK-APPLE-IOS: adrp x9, l__MergedGlobals.1@PAGE +;CHECK-APPLE-IOS: add x9, x9, l__MergedGlobals.1@PAGEOFF %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3 %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3 store i32 %a1, i32* %x3, align 4 @@ -20,32 +20,32 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) { ret void } -;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x -;CHECK: .globl _MergedGlobals_x +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals ;CHECK: .align 4 -;CHECK: _MergedGlobals_x: -;CHECK: .size _MergedGlobals_x, 4004 +;CHECK: .L_MergedGlobals: +;CHECK: .size .L_MergedGlobals, 4004 -;CHECK: .type _MergedGlobals_y,@object // @_MergedGlobals_y -;CHECK: .globl _MergedGlobals_y -;CHECK: _MergedGlobals_y: -;CHECK: .size _MergedGlobals_y, 4000 +;CHECK: .type .L_MergedGlobals.1,@object // @_MergedGlobals.1 +;CHECK: .local .L_MergedGlobals.1 +;CHECK: .comm .L_MergedGlobals.1,4000,16 -;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x ;CHECK-APPLE-IOS: .align 4 -;CHECK-APPLE-IOS: __MergedGlobals_x: +;CHECK-APPLE-IOS: 
l__MergedGlobals: ;CHECK-APPLE-IOS: .long 1 ;CHECK-APPLE-IOS: .space 4000 -;CHECK-APPLE-IOS: .globl __MergedGlobals_y ; @_MergedGlobals_y -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4 +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals.1,4000,4 +;CHECK: z = .L_MergedGlobals ;CHECK: .globl x -;CHECK: x = _MergedGlobals_x+4 +;CHECK: x = .L_MergedGlobals+4 +;CHECK: .size x, 4000 ;CHECK: .globl y -;CHECK: y = _MergedGlobals_y +;CHECK: y = .L_MergedGlobals.1 +;CHECK: .size y, 4000 +;CHECK-APPLE-IOS-NOT: _z = l__MergedGlobals ;CHECK-APPLE-IOS:.globl _x -;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: _x = l__MergedGlobals+4 ;CHECK-APPLE-IOS:.globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_y +;CHECK-APPLE-IOS: _y = l__MergedGlobals.1 diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll index bc6b68a9c046..a5109f6e8ea5 100644 --- a/test/CodeGen/AArch64/global-merge-4.ll +++ b/test/CodeGen/AArch64/global-merge-4.ll @@ -64,9 +64,9 @@ define internal i32* @returnFoo() #1 { ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 0) } -;CHECK: .type _MergedGlobals,@object // @_MergedGlobals -;CHECK: .local _MergedGlobals -;CHECK: .comm _MergedGlobals,60,16 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,60,16 attributes #0 = { nounwind ssp } attributes #1 = { nounwind readnone ssp } diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll index ddc044ed9e08..8b3fc97c9e2e 100644 --- a/test/CodeGen/AArch64/global-merge-group-by-use.ll +++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) #0 { -; CHECK-NEXT: adrp x8, [[SET1:__MergedGlobals.[0-9]*]]@PAGE +; CHECK-NEXT: adrp x8, [[SET1:l__MergedGlobals.[0-9]*]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET1]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret @@ -27,7 +27,7 @@ define void @f1(i32 %a1, i32 %a2) #0 { ; CHECK-LABEL: f2: define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 { -; CHECK-NEXT: adrp x8, [[SET2:__MergedGlobals.[0-9]*]]@PAGE +; CHECK-NEXT: adrp x8, [[SET2:l__MergedGlobals.[0-9]*]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET2]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: str w2, [x8, #8] @@ -48,7 +48,7 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 { ; CHECK-LABEL: f3: define void @f3(i32 %a1, i32 %a2) #0 { ; CHECK-NEXT: adrp x8, _m3@PAGE -; CHECK-NEXT: adrp x9, [[SET3:__MergedGlobals[0-9]*]]@PAGE +; CHECK-NEXT: adrp x9, [[SET3:l__MergedGlobals[0-9]*]]@PAGE ; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF] ; CHECK-NEXT: str w1, [x9, [[SET3]]@PAGEOFF] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll index e83cbab140a7..399438925771 100644 --- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) minsize nounwind { -; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE +; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll index e6de4699132a..c3756a85feff 100644 --- 
a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) #0 { -; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE +; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index b2c11c7517c0..d2133213f186 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -3,11 +3,15 @@ ; This file contains tests for the AArch64 load/store optimizer. %padding = type { i8*, i8*, i8*, i8* } +%s.byte = type { i8, i8 } +%s.halfword = type { i16, i16 } %s.word = type { i32, i32 } %s.doubleword = type { i64, i32 } %s.quadword = type { fp128, i32 } %s.float = type { float, i32 } %s.double = type { double, i32 } +%struct.byte = type { %padding, %s.byte } +%struct.halfword = type { %padding, %s.halfword } %struct.word = type { %padding, %s.word } %struct.doubleword = type { %padding, %s.doubleword } %struct.quadword = type { %padding, %s.quadword } @@ -24,6 +28,62 @@ ; ; with X being either w1, x1, s0, d0 or q0. +declare void @bar_byte(%s.byte*, i8) + +define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-byte +; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 + %add = load i8, i8* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 + tail call void @bar_byte(%s.byte* %c, i8 %add) + ret void +} + +define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-byte +; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 + store i8 %val, i8* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 + tail call void @bar_byte(%s.byte* %c, i8 %val) + ret void +} + +declare void @bar_halfword(%s.halfword*, i16) + +define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-halfword +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 + %add = load i16, i16* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 + tail call void @bar_halfword(%s.halfword* %c, i16 %add) + ret void +} + +define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-halfword +; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 + store i16 %val, i16* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 + tail call void @bar_halfword(%s.halfword* %c, i16 %val) + ret void +} + declare void @bar_word(%s.word*, i32) define void @load-pre-indexed-word(%struct.word* %ptr) nounwind { @@ -164,6 +224,48 @@ bar: ret void } +; Check the following transform: +; +; (ldp|stp) w1, w2 [x0, #32] +; ... +; add x0, x0, #32 +; -> +; (ldp|stp) w1, w2, [x0, #32]! 
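+;
+; A minimal sketch of the shape this transform looks for (illustrative only,
+; not one of the tests; the names below are hypothetical). Note the folded
+; increment must be encodable as the LDP/STP writeback immediate, a signed
+; 7-bit value scaled by the register size (multiples of 4 in [-256, 252]
+; for w registers), which #32 satisfies:
+
+declare void @sketch_keep(i32*)
+
+define void @sketch_pair_writeback(i32* %base, i32 %val) nounwind {
+; Two adjacent word stores at base+32 are a candidate stp pair, and the
+; later use of the advanced address lets the add fold into a pre-index form.
+  %p0 = getelementptr i32, i32* %base, i64 8
+  %p1 = getelementptr i32, i32* %base, i64 9
+  store i32 %val, i32* %p0
+  store i32 %val, i32* %p1
+  call void @sketch_keep(i32* %p0)
+  ret void
+}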
+; + +define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind { +; CHECK-LABEL: load-pair-pre-indexed-word +; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! +; CHECK-NOT: add x0, x0, #32 +entry: + %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 + %a1 = load i32, i32* %a, align 4 + %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 + %b1 = load i32, i32* %b, align 4 + %add = add i32 %a1, %b1 + br label %bar +bar: + %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %add) + ret void +} + +define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { +; CHECK-LABEL: store-pair-pre-indexed-word +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! +; CHECK-NOT: add x0, x0, #32 +entry: + %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 + store i32 %val, i32* %a, align 4 + %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 + store i32 %val, i32* %b, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %val) + ret void +} + ; Check the following transform: ; ; add x8, x8, #16 @@ -174,11 +276,11 @@ bar: ; ; with X being either w0, x0, s0, d0 or q0. -%pre.struct.i32 = type { i32, i32, i32} -%pre.struct.i64 = type { i32, i64, i64} -%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>} -%pre.struct.float = type { i32, float, float} -%pre.struct.double = type { i32, double, double} +%pre.struct.i32 = type { i32, i32, i32, i32, i32} +%pre.struct.i64 = type { i32, i64, i64, i64, i64} +%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} +%pre.struct.float = type { i32, float, float, float} +%pre.struct.double = type { i32, double, double, double} define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, %pre.struct.i32* %load2) nounwind { @@ -270,6 +372,96 @@ return: ret double %ret } +define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-word3 +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i32, i32* %retptr + ret i32 %ret +} + +define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-doubleword3 +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i64, i64* %retptr + ret i64 %ret +} + +define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-quadword3 +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load <2 x i64>, <2 x i64>* %retptr + ret <2 x i64> %ret +} + +define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-float3 +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load float, float* %retptr + ret float %ret +} + +define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-double3 +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load double, double* %retptr + ret double %ret +} + ; Check the following transform: ; ; add x8, x8, #16 @@ -375,6 +567,101 @@ return: ret void } +define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2, + i32 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-word3 +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i32 %val, i32* %retptr + ret void +} + +define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2, + i64 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-doubleword3 +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i64 %val, i64* %retptr + ret void +} + +define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2, + <2 x i64> %val) nounwind { +; CHECK-LABEL: store-pre-indexed-quadword3 +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + store <2 x i64> %val, <2 x i64>* %retptr + ret void +} + +define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2, + float %val) nounwind { +; CHECK-LABEL: store-pre-indexed-float3 +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + store float %val, float* %retptr + ret void +} + +define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2, + double %val) nounwind { +; CHECK-LABEL: store-pre-indexed-double3 +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + store double %val, double* %retptr + ret void +} + ; Check the following transform: ; ; ldr X, [x20] @@ -385,6 +672,54 @@ return: ; ; with X being either w0, x0, s0, d0 or q0. 
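+
+; A minimal sketch of that shape (illustrative only, not one of the tests;
+; the names below are hypothetical): a load from a pointer that is advanced
+; by a constant on each iteration. The folded increment becomes the
+; post-index writeback immediate, a signed 9-bit value in [-256, 255]:
+
+declare void @sketch_use(i32)
+
+define void @sketch_post_index(i32* %p, i64 %n) nounwind {
+entry:
+  br label %loop
+loop:
+  %ptr = phi i32* [ %p, %entry ], [ %ptr.next, %loop ]
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %v = load i32, i32* %ptr
+  call void @sketch_use(i32 %v)
+; The 16-byte advance below is what becomes, e.g., ldr w8, [x0], #16.
+  %ptr.next = getelementptr i32, i32* %ptr, i64 4
+  %i.next = add i64 %i, 1
+  %done = icmp eq i64 %i.next, %n
+  br i1 %done, label %exit, label %loop
+exit:
+  ret void
+}
+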
+define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-byte +; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4 +entry: + %gep1 = getelementptr i8, i8* %array, i64 2 + br label %body + +body: + %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i8, i8* %iv2, i64 -1 + %load = load i8, i8* %gep2 + call void @use-byte(i8 %load) + %load2 = load i8, i8* %iv2 + call void @use-byte(i8 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i8, i8* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-halfword +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8 +entry: + %gep1 = getelementptr i16, i16* %array, i64 2 + br label %body + +body: + %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i16, i16* %iv2, i64 -1 + %load = load i16, i16* %gep2 + call void @use-halfword(i16 %load) + %load2 = load i16, i16* %iv2 + call void @use-halfword(i16 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i16, i16* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + define void @load-post-indexed-word(i32* %array, i64 %count) nounwind { ; CHECK-LABEL: load-post-indexed-word ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16 @@ -515,6 +850,52 @@ exit: ; ; with X being either w0, x0, s0, d0 or q0. +define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind { +; CHECK-LABEL: store-post-indexed-byte +; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4 +entry: + %gep1 = getelementptr i8, i8* %array, i64 2 + br label %body + +body: + %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i8, i8* %iv2, i64 -1 + %load = load i8, i8* %gep2 + call void @use-byte(i8 %load) + store i8 %val, i8* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i8, i8* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind { +; CHECK-LABEL: store-post-indexed-halfword +; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8 +entry: + %gep1 = getelementptr i16, i16* %array, i64 2 + br label %body + +body: + %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i16, i16* %iv2, i64 -1 + %load = load i16, i16* %gep2 + call void @use-halfword(i16 %load) + store i16 %val, i16* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i16, i16* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind { ; CHECK-LABEL: store-post-indexed-word ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16 @@ -630,12 +1011,98 @@ exit: ret void } +declare void @use-byte(i8) +declare void @use-halfword(i16) declare void @use-word(i32) declare void @use-doubleword(i64) declare void @use-quadword(<2 x i64>) declare void @use-float(float) declare void @use-double(double) +; Check the following transform: +; +; stp w0, [x20] +; ... 
+; add x20, x20, #32 +; -> +; stp w0, [x20], #32 + +define void @store-pair-post-indexed-word() nounwind { +; CHECK-LABEL: store-pair-post-indexed-word +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16 +; CHECK: ret + %src = alloca { i32, i32 }, align 8 + %dst = alloca { i32, i32 }, align 8 + + %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0 + %src.real = load i32, i32* %src.realp + %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1 + %src.imag = load i32, i32* %src.imagp + + %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1 + store i32 %src.real, i32* %dst.realp + store i32 %src.imag, i32* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-doubleword() nounwind { +; CHECK-LABEL: store-pair-post-indexed-doubleword +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32 +; CHECK: ret + %src = alloca { i64, i64 }, align 8 + %dst = alloca { i64, i64 }, align 8 + + %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0 + %src.real = load i64, i64* %src.realp + %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1 + %src.imag = load i64, i64* %src.imagp + + %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1 + store i64 %src.real, i64* %dst.realp + store i64 %src.imag, i64* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-float() nounwind { +; CHECK-LABEL: store-pair-post-indexed-float +; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16 +; CHECK: ret + %src = alloca { float, float }, align 8 + %dst = alloca { float, float }, align 8 + + %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0 + %src.real = load float, float* %src.realp + %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1 + %src.imag = load float, float* %src.imagp + + %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1 + store float %src.real, float* %dst.realp + store float %src.imag, float* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-double() nounwind { +; CHECK-LABEL: store-pair-post-indexed-double +; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32 +; CHECK: ret + %src = alloca { double, double }, align 8 + %dst = alloca { double, double }, align 8 + + %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0 + %src.real = load double, double* %src.realp + %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1 + %src.imag = load double, double* %src.imagp + + %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1 + store double %src.real, double* %dst.realp + store double %src.imag, double* %dst.imagp + ret void +} + ; Check the following transform: ; ; (ldr|str) X, [x20] diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll index 18dbad4ce25b..86f5edd5da1d 100644 --- a/test/CodeGen/AArch64/merge-store.ll +++ b/test/CodeGen/AArch64/merge-store.ll @@ -1,4 +1,5 @@ ; RUN: llc -march aarch64 
%s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE
 
 @g0 = external global <3 x float>, align 16
 @g1 = external global <3 x float>, align 4
@@ -18,3 +19,32 @@ define void @blam() {
   store float %tmp9, float* %tmp7
   ret void;
 }
+
+
+; PR21711 - Merge vector stores into wider vector stores.
+
+; On Cyclone, the stores should not get merged into a 16-byte store because
+; unaligned 16-byte stores are slow. This test would infinite loop when
+; the fastness of unaligned accesses was not specified correctly.
+
+define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
+  %idx0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
+  %idx1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 4
+
+  %shuffle0 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuffle1 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+
+  store <2 x float> %shuffle0, <2 x float>* %idx0, align 8
+  store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
+  ret void
+
+; CHECK-LABEL: merge_vec_extract_stores
+; CHECK: stur q0, [x0, #24]
+; CHECK-NEXT: ret
+
+; CYCLONE-LABEL: merge_vec_extract_stores
+; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8
+; CYCLONE-NEXT: str d0, [x0, #24]
+; CYCLONE-NEXT: str d1, [x0, #32]
+; CYCLONE-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
new file mode 100644
index 000000000000..d38869329034
--- /dev/null
+++ b/test/CodeGen/AArch64/misched-fusion.ll
@@ -0,0 +1,34 @@
+; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
+target triple = "arm64-apple-ios"
+
+declare void @foobar(i32 %v0, i32 %v1)
+
+; Make sure sub is scheduled in front of cbnz
+; CHECK-LABEL: test_sub_cbz:
+; CHECK: add w[[ADDRES:[0-9]+]], w1, #7
+; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13
+; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]]
+; CHECK: mov x0, x[[ADDRES]]
+; CHECK: mov x1, x[[SUBRES]]
+; CHECK: bl _foobar
+; CHECK: [[SKIPBLOCK]]:
+; CHECK: mov x0, x[[SUBRES]]
+; CHECK: mov x1, x[[ADDRES]]
+; CHECK: bl _foobar
+define void @test_sub_cbz(i32 %a0, i32 %a1) {
+entry:
+  ; except for the fusion opportunity the sub/add should be equal so the
+  ; scheduler would leave them in source order if it weren't for the scheduling
+  %v0 = sub i32 %a0, 13
+  %cond = icmp eq i32 %v0, 0
+  %v1 = add i32 %a1, 7
+  br i1 %cond, label %if, label %exit
+
+if:
+  call void @foobar(i32 %v1, i32 %v0)
+  br label %exit
+
+exit:
+  call void @foobar(i32 %v0, i32 %v1)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index 4515697b9991..e93521858a31 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
+
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
 ; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
 ; CHECK-DAG: umulh
@@ -16,3 +17,31 @@
   %prod = mul i128 %lhs, %rhs
   ret i128 %prod
 }
+
+; The machine combiner should create madd instructions when
+; optimizing for size because that's smaller than mul + add.
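+
+; For reference, the decomposition behind these checks, sketched here as
+; arithmetic rather than taken from the test: write each i128 operand as
+; 64-bit halves, lhs = l1:l0 and rhs = r1:r0. Then
+;
+;   lo(lhs*rhs) = low64(l0*r0)                    ->  mul  x0, x0, x2
+;   hi(lhs*rhs) = umulh(l0,r0) + l0*r1 + l1*r0    ->  umulh + two madds
+;
+; Each multiply-plus-accumulate pair folds into a single madd, which is why
+; the optsize/minsize checks below expect umulh, madd, madd, mul.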
+ +define i128 @test_128bitmul_optsize(i128 %lhs, i128 %rhs) optsize { +; CHECK-LABEL: test_128bitmul_optsize: +; CHECK: umulh [[HI:x[0-9]+]], x0, x2 +; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]] +; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]] +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: ret + + %prod = mul i128 %lhs, %rhs + ret i128 %prod +} + +define i128 @test_128bitmul_minsize(i128 %lhs, i128 %rhs) minsize { +; CHECK-LABEL: test_128bitmul_minsize: +; CHECK: umulh [[HI:x[0-9]+]], x0, x2 +; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]] +; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]] +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: ret + + %prod = mul i128 %lhs, %rhs + ret i128 %prod +} + diff --git a/test/CodeGen/AArch64/nest-register.ll b/test/CodeGen/AArch64/nest-register.ll index 9c659fb74ec4..cc42913e10a6 100644 --- a/test/CodeGen/AArch64/nest-register.ll +++ b/test/CodeGen/AArch64/nest-register.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Tests that the 'nest' parameter attribute causes the relevant parameter to be ; passed in the right register. diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll new file mode 100644 index 000000000000..db9779e03190 --- /dev/null +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -0,0 +1,339 @@ +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s + +define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 { +; CHECK-LABEL: test_stnp_v4i64: +; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1] +; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16] +; CHECK-NEXT: stnp d0, d[[HI0]], [x0] +; CHECK-NEXT: ret + store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 { +; CHECK-LABEL: test_stnp_v4i32: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 { +; CHECK-LABEL: test_stnp_v8i16: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 { +; CHECK-LABEL: test_stnp_v16i8: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 { +; CHECK-LABEL: test_stnp_v2i32: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 { +; CHECK-LABEL: test_stnp_v4i16: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 { +; CHECK-LABEL: test_stnp_v8i8: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0 + ret void +} + +define void 
@test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v1f64: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 { +; CHECK-LABEL: test_stnp_v1i64: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64: +; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0] +; CHECK-NEXT: ret + store i64 %v, i64* %p, align 1, !nontemporal !0 + ret void +} + + +define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64_offset: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1 + store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64_offset_neg: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1 + store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1 + store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_neg: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1 + store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64_offset: +; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8] +; CHECK-NEXT: ret + %tmp0 = getelementptr i64, i64* %p, i32 1 + store i64 %v, i64* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64_offset_neg: +; 
CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8] +; CHECK-NEXT: ret + %tmp0 = getelementptr i64, i64* %p, i32 -1 + store i64 %v, i64* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_504: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 504 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 508 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -520 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_neg_512: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + + +define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: 
ret + %tmp0 = getelementptr i8, i8* %p, i32 256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_252: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 252 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -260 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_neg_256: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +declare void @dummy(<4 x float>*) + +define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca: +; CHECK: stnp d0, d{{.*}}, [sp] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float> + store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + +define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: +; CHECK: stnp d0, d{{.*}}, [sp, #16] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float>, i32 2 + %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1 + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + +!0 = !{ i32 1 } + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index 143558f7b2c7..c59a5b6743d6 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: .xword .L_ZTIi.DW.stub-[[TYPEINFO_LBL]] ; .. and which is properly defined (in a writable section for the dynamic loader) later. 
-; CHECK: .section .data.rel,"aw"
+; CHECK: .data
; CHECK: .L_ZTIi.DW.stub:
; CHECK-NEXT: .xword _ZTIi
diff --git a/test/CodeGen/AArch64/readcyclecounter.ll b/test/CodeGen/AArch64/readcyclecounter.ll
new file mode 100644
index 000000000000..037f11809386
--- /dev/null
+++ b/test/CodeGen/AArch64/readcyclecounter.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=PERFMON
+; RUN: llc -mtriple=aarch64-unknown-unknown -mattr=-perfmon -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=NOPERFMON
+
+define i64 @test_readcyclecounter() nounwind {
+  ; CHECK-LABEL: test_readcyclecounter:
+  ; PERFMON-NEXT: mrs x0, PMCCNTR_EL0
+  ; NOPERFMON-NEXT: mov x0, xzr
+  ; CHECK-NEXT: ret
+  %tmp0 = call i64 @llvm.readcyclecounter()
+  ret i64 %tmp0
+}
+
+declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index 0d301bbd502a..ba34873eaa5b 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -27,8 +27,8 @@ define i64 @test_chains() {
; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
-; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
-; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
+; CHECK: and w0, w[[STRVAL]], #0xff
  %ret.1 = load i8, i8* %locvar
  %ret.2 = zext i8 %ret.1 to i64
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index 8b3e6dd5ad92..a397c339a2d7 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a35 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
diff --git a/test/CodeGen/AArch64/rotate.ll b/test/CodeGen/AArch64/rotate.ll
new file mode 100644
index 000000000000..5ac86d5f59c9
--- /dev/null
+++ b/test/CodeGen/AArch64/rotate.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=aarch64--linux-gnueabihf | FileCheck %s
+
+;; This used to cause a backend crash about not being able to
+;; select ROTL. Make sure it generates the basic ushr/shl.
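+;;
+;; or(lshr(x, N), shl(x, 64 - N)) is a rotate, and the DAG combiner
+;; folds such a shift pair into a single ISD::ROTL node; AArch64 has no
+;; vector rotate instruction, so the node must be expanded back into
+;; the two shifts instead of reaching instruction selection unhandled.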
+define <2 x i64> @testcase(<2 x i64>* %in) {
+; CHECK-LABEL: testcase
+; CHECK: ushr {{v[0-9]+}}.2d
+; CHECK: shl {{v[0-9]+}}.2d
+  %1 = load <2 x i64>, <2 x i64>* %in
+  %2 = lshr <2 x i64> %1, <i64 8, i64 8>
+  %3 = shl <2 x i64> %1, <i64 56, i64 56>
+  %4 = or <2 x i64> %2, %3
+  ret <2 x i64> %4
+}
diff --git a/test/CodeGen/AArch64/round-conv.ll b/test/CodeGen/AArch64/round-conv.ll
new file mode 100644
index 000000000000..5ed7d9409e3d
--- /dev/null
+++ b/test/CodeGen/AArch64/round-conv.ll
@@ -0,0 +1,330 @@
+; RUN: llc < %s -mtriple=arm64 | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK: fcvtms w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmsws(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK: fcvtms x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmsxs(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptosi float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK: fcvtms w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmswd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK: fcvtms x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmsxd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptosi double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmuws:
+; CHECK: fcvtmu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmuws(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptoui float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxs:
+; CHECK: fcvtmu x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmuxs(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptoui float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmuwd:
+; CHECK: fcvtmu w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmuwd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptoui double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxd:
+; CHECK: fcvtmu x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmuxd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptoui double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpsws:
+; CHECK: fcvtps w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testpsws(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxs:
+; CHECK: fcvtps x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testpsxs(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptosi float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpswd:
+; CHECK: fcvtps w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testpswd(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxd:
+; CHECK: fcvtps x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testpsxd(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptosi double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpuws:
+; CHECK: fcvtpu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define
i32 @testpuws(float %a) { +entry: + %call = call float @ceilf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testpuxs: +; CHECK: fcvtpu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testpuxs(float %a) { +entry: + %call = call float @ceilf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testpuwd: +; CHECK: fcvtpu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testpuwd(double %a) { +entry: + %call = call double @ceil(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testpuxd: +; CHECK: fcvtpu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testpuxd(double %a) { +entry: + %call = call double @ceil(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzsws: +; CHECK: fcvtzs w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testzsws(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptosi float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzsxs: +; CHECK: fcvtzs x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testzsxs(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptosi float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzswd: +; CHECK: fcvtzs w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testzswd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptosi double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzsxd: +; CHECK: fcvtzs x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testzsxd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptosi double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzuws: +; CHECK: fcvtzu w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testzuws(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzuxs: +; CHECK: fcvtzu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testzuxs(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzuwd: +; CHECK: fcvtzu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testzuwd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzuxd: +; CHECK: fcvtzu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testzuxd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testasws: +; CHECK: fcvtas w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testasws(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptosi float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testasxs: +; CHECK: fcvtas x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testasxs(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptosi float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testaswd: +; CHECK: fcvtas w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testaswd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptosi double %call to i32 + ret 
i32 %conv +} + +; CHECK-LABEL: testasxd: +; CHECK: fcvtas x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testasxd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptosi double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testauws: +; CHECK: fcvtau w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testauws(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testauxs: +; CHECK: fcvtau x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testauxs(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testauwd: +; CHECK: fcvtau w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testauwd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testauxd: +; CHECK: fcvtau x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testauxd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +declare float @floorf(float) nounwind readnone +declare double @floor(double) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare double @ceil(double) nounwind readnone +declare float @truncf(float) nounwind readnone +declare double @trunc(double) nounwind readnone +declare float @roundf(float) nounwind readnone +declare double @round(double) nounwind readnone diff --git a/test/CodeGen/AArch64/shrink-wrap.ll b/test/CodeGen/AArch64/shrink-wrap.ll new file mode 100755 index 000000000000..ea101a8da15d --- /dev/null +++ b/test/CodeGen/AArch64/shrink-wrap.ll @@ -0,0 +1,184 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s + +; Regression test for a crash in the ShrinkWrap pass not handling targets +; requiring a register scavenger. 
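+;
+; Shrink-wrapping sinks the prologue/epilogue past blocks that never
+; touch the stack (the early tail-call exits below), while the large
+; frame here needs the register scavenger to find an emergency spill
+; slot for out-of-range frame offsets; there are deliberately no CHECK
+; lines, so simply compiling the function without crashing is the test.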
+ +%type1 = type { i32, i32, i32 } + +@g1 = external unnamed_addr global i32, align 4 +@g2 = external unnamed_addr global i1 +@g3 = external unnamed_addr global [144 x i32], align 4 +@g4 = external unnamed_addr constant [144 x i32], align 4 +@g5 = external unnamed_addr constant [144 x i32], align 4 +@g6 = external unnamed_addr constant [144 x i32], align 4 +@g7 = external unnamed_addr constant [144 x i32], align 4 +@g8 = external unnamed_addr constant [144 x i32], align 4 +@g9 = external unnamed_addr constant [144 x i32], align 4 +@g10 = external unnamed_addr constant [144 x i32], align 4 +@g11 = external unnamed_addr global i32, align 4 +@g12 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g13 = external unnamed_addr global %type1*, align 8 +@g14 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g15 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g16 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g17 = external unnamed_addr global [62 x i32], align 4 +@g18 = external unnamed_addr global i32, align 4 +@g19 = external unnamed_addr constant [144 x i32], align 4 +@g20 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g21 = external unnamed_addr global i32, align 4 + +declare fastcc i32 @foo() + +declare fastcc i32 @bar() + +define internal fastcc i32 @func(i32 %alpha, i32 %beta) { +entry: + %v1 = alloca [2 x [11 x i32]], align 4 + %v2 = alloca [11 x i32], align 16 + %v3 = alloca [11 x i32], align 16 + switch i32 undef, label %if.end.9 [ + i32 4, label %if.then.6 + i32 3, label %if.then.2 + ] + +if.then.2: + %call3 = tail call fastcc i32 @bar() + br label %cleanup + +if.then.6: + %call7 = tail call fastcc i32 @foo() + unreachable + +if.end.9: + %tmp = load i32, i32* @g1, align 4 + %rem.i = urem i32 %tmp, 1000000 + %idxprom.1.i = zext i32 %rem.i to i64 + %tmp1 = load %type1*, %type1** @g13, align 8 + %v4 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 0 + %.b = load i1, i1* @g2, align 1 + %v5 = select i1 %.b, i32 2, i32 0 + %tmp2 = load i32, i32* @g18, align 4 + %tmp3 = load i32, i32* @g11, align 4 + %idxprom58 = sext i32 %tmp3 to i64 + %tmp4 = load i32, i32* @g21, align 4 + %idxprom69 = sext i32 %tmp4 to i64 + br label %for.body + +for.body: + %v6 = phi i32 [ 0, %if.end.9 ], [ %v7, %for.inc ] + %a.0983 = phi i32 [ 1, %if.end.9 ], [ %a.1, %for.inc ] + %arrayidx = getelementptr inbounds [62 x i32], [62 x i32]* @g17, i64 0, i64 undef + %tmp5 = load i32, i32* %arrayidx, align 4 + br i1 undef, label %for.inc, label %if.else.51 + +if.else.51: + %idxprom53 = sext i32 %tmp5 to i64 + %arrayidx54 = getelementptr inbounds [144 x i32], [144 x i32]* @g3, i64 0, i64 %idxprom53 + %tmp6 = load i32, i32* %arrayidx54, align 4 + switch i32 %tmp6, label %for.inc [ + i32 1, label %block.bb + i32 10, label %block.bb.159 + i32 7, label %block.bb.75 + i32 8, label %block.bb.87 + i32 9, label %block.bb.147 + i32 12, label %block.bb.111 + i32 3, label %block.bb.123 + i32 4, label %block.bb.135 + ] + +block.bb: + %arrayidx56 = getelementptr inbounds [144 x i32], [144 x i32]* @g6, i64 0, i64 %idxprom53 + %tmp7 = load i32, i32* %arrayidx56, align 4 + %shr = ashr i32 %tmp7, %v5 + %add57 = add nsw i32 %shr, 0 + %arrayidx61 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g14, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp8 = load i8, i8* %arrayidx61, align 1 + %conv = zext i8 %tmp8 to i32 + %add62 = add nsw i32 %conv, %add57 + br label %for.inc + +block.bb.75: + %arrayidx78 = getelementptr inbounds [144 x i32], [144 x i32]* 
@g10, i64 0, i64 %idxprom53 + %tmp9 = load i32, i32* %arrayidx78, align 4 + %shr79 = ashr i32 %tmp9, %v5 + %add80 = add nsw i32 %shr79, 0 + %add86 = add nsw i32 0, %add80 + br label %for.inc + +block.bb.87: + %arrayidx90 = getelementptr inbounds [144 x i32], [144 x i32]* @g9, i64 0, i64 %idxprom53 + %tmp10 = load i32, i32* %arrayidx90, align 4 + %shr91 = ashr i32 %tmp10, 0 + %sub92 = sub nsw i32 0, %shr91 + %arrayidx96 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g15, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp11 = load i8, i8* %arrayidx96, align 1 + %conv97 = zext i8 %tmp11 to i32 + %sub98 = sub nsw i32 %sub92, %conv97 + br label %for.inc + +block.bb.111: + %arrayidx114 = getelementptr inbounds [144 x i32], [144 x i32]* @g19, i64 0, i64 %idxprom53 + %tmp12 = load i32, i32* %arrayidx114, align 4 + %shr115 = ashr i32 %tmp12, 0 + %sub116 = sub nsw i32 0, %shr115 + %arrayidx120 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g12, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp13 = load i8, i8* %arrayidx120, align 1 + %conv121 = zext i8 %tmp13 to i32 + %sub122 = sub nsw i32 %sub116, %conv121 + br label %for.inc + +block.bb.123: + %arrayidx126 = getelementptr inbounds [144 x i32], [144 x i32]* @g5, i64 0, i64 %idxprom53 + %tmp14 = load i32, i32* %arrayidx126, align 4 + %shr127 = ashr i32 %tmp14, %v5 + %add128 = add nsw i32 %shr127, 0 + %add134 = add nsw i32 0, %add128 + br label %for.inc + +block.bb.135: + %arrayidx138 = getelementptr inbounds [144 x i32], [144 x i32]* @g4, i64 0, i64 %idxprom53 + %tmp15 = load i32, i32* %arrayidx138, align 4 + %shr139 = ashr i32 %tmp15, 0 + %sub140 = sub nsw i32 0, %shr139 + %arrayidx144 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g20, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp16 = load i8, i8* %arrayidx144, align 1 + %conv145 = zext i8 %tmp16 to i32 + %sub146 = sub nsw i32 %sub140, %conv145 + br label %for.inc + +block.bb.147: + %arrayidx150 = getelementptr inbounds [144 x i32], [144 x i32]* @g8, i64 0, i64 %idxprom53 + %tmp17 = load i32, i32* %arrayidx150, align 4 + %shr151 = ashr i32 %tmp17, %v5 + %add152 = add nsw i32 %shr151, 0 + %arrayidx156 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g16, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp18 = load i8, i8* %arrayidx156, align 1 + %conv157 = zext i8 %tmp18 to i32 + %add158 = add nsw i32 %conv157, %add152 + br label %for.inc + +block.bb.159: + %sub160 = add nsw i32 %v6, -450 + %arrayidx162 = getelementptr inbounds [144 x i32], [144 x i32]* @g7, i64 0, i64 %idxprom53 + %tmp19 = load i32, i32* %arrayidx162, align 4 + %shr163 = ashr i32 %tmp19, 0 + %sub164 = sub nsw i32 %sub160, %shr163 + %sub170 = sub nsw i32 %sub164, 0 + br label %for.inc + +for.inc: + %v7 = phi i32 [ %v6, %for.body ], [ %v6, %if.else.51 ], [ %sub170, %block.bb.159 ], [ %add158, %block.bb.147 ], [ %sub146, %block.bb.135 ], [ %add134, %block.bb.123 ], [ %sub122, %block.bb.111 ], [ %sub98, %block.bb.87 ], [ %add86, %block.bb.75 ], [ %add62, %block.bb ] + %a.1 = phi i32 [ %a.0983, %for.body ], [ undef, %if.else.51 ], [ undef, %block.bb.159 ], [ undef, %block.bb.147 ], [ undef, %block.bb.135 ], [ undef, %block.bb.123 ], [ undef, %block.bb.111 ], [ undef, %block.bb.87 ], [ undef, %block.bb.75 ], [ undef, %block.bb ] + %cmp48 = icmp sgt i32 %a.1, %tmp2 + br i1 %cmp48, label %for.end, label %for.body + +for.end: + store i32 %tmp, i32* %v4, align 4 + %hold_hash.i.7 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 1 + store i32 0, i32* %hold_hash.i.7, 
align 4 + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %call3, %if.then.2 ], [ undef, %for.end ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/AArch64/stackmap-frame-setup.ll b/test/CodeGen/AArch64/stackmap-frame-setup.ll new file mode 100644 index 000000000000..4712012b0d25 --- /dev/null +++ b/test/CodeGen/AArch64/stackmap-frame-setup.ll @@ -0,0 +1,20 @@ +; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL +; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL + +define void @caller_meta_leaf() { +entry: + %metadata = alloca i64, i32 3, align 8 + store i64 11, i64* %metadata + store i64 12, i64* %metadata + store i64 13, i64* %metadata +; ISEL: ADJCALLSTACKDOWN 0, implicit-def +; ISEL-NEXT: STACKMAP +; ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata) +; FAST-ISEL: ADJCALLSTACKDOWN 0, implicit-def +; FAST-ISEL-NEXT: STACKMAP +; FAST-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def + ret void +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index e5766154bb46..fa5d8b943b6b 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -59,8 +59,7 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) { ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 ret void } @@ -89,8 +88,7 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { ret void ; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 } diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll index 4d80f2ac5c12..bcc8af8d0690 100644 --- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll +++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s ; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. 
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" diff --git a/test/CodeGen/AArch64/tbi.ll b/test/CodeGen/AArch64/tbi.ll new file mode 100644 index 000000000000..ab2d31b7cacc --- /dev/null +++ b/test/CodeGen/AArch64/tbi.ll @@ -0,0 +1,102 @@ +; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios8.0.0 < %s \ +; RUN: | FileCheck --check-prefix=TBI --check-prefix=BOTH %s +; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios7.1.0 < %s \ +; RUN: | FileCheck --check-prefix=NO_TBI --check-prefix=BOTH %s + +; BOTH-LABEL:ld_and32: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and32(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r & MASK) + 4 +; BOTH-LABEL:ld_and_plus_offset: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and_plus_offset(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %gep = getelementptr i32, i32* %cast, i64 4 + %load = load i32, i32* %gep + ret i32 %load +} + +; load (r & WIDER_MASK) +; BOTH-LABEL:ld_and32_wider: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and32_wider(i64 %p) { + %and = and i64 %p, 1152921504606846975 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; BOTH-LABEL:ld_and64: +; TBI-NOT: and x +; NO_TBI: and x +define i64 @ld_and64(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i64* + %load = load i64, i64* %cast + ret i64 %load +} + +; BOTH-LABEL:st_and32: +; TBI-NOT: and x +; NO_TBI: and x +define void @st_and32(i64 %p, i32 %v) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + store i32 %v, i32* %cast + ret void +} + +; load (x1 + x2) & MASK +; BOTH-LABEL:ld_ro: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_ro(i64 %a, i64 %b) { + %p = add i64 %a, %b + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r1 & MASK) + r2 +; BOTH-LABEL:ld_ro2: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_ro2(i64 %a, i64 %b) { + %and = and i64 %a, 72057594037927935 + %p = add i64 %and, %b + %cast = inttoptr i64 %p to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r1 & MASK) | r2 +; BOTH-LABEL:ld_indirect_and: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_indirect_and(i64 %r1, i64 %r2) { + %and = and i64 %r1, 72057594037927935 + %p = or i64 %and, %r2 + %cast = inttoptr i64 %p to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; BOTH-LABEL:ld_and32_narrower: +; BOTH: and x +define i32 @ld_and32_narrower(i64 %p) { + %and = and i64 %p, 36028797018963967 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} diff --git a/test/CodeGen/AArch64/vector-fcopysign.ll b/test/CodeGen/AArch64/vector-fcopysign.ll new file mode 100644 index 000000000000..865a0a5b8580 --- /dev/null +++ b/test/CodeGen/AArch64/vector-fcopysign.ll @@ -0,0 +1,178 @@ +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +;============ v1f32 + +; WidenVecRes same +define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v1f32_v1f32: +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) + ret <1 x float> %r +} + +; WidenVecRes mismatched +define 
<1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v1f32_v1f64: +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fptrunc <1 x double> %b to <1 x float> + %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0) + ret <1 x float> %r +} + +declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0 + +;============ v1f64 + +; WidenVecOp #1 +define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v1f64_v1f32: +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: fneg.2d v2, v2 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fpext <1 x float> %b to <1 x double> + %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0) + ret <1 x double> %r +} + +define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v1f64_v1f64: +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: fneg.2d v2, v2 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %r +} + +declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0 + +;============ v2f32 + +define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v2f32_v2f32: +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v2f32_v2f64: +; CHECK-NEXT: fcvtn v1.2s, v1.2d +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fptrunc <2 x double> %b to <2 x float> + %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0) + ret <2 x float> %r +} + +declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0 + +;============ v4f32 + +define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v4f32_v4f32: +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %r +} + +; SplitVecOp #1 +define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v4f32_v4f64: +; CHECK-NEXT: mov s3, v0[1] +; CHECK-NEXT: mov d4, v1[1] +; CHECK-NEXT: movi.4s v5, #0x80, lsl #24 +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: mov s6, v0[2] +; CHECK-NEXT: mov s7, v0[3] +; CHECK-NEXT: fcvt s16, d2 +; CHECK-NEXT: bit.16b v0, v1, v5 +; CHECK-NEXT: bit.16b v6, v16, v5 +; CHECK-NEXT: fcvt s1, d4 +; CHECK-NEXT: bit.16b v3, v1, v5 +; CHECK-NEXT: mov d1, v2[1] +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: ins.s v0[1], v3[0] +; CHECK-NEXT: ins.s v0[2], v6[0] +; CHECK-NEXT: bit.16b v7, v1, v5 +; CHECK-NEXT: ins.s v0[3], v7[0] +; CHECK-NEXT: ret + %tmp0 = fptrunc <4 x double> %b to <4 x float> + %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0) + ret <4 x float> %r +} + +declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0 + +;============ v2f64 + +define 
<2 x double> @test_copysign_v2f64_v2f32(<2 x double> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f32:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %tmp0 = fpext <2 x float> %b to <2 x double>
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
+  ret <2 x double> %r
+}
+
+define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %r
+}
+
+declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK-NEXT: movi.2d v3, #0000000000000000
+; CHECK-NEXT: fcvtl2 v4.2d, v2.4s
+; CHECK-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-NEXT: fneg.2d v3, v3
+; CHECK-NEXT: bit.16b v1, v4, v3
+; CHECK-NEXT: bit.16b v0, v2, v3
+; CHECK-NEXT: ret
+  %tmp0 = fpext <4 x float> %b to <4 x double>
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
+  ret <4 x double> %r
+}
+
+; SplitVecRes same
+define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v0, v2, v4
+; CHECK-NEXT: bit.16b v1, v3, v4
+; CHECK-NEXT: ret
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %r
+}
+
+declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/xbfiz.ll b/test/CodeGen/AArch64/xbfiz.ll
index f763400d7f6a..3211cc3f2ced 100644
--- a/test/CodeGen/AArch64/xbfiz.ll
+++ b/test/CodeGen/AArch64/xbfiz.ll
@@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
  %shr = lshr i32 %shl, 2
  ret i32 %shr
}
+
+define i64 @ubfiz64and(i64 %v) {
+; CHECK-LABEL: ubfiz64and:
+; CHECK: ubfiz x0, x0, #36, #11
+  %shl = shl i64 %v, 36
+  %and = and i64 %shl, 140668768878592
+  ret i64 %and
+}
+
+define i32 @ubfiz32and(i32 %v) {
+; CHECK-LABEL: ubfiz32and:
+; CHECK: ubfiz w0, w0, #6, #24
+  %shl = shl i32 %v, 6
+  %and = and i32 %shl, 1073741760
+  ret i32 %and
+}
+
+; Check that we don't generate a ubfiz if the lsl has more than one
+; use, since we'd just be replacing an and with a ubfiz.
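+; (ubfiz Rd, Rn, #lsb, #width computes (Rn & ((1 << width) - 1)) << lsb,
+; so it is only profitable when it can replace both the shift and the
+; mask.)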
+define i32 @noubfiz32(i32 %v) { +; CHECK-LABEL: noubfiz32: +; CHECK: lsl w[[REG1:[0-9]+]], w0, #6 +; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0 +; CHECK: add w0, w[[REG1]], w[[REG2]] +; CHECK: ret + %shl = shl i32 %v, 6 + %and = and i32 %shl, 1073741760 + %add = add i32 %shl, %and + ret i32 %add +} diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll index 655e75dbc1a4..2ddfa9649ac9 100644 --- a/test/CodeGen/AMDGPU/add.ll +++ b/test/CodeGen/AMDGPU/add.ll @@ -5,7 +5,7 @@ ;FUNC-LABEL: {{^}}test1: ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}} ;SI-NOT: [[REG]] ;SI: buffer_store_dword [[REG]], define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -21,8 +21,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 @@ -39,10 +39,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/AMDGPU/address-space.ll b/test/CodeGen/AMDGPU/address-space.ll index 4be8c5847529..3aa2f653bf9c 100644 --- a/test/CodeGen/AMDGPU/address-space.ll +++ b/test/CodeGen/AMDGPU/address-space.ll @@ -5,15 +5,11 @@ %struct.foo = type { [3 x float], [3 x float] } -; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is -; already in a VGPR after the first read. 
- ; CHECK-LABEL: {{^}}do_as_ptr_calcs: ; CHECK: s_load_dword [[SREG1:s[0-9]+]], -; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]] ; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12 -; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20 +; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:20 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll new file mode 100644 index 000000000000..61bcd4b3c093 --- /dev/null +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -0,0 +1,66 @@ +; RUN: not llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: unsupported addrspacecast not implemented + +; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s + +; Disable optimizations in case there are optimizations added that +; specialize away generic pointer accesses. + +; CHECK-LABEL: {{^}}branch_use_flat_i32: +; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: s_endpgm +define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { +entry: + %cmp = icmp ne i32 %c, 0 + br i1 %cmp, label %local, label %global + +local: + %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* + br label %end + +global: + %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + br label %end + +end: + %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] + store i32 %x, i32 addrspace(4)* %fptr, align 4 +; %val = load i32, i32 addrspace(4)* %fptr, align 4 +; store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; TODO: This should not be zero when registers are used for small +; scratch allocations again. + +; Check for prologue initializing special SGPRs pointing to scratch. 
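+; (FLAT instructions reach scratch through the flat_scratch register
+; pair, so the prologue has to initialize it before the first flat
+; access to the alloca below.)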
+; CHECK-LABEL: {{^}}store_flat_scratch: +; CHECK: s_movk_i32 flat_scratch_lo, 0 +; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} +; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} +; CHECK: flat_store_dword +; CHECK: s_barrier +; CHECK: flat_load_dword +define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { + %alloca = alloca i32, i32 9, align 4 + %x = call i32 @llvm.r600.read.tidig.x() #3 + %pptr = getelementptr i32, i32* %alloca, i32 %x + %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* + store i32 %x, i32 addrspace(4)* %fptr + ; Dummy call + call void @llvm.AMDGPU.barrier.local() #1 + %reload = load i32, i32 addrspace(4)* %fptr, align 4 + store i32 %reload, i32 addrspace(1)* %out, align 4 + ret void +} + +declare void @llvm.AMDGPU.barrier.local() #1 +declare i32 @llvm.r600.read.tidig.x() #3 + +attributes #0 = { nounwind } +attributes #1 = { nounwind convergent } +attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/and.ll b/test/CodeGen/AMDGPU/and.ll index 5672d470bd7e..f83fb16101fb 100644 --- a/test/CodeGen/AMDGPU/and.ll +++ b/test/CodeGen/AMDGPU/and.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +declare i32 @llvm.r600.read.tidig.x() #0 + ; FUNC-LABEL: {{^}}test2: ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -54,13 +56,80 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) { ret void } -; FUNC-LABEL: {{^}}v_and_i32: -; SI: v_and_b32 -define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { - %a = load i32, i32 addrspace(1)* %aptr, align 4 - %b = load i32, i32 addrspace(1)* %bptr, align 4 +; FIXME: We should really duplicate the constant so that the SALU use +; can fold into the s_and_b32 and the VALU one is materialized +; directly without copying from the SGPR. + +; Second use is a VGPR use of the constant. +; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687 +; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] +; SI: buffer_store_dword [[VK]] +define void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, 1234567 + + ; Just to stop future replacement of copy to vgpr + store with VALU op. + %foo = add i32 %and, %b + store volatile i32 %foo, i32 addrspace(1)* %out + store volatile i32 1234567, i32 addrspace(1)* %out + ret void +} + +; Second use is another SGPR use of the constant. 
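+; (Each SALU instruction can encode one 32-bit literal operand directly,
+; so ideally 0x12d687 would be re-emitted inline for both uses instead
+; of being pinned in an SGPR; see the FIXME above.)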
+; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687 +; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI: s_add_i32 +; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI: buffer_store_dword [[VK]] +define void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, 1234567 + %foo = add i32 %and, 1234567 + %bar = add i32 %foo, %b + store volatile i32 %bar, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr: +; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep.a + %b = load i32, i32 addrspace(1)* %gep.b %and = and i32 %a, %b - store i32 %and, i32 addrspace(1)* %out, align 4 + store i32 %and, i32 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr: +; SI-DAG: s_load_dword [[SA:s[0-9]+]] +; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] +; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] +define void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %b = load i32, i32 addrspace(1)* %gep.b + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr: +; SI-DAG: s_load_dword [[SA:s[0-9]+]] +; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] +; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] +define void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep.a + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %gep.out ret void } @@ -148,9 +217,23 @@ endif: } ; FUNC-LABEL: {{^}}v_and_constant_i64: -; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207 +; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}} +; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], {{v[0-9]+}} +; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], {{v[0-9]+}} +; SI: buffer_store_dwordx2 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %and = and i64 %a, 1231231234567 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FIXME: Should replace and 0 +; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant: +; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { %a = load i64, i64 addrspace(1)* %aptr, align 8 %and = and i64 %a, 1234567 store i64 %and, i64 addrspace(1)* %out, align 8 @@ -294,3 +377,5 @@ define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 a store i64 %and, i64 addrspace(1)* %out, align 8 ret void } + +attributes #0 = { 
nounwind readnone } diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/test/CodeGen/AMDGPU/annotate-kernel-features.ll new file mode 100644 index 000000000000..b116c72322bb --- /dev/null +++ b/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -0,0 +1,193 @@ +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA -check-prefix=ALL %s +; RUN: opt -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +declare i32 @llvm.r600.read.global.size.x() #0 +declare i32 @llvm.r600.read.global.size.y() #0 +declare i32 @llvm.r600.read.global.size.z() #0 + + +; ALL: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 { +define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 { +define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 { +define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 { +define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 { +define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 { +define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.y() + %val1 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 { +define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.y() + %val2 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { + 
%val = call i32 @llvm.r600.read.tidig.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 { +define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tidig.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 { +define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tidig.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tgid.x() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 { +define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.y() + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 { +define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tidig.y() + %val2 = call i32 @llvm.r600.read.tidig.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 { +define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tidig.y() + %val2 = call i32 @llvm.r600.read.tidig.z() + %val3 = call i32 @llvm.r600.read.tgid.x() + %val4 = call i32 @llvm.r600.read.tgid.y() + %val5 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + store volatile i32 %val3, i32 addrspace(1)* %ptr + store volatile i32 %val4, i32 addrspace(1)* %ptr + store volatile i32 %val5, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +; HSA: attributes #0 = { nounwind readnone } +; HSA: attributes #1 = { nounwind } +; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" } +; HSA: attributes #3 = { nounwind 
"amdgpu-work-group-id-z" } +; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" } +; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" } +; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll index 8c2a0795860d..f8a74222d566 100644 --- a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone -declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate +declare void @llvm.AMDGPU.barrier.local() nounwind convergent ; The required pointer calculations for the alloca'd actually requires ; an add and won't be folded into the addressing, which fails with a @@ -14,7 +14,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; FIXME: We end up with zero argument for ADD, because ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index ; with the appropriate offset. We should fold this into the store. -; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}} +; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}} ; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}] ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this @@ -22,7 +22,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; to interpret: ; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b -; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16 +; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16 ; SI-PROMOTE: ds_write_b32 [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 @@ -35,7 +35,7 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b store i32 %result, i32* %alloca_ptr, align 4 ; Dummy call - call void @llvm.AMDGPU.barrier.local() nounwind noduplicate + call void @llvm.AMDGPU.barrier.local() nounwind convergent %reload = load i32, i32* %alloca_ptr, align 4 %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 diff --git a/test/CodeGen/AMDGPU/bitreverse.ll b/test/CodeGen/AMDGPU/bitreverse.ll new file mode 100644 index 000000000000..0ef7d5184c1f --- /dev/null +++ b/test/CodeGen/AMDGPU/bitreverse.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i16 @llvm.bitreverse.i16(i16) #1 +declare i32 @llvm.bitreverse.i32(i32) #1 +declare i64 @llvm.bitreverse.i64(i64) #1 + +declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1 +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1 + 
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1 +declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1 + +declare i32 @llvm.AMDGPU.brev(i32) #1 + +; FUNC-LABEL: {{^}}s_brev_i16: +; SI: s_brev_b32 +define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 { + %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1 + store i16 %brev, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i16: +; SI: v_bfrev_b32_e32 +define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 { + %val = load i16, i16 addrspace(1)* %valptr + %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1 + store i16 %brev, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_i32: +; SI: s_load_dword [[VAL:s[0-9]+]], +; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: buffer_store_dword [[VRESULT]], +; SI: s_endpgm +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 { + %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i32: +; SI: buffer_load_dword [[VAL:v[0-9]+]], +; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: buffer_store_dword [[RESULT]], +; SI: s_endpgm +define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 { + %val = load i32, i32 addrspace(1)* %valptr + %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_v2i32: +; SI: s_brev_b32 +; SI: s_brev_b32 +define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 { + %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1 + store <2 x i32> %brev, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_v2i32: +; SI: v_bfrev_b32_e32 +; SI: v_bfrev_b32_e32 +define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1 + store <2 x i32> %brev, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_i64: +define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 { + %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1 + store i64 %brev, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i64: +define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 { + %val = load i64, i64 addrspace(1)* %valptr + %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1 + store i64 %brev, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_v2i64: +define void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 { + %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1 + store <2 x i64> %brev, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_v2i64: +define void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1 + store <2 x i64> %brev, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}legacy_s_brev_i32: +; SI: s_brev_b32 +define void @legacy_s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %brev = call i32 @llvm.AMDGPU.brev(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void 
+} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/calling-conventions.ll b/test/CodeGen/AMDGPU/calling-conventions.ll new file mode 100644 index 000000000000..57adc8be6a99 --- /dev/null +++ b/test/CodeGen/AMDGPU/calling-conventions.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; Make sure we don't crash or assert on spir_kernel calling convention. + +; SI-LABEL: {{^}}kernel: +; SI: s_endpgm +define spir_kernel void @kernel(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; FIXME: This is treated like a kernel +; SI-LABEL: {{^}}func: +; SI: s_endpgm +define spir_func void @func(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll new file mode 100644 index 000000000000..1c5bed3b905f --- /dev/null +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -0,0 +1,98 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; OPT-LABEL: @test_no_sink_flat_small_offset_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %in +; OPT: br i1 +; OPT-NOT: ptrtoint + +; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: +; GCN: flat_load_dword +; GCN: {{^}}BB0_2: +define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(4)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT-CI-NOT: getelementptr +; OPT: br i1 + +; OPT-CI: ptrtoint +; OPT-CI: add +; OPT-CI: inttoptr +; OPT: br label + +; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32: +; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 +define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)* + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(1)* %cast + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT-CI-NOT: getelementptr +; OPT: br i1 + +; OPT-CI: ptrtoint +; OPT-CI: add +; OPT-CI: 
inttoptr +; OPT: br label + +; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32: +; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +define void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)* + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %cast + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index a68d110fdc96..698494265a7d 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -1,5 +1,7 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s @@ -115,35 +117,6 @@ done: ret void } -; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %in -; OPT: br i1 -; OPT-NOT: ptrtoint - -; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: -; GCN: flat_load_dword -; GCN: {{^}}BB4_2: - -define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { -entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %tmp0 = icmp eq i32 %cond, 0 - br i1 %tmp0, label %endif, label %if - -if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep - br label %endif - -endif: - %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep - br label %done - -done: - ret void -} - ; OPT-LABEL: @test_sink_scratch_small_offset_i32( ; OPT-NOT: getelementptr [512 x i32] ; OPT: br i1 @@ -153,7 +126,7 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} -; GCN: {{^}}BB5_2: +; GCN: {{^}}BB4_2: define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -189,7 +162,7 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} -; GCN: {{^}}BB6_2: +; GCN: {{^}}BB5_2: define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, 
i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -222,7 +195,7 @@ done: ; GCN: s_and_saveexec_b64 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -; GCN: {{^}}BB7_2: +; GCN: {{^}}BB6_2: define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) { entry: %offset.ext = zext i32 %offset to i64 @@ -246,3 +219,220 @@ done: attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + + + +; OPT-LABEL: @test_sink_constant_small_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400 + +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; GCN: s_addc_u32 
s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32 +; OPT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}} + +; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label 
%endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} diff --git a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll new file mode 100644 index 000000000000..1a37e3c75fa3 --- /dev/null +++ b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s + + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +define void @test(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll index e1a0ee3ea217..ec2971e98032 100644 --- a/test/CodeGen/AMDGPU/ctpop64.ll +++ b/test/CodeGen/AMDGPU/ctpop64.ll @@ -36,6 +36,24 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali ret void } +; FIXME: or 0 should be replaced with copy +; FUNC-LABEL: {{^}}v_ctpop_i64_user: +; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] +; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}} +; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +; GCN: s_endpgm +define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { + %val = load i64, i64 addrspace(1)* %in, align 8 + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone + %or = or i64 %ctpop, %s.val + store i64 %or, i64 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_ctpop_v2i64: ; GCN: s_bcnt1_i32_b64 ; GCN: s_bcnt1_i32_b64 @@ -99,8 +117,8 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs ; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd ; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34 ; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} -; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] -; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] +; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] +; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] ; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} ; GCN: s_endpgm define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) { diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 3399d9da29e3..834922c62cbd 100644 --- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -137,14 +137,8 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]] ; SI-NOT: bfe ; SI-NOT: lshr -; SI: buffer_store_dword -; SI: 
buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> @@ -154,7 +148,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32: ; SI: buffer_load_dword [[LOADREG:v[0-9]+]], -; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]] +; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]] ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]] ; SI: buffer_store_dword [[CONV]], define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll new file mode 100644 index 000000000000..171883e4c74b --- /dev/null +++ b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +; The memory operand was dropped from the buffer_load_dword_offset +; when it was replaced with the addr64 variant during operand legalization, +; resulting in the global loads not being scheduled together. + +; GCN-LABEL: {{^}}reschedule_global_load_lds_store: +; GCN: buffer_load_dword +; GCN: buffer_load_dword +; GCN: ds_write_b32 +; GCN: ds_write_b32 +; GCN: s_endpgm +define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 { +entry: + %tid = tail call i32 @llvm.r600.read.tidig.x() #1 + %idx = shl i32 %tid, 2 + %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx + %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx + %gep2 = getelementptr i32, i32 addrspace(3)* %lptr, i32 %tid + %cmp0 = icmp eq i32 %c, 0 + br i1 %cmp0, label %for.body, label %exit + +for.body: ; preds = %for.body, %entry + %i = phi i32 [ 0, %entry ], [ %i.inc, %for.body ] + %gptr0.phi = phi i32 addrspace(1)* [ %gep0, %entry ], [ %gep0.inc, %for.body ] + %gptr1.phi = phi i32 addrspace(1)* [ %gep1, %entry ], [ %gep1.inc, %for.body ] + %lptr0.phi = phi i32 addrspace(3)* [ %gep2, %entry ], [ %gep2.inc, %for.body ] + %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 1 + %val0 = load i32, i32 addrspace(1)* %gep0 + store i32 %val0, i32 addrspace(3)* %lptr0.phi + %val1 = load i32, i32 addrspace(1)* %gep1 + store i32 %val1, i32 addrspace(3)* %lptr1 + %gep0.inc = getelementptr i32, i32 addrspace(1)* %gptr0.phi, i32 4 + %gep1.inc = getelementptr i32, i32 addrspace(1)* %gptr1.phi, i32 4 + %gep2.inc = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 4 + %i.inc = add nsw i32 %i, 1 + %cmp1 = icmp ne i32 %i, 256 + br i1 %cmp1, label %for.body, label %exit + +exit: ; preds = %for.body, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll 
b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index 5e4654abd91b..e657991557e3 100644 --- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -10,13 +10,13 @@ declare void @llvm.AMDGPU.barrier.local() #1 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] -; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]] -; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1 @@ -66,5 +66,5 @@ for.end: ; preds = %for.body } attributes #0 = { nounwind readnone } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/AMDGPU/ds-sub-offset.ll b/test/CodeGen/AMDGPU/ds-sub-offset.ll new file mode 100644 index 000000000000..7d6eddb01993 --- /dev/null +++ b/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -0,0 +1,125 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.r600.read.tidig.x() #0 + +@lds.obj = addrspace(3) global [256 x i32] undef, align 4 + +; GCN-LABEL: {{^}}write_ds_sub0_offset0_global: +; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0 +; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]] +; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b +; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12 +define void @write_ds_sub0_offset0_global() #0 { +entry: + %x.i = call i32 @llvm.r600.read.tidig.x() #1 + %sub1 = sub i32 0, %x.i + %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1 + %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3 + store i32 123, i32 addrspace(3)* %arrayidx + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535 +define void @add_x_shl_neg_to_sub_max_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 65535, %shl + %ptr = inttoptr i32 %add to i8 addrspace(3)* + store i8 13, i8 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN: ds_write_b8 [[NEG]], [[K]]{{$}} +define void 
@add_x_shl_neg_to_sub_max_offset_p1() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 65536, %shl + %ptr = inttoptr i32 %add to i8 addrspace(3)* + store i8 13, i8 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}} +; GCN: s_endpgm +define void @add_x_shl_neg_to_sub_multi_use() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add0 = add i32 123, %shl + %add1 = add i32 456, %shl + %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr0 + %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr1 + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN: s_endpgm +define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 123, %shl + %ptr = inttoptr i32 %add to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr + store volatile i32 13, i32 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255 +define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 1019, %shl + %ptr = inttoptr i32 %add to i64 addrspace(3)* + store i64 123, i64 addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]] +; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}} +define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 1020, %shl + %ptr = inttoptr i32 %add to i64 addrspace(3)* + store i64 123, i64 addrspace(3)* %ptr, align 4 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind convergent } diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll index ec04f8b1acd6..5170d9c82712 100644 --- a/test/CodeGen/AMDGPU/ds_read2.ll +++ b/test/CodeGen/AMDGPU/ds_read2.ll @@ -216,10 +216,8 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f ret void } -; We should be able to merge in this case, but probably not worth the effort. 
-; SI-NOT: ds_read2_b32 -; SI: ds_read_b32 -; SI: ds_read_b32 +; SI-LABEL: {{^}}read2_ptr_is_subreg_f32: +; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} ; SI: s_endpgm define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -507,9 +505,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/test/CodeGen/AMDGPU/ds_read2_superreg.ll index 842c2d8bc339..0061aaf2cdbd 100644 --- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -35,14 +35,11 @@ define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 { ret void } -; FIXME: Shuffling to new superregister ; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Y:[0-9]+]]:[[REG_X:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} -; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Y:[0-9]+]], v[[REG_Y]] -; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Z:[0-9]+]], v[[REG_Z]] -; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[COPY_REG_Z]], v[[REG_X]] -; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[COPY_REG_Y]] +; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} +; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} +; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]] +; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[REG_Y]] ; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD1]], v[[ADD0]] ; CI: buffer_store_dword v[[ADD2]] ; CI: s_endpgm @@ -88,8 +85,13 @@ define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 { } ; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align8: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI-DAG: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} + +; FIXME: These moves shouldn't be necessary, it should be able to +; store the same register if offset1 was the non-zero offset. 
+ +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 { @@ -102,8 +104,9 @@ define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) } ; CI-LABEL: {{^}}simple_read2_v4f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 { @@ -115,19 +118,16 @@ define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 { ret void } +; FIXME: Extra moves shuffling superregister ; CI-LABEL: {{^}}simple_read2_v8f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword +; CI: ds_read2_b64 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT7:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT6:[0-9]+]]:[[REG_ELT5:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -138,33 +138,24 @@ define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 { ret void } +; FIXME: Extra moves shuffling superregister ; CI-LABEL: {{^}}simple_read2_v16f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:15 offset1:14{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:13 offset1:12{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:11 offset1:10{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:9 offset1:8{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: ds_read2_b64 v{{\[}}[[REG_ELT11:[0-9]+]]:[[REG_ELT15:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:7{{$}} +; CI: ds_read2_b64 
v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:6 offset1:4{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT12:[0-9]+]]:[[REG_ELT10:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: s_waitcnt lgkmcnt(0) -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -238,9 +229,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll index e2e441214b4a..4a0571ea16f2 100644 --- a/test/CodeGen/AMDGPU/ds_read2st64.ll +++ b/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -65,7 +65,7 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm @@ -197,7 +197,7 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm @@ -264,9 +264,5 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind -declare void @llvm.AMDGPU.barrier.local() #2 - attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll index d4973e377b59..9d3a293f3b89 100644 --- a/test/CodeGen/AMDGPU/ds_write2.ll +++ b/test/CodeGen/AMDGPU/ds_write2.ll @@ -345,8 +345,9 @@ define void @store_constant_disjoint_offsets() { ; SI-LABEL: @store_misaligned64_constant_offsets ; SI: v_mov_b32_e32 
[[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 +; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 +; SI: s_endpgm define void @store_misaligned64_constant_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -430,9 +431,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll index 358aa6a9e363..5a1024ccf6d7 100644 --- a/test/CodeGen/AMDGPU/ds_write2st64.ll +++ b/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -109,9 +109,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll new file mode 100644 index 000000000000..f4409a0984a9 --- /dev/null +++ b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -0,0 +1,11 @@ +; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s + +; CHECK: error: unsupported dynamic alloca in test_dynamic_stackalloc + +define void @test_dynamic_stackalloc(i32 addrspace(1)* %out, i32 %n) { + %alloca = alloca i32, i32 %n + store volatile i32 0, i32* %alloca + ret void +} diff --git a/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll new file mode 100644 index 000000000000..e32559139623 --- /dev/null +++ b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; How the replacement of i64 stores with v2i32 stores resulted in +; breaking other users of the bitcast if they already existed + +; GCN-LABEL: {{^}}extract_vector_elt_select_error: +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dwordx2 +define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) 
nounwind { + %vec = bitcast i64 %val to <2 x i32> + %elt0 = extractelement <2 x i32> %vec, i32 0 + %elt1 = extractelement <2 x i32> %vec, i32 1 + + store volatile i32 %elt0, i32 addrspace(1)* %out + store volatile i32 %elt1, i32 addrspace(1)* %out + store volatile i64 %val, i64 addrspace(1)* %in + ret void +} + + +define void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) nounwind { + %p0 = extractelement <2 x i64> %foo, i32 0 + %p1 = extractelement <2 x i64> %foo, i32 1 + %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1 + store volatile i64 %p1, i64 addrspace(1)* %out + store volatile i64 %p0, i64 addrspace(1)* %out1 + ret void +} + +define void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) nounwind { + %dynelt = extractelement <2 x i64> %foo, i32 %elt + store volatile i64 %dynelt, i64 addrspace(1)* %out + ret void +} + +define void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) nounwind { + %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo + %or = or <2 x i64> %load, %arst + %dynelt = extractelement <2 x i64> %or, i32 %elt + store volatile i64 %dynelt, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll index 485c55870c47..19c17289da3d 100644 --- a/test/CodeGen/AMDGPU/fadd64.ll +++ b/test/CodeGen/AMDGPU/fadd64.ll @@ -1,14 +1,44 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; CHECK: {{^}}fadd_f64: +; CHECK-LABEL: {{^}}v_fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} - -define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2) { - %r0 = load double, double addrspace(1)* %in1 - %r1 = load double, double addrspace(1)* %in2 - %r2 = fadd double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void +define void @v_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fadd double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}s_fadd_f64: +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) { + %r2 = fadd double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}v_fadd_v2f64: +; CHECK: v_add_f64 +; CHECK: v_add_f64 +; CHECK: buffer_store_dwordx4 +define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2) { + %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2 + %r2 = fadd <2 x double> %r0, %r1 + store <2 x double> %r2, <2 x double> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}s_fadd_v2f64: +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: buffer_store_dwordx4 +define void @s_fadd_v2f64(<2 x double> 
addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) { + %r2 = fadd <2 x double> %r0, %r1 + store <2 x double> %r2, <2 x double> addrspace(1)* %out + ret void } diff --git a/test/CodeGen/AMDGPU/fceil64.ll b/test/CodeGen/AMDGPU/fceil64.ll index e8c34f0141e4..c8ef5b101c4d 100644 --- a/test/CodeGen/AMDGPU/fceil64.ll +++ b/test/CodeGen/AMDGPU/fceil64.ll @@ -17,12 +17,12 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: cmp_gt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_lt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI-DAG: cmp_gt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cmp_lt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI-DAG: v_cmp_lt_f64 ; SI-DAG: v_cmp_lg_f64 ; SI: s_and_b64 diff --git a/test/CodeGen/AMDGPU/fcmp.ll b/test/CodeGen/AMDGPU/fcmp.ll index 5207ab57bade..97d954fcc3c2 100644 --- a/test/CodeGen/AMDGPU/fcmp.ll +++ b/test/CodeGen/AMDGPU/fcmp.ll @@ -20,7 +20,7 @@ entry: ; CHECK: {{^}}fcmp_br: ; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}} -; CHECK-NEXT {{[0-9]+(5.0}} +; CHECK-NEXT: {{[0-9]+\(5.0}} define void @fcmp_br(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll index 8ceca078f2d6..86e0c07323bb 100644 --- a/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/test/CodeGen/AMDGPU/flat-address-space.ll @@ -7,39 +7,16 @@ ; specialize away generic pointer accesses. -; CHECK-LABEL: {{^}}branch_use_flat_i32: -; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} -; CHECK: s_endpgm -define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { -entry: - %cmp = icmp ne i32 %c, 0 - br i1 %cmp, label %local, label %global - -local: - %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* - br label %end - -global: - %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* - br label %end - -end: - %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] - store i32 %x, i32 addrspace(4)* %fptr, align 4 -; %val = load i32, i32 addrspace(4)* %fptr, align 4 -; store i32 %val, i32 addrspace(1)* %out, align 4 - ret void -} - - - ; These testcases might become useless when there are optimizations to ; remove generic pointers. 
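+; Each test below exercises the same basic pattern: a pointer in some
+; specific address space is addrspacecast to the flat address space
+; (addrspace(4) on this target) and then accessed through the flat
+; pointer, e.g. as in @store_flat_i32:
+;   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+;   store i32 %x, i32 addrspace(4)* %fptr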
; CHECK-LABEL: {{^}}store_flat_i32: -; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}} -; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}} -; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], +; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]], +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] ; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* @@ -83,7 +60,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { -; CHECK-LABEL @load_flat_i32: +; CHECK-LABEL: load_flat_i32: ; CHECK: flat_load_dword define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* @@ -92,7 +69,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa ret void } -; CHECK-LABEL @load_flat_i64: +; CHECK-LABEL: load_flat_i64: ; CHECK: flat_load_dwordx2 define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* @@ -101,7 +78,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa ret void } -; CHECK-LABEL @load_flat_v4i32: +; CHECK-LABEL: load_flat_v4i32: ; CHECK: flat_load_dwordx4 define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* @@ -110,7 +87,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add ret void } -; CHECK-LABEL @sextload_flat_i8: +; CHECK-LABEL: sextload_flat_i8: ; CHECK: flat_load_sbyte define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* @@ -120,7 +97,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ret void } -; CHECK-LABEL @zextload_flat_i8: +; CHECK-LABEL: zextload_flat_i8: ; CHECK: flat_load_ubyte define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* @@ -130,7 +107,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ret void } -; CHECK-LABEL @sextload_flat_i16: +; CHECK-LABEL: sextload_flat_i16: ; CHECK: flat_load_sshort define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* @@ -140,7 +117,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ret void } -; CHECK-LABEL @zextload_flat_i16: +; CHECK-LABEL: zextload_flat_i16: ; CHECK: flat_load_ushort define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* @@ -150,35 +127,9 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ret void } - - -; TODO: This should not be zero when registers are used for small -; scratch allocations again. 
- -; Check for prologue initializing special SGPRs pointing to scratch. -; CHECK-LABEL: {{^}}store_flat_scratch: -; CHECK: s_movk_i32 flat_scratch_lo, 0 -; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} -; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} -; CHECK: flat_store_dword -; CHECK: s_barrier -; CHECK: flat_load_dword -define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { - %alloca = alloca i32, i32 9, align 4 - %x = call i32 @llvm.r600.read.tidig.x() #3 - %pptr = getelementptr i32, i32* %alloca, i32 %x - %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* - store i32 %x, i32 addrspace(4)* %fptr - ; Dummy call - call void @llvm.AMDGPU.barrier.local() #1 - %reload = load i32, i32 addrspace(4)* %fptr, align 4 - store i32 %reload, i32 addrspace(1)* %out, align 4 - ret void -} - declare void @llvm.AMDGPU.barrier.local() #1 declare i32 @llvm.r600.read.tidig.x() #3 attributes #0 = { nounwind } -attributes #1 = { nounwind noduplicate } +attributes #1 = { nounwind convergent } attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll new file mode 100644 index 000000000000..e2ae3353ae1d --- /dev/null +++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI +; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI + +; GCN-LABEL: {{^}}no_vcc_no_flat: +; GCN: ; NumSgprs: 8 +define void @no_vcc_no_flat() { +entry: + call void asm sideeffect "", "~{SGPR7}"() + ret void +} + +; GCN-LABEL: {{^}}vcc_no_flat: +; GCN: ; NumSgprs: 10 +define void @vcc_no_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{VCC}"() + ret void +} + +; GCN-LABEL: {{^}}no_vcc_flat: +; CI: ; NumSgprs: 12 +; VI: ; NumSgprs: 14 +define void @no_vcc_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() + ret void +} + +; GCN-LABEL: {{^}}vcc_flat: +; CI: ; NumSgprs: 12 +; VI: ; NumSgprs: 14 +define void @vcc_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() + ret void +} diff --git a/test/CodeGen/AMDGPU/fma-combine.ll b/test/CodeGen/AMDGPU/fma-combine.ll index bd574b877117..6f3437048ed8 100644 --- a/test/CodeGen/AMDGPU/fma-combine.ll +++ b/test/CodeGen/AMDGPU/fma-combine.ll @@ -364,5 +364,205 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias % ret void } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y: +; SI: 
v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void 
@test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +; FUNC-LABEL: {{^}}test_f32_interp: +; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]] +; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]] +define void @test_f32_interp(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2, + float addrspace(1)* %in3) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %t = load float, float addrspace(1)* %in3 + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + store float %r, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f64_interp: +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]] +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]] +define void @test_f64_interp(double addrspace(1)* %out, + double addrspace(1)* %in1, + double addrspace(1)* %in2, + double addrspace(1)* %in3) { + %x = load double, double addrspace(1)* %in1 + %y = load double, double addrspace(1)* %in2 + %t = load double, double addrspace(1)* %in3 + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + store double %r, double addrspace(1)* %out + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/fmax_legacy.ll b/test/CodeGen/AMDGPU/fmax_legacy.ll index 413957d2982a..d374fb67350c 100644 --- a/test/CodeGen/AMDGPU/fmax_legacy.ll +++ b/test/CodeGen/AMDGPU/fmax_legacy.ll @@ -87,6 +87,46 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace( ret void } +; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32: +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; EG: MAX +define void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0 + %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ogt <1 x float> %a, %b + %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b + store <1 x float> %val, <1 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32: +; SI-SAFE: v_max_legacy_f32_e32 +; SI-SAFE: 
v_max_legacy_f32_e32 +; SI-SAFE: v_max_legacy_f32_e32 +; SI-NONAN: v_max_f32_e32 +; SI-NONAN: v_max_f32_e32 +; SI-NONAN: v_max_f32_e32 +define void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0 + %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ogt <3 x float> %a, %b + %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %val, <3 x float> addrspace(1)* %out + ret void +} ; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} diff --git a/test/CodeGen/AMDGPU/fmin_legacy.ll b/test/CodeGen/AMDGPU/fmin_legacy.ll index 6a625c239d76..52fc3d0d251a 100644 --- a/test/CodeGen/AMDGPU/fmin_legacy.ll +++ b/test/CodeGen/AMDGPU/fmin_legacy.ll @@ -96,6 +96,69 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace( ret void } +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32: +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0 + %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <1 x float> %a, %b + %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b + store <1 x float> %val, <1 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32: +; SI: buffer_load_dwordx2 +; SI: buffer_load_dwordx2 +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 + +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0 + %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + store <2 x float> %val, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32: +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 + +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 + + %a 
= load <3 x float>, <3 x float> addrspace(1)* %gep.0 + %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <3 x float> %a, %b + %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %val, <3 x float> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll new file mode 100644 index 000000000000..1ee92b2f7c08 --- /dev/null +++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -0,0 +1,102 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't +; make add an instruction if the fadd has more than one use. + +declare float @llvm.fabs.f32(float) #1 + +; GCN-LABEL: {{^}}multiple_fadd_use_test: +; GCN: v_max_legacy_f32_e64 [[A16:v[0-9]+]], +; GCN: v_add_f32_e32 [[A17:v[0-9]+]], [[A16]], [[A16]] +; GCN: v_mul_f32_e32 [[A18:v[0-9]+]], [[A17]], [[A17]] +; GCN: v_mad_f32 [[A20:v[0-9]+]], -[[A18]], [[A17]], 1.0 +; GCN: buffer_store_dword [[A20]] +define void @multiple_fadd_use_test(float addrspace(1)* %out, float %x, float %y, float %z) #0 { + %a11 = fadd fast float %y, -1.0 + %a12 = call float @llvm.fabs.f32(float %a11) + %a13 = fadd fast float %x, -1.0 + %a14 = call float @llvm.fabs.f32(float %a13) + %a15 = fcmp ogt float %a12, %a14 + %a16 = select i1 %a15, float %a12, float %a14 + %a17 = fmul fast float %a16, 2.0 + %a18 = fmul fast float %a17, %a17 + %a19 = fmul fast float %a18, %a17 + %a20 = fsub fast float 1.0, %a19 + store float %a20, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_fmac +; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}} +; GCN-DAG: v_mac_f32_e64 [[MAD:v[0-9]+]], 2.0, [[X]] +; GCN-DAG: buffer_store_dword [[MUL2]] +; GCN-DAG: buffer_store_dword [[MAD]] +; GCN: s_endpgm +define void @multiple_use_fadd_fmac(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %mad = fadd fast float %mul2, %y + store float %mul2, float addrspace(1)* %out + store float %mad, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_fmad: +; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}| +; GCN-DAG: v_mad_f32 [[MAD:v[0-9]+]], 2.0, |[[X]]|, v{{[0-9]+}} +; GCN-DAG: buffer_store_dword [[MUL2]] +; GCN-DAG: buffer_store_dword [[MAD]] +; GCN: s_endpgm +define void @multiple_use_fadd_fmad(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %x.abs = call float @llvm.fabs.f32(float %x) + %mul2 = fmul fast float %x.abs, 2.0 + %mad = fadd fast float %mul2, %y + store float %mul2, float addrspace(1)* %out + store float %mad, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad: +; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X:s[0-9]+]]|, v{{[0-9]+}} +; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X]]|, v{{[0-9]+}} +define void @multiple_use_fadd_multi_fmad(float addrspace(1)* %out, float %x, float %y, float %z) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %x.abs = call float @llvm.fabs.f32(float %x) 
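+  ; Illustrative note (a sketch of the combine this file guards, not an
+  ; added check): %mul2 below has two users, %mad0 and %mad1, and each
+  ; (fadd fast (fmul fast %a, 2.0), %b) can lower to a single
+  ; v_mad_f32 2.0, %a, %b, which is why the label checks above expect the
+  ; inline constant 2.0 in two separate mads.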
+ %mul2 = fmul fast float %x.abs, 2.0 + %mad0 = fadd fast float %mul2, %y + %mad1 = fadd fast float %mul2, %z + store float %mad0, float addrspace(1)* %out + store float %mad1, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}fmul_x2_xn2: +; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], -4.0, [[X:s[0-9]+]] +; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] +; GCN: buffer_store_dword [[RESULT]] +define void @fmul_x2_xn2(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %muln2 = fmul fast float %x, -2.0 + %mul = fmul fast float %mul2, %muln2 + store float %mul, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}fmul_x2_xn3: +; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xc0c00000 +; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], [[K]] +; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] +; GCN: buffer_store_dword [[RESULT]] +define void @fmul_x2_xn3(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %muln2 = fmul fast float %x, -3.0 + %mul = fmul fast float %mul2, %muln2 + store float %mul, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind "unsafe-fp-math"="true" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll index 3b4930d9897d..b99d2712ed75 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -34,8 +34,7 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) { ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fabs = call float @llvm.fabs.f32(float %bc) @@ -49,8 +48,7 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) { ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fabs = call float @fabs(float %bc) @@ -60,8 +58,7 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { } ; FUNC-LABEL: {{^}}fneg_fabs_f32: -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) %fsub = fsub float -0.000000e+00, %fabs @@ -85,11 +82,8 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: -PV -; FIXME: SGPR should be used directly for first src operand. 
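; Annotation (illustrative sketch, assuming IEEE-754 single precision): fabs
; clears the sign bit and fneg sets it, so fneg(fabs(x)) reduces to a single
; integer OR with the sign mask:
;   %i = bitcast float %x to i32
;   %s = or i32 %i, -2147483648   ; 0x80000000, the f32 sign-bit mask
;   %r = bitcast i32 %s to float
; The updated checks verify the mask is used as an inline literal operand
; rather than first being materialized into a VGPR with v_mov_b32.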
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI-NOT: 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) %fsub = fsub <2 x float> , %fabs @@ -97,14 +91,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { ret void } -; FIXME: SGPR should be used directly for first src operand. ; FUNC-LABEL: {{^}}fneg_fabs_v4f32: -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI-NOT: 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) %fsub = fsub <4 x float> , %fabs diff --git a/test/CodeGen/AMDGPU/ftrunc.f64.ll b/test/CodeGen/AMDGPU/ftrunc.f64.ll index 6618d8b5e57e..83a8ad8901d2 100644 --- a/test/CodeGen/AMDGPU/ftrunc.f64.ll +++ b/test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -29,12 +29,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: cmp_gt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_lt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI-DAG: cmp_gt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cmp_lt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI: s_endpgm define void @ftrunc_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.trunc.f64(double %x) nounwind readnone diff --git a/test/CodeGen/AMDGPU/gep-address-space.ll b/test/CodeGen/AMDGPU/gep-address-space.ll index 471b0f6b13e7..f5ab390ce686 100644 --- a/test/CodeGen/AMDGPU/gep-address-space.ll +++ b/test/CodeGen/AMDGPU/gep-address-space.ll @@ -11,24 +11,35 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ret void } -define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space_large_offset: ; The LDS offset will be 65536 bytes, which is larger than the size of LDS on ; SI, which is why it is being OR'd with the base pointer. 
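; Annotation, with the arithmetic spelled out: element 16384 of an i32 array
; is byte offset 16384 * 4 = 65536 = 0x10000. Because that exceeds the LDS
; size on SI, a base pointer that fits in LDS is below 0x10000 and shares no
; set bits with the offset, so base | 0x10000 == base + 0x10000 and the
; cheaper s_or_b32 checked below is correct; CI instead selects s_add_i32.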
; SI: s_or_b32 ; CI: s_add_i32 ; CHECK: ds_write_b32 +define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384 store i32 99, i32 addrspace(3)* %p ret void } -define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: {{^}}gep_as_vector_v4: -; CHECK: s_add_i32 -; CHECK: s_add_i32 -; CHECK: s_add_i32 -; CHECK: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 + +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} + +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CHECK: s_endpgm +define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1 @@ -41,10 +52,15 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind ret void } -define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: {{^}}gep_as_vector_v2: -; CHECK: s_add_i32 -; CHECK: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CHECK: s_endpgm +define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { %p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1 diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll new file mode 100644 index 000000000000..bc5f031cd4a2 --- /dev/null +++ b/test/CodeGen/AMDGPU/global-constant.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s + +@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] +@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] + +; GCN-LABEL: {{^}}main: +; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 +; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 +; NOHSA: .text +; HSA: .hsatext +; GCN: readonly: +; GCN: readonly2: +define void @main(i32 %index, float addrspace(1)* %out) { + %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index + %val = load float, float addrspace(2)* %ptr + store float %val, float addrspace(1)* %out + 
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index + %val2 = load float, float addrspace(2)* %ptr2 + store float %val2, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/AMDGPU/global-extload-i32.ll b/test/CodeGen/AMDGPU/global-extload-i32.ll index 79b83452939e..e5e6be2199c3 100644 --- a/test/CodeGen/AMDGPU/global-extload-i32.ll +++ b/test/CodeGen/AMDGPU/global-extload-i32.ll @@ -49,8 +49,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64: ; SI: buffer_load_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <2 x i32>, <2 x i32> addrspace(1)* %in @@ -63,8 +62,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; SI: buffer_load_dwordx2 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <2 x i32>, <2 x i32> addrspace(1)* %in @@ -75,10 +73,8 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64: ; SI: buffer_load_dwordx4 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <4 x i32>, <4 x i32> addrspace(1)* %in @@ -93,10 +89,8 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <4 x i32>, <4 x i32> addrspace(1)* %in @@ -106,22 +100,12 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i } ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -131,14 +115,8 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i } ; FUNC-LABEL: 
{{^}}sextload_global_v8i32_to_v8i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 @@ -148,15 +126,10 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -166,50 +139,34 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i } ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <16 x i32>, <16 x i32> addrspace(1)* %in @@ -219,40 +176,19 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 } ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: 
buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <16 x i32>, <16 x i32> addrspace(1)* %in @@ -262,41 +198,15 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 } ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 @@ -331,41 +241,25 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: 
buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { @@ -376,77 +270,34 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 } ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { diff --git a/test/CodeGen/AMDGPU/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll index 146f0a5fbf26..6786e4a2f375 100644 --- a/test/CodeGen/AMDGPU/global_atomics.ll +++ b/test/CodeGen/AMDGPU/global_atomics.ll @@ -12,7 +12,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 
glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -105,7 +105,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -197,7 +197,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -289,7 +289,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -381,7 +381,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -473,7 +473,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -565,7 +565,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -657,7 +657,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -749,7 +749,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -838,7 +838,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll index bf8f11860b50..a02cbf43c400 100644 --- a/test/CodeGen/AMDGPU/half.ll +++ b/test/CodeGen/AMDGPU/half.ll @@ -105,6 +105,26 @@ define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x hal } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg: +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 + +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x float> store <8 x float> %ext, <8 x float> addrspace(1)* %out @@ -112,12 +132,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal } ; GCN-LABEL: {{^}}extload_f16_to_f64_arg: +; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} +; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}} +; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]] +; GCN: buffer_store_dwordx2 [[RESULT]] define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 { %ext = fpext half %arg to double store double %ext, double addrspace(1)* %out ret void } + ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 { %ext = fpext <2 x half> %arg to <2 x double> store <2 x double> %ext, <2 x double> addrspace(1)* %out @@ -125,6 +157,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha } ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 { %ext = fpext <3 x half> %arg to <3 x double> store <3 x double> %ext, <3 x double> addrspace(1)* %out @@ -132,6 +174,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha } ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 { %ext = fpext <4 x half> %arg to <4 x double> store <4 x double> %ext, <4 
x double> addrspace(1)* %out @@ -139,6 +194,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN: s_endpgm define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x double> store <8 x double> %ext, <8 x double> addrspace(1)* %out @@ -194,6 +280,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace( } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x float> @@ -226,6 +318,46 @@ define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x } ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32: +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 + +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 + +; GCN: s_endpgm define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 { %val = load <16 x half>, <16 x half> addrspace(1)* %in %cvt = fpext <16 x half> %val to <16 x float> @@ -246,6 +378,14 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 
offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]] +; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]] +; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x double> @@ -254,6 +394,25 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x } ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64: + +; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]] +; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32 +; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]] +; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}} + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN-NOT: v_cvt_f32_f16_e32 + +; GCN: v_cvt_f64_f32_e32 +; GCN: v_cvt_f64_f32_e32 +; GCN: v_cvt_f64_f32_e32 +; GCN-NOT: v_cvt_f64_f32_e32 + +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 +; GCN: s_endpgm define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 { %val = load <3 x half>, <3 x half> addrspace(1)* %in %cvt = fpext <3 x half> %val to <3 x double> @@ -310,13 +469,12 @@ define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 ret void } -; FIXME: Shouldn't do 4th conversion ; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16: ; GCN: buffer_load_dwordx4 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 +; GCN-NOT: v_cvt_f16_f32_e32 ; GCN: buffer_store_short ; GCN: buffer_store_dword ; GCN: s_endpgm @@ -346,14 +504,8 @@ define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 } ; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16: -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 @@ -379,54 +531,42 @@ define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 } ; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16: -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: 
buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short ; GCN: s_endpgm define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 { %val = load <16 x float>, <16 x float> addrspace(1)* %in diff --git a/test/CodeGen/AMDGPU/hsa-globals.ll b/test/CodeGen/AMDGPU/hsa-globals.ll new file mode 100644 index 000000000000..1d76c40c042e --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-globals.ll @@ -0,0 +1,132 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=ASM %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s | FileCheck %s --check-prefix=ELF + +@internal_global_program = internal addrspace(1) global i32 0 +@common_global_program = common addrspace(1) global i32 0 +@external_global_program = addrspace(1) global i32 0 + +@internal_global_agent = internal addrspace(1) global i32 0, section ".hsadata_global_agent" +@common_global_agent = common addrspace(1) global i32 0, section ".hsadata_global_agent" +@external_global_agent = addrspace(1) global i32 0, section ".hsadata_global_agent" + +@internal_readonly = internal unnamed_addr addrspace(2) constant i32 0 +@external_readonly = unnamed_addr addrspace(2) constant i32 0 + +define void @test() { + ret void +} + +; ASM: .amdgpu_hsa_module_global internal_global +; ASM: .hsadata_global_program +; ASM: internal_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global common_global +; ASM: .hsadata_global_program +; ASM: common_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_global +; ASM: .hsadata_global_program +; ASM: external_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global internal_global +; ASM: .hsadata_global_agent +; ASM: internal_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global common_global +; ASM: .hsadata_global_agent +; ASM: common_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_global +; ASM: .hsadata_global_agent +; ASM: 
external_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global internal_readonly +; ASM: .hsatext +; ASM: internal_readonly: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_readonly +; ASM: .hsatext +; ASM: external_readonly: +; ASM: .long 0 + +; ELF: Section { +; ELF: Name: .hsadata_global_program +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x100003) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000) +; ELF: SHF_WRITE (0x1) +; ELF: ] +; ELF: } + +; ELF: Section { +; ELF: Name: .hsadata_global_agent +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x900003) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_AGENT (0x800000) +; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000) +; ELF: SHF_WRITE (0x1) +; ELF: ] +; ELF: } + +; ELF: Symbol { +; ELF: Name: common_global_agent +; ELF: Binding: Local +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: common_global_program +; ELF: Binding: Local +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_global_agent +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_global_program +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_readonly +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsatext +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_global_agent +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_global_program +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_readonly +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsatext +; ELF: } diff --git a/test/CodeGen/AMDGPU/hsa-group-segment.ll b/test/CodeGen/AMDGPU/hsa-group-segment.ll new file mode 100644 index 000000000000..1999dc38a6b0 --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-group-segment.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s + +@internal_group = internal addrspace(3) global i32 undef +@external_group = addrspace(3) global i32 undef + +define void @test() { +entry: + store i32 0, i32 addrspace(3)* @internal_group + store i32 0, i32 addrspace(3)* @external_group + ret void +} + +; HSA-NOT: internal_group: +; HSA-NOT: external_group: diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll index 653a6bb1b609..abc89b7fd837 100644 --- a/test/CodeGen/AMDGPU/hsa.ll +++ b/test/CodeGen/AMDGPU/hsa.ll @@ -1,11 +1,24 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -s -sd | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa 
-mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. +; ELF: Section { +; ELF: Name: .hsatext +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0xC00007) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_AGENT (0x800000) +; ELF: SHF_AMDGPU_HSA_CODE (0x400000) +; ELF: SHF_EXECINSTR (0x4) +; ELF: SHF_WRITE (0x1) +; ELF: } + ; ELF: SHT_NOTE ; ELF: 0000: 04000000 08000000 01000000 414D4400 ; ELF: 0010: 01000000 00000000 04000000 1B000000 @@ -13,20 +26,31 @@ ; ELF: 0030: 00000000 00000000 414D4400 414D4447 ; ELF: 0040: 50550000 +; ELF: Symbol { +; ELF: Name: simple +; ELF: Type: AMDGPU_HSA_KERNEL (0xA) +; ELF: } + ; HSA: .hsa_code_object_version 1,0 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" +; HSA: .hsatext + +; HSA: .amdgpu_hsa_kernel simple ; HSA: {{^}}simple: ; HSA: .amd_kernel_code_t +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 ; HSA: .end_amd_kernel_code_t -; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0 +; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 ; Make sure we are setting the ATC bit: ; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; On VI+ we also need to set MTYPE = 2 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000 -; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 +; Make sure we generate flat store for HSA +; HSA: flat_store_dword v{{[0-9]+}} define void @simple(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll new file mode 100644 index 000000000000..5906b2f15709 --- /dev/null +++ b/test/CodeGen/AMDGPU/image-attributes.ll @@ -0,0 +1,206 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; === WIDTH ================================================================== +; 9 implicit args = 9 dwords to first image argument. 
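+; The KC0 mapping implied by the examples in this file: kernel-argument
+; dword d lands in constant register KC0[d/4], component X/Y/Z/W = d%4.
+; For instance, the first width below sits at d = 9+1 = 10, and
+; 10 = 4*2 + 2 selects KC0[2].Z; depth at d = 12 = 4*3 + 0 gives KC0[3].X.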
+; First width at dword index 9+1 -> KC0[2].Z + +; FUNC-LABEL: {{^}}width_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @width_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}width_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @width_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === HEIGHT ================================================================= +; First height at dword index 9+2 -> KC0[2].W + +; FUNC-LABEL: {{^}}height_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].W +define void @height_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}height_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].W +define void @height_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === DEPTH ================================================================== +; First depth at dword index 9+3 -> KC0[3].X + +; FUNC-LABEL: {{^}}depth_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].X +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 2 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === CHANNEL DATA TYPE ====================================================== +; First channel data type at dword index 9+4 -> KC0[3].Y + +; FUNC-LABEL: {{^}}data_type_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Y +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}data_type_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Y +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === CHANNEL ORDER ========================================================== +; First channel order at dword index 9+5 -> KC0[3].Z + +; FUNC-LABEL: {{^}}channel_order_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Z +define void 
@channel_order_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}channel_order_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Z +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === 2ND IMAGE ============================================================== +; 9 implicit args + 2 explicit args + 5 implicit args for 1st image argument +; = 16 dwords to 2nd image argument. +; Height of the second image is at 16+2 -> KC0[4].Z +; +; FUNC-LABEL: {{^}}image_arg_2nd: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[4].Z +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1, + i32 %x, + %opencl.image2d_t addrspace(1)* %in2, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare [3 x i32] @llvm.OpenCL.image.get.size.2d(%opencl.image2d_t addrspace(1)*) #0 +declare [3 x i32] @llvm.OpenCL.image.get.size.3d(%opencl.image3d_t addrspace(1)*) #0 +declare [2 x i32] @llvm.OpenCL.image.get.format.2d(%opencl.image2d_t addrspace(1)*) #0 +declare [2 x i32] @llvm.OpenCL.image.get.format.3d(%opencl.image3d_t addrspace(1)*) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} +!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @width_2d, + !10, !20, !30, !40, !50} +!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @width_3d, + !10, !21, !31, !41, !50} +!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @height_2d, + !10, !20, !30, !40, !50} +!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @height_3d, + !10, !21, !31, !41, !50} +!4 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @depth_3d, + !10, !21, !31, !41, !50} +!5 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_2d, + !10, !20, !30, !40, !50} +!6 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_3d, + !10, !21, !31, !41, !50} +!7 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_2d, + !10, !20, !30, !40, !50} +!8 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_3d, + !10, !21, !31, !41, !50} +!9 = !{void (%opencl.image3d_t addrspace(1)*, i32, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @image_arg_2nd, !12, !22, !32, !42, !52} + +!10 = !{!"kernel_arg_addr_space", i32 1, i32 1} +!20 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!21 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!30 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!31 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!40 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!41 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!50 = !{!"kernel_arg_type_qual", !"", !""} + +!12 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1, i32 1} +!22 = 
!{!"kernel_arg_access_qual", !"read_only", !"none", !"write_only", !"none"} +!32 = !{!"kernel_arg_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!42 = !{!"kernel_arg_base_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/image-resource-id.ll b/test/CodeGen/AMDGPU/image-resource-id.ll new file mode 100644 index 000000000000..d4cf34944240 --- /dev/null +++ b/test/CodeGen/AMDGPU/image-resource-id.ll @@ -0,0 +1,409 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; === 1 image arg, read_only =================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_rd_1_0(%opencl.image2d_t addrspace(1)* %in, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_rd_1_0(%opencl.image3d_t addrspace(1)* %in, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 1 image arg, write_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_wr_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_wr_1_0(%opencl.image2d_t addrspace(1)* %in, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_wr_1_0(%opencl.image3d_t addrspace(1)* %in, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 2 image args, read_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_rd_2_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_rd_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_rd_2_1(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_2_0: +; EG: MEM_RAT_CACHELESS 
STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_rd_2_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_rd_2_1(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 2 image args, write_only ================================================= + +; FUNC-LABEL: {{^}}test_2d_wr_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_wr_2_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_wr_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_wr_2_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_wr_2_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_wr_2_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, read_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2d_rd_3_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}test_3d_rd_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW 
[[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_3d_rd_3_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + %opencl.image3d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, write_only ================================================= + +; FUNC-LABEL: {{^}}test_2d_wr_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2d_wr_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + %opencl.image2d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}test_3d_wr_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_3d_wr_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + %opencl.image3d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, mixed ====================================================== + +; FUNC-LABEL: {{^}}test_2d_mix_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_mix_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_mix_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_mix_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + %opencl.image3d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_mix_3_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_mix_3_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_mix_3_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_mix_3_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + 
%opencl.image3d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d(%opencl.image2d_t addrspace(1)*) #0 +declare i32 @llvm.OpenCL.image.get.resource.id.3d(%opencl.image3d_t addrspace(1)*) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, + !14, !15, !16, !17, !18, !19} +!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_1_0, + !110, !120, !130, !140, !150} +!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_1_0, + !110, !120, !131, !141, !150} +!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_1_0, + !110, !121, !130, !140, !150} +!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_1_0, + !110, !121, !131, !141, !150} +!110 = !{!"kernel_arg_addr_space", i32 1, i32 1} +!120 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!121 = !{!"kernel_arg_access_qual", !"write_only", !"none"} +!130 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!131 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!140 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!141 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!150 = !{!"kernel_arg_type_qual", !"", !""} + +!4 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_rd_2_0, !112, !122, !132, !142, !152} +!5 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_rd_2_1, !112, !122, !132, !142, !152} +!6 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_rd_2_0, !112, !122, !133, !143, !152} +!7 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_rd_2_1, !112, !122, !133, !143, !152} +!8 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_wr_2_0, !112, !123, !132, !142, !152} +!9 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_wr_2_1, !112, !123, !132, !142, !152} +!10 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_wr_2_0, !112, !123, !133, !143, !152} +!11 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_wr_2_1, !112, !123, !133, !143, !152} +!112 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1} +!122 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"none"} +!123 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"none"} +!132 = !{!"kernel_arg_type", !"image2d_t", !"image2d_t", !"int*"} +!133 = !{!"kernel_arg_type", !"image3d_t", !"image3d_t", !"int*"} +!142 = !{!"kernel_arg_base_type", !"image2d_t", !"image2d_t", !"int*"} +!143 = !{!"kernel_arg_base_type", !"image3d_t", !"image3d_t", !"int*"} +!152 = !{!"kernel_arg_type_qual", !"", !"", !""} + +!12 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_3_0, + !114, !124, !134, !144, !154} +!13 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t 
addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_3_0, + !114, !124, !135, !145, !154} +!14 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_3_0, + !114, !125, !134, !144, !154} +!15 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_3_0, + !114, !125, !135, !145, !154} +!16 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_0, + !114, !126, !134, !144, !154} +!17 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_0, + !114, !126, !135, !145, !154} +!18 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_1, + !114, !127, !134, !144, !154} +!19 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_1, + !114, !127, !135, !145, !154} +!114 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1} +!124 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"read_only", !"none"} +!125 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"write_only", !"none"} +!126 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"read_only", !"none"} +!127 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"write_only", !"none"} +!134 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!135 = !{!"kernel_arg_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!144 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!145 = !{!"kernel_arg_base_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!154 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/imm.ll b/test/CodeGen/AMDGPU/imm.ll index 12eed550eb1f..8db9ea4ccf31 100644 --- a/test/CodeGen/AMDGPU/imm.ll +++ b/test/CodeGen/AMDGPU/imm.ll @@ -3,8 +3,7 @@ ; Use a 64-bit value with lo bits that can be represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_lo: -; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5 -; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]] +; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], 5 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]: define void @i64_imm_inline_lo(i64 addrspace(1) *%out) { entry: @@ -14,8 +13,7 @@ entry: ; Use a 64-bit value with hi bits that can be represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_hi: -; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5 -; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]] +; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], 5 ; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]] define void @i64_imm_inline_hi(i64 addrspace(1) *%out) { entry: @@ -24,10 +22,8 @@ entry: } ; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) { store i64 -9223372036854775808, i64 
addrspace(1) *%out @@ -523,10 +519,8 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { ; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { store double -0.0, double addrspace(1)* %out @@ -606,10 +600,8 @@ define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { } ; CHECK-LABEL: {{^}}store_literal_imm_f64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_literal_imm_f64(double addrspace(1)* %out) { store double 4096.0, double addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index f551606d63a7..e40cac22725c 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -5,23 +5,52 @@ ; indexing of vectors. ; CHECK-LABEL: {{^}}extract_w_offset: +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) { entry: - %0 = add i32 %in, 1 - %1 = extractelement <4 x float> , i32 %0 - store float %1, float addrspace(1)* %out + %idx = add i32 %in, 1 + %elt = extractelement <4 x float> , i32 %idx + store float %elt, float addrspace(1)* %out + ret void +} + +; XXX: Could do v_or_b32 directly +; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector: +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK: s_mov_b32 m0 +; CHECK-NEXT: v_movrels_b32_e32 +define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) { +entry: + %idx = add i32 %in, 1 + %vec = or <4 x i32> %or.val, + %elt = extractelement <4 x i32> %vec, i32 %idx + store i32 %elt, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: {{^}}extract_wo_offset: +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) { entry: - %0 = extractelement <4 x float> , i32 %in - store float %0, float addrspace(1)* %out + %elt = extractelement <4 x float> , i32 %in + store float %elt, float addrspace(1)* %out ret void } @@ -37,6 +66,19 @@ entry: ret void } +; CHECK-LABEL: 
{{^}}extract_neg_offset_sgpr_loaded: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0 +define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %or = or <4 x i32> %vec0, %vec1 + %value = extractelement <4 x i32> %or, i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr: ; The offset depends on the register that holds the first element of the vector. ; CHECK: v_readfirstlane_b32 @@ -87,6 +129,21 @@ entry: ret void } +; The vector indexed into is originally loaded into an SGPR rather +; than built with a reg_sequence + +; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} +define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = insertelement <4 x i32> %vec, i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr: ; The offset depends on the register that holds the first element of the vector. ; CHECK: v_readfirstlane_b32 diff --git a/test/CodeGen/AMDGPU/indirect-private-64.ll b/test/CodeGen/AMDGPU/indirect-private-64.ll index d63e1b6c5212..2a3b29f54fa9 100644 --- a/test/CodeGen/AMDGPU/indirect-private-64.ll +++ b/test/CodeGen/AMDGPU/indirect-private-64.ll @@ -4,7 +4,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s -declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind +declare void @llvm.AMDGPU.barrier.local() convergent nounwind ; SI-LABEL: {{^}}private_access_f64_alloca: @@ -18,7 +18,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double %array = alloca double, i32 16, align 8 %ptr = getelementptr double, double* %array, i32 %b store double %val, double* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load double, double* %ptr, align 8 store double %result, double addrspace(1)* %out, align 8 ret void @@ -29,20 +29,16 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double ; SI-ALLOCA: buffer_store_dwordx4 ; SI-ALLOCA: buffer_load_dwordx4 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_read_b64 +; SI-PROMOTE: ds_read_b64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b store <2 x double> %val, <2 x double>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = 
load <2 x double>, <2 x double>* %ptr, align 16 store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16 ret void @@ -60,7 +56,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs %array = alloca i64, i32 16, align 8 %ptr = getelementptr i64, i64* %array, i32 %b store i64 %val, i64* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load i64, i64* %ptr, align 8 store i64 %result, i64 addrspace(1)* %out, align 8 ret void @@ -71,20 +67,16 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs ; SI-ALLOCA: buffer_store_dwordx4 ; SI-ALLOCA: buffer_load_dwordx4 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_read_b64 +; SI-PROMOTE: ds_read_b64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b store <2 x i64> %val, <2 x i64>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load <2 x i64>, <2 x i64>* %ptr, align 16 store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16 ret void diff --git a/test/CodeGen/AMDGPU/inline-constraints.ll b/test/CodeGen/AMDGPU/inline-constraints.ll new file mode 100644 index 000000000000..78868710c6a2 --- /dev/null +++ b/test/CodeGen/AMDGPU/inline-constraints.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s + +; GCN-LABEL: {{^}}inline_reg_constraints: +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] + +define void @inline_reg_constraints(i32 addrspace(1)* %ptr) { +entry: + %v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + ret void +} diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index 6de3d408c486..7f9579e59782 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ 
b/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -70,8 +70,9 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl } ; SI-LABEL: {{^}}dynamic_insertelement_v8f32: -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 +; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind { %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32 @@ -79,10 +80,11 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl } ; SI-LABEL: {{^}}dynamic_insertelement_v16f32: -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 +; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind { %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64 @@ -202,10 +204,28 @@ endif: } ; SI-LABEL: {{^}}dynamic_insertelement_v2f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}} +; SI-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}} +; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}} + +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} + +; SI: s_mov_b32 m0, [[SCALEDIDX]] +; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]] + +; Increment to next element. +; FIXME: Should be able to manipulate m0 directly instead of add and +; copy. + +; SI: s_or_b32 [[IDX1:s[0-9]+]], [[SCALEDIDX]], 1 +; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000 +; SI-DAG: s_mov_b32 m0, [[IDX1]] +; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]] + +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind { %vecins = insertelement <2 x double> %a, double 8.0, i32 %b @@ -213,9 +233,16 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d ret void } +; FIXME: Inline immediate should be folded into v_movreld_b32. ; SI-LABEL: {{^}}dynamic_insertelement_v2i64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 + +; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 5{{$}} +; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0{{$}} + +; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]] +; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]] + +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind { %vecins = insertelement <2 x i64> %a, i64 5, i32 %b @@ -223,12 +250,29 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> ret void } +; FIXME: Should be able to do without stack access. The used stack +; space is also 2x what should be required. 
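(Worked numbers behind the FIXME above, a small sketch rather than part of the imported test: spilling one <N x double> needs only N * 8 bytes of scratch, yet the ScratchSize values checked below are twice that, matching the "2x what should be required" note:

for name, elems in (("v4f64", 4), ("v8f64", 8)):
    needed = elems * 8       # bytes for a single <N x double> stack copy
    asserted = 2 * needed    # doubled figure the ScratchSize checks expect
    print("%s: needs %d bytes, checks assert ScratchSize: %d"
          % (name, needed, asserted))
# v4f64: needs 32 bytes, checks assert ScratchSize: 64
# v8f64: needs 64 bytes, checks assert ScratchSize: 128
)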
+ ; SI-LABEL: {{^}}dynamic_insertelement_v4f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: SCRATCH_RSRC_DWORD + +; Stack store +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} + +; Write element +; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; Stack reload +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; Store result +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm +; SI: ScratchSize: 64 + define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind { %vecins = insertelement <4 x double> %a, double 8.0, i32 %b store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16 @@ -236,15 +280,26 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d } ; SI-LABEL: {{^}}dynamic_insertelement_v8f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: SCRATCH_RSRC_DWORD + +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}} + +; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm +; SI: ScratchSize: 128 define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind { %vecins = insertelement <8 x double> %a, double 8.0, i32 %b store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16 diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll index 1dd7c2cb7995..e9d98ac89e72 100644 --- a/test/CodeGen/AMDGPU/kernel-args.ll +++ b/test/CodeGen/AMDGPU/kernel-args.ll @@ -4,8 +4,10 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; FUNC-LABEL: {{^}}i8_arg: -; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; GCN: buffer_load_ubyte +; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; 
SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -39,8 +41,10 @@ entry: } ; FUNC-LABEL: {{^}}i16_arg: -; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; GCN: buffer_load_ushort +; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -290,8 +294,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 -; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44 +; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 +; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 @@ -307,7 +311,7 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 @@ -409,8 +413,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 -; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 +; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 @@ -434,8 +438,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 -; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 +; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll new file mode 100644 index 000000000000..8347b8c96ec4 --- /dev/null +++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s + +; 
FIXME: align on alloca seems to be ignored for private_segment_alignment + +; ALL-LABEL: {{^}}large_alloca_compute_shader: + +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + + +; GCNHSA: .amd_kernel_code_t + +; GCNHSA: compute_pgm_rsrc2_scratch_en = 1 +; GCNHSA: compute_pgm_rsrc2_user_sgpr = 6 +; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1 +; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0 +; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0 +; GCNHSA: compute_pgm_rsrc2_tg_size_en = 0 +; GCNHSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 + +; GCNHSA: enable_sgpr_private_segment_buffer = 1 +; GCNHSA: enable_sgpr_dispatch_ptr = 0 +; GCNHSA: enable_sgpr_queue_ptr = 0 +; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1 +; GCNHSA: enable_sgpr_dispatch_id = 0 +; GCNHSA: enable_sgpr_flat_scratch_init = 0 +; GCNHSA: enable_sgpr_private_segment_size = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0 +; GCNHSA: workitem_private_segment_byte_size = 32772 +; GCNHSA: private_segment_alignment = 4 +; GCNHSA: .end_amd_kernel_code_t + + +; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen +; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen + +; Scratch size = alloca size + emergency stack slot +; ALL: ; ScratchSize: 32772 +define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/test/CodeGen/AMDGPU/large-alloca-graphics.ll new file mode 100644 index 000000000000..141ee2560152 --- /dev/null +++ b/test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -0,0 +1,47 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s + +; ALL-LABEL: {{^}}large_alloca_pixel_shader: +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen + +; ALL: ; ScratchSize: 32772 +define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +; ALL-LABEL: {{^}}large_alloca_pixel_shader_inreg: +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen + +; ALL: ; ScratchSize: 32772 +define void 
@large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #1 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "ShaderType"="0" } diff --git a/test/CodeGen/AMDGPU/large-alloca.ll b/test/CodeGen/AMDGPU/large-alloca.ll deleted file mode 100644 index 671833d1a33a..000000000000 --- a/test/CodeGen/AMDGPU/large-alloca.ll +++ /dev/null @@ -1,15 +0,0 @@ -; XFAIL: * -; REQUIRES: asserts -; RUN: llc -march=amdgcn -mcpu=SI < %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s - -define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { - %large = alloca [8192 x i32], align 4 - %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 - store i32 %x, i32* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y - %0 = load i32, i32* %gep1 - store i32 %0, i32 addrspace(1)* %out - ret void -} - diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll index cff1c24f89d6..9d2320cb2d19 100644 --- a/test/CodeGen/AMDGPU/literals.ll +++ b/test/CodeGen/AMDGPU/literals.ll @@ -7,8 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: {{^}}i32_literal: -; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,8 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: {{^}}float_literal: -; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll index 8bf094b8bc7b..ca8ddbae9fbc 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll @@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone ; FUNC-LABEL: {{^}}s_abs_i32: -; SI: s_sub_i32 -; SI: s_max_i32 -; SI: s_endpgm +; SI: s_abs_i32 ; EG: SUB_INT ; EG: MAX_INT diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll index 1168713ca66e..d56b48457285 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll @@ -425,7 +425,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ; SI: buffer_load_dword [[LOAD:v[0-9]+]] ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]] -; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]] +; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]] ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]] ; SI: buffer_store_dword [[TMP2]] define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll deleted file mode 100644 index 301de4b1c82d..000000000000 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll +++ /dev/null @@ -1,28 +0,0 
@@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone - -; FUNC-LABEL: {{^}}s_brev_i32: -; SI: s_load_dword [[VAL:s[0-9]+]], -; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm -define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { - %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone - store i32 %ctlz, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_brev_i32: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %val = load i32, i32 addrspace(1)* %valptr, align 4 - %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone - store i32 %ctlz, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll index 805a88b59c72..80eb3b93f8e5 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll @@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { @@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll index f948c987b038..7dc094ed1b4b 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll @@ -4,7 +4,6 @@ ; FIXME: Enable for VI. 
declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll new file mode 100644 index 000000000000..2e299e30b8c7 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}read_workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @read_workdim(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}read_workdim_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOT: 0xff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @read_workdim_known_bits(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.AMDGPU.read.workdim() #0 + %shl = shl i32 %dim, 24 + %shr = lshr i32 %shl, 24 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { readnone } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index 74792e50017f..a30a8e083eb6 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; R600: {{^}}amdgpu_trunc: -; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600: TRUNC {{\*? 
*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: {{^}}amdgpu_trunc: ; SI: v_trunc_f32 diff --git a/test/CodeGen/AMDGPU/llvm.SI.packf16.ll b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll new file mode 100644 index 000000000000..0155757632d4 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}main: +; GCN: v_cvt_pkrtz_f16_f32 +; GCN: v_cvt_pkrtz_f16_f32 +; GCN-NOT: v_cvt_pkrtz_f16_f32 + +define void @main(float %src) #0 { +main_body: + %p1 = call i32 @llvm.SI.packf16(float undef, float %src) + %p2 = call i32 @llvm.SI.packf16(float %src, float undef) + %p3 = call i32 @llvm.SI.packf16(float undef, float undef) + %f1 = bitcast i32 %p1 to float + %f2 = bitcast i32 %p2 to float + %f3 = bitcast i32 %p3 to float + call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f1, float undef, float %f1) + call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f2, float undef, float %f2) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %f3, float undef, float %f2) + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.SI.packf16(float, float) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll new file mode 100644 index 000000000000..6d9db65e7d93 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1: +; GCN-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00] +; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1() #0 { + call void @llvm.amdgcn.buffer.wbinvl1() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll new file mode 100644 index 000000000000..746298465e58 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0 + +; SI-LABEL: {{^}}test_buffer_wbinvl1_sc: +; SI-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; SI-NEXT: s_endpgm +define void @test_buffer_wbinvl1_sc() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.sc() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll new file mode 100644 index 000000000000..cecfcb1bfe7c --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc 
-march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1_vol() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.vol() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll new file mode 100644 index 000000000000..dc95cd1ee012 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test: +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +define void @test(i32 addrspace(1)* %out) { + %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* + %value = load i32, i32 addrspace(2)* %header_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} + +declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + +attributes #0 = { readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll new file mode 100644 index 000000000000..a28e1b1eb241 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll @@ -0,0 +1,30 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s + +;GCN-LABEL: {{^}}v_interp: +;GCN-NOT: s_wqm +;GCN: s_mov_b32 m0, s{{[0-9]+}} +;GCN: v_interp_p1_f32 +;GCN: v_interp_p2_f32 +define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 { +main_body: + %i = extractelement <2 x i32> %4, i32 0 + %j = extractelement <2 x i32> %4, i32 1 + %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3) + %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3) + %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3) + %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1) + ret void +} + +; Function Attrs: nounwind readnone +declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll new file mode 100644 index 000000000000..02ee2039542a --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -0,0 +1,24 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s + +;GCN-LABEL: {{^}}mbcnt_intrinsics: +;GCN: 
v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0 +;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]] +;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]] + +define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { +main_body: + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1 + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1 + %4 = bitcast i32 %hi to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4) + ret void +} + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 + +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll new file mode 100644 index 000000000000..f8af67c17ec2 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv: +; GCN-NEXT: ; BB#0: +; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] +; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv() #0 { + call void @llvm.amdgcn.s.dcache.inv() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll new file mode 100644 index 000000000000..a8502a7c5033 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv.vol() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv_vol() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv_vol +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll new file mode 100644 index 000000000000..f9ae09b391aa --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | 
FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb() #0 { + call void @llvm.amdgcn.s.dcache.wb() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll new file mode 100644 index 000000000000..d9145458a1f6 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb.vol() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb_vol: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb_vol() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll index a64dd0ebd2dd..0c3e4ecaa1a0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll @@ -4,7 +4,7 @@ declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone ; FUNC-LABEL: {{^}}test_lrp: -; SI: v_sub_f32 +; SI: v_mad_f32 ; SI: v_mac_f32_e32 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind { %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll index d001bcb4db17..b01f8ab2bdf9 100644 --- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -1,11 +1,11 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -mattr=-flat-for-global < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_debug_value: -; CHECK: s_load_dwordx2 -; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1 +; CHECK: s_load_dwordx2 s[4:5] +; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- %SGPR4_SGPR5 ; CHECK: buffer_store_dword ; CHECK: s_endpgm -define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 { +define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 { entry: tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14 store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 @@ -24,13 +24,13 @@ attributes #1 = { nounwind readnone } !1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug") !2 = !{} !3 = !{!4} -!4 
= !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9) +!4 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !9) !5 = !DISubroutineType(types: !6) !6 = !{null, !7} !7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32) !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !9 = !{!10} -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7) +!10 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7) !11 = !{i32 2, !"Dwarf Version", i32 4} !12 = !{i32 2, !"Debug Info Version", i32 3} !13 = !DIExpression() diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index e491732cf9c5..d83ab562b718 100644 --- a/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -132,32 +132,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias % } ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4: -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 ; SI: s_endpgm define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { @@ -170,32 +153,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias % ; FIXME: Use 64-bit ops ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8: -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_read_b64 +; SI: ds_read_b64 +; SI: ds_read_b64 +; SI: ds_read_b64 -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_write_b64 +; SI: ds_write_b64 +; SI: ds_write_b64 +; SI: ds_write_b64 ; SI-DAG: s_endpgm define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll new file mode 100644 index 000000000000..13ebee41e844 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -0,0 +1,184 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}local_size_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[1].Z + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1 +; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4 + +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[1].W + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].X + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xy: +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xy(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %y = call i32 @llvm.r600.read.local.size.y() #0 + %val = mul i32 %x, %y + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xz: + +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xz(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %val = mul i32 %x, %z + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_yz: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 1 + +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], 
[[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_yz(i32 addrspace(1)* %out) { +entry: + %y = call i32 @llvm.r600.read.local.size.y() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %val = mul i32 %y, %z + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xyz: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 1 + +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]] +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xyz(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %y = call i32 @llvm.r600.read.local.size.y() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %xy = mul i32 %x, %y + %xyz = add i32 %xy, %z + store i32 %xyz, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_x_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_x_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.x() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_y_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.y() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_z_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.z() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll index 3d0f57e33280..6b365dc09e2a 100644 --- a/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -21,12 +21,9 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 { ; SI-DAG: v_cmp_eq_i32 ; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff -; SI-DAG: v_cmp_gt_i32_e64 +; SI-DAG: v_cmp_gt_i32_e32 ; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]] -; SI-DAG: v_cmp_gt_i32_e64 - - ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { 
diff --git a/test/CodeGen/AMDGPU/load.ll b/test/CodeGen/AMDGPU/load.ll index 93b1b51a0d07..6a04261fe47b 100644 --- a/test/CodeGen/AMDGPU/load.ll +++ b/test/CodeGen/AMDGPU/load.ll @@ -277,15 +277,9 @@ entry: ; FUNC-LABEL: {{^}}load_v8i32: ; R600: VTX_READ_128 ; R600: VTX_READ_128 -; XXX: We should be using DWORDX4 instructions on SI. -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword + +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { entry: %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -298,23 +292,11 @@ entry: ; R600: VTX_READ_128 ; R600: VTX_READ_128 ; R600: VTX_READ_128 -; XXX: We should be using DWORDX4 instructions on SI. -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword + +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { entry: %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in diff --git a/test/CodeGen/AMDGPU/local-memory-two-objects.ll b/test/CodeGen/AMDGPU/local-memory-two-objects.ll index f501a7ac6274..6b52b80ba082 100644 --- a/test/CodeGen/AMDGPU/local-memory-two-objects.ll +++ b/test/CodeGen/AMDGPU/local-memory-two-objects.ll @@ -10,7 +10,7 @@ ; EG: .long 166120 ; EG-NEXT: .long 8 ; GCN: .long 47180 -; GCN-NEXT: .long 38792 +; GCN-NEXT: .long 32900 ; EG: {{^}}local_memory_two_objects: @@ -30,7 +30,7 @@ ; constant offsets. 
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] -; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} +; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] diff --git a/test/CodeGen/AMDGPU/local-memory.ll b/test/CodeGen/AMDGPU/local-memory.ll index 9494ed75bd0c..9ffb59e70920 100644 --- a/test/CodeGen/AMDGPU/local-memory.ll +++ b/test/CodeGen/AMDGPU/local-memory.ll @@ -9,9 +9,9 @@ ; EG: .long 166120 ; EG-NEXT: .long 128 ; SI: .long 47180 -; SI-NEXT: .long 71560 +; SI-NEXT: .long 65668 ; CI: .long 47180 -; CI-NEXT: .long 38792 +; CI-NEXT: .long 32900 ; FUNC-LABEL: {{^}}local_memory: diff --git a/test/CodeGen/AMDGPU/max.ll b/test/CodeGen/AMDGPU/max.ll index fef3e2f0a21c..eeb915c10a96 100644 --- a/test/CodeGen/AMDGPU/max.ll +++ b/test/CodeGen/AMDGPU/max.ll @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone -; FUNC-LABEL: @v_test_imax_sge_i32 +; FUNC-LABEL: {{^}}v_test_imax_sge_i32: ; SI: v_max_i32_e32 define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -17,6 +17,24 @@ define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32: +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid + %a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4 + %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4 + %cmp = icmp sge <4 x i32> %a, %b + %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4 + ret void +} + ; FUNC-LABEL: @s_test_imax_sge_i32 ; SI: s_max_i32 define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -35,6 +53,23 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: v_max_i32_e32 +define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp sge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32: ; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -44,6 +79,15 @@ define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { ret void } +; FUNC-LABEL: 
{{^}}s_test_imax_sgt_imm_v2i32: +; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 +; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 +define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { + %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9> + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9> + store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + ret void +} ; FUNC-LABEL: @v_test_imax_sgt_i32 ; SI: v_max_i32_e32 define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -92,6 +136,36 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32: +; SI: s_max_u32 +; SI: s_max_u32 +; SI: s_max_u32 +; SI-NOT: s_max_u32 +; SI: s_endpgm +define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind { + %cmp = icmp uge <3 x i32> %a, %b + %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b + store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_test_umax_uge_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_max_u32_e32 +define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp uge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -107,7 +181,7 @@ define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } -; FUNC-LABEL: @s_test_umax_ugt_i32 +; FUNC-LABEL: {{^}}s_test_umax_ugt_i32: ; SI: s_max_u32 define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp ugt i32 %a, %b @@ -116,13 +190,23 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32: +; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15 +; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23 +define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { + %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23> + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23> + store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + ret void +} + ; Make sure redundant and removed ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI-NEXT: buffer_store_dword [[VMIN]] +; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]] +; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]] +; SI-NEXT: buffer_store_dword [[VMAX]] define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind { %a.ext = zext i16 %a to i32 %b.ext = zext i16 %b to i32 @@ -135,13 +219,13 @@ define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)*
%out, i1 ; Make sure redundant sign_extend_inreg removed. -; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16: +; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI-NEXT: buffer_store_dword [[VMIN]] -define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { +; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]] +; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]] +; SI-NEXT: buffer_store_dword [[VMAX]] +define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { %a.ext = sext i16 %a to i32 %b.ext = sext i16 %b to i32 %cmp = icmp sgt i32 %a.ext, %b.ext @@ -152,15 +236,13 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 ret void } -; FIXME: Should get match min/max through extends inserted by -; legalization. - -; FUNC-LABEL: {{^}}s_test_imin_sge_i16: +; FUNC-LABEL: {{^}}s_test_imax_sge_i16: +; SI: s_load_dword +; SI: s_load_dword ; SI: s_sext_i32_i16 ; SI: s_sext_i32_i16 -; SI: v_cmp_ge_i32_e32 -; SI: v_cndmask_b32 -define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { +; SI: s_max_i32 +define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sge i16 %a, %b %val = select i1 %cmp, i16 %a, i16 %b store i16 %val, i16 addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll index 34a2fc7ffa74..65b454b5d8cb 100644 --- a/test/CodeGen/AMDGPU/merge-stores.ll +++ b/test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s + +; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; Run with devices with different unaligned load restrictions. 
@@ -65,10 +68,8 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i32: -; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8 -; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b -; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]] -; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]] +; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 +; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 @@ -89,10 +90,8 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32: -; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0 -; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}} -; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]] -; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]] +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0 +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 @@ -121,10 +120,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dwordx2 v +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 @@ -137,17 +133,9 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) ret void } -; First store is out of order. Because of order of combines, the -; consecutive store fails because only some of the stores have been -; replaced with integer constant stores, and then won't merge because -; the types are different. - +; First store is out of order. 
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 @@ -160,6 +148,33 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ret void } +; FIXME: Should be able to merge this +; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx2 +; GCN-AA: buffer_store_dword v +; GCN-AA: buffer_store_dword v + +; GCN: s_endpgm +define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 + %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 + + %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* + %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)* + + store i32 11, i32 addrspace(1)* %out.gep.1.bc + store float 2.0, float addrspace(1)* %out.gep.2 + store i32 17, i32 addrspace(1)* %out.gep.3.bc + store float 8.0, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}merge_global_store_3_constants_i32: ; SI-DAG: buffer_store_dwordx2 ; SI-DAG: buffer_store_dword @@ -176,9 +191,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i64: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 +; GCN: buffer_store_dwordx4 define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 @@ -188,13 +201,8 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_i64: -; XGCN: buffer_store_dwordx4 -; XGCN: buffer_store_dwordx4 - -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 @@ -472,11 +480,15 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ; This works once AA is enabled on the subtarget ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; XGCN: buffer_store_dwordx4 [[LOAD]] -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v + +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx4 [[LOAD]] + +; GCN: s_endpgm define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ 
-508,10 +520,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { } ; GCN-LABEL: {{^}}merge_local_store_2_constants_i32: -; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8 -; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]] -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]] +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 @@ -522,10 +532,15 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { } ; GCN-LABEL: {{^}}merge_local_store_4_constants_i32: -; GCN: ds_write_b32 -; GCN: ds_write_b32 -; GCN: ds_write_b32 -; GCN: ds_write_b32 +; GCN-DAG: v_mov_b32_e32 [[K2:v[0-9]+]], 0x1c8 +; GCN-DAG: v_mov_b32_e32 [[K3:v[0-9]+]], 0x14d +; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K2]], [[K3]] offset0:2 offset1:3 + +; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0x4d2 +; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x7b +; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1 + +; GCN: s_endpgm define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 @@ -597,17 +612,9 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { } ; GCN-LABEL: {{^}}merge_global_store_8_constants_i32: -; XGCN: buffer_store_dwordx4 -; XGCN: buffer_store_dwordx4 - -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: s_endpgm define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 @@ -627,7 +634,78 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ret void } +; This requires handling of scalar_to_vector for v2i64 to avoid +; scratch usage. 
+; FIXME: Should do single load and store + +; GCN-LABEL: {{^}}copy_v3i32_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 + +; GCN: ScratchSize: 0{{$}} +define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { + %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 + store <3 x i32> %vec, <3 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3i64_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: ScratchSize: 0{{$}} +define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { + %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 + store <3 x i64> %vec, <3 x i64> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3f32_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN: ScratchSize: 0{{$}} +define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { + %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 + %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0> + store <3 x float> %fadd, <3 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3f64_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: ScratchSize: 0{{$}} +define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { + %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 + %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0> + store <3 x double> %fadd, <3 x double> addrspace(1)* %out + ret void +} + declare void @llvm.AMDGPU.barrier.local() #1 attributes #0 = { nounwind } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index 0332d1a8e407..215dbeb4b2fd 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @v_test_imin_sle_i32 +; FUNC-LABEL: {{^}}v_test_imin_sle_i32: ; SI: v_min_i32_e32 define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -17,7 +17,7 @@ define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } -; FUNC-LABEL: @s_test_imin_sle_i32 +; FUNC-LABEL: {{^}}s_test_imin_sle_i32: ; SI: s_min_i32 define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp sle i32 %a, %b @@ -26,6 +26,78 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32: +; SI: s_min_i32 +define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { + %cmp = icmp sle <1 x i32> %a, %b + %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b + store <1 x i32> %val, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32: +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { + %cmp = icmp sle <4 x i32> %a, %b + %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %val, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_i8: +; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i8 +; SI: s_sext_i32_i8 +; SI: s_min_i32 +define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { + %cmp = icmp sle i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %out + ret void +} + +; XXX - should be able to use s_min if we stop unnecessarily doing +; extloads with mubuf instructions. 
+ +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte + +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 + +; SI: s_endpgm +define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind { + %cmp = icmp sle <4 x i8> %a, %b + %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + store <4 x i8> %val, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16: +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind { + %cmp = icmp sle <4 x i16> %a, %b + %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b + store <4 x i16> %val, <4 x i16> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: @v_test_imin_slt_i32 ; SI: v_min_i32_e32 define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -50,6 +122,16 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32: +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { + %cmp = icmp slt <2 x i32> %a, %b + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + store <2 x i32> %val, <2 x i32> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32: ; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -83,6 +165,24 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: @v_test_umin_ule_v3i32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI-NOT: v_min_u32_e32 +; SI: s_endpgm +define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid + %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0 + %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1 + %cmp = icmp ule <3 x i32> %a, %b + %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b + store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep + ret void +} ; FUNC-LABEL: @s_test_umin_ule_i32 ; SI: s_min_u32 define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -107,6 +207,23 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: {{^}}v_test_umin_ult_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_min_u32_e32 +define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp ult i8 
%a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @s_test_umin_ult_i32 ; SI: s_min_u32 define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -137,6 +254,48 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace ret void } + +; FUNC-LABEL: @s_test_umin_ult_v1i32 +; SI: s_min_u32 +define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { + %cmp = icmp ult <1 x i32> %a, %b + %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b + store <1 x i32> %val, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32: +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind { + %cmp = icmp ult <8 x i32> %a, %b + %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %val, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16: +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind { + %cmp = icmp ult <8 x i16> %a, %b + %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %val, <8 x i16> addrspace(1)* %out + ret void +} + ; Make sure redundant and removed ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb @@ -173,14 +332,8 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 ret void } -; FIXME: Should get match min/max through extends inserted by -; legalization. - ; FUNC-LABEL: {{^}}s_test_imin_sle_i16: -; SI: s_sext_i32_i16 -; SI: s_sext_i32_i16 -; SI: v_cmp_le_i32_e32 -; SI: v_cndmask_b32 +; SI: s_min_i32 define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sle i16 %a, %b %val = select i1 %cmp, i16 %a, i16 %b diff --git a/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll new file mode 100644 index 000000000000..e9f641b736d5 --- /dev/null +++ b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s + +; Check that when mubuf addr64 instruction is handled in moveToVALU +; from the pointer, dead register writes are not emitted. 
+ +; FIXME: We should be able to use the SGPR directly as src0 to v_add_i32 + +; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add: +; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}} + +; GCN-NOT: v_mov_b32 +; GCN: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]] +; GCN-NEXT: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]] +; GCN-NOT: v_mov_b32 + +; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]] +; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]] +; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, + +define void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 { +bb: + %tmp = icmp sgt i32 %arg3, 0 + br i1 %tmp, label %bb4, label %bb17 + +bb4: + %tmp14 = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %ptrarg + %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %tmp14, i64 %arg1 + %tmp16 = load volatile i8, i8 addrspace(1)* %tmp15 + br label %bb17 + +bb17: + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll new file mode 100644 index 000000000000..8bca0575ecd2 --- /dev/null +++ b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; FIXME: broken on VI because flat instructions need to be emitted +; instead of addr64 equivalent of the _OFFSET variants. + +; Check that moving the pointer out of the resource descriptor to +; vaddr works for atomics. 
+ +declare i32 @llvm.r600.read.tidig.x() #1 + +; GCN-LABEL: {{^}}atomic_max_i32: +; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}} +define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid + %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %xor = xor i32 %tid, 1 + %cmp = icmp ne i32 %xor, 0 + br i1 %cmp, label %atomic, label %exit + +atomic: + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 + %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + store i32 %ret, i32 addrspace(1)* %out + br label %exit + +exit: + ret void +} + +; GCN-LABEL: {{^}}atomic_max_i32_noret: +; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}} +define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid + %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %xor = xor i32 %tid, 1 + %cmp = icmp ne i32 %xor, 0 + br i1 %cmp, label %atomic, label %exit + +atomic: + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 + %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + br label %exit + +exit: + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll new file mode 100644 index 000000000000..73a146710a9f --- /dev/null +++ b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll @@ -0,0 +1,18 @@ +; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s + +; CHECK: error: unsupported non-compute shaders with HSA in pixel_shader +define void @pixel_shader() #0 { + ret void +} + +define void @vertex_shader() #1 { + ret void +} + +define void @geometry_shader() #2 { + ret void +} + +attributes #0 = { nounwind "ShaderType"="0" } +attributes #1 = { nounwind "ShaderType"="1" } +attributes #2 = { nounwind "ShaderType"="2" } diff --git a/test/CodeGen/AMDGPU/no-shrink-extloads.ll b/test/CodeGen/AMDGPU/no-shrink-extloads.ll index e4328ecbaca8..f81911aafe22 100644 --- a/test/CodeGen/AMDGPU/no-shrink-extloads.ll +++ b/test/CodeGen/AMDGPU/no-shrink-extloads.ll @@ -189,3 +189,15 @@ define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace store i8 %trunc, i8 addrspace(1)* %gep.out ret void } + +; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16 +; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0 +; SI: s_waitcnt lgkmcnt(0) +; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff +define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +entry: + %val = load i32, i32 addrspace(2)* %in + %mask = and i32 %val, 65535 + store i32 %mask, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/opencl-image-metadata.ll b/test/CodeGen/AMDGPU/opencl-image-metadata.ll new file mode 100644 index 000000000000..bc467e47dc31 --- /dev/null +++ b/test/CodeGen/AMDGPU/opencl-image-metadata.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s 
-march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; Make sure the OpenCL Image lowering pass doesn't crash when argument metadata +; is not in expected order. + +; EG: CF_END +; SI: s_endpgm +define void @kernel(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +attributes #3 = { nounwind } + +!opencl.kernels = !{!0} + +!0 = !{void (i32 addrspace(1)*)* @kernel, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space", i32 0} +!2 = !{!"kernel_arg_access_qual", !"none"} +!3 = !{!"kernel_arg_type", !"int*"} +!4 = !{!"kernel_arg_type_qual", !""} +!5 = !{!"kernel_arg_name", !""} diff --git a/test/CodeGen/AMDGPU/operand-folding.ll b/test/CodeGen/AMDGPU/operand-folding.ll index 816755efb07c..9e514ef9970a 100644 --- a/test/CodeGen/AMDGPU/operand-folding.ll +++ b/test/CodeGen/AMDGPU/operand-folding.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: -; CHECK: v_add_i32_e32 v{{[0-9]+}}, s +; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) { entry: %tmp0 = icmp ne i32 %fold, 0 diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll index 1c04090b407f..e40f18f040b7 100644 --- a/test/CodeGen/AMDGPU/or.ll +++ b/test/CodeGen/AMDGPU/or.ll @@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { } ; FUNC-LABEL: {{^}}or_i1: -; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { diff --git a/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll new file mode 100644 index 000000000000..51985af42a29 --- /dev/null +++ b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -verify-coalescing < %s + +; The original and requires materializing a 64-bit immediate for +; s_and_b64. This is split into 2 x v_and_i32, part of the immediate +; is folded through the reg_sequence into the v_and_i32 operand, and +; only half of the result is ever used. +; +; During live interval construction, the first sub register def is +; incorrectly marked as dead. 
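+;
+; For reference, a worked split of the constant used below (not
+; FileCheck-verified):
+;   (255 << 33) | 9 = 2190433320969 = 0x1FE00000009
+;   lo32 = 0x00000009, hi32 = 0x000001FE
+; Only the high half (element 1 of the bitcast <2 x i32>) is stored.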
+ +declare i32 @llvm.r600.read.tidig.x() #1 + +define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid + %val = load i64, i64 addrspace(1)* %in.gep + + %lshr = shl i64 %val, 24 + %and1 = and i64 %lshr, 2190433320969 ; (255 << 33) | 9 + %vec = bitcast i64 %and1 to <2 x i32> + %elt1 = extractelement <2 x i32> %vec, i32 1 + + store i32 %elt1, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll index 645dc04f4420..79778eebd802 100644 --- a/test/CodeGen/AMDGPU/private-memory.ll +++ b/test/CodeGen/AMDGPU/private-memory.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC @@ -13,11 +15,21 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; R600: LDS_READ ; R600: LDS_READ +; HSA-PROMOTE: .amd_kernel_code_t +; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120 +; HSA-PROMOTE: .end_amd_kernel_code_t + ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_read_b32 ; SI-PROMOTE: ds_read_b32 +; HSA-ALLOCA: .amd_kernel_code_t +; FIXME: Creating the emergency stack slots causes us to over-estimate scratch +; by 4 bytes. 
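+; (so the value checked below would ideally be 20, not 24)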
+; HSA-ALLOCA: workitem_private_segment_byte_size = 24 +; HSA-ALLOCA: .end_amd_kernel_code_t + ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { diff --git a/test/CodeGen/AMDGPU/register-count-comments.ll b/test/CodeGen/AMDGPU/register-count-comments.ll index de6bfb310883..4bb315049be4 100644 --- a/test/CodeGen/AMDGPU/register-count-comments.ll +++ b/test/CodeGen/AMDGPU/register-count-comments.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone diff --git a/test/CodeGen/AMDGPU/reorder-stores.ll b/test/CodeGen/AMDGPU/reorder-stores.ll index 187650ff9a53..d5e10d0be883 100644 --- a/test/CodeGen/AMDGPU/reorder-stores.ll +++ b/test/CodeGen/AMDGPU/reorder-stores.ll @@ -2,14 +2,10 @@ ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store: -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16 @@ -34,46 +30,16 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace } ; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32 diff --git a/test/CodeGen/AMDGPU/s_movk_i32.ll 
b/test/CodeGen/AMDGPU/s_movk_i32.ll index 6b1a36c979c2..47c7fbb6dd6a 100644 --- a/test/CodeGen/AMDGPU/s_movk_i32.ll +++ b/test/CodeGen/AMDGPU/s_movk_i32.ll @@ -3,10 +3,9 @@ ; SI-LABEL: {{^}}s_movk_i32_k0: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -17,10 +16,9 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k1: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -31,10 +29,9 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k2: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -45,10 +42,9 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k3: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -59,10 +55,9 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k4: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -87,10 +82,9 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k6: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 
0x41{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 0b9649576545..a30c25e700ab 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. This @@ -7,94 +11,267 @@ ; sgpr register pair and use that for the pointer operand ; (low 64-bits of srsrc). -; CHECK-LABEL: {{^}}mubuf: +; GCN-LABEL: {{^}}mubuf: ; Make sure we aren't using VGPRs for the source operand of s_mov_b64 -; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v +; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions -; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 + +define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: - %0 = call i32 @llvm.r600.read.tidig.x() #1 - %1 = call i32 @llvm.r600.read.tidig.y() #1 - %2 = sext i32 %0 to i64 - %3 = sext i32 %1 to i64 + %tmp = call i32 @llvm.r600.read.tidig.x() + %tmp1 = call i32 @llvm.r600.read.tidig.y() + %tmp2 = sext i32 %tmp to i64 + %tmp3 = sext i32 %tmp1 to i64 br label %loop -loop: - %4 = phi i64 [0, %entry], [%5, %loop] - %5 = add i64 %2, %4 - %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5 - %7 = load i8, i8 addrspace(1)* %6, align 1 - %8 = or i64 %5, 1 - %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8 - %10 = load i8, i8 addrspace(1)* %9, align 1 - %11 = add i8 %7, %10 - %12 = sext i8 %11 to i32 - store i32 %12, i32 addrspace(1)* %out - %13 = icmp slt i64 %5, 10 - br i1 %13, label %loop, label %done +loop: ; preds = %loop, %entry + %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ] + %tmp5 = add i64 %tmp2, %tmp4 + %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5 + %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1 + %tmp8 = or i64 %tmp5, 1 + %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8 + %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1 + %tmp11 = add i8 %tmp7, %tmp10 + %tmp12 = sext i8 %tmp11 to i32 + store i32 %tmp12, i32 addrspace(1)* %out + %tmp13 = icmp slt i64 %tmp5, 10 + br i1 %tmp13, label %loop, label %done -done: +done: ; preds = %loop ret void } -declare i32 @llvm.r600.read.tidig.x() #1 -declare 
i32 @llvm.r600.read.tidig.y() #1 - -attributes #1 = { nounwind readnone } - ; Test moving an SMRD instruction to the VALU -; CHECK-LABEL: {{^}}smrd_valu: -; CHECK: buffer_load_dword [[OUT:v[0-9]+]] -; CHECK: buffer_store_dword [[OUT]] - -define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) { +; GCN-LABEL: {{^}}smrd_valu: +; GCN: buffer_load_dword [[OUT:v[0-9]+]] +; GCN: buffer_store_dword [[OUT]] +define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: - %0 = icmp ne i32 %a, 0 - br i1 %0, label %if, label %else + %tmp = icmp ne i32 %a, 0 + br i1 %tmp, label %if, label %else -if: - %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in +if: ; preds = %entry + %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in br label %endif -else: - %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in - %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2 +else: ; preds = %entry + %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in + %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2 br label %endif -endif: - %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else] - %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000 - %6 = load i32, i32 addrspace(2)* %5 - store i32 %6, i32 addrspace(1)* %out +endif: ; preds = %else, %if + %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ] + %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000 + %tmp6 = load i32, i32 addrspace(2)* %tmp5 + store i32 %tmp6, i32 addrspace(1)* %out ret void } -; Test moving ann SMRD with an immediate offset to the VALU +; Test moving an SMRD with an immediate offset to the VALU -; CHECK-LABEL: {{^}}smrd_valu2: -; CHECK: buffer_load_dword -define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) { +; GCN-LABEL: {{^}}smrd_valu2: +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: - %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone - %1 = add i32 %0, 4 - %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4 - %3 = load i32, i32 addrspace(2)* %2 - store i32 %3, i32 addrspace(1)* %out + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}s_load_imm_v8i32: -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) { +; Use a big offset that will use the SMRD literal offset on CI +; GCN-LABEL: {{^}}smrd_valu_ci_offset: +; GCN-NOT: v_add +; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i32, i32 addrspace(2)* 
%tmp3 + %tmp5 = add i32 %tmp4, %c + store i32 %tmp5, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2: +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx2 +define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i64, i64 addrspace(2)* %tmp3 + %tmp5 = or i64 %tmp4, %c + store i64 %tmp5, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4: +; GCN-NOT: v_add +; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3 + %tmp5 = or <4 x i32> %tmp4, %c + store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out + ret void +} + +; Original scalar load uses SGPR offset on SI and 32-bit literal on +; CI. 
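+;
+; Offset arithmetic for the wide cases below (worked examples): the
+; <8 x i32> stride is 32 bytes, so index 1234 lands at
+;   1234 * 32 = 39488 = 0x9a40
+; with the second dwordx4 half 16 bytes later at 0x9a50; the <16 x i32>
+; stride of 64 bytes gives 1234 * 64 = 78976 = 0x13480 for the x16 case.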
+ +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} + +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3 + %tmp5 = or <8 x i32> %tmp4, %c + store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: + +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} + +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 + +; GCN: s_endpgm +define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3 + %tmp5 = or <16 x i32> %tmp4, %c + store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_salu_user: +; GCN: buffer_load_dword [[MOVED:v[0-9]+]], 
v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] +; GCN: buffer_store_dword [[ADD]] +define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + %tmp4 = add i32 %tmp3, %a + store i32 %tmp4, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset: +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_load_imm_v8i32: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -102,12 +279,51 @@ entry: ret void } -; CHECK-LABEL: {{^}}s_load_imm_v16i32: -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) { +; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 + %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* + %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 + + %elt0 = extractelement <8 x i32> %tmp3, i32 0 + %elt1 = extractelement <8 x i32> %tmp3, i32 1 + %elt2 = extractelement <8 x i32> %tmp3, i32 2 + %elt3 = extractelement <8 x i32> %tmp3, i32 3 + %elt4 = extractelement <8 x i32> %tmp3, i32 4 + %elt5 = extractelement <8 x i32> %tmp3, i32 5 + %elt6 = extractelement <8 x i32> %tmp3, i32 6 + %elt7 = extractelement <8 x i32> %tmp3, i32 7 + + %add0 = add i32 %elt0, %elt1 + %add1 = add i32 %add0, %elt2 + %add2 = add i32 %add1, %elt3 + 
%add3 = add i32 %add2, %elt4 + %add4 = add i32 %add3, %elt5 + %add5 = add i32 %add4, %elt6 + %add6 = add i32 %add5, %elt7 + + store i32 %add6, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_load_imm_v16i32: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -116,3 +332,71 @@ entry: store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32 ret void } + +; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 + %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* + %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 + + %elt0 = extractelement <16 x i32> %tmp3, i32 0 + %elt1 = extractelement <16 x i32> %tmp3, i32 1 + %elt2 = extractelement <16 x i32> %tmp3, i32 2 + %elt3 = extractelement <16 x i32> %tmp3, i32 3 + %elt4 = extractelement <16 x i32> %tmp3, i32 4 + %elt5 = extractelement <16 x i32> %tmp3, i32 5 + %elt6 = extractelement <16 x i32> %tmp3, i32 6 + %elt7 = extractelement <16 x i32> %tmp3, i32 7 + %elt8 = extractelement <16 x i32> %tmp3, i32 8 + %elt9 = extractelement <16 x i32> %tmp3, i32 9 + %elt10 = extractelement <16 x i32> %tmp3, i32 10 + %elt11 = extractelement <16 x i32> %tmp3, i32 11 + %elt12 = extractelement <16 x i32> %tmp3, i32 12 + %elt13 = extractelement <16 x i32> %tmp3, i32 13 + %elt14 = extractelement <16 x i32> %tmp3, i32 14 + %elt15 = extractelement <16 x i32> %tmp3, i32 15 + + %add0 = add i32 %elt0, %elt1 + %add1 = add i32 %add0, %elt2 + %add2 = add i32 %add1, %elt3 + %add3 = add i32 %add2, %elt4 + %add4 = add i32 %add3, %elt5 + %add5 = add i32 %add4, %elt6 + %add6 = add i32 %add5, %elt7 + %add7 = add i32 %add6, %elt8 + %add8 = add i32 %add7, %elt9 + %add9 = add i32 %add8, %elt10 + %add10 = add i32 %add9, %elt11 + %add11 = add i32 %add10, %elt12 + %add12 = add i32 %add11, %elt13 + %add13 = add i32 %add12, %elt14 + %add14 = add i32 %add13, %elt15 + + store i32 %add14, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/sampler-resource-id.ll b/test/CodeGen/AMDGPU/sampler-resource-id.ll new file mode 100644 index 000000000000..c41d345369bf --- /dev/null +++ b/test/CodeGen/AMDGPU/sampler-resource-id.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}test_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_0(i32 %in0, i32 addrspace(1)* %out) { +entry: + %0 = call i32 
@llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2} + +!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50} +!10 = !{!"kernel_arg_addr_space", i32 0, i32 1} +!20 = !{!"kernel_arg_access_qual", !"none", !"none"} +!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"} +!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"} +!50 = !{!"kernel_arg_type_qual", !"", !""} + +!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51} +!11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1} +!21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"} +!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"} +!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"} +!51 = !{!"kernel_arg_type_qual", !"", !"", !""} + +!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52} +!12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1} +!22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"} +!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} +!42 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} +!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll index 3863afda5dd3..e4b16c0a165f 100644 --- a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll @@ -3,7 +3,7 @@ ; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI -declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate +declare void @llvm.AMDGPU.barrier.local() nounwind convergent ; SI-LABEL: {{^}}main( diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll index 268869daaa32..d43de4766057 100644 --- a/test/CodeGen/AMDGPU/scratch-buffer.ll +++ b/test/CodeGen/AMDGPU/scratch-buffer.ll @@ -51,7 +51,7 @@ done: ; GCN-LABEL: {{^}}legal_offset_fi_offset ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen -; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000 +; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll index 
13fb575b2b15..a68fdecb00af 100644 --- a/test/CodeGen/AMDGPU/select64.ll +++ b/test/CodeGen/AMDGPU/select64.ll @@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa } ; CHECK-LABEL: {{^}}v_select_i64_split_imm: -; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63 -; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 -; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] -; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} ; CHECK: s_endpgm define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5 diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 53694dcffa66..57365a6e1fc3 100644 --- a/test/CodeGen/AMDGPU/set-dx10.ll +++ b/test/CodeGen/AMDGPU/set-dx10.ll @@ -5,8 +5,8 @@ ; SET*DX10 instructions. ; CHECK: {{^}}fcmp_une_select_fptosi: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -19,8 +19,8 @@ entry: } ; CHECK: {{^}}fcmp_une_select_i32: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -31,8 +31,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_fptosi: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -45,8 +45,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_i32: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -57,8 +57,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -71,8 +71,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -83,8 +83,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 
1084227584(5.000000e+00) define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -97,8 +97,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -109,8 +109,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -123,8 +123,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -135,8 +135,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -149,8 +149,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/setcc-opt.ll b/test/CodeGen/AMDGPU/setcc-opt.ll index 4e6a10d6b78d..63d74820f961 100644 --- a/test/CodeGen/AMDGPU/setcc-opt.ll +++ b/test/CodeGen/AMDGPU/setcc-opt.ll @@ -142,11 +142,14 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}cmp_zext_k_i8max: -; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}} -; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]] +; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff +; GCN: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]] +; GCN: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]] +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]] ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = zext i8 %b to i32 @@ -187,11 +190,14 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n ; Should do a buffer_load_sbyte and compare with -1 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg: -; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}} -; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}} +; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff +; 
GCN: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]] +; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}} ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = sext i8 %b to i32 diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 5aedda2ce1a9..23ae3b967971 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI: buffer_store_dword [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] -; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 -; EG-NEXT: LSHR * [[ADDR]] +; EG: LSHR * [[ADDR]] +; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 @@ -609,3 +609,53 @@ define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void } + +; Make sure we propagate the VALUness to users of a moved scalar BFE. + +; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use: +; SI: buffer_load_dwordx2 +; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} +; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1 +; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]] +; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %a.gep, align 8 + %b = load i64, i64 addrspace(1)* %b.gep, align 8 + + %c = shl i64 %a, %b + %shl = shl i64 %c, 63 + %ashr = ashr i64 %shl, 63 + + %and = and i64 %ashr, %s.val + store i64 %and, i64 addrspace(1)* %out.gep, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use: +; SI: buffer_load_dwordx2 +; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, +; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]] +; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %a.gep, align 8 + %b = load i64, i64 addrspace(1)* %b.gep, align 8 + + %c = shl i64 %a, %b + %shl = shl i64 %c, 32 + %ashr = ashr i64 %shl, 32 + %and = and i64 %ashr, %s.val + store i64 %and, i64 addrspace(1)* %out.gep, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index 
6f81a39ed96a..55db80731c90 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG: {{^}}shl_i64: +;EG-LABEL: {{^}}shl_i64: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}} ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 @@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG: {{^}}shl_v2i64: +;EG-LABEL: {{^}}shl_v2i64: ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ;EG-DAG: LSHR {{\*? *}}[[COMPSHA]] @@ -185,8 +185,7 @@ define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in ; Make sure load width gets reduced to i32 load. 
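;
; A left shift by exactly 32 consumes only the low half of the source (a
; worked example of why the load can shrink):
;   shl i64 %a, 32  ==>  result.lo32 = 0, result.hi32 = lo32(%a)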
; GCN-LABEL: {{^}}s_shl_32_i64: ; GCN-DAG: s_load_dword [[LO_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} -; GCN-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0{{$}} -; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] +; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[LO_A]] ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) { diff --git a/test/CodeGen/AMDGPU/shl_add_constant.ll b/test/CodeGen/AMDGPU/shl_add_constant.ll index b1485bfaaebb..dfb2bf3383fc 100644 --- a/test/CodeGen/AMDGPU/shl_add_constant.ll +++ b/test/CodeGen/AMDGPU/shl_add_constant.ll @@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: {{^}}shl_2_add_9_i32: ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}} -; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]] +; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { @@ -20,7 +20,7 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { } ; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses: -; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}} +; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}} ; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}} ; SI-DAG: buffer_store_dword [[ADDREG]] ; SI-DAG: buffer_store_dword [[SHLREG]] @@ -40,7 +40,7 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1 ; FUNC-LABEL: {{^}}shl_2_add_999_i32: ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}} -; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]] +; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/shl_add_ptr.ll b/test/CodeGen/AMDGPU/shl_add_ptr.ll index 6671e909cd1d..ac94824bd61f 100644 --- a/test/CodeGen/AMDGPU/shl_add_ptr.ll +++ b/test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -35,7 +35,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad ; SI-LABEL: {{^}}load_shl_base_lds_1: ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 -; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}} +; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}} ; SI-DAG: buffer_store_dword [[RESULT]] ; SI-DAG: buffer_store_dword [[ADDUSE]] ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll new file mode 100644 index 000000000000..27a8e70aae13 --- /dev/null +++ b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll @@ -0,0 +1,16 @@ +; RUN: llc -o /dev/null %s -march=amdgcn -mcpu=verde -verify-machineinstrs -stop-after expand-isel-pseudos 2>&1 | FileCheck %s +; This test verifies that the instruction selection will add the implicit +; register operands in the correct order when modifying the opcode of an +; instruction to V_ADD_I32_e32. 
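+;
+; v_add_i32_e32 writes its carry-out to vcc and, like all VALU instructions,
+; executes under exec, so the rewritten instruction must end with (sketch):
+;   ..., implicit-def %vcc, implicit %exec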
+
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/si-literal-folding.ll b/test/CodeGen/AMDGPU/si-literal-folding.ll
new file mode 100644
index 000000000000..901b3c3453fc
--- /dev/null
+++ b/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -0,0 +1,17 @@
+; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}main:
+; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+
+define void @main(float) #0 {
+main_body:
+  %1 = fmul float %0, 0x3FE86A7F00000000
+  %2 = fmul float %0, 0xBFE86A7F00000000
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index 84652701f773..d7b35fc631eb 100644
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -6,6 +6,16 @@
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: s_wqm
+
+; Make sure we do not emit the unused scratch resource descriptor setup.
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+
+; CHECK: s_mov_b32 m0
+
+
 ; Writing to M0 from an SMRD instruction will hang the GPU.
 ; CHECK-NOT: s_buffer_load_dword m0
 ; CHECK: s_endpgm
diff --git a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 5a6129aaa3fa..bc766dbcac67 100644
--- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -155,9 +155,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
 }
 
 ; FUNC-LABEL: @reorder_local_offsets
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
 ; CI: buffer_store_dword
@@ -181,9 +181,10 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
 }
 
 ; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
 ; CI: buffer_store_dword
@@ -233,4 +234,4 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind noduplicate }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 0db7cdc171b5..a94ccc32e61c 100644
--- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -46,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
 
 ; SI-LABEL: @v_sint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
new file mode 100644
index 000000000000..e646605f7da1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sminmax.ll
@@ -0,0 +1,130 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}s_abs_i32:
+; GCN: s_abs_i32
+; GCN: s_add_i32
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  %res2 = add i32 %res, 2
+  store i32 %res2, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_i32:
+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+; GCN: v_add_i32
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+  %val = load i32, i32 addrspace(1)* %src, align 4
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  %res2 = add i32 %res, 2
+  store i32 %res2, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v2i32:
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
+  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+  %neg = sub <2 x i32> %z1, %val
+  %cond = icmp sgt <2 x i32> %val, %neg
+  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+  %res2 = add <2 x i32> %res, %t1
+  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v2i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
+  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
+  %neg = sub <2 x i32> %z1, %val
+  %cond = icmp sgt <2 x i32> %val, %neg
+  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+  %res2 = add <2 x i32> %res, %t1
+  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v4i32:
+; TODO: this should use s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
+  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+  %neg = sub <4 x i32> %z3, %val
+  %cond = icmp sgt <4 x i32> %val, %neg
+  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+  %res2 = add <4 x i32> %res, %t3
+  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v4i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
+  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
+  %neg = sub <4 x i32> %z3, %val
+  %cond = icmp sgt <4 x i32> %val, %neg
+  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+  %res2 = add <4 x i32> %res, %t3
+  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 0598208e1317..1d6bb9ece8c6 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
 
 ; SMRD load with an immediate offset.
 ; GCN-LABEL: {{^}}smrd0:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
@@ -15,7 +16,7 @@ entry:
 
 ; SMRD load with the largest possible immediate offset.
 ; GCN-LABEL: {{^}}smrd1:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
@@ -29,6 +30,7 @@ entry:
 ; GCN-LABEL: {{^}}smrd2:
 ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
 ; GCN: s_endpgm
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
@@ -54,9 +56,37 @@ entry:
   ret void
 }
 
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd4:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd5:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
 ; SMRD load using the load.const intrinsic with an immediate offset
 ; GCN-LABEL: {{^}}smrd_load_const0:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -70,7 +100,7 @@ main_body:
 
 ; SMRD load using the load.const intrinsic with the largest possible immediate
 ; offset.
 ; GCN-LABEL: {{^}}smrd_load_const1:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -86,6 +116,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}smrd_load_const2:
 ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
 ; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -96,6 +127,36 @@ main_body:
   ret void
 }
 
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd_load_const3:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd_load_const4:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+  ret void
+}
+
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
diff --git a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
index 46409cdfae1c..9e181bc14d9d 100644
--- a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.r600.read.tidig.x() readnone
 
@@ -8,9 +8,22 @@ declare i32 @llvm.r600.read.tidig.x() readnone
 ; scc instead.
 
 ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, v{{[0-9]+}}
+; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
+  %v.val = load volatile i32, i32 addrspace(1)* %in
+  %vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, 399
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
+; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
+define void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
   %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
@@ -22,7 +35,20 @@ define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
 
 ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
 ; SI: v_add_i32
 ; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+  %v.val = load volatile i32, i32 addrspace(1)* %in
+  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, %val1
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
+define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
@@ -32,9 +58,9 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
 }
 
 ; Doesn't use constants
-; FUNC-LABEL @imp_def_vcc_split_i64_add_2
-; SI: v_add_i32
-; SI: v_addc_u32
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
+; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
diff --git a/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
new file mode 100644
index 000000000000..4c82ed6affc2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+
+@sPrivateStorage = external addrspace(3) global [256 x [8 x <4 x i64>]]
+
+; GCN-LABEL: {{^}}ds_reorder_vector_split:
+
+; Write zeroinitializer
+; GCN-DAG: ds_write_b64 [[PTR:v[0-9]+]], [[VAL:v\[[0-9]+:[0-9]+\]]] offset:24
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:16
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:8
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]]{{$}}
+
+; GCN: s_waitcnt vmcnt
+
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
+
+; GCN: s_waitcnt lgkmcnt
+
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:8
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24
+
+; Appears to be a dead store of a vector component.
+; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
+
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: s_endpgm
+define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
+entry:
+  %tmp = tail call i32 @llvm.r600.read.local.size.y()
+  %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
+  %tmp2 = tail call i32 @llvm.r600.read.tidig.x()
+  %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
+  %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
+  %tmp6 = mul i32 %tmp2, %tmp
+  %tmp10 = add i32 %tmp3, %tmp6
+  %tmp11 = mul i32 %tmp10, %tmp1
+  %tmp9 = add i32 %tmp11, %tmp4
+  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+  %mul.26.i = mul i32 %x.i.12.i, %x.i.i
+  %add.i = add i32 %tmp2, %mul.26.i
+  %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
+  store <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* %arrayidx
+  %tmp12 = sext i32 %add.i to i64
+  %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %srcValues, i64 %tmp12
+  %tmp13 = load <4 x i64>, <4 x i64> addrspace(1)* %arrayidx1
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %tmp12
+  %tmp14 = load i32, i32 addrspace(1)* %arrayidx2
+  %add.ptr = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 0, i32 %alignmentOffset
+  %mul.i = shl i32 %tmp14, 2
+  %arrayidx.i = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr, i32 %mul.i
+  %tmp15 = bitcast i64 addrspace(3)* %arrayidx.i to <4 x i64> addrspace(3)*
+  store <4 x i64> %tmp13, <4 x i64> addrspace(3)* %tmp15
+  %add.ptr6 = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %tmp14, i32 %alignmentOffset
+  %tmp16 = sext i32 %tmp14 to i64
+  %tmp17 = sext i32 %alignmentOffset to i64
+  %add.ptr9 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %destBuffer, i64 %tmp16, i64 %tmp17
+  %tmp18 = bitcast <4 x i64> %tmp13 to i256
+  %trunc = trunc i256 %tmp18 to i64
+  store i64 %trunc, i64 addrspace(1)* %add.ptr9
+  %arrayidx10.1 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 1
+  %tmp19 = load i64, i64 addrspace(3)* %arrayidx10.1
+  %arrayidx11.1 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 1
+  store i64 %tmp19, i64 addrspace(1)* %arrayidx11.1
+  %arrayidx10.2 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 2
+  %tmp20 = load i64, i64 addrspace(3)* %arrayidx10.2
+  %arrayidx11.2 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 2
+  store i64 %tmp20, i64 addrspace(1)* %arrayidx11.2
+  %arrayidx10.3 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 3
+  %tmp21 = load i64, i64 addrspace(3)* %arrayidx10.3
+  %arrayidx11.3 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 3
+  store i64 %tmp21, i64 addrspace(1)* %arrayidx11.3
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll
index bcbc32f4c053..3b59bbfb18c0 100644
--- a/test/CodeGen/AMDGPU/sra.ll
+++ b/test/CodeGen/AMDGPU/sra.ll
@@ -70,11 +70,11 @@ entry:
 ;EG-LABEL: {{^}}ashr_i64_2:
 ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
 ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll
index 0dad91e709d9..bbd954356322 100644
--- a/test/CodeGen/AMDGPU/srl.ll
+++ b/test/CodeGen/AMDGPU/srl.ll
@@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
 
 ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
 ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
+; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
 ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
@@ -190,8 +190,7 @@ define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %i
 ; Make sure load width gets reduced to i32 load.
 ; GCN-LABEL: {{^}}s_lshr_32_i64:
 ; GCN-DAG: s_load_dword [[HI_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc{{$}}
-; GCN-DAG: s_mov_b32 s[[SHI:[0-9]+]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[HI_A]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {
diff --git a/test/CodeGen/AMDGPU/store-barrier.ll b/test/CodeGen/AMDGPU/store-barrier.ll
index 4a72b4d090ad..ba4049f28a6e 100644
--- a/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/test/CodeGen/AMDGPU/store-barrier.ll
@@ -36,7 +36,7 @@ bb:
   ret void
 }
 
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
 declare void @llvm.AMDGPU.barrier.local() #2
 
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/store.ll b/test/CodeGen/AMDGPU/store.ll
index 0f89405e073b..d22f43fa05ef 100644
--- a/test/CodeGen/AMDGPU/store.ll
+++ b/test/CodeGen/AMDGPU/store.ll
@@ -287,16 +287,33 @@ entry:
 ; CM: LDS_WRITE
 ; CM: LDS_WRITE
 
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b64
+; SI: ds_write_b64
 define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(3)* %out
   ret void
 }
 
+; FUNC-LABEL: {{^}}store_local_v4i32_align4:
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+
+; SI: ds_write2_b32
+; SI: ds_write2_b32
+define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}store_local_i64_i8:
 ; EG: LDS_BYTE_WRITE
 ; SI: ds_write_b8
diff --git a/test/CodeGen/AMDGPU/store_typed.ll b/test/CodeGen/AMDGPU/store_typed.ll
new file mode 100644
index 000000000000..515fcf04f406
--- /dev/null
+++ b/test/CodeGen/AMDGPU/store_typed.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s
+
+; store to rat 0
+; FUNC-LABEL: {{^}}store_typed_rat0:
+; EG: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 0)
+  ret void
+}
+
+; store to rat 11
+; FUNC-LABEL: {{^}}store_typed_rat11:
+; EG: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
+  ret void
+}
+
+declare void @llvm.r600.rat.store.typed(<4 x i32>, <4 x i32>, i32)
diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll
index b7fba0efa5b2..9f9446a4e608 100644
--- a/test/CodeGen/AMDGPU/sub.ll
+++ b/test/CodeGen/AMDGPU/sub.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.r600.read.tidig.x() readnone
 ; FUNC-LABEL: {{^}}test_sub_i32:
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
   %a = load i32, i32 addrspace(1)* %in
@@ -22,8 +22,8 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 
 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -40,10 +40,10 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 
 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll
index bf690ca4cb28..ad52d0f2e238 100644
--- a/test/CodeGen/AMDGPU/trunc.ll
+++ b/test/CodeGen/AMDGPU/trunc.ll
@@ -61,7 +61,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
 }
 
 ; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
-; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}}
 ; SI: v_cmp_eq_i32
 define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
   %trunc = trunc i32 %a to i1
@@ -72,9 +72,9 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
 
 ; SI-LABEL: {{^}}s_trunc_i64_to_i1:
 ; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
+; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
+; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
 define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
   %trunc = trunc i64 %x to i1
   %sel = select i1 %trunc, i32 63, i32 -12
diff --git a/test/CodeGen/AMDGPU/udivrem.ll b/test/CodeGen/AMDGPU/udivrem.ll
index b3837f28209a..f692b7dfdc27 100644
--- a/test/CodeGen/AMDGPU/udivrem.ll
+++ b/test/CodeGen/AMDGPU/udivrem.ll
@@ -30,19 +30,19 @@
 ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
 ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
@@ -110,15 +110,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -133,15 +133,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -257,83 +257,83 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
 ; EG-DAG: CNDE_INT
 ; EG-DAG: CNDE_INT
 
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: s_endpgm
 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index 6f608df5e9f5..65fe580792a5 100644
--- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll
index 8ab4faf2f145..d120111a71fb 100644
--- a/test/CodeGen/AMDGPU/unsupported-cc.ll
+++ b/test/CodeGen/AMDGPU/unsupported-cc.ll
@@ -3,8 +3,8 @@
 ; These tests are for condition codes that are not supported by the hardware
 
 ; CHECK-LABEL: {{^}}slt:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @slt(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -15,8 +15,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ult_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -40,8 +40,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ult_float_native:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ult_float_native(float addrspace(1)* %out, float %in) {
 entry:
@@ -52,8 +52,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}olt:
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @olt(float addrspace(1)* %out, float %in) {
 entry:
@@ -64,8 +64,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}sle:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @sle(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -76,8 +76,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ule_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -101,8 +101,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ule_float_native:
-; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ule_float_native(float addrspace(1)* %out, float %in) {
 entry:
@@ -113,8 +113,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ole:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT:1084227584(5.000000e+00)
 define void @ole(float addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index f26f30022b4f..87b925a24a04 100644
--- a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
 declare float @llvm.fma.f32(float, float, float) #1
+declare double @llvm.fma.f64(double, double, double) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
 
@@ -40,6 +41,32 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
   ret void
 }
 
+; GCN-LABEL: {{^}}test_use_s_v_s:
+; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+
+; GCN: buffer_load_dword [[VA0:v[0-9]+]]
+; GCN-NOT: v_mov_b32
+; GCN: buffer_load_dword [[VA1:v[0-9]+]]
+
+; GCN-NOT: v_mov_b32
+; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN-NOT: v_mov_b32
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
+  %va0 = load volatile float, float addrspace(1)* %in
+  %va1 = load volatile float, float addrspace(1)* %in
+  %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1
+  %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
@@ -99,5 +126,145 @@ define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32
   ret void
 }
 
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT0]]
+define void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+  %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_s0_s1_k_f32:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
+; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
+
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Immediate in SGPRs just copied to VGPRs
+; GCN-LABEL: {{^}}test_s0_s1_k_f64:
+; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
+; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
+; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
+
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
+; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
+
+; Same zero component is re-used for half of each immediate.
+; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
+; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
+
+; GCN: buffer_store_dwordx2 [[RESULT0]]
+; GCN: buffer_store_dwordx2 [[RESULT1]]
+define void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
+  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
+  %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
+  store volatile double %fma0, double addrspace(1)* %out
+  store volatile double %fma1, double addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll
index 7d0ebd139f51..1cbefba60c95 100644
--- a/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/test/CodeGen/AMDGPU/valu-i1.ll
@@ -78,8 +78,8 @@ exit:
 
 ; SI: BB2_3:
 ; SI: buffer_load_dword
-; SI: buffer_store_dword
-; SI: v_cmp_eq_i32_e32 vcc,
+; SI-DAG: buffer_store_dword
+; SI-DAG: v_cmp_eq_i32_e32 vcc,
 ; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
 ; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
 ; SI: s_cbranch_execnz BB2_3
@@ -128,18 +128,18 @@ exit:
 
 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
 ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
-; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
-; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
 ; SI: s_cbranch_execz BB3_5
 
 ; SI: BB#4:
 ; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e32 vcc
-; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]]
 
 ; SI: BB3_5:
-; SI: s_or_b64 exec, exec, [[ORNEG1]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_or_b64 exec, exec, [[ORNEG2]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]]
 ; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
 ; SI: s_cbranch_execnz BB3_3
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
new file mode 100644
index 000000000000..cd7c78f408dd
--- /dev/null
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -0,0 +1,585 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; XUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA %s
+; XUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA %s
+
+; This ends up using all 256 registers and requires register
+; scavenging which will fail to find an unused register.
+
+; Check the ScratchSize to avoid regressions from spilling
+; intermediate register class copies.
+
+; FIXME: The same register is initialized to 0 for every spill.
+
+declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.r600.read.tgid.z() #1
+
+; GCN-LABEL: {{^}}spill_vgpr_compute:
+
+; GCN: s_mov_b32 s16, s3
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s14, -1
+; SI-NEXT: s_mov_b32 s15, 0x80f000
+; VI-NEXT: s_mov_b32 s15, 0x800000
+
+
+; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+
+; GCN: NumVgprs: 256
+; GCN: ScratchSize: 1024
+
+; s[0:3] input user SGPRs. s4,s5,s6 = workgroup IDs. s8 scratch offset.
+define void @spill_vgpr_compute(<4 x float> %arg6, float addrspace(1)* %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) #0 { +bb: + %tmp = add i32 %arg1, %arg2 + %tmp7 = extractelement <4 x float> %arg6, i32 0 + %tmp8 = extractelement <4 x float> %arg6, i32 1 + %tmp9 = extractelement <4 x float> %arg6, i32 2 + %tmp10 = extractelement <4 x float> %arg6, i32 3 + %tmp11 = bitcast float %arg5 to i32 + br label %bb12 + +bb12: ; preds = %bb145, %bb + %tmp13 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb145 ] + %tmp14 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb145 ] + %tmp15 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb145 ] + %tmp16 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb145 ] + %tmp17 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb145 ] + %tmp18 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb145 ] + %tmp19 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb145 ] + %tmp20 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb145 ] + %tmp21 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb145 ] + %tmp22 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb145 ] + %tmp23 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb145 ] + %tmp24 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb145 ] + %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb145 ] + %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp328, %bb145 ] + %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb145 ] + %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb145 ] + %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb145 ] + %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb145 ] + %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb145 ] + %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb145 ] + %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb145 ] + %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb145 ] + %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb145 ] + %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb145 ] + %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb145 ] + %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb145 ] + %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb145 ] + %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb145 ] + %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb145 ] + %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb145 ] + %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb145 ] + %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb145 ] + %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb145 ] + %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb145 ] + %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb145 ] + %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb145 ] + %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb145 ] + %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb145 ] + %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb145 ] + %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb145 ] + %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb145 ] + %tmp54 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb145 ] + %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb145 ] + %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb145 ] + %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb145 ] + %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb145 ] + %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb145 ] + %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb145 ] + %tmp61 = phi 
float [ 0.000000e+00, %bb ], [ %tmp302, %bb145 ] + %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb145 ] + %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb145 ] + %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb145 ] + %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb145 ] + %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb145 ] + %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb145 ] + %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb145 ] + %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb145 ] + %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb145 ] + %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb145 ] + %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb145 ] + %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb145 ] + %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb145 ] + %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb145 ] + %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb145 ] + %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp290, %bb145 ] + %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp289, %bb145 ] + %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp288, %bb145 ] + %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb145 ] + %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp287, %bb145 ] + %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp286, %bb145 ] + %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp285, %bb145 ] + %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb145 ] + %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp284, %bb145 ] + %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp283, %bb145 ] + %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp282, %bb145 ] + %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb145 ] + %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp281, %bb145 ] + %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp280, %bb145 ] + %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp279, %bb145 ] + %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb145 ] + %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb145 ] + %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb145 ] + %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb145 ] + %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb145 ] + %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb145 ] + %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb145 ] + %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb145 ] + %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb145 ] + %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb145 ] + %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb145 ] + %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb145 ] + %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb145 ] + %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb145 ] + %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb145 ] + %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb145 ] + %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb145 ] + %tmp109 = phi float [ 0.000000e+00, %bb ], [ %tmp374, %bb145 ] + %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb145 ] + %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb145 ] + %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb145 ] + %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb145 ] + %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb145 ] + %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb145 ] + %tmp116 = phi float [ 
0.000000e+00, %bb ], [ %tmp381, %bb145 ] + %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb145 ] + %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb145 ] + %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb145 ] + %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb145 ] + %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb145 ] + %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb145 ] + %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb145 ] + %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb145 ] + %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb145 ] + %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb145 ] + %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb145 ] + %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb145 ] + %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb145 ] + %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb145 ] + %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb145 ] + %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb145 ] + %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb145 ] + %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb145 ] + %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb145 ] + %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb145 ] + %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb145 ] + %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb145 ] + %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb145 ] + %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb145 ] + %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb145 ] + %tmp142 = bitcast float %tmp95 to i32 + %tmp143 = icmp sgt i32 %tmp142, 125 + br i1 %tmp143, label %bb144, label %bb145 + +bb144: ; preds = %bb12 + store volatile float %arg3, float addrspace(1)* %arg + store volatile float %tmp91, float addrspace(1)* %arg + store volatile float %tmp90, float addrspace(1)* %arg + store volatile float %tmp89, float addrspace(1)* %arg + store volatile float %tmp87, float addrspace(1)* %arg + store volatile float %tmp86, float addrspace(1)* %arg + store volatile float %tmp85, float addrspace(1)* %arg + store volatile float %tmp83, float addrspace(1)* %arg + store volatile float %tmp82, float addrspace(1)* %arg + store volatile float %tmp81, float addrspace(1)* %arg + store volatile float %tmp79, float addrspace(1)* %arg + store volatile float %tmp78, float addrspace(1)* %arg + store volatile float %tmp77, float addrspace(1)* %arg + store volatile float %tmp75, float addrspace(1)* %arg + store volatile float %tmp74, float addrspace(1)* %arg + store volatile float %tmp73, float addrspace(1)* %arg + store volatile float %tmp71, float addrspace(1)* %arg + store volatile float %tmp70, float addrspace(1)* %arg + store volatile float %tmp69, float addrspace(1)* %arg + store volatile float %tmp67, float addrspace(1)* %arg + store volatile float %tmp66, float addrspace(1)* %arg + store volatile float %tmp65, float addrspace(1)* %arg + store volatile float %tmp63, float addrspace(1)* %arg + store volatile float %tmp62, float addrspace(1)* %arg + store volatile float %tmp61, float addrspace(1)* %arg + store volatile float %tmp59, float addrspace(1)* %arg + store volatile float %tmp58, float addrspace(1)* %arg + store volatile float %tmp57, float addrspace(1)* %arg + store volatile float %tmp55, float addrspace(1)* %arg + store volatile float %tmp54, float addrspace(1)* %arg + store volatile float %tmp53, float addrspace(1)* %arg + 
store volatile float %tmp51, float addrspace(1)* %arg + store volatile float %tmp50, float addrspace(1)* %arg + store volatile float %tmp49, float addrspace(1)* %arg + store volatile float %tmp47, float addrspace(1)* %arg + store volatile float %tmp46, float addrspace(1)* %arg + store volatile float %tmp45, float addrspace(1)* %arg + store volatile float %tmp43, float addrspace(1)* %arg + store volatile float %tmp42, float addrspace(1)* %arg + store volatile float %tmp41, float addrspace(1)* %arg + store volatile float %tmp39, float addrspace(1)* %arg + store volatile float %tmp38, float addrspace(1)* %arg + store volatile float %tmp37, float addrspace(1)* %arg + store volatile float %tmp35, float addrspace(1)* %arg + store volatile float %tmp34, float addrspace(1)* %arg + store volatile float %tmp33, float addrspace(1)* %arg + store volatile float %tmp31, float addrspace(1)* %arg + store volatile float %tmp30, float addrspace(1)* %arg + store volatile float %tmp29, float addrspace(1)* %arg + store volatile float %tmp27, float addrspace(1)* %arg + store volatile float %tmp26, float addrspace(1)* %arg + store volatile float %tmp25, float addrspace(1)* %arg + store volatile float %tmp23, float addrspace(1)* %arg + store volatile float %tmp22, float addrspace(1)* %arg + store volatile float %tmp21, float addrspace(1)* %arg + store volatile float %tmp19, float addrspace(1)* %arg + store volatile float %tmp18, float addrspace(1)* %arg + store volatile float %tmp17, float addrspace(1)* %arg + store volatile float %tmp15, float addrspace(1)* %arg + store volatile float %tmp14, float addrspace(1)* %arg + store volatile float %tmp13, float addrspace(1)* %arg + store volatile float %tmp16, float addrspace(1)* %arg + store volatile float %tmp20, float addrspace(1)* %arg + store volatile float %tmp24, float addrspace(1)* %arg + store volatile float %tmp28, float addrspace(1)* %arg + store volatile float %tmp32, float addrspace(1)* %arg + store volatile float %tmp36, float addrspace(1)* %arg + store volatile float %tmp40, float addrspace(1)* %arg + store volatile float %tmp44, float addrspace(1)* %arg + store volatile float %tmp48, float addrspace(1)* %arg + store volatile float %tmp52, float addrspace(1)* %arg + store volatile float %tmp56, float addrspace(1)* %arg + store volatile float %tmp60, float addrspace(1)* %arg + store volatile float %tmp64, float addrspace(1)* %arg + store volatile float %tmp68, float addrspace(1)* %arg + store volatile float %tmp72, float addrspace(1)* %arg + store volatile float %tmp76, float addrspace(1)* %arg + store volatile float %tmp80, float addrspace(1)* %arg + store volatile float %tmp84, float addrspace(1)* %arg + store volatile float %tmp88, float addrspace(1)* %arg + store volatile float %tmp92, float addrspace(1)* %arg + store volatile float %tmp93, float addrspace(1)* %arg + store volatile float %tmp94, float addrspace(1)* %arg + store volatile float %tmp96, float addrspace(1)* %arg + store volatile float %tmp97, float addrspace(1)* %arg + store volatile float %tmp98, float addrspace(1)* %arg + store volatile float %tmp99, float addrspace(1)* %arg + store volatile float %tmp100, float addrspace(1)* %arg + store volatile float %tmp101, float addrspace(1)* %arg + store volatile float %tmp102, float addrspace(1)* %arg + store volatile float %tmp103, float addrspace(1)* %arg + store volatile float %tmp104, float addrspace(1)* %arg + store volatile float %tmp105, float addrspace(1)* %arg + store volatile float %tmp106, float addrspace(1)* %arg + store volatile 
float %tmp107, float addrspace(1)* %arg + store volatile float %tmp108, float addrspace(1)* %arg + store volatile float %tmp109, float addrspace(1)* %arg + store volatile float %tmp110, float addrspace(1)* %arg + store volatile float %tmp111, float addrspace(1)* %arg + store volatile float %tmp112, float addrspace(1)* %arg + store volatile float %tmp113, float addrspace(1)* %arg + store volatile float %tmp114, float addrspace(1)* %arg + store volatile float %tmp115, float addrspace(1)* %arg + store volatile float %tmp116, float addrspace(1)* %arg + store volatile float %tmp117, float addrspace(1)* %arg + store volatile float %tmp118, float addrspace(1)* %arg + store volatile float %tmp119, float addrspace(1)* %arg + store volatile float %tmp120, float addrspace(1)* %arg + store volatile float %tmp121, float addrspace(1)* %arg + store volatile float %tmp122, float addrspace(1)* %arg + store volatile float %tmp123, float addrspace(1)* %arg + store volatile float %tmp124, float addrspace(1)* %arg + store volatile float %tmp125, float addrspace(1)* %arg + store volatile float %tmp126, float addrspace(1)* %arg + store volatile float %tmp127, float addrspace(1)* %arg + store volatile float %tmp128, float addrspace(1)* %arg + store volatile float %tmp129, float addrspace(1)* %arg + store volatile float %tmp130, float addrspace(1)* %arg + store volatile float %tmp131, float addrspace(1)* %arg + store volatile float %tmp132, float addrspace(1)* %arg + store volatile float %tmp133, float addrspace(1)* %arg + store volatile float %tmp134, float addrspace(1)* %arg + store volatile float %tmp135, float addrspace(1)* %arg + store volatile float %tmp136, float addrspace(1)* %arg + store volatile float %tmp137, float addrspace(1)* %arg + store volatile float %tmp138, float addrspace(1)* %arg + store volatile float %tmp139, float addrspace(1)* %arg + store volatile float %arg4, float addrspace(1)* %arg + store volatile float %tmp7, float addrspace(1)* %arg + store volatile float %tmp8, float addrspace(1)* %arg + store volatile float %tmp9, float addrspace(1)* %arg + store volatile float %tmp10, float addrspace(1)* %arg + ret void + +bb145: ; preds = %bb12 + %tmp146 = bitcast float %tmp95 to i32 + %tmp147 = bitcast float %tmp95 to i32 + %tmp148 = add i32 %tmp11, %tmp147 + %tmp149 = bitcast i32 %tmp148 to float + %tmp150 = insertelement <128 x float> undef, float %tmp91, i32 0 + %tmp151 = insertelement <128 x float> %tmp150, float %tmp90, i32 1 + %tmp152 = insertelement <128 x float> %tmp151, float %tmp89, i32 2 + %tmp153 = insertelement <128 x float> %tmp152, float %tmp87, i32 3 + %tmp154 = insertelement <128 x float> %tmp153, float %tmp86, i32 4 + %tmp155 = insertelement <128 x float> %tmp154, float %tmp85, i32 5 + %tmp156 = insertelement <128 x float> %tmp155, float %tmp83, i32 6 + %tmp157 = insertelement <128 x float> %tmp156, float %tmp82, i32 7 + %tmp158 = insertelement <128 x float> %tmp157, float %tmp81, i32 8 + %tmp159 = insertelement <128 x float> %tmp158, float %tmp79, i32 9 + %tmp160 = insertelement <128 x float> %tmp159, float %tmp78, i32 10 + %tmp161 = insertelement <128 x float> %tmp160, float %tmp77, i32 11 + %tmp162 = insertelement <128 x float> %tmp161, float %tmp75, i32 12 + %tmp163 = insertelement <128 x float> %tmp162, float %tmp74, i32 13 + %tmp164 = insertelement <128 x float> %tmp163, float %tmp73, i32 14 + %tmp165 = insertelement <128 x float> %tmp164, float %tmp71, i32 15 + %tmp166 = insertelement <128 x float> %tmp165, float %tmp70, i32 16 + %tmp167 = insertelement <128 x float> 
%tmp166, float %tmp69, i32 17 + %tmp168 = insertelement <128 x float> %tmp167, float %tmp67, i32 18 + %tmp169 = insertelement <128 x float> %tmp168, float %tmp66, i32 19 + %tmp170 = insertelement <128 x float> %tmp169, float %tmp65, i32 20 + %tmp171 = insertelement <128 x float> %tmp170, float %tmp63, i32 21 + %tmp172 = insertelement <128 x float> %tmp171, float %tmp62, i32 22 + %tmp173 = insertelement <128 x float> %tmp172, float %tmp61, i32 23 + %tmp174 = insertelement <128 x float> %tmp173, float %tmp59, i32 24 + %tmp175 = insertelement <128 x float> %tmp174, float %tmp58, i32 25 + %tmp176 = insertelement <128 x float> %tmp175, float %tmp57, i32 26 + %tmp177 = insertelement <128 x float> %tmp176, float %tmp55, i32 27 + %tmp178 = insertelement <128 x float> %tmp177, float %tmp54, i32 28 + %tmp179 = insertelement <128 x float> %tmp178, float %tmp53, i32 29 + %tmp180 = insertelement <128 x float> %tmp179, float %tmp51, i32 30 + %tmp181 = insertelement <128 x float> %tmp180, float %tmp50, i32 31 + %tmp182 = insertelement <128 x float> %tmp181, float %tmp49, i32 32 + %tmp183 = insertelement <128 x float> %tmp182, float %tmp47, i32 33 + %tmp184 = insertelement <128 x float> %tmp183, float %tmp46, i32 34 + %tmp185 = insertelement <128 x float> %tmp184, float %tmp45, i32 35 + %tmp186 = insertelement <128 x float> %tmp185, float %tmp43, i32 36 + %tmp187 = insertelement <128 x float> %tmp186, float %tmp42, i32 37 + %tmp188 = insertelement <128 x float> %tmp187, float %tmp41, i32 38 + %tmp189 = insertelement <128 x float> %tmp188, float %tmp39, i32 39 + %tmp190 = insertelement <128 x float> %tmp189, float %tmp38, i32 40 + %tmp191 = insertelement <128 x float> %tmp190, float %tmp37, i32 41 + %tmp192 = insertelement <128 x float> %tmp191, float %tmp35, i32 42 + %tmp193 = insertelement <128 x float> %tmp192, float %tmp34, i32 43 + %tmp194 = insertelement <128 x float> %tmp193, float %tmp33, i32 44 + %tmp195 = insertelement <128 x float> %tmp194, float %tmp31, i32 45 + %tmp196 = insertelement <128 x float> %tmp195, float %tmp30, i32 46 + %tmp197 = insertelement <128 x float> %tmp196, float %tmp29, i32 47 + %tmp198 = insertelement <128 x float> %tmp197, float %tmp27, i32 48 + %tmp199 = insertelement <128 x float> %tmp198, float %tmp26, i32 49 + %tmp200 = insertelement <128 x float> %tmp199, float %tmp25, i32 50 + %tmp201 = insertelement <128 x float> %tmp200, float %tmp23, i32 51 + %tmp202 = insertelement <128 x float> %tmp201, float %tmp22, i32 52 + %tmp203 = insertelement <128 x float> %tmp202, float %tmp21, i32 53 + %tmp204 = insertelement <128 x float> %tmp203, float %tmp19, i32 54 + %tmp205 = insertelement <128 x float> %tmp204, float %tmp18, i32 55 + %tmp206 = insertelement <128 x float> %tmp205, float %tmp17, i32 56 + %tmp207 = insertelement <128 x float> %tmp206, float %tmp15, i32 57 + %tmp208 = insertelement <128 x float> %tmp207, float %tmp14, i32 58 + %tmp209 = insertelement <128 x float> %tmp208, float %tmp13, i32 59 + %tmp210 = insertelement <128 x float> %tmp209, float %tmp16, i32 60 + %tmp211 = insertelement <128 x float> %tmp210, float %tmp20, i32 61 + %tmp212 = insertelement <128 x float> %tmp211, float %tmp24, i32 62 + %tmp213 = insertelement <128 x float> %tmp212, float %tmp28, i32 63 + %tmp214 = insertelement <128 x float> %tmp213, float %tmp32, i32 64 + %tmp215 = insertelement <128 x float> %tmp214, float %tmp36, i32 65 + %tmp216 = insertelement <128 x float> %tmp215, float %tmp40, i32 66 + %tmp217 = insertelement <128 x float> %tmp216, float %tmp44, i32 67 + %tmp218 = 
insertelement <128 x float> %tmp217, float %tmp48, i32 68 + %tmp219 = insertelement <128 x float> %tmp218, float %tmp52, i32 69 + %tmp220 = insertelement <128 x float> %tmp219, float %tmp56, i32 70 + %tmp221 = insertelement <128 x float> %tmp220, float %tmp60, i32 71 + %tmp222 = insertelement <128 x float> %tmp221, float %tmp64, i32 72 + %tmp223 = insertelement <128 x float> %tmp222, float %tmp68, i32 73 + %tmp224 = insertelement <128 x float> %tmp223, float %tmp72, i32 74 + %tmp225 = insertelement <128 x float> %tmp224, float %tmp76, i32 75 + %tmp226 = insertelement <128 x float> %tmp225, float %tmp80, i32 76 + %tmp227 = insertelement <128 x float> %tmp226, float %tmp84, i32 77 + %tmp228 = insertelement <128 x float> %tmp227, float %tmp88, i32 78 + %tmp229 = insertelement <128 x float> %tmp228, float %tmp92, i32 79 + %tmp230 = insertelement <128 x float> %tmp229, float %tmp93, i32 80 + %tmp231 = insertelement <128 x float> %tmp230, float %tmp94, i32 81 + %tmp232 = insertelement <128 x float> %tmp231, float %tmp96, i32 82 + %tmp233 = insertelement <128 x float> %tmp232, float %tmp97, i32 83 + %tmp234 = insertelement <128 x float> %tmp233, float %tmp98, i32 84 + %tmp235 = insertelement <128 x float> %tmp234, float %tmp99, i32 85 + %tmp236 = insertelement <128 x float> %tmp235, float %tmp100, i32 86 + %tmp237 = insertelement <128 x float> %tmp236, float %tmp101, i32 87 + %tmp238 = insertelement <128 x float> %tmp237, float %tmp102, i32 88 + %tmp239 = insertelement <128 x float> %tmp238, float %tmp103, i32 89 + %tmp240 = insertelement <128 x float> %tmp239, float %tmp104, i32 90 + %tmp241 = insertelement <128 x float> %tmp240, float %tmp105, i32 91 + %tmp242 = insertelement <128 x float> %tmp241, float %tmp106, i32 92 + %tmp243 = insertelement <128 x float> %tmp242, float %tmp107, i32 93 + %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 94 + %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 95 + %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 96 + %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 97 + %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 98 + %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 99 + %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 100 + %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 101 + %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 102 + %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 103 + %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 104 + %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 105 + %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 106 + %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 107 + %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 108 + %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 109 + %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 110 + %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 111 + %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 112 + %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 113 + %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 114 + %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 115 + %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 116 + %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 117 + %tmp268 = 
insertelement <128 x float> %tmp267, float %tmp132, i32 118 + %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 119 + %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 120 + %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 121 + %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 122 + %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 123 + %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 124 + %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 125 + %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 126 + %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 127 + %tmp278 = insertelement <128 x float> %tmp277, float %tmp149, i32 %tmp146 + %tmp279 = extractelement <128 x float> %tmp278, i32 0 + %tmp280 = extractelement <128 x float> %tmp278, i32 1 + %tmp281 = extractelement <128 x float> %tmp278, i32 2 + %tmp282 = extractelement <128 x float> %tmp278, i32 3 + %tmp283 = extractelement <128 x float> %tmp278, i32 4 + %tmp284 = extractelement <128 x float> %tmp278, i32 5 + %tmp285 = extractelement <128 x float> %tmp278, i32 6 + %tmp286 = extractelement <128 x float> %tmp278, i32 7 + %tmp287 = extractelement <128 x float> %tmp278, i32 8 + %tmp288 = extractelement <128 x float> %tmp278, i32 9 + %tmp289 = extractelement <128 x float> %tmp278, i32 10 + %tmp290 = extractelement <128 x float> %tmp278, i32 11 + %tmp291 = extractelement <128 x float> %tmp278, i32 12 + %tmp292 = extractelement <128 x float> %tmp278, i32 13 + %tmp293 = extractelement <128 x float> %tmp278, i32 14 + %tmp294 = extractelement <128 x float> %tmp278, i32 15 + %tmp295 = extractelement <128 x float> %tmp278, i32 16 + %tmp296 = extractelement <128 x float> %tmp278, i32 17 + %tmp297 = extractelement <128 x float> %tmp278, i32 18 + %tmp298 = extractelement <128 x float> %tmp278, i32 19 + %tmp299 = extractelement <128 x float> %tmp278, i32 20 + %tmp300 = extractelement <128 x float> %tmp278, i32 21 + %tmp301 = extractelement <128 x float> %tmp278, i32 22 + %tmp302 = extractelement <128 x float> %tmp278, i32 23 + %tmp303 = extractelement <128 x float> %tmp278, i32 24 + %tmp304 = extractelement <128 x float> %tmp278, i32 25 + %tmp305 = extractelement <128 x float> %tmp278, i32 26 + %tmp306 = extractelement <128 x float> %tmp278, i32 27 + %tmp307 = extractelement <128 x float> %tmp278, i32 28 + %tmp308 = extractelement <128 x float> %tmp278, i32 29 + %tmp309 = extractelement <128 x float> %tmp278, i32 30 + %tmp310 = extractelement <128 x float> %tmp278, i32 31 + %tmp311 = extractelement <128 x float> %tmp278, i32 32 + %tmp312 = extractelement <128 x float> %tmp278, i32 33 + %tmp313 = extractelement <128 x float> %tmp278, i32 34 + %tmp314 = extractelement <128 x float> %tmp278, i32 35 + %tmp315 = extractelement <128 x float> %tmp278, i32 36 + %tmp316 = extractelement <128 x float> %tmp278, i32 37 + %tmp317 = extractelement <128 x float> %tmp278, i32 38 + %tmp318 = extractelement <128 x float> %tmp278, i32 39 + %tmp319 = extractelement <128 x float> %tmp278, i32 40 + %tmp320 = extractelement <128 x float> %tmp278, i32 41 + %tmp321 = extractelement <128 x float> %tmp278, i32 42 + %tmp322 = extractelement <128 x float> %tmp278, i32 43 + %tmp323 = extractelement <128 x float> %tmp278, i32 44 + %tmp324 = extractelement <128 x float> %tmp278, i32 45 + %tmp325 = extractelement <128 x float> %tmp278, i32 46 + %tmp326 = extractelement <128 x float> %tmp278, i32 47 + %tmp327 = extractelement <128 x float> 
%tmp278, i32 48 + %tmp328 = extractelement <128 x float> %tmp278, i32 49 + %tmp329 = extractelement <128 x float> %tmp278, i32 50 + %tmp330 = extractelement <128 x float> %tmp278, i32 51 + %tmp331 = extractelement <128 x float> %tmp278, i32 52 + %tmp332 = extractelement <128 x float> %tmp278, i32 53 + %tmp333 = extractelement <128 x float> %tmp278, i32 54 + %tmp334 = extractelement <128 x float> %tmp278, i32 55 + %tmp335 = extractelement <128 x float> %tmp278, i32 56 + %tmp336 = extractelement <128 x float> %tmp278, i32 57 + %tmp337 = extractelement <128 x float> %tmp278, i32 58 + %tmp338 = extractelement <128 x float> %tmp278, i32 59 + %tmp339 = extractelement <128 x float> %tmp278, i32 60 + %tmp340 = extractelement <128 x float> %tmp278, i32 61 + %tmp341 = extractelement <128 x float> %tmp278, i32 62 + %tmp342 = extractelement <128 x float> %tmp278, i32 63 + %tmp343 = extractelement <128 x float> %tmp278, i32 64 + %tmp344 = extractelement <128 x float> %tmp278, i32 65 + %tmp345 = extractelement <128 x float> %tmp278, i32 66 + %tmp346 = extractelement <128 x float> %tmp278, i32 67 + %tmp347 = extractelement <128 x float> %tmp278, i32 68 + %tmp348 = extractelement <128 x float> %tmp278, i32 69 + %tmp349 = extractelement <128 x float> %tmp278, i32 70 + %tmp350 = extractelement <128 x float> %tmp278, i32 71 + %tmp351 = extractelement <128 x float> %tmp278, i32 72 + %tmp352 = extractelement <128 x float> %tmp278, i32 73 + %tmp353 = extractelement <128 x float> %tmp278, i32 74 + %tmp354 = extractelement <128 x float> %tmp278, i32 75 + %tmp355 = extractelement <128 x float> %tmp278, i32 76 + %tmp356 = extractelement <128 x float> %tmp278, i32 77 + %tmp357 = extractelement <128 x float> %tmp278, i32 78 + %tmp358 = extractelement <128 x float> %tmp278, i32 79 + %tmp359 = extractelement <128 x float> %tmp278, i32 80 + %tmp360 = extractelement <128 x float> %tmp278, i32 81 + %tmp361 = extractelement <128 x float> %tmp278, i32 82 + %tmp362 = extractelement <128 x float> %tmp278, i32 83 + %tmp363 = extractelement <128 x float> %tmp278, i32 84 + %tmp364 = extractelement <128 x float> %tmp278, i32 85 + %tmp365 = extractelement <128 x float> %tmp278, i32 86 + %tmp366 = extractelement <128 x float> %tmp278, i32 87 + %tmp367 = extractelement <128 x float> %tmp278, i32 88 + %tmp368 = extractelement <128 x float> %tmp278, i32 89 + %tmp369 = extractelement <128 x float> %tmp278, i32 90 + %tmp370 = extractelement <128 x float> %tmp278, i32 91 + %tmp371 = extractelement <128 x float> %tmp278, i32 92 + %tmp372 = extractelement <128 x float> %tmp278, i32 93 + %tmp373 = extractelement <128 x float> %tmp278, i32 94 + %tmp374 = extractelement <128 x float> %tmp278, i32 95 + %tmp375 = extractelement <128 x float> %tmp278, i32 96 + %tmp376 = extractelement <128 x float> %tmp278, i32 97 + %tmp377 = extractelement <128 x float> %tmp278, i32 98 + %tmp378 = extractelement <128 x float> %tmp278, i32 99 + %tmp379 = extractelement <128 x float> %tmp278, i32 100 + %tmp380 = extractelement <128 x float> %tmp278, i32 101 + %tmp381 = extractelement <128 x float> %tmp278, i32 102 + %tmp382 = extractelement <128 x float> %tmp278, i32 103 + %tmp383 = extractelement <128 x float> %tmp278, i32 104 + %tmp384 = extractelement <128 x float> %tmp278, i32 105 + %tmp385 = extractelement <128 x float> %tmp278, i32 106 + %tmp386 = extractelement <128 x float> %tmp278, i32 107 + %tmp387 = extractelement <128 x float> %tmp278, i32 108 + %tmp388 = extractelement <128 x float> %tmp278, i32 109 + %tmp389 = extractelement <128 x float> %tmp278, 
i32 110 + %tmp390 = extractelement <128 x float> %tmp278, i32 111 + %tmp391 = extractelement <128 x float> %tmp278, i32 112 + %tmp392 = extractelement <128 x float> %tmp278, i32 113 + %tmp393 = extractelement <128 x float> %tmp278, i32 114 + %tmp394 = extractelement <128 x float> %tmp278, i32 115 + %tmp395 = extractelement <128 x float> %tmp278, i32 116 + %tmp396 = extractelement <128 x float> %tmp278, i32 117 + %tmp397 = extractelement <128 x float> %tmp278, i32 118 + %tmp398 = extractelement <128 x float> %tmp278, i32 119 + %tmp399 = extractelement <128 x float> %tmp278, i32 120 + %tmp400 = extractelement <128 x float> %tmp278, i32 121 + %tmp401 = extractelement <128 x float> %tmp278, i32 122 + %tmp402 = extractelement <128 x float> %tmp278, i32 123 + %tmp403 = extractelement <128 x float> %tmp278, i32 124 + %tmp404 = extractelement <128 x float> %tmp278, i32 125 + %tmp405 = extractelement <128 x float> %tmp278, i32 126 + %tmp406 = extractelement <128 x float> %tmp278, i32 127 + %tmp407 = bitcast float %tmp95 to i32 + %tmp408 = add i32 %tmp407, 1 + %tmp409 = bitcast i32 %tmp408 to float + br label %bb12 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll new file mode 100644 index 000000000000..16abb89bb0b8 --- /dev/null +++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -0,0 +1,494 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; This ends up using all 255 registers and requires register +; scavenging which will fail to find an unsued register. + +; Check the ScratchSize to avoid regressions from spilling +; intermediate register class copies. + +; FIXME: The same register is initialized to 0 for every spill. 
+ +; GCN-LABEL: {{^}}main: + +; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_mov_b32 s15, 0x80f000 +; VI-NEXT: s_mov_b32 s15, 0x800000 + +; s12 is offset user SGPR +; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill + +; GCN: NumVgprs: 256 +; GCN: ScratchSize: 1024 + +define void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { +bb: + %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0 + %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0 + %tmp12 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 0) + %tmp13 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 16) + %tmp14 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 32) + %tmp15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0 + %tmp16 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp15, align 16, !tbaa !0 + %tmp17 = add i32 %arg5, %arg7 + %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp16, i32 0, i32 %tmp17) + %tmp19 = extractelement <4 x float> %tmp18, i32 0 + %tmp20 = extractelement <4 x float> %tmp18, i32 1 + %tmp21 = extractelement <4 x float> %tmp18, i32 2 + %tmp22 = extractelement <4 x float> %tmp18, i32 3 + %tmp23 = bitcast float %tmp14 to i32 + br label %bb24 + +bb24: ; preds = %bb157, %bb + %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb157 ] + %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb157 ] + %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb157 ] + %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb157 ] + %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb157 ] + %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb157 ] + %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb157 ] + %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb157 ] + %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb157 ] + %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb157 ] + %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb157 ] + %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb157 ] + %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb157 ] + %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb157 ] + %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb157 ] + %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb157 ] + %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb157 ] + %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb157 ] + %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb157 ] + %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb157 ] + %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb157 ] + %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb157 ] + %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb157 ] + %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb157 ] + %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb157 ] + %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb157 ] + %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb157 ] + %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb157 ] + %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb157 ] + %tmp54 = phi 
float [ 0.000000e+00, %bb ], [ %tmp328, %bb157 ] + %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb157 ] + %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb157 ] + %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb157 ] + %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb157 ] + %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb157 ] + %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb157 ] + %tmp61 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb157 ] + %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb157 ] + %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb157 ] + %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb157 ] + %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb157 ] + %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb157 ] + %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb157 ] + %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb157 ] + %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb157 ] + %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb157 ] + %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb157 ] + %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb157 ] + %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb157 ] + %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb157 ] + %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb157 ] + %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb157 ] + %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb157 ] + %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb157 ] + %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb157 ] + %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb157 ] + %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb157 ] + %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb157 ] + %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb157 ] + %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb157 ] + %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb157 ] + %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb157 ] + %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb157 ] + %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb157 ] + %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp302, %bb157 ] + %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb157 ] + %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb157 ] + %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb157 ] + %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb157 ] + %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb157 ] + %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb157 ] + %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb157 ] + %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb157 ] + %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb157 ] + %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb157 ] + %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb157 ] + %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb157 ] + %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb157 ] + %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb157 ] + %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb157 ] + %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb157 ] + %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb157 ] + %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp421, %bb157 ] + %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb157 ] + %tmp109 = phi float [ 0.000000e+00, 
%bb ], [ %tmp374, %bb157 ] + %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb157 ] + %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb157 ] + %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb157 ] + %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb157 ] + %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb157 ] + %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb157 ] + %tmp116 = phi float [ 0.000000e+00, %bb ], [ %tmp381, %bb157 ] + %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb157 ] + %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb157 ] + %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb157 ] + %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb157 ] + %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb157 ] + %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb157 ] + %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb157 ] + %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb157 ] + %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb157 ] + %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb157 ] + %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb157 ] + %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb157 ] + %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb157 ] + %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb157 ] + %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb157 ] + %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb157 ] + %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb157 ] + %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb157 ] + %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb157 ] + %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb157 ] + %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb157 ] + %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb157 ] + %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb157 ] + %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb157 ] + %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb157 ] + %tmp142 = phi float [ 0.000000e+00, %bb ], [ %tmp407, %bb157 ] + %tmp143 = phi float [ 0.000000e+00, %bb ], [ %tmp408, %bb157 ] + %tmp144 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb157 ] + %tmp145 = phi float [ 0.000000e+00, %bb ], [ %tmp410, %bb157 ] + %tmp146 = phi float [ 0.000000e+00, %bb ], [ %tmp411, %bb157 ] + %tmp147 = phi float [ 0.000000e+00, %bb ], [ %tmp412, %bb157 ] + %tmp148 = phi float [ 0.000000e+00, %bb ], [ %tmp413, %bb157 ] + %tmp149 = phi float [ 0.000000e+00, %bb ], [ %tmp414, %bb157 ] + %tmp150 = phi float [ 0.000000e+00, %bb ], [ %tmp415, %bb157 ] + %tmp151 = phi float [ 0.000000e+00, %bb ], [ %tmp416, %bb157 ] + %tmp152 = phi float [ 0.000000e+00, %bb ], [ %tmp417, %bb157 ] + %tmp153 = phi float [ 0.000000e+00, %bb ], [ %tmp418, %bb157 ] + %tmp154 = bitcast float %tmp107 to i32 + %tmp155 = icmp sgt i32 %tmp154, 125 + br i1 %tmp155, label %bb156, label %bb157 + +bb156: ; preds = %bb24 + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85) + call void @llvm.SI.export(i32 
15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22) + ret void + +bb157: ; preds 
= %bb24 + %tmp158 = bitcast float %tmp107 to i32 + %tmp159 = bitcast float %tmp107 to i32 + %tmp160 = add i32 %tmp23, %tmp159 + %tmp161 = bitcast i32 %tmp160 to float + %tmp162 = insertelement <128 x float> undef, float %tmp103, i32 0 + %tmp163 = insertelement <128 x float> %tmp162, float %tmp102, i32 1 + %tmp164 = insertelement <128 x float> %tmp163, float %tmp101, i32 2 + %tmp165 = insertelement <128 x float> %tmp164, float %tmp99, i32 3 + %tmp166 = insertelement <128 x float> %tmp165, float %tmp98, i32 4 + %tmp167 = insertelement <128 x float> %tmp166, float %tmp97, i32 5 + %tmp168 = insertelement <128 x float> %tmp167, float %tmp95, i32 6 + %tmp169 = insertelement <128 x float> %tmp168, float %tmp94, i32 7 + %tmp170 = insertelement <128 x float> %tmp169, float %tmp93, i32 8 + %tmp171 = insertelement <128 x float> %tmp170, float %tmp91, i32 9 + %tmp172 = insertelement <128 x float> %tmp171, float %tmp90, i32 10 + %tmp173 = insertelement <128 x float> %tmp172, float %tmp89, i32 11 + %tmp174 = insertelement <128 x float> %tmp173, float %tmp87, i32 12 + %tmp175 = insertelement <128 x float> %tmp174, float %tmp86, i32 13 + %tmp176 = insertelement <128 x float> %tmp175, float %tmp85, i32 14 + %tmp177 = insertelement <128 x float> %tmp176, float %tmp83, i32 15 + %tmp178 = insertelement <128 x float> %tmp177, float %tmp82, i32 16 + %tmp179 = insertelement <128 x float> %tmp178, float %tmp81, i32 17 + %tmp180 = insertelement <128 x float> %tmp179, float %tmp79, i32 18 + %tmp181 = insertelement <128 x float> %tmp180, float %tmp78, i32 19 + %tmp182 = insertelement <128 x float> %tmp181, float %tmp77, i32 20 + %tmp183 = insertelement <128 x float> %tmp182, float %tmp75, i32 21 + %tmp184 = insertelement <128 x float> %tmp183, float %tmp74, i32 22 + %tmp185 = insertelement <128 x float> %tmp184, float %tmp73, i32 23 + %tmp186 = insertelement <128 x float> %tmp185, float %tmp71, i32 24 + %tmp187 = insertelement <128 x float> %tmp186, float %tmp70, i32 25 + %tmp188 = insertelement <128 x float> %tmp187, float %tmp69, i32 26 + %tmp189 = insertelement <128 x float> %tmp188, float %tmp67, i32 27 + %tmp190 = insertelement <128 x float> %tmp189, float %tmp66, i32 28 + %tmp191 = insertelement <128 x float> %tmp190, float %tmp65, i32 29 + %tmp192 = insertelement <128 x float> %tmp191, float %tmp63, i32 30 + %tmp193 = insertelement <128 x float> %tmp192, float %tmp62, i32 31 + %tmp194 = insertelement <128 x float> %tmp193, float %tmp61, i32 32 + %tmp195 = insertelement <128 x float> %tmp194, float %tmp59, i32 33 + %tmp196 = insertelement <128 x float> %tmp195, float %tmp58, i32 34 + %tmp197 = insertelement <128 x float> %tmp196, float %tmp57, i32 35 + %tmp198 = insertelement <128 x float> %tmp197, float %tmp55, i32 36 + %tmp199 = insertelement <128 x float> %tmp198, float %tmp54, i32 37 + %tmp200 = insertelement <128 x float> %tmp199, float %tmp53, i32 38 + %tmp201 = insertelement <128 x float> %tmp200, float %tmp51, i32 39 + %tmp202 = insertelement <128 x float> %tmp201, float %tmp50, i32 40 + %tmp203 = insertelement <128 x float> %tmp202, float %tmp49, i32 41 + %tmp204 = insertelement <128 x float> %tmp203, float %tmp47, i32 42 + %tmp205 = insertelement <128 x float> %tmp204, float %tmp46, i32 43 + %tmp206 = insertelement <128 x float> %tmp205, float %tmp45, i32 44 + %tmp207 = insertelement <128 x float> %tmp206, float %tmp43, i32 45 + %tmp208 = insertelement <128 x float> %tmp207, float %tmp42, i32 46 + %tmp209 = insertelement <128 x float> %tmp208, float %tmp41, i32 47 + %tmp210 = insertelement <128 x 
float> %tmp209, float %tmp39, i32 48 + %tmp211 = insertelement <128 x float> %tmp210, float %tmp38, i32 49 + %tmp212 = insertelement <128 x float> %tmp211, float %tmp37, i32 50 + %tmp213 = insertelement <128 x float> %tmp212, float %tmp35, i32 51 + %tmp214 = insertelement <128 x float> %tmp213, float %tmp34, i32 52 + %tmp215 = insertelement <128 x float> %tmp214, float %tmp33, i32 53 + %tmp216 = insertelement <128 x float> %tmp215, float %tmp31, i32 54 + %tmp217 = insertelement <128 x float> %tmp216, float %tmp30, i32 55 + %tmp218 = insertelement <128 x float> %tmp217, float %tmp29, i32 56 + %tmp219 = insertelement <128 x float> %tmp218, float %tmp27, i32 57 + %tmp220 = insertelement <128 x float> %tmp219, float %tmp26, i32 58 + %tmp221 = insertelement <128 x float> %tmp220, float %tmp25, i32 59 + %tmp222 = insertelement <128 x float> %tmp221, float %tmp28, i32 60 + %tmp223 = insertelement <128 x float> %tmp222, float %tmp32, i32 61 + %tmp224 = insertelement <128 x float> %tmp223, float %tmp36, i32 62 + %tmp225 = insertelement <128 x float> %tmp224, float %tmp40, i32 63 + %tmp226 = insertelement <128 x float> %tmp225, float %tmp44, i32 64 + %tmp227 = insertelement <128 x float> %tmp226, float %tmp48, i32 65 + %tmp228 = insertelement <128 x float> %tmp227, float %tmp52, i32 66 + %tmp229 = insertelement <128 x float> %tmp228, float %tmp56, i32 67 + %tmp230 = insertelement <128 x float> %tmp229, float %tmp60, i32 68 + %tmp231 = insertelement <128 x float> %tmp230, float %tmp64, i32 69 + %tmp232 = insertelement <128 x float> %tmp231, float %tmp68, i32 70 + %tmp233 = insertelement <128 x float> %tmp232, float %tmp72, i32 71 + %tmp234 = insertelement <128 x float> %tmp233, float %tmp76, i32 72 + %tmp235 = insertelement <128 x float> %tmp234, float %tmp80, i32 73 + %tmp236 = insertelement <128 x float> %tmp235, float %tmp84, i32 74 + %tmp237 = insertelement <128 x float> %tmp236, float %tmp88, i32 75 + %tmp238 = insertelement <128 x float> %tmp237, float %tmp92, i32 76 + %tmp239 = insertelement <128 x float> %tmp238, float %tmp96, i32 77 + %tmp240 = insertelement <128 x float> %tmp239, float %tmp100, i32 78 + %tmp241 = insertelement <128 x float> %tmp240, float %tmp104, i32 79 + %tmp242 = insertelement <128 x float> %tmp241, float %tmp105, i32 80 + %tmp243 = insertelement <128 x float> %tmp242, float %tmp106, i32 81 + %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 82 + %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 83 + %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 84 + %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 85 + %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 86 + %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 87 + %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 88 + %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 89 + %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 90 + %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 91 + %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 92 + %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 93 + %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 94 + %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 95 + %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 96 + %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 97 + %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 
98 + %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 99 + %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 100 + %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 101 + %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 102 + %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 103 + %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 104 + %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 105 + %tmp268 = insertelement <128 x float> %tmp267, float %tmp132, i32 106 + %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 107 + %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 108 + %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 109 + %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 110 + %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 111 + %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 112 + %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 113 + %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 114 + %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 115 + %tmp278 = insertelement <128 x float> %tmp277, float %tmp142, i32 116 + %tmp279 = insertelement <128 x float> %tmp278, float %tmp143, i32 117 + %tmp280 = insertelement <128 x float> %tmp279, float %tmp144, i32 118 + %tmp281 = insertelement <128 x float> %tmp280, float %tmp145, i32 119 + %tmp282 = insertelement <128 x float> %tmp281, float %tmp146, i32 120 + %tmp283 = insertelement <128 x float> %tmp282, float %tmp147, i32 121 + %tmp284 = insertelement <128 x float> %tmp283, float %tmp148, i32 122 + %tmp285 = insertelement <128 x float> %tmp284, float %tmp149, i32 123 + %tmp286 = insertelement <128 x float> %tmp285, float %tmp150, i32 124 + %tmp287 = insertelement <128 x float> %tmp286, float %tmp151, i32 125 + %tmp288 = insertelement <128 x float> %tmp287, float %tmp152, i32 126 + %tmp289 = insertelement <128 x float> %tmp288, float %tmp153, i32 127 + %tmp290 = insertelement <128 x float> %tmp289, float %tmp161, i32 %tmp158 + %tmp291 = extractelement <128 x float> %tmp290, i32 0 + %tmp292 = extractelement <128 x float> %tmp290, i32 1 + %tmp293 = extractelement <128 x float> %tmp290, i32 2 + %tmp294 = extractelement <128 x float> %tmp290, i32 3 + %tmp295 = extractelement <128 x float> %tmp290, i32 4 + %tmp296 = extractelement <128 x float> %tmp290, i32 5 + %tmp297 = extractelement <128 x float> %tmp290, i32 6 + %tmp298 = extractelement <128 x float> %tmp290, i32 7 + %tmp299 = extractelement <128 x float> %tmp290, i32 8 + %tmp300 = extractelement <128 x float> %tmp290, i32 9 + %tmp301 = extractelement <128 x float> %tmp290, i32 10 + %tmp302 = extractelement <128 x float> %tmp290, i32 11 + %tmp303 = extractelement <128 x float> %tmp290, i32 12 + %tmp304 = extractelement <128 x float> %tmp290, i32 13 + %tmp305 = extractelement <128 x float> %tmp290, i32 14 + %tmp306 = extractelement <128 x float> %tmp290, i32 15 + %tmp307 = extractelement <128 x float> %tmp290, i32 16 + %tmp308 = extractelement <128 x float> %tmp290, i32 17 + %tmp309 = extractelement <128 x float> %tmp290, i32 18 + %tmp310 = extractelement <128 x float> %tmp290, i32 19 + %tmp311 = extractelement <128 x float> %tmp290, i32 20 + %tmp312 = extractelement <128 x float> %tmp290, i32 21 + %tmp313 = extractelement <128 x float> %tmp290, i32 22 + %tmp314 = extractelement <128 x float> %tmp290, i32 23 + %tmp315 = extractelement 
<128 x float> %tmp290, i32 24 + %tmp316 = extractelement <128 x float> %tmp290, i32 25 + %tmp317 = extractelement <128 x float> %tmp290, i32 26 + %tmp318 = extractelement <128 x float> %tmp290, i32 27 + %tmp319 = extractelement <128 x float> %tmp290, i32 28 + %tmp320 = extractelement <128 x float> %tmp290, i32 29 + %tmp321 = extractelement <128 x float> %tmp290, i32 30 + %tmp322 = extractelement <128 x float> %tmp290, i32 31 + %tmp323 = extractelement <128 x float> %tmp290, i32 32 + %tmp324 = extractelement <128 x float> %tmp290, i32 33 + %tmp325 = extractelement <128 x float> %tmp290, i32 34 + %tmp326 = extractelement <128 x float> %tmp290, i32 35 + %tmp327 = extractelement <128 x float> %tmp290, i32 36 + %tmp328 = extractelement <128 x float> %tmp290, i32 37 + %tmp329 = extractelement <128 x float> %tmp290, i32 38 + %tmp330 = extractelement <128 x float> %tmp290, i32 39 + %tmp331 = extractelement <128 x float> %tmp290, i32 40 + %tmp332 = extractelement <128 x float> %tmp290, i32 41 + %tmp333 = extractelement <128 x float> %tmp290, i32 42 + %tmp334 = extractelement <128 x float> %tmp290, i32 43 + %tmp335 = extractelement <128 x float> %tmp290, i32 44 + %tmp336 = extractelement <128 x float> %tmp290, i32 45 + %tmp337 = extractelement <128 x float> %tmp290, i32 46 + %tmp338 = extractelement <128 x float> %tmp290, i32 47 + %tmp339 = extractelement <128 x float> %tmp290, i32 48 + %tmp340 = extractelement <128 x float> %tmp290, i32 49 + %tmp341 = extractelement <128 x float> %tmp290, i32 50 + %tmp342 = extractelement <128 x float> %tmp290, i32 51 + %tmp343 = extractelement <128 x float> %tmp290, i32 52 + %tmp344 = extractelement <128 x float> %tmp290, i32 53 + %tmp345 = extractelement <128 x float> %tmp290, i32 54 + %tmp346 = extractelement <128 x float> %tmp290, i32 55 + %tmp347 = extractelement <128 x float> %tmp290, i32 56 + %tmp348 = extractelement <128 x float> %tmp290, i32 57 + %tmp349 = extractelement <128 x float> %tmp290, i32 58 + %tmp350 = extractelement <128 x float> %tmp290, i32 59 + %tmp351 = extractelement <128 x float> %tmp290, i32 60 + %tmp352 = extractelement <128 x float> %tmp290, i32 61 + %tmp353 = extractelement <128 x float> %tmp290, i32 62 + %tmp354 = extractelement <128 x float> %tmp290, i32 63 + %tmp355 = extractelement <128 x float> %tmp290, i32 64 + %tmp356 = extractelement <128 x float> %tmp290, i32 65 + %tmp357 = extractelement <128 x float> %tmp290, i32 66 + %tmp358 = extractelement <128 x float> %tmp290, i32 67 + %tmp359 = extractelement <128 x float> %tmp290, i32 68 + %tmp360 = extractelement <128 x float> %tmp290, i32 69 + %tmp361 = extractelement <128 x float> %tmp290, i32 70 + %tmp362 = extractelement <128 x float> %tmp290, i32 71 + %tmp363 = extractelement <128 x float> %tmp290, i32 72 + %tmp364 = extractelement <128 x float> %tmp290, i32 73 + %tmp365 = extractelement <128 x float> %tmp290, i32 74 + %tmp366 = extractelement <128 x float> %tmp290, i32 75 + %tmp367 = extractelement <128 x float> %tmp290, i32 76 + %tmp368 = extractelement <128 x float> %tmp290, i32 77 + %tmp369 = extractelement <128 x float> %tmp290, i32 78 + %tmp370 = extractelement <128 x float> %tmp290, i32 79 + %tmp371 = extractelement <128 x float> %tmp290, i32 80 + %tmp372 = extractelement <128 x float> %tmp290, i32 81 + %tmp373 = extractelement <128 x float> %tmp290, i32 82 + %tmp374 = extractelement <128 x float> %tmp290, i32 83 + %tmp375 = extractelement <128 x float> %tmp290, i32 84 + %tmp376 = extractelement <128 x float> %tmp290, i32 85 + %tmp377 = extractelement <128 x float> 
%tmp290, i32 86 + %tmp378 = extractelement <128 x float> %tmp290, i32 87 + %tmp379 = extractelement <128 x float> %tmp290, i32 88 + %tmp380 = extractelement <128 x float> %tmp290, i32 89 + %tmp381 = extractelement <128 x float> %tmp290, i32 90 + %tmp382 = extractelement <128 x float> %tmp290, i32 91 + %tmp383 = extractelement <128 x float> %tmp290, i32 92 + %tmp384 = extractelement <128 x float> %tmp290, i32 93 + %tmp385 = extractelement <128 x float> %tmp290, i32 94 + %tmp386 = extractelement <128 x float> %tmp290, i32 95 + %tmp387 = extractelement <128 x float> %tmp290, i32 96 + %tmp388 = extractelement <128 x float> %tmp290, i32 97 + %tmp389 = extractelement <128 x float> %tmp290, i32 98 + %tmp390 = extractelement <128 x float> %tmp290, i32 99 + %tmp391 = extractelement <128 x float> %tmp290, i32 100 + %tmp392 = extractelement <128 x float> %tmp290, i32 101 + %tmp393 = extractelement <128 x float> %tmp290, i32 102 + %tmp394 = extractelement <128 x float> %tmp290, i32 103 + %tmp395 = extractelement <128 x float> %tmp290, i32 104 + %tmp396 = extractelement <128 x float> %tmp290, i32 105 + %tmp397 = extractelement <128 x float> %tmp290, i32 106 + %tmp398 = extractelement <128 x float> %tmp290, i32 107 + %tmp399 = extractelement <128 x float> %tmp290, i32 108 + %tmp400 = extractelement <128 x float> %tmp290, i32 109 + %tmp401 = extractelement <128 x float> %tmp290, i32 110 + %tmp402 = extractelement <128 x float> %tmp290, i32 111 + %tmp403 = extractelement <128 x float> %tmp290, i32 112 + %tmp404 = extractelement <128 x float> %tmp290, i32 113 + %tmp405 = extractelement <128 x float> %tmp290, i32 114 + %tmp406 = extractelement <128 x float> %tmp290, i32 115 + %tmp407 = extractelement <128 x float> %tmp290, i32 116 + %tmp408 = extractelement <128 x float> %tmp290, i32 117 + %tmp409 = extractelement <128 x float> %tmp290, i32 118 + %tmp410 = extractelement <128 x float> %tmp290, i32 119 + %tmp411 = extractelement <128 x float> %tmp290, i32 120 + %tmp412 = extractelement <128 x float> %tmp290, i32 121 + %tmp413 = extractelement <128 x float> %tmp290, i32 122 + %tmp414 = extractelement <128 x float> %tmp290, i32 123 + %tmp415 = extractelement <128 x float> %tmp290, i32 124 + %tmp416 = extractelement <128 x float> %tmp290, i32 125 + %tmp417 = extractelement <128 x float> %tmp290, i32 126 + %tmp418 = extractelement <128 x float> %tmp290, i32 127 + %tmp419 = bitcast float %tmp107 to i32 + %tmp420 = add i32 %tmp419, 1 + %tmp421 = bitcast i32 %tmp420 to float + br label %bb24 +} + +; Function Attrs: nounwind readnone +declare float @llvm.SI.load.const(<16 x i8>, i32) #1 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } +attributes #1 = { nounwind readnone } + +!0 = !{!1, !1, i64 0, i32 1} +!1 = !{!"const", null} diff --git a/test/CodeGen/AMDGPU/vop-shrink.ll b/test/CodeGen/AMDGPU/vop-shrink.ll index 9b2f229c05af..2bfe1b2bd6ec 100644 --- a/test/CodeGen/AMDGPU/vop-shrink.ll +++ b/test/CodeGen/AMDGPU/vop-shrink.ll @@ -3,8 +3,8 @@ ; Test that we correctly commute a sub instruction ; FUNC-LABEL: {{^}}sub_rev: -; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s -; SI: v_subrev_i32_e32 v{{[0-9]+}}, s +; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s +; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s ; ModuleID = 'vop-shrink.ll' diff --git a/test/CodeGen/AMDGPU/wait.ll b/test/CodeGen/AMDGPU/wait.ll index 
5cc7577cad33..107e84b33be9 100644
--- a/test/CodeGen/AMDGPU/wait.ll
+++ b/test/CodeGen/AMDGPU/wait.ll
@@ -1,11 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; The ilpmax scheduler is used for the second test to get the ordering we want for the test.
 
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_load_dwordx4
-; CHECK: s_load_dwordx4
-; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; CHECK: s_endpgm
+; DEFAULT-LABEL: {{^}}main:
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_waitcnt vmcnt(0)
+; DEFAULT: exp
+; DEFAULT: s_waitcnt lgkmcnt(0)
+; DEFAULT: s_endpgm
 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
@@ -29,7 +34,43 @@ main_body:
   ret void
 }
 
-; Function Attrs: noduplicate nounwind
+; ILPMAX-LABEL: {{^}}main2:
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_waitcnt vmcnt(1)
+; ILPMAX: s_waitcnt vmcnt(0)
+; ILPMAX: s_endpgm
+
+define void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
+byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
+  %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
+  %13 = add i32 %5, %7
+  %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
+  %15 = extractelement <4 x float> %14, i32 0
+  %16 = extractelement <4 x float> %14, i32 1
+  %17 = extractelement <4 x float> %14, i32 2
+  %18 = extractelement <4 x float> %14, i32 3
+  %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
+  %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
+  %21 = add i32 %5, %7
+  %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
+  %23 = extractelement <4 x float> %22, i32 0
+  %24 = extractelement <4 x float> %22, i32 1
+  %25 = extractelement <4 x float> %22, i32 2
+  %26 = extractelement <4 x float> %22, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
+  ret void
+}
+
+
+; Function Attrs: convergent nounwind
 declare void @llvm.AMDGPU.barrier.global() #1
 
 ; Function Attrs: nounwind readnone
@@ -38,7 +79,7 @@ declare <4 x float>
@llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } attributes #2 = { nounwind readnone } !0 = !{!1, !1, i64 0, i32 1} diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll index 4328e964c1bf..e7fcd1ff3650 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -1,15 +1,34 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].X +; EG: MOV {{\*? *}}[[VAL]], KC0[0].X + +; HSA: .amd_kernel_code_t + +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 +; HSA: enable_sgpr_dispatch_id = 0 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: enable_sgpr_private_segment_size = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; HSA: .end_amd_kernel_code_t + + +; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] -; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] define void @ngroups_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.x() #0 @@ -19,12 +38,12 @@ entry: ; FUNC-LABEL: {{^}}ngroups_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @ngroups_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.y() #0 @@ -34,12 +53,12 @@ entry: ; FUNC-LABEL: {{^}}ngroups_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Z +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].Z -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @ngroups_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.z() #0 @@ -49,12 +68,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].W +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.x() #0 @@ -64,12 +83,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].X +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.y() #0 @@ -79,12 +98,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Y +; EG: MOV {{\*? 
*}}[[VAL]], KC0[1].Y -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.z() #0 @@ -92,74 +111,34 @@ entry: ret void } -; FUNC-LABEL: {{^}}local_size_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Z - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}local_size_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].W - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}local_size_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].X - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}get_work_dim: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @get_work_dim (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.AMDGPU.read.workdim() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; The tgid values are stored in sgprs offset by the number of user sgprs. -; Currently we always use exactly 2 user sgprs for the pointer to the -; kernel arguments, but this may change in the future. +; The tgid values are stored in sgprs offset by the number of user +; sgprs. 
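; A minimal sketch of the SGPR layout the checks below rely on (the register
; numbers are assumptions read off the CHECK lines, not taken from any ABI
; document): with the non-HSA ABI only 2 user SGPRs are set up for the
; kernarg pointer in s[0:1], so the first workgroup id arrives in s2; the
; amdhsa runs enable 6 user SGPRs, so it arrives in s6 instead.
;
;   %id = call i32 @llvm.r600.read.tgid.x() #0   ; lands in s2 (non-HSA) or s6 (HSA)
;   store i32 %id, i32 addrspace(1)* %out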
; FUNC-LABEL: {{^}}tgid_x: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_x (i32 addrspace(1)* %out) { +; HSA: .amd_kernel_code_t +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; HSA: .end_amd_kernel_code_t + +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_x(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.x() #0 store i32 %0, i32 addrspace(1)* %out @@ -167,9 +146,26 @@ entry: } ; FUNC-LABEL: {{^}}tgid_y: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_y (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7 +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 store i32 %0, i32 addrspace(1)* %out @@ -177,36 +173,81 @@ entry: } ; FUNC-LABEL: {{^}}tgid_z: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_z (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 1 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 +; HSA: enable_sgpr_dispatch_id = 0 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: enable_sgpr_private_segment_size = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_z(i32 
addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.z() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + ; FUNC-LABEL: {{^}}tidig_x: -; GCN: buffer_store_dword v0 -define void @tidig_x (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; GCN-NOHSA: buffer_store_dword v0 +; HSA: flat_store_dword v0 +define void @tidig_x(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + ; FUNC-LABEL: {{^}}tidig_y: -; GCN: buffer_store_dword v1 -define void @tidig_y (i32 addrspace(1)* %out) { + +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 +; GCN-NOHSA: buffer_store_dword v1 +; HSA: flat_store_dword v1 +define void @tidig_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + ; FUNC-LABEL: {{^}}tidig_z: -; GCN: buffer_store_dword v2 -define void @tidig_z (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 +; GCN-NOHSA: buffer_store_dword v2 +; HSA: flat_store_dword v2 +define void @tidig_z(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 store i32 %0, i32 addrspace(1)* %out @@ -221,10 +262,6 @@ declare i32 @llvm.r600.read.global.size.x() #0 declare i32 @llvm.r600.read.global.size.y() #0 declare i32 @llvm.r600.read.global.size.z() #0 -declare i32 @llvm.r600.read.local.size.x() #0 -declare i32 @llvm.r600.read.local.size.y() #0 -declare i32 @llvm.r600.read.local.size.z() #0 - declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 declare i32 @llvm.r600.read.tgid.z() #0 diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index ddb920af29d8..655655d92f08 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ; FUNC-LABEL: {{^}}xor_i1: -; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}} ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} diff --git a/test/CodeGen/AMDGPU/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll index 033055db185a..35ddf2b0a465 100644 --- a/test/CodeGen/AMDGPU/zero_extend.ll +++ b/test/CodeGen/AMDGPU/zero_extend.ll @@ -7,8 +7,7 @@ ; R600: MEM_RAT_CACHELESS STORE_RAW ; SI: {{^}}test: -; SI: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}} -; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]] +; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}} ; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}} define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index 9c0143be06c3..81a6bb64971d 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ -; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer +; RUN: 
-mattr=+v6,+reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
 ; | grep 35
 
 define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index 3f47488372b8..613694f091d1 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -24,9 +24,9 @@ declare i32 @foo(i32) ssp
 !0 = !DILocation(line: 5, column: 2, scope: !1)
 !1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
-!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
-!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!2 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(name: "count_", line: 5, scope: !5, file: !3, type: !6)
 !5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
 !6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !7 = !DILocation(line: 6, column: 1, scope: !2)
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 638b26c73146..1341830b4a4b 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -14,11 +14,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!15}
 
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 93, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
+!0 = !DILocalVariable(name: "b", line: 93, arg: 2, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
 !2 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
 !12 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc.
build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !14) !4 = !DISubroutineType(types: !5) !5 = !{!6, !6, !6} !6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SItype", line: 152, file: !12, baseType: !8) diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index cfaffd8234ba..171b6d2bcc5c 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -1,36 +1,36 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon -O0 -optimize-regalloc -regalloc=basic %s -o /dev/null ; This test would crash the rewriter when trying to handle a spill after one of -; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. +; the @llvm.arm.neon.vld3.v8i8.p0i8 defined three parts of a register. %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] - %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] - %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] - %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] 
%tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) + call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] @@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) + call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) + tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) ret <8 x i8> %tmp4 } diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index 6a6ccf3d0a01..c6c0e2caee42 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -36,8 +36,8 @@ entry: %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %20, <4 x float> %19, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index f86c3ba9ef6e..1deb98631a4f 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -12,8 +12,8 @@ entry: %tmp9 = trunc i128 %tmp8 to i64 ; [#uses=1] %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind + tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index bead8d9781e8..47a5ef0bc544 100755 --- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -20,7 +20,7 @@ @.str51 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1] 
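; The alias update just below follows the newer IR syntax, in which an alias
; spells out both its own value type and the aliasee's pointer type. A
; stripped-down sketch of the same shape (the names @impl and @fwd are
; hypothetical, chosen only for illustration):
;
;   define void @impl(i32 %x) {
;     ret void
;   }
;   @fwd = alias void (i32), void (i32)* @impl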
@__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs = external constant [116 x i8] ; <[116 x i8]*> [#uses=1] -@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; [#uses=0] +@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32), void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; [#uses=0] declare i8* @__dynamic_cast(i8*, i8*, i8*, i32) diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll index 95bb2769759e..38b352c473b1 100644 --- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll +++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll @@ -47,19 +47,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !llvm.dbg.lv.fn = !{!0, !8, !10, !12} !llvm.dbg.gv = !{!14} -!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "buf", line: 4, arg: 0, scope: !1, file: !2, type: !6) -!1 = !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4) +!0 = !DILocalVariable(name: "buf", line: 4, arg: 1, scope: !1, file: !2, type: !6) +!1 = distinct !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4) !2 = !DIFile(filename: "t.c", directory: "/private/tmp") -!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26) +!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26) !4 = !DISubroutineType(types: !5) !5 = !{null} !6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !26, scope: !2, baseType: !7) !7 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char) -!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "nbytes", line: 4, arg: 0, scope: !1, file: !2, type: !9) +!8 = !DILocalVariable(name: "nbytes", line: 4, arg: 2, scope: !1, file: !2, type: !9) !9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned long", size: 32, align: 32, encoding: DW_ATE_unsigned) -!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nread", line: 6, scope: !11, file: !2, type: !9) +!10 = !DILocalVariable(name: "nread", line: 6, scope: !11, file: !2, type: !9) !11 = distinct !DILexicalBlock(line: 5, column: 1, file: !26, scope: !1) -!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !11, file: !2, type: !13) +!12 = !DILocalVariable(name: "c", line: 7, scope: !11, file: !2, type: !13) !13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !14 = !DIGlobalVariable(name: "length", linkageName: "length", line: 1, isLocal: false, isDefinition: true, scope: !2, file: !2, type: !13, variable: i32* @length) !15 = !DILocation(line: 4, column: 24, scope: !1) diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 1aee5088eee4..130221d38c23 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -16,10 +16,10 @@ target triple = 
"thumbv7-apple-darwin10" define i32 @test(i8* %arg) nounwind { entry: - %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %arg, i32 1) %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> store <2 x i64> %1, <2 x i64>* undef, align 16 ret i32 undef } -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 953e2bbf291c..14ddb59b5387 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -3,7 +3,7 @@ %struct.SVal = type { i8*, i32 } -define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp !dbg !17 { entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24 @@ -31,7 +31,7 @@ return: ; preds = %bb2 ret i32 %.0, !dbg !29 } -define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 !dbg !16 { entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34 @@ -47,7 +47,7 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone -define i32 @main() nounwind ssp { +define i32 @main() nounwind ssp !dbg !20 { entry: %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] @@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14) !1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !48, elements: !4) !2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330") -!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47) +!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47) !4 = !{!5, !7, !0, !9} !5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !48, scope: !1, baseType: !6) !6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, baseType: null) @@ -93,35 +93,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !14 = !DISubroutineType(types: !15) !15 = !{null, !12} -!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev) -!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal) +!16 = distinct !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14) +!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18) !18 = !DISubroutineType(types: !19) !19 = !{!13, !13, !1} -!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21, function: i32 ()* @main) +!20 = distinct !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21) !21 = !DISubroutineType(types: !22) !22 = !{!13} -!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13) +!23 = !DILocalVariable(name: "i", line: 16, arg: 1, scope: !17, file: !2, type: !13) !24 = !DILocation(line: 16, scope: !17) -!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26) +!25 = !DILocalVariable(name: "location", line: 16, arg: 2, scope: !17, file: !2, type: !26) !26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !48, scope: !2, baseType: !1) !27 = !DILocation(line: 17, scope: !28) !28 = distinct !DILexicalBlock(line: 16, column: 0, file: !2, scope: !17) !29 = !DILocation(line: 18, scope: !28) !30 = !DILocation(line: 20, scope: !28) -!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32) +!31 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !16, file: !2, type: !32) !32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !48, scope: !2, baseType: !33) !33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, scope: !2, baseType: !1) !34 = !DILocation(line: 11, scope: !16) !35 = !DILocation(line: 11, scope: !36) !36 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !37) !37 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !16) -!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1) +!38 = !DILocalVariable(name: "v", line: 24, scope: 
!39, file: !2, type: !1) !39 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !40) !40 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !20) !41 = !DILocation(line: 24, scope: !39) !42 = !DILocation(line: 25, scope: !39) !43 = !DILocation(line: 26, scope: !39) -!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13) +!44 = !DILocalVariable(name: "k", line: 26, scope: !39, file: !2, type: !13) !45 = !DILocation(line: 27, scope: !39) !46 = !{!16, !17, !20} !47 = !{} diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 9a5baf21b8fb..d5eed8b6a2c4 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -28,7 +28,7 @@ target triple = "thumbv7-apple-darwin10" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (<0x8> 03 [[ADDR]] 10 01 22 ) -define zeroext i8 @get1(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get1(i8 zeroext %a) nounwind optsize !dbg !0 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30 %0 = load i8, i8* @x1, align 4, !dbg !30 @@ -39,7 +39,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone -define zeroext i8 @get2(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get2(i8 zeroext %a) nounwind optsize !dbg !6 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !DIExpression()), !dbg !32 %0 = load i8, i8* @x2, align 4, !dbg !32 @@ -48,7 +48,7 @@ entry: ret i8 %0, !dbg !33 } -define zeroext i8 @get3(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get3(i8 zeroext %a) nounwind optsize !dbg !7 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !DIExpression()), !dbg !34 %0 = load i8, i8* @x3, align 4, !dbg !34 @@ -57,7 +57,7 @@ entry: ret i8 %0, !dbg !35 } -define zeroext i8 @get4(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get4(i8 zeroext %a) nounwind optsize !dbg !8 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !DIExpression()), !dbg !36 %0 = load i8, i8* @x4, align 4, !dbg !36 @@ -66,7 +66,7 @@ entry: ret i8 %0, !dbg !37 } -define zeroext i8 @get5(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get5(i8 zeroext %a) nounwind optsize !dbg !9 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38 %0 = load i8, i8* @x5, align 4, !dbg !38 @@ -78,35 +78,35 @@ entry: !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!49} -!0 = !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, function: i8 (i8)* @get1, variables: !42) +!0 = distinct !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, variables: !42) !1 = !DIFile(filename: "foo.c", directory: "/tmp/") -!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) +!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) !3 = !DISubroutineType(types: !4) !4 = !{!5, !5} !5 = !DIBasicType(tag: DW_TAG_base_type, name: "_Bool", size: 8, align: 8, encoding: DW_ATE_boolean) -!6 = !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, function: i8 (i8)* @get2, variables: !43) -!7 = !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, function: i8 (i8)* @get3, variables: !44) -!8 = !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, function: i8 (i8)* @get4, variables: !45) -!9 = !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, function: i8 (i8)* @get5, variables: !46) -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 0, scope: !0, file: !1, type: !5) -!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !12, file: !1, type: !5) +!6 = distinct !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, variables: !43) +!7 = distinct !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, variables: !44) +!8 = distinct !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, variables: !45) +!9 = distinct !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, variables: !46) +!10 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !0, file: !1, type: !5) +!11 = !DILocalVariable(name: "b", line: 4, scope: !12, file: !1, type: !5) !12 = distinct !DILexicalBlock(line: 4, column: 0, file: !47, scope: !0) !13 = !DIGlobalVariable(name: "x1", line: 3, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x1) !14 = !DIGlobalVariable(name: "x2", line: 6, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x2) !15 = !DIGlobalVariable(name: "x3", line: 9, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x3) !16 = !DIGlobalVariable(name: "x4", line: 12, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x4) !17 = !DIGlobalVariable(name: "x5", line: 15, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x5) -!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 7, arg: 0, scope: !6, file: !1, type: !5) -!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 7, scope: !20, file: !1, type: !5) 
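; The same debug-metadata rewrite repeats across these tests; a minimal
; before/after sketch of the pattern being applied (the !N numbers and @f are
; hypothetical):
;
;   ; old form: the subprogram points at the function, and local variables
;   ; carry an explicit tag with 0-based arg numbering
;   !4 = !DISubprogram(name: "f", isDefinition: true, function: i32 (i32)* @f)
;   !5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: !4)
;
;   ; new form: the define carries a !dbg attachment instead, the subprogram
;   ; (and compile unit) are 'distinct', the tag is dropped, args are 1-based
;   define i32 @f(i32 %a) !dbg !4 { ... }
;   !4 = distinct !DISubprogram(name: "f", isDefinition: true)
;   !5 = !DILocalVariable(name: "a", arg: 1, scope: !4)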
+!18 = !DILocalVariable(name: "a", line: 7, arg: 1, scope: !6, file: !1, type: !5) +!19 = !DILocalVariable(name: "b", line: 7, scope: !20, file: !1, type: !5) !20 = distinct !DILexicalBlock(line: 7, column: 0, file: !47, scope: !6) -!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 10, arg: 0, scope: !7, file: !1, type: !5) -!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 10, scope: !23, file: !1, type: !5) +!21 = !DILocalVariable(name: "a", line: 10, arg: 1, scope: !7, file: !1, type: !5) +!22 = !DILocalVariable(name: "b", line: 10, scope: !23, file: !1, type: !5) !23 = distinct !DILexicalBlock(line: 10, column: 0, file: !47, scope: !7) -!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 13, arg: 0, scope: !8, file: !1, type: !5) -!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 13, scope: !26, file: !1, type: !5) +!24 = !DILocalVariable(name: "a", line: 13, arg: 1, scope: !8, file: !1, type: !5) +!25 = !DILocalVariable(name: "b", line: 13, scope: !26, file: !1, type: !5) !26 = distinct !DILexicalBlock(line: 13, column: 0, file: !47, scope: !8) -!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 16, arg: 0, scope: !9, file: !1, type: !5) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 16, scope: !29, file: !1, type: !5) +!27 = !DILocalVariable(name: "a", line: 16, arg: 1, scope: !9, file: !1, type: !5) +!28 = !DILocalVariable(name: "b", line: 16, scope: !29, file: !1, type: !5) !29 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !9) !30 = !DILocation(line: 4, scope: !0) !31 = !DILocation(line: 4, scope: !12) diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll index aac8f7b3a026..1097050df54b 100644 --- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll +++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -arm-global-merge -global-merge-group-by-use=false | FileCheck %s -; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2 +; CHECK: .zerofill __DATA,__bss,l__MergedGlobals,16,2 @prev = external global [0 x i16] @max_lazy_match = internal unnamed_addr global i32 0, align 4 diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 067c719f491c..3d82e706862c 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -28,7 +28,7 @@ target triple = "thumbv7-apple-macosx10.7.0" @x4 = internal unnamed_addr global i32 4, align 4 @x5 = global i32 0, align 4 -define i32 @get1(i32 %a) nounwind optsize ssp { +define i32 @get1(i32 %a) nounwind optsize ssp !dbg !1 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30 %1 = load i32, i32* @x1, align 4, !dbg !31 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !DIExpression()), !dbg !31 @@ -36,7 +36,7 @@ define i32 @get1(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !31 } -define i32 @get2(i32 %a) nounwind optsize ssp { +define i32 @get2(i32 %a) nounwind optsize ssp !dbg !6 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !32 %1 = load i32, i32* @x2, align 4, !dbg !33 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !DIExpression()), !dbg !33 @@ -44,7 +44,7 @@ define i32 @get2(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !33 } -define 
i32 @get3(i32 %a) nounwind optsize ssp { +define i32 @get3(i32 %a) nounwind optsize ssp !dbg !7 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !DIExpression()), !dbg !34 %1 = load i32, i32* @x3, align 4, !dbg !35 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !DIExpression()), !dbg !35 @@ -52,7 +52,7 @@ define i32 @get3(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !35 } -define i32 @get4(i32 %a) nounwind optsize ssp { +define i32 @get4(i32 %a) nounwind optsize ssp !dbg !8 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !DIExpression()), !dbg !36 %1 = load i32, i32* @x4, align 4, !dbg !37 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !DIExpression()), !dbg !37 @@ -60,7 +60,7 @@ define i32 @get4(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !37 } -define i32 @get5(i32 %a) nounwind optsize ssp { +define i32 @get5(i32 %a) nounwind optsize ssp !dbg !9 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38 %1 = load i32, i32* @x5, align 4, !dbg !39 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !DIExpression()), !dbg !39 @@ -73,32 +73,32 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!49} -!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) -!1 = !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, function: i32 (i32)* @get1, variables: !42) +!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) +!1 = distinct !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, variables: !42) !2 = !DIFile(filename: "ss3.c", directory: "/private/tmp") !3 = !DISubroutineType(types: !4) !4 = !{!5} !5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!6 = !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, function: i32 (i32)* @get2, variables: !43) -!7 = !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, function: i32 (i32)* @get3, variables: !44) -!8 = !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, function: i32 (i32)* @get4, variables: !45) -!9 = !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, function: i32 (i32)* @get5, variables: !46) -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5) -!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 5, scope: !12, file: !2, 
type: !5) +!6 = distinct !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, variables: !43) +!7 = distinct !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, variables: !44) +!8 = distinct !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, variables: !45) +!9 = distinct !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, variables: !46) +!10 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5) +!11 = !DILocalVariable(name: "b", line: 5, scope: !12, file: !2, type: !5) !12 = distinct !DILexicalBlock(line: 5, column: 19, file: !47, scope: !1) -!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5) -!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 8, scope: !15, file: !2, type: !5) +!13 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5) +!14 = !DILocalVariable(name: "b", line: 8, scope: !15, file: !2, type: !5) !15 = distinct !DILexicalBlock(line: 8, column: 17, file: !47, scope: !6) -!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5) -!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 11, scope: !18, file: !2, type: !5) +!16 = !DILocalVariable(name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5) +!17 = !DILocalVariable(name: "b", line: 11, scope: !18, file: !2, type: !5) !18 = distinct !DILexicalBlock(line: 11, column: 19, file: !47, scope: !7) -!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5) -!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 14, scope: !21, file: !2, type: !5) +!19 = !DILocalVariable(name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5) +!20 = !DILocalVariable(name: "b", line: 14, scope: !21, file: !2, type: !5) !21 = distinct !DILexicalBlock(line: 14, column: 19, file: !47, scope: !8) !25 = !DIGlobalVariable(name: "x1", line: 4, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x1) !26 = !DIGlobalVariable(name: "x2", line: 7, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x2) -!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 17, scope: !29, file: !2, type: !5) +!27 = !DILocalVariable(name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5) +!28 = !DILocalVariable(name: "b", line: 17, scope: !29, file: !2, type: !5) !29 = distinct !DILexicalBlock(line: 17, column: 19, file: !47, scope: !9) !30 = !DILocation(line: 5, column: 16, scope: !1) !31 = !DILocation(line: 5, column: 32, scope: !12) diff --git a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll index 3cbc4cdcd707..d702af7c0c70 100644 --- a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll +++ b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll @@ -4,9 +4,9 @@ define void 
@test_vmovqqqq_pseudo() nounwind ssp { entry: - %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2) + %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2) store { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, { <8 x i16>, <8 x i16>, <8 x i16> }* undef ret void } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index 17bd291a6b55..5df439389cdb 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -1,5 +1,5 @@ ; Make sure short memsets on ARM lower to stores, even when optimizing for size. -; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC +; RUN: llc -march=arm -mattr=+strict-align < %s | FileCheck %s -check-prefix=CHECK-GENERIC ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index b70b7f6f3b2e..f622ceb584e6 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -52,8 +52,8 @@ cond.end295: ; preds = %entry %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> - tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind - tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind unreachable for.end: ; preds = %entry @@ -63,10 +63,10 @@ for.end: ; preds = %entry ; Check that pseudo-expansion preserves flags. 
define void @foo3(i8* %p) nounwind ssp { entry: - tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) + tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) ret void } declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll index 7f30ae10e436..606af47a3d8e 100644 --- a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll +++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll @@ -7,8 +7,8 @@ entry: %vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1 %0 = bitcast i32* %p to i8* - tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4) + tail call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4) ret void } -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll index 545bfc73c590..6cff67614c64 100644 --- a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll +++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll @@ -5,9 +5,9 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios5.1.0" -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind define void @findEdges(i8*) nounwind ssp { %2 = icmp sgt i32 undef, 0 @@ -19,16 +19,16 @@ define void @findEdges(i8*) nounwind ssp { ;
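Taken together, the ARM test updates above are one mechanical rename: the overloaded NEON load/store intrinsics now mangle the pointer type of their memory operand into the intrinsic name (p0i8 for an i8* in address space 0) in addition to the vector type, so llvm.arm.neon.vst1.v4f32 becomes llvm.arm.neon.vst1.p0i8.v4f32 and llvm.arm.neon.vld3lane.v8i16 becomes llvm.arm.neon.vld3lane.v8i16.p0i8 (for loads the result vector type is mangled first, then the pointer). A minimal sketch of the new spelling, distilled from those tests (the function name @store_example is illustrative, not part of the patch):

; New-style declaration: both the pointer type (p0i8) and the stored
; vector type (v4f32) appear in the mangled intrinsic name.
declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind

define void @store_example(i8* %p, <4 x float> %v) nounwind {
  ; Store the 4 x float vector through %p with alignment 4.
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %v, i32 4)
  ret void
}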

[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o
new file mode 100644
index 0000000000000000000000000000000000000000..445e32271cf5a63da5f450bee66e0b6caec17cac
GIT binary patch
literal 2472
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic.macho.i386 b/test/tools/dsymutil/Inputs/basic.macho.i386
new file mode 100755
index 0000000000000000000000000000000000000000..ee6be096fe5a1e060efc9db2dc33dc57d4079f85
GIT binary patch
literal 9080
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..07b36a20c1da565e05e356028842f8b686c8d9bc
GIT binary patch
literal 3144
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic2.c b/test/tools/dsymutil/Inputs/basic2.c
index 13c6d07c2a08..4524e9bc6cc3 100644
--- a/test/tools/dsymutil/Inputs/basic2.c
+++ b/test/tools/dsymutil/Inputs/basic2.c
@@ -20,3 +20,9 @@ int foo(int arg) {
   return bar(arg+val) + inc() + baz++;
 }
+/* This file was also used to create basic2-custom-linetable.macho.x86_64.o
+   with a custom clang that had different settings for the linetable
+   encoding constants: line_base == -1 and line_range == 4.
+
+   clang -c -g basic2.c -o basic2-custom-linetable.macho.x86_64.o
+*/
diff --git a/test/tools/dsymutil/Inputs/dead-stripped/1.o b/test/tools/dsymutil/Inputs/dead-stripped/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..fbdffbc61a77479281e8ea3ca1455cfa196661d0
GIT binary patch
literal 3200
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.dylib b/test/tools/dsymutil/Inputs/fat-test.arm.dylib
new file mode 100755
index 0000000000000000000000000000000000000000..a8d4f37be4dea45ec9e97186b332a5826a9b3880
GIT binary patch
literal 25180
[binary patch data omitted]
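The line_base and line_range constants called out in the basic2.c comment above feed the DWARF line-number program's special opcodes. Per the DWARF specification, a special opcode op advances the line register by line_base + ((op - opcode_base) mod line_range). With the custom values line_base == -1 and line_range == 4, each special opcode can therefore encode a line delta of only -1, 0, +1, or +2, instead of the -5 through +8 range produced by the commonly used line_base == -5, line_range == 14; a consumer that hard-codes the usual constants would decode this object file's line table incorrectly, which is presumably the hazard this input is meant to exercise.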
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.o b/test/tools/dsymutil/Inputs/fat-test.arm.o
new file mode 100644
index 0000000000000000000000000000000000000000..1cf16803e892f1f0492cff72b12b3bcb7c388f34
GIT binary patch
literal 50736
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/fat-test.c b/test/tools/dsymutil/Inputs/fat-test.c
new file mode 100644
index 000000000000..f6c4c76fe2f4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.c
@@ -0,0 +1,28 @@
+/* Compile with:
+     clang -c -g -arch x86_64h -arch x86_64 -arch i386 fat-test.c
+     libtool -static -o libfat-test.a fat-test.o
+
+   to generate a dylib instead:
+     clang -arch ... -arch ... -arch ... -dynamiclib fat-test.o -o fat-test.dylib
+
+   To reduce the size of the fat binary:
+     lipo -thin i386 -o fat-test.i386.o fat-test.o
+     lipo -thin x86_64 -o fat-test.x86_64.o fat-test.o
+     lipo -thin x86_64h -o fat-test.x86_64h.o fat-test.o
+     lipo -create -arch x86_64h fat-test.x86_64h.o -arch x86_64 fat-test.x86_64.o -arch i386 fat-test.i386.o -o fat-test.o -segalign i386 8 -segalign x86_64 8 -segalign x86_64h 8
+ */
+#ifdef __x86_64h__
+int x86_64h_var;
+#elif defined(__x86_64__)
+int x86_64_var;
+#elif defined(__i386__)
+int i386_var;
+#elif defined(__ARM_ARCH_7S__)
+int armv7s_var;
+#elif defined(__ARM_ARCH_7A__)
+int armv7_var;
+#elif defined(__ARM64_ARCH_8__)
+int arm64_var;
+#else
+#error "Unknown architecture"
+#endif
diff --git a/test/tools/dsymutil/Inputs/fat-test.dylib b/test/tools/dsymutil/Inputs/fat-test.dylib
new file mode 100755
index 0000000000000000000000000000000000000000..4def340a9507e4e2a9587a555a17db8bcf4c5f81
GIT binary patch
literal 13012
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/frame-dw2.ll b/test/tools/dsymutil/Inputs/frame-dw2.ll
index 7ffc93397315..d07c529d3108 100644
--- a/test/tools/dsymutil/Inputs/frame-dw2.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw2.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.11.0"
 
 ; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
 entry:
   %b.addr = alloca i32, align 4
   %var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare i32 @foo(i32*) #2
 
 ; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10, !11}
 !llvm.ident = !{!12}
 
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
 !1 = !DIFile(filename: "frame.c", directory: "/tmp")
 !2 = !{}
 !3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{!7, !7}
 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !9 = !{i32 2, !"Dwarf Version", i32 2}
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{i32 1, !"PIC Level", i32 2}
 !12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
 !14 = !DIExpression()
 !15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
 !17 = !DILocation(line: 4, column: 6, scope: !4)
 !18 = !DILocation(line: 4, column: 12, scope: !4)
 !19 = !DILocation(line: 4, column: 14, scope: !4)
 !20 = !DILocation(line: 5, column: 9, scope: !4)
 !21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
 !23 = !DILocation(line: 8, column: 13, scope: !8)
 !24 = !DILocation(line: 9, column: 13, scope: !8)
 !25 = !DILocation(line: 9, column: 9, scope: !8)
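The frame-dw2.ll hunks above show the IR-level shape this whole patch migrates debug-info tests to: the function definition now carries a !dbg attachment pointing at its subprogram, the subprogram no longer points back at the function through a function: field, and both !DICompileUnit and !DISubprogram become distinct nodes. A minimal self-contained sketch of the new form (the function and file names here are illustrative, not from the patch):

define i32 @f(i32 %a) !dbg !4 {
  ret i32 %a
}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8}

; The compile unit and the subprogram must both be 'distinct' nodes now.
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "f.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
; No 'function:' field; the define's !dbg attachment makes the link instead.
!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isDefinition: true, scopeLine: 1)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{i32 2, !"Debug Info Version", i32 3}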
diff --git a/test/tools/dsymutil/Inputs/frame-dw4.ll b/test/tools/dsymutil/Inputs/frame-dw4.ll
index c8674b13e585..f3df896a3bb5 100644
--- a/test/tools/dsymutil/Inputs/frame-dw4.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw4.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.11.0"
 
 ; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
 entry:
   %b.addr = alloca i32, align 4
   %var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare i32 @foo(i32*) #2
 
 ; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10, !11}
 !llvm.ident = !{!12}
 
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
 !1 = !DIFile(filename: "frame.c", directory: "/tmp")
 !2 = !{}
 !3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{!7, !7}
 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !9 = !{i32 2, !"Dwarf Version", i32 4}
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{i32 1, !"PIC Level", i32 2}
 !12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
 !14 = !DIExpression()
 !15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
 !17 = !DILocation(line: 4, column: 6, scope: !4)
 !18 = !DILocation(line: 4, column: 12, scope: !4)
 !19 = !DILocation(line: 4, column: 14, scope: !4)
 !20 = !DILocation(line: 5, column: 9, scope: !4)
 !21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
 !23 = !DILocation(line: 8, column: 13, scope: !8)
 !24 = !DILocation(line: 9, column: 13, scope: !8)
 !25 = !DILocation(line: 9, column: 9, scope: !8)
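frame-dw4.ll gets the same treatment, and both files also show the companion change to local-variable metadata: !DILocalVariable loses its tag: field, since DW_TAG_arg_variable versus DW_TAG_auto_variable is now implied by whether an arg: index is present. Straight from the metadata above:

; A parameter: the arg: field alone marks it as an argument variable.
!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
; A local: no arg: field, so it is an auto variable.
!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)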
diff --git a/test/tools/dsymutil/Inputs/inlined-low_pc/1.o b/test/tools/dsymutil/Inputs/inlined-low_pc/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..7ab4e9205bbda61e7873fad3ad8bba40a86cf5e6
GIT binary patch
literal 1960
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/libfat-test.a b/test/tools/dsymutil/Inputs/libfat-test.a
new file mode 100644
index 0000000000000000000000000000000000000000..6b34b0fd6f90004bcc4cda6295704ad477562ddb
GIT binary patch
literal 5136
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/modules/Foo.pcm b/test/tools/dsymutil/Inputs/modules/Foo.pcm
new file mode 100644
index 0000000000000000000000000000000000000000..4a39a06c24dd94b16cba5c21d94c484df03db7f4
GIT binary patch
literal 26060
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..20d4b7fd84ed29a70148b93034b2c440accf94ce
GIT binary patch
literal 2084
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..df8e567bc3a58bcf26b9d45eade8b54b3b72c317
GIT binary patch
literal 2084
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/1.o b/test/tools/dsymutil/Inputs/odr-member-functions/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..f957a9b9d4231eb0601998a84ceb8a6e6e769706
GIT binary patch
literal 2236
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/2.o b/test/tools/dsymutil/Inputs/odr-member-functions/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..c696866fcad6673668009c72209d0f1e96f117cd
GIT binary patch
literal 2660
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/1.o b/test/tools/dsymutil/Inputs/odr-uniquing/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..c520930300555c8166c93b145416c707e7ae9a6c
GIT binary patch
literal 2544
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/2.o b/test/tools/dsymutil/Inputs/odr-uniquing/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..c520930300555c8166c93b145416c707e7ae9a6c
GIT binary patch
literal 2544
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/submodules/Parent.pcm b/test/tools/dsymutil/Inputs/submodules/Parent.pcm
new file mode 100644
index 0000000000000000000000000000000000000000..e6909d7afa5e3c2801623df405f418a7ff572900
GIT binary patch
literal 25260
[binary patch data omitted]
z`i%a%ReyO|e=eafd2Z6&8Me5?CO7^K9)IqpyZpsS#R+ZItw`mWWd6@bqJ7rrz+lvO zTh~_*-D!&!J^POS^=AFm8U2N2!}*y04X^%Ov*C5C;mw4`J+NeMk6ZfVrekrlTVpvk zYx0H7ox!|~=iL=E$%;8`)y-u76GwHs+M~VgSWvg~whkX?*#~~3e!@q2%ST$yaLXo;5!nw2YLR+m_ZouAvW)&zkoI^IFfk z%MMPLsY`R^yqq}TTZ!EgKIT|>s>z|DJkLvo)MvI=; z7$C+3RAbj)up6!{>)%*5ye=80Vx+Pw3G2keEteMc39n&l+3<2qf7z?QvTS&1MvwWg zB!c%rGJ|o;$kN)bQZs~aSv7ZJlWAYra?EAwUoy7^%}%?f>Rl}qnwC#!aFx#%-9AW^ z^x8p~ZeYP$bg5Z?0Y(zjW0wn762i-a`YT@j6tn<=cg~u@HFoKAS&qSAuzuX+47HV= zNS2+6R3_ZjceG_^Apc0^dABX$F8`&wESB6HbC;dgLIwH1IIHW&rtMKL_MMFGo{0Lu zzQJLD*sI;b7^gR+=NYcm(uP`=ANmW?lQ>yw6^M6r1DO( z;%uZ0SLTzIp~15|!AmpQZD1nWcT~4?!C0S=4A<@Y*S&_}K+0LAh@CWHgY7`qt?NVC1)SFh4hH7cpNEmd zgJ!^W{JBAmrG05F7c$7`3|drV*uPpe@3z5=LB=fz8O+X=Lvg(@Et{@yUDOAb&qjUl zRzX1j(P-abwCK53!_{TORmpH3a`x)4gMbMVHH1*|k&vYm-pYjB2LEzdI>C4_@9Tf( zhFZ(-xT~f~r_iPi%tD*z@ikHb3Bc^3XsFdbKS=r_5AB9y#?6Cavo8qWw;Y4kg76g5 zCOk-}@A%ZS%p~qa;Hoei+i7@>yKI`=_T-2!H!u>SKW-UhXa?yyLw|2u)K=e#*iMkRHs93ZU)yA()P7EJN^gCXj(r6Es3hVX56gyZ$QnT{c{i3>WPBOAMxDlB|XcaFaxM zO`abPZjJh4Oc4dTUF&rI+q!`Q&VuzT;j)?J=9x$({AJ2*gWb+(tIj7YrlHE%+#T7-2jqdSEtl zMUT5o{-EhtkfKlLl8JCW4%_qmE*~qO10E_=Tw-rT$bePEz#0%+*Vh;gv=gr4qTe3% z!wbN2?}l3uuv`Y?oMteT}Ql&5%j?mFHI6O0oWB*t+|p11_{t?(SS|2dxCfba`xS( z5C#<8Js8P8Tgs%T~bmvi>?;@oiB(!xzSkVt8B4cV<|B6@tTEaT=@RloH!%Oq&oF^4_wfo|h zmbm%xu<1#-$1nfjA&ddIuzwK;vz^EYLVsxjGV0LYZn$dIzq+hH?=>LqU!TxlFygcRjYN231_2{W=0T0Q6EwtO+c$RHbXT2n zS6tJU-Qpp`U2%%=wgPwydd^3HIj47B)NN1acv1g|t{;(W5;4pA;S9zRG7`v42yBDy zjID>_raf_s&jk;%I7fb<=G|Skm_+!ff>#0oCJ|Oh8kM2xGwR)SJK@y*(ZFET4^aR- z=b}Y_<507YAihY+2>?(wl|*n)1~F~0gjo|T0aoE0`k|T_3fbig6v4~xXv@#IZJ=r{ zdGh89YIOi-W^@A=S)$S`^0x(J{*OLjwW>4P>Unq732lX9<;ctni>H$nQ|<~1n#oGI z52X;v^3cVJ?MI_KN0@`asbf*^bTqIo+BXt~zXQ{BvyP=w3gtURrj&N)?qUdm@mz7e(@(h#^o0s|V#!^eUi&%C>OhL}YTgKW8qB!5;vFh`kKYC+1BAbf_d zMPGBti(eLL(se5ZIG_^{;~cW4%g5!7hGvnZnV$sELG=tYki8(FIK2gO!ilgx6|rGc z6#qr$M{ya(#xO|)?oD84_kXj%&1CYj|MPK*G{JxC*&`xL! z8@s-!NA#hb9Mw%C`~|U{3T*~mmdC@E$Ag}{AIm~!Tr+16o!Th#$km55n`0w(Jl!Ji z$k_zomz;e@h!knel6fd>a@tI?3}HF9+!dk8a(Tg{y6xWR_S;cEgodbR2X-MritUnx zwyb|?QhyF{1W}t+XNY32Ly{1n)ll-{m*tIs63CZ0U(V)rVlQpQiHHsH8a3bHm-=iOkSt z?hH~`L<|Kb?x1C#h?~xq4Kh6h-3U5KU>9&3s?5K9j_Z+P;?4=(&H@%3xsY#D=s_?l z`sP+4VXyvG$#9wHy>>FHb0XSODzyxd5C1_XDQxP5`^=gl83ev#vu1Z36y8`rA*v@} z1zd^pVe;k8z!2m%fKvjKWF>}9yUXGJ=OfjS9?R=aRu$%6JP5R0OX&yl5SPV`AX~Xd zICTO=C`F&}hw{?mT^HTK?I#sm7o=weVc2dSHCM!jT9~x9?zlr1NOTtv9{Cb9wX(S2 z{xG2(3zKJ43sTlp5xV$aCuD0ukcD#{LXrb&QTEPKwl)yVJDx9V91>pEBWpUBh<6CG zt$?qnca92^qCfwT?2>@oNIB{k2rZU8*;$-QpAba2O=2S?V{+~4^yaP44tEQ6qZx{1537~>Rb9?Z{_-ZVIn(I1* z?4`Wu9L&H@MJRlvWWyFBo3$n`qN>vI=0MT*kH{-CQ#?)CMcKTz9N{%I`A-ujERGCN zPH17Q03vwcjta6#eg?%q4mX-uBDZfsTHx>1NG-F3-nI{tHW>E2^ zt$+Z3aKFsplDWl2`5V=+aZ@)i1V%OZrHri=IsBW;hD!`uOB__Q5X;d%qi)AKfPkYx5SPtA~?O@5?gQ{ zQSDwrS4W}<5}((XZ19W4Ibxt}Cz<=pWtk&eV_DNp@JMCjW|WXq$&;-F4y;Uuy3N#D z@n04jkre@BSwu#vHVt1uk(h`-DOW=j+!@DrZ!Ef_Ali!zgm;7%q`woIJ*}-? 
zOm0T20a?==oNHoCkkqDGJI2#&4Q1U<|C@pUxuoJ=Y&Bd(5J#q(2)`qCT&LST6AeHz zK*Csb08Wi8ccJ-x^`_Wy8VN(B62d{xFm&pu*cxJoBkv1ZP$i+nYYXQ6HzD~!J^Gf& zi_RzFzZKirb?9)S@S*sqK;5ESaw`mugQ#(nY`E$SJP0utACf0)q}4L4!5IsvL{dlF zC`x-Es-NIa_Zb8}*0(JBtg%=IG~lFBf5mQqj`yt8&-O({o9Dwy$!~_o%^}5FoEI7ggi;F4 zurvqpakI1QSAs(HEh(2khlB|qSr+=Gj|tki?HMUN`QVIPgXw<7Bf=$-oC{|J_@J){ ze4 zJW~4L7oI^ji`oiJB=m~?Xico68l?9qzyX|gr8$jM03MM}z(YUwuq-{73GnEq*1AK` z1`M3?8s1pcmz;S)aLV1esB4>84X{StNQ9E_i>2b`eE@S!jE~Wd@Nq=p%Q5N|quRQh2>grHelkk&;6@nnDK7d6F zW4~`HTcmKj`LwqD1h%GtLM_&WyPQ6_ku#k9{>RbPL>h1mQOreYwzK(DVrv2@ul_Z* zDeSPsmXP_44vdjK??z?<$B9wVK_%Ps{W9Z8B)!#Vk`-skgwWe5gM;H4Q0U^@f^oSd z1kRvc6STCUTGUuZg2&w!YG?q>kP*!gk7Zw$c|=J*LiV|9!TUn1%-1P006;`F^=uhO z@FoV1)b!@d7m|nlmuMlj%)SW1`2!;THtZ^Cl*3 z7P*3*f*78OwMVx@+P9Vc@qFnH_(|^X?!pw z#LjaS6lmw*5fFu!9xc5JML-4r>AOM&P`4j$f~XDcFL>V+G68U<9v7nMAR~mjlli}{ zl4U|zN>A>Py#P)j7BrUWp+LGt@5x^)YazzUG^9+B$+F?QusSc*`>;m%d&xJZh5h#( zWey8zEX>RaO3=@_1MMV{E&N=tk7t_gFgDbcE>kD$+%_(_=0I0+_IrZC?afgvi)V91 zfBqRESSlZokD+yzNap{lT4>qgW!+J9db(k)BoF(yxb9YTh&QP3RxlHl=m-m!;U zA>EA`@WcmVL|}rYm3(4e&?aa5K)-czF2Q_F>DtC1bIwj0q*+F<-hp8$irw+2#6-uP=7%tdizCkUcseo(1R z={s-Bu}o;whM)#K48;ca%CStQWLAEb-H%eJQDik_^3p7ogfgPtQL zvQ^mi(-Vg4C=llLF985ijKlX~r4Ro^Sn2i|9q@lU_+`}KW0Mo zd6(CxR8m!iE{>~-LHMTL0xW^$DhLEu)s!^r-(&>~rE>@kXbU7T&n z5tCtQI&JX(Oh64c7~Y)0`2hqG=F#zuYT9@|3J>gV%RR%Q?#~ObWmcrEnsk?=b?{%d z$mSY^|E@*u?M3Tg$>Q|?LcJrGBG;4jEnzLl7Dt9L}IY>#@8xI#hD7ln+83mFoi z#0ga}s|66yS%?oU+)@i+A2V7c5~Ggyj zM;A)!{-Ba7D~RBIXQPO%tQw+c2S`+keE&fug=y6fpo9fKpwJKZ~%l{YQMD%nI;I?dd4Q&%d z9R9#XSzn_J0p;^l494l8k>+<|M?0nEavCFA?xKgB_!mJl9>22WUv7z`ho(m9{EC*8 zr#~4s;Rwo3n8Yj&q%kC9VWM?ohIw3#>CfX-5_|GR(fQkJ%GhIZ1PB%;@XQXKy*|6B zju&RmcF3TImJ1$5#oV^ag0uInr=vS4ovtNqXt=?WQLVrS=iRpRP;mqW3wua6DfLeP zvZCn5Y>!3hakld5_JUW3lSMeglYVT?fm$T?C3X!RMApaNpZABvlB zr0-Gu^A~vESY%wcDnINRj9FcgH8{mPp}{%jXEe6)Lq`sL*6OHhs;zaFS#6=vQ~Nik zuQ?oLlJwAh?)9%e_LT9bVN0GjcxdX^oPd`6) zJoWjhg4IW*lhTO-984U*ya(THmCyICqRr{^s%E3)!aF-Njel_@5m)eo4=(v4JEv&G z+2hm?zn8u$jxb`EiGouT#)8P8w2>{InxEstgvPLBz5l(%;(r2v=Upf7KRUSj`rzvE z`TQm6{^gnC_vZ88H5UJN?EZgPEdHE#^#O_glca*TK6LZOmNTC3rdN&EG}7(al7-|W3D2Wbo~D~ON@DKDc~9dt%_E89k51hv zxf5x;<@rP+uW35`$ob=sT=XEPn^0@aysvk z6UQHU@kYtbaLHo0WX83a-)vl^F;35}!=_vu=L%MpuKDEy(&lPhiAd5K(6}c*R@&xm z_v)p=-v^R^rD@$iHvakQ5oyzW@>kbSJfG(o84V5Ld)@8u4SX-ye{kqXX!vk7f8QS* zlVupGvwGToY#;plZG5l6ZvMU~ztgl**xcuFKJ<7&Vd;R)rF#h1^6{c~I?qn;ut4?> z(_rtUUZPVcf2T96G!I8ir44w|8TdW#TF<$+Y^PhLU|f>+(zQaX$8+uep-rm_tecuA zB#m~Jb??wyMbFH|?lW5HLdz=agA-knV@TUx(ERq?sSUr4l|INuw5JN1AC_K_hUQkq zj1z_H9c^v??>sE6cVLkMqje@M4QXTdHJ`-DnoaF7%w6vonu|Sab!qWom1(~=wbBN$ zQmphLS7g>ZeG7;Q>@0J{c=Css2RI!TG|EAbI+9$J(_%?2OrjKnzKH1 zWA4=2+KJQbTa1mbnTtK}*?rP_wrJ_`E~8`5`-N7*Q%^&xyJ7 z?$A4?Rj&EJ(VtyUR=0P}M7|U$G}>0_ALRxCXFfZqks>C#)+(+Qyj3nq4YqgnAH|kSYWV7Fhoh0dFMbaYK-eybq_gt$_dmWFTOzQC!+fzeSPr+Wz2^2M=LHxd5NN zfAjzwY0O$@EI7V*BsBcga0oZrvxc70KRSAT1ZiV=Cp)6?C%V_U0gA9={-*}kL0*Qz2gyW{#v?SM8{-v3d^fhk{1;godpiQoHrv zqJ$oIr^k^Y;cjdn@a^dKcWi4_4RyGCj*N$f$6pkH`ipV=$FO^l zWTYT1(9+u5v9;Uf?rKe6#;+W~3jD5af56k$>-TkZZ|zlaPofHRxAq0Rz8-I@&)?D7 zt1_6autJ~T-=VtL<7;X41-d%?{w}DsyQRa`O^Z5emG;XgxjVL|XVld?lt#~Icw4;b z8MSpaN+Wmg^0x-~v;gFrbKjH*0@)O$$GW`EIu#FeK_v-@s@>%axI6r~tb~nTc72*|~hb#?ZJ+6H@5eSM9y zrhbQV;~lDn?>gP<_qBAiwcQ&XSQ_*1Z7$J~o_tq2h{c|^KugaKI8LVK`B+<5kBf{# zY2ET|*emw{(^!Tb@zvN!YP%YzLf7)5X_e<(z}tge;eR;?d)s?plj_hTFDoRTW#61G zniW`Kj2qPotoD%*Rv5!jjiV9tsQAQHGC%(=FG)Of5HLS_U|b;xc|zMZ{}3i@3=Jtm zYNKAeis|YyfUN`7!LKuJX=(LzY;$!5TG(h)#%Qazw*ygwZCumjus1oH z?88+Jx%TboMl#;v-$ipXRw1%b7qt2V-8`Yo7~e)|BqzY3gdL)qtft8S{ZJL{cw^^M3#>zws>pC-qzUEP+J38*48&b{SD4~yJNV57R+KO!@)5}rDeXX(hRU~ 
z?imCnwN6J3Jf^O;rUqD8TW@zd2!}H^7I}0|L`U7awYAqDaCM=k-qy;}e3kbh^fro+ zCnu15M}mhG;KpW%d}m}NGly-YT!^JRBO{q9Y-2!?{;8Kt$K%W}jQuc}+AwoJKE^l9 z+z(?bZkUej>VS^zFeJ9i+>H;h-7+`B(2AR(CRgSFxl02~65EEdN&`N~)5uWf))-W5 z-K8``itj0frc#{@mGVj0oTVNkBUv>8Q!2|mMnh8@ZvL${=DD8*=|G0D?XtY&e_&55&`#VH@;tY&e_2RW-*oMKR+ zA%*G71aX#}^}rOxAm@pXWp-ld#O?6Gq21ir9atddP&&b*4p*3=O(+3EEyAIE)v&&sR%CqWFmJs+T z6GAAIB?dmq#1P6ZcQL9IM3!4(oHLrcnWOY#&NQE$tGAUgH_(M%ciQb&#BniCCPU>= z*isA&RLTZ>#f)5#Ps$QgWc*9%DF+0Jsj~W2j;~xwm~#%G5=&spN^qr^!t|O0v=mdA zaxuuDO}1~&ZC^drL2OIlzf0EvXIr-rko&KOsqd8Eh+6v`<~5!ul~ zIfZFL9WqE}ZTAM-d>Xf}nk>}jqeARCKsz;IMa)*wk&<^V$X1f#$VTkcRx4$eGRvTL zMZ8X}!Q`3?u2VCZSaYB?O-MszCNc`iV+NOLF07W9NQpHIL(&sgLTWV?DY0gf!;Dg% z%jwC9)9L-VY_riLJ!2)5PS4=7%|?&(3@+O&^q>i8T+GG|d7LwnGo=&a=NQGFjN}w9 zA725V)nue(tEML2?YX7T*;omY(`zvC=K$pN3?_dy z9`ER-!vHQf4z+lFIO;)D0*ctd`pxth84s*_y1S_JtUPwZj=uEzTRee|?k+kfqHauY z^?1aS3_okgbhiY0(W3VIcC_wvb*Y-zin;Edo~}SwkLS~YHeajypo?M_dsco=vj`OL zZ9UlOJq zhx&L)`^+eRL&d^qItdT}-aj)><{cDvGf`G|7%-3}D} z$#C$qnMbAApEZ7BuZ_ma6i1^1^rwmGbbvw{&|YxgFqC*`csz(BgG0)}mR&euhEs7$ zf&w^Qw6nw4!@IcKT%JHrZ=hcst7FgWe68#_99d(Hy#@#E6#L-@dd{vC2Lr1dm)d)v zL$-+Gel|IM%ni>a(3DoTA=rc+{?q~lgZuXf4-8r958r-r zBhi|9!qPzpGH;#lES#i+)W(@#eEWy&FgU(QQe@n^t{{JhPWo}V>96)`q~als)Vxn4 z&Ag$NOjoqh&+a=J&i~oua+n{bess$9iRt4XZG75Z{Ezd+?;DGM-CR64Q;dV$?=7yr zx#iaJ#;F^R#5|8Ca31@3$yA=b(2aBGuavu_#JAnAbz;MCL0FP@NRsJ8tD2kN4_}rv zI)h8PmTX!jHUERhD$RU4h6%GcvPx%P*^dif#OKJ<58%wJ{KtGT`jUyhhu!P(hrlOj zHvPfszoZXz&#uqSeQ%v~{}lf&z4FI3JL#i1kZlyJ@-y-p>H0T+{RPaIr>EwN1Ko=G z^mn$K@uD-X{~kZ9{yjGNH|$Y|d-hZsZBwN{E9(uYZp6ZPFLx4 zu|{frM|Y-BGE0E~I~Y8AAh7>%;Hh9BG>X5JF8&xh9Z=UuZ+HErt`G;;(^JJfp&&ZI zu8~f^_S3dPY!?U|LE=Y8j>aYS`x6+YBk>yPYu*2ARiTtGu>+gz7(6bs-+<6aeOG_* zb|LX$I|~@qJ6!>nkB(-tKPHXegP=*xc(H@j z^hNPQcJL9rIuzHGA3vx3@&TW}im}%}VEq4OXgx+2(Ena&IwbnZPzQn`$94W%@R>m)sc4y^Zc zyabF8oz&KA8!;*gbAjVKkQVyRX&`4 z&xfL=Er-X4=-mBgNoJs$Q>-*u%>$Z`6+fuUD>kkzDSq!uhVQuHG1`x4@`@*(E{;46 z>E=Bs%u?nc(JPl`Ql5S7hmXmud5v* zgA?!{{5lK!;n(t|KjP>A*BqDw`{8Jm51-Y6DFfS+nA9@p&}ja5agn6K4=&-wDW!jl Qm4F|R|1S>#{=a$nzwf{jLI3~& literal 0 HcmV?d00001 diff --git a/test/tools/dsymutil/X86/basic-linking-bundle.test b/test/tools/dsymutil/X86/basic-linking-bundle.test new file mode 100644 index 000000000000..c07fa5894f36 --- /dev/null +++ b/test/tools/dsymutil/X86/basic-linking-bundle.test @@ -0,0 +1,38 @@ +RUN: rm -rf %T/basic-linking-bundle +RUN: mkdir -p %T/basic-linking-bundle/dsymdest +RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/basic-linking-bundle/basic.macho.x86_64 + +RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/basic-linking-bundle/basic.macho.x86_64 + +Check that the object file in the bundle exists and is sane: +RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test + +Check that llvm-dwarfdump recognizes the bundle as a dSYM: +RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM | FileCheck %S/basic-linking-x86.test + +RUN: FileCheck %s --input-file %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Info.plist + +RUN: llvm-dsymutil -oso-prepend-path=%p/.. 
%T/basic-linking-bundle/basic.macho.x86_64 -o %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM +RUN: llvm-dwarfdump %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test +RUN: FileCheck %s --input-file %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Info.plist + +CHECK: <?xml version="1.0" encoding="UTF-8"?> +CHECK-NEXT: <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +CHECK-NEXT: <plist version="1.0"> +CHECK-NEXT: <dict> +CHECK-NEXT: <key>CFBundleDevelopmentRegion</key> +CHECK-NEXT: <string>English</string> +CHECK-NEXT: <key>CFBundleIdentifier</key> +CHECK-NEXT: <string>com.apple.xcode.dsym.basic.macho.x86_64</string> +CHECK-NEXT: <key>CFBundleInfoDictionaryVersion</key> +CHECK-NEXT: <string>6.0</string> +CHECK-NEXT: <key>CFBundlePackageType</key> +CHECK-NEXT: <string>dSYM</string> +CHECK-NEXT: <key>CFBundleSignature</key> +CHECK-NEXT: <string>????</string> +CHECK-NEXT: <key>CFBundleShortVersionString</key> +CHECK-NEXT: <string>1.0</string> +CHECK-NEXT: <key>CFBundleVersion</key> +CHECK-NEXT: <string>1</string> +CHECK-NEXT: </dict> +CHECK-NEXT: </plist> diff --git a/test/tools/dsymutil/X86/basic-linking-x86.test b/test/tools/dsymutil/X86/basic-linking-x86.test index 19b4e3bef663..37797a323504 100644 --- a/test/tools/dsymutil/X86/basic-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-linking-x86.test @@ -1,13 +1,12 @@ -REQUIRES: shell RUN: cat %p/../Inputs/basic.macho.x86_64 > %t1 -RUN: llvm-dsymutil -oso-prepend-path=%p/.. %t1 +RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %t1 RUN: llvm-dwarfdump %t1.dwarf | FileCheck %s -RUN: llvm-dsymutil -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 +RUN: llvm-dsymutil -f -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 RUN: llvm-dwarfdump %t2 | FileCheck %s -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE -RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -f -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE CHECK: file format Mach-O 64-bit x86-64 diff --git a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test index ad3ba5a15b6b..56b78588b758 100644 --- a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test @@ -1,5 +1,4 @@ -REQUIRES: shell -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/..
%p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s CHECK: file format Mach-O 64-bit x86-64 diff --git a/test/tools/dsymutil/X86/basic-lto-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-linking-x86.test index 395234e96166..68103aa44ca9 100644 --- a/test/tools/dsymutil/X86/basic-lto-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-lto-linking-x86.test @@ -1,6 +1,5 @@ -REQUIRES: shell -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s -RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s CHECK: file format Mach-O 64-bit x86-64 @@ -117,7 +116,7 @@ CHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000063) CHECK: DW_AT_location [DW_FORM_data4] (0x00000025) CHECK: DW_TAG_lexical_block [14] * CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000100000f94) -CHECK DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7) +CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7) CHECK: DW_TAG_inlined_subroutine [15] CHECK: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x009a => {0x000001d4} "inc") CHECK: DW_AT_ranges [DW_FORM_data4] (0x00000000 diff --git a/test/tools/dsymutil/X86/custom-line-table.test b/test/tools/dsymutil/X86/custom-line-table.test new file mode 100644 index 000000000000..86fd7e294686 --- /dev/null +++ b/test/tools/dsymutil/X86/custom-line-table.test @@ -0,0 +1,40 @@ +# RUN: llvm-dsymutil -oso-prepend-path %p/../Inputs -y %s -f -o - | llvm-dwarfdump - -debug-dump=line | FileCheck %s + +# This test runs dsymutil on an object file with non-standard (as far +# as llvm is concerned) line table settings. + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: basic2-custom-linetable.macho.x86_64.o + symbols: + - { sym: _foo, objAddr: 0x0, binAddr: 0x1000, size: 0x12 } + +# CHECK: 0x0000000000001000 19 0 {{.*}} is_stmt +# CHECK: 0x0000000000001012 20 14 {{.*}} is_stmt prologue_end +# CHECK: 0x0000000000001015 20 18 {{.*}} +# CHECK: 0x0000000000001017 20 17 {{.*}} +# CHECK: 0x0000000000001019 20 10 {{.*}} +# CHECK: 0x000000000000101e 20 25 {{.*}} +# CHECK: 0x0000000000001026 20 23 {{.*}} +# CHECK: 0x000000000000102b 20 36 {{.*}} +# CHECK: 0x000000000000103c 20 31 {{.*}} +# CHECK: 0x000000000000103e 20 3 {{.*}} +# CHECK: 0x0000000000001046 20 3 {{.*}} end_sequence + + - { sym: _inc, objAddr: 0x0, binAddr: 0x2000, size: 0x12 } + +# CHECK: 0x0000000000002000 14 0 {{.*}} is_stmt +# CHECK: 0x0000000000002004 15 10 {{.*}} is_stmt prologue_end +# CHECK: 0x0000000000002013 15 3 {{.*}} +# CHECK: 0x0000000000002015 15 3 {{.*}} end_sequence + + - { sym: _unused1, objAddr: 0x0, binAddr: 0x3000, size: 0x12 } + +# CHECK: 0x0000000000003000 10 0 {{.*}} is_stmt +# CHECK: 0x0000000000003004 11 7 {{.*}} is_stmt prologue_end +# CHECK: 0x000000000000300e 11 3 {{.*}} +# CHECK: 0x0000000000003013 12 1 {{.*}} is_stmt +# CHECK: 0x000000000000301c 12 1 {{.*}} is_stmt end_sequence +... 
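The address arithmetic that custom-line-table.test exercises boils down to rebasing each line-table row: dsymutil keeps the row's offset inside its debug-map symbol and adds it to the symbol's binAddr. A minimal Python sketch of that arithmetic, illustrative only (the function name is made up; the numbers come from the _foo and _inc entries in the map above):

def relocate(addr, obj_addr, bin_addr):
    # Keep the address's offset within the symbol, rebased onto the
    # symbol's address in the linked binary.
    return bin_addr + (addr - obj_addr)

# _foo: { objAddr: 0x0, binAddr: 0x1000 }
assert relocate(0x0, 0x0, 0x1000) == 0x1000    # first row above
assert relocate(0x12, 0x0, 0x1000) == 0x1012   # the prologue_end row
# _inc: { objAddr: 0x0, binAddr: 0x2000 }
assert relocate(0x4, 0x0, 0x2000) == 0x2004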
+ diff --git a/test/tools/dsymutil/X86/dead-stripped.cpp b/test/tools/dsymutil/X86/dead-stripped.cpp new file mode 100644 index 000000000000..ecab7580ec0f --- /dev/null +++ b/test/tools/dsymutil/X86/dead-stripped.cpp @@ -0,0 +1,48 @@ +// RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/dead-stripped -o - | llvm-dwarfdump - -debug-dump=info | FileCheck %s + +// The test was compiled with: +// clang++ -O2 -g -c dead-strip.cpp -o 1.o + +// The goal of the test is to exercise dsymutil's behavior in the presence of +// functions/variables that have been dead-stripped by the linker but +// that are still present in the linked debug info (in this case because +// they have been DW_TAG_import'd in another namespace). + +// Everything in the N namespace below doesn't have a debug map entry, and +// thus is considered dead (::foo() has a debug map entry, otherwise dsymutil +// would just drop the CU altogether). + +namespace N { +int blah = 42; +// This is actually a dsymutil-classic bug that we reproduced +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_location + +__attribute__((always_inline)) int foo() { return blah; } +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc +// CHECK: DW_AT_frame_base + +// CHECK: DW_TAG_subprogram + +int bar(unsigned i) { + int val = foo(); + if (i) + return val + bar(i-1); + return foo(); +} +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc +// CHECK: DW_AT_frame_base +// CHECK-NOT: DW_AT_location +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc + +} +// CHECK: TAG_imported_module +using namespace N; + +void foo() {} diff --git a/test/tools/dsymutil/X86/dsym-companion.test b/test/tools/dsymutil/X86/dsym-companion.test new file mode 100644 index 000000000000..4327a2953d97 --- /dev/null +++ b/test/tools/dsymutil/X86/dsym-companion.test @@ -0,0 +1,339 @@ +RUN: llvm-dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32 +RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64 + +This test checks that the dSYM companion binaries generated in 32 and 64 bits +are correct. The checks are pretty strict (we check even the offsets and sizes +of the sections) in order to test the VM address layout algorithm. As the +debug sections are generated, this is a bit risky, but I don't expect +llvm-dsymutil's output to change much for these tiny C programs so this should +be OK. +The 32-bit version doesn't have object files, thus it has basically no debug +sections.
+ +CHECK32: Format: Mach-O 32-bit i386 +CHECK32: Arch: i386 +CHECK32: AddressSize: 32bit +CHECK64: Format: Mach-O 64-bit x86-64 +CHECK64: Arch: x86_64 +CHECK64: AddressSize: 64bit +CHECK: MachHeader { +CHECK32: Magic: Magic (0xFEEDFACE) +CHECK32: CpuType: X86 (0x7) +CHECK32: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3) +CHECK64: Magic: Magic64 (0xFEEDFACF) +CHECK64: CpuType: X86-64 (0x1000007) +CHECK64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3) +CHECK: FileType: DWARFSymbol (0xA) +CHECK: NumOfLoadCommands: 7 +CHECK: Flags [ (0x0) +CHECK: } +CHECK: Sections [ +CHECK: Section { +CHECK: Name: __text +CHECK: Segment: __TEXT +CHECK32: Address: 0x1E90 +CHECK32: Size: 0x11A +CHECK64: Address: 0x100000EA0 +CHECK64: Size: 0x109 +CHECK: Offset: 0 +CHECK: Alignment: 4 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x800004) +CHECK: PureInstructions (0x800000) +CHECK: SomeInstructions (0x4) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __unwind_info +CHECK: Segment: __TEXT +CHECK32: Address: 0x1FAC +CHECK64: Address: 0x100000FAC +CHECK: Size: 0x48 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK32: Section { +CHECK32: Name: __nl_symbol_ptr +CHECK32: Segment: __DATA +CHECK32: Address: 0x2000 +CHECK32: Size: 0x4 +CHECK32: Offset: 0 +CHECK32: Alignment: 2 +CHECK32: RelocationOffset: 0x0 +CHECK32: RelocationCount: 0 +CHECK32: Type: 0x6 +CHECK32: Attributes [ (0x0) +CHECK32: ] +CHECK32: Reserved1: 0x0 +CHECK32: Reserved2: 0x0 +CHECK32: } +CHECK: Section { +CHECK: Name: __data +CHECK: Segment: __DATA +CHECK32: Address: 0x2004 +CHECK64: Address: 0x100001000 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __common +CHECK: Segment: __DATA +CHECK32: Address: 0x2008 +CHECK64: Address: 0x100001004 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __bss +CHECK: Segment: __DATA +CHECK32: Address: 0x200C +CHECK64: Address: 0x100001008 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK64: Section { +CHECK64: Name: __debug_line +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003000 +CHECK64: Size: 0xEA +CHECK64: Offset: 8192 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubnames +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000030EA +CHECK64: Size: 0x7F +CHECK64: Offset: 8426 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubtypes +CHECK64: Segment: 
__DWARF +CHECK64: Address: 0x100003169 +CHECK64: Size: 0x57 +CHECK64: Offset: 8553 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_aranges +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000031C0 +CHECK64: Size: 0xB0 +CHECK64: Offset: 8640 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_info +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003270 +CHECK64: Size: 0x1BC +CHECK64: Offset: 8816 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK: Section { +CHECK: Name: __debug_abbrev +CHECK: Segment: __DWARF +CHECK32: Address: 0x4000 +CHECK32: Size: 0x1 +CHECK32: Offset: 8192 +CHECK64: Address: 0x10000342C +CHECK64: Size: 0x8F +CHECK64: Offset: 9260 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __debug_str +CHECK: Segment: __DWARF +CHECK32: Address: 0x4001 +CHECK32: Size: 0x1 +CHECK32: Offset: 8193 +CHECK64: Address: 0x1000034BB +CHECK64: Size: 0x9F +CHECK64: Offset: 9403 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: ] +CHECK: Symbols [ +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F20 +CHECK64: Value: 0x100000F20 +CHECK: } +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F90 +CHECK64: Value: 0x100000F90 +CHECK: } +CHECK: Symbol { +CHECK: Name: _baz (7) +CHECK: Type: Section (0xE) +CHECK: Section: __data +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2004 +CHECK64: Value: 0x100001000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _private_int (12) +CHECK: Type: Section (0xE) +CHECK: Section: __bss +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x200C +CHECK64: Value: 0x100001008 +CHECK: } +CHECK: Symbol { +CHECK: Name: __mh_execute_header (25) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x10) +CHECK: ReferencedDynamically (0x10) +CHECK: ] +CHECK32: Value: 0x1000 +CHECK64: Value: 0x100000000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _bar (45) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F40 +CHECK64: Value: 0x100000F40 +CHECK: } +CHECK: Symbol { +CHECK: Name: _foo (50) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1EC0 +CHECK64: Value: 0x100000ED0 +CHECK: } +CHECK: 
Symbol { +CHECK: Name: _main (55) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1E90 +CHECK64: Value: 0x100000EA0 +CHECK: } +CHECK: Symbol { +CHECK: Name: _val (61) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __common +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2008 +CHECK64: Value: 0x100001004 +CHECK: } +CHECK: ] + diff --git a/test/tools/dsymutil/X86/dummy-debug-map.map b/test/tools/dsymutil/X86/dummy-debug-map.map new file mode 100644 index 000000000000..f9bc7b099858 --- /dev/null +++ b/test/tools/dsymutil/X86/dummy-debug-map.map @@ -0,0 +1,22 @@ +# This is a dummy debug map used for some tests where the contents of the +# map are just an implementation detail. The tests wanting to use that file +# should put all their object files in an explicitly named sub-directory +# of Inputs, and they should be named 1.o, 2.o, ... +# As not finding an object file or symbols isn't a fatal error for dsymutil, +# you can extend this file with as many object files and symbols as needed. + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: 1.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x10000, size: 0x10 } + - filename: 2.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 } + - filename: 3.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 } + - { sym: __ZN1S3bazIiEEvT_, objAddr: 0x0, binAddr: 0x30010, size: 0x10 } +... + diff --git a/test/tools/dsymutil/X86/empty_range.s b/test/tools/dsymutil/X86/empty_range.s new file mode 100644 index 000000000000..dfe734f1b2bb --- /dev/null +++ b/test/tools/dsymutil/X86/empty_range.s @@ -0,0 +1,61 @@ +# This test verifies that an empty range list in the .debug_ranges section +# doesn't crash llvm-dsymutil. As clang does not produce this kind of debug +# info anymore, we used this hand-crafted assembly file to produce a testcase. +# Compile with: +# llvm-mc -triple x86_64-apple-darwin -filetype=obj -o 1.o empty_range.s + +# RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/empty_range -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + + .section __TEXT,__text,regular,pure_instructions + .macosx_version_min 10, 11 + .globl __Z3foov + .align 4, 0x90 +__Z3foov: ## @_Z3foov +Lfunc_begin0: + pushq %rbp + movq %rsp, %rbp + popq %rbp + retq +Lfunc_end0: + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ## Abbreviation Code + .byte 17 ## DW_TAG_compile_unit + .byte 1 ## DW_CHILDREN_yes + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 2 ## Abbreviation Code + .byte 46 ## DW_TAG_subprogram + .byte 0 ## DW_CHILDREN_no + .byte 17 ## DW_AT_low_pc + .byte 1 ## DW_FORM_addr + .byte 0x55 ## DW_AT_ranges + .byte 6 ## DW_FORM_data4 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 0 ## EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: + .long 22 ## Length of Unit + .short 2 ## DWARF version number + .long 0 ## Offset Into Abbrev.
Section + .byte 8 ## Address Size (in bytes) + .byte 1 ## Abbrev [1] DW_TAG_compile_unit + .byte 2 ## Abbrev [2] DW_TAG_subprogram + .quad Lfunc_begin0 ## DW_AT_low_pc + .long 0 ## DW_AT_ranges (pointing at an empty entry) + .byte 0 ## End Of Children Mark + .section __DWARF,__debug_ranges,regular,debug +Ldebug_range: + .long 0 + .long 0 + +# CHECK: DW_TAG_compile_unit +# CHECK: DW_TAG_subprogram +# CHECK-NEXT: DW_AT_low_pc{{.*}}(0x0000000000010000) +# CHECK-NEXT: DW_AT_ranges{{.*}}(0x00000000) + +# There was a bug that would use the currently active object file when a +# debug map object isn't found. Check that we only linked one file. +# CHECK-NOT: DW_TAG_compile_unit + diff --git a/test/tools/dsymutil/X86/fat-archive-input-i386.test b/test/tools/dsymutil/X86/fat-archive-input-i386.test new file mode 100644 index 000000000000..f4ea288768c8 --- /dev/null +++ b/test/tools/dsymutil/X86/fat-archive-input-i386.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'i386-apple-darwin' +objects: + - filename: libfat-test.a(fat-test.o) + symbols: + - { sym: _i386_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"i386_var" + diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64.test b/test/tools/dsymutil/X86/fat-object-input-x86_64.test new file mode 100644 index 000000000000..cdd5a4c08d9c --- /dev/null +++ b/test/tools/dsymutil/X86/fat-object-input-x86_64.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: fat-test.o + symbols: + - { sym: _x86_64_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"x86_64_var" + diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64h.test b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test new file mode 100644 index 000000000000..53aed1ec4443 --- /dev/null +++ b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'x86_64h-apple-darwin' +objects: + - filename: fat-test.o + symbols: + - { sym: _x86_64h_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"x86_64h_var" + diff --git a/test/tools/dsymutil/X86/frame-1.test b/test/tools/dsymutil/X86/frame-1.test index 7852e68a142a..27bc17d75984 100644 --- a/test/tools/dsymutil/X86/frame-1.test +++ b/test/tools/dsymutil/X86/frame-1.test @@ -2,14 +2,14 @@ # RUN: rm -rf %t # RUN: mkdir -p %t # RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o -# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s +# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s # This test is meant to verify that identical CIEs will get reused # in the same file but also in between files.
For this to happen, we # link the same file twice using this made-up debug map: --- -triple: 'i386-unknown-unknown-macho' +triple: 'i386-apple-darwin' objects: - filename: frame-dw2.o symbols: diff --git a/test/tools/dsymutil/X86/frame-2.test b/test/tools/dsymutil/X86/frame-2.test index 168e342a4f74..89a7670f86f0 100644 --- a/test/tools/dsymutil/X86/frame-2.test +++ b/test/tools/dsymutil/X86/frame-2.test @@ -3,7 +3,7 @@ # RUN: mkdir -p %t # RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o # RUN: llc -filetype=obj %p/../Inputs/frame-dw4.ll -o %t/frame-dw4.o -# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s +# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s # Check the handling of multiple different CIEs. To have CIEs that # appear to be different, use a dwarf2 version of the file along with @@ -12,7 +12,7 @@ # appears again. This is a behavior we inherited from dsymutil-classic # but this should be fixed (see comment in patchFrameInfoForObject()) --- -triple: 'i386-unknown-unknown-macho' +triple: 'i386-apple-darwin' objects: - filename: frame-dw2.o symbols: diff --git a/test/tools/dsymutil/X86/lit.local.cfg b/test/tools/dsymutil/X86/lit.local.cfg index c8625f4d9d24..05f8b38b3346 100644 --- a/test/tools/dsymutil/X86/lit.local.cfg +++ b/test/tools/dsymutil/X86/lit.local.cfg @@ -1,2 +1,4 @@ if not 'X86' in config.root.targets: config.unsupported = True + +config.suffixes = ['.test', '.cpp', '.m', '.s'] diff --git a/test/tools/dsymutil/X86/mismatch.m b/test/tools/dsymutil/X86/mismatch.m new file mode 100644 index 000000000000..33ae782df057 --- /dev/null +++ b/test/tools/dsymutil/X86/mismatch.m @@ -0,0 +1,23 @@ +/* Compile with: + cat >modules.modulemap <<EOF + module mismatch { + header "mismatch.h" + } + EOF + echo > mismatch.h + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \ + -fdisable-module-hash mismatch.m -o 1.o + echo > mismatch.h + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=.
\ + -fdisable-module-hash mismatch.m -o /dev/null +*/ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/mismatch \ +// RUN: -y %p/dummy-debug-map.map -o %t.bin 2>&1 | FileCheck %s + +@import mismatch; + +void f() {} +// CHECK: warning: hash mismatch diff --git a/test/tools/dsymutil/X86/modules.m b/test/tools/dsymutil/X86/modules.m new file mode 100644 index 000000000000..046a8c1304a0 --- /dev/null +++ b/test/tools/dsymutil/X86/modules.m @@ -0,0 +1,117 @@ +/* Compile with: + cat >modules.modulemap <<EOF + module Foo { + header "Foo.h" + export * + } + module Bar { + header "Bar.h" + export * + } + EOF + clang -D BAR_H -E -o Bar.h modules.m + clang -D FOO_H -E -o Foo.h modules.m + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \ + -fdisable-module-hash modules.m -o 1.o +*/ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules \ +// RUN: -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules \ +// RUN: -y %p/dummy-debug-map.map -o %t 2>&1 | FileCheck --check-prefix=WARN %s + +// WARN-NOT: warning: hash mismatch + +// --------------------------------------------------------------------- +#ifdef BAR_H +// --------------------------------------------------------------------- +// CHECK: DW_TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Bar" +// CHECK: 0x0[[BAR:.*]]: DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"Bar" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_member +// CHECK: DW_AT_name {{.*}}"value" +// CHECK: DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"PruneMeNot" + +struct Bar { + int value; +}; + +struct PruneMeNot; + +#else +// --------------------------------------------------------------------- +#ifdef FOO_H +// --------------------------------------------------------------------- +// CHECK: DW_TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: 0x0[[FOO:.*]]: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_typedef +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{0*}}[[BAR]]) +// CHECK: DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"S" +// CHECK-NOT: DW_TAG +// CHECK: 0x0[[INTERFACE:.*]]: DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" + +@import Bar; +typedef struct Bar Bar; +struct S {}; + +@interface Foo { + int ivar; +} +@end + +// --------------------------------------------------------------------- +#else +// --------------------------------------------------------------------- + +// CHECK: DW_TAG_compile_unit +// CHECK: DW_AT_low_pc +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_typedef +// CHECK-NOT: DW_TAG +// CHECK: NULL +// +// CHECK: DW_TAG_imported_declaration +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_import [DW_FORM_ref_addr] (0x{{0*}}[[FOO]] +// +// CHECK: DW_TAG_subprogram +// CHECK: DW_AT_name {{.*}}"main" +// +// CHECK: DW_TAG_variable +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}{0x{{0*}}[[PTR:.*]]} +// +// CHECK: 0x{{0*}}[[PTR]]: DW_TAG_pointer_type +// CHECK-NEXT: DW_AT_type [DW_FORM_ref_addr] (0x{{0*}}[[INTERFACE]]) + +@import Foo; +int main(int argc, char **argv) { + Bar bar; + Foo *foo = 0; + bar.value = 42; + return bar.value; +} +#endif +#endif diff --git a/test/tools/dsymutil/X86/multiple-inputs.test b/test/tools/dsymutil/X86/multiple-inputs.test new file mode 100644 index 000000000000..01b4f1bf52b5 --- /dev/null +++ b/test/tools/dsymutil/X86/multiple-inputs.test @@ -0,0 +1,31 @@ +RUN: rm -rf %T/multiple-inputs +RUN: mkdir -p %T/multiple-inputs + +RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/multiple-inputs/basic.macho.x86_64 +RUN: cat %p/../Inputs/basic-archive.macho.x86_64 > %T/multiple-inputs/basic-archive.macho.x86_64 +RUN: cat %p/../Inputs/basic-lto.macho.x86_64 > %T/multiple-inputs/basic-lto.macho.x86_64 +RUN: cat
%p/../Inputs/basic-lto-dw4.macho.x86_64 > %T/multiple-inputs/basic-lto-dw4.macho.x86_64 + +# Multiple inputs in flat mode +RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 +RUN: llvm-dwarfdump %T/multiple-inputs/basic.macho.x86_64.dwarf \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dwarfdump %T/multiple-inputs/basic-archive.macho.x86_64.dwarf \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto.macho.x86_64.dwarf | FileCheck %S/basic-lto-linking-x86.test +RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto-dw4.macho.x86_64.dwarf | FileCheck %S/basic-lto-dw4-linking-x86.test + +# Multiple inputs that end up in the same named bundle +RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-archive.macho.x86_64 \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto.macho.x86_64 | FileCheck %S/basic-lto-linking-x86.test +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto-dw4.macho.x86_64 | FileCheck %S/basic-lto-dw4-linking-x86.test + +# Multiple inputs in a named bundle in flat mode... impossible. +RUN: not llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM 2>&1 | FileCheck %s + +CHECK: error: cannot use -o with multiple inputs in flat mode + diff --git a/test/tools/dsymutil/X86/odr-anon-namespace.cpp b/test/tools/dsymutil/X86/odr-anon-namespace.cpp new file mode 100644 index 000000000000..a66fc830b67a --- /dev/null +++ b/test/tools/dsymutil/X86/odr-anon-namespace.cpp @@ -0,0 +1,65 @@ +/* Compile with: + for FILE in `seq 2`; do + clang -g -c odr-anon-namespace.cpp -DFILE$FILE -o odr-anon-namespace/$FILE.o + done + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-anon-namespace -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +#ifdef FILE1 +// Currently llvm-dsymutil will unique the contents of anonymous +// namespaces if they are from the same file/line. Force this +// namespace to appear different even though it's the same (this +// uniquing is actually a bug kept for backward compatibility, see the +// comments in DeclContextTree::getChildDeclContext()). +#line 42 +#endif +namespace { +class C {}; +} + +void foo() { + C c; +} + +// Keep the ifdef guards for FILE1 and FILE2 even if all the code is +// above to clearly show what the CHECK lines are testing.
+#ifdef FILE1 + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp" + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"c" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE1:[0-9a-f]*]] + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[C_FILE1]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"C" + +#elif defined(FILE2) + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp" + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"c" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE2:[0-9a-f]*]] + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[C_FILE2]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"C" + +#else +#error "You must define which file you generate" +#endif diff --git a/test/tools/dsymutil/X86/odr-member-functions.cpp b/test/tools/dsymutil/X86/odr-member-functions.cpp new file mode 100644 index 000000000000..737d5a7abf61 --- /dev/null +++ b/test/tools/dsymutil/X86/odr-member-functions.cpp @@ -0,0 +1,109 @@ +/* Compile with: + for FILE in `seq 3`; do + clang -g -c odr-member-functions.cpp -DFILE$FILE -o odr-member-functions/$FILE.o + done + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-member-functions -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +struct S { + __attribute__((always_inline)) void foo() { bar(); } + __attribute__((always_inline)) void foo(int i) { if (i) bar(); } + void bar(); + + template <typename T> void baz(T t) {} +}; + +#ifdef FILE1 +void foo() { + S s; +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: {{DW_TAG|NULL}} +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NOT: {{DW_TAG|NULL}} +// CHECK: DW_AT_name{{.*}}"S" +// CHECK-NOT: NULL +// CHECK: 0x[[FOO:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEv" +// CHECK: NULL +// CHECK: 0x[[FOOI:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEi" + +#elif defined(FILE2) +void foo() { + S s; + // Check that the overloaded member functions are resolved correctly + s.foo(); + s.foo(1); +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// Normal member functions should be described by the type in the first +// CU, thus we should be able to reuse its definition and avoid +// re-emitting it.
+// CHECK-NOT: DW_TAG_structure_type + +// CHECK: 0x[[FOO_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_specification{{.*}}[[FOO]] +// CHECK-NOT: DW_TAG_structure_type +// CHECK: 0x[[FOOI_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_specification{{.*}}[[FOOI]] +// CHECK-NOT: DW_TAG_structure_type + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"s" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}[[S]]) +// CHECK: DW_TAG_inlined_subroutine +// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOO_SUB]] +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_call_line{{.*}}40 +// CHECK: DW_TAG_inlined_subroutine +// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOOI_SUB]] +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_call_line{{.*}}41 + +#elif defined(FILE3) +void foo() { + S s; + s.baz(42); +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// Template or other implicit members will be included in the type +// only if they are generated, thus actually creating a new type. +// CHECK: DW_TAG_structure_type + +// Skip 'normal' member functions +// CHECK: DW_TAG_subprogram +// CHECK: DW_TAG_subprogram +// CHECK: DW_TAG_subprogram + +// This is the 'baz' member +// CHECK: 0x[[BAZ:[0-9a-f]*]]: DW_TAG_subprogram +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_MIPS_linkage_name {{.*}}"_ZN1S3bazIiEEvT_" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"baz" + +// Skip foo3 +// CHECK: DW_TAG_subprogram + +// baz instantiation: +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_specification {{.*}}[[BAZ]] "_ZN1S3bazIiEEvT_" +#else +#error "You must define which file you generate" +#endif diff --git a/test/tools/dsymutil/X86/odr-uniquing.cpp b/test/tools/dsymutil/X86/odr-uniquing.cpp new file mode 100644 index 000000000000..bb7ae50a2c72 --- /dev/null +++ b/test/tools/dsymutil/X86/odr-uniquing.cpp @@ -0,0 +1,187 @@ +/* Compile with: + clang -g -c odr-uniquing.cpp -o odr-uniquing/1.o + cp odr-uniquing/1.o odr-uniquing/2.o + The aim of these tests is to check that all the 'type types' that + should be uniqued through the ODR really are. + + The resulting object file is linked against itself using a fake + debug map. The end result is: + - with ODR uniquing: all types (except for the union for now) in + the second CU should point back to the types of the first CU.
+ - without ODR uniquing: all types are re-emitted in the second CU + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=ODR -check-prefix=CHECK %s +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -no-odr -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=NOODR -check-prefix=CHECK %s + +// The first compile unit contains all the types: +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-uniquing.cpp" + +struct S { + struct Nested {}; +}; + +// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"S" +// CHECK-NOT: NULL +// CHECK: 0x[[NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"Nested" +// CHECK: NULL + +namespace N { +class C {}; +} + +// CHECK: DW_TAG_namespace +// CHECK-NEXT: DW_AT_name{{.*}}"N" +// CHECK-NOT: NULL +// CHECK: 0x[[NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NEXT: DW_AT_name{{.*}}"C" +// CHECK: NULL + +union U { + class C {} C; + struct S {} S; +}; + +// CHECK: 0x[[U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type +// CHECK-NEXT: DW_AT_name{{.*}}"U" +// CHECK-NOT: NULL +// CHECK: 0x[[UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: NULL +// CHECK: 0x[[US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK: NULL + +typedef S AliasForS; + +// CHECK: 0x[[ALIASFORS:[0-9a-f]*]]:{{.*}}DW_TAG_typedef +// CHECK-NEXT: DW_AT_type{{.*}}[[S]] +// CHECK-NEXT: DW_AT_name{{.*}}"AliasForS" + +namespace { +class AnonC {}; +} + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[ANONC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NEXT: DW_AT_name{{.*}}"AnonC" + +// This function is only here to hold objects that refer to the above types. +void foo() { + AliasForS s; + S::Nested n; + N::C nc; + AnonC ac; + U u; +} + +// The second CU contents depend on whether we disabled ODR uniquing or +// not. + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-uniquing.cpp" + +// The union itself is not uniqued for now (for dsymutil-compatibility), +// but the types defined inside it should be.
+// ODR: DW_TAG_union_type +// ODR-NEXT: DW_AT_name{{.*}}"U" +// ODR: DW_TAG_member +// ODR-NEXT: DW_AT_name{{.*}}"C" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[UC]] +// ODR: DW_TAG_member +// ODR-NEXT: DW_AT_name{{.*}}"S" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[US]] + +// Check that the variables point to the right type +// ODR: DW_TAG_subprogram +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"foo" +// ODR-NOT: NULL +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"s" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[ALIASFORS]] +// ODR: DW_AT_name{{.*}}"n" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[NESTED]] +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"nc" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[NC]] +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"ac" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[ANONC]] + +// With no ODR uniquing, we should get copies of all the types: + +// This is "struct S" +// NOODR: 0x[[DUP_S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR-NEXT: DW_AT_name{{.*}}"S" +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"Nested" + +// This is "class N::C" +// NOODR: DW_TAG_namespace +// NOODR-NEXT: DW_AT_name{{.*}}"N" +// NOODR: 0x[[DUP_NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// NOODR-NEXT: DW_AT_name{{.*}}"C" + +// This is "union U" +// NOODR: 0x[[DUP_U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type +// NOODR-NEXT: DW_AT_name{{.*}}"U" +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR: NULL + +// Check that the variables point to the right type +// NOODR: DW_TAG_subprogram +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"foo" +// NOODR-NOT: NULL +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"s" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}0x[[DUP_ALIASFORS:[0-9a-f]*]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"n" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}[[DUP_NESTED]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"nc" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}[[DUP_NC]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"ac" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}0x[[DUP_ANONC:[0-9a-f]*]] + +// This is "AliasForS" +// NOODR: 0x[[DUP_ALIASFORS]]:{{.*}}DW_TAG_typedef +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"AliasForS" + +// This is "(anonymous namespace)::AnonC" +// NOODR: DW_TAG_namespace +// NOODR-NOT: {{DW_AT_name|NULL|DW_TAG}} +// NOODR: 0x[[DUP_ANONC]]:{{.*}}DW_TAG_class_type +// NOODR-NEXT: DW_AT_name{{.*}}"AnonC" + diff --git a/test/tools/dsymutil/X86/submodules.m b/test/tools/dsymutil/X86/submodules.m new file mode 100644 index 000000000000..b2425a91cbda --- /dev/null +++ b/test/tools/dsymutil/X86/submodules.m @@ -0,0 +1,52 @@ +/* Compile with: + cat >modules.modulemap <&1 | FileCheck %s -check-prefix=BADARCH +RUN: not llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch i386 2>&1 | FileCheck %s -check-prefix=EMPTY + + +ARMV7: --- +ARMV7-NOT: ... +ARMV7: triple: 'thumbv7-apple-darwin' +ARMV7-NOT: ... +ARMV7: sym: _armv7_var +ARMV7-NOT: --- + +ARMV7S: --- +ARMV7S-NOT: ... +ARMV7S: triple: 'thumbv7s-apple-darwin' +ARMV7S-NOT: ... +ARMV7S: sym: _armv7s_var +ARMV7S-NOT: --- + +ARM64: --- +ARM64-NOT: ... 
+ARM64: triple: 'arm64-apple-darwin' +ARM64-NOT: ... +ARM64: sym: _arm64_var +ARM64-NOT: --- + +CHECK: ... + +BADARCH: error: Unsupported cpu architecture: 'arm42' +EMPTY: error: no architecture to link diff --git a/test/tools/dsymutil/archive-timestamp.test b/test/tools/dsymutil/archive-timestamp.test new file mode 100644 index 000000000000..f3f2162fa595 --- /dev/null +++ b/test/tools/dsymutil/archive-timestamp.test @@ -0,0 +1,24 @@ +# RUN: llvm-dsymutil -no-output -oso-prepend-path=%p -y %s 2>&1 | FileCheck %s + +# This is the archive member part of basic-archive.macho.x86_64 debug map with corrupted timestamps. + +# CHECK: warning: {{.*}}libbasic.a(basic2.macho.x86_64.o): {{[Nn]o}} such file +# CHECK: warning: {{.*}}libbasic.a(basic3.macho.x86_64.o): {{[Nn]o}} such file + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: '/Inputs/libbasic.a(basic2.macho.x86_64.o)' + timestamp: 141869239 + symbols: + - { sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 } + - { sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000 } + - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017 } + - { sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000 } + - filename: '/Inputs/libbasic.a(basic3.macho.x86_64.o)' + timestamp: 418692393 + symbols: + - { sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 } + - { sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 } + - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 } +... diff --git a/test/tools/dsymutil/basic-linking.test b/test/tools/dsymutil/basic-linking.test index ec6a5b771461..bff5b5df9e66 100644 --- a/test/tools/dsymutil/basic-linking.test +++ b/test/tools/dsymutil/basic-linking.test @@ -1,6 +1,7 @@ -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE This test checks the basic Dwarf linking process through the debug dumps.
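For reference, the hand-written map fed to -y in the archive-timestamp test above uses the same YAML debug map format that -dump-debug-map prints. A minimal annotated sketch of one entry follows; the comments are an informal reading of how these tests exercise the fields, not normative documentation:

---                     # one debug map per linked architecture
triple: 'x86_64-apple-darwin'
objects:                # one entry per object file of the original link
  - filename: '/Inputs/libbasic.a(basic2.macho.x86_64.o)'  # archive(member) syntax
    timestamp: 141869239   # a stale value makes the member lookup fail, producing
                           # the "No such file" warnings checked above
    symbols:
      - { sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 }
...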
diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test index 5091dfbfc24d..2b9d0917609d 100644 --- a/test/tools/dsymutil/debug-map-parsing.test +++ b/test/tools/dsymutil/debug-map-parsing.test @@ -1,15 +1,16 @@ -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE -RUN: llvm-dsymutil -v -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND -RUN: not llvm-dsymutil -v -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO +RUN: llvm-dsymutil -verbose -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND +RUN: not llvm-dsymutil -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE Check that we can parse the debug map of the basic executable. CHECK-NOT: error CHECK: --- -CHECK: triple: 'x86_64-unknown-unknown-macho' +CHECK: triple: 'x86_64-apple-darwin' +CHECK: binary-path:{{.*}}/Inputs/basic.macho.x86_64 CHECK: filename:{{.*}}/Inputs/basic1.macho.x86_64.o CHECK-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 CHECK: filename{{.*}}/Inputs/basic2.macho.x86_64.o @@ -28,7 +29,8 @@ Check that we can parse the debug-map of the basic-lto executable CHECK-LTO-NOT: error CHECK-LTO: --- -CHECK-LTO: triple: 'x86_64-unknown-unknown-macho' +CHECK-LTO: triple: 'x86_64-apple-darwin' +CHECK-LTO: binary-path:{{.*}}/Inputs/basic-lto.macho.x86_64 CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o CHECK-LTO-DAG: sym: _bar, objAddr: 0x0000000000000050, binAddr: 0x0000000100000F90, size: 0x00000024 CHECK-LTO-DAG: sym: _baz, objAddr: 0x0000000000000658, binAddr: 0x0000000100001000, size: 0x00000000 @@ -51,7 +53,8 @@ CHECK-ARCHIVE-NEXT: found member in current archive. CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic3.macho.x86_64.o)' CHECK-ARCHIVE-NEXT: found member in current archive. CHECK-ARCHIVE: --- -CHECK-ARCHIVE: triple: 'x86_64-unknown-unknown-macho' +CHECK-ARCHIVE: triple: 'x86_64-apple-darwin' +CHECK-ARCHIVE: binary-path:{{.*}}/Inputs/basic-archive.macho.x86_64 CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o CHECK-ARCHIVE-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o) @@ -72,7 +75,8 @@ NOT-FOUND: cannot open{{.*}}"/Inputs/basic1.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: cannot open{{.*}}"/Inputs/basic2.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: cannot open{{.*}}"/Inputs/basic3.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: --- -NOT-FOUND-NEXT: triple: 'x86_64-unknown-unknown-macho' +NOT-FOUND-NEXT: triple: 'x86_64-apple-darwin' +NOT-FOUND-NEXT: binary-path:{{.*}}/Inputs/basic.macho.x86_64 NOT-FOUND-NEXT: ... Check that we correctly error out on an invalid executable.
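Putting the checks above together, the dumped debug map for the basic executable now has roughly the following shape; this is a sketch assembled from the CHECK lines above, with the {{.*}} path prefixes elided:

---
triple: 'x86_64-apple-darwin'
binary-path: /Inputs/basic.macho.x86_64
objects:
  - filename: /Inputs/basic1.macho.x86_64.o
    symbols:
      - { sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 }
...

The binary-path line and the x86_64-apple-darwin triple are the two changes these updated expectations encode.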
diff --git a/test/tools/dsymutil/dump-symtab.test b/test/tools/dsymutil/dump-symtab.test new file mode 100644 index 000000000000..b83ac7f7ad17 --- /dev/null +++ b/test/tools/dsymutil/dump-symtab.test @@ -0,0 +1,44 @@ +RUN: llvm-dsymutil -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=ALL -check-prefix=I386 %s +RUN: llvm-dsymutil -arch i386 -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=I386 -check-prefix=ONE %s + + +ALL: ---------------------------------------------------------------------- +ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64) +ALL-NEXT: ---------------------------------------------------------------------- +ALL-NEXT: Index n_strx n_type n_sect n_desc n_value +ALL-NEXT: ======== -------- ------------------ ------ ------ ---------------- +ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64_var' +ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +ALL-NEXT: [ 5] 00000035 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64_var' +ALL-NEXT: [ 6] 00000041 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' + +I386: ---------------------------------------------------------------------- +I386-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (i386) +I386-NEXT: ---------------------------------------------------------------------- +I386-NEXT: Index n_strx n_type n_sect n_desc n_value +I386-NEXT: ======== -------- ------------------ ------ ------ ---------------- +I386-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +I386-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +I386-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +I386-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_i386_var' +I386-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +I386-NEXT: [ 5] 00000033 0f ( SECT EXT) 02 0000 0000000000001000 '_i386_var' +I386-NEXT: [ 6] 0000003d 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' + +ONE-NOT: Symbol table + +ALL: ---------------------------------------------------------------------- +ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64h) +ALL-NEXT: ---------------------------------------------------------------------- +ALL-NEXT: Index n_strx n_type n_sect n_desc n_value +ALL-NEXT: ======== -------- ------------------ ------ ------ ---------------- +ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 08 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64h_var' +ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +ALL-NEXT: [ 5] 00000036 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64h_var' +ALL-NEXT: [ 6] 00000043 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' \ No newline at end of file diff --git a/test/tools/dsymutil/fat-binary-output.test b/test/tools/dsymutil/fat-binary-output.test new file mode 100644 index 000000000000..fafef14ebe9b --- /dev/null +++ b/test/tools/dsymutil/fat-binary-output.test @@ -0,0 +1,32 @@ +RUN: llvm-dsymutil -f -verbose -no-output %p/Inputs/fat-test.dylib -oso-prepend-path %p | FileCheck %s + +This test doesn't produce any filesystem output; we just look at
the verbose +log output. + +For each arch in the binary, check that we emit the right triple with the right +file and the right symbol inside it (each slice has a different symbol, so that +means that the logic is looking at the right file slice too). + +After the link of each architecture, check that lipo is correctly invoked to +generate the fat output binary. + +CHECK: triple: 'x86_64-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH:.*]]fat-test.o +CHECK: DW_AT_name{{.*}} "x86_64_var" + +CHECK: triple: 'i386-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o +CHECK: DW_AT_name{{.*}} "i386_var" + +CHECK: triple: 'x86_64h-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o +CHECK: DW_AT_name{{.*}} "x86_64h_var" + +CHECK: Running lipo +CHECK-NEXT: lipo -create +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: -segalign x86_64 20 -segalign i386 20 -segalign x86_64h 20 +CHECK-SAME: -output [[INPUTS_PATH]]fat-test.dylib.dwarf + diff --git a/test/tools/dsymutil/yaml-object-address-rewrite.test b/test/tools/dsymutil/yaml-object-address-rewrite.test index dcb39be891cd..749719fc5bd9 100644 --- a/test/tools/dsymutil/yaml-object-address-rewrite.test +++ b/test/tools/dsymutil/yaml-object-address-rewrite.test @@ -1,16 +1,19 @@ -# RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s +# RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s # # The YAML debug map below is the one from basic-archive.macho.x86_64 with # the object addresses set to zero. Check that the YAML import is able to # rewrite these addresses to the right values. # # CHECK: --- -# CHECK-NEXT: triple:{{.*}}'x86_64-unknown-unknown-macho' +# CHECK-NEXT: triple:{{.*}}'x86_64-apple-darwin' +# CHECK-NEXT: binary-path:{{.*}}'' # CHECK-NEXT: objects: # CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-NEXT: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)' +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 # CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000 @@ -18,6 +21,7 @@ # CHECK-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000 # CHECK-NOT: { sym: # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)' +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 # CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 @@ -25,7 +29,7 @@ # CHECK-NOT: { sym: # CHECK-NEXT: ...
--- -triple: 'x86_64-unknown-unknown-macho' +triple: 'x86_64-apple-darwin' objects: - filename: /Inputs/basic1.macho.x86_64.o symbols: diff --git a/test/tools/gold/Inputs/linkonce-weak.ll b/test/tools/gold/Inputs/linkonce-weak.ll deleted file mode 100644 index f42af8faa844..000000000000 --- a/test/tools/gold/Inputs/linkonce-weak.ll +++ /dev/null @@ -1,3 +0,0 @@ -define weak_odr void @f() { - ret void -} diff --git a/test/tools/gold/lit.local.cfg b/test/tools/gold/PowerPC/lit.local.cfg similarity index 75% rename from test/tools/gold/lit.local.cfg rename to test/tools/gold/PowerPC/lit.local.cfg index a59549d47abe..d968938d24ce 100644 --- a/test/tools/gold/lit.local.cfg +++ b/test/tools/gold/PowerPC/lit.local.cfg @@ -1,4 +1,3 @@ if (not 'ld_plugin' in config.available_features or - not 'X86' in config.root.targets or not 'PowerPC' in config.root.targets): config.unsupported = True diff --git a/test/tools/gold/mtriple.ll b/test/tools/gold/PowerPC/mtriple.ll similarity index 100% rename from test/tools/gold/mtriple.ll rename to test/tools/gold/PowerPC/mtriple.ll diff --git a/test/tools/gold/Inputs/alias-1.ll b/test/tools/gold/X86/Inputs/alias-1.ll similarity index 100% rename from test/tools/gold/Inputs/alias-1.ll rename to test/tools/gold/X86/Inputs/alias-1.ll diff --git a/test/tools/gold/X86/Inputs/available-externally.ll b/test/tools/gold/X86/Inputs/available-externally.ll new file mode 100644 index 000000000000..cbc5c12c65d5 --- /dev/null +++ b/test/tools/gold/X86/Inputs/available-externally.ll @@ -0,0 +1,3 @@ +define void @zed() { + ret void +} diff --git a/test/tools/gold/Inputs/bcsection.s b/test/tools/gold/X86/Inputs/bcsection.s similarity index 100% rename from test/tools/gold/Inputs/bcsection.s rename to test/tools/gold/X86/Inputs/bcsection.s diff --git a/test/tools/gold/Inputs/comdat.ll b/test/tools/gold/X86/Inputs/comdat.ll similarity index 69% rename from test/tools/gold/Inputs/comdat.ll rename to test/tools/gold/X86/Inputs/comdat.ll index 464aefa49dc1..734115180fff 100644 --- a/test/tools/gold/Inputs/comdat.ll +++ b/test/tools/gold/X86/Inputs/comdat.ll @@ -17,9 +17,9 @@ bb21: @r21 = global i32* @v1 @r22 = global i32(i8*)* @f1 -@a21 = alias i32* @v1 -@a22 = alias bitcast (i32* @v1 to i16*) +@a21 = alias i32, i32* @v1 +@a22 = alias i16, bitcast (i32* @v1 to i16*) -@a23 = alias i32(i8*)* @f1 -@a24 = alias bitcast (i32(i8*)* @f1 to i16*) -@a25 = alias i16* @a24 +@a23 = alias i32(i8*), i32(i8*)* @f1 +@a24 = alias i16, bitcast (i32(i8*)* @f1 to i16*) +@a25 = alias i16, i16* @a24 diff --git a/test/tools/gold/X86/Inputs/comdat2.ll b/test/tools/gold/X86/Inputs/comdat2.ll new file mode 100644 index 000000000000..5b7f74cf0b24 --- /dev/null +++ b/test/tools/gold/X86/Inputs/comdat2.ll @@ -0,0 +1,9 @@ +$foo = comdat any +@foo = global i8 1, comdat +define void @zed() { + call void @bar() + ret void +} +define void @bar() comdat($foo) { + ret void +} diff --git a/test/tools/gold/Inputs/common.ll b/test/tools/gold/X86/Inputs/common.ll similarity index 100% rename from test/tools/gold/Inputs/common.ll rename to test/tools/gold/X86/Inputs/common.ll diff --git a/test/tools/gold/X86/Inputs/ctors2.ll b/test/tools/gold/X86/Inputs/ctors2.ll new file mode 100644 index 000000000000..af1590eb277c --- /dev/null +++ b/test/tools/gold/X86/Inputs/ctors2.ll @@ -0,0 +1,5 @@ +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }] + +define void @bar() { + ret void +} diff --git a/test/tools/gold/Inputs/drop-debug.bc 
b/test/tools/gold/X86/Inputs/drop-debug.bc similarity index 100% rename from test/tools/gold/Inputs/drop-debug.bc rename to test/tools/gold/X86/Inputs/drop-debug.bc diff --git a/test/tools/gold/X86/Inputs/drop-linkage.ll b/test/tools/gold/X86/Inputs/drop-linkage.ll new file mode 100644 index 000000000000..075306114331 --- /dev/null +++ b/test/tools/gold/X86/Inputs/drop-linkage.ll @@ -0,0 +1,9 @@ +$foo = comdat any +define linkonce void @foo() comdat { + ret void +} + +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/gold/Inputs/invalid.bc b/test/tools/gold/X86/Inputs/invalid.bc similarity index 100% rename from test/tools/gold/Inputs/invalid.bc rename to test/tools/gold/X86/Inputs/invalid.bc diff --git a/test/tools/gold/Inputs/linker-script.export b/test/tools/gold/X86/Inputs/linker-script.export similarity index 100% rename from test/tools/gold/Inputs/linker-script.export rename to test/tools/gold/X86/Inputs/linker-script.export diff --git a/test/tools/gold/X86/Inputs/linkonce-weak.ll b/test/tools/gold/X86/Inputs/linkonce-weak.ll new file mode 100644 index 000000000000..3b7dad1b1eff --- /dev/null +++ b/test/tools/gold/X86/Inputs/linkonce-weak.ll @@ -0,0 +1,19 @@ +define weak_odr void @f() !dbg !4 { + ret void, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "linkonce-weak.c", directory: ".") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"} +!10 = !DILocation(line: 2, column: 1, scope: !4) diff --git a/test/tools/gold/Inputs/pr19901-1.ll b/test/tools/gold/X86/Inputs/pr19901-1.ll similarity index 100% rename from test/tools/gold/Inputs/pr19901-1.ll rename to test/tools/gold/X86/Inputs/pr19901-1.ll diff --git a/test/tools/gold/X86/Inputs/resolve-to-alias.ll b/test/tools/gold/X86/Inputs/resolve-to-alias.ll new file mode 100644 index 000000000000..eff02a6f4d1e --- /dev/null +++ b/test/tools/gold/X86/Inputs/resolve-to-alias.ll @@ -0,0 +1,4 @@ +@bar = alias void (), void ()* @zed +define void @zed() { + ret void +} diff --git a/test/tools/gold/X86/Inputs/thinlto.ll b/test/tools/gold/X86/Inputs/thinlto.ll new file mode 100644 index 000000000000..4e0840f3691e --- /dev/null +++ b/test/tools/gold/X86/Inputs/thinlto.ll @@ -0,0 +1,4 @@ +define void @g() { +entry: + ret void +} diff --git a/test/tools/gold/X86/Inputs/type-merge.ll b/test/tools/gold/X86/Inputs/type-merge.ll new file mode 100644 index 000000000000..4dc214922dc5 --- /dev/null +++ b/test/tools/gold/X86/Inputs/type-merge.ll @@ -0,0 +1,5 @@ +define void @zed() { + call void @bar() + ret void +} +declare void @bar() diff --git a/test/tools/gold/X86/Inputs/type-merge2.ll b/test/tools/gold/X86/Inputs/type-merge2.ll new file mode 100644 index 000000000000..a354757ee2e7 --- /dev/null +++ b/test/tools/gold/X86/Inputs/type-merge2.ll @@ -0,0 +1,5 @@ +%zed = type { i16 } +define void @bar(%zed* %this) { + store %zed* %this, %zed** null + ret void +} diff --git a/test/tools/gold/Inputs/weak.ll 
b/test/tools/gold/X86/Inputs/weak.ll similarity index 100% rename from test/tools/gold/Inputs/weak.ll rename to test/tools/gold/X86/Inputs/weak.ll diff --git a/test/tools/gold/alias.ll b/test/tools/gold/X86/alias.ll similarity index 92% rename from test/tools/gold/alias.ll rename to test/tools/gold/X86/alias.ll index b4edb05a4e46..c659f73d7e83 100644 --- a/test/tools/gold/alias.ll +++ b/test/tools/gold/X86/alias.ll @@ -9,5 +9,5 @@ ; CHECK-NEXT: @b = global i32 1 ; CHECK-NOT: alias -@a = weak alias i32* @b +@a = weak alias i32, i32* @b @b = global i32 1 diff --git a/test/tools/gold/X86/alias2.ll b/test/tools/gold/X86/alias2.ll new file mode 100644 index 000000000000..4727e0508fae --- /dev/null +++ b/test/tools/gold/X86/alias2.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -shared -o %t2.bc -plugin %llvmshlibdir/LLVMgold.so %t.o -plugin-opt=emit-llvm +; RUN: llvm-dis %t2.bc -o - | FileCheck %s + +@bar = alias void (), void ()* @zed +define void @foo() { + call void @bar() + ret void +} +define void @zed() { + ret void +} + +; CHECK: @bar = alias void (), void ()* @zed + +; CHECK: define void @foo() { +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: define void @zed() { +; CHECK-NEXT: ret void +; CHECK-NEXT: } diff --git a/test/tools/gold/linkonce-weak.ll b/test/tools/gold/X86/available-externally.ll similarity index 54% rename from test/tools/gold/linkonce-weak.ll rename to test/tools/gold/X86/available-externally.ll index a0cccea56cfe..d47a536dc094 100644 --- a/test/tools/gold/linkonce-weak.ll +++ b/test/tools/gold/X86/available-externally.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as %s -o %t.o -; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o +; RUN: llvm-as %p/Inputs/available-externally.ll -o %t2.o ; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: --plugin-opt=emit-llvm \ @@ -11,9 +11,17 @@ ; RUN: -shared %t2.o %t.o -o %t3.o ; RUN: llvm-dis %t3.o -o - | FileCheck %s -define linkonce_odr void @f() { +define void @foo() { + call void @bar() + call void @zed() + ret void +} +define available_externally void @bar() { + ret void +} +define available_externally void @zed() { ret void } -; Test that we get a weak_odr regardless of the order of the files -; CHECK: define weak_odr void @f() { +; CHECK-DAG: define available_externally void @bar() { +; CHECK-DAG: define void @zed() { diff --git a/test/tools/gold/bad-alias.ll b/test/tools/gold/X86/bad-alias.ll similarity index 64% rename from test/tools/gold/bad-alias.ll rename to test/tools/gold/X86/bad-alias.ll index a98bf710b454..c4e3c3fe82fc 100644 --- a/test/tools/gold/bad-alias.ll +++ b/test/tools/gold/X86/bad-alias.ll @@ -9,5 +9,5 @@ @g1 = global i32 1 @g2 = global i32 2 -@a = alias inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), - i32 ptrtoint (i32* @g2 to i32)) to i32*) +@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), + i32 ptrtoint (i32* @g2 to i32)) to i32*) diff --git a/test/tools/gold/X86/bcsection.ll b/test/tools/gold/X86/bcsection.ll new file mode 100644 index 000000000000..f7ebe375770e --- /dev/null +++ b/test/tools/gold/X86/bcsection.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as -o %T/bcsection.bc %s + +; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-unknown -o %T/bcsection.bco %p/Inputs/bcsection.s +; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0 +; RUN: %gold -r -o %T/bcsection.o -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco +; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +; CHECK: 
main +define i32 @main() { + ret i32 0 +} diff --git a/test/tools/gold/coff.ll b/test/tools/gold/X86/coff.ll similarity index 100% rename from test/tools/gold/coff.ll rename to test/tools/gold/X86/coff.ll diff --git a/test/tools/gold/X86/comdat.ll b/test/tools/gold/X86/comdat.ll new file mode 100644 index 000000000000..f65ca73c8063 --- /dev/null +++ b/test/tools/gold/X86/comdat.ll @@ -0,0 +1,65 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/comdat.ll -o %t2.o +; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t.o %t2.o \ +; RUN: -plugin-opt=emit-llvm +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +$c1 = comdat any + +@v1 = weak_odr global i32 42, comdat($c1) +define weak_odr i32 @f1(i8*) comdat($c1) { +bb10: + br label %bb11 +bb11: + ret i32 42 +} + +@r11 = global i32* @v1 +@r12 = global i32 (i8*)* @f1 + +@a11 = alias i32, i32* @v1 +@a12 = alias i16, bitcast (i32* @v1 to i16*) + +@a13 = alias i32 (i8*), i32 (i8*)* @f1 +@a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*) +@a15 = alias i16, i16* @a14 + +; CHECK: $c1 = comdat any +; CHECK: $c2 = comdat any + +; CHECK-DAG: @v1 = weak_odr global i32 42, comdat($c1) + +; CHECK-DAG: @r11 = global i32* @v1{{$}} +; CHECK-DAG: @r12 = global i32 (i8*)* @f1{{$}} + +; CHECK-DAG: @r21 = global i32* @v1{{$}} +; CHECK-DAG: @r22 = global i32 (i8*)* @f1{{$}} + +; CHECK-DAG: @v1.1 = internal global i32 41, comdat($c2) + +; CHECK-DAG: @a11 = alias i32, i32* @v1{{$}} +; CHECK-DAG: @a12 = alias i16, bitcast (i32* @v1 to i16*) + +; CHECK-DAG: @a13 = alias i32 (i8*), i32 (i8*)* @f1{{$}} +; CHECK-DAG: @a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*) + +; CHECK-DAG: @a21 = alias i32, i32* @v1.1{{$}} +; CHECK-DAG: @a22 = alias i16, bitcast (i32* @v1.1 to i16*) + +; CHECK-DAG: @a23 = alias i32 (i8*), i32 (i8*)* @f1.2{{$}} +; CHECK-DAG: @a24 = alias i16, bitcast (i32 (i8*)* @f1.2 to i16*) + +; CHECK: define weak_odr protected i32 @f1(i8*) comdat($c1) { +; CHECK-NEXT: bb10: +; CHECK-NEXT: br label %bb11{{$}} +; CHECK: bb11: +; CHECK-NEXT: ret i32 42 +; CHECK-NEXT: } + +; CHECK: define internal i32 @f1.2(i8* %this) comdat($c2) { +; CHECK-NEXT: bb20: +; CHECK-NEXT: store i8* %this, i8** null +; CHECK-NEXT: br label %bb21 +; CHECK: bb21: +; CHECK-NEXT: ret i32 41 +; CHECK-NEXT: } diff --git a/test/tools/gold/X86/comdat2.ll b/test/tools/gold/X86/comdat2.ll new file mode 100644 index 000000000000..2156efd207bb --- /dev/null +++ b/test/tools/gold/X86/comdat2.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-as %p/Inputs/comdat2.ll -o %t2.bc +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.bc %t2.bc -o %t3.bc +; RUN: llvm-dis %t3.bc -o - | FileCheck %s + + +$foo = comdat any +@foo = global i8 0, comdat + +; CHECK: @foo = global i8 0, comdat + +; CHECK: define void @zed() { +; CHECK: call void @bar() +; CHECK: ret void +; CHECK: } + +; CHECK: declare void @bar() diff --git a/test/tools/gold/common.ll b/test/tools/gold/X86/common.ll similarity index 100% rename from test/tools/gold/common.ll rename to test/tools/gold/X86/common.ll diff --git a/test/tools/gold/X86/ctors.ll b/test/tools/gold/X86/ctors.ll new file mode 100644 index 000000000000..24c8e342beb0 --- /dev/null +++ b/test/tools/gold/X86/ctors.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o -o %t2.o +; RUN: llvm-dis %t2.o -o - | FileCheck %s + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } 
{ i32 65535, void ()* @foo, i8* null }] + +define internal void @foo() { + ret void +} + +; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }] diff --git a/test/tools/gold/X86/ctors2.ll b/test/tools/gold/X86/ctors2.ll new file mode 100644 index 000000000000..c39cb7132d93 --- /dev/null +++ b/test/tools/gold/X86/ctors2.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/ctors2.ll -o %t2.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }] + +define void @foo() { + ret void +} + +; CHECK: @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }] diff --git a/test/tools/gold/X86/disable-verify.ll b/test/tools/gold/X86/disable-verify.ll new file mode 100644 index 000000000000..5b8dbb054478 --- /dev/null +++ b/test/tools/gold/X86/disable-verify.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as %s -o %t.o +; REQUIRES: asserts + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=disable-verify \ +; RUN: --plugin-opt=-debug-pass=Arguments \ +; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=-debug-pass=Arguments \ +; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s -check-prefix=VERIFY + +target triple = "x86_64-unknown-linux-gnu" + +; -disable-verify should disable output verification from the optimization +; pipeline. 
+; CHECK: Pass Arguments: {{.*}} -verify -forceattrs +; CHECK-NOT: -verify + +; VERIFY: Pass Arguments: {{.*}} -verify {{.*}} -verify + +define void @f() { +entry: + ret void +} diff --git a/test/tools/gold/drop-debug.ll b/test/tools/gold/X86/drop-debug.ll similarity index 100% rename from test/tools/gold/drop-debug.ll rename to test/tools/gold/X86/drop-debug.ll diff --git a/test/tools/gold/X86/drop-linkage.ll b/test/tools/gold/X86/drop-linkage.ll new file mode 100644 index 000000000000..d4c1dd052c79 --- /dev/null +++ b/test/tools/gold/X86/drop-linkage.ll @@ -0,0 +1,14 @@ +; RUN: llc %s -o %t.s +; RUN: llvm-mc %t.s -o %t.o -filetype=obj +; RUN: llvm-as %p/Inputs/drop-linkage.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +define void @foo() { + ret void +} + +; CHECK: declare extern_weak void @foo(){{$}} diff --git a/test/tools/gold/emit-llvm.ll b/test/tools/gold/X86/emit-llvm.ll similarity index 78% rename from test/tools/gold/emit-llvm.ll rename to test/tools/gold/X86/emit-llvm.ll index bfb90c4bc28a..4a6d5963cff0 100644 --- a/test/tools/gold/emit-llvm.ll +++ b/test/tools/gold/X86/emit-llvm.ll @@ -12,6 +12,7 @@ ; RUN: -shared %t.o -o %t3.o ; RUN: llvm-dis %t3.o.bc -o - | FileCheck %s ; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT %s +; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT2 %s ; RUN: llvm-nm %t3.o.o | FileCheck --check-prefix=NM %s ; RUN: rm -f %t4.o @@ -29,42 +30,42 @@ target triple = "x86_64-unknown-linux-gnu" @g8 = external global i32 -; CHECK: define internal void @f1() -; OPT-NOT: @f1 +; CHECK-DAG: define internal void @f1() +; OPT2-NOT: @f1 define hidden void @f1() { ret void } -; CHECK: define hidden void @f2() -; OPT: define hidden void @f2() +; CHECK-DAG: define hidden void @f2() +; OPT-DAG: define hidden void @f2() define hidden void @f2() { ret void } @llvm.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f2 to i8*)] -; CHECK: define void @f3() -; OPT: define void @f3() +; CHECK-DAG: define void @f3() +; OPT-DAG: define void @f3() define void @f3() { call void @f4() ret void } -; CHECK: define internal void @f4() -; OPT-NOT: @f4 +; CHECK-DAG: define internal void @f4() +; OPT2-NOT: @f4 define linkonce_odr void @f4() { ret void } -; CHECK: define linkonce_odr void @f5() -; OPT: define linkonce_odr void @f5() +; CHECK-DAG: define linkonce_odr void @f5() +; OPT-DAG: define linkonce_odr void @f5() define linkonce_odr void @f5() { ret void } @g5 = global void()* @f5 -; CHECK: define internal void @f6() unnamed_addr -; OPT: define internal void @f6() unnamed_addr +; CHECK-DAG: define internal void @f6() unnamed_addr +; OPT-DAG: define internal void @f6() unnamed_addr define linkonce_odr void @f6() unnamed_addr { ret void } diff --git a/test/tools/gold/invalid.ll b/test/tools/gold/X86/invalid.ll similarity index 100% rename from test/tools/gold/invalid.ll rename to test/tools/gold/X86/invalid.ll diff --git a/test/tools/gold/linker-script.ll b/test/tools/gold/X86/linker-script.ll similarity index 100% rename from test/tools/gold/linker-script.ll rename to test/tools/gold/X86/linker-script.ll diff --git a/test/tools/gold/X86/linkonce-weak.ll b/test/tools/gold/X86/linkonce-weak.ll new file mode 100644 index 000000000000..3397c3480a7c --- /dev/null +++ b/test/tools/gold/X86/linkonce-weak.ll @@ -0,0 +1,39 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o + +; RUN: %gold -plugin 
%llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t2.o %t.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +define linkonce_odr void @f() !dbg !4 { + ret void, !dbg !10 +} + +; Test that we get a weak_odr regardless of the order of the files +; CHECK: define weak_odr void @f() + +; Test that we only get a single DISubprogram for @f +; CHECK: !DISubprogram(name: "f" +; CHECK-NOT: !DISubprogram(name: "f" + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "linkonce-weak.c", directory: ".") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"} +!10 = !DILocation(line: 2, column: 1, scope: !4) diff --git a/test/tools/gold/X86/lit.local.cfg b/test/tools/gold/X86/lit.local.cfg new file mode 100644 index 000000000000..ddcd48ca470d --- /dev/null +++ b/test/tools/gold/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if (not 'ld_plugin' in config.available_features or + not 'X86' in config.root.targets): + config.unsupported = True diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/X86/no-map-whole-file.ll similarity index 100% rename from test/tools/gold/no-map-whole-file.ll rename to test/tools/gold/X86/no-map-whole-file.ll diff --git a/test/tools/gold/opt-level.ll b/test/tools/gold/X86/opt-level.ll similarity index 100% rename from test/tools/gold/opt-level.ll rename to test/tools/gold/X86/opt-level.ll diff --git a/test/tools/gold/X86/parallel.ll b/test/tools/gold/X86/parallel.ll new file mode 100644 index 000000000000..00a0bafda251 --- /dev/null +++ b/test/tools/gold/X86/parallel.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o %t.bc %s +; RUN: env LD_PRELOAD=%llvmshlibdir/LLVMgold.so %gold -plugin %llvmshlibdir/LLVMgold.so -u foo -u bar -plugin-opt jobs=2 -plugin-opt save-temps -m elf_x86_64 -o %t %t.bc +; RUN: llvm-nm %t.o0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-nm %t.o1 | FileCheck --check-prefix=CHECK1 %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK0-NOT: bar +; CHECK0: T foo +; CHECK0-NOT: bar +define void @foo() { + call void @bar() + ret void +} + +; CHECK1-NOT: foo +; CHECK1: T bar +; CHECK1-NOT: foo +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/gold/pr19901.ll b/test/tools/gold/X86/pr19901.ll similarity index 100% rename from test/tools/gold/pr19901.ll rename to test/tools/gold/X86/pr19901.ll diff --git a/test/tools/gold/X86/pr25907.ll b/test/tools/gold/X86/pr25907.ll new file mode 100644 index 000000000000..502938cf8126 --- /dev/null +++ b/test/tools/gold/X86/pr25907.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -shared %t.o -o %t2 +; RUN: llvm-nm %t2 | FileCheck %s +; CHECK: T main + +@main.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), 
i8* null], align 16 + +define i32 @main() #0 { +entry: + br label %L1 + +L1: ; preds = %entry, %L1 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %L1 ] + %inc = add i32 %i.0, 1 + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @main.L, i64 0, i64 %idxprom + %0 = load i8*, i8** %arrayidx, align 8, !tbaa !1 + indirectbr i8* %0, [label %L1, label %L2] + +L2: ; preds = %L1 + ret i32 0 +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} diff --git a/test/tools/gold/X86/pr25915.ll b/test/tools/gold/X86/pr25915.ll new file mode 100644 index 000000000000..2d8807e04bb4 --- /dev/null +++ b/test/tools/gold/X86/pr25915.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -plugin-opt=emit-llvm \ +; RUN: -shared %t.o -o %t2 +; RUN: llvm-dis %t2 -o - | FileCheck %s +; CHECK-NOT: subprograms + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2) +!1 = !DIFile(filename: "pr25915.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)"} diff --git a/test/tools/gold/remarks.ll b/test/tools/gold/X86/remarks.ll similarity index 69% rename from test/tools/gold/remarks.ll rename to test/tools/gold/X86/remarks.ll index c4fa7f787f26..51bd121cebce 100644 --- a/test/tools/gold/remarks.ll +++ b/test/tools/gold/X86/remarks.ll @@ -1,9 +1,9 @@ ; RUN: llvm-as %s -o %t.o -; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: -plugin-opt=-pass-remarks=inline %t.o -o %t2.o 2>&1 | FileCheck %s -; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: %t.o -o %t2.o 2>&1 | FileCheck -allow-empty --check-prefix=NO-REMARK %s @@ -12,8 +12,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare i32 @bar() + define i32 @f() { - ret i32 0 + %a = call i32 @bar() + ret i32 %a } define i32 @_start() { diff --git a/test/tools/gold/X86/resolve-to-alias.ll b/test/tools/gold/X86/resolve-to-alias.ll new file mode 100644 index 000000000000..102da6f80f4d --- /dev/null +++ b/test/tools/gold/X86/resolve-to-alias.ll @@ -0,0 +1,33 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/resolve-to-alias.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll +; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t2.o %t.o -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll +; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll + +define void @foo() { + call void @bar() + ret void +} +declare void @bar() + +; PASS1: @bar = alias void (), void ()* @zed + +; PASS1: define void @foo() { +; PASS1-NEXT: call void @bar() +; PASS1-NEXT: ret void +; PASS1-NEXT: } + +; PASS2: define void @zed() { +; PASS2-NEXT: ret void +; PASS2-NEXT: } 
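The alias rewrites running through these gold tests all apply one IR syntax change: an alias now spells its own value type explicitly, followed by the typed aliasee operand. A minimal before/after sketch, with names taken from the alias.ll test above:

; old syntax: the type is only implied by the aliasee operand
@a = weak alias i32* @b
; new syntax: explicit value type, then the aliasee
@a = weak alias i32, i32* @b
@b = global i32 1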
diff --git a/test/tools/gold/slp-vectorize.ll b/test/tools/gold/X86/slp-vectorize.ll similarity index 100% rename from test/tools/gold/slp-vectorize.ll rename to test/tools/gold/X86/slp-vectorize.ll diff --git a/test/tools/gold/stats.ll b/test/tools/gold/X86/stats.ll similarity index 100% rename from test/tools/gold/stats.ll rename to test/tools/gold/X86/stats.ll diff --git a/test/tools/gold/X86/thinlto.ll b/test/tools/gold/X86/thinlto.ll new file mode 100644 index 000000000000..97def3d7a14d --- /dev/null +++ b/test/tools/gold/X86/thinlto.ll @@ -0,0 +1,34 @@ +; First ensure that the ThinLTO handling in the gold plugin handles +; bitcode without function summary sections gracefully. +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/thinlto.ll -o %t2.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t3 + +; RUN: llvm-as -function-summary %s -o %t.o +; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t3 +; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; RUN: not test -e %t3 + +; COMBINED: &1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo new file mode 100644 index 0000000000000000000000000000000000000000..7bdb2a7b9f826e0dfcbafb94b64ef9285b42f4c6 GIT binary patch literal 1193 zcmbtTT}s115T4ytTP=#EFN!Y#5n3N&wTf1;EsBC5;tgtAQ!S=RNqaHDd+~HrgfYZvW8JtxbWTT1RyNqS7Wk=hfB>29j%Xx`; ztjk<&eH3I?6(f32BQ!qp%mZ2`ghABrI>U;t@#a?^&{jhjM7`I?imvhIPce}cC_(S# zbta1nev7=PXgt;DIj34rK*K)(`l2?8$Uoe;;P`O|#`UQC`(i%oJ~8?yuwUDTBP@Dj W+#_lezMoh|yOF`TrpWUre(OK6DqH*j literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo new file mode 100644 index 0000000000000000000000000000000000000000..f41243dc722b011d346dccede7fd91268525d578 GIT binary patch literal 1241 zcmbtU%`O8`6h8M(>#rfHT2yRiBf(;{8ll7(f`nLDk=RtHrZmQkOsiOl+FI}gEG>Bo zPayFgmNw4my)(wmBo=OR?m6FgzVkC}r`PM-8=Nr+4+9f^P{;zj#7L`4TQC5#95{n^(LJ-yMq zI&D>L^sPYn<1jjO?DM0F>(`9K3m*&tF{!|cU#~ZvRx?gFyt*I9Rof1ncARk=p3}v+ z!@tiMc8GnIQ9mn+eg~>{3e&_De#^j;cw6Da`p}m#AI5R%odhFsGC{kOz){H)U3g}N zi*9w+0cKQ=#P4+BSrab0Rc(M7l_T+}B(qVa2vjZQHAO>kN(wtlPOT@*6%E0HcuC+{ z=K9gokGD@^@nQfFpEUnN)+f!AE&g;`;wMA%k~StjX%ph*GRi~^(h4J>_#04C-$$xq AOaK4? literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/types/a.dwo b/test/tools/llvm-dwp/Inputs/simple/types/a.dwo new file mode 100644 index 0000000000000000000000000000000000000000..1fc71ca8d17560f4ce433262fedc9bcb4fc14a2f GIT binary patch literal 1369 zcmbtU%}VP)6h1T4YOeuN0($*H0$0%r#@Z@ce`>v#D+n%Ji91bes>PV}wrRzc;C%!) 
zzJ_lg_zEt>rT7-Q@JupO(=p=01LyBM-*b=3%PtIpEjl(r*%5}j?HD$V43Aw4J@d5Ua;6A z+BMX5)HG@W6(B`0AgP`Er`_&5M(Hoa2&FH;UdxZ8Pq)n_Fc1=CiH%7p0ZB&iY^0w| zTo&{syDZ%N?gpUqfZ>+DKD4Z@X!Iul(_wX}ifbh{NZ+8I1W4+fqiLLh>kXh%=tE1$ zOD_@gV+vq4&QE=-gx;@*{#ZtbcBy%^?|5bN@XP}`(CBeu>D39uDTU3n)~N9 zyMthrN_D#xr8rgB4)S2EZ<-P8k6d|swPHK!3{>tkmN}N`q(M>Se-vEyOP|NOjHA^5 zBH>pN6_vY#MHS=xE73F~ymTt}01GO{xwqV)jEAmmRqj8ms2C?di;mCEFUr8r8P`84 zz+p)gUgh>9&J+$$Hw00B2T4);c}`_9;;+NU*8UDXSW5}zG<-~f{6HJG{~o?AE^+&X lm9H)P&>i-$*opZZag%Ud97fraff(`w-lZUNU;L@4@&*J$Z#Dn` literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/types/b.dwo b/test/tools/llvm-dwp/Inputs/simple/types/b.dwo new file mode 100644 index 0000000000000000000000000000000000000000..a69cd69eb995177ad6c1d2579310f49c55a8a2c3 GIT binary patch literal 1409 zcmbtU&2G~`5T0EpAz%qL7(ogG`2bSO!K95U2tOu80twUu5+VmKAx@nV;n=}(13dxZ z!hs9$0=xn*fGWY{*2Zlq1ZWZ9!aUO`dO45V zY1jr6s?ZO5;g&u1C{VTucNM`-bGzQEujKb1qR~%clqE;>Bk@Wrt`J&`KLT%Kn07UW0Zs} z$#>6%tfzDvt!mWNvj~-`Sb_?kS%9dC^E1~f8#DLQ%|m!b3$85=Z2-d&wO8)3O1mD; zT&!L(ubMX&v9S!YPRSh$N3Dy6lNJuFSb!YUqykPqfhu7Etq{MC3UyXY0i4OsDCv3V zlQ{E_b+qYw+gsfr>N}gyB5<-4i!AL%gTcg4CV6-m4x)UX?2P??2tCh>{b`;O3`0N7 z({25SIgPzV|4aLTnVfq;^*0z__ir#<_iNu${fq{06>yvtbf1RrVZbq}eI$cI^Yg;F zkFdxw;(sH9axQawmUEwCg=0kitvWi&Ysa~-vA{9n-dEr$-Kae0)(|%shtiQEeqc4; zPkHWX*kkWzcMo8)7pFr1DrR@iEH1svO(Ou$(M zFCLHM2%IiB!ywIq$jkESm(kup9iZMreTd3)4Hck8Fd*@-&4c;AHly?$VuaE;u)Qq& z{^r9MhiZHR(deA8y@O?UKTbY^g^(B^d1oc0fzAj%h&o5~)j2vL49FrmzuPx;w%h-D z>nH%d1Q9x4YF{`)B$vpa}0ws@XV8_6c*4DGCx7Y1Hlx) zPG{rHanWZ<;h)NA=({`HTVd?k!xu5wMT$iw-HJz})X&m797LnIo@cX(e~93?ZsJet zlrV_=JkQtqKXV4#oBmgWzl>G67es$Y;miKV3NHJl?}>he2JZ!c@9dDsJ_$a;K*f0N zvmk5;FP+MLf&~@h+@IW_+%H^dRqk`Fs2C@&3-ET?Sp+KgHNq;!nJ)#nT(p%|Bp4UDhCHqHlAK{xoj-zi0<&b{}Wy&WV;p*!g>;K z>P_(MK?VH-#Djl>XOABA50GAblguR9G2+1o@9Xz|@6Bt{$>S@xu2Mok3V{w3nM48V zw1~TD*aZV>;0FD0!}p<+q2j2`&8o-h(_mxwLXPUZ+pFWU?C(1NZv6CX`nNL52D18ePxbL2m`W6PV96{ zo$Yi!T-pmjR{_Ht=EbG6#yNu?24E(fU-EI5woVofnL2=Oa*kne2A+8kmBIpALgojE zcp#Vp*zs(X^d0ocB=h%WH1M3Qjb0GB_Fy{#J4>;sq`hc3jJ+f-!+to7%6T#yd%FmI z$C-H3G9~asFU`}n{?8o6_NxC?_b+2r?n%+#Q24U{u7b;c>D!`Tp}|W5;5*wTvQL6{ zFiageS)xxappq-9?9Br zt8y2y!t!bnJQu_l1(Zf=KhH~_kevNJyN~c$Ap6nRgmOr}mwgylo&N@}7MH5?vdX_L jUg$~^tK69HQMC!j#g9>@F%V1sf$vd}EM&1 | FileCheck --check-prefix=ENOENT %s + +# ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-mc/fatal_warnings.test b/test/tools/llvm-mc/fatal_warnings.test new file mode 100644 index 000000000000..e9405ada7a04 --- /dev/null +++ b/test/tools/llvm-mc/fatal_warnings.test @@ -0,0 +1,4 @@ +# RUN: not llvm-mc --fatal-warnings %s 2>&1 | FileCheck %s + +# CHECK: error: .warning directive invoked in source file +.warning diff --git a/test/tools/llvm-mc/line_end_with_space.test b/test/tools/llvm-mc/line_end_with_space.test index 2ce313990af0..673d05a68067 100644 --- a/test/tools/llvm-mc/line_end_with_space.test +++ b/test/tools/llvm-mc/line_end_with_space.test @@ -1,2 +1 @@ RUN: llvm-mc -disassemble %s - \ No newline at end of file diff --git a/test/tools/llvm-mc/lit.local.cfg b/test/tools/llvm-mc/lit.local.cfg new file mode 100644 index 000000000000..a1bda0f1fdc8 --- /dev/null +++ b/test/tools/llvm-mc/lit.local.cfg @@ -0,0 +1,4 @@ +# Requires a non-empty default triple for these tests +if 'default_triple' not in config.available_features: + config.unsupported = True + diff --git a/test/tools/llvm-mc/no_warnings.test b/test/tools/llvm-mc/no_warnings.test new file mode 100644 index 000000000000..973dc271a581 --- /dev/null +++ b/test/tools/llvm-mc/no_warnings.test @@ -0,0 +1,4 @@ +# RUN: 
llvm-mc --no-warn %s 2>&1 | FileCheck %s + +# CHECK-NOT: warning: +.warning diff --git a/test/tools/llvm-nm/X86/IRobj.test b/test/tools/llvm-nm/X86/IRobj.test new file mode 100644 index 000000000000..e6fa517d00e8 --- /dev/null +++ b/test/tools/llvm-nm/X86/IRobj.test @@ -0,0 +1,11 @@ +# RUN: llvm-nm -format darwin %p/Inputs/test.IRobj-x86_64 | FileCheck %s + +# CHECK: ---------------- (LTO,RODATA) external _global_const +# CHECK: ---------------- (LTO,DATA) external _global_data +# CHECK: ---------------- (LTO,CODE) external _global_func +# CHECK: ---------------- (LTO,RODATA) private external _hidden_const +# CHECK: ---------------- (LTO,DATA) private external _hidden_data +# CHECK: ---------------- (LTO,CODE) private external _hidden_func +# CHECK: ---------------- (LTO,RODATA) non-external _static_const +# CHECK: ---------------- (LTO,DATA) non-external _static_data +# CHECK: ---------------- (LTO,CODE) non-external _static_func diff --git a/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 b/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..2b59a1cfc63b05797c8a012aed7da860c610a434 GIT binary patch literal 844 zcmaJY5S`QbfnT}8LP7Bg!A8a2M$v->EkqKqktHM-6G$!+FC+*;KoGAG{27*( zS}Vj(e}saCLP`-)-{fX7#*cxyH*aU=?cVI(zCOSA5n0`Y51g^dfdhw)!;hK&VP~># zcSV8W&?NSl<7`0CC1?9ZIBa5ASMrT;^ zYjQrJk)|O)=#`4QsdS(#el_oBv|e9rOOq1f2yQ`-XLSRV@qF7sYcHp>hRjxzEWPHP;#oJav?Losg}t-{5Z;}U_|3MZ@i2<1yia-;Z!O41Y~IV!x}L}TBUeS2yuiJm{_0#J literal 0 HcmV?d00001 diff --git a/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 b/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..8bfa857fa7d163f3b74daa2df6e179e34a5c44e0 GIT binary patch literal 1168 zcmZuxPfR0K82_d-%$x0>=|G!M+?jEPqQQj2SP09yAp*tNO<04*7;jDql6qJ&w4}7` z9;VQBYN)2H9Ehh*vIhpm4e%TR;98Twf!BI)Lz&>_WQDp;bD^tuiLCpD zE~ZbUHs*BhHOJIl*Ho4Z`Nx>Rb!TXLAmS{@!M(S1cb^;|KozEYZl}02v44&s;mrWR z4Ln|63_gB}jVOSJA=iXzRP30Z)}&aC@&{3|YIMn;3EHq6I z-f(Do8YW*Z@lY0yOwU0{s6|EmRm=2X(uafq2?iy%jW{qXDPvSKG|fn3cHJ0_8Cnr7 zLrvxzSSKNMGGaN(|9m0-2opv5hi&25^z4~pt;E~Bs(3K@e3h_BeFv><8=7M4|39cx z3}|{fZK0O2#WdPNIU_z(0@rcw>>T>m06vlcSnP-TnM>0A9m>~#FN?beS+n5pnaSl{ z+3XBAl%OI#@HI~a>Ur2IR%i#)VJ2({@QvKI$ZL1T#^%NTfgEysdUPQberUu?^Arrjk7JCBp=a%!Ux>HrX zCzAJ3f&IP3sw!Fpr0Vnoi*2{qU+Z)dkk+E{lPI7MfR0IQTk)PuuOCvaL4Q(3w_@l@1jPr@O3`=;M`ZK&V*X)9=l9zDf}a@Ag~zh@^HVxXr%+;h$=w0( zZyful&JMe$#Bxda!Q>Z|u8VE!muuvBB|M%d$7`uPuIXD0N=I~l+gJBEx0NdaI6ou_`ybNb Sn7=p)JJ_PqfB5$MzxFqnH)G=f literal 0 HcmV?d00001 diff --git a/test/tools/llvm-nm/X86/externalonly.test b/test/tools/llvm-nm/X86/externalonly.test new file mode 100644 index 000000000000..c37412987865 --- /dev/null +++ b/test/tools/llvm-nm/X86/externalonly.test @@ -0,0 +1,4 @@ +# RUN: llvm-nm -g %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK-NOT: EH_frame0 +# CHECK: _main diff --git a/test/tools/llvm-nm/X86/groupingflags.test b/test/tools/llvm-nm/X86/groupingflags.test new file mode 100644 index 000000000000..a1a258506e97 --- /dev/null +++ b/test/tools/llvm-nm/X86/groupingflags.test @@ -0,0 +1,5 @@ +# RUN: llvm-nm -gjp %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK: _main +# CHECK: _main.eh +# CHECK: _printf diff --git a/test/tools/llvm-nm/X86/posixMachO.test b/test/tools/llvm-nm/X86/posixMachO.test new file mode 100644 index 000000000000..a0d114237ce0 --- /dev/null +++ b/test/tools/llvm-nm/X86/posixMachO.test @@ -0,0 +1,7 @@ +# RUN: llvm-nm -P %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK: EH_frame0 s 104 0 +# CHECK: 
L_.str s 59 0 +# CHECK: _main T 0 0 +# CHECK: _main.eh S 128 0 +# CHECK: _printf U 0 0 diff --git a/test/tools/llvm-nm/lit.local.cfg b/test/tools/llvm-nm/lit.local.cfg new file mode 100644 index 000000000000..c8625f4d9d24 --- /dev/null +++ b/test/tools/llvm-nm/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test new file mode 100644 index 000000000000..cb9560d74dfb --- /dev/null +++ b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test @@ -0,0 +1,30 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +.section .mysection,"ax",@progbits +.globl _start +_start: + adr x1,msg +msg: .asciz "Hello, world\n" +msgend: + +.section .myothersection,"ax",@progbits + adrp x1,mystr +mystr: + .asciz "blah" + .size mystr, 4 + +# CHECK: Disassembly of section .mysection: +# CHECK: _start: +# CHECK: 0: 21 00 00 10 adr x1, #4 +# CHECK: msg: +# CHECK: 4: 48 65 6c 6c .word +# CHECK: 8: 6f 2c 20 77 .word +# CHECK: c: 6f 72 6c 64 .word +# CHECK: 10: 0a 00 .short +# CHECK: Disassembly of section .myothersection: +# CHECK: $x.2: +# CHECK: 0: 01 00 00 90 adrp x1, #0 +# CHECK: mystr: +# CHECK: 4: 62 6c 61 68 .word +# CHECK: 8: 00 .byte diff --git a/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 b/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 new file mode 100644 index 0000000000000000000000000000000000000000..1c5413230d7aa512792735533eb7d939fd9ce037 GIT binary patch literal 888 zcmb7CK}!Nr5S~r5BHKJgLD&)mT>`61m!Ly|2M;11qRT^cO<{ExT~YGl(PKf^I@_?KcH)WAh7!8?JTW!YGCG@?>*;pX7;^&etac~41#fpb210FI3e1A5B064f5K2s zEUtltd;vMT)&nZ#+dUtNm{P^F^J2(;(UO$)SaO6ARED* z*|eQVe?ql(m@`7pZFr@!uR6_cy;)HzcN)FV2(bk7($^R$>zVoorR+#~8J)SgGq&iy3>0spc||M6iG zqZ<@m!x?Xh28BugX*7P$u0fVc$W9? zz@!#KnW=l@jz`tAV}?Eco|q@M7~nDAfhRVBcx!1!%xvxgO&(l~cVt4z623jdxz3aw x@=w;7LonRs3ixV7kJAXn1M)h4n{o=T5|8QPu +hello.c 1444941273 124 0 100644 10% ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 1% ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/libbogus2.a b/test/tools/llvm-objdump/Inputs/libbogus2.a new file mode 100644 index 000000000000..2ccb7f31c09d --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/libbogus2.a @@ -0,0 +1,13 @@ +! +hello.c 1444941273 124 0 100644 102 ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 1% ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/libbogus3.a b/test/tools/llvm-objdump/Inputs/libbogus3.a new file mode 100644 index 000000000000..f15a7329f9f4 --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/libbogus3.a @@ -0,0 +1,16 @@ +! 
+hello.c 1444941273 124 0 100644 102 ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 171 ` +void foo(void){} + +bar.c 1445026190 124 0 100644 17 ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/section-filter.obj b/test/tools/llvm-objdump/Inputs/section-filter.obj new file mode 100644 index 0000000000000000000000000000000000000000..7dc5dae26b797d196c5b64ff429346a300eeb988 GIT binary patch literal 441 zcma)0I}XAy6mtlo3=B++jEDhs;s7i#aR^d@#KJ-{l!?o52#$xHk6I8CSn`XlIM2Cn z_d8<@3Jf0T9B|-;xF&%mDhz=-oZ5i8@gB85`*01ep(^L?68+Hy7hnA|)G3Z~IQ4Ze z*&4CnRL&-w|CJ4olB=l4VFIplle`17kpG1qOozy_Cd~L?m%0ARAqs1B1Z literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 new file mode 100755 index 0000000000000000000000000000000000000000..c0fbc88036305c35cd29378d1eece2698757a0bf GIT binary patch literal 1536 zcmeZ`n!v!!z`(!)#Q*;@Fzf)*Am9Kd@e?4meDb^a7`AYpRoubob5=1VBePhcpeR4R zC^1(dIWaFUzeFJ^RiUUfPa!i;!NosVAvZrIRgagKO9g7Qcg~A@{Jd?wFkDoU*2@Rv z7nMXo=?E|#l$o3XQUh~QfGY!o4#0AU3n z2C+bFpd1^Ri=Y_{AmSilCI$yquqKdNkOnXfr4$$#+(5zu1oTQ$D@uTNYl7?txeLj! zAR!P?U|>klD@sW$Nrbu;C;(Omiex0Nt4KnW-TC7=Y9 zKqrAxB!LUgm^`Bw@FSi#)%JqGdg9ES6i=?-!RoXB@!mAe%V71|U%#pSZvt)>hc}<( ziR9ON@yTnGEdp^KNr@xIOpG5qx*C2RP)3V|3$X+xOed*w_KTV@|D@t;CetYcXQ`B^ V8C7!^ai&Qnjl*GIr0X9L_y!zxswV&d literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a new file mode 100644 index 0000000000000000000000000000000000000000..b784d8163f54f0d06633bf372aff80258e421507 GIT binary patch literal 2768 zcmd5-O-NKx6u$nYjSWHy^9e3m2oYfe zA+21htI&dWrHc?GqR48~&P4?SwUFGjs6hM9efN%&j;4mxfqTF6bMKkC=YIFRSIaZc z7-PvvjHLrpFf5FvjRBJ}gfHzKd@xyZL4{B2=&mvyww||gVGgh0!OmbnTOVr(`~1zmuJwV|fYu)jhg`ZA3?Z!c>po5A81BQ5 zKsG$5Bk?&M4p|kisl&)u5;jut`R$LzWfV1u3XPics(z-3LK9>AF-EzTcxn*tV%iLm zHeD1A;l>$sl!sOrqN>exZYm^6h5V}b_Bj`j8UwopF1XO+P@WPaSlhF5zaYhggI}|>M89jAFQ+O zbMcW|v)OAMO}{xJ)#moO5eU@==c+6KB5S2hV2!}MoE&Jz*$?4A=5foo7P2dZeHuvi z31Pd1EllQxf^bZF*4Px-UG8cD-;G{hFvQfJ-mqT{hC1W<2R>WnOQZ)Gd$$|#xztt^ zd$+{raGycvzvZ#4ujHMyx5eXFl~?ae=5yi__&p@xG4a)nAaR=bt^g&E7cx6$-hVHj zo}N!yynF83N6F(Rtc&L{?ca*Tvo*q=#CSZxS!wVRsVZeIdV9n#DOphBOzx&Wh5n?f zK6E^O)#A+}x@-Me{GmVfXYYUBpY0BOjJPE3&u@(W%s+%wcj`c|>FkPe4>w6qAXfM? 
zuz_$V9G3ejbp`Se58`_;ag})MMU49I1K=P~@;3VT>&xf%%E3DeUs@jq3g;znJtCLv zzqEfV(lQ(!r*$#HqOA@`pPJz^H2QFGZJeG3Z?g2CG0b~`sGGS!AoWVJb4@0GF5-7C zdjZhxMiEA;wkiNDTChCK~0OCW!rvLx| literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho new file mode 100644 index 0000000000000000000000000000000000000000..d81f9f00f4a7dd78ea805814837537a9548f54d8 GIT binary patch literal 9248 zcmeHNO>7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*B#;`9OMI!Y#|sW zpHavx>Gacv`4S;M!n^;jB93j#@!keuzCOu!Lr#cr$UZeyFPHW?x>?c}@j3|eZAd=l zKf*yitUuYJVZKfouK{8V^W6&Z5f1W!WhcIT#?4538{=hDww@KkGr}Q0sSz2)F|8ra zrzWRgni!u{@8pEg`=d*o*e~iSPJ(fFKkyI|tshZO&luCnPDwNET*lN)BdfWFRq4-H zP)vGH_9SD~0kmWZG0`~kDxi{D!c(yu$jMHM1sY?Zhq1%R&w=13hNAbBnlqItQCqT0 zy%SMvl3DbZQ)LU}OU&SZ zl4p@Te*h17HPIvTK;(hQ1Ca+J4@4f=vj-MZYrmwf+>WQ#E^npQ54?vxO=@l8PHO$+ zDiyC?-rA(Se(K7%@w4A-AN>^NscVnDk8C8pee^2w+A<#Ow_mrnw++j18SS;W*kuhL z9~>Tdf%XmHVn?VQB$(4)g}#wbAg8n>zlW$y`^Qf7jC;KSK=Y87q435Wi><_C@y>(o zH6jnmo03QEOO<*Rg+7^79p&Ng%%E`Dd@c(+C zt0z7;UK~h|&5ftWiitCahxyFti(`CjY9yWJy?i=7lRdBJTz2N-R;(tKFHg*mG&QJp z*Ps{I)Ow-lR(LLxTh>{10|y+Q#6GJ%IE(|1ymxocv$!tCU3+!oFQu9JvbPt;rhsv&mR~0-Jd$?y6G4a z?pJ(KCnMgK5NiD^IjYF}&BPr_5TAl>u`R|vnfY;nnh{YF4|TqA6Sy~kN#Bd4{_lbC zG#(270GvQlavOz&0?4U-o*q)}Mfm|FHQo`F58=#0or9c|Se=K=0pm~2lNllWb_jnM z!q)}%{X@qi@~CB)$_b_|IJg_rW=poKXB=%@cP_YgQQRyR5FvKpl2HpBf+Y)k=sH?9 z%)B;fxq7Lf=Z%c3I|1fo)>F^vCPvl+<4A=FZKvO|O9fnbt_%(G!4qCHaSoTXS^w+x rC`!;V-C5SSK7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*B#;`9OMI!Y#|sW zpHavx>Gacv`4S;M!n^;jB93j#@!keuzCOu!Lr#eJLiVYtdbzaE(an;+h}S`wZ$t7a z{}B%IVg1P#4fA!%cnuI^nD17Ik8qF=EIaY#Gj2xO+ZZpKvh}POo)Hf5NsY)Tj%f{X zJ~cW0(!}_rdM77@-XC4!#C}mvaT1KX`+6CLR{@pO5}u0PKu&g2EYKJOJ&YYjehvgTF%-S0)SRhIiQ1A~ z>Ya#Ulgy&OoKl1CQ6L)Ude>`zeEIe9Ki~Z#{n7jr<)L2G!KI9Kba?3*g{cnGiF6K$ z`qKC;{ubzu#k6$AarJ^WWn@d4QbjvWucAwK>4Kx3w#ycNPs6s7-V2IqwO90#gBnL0 z3+O>d{6k1SKl>R-`57LDYiUI!N2G(HC28*W73^y4CrnI^275k*zBJ!E3GvQ)T4DzO zlRS&$`2%>stBD?w2OA%V@8~#V%|3 z_~7us3$$+l7dt}jAiL?F?X9jgUm1!SD!>aFH^&x@<8N)$ODlFA`e6!h&&K^Ao4)uf&bS7 zT|M!+@!~*wY;HU~R!p2ZJj`cCUmW9OQzPj#@8#3!ne2Hz=dv>ww_-J^e0gGiq^Uu* zy9T|urq&BZx59Io+_KK98#v(bB=%YD!C@S5)s{!|;=?7_iE4cAAWeEzt|@BY+D*GKx>x#Ogd`4j6xGp3Dg0w?p{D z5WX(3?;koIkw-1VR8BB$!NJ{_He0e?J>zKOx^uy`i{fUnfC#YzmyBBA5G+~PL)X!= zVdk|-%hgK-J#S=O-3c%!vz~fRH!-pv7)L5ZXgmFuT`J(hb7g3d51#OviF3H5&H7)j rM^S=~>CUpo1yWZ_MT-dh&xjg__t&SW4h~*dH80AYmRQruq>%atMcx|< literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho new file mode 100644 index 0000000000000000000000000000000000000000..4c7e78d93b62d51b57e665aa3b8ba6b224481077 GIT binary patch literal 9248 zcmeHNO>7%Q6dpG%4I$7?;SivJgHlwW@usMa0#cCfRnq>w4E} zjHqx_iio00KJX)PLWo;0J#ayQs00UY9N>@>>H$!Yij)HqRV4`D+nKS~>$v2=1!d*#a8=1kbLU*Q@A}}*}iDnd2ZT^j&E;synHPAQj&mh zlm8U4%sDrf`C^G&=Sz6Lo06|rwkJ$6+)f&CJf56S23o23ZIe?R6R+|E=eEV&N+Ii5 zyy%tiyX?%T=x7-Q?V@-K#v$ic=5BnNBvRuO4)Q7c)Qp00VwtWNwy!tDM>xm_9@#=L 
zNz|h>viP4=g+JYW@H`ugY+$M=eQij!d6-48sBMC(V?(=*2OvRl$ECzr8w%gpMYX;%jF z6%-TxlRe2;br3CCTuf9$UIkQAi~A~e9XZ)au|Q)C_Az!4`B@O$z)R-`57LDYiUI!lG4G@lC<{w3U)R36DB4`gFT-@Uz+djxOithEir@t zNuEXW{Q*4a*F=xV1Ca+J4@4e_JP>(c*B)3%t^Ja^d`nBMUD`^m?|Tn>n$+6F?bP~- zRVrS+w6#fl{nX`ewKLyrAN~~OsjH8@k1UzqK70jvZ5a>t+ppW(+oo-LjP_bw?6O9V z4UG)GK>G%8u|w1j63l6@Lf^>8kyBcd-$PWU{b)COYJP73&^+X4D7-PpVk=rq>)zj4 zBl3{EDS6bsRH;`{I3km(qdfeb8Px4mrhO0%tG;*DccuE?uR_-;6SBIfaf<%trC!~( zp!n51w4Sz2eMN1Ty!1U1N#udZ1Ca+J4@4e_JP>&x@<8N)$ODlFA`e6!_`i9er%#(3 zFAk>1=El=w#rWxiBYbA`#W6lMl}x93Kc7y|WX~BnkDb1-6{|_*%MJ`LT&wuk1F!@X5tPdh)+Vd*b!r&%>1}O&4?(0hdST50o)(Jr0+*k|Mx(68V`kk z0FEOmxrIVp0p!#^PY)^gqWl1o8t)Lw2XJPg&OuH{tjAQQRyR5Ft+Bl2H#Ff+Y&O=sH?9t-L;I zdq$~XqFN7DhG#<4A=FZFj(SN(Eeat_%7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*WJJ;ED7>~N45}* zlFuk)mUQ}Q!+ePbKE?>|{=14ewk^kd8-)4#B;O4=A;Ka1)KtA(+UMwINngb4Ak4QR z`IP?%2l=r6WQ&IRI%T{Dh%wA}E5t`Q$Oo34`0^PyBkgUBmrdDvRt(PwhxnvMWE97= zhB%*^oPKFyd{VuW6GHEgE^%VNsHZpy#@+qELrAoKL_IxYOe;Gj&9rkFQ!|aM<{DO| zKVLyH={?z#j8zBFk|o4MbqymonOllJ5A*WzNA zHGF(~j9PJ0#lMm~X@(vtihqB89tJJB=l^#%aVLtci$8*?nS5|70@ z54P8cJS1;Q9Qxl_WKwmMhrcs}x}D0j529h!_pbV`RNwnm=-OpM=8GDq=xtv7 z>Xrq?ujUc&YT49N)b_~Rx5sw7j}~bo4@4e_JP>&x@<8N)$ODlFA`e6!h&&K^Ao9Tf z>w&JG_}qALAU!rWo*pYE&Kw@*Govq#@v*6qbei|_>GVwYyq|-5lpE4nEa55A$El_`|jcxYeV5ElYBTqhmT;z9u>ZI$Y zV@SAP@kO1CcvnKG^{?cpBJVd7cPK%83cAI%82e=A#|3IeL`gi<`NmD)-T)?jFOvGd z2g1{MDEtF(0!hhj6cP#`r}lYzNVyl~2awcwM^HY5GYfSNa#CV-9x?}vKQ&Kggz(!T z{9y=R7ufd?9goPPmSHLpn!tEHkv1pa44jl=uvQ&a~BudA9D2CHWx$WJb?!8O9 zVsS&Q7(!tm{DD4+_|}&`_#z?{^uaeD^r25u@P`N$f<9OZ8ox6?yR&yUX^YiJx`)iq zIcMfOXU?6uJ9j4ET>JH}O^nU8Fvjjc3L{;H;Ebp-_5k>=H!#Ne#PBNmFoPwC&Gk5^1{*?;V9lF(T5ut>g?E<%(EN4q$UncBtM~5%g{PmN=2ylmWKKAl6n8SyS>bQ(eECS>#S{SX zHSyEHs^r|B<a|MZB=dEz>rAM~&DH`!9-bq4UD zRTgDCh3BLbGd4ZiAYLTkAMvX{S22$T_yuW#?L;4QOe9`Sr@Dzk`RCbg-k!$mAn4yE zU=!)_5^vhV`co}rJB?S!DRJQWUJl?9Z{h)^5nd|cCxn5Ze~YU1L1~^5AHY)@S;g*iljZ`g(Vp#$ZAEzm02g73-cx$a zRHsC3$uIp*M753%qQ4qajqV|EWas>r*T|ti-u*m&ZsO5GZx>_;DP!&JB0Z&fl7Tvq zjv!HAvd_YA0e)A=iWfcKPFtf+GM~s7t^M>WI+4vE^Q`^ZLI%&%$!4PB1w~TpWjpUd z;#q@+9&m&&e*9Ei?nNxo&G9(_{-%TpP)W^_Tl$Y^v9QXoIzQh zMaF*l89ON_0D#4|Q9F=X(yoKPc6XzsG^4(3s7_C|13kl{ zHyFU&Ckj;F7-ylA;ZV3^Q(Kw91N5fC(K}47ok`$8uv39mWyI>rS^qQ_2 z@_j9AaXSt_%DC(i^w}e_y!L$uT@LL+GNY_dKg0zNbd}vV2~tSxRm$Xzhu|g4_Mx0^ z39*mIzMCWw0Y#DNGmMMiy9}Q6T}b-B6o@~7O!MD@k05Eff=Wa)D5-slGDUZyeh-qi zw+;2pIKR+m9eWh7&pgJ#<4=#1IRXCd5NkE{9|rX2C0~gT9hInGT%nJ zW!tc$6mn{N-I;7YjqAyiy?goI-J+R1Yl~)C_v_VJ6rrQI!>o3JRMkqc!VJ7+K(*#8 Y>r;wN<5gAtqHMIpurgCME0d;NN_0_9-ullQM z=*fGw_~!i@AquO6nEOHqQG_n67UH(B&=X=abQCJ(#PE-micgyh_(($hNG44H&2Ra_loI;$Bk$94p=lTjN<;d{laK?l4>p9o-d^6{Y zJJ*dZrRU0UwpsIC*@}YyjF*PtMdkWb$bXzq4@l`&KNhcu^_<_Q=M!h+@!nI><7D@>3-4K(6Rae6AM7S#EtZwMgJoF!#r)9U0_cYw+9||a z*sq}zP|8}Z1=w}akDXT;>+yOo6W$*S${cn(3~bk_*^ODZ0ZiEnGHFkl!k3>XFs1D~IPsnNTCkKX%% zAFr?YHGXK}(v!~M9D1kjZNC9Ce6BNCg`J;9BmVuT)9H*V*Yyur=l#HK?8KL{5Zma3 zWG%m?`K^5zmijaL6!!UhrXMSd@oJ7Pbz+3(zW)7}Vq*;hh5^HXVZbn87%&VN1`Gp+ z0mFb{z%XDK`2RC7u&a26PcbmWVqaVgmo?OfQM^7AhAW>6RSd?MS9A{?*U2{YEJNRnZkrOlyi9Ea&i1;47;N;?DT>ccg4#bW_^{lSI7|imawNn z6@>Og;76(&+9N8w9{DXha=h7t&1!wx&vr$#>UgF4CoDSAcHD+N=0z%Ks)kdIRG0}= ys>SZJ%0*7K+;O^gP1U3Dpyvn8s%y_2J0g!9w&Uc!khE#L!f7Vwz3e{35B&jAYWsQs literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho 
b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho new file mode 100644 index 0000000000000000000000000000000000000000..cc438414f1121968f2508320693ca7aed318ce52 GIT binary patch literal 9248 zcmeHNO>7%Q6dpGX4I$7?;SivJgHjZr@usMa0#YSAKa~R^N#g_|P&M_>Cf;^`+}*Vr zBPv{#BBH305Bx}+5aQNL4_pu+D#3vp2RP(}dH@upBISTYRSC>{^J9 zkZw}1_Xr)Ofw2}e8;1FwKge|>qKU)^lla(As+dWk)DOxzU&1!~Rs~nMH^3*nhqLa0aHokWRU$m1cAi=!iV++l~TKX9|%7A`$^oHuk2hjodPwT6_?rD8ZRFUzO*1f z9G;`(#+*{4NLM^`9h2~Uw*=n+R};h-!=0oV$5W}Ll+sGZZ-*S?78c^tcr&)x=MzG1<4Lre&9$UZq$mJ9oQ!}5$3cpZfKHU*#b zA7Ygc)}Lt6GGD(8uK{EX^W6&ZAy)Z-WhcHuHpmKlTjOO*v|ixD6T~4tq2U?EF|Hv> zXJ_VKnx2}G@8mewH%1pYzMt1)oTzbkKj3jlxPEv&K4U~L`<`yO`K+Z|W=;=Gr!rEg zKrz9d~CFhJ~OxPCf zlJ7()w#fqY7gK7{JqifNx!(WUA76fb?9X?<$b7W)M0tD&>cFLh^!Bp!jKo+6(g*1* zB!dKBn*E{d)r`)mwzo+Rs3HE}*TKyHn^P$Gq zClx)=A^$j}20!^3kn}Tn7+i}hB9Rgf1}#c^zpucq)_&Z?#Dv=ODd>yyy%Xo}tfvJg z;D3~7Au)dd9%VJrBl1Axfye`q2O zapE!-uU*>S!o7a_%D38?Z|X-rh4SpR$KHo5m8l=O3VE#v5Axfu^?KcO%z)rti}GDo z^4M5%^ab2E02euo?LgEV_bT{}d>nF2tKxeI%ea5+gPt1e4FG5!Vi^?PSR}DEEvEGy z=&2!jNZu4Ya$hRdm!U8$lB~l#{GF-lb}HjO2#1y5yYjnIe(zU7*CP^S4UyxN*yg2C z-LW9~+y&}hc*P*i*pbSqsk^2z*<%h(Kw67Z1c8#e(DDH!!bkmUb8 zAfAJV#6JLzLy~eE3ULV_$Myw$2)P%^4?vRR9ftBjIJ1!FASVQt=OK%L;ZM$!m?8Xj z2!9yD*Ew$Z2aiX@QA-lbaiTB#a5ttecy3^1eSOOC&j)Ua-z?e?A+B=Cs4ItH340G+ zC(5Q(&}W>$@NA=CW&^`lm=igsUNkHi*-*w23Le`2h~s)TTzIaHkI}K?tQkLtOSoj@ t^=1^s@tE!mXb@;~m diff --git a/test/tools/llvm-objdump/X86/coff-disassemble-export.test b/test/tools/llvm-objdump/X86/coff-disassemble-export.test new file mode 100644 index 000000000000..2f0b211815cf --- /dev/null +++ b/test/tools/llvm-objdump/X86/coff-disassemble-export.test @@ -0,0 +1,8 @@ +// RUN: llvm-objdump -d %p/Inputs/disassemble.dll.coff-i386 | \ +// RUN: FileCheck %s + +// CHECK-LABEL: g: +// CHECK: calll 8 + +// CHECK-LABEL: f: +// CHECK: calll -24 diff --git a/test/tools/llvm-objdump/X86/disassemble-data.test b/test/tools/llvm-objdump/X86/disassemble-data.test new file mode 100644 index 000000000000..e9c4e7e1e5f2 --- /dev/null +++ b/test/tools/llvm-objdump/X86/disassemble-data.test @@ -0,0 +1,4 @@ +// This test checks that -D disassembles from a data section +// RUN: llvm-objdump -D %p/Inputs/disassemble-data.obj | FileCheck %s + +// CHECK: Disassembly of section .data: \ No newline at end of file diff --git a/test/tools/llvm-objdump/X86/macho-symbol-table.test b/test/tools/llvm-objdump/X86/macho-symbol-table.test index 826d78af68b1..19c619e73d07 100644 --- a/test/tools/llvm-objdump/X86/macho-symbol-table.test +++ b/test/tools/llvm-objdump/X86/macho-symbol-table.test @@ -1,8 +1,8 @@ RUN: llvm-objdump -macho -t %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s CHECK: SYMBOL TABLE: -CHECK: 000000000000003b l F __TEXT,__cstring L_.str -CHECK: 0000000000000068 l F __TEXT,__eh_frame EH_frame0 +CHECK: 000000000000003b l __TEXT,__cstring L_.str +CHECK: 0000000000000068 l __TEXT,__eh_frame EH_frame0 CHECK: 0000000000000000 g F __TEXT,__text _main -CHECK: 0000000000000080 g F __TEXT,__eh_frame _main.eh +CHECK: 0000000000000080 g __TEXT,__eh_frame _main.eh CHECK: 0000000000000000 *UND* _printf diff --git a/test/tools/llvm-objdump/X86/malformed-machos.test b/test/tools/llvm-objdump/X86/malformed-machos.test new file mode 100644 index 000000000000..a47e43443c5d --- /dev/null +++ b/test/tools/llvm-objdump/X86/malformed-machos.test @@ -0,0 +1,41 @@ +// These test checks that llvm-objdump will not crash with malformed Mach-O +// files. 
So the check line is not all that important; what matters is that the +// bug fixes keep llvm-objdump robust. +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0001.macho \ +# RUN: | FileCheck -check-prefix=m0001 %s + +# m0001: (method_t extends past the end of the section) + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0006.macho \ +# RUN: | FileCheck -check-prefix=m0006 %s + +# m0006: ivarLayout 0x8 + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0010.macho \ +# RUN: | FileCheck -check-prefix=m0010 %s + +# m0010: 00000000000010e0 0x10e8 _OBJC_CLASS_ + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0040.macho \ +# RUN: | FileCheck -check-prefix=m0040 %s + +# m0040: 00000000000010a0 0xf39 -[tiny_dylib init] + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0080.macho \ +# RUN: | FileCheck -check-prefix=m0080 %s + +# m0080: data 0xf960000 (struct class_ro_t *) + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0261.macho + +# RUN: llvm-objdump -macho -disassemble \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0337.macho \ +# RUN: | FileCheck -check-prefix=m0337 %s + +# m0337: subq $16, %rsp diff --git a/test/tools/llvm-objdump/eh_frame-arm64.test b/test/tools/llvm-objdump/eh_frame-arm64.test new file mode 100644 index 000000000000..f25e035a266e --- /dev/null +++ b/test/tools/llvm-objdump/eh_frame-arm64.test @@ -0,0 +1,23 @@ +# RUN: llvm-objdump -unwind-info %p/Inputs/eh_frame.macho-arm64 2>/dev/null | FileCheck %s + +# CHECK: Contents of __eh_frame section: +# CHECK: CIE: +# CHECK: Length: 16 +# CHECK: CIE ID: 0 +# CHECK: Version: 1 +# CHECK: Augmentation String: zR +# CHECK: Code Alignment Factor: 1 +# CHECK: Data Alignment Factor: -8 +# CHECK: Return Address Register: 30 +# CHECK: Augmentation Data Length: 1 +# CHECK: FDE Address Pointer Encoding: 16 +# CHECK: Instructions: +# CHECK: 0c 1f 00 +# CHECK: FDE: +# CHECK: Length: 32 +# CHECK: CIE Offset: 0 +# CHECK: PC Begin: ffffffffffffffe4 +# CHECK: PC Range: 0000000000000020 +# CHECK: Augmentation Data Length: 0 +# CHECK: Instructions: +# CHECK: 48 0e 10 9e 01 9d 02 00 00 00 00 diff --git a/test/tools/llvm-objdump/malformed-archives.test b/test/tools/llvm-objdump/malformed-archives.test new file mode 100644 index 000000000000..e0f165d37ed7 --- /dev/null +++ b/test/tools/llvm-objdump/malformed-archives.test @@ -0,0 +1,20 @@ +// These tests check that llvm-objdump will not crash with malformed Archive +// files. So the check line is not all that important; what matters is that +// the bug fixes keep llvm-objdump robust.
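For reference: the textual archive members earlier in this patch (hello.c, foo.c, bar.c) follow the classic ar(1) member-header layout, and the libbogus*.a inputs below are presumed to corrupt exactly these fields. A minimal C++ sketch of that standard layout with a hypothetical memberFits check -- illustrative only, not llvm-objdump's actual parser; note that foo.c declares 171 bytes for a one-line body, the kind of inconsistency a robust parser must survive:

  #include <cstdlib>
  #include <cstring>

  // Classic ar(1) member header: 60 bytes of fixed-width ASCII fields.
  struct ArMemberHeader {
    char Name[16]; // e.g. "hello.c"
    char Date[12]; // e.g. "1444941273"
    char UID[6];
    char GID[6];
    char Mode[8];  // e.g. "100644"
    char Size[10]; // decimal length of the member body
    char Magic[2]; // "`\n"
  };

  // Hypothetical check: a member whose declared size exceeds the bytes left
  // in the archive is malformed, and the parser must reject it gracefully.
  bool memberFits(const ArMemberHeader &H, unsigned long long BytesLeft) {
    char SizeField[11] = {};
    std::memcpy(SizeField, H.Size, 10);
    return std::strtoull(SizeField, nullptr, 10) <= BytesLeft;
  }

  int main() {
    ArMemberHeader H = {};
    std::memcpy(H.Size, "171       ", 10); // foo.c's declared size
    return memberFits(H, 17) ? 1 : 0;      // only 17 bytes actually remain
  }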
+# RUN: llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus1.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus1 %s + +# bogus1: Invalid data was encountered while parsing the file + +# RUN: not llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus2.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus2 %s + +# bogus2: LLVM ERROR: Invalid data was encountered while parsing the file + +# RUN: not llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus3.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus3 %s + +# bogus3: LLVM ERROR: Invalid data was encountered while parsing the file diff --git a/test/tools/llvm-objdump/section-filter.test b/test/tools/llvm-objdump/section-filter.test new file mode 100644 index 000000000000..9c7ab31b0d72 --- /dev/null +++ b/test/tools/llvm-objdump/section-filter.test @@ -0,0 +1,7 @@ +// This test checks that --section works correctly +// RUN: llvm-objdump -h %p/Inputs/section-filter.obj -j=.text \ +// RUN: --section=.bss | FileCheck %s + +# CHECK: .text +# CHECK-NOT: .data +# CHECK: .bss \ No newline at end of file diff --git a/test/tools/llvm-pdbdump/regex-filter.test b/test/tools/llvm-pdbdump/regex-filter.test index 8b9eca63f585..cfc910e07171 100644 --- a/test/tools/llvm-pdbdump/regex-filter.test +++ b/test/tools/llvm-pdbdump/regex-filter.test @@ -10,6 +10,10 @@ ; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_WHOLE_CLASS %s ; RUN: llvm-pdbdump -symbols -globals -exclude-compilands="FilterTest.obj" \ ; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_COMPILAND %s +; RUN: llvm-pdbdump -types -include-types="FilterTestClass" \ +; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_TYPES %s +; RUN: llvm-pdbdump -types -symbols -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \ +; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_VARS %s ; NO_FILTER: ---TYPES--- ; NO_FILTER: Enums: @@ -73,3 +77,19 @@ ; EXCLUDE_COMPILAND-NOT: __cdecl main ; EXCLUDE_COMPILAND: * Linker * ; EXCLUDE_COMPILAND: ---GLOBALS--- + +; Everything but types is displayed normally. But FilterTestClass is +; the only type that should be displayed. +; INCLUDE_ONLY_TYPES: ---TYPES--- +; INCLUDE_ONLY_TYPES-NOT: GlobalTypedef +; INCLUDE_ONLY_TYPES: class FilterTestClass + +; We should only see DoubleGlobalVar and IntGlobalVar. This means that even +; variables printed in class definitions should be filtered out.
+; INCLUDE_ONLY_VARS: ---TYPES--- +; INCLUDE_ONLY_VARS: class FilterTestClass +; INCLUDE_ONLY_VARS-NOT: IntMemberVar +; INCLUDE_ONLY_VARS-NOT: IntDoubleVar +; INCLUDE_ONLY_VARS: ---GLOBALS--- +; INCLUDE_ONLY_VARS: DoubleGlobalVar +; INCLUDE_ONLY_VARS: IntGlobalVar diff --git a/test/tools/llvm-profdata/Inputs/basic.proftext b/test/tools/llvm-profdata/Inputs/basic.proftext new file mode 100644 index 000000000000..db934da7c07a --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/basic.proftext @@ -0,0 +1,19 @@ +foo +10 +2 +499500 +179900 + +main +16650 +4 +1 +1000 +1000000 +499500 + +foo2 +10 +2 +500500 +180100 diff --git a/test/tools/llvm-profdata/Inputs/c-general.profraw b/test/tools/llvm-profdata/Inputs/c-general.profraw index e8cef21de5f258c5e08a1089280f282be0a77455..a4d94858a9e7cecbec88d7bb7de76f51152a4587 100644 GIT binary patch delta 599 zcmaFC^?}#8u_!ISs37M*69WYBKLylAa7zClL*CqXj=i9S2a2Fc7cm)wBQfL_%!_RNV=v zx-EhbbzDF-fGc13&~29EYkafT@#$sN)05fx`Ke1CRiM z+fa2KP<1}i5Op9mj9?WI;w4nw3aGk2P<7%!X;6HF)PulVsJabMbty7n^ClNCaZEnI y$gz0^qX6^d049ma4_JT#EcGJE`FSasC7Jnoi8=B41*t`eCHX~@?O8R!1_A&O*>n2< literal 1384 zcmah|yGjE=6dg69QSk}BDx}*gw?JWHSOFzTX-o_8` z11tp#otrr)gaz@!aORwI@7h~{IiGyN+iD6(9dx*!@;bS$3KEH|BPYszmnioq-x|0|5vfBS)=}rXBT^@F zUqtFe?uSUdi4SfA>p*!=64Yy+D&9xdlk;AYZb>a57GaS@L93rezx<17yZ`_I diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v2 b/test/tools/llvm-profdata/Inputs/compat.profdata.v2 new file mode 100644 index 0000000000000000000000000000000000000000..969867584a9934a408363896ef9ae250406b9e3f GIT binary patch literal 712 zcmeyLQ&5zjmf6U}00BDl*`X{5wFAmyWLUFT;fBWuhChXCFj_ zk>S_X-sfg1Q*X0CRT@C)+{Da0E=LFlCdUYsdjYk{!HFL%g5op>s3Mq^Q0^#2qY%)b Ok-4~xO~oaFLmU7ea2Txs literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov b/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov new file mode 100644 index 0000000000000000000000000000000000000000..0099280af2f9ebf9b78efcca8d015f59ce663147 GIT binary patch literal 1960 zcmYdHNlw=?H!xvJOZS$ivjzZHDlfaj1vLg3Lj6F9(V0f#$43GGToK zlml}=$evL)B(w>I11RWVYDU?RFe5b_K=}d|h9jMgC=(bM2(dwB5lqc+V8hB&kYC7U egX$KTUSip>au=kYo@`kC0J4`{HY<`|O4tB4*X#QL literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/inline-samples.afdo b/test/tools/llvm-profdata/Inputs/inline-samples.afdo new file mode 100644 index 000000000000..f8680d86fcb4 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/inline-samples.afdo @@ -0,0 +1,20 @@ +main:366846:0 + 2.1: 60401 + 4: 0 + 3: 0 + 0: 0 + 2.3: 60401 + 1: 0 + 2.3: _Z3fool:246044 + 1.2: 39280 + 1.4: 46871 + 1: 60401 + 1.3: _Z3bari:0 + 1.2: 0 + 1.1: 0 + 1.8: _Z3bari:0 + 1.2: 0 + 1.1: 0 + 1.7: _Z3bari:99492 + 1.2: 46732 + 1.1: 52760 diff --git a/test/tools/llvm-profdata/Inputs/overflow-instr.proftext b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext new file mode 100644 index 000000000000..48d1db88bcdf --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext @@ -0,0 +1,6 @@ +overflow +1 +3 +18446744073709551615 +9223372036854775808 +18446744073709551615 diff --git a/test/tools/llvm-profdata/Inputs/overflow-sample.proftext b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext new file mode 100644 index 000000000000..a5486bbd819c --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext @@ -0,0 +1,7 @@ +_Z3bari:18446744073709551615:1000 + 1: 18446744073709551615 +_Z3fooi:18446744073709551615:1000 + 1: 18446744073709551615 +main:1000:0 + 1: 500 _Z3bari:18446744073709551615 + 2: 500 _Z3fooi:18446744073709551615 diff --git 
a/test/tools/llvm-profdata/Inputs/sample-profile.proftext b/test/tools/llvm-profdata/Inputs/sample-profile.proftext index 9dc6d4310da9..54c821243afa 100644 --- a/test/tools/llvm-profdata/Inputs/sample-profile.proftext +++ b/test/tools/llvm-profdata/Inputs/sample-profile.proftext @@ -1,12 +1,12 @@ _Z3bari:20301:1437 -1: 1437 + 1: 1437 _Z3fooi:7711:610 -1: 610 + 1: 610 main:184019:0 -4: 534 -4.2: 534 -5: 1075 -5.1: 1075 -6: 2080 -7: 534 -9: 2064 _Z3bari:1471 _Z3fooi:631 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 diff --git a/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin new file mode 100644 index 000000000000..9e2e3f5c2b83 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin @@ -0,0 +1 @@ +ÿåбôÉ”¨ \ No newline at end of file diff --git a/test/tools/llvm-profdata/Inputs/vp-malform.proftext b/test/tools/llvm-profdata/Inputs/vp-malform.proftext new file mode 100644 index 000000000000..2db3096cecf1 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-malform.proftext @@ -0,0 +1,42 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +2 +# !!!! Malformed Value/Count pair +foo+100 +foo2:1000 +1 +foo2:20000 diff --git a/test/tools/llvm-profdata/Inputs/vp-malform2.proftext b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext new file mode 100644 index 000000000000..02ed5a968d80 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext @@ -0,0 +1,32 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +# !! Malformed value site, missing one value +2 +foo:100 +1 +foo2:20000 diff --git a/test/tools/llvm-profdata/Inputs/vp-truncate.proftext b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext new file mode 100644 index 000000000000..98b4b572b65c --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext @@ -0,0 +1,36 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata new file mode 100644 index 0000000000000000000000000000000000000000..4ed07660f654090e750b19be4e0af609bc1c61db GIT binary patch literal 1320 zcmeyLQ&5zjmf6V600ExHYmK2yFeL$%U}Tt_rlBWzFff!ADy;yeON$fJQ=x1IMi>K1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpK1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpjI*ICuRlWs>LPl7M$03g{ F3;+VlD0u(? 
literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext new file mode 100644 index 000000000000..a910f745e6c7 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext @@ -0,0 +1,8 @@ +bar:1772037:35370 + 17: 35370 + 18: 35370 + 19: 7005 + 20: 29407 + 21: 12170 + 23: 18150 bar:19829 + 25: 36666 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext new file mode 100644 index 000000000000..155ec5d00315 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext @@ -0,0 +1,8 @@ +foo:1763288:35327 + 7: 35327 + 8: 35327 + 9: 6930 + 10: 29341 + 11: 11906 + 13: 18185 foo:19531 + 15: 36458 diff --git a/test/tools/llvm-profdata/c-general.test b/test/tools/llvm-profdata/c-general.test index 01435303d445..efa9bfa18d73 100644 --- a/test/tools/llvm-profdata/c-general.test +++ b/test/tools/llvm-profdata/c-general.test @@ -6,7 +6,7 @@ REGENERATE: $ SRC=path/to/llvm REGENERATE: $ CFE=$SRC/tools/clang REGENERATE: $ TESTDIR=$SRC/test/tools/llvm-profdata REGENERATE: $ CFE_TESTDIR=$CFE/test/Profile -REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/test/Profile/c-general.c +REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/c-general.c REGENERATE: $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profraw ./a.out RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - | FileCheck %s -check-prefix=CHECK @@ -14,11 +14,11 @@ RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | F SWITCHES-LABEL: Counters: SWITCHES-NEXT: switches: -SWITCHES-NEXT: Hash: 0x0000000000000013 +SWITCHES-NEXT: Hash: 0x2618e4f23f2e8daa SWITCHES-NEXT: Counters: 19 SWITCHES-NEXT: Function count: 1 SWITCHES-LABEL: Functions shown: 1 -CHECK-LABEL: Total functions: 11 +CHECK-LABEL: Total functions: 12 CHECK-NEXT: Maximum function count: 1 CHECK-NEXT: Maximum internal block count: 100 diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext index 14da3374b5e9..139202d162e6 100644 --- a/test/tools/llvm-profdata/compat.proftext +++ b/test/tools/llvm-profdata/compat.proftext @@ -45,3 +45,23 @@ large_numbers # SUMMARY: Total functions: 3 # SUMMARY: Maximum function count: 2305843009213693952 # SUMMARY: Maximum internal block count: 1152921504606846976 + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v2 -all-functions --counts | FileCheck %s -check-prefix=FORMATV2 + +# FORMATV2: Counters: +# FORMATV2-NEXT: foo: +# FORMATV2-NEXT: Hash: 0x000000000000000a +# FORMATV2-NEXT: Counters: 2 +# FORMATV2-NEXT: Function count: 499500 +# FORMATV2-NEXT: Block counts: [179900] +# FORMATV2-NEXT: main: +# FORMATV2-NEXT: Hash: 0x000000000000410a +# FORMATV2-NEXT: Counters: 4 +# FORMATV2-NEXT: Function count: 1 +# FORMATV2-NEXT: Block counts: [1000, 1000000, 499500] +# FORMATV2-NEXT: Functions shown: 2 +# FORMATV2-NEXT: Total functions: 2 +# FORMATV2-NEXT: Maximum function count: 499500 +# FORMATV2-NEXT: Maximum internal block count: 1000000 + + diff --git a/test/tools/llvm-profdata/count-mismatch.proftext b/test/tools/llvm-profdata/count-mismatch.proftext index 1a2e73fbffdb..b42b11dbcf9d 100644 --- a/test/tools/llvm-profdata/count-mismatch.proftext +++ b/test/tools/llvm-profdata/count-mismatch.proftext @@ -14,7 +14,8 @@ foo # The hash matches, but we can't combine these because the number of # counters differs. 
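For reference, the replacement diagnostic below distinguishes a changed counter layout from a plain hash mismatch. A minimal C++ sketch of the compatibility check implied by the comment above -- hypothetical names, not LLVM's actual merge routine:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  struct InstrRecord {
    uint64_t Hash;
    std::vector<uint64_t> Counts;
  };

  // Records merge only if both the structural hash and the number of basic
  // block counters agree; a counter-count change triggers the new message.
  bool canMerge(const InstrRecord &Dest, const InstrRecord &Src) {
    if (Dest.Hash != Src.Hash)
      return false; // plain hash mismatch
    if (Dest.Counts.size() != Src.Counts.size()) {
      std::puts("Function basic block count change detected (counter mismatch)");
      std::puts("Make sure that all profile data to be merged is generated "
                "from the same binary.");
      return false;
    }
    return true;
  }

  int main() {
    InstrRecord A{1024, {1, 2, 3}}, B{1024, {1, 2}}; // same hash, 3 vs 2 counters
    return canMerge(A, B) ? 1 : 0;
  }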
-# MERGE_ERRS: count-mismatch.proftext: foo: Function count mismatch +# MERGE_ERRS: count-mismatch.proftext: foo: Function basic block count change detected (counter mismatch) +# MERGE_ERRS: Make sure that all profile data to be merged is generated from the same binary. foo 1024 3 diff --git a/test/tools/llvm-profdata/gcc-gcov-sample-profile.test b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test new file mode 100644 index 000000000000..dbcc74e1284f --- /dev/null +++ b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test @@ -0,0 +1,29 @@ +The input gcov file has been generated on a little endian machine. Expect +failures on big endian systems. + +XFAIL: powerpc64-, s390x, mips-, mips64-, sparc + +Tests for sample profiles encoded in GCC's gcov format. + +1- Show all functions. This profile has a single main() function with several + inlined callees. +RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov | FileCheck %s --check-prefix=SHOW1 +SHOW1: Function: main: 364084, 0, 6 sampled lines +SHOW1: 2.3: inlined callee: _Z3fool: 243786, 0, 3 sampled lines +SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines +SHOW1: 1.7: inlined callee: _Z3bari: 98558, 0, 2 sampled lines +SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines + +2- Convert the profile to text encoding and check that the two dumps are + identical. +RUN: llvm-profdata merge --sample %p/Inputs/gcc-sample-profile.gcov --text -o - | llvm-profdata show --sample - -o %t-text +RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov -o %t-gcov +RUN: diff %t-text %t-gcov + +3- Merge the gcov and text encodings of the profile and check that the + counters have doubled. +RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov -o %t-gcov +RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov %t-gcov -o - | FileCheck %s --check-prefix=MERGE1 +MERGE1: main:728168:0 +MERGE1: 2.3: 120298 +MERGE1: 2.3: _Z3fool:487572 diff --git a/test/tools/llvm-profdata/inline-samples.test b/test/tools/llvm-profdata/inline-samples.test new file mode 100644 index 000000000000..421f002da9f9 --- /dev/null +++ b/test/tools/llvm-profdata/inline-samples.test @@ -0,0 +1,30 @@ +Tests for conversion between text and binary encoded sample profiles. + +1- Encode the original profile into binary form. All the tests below will use + the binary profile. +RUN: llvm-profdata merge --sample %p/Inputs/inline-samples.afdo -o %t.profbin + +2- Show all functions. This profile has a single main() function with several + inlined callees. +RUN: llvm-profdata show --sample %t.profbin | FileCheck %s --check-prefix=SHOW1 +SHOW1: Function: main: 366846, 0, 6 sampled lines +SHOW1: 2.3: inlined callee: _Z3fool: 246044, 0, 3 sampled lines +SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines +SHOW1: 1.7: inlined callee: _Z3bari: 99492, 0, 2 sampled lines +SHOW1: 1.2: 46732 +SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines + +3- Convert the binary profile to text encoding and check that the two dumps are + identical. +RUN: llvm-profdata merge --sample %t.profbin --text -o - | llvm-profdata show --sample - -o %t-bintext +RUN: llvm-profdata show --sample %p/Inputs/inline-samples.afdo -o %t-text +RUN: diff %t-bintext %t-text + +4- Merge the binary and text encodings of the profile and check that the counters have doubled.
+RUN: llvm-profdata merge --sample --text %t.profbin %p/Inputs/inline-samples.afdo -o - | FileCheck %s --check-prefix=MERGE1 +MERGE1: main:733692:0 +MERGE1: 2.3: 120802 +MERGE1: 2.3: _Z3fool:492088 +MERGE1: 1.7: _Z3bari:198984 +MERGE1: 1.1: 105520 diff --git a/test/tools/llvm-profdata/overflow-instr.test b/test/tools/llvm-profdata/overflow-instr.test new file mode 100644 index 000000000000..5b9a94af9b29 --- /dev/null +++ b/test/tools/llvm-profdata/overflow-instr.test @@ -0,0 +1,17 @@ +Tests for overflow when merging instrumented profiles. + +1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW +RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW +MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow +SHOW_OVERFLOW: Total functions: 1 +SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 +SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 + +2- Merge profile having maximum counts by itself and verify no overflow +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW +MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow +SHOW_NO_OVERFLOW: Total functions: 1 +SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 diff --git a/test/tools/llvm-profdata/overflow-sample.test b/test/tools/llvm-profdata/overflow-sample.test new file mode 100644 index 000000000000..cd6268db2ab9 --- /dev/null +++ b/test/tools/llvm-profdata/overflow-sample.test @@ -0,0 +1,43 @@ +Tests for overflow when merging sampled profiles. 
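For reference, both overflow tests (instrumented above, sampled below) assume that merge arithmetic saturates at the maximum 64-bit counter value instead of wrapping. A minimal C++ sketch of saturating addition under that assumption -- illustrative, not necessarily the helper LLVM uses:

  #include <cassert>
  #include <cstdint>

  // Add two counters; clamp to UINT64_MAX and report when the sum would wrap.
  uint64_t saturatingAdd(uint64_t A, uint64_t B, bool &Overflowed) {
    const uint64_t Max = UINT64_MAX;
    Overflowed = A > Max - B; // true exactly when A + B does not fit
    return Overflowed ? Max : A + B;
  }

  int main() {
    bool Ovf = false;
    // Merging a maximal count with itself saturates (the MERGE_OVERFLOW case):
    assert(saturatingAdd(18446744073709551615ULL, 18446744073709551615ULL, Ovf) ==
           18446744073709551615ULL && Ovf);
    // A single copy of the profile merges without overflow (MERGE_NO_OVERFLOW):
    assert(saturatingAdd(18446744073709551615ULL, 0, Ovf) ==
           18446744073709551615ULL && !Ovf);
    return 0;
  }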
+ +1- Merge profile having maximum counts with itself and verify overflow detected +RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW +RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW +MERGE_OVERFLOW: {{.*}}: main: Counter overflow +SHOW_OVERFLOW: Function: main: 2000, 0, 2 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 1000, calls: _Z3bari:18446744073709551615 +SHOW_OVERFLOW-NEXT: 2: 1000, calls: _Z3fooi:18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 2000, 1 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 2000, 1 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function + +2- Merge profile having maximum counts by itself and verify no overflow +RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -allow-empty -check-prefix=MERGE_NO_OVERFLOW +RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW +MERGE_NO_OVERFLOW-NOT: {{.*}}: main: Counter overflow +SHOW_NO_OVERFLOW: Function: main: 1000, 0, 2 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 500, calls: _Z3bari:18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: 2: 500, calls: _Z3fooi:18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_NO_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 1000, 1 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_NO_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 1000, 1 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function diff --git a/test/tools/llvm-profdata/overflow.proftext b/test/tools/llvm-profdata/overflow.proftext deleted file mode 100644 index cbf3bf161823..000000000000 --- a/test/tools/llvm-profdata/overflow.proftext +++ /dev/null @@ -1,12 +0,0 @@ -# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s -# CHECK: overflow.proftext: overflow: Counter overflow - -overflow -1 -1 -9223372036854775808 - -overflow -1 -1 -9223372036854775808 diff --git a/test/tools/llvm-profdata/raw-32-bits-be.test b/test/tools/llvm-profdata/raw-32-bits-be.test index 86ac56d39f26..d20c36022fa6 100644 --- a/test/tools/llvm-profdata/raw-32-bits-be.test +++ b/test/tools/llvm-profdata/raw-32-bits-be.test @@ -1,27 +1,36 @@ RUN: printf '\377lprofR\201' > %t -RUN: printf '\0\0\0\0\0\0\0\1' >> %t +RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\6' >> %t RUN: printf '\0\0\0\0\1\0\0\0' >> %t RUN: printf '\0\0\0\0\2\0\0\0' >> %t +RUN: 
printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\2\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\2\0\0\03' >> %t RUN: printf '\1\0\0\10' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-32-bits-le.test b/test/tools/llvm-profdata/raw-32-bits-le.test index 9325e7eb0f52..1bd81a87fbac 100644 --- a/test/tools/llvm-profdata/raw-32-bits-le.test +++ b/test/tools/llvm-profdata/raw-32-bits-le.test @@ -1,27 +1,36 @@ RUN: printf '\201Rforpl\377' > %t -RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\6\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t RUN: printf '\0\0\0\2\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\1' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\2\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\0\2' >> %t RUN: printf '\10\0\0\1' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-64-bits-be.test b/test/tools/llvm-profdata/raw-64-bits-be.test index b97d8b5dac6d..902cedd476ab 100644 --- a/test/tools/llvm-profdata/raw-64-bits-be.test +++ b/test/tools/llvm-profdata/raw-64-bits-be.test @@ -1,27 +1,36 @@ RUN: printf '\377lprofr\201' > %t -RUN: printf '\0\0\0\0\0\0\0\1' >> %t +RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\6' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\02' >> %t RUN: printf '\0\0\0\2\0\4\0\03' >> %t RUN: printf '\0\0\0\1\0\4\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> 
%t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-64-bits-le.test b/test/tools/llvm-profdata/raw-64-bits-le.test index 0e6853811ec4..d2f410a6bb95 100644 --- a/test/tools/llvm-profdata/raw-64-bits-le.test +++ b/test/tools/llvm-profdata/raw-64-bits-le.test @@ -1,27 +1,36 @@ RUN: printf '\201rforpl\377' > %t -RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\6\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\0\0' >> %t RUN: printf '\02\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\4\0\2\0\0\0' >> %t RUN: printf '\10\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-magic-but-no-header.test b/test/tools/llvm-profdata/raw-magic-but-no-header.test index b2a697042b0a..76894faa183c 100644 --- a/test/tools/llvm-profdata/raw-magic-but-no-header.test +++ b/test/tools/llvm-profdata/raw-magic-but-no-header.test @@ -3,4 +3,4 @@ RUN: not llvm-profdata show %t 2>&1 | FileCheck %s RUN: printf '\377lprofr\201' > %t RUN: not llvm-profdata show %t 2>&1 | FileCheck %s -CHECK: error: {{.+}}: Invalid profile data (file header is corrupt) +CHECK: error: {{.+}}: Invalid instrumentation profile data (file header is corrupt) diff --git a/test/tools/llvm-profdata/raw-two-profiles.test b/test/tools/llvm-profdata/raw-two-profiles.test index be78793215ed..09eb121adf3f 100644 --- a/test/tools/llvm-profdata/raw-two-profiles.test +++ b/test/tools/llvm-profdata/raw-two-profiles.test @@ -1,48 +1,51 @@ RUN: printf '\201rforpl\377' > %t-foo.profraw -RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\2\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw -RUN: printf 'foo' >> 
%t-foo.profraw +RUN: printf 'foo\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\201rforpl\377' > %t-bar.profraw -RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\3\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw -RUN: printf 'bar' >> %t-bar.profraw +RUN: printf 'bar\0\0\0\0\0' >> %t-bar.profraw -Versions of the profiles that are padded to eight byte alignment. -RUN: cat %t-foo.profraw > %t-foo-padded.profraw -RUN: printf '\0\0\0\0\0' >> %t-foo-padded.profraw -RUN: cat %t-bar.profraw > %t-bar-padded.profraw -RUN: printf '\0\0\0\0\0' >> %t-bar-padded.profraw - -RUN: cat %t-foo-padded.profraw %t-bar.profraw > %t-pad-between.profraw -RUN: cat %t-foo-padded.profraw %t-bar-padded.profraw > %t-pad.profraw - -RUN: llvm-profdata show %t-pad-between.profraw -all-functions -counts | FileCheck %s +RUN: cat %t-foo.profraw %t-bar.profraw > %t-pad.profraw RUN: llvm-profdata show %t-pad.profraw -all-functions -counts | FileCheck %s CHECK: Counters: diff --git a/test/tools/llvm-profdata/sample-profile-basic.test b/test/tools/llvm-profdata/sample-profile-basic.test index 0651c513e965..5116b98f3335 100644 --- a/test/tools/llvm-profdata/sample-profile-basic.test +++ b/test/tools/llvm-profdata/sample-profile-basic.test @@ -3,15 +3,15 @@ Basic tests for sample profiles. 1- Show all functions RUN: llvm-profdata show --sample %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW1 SHOW1: Function: main: 184019, 0, 7 sampled lines -SHOW1: line offset: 9, discriminator: 0, number of samples: 2064, calls: _Z3fooi:631 _Z3bari:1471 +SHOW1: 9: 2064, calls: _Z3fooi:631 _Z3bari:1471 SHOW1: Function: _Z3fooi: 7711, 610, 1 sampled lines SHOW1: Function: _Z3bari: 20301, 1437, 1 sampled lines -SHOW1: line offset: 1, discriminator: 0, number of samples: 1437 +SHOW1: 1: 1437 2- Show only bar RUN: llvm-profdata show --sample --function=_Z3bari %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW2 SHOW2: Function: _Z3bari: 20301, 1437, 1 sampled lines -SHOW2: line offset: 1, discriminator: 0, number of samples: 1437 +SHOW2: 1: 1437 SHOW2-NOT: Function: main: 184019, 0, 7 sampled lines SHOW2-NOT: Function: _Z3fooi: 7711, 610, 1 sampled lines @@ -28,3 +28,7 @@ RUN: llvm-profdata merge --sample --text %p/Inputs/sample-profile.proftext %t-bi MERGE1: main:368038:0 MERGE1: 9: 4128 _Z3fooi:1262 _Z3bari:2942 MERGE1: _Z3fooi:15422:1220 + +5- Detect invalid text encoding (e.g. instrumentation profile text format). 
+RUN: not llvm-profdata show --sample %p/Inputs/foo3bar3-1.proftext 2>&1 | FileCheck %s --check-prefix=BADTEXT +BADTEXT: error: {{.+}}: Unrecognized sample profile encoding format diff --git a/test/tools/llvm-profdata/text-dump.test b/test/tools/llvm-profdata/text-dump.test new file mode 100644 index 000000000000..94a78d9dbac3 --- /dev/null +++ b/test/tools/llvm-profdata/text-dump.test @@ -0,0 +1,21 @@ +Basic tests for the text dump functions. + +RUN: llvm-profdata show --all-functions -counts --text %p/Inputs/basic.proftext > %t-basic.proftext1 +RUN: llvm-profdata merge -o %t-basic.proftext2 --text %p/Inputs/basic.proftext + +RUN: llvm-profdata merge -binary -o %t-basic.profdata1 %t-basic.proftext1 +RUN: llvm-profdata merge -o %t-basic.profdata2 %t-basic.proftext2 + +RUN: llvm-profdata show --all-functions -counts %t-basic.profdata1 > %t-basic.dump3 +RUN: llvm-profdata show --all-functions -counts %t-basic.profdata2 > %t-basic.dump4 + +RUN: llvm-profdata merge -text -o %t-basic.proftext5 %t-basic.profdata1 +RUN: llvm-profdata merge -text -o %t-basic.proftext6 %t-basic.profdata2 + +RUN: diff %t-basic.dump3 %t-basic.dump4 +RUN: diff %t-basic.proftext5 %t-basic.proftext6 + +RUN: not llvm-profdata merge -gcc -o %t-basic-profdata3 %t-basic.proftext2 2>&1 | FileCheck %s --check-prefix=UNKNOWN +UNKNOWN: Unknown + + diff --git a/test/tools/llvm-profdata/text-format-errors.test b/test/tools/llvm-profdata/text-format-errors.test index 01513e4fcb9e..05de2e38af1f 100644 --- a/test/tools/llvm-profdata/text-format-errors.test +++ b/test/tools/llvm-profdata/text-format-errors.test @@ -1,10 +1,29 @@ -RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER -RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER -INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed profile data +Tests for detecting bad instrumentation profile encodings. +1- Detect invalid count +RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER +RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER +INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed instrumentation profile data + +2- Detect bad hash RUN: not llvm-profdata show %p/Inputs/bad-hash.proftext 2>&1 | FileCheck %s --check-prefix=BAD-HASH RUN: not llvm-profdata merge %p/Inputs/bad-hash.proftext %p/Inputs/bad-hash.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH -BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed profile data +BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed instrumentation profile data +3- Detect no counts RUN: not llvm-profdata show %p/Inputs/no-counts.proftext 2>&1 | FileCheck %s --check-prefix=NO-COUNTS -NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed profile data +NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed instrumentation profile data + +4- Detect binary input +RUN: not llvm-profdata show %p/Inputs/text-format-errors.text.bin 2>&1 | FileCheck %s --check-prefix=BINARY +BINARY: error: {{.+}}: Unrecognized instrumentation profile encoding format +BINARY: Perhaps you forgot to use the -sample option?
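For reference, the "Unrecognized ... encoding format" errors follow from inspecting the leading magic bytes -- the same bytes the raw-32/64-bits tests above write with printf. A minimal C++ sketch of magic-based detection using only those byte sequences; hasRawInstrMagic is a hypothetical helper, not the real reader API:

  #include <cstddef>
  #include <cstring>

  // Raw-profile magics from the raw-* tests: "\377lprofr\201" / "\201rforpl\377"
  // for 64-bit counters and "\377lprofR\201" / "\201Rforpl\377" for 32-bit.
  static bool hasRawInstrMagic(const unsigned char *Buf, std::size_t Size) {
    static const unsigned char Magics[4][8] = {
        {0xff, 'l', 'p', 'r', 'o', 'f', 'r', 0x81},
        {0x81, 'r', 'f', 'o', 'r', 'p', 'l', 0xff},
        {0xff, 'l', 'p', 'r', 'o', 'f', 'R', 0x81},
        {0x81, 'R', 'f', 'o', 'r', 'p', 'l', 0xff},
    };
    if (Size < 8)
      return false;
    for (const auto &M : Magics)
      if (std::memcmp(Buf, M, 8) == 0)
        return true;
    return false; // neither raw form: try indexed, then text, then give up
  }

  int main() {
    const unsigned char Raw[8] = {0x81, 'r', 'f', 'o', 'r', 'p', 'l', 0xff};
    const unsigned char Text[8] = {'f', 'o', 'o', '\n', '1', '0', '\n', '2'};
    return (hasRawInstrMagic(Raw, 8) && !hasRawInstrMagic(Text, 8)) ? 0 : 1;
  }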
+ +5- Detect malformed value profile data +RUN: not llvm-profdata show %p/Inputs/vp-malform.proftext 2>&1 | FileCheck %s --check-prefix=VP +RUN: not llvm-profdata show %p/Inputs/vp-malform2.proftext 2>&1 | FileCheck %s --check-prefix=VP +VP: Malformed instrumentation profile data + +6- Detect truncated value profile data +RUN: not llvm-profdata show %p/Inputs/vp-truncate.proftext 2>&1 | FileCheck %s --check-prefix=VPTRUNC +VPTRUNC: Truncated profile data diff --git a/test/tools/llvm-profdata/value-prof.proftext b/test/tools/llvm-profdata/value-prof.proftext new file mode 100644 index 000000000000..ca2b1f822097 --- /dev/null +++ b/test/tools/llvm-profdata/value-prof.proftext @@ -0,0 +1,57 @@ +# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=IC +# RUN: llvm-profdata show -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefix=ICTEXT +# RUN: llvm-profdata merge -o %t.profdata %s +# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefix=IC + +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +2 +foo:100 +foo2:1000 +1 +foo2:20000 + +#IC: Indirect Call Site Count: 3 +#IC-NEXT: Indirect Target Results: +#IC-NEXT: [ 1, foo, 100 ] +#IC-NEXT: [ 1, foo2, 1000 ] +#IC-NEXT: [ 2, foo2, 20000 ] + +#ICTEXT: foo:100 +#ICTEXT-NEXT: foo2:1000 +#ICTEXT-NEXT: 1 +#ICTEXT-NEXT: foo2:20000 diff --git a/test/tools/llvm-profdata/weight-instr.test b/test/tools/llvm-profdata/weight-instr.test new file mode 100644 index 000000000000..7294cf3b01f0 --- /dev/null +++ b/test/tools/llvm-profdata/weight-instr.test @@ -0,0 +1,69 @@ +Tests for weighted merge of instrumented profiles. 
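For reference, the 3x/5x expectations below are plain weighted sums of the 1x counts: each input's counters are scaled by its weight before merging. A small C++ check of that arithmetic against the FileCheck values (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Counts from the 1X_1X_WEIGHT output below:
    const uint64_t FnCount = 866988873;  // foo's and bar's function count
    const uint64_t MaxBlock = 267914296; // maximum internal block count (in foo)

    // Scaling bar by 3 and foo by 5 reproduces the 3X_5X_WEIGHT expectations:
    assert(3 * FnCount == 2600966619ULL);  // bar:  Function count
    assert(5 * FnCount == 4334944365ULL);  // foo:  Function count
    assert(5 * MaxBlock == 1339571480ULL); // Maximum internal block count
    return 0;
  }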
+ +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata -weighted-input=1,%p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT +RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata %p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT +1X_1X_WEIGHT: Counters: +1X_1X_WEIGHT-NEXT: usage: +1X_1X_WEIGHT-NEXT: Hash: 0x0000000000000000 +1X_1X_WEIGHT-NEXT: Counters: 1 +1X_1X_WEIGHT-NEXT: Function count: 0 +1X_1X_WEIGHT-NEXT: foo: +1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a +1X_1X_WEIGHT-NEXT: Counters: 3 +1X_1X_WEIGHT-NEXT: Function count: 866988873 +1X_1X_WEIGHT-NEXT: bar: +1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a +1X_1X_WEIGHT-NEXT: Counters: 3 +1X_1X_WEIGHT-NEXT: Function count: 866988873 +1X_1X_WEIGHT-NEXT: main: +1X_1X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248 +1X_1X_WEIGHT-NEXT: Counters: 60 +1X_1X_WEIGHT-NEXT: Function count: 2 +1X_1X_WEIGHT-NEXT: Functions shown: 4 +1X_1X_WEIGHT-NEXT: Total functions: 4 +1X_1X_WEIGHT-NEXT: Maximum function count: 866988873 +1X_1X_WEIGHT-NEXT: Maximum internal block count: 267914296 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=5,%p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=3X_5X_WEIGHT +3X_5X_WEIGHT: Counters: +3X_5X_WEIGHT-NEXT: usage: +3X_5X_WEIGHT-NEXT: Hash: 0x0000000000000000 +3X_5X_WEIGHT-NEXT: Counters: 1 +3X_5X_WEIGHT-NEXT: Function count: 0 +3X_5X_WEIGHT-NEXT: foo: +3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a +3X_5X_WEIGHT-NEXT: Counters: 3 +3X_5X_WEIGHT-NEXT: Function count: 4334944365 +3X_5X_WEIGHT-NEXT: bar: +3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a +3X_5X_WEIGHT-NEXT: Counters: 3 +3X_5X_WEIGHT-NEXT: Function count: 2600966619 +3X_5X_WEIGHT-NEXT: main: +3X_5X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248 +3X_5X_WEIGHT-NEXT: Counters: 60 +3X_5X_WEIGHT-NEXT: Function count: 8 +3X_5X_WEIGHT-NEXT: Functions shown: 4 +3X_5X_WEIGHT-NEXT: Total functions: 4 +3X_5X_WEIGHT-NEXT: Maximum function count: 4334944365 +3X_5X_WEIGHT-NEXT: Maximum internal block count: 1339571480 + +3- Bad merge: invalid weight +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0.75,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=-5,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT 
+INVALID_WEIGHT: error: Input weight must be a positive integer. + +4- Bad merge: input path does not exist +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/does-not-exist.profdata -weighted-input=2,%p/Inputs/does-not-exist-either.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT +INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.profdata: {{[Nn]}}o such file or directory + +5- No inputs +RUN: not llvm-profdata merge -instr -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT +NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help diff --git a/test/tools/llvm-profdata/weight-sample.test b/test/tools/llvm-profdata/weight-sample.test new file mode 100644 index 000000000000..7b22c5f88f1f --- /dev/null +++ b/test/tools/llvm-profdata/weight-sample.test @@ -0,0 +1,56 @@ +Tests for weighted merge of sample profiles. + +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext -weighted-input=1,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT +RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext %p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT +1X_1X_WEIGHT: foo:1763288:35327 +1X_1X_WEIGHT-NEXT: 7: 35327 +1X_1X_WEIGHT-NEXT: 8: 35327 +1X_1X_WEIGHT-NEXT: 9: 6930 +1X_1X_WEIGHT-NEXT: 10: 29341 +1X_1X_WEIGHT-NEXT: 11: 11906 +1X_1X_WEIGHT-NEXT: 13: 18185 foo:19531 +1X_1X_WEIGHT-NEXT: 15: 36458 +1X_1X_WEIGHT-NEXT: bar:1772037:35370 +1X_1X_WEIGHT-NEXT: 17: 35370 +1X_1X_WEIGHT-NEXT: 18: 35370 +1X_1X_WEIGHT-NEXT: 19: 7005 +1X_1X_WEIGHT-NEXT: 20: 29407 +1X_1X_WEIGHT-NEXT: 21: 12170 +1X_1X_WEIGHT-NEXT: 23: 18150 bar:19829 +1X_1X_WEIGHT-NEXT: 25: 36666 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge -sample -text -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=5,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=3X_5X_WEIGHT +3X_5X_WEIGHT: foo:8816440:176635 +3X_5X_WEIGHT-NEXT: 7: 176635 +3X_5X_WEIGHT-NEXT: 8: 176635 +3X_5X_WEIGHT-NEXT: 9: 34650 +3X_5X_WEIGHT-NEXT: 10: 146705 +3X_5X_WEIGHT-NEXT: 11: 59530 +3X_5X_WEIGHT-NEXT: 13: 90925 foo:97655 +3X_5X_WEIGHT-NEXT: 15: 182290 +3X_5X_WEIGHT-NEXT: bar:5316111:106110 +3X_5X_WEIGHT-NEXT: 17: 106110 +3X_5X_WEIGHT-NEXT: 18: 106110 +3X_5X_WEIGHT-NEXT: 19: 21015 +3X_5X_WEIGHT-NEXT: 20: 88221 +3X_5X_WEIGHT-NEXT: 21: 36510 +3X_5X_WEIGHT-NEXT: 23: 54450 bar:59487 +3X_5X_WEIGHT-NEXT: 25: 109998 + +3- Bad merge: invalid weight +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0.75,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=-5,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not 
llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +INVALID_WEIGHT: error: Input weight must be a positive integer. + +4- Bad merge: input path does not exist +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/does-not-exist.proftext -weighted-input=2,%p/Inputs/does-not-exist-either.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT +INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.proftext: {{[Nn]}}o such file or directory + +5- No inputs +RUN: not llvm-profdata merge -sample -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT +NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help diff --git a/test/tools/llvm-readobj/ARM/attribute-4.s b/test/tools/llvm-readobj/ARM/attribute-4.s index dd0a4a6d6a73..2c27785410ed 100644 --- a/test/tools/llvm-readobj/ARM/attribute-4.s +++ b/test/tools/llvm-readobj/ARM/attribute-4.s @@ -15,6 +15,13 @@ @CHECK-OBJ-NEXT: TagName: FP_arch @CHECK-OBJ-NEXT: Description: VFPv3-D16 +.eabi_attribute Tag_Advanced_SIMD_arch, 4 +@CHECK: .eabi_attribute 12, 4 +@CHECK-OBJ: Tag: 12 +@CHECK-OBJ-NEXT: Value: 4 +@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch +@CHECK-OBJ-NEXT: Description: ARMv8.1-a NEON + .eabi_attribute Tag_PCS_config, 4 @CHECK: .eabi_attribute 13, 4 @CHECK-OBJ: Tag: 13 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 new file mode 100644 index 0000000000000000000000000000000000000000..c222a899ba7178b4d305949c330c4033f6ea1081 GIT binary patch literal 1080 zcmb7DF-t;G6h8JUG#d;Wf(l%MhI%v@5d@*76<7v2*mQZOW`jz;*B}I~4o=~}X=&&O zG}O@C+FE1weeb@DPYI$AKF|HmJ@?#q&bi)Y^SMYQA_kUNLY2Z#3if$I@1!J&omGj+ zw7+lSY5iElPf{dgczzChUTjdaqD7NI!%OW>$SzM)>W(H9rANJ1*y3RQg7OM998|Vl$>o)zQPKGr5y#^hg z$Hv(H)|eKxe}0laKH3v?kS}D?c~!{e&ay>SOlR_0H8PX%sDkkz+uvA3$k$-dV)g6| z$?poDyI(cm+X~2e($5)_r=WSGjNz=;10UnqOOkKFfITBNL9Q9co|wKs`d~g{7v#I* zJ?uT>_#e{K(f@@gG{<0kSV|9Zhg<~UT?DPEjrs+SM_A?{O&u|;!}v8Y}* zOhZwslO!rh!(j@$H|U~RJo@z$>a(sZUKyHq`3|u6XFv%+EB_~3r?P}TI71^ra@1JUMgb@LPEa(8vF!Y@oj-xU?{rvI5X%9%f_a!+;gmqr3$; zV;?hedHnBq28DUk0Ajix{#Wk;CNS~sGLM)6+`BOK-Zo)4asAoeoSWB{Yv7>;Yw^D1 OWA$>^;5ERBuJ;@8Tx)Uw literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 new file mode 100644 index 0000000000000000000000000000000000000000..25ce5c71b38b682305e8c98fcb1f8c79a06a677d GIT binary patch literal 1784 zcmcgsJ#Q015S=|cB*YX@fGj>FQ$WX6Ai0Z42vQ`VpaH7W1v@bk_QBmDB@z-*D%Yv_ zDUoPW@&}NhrlFvP67yzfW_1o{3sErA?%R1Yv$Hod_Pfmo_oK*BkOp1ESqXQnd{401 z_1L2>HK<1|T7k~KO|j40?H)zBo%b^mVf-m1`lvSwN?i08aVHvQ$nR`m@eMw&e|Tqo zqPl;7Yf?q8u^&{tl0<^Ixr< z7otnx>+)Wn~`Jhp{q%mB;?W@k%zG{?s_Fd+3MteQW7ie+ zXSji7^Lwgqt^*qj*OPl^{kHfvxLaxiSPtv2A=&SQeOT-sKHtHz|2@mE{hsM>-TOKJ z8kXI+C?0M>EFTkFSo-_c`3!Rv@}Izmzu&~ZAFZdySe9RWT*UQ)7|zt24HnOM0jqj9 x+f||iJv=c+(ekm%=W1=#g(3PZit7UTKkKaMAwSoTxjFfp{CzAp1;QTk{{{~(Y}5b% literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..612e7b2112ff441571330bbfc3f63b5a346270b0 GIT binary patch literal 1616 zcmcgs&o2W(6n^a@_2W=C36j0Iv2h?AgtoLuiUS8ulci-H+D3OJBof!2o%|V&690sY zgR_f+D@XO-?0oE2S0WNG>3lQa``*m!n>Tw|ESJ)$6u~T7Bw5`WDrX$o9w>-%G*1?f z{nV$m5!aW<%&-rT$kra&zK?(4Z&Nm}WNi!3jUK-cacSm%6PGdjSd4Ne&TSQJnPy}_ 
z{w=>XSSUWc+&x`gJf7^1u`AIeF3XX~AHrVE_mY>kL1`RG;D-`;I&3sZRV!}bl2ZG= zul&Gm229l}vf=A{yCL{21Dl*Hb}fmqNsYwxzhoTHE243Og9at9e=)~DuLbTtIeyR# z(ZlShe(HFx?~zkEud^(~ph-^EQ-@8rR@IDKJ3ja^Rf}&I^@s{BN`KZN&Rd`Liu2#^ zRs0rL8T|>plsnahmwF4|*#|cB*)IH~=D(jiouHpf;7P6QZRR)Dmr1Rbw+i{P+A5W{ zi#uv3Unmz=z&6@g9Kaf3pqWQQT&EDIDVQzJYNye?U5jC0@(hlN`j*FNnu?4SvLDLpcpI t!e=NAo=IpeO7grj|7h+}>5Yj;9pzm7M2GtI^Ss;n_l8^$al|^${{W#QYM}rC literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel b/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel new file mode 100755 index 0000000000000000000000000000000000000000..a8e396c2ec36e25dd16cac0458cb291982ccc806 GIT binary patch literal 2484 zcmb7FON$g&6h3W@Nyj|w84U#Tum=Ulz}#tdRLG08^EN#Xd=M0|rh2-jr=TB2bqyIw zAPs@60&yu>xy&xVK*_=%ke@IM7g;JOxDYzuSGP`A(MCxQe084p+}Am`eq5+6777JX za-=BKHXM;z@JDZm94tvVD`j}5WI_%LOo@+vA{l@|2O*9D7%H>S74JeP=woj9!(|AH z4`3tcF9M)&6MjYBuQ_a({q~WN;QX9{b68seypZH(4z8R21NiPCNbx&#g1L#;5&bi* zpgz-Tm|oYM&SJ|4aEx{dnUVSSb7~^?e<~#Nl(7&Fcmd9}-2^`bz6`t#lz?+Mi+v=1 zA2Nw&dk1^M@ zo9CD}Y5W&rU6EL4$H93&q!MPJWHqk$$>1aC8ec*@_A-ngM?4Sz?r{7e`E*2|8_~ZU z(Z!v=zE)jbnR9C^Yu~J_Es5)1uFbov)eUjCV}CCOYk09Iu5X!J@AceX>_zlIhNII! z){LGH?w=f!2Y*aHzUNHh0w@3e(}DhLQSP5C%6_{0EdCWze`#*+v+~r^+O<>V^QK}d zlDYQXY}=p!C;{IcvqhQ$ABsepxN1#0&itZuL*KS z>{XJJ-T-^k>h^{=hq~PYd&l~D^CGNel9N^eYuW0yVp!u=x0Rvy$~quv9XLs)*&4U; ztiFt#q%-pRJDCE>NnV=FzLl`u8Jxe?*sd5cZ~{R%5#D zh0~GWGF!va7PlFSyHs1hSgpD1i;GtluDVyN7i$adu(n@r0UE*sFubk>xuo1)bQBLNTi=8&`~154KxgvnM9h1|C96$WgK(P z18iQ#vEPUuEf5≧O_iTN1Y0T`!q-9=pT;K4?skN7U-UUxAYT{~^eMfwF$nfg=2oT6968TeiTVD5 Zk>mOHaD7|{|FQY09^$fphD}NH{R8kf?yCR* literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri new file mode 100755 index 0000000000000000000000000000000000000000..9566ed5c0f1481484b014ec74f281531f0d2ac3f GIT binary patch literal 13208 zcmeI3&uSA<6vj`+(b}rC8$nQE7P^v5gM<)VjS&Aqg)S7_+0+CQm?R}LVgi!l&V>)) zWB3N`BlrL=d<=Jb&b{ZGj6)kr5$WPw=>5()ch0>hzggYeJBLqpo(8oVp#}7l#A^pW zlIXzKh8iphwP~IE>)c;q|FVnu@x%*{`u0nwhF;b9CDu^vNIGtj>kGZ3E>x#+wdv4_ z@=@!C(w2Wr^4G*vzpD3i0Vu{LysFM`GAB1^IZxP7Lpp9%_0eU&c%cy6xrto9@yOSh2j@`$d00fPJV$1GoEIj)!6B zw!+KE1+r?%@$Y~Z-#1?*hxFx$f?!b#!UfU650B#0(FLm3N-+XPzz7%tBVYuKfDteP zM!*Od0V8lO0^8|uzt!AMd-ZfUem|?v+O2x48E2FDp_@ z5Eb|B2mLgiy!|klroC19XC1@2g?5 zdDrNeEp8}SwRmrSbUCj616H~8f%Q-OjsfRGs2|HTIag|u-{4xKqUu7x4gMoGid+Om rb)Hw{7~9p~-{BZ)FF*Q5^HC1o5$Xs39ZouF{m+_sqdY0w)%E`XtEE!O literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 b/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 new file mode 100755 index 0000000000000000000000000000000000000000..4b907694e800a622f28357c2cbe4a4129316febf GIT binary patch literal 2256 zcmcIl%Wl(95FIBah4LzeMHe8o5(^e!DToCNgo2<&l}bRMA|W9x2Zy>+6QkHbvVmVf ztoj?U%90P@16XFq7WSYp$DZT3ZlbCZOy$g-$GLOI_l)~tZF9ZiI48uUD!0X}4hTN1njhvCwQd9SwJ`D?(^~n6x$IV8K!j|Tps9_~rQ_Esz1BaUQ50@;@^e8ih znmBBC6dccK+Z|BL>RFUus(wHuiXvwC-4(rwxdWquNB8$ocO&&Ua4Kx{qPP@Z(bkVaZ$(- zyjR>K@i!qn`Zwu`?=YF%+C6>P5z_8`7o5w_nTXK zN4vpF-L2OTsfl)-%DiwcMwXBMr||~#o#w|~P2>0K41dPsgM7U@BOmuLjo;$kd)B=3 zVB_wDsyTTN=4iSpvHFzLLBRJo-oT}ofYCaPt^7q&+*KoUROM3jW*{m z*C?)nIQnE=_#ikJ9)2JA{iH@tQOfu&4FbzK=Nu6~GBD4_{@GVw&1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-readobj/codeview-linetables.test b/test/tools/llvm-readobj/codeview-linetables.test index d124e6e2d454..880b6d52f095 100644 --- a/test/tools/llvm-readobj/codeview-linetables.test +++ 
b/test/tools/llvm-readobj/codeview-linetables.test @@ -50,7 +50,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x30 -MFUN32: FunctionName: _x +MFUN32: LinkageName: _x MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF5 @@ -69,7 +69,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x30 -MFUN32: FunctionName: _y +MFUN32: LinkageName: _y MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF5 @@ -88,7 +88,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x40 -MFUN32: FunctionName: _f +MFUN32: LinkageName: _f MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF4 @@ -103,7 +103,7 @@ MFUN32-NEXT: Type: 0xF1 MFUN32-NEXT: PayloadSize: 0x8 MFUN32: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _x +MFUN32-NEXT: LinkageName: _x MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0xA MFUN32-NEXT: FilenameSegment [ @@ -114,7 +114,7 @@ MFUN32-NEXT: +0x8: 5 MFUN32-NEXT: ] MFUN32-NEXT: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _y +MFUN32-NEXT: LinkageName: _y MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0xA MFUN32-NEXT: FilenameSegment [ @@ -125,7 +125,7 @@ MFUN32-NEXT: +0x8: 9 MFUN32-NEXT: ] MFUN32-NEXT: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _f +MFUN32-NEXT: LinkageName: _f MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0x14 MFUN32-NEXT: FilenameSegment [ @@ -158,7 +158,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x30 -MFUN64: FunctionName: x +MFUN64: LinkageName: x MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF1 @@ -173,7 +173,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x30 -MFUN64: FunctionName: y +MFUN64: LinkageName: y MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF1 @@ -188,7 +188,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x40 -MFUN64: FunctionName: f +MFUN64: LinkageName: f MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF4 @@ -203,7 +203,7 @@ MFUN64-NEXT: Type: 0xF1 MFUN64-NEXT: PayloadSize: 0x8 MFUN64: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: x +MFUN64-NEXT: LinkageName: x MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0xE MFUN64-NEXT: FilenameSegment [ @@ -214,7 +214,7 @@ MFUN64-NEXT: +0x9: 5 MFUN64-NEXT: ] MFUN64-NEXT: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: y +MFUN64-NEXT: LinkageName: y MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0xE MFUN64-NEXT: FilenameSegment [ @@ -225,7 +225,7 @@ MFUN64-NEXT: +0x9: 9 MFUN64-NEXT: ] MFUN64-NEXT: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: f +MFUN64-NEXT: LinkageName: f MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0x18 MFUN64-NEXT: FilenameSegment [ @@ -286,7 +286,7 @@ MFILE32: ] MFILE32-NEXT: Subsection [ MFILE32-NEXT: Type: 0xF2 MFILE32-NEXT: PayloadSize: 0x64 -MFILE32: FunctionName: _f +MFILE32: LinkageName: _f MFILE32-NEXT: ] MFILE32-NEXT: Subsection [ MFILE32-NEXT: Type: 0xF4 @@ -301,7 +301,7 @@ MFILE32-NEXT: Type: 0xF1 MFILE32-NEXT: PayloadSize: 0x8 MFILE32: ] MFILE32-NEXT: FunctionLineTable [ -MFILE32-NEXT: FunctionName: _f +MFILE32-NEXT: LinkageName: _f MFILE32-NEXT: Flags: 0x0 MFILE32-NEXT: CodeSize: 0x14 MFILE32-NEXT: FilenameSegment [ @@ -343,7 +343,7 @@ MFILE64: ] MFILE64-NEXT: Subsection [ MFILE64-NEXT: Type: 0xF2 MFILE64-NEXT: PayloadSize: 0x64 -MFILE64: FunctionName: f 
+MFILE64: LinkageName: f MFILE64-NEXT: ] MFILE64-NEXT: Subsection [ MFILE64-NEXT: Type: 0xF4 @@ -358,7 +358,7 @@ MFILE64-NEXT: Type: 0xF1 MFILE64-NEXT: PayloadSize: 0x8 MFILE64: ] MFILE64-NEXT: FunctionLineTable [ -MFILE64-NEXT: FunctionName: f +MFILE64-NEXT: LinkageName: f MFILE64-NEXT: Flags: 0x0 MFILE64-NEXT: CodeSize: 0x18 MFILE64-NEXT: FilenameSegment [ @@ -406,7 +406,7 @@ MCOMDAT-NEXT: Section: ?f@@YAHXZ MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: } MCOMDAT: FunctionLineTable [ -MCOMDAT-NEXT: FunctionName: ?f@@YAHXZ +MCOMDAT-NEXT: LinkageName: ?f@@YAHXZ MCOMDAT-NEXT: Flags: 0x0 MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: FilenameSegment [ @@ -422,7 +422,7 @@ MCOMDAT-NEXT: Section: ?g@@YAHXZ MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: } MCOMDAT: FunctionLineTable [ -MCOMDAT-NEXT: FunctionName: ?g@@YAHXZ +MCOMDAT-NEXT: LinkageName: ?g@@YAHXZ MCOMDAT-NEXT: Flags: 0x0 MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: FilenameSegment [ diff --git a/test/tools/llvm-readobj/elf-gnuhash.test b/test/tools/llvm-readobj/elf-gnuhash.test new file mode 100644 index 000000000000..8642a4dc9d7b --- /dev/null +++ b/test/tools/llvm-readobj/elf-gnuhash.test @@ -0,0 +1,63 @@ +// Check dumping of the GNU Hash section +// The input was generated using the following: +// $ llvm-mc -filetype=obj -triple=i386-pc-linux -o example-i386.o example.s +// $ llvm-mc -filetype=obj -triple=x86_64-pc-linux -o example-x86_64.o example.s +// $ llvm-mc -filetype=obj -triple=powerpc-pc-linux -o example-ppc.o example.s +// $ llvm-mc -filetype=obj -triple=powerpc64-pc-linux -o example-ppc64.o example.s +// $ ld -shared -m elf_i386 -hash-style=gnu -o gnuhash.so.elf-i386 example-i386.o +// $ ld -shared -m elf_x86_64 -hash-style=gnu -o gnuhash.so.elf-x86_64 example-x86_64.o +// $ ld -shared -m elf32ppc -hash-style=gnu -o gnuhash.so.elf-ppc example-ppc.o +// $ ld -shared -m elf64ppc -hash-style=gnu -o gnuhash.so.elf-ppc64 example-ppc64.o +// $ cat example.s +// .globl foo +// foo: + +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-i386 | FileCheck %s -check-prefix I386 +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-x86_64 | FileCheck %s -check-prefix X86_64 +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc | FileCheck %s -check-prefix PPC +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc64 | FileCheck %s -check-prefix PPC64 + +I386: Arch: i386 +I386: GnuHashTable { +I386-NEXT: Num Buckets: 3 +I386-NEXT: First Hashed Symbol Index: 1 +I386-NEXT: Num Mask Words: 1 +I386-NEXT: Shift Count: 5 +I386-NEXT: Bloom Filter: [0x39004608] +I386-NEXT: Buckets: [1, 4, 0] +I386-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +I386-NEXT: } + +X86_64: Arch: x86_64 +X86_64: GnuHashTable { +X86_64-NEXT: Num Buckets: 3 +X86_64-NEXT: First Hashed Symbol Index: 1 +X86_64-NEXT: Num Mask Words: 1 +X86_64-NEXT: Shift Count: 6 +X86_64-NEXT: Bloom Filter: [0x800000001204288] +X86_64-NEXT: Buckets: [1, 4, 0] +X86_64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +X86_64-NEXT: } + +PPC: Arch: powerpc +PPC: GnuHashTable { +PPC-NEXT: Num Buckets: 3 +PPC-NEXT: First Hashed Symbol Index: 1 +PPC-NEXT: Num Mask Words: 1 +PPC-NEXT: Shift Count: 5 +PPC-NEXT: Bloom Filter: [0x3D00460A] +PPC-NEXT: Buckets: [1, 5, 0] +PPC-NEXT: Values: [0xEEBEC3A, 0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +PPC-NEXT: } + +PPC64: Arch: powerpc64 +PPC64: GnuHashTable { +PPC64-NEXT: Num Buckets: 3 +PPC64-NEXT: First Hashed Symbol Index: 1 +PPC64-NEXT: Num Mask Words: 1 +PPC64-NEXT: Shift Count: 6 +PPC64-NEXT: Bloom 
Filter: [0x800000001204288] +PPC64-NEXT: Buckets: [1, 4, 0] +PPC64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +PPC64-NEXT: } + diff --git a/test/tools/llvm-readobj/elf-versioninfo.test b/test/tools/llvm-readobj/elf-versioninfo.test new file mode 100644 index 000000000000..e8113e4b2fed --- /dev/null +++ b/test/tools/llvm-readobj/elf-versioninfo.test @@ -0,0 +1,81 @@ +// Test that llvm-readobj dumps version info tags correctly. + +RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s + +CHECK: 0x000000006FFFFFF0 VERSYM 0x24C +CHECK: 0x000000006FFFFFFC VERDEF 0x25C +CHECK: 0x000000006FFFFFFD VERDEFNUM 3 + +CHECK: Version symbols { +CHECK-NEXT: Section Name: .gnu.version (20) +CHECK-NEXT: Address: 0x24C +CHECK-NEXT: Offset: 0x24C +CHECK-NEXT: Link: 1 +CHECK-NEXT: Symbols [ +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 0 +CHECK-NEXT: Name: @ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: _end@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: _edata@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 3 +CHECK-NEXT: Name: goo@@VERSION2 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: __bss_start@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 2 +CHECK-NEXT: Name: foo@@VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 2 +CHECK-NEXT: Name: VERSION1@@VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 3 +CHECK-NEXT: Name: VERSION2@@VERSION2 +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: } + +CHECK: Version definition { +CHECK-NEXT: Section Name: .gnu.version_d (70) +CHECK-NEXT: Address: 0x25C +CHECK-NEXT: Offset: 0x25C +CHECK-NEXT: Link: 2 +CHECK-NEXT: Entries [ +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x0 +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 1 +CHECK-NEXT: Index: 1 +CHECK-NEXT: Cnt: 1 +CHECK-NEXT: Name: blah +CHECK-NEXT: } +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x1C +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Index: 2 +CHECK-NEXT: Cnt: 1 +CHECK-NEXT: Name: VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x38 +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Index: 3 +CHECK-NEXT: Cnt: 2 +CHECK-NEXT: Name: VERSION2 +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: } diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test index fd030ef0b56e..2d67089d6118 100644 --- a/test/tools/llvm-readobj/file-headers.test +++ b/test/tools/llvm-readobj/file-headers.test @@ -330,16 +330,8 @@ COFF-UNKNOWN-NEXT: Characteristics [ (0x0) COFF-UNKNOWN-NEXT: ] COFF-UNKNOWN-NEXT: } -COFF-IMPORTLIB: Format: COFF- -COFF-IMPORTLIB-NEXT: Arch: unknown -COFF-IMPORTLIB-NEXT: AddressSize: 32bit -COFF-IMPORTLIB-NEXT: ImageFileHeader { -COFF-IMPORTLIB-NEXT: Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0) -COFF-IMPORTLIB-NEXT: SectionCount: 0 -COFF-IMPORTLIB-NEXT: TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000) -COFF-IMPORTLIB-NEXT: PointerToSymbolTable: 0x0 -COFF-IMPORTLIB-NEXT: SymbolCount: 0 -COFF-IMPORTLIB-NEXT: OptionalHeaderSize: 0 -COFF-IMPORTLIB-NEXT: Characteristics [ (0x0) -COFF-IMPORTLIB-NEXT: ] -COFF-IMPORTLIB-NEXT: } +COFF-IMPORTLIB: Format: COFF-import-file +COFF-IMPORTLIB-NEXT: Type: code +COFF-IMPORTLIB-NEXT: Name type: noprefix +COFF-IMPORTLIB-NEXT: Symbol: __imp__func +COFF-IMPORTLIB-NEXT: Symbol: _func diff --git a/test/tools/llvm-readobj/mips-rld-map-rel.test b/test/tools/llvm-readobj/mips-rld-map-rel.test new file mode 100644 index 000000000000..adde78784d63 
--- /dev/null +++ b/test/tools/llvm-readobj/mips-rld-map-rel.test @@ -0,0 +1,24 @@ +# Check DT_MIPS_RLD_MAP_REL .dynamic section tag reading + +RUN: llvm-readobj -dynamic-table %p/Inputs/mips-rld-map-rel.elf-mipsel | \ +RUN: FileCheck %s + +CHECK: DynamicSection [ (16 entries) +CHECK-NEXT: Tag Type Name/Value +CHECK-NEXT: 0x00000004 HASH 0x220 +CHECK-NEXT: 0x00000005 STRTAB 0x2FC +CHECK-NEXT: 0x00000006 SYMTAB 0x25C +CHECK-NEXT: 0x0000000A STRSZ 72 (bytes) +CHECK-NEXT: 0x0000000B SYMENT 16 (bytes) +CHECK-NEXT: 0x70000035 MIPS_RLD_MAP_REL 0x101E0 +CHECK-NEXT: 0x00000015 DEBUG 0x0 +CHECK-NEXT: 0x00000003 PLTGOT 0x10390 +CHECK-NEXT: 0x70000001 MIPS_RLD_VERSION 1 +CHECK-NEXT: 0x70000005 MIPS_FLAGS NOTPOT +CHECK-NEXT: 0x70000006 MIPS_BASE_ADDRESS 0x0 +CHECK-NEXT: 0x7000000A MIPS_LOCAL_GOTNO 2 +CHECK-NEXT: 0x70000011 MIPS_SYMTABNO 10 +CHECK-NEXT: 0x70000012 MIPS_UNREFEXTNO 15 +CHECK-NEXT: 0x70000013 MIPS_GOTSYM 0xA +CHECK-NEXT: 0x00000000 NULL 0x0 +CHECK-NEXT: ] diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test index 19b7aa0516d1..70ae0f22a0e4 100644 --- a/test/tools/llvm-readobj/sections-ext.test +++ b/test/tools/llvm-readobj/sections-ext.test @@ -223,6 +223,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: Relocations [ MACHO-X86-64-NEXT: 0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction MACHO-X86-64-NEXT: 0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts @@ -260,6 +261,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: Relocations [ MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Symbols [ @@ -514,6 +516,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x24 @@ -587,6 +590,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x20 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x14 @@ -639,6 +643,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Symbols [ @@ -672,6 +677,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x1 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Symbols [ @@ -695,6 +701,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x2 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x0 diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test index fe734d77e34b..54654e7070ef 100644 --- a/test/tools/llvm-readobj/sections.test +++ b/test/tools/llvm-readobj/sections.test @@ -172,6 +172,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 
0x0 MACHO-X86-64-NEXT: } MACHO-X86-64-NEXT: Section { MACHO-X86-64-NEXT: Index: 1 @@ -188,6 +189,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: } MACHO-X86-64-NEXT:] @@ -296,6 +298,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 1 @@ -314,6 +317,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x20 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 2 @@ -330,6 +334,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 3 @@ -346,6 +351,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x1 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 4 @@ -362,6 +368,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x2 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: ] diff --git a/test/tools/llvm-size/basic.test b/test/tools/llvm-size/basic.test new file mode 100644 index 000000000000..8b2d66eef3ce --- /dev/null +++ b/test/tools/llvm-size/basic.test @@ -0,0 +1,2 @@ +RUN: llvm-size %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}llvm-size{{(\.EXE|\.exe)?}}: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-split/alias.ll b/test/tools/llvm-split/alias.ll new file mode 100644 index 000000000000..18e0e7f12d69 --- /dev/null +++ b/test/tools/llvm-split/alias.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0-DAG: @afoo = alias [2 x i8*], [2 x i8*]* @foo +; CHECK1-DAG: @afoo = external global [2 x i8*] +@afoo = alias [2 x i8*], [2 x i8*]* @foo + +; CHECK0-DAG: declare void @abar() +; CHECK1-DAG: @abar = alias void (), void ()* @bar +@abar = alias void (), void ()* @bar + +@foo = global [2 x i8*] [i8* bitcast (void ()* @bar to i8*), i8* bitcast (void ()* @abar to i8*)] + +define void @bar() { + store [2 x i8*] zeroinitializer, [2 x i8*]* @foo + store [2 x i8*] zeroinitializer, [2 x i8*]* @afoo + ret void +} diff --git a/test/tools/llvm-split/comdat.ll b/test/tools/llvm-split/comdat.ll new file mode 100644 index 000000000000..45faf4bfe26e --- /dev/null +++ b/test/tools/llvm-split/comdat.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +$foo = comdat any + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() comdat { + call void @bar() + ret void +} + +; CHECK0: define void @bar() +; CHECK1: declare void @bar() +define void @bar() comdat($foo) { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/function.ll b/test/tools/llvm-split/function.ll new file mode 100644 index 000000000000..37272dbbcee2 --- /dev/null 
+++ b/test/tools/llvm-split/function.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/global.ll b/test/tools/llvm-split/global.ll new file mode 100644 index 000000000000..6d2425691e10 --- /dev/null +++ b/test/tools/llvm-split/global.ll @@ -0,0 +1,11 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: @foo = global i8* bitcast +; CHECK1: @foo = external global i8* +@foo = global i8* bitcast (i8** @bar to i8*) + +; CHECK0: @bar = external global i8* +; CHECK1: @bar = global i8* bitcast +@bar = global i8* bitcast (i8** @foo to i8*) diff --git a/test/tools/llvm-split/internal.ll b/test/tools/llvm-split/internal.ll new file mode 100644 index 000000000000..ce4272c5f0dd --- /dev/null +++ b/test/tools/llvm-split/internal.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define hidden void @foo() +; CHECK1: declare hidden void @foo() +define internal void @foo() { + call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/unnamed.ll b/test/tools/llvm-split/unnamed.ll new file mode 100644 index 000000000000..fd24b4ca92bb --- /dev/null +++ b/test/tools/llvm-split/unnamed.ll @@ -0,0 +1,31 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: declare hidden void @__llvmsplit_unnamed() +; CHECK1: define hidden void @__llvmsplit_unnamed() +define internal void @0() { + ; CHECK1: call void @foo() + call void @foo() + ret void +} + +; CHECK0: declare hidden void @__llvmsplit_unnamed.1() +; CHECK1: define hidden void @__llvmsplit_unnamed.1() +define internal void @1() { + ; CHECK1: call void @foo() + ; CHECK1: call void @foo() + call void @foo() + call void @foo() + ret void +} + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + ; CHECK0: call void @__llvmsplit_unnamed.1() + ; CHECK0: call void @__llvmsplit_unnamed() + call void @1() + call void @0() + ret void +} diff --git a/test/tools/llvm-symbolizer/Inputs/addr.exe b/test/tools/llvm-symbolizer/Inputs/addr.exe new file mode 100755 index 0000000000000000000000000000000000000000..38d88b65741e9bc98b5b692fc7fe5f649839a2d5 GIT binary patch literal 10109 zcmeHNU2I&%6`p&qf3dOGb|7{V@B(W{0N>b&u?&>tI*yawXcJ6u+JXqzYwyP1cD-xu z-c4-Ek1IpPDuL#ql~5%_UQp8q+CEfOgeq{FinNr6(3eInm0SeW;YW2vN-ZjIzcY7^ zefN4@T8X|?8ENOt`R06Q&di;inLE2r^hfvTnkG2uVuv8Dqs)?!n+q|wPO1v)5!J#H zH;bD@1(0%lELlU+EwNlwu31(oI|H%|pB~)>dUQ(`SZ;D*!IC2+N$ss9OBG#NR_UrN zg$PQ3coggW)l!8Vk_DC=ho$0S9;w6=bqH6d6s?>_JpMOplN!Eqi`?X5K~%_@GDs=H!qDmmQU zwKWxQPbJg&$@a;f?)L63GnX;9$$kgWJlXC&a7YHi<5&-kVy5XtbAaj_ANyfM-_&~c z#S<^*_O%Y){>p8SzQgl_ByqW26pIq!{3)vV0%Zg08%x;Sum(0@MjX>JLr?QPP@^`#3?J3opP-h4P*cso4v&ceW8TT;B0RHL7cRVeOh0_?3%ZFd3 zs-?O5)@j1dkjD0B7Y*2l3%?AXeSc?IyAXbJF%R;~WEg(xcOZuguetoU6@U7kEeJ>C 
z8xDav|2P6Y{95_7fV7X#QUAj;_smtnz`1@ogms5rUBqeu`=twqFMSJ?`c`o%2Z@dh zyt)bB&{t@DI%nr!`Fv@~1!&9xyaeD3J+BAPxxN1Q+!0D#oxtZ5@B*Z`??m}`D(pII&rIca^=FM|s6$uMl@Yiy0#`=h z$_QK;f&cRn2;f{sX`#%OQbERNMm~4es)_}}wNtN4qPD`-TU0&2H*HipzfaJ)n$m)L zzvA~duIFv{Io%>N@%PV{G9+hp3;D{sou%66cbYlfLah*2$y|t?O4caZtmHLHvf#7w zeEpDw_>3UWAVB!MQK<;)m7G=WGk#j>+z)QATG_LXyczSmf9kGe!I-=?Om?lJ6w-I z1ReC;2lhHjq$SfM&asR*f}(tvI^bf+g?J4gcY0OH(XJKk630Otl3q+GNuS*)X!#nD|%IdoyC@e}^gVt8pRBqG-)^Du)^Ajx%T4MzwGBu;_u9~}RZfS`$0S#jA z8|A{qB25TC1Z77}4e;`MK}dxOSB({ad7{N?X1kp&;3C{Ep|58QAzi0+Hj(YuYaRf< zx*WBg@F2c|k1ptIt!e+PyBfK?Ey4(EV$vJ|j#RC4Z;TqP+_=g(rd7T1qOQaGznT@~ zaH}&u!Ot0Q&>}{P`?M#~rknWH-4G4>3#`znQUYka;1*Z%)yzOCX5h{vD%n{pXk{>BK*;5{~;96{3=I7DX3R$Pz6xtuUZGUMY3jD;Cb4ChDf ziTp4a2~@axGJPb&<=F6WHgS|S+%_e+i~-|fDudRReK}JT@b#r6+TUpExQ~(UD=la% z_m;fBNe~%NGuCfHfmR0Q59$`Qm3zUU)K)Uy9~q&g%gc9kOIA6`VMuB#nV%m<72TI| z`_JhXG$Eez@v*+h#QPij*$wm)>{-m8)h!^jUrVukKkMhawL;J;NP9H%=X4925OWF` zlA7B`8ar;E_tUQeqdlDYmSI5?!Xglf&jGHsdi)7#pm`oezs%zP4HOiqGd5;?r5p&wmye z#l`;lT(_Y3b*S?ue(sDFQ1J3andMd~LvEk#`5ejedp&h7I&cn_GjeZB{uqfQ~Qy{oP0jDLU%FQ4B7<`sW46P08h z%ga7~ATT5?11dm{Q_)w&^C>FG7x$m8uPFr;k=v0VGXMNp0!Hz7uz`}yf6Rxg;glSz zqN6ZlUeo7Q`=%E}*>Bst2LHE;PX`-s^7BJ$@W0>SvQBu(DEs-_*5Kcu4t>@a@OLP_ zfBa&Ke_#zh#}Qeh{huiQkJsQcJh%q`FrBTD=wR(l+%97@OgwiOBY8~m<355s{#+(2|ay0xm(z-l{K5;ufX z5--Df#uENy;N&x_TYeb*slfT86kdsQMJc>WOqax~1)m3mCmFb<;CdKCsJGI%fBlZx68`jKMpjoM`3|TY zc&YeXfK&W_KW%H+@09*atj{B{RLb+$4>6K;a!wwyB$Y|yYN7;W+wqJ&n#v5vQg+VJGBmk;`%aN@H2xf6lD>UyZ-o>&)|@WDn!Hj6D`h$I+%8xfe~w zlW9AjOQ3@*;Zg??M;f5&ppCmLRhGBU_P(R`LG^U|Qn6ePkJh%)B_2xZD?jU^p32b^ F{{q#g%t8PF literal 0 HcmV?d00001 diff --git a/test/tools/llvm-symbolizer/Inputs/addr.inp b/test/tools/llvm-symbolizer/Inputs/addr.inp new file mode 100644 index 000000000000..4de096479dae --- /dev/null +++ b/test/tools/llvm-symbolizer/Inputs/addr.inp @@ -0,0 +1 @@ +0x40054d diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp new file mode 100644 index 000000000000..3a832a9fcff2 --- /dev/null +++ b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp @@ -0,0 +1,19 @@ +// To generate the corresponding EXE, run: +// clang-cl -MD -O2 -gdwarf -c coff-dwarf.cpp && lld-link -debug coff-dwarf.obj + +extern "C" int puts(const char *str); + +void __declspec(noinline) foo() { + puts("foo1"); + puts("foo2"); +} + +// LLVM should inline this into main. 
+static void bar() { + foo(); +} + +int main() { + bar(); + return 0; +} diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe new file mode 100644 index 0000000000000000000000000000000000000000..018053b979b9179a38870212ac33289972d96159 GIT binary patch literal 18944 zcmeHP3v^t?d7hQDvi#74V1anpSwJQZ1ff?Ff$iwZvg|7=o5hwv2+`}+>dI`g+THHn zwPnLGHnQ7_MLj+ZaBRv+wmE4a9MYOopN2;uD@?H*3@DVOPH~HPG!Zzo4&i_kAlvW1 zGjnI}O15(jr>Ed*j^_UJfAh^hGyk2rul?=S-E7*VO^dog;`rMb>-+O^UhyTyydK7m zvy0i>0zh++7xJn%*FyU3vIYuc70BQe+mMp%Jpe@J~;v33Zeq!F)yoWWQR9n#3e^C9OXjd+dqHgY)~_aPJSVp3Gni1%gQ@A(90 zkcn4~oRc)-eU(qd?tL7U#JeC~J6PR0%+--R-+Cv4$*#>NPT z97j$Q4*4tOJt$8?`mP5?9LO%@qbQTygsh>w9Wsr)6J?T5AZJjXNq?LN^r2rb)<;Ue zoXKV<_u(Fnat+G*&@+qmx4sSFusacd@*u7kbs6FuApOzRc!=!3%De9$vRSLB;n1-r zJ?MQE3XI)PqHuIa6Fn#<-*}9k3;VCqtCB|=4h=m@cl)wUv#EX7d%y^yTl4&KnR5}?`SY}@)hKpOaL4j;yRn!xpvfqg|}@@kk|kt)5C zEPH?R0?o?%HblpW_g`(Ta?BQ`Pm&m3QSHlC7Qp1NHv?c-X3Lz%=$UIW4wGZzhdwW!fkdPzgJA#+?Wf28y%+E``!lF{U>ACnu76&(A|l=ms1Lnl1RIV1YQ z$uGjtvF4Qb`?mSy{5ASSnixGQ7Lf0VA`Zym?sptnR&Q{Jx& zn+wpYn78`xC()`Ktqyv1)WUyk7rX@L{Drp_fIhVB!{^~n>tb~AE=rR(qg!@kl8%)30UM~?1Cc8ID;y_(@G*JyRuqSp4dTd> zC8@D&m>8HyUN|&ZRg^p@rTa#9O#D9l%b^nmo62|mj%j;6 z)-16$CIeUwv_5FRz){)B4FKfAX~-O~H%dpaWocW(p^nr`9fhMU7x@qz!iD} z$_;Ihqxxf*2om_0Kxq&UB-u!`^0aAaoMDUP*dKTC0xXR9(W)s_!PuU)Y21 zQ>7K)WhMpjg7`bcMW)8sX1k3s6Gj_RgtWtYwN-h-xuXxYk@9Qh^?-r?_PWv8&ppTUS} zeNL`I&-w>PF35avBs)fn{(yZR#X1^U@Z#gBA6YgAajZ;F71FXPz?L04P@*q+HU3V{ zlF38YQlN+x@8B6iatZ{|CNFYsJm!=W8bxcFqs?;cHI zL)c8Gp3k)ub9EWXE?Rg&?oBGYs2{I#V)H;`7u}nu=hx@!&*t&B<>}X-MqhZE`mg2H zKXsmayx+>xzn7<%<+ab(|2~f&$;2QVhuzt8irAdSVm7C~*s<{g zybnwu)3ZBC$3|%pD{Y+3O6!XqduMU`wX**k@J{d~GCk{4a@q=>C#S#MGH1^m^F6RM zrjS_{WAvT#qa5t_3C7+7HeLEsoTHkMYlQ3<@=hW52>GCp`-OZ~$bS=ZQpop(WayjH zJRvU@@-iVCh17&xBjoi$ZV>Y8LdJ#URMqjgK%4D^8<2%D^{fSVXBF#0*~wE}x6= z4-+8!kkEBo5|%m885%HK5^$6&6LXUSSSCT-g=E6A^;AcaPI2R#%ZlKq>2$hNp*FyX$73X=1 z@D14}RgiuI-N-!^x8dk*#X(liCipB?yqPL^?zd zJuDdQ@4upNb0FSd6O4&x!r9I;IM-V^=j`8>papKO^2`>NdjOZ;Q!tD0ne?gmz!xyW z0(+D@aikxh@nbHd&DtA?vuh#g5e#A`osk|m2eVS=pRUw+8a*RtIy0$hJs@xDLdpxd zjQg0j%?x>;V}UN>lJ67cv=nN(dq* z2oW)>gwV4B5D`sE2yaEDARUmr%m<$2)Y=HOl24Kxu{k1YRs)A#7pOnRqR1LDA+RGj zZ35w(JE2L0KfnAP5K3jpoU_l?8Kch)9C<<@^rJ9G4he)_NjdT}fs{JP8v;>wek%~# z74U|LnI#YrGdT$LBM=d%OB{ORMJs`j4USM!M))FLL81vJ%gxXO`Hj9?~)U_v%mjwdviz9=q3vSOv z1|n}_3TixC(1N#;<4FPG9G~#veL%P!;#7C6sjtIW9uanm327iiTDFmT@_In@&XIZ` z(OjevKu#;Ueky?S)k5|Eh~5<uQ@_9l^nmrkw`iZ8D!frDYsXTI~)vyy$y{c(yJc> z;g%&b50mDWCGtfe+_Ko+O=N3AShoHARYHbE@9JFa%|+_y3LwW^?A!}B09EH=HTqJB z^kEnrbuQv4Dny=(oi01~f};>=XV7KmX>b%Gz3Kz1*5NzQI6~gQs28oSlXIz_s&`#lq?8^rff3U-mL6)Af4#yo1eiEghEg45> zJn|PnZif&`x1t5N{y$J9C9 z1D;x$!{|pL@|^AlN1f9>;3!0{%(TnS_h?=YBJJD{MA`W#a1vh_ticWoH)X3aN=(!Rq#w7V4d^9~9UEa3n&{qQvR$Cd-QR z1#l!HIGrsm<1S7eI1-VZ?(QQl&Ju7WqSpK*W`aLw9S%f%;Bf>nYlFr)a*eGA!a0Os zjny@PC+*m4!$zbXwKiykXpFun^`WQ`>B9_a)Va6~9EHepaTFY7=bI=hMA|v*vhxr) z3X#_h*$*F21{{gNKjN*V3wCI46*BotzC;wK$KjyBeg-=dQJf79$B3Al>6dXr)v*I*?BN1@K zdz|0t3k9|w9EotRZUpisrU`+W@>mJ1G5DI+Jm*Wrk;6y|erW@GW(r8pu+ zrPjqc2b}vI4$Zw5i00y~077p!0ENeH0^)aZ!a#oH;%o!5!Ns`~$bJ{+J|Hm{=es~w zxHvxsvfagb8AzXt^CpmoUC3{M-0eabUdkI?NEMLXF3Xnz>2Ptb1Cn-e;y|8pI6OiJ z|0vYO*#^$biX&onGA_5s~CI218uIhO2=1P07Rj!w~CjvmLCYNaJC z_owCNW}{UzY8zKyvtsqNtNbf9d~2?2u5M}3n5??Cb@}p+l`B?JRXwi~10-Xjh*&LI zJo&KQLpj~R?MWiI&Fl{(!w6~P0crrYL<}pOu!G6$=(OGdV#jU?B#dCtipKH1EE3pg z_94uT#p0$Bw28ck$T$0YiM&~~h}%YMOA9j$J8TZOk#%_KvXU|T!&{Czg;3p(@V)9< zR-KG&4n_K^b9$?`zf!r7T#_6|F3rINaY}bgu&1dwSrcc3d^l5DI`zT;eQL~^vFI=l zO^|!+)!2-%#lgXo#U|3P7uWdPTGktFU9D~Y&h~5982)S9*L1e6U8iYUH$0BkrwVSA 
zj{SuFh-rLMD~^*-H)`qZ)9{Sz17Z5_0L=CQtk>m%gvpGRSV#W0b!L!4Jekp+SQCwG zj)$y3Z`f>)h_FVRf6B`I^Lk1{a#@|KGkGRXWytAN%>NYjB$WbEgGtP9i`dQ4=r=;9 zv!PB_Egd{<<7uuK0gH!~+I!y+Z^Q?&JxKYF(hoilA%rL@$ajmeD31;*~H;2M~!9W~dHV_CM+GHB` zbtD-hX<>-@upRyDpoq4m%aDNx;{@#twQz!b%}hb)wI&ue2M`C$@bD7o6H&?r!kuXp zp@EoTKPK~HZalWDY!h>0qH~M9&J}U(j=vVR-Er46)SNJE&at0<1wFUSA7(|xG;!9` z#a`$1nj70~_v;483HH{bsBA=HyUKiav}x>^ZKqB+Z`zpklYn+VwgHMn7vtmaaL&|t zI5}6RM$At|M8nfX$e%WZUf2=q%^synW`?T)GmKaqLa22*w)$$B1!pC%BGDLtz;A?w_#N@O@nqFZ{Kzl4_eU!sk*l2dopbJY&%O8Dd!MhJwk_TT{IC?LBJ{1TpuIb2Zu#>maxu2od}!wS}~NoZBz{~)o;yDkA=ByW(-uzfBJ( zJw90m7)#gc;dDwJI213JETf4?yr@pp<_L$zvl;{g;1+9Tsb7X2q&hEz)`p~@I2r4# z?bqW?lfM%}{T3xmqjT3Zb#hhZGc%c#_WRBL4t{;@tr$C?fGxo7wN@ z(Z|gg(}YQ1RrNyiuYdmB^8L#gJF(4qI`~Iby%}?IQ_b1S*jFI*uCD{GhFGDCK%9M6 z1)cD&^^7^7<2}I?h!grAAbqaaKqstd$43kFDj?nytOGis57G%e4YX}!YzK4^cnPu( zI^HeptD6`*MSkEkWQ6>{Ihz?9hHeFlkTb*q_Cqc}C;TylLmvjFA$y<`o`sATcKo`iwJBR4L7;NN@#2ME_ z^5El$R?%11_!M2aDu0w2vTc1`RS(h2O11yWNyC^>ReRHyp((Zt<7pP=sgk z95QjmegKt<&`YK{J72&kL+C2mY{p<&!iJ$-ETO2|!Amwzzl|v*%CzX;fybrK=J;{U zrvY3W1vrR1k0LBa$X3PnMT#uEd6H%&oCbJO!mFBPe@O$|3j~x$)&AZhWKMKVlF_d_kX9g`ZFNgg;rxi9cqz3wi{VOa-I~H!NHTw@1Q-C7o&@}pk*a= zi!N?;yV5Nwuh})h6(NHP^%T}?7z*r*xZd$0t49&OuQ}`k0L9jaXZD5vP*wv_lg*uY zvV>I`SM5uYzhv`T%`sQCA43F1Xofkpd`P%QW7lhyWyC&$>f^~~M~?oiMz4U0d^>%k zv`4c|7%A=8cX#1qP+R93u9yZJ0`cC-#2v|ghb1vz;m*|Hpth`Y1&h<2%j+JBiOJ4Idy%DQuQ8OUaxSNi}&KCvbLtCq@+P>k+n>^Kg_s=P} zuV+`%8l|xy%g%A-59IkShQ{HlRK)5`cG2Zi$w||c8RJy8R@dnX!9cl^Iiw$3t6YM< zc!00sCF8F2V54?Y=N{Hi>Uu}zKAIZ!@7%0wGkb0ZX2c4iS61zlg;-Xqg{3HVLF4U2Qjujd@JF~ zI9JZ7_Vc6<35&o1)DY}Gn-EfK98*}TJN4CdK8*Pr@sNAPHvJlgWq(d<+gjbglULNl zEnGRK+PmPJ+NGcOEjT`V!T!C*j^^2lEv>gp7I|u8bwl<+w8tR1 z;;C*>?F|}RNT4ZsMAd$m?uo;rclZJAa0R>l2u7^!`L!4)zkKFP>C0!-aoYT&+BzEh zD7E0gVbrIZM*ve5Tq&V#W5&@MXLLKPgCLY`}4umLZjbmhK`f&-y3^Y!{%ZJ6tfAnKBk*iRey z69Zo|@Th^42Ikt`tn+r6R?AYXHx%~Ek|=(?rK4jD>oEB`0#W(?P!FouR+G=S#utxD zflx3M@a^!2yL;ef`{577rON2F_qg3zlbZImpV@Ekmtx^E}dLyAQlZeHnu#s8?!!)<1sE%%CHVf(nX=iV2E}cIN&4EkUu+#vs6a2@`BY6vUaLQU@9>XvDCw zu$%W@2`ek>Qd&@vVS0TjGAc7lGgPW$Qqhg8ROI}>>&$?3?|q)<|9d{a&yVgt-?i3$ zTYK-d_g;ISGv2c5@UpXmt4%Dw-8|dTX+1n+@3NOluZMl57r|#viz3&Q^c4Y1 zN}m&Oe(49mPp77rJ}=;U0dHHi!bxRppA@sbeNeiDdBpqf?M&#eoe{A^SsB9=unhB; zpwKiD7>Q5@8Dk65B8C|Rj{f?P;rt4AR#?hMER*D~6~)OSMNBjD7MHV(Zu_Khqa#~b z%6|P11#;~6NwFhmx$;Y0kpH?0RZvw0{_ofccaF>fNwwYqTuZfI|Fc1rS$WG{%OOg+ z5s!AnlcvZ2EnV}hlKjG=6^x(_1Fe`Sr2B&1|EOpk>#rBZ`#DLp?b|>gP&Gfm--1 zn7Z~ZyMJt$Hm0Ww;+UW8j3c1#0v%9y;>p+eone{mRR7yyw~BLqvHK$i<;$KJ!!Rw* zIU{Whe_?;wpx4>4?RMsHAW?>r1BsD%7D%1U2gvR4-ztsq{5qat7I`-%Gp-?ldy@If zK~^QOD*kMcH3_VV{~*ZP1lGnM46-Q#o5KGs$fgNw8o!&^@=Z&c%k6?>3BQdb1;hAn z$5Ea6y^L>x{~PHD&(}yWcp<-nqK~EaUPJHl+34B=8DCFHA)|rvZ7SRna81tU--(CN zXzsXxd{N~Ao>1nsGVa0I{Je<+3YmahS^Q8cmwy-o{;q`axuwwHVjM(=iI(x7%@i06 zR4s$P^7aH=Q?n4Y<%)Z_gHIccio1lEMFot5&-T|1nc<<%R8wkS56=iG79sguXOozw z1HTU?`nncD=2B(_6Y@P6P)M3xF$K^m5t=Pp6o{q)@h*@ADhFL2lEs@y6XlicK(rL{ zJAFr&9mZ%S(P%9jfg;_~V{@oK{qw^o4@a+gpKU~i3>uW`-xMD2Nr*-M%8k9gR9WSm z!Y$2iX)SdU=)gUNym4UvLK}vUMxlq5fw{p>DW<+Yj2^jLAoxclNOA*cDu~CNfUO^P zW=6rZp1`cAqxLVN?z)&2Y`BNz??>`b%V+kr{3sF&EgwfM{~5#k$CfLbZOabxs0`&V zJKR4A===r9=^aM(1QuP|HI`v8NDDKRXR`E5rS8%EYtzv7(aS11zBdWg9=+{SB?|Gm zW$5>d{3n;HHCKdwJZa{_q^_bOvjWjk(6Nq$wcWqckh+FB_;nPA z+z{0E^h*~9r6j05^{t)hia5kASnM7rFbC@~cZ?B7DZ5fgs74yUZuo3Z`}=l=agB8n z^50BF(WoUH_7aFD(IQ?vonf2~ENM<19=sk46WO+Vw)4sqOpFg<4#y-3DoVgnf(tMQ zuQ?eyBb{rUlPJC_RIsdoDG<>hkyP2d^sxWe;j_k!oziF4xmYHAM=uZe4P$hc% zIp0ANGA8O>>Xh;|6ichN*UV#t*oe(1(6(b?k*J=Q z=SNZ_pO%64Zj><*TXsPbG%cT+Oe=8O*H*do)Rr?YS3u37xe)YJF72q?*j~ButoKJr zIb{NGyYaGH=sqDuD;e(4i23DV}X^aZB&&1c;uv*JnHaF>hQOO^~G68 
z0%y6O9WmY`s0)42h{?1HV>tPXzXbx`jggE?3h6MC9_IfXj^z*&2&00TBIVyjQLxCR zhil`|uET+76|_^Hs3UGmbHw9^e1{Hu6NzIE7vwpMsk5R`M!`^CJ_;pbb}a2Ju;~io z@66_}Qui(L9j}~!cFPI(xPog{sa+0ddnfXg3&SAu9uHhv43DEO9wkQ+Y}It4Q~cBFYy68djre1R>S*9|N+V#^M+(l-n_OGm0iNc{517%>X)pBNd( z2K?WQOpK)BSd@JeoQh=X$^GF!zQN#%svvf=)rQRAcD8cba;@d-b}wxZLh9yK{mm1fF~BmkCL;W`G-bNk#* zZ}dlwiSoE3(9=DYW?Ij%00dm`1@mN-VcoQ3@xNkB%Pi(aRXMEdh5|N&PeF#t!NNXh z%gS)@?%<$f=}7Y)lZbytKKJop8Cm>Sn4aE5(t+_##maCj%!IblEL*e~{S;775Z2K* zgk(fAQ1WsI?QZzvfA=36Gf|xMv)zAj%#GGSN1hLVhhz16utzZZIQ>@W4cEjl#~eNNcOY5^t6x9X zgJt|~1uOtNZU+7F7vXx5vrqO*y(si9m@Unvle|5mO~brBV)q!|F!<@r<)J~WBgS`V z>0y2v+@@pXNHTm83c_Gg{>V;mXOuU;oAKuN1jon{J673=0#;U~c4KxfA+t~IQKojo zi3vKBaw2jd2T_&UiA0%b%p=mC`mVakgFt(cL< zt0h0d$`Id1@$~pl@#Jd2Rn0~DBgZL*p-#b-a%mm>cK^b0V;4b!!5@;r|A0jit05BS zk;aEF4+)XV=EckIBwhNBPXam}Nj4@Aelz}jDG8C4g$a>W2s7HT00_2vXxvT_Om=Zx z^`rz^A6yfiguII!q9YQtWOxB9?Ai$te%bi(lz7H?8^N0K1=01iQaAsObB1~eh9Kzt z_~R#x8wE3FTHte6Q;G_t;2-nHPME4LxEu)hY#V}Ibmnn5b2nJr1UB>=QGt(SBJ$%*3dG zJ2K#w2~FJ9pRBVlncqG!n$}(graOkIwz)5Ye7gReP=(e5O3?a0CD?sE!QfCcmIe|x zqYlcxr#}{PE(tX<;EAs8S8VTph>x!5kKQ*Rn%Wd9A$qHjqV{ZmGz_#|(6{}%NE+$~ z)LGkcT`U?IXvcYlw2Q-_1P7#Zatw7#U#t1+$>HM(y@^r^N`vRGP%3X{N>yr8aQ9`d zr1Gq5P((v&=Vft0cn+CvZA@SB45=50-a$2dEYw_oy@I}eR|IXKZ}5r*b!@^YEX2O! zXs6xlzC^*2lvjdk-#BTs2XbZemMhC5{wiEJU?=R{N~XIL1^CqP>9HL-rB&Fq0;hxa z=8Ph)jz+Fu6eoyixcWs&f`~?_U!)R5G%Ecfk3*1AgZgE;f{0AEU*r-*WUl?9t%9g& zplFXEBBSh2)hLLL3>38qqT@l4N0>s+EhKJd?Gns>KZ8O6hu7(MZJQkd!R`6 zD7C0}peW8G$QYPffBDIRNHS2ACWxd1MN0&cY@n!65Je6Yl?ftRz50r;BeQl^lbl4! zw1PS7g=93J`jXvQ7Y|Nw-D-*YkMoQMKWLM#zQv<&3et#&qJ{_D1C}*RM4G!ah@tqQ ze^|K?DXe~luzVq@z?JrM9y+sa0E?Mh)+eRI1Z5D?I{T!xWMiR92COEif$o94rhn$N z+u?(b%Hp?Ud!W;vv|t3EaS0b00hJILh~ACDaQNcK^acV*Ff`zCB}h>9UdWvRWaC*L z>1k7?(tOu4d=qgP8sqgySl0+#Mc9xqgHOT+1dnf#_n5RiPZCd0WK{%IC3qGJ&b;VL2e`^4nIh<+y*my7XJCf`E(e6DQ16ZbXf z9C$YY)!(T#f#^NZUKj=MMk(W(iT*`bt4WnW^ghI)OJcJ4CittqF5w=rp zjl?aVCMVc2?Nx%b>9f^cGyt-n(0lnE3_tQKxC>prVcnQCTNV_%Zt!j@WL$Cluje2y zF2t!qS`g3Q>u;VuPISTU|8jaRfpx~{QNiQR$b#SbF}M+1bSz94;djo6vrVO(J(ZiJ zzU;!v(Y{-ql?x=sXyFQUM6FQG++z1&{&RS$m4ke9BzGFa=-$H>?OPzFc>j?Zcg3ju z)Ye)q2+2W6^4tBEa|Qg6>S_sC;m#KIUcevKHv%gNrJ-~T8+K;md`Pp>=D zm)a#{@IRTnQ1rFk-u0WEV}2l<<4 zMYGHOr)R~_B864L<WEDK2GGx%K07MU&1yfq_@f(Ldk}@`?qI~Z~yMG%uHo3#j%(?uvpoI4XX()A%5;${EO9%hN zKcQvic)Cvdgi9t>Sot(kC0VW(LAXM$BCigolFfV3xmBq=T|+j(h2AHx5~I)kCWV3$ z7Aq17Y83MbE+{gn5UsMEPW59|x+$$tHt4s8reYmQf(xO#Khl4%>b~TDj+Q-|eo@ds zNRA6;ELr}NIZSO-QX=*icjrHsSdDeZILt2GyIBpv7~uZ}K*Fd@PUVse%p#+?9N z0vE`f#Wb*zj?jc!;eT2?bAqKU5TNER%z&p9gA)2~FL5)3i=gkdMe%*b3M)J{wCWZ~ zrFdbe(MYsGHzVOY>N|?8d`JfxM;`CBq>^vwV125uU;Xdv#$M+Jc3pv_j#BaSzQkJm959I$KZL{rSfDDHI=23s2!1w?K3``dF%zas<@Elf?cPjWiwo zM~!xIJ#vPdbn)dM2&>A*wk-koXu9XP3s!$DwS&s4#lwG>$ux&5t)<81SVU2Iwe+|w z14p{l(gSzvl}p>P;nA>Erk?fo;2z-&T8@=}aDkW~;qNe|Dsph|;QOfZ_LkZupu~ra zts4_ozbEs}UE#|^Zq7sm16P=vq|WXf`FR3B6OGDmnO3> z4aHraGjOE)%i;Rc2;F?cz`^WKBa`(dqJinL-Z`cV1qwEX`G6OLeO3F{+HM?;#yIK9 zE#oaGmiY|aq?#od{QWk0_+|(&P&hL9f3PJ_L`p;#;P?HVuaF@yge)3j1TQh%>KPE0uNCPPH~g8pMX+5i5$ zo1`rJkblkWDW1*nxUveOmXUQdGFX%^4>BIzIM{czj$#TV4*tT0K%lM+jBvd^p+J&V zsirvo#2+HU3SzNrC_jE97ZbTsCh{JNuewcA{tLOQ8wcZl@+RgoPfH5Fgu{)TnQo{V zmpY995Z5;WHO2Ebl;}+)5|@{aR1nQP@&|-cjX#y#P=2i$>+jr@1e!r>Qf?$G7rGuB zW-!sAl8I(Yb0FmgO4F7ygXVfz>Y6YQR8jsNcOyuf;Tuo(l9i2nR^=7Er^~8LZ4Z2c zoqZ$E-*Xz>nA)jVGr!q!gx@5Z#KLcsUbrPYJ$?^7dp$Y@xUBS^t_M zVsi$>&U$3a#z7oIEo?bf2pxe>oJ}OJqgn(r5M(WaO!5a=yC9Pel+{Io!!<~hvsWlw zC>v$g$sqS_<3@9r&@CCBP`Y_uyTu;t- zMF-vMc2wkR+j7ReEEv-}UmXjPgTIU1eY{w{zz4p$zLMRt;MGt@($IGtDo^*CsmPJH zT0P&Bv*q9ie@0u|oJeC>WJvN)*d`=V#;~Z61pd`f@mJ*U=1SM(4 
zu(XiG9+adT!_wiq&u|#w!tZxF-V>24ugduT=1@BA$3o*I#xO}pk{FaojbYM|WK>YX z8N;}cgbhkm#xNC%Tb-$+(x1uv8#@UJDhJh4S(S4nwNygo9BD1(tDM8tBK3ZER+MBI zj#Ive=|!2j?7V&d6L&;PmxeuukxDBx4ih~4$j}KQtr#yr4j+Ss^uS~f^ItuSRIR?7 z3XNfZNOv=(dkX1%Wz{J5aBU8%6db07f&(I6v4Ykn=ZZ}P{EF`hKFd2zzUANYjl^~4 zduV~ee8Bx;3@*KKEf(E@2+vBlI2~4Ub>$@Lz1X{R#JOPIcPB*#?T|+{xFsU~5&uhf z*He*qI^UooV^^&g$M3THt5>PTwmQ52_^K(RQ|jmzda*D;YU%PrIDY$YE|d>_2%0a# z1nNdsTY*`efUIQ&cY06+$`ARNp;vT)=r}A0OYvO?%~HBo2_54j=nY=q4Job?u=HZ1 z|K%PVQ%5;)!H4&Qcy|XnI7@5kx+RnuUuXo}acKU)3kmzsMQ;!;R-+}*-N9>+ z0Y~*<7t;NXkk&u3aO_CL2gP*96BOTEXrQrpsPNCiC{9^3g=RJz@!%IYW=6c3h@suT zWXgOnW2T0`YQ{tDP5;I5%9_f*S5?A+tjsub6|GGGy z?eGs;Hw{m2oqP~QSIzSmt&1Mg4DvX_8UL1b^F{?L=?346AlqbP;`g6l_Y6CRUYD+q z^BwY@pLnPz?9ll^KFJn(!};g+pNhNSl)7D6;>Zu}{*$iB!^lU8s3Ie#m!sEQ@ghLH zdm+Jk_dK@E|F%0XqP_eNu92U+$32B)I7=^-`EHZx7fRh>jo7Z4!x;e3E)^b3E*b?vn)fC!nh5Wq*TlQL@GN-WZ37 z0=CX{4UY0>*za&s;(y);TQWoBfA(IrC%zX8)v!2vM#s=|d=x#^k@Pgn@LahyS|`A; zWC6xY1qhP}kjMxyvlp{#*P~$^IA1DLmmr{|E(utL*JLWTY1TSE4IobXpL>G4l0te|+J-u4kv!>%iv!9=bS<>sW zEGNnMsrBF&)q(bZd;j<$OJJgh8*^Zl!yc$hkQ_v{qTqWI{sDY&;5`ymehfCltki2= zqqCjTgLeO^2ObjlzHRp}-EbTyjVT-F(>~9ZF`1UY^BWb!MQ^%+b@_Fh9uW5+ z{p*{SN6_|BwS~j7sx-rl{8K#dBmMrLJrS(93==M_Tuv7{`0j@;>mDe#49%g2oqG~* z3Hkj;%41lPpL=k(crQ|yd#ACi|3&XTqp{6HYyRz<@Wz$~>V=YrO2s_H-#=s$ zBcDI{VZ|s!Gn+o~y{4lL=BS9W?_c+D1xx9Fs~F4H?2oSeo)tThEzBn;XF%tBR=gLv z&->=neJHnebv%aizi+`?Tdyh8Fu;trGY+5h%^oto4DBlj=Qn&smj)4=(=oEj6P4pB zk+M1BXm6BVE(IC#At8w7~_F?2UJg@}7(jio^QE{>hIfi~6&uISo=(roOH859F#JJc`%I2451l<`KHf0h0?U_WA7_M+6IHC1nwt6;O_M`?ZhFl#w)`VsQ&m zjyuE0;zWRLj}=j_$NXfz{tx`g*HfiK5Vj-ZkAl1%U#O{^ij;7bPx5CXkEMuo=ziwX z7(9cAO<&KYa4^0tNnc}b%Y^-J{p0Wap{V_qCx)f z4nfqDDxsmbJo%E?hHm-oNw>#X^xrtSS06>BO>&rz{_j$KB&3oa=6_uK-<0(tQibhC zYY+3suBRJ1z_a%M`lnem9@C%t8rtq3v8z-xCgyQ;(?dIuZTFKz?F1G7lzsp1za$P~ zzubRx&)|`sZx7fQ@f$WK?QI*g=@52&@P&;w=9MNJ^KG+@`Cz||In`ie#)5Z%SA#c$ zfAB3E)A){!`OB*|=GoV6%;h(2%p~xG!AF441pmlE8&mv>jVXQ2#=MO>enuUi*V~w% zP{$PTi{G~GPj8TivG?uY)O2}-=S+)@8TFBk+4iZ8nf`^1Ne5rmYGZypWn-TD+{V20 zzK!|eL+Am%x6Q^32Y)O0S3a>ZOTibtXJhJ5+L)2x4}NB2T2I)R5#ZNXR3vcHt|`EdIG)X`#d+#pc}ezTKaGs*pZE zfv4^5j1f*ce?Rpjd)5Eir%G}2al8NWr*d&5c87_l*OIk8db)E=U)e&n+599045W>Z z^*{ERo|1g%*@uajx;OV?pB5i)&v=3J=j6#ocy zol)qzLuV&ZF&$@@(9o$(yy6B9h(kqs*5})L8BFpmFgt8#*&*-qH z1Z7gmwJe@|j77qGhWXNr>@6osml*N+C{8<>zR!KX9QrNH`|ZR--{QO@Yztl!Uf&yn zs1H2 zLkB4);$TXQ#M{ogit(W=z33Bu9fD-RbJl}=!ZAl2nhd`GEEaVA5r z2GcIT^c-14_;&JXySMk$wcP@v>^H&_56O&Y^dF*9P{F|E|ly?YrXgy~6m5 zI*p-iKRh@%^E%@wV!yNVU9lLRh<|36Ukqn(|KYAeRv;Ar_=e4gY7q^Rw99eyB|lV3 zmEtK}!!TFd#x+k)?Tu(?_RW_?G@O`xNcaXR)EfK@rso>nxke{If4JZHp@A6xbE4~Q z&SuOk{2Br=bx6N|WA~x3;jkUyW-<8t2Y>pn$suzZ6V!O~uXl`wf|zvIFI3!voxAZy zwENeaLs6KIp}Uq~)c(xhz7^4iX}-3doUztx4-hv$KtAW=gV>Vo?%2R z#ofD{&V44_z}FcN=;hq8FdLpKBT*@!wh2@ks8J>)Nf(kh@GKYdW`ar=lH`CofKrZy zEdljc$b#?F@X6?=p$=%w_twl3vGk(2AzQ>|i;1WYV^@WNsHuu%UE=2N?G2I<>_m2G z6Nbe1?u;;!%o@o)!wx+HiQRYTN$-V;H8)1H(KX|Tu%l~+M6pXE8)^KuK27F6jFQzn zHH3|;nHa$qC4y6)5vwW1>`RYR3Uv$U^87CBdmiymLWY{eXtq*}oHegRvr*#XAdW|~ zi8T*IvXLR(+~`(PEuIZWcG9dZsM)iE;6Ez1Gy+Nc^g0W=U$3=OI>3@*t-wCTNH#z* zJ@Qt6vKO2a-b^M03GxwMrQ z+8DU8_l3Wi_M^QaE|%R!{a>?hJiCRR+Mt-g=8DH72U62ZzG3}InXBg@(=UyxDOa=O z8s3}4&KJ*2osPyJ{Dg4B4CF!>lx$}vB0PwYhj0Yp4TNnCDT(ZmFmcEG_8P}@HdCB= z&0h1|bka`|guIlRcaqsJLm1QW`b_p9nKLynq2^BZz!j zQX4y7AB-I_uy}8Kd|%;FIK2N$;eAPKP9pp8f2r^nRQR8Z6!jJP;lCHz*SMO6c6MT@ z$o^_J+54;Mmpu>Jf3Ak@tELFY-+$4(ubPH{on0d=u{A}rG3s*kIMR<;9@T?qRSnj6Z~ST{=> zSi>*#*q3Q;G(4ZqKFp55WC|{`^QAbi)6}vrV8;t{Z21D#`LE;r*#fqW=F%&-vTuny zF(Zo{?C6o**rzTJGrsFi#LKBk&R)oC9P9+Kq{Uv-=wLOp@^m@a#UiX6^B1x|gbSKj zmatbO0;OHfszd^{b_FXHHrj^!Ro4w}aUG^tdvz(~Q 
zUZYseT7<&ZuV$A_!EZFqER{ZW<&e}B`P5hKXJ*zz^5xQ=r@&k}6zK{-GqVYq{<)gf z&=<;qMBEbKDHWcyB?o`J8%U(LGl4{UNg7C`tK2|hyzopAo=L(pS$J~7Qzblg!qX%? z>1H^PNbY_hF->@;3r~me%q;UzM2^5L5uUliGf#Lrg=eAgEEb+F;aMs?Hwe!%;kj9O zRtV3n!n0a>gCVPhjeW=K3jXc-m}Oo@IEZi(;Q~Srg7g!Xi9wixU_w}ckc)5+!q!h% z5AzogFC#P|oI>~pp%+1Vie)As=nxhlI1wI1*oDxDa2nwn!thU7W;DWd1Pj8=2ssEV z5sDGY5OyH!MQBE7L*Nm*5eA+1uuSA>mWe}1LNFoZAQU5PLa0XAh43oEL4+2Ba|qud z^db!ZjAh~xIE0%JZbNV)Y(jVp;c0{e2uBdw5O{pMa5Vb;!8->|6=IL8)%#kN; z%)=vX{b>$8WMdXFb|%_!UDg-b_m33NEnfygP750FX0U?3I2q(o2>TGXFBC^9C{N9f zZ`d}E&1oj~{~OfeAe{+LoAU29eV z2LY>rxLs#<0)-9$QU~k>QU}xn@rzKV5lG$G3>*eL4jc|_1>##`rVU6P)eb~Qc^Do9 z^;8#-da4IVW5B$KaRf+#qk)k?>cJQw^Wz9 zbAaecCKq@E>SvrFCPPpROaPVw6M<#GDZmQgRA4o58gM6YI&e1--;pqTfyuyn;7nj6 z5I-klnt>YNaiA913e*AHfO=p%(1Q9I9)uOx1+)QsfOa6W4_-Gw3QPe;0_Ol@fOCQI zzf0AW2$7;rcELBM^$!N5kK6nF%PH*`!K(hUW+f|mi$0*3*4;Ba8~KCJ2yAefh6 z6hIkp1dvR1B#=yX6fg$zC}09G8pr`h15LmfU>a~NFcTOH%mt1E76RjdrNHsP&A)j!6e`T;0?fL;ACJ6Fag*GOayiSrvSTvQ-Qs}X+Y`AunAxka0W0Q zm<&t?&IG0aZv;Ak3g8l;3h4BJP=jy*HNY~U7Pu9t1MURsfqQ@kU_HHJAviE9v~})xk>6_R9I1HX&MZS z1V02A2Mh-$0cAiHFcN43jsm6wqkuWU(ZD=lEU*|j9#{cP2JQeFf%||KU?Xrg@Cb0V z6y|;w1pR|Goj?h&2RIlgc?F69BZ1+-IG_xe1dIi$fJR_Ca5XRo$YN=80wq8fa4@h8 zI0U#A7!KSClmYhuV?7}1K^TEeKo(113s3@V0}cju0EYm(fZ@PipbRK|6$JpJfJR_4 zki`;d0uBMD0mFfrKp8L>7z-=}8iAXE9u{ln4iH0tyMf`reZW{?BhUzJ1+wulF5nPg zComk?L-9b#e#9q05#SJDJTM%X3^W3D6rTizC?1$e@xa{unEz}tG@=M#DMbJ`16dC9 zl;pr&BnR##xeC*i>%ER9wQ#uOS}y|b^tsuiZBI=9>Dw$PlKi; zNQb6`4j3a)vH|lI7z@k=(yDk96I{=sBjbc@9gucV`g4JA1C{~50B!}o0^AAQ4cr5K z6j%@Z5ZDC#l$P7$AU+130zMCH2fhL91ReqQ0M7y?uLbvmNMH;2IN&?LB;au%Z9ud^ zslY!4Ob4z87DG-uULN=rKqnMtNeB{_c^L2rcyyxR8<4Ni3jS%}S>Sdc52TH%8@M0Hyl!V+1Yp1M2s@n&0v`msQQ%-; z9C+H%_JWrJlfc&jRlpB`HsC&BI`9NA2lyB;5BMgq82ESK2H*i;1@JU*2e1{m8+Z$F z-|HCvW)O`Kd;+XTg+qWxz<&g61%3%U3;Ybo1K$I7178C&4R+=bKn6Soi~;TeCIDXo za=@>ECZHdf25bjr0^b7WdO(~6Q3yN_ECrqcZU&wMRs%l=?gAbKl2bAnxEFjrkem&2 zMh<{41nvYs1=tMUN%3e1wU4xYRm9eBa0QLYSp3V3ox$lD>W$N}C3OahNJ$ipmw;BOE( zf#i&|BI7Wi3;eA>a<7trW#9{dx!@-Pw}M{|B!_Z1a3^@oT*0x70PX=#SE=>Dg+TH% zBY{oeZv&D$Hj`964nYY7faEq=ft`?70(*fOKyup@ zKj z;BoMqfKtRq0Z)O?0!D(52DXFG26h5>0(*d4fzmf|vkr^_?j=O}(ZD3|Ily@EV}PnR zG5;5VAP?09v?0M3U^?(QU=HveU>*jz~jL8fo;HJK;4U&|FIx=2$le8oxcd|2LCQl_LiM_8<+rm7H9%K3ACZ&allOQ ze*)$LmjW3yL{3F05@H$`x_%a~pfj|#p2L$E7G!z^M+zoyia363j z&;j{)U?ccDfsx3k1|9*w0q8`0Jg^n~UBDdh6M$#I=K`fjuLAPm{{m$2HO~WpjNM2$ z6+{dKtAGi>A|MA818u+yKnJiIxCB@WbOK)mx_~bL%Ybu$Tbt|+#t4yl5;zK2(x#w(5LWoXVVq;7px+2%cWQOQD4S$~`n!$SF4$+RZ zKrm}=0G&C2E)CICYc=QvSom%Ny*nhI4f+7+JFsV^fo=-X^FSW~JsVP*&!+~Y=?Tg` zLPY$#!OWB)1vF)#`BVtHud)izeTwc2(WKCk5KU`wM~EhcdP6iR8h<#Lm&(_L=$k?3 z9EP=0p?82O4n>fryFxT6(Hf%Xg62asEdtDu>k7qyo`Xe@D$|8%QX(frlX4qEG?l*- zbP8(5Y=s4e7|PHdqA7#q@4-ZrAs#d-N-Z#fo`?P+c@F3WSk-R>T{wWQ4$-tx902V= zdQz?(bSiREUdd6cR>V+50vMvHVq1u&%_1*ElX6=_bPDJrA(||-BSfcymK_V0L%qd? 
zXtIgS5KTLuD?~#%53?i0P|F%aG>vOph^C?G4bjxfnB&0;Xb5Z}npV8r5KYVJX3!Z} zSV_4(psA~=#Wbg>7fHE#A$>&9@*4#Pbd7w9XjFr2ifG4Ro%+Ghyl z8#Gxc^)2%r(RZL!G#zwbnz#Y94m4RYbz2(fGXMr>h0@9Z-{+%Y(ipWDv~z*X41b`{XLnnfQ;!m%5}+=^V)W=+gIx zU2i@5s^P_1qG%Djr{)kRDwKA@$!o!KRwK8I#)J(!NK8K6VTsJ+m>|R`cUr(b!?@#5O6E`~; zb*AP%M6nZVo}?68=hei{6HO3x%@txr@0Hcu4?g0havNzT{M}n_tC=`YG?uNXvEK~U z@7Q)-%T?Qgsz9Q#PkD+FKGd_>S%^AkZ!&4_f-xN#Hig>!{a_z~M* zy`&9v8M}Q_wC8_$NB)27irn+Ryd!7-FI|y){?G5oFR*j-U9*eUtXaM`FMZY8eEKRH zlJw>4U8yA{MI{U~CM@F*{8Wmc?Jg$FJ_o8Ns{5p3)YH%-HUjtAoITBd}cz;x&qM{&u76z%S&AD;(2RV7G*OV zND|bS5-(W32EE7p6qa4G$|dN>Z2b2WXpu>>(4Aj$pCi9yWl;(GeMSE4BDc^#36wt6 zwV_@?H+GYBbjE$_UHNM=SD}*if^MJ~Tg~{@qOpS-*f}Nn`8AVPi=x=-nuV*exHxLo ztQL)DSJwDei>7+sV`gKZT=}W@V9=n#onaY;`T51n# zR!07s<;BjTl6=T)HA@T8-EWK2^OxU~KO23yYQ^%xjQka@RYhyDtFm+OPcIkRi;Ky0 za6}W+7z$M~RpNQ;p~tGc*<^Fe@6Klql6Bi>JK)7p^|z+ZC;9`y<{dPOj2C%Q^6zrr zeRqC|1NMdfVJgJ)YJM)ns`z2eur<)7QRv?ERdFe|4{j(`V0LUfj>GrDD>H(2w~+(`6p* z>#CrXiKS9+U!Px+lD~2}=2H6XdDzI8WB$6=c!H)EOt~H^yu{8abgy@k72f1txiY^5 zlk!S%9a+1+s4$=USV~pSE)sam5wzt-T1N)VvLVD%8KKo7wG?ON;N%JA%VxGQvkQyD z*N4^v=5)i0YehpuuRBC#tf%EsT*@GvLTE!ci_nhHfxshlA@m?H)woeWh(U-)NJiig zR0ui*6M_vP1tAR~9l?Q+iI9V^1R)Qh7@-t_(w9}kr>y`{jj$7855hi#MuZ~>#}Qf( z+7LPs`gW@x(9Cw+P9a1h#2~~YBq4AJHUtMkE`kf80%0e@K7<1ZO$bL2TDGr7%R!t) z=s@U1=tk&8kUWMKBg7$a2quIU$kTwC2)PKw2%8ahAnZkGLO6oZg3yj2^&>w5{l|m-8|NaT8oZawUhUl^w zzmjSAEL*f(qvej& zey)DLezm?-U#8!s->ZLF|AzjUzE%H){yY7T`fh!gA>1&|5O0`iP#9(#Dh=BWdklXw z955U)v>HA&v>U!NFvgpW_Zh2Yji6Q)z9tEOI))I7|b zY+h@2n>UyrG5gI&&1cN#J?62NL`%M9v*iuTDa%;vMC%mm1J?c4qgEy!XDsy07;Y|i zH&?~|nR|hIhdaT2$bHJ4;jHo$dACBWG$`jNA5lJ`d|LU6vPt=aa*S%a%AmSWwO#d; zszG&7bwu^Cs!jF1>Y6HCJwhF$R;tbF)#^>^57gc2NKK5!Gaen5rzzHWG!>f1HAgfp zn$I=oG{0!BX~t_8YVXzhv`=VX(Vo;gb-(Gv`dIYV{rYe9|Iqj7CmQk$1%@KSeFm@L z8N-W)Cd0diPYjn z%tKQnrNMF<*YiZ**e>Li`8MxvM#f( zu&%PMv)*IfVBKu3v_5LBvF@@yZGFM|H|uNGx2^A5k6Bx-pIXmYFIq2If42VOvC`e+ zSxk{p+(a&sGjKNU4lb8_fUCre_zU+UcYr&HIdP8rjbr68@)>fwJQK5Gz1$;zME(k9 z#;@`u#Z1L*in|qS6kbK8;wi;@imw#EDdLoimBq^El?Rolm0u};Q!Y~-P_0$hs&Cgk zrFlj3ohDtIuf0d>c>$w)O#6{mqciJLbhqeM>fAbyZmaGI-Ltx1bTjmG^sgH}FoYRn zjY&p1EIi6&Hf5XcG!>Z2O;4F#Hyt#!m_9PKn}(b9=3?`+<~PjqEE_F*EI(ROtZ0=D zTOW*g3b!0ayq?>{ZRQyHAo&RSX!$hxQh9?%{;m8Wg-`Kk#aoIG6`v{|R6eZSs;pCX zDu0H~t5wCYVw=>gL| zQ-i75)NT6BG|j9v&oN&#|7;#&Ny2nqXW3w_!+5`E{Tw6xPwELc0s8O3~OV-WRE!1V}{5qw6v3`|4+Nd<98#9cH(TY5y)41K(VEh!r({AiA z^2SayrQ6tJyvdYiN;f%7nWi^Qe}{p7Yw9$0VX&gjR`WV@DHh=}^JcTB!o1a7ZQfx% zXYMfb=1y~$`3cNcG@}d`@qh=h*s^k&e3rai-XfPP%!=1wsBMa072}jQDCa4&FwIse zpH%KfD=#a5QeIX5t`w_=s3KKkR1;K*s$`WC3-V3avTjo?Q{Am9RF$YUs2);1p?VfZ z{fV@h>>OA!-b&>i$wO9Rw`YH9h>f`E@ z>Qm}3)a~kT)jz2JsqR&aHR0&gv6@MmX&RMAud!lDU$1#k^8)7i5zR@>DGjgb)MWVb3GYsaQ@=m@iwlQOm1wmj>Xt6z(3boZH4d$?f6x zac22!`R($R^5^A8hV)Ffj}eO z42_1*44sB6h6v+W;}j!jywmuoai8%$<5!qXX&#dY>-{^Xo6N79H5M0^j;AcctY27} z-MBV`5<|F=oReF}Es!sg7s|`zHz{sWWGi!(>FB_}s*b59VFi$Cd>TL6`iAB+%~zU$ zW-u&jg6>H}o#7<|y|<8|oKHDCn(;~m@TKCs;%mhv#rKMz6g3;}7bQo{S1wfEu3V(7 zqL&J-xVHqU4%ny6e5-l6`5o+IzneLW#$&NqN-fQpX_qWRvFj|emS8tIWj$wQx`ZW#nTPqA zi}5+keU2q094qSrdA59vDg#UBqpEr84E0*|7|b}o=Dem$6QR99y9f)%@Pw-Mm|}tkROzPDwiqFC^Slu zCK9IphTd+dHSag$|GG#hd@44_8|6y5R&JDAVXJfHx5#gmXULao4rs1wT68b!y{7Y~ z$IOf^*wPaDX8BdsZ>oSwtRAc$s*X^P!UP(Com^?$c*W{nbJLNyif0N@=C52QW zS7a;PiVcb~#b)@hTNTxc9g3Zb3kqJ*spwL4EACL{E6ar4uUff7`5N}@5vmxEDoz!z zN>C+X9pO|e)h5+uRfTG+s#-Nj^SaJvSYTKSkE_FQ(73_$xM>d-y5CKAn3-JMph1^g zbSdTsNU36c?Fx8EF;QKwUWTQi30~emG{0+xVe2f@p3`<|MLL7drkew+xJ{R%Tc*py zwppz6+@sr|dr()Q+lF1`PrBXMS@!8()ivr4>W*M9ZPlH|ZhAq->weI6>weWS`ayb` zexyD|KS7_MpRVWh8ofz>lRizqK%a@7C|AEyU#KsEqrC~U;1PYb{s}nTPYW*h0sWg; 
zmyYUN^dIZn^yl;)aK5`1>#yi>mnkd-Cb$YU<-GOJIi%&om@B9%So^~ zMakoEUP_j$}er zN@-e$HbawE3M;SBR%>^{z1XX*hr86QJ+5ulwrSgOFznLyXc?VU7paTU#p{xE zoKAe_VeF#axG4@_UGkA&gJ>yu#i z9-ZC>t9R&gVDnCWv2e7kfW7b3?}oM4>l@@8%?KbU&(^+piU}`iq!Sy_1Iu7@{6$i{V(^>d99i~W292`)U z$6|v$=fIkaVapY;vbE?}1%L%A0j9=%Nl? z&9(d;k}mQP*ZA6!sK#T zi!q^gVk!ld>LG<%SvnvyF;FE3?&)T3H`k1+8F@VBfK5*SFLZRTPzL+2zznFC?`ZhY ID@qmpf3^A9pa1{> delta 56093 zcmeHweOQ#$*7tqSfQ&i{Dj+Hyhx3ovbJ=$fz*PkFuh&qB6s@x(zyq%9M(n_qXmDkUHmip7-VAdjEoR*8c6a_TFo+ zz4rIMr^~Xler44osts)W{l-}P<(>s|>K}{V(o5@C-LvHo&s(q=Wg(UvfOdB(c8C$oCU&=}l^91nfvw%rKv@;~1vZ zBc4AN7y#;WO=PFC?JgZV-*@*50w&T`E(jGj3z##=k=(;Fn)|0s3hr7cU@V@Jp0HlP z>}KzuHqq72id1oDSdX$#fIj`RF-#c4WX&(i$+l%POrQ;EUC6T;__=F}VB*RsHKK(A zh6x5H1N;417^ZptT2IlXBanbf8MK30^Kbbq^NVux3sy7C%Rxwo^}N0!!Jz`dMpkl~ z{w}2<<l806Btb+U8!y0(jz`f&PEj(-CnmlYA&&F}jde}ssP2?UYwtQRW zzMg1alF98PN!}>#n@Ln>ZV%&J;<_vv6IvX@*!-4q*_3=DzULY|#-+o>d1CH4*RaUnw4)%q_$?a(3}4%kr3H?iTQ_!^0yLR)_QR1*wY@ZE@ybYIj_A}un*#93*_>&Qm ze>dVD=prLxk~xd(IdSM;O-MmHOxWc*FP=2n@<%k2$GC&Scsp(5tr-xwi+kKb;q@`D z38TX7NI0FA?p6k3>2L>yfORS}u|zn#;i(XZiLs4>w$X6#GRJYB;(vNlQ_{HKL*P#( ztuOK~swPtr8o}eWQ}_a3B9$-jFHfE{?j1* zaZFq_nHDSkHJiV7GFfKBbaj%-GTmhw_Gz7I7ebQwfDG6%N z+!4cc`5%=mxvh8-&%9obS)%tpPQ|5sMkVsN7LJa!H!qA~7~2FZA$M^WRHK#=7#Z#$ z5-sD{ISgYB|0SBSihhb_I4$-S8pXx2R&FMyz`N-96F%WSKOzIug$!S#;~ho@HY1;H zl9ghd)2mpV$K(m9lSnFWUf$vo`OhCeerE3gUJ*e9xMQ?)R2CNX#r`1~93-B>y`V2G z2macA%66ejgqgCuC5;r4r<`4B+>@{e9S}}o*<1>r zzkM$xo}uMb($I2$;%uv0erEd@Hk(@+faXHbUA6pTRcU)w=~>6We5y)aj|7a6A*yvAEmRm8)=Upa5BSj8zDb+B7yKj4xh%)+wkO=8SK?Dl#MAU46?o;Db?2iuwju(VJ>6#EY zIq(%fYUW68BDTn7EotemQ}G)Dr_eg$9PKz^DL=}HP-((t{n4WHSUG;cKH`|kuLb4T z%67ysTN_udApbh!MRi#)XgM)UmpY6gG(C!axo0r$Qqt0`J{V7LPzm`L=_!?yEAFNR zDv|0@(mAKfNTspl_-LjS3tZm?O4*NH6M|-nXyvHGDaVx%l*iuXdNJrj_JC`9u+@GR zo80N^^<-lxr#Y1tWKWm7u_KSC&Ugqz#9*|s32EFsDZ?yWChWRrSVePj$F2l6&nKDN z1eapW8fgi`oOj#a_NYiH!%}HVfWxR4oOCby#6~z zFxoT8YC!%pD`ntvdYkFLoz!}S&p>Fw!N9Ot=@C2vL7vc>k8-=j@kuD?JUWlcf6zU* zD{U#~;Nodk-&c!_|@@W%)?9?^YPvC_{%ttcuq%Biex~J z$gcQKWJ*P2_Dq~dYs=LSszzM|u3SNeV~!qM7Q?)8?A939`H3N-)3<>+of+d|Crzf+ zBW#jv6cq7C9h*9tyU(?9(u8zK(4oUqxK1o(Si^Aa%po&cI);0r^5)xCto+mE>kwm`Y9-`sIAA!A`C#Q4 zoyYP!E%&4=bn;}%te@nj)uhATFaEmB_*E zcFmbGOPSx{b~`Owp;+KX%Gy@@fd$$>*ru+LsLPxAXRvT^EVGen6eI71zarvF>)o8zX{<%=pgmCFRN@?t{lv;all5|xBI>R_kw`?jsE7JQ zfxL)%rB7s+@G@#ppUlFGXvp@7Qh5;#*FI4`FUssMs^CR5DEo5l;YB(9MUA}3>Jiy# zLNV4>UPg1OuO!EdXaM(#dU%nopQw)Jn>Ao~fA>TYS4V@ZuV4}{qQTWCvhtz|dw*Fa zFQd`Zmu)XEq6ye1YUD-L{Y9sF(VqUIv%F|`e^Dnd+S^~$W9Mc2`pd*4$o~EPMUlMd zK!1^e7ai;`O5#QJ{Y6>4=x{$#9Sv)1A<3ylQM~)B`D`?wdRw`&t{EJE6sx6}XTbht zo>Av}*TCMzqjw5Yk9(6wyNmlRYu{rdSUqxQQ z9mB}qA&Nt0aOR)f?z}17g8+{OfBTLeMx3Q0`4`&IKVXLIaKwU%u%4dh_Ho?ip;f<;OW35SQtdyika1yyKd? 
z_$}v&m2fL+!`AOKBF%Me<`f~})LDj*UIi2XqQJFjmZ~rDG`gg}cGuBaL6KC_xmZ*+ ziXwak)Yoa;eSgh-ZC18mV~i_1GF4C-<7$X(ByQtuDZ&2PFA`+TiB+~!2Z(=!_i`^X zT;NwYE_69ZUB;TVeMzBhx?@{DV+-Ya&Ou(7k6VH$FO1vbYMnDlP!!|(VNMpoO>=|C zd9E*G^Zww*X`D(ebNqX}}t1+>PQjRX&SDA&&0g_>}TrSv0II`E}0Nzi(wXae8U^zI7JqUipTMs(PL#Y_s}n}vT8Ct0bP(pktt)Ov&j^LR4IUP zlT<;cI@~iV4#2s);yHRGY{CJ$NxDl2pSzM|GRjyciy(MPwus;-SqdphkhjxWd$vL| zQvzc>=T`4jtRsnMAyoDyy6#qNiR$BM*`w(f2n+b^c>Tqa_0k!B$r{$ExDzjq%1Ci} zuRsDpmNG+765~3eoIkt7TLE|RBff-ho;U;LLvb3Wse~sZzJBg4*BDjtL<@SxbCRz` zOPORxr>OV%zE5>w0og#$Z*}y5g9+iA7m~4}1$TUUqRkUqv#4WzE_x?auIokhyeXD8 zx0{-~GzBN65NPOqal~s59&i3o7tjkOua;losiC`W^r;dq^)?!rc53E^-$}ls#Ht+{ z#PK5OxE5LTEnSQiy?%9lp_y=<>+iZepE}CLE&2g{&ASjL^)ESFJ9Ql8;Js1axf`wL zZ!NcJZ>9OrpuK}l(D3P8<$Gvga}Fww>#dk+;fi=^R&UQ5kW^=(fHKu3B3Ry%7hr8( z8RII}hs~sPw~`O}r`J3|N6?9Q5tVQ&`H;k}SM@X_wrXJ} zO|uH5d&E|J);MHRleSKiGAapHGsO$?SnJ9Jy2+h{;jp^cuZ~hWg1OHymwR`s#b$}U z&y(20u!cE)7uZTLdHLDIJ0#DoBv*LfzkBnf_U56qj)nbux=)X!HxKXT>HRyiFOOK< zn~6F`WLY^r84CF^hWUW+a=lf%wpkVgqcK)`LQ8oy5zBlEUO$aK==>IobVLpW=qOwn zT-Pj7Q;`$NdAL5fKY4|e+^yAj<6|5>@1I7VMbVGU&V=~tLa`XWEKYS8UP}cJ)%5+f zI3k7n0n@a~homlXOsJ4>Zj8>qbxGefp2q(nTa1WhH@iw>XWGMX;!4X4TtTCwk-?(e zF-(8F)X#amj#B7_?CY=H?z&Jg{FC^UJfE~GC8cqv{*>UGHxbK*{KHqWFp;ao0>{y? zT{rub|4PTzQomYK(81A?!e!zOsLV*O%8X6v%e{{m8Ml(sISVv8A}ET-$VO5`Gmp*# zyjJ~>Mbpc#HDmovj|-<6R2sK{Mmg_#Y?z)*M~kK!DbKOE>6C|yn@e*&D1N=K9j3_N z!pjKqrZ^|lcu7mgBdPofzM`d7#kadZ!p>gGahpGZ8{<2*$}G6o`%*=Hm}V_Om$`#l zQ4$|i+{H6JABr5^>HgdE&?jks&Ea2>0f08xxGHZWXT_HF;g9KX7^2&ML&{>xcxwDkRw++5B+azcFMdb5Kb}tNSgGgK4 zX`7tqlX2-aEDFQ~xcPQ^W=ozB`zo(uGPzcN!giC^NLASqpxmuQ)OGwu%OtW7iQy1J z`DAyivkT{ipjz1InwAtSM4Mc?q=aa=yjbva%cVGcp+H|cZxPpaL3Z^+Y*aH^XDk+v z{Gcj`B-p=p{*!hyJl+R$8UMnGE`Zp7=Jc@4g3OT#m)IAkXo-9Thp-wtrE)!jmFiqMQg(GPjD3`}pen zHRt~J`bu`&k{7)N$wKEzm^{Z}B$d-|GTTe(u$9dDeM(!~!T`N*fLHPh#wH|zdfz~= zMqW2B)NygCiedD~66ptiP@0;kAM0+Gjdfz0R-Sx?O9N`a@B)=`7qkLS- z4>x%8sXyc!=cD)a@k$~*5|Q3lv?H;BYSe9&k)ff3S;A=|>8e-gP`oiX`(-#_KMf$}|ye9kg@#crw1K$ZM~X(@jI z8Mk^htxYAXw-I!&{(<1ToKNU%`9`juxSm`)El`*bcwY>`J2oB}gDxWBzBR?dgBT@# zw@)MQ&AB~8cpB{X+am)!no&k&e}~vW_*lm-&&cPOYU%NVzka)J&6kec0n3+R0$oPY&{j+DG#=yrqS#yLYuwzZ zP*-oET6)ImE#Y|;oz6e32Km7DWKM+w{1-)PI=%1kd0h+gCyYh9$Fuat;}PGQucN+r zDE~3O7gg(L(#&T4|Moeqnf^y2(6xv1*FVjsYPRAq)PD5@{p_PS=;$SZ<7=VgS5eWN zpa1R-e~?nEohOb^s^gMRk=W5L6rOa4Z%?`l#lm+bQabQ7*y>CKm7s&JwFQx^#`Qo! z1j5FG$>F4C*CJ*CAQTh`5`%m|Jt%M))am-QV9_|r=ya3TWKbrk&~-~;uKr9Z$*+Ef zswxnLu3n>l^%ymmjA}!E9f5IpAMhh z^s%rVht}=3G$E4}<2q-XF^bMG{<~<9<8a~{T^vT=L?y*b2^__X*psezi*x*2%l~wA za#=PP&miw! 
[... remainder of base85-encoded GIT binary patch data omitted: not human-readable ...]

diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
deleted file mode 100644
index affda60449b2..000000000000
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
+++ /dev/null
@@ -1,4 +0,0 @@
-0x401030
-0x401040
-0x401060
-0x500000
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
index 974e565e87f4224cbee60144db23681bf03c0bc2..d26d33a862d8f79c2143dff97d7178ef4ca57b6d 100644
GIT binary patch
delta 443545
[... base85-encoded binary delta payload omitted: not human-readable ...]
zU5YSTA_l3`=S-U|cdrz_0%6%;q;Pl5t3HKO3j$Jv2!D|ciC@eU{GHw_jky>(?A^e# z!B!mjmo?nBn$pr~;|p4%97Lg=l9|MqQ+@F*pJO=0X}{#x&ibOoGB5VG)u4tY2zZ?n zd~16~?cooEIZ|3fh$EGmdZWTpFZTrJdD1pQ?az-|l5TEmojSZ)4aC|r5eG2+@mE7e zi}F*OzC!+1s&c<=yK)lrSB}^|21?e2D3Pi>q7lGEo30GPJI_#t(fY`s8J1< zMm6rrCP^=G=A*XXhif?bAxHsHS6az67g35Y}NS0V@g`ZVbi z1~yGeLFMR#i1-fr$?;jPMj&*W(n;A5&Wpx~($I+$Rm4m~he)?$9K%!_ry+U-Y;|UvV-eG9v1~O?Wx4bFX8TooJ&0QK2J0r|XqI3z6q#Uijkpm)a$!Kb4 z7-b|(BlbFsyC|M9jn1c9bGqfgwANtEJW7Rfj@c9fOus;me;#wF@$;&uIbK>6BqsP} zO%;O*U7ikDR+R@hvbXvXOsA5xTr!Z%B)UPslt(#!5m&AP1xmD+r%}BRabzC%A#p)6 zvhhA7F?}E?#4aq>-V>?XA}tFWEE}IlWf3Mw0Jq^ND;#|$PoyZ*Tns;D((ScvL|M8K zvGx@saL^GhK9`v=YM9JDA531Tk^%&8DE%j%?kMMJ#zRQUvdmKpjqqmjKx!%){RSrr z=R}QFi{$lGty8EL%j;{mVh{JL1F60|kp2f!T{y%#kRl-6L2~#&Xd!I`<$+X|-+|Oa zoJSr=xi`xX?i^Yx@)^FN){RN=&dl}I|=ciX=F@e+`!UtYb{D?g*0Zav{kR~BSl%}x2 zsV{0Q0rcYWBlSj&2{B5VP?3dd6OdOT+j9U1_n>^sCRK(=J;4Sm zc{>BA(@V4PxypS^@2@AAQMeK_?LE}qsDxzl&Aet0g30G$qDu8cVPiSZLE9Z_50GOD zuO>tgV#?l^by$l?G4ygG( z7*&?B)a->+r8`6)aoQ9JEl9?aqxHjIQpVzeTw|=Cj3tB)Rs;2FSAcF0gqXzzDM*mZ z60^{^fo~gxA9jdabckE*pawxCZZW*#7PoGXVsT59<|l3;n&HseLh6?86}F_Zu;n%g zTT%hm17S-V1S|vaU$vCB^n%2t2l6>xsBnPRoP{zr%(plPiCyAS(Bj6}0a`sCF5Y@z9J4~K>NRu9>$;r1jLTWqIxR_atxb=wUX2PuGwsV2(aG?Z}4VhhZ%_&v5@ zF6XPy7R=!=dJ)ND3tnfOt-~4-ffZ$tvPSXgm`JwZNzT*_t7&~DlM9@27TAK;*w)rp zGTF&_^3gpBT-YsW2(8Aia-K9b27<~yXw+l4LT^fvsN0TvD=mdGUbbKl2hu^V3+tOD zlv%>zi%8glspRyw3MJupiVKQj)?gxZ9CkGj%9y=45<;1$)hs2_ma9Atp$yFdVhyqY zBV-7sFb#4;rIIIkAn&wo@eVGq9oC*`nBq_g$?BN}*pwz}f;vK3sg|l=+B_l;D|vI3 zi4$GqA`J(+q(hnM!7sW?C0k$uuyj$%5TS9q9FYC2;eJTK3Qm<~fFKgzNOZqgtp{ESxjCXOB>6kz!mTS6weiOCsS{Hpn z5MNDdUxjG6$#6Ar>)^J-?T0%BcNUyYV>>n4q0#Q_kL=1=0TF4NLMS*F@~1|+DLzaM*>xJE_E!Cb%>bj9d!K1L-Ez&IO+I@{bhTQatIRXb9UOT znY}4x_kb@ae}Zi>f@kD&PT&Ojwj&Bc~vAb83O zTr57TmMEia0rol0raX$YUGp7qg-*gE?Vkaq_dqNSpHJc&=p;28Gw$#cBWbxkeK@*YJ4HHC%oR?*Fsr8UZ|5*15)a+)g>k-)U0k#mikIAi1I8kwBw*XJ5} zoag%I8htsRxwDoV>y{kW$hpRk46N~UjR72Y{c{Zyhi5C@)PJkZlmeVENg?B0`TJZEC+qe`Vlr+DX#i<@lR6g!DJ;Log+vzwgE0 zbOcR*qtkt6z2jL5A$74ru^wZC4V7Q{D}*d69e;L)UktwpehEG4&=r3SLqIq9BjERh z|0npp;C~6f1c0Pm4Z=N)6%~p&r8~muSA+ja=!Z(xbb-GDJ^=I652N6(!uL4%tKpMO zVJ-ao;p3TznvE!5c?f^}4FM0se-i#i_%FcU3V$E`N8!H%|1tOfPWhPF7NkY_!R#R{Il>= zPv8%R3J8);Z9x*tH^e9E$BB29MLLi>tg~%wm^&zh`-? 
z@mKG+S?sF~;*SXTcoh4UC?2nOfVW^g4ntzG6;;9b6OTjN*fjCXcztS$jf1fn0V_jI zitv6%dz+XdiYEY8rl^{LGsH}ewayST#q$*0k%RfVK7csg9fdj(&1p+{qeRDvIGO1r zhE3G-T!T5K_@&4hE4FuYI-JqyuAySvM16Rt?>Ua{A<0v42ckl-5t4V4tgTKEhmi?O z8Aj=nMDZj&GoXN@nuw25^$d3phNmbg*l$wPE{Ls@(3HO7*d)D`yDulTn$lOqOxC-& zCvdRUlnG+WWSsC!#8dBjsX9lvr@&hzs-~c47m00C^qjC4Wfou0x-{|G6n%z!Gl%2A z#oM*Nh$*+=cqmX4f4xO-M*hS5aOQR5799J;i^8cmL24!*nu;psiyhPSC|ffP+I#_t z!ZA&!x6@Omq3DsKb{bNY$`m8LDK1co1(YIBw4Sc#!y@&B=}5L(+&^6(r2a*mqpw}U zGeghobO|PA=ynAyH6(<6oxe%@fS&D;!kg0^7LE|hR4DcHE>SxJIJ`@2o1xzf^}(PD z!2MPf!t?yhc}P%V9s7Mt>2b7Gf#be!HHq;vzIFsVfy&A?ail^YYSVPlv{G+@n`^@= z(FZBwdGeZxgepD9-c0wtRWi0p596;voA6-py;V4(ZYH)?q4?Xyu_}Eirg-*Dy+hD5 zIHIIITC7+(6D8No)bri%b67mLj&OgK*gX>`j3+r*N_I&4x=N_C^uO6k28btS>9L(g z8U%?^p$%$_+)nop4&yUJlJRg@cPk95g*Qy7v-R+7zo>S;C=0AFdAjH{TYov_Hm;C` zu}d3#ZxWs6=uw3xLf18je%bro4tqgD_=64N-xJg4pua|nhvw*+wvh(o-``Hm(Sy-> zC#!Md4N6r5?<>)%7HE?xme+!AbrSn(^-t)zy@jBp6UC*42%JM+oG7{zh_^_LzEiIz zg#PeOy)AIrvj{1gi2;iM1f`L;R9spF)rMVS*kYW7e#@vr2ISRHTe#S@Snr78|9LU8 zrH~gVN|pd{v%2X^P+@{`7uX(BcwCo;OQCb5sSi4bufgBz;I_l0Ez1{QYnjQvd_L<2L9U-sIh%=kw_*?m_uoyeev1 z|MHdj>;&EJD}$rWyJ;l(_xLgM0@g;mswAV}#y9j>^BZpHv*^~I3u1jNMBU+h3IZwC zWvqMvq|JSQ_}{~Cc>B-)=|S4)Pe!^ma694NqdaiYb8(8&7XS2z8xJ=hZUx+9aC_lS z!6{hGqv2Y^^@p1bhXtN5AY?!MElnnA4~yp78vf41uAR2v=H(Dv)0Oi0-n5LG=cjYf zLL9iVsujuGdn1lmBFN=^<|=7}Z<>U95ZFb($KkMyL5c@yN;7O)8tEV){ksARoW~TsF9Kx7WS^i&Rmk>()R&QQ3F4!#4Ub0Tv^$qFP7M0Y zNQtB3hjKuwHwXHLzXPxX2=VAs0lpmv1Sw+URU^V67AcWK?o+bG=~QS2|>LmY+ z8>Y5#K^^K|e_Yqxi2KIUTXa@S%I&;RE8vh1L50@*!a-wgxGvu1mKf!{@jpb9O zQFLSdL){_Mx}Dsj!3cPUGxrf6y#-w@YO+j$Jx%g)BRc93M@tJgH1;R>J6LS^#qi|N zLhvyM$g3-KB(R6S=}A$^tH=yCap;l}BTjv2I0E0{6j|I6V|>U0K1lKJ2tpvgju;EW zZ`>IsqQr+F1Ab^f|A-l>rH}6=qLjk0-ZZ+q6+Vt+)-pJT(O?cfO%$OPn{ig3&6+bE z!PJBjG|7x_c%(+7>fqu$bbJ;Q=;DuXzaggKA@b5<}!F3EF1B)z9ju+T{3EfapsZK30&aX(WcvH~F= zJOD{RoD>i(WT2g$#7KZVd-HB_zN*wXNOH3p1^ZiJrF>ba-0hsNhm-n?#%wJ)PZuiKgwqtnynYj4vf}KvH)(#gO^mTv(Z^;kcZn?sTR}Lvosdeqeb#OGmnuYv|7WgHPl(O95n@tkh$oo3C>e*)Ko2Z{G_MICqlMa|wOp-@){oSP zT^b&-+n5>>tpNv=#XnJ{S~C4kd99D(K>Rzo7< z=oF!XLoRZI>G1z%9^ad}X_KgFOF2OfSDo(4QPnTTL%zUGTQow4@Z7&y7ZbeA(slcy z6RvQGmD5RYKFA)7cDY=kI9#mPE{7slCTf|&rOTEjQ_DhR0PtTbRR-|)C`|3{axqGx zM!d$zzu6vRpWthVoH@g}As!mI|8PU(g+e-4uWWFxUWVe#bM@``n+OdnVe#>{+x;BE z=&~dXaG;~z?p^pMT0!^UOk>+b=t>YyQ`O`L{0ZIOJVfI6EC^gK{Ck{WnchU=UEHo3 z)~A|XLBz_ZOy#9?lF-R?pKvRQ(1m~>qunPFy8VD-61scJSk)h!g|P7I|z7v}*ESG=C%a3$FDT*-E*y4wS_$+)E$Xb;87Dh>v;349hj zUZZ2!n-J6AUg{d4PqpbSXWHA+=`etV;ZPw>yTz4-GhxWXUC{s=+uP&X?+{la5GD!;6Nv+fD0?y=|E6ngPy@Rw2P0$oR4~rt$s2vu&H|hTz5&dUXiNOxVBG@y=A{Qeg(ZUMuV24_(l<_eFsRk^& zMx7pmllov~xYES7-8L5aySh6f-`_d?>kbk6t`V&fTX8pssG`YWTTCWNoECGy3g5Yo zhrx};t)MvNuo7&$WD68u{Dix-<2fRQ5rH<0vfL(O(9cFp=0b)Nh?Qu7yJIS}Xwz}Z zMuUudTRm~BWeV;x1iBPvZ}O-fZ*T@_p@QbhYW}7-nk0L3n!o2l;i-aHoP^$cIDn4J z{1e!j643o+DmA^SqAZ|NYg2GtBG&G~nG(OA_kqq9q;YG7>>bI!x(O_Tx_ZDuW%N!r6enm!SSs@suOWyKfoQ0CYNP+^^k1# z+FqyvuGt2Xfhh~Wtj=8rzyAX<2bUQ@)=mkm!R^MtLDkx+^5Q6E!b z$!IN!dhN9{fd1)x^VunOB!oouYr(KVa~i2@LvD-O)`ru|vn9eQ*|r>i)Z%Tmo_xDw7IM$fHN+$h8`T@R=| zLwP!T!=KFb()2_<=*~^9?ipC4aLnaO)Lnrf>_I#giLf{KU$8*qsCBdtGPff%l5`vm z+uTg_<7jhn7Y9gl*u=I3Gff-H%Ue2O!#Yl2EpIFM`wpD2c2i1m-U^ElArU&^?L3b0 zU*BZfQXW@Y-z4zyyexE~_3akULZQ|2@}vFRr!`np(fnSK%0vu?;0^WHF^psz)Ol(euDjn8#vmUyfV3q zCa-4%VZc~UzSv7?pkvM%`T(=TP=@43m+}m_?4bMU+c7}hv*jdrg>tps_S)@ovTJRD z^q?CXe=#C>wm-_y2M(&%|t>B4_)99AM3MnZTOuRD2owjA#48|M<=J zc<*cng@-w4Iosc5Xgbe!jmGyF2Q*`H#FPCynS#5MhV?c6wkl}n?q~-7CY~pIZ!~>l zGw=MnGX;u3>D1(4n(Qu~>>WO05Ydd0v@M}ln>_6&97hiu5RZ!&MHxK3>>i0jlQ0$! 
zbpDtVCG(IFfslzZ0Wg+vhv)#+V-)&<>A;=+s2-9I*u=w$M&uxk0b5c&|8_J+2;7_= z)>nR`V$}vGqQ`RNmZ4$AOaegH90|FXHkXrGw=Z;p_p}A*s>>CoyBy%vg0&f#)WHr{ zk`4u}NUXlhvgrYFnxC}u@@(6rI!67S;J|F`PG$+5y)q9`z!EwLvDW^!ib?JZr`7S zk1s|9eijFhlPRQ|%ZrxDz)!?zO9tM{X3`pX%ZH z^f7op)J56BDV}wZ$)F$9pVdN#R2!q)cze8wpZ?hJu+5|hWY`=)5Oyd|=*1INB<)z4 za5%nA6{@j(ZJjitXpQ=j3ptMUr@J~^ZE4$Pe?l1v2?}X}kHG$8ZYvn-qeV2Z0<3o` zvz&w##B5OfidHxymfS{-pa|kND59pFTz!GrHBW*vIqa?kFdAAA-O0u_Aq-(fc9*VC z?TXcSrj{#LW3DJM8;Vk&P0xf(9OCkNY2Fe3>Kf>kH0NGeLa8s@#%l&0Bhxz39OZy zN}Z4X{$qUlGYn1g=`N!1|8$6f=Zr{;O+UgZEH?cy{6K{lb8UZ8+( zk-ho5F)qD;WBk`hnbzXcC2&73J(aUqTspP)1?;J7{u?em0Tw*`x%6-b>c^$0Z~~J{ z|B}C~sVkHFap|#~z~a(B;_v!gdJ!Xlm^> zaid(i;4F=E=^HrC;?gDf23-0BoXFzRWup3AI@Mz*`r&$9`YYao%V2HZ;?f&d%%4jqsx-5g+X>Hovu4Y>4&IMAO455w77I?2`Q#sk4vZc57`fBm@hA2`4wL09>^5ib1|j_{{IM>)je z()}qAbLp2jfyJd$)jOi!g+qunbN|mh-y0X}^&n)B^xMTI3#J-~$fmnYjZv(c`uj;H_^}M*0AR zQU*TzjBO$T5d;zJIm879k;QO6IUA2LMHYkFpZe9$Hdy6#+S+73SDbtuvh{fJ1H71c?)Y-3=mz7_Njh@x>OKn~r6TtY zJ*`-0AOXC;qhblb>s+0ItRp~u2yqBXhdL*RZ|5`3NHl`__1|LU8yKt~#Ev)gIL}Yq z%%y5FZfubdg(T&R>Q27_%`l)~r#BfBr@x6=P@7>Q@Imh0=8`spB~b4 zvY+85+<*(UgMDL4_1wVWa-&9j*bSo7VLiv~A57KWAm$xL6<-(Y59@LDVhM*fh<%4) zM8!W7wQGa;0hxNlHYG%mOaXPzk6a|Fic4LO1sqT3#TKghgQBtDAVwc)P(nNhuRH=R zO#jr$9DLx2-pf6c0pIMyz{4WtEdW`{!SZ2k67g&l!w~F|Pi7Qj#0nXKq5vpCEPo6A zK3RPFmOjh#1!tr~;noF+1YB6m6IE}6zFZLlj$&Z0hy_RW%t&c*f3vSe)Uj8Z(qLsJ z$?4rOllr6zC_zuN%63yfUlHmt*oO(`=)e1-sFSaVPRB6lfb;$Vkew@H{V~+~Zcdqo zlp)-qge}{|kz;xn*m|NelEhz5>IrqN-+{WQ`jQy%F38|ZnPLXfC-_XS;1j*Fzm%zp zyw5?WlHb$U!9vdS@9BB=;|NUJ4Lh=TfY{=$5U$XPgyX1WLKeYa?W-i!|F{^5C>097 z$1(Ama|*h!EL%v;I4-swM>CFR%4QrFLGSB>QZ8{_r5-Keh_s=W#wP_Nx$G_Gy|2e5 zT;*g(St8od+6<--lBg}}4!#fiqV5w{Kfs9W6DcQ9*Hc`RzxrxI&DbYKpMZj8NH#TN zvM-97u}^HGT!kFf(HBMa-zQEY7vLHvQOqz=a1umqj_7d`T72)-Ej|h2t1b~cK17-& z;`E33Tq2B*V1{UkDE^_TxJH^$rsLC56;~X?4Z8_M7>;F(&3-vsDskO*Gk2I~tXz49K zuWwNMi`-A3QrbUW2Pe`X?BkDI6@#89-zQ`c6@l-;Nxj$)2EGKnS`3=o9PO zT+jnl^j=(!8xz-O>56E2p0j$*@`qu3+%HsZ?XdUG{bBwR$2j-vP?RMT^ti!nR! 
zI>n5OdWfer>zB~mu5xBj)pP4sUewzKgYh}=ExNZ%T>TcBo3$eQJN>pGun_0~=&*_6 zgrIQo!go;Q%&0s5oetq>tyuOWQf{c*{Uc~JD8uRhU_4$DgMUKk@w#O{QOoym>Co!9M=GEr%1V`--jXCaSciOi%+jXxAflo2OEHW|1#q) zw9>5_z0j_as$nU?kcG^>Q0B~ne-CIoS)1`!j4|JbBi&8Yc)n>Jvwb_LLIOnsaqt!m z*U&m9naAmkH0CAD74uc4$mKPlPgXKFB47piaQ-@*Z~g6j@93T`gk z8o2Fn`|nW2(?pHW+KlfF^!Le7BhOPffQD9zjfwWrZ<sC7!7z#@opXsra;`0K8#&p-@T9$}G_~d9L@Ys-mug9%8w>4R`j5CWg4S0Y zaU0>;3pn|6IG%ixSreL0MD2F=lgWwWp(TB%Fv5%+w|_7}*(v&j8A*Ml#q`I5Xl19u zlDK}vuu5&Q?p#Vc+Z5$dltLm~FhpF!lR;VP!a!noXxWVtJHm{p!fl-VE(pUG@(p+5 zfeX0)ux#ROGMDaGN#Y823N_qF^K9p&q`=yZp&|yS9);oP^6nfpTxp9ak_l4B%@hm5 zjdlqo9Q829V>V>HWbQ;YJjQzB9tegV!d)$Wgb6@(7HW?8slx zk^LaTC%zAw(jcXC#L5Vx6%50Oe@7TSJ-2Wvv@%+^GS-O{j}h*^pQEUSUEO<;bAjmL zF*4P2V!Fpj$~eevO;tDGg?N*JnmmXGX%u%et^Y}42LY1UVP|HB_?*4s7y7){JwV(xKztXP z+133ZqPL2wC}WaaTEdq|MK$|ad>Uo+av$ZXLa(Apmic2*9Br(Q8NhA%CZN(@W4qH; z&S*!d*-KoCHZn6*j$Vmv)-l#!qjLpESNC2@*H7_2F%T{qUiB-E`DN8*g|*v~n-Ihaa`w$x5S+kiphmpCIf zCKf8QFd+q&Q|?u0`ceE?Q4kO4-*I-BP)E-bv7w_-`t29xh{jEVdGXw|hw#-&wjl-8 zkb4`#aT(SYnJKK6@jje^j#1~}E+KUTZSv4N@kVA$9rw;tyrHEwnIktq6eJkTQzc+t z%@I`zz_544`UE2`X68U@g3Xo+MWt2HgCLMQ8RWqPRBnQ}DziEhjU@NIoOn2 z64CvCeOzG?3h3usvu*B^>8On_=DA6c0>6UF0^hzu18!F}HBZkE7g5 z7(mP9(zgTNjp8%_xrcLf6_-xy$uME0qx4xE+=+vUR^B2yr5jz;Tg3vz!hGOE@H|U6 zQ6vvJRg;L)FmWW^=x4hySlp3hBtjXlQwA_#qga-Ke4EAN8Nh(2#Z~&+A>2(7cSVeD zim!`eK~tkwz*BM|;Kvix$Hlp(Mny&ihjqdNETt>7LOsc0k|;fgaucDi@ft zc)G5$AAUoF=rv&b{GSK*WAWJd;dMB?_SUr=FPnEgTO0nY-Fr3a>Oqg;9hO%?&-fPf z;ghcDwE?!oUp<*1E?-q+`L?x4KA=Xg{TeySI^f}i6|q6lt7}H)uPz!Lz4lF9E37$$ zsC@b>@Ignkn4cVsxIg8K@daA+>Rlu7MjW1vTYY6j^r|t@q9{J7x7ZjTlwUmn9UH1H zT=DtnQ6ooe+Ses2;`zg4a5?!)g!hJ{_n~NtwTD}Z`^_{ZaRk6WQudDhbM-X8SZKV0-y{8LT?6A20AXCKzVokdtWS~G%P!P<{M z-h;aUM=uzZq$3g|+8*u%0QZJJ3T_(Qe7Jkyz9yjXpM-k>jx4U8fKS#|_hM_@9sYQ@ z8n_j3WN5WDDtaRv>H_fru0rs_R5!4rvC4r7e!2*(+z8a9O!1GF&9fy?|v~@ zhNg5kk`jpNqGe$f@8C!;%97s97n_O!X1+L5j1}TOP7&fuLCfuY;qHn>ZM*2y)##x9 zkC@lh_yB9$ux>_Pk1L$MU7*Q5U*d1#epXwg=l%UKHo zA923!<9NQ}u;zBBk_u}d*0%Fqfp+Df5?uFp#R7cU-fb>^*#iOlp6*5%qK{)IC3e= zJcUlYdZH~vw-yMar!iEm5#>D%Jn|xT^u&^~Kpg04bntlcDg8PukoO`-SNA$5e=)s` zG@F(yI`=XnL1>5d0(TvdEAH-Pq-Jc(qZ|h?V_x)9bSPdi6q52vt$dF-(hCbqM{%ha z>eQBFsrudAl$UlPT|{^}XBq?jID{aN-NN|b%9J;2*M^r5KjhuKglc2HvOZ;v?B z&xrNx;121E@oDFm+TGu{GgsoxF#9#^T+Qv^0Z#5vZOY3qeV*VASC!>rcYh${t>RLD zV9RFV8~`@u6|T*0+j-#D2(^z&27PF#HjCl`NW580M=JLgP9-l+68>$j+cbbGd5Xig zz{KiNTuGAiY_m8r(8zO7XhnSi-AatYJ?M+g!aa!iKn|AN7j0_j7cK@5LT`U4ssiEyYem6e?(aT>!4%!YsqO>#K|C6FIH)rw<-v6Xs;gPU+LQH#yO7-IAa+snZ@JdbG{A8#ox?>1sZ z%@D(p@(xGJw{EFm3EE=FH;b)9j6t^SRw85txU+QO847-Effzm1xVg`FOg|-avzBR+ zv|0_70}L`m`&L*!t-%yFoF@dOD{O}#4tr3jHyYuc0(crxR6DG?GU{GmSla?K>d=k1~+UckAviGahj;N4B?v zv|0UoT-hR^ohguOc#(P|BtImK^XJSOzGT@5msaW%#9aUYe_@>O0yWlX63G38_)u&@ zXw7cy^0Yt4Mwh2cYH^TQTNgRe_;)alHcwU>nQiB>vgLBd3);_A^0(ZH)2e-nznh|a zE!o)UU&Q4hT8yYJHUgsL5#BwV?9X2G!i(&ps>%qrA>|!+Ls$#xZ+~sIEr5`fT0fGqK4;?JvzXRu89P zIt52evj0#Jb4=ZDb~jlAr>-U@n0obS7d$oT*MrnkB(kJlT2ZTtE?bRFb#reuW(UJ8 z`lT936|RQSEGFTO)elESK`ltvC60>mMG+M`DynLY%*f9;sy=sfG)xTaZnPkK>y-Nw zPEwz}IVutsf_n%I7oEErH>hD^@j~!7d7}JI?7PQ@Q%wX2lwAJ$h==Y3mCO?_-U({c zgJavN$6&M3tSeP0PyCGNs1lBT%DbGa${QX6mdCcOJ+zp%<$!KT(A9kyanONvZCr1t$IBVrgadMFn)ngq;9|e#20_tGNSNb#TKwlQH zsfvxm9>!_#a;#yNjBvgvTnx^niARiCY(&Pq%^*L34s@M&gD!W?hs5&5X#7gC4XNQB zS&Zf9khruM9W|WWy3LD|XsgIx0y167!8UH;duU;W7`X&_VU~Xh_PqgI&?hjh`2X?t z<$+OD$^Sh=0twJP(~yKaB-}R%aw=#Jjs!^v5Fj8BCdmMST;$+TG2nWD0!t7mR8UZK z!4(BhT+sy=Tu{)Upx}ant1BpIaB;mrf1j$?Jv|fM@9+Ej=NBz{Ue!D5)vH(au6{jE zE7lgs%E(2;!gVT$)oYfJ)!o5tDOL1bT2txG~mEwx4 zy*89TwGmB8_v&HIx=XBv5{tKt8b#5hd-W8vui$$J+N&+?xL0rAAz9GQ`7~9CTI~Ex 
zCI9kXeYAU1l$uXJL=%@|&Wtb5E!Y2u5l-R?y>DV{47XxJ16Cv+RC^>mu)z{737-ub zskL{rmv+?sXR28N4QnaUesW(2cblv|AXj6vKf0NYt$;uKnJyr6JX+nS_iP&_k*{xX zvui!pp4Ce1&4R;c54{m(~I|^(K1v%;71K~ zZ+`jn`%svD($Tx-@IWwNe9&(?Zo(i-;2OX!SP{ zMX~!)-2X5PIjdox3(JzltMwKU0TJ+PNahEH3*HQsx|tb`BtGCy<^NIsdlIf%4|i?Y&Kd^X?r~~ob6UVEvrEWQ)z3B zehU_Et|2Al7QYr=sM?A=cCD`CGMVXX5wV7rtVP6U>C{@x84pPjf5a7+EDr2WOW2t+ zhQ+hEoH=8RghS_y&r-$%de0&6NYWLJ{uB0&?6z=rZwpRcNx|dPX6-mTUa@!PkI&L> zBzHgCO1uL;3rIcm#HtC(^-nn7JT0X@s8HdzoaL*I*RN20E}}zh0BULL2F!0ip%WYQjw3s?n$8bD-MtYy@-N!KkGJUPMyN#$T>`D$`%=xj8eKy*BXK`# zA5@e36K~tsQsRSp$94zenD_B`BP9%7){7;h2&ie@p&{P?^8eiiaSE7<%F%u<;QcL{{!5?da==Fzzs^-NP_iknO;S zp+ne~co-?)p{j@V>fTCKCZpQ@ECZrxXQU-#F>m|`*W9U7xIBT`OYxf^`X?x3lis39 zP)hP-=&f2^7FK^dgM^xJvRXR23DcGLB$hLStbI9rGgyx{Vp+VQGss#R@QB{G?E~$Z zq^uLK@&}cy@sfTVW{g`OL7kH6#YZq&X)2-}hW6p(q0Rs^i!ME)x9Gh?vI`xRL`jn0 zPA*d^@ljav!Ib_eGLDm2znxoNrez$fmL7am@7qeT7doNjMSw@?+@oN!mcDxwCj(6C zy#;=*xm4RxgIb!t1wMDMpnp4uJV2YbNJ%apNxZd1Z`}JY5;b%pnJ!h_4iDQGzP&&U zIM2On3F+F$#|YV(S$-&4sF@|(idstEil+Lfpw-M$p%-t(+WEJFc4n3z(lO37Ofq%m zlqyp-MBz=CS~4HQivQ&HYEC(xmOQ49a{swK&6uE1jqfT3R!!4Jh*jX}T%vrWmM4%z z$&c&Jt1kyg&OEY%bQz95n5UblY`FP{0M40IHWanyc+RAwsfX;y@_v*H^I$Q7Ya@;n(R}+l#GoFPp?fdfPXZ6``42!~_)1QsZ6#IfVx#?VN zqvllmoIVsjOMf0QpM81y^LoYq+bs0I%|QRB+2`-fJOdL?OgjJjcfJ0%v&}=xFxLX$ z)PWWU7!1e)Oj*pApnq7XG#)w3Ozu{q(HuT*%I z9FfAC-`x~GJwj#F*`=Nj%El;OGX)b|z>0?IdCJ0{0W*xh_Jv9xE$0&PPI3wGC|^5> zHSkt=4vSR7!NnatHOyxLrL@s5KR>_h8hY3+io&sPlML(78AjaXNMafYLf!BJG^4 z@MkcmE(YEw$16}XtAG)hf-=7I0u?V3zKjpsb^cN@Uf5Y(Y)p@@EDPVq(wF zSFn!4&tjSQKzS_2{5PruPu!-S%Pi4PnV{l_D+7TSCvpEA((i`ox!_CLDt@um_Ag>9 zgcI)BSx61ket}#PGj?g(yMc66Ff6*Jzri|Zj z)##ihktv{aZwa^Bc0QIpINk}X*Tl=j8xlcNFh1O3{0T^69^P%xYAkI%*F{Cx-Bvw! z*xkjlVVf;cr(2>PmZcb4Z!uT{hMAE|9aPQl&QbYXuq5lZbb~LvgwV7p(v)cbRaSo- zU=?u9(qf-g@aHm-*EB36Ne%o~+wZpQ1!K8p*DBTXMN7FZTN2x9b+^}kwkn39I>#^O z-ogEUgk_l`@oqm9TYJr7XosmXOz)?jmqFu%h11nDysN_RY^Tl#EVCqc3dV#bQKi-* zwY$ez@y1wBIn<7FAc1PmaD>%1-&y^*w`Jj8n}N$lK>u8|m3Gu>su6`MqvQtaIo;}( zJ}|=htr)MGBL2K$XmxZ7f2$2ApRW+_=b`XV#5tdn}4vzljs}{v5$&iRc8{hF6NyEe5Vx zy<(?T)9{XJV|OQ%6_wDsUsM?n`BgnHSd1LCo|i_d{5L}>*#w`kn)a~O1Usv(HaceM z0Dqi~87hW}2M^>P|`uGZ3kG&w92b8@S%@zf#=Yc{M=z>{EDRmpIf$l!#@2qE=fJJ5A%WE z<>dkA;H70i1Fc#>0Fmn==FLO5^tHslEFPSsmQrC9Yx#H^h9!{!Eg(dyye=3Ak9C318t8>zVBhzSILJ>_k)7tWq=g9xbZ=H4N+T#OD-GHYWINT9E41# zYasW2N>aV+ir4T?6;=cY-<(p*3@+_$q4B_5y-o8hjXS8%?)-+JuXi=Ychc>t@;icS zYE1+&U!*e!G4ek|-yPIDcHSvG80dp)a?N5rBRI9vQilQe+cfM=9A*NW9twR(l9q<;kp$#5-P4l)i%I zuM82G1ik~)ZPd<(?nmk+*q`$9sZ8S?r;d1p4WxcP5H7XFyV7~jORip6|7(>s78?Dfh z*T0YUFA*HS(WDkB7t^`-(a`e+ZNKA(BM@XBhknizG+*4K7Lyj!h~t=^E)}%;gfuIW zmC4^6MuyEpU^ut zc~VGx%&YRKILboB@5>h~1+AsXj>>ir>R*j>RkkLPU@aYpjKP6WWKXU(bjLu_%M74&Jo zXfB$ze5gl6x0Qrjuz*x4vV{xN4%2JM)9fiEb-68fqU-4rO2n&v>~x$vZ(CFRN%V-f zDETDBbd)BZ)NhLRN?Kl-QU@Ru4?9jmbWMbG0}gF;7k0TJTGNG-*i_EbDD!J=_{T{{$4gvB$KFldERg7MZgiGgq=cJSr-o)aC|0A^I9Q1W4Z9Ru`30uNB!xyoMbWcFa?=@sx!$A2g z>6hrtIV{-T+kk%Z82uvcjouq5`BRt>m4a!}!QI z@ASBFi`hT1cKiVy{U_@49zJ4?74#Tt^%=$o@6mwI^g(7zNvDq5a{oR-OFq-9J1-P8 ze=$#KTR$1Gcfs722Qw)q@0cc1{O72`6w3G(pQC#x2}v0PW@aMA|4Z-K>@LaL8GsFFK(UwD`20%` zbB(2{f9W^HY_=E<3o>I{-Sql3y@yFcQv(&wwjYImfwR9y1l^2%TdVkE5>TtDn7?Ts zOPDt`Qp`QV;rcJIn)*4S&K~xJNE_!=M)>rDgJ`qw^bPDu+$xje<)RHRa991WNxBHf(G- zmAH)9#CAefZ!@!{uaIyL5eNI@Dk+=4a+hKr5vd(Sa`Og>mL~?5rxc0g`bzKEy`P{* zqGz$SU~aM8Be-;^8Gj8^)gX~FzJkUkQsGz7*gT2BpQ=<^o=CgD((?z5l-V}#&Z&$t zCA=GJ=f%Pv_mw6=NtFhtU&KJE3oX8gH+2#v#X%IzQvx+yo+mhdDMS^hly(0_^rQdK zg^SSHaS}sapu}x_iP~L)o9`@WmlVwnxKP6UMU)itkVt0wC4Fr4Y(eW74>;a;9__yb zq4|X5oNoV;t|5hsDC29me049BnuKseZKWlQR`*TKl44muzoeZ&yY|r0uc4;vN&g0F 
z`e(`6QLe`&?5OEqC7b{?Wly1`{3K2P2DN=eNY1S&d#L6cJ+;x_1>Kb1{kt9=|Gosw zK-G^(*wNj;l3531r%Jh1TyV3)5+}-PJ$w&4j7G;oYs>2Nf+3HKRz-{Y@GPZ3!b9CA z<~<$pX;F5GEaTS&ufAQ(l}{x7GWN_?!CbUAdSFLw+-1mO6V1OY+O+C2#yEQ{ zb+&}H_AnhqjKnzMN$vk~-?(T+wLNg8(AA8Fw;11}R^P%Se<9HvYxSOlS-MJ+uaf^; zj4u967ruqrd`DDrtkz35mX%5P2etb*)Do}N{~MZgP-3WwH}^S1d=HP-m1R0atNx9a z`9h*iv!u@4?Eg2~I#FV)J#el>n1ovf1g)-EdKD&Mt0d)$d`adNROG7UsOra>{iTH2 z>8Xl*PSdYooIRdSUBTg&!g>YWCSi5C1uN*XhV*dC}$uLJ9ND9m;gV#{~f_i*9;8_^8Ih(U14_mJ%9M(pg<10#wg+WkEg zDE{T6-@|ZY@aFnazoB!7rX0&Jr!y2cIEpVg<@CAJA0^+9aD*FZ)sOn9=uwhyvv800 zYZx=K7cnyE!jEv(S)~7j$19Zh6G}>@?IG0E|$JLLb`9JH`=1D<2uKodXUB!kNN;8_H*ZhT& zucFtyNj_ksF`7j!y|7l1V#*GH=MA*tDtyGv5%f%Ru$L!M9RJRSb_1Qe3P--xO3m_N zjd}?mEQ>NaH2YjwaJtNfh#&|53>993ga5^1GuTq%j$Co@KM2in@DI?B*WlpQ!mH)r z`$^f3gTG0_Zk&Tq1{t;Z1*Z?%3)*q;576OX;FpJjQ@kHHKUtxpi($QbhFBeNf#|D>6*fEm98QBhg!}@kcU$aPJI{rOx{dD7ToC9~c zjP~*OHRkqFiq75rF$p{Vd>tjbjGnINsTi>nmkHHz=O0Sgap$L`DCN$1(7J(MLu&I; ziRQTT4fLbS=or00(4p>p4<)({QKlPP8LgrDKDRN*#6ebY>A3I>w8L%W4|pn)(T)p$ zPQsxsyfi2&JL3(M+`#C|_X#yH+QY%sG%%V+zbgqHudP(@l?Fyv95p)E!06cQMTy~f z?QxA+<6=9k5WM5I{~_THfdLJAMP?f#hKj&-Fw}y>u{jJ_3{${~D zwr8M(9e3TH{E^1}Sf0Kd2}MqALchcreK09An;Pvq^lrl9a(wdw3HKBi$xbVw36;bf z9n7(UpBZq@!)Zrjqs_nv1+5m(xwjTc*m2HxOZX_9^PdCGd3+PacY)9x?>t_@j(5(W z5lxKh4*w9e9WdQdmoxMw$|XF3_2Fj^o~Wf&dJy;6qbyq}{ThSA3Lz{>{> z1I~Lo{pc}n=(0}s21A|qOM-Kp_f001e2TMu5jw5@5w{q;A~}b;@4wPHuhG%{hN-6R zJ*m41E?#ec!5#J1E z^BXDAap12|VKbw;%V^2Yap2(wXYb6_*-WBn_U7(hI^X{q+hee#37xH1TKNR~cBH%S zs@}u(8trUuw1_z_Q9^w?qI}dGI^JB0btdoKNRNW0oh(V5{uN;a`Sv*68)4k!DxHdx{~?j882+?dSmZDphlY%5GUPQH_bL!JCN!8tQ-WN>|JqmTQlLAPFn z_BIlF=uACXaG^dQ>HgBd=;A)&p)W7$9q4Qu1Gi9%Ddpkk-#p6c|15HGoPLI6Wg*z-2s^j7p(tJiw6}02w_t93M zaVOLMw&)1+$kh&Bc$q|U^l7n#9o1Yf;ZXm6JB@1xdsitW$GxwnyW1J5jW!EfP0(-H zApyt9KPF*EU4yf?oxH-S*}D0tBye2)dPIoq3-?wU5-Z;J`j@Q?GH%U z@%DeEVI7R>_zWq*@%FhAcD%ix&UApt%T49%hYQ(p_NyiAID6#+9cMp^rgVg|PNc;h z;hkVlpK5CK>+pi4a=iU55_Y`(7L$HyYV_%Fhv1$5GFQTmyPpECbfYchbuwBsCI`pqduLD9x)x|P>FM!81FZLR?^w%(mhUSFUWELuk5#!@Gfix zJOG@t5fIKVe&IxCXCo?pgTzsOj8{XRlrYOQ#XJv{NTcvBM#olt1zm|HsJWO4JFiO( zq2w+YemqH2x?rN!i|+1Xw2w}d$mLjLQpf6ft@KHHrHio;JMoEKA=BA3uB(ySW{N~~ z-c%}*@J(2XYfqoNskbt35rjHXz-wHy=~P!^l&g~ByFtU2TFH6P%LB^A5_WoaPOZ#&F==ov_PV&O)$>5=;f{5(8$)$xt)-gY#$>$nccHiOy$g#d=lUYy1Jo+P7{rY>KEb%Py6g}5=pEo` zoDQ>J2ekL#w*znQ!|%oK?_O*-zNTq20owrQ z0co#e?;cPKi25sTUid58eiMG21$5en4SvAW^kP46*Xn71!$v*eGeEZk7?A^Z0j>ZB zAH;?}U>_jtP2>w$38)1`{T=xNHUZ88Ivql`fTsaiMtPUA=15v*|nwgWB#5{_bD6tD+y6_E4}jtT${0h+#xH$(yJ04D)$Yf*W? 
zHo$p6!ZD-;>;YT_B)x~UfJ1<$?;|Z>9pEIO?eS_PK90nI^MGC_aEt-)0^kS0&<`|i zDc}IW`ysBT0Bi?b1SFh9L4ZAgtAM0aCA43X&?SPAbgilZaU=QFbAn7a$0DJ}*eh%UU909ca6zKrl0T%%Y|3o^# z9>5Vy?s$&P)7mwF&j4wdb!-Rlz@$0{|EvV;1JnU}!Ol(t@W+bw0=zhbu@Vr`3+)To z1K=wFhXZa0908>DLCpZ!eW440tOVcz35k#_Afg}g0Mr5ITn7mOru7GY0NQ`f03ZOp z20{@5EeAoV0AYhs0D$j=UkSJZD7YR6LIAyPKy3lNhJs1J&|zpYK&Km#2cX+ckSHK4 z32z+(W+vka$WB37fHA31YQWIp)!=YAI30lu0lh{d3}`zFi2=(2+W=Pp!_yE3909aV z2UCF4fWf1|FrXID>1Jp>;54Aq7{me80dg|HC}4OdBnap>7EAz=s>dM#APFZ9k9S1E zVJ=OZkM_A0AC3m}zXKN#0@eWb0n$1{bob(|V8BYiHo(S0G+P@mR)7M1kiZP+L05ET z{Jv`;q?U#Y3;|Dr-oG!*4^Bl!;I)TED5`pBHfoMcBf3HK(;33sO2I+5-Je^d=8%di}6A?p34DO zQ1BK!4*=4#aenpo=3t-|#L^yJusgcl-B`|xf|7ybZh$nvM!?k!m$s*yOFMfD#2t@< zkx>qq+y+>OJk~+W`R3*#FxL_cx5V6$=hr#Nb52_{Lo5mgocG@pI0-e$0d$Kj^ti1+Mdv~M2t0HBql#(>jbL-X-G3&*`C9%4uQG&DmI zU?pG=psolt2Q6zBnhSaC0Yv1$rr>qMrvYWv;3VxLj1FKE-~iwXAPoiI1=tQa0JsQ< zLV^7O*?>yG8o&zx?Hfog7IaAP07N|oj110#o&feha$}}|HW9V`Bbc}q6^TYy2b{)9 z=<03|U0F*+hF|K2XRpt|Eua#x5wI6<77+G1Xn;Mf0jSYwzyTC`x*am?gzkp48=)VE zmP0uKTL5KVqy@|btN|vw^S8DiS7by z0`S-EcR^YBqwoh%D!)8m|MsdQpxZZC@mnhO?@0K)H&pnNQug3UcxC}`KWX)Uy`Z%I zoe2M%w^R0j`Qv~(z_R`7IPLz2)pK(yPn+@C@q1J_(a+&(9O#ASGT{Qo2K?LsBf`&v z`&GoMjp|wRfO`6OsAq4qF~{q@M#UdtonpVxQ-zOO&*B3r-r>j8Gx;6$^b?*aUpw{> zg}79q68e+W^ELEoE}-T%^$ee)@JpbfjGvyR@aJw(&vXm_-4zx75emlf9vrUXAB3iJ zcti*FJoPTD86zUCn$^Us1QXFk7{BD8dR84%&qx>r#-CcHGRS}p;P6%$34SJ8@oM_1 z@EO(%qbtLoYLZuPt%t6I2j{CgF4b+F>i zRT-VIBy$1Xi}M@MOX1TkNfhe}fA=5M^NEq_IsX;NpA#(0R|(E+P~pQ7DjeNkJ=lA$)M9TP6->Ro;g?gSTRq=~qLYbZcPsYzB*DbRURugoFbz{Oo zSTBCID^bt&H(87v$0?#Lqt-P!;vRHRfITF(hrq}#{M?Gl@~ZqwP3r*s%ovUKKi&|BKW>k1M0ZT|HV%j6wZ^n+qPNA6 zb^KGyEhw!h_Lcd|%ejOYtuu@Rj28F?4cL-W4!5@O1r0d)86=ON6DCn+`zxJj9js_` z!a3TbB{40i*CcO5L^oImWLH6FCwil*`HC|R1*gO@b}6_POoeN?xrN1LxoLi1S#l97 zg%kIfLgN#uu%M|dz}$RaQBjG1;T$lg@Hx{0cnAThi?mWs!_04Q!><73=9gE_L5^B= zu}!$Ew-As~acO~H170`z^U?JZKq|U?BDG$8%%>tG{+O&uW4VRwa9VQa@G&DCsqF(3 zO%&5$jwVg^8nbv<0j7$5ix&C{(Oz~1hfn2-6r+AzZwG%>@a2A+V8^c&Ymr;-XOSRD z7fT>pP&eE@^6q4BtLlK14q%J}>9Hs#6#4yS$iLi{;4VqP%;frV^Oz-v*0z_VQ(STC zAlfF3jja|zoP+(grFu=JGS<7Y} zZd_H$18P7aNBJwq_$n$#l$V!EZ?Tn@$C1HGJCbvNh?-g5VGjbj$`_l@N?~7slZDv0 z`>LW5TGEtv{(nr4{}( zUkU4i-N*Ny76Iim30YLwI+S)yH3<6syxdvPAWnooo4)M>p;wit2=woK2yp2cC$rp= zQYHlw+;v=Ntdyk{K^fXD%u5DnDefT>p`<|b+s1U4va-2lg{6fhvr2_|X6E)3Wn?Y( znus-B4$1FoLz&aPF)iyw05cKh{h-tC>E5WuoCW)`6*2V8bZ>_+p(M`qHcvrDw&r4Z z4?{N)u~ zy4?yd$UreSzqGiRds;}8 zTld(2SYxs#WG9c$o|u)MIdUu}q-+b~;P=O5rA%SV#AB+BG4u%HOP`H#Y8PnT?eH)G zyw$q4Mw~`oLbL3yEvdLwN+K1$%F4yyi)*QRLj76>6uTPduX79S1nk_APRj~X*$Mti zZv8BXAH|JW#B{sB>{C);QH9?>uW(@hKtCV)Q3&2P1C?CyKS4lQW0J>@8j+ixnN5%R zyj?^HEujuhKB|Z91s_3pyOW_DRe_V}WkGbi9CT<^kF?NwglM|0$us%g5UV>-LY_C8 z=U8Djbxx3ZALI8e@TZm+Ru*C`G{K)=Sy+nUwiafa!?+$&>*7jQR95YhD)x##+q#`q z=2Q(;*#5!1*|aMsBrT6wt1XgaXMa#;v$V`#5^OgmV-%q6!#swy+gDbWTgkl?Yb30( z=qXMJuF(s_`2(()6>gCu64Zj_V6LGt=nWd4x<7)r&ajZ(43ffC}4ZfbOw~mwF z4bfk$n?fbb+dwJi4~W~E!t;#wLk0`FOml1Txcw#GijFT03m&auodw)3Vduq_GMbL$ z<}bqF9Erx;65DANqkk6kB0=*e&31YXXg5BU^l_ds7sn{l^HIrXX+9p^-;}HuX#9ON zd)J36`>f!3-&eie_$=+uH#(-jCFpXCGkj@U!s>;#7D)CV36I2UJbX!!+C*X#9-JLZ z|5@r?fTLi+XpD(~E-WzqfX~TXDljIPL$S<;1C_XvWDp7rH;2AF&TrsM(igP3#P|d? 
zSW;?C2rT5(U#PJcX@ajA=UWtmzbdmd9NQ3peSqItr1{^h(F~A_D}Qg1CJ7a<{{@*W zOkC#x=xl{I%bip}{VKhA?wkVJQ0XmqFDjtMRS2&sppq)@o$fsa^eu;L3uw{;?*eyS z0ll-pJHs8}r#=h4E1HBqWtoS;etKh}cWV>(3=2`_r;JMhPNWeAN+L3ttcsO7P)Wp&T=QsqPe#LKXVp+dYkt?_wrd(ay#g|X3@8|gI+s} zrY`Z8xa(%oTm1CSrt9zUPI7mfO>6J)-s;YpO-421FG9H5yV|{dHg#C)y~ABQn;u)r zHJDAUmvOo|bl)<>A3TQ|-s!!^J#!ANzSH|e6MThG8N7{i=*B;IV_#Zo%yBhXdXJ0# zewTN#JE4$L?nX9Qh4h!ZQH`=f>T{3x53WLb?jCOi9$oH5re_Q3v3tQRF6(c+9F^!c zmo_X%sx$?;enu42$E!h)E2fk`dPlkw zis`;TqE1=G^z9#kpHobe)*!sRm|kCl@RnlgPy<08DyFAuAer;U6uZ`2(j^Rr-Ks;8 zuVi+WZ?<1mM$?jQ=Mqsund>q8O}6d-%wb}~3riN1&hzIk^i@`fe}LChgPtwtn7%N- zB7Y7~?|e2s?JqXIxGWey2208kzsS#iCJ_IC9lsCIaY!VYmC#b<#c8ZR&!UA zS%dQq%O}$6nno?@#u~$Lt)Yw>Y!DL_*BJBg63nR@{?-SgvVF9p^7{j9AK#$Y))`%+ zmrUlUF=# zMBoB_x8ArHoA66EAoL79xWTB1ep`|>YB0?GcM*jSeTSS|SPblmonr~7C642rPty`4jecmVBQb7e_KP|6 z%0oD4uuDkQsD>e;VK?^zT>{DVZ_*zIhu_n(hY|XPwmu9m^c{8HgwSCcvB~(WE0@AI zV|RZZ6>m0rM$Z?HxTcCD*0FiCbF>a8F34Zd(>!-(6}u|d#YV;goX2vW;(5U6iw!*9gl*O zD`ai~CqGM0q1t|h6LxKOtzIFGr_+5~jHx)U`#DmY|Cr7^uxm{=v0$N1p!lsgF7XHz zZp9ge6xzQPJp7qXY&G)Di4s?xz2MVCb13~WM5?2$kD+VCQ267;LX^r4Z557 zfjCav#8B1aXq$b4=C^s&$Vk1VzWZ?q`Aa(XI7&N3pCcl!GBw{{5)1O4Mh`&3BQww{IMLT+kQqncA(q# z6Vmd4ZuX&bJJ4_p4{Y;=toCd>G$kJ`4a zL4njCua=+rGj!?^ihLS%-9k&AM(zGYo1aEq=TQ7kgi0xSr}0O8p6J+4#Oz7ec4D7w z3N3jCjh0)$)vXE9^eOZTNG>0pdj=<6>L~eHoWQMf?qiN|bBhHmaTR~`$=XcTAu??f?$S$J{R%O_3 zYy-}x;@#+1uhZ7uu-#RXmJcK;Lv=4jK4(mbt`PLyT8`FJNPNQaZJPfal5=h2|osOov6XETo^yg^!xBQ=>t&pmHsnx^3S zmKNn|VqZ32Fy4=3{}sF6XuaWe<2W4FiPv#BsGN%bio=nx`F}N@g$*p;XDp1&6*Iys zuD-bqns3-|w1w?yx8L{%mPCI8+nEz7;|+wa(6GN56gf~jDE1?0*@5upRQESyIJ)J4 z1IWH59X((SitH#t=Wid6`_yPoaR-etXyf98$OB&YSK{Uai@%gW1Xd2GD~5Inw4!hD}MKxYE52^zSb?*M+E1>l-Kttr-? zhx2t!|J#kwZ4mnl_u>!);3|N>JTn-O16TxD2Y3Nc3%CdfTjA2;07C)UfFi(3z&1c` zYEo|EfZ;a}A5CArgpLL_?V}AZHikcn46O*iw z{ESGWTlwitqi6WpGL1gvXIvV^9L2L+8ja>>LK@w1)H~2UIE`N7XHpt{&Cj$n>hg}a zhdV2c#=pbE$u!!)&$2Z7h@Z7_6#K3>)s;qh?;^M+jh^J^mNfd3pS#kiPc5GN(x{xD zwaA{IXVd5>e%7VYjmPlR(&|FOD+b!|1!#2y@}9P~peW(TZun zF=)pUT7L|ldI=ppX7p^ZWR{D5O>KbhCcS5Pv9%um9?S_|qn*p-Tw0YBXDQw-(y~7;|#&M%{_vVISE7Y zIlXui{jclG8K;b&BOu50Pr=<1+Wjdy8{S*^Cp!K6xS{}$Z)pEN5!FvqK7$LsgSLJK z?}}ITIE3%_evT-}^mFvpuC(l5hWf_pzWcE258xikz4gD%fPb%ta_{`To*CRLgL-5A z&N%tMexiYU<9RR@hCS~%z)-+6!0mudfIWa(z@>q)f8tFG8-PgcxM>>A)K{|dOLb`A8Jc)GQ zAbuUn`^Gz(a+Y{?O04q^a(lCAZXII8WzjSIOvs{d>ku<7i^g9@&N*51$YtQ?WYMSm zT$Dv!zV)UwSe~WPuiqGFTnp(;ov5dI87E{HQuoV7-v$eD{|I_l3s14HVjeG6vQxw6 zIW0q^=xw994>@;kvnO5n^3KafMIsy;>`U&f2m^0ibC#-)Rq4N`P zUj?2&OrVkcjL4?@_}Ma>-r;ArY-;81)>`1SIZZA{~bz`v$Crr4z z*8lwR?>SWa^T(D`9rqir8r**i;8pqiApDgTd$FozFB}?1)rSLu8z})yOao9akJ;ay zHktA~kZsOndXk@WCexSvTr`>bc=235nacUOaWcKc&+U`xCw}gkOgEZ%9-2&d^7G_m zdXt~$CsX5Qc>XY%GMkyPFL^x>%x5&j^xR-xmCet_fiYSP-EDfhx_4#JjbAirozbQ> zHyO8$;|_vw;tsgYno2x&8-v7UGzo+g6VTFvi=c7MJ>Jalk!hxp6p`&p) ztTl~p=IMjCOcyuvTxTXobceyw^jb4dPxl&&!LdRPWiSXAU@$3As$%e_K-gh$s4!>_ z4UiOrgQ=>yr!U@uf1)|~$q}lWPVu0223=_GxdE=Ga}*-w)5Ivxb?*GuWcb1c(!MB< z*EQ$mV^N-OA|Ubo?L4zlukNvUWKm75XA}~giuGJq9kv>yB5U~fKYg@&J2bWa2fI5V z?QlQ@91Nd@tN-EddRiMY473zBgU5ebu*S|rhW|IMO~ZB!AK1GcaW?^80Mr6#=^1Yn z<#aHkUW!BgapR8}@9Bwag@*CtODc?qr2az>atI@QJlu_2y!>sv=Yt5uJ>0`H7SvWf zJ?hBdOq`5d1Xu&u4q!3u!!x*J+Z~!a^8d8jY;|1ibl1?)@PFyIeg3Z^I0;660K`S( zUVYly$82A{1*`JcVGR?DbtQ#ZWy|$d&EyWhu zqDs*0?W41y@y3*v&Z{aLSydu?Mj}(Sn`sY%{8|5g3p@xueUNsZwSPprRT$2 z(dxctQ{EG^bBPFwk&5#OmHrV6{3Vsx=hX6fI}1;IhttW$ItjGEt|uS#kf_x1fmGMHTAU1TJ{A=?S59lsU;)ROQDu zm`Xo5ln%)#x?PzhKKE&Ji~{8NjV+C|pv1^IyD)!VDzAEjuL`>+S)p+(O+ba{qnp1H*eb}Qt859ZpGm=2l zpDQg~9<7*e^Ui7cuRldIPSxAOy+xa+A??%54u7|m|K6c?(>$drKmJ}T|IMWxw;*lN 
zaKt-l;YQN1TRgW=-5B7~Fe>4{@pN>mC)bjIg&Ry0r`r7Ww^qMv>D(0Lw{)~8GQvWA zK*fv__TuD#41dwGD_dueeFq!q=hp!ge2E{-C}R$90$#bgILV{p&6$HQOn74G;sj4l$ITWrJPRxq@WDF##nA4tb{;1!+!h+fI4L?SO2vDf4rba_*=OPIq=}iV z6kNRocsu^Po<7g8tGCUpU<6RB@8&2m&fK%yI77FW=K$!zA8TP`F z4gULxE)4fnsP4Sc!r@cq!?`=NS@gD7@qVE2;V4BoTx#Kdq4lYDYt~sfFLh=di?VS+ zoJt#peclvLo1Yz` z8|%>?3-=UFPx7?rt|H(C57?%Ga5Y8wJcz0+Ez0K+ggUm91M4u5mSvmm!+W8#52U^; zV`AvlY%?-E0X=&l9nLn}R4+xpMGpQ&Wqk0k6lZSyI4+}d*aJlZO%4iLB;>9R4Mjm0 zLTPetD3Lj{bO>{l~*@3w}TIyZ3>*u*~fI6J`tiwh?A(ib&OqQ=aeD02eMd*V?(wSkX(tO zpmRN%#js7^)8+T*34!!YF$S>mZmlwx4EhCRn2oj`2z!iHY}u24p5UJw`|{6sSH?t> zG0AM)aOeQehT7*flyl;`IS##f-!!$o4&^|F{Ig4OM!+78?SV(P2SGfL7*m*6?gQN( zB<%~Lk16#PSo9H#F#6#*?CsaxEJGDOhgFtaTv(BxTMiu}&$5Q$xZTf+v`Hc{A(zi# zR?GsP+y87F{cDnma}e>AIoV90M<$!GDY05pGaH=m!la{tHaoX$Rtbjaqx>b~k-oGT zbK?l1XN?+>IUdLJ$7VR{ej1q$!h+S(hS9xIy3*h>RAZ19C_OM^*oVk=^=a!QPm8F) zS+qkAiL!FcmWCwa`r=kajP-gxAJYzIfKMC7V+1iKYjSEDXg#bF_u}_pi3Bybr5RUZ zA9Ubz4uu7orIo||vwT%W7{KxovRu@8=?uDqyfF>=_pZ4VJH^ZlXI_TT{3&Le@a^DW z2-QqM%o062ao16w^9_F#xM z#yV&*0>|cf=*#Edlq!D)BLRATVZq4q(&9A#q6wAdg(b5C^ouYWLo{n`gAkaUDT33o^Hl;FBE#_a1<#z zC1E8 zuGvy8HJ^o-1Nb$AN){H^_;iZY_>;PZt@wTh-CP1*cHt_V8D>n(+ZdE&jhuwJx5L8? z8gb<6Kxh78rOFv6RppH$yPGk?L*XbKyz1m}a0lQruL|e3ZSI+q>`cigmrX}SWkF#n zx`#cd$b z?E>14wF)oI%bkTq0^}MfeCZ!m1?3bvj*v1frrMHU+G<7Q{Fr8E*hysw^NsWS5J7N; z`xMPSywB7?VJqvU>leg`H%rQrbj?(@{E0wS+m&tTm%g4p4IASi&ulR)hkvRQ<91a7(V}2>f@xqkK>LAdbQxKMZ-Yr1 zMtYvvqTSGsmB8R%paQ%{;ON$kl$?i`DfXyFLQ6}GfTnq2*)fN%s8r|x{^S^yWqtgS zRSK`#nv2rScc9Y0&EH|+CM}oV&oj;H7hpvY_)TGFpXW?x1-+8sBu;|52Wd&bN)Cnu zgK74N!=9%XNS;<&b(Fe#9&1c+=TEjX51h4jgtraMCWkO(nQs(qjM~k5y=vb-n4S0<(3*KKxF#sdTo$j0lfIL!?R` z)%K`r7hD?55Fho=^)2uf7Wwkn97vBx2HFcjbbkpG3q-S-woJ^MZR#Vf1k$)N*%dM5M#em#YgV%!5;q@z+ zOCMeG7~$KJSf-a>HJgXCKN#Ws97(1h*n|nF2_^GSjxb7ow%doFX&MZANq*U4j1g@e z;wb4fAF^cUF}^IXoIC4C%MiEJ>B~9huqFXSTb=q91|#Bhx)q3)ff${1mg-H}bKt-V zhWjb0(CmwW*JvxpZaOudYbIY8SOZ@q2J)udvXVLc)?iZ7by)+G({gW4nmQsGoD|uM zi7iuU(p(eYTL&d)DkcNU=O?7n$8*hgv5e+~2}x6vrXpRXtzIvra-;mZCnmb7mStzw zO{E&ksnQTXLEhxSXhCsfGowZ@>wwSA%1lG9GNRdt@TiIEU1wWw!q8X7YveH_qbYYD zW}uafpyGKL*|vnS7*DJ4vt3iD+<5h(sJs_!CujFEUYNwtCIAC#{h zdZ2zrD#5`!Bmt(0*}gm}SaljK;Aj)M08oGqFw|{r{d`n>uD~Sj5lTflPg|qqPGQ2jUm|H&3N+X2OG*4U1EmDcB zG?0J75G#?QaH_zIV$N%)5rUP*9t*Cs z#GQu;9V|kiMzyxMg%DcMI$@k!LS9)4q=~T82q84h9!uI~n2IrT)pm`Zw#6-!&_sJJ zanHv1A0yvDLFa983n4Vk0jom!*e-)a18I^5Ske|kZgq&#ZM#m0?3-lg&K_@Qsz$k$ zrJ?wff%y68v9r(PI6>&A4iy|4Sx_9O!jXw$OxS5gKl$ zQ8{f12nW>x$sT$nG*#n(vdZGTe0k+j8^f+R7L&aIE!rW9BYzZjb4@t?QfkJA_{ggV_XLG;TwgC#~_5+AvegUs!;p z(`9E(+-v1unfCQj34_OZ5RTWf>Np9*R2d{6u8r8uRUqiIR_zfZZk z@Pf$+s2FhjH2mS4?}be1gmo;YVJs-|ISy#4bpkoeA8q7(Dt{b3Lr&|fV zR;K7u0b)AbUvPnAf@xDiX>6BmX42~wm>opRN>=GGHswlGC*=eaA&KAuXNJ&{LuduT zw0Z@yQ?Z%BQbS0(qF~xom`pD4{Al4OsZ0^AhTt*eAQMJH#GNxnG8I~<0!-RwD>~X( z==*}{72(%DCL(2{%w9@gE4>c`ErrCD70w@UQ zc~;SclTM3NMUhNs!^cTFe=F;b72z~W5%&p^=CAX#n46>GDgC=n&EDo9URdM=QWye6sN4)gQ1D=@v1z2%j3cWuIeQcoh$*ZVP zX~U!|xwR|JByCEcazE@F*9Hqr(<(jNtFL~*=8&fs7>cs}1Ov7|IR2r~Y!ch4fLE*y zO$Sbu`Qj@8#m`H2V%h@vb+c(j7^mhfuEcNyt7T&rnBCg3amsPnKz3@}a)j|zvjB?# zMQ;AN6F-M-gAqb(Fy^>7JH;Y2qfNuGML4i&V<6@naxFw|ahL+;P&@n_c7=aRZY%*w z5GbEFY8~3Fklgm7Gv?5I&TS2rM002r=T?9{R+YjTwAkhIa;c&@9@eJeE@EVHj&nOJ zKd*6aFJNe|QaIx#JH>Ltra7Y)n>NkExs6+7wy4gAPUK(|r)1?Ad)5^TFT*=~?owph zcy)5d#$UAY*7(H6FM@7KK9-xe@nOvcUtO`#8lu^RHHuKN&>BnF__pXsl94qGwDH?* zyft94@%^zDBk`?KmW|(M<7Hq5Q(+VM_BTmj4Nh(RNgHpC3j?nBvgop}va}!%x*r%? 
z-1V)EFA{|P8$Y%OX9OLxJIl&I0?tU3E6~Ai573YqNcLh+Z5;}y_}k3r z3985KyC&Qe6(sfpquMN+n?(&|Af@oL9DG7#5Wc9u$rz5;Mf3NY(e%cx=2RIkOyPps zgzrOtn?lLAVd$_F-D`@Lt7HPUY#H*Q&nYyYqpU%nQBf4fFH>kMqEu(0zo;mI ziU)*}^|{oCt&Tk<2?!-eXoVHnnUvS%?adIF$H3kTpO2AUhHOk|CbKnDri4w*RMpL6 z?2H5K_lI$rO5<(^Z$I!)?k}o{K&vlOi#gUz$7So%|{`oq7a+I)&> zOlpR&OtsVGfVE{&&R}+&w7gNEc;a@M5mcb9=ZCmM@Ine}zuoKb-1vKhEa}IGW7P_HZ8I5Qr%fC+LY6u>udEW74}P6^QE_q7;Dpe>`e9DMO!4!O$_!;x zRbW9bp&3T%$-oB}(imGg7cf8{IVL+$NtQsarZ(OSm}%1S*_7%nX#U*asbjbEfx1_* zg|gc`O7JS1s(@ipi=SM?OqJInW!02?Yf;8bt7uMQ>v7v~m0YQk9H92mZ{nrUmZhF< zVyAtybr~%0UEsuLH6e3tcO4B)#zlb#KmYxZ#9H8O;IVRe)S`2G^ z6n`fVPcVw{(J=fRmZdu>s`wrm{Mjj{VR&Na#-x!{jIAM7fOfGyEht@7kQA)3 zVjx`t)2i89LiF}}mR;b}Adcx>8%`m~?|w*@n!7PSHe?yXNly1;^0RuHGwU^I|$ zPJLQXfs2AN<+%(itIg=jdNd>p(84B2Qw)^aV<){3v;eWCB04lF(6QNh6n_u;3%kKQ z8it?4*1%%tS+cYzLU!-tX^=yc0X|AnmfnkLP#)DtZt`=dl@w?`%H2e6mOW%6ZHa(sak z@!=kY_f_zbbWX#}GHqs8`uaXICa@p527bss=7gd4*by12=$dNe1J%@44wTAaUjoIH zX=81Vnx*iwZM;2n-<1-mtc=QeZ?Mo;gv%KDjxDvF2!T+~`^;98{(J3pb0F-#Rr}z| z!~W~$AkgYnUNRpHyf=a#y3cIw-j#w|aLm}&yib{co@MV-@`fm%s8NE5bVSPGv!Td2_aWwn%hVk#9EkHJh3ZK}%{v4Qm?5c8|dCrExNhEY9P@gg6eFTjDGB zR|K8#f>85m=ly0+ekJLCvuo#R;2>X1&8jR_%hZ`jfI0-u(p+IWbzX^6T5HhN^C@E` zJm_|G=X?#apOBIYbAmc5`;mseolnaUC7e(Hm$+3Na7 zd`qUEHN#36hU(JR8ndfmZ@g||ltz8~G>JD#Y~-4u9M4r_b}&k8ac&V@VvQMNl-s!N zfs2=@xW4FQ5gdtY8bz;nZ4yaq*5LdEn8H_Zm|fbn#%vn?G%V6=+P?W zJ@6h9%D4X_hW4;#4GI&{wqIz1rh`Hm%7E;G8T=8=5*&_9)in4e`!C`j2nRD*Hww)M zH)#Jw@E1Bz3HvXi{g}`iWMyLG$f7pT8Z2yupzHkCKy$m_TFN{Bfxn1_%h;L){vviF94zwA zcj8@Ni|hu&I}q*|h>jhTD?q?z2L6`+AykBo?+q2%Gpj=Bf2kLqx;m7BLWF~xT@rH-*-?;pWhA$&;bsYZpTcJb6h?D6I@5p8PtPUadvkSEu1R zn807enJZ|>Kv0XQ{ZDWLE#k=cp#`o%?^=)e+3}| zS(|NKh|6mXFZi>c_XwRpmf@_IQQII~+HZnQG=nm*eI9B^EHV5FFw-FEgW? zW$1c)QUveIFD@u50W-F{K7!vxV7{_m#cgkO2<`Vfbh!1RO1@cH^1-M>`wUzb(Xb5| z#V>-pEu!iCvpzdKOHiMqjoSu6G;a$z? z&PO?36pi0v-dc^zN`Gw~+4u6EcpN{5PF2lNx_Y7)H$4vB%u>$_s7o%we#B$_3tz#j zvYPgJZ$%%_Nrkt<1u}kIYZWfOTRnHTcS*V-T~v6LPvTW;-SboeeUi#J5uJcD_&{Gk%}Kc>^$B;%(2nH9X9WV*=7TA@-$vcFWzO%)z_qs0k)^2IB= zW28R*p3MsNs)BuE&3q&`?M{~TX5`|d0lZUVoFoh^7WP-U!v5R}8qg6$-y`|mzGBMC zO(MG^@Nq}&7zOh%cJn}R7JhGnW%Dz_M?Hia76|W!#miXZ$YM!Ys~va4bX>7OJ1}37 zhF3qS6?Lu$)gsmQM^DhPJ*sV|d+@_Exvo;d_wT|taBE<%rxv@3_xiBhP#|2Y^;f5gd{x`8gZ;T&mZPMf zf3Zw1661v{)HEkl^CWCoT)`7jjCrjplI$eqTbBvju&-e8m`;>SIbVBVg%7ForXmD! zeScNVTNOzt+bUka7x5w=1&WfSK{MvX-eJ7RfqB9us=4daA{Xf#7ho=NMmPE1CFzw~ z-ZN0-TAiA1ED4OobQ212e`UQol5X|NEX`P{t6>oFfDbw=e=H3}t1nbMD{_Z=x8rh= z+tnw|!yGVz=_)GoKyCuewrVMtBlXKnt2N*h1WSQob;8q=#mh@{q=&WR=uy&u|9IgH zHF2MIP<^kYUsb29#HwH?FAbOUrtj6V_T`QB=NTXwCw?TnT02@9BeGf7yQi9ZznVA$ zah%VbiR$gA)CqIc2@j`BN0m6I@W@lu>xX?sF5zX3{!IlBnC%)m=1YgE>h&jdB)%F4 zn*Aj|P8Caq0tMW#hSWUV$Z%83X#c9Ww+6g8FDM~vmJ13m>y!2eM}mt6-oDRzo( zUH83eDziFW2|_Q&{`Sja(!2*{BndkA)!MPUpQK;ZIB^&Xa6cK7%>`qJvhrQYy#Q8?}~k=t{H zTZ?r4r|O)QY9wr5Dg~~NEF+vRSkXW@C36KVz{PD|PDX)KYa=mh61BSPIPh z7i-7rOCe?Ir5QR>=dY!qEOokMbsoo!Qt_0U)~s_lfJM)W_&P~G!iQb1)^dS5=Y7g? 
zP=w3JJt*bNcIyaFL`y=JM!ZwnaI#wDur5nq-SzhBT%W+O*k_t`3L0<^z#Vw%fYxfv zzxjl;-=*XlHLcbVpWuRYUjJEIFj|jJ+jY5x>%L#4KC)lmK9=Jl z9aLa7V3thXx(WF%l=N^NK-NT&OUlIpHf^_5`)T^SuK!XU*?>m2wdta9DfDK|lQh2@ z>zCz0m3!4fD|MY$`btN;!CWpQTR2tQanvbu)LpP?lNHmodU>w0)ERTsIk#go ziA8#aI#1tdX}GpR6zr-J-mi1s4$H6ty=vK`x(prFg0=Bd{?uK%{s*3ygkB9iH`Nys z5H~r**6N%lt75w=rF@>oiM%M0D-oTzV{aYEMQ2G5)IfAf<4m3|&+sy6#v@*3qNqQbwy+9NzGwX7= zsG;*T7=t#|s^)RqrQl87X4mTeU8)xAR>dPV6xV5o)mm==KNZLWa@BK1N2YRO^lAfXQA_T2*gT1(s;Ps!Z0m^rz1C6Bq zI-m?_$ML04ozRz0K)n9-KBbkQ=Ga;;F~)!#d(&T?OZLQ(CF=DLoG) z`$~I>8V91`-0TC+s+c==Tll}pbQsRUeA-_|*s9)Iqzdr&4mtn4T4or#8_RJPG4Wwd z&(af6`wnSmt!^@Ab=vJ}={$~nsDEOLuK&Gy=VHcZdYsXbCL98Zp^C}&iSDFSTzR7K9Lb3?c2VD{W+n-R~lZaPSvF&$WToql|$4SZ>ot4 zG+wOK0CQ@Av=itcb6o_lFGl;0Ns@lJO1M`o+)xGuctj=2&AJK>e=q4buxX$z({_z) zN7b7XHH7-=GIP}Y!%t{O8Yz#e$&(!<{k+BpXANj&c`y}QVEY5&guUg`v8%Um#ovWj z>KsPv3FojXkcwr;16Zj}+g&LgrD{Y>_=)5<|A4HQ&J zg}pk$Zar-7Rd4Rrz|yN0JDsN^RA=6c5X_3@sKQqfFx=O;KFz=$fx9VzB zUnmXH^xk>7NVy`+IiHXsHGmDn z$$1K{YQ55A#6M0LQ12ZK5;;k?(^@nL8NxjvoaI=Qu zSa60a=5}}x<>M)q)=AUwVSbvdFpA#C?B5166;Fl5P}LNv&R43d<*>#TSB)zdy`+4#M%Z?&jxt^zCA$7| zwBTO#@&PEq9URr=c+*Fw#-^5PRo@xL?#JaKwcb^Wj(nN$VQ1ky4N!-}MGp6c{n^Ae z_42@A$#7NgJ**>aR_}M*A^ADzjXT^6CuELO?@mx}Pr}GB%_etU=W&0Ra%)GHsUBcz z12O;HfVY}78FSCxz4mG8Xf^g;&R?mU%u2M1;Al$1RQQ6U;Tv3jb%m50UMKAPnA8tZ z=dM&IJykC09lC6j$|g&Nn^R@88P?@dqR!H*bJ?vXUX6T?{aqR{z4h++qITRmUD^xR z4#RcXHK@s5bv1RN4IWt8jS&+s7tL-yA{l#Ui=3fzcNCiQNOLsC*I~KhU~I}5CFO3Y z1&4JioORpH!6w6dOgNb97r~-z!AdwD6eu(0A(z+Xc7#lxGjBa!U)6p7Jn~u7JoV-z zEGHB-wWcu4qpHSypi>YNxnAvCGf2Pywoq;;i z)fzw&VP78ksp--{r5-$v21^6oYWhG8xkW#ga@F?82JhFY3sl7pYks9#axEgM{rn$- zBNJ;h=r0ZU>xsw{%Mts9sdS1cQa2J(m21&4FW+d~a+|3vMu?*Va=Qx#I60il%@D^La3>T-?LRgoGi<=Qo5H>lijpZx77P$S(% zbMN}p`~BfroSvfxk#^mr z!VxE6eN&T0)?`dEFWLTezGxnqs6=6QNDao2S}s4+iIohVTc9Ibaq zLv^0Q`_(yf)Hz*o9`HbWseYnJ9Ck^J$<%?k!j|1%F6oI98Ou!j10=y!z1bVJu_K!T zbr0}`rmXpX4NTrTHAgYWoZh4(D~cC666JZ_uhvbiPA#-s15&M8@;s)-zWyt9B)&n? 
z(Fq^n^J=O(4Y4=Xw5QaxHr;LuRKcsh;;gmUj(MbAI+aDRGb_4QEt{yma>Bq50`vNJ z)&SzI>vRASjVt6#6Ry>cYvV-jRfU>$&aZ0xaMYn3D=|5Q5}g5TC$>f8QP$h>+{vn>m%vm*pAsE1G@ZH zFBb*lo=5!S2Da-#Az9}N@1M7)SBz}C+oeLg`bft**)1zH6gxg9`TiOQyzdb?NjII; zFGYbKxMx!2<2559;zj%$i@S zo0zvccenaVGunwpxj^lwvP`|a8BW8Y^y&;T-Co@Tnze&!gj1?M*4^%=`b_k_Qm!3Sz*AM^%lC$u|7IL0IKf>tY)1nu zx5HG-U3yqd#8TjqZ`TG7!-;sx>MT;fR5z8>XG9*}A$(QY@p+N=s=St$E&4a*lt_l} z$cYErDtEkk~Mz%4=c#8VKy42eVCq5q_7>EKE0x3W?P)cxEYf}uHo_);Dj~e2JoZw!N5Ww1y}=Q z1BF06+1!nZwBnRC*qNXC;^*mGD8tOcp+Yqj6fL*{Lpc6O;Tmtwd4o+|b zH|jfO4RYRt%Y(oX8bqU>0oYIDfu+DIAQRXMRFUsEV>Xq4X!WDwafXX?C-&ZRz$L03 z2cvd=X!UR|#|dx;P*11E855lioPxc802&-;1Vy&t@;Jc1S=i4F1<`QNw$s*FTdW_n1y%sLKruOfY@JEBqTy3f{+KKv8z>}O3~UoI!7#k9 zKpq0{W(1P~1OO30Jb;(o#h~rRkF7pb9)qEJ(DoPuPl8Q{C1~Jd{Ccs=Ji{f~2!ufa ze$jjc5DzQ`GJ$+xD^LaO1X_SL;5cvr=m)L?E;C?tz>ivgZyoELi_2nQ8=d~WHHh}1 z8#+D_CUQGt4U3LI1da!m0+~QQuob8R8i5v|4LA;50Qvz_2s)tzFXNtMCj@xB!NKGM z1OrjPLSPM$4HN?9Ks~SrI0zg8P6B6vE5Hz7%*42L)U%;vLDfZOVOk?aEXgu;&rZM(i`a!%monMf#Wm7J`PCfDn<@@$3_B?*c z{5xfzShu_S=c6$^7rriIbNr%Bo8p%(kB?7HNLoc>~=R!;!dG<2?hdlhU<9hshRy-G9HIkoV&yVN28#1;Ojug0^ zpKj0R?8or}_)Sim?Iq&FXnBt{ux#X;k`ZRs;0FapzA}X^MANl%v^J7=>2}^o$NbEr zd;=gB=9TvBz1llP)!TI&?fJ{RmdC^_+O#Pq2A2Eb9VCA60X(Ql2fnaQaKYCAMt)}Y zX*%6woiggSTuwK8@G_SLyj|}45~1X_$>?aW z^-GK2%y-`W!uqVE`Ed&Gw@$>1N0a-llRjsfdLJR~4f*h$F{&E1s4bp4&m0qa;ZUboY58P%q}(|+0dnA!Uvf-UAbZF%D2 zwEWHZ!ulf-w|(FSC0wyC$4ltyu2`p97EhvguUO}MF2#|VAE4ErGRzn;#lvK~Y7H8P zQy|=1epgU`z|e%M_g}TfSe8zr7p__#!rSQvu3CR>zC{gRS?7|+i#8v+_LbEIx($D2 z9dCc|^9kp0Gd6A$u*1nzTH=gruOck_Hmqe_`}E*H{fF(u{<=p54s3Ub-1s^9bS<9`uQbZ33OSY;(Z$^m18KhO32)FKp}nRoN6e`h_jXu^u{vlI;t;S}7uQ z)4E6h>2Ei27oK$N&M^+=gS7k4sJ7~7zqhQR@Lt<;^0|aV&A(9UICVTbW<2e@WOH-q zxpTUG6f1r5i_$W4w&efIE+?B$(%L>Ad>RIX!LOj6KAZKQcBbmxqPV}HLeK3A$NuuK zwVO-ce?^6Jlc8Ao#4%Zu^9wiOck=$FUK8o}E5_VOIPL^F45f~_gAPtZ#ccclLCz!p zKNWv{1&^IPhj(hjB5nh)h|dG79k_lb)eP8N{^`&rlD*b(T3b72{41AH$ykm`*S@r^ zr>8F4M*q`Flg*Q<`!W~bB4&aZs9yX zN$=|r*&jQz{Z=heZ?lj7UNXAlgbQX$g(MuF*)qNQ_k+5hN&3-Og&Y1|@_nBdKJkjA z52|ebPf4%PkN28C6}bTA>>t&1*Gq-rLCJ`FS~wENJ??n_d|_9FWS0BX)H$Q2US)!$ zJH9P!t`Uy>sqp?^33qRn@+;8>w=>wLAJvAdDSdUsT~Lb)RN^GT-2IwVaLyOL`kZjq zZsFbfQO(-#MJ{m z+&CoV{gKD{r?AN}H(eL**1s1F28o>YiE#MK!g(u%2g|fV(_~3F-7nniBORai6*)Cj zX2C{H+CrzCNXluLR^cv#!p{hY`v)p;FpDq@Apl0zgT@-azhP&pO`OvUO!3cwNEwHf3?p2 z>2*>dPZbGQZ}-Qcm>cpuC>*KFC`XrV`+rG#xCR=>%Oc19PS{)HgQqH%_o1X$vooT9 z)4-p!pf7g#M^q^qS8 z%Mtz$6X7#g=3(3!Ghe(Q+@+rhwLT^J+xJMizfM_(I$5$h+4i4HzI(E8lR78=k`1?e z`U&Bi`WaH*0$u<9Yb3*Wt8l}8!X>(H2Xzi3H;E$2*Cf9hpDS|1eQKHBcO>0;s&I$K zlhr!n(!-LTdQ>>)SFpc*#OgfT*NS{tS3%P+MDBfAI_er(xA`J>>qtvfZrCN|eG4SN zS<{;kD7c>vm1~vHm+3`Eh!k+vin}j~y!SojDB%-6(y()-$ko*%n|BJA=qImNHQ)^B zKwR-*3oFp@XW`UJDPPuPzjwl;lxo_6Vkyu!MJueAg5kX)m;6$=xkA#bpBB0Hq;Taq z;Z(JltxM$O{lZ16XmyyqJ|5N3ZSki5A)+K(3fk0^?SByYv_`%JjKXe8?Z90}QgoLn zbY4xpT_a+*`bzIUsW+?^j8116^WUr?)3H_>@Ye0Mp;jt1tP&p3J;1y`$`xt(-9aMP z>In0;N%`6mtv4hbtU_ z!@W9?u1%7@TFbYqkCa*^y?}%RF$MPZ-=y2@(dQ+@Q=P_Hx6$hRHN8d@9R7{S?HUIz z>KxZSBKfItn!iPOzwRyjKNh)3Ep$2rC3aPr^7Y&GXk`k#qELdNW;Q&jZ30YT483oY7_4P>F{WaD7o4PW)EnBHf(wbeTo# z_G;cE`5hY3DuPtrF7>CU>`OEq+tb?69_s-$3uE~{qUE^Bovnl-ST zcuUG%oFIHdEmqwra>IIIN0kRvp|u*ytCvcCyKG`*rmS?)tfWOUoYlK;c!|7nBx1cH z<#H}cdhdS=uT=|$=xMlFSA(za4bFPxEm#^{ttXih4J3j1 z+?9Q#Pp4o&G&dEfNgXv_TvSDx6Q#UOm*Y{5m^X7IeZK~}8*3r+`Y+;x061Ld#{5$$ z*raE?6ZIlrTqB$`C_MaQt>+_rRpUm3Ze}66{9HBiwQrDmt&a+y{zCs$TXakk64X>h zOGJ*FFYKtxt4QZ|w+0s1XC*&cBVeV5@=|Tb_a({SuTFkKIZ6K(z55&8|J@^{V4W6l zo~|9MGhY06k-hcaux`1QKQ8P!M!5HevPL@3)nb{}65)~}$-klJf!&LA{pa13jO{Q9 zPsRR!6Hc8YZ2nw$_?B>7rEt>UH2r1a-D)ZCS43{OAndwK6mZu=tE)!D$g+o}fWMmT 
zCeCE6fU8EVq%k5FjhAwPdN#CaIp2wrep5$ieplq)?}S@F6wc5+A?~NrUY`!AtU$fp zUqkQB52V6?nl4W}>Ru`7j=C&QjVOX0gcbJPE%_I9Q!2pb!s&y$dp7B+aaCvUk{g;b z)9Dx~Sg}zW@YY@MhR*d>HL<6z|6cXtXg!MU*KN6brqoX|q`edN^F4M~b=ulll3(p3 z`DVL!7oSyzGoe`!0< z3+J&VdHv_KNy6#^;SimI!6cEJI)qPrEnIi6uCfzs`Bq zeUiRXBWG%~uK(09$!PMH5kzZ9cAh43QM}~4ZxOj&ms=~W$4=(IL)cfHG*M%`?^}n)95~2?ax2wsTblJ4)+}{{Ea{X1xUDXjK>UNxEM7*`(Ig3x>aEqjl3qGl(rY&hx9Tc6t=nD|Oxaua$}++Caq{B5$_|cd7Gv>Ie^O;JTS6`R-%1VRg#ZB1x}M zXHHe;%hBEPD6cB?Z%Wm`(C~s(DA2&NUlqEjtK#Nb?LgB{os;qzQ-z2BAl%e1+?*rj zou3fdQTK}Tbs}Hgt^0q&fFu;@vNG%boUH3USvL#MA4|DvXW^y;!X>{K&QQgMbuZcb zx}?{sh23X}Lhe)KCG53 z8kTf_Z{f%aDc^fZN%{wvb#*Z67}sP?TQsmLo@&-y{!#z6ZU>iI9#VFNfph}*x!sf z<%pQ7@ni2;k$cr?`_yuM6_Q?-s?M<5A_<<_@b*86eAFrmmAoo)#ysKh6iHv{D%_;Y zYOkJ*e04cCsD%UdAkwG4mh5A1r_6LBTpH}sWp+`$xm!1r6S`a3^oArsk65jTq~1kc zeytj6lkj&zwm@>QYP)^*h>G>+xmq$Z^ju!6pN2R0XajF)zM8OB6&cp^ zz2gJYVV4G|WL5a0-gYN+Xvew=^3<98f>96sn=U>n4bp>r0g6)P7@`hJao zwNRQ(JfIP<=r@|Ln@dus$b)wahvW#G*Gb2xej@V96}tc91%UPj3qBC;$`p?N58+aE z!iHx=&eGfH?l~fN?-#zR0p=pKV+Czd!Y9;{yXDYn%1}!b+ynb_!Hns`ks8UI-9=ul zBfdIK_Tkpp#42z*gG&eK(NLw%}C_msqUrM&k7DQAljxhz$;U2ko;$VUpK zz9|JVbe(5-id?Cd$y9dGds#jr`#0OU%v7Ss?B+1(_^^Fn z#xFAX9DYjjJN~BY zKU_OJugff1cP+EV_L3y2kbF{jzs_Moq?-Jcq}S;bB>!6Ez-5wOu}tf!1rm0O>^%eD z_Tl9b*&+$sE9ebZ<3qDA6^u68%=>7D)rj&<@w((>>2dN{;?`k0IsJ}WpN{kn-@1DO zt?}jrzfmsJ9Nzh<)oA#UBR@8HhJ42xlg+WT@*_N-QuC43ZRC-_){m?c=^#HI*hB3o zL;W9FJsjI`LO4#Yop_?n#{*A@?VH3;r^b00Q_WFS(P_PpT09Id>glvPI5uL~E!5>< zjCVHq;@LPLj0U;rK63XoeoW=3tq#6Nrr@zOpa&RvB+x)0^#THb2r9+nfv#1!Yy?_> zHfoB-Lwp{7c)XE1qK&C!d~9_H>4ba^xCC4S+;F(?0fGU3crXR{;Q>P538sS1RMi8flI(Oz=?LxGiI9i(CK-`M6&}8%rmCC z^Mipqf&cMfAo=2{J*bN30jHaH(b`xH;p7>s;b?ybkea;k(CQJ|AB)EyjV_Grte4?o z-b(&mR+!7%@NrBJ;D`I%!eKEW3Rp;MGEvmzRKRz%9THf!_xR2BLt4 zKnk!1$Oa059W*@INONtQhqVEm0s1M&#|TO)j)m?3|MD&Ws_h}*7|;ov1FiwL0H-+g z1o!|^Knk!1$Oa05a=Peaj4wNj%O2ngFa#L$F+3muhya!XtAI=(AJ_`)1R8-B;5cvw zxB&D6*8#jR+T;g>0kOaeARWjBih&wn7qIVc{6OC!TpR;Bfpfqms`NFcmaU10sz4!7 z4%7pCfP=sh;3RMs=mCZRhXm*i1OV~CDj*Zc2daQZpba<wwDw%oX4VgaM0z z75qohTwD|b+khJ25O5B-1Y850?ty)QU|=Dz1}F!10QJBg;2>}WI1BUuSAZeFVId3+ zcmWYWJg^j41!Mw#yeVQ(c=sws0Mq~n0siNJ!|tjbX49D!cx}yHX44A54`-D&+=1-^ z&H>k%5t|mfV}!_Cg&ln%_!ET= z`Owd;r!a~Qa3oAqJWw?mCJllTKsHb~9{J$2YY`OEQSWK^7dp=c?*VxK*a6%yd1zdI#|Ls7beSjMf2Baf%D{vgwJ%H0RuodgqKtq3paw{+@pbF>8 zeZWCjWGO}z0XTTU#F(aHpoRfU48a2DfRh=BML_yGOe@g89`k<&_mbCu0Gyy)0OMiI zDL4Xr9gQ6efwO`0Vd9}QI1U;LyNr&2-~miUKb#a8f7qiI+I0EbR0Oc6jRyg1;U=^?w4wwy`9gQ^)*$vmR;33H8%5V{Zgd^Z=jCu$+ z##>?PVyx9%m^y4SmXAM{B;W=`d!Y5MH84F!(*mRe7a-%6Tc+zk=V&O#&J0C-V3Eb( zGvFQIs3);HaCevPjR=a>)&nj(hX0O0Xave}-Gke_eSpVYcylB)j(~rF*C6c{a2#ku zS^#((_&V}d0e(OVE!V4nd|+P%mNEty z4-6sS0Poz80|Jn?5J(4#ft|oXppzNS>V{6P zm!ljSkHGbcndlhU0ptU_Lg6>T7(F$;rbFP zrht7=u^eevaJ>}V2KEB_aqR&d0t{Fw02qSr3}FSG!4q8pFnQFoa5f$!tXd3Y`(r@B zPAH!q2dz({lda$txC`Hj!cpKBumkucxF7Jsb^3EC2lxSnTo%}c>v-VUc1#Q4gzIbI zt@wzb=Vd5{JMv&~3J_e20o7pS;KH3y3h*dHLMNfKS79b3#MhxQ6m~!%FI@W}Z7K4W za$z*)fV^!;uLs{knR5*=Khk%+0t=xnKgd1kJQkb|)B|n6R^TA&?Ls@pP;M#emZRUY zHe5uYvKNHKsCWzo(!s}(b{#<|8;u7*-iLfQM8WG|59Fl)>1Z$?TnJ^(;Cc-jPDj28 zZN!7GBdw5?1RA+*;4Ip!0fI58*c!}#BPxZhL`B~7uVHOI2DASJYZ%*W4-l~h9u3?Y zMBE0a8&C+bfxj`gm7^nY9JmfdY=ni7=Y-QxF51`yTnC0cP{$L4`WiB@6#eV~4qYq5 zsRS1$G#&sf26BNK;2>}o@cV`X4;lwtHo^SpxC%k!Ah2@@MzI$A1NMP_SRxjCdi;1Z zz99%F!zkBy!BW@(+pvLMiG@<&Z3ycvz**o{8UFLb?zI#s1R4P+jAR8yQ4Cx{G;{-U z15geT&}TZrHB7fJALay#fg0c(a1C%Oz-j?H;puKOpa8H3xB{Gna+d%H4B8J^2$Xde z!u&;WkOwjUt1!2#;qkzchu~B{f$7)4bok_L2srsLygm)1ONY;7V4l~ZBcOggmQ^NJ zP8Q6+0c`>Oz?w%;9$10taL9quz$L)zQ7Db`;n26(|NZ_8TP?5+XajBmVM7=(&;s-W zem5Zl`+!S;*LOG>0rfx+;Pc-&83T<#58!nR76f(x$AMcw>@X}0>;o}TaCrE4@E4%b8|(i9 
zF1#ioF$lp1s0R)KRTb#`H&6g9U(XKpSuc@Od873RD3{fNOx?4onxY12|TO|E>c8)o>)B1~?830l_a|u7G;r z4B+%4Mhp}KM}UPeYZcH57`5mKs0S_pJ}`eKun#E26tn@ifcTwI5I70A)M5Tp@ZV10 z9N_f|It2Cr{Xj6mY``I42#Br6h=JpPQv)mj)Bx9jh*zN)a0D>@9LfPzKqp}Af^tAT zZ~^ey4dsA+yRrVS;9}`(Xb3n9c>DsE02+ZyKtLnr6F3N52cnuV6~HmT<#m`6*aKVx zB7O-a0Mi~Q08{~;fUy?}0QJBHz~>DN1UUXi8Rp~P;6y+R;Ia=50=s}7z^@s3KnrjU zNO=?10(tcNE$AFL0yyo*pnw|SEZ}hfg8~`>)31>S)BtD8@Sn#)WCBM3 zr&eSFHNaUQ;w@+j90#2K9eF@KZ~^c+1Tz8$f$KojVORz@1~|M8rv-KZX8^Z%Ffd>j z&;$6jL3yC81^-D3-bY0z*!*dH*g}L4Y&owAAu7ACjpnEunMpfI0tyW z2Ok3V0R2GlZ{b70A;9ez+5y^tTR?mV=D)E6^9%&Mk9h_T0@s14|G;Q~V}QePbOP)I z&H-L0&1q=?k!eo4JTi~Z43@FKrwI#xWov>RL;cI0cU}f zFibaK3dj7f2*>;a#{sula1?+)DCbYe10tX);1-E_1+D=6V#2cke|mio;184|Rbvz!e~NDFzIj0m|HxQ2;m# z_$|Y-0WJX^KZXLpF~Bbc*T7jIDiuoyaJV0h0H)!6`w|1K8nQ z24{1H`Jin%G`)q}?rd;9G(7}dUk;6Nb7Eiw@ee>DAQ#sckiHGP6*nA>;5J}~FHSe$ zA;1siTE;^GZ+Q7s1SmM*j%q09fhCy)hXfp;_=+<8ckahH8V91$G?;8EOoI^=Ld#>o zaWrxW-n|P-UI5%G(Kzm2tB{t>=^>bBlyC8YWx%U0!w;ad$0V#CC{ptXrU$r$eq2^y zrC=~E&Ioj{x^ofcA7xE_3G&CwCS}l$^V>0gz8HW>n6v&{xb!Zq5GwimahJsT zgC9$JVvTU?3gPC7Dh~=*JSyy|BR!ue^&@>Gz33a^%CZ}B5sBL=?x6ab$f?>ujv?}4 z+)Q%*aIJ8TDpG`b<@EEK9==J+cVTUF`i-xJtKSy3{Z-gqr?jlKN)o~sNkeO=h}<_U ze4|m=R~5}!rsd`cx8_TJDwY#>cvvmkd{g9vBf@bdT24pahZTVGWv1N-vs^JpO`Y|r z$c{6mgMvz}s8e(kUeD$GMikfdB1BM5_y3jf?w<%3>D0wprJg(X2rie5aE#AC;1pg` zF!Gnei3mVkAS+qqCahnU^Zq7WizUf&IPPYc>*_>4e_Yt}7s3PEl{ZMaqJEL1cZi&U zhzS43?@yhOjBcHq(?1t^FhUCMeOeoOMfhloaJYK0qppquRiW@9t%!R!PVd$Z!v7%Y8LC*S`bPIjNk4s7IQ4F+ z=R8W}9QBpcBVUTh(Tv1gDcFGc&K)&0h@7b2zg-*Lu0Al}C*^MH+@4Pt`C_EhGpkP} z;=@XAujFTvUw2+OxdHa)jF8_7m#S0Ud|E1aYs0I*lloY^xTp3_vmPfG2ZicKV);Y+u8p~SCKv8n}5hg{_X5Luh67aV_fFT zKji1kiQwPuk)OHwxBrnY8ja))e5)GqFzmH{7|B?>1F;Px4>c`RpKA1!*)B`1oWe~{ zq;2_Od@-7y`@ho5G`&bOMtbkDdr(>$I+hy$V8_gnIzMEN*x21vW8bRqj>@9k+|0)} z=h>&mWw~*@%w?9ShW9n6Wo_QP5qr`|HPiNR$-~Q^|DU|cnwOTjY0F5xk>4J=s(IYz z57{QmNdAT&+P(ju7Wkv%?T+&cGt)M1l6QIDR&16wfX4Agb9B!?AgZ(1LQN}-NoLDC z$5t2z&1MS~Jb)*!CQ|JKc+%$++7DXdWs_eVTEDq4I}5KwXTO3;Oz}90@h=f=%z7dZ zZ)ZoktL+X49Q~#INO5~QyLbE~8uOrWzvsaTQZFx~_=yZS{)nFjQ|W`o_{jH??8Did zg&(%sgvCBGojFTnn47cKYmUh>GK^2?)Ps29sDcI`gx-DRx(f6+3IsX-%D(U8)zcKd z3J)j6PzGmiqtaEzWXr(`^x7)pL(eTzh~GbL8hIZ(h60zYSdC}HEVOsEG25I+=S3P` zZA=(-+rV4M_aQV{MR5-qKQm9K6A!_Lf1o~)cbJ=Ky)1e8y=f2LpR^*;bpObRz9!$F z7(Yh6f}fzeg{psI+y^@U6JxIV8rjyMM++scF(!LXw(nOMLoWQsWZE?18&6RwvJqBx zGkr%#nWE?-(_1vW#+YdFvC)LJ#)Jt0Gj+DIGd5=C%4|)?Od)^ST4S6gU?y!?YrKyh z10TkS*HH4q(9J@-S+Y=vP~XGGl+kvxvj;4+TgW}lnCf}fN5+XMdkm?CBa`|trKTbH z)VNA^&L(~%><`)ajVEN|r?nc=jG315Q2H>OR<45!ZKcw6@C)aQ^x8TsjKg$v9cui7&U0$OSo&_AF@bs9dgCi!mp|L} z%Gr{em65rwq!3eMe}v0rdo`p6q9XC;pB(2?a3M0cvx*$wqbDI zfo#~Ii%w@76U-Z^Kif$0bh=CCLV}EG-H4_wl$rxgt0^Z3ql=^J9OxE9`&n|Mt{kJx z%>!$Jr-~!S`ofZYHWy_+YWP?@#?!Np8lh!Ap8rrH7tsnLPaYez5n}aQP8v~) zwHBQ?D7o66hgyvjXmqaerX?M}&X#LTG2@xITx05;>Gse_zs)t=X=0ucVp-)$OY&gE ztCW=oyL?BLAWnTF&vY(CjVgkCYXNO6LIXbc&^Ihc-9ryNZai*Tbq{^~I5f<^heDq)Ub5`E zhu(hz_BwM9Z7en(rv9-uS2|y8EazW+3oS9$(U2Qbc9$5BSfUovolkNb3+d@6(e~Db zWO>TiV5wh7k3NO;wuSV?Q^s?ao`tk=EAkx@>6@*{_erFON}+RXB7MlbDv@HIHXg9# zCelk!V+=KkG`bAs4?!+7Ua<5e(v))JX^X=mdc7R&`Yoaf6~;z#@_{CG6~-n@<|4ZD z8RJ>Yjz#psGsfP#ERU;sjxVD5KSh;Gi|Fm2LKT;LX~8z*89L{Os$JVqbp=u?QKIl( zdbbh=+j%dAK8w!V?xnY$g?#Q_n)Wj&b?aVw{bx|#V=;}YLQC6I$fOJ7W*x1;>F z#q`p4$a|3goUy=?9Yo8XgOeUyOwTa4EvC1h!&DzzOkX{R5Op?*Mm>)yzm!A|F<(!j zH=jp6(-QiM*<}gM+JSVhCG^Y=7%6HAeZ%tNCA6U0c*wG53B6X0i7Z?~mKV_e)+IFO z1-Mqt5_*n#&l39l1>_$=`4^2vmQJL<2>tuf&WlJl-A519K=!(i-m8HeaUV^og$Y;P zM{m|*+{O3Nq?gd|&im+vmk@yJ@1wWC=HFBIONfX3nmj~&91hoK=Bu=<|+=o@B? 
zW=5TTH18~{GeRvEV?I4im<~+KtAx+m&XH?9>g_9{(e;Km{b{y?qw~lwICfG-J({*q zEsEf2>vpCWsh4RdIX9r5&qR7R%wdWpeK6f+X}e+c{%Lciey zh{pW?9bP%Fal$(Xb{k8~(dYEcn3kVXBnQAB4&LwE?+DZIQ-!x@`?ju2DEt@3)E`Z^ z-$bTSeET?e%KwGoWH~p1p8SRJr0-(v(|oi@<8w(`-lmONc$t044?D+Fa&I(#?OT=d zkHt0?J<`?8!+RY=47;h0Jdl6Q3FUwggP1$Emvz)Wi66On5+RWTzrB|6RS?PV|ek)zxXZYS_;g!eTU$fG< z=6_5WveJTPV}=DkIl8eK>)6Gh*O)yF`k2#w4Em1cV1s77i59~Qx}P}$zy8e}Wzg%) zu?Bs}9Bg@GJ0QgBCC^HRw_16oX!3USZI?%&QFgf_aTWqgv2kra^OB&|khm z8(1zzdFFD1-eayZ=rVJSLF4v=>(Tyxl;2~}W|mu^Kg(^i*0NiQN zvI9szXVB9JjL5t02vy5mK?j^MWYAX!5a3KUn)Yk3lZ_r`Hf;0?vxkj7X2!|=&V%3p z8_j19v(e)Ru|34w=so5X`a98H8|v>HLk}?@8AAu!ux}nmd(53nqxdGoG5cytOIx)*zc68a;fjoPrYA;LUJ4>QZxBD^-Lt7^ zV%S&sG1@CO(0?Uf> zI2$Kt7uz;uU6?@cmOD_jCGqf7p7&bFU;A5eSM@X|9<=(Cfw2G z-Oj@InOtoAslt^B^woQC@ndG1@LPn36bsGyEkel(3q8QR%0iDbud%%I%5RO)<{3Wt zR2&K|U7wS=V$)K4j9iwHmj$cft{7RfMm%!{eSFLaylcZqLiP-@bo^sN{tTL9PgtT= zif7QH9Y(@kx7(rk; z2GQ%xXM*Tc=5s+b>O*i(5Y1=q52DS?*MjH`=Aj_^j2RF1jQ$Ah6if@4jbM75*&~?V zWcCTB&zS>)X-p?LESMHCM+MW9%<;i=fO&B+eZia(Oyf?2R|V4&=Ja5C`ZS_Ob}+ro zoF7bIF&78Zw2$G0TZ8HTj}bMhg6S3Jox$`O^O~_V>G$BqU|PrA5=?I}w*}K>=HtN> zdIo$Zm^L$CK>N)7!So&T^USK{KLZ2{S3ZZd-1YZlGhnR1L&}+<2q4Wi_TPRIC2lff2 zN11~|>A*SMCN2!6(dQwjgwp-YYeMN2=Il`VjJYtBCVd9R2Se+acZAX#%=MvknR!nr zh5je_U?^>7J`zfAGoK8l@0ibq()>FmBPv(3Mj-N?SGB2G;?=i2MNtQo>GiTBw=KPuT9P`$h^f7bQOd8Vz-Z_&VU~ZgA zuQIpHq|bYBq&qT`X7qwPXVT-$7iQA?%vWa8=r2%iXeQmy><~t;FfSZSpD`O@H0jS^ zuP|E291uotFh_*ZW#;%W3cUng8b+I$SB25r%$Z^I9dmvd&F{l_tHS8DKFB-6=nIxx z!f3%?Ah(6lv&_fC=)=Dt{X!Vc_$%ubMvwm$=|f@kF0(^8ea~!!(}I4mS2#V(91u<) zGDn2d=mFGU8crJrAg>Ciw+7HoHtMllhW%X3=|Bp-1p6n)DUw$IhZh zze4)rS#*GT#Vq=UIeiw*`5K&ydS9dbwpsMq*C@Yp7EQZ`^hVTUZb3cfHq^U@a%X1I z{evjiGmBmuMEaFk^ab+}>U{%tm`#srv*|nL;Mp|)Z{Voe^vvIozj!u% z%<_uaH0C-ueKtM7oI9IdWiFmgpEGZpO;c`wYi831=3TStP3C>G=_}?#vuW11;A6At zapun1^e*$c+4Md0rP;LLzrfdK)3eOCX48kvP7ySE2<#R?_cQxM&@0Tr5%d{zR0K`B z30@dM>zGp_=#87uJ3WHFxoIGNPNiwz;XIigLCd~FeqjW?#9SUhpEB==pb7sCu8*LH znfFA{>&you=r7DiB51}f@W}|;$b2?}-eT^Fpl_M4L{Q8ycqoFNW_F0A_nD1Iy7POm zS0pWA4v3@|m?I+TlkeI8B5Bs&!I_ct(%+2*)ZbyaQt#i5xt6VwbrWn@sF8IK+a9(w zM%KLtsU?yuW^h|1En+?%NzZ|4)p1liZ??^~T!^GG7Uc9t(gV!bBk5IU(;WJo*<}t* zxx==^;x&gh-T^sa4!y-3F^9foj-Nv@4&bG8=xH$3{|+smc0kLSbLdV-<8x@*kC1a_4n4|zVGbQ&?w>>7 zFkhcTbDY4Yx%4En%UpU7OvRmO!POZp_|2t7&d3RyOV2UK&ZUo;7tf_JqrfZX(gV!t zbLmy)+`05QbMai7au;~pT-v}~Gnd|E-ZhuLV%|5GX1Rb5&85egkIkibnLFpw_sr+! z(gIiTrMdJh^R>D3A@i-dGMbSFug;Dedb4nCl zW?mCTp$0fRiZ(MBM$y~MwWpW!Wj3E;GADQ>Yu*Cz>`h2S?M}%u&(w9rMCyn(q!y ziKb_m*F@6?%-PX2Y8<#QnwBw_N7GBpJEG}R=K5%wFdn=onjU687)`G;ABm>FFrSR3 z86M!X(X^4dCz{@3z7kE}G7m*lj3>O)DTbb9_K2ZRm;+*H60e4c7+S|1A46|2FO8wg z%&TH3bRsx2hBh4%+(HqQu^XSgW;MjSzfqBI|`d~83rO%^LKH%JWw2ZlU9=+s){;KBD zXDsiWN0WTPjq_+7a|_yIZktD!nUBw-&?(?E^Jp{kg?aQgbN@X0ZVLLrFE>0i73>j9 z?@mR!A3k1V4vVD)e$Y2QmR?{^iKWl{kiI6CCi#Q2V`&|8VJyADTpml8nRmoe=rr2` z^Sx9s%{Cr?6xlw_#((JBI}I4Z5Y233@}R6hoApPH zq4vK9(t#N^&ry7vRfk(LuZi!x7HAt~wls#)n=@?FEIwZJ$qd^Rg!Om6oniax4rjiR zv;U!u-?7S9~W)l`%k|4bK?6cN}35(ER;9XHf!`9{P81a@vWPM z_RqABxqBwGv=BbD^rX>YwlTQ&4YTJYh1qj*!t6~{3$;pOmq<6Hq<6TzLL!qN<%Ofd zZ>W}~Ez}+kyZxF5!qM1ia-M}rxI+H3Y?Cc_dC{U-wn?yU)-2mp%czO;+$`HP`~kdm z7A%%WwX-qIGoLv+y)xV8WS+yxZsa}NHrD(uh0nIlc4@+`sr~aq{JEQ&qHJ#XL#OHX zOL<=427=5FNWCu9`w{=Z2`bRnpE!Erm5B0TYzVnVK*czUi?Gc$KS%{E)ln0qA1$;$ zftNazG8m#6Unr0TFR36_7cZ1e|g>(x|1T zSv^@<+Nd_D4Gy<5?WS32X}7vsS=zmo4VqfFvdq5!v({ew?1OOL`@Y}z`THFg`O-B& zI?y}({X_?9JqY0r$QgvfcLaF;KGT8raTw5%oQZmZ^RkXKI1%4{JJRe#@Mm_UZHb_- z>_`{+dr3zcIvDykbflXHgYMOiv}-WnM}X(=AHaVtrL)``C4@aj$! 
[GIT binary patch data omitted: base85-encoded binary payload, not representable as text]